org.apache.kafka.connect.sink.SinkRecord Scala Examples
The following examples show how to use org.apache.kafka.connect.sink.SinkRecord.
Each example is taken from an open-source project; the source file, project, and license are noted in the header above it.
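For orientation before the project excerpts, here is a minimal, self-contained sketch of how a SinkRecord is typically constructed and inspected. It is not taken from any of the projects below; the object name SinkRecordSketch and the field names are illustrative only.

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord

// Illustrative sketch only; not part of any project listed on this page.
object SinkRecordSketch extends App {
  // Build a Connect schema and a Struct payload, much like the tests below do.
  val schema: Schema = SchemaBuilder.struct().name("com.example.Cpu")
    .field("type", Schema.STRING_SCHEMA)
    .field("temperature", Schema.FLOAT64_SCHEMA)
    .build()

  val value = new Struct(schema)
    .put("type", "Xeon")
    .put("temperature", 60.4)

  // SinkRecord(topic, partition, keySchema, key, valueSchema, value, kafkaOffset)
  val record = new SinkRecord("cpuTopic", 0, null, null, schema, value, 1L)

  // A SinkTask or writer typically reads the topic, offset and typed value:
  println(s"topic=${record.topic()} offset=${record.kafkaOffset()}")
  println(record.value().asInstanceOf[Struct].getString("type"))
}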
Example 1
Source File: PulsarSinkTask.scala From stream-reactor with Apache License 2.0 | 7 votes |
package com.datamountaineer.streamreactor.connect.pulsar.sink

import java.util
import java.util.UUID

import com.datamountaineer.streamreactor.connect.errors.ErrorPolicyEnum
import com.datamountaineer.streamreactor.connect.pulsar.config.{PulsarConfigConstants, PulsarSinkConfig, PulsarSinkSettings}
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}

import scala.collection.JavaConverters._

  override def stop(): Unit = {
    logger.info("Stopping Pulsar sink.")
    writer.foreach(w => w.close)
    progressCounter.empty
  }

  override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {
    require(writer.nonEmpty, "Writer is not set!")
    writer.foreach(w => w.flush)
  }

  override def version: String = manifest.version()
}
Example 2
Source File: ElasticSinkTask.scala From stream-reactor with Apache License 2.0 | 6 votes |
package com.datamountaineer.streamreactor.connect.elastic6

import java.util

import com.datamountaineer.streamreactor.connect.elastic6.config.{ElasticConfig, ElasticConfigConstants, ElasticSettings}
import com.datamountaineer.streamreactor.connect.errors.ErrorPolicyEnum
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}

import scala.collection.JavaConverters._

class ElasticSinkTask extends SinkTask with StrictLogging {
  private var writer: Option[ElasticJsonWriter] = None
  private val progressCounter = new ProgressCounter
  private var enableProgress: Boolean = false
  private val manifest = JarManifest(getClass.getProtectionDomain.getCodeSource.getLocation)

  override def stop(): Unit = {
    logger.info("Stopping Elastic sink.")
    writer.foreach(w => w.close())
    progressCounter.empty
  }

  override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {
    logger.info("Flushing Elastic Sink")
  }

  override def version: String = manifest.version()
}
Example 3
Source File: JMSSinkTask.scala From stream-reactor with Apache License 2.0 | 6 votes |
package com.datamountaineer.streamreactor.connect.jms.sink

import java.util

import com.datamountaineer.streamreactor.connect.errors.ErrorPolicyEnum
import com.datamountaineer.streamreactor.connect.jms.config.{JMSConfig, JMSConfigConstants, JMSSettings}
import com.datamountaineer.streamreactor.connect.jms.sink.writer.JMSWriter
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}

import scala.collection.JavaConverters._

  override def stop(): Unit = {
    logger.info("Stopping JMS sink.")
    writer.foreach(w => w.close())
  }

  override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {
    //TODO
    //have the writer expose a is busy; can expose an await using a countdownlatch internally
  }

  override def version: String = manifest.version()
}
Example 4
Source File: KuduSinkTask.scala From stream-reactor with Apache License 2.0 | 6 votes |
package com.datamountaineer.streamreactor.connect.kudu.sink

import java.util

import com.datamountaineer.streamreactor.connect.errors.ErrorPolicyEnum
import com.datamountaineer.streamreactor.connect.kudu.config.{KuduConfig, KuduConfigConstants, KuduSettings}
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}

import scala.collection.JavaConverters._

  override def stop(): Unit = {
    logger.info("Stopping Kudu sink.")
    writer.foreach(w => w.close())
    progressCounter.empty
  }

  override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {
    require(writer.nonEmpty, "Writer is not set!")
    writer.foreach(w => w.flush())
  }

  override def version: String = manifest.version()
}
Example 5
Source File: RedisPubSubTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.redis.sink.writer import com.datamountaineer.streamreactor.connect.redis.sink.config.{RedisConfig, RedisConfigConstants, RedisConnectionInfo, RedisSinkSettings} import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct} import org.apache.kafka.connect.sink.SinkRecord import org.mockito.MockitoSugar import org.scalatest.BeforeAndAfterAll import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec import redis.clients.jedis.{Jedis, JedisPubSub} import redis.embedded.RedisServer import scala.collection.JavaConverters._ import scala.collection.mutable.ListBuffer class RedisPubSubTest extends AnyWordSpec with Matchers with BeforeAndAfterAll with MockitoSugar { val redisServer = new RedisServer(6379) override def beforeAll() = redisServer.start() override def afterAll() = redisServer.stop() "Redis PUBSUB writer" should { "write Kafka records to a Redis PubSub" in { val TOPIC = "cpuTopic" val KCQL = s"SELECT * from $TOPIC STOREAS PubSub (channel=type)" println("Testing KCQL : " + KCQL) val props = Map( RedisConfigConstants.REDIS_HOST->"localhost", RedisConfigConstants.REDIS_PORT->"6379", RedisConfigConstants.KCQL_CONFIG->KCQL ).asJava val config = RedisConfig(props) val connectionInfo = new RedisConnectionInfo("localhost", 6379, None) val settings = RedisSinkSettings(config) val writer = new RedisPubSub(settings) writer.createClient(settings) val schema = SchemaBuilder.struct().name("com.example.Cpu") .field("type", Schema.STRING_SCHEMA) .field("temperature", Schema.FLOAT64_SCHEMA) .field("voltage", Schema.FLOAT64_SCHEMA) .field("ts", Schema.INT64_SCHEMA).build() val struct1 = new Struct(schema).put("type", "Xeon").put("temperature", 60.4).put("voltage", 90.1).put("ts", 1482180657010L) val struct2 = new Struct(schema).put("type", "i7").put("temperature", 62.1).put("voltage", 103.3).put("ts", 1482180657020L) val struct3 = new Struct(schema).put("type", "i7-i").put("temperature", 64.5).put("voltage", 101.1).put("ts", 1482180657030L) val sinkRecord1 = new SinkRecord(TOPIC, 0, null, null, schema, struct1, 1) val sinkRecord2 = new SinkRecord(TOPIC, 0, null, null, schema, struct2, 2) val sinkRecord3 = new SinkRecord(TOPIC, 0, null, null, schema, struct3, 3) val jedis = new Jedis(connectionInfo.host, connectionInfo.port) // Clean up in-memory jedis jedis.flushAll() val messagesMap = collection.mutable.Map[String, ListBuffer[String]]() val t = new Thread { private val pubsub = new JedisPubSub { override def onMessage(channel: String, message: String): Unit = { messagesMap.get(channel) match { case Some(msgs) => messagesMap.put(channel, msgs += message) case None => messagesMap.put(channel, ListBuffer(message)) } } } override def run(): Unit = { jedis.subscribe(pubsub, "Xeon", "i7", "i7-i") } override def interrupt(): Unit = { pubsub.punsubscribe("*") super.interrupt() } } t.start() t.join(5000) if (t.isAlive) t.interrupt() writer.write(Seq(sinkRecord1)) writer.write(Seq(sinkRecord2, sinkRecord3)) messagesMap.size shouldBe 3 messagesMap("Xeon").head shouldBe """{"type":"Xeon","temperature":60.4,"voltage":90.1,"ts":1482180657010}""" messagesMap("i7").head shouldBe """{"type":"i7","temperature":62.1,"voltage":103.3,"ts":1482180657020}""" messagesMap("i7-i").head shouldBe """{"type":"i7-i","temperature":64.5,"voltage":101.1,"ts":1482180657030}""" } } }
Example 6
Source File: ICacheWriter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hazelcast.writers

import com.datamountaineer.streamreactor.connect.hazelcast.HazelCastConnection
import com.datamountaineer.streamreactor.connect.hazelcast.config.HazelCastSinkSettings
import com.hazelcast.core.HazelcastInstance
import javax.cache.{Cache, CacheManager}
import org.apache.kafka.connect.sink.SinkRecord

case class ICacheWriter(client: HazelcastInstance, topic: String, settings: HazelCastSinkSettings) extends Writer(settings) {
  val name: String = settings.topicObject(topic).name
  val cacheManager: CacheManager = HazelCastConnection.getCacheManager(client, s"${client.getName}-$name-cache-manager")
  val cacheWriter: Cache[String, Object] = cacheManager.getCache(name, classOf[String], classOf[Object])

  override def write(record: SinkRecord): Unit = cacheWriter.put(buildPKs(record), convert(record))

  override def close: Unit = {
    cacheWriter.close()
    cacheManager.close()
  }
}
Example 7
Source File: MapWriter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hazelcast.writers

import java.util.concurrent.TimeUnit

import com.datamountaineer.streamreactor.connect.hazelcast.config.HazelCastSinkSettings
import com.hazelcast.core.{HazelcastInstance, IMap}
import org.apache.kafka.connect.sink.SinkRecord

case class MapWriter(client: HazelcastInstance, topic: String, settings: HazelCastSinkSettings) extends Writer(settings) {
  val mapWriter: IMap[String, Object] = client.getMap(settings.topicObject(topic).name).asInstanceOf[IMap[String, Object]]

  override def write(record: SinkRecord): Unit = {
    val ttl = settings.topicObject(topic).ttl
    val keys = buildPKs(record)

    if (ttl > 0) {
      mapWriter.put(keys, convert(record), ttl, TimeUnit.MILLISECONDS)
    } else {
      mapWriter.put(keys, convert(record))
    }
  }

  override def close: Unit = {}
}
Example 8
Source File: HazelCastSinkTask.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hazelcast.sink

import java.util

import com.datamountaineer.streamreactor.connect.errors.ErrorPolicyEnum
import com.datamountaineer.streamreactor.connect.hazelcast.config.{HazelCastSinkConfig, HazelCastSinkConfigConstants, HazelCastSinkSettings}
import com.datamountaineer.streamreactor.connect.hazelcast.writers.HazelCastWriter
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}

import scala.collection.JavaConverters._

  override def stop(): Unit = {
    logger.info("Stopping Hazelcast sink.")
    writer.foreach(w => w.close())
    progressCounter.empty
  }

  override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {
    require(writer.nonEmpty, "Writer is not set!")
    writer.foreach(w => w.flush())
  }

  override def version: String = manifest.version()
}
Example 9
Source File: VoltDbWriter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.voltdb.writers import com.datamountaineer.streamreactor.connect.errors.ErrorHandler import com.datamountaineer.streamreactor.connect.schemas.ConverterUtil import com.datamountaineer.streamreactor.connect.sink.DbWriter import com.datamountaineer.streamreactor.connect.voltdb.config.VoltSettings import com.typesafe.scalalogging.StrictLogging import org.apache.kafka.connect.data.Struct import org.apache.kafka.connect.sink.SinkRecord import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException import org.voltdb.client.{ClientConfig, ClientFactory} import scala.util.Try class VoltDbWriter(settings: VoltSettings) extends DbWriter with StrictLogging with ConverterUtil with ErrorHandler { //ValidateStringParameterFn(settings.servers, "settings") //ValidateStringParameterFn(settings.user, "settings") //initialize error tracker initialize(settings.maxRetries, settings.errorPolicy) private val voltConfig = new ClientConfig(settings.user, settings.password) private val client = ClientFactory.createClient(voltConfig) VoltConnectionConnectFn(client, settings) private val proceduresMap = settings.fieldsExtractorMap.values.map { extract => val procName = s"${extract.targetTable}.${if (extract.isUpsert) "upsert" else "insert"}" logger.info(s"Retrieving the metadata for $procName ...") val fields = VoltDbMetadataReader.getProcedureParameters(client, extract.targetTable).map(_.toUpperCase) logger.info(s"$procName expected arguments are: ${fields.mkString(",")}") extract.targetTable -> ProcAndFields(procName, fields) }.toMap override def write(records: Seq[SinkRecord]): Unit = { if (records.isEmpty) { logger.debug("No records received.") } else { val t = Try(records.withFilter(_.value() != null).foreach(insert)) t.foreach(_ => logger.info("Writing complete")) handleTry(t) } } private def insert(record: SinkRecord) = { require(record.value().getClass == classOf[Struct], "Only Struct payloads are handled") val extractor = settings.fieldsExtractorMap.getOrElse(record.topic(), throw new ConfigException(s"${record.topic()} is not handled by the configuration:${settings.fieldsExtractorMap.keys.mkString(",")}")) val fieldsAndValuesMap = extractor.get(record.value().asInstanceOf[Struct]).map { case (k, v) => (k.toUpperCase, v) } logger.info(fieldsAndValuesMap.mkString(",")) val procAndFields: ProcAndFields = proceduresMap(extractor.targetTable) //get the list of arguments to pass to the table insert/upsert procedure. if the procedure expects a field and is //not present in the incoming SinkRecord it would use null //No table evolution is supported yet val arguments: Array[String] = PrepareProcedureFieldsFn(procAndFields.fields, fieldsAndValuesMap).toArray logger.info(s"Calling procedure:${procAndFields.procName} with parameters:${procAndFields.fields.mkString(",")} with arguments:${arguments.mkString(",")}") client.callProcedure(procAndFields.procName, arguments: _*) } override def close(): Unit = client.close() private case class ProcAndFields(procName: String, fields: Seq[String]) }
Example 10
Source File: ValueConverter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord

import scala.collection.JavaConverters._

object ValueConverter {
  def apply(record: SinkRecord): Struct = record.value match {
    case struct: Struct => StructValueConverter.convert(struct)
    case map: Map[_, _] => MapValueConverter.convert(map)
    case map: java.util.Map[_, _] => MapValueConverter.convert(map.asScala.toMap)
    case string: String => StringValueConverter.convert(string)
    case other => sys.error(s"Unsupported record $other:${other.getClass.getCanonicalName}")
  }
}

trait ValueConverter[T] {
  def convert(value: T): Struct
}

object StructValueConverter extends ValueConverter[Struct] {
  override def convert(struct: Struct): Struct = struct
}

object MapValueConverter extends ValueConverter[Map[_, _]] {
  def convertValue(value: Any, key: String, builder: SchemaBuilder): Any = {
    value match {
      case s: String =>
        builder.field(key, Schema.OPTIONAL_STRING_SCHEMA)
        s
      case l: Long =>
        builder.field(key, Schema.OPTIONAL_INT64_SCHEMA)
        l
      case i: Int =>
        builder.field(key, Schema.OPTIONAL_INT64_SCHEMA)
        i.toLong
      case b: Boolean =>
        builder.field(key, Schema.OPTIONAL_BOOLEAN_SCHEMA)
        b
      case f: Float =>
        builder.field(key, Schema.OPTIONAL_FLOAT64_SCHEMA)
        f.toDouble
      case d: Double =>
        builder.field(key, Schema.OPTIONAL_FLOAT64_SCHEMA)
        d
      case innerMap: java.util.Map[_, _] =>
        val innerStruct = convert(innerMap.asScala.toMap, true)
        builder.field(key, innerStruct.schema())
        innerStruct
      case innerMap: Map[_, _] =>
        val innerStruct = convert(innerMap, true)
        builder.field(key, innerStruct.schema())
        innerStruct
    }
  }

  def convert(map: Map[_, _], optional: Boolean) = {
    val builder = SchemaBuilder.struct()
    val values = map.map { case (k, v) =>
      val key = k.toString
      val value = convertValue(v, key, builder)
      key -> value
    }.toList
    if (optional) builder.optional()
    val schema = builder.build
    val struct = new Struct(schema)
    values.foreach { case (key, value) =>
      struct.put(key.toString, value)
    }
    struct
  }

  override def convert(map: Map[_, _]): Struct = convert(map, false)
}

object StringValueConverter extends ValueConverter[String] {
  override def convert(string: String): Struct = {
    val schema = SchemaBuilder.struct().field("a", Schema.OPTIONAL_STRING_SCHEMA).name("struct").build()
    new Struct(schema).put("a", string)
  }
}
Example 11
Source File: MqttSinkTask.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.mqtt.sink

import java.util

import com.datamountaineer.streamreactor.connect.converters.sink.Converter
import com.datamountaineer.streamreactor.connect.errors.ErrorPolicyEnum
import com.datamountaineer.streamreactor.connect.mqtt.config.{MqttConfigConstants, MqttSinkConfig, MqttSinkSettings}
import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.config.ConfigException
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}

import scala.collection.JavaConverters._
import scala.util.{Failure, Success, Try}

  override def stop(): Unit = {
    logger.info("Stopping Mqtt sink.")
    writer.foreach(w => w.close)
    progressCounter.empty
  }

  override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {
    require(writer.nonEmpty, "Writer is not set!")
    writer.foreach(w => w.flush)
  }

  override def version: String = manifest.version()
}
Example 12
Source File: RedisStreamTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.redis.sink.writer /* * Copyright 2017 Datamountaineer. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.util import com.datamountaineer.streamreactor.connect.redis.sink.RedisSinkTask import com.datamountaineer.streamreactor.connect.redis.sink.config.{RedisConfig, RedisConfigConstants, RedisConnectionInfo, RedisSinkSettings} import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct} import org.apache.kafka.connect.sink.SinkRecord import org.mockito.MockitoSugar import org.scalatest.BeforeAndAfterAll import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec import redis.clients.jedis.{Jedis, StreamEntryID} import scala.collection.JavaConverters._ class RedisStreamTest extends AnyWordSpec with Matchers with BeforeAndAfterAll with MockitoSugar { // // val redisServer = new RedisServer(6379) // // override def beforeAll() = redisServer.start() // // override def afterAll() = redisServer.stop() "Redis Stream writer" should { "write Kafka records to a Redis Stream" in { val TOPIC = "cpuTopic" val KCQL = s"INSERT INTO stream1 SELECT * from $TOPIC STOREAS STREAM" println("Testing KCQL : " + KCQL) val props = Map( RedisConfigConstants.REDIS_HOST->"localhost", RedisConfigConstants.REDIS_PORT->"6379", RedisConfigConstants.KCQL_CONFIG->KCQL, RedisConfigConstants.REDIS_PASSWORD -> "" ).asJava val config = RedisConfig(props) val connectionInfo = new RedisConnectionInfo("localhost", 6379, None) val settings = RedisSinkSettings(config) val writer = new RedisStreams(settings) val schema = SchemaBuilder.struct().name("com.example.Cpu") .field("type", Schema.STRING_SCHEMA) .field("temperature", Schema.FLOAT64_SCHEMA) .field("voltage", Schema.FLOAT64_SCHEMA) .field("ts", Schema.INT64_SCHEMA).build() val struct1 = new Struct(schema).put("type", "Xeon").put("temperature", 60.4).put("voltage", 90.1).put("ts", 1482180657010L) val sinkRecord1 = new SinkRecord(TOPIC, 0, null, null, schema, struct1, 1) val jedis = mock[Jedis] writer.jedis = jedis val map = new util.HashMap[String, String]() map.put("type", "Xeon") map.put("temperature", "60.4") map.put("voltage", "90.1") map.put("ts", 1482180657010L.toString) when(jedis.auth("")).isLenient() when(jedis.xadd("stream1", null, map)).thenReturn(mock[StreamEntryID]) writer.initialize(1, settings.errorPolicy) writer.write(Seq(sinkRecord1)) } } }
Example 13
Source File: HazelCastWriter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hazelcast.writers import java.util.concurrent.Executors import com.datamountaineer.streamreactor.connect.concurrent.ExecutorExtension._ import com.datamountaineer.streamreactor.connect.concurrent.FutureAwaitWithFailFastFn import com.datamountaineer.streamreactor.connect.errors.ErrorHandler import com.datamountaineer.streamreactor.connect.hazelcast.config.{HazelCastSinkSettings, HazelCastStoreAsType, TargetType} import com.datamountaineer.streamreactor.connect.schemas.ConverterUtil import com.typesafe.scalalogging.StrictLogging import org.apache.kafka.connect.sink.SinkRecord import scala.concurrent.duration._ import scala.util.{Failure, Success} def write(records: Seq[SinkRecord]): Unit = { if (records.isEmpty) { logger.debug("No records received.") } else { logger.debug(s"Received ${records.size} records.") if (settings.allowParallel) parallelWrite(records) else sequentialWrite(records) logger.debug(s"Written ${records.size}") } } def sequentialWrite(records: Seq[SinkRecord]): Any = { try { records.foreach(r => insert(r)) } catch { case t: Throwable => logger.error(s"There was an error inserting the records ${t.getMessage}", t) handleTry(Failure(t)) } } def parallelWrite(records: Seq[SinkRecord]): Any = { logger.warn("Running parallel writes! Order of writes not guaranteed.") val executor = Executors.newFixedThreadPool(settings.threadPoolSize) try { val futures = records.map { record => executor.submit { insert(record) () } } //when the call returns the pool is shutdown FutureAwaitWithFailFastFn(executor, futures, 1.hours) handleTry(Success(())) logger.debug(s"Processed ${futures.size} records.") } catch { case t: Throwable => logger.error(s"There was an error inserting the records ${t.getMessage}", t) handleTry(Failure(t)) } } def insert(record: SinkRecord): Unit = { val writer = writers.get(record.topic()) writer.foreach(w => w.write(record)) } def close(): Unit = { logger.info("Shutting down Hazelcast client.") writers.values.foreach(_.close) settings.client.shutdown() } def flush(): Unit = {} }
Example 14
Source File: RedisInsertSortedSetTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.redis.sink.writer import com.datamountaineer.streamreactor.connect.redis.sink.config.{RedisConfig, RedisConfigConstants, RedisConnectionInfo, RedisSinkSettings} import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct} import org.apache.kafka.connect.sink.SinkRecord import org.mockito.MockitoSugar import org.scalatest.BeforeAndAfterAll import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec import redis.clients.jedis.Jedis import redis.embedded.RedisServer import scala.collection.JavaConverters._ class RedisInsertSortedSetTest extends AnyWordSpec with Matchers with BeforeAndAfterAll with MockitoSugar { val redisServer = new RedisServer(6379) override def beforeAll() = redisServer.start() override def afterAll() = redisServer.stop() "Redis INSERT into Sorted Set (SS) writer" should { "write Kafka records to a Redis Sorted Set" in { val TOPIC = "cpuTopic" val KCQL = s"INSERT INTO cpu_stats SELECT * from $TOPIC STOREAS SortedSet(score=ts)" println("Testing KCQL : " + KCQL) val props = Map( RedisConfigConstants.REDIS_HOST->"localhost", RedisConfigConstants.REDIS_PORT->"6379", RedisConfigConstants.KCQL_CONFIG->KCQL ).asJava val config = RedisConfig(props) val connectionInfo = new RedisConnectionInfo("localhost", 6379, None) val settings = RedisSinkSettings(config) val writer = new RedisInsertSortedSet(settings) writer.createClient(settings) val schema = SchemaBuilder.struct().name("com.example.Cpu") .field("type", Schema.STRING_SCHEMA) .field("temperature", Schema.FLOAT64_SCHEMA) .field("voltage", Schema.FLOAT64_SCHEMA) .field("ts", Schema.INT64_SCHEMA).build() val struct1 = new Struct(schema).put("type", "Xeon").put("temperature", 60.4).put("voltage", 90.1).put("ts", 1482180657010L) val struct2 = new Struct(schema).put("type", "i7").put("temperature", 62.1).put("voltage", 103.3).put("ts", 1482180657020L) val struct3 = new Struct(schema).put("type", "i7-i").put("temperature", 64.5).put("voltage", 101.1).put("ts", 1482180657030L) val sinkRecord1 = new SinkRecord(TOPIC, 0, null, null, schema, struct1, 1) val sinkRecord2 = new SinkRecord(TOPIC, 0, null, null, schema, struct2, 2) val sinkRecord3 = new SinkRecord(TOPIC, 0, null, null, schema, struct3, 3) val jedis = new Jedis(connectionInfo.host, connectionInfo.port) // Clean up in-memory jedis jedis.flushAll() writer.write(Seq(sinkRecord1)) writer.write(Seq(sinkRecord2, sinkRecord3)) // Redis cardinality should now be 3 jedis.zcard("cpu_stats") shouldBe 3 val allSSrecords = jedis.zrange("cpu_stats", 0, 999999999999L) val results = allSSrecords.asScala.toList results.head shouldBe """{"type":"Xeon","temperature":60.4,"voltage":90.1,"ts":1482180657010}""" results(1) shouldBe """{"type":"i7","temperature":62.1,"voltage":103.3,"ts":1482180657020}""" results(2) shouldBe """{"type":"i7-i","temperature":64.5,"voltage":101.1,"ts":1482180657030}""" } } }
Example 15
Source File: RedisFieldsKeyBuilder.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.redis.sink.writer

import com.datamountaineer.streamreactor.connect.rowkeys.StringKeyBuilder
import org.apache.kafka.connect.data.{Field, Schema, Struct}
import org.apache.kafka.connect.sink.SinkRecord

import scala.annotation.tailrec
import scala.collection.JavaConverters._

  override def build(record: SinkRecord): String = {
    val struct: Struct = record.value.asInstanceOf[Struct]
    val schema: Schema = struct.schema

    def extractAvailableFieldNames(schema: Schema): Seq[String] = {
      if (schema.`type` == Schema.Type.STRUCT) {
        val fields = schema.fields
        fields.asScala.map(_.name) ++ fields.asScala.flatMap { f =>
          extractAvailableFieldNames(f.schema).map(name => f.name + "." + name)
        }
      } else Seq.empty
    }

    val availableFields = extractAvailableFieldNames(schema)
    val missingKeys = keys.filterNot(availableFields.contains)
    require(
      missingKeys.isEmpty,
      s"${missingKeys.mkString(",")} keys are not present in the SinkRecord payload: ${availableFields.mkString(", ")}"
    )

    def getValue(key: String): AnyRef = {
      @tailrec
      def findValue(keyParts: List[String], obj: AnyRef): Option[AnyRef] = (obj, keyParts) match {
        case (f: Field, k :: tail) => findValue(tail, f.schema.field(k))
        case (s: Struct, k :: tail) => findValue(tail, s.get(k))
        case (v, _) => Option(v)
      }

      findValue(key.split('.').toList, struct).getOrElse {
        throw new IllegalArgumentException(
          s"$key field value is null. Non null value is required for the fields creating the row key"
        )
      }
    }

    keys.map(getValue).mkString(pkDelimiter)
  }
}
Example 16
Source File: RedisGeoAdd.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.redis.sink.writer import com.datamountaineer.kcql.Kcql import com.datamountaineer.streamreactor.connect.redis.sink.config.{RedisKCQLSetting, RedisSinkSettings} import com.datamountaineer.streamreactor.connect.schemas.StructFieldsExtractor import org.apache.kafka.connect.data.Struct import org.apache.kafka.connect.sink.SinkRecord import scala.collection.JavaConverters._ import scala.util.Try import scala.util.control.Exception.allCatch class RedisGeoAdd(sinkSettings: RedisSinkSettings) extends RedisWriter with GeoAddSupport { val configs: Set[Kcql] = sinkSettings.kcqlSettings.map(_.kcqlConfig) configs.foreach { c => assert(c.getSource.trim.length > 0, "You need to supply a valid source kafka topic to fetch records from. Review your KCQL syntax") assert(c.getPrimaryKeys.asScala.length >= 1, "The Redis GeoAdd mode requires at least 1 PK (Primary Key) to be defined") assert(c.getStoredAs.equalsIgnoreCase("GeoAdd"), "The Redis GeoAdd mode requires the KCQL syntax: STOREAS GeoAdd") } // Write a sequence of SinkRecords to Redis override def write(records: Seq[SinkRecord]): Unit = { if (records.isEmpty) logger.debug("No records received on 'GeoAdd' Redis writer") else { logger.debug(s"'GeoAdd' Redis writer received ${records.size} records") insert(records.groupBy(_.topic)) } } // Insert a batch of sink records def insert(records: Map[String, Seq[SinkRecord]]): Unit = { records.foreach { case (topic, sinkRecords: Seq[SinkRecord]) => { val topicSettings: Set[RedisKCQLSetting] = sinkSettings.kcqlSettings.filter(_.kcqlConfig.getSource == topic) if (topicSettings.isEmpty) logger.warn(s"Received a batch for topic $topic - but no KCQL supports it") //pass try to error handler and try val t = Try { sinkRecords.foreach { record => topicSettings.map { KCQL => val extractor = StructFieldsExtractor(includeAllFields = false, KCQL.kcqlConfig.getPrimaryKeys.asScala.map(f => f.getName -> f.getName).toMap) val fieldsAndValues = extractor.get(record.value.asInstanceOf[Struct]).toMap val pkValue = KCQL.kcqlConfig.getPrimaryKeys.asScala.map(pk => fieldsAndValues(pk.getName).toString).mkString(":") // Use the target (and optionally the prefix) to name the GeoAdd key val optionalPrefix = if (Option(KCQL.kcqlConfig.getTarget).isEmpty) "" else KCQL.kcqlConfig.getTarget.trim val key = optionalPrefix + pkValue val recordToSink = convert(record, fields = KCQL.fieldsAndAliases, ignoreFields = KCQL.ignoredFields) val payload = convertValueToJson(recordToSink) val longitudeField = getLongitudeField(KCQL.kcqlConfig) val latitudeField = getLatitudeField(KCQL.kcqlConfig) val longitude = getFieldValue(record, longitudeField) val latitude = getFieldValue(record, latitudeField) if (isDoubleNumber(longitude) && isDoubleNumber(latitude)) { logger.debug(s"GEOADD $key longitude=$longitude latitude=$latitude payload = ${payload.toString}") val response = jedis.geoadd(key, longitude.toDouble, latitude.toDouble, payload.toString) if (response == 1) { logger.debug("New element added") } else if (response == 0) logger.debug("The element was already a member of the sorted set and the score was updated") response } else { logger.warn(s"GeoAdd record contains invalid longitude=$longitude and latitude=$latitude values, " + s"Record with key ${record.key} is skipped"); None } } } } handleTry(t) } logger.debug(s"Wrote ${sinkRecords.size} rows for topic $topic") } } def getFieldValue(record: SinkRecord, fieldName: String): String = { val struct = record.value().asInstanceOf[Struct] 
struct.getString(fieldName) } def isDoubleNumber(s: String): Boolean = (allCatch opt s.toDouble).isDefined }
Example 17
Source File: RedisPubSub.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.redis.sink.writer import com.datamountaineer.kcql.Kcql import com.datamountaineer.streamreactor.connect.redis.sink.config.{RedisKCQLSetting, RedisSinkSettings} import com.datamountaineer.streamreactor.connect.rowkeys.StringStructFieldsStringKeyBuilder import org.apache.kafka.connect.sink.SinkRecord import scala.collection.JavaConverters._ import scala.util.Try class RedisPubSub(sinkSettings: RedisSinkSettings) extends RedisWriter with PubSubSupport { val configs: Set[Kcql] = sinkSettings.kcqlSettings.map(_.kcqlConfig) configs.foreach { c => // assert(c.getTarget.length > 0, "Add to your KCQL syntax : INSERT INTO REDIS_KEY_NAME ") assert(c.getSource.trim.length > 0, "You need to define one (1) topic to source data. Add to your KCQL syntax: SELECT * FROM topicName") val allFields = if (c.getIgnoredFields.isEmpty) false else true assert(c.getFields.asScala.nonEmpty || allFields, "You need to SELECT at least one field from the topic to be published to the redis channel. Please review the KCQL syntax of the connector") assert(c.getPrimaryKeys.isEmpty, "They keyword PK (Primary Key) is not supported in Redis PUBSUB mode. Please review the KCQL syntax of connector") assert(c.getStoredAs.equalsIgnoreCase("PubSub"), "This mode requires the KCQL syntax: STOREAS PubSub") } // Write a sequence of SinkRecords to Redis override def write(records: Seq[SinkRecord]): Unit = { if (records.isEmpty) logger.debug("No records received on 'PUBSUB' Redis writer") else { logger.debug(s"'PUBSUB' Redis writer received ${records.size} records") insert(records.groupBy(_.topic)) } } // Insert a batch of sink records def insert(records: Map[String, Seq[SinkRecord]]): Unit = { records.foreach({ case (topic, sinkRecords: Seq[SinkRecord]) => { val topicSettings: Set[RedisKCQLSetting] = sinkSettings.kcqlSettings.filter(_.kcqlConfig.getSource == topic) if (topicSettings.isEmpty) logger.warn(s"Received a batch for topic $topic - but no KCQL supports it") val t = Try { sinkRecords.foreach { record => topicSettings.map { KCQL => // Get a SinkRecord val recordToSink = convert(record, fields = KCQL.fieldsAndAliases, ignoreFields = KCQL.ignoredFields) // Use the target to name the SortedSet val payload = convertValueToJson(recordToSink) val channelField = getChannelField(KCQL.kcqlConfig) val channel = StringStructFieldsStringKeyBuilder(Seq(channelField)).build(record) logger.debug(s"PUBLISH $channel channel = $channel payload = ${payload.toString}") val response = jedis.publish(channel, payload.toString) logger.debug(s"Published a new message to $response clients.") response } } } handleTry(t) } logger.debug(s"Published ${sinkRecords.size} messages for topic $topic") }) } }
Example 18
Source File: RedisStreams.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.redis.sink.writer import com.datamountaineer.kcql.Kcql import com.datamountaineer.streamreactor.connect.redis.sink.config.{RedisKCQLSetting, RedisSinkSettings} import com.fasterxml.jackson.databind.ObjectMapper import org.apache.kafka.connect.errors.ConnectException import org.apache.kafka.connect.sink.SinkRecord import scala.collection.JavaConverters._ import scala.util.{Failure, Success, Try} class RedisStreams(sinkSettings: RedisSinkSettings) extends RedisWriter with PubSubSupport { val configs: Set[Kcql] = sinkSettings.kcqlSettings.map(_.kcqlConfig) configs.foreach { c => // assert(c.getTarget.length > 0, "Add to your KCQL syntax : INSERT INTO REDIS_KEY_NAME ") assert(c.getSource.trim.length > 0, "You need to define one (1) topic to source data. Add to your KCQL syntax: SELECT * FROM topicName") val allFields = if (c.getIgnoredFields.isEmpty) false else true assert(c.getStoredAs.equalsIgnoreCase("Stream"), "This mode requires the KCQL syntax: STOREAS Stream") } // Write a sequence of SinkRecords to Redis override def write(records: Seq[SinkRecord]): Unit = { if (records.isEmpty) logger.debug("No records received on 'STREAM' Redis writer") else { logger.debug(s"'STREAM' Redis writer received ${records.size} records") insert(records.groupBy(_.topic)) } } // Insert a batch of sink records def insert(records: Map[String, Seq[SinkRecord]]): Unit = { records.foreach({ case (topic, sinkRecords: Seq[SinkRecord]) => { val topicSettings: Set[RedisKCQLSetting] = sinkSettings.kcqlSettings.filter(_.kcqlConfig.getSource == topic) if (topicSettings.isEmpty) logger.warn(s"Received a batch for topic $topic - but no KCQL supports it") val t = Try { sinkRecords.foreach { record => topicSettings.map { KCQL => // Get a SinkRecord val recordToSink = convert(record, fields = KCQL.fieldsAndAliases, ignoreFields = KCQL.ignoredFields) val jsonPayload = convertValueToJson(recordToSink) val payload = Try(new ObjectMapper().convertValue(jsonPayload, classOf[java.util.HashMap[String, Any]])) match { case Success(value) => value.asScala.toMap.map{ case(k, v) => (k, v.toString) } case Failure(exception) => throw new ConnectException(s"Failed to convert payload to key value pairs", exception) } jedis.xadd(KCQL.kcqlConfig.getTarget, null, payload.asJava) } } } handleTry(t) } logger.debug(s"Published ${sinkRecords.size} messages for topic $topic") }) } }
Example 19
Source File: TestUtilsBase.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect

import java.util
import java.util.Collections

import org.apache.avro.generic.{GenericData, GenericRecord}
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord
import org.apache.kafka.connect.source.SourceTaskContext
import org.apache.kafka.connect.storage.OffsetStorageReader
import org.mockito.Mockito._
import org.mockito.MockitoSugar
import org.scalatest.BeforeAndAfter
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

import scala.collection.JavaConverters._

    //set up partition
    val partition: util.Map[String, String] = Collections.singletonMap(lookupPartitionKey, table)
    //as a list to search for
    val partitionList: util.List[util.Map[String, String]] = List(partition).asJava
    //set up the offset
    val offset: util.Map[String, Object] = Collections.singletonMap(offsetColumn, offsetValue)
    //create offsets to initialize from
    val offsets: util.Map[util.Map[String, String], util.Map[String, Object]] = Map(partition -> offset).asJava

    //mock out reader and task context
    val taskContext = mock[SourceTaskContext]
    val reader = mock[OffsetStorageReader]
    when(reader.offsets(partitionList)).thenReturn(offsets)
    when(taskContext.offsetStorageReader()).thenReturn(reader)

    taskContext
  }
}
Example 20
Source File: CoapWriter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.coap.sink

import com.datamountaineer.streamreactor.connect.coap.configs.CoapSetting
import com.datamountaineer.streamreactor.connect.coap.connection.CoapManager
import com.datamountaineer.streamreactor.connect.converters.source.SinkRecordToJson
import com.datamountaineer.streamreactor.connect.errors.ErrorHandler
import org.apache.kafka.connect.errors.ConnectException
import org.apache.kafka.connect.sink.SinkRecord
import org.eclipse.californium.core.CoapResponse
import org.eclipse.californium.core.coap.MediaTypeRegistry

import scala.collection.JavaConverters._
import scala.util.Try

class CoapWriter(setting: CoapSetting) extends CoapManager(setting) with ErrorHandler {
  logger.info(s"Initialising CoapWriter for resource ${setting.kcql.getTarget}")

  //initialize error tracker
  initialize(setting.retries.get, setting.errorPolicy.get)

  val fields = Map(setting.kcql.getSource -> setting.kcql.getFields.asScala.map(fa => (fa.getName, fa.getAlias)).toMap)
  val ignoredFields = Map(setting.kcql.getSource -> setting.kcql.getIgnoredFields.asScala.map(f => f.getName).toSet)

  def write(records: List[SinkRecord]): Option[Unit] = {
    val responses = Try(records
      .map(record => SinkRecordToJson(record, fields, ignoredFields))
      .map(json => (json, client.put(json, MediaTypeRegistry.APPLICATION_JSON)))
      .filterNot({ case (_, resp) => resp.getCode.codeClass.equals(2) })
      .foreach({ case (json, resp) =>
        logger.error(s"Failure sending message $json. Response is ${resp.advanced().getPayload()}, " +
          s"Code ${resp.getCode.toString}")
        throw new ConnectException(s"Failure sending message $json. Response is ${resp.advanced().getPayload()}, " +
          s"Code ${resp.getCode.toString}")
      }))
    handleTry(responses)
  }

  def stop(): CoapResponse = delete()
}

object CoapWriter {
  def apply(setting: CoapSetting): CoapWriter = new CoapWriter(setting)
}
Example 21
Source File: CoapSinkTask.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.coap.sink import java.util import com.datamountaineer.streamreactor.connect.coap.configs.{CoapConstants, CoapSettings, CoapSinkConfig} import com.datamountaineer.streamreactor.connect.errors.ErrorPolicyEnum import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter} import com.typesafe.scalalogging.StrictLogging import org.apache.kafka.clients.consumer.OffsetAndMetadata import org.apache.kafka.common.TopicPartition import org.apache.kafka.connect.sink.{SinkRecord, SinkTask} import scala.collection.JavaConverters._ import scala.collection.mutable class CoapSinkTask extends SinkTask with StrictLogging { private val writers = mutable.Map.empty[String, CoapWriter] private val progressCounter = new ProgressCounter private var enableProgress: Boolean = false private val manifest = JarManifest(getClass.getProtectionDomain.getCodeSource.getLocation) override def start(props: util.Map[String, String]): Unit = { logger.info(scala.io.Source.fromInputStream(getClass.getResourceAsStream("/coap-sink-ascii.txt")).mkString + s" $version") logger.info(manifest.printManifest()) val conf = if (context.configs().isEmpty) props else context.configs() val sinkConfig = CoapSinkConfig(conf) enableProgress = sinkConfig.getBoolean(CoapConstants.PROGRESS_COUNTER_ENABLED) val settings = CoapSettings(sinkConfig) //if error policy is retry set retry interval if (settings.head.errorPolicy.getOrElse(ErrorPolicyEnum.THROW).equals(ErrorPolicyEnum.RETRY)) { context.timeout(sinkConfig.getString(CoapConstants.ERROR_RETRY_INTERVAL).toLong) } settings.map(s => (s.kcql.getSource, CoapWriter(s))).map({ case (k, v) => writers.put(k, v) }) } override def put(records: util.Collection[SinkRecord]): Unit = { records.asScala.map(r => writers(r.topic()).write(List(r))) val seq = records.asScala.toVector if (enableProgress) { progressCounter.update(seq) } } override def stop(): Unit = { writers.foreach({ case (t, w) => logger.info(s"Shutting down writer for $t") w.stop() }) progressCounter.empty } override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {} override def version: String = manifest.version() }
Example 22
Source File: TwitterSinkTask.scala From kafka-tweet-producer with Apache License 2.0 | 5 votes |
package com.eneco.trading.kafka.connect.twitter

import java.util

import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}

import scala.collection.JavaConverters._
import scala.util.{Success, Failure}

class TwitterSinkTask extends SinkTask with Logging {
  var writer: Option[SimpleTwitterWriter] = None

  override def start(props: util.Map[String, String]): Unit = {
    val sinkConfig = new TwitterSinkConfig(props)
    writer = Some(new TwitterWriter(
      sinkConfig.getString(TwitterSinkConfig.CONSUMER_KEY_CONFIG),
      sinkConfig.getPassword(TwitterSinkConfig.CONSUMER_SECRET_CONFIG).value,
      sinkConfig.getString(TwitterSinkConfig.TOKEN_CONFIG),
      sinkConfig.getPassword(TwitterSinkConfig.SECRET_CONFIG).value))
  }

  override def put(records: util.Collection[SinkRecord]): Unit =
    records.asScala
      .map(_.value.toString)
      .map(text => (text, writer match {
        case Some(writer) => writer.updateStatus(text)
        case None => Failure(new IllegalStateException("twitter writer is not set"))
      }))
      .foreach { case (text, result) =>
        result match {
          case Success(id) => log.info(s"successfully tweeted `${text}`; got assigned id ${id}")
          case Failure(err) => log.warn(s"tweeting `${text}` failed: ${err.getMessage}")
        }
      }

  override def stop(): Unit = {
  }

  override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]) = {
  }

  override def version(): String = ""
}
Example 23
Source File: TestSinkTask.scala From kafka-tweet-producer with Apache License 2.0 | 5 votes |
package com.eneco.trading.kafka.connect.twitter

import org.apache.kafka.connect.sink.SinkRecord

import scala.collection.JavaConverters._
import scala.util.{Success, Try}

class TestSinkTask extends TestTwitterBase {
  test("Strings put to the Task are tweeted") {
    val sinkTask = new TwitterSinkTask()
    val myTestTweet = "I tweet, ergo sum."
    sinkTask.writer = Some(new SimpleTwitterWriter { //TODO: use DI?
      def updateStatus(s: String): Try[Long] = {
        s shouldEqual myTestTweet
        Success(5)
      }
    })
    val sr = new SinkRecord("topic", 5, null, null, null, myTestTweet, 123)
    sinkTask.put(Seq(sr).asJava)
  }
}
Example 24
Source File: TwitterSinkTask.scala From kafka-connect-twitter with Apache License 2.0 | 5 votes |
package com.eneco.trading.kafka.connect.twitter

import java.util

import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}

import scala.collection.JavaConverters._
import scala.util.{Success, Failure}

class TwitterSinkTask extends SinkTask with Logging {
  var writer: Option[SimpleTwitterWriter] = None

  override def start(props: util.Map[String, String]): Unit = {
    val sinkConfig = new TwitterSinkConfig(props)
    writer = Some(new TwitterWriter(
      sinkConfig.getString(TwitterSinkConfig.CONSUMER_KEY_CONFIG),
      sinkConfig.getPassword(TwitterSinkConfig.CONSUMER_SECRET_CONFIG).value,
      sinkConfig.getString(TwitterSinkConfig.TOKEN_CONFIG),
      sinkConfig.getPassword(TwitterSinkConfig.SECRET_CONFIG).value))
  }

  override def put(records: util.Collection[SinkRecord]): Unit =
    records.asScala
      .map(_.value.toString)
      .map(text => (text, writer match {
        case Some(writer) => writer.updateStatus(text)
        case None => Failure(new IllegalStateException("twitter writer is not set"))
      }))
      .foreach { case (text, result) =>
        result match {
          case Success(id) => log.info(s"successfully tweeted `${text}`; got assigned id ${id}")
          case Failure(err) => log.warn(s"tweeting `${text}` failed: ${err.getMessage}")
        }
      }

  override def stop(): Unit = {
  }

  override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]) = {
  }

  override def version(): String = ""
}
Example 25
Source File: TestSinkTask.scala From kafka-connect-twitter with Apache License 2.0 | 5 votes |
package com.eneco.trading.kafka.connect.twitter

import org.apache.kafka.connect.sink.SinkRecord

import scala.collection.JavaConverters._
import scala.util.{Success, Try}

class TestSinkTask extends TestTwitterBase {
  test("Strings put to the Task are tweeted") {
    val sinkTask = new TwitterSinkTask()
    val myTestTweet = "I tweet, ergo sum."
    sinkTask.writer = Some(new SimpleTwitterWriter { //TODO: use DI?
      def updateStatus(s: String): Try[Long] = {
        s shouldEqual myTestTweet
        Success(5)
      }
    })
    val sr = new SinkRecord("topic", 5, null, null, null, myTestTweet, 123)
    sinkTask.put(Seq(sr).asJava)
  }
}
Example 26
Source File: RowKeyBuilderString.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.rowkeys

import org.apache.kafka.connect.data.{Schema, Struct}
import org.apache.kafka.connect.sink.SinkRecord

import scala.collection.JavaConverters._

  override def build(record: SinkRecord): String = {
    val struct = record.value().asInstanceOf[Struct]
    val schema = struct.schema

    val availableFields = schema.fields().asScala.map(_.name).toSet
    val missingKeys = keys.filterNot(availableFields.contains)
    require(missingKeys.isEmpty,
      s"${missingKeys.mkString(",")} keys are not present in the SinkRecord payload:${availableFields.mkString(",")}")

    keys.flatMap { case key =>
      val field = schema.field(key)
      val value = struct.get(field)
      require(value != null,
        s"$key field value is null. Non null value is required for the fields creating the Hbase row key")
      if (availableSchemaTypes.contains(field.schema().`type`())) Some(value.toString) else None
    }.mkString(keyDelimiter)
  }
}
Example 27
Source File: SinkRecordToJson.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.converters.source import com.datamountaineer.streamreactor.connect.schemas.ConverterUtil import com.fasterxml.jackson.databind.ObjectMapper import com.landoop.json.sql.JacksonJson import org.apache.kafka.connect.data.Schema import org.apache.kafka.connect.sink.SinkRecord import org.json4s.jackson.JsonMethods._ import scala.util.Try object SinkRecordToJson extends ConverterUtil { private val mapper = new ObjectMapper() def apply(record: SinkRecord, fields: Map[String, Map[String, String]], ignoreFields: Map[String, Set[String]]): String = { val schema = record.valueSchema() val value = record.value() if (schema == null) { if(value == null){ throw new IllegalArgumentException(s"The sink record value is null.(topic=${record.topic()} partition=${record.kafkaPartition()} offset=${record.kafkaOffset()})".stripMargin) } //try to take it as string value match { case map: java.util.Map[_, _] => val extracted = convertSchemalessJson(record, fields.getOrElse(record.topic(), Map.empty), ignoreFields.getOrElse(record.topic(), Set.empty)) .asInstanceOf[java.util.Map[String, Any]] //not ideal; but the implementation is hashmap anyway mapper.writeValueAsString(extracted) case other => sys.error( s""" |For schemaless record only String and Map types are supported. Class =${Option(other).map(_.getClass.getCanonicalName).getOrElse("unknown(null value)}")} |Record info: |topic=${record.topic()} partition=${record.kafkaPartition()} offset=${record.kafkaOffset()} |${Try(JacksonJson.toJson(value)).getOrElse("")}""".stripMargin) } } else { schema.`type`() match { case Schema.Type.STRING => val extracted = convertStringSchemaAndJson(record, fields.getOrElse(record.topic(), Map.empty), ignoreFields.getOrElse(record.topic(), Set.empty)) compact(render(extracted)) case Schema.Type.STRUCT => val extracted = convert(record, fields.getOrElse(record.topic(), Map.empty), ignoreFields.getOrElse(record.topic(), Set.empty)) simpleJsonConverter.fromConnectData(extracted.valueSchema(), extracted.value()).toString case other => sys.error(s"$other schema is not supported") } } } }
Example 28
Source File: AvroConverter.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.converters.sink import com.datamountaineer.streamreactor.connect.converters.MsgKey import io.confluent.connect.avro.AvroData import java.io.ByteArrayOutputStream import java.io.File import org.apache.avro.{Schema => AvroSchema} import org.apache.avro.generic.GenericRecord import org.apache.avro.io.EncoderFactory import org.apache.avro.reflect.ReflectDatumWriter import org.apache.kafka.connect.sink.SinkRecord import org.apache.zookeeper.server.quorum.QuorumPeerConfig.ConfigException class AvroConverter extends Converter { private val avroData = new AvroData(8) private var sinkToSchemaMap: Map[String, AvroSchema] = Map.empty private var avroWritersMap: Map[String, ReflectDatumWriter[Object]] = Map.empty override def convert(sinkTopic: String, data: SinkRecord): SinkRecord = { Option(data) match { case None => new SinkRecord( sinkTopic, 0, null, null, avroData.toConnectSchema(sinkToSchemaMap(sinkTopic)), null, 0 ) case Some(_) => val kafkaTopic = data.topic() val writer = avroWritersMap.getOrElse(kafkaTopic.toLowerCase, throw new ConfigException(s"Invalid ${AvroConverter.SCHEMA_CONFIG} is not configured for $kafkaTopic")) val output = new ByteArrayOutputStream(); val decoder = EncoderFactory.get().binaryEncoder(output, null) output.reset() val avro = avroData.fromConnectData(data.valueSchema(), data.value()) avro.asInstanceOf[GenericRecord] val record = writer.write(avro, decoder) decoder.flush() val arr = output.toByteArray new SinkRecord( kafkaTopic, data.kafkaPartition(), MsgKey.schema, MsgKey.getStruct(sinkTopic, data.key().toString()), data.valueSchema(), arr, 0 ) } } override def initialize(config: Map[String, String]): Unit = { sinkToSchemaMap = AvroConverter.getSchemas(config) avroWritersMap = sinkToSchemaMap.map { case (key, schema) => key -> new ReflectDatumWriter[Object](schema) } } } object AvroConverter { val SCHEMA_CONFIG = "connect.converter.avro.schemas" def getSchemas(config: Map[String, String]): Map[String, AvroSchema] = { config.getOrElse(SCHEMA_CONFIG, throw new ConfigException(s"$SCHEMA_CONFIG is not provided")) .toString .split(';') .filter(_.trim.nonEmpty) .map(_.split("=")) .map { case Array(sink, path) => val file = new File(path) if (!file.exists()) { throw new ConfigException(s"Invalid $SCHEMA_CONFIG. The file $path doesn't exist!") } val s = sink.trim.toLowerCase() if (s.isEmpty) { throw new ConfigException(s"Invalid $SCHEMA_CONFIG. The topic is not valid for entry containing $path") } s -> new AvroSchema.Parser().parse(file) case other => throw new ConfigException(s"$SCHEMA_CONFIG is not properly set. The format is Mqtt_Sink->AVRO_FILE") }.toMap } }
Example 29
Source File: BytesConverter.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.converters.sink

import com.datamountaineer.streamreactor.connect.converters.MsgKey
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.sink.SinkRecord

class BytesConverter extends Converter {
  override def convert(sinkTopic: String, data: SinkRecord): SinkRecord = {
    Option(data) match {
      case None =>
        new SinkRecord(
          sinkTopic,
          0,
          null,
          null,
          Schema.BYTES_SCHEMA,
          null,
          0
        )
      case Some(_) =>
        new SinkRecord(
          data.topic(),
          data.kafkaPartition(),
          MsgKey.schema,
          MsgKey.getStruct(sinkTopic, data.key().toString()),
          Schema.BYTES_SCHEMA,
          data.value(),
          0
        )
    }
  }
}
Example 30
Source File: StringSinkRecordKeyBuilderTest.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.sink

import com.datamountaineer.streamreactor.connect.rowkeys.StringSinkRecordKeyBuilder
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.sink.SinkRecord
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

class StringSinkRecordKeyBuilderTest extends AnyWordSpec with Matchers {
  val keyRowKeyBuilder = new StringSinkRecordKeyBuilder()

  "SinkRecordKeyStringKeyBuilder" should {

    "create the right key from the Schema key value - Byte" in {
      val b = 123.toByte
      val sinkRecord = new SinkRecord("", 1, Schema.INT8_SCHEMA, b, Schema.FLOAT64_SCHEMA, Nil, 0)
      keyRowKeyBuilder.build(sinkRecord) shouldBe "123"
    }

    "create the right key from the Schema key value - String" in {
      val s = "somekey"
      val sinkRecord = new SinkRecord("", 1, Schema.STRING_SCHEMA, s, Schema.FLOAT64_SCHEMA, Nil, 0)
      keyRowKeyBuilder.build(sinkRecord) shouldBe s
    }

    "create the right key from the Schema key value - Bytes" in {
      val bArray = Array(23.toByte, 24.toByte, 242.toByte)
      val sinkRecord = new SinkRecord("", 1, Schema.BYTES_SCHEMA, bArray, Schema.FLOAT64_SCHEMA, Nil, 0)
      keyRowKeyBuilder.build(sinkRecord) shouldBe bArray.toString
    }

    "create the right key from the Schema key value - Boolean" in {
      val bool = true
      val sinkRecord = new SinkRecord("", 1, Schema.BOOLEAN_SCHEMA, bool, Schema.FLOAT64_SCHEMA, Nil, 0)
      keyRowKeyBuilder.build(sinkRecord) shouldBe "true"
    }
  }
}
Example 31
Source File: StringGenericRowKeyBuilderTest.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.sink

import com.datamountaineer.streamreactor.connect.rowkeys.StringGenericRowKeyBuilder
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.sink.SinkRecord
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

class StringGenericRowKeyBuilderTest extends AnyWordSpec with Matchers {
  "StringGenericRowKeyBuilder" should {
    "use the topic, partition and offset to make the key" in {
      val topic = "sometopic"
      val partition = 2
      val offset = 1243L
      val sinkRecord = new SinkRecord(topic, partition, Schema.INT32_SCHEMA, 345, Schema.STRING_SCHEMA, "", offset)

      val keyBuilder = new StringGenericRowKeyBuilder()
      val expected = Seq(topic, partition, offset).mkString("|")
      keyBuilder.build(sinkRecord) shouldBe expected
    }
  }
}
Example 32
Source File: StringStructFieldsStringKeyBuilderTest.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.sink import com.datamountaineer.streamreactor.connect.rowkeys.StringStructFieldsStringKeyBuilder import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct} import org.apache.kafka.connect.sink.SinkRecord import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class StringStructFieldsStringKeyBuilderTest extends AnyWordSpec with Matchers { "StructFieldsStringKeyBuilder" should { "raise an exception if the field is not present in the struct" in { intercept[IllegalArgumentException] { val schema = SchemaBuilder.struct().name("com.example.Person") .field("firstName", Schema.STRING_SCHEMA) .field("age", Schema.INT32_SCHEMA) .field("threshold", Schema.OPTIONAL_FLOAT64_SCHEMA).build() val struct = new Struct(schema).put("firstName", "Alex").put("age", 30) val sinkRecord = new SinkRecord("sometopic", 1, null, null, schema, struct, 1) StringStructFieldsStringKeyBuilder(Seq("threshold")).build(sinkRecord) } } "create the row key based on one single field in the struct" in { val schema = SchemaBuilder.struct().name("com.example.Person") .field("firstName", Schema.STRING_SCHEMA) .field("age", Schema.INT32_SCHEMA) .field("threshold", Schema.OPTIONAL_FLOAT64_SCHEMA).build() val struct = new Struct(schema).put("firstName", "Alex").put("age", 30) val sinkRecord = new SinkRecord("sometopic", 1, null, null, schema, struct, 1) StringStructFieldsStringKeyBuilder(Seq("firstName")).build(sinkRecord) shouldBe "Alex" } "create the row key based on one single field with doc in the struct" in { val firstNameSchema = SchemaBuilder.`type`(Schema.Type.STRING).doc("first name") val schema = SchemaBuilder.struct().name("com.example.Person") .field("firstName", firstNameSchema) .field("age", Schema.INT32_SCHEMA) .field("threshold", Schema.OPTIONAL_FLOAT64_SCHEMA).build() val struct = new Struct(schema).put("firstName", "Alex").put("age", 30) val sinkRecord = new SinkRecord("sometopic", 1, null, null, schema, struct, 1) StringStructFieldsStringKeyBuilder(Seq("firstName")).build(sinkRecord) shouldBe "Alex" } "create the row key based on more thant one field in the struct" in { val schema = SchemaBuilder.struct().name("com.example.Person") .field("firstName", Schema.STRING_SCHEMA) .field("age", Schema.INT32_SCHEMA) .field("threshold", Schema.OPTIONAL_FLOAT64_SCHEMA).build() val struct = new Struct(schema).put("firstName", "Alex").put("age", 30) val sinkRecord = new SinkRecord("sometopic", 1, null, null, schema, struct, 1) StringStructFieldsStringKeyBuilder(Seq("firstName", "age")).build(sinkRecord) shouldBe "Alex.30" } } }
Example 33
Source File: BytesConverterTest.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.converters.sink import com.datamountaineer.streamreactor.connect.converters.MsgKey import org.apache.kafka.connect.data.Schema import org.apache.kafka.connect.sink.SinkRecord import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class BytesConverterTest extends AnyWordSpec with Matchers { private val converter = new BytesConverter() private val topic = "topicA" "Sink BytesConverter" should { "handle null payloads" in { val sinkRecord = converter.convert(topic, null) sinkRecord.keySchema() shouldBe null sinkRecord.key() shouldBe null sinkRecord.valueSchema() shouldBe Schema.BYTES_SCHEMA sinkRecord.value() shouldBe null } "handle non-null payloads" in { val expectedPayload: Array[Byte] = Array(245, 2, 10, 200, 22, 0, 0, 11).map(_.toByte) val data = new SinkRecord(topic, 0, null, "keyA", null, expectedPayload, 0) val sinkRecord = converter.convert(topic, data) sinkRecord.keySchema() shouldBe MsgKey.schema sinkRecord.key() shouldBe MsgKey.getStruct("topicA", "keyA") sinkRecord.valueSchema() shouldBe Schema.BYTES_SCHEMA sinkRecord.value() shouldBe expectedPayload } } }
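The assertions above exercise the basic SinkRecord accessors. As a quick, standalone reminder of that API, the sketch below (not part of the converter test) constructs a record with the same seven-argument constructor used throughout these examples and reads its fields back.

import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.sink.SinkRecord

object SinkRecordInspection extends App {
  // topic, partition, keySchema, key, valueSchema, value, offset
  val record = new SinkRecord("topicA", 0, Schema.STRING_SCHEMA, "keyA",
    Schema.BYTES_SCHEMA, Array[Byte](1, 2, 3), 42L)

  println(record.topic())          // the Kafka topic the record came from
  println(record.kafkaPartition()) // the partition number
  println(record.kafkaOffset())    // the offset within that partition
  println(record.keySchema())      // schema of the key, here STRING
  println(record.valueSchema())    // schema of the value, here BYTES
  println(record.value())          // the payload itself
}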
Example 34
Source File: TestConsoleSinkTask.scala From ohara with Apache License 2.0 | 5 votes |
package oharastream.ohara.connector.console import java.util.concurrent.TimeUnit import oharastream.ohara.common.data.Row import oharastream.ohara.common.rule.OharaTest import oharastream.ohara.common.setting.{ConnectorKey, TopicKey} import oharastream.ohara.common.util.CommonUtils import oharastream.ohara.kafka.connector.json.ConnectorDefUtils import org.apache.kafka.connect.sink.SinkRecord import org.junit.Test import org.scalatest.matchers.should.Matchers._ import scala.concurrent.duration.Duration import scala.jdk.CollectionConverters._ class TestConsoleSinkTask extends OharaTest { private[this] val connectorKey = ConnectorKey.of("group", "TestConsoleSinkTask") private[this] def configs(key: String, value: String): java.util.Map[String, String] = Map( ConnectorDefUtils.CONNECTOR_KEY_DEFINITION.key() -> ConnectorKey.toJsonString(connectorKey), ConnectorDefUtils.CONNECTOR_NAME_DEFINITION.key() -> CommonUtils.randomString(), key -> value ).asJava @Test def testEmptySetting(): Unit = { val task = new ConsoleSinkTask() task.start( Map( ConnectorDefUtils.CONNECTOR_KEY_DEFINITION.key() -> ConnectorKey.toJsonString(connectorKey), ConnectorDefUtils.CONNECTOR_NAME_DEFINITION.key() -> CommonUtils.randomString() ).asJava ) task.freq shouldBe CONSOLE_FREQUENCE_DEFAULT task.divider shouldBe CONSOLE_ROW_DIVIDER_DEFAULT } @Test def testFrequence(): Unit = { val task = new ConsoleSinkTask() task.start(configs(CONSOLE_FREQUENCE, "20 seconds")) task.freq shouldBe Duration(20, TimeUnit.SECONDS) } @Test def testDivider(): Unit = { val task = new ConsoleSinkTask() val divider = CommonUtils.randomString() task.start(configs(CONSOLE_ROW_DIVIDER, divider)) task.divider shouldBe divider } @Test def testPrint(): Unit = { val task = new ConsoleSinkTask() task.start(configs(CONSOLE_FREQUENCE, "2 seconds")) task.lastLog shouldBe -1 task.put(java.util.List.of()) task.lastLog shouldBe -1 putRecord(task) val lastLogCopy1 = task.lastLog lastLogCopy1 should not be -1 TimeUnit.SECONDS.sleep(1) putRecord(task) val lastLogCopy2 = task.lastLog lastLogCopy2 shouldBe lastLogCopy1 TimeUnit.SECONDS.sleep(1) putRecord(task) val lastLogCopy3 = task.lastLog lastLogCopy3 should not be lastLogCopy2 lastLogCopy3 should not be -1 } private[this] def putRecord(task: ConsoleSinkTask): Unit = task.put( java.util.List.of( new SinkRecord( TopicKey.of("g", "n").topicNameOnKafka(), 1, null, Row.EMPTY, null, null, 1 ) ) ) }
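The test above drives the standard SinkTask lifecycle: start with the connector configuration, put with batches of records, then stop. Stripped of the Ohara-specific pieces, that contract can be sketched as follows (LoggingSinkTask is a hypothetical name, not part of the project):

import java.util
import org.apache.kafka.clients.consumer.OffsetAndMetadata
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.connect.sink.{SinkRecord, SinkTask}
import scala.jdk.CollectionConverters._

class LoggingSinkTask extends SinkTask {
  // Connect calls start() once with the task configuration.
  override def start(props: util.Map[String, String]): Unit =
    println(s"starting with ${props.size()} config entries")

  // Connect calls put() repeatedly with batches of SinkRecords.
  override def put(records: util.Collection[SinkRecord]): Unit =
    records.asScala.foreach(r => println(s"${r.topic()}-${r.kafkaPartition()}@${r.kafkaOffset()}: ${r.value()}"))

  // Called when offsets are committed; nothing to do for a stateless sink.
  override def flush(offsets: util.Map[TopicPartition, OffsetAndMetadata]): Unit = ()

  override def stop(): Unit = println("stopping")

  override def version(): String = "0.0.1"
}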
Example 35
Source File: MapMessageConverter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.jms.sink.converters import com.datamountaineer.streamreactor.connect.jms.config.JMSSetting import com.datamountaineer.streamreactor.connect.schemas.ConverterUtil import javax.jms.{MapMessage, Session} import org.apache.kafka.connect.data.{Schema, Struct} import org.apache.kafka.connect.sink.SinkRecord import scala.collection.JavaConverters._ class MapMessageConverter extends JMSMessageConverter with ConverterUtil { override def convert(record: SinkRecord, session: Session, setting: JMSSetting): (String, MapMessage) = { val converted = super[ConverterUtil].convert(record, setting.fields, setting.ignoreField) val msg = session.createMapMessage() val value = converted.value() val schema = converted.valueSchema() schema.`type`() match { case Schema.Type.STRUCT => val struct = value.asInstanceOf[Struct] struct.schema().fields().asScala.foreach { f => MapMessageBuilderFn(f.name(), struct.get(f), f.schema(), msg, session) } case _ => MapMessageBuilderFn("field", value, schema, msg, session) } (setting.source, msg) } } object MapMessageBuilderFn { def apply(fieldName: String, value: AnyRef, schema: Schema, msg: MapMessage, session: Session): Unit = { schema.`type`() match { case Schema.Type.BYTES => msg.setBytes(fieldName, value.asInstanceOf[Array[Byte]]) case Schema.Type.BOOLEAN => msg.setBoolean(fieldName, value.asInstanceOf[Boolean]) case Schema.Type.FLOAT32 => msg.setFloat(fieldName, value.asInstanceOf[Float]) case Schema.Type.FLOAT64 => msg.setDouble(fieldName, value.asInstanceOf[Double]) case Schema.Type.INT8 => msg.setByte(fieldName, value.asInstanceOf[Byte]) case Schema.Type.INT16 => msg.setShort(fieldName, value.asInstanceOf[Short]) case Schema.Type.INT32 => msg.setInt(fieldName, value.asInstanceOf[Int]) case Schema.Type.INT64 => msg.setLong(fieldName, value.asInstanceOf[Long]) case Schema.Type.STRING => msg.setString(fieldName, value.asInstanceOf[String]) case Schema.Type.MAP => msg.setObject(fieldName, value) case Schema.Type.ARRAY => msg.setObject(fieldName, value) case Schema.Type.STRUCT => val nestedMsg = session.createMapMessage() val struct = value.asInstanceOf[Struct] struct.schema().fields().asScala.foreach { f => MapMessageBuilderFn(f.name(), struct.get(f), f.schema(), nestedMsg, session) } msg.setObject(fieldName, nestedMsg) } } }
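The converter's core trick is walking the Connect Struct generically through its schema rather than knowing the field names up front. Reduced to a standalone sketch with no JMS involved, that traversal looks like this (top-level fields only):

import org.apache.kafka.connect.data.Struct
import scala.collection.JavaConverters._

object StructFields {
  // Pair each field name with its value, the same iteration
  // MapMessageBuilderFn performs over struct.schema().fields() above.
  def toMap(struct: Struct): Map[String, AnyRef] =
    struct.schema().fields().asScala.map(f => f.name() -> struct.get(f)).toMap
}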
Example 36
Source File: CassandraSinkTaskSpec.scala From kafka-connect-cassandra with Apache License 2.0 | 5 votes |
package com.tuplejump.kafka.connect.cassandra import scala.collection.JavaConverters._ import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct} import org.apache.kafka.connect.sink.{SinkRecord, SinkTaskContext} class CassandraSinkTaskSpec extends AbstractFlatSpec { val topicName = "test_kv_topic" val tableName = "test.kv" val config = sinkProperties(Map(topicName -> tableName)) it should "start sink task" in { val sinkTask = new CassandraSinkTask() val mockContext = mock[SinkTaskContext] sinkTask.initialize(mockContext) sinkTask.start(config.asJava) sinkTask.stop() } it should "save records in cassandra" in { val sinkTask = new CassandraSinkTask() val mockContext = mock[SinkTaskContext] sinkTask.initialize(mockContext) sinkTask.start(config.asJava) val valueSchema = SchemaBuilder.struct.name("record").version(1) .field("key", Schema.STRING_SCHEMA) .field("value", Schema.INT32_SCHEMA).build val value1 = new Struct(valueSchema).put("key", "pqr").put("value", 15) val value2 = new Struct(valueSchema).put("key", "abc").put("value", 17) val record1 = new SinkRecord(topicName, 1, SchemaBuilder.struct.build, "key", valueSchema, value1, 0) val record2 = new SinkRecord(topicName, 1, SchemaBuilder.struct.build, "key", valueSchema, value2, 0) sinkTask.put(List(record1, record2).asJavaCollection) sinkTask.stop() val cc = CassandraCluster.local val session = cc.session val result = session.execute(s"select count(1) from $tableName").one() val rowCount = result.getLong(0) rowCount should be(2) cc.shutdown() } }
Example 37
Source File: SchemaSpec.scala From kafka-connect-cassandra with Apache License 2.0 | 5 votes |
package com.tuplejump.kafka.connect.cassandra import com.datastax.driver.core.{ DataType, TestUtil} import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct} import org.apache.kafka.connect.sink.SinkRecord class SchemaSpec extends AbstractFlatSpec { it should "convert a struct schema with single field" in { val topic = "topicx" val sc = sinkConfig(topic, "keyspacex", "tablex", List("id")) sc.options.consistency should be (TaskConfig.DefaultSinkConsistency) sc.schema.columnNames should === (List("id")) sc.query.cql should be ("INSERT INTO keyspacex.tablex(id) VALUES(?)") val schema = SchemaBuilder.struct.name("record").version(1).field("id", Schema.INT32_SCHEMA).build val value = new Struct(schema).put("id", 1) val record = new SinkRecord(topic, 1, SchemaBuilder.struct.build, "key", schema, value, 0) sc.schema.route.topic should be (record.topic) sc.schema.route.keyspace should be ("keyspacex") sc.schema.route.table should be ("tablex") sc.schema is record should be (true) val query = record.as(sc.schema.namespace) query.cql should be("INSERT INTO keyspacex.tablex(id) VALUES(1)") } it should "convert a struct schema with multiple fields" in { val topic = "test_kfk" val sc = sinkConfig(topic, "keyspacex", "tablex", List("available", "name", "age")) val schema = SchemaBuilder.struct.name("record").version(1) .field("available", Schema.BOOLEAN_SCHEMA) .field("name", Schema.STRING_SCHEMA) .field("age", Schema.INT32_SCHEMA).build val value = new Struct(schema).put("name", "user").put("available", false).put("age", 15) val record = new SinkRecord("test_kfk", 1, SchemaBuilder.struct.build, "key", schema, value, 0) schema.asColumnNames should be (sc.schema.columnNames) sc.schema.route.topic should be (record.topic) sc.schema is record should be (true) sc.query.cql should be ("INSERT INTO keyspacex.tablex(available,name,age) VALUES(?,?,?)") val query = record.as(sc.schema.namespace) query.cql should be("INSERT INTO keyspacex.tablex(available,name,age) VALUES(false,'user',15)") } it should "convert cassandra column defs to a source schema" in { val colDef = Map( "id" -> DataType.cint(), "name" -> DataType.varchar()) val columns = TestUtil.getColumnDef(colDef) val expectedSchema = SchemaBuilder.struct() .field("id", Schema.INT32_SCHEMA) .field("name", Schema.STRING_SCHEMA).build() columns.asSchema should be(expectedSchema) } it should "convert kafka schema and struct to cassandra columns and schema mapping" in { import scala.collection.JavaConverters._ val topic = "a" val route = InternalConfig.Route(TaskConfig.SinkRoute + topic, "ks1.t1").get val schemaMap = new InternalConfig.Schema(route, Nil, Nil, Nil, List("available","name","age"), "") val schema = SchemaBuilder.struct.name("record").version(1) .field("available", Schema.BOOLEAN_SCHEMA) .field("name", Schema.STRING_SCHEMA) .field("age", Schema.INT32_SCHEMA).build val struct = new Struct(schema).put("name", "user").put("available", false).put("age", 15) val record = new SinkRecord(topic, 1, SchemaBuilder.struct.build, "key", schema, struct, 0) schema.asColumnNames should ===(schemaMap.columnNames) schemaMap.columnNames should ===(schema.fields.asScala.map(_.name).toList) schemaMap is record should be (true) } }
Example 38
Source File: IotHubSinkTask.scala From toketi-kafka-connect-iothub with MIT License | 5 votes |
package com.microsoft.azure.iot.kafka.connect.sink import java.util import com.microsoft.azure.iot.kafka.connect.source.JsonSerialization import com.microsoft.azure.sdk.iot.service.{DeliveryAcknowledgement, Message} import com.typesafe.scalalogging.LazyLogging import org.apache.kafka.clients.consumer.OffsetAndMetadata import org.apache.kafka.common.TopicPartition import org.apache.kafka.connect.sink.{SinkRecord, SinkTask} import scala.collection.JavaConverters._ class IotHubSinkTask extends SinkTask with LazyLogging with JsonSerialization { // Protected for testing purposes protected var messageSender : Option[MessageSender] = None protected var acknowledgement : DeliveryAcknowledgement = DeliveryAcknowledgement.None private[this] var isClosing : Boolean = false override def stop(): Unit = { logger.info("Stopping IotHubSink Task") if (this.messageSender.isDefined && !this.isClosing) { this.messageSender.synchronized { if (!this.isClosing) { this.isClosing = true logger.info("Closing IotHub clients") this.messageSender.get.close() } } } } override def put(records: util.Collection[SinkRecord]): Unit = { if (this.messageSender.isDefined && !this.isClosing) { this.messageSender.synchronized { if (!this.isClosing) { logger.info(s"Received ${records.size()} messages to be sent to devices. ") for (record: SinkRecord ← records.asScala) { val c2DMessage = C2DMessageConverter.validateSchemaAndGetMessage(record) this.sendMessage(c2DMessage) } logger.info(s"Started tasks to send ${records.size()} messages to devices.") } } } else { logger.info(s"Unable to send messages to devices - MessageSender is undefined " + s"= ${messageSender.isEmpty.toString}, isClosing = ${this.isClosing.toString}") } } private def sendMessage(c2DMessage: C2DMessage): Unit = { logger.info(s"Sending c2d message ${c2DMessage.toString}") val message = new Message(c2DMessage.message) message.setMessageId(c2DMessage.messageId) message.setDeliveryAcknowledgement(acknowledgement) if (c2DMessage.expiryTime.isDefined) { message.setExpiryTimeUtc(c2DMessage.expiryTime.get) } this.messageSender.get.sendMessage(c2DMessage.deviceId, message) } override def flush(offsets: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {} override def start(props: util.Map[String, String]): Unit = { logger.info("Starting IotHub Sink") val connectionString = props.get(IotHubSinkConfig.IotHubConnectionString) this.messageSender = Some(this.getMessageSender(connectionString)) this.acknowledgement = DeliveryAcknowledgement.valueOf(props.get(IotHubSinkConfig.IotHubMessageDeliveryAcknowledgement)) } protected def getMessageSender(connectionString: String): MessageSender = { new IotHubMessageSender(connectionString) } override def version(): String = getClass.getPackage.getImplementationVersion }
Example 39
Source File: GenericSinkTask.scala From kafka-connect-sap with Apache License 2.0 | 5 votes |
package com.sap.kafka.connect.sink import java.util import com.sap.kafka.connect.config.BaseConfig import com.sap.kafka.utils.ConnectorException import org.apache.kafka.clients.consumer.OffsetAndMetadata import org.apache.kafka.common.TopicPartition import org.apache.kafka.connect.sink.{SinkRecord, SinkTask} import org.slf4j.Logger abstract class GenericSinkTask extends SinkTask with SinkWriter { override def put(records: util.Collection[SinkRecord]): Unit = { log.info(s"PHASE - 1 - get records from kafka, Started for task with assigned " + s"partitions ${this.context.assignment().toString} ") log.info(s"Number of Records read for Sink: ${records.size}") retriesLeft = config.maxRetries if (records.isEmpty) { return } val recordsCount: Int = records.size log.trace("Received {} records for Sink", recordsCount) try { writer.write(records) } catch { case exception : ConnectorException => log.error("Write of {} records failed, remainingRetries={}", records.size(), retriesLeft) while (retriesLeft > 0) { try { retriesLeft = retriesLeft - 1 writer.close() writer = initWriter(config) writer.write(records) retriesLeft = -1 } catch { case exception: ConnectorException => // ignore } } if (retriesLeft == 0) throw exception } finally { log.info(s"PHASE - 1 ended for task, with assigned partitions ${this.context.assignment().toString}") } } override def stop(): Unit = { log.info("Stopping task") writer.close() } override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]) : Unit = { } override def version(): String = getClass.getPackage.getImplementationVersion }
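The retry block above closes and re-creates the writer, decrementing retriesLeft until the write succeeds or the budget is spent. A simplified, self-contained restatement of that idea, with the writer replaced by an arbitrary by-name block, might read as follows; it is a sketch of the pattern, not the connector's actual code.

import scala.util.{Failure, Success, Try}

object RetryingWrite {
  // Attempt `write` up to maxAttempts times, rethrowing the last failure.
  @annotation.tailrec
  def withRetries[T](maxAttempts: Int)(write: => T): T =
    Try(write) match {
      case Success(result)               => result
      case Failure(_) if maxAttempts > 1 => withRetries(maxAttempts - 1)(write)
      case Failure(exception)            => throw exception
    }
}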
Example 40
Source File: BaseWriter.scala From kafka-connect-sap with Apache License 2.0 | 5 votes |
package com.sap.kafka.connect.sink import java.sql.Connection import java.util import org.apache.kafka.connect.sink.SinkRecord import org.slf4j.{Logger, LoggerFactory} abstract class BaseWriter { private val log: Logger = LoggerFactory.getLogger(getClass) private var connection:Connection = null protected[sink] def initializeConnection(): Unit protected[sink] def write(records: util.Collection[SinkRecord]): Unit private[sink] def close(): Unit = { if (connection != null) { try { connection.close() connection = null } catch { case _: Exception => log.warn("Ignoring error closing connection") } } } }
Example 41
Source File: HANAWriter.scala From kafka-connect-sap with Apache License 2.0 | 5 votes |
package com.sap.kafka.connect.sink.hana import java.sql.Connection import java.util import com.google.common.base.Function import com.google.common.collect.Multimaps import com.sap.kafka.client.hana.HANAJdbcClient import com.sap.kafka.connect.config.hana.HANAConfig import com.sap.kafka.connect.sink.BaseWriter import org.apache.kafka.connect.sink.SinkRecord import org.slf4j.{Logger, LoggerFactory} import scala.collection.JavaConversions._ class HANAWriter(config: HANAConfig, hanaClient: HANAJdbcClient, tableCache: scala.collection.mutable.Map[String, HANASinkRecordsCollector]) extends BaseWriter { private val log: Logger = LoggerFactory.getLogger(getClass) private var connection:Connection = null override def initializeConnection(): Unit = { if(connection == null || connection.isClosed ) { connection = hanaClient.getConnection } else if(!connection.isValid(120)) { connection.close() connection = hanaClient.getConnection } connection.setAutoCommit(false) } override def write(records: util.Collection[SinkRecord]): Unit = { log.info("write records to HANA") log.info("initialize connection to HANA") initializeConnection() val topicMap = Multimaps.index(records, new Function[SinkRecord, String] { override def apply(sinkRecord: SinkRecord) = sinkRecord.topic() }).asMap().toMap for ((topic, recordsPerTopic) <- topicMap) { var table = config.topicProperties(topic).get("table.name").get if (table.contains("${topic}")) { table = table.replace("${topic}", topic) } val recordsCollector: Option[HANASinkRecordsCollector] = tableCache.get(table) recordsCollector match { case None => val tableRecordsCollector = new HANASinkRecordsCollector(table, hanaClient, connection, config) tableCache.put(table, tableRecordsCollector) tableRecordsCollector.add(recordsPerTopic.toSeq) case Some(tableRecordsCollector) => if (config.autoSchemaUpdateOn) { tableRecordsCollector.tableConfigInitialized = false } tableRecordsCollector.add(recordsPerTopic.toSeq) } } flush(tableCache.toMap) log.info("flushing records to HANA successful") } private def flush(tableCache: Map[String, HANASinkRecordsCollector]): Unit = { log.info("flush records into HANA") for ((table, recordsCollector) <- tableCache) { recordsCollector.flush() } hanaClient.commit(connection) } }
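The writer above leans on Guava's Multimaps.index to bucket the incoming records by topic before routing them to per-table collectors. If only the grouping is needed, the standard library gives the same result; a small sketch, separate from the connector:

import java.util
import org.apache.kafka.connect.sink.SinkRecord
import scala.collection.JavaConverters._

object RecordGrouping {
  // Plain-Scala equivalent of Multimaps.index(records, r => r.topic()).asMap()
  def byTopic(records: util.Collection[SinkRecord]): Map[String, Seq[SinkRecord]] =
    records.asScala.toSeq.groupBy(_.topic())
}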
Example 42
Source File: DocumentDbWriter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.azure.documentdb.sink import com.datamountaineer.kcql.WriteModeEnum import com.datamountaineer.streamreactor.connect.azure.documentdb.DocumentClientProvider import com.datamountaineer.streamreactor.connect.azure.documentdb.config.{DocumentDbConfig, DocumentDbConfigConstants, DocumentDbSinkSettings} import com.datamountaineer.streamreactor.connect.errors.{ErrorHandler, ErrorPolicyEnum} import com.datamountaineer.streamreactor.connect.schemas.ConverterUtil import com.microsoft.azure.documentdb._ import com.typesafe.scalalogging.StrictLogging import org.apache.kafka.connect.sink.{SinkRecord, SinkTaskContext} import scala.util.Failure private def insert(records: Seq[SinkRecord]) = { try { records.groupBy(_.topic()).foreach { case (_, groupedRecords) => groupedRecords.foreach { record => val (document, keysAndValues) = SinkRecordToDocument(record, settings.keyBuilderMap.getOrElse(record.topic(), Set.empty))(settings) val key = keysAndValues.flatMap { case (_, v) => Option(v) }.mkString(".") if (key.nonEmpty) { document.setId(key) } val config = configMap.getOrElse(record.topic(), sys.error(s"${record.topic()} is not handled by the configuration.")) config.getWriteMode match { case WriteModeEnum.INSERT => documentClient.createDocument(s"dbs/${settings.database}/colls/${config.getTarget}", document, requestOptionsInsert, key.nonEmpty).getResource case WriteModeEnum.UPSERT => documentClient.upsertDocument(s"dbs/${settings.database}/colls/${config.getTarget}", document, requestOptionsInsert, key.nonEmpty).getResource } } } } catch { case t: Throwable => logger.error(s"There was an error inserting the records ${t.getMessage}", t) handleTry(Failure(t)) } } def close(): Unit = { logger.info("Shutting down Document DB writer.") documentClient.close() } } //Factory to build object DocumentDbWriter extends StrictLogging { def apply(connectorConfig: DocumentDbConfig, context: SinkTaskContext): DocumentDbWriter = { implicit val settings = DocumentDbSinkSettings(connectorConfig) //if error policy is retry set retry interval if (settings.errorPolicy.equals(ErrorPolicyEnum.RETRY)) { context.timeout(connectorConfig.getLong(DocumentDbConfigConstants.ERROR_RETRY_INTERVAL_CONFIG)) } logger.info(s"Initialising Document Db writer.") val provider = DocumentClientProvider.get(settings) new DocumentDbWriter(settings, provider) } }
Example 43
Source File: SinkRecordToDocument.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.azure.documentdb.sink import com.datamountaineer.streamreactor.connect.azure.documentdb.config.DocumentDbSinkSettings import com.datamountaineer.streamreactor.connect.azure.documentdb.converters.SinkRecordConverter import com.datamountaineer.streamreactor.connect.schemas.ConverterUtil import com.microsoft.azure.documentdb.Document import org.apache.kafka.connect.data.{Schema, Struct} import org.apache.kafka.connect.sink.SinkRecord object SinkRecordToDocument extends ConverterUtil { def apply(record: SinkRecord, keys: Set[String] = Set.empty)(implicit settings: DocumentDbSinkSettings): (Document, Iterable[(String, Any)]) = { val schema = record.valueSchema() val value = record.value() if (schema == null) { //try to take it as string value match { case _: java.util.Map[_, _] => val fields = settings.fields(record.topic()) val extracted = convertSchemalessJson(record, settings.fields(record.topic()), settings.ignoredField(record.topic())) //not ideal; but the compile is hashmap anyway SinkRecordConverter.fromMap(extracted.asInstanceOf[java.util.Map[String, AnyRef]]) -> keys.headOption.map(_ => KeysExtractor.fromMap(extracted, keys)).getOrElse(Iterable.empty) case _: String => val extracted = convertStringSchemaAndJson(record, settings.fields(record.topic()), settings.ignoredField(record.topic())) SinkRecordConverter.fromJson(extracted) -> keys.headOption.map(_ => KeysExtractor.fromJson(extracted, keys)).getOrElse(Iterable.empty) case _ => sys.error("For schemaless record only String and Map types are supported") } } else { schema.`type`() match { case Schema.Type.STRING => val extracted = convertStringSchemaAndJson(record, settings.fields(record.topic()), settings.ignoredField(record.topic())) SinkRecordConverter.fromJson(extracted) -> keys.headOption.map(_ => KeysExtractor.fromJson(extracted, keys)).getOrElse(Iterable.empty) case Schema.Type.STRUCT => val extracted = convert(record, settings.fields(record.topic()), settings.ignoredField(record.topic())) SinkRecordConverter.fromStruct(extracted) -> keys.headOption.map(_ => KeysExtractor.fromStruct(extracted.value().asInstanceOf[Struct], keys)).getOrElse(Iterable.empty) case other => sys.error(s"$other schema is not supported") } } } }
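The converter's first decision is whether the record carries a schema at all and, if so, which type: schemaless Map and String payloads are treated as JSON, while STRING and STRUCT schemas take dedicated paths. A stripped-down sketch of that dispatch, with the Document DB conversion replaced by descriptive strings, is shown below.

import org.apache.kafka.connect.data.{Schema, Struct}
import org.apache.kafka.connect.sink.SinkRecord

object PayloadKind {
  // Mirrors the dispatch above without performing any conversion.
  def describe(record: SinkRecord): String = Option(record.valueSchema()) match {
    case None =>
      record.value() match {
        case _: java.util.Map[_, _] => "schemaless map, treated as JSON"
        case _: String              => "schemaless string, treated as JSON"
        case other                  => sys.error(s"Unsupported schemaless payload: $other")
      }
    case Some(schema) =>
      schema.`type`() match {
        case Schema.Type.STRING => "JSON string with a Connect schema"
        case Schema.Type.STRUCT => s"Struct with ${record.value().asInstanceOf[Struct].schema().fields().size()} fields"
        case other              => sys.error(s"$other schema is not supported")
      }
  }
}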
Example 44
Source File: DocumentDbSinkTask.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.azure.documentdb.sink import java.util import com.datamountaineer.streamreactor.connect.azure.documentdb.DocumentClientProvider import com.datamountaineer.streamreactor.connect.azure.documentdb.config.{DocumentDbConfig, DocumentDbConfigConstants, DocumentDbSinkSettings} import com.datamountaineer.streamreactor.connect.errors.ErrorPolicyEnum import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter} import com.microsoft.azure.documentdb.DocumentClient import com.typesafe.scalalogging.StrictLogging import org.apache.kafka.clients.consumer.OffsetAndMetadata import org.apache.kafka.common.TopicPartition import org.apache.kafka.connect.errors.ConnectException import org.apache.kafka.connect.sink.{SinkRecord, SinkTask} import scala.collection.JavaConverters._ import scala.util.{Failure, Success, Try} override def put(records: util.Collection[SinkRecord]): Unit = { require(writer.nonEmpty, "Writer is not set!") val seq = records.asScala.toVector writer.foreach(w => w.write(seq)) if (enableProgress) { progressCounter.update(seq) } } override def stop(): Unit = { logger.info("Stopping Azure Document DB sink.") writer.foreach(w => w.close()) progressCounter.empty() } override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {} override def version: String = manifest.version() }
Example 45
Source File: SinkRecordToDocumentTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.azure.documentdb.sink import com.datamountaineer.streamreactor.connect.azure.documentdb.Json import com.datamountaineer.streamreactor.connect.azure.documentdb.config.DocumentDbSinkSettings import com.datamountaineer.streamreactor.connect.errors.NoopErrorPolicy import com.datamountaineer.streamreactor.connect.schemas.ConverterUtil import com.microsoft.azure.documentdb.{ConsistencyLevel, Document} import org.apache.kafka.connect.data.Schema import org.apache.kafka.connect.sink.SinkRecord import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class SinkRecordToDocumentTest extends AnyWordSpec with Matchers with ConverterUtil { private val connection = "https://accountName.documents.azure.com:443/" "SinkRecordToDocument" should { "convert Kafka Struct to a Azure Document Db Document" in { for (i <- 1 to 4) { val json = scala.io.Source.fromFile(getClass.getResource(s"/transaction$i.json").toURI.getPath).mkString val tx = Json.fromJson[Transaction](json) val record = new SinkRecord("topic1", 0, null, null, Transaction.ConnectSchema, tx.toStruct(), 0) implicit val settings = DocumentDbSinkSettings( connection, "secret", "database", Seq.empty, Map("topic1" -> Set.empty[String]), Map("topic1" -> Map.empty), Map("topic1" -> Set.empty), NoopErrorPolicy(), ConsistencyLevel.Session, false, None) val (document, _) = SinkRecordToDocument(record) val expected = new Document(json) //comparing string representation; we have more specific types given the schema document.toString shouldBe expected.toString } } "convert String Schema + Json payload to a Azure Document DB Document" in { for (i <- 1 to 4) { val json = scala.io.Source.fromFile(getClass.getResource(s"/transaction$i.json").toURI.getPath).mkString val record = new SinkRecord("topic1", 0, null, null, Schema.STRING_SCHEMA, json, 0) implicit val settings = DocumentDbSinkSettings( connection, "secret", "database", Seq.empty, Map("topic1" -> Set.empty[String]), Map("topic1" -> Map.empty), Map("topic1" -> Set.empty), NoopErrorPolicy(), ConsistencyLevel.Session, false, None) val (document, _) = SinkRecordToDocument(record) val expected = new Document(json) //comparing string representation; we have more specific types given the schema document.toString() shouldBe expected.toString } } "convert Schemaless + Json payload to a Azure Document DB Document" in { for (i <- 1 to 4) { val json = scala.io.Source.fromFile(getClass.getResource(s"/transaction$i.json").toURI.getPath).mkString val record = new SinkRecord("topic1", 0, null, null, Schema.STRING_SCHEMA, json, 0) implicit val settings = DocumentDbSinkSettings( connection, "secret", "database", Seq.empty, Map("topic1" -> Set.empty[String]), Map("topic1" -> Map.empty), Map("topic1" -> Set.empty), NoopErrorPolicy(), ConsistencyLevel.Session, false, None) val (document, _) = SinkRecordToDocument(record) val expected = new Document(json) //comparing string representation; we have more specific types given the schema document.toString() shouldBe expected.toString } } } }
Example 46
Source File: MongoSinkTask.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.mongodb.sink import java.util import com.datamountaineer.streamreactor.connect.mongodb.config.{MongoConfig, MongoConfigConstants} import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter} import com.typesafe.scalalogging.StrictLogging import org.apache.kafka.clients.consumer.OffsetAndMetadata import org.apache.kafka.common.TopicPartition import org.apache.kafka.connect.errors.ConnectException import org.apache.kafka.connect.sink.{SinkRecord, SinkTask} import scala.collection.JavaConverters._ import scala.util.{Failure, Success, Try} override def put(records: util.Collection[SinkRecord]): Unit = { require(writer.nonEmpty, "Writer is not set!") val seq = records.asScala.toVector writer.foreach(w => w.write(seq)) if (enableProgress) { progressCounter.update(seq) } } override def stop(): Unit = { logger.info("Stopping Mongo Database sink.") writer.foreach(w => w.close()) progressCounter.empty } override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {} override def version: String = manifest.version() }
Example 47
Source File: SinkRecordToDocument.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.mongodb.sink import com.datamountaineer.streamreactor.connect.mongodb.config.MongoSettings import com.datamountaineer.streamreactor.connect.mongodb.converters.SinkRecordConverter import com.datamountaineer.streamreactor.connect.schemas.ConverterUtil import org.apache.kafka.connect.data.{Schema, Struct} import org.apache.kafka.connect.sink.SinkRecord import org.bson.Document object SinkRecordToDocument extends ConverterUtil { def apply(record: SinkRecord, keys: Set[String] = Set.empty)(implicit settings: MongoSettings): (Document, Iterable[(String, Any)]) = { val schema = record.valueSchema() val value = record.value() val fields = settings.fields.getOrElse(record.topic(), Map.empty) val allFields = if (fields.size == 1 && fields.head._1 == "*") true else false if (schema == null) { //try to take it as string value match { case _: java.util.Map[_, _] => val extracted = convertSchemalessJson( record, fields, settings.ignoredField.getOrElse(record.topic(), Set.empty) ) //not ideal; but the compile is hashmap anyway SinkRecordConverter.fromMap(extracted.asInstanceOf[java.util.Map[String, AnyRef]]) -> keys.headOption.map(_ => KeysExtractor.fromMap(extracted, keys)).getOrElse(Iterable.empty) case _ => sys.error("For schemaless record only String and Map types are supported") } } else { schema.`type`() match { case Schema.Type.STRING => val extracted = convertStringSchemaAndJson( record, fields, settings.ignoredField.getOrElse(record.topic(), Set.empty), includeAllFields = allFields) SinkRecordConverter.fromJson(extracted) -> keys.headOption.map(_ => KeysExtractor.fromJson(extracted, keys)).getOrElse(Iterable.empty) case Schema.Type.STRUCT => val extracted = convert( record, fields, settings.ignoredField.getOrElse(record.topic(), Set.empty) ) SinkRecordConverter.fromStruct(extracted) -> keys.headOption.map(_ => KeysExtractor.fromStruct(extracted.value().asInstanceOf[Struct], keys)).getOrElse(Iterable.empty) case other => sys.error(s"$other schema is not supported") } } } }
Example 48
Source File: CassandraSinkTask.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.cassandra.sink import java.util import com.datamountaineer.streamreactor.connect.cassandra.config.{CassandraConfigSink, CassandraSettings} import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter} import com.typesafe.scalalogging.StrictLogging import org.apache.kafka.clients.consumer.OffsetAndMetadata import org.apache.kafka.common.TopicPartition import org.apache.kafka.connect.errors.ConnectException import org.apache.kafka.connect.sink.{SinkRecord, SinkTask} import scala.collection.JavaConverters._ import scala.util.{Failure, Success, Try} override def stop(): Unit = { logger.info("Stopping Cassandra sink.") writer.foreach(w => w.close()) if (enableProgress) { progressCounter.empty } } override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {} override def version: String = manifest.version() }
Example 49
Source File: JMSWriter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.jms.sink.writer import com.datamountaineer.streamreactor.connect.errors.ErrorHandler import com.datamountaineer.streamreactor.connect.jms.JMSSessionProvider import com.datamountaineer.streamreactor.connect.jms.config.{JMSSetting, JMSSettings} import com.datamountaineer.streamreactor.connect.jms.sink.converters.{JMSHeadersConverterWrapper, JMSMessageConverter, JMSMessageConverterFn} import com.datamountaineer.streamreactor.connect.schemas.ConverterUtil import com.typesafe.scalalogging.StrictLogging import javax.jms._ import org.apache.kafka.connect.sink.SinkRecord import scala.util.{Failure, Success, Try} case class JMSWriter(settings: JMSSettings) extends AutoCloseable with ConverterUtil with ErrorHandler with StrictLogging { val provider = JMSSessionProvider(settings, sink = true) provider.start() val producers: Map[String, MessageProducer] = provider.queueProducers ++ provider.topicProducers val converterMap: Map[String, JMSMessageConverter] = settings.settings .map(s => (s.source, JMSHeadersConverterWrapper(s.headers, JMSMessageConverterFn(s.format)))).toMap val settingsMap: Map[String, JMSSetting] = settings.settings.map(s => (s.source, s)).toMap //initialize error tracker initialize(settings.retries, settings.errorPolicy) def send(messages: Seq[(String, Message)]): Unit = { messages.foreach({ case (name, message) => producers(name).send(message)}) } override def close(): Unit = provider.close() }
Example 50
Source File: RedisCache.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.redis.sink.writer import com.datamountaineer.kcql.Kcql import com.datamountaineer.streamreactor.connect.redis.sink.config.{RedisKCQLSetting, RedisSinkSettings} import org.apache.kafka.connect.sink.SinkRecord import scala.collection.JavaConverters._ import scala.util.Try class RedisCache(sinkSettings: RedisSinkSettings) extends RedisWriter { val configs: Set[Kcql] = sinkSettings.kcqlSettings.map(_.kcqlConfig) configs.foreach { c => assert(c.getSource.trim.length > 0, "You need to supply a valid source kafka topic to fetch records from. Review your KCQL syntax") assert(c.getPrimaryKeys.asScala.nonEmpty, "The Redis CACHE mode requires at least 1 PK (Primary Key) to be defined") assert(c.getStoredAs == null, "The Redis CACHE mode does not support STOREAS") } // Write a sequence of SinkRecords to Redis override def write(records: Seq[SinkRecord]): Unit = { if (records.isEmpty) { logger.debug("No records received on 'Cache' Redis writer") } else { logger.debug(s"'Cache' Redis writer received ${records.size} records") insert(records.groupBy(_.topic)) } } // Insert a batch of sink records def insert(records: Map[String, Seq[SinkRecord]]): Unit = { records.foreach { case (topic, sinkRecords) => { val topicSettings: Set[RedisKCQLSetting] = sinkSettings.kcqlSettings.filter(_.kcqlConfig.getSource == topic) if (topicSettings.isEmpty) logger.warn(s"Received a batch for topic $topic - but no KCQL supports it") //pass try to error handler and try val t = Try( { sinkRecords.foreach { record => topicSettings.map { KCQL => // We can prefix the name of the <KEY> using the target val optionalPrefix = if (Option(KCQL.kcqlConfig.getTarget).isEmpty) "" else KCQL.kcqlConfig.getTarget.trim // Use first primary key's value and (optional) prefix val pkDelimiter = sinkSettings.pkDelimiter val keyBuilder = RedisFieldsKeyBuilder(KCQL.kcqlConfig.getPrimaryKeys.asScala.map(_.toString), pkDelimiter) val extracted = convert(record, fields = KCQL.fieldsAndAliases, ignoreFields = KCQL.ignoredFields) val key = optionalPrefix + keyBuilder.build(record) val payload = convertValueToJson(extracted).toString val ttl = KCQL.kcqlConfig.getTTL if (ttl <= 0) { jedis.set(key, payload) } else { jedis.setex(key, ttl.toInt, payload) } } } }) handleTry(t) } logger.debug(s"Wrote ${sinkRecords.size} rows for topic $topic") } } }
Example 51
Source File: ObjectMessageConverter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.jms.sink.converters import com.datamountaineer.streamreactor.connect.jms.config.JMSSetting import com.datamountaineer.streamreactor.connect.schemas.ConverterUtil import javax.jms.{ObjectMessage, Session} import org.apache.kafka.connect.data.{Schema, Struct} import org.apache.kafka.connect.sink.SinkRecord import scala.collection.JavaConverters._ class ObjectMessageConverter extends JMSMessageConverter with ConverterUtil { override def convert(record: SinkRecord, session: Session, setting: JMSSetting): (String, ObjectMessage) = { val converted = super[ConverterUtil].convert(record, setting.fields, setting.ignoreField) val msg = session.createObjectMessage() val value = converted.value() val schema = converted.valueSchema() schema.`type`() match { case Schema.Type.STRUCT => val struct = value.asInstanceOf[Struct] struct.schema().fields().asScala.foreach { f => ObjectMessageConverterFn(f.name(), struct.get(f), f.schema(), msg, session) } case _ => ObjectMessageConverterFn("field", value, schema, msg, session) } (setting.source, msg) } } object ObjectMessageConverterFn { def apply(fieldName: String, value: AnyRef, schema: Schema, msg: ObjectMessage, session: Session): Unit = { schema.`type`() match { case Schema.Type.BYTES => msg.setObjectProperty(fieldName, value.asInstanceOf[Array[Byte]].toList.asJava) case Schema.Type.BOOLEAN => msg.setBooleanProperty(fieldName, value.asInstanceOf[Boolean]) case Schema.Type.FLOAT32 => msg.setFloatProperty(fieldName, value.asInstanceOf[Float]) case Schema.Type.FLOAT64 => msg.setDoubleProperty(fieldName, value.asInstanceOf[Double]) case Schema.Type.INT8 => msg.setByteProperty(fieldName, value.asInstanceOf[Byte]) case Schema.Type.INT16 => msg.setShortProperty(fieldName, value.asInstanceOf[Short]) case Schema.Type.INT32 => msg.setIntProperty(fieldName, value.asInstanceOf[Int]) case Schema.Type.INT64 => msg.setLongProperty(fieldName, value.asInstanceOf[Long]) case Schema.Type.STRING => msg.setStringProperty(fieldName, value.asInstanceOf[String]) case Schema.Type.MAP => msg.setObjectProperty(fieldName, value) case Schema.Type.ARRAY => msg.setObjectProperty(fieldName, value) case Schema.Type.STRUCT => val nestedMsg = session.createObjectMessage() val struct = value.asInstanceOf[Struct] struct.schema().fields().asScala.foreach { f => ObjectMessageConverterFn(f.name(), struct.get(f), f.schema(), nestedMsg, session) } msg.setObjectProperty(fieldName, nestedMsg) } } }
Example 52
Source File: AvroMessageConverter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.jms.sink.converters import java.io.ByteArrayOutputStream import com.datamountaineer.streamreactor.connect.jms.config.JMSSetting import com.datamountaineer.streamreactor.connect.schemas.ConverterUtil import com.datamountaineer.streamreactor.connect.serialization.AvroSerializer import javax.jms.{BytesMessage, Session} import org.apache.kafka.connect.sink.SinkRecord class AvroMessageConverter extends JMSMessageConverter with ConverterUtil { override def convert(record: SinkRecord, session: Session, setting: JMSSetting): (String, BytesMessage) = { val converted = super[ConverterUtil].convert(record, setting.fields, setting.ignoreField) val avroRecord = convertValueToGenericAvro(converted) val avroSchema = avroData.fromConnectSchema(converted.valueSchema()) implicit val os = new ByteArrayOutputStream() AvroSerializer.write(avroRecord, avroSchema) val message = session.createBytesMessage() message.writeBytes(os.toByteArray) (setting.source, message) } }
Example 53
Source File: JMSHeadersConverterWrapper.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.jms.sink.converters import com.datamountaineer.streamreactor.connect.jms.config.JMSSetting import javax.jms.{Message, Session} import org.apache.kafka.connect.sink.SinkRecord class JMSHeadersConverterWrapper(headers: Map[String, String], delegate: JMSMessageConverter) extends JMSMessageConverter { override def convert(record: SinkRecord, session: Session, setting: JMSSetting): (String, Message) = { val response = delegate.convert(record, session, setting) val message = response._2 for((key, value) <- headers) { message.setObjectProperty(key, value) } response } } object JMSHeadersConverterWrapper { def apply(config: Map[String, String], delegate: JMSMessageConverter): JMSMessageConverter = new JMSHeadersConverterWrapper(config, delegate) }
Example 54
Source File: JsonMessageConverterTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.sink.converters import java.util.UUID import com.datamountaineer.streamreactor.connect.jms.config.{JMSConfig, JMSSettings} import com.datamountaineer.streamreactor.connect.jms.sink.converters.JsonMessageConverter import com.datamountaineer.streamreactor.connect.{TestBase, Using} import javax.jms.TextMessage import org.apache.activemq.ActiveMQConnectionFactory import org.apache.kafka.connect.sink.SinkRecord import org.scalatest.BeforeAndAfterAll import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec import scala.collection.JavaConverters._ import scala.reflect.io.Path class JsonMessageConverterTest extends AnyWordSpec with Matchers with Using with TestBase with BeforeAndAfterAll { val converter = new JsonMessageConverter() val kafkaTopic1 = s"kafka-${UUID.randomUUID().toString}" val queueName = UUID.randomUUID().toString val kcql = getKCQL(queueName, kafkaTopic1, "QUEUE") val props = getProps(kcql, JMS_URL) val config = JMSConfig(props.asJava) val settings = JMSSettings(config, true) val setting = settings.settings.head override def afterAll(): Unit = { Path(AVRO_FILE).delete() } "JsonMessageConverter" should { "create a TextMessage with Json payload" in { val connectionFactory = new ActiveMQConnectionFactory("vm://localhost?broker.persistent=false") using(connectionFactory.createConnection()) { connection => using(connection.createSession(false, 1)) { session => val schema = getSchema val struct = getStruct(schema) val record = new SinkRecord(kafkaTopic1, 0, null, null, schema, struct, 1) val msg = converter.convert(record, session, setting)._2.asInstanceOf[TextMessage] Option(msg).isDefined shouldBe true val json = msg.getText json shouldBe """{"int8":12,"int16":12,"int32":12,"int64":12,"float32":12.2,"float64":12.2,"boolean":true,"string":"foo","bytes":"Zm9v","array":["a","b","c"],"map":{"field":1},"mapNonStringKeys":[[1,1]]}""".stripMargin } } } } }
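This test depends on getSchema and getStruct helpers inherited from the project's TestBase, which are not reproduced on this page; judging from the expected JSON they build a struct covering the numeric types plus boolean, string, bytes, array and map fields. A reduced sketch of helpers with that shape (an assumption, not the actual TestBase code) would be:

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}

object TestFixtures {
  // Only a subset of the fields the real TestBase helpers appear to define.
  val schema: Schema = SchemaBuilder.struct()
    .field("int8", Schema.INT8_SCHEMA)
    .field("int32", Schema.INT32_SCHEMA)
    .field("float64", Schema.FLOAT64_SCHEMA)
    .field("boolean", Schema.BOOLEAN_SCHEMA)
    .field("string", Schema.STRING_SCHEMA)
    .build()

  def struct: Struct = new Struct(schema)
    .put("int8", 12.toByte)
    .put("int32", 12)
    .put("float64", 12.2)
    .put("boolean", true)
    .put("string", "foo")
}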
Example 55
Source File: GenericRowKeyBuilderTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hbase import com.datamountaineer.streamreactor.connect.hbase.BytesHelper._ import org.apache.hadoop.hbase.util.Bytes import org.apache.kafka.connect.data.Schema import org.apache.kafka.connect.sink.SinkRecord import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class GenericRowKeyBuilderTest extends AnyWordSpec with Matchers { "GenericRowKeyBuilder" should { "use the topic, partition and offset to make the key" in { val topic = "sometopic" val partition = 2 val offset = 1243L val sinkRecord = new SinkRecord(topic, partition, Schema.INT32_SCHEMA, 345, Schema.STRING_SCHEMA, "", offset) val keyBuilder = new GenericRowKeyBuilderBytes() val expected = Bytes.add(Array(topic.fromString(), keyBuilder.delimiterBytes, partition.fromString(), keyBuilder.delimiterBytes, offset.fromString())) keyBuilder.build(sinkRecord, Nil) shouldBe expected } } }
Example 56
Source File: StructFieldsRowKeyBuilderTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hbase import com.datamountaineer.streamreactor.connect.hbase.BytesHelper._ import org.apache.hadoop.hbase.util.Bytes import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct} import org.apache.kafka.connect.sink.SinkRecord import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class StructFieldsRowKeyBuilderTest extends AnyWordSpec with Matchers { "StructFieldsRowKeyBuilder" should { "raise an exception if the field is not present in the struct" in { intercept[IllegalArgumentException] { val schema = SchemaBuilder.struct().name("com.example.Person") .field("firstName", Schema.STRING_SCHEMA) .field("age", Schema.INT32_SCHEMA) .field("threshold", Schema.OPTIONAL_FLOAT64_SCHEMA).build() val struct = new Struct(schema).put("firstName", "Alex").put("age", 30) val sinkRecord = new SinkRecord("sometopic", 1, null, null, schema, struct, 1) //val field = Field("threshold", "threshold", false) StructFieldsRowKeyBuilderBytes(List("threshold")).build(sinkRecord, null) } } "create the row key based on one single field in the struct" in { val schema = SchemaBuilder.struct().name("com.example.Person") .field("firstName", Schema.STRING_SCHEMA) .field("age", Schema.INT32_SCHEMA) .field("threshold", Schema.OPTIONAL_FLOAT64_SCHEMA).build() val struct = new Struct(schema).put("firstName", "Alex").put("age", 30) //val field = Field("firstName", "firstName", true) val sinkRecord = new SinkRecord("sometopic", 1, null, null, schema, struct, 1) StructFieldsRowKeyBuilderBytes(List("firstName")).build(sinkRecord, null) shouldBe "Alex".fromString } "create the row key based on more than one field in the struct" in { val schema = SchemaBuilder.struct().name("com.example.Person") .field("firstName", Schema.STRING_SCHEMA) .field("age", Schema.INT32_SCHEMA) .field("threshold", Schema.OPTIONAL_FLOAT64_SCHEMA).build() val struct = new Struct(schema).put("firstName", "Alex").put("age", 30) //val field = Field("firstName", "firstName", true) //val field2 = Field("age", "age", true) val sinkRecord = new SinkRecord("sometopic", 1, null, null, schema, struct, 1) StructFieldsRowKeyBuilderBytes(List("firstName", "age")).build(sinkRecord, null) shouldBe Bytes.add("Alex".fromString(), "\n".fromString(), 30.fromInt()) } } }
Example 57
Source File: AvroRecordRowKeyBuilderTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hbase import com.datamountaineer.streamreactor.connect.hbase.BytesHelper._ import com.datamountaineer.streamreactor.connect.hbase.avro.AvroRecordFieldExtractorMapFn import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import org.apache.hadoop.hbase.util.Bytes import org.apache.kafka.connect.sink.SinkRecord import org.mockito.MockitoSugar import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class AvroRecordRowKeyBuilderTest extends AnyWordSpec with Matchers with MockitoSugar { val schema: Schema = new Schema.Parser().parse(PersonAvroSchema.schema) "AvroRecordRowKeyBuilder" should { "extract the values from the avro record and create the key" in { val keys = Seq("firstName", "lastName", "age") val rowKeyBuilder = new AvroRecordRowKeyBuilderBytes(AvroRecordFieldExtractorMapFn(schema, keys), keys) val sinkRecord = mock[SinkRecord] val firstName = "Jack" val lastName = "Smith" val age = 29 val record = new GenericRecord { val values: Map[String, AnyRef] = Map("firstName" -> firstName, "lastName" -> lastName, "age" -> Int.box(age)) override def get(key: String): AnyRef = values(key) override def put(key: String, v: scala.Any): Unit = sys.error("not supported") override def get(i: Int): AnyRef = sys.error("not supported") override def put(i: Int, v: scala.Any): Unit = sys.error("not supported") override def getSchema: Schema = sys.error("not supported") } val expectedValue = Bytes.add( Array( firstName.fromString(), rowKeyBuilder.delimBytes, lastName.fromString(), rowKeyBuilder.delimBytes, age.fromInt())) rowKeyBuilder.build(sinkRecord, record) shouldBe expectedValue } } }
Example 58
Source File: SinkRecordKeyRowKeyBuilderTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hbase import com.datamountaineer.streamreactor.connect.hbase.BytesHelper._ import org.apache.kafka.connect.data.Schema import org.apache.kafka.connect.sink.SinkRecord import org.mockito.MockitoSugar import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class SinkRecordKeyRowKeyBuilderTest extends AnyWordSpec with Matchers with MockitoSugar { val keyRowKeyBuilder = new SinkRecordKeyRowKeyBuilderBytes() "SinkRecordKeyRowKeyBuilder" should { "create the right key from the Schema key value - Byte" in { val b = 123.toByte val sinkRecord = new SinkRecord("", 1, Schema.INT8_SCHEMA, b, Schema.FLOAT64_SCHEMA, Nil, 0) keyRowKeyBuilder.build(sinkRecord, "Should not matter") shouldBe Array(b) } "create the right key from the Schema key value - String" in { val s = "somekey" val sinkRecord = new SinkRecord("", 1, Schema.STRING_SCHEMA, s, Schema.FLOAT64_SCHEMA, Nil, 0) keyRowKeyBuilder.build(sinkRecord, Nil) shouldBe s.fromString() } "create the right key from the Schema key value - Bytes" in { val bArray = Array(23.toByte, 24.toByte, 242.toByte) val sinkRecord = new SinkRecord("", 1, Schema.BYTES_SCHEMA, bArray, Schema.FLOAT64_SCHEMA, Nil, 0) keyRowKeyBuilder.build(sinkRecord, Nil) shouldBe bArray } "create the right key from the Schema key value - Boolean" in { val bool = true val sinkRecord = new SinkRecord("", 1, Schema.BOOLEAN_SCHEMA, bool, Schema.FLOAT64_SCHEMA, Nil, 0) keyRowKeyBuilder.build(sinkRecord, Nil) shouldBe bool.fromBoolean() } } }
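Each assertion above feeds the builder a record whose key schema type dictates how the key is turned into bytes. A condensed sketch of that dispatch, covering only the types the test exercises and using plain JDK encoding rather than the project's BytesHelper, could look like this:

import java.nio.charset.StandardCharsets
import org.apache.kafka.connect.data.Schema
import org.apache.kafka.connect.sink.SinkRecord

object KeyBytes {
  // Simplified illustration; the exact byte layout of the real builder
  // comes from BytesHelper and may differ (e.g. for booleans).
  def apply(record: SinkRecord): Array[Byte] = record.keySchema().`type`() match {
    case Schema.Type.INT8    => Array(record.key().asInstanceOf[Byte])
    case Schema.Type.STRING  => record.key().asInstanceOf[String].getBytes(StandardCharsets.UTF_8)
    case Schema.Type.BYTES   => record.key().asInstanceOf[Array[Byte]]
    case Schema.Type.BOOLEAN => Array(if (record.key().asInstanceOf[Boolean]) 1.toByte else 0.toByte)
    case other               => sys.error(s"Key schema type $other is not handled in this sketch")
  }
}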
Example 59
Source File: InfluxDbWriter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.influx.writers import com.datamountaineer.streamreactor.connect.errors.ErrorHandler import com.datamountaineer.streamreactor.connect.influx.config.InfluxSettings import com.datamountaineer.streamreactor.connect.influx.{NanoClock, ValidateStringParameterFn} import com.datamountaineer.streamreactor.connect.sink.DbWriter import com.typesafe.scalalogging.StrictLogging import org.apache.kafka.connect.sink.SinkRecord import org.influxdb.InfluxDBFactory import scala.util.Try class InfluxDbWriter(settings: InfluxSettings) extends DbWriter with StrictLogging with ErrorHandler { ValidateStringParameterFn(settings.connectionUrl, "settings") ValidateStringParameterFn(settings.user, "settings") //initialize error tracker initialize(settings.maxRetries, settings.errorPolicy) private val influxDB = InfluxDBFactory.connect(settings.connectionUrl, settings.user, settings.password) private val builder = new InfluxBatchPointsBuilder(settings, new NanoClock()) override def write(records: Seq[SinkRecord]): Unit = { if (records.isEmpty) { logger.debug("No records received.") } else { handleTry( builder .build(records) .flatMap { batchPoints => logger.debug(s"Writing ${batchPoints.getPoints.size()} points to the database...") Try(influxDB.write(batchPoints)) }.map(_ => logger.debug("Writing complete"))) } } override def close(): Unit = {} }
Example 60
Source File: SinkRecordParser.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.influx.converters import com.datamountaineer.streamreactor.connect.influx.helpers.Util import com.datamountaineer.streamreactor.connect.influx.writers.KcqlDetails.Path import com.datamountaineer.streamreactor.connect.influx.writers.ValuesExtractor import com.fasterxml.jackson.databind.JsonNode import com.landoop.json.sql.JacksonJson import org.apache.kafka.connect.data.{Schema, Struct} import org.apache.kafka.connect.sink.SinkRecord import scala.util.Try object SinkRecordParser { type Field = String trait ParsedSinkRecord { def valueFields(ignored: Set[Path]): Seq[(String, Any)] def field(path: Path): Option[Any] } trait ParsedKeyValueSinkRecord extends ParsedSinkRecord { def keyFields(ignored: Set[Path]): Seq[(String, Any)] } private case class JsonSinkRecord(json: JsonNode) extends ParsedSinkRecord { override def valueFields(ignored: Set[Path]): Seq[(String, Any)] = ValuesExtractor.extractAllFields(json, ignored.map(_.value.last)) override def field(path: Path): Option[Any] = Option(ValuesExtractor.extract(json, path.value)) } private case class StructSinkRecord(struct: Struct) extends ParsedSinkRecord { override def valueFields(ignored: Set[Path]): Seq[(String, Any)] = ValuesExtractor.extractAllFields(struct, ignored.map(_.value.last)) override def field(path: Path): Option[Any] = Option(ValuesExtractor.extract(struct, path.value)) } private case class MapSinkRecord(map: java.util.Map[String, Any]) extends ParsedSinkRecord { override def valueFields(ignored: Set[Path]): Seq[(String, Any)] = ValuesExtractor.extractAllFields(map, ignored.map(_.value.last)) override def field(path: Path): Option[Any] = Option(ValuesExtractor.extract(map, path.value)) } private case class KeyValueRecord(key: ParsedSinkRecord, value: ParsedSinkRecord) extends ParsedKeyValueSinkRecord { override def valueFields(ignored: Set[Path]): Seq[(String, Any)] = value.valueFields(ignored) override def field(path: Path): Option[Any] = path.value.headOption match { case Some(fieldName) if Util.caseInsensitiveComparison(fieldName, Util.KEY_CONSTANT) => key.field(Path(path.value.tail)) case Some(_) => value.field(path) case None => throw new IllegalArgumentException("Unreachable situation detected. Path should never be empty") } override def keyFields(ignored: Set[Path]): Seq[(String, Any)] = key.valueFields(ignored) } def build(record: SinkRecord): Try[ParsedKeyValueSinkRecord] = { val key = Option(record.keySchema()).map(_.`type`()) match { case Some(Schema.Type.STRING) => Try(JsonSinkRecord(JacksonJson.asJson(record.key().asInstanceOf[String]))) case Some(Schema.Type.STRUCT) => Try(StructSinkRecord(record.key().asInstanceOf[Struct])) case None => Try(MapSinkRecord(record.key().asInstanceOf[java.util.Map[String, Any]])) } val value = Option(record.valueSchema()).map(_.`type`()) match { case Some(Schema.Type.STRING) => Try(require(record.value() != null && record.value().getClass == classOf[String], "The SinkRecord payload should be of type String")).flatMap(_ => Try(JsonSinkRecord(JacksonJson.asJson(record.value().asInstanceOf[String])))) case Some(Schema.Type.STRUCT) => Try(require(record.value() != null && record.value().getClass == classOf[Struct], "The SinkRecord payload should be of type Struct")).flatMap(_ => Try(StructSinkRecord(record.value().asInstanceOf[Struct]))) case None => Try(require(record.value() != null && record.value().isInstanceOf[java.util.Map[_, _]], "The SinkRecord payload should be of type java.util.Map[String, Any]")).flatMap(_ => Try(MapSinkRecord(record.value().asInstanceOf[java.util.Map[String, Any]]))) } key .flatMap(key => value.map(key -> _)) .map { case (k, v) => KeyValueRecord(k, v) } } }
Example 61
Source File: ValueConverter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct} import org.apache.kafka.connect.sink.SinkRecord import scala.collection.JavaConverters._ object ValueConverter { def apply(record: SinkRecord): Struct = record.value match { case struct: Struct => StructValueConverter.convert(struct) case map: Map[_, _] => MapValueConverter.convert(map) case map: java.util.Map[_, _] => MapValueConverter.convert(map.asScala.toMap) case string: String => StringValueConverter.convert(string) case other => sys.error(s"Unsupported record $other:${other.getClass.getCanonicalName}") } } trait ValueConverter[T] { def convert(value: T): Struct } object StructValueConverter extends ValueConverter[Struct] { override def convert(struct: Struct): Struct = struct } object MapValueConverter extends ValueConverter[Map[_, _]] { def convertValue(value: Any, key: String, builder: SchemaBuilder): Any = { value match { case s: String => builder.field(key, Schema.OPTIONAL_STRING_SCHEMA) s case l: Long => builder.field(key, Schema.OPTIONAL_INT64_SCHEMA) l case i: Int => builder.field(key, Schema.OPTIONAL_INT64_SCHEMA) i.toLong case b: Boolean => builder.field(key, Schema.OPTIONAL_BOOLEAN_SCHEMA) b case f: Float => builder.field(key, Schema.OPTIONAL_FLOAT64_SCHEMA) f.toDouble case d: Double => builder.field(key, Schema.OPTIONAL_FLOAT64_SCHEMA) d case innerMap: java.util.Map[_, _] => val innerStruct = convert(innerMap.asScala.toMap, true) builder.field(key, innerStruct.schema()) innerStruct case innerMap: Map[_, _] => val innerStruct = convert(innerMap, true) builder.field(key, innerStruct.schema()) innerStruct } } def convert(map: Map[_, _], optional: Boolean) = { val builder = SchemaBuilder.struct() val values = map.map { case (k, v) => val key = k.toString val value = convertValue(v, key, builder) key -> value }.toList if (optional) builder.optional() val schema = builder.build val struct = new Struct(schema) values.foreach { case (key, value) => struct.put(key.toString, value) } struct } override def convert(map: Map[_, _]): Struct = convert(map, false) } object StringValueConverter extends ValueConverter[String] { override def convert(string: String): Struct = { val schema = SchemaBuilder.struct().field("a", Schema.OPTIONAL_STRING_SCHEMA).name("struct").build() new Struct(schema).put("a", string) } }
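A brief usage sketch for the converter above: a schemaless java.util.Map payload gets an ad-hoc Struct schema derived from its values, while a plain String payload is wrapped in a single optional field named "a". The object name ValueConverterUsage is invented for this illustration.

import com.landoop.streamreactor.connect.hive.sink.ValueConverter
import org.apache.kafka.connect.sink.SinkRecord
import scala.collection.JavaConverters._

object ValueConverterUsage extends App {
  // Map payload with no value schema: the converter builds a schema from the values.
  val mapValue = Map[String, Any]("name" -> "alice", "age" -> 30).asJava
  val mapRecord = new SinkRecord("hive_topic", 0, null, null, null, mapValue, 0L)
  println(ValueConverter(mapRecord).schema().fields())

  // String payload: wrapped into a single-field Struct.
  val stringRecord = new SinkRecord("hive_topic", 0, null, null, null, "raw text", 1L)
  println(ValueConverter(stringRecord).getString("a"))
}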
Example 62
Source File: TestKuduSink.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.kudu

import com.datamountaineer.streamreactor.connect.kudu.sink.KuduSinkTask
import org.apache.kafka.connect.sink.{SinkRecord, SinkTaskContext}
import org.mockito.MockitoSugar

import scala.collection.JavaConverters._

class TestKuduSink extends TestBase with MockitoSugar {
  "Should start a Kudu Sink" in {
    val config = getConfig
    val context = mock[SinkTaskContext]
    when(context.assignment()).thenReturn(getAssignment)
    when(context.configs()).thenReturn(config)
    val task = new KuduSinkTask()
    // initialise the task's context
    task.initialize(context)
    val recs = List.empty[SinkRecord].asJavaCollection
    // start the task
    task.start(config)
    task.put(recs)
    //task.stop()
    // TODO: write a real put test here once a Kudu docker image is available
  }
}
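The test above only starts the task and puts an empty batch, because a real write needs a live Kudu cluster. The sketch below is not part of the original test: it shows how a non-empty batch could be constructed, where the topic, schema and field names are assumptions that would have to match whatever KCQL getConfig sets up, and the commented-out put would still require a running Kudu instance.

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord
import scala.collection.JavaConverters._

// Assumed schema and topic, purely for illustration.
val schema = SchemaBuilder.struct()
  .field("id", Schema.INT32_SCHEMA)
  .field("name", Schema.STRING_SCHEMA)
  .build()

val rec = new SinkRecord("kudu_topic", 0, null, null, schema,
  new Struct(schema).put("id", 1).put("name", "a"), 0L)

// task.put(List(rec).asJavaCollection)   // would attempt a real Kudu write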
Example 63
Source File: ElasticJsonWriter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.elastic6

import java.util

import com.datamountaineer.kcql.{Kcql, WriteModeEnum}
import com.datamountaineer.streamreactor.connect.converters.FieldConverter
import com.datamountaineer.streamreactor.connect.elastic6.config.ElasticSettings
import com.datamountaineer.streamreactor.connect.elastic6.indexname.CreateIndex
import com.datamountaineer.streamreactor.connect.errors.ErrorHandler
import com.datamountaineer.streamreactor.connect.schemas.ConverterUtil
import com.fasterxml.jackson.databind.JsonNode
import com.landoop.sql.Field
import com.sksamuel.elastic4s.Indexable
import com.sksamuel.elastic4s.http.ElasticDsl._
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.sink.SinkRecord

import scala.collection.JavaConverters._
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._
import scala.concurrent.{Await, Future}
import scala.util.Try

class ElasticJsonWriter(client: KElasticClient, settings: ElasticSettings)
  extends ErrorHandler with StrictLogging with ConverterUtil {

  logger.info("Initialising Elastic Json writer")

  //initialize error tracker
  initialize(settings.taskRetries, settings.errorPolicy)

  //create the index automatically if it was set to do so
  settings.kcqls.filter(_.isAutoCreate).foreach(client.index)

  private val topicKcqlMap = settings.kcqls.groupBy(_.getSource)

  private val kcqlMap = new util.IdentityHashMap[Kcql, KcqlValues]()
  settings.kcqls.foreach { kcql =>
    kcqlMap.put(kcql,
      KcqlValues(
        kcql.getFields.asScala.map(FieldConverter.apply),
        kcql.getIgnoredFields.asScala.map(FieldConverter.apply),
        kcql.getPrimaryKeys.asScala.map { pk =>
          val path = Option(pk.getParentFields).map(_.asScala.toVector).getOrElse(Vector.empty)
          path :+ pk.getName
        }
      ))
  }

  implicit object SinkRecordIndexable extends Indexable[SinkRecord] {
    override def json(t: SinkRecord): String = convertValueToJson(t).toString
  }

  def autoGenId(record: SinkRecord): String = {
    val pks = Seq(record.topic(), record.kafkaPartition(), record.kafkaOffset())
    pks.mkString(settings.pkJoinerSeparator)
  }

  private case class KcqlValues(fields: Seq[Field],
                                ignoredFields: Seq[Field],
                                primaryKeysPath: Seq[Vector[String]])
}

case object IndexableJsonNode extends Indexable[JsonNode] {
  override def json(t: JsonNode): String = t.toString
}
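When no primary keys are configured, autoGenId above simply joins the record coordinates with the configured separator. A brief sketch of what it produces; the separator value and record coordinates are made up for illustration.

import org.apache.kafka.connect.sink.SinkRecord

// Assuming settings.pkJoinerSeparator == "-" for this example.
val rec = new SinkRecord("orders", 3, null, null, null, null, 120L)
// autoGenId(rec) == "orders-3-120"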
Example 64
Source File: PulsarWriterTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.pulsar.sink

import com.datamountaineer.streamreactor.connect.pulsar.ProducerConfigFactory
import com.datamountaineer.streamreactor.connect.pulsar.config.{PulsarConfigConstants, PulsarSinkConfig, PulsarSinkSettings}
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord
import org.apache.pulsar.client.api.{Message, MessageId, Producer, PulsarClient}
import org.mockito.ArgumentMatchers.any
import org.mockito.MockitoSugar
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

import scala.collection.JavaConverters._

class PulsarWriterTest extends AnyWordSpec with MockitoSugar with Matchers {

  val pulsarTopic = "persistent://landoop/standalone/connect/kafka-topic"

  def getSchema: Schema = {
    SchemaBuilder.struct
      .field("int8", SchemaBuilder.int8().defaultValue(2.toByte).doc("int8 field").build())
      .field("int16", Schema.INT16_SCHEMA)
      .field("int32", Schema.INT32_SCHEMA)
      .field("int64", Schema.INT64_SCHEMA)
      .field("float32", Schema.FLOAT32_SCHEMA)
      .field("float64", Schema.FLOAT64_SCHEMA)
      .field("boolean", Schema.BOOLEAN_SCHEMA)
      .field("string", Schema.STRING_SCHEMA)
      .build()
  }

  def getStruct(schema: Schema): Struct = {
    new Struct(schema)
      .put("int8", 12.toByte)
      .put("int16", 12.toShort)
      .put("int32", 12)
      .put("int64", 12L)
      .put("float32", 12.2f)
      .put("float64", 12.2)
      .put("boolean", true)
      .put("string", "foo")
  }

  "should write messages" in {
    val config = PulsarSinkConfig(Map(
      PulsarConfigConstants.HOSTS_CONFIG -> "pulsar://localhost:6650",
      PulsarConfigConstants.KCQL_CONFIG -> s"INSERT INTO $pulsarTopic SELECT * FROM kafka_topic BATCH = 10 WITHPARTITIONER = SinglePartition WITHCOMPRESSION = ZLIB WITHDELAY = 1000"
    ).asJava)

    val schema = getSchema
    val struct = getStruct(schema)

    val record1 = new SinkRecord("kafka_topic", 0, null, null, schema, struct, 1)

    val settings = PulsarSinkSettings(config)
    val producerConfig = ProducerConfigFactory("test", settings.kcql)

    val client = mock[PulsarClient]
    val producer = mock[Producer]
    val messageId = mock[MessageId]

    when(client.createProducer(pulsarTopic, producerConfig(pulsarTopic))).thenReturn(producer)
    when(producer.send(any[Message])).thenReturn(messageId)

    val writer = PulsarWriter(client, "test", settings)
    writer.write(List(record1))
  }
}
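Since write takes a collection, the same mocked setup also accepts a batch spanning several partitions and offsets. A short extension sketch, not in the original test, reusing record1, schema, struct and writer from the test body above; the extra partition and offsets are made up.

// Additional records on another partition and later offsets; the mocked producer accepts them all.
val record2 = new SinkRecord("kafka_topic", 1, null, null, schema, struct, 5)
val record3 = new SinkRecord("kafka_topic", 1, null, null, schema, struct, 6)

writer.write(List(record1, record2, record3))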