org.apache.kafka.connect.data.SchemaBuilder Scala Examples
The following examples show how to use org.apache.kafka.connect.data.SchemaBuilder.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: ProjectionMapper.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.source.mapper

import cats.data.NonEmptyList
import com.landoop.streamreactor.connect.hive.StructMapper
import com.landoop.streamreactor.connect.hive.source.config.ProjectionField
import org.apache.kafka.connect.data.{SchemaBuilder, Struct}

/**
  * A [[StructMapper]] that keeps only the projected fields of the input,
  * renaming each one to its alias.
  *
  * Fails via `sys.error` when a projected field is not present in the input schema.
  */
class ProjectionMapper(projection: NonEmptyList[ProjectionField]) extends StructMapper {

  override def map(input: Struct): Struct = {
    val inputSchema = input.schema

    // Output schema: one field per projection entry, named by alias, typed as in the input.
    val projectedSchema = projection
      .foldLeft(SchemaBuilder.struct) { (acc, wanted) =>
        Option(inputSchema.field(wanted.name)) match {
          case Some(source) => acc.field(wanted.alias, source.schema)
          case None         => sys.error(s"Projection field ${wanted.name} cannot be found in input")
        }
      }
      .build()

    // Copy each projected value across under its alias.
    projection.foldLeft(new Struct(projectedSchema)) { (out, wanted) =>
      out.put(wanted.alias, input.get(wanted.name))
    }
  }
}
Example 2
Source File: ReThinkSourceReadersFactory.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.rethink.source

import java.util
import java.util.concurrent.LinkedBlockingQueue
import java.util.concurrent.atomic.AtomicBoolean

import com.datamountaineer.streamreactor.connect.rethink.ReThinkConnection
import com.datamountaineer.streamreactor.connect.rethink.config.{ReThinkSourceConfig, ReThinkSourceSetting, ReThinkSourceSettings}
import com.rethinkdb.RethinkDB
import com.rethinkdb.net.{Connection, Cursor}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.data.SchemaBuilder
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.JavaConverters._
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future

/** Builds one [[ReThinkSourceReader]] per configured source setting, all sharing one connection. */
object ReThinkSourceReadersFactory {

  /**
    * @param config the source connector configuration
    * @param r      the RethinkDB driver handle
    * @return one reader for each setting derived from `config`
    */
  def apply(config: ReThinkSourceConfig, r: RethinkDB): Set[ReThinkSourceReader] = {
    // The connection is created once and shared; no Option wrapper is needed.
    val conn = ReThinkConnection(r, config)
    ReThinkSourceSettings(config).map(s => new ReThinkSourceReader(r, conn, s))
  }
}

/**
  * Reads a RethinkDB change feed for `setting.source` and places each event,
  * converted to a [[SourceRecord]], on [[queue]].
  */
class ReThinkSourceReader(rethink: RethinkDB, conn: Connection, setting: ReThinkSourceSetting)
    extends StrictLogging {

  logger.info(s"Initialising ReThink Reader for ${setting.source}")

  private val keySchema = SchemaBuilder.string().optional().build()
  private val valueSchema = ChangeFeedStructBuilder.schema
  private val sourcePartition = Map.empty[String, String]
  private val offset = Map.empty[String, String]
  private val stopFeed = new AtomicBoolean(false)
  private val handlingFeed = new AtomicBoolean(false)
  private var feed: Cursor[util.HashMap[String, String]] = _
  val queue = new LinkedBlockingQueue[SourceRecord]()
  val batchSize = setting.batchSize

  /** Opens the change feed and starts consuming it in the background. */
  def start() = {
    feed = getChangeFeed()
    startFeed(feed)
  }

  /** Signals the consumer loop to stop, waits for it to finish, then closes the feed. */
  def stop() = {
    logger.info(s"Closing change feed for ${setting.source}")
    stopFeed.set(true)
    while (handlingFeed.get()) {
      logger.debug("Waiting for feed to shutdown...")
      Thread.sleep(1000)
    }
    // `feed` is still null when stop() is called before start(); avoid the NPE.
    Option(feed).foreach(_.close())
    logger.info(s"Change feed closed for ${setting.source}")
  }

  // NOTE(review): `startFeed` is called by start() but was not defined in the visible class.
  // Restored as a Future wrapper, which is what the otherwise unused Future /
  // ExecutionContext imports suggest — confirm against the upstream source.
  private def startFeed(feed: Cursor[util.HashMap[String, String]]): Future[Unit] =
    Future(handleFeed(feed))

  private def handleFeed(feed: Cursor[util.HashMap[String, String]]) = {
    handlingFeed.set(true)
    try {
      // feed.next is blocking
      while (!stopFeed.get()) {
        logger.debug(s"Waiting for next change feed event for ${setting.source}")
        val cdc = convert(feed.next().asScala.toMap)
        queue.put(cdc)
      }
    } finally {
      // Always clear the flag, even when feed.next() throws; otherwise stop() spins forever.
      handlingFeed.set(false)
    }
  }

  private def getChangeFeed(): Cursor[util.HashMap[String, String]] = {
    logger.info(s"Initialising change feed for ${setting.source}")
    rethink
      .db(setting.db)
      .table(setting.source)
      .changes()
      .optArg("include_states", true)
      .optArg("include_initial", setting.initialise)
      .optArg("include_types", true)
      .run(conn)
  }

  private def convert(feed: Map[String, String]) = {
    new SourceRecord(sourcePartition.asJava, offset.asJava, setting.target, keySchema, setting.source, valueSchema, ChangeFeedStructBuilder(feed))
  }
}
Example 3
Source File: ChangeFeedStructBuilder.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.rethink.source

import com.fasterxml.jackson.databind.ObjectMapper
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}

/** Schema and Struct factory for change feed events; every field is an optional string. */
object ChangeFeedStructBuilder extends StrictLogging {

  val mapper = new ObjectMapper()
  val oldVal = "old_val"
  val newVal = "new_val"
  val state = "state"
  val `type` = "type"

  /** Version 1 of the "ReThinkChangeFeed" struct schema. */
  val schema: Schema =
    Seq(state, oldVal, newVal, `type`)
      .foldLeft(SchemaBuilder.struct.name("ReThinkChangeFeed").version(1)) { (acc, fieldName) =>
        acc.field(fieldName, Schema.OPTIONAL_STRING_SCHEMA)
      }
      .build

  /**
    * Copies every non-null entry of `hm` into a new Struct, storing each value's `toString`.
    *
    * @param hm field name to value
    * @return a Struct conforming to [[schema]]
    */
  def apply(hm: Map[String, Object]): Struct =
    hm.foldLeft(new Struct(schema)) {
      case (acc, (_, null))    => acc // null values are left unset
      case (acc, (key, value)) => acc.put(key, value.toString)
    }
}
Example 4
Source File: PulsarWriterTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.pulsar.sink

import com.datamountaineer.streamreactor.connect.pulsar.ProducerConfigFactory
import com.datamountaineer.streamreactor.connect.pulsar.config.{PulsarConfigConstants, PulsarSinkConfig, PulsarSinkSettings}
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord
import org.apache.pulsar.client.api.{Message, MessageId, Producer, PulsarClient}
import org.mockito.ArgumentMatchers.any
import org.mockito.MockitoSugar
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

import scala.collection.JavaConverters._

/** Exercises PulsarWriter against a mocked Pulsar client and producer. */
class PulsarWriterTest extends AnyWordSpec with MockitoSugar with Matchers {

  val pulsarTopic = "persistent://landoop/standalone/connect/kafka-topic"

  /** Struct schema with one field per primitive type; "int8" carries a default and a doc string. */
  def getSchema: Schema = {
    val int8WithDefault = SchemaBuilder.int8().defaultValue(2.toByte).doc("int8 field").build()
    val fields: Seq[(String, Schema)] = Seq(
      "int8"    -> int8WithDefault,
      "int16"   -> Schema.INT16_SCHEMA,
      "int32"   -> Schema.INT32_SCHEMA,
      "int64"   -> Schema.INT64_SCHEMA,
      "float32" -> Schema.FLOAT32_SCHEMA,
      "float64" -> Schema.FLOAT64_SCHEMA,
      "boolean" -> Schema.BOOLEAN_SCHEMA,
      "string"  -> Schema.STRING_SCHEMA
    )
    fields
      .foldLeft(SchemaBuilder.struct) { case (acc, (fieldName, fieldSchema)) => acc.field(fieldName, fieldSchema) }
      .build()
  }

  /** A Struct for `schema` with every field of [[getSchema]] populated. */
  def getStruct(schema: Schema): Struct = {
    val populated = new Struct(schema)
    populated.put("int8", 12.toByte)
    populated.put("int16", 12.toShort)
    populated.put("int32", 12)
    populated.put("int64", 12L)
    populated.put("float32", 12.2f)
    populated.put("float64", 12.2)
    populated.put("boolean", true)
    populated.put("string", "foo")
    populated
  }

  "should write messages" in {
    val kcql =
      s"INSERT INTO $pulsarTopic SELECT * FROM kafka_topic BATCH = 10 WITHPARTITIONER = SinglePartition WITHCOMPRESSION = ZLIB WITHDELAY = 1000"
    val connectorProps = Map(
      PulsarConfigConstants.HOSTS_CONFIG -> "pulsar://localhost:6650",
      PulsarConfigConstants.KCQL_CONFIG  -> kcql
    )
    val config = PulsarSinkConfig(connectorProps.asJava)

    val valueSchema = getSchema
    val record = new SinkRecord("kafka_topic", 0, null, null, valueSchema, getStruct(valueSchema), 1)

    val settings = PulsarSinkSettings(config)
    val producerConfig = ProducerConfigFactory("test", settings.kcql)

    // Stub the Pulsar client so no broker is required.
    val client = mock[PulsarClient]
    val producer = mock[Producer]
    val messageId = mock[MessageId]
    when(client.createProducer(pulsarTopic, producerConfig(pulsarTopic))).thenReturn(producer)
    when(producer.send(any[Message])).thenReturn(messageId)

    PulsarWriter(client, "test", settings).write(List(record))
  }
}
Example 5
Source File: RedisInsertSortedSetTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.redis.sink.writer

import com.datamountaineer.streamreactor.connect.redis.sink.config.{RedisConfig, RedisConfigConstants, RedisConnectionInfo, RedisSinkSettings}
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord
import org.mockito.MockitoSugar
import org.scalatest.BeforeAndAfterAll
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec
import redis.clients.jedis.Jedis
import redis.embedded.RedisServer

import scala.collection.JavaConverters._

/** Writes three records through RedisInsertSortedSet and reads them back from an embedded Redis. */
class RedisInsertSortedSetTest extends AnyWordSpec with Matchers with BeforeAndAfterAll with MockitoSugar {

  val redisServer = new RedisServer(6379)

  override def beforeAll() = redisServer.start()

  override def afterAll() = redisServer.stop()

  "Redis INSERT into Sorted Set (SS) writer" should {

    "write Kafka records to a Redis Sorted Set" in {

      val TOPIC = "cpuTopic"
      val KCQL = s"INSERT INTO cpu_stats SELECT * from $TOPIC STOREAS SortedSet(score=ts)"
      println("Testing KCQL : " + KCQL)

      val props = Map(
        RedisConfigConstants.REDIS_HOST  -> "localhost",
        RedisConfigConstants.REDIS_PORT  -> "6379",
        RedisConfigConstants.KCQL_CONFIG -> KCQL
      ).asJava

      val config = RedisConfig(props)
      val connectionInfo = new RedisConnectionInfo("localhost", 6379, None)
      val settings = RedisSinkSettings(config)
      val writer = new RedisInsertSortedSet(settings)
      writer.createClient(settings)

      val cpuSchema = SchemaBuilder.struct().name("com.example.Cpu")
        .field("type", Schema.STRING_SCHEMA)
        .field("temperature", Schema.FLOAT64_SCHEMA)
        .field("voltage", Schema.FLOAT64_SCHEMA)
        .field("ts", Schema.INT64_SCHEMA).build()

      // Local factories keep the three fixtures readable.
      def cpu(model: String, temperature: Double, voltage: Double, ts: Long): Struct =
        new Struct(cpuSchema).put("type", model).put("temperature", temperature).put("voltage", voltage).put("ts", ts)

      def recordAt(value: Struct, kafkaOffset: Long): SinkRecord =
        new SinkRecord(TOPIC, 0, null, null, cpuSchema, value, kafkaOffset)

      val xeon = recordAt(cpu("Xeon", 60.4, 90.1, 1482180657010L), 1)
      val i7 = recordAt(cpu("i7", 62.1, 103.3, 1482180657020L), 2)
      val i7i = recordAt(cpu("i7-i", 64.5, 101.1, 1482180657030L), 3)

      val jedis = new Jedis(connectionInfo.host, connectionInfo.port)
      // Clean up in-memory jedis
      jedis.flushAll()

      writer.write(Seq(xeon))
      writer.write(Seq(i7, i7i))

      // Redis cardinality should now be 3
      jedis.zcard("cpu_stats") shouldBe 3

      val stored = jedis.zrange("cpu_stats", 0, 999999999999L).asScala.toList

      stored.head shouldBe """{"type":"Xeon","temperature":60.4,"voltage":90.1,"ts":1482180657010}"""
      stored(1) shouldBe """{"type":"i7","temperature":62.1,"voltage":103.3,"ts":1482180657020}"""
      stored(2) shouldBe """{"type":"i7-i","temperature":64.5,"voltage":101.1,"ts":1482180657030}"""
    }
  }
}
Example 6
Source File: RedisPubSubTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.redis.sink.writer

import com.datamountaineer.streamreactor.connect.redis.sink.config.{RedisConfig, RedisConfigConstants, RedisConnectionInfo, RedisSinkSettings}
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord
import org.mockito.MockitoSugar
import org.scalatest.BeforeAndAfterAll
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec
import redis.clients.jedis.{Jedis, JedisPubSub}
import redis.embedded.RedisServer

import scala.collection.JavaConverters._
import scala.collection.mutable.ListBuffer

/** Publishes three records through RedisPubSub and checks what a subscriber thread received. */
class RedisPubSubTest extends AnyWordSpec with Matchers with BeforeAndAfterAll with MockitoSugar {

  val redisServer = new RedisServer(6379)

  override def beforeAll() = redisServer.start()

  override def afterAll() = redisServer.stop()

  "Redis PUBSUB writer" should {

    "write Kafka records to a Redis PubSub" in {

      val TOPIC = "cpuTopic"
      val KCQL = s"SELECT * from $TOPIC STOREAS PubSub (channel=type)"
      println("Testing KCQL : " + KCQL)

      val props = Map(
        RedisConfigConstants.REDIS_HOST  -> "localhost",
        RedisConfigConstants.REDIS_PORT  -> "6379",
        RedisConfigConstants.KCQL_CONFIG -> KCQL
      ).asJava

      val config = RedisConfig(props)
      val connectionInfo = new RedisConnectionInfo("localhost", 6379, None)
      val settings = RedisSinkSettings(config)
      val writer = new RedisPubSub(settings)
      writer.createClient(settings)

      val cpuSchema = SchemaBuilder.struct().name("com.example.Cpu")
        .field("type", Schema.STRING_SCHEMA)
        .field("temperature", Schema.FLOAT64_SCHEMA)
        .field("voltage", Schema.FLOAT64_SCHEMA)
        .field("ts", Schema.INT64_SCHEMA).build()

      def cpu(model: String, temperature: Double, voltage: Double, ts: Long): Struct =
        new Struct(cpuSchema).put("type", model).put("temperature", temperature).put("voltage", voltage).put("ts", ts)

      def recordAt(value: Struct, kafkaOffset: Long): SinkRecord =
        new SinkRecord(TOPIC, 0, null, null, cpuSchema, value, kafkaOffset)

      val xeon = recordAt(cpu("Xeon", 60.4, 90.1, 1482180657010L), 1)
      val i7 = recordAt(cpu("i7", 62.1, 103.3, 1482180657020L), 2)
      val i7i = recordAt(cpu("i7-i", 64.5, 101.1, 1482180657030L), 3)

      val jedis = new Jedis(connectionInfo.host, connectionInfo.port)
      // Clean up in-memory jedis
      jedis.flushAll()

      // Messages seen by the subscriber, grouped by channel.
      val messagesMap = collection.mutable.Map[String, ListBuffer[String]]()

      val t = new Thread {
        private val pubsub = new JedisPubSub {
          override def onMessage(channel: String, message: String): Unit = {
            // Append to the channel's buffer, creating it on first use.
            messagesMap.getOrElseUpdate(channel, ListBuffer.empty[String]) += message
          }
        }

        // jedis.subscribe blocks the thread for as long as the subscription is active.
        override def run(): Unit = {
          jedis.subscribe(pubsub, "Xeon", "i7", "i7-i")
        }

        override def interrupt(): Unit = {
          pubsub.punsubscribe("*")
          super.interrupt()
        }
      }
      t.start()
      t.join(5000)
      if (t.isAlive) {
        t.interrupt()
      }

      writer.write(Seq(xeon))
      writer.write(Seq(i7, i7i))

      messagesMap.size shouldBe 3

      messagesMap("Xeon").head shouldBe """{"type":"Xeon","temperature":60.4,"voltage":90.1,"ts":1482180657010}"""
      messagesMap("i7").head shouldBe """{"type":"i7","temperature":62.1,"voltage":103.3,"ts":1482180657020}"""
      messagesMap("i7-i").head shouldBe """{"type":"i7-i","temperature":64.5,"voltage":101.1,"ts":1482180657030}"""
    }
  }
}
Example 7
Source File: RedisStreamTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.redis.sink.writer

/*
 * Copyright 2017 Datamountaineer.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util

import com.datamountaineer.streamreactor.connect.redis.sink.RedisSinkTask
import com.datamountaineer.streamreactor.connect.redis.sink.config.{RedisConfig, RedisConfigConstants, RedisConnectionInfo, RedisSinkSettings}
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord
import org.mockito.MockitoSugar
import org.scalatest.BeforeAndAfterAll
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec
import redis.clients.jedis.{Jedis, StreamEntryID}

import scala.collection.JavaConverters._

/** Drives RedisStreams with a mocked Jedis client, so no Redis server is started. */
class RedisStreamTest extends AnyWordSpec with Matchers with BeforeAndAfterAll with MockitoSugar {
  //
  //  val redisServer = new RedisServer(6379)
  //
  //  override def beforeAll() = redisServer.start()
  //
  //  override def afterAll() = redisServer.stop()

  "Redis Stream writer" should {

    "write Kafka records to a Redis Stream" in {

      val TOPIC = "cpuTopic"
      val KCQL = s"INSERT INTO stream1 SELECT * from $TOPIC STOREAS STREAM"
      println("Testing KCQL : " + KCQL)

      val props = Map(
        RedisConfigConstants.REDIS_HOST     -> "localhost",
        RedisConfigConstants.REDIS_PORT     -> "6379",
        RedisConfigConstants.KCQL_CONFIG    -> KCQL,
        RedisConfigConstants.REDIS_PASSWORD -> ""
      ).asJava

      val config = RedisConfig(props)
      val connectionInfo = new RedisConnectionInfo("localhost", 6379, None)
      val settings = RedisSinkSettings(config)
      val writer = new RedisStreams(settings)

      val cpuSchema = SchemaBuilder.struct().name("com.example.Cpu")
        .field("type", Schema.STRING_SCHEMA)
        .field("temperature", Schema.FLOAT64_SCHEMA)
        .field("voltage", Schema.FLOAT64_SCHEMA)
        .field("ts", Schema.INT64_SCHEMA).build()

      val xeon = new Struct(cpuSchema)
        .put("type", "Xeon")
        .put("temperature", 60.4)
        .put("voltage", 90.1)
        .put("ts", 1482180657010L)

      val sinkRecord = new SinkRecord(TOPIC, 0, null, null, cpuSchema, xeon, 1)

      val jedis = mock[Jedis]
      writer.jedis = jedis

      // Entry the stubbed xadd call is matched against: every field value as a string.
      val expectedEntry = new util.HashMap[String, String]()
      Seq(
        "type"        -> "Xeon",
        "temperature" -> "60.4",
        "voltage"     -> "90.1",
        "ts"          -> 1482180657010L.toString
      ).foreach { case (field, text) => expectedEntry.put(field, text) }

      when(jedis.auth("")).isLenient()
      when(jedis.xadd("stream1", null, expectedEntry)).thenReturn(mock[StreamEntryID])

      writer.initialize(1, settings.errorPolicy)
      writer.write(Seq(sinkRecord))
    }
  }
}
Example 8
Source File: OrcSchemas.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.orc

import com.landoop.streamreactor.connect.hive.UnsupportedSchemaType
import org.apache.kafka.connect.data.{Decimal, Schema, SchemaBuilder}
import org.apache.orc.TypeDescription
import org.apache.orc.TypeDescription.Category

import scala.collection.JavaConverters._

/** Conversions between ORC type descriptions and Kafka Connect schemas. */
object OrcSchemas {

  /**
    * Maps an ORC type to the corresponding optional Kafka Connect schema.
    *
    * @param schema the ORC type description
    * @return the Connect schema; struct types are converted recursively
    * @throws UnsupportedSchemaType for ORC categories that have no mapping here
    */
  def toKafka(schema: TypeDescription): Schema = schema.getCategory match {
    case Category.BOOLEAN   => Schema.OPTIONAL_BOOLEAN_SCHEMA
    case Category.BYTE      => Schema.OPTIONAL_INT8_SCHEMA
    case Category.DOUBLE    => Schema.OPTIONAL_FLOAT64_SCHEMA
    case Category.INT       => Schema.OPTIONAL_INT32_SCHEMA
    case Category.FLOAT     => Schema.OPTIONAL_FLOAT32_SCHEMA
    case Category.LONG      => Schema.OPTIONAL_INT64_SCHEMA
    case Category.SHORT     => Schema.OPTIONAL_INT16_SCHEMA
    case Category.STRING    => Schema.OPTIONAL_STRING_SCHEMA
    case Category.VARCHAR   => Schema.OPTIONAL_STRING_SCHEMA
    case Category.CHAR      => Schema.OPTIONAL_STRING_SCHEMA
    case Category.DATE      => Schema.OPTIONAL_STRING_SCHEMA
    case Category.TIMESTAMP => Schema.OPTIONAL_STRING_SCHEMA
    // Was a second, unreachable `Category.BYTE` case; the bytes schema belongs to BINARY.
    case Category.BINARY    => Schema.OPTIONAL_BYTES_SCHEMA
    case Category.STRUCT    => toKafkaStruct(schema)
    // Report unmapped categories the same way toOrc does instead of a bare MatchError.
    case unsupportedCategory => throw UnsupportedSchemaType(unsupportedCategory.toString)
  }

  /**
    * Converts an ORC struct type into a Connect struct schema named "from_orc",
    * preserving field order.
    */
  def toKafkaStruct(schema: TypeDescription): Schema = {
    val children = schema.getChildren
    schema.getFieldNames.asScala.zipWithIndex
      .foldLeft(SchemaBuilder.struct().name("from_orc")) { case (builder, (field, k)) =>
        builder.field(field, toKafka(children.get(k)))
      }
      .build()
  }

  /**
    * Maps a Kafka Connect schema to the corresponding ORC type description.
    *
    * @throws UnsupportedSchemaType for Connect types that have no mapping here
    */
  def toOrc(schema: Schema): TypeDescription = {
    schema.`type`() match {
      case Schema.Type.STRING if schema.name() == Decimal.LOGICAL_NAME => TypeDescription.createDecimal()
      case Schema.Type.STRING => TypeDescription.createString()
      case Schema.Type.BOOLEAN => TypeDescription.createBoolean()
      case Schema.Type.FLOAT32 => TypeDescription.createFloat()
      case Schema.Type.FLOAT64 => TypeDescription.createDouble()
      case Schema.Type.INT8 => TypeDescription.createByte()
      case Schema.Type.INT16 => TypeDescription.createShort()
      case Schema.Type.INT32 => TypeDescription.createInt()
      case Schema.Type.INT64 => TypeDescription.createLong()
      case Schema.Type.BYTES if schema.name() == Decimal.LOGICAL_NAME => TypeDescription.createDecimal()
      case Schema.Type.BYTES => TypeDescription.createBinary()
      case Schema.Type.ARRAY => TypeDescription.createList(toOrc(schema.valueSchema()))
      case Schema.Type.MAP => TypeDescription.createMap(toOrc(schema.keySchema()), toOrc(schema.valueSchema()))
      case Schema.Type.STRUCT =>
        schema.fields().asScala.foldLeft(TypeDescription.createStruct) { case (struct, field) =>
          struct.addField(field.name, toOrc(field.schema))
        }
      case unsupportedDataType => throw UnsupportedSchemaType(unsupportedDataType.toString)
    }
  }
}
Example 9
Source File: ValueConverter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord

import scala.collection.JavaConverters._

/** Turns the value of a [[SinkRecord]] into a [[Struct]], dispatching on the value's runtime type. */
object ValueConverter {

  /**
    * @param record the sink record whose value is converted
    * @return the value as a Struct
    * @throws RuntimeException (via `sys.error`) when the value is null or of an unsupported type
    */
  def apply(record: SinkRecord): Struct = record.value match {
    // Without this case a null value hit `other.getClass` below and surfaced as an NPE.
    case null => sys.error("Unsupported record null: record value is missing")
    case struct: Struct => StructValueConverter.convert(struct)
    case map: Map[_, _] => MapValueConverter.convert(map)
    case map: java.util.Map[_, _] => MapValueConverter.convert(map.asScala.toMap)
    case string: String => StringValueConverter.convert(string)
    case other => sys.error(s"Unsupported record $other:${other.getClass.getCanonicalName}")
  }
}

/** Converts a value of type `T` into a Struct. */
trait ValueConverter[T] {
  def convert(value: T): Struct
}

/** Identity conversion: a Struct is returned as is. */
object StructValueConverter extends ValueConverter[Struct] {
  override def convert(struct: Struct): Struct = struct
}

/** Builds a Struct, and its schema, from the entries of a map; nested maps become nested structs. */
object MapValueConverter extends ValueConverter[Map[_, _]] {

  /**
    * Registers a field for `key` on `builder` according to the runtime type of `value`
    * and returns the value in the form expected by that field's schema.
    *
    * Int is widened to Long and Float to Double so they match the INT64 / FLOAT64 fields.
    *
    * @throws RuntimeException (via `sys.error`) when the value is null or of an unsupported type
    */
  def convertValue(value: Any, key: String, builder: SchemaBuilder): Any = {
    value match {
      case s: String =>
        builder.field(key, Schema.OPTIONAL_STRING_SCHEMA)
        s
      case l: Long =>
        builder.field(key, Schema.OPTIONAL_INT64_SCHEMA)
        l
      case i: Int =>
        builder.field(key, Schema.OPTIONAL_INT64_SCHEMA)
        i.toLong
      case b: Boolean =>
        builder.field(key, Schema.OPTIONAL_BOOLEAN_SCHEMA)
        b
      case f: Float =>
        builder.field(key, Schema.OPTIONAL_FLOAT64_SCHEMA)
        f.toDouble
      case d: Double =>
        builder.field(key, Schema.OPTIONAL_FLOAT64_SCHEMA)
        d
      case innerMap: java.util.Map[_, _] =>
        val innerStruct = convert(innerMap.asScala.toMap, true)
        builder.field(key, innerStruct.schema())
        innerStruct
      case innerMap: Map[_, _] =>
        val innerStruct = convert(innerMap, true)
        builder.field(key, innerStruct.schema())
        innerStruct
      // Previously fell through to an opaque MatchError; name the offending field instead.
      case other =>
        val typeName = Option(other).map(_.getClass.getCanonicalName).getOrElse("null")
        sys.error(s"Unsupported value for field $key: $other:$typeName")
    }
  }

  /**
    * @param map      the entries to convert; keys are stringified with `toString`
    * @param optional whether the resulting struct schema is marked optional
    */
  def convert(map: Map[_, _], optional: Boolean): Struct = {
    val builder = SchemaBuilder.struct()
    // Fields are added to the builder as a side effect of converting each value.
    val values = map.map { case (k, v) =>
      val key = k.toString
      key -> convertValue(v, key, builder)
    }.toList
    if (optional) builder.optional()
    val struct = new Struct(builder.build)
    values.foreach { case (key, value) => struct.put(key, value) }
    struct
  }

  override def convert(map: Map[_, _]): Struct = convert(map, false)
}

/** Wraps a plain string in a single-field struct named "struct", under field "a". */
object StringValueConverter extends ValueConverter[String] {
  override def convert(string: String): Struct = {
    val schema = SchemaBuilder.struct().field("a", Schema.OPTIONAL_STRING_SCHEMA).name("struct").build()
    new Struct(schema).put("a", string)
  }
}
Example 10
Source File: DropPartitionValuesMapper.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink.mapper

import com.landoop.streamreactor.connect.hive.{PartitionPlan, StructMapper}
import org.apache.kafka.connect.data.{SchemaBuilder, Struct}

/** A [[StructMapper]] that removes every field whose name is a partition key of `plan`. */
class DropPartitionValuesMapper(plan: PartitionPlan) extends StructMapper {

  import scala.collection.JavaConverters._

  override def map(input: Struct): Struct = {
    val droppedNames = plan.keys.map(_.value).toList
    val retained = input.schema.fields().asScala.filter(field => !droppedNames.contains(field.name))

    // Output schema: the retained fields, in their original order.
    val outputSchema = {
      val schemaBuilder = SchemaBuilder.struct
      retained.foreach(field => schemaBuilder.field(field.name, field.schema))
      schemaBuilder.build()
    }

    val output = new Struct(outputSchema)
    retained.foreach(field => output.put(field.name, input.get(field.name)))
    output
  }
}
Example 11
Source File: ProjectionMapper.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink.mapper

import cats.data.NonEmptyList
import com.datamountaineer.kcql.Field
import com.landoop.streamreactor.connect.hive.StructMapper
import org.apache.kafka.connect.data.{SchemaBuilder, Struct}

/**
  * A [[StructMapper]] that keeps only the KCQL-projected fields of the input,
  * renaming each one to its alias.
  *
  * Fails via `sys.error` when a projected field is absent from the input schema.
  */
class ProjectionMapper(projection: NonEmptyList[Field]) extends StructMapper {

  override def map(input: Struct): Struct = {
    val inputSchema = input.schema

    // the compatible output schema built from projected fields with aliases applied
    val projectedSchema = projection
      .foldLeft(SchemaBuilder.struct) { (acc, wanted) =>
        Option(inputSchema.field(wanted.getName)) match {
          case Some(source) => acc.field(wanted.getAlias, source.schema)
          case None         => sys.error(s"Missing field $wanted")
        }
      }
      .build()

    projection.foldLeft(new Struct(projectedSchema)) { (out, wanted) =>
      out.put(wanted.getAlias, input.get(wanted.getName))
    }
  }
}
Example 12
Source File: PartitionValueMapper.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.source.mapper

import com.landoop.streamreactor.connect.hive.{Partition, StructMapper}
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}

import scala.collection.JavaConverters._

/**
  * A [[StructMapper]] that appends the entries of `partition` to the input
  * as additional required string fields.
  */
class PartitionValueMapper(partition: Partition) extends StructMapper {

  override def map(input: Struct): Struct = {
    val inputFields = input.schema.fields.asScala
    val partitionEntries = partition.entries.toList

    // Schema: all input fields first, then one string field per partition entry.
    val withInputFields = inputFields.foldLeft(SchemaBuilder.struct()) { (acc, field) =>
      acc.field(field.name, field.schema)
    }
    val outputSchema = partitionEntries
      .foldLeft(withInputFields) { case (acc, (key, _)) => acc.field(key.value, Schema.STRING_SCHEMA) }
      .build()

    // Values: copied from the input, then taken from the partition entries.
    val withInputValues = inputFields.foldLeft(new Struct(outputSchema)) { (acc, field) =>
      acc.put(field.name, input.get(field.name))
    }
    partitionEntries.foldLeft(withInputValues) { case (acc, (key, value)) =>
      acc.put(key.value, value)
    }
  }
}
Example 13
Source File: ConnectSchema.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.bloomberg

import org.apache.kafka.connect.data.{Schema, SchemaBuilder}

import scala.collection.JavaConverters._

/**
  * Derives a Kafka Connect schema from the runtime shape of a value.
  *
  * NOTE(review): the class header was missing from this file, which left the braces
  * unbalanced and `new ConnectSchema(namespace)` without a target. It is restored here
  * from that call site — confirm the constructor signature against the upstream source.
  *
  * @param namespace the namespace passed by the companion object; not read in the visible code
  */
class ConnectSchema(namespace: String) {

  /**
    * Creates the schema describing `value`.
    *
    * @param name  the name given to a struct schema built from a map
    * @param value the value to inspect; lists and linked hash maps are handled recursively
    * @return the matching Connect schema
    * @throws RuntimeException (via `sys.error`) when the value's type is not handled
    */
  def createSchema(name: String, value: Any): Schema = {
    value match {
      case _: Boolean => Schema.BOOLEAN_SCHEMA
      case _: Int => Schema.INT32_SCHEMA
      case _: Long => Schema.INT64_SCHEMA
      case _: Double => Schema.FLOAT64_SCHEMA
      case _: Char => Schema.STRING_SCHEMA
      case _: String => Schema.STRING_SCHEMA
      case _: Float => Schema.FLOAT32_SCHEMA
      case list: java.util.List[_] =>
        // The element schema comes from the first item; an empty list falls back to optional string.
        val firstItemSchema = if (list.isEmpty) Schema.OPTIONAL_STRING_SCHEMA else createSchema(name, list.get(0))
        SchemaBuilder.array(firstItemSchema).build()
      case map: java.util.LinkedHashMap[String @unchecked, _] =>
        val recordBuilder = SchemaBuilder.struct()
        recordBuilder.name(name)
        map.entrySet().asScala.foreach(kvp =>
          recordBuilder.field(kvp.getKey, createSchema(kvp.getKey, kvp.getValue)))
        recordBuilder.build()
      case v => sys.error(s"${v.getClass} is not handled.")
    }
  }
}

object ConnectSchema {
  val namespace = "com.datamountaineer.streamreactor.connect.bloomberg"

  val connectSchema = new ConnectSchema(namespace)

  /** Adds `getConnectSchema` to BloombergData. */
  implicit class BloombergDataToConnectSchema(val data: BloombergData) {
    def getConnectSchema: Schema = {
      connectSchema.createSchema("BloombergData", data.data)
    }
  }
}
Example 14
Source File: OrcTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.orc

import com.landoop.streamreactor.connect.hive.{OrcSinkConfig, OrcSourceConfig, StructUtils, orc}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.kafka.connect.data.{SchemaBuilder, Struct}
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

/** Round-trips two structs through an ORC file on the local file system. */
class OrcTest extends AnyFlatSpec with Matchers {

  implicit val conf = new Configuration()
  implicit val fs = FileSystem.getLocal(conf)

  "Orc" should "read and write orc files" in {

    val userSchema = SchemaBuilder.struct()
      .field("name", SchemaBuilder.string().optional().build())
      .field("age", SchemaBuilder.int32().optional().build())
      .field("salary", SchemaBuilder.float64().optional().build())
      .name("from_orc")
      .build()

    val sammy = new Struct(userSchema).put("name", "sammy").put("age", 38).put("salary", 54.67)
    val laura = new Struct(userSchema).put("name", "laura").put("age", 37).put("salary", 91.84)

    val orcFile = new Path("orctest.orc")

    // Write both users, then close the sink before reading back.
    val sink = orc.sink(orcFile, userSchema, OrcSinkConfig(overwrite = true))
    Seq(sammy, laura).foreach(user => sink.write(user))
    sink.close()

    val readBack = orc.source(orcFile, OrcSourceConfig()).iterator.toList

    readBack.head.schema shouldBe userSchema
    readBack.map(StructUtils.extractValues) shouldBe
      List(Vector("sammy", 38, 54.67), Vector("laura", 37, 91.84))

    fs.delete(orcFile, false)
  }
}
Example 15
Source File: DefaultCommitPolicyTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink.staging

import com.landoop.streamreactor.connect.hive.{Offset, Topic, TopicPartitionOffset}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, LocalFileSystem, Path}
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

import scala.concurrent.duration._

/** Checks each DefaultCommitPolicy trigger (interval, record count, file size) on its own. */
class DefaultCommitPolicyTest extends AnyWordSpec with Matchers {

  val schema: Schema = SchemaBuilder.struct()
    .field("name", SchemaBuilder.string().required().build())
    .build()

  val struct = new Struct(schema)

  implicit val conf: Configuration = new Configuration()
  implicit val fs: LocalFileSystem = FileSystem.getLocal(conf)
  val tpo = TopicPartitionOffset(Topic("mytopic"), 1, Offset(100))

  /** Asks `policy` whether to flush, using the current on-disk status of `path` and the given count. */
  private def shouldFlush(policy: CommitPolicy, path: Path, count: Long) = {
    val status = fs.getFileStatus(path)
    policy.shouldFlush(CommitContext(tpo, path, count, status.getLen, status.getModificationTime))
  }

  "DefaultCommitPolicy" should {
    "roll over after interval" in {

      val policy = DefaultCommitPolicy(None, Option(2.seconds), None)
      val path = new Path("foo")
      // Only the file's existence matters here; close the stream so it is not leaked.
      fs.create(path).close()

      shouldFlush(policy, path, 10) shouldBe false
      Thread.sleep(2000)
      shouldFlush(policy, path, 10) shouldBe true

      fs.delete(path, false)
    }
    "roll over after file count" in {
      val policy = DefaultCommitPolicy(None, None, Some(9))
      val path = new Path("foo")
      // Only the file's existence matters here; close the stream so it is not leaked.
      fs.create(path).close()

      shouldFlush(policy, path, 7) shouldBe false
      shouldFlush(policy, path, 8) shouldBe false
      shouldFlush(policy, path, 9) shouldBe true
      shouldFlush(policy, path, 10) shouldBe true

      fs.delete(path, false)
    }
    "roll over after file size" in {
      val policy = DefaultCommitPolicy(Some(10), None, None)
      val path = new Path("foo")
      val out = fs.create(path)
      shouldFlush(policy, path, 7) shouldBe false
      // Writing more bytes than the size limit given to the policy must trigger a flush.
      out.writeBytes("wibble wobble wabble wubble")
      out.close()
      shouldFlush(policy, path, 9) shouldBe true
      fs.delete(path, false)
    }
  }
}
Example 16
Source File: DropPartitionValuesMapperTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink.mapper

import cats.data.NonEmptyList
import com.landoop.streamreactor.connect.hive.{PartitionKey, PartitionPlan, TableName}
import org.apache.kafka.connect.data.{SchemaBuilder, Struct}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

import scala.collection.JavaConverters._

/** Verifies that DropPartitionValuesMapper removes partition-key fields from the output schema. */
class DropPartitionValuesMapperTest extends AnyFunSuite with Matchers {

  test("strip partition values") {

    val fieldNames = Seq("a", "p", "q", "z")

    // Every field is a required string.
    val schema = fieldNames
      .foldLeft(SchemaBuilder.struct()) { (acc, fieldName) =>
        acc.field(fieldName, SchemaBuilder.string().required().build())
      }
      .build()

    val plan = PartitionPlan(TableName("foo"), NonEmptyList.of(PartitionKey("p"), PartitionKey("q")))
    // Each field holds its own name as the value.
    val struct = fieldNames.foldLeft(new Struct(schema))((acc, fieldName) => acc.put(fieldName, fieldName))

    val output = new DropPartitionValuesMapper(plan).map(struct)
    output.schema().fields().asScala.map(_.name) shouldBe Seq("a", "z")
  }

  test("handle partition field is missing in input") {

    // Partition key "p" is deliberately absent from the input.
    val fieldNames = Seq("a", "q", "z")

    val schema = fieldNames
      .foldLeft(SchemaBuilder.struct()) { (acc, fieldName) =>
        acc.field(fieldName, SchemaBuilder.string().required().build())
      }
      .build()

    val plan = PartitionPlan(TableName("foo"), NonEmptyList.of(PartitionKey("p"), PartitionKey("q")))
    val struct = fieldNames.foldLeft(new Struct(schema))((acc, fieldName) => acc.put(fieldName, fieldName))

    val output = new DropPartitionValuesMapper(plan).map(struct)
    output.schema().fields().asScala.map(_.name) shouldBe Seq("a", "z")
  }
}
Example 17
Source File: MetastoreSchemaAlignMapperTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink.mapper

import org.apache.kafka.connect.data.{SchemaBuilder, Struct}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

import scala.collection.JavaConverters._

/**
  * Verifies that `MetastoreSchemaAlignMapper` returns a struct whose schema
  * has exactly the fields of the metastore schema it was constructed with.
  */
class MetastoreSchemaAlignMapperTest extends AnyFunSuite with Matchers {

  /** A struct-schema builder holding one required string field per name. */
  private def requiredStrings(names: String*): SchemaBuilder =
    names.foldLeft(SchemaBuilder.struct()) { (builder, name) =>
      builder.field(name, SchemaBuilder.string().required().build())
    }

  /** Record with required string fields a, b, c, each set to its own name. */
  private def abcRecord(): Struct = {
    val recordSchema = requiredStrings("a", "b", "c").build()
    Seq("a", "b", "c").foldLeft(new Struct(recordSchema))((acc, name) => acc.put(name, name))
  }

  test("pad optional missing fields with null") {
    val record = abcRecord()
    // The metastore has an extra optional field "z" that the record lacks.
    val metastoreSchema = requiredStrings("a", "b", "c")
      .field("z", SchemaBuilder.string().optional().build())
      .build()
    val aligned = new MetastoreSchemaAlignMapper(metastoreSchema).map(record)
    aligned.schema().fields().asScala.map(_.name) shouldBe Seq("a", "b", "c", "z")
  }

  test("drop fields not specified in metastore") {
    val record = abcRecord()
    // The metastore lacks "c", which the record carries.
    val metastoreSchema = requiredStrings("a", "b").build()
    val aligned = new MetastoreSchemaAlignMapper(metastoreSchema).map(record)
    aligned.schema().fields().asScala.map(_.name) shouldBe Seq("a", "b")
  }
}
Example 18
Source File: ParquetWriterTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.parquet

import com.landoop.streamreactor.connect.hive.StructUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.kafka.connect.data.{SchemaBuilder, Struct}
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

/**
  * Round-trips structs through `parquetWriter` / `parquetReader` on the local
  * file system and checks that the values read back equal the values written.
  */
class ParquetWriterTest extends AnyWordSpec with Matchers {

  implicit val conf = new Configuration()
  implicit val fs = FileSystem.getLocal(conf)

  /** Schema shared by both tests: required name, optional title and salary. */
  private def userSchema = SchemaBuilder.struct()
    .field("name", SchemaBuilder.string().required().build())
    .field("title", SchemaBuilder.string().optional().build())
    .field("salary", SchemaBuilder.float64().optional().build())
    .build()

  /**
    * Writes `records` to `path` (overwriting any existing file), closes the
    * writer, then reads records back until the reader returns null.
    */
  private def writeThenReadAll(path: Path, schema: org.apache.kafka.connect.data.Schema, records: List[Struct]) = {
    val writer = parquetWriter(path, schema, ParquetSinkConfig(overwrite = true))
    records.foreach(writer.write)
    writer.close()

    val reader = parquetReader(path)
    val readBack = Iterator.continually(reader.read).takeWhile(_ != null).toList
    reader.close()
    readBack
  }

  "ParquetWriter" should {

    "write parquet files" in {
      val schema = userSchema
      val users = List(
        new Struct(schema).put("name", "sam").put("title", "mr").put("salary", 100.43),
        new Struct(schema).put("name", "laura").put("title", "ms").put("salary", 429.06)
      )
      val path = new Path("sinktest.parquet")

      val actual = writeThenReadAll(path, schema, users)

      actual.map(StructUtils.extractValues) shouldBe users.map(StructUtils.extractValues)
      fs.delete(path, false)
    }

    "support writing nulls" in {
      val schema = userSchema
      val users = List(
        // The optional "title" field is explicitly null for the first user.
        new Struct(schema).put("name", "sam").put("title", null).put("salary", 100.43),
        new Struct(schema).put("name", "laura").put("title", "ms").put("salary", 429.06)
      )
      val path = new Path("sinktest.parquet")

      val actual = writeThenReadAll(path, schema, users)

      actual.map(StructUtils.extractValues) shouldBe users.map(StructUtils.extractValues)
      fs.delete(path, false)
    }
  }
}
Example 19
Source File: StructFieldsExtractorTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.voltdb

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

/**
  * Checks which fields `StructFieldsExtractor` returns for a struct, and under
  * which names, for the "include all fields" and "only mapped fields" modes.
  */
class StructFieldsExtractorTest extends AnyWordSpec with Matchers {

  /** Person schema used by every test; "threshold" is optional and never set. */
  private def personSchema: Schema = SchemaBuilder.struct().name("com.example.Person")
    .field("firstName", Schema.STRING_SCHEMA)
    .field("lastName", Schema.STRING_SCHEMA)
    .field("age", Schema.INT32_SCHEMA)
    .field("threshold", Schema.OPTIONAL_FLOAT64_SCHEMA).build()

  /** The struct (Alex Smith, 30) every test extracts from. */
  private def alexSmith: Struct = new Struct(personSchema)
    .put("firstName", "Alex")
    .put("lastName", "Smith")
    .put("age", 30)

  "StructFieldsExtractor" should {

    "return all the fields and their bytes value" in {
      val map = StructFieldsExtractor("table", includeAllFields = true, Map.empty).get(alexSmith)

      map("firstName") shouldBe "Alex"
      map("lastName") shouldBe "Smith"
      map("age") shouldBe 30
    }

    "return all fields and apply the mapping" in {
      val map = StructFieldsExtractor("table", includeAllFields = true, Map("lastName" -> "Name", "age" -> "a")).get(alexSmith)

      // Unmapped fields keep their name; mapped fields appear under the alias.
      map("firstName") shouldBe "Alex"
      map("Name") shouldBe "Smith"
      map("a") shouldBe 30
    }

    "return only the specified fields" in {
      val map = StructFieldsExtractor("table", includeAllFields = false, Map("lastName" -> "Name", "age" -> "age")).get(alexSmith)

      map("Name") shouldBe "Smith"
      map("age") shouldBe 30
      map.size shouldBe 2
    }
  }
}
Example 20
Source File: SourceRecordProducers.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.ftp.source

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.source.SourceRecord

/**
  * Factories turning a file's metadata and body into a Kafka Connect
  * `SourceRecord`. Both producers emit the body bytes as the record value and
  * differ only in the record key.
  */
object SourceRecordProducers {

  /** Signature shared by the producers below. */
  type SourceRecordProducer = (ConnectFileMetaDataStore, String, FileMetaData, FileBody) => SourceRecord

  /** Key schema for [[structKeyRecord]]: the file name and an offset. */
  val fileInfoSchema = SchemaBuilder.struct()
    .field("name", Schema.STRING_SCHEMA)
    .field("offset", Schema.INT64_SCHEMA)
    .build()

  /** Builds a record keyed by the file path as a plain string. */
  def stringKeyRecord(store: ConnectFileMetaDataStore, topic: String, meta: FileMetaData, body: FileBody): SourceRecord = {
    val sourcePartition = store.fileMetasToConnectPartition(meta)
    val sourceOffset = store.fileMetasToConnectOffset(meta)
    new SourceRecord(
      sourcePartition,
      sourceOffset,
      topic,
      Schema.STRING_SCHEMA, // key schema
      meta.attribs.path,    // key
      Schema.BYTES_SCHEMA,  // value schema
      body.bytes            // value
    )
  }

  /** Builds a record keyed by a struct holding the file path and the body offset. */
  def structKeyRecord(store: ConnectFileMetaDataStore, topic: String, meta: FileMetaData, body: FileBody): SourceRecord = {
    val sourcePartition = store.fileMetasToConnectPartition(meta)
    val sourceOffset = store.fileMetasToConnectOffset(meta)
    val key = new Struct(fileInfoSchema)
      .put("name", meta.attribs.path)
      .put("offset", body.offset)
    new SourceRecord(
      sourcePartition,
      sourceOffset,
      topic,
      fileInfoSchema,      // key schema
      key,
      Schema.BYTES_SCHEMA, // value schema
      body.bytes           // value
    )
  }
}
Example 21
Source File: StringStructFieldsStringKeyBuilderTest.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.sink

import com.datamountaineer.streamreactor.connect.rowkeys.StringStructFieldsStringKeyBuilder
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

/**
  * Checks the string row key `StringStructFieldsStringKeyBuilder` derives from
  * the struct value of a `SinkRecord`.
  */
class StringStructFieldsStringKeyBuilderTest extends AnyWordSpec with Matchers {

  /**
    * Person schema with firstName, age and an optional threshold. The schema
    * of "firstName" can be overridden (one test attaches a doc string to it).
    */
  private def personSchema(firstName: Schema = Schema.STRING_SCHEMA): Schema =
    SchemaBuilder.struct().name("com.example.Person")
      .field("firstName", firstName)
      .field("age", Schema.INT32_SCHEMA)
      .field("threshold", Schema.OPTIONAL_FLOAT64_SCHEMA).build()

  /** Sink record whose value is (Alex, 30); "threshold" is left unset. */
  private def alexRecord(schema: Schema): SinkRecord = {
    val value = new Struct(schema).put("firstName", "Alex").put("age", 30)
    new SinkRecord("sometopic", 1, null, null, schema, value, 1)
  }

  "StructFieldsStringKeyBuilder" should {

    "raise an exception if the field is not present in the struct" in {
      intercept[IllegalArgumentException] {
        // Built inside the intercept block so record construction is covered too.
        val sinkRecord = alexRecord(personSchema())
        StringStructFieldsStringKeyBuilder(Seq("threshold")).build(sinkRecord)
      }
    }

    "create the row key based on one single field in the struct" in {
      val sinkRecord = alexRecord(personSchema())
      StringStructFieldsStringKeyBuilder(Seq("firstName")).build(sinkRecord) shouldBe "Alex"
    }

    "create the row key based on one single field with doc in the struct" in {
      val firstNameSchema = SchemaBuilder.`type`(Schema.Type.STRING).doc("first name")
      val sinkRecord = alexRecord(personSchema(firstNameSchema))
      StringStructFieldsStringKeyBuilder(Seq("firstName")).build(sinkRecord) shouldBe "Alex"
    }

    "create the row key based on more thant one field in the struct" in {
      val sinkRecord = alexRecord(personSchema())
      StringStructFieldsStringKeyBuilder(Seq("firstName", "age")).build(sinkRecord) shouldBe "Alex.30"
    }
  }
}
Example 22
Source File: TestUtilsBase.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
// NOTE(review): this excerpt is truncated. After the imports, the enclosing
// type declaration and the header of the method containing the statements
// below are missing; only the tail of that method is visible.
// The visible tail builds a single partition -> offset mapping, mocks an
// OffsetStorageReader that returns it for that partition, mocks a
// SourceTaskContext that returns that reader, and yields the task context.
// lookupPartitionKey, table, offsetColumn and offsetValue are defined in the
// missing part — presumably parameters of the missing method; confirm against
// the full file.
package com.datamountaineer.streamreactor.connect import java.util import java.util.Collections import org.apache.avro.generic.{GenericData, GenericRecord} import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct} import org.apache.kafka.connect.sink.SinkRecord import org.apache.kafka.connect.source.SourceTaskContext import org.apache.kafka.connect.storage.OffsetStorageReader import org.mockito.Mockito._ import org.mockito.MockitoSugar import org.scalatest.BeforeAndAfter import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec import scala.collection.JavaConverters._ //set up partition val partition: util.Map[String, String] = Collections.singletonMap(lookupPartitionKey, table) //as a list to search for val partitionList: util.List[util.Map[String, String]] = List(partition).asJava //set up the offset val offset: util.Map[String, Object] = (Collections.singletonMap(offsetColumn,offsetValue )) //create offsets to initialize from val offsets :util.Map[util.Map[String, String],util.Map[String, Object]] = Map(partition -> offset).asJava //mock out reader and task context val taskContext = mock[SourceTaskContext] val reader = mock[OffsetStorageReader] when(reader.offsets(partitionList)).thenReturn(offsets) when(taskContext.offsetStorageReader()).thenReturn(reader) taskContext } }
Example 23
Source File: StructFieldExtractorTest.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.schemas

import org.apache.kafka.connect.data.{Date, Schema, SchemaBuilder, Struct}
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

/**
  * Checks which fields `StructFieldsExtractor` returns for a struct, and under
  * which names, including a field that uses the Connect `Date` logical type.
  *
  * All tests live inside the "StructFieldExtractor" subject block so that
  * each one is reported under that subject.
  */
class StructFieldExtractorTest extends AnyWordSpec with Matchers {

  /** Person schema used by the first three tests; "threshold" is never set. */
  private def personSchema: Schema = SchemaBuilder.struct().name("com.example.Person")
    .field("firstName", Schema.STRING_SCHEMA)
    .field("lastName", Schema.STRING_SCHEMA)
    .field("age", Schema.INT32_SCHEMA)
    .field("threshold", Schema.OPTIONAL_FLOAT64_SCHEMA).build()

  /** The struct (Alex Smith, 30) the first three tests extract from. */
  private def alexSmith: Struct = new Struct(personSchema)
    .put("firstName", "Alex")
    .put("lastName", "Smith")
    .put("age", 30)

  "StructFieldExtractor" should {

    "return all the fields and their bytes value" in {
      val map = new StructFieldsExtractor(true, Map.empty).get(alexSmith).toMap

      map("firstName") shouldBe "Alex"
      map("lastName") shouldBe "Smith"
      map("age") shouldBe 30
    }

    "return all fields and apply the mapping" in {
      val map = new StructFieldsExtractor(true, Map("lastName" -> "Name", "age" -> "a")).get(alexSmith).toMap

      // Unmapped fields keep their name; mapped fields appear under the alias.
      map("firstName") shouldBe "Alex"
      map("Name") shouldBe "Smith"
      map("a") shouldBe 30
    }

    "return only the specified fields" in {
      val map = new StructFieldsExtractor(false, Map("lastName" -> "Name", "age" -> "age")).get(alexSmith).toMap

      map("Name") shouldBe "Smith"
      map("age") shouldBe 30
      map.size shouldBe 2
    }

    "handle Date fieldds" in {
      val dateSchema = Date.builder().build()
      val schema = SchemaBuilder.struct().name("com.example.Person")
        .field("firstName", Schema.STRING_SCHEMA)
        .field("lastName", Schema.STRING_SCHEMA)
        .field("age", Schema.INT32_SCHEMA)
        .field("date", dateSchema).build()

      val date = java.sql.Date.valueOf("2017-04-25")
      val struct = new Struct(schema)
        .put("firstName", "Alex")
        .put("lastName", "Smith")
        .put("age", 30)
        .put("date", date)

      val map1 = new StructFieldsExtractor(false, Map("date" -> "date")).get(struct).toMap
      map1("date") shouldBe date
      map1.size shouldBe 1

      // Overwrite the field with a value produced by the logical type itself.
      val d = Date.toLogical(dateSchema, 10000)
      struct.put("date", d)

      val map2 = new StructFieldsExtractor(false, Map("date" -> "date")).get(struct).toMap
      map2("date") shouldBe d
      map2.size shouldBe 1
    }
  }
}
Example 24
Source File: Output.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.cassandra.sink

import java.util

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}

/**
  * One output of a blockchain transaction, as used by the sink tests.
  *
  * The three `Option` fields are only written to the map / struct forms when
  * they are defined.
  */
case class Output(addr_tag_link: Option[String],
                  addr_tag: Option[String],
                  spent: Boolean,
                  tx_index: Long,
                  `type`: Int,
                  addr: Option[String],
                  value: Long,
                  n: Int,
                  script: String) {

  /**
    * Converts this output to a mutable Java map keyed by field name.
    *
    * @return a new map holding every mandatory field plus each optional field
    *         that is defined
    */
  def toHashMap: util.HashMap[String, Any] = {
    val map = new util.HashMap[String, Any]()
    addr_tag_link.foreach(map.put("addr_tag_link", _))
    // "addr_tag" must come from addr_tag itself, not from addr_tag_link.
    addr_tag.foreach(map.put("addr_tag", _))
    map.put("spent", spent)
    map.put("tx_index", tx_index)
    map.put("type", `type`)
    addr.foreach(map.put("addr", _))
    map.put("value", value)
    map.put("n", n)
    map.put("script", script)
    map
  }
}

object Output {

  /** Connect schema of an [[Output]]; the three address fields and "type" are optional. */
  val ConnectSchema: Schema = SchemaBuilder.struct
    .name("datamountaineer.blockchain.output")
    .doc("The output instance part of a transaction.")
    .field("addr_tag_link", Schema.OPTIONAL_STRING_SCHEMA)
    .field("addr_tag", Schema.OPTIONAL_STRING_SCHEMA)
    .field("spent", Schema.BOOLEAN_SCHEMA)
    .field("tx_index", Schema.INT64_SCHEMA)
    .field("type", Schema.OPTIONAL_INT32_SCHEMA)
    .field("addr", Schema.OPTIONAL_STRING_SCHEMA)
    .field("value", Schema.INT64_SCHEMA)
    .field("n", Schema.INT32_SCHEMA)
    .field("script", Schema.STRING_SCHEMA)
    .build()

  /** Adds `toStruct()` to [[Output]]. */
  implicit class OutputToStructConverter(val output: Output) extends AnyVal {

    /**
      * Converts the output to a Connect struct using [[ConnectSchema]].
      * Optional fields that are `None` are left unset on the struct.
      */
    def toStruct() = {
      val struct = new Struct(ConnectSchema)
        .put("spent", output.spent)
        .put("tx_index", output.tx_index)
        .put("type", output.`type`)
        .put("value", output.value)
        .put("n", output.n)
        .put("script", output.script)

      output.addr.foreach(struct.put("addr", _))
      output.addr_tag.foreach(struct.put("addr_tag", _))
      output.addr_tag_link.foreach(struct.put("addr_tag_link", _))
      struct
    }
  }
}
Example 25
Source File: SchemaSpec.scala From kafka-connect-cassandra with Apache License 2.0 | 5 votes |
package com.tuplejump.kafka.connect.cassandra

import com.datastax.driver.core.{ DataType, TestUtil}
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord

/**
  * Checks the conversions between Kafka Connect schemas / records and the
  * connector's Cassandra schema mapping and CQL statements.
  */
class SchemaSpec extends AbstractFlatSpec {

  it should "convert a struct schema with single field" in {
    val topic = "topicx"

    val sc = sinkConfig(topic, "keyspacex", "tablex", List("id"))
    sc.options.consistency should be (TaskConfig.DefaultSinkConsistency)
    sc.schema.columnNames should === (List("id"))
    sc.query.cql should be ("INSERT INTO keyspacex.tablex(id) VALUES(?)")

    val schema = SchemaBuilder.struct.name("record").version(1).field("id", Schema.INT32_SCHEMA).build
    val value = new Struct(schema).put("id", 1)
    val record = new SinkRecord(topic, 1, SchemaBuilder.struct.build, "key", schema, value, 0)

    sc.schema.route.topic should be (record.topic)
    sc.schema.route.keyspace should be ("keyspacex")
    sc.schema.route.table should be ("tablex")

    sc.schema is record should be (true)
    val query = record.as(sc.schema.namespace)
    query.cql should be("INSERT INTO keyspacex.tablex(id) VALUES(1)")
  }

  it should "convert a struct schema with multiple fields" in {
    val topic = "test_kfk"
    val sc = sinkConfig(topic, "keyspacex", "tablex", List("available", "name", "age"))

    val schema = SchemaBuilder.struct.name("record").version(1)
      .field("available", Schema.BOOLEAN_SCHEMA)
      .field("name", Schema.STRING_SCHEMA)
      .field("age", Schema.INT32_SCHEMA).build
    val value = new Struct(schema).put("name", "user").put("available", false).put("age", 15)
    val record = new SinkRecord("test_kfk", 1, SchemaBuilder.struct.build, "key", schema, value, 0)

    schema.asColumnNames should be (sc.schema.columnNames)

    sc.schema.route.topic should be (record.topic)
    sc.schema is record should be (true)

    sc.query.cql should be ("INSERT INTO keyspacex.tablex(available,name,age) VALUES(?,?,?)")
    val query = record.as(sc.schema.namespace)
    query.cql should be("INSERT INTO keyspacex.tablex(available,name,age) VALUES(false,'user',15)")
  }

  it should "convert cassandra column defs to a source schema" in {
    val colDef = Map(
      "id" -> DataType.cint(),
      "name" -> DataType.varchar())

    val columns = TestUtil.getColumnDef(colDef)
    val expectedSchema = SchemaBuilder.struct()
      .field("id", Schema.INT32_SCHEMA)
      .field("name", Schema.STRING_SCHEMA).build()

    columns.asSchema should be(expectedSchema)
  }

  it should "convert kafka schema and struct to cassandra columns and schema mapping" in {
    import scala.collection.JavaConverters._
    val topic = "a"
    val route = InternalConfig.Route(TaskConfig.SinkRoute + topic, "ks1.t1").get
    val schemaMap = new InternalConfig.Schema(route, Nil, Nil, Nil, List("available","name","age"), "")

    val schema = SchemaBuilder.struct.name("record").version(1)
      .field("available", Schema.BOOLEAN_SCHEMA)
      .field("name", Schema.STRING_SCHEMA)
      .field("age", Schema.INT32_SCHEMA).build
    val struct = new Struct(schema).put("name", "user").put("available", false).put("age", 15)
    // The record value is the struct built above; no local named `value`
    // exists in this test, so it must be passed explicitly.
    val record = new SinkRecord(topic, 1, SchemaBuilder.struct.build, "key", schema, struct, 0)

    schema.asColumnNames should ===(schemaMap.columnNames)
    schemaMap.columnNames should ===(schema.fields.asScala.map(_.name).toList)
    schemaMap is record should be (true)
  }
}
Example 26
Source File: IotMessageConverter.scala From toketi-kafka-connect-iothub with MIT License | 5 votes |
// Copyright (c) Microsoft. All rights reserved.

package com.microsoft.azure.iot.kafka.connect.source

import java.time.Instant
import java.util.Date

import com.microsoft.azure.eventhubs.impl.AmqpConstants
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}

import scala.collection.JavaConverters._
import scala.reflect.ClassTag

/** Converts an `IotMessage` into a Kafka Connect `Struct`. */
object IotMessageConverter {

  val offsetKey = "offset"

  private val schemaName          = "iothub.kafka.connect"
  private val schemaVersion       = 1
  private val deviceIdKey         = "deviceId"
  private val contentTypeKey      = "contentType"
  private val sequenceNumberKey   = "sequenceNumber"
  private val enqueuedTimeKey     = "enqueuedTime"
  private val contentKey          = "content"
  private val systemPropertiesKey = "systemProperties"
  private val propertiesKey       = "properties"
  private val deviceIdIotHubKey   = "iothub-connection-device-id"

  // Public for testing purposes
  // Note: the SchemaBuilder itself is exposed as the Schema; build() is not called.
  lazy val schema: Schema = SchemaBuilder.struct()
    .name(schemaName)
    .version(schemaVersion)
    .field(deviceIdKey, Schema.STRING_SCHEMA)
    .field(offsetKey, Schema.STRING_SCHEMA)
    .field(contentTypeKey, Schema.OPTIONAL_STRING_SCHEMA)
    .field(enqueuedTimeKey, Schema.STRING_SCHEMA)
    .field(sequenceNumberKey, Schema.INT64_SCHEMA)
    .field(contentKey, Schema.STRING_SCHEMA)
    .field(systemPropertiesKey, propertiesMapSchema)
    .field(propertiesKey, propertiesMapSchema)

  /** String-to-string map schema used for both property maps. */
  private lazy val propertiesMapSchema: Schema = SchemaBuilder.map(Schema.STRING_SCHEMA, Schema.STRING_SCHEMA)

  /**
    * Builds the Connect struct for `iotMessage`.
    *
    * The well-known entries (device id, offset, sequence number, enqueued
    * time, content type) are removed from the message's mutable property
    * maps as they are read, so the maps written to the struct contain only
    * the remaining entries.
    */
  def getIotMessageStruct(iotMessage: IotMessage): Struct = {

    val systemProperties = iotMessage.systemProperties
    // Order matters: each call below removes its key from systemProperties.
    val deviceId: String = getOrDefaultAndRemove(systemProperties, deviceIdIotHubKey, "")
    val offset: String = getOrDefaultAndRemove(systemProperties, AmqpConstants.OFFSET_ANNOTATION_NAME, "")
    val sequenceNumber: Long = getOrDefaultAndRemove(systemProperties, AmqpConstants.SEQUENCE_NUMBER_ANNOTATION_NAME, 0)
    val enqueuedTimeStr = getEnqueuedTime(systemProperties).fold("")(_.toString)

    val properties = iotMessage.properties
    val contentType: String = getOrDefaultAndRemove(properties, contentTypeKey, "")

    // Remaining system properties are stringified to fit the map schema.
    val systemPropertiesMap = systemProperties.map { case (name, raw) => (name, raw.toString) }

    val result = new Struct(schema)
    result.put(deviceIdKey, deviceId)
    result.put(offsetKey, offset)
    result.put(contentTypeKey, contentType)
    result.put(enqueuedTimeKey, enqueuedTimeStr)
    result.put(sequenceNumberKey, sequenceNumber)
    result.put(contentKey, iotMessage.content)
    result.put(systemPropertiesKey, systemPropertiesMap.asJava)
    result.put(propertiesKey, properties.asJava)
    result
  }

  /** Removes and returns the enqueued time as an `Instant`, or `None` when absent. */
  private def getEnqueuedTime(map: scala.collection.mutable.Map[String, Object]): Option[Instant] = {
    val enqueuedTimeValue: Date = getOrDefaultAndRemove(map, AmqpConstants.ENQUEUED_TIME_UTC_ANNOTATION_NAME, null)
    Option(enqueuedTimeValue).map(_.toInstant)
  }

  /**
    * Removes `key` from `map` and returns its value cast to `T`, or returns
    * `defaultVal` (leaving the map untouched) when the key is absent.
    */
  private def getOrDefaultAndRemove[T: ClassTag, S: ClassTag](map: scala.collection.mutable.Map[String, S],
                                                             key: String,
                                                             defaultVal: T): T =
    map.remove(key) match {
      case Some(found) => found.asInstanceOf[T]
      case None        => defaultVal
    }
}
Example 27
Source File: ConnectMongoConverterSpec.scala From kafka-connect-mongodb with Apache License 2.0 | 5 votes |
package com.startapp.data

import java.lang.Boolean
import java.util

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.scalatest.{FlatSpec, Matchers}

/**
  * Checks that each `ConnectMongoConverter` turns its supported input into a
  * JSON map holding the three test fields, and throws a `ClassCastException`
  * when handed the other converter's input type.
  */
class ConnectMongoConverterSpec extends FlatSpec with Matchers{
  private val FIELD1_NAME = "fieldInt"
  // valueOf replaces the deprecated boxed-type constructors.
  private val FIELD1_VALUE = Integer.valueOf(5)
  private val FIELD2_NAME = "fieldString"
  private val FIELD2_VALUE = "str"
  private val FIELD3_NAME = "fieldBoolean"
  private val FIELD3_VALUE = Boolean.valueOf(true)

  val schema = SchemaBuilder.struct().name("test schema")
    .field(FIELD1_NAME, Schema.INT32_SCHEMA)
    .field(FIELD2_NAME, Schema.STRING_SCHEMA)
    .field(FIELD3_NAME, Schema.BOOLEAN_SCHEMA)
    .build()

  "No Schema Connect Mongo Converter Bad Data" should "throw an exception" in {
    // A Struct is not valid input for the schemaless converter.
    val badData = new Struct(schema)

    intercept[ClassCastException] {
      checkJsonMap(NoSchemaConnectMongoConverter, badData)
    }
  }

  "No Schema Connect Mongo Converter Good Data" should "return the same map" in {
    val jsonMap = new util.HashMap[String, Object]()
    jsonMap.put(FIELD1_NAME, FIELD1_VALUE)
    jsonMap.put(FIELD2_NAME, FIELD2_VALUE)
    jsonMap.put(FIELD3_NAME, FIELD3_VALUE)

    checkJsonMap(NoSchemaConnectMongoConverter, jsonMap)
  }

  "Schema Connect Mongo Converter Bad Data" should "throw an exception" in {
    // A plain map is not valid input for the schema-based converter.
    val badData = new util.HashMap[String, Object]()
    badData.put(FIELD1_NAME, FIELD1_VALUE)

    intercept[ClassCastException] {
      checkJsonMap(SchemaConnectMongoConverter, badData)
    }
  }

  "Schema Connect Mongo Converter Good Data" should "convert data to json map" in {
    val data = new Struct(schema)
      .put(FIELD1_NAME, FIELD1_VALUE)
      .put(FIELD2_NAME, FIELD2_VALUE)
      .put(FIELD3_NAME, FIELD3_VALUE)

    checkJsonMap(SchemaConnectMongoConverter, data)
  }

  /** Converts `value` with `converter` and asserts the three test fields are present. */
  private def checkJsonMap(converter : ConnectMongoConverter, value: Object): Unit ={
    val newJsonMap = converter.toJsonMap(value).toMap

    newJsonMap(FIELD1_NAME) should be(FIELD1_VALUE)
    newJsonMap(FIELD2_NAME) should be(FIELD2_VALUE)
    newJsonMap(FIELD3_NAME) should be(FIELD3_VALUE)
  }
}
Example 28
Source File: Input.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.blockchain.data

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}

/** One input of a blockchain transaction; `prev_out` may be absent. */
case class Input(sequence: Long, prev_out: Option[Output], script: String)

object Input {

  /** Connect schema of an [[Input]], embedding the schema of [[Output]]. */
  val ConnectSchema: Schema = SchemaBuilder.struct
    .name("datamountaineer.blockchain.input")
    .doc("The input instance part of a transaction.")
    .field("sequence", Schema.INT64_SCHEMA)
    .field("prev_out", Output.ConnectSchema)
    .field("script", Schema.STRING_SCHEMA)
    .build()

  /** Adds `toStruct()` to [[Input]]. */
  implicit class InputToStructConverter(val input: Input) extends AnyVal {

    /**
      * Converts the input to a Connect struct using [[ConnectSchema]].
      * "prev_out" is only set when the input has a previous output.
      */
    def toStruct(): Struct = {
      val result = new Struct(ConnectSchema)
      result.put("sequence", input.sequence)
      result.put("script", input.script)
      for (previous <- input.prev_out) result.put("prev_out", previous.toStruct())
      result
    }
  }
}
Example 29
Source File: Output.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.blockchain.data

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}

/** One output of a blockchain transaction; the three address fields may be absent. */
case class Output(addr_tag_link: Option[String],
                  addr_tag: Option[String],
                  spent: Boolean,
                  tx_index: Long,
                  `type`: Int,
                  addr: Option[String],
                  value: Long,
                  n: Int,
                  script: String)

object Output {

  /** Connect schema of an [[Output]]; only the three address fields are optional. */
  val ConnectSchema: Schema = SchemaBuilder.struct
    .name("datamountaineer.blockchain.output")
    .doc("The output instance part of a transaction.")
    .field("addr_tag_link", Schema.OPTIONAL_STRING_SCHEMA)
    .field("addr_tag", Schema.OPTIONAL_STRING_SCHEMA)
    .field("spent", Schema.BOOLEAN_SCHEMA)
    .field("tx_index", Schema.INT64_SCHEMA)
    .field("type", Schema.INT32_SCHEMA)
    .field("addr", Schema.OPTIONAL_STRING_SCHEMA)
    .field("value", Schema.INT64_SCHEMA)
    .field("n", Schema.INT32_SCHEMA)
    .field("script", Schema.STRING_SCHEMA)
    .build()

  /** Adds `toStruct()` to [[Output]]. */
  implicit class OutputToStructConverter(val output: Output) extends AnyVal {

    /**
      * Converts the output to a Connect struct using [[ConnectSchema]].
      * Mandatory fields are always written; each optional address field is
      * written only when it is defined.
      */
    def toStruct(): Struct = {
      val result = new Struct(ConnectSchema)
      result.put("spent", output.spent)
      result.put("tx_index", output.tx_index)
      result.put("type", output.`type`)
      result.put("value", output.value)
      result.put("n", output.n)
      result.put("script", output.script)

      for (address <- output.addr) result.put("addr", address)
      for (tag <- output.addr_tag) result.put("addr_tag", tag)
      for (link <- output.addr_tag_link) result.put("addr_tag_link", link)
      result
    }
  }
}
Example 30
Source File: Transaction.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.blockchain.data

import java.util

import com.datamountaineer.streamreactor.connect.blockchain.data.Input._
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.source.SourceRecord

/** A blockchain transaction with its inputs and outputs. */
case class Transaction(lock_time: Long,
                       ver: Int,
                       size: Long,
                       inputs: Seq[Input],
                       rbf: Option[Boolean],
                       time: Long,
                       tx_index: Long,
                       vin_sz: Int,
                       hash: String,
                       vout_sz: Int,
                       relayed_by: String,
                       out: Seq[Output])

object Transaction {

  /** Connect schema of a [[Transaction]]; "inputs", "rbf" and "out" are optional. */
  val ConnectSchema: Schema = SchemaBuilder.struct
    .name("datamountaineer.blockchain.transaction")
    .field("lock_time", Schema.INT64_SCHEMA)
    .field("ver", Schema.INT32_SCHEMA)
    .field("size", Schema.INT64_SCHEMA)
    .field("inputs", SchemaBuilder.array(Input.ConnectSchema).optional().build())
    .field("rbf", Schema.OPTIONAL_BOOLEAN_SCHEMA)
    .field("time", Schema.INT64_SCHEMA)
    .field("tx_index", Schema.INT64_SCHEMA)
    .field("vin_sz", Schema.INT32_SCHEMA)
    .field("hash", Schema.STRING_SCHEMA)
    .field("vout_sz", Schema.INT32_SCHEMA)
    .field("relayed_by", Schema.STRING_SCHEMA)
    .field("out", SchemaBuilder.array(Output.ConnectSchema).optional().build())
    .build()

  /** Adds `toSourceRecord` and `toStruct()` to [[Transaction]]. */
  implicit class TransactionToSourceRecordConverter(val tx: Transaction) extends AnyVal {

    /**
      * Wraps the transaction in a `SourceRecord` for `topic` / `partition`.
      *
      * @param key optional record key; when absent both the key and the key
      *            schema are null. Source partition and offset are always null.
      */
    def toSourceRecord(topic: String, partition: Int, key: Option[String]): SourceRecord = {
      new SourceRecord(
        null,
        null,
        topic,
        partition,
        key.map(_ => Schema.STRING_SCHEMA).orNull,
        key.orNull,
        ConnectSchema,
        tx.toStruct()
      )
    }

    //private def getOffset() = Collections.singletonMap("position", System.currentTimeMillis())

    /**
      * Converts the transaction to a Connect struct using [[ConnectSchema]].
      *
      * "rbf" is set only when defined; "out" and "inputs" are set only when
      * the corresponding sequence is non-empty. Each output and input is
      * converted exactly once.
      */
    def toStruct(): Struct = {
      val struct = new Struct(ConnectSchema)
        .put("lock_time", tx.lock_time)
        .put("ver", tx.ver)
        .put("size", tx.size)
        .put("time", tx.time)
        .put("tx_index", tx.tx_index)
        .put("vin_sz", tx.vin_sz)
        .put("hash", tx.hash)
        .put("vout_sz", tx.vout_sz)
        .put("relayed_by", tx.relayed_by)

      if (tx.out.nonEmpty) {
        import scala.collection.JavaConverters._
        struct.put("out", tx.out.map(_.toStruct()).asJava)
      }

      tx.rbf.foreach(struct.put("rbf", _))

      if (tx.inputs.nonEmpty) {
        val inputs = new util.ArrayList[Struct]
        tx.inputs.foreach(i => inputs.add(i.toStruct()))
        struct.put("inputs", inputs)
      }

      struct
    }
  }
}
Example 31
Source File: Input.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.azure.documentdb.sink

import java.util

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}

/** One input of a transaction, as used by the sink tests; `prev_out` may be absent. */
case class Input(sequence: Long, prev_out: Option[Output], script: String) {

  /**
    * Converts this input to a mutable Java map keyed by field name.
    * "prev_out" is only present when the input has a previous output, in
    * which case it holds that output's own map form.
    */
  def toHashMap: util.HashMap[String, Any] = {
    val result = new util.HashMap[String, Any]()
    result.put("sequence", sequence)
    for (previous <- prev_out) result.put("prev_out", previous.toHashMap)
    result.put("script", script)
    result
  }
}

object Input {

  /** Connect schema of an [[Input]], embedding the schema of [[Output]]. */
  val ConnectSchema = SchemaBuilder.struct
    .name("input")
    .doc("The input instance part of a transaction.")
    .field("sequence", Schema.INT64_SCHEMA)
    .field("prev_out", Output.ConnectSchema)
    .field("script", Schema.STRING_SCHEMA)
    .build()

  /** Adds `toStruct()` to [[Input]]. */
  implicit class InputToStructConverter(val input: Input) extends AnyVal {

    /**
      * Converts the input to a Connect struct using [[ConnectSchema]].
      * "prev_out" is only set when the input has a previous output.
      */
    def toStruct() = {
      val result = new Struct(ConnectSchema)
      result.put("sequence", input.sequence)
      result.put("script", input.script)
      for (previous <- input.prev_out) result.put("prev_out", previous.toStruct())
      result
    }
  }
}
Example 32
Source File: Output.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.azure.documentdb.sink

import java.util

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}

/** Output of a transaction. Optional members are omitted from the generated map/struct when empty. */
case class Output(addr_tag_link: Option[String],
                  addr_tag: Option[String],
                  spent: Boolean,
                  tx_index: Long,
                  `type`: Int,
                  addr: Option[String],
                  value: Long,
                  n: Int,
                  script: String) {

  /**
    * Copies the fields into a Java hash map keyed by field name.
    *
    * @return a new map; `addr_tag_link`, `addr_tag` and `addr` are only present when defined
    */
  def toHashMap: util.HashMap[String, Any] = {
    val map = new util.HashMap[String, Any]()
    addr_tag_link.foreach(map.put("addr_tag_link", _))
    // Fixed: "addr_tag" used to be filled from addr_tag_link, so the real tag was never exported.
    addr_tag.foreach(map.put("addr_tag", _))
    map.put("spent", spent)
    map.put("tx_index", tx_index)
    map.put("type", `type`)
    addr.foreach(map.put("addr", _))
    map.put("value", value)
    map.put("n", n)
    map.put("script", script)
    map
  }
}

object Output {

  /** Kafka Connect schema of an output. */
  val ConnectSchema: Schema = SchemaBuilder.struct
    .name("output")
    .doc("The output instance part of a transaction.")
    .field("addr_tag_link", Schema.OPTIONAL_STRING_SCHEMA)
    .field("addr_tag", Schema.OPTIONAL_STRING_SCHEMA)
    .field("spent", Schema.BOOLEAN_SCHEMA)
    .field("tx_index", Schema.INT64_SCHEMA)
    .field("type", Schema.OPTIONAL_INT32_SCHEMA)
    .field("addr", Schema.OPTIONAL_STRING_SCHEMA)
    .field("value", Schema.INT64_SCHEMA)
    .field("n", Schema.INT32_SCHEMA)
    .field("script", Schema.STRING_SCHEMA)
    .build()

  implicit class OutputToStructConverter(val output: Output) extends AnyVal {

    /** Builds a struct for [[Output.ConnectSchema]]; optional string fields are only set when defined. */
    def toStruct(): Struct = {
      val struct = new Struct(ConnectSchema)
        .put("spent", output.spent)
        .put("tx_index", output.tx_index)
        .put("type", output.`type`)
        .put("value", output.value)
        .put("n", output.n)
        .put("script", output.script)

      output.addr.foreach(struct.put("addr", _))
      output.addr_tag.foreach(struct.put("addr_tag", _))
      output.addr_tag_link.foreach(struct.put("addr_tag_link", _))
      struct
    }
  }
}
Example 33
Source File: Input.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.mongodb

import java.util

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}

/** Input of a transaction: a sequence number, an optional previous output and a script. */
case class Input(sequence: Long, prev_out: Option[Output], script: String) {

  /** Copies the fields into a Java hash map; `prev_out` is only added when defined. */
  def toHashMap: util.HashMap[String, Any] = {
    val fields = new util.HashMap[String, Any]()
    fields.put("sequence", sequence)
    for (previous <- prev_out) fields.put("prev_out", previous.toHashMap)
    fields.put("script", script)
    fields
  }
}

object Input {

  // NOTE(review): "prev_out" uses Output.ConnectSchema as-is while the value is only set
  // when present — confirm that inputs without a previous output still validate.
  val ConnectSchema: Schema = {
    val builder = SchemaBuilder.struct
    builder.name("datamountaineer.blockchain.input")
    builder.doc("The input instance part of a transaction.")
    builder.field("sequence", Schema.INT64_SCHEMA)
    builder.field("prev_out", Output.ConnectSchema)
    builder.field("script", Schema.STRING_SCHEMA)
    builder.build()
  }

  implicit class InputToStructConverter(val input: Input) extends AnyVal {

    /** Builds a struct for [[Input.ConnectSchema]]; `prev_out` is only set when defined. */
    def toStruct(): Struct = {
      val result = new Struct(ConnectSchema)
      result.put("sequence", input.sequence)
      result.put("script", input.script)
      for (previous <- input.prev_out) result.put("prev_out", previous.toStruct())
      result
    }
  }
}
Example 34
Source File: Output.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.mongodb

import java.util

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}

/** Output of a transaction. Optional members are omitted from the generated map/struct when empty. */
case class Output(addr_tag_link: Option[String],
                  addr_tag: Option[String],
                  spent: Boolean,
                  tx_index: Long,
                  `type`: Int,
                  addr: Option[String],
                  value: Long,
                  n: Int,
                  script: String) {

  /**
    * Copies the fields into a Java hash map keyed by field name.
    *
    * @return a new map; `addr_tag_link`, `addr_tag` and `addr` are only present when defined
    */
  def toHashMap: util.HashMap[String, Any] = {
    val map = new util.HashMap[String, Any]()
    addr_tag_link.foreach(map.put("addr_tag_link", _))
    // Fixed: "addr_tag" used to be filled from addr_tag_link, so the real tag was never exported.
    addr_tag.foreach(map.put("addr_tag", _))
    map.put("spent", spent)
    map.put("tx_index", tx_index)
    map.put("type", `type`)
    addr.foreach(map.put("addr", _))
    map.put("value", value)
    map.put("n", n)
    map.put("script", script)
    map
  }
}

object Output {

  /** Kafka Connect schema of an output. */
  val ConnectSchema: Schema = SchemaBuilder.struct
    .name("datamountaineer.blockchain.output")
    .doc("The output instance part of a transaction.")
    .field("addr_tag_link", Schema.OPTIONAL_STRING_SCHEMA)
    .field("addr_tag", Schema.OPTIONAL_STRING_SCHEMA)
    .field("spent", Schema.BOOLEAN_SCHEMA)
    .field("tx_index", Schema.INT64_SCHEMA)
    .field("type", Schema.OPTIONAL_INT32_SCHEMA)
    .field("addr", Schema.OPTIONAL_STRING_SCHEMA)
    .field("value", Schema.INT64_SCHEMA)
    .field("n", Schema.INT32_SCHEMA)
    .field("script", Schema.STRING_SCHEMA)
    .build()

  implicit class OutputToStructConverter(val output: Output) extends AnyVal {

    /** Builds a struct for [[Output.ConnectSchema]]; optional string fields are only set when defined. */
    def toStruct(): Struct = {
      val struct = new Struct(ConnectSchema)
        .put("spent", output.spent)
        .put("tx_index", output.tx_index)
        .put("type", output.`type`)
        .put("value", output.value)
        .put("n", output.n)
        .put("script", output.script)

      output.addr.foreach(struct.put("addr", _))
      output.addr_tag.foreach(struct.put("addr_tag", _))
      output.addr_tag_link.foreach(struct.put("addr_tag_link", _))
      struct
    }
  }
}
Example 35
Source File: Input.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.cassandra.sink

import java.util

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}

/** Input of a transaction: a sequence number, an optional previous output and a script. */
case class Input(sequence: Long, prev_out: Option[Output], script: String) {

  /** Copies the fields into a Java hash map; `prev_out` is only added when defined. */
  def toHashMap: util.HashMap[String, Any] = {
    val fields = new util.HashMap[String, Any]()
    fields.put("sequence", sequence)
    for (previous <- prev_out) fields.put("prev_out", previous.toHashMap)
    fields.put("script", script)
    fields
  }
}

object Input {

  // NOTE(review): "prev_out" uses Output.ConnectSchema as-is while the value is only set
  // when present — confirm that inputs without a previous output still validate.
  val ConnectSchema = {
    val builder = SchemaBuilder.struct
    builder.name("datamountaineer.blockchain.input")
    builder.doc("The input instance part of a transaction.")
    builder.field("sequence", Schema.INT64_SCHEMA)
    builder.field("prev_out", Output.ConnectSchema)
    builder.field("script", Schema.STRING_SCHEMA)
    builder.build()
  }

  implicit class InputToStructConverter(val input: Input) extends AnyVal {

    /** Builds a struct for [[Input.ConnectSchema]]; `prev_out` is only set when defined. */
    def toStruct() = {
      val result = new Struct(ConnectSchema)
      result.put("sequence", input.sequence)
      result.put("script", input.script)
      for (previous <- input.prev_out) result.put("prev_out", previous.toStruct())
      result
    }
  }
}
Example 36
Source File: CassandraSinkTaskSpec.scala From kafka-connect-cassandra with Apache License 2.0 | 5 votes |
package com.tuplejump.kafka.connect.cassandra

import scala.collection.JavaConverters._

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.{SinkRecord, SinkTaskContext}

/** Exercises starting a [[CassandraSinkTask]] and writing two records through it. */
class CassandraSinkTaskSpec extends AbstractFlatSpec {

  val topicName = "test_kv_topic"
  val tableName = "test.kv"
  val config = sinkProperties(Map(topicName -> tableName))

  /** Creates a sink task, initialises it with a mocked context and starts it with `config`. */
  private def startedTask(): CassandraSinkTask = {
    val task = new CassandraSinkTask()
    task.initialize(mock[SinkTaskContext])
    task.start(config.asJava)
    task
  }

  it should "start sink task" in {
    startedTask().stop()
  }

  it should "save records in cassandra" in {
    val task = startedTask()

    val valueSchema = SchemaBuilder.struct.name("record").version(1)
      .field("key", Schema.STRING_SCHEMA)
      .field("value", Schema.INT32_SCHEMA)
      .build

    // Both records share topic, partition, key and offset; only the value struct differs.
    def recordOf(k: String, v: Int): SinkRecord = {
      val payload = new Struct(valueSchema).put("key", k).put("value", v)
      new SinkRecord(topicName, 1, SchemaBuilder.struct.build, "key", valueSchema, payload, 0)
    }

    val records = List(recordOf("pqr", 15), recordOf("abc", 17))
    task.put(records.asJavaCollection)
    task.stop()

    val cc = CassandraCluster.local
    val session = cc.session
    val rowCount = session.execute(s"select count(1) from $tableName").one().getLong(0)
    rowCount should be(2)
    cc.shutdown()
  }
}
Example 37
Source File: StructFieldsRowKeyBuilderTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hbase

import com.datamountaineer.streamreactor.connect.hbase.BytesHelper._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

class StructFieldsRowKeyBuilderTest extends AnyWordSpec with Matchers {

  /**
    * Builds the sink record shared by all cases: a `com.example.Person` struct with
    * `firstName` and `age` set and the optional `threshold` left unset.
    */
  private def personRecord(): SinkRecord = {
    val personSchema = SchemaBuilder.struct().name("com.example.Person")
      .field("firstName", Schema.STRING_SCHEMA)
      .field("age", Schema.INT32_SCHEMA)
      .field("threshold", Schema.OPTIONAL_FLOAT64_SCHEMA)
      .build()
    val person = new Struct(personSchema).put("firstName", "Alex").put("age", 30)
    new SinkRecord("sometopic", 1, null, null, personSchema, person, 1)
  }

  "StructFieldsRowKeyBuilder" should {

    "raise an exception if the field is not present in the struct" in {
      intercept[IllegalArgumentException] {
        val sinkRecord = personRecord()
        StructFieldsRowKeyBuilderBytes(List("threshold")).build(sinkRecord, null)
      }
    }

    "create the row key based on one single field in the struct" in {
      val sinkRecord = personRecord()
      StructFieldsRowKeyBuilderBytes(List("firstName")).build(sinkRecord, null) shouldBe "Alex".fromString
    }

    "create the row key based on more thant one field in the struct" in {
      val sinkRecord = personRecord()
      StructFieldsRowKeyBuilderBytes(List("firstName", "age")).build(sinkRecord, null) shouldBe
        Bytes.add("Alex".fromString(), "\n".fromString(), 30.fromInt())
    }
  }
}
Example 38
Source File: ValueConverter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord

import scala.collection.JavaConverters._

object ValueConverter {

  /**
    * Converts the value of a sink record into a [[Struct]].
    *
    * Structs pass through unchanged, Scala and Java maps are converted field by field and
    * strings are wrapped in a single-field struct.
    *
    * @throws RuntimeException if the value is null or of any other type
    */
  def apply(record: SinkRecord): Struct = record.value match {
    // Handled explicitly: the generic branch below calls getClass and would fail with an NPE.
    case null => sys.error("Unsupported record value: null")
    case struct: Struct => StructValueConverter.convert(struct)
    case map: Map[_, _] => MapValueConverter.convert(map)
    case map: java.util.Map[_, _] => MapValueConverter.convert(map.asScala.toMap)
    case string: String => StringValueConverter.convert(string)
    case other => sys.error(s"Unsupported record $other:${other.getClass.getCanonicalName}")
  }
}

/** Converts a value of type `T` into a Kafka Connect [[Struct]]. */
trait ValueConverter[T] {
  def convert(value: T): Struct
}

/** Identity conversion for values that already are structs. */
object StructValueConverter extends ValueConverter[Struct] {
  override def convert(struct: Struct): Struct = struct
}

/** Derives a struct (and its schema) from the entries of a map. */
object MapValueConverter extends ValueConverter[Map[_, _]] {

  /**
    * Registers an optional field named `key` on `builder` according to the runtime type of
    * `value` and returns the value in the representation matching that field.
    *
    * `Int` is widened to `Long` and `Float` to `Double`; nested maps become optional structs.
    *
    * @throws RuntimeException if the value is null or of an unsupported type
    */
  def convertValue(value: Any, key: String, builder: SchemaBuilder): Any = {
    value match {
      case s: String =>
        builder.field(key, Schema.OPTIONAL_STRING_SCHEMA)
        s
      case l: Long =>
        builder.field(key, Schema.OPTIONAL_INT64_SCHEMA)
        l
      case i: Int =>
        builder.field(key, Schema.OPTIONAL_INT64_SCHEMA)
        i.toLong
      case b: Boolean =>
        builder.field(key, Schema.OPTIONAL_BOOLEAN_SCHEMA)
        b
      case f: Float =>
        builder.field(key, Schema.OPTIONAL_FLOAT64_SCHEMA)
        f.toDouble
      case d: Double =>
        builder.field(key, Schema.OPTIONAL_FLOAT64_SCHEMA)
        d
      case innerMap: java.util.Map[_, _] =>
        val innerStruct = convert(innerMap.asScala.toMap, true)
        builder.field(key, innerStruct.schema())
        innerStruct
      case innerMap: Map[_, _] =>
        val innerStruct = convert(innerMap, true)
        builder.field(key, innerStruct.schema())
        innerStruct
      case other =>
        // Previously a bare MatchError; name the offending field and type instead.
        val typeName = Option(other).fold("null")(_.getClass.getCanonicalName)
        sys.error(s"Unsupported value for field '$key': $other:$typeName")
    }
  }

  /**
    * Converts a map into a struct whose schema is derived from the map's entries.
    *
    * @param map      entries to convert; keys are turned into field names via `toString`
    * @param optional whether the resulting struct schema is marked optional
    */
  def convert(map: Map[_, _], optional: Boolean): Struct = {
    val builder = SchemaBuilder.struct()
    // convertValue adds the field to the builder as a side effect, so all values must be
    // converted before the schema is built.
    val values = map.map { case (k, v) =>
      val key = k.toString
      val value = convertValue(v, key, builder)
      key -> value
    }.toList
    if (optional) builder.optional()
    val schema = builder.build
    val struct = new Struct(schema)
    values.foreach { case (key, value) => struct.put(key, value) }
    struct
  }

  override def convert(map: Map[_, _]): Struct = convert(map, false)
}

/** Wraps a string into a struct named "struct" with the single optional field "a". */
object StringValueConverter extends ValueConverter[String] {
  override def convert(string: String): Struct = {
    val schema = SchemaBuilder.struct().field("a", Schema.OPTIONAL_STRING_SCHEMA).name("struct").build()
    new Struct(schema).put("a", string)
  }
}
Example 39
Source File: DropPartitionValuesMapper.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink.mapper

import com.landoop.streamreactor.connect.hive.{PartitionPlan, StructMapper}
import org.apache.kafka.connect.data.{SchemaBuilder, Struct}

/** Removes every field whose name matches one of the plan's partition keys. */
class DropPartitionValuesMapper(plan: PartitionPlan) extends StructMapper {

  import scala.collection.JavaConverters._

  /**
    * @param input struct that may contain partition fields
    * @return a new struct holding only the non-partition fields, in their original order
    */
  override def map(input: Struct): Struct = {
    val partitionNames = plan.keys.map(_.value).toList
    val retained = input.schema.fields().asScala.filterNot(f => partitionNames.contains(f.name))

    val schemaBuilder = SchemaBuilder.struct
    retained.foreach(f => schemaBuilder.field(f.name, f.schema))

    val output = new Struct(schemaBuilder.build())
    retained.foreach(f => output.put(f.name, input.get(f.name)))
    output
  }
}
Example 40
Source File: ProjectionMapper.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink.mapper

import cats.data.NonEmptyList
import com.datamountaineer.kcql.Field
import com.landoop.streamreactor.connect.hive.StructMapper
import org.apache.kafka.connect.data.{SchemaBuilder, Struct}

/** Projects a struct onto the given KCQL fields, renaming each field to its alias. */
class ProjectionMapper(projection: NonEmptyList[Field]) extends StructMapper {

  /**
    * @param input struct to project
    * @return a new struct containing only the projected fields under their aliases
    * @throws RuntimeException if a projected field does not exist in the input schema
    */
  override def map(input: Struct): Struct = {
    val projected = projection.toList

    // Build the output schema first so that a missing field fails before any value is copied.
    val schemaBuilder = SchemaBuilder.struct
    projected.foreach { kcqlField =>
      Option(input.schema.field(kcqlField.getName)) match {
        case Some(field) => schemaBuilder.field(kcqlField.getAlias, field.schema)
        case None => sys.error(s"Missing field $kcqlField")
      }
    }

    val output = new Struct(schemaBuilder.build())
    projected.foreach(kcqlField => output.put(kcqlField.getAlias, input.get(kcqlField.getName)))
    output
  }
}
Example 41
Source File: PartitionValueMapper.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.source.mapper

import com.landoop.streamreactor.connect.hive.{Partition, StructMapper}
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}

import scala.collection.JavaConverters._

/** Appends the entries of a partition to a struct as additional string fields. */
class PartitionValueMapper(partition: Partition) extends StructMapper {

  /**
    * @param input struct read from the source
    * @return a new struct with all input fields followed by one string field per partition entry
    */
  override def map(input: Struct): Struct = {
    val inputFields = input.schema.fields.asScala
    val partitionEntries = partition.entries.toList

    val schemaBuilder = SchemaBuilder.struct()
    for (field <- inputFields) schemaBuilder.field(field.name, field.schema)
    for ((key, _) <- partitionEntries) schemaBuilder.field(key.value, Schema.STRING_SCHEMA)

    val output = new Struct(schemaBuilder.build())
    for (field <- inputFields) output.put(field.name, input.get(field.name))
    for ((key, value) <- partitionEntries) output.put(key.value, value)
    output
  }
}
Example 42
Source File: ProjectionMapper.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.source.mapper

import cats.data.NonEmptyList
import com.landoop.streamreactor.connect.hive.StructMapper
import com.landoop.streamreactor.connect.hive.source.config.ProjectionField
import org.apache.kafka.connect.data.{SchemaBuilder, Struct}

/** Projects a struct onto the configured fields, renaming each field to its alias. */
class ProjectionMapper(projection: NonEmptyList[ProjectionField]) extends StructMapper {

  /**
    * @param input struct to project
    * @return a new struct containing only the projected fields under their aliases
    * @throws RuntimeException if a projected field does not exist in the input schema
    */
  override def map(input: Struct): Struct = {
    val projected = projection.toList

    // Build the output schema first so that a missing field fails before any value is copied.
    val schemaBuilder = SchemaBuilder.struct
    projected.foreach { projectionField =>
      Option(input.schema.field(projectionField.name)) match {
        case Some(field) => schemaBuilder.field(projectionField.alias, field.schema)
        case None => sys.error(s"Projection field ${projectionField.name} cannot be found in input")
      }
    }

    val output = new Struct(schemaBuilder.build())
    projected.foreach(projectionField => output.put(projectionField.alias, input.get(projectionField.name)))
    output
  }
}
Example 43
Source File: DefaultCommitPolicyTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink.staging

import com.landoop.streamreactor.connect.hive.{Offset, Topic, TopicPartitionOffset}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, LocalFileSystem, Path}
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

import scala.concurrent.duration._

/** Checks the size, interval and record-count triggers of [[DefaultCommitPolicy]] against local files. */
class DefaultCommitPolicyTest extends AnyWordSpec with Matchers {

  val schema: Schema = SchemaBuilder.struct()
    .field("name", SchemaBuilder.string().required().build())
    .build()

  val struct = new Struct(schema)

  implicit val conf: Configuration = new Configuration()
  implicit val fs: LocalFileSystem = FileSystem.getLocal(conf)
  val tpo = TopicPartitionOffset(Topic("mytopic"), 1, Offset(100))

  /** Asks the policy whether to flush, using the current length and modification time of `path`. */
  private def shouldFlush(policy: CommitPolicy, path: Path, count: Long) = {
    val status = fs.getFileStatus(path)
    policy.shouldFlush(CommitContext(tpo, path, count, status.getLen, status.getModificationTime))
  }

  /** Creates an empty file and closes the stream straight away so no handle is leaked. */
  private def createEmptyFile(path: Path): Unit = fs.create(path).close()

  "DefaultCommitPolicy" should {

    "roll over after interval" in {
      val policy = DefaultCommitPolicy(None, Option(2.seconds), None)
      val path = new Path("foo")
      createEmptyFile(path)
      shouldFlush(policy, path, 10) shouldBe false
      Thread.sleep(2000)
      shouldFlush(policy, path, 10) shouldBe true
      fs.delete(path, false)
    }

    "roll over after file count" in {
      val policy = DefaultCommitPolicy(None, None, Some(9))
      val path = new Path("foo")
      createEmptyFile(path)
      shouldFlush(policy, path, 7) shouldBe false
      shouldFlush(policy, path, 8) shouldBe false
      shouldFlush(policy, path, 9) shouldBe true
      shouldFlush(policy, path, 10) shouldBe true
      fs.delete(path, false)
    }

    "roll over after file size" in {
      val policy = DefaultCommitPolicy(Some(10), None, None)
      val path = new Path("foo")
      val out = fs.create(path)
      shouldFlush(policy, path, 7) shouldBe false
      out.writeBytes("wibble wobble wabble wubble")
      out.close()
      shouldFlush(policy, path, 9) shouldBe true
      fs.delete(path, false)
    }
  }
}
Example 44
Source File: DropPartitionValuesMapperTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink.mapper

import cats.data.NonEmptyList
import com.landoop.streamreactor.connect.hive.{PartitionKey, PartitionPlan, TableName}
import org.apache.kafka.connect.data.{SchemaBuilder, Struct}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

import scala.collection.JavaConverters._

class DropPartitionValuesMapperTest extends AnyFunSuite with Matchers {

  /** Struct with one required string field per name, each holding its own name as value. */
  private def structOf(names: String*): Struct = {
    val schemaBuilder = SchemaBuilder.struct()
    names.foreach(name => schemaBuilder.field(name, SchemaBuilder.string().required().build()))
    val struct = new Struct(schemaBuilder.build())
    names.foreach(name => struct.put(name, name))
    struct
  }

  /** Plan for table "foo" partitioned by "p" and "q". */
  private def planPQ: PartitionPlan =
    PartitionPlan(TableName("foo"), NonEmptyList.of(PartitionKey("p"), PartitionKey("q")))

  test("strip partition values") {
    val struct = structOf("a", "p", "q", "z")
    val output = new DropPartitionValuesMapper(planPQ).map(struct)
    output.schema().fields().asScala.map(_.name) shouldBe Seq("a", "z")
  }

  test("handle partition field is missing in input") {
    val struct = structOf("a", "q", "z")
    val output = new DropPartitionValuesMapper(planPQ).map(struct)
    output.schema().fields().asScala.map(_.name) shouldBe Seq("a", "z")
  }
}
Example 45
Source File: MetastoreSchemaAlignMapperTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink.mapper

import org.apache.kafka.connect.data.{SchemaBuilder, Struct}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

import scala.collection.JavaConverters._

class MetastoreSchemaAlignMapperTest extends AnyFunSuite with Matchers {

  /** Adds one required string field per name to the builder and returns the builder. */
  private def withRequiredStrings(builder: SchemaBuilder, names: String*): SchemaBuilder = {
    names.foreach(name => builder.field(name, SchemaBuilder.string().required().build()))
    builder
  }

  /** Record with required string fields "a", "b" and "c", each holding its own name as value. */
  private def abcRecord(): Struct = {
    val recordSchema = withRequiredStrings(SchemaBuilder.struct(), "a", "b", "c").build()
    new Struct(recordSchema).put("a", "a").put("b", "b").put("c", "c")
  }

  test("pad optional missing fields with null") {
    val struct = abcRecord()
    val metastoreSchema = withRequiredStrings(SchemaBuilder.struct(), "a", "b", "c")
      .field("z", SchemaBuilder.string().optional().build())
      .build()
    val output = new MetastoreSchemaAlignMapper(metastoreSchema).map(struct)
    output.schema().fields().asScala.map(_.name) shouldBe Seq("a", "b", "c", "z")
  }

  test("drop fields not specified in metastore") {
    val struct = abcRecord()
    val metastoreSchema = withRequiredStrings(SchemaBuilder.struct(), "a", "b").build()
    val output = new MetastoreSchemaAlignMapper(metastoreSchema).map(struct)
    output.schema().fields().asScala.map(_.name) shouldBe Seq("a", "b")
  }
}
Example 46
Source File: ParquetWriterTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.parquet

import com.landoop.streamreactor.connect.hive.StructUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.kafka.connect.data.{SchemaBuilder, Struct}
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

class ParquetWriterTest extends AnyWordSpec with Matchers {

  implicit val conf = new Configuration()
  implicit val fs = FileSystem.getLocal(conf)

  /** Schema with a required "name" and optional "title" and "salary" fields. */
  private def userSchema() = SchemaBuilder.struct()
    .field("name", SchemaBuilder.string().required().build())
    .field("title", SchemaBuilder.string().optional().build())
    .field("salary", SchemaBuilder.float64().optional().build())
    .build()

  /** Writes the structs to `path` (overwriting it) and reads back every record until null. */
  private def writeThenRead(path: Path, schema: org.apache.kafka.connect.data.Schema, users: List[Struct]) = {
    val writer = parquetWriter(path, schema, ParquetSinkConfig(overwrite = true))
    users.foreach(writer.write)
    writer.close()

    val reader = parquetReader(path)
    val actual = Iterator.continually(reader.read).takeWhile(_ != null).toList
    reader.close()
    actual
  }

  "ParquetWriter" should {

    "write parquet files" in {
      val schema = userSchema()
      val users = List(
        new Struct(schema).put("name", "sam").put("title", "mr").put("salary", 100.43),
        new Struct(schema).put("name", "laura").put("title", "ms").put("salary", 429.06)
      )
      val path = new Path("sinktest.parquet")

      val actual = writeThenRead(path, schema, users)

      actual.map(StructUtils.extractValues) shouldBe users.map(StructUtils.extractValues)
      fs.delete(path, false)
    }

    "support writing nulls" in {
      val schema = userSchema()
      val users = List(
        new Struct(schema).put("name", "sam").put("title", null).put("salary", 100.43),
        new Struct(schema).put("name", "laura").put("title", "ms").put("salary", 429.06)
      )
      val path = new Path("sinktest.parquet")

      val actual = writeThenRead(path, schema, users)

      actual.map(StructUtils.extractValues) shouldBe users.map(StructUtils.extractValues)
      fs.delete(path, false)
    }
  }
}