Example 1
Source File: ProjectionMapper.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.source.mapper

import cats.data.NonEmptyList
import com.landoop.streamreactor.connect.hive.StructMapper
import com.landoop.streamreactor.connect.hive.source.config.ProjectionField
import org.apache.kafka.connect.data.{SchemaBuilder, Struct}

/**
  * Projects an input [[Struct]] onto the configured fields, renaming each
  * projected field to its alias.
  *
  * @param projection the non-empty list of fields to keep, in output order
  */
class ProjectionMapper(projection: NonEmptyList[ProjectionField]) extends StructMapper {

  /**
    * Builds a struct containing only the projected fields.
    *
    * Fails with a `RuntimeException` (via `sys.error`) when a projected field
    * does not exist in the input schema.
    */
  override def map(input: Struct): Struct = {
    // Output schema: one field per projection entry, named by alias and typed
    // like the matching input field.
    val builder = projection.foldLeft(SchemaBuilder.struct) { (builder, projectionField) =>
      // Schema.field returns null for an unknown name, hence the Option wrapper.
      Option(input.schema.field(projectionField.name))
        .fold(sys.error(s"Projection field ${projectionField.name} cannot be found in input")) { field =>
          builder.field(projectionField.alias, field.schema)
        }
    }
    val schema = builder.build()

    // Copy each projected value across under its alias.
    projection.foldLeft(new Struct(schema)) { (struct, field) =>
      struct.put(field.alias, input.get(field.name))
    }
  }
}
Example 2
Source File: ReThinkSourceReadersFactory.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.rethink.source

import java.util
import java.util.concurrent.LinkedBlockingQueue
import java.util.concurrent.atomic.AtomicBoolean

import com.datamountaineer.streamreactor.connect.rethink.ReThinkConnection
import com.datamountaineer.streamreactor.connect.rethink.config.{ReThinkSourceConfig, ReThinkSourceSetting, ReThinkSourceSettings}
import com.rethinkdb.RethinkDB
import com.rethinkdb.net.{Connection, Cursor}
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.data.SchemaBuilder
import org.apache.kafka.connect.source.SourceRecord

import scala.collection.JavaConverters._
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future

object ReThinkSourceReadersFactory {

  /** Creates one reader per configured source setting, all sharing a single connection. */
  def apply(config: ReThinkSourceConfig, r: RethinkDB): Set[ReThinkSourceReader] = {
    val conn = ReThinkConnection(r, config)
    ReThinkSourceSettings(config).map(s => new ReThinkSourceReader(r, conn, s))
  }
}

/**
  * Reads a RethinkDB change feed for one table and buffers the events as
  * [[SourceRecord]]s on `queue`.
  *
  * @param rethink the RethinkDB query builder
  * @param conn    the connection the change feed query runs on
  * @param setting the source table, target and batching settings
  */
class ReThinkSourceReader(rethink: RethinkDB, conn: Connection, setting: ReThinkSourceSetting)
  extends StrictLogging {

  logger.info(s"Initialising ReThink Reader for ${setting.source}")

  private val keySchema = SchemaBuilder.string().optional().build()
  private val valueSchema = ChangeFeedStructBuilder.schema
  private val sourcePartition = Map.empty[String, String]
  private val offset = Map.empty[String, String]
  private val stopFeed = new AtomicBoolean(false)
  private val handlingFeed = new AtomicBoolean(false)
  private var feed: Cursor[util.HashMap[String, String]] = _
  val queue = new LinkedBlockingQueue[SourceRecord]()
  val batchSize = setting.batchSize

  /** Opens the change feed and starts draining it on a background future. */
  def start() = {
    feed = getChangeFeed()
    startFeed(feed)
  }

  /** Signals the feed loop to stop, waits until it has exited, then closes the cursor. */
  def stop() = {
    logger.info(s"Closing change feed for ${setting.source}")
    stopFeed.set(true)
    while (handlingFeed.get()) {
      logger.debug("Waiting for feed to shutdown...")
      Thread.sleep(1000)
    }
    // feed is still null when stop() is called before start().
    Option(feed).foreach(_.close())
    logger.info(s"Change feed closed for ${setting.source}")
  }

  // NOTE(review): uses the global execution context; the loop below blocks its thread.
  private def startFeed(feed: Cursor[util.HashMap[String, String]]): Future[Unit] =
    Future(handleFeed(feed))

  private def handleFeed(feed: Cursor[util.HashMap[String, String]]) = {
    handlingFeed.set(true)
    try {
      // feed.next() is blocking
      while (!stopFeed.get()) {
        logger.debug(s"Waiting for next change feed event for ${setting.source}")
        val cdc = convert(feed.next().asScala.toMap)
        queue.put(cdc)
      }
    } finally {
      // Always clear the flag so stop() cannot spin forever if the cursor throws.
      handlingFeed.set(false)
    }
  }

  private def getChangeFeed(): Cursor[util.HashMap[String, String]] = {
    logger.info(s"Initialising change feed for ${setting.source}")
    rethink
      .db(setting.db)
      .table(setting.source)
      .changes()
      .optArg("include_states", true)
      .optArg("include_initial", setting.initialise)
      .optArg("include_types", true)
      .run(conn)
  }

  // NOTE(review): the topic argument was lost in extraction; setting.target is assumed — confirm.
  private def convert(feed: Map[String, String]) = {
    new SourceRecord(
      sourcePartition.asJava,
      offset.asJava,
      setting.target,
      keySchema,
      setting.source,
      valueSchema,
      ChangeFeedStructBuilder(feed))
  }
}
Example 3
Source File: ChangeFeedStructBuilder.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.rethink.source

import com.fasterxml.jackson.databind.ObjectMapper
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}

/** Builds Connect structs for RethinkDB change feed events. */
object ChangeFeedStructBuilder extends StrictLogging {

  val mapper = new ObjectMapper()
  val oldVal = "old_val"
  val newVal = "new_val"
  val state = "state"
  val `type` = "type"

  /** Value schema: four optional string fields named after the constants above. */
  val schema: Schema = SchemaBuilder.struct.name("ReThinkChangeFeed")
    .version(1)
    .field(state, Schema.OPTIONAL_STRING_SCHEMA)
    .field(oldVal, Schema.OPTIONAL_STRING_SCHEMA)
    .field(newVal, Schema.OPTIONAL_STRING_SCHEMA)
    .field(`type`, Schema.OPTIONAL_STRING_SCHEMA)
    .build

  /**
    * Copies every non-null entry of `hm` into a new struct as its `toString` value.
    *
    * @param hm change feed event keyed by field name; keys must be fields of `schema`
    * @return the populated struct
    */
  def apply(hm: Map[String, Object]): Struct =
    hm.foldLeft(new Struct(schema)) {
      // Null values are skipped, leaving the optional field unset.
      case (struct, (_, null))      => struct
      case (struct, (field, value)) => struct.put(field, value.toString)
    }
}
Example 4
Source File: PulsarWriterTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.pulsar.sink

import com.datamountaineer.streamreactor.connect.pulsar.ProducerConfigFactory
import com.datamountaineer.streamreactor.connect.pulsar.config.{PulsarConfigConstants, PulsarSinkConfig, PulsarSinkSettings}
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord
import org.apache.pulsar.client.api.{Message, MessageId, Producer, PulsarClient}
import org.mockito.ArgumentMatchers.any
import org.mockito.MockitoSugar
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

import scala.collection.JavaConverters._

/** Smoke test: writes one record through a PulsarWriter backed by mocked Pulsar client and producer. */
class PulsarWriterTest extends AnyWordSpec with MockitoSugar with Matchers {

  val pulsarTopic = "persistent://landoop/standalone/connect/kafka-topic"

  /** Schema with one field per primitive Connect type used by the test. */
  def getSchema: Schema = {
    SchemaBuilder.struct
      .field("int8", SchemaBuilder.int8().defaultValue(2.toByte).doc("int8 field").build())
      .field("int16", Schema.INT16_SCHEMA)
      .field("int32", Schema.INT32_SCHEMA)
      .field("int64", Schema.INT64_SCHEMA)
      .field("float32", Schema.FLOAT32_SCHEMA)
      .field("float64", Schema.FLOAT64_SCHEMA)
      .field("boolean", Schema.BOOLEAN_SCHEMA)
      .field("string", Schema.STRING_SCHEMA)
      .build()
  }

  /** A struct with every field of `getSchema` populated. */
  def getStruct(schema: Schema): Struct = {
    new Struct(schema)
      .put("int8", 12.toByte)
      .put("int16", 12.toShort)
      .put("int32", 12)
      .put("int64", 12L)
      .put("float32", 12.2f)
      .put("float64", 12.2)
      .put("boolean", true)
      .put("string", "foo")
  }

  "should write messages" in {
    val config = PulsarSinkConfig(Map(
      PulsarConfigConstants.HOSTS_CONFIG -> "pulsar://localhost:6650",
      PulsarConfigConstants.KCQL_CONFIG -> s"INSERT INTO $pulsarTopic SELECT * FROM kafka_topic BATCH = 10 WITHPARTITIONER = SinglePartition WITHCOMPRESSION = ZLIB WITHDELAY = 1000"
    ).asJava)

    val schema = getSchema
    val struct = getStruct(schema)
    val record1 = new SinkRecord("kafka_topic", 0, null, null, schema, struct, 1)

    val settings = PulsarSinkSettings(config)
    val producerConfig = ProducerConfigFactory("test", settings.kcql)

    val client = mock[PulsarClient]
    val producer = mock[Producer]
    val messageId = mock[MessageId]

    when(client.createProducer(pulsarTopic, producerConfig(pulsarTopic))).thenReturn(producer)
    when(producer.send(any[Message])).thenReturn(messageId)

    val writer = PulsarWriter(client, "test", settings)
    writer.write(List(record1))
  }
}
Example 5
Source File: RedisInsertSortedSetTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.redis.sink.writer

import com.datamountaineer.streamreactor.connect.redis.sink.config.{RedisConfig, RedisConfigConstants, RedisConnectionInfo, RedisSinkSettings}
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord
import org.mockito.MockitoSugar
import org.scalatest.BeforeAndAfterAll
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec
import redis.clients.jedis.Jedis
import redis.embedded.RedisServer

import scala.collection.JavaConverters._

/** Writes three records through RedisInsertSortedSet against an embedded Redis and checks the sorted set. */
class RedisInsertSortedSetTest extends AnyWordSpec with Matchers with BeforeAndAfterAll with MockitoSugar {

  val redisServer = new RedisServer(6379)

  override def beforeAll() = redisServer.start()

  override def afterAll() = redisServer.stop()

  "Redis INSERT into Sorted Set (SS) writer" should {

    "write Kafka records to a Redis Sorted Set" in {

      val TOPIC = "cpuTopic"
      val KCQL = s"INSERT INTO cpu_stats SELECT * from $TOPIC STOREAS SortedSet(score=ts)"
      println("Testing KCQL : " + KCQL)
      val props = Map(
        RedisConfigConstants.REDIS_HOST -> "localhost",
        RedisConfigConstants.REDIS_PORT -> "6379",
        RedisConfigConstants.KCQL_CONFIG -> KCQL
      ).asJava

      val config = RedisConfig(props)
      val connectionInfo = new RedisConnectionInfo("localhost", 6379, None)
      val settings = RedisSinkSettings(config)
      val writer = new RedisInsertSortedSet(settings)
      writer.createClient(settings)

      val schema = SchemaBuilder.struct().name("com.example.Cpu")
        .field("type", Schema.STRING_SCHEMA)
        .field("temperature", Schema.FLOAT64_SCHEMA)
        .field("voltage", Schema.FLOAT64_SCHEMA)
        .field("ts", Schema.INT64_SCHEMA).build()

      val struct1 = new Struct(schema).put("type", "Xeon").put("temperature", 60.4).put("voltage", 90.1).put("ts", 1482180657010L)
      val struct2 = new Struct(schema).put("type", "i7").put("temperature", 62.1).put("voltage", 103.3).put("ts", 1482180657020L)
      val struct3 = new Struct(schema).put("type", "i7-i").put("temperature", 64.5).put("voltage", 101.1).put("ts", 1482180657030L)

      val sinkRecord1 = new SinkRecord(TOPIC, 0, null, null, schema, struct1, 1)
      val sinkRecord2 = new SinkRecord(TOPIC, 0, null, null, schema, struct2, 2)
      val sinkRecord3 = new SinkRecord(TOPIC, 0, null, null, schema, struct3, 3)

      val jedis = new Jedis(connectionInfo.host, connectionInfo.port)
      try {
        // Clean up in-memory jedis
        jedis.flushAll()

        writer.write(Seq(sinkRecord1))
        writer.write(Seq(sinkRecord2, sinkRecord3))

        // Redis cardinality should now be 3
        jedis.zcard("cpu_stats") shouldBe 3

        val allSSrecords = jedis.zrange("cpu_stats", 0, 999999999999L)
        val results = allSSrecords.asScala.toList
        results.head shouldBe """{"type":"Xeon","temperature":60.4,"voltage":90.1,"ts":1482180657010}"""
        results(1) shouldBe """{"type":"i7","temperature":62.1,"voltage":103.3,"ts":1482180657020}"""
        results(2) shouldBe """{"type":"i7-i","temperature":64.5,"voltage":101.1,"ts":1482180657030}"""
      } finally {
        // Release the verification client even when an assertion fails.
        jedis.close()
      }
    }
  }
}
Example 6
Source File: RedisPubSubTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.redis.sink.writer

import com.datamountaineer.streamreactor.connect.redis.sink.config.{RedisConfig, RedisConfigConstants, RedisConnectionInfo, RedisSinkSettings}
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord
import org.mockito.MockitoSugar
import org.scalatest.BeforeAndAfterAll
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec
import redis.clients.jedis.{Jedis, JedisPubSub}
import redis.embedded.RedisServer

import scala.collection.JavaConverters._
import scala.collection.mutable.ListBuffer

/** Publishes three records through RedisPubSub against an embedded Redis and checks what a subscriber received. */
class RedisPubSubTest extends AnyWordSpec with Matchers with BeforeAndAfterAll with MockitoSugar {

  val redisServer = new RedisServer(6379)

  override def beforeAll() = redisServer.start()

  override def afterAll() = redisServer.stop()

  "Redis PUBSUB writer" should {

    "write Kafka records to a Redis PubSub" in {

      val TOPIC = "cpuTopic"
      val KCQL = s"SELECT * from $TOPIC STOREAS PubSub (channel=type)"
      println("Testing KCQL : " + KCQL)
      val props = Map(
        RedisConfigConstants.REDIS_HOST -> "localhost",
        RedisConfigConstants.REDIS_PORT -> "6379",
        RedisConfigConstants.KCQL_CONFIG -> KCQL
      ).asJava

      val config = RedisConfig(props)
      val connectionInfo = new RedisConnectionInfo("localhost", 6379, None)
      val settings = RedisSinkSettings(config)
      val writer = new RedisPubSub(settings)
      writer.createClient(settings)

      val schema = SchemaBuilder.struct().name("com.example.Cpu")
        .field("type", Schema.STRING_SCHEMA)
        .field("temperature", Schema.FLOAT64_SCHEMA)
        .field("voltage", Schema.FLOAT64_SCHEMA)
        .field("ts", Schema.INT64_SCHEMA).build()

      val struct1 = new Struct(schema).put("type", "Xeon").put("temperature", 60.4).put("voltage", 90.1).put("ts", 1482180657010L)
      val struct2 = new Struct(schema).put("type", "i7").put("temperature", 62.1).put("voltage", 103.3).put("ts", 1482180657020L)
      val struct3 = new Struct(schema).put("type", "i7-i").put("temperature", 64.5).put("voltage", 101.1).put("ts", 1482180657030L)

      val sinkRecord1 = new SinkRecord(TOPIC, 0, null, null, schema, struct1, 1)
      val sinkRecord2 = new SinkRecord(TOPIC, 0, null, null, schema, struct2, 2)
      val sinkRecord3 = new SinkRecord(TOPIC, 0, null, null, schema, struct3, 3)

      val jedis = new Jedis(connectionInfo.host, connectionInfo.port)
      // Clean up in-memory jedis
      jedis.flushAll()

      // Messages received by the subscriber, grouped by channel.
      val messagesMap = collection.mutable.Map[String, ListBuffer[String]]()

      val t = new Thread {
        private val pubsub = new JedisPubSub {
          override def onMessage(channel: String, message: String): Unit = {
            messagesMap.get(channel) match {
              case Some(msgs) => messagesMap.put(channel, msgs += message)
              case None => messagesMap.put(channel, ListBuffer(message))
            }
          }
        }

        override def run(): Unit = {
          jedis.subscribe(pubsub, "Xeon", "i7", "i7-i")
        }

        override def interrupt(): Unit = {
          pubsub.punsubscribe("*")
          super.interrupt()
        }
      }
      t.start()
      // NOTE(review): the 5s join looks like a way to give the subscription time to be
      // established before publishing; the assertions below run right after the writes
      // with no further synchronisation — confirm this is not flaky.
      t.join(5000)
      if (t.isAlive) t.interrupt()

      writer.write(Seq(sinkRecord1))
      writer.write(Seq(sinkRecord2, sinkRecord3))

      messagesMap.size shouldBe 3

      messagesMap("Xeon").head shouldBe """{"type":"Xeon","temperature":60.4,"voltage":90.1,"ts":1482180657010}"""
      messagesMap("i7").head shouldBe """{"type":"i7","temperature":62.1,"voltage":103.3,"ts":1482180657020}"""
      messagesMap("i7-i").head shouldBe """{"type":"i7-i","temperature":64.5,"voltage":101.1,"ts":1482180657030}"""
    }
  }
}
Example 7
Source File: RedisStreamTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.redis.sink.writer

/*
 * Copyright 2017 Datamountaineer.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.util

import com.datamountaineer.streamreactor.connect.redis.sink.RedisSinkTask
import com.datamountaineer.streamreactor.connect.redis.sink.config.{RedisConfig, RedisConfigConstants, RedisConnectionInfo, RedisSinkSettings}
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord
import org.mockito.MockitoSugar
import org.scalatest.BeforeAndAfterAll
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec
import redis.clients.jedis.{Jedis, StreamEntryID}

import scala.collection.JavaConverters._

/** Writes one record through RedisStreams using a mocked Jedis client. */
class RedisStreamTest extends AnyWordSpec with Matchers with BeforeAndAfterAll with MockitoSugar {
  //
  //  val redisServer = new RedisServer(6379)
  //
  //  override def beforeAll() = redisServer.start()
  //
  //  override def afterAll() = redisServer.stop()

  "Redis Stream writer" should {

    "write Kafka records to a Redis Stream" in {

      val TOPIC = "cpuTopic"
      val KCQL = s"INSERT INTO stream1 SELECT * from $TOPIC STOREAS STREAM"
      println("Testing KCQL : " + KCQL)
      val props = Map(
        RedisConfigConstants.REDIS_HOST -> "localhost",
        RedisConfigConstants.REDIS_PORT -> "6379",
        RedisConfigConstants.KCQL_CONFIG -> KCQL,
        RedisConfigConstants.REDIS_PASSWORD -> ""
      ).asJava

      val config = RedisConfig(props)
      val settings = RedisSinkSettings(config)
      val writer = new RedisStreams(settings)

      val schema = SchemaBuilder.struct().name("com.example.Cpu")
        .field("type", Schema.STRING_SCHEMA)
        .field("temperature", Schema.FLOAT64_SCHEMA)
        .field("voltage", Schema.FLOAT64_SCHEMA)
        .field("ts", Schema.INT64_SCHEMA).build()

      val struct1 = new Struct(schema).put("type", "Xeon").put("temperature", 60.4).put("voltage", 90.1).put("ts", 1482180657010L)
      val sinkRecord1 = new SinkRecord(TOPIC, 0, null, null, schema, struct1, 1)

      // Inject the mock so no Redis server is needed.
      val jedis = mock[Jedis]
      writer.jedis = jedis

      // The field map stubbed on xadd for the record above.
      val map = new util.HashMap[String, String]()
      map.put("type", "Xeon")
      map.put("temperature", "60.4")
      map.put("voltage", "90.1")
      map.put("ts", 1482180657010L.toString)

      when(jedis.auth("")).isLenient()
      when(jedis.xadd("stream1", null, map)).thenReturn(mock[StreamEntryID])
      writer.initialize(1, settings.errorPolicy)
      writer.write(Seq(sinkRecord1))
    }
  }
}
Example 8
Source File: OrcSchemas.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.orc

import com.landoop.streamreactor.connect.hive.UnsupportedSchemaType
import org.apache.kafka.connect.data.{Decimal, Schema, SchemaBuilder}
import org.apache.orc.TypeDescription
import org.apache.orc.TypeDescription.Category

import scala.collection.JavaConverters._

/** Conversions between ORC type descriptions and Kafka Connect schemas. */
object OrcSchemas {

  /**
    * Maps an ORC type to the equivalent optional Connect schema.
    *
    * @throws UnsupportedSchemaType for ORC categories with no mapping here
    */
  def toKafka(schema: TypeDescription): Schema = schema.getCategory match {
    case Category.BOOLEAN => Schema.OPTIONAL_BOOLEAN_SCHEMA
    case Category.BYTE => Schema.OPTIONAL_INT8_SCHEMA
    case Category.DOUBLE => Schema.OPTIONAL_FLOAT64_SCHEMA
    case Category.INT => Schema.OPTIONAL_INT32_SCHEMA
    case Category.FLOAT => Schema.OPTIONAL_FLOAT32_SCHEMA
    case Category.LONG => Schema.OPTIONAL_INT64_SCHEMA
    case Category.SHORT => Schema.OPTIONAL_INT16_SCHEMA
    case Category.STRING => Schema.OPTIONAL_STRING_SCHEMA
    case Category.VARCHAR => Schema.OPTIONAL_STRING_SCHEMA
    case Category.CHAR => Schema.OPTIONAL_STRING_SCHEMA
    case Category.DATE => Schema.OPTIONAL_STRING_SCHEMA
    case Category.TIMESTAMP => Schema.OPTIONAL_STRING_SCHEMA
    // Previously a second, unreachable Category.BYTE case; the bytes schema belongs to BINARY.
    case Category.BINARY => Schema.OPTIONAL_BYTES_SCHEMA
    case Category.STRUCT => toKafkaStruct(schema)
    // Explicit error instead of a bare MatchError, matching toOrc.
    case unsupported => throw UnsupportedSchemaType(unsupported.toString)
  }

  /** Converts an ORC struct into a Connect struct schema named "from_orc", field by field. */
  def toKafkaStruct(schema: TypeDescription): Schema = {
    val children = schema.getChildren
    schema.getFieldNames.asScala.zipWithIndex
      .foldLeft(SchemaBuilder.struct().name("from_orc")) { case (builder, (field, k)) =>
        // Field names and children are parallel lists.
        builder.field(field, toKafka(children.get(k)))
      }
      .build()
  }

  /**
    * Maps a Connect schema to the equivalent ORC type.
    *
    * @throws UnsupportedSchemaType for Connect types with no ORC mapping
    */
  def toOrc(schema: Schema): TypeDescription = {
    schema.`type`() match {
      // Decimal logical type is checked before the plain STRING/BYTES cases.
      case Schema.Type.STRING if schema.name == Decimal.LOGICAL_NAME => TypeDescription.createDecimal()
      case Schema.Type.STRING => TypeDescription.createString()
      case Schema.Type.BOOLEAN => TypeDescription.createBoolean()
      case Schema.Type.FLOAT32 => TypeDescription.createFloat()
      case Schema.Type.FLOAT64 => TypeDescription.createDouble()
      case Schema.Type.INT8 => TypeDescription.createByte()
      case Schema.Type.INT16 => TypeDescription.createShort()
      case Schema.Type.INT32 => TypeDescription.createInt()
      case Schema.Type.INT64 => TypeDescription.createLong()
      case Schema.Type.BYTES if schema.name == Decimal.LOGICAL_NAME => TypeDescription.createDecimal()
      case Schema.Type.BYTES => TypeDescription.createBinary()
      case Schema.Type.ARRAY => TypeDescription.createList(toOrc(schema.valueSchema()))
      case Schema.Type.MAP => TypeDescription.createMap(toOrc(schema.keySchema()), toOrc(schema.valueSchema()))
      case Schema.Type.STRUCT =>
        schema.fields().asScala.foldLeft(TypeDescription.createStruct) { case (struct, field) =>
          struct.addField(field.name, toOrc(field.schema))
        }
      case unsupportedDataType => throw UnsupportedSchemaType(unsupportedDataType.toString)
    }
  }
}
Example 9
Source File: ValueConverter.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord

import scala.collection.JavaConverters._

object ValueConverter {

  /**
    * Converts a sink record value into a [[Struct]], dispatching on its runtime type.
    * Fails via `sys.error` for value types with no converter.
    */
  def apply(record: SinkRecord): Struct = record.value match {
    case struct: Struct => StructValueConverter.convert(struct)
    case map: Map[_, _] => MapValueConverter.convert(map)
    case map: java.util.Map[_, _] => MapValueConverter.convert(map.asScala.toMap)
    case string: String => StringValueConverter.convert(string)
    case other => sys.error(s"Unsupported record $other:${other.getClass.getCanonicalName}")
  }
}

/** Converts a value of type `T` into a Connect [[Struct]]. */
trait ValueConverter[T] {
  def convert(value: T): Struct
}

/** Identity conversion for values that are already structs. */
object StructValueConverter extends ValueConverter[Struct] {
  override def convert(struct: Struct): Struct = struct
}

/** Infers a struct schema from a map's runtime values and copies the values across. */
object MapValueConverter extends ValueConverter[Map[_, _]] {

  /**
    * Registers a schema field for `key` on `builder` and returns the value in
    * the representation that field expects (ints widen to Long, floats to
    * Double, nested maps become nested structs).
    */
  def convertValue(value: Any, key: String, builder: SchemaBuilder): Any = {
    value match {
      case s: String =>
        builder.field(key, Schema.OPTIONAL_STRING_SCHEMA)
        s
      case l: Long =>
        builder.field(key, Schema.OPTIONAL_INT64_SCHEMA)
        l
      case i: Int =>
        builder.field(key, Schema.OPTIONAL_INT64_SCHEMA)
        i.toLong
      case b: Boolean =>
        builder.field(key, Schema.OPTIONAL_BOOLEAN_SCHEMA)
        b
      case f: Float =>
        builder.field(key, Schema.OPTIONAL_FLOAT64_SCHEMA)
        f.toDouble
      case d: Double =>
        builder.field(key, Schema.OPTIONAL_FLOAT64_SCHEMA)
        d
      case innerMap: java.util.Map[_, _] =>
        val innerStruct = convert(innerMap.asScala.toMap, true)
        builder.field(key, innerStruct.schema())
        innerStruct
      case innerMap: Map[_, _] =>
        val innerStruct = convert(innerMap, true)
        builder.field(key, innerStruct.schema())
        innerStruct
      // No schema can be inferred from a null or an unknown type; report the
      // field explicitly rather than surfacing a bare MatchError.
      case null =>
        sys.error(s"Unsupported null value for field $key")
      case other =>
        sys.error(s"Unsupported value $other:${other.getClass.getCanonicalName} for field $key")
    }
  }

  /**
    * Converts a map into a struct whose schema is inferred from the values.
    *
    * @param optional whether the generated struct schema is marked optional
    */
  def convert(map: Map[_, _], optional: Boolean): Struct = {
    val builder = SchemaBuilder.struct()
    // Converting the values also registers the fields on the builder, so this
    // must run before build().
    val values = map.map { case (k, v) =>
      val key = k.toString
      key -> convertValue(v, key, builder)
    }.toList
    if (optional) builder.optional()
    val schema = builder.build()
    values.foldLeft(new Struct(schema)) { case (struct, (key, value)) =>
      struct.put(key, value)
    }
  }

  override def convert(map: Map[_, _]): Struct = convert(map, false)
}

/** Wraps a plain string in a single-field struct. */
object StringValueConverter extends ValueConverter[String] {
  override def convert(string: String): Struct = {
    val schema = SchemaBuilder.struct().field("a", Schema.OPTIONAL_STRING_SCHEMA).name("struct").build()
    new Struct(schema).put("a", string)
  }
}
Example 10
Source File: DropPartitionValuesMapper.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink.mapper

import com.landoop.streamreactor.connect.hive.{PartitionPlan, StructMapper}
import org.apache.kafka.connect.data.{SchemaBuilder, Struct}

/**
  * Removes the partition key fields from a struct, keeping all other fields
  * in their original order.
  *
  * @param plan the partition plan whose keys are dropped
  */
class DropPartitionValuesMapper(plan: PartitionPlan) extends StructMapper {

  import scala.collection.JavaConverters._

  override def map(input: Struct): Struct = {
    // NOTE(review): the key-name expression was lost in extraction; reconstructed
    // as the `value` of each PartitionKey in plan.keys — confirm.
    val partitionKeys = plan.keys.map(_.value).toList
    val dataFields = input.schema.fields().asScala.filterNot(field => partitionKeys.contains(field.name))

    // Schema of the remaining (non-partition) fields.
    val builder = dataFields.foldLeft(SchemaBuilder.struct) { (builder, field) =>
      builder.field(field.name, field.schema)
    }
    val schema = builder.build()

    // Copy the remaining values across unchanged.
    dataFields.foldLeft(new Struct(schema)) { (struct, field) =>
      struct.put(field.name, input.get(field.name))
    }
  }
}
Example 11
Source File: ProjectionMapper.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink.mapper

import cats.data.NonEmptyList
import com.datamountaineer.kcql.Field
import com.landoop.streamreactor.connect.hive.StructMapper
import org.apache.kafka.connect.data.{SchemaBuilder, Struct}

/**
  * Projects an input [[Struct]] onto the KCQL fields, renaming each to its alias.
  *
  * @param projection the non-empty list of KCQL fields to keep, in output order
  */
class ProjectionMapper(projection: NonEmptyList[Field]) extends StructMapper {

  /** Fails with a `RuntimeException` (via `sys.error`) when a projected field is absent from the input. */
  override def map(input: Struct): Struct = {
    // the compatible output schema built from projected fields with aliases applied
    val builder = projection.foldLeft(SchemaBuilder.struct) { (builder, kcqlField) =>
      // Schema.field returns null for an unknown name, hence the Option wrapper.
      Option(input.schema.field(kcqlField.getName)).fold(sys.error(s"Missing field $kcqlField")) { field =>
        builder.field(kcqlField.getAlias, field.schema)
      }
    }
    val schema = builder.build()

    // Copy each projected value across under its alias.
    projection.foldLeft(new Struct(schema)) { (struct, field) =>
      struct.put(field.getAlias, input.get(field.getName))
    }
  }
}
Example 12
Source File: PartitionValueMapper.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.source.mapper

import com.landoop.streamreactor.connect.hive.{Partition, StructMapper}
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}

import scala.collection.JavaConverters._

/**
  * Appends the partition's key/value entries to a struct as extra string fields.
  *
  * @param partition the partition whose entries are added to every mapped struct
  */
class PartitionValueMapper(partition: Partition) extends StructMapper {

  override def map(input: Struct): Struct = {
    val inputFields = input.schema.fields.asScala
    val partitionEntries = partition.entries.toList

    // Output schema: all input fields first, then one required string field per partition key.
    val builder = SchemaBuilder.struct()
    inputFields.foreach { field =>
      builder.field(field.name, field.schema)
    }
    partitionEntries.foreach { entry =>
      builder.field(entry._1.value, Schema.STRING_SCHEMA)
    }
    val schema = builder.build()

    // Copy the input values, then add the partition values.
    val struct = new Struct(schema)
    inputFields.foreach { field =>
      struct.put(field.name, input.get(field.name))
    }
    partitionEntries.foreach { entry =>
      struct.put(entry._1.value, entry._2)
    }
    struct
  }
}
Example 13
Source File: ConnectSchema.scala From stream-reactor with Apache License 2.0 | 5 votes |
// NOTE(review): the package name, the class header and the namespace literal were
// lost in extraction; they are reconstructed from the surrounding usage — confirm.
package com.datamountaineer.streamreactor.connect.bloomberg

import org.apache.kafka.connect.data.{Schema, SchemaBuilder}

import scala.collection.JavaConverters._

/**
  * Derives Connect schemas from runtime values.
  *
  * @param namespace namespace associated with the generated schemas
  */
class ConnectSchema(namespace: String) {

  /**
    * Builds the schema matching the runtime type of `value`.
    *
    * Lists take the schema of their first element (optional string when
    * empty); linked hash maps become structs with one field per entry.
    * Fails via `sys.error` for any other type.
    *
    * @param name  name of the field or record the value belongs to
    * @param value the value to inspect
    */
  def createSchema(name: String, value: Any): Schema = {
    value match {
      case _: Boolean => Schema.BOOLEAN_SCHEMA
      case _: Int => Schema.INT32_SCHEMA
      case _: Long => Schema.INT64_SCHEMA
      case _: Double => Schema.FLOAT64_SCHEMA
      case _: Char => Schema.STRING_SCHEMA
      case _: String => Schema.STRING_SCHEMA
      case _: Float => Schema.FLOAT32_SCHEMA
      case list: java.util.List[_] =>
        val firstItemSchema = if (list.isEmpty) Schema.OPTIONAL_STRING_SCHEMA else createSchema(name, list.get(0))
        SchemaBuilder.array(firstItemSchema).build()
      case map: java.util.LinkedHashMap[String @unchecked, _] =>
        val recordBuilder = SchemaBuilder.struct()
        // NOTE(review): presumably the struct is named after `name`; the call was
        // not visible in the extracted text — confirm.
        recordBuilder.name(name)
        map.entrySet().asScala.foreach(kvp =>
          recordBuilder.field(kvp.getKey, createSchema(kvp.getKey, kvp.getValue)))
        // Without build() this branch would yield Unit rather than a Schema.
        recordBuilder.build()
      case v => sys.error(s"${v.getClass} is not handled.")
    }
  }
}

object ConnectSchema {
  val namespace = "com.datamountaineer.streamreactor.connect.bloomberg"

  val connectSchema = new ConnectSchema(namespace)

  /** Adds `getConnectSchema` to [[BloombergData]]. */
  implicit class BloombergDataToConnectSchema(val data: BloombergData) {
    // NOTE(review): the argument was lost in extraction; assumed to be the payload `data.data`.
    def getConnectSchema: Schema = {
      connectSchema.createSchema("BloombergData", data.data)
    }
  }
}
Example 14
Source File: OrcTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.orc

import com.landoop.streamreactor.connect.hive.{OrcSinkConfig, OrcSourceConfig, StructUtils, orc}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.kafka.connect.data.{SchemaBuilder, Struct}
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

/** Round-trips two structs through an ORC file on the local file system. */
class OrcTest extends AnyFlatSpec with Matchers {

  implicit val conf = new Configuration()
  implicit val fs = FileSystem.getLocal(conf)

  "Orc" should "read and write orc files" in {

    val schema = SchemaBuilder.struct()
      .field("name", SchemaBuilder.string().optional().build())
      .field("age", SchemaBuilder.int32().optional().build())
      .field("salary", SchemaBuilder.float64().optional().build())
      .name("from_orc")
      .build()

    val users = Seq(
      new Struct(schema).put("name", "sammy").put("age", 38).put("salary", 54.67),
      new Struct(schema).put("name", "laura").put("age", 37).put("salary", 91.84)
    )

    val path = new Path("orctest.orc")
    val sink = orc.sink(path, schema, OrcSinkConfig(overwrite = true))
    users.foreach(sink.write)
    sink.close()

    val source = orc.source(path, OrcSourceConfig())
    val actual = source.iterator.toList
    actual.head.schema shouldBe schema
    // NOTE(review): the left-hand side was lost in extraction; reconstructed from the
    // otherwise unused StructUtils import — confirm.
    actual.map(StructUtils.extractValues) shouldBe
      List(Vector("sammy", 38, 54.67), Vector("laura", 37, 91.84))

    fs.delete(path, false)
  }
}
Example 15
Source File: DefaultCommitPolicyTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink.staging

import com.landoop.streamreactor.connect.hive.{Offset, Topic, TopicPartitionOffset}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, LocalFileSystem, Path}
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

import scala.concurrent.duration._

/** Checks the interval, record-count and file-size triggers of DefaultCommitPolicy. */
class DefaultCommitPolicyTest extends AnyWordSpec with Matchers {

  val schema: Schema = SchemaBuilder.struct()
    .field("name", SchemaBuilder.string().required().build())
    .build()

  val struct = new Struct(schema)

  implicit val conf: Configuration = new Configuration()
  implicit val fs: LocalFileSystem = FileSystem.getLocal(conf)
  val tpo = TopicPartitionOffset(Topic("mytopic"), 1, Offset(100))

  /** Evaluates the policy against the current on-disk status of `path`. */
  private def shouldFlush(policy: CommitPolicy, path: Path, count: Long) = {
    val status = fs.getFileStatus(path)
    policy.shouldFlush(CommitContext(tpo, path, count, status.getLen, status.getModificationTime))
  }

  "DefaultCommitPolicy" should {
    "roll over after interval" in {

      val policy = DefaultCommitPolicy(None, Option(2.seconds), None)
      val path = new Path("foo")
      // Close the stream straight away; only the file's existence matters here.
      fs.create(path).close()

      shouldFlush(policy, path, 10) shouldBe false
      Thread.sleep(2000)
      shouldFlush(policy, path, 10) shouldBe true

      fs.delete(path, false)
    }
    "roll over after file count" in {
      val policy = DefaultCommitPolicy(None, None, Some(9))
      val path = new Path("foo")
      // Close the stream straight away; only the file's existence matters here.
      fs.create(path).close()

      shouldFlush(policy, path, 7) shouldBe false
      shouldFlush(policy, path, 8) shouldBe false
      shouldFlush(policy, path, 9) shouldBe true
      shouldFlush(policy, path, 10) shouldBe true

      fs.delete(path, false)
    }
    "roll over after file size" in {
      val policy = DefaultCommitPolicy(Some(10), None, None)
      val path = new Path("foo")
      val out = fs.create(path)
      shouldFlush(policy, path, 7) shouldBe false
      out.writeBytes("wibble wobble wabble wubble")
      out.close()
      shouldFlush(policy, path, 9) shouldBe true
      fs.delete(path, false)
    }
  }
}
Example 16
Source File: DropPartitionValuesMapperTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink.mapper

import cats.data.NonEmptyList
import com.landoop.streamreactor.connect.hive.{PartitionKey, PartitionPlan, TableName}
import org.apache.kafka.connect.data.{SchemaBuilder, Struct}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

import scala.collection.JavaConverters._

/** Verifies that DropPartitionValuesMapper removes partition key fields from the output schema. */
class DropPartitionValuesMapperTest extends AnyFunSuite with Matchers {

  test("strip partition values") {

    val schema = SchemaBuilder.struct()
      .field("a", SchemaBuilder.string().required().build())
      .field("p", SchemaBuilder.string().required().build())
      .field("q", SchemaBuilder.string().required().build())
      .field("z", SchemaBuilder.string().required().build())
      .build()

    val plan = PartitionPlan(TableName("foo"), NonEmptyList.of(PartitionKey("p"), PartitionKey("q")))
    val struct = new Struct(schema).put("a", "a").put("p", "p").put("q", "q").put("z", "z")
    val output = new DropPartitionValuesMapper(plan).map(struct)
    // Compare field names; fields() itself is a Java list of Field objects.
    output.schema().fields().asScala.map(_.name) shouldBe Seq("a", "z")
  }

  test("handle partition field is missing in input") {

    val schema = SchemaBuilder.struct()
      .field("a", SchemaBuilder.string().required().build())
      .field("q", SchemaBuilder.string().required().build())
      .field("z", SchemaBuilder.string().required().build())
      .build()

    val plan = PartitionPlan(TableName("foo"), NonEmptyList.of(PartitionKey("p"), PartitionKey("q")))
    val struct = new Struct(schema).put("a", "a").put("q", "q").put("z", "z")
    val output = new DropPartitionValuesMapper(plan).map(struct)
    output.schema().fields().asScala.map(_.name) shouldBe Seq("a", "z")
  }
}
Example 17
Source File: MetastoreSchemaAlignMapperTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink.mapper

import org.apache.kafka.connect.data.{SchemaBuilder, Struct}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

import scala.collection.JavaConverters._

/** Verifies that MetastoreSchemaAlignMapper reshapes records to the metastore schema's field list. */
class MetastoreSchemaAlignMapperTest extends AnyFunSuite with Matchers {

  test("pad optional missing fields with null") {

    val recordSchema = SchemaBuilder.struct()
      .field("a", SchemaBuilder.string().required().build())
      .field("b", SchemaBuilder.string().required().build())
      .field("c", SchemaBuilder.string().required().build())
      .build()

    val struct = new Struct(recordSchema).put("a", "a").put("b", "b").put("c", "c")

    val metastoreSchema = SchemaBuilder.struct()
      .field("a", SchemaBuilder.string().required().build())
      .field("b", SchemaBuilder.string().required().build())
      .field("c", SchemaBuilder.string().required().build())
      .field("z", SchemaBuilder.string().optional().build())
      .build()

    val output = new MetastoreSchemaAlignMapper(metastoreSchema).map(struct)
    // Compare field names; fields() itself is a Java list of Field objects.
    output.schema().fields().asScala.map(_.name) shouldBe Seq("a", "b", "c", "z")
  }

  test("drop fields not specified in metastore") {

    val recordSchema = SchemaBuilder.struct()
      .field("a", SchemaBuilder.string().required().build())
      .field("b", SchemaBuilder.string().required().build())
      .field("c", SchemaBuilder.string().required().build())
      .build()

    val struct = new Struct(recordSchema).put("a", "a").put("b", "b").put("c", "c")

    val metastoreSchema = SchemaBuilder.struct()
      .field("a", SchemaBuilder.string().required().build())
      .field("b", SchemaBuilder.string().required().build())
      .build()

    val output = new MetastoreSchemaAlignMapper(metastoreSchema).map(struct)
    output.schema().fields().asScala.map(_.name) shouldBe Seq("a", "b")
  }
}
Example 18
Source File: ParquetWriterTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.parquet

import com.landoop.streamreactor.connect.hive.StructUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

/** Round-trips structs through the parquet writer and reader on the local file system. */
class ParquetWriterTest extends AnyWordSpec with Matchers {

  implicit val conf: Configuration = new Configuration()
  implicit val fs: FileSystem = FileSystem.getLocal(conf)

  // Schema shared by both tests: a required name plus two optional columns.
  private val schema: Schema = SchemaBuilder.struct()
    .field("name", SchemaBuilder.string().required().build())
    .field("title", SchemaBuilder.string().optional().build())
    .field("salary", SchemaBuilder.float64().optional().build())
    .build()

  /**
    * Writes `users` to a temporary parquet file and reads every record back.
    * The reader is closed and the file deleted even when writing or reading fails.
    */
  private def roundTrip(users: List[Struct]): List[Struct] = {
    val path = new Path("sinktest.parquet")
    try {
      val writer = parquetWriter(path, schema, ParquetSinkConfig(overwrite = true))
      try users.foreach(writer.write)
      finally writer.close()

      val reader = parquetReader(path)
      // read() is polled until it yields null, which ends the iteration.
      try Iterator.continually(reader.read).takeWhile(_ != null).toList
      finally reader.close()
    } finally {
      fs.delete(path, false)
    }
  }

  // NOTE(review): the comparison expression was truncated in this text; it is rebuilt here
  // with StructUtils.extractValues (the otherwise unused import) — confirm against the project.
  private def assertRoundTrips(users: List[Struct]): Unit =
    roundTrip(users).map(StructUtils.extractValues) shouldBe users.map(StructUtils.extractValues)

  "ParquetWriter" should {
    "write parquet files" in {
      assertRoundTrips(List(
        new Struct(schema).put("name", "sam").put("title", "mr").put("salary", 100.43),
        new Struct(schema).put("name", "laura").put("title", "ms").put("salary", 429.06)
      ))
    }

    "support writing nulls" in {
      assertRoundTrips(List(
        new Struct(schema).put("name", "sam").put("title", null).put("salary", 100.43),
        new Struct(schema).put("name", "laura").put("title", "ms").put("salary", 429.06)
      ))
    }
  }
}
Example 19
Source File: StructFieldsExtractorTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.voltdb

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

/** Checks which fields [[StructFieldsExtractor]] returns and under which names. */
class StructFieldsExtractorTest extends AnyWordSpec with Matchers {

  /**
    * A `com.example.Person` struct with firstName, lastName and age populated;
    * the optional `threshold` field is left unset.
    */
  private def person(): Struct = {
    val schema = SchemaBuilder.struct().name("com.example.Person")
      .field("firstName", Schema.STRING_SCHEMA)
      .field("lastName", Schema.STRING_SCHEMA)
      .field("age", Schema.INT32_SCHEMA)
      .field("threshold", Schema.OPTIONAL_FLOAT64_SCHEMA).build()

    new Struct(schema)
      .put("firstName", "Alex")
      .put("lastName", "Smith")
      .put("age", 30)
  }

  "StructFieldsExtractor" should {
    "return all the fields and their bytes value" in {
      val map = StructFieldsExtractor("table", includeAllFields = true, Map.empty).get(person())

      map("firstName") shouldBe "Alex"
      map("lastName") shouldBe "Smith"
      map("age") shouldBe 30
    }

    "return all fields and apply the mapping" in {
      // Mapped fields are renamed; unmapped ones keep their original name.
      val map = StructFieldsExtractor("table", includeAllFields = true, Map("lastName" -> "Name", "age" -> "a")).get(person())

      map("firstName") shouldBe "Alex"
      map("Name") shouldBe "Smith"
      map("a") shouldBe 30
    }

    "return only the specified fields" in {
      val map = StructFieldsExtractor("table", includeAllFields = false, Map("lastName" -> "Name", "age" -> "age")).get(person())

      map("Name") shouldBe "Smith"
      map("age") shouldBe 30
      map.size shouldBe 2
    }
  }
}
Example 20
Source File: SourceRecordProducers.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.ftp.source

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.source.SourceRecord

/** Builders that turn a file's metadata and body into a Kafka Connect [[SourceRecord]]. */
object SourceRecordProducers {
  type SourceRecordProducer = (ConnectFileMetaDataStore, String, FileMetaData, FileBody) => SourceRecord

  /** Key schema used by [[structKeyRecord]]: the file name plus an int64 offset. */
  val fileInfoSchema: Schema = SchemaBuilder.struct()
    .field("name", Schema.STRING_SCHEMA)
    .field("offset", Schema.INT64_SCHEMA)
    .build()

  /** Produces a record keyed by the file path as a plain string. */
  def stringKeyRecord(store: ConnectFileMetaDataStore, topic: String, meta: FileMetaData, body: FileBody): SourceRecord =
    record(store, topic, meta, body, Schema.STRING_SCHEMA, meta.attribs.path)

  /** Produces a record keyed by a [[fileInfoSchema]] struct holding the file path and the body's offset. */
  def structKeyRecord(store: ConnectFileMetaDataStore, topic: String, meta: FileMetaData, body: FileBody): SourceRecord = {
    val key = new Struct(fileInfoSchema)
      .put("name", meta.attribs.path)
      .put("offset", body.offset)
    record(store, topic, meta, body, fileInfoSchema, key)
  }

  /**
    * Shared construction: source partition and offset come from `store`, the value is
    * always the body's bytes with the BYTES schema; only the key and its schema vary.
    */
  private def record(store: ConnectFileMetaDataStore,
                     topic: String,
                     meta: FileMetaData,
                     body: FileBody,
                     keySchema: Schema,
                     key: Any): SourceRecord =
    new SourceRecord(
      store.fileMetasToConnectPartition(meta), // source partition
      store.fileMetasToConnectOffset(meta),    // source offset
      topic,
      keySchema,
      key,
      Schema.BYTES_SCHEMA,                     // value schema
      body.bytes                               // value
    )
}
Example 21
Source File: StringStructFieldsStringKeyBuilderTest.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.sink

import com.datamountaineer.streamreactor.connect.rowkeys.StringStructFieldsStringKeyBuilder
import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

/** Checks how [[StringStructFieldsStringKeyBuilder]] derives a row key from struct fields. */
class StringStructFieldsStringKeyBuilderTest extends AnyWordSpec with Matchers {

  /** `com.example.Person` schema; the schema of `firstName` can be overridden. */
  private def personSchema(firstNameSchema: Schema = Schema.STRING_SCHEMA): Schema =
    SchemaBuilder.struct().name("com.example.Person")
      .field("firstName", firstNameSchema)
      .field("age", Schema.INT32_SCHEMA)
      .field("threshold", Schema.OPTIONAL_FLOAT64_SCHEMA).build()

  /** Sink record whose value has firstName "Alex" and age 30; `threshold` is left unset. */
  private def alexRecord(schema: Schema = personSchema()): SinkRecord = {
    val struct = new Struct(schema).put("firstName", "Alex").put("age", 30)
    new SinkRecord("sometopic", 1, null, null, schema, struct, 1)
  }

  "StructFieldsStringKeyBuilder" should {
    "raise an exception if the field is not present in the struct" in {
      val sinkRecord = alexRecord()
      // Only the call under test is intercepted, so a failure while building the
      // fixture cannot make this test pass by accident.
      intercept[IllegalArgumentException] {
        StringStructFieldsStringKeyBuilder(Seq("threshold")).build(sinkRecord)
      }
    }

    "create the row key based on one single field in the struct" in {
      StringStructFieldsStringKeyBuilder(Seq("firstName")).build(alexRecord()) shouldBe "Alex"
    }

    "create the row key based on one single field with doc in the struct" in {
      val firstNameSchema = SchemaBuilder.`type`(Schema.Type.STRING).doc("first name")
      val sinkRecord = alexRecord(personSchema(firstNameSchema))
      StringStructFieldsStringKeyBuilder(Seq("firstName")).build(sinkRecord) shouldBe "Alex"
    }

    "create the row key based on more thant one field in the struct" in {
      StringStructFieldsStringKeyBuilder(Seq("firstName", "age")).build(alexRecord()) shouldBe "Alex.30"
    }
  }
}
Example 22
Source File: TestUtilsBase.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
// Test-helper fragment: builds a mocked SourceTaskContext whose OffsetStorageReader returns one
// stored offset (offsetColumn -> offsetValue) for a single partition (lookupPartitionKey -> table).
// NOTE(review): the enclosing trait/method declaration is missing from this text and two import
// lines are truncated (`import{Schema, ...}`, `import import org.mockito...`); OffsetStorageReader
// is used but never imported here. Everything after the first `//` also sits on the same physical
// line, so line breaks must be restored before this can compile.
package com.datamountaineer.streamreactor.connect import java.util import java.util.Collections import org.apache.avro.generic.{GenericData, GenericRecord} import{Schema, SchemaBuilder, Struct} import org.apache.kafka.connect.sink.SinkRecord import org.apache.kafka.connect.source.SourceTaskContext import import org.mockito.Mockito._ import org.mockito.MockitoSugar import org.scalatest.BeforeAndAfter import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec import scala.collection.JavaConverters._ //set up partition val partition: util.Map[String, String] = Collections.singletonMap(lookupPartitionKey, table) //as a list to search for val partitionList: util.List[util.Map[String, String]] = List(partition).asJava //set up the offset val offset: util.Map[String, Object] = (Collections.singletonMap(offsetColumn,offsetValue )) //create offsets to initialize from val offsets :util.Map[util.Map[String, String],util.Map[String, Object]] = Map(partition -> offset).asJava //mock out reader and task context val taskContext = mock[SourceTaskContext] val reader = mock[OffsetStorageReader] when(reader.offsets(partitionList)).thenReturn(offsets) when(taskContext.offsetStorageReader()).thenReturn(reader) taskContext } }
Example 23
Source File: StructFieldExtractorTest.scala From kafka-connect-common with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.schemas

import org.apache.kafka.connect.data.{Date, Schema, SchemaBuilder, Struct}
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

/** Checks which fields [[StructFieldsExtractor]] returns, under which names, and how Date values pass through. */
class StructFieldExtractorTest extends AnyWordSpec with Matchers {

  /**
    * A `com.example.Person` struct with firstName, lastName and age populated;
    * the optional `threshold` field is left unset.
    */
  private def person(): Struct = {
    val schema = SchemaBuilder.struct().name("com.example.Person")
      .field("firstName", Schema.STRING_SCHEMA)
      .field("lastName", Schema.STRING_SCHEMA)
      .field("age", Schema.INT32_SCHEMA)
      .field("threshold", Schema.OPTIONAL_FLOAT64_SCHEMA).build()

    new Struct(schema)
      .put("firstName", "Alex")
      .put("lastName", "Smith")
      .put("age", 30)
  }

  "StructFieldExtractor" should {
    "return all the fields and their bytes value" in {
      val map = new StructFieldsExtractor(true, Map.empty).get(person()).toMap

      map("firstName") shouldBe "Alex"
      map("lastName") shouldBe "Smith"
      map("age") shouldBe 30
    }

    "return all fields and apply the mapping" in {
      // Mapped fields are renamed; unmapped ones keep their original name.
      val map = new StructFieldsExtractor(true, Map("lastName" -> "Name", "age" -> "a")).get(person()).toMap

      map("firstName") shouldBe "Alex"
      map("Name") shouldBe "Smith"
      map("a") shouldBe 30
    }

    "return only the specified fields" in {
      val map = new StructFieldsExtractor(false, Map("lastName" -> "Name", "age" -> "age")).get(person()).toMap

      map("Name") shouldBe "Smith"
      map("age") shouldBe 30
      map.size shouldBe 2
    }

    // Previously this case sat outside the `should` block, so it was registered without
    // the "StructFieldExtractor should" prefix; it now lives with its siblings.
    "handle Date fields" in {
      val dateSchema = Date.builder().build()
      val schema = SchemaBuilder.struct().name("com.example.Person")
        .field("firstName", Schema.STRING_SCHEMA)
        .field("lastName", Schema.STRING_SCHEMA)
        .field("age", Schema.INT32_SCHEMA)
        .field("date", dateSchema).build()

      val date = java.sql.Date.valueOf("2017-04-25")
      val struct = new Struct(schema)
        .put("firstName", "Alex")
        .put("lastName", "Smith")
        .put("age", 30)
        .put("date", date)

      val map1 = new StructFieldsExtractor(false, Map("date" -> "date")).get(struct).toMap
      map1("date") shouldBe date
      map1.size shouldBe 1

      // Same struct, now holding the value produced by Connect's own logical Date conversion.
      val d = Date.toLogical(dateSchema, 10000)
      struct.put("date", d)

      val map2 = new StructFieldsExtractor(false, Map("date" -> "date")).get(struct).toMap
      map2("date") shouldBe d
      map2.size shouldBe 1
    }
  }
}
Example 24
Source File: Output.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.cassandra.sink

import java.util

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}

/** Output part of a transaction, as used by the sink tests. */
case class Output(addr_tag_link: Option[String],
                  addr_tag: Option[String],
                  spent: Boolean,
                  tx_index: Long,
                  `type`: Int,
                  addr: Option[String],
                  value: Long,
                  n: Int,
                  script: String) {

  /**
    * Copies this output into a mutable Java map keyed by field name.
    * Optional fields that are `None` are omitted rather than stored as null.
    */
  def toHashMap: util.HashMap[String, Any] = {
    val map = new util.HashMap[String, Any]()
    addr_tag_link.foreach(map.put("addr_tag_link", _))
    // Fixed: "addr_tag" used to be filled from addr_tag_link (copy-paste slip).
    addr_tag.foreach(map.put("addr_tag", _))
    map.put("spent", spent)
    map.put("tx_index", tx_index)
    map.put("type", `type`)
    addr.foreach(map.put("addr", _))
    map.put("value", value)
    map.put("n", n)
    map.put("script", script)
    map
  }
}

object Output {

  /** Connect schema mirroring the fields of [[Output]]. */
  val ConnectSchema: Schema = SchemaBuilder.struct
    .name("datamountaineer.blockchain.output")
    .doc("The output instance part of a transaction.")
    .field("addr_tag_link", Schema.OPTIONAL_STRING_SCHEMA)
    .field("addr_tag", Schema.OPTIONAL_STRING_SCHEMA)
    .field("spent", Schema.BOOLEAN_SCHEMA)
    .field("tx_index", Schema.INT64_SCHEMA)
    .field("type", Schema.OPTIONAL_INT32_SCHEMA)
    .field("addr", Schema.OPTIONAL_STRING_SCHEMA)
    .field("value", Schema.INT64_SCHEMA)
    .field("n", Schema.INT32_SCHEMA)
    .field("script", Schema.STRING_SCHEMA)
    .build()

  implicit class OutputToStructConverter(val output: Output) extends AnyVal {

    /** Builds a [[Struct]] for [[ConnectSchema]]; optional fields are set only when defined. */
    def toStruct(): Struct = {
      val struct = new Struct(ConnectSchema)
        .put("spent", output.spent)
        .put("tx_index", output.tx_index)
        .put("type", output.`type`)
        .put("value", output.value)
        .put("n", output.n)
        .put("script", output.script)

      output.addr.foreach(struct.put("addr", _))
      output.addr_tag.foreach(struct.put("addr_tag", _))
      output.addr_tag_link.foreach(struct.put("addr_tag_link", _))
      struct
    }
  }
}
Example 25
Source File: SchemaSpec.scala From kafka-connect-cassandra with Apache License 2.0 | 5 votes |
// Spec for the sink schema/route/query mapping: builds sink configs and SinkRecords and checks
// column names, routes and the generated INSERT statements; also checks the conversion of
// Cassandra column definitions to a Connect schema.
// NOTE(review): several expressions in this text are truncated and will not compile as shown:
//   - `val schema ="record").version(1)...` (the builder expression before `"record")` is missing);
//   - `new SinkRecord(topic, 1,, "key", ...)` (the key-schema argument is missing);
//   - `val query = query.cql should be(...)` (the right-hand side of `val query =` is missing);
//   - `schemaMap.columnNames should ===( schemaMap is record ...` (the expected value is missing);
//   - the kafka-connect data import (`import{Schema, SchemaBuilder, Struct}`).
// NOTE(review): the last test defines `struct` but passes `value` to SinkRecord — `value` is not
// defined in that test as shown; confirm whether `struct` was intended.
package com.tuplejump.kafka.connect.cassandra import com.datastax.driver.core.{ DataType, TestUtil} import{Schema, SchemaBuilder, Struct} import org.apache.kafka.connect.sink.SinkRecord class SchemaSpec extends AbstractFlatSpec { it should "convert a struct schema with single field" in { val topic = "topicx" val sc = sinkConfig(topic, "keyspacex", "tablex", List("id")) sc.options.consistency should be (TaskConfig.DefaultSinkConsistency) sc.schema.columnNames should === (List("id")) sc.query.cql should be ("INSERT INTO keyspacex.tablex(id) VALUES(?)") val schema ="record").version(1).field("id", Schema.INT32_SCHEMA).build val value = new Struct(schema).put("id", 1) val record = new SinkRecord(topic, 1,, "key", schema, value, 0) sc.schema.route.topic should be (record.topic) sc.schema.route.keyspace should be ("keyspacex") sc.schema.route.table should be ("tablex") sc.schema is record should be (true) val query = query.cql should be("INSERT INTO keyspacex.tablex(id) VALUES(1)") } it should "convert a struct schema with multiple fields" in { val topic = "test_kfk" val sc = sinkConfig(topic, "keyspacex", "tablex", List("available", "name", "age")) val schema ="record").version(1) .field("available", Schema.BOOLEAN_SCHEMA) .field("name", Schema.STRING_SCHEMA) .field("age", Schema.INT32_SCHEMA).build val value = new Struct(schema).put("name", "user").put("available", false).put("age", 15) val record = new SinkRecord("test_kfk", 1,, "key", schema, value, 0) schema.asColumnNames should be (sc.schema.columnNames) sc.schema.route.topic should be (record.topic) sc.schema is record should be (true) sc.query.cql should be ("INSERT INTO keyspacex.tablex(available,name,age) VALUES(?,?,?)") val query = query.cql should be("INSERT INTO keyspacex.tablex(available,name,age) VALUES(false,'user',15)") } it should "convert cassandra column defs to a source schema" in { val colDef = Map( "id" -> DataType.cint(), "name" -> DataType.varchar()) val columns = TestUtil.getColumnDef(colDef) 
val expectedSchema = SchemaBuilder.struct() .field("id", Schema.INT32_SCHEMA) .field("name", Schema.STRING_SCHEMA).build() columns.asSchema should be(expectedSchema) } it should "convert kafka schema and struct to cassandra columns and schema mapping" in { import scala.collection.JavaConverters._ val topic = "a" val route = InternalConfig.Route(TaskConfig.SinkRoute + topic, "ks1.t1").get val schemaMap = new InternalConfig.Schema(route, Nil, Nil, Nil, List("available","name","age"), "") val schema ="record").version(1) .field("available", Schema.BOOLEAN_SCHEMA) .field("name", Schema.STRING_SCHEMA) .field("age", Schema.INT32_SCHEMA).build val struct = new Struct(schema).put("name", "user").put("available", false).put("age", 15) val record = new SinkRecord(topic, 1,, "key", schema, value, 0) schema.asColumnNames should ===(schemaMap.columnNames) schemaMap.columnNames should ===( schemaMap is record should be (true) } }
Example 26
Source File: IotMessageConverter.scala From toketi-kafka-connect-iothub with MIT License | 5 votes |
// IotMessageConverter: defines the Connect schema for an IoT Hub message and getIotMessageStruct,
// which copies device id, offset, content type, enqueued time, sequence number, content and the
// two property maps of an IotMessage into a Struct. getOrDefaultAndRemove reads a key from a
// mutable map and removes it, so the extracted system properties no longer appear in the map
// stored under "systemProperties".
// NOTE(review): this text is truncated in several places and will not compile as shown: the
// package name, two import lines, the right-hand sides of `propertiesMapSchema`, `val properties`
// and `val systemPropertiesMap`. Because the file starts with a `//` comment and is collapsed onto
// one physical line, line breaks must also be restored.
// NOTE(review): `schema` ends at the last `.field(...)` with no `.build()` visible — confirm
// whether that is intentional. getEnqueuedTime uses a null default to signal absence.
// Copyright (c) Microsoft. All rights reserved. package import java.time.Instant import java.util.Date import import{Schema, SchemaBuilder, Struct} import scala.collection.JavaConverters._ import scala.reflect.ClassTag object IotMessageConverter { val offsetKey = "offset" private val schemaName = "iothub.kafka.connect" private val schemaVersion = 1 private val deviceIdKey = "deviceId" private val contentTypeKey = "contentType" private val sequenceNumberKey = "sequenceNumber" private val enqueuedTimeKey = "enqueuedTime" private val contentKey = "content" private val systemPropertiesKey = "systemProperties" private val propertiesKey = "properties" private val deviceIdIotHubKey = "iothub-connection-device-id" // Public for testing purposes lazy val schema: Schema = SchemaBuilder.struct() .name(schemaName) .version(schemaVersion) .field(deviceIdKey, Schema.STRING_SCHEMA) .field(offsetKey, Schema.STRING_SCHEMA) .field(contentTypeKey, Schema.OPTIONAL_STRING_SCHEMA) .field(enqueuedTimeKey, Schema.STRING_SCHEMA) .field(sequenceNumberKey, Schema.INT64_SCHEMA) .field(contentKey, Schema.STRING_SCHEMA) .field(systemPropertiesKey, propertiesMapSchema) .field(propertiesKey, propertiesMapSchema) private lazy val propertiesMapSchema: Schema =, Schema.STRING_SCHEMA) def getIotMessageStruct(iotMessage: IotMessage): Struct = { val systemProperties = iotMessage.systemProperties val deviceId: String = getOrDefaultAndRemove(systemProperties, deviceIdIotHubKey, "") val offset: String = getOrDefaultAndRemove(systemProperties, AmqpConstants.OFFSET_ANNOTATION_NAME, "") val sequenceNumber: Long = getOrDefaultAndRemove(systemProperties, AmqpConstants.SEQUENCE_NUMBER_ANNOTATION_NAME, 0) val enqueuedTime: Option[Instant] = getEnqueuedTime(systemProperties) val enqueuedTimeStr = if(enqueuedTime.isDefined) enqueuedTime.get.toString else "" val properties = val contentType: String = getOrDefaultAndRemove(properties, contentTypeKey, "") val systemPropertiesMap = => (i._1, i._2.toString)) new 
Struct(schema) .put(deviceIdKey, deviceId) .put(offsetKey, offset) .put(contentTypeKey, contentType) .put(enqueuedTimeKey, enqueuedTimeStr) .put(sequenceNumberKey, sequenceNumber) .put(contentKey, iotMessage.content) .put(systemPropertiesKey, systemPropertiesMap.asJava) .put(propertiesKey, properties.asJava) } private def getEnqueuedTime(map: scala.collection.mutable.Map[String, Object]): Option[Instant] = { val enqueuedTimeValue: Date = getOrDefaultAndRemove(map, AmqpConstants.ENQUEUED_TIME_UTC_ANNOTATION_NAME, null) if (enqueuedTimeValue != null) Some(enqueuedTimeValue.toInstant) else None } private def getOrDefaultAndRemove[T: ClassTag, S: ClassTag](map: scala.collection.mutable.Map[String, S], key: String, defaultVal: T): T = { if (map.contains(key)) { val retVal: T = map(key).asInstanceOf[T] map.remove(key) retVal } else { defaultVal } } }
Example 27
Source File: ConnectMongoConverterSpec.scala From kafka-connect-mongodb with Apache License 2.0 | 5 votes |
// Spec for the two ConnectMongoConverter implementations. The schemaless converter is expected to
// accept a java.util.HashMap and reject a Struct; the schema-based converter is expected to accept
// a Struct and reject a HashMap. "Reject" here means a ClassCastException, which each bad-data
// test records in a local flag. checkJsonMap asserts that the three fixture fields come back
// with their original values.
// NOTE(review): the package name and the kafka-connect data import are truncated in this text
// (`package import java.lang.Boolean`, `import{Schema, SchemaBuilder, Struct}`), so it does not
// compile as shown.
// NOTE(review): `new Integer(5)` / `new Boolean(true)` use boxed-type constructors that are
// deprecated since Java 9; the try/catch + flag pattern could be ScalaTest's `intercept` —
// left unchanged here.
package import java.lang.Boolean import java.util import{Schema, SchemaBuilder, Struct} import org.scalatest.{FlatSpec, Matchers} class ConnectMongoConverterSpec extends FlatSpec with Matchers{ private val FIELD1_NAME = "fieldInt" private val FIELD1_VALUE = new Integer(5) private val FIELD2_NAME = "fieldString" private val FIELD2_VALUE = "str" private val FIELD3_NAME = "fieldBoolean" private val FIELD3_VALUE = new Boolean(true) val schema = SchemaBuilder.struct().name("test schema") .field(FIELD1_NAME, Schema.INT32_SCHEMA) .field(FIELD2_NAME, Schema.STRING_SCHEMA) .field(FIELD3_NAME, Schema.BOOLEAN_SCHEMA) .build() "No Schema Connect Mongo Converter Bad Data" should "throw an exception" in { var exceptionThrown = false val badData = new Struct(schema) try{ checkJsonMap(NoSchemaConnectMongoConverter, badData) } catch { case _ : java.lang.ClassCastException => exceptionThrown = true } exceptionThrown should be(true) } "No Schema Connect Mongo Converter Good Data" should "return the same map" in { val jsonMap = new util.HashMap[String, Object]() jsonMap.put(FIELD1_NAME, FIELD1_VALUE) jsonMap.put(FIELD2_NAME, FIELD2_VALUE) jsonMap.put(FIELD3_NAME, FIELD3_VALUE) checkJsonMap(NoSchemaConnectMongoConverter, jsonMap) } "Schema Connect Mongo Converter Bad Data" should "throw an exception" in { var exceptionThrown = false val badData = new util.HashMap[String, Object]() badData.put(FIELD1_NAME, FIELD1_VALUE) try { checkJsonMap(SchemaConnectMongoConverter, badData) } catch { case _ : java.lang.ClassCastException => exceptionThrown = true } exceptionThrown should be(true) } "Schema Connect Mongo Converter Good Data" should "convert data to json map" in { val data = new Struct(schema) .put(FIELD1_NAME, FIELD1_VALUE) .put(FIELD2_NAME, FIELD2_VALUE) .put(FIELD3_NAME, FIELD3_VALUE) checkJsonMap(SchemaConnectMongoConverter, data) } private def checkJsonMap(converter : ConnectMongoConverter, value: Object): Unit ={ val newJsonMap = converter.toJsonMap(value).toMap 
newJsonMap(FIELD1_NAME) should be(FIELD1_VALUE) newJsonMap(FIELD2_NAME) should be(FIELD2_VALUE) newJsonMap(FIELD3_NAME) should be(FIELD3_VALUE) } }
Example 28
Source File: Input.scala From stream-reactor with Apache License 2.0 | 5 votes |
// Input: the input part of a transaction, its Connect schema ("datamountaineer.blockchain.input")
// and an implicit toStruct() conversion. toStruct always sets "sequence" and "script" and sets
// "prev_out" only when the option is defined.
// NOTE(review): the package name and the kafka-connect data import are truncated in this text
// (`package import{Schema, SchemaBuilder, Struct}`), so it does not compile as shown.
// NOTE(review): "prev_out" is declared with Output.ConnectSchema as-is while the value is an
// Option — confirm that schema is optional, otherwise a struct without prev_out may fail validation.
package import{Schema, SchemaBuilder, Struct} case class Input(sequence: Long, prev_out: Option[Output], script: String) object Input { val ConnectSchema: Schema = SchemaBuilder.struct .name("datamountaineer.blockchain.input") .doc("The input instance part of a transaction.") .field("sequence", Schema.INT64_SCHEMA) .field("prev_out", Output.ConnectSchema) .field("script", Schema.STRING_SCHEMA) .build() implicit class InputToStructConverter(val input: Input) extends AnyVal { def toStruct(): Struct = { val struct = new Struct(ConnectSchema) .put("sequence", input.sequence) .put("script", input.script) input.prev_out.foreach(po=>struct.put("prev_out", po.toStruct())) struct } } }
Example 29
Source File: Output.scala From stream-reactor with Apache License 2.0 | 5 votes |
// Output: the output part of a transaction, its Connect schema ("datamountaineer.blockchain.output")
// and an implicit toStruct() conversion. toStruct always sets the six non-optional fields and
// sets addr, addr_tag and addr_tag_link only when the corresponding option is defined.
// NOTE(review): the package name and the kafka-connect data import are truncated in this text
// (`package import{Schema, SchemaBuilder, Struct}`), so it does not compile as shown.
package import{Schema, SchemaBuilder, Struct} case class Output(addr_tag_link: Option[String], addr_tag: Option[String], spent: Boolean, tx_index: Long, `type`: Int, addr: Option[String], value: Long, n: Int, script: String) object Output { val ConnectSchema: Schema = SchemaBuilder.struct .name("datamountaineer.blockchain.output") .doc("The output instance part of a transaction.") .field("addr_tag_link", Schema.OPTIONAL_STRING_SCHEMA) .field("addr_tag", Schema.OPTIONAL_STRING_SCHEMA) .field("spent", Schema.BOOLEAN_SCHEMA) .field("tx_index", Schema.INT64_SCHEMA) .field("type", Schema.INT32_SCHEMA) .field("addr", Schema.OPTIONAL_STRING_SCHEMA) .field("value", Schema.INT64_SCHEMA) .field("n", Schema.INT32_SCHEMA) .field("script", Schema.STRING_SCHEMA) .build() implicit class OutputToStructConverter(val output: Output) extends AnyVal { def toStruct(): Struct = { val struct = new Struct(ConnectSchema) .put("spent", output.spent) .put("tx_index", output.tx_index) .put("type", output.`type`) .put("value", output.value) .put("n", output.n) .put("script", output.script) output.addr.foreach(struct.put("addr", _)) output.addr_tag.foreach(struct.put("addr_tag", _)) output.addr_tag_link.foreach(struct.put("addr_tag_link", _)) struct } } }
Example 30
Source File: Transaction.scala From stream-reactor with Apache License 2.0 | 5 votes |
// Transaction: a blockchain transaction, its Connect schema and two implicit conversions.
// toSourceRecord wraps the struct in a SourceRecord with null source partition/offset and an
// optional string key; toStruct copies the scalar fields and, when non-empty, the inputs and
// outputs as lists of structs.
// NOTE(review): this text is truncated and will not compile as shown: the package name, one
// import line, the kafka-connect data import, the key-schema argument in toSourceRecord
// (`=> Schema.STRING_SCHEMA).orNull`) and the value passed to `struct.put("out", ...)`.
// Everything after the `//private def getOffset()` comment also shares its physical line.
// NOTE(review): the final `tx.out.headOption.foreach` block fills a local `outputs` list that is
// never stored in the struct — as shown it is dead code; "out" is set by the earlier block.
package import java.util import import{Schema, SchemaBuilder, Struct} import org.apache.kafka.connect.source.SourceRecord case class Transaction(lock_time: Long, ver: Int, size: Long, inputs: Seq[Input], rbf: Option[Boolean], time: Long, tx_index: Long, vin_sz: Int, hash: String, vout_sz: Int, relayed_by: String, out: Seq[Output]) object Transaction { val ConnectSchema: Schema = SchemaBuilder.struct .name("datamountaineer.blockchain.transaction") .field("lock_time", Schema.INT64_SCHEMA) .field("ver", Schema.INT32_SCHEMA) .field("size", Schema.INT64_SCHEMA) .field("inputs", SchemaBuilder.array(Input.ConnectSchema).optional().build()) .field("rbf", Schema.OPTIONAL_BOOLEAN_SCHEMA) .field("time", Schema.INT64_SCHEMA) .field("tx_index", Schema.INT64_SCHEMA) .field("vin_sz", Schema.INT32_SCHEMA) .field("hash", Schema.STRING_SCHEMA) .field("vout_sz", Schema.INT32_SCHEMA) .field("relayed_by", Schema.STRING_SCHEMA) .field("out", SchemaBuilder.array(Output.ConnectSchema).optional().build()) .build() implicit class TransactionToSourceRecordConverter(val tx: Transaction) extends AnyVal { def toSourceRecord(topic: String, partition: Int, key: Option[String]): SourceRecord = { new SourceRecord( null, null, topic, partition, => Schema.STRING_SCHEMA).orNull, key.orNull, ConnectSchema, tx.toStruct() ) } //private def getOffset() = Collections.singletonMap("position", System.currentTimeMillis()) def toStruct(): Struct = { val struct = new Struct(ConnectSchema) .put("lock_time", tx.lock_time) .put("ver", tx.ver) .put("size", tx.size) .put("time", tx.time) .put("tx_index", tx.tx_index) .put("vin_sz", tx.vin_sz) .put("hash", tx.hash) .put("vout_sz", tx.vout_sz) .put("relayed_by", tx.relayed_by) tx.out.headOption.foreach { _ => import scala.collection.JavaConverters._ struct.put("out", } tx.rbf.foreach(struct.put("rbf", _)) tx.inputs.headOption.foreach { _ => val inputs = new util.ArrayList[Struct] tx.inputs.foreach(i => inputs.add(i.toStruct())) struct.put("inputs", inputs) } 
tx.out.headOption.foreach { _ => val outputs = new util.ArrayList[Struct] tx.out.foreach(output => outputs.add(output.toStruct())) } struct } } }
Example 31
Source File: Input.scala From stream-reactor with Apache License 2.0 | 5 votes |
// Input: the input part of a transaction with two conversions. toHashMap copies the fields into
// a java.util.HashMap (prev_out only when defined, as a nested map); the implicit toStruct()
// builds a Struct for ConnectSchema (schema name "input"), again setting prev_out only when defined.
// NOTE(review): the package name and the kafka-connect data import are truncated in this text
// (`package import java.util import{Schema, SchemaBuilder, Struct}`), so it does not compile as shown.
// NOTE(review): ConnectSchema and toStruct() have no explicit result types; "prev_out" is declared
// with Output.ConnectSchema as-is while the value is an Option — confirm that schema is optional.
package import java.util import{Schema, SchemaBuilder, Struct} case class Input(sequence: Long, prev_out: Option[Output], script: String) { def toHashMap: util.HashMap[String, Any] = { val map = new util.HashMap[String, Any]() map.put("sequence", sequence) prev_out.foreach(p => map.put("prev_out", p.toHashMap)) map.put("script", script) map } } object Input { val ConnectSchema = SchemaBuilder.struct .name("input") .doc("The input instance part of a transaction.") .field("sequence", Schema.INT64_SCHEMA) .field("prev_out", Output.ConnectSchema) .field("script", Schema.STRING_SCHEMA) .build() implicit class InputToStructConverter(val input: Input) extends AnyVal { def toStruct() = { val struct = new Struct(ConnectSchema) .put("sequence", input.sequence) .put("script", input.script) input.prev_out.foreach(po => struct.put("prev_out", po.toStruct())) struct } } }
Example 32
Source File: Output.scala From stream-reactor with Apache License 2.0 | 5 votes |
// Output: the output part of a transaction with two conversions. toHashMap copies the fields into
// a java.util.HashMap, omitting optional fields that are None; the implicit toStruct() builds a
// Struct for ConnectSchema (schema name "output"), setting optional fields only when defined.
// NOTE(review): the package name and the kafka-connect data import are truncated in this text
// (`package import java.util import{Schema, SchemaBuilder, Struct}`), so it does not compile as shown.
// NOTE(review): likely copy-paste bug in toHashMap — the "addr_tag" entry is written from
// `addr_tag_link`, not `addr_tag`, so addr_tag's own value never reaches the map. toStruct()
// reads the correct field.
package import java.util import{Schema, SchemaBuilder, Struct} case class Output(addr_tag_link: Option[String], addr_tag: Option[String], spent: Boolean, tx_index: Long, `type`: Int, addr: Option[String], value: Long, n: Int, script: String) { def toHashMap: util.HashMap[String, Any] = { val map = new util.HashMap[String, Any]() addr_tag_link.foreach(map.put("addr_tag_link", _)) addr_tag_link.foreach(map.put("addr_tag", _)) map.put("spent", spent) map.put("tx_index", tx_index) map.put("type", `type`) addr.foreach(map.put("addr", _)) map.put("value", value) map.put("n", n) map.put("script", script) map } } object Output { val ConnectSchema: Schema = SchemaBuilder.struct .name("output") .doc("The output instance part of a transaction.") .field("addr_tag_link", Schema.OPTIONAL_STRING_SCHEMA) .field("addr_tag", Schema.OPTIONAL_STRING_SCHEMA) .field("spent", Schema.BOOLEAN_SCHEMA) .field("tx_index", Schema.INT64_SCHEMA) .field("type", Schema.OPTIONAL_INT32_SCHEMA) .field("addr", Schema.OPTIONAL_STRING_SCHEMA) .field("value", Schema.INT64_SCHEMA) .field("n", Schema.INT32_SCHEMA) .field("script", Schema.STRING_SCHEMA) .build() implicit class OutputToStructConverter(val output: Output) extends AnyVal { def toStruct() = { val struct = new Struct(ConnectSchema) .put("spent", output.spent) .put("tx_index", output.tx_index) .put("type", output.`type`) .put("value", output.value) .put("n", output.n) .put("script", output.script) output.addr.foreach(struct.put("addr", _)) output.addr_tag.foreach(struct.put("addr_tag", _)) output.addr_tag_link.foreach(struct.put("addr_tag_link", _)) struct } } }
Example 33
Source File: Input.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.mongodb

import java.util

import org.apache.kafka.connect.data.{Schema, SchemaBuilder, Struct}

/** Input part of a transaction, as used by the sink tests. */
case class Input(sequence: Long, prev_out: Option[Output], script: String) {

  /**
    * Copies this input into a mutable Java map keyed by field name.
    * `prev_out` is stored as a nested map and omitted when it is `None`.
    */
  def toHashMap: util.HashMap[String, Any] = {
    val map = new util.HashMap[String, Any]()
    map.put("sequence", sequence)
    prev_out.foreach(p => map.put("prev_out", p.toHashMap))
    map.put("script", script)
    map
  }
}

object Input {

  /** Connect schema mirroring the fields of [[Input]]. */
  // NOTE(review): "prev_out" uses Output.ConnectSchema as-is while the value is an Option —
  // confirm that schema is optional, otherwise a struct without prev_out may fail validation.
  val ConnectSchema: Schema = SchemaBuilder.struct
    .name("datamountaineer.blockchain.input")
    .doc("The input instance part of a transaction.")
    .field("sequence", Schema.INT64_SCHEMA)
    .field("prev_out", Output.ConnectSchema)
    .field("script", Schema.STRING_SCHEMA)
    .build()

  implicit class InputToStructConverter(val input: Input) extends AnyVal {

    /** Builds a [[Struct]] for [[ConnectSchema]]; `prev_out` is set only when defined. */
    def toStruct(): Struct = {
      val struct = new Struct(ConnectSchema)
        .put("sequence", input.sequence)
        .put("script", input.script)

      input.prev_out.foreach(po => struct.put("prev_out", po.toStruct()))
      struct
    }
  }
}
Example 34
Source File: Output.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.mongodb

import java.util
// NOTE(review): the import below is kept as found; its package path appears to be missing
// in this listing (presumably org.apache.kafka.connect.data — confirm).
import{Schema, SchemaBuilder, Struct}

/**
  * The output part of a transaction.
  *
  * The three `Option` fields are only emitted by [[toHashMap]] / `toStruct()` when defined.
  */
case class Output(addr_tag_link: Option[String],
                  addr_tag: Option[String],
                  spent: Boolean,
                  tx_index: Long,
                  `type`: Int,
                  addr: Option[String],
                  value: Long,
                  n: Int,
                  script: String) {

  /**
    * Copies this output into a new mutable `java.util.HashMap` keyed by field name.
    *
    * @return a fresh map; entries for `addr_tag_link`, `addr_tag` and `addr` are present
    *         only when the corresponding option is defined
    */
  def toHashMap: util.HashMap[String, Any] = {
    val map = new util.HashMap[String, Any]()
    addr_tag_link.foreach(map.put("addr_tag_link", _))
    // Fixed: "addr_tag" used to be populated from addr_tag_link (copy-paste slip), so the
    // real addr_tag value was never written and the link was stored under both keys.
    addr_tag.foreach(map.put("addr_tag", _))
    map.put("spent", spent)
    map.put("tx_index", tx_index)
    map.put("type", `type`)
    addr.foreach(map.put("addr", _))
    map.put("value", value)
    map.put("n", n)
    map.put("script", script)
    map
  }
}

object Output {

  /** Connect schema describing an [[Output]]; field names match the keys used by `toHashMap`. */
  val ConnectSchema: Schema = SchemaBuilder.struct
    .name("datamountaineer.blockchain.output")
    .doc("The output instance part of a transaction.")
    .field("addr_tag_link", Schema.OPTIONAL_STRING_SCHEMA)
    .field("addr_tag", Schema.OPTIONAL_STRING_SCHEMA)
    .field("spent", Schema.BOOLEAN_SCHEMA)
    .field("tx_index", Schema.INT64_SCHEMA)
    .field("type", Schema.OPTIONAL_INT32_SCHEMA)
    .field("addr", Schema.OPTIONAL_STRING_SCHEMA)
    .field("value", Schema.INT64_SCHEMA)
    .field("n", Schema.INT32_SCHEMA)
    .field("script", Schema.STRING_SCHEMA)
    .build()

  /** Adds `toStruct()` to [[Output]] instances. */
  implicit class OutputToStructConverter(val output: Output) extends AnyVal {

    /**
      * Builds a `Struct` over [[ConnectSchema]] from the wrapped output.
      *
      * @return the populated struct; optional fields are set only when defined
      */
    def toStruct(): Struct = {
      val struct = new Struct(ConnectSchema)
        .put("spent", output.spent)
        .put("tx_index", output.tx_index)
        .put("type", output.`type`)
        .put("value", output.value)
        .put("n", output.n)
        .put("script", output.script)
      output.addr.foreach(struct.put("addr", _))
      output.addr_tag.foreach(struct.put("addr_tag", _))
      output.addr_tag_link.foreach(struct.put("addr_tag_link", _))
      struct
    }
  }
}
Example 35
Source File: Input.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.cassandra.sink

import java.util
// NOTE(review): the import below is kept as found; its package path appears to be missing
// in this listing (presumably org.apache.kafka.connect.data — confirm).
import{Schema, SchemaBuilder, Struct}

/** The input part of a transaction; `prev_out` is only emitted when defined. */
case class Input(sequence: Long, prev_out: Option[Output], script: String) {

  /** Copies this input into a new `java.util.HashMap`, nesting `prev_out` as its own map. */
  def toHashMap: util.HashMap[String, Any] = {
    val fields = new util.HashMap[String, Any]()
    fields.put("sequence", sequence)
    prev_out.foreach { previous =>
      fields.put("prev_out", previous.toHashMap)
    }
    fields.put("script", script)
    fields
  }
}

object Input {

  /** Connect schema describing an [[Input]]; `prev_out` reuses `Output.ConnectSchema`. */
  val ConnectSchema =
    SchemaBuilder.struct
      .name("datamountaineer.blockchain.input")
      .doc("The input instance part of a transaction.")
      .field("sequence", Schema.INT64_SCHEMA)
      .field("prev_out", Output.ConnectSchema)
      .field("script", Schema.STRING_SCHEMA)
      .build()

  /** Adds `toStruct()` to [[Input]] instances. */
  implicit class InputToStructConverter(val input: Input) extends AnyVal {

    /** Builds a `Struct` over [[ConnectSchema]]; `prev_out` is set only when defined. */
    def toStruct() = {
      val record = new Struct(ConnectSchema)
      record.put("sequence", input.sequence)
      record.put("script", input.script)
      for (previous <- input.prev_out) {
        record.put("prev_out", previous.toStruct())
      }
      record
    }
  }
}
Example 36
Source File: CassandraSinkTaskSpec.scala From kafka-connect-cassandra with Apache License 2.0 | 5 votes |
// Spec for CassandraSinkTask with two cases:
//  1. "start sink task": initialize with a mocked SinkTaskContext, start with the sink
//     properties mapping topic "test_kv_topic" to table "test.kv", then stop.
//  2. "save records in cassandra": put two struct records (key/value) through the task, then
//     query `select count(1)` on the table via CassandraCluster.local and expect 2 rows.
// NOTE(review): this listing looks truncated in places — `val valueSchema ="record")` has no
// builder expression, both SinkRecord constructors have an empty argument (`1,,`), and the
// `import{...}` clause has no package path. Restore from the upstream file before compiling.
package com.tuplejump.kafka.connect.cassandra import scala.collection.JavaConverters._ import{Schema, SchemaBuilder, Struct} import org.apache.kafka.connect.sink.{SinkRecord, SinkTaskContext} class CassandraSinkTaskSpec extends AbstractFlatSpec { val topicName = "test_kv_topic" val tableName = "test.kv" val config = sinkProperties(Map(topicName -> tableName)) it should "start sink task" in { val sinkTask = new CassandraSinkTask() val mockContext = mock[SinkTaskContext] sinkTask.initialize(mockContext) sinkTask.start(config.asJava) sinkTask.stop() } it should "save records in cassandra" in { val sinkTask = new CassandraSinkTask() val mockContext = mock[SinkTaskContext] sinkTask.initialize(mockContext) sinkTask.start(config.asJava) val valueSchema ="record").version(1) .field("key", Schema.STRING_SCHEMA) .field("value", Schema.INT32_SCHEMA).build val value1 = new Struct(valueSchema).put("key", "pqr").put("value", 15) val value2 = new Struct(valueSchema).put("key", "abc").put("value", 17) val record1 = new SinkRecord(topicName, 1,, "key", valueSchema, value1, 0) val record2 = new SinkRecord(topicName, 1,, "key", valueSchema, value2, 0) sinkTask.put(List(record1, record2).asJavaCollection) sinkTask.stop() val cc = CassandraCluster.local val session = cc.session val result = session.execute(s"select count(1) from $tableName").one() val rowCount = result.getLong(0) rowCount should be(2) cc.shutdown() } }
Example 37
Source File: StructFieldsRowKeyBuilderTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.hbase

import com.datamountaineer.streamreactor.connect.hbase.BytesHelper._
import org.apache.hadoop.hbase.util.Bytes
// NOTE(review): the import below is kept as found; its package path appears to be missing
// in this listing (presumably org.apache.kafka.connect.data — confirm).
import{Schema, SchemaBuilder, Struct}
import org.apache.kafka.connect.sink.SinkRecord
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

/** Checks the row keys produced by StructFieldsRowKeyBuilderBytes for struct-valued records. */
class StructFieldsRowKeyBuilderTest extends AnyWordSpec with Matchers {

  /** Person schema shared by every case; "threshold" is optional and never populated. */
  private def personSchema: Schema =
    SchemaBuilder.struct().name("com.example.Person")
      .field("firstName", Schema.STRING_SCHEMA)
      .field("age", Schema.INT32_SCHEMA)
      .field("threshold", Schema.OPTIONAL_FLOAT64_SCHEMA).build()

  /** A fresh sink record carrying firstName = "Alex" and age = 30 (threshold left unset). */
  private def alexRecord: SinkRecord = {
    val schema = personSchema
    val person = new Struct(schema).put("firstName", "Alex").put("age", 30)
    new SinkRecord("sometopic", 1, null, null, schema, person, 1)
  }

  "StructFieldsRowKeyBuilder" should {
    "raise an exception if the field is not present in the struct" in {
      // The record is built inside the intercept block, as in the original layout.
      intercept[IllegalArgumentException] {
        StructFieldsRowKeyBuilderBytes(List("threshold")).build(alexRecord, null)
      }
    }

    "create the row key based on one single field in the struct" in {
      val rowKey = StructFieldsRowKeyBuilderBytes(List("firstName")).build(alexRecord, null)
      rowKey shouldBe "Alex".fromString
    }

    "create the row key based on more thant one field in the struct" in {
      val rowKey = StructFieldsRowKeyBuilderBytes(List("firstName", "age")).build(alexRecord, null)
      val expected = Bytes.add("Alex".fromString(), "\n".fromString(), 30.fromInt())
      rowKey shouldBe expected
    }
  }
}
Example 38
Source File: ValueConverter.scala From stream-reactor with Apache License 2.0 | 5 votes |
// Converters that turn the value of a SinkRecord into a Connect Struct.
//  - ValueConverter.apply dispatches on the runtime type of record.value: Struct, Scala Map,
//    java.util.Map (converted to a Scala Map first) or String; any other value goes to sys.error.
//  - StructValueConverter returns the struct unchanged.
//  - MapValueConverter.convertValue registers an optional field on the builder for the entry
//    and returns the value to store: Int is widened to Long (INT64) and Float to Double
//    (FLOAT64); nested maps are converted recursively with optional = true. The match has no
//    fallback case, so any other value type raises a MatchError.
//  - StringValueConverter wraps the string in a struct named "struct" with one optional field "a".
// NOTE(review): MapValueConverter.convert looks truncated in this listing — `val values = { case`
// has no receiver and `val schema = val struct` has no right-hand side — and the `import{...}`
// clause has no package path. Restore from the upstream file before compiling.
package com.landoop.streamreactor.connect.hive.sink import{Schema, SchemaBuilder, Struct} import org.apache.kafka.connect.sink.SinkRecord import scala.collection.JavaConverters._ object ValueConverter { def apply(record: SinkRecord): Struct = record.value match { case struct: Struct => StructValueConverter.convert(struct) case map: Map[_, _] => MapValueConverter.convert(map) case map: java.util.Map[_, _] => MapValueConverter.convert(map.asScala.toMap) case string: String => StringValueConverter.convert(string) case other => sys.error(s"Unsupported record $other:${other.getClass.getCanonicalName}") } } trait ValueConverter[T] { def convert(value: T): Struct } object StructValueConverter extends ValueConverter[Struct] { override def convert(struct: Struct): Struct = struct } object MapValueConverter extends ValueConverter[Map[_, _]] { def convertValue(value: Any, key: String, builder: SchemaBuilder): Any = { value match { case s: String => builder.field(key, Schema.OPTIONAL_STRING_SCHEMA) s case l: Long => builder.field(key, Schema.OPTIONAL_INT64_SCHEMA) l case i: Int => builder.field(key, Schema.OPTIONAL_INT64_SCHEMA) i.toLong case b: Boolean => builder.field(key, Schema.OPTIONAL_BOOLEAN_SCHEMA) b case f: Float => builder.field(key, Schema.OPTIONAL_FLOAT64_SCHEMA) f.toDouble case d: Double => builder.field(key, Schema.OPTIONAL_FLOAT64_SCHEMA) d case innerMap: java.util.Map[_, _] => val innerStruct = convert(innerMap.asScala.toMap, true) builder.field(key, innerStruct.schema()) innerStruct case innerMap: Map[_, _] => val innerStruct = convert(innerMap, true) builder.field(key, innerStruct.schema()) innerStruct } } def convert(map: Map[_, _], optional: Boolean) = { val builder = SchemaBuilder.struct() val values = { case (k, v) => val key = k.toString val value = convertValue(v, key, builder) key -> value }.toList if (optional) builder.optional() val schema = val struct = new Struct(schema) values.foreach { case (key, value) => struct.put(key.toString, value) } struct 
} override def convert(map: Map[_, _]): Struct = convert(map, false) } object StringValueConverter extends ValueConverter[String] { override def convert(string: String): Struct = { val schema = SchemaBuilder.struct().field("a", Schema.OPTIONAL_STRING_SCHEMA).name("struct").build() new Struct(schema).put("a", string) } }
Example 39
Source File: DropPartitionValuesMapper.scala From stream-reactor with Apache License 2.0 | 5 votes |
// StructMapper built from a PartitionPlan. The visible code filters the input schema's fields
// with `filterNot(field => partitionKeys.contains(...`, folds the remaining fields into a new
// struct schema, and folds them again into a new Struct over that schema.
// Presumably this drops the partition-key fields from each struct — confirm against upstream.
// NOTE(review): this listing is truncated in several places — `val partitionKeys =` and
// `val schema =` have no right-hand side, `partitionKeys.contains(`, `builder.field(,` and
// `struct.put(, input.get(` are missing arguments, and the `import{...}` clause has no package
// path. Restore from the upstream file before compiling.
package com.landoop.streamreactor.connect.hive.sink.mapper import com.landoop.streamreactor.connect.hive.{PartitionPlan, StructMapper} import{SchemaBuilder, Struct} class DropPartitionValuesMapper(plan: PartitionPlan) extends StructMapper { import scala.collection.JavaConverters._ override def map(input: Struct): Struct = { val partitionKeys = val dataFields = input.schema.fields().asScala.filterNot(field => partitionKeys.contains( val builder = dataFields.foldLeft(SchemaBuilder.struct) { (builder, field) => builder.field(, field.schema) } val schema = dataFields.foldLeft(new Struct(schema)) { (struct, field) => struct.put(, input.get( } } }
Example 40
Source File: ProjectionMapper.scala From stream-reactor with Apache License 2.0 | 5 votes |
// StructMapper that projects an input Struct onto a non-empty list of KCQL fields.
// For each projected field it looks the field up in the input schema by `getName`, fails via
// sys.error("Missing field ...") when the lookup returns null, and otherwise adds it to the
// output schema under `getAlias`. Values are then copied from `input.get(getName)` into a new
// Struct under `getAlias`.
// NOTE(review): this listing looks truncated — a bare `import` precedes the kcql import
// (NonEmptyList is used but no import for it is visible), `val schema =` has no right-hand
// side before `projection.foldLeft`, and the `import{...}` clause has no package path.
// Restore from the upstream file before compiling.
package com.landoop.streamreactor.connect.hive.sink.mapper import import com.datamountaineer.kcql.Field import com.landoop.streamreactor.connect.hive.StructMapper import{SchemaBuilder, Struct} class ProjectionMapper(projection: NonEmptyList[Field]) extends StructMapper { override def map(input: Struct): Struct = { // the compatible output schema built from projected fields with aliases applied val builder = projection.foldLeft(SchemaBuilder.struct) { (builder, kcqlField) => Option(input.schema.field(kcqlField.getName)).fold(sys.error(s"Missing field $kcqlField")) { field => builder.field(kcqlField.getAlias, field.schema) } } val schema = projection.foldLeft(new Struct(schema)) { (struct, field) => struct.put(field.getAlias, input.get(field.getName)) } } }
Example 41
Source File: PartitionValueMapper.scala From stream-reactor with Apache License 2.0 | 5 votes |
// StructMapper built from a Partition. The visible code builds a new struct schema from the
// input schema's fields plus one STRING_SCHEMA field per partition entry (named by
// `entry._1.value`), then fills a new Struct from the input fields and puts `entry._2` under
// each partition entry's name.
// NOTE(review): this listing is truncated in places — `builder.field(,` and
// `struct.put(, input.get(` are missing arguments, `val schema =` has no right-hand side, and
// the `import{...}` clause has no package path. Restore from the upstream file before compiling.
package com.landoop.streamreactor.connect.hive.source.mapper import com.landoop.streamreactor.connect.hive.{Partition, StructMapper} import{Schema, SchemaBuilder, Struct} import scala.collection.JavaConverters._ class PartitionValueMapper(partition: Partition) extends StructMapper { override def map(input: Struct): Struct = { val builder = SchemaBuilder.struct() input.schema.fields.asScala.foreach { field => builder.field(, field.schema) } partition.entries.toList.foreach { entry => builder.field(entry._1.value, Schema.STRING_SCHEMA) } val schema = val struct = new Struct(schema) input.schema.fields.asScala.foreach { field => struct.put(, input.get( } partition.entries.toList.foreach { entry => struct.put(entry._1.value, entry._2) } struct } }
Example 42
Source File: ProjectionMapper.scala From stream-reactor with Apache License 2.0 | 5 votes |
// StructMapper that projects an input Struct onto a non-empty list of ProjectionField values.
// The visible code folds the projection into a struct schema, adding each resolved input field
// under `projectionField.alias` and calling sys.error("Projection field ... cannot be found in
// input") when the lookup yields nothing, then folds the projection into a new Struct, putting
// values under `field.alias`.
// NOTE(review): this listing is truncated in several places — a bare `import` with no target
// (NonEmptyList is used but no import for it is visible), `input.schema.field(` and
// `input.get(` are missing arguments, `${}` in the error message is empty, `val schema =` has
// no right-hand side, and the `import{...}` clause has no package path. Restore from the
// upstream file before compiling.
package com.landoop.streamreactor.connect.hive.source.mapper import import com.landoop.streamreactor.connect.hive.StructMapper import com.landoop.streamreactor.connect.hive.source.config.ProjectionField import{SchemaBuilder, Struct} class ProjectionMapper(projection: NonEmptyList[ProjectionField]) extends StructMapper { override def map(input: Struct): Struct = { val builder = projection.foldLeft(SchemaBuilder.struct) { (builder, projectionField) => Option(input.schema.field( .fold(sys.error(s"Projection field ${} cannot be found in input")) { field => builder.field(projectionField.alias, field.schema) } } val schema = projection.foldLeft(new Struct(schema)) { (struct, field) => struct.put(field.alias, input.get( } } }
Example 43
Source File: DefaultCommitPolicyTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink.staging

import com.landoop.streamreactor.connect.hive.{Offset, Topic, TopicPartitionOffset}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, LocalFileSystem, Path}
// NOTE(review): the import below is kept as found; its package path appears to be missing
// in this listing (presumably org.apache.kafka.connect.data — confirm).
import{Schema, SchemaBuilder, Struct}
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec

import scala.concurrent.duration._

/**
  * Exercises DefaultCommitPolicy against a real file on the local Hadoop file system, one
  * case per trigger: elapsed interval, record count and file size.
  */
class DefaultCommitPolicyTest extends AnyWordSpec with Matchers {

  val schema: Schema = SchemaBuilder.struct()
    .field("name", SchemaBuilder.string().required().build())
    .build()
  val struct = new Struct(schema)

  implicit val conf: Configuration = new Configuration()
  implicit val fs: LocalFileSystem = FileSystem.getLocal(conf)
  val tpo = TopicPartitionOffset(Topic("mytopic"), 1, Offset(100))

  /** Asks the policy whether to flush, using the current length and modification time of `path`. */
  private def shouldFlush(policy: CommitPolicy, path: Path, count: Long) = {
    val status = fs.getFileStatus(path)
    policy.shouldFlush(CommitContext(tpo, path, count, status.getLen, status.getModificationTime))
  }

  "DefaultCommitPolicy" should {

    "roll over after interval" in {
      val policy = DefaultCommitPolicy(None, Option(2.seconds), None)
      val path = new Path("foo")
      // Close the stream straight away: only the file's existence matters here, and leaving
      // it open leaked a file handle for the rest of the run.
      fs.create(path).close()
      try {
        shouldFlush(policy, path, 10) shouldBe false
        // Wait out the 2 second interval configured on the policy.
        Thread.sleep(2000)
        shouldFlush(policy, path, 10) shouldBe true
      } finally {
        // Remove the file even when an assertion fails so later cases start clean.
        fs.delete(path, false)
      }
    }

    "roll over after file count" in {
      val policy = DefaultCommitPolicy(None, None, Some(9))
      val path = new Path("foo")
      fs.create(path).close()
      try {
        shouldFlush(policy, path, 7) shouldBe false
        shouldFlush(policy, path, 8) shouldBe false
        shouldFlush(policy, path, 9) shouldBe true
        shouldFlush(policy, path, 10) shouldBe true
      } finally {
        fs.delete(path, false)
      }
    }

    "roll over after file size" in {
      val policy = DefaultCommitPolicy(Some(10), None, None)
      val path = new Path("foo")
      val out = fs.create(path)
      try {
        try {
          // Nothing written yet, so the size threshold of 10 has not been reached.
          shouldFlush(policy, path, 7) shouldBe false
          out.writeBytes("wibble wobble wabble wubble")
        } finally {
          out.close()
        }
        shouldFlush(policy, path, 9) shouldBe true
      } finally {
        fs.delete(path, false)
      }
    }
  }
}
Example 44
Source File: DropPartitionValuesMapperTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink.mapper

// NOTE(review): the bare `import` and the `import{...}` clause below are kept as found; their
// targets appear to have been lost in this listing (NonEmptyList is used below with no visible
// import; the second is presumably org.apache.kafka.connect.data — confirm).
import
import com.landoop.streamreactor.connect.hive.{PartitionKey, PartitionPlan, TableName}
import{SchemaBuilder, Struct}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

import scala.collection.JavaConverters._

/** Checks which fields remain in the schema after DropPartitionValuesMapper has run. */
class DropPartitionValuesMapperTest extends AnyFunSuite with Matchers {

  test("strip partition values") {
    val schema = SchemaBuilder.struct()
      .field("a", SchemaBuilder.string().required().build())
      .field("p", SchemaBuilder.string().required().build())
      .field("q", SchemaBuilder.string().required().build())
      .field("z", SchemaBuilder.string().required().build())
      .build()

    val plan = PartitionPlan(TableName("foo"), NonEmptyList.of(PartitionKey("p"), PartitionKey("q")))
    val struct = new Struct(schema).put("a", "a").put("p", "p").put("q", "q").put("z", "z")
    val output = new DropPartitionValuesMapper(plan).map(struct)
    // Fixed: compare field *names*. `fields()` is a java.util.List of Field objects, which can
    // never equal a Seq[String], so the assertion as listed could not pass.
    output.schema().fields().asScala.map(_.name) shouldBe Seq("a", "z")
  }

  test("handle partition field is missing in input") {
    // "p" is a partition key in the plan but is absent from this schema.
    val schema = SchemaBuilder.struct()
      .field("a", SchemaBuilder.string().required().build())
      .field("q", SchemaBuilder.string().required().build())
      .field("z", SchemaBuilder.string().required().build())
      .build()

    val plan = PartitionPlan(TableName("foo"), NonEmptyList.of(PartitionKey("p"), PartitionKey("q")))
    val struct = new Struct(schema).put("a", "a").put("q", "q").put("z", "z")
    val output = new DropPartitionValuesMapper(plan).map(struct)
    output.schema().fields().asScala.map(_.name) shouldBe Seq("a", "z")
  }
}
Example 45
Source File: MetastoreSchemaAlignMapperTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.landoop.streamreactor.connect.hive.sink.mapper

// NOTE(review): the import below is kept as found; its package path appears to be missing
// in this listing (presumably org.apache.kafka.connect.data — confirm).
import{SchemaBuilder, Struct}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

import scala.collection.JavaConverters._

/** Checks the field list produced when a record is aligned to a metastore schema. */
class MetastoreSchemaAlignMapperTest extends AnyFunSuite with Matchers {

  test("pad optional missing fields with null") {
    val recordSchema = SchemaBuilder.struct()
      .field("a", SchemaBuilder.string().required().build())
      .field("b", SchemaBuilder.string().required().build())
      .field("c", SchemaBuilder.string().required().build())
      .build()

    val struct = new Struct(recordSchema).put("a", "a").put("b", "b").put("c", "c")

    // The metastore schema has an extra optional field "z" that the record lacks.
    val metastoreSchema = SchemaBuilder.struct()
      .field("a", SchemaBuilder.string().required().build())
      .field("b", SchemaBuilder.string().required().build())
      .field("c", SchemaBuilder.string().required().build())
      .field("z", SchemaBuilder.string().optional().build())
      .build()

    val output = new MetastoreSchemaAlignMapper(metastoreSchema).map(struct)
    // Fixed: compare field *names*. `fields()` is a java.util.List of Field objects, which can
    // never equal a Seq[String], so the assertion as listed could not pass.
    output.schema().fields().asScala.map(_.name) shouldBe Seq("a", "b", "c", "z")
  }

  test("drop fields not specified in metastore") {
    val recordSchema = SchemaBuilder.struct()
      .field("a", SchemaBuilder.string().required().build())
      .field("b", SchemaBuilder.string().required().build())
      .field("c", SchemaBuilder.string().required().build())
      .build()

    val struct = new Struct(recordSchema).put("a", "a").put("b", "b").put("c", "c")

    // The metastore schema omits "c", which the record carries.
    val metastoreSchema = SchemaBuilder.struct()
      .field("a", SchemaBuilder.string().required().build())
      .field("b", SchemaBuilder.string().required().build())
      .build()

    val output = new MetastoreSchemaAlignMapper(metastoreSchema).map(struct)
    output.schema().fields().asScala.map(_.name) shouldBe Seq("a", "b")
  }
}
Example 46
Source File: ParquetWriterTest.scala From stream-reactor with Apache License 2.0 | 5 votes |
// Round-trip spec for the parquet writer on the local Hadoop file system. Both cases build a
// schema with required "name" and optional "title"/"salary", write two structs to
// "sinktest.parquet" with ParquetSinkConfig(overwrite = true), close the writer, open a reader
// on the same path, and delete the file at the end. The second case stores a null "title".
// NOTE(review): the read-back and comparison look truncated in this listing —
// `Iterator.continually( != null)` has no reader expression and `shouldBe` has no left-hand
// value before `fs.delete` — and the `import{...}` clause has no package path. Restore from the
// upstream file before compiling.
package com.landoop.streamreactor.connect.hive.parquet import com.landoop.streamreactor.connect.hive.StructUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import{SchemaBuilder, Struct} import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec class ParquetWriterTest extends AnyWordSpec with Matchers { implicit val conf = new Configuration() implicit val fs = FileSystem.getLocal(conf) "ParquetWriter" should { "write parquet files" in { val schema = SchemaBuilder.struct() .field("name", SchemaBuilder.string().required().build()) .field("title", SchemaBuilder.string().optional().build()) .field("salary", SchemaBuilder.float64().optional().build()) .build() val users = List( new Struct(schema).put("name", "sam").put("title", "mr").put("salary", 100.43), new Struct(schema).put("name", "laura").put("title", "ms").put("salary", 429.06) ) val path = new Path("sinktest.parquet") val writer = parquetWriter(path, schema, ParquetSinkConfig(overwrite = true)) users.foreach(writer.write) writer.close() val reader = parquetReader(path) val actual = Iterator.continually( != null).toList reader.close() shouldBe fs.delete(path, false) } "support writing nulls" in { val schema = SchemaBuilder.struct() .field("name", SchemaBuilder.string().required().build()) .field("title", SchemaBuilder.string().optional().build()) .field("salary", SchemaBuilder.float64().optional().build()) .build() val users = List( new Struct(schema).put("name", "sam").put("title", null).put("salary", 100.43), new Struct(schema).put("name", "laura").put("title", "ms").put("salary", 429.06) ) val path = new Path("sinktest.parquet") val writer = parquetWriter(path, schema, ParquetSinkConfig(overwrite = true)) users.foreach(writer.write) writer.close() val reader = parquetReader(path) val actual = Iterator.continually( != null).toList reader.close() shouldBe fs.delete(path, false) } } }