org.apache.spark.sql.execution.streaming.SerializedOffset Scala Examples
The following examples show how to use org.apache.spark.sql.execution.streaming.SerializedOffset.
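Most of the examples follow the same pattern: a source-specific Offset implementation serializes its position to JSON via json(), and a companion conversion accepts the SerializedOffset that Spark restores from the checkpointed offset log and parses that JSON back into the typed offset. A minimal sketch of the pattern, assuming a hypothetical MySourceOffset with a single position field (the names and the hand-rolled JSON handling are illustrative only, not taken from any of the projects below):

import org.apache.spark.sql.execution.streaming.{Offset, SerializedOffset}

// Hypothetical source-specific offset; the field name `position` is illustrative.
case class MySourceOffset(position: Long) extends Offset {
  override def json(): String = s"""{"position":$position}"""
}

object MySourceOffset {
  // Convert whatever offset Spark hands back (possibly a SerializedOffset read
  // from the checkpoint offset log) into the typed offset.
  def fromOffset(offset: Offset): MySourceOffset = offset match {
    case o: MySourceOffset    => o
    case so: SerializedOffset => fromJson(so.json)
    case _ =>
      throw new IllegalArgumentException(
        s"Cannot convert ${offset.getClass} to MySourceOffset")
  }

  // Naive parsing to keep the sketch dependency-free; real sources use json4s or Jackson.
  def fromJson(json: String): MySourceOffset =
    MySourceOffset("""\d+""".r.findFirstIn(json).get.toLong)
}

The examples below show how real connectors (spark-redis, kinesis-sql, pulsar-spark) and Spark's own test suites apply this pattern.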
Example 1
Source File: RedisSourceOffset.scala From spark-redis with BSD 3-Clause "New" or "Revised" License
package org.apache.spark.sql.redis.stream

import com.redislabs.provider.redis.util.JsonUtils
import org.apache.spark.sql.execution.streaming.{Offset, SerializedOffset}
import org.json4s.jackson.Serialization
import org.json4s.{Formats, NoTypeHints}

case class RedisSourceOffset(offsets: Map[String, RedisConsumerOffset]) extends Offset {
  override def json(): String = JsonUtils.toJson(this)
}

object RedisSourceOffset {

  private implicit val formats: Formats = Serialization.formats(NoTypeHints)

  def fromOffset(offset: Offset): RedisSourceOffset = {
    offset match {
      case o: RedisSourceOffset => o
      case so: SerializedOffset => fromJson(so.json)
      case _ =>
        throw new IllegalArgumentException(
          s"Invalid conversion from offset of ${offset.getClass} to RedisSourceOffset")
    }

    fromJson(offset.json())
  }

  def fromJson(json: String): RedisSourceOffset = {
    try {
      Serialization.read[RedisSourceOffset](json)
    } catch {
      case e: Throwable =>
        val example = RedisSourceOffset(Map("my-stream" ->
          RedisConsumerOffset("redis-source", "1543674099961-0")))
        val jsonExample = Serialization.write(example)
        throw new RuntimeException(
          s"Unable to parse offset json. Example of valid json: $jsonExample", e)
    }
  }
}

case class RedisConsumerOffset(groupName: String, offset: String)

case class RedisSourceOffsetRange(start: Option[String], end: String, config: RedisConsumerConfig)
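Note that in fromOffset above the pattern match effectively only validates the offset type: every non-throwing branch is discarded, and the value actually returned comes from the trailing fromJson(offset.json()) call. A hedged usage sketch (the variable names and the checkpoint JSON literal are illustrative, not taken from a real run):

import org.apache.spark.sql.execution.streaming.SerializedOffset
import org.apache.spark.sql.redis.stream.{RedisConsumerOffset, RedisSourceOffset}

// JSON as it might appear in a checkpointed offset log (illustrative value,
// shaped like the example embedded in fromJson's error message).
val restored = SerializedOffset(
  """{"offsets":{"my-stream":{"groupName":"redis-source","offset":"1543674099961-0"}}}""")

// fromOffset accepts either an already-typed RedisSourceOffset or a SerializedOffset.
val typed: RedisSourceOffset = RedisSourceOffset.fromOffset(restored)
assert(typed.offsets("my-stream") == RedisConsumerOffset("redis-source", "1543674099961-0"))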
Example 2
Source File: OffsetSuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.streaming

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, SerializedOffset}

trait OffsetSuite extends SparkFunSuite {
  def compare(one: Offset, two: Offset): Unit = {
    test(s"comparison $one <=> $two") {
      assert(one == one)
      assert(two == two)
      assert(one != two)
      assert(two != one)
    }
  }
}

class LongOffsetSuite extends OffsetSuite {
  val one = LongOffset(1)
  val two = LongOffset(2)
  val three = LongOffset(3)
  compare(one, two)
  compare(LongOffset(SerializedOffset(one.json)), LongOffset(SerializedOffset(three.json)))
}
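The second compare call round-trips a LongOffset through SerializedOffset: LongOffset's companion apply rebuilds the typed offset from the JSON string stored in the offset log, so the rebuilt offset compares equal to the original. A minimal sketch of that round trip outside the test harness:

import org.apache.spark.sql.execution.streaming.{LongOffset, SerializedOffset}

val original = LongOffset(1)

// Serialize to the JSON form that would be written to the offset log,
// then rebuild the typed offset from the SerializedOffset wrapper.
val roundTripped = LongOffset(SerializedOffset(original.json))

assert(roundTripped == original)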
Example 3
Source File: OffsetSuite.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.streaming

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, SerializedOffset}

trait OffsetSuite extends SparkFunSuite {
  def compare(one: Offset, two: Offset): Unit = {
    test(s"comparison $one <=> $two") {
      assert(one == one)
      assert(two == two)
      assert(one != two)
      assert(two != one)
    }
  }
}

class LongOffsetSuite extends OffsetSuite {
  val one = LongOffset(1)
  val two = LongOffset(2)
  val three = LongOffset(3)
  compare(one, two)
  compare(LongOffset(SerializedOffset(one.json)), LongOffset(SerializedOffset(three.json)))
}
Example 4
Source File: OffsetSuite.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.streaming

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, SerializedOffset}

trait OffsetSuite extends SparkFunSuite {
  def compare(one: Offset, two: Offset): Unit = {
    test(s"comparison $one <=> $two") {
      assert(one == one)
      assert(two == two)
      assert(one != two)
      assert(two != one)
    }
  }
}

class LongOffsetSuite extends OffsetSuite {
  val one = LongOffset(1)
  val two = LongOffset(2)
  val three = LongOffset(3)
  compare(one, two)
  compare(LongOffset(SerializedOffset(one.json)), LongOffset(SerializedOffset(three.json)))
}
Example 5
Source File: OffsetSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.streaming

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, SerializedOffset}

trait OffsetSuite extends SparkFunSuite {
  def compare(one: Offset, two: Offset): Unit = {
    test(s"comparison $one <=> $two") {
      assert(one == one)
      assert(two == two)
      assert(one != two)
      assert(two != one)
    }
  }
}

class LongOffsetSuite extends OffsetSuite {
  val one = LongOffset(1)
  val two = LongOffset(2)
  val three = LongOffset(3)
  compare(one, two)
  compare(LongOffset(SerializedOffset(one.json)), LongOffset(SerializedOffset(three.json)))
}
Example 6
Source File: KinesisSourceOffset.scala From kinesis-sql with Apache License 2.0
package org.apache.spark.sql.kinesis

import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization

import scala.collection.mutable.HashMap
import scala.util.control.NonFatal

import org.apache.spark.sql.execution.streaming.Offset
import org.apache.spark.sql.execution.streaming.SerializedOffset
import org.apache.spark.sql.sources.v2.reader.streaming.{Offset => OffsetV2, PartitionOffset}

// Companion object; the KinesisSourceOffset case class and the remaining
// companion members are omitted from this excerpt.
object KinesisSourceOffset {

  // json4s formats assumed here (required by Serialization.read below).
  implicit val format = Serialization.formats(NoTypeHints)

  def apply(json: String): KinesisSourceOffset = {
    try {
      val readObj = Serialization.read[Map[String, Map[String, String]]](json)
      val metadata = readObj.get("metadata")
      val shardInfoMap: Map[String, ShardInfo] = readObj.filter(_._1 != "metadata").map {
        case (shardId, value) =>
          shardId.toString -> new ShardInfo(
            shardId.toString,
            value.get("iteratorType").get,
            value.get("iteratorPosition").get)
      }.toMap
      KinesisSourceOffset(
        new ShardOffsets(
          metadata.get("batchId").toLong,
          metadata.get("streamName"),
          shardInfoMap))
    } catch {
      case NonFatal(x) => throw new IllegalArgumentException(x)
    }
  }

  def getMap(shardInfos: Array[ShardInfo]): Map[String, ShardInfo] = {
    shardInfos.map { s: ShardInfo => (s.shardId -> s) }.toMap
  }
}
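From the way apply(json) deserializes the map, the offset JSON is expected to hold a "metadata" entry (carrying batchId and streamName) plus one entry per shard with its iteratorType and iteratorPosition. A purely illustrative example of a string in that shape (the shard id, iterator type, and sequence number below are made up):

// Illustrative only: JSON shaped the way KinesisSourceOffset.apply(json) expects it.
val sampleOffsetJson =
  """{
    |  "metadata": {"batchId": "7", "streamName": "my-stream"},
    |  "shardId-000000000000": {
    |    "iteratorType": "AFTER_SEQUENCE_NUMBER",
    |    "iteratorPosition": "49605240428222485261091539921087775290635951311212575618"
    |  }
    |}""".stripMargin

// val offset = KinesisSourceOffset(sampleOffsetJson)  // would yield ShardOffsets with one ShardInfo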
Example 7
Source File: PulsarOffset.scala From pulsar-spark with Apache License 2.0
package org.apache.spark.sql.pulsar

import org.apache.pulsar.client.api.MessageId
import org.apache.pulsar.client.impl.MessageIdImpl

import org.apache.spark.sql.execution.streaming.{Offset, SerializedOffset}
import org.apache.spark.sql.sources.v2.reader.streaming.{Offset => OffsetV2, PartitionOffset}

private[pulsar] sealed trait PulsarOffset

private[pulsar] case object EarliestOffset extends PulsarOffset

private[pulsar] case object LatestOffset extends PulsarOffset

private[pulsar] case class TimeOffset(ts: Long) extends PulsarOffset

private[pulsar] sealed trait PerTopicOffset extends PulsarOffset

private[pulsar] case class SpecificPulsarOffset(topicOffsets: Map[String, MessageId])
    extends OffsetV2
    with PerTopicOffset {

  override val json = JsonUtils.topicOffsets(topicOffsets)
}

private[pulsar] case class SpecificPulsarStartingTime(topicTimes: Map[String, Long])
    extends OffsetV2
    with PerTopicOffset {

  override def json(): String = JsonUtils.topicTimes(topicTimes)
}

private[pulsar] case class PulsarPartitionOffset(topic: String, messageId: MessageId)
    extends PartitionOffset

private[pulsar] object SpecificPulsarOffset {

  def getTopicOffsets(offset: Offset): Map[String, MessageId] = {
    offset match {
      case o: SpecificPulsarOffset => o.topicOffsets
      case so: SerializedOffset => SpecificPulsarOffset(so).topicOffsets
      case _ =>
        throw new IllegalArgumentException(
          s"Invalid conversion from offset of ${offset.getClass} to PulsarSourceOffset")
    }
  }

  def apply(offset: SerializedOffset): SpecificPulsarOffset =
    SpecificPulsarOffset(JsonUtils.topicOffsets(offset.json))

  def apply(offsetTuples: (String, MessageId)*): SpecificPulsarOffset = {
    SpecificPulsarOffset(offsetTuples.toMap)
  }
}

private[pulsar] case class UserProvidedMessageId(mid: MessageId)
    extends MessageIdImpl(
      mid.asInstanceOf[MessageIdImpl].getLedgerId,
      mid.asInstanceOf[MessageIdImpl].getEntryId,
      mid.asInstanceOf[MessageIdImpl].getPartitionIndex)
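getTopicOffsets accepts either an already-typed SpecificPulsarOffset or the SerializedOffset Spark reads back from the offset log, and both routes yield the same per-topic MessageId map, as the suite in Example 8 below also verifies. A small sketch of the round trip (the topic name and MessageIdImpl coordinates are illustrative):

import org.apache.pulsar.client.impl.MessageIdImpl
import org.apache.spark.sql.execution.streaming.SerializedOffset

// Illustrative MessageId (ledgerId = 1, entryId = 1, partitionIndex = -1).
val typed = SpecificPulsarOffset(("my-topic", new MessageIdImpl(1, 1, -1)))

// Round trip through the JSON form that would be stored in the offset log.
val fromLog = SpecificPulsarOffset.getTopicOffsets(SerializedOffset(typed.json))

assert(fromLog == SpecificPulsarOffset.getTopicOffsets(typed))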
Example 8
Source File: PulsarSourceOffsetSuite.scala From pulsar-spark with Apache License 2.0
package org.apache.spark.sql.pulsar

import java.io.File

import org.apache.pulsar.client.impl.MessageIdImpl

import org.apache.spark.sql.execution.streaming.{LongOffset, OffsetSeq, OffsetSeqLog, SerializedOffset}
import org.apache.spark.sql.streaming.OffsetSuite
import org.apache.spark.sql.test.SharedSQLContext

class PulsarSourceOffsetSuite extends OffsetSuite with SharedSQLContext {

  compare(
    one = SpecificPulsarOffset(("t", new MessageIdImpl(1, 1, -1))),
    two = SpecificPulsarOffset(("t", new MessageIdImpl(1, 2, -1))))

  compare(
    one = SpecificPulsarOffset(
      ("t", new MessageIdImpl(1, 1, -1)),
      ("t1", new MessageIdImpl(1, 1, -1))),
    two = SpecificPulsarOffset(
      ("t", new MessageIdImpl(1, 2, -1)),
      ("t1", new MessageIdImpl(1, 2, -1)))
  )

  compare(
    one = SpecificPulsarOffset(("t", new MessageIdImpl(1, 1, -1))),
    two = SpecificPulsarOffset(
      ("t", new MessageIdImpl(1, 2, -1)),
      ("t1", new MessageIdImpl(1, 1, -1))))

  val kso1 = SpecificPulsarOffset(("t", new MessageIdImpl(1, 1, -1)))
  val kso2 = SpecificPulsarOffset(
    ("t", new MessageIdImpl(1, 2, -1)),
    ("t1", new MessageIdImpl(1, 3, -1)))
  val kso3 = SpecificPulsarOffset(
    ("t", new MessageIdImpl(1, 2, -1)),
    ("t1", new MessageIdImpl(1, 3, -1)),
    ("t2", new MessageIdImpl(1, 4, -1)))

  compare(
    SpecificPulsarOffset(SerializedOffset(kso1.json)),
    SpecificPulsarOffset(SerializedOffset(kso2.json)))

  test("basic serialization - deserialization") {
    assert(
      SpecificPulsarOffset.getTopicOffsets(kso1) ==
        SpecificPulsarOffset.getTopicOffsets(SerializedOffset(kso1.json)))
  }

  test("OffsetSeqLog serialization - deserialization") {
    withTempDir { temp =>
      // use a non-existent directory to test whether the log creates the dir
      val dir = new File(temp, "dir")
      val metadataLog = new OffsetSeqLog(spark, dir.getAbsolutePath)
      val batch0 = OffsetSeq.fill(kso1)
      val batch1 = OffsetSeq.fill(kso2, kso3)

      val batch0Serialized =
        OffsetSeq.fill(batch0.offsets.flatMap(_.map(o => SerializedOffset(o.json))): _*)
      val batch1Serialized =
        OffsetSeq.fill(batch1.offsets.flatMap(_.map(o => SerializedOffset(o.json))): _*)

      assert(metadataLog.add(0, batch0))
      assert(metadataLog.getLatest() === Some(0 -> batch0Serialized))
      assert(metadataLog.get(0) === Some(batch0Serialized))

      assert(metadataLog.add(1, batch1))
      assert(metadataLog.get(0) === Some(batch0Serialized))
      assert(metadataLog.get(1) === Some(batch1Serialized))
      assert(metadataLog.getLatest() === Some(1 -> batch1Serialized))
      assert(
        metadataLog.get(None, Some(1)) ===
          Array(0 -> batch0Serialized, 1 -> batch1Serialized))

      // Adding the same batch does nothing
      metadataLog.add(1, OffsetSeq.fill(LongOffset(3)))
      assert(metadataLog.get(0) === Some(batch0Serialized))
      assert(metadataLog.get(1) === Some(batch1Serialized))
      assert(metadataLog.getLatest() === Some(1 -> batch1Serialized))
      assert(
        metadataLog.get(None, Some(1)) ===
          Array(0 -> batch0Serialized, 1 -> batch1Serialized))
    }
  }
}