org.apache.spark.sql.execution.streaming.Offset Scala Examples
The following examples show how to use org.apache.spark.sql.execution.streaming.Offset.
Each example is taken from an open-source project; the project, source file, and license are noted in the header above each listing.
Example 1
Source File: OffsetSuite.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.streaming

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, SerializedOffset}

trait OffsetSuite extends SparkFunSuite {
  def compare(one: Offset, two: Offset): Unit = {
    test(s"comparison $one <=> $two") {
      assert(one == one)
      assert(two == two)
      assert(one != two)
      assert(two != one)
    }
  }
}

class LongOffsetSuite extends OffsetSuite {
  val one = LongOffset(1)
  val two = LongOffset(2)
  val three = LongOffset(3)
  compare(one, two)
  compare(LongOffset(SerializedOffset(one.json)), LongOffset(SerializedOffset(three.json)))
}
Example 2
Source File: CurrentPersistenceIdsQuerySourceProvider.scala From apache-spark-test with Apache License 2.0
package akka.persistence.jdbc.spark.sql.execution.streaming

import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, Source}
import org.apache.spark.sql.sources.{DataSourceRegister, StreamSourceProvider}
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{SQLContext, _}

object CurrentPersistenceIdsQuerySourceProvider {
  val name = "current-persistence-id"
  val schema: StructType = StructType(Array(
    StructField("persistence_id", StringType, nullable = false)
  ))
}

class CurrentPersistenceIdsQuerySourceProvider extends StreamSourceProvider
  with DataSourceRegister with Serializable {

  override def sourceSchema(
    sqlContext: SQLContext,
    schema: Option[StructType],
    providerName: String,
    parameters: Map[String, String]
  ): (String, StructType) = {
    CurrentPersistenceIdsQuerySourceProvider.name -> CurrentPersistenceIdsQuerySourceProvider.schema
  }

  override def createSource(
    sqlContext: SQLContext,
    metadataPath: String,
    schema: Option[StructType],
    providerName: String,
    parameters: Map[String, String]
  ): Source = {
    new CurrentPersistenceIdsQuerySourceImpl(sqlContext, parameters("path"))
  }

  override def shortName(): String = CurrentPersistenceIdsQuerySourceProvider.name
}

class CurrentPersistenceIdsQuerySourceImpl(val sqlContext: SQLContext, val readJournalPluginId: String)
  extends Source with ReadJournalSource {

  override def schema: StructType = CurrentPersistenceIdsQuerySourceProvider.schema

  override def getOffset: Option[Offset] = {
    val offset = maxPersistenceIds
    println("[CurrentPersistenceIdsQuery]: Returning maximum offset: " + offset)
    Some(LongOffset(offset))
  }

  override def getBatch(_start: Option[Offset], _end: Offset): DataFrame = {
    val (start, end) = getStartEnd(_start, _end)
    println(s"[CurrentPersistenceIdsQuery]: Getting currentPersistenceIds from start: $start, end: $end")
    import sqlContext.implicits._
    persistenceIds(start, end).toDF()
  }
}
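A minimal usage sketch, assuming a SparkSession named spark and an Akka Persistence JDBC read journal registered under the plugin id "jdbc-read-journal"; the "path" option is the value consumed by createSource above:

val persistenceIds = spark.readStream
  .format("akka.persistence.jdbc.spark.sql.execution.streaming.CurrentPersistenceIdsQuerySourceProvider")
  .option("path", "jdbc-read-journal") // becomes readJournalPluginId in the source implementation
  .load()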
Example 3
Source File: ReadJournalSource.scala From apache-spark-test with Apache License 2.0
package akka.persistence.jdbc.spark.sql.execution.streaming

import akka.actor.{ActorSystem, ExtendedActorSystem}
import akka.persistence.query.PersistenceQuery
import akka.persistence.query.scaladsl.{CurrentEventsByPersistenceIdQuery, CurrentEventsByTagQuery, CurrentPersistenceIdsQuery, ReadJournal}
import akka.stream.scaladsl.Sink
import akka.stream.scaladsl.extension.{Sink => Snk}
import akka.stream.{ActorMaterializer, Materializer}
import org.apache.spark.sql._
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, Source}
import org.apache.spark.sql.types.StructType

import scala.collection.immutable._
import scala.concurrent.duration.{FiniteDuration, _}
import scala.concurrent.{Await, ExecutionContext, Future}

trait ReadJournalSource { _: Source =>

  def readJournalPluginId: String
  def sqlContext: SQLContext

  // some machinery
  implicit val system: ActorSystem = ActorSystem()
  implicit val mat: Materializer = ActorMaterializer()
  implicit val ec: ExecutionContext = system.dispatcher

  // read journal, only interested in the Current queries, as Spark isn't asynchronous
  lazy val readJournal = PersistenceQuery(system).readJournalFor(readJournalPluginId)
    .asInstanceOf[ReadJournal with CurrentPersistenceIdsQuery with CurrentEventsByPersistenceIdQuery with CurrentEventsByTagQuery]

  implicit class FutureOps[A](f: Future[A])(implicit ec: ExecutionContext, timeout: FiniteDuration = null) {
    def futureValue: A = Await.result(f, Option(timeout).getOrElse(10.seconds))
  }

  def maxPersistenceIds: Long =
    readJournal.currentPersistenceIds().runWith(Snk.count).futureValue

  def persistenceIds(start: Long, end: Long) =
    readJournal.currentPersistenceIds().drop(start).take(end).runWith(Sink.seq).futureValue

  def maxEventsByPersistenceId(pid: String): Long =
    readJournal.currentEventsByPersistenceId(pid, 0, Long.MaxValue).runWith(Snk.count).futureValue

  def eventsByPersistenceId(pid: String, start: Long, end: Long, eventMapperFQCN: String): Seq[Row] = {
    readJournal.currentEventsByPersistenceId(pid, start, end)
      .map(env => getMapper(eventMapperFQCN).get.row(env, sqlContext)).runWith(Sink.seq).futureValue
  }

  implicit def mapToDataFrame(rows: Seq[Row]): DataFrame = {
    import scala.collection.JavaConversions._
    sqlContext.createDataFrame(rows, schema)
  }

  def getStartEnd(_start: Option[Offset], _end: Offset): (Long, Long) = (_start, _end) match {
    case (Some(LongOffset(start)), LongOffset(end)) => (start, end)
    case (None, LongOffset(end))                    => (0L, end)
  }

  def getMapper(eventMapperFQCN: String): Option[EventMapper] =
    system.asInstanceOf[ExtendedActorSystem].dynamicAccess.createInstanceFor[EventMapper](eventMapperFQCN, List.empty)
      .recover { case cause => cause.printStackTrace(); null }.toOption

  override def stop(): Unit = {
    println("Stopping jdbc read journal")
    system.terminate()
  }
}
Example 4
Source File: CurrentEventsByPersistenceIdQuerySourceProvider.scala From apache-spark-test with Apache License 2.0
package akka.persistence.jdbc.spark.sql.execution.streaming

import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, Source}
import org.apache.spark.sql.sources.{DataSourceRegister, StreamSourceProvider}
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{SQLContext, _}

object CurrentEventsByPersistenceIdQuerySourceProvider {
  val name = "current-events-by-persistence-id"
}

class CurrentEventsByPersistenceIdQuerySourceProvider extends StreamSourceProvider
  with DataSourceRegister with Serializable {

  override def sourceSchema(
    sqlContext: SQLContext,
    schema: Option[StructType],
    providerName: String,
    parameters: Map[String, String]
  ): (String, StructType) = {
    println(s"[CurrentEventsByPersistenceIdQuerySourceProvider.sourceSchema]: schema: $schema, providerName: $providerName, parameters: $parameters")
    CurrentEventsByPersistenceIdQuerySourceProvider.name -> schema.get
  }

  override def createSource(
    sqlContext: SQLContext,
    metadataPath: String,
    schema: Option[StructType],
    providerName: String,
    parameters: Map[String, String]
  ): Source = {
    val eventMapperFQCN: String = parameters.get("event-mapper") match {
      case Some(_eventMapper) => _eventMapper
      case _                  => throw new RuntimeException("No event mapper FQCN")
    }

    val pid = (parameters.get("pid"), parameters.get("persistence-id")) match {
      case (Some(pid), _) => pid
      case (_, Some(pid)) => pid
      case _              => throw new RuntimeException("No persistence_id")
    }

    new CurrentEventsByPersistenceIdQuerySourceImpl(sqlContext, parameters("path"), eventMapperFQCN, pid, schema.get)
  }

  override def shortName(): String = CurrentEventsByPersistenceIdQuerySourceProvider.name
}

class CurrentEventsByPersistenceIdQuerySourceImpl(
  val sqlContext: SQLContext,
  val readJournalPluginId: String,
  eventMapperFQCN: String,
  persistenceId: String,
  override val schema: StructType
) extends Source with ReadJournalSource {

  override def getOffset: Option[Offset] = {
    val offset = maxEventsByPersistenceId(persistenceId)
    println("[CurrentEventsByPersistenceIdQuery]: Returning maximum offset: " + offset)
    Some(LongOffset(offset))
  }

  override def getBatch(_start: Option[Offset], _end: Offset): DataFrame = {
    val (start, end) = getStartEnd(_start, _end)
    val df: DataFrame = eventsByPersistenceId(persistenceId, start, end, eventMapperFQCN)
    println(s"[CurrentEventsByPersistenceIdQuery]: Getting currentPersistenceIds from start: $start, end: $end, DataFrame.count: ${df.count}")
    df
  }
}
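A minimal usage sketch along the same lines; the schema value, the event-mapper class name, and the persistence id below are hypothetical, while the option keys mirror what createSource above reads:

val events = spark.readStream
  .format("akka.persistence.jdbc.spark.sql.execution.streaming.CurrentEventsByPersistenceIdQuerySourceProvider")
  .schema(myEventSchema)                               // user-supplied StructType (schema.get above)
  .option("path", "jdbc-read-journal")                 // read journal plugin id
  .option("event-mapper", "com.example.MyEventMapper") // hypothetical EventMapper implementation
  .option("persistence-id", "account-1")               // hypothetical persistence id
  .load()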
Example 5
Source File: PulsarOffset.scala From pulsar-spark with Apache License 2.0
package org.apache.spark.sql.pulsar

import org.apache.pulsar.client.api.MessageId
import org.apache.pulsar.client.impl.MessageIdImpl
import org.apache.spark.sql.execution.streaming.{Offset, SerializedOffset}
import org.apache.spark.sql.sources.v2.reader.streaming.{Offset => OffsetV2, PartitionOffset}

private[pulsar] sealed trait PulsarOffset

private[pulsar] case object EarliestOffset extends PulsarOffset

private[pulsar] case object LatestOffset extends PulsarOffset

private[pulsar] case class TimeOffset(ts: Long) extends PulsarOffset

private[pulsar] sealed trait PerTopicOffset extends PulsarOffset

private[pulsar] case class SpecificPulsarOffset(topicOffsets: Map[String, MessageId])
  extends OffsetV2 with PerTopicOffset {

  override val json = JsonUtils.topicOffsets(topicOffsets)
}

private[pulsar] case class SpecificPulsarStartingTime(topicTimes: Map[String, Long])
  extends OffsetV2 with PerTopicOffset {

  override def json(): String = JsonUtils.topicTimes(topicTimes)
}

private[pulsar] case class PulsarPartitionOffset(topic: String, messageId: MessageId)
  extends PartitionOffset

private[pulsar] object SpecificPulsarOffset {

  def getTopicOffsets(offset: Offset): Map[String, MessageId] = {
    offset match {
      case o: SpecificPulsarOffset => o.topicOffsets
      case so: SerializedOffset => SpecificPulsarOffset(so).topicOffsets
      case _ =>
        throw new IllegalArgumentException(
          s"Invalid conversion from offset of ${offset.getClass} to PulsarSourceOffset")
    }
  }

  def apply(offset: SerializedOffset): SpecificPulsarOffset =
    SpecificPulsarOffset(JsonUtils.topicOffsets(offset.json))

  def apply(offsetTuples: (String, MessageId)*): SpecificPulsarOffset = {
    SpecificPulsarOffset(offsetTuples.toMap)
  }
}

private[pulsar] case class UserProvidedMessageId(mid: MessageId)
  extends MessageIdImpl(
    mid.asInstanceOf[MessageIdImpl].getLedgerId,
    mid.asInstanceOf[MessageIdImpl].getEntryId,
    mid.asInstanceOf[MessageIdImpl].getPartitionIndex)
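A brief sketch of how these offsets compose. Since SpecificPulsarOffset and its companion are private[pulsar], this only compiles from code placed in the org.apache.spark.sql.pulsar package; the topic name is a made-up placeholder:

import org.apache.pulsar.client.api.MessageId

// Build an offset for a single topic and read the per-topic map back out.
val offset = SpecificPulsarOffset("persistent://public/default/topic-1" -> MessageId.earliest)
val perTopic: Map[String, MessageId] = SpecificPulsarOffset.getTopicOffsets(offset)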
Example 6
Source File: KinesisSourceOffset.scala From kinesis-sql with Apache License 2.0
package org.apache.spark.sql.kinesis

import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization

import scala.collection.mutable.HashMap
import scala.util.control.NonFatal

import org.apache.spark.sql.execution.streaming.Offset
import org.apache.spark.sql.execution.streaming.SerializedOffset
import org.apache.spark.sql.sources.v2.reader.streaming.{Offset => OffsetV2, PartitionOffset}

// Excerpt: the KinesisSourceOffset case class and the opening of its companion object
// are not shown in this listing; the two methods below belong to that companion.

  def apply(json: String): KinesisSourceOffset = {
    try {
      val readObj = Serialization.read[Map[String, Map[String, String]]](json)
      val metadata = readObj.get("metadata")
      val shardInfoMap: Map[String, ShardInfo] = readObj.filter(_._1 != "metadata").map {
        case (shardId, value) =>
          shardId.toString -> new ShardInfo(
            shardId.toString,
            value.get("iteratorType").get,
            value.get("iteratorPosition").get)
      }.toMap
      KinesisSourceOffset(
        new ShardOffsets(
          metadata.get("batchId").toLong,
          metadata.get("streamName"),
          shardInfoMap))
    } catch {
      case NonFatal(x) => throw new IllegalArgumentException(x)
    }
  }

  def getMap(shardInfos: Array[ShardInfo]): Map[String, ShardInfo] = {
    shardInfos.map { s: ShardInfo => (s.shardId -> s) }.toMap
  }
} // closes the companion object whose opening line is omitted above
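For reference, a sketch of the offset JSON layout that apply(json) above expects; the shard id, iterator type, and iterator position values are hypothetical placeholders:

val exampleJson =
  """{
    |  "metadata" : { "batchId" : "7", "streamName" : "my-stream" },
    |  "shardId-000000000000" : {
    |    "iteratorType" : "AFTER_SEQUENCE_NUMBER",
    |    "iteratorPosition" : "49605240428222087626767244429818661291169525738424631298"
    |  }
    |}""".stripMargin

// Parsed into a KinesisSourceOffset carrying ShardOffsets(batchId, streamName, shardInfoMap).
val offset = KinesisSourceOffset(exampleJson)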
Example 7
Source File: BlockingSource.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.streaming.util

import java.util.concurrent.CountDownLatch

import org.apache.spark.sql.{SQLContext, _}
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, Sink, Source}
import org.apache.spark.sql.sources.{StreamSinkProvider, StreamSourceProvider}
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

class BlockingSource extends StreamSourceProvider with StreamSinkProvider {

  private val fakeSchema = StructType(StructField("a", IntegerType) :: Nil)

  override def sourceSchema(
      spark: SQLContext,
      schema: Option[StructType],
      providerName: String,
      parameters: Map[String, String]): (String, StructType) = {
    ("dummySource", fakeSchema)
  }

  override def createSource(
      spark: SQLContext,
      metadataPath: String,
      schema: Option[StructType],
      providerName: String,
      parameters: Map[String, String]): Source = {
    BlockingSource.latch.await()
    new Source {
      override def schema: StructType = fakeSchema
      override def getOffset: Option[Offset] = Some(new LongOffset(0))
      override def getBatch(start: Option[Offset], end: Offset): DataFrame = {
        import spark.implicits._
        Seq[Int]().toDS().toDF()
      }
      override def stop() {}
    }
  }

  override def createSink(
      spark: SQLContext,
      parameters: Map[String, String],
      partitionColumns: Seq[String],
      outputMode: OutputMode): Sink = {
    new Sink {
      override def addBatch(batchId: Long, data: DataFrame): Unit = {}
    }
  }
}

object BlockingSource {
  var latch: CountDownLatch = null
}
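A hypothetical test-side sketch of how BlockingSource can be driven (the SparkSession name, checkpoint path, and timing are assumptions): the latch holds back source creation until the test releases it.

BlockingSource.latch = new java.util.concurrent.CountDownLatch(1)
val query = spark.readStream
  .format("org.apache.spark.sql.streaming.util.BlockingSource")
  .load()
  .writeStream
  .format("org.apache.spark.sql.streaming.util.BlockingSource")
  .option("checkpointLocation", "/tmp/blocking-source-checkpoint") // hypothetical path
  .start()
// ... assert on query state while createSource is still blocked ...
BlockingSource.latch.countDown() // unblock source creation
query.stop()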
Example 8
Source File: OffsetSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.streaming

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, SerializedOffset}

trait OffsetSuite extends SparkFunSuite {
  def compare(one: Offset, two: Offset): Unit = {
    test(s"comparison $one <=> $two") {
      assert(one == one)
      assert(two == two)
      assert(one != two)
      assert(two != one)
    }
  }
}

class LongOffsetSuite extends OffsetSuite {
  val one = LongOffset(1)
  val two = LongOffset(2)
  val three = LongOffset(3)
  compare(one, two)
  compare(LongOffset(SerializedOffset(one.json)), LongOffset(SerializedOffset(three.json)))
}
Example 9
Source File: BlockingSource.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.streaming.util

import java.util.concurrent.CountDownLatch

import org.apache.spark.sql.{SQLContext, _}
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, Sink, Source}
import org.apache.spark.sql.sources.{StreamSinkProvider, StreamSourceProvider}
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

class BlockingSource extends StreamSourceProvider with StreamSinkProvider {

  private val fakeSchema = StructType(StructField("a", IntegerType) :: Nil)

  override def sourceSchema(
      spark: SQLContext,
      schema: Option[StructType],
      providerName: String,
      parameters: Map[String, String]): (String, StructType) = {
    ("dummySource", fakeSchema)
  }

  override def createSource(
      spark: SQLContext,
      metadataPath: String,
      schema: Option[StructType],
      providerName: String,
      parameters: Map[String, String]): Source = {
    BlockingSource.latch.await()
    new Source {
      override def schema: StructType = fakeSchema
      override def getOffset: Option[Offset] = Some(new LongOffset(0))
      override def getBatch(start: Option[Offset], end: Offset): DataFrame = {
        import spark.implicits._
        Seq[Int]().toDS().toDF()
      }
      override def stop() {}
    }
  }

  override def createSink(
      spark: SQLContext,
      parameters: Map[String, String],
      partitionColumns: Seq[String],
      outputMode: OutputMode): Sink = {
    new Sink {
      override def addBatch(batchId: Long, data: DataFrame): Unit = {}
    }
  }
}

object BlockingSource {
  var latch: CountDownLatch = null
}
Example 10
Source File: StreamingQueryException.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.streaming

import org.apache.spark.annotation.Experimental
import org.apache.spark.sql.execution.streaming.{Offset, StreamExecution}

// Excerpt: the StreamingQueryException class declaration is not shown in this listing;
// the members below refer to its query and cause fields.

  val time: Long = System.currentTimeMillis

  override def toString(): String = {
    val causeStr =
      s"${cause.getMessage} ${cause.getStackTrace.take(10).mkString("", "\n|\t", "\n")}"
    s"""
       |$causeStr
       |
       |${query.asInstanceOf[StreamExecution].toDebugString}
      """.stripMargin
  }
} // closes the class whose declaration is omitted above
Example 11
Source File: KafkaSourceOffset.scala From spark-kafka-0-8-sql with Apache License 2.0
package com.hortonworks.spark.sql.kafka08

import kafka.common.TopicAndPartition

import org.apache.spark.sql.execution.streaming.Offset
import org.apache.spark.streaming.kafka.KafkaCluster.LeaderOffset

// Excerpt: the KafkaSourceOffset case class (which carries the partitionToOffsets map
// used below) is not shown in this listing.

object KafkaSourceOffset {

  def getPartitionOffsets(offset: Offset): Map[TopicAndPartition, LeaderOffset] = {
    offset match {
      case o: KafkaSourceOffset => o.partitionToOffsets
      case _ =>
        throw new IllegalArgumentException(
          s"Invalid conversion from offset of ${offset.getClass} to KafkaSourceOffset")
    }
  }
}
Example 12
Source File: BigQuerySource.scala From spark-bigquery with Apache License 2.0
package com.samelamin.spark.bigquery.streaming

import java.math.BigInteger

import com.google.cloud.hadoop.io.bigquery.BigQueryStrings
import com.samelamin.spark.bigquery.BigQueryClient
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.sql.execution.streaming.{Offset, _}
import org.apache.spark.sql.types.{BinaryType, StringType, StructField, StructType}
import com.samelamin.spark.bigquery._
import com.samelamin.spark.bigquery.converters.SchemaConverters
import org.joda.time.DateTime
import org.slf4j.LoggerFactory

// Excerpt: the BigQuerySource class declaration is not shown in this listing; the methods
// below refer to its members (sqlContext, fullyQualifiedOutputTableId, timestampColumn, logger).

  override def getBatch(start: Option[Offset], end: Offset): DataFrame = {
    val startIndex = start.getOrElse(LongOffset(0L)).asInstanceOf[LongOffset].offset.toLong
    val endIndex = end.asInstanceOf[LongOffset].offset.toLong
    val startPartitionTime = new DateTime(startIndex).toLocalDate
    val endPartitionTime = new DateTime(endIndex).toLocalDate.toString
    logger.info(s"Fetching data between $startIndex and $endIndex")
    val query =
      s"""
         |SELECT
         |  *
         |FROM
         |  `${fullyQualifiedOutputTableId.replace(':', '.')}`
         |WHERE
         |  $timestampColumn BETWEEN TIMESTAMP_MILLIS($startIndex) AND TIMESTAMP_MILLIS($endIndex)
         |  AND _PARTITIONTIME BETWEEN TIMESTAMP('$startPartitionTime') AND TIMESTAMP('$endPartitionTime')
         | """.stripMargin
    val bigQuerySQLContext = new BigQuerySQLContext(sqlContext)
    val df = bigQuerySQLContext.bigQuerySelect(query)
    df
  }

  override def stop(): Unit = {}

  def getConvertedSchema(sqlContext: SQLContext): StructType = {
    val bigqueryClient = BigQueryClient.getInstance(sqlContext)
    val tableReference = BigQueryStrings.parseTableReference(fullyQualifiedOutputTableId)
    SchemaConverters.BQToSQLSchema(bigqueryClient.getTableSchema(tableReference))
  }
} // closes the class whose declaration is omitted above

object BigQuerySource {
  val DEFAULT_SCHEMA = StructType(
    StructField("Sample Column", StringType) ::
      StructField("value", BinaryType) :: Nil
  )
}
Example 13
Source File: BlockingSource.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.streaming.util

import java.util.concurrent.CountDownLatch

import org.apache.spark.sql.{SQLContext, _}
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, Sink, Source}
import org.apache.spark.sql.sources.{StreamSinkProvider, StreamSourceProvider}
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

class BlockingSource extends StreamSourceProvider with StreamSinkProvider {

  private val fakeSchema = StructType(StructField("a", IntegerType) :: Nil)

  override def sourceSchema(
      spark: SQLContext,
      schema: Option[StructType],
      providerName: String,
      parameters: Map[String, String]): (String, StructType) = {
    ("dummySource", fakeSchema)
  }

  override def createSource(
      spark: SQLContext,
      metadataPath: String,
      schema: Option[StructType],
      providerName: String,
      parameters: Map[String, String]): Source = {
    BlockingSource.latch.await()
    new Source {
      override def schema: StructType = fakeSchema
      override def getOffset: Option[Offset] = Some(new LongOffset(0))
      override def getBatch(start: Option[Offset], end: Offset): DataFrame = {
        import spark.implicits._
        Seq[Int]().toDS().toDF()
      }
      override def stop() {}
    }
  }

  override def createSink(
      spark: SQLContext,
      parameters: Map[String, String],
      partitionColumns: Seq[String],
      outputMode: OutputMode): Sink = {
    new Sink {
      override def addBatch(batchId: Long, data: DataFrame): Unit = {}
    }
  }
}

object BlockingSource {
  var latch: CountDownLatch = null
}
Example 14
Source File: OffsetSuite.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.streaming

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, SerializedOffset}

trait OffsetSuite extends SparkFunSuite {
  def compare(one: Offset, two: Offset): Unit = {
    test(s"comparison $one <=> $two") {
      assert(one == one)
      assert(two == two)
      assert(one != two)
      assert(two != one)
    }
  }
}

class LongOffsetSuite extends OffsetSuite {
  val one = LongOffset(1)
  val two = LongOffset(2)
  val three = LongOffset(3)
  compare(one, two)
  compare(LongOffset(SerializedOffset(one.json)), LongOffset(SerializedOffset(three.json)))
}
Example 15
Source File: BlockingSource.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.streaming.util

import java.util.concurrent.CountDownLatch

import org.apache.spark.sql.{SQLContext, _}
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, Sink, Source}
import org.apache.spark.sql.sources.{StreamSinkProvider, StreamSourceProvider}
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

class BlockingSource extends StreamSourceProvider with StreamSinkProvider {

  private val fakeSchema = StructType(StructField("a", IntegerType) :: Nil)

  override def sourceSchema(
      spark: SQLContext,
      schema: Option[StructType],
      providerName: String,
      parameters: Map[String, String]): (String, StructType) = {
    ("dummySource", fakeSchema)
  }

  override def createSource(
      spark: SQLContext,
      metadataPath: String,
      schema: Option[StructType],
      providerName: String,
      parameters: Map[String, String]): Source = {
    BlockingSource.latch.await()
    new Source {
      override def schema: StructType = fakeSchema
      override def getOffset: Option[Offset] = Some(new LongOffset(0))
      override def getBatch(start: Option[Offset], end: Offset): DataFrame = {
        import spark.implicits._
        Seq[Int]().toDS().toDF()
      }
      override def stop() {}
    }
  }

  override def createSink(
      spark: SQLContext,
      parameters: Map[String, String],
      partitionColumns: Seq[String],
      outputMode: OutputMode): Sink = {
    new Sink {
      override def addBatch(batchId: Long, data: DataFrame): Unit = {}
    }
  }
}

object BlockingSource {
  var latch: CountDownLatch = null
}
Example 16
Source File: OffsetSuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.streaming

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, SerializedOffset}

trait OffsetSuite extends SparkFunSuite {
  def compare(one: Offset, two: Offset): Unit = {
    test(s"comparison $one <=> $two") {
      assert(one == one)
      assert(two == two)
      assert(one != two)
      assert(two != one)
    }
  }
}

class LongOffsetSuite extends OffsetSuite {
  val one = LongOffset(1)
  val two = LongOffset(2)
  val three = LongOffset(3)
  compare(one, two)
  compare(LongOffset(SerializedOffset(one.json)), LongOffset(SerializedOffset(three.json)))
}
Example 17
Source File: RedisSourceOffset.scala From spark-redis with BSD 3-Clause "New" or "Revised" License
package org.apache.spark.sql.redis.stream

import com.redislabs.provider.redis.util.JsonUtils
import org.apache.spark.sql.execution.streaming.{Offset, SerializedOffset}
import org.json4s.jackson.Serialization
import org.json4s.{Formats, NoTypeHints}

case class RedisSourceOffset(offsets: Map[String, RedisConsumerOffset]) extends Offset {
  override def json(): String = JsonUtils.toJson(this)
}

object RedisSourceOffset {

  private implicit val formats: Formats = Serialization.formats(NoTypeHints)

  def fromOffset(offset: Offset): RedisSourceOffset = {
    offset match {
      case o: RedisSourceOffset => o
      case so: SerializedOffset => fromJson(so.json)
      case _ =>
        throw new IllegalArgumentException(
          s"Invalid conversion from offset of ${offset.getClass} to RedisSourceOffset")
    }

    fromJson(offset.json())
  }

  def fromJson(json: String): RedisSourceOffset = {
    try {
      Serialization.read[RedisSourceOffset](json)
    } catch {
      case e: Throwable =>
        val example = RedisSourceOffset(Map("my-stream" ->
          RedisConsumerOffset("redis-source", "1543674099961-0")))
        val jsonExample = Serialization.write(example)
        throw new RuntimeException(s"Unable to parse offset json. Example of valid json: $jsonExample", e)
    }
  }
}

case class RedisConsumerOffset(groupName: String, offset: String)

case class RedisSourceOffsetRange(start: Option[String], end: String, config: RedisConsumerConfig)
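A short round-trip sketch, reusing the stream and group values from the error-message example above and the imports from this listing; SerializedOffset is how Spark hands back an offset recovered from the checkpoint log:

val original = RedisSourceOffset(Map("my-stream" -> RedisConsumerOffset("redis-source", "1543674099961-0")))
val restored = RedisSourceOffset.fromOffset(SerializedOffset(original.json()))
assert(restored == original)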
Example 18
Source File: OffsetSuite.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.streaming

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.execution.streaming.{CompositeOffset, LongOffset, Offset}

trait OffsetSuite extends SparkFunSuite {
  def compare(one: Offset, two: Offset): Unit = {
    test(s"comparison $one <=> $two") {
      assert(one == one)
      assert(two == two)
      assert(one != two)
      assert(two != one)
    }
  }
}

class LongOffsetSuite extends OffsetSuite {
  val one = LongOffset(1)
  val two = LongOffset(2)
  compare(one, two)
}

class CompositeOffsetSuite extends OffsetSuite {
  compare(
    one = CompositeOffset(Some(LongOffset(1)) :: Nil),
    two = CompositeOffset(Some(LongOffset(2)) :: Nil))

  compare(
    one = CompositeOffset(None :: Nil),
    two = CompositeOffset(Some(LongOffset(2)) :: Nil))

  compare(
    one = CompositeOffset.fill(LongOffset(0), LongOffset(1)),
    two = CompositeOffset.fill(LongOffset(1), LongOffset(2)))

  compare(
    one = CompositeOffset.fill(LongOffset(1), LongOffset(1)),
    two = CompositeOffset.fill(LongOffset(1), LongOffset(2)))
}