org.apache.spark.sql.execution.streaming.Offset Scala Examples
The following examples show how to use org.apache.spark.sql.execution.streaming.Offset.
Each example is taken from an open-source project; the project, source file, and license are noted in the header above each listing.
Example 1
Source File: OffsetSuite.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.streaming

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, SerializedOffset}

trait OffsetSuite extends SparkFunSuite {
  def compare(one: Offset, two: Offset): Unit = {
    test(s"comparison $one <=> $two") {
      assert(one == one)
      assert(two == two)
      assert(one != two)
      assert(two != one)
    }
  }
}

class LongOffsetSuite extends OffsetSuite {
  val one = LongOffset(1)
  val two = LongOffset(2)
  val three = LongOffset(3)
  compare(one, two)
  compare(LongOffset(SerializedOffset(one.json)), LongOffset(SerializedOffset(three.json)))
}
Example 2
Source File: CurrentPersistenceIdsQuerySourceProvider.scala From apache-spark-test with Apache License 2.0
package akka.persistence.jdbc.spark.sql.execution.streaming

import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, Source}
import org.apache.spark.sql.sources.{DataSourceRegister, StreamSourceProvider}
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{SQLContext, _}

object CurrentPersistenceIdsQuerySourceProvider {
  val name = "current-persistence-id"
  val schema: StructType = StructType(Array(
    StructField("persistence_id", StringType, nullable = false)
  ))
}

class CurrentPersistenceIdsQuerySourceProvider extends StreamSourceProvider
  with DataSourceRegister with Serializable {

  override def sourceSchema(
    sqlContext: SQLContext,
    schema: Option[StructType],
    providerName: String,
    parameters: Map[String, String]
  ): (String, StructType) = {
    CurrentPersistenceIdsQuerySourceProvider.name -> CurrentPersistenceIdsQuerySourceProvider.schema
  }

  override def createSource(
    sqlContext: SQLContext,
    metadataPath: String,
    schema: Option[StructType],
    providerName: String,
    parameters: Map[String, String]
  ): Source = {
    new CurrentPersistenceIdsQuerySourceImpl(sqlContext, parameters("path"))
  }

  override def shortName(): String = CurrentPersistenceIdsQuerySourceProvider.name
}

class CurrentPersistenceIdsQuerySourceImpl(val sqlContext: SQLContext, val readJournalPluginId: String)
  extends Source with ReadJournalSource {

  override def schema: StructType = CurrentPersistenceIdsQuerySourceProvider.schema

  override def getOffset: Option[Offset] = {
    val offset = maxPersistenceIds
    println("[CurrentPersistenceIdsQuery]: Returning maximum offset: " + offset)
    Some(LongOffset(offset))
  }

  override def getBatch(_start: Option[Offset], _end: Offset): DataFrame = {
    val (start, end) = getStartEnd(_start, _end)
    println(s"[CurrentPersistenceIdsQuery]: Getting currentPersistenceIds from start: $start, end: $end")
    import sqlContext.implicits._
    persistenceIds(start, end).toDF()
  }
}
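A minimal usage sketch, assuming a SparkSession named spark and an Akka Persistence JDBC read journal registered under the plugin id "jdbc-read-journal"; the "path" option is the value consumed by createSource above:

val persistenceIds = spark.readStream
  .format("akka.persistence.jdbc.spark.sql.execution.streaming.CurrentPersistenceIdsQuerySourceProvider")
  .option("path", "jdbc-read-journal") // becomes readJournalPluginId in the source implementation
  .load()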
Example 3
Source File: ReadJournalSource.scala From apache-spark-test with Apache License 2.0
package akka.persistence.jdbc.spark.sql.execution.streaming

import akka.actor.{ActorSystem, ExtendedActorSystem}
import akka.persistence.query.PersistenceQuery
import akka.persistence.query.scaladsl.{CurrentEventsByPersistenceIdQuery, CurrentEventsByTagQuery, CurrentPersistenceIdsQuery, ReadJournal}
import akka.stream.scaladsl.Sink
import akka.stream.scaladsl.extension.{Sink => Snk}
import akka.stream.{ActorMaterializer, Materializer}
import org.apache.spark.sql._
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, Source}
import org.apache.spark.sql.types.StructType

import scala.collection.immutable._
import scala.concurrent.duration.{FiniteDuration, _}
import scala.concurrent.{Await, ExecutionContext, Future}

trait ReadJournalSource { _: Source =>

  def readJournalPluginId: String
  def sqlContext: SQLContext

  // some machinery
  implicit val system: ActorSystem = ActorSystem()
  implicit val mat: Materializer = ActorMaterializer()
  implicit val ec: ExecutionContext = system.dispatcher

  // read journal, only interested in the Current queries, as Spark isn't asynchronous
  lazy val readJournal = PersistenceQuery(system).readJournalFor(readJournalPluginId)
    .asInstanceOf[ReadJournal with CurrentPersistenceIdsQuery with CurrentEventsByPersistenceIdQuery with CurrentEventsByTagQuery]

  implicit class FutureOps[A](f: Future[A])(implicit ec: ExecutionContext, timeout: FiniteDuration = null) {
    def futureValue: A = Await.result(f, Option(timeout).getOrElse(10.seconds))
  }

  def maxPersistenceIds: Long =
    readJournal.currentPersistenceIds().runWith(Snk.count).futureValue

  def persistenceIds(start: Long, end: Long) =
    readJournal.currentPersistenceIds().drop(start).take(end).runWith(Sink.seq).futureValue

  def maxEventsByPersistenceId(pid: String): Long =
    readJournal.currentEventsByPersistenceId(pid, 0, Long.MaxValue).runWith(Snk.count).futureValue

  def eventsByPersistenceId(pid: String, start: Long, end: Long, eventMapperFQCN: String): Seq[Row] = {
    readJournal.currentEventsByPersistenceId(pid, start, end)
      .map(env => getMapper(eventMapperFQCN).get.row(env, sqlContext)).runWith(Sink.seq).futureValue
  }

  implicit def mapToDataFrame(rows: Seq[Row]): DataFrame = {
    import scala.collection.JavaConversions._
    sqlContext.createDataFrame(rows, schema)
  }

  def getStartEnd(_start: Option[Offset], _end: Offset): (Long, Long) = (_start, _end) match {
    case (Some(LongOffset(start)), LongOffset(end)) => (start, end)
    case (None, LongOffset(end))                    => (0L, end)
  }

  def getMapper(eventMapperFQCN: String): Option[EventMapper] =
    system.asInstanceOf[ExtendedActorSystem].dynamicAccess.createInstanceFor[EventMapper](eventMapperFQCN, List.empty)
      .recover { case cause => cause.printStackTrace(); null }.toOption

  override def stop(): Unit = {
    println("Stopping jdbc read journal")
    system.terminate()
  }
}
Example 4
Source File: CurrentEventsByPersistenceIdQuerySourceProvider.scala From apache-spark-test with Apache License 2.0
package akka.persistence.jdbc.spark.sql.execution.streaming

import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, Source}
import org.apache.spark.sql.sources.{DataSourceRegister, StreamSourceProvider}
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{SQLContext, _}

object CurrentEventsByPersistenceIdQuerySourceProvider {
  val name = "current-events-by-persistence-id"
}

class CurrentEventsByPersistenceIdQuerySourceProvider extends StreamSourceProvider
  with DataSourceRegister with Serializable {

  override def sourceSchema(
    sqlContext: SQLContext,
    schema: Option[StructType],
    providerName: String,
    parameters: Map[String, String]
  ): (String, StructType) = {
    println(s"[CurrentEventsByPersistenceIdQuerySourceProvider.sourceSchema]: schema: $schema, providerName: $providerName, parameters: $parameters")
    CurrentEventsByPersistenceIdQuerySourceProvider.name -> schema.get
  }

  override def createSource(
    sqlContext: SQLContext,
    metadataPath: String,
    schema: Option[StructType],
    providerName: String,
    parameters: Map[String, String]
  ): Source = {
    val eventMapperFQCN: String = parameters.get("event-mapper") match {
      case Some(_eventMapper) => _eventMapper
      case _                  => throw new RuntimeException("No event mapper FQCN")
    }

    val pid = (parameters.get("pid"), parameters.get("persistence-id")) match {
      case (Some(pid), _) => pid
      case (_, Some(pid)) => pid
      case _              => throw new RuntimeException("No persistence_id")
    }

    new CurrentEventsByPersistenceIdQuerySourceImpl(sqlContext, parameters("path"), eventMapperFQCN, pid, schema.get)
  }

  override def shortName(): String = CurrentEventsByPersistenceIdQuerySourceProvider.name
}

class CurrentEventsByPersistenceIdQuerySourceImpl(
  val sqlContext: SQLContext,
  val readJournalPluginId: String,
  eventMapperFQCN: String,
  persistenceId: String,
  override val schema: StructType
) extends Source with ReadJournalSource {

  override def getOffset: Option[Offset] = {
    val offset = maxEventsByPersistenceId(persistenceId)
    println("[CurrentEventsByPersistenceIdQuery]: Returning maximum offset: " + offset)
    Some(LongOffset(offset))
  }

  override def getBatch(_start: Option[Offset], _end: Offset): DataFrame = {
    val (start, end) = getStartEnd(_start, _end)
    val df: DataFrame = eventsByPersistenceId(persistenceId, start, end, eventMapperFQCN)
    println(s"[CurrentEventsByPersistenceIdQuery]: Getting currentPersistenceIds from start: $start, end: $end, DataFrame.count: ${df.count}")
    df
  }
}
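A minimal usage sketch along the same lines; the schema value, the event-mapper class name, and the persistence id below are hypothetical, while the option keys mirror what createSource above reads:

val events = spark.readStream
  .format("akka.persistence.jdbc.spark.sql.execution.streaming.CurrentEventsByPersistenceIdQuerySourceProvider")
  .schema(myEventSchema)                               // user-supplied StructType (schema.get above)
  .option("path", "jdbc-read-journal")                 // read journal plugin id
  .option("event-mapper", "com.example.MyEventMapper") // hypothetical EventMapper implementation
  .option("persistence-id", "account-1")               // hypothetical persistence id
  .load()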
Example 5
Source File: PulsarOffset.scala From pulsar-spark with Apache License 2.0
package org.apache.spark.sql.pulsar

import org.apache.pulsar.client.api.MessageId
import org.apache.pulsar.client.impl.MessageIdImpl
import org.apache.spark.sql.execution.streaming.{Offset, SerializedOffset}
import org.apache.spark.sql.sources.v2.reader.streaming.{Offset => OffsetV2, PartitionOffset}

private[pulsar] sealed trait PulsarOffset

private[pulsar] case object EarliestOffset extends PulsarOffset

private[pulsar] case object LatestOffset extends PulsarOffset

private[pulsar] case class TimeOffset(ts: Long) extends PulsarOffset

private[pulsar] sealed trait PerTopicOffset extends PulsarOffset

private[pulsar] case class SpecificPulsarOffset(topicOffsets: Map[String, MessageId])
  extends OffsetV2 with PerTopicOffset {

  override val json = JsonUtils.topicOffsets(topicOffsets)
}

private[pulsar] case class SpecificPulsarStartingTime(topicTimes: Map[String, Long])
  extends OffsetV2 with PerTopicOffset {

  override def json(): String = JsonUtils.topicTimes(topicTimes)
}

private[pulsar] case class PulsarPartitionOffset(topic: String, messageId: MessageId)
  extends PartitionOffset

private[pulsar] object SpecificPulsarOffset {

  def getTopicOffsets(offset: Offset): Map[String, MessageId] = {
    offset match {
      case o: SpecificPulsarOffset => o.topicOffsets
      case so: SerializedOffset => SpecificPulsarOffset(so).topicOffsets
      case _ =>
        throw new IllegalArgumentException(
          s"Invalid conversion from offset of ${offset.getClass} to PulsarSourceOffset")
    }
  }

  def apply(offset: SerializedOffset): SpecificPulsarOffset =
    SpecificPulsarOffset(JsonUtils.topicOffsets(offset.json))

  def apply(offsetTuples: (String, MessageId)*): SpecificPulsarOffset = {
    SpecificPulsarOffset(offsetTuples.toMap)
  }
}

private[pulsar] case class UserProvidedMessageId(mid: MessageId)
  extends MessageIdImpl(
    mid.asInstanceOf[MessageIdImpl].getLedgerId,
    mid.asInstanceOf[MessageIdImpl].getEntryId,
    mid.asInstanceOf[MessageIdImpl].getPartitionIndex)
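A brief sketch of how these offsets compose. Since SpecificPulsarOffset and its companion are private[pulsar], this only compiles from code placed in the org.apache.spark.sql.pulsar package; the topic name is a made-up placeholder:

import org.apache.pulsar.client.api.MessageId

// Build an offset for a single topic and read the per-topic map back out.
val offset = SpecificPulsarOffset("persistent://public/default/topic-1" -> MessageId.earliest)
val perTopic: Map[String, MessageId] = SpecificPulsarOffset.getTopicOffsets(offset)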
Example 6
Source File: KinesisSourceOffset.scala From kinesis-sql with Apache License 2.0
package org.apache.spark.sql.kinesis

import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization

import scala.collection.mutable.HashMap
import scala.util.control.NonFatal

import org.apache.spark.sql.execution.streaming.Offset
import org.apache.spark.sql.execution.streaming.SerializedOffset
import org.apache.spark.sql.sources.v2.reader.streaming.{Offset => OffsetV2, PartitionOffset}

// Excerpt: the KinesisSourceOffset case class and the opening of its companion object
// are not shown in this listing; the two methods below belong to that companion.

  def apply(json: String): KinesisSourceOffset = {
    try {
      val readObj = Serialization.read[Map[String, Map[String, String]]](json)
      val metadata = readObj.get("metadata")
      val shardInfoMap: Map[String, ShardInfo] = readObj.filter(_._1 != "metadata").map {
        case (shardId, value) =>
          shardId.toString -> new ShardInfo(
            shardId.toString,
            value.get("iteratorType").get,
            value.get("iteratorPosition").get)
      }.toMap
      KinesisSourceOffset(
        new ShardOffsets(
          metadata.get("batchId").toLong,
          metadata.get("streamName"),
          shardInfoMap))
    } catch {
      case NonFatal(x) => throw new IllegalArgumentException(x)
    }
  }

  def getMap(shardInfos: Array[ShardInfo]): Map[String, ShardInfo] = {
    shardInfos.map { s: ShardInfo => (s.shardId -> s) }.toMap
  }
} // closes the companion object whose opening line is omitted above
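For reference, a sketch of the offset JSON layout that apply(json) above expects; the shard id, iterator type, and iterator position values are hypothetical placeholders:

val exampleJson =
  """{
    |  "metadata" : { "batchId" : "7", "streamName" : "my-stream" },
    |  "shardId-000000000000" : {
    |    "iteratorType" : "AFTER_SEQUENCE_NUMBER",
    |    "iteratorPosition" : "49605240428222087626767244429818661291169525738424631298"
    |  }
    |}""".stripMargin

// Parsed into a KinesisSourceOffset carrying ShardOffsets(batchId, streamName, shardInfoMap).
val offset = KinesisSourceOffset(exampleJson)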
Example 7
Source File: BlockingSource.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.streaming.util

import java.util.concurrent.CountDownLatch

import org.apache.spark.sql.{SQLContext, _}
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, Sink, Source}
import org.apache.spark.sql.sources.{StreamSinkProvider, StreamSourceProvider}
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

class BlockingSource extends StreamSourceProvider with StreamSinkProvider {

  private val fakeSchema = StructType(StructField("a", IntegerType) :: Nil)

  override def sourceSchema(
      spark: SQLContext,
      schema: Option[StructType],
      providerName: String,
      parameters: Map[String, String]): (String, StructType) = {
    ("dummySource", fakeSchema)
  }

  override def createSource(
      spark: SQLContext,
      metadataPath: String,
      schema: Option[StructType],
      providerName: String,
      parameters: Map[String, String]): Source = {
    BlockingSource.latch.await()
    new Source {
      override def schema: StructType = fakeSchema
      override def getOffset: Option[Offset] = Some(new LongOffset(0))
      override def getBatch(start: Option[Offset], end: Offset): DataFrame = {
        import spark.implicits._
        Seq[Int]().toDS().toDF()
      }
      override def stop() {}
    }
  }

  override def createSink(
      spark: SQLContext,
      parameters: Map[String, String],
      partitionColumns: Seq[String],
      outputMode: OutputMode): Sink = {
    new Sink {
      override def addBatch(batchId: Long, data: DataFrame): Unit = {}
    }
  }
}

object BlockingSource {
  var latch: CountDownLatch = null
}
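A hypothetical test-side sketch of how BlockingSource can be driven (the SparkSession name, checkpoint path, and timing are assumptions): the latch holds back source creation until the test releases it.

BlockingSource.latch = new java.util.concurrent.CountDownLatch(1)
val query = spark.readStream
  .format("org.apache.spark.sql.streaming.util.BlockingSource")
  .load()
  .writeStream
  .format("org.apache.spark.sql.streaming.util.BlockingSource")
  .option("checkpointLocation", "/tmp/blocking-source-checkpoint") // hypothetical path
  .start()
// ... assert on query state while createSource is still blocked ...
BlockingSource.latch.countDown() // unblock source creation
query.stop()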
Example 8
Source File: OffsetSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.streaming

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, SerializedOffset}

trait OffsetSuite extends SparkFunSuite {
  def compare(one: Offset, two: Offset): Unit = {
    test(s"comparison $one <=> $two") {
      assert(one == one)
      assert(two == two)
      assert(one != two)
      assert(two != one)
    }
  }
}

class LongOffsetSuite extends OffsetSuite {
  val one = LongOffset(1)
  val two = LongOffset(2)
  val three = LongOffset(3)
  compare(one, two)
  compare(LongOffset(SerializedOffset(one.json)), LongOffset(SerializedOffset(three.json)))
}
Example 9
Source File: BlockingSource.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.streaming.util

import java.util.concurrent.CountDownLatch

import org.apache.spark.sql.{SQLContext, _}
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, Sink, Source}
import org.apache.spark.sql.sources.{StreamSinkProvider, StreamSourceProvider}
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

class BlockingSource extends StreamSourceProvider with StreamSinkProvider {

  private val fakeSchema = StructType(StructField("a", IntegerType) :: Nil)

  override def sourceSchema(
      spark: SQLContext,
      schema: Option[StructType],
      providerName: String,
      parameters: Map[String, String]): (String, StructType) = {
    ("dummySource", fakeSchema)
  }

  override def createSource(
      spark: SQLContext,
      metadataPath: String,
      schema: Option[StructType],
      providerName: String,
      parameters: Map[String, String]): Source = {
    BlockingSource.latch.await()
    new Source {
      override def schema: StructType = fakeSchema
      override def getOffset: Option[Offset] = Some(new LongOffset(0))
      override def getBatch(start: Option[Offset], end: Offset): DataFrame = {
        import spark.implicits._
        Seq[Int]().toDS().toDF()
      }
      override def stop() {}
    }
  }

  override def createSink(
      spark: SQLContext,
      parameters: Map[String, String],
      partitionColumns: Seq[String],
      outputMode: OutputMode): Sink = {
    new Sink {
      override def addBatch(batchId: Long, data: DataFrame): Unit = {}
    }
  }
}

object BlockingSource {
  var latch: CountDownLatch = null
}
Example 10
Source File: StreamingQueryException.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.streaming

import org.apache.spark.annotation.Experimental
import org.apache.spark.sql.execution.streaming.{Offset, StreamExecution}

// Excerpt: the StreamingQueryException class declaration is not shown in this listing;
// the members below refer to its query and cause fields.

  val time: Long = System.currentTimeMillis

  override def toString(): String = {
    val causeStr =
      s"${cause.getMessage} ${cause.getStackTrace.take(10).mkString("", "\n|\t", "\n")}"
    s"""
       |$causeStr
       |
       |${query.asInstanceOf[StreamExecution].toDebugString}
      """.stripMargin
  }
} // closes the class whose declaration is omitted above
Example 11
Source File: KafkaSourceOffset.scala From spark-kafka-0-8-sql with Apache License 2.0
package com.hortonworks.spark.sql.kafka08

import kafka.common.TopicAndPartition

import org.apache.spark.sql.execution.streaming.Offset
import org.apache.spark.streaming.kafka.KafkaCluster.LeaderOffset

// Excerpt: the KafkaSourceOffset case class (which carries the partitionToOffsets map
// used below) is not shown in this listing.

object KafkaSourceOffset {

  def getPartitionOffsets(offset: Offset): Map[TopicAndPartition, LeaderOffset] = {
    offset match {
      case o: KafkaSourceOffset => o.partitionToOffsets
      case _ =>
        throw new IllegalArgumentException(
          s"Invalid conversion from offset of ${offset.getClass} to KafkaSourceOffset")
    }
  }
}
Example 12
Source File: BigQuerySource.scala From spark-bigquery with Apache License 2.0
package com.samelamin.spark.bigquery.streaming

import java.math.BigInteger

import com.google.cloud.hadoop.io.bigquery.BigQueryStrings
import com.samelamin.spark.bigquery.BigQueryClient
import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.sql.execution.streaming.{Offset, _}
import org.apache.spark.sql.types.{BinaryType, StringType, StructField, StructType}
import com.samelamin.spark.bigquery._
import com.samelamin.spark.bigquery.converters.SchemaConverters
import org.joda.time.DateTime
import org.slf4j.LoggerFactory

// Excerpt: the BigQuerySource class declaration is not shown in this listing; the methods
// below refer to its members (sqlContext, fullyQualifiedOutputTableId, timestampColumn, logger).

  override def getBatch(start: Option[Offset], end: Offset): DataFrame = {
    val startIndex = start.getOrElse(LongOffset(0L)).asInstanceOf[LongOffset].offset.toLong
    val endIndex = end.asInstanceOf[LongOffset].offset.toLong
    val startPartitionTime = new DateTime(startIndex).toLocalDate
    val endPartitionTime = new DateTime(endIndex).toLocalDate.toString
    logger.info(s"Fetching data between $startIndex and $endIndex")
    val query =
      s"""
         |SELECT
         |  *
         |FROM
         |  `${fullyQualifiedOutputTableId.replace(':', '.')}`
         |WHERE
         |  $timestampColumn BETWEEN TIMESTAMP_MILLIS($startIndex) AND TIMESTAMP_MILLIS($endIndex)
         |  AND _PARTITIONTIME BETWEEN TIMESTAMP('$startPartitionTime') AND TIMESTAMP('$endPartitionTime')
         | """.stripMargin
    val bigQuerySQLContext = new BigQuerySQLContext(sqlContext)
    val df = bigQuerySQLContext.bigQuerySelect(query)
    df
  }

  override def stop(): Unit = {}

  def getConvertedSchema(sqlContext: SQLContext): StructType = {
    val bigqueryClient = BigQueryClient.getInstance(sqlContext)
    val tableReference = BigQueryStrings.parseTableReference(fullyQualifiedOutputTableId)
    SchemaConverters.BQToSQLSchema(bigqueryClient.getTableSchema(tableReference))
  }
} // closes the class whose declaration is omitted above

object BigQuerySource {
  val DEFAULT_SCHEMA = StructType(
    StructField("Sample Column", StringType) ::
      StructField("value", BinaryType) :: Nil
  )
}
Example 13
Source File: BlockingSource.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.streaming.util

import java.util.concurrent.CountDownLatch

import org.apache.spark.sql.{SQLContext, _}
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, Sink, Source}
import org.apache.spark.sql.sources.{StreamSinkProvider, StreamSourceProvider}
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

class BlockingSource extends StreamSourceProvider with StreamSinkProvider {

  private val fakeSchema = StructType(StructField("a", IntegerType) :: Nil)

  override def sourceSchema(
      spark: SQLContext,
      schema: Option[StructType],
      providerName: String,
      parameters: Map[String, String]): (String, StructType) = {
    ("dummySource", fakeSchema)
  }

  override def createSource(
      spark: SQLContext,
      metadataPath: String,
      schema: Option[StructType],
      providerName: String,
      parameters: Map[String, String]): Source = {
    BlockingSource.latch.await()
    new Source {
      override def schema: StructType = fakeSchema
      override def getOffset: Option[Offset] = Some(new LongOffset(0))
      override def getBatch(start: Option[Offset], end: Offset): DataFrame = {
        import spark.implicits._
        Seq[Int]().toDS().toDF()
      }
      override def stop() {}
    }
  }

  override def createSink(
      spark: SQLContext,
      parameters: Map[String, String],
      partitionColumns: Seq[String],
      outputMode: OutputMode): Sink = {
    new Sink {
      override def addBatch(batchId: Long, data: DataFrame): Unit = {}
    }
  }
}

object BlockingSource {
  var latch: CountDownLatch = null
}
Example 14
Source File: OffsetSuite.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.streaming

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, SerializedOffset}

trait OffsetSuite extends SparkFunSuite {
  def compare(one: Offset, two: Offset): Unit = {
    test(s"comparison $one <=> $two") {
      assert(one == one)
      assert(two == two)
      assert(one != two)
      assert(two != one)
    }
  }
}

class LongOffsetSuite extends OffsetSuite {
  val one = LongOffset(1)
  val two = LongOffset(2)
  val three = LongOffset(3)
  compare(one, two)
  compare(LongOffset(SerializedOffset(one.json)), LongOffset(SerializedOffset(three.json)))
}
Example 15
Source File: BlockingSource.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.streaming.util

import java.util.concurrent.CountDownLatch

import org.apache.spark.sql.{SQLContext, _}
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, Sink, Source}
import org.apache.spark.sql.sources.{StreamSinkProvider, StreamSourceProvider}
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

class BlockingSource extends StreamSourceProvider with StreamSinkProvider {

  private val fakeSchema = StructType(StructField("a", IntegerType) :: Nil)

  override def sourceSchema(
      spark: SQLContext,
      schema: Option[StructType],
      providerName: String,
      parameters: Map[String, String]): (String, StructType) = {
    ("dummySource", fakeSchema)
  }

  override def createSource(
      spark: SQLContext,
      metadataPath: String,
      schema: Option[StructType],
      providerName: String,
      parameters: Map[String, String]): Source = {
    BlockingSource.latch.await()
    new Source {
      override def schema: StructType = fakeSchema
      override def getOffset: Option[Offset] = Some(new LongOffset(0))
      override def getBatch(start: Option[Offset], end: Offset): DataFrame = {
        import spark.implicits._
        Seq[Int]().toDS().toDF()
      }
      override def stop() {}
    }
  }

  override def createSink(
      spark: SQLContext,
      parameters: Map[String, String],
      partitionColumns: Seq[String],
      outputMode: OutputMode): Sink = {
    new Sink {
      override def addBatch(batchId: Long, data: DataFrame): Unit = {}
    }
  }
}

object BlockingSource {
  var latch: CountDownLatch = null
}
Example 16
Source File: OffsetSuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.streaming

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.execution.streaming.{LongOffset, Offset, SerializedOffset}

trait OffsetSuite extends SparkFunSuite {
  def compare(one: Offset, two: Offset): Unit = {
    test(s"comparison $one <=> $two") {
      assert(one == one)
      assert(two == two)
      assert(one != two)
      assert(two != one)
    }
  }
}

class LongOffsetSuite extends OffsetSuite {
  val one = LongOffset(1)
  val two = LongOffset(2)
  val three = LongOffset(3)
  compare(one, two)
  compare(LongOffset(SerializedOffset(one.json)), LongOffset(SerializedOffset(three.json)))
}
Example 17
Source File: RedisSourceOffset.scala From spark-redis with BSD 3-Clause "New" or "Revised" License
package org.apache.spark.sql.redis.stream

import com.redislabs.provider.redis.util.JsonUtils
import org.apache.spark.sql.execution.streaming.{Offset, SerializedOffset}
import org.json4s.jackson.Serialization
import org.json4s.{Formats, NoTypeHints}

case class RedisSourceOffset(offsets: Map[String, RedisConsumerOffset]) extends Offset {
  override def json(): String = JsonUtils.toJson(this)
}

object RedisSourceOffset {

  private implicit val formats: Formats = Serialization.formats(NoTypeHints)

  def fromOffset(offset: Offset): RedisSourceOffset = {
    offset match {
      case o: RedisSourceOffset => o
      case so: SerializedOffset => fromJson(so.json)
      case _ =>
        throw new IllegalArgumentException(
          s"Invalid conversion from offset of ${offset.getClass} to RedisSourceOffset")
    }

    fromJson(offset.json())
  }

  def fromJson(json: String): RedisSourceOffset = {
    try {
      Serialization.read[RedisSourceOffset](json)
    } catch {
      case e: Throwable =>
        val example = RedisSourceOffset(Map("my-stream" ->
          RedisConsumerOffset("redis-source", "1543674099961-0")))
        val jsonExample = Serialization.write(example)
        throw new RuntimeException(s"Unable to parse offset json. Example of valid json: $jsonExample", e)
    }
  }
}

case class RedisConsumerOffset(groupName: String, offset: String)

case class RedisSourceOffsetRange(start: Option[String], end: String, config: RedisConsumerConfig)
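A short round-trip sketch, reusing the stream and group values from the error-message example above and the imports from this listing; SerializedOffset is how Spark hands back an offset recovered from the checkpoint log:

val original = RedisSourceOffset(Map("my-stream" -> RedisConsumerOffset("redis-source", "1543674099961-0")))
val restored = RedisSourceOffset.fromOffset(SerializedOffset(original.json()))
assert(restored == original)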
Example 18
Source File: OffsetSuite.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.streaming

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.execution.streaming.{CompositeOffset, LongOffset, Offset}

trait OffsetSuite extends SparkFunSuite {
  def compare(one: Offset, two: Offset): Unit = {
    test(s"comparison $one <=> $two") {
      assert(one == one)
      assert(two == two)
      assert(one != two)
      assert(two != one)
    }
  }
}

class LongOffsetSuite extends OffsetSuite {
  val one = LongOffset(1)
  val two = LongOffset(2)
  compare(one, two)
}

class CompositeOffsetSuite extends OffsetSuite {
  compare(
    one = CompositeOffset(Some(LongOffset(1)) :: Nil),
    two = CompositeOffset(Some(LongOffset(2)) :: Nil))

  compare(
    one = CompositeOffset(None :: Nil),
    two = CompositeOffset(Some(LongOffset(2)) :: Nil))

  compare(
    one = CompositeOffset.fill(LongOffset(0), LongOffset(1)),
    two = CompositeOffset.fill(LongOffset(1), LongOffset(2)))

  compare(
    one = CompositeOffset.fill(LongOffset(1), LongOffset(1)),
    two = CompositeOffset.fill(LongOffset(1), LongOffset(2)))
}