org.apache.spark.sql.execution.streaming.MemoryStream Scala Examples
The following examples show how to use org.apache.spark.sql.execution.streaming.MemoryStream, an in-memory streaming source from Spark's internals that is used primarily for testing Structured Streaming queries. Each example is taken from an open-source project; the source file and license are noted above each listing.
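The examples below are full test suites, but the core MemoryStream pattern they all share is small: create the stream with an implicit SQLContext and Encoder in scope, start a streaming query over it, push data with addData, and drive processing with processAllAvailable. Here is a minimal, self-contained sketch of that pattern; the object name and query name are illustrative and not taken from any of the projects below.

import org.apache.spark.sql.{SQLContext, SparkSession}
import org.apache.spark.sql.execution.streaming.MemoryStream

object MemoryStreamSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("memory-stream-sketch")   // illustrative app name
      .getOrCreate()
    import spark.implicits._                                // provides Encoder[String]
    implicit val sqlContext: SQLContext = spark.sqlContext  // required by MemoryStream.apply

    // Create the in-memory source and a derived streaming Dataset.
    val input = MemoryStream[String]
    val doubled = input.toDS().map(s => s + " " + s)

    // Write to the built-in memory sink so results can be queried back as a table.
    val query = doubled.writeStream
      .format("memory")
      .queryName("sketch")
      .outputMode("append")
      .start()

    // Push data into the source, then block until the query has consumed it all.
    input.addData("hi", "bye")
    query.processAllAvailable()

    spark.sql("select * from sketch").show()

    query.stop()
    spark.stop()
  }
}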
Example 1
Source File: CustomSinkSuite.scala, from spark-structured-streaming-ml (Apache License 2.0)
package com.highperformancespark.examples.structuredstreaming

import com.holdenkarau.spark.testing.DataFrameSuiteBase

import scala.collection.mutable.ListBuffer

import org.scalatest.FunSuite
import org.apache.spark._
import org.apache.spark.sql.{Dataset, DataFrame, Encoder, SQLContext}
import org.apache.spark.sql.execution.streaming.MemoryStream

class CustomSinkSuite extends FunSuite with DataFrameSuiteBase {

  test("really simple test of the custom sink") {
    import spark.implicits._
    val input = MemoryStream[String]
    val doubled = input.toDS().map(x => x + " " + x)
    val formatName = ("com.highperformancespark.examples" +
      "structuredstreaming.CustomSinkCollectorProvider")
    val query = doubled.writeStream
      .queryName("testCustomSinkBasic")
      .format(formatName)
      .start()
    val inputData = List("hi", "holden", "bye", "pandas")
    input.addData(inputData)
    assert(query.isActive === true)
    query.processAllAvailable()
    assert(query.exception === None)
    assert(Pandas.results(0) === inputData.map(x => x + " " + x))
  }
}

object Pandas {
  val results = new ListBuffer[Seq[String]]()
}

class CustomSinkCollectorProvider extends ForeachDatasetSinkProvider {
  override def func(df: DataFrame) {
    val spark = df.sparkSession
    import spark.implicits._
    Pandas.results += df.as[String].rdd.collect()
  }
}
Example 2
Source File: StreamingKMeansSuite.scala, from spark-structured-streaming-ml (Apache License 2.0)
package com.highperformancespark.examples.structuredstreaming

import com.holdenkarau.spark.testing.DataFrameSuiteBase

import org.apache.spark.mllib.clustering.{KMeans, KMeansModel}
import org.apache.spark.ml.linalg._
import org.apache.spark.sql.{DataFrame, Dataset}
import org.apache.spark.sql.execution.streaming.MemoryStream
import org.scalatest.FunSuite
import org.apache.log4j.{Level, Logger}

case class TestRow(features: Vector)

class StreamingKMeansSuite extends FunSuite with DataFrameSuiteBase {

  override def beforeAll(): Unit = {
    super.beforeAll()
    Logger.getLogger("org").setLevel(Level.OFF)
  }

  test("streaming model with one center should converge to true center") {
    import spark.implicits._
    val k = 1
    val dim = 5
    val clusterSpread = 0.1
    val seed = 63
    // TODO: this test is very flaky. The centers do not converge for some
    // (most?) random seeds
    val (batches, trueCenters) =
      StreamingKMeansSuite.generateBatches(100, 80, k, dim, clusterSpread, seed)
    val inputStream = MemoryStream[TestRow]
    val ds = inputStream.toDS()
    val skm = new StreamingKMeans().setK(k).setRandomCenters(dim, 0.01)
    val query = skm.evilTrain(ds.toDF())
    val streamingModels = batches.map { batch =>
      inputStream.addData(batch)
      query.processAllAvailable()
      skm.getModel
    }
    // TODO: use spark's testing suite
    streamingModels.last.centers.zip(trueCenters).foreach {
      case (center, trueCenter) =>
        val centers = center.toArray.mkString(",")
        val trueCenters = trueCenter.toArray.mkString(",")
        println(s"${centers} | ${trueCenters}")
        assert(center.toArray.zip(trueCenter.toArray).forall(
          x => math.abs(x._1 - x._2) < 0.1))
    }
    query.stop()
  }

  def compareBatchAndStreaming(
      batchModel: KMeansModel,
      streamingModel: StreamingKMeansModel,
      validationData: DataFrame): Unit = {
    assert(batchModel.clusterCenters === streamingModel.centers)
    // TODO: implement prediction comparison
  }
}

object StreamingKMeansSuite {

  def generateBatches(
      numPoints: Int,
      numBatches: Int,
      k: Int,
      d: Int,
      r: Double,
      seed: Int,
      initCenters: Array[Vector] = null):
      (IndexedSeq[IndexedSeq[TestRow]], Array[Vector]) = {
    val rand = scala.util.Random
    rand.setSeed(seed)
    val centers = initCenters match {
      case null =>
        Array.fill(k)(Vectors.dense(Array.fill(d)(rand.nextGaussian())))
      case _ =>
        initCenters
    }
    val data = (0 until numBatches).map { i =>
      (0 until numPoints).map { idx =>
        val center = centers(idx % k)
        val vec = Vectors.dense(
          Array.tabulate(d)(x => center(x) + rand.nextGaussian() * r))
        TestRow(vec)
      }
    }
    (data, centers)
  }
}
Example 3
Source File: EvolvabilitySuiteBase.scala, from delta (Apache License 2.0)
package org.apache.spark.sql.delta

import java.io.File

import org.apache.spark.sql.delta.actions.{Action, FileAction, SingleAction}
import org.apache.hadoop.fs.Path

import org.apache.spark.sql.{QueryTest, SparkSession}
import org.apache.spark.sql.execution.streaming.MemoryStream
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.sql.types.StructType
import org.apache.spark.util.Utils

trait EvolvabilitySuiteBase extends QueryTest with SharedSparkSession {
  import testImplicits._

  protected def testEvolvability(tablePath: String): Unit = {
    // Check we can load everything from a log checkpoint
    val deltaLog = DeltaLog.forTable(spark, new Path(tablePath))
    val path = deltaLog.dataPath.toString
    checkDatasetUnorderly(
      spark.read.format("delta").load(path).select("id", "value").as[(Int, String)],
      4 -> "d", 5 -> "e", 6 -> "f")
    assert(deltaLog.snapshot.metadata.schema === StructType.fromDDL("id INT, value STRING"))
    assert(deltaLog.snapshot.metadata.partitionSchema === StructType.fromDDL("id INT"))
    // Check we can load CheckpointMetaData
    assert(deltaLog.lastCheckpoint === Some(CheckpointMetaData(3, 6L, None)))
    // Check we can parse all `Action`s in delta files. It doesn't check correctness.
    deltaLog.getChanges(0L).toList.map(_._2.toList)
  }
}

// scalastyle:off
// NOTE: the companion object's header and its generateData method are elided in this excerpt;
// the object wrapper below is restored so the remaining definitions compile.
object EvolvabilitySuiteBase {

  def validateData(spark: SparkSession, path: String): Unit = {
    import org.apache.spark.sql.delta.util.FileNames._
    import scala.reflect.runtime.{universe => ru}
    import spark.implicits._

    val mirror = ru.runtimeMirror(this.getClass.getClassLoader)
    val tpe = ru.typeOf[Action]
    val clazz = tpe.typeSymbol.asClass
    assert(clazz.isSealed, s"${classOf[Action]} must be sealed")

    val deltaLog = DeltaLog.forTable(spark, new Path(path))
    val deltas = 0L to deltaLog.snapshot.version
    val deltaFiles = deltas.map(deltaFile(deltaLog.logPath, _)).map(_.toString)
    val actionsTypesInLog = spark.read.schema(Action.logSchema).json(deltaFiles: _*)
      .as[SingleAction]
      .collect()
      .map(_.unwrap.getClass.asInstanceOf[Class[_]])
      .toSet

    val allActionTypes = clazz.knownDirectSubclasses
      .flatMap {
        case t if t == ru.typeOf[FileAction].typeSymbol => t.asClass.knownDirectSubclasses
        case t => Set(t)
      }
      .map(t => mirror.runtimeClass(t.asClass))

    val missingTypes = allActionTypes -- actionsTypesInLog
    val unknownTypes = actionsTypesInLog -- allActionTypes
    assert(
      missingTypes.isEmpty,
      s"missing types: $missingTypes. " +
        "Please update EvolveabilitySuite.generateData to include them in the log.")
    assert(
      unknownTypes.isEmpty,
      s"unknown types: $unknownTypes. " +
        s"Please make sure they inherit ${classOf[Action]} or ${classOf[FileAction]} directly.")
  }

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[2]").getOrCreate()
    val path = new File(args(0))
    if (path.exists()) {
      // Don't delete automatically in case the user types a wrong path.
      // scalastyle:off throwerror
      throw new AssertionError(s"${path.getCanonicalPath} exists. Please delete it and retry.")
      // scalastyle:on throwerror
    }
    generateData(spark, path.toString) // generateData is defined in the full source file
    validateData(spark, path.toString)
  }
}
Example 4
Source File: EventMemoryStreamSpec.scala, from odsc-west-streaming-trends (GNU General Public License v3.0)
package com.twilio.open.streaming.trend.discovery

import java.nio.charset.StandardCharsets
import java.nio.file.Files

import com.twilio.open.protocol.Calls.CallEvent
import com.twilio.open.streaming.trend.discovery.config.{AppConfig, AppConfiguration}
import com.twilio.open.streaming.trend.discovery.streams.EventAggregation
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{SQLContext, SparkSession}
import org.apache.spark.sql.execution.streaming.MemoryStream
import org.apache.spark.sql.streaming.Trigger
import org.scalatest.{FunSuite, Matchers}
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.duration._

class EventMemoryStreamSpec extends FunSuite with Matchers with SparkSqlTest {

  val log: Logger = LoggerFactory.getLogger(classOf[EventAggregation])
  private val pathToTestScenarios = "src/test/resources/scenarios"

  lazy val session: SparkSession = sparkSql

  override def conf: SparkConf = {
    new SparkConf()
      .setMaster("local[*]")
      .setAppName("aggregation-test-app")
      .set("spark.ui.enabled", "false")
      .set("spark.app.id", appID)
      .set("spark.driver.host", "localhost")
      .set("spark.sql.shuffle.partitions", "32")
      .set("spark.executor.cores", "4")
      .set("spark.executor.memory", "1g")
      .set("spark.ui.enabled", "false")
      .setJars(SparkContext.jarOfClass(classOf[EventAggregation]).toList)
  }

  protected val checkpointDir: String = Files.createTempDirectory(appID).toString

  def appConfigForTest(): AppConfiguration = {
    val baseConfig = AppConfig("src/test/resources/app.yaml")
    baseConfig.copy(
      checkpointPath = checkpointDir
    )
    baseConfig
  }

  test("Should aggregate call events") {
    implicit val sqlContext: SQLContext = session.sqlContext
    import session.implicits._

    val appConfig = appConfigForTest()
    val scenario = TestHelper.loadScenario[CallEvent](s"$pathToTestScenarios/pdd_events.json")
    val scenarioIter = scenario.toIterator
    scenario.nonEmpty shouldBe true

    val trendDiscoveryApp = new TrendDiscoveryApp(appConfigForTest(), session)

    val kafkaData = MemoryStream[MockKafkaDataFrame]
    val processingTimeTrigger = Trigger.ProcessingTime(2.seconds)
    val eventAggregation = EventAggregation(appConfig)

    val processingStream = eventAggregation.process(kafkaData.toDF())(session)
      .writeStream
      .format("memory")
      .queryName("calleventaggs")
      .outputMode(eventAggregation.outputMode)
      .trigger(processingTimeTrigger)
      .start()

    // 22 events
    kafkaData.addData(scenarioIter.take(11).map(TestHelper.asMockKafkaDataFrame))
    processingStream.processAllAvailable()

    kafkaData.addData(scenarioIter.take(10).map(TestHelper.asMockKafkaDataFrame))
    processingStream.processAllAvailable()

    kafkaData.addData(scenarioIter.take(1).map(TestHelper.asMockKafkaDataFrame))
    processingStream.processAllAvailable()

    val df = session.sql("select * from calleventaggs")
    df.printSchema()
    df.show

    val res = session
      .sql("select avg(stats.p99) from calleventaggs")
      .collect()
      .map { r => r.getAs[Double](0) }
      .head

    DiscoveryUtils.round(res) shouldEqual 7.56

    processingStream.stop()
  }
}
Example 5
Source File: SparkIngressSpec.scala, from cloudflow (Apache License 2.0)
package cloudflow.spark

import scala.collection.immutable.Seq

import org.apache.spark.sql.Dataset
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.sql.execution.streaming.MemoryStream

import cloudflow.streamlets.StreamletShape
import cloudflow.streamlets.avro._
import cloudflow.spark.avro._
import cloudflow.spark.testkit._
import cloudflow.spark.sql.SQLImplicits._

class SparkIngressSpec extends SparkScalaTestSupport {

  "SparkIngress" should {
    "produce elements to its outlet" in {

      val testKit = SparkStreamletTestkit(session)

      val instance = new MySparkIngress()

      // setup outlet tap on outlet port
      val out: SparkOutletTap[Data] = testKit.outletAsTap[Data](instance.out)

      val run = testKit.run(instance, Seq.empty, Seq(out))

      // get processed rows from the run
      run.totalRows must be(10)

      // get data from outlet tap
      val results = out.asCollection(session)

      // assert
      results must contain(Data(1, "name1"))
    }
  }
}

// create sparkStreamlet
class MySparkIngress extends SparkStreamlet {
  val out = AvroOutlet[Data]("out", d ⇒ d.id.toString)
  val shape = StreamletShape(out)

  override def createLogic() = new SparkStreamletLogic {
    private def process: Dataset[Data] = {
      implicit val sqlCtx = session.sqlContext
      val data = (1 to 10).map(i ⇒ Data(i, s"name$i"))
      val m = MemoryStream[Data]
      m.addData(data)
      m.toDF.as[Data]
    }

    override def buildStreamingQueries = {
      val outStream: Dataset[Data] = process
      require(outStream.isStreaming, "The Dataset created by an Ingress must be a Streaming Dataset")
      val query = writeStream(outStream, out, OutputMode.Append)
      StreamletQueryExecution(query)
    }
  }
}
Example 6
Source File: TestSparkStreamletContext.scala, from cloudflow (Apache License 2.0)
package cloudflow.spark
package testkit

import java.nio.file.attribute.FileAttribute

import com.typesafe.config._

import scala.reflect.runtime.universe._
import scala.concurrent.duration._

import org.apache.spark.sql.{ Dataset, Encoder, SparkSession }
import org.apache.spark.sql.execution.streaming.MemoryStream
import org.apache.spark.sql.streaming.{ OutputMode, StreamingQuery, Trigger }

import cloudflow.streamlets._
import org.apache.spark.sql.catalyst.InternalRow

class TestSparkStreamletContext(override val streamletRef: String,
                                session: SparkSession,
                                inletTaps: Seq[SparkInletTap[_]],
                                outletTaps: Seq[SparkOutletTap[_]],
                                override val config: Config = ConfigFactory.empty)
    extends SparkStreamletContext(
      StreamletDefinition("appId", "appVersion", streamletRef, "streamletClass", List(), List(), config),
      session) {

  val ProcessingTimeInterval = 1500.milliseconds

  override def readStream[In](inPort: CodecInlet[In])(implicit encoder: Encoder[In],
                                                      typeTag: TypeTag[In]): Dataset[In] =
    inletTaps
      .find(_.portName == inPort.name)
      .map(_.instream.asInstanceOf[MemoryStream[In]].toDF.as[In])
      .getOrElse(throw TestContextException(inPort.name,
        s"Bad test context, could not find source for inlet ${inPort.name}"))

  override def writeStream[Out](stream: Dataset[Out],
                                outPort: CodecOutlet[Out],
                                outputMode: OutputMode)(implicit encoder: Encoder[Out],
                                                        typeTag: TypeTag[Out]): StreamingQuery = {
    // RateSource can only work with a microBatch query because it contains no data at time zero.
    // Trigger.Once requires data at start to work.
    val trigger = if (isRateSource(stream)) {
      Trigger.ProcessingTime(ProcessingTimeInterval)
    } else {
      Trigger.Once()
    }
    val streamingQuery = outletTaps
      .find(_.portName == outPort.name)
      .map { outletTap ⇒
        stream.writeStream
          .outputMode(outputMode)
          .format("memory")
          .trigger(trigger)
          .queryName(outletTap.queryName)
          .start()
      }
      .getOrElse(throw TestContextException(outPort.name,
        s"Bad test context, could not find destination for outlet ${outPort.name}"))
    streamingQuery
  }

  override def checkpointDir(dirName: String): String = {
    val fileAttributes: Array[FileAttribute[_]] = Array()
    val tmpDir = java.nio.file.Files.createTempDirectory("spark-test", fileAttributes: _*)
    tmpDir.toFile.getAbsolutePath
  }

  private def isRateSource(stream: Dataset[_]): Boolean = {
    import org.apache.spark.sql.execution.command.ExplainCommand
    val explain = ExplainCommand(stream.queryExecution.logical, true)
    val res = session.sessionState.executePlan(explain).executedPlan.executeCollect()
    res.exists((row: InternalRow) =>
      row.getString(0).contains("org.apache.spark.sql.execution.streaming.sources.RateStreamProvider"))
  }
}

case class TestContextException(portName: String, msg: String) extends RuntimeException(msg)
Example 7
Source File: StreamingTestHelper.scala, from spark-acid (Apache License 2.0)
package com.qubole.spark.hiveacid.streaming

import java.io.{File, IOException}
import java.util.UUID

import com.qubole.spark.hiveacid.TestHelper

import org.apache.spark.network.util.JavaUtils
import org.apache.spark.sql.execution.streaming.MemoryStream
import org.apache.spark.sql.streaming.{OutputMode, StreamingQuery}

import org.scalatest.concurrent.TimeLimits
import org.scalatest.time.SpanSugar

class StreamingTestHelper extends TestHelper with TimeLimits {

  import StreamingTestHelper._

  def runStreaming(tableName: String,
                   outputMode: OutputMode,
                   cols: Seq[String],
                   inputRange: Range,
                   options: List[(String, String)] = List.empty): Unit = {

    val inputData = MemoryStream[Int]
    val ds = inputData.toDS()

    val checkpointDir = createCheckpointDir(namePrefix = "stream.checkpoint").getCanonicalPath

    var query: StreamingQuery = null

    try {
      // Starting streaming query
      val writerDf =
        ds.map(i => (i*100, i*10, i))
          .toDF(cols:_*)
          .writeStream
          .format("HiveAcid")
          .option("table", tableName)
          .outputMode(outputMode)
          .option("checkpointLocation", checkpointDir)
          //.start()

      query = options.map { option =>
        writerDf.option(option._1, option._2)
      }.lastOption.getOrElse(writerDf).start()

      // Adding data for streaming query
      inputData.addData(inputRange)
      failAfter(STREAMING_TIMEOUT) {
        query.processAllAvailable()
      }
    } finally {
      if (query != null) {
        // Terminating streaming query
        query.stop()
        deleteCheckpointDir(checkpointDir)
      }
    }
  }

  def deleteCheckpointDir(fileStr: String): Unit = {
    val file = new File(fileStr)
    if (file != null) {
      JavaUtils.deleteRecursively(file)
    }
  }

  def createCheckpointDir(root: String = System.getProperty("java.io.tmpdir"),
                          namePrefix: String = "spark"): File = {

    var attempts = 0
    val maxAttempts = MAX_DIR_CREATION_ATTEMPTS
    var dir: File = null
    while (dir == null) {
      attempts += 1
      if (attempts > maxAttempts) {
        throw new IOException("Failed to create a temp directory (under " + root + ") after " +
          maxAttempts + " attempts!")
      }
      try {
        dir = new File(root, namePrefix + "-" + UUID.randomUUID.toString)
        if (dir.exists() || !dir.mkdirs()) {
          dir = null
        }
      } catch {
        case e: SecurityException => dir = null
      }
    }
    dir.getCanonicalFile
  }
}

object StreamingTestHelper extends TestHelper with SpanSugar {

  val MAX_DIR_CREATION_ATTEMPTS = 10
  val STREAMING_TIMEOUT = 60.seconds
}
Example 8
Source File: TransactionsFlowUnitTest.scala, from kafka-examples (Apache License 2.0)
package com.cloudera.streaming.refapp

import java.sql.Timestamp

import org.scalatest.BeforeAndAfter

import org.apache.spark.sql.execution.streaming.MemoryStream

class TransactionsFlowUnitTest extends UnitTestBase with BeforeAndAfter {
  import testImplicits._

  var transactionsFromStream: MemoryStream[Transaction] = _
  var transactionsFlow: TransactionsFlow = _

  before {
    transactionsFromStream = MemoryStream[Transaction]
    transactionsFlow = new TransactionsFlow(
      spark,
      statesFromCluster,
      customersFromCluster,
      vendorsFromCluster,
      transactionsFromStream = transactionsFromStream
        .toDF.withColumn("timestamp", $"event_timestamp".cast("timestamp")))
  }

  test("Valid records are written to the validTransactions output") {
    val validTransaction = Transaction(
      transaction_id = "1",
      customer_id = Some(1),
      vendor_id = Some(1),
      event_state = Some("CREATED"),
      event_timestamp = Timestamp.valueOf("2018-11-12 09:42:00"),
      price = Some("100"),
      card_type = Some("Credit"))

    testStream(transactionsFlow.validTransactions.select(
      'transaction_id, 'customer_id, 'vendor_id, 'event_state, 'event_timestamp, 'price, 'card_type)) (
      AddData(transactionsFromStream, validTransaction),
      CheckAnswer(validTransaction)
    )
  }

  test("Invalid records are written to the invalidTransactions output") {
    // Note: transactionsFlow.validTransactions and invalidTransactions contain the fields that
    // we used for internal calculations, e.g. for validation.
    // It enables us to check the internal calculations.
    testStream(transactionsFlow.invalidTransactions.select('transaction_id, 'valid_card_type)) (
      AddData(transactionsFromStream, Transaction(
        transaction_id = "2",
        customer_id = Some(1),
        vendor_id = Some(1),
        event_state = Some("CREATED"),
        event_timestamp = Timestamp.valueOf("2018-11-12 09:42:00"),
        price = Some("100"),
        card_type = Some("Invalid"))),
      CheckAnswer(("2", false))
    )
  }
}
Example 9
Source File: StreamingPredictionsSpec.scala, from odsc-east-realish-predictions (Apache License 2.0)
package com.twilio.open.odsc.realish

import java.sql.Timestamp
import java.time.Instant
import java.util.{Random, UUID}

import org.apache.spark.SparkConf
import org.apache.spark.sql.{Encoders, SQLContext, SparkSession}
import org.scalatest.{FunSuite, Matchers}
import org.apache.spark.sql.execution.streaming.MemoryStream
import org.apache.spark.sql.functions._
import org.apache.spark.sql.streaming.{OutputMode, Trigger}

import scala.concurrent.duration._

class StreamingPredictionsSpec extends FunSuite with Matchers with SharedSparkSql {

  override def conf: SparkConf = {
    new SparkConf()
      .setMaster("local[*]")
      .setAppName("odsc-spark-utils")
      .set("spark.ui.enabled", "false")
      .set("spark.app.id", appID)
      .set("spark.driver.host", "localhost")
      .set("spark.sql.session.timeZone", "UTC")
  }

  final val notRandomRandom = {
    val generator = new Random
    generator.setSeed(100L)
    generator
  }

  test("should stream in some mock data for fun") {
    implicit val spark: SparkSession = sparkSql
    import spark.implicits._
    implicit val sqlContext: SQLContext = spark.sqlContext

    implicit val metricEncoder = Encoders.product[Metric]
    val metricData = MemoryStream[Metric]

    val startingInstant = Instant.now()

    val backingData = (1 to 10000).map(offset => {
      val metric = if (offset % 2 == 0) "loss_percentage" else "connect_duration"
      val nextLoss = notRandomRandom.nextDouble() * notRandomRandom.nextInt(100)
      Metric(
        Timestamp.from(startingInstant.minusSeconds(offset)),
        UUID.randomUUID().toString,
        metric,
        value = if (metric == "loss_percentage") nextLoss
                else notRandomRandom.nextDouble() * notRandomRandom.nextInt(240),
        countryCode = if (offset % 8 == 0) "US" else "BR",
        callDirection = if (metric == "loss_percentage") "inbound" else "outbound"
      )
    })
    val processingTimeTrigger = Trigger.ProcessingTime(2.seconds)

    val streamingQuery = metricData.toDF()
      .withWatermark("timestamp", "2 hours")
      .groupBy(col("metric"), col("countryCode"), window($"timestamp", "5 minutes"))
      .agg(
        min("value") as "min",
        avg("value") as "mean",
        max("value") as "max",
        count("*") as "total"
      )
      .writeStream
      .format("memory")
      .queryName("datastream")
      .outputMode(OutputMode.Append())
      .trigger(processingTimeTrigger)
      .start()

    metricData.addData(backingData)
    streamingQuery.processAllAvailable()

    spark.sql("select * from datastream").show(20, false)

    val checkChange = spark.sql("select * from datastream")
      .groupBy("metric", "countryCode")
      .agg(
        sum("total") as "total",
        avg("mean") as "mean"
      )

    checkChange.show(20, false)

    // now can do interesting things with minor back tracking...

    streamingQuery.stop()
  }
}