org.apache.spark.sql.streaming.StreamingQueryListener.QueryProgressEvent Scala Examples

The following examples show how to use org.apache.spark.sql.streaming.StreamingQueryListener.QueryProgressEvent. Each example notes the original project and source file it was taken from.
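Before the project examples, here is a minimal self-contained sketch of where QueryProgressEvent fits: implement StreamingQueryListener and register it on SparkSession.streams. The class name RateLoggingListener and the printed fields are illustrative; addListener and the three callback methods are the actual Spark API.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent}

// a minimal sketch: log the per-batch processing rate from each QueryProgressEvent
class RateLoggingListener extends StreamingQueryListener {
  override def onQueryStarted(event: QueryStartedEvent): Unit = ()

  override def onQueryProgress(event: QueryProgressEvent): Unit = {
    val p = event.progress
    println(s"query=${p.name} batch=${p.batchId} rows/s=${p.processedRowsPerSecond}")
  }

  override def onQueryTerminated(event: QueryTerminatedEvent): Unit = ()
}

val spark = SparkSession.builder.appName("listener-demo").master("local[*]").getOrCreate()
spark.streams.addListener(new RateLoggingListener) // callbacks fire for every query in this session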
Example 1
Source File: InsightsQueryListener.scala    From odsc-east-realish-predictions    with Apache License 2.0
package com.twilio.open.odsc.realish.listeners

import kamon.Kamon
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent}
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.JavaConverters._

object InsightsQueryListener {
  val log: Logger = LoggerFactory.getLogger(classOf[InsightsQueryListener])

  def apply(spark: SparkSession, restart: () => Unit): InsightsQueryListener = {
    new InsightsQueryListener(spark, restart)
  }

}

class InsightsQueryListener(sparkSession: SparkSession, restart: () => Unit) extends StreamingQueryListener {
  import InsightsQueryListener._
  private val streams = sparkSession.streams
  private val defaultTag = Map("app_name" -> sparkSession.sparkContext.appName)

  def doubleToLong(value: Double): Long = {
    value match {
      case a if a.isInfinite => 0L
      case b if b == Math.floor(b) => b.toLong
      case c => Math.rint(c).toLong
    }
  }

  override def onQueryStarted(event: QueryStartedEvent): Unit = {
    if (log.isDebugEnabled) log.debug(s"onQueryStarted queryName=${event.name} id=${event.id} runId=${event.runId}")
  }

  //https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
  override def onQueryProgress(progressEvent: QueryProgressEvent): Unit = {
    val progress = progressEvent.progress
    val inputRowsPerSecond = progress.inputRowsPerSecond
    val processedRowsPerSecond = progress.processedRowsPerSecond

    // note: the per-batch rates above can also feed richer derived metrics if needed

    val sources = progress.sources.map { source =>
      val description = source.description
      val startOffset = source.startOffset
      val endOffset = source.endOffset
      val inputRows = source.numInputRows

      s"topic=$description startOffset=$startOffset endOffset=$endOffset numRows=$inputRows"
    }
    val tags = defaultTag + ( "stream_name" -> progress.name )
    Kamon.metrics.histogram("spark.query.progress.processed.rows.rate", tags).record(doubleToLong(processedRowsPerSecond))
    Kamon.metrics.histogram("spark.query.progress.input.rows.rate", tags).record(doubleToLong(inputRowsPerSecond))

    // todo: from num.rows.total we could derive the percentage of records dropped by the watermark (a simple loss_percentage metric)

    // eventTime reports min, avg, max and the current watermark when available
    val eventTime = progress.eventTime
    if (eventTime != null) {
      log.info(s"event.time=${eventTime.asScala.mkString(",")}")
    }

    log.info(s"query.progress query=${progress.name} kafka=${sources.mkString(",")} inputRows/s=$inputRowsPerSecond processedRows/s=$processedRowsPerSecond durationMs=${progress.durationMs} sink=${progress.sink.json}")
  }

  override def onQueryTerminated(event: QueryTerminatedEvent): Unit = {
    log.warn(s"queryTerminated: $event")
    val possibleStreamingQuery = streams.get(event.id)
    if (possibleStreamingQuery != null) {
      val progress = possibleStreamingQuery.lastProgress
      val sources = progress.sources
      log.warn(s"last.progress.sources sources=$sources")
    }

    event.exception match {
      case Some(exception) =>
        log.warn(s"queryEndedWithException exception=$exception resetting.all.streams")
        restart()
      case None =>
    }
  }
} 
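A brief usage sketch for the listener above; the restartAll callback body is a hypothetical placeholder for whatever stop-and-recreate logic the application needs:

val spark = SparkSession.builder.appName("insights-demo").getOrCreate()
val restartAll: () => Unit = () => { /* stop and re-create the app's streaming queries */ }
spark.streams.addListener(InsightsQueryListener(spark, restartAll))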
Example 2
Source File: AtlasStreamingQueryProgressListener.scala    From spark-atlas-connector    with Apache License 2.0
package com.hortonworks.spark.atlas.sql.testhelper

import com.hortonworks.spark.atlas.sql.QueryDetail
import com.hortonworks.spark.atlas.utils.Logging
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.streaming.{StreamExecution, StreamingQueryWrapper}

import scala.collection.mutable
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent}

class AtlasStreamingQueryProgressListener extends StreamingQueryListener with Logging {
  val queryDetails = new mutable.MutableList[QueryDetail]()

  def onQueryStarted(event: QueryStartedEvent): Unit = {}

  def onQueryProgress(event: QueryProgressEvent): Unit = {
    // FIXME: this is totally duplicated with SparkAtlasStreamingQueryEventTracker...
    //  Extract into somewhere...
    val query = SparkSession.active.streams.get(event.progress.id)
    if (query != null) {
      query match {
        case query: StreamingQueryWrapper =>
          val qd = QueryDetail.fromStreamingQueryListener(query.streamingQuery, event)
          queryDetails += qd

        case query: StreamExecution =>
          val qd = QueryDetail.fromStreamingQueryListener(query, event)
          queryDetails += qd

        case _ => logWarn(s"Unexpected type of streaming query: ${query.getClass}")
      }
    } else {
      logWarn(s"Cannot find query ${event.progress.id} from active spark session!")
    }
  }

  def onQueryTerminated(event: QueryTerminatedEvent): Unit = {}

  def clear(): Unit = {
    queryDetails.clear()
  }
} 
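A possible test-side usage of this helper (a sketch; starting the streaming query and waiting for a batch are elided):

val listener = new AtlasStreamingQueryProgressListener()
SparkSession.active.streams.addListener(listener)
// ... start a streaming query and let it complete at least one batch ...
val captured: Seq[QueryDetail] = listener.queryDetails
listener.clear()
SparkSession.active.streams.removeListener(listener)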
Example 3
Source File: LogAnalyticsStreamingQueryListenerSuite.scala    From spark-monitoring    with MIT License
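// note: this suite is declared in org.apache.spark.sql.streaming so it can invoke the
// package-private (private[sql]) constructors of StreamingQueryProgress, SourceProgress,
// and the listener event classes constructed below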
package org.apache.spark.sql.streaming

import java.util.UUID

import org.apache.spark.listeners.ListenerSuite
import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent}
import org.scalatest.BeforeAndAfterEach

import scala.collection.JavaConversions.mapAsJavaMap

object LogAnalyticsStreamingQueryListenerSuite {
  val queryStartedEvent = new QueryStartedEvent(UUID.randomUUID, UUID.randomUUID, "name")
  val queryTerminatedEvent = new QueryTerminatedEvent(UUID.randomUUID, UUID.randomUUID, None)
  val queryProgressEvent = new QueryProgressEvent(
    new StreamingQueryProgress(
      UUID.randomUUID,
      UUID.randomUUID,
      null,
      ListenerSuite.EPOCH_TIME_AS_ISO8601,
      2L,
      mapAsJavaMap(Map("total" -> 0L)),
      mapAsJavaMap(Map.empty[String, String]),
      Array(new StateOperatorProgress(
        0, 1, 2)),
      Array(
        new SourceProgress(
          "source",
          "123",
          "456",
          678,
          Double.NaN,
          Double.NegativeInfinity
        )
      ),
      new SinkProgress("sink")
    )
  )
}

class LogAnalyticsStreamingQueryListenerSuite extends ListenerSuite
  with BeforeAndAfterEach {

  test("should invoke sendToSink for QueryStartedEvent with full class name") {
    val (json, event) = this.onStreamingQueryListenerEvent(
      LogAnalyticsStreamingQueryListenerSuite.queryStartedEvent
    )

    this.assertEvent(json, event)
  }

  test("should invoke sendToSink for QueryTerminatedEvent with full class name") {
    val (json, event) = this.onStreamingQueryListenerEvent(
      LogAnalyticsStreamingQueryListenerSuite.queryTerminatedEvent
    )

    this.assertEvent(json, event)
  }

  test("should invoke sendToSink for QueryProgressEvent with full class name") {
    val (json, event) = this.onStreamingQueryListenerEvent(
      LogAnalyticsStreamingQueryListenerSuite.queryProgressEvent
    )

    this.assertEvent(json, event)
  }

  test("QueryProgressEvent should have expected SparkEventTime") {
    val (json, _) = this.onStreamingQueryListenerEvent(
      LogAnalyticsStreamingQueryListenerSuite.queryProgressEvent
    )

    this.assertSparkEventTime(
      json,
      (_, value) => assert(value.extract[String] === ListenerSuite.EPOCH_TIME_AS_ISO8601)
    )
  }

  test("QueryStartedEvent should have SparkEventTime") {
    val (json, _) = this.onStreamingQueryListenerEvent(
      LogAnalyticsStreamingQueryListenerSuite.queryStartedEvent
    )
    this.assertSparkEventTime(
      json,
      (_, value) => assert(!value.extract[String].isEmpty)
    )
  }

  test("QueryTerminatedEvent should have SparkEventTime") {
    val (json, _) = this.onStreamingQueryListenerEvent(
      LogAnalyticsStreamingQueryListenerSuite.queryTerminatedEvent
    )
    this.assertSparkEventTime(
      json,
      (_, value) => assert(!value.extract[String].isEmpty)
    )
  }
} 
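Note that this suite uses scala.collection.JavaConversions, which is deprecated as of Scala 2.12; scala.collection.JavaConverters with explicit .asJava/.asScala calls (as in Example 1) is the preferred replacement.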
Example 4
Source File: SparkStreamingQueryListener.scala    From spark-summit-2018    with GNU General Public License v3.0
package com.twilio.open.streaming.trend.discovery.listeners

import kamon.Kamon
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent}
import org.slf4j.{Logger, LoggerFactory}

object SparkStreamingQueryListener {
  val log: Logger = LoggerFactory.getLogger(classOf[SparkStreamingQueryListener])

  def apply(spark: SparkSession, restart: () => Unit): SparkStreamingQueryListener = {
    new SparkStreamingQueryListener(spark, restart)
  }

}

class SparkStreamingQueryListener(sparkSession: SparkSession, restart: () => Unit) extends StreamingQueryListener {
  import SparkStreamingQueryListener._
  private val streams = sparkSession.streams
  private val defaultTag = Map("app_name" -> sparkSession.sparkContext.appName)


  override def onQueryStarted(event: QueryStartedEvent): Unit = {
    if (log.isDebugEnabled) log.debug(s"onQueryStarted queryName=${event.name} id=${event.id} runId=${event.runId}")
  }

  //https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
  override def onQueryProgress(progressEvent: QueryProgressEvent): Unit = {
    val progress = progressEvent.progress
    val inputRowsPerSecond = progress.inputRowsPerSecond
    val processedRowsPerSecond = progress.processedRowsPerSecond

    val sources = progress.sources.map { source =>
      val description = source.description
      val startOffset = source.startOffset
      val endOffset = source.endOffset
      val inputRows = source.numInputRows

      s"topic=$description startOffset=$startOffset endOffset=$endOffset numRows=$inputRows"
    }
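    // note: Double#toLong maps NaN to 0 and positive/negative Infinity to Long.MaxValue/Long.MinValue,
    // so those sentinel values can be recorded here; Example 1's doubleToLong guards against this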
    Kamon.metrics.histogram("spark.query.progress.processed.rows.rate").record(processedRowsPerSecond.toLong)
    Kamon.metrics.histogram("spark.query.progress.input.rows.rate", defaultTag).record(inputRowsPerSecond.toLong)
    log.info(s"query.progress query=${progress.name} kafka=${sources.mkString(",")} inputRows/s=$inputRowsPerSecond processedRows/s=$processedRowsPerSecond durationMs=${progress.durationMs} sink=${progress.sink.json}")
  }

  override def onQueryTerminated(event: QueryTerminatedEvent): Unit = {
    log.warn(s"queryTerminated: $event")
    val possibleStreamingQuery = streams.get(event.id)
    if (possibleStreamingQuery != null) {
      val progress = possibleStreamingQuery.lastProgress
      val sources = progress.sources
      log.warn(s"last.progress.sources sources=$sources")
    }

    event.exception match {
      case Some(exception) =>
        log.warn(s"queryEndedWithException exception=$exception resetting.all.streams")
        restart()
      case None =>
    }
  }
} 
Example 5
Source File: SparkStreamingQueryListener.scala    From odsc-west-streaming-trends    with GNU General Public License v3.0
package com.twilio.open.streaming.trend.discovery.listeners

import kamon.Kamon
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent}
import org.slf4j.{Logger, LoggerFactory}

object SparkStreamingQueryListener {
  val log: Logger = LoggerFactory.getLogger(classOf[SparkStreamingQueryListener])

  def apply(spark: SparkSession, restart: () => Unit): SparkStreamingQueryListener = {
    new SparkStreamingQueryListener(spark, restart)
  }

}

class SparkStreamingQueryListener(sparkSession: SparkSession, restart: () => Unit) extends StreamingQueryListener {
  import SparkStreamingQueryListener._
  private val streams = sparkSession.streams
  private val defaultTag = Map("app_name" -> sparkSession.sparkContext.appName)


  override def onQueryStarted(event: QueryStartedEvent): Unit = {
    if (log.isDebugEnabled) log.debug(s"onQueryStarted queryName=${event.name} id=${event.id} runId=${event.runId}")
  }

  //https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
  override def onQueryProgress(progressEvent: QueryProgressEvent): Unit = {
    val progress = progressEvent.progress
    val inputRowsPerSecond = progress.inputRowsPerSecond
    val processedRowsPerSecond = progress.processedRowsPerSecond

    val sources = progress.sources.map { source =>
      val description = source.description
      val startOffset = source.startOffset
      val endOffset = source.endOffset
      val inputRows = source.numInputRows

      s"topic=$description startOffset=$startOffset endOffset=$endOffset numRows=$inputRows"
    }
    Kamon.metrics.histogram("spark.query.progress.processed.rows.rate").record(processedRowsPerSecond.toLong)
    Kamon.metrics.histogram("spark.query.progress.input.rows.rate", defaultTag).record(inputRowsPerSecond.toLong)
    log.info(s"query.progress query=${progress.name} kafka=${sources.mkString(",")} inputRows/s=$inputRowsPerSecond processedRows/s=$processedRowsPerSecond durationMs=${progress.durationMs} sink=${progress.sink.json}")
  }

  override def onQueryTerminated(event: QueryTerminatedEvent): Unit = {
    log.warn(s"queryTerminated: $event")
    val possibleStreamingQuery = streams.get(event.id)
    if (possibleStreamingQuery != null) {
      val progress = possibleStreamingQuery.lastProgress
      val sources = progress.sources
      log.warn(s"last.progress.sources sources=$sources")
    }

    event.exception match {
      case Some(exception) =>
        log.warn(s"queryEndedWithException exception=$exception resetting.all.streams")
        restart()
      case None =>
    }
  }
}