org.apache.spark.sql.streaming.StreamingQueryListener Scala Examples
The following examples show how to use org.apache.spark.sql.streaming.StreamingQueryListener.
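A custom listener implements three callbacks and is attached through the session's StreamingQueryManager. A minimal sketch of the wiring the examples below assume (app name and query are illustrative):

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener._

val spark = SparkSession.builder().appName("listener-demo").master("local[*]").getOrCreate()

// Register a listener; Spark invokes its callbacks for every streaming query in this session.
spark.streams.addListener(new StreamingQueryListener {
  override def onQueryStarted(event: QueryStartedEvent): Unit = println(s"started: ${event.id}")
  override def onQueryProgress(event: QueryProgressEvent): Unit = println(s"rows: ${event.progress.numInputRows}")
  override def onQueryTerminated(event: QueryTerminatedEvent): Unit = println(s"terminated: ${event.id}")
})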
Example 1
Source File: StreamingQueryListenerBus.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.execution.streaming

import org.apache.spark.scheduler.{LiveListenerBus, SparkListener, SparkListenerEvent}
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.util.ListenerBus

// Class declaration reconstructed from the upstream Spark sources; the original
// listing begins at post().
class StreamingQueryListenerBus(sparkListenerBus: LiveListenerBus)
  extends SparkListener with ListenerBus[StreamingQueryListener, StreamingQueryListener.Event] {

  import StreamingQueryListener._

  sparkListenerBus.addListener(this)

  // QueryStartedEvent is delivered synchronously to the local listeners; all other
  // events go through the asynchronous LiveListenerBus.
  def post(event: StreamingQueryListener.Event): Unit = {
    event match {
      case s: QueryStartedEvent =>
        postToAll(s)
      case _ =>
        sparkListenerBus.post(event)
    }
  }

  override def onOtherEvent(event: SparkListenerEvent): Unit = {
    event match {
      case e: StreamingQueryListener.Event =>
        postToAll(e)
      case _ =>
    }
  }

  // Dispatch an event to the matching callback on a single listener.
  override protected def doPostEvent(
      listener: StreamingQueryListener,
      event: StreamingQueryListener.Event): Unit = {
    event match {
      case queryStarted: QueryStartedEvent =>
        listener.onQueryStarted(queryStarted)
      case queryProgress: QueryProgressEvent =>
        listener.onQueryProgress(queryProgress)
      case queryTerminated: QueryTerminatedEvent =>
        listener.onQueryTerminated(queryTerminated)
      case _ =>
    }
  }
}
Example 2
Source File: SimpleListener.scala From spark-structured-streaming-examples with Apache License 2.0
package com.phylosoft.spark.learning.sql.streaming.monitoring

import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener._

class SimpleListener extends StreamingQueryListener {

  @volatile private var startTime: Long = 0L
  @volatile private var endTime: Long = 0L
  @volatile private var numRecs: Long = 0L

  override def onQueryStarted(event: QueryStartedEvent): Unit = {
    println("Query started: " + event.id)
    startTime = System.currentTimeMillis
  }

  override def onQueryProgress(event: QueryProgressEvent): Unit = {
    println("Query made progress: " + event.progress)
    numRecs += event.progress.numInputRows
  }

  override def onQueryTerminated(event: QueryTerminatedEvent): Unit = {
    println("Query terminated: " + event.id)
    endTime = System.currentTimeMillis
  }
}
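The listener receives nothing until it is registered; a hedged usage sketch with a toy rate-to-console query:

val listener = new SimpleListener
spark.streams.addListener(listener)

val query = spark.readStream.format("rate").load()
  .writeStream.format("console").start()
query.awaitTermination(10000)
// startTime, endTime and numRecs are private; expose accessors if the totals are needed.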
Example 3
Source File: InsightsQueryListener.scala From odsc-east-realish-predictions with Apache License 2.0
package com.twilio.open.odsc.realish.listeners

import kamon.Kamon
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent}
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.JavaConverters._

object InsightsQueryListener {
  val log: Logger = LoggerFactory.getLogger(classOf[InsightsQueryListener])

  def apply(spark: SparkSession, restart: () => Unit): InsightsQueryListener = {
    new InsightsQueryListener(spark, restart)
  }
}

class InsightsQueryListener(sparkSession: SparkSession, restart: () => Unit) extends StreamingQueryListener {
  import InsightsQueryListener._
  private val streams = sparkSession.streams
  private val defaultTag = Map("app_name" -> sparkSession.sparkContext.appName)

  // Kamon histograms record Longs, so the Double rates are rounded; infinite rates collapse to 0.
  def doubleToLong(value: Double): Long = {
    value match {
      case a if a.isInfinite => 0L
      case b if b == Math.floor(b) => b.toLong
      case c => Math.rint(c).toLong
    }
  }

  override def onQueryStarted(event: QueryStartedEvent): Unit = {
    if (log.isDebugEnabled) log.debug(s"onQueryStarted queryName=${event.name} id=${event.id} runId=${event.runId}")
  }

  // https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
  override def onQueryProgress(progressEvent: QueryProgressEvent): Unit = {
    val progress = progressEvent.progress
    val inputRowsPerSecond = progress.inputRowsPerSecond
    val processedRowsPerSecond = progress.processedRowsPerSecond

    // note: leaving this here to remind that we can do fancy things with this for metrics sake
    val sources = progress.sources.map { source =>
      val description = source.description
      val startOffset = source.startOffset
      val endOffset = source.endOffset
      val inputRows = source.numInputRows
      s"topic=$description startOffset=$startOffset endOffset=$endOffset numRows=$inputRows"
    }

    val tags = defaultTag + ("stream_name" -> progress.name)
    Kamon.metrics.histogram("spark.query.progress.processed.rows.rate", tags).record(doubleToLong(processedRowsPerSecond))
    Kamon.metrics.histogram("spark.query.progress.input.rows.rate", tags).record(doubleToLong(inputRowsPerSecond))

    // todo - could take num.rows.total, given total percentage of records that will be watermarked
    // going forwards... (simple metric that says loss_percentage due to watermark)
    // should give min, avg, max, watermark
    val eventTime = progress.eventTime
    if (eventTime != null) {
      log.info(s"event.time=${eventTime.asScala.mkString(",")}")
    }
    log.info(s"query.progress query=${progress.name} kafka=${sources.mkString(",")} inputRows/s=$inputRowsPerSecond processedRows/s=$processedRowsPerSecond durationMs=${progress.durationMs} sink=${progress.sink.json}")
  }

  override def onQueryTerminated(event: QueryTerminatedEvent): Unit = {
    log.warn(s"queryTerminated: $event")
    val possibleStreamingQuery = streams.get(event.id)
    if (possibleStreamingQuery != null) {
      val progress = possibleStreamingQuery.lastProgress
      val sources = progress.sources
      log.warn(s"last.progress.sources sources=$sources")
    }
    event.exception match {
      case Some(exception) =>
        log.warn(s"queryEndedWithException exception=$exception resetting.all.streams")
        restart()
      case None =>
    }
  }
}
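The restart: () => Unit callback is what onQueryTerminated invokes when a query dies with an exception; a hedged wiring sketch (restartAllStreams is a hypothetical application function):

val listener = InsightsQueryListener(spark, () => restartAllStreams(spark))
spark.streams.addListener(listener)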
Example 4
Source File: SparkAtlasStreamingQueryEventTracker.scala From spark-atlas-connector with Apache License 2.0
package com.hortonworks.spark.atlas

import com.hortonworks.spark.atlas.sql.{QueryDetail, SparkExecutionPlanProcessor}
import com.hortonworks.spark.atlas.utils.Logging
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.streaming.{StreamExecution, StreamingQueryWrapper}
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener._

class SparkAtlasStreamingQueryEventTracker(
    atlasClient: AtlasClient,
    atlasClientConf: AtlasClientConf)
  extends StreamingQueryListener with Logging {

  def this(atlasClientConf: AtlasClientConf) = {
    this(AtlasClient.atlasClient(atlasClientConf), atlasClientConf)
  }

  def this() {
    this(new AtlasClientConf)
  }

  private val enabled: Boolean = AtlasUtils.isSacEnabled(atlasClientConf)

  private val executionPlanTracker = new SparkExecutionPlanProcessor(atlasClient, atlasClientConf)
  executionPlanTracker.startThread()

  override def onQueryStarted(event: QueryStartedEvent): Unit = {
    logDebug(s"Start to track the Spark Streaming query in the Spark Atlas $event")
  }

  override def onQueryProgress(event: QueryProgressEvent): Unit = {
    if (!enabled) {
      // No-op if SAC is disabled
      return
    }
    logInfo(s"Track running Spark Streaming query in the Spark Atlas: $event")
    val query = SparkSession.active.streams.get(event.progress.id)
    if (query != null) {
      val qd = query match {
        case query: StreamingQueryWrapper =>
          Some(QueryDetail.fromStreamingQueryListener(query.streamingQuery, event))
        case query: StreamExecution =>
          Some(QueryDetail.fromStreamingQueryListener(query, event))
        case _ =>
          logWarn(s"Unexpected type of streaming query: ${query.getClass}")
          None
      }
      qd.foreach { q =>
        if (q.qe != null) {
          executionPlanTracker.pushEvent(q)
        } else {
          logInfo(s"Can't retrieve query execution information for query ${event.progress.id}" +
            " - skip and wait for next batch.")
        }
      }
    } else {
      logWarn(s"Cannot find query ${event.progress.id} from active spark session!")
    }
  }

  override def onQueryTerminated(event: QueryTerminatedEvent): Unit = {
    logDebug(s"Track Spark Streaming query in the Spark Atlas Terminated: $event")
  }
}
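The zero-argument constructor suggests the tracker is meant to be instantiated from configuration; since Spark 2.4 the spark.sql.streaming.streamingQueryListeners setting does exactly that. A hedged sketch:

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .config("spark.sql.streaming.streamingQueryListeners",
    "com.hortonworks.spark.atlas.SparkAtlasStreamingQueryEventTracker")
  .getOrCreate()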
Example 5
Source File: AtlasStreamingQueryProgressListener.scala From spark-atlas-connector with Apache License 2.0
package com.hortonworks.spark.atlas.sql.testhelper

import com.hortonworks.spark.atlas.sql.QueryDetail
import com.hortonworks.spark.atlas.utils.Logging
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.streaming.{StreamExecution, StreamingQueryWrapper}
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent}

import scala.collection.mutable

class AtlasStreamingQueryProgressListener extends StreamingQueryListener with Logging {
  val queryDetails = new mutable.MutableList[QueryDetail]()

  def onQueryStarted(event: QueryStartedEvent): Unit = {}

  def onQueryProgress(event: QueryProgressEvent): Unit = {
    // FIXME: this is totally duplicated with SparkAtlasStreamingQueryEventTracker...
    // Extract into somewhere...
    val query = SparkSession.active.streams.get(event.progress.id)
    if (query != null) {
      query match {
        case query: StreamingQueryWrapper =>
          val qd = QueryDetail.fromStreamingQueryListener(query.streamingQuery, event)
          queryDetails += qd
        case query: StreamExecution =>
          val qd = QueryDetail.fromStreamingQueryListener(query, event)
          queryDetails += qd
        case _ =>
          logWarn(s"Unexpected type of streaming query: ${query.getClass}")
      }
    } else {
      logWarn(s"Cannot find query ${event.progress.id} from active spark session!")
    }
  }

  def onQueryTerminated(event: QueryTerminatedEvent): Unit = {}

  def clear(): Unit = {
    queryDetails.clear()
  }
}
Example 6
Source File: UnifiedSparkListener.scala From spark-monitoring with MIT License
package org.apache.spark.listeners

import java.time.Instant

import org.apache.spark.{SparkConf, SparkException, SparkInformation}
import org.apache.spark.internal.Logging
import org.apache.spark.listeners.sink.SparkListenerSink
import org.apache.spark.scheduler._
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.util.JsonProtocol
import org.json4s.JsonAST.JValue
import org.json4s.JsonDSL._
import org.json4s.jackson.JsonMethods.{compact, render}

import scala.util.control.NonFatal

class UnifiedSparkListener(override val conf: SparkConf)
  extends UnifiedSparkListenerHandler
  with Logging
  with SparkListenerHandlers
  with StreamingListenerHandlers
  with StreamingQueryListenerHandlers {

  private val listenerSink = this.createSink(this.conf)

  override def onOtherEvent(event: SparkListenerEvent): Unit = {
    // All events in Spark that are not specific to SparkListener go through
    // this method. The typed ListenerBus implementations intercept and forward to
    // their "local" listeners.
    // We will just handle everything here so we only have to have one listener.
    // The advantage is that this can be registered in extraListeners, so no
    // code change is required to add listener support.
    event match {
      // We will use the ClassTag for the private wrapper class to match
      case this.streamingListenerEventClassTag(e) =>
        this.onStreamingListenerEvent(e)
      case streamingQueryListenerEvent: StreamingQueryListener.Event =>
        this.onStreamingQueryListenerEvent(streamingQueryListenerEvent)
      case sparkListenerEvent: SparkListenerEvent =>
        if (sparkListenerEvent.logEvent) {
          logSparkListenerEvent(sparkListenerEvent)
        }
    }
  }

  private def createSink(conf: SparkConf): SparkListenerSink = {
    val sink = conf.getOption("spark.unifiedListener.sink") match {
      case Some(listenerSinkClassName) => listenerSinkClassName
      case None => throw new SparkException("spark.unifiedListener.sink setting is required")
    }
    logInfo(s"Creating listener sink: ${sink}")
    org.apache.spark.util.Utils.loadExtensions(
      classOf[SparkListenerSink],
      Seq(sink),
      conf).head
  }

  protected def logSparkListenerEvent(
      event: SparkListenerEvent,
      getTimestamp: () => Instant = () => Instant.now()): Unit = {
    val json = try {
      // Add a well-known time field.
      Some(
        JsonProtocol.sparkEventToJson(event)
          .merge(render(
            SparkInformation.get() + ("SparkEventTime" -> getTimestamp().toString)
          ))
      )
    } catch {
      case NonFatal(e) =>
        logError(s"Error serializing SparkListenerEvent to JSON: $event", e)
        None
    }
    sendToSink(json)
  }

  private[spark] def sendToSink(json: Option[JValue]): Unit = {
    try {
      json match {
        case Some(j) =>
          logDebug(s"Sending event to listener sink: ${compact(j)}")
          this.listenerSink.logEvent(json)
        case None =>
          logWarning("json value was None")
      }
    } catch {
      case NonFatal(e) =>
        logError(s"Error sending to listener sink: $e")
    }
  }
}
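As the comment in onOtherEvent notes, this listener can be wired in purely through configuration. A hedged sketch; the sink class name is an assumption, check the project for the sink implementations it actually ships:

import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.extraListeners", "org.apache.spark.listeners.UnifiedSparkListener")
  // required by createSink above; hypothetical sink implementation
  .set("spark.unifiedListener.sink", "org.apache.spark.listeners.sink.loganalytics.LogAnalyticsListenerSink")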
Example 7
Source File: StreamingQueryListenerHandlers.scala From spark-monitoring with MIT License
package org.apache.spark.listeners

import java.time.Instant

import org.apache.spark.sql.streaming.StreamingQueryListener

trait StreamingQueryListenerHandlers {
  this: UnifiedSparkListenerHandler =>

  private[listeners] def onStreamingQueryListenerEvent(event: StreamingQueryListener.Event): Unit = {
    // Only the query progress event has a timestamp, so we'll send everything else
    // on through
    event match {
      case queryProgress: StreamingQueryListener.QueryProgressEvent =>
        logSparkListenerEvent(
          event,
          () => Instant.parse(queryProgress.progress.timestamp)
        )
      case streamingQueryListenerEvent: StreamingQueryListener.Event =>
        logSparkListenerEvent(streamingQueryListenerEvent)
    }
  }
}
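progress.timestamp is an ISO-8601 string, which is why Instant.parse works here; a quick check (the timestamp value is illustrative):

val ts = java.time.Instant.parse("2018-06-14T18:13:20.608Z")
println(ts.toEpochMilli) // epoch millis recovered from the progress event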
Example 8
Source File: SparkStreamingQueryListener.scala From spark-summit-2018 with GNU General Public License v3.0
package com.twilio.open.streaming.trend.discovery.listeners

import kamon.Kamon
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent}
import org.slf4j.{Logger, LoggerFactory}

object SparkStreamingQueryListener {
  val log: Logger = LoggerFactory.getLogger(classOf[SparkStreamingQueryListener])

  def apply(spark: SparkSession, restart: () => Unit): SparkStreamingQueryListener = {
    new SparkStreamingQueryListener(spark, restart)
  }
}

class SparkStreamingQueryListener(sparkSession: SparkSession, restart: () => Unit) extends StreamingQueryListener {
  import SparkStreamingQueryListener._
  private val streams = sparkSession.streams
  private val defaultTag = Map("app_name" -> sparkSession.sparkContext.appName)

  override def onQueryStarted(event: QueryStartedEvent): Unit = {
    if (log.isDebugEnabled) log.debug(s"onQueryStarted queryName=${event.name} id=${event.id} runId=${event.runId}")
  }

  // https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
  override def onQueryProgress(progressEvent: QueryProgressEvent): Unit = {
    val progress = progressEvent.progress
    val inputRowsPerSecond = progress.inputRowsPerSecond
    val processedRowsPerSecond = progress.processedRowsPerSecond
    val sources = progress.sources.map { source =>
      val description = source.description
      val startOffset = source.startOffset
      val endOffset = source.endOffset
      val inputRows = source.numInputRows
      s"topic=$description startOffset=$startOffset endOffset=$endOffset numRows=$inputRows"
    }
    Kamon.metrics.histogram("spark.query.progress.processed.rows.rate").record(processedRowsPerSecond.toLong)
    Kamon.metrics.histogram("spark.query.progress.input.rows.rate", defaultTag).record(inputRowsPerSecond.toLong)
    log.info(s"query.progress query=${progress.name} kafka=${sources.mkString(",")} inputRows/s=$inputRowsPerSecond processedRows/s=$processedRowsPerSecond durationMs=${progress.durationMs} sink=${progress.sink.json}")
  }

  override def onQueryTerminated(event: QueryTerminatedEvent): Unit = {
    log.warn(s"queryTerminated: $event")
    val possibleStreamingQuery = streams.get(event.id)
    if (possibleStreamingQuery != null) {
      val progress = possibleStreamingQuery.lastProgress
      val sources = progress.sources
      log.warn(s"last.progress.sources sources=$sources")
    }
    event.exception match {
      case Some(exception) =>
        log.warn(s"queryEndedWithException exception=$exception resetting.all.streams")
        restart()
      case None =>
    }
  }
}
Example 9
Source File: CarbonStreamingQueryListener.scala From carbondata with Apache License 2.0
package org.apache.carbondata.streaming

import java.util
import java.util.UUID

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.streaming.{CarbonAppendableStreamSink, StreamExecution}
import org.apache.spark.sql.streaming.StreamingQueryListener

import org.apache.carbondata.common.logging.LogServiceFactory

class CarbonStreamingQueryListener(spark: SparkSession) extends StreamingQueryListener {

  private val LOGGER = LogServiceFactory.getLogService(this.getClass.getCanonicalName)

  private val cache = new util.HashMap[UUID, String]()

  override def onQueryStarted(event: StreamingQueryListener.QueryStartedEvent): Unit = {
    val streamQuery = spark.streams.get(event.id)
    val qry = if (streamQuery.isInstanceOf[StreamExecution]) {
      // adapt spark 2.1
      streamQuery.asInstanceOf[StreamExecution]
    } else {
      // adapt spark 2.2 and later version
      val clazz = Class.forName("org.apache.spark.sql.execution.streaming.StreamingQueryWrapper")
      val method = clazz.getMethod("streamingQuery")
      method.invoke(streamQuery).asInstanceOf[StreamExecution]
    }
    if (qry.sink.isInstanceOf[CarbonAppendableStreamSink]) {
      LOGGER.info("Carbon streaming query started: " + event.id)
      val sink = qry.sink.asInstanceOf[CarbonAppendableStreamSink]
      val carbonTable = sink.carbonTable
      cache.put(event.id, carbonTable.getTableUniqueName)
    }
  }

  override def onQueryProgress(event: StreamingQueryListener.QueryProgressEvent): Unit = {
  }

  override def onQueryTerminated(event: StreamingQueryListener.QueryTerminatedEvent): Unit = {
    val tableUniqueName = cache.remove(event.id)
    if (null != tableUniqueName) {
      LOGGER.info("Carbon streaming query End: " + event.id)
      StreamSinkFactory.unLock(tableUniqueName)
    }
  }
}
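The listener caches the Carbon table name at query start so the table lock can be released on termination; a hedged registration sketch, assuming a session with CarbonData configured:

spark.streams.addListener(new CarbonStreamingQueryListener(spark))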
Example 10
Source File: SparkStreamingQueryListener.scala From odsc-west-streaming-trends with GNU General Public License v3.0
package com.twilio.open.streaming.trend.discovery.listeners

import kamon.Kamon
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent}
import org.slf4j.{Logger, LoggerFactory}

object SparkStreamingQueryListener {
  val log: Logger = LoggerFactory.getLogger(classOf[SparkStreamingQueryListener])

  def apply(spark: SparkSession, restart: () => Unit): SparkStreamingQueryListener = {
    new SparkStreamingQueryListener(spark, restart)
  }
}

class SparkStreamingQueryListener(sparkSession: SparkSession, restart: () => Unit) extends StreamingQueryListener {
  import SparkStreamingQueryListener._
  private val streams = sparkSession.streams
  private val defaultTag = Map("app_name" -> sparkSession.sparkContext.appName)

  override def onQueryStarted(event: QueryStartedEvent): Unit = {
    if (log.isDebugEnabled) log.debug(s"onQueryStarted queryName=${event.name} id=${event.id} runId=${event.runId}")
  }

  // https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
  override def onQueryProgress(progressEvent: QueryProgressEvent): Unit = {
    val progress = progressEvent.progress
    val inputRowsPerSecond = progress.inputRowsPerSecond
    val processedRowsPerSecond = progress.processedRowsPerSecond
    val sources = progress.sources.map { source =>
      val description = source.description
      val startOffset = source.startOffset
      val endOffset = source.endOffset
      val inputRows = source.numInputRows
      s"topic=$description startOffset=$startOffset endOffset=$endOffset numRows=$inputRows"
    }
    Kamon.metrics.histogram("spark.query.progress.processed.rows.rate").record(processedRowsPerSecond.toLong)
    Kamon.metrics.histogram("spark.query.progress.input.rows.rate", defaultTag).record(inputRowsPerSecond.toLong)
    log.info(s"query.progress query=${progress.name} kafka=${sources.mkString(",")} inputRows/s=$inputRowsPerSecond processedRows/s=$processedRowsPerSecond durationMs=${progress.durationMs} sink=${progress.sink.json}")
  }

  override def onQueryTerminated(event: QueryTerminatedEvent): Unit = {
    log.warn(s"queryTerminated: $event")
    val possibleStreamingQuery = streams.get(event.id)
    if (possibleStreamingQuery != null) {
      val progress = possibleStreamingQuery.lastProgress
      val sources = progress.sources
      log.warn(s"last.progress.sources sources=$sources")
    }
    event.exception match {
      case Some(exception) =>
        log.warn(s"queryEndedWithException exception=$exception resetting.all.streams")
        restart()
      case None =>
    }
  }
}
Example 11
Source File: StreamingQueryListenerBus.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution.streaming

import java.util.UUID

import scala.collection.mutable

import org.apache.spark.scheduler.{LiveListenerBus, SparkListener, SparkListenerEvent}
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.util.ListenerBus

// Class declaration and activeQueryRunIds reconstructed from the upstream Spark
// sources; the original listing begins at doPostEvent().
class StreamingQueryListenerBus(sparkListenerBus: LiveListenerBus)
  extends SparkListener with ListenerBus[StreamingQueryListener, StreamingQueryListener.Event] {

  import StreamingQueryListener._

  sparkListenerBus.addToQueue(this, StreamingQueryListenerBus.STREAM_EVENT_QUERY)

  // runIds of queries started in this SparkSession; events from other sessions are dropped
  private val activeQueryRunIds = new mutable.HashSet[UUID]

  override protected def doPostEvent(
      listener: StreamingQueryListener,
      event: StreamingQueryListener.Event): Unit = {
    def shouldReport(runId: UUID): Boolean = {
      activeQueryRunIds.synchronized { activeQueryRunIds.contains(runId) }
    }

    event match {
      case queryStarted: QueryStartedEvent =>
        if (shouldReport(queryStarted.runId)) {
          listener.onQueryStarted(queryStarted)
        }
      case queryProgress: QueryProgressEvent =>
        if (shouldReport(queryProgress.progress.runId)) {
          listener.onQueryProgress(queryProgress)
        }
      case queryTerminated: QueryTerminatedEvent =>
        if (shouldReport(queryTerminated.runId)) {
          listener.onQueryTerminated(queryTerminated)
        }
      case _ =>
    }
  }
}

object StreamingQueryListenerBus {
  val STREAM_EVENT_QUERY = "streams"
}