org.apache.spark.sql.streaming.StreamingQueryListener.QueryProgressEvent Scala Examples
The following examples show how to use org.apache.spark.sql.streaming.StreamingQueryListener.QueryProgressEvent.
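Each example implements the three StreamingQueryListener callbacks; a QueryProgressEvent is delivered to onQueryProgress after every completed trigger and wraps a StreamingQueryProgress. For orientation, here is a minimal sketch of registering such a listener on a session (the application name and the println output are illustrative and not taken from any of the projects below):

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent}

val spark = SparkSession.builder().appName("listener-sketch").getOrCreate()

// Register a listener; onQueryProgress fires once per completed micro-batch.
spark.streams.addListener(new StreamingQueryListener {
  override def onQueryStarted(event: QueryStartedEvent): Unit = ()

  override def onQueryProgress(event: QueryProgressEvent): Unit = {
    val p = event.progress
    // Throughput and timing metrics reported by Spark for the last trigger.
    println(s"query=${p.name} batch=${p.batchId} inputRows/s=${p.inputRowsPerSecond} processedRows/s=${p.processedRowsPerSecond}")
  }

  override def onQueryTerminated(event: QueryTerminatedEvent): Unit = ()
})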
Example 1
Source File: InsightsQueryListener.scala From odsc-east-realish-predictions with Apache License 2.0
package com.twilio.open.odsc.realish.listeners

import kamon.Kamon
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent}
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.JavaConverters._

object InsightsQueryListener {
  val log: Logger = LoggerFactory.getLogger(classOf[InsightsQueryListener])

  def apply(spark: SparkSession, restart: () => Unit): InsightsQueryListener = {
    new InsightsQueryListener(spark, restart)
  }
}

class InsightsQueryListener(sparkSession: SparkSession, restart: () => Unit) extends StreamingQueryListener {
  import InsightsQueryListener._
  private val streams = sparkSession.streams
  private val defaultTag = Map("app_name" -> sparkSession.sparkContext.appName)

  def doubleToLong(value: Double): Long = {
    value match {
      case a if a.isInfinite => 0L
      case b if b == Math.floor(b) => b.toLong
      case c => Math.rint(c).toLong
    }
  }

  override def onQueryStarted(event: QueryStartedEvent): Unit = {
    if (log.isDebugEnabled) log.debug(s"onQueryStarted queryName=${event.name} id=${event.id} runId=${event.runId}")
  }

  // https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
  override def onQueryProgress(progressEvent: QueryProgressEvent): Unit = {
    val progress = progressEvent.progress
    val inputRowsPerSecond = progress.inputRowsPerSecond
    val processedRowsPerSecond = progress.processedRowsPerSecond

    // note: leaving this here to remind that we can do fancy things with this for metrics sake
    val sources = progress.sources.map { source =>
      val description = source.description
      val startOffset = source.startOffset
      val endOffset = source.endOffset
      val inputRows = source.numInputRows

      s"topic=$description startOffset=$startOffset endOffset=$endOffset numRows=$inputRows"
    }

    val tags = defaultTag + ("stream_name" -> progress.name)
    Kamon.metrics.histogram("spark.query.progress.processed.rows.rate", tags).record(doubleToLong(processedRowsPerSecond))
    Kamon.metrics.histogram("spark.query.progress.input.rows.rate", tags).record(doubleToLong(inputRowsPerSecond))

    // todo - could take num.rows.total, given total percentage of records that will be watermarked going forwards...
    // (simple metric that say loss_percentage due to watermark)

    // should give min, avg, max, watermark
    val eventTime = progress.eventTime
    if (eventTime != null) {
      log.info(s"event.time=${eventTime.asScala.mkString(",")}")
    }

    log.info(s"query.progress query=${progress.name} kafka=${sources.mkString(",")} inputRows/s=$inputRowsPerSecond processedRows/s=$processedRowsPerSecond durationMs=${progress.durationMs} sink=${progress.sink.json}")
  }

  override def onQueryTerminated(event: QueryTerminatedEvent): Unit = {
    log.warn(s"queryTerminated: $event")
    val possibleStreamingQuery = streams.get(event.id)
    if (possibleStreamingQuery != null) {
      val progress = possibleStreamingQuery.lastProgress
      val sources = progress.sources
      log.warn(s"last.progress.sources sources=$sources")
    }

    event.exception match {
      case Some(exception) =>
        log.warn(s"queryEndedWithException exception=$exception resetting.all.streams")
        restart()
      case None =>
    }
  }
}
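Wiring this listener into an application would look roughly like the sketch below; the body of the restart callback is hypothetical and stands in for whatever stop-and-resubmit logic the application actually uses:

// Assuming an existing SparkSession named `spark`.
val restartStreams: () => Unit = () => {
  // Application-specific (hypothetical): stop active queries, then re-create and start them.
  spark.streams.active.foreach(_.stop())
  // ... resubmit the streaming queries here ...
}

spark.streams.addListener(InsightsQueryListener(spark, restartStreams))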
Example 2
Source File: AtlasStreamingQueryProgressListener.scala From spark-atlas-connector with Apache License 2.0
package com.hortonworks.spark.atlas.sql.testhelper

import com.hortonworks.spark.atlas.sql.QueryDetail
import com.hortonworks.spark.atlas.utils.Logging
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.streaming.{StreamExecution, StreamingQueryWrapper}

import scala.collection.mutable

import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent}

class AtlasStreamingQueryProgressListener extends StreamingQueryListener with Logging {
  val queryDetails = new mutable.MutableList[QueryDetail]()

  def onQueryStarted(event: QueryStartedEvent): Unit = {}

  def onQueryProgress(event: QueryProgressEvent): Unit = {
    // FIXME: this is totally duplicated with SparkAtlasStreamingQueryEventTracker...
    // Extract into somewhere...
    val query = SparkSession.active.streams.get(event.progress.id)
    if (query != null) {
      query match {
        case query: StreamingQueryWrapper =>
          val qd = QueryDetail.fromStreamingQueryListener(query.streamingQuery, event)
          queryDetails += qd

        case query: StreamExecution =>
          val qd = QueryDetail.fromStreamingQueryListener(query, event)
          queryDetails += qd

        case _ => logWarn(s"Unexpected type of streaming query: ${query.getClass}")
      }
    } else {
      logWarn(s"Cannot find query ${event.progress.id} from active spark session!")
    }
  }

  def onQueryTerminated(event: QueryTerminatedEvent): Unit = {}

  def clear(): Unit = {
    queryDetails.clear()
  }
}
Example 3
Source File: LogAnalyticsStreamingQueryListenerSuite.scala From spark-monitoring with MIT License
package org.apache.spark.sql.streaming

import java.util.UUID

import org.apache.spark.listeners.ListenerSuite
import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent}
import org.scalatest.BeforeAndAfterEach

import scala.collection.JavaConversions.mapAsJavaMap

object LogAnalyticsStreamingQueryListenerSuite {
  val queryStartedEvent = new QueryStartedEvent(UUID.randomUUID, UUID.randomUUID, "name")
  val queryTerminatedEvent = new QueryTerminatedEvent(UUID.randomUUID, UUID.randomUUID, None)
  val queryProgressEvent = new QueryProgressEvent(
    new StreamingQueryProgress(
      UUID.randomUUID,
      UUID.randomUUID,
      null,
      ListenerSuite.EPOCH_TIME_AS_ISO8601,
      2L,
      mapAsJavaMap(Map("total" -> 0L)),
      mapAsJavaMap(Map.empty[String, String]),
      Array(new StateOperatorProgress(0, 1, 2)),
      Array(
        new SourceProgress(
          "source",
          "123",
          "456",
          678,
          Double.NaN,
          Double.NegativeInfinity
        )
      ),
      new SinkProgress("sink")
    )
  )
}

class LogAnalyticsStreamingQueryListenerSuite extends ListenerSuite with BeforeAndAfterEach {

  test("should invoke sendToSink for QueryStartedEvent with full class name") {
    val (json, event) = this.onStreamingQueryListenerEvent(
      LogAnalyticsStreamingQueryListenerSuite.queryStartedEvent
    )

    this.assertEvent(json, event)
  }

  test("should invoke sendToSink for QueryTerminatedEvent with full class name") {
    val (json, event) = this.onStreamingQueryListenerEvent(
      LogAnalyticsStreamingQueryListenerSuite.queryTerminatedEvent
    )

    this.assertEvent(json, event)
  }

  test("should invoke sendToSink for QueryProgressEvent with full class name") {
    val (json, event) = this.onStreamingQueryListenerEvent(
      LogAnalyticsStreamingQueryListenerSuite.queryProgressEvent
    )

    this.assertEvent(json, event)
  }

  test("QueryProgressEvent should have expected SparkEventTime") {
    val (json, _) = this.onStreamingQueryListenerEvent(
      LogAnalyticsStreamingQueryListenerSuite.queryProgressEvent
    )
    this.assertSparkEventTime(
      json,
      (_, value) => assert(value.extract[String] === ListenerSuite.EPOCH_TIME_AS_ISO8601)
    )
  }

  test("QueryStartedEvent should have SparkEventTime") {
    val (json, _) = this.onStreamingQueryListenerEvent(
      LogAnalyticsStreamingQueryListenerSuite.queryStartedEvent
    )
    this.assertSparkEventTime(
      json,
      (_, value) => assert(!value.extract[String].isEmpty)
    )
  }

  test("QueryTerminatedEvent should have SparkEventTime") {
    val (json, _) = this.onStreamingQueryListenerEvent(
      LogAnalyticsStreamingQueryListenerSuite.queryTerminatedEvent
    )
    this.assertSparkEventTime(
      json,
      (_, value) => assert(!value.extract[String].isEmpty)
    )
  }
}
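Note that the fixture above constructs StreamingQueryProgress, SourceProgress, and SinkProgress by hand, which appears to work only because the suite is declared in the org.apache.spark.sql.streaming package and can reach their package-private constructors. In ordinary listener code the progress object arrives ready-made on the event and can be serialized straight to JSON for a monitoring sink; a small sketch, where the send function is an assumed placeholder for whatever sink the application uses:

import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent}

// `send` is a hypothetical callback that ships a JSON string to a monitoring backend.
class JsonForwardingListener(send: String => Unit) extends StreamingQueryListener {
  override def onQueryStarted(event: QueryStartedEvent): Unit = ()

  override def onQueryProgress(event: QueryProgressEvent): Unit = {
    // StreamingQueryProgress exposes the full progress payload as JSON via .json (or .prettyJson).
    send(event.progress.json)
  }

  override def onQueryTerminated(event: QueryTerminatedEvent): Unit = ()
}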
Example 4
Source File: SparkStreamingQueryListener.scala From spark-summit-2018 with GNU General Public License v3.0
package com.twilio.open.streaming.trend.discovery.listeners

import kamon.Kamon
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent}
import org.slf4j.{Logger, LoggerFactory}

object SparkStreamingQueryListener {
  val log: Logger = LoggerFactory.getLogger(classOf[SparkStreamingQueryListener])

  def apply(spark: SparkSession, restart: () => Unit): SparkStreamingQueryListener = {
    new SparkStreamingQueryListener(spark, restart)
  }
}

class SparkStreamingQueryListener(sparkSession: SparkSession, restart: () => Unit) extends StreamingQueryListener {
  import SparkStreamingQueryListener._
  private val streams = sparkSession.streams
  private val defaultTag = Map("app_name" -> sparkSession.sparkContext.appName)

  override def onQueryStarted(event: QueryStartedEvent): Unit = {
    if (log.isDebugEnabled) log.debug(s"onQueryStarted queryName=${event.name} id=${event.id} runId=${event.runId}")
  }

  // https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
  override def onQueryProgress(progressEvent: QueryProgressEvent): Unit = {
    val progress = progressEvent.progress
    val inputRowsPerSecond = progress.inputRowsPerSecond
    val processedRowsPerSecond = progress.processedRowsPerSecond

    val sources = progress.sources.map { source =>
      val description = source.description
      val startOffset = source.startOffset
      val endOffset = source.endOffset
      val inputRows = source.numInputRows

      s"topic=$description startOffset=$startOffset endOffset=$endOffset numRows=$inputRows"
    }

    Kamon.metrics.histogram("spark.query.progress.processed.rows.rate").record(processedRowsPerSecond.toLong)
    Kamon.metrics.histogram("spark.query.progress.input.rows.rate", defaultTag).record(inputRowsPerSecond.toLong)

    log.info(s"query.progress query=${progress.name} kafka=${sources.mkString(",")} inputRows/s=$inputRowsPerSecond processedRows/s=$processedRowsPerSecond durationMs=${progress.durationMs} sink=${progress.sink.json}")
  }

  override def onQueryTerminated(event: QueryTerminatedEvent): Unit = {
    log.warn(s"queryTerminated: $event")
    val possibleStreamingQuery = streams.get(event.id)
    if (possibleStreamingQuery != null) {
      val progress = possibleStreamingQuery.lastProgress
      val sources = progress.sources
      log.warn(s"last.progress.sources sources=$sources")
    }

    event.exception match {
      case Some(exception) =>
        log.warn(s"queryEndedWithException exception=$exception resetting.all.streams")
        restart()
      case None =>
    }
  }
}
Example 5
Source File: SparkStreamingQueryListener.scala From odsc-west-streaming-trends with GNU General Public License v3.0
package com.twilio.open.streaming.trend.discovery.listeners

import kamon.Kamon
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent}
import org.slf4j.{Logger, LoggerFactory}

object SparkStreamingQueryListener {
  val log: Logger = LoggerFactory.getLogger(classOf[SparkStreamingQueryListener])

  def apply(spark: SparkSession, restart: () => Unit): SparkStreamingQueryListener = {
    new SparkStreamingQueryListener(spark, restart)
  }
}

class SparkStreamingQueryListener(sparkSession: SparkSession, restart: () => Unit) extends StreamingQueryListener {
  import SparkStreamingQueryListener._
  private val streams = sparkSession.streams
  private val defaultTag = Map("app_name" -> sparkSession.sparkContext.appName)

  override def onQueryStarted(event: QueryStartedEvent): Unit = {
    if (log.isDebugEnabled) log.debug(s"onQueryStarted queryName=${event.name} id=${event.id} runId=${event.runId}")
  }

  // https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/streaming/progress.scala
  override def onQueryProgress(progressEvent: QueryProgressEvent): Unit = {
    val progress = progressEvent.progress
    val inputRowsPerSecond = progress.inputRowsPerSecond
    val processedRowsPerSecond = progress.processedRowsPerSecond

    val sources = progress.sources.map { source =>
      val description = source.description
      val startOffset = source.startOffset
      val endOffset = source.endOffset
      val inputRows = source.numInputRows

      s"topic=$description startOffset=$startOffset endOffset=$endOffset numRows=$inputRows"
    }

    Kamon.metrics.histogram("spark.query.progress.processed.rows.rate").record(processedRowsPerSecond.toLong)
    Kamon.metrics.histogram("spark.query.progress.input.rows.rate", defaultTag).record(inputRowsPerSecond.toLong)

    log.info(s"query.progress query=${progress.name} kafka=${sources.mkString(",")} inputRows/s=$inputRowsPerSecond processedRows/s=$processedRowsPerSecond durationMs=${progress.durationMs} sink=${progress.sink.json}")
  }

  override def onQueryTerminated(event: QueryTerminatedEvent): Unit = {
    log.warn(s"queryTerminated: $event")
    val possibleStreamingQuery = streams.get(event.id)
    if (possibleStreamingQuery != null) {
      val progress = possibleStreamingQuery.lastProgress
      val sources = progress.sources
      log.warn(s"last.progress.sources sources=$sources")
    }

    event.exception match {
      case Some(exception) =>
        log.warn(s"queryEndedWithException exception=$exception resetting.all.streams")
        restart()
      case None =>
    }
  }
}
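One caveat on Examples 4 and 5: they record inputRowsPerSecond and processedRowsPerSecond with a plain .toLong. These rates are Doubles and can be NaN or infinite (the fixture in Example 3 builds a SourceProgress with exactly such values); under JVM narrowing rules NaN becomes 0 while an infinity saturates to Long.MinValue or Long.MaxValue, which would skew a histogram. Example 1 guards against this with its doubleToLong helper; a minimal sketch of the same idea:

// Guard against NaN/infinite rates before handing them to a Long-valued histogram,
// mirroring the doubleToLong helper in Example 1.
def rateToLong(value: Double): Long =
  if (value.isNaN || value.isInfinite) 0L
  else Math.rint(value).toLong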