org.apache.spark.scheduler.SparkListenerEvent Scala Examples
The following examples show how to use org.apache.spark.scheduler.SparkListenerEvent.
Example 1
Source File: OapListener.scala From OAP with Apache License 2.0
package org.apache.spark.sql.oap.listener

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent}
import org.apache.spark.sql.oap.OapRuntime

@DeveloperApi
case class SparkListenerCustomInfoUpdate(
    hostName: String,
    executorId: String,
    clazzName: String,
    customizedInfo: String) extends SparkListenerEvent {
  override def logEvent: Boolean = false
}

class OapListener extends SparkListener {
  override def onOtherEvent(event: SparkListenerEvent): Unit = event match {
    case customInfo: SparkListenerCustomInfoUpdate =>
      if (customInfo.clazzName.contains("OapFiberCacheHeartBeatMessager")) {
        OapRuntime.getOrCreate.fiberSensor.updateLocations(customInfo)
      } else if (customInfo.clazzName.contains("FiberCacheManagerMessager")) {
        OapRuntime.getOrCreate.fiberSensor.updateMetrics(customInfo)
      }
    case _ =>
  }
}
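As a rough usage sketch (not part of OAP itself): the listener is registered on the driver's SparkContext and reacts to SparkListenerCustomInfoUpdate events. The session name below is assumed, and in OAP the events are posted through Spark's internal listener bus rather than by user code.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.oap.listener.{OapListener, SparkListenerCustomInfoUpdate}

val spark = SparkSession.builder().appName("oap-listener-demo").getOrCreate()

// Register the listener so its onOtherEvent callback runs on the driver.
spark.sparkContext.addSparkListener(new OapListener)

// A custom event carrying serialized cache info; logEvent = false keeps it out
// of the persisted event log. OAP posts such events internally via Spark's
// LiveListenerBus.
val update = SparkListenerCustomInfoUpdate(
  hostName = "worker-1",
  executorId = "1",
  clazzName = "FiberCacheManagerMessager",
  customizedInfo = "{}")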
Example 2
Source File: ProfilerListener.scala From carbondata with Apache License 2.0
package org.apache.spark.sql.profiler

import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent, SparkListenerTaskEnd, SparkListenerTaskGettingResult, SparkListenerTaskStart}
import org.apache.spark.sql.execution.ui.{SparkListenerSQLExecutionEnd, SparkListenerSQLExecutionStart}

private[profiler] class ProfilerListener extends SparkListener {
  override def onOtherEvent(event: SparkListenerEvent): Unit = {
    Profiler.invokeIfEnable {
      event match {
        case executionStart: SparkListenerSQLExecutionStart =>
          Profiler.addExecutionMessage(
            executionStart.executionId,
            ExecutionStart(
              executionStart.executionId,
              executionStart.time,
              executionStart.physicalPlanDescription))
        case executionEnd: SparkListenerSQLExecutionEnd =>
          Profiler.send(
            ExecutionEnd(
              executionEnd.executionId,
              executionEnd.time))
        case _ =>
      }
    }
  }
}
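ProfilerListener is package-private and wired up by CarbonData itself, but the general registration pattern for such a listener is configuration-driven. The sketch below is illustrative only and assumes the class is instantiable from your deployment.

import org.apache.spark.SparkConf

// spark.extraListeners makes the driver instantiate and register each listed
// SparkListener class at startup using its zero-argument constructor.
val conf = new SparkConf()
  .setAppName("profiler-demo")
  .set("spark.extraListeners", "org.apache.spark.sql.profiler.ProfilerListener")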
Example 3
Source File: StreamingQueryListenerBus.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.execution.streaming

import org.apache.spark.scheduler.{LiveListenerBus, SparkListener, SparkListenerEvent}
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.util.ListenerBus

  def post(event: StreamingQueryListener.Event) {
    event match {
      case s: QueryStartedEvent =>
        postToAll(s)
      case _ =>
        sparkListenerBus.post(event)
    }
  }

  override def onOtherEvent(event: SparkListenerEvent): Unit = {
    event match {
      case e: StreamingQueryListener.Event =>
        postToAll(e)
      case _ =>
    }
  }

  override protected def doPostEvent(
      listener: StreamingQueryListener,
      event: StreamingQueryListener.Event): Unit = {
    event match {
      case queryStarted: QueryStartedEvent =>
        listener.onQueryStarted(queryStarted)
      case queryProgress: QueryProgressEvent =>
        listener.onQueryProgress(queryProgress)
      case queryTerminated: QueryTerminatedEvent =>
        listener.onQueryTerminated(queryTerminated)
      case _ =>
    }
  }
}
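This bus is internal to Structured Streaming: it republishes StreamingQueryListener.Event instances that arrive as SparkListenerEvents. The user-facing counterpart is a StreamingQueryListener registered on the session, roughly as in this sketch.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener._

val spark = SparkSession.builder().appName("streaming-listener-demo").getOrCreate()

// Events routed through StreamingQueryListenerBus eventually reach this listener.
spark.streams.addListener(new StreamingQueryListener {
  override def onQueryStarted(event: QueryStartedEvent): Unit =
    println(s"query started: ${event.id}")
  override def onQueryProgress(event: QueryProgressEvent): Unit =
    println(s"rows/s: ${event.progress.processedRowsPerSecond}")
  override def onQueryTerminated(event: QueryTerminatedEvent): Unit =
    println(s"query terminated: ${event.id}")
})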
Example 4
Source File: ApplicationIdleMonitor.scala From XSQL with Apache License 2.0
package org.apache.spark.monitor.job

import java.util.concurrent.{Executors, ScheduledFuture, TimeUnit}
import java.util.concurrent.atomic.AtomicReference

import scala.collection.JavaConverters._

import org.apache.spark.JobExecutionStatus
import org.apache.spark.alarm.{AlertMessage, HtmlMessage}
import org.apache.spark.monitor.{Monitor, MonitorItem}
import org.apache.spark.monitor.MonitorItem.MonitorItem
import org.apache.spark.scheduler.{SparkListenerEvent, SparkListenerJobEnd, SparkListenerJobStart}
import org.apache.spark.status.JobDataWrapper

class ApplicationIdleMonitor extends JobMonitor {

  override val item: MonitorItem = MonitorItem.APP_IDLE_WARNER
  val delayThread = Executors.newScheduledThreadPool(1)
  lazy val endureLimit =
    conf.getTimeAsMs(s"${Monitor.PREFIX}.${item.toString.toLowerCase}.timeout", "1h")
  private var idleTimeout: AtomicReference[ScheduledFuture[_]] = new AtomicReference()

  private def getActiveJobNum(): Int = {
    // appStore.count(classOf[JobDataWrapper], "completionTime", -1L)
    kvStore
      .view(classOf[JobDataWrapper])
      .reverse()
      .asScala
      .map(_.info)
      .filter(_.status == JobExecutionStatus.RUNNING)
      .size
  }

  private def stopIdleTimeout(): Unit = {
    val idleTimeout = this.idleTimeout.getAndSet(null)
    if (idleTimeout != null) {
      idleTimeout.cancel(false)
    }
  }

  private def setupIdleTimeout(): Unit = {
    if (getActiveJobNum > 0) return
    val timeoutTask = new Runnable() {
      override def run(): Unit = {
        // scalastyle:off
        val driverlUrl = conf
          .get(
            "spark.org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.param.PROXY_URI_BASES")
          .split(",")
          .head
        // Alert body (Chinese): "Your Spark application ... has been idle for more
        // than <timeout>, please shut it down promptly."
        val a = <h2>您的Spark应用</h2>
            <a href={driverlUrl}>{driverlUrl}</a>
            <h2>空闲已超过 {conf.get(s"${Monitor.PREFIX}.${item}.timeout", "1h")}</h2>
            <h2>请及时关闭</h2>
        val message = new HtmlMessage(title = item, content = a.mkString)
        alarms.foreach(_.alarm(message))
        // scalastyle:on
      }
    }
    val timeout = delayThread
      .scheduleWithFixedDelay(timeoutTask, endureLimit, endureLimit, TimeUnit.MILLISECONDS)
    // If there's already an idle task registered, then cancel the new one.
    if (!this.idleTimeout.compareAndSet(null, timeout)) {
      timeout.cancel(false)
    }
    // If a new client connected while the idle task was being set up, then stop the task.
    if (getActiveJobNum > 0) stopIdleTimeout()
  }

  override def watchOut(event: SparkListenerEvent): Option[AlertMessage] = {
    event match {
      case env: SparkListenerJobStart =>
        stopIdleTimeout
        Option.empty
      case env: SparkListenerJobEnd =>
        setupIdleTimeout
        Option.empty
      case _ =>
        Option.empty
    }
  }
}
Example 5
Source File: Monitor.scala From XSQL with Apache License 2.0
package org.apache.spark.monitor

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.SparkConf
import org.apache.spark.alarm.{Alarm, AlertMessage}
import org.apache.spark.alarm.AlertType.AlertType
import org.apache.spark.internal.config.ConfigBuilder
import org.apache.spark.monitor.MonitorItem.MonitorItem
import org.apache.spark.scheduler.SparkListenerEvent
import org.apache.spark.status.AppStatusStore
import org.apache.spark.util.kvstore.KVStore

trait Monitor {
  val alertType: Seq[AlertType]
  val item: MonitorItem
  val alarms: ArrayBuffer[Alarm] = ArrayBuffer()
  var kvStore: KVStore = null
  var appStore: AppStatusStore = null
  var conf: SparkConf = null

  def watchOut(event: SparkListenerEvent): Option[AlertMessage]

  def bind(alarm: Alarm): Monitor = {
    alarms.append(alarm)
    this
  }

  def bind(alarms: Seq[Alarm]): Monitor = {
    this.alarms.appendAll(alarms)
    this
  }

  def bind(kvStore: KVStore): Monitor = {
    this.kvStore = kvStore
    this.appStore = new AppStatusStore(kvStore)
    this
  }

  def bind(conf: SparkConf): Monitor = {
    this.conf = conf
    this
  }

  def onEvent(event: SparkListenerEvent): Unit = {
    val message = watchOut(event)
    if (message.isDefined) {
      alarms.foreach(_.alarm(message.get))
    }
  }
}

object Monitor {
  val commonClasses = Seq(
    "org.apache.spark.sql.xsql.shell.SparkXSQLShell",
    "org.apache.spark.repl.Main",
    "org.apache.spark.sql.hive.xitong.shell.SparkHiveShell",
    "org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver")
  val dateFormats = Seq("yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd")
  val PREFIX = "spark.monitor"

  private[spark] val MONITOR_ITEMS = ConfigBuilder("spark.monitor.items")
    .internal()
    .doc("choose monitors to open, split with `,`")
    .stringConf
    .transform(_.toUpperCase)
    .toSequence
    .checkValue(
      _.toSet.subsetOf(MonitorItem.values.map(_.toString)),
      s"must be one of ${MonitorItem.values.map(_.toString)}")
    .createWithDefault(Seq.empty)
}

object MonitorItem extends Enumeration {
  type MonitorItem = Value
  val SQL_CHANGE_NOTIFIER = Value
  val APP_FINISH_NOTIFIER, EXECUTOR_NUM_NOTIFIER, DATASKEW_NOTIFIER, EXECUTOR_MEMORY_ADVISER = Value
  val SPARK_APPLICATION_SUMMARY, APP_IDLE_WARNER = Value
}
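A concrete monitor only has to implement watchOut; everything else (alarms, the KVStore, the SparkConf) is injected through the bind methods. The sketch below is hypothetical: the HtmlMessage call mirrors Example 4, and alertType is left empty because the available AlertType values are not shown here.

import org.apache.spark.alarm.{AlertMessage, HtmlMessage}
import org.apache.spark.alarm.AlertType.AlertType
import org.apache.spark.monitor.{Monitor, MonitorItem}
import org.apache.spark.monitor.MonitorItem.MonitorItem
import org.apache.spark.scheduler.{SparkListenerApplicationEnd, SparkListenerEvent}

// Hypothetical monitor that raises an alert when the application ends.
class AppFinishMonitor extends Monitor {
  override val alertType: Seq[AlertType] = Seq.empty // pick real AlertType values as needed
  override val item: MonitorItem = MonitorItem.APP_FINISH_NOTIFIER

  override def watchOut(event: SparkListenerEvent): Option[AlertMessage] = event match {
    case _: SparkListenerApplicationEnd =>
      Some(new HtmlMessage(title = item, content = "<h2>application finished</h2>"))
    case _ => None
  }
}

A driver-side harness would then call monitor.bind(conf).bind(kvStore).bind(alarms) once and forward every SparkListenerEvent to onEvent, which alarms only when watchOut returns a message.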
Example 6
Source File: SparkAtlasEventTracker.scala From spark-atlas-connector with Apache License 2.0
package com.hortonworks.spark.atlas

import com.google.common.annotations.VisibleForTesting
import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent}
import org.apache.spark.sql.catalyst.catalog.ExternalCatalogEvent
import org.apache.spark.sql.execution.QueryExecution
import org.apache.spark.sql.util.QueryExecutionListener

import com.hortonworks.spark.atlas.sql._
import com.hortonworks.spark.atlas.ml.MLPipelineEventProcessor
import com.hortonworks.spark.atlas.utils.Logging

class SparkAtlasEventTracker(atlasClient: AtlasClient, atlasClientConf: AtlasClientConf)
    extends SparkListener with QueryExecutionListener with Logging {

  def this(atlasClientConf: AtlasClientConf) = {
    this(AtlasClient.atlasClient(atlasClientConf), atlasClientConf)
  }

  def this() {
    this(new AtlasClientConf)
  }

  private val enabled: Boolean = AtlasUtils.isSacEnabled(atlasClientConf)

  // Processor to handle DDL related events
  @VisibleForTesting
  private[atlas] val catalogEventTracker =
    new SparkCatalogEventProcessor(atlasClient, atlasClientConf)
  catalogEventTracker.startThread()

  // Processor to handle DML related events
  private val executionPlanTracker = new SparkExecutionPlanProcessor(atlasClient, atlasClientConf)
  executionPlanTracker.startThread()

  private val mlEventTracker = new MLPipelineEventProcessor(atlasClient, atlasClientConf)
  mlEventTracker.startThread()

  override def onOtherEvent(event: SparkListenerEvent): Unit = {
    if (!enabled) {
      // No op if SAC is disabled
      return
    }

    // We only care about SQL related events.
    event match {
      case e: ExternalCatalogEvent => catalogEventTracker.pushEvent(e)
      case e: SparkListenerEvent if e.getClass.getName.contains("org.apache.spark.ml") =>
        mlEventTracker.pushEvent(e)
      case _ => // Ignore other events
    }
  }

  override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = {
    if (!enabled) {
      // No op if SAC is disabled
      return
    }

    if (qe.logical.isStreaming) {
      // streaming query will be tracked via SparkAtlasStreamingQueryEventTracker
      return
    }

    val qd = QueryDetail.fromQueryExecutionListener(qe, durationNs)
    executionPlanTracker.pushEvent(qd)
  }

  override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit = {
    // No-op: SAC is only one of the listeners.
  }
}
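The tracker is both a SparkListener and a QueryExecutionListener, so it is normally attached through both hooks at application startup. A minimal configuration sketch follows (class names as in this project; verify the exact settings against the spark-atlas-connector documentation for your version).

import org.apache.spark.SparkConf

// Register the same class as a scheduler listener (for catalog/ML events) and
// as a query execution listener (for DML lineage).
val conf = new SparkConf()
  .set("spark.extraListeners", "com.hortonworks.spark.atlas.SparkAtlasEventTracker")
  .set("spark.sql.queryExecutionListeners", "com.hortonworks.spark.atlas.SparkAtlasEventTracker")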
Example 7
Source File: StreamingQueryListenerBus.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution.streaming

import java.util.UUID

import scala.collection.mutable

import org.apache.spark.scheduler.{LiveListenerBus, SparkListener, SparkListenerEvent}
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.util.ListenerBus

  override protected def doPostEvent(
      listener: StreamingQueryListener,
      event: StreamingQueryListener.Event): Unit = {
    def shouldReport(runId: UUID): Boolean = {
      activeQueryRunIds.synchronized {
        activeQueryRunIds.contains(runId)
      }
    }

    event match {
      case queryStarted: QueryStartedEvent =>
        if (shouldReport(queryStarted.runId)) {
          listener.onQueryStarted(queryStarted)
        }
      case queryProgress: QueryProgressEvent =>
        if (shouldReport(queryProgress.progress.runId)) {
          listener.onQueryProgress(queryProgress)
        }
      case queryTerminated: QueryTerminatedEvent =>
        if (shouldReport(queryTerminated.runId)) {
          listener.onQueryTerminated(queryTerminated)
        }
      case _ =>
    }
  }
}

object StreamingQueryListenerBus {
  val STREAM_EVENT_QUERY = "streams"
}