org.apache.spark.scheduler.SparkListenerEvent Scala Examples

The following examples show how to use org.apache.spark.scheduler.SparkListenerEvent in Scala. Each example is drawn from an open-source project; the project name and source file are listed above each snippet.
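All of the snippets below follow the same basic pattern: a custom event type extends SparkListenerEvent, a SparkListener picks it up in onOtherEvent, and the listener is registered with the application. Before the project-specific examples, here is a minimal, self-contained sketch of that pattern; the names AppHeartbeat and HeartbeatListener are invented for illustration and do not come from any of the projects below.

import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent}

// A custom event. logEvent defaults to true; returning false keeps the event
// out of the persisted event log.
case class AppHeartbeat(timestamp: Long) extends SparkListenerEvent {
  override def logEvent: Boolean = false
}

// Custom (non-built-in) events are delivered to listeners through onOtherEvent.
class HeartbeatListener extends SparkListener {
  override def onOtherEvent(event: SparkListenerEvent): Unit = event match {
    case AppHeartbeat(ts) => println(s"heartbeat at $ts")
    case _ => // ignore everything else
  }
}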
Example 1
Source File: OapListener.scala    From OAP    with Apache License 2.0
package org.apache.spark.sql.oap.listener

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent}
import org.apache.spark.sql.oap.OapRuntime

@DeveloperApi
case class SparkListenerCustomInfoUpdate(
    hostName: String,
    executorId: String,
    clazzName: String,
    customizedInfo: String) extends SparkListenerEvent {
  override def logEvent: Boolean = false
}

class OapListener extends SparkListener {
  override def onOtherEvent(event: SparkListenerEvent): Unit = event match {
    case customInfo: SparkListenerCustomInfoUpdate =>
      if (customInfo.clazzName.contains("OapFiberCacheHeartBeatMessager")) {
        OapRuntime.getOrCreate.fiberSensor.updateLocations(customInfo)
      } else if (customInfo.clazzName.contains("FiberCacheManagerMessager")) {
        OapRuntime.getOrCreate.fiberSensor.updateMetrics(customInfo)
      }
    case _ =>
  }
} 
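How OAP wires OapListener into the running application is not shown in this snippet. In general, a SparkListener like this one can be attached either programmatically or by class name through configuration; a brief sketch (assuming a running SparkSession named spark):

// Programmatic registration (SparkContext.addSparkListener is a DeveloperApi):
spark.sparkContext.addSparkListener(new org.apache.spark.sql.oap.listener.OapListener)

// Or declaratively, so Spark instantiates the listener at startup:
//   spark.extraListeners=org.apache.spark.sql.oap.listener.OapListener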
Example 2
Source File: ProfilerListener.scala    From carbondata    with Apache License 2.0
package org.apache.spark.sql.profiler

import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent, SparkListenerTaskEnd, SparkListenerTaskGettingResult, SparkListenerTaskStart}
import org.apache.spark.sql.execution.ui.{SparkListenerSQLExecutionEnd, SparkListenerSQLExecutionStart}


private[profiler] class ProfilerListener extends SparkListener {
  override def onOtherEvent(event: SparkListenerEvent): Unit = {
    Profiler.invokeIfEnable {
      event match {
        case executionStart: SparkListenerSQLExecutionStart =>
          Profiler.addExecutionMessage(
            executionStart.executionId,
            ExecutionStart(
              executionStart.executionId,
              executionStart.time,
              executionStart.physicalPlanDescription
            ))
        case executionEnd: SparkListenerSQLExecutionEnd =>
          Profiler.send(
            ExecutionEnd(
              executionEnd.executionId,
              executionEnd.time
            )
          )
        case _ =>
      }
    }
  }
} 
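ExecutionStart, ExecutionEnd and the Profiler object are carbondata-internal and not shown here. As a standalone illustration of the same idea, the hedged sketch below (the SqlTimingListener name is invented) pairs the same two Spark SQL events to report wall-clock time per SQL execution:

import scala.collection.mutable

import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent}
import org.apache.spark.sql.execution.ui.{SparkListenerSQLExecutionEnd, SparkListenerSQLExecutionStart}

class SqlTimingListener extends SparkListener {
  // Listener callbacks are invoked from a single bus thread, so a plain map suffices.
  private val startTimes = mutable.Map[Long, Long]()

  override def onOtherEvent(event: SparkListenerEvent): Unit = event match {
    case s: SparkListenerSQLExecutionStart =>
      startTimes(s.executionId) = s.time
    case e: SparkListenerSQLExecutionEnd =>
      startTimes.remove(e.executionId).foreach { start =>
        println(s"SQL execution ${e.executionId} took ${e.time - start} ms")
      }
    case _ =>
  }
}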
Example 3
Source File: StreamingQueryListenerBus.scala    From drizzle-spark    with Apache License 2.0
package org.apache.spark.sql.execution.streaming

import org.apache.spark.scheduler.{LiveListenerBus, SparkListener, SparkListenerEvent}
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.util.ListenerBus

// Bridges StreamingQueryListener events onto the Spark listener bus and dispatches
// them to registered StreamingQueryListeners.
class StreamingQueryListenerBus(sparkListenerBus: LiveListenerBus)
  extends SparkListener
  with ListenerBus[StreamingQueryListener, StreamingQueryListener.Event] {

  import StreamingQueryListener._

  def post(event: StreamingQueryListener.Event) {
    event match {
      case s: QueryStartedEvent =>
        postToAll(s)
      case _ =>
        sparkListenerBus.post(event)
    }
  }

  override def onOtherEvent(event: SparkListenerEvent): Unit = {
    event match {
      case e: StreamingQueryListener.Event =>
        postToAll(e)
      case _ =>
    }
  }

  override protected def doPostEvent(
      listener: StreamingQueryListener,
      event: StreamingQueryListener.Event): Unit = {
    event match {
      case queryStarted: QueryStartedEvent =>
        listener.onQueryStarted(queryStarted)
      case queryProgress: QueryProgressEvent =>
        listener.onQueryProgress(queryProgress)
      case queryTerminated: QueryTerminatedEvent =>
        listener.onQueryTerminated(queryTerminated)
      case _ =>
    }
  }

} 
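StreamingQueryListenerBus is internal plumbing; user code normally attaches a listener through the public StreamingQueryManager API, and the events then flow through a bus like the one above. A minimal sketch:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener.{QueryProgressEvent, QueryStartedEvent, QueryTerminatedEvent}

def attach(spark: SparkSession): Unit = {
  spark.streams.addListener(new StreamingQueryListener {
    override def onQueryStarted(event: QueryStartedEvent): Unit =
      println(s"query started: ${event.id}")
    override def onQueryProgress(event: QueryProgressEvent): Unit =
      println(s"rows in last trigger: ${event.progress.numInputRows}")
    override def onQueryTerminated(event: QueryTerminatedEvent): Unit =
      println(s"query terminated: ${event.id}")
  })
}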
Example 4
Source File: ApplicationIdleMonitor.scala    From XSQL    with Apache License 2.0
package org.apache.spark.monitor.job

import java.util.concurrent.{Executors, ScheduledFuture, TimeUnit}
import java.util.concurrent.atomic.AtomicReference

import scala.collection.JavaConverters._

import org.apache.spark.JobExecutionStatus
import org.apache.spark.alarm.{AlertMessage, HtmlMessage}
import org.apache.spark.monitor.{Monitor, MonitorItem}
import org.apache.spark.monitor.MonitorItem.MonitorItem
import org.apache.spark.scheduler.{SparkListenerEvent, SparkListenerJobEnd, SparkListenerJobStart}
import org.apache.spark.status.JobDataWrapper

class ApplicationIdleMonitor extends JobMonitor {

  override val item: MonitorItem = MonitorItem.APP_IDLE_WARNER
  val delayThread = Executors.newScheduledThreadPool(1)
  lazy val endureLimit =
    conf.getTimeAsMs(s"${Monitor.PREFIX}.${item.toString.toLowerCase}.timeout", "1h")
  private var idleTimeout: AtomicReference[ScheduledFuture[_]] = new AtomicReference()

  private def getActiveJobNum(): Int = {
//    appStore.count(classOf[JobDataWrapper], "completionTime", -1L)
    kvStore
      .view(classOf[JobDataWrapper])
      .reverse()
      .asScala
      .map(_.info)
      .filter(_.status == JobExecutionStatus.RUNNING)
      .size
  }

  private def stopIdleTimeout(): Unit = {
    val idleTimeout = this.idleTimeout.getAndSet(null)
    if (idleTimeout != null) {
      idleTimeout.cancel(false)
    }
  }

  private def setupIdleTimeout(): Unit = {
    if (getActiveJobNum > 0) return
    val timeoutTask = new Runnable() {
      override def run(): Unit = {
        // scalastyle:off
        val driverlUrl = conf
          .get(
            "spark.org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.param.PROXY_URI_BASES")
          .split(",")
          .head
        // Alarm body (HTML, in Chinese): "Your Spark application <link> has been idle
        // for longer than <timeout>, please shut it down promptly."
        val a = <h2>您的Spark应用</h2>
            <a href={driverlUrl}>{driverlUrl}</a>
            <h2>空闲已超过 {conf.get(
              s"${Monitor.PREFIX}.${item}.timeout", "1h")}</h2>
            <h2>请及时关闭</h2>
        val message = new HtmlMessage(title = item, content = a.mkString)
        alarms.foreach(_.alarm(message))
        // scalastyle:on
      }
    }

    val timeout = delayThread
      .scheduleWithFixedDelay(timeoutTask, endureLimit, endureLimit, TimeUnit.MILLISECONDS)
    // If there's already an idle task registered, then cancel the new one.
    if (!this.idleTimeout.compareAndSet(null, timeout)) {
      timeout.cancel(false)
    }
    // If a new client connected while the idle task was being set up, then stop the task.
    if (getActiveJobNum > 0) stopIdleTimeout()
  }

  override def watchOut(event: SparkListenerEvent): Option[AlertMessage] = {
    event match {
      case env: SparkListenerJobStart =>
        stopIdleTimeout
        Option.empty
      case env: SparkListenerJobEnd =>
        setupIdleTimeout
        Option.empty
      case _ =>
        Option.empty
    }
  }
} 
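How XSQL instantiates its monitors and feeds listener events into them is outside this snippet; the Monitor trait in Example 5 below shows the contract. Going only by the configuration keys visible in the code (Monitor.PREFIX plus the lower-cased item name, and the spark.monitor.items switch defined in Example 5), enabling this monitor would presumably look something like:

spark.monitor.items                       APP_IDLE_WARNER
spark.monitor.app_idle_warner.timeout     2h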
Example 5
Source File: Monitor.scala    From XSQL    with Apache License 2.0
package org.apache.spark.monitor

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.SparkConf
import org.apache.spark.alarm.{Alarm, AlertMessage}
import org.apache.spark.alarm.AlertType.AlertType
import org.apache.spark.internal.config.ConfigBuilder
import org.apache.spark.monitor.MonitorItem.MonitorItem
import org.apache.spark.scheduler.SparkListenerEvent
import org.apache.spark.status.AppStatusStore
import org.apache.spark.util.kvstore.KVStore

trait Monitor {

  val alertType: Seq[AlertType]
  val item: MonitorItem
  val alarms: ArrayBuffer[Alarm] = ArrayBuffer()
  var kvStore: KVStore = null
  var appStore: AppStatusStore = null
  var conf: SparkConf = null

  def watchOut(event: SparkListenerEvent): Option[AlertMessage]
  def bind(alarm: Alarm): Monitor = {
    alarms.append(alarm)
    this
  }
  def bind(alarms: Seq[Alarm]): Monitor = {
    this.alarms.appendAll(alarms)
    this
  }
  def bind(kvStore: KVStore): Monitor = {
    this.kvStore = kvStore
    this.appStore = new AppStatusStore(kvStore)
    this
  }
  def bind(conf: SparkConf): Monitor = {
    this.conf = conf
    this
  }
  def onEvent(event: SparkListenerEvent): Unit = {
    val message = watchOut(event)
    if (message.isDefined) {
      alarms.foreach(_.alarm(message.get))
    }
  }
}
object Monitor {
  val commonClasses = Seq(
    "org.apache.spark.sql.xsql.shell.SparkXSQLShell",
    "org.apache.spark.repl.Main",
    "org.apache.spark.sql.hive.xitong.shell.SparkHiveShell",
    "org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver")
  val dateFormats = Seq("yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd")
  val PREFIX = "spark.monitor"
  private[spark] val MONITOR_ITEMS =
    ConfigBuilder("spark.monitor.items")
      .internal()
      .doc("choose monitors to open, split with `,`")
      .stringConf
      .transform(_.toUpperCase)
      .toSequence
      .checkValue(
        _.toSet.subsetOf(MonitorItem.values.map(_.toString)),
        s"must be one of ${MonitorItem.values.map(_.toString)}")
      .createWithDefault(Seq.empty)
}
object MonitorItem extends Enumeration {
  type MonitorItem = Value
  val SQL_CHANGE_NOTIFIER = Value
  val APP_FINISH_NOTIFIER, EXECUTOR_NUM_NOTIFIER, DATASKEW_NOTIFIER, EXECUTOR_MEMORY_ADVISER =
    Value
  val SPARK_APPLICATION_SUMMARY, APP_IDLE_WARNER = Value
} 
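The trait fixes the contract: watchOut inspects a single event and optionally produces an AlertMessage, and onEvent fans any produced message out to the bound alarms. A hedged sketch of how a SparkListener could drive a bound monitor (the MonitorListener class is invented here; XSQL's actual wiring is not shown):

import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent, SparkListenerJobEnd, SparkListenerJobStart}

// Forwards scheduler events into a Monitor so its watchOut/alarm pipeline runs.
class MonitorListener(monitor: Monitor) extends SparkListener {
  override def onJobStart(jobStart: SparkListenerJobStart): Unit = monitor.onEvent(jobStart)
  override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = monitor.onEvent(jobEnd)
  override def onOtherEvent(event: SparkListenerEvent): Unit = monitor.onEvent(event)
}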
Example 6
Source File: SparkAtlasEventTracker.scala    From spark-atlas-connector    with Apache License 2.0
package com.hortonworks.spark.atlas

import com.google.common.annotations.VisibleForTesting
import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent}
import org.apache.spark.sql.catalyst.catalog.ExternalCatalogEvent
import org.apache.spark.sql.execution.QueryExecution
import org.apache.spark.sql.util.QueryExecutionListener
import com.hortonworks.spark.atlas.sql._
import com.hortonworks.spark.atlas.ml.MLPipelineEventProcessor
import com.hortonworks.spark.atlas.utils.Logging

class SparkAtlasEventTracker(atlasClient: AtlasClient, atlasClientConf: AtlasClientConf)
    extends SparkListener with QueryExecutionListener with Logging {

  def this(atlasClientConf: AtlasClientConf) = {
    this(AtlasClient.atlasClient(atlasClientConf), atlasClientConf)
  }

  def this() {
    this(new AtlasClientConf)
  }

  private val enabled: Boolean = AtlasUtils.isSacEnabled(atlasClientConf)

  // Processor to handle DDL related events
  @VisibleForTesting
  private[atlas] val catalogEventTracker =
    new SparkCatalogEventProcessor(atlasClient, atlasClientConf)
  catalogEventTracker.startThread()

  // Processor to handle DML related events
  private val executionPlanTracker = new SparkExecutionPlanProcessor(atlasClient, atlasClientConf)
  executionPlanTracker.startThread()

  private val mlEventTracker = new MLPipelineEventProcessor(atlasClient, atlasClientConf)
  mlEventTracker.startThread()

  override def onOtherEvent(event: SparkListenerEvent): Unit = {
    if (!enabled) {
      // No op if SAC is disabled
      return
    }

    // We only care about SQL related events.
    event match {
      case e: ExternalCatalogEvent => catalogEventTracker.pushEvent(e)
      case e: SparkListenerEvent if e.getClass.getName.contains("org.apache.spark.ml") =>
        mlEventTracker.pushEvent(e)
      case _ => // Ignore other events
    }
  }

  override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = {
    if (!enabled) {
      // No op if SAC is disabled
      return
    }

    if (qe.logical.isStreaming) {
      // streaming query will be tracked via SparkAtlasStreamingQueryEventTracker
      return
    }

    val qd = QueryDetail.fromQueryExecutionListener(qe, durationNs)
    executionPlanTracker.pushEvent(qd)
  }

  override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit = {
    // No-op: SAC is only one of the registered listeners.
  }

} 
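Because SparkAtlasEventTracker is both a SparkListener and a QueryExecutionListener, it has to be registered on both paths. The connector is normally registered by class name in the Spark configuration, roughly as below (check the project's README for the exact, current property values):

spark.extraListeners                  com.hortonworks.spark.atlas.SparkAtlasEventTracker
spark.sql.queryExecutionListeners     com.hortonworks.spark.atlas.SparkAtlasEventTracker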
Example 7
Source File: StreamingQueryListenerBus.scala    From XSQL    with Apache License 2.0
package org.apache.spark.sql.execution.streaming

import java.util.UUID

import scala.collection.mutable

import org.apache.spark.scheduler.{LiveListenerBus, SparkListener, SparkListenerEvent}
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.util.ListenerBus

// Dispatches StreamingQueryListener events to registered listeners, but only for
// queries whose runId is currently active in this session.
class StreamingQueryListenerBus(sparkListenerBus: LiveListenerBus)
  extends SparkListener
  with ListenerBus[StreamingQueryListener, StreamingQueryListener.Event] {

  import StreamingQueryListener._

  // Run IDs of the streaming queries started in the owning SparkSession.
  private val activeQueryRunIds = new mutable.HashSet[UUID]

  override protected def doPostEvent(
      listener: StreamingQueryListener,
      event: StreamingQueryListener.Event): Unit = {
    def shouldReport(runId: UUID): Boolean = {
      activeQueryRunIds.synchronized { activeQueryRunIds.contains(runId) }
    }

    event match {
      case queryStarted: QueryStartedEvent =>
        if (shouldReport(queryStarted.runId)) {
          listener.onQueryStarted(queryStarted)
        }
      case queryProgress: QueryProgressEvent =>
        if (shouldReport(queryProgress.progress.runId)) {
          listener.onQueryProgress(queryProgress)
        }
      case queryTerminated: QueryTerminatedEvent =>
        if (shouldReport(queryTerminated.runId)) {
          listener.onQueryTerminated(queryTerminated)
        }
      case _ =>
    }
  }
}

object StreamingQueryListenerBus {
  val STREAM_EVENT_QUERY = "streams"
}
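STREAM_EVENT_QUERY names the dedicated listener-bus queue used for streaming-query events. In the upstream Spark code this fork tracks, the bus registers itself on that queue during construction, roughly like this (a hedged reconstruction, not part of the snippet above):

sparkListenerBus.addToQueue(this, StreamingQueryListenerBus.STREAM_EVENT_QUERY)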