org.apache.spark.scheduler.SparkListenerJobStart Scala Examples
The following examples show how to use org.apache.spark.scheduler.SparkListenerJobStart.
The original project, source file, and license for each example are noted above the code.
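Before the project examples, here is a minimal, self-contained sketch of the API they all build on: a SparkListenerJobStart event is delivered to SparkListener.onJobStart for every job submitted on a SparkContext. The object name below is made up for illustration; only the listener API is real.

import org.apache.spark.SparkContext
import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart}

// Illustrative object name; any place with access to a SparkContext works.
object JobStartLoggingExample {
  def register(sc: SparkContext): Unit = {
    sc.addSparkListener(new SparkListener {
      override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
        // jobId, time, and stageInfos are the fields the project examples below rely on.
        println(s"Job ${jobStart.jobId} started at ${jobStart.time} " +
          s"with ${jobStart.stageInfos.size} stages")
      }
    })
  }
}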
Example 1
Source File: CleanupUtil.scala From hazelcast-spark with Apache License 2.0
package com.hazelcast.spark.connector.util

import com.hazelcast.spark.connector.util.ConnectionUtil.closeAll
import org.apache.spark.SparkContext
import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd, SparkListenerJobStart}

object CleanupUtil {

  val jobIds: collection.mutable.Map[Int, Seq[Int]] = collection.mutable.Map[Int, Seq[Int]]()
  val cleanupJobRddName: String = "HazelcastResourceCleanupJob"

  def addCleanupListener(sc: SparkContext): Unit = {
    sc.addSparkListener(new SparkListener {
      override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
        this.synchronized {
          jobStart.stageInfos.foreach(info => {
            info.rddInfos.foreach(rdd => {
              if (!cleanupJobRddName.equals(rdd.name)) {
                val ids: Seq[Int] = info.rddInfos.map(_.id)
                val maybeIds: Option[Seq[Int]] = jobIds.get(jobStart.jobId)
                if (maybeIds.isDefined) {
                  jobIds.put(jobStart.jobId, ids ++ maybeIds.get)
                } else {
                  jobIds.put(jobStart.jobId, ids)
                }
              }
            })
          })
        }
      }

      override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = {
        this.synchronized {
          if (jobIds.contains(jobEnd.jobId)) {
            try {
              val workers = sc.getConf.getInt("spark.executor.instances",
                sc.getExecutorStorageStatus.length)
              val rddId: Option[Seq[Int]] = jobIds.get(jobEnd.jobId)
              if (rddId.isDefined) {
                sc.parallelize(1 to workers, workers)
                  .setName(cleanupJobRddName)
                  .foreachPartition(it ⇒ closeAll(rddId.get))
              }
              jobIds -= jobEnd.jobId
            } catch {
              case e: Exception =>
            }
          }
        }
      }
    })
  }
}
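For context, installing the cleanup listener is a single call on the SparkContext. The snippet below is a minimal usage sketch; the application name and local master are made up for illustration, and it assumes the hazelcast-spark connector is on the classpath.

import org.apache.spark.{SparkConf, SparkContext}
import com.hazelcast.spark.connector.util.CleanupUtil

val conf = new SparkConf().setAppName("hazelcast-cleanup-demo").setMaster("local[*]")
val sc = new SparkContext(conf)

// Register before running Hazelcast-backed jobs: the listener records the RDD ids of each
// job on start and closes the matching Hazelcast connections once the job ends.
CleanupUtil.addCleanupListener(sc)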
Example 2
Source File: ApplicationIdleMonitor.scala From XSQL with Apache License 2.0
package org.apache.spark.monitor.job

import java.util.concurrent.{Executors, ScheduledFuture, TimeUnit}
import java.util.concurrent.atomic.AtomicReference

import scala.collection.JavaConverters._

import org.apache.spark.JobExecutionStatus
import org.apache.spark.alarm.{AlertMessage, HtmlMessage}
import org.apache.spark.monitor.{Monitor, MonitorItem}
import org.apache.spark.monitor.MonitorItem.MonitorItem
import org.apache.spark.scheduler.{SparkListenerEvent, SparkListenerJobEnd, SparkListenerJobStart}
import org.apache.spark.status.JobDataWrapper

class ApplicationIdleMonitor extends JobMonitor {

  override val item: MonitorItem = MonitorItem.APP_IDLE_WARNER
  val delayThread = Executors.newScheduledThreadPool(1)
  lazy val endureLimit =
    conf.getTimeAsMs(s"${Monitor.PREFIX}.${item.toString.toLowerCase}.timeout", "1h")
  private var idleTimeout: AtomicReference[ScheduledFuture[_]] = new AtomicReference()

  private def getActiveJobNum(): Int = {
    // appStore.count(classOf[JobDataWrapper], "completionTime", -1L)
    kvStore
      .view(classOf[JobDataWrapper])
      .reverse()
      .asScala
      .map(_.info)
      .filter(_.status == JobExecutionStatus.RUNNING)
      .size
  }

  private def stopIdleTimeout(): Unit = {
    val idleTimeout = this.idleTimeout.getAndSet(null)
    if (idleTimeout != null) {
      idleTimeout.cancel(false)
    }
  }

  private def setupIdleTimeout(): Unit = {
    if (getActiveJobNum > 0) return
    val timeoutTask = new Runnable() {
      override def run(): Unit = {
        // scalastyle:off
        val driverlUrl = conf
          .get(
            "spark.org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.param.PROXY_URI_BASES")
          .split(",")
          .head
        // Alert body (Chinese): "Your Spark application <link> has been idle for more than
        // <timeout>, please shut it down promptly."
        val a = <h2>您的Spark应用</h2>
            <a href={driverlUrl}>{driverlUrl}</a>
            <h2>空闲已超过 {conf.get(s"${Monitor.PREFIX}.${item}.timeout", "1h")}</h2>
            <h2>请及时关闭</h2>
        val message = new HtmlMessage(title = item, content = a.mkString)
        alarms.foreach(_.alarm(message))
        // scalastyle:on
      }
    }
    val timeout = delayThread
      .scheduleWithFixedDelay(timeoutTask, endureLimit, endureLimit, TimeUnit.MILLISECONDS)
    // If there's already an idle task registered, then cancel the new one.
    if (!this.idleTimeout.compareAndSet(null, timeout)) {
      timeout.cancel(false)
    }
    // If a new client connected while the idle task was being set up, then stop the task.
    if (getActiveJobNum > 0) stopIdleTimeout()
  }

  override def watchOut(event: SparkListenerEvent): Option[AlertMessage] = {
    event match {
      case env: SparkListenerJobStart =>
        stopIdleTimeout
        Option.empty
      case env: SparkListenerJobEnd =>
        setupIdleTimeout
        Option.empty
      case _ => Option.empty
    }
  }
}
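The monitor above depends on XSQL-internal plumbing (JobMonitor, kvStore, alarms), so it cannot be reused verbatim. As a point of comparison, the following is a hypothetical, self-contained sketch of the same idle-detection pattern using only public listener APIs: the class name IdleAlertListener, the one-shot schedule, and the log-line alert are stand-ins for the real alarm framework.

import java.util.concurrent.{Executors, ScheduledFuture, TimeUnit}
import java.util.concurrent.atomic.{AtomicInteger, AtomicReference}

import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd, SparkListenerJobStart}

class IdleAlertListener(idleTimeoutMs: Long) extends SparkListener {
  private val scheduler = Executors.newScheduledThreadPool(1)
  private val activeJobs = new AtomicInteger(0)
  private val pending = new AtomicReference[ScheduledFuture[_]]()

  override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
    activeJobs.incrementAndGet()
    // A job is running, so the application is not idle: cancel any pending alert.
    val task = pending.getAndSet(null)
    if (task != null) task.cancel(false)
  }

  override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = {
    if (activeJobs.decrementAndGet() <= 0) {
      val task = scheduler.schedule(new Runnable {
        override def run(): Unit =
          println(s"Application idle for more than $idleTimeoutMs ms")
      }, idleTimeoutMs, TimeUnit.MILLISECONDS)
      // Keep at most one pending alert; cancel the new one if another is already registered.
      if (!pending.compareAndSet(null, task)) task.cancel(false)
    }
  }
}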
Example 3
Source File: QueryGuardListener.scala From gimel with Apache License 2.0
package com.paypal.gimel.common.query.guard

import java.util.concurrent.atomic.AtomicBoolean

import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd, SparkListenerJobStart, SparkListenerStageCompleted}
import org.apache.spark.sql.SparkSession
import org.joda.time.{DateTime, Instant}

import com.paypal.gimel.common.conf.{QueryGuardConfigs, QueryGuardConstants}
import com.paypal.gimel.common.utilities.GenericUtils
import com.paypal.gimel.logger.Logger

class QueryGuardListener[E >: QueryGuardDelayedEvent](spark: SparkSession,
                                                      discoveryType: String = "job")
    extends SparkListener
    with Producer[E] {
  private val logger = new Logger(this.getClass.getName)
  private val stopped = new AtomicBoolean(true)
  private val HEADER: String = "[DISCOVERY] "
  private var _consumers: Seq[Consumer[E]] = Seq.empty

  override def onJobStart(jobStart: SparkListenerJobStart) {
    logger.info(
      s"${HEADER}Job[${jobStart.jobId}] started with ${jobStart.stageInfos.size} stages @ ${Instant.now()}"
    )
    if (!stopped.get) {
      val job = JobSubmitted(
        jobStart.jobId,
        discoveryType,
        System.currentTimeMillis(),
        jobTtl = GenericUtils.getValue(
          spark.conf,
          QueryGuardConfigs.JOB_TTL,
          QueryGuardConstants.DEFAULT_JOB_TTL
        ),
        delayTtl = GenericUtils.getValue(
          spark.conf,
          QueryGuardConfigs.DELAY_TTL,
          QueryGuardConstants.DEFAULT_DELAY_TTL
        )
      )
      logger.info(
        s"${HEADER}Proceeding to queue in Job[${jobStart.jobId}] onto QueryGuard"
      )
      publish(job)
    } else {
      logger.info(
        s"${HEADER}As QueryGuardListener is ${stopped.get()}," +
          s" unable to queue in Job[${jobStart.jobId}]"
      )
    }
  }

  override def publish(queryGuardEvent: E): Unit = {
    for (consumer <- _consumers) {
      consumer.consume(queryGuardEvent)
    }
  }

  override def onStageCompleted(
      stageCompleted: SparkListenerStageCompleted
  ): Unit = {
    logger.info(
      s"Stage ${stageCompleted.stageInfo.stageId} completed with ${stageCompleted.stageInfo.numTasks} tasks."
    )
  }

  override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = {
    logger.info(
      s"Job[${jobEnd.jobId}] completed at ${new DateTime(jobEnd.time)}" +
        s" with result -> ${jobEnd.jobResult}"
    )
    super.onJobEnd(jobEnd)
  }

  override def registerConsumers(consumers: Seq[Consumer[E]]): Unit = {
    _consumers = consumers
  }

  def start(): Unit = {
    // toggle stopped to false so incoming jobs are published to the consumers
    stopped.set(false)
    logger.info(s"${HEADER}Started QueryGuardListener: $stopped")
  }

  def stop(): Unit = {
    // toggle stopped to true
    stopped.compareAndSet(false, true)
    logger.info(s"${HEADER}Stopped QueryGuardListener: $stopped")
  }
}
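Registration of this listener normally happens inside Gimel's QueryGuard setup, which also supplies the event consumers. The snippet below is only a rough wiring sketch: it assumes QueryGuardDelayedEvent lives in the same com.paypal.gimel.common.query.guard package, and the consumer registration is left as a commented placeholder.

import org.apache.spark.sql.SparkSession
import com.paypal.gimel.common.query.guard.{QueryGuardDelayedEvent, QueryGuardListener}

val spark = SparkSession.builder().appName("query-guard-demo").getOrCreate()

val listener = new QueryGuardListener[QueryGuardDelayedEvent](spark)
// listener.registerConsumers(Seq(...))  // supply Consumer[QueryGuardDelayedEvent]s from QueryGuard
listener.start()                         // flips the stopped flag so new jobs are published
spark.sparkContext.addSparkListener(listener)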