org.apache.spark.JobExecutionStatus Scala Examples
The following examples show how to use org.apache.spark.JobExecutionStatus.
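JobExecutionStatus is the enum of per-job states (RUNNING, SUCCEEDED, FAILED, UNKNOWN) that Spark's status tracker and status stores report. Before the project-specific examples, here is a minimal sketch of the pattern most of them share: resolving job ids through SparkContext.statusTracker and matching on the returned status. The local master, the sample job, and the object name are illustrative assumptions only.

import org.apache.spark.{JobExecutionStatus, SparkConf, SparkContext}

object JobStatusSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[*]").setAppName("job-status-sketch"))
    try {
      // Run a small job so the status tracker has something to report.
      sc.parallelize(1 to 1000, 4).count()

      // Jobs submitted outside any job group are listed under the null group.
      for {
        jobId <- sc.statusTracker.getJobIdsForGroup(null)
        info <- sc.statusTracker.getJobInfo(jobId)
      } {
        info.status() match {
          case JobExecutionStatus.RUNNING   => println(s"job $jobId is still running")
          case JobExecutionStatus.SUCCEEDED => println(s"job $jobId succeeded")
          case JobExecutionStatus.FAILED    => println(s"job $jobId failed")
          case JobExecutionStatus.UNKNOWN   => println(s"job $jobId has no recorded status")
        }
      }
    } finally {
      sc.stop()
    }
  }
}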
Example 1
Source File: OneJobResource.scala, from the iolap project (Apache License 2.0)

package org.apache.spark.status.api.v1

import javax.ws.rs.{PathParam, GET, Produces}
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class OneJobResource(ui: SparkUI) {

  @GET
  def oneJob(@PathParam("jobId") jobId: Int): JobData = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val jobOpt = statusToJobs.map { _._2 }.flatten.find { jobInfo => jobInfo.jobId == jobId }
    jobOpt.map { job =>
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }.getOrElse {
      throw new NotFoundException("unknown job: " + jobId)
    }
  }
}
Example 2
Source File: AppKill.scala, from the gimel project (Apache License 2.0)

package com.paypal.gimel.common.query.guard

import org.apache.spark.JobExecutionStatus
import org.apache.spark.sql.SparkSession

import com.paypal.gimel.logger.Logger

class AppKill[E](spark: SparkSession, incomingLogger: Option[Logger] = None)
    extends EventLoop[E](name = "app-kill-event-loop")
    with Consumer[E] {

  private val logger = Logger(this.getClass.getName)
  private val HEADER: String = "[APP-KILL] "

  override def consume(queryGuardEvent: E): Unit = {
    this.post(queryGuardEvent)
  }

  override protected def onReceive(event: E): Unit = {
    // kill the received job
    event match {
      case jobKill: JobKill if jobKill.jobType == "job" =>
        val jobInfo = spark.sparkContext.statusTracker.getJobInfo(jobKill.jobId)
        if (jobInfo.isDefined && JobExecutionStatus.RUNNING == jobInfo.get.status()) {
          logger.info(s"${HEADER}Proceeding to cancel Job: $jobKill")
          spark.sparkContext.cancelJob(jobKill.jobId, jobKill.reason)
        }
      case jobKill: JobKill if jobKill.jobType == "stage" =>
        val stageInfo = spark.sparkContext.statusTracker.getStageInfo(jobKill.jobId)
        if (stageInfo.isDefined && stageInfo.get.numActiveTasks() > 0) {
          logger.info(s"${HEADER}Proceeding to cancel Stage: $jobKill")
          spark.sparkContext.cancelStage(jobKill.jobId, jobKill.reason)
        }
    }
  }

  override protected def onError(e: Throwable): Unit = {
    // stop all the monitoring tasks
    logger.info(s"${HEADER}Proceeding to stop ${_name}")
    stop()
  }
}
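The essential idea in AppKill is the guard around cancellation: a job is cancelled only while the status tracker still reports it as JobExecutionStatus.RUNNING. The sketch below isolates that guard without the Gimel-specific EventLoop, Consumer, and JobKill types; the background job, the sleep, and the cancellation reason are illustrative assumptions.

import scala.concurrent.Future
import scala.concurrent.ExecutionContext.Implicits.global

import org.apache.spark.{JobExecutionStatus, SparkConf, SparkContext}

object CancelIfRunningSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[*]").setAppName("cancel-if-running"))
    try {
      // Kick off a deliberately slow job in the background so there is something to cancel.
      Future {
        sc.parallelize(1 to 1000, 8).map { i => Thread.sleep(100); i }.count()
      }

      Thread.sleep(2000) // let the job register with the status tracker

      // The same guard AppKill applies: cancel only jobs the tracker still reports as RUNNING.
      for {
        jobId <- sc.statusTracker.getActiveJobIds()
        info <- sc.statusTracker.getJobInfo(jobId)
        if info.status() == JobExecutionStatus.RUNNING
      } sc.cancelJob(jobId, "cancelled by watchdog sketch")
    } finally {
      sc.stop()
    }
  }
}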
Example 3
Source File: UIData.scala, from the BigDatalog project (Apache License 2.0)

package org.apache.spark.ui.jobs

import org.apache.spark.JobExecutionStatus
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
import org.apache.spark.util.collection.OpenHashSet

import scala.collection.mutable
import scala.collection.mutable.HashMap

private[spark] object UIData {

  class ExecutorSummary {
    var taskTime: Long = 0
    var failedTasks: Int = 0
    var succeededTasks: Int = 0
    var inputBytes: Long = 0
    var inputRecords: Long = 0
    var outputBytes: Long = 0
    var outputRecords: Long = 0
    var shuffleRead: Long = 0
    var shuffleReadRecords: Long = 0
    var shuffleWrite: Long = 0
    var shuffleWriteRecords: Long = 0
    var memoryBytesSpilled: Long = 0
    var diskBytesSpilled: Long = 0
  }

  class JobUIData(
      var jobId: Int = -1,
      var submissionTime: Option[Long] = None,
      var completionTime: Option[Long] = None,
      var stageIds: Seq[Int] = Seq.empty,
      var jobGroup: Option[String] = None,
      var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN,
      // Task and stage counters; these constructor parameters are the ones
      // consumed by AllJobsResource.convertJobData.
      var numTasks: Int = 0,
      var numActiveTasks: Int = 0,
      var numCompletedTasks: Int = 0,
      var numSkippedTasks: Int = 0,
      var numFailedTasks: Int = 0,
      var numActiveStages: Int = 0,
      // A set rather than a plain count, so rerun stages are not double-counted.
      var completedStageIndices: OpenHashSet[Int] = new OpenHashSet[Int](),
      var numSkippedStages: Int = 0,
      var numFailedStages: Int = 0)

  case class TaskUIData(
      var taskInfo: TaskInfo,
      var taskMetrics: Option[TaskMetrics] = None,
      var errorMessage: Option[String] = None)

  case class ExecutorUIData(
      val startTime: Long,
      var finishTime: Option[Long] = None,
      var finishReason: Option[String] = None)
}
Example 4
Source File: OneJobResource.scala, from the BigDatalog project (Apache License 2.0)

package org.apache.spark.status.api.v1

import javax.ws.rs.{PathParam, GET, Produces}
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class OneJobResource(ui: SparkUI) {

  @GET
  def oneJob(@PathParam("jobId") jobId: Int): JobData = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val jobOpt = statusToJobs.map { _._2 }.flatten.find { jobInfo => jobInfo.jobId == jobId }
    jobOpt.map { job =>
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }.getOrElse {
      throw new NotFoundException("unknown job: " + jobId)
    }
  }
}
Example 5
Source File: AllJobsResource.scala, from the BigDatalog project (Apache License 2.0)

package org.apache.spark.status.api.v1

import java.util.{Arrays, Date, List => JList}
import javax.ws.rs._
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.JobProgressListener
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class AllJobsResource(ui: SparkUI) {

  @GET
  def jobsList(@QueryParam("status") statuses: JList[JobExecutionStatus]): Seq[JobData] = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val adjStatuses: JList[JobExecutionStatus] = {
      if (statuses.isEmpty) {
        Arrays.asList(JobExecutionStatus.values(): _*)
      } else {
        statuses
      }
    }
    val jobInfos = for {
      (status, jobs) <- statusToJobs
      job <- jobs
      if adjStatuses.contains(status)
    } yield {
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }
    jobInfos.sortBy { - _.jobId }
  }
}

private[v1] object AllJobsResource {

  def getStatusToJobs(ui: SparkUI): Seq[(JobExecutionStatus, Seq[JobUIData])] = {
    val statusToJobs = ui.jobProgressListener.synchronized {
      Seq(
        JobExecutionStatus.RUNNING -> ui.jobProgressListener.activeJobs.values.toSeq,
        JobExecutionStatus.SUCCEEDED -> ui.jobProgressListener.completedJobs.toSeq,
        JobExecutionStatus.FAILED -> ui.jobProgressListener.failedJobs.reverse.toSeq
      )
    }
    statusToJobs
  }

  def convertJobData(
      job: JobUIData,
      listener: JobProgressListener,
      includeStageDetails: Boolean): JobData = {
    listener.synchronized {
      val lastStageInfo = listener.stageIdToInfo.get(job.stageIds.max)
      val lastStageData = lastStageInfo.flatMap { s =>
        listener.stageIdToData.get((s.stageId, s.attemptId))
      }
      val lastStageName = lastStageInfo.map { _.name }.getOrElse("(Unknown Stage Name)")
      val lastStageDescription = lastStageData.flatMap { _.description }
      new JobData(
        jobId = job.jobId,
        name = lastStageName,
        description = lastStageDescription,
        submissionTime = job.submissionTime.map { new Date(_) },
        completionTime = job.completionTime.map { new Date(_) },
        stageIds = job.stageIds,
        jobGroup = job.jobGroup,
        status = job.status,
        numTasks = job.numTasks,
        numActiveTasks = job.numActiveTasks,
        numCompletedTasks = job.numCompletedTasks,
        numSkippedTasks = job.numSkippedTasks,
        numFailedTasks = job.numFailedTasks,
        numActiveStages = job.numActiveStages,
        numCompletedStages = job.completedStageIndices.size,
        numSkippedStages = job.numSkippedStages,
        numFailedStages = job.numFailedStages
      )
    }
  }
}
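AllJobsResource serves the /api/v1/applications/{app-id}/jobs endpoint of Spark's REST monitoring API, where the status query parameter binds to JobExecutionStatus values. A hedged client-side sketch follows; the localhost:4040 address and the placeholder application id are assumptions for an application whose UI is running locally.

import scala.io.Source

object JobsRestApiSketch {
  def main(args: Array[String]): Unit = {
    // Placeholder application id; in practice it comes from sc.applicationId
    // or from the /api/v1/applications listing.
    val appId = args.headOption.getOrElse("app-00000000000000-0000")
    val url = s"http://localhost:4040/api/v1/applications/$appId/jobs?status=running"
    // The response is the JSON rendering of the JobData objects built by convertJobData above.
    println(Source.fromURL(url, "UTF-8").mkString)
  }
}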
Example 6
Source File: JobsTab.scala, from the Spark-2.3.1 project (Apache License 2.0)

package org.apache.spark.ui.jobs

import javax.servlet.http.HttpServletRequest

import scala.collection.JavaConverters._

import org.apache.spark.JobExecutionStatus
import org.apache.spark.scheduler.SchedulingMode
import org.apache.spark.status.AppStatusStore
import org.apache.spark.ui._

private[ui] class JobsTab(parent: SparkUI, store: AppStatusStore)
  extends SparkUITab(parent, "jobs") {

  val sc = parent.sc
  val killEnabled = parent.killEnabled

  def isFairScheduler: Boolean = {
    store
      .environmentInfo()
      .sparkProperties
      .contains(("spark.scheduler.mode", SchedulingMode.FAIR.toString))
  }

  def getSparkUser: String = parent.getSparkUser

  attachPage(new AllJobsPage(this, store))
  attachPage(new JobPage(this, store))

  def handleKillRequest(request: HttpServletRequest): Unit = {
    if (killEnabled && parent.securityManager.checkModifyPermissions(request.getRemoteUser)) {
      // stripXSS is called first to remove suspicious characters used in XSS attacks
      val jobId = Option(UIUtils.stripXSS(request.getParameter("id"))).map(_.toInt)
      jobId.foreach { id =>
        store.asOption(store.job(id)).foreach { job =>
          if (job.status == JobExecutionStatus.RUNNING) {
            sc.foreach(_.cancelJob(id))
            // Do a quick pause here to give Spark time to kill the job so it shows up as
            // killed after the refresh. Note that this will block the serving thread so the
            // time should be limited in duration.
            Thread.sleep(100)
          }
        }
      }
    }
  }
}
Example 7
Source File: SQLAppStatusStore.scala, from the Spark-2.3.1 project (Apache License 2.0)

package org.apache.spark.sql.execution.ui

import java.lang.{Long => JLong}
import java.util.Date

import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer

import com.fasterxml.jackson.annotation.JsonIgnore
import com.fasterxml.jackson.databind.annotation.JsonDeserialize

import org.apache.spark.JobExecutionStatus
import org.apache.spark.status.KVUtils.KVIndexParam
import org.apache.spark.util.kvstore.{KVIndex, KVStore}

class SparkPlanGraphNodeWrapper(
    val node: SparkPlanGraphNode,
    val cluster: SparkPlanGraphClusterWrapper) {

  def toSparkPlanGraphNode(): SparkPlanGraphNode = {
    assert(node == null ^ cluster == null, "Exactly one of node or cluster must be set.")
    if (node != null) node else cluster.toSparkPlanGraphCluster()
  }
}

case class SQLPlanMetric(
    name: String,
    accumulatorId: Long,
    metricType: String)
Example 8
Source File: UIData.scala, from the spark1.52 project (Apache License 2.0)

package org.apache.spark.ui.jobs

import org.apache.spark.JobExecutionStatus
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
import org.apache.spark.util.collection.OpenHashSet

import scala.collection.mutable
import scala.collection.mutable.HashMap

private[spark] object UIData {

  class ExecutorSummary {
    var taskTime: Long = 0          // task time
    var failedTasks: Int = 0        // number of failed tasks
    var succeededTasks: Int = 0     // number of succeeded tasks
    var inputBytes: Long = 0
    var inputRecords: Long = 0
    var outputBytes: Long = 0
    var outputRecords: Long = 0
    var shuffleRead: Long = 0
    var shuffleReadRecords: Long = 0
    var shuffleWrite: Long = 0
    var shuffleWriteRecords: Long = 0
    var memoryBytesSpilled: Long = 0
    var diskBytesSpilled: Long = 0
  }

  class JobUIData(
      var jobId: Int = -1,
      var submissionTime: Option[Long] = None,   // submission time
      var completionTime: Option[Long] = None,   // completion time
      var stageIds: Seq[Int] = Seq.empty,
      var jobGroup: Option[String] = None,
      var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN,
      // Task and stage counters; these constructor parameters are the ones
      // consumed by AllJobsResource.convertJobData.
      var numTasks: Int = 0,
      var numActiveTasks: Int = 0,
      var numCompletedTasks: Int = 0,
      var numSkippedTasks: Int = 0,
      var numFailedTasks: Int = 0,
      var numActiveStages: Int = 0,
      // A set rather than a plain count, so rerun stages are not double-counted.
      var completedStageIndices: OpenHashSet[Int] = new OpenHashSet[Int](),
      var numSkippedStages: Int = 0,
      var numFailedStages: Int = 0)

  case class TaskUIData(
      var taskInfo: TaskInfo,
      var taskMetrics: Option[TaskMetrics] = None,
      var errorMessage: Option[String] = None)

  case class ExecutorUIData(
      val startTime: Long,
      var finishTime: Option[Long] = None,
      var finishReason: Option[String] = None)
}
Example 9
Source File: OneJobResource.scala, from the spark1.52 project (Apache License 2.0)

package org.apache.spark.status.api.v1

import javax.ws.rs.{PathParam, GET, Produces}
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.UIData.JobUIData

// The @Produces annotation declares the media type(s) of the response entity; one or more types may be listed.
@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class OneJobResource(ui: SparkUI) {

  @GET
  def oneJob(@PathParam("jobId") jobId: Int): JobData = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val jobOpt = statusToJobs.map { _._2 }.flatten.find { jobInfo => jobInfo.jobId == jobId }
    jobOpt.map { job =>
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }.getOrElse {
      throw new NotFoundException("unknown job: " + jobId)
    }
  }
}
Example 10
Source File: AllJobsResource.scala, from the spark1.52 project (Apache License 2.0)

package org.apache.spark.status.api.v1

import java.util.{Arrays, Date, List => JList}
import javax.ws.rs._
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.JobProgressListener
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class AllJobsResource(ui: SparkUI) {

  @GET
  def jobsList(@QueryParam("status") statuses: JList[JobExecutionStatus]): Seq[JobData] = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val adjStatuses: JList[JobExecutionStatus] = {
      if (statuses.isEmpty) {
        Arrays.asList(JobExecutionStatus.values(): _*)
      } else {
        statuses
      }
    }
    val jobInfos = for {
      (status, jobs) <- statusToJobs
      job <- jobs
      if adjStatuses.contains(status)
    } yield {
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }
    jobInfos.sortBy { - _.jobId }
  }
}

private[v1] object AllJobsResource {

  def getStatusToJobs(ui: SparkUI): Seq[(JobExecutionStatus, Seq[JobUIData])] = {
    val statusToJobs = ui.jobProgressListener.synchronized {
      Seq(
        JobExecutionStatus.RUNNING -> ui.jobProgressListener.activeJobs.values.toSeq,
        JobExecutionStatus.SUCCEEDED -> ui.jobProgressListener.completedJobs.toSeq,
        JobExecutionStatus.FAILED -> ui.jobProgressListener.failedJobs.reverse.toSeq
      )
    }
    statusToJobs
  }

  def convertJobData(
      job: JobUIData,
      listener: JobProgressListener,
      includeStageDetails: Boolean): JobData = {
    listener.synchronized {
      val lastStageInfo = listener.stageIdToInfo.get(job.stageIds.max)
      val lastStageData = lastStageInfo.flatMap { s =>
        listener.stageIdToData.get((s.stageId, s.attemptId))
      }
      val lastStageName = lastStageInfo.map { _.name }.getOrElse("(Unknown Stage Name)")
      val lastStageDescription = lastStageData.flatMap { _.description }
      new JobData(
        jobId = job.jobId,
        name = lastStageName,
        description = lastStageDescription,
        submissionTime = job.submissionTime.map { new Date(_) },
        completionTime = job.completionTime.map { new Date(_) },
        stageIds = job.stageIds,
        jobGroup = job.jobGroup,
        status = job.status,
        numTasks = job.numTasks,
        numActiveTasks = job.numActiveTasks,
        numCompletedTasks = job.numCompletedTasks,
        numSkippedTasks = job.numSkippedTasks,
        numFailedTasks = job.numFailedTasks,
        numActiveStages = job.numActiveStages,
        numCompletedStages = job.completedStageIndices.size,
        numSkippedStages = job.numSkippedStages,
        numFailedStages = job.numFailedStages
      )
    }
  }
}
Example 11
Source File: UIData.scala, from the iolap project (Apache License 2.0)

package org.apache.spark.ui.jobs

import org.apache.spark.JobExecutionStatus
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
import org.apache.spark.util.collection.OpenHashSet

import scala.collection.mutable.HashMap

private[spark] object UIData {

  class ExecutorSummary {
    var taskTime: Long = 0
    var failedTasks: Int = 0
    var succeededTasks: Int = 0
    var inputBytes: Long = 0
    var inputRecords: Long = 0
    var outputBytes: Long = 0
    var outputRecords: Long = 0
    var shuffleRead: Long = 0
    var shuffleReadRecords: Long = 0
    var shuffleWrite: Long = 0
    var shuffleWriteRecords: Long = 0
    var memoryBytesSpilled: Long = 0
    var diskBytesSpilled: Long = 0
  }

  class JobUIData(
      var jobId: Int = -1,
      var submissionTime: Option[Long] = None,
      var completionTime: Option[Long] = None,
      var stageIds: Seq[Int] = Seq.empty,
      var jobGroup: Option[String] = None,
      var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN,
      // Task and stage counters; these constructor parameters are the ones
      // consumed by AllJobsResource.convertJobData.
      var numTasks: Int = 0,
      var numActiveTasks: Int = 0,
      var numCompletedTasks: Int = 0,
      var numSkippedTasks: Int = 0,
      var numFailedTasks: Int = 0,
      var numActiveStages: Int = 0,
      // A set rather than a plain count, so rerun stages are not double-counted.
      var completedStageIndices: OpenHashSet[Int] = new OpenHashSet[Int](),
      var numSkippedStages: Int = 0,
      var numFailedStages: Int = 0)

  case class TaskUIData(
      var taskInfo: TaskInfo,
      var taskMetrics: Option[TaskMetrics] = None,
      var errorMessage: Option[String] = None)

  case class ExecutorUIData(
      val startTime: Long,
      var finishTime: Option[Long] = None,
      var finishReason: Option[String] = None)
}
Example 12
Source File: AllJobsResource.scala, from the drizzle-spark project (Apache License 2.0)

package org.apache.spark.status.api.v1

import java.util.{Arrays, Date, List => JList}
import javax.ws.rs._
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.JobProgressListener
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class AllJobsResource(ui: SparkUI) {

  @GET
  def jobsList(@QueryParam("status") statuses: JList[JobExecutionStatus]): Seq[JobData] = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val adjStatuses: JList[JobExecutionStatus] = {
      if (statuses.isEmpty) {
        Arrays.asList(JobExecutionStatus.values(): _*)
      } else {
        statuses
      }
    }
    val jobInfos = for {
      (status, jobs) <- statusToJobs
      job <- jobs
      if adjStatuses.contains(status)
    } yield {
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }
    jobInfos.sortBy { - _.jobId }
  }
}

private[v1] object AllJobsResource {

  def getStatusToJobs(ui: SparkUI): Seq[(JobExecutionStatus, Seq[JobUIData])] = {
    val statusToJobs = ui.jobProgressListener.synchronized {
      Seq(
        JobExecutionStatus.RUNNING -> ui.jobProgressListener.activeJobs.values.toSeq,
        JobExecutionStatus.SUCCEEDED -> ui.jobProgressListener.completedJobs.toSeq,
        JobExecutionStatus.FAILED -> ui.jobProgressListener.failedJobs.reverse.toSeq
      )
    }
    statusToJobs
  }

  def convertJobData(
      job: JobUIData,
      listener: JobProgressListener,
      includeStageDetails: Boolean): JobData = {
    listener.synchronized {
      val lastStageInfo = if (job.stageIds.isEmpty) {
        None
      } else {
        listener.stageIdToInfo.get(job.stageIds.max)
      }
      val lastStageData = lastStageInfo.flatMap { s =>
        listener.stageIdToData.get((s.stageId, s.attemptId))
      }
      val lastStageName = lastStageInfo.map { _.name }.getOrElse("(Unknown Stage Name)")
      val lastStageDescription = lastStageData.flatMap { _.description }
      new JobData(
        jobId = job.jobId,
        name = lastStageName,
        description = lastStageDescription,
        submissionTime = job.submissionTime.map { new Date(_) },
        completionTime = job.completionTime.map { new Date(_) },
        stageIds = job.stageIds,
        jobGroup = job.jobGroup,
        status = job.status,
        numTasks = job.numTasks,
        numActiveTasks = job.numActiveTasks,
        numCompletedTasks = job.numCompletedTasks,
        numSkippedTasks = job.numSkippedTasks,
        numFailedTasks = job.numFailedTasks,
        numActiveStages = job.numActiveStages,
        numCompletedStages = job.completedStageIndices.size,
        numSkippedStages = job.numSkippedStages,
        numFailedStages = job.numFailedStages
      )
    }
  }
}
Example 13
Source File: AllJobsResource.scala, from the iolap project (Apache License 2.0)

package org.apache.spark.status.api.v1

import java.util.{Arrays, Date, List => JList}
import javax.ws.rs._
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.JobProgressListener
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class AllJobsResource(ui: SparkUI) {

  @GET
  def jobsList(@QueryParam("status") statuses: JList[JobExecutionStatus]): Seq[JobData] = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val adjStatuses: JList[JobExecutionStatus] = {
      if (statuses.isEmpty) {
        Arrays.asList(JobExecutionStatus.values(): _*)
      } else {
        statuses
      }
    }
    val jobInfos = for {
      (status, jobs) <- statusToJobs
      job <- jobs
      if adjStatuses.contains(status)
    } yield {
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }
    jobInfos.sortBy { - _.jobId }
  }
}

private[v1] object AllJobsResource {

  def getStatusToJobs(ui: SparkUI): Seq[(JobExecutionStatus, Seq[JobUIData])] = {
    val statusToJobs = ui.jobProgressListener.synchronized {
      Seq(
        JobExecutionStatus.RUNNING -> ui.jobProgressListener.activeJobs.values.toSeq,
        JobExecutionStatus.SUCCEEDED -> ui.jobProgressListener.completedJobs.toSeq,
        JobExecutionStatus.FAILED -> ui.jobProgressListener.failedJobs.reverse.toSeq
      )
    }
    statusToJobs
  }

  def convertJobData(
      job: JobUIData,
      listener: JobProgressListener,
      includeStageDetails: Boolean): JobData = {
    listener.synchronized {
      val lastStageInfo = listener.stageIdToInfo.get(job.stageIds.max)
      val lastStageData = lastStageInfo.flatMap { s =>
        listener.stageIdToData.get((s.stageId, s.attemptId))
      }
      val lastStageName = lastStageInfo.map { _.name }.getOrElse("(Unknown Stage Name)")
      val lastStageDescription = lastStageData.flatMap { _.description }
      new JobData(
        jobId = job.jobId,
        name = lastStageName,
        description = lastStageDescription,
        submissionTime = job.submissionTime.map { new Date(_) },
        completionTime = job.completionTime.map { new Date(_) },
        stageIds = job.stageIds,
        jobGroup = job.jobGroup,
        status = job.status,
        numTasks = job.numTasks,
        numActiveTasks = job.numActiveTasks,
        numCompletedTasks = job.numCompletedTasks,
        numSkippedTasks = job.numSkippedTasks,
        numFailedTasks = job.numFailedTasks,
        numActiveStages = job.numActiveStages,
        numCompletedStages = job.completedStageIndices.size,
        numSkippedStages = job.numSkippedStages,
        numFailedStages = job.numFailedStages
      )
    }
  }
}
Example 14
Source File: OneJobResource.scala, from the multi-tenancy-spark project (Apache License 2.0)

package org.apache.spark.status.api.v1

import javax.ws.rs.{GET, PathParam, Produces}
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class OneJobResource(ui: SparkUI) {

  @GET
  def oneJob(@PathParam("jobId") jobId: Int): JobData = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val jobOpt = statusToJobs.flatMap(_._2).find { jobInfo => jobInfo.jobId == jobId }
    jobOpt.map { job =>
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }.getOrElse {
      throw new NotFoundException("unknown job: " + jobId)
    }
  }
}
Example 15
Source File: AllJobsResource.scala, from the multi-tenancy-spark project (Apache License 2.0)

package org.apache.spark.status.api.v1

import java.util.{Arrays, Date, List => JList}
import javax.ws.rs._
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.JobProgressListener
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class AllJobsResource(ui: SparkUI) {

  @GET
  def jobsList(@QueryParam("status") statuses: JList[JobExecutionStatus]): Seq[JobData] = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val adjStatuses: JList[JobExecutionStatus] = {
      if (statuses.isEmpty) {
        Arrays.asList(JobExecutionStatus.values(): _*)
      } else {
        statuses
      }
    }
    val jobInfos = for {
      (status, jobs) <- statusToJobs
      job <- jobs
      if adjStatuses.contains(status)
    } yield {
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }
    jobInfos.sortBy { - _.jobId }
  }
}

private[v1] object AllJobsResource {

  def getStatusToJobs(ui: SparkUI): Seq[(JobExecutionStatus, Seq[JobUIData])] = {
    val statusToJobs = ui.jobProgressListener.synchronized {
      Seq(
        JobExecutionStatus.RUNNING -> ui.jobProgressListener.activeJobs.values.toSeq,
        JobExecutionStatus.SUCCEEDED -> ui.jobProgressListener.completedJobs.toSeq,
        JobExecutionStatus.FAILED -> ui.jobProgressListener.failedJobs.reverse.toSeq
      )
    }
    statusToJobs
  }

  def convertJobData(
      job: JobUIData,
      listener: JobProgressListener,
      includeStageDetails: Boolean): JobData = {
    listener.synchronized {
      val lastStageInfo = if (job.stageIds.isEmpty) {
        None
      } else {
        listener.stageIdToInfo.get(job.stageIds.max)
      }
      val lastStageData = lastStageInfo.flatMap { s =>
        listener.stageIdToData.get((s.stageId, s.attemptId))
      }
      val lastStageName = lastStageInfo.map { _.name }.getOrElse("(Unknown Stage Name)")
      val lastStageDescription = lastStageData.flatMap { _.description }
      new JobData(
        jobId = job.jobId,
        name = lastStageName,
        description = lastStageDescription,
        submissionTime = job.submissionTime.map { new Date(_) },
        completionTime = job.completionTime.map { new Date(_) },
        stageIds = job.stageIds,
        jobGroup = job.jobGroup,
        status = job.status,
        numTasks = job.numTasks,
        numActiveTasks = job.numActiveTasks,
        numCompletedTasks = job.numCompletedTasks,
        numSkippedTasks = job.numSkippedTasks,
        numFailedTasks = job.numFailedTasks,
        numActiveStages = job.numActiveStages,
        numCompletedStages = job.completedStageIndices.size,
        numSkippedStages = job.numSkippedStages,
        numFailedStages = job.numFailedStages
      )
    }
  }
}
Example 16
Source File: UIData.scala, from the SparkCore project (Apache License 2.0)

package org.apache.spark.ui.jobs

import org.apache.spark.JobExecutionStatus
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
import org.apache.spark.util.collection.OpenHashSet

import scala.collection.mutable.HashMap

private[jobs] object UIData {

  class ExecutorSummary {
    var taskTime: Long = 0
    var failedTasks: Int = 0
    var succeededTasks: Int = 0
    var inputBytes: Long = 0
    var inputRecords: Long = 0
    var outputBytes: Long = 0
    var outputRecords: Long = 0
    var shuffleRead: Long = 0
    var shuffleReadRecords: Long = 0
    var shuffleWrite: Long = 0
    var shuffleWriteRecords: Long = 0
    var memoryBytesSpilled: Long = 0
    var diskBytesSpilled: Long = 0
  }

  class JobUIData(
      var jobId: Int = -1,
      var submissionTime: Option[Long] = None,
      var completionTime: Option[Long] = None,
      var stageIds: Seq[Int] = Seq.empty,
      var jobGroup: Option[String] = None,
      var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN,
      // Task and stage counters reported by the jobs UI.
      var numTasks: Int = 0,
      var numActiveTasks: Int = 0,
      var numCompletedTasks: Int = 0,
      var numSkippedTasks: Int = 0,
      var numFailedTasks: Int = 0,
      var numActiveStages: Int = 0,
      // A set rather than a plain count, so rerun stages are not double-counted.
      var completedStageIndices: OpenHashSet[Int] = new OpenHashSet[Int](),
      var numSkippedStages: Int = 0,
      var numFailedStages: Int = 0)

  case class TaskUIData(
      var taskInfo: TaskInfo,
      var taskMetrics: Option[TaskMetrics] = None,
      var errorMessage: Option[String] = None)
}
Example 17
Source File: OneJobResource.scala, from the sparkoscope project (Apache License 2.0)

package org.apache.spark.status.api.v1

import javax.ws.rs.{GET, PathParam, Produces}
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class OneJobResource(ui: SparkUI) {

  @GET
  def oneJob(@PathParam("jobId") jobId: Int): JobData = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val jobOpt = statusToJobs.flatMap(_._2).find { jobInfo => jobInfo.jobId == jobId }
    jobOpt.map { job =>
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }.getOrElse {
      throw new NotFoundException("unknown job: " + jobId)
    }
  }
}
Example 18
Source File: AllJobsResource.scala, from the sparkoscope project (Apache License 2.0)

package org.apache.spark.status.api.v1

import java.util.{Arrays, Date, List => JList}
import javax.ws.rs._
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.JobProgressListener
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class AllJobsResource(ui: SparkUI) {

  @GET
  def jobsList(@QueryParam("status") statuses: JList[JobExecutionStatus]): Seq[JobData] = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val adjStatuses: JList[JobExecutionStatus] = {
      if (statuses.isEmpty) {
        Arrays.asList(JobExecutionStatus.values(): _*)
      } else {
        statuses
      }
    }
    val jobInfos = for {
      (status, jobs) <- statusToJobs
      job <- jobs
      if adjStatuses.contains(status)
    } yield {
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }
    jobInfos.sortBy { - _.jobId }
  }
}

private[v1] object AllJobsResource {

  def getStatusToJobs(ui: SparkUI): Seq[(JobExecutionStatus, Seq[JobUIData])] = {
    val statusToJobs = ui.jobProgressListener.synchronized {
      Seq(
        JobExecutionStatus.RUNNING -> ui.jobProgressListener.activeJobs.values.toSeq,
        JobExecutionStatus.SUCCEEDED -> ui.jobProgressListener.completedJobs.toSeq,
        JobExecutionStatus.FAILED -> ui.jobProgressListener.failedJobs.reverse.toSeq
      )
    }
    statusToJobs
  }

  def convertJobData(
      job: JobUIData,
      listener: JobProgressListener,
      includeStageDetails: Boolean): JobData = {
    listener.synchronized {
      val lastStageInfo = if (job.stageIds.isEmpty) {
        None
      } else {
        listener.stageIdToInfo.get(job.stageIds.max)
      }
      val lastStageData = lastStageInfo.flatMap { s =>
        listener.stageIdToData.get((s.stageId, s.attemptId))
      }
      val lastStageName = lastStageInfo.map { _.name }.getOrElse("(Unknown Stage Name)")
      val lastStageDescription = lastStageData.flatMap { _.description }
      new JobData(
        jobId = job.jobId,
        name = lastStageName,
        description = lastStageDescription,
        submissionTime = job.submissionTime.map { new Date(_) },
        completionTime = job.completionTime.map { new Date(_) },
        stageIds = job.stageIds,
        jobGroup = job.jobGroup,
        status = job.status,
        numTasks = job.numTasks,
        numActiveTasks = job.numActiveTasks,
        numCompletedTasks = job.numCompletedTasks,
        numSkippedTasks = job.numSkippedTasks,
        numFailedTasks = job.numFailedTasks,
        numActiveStages = job.numActiveStages,
        numCompletedStages = job.completedStageIndices.size,
        numSkippedStages = job.numSkippedStages,
        numFailedStages = job.numFailedStages
      )
    }
  }
}
Example 19
Source File: ApplicationIdleMonitor.scala, from the XSQL project (Apache License 2.0)

package org.apache.spark.monitor.job

import java.util.concurrent.{Executors, ScheduledFuture, TimeUnit}
import java.util.concurrent.atomic.AtomicReference

import scala.collection.JavaConverters._

import org.apache.spark.JobExecutionStatus
import org.apache.spark.alarm.{AlertMessage, HtmlMessage}
import org.apache.spark.monitor.{Monitor, MonitorItem}
import org.apache.spark.monitor.MonitorItem.MonitorItem
import org.apache.spark.scheduler.{SparkListenerEvent, SparkListenerJobEnd, SparkListenerJobStart}
import org.apache.spark.status.JobDataWrapper

class ApplicationIdleMonitor extends JobMonitor {

  override val item: MonitorItem = MonitorItem.APP_IDLE_WARNER
  val delayThread = Executors.newScheduledThreadPool(1)
  lazy val endureLimit =
    conf.getTimeAsMs(s"${Monitor.PREFIX}.${item.toString.toLowerCase}.timeout", "1h")
  private var idleTimeout: AtomicReference[ScheduledFuture[_]] = new AtomicReference()

  private def getActiveJobNum(): Int = {
    // appStore.count(classOf[JobDataWrapper], "completionTime", -1L)
    kvStore
      .view(classOf[JobDataWrapper])
      .reverse()
      .asScala
      .map(_.info)
      .filter(_.status == JobExecutionStatus.RUNNING)
      .size
  }

  private def stopIdleTimeout(): Unit = {
    val idleTimeout = this.idleTimeout.getAndSet(null)
    if (idleTimeout != null) {
      idleTimeout.cancel(false)
    }
  }

  private def setupIdleTimeout(): Unit = {
    if (getActiveJobNum > 0) return
    val timeoutTask = new Runnable() {
      override def run(): Unit = {
        // scalastyle:off
        val driverUrl = conf
          .get(
            "spark.org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.param.PROXY_URI_BASES")
          .split(",")
          .head
        // Alert body (the original message text is in Chinese; translated here):
        // "Your Spark application ... has been idle for more than <timeout>; please shut it down promptly."
        val a = <h2>Your Spark application</h2>
            <a href={driverUrl}>{driverUrl}</a>
            <h2>has been idle for more than {conf.get(s"${Monitor.PREFIX}.${item}.timeout", "1h")}</h2>
            <h2>Please shut it down promptly</h2>
        val message = new HtmlMessage(title = item, content = a.mkString)
        alarms.foreach(_.alarm(message))
        // scalastyle:on
      }
    }
    val timeout = delayThread
      .scheduleWithFixedDelay(timeoutTask, endureLimit, endureLimit, TimeUnit.MILLISECONDS)
    // If there's already an idle task registered, then cancel the new one.
    if (!this.idleTimeout.compareAndSet(null, timeout)) {
      timeout.cancel(false)
    }
    // If a new client connected while the idle task was being set up, then stop the task.
    if (getActiveJobNum > 0) stopIdleTimeout()
  }

  override def watchOut(event: SparkListenerEvent): Option[AlertMessage] = {
    event match {
      case env: SparkListenerJobStart =>
        stopIdleTimeout
        Option.empty
      case env: SparkListenerJobEnd =>
        setupIdleTimeout
        Option.empty
      case _ =>
        Option.empty
    }
  }
}
Example 20
Source File: SQLAppStatusStore.scala, from the XSQL project (Apache License 2.0)

package org.apache.spark.sql.execution.ui

import java.lang.{Long => JLong}
import java.util.Date

import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer

import com.fasterxml.jackson.annotation.JsonIgnore
import com.fasterxml.jackson.databind.annotation.JsonDeserialize

import org.apache.spark.JobExecutionStatus
import org.apache.spark.status.KVUtils.KVIndexParam
import org.apache.spark.util.kvstore.{KVIndex, KVStore}

class SparkPlanGraphNodeWrapper(
    val node: SparkPlanGraphNode,
    val cluster: SparkPlanGraphClusterWrapper) {

  def toSparkPlanGraphNode(): SparkPlanGraphNode = {
    assert(node == null ^ cluster == null, "Exactly one of node or cluster must be set.")
    if (node != null) node else cluster.toSparkPlanGraphCluster()
  }
}

case class SQLPlanMetric(
    name: String,
    accumulatorId: Long,
    metricType: String)
Example 21
Source File: JobProgressUtil.scala, from the Linkis project (Apache License 2.0)

package com.webank.wedatasphere.linkis.engine.spark.utils

import java.text.NumberFormat

import com.webank.wedatasphere.linkis.common.utils.Logging
import com.webank.wedatasphere.linkis.protocol.engine.JobProgressInfo
import org.apache.commons.lang.time.DateFormatUtils
import org.apache.spark.{JobExecutionStatus, SparkContext, SparkJobInfo}

object JobProgressUtil extends Logging {

  def progress(sc: SparkContext, jobGroup: String): Float = {
    val jobIds = sc.statusTracker.getJobIdsForGroup(jobGroup)
    val jobs = jobIds.flatMap { id => sc.statusTracker.getJobInfo(id) }
    val stages = jobs.flatMap { job => job.stageIds().flatMap(sc.statusTracker.getStageInfo) }
    val taskCount = stages.map(_.numTasks).sum
    val completedTaskCount = stages.map(_.numCompletedTasks).sum
    if (taskCount == 0) {
      0f
    } else {
      (completedTaskCount.toDouble / taskCount).toFloat
    }
  }

  def getActiveJobProgressInfo(sc: SparkContext, jobGroup: String): Array[JobProgressInfo] = {
    val jobIds = sc.statusTracker.getJobIdsForGroup(jobGroup)
    val activeJobs = jobIds
      .flatMap { id => sc.statusTracker.getJobInfo(id) }
      .filter(_.status() == JobExecutionStatus.RUNNING)
    val progressInfos = activeJobs.map { job =>
      val jobProgressInfo = getJobProgressInfoByStages(job, sc, jobGroup)
      val timestamp = DateFormatUtils.format(System.currentTimeMillis, "yyyy-MM-dd HH:mm:ss")
      val progress = jobProgressInfo.succeedTasks * 1d / jobProgressInfo.totalTasks
      info(s"${jobProgressInfo.id} numTasks = ${jobProgressInfo.totalTasks}, numCompletedTasks = ${jobProgressInfo.succeedTasks}," +
        s" numActiveTasks = ${jobProgressInfo.runningTasks} , completed:${percentageFormat(progress)}")
      jobProgressInfo
    }
    progressInfos
  }

  def getCompletedJobProgressInfo(sc: SparkContext, jobGroup: String): Array[JobProgressInfo] = {
    val jobIds = sc.statusTracker.getJobIdsForGroup(jobGroup)
    val completedJobs = jobIds
      .flatMap { id => sc.statusTracker.getJobInfo(id) }
      .filter(_.status() == JobExecutionStatus.SUCCEEDED)
    val progressInfos = completedJobs.map { job =>
      getJobProgressInfoByStages(job, sc, jobGroup)
    }
    progressInfos
  }

  private def getJobProgressInfoByStages(
      job: SparkJobInfo,
      sc: SparkContext,
      jobGroup: String): JobProgressInfo = {
    val stages = job.stageIds().flatMap(sc.statusTracker.getStageInfo)
    var numTasks = 0
    var numActiveTasks = 0
    var numFailedTasks = 0
    var numSucceedTasks = 0
    stages.foreach { stageInfo =>
      if (stageInfo.submissionTime() > 0) {
        numTasks += stageInfo.numTasks()
        numActiveTasks += stageInfo.numActiveTasks()
        numFailedTasks += stageInfo.numFailedTasks()
        numSucceedTasks += stageInfo.numCompletedTasks()
      }
    }
    JobProgressInfo(getJobId(job.jobId(), jobGroup), numTasks, numActiveTasks, numFailedTasks, numSucceedTasks)
  }

  private def getJobId(jobId: Int, jobGroup: String): String =
    "jobId-" + jobId + "(" + jobGroup + ")"

  private var _percentFormat: NumberFormat = _

  def percentageFormat(decimal: Double): String = {
    if (_percentFormat == null) {
      _percentFormat = NumberFormat.getPercentInstance
      _percentFormat.setMinimumFractionDigits(2)
    }
    _percentFormat.format(decimal)
  }
}
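A hedged usage sketch for JobProgressUtil as defined above: the job group name, the local master, and the sample job are illustrative assumptions. The calls mirror how an engine would poll progress for the job group it set before executing user code.

import com.webank.wedatasphere.linkis.engine.spark.utils.JobProgressUtil
import org.apache.spark.{SparkConf, SparkContext}

object ProgressPollingSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[*]").setAppName("progress-polling-sketch"))
    try {
      val jobGroup = "demo-group" // illustrative group id
      sc.setJobGroup(jobGroup, "progress demo", interruptOnCancel = true)
      sc.parallelize(1 to 100000, 8).map(_ * 2).count()

      // progress() is completed tasks / total tasks over all stages of the group's jobs (0f if none yet).
      val overall = JobProgressUtil.progress(sc, jobGroup)
      println(s"overall progress: ${JobProgressUtil.percentageFormat(overall.toDouble)}")
      JobProgressUtil.getCompletedJobProgressInfo(sc, jobGroup).foreach(info => println(info))
    } finally {
      sc.stop()
    }
  }
}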
Example 22
Source File: OneJobResource.scala, from the drizzle-spark project (Apache License 2.0)

package org.apache.spark.status.api.v1

import javax.ws.rs.{GET, PathParam, Produces}
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class OneJobResource(ui: SparkUI) {

  @GET
  def oneJob(@PathParam("jobId") jobId: Int): JobData = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val jobOpt = statusToJobs.flatMap(_._2).find { jobInfo => jobInfo.jobId == jobId }
    jobOpt.map { job =>
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }.getOrElse {
      throw new NotFoundException("unknown job: " + jobId)
    }
  }
}