org.apache.spark.JobExecutionStatus Scala Examples

The following examples show how to use org.apache.spark.JobExecutionStatus. Each example is taken from an open-source project; the project and source file are noted above each listing.
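For orientation: JobExecutionStatus is the enum Spark uses to report a job's lifecycle state, with the values RUNNING, SUCCEEDED, FAILED, and UNKNOWN. Most of the examples below read it through SparkContext.statusTracker. The following minimal sketch illustrates that pattern; the object name StatusCheck and the job-group argument are illustrative, not taken from any project shown here.

import org.apache.spark.{JobExecutionStatus, SparkContext}

object StatusCheck {
  // True while any job submitted under the given job group is still RUNNING.
  def anyJobRunning(sc: SparkContext, jobGroup: String): Boolean = {
    sc.statusTracker
      .getJobIdsForGroup(jobGroup)                     // ids of jobs in this group
      .flatMap(id => sc.statusTracker.getJobInfo(id))  // skip ids the tracker has dropped
      .exists(_.status() == JobExecutionStatus.RUNNING)
  }
}

This mirrors Example 21 (JobProgressUtil), which filters the same tracker output by JobExecutionStatus.RUNNING and JobExecutionStatus.SUCCEEDED to compute progress.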
Example 1
Source File: OneJobResource.scala    From iolap   with Apache License 2.0
package org.apache.spark.status.api.v1

import javax.ws.rs.{PathParam, GET, Produces}
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class OneJobResource(ui: SparkUI) {

  @GET
  def oneJob(@PathParam("jobId") jobId: Int): JobData = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val jobOpt = statusToJobs.flatMap(_._2).find { jobInfo => jobInfo.jobId == jobId }
    jobOpt.map { job =>
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }.getOrElse {
      throw new NotFoundException("unknown job: " + jobId)
    }
  }

} 
Example 2
Source File: AppKill.scala    From gimel   with Apache License 2.0
package com.paypal.gimel.common.query.guard

import org.apache.spark.JobExecutionStatus
import org.apache.spark.sql.SparkSession

import com.paypal.gimel.logger.Logger

class AppKill[E](spark: SparkSession, incomingLogger: Option[Logger] = None)
    extends EventLoop[E](name = "app-kill-event-loop")
    with Consumer[E] {

  private val logger = Logger(this.getClass.getName)
  private val HEADER: String = "[APP-KILL] "

  override def consume(queryGuardEvent: E): Unit = {
    this.post(queryGuardEvent)
  }

  override protected def onReceive(event: E): Unit = {
    // kill the received job
    event match {
      case jobKill: JobKill if jobKill.jobType == "job" =>
        val jobInfo = spark.sparkContext.statusTracker.getJobInfo(jobKill.jobId)
        if (jobInfo.isDefined && JobExecutionStatus.RUNNING == jobInfo.get
              .status()) {
          logger.info(s"${HEADER}Proceeding to cancel Job: $jobKill")
          spark.sparkContext.cancelJob(jobKill.jobId, jobKill.reason)
        }
      case jobKill: JobKill if jobKill.jobType == "stage" =>
        val stageInfo =
          spark.sparkContext.statusTracker.getStageInfo(jobKill.jobId)
        if (stageInfo.isDefined && stageInfo.get.numActiveTasks() > 0) {
          logger.info(s"${HEADER}Proceeding to cancel Stage: $jobKill")
          spark.sparkContext.cancelStage(jobKill.jobId, jobKill.reason)
        }
    }
  }

  override protected def onError(e: Throwable): Unit = {
    // stop all the monitoring task
    logger.info(s"${HEADER}Proceeding to stop ${_name}")
    stop()
  }

} 
Example 3
Source File: UIData.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.ui.jobs

import org.apache.spark.JobExecutionStatus
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
import org.apache.spark.util.collection.OpenHashSet

import scala.collection.mutable
import scala.collection.mutable.HashMap

private[spark] object UIData {

  class ExecutorSummary {
    var taskTime : Long = 0
    var failedTasks : Int = 0
    var succeededTasks : Int = 0
    var inputBytes : Long = 0
    var inputRecords : Long = 0
    var outputBytes : Long = 0
    var outputRecords : Long = 0
    var shuffleRead : Long = 0
    var shuffleReadRecords : Long = 0
    var shuffleWrite : Long = 0
    var shuffleWriteRecords : Long = 0
    var memoryBytesSpilled : Long = 0
    var diskBytesSpilled : Long = 0
  }

  class JobUIData(
    var jobId: Int = -1,
    var submissionTime: Option[Long] = None,
    var completionTime: Option[Long] = None,
    var stageIds: Seq[Int] = Seq.empty,
    var jobGroup: Option[String] = None,
    var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN,
    // Remaining counters, as referenced by AllJobsResource.convertJobData
    var numTasks: Int = 0,
    var numActiveTasks: Int = 0,
    var numCompletedTasks: Int = 0,
    var numSkippedTasks: Int = 0,
    var numFailedTasks: Int = 0,
    var numActiveStages: Int = 0,
    // A set rather than a count, so rerun stages are not double-counted
    var completedStageIndices: OpenHashSet[Int] = new OpenHashSet[Int](),
    var numSkippedStages: Int = 0,
    var numFailedStages: Int = 0)

  case class TaskUIData(
      var taskInfo: TaskInfo,
      var taskMetrics: Option[TaskMetrics] = None,
      var errorMessage: Option[String] = None)

  case class ExecutorUIData(
      val startTime: Long,
      var finishTime: Option[Long] = None,
      var finishReason: Option[String] = None)
} 
Example 4
Source File: OneJobResource.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.status.api.v1

import javax.ws.rs.{PathParam, GET, Produces}
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class OneJobResource(ui: SparkUI) {

  @GET
  def oneJob(@PathParam("jobId") jobId: Int): JobData = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val jobOpt = statusToJobs.flatMap(_._2).find { jobInfo => jobInfo.jobId == jobId }
    jobOpt.map { job =>
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }.getOrElse {
      throw new NotFoundException("unknown job: " + jobId)
    }
  }

} 
Example 5
Source File: AllJobsResource.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.status.api.v1

import java.util.{Arrays, Date, List => JList}
import javax.ws.rs._
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.JobProgressListener
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class AllJobsResource(ui: SparkUI) {

  @GET
  def jobsList(@QueryParam("status") statuses: JList[JobExecutionStatus]): Seq[JobData] = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val adjStatuses: JList[JobExecutionStatus] = {
      if (statuses.isEmpty) {
        Arrays.asList(JobExecutionStatus.values(): _*)
      } else {
        statuses
      }
    }
    val jobInfos = for {
      (status, jobs) <- statusToJobs
      job <- jobs if adjStatuses.contains(status)
    } yield {
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }
    jobInfos.sortBy{- _.jobId}
  }

}

private[v1] object AllJobsResource {

  def getStatusToJobs(ui: SparkUI): Seq[(JobExecutionStatus, Seq[JobUIData])] = {
    val statusToJobs = ui.jobProgressListener.synchronized {
      Seq(
        JobExecutionStatus.RUNNING -> ui.jobProgressListener.activeJobs.values.toSeq,
        JobExecutionStatus.SUCCEEDED -> ui.jobProgressListener.completedJobs.toSeq,
        JobExecutionStatus.FAILED -> ui.jobProgressListener.failedJobs.reverse.toSeq
      )
    }
    statusToJobs
  }

  def convertJobData(
      job: JobUIData,
      listener: JobProgressListener,
      includeStageDetails: Boolean): JobData = {
    listener.synchronized {
      val lastStageInfo =
        if (job.stageIds.isEmpty) {
          None
        } else {
          listener.stageIdToInfo.get(job.stageIds.max)
        }
      val lastStageData = lastStageInfo.flatMap { s =>
        listener.stageIdToData.get((s.stageId, s.attemptId))
      }
      val lastStageName = lastStageInfo.map { _.name }.getOrElse("(Unknown Stage Name)")
      val lastStageDescription = lastStageData.flatMap { _.description }
      new JobData(
        jobId = job.jobId,
        name = lastStageName,
        description = lastStageDescription,
        submissionTime = job.submissionTime.map{new Date(_)},
        completionTime = job.completionTime.map{new Date(_)},
        stageIds = job.stageIds,
        jobGroup = job.jobGroup,
        status = job.status,
        numTasks = job.numTasks,
        numActiveTasks = job.numActiveTasks,
        numCompletedTasks = job.numCompletedTasks,
        numSkippedTasks = job.numSkippedTasks,
        numFailedTasks = job.numFailedTasks,
        numActiveStages = job.numActiveStages,
        numCompletedStages = job.completedStageIndices.size,
        numSkippedStages = job.numSkippedStages,
        numFailedStages = job.numFailedStages
      )
    }
  }
} 
Example 6
Source File: JobsTab.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.ui.jobs

import javax.servlet.http.HttpServletRequest

import scala.collection.JavaConverters._

import org.apache.spark.JobExecutionStatus
import org.apache.spark.scheduler.SchedulingMode
import org.apache.spark.status.AppStatusStore
import org.apache.spark.ui._


private[ui] class JobsTab(parent: SparkUI, store: AppStatusStore)
  extends SparkUITab(parent, "jobs") {

  val sc = parent.sc
  val killEnabled = parent.killEnabled

  def isFairScheduler: Boolean = {
    store
      .environmentInfo()
      .sparkProperties
      .contains(("spark.scheduler.mode", SchedulingMode.FAIR.toString))
  }

  def getSparkUser: String = parent.getSparkUser

  attachPage(new AllJobsPage(this, store))
  attachPage(new JobPage(this, store))

  def handleKillRequest(request: HttpServletRequest): Unit = {
    if (killEnabled && parent.securityManager.checkModifyPermissions(request.getRemoteUser)) {
      // stripXSS is called first to remove suspicious characters used in XSS attacks
      val jobId = Option(UIUtils.stripXSS(request.getParameter("id"))).map(_.toInt)
      jobId.foreach { id =>
        store.asOption(store.job(id)).foreach { job =>
          if (job.status == JobExecutionStatus.RUNNING) {
            sc.foreach(_.cancelJob(id))
            // Do a quick pause here to give Spark time to kill the job so it shows up as
            // killed after the refresh. Note that this will block the serving thread so the
            // time should be limited in duration.
            Thread.sleep(100)
          }
        }
      }
    }
  }
} 
Example 7
Source File: SQLAppStatusStore.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.sql.execution.ui

import java.lang.{Long => JLong}
import java.util.Date

import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer

import com.fasterxml.jackson.annotation.JsonIgnore
import com.fasterxml.jackson.databind.annotation.JsonDeserialize

import org.apache.spark.JobExecutionStatus
import org.apache.spark.status.KVUtils.KVIndexParam
import org.apache.spark.util.kvstore.{KVIndex, KVStore}


class SparkPlanGraphNodeWrapper(
    val node: SparkPlanGraphNode,
    val cluster: SparkPlanGraphClusterWrapper) {

  def toSparkPlanGraphNode(): SparkPlanGraphNode = {
    assert(node == null ^ cluster == null, "Exactly one of node or cluster must be set.")
    if (node != null) node else cluster.toSparkPlanGraphCluster()
  }

}

case class SQLPlanMetric(
    name: String,
    accumulatorId: Long,
    metricType: String) 
Example 8
Source File: UIData.scala    From spark1.52   with Apache License 2.0
package org.apache.spark.ui.jobs

import org.apache.spark.JobExecutionStatus
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
import org.apache.spark.util.collection.OpenHashSet

import scala.collection.mutable
import scala.collection.mutable.HashMap

private[spark] object UIData {

  class ExecutorSummary {
    var taskTime : Long = 0 // task time
    var failedTasks : Int = 0 // number of failed tasks
    var succeededTasks : Int = 0 // number of succeeded tasks
    var inputBytes : Long = 0
    var inputRecords : Long = 0
    var outputBytes : Long = 0
    var outputRecords : Long = 0
    var shuffleRead : Long = 0
    var shuffleReadRecords : Long = 0
    var shuffleWrite : Long = 0
    var shuffleWriteRecords : Long = 0
    var memoryBytesSpilled : Long = 0
    var diskBytesSpilled : Long = 0
  }

  class JobUIData(
    var jobId: Int = -1,
    var submissionTime: Option[Long] = None, // submission time
    var completionTime: Option[Long] = None, // completion time
    var stageIds: Seq[Int] = Seq.empty,
    var jobGroup: Option[String] = None,
    var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN,
    // Remaining counters, as referenced by AllJobsResource.convertJobData
    var numTasks: Int = 0,
    var numActiveTasks: Int = 0,
    var numCompletedTasks: Int = 0,
    var numSkippedTasks: Int = 0,
    var numFailedTasks: Int = 0,
    var numActiveStages: Int = 0,
    // A set rather than a count, so rerun stages are not double-counted
    var completedStageIndices: OpenHashSet[Int] = new OpenHashSet[Int](),
    var numSkippedStages: Int = 0,
    var numFailedStages: Int = 0)

  case class TaskUIData(
      var taskInfo: TaskInfo,
      var taskMetrics: Option[TaskMetrics] = None,
      var errorMessage: Option[String] = None)

  case class ExecutorUIData(
      val startTime: Long,
      var finishTime: Option[Long] = None,
      var finishReason: Option[String] = None)
} 
Example 9
Source File: OneJobResource.scala    From spark1.52   with Apache License 2.0
package org.apache.spark.status.api.v1

import javax.ws.rs.{PathParam, GET, Produces}
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.UIData.JobUIData
// The @Produces annotation declares the media type(s) of the method's response entity; one or more may be listed
@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class OneJobResource(ui: SparkUI) {

  @GET
  def oneJob(@PathParam("jobId") jobId: Int): JobData = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val jobOpt = statusToJobs.flatMap(_._2).find { jobInfo => jobInfo.jobId == jobId }
    jobOpt.map { job =>
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }.getOrElse {
      throw new NotFoundException("unknown job: " + jobId)
    }
  }

} 
Example 10
Source File: AllJobsResource.scala    From spark1.52   with Apache License 2.0
package org.apache.spark.status.api.v1

import java.util.{Arrays, Date, List => JList}
import javax.ws.rs._
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.JobProgressListener
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class AllJobsResource(ui: SparkUI) {

  @GET
  def jobsList(@QueryParam("status") statuses: JList[JobExecutionStatus]): Seq[JobData] = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val adjStatuses: JList[JobExecutionStatus] = {
      if (statuses.isEmpty) {
        Arrays.asList(JobExecutionStatus.values(): _*)
      } else {
        statuses
      }
    }
    val jobInfos = for {
      (status, jobs) <- statusToJobs
      job <- jobs if adjStatuses.contains(status)
    } yield {
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }
    jobInfos.sortBy{- _.jobId}
  }

}

private[v1] object AllJobsResource {

  def getStatusToJobs(ui: SparkUI): Seq[(JobExecutionStatus, Seq[JobUIData])] = {
    val statusToJobs = ui.jobProgressListener.synchronized {
      Seq(
        JobExecutionStatus.RUNNING -> ui.jobProgressListener.activeJobs.values.toSeq,
        JobExecutionStatus.SUCCEEDED -> ui.jobProgressListener.completedJobs.toSeq,
        JobExecutionStatus.FAILED -> ui.jobProgressListener.failedJobs.reverse.toSeq
      )
    }
    statusToJobs
  }

  def convertJobData(
      job: JobUIData,
      listener: JobProgressListener,
      includeStageDetails: Boolean): JobData = {
    listener.synchronized {
      val lastStageInfo =
        if (job.stageIds.isEmpty) {
          None
        } else {
          listener.stageIdToInfo.get(job.stageIds.max)
        }
      val lastStageData = lastStageInfo.flatMap { s =>
        listener.stageIdToData.get((s.stageId, s.attemptId))
      }
      val lastStageName = lastStageInfo.map { _.name }.getOrElse("(Unknown Stage Name)")
      val lastStageDescription = lastStageData.flatMap { _.description }
      new JobData(
        jobId = job.jobId,
        name = lastStageName,
        description = lastStageDescription,
        submissionTime = job.submissionTime.map{new Date(_)},
        completionTime = job.completionTime.map{new Date(_)},
        stageIds = job.stageIds,
        jobGroup = job.jobGroup,
        status = job.status,
        numTasks = job.numTasks,
        numActiveTasks = job.numActiveTasks,
        numCompletedTasks = job.numCompletedTasks,
        numSkippedTasks = job.numSkippedTasks,
        numFailedTasks = job.numFailedTasks,
        numActiveStages = job.numActiveStages,
        numCompletedStages = job.completedStageIndices.size,
        numSkippedStages = job.numSkippedStages,
        numFailedStages = job.numFailedStages
      )
    }
  }
} 
Example 11
Source File: UIData.scala    From iolap   with Apache License 2.0
package org.apache.spark.ui.jobs

import org.apache.spark.JobExecutionStatus
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
import org.apache.spark.util.collection.OpenHashSet

import scala.collection.mutable.HashMap

private[spark] object UIData {

  class ExecutorSummary {
    var taskTime : Long = 0
    var failedTasks : Int = 0
    var succeededTasks : Int = 0
    var inputBytes : Long = 0
    var inputRecords : Long = 0
    var outputBytes : Long = 0
    var outputRecords : Long = 0
    var shuffleRead : Long = 0
    var shuffleReadRecords : Long = 0
    var shuffleWrite : Long = 0
    var shuffleWriteRecords : Long = 0
    var memoryBytesSpilled : Long = 0
    var diskBytesSpilled : Long = 0
  }

  class JobUIData(
    var jobId: Int = -1,
    var submissionTime: Option[Long] = None,
    var completionTime: Option[Long] = None,
    var stageIds: Seq[Int] = Seq.empty,
    var jobGroup: Option[String] = None,
    var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN,
    // Remaining counters, as referenced by AllJobsResource.convertJobData
    var numTasks: Int = 0,
    var numActiveTasks: Int = 0,
    var numCompletedTasks: Int = 0,
    var numSkippedTasks: Int = 0,
    var numFailedTasks: Int = 0,
    var numActiveStages: Int = 0,
    // A set rather than a count, so rerun stages are not double-counted
    var completedStageIndices: OpenHashSet[Int] = new OpenHashSet[Int](),
    var numSkippedStages: Int = 0,
    var numFailedStages: Int = 0)

  case class TaskUIData(
      var taskInfo: TaskInfo,
      var taskMetrics: Option[TaskMetrics] = None,
      var errorMessage: Option[String] = None)

  case class ExecutorUIData(
      val startTime: Long,
      var finishTime: Option[Long] = None,
      var finishReason: Option[String] = None)
} 
Example 12
Source File: AllJobsResource.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.status.api.v1

import java.util.{Arrays, Date, List => JList}
import javax.ws.rs._
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.JobProgressListener
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class AllJobsResource(ui: SparkUI) {

  @GET
  def jobsList(@QueryParam("status") statuses: JList[JobExecutionStatus]): Seq[JobData] = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val adjStatuses: JList[JobExecutionStatus] = {
      if (statuses.isEmpty) {
        Arrays.asList(JobExecutionStatus.values(): _*)
      } else {
        statuses
      }
    }
    val jobInfos = for {
      (status, jobs) <- statusToJobs
      job <- jobs if adjStatuses.contains(status)
    } yield {
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }
    jobInfos.sortBy{- _.jobId}
  }

}

private[v1] object AllJobsResource {

  def getStatusToJobs(ui: SparkUI): Seq[(JobExecutionStatus, Seq[JobUIData])] = {
    val statusToJobs = ui.jobProgressListener.synchronized {
      Seq(
        JobExecutionStatus.RUNNING -> ui.jobProgressListener.activeJobs.values.toSeq,
        JobExecutionStatus.SUCCEEDED -> ui.jobProgressListener.completedJobs.toSeq,
        JobExecutionStatus.FAILED -> ui.jobProgressListener.failedJobs.reverse.toSeq
      )
    }
    statusToJobs
  }

  def convertJobData(
      job: JobUIData,
      listener: JobProgressListener,
      includeStageDetails: Boolean): JobData = {
    listener.synchronized {
      val lastStageInfo =
        if (job.stageIds.isEmpty) {
          None
        } else {
          listener.stageIdToInfo.get(job.stageIds.max)
        }
      val lastStageData = lastStageInfo.flatMap { s =>
        listener.stageIdToData.get((s.stageId, s.attemptId))
      }
      val lastStageName = lastStageInfo.map { _.name }.getOrElse("(Unknown Stage Name)")
      val lastStageDescription = lastStageData.flatMap { _.description }
      new JobData(
        jobId = job.jobId,
        name = lastStageName,
        description = lastStageDescription,
        submissionTime = job.submissionTime.map{new Date(_)},
        completionTime = job.completionTime.map{new Date(_)},
        stageIds = job.stageIds,
        jobGroup = job.jobGroup,
        status = job.status,
        numTasks = job.numTasks,
        numActiveTasks = job.numActiveTasks,
        numCompletedTasks = job.numCompletedTasks,
        numSkippedTasks = job.numSkippedTasks,
        numFailedTasks = job.numFailedTasks,
        numActiveStages = job.numActiveStages,
        numCompletedStages = job.completedStageIndices.size,
        numSkippedStages = job.numSkippedStages,
        numFailedStages = job.numFailedStages
      )
    }
  }
} 
Example 13
Source File: AllJobsResource.scala    From iolap   with Apache License 2.0
package org.apache.spark.status.api.v1

import java.util.{Arrays, Date, List => JList}
import javax.ws.rs._
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.JobProgressListener
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class AllJobsResource(ui: SparkUI) {

  @GET
  def jobsList(@QueryParam("status") statuses: JList[JobExecutionStatus]): Seq[JobData] = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val adjStatuses: JList[JobExecutionStatus] = {
      if (statuses.isEmpty) {
        Arrays.asList(JobExecutionStatus.values(): _*)
      } else {
        statuses
      }
    }
    val jobInfos = for {
      (status, jobs) <- statusToJobs
      job <- jobs if adjStatuses.contains(status)
    } yield {
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }
    jobInfos.sortBy{- _.jobId}
  }

}

private[v1] object AllJobsResource {

  def getStatusToJobs(ui: SparkUI): Seq[(JobExecutionStatus, Seq[JobUIData])] = {
    val statusToJobs = ui.jobProgressListener.synchronized {
      Seq(
        JobExecutionStatus.RUNNING -> ui.jobProgressListener.activeJobs.values.toSeq,
        JobExecutionStatus.SUCCEEDED -> ui.jobProgressListener.completedJobs.toSeq,
        JobExecutionStatus.FAILED -> ui.jobProgressListener.failedJobs.reverse.toSeq
      )
    }
    statusToJobs
  }

  def convertJobData(
      job: JobUIData,
      listener: JobProgressListener,
      includeStageDetails: Boolean): JobData = {
    listener.synchronized {
      val lastStageInfo =
        if (job.stageIds.isEmpty) {
          None
        } else {
          listener.stageIdToInfo.get(job.stageIds.max)
        }
      val lastStageData = lastStageInfo.flatMap { s =>
        listener.stageIdToData.get((s.stageId, s.attemptId))
      }
      val lastStageName = lastStageInfo.map { _.name }.getOrElse("(Unknown Stage Name)")
      val lastStageDescription = lastStageData.flatMap { _.description }
      new JobData(
        jobId = job.jobId,
        name = lastStageName,
        description = lastStageDescription,
        submissionTime = job.submissionTime.map{new Date(_)},
        completionTime = job.completionTime.map{new Date(_)},
        stageIds = job.stageIds,
        jobGroup = job.jobGroup,
        status = job.status,
        numTasks = job.numTasks,
        numActiveTasks = job.numActiveTasks,
        numCompletedTasks = job.numCompletedTasks,
        numSkippedTasks = job.numSkippedTasks,
        numFailedTasks = job.numFailedTasks,
        numActiveStages = job.numActiveStages,
        numCompletedStages = job.completedStageIndices.size,
        numSkippedStages = job.numSkippedStages,
        numFailedStages = job.numFailedStages
      )
    }
  }
} 
Example 14
Source File: OneJobResource.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.status.api.v1

import javax.ws.rs.{GET, PathParam, Produces}
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class OneJobResource(ui: SparkUI) {

  @GET
  def oneJob(@PathParam("jobId") jobId: Int): JobData = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val jobOpt = statusToJobs.flatMap(_._2).find { jobInfo => jobInfo.jobId == jobId}
    jobOpt.map { job =>
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }.getOrElse {
      throw new NotFoundException("unknown job: " + jobId)
    }
  }

} 
Example 15
Source File: AllJobsResource.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.status.api.v1

import java.util.{Arrays, Date, List => JList}
import javax.ws.rs._
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.JobProgressListener
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class AllJobsResource(ui: SparkUI) {

  @GET
  def jobsList(@QueryParam("status") statuses: JList[JobExecutionStatus]): Seq[JobData] = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val adjStatuses: JList[JobExecutionStatus] = {
      if (statuses.isEmpty) {
        Arrays.asList(JobExecutionStatus.values(): _*)
      } else {
        statuses
      }
    }
    val jobInfos = for {
      (status, jobs) <- statusToJobs
      job <- jobs if adjStatuses.contains(status)
    } yield {
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }
    jobInfos.sortBy{- _.jobId}
  }

}

private[v1] object AllJobsResource {

  def getStatusToJobs(ui: SparkUI): Seq[(JobExecutionStatus, Seq[JobUIData])] = {
    val statusToJobs = ui.jobProgressListener.synchronized {
      Seq(
        JobExecutionStatus.RUNNING -> ui.jobProgressListener.activeJobs.values.toSeq,
        JobExecutionStatus.SUCCEEDED -> ui.jobProgressListener.completedJobs.toSeq,
        JobExecutionStatus.FAILED -> ui.jobProgressListener.failedJobs.reverse.toSeq
      )
    }
    statusToJobs
  }

  def convertJobData(
      job: JobUIData,
      listener: JobProgressListener,
      includeStageDetails: Boolean): JobData = {
    listener.synchronized {
      val lastStageInfo =
        if (job.stageIds.isEmpty) {
          None
        } else {
          listener.stageIdToInfo.get(job.stageIds.max)
        }
      val lastStageData = lastStageInfo.flatMap { s =>
        listener.stageIdToData.get((s.stageId, s.attemptId))
      }
      val lastStageName = lastStageInfo.map { _.name }.getOrElse("(Unknown Stage Name)")
      val lastStageDescription = lastStageData.flatMap { _.description }
      new JobData(
        jobId = job.jobId,
        name = lastStageName,
        description = lastStageDescription,
        submissionTime = job.submissionTime.map{new Date(_)},
        completionTime = job.completionTime.map{new Date(_)},
        stageIds = job.stageIds,
        jobGroup = job.jobGroup,
        status = job.status,
        numTasks = job.numTasks,
        numActiveTasks = job.numActiveTasks,
        numCompletedTasks = job.numCompletedTasks,
        numSkippedTasks = job.numSkippedTasks,
        numFailedTasks = job.numFailedTasks,
        numActiveStages = job.numActiveStages,
        numCompletedStages = job.completedStageIndices.size,
        numSkippedStages = job.numSkippedStages,
        numFailedStages = job.numFailedStages
      )
    }
  }
} 
Example 16
Source File: UIData.scala    From SparkCore   with Apache License 2.0
package org.apache.spark.ui.jobs

import org.apache.spark.JobExecutionStatus
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
import org.apache.spark.util.collection.OpenHashSet

import scala.collection.mutable.HashMap

private[jobs] object UIData {

  class ExecutorSummary {
    var taskTime : Long = 0
    var failedTasks : Int = 0
    var succeededTasks : Int = 0
    var inputBytes : Long = 0
    var inputRecords : Long = 0
    var outputBytes : Long = 0
    var outputRecords : Long = 0
    var shuffleRead : Long = 0
    var shuffleReadRecords : Long = 0
    var shuffleWrite : Long = 0
    var shuffleWriteRecords : Long = 0
    var memoryBytesSpilled : Long = 0
    var diskBytesSpilled : Long = 0
  }

  class JobUIData(
    var jobId: Int = -1,
    var submissionTime: Option[Long] = None,
    var completionTime: Option[Long] = None,
    var stageIds: Seq[Int] = Seq.empty,
    var jobGroup: Option[String] = None,
    var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN,
    // Remaining task and stage counters
    var numTasks: Int = 0,
    var numActiveTasks: Int = 0,
    var numCompletedTasks: Int = 0,
    var numSkippedTasks: Int = 0,
    var numFailedTasks: Int = 0,
    var numActiveStages: Int = 0,
    // A set rather than a count, so rerun stages are not double-counted
    var completedStageIndices: OpenHashSet[Int] = new OpenHashSet[Int](),
    var numSkippedStages: Int = 0,
    var numFailedStages: Int = 0)

  case class TaskUIData(
      var taskInfo: TaskInfo,
      var taskMetrics: Option[TaskMetrics] = None,
      var errorMessage: Option[String] = None)
} 
Example 17
Source File: OneJobResource.scala    From sparkoscope   with Apache License 2.0
package org.apache.spark.status.api.v1

import javax.ws.rs.{GET, PathParam, Produces}
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class OneJobResource(ui: SparkUI) {

  @GET
  def oneJob(@PathParam("jobId") jobId: Int): JobData = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val jobOpt = statusToJobs.flatMap(_._2).find { jobInfo => jobInfo.jobId == jobId}
    jobOpt.map { job =>
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }.getOrElse {
      throw new NotFoundException("unknown job: " + jobId)
    }
  }

} 
Example 18
Source File: AllJobsResource.scala    From sparkoscope   with Apache License 2.0
package org.apache.spark.status.api.v1

import java.util.{Arrays, Date, List => JList}
import javax.ws.rs._
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.JobProgressListener
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class AllJobsResource(ui: SparkUI) {

  @GET
  def jobsList(@QueryParam("status") statuses: JList[JobExecutionStatus]): Seq[JobData] = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val adjStatuses: JList[JobExecutionStatus] = {
      if (statuses.isEmpty) {
        Arrays.asList(JobExecutionStatus.values(): _*)
      } else {
        statuses
      }
    }
    val jobInfos = for {
      (status, jobs) <- statusToJobs
      job <- jobs if adjStatuses.contains(status)
    } yield {
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }
    jobInfos.sortBy{- _.jobId}
  }

}

private[v1] object AllJobsResource {

  def getStatusToJobs(ui: SparkUI): Seq[(JobExecutionStatus, Seq[JobUIData])] = {
    val statusToJobs = ui.jobProgressListener.synchronized {
      Seq(
        JobExecutionStatus.RUNNING -> ui.jobProgressListener.activeJobs.values.toSeq,
        JobExecutionStatus.SUCCEEDED -> ui.jobProgressListener.completedJobs.toSeq,
        JobExecutionStatus.FAILED -> ui.jobProgressListener.failedJobs.reverse.toSeq
      )
    }
    statusToJobs
  }

  def convertJobData(
      job: JobUIData,
      listener: JobProgressListener,
      includeStageDetails: Boolean): JobData = {
    listener.synchronized {
      val lastStageInfo =
        if (job.stageIds.isEmpty) {
          None
        } else {
          listener.stageIdToInfo.get(job.stageIds.max)
        }
      val lastStageData = lastStageInfo.flatMap { s =>
        listener.stageIdToData.get((s.stageId, s.attemptId))
      }
      val lastStageName = lastStageInfo.map { _.name }.getOrElse("(Unknown Stage Name)")
      val lastStageDescription = lastStageData.flatMap { _.description }
      new JobData(
        jobId = job.jobId,
        name = lastStageName,
        description = lastStageDescription,
        submissionTime = job.submissionTime.map{new Date(_)},
        completionTime = job.completionTime.map{new Date(_)},
        stageIds = job.stageIds,
        jobGroup = job.jobGroup,
        status = job.status,
        numTasks = job.numTasks,
        numActiveTasks = job.numActiveTasks,
        numCompletedTasks = job.numCompletedTasks,
        numSkippedTasks = job.numSkippedTasks,
        numFailedTasks = job.numFailedTasks,
        numActiveStages = job.numActiveStages,
        numCompletedStages = job.completedStageIndices.size,
        numSkippedStages = job.numSkippedStages,
        numFailedStages = job.numFailedStages
      )
    }
  }
} 
Example 19
Source File: ApplicationIdleMonitor.scala    From XSQL   with Apache License 2.0
package org.apache.spark.monitor.job

import java.util.concurrent.{Executors, ScheduledFuture, TimeUnit}
import java.util.concurrent.atomic.AtomicReference

import scala.collection.JavaConverters._

import org.apache.spark.JobExecutionStatus
import org.apache.spark.alarm.{AlertMessage, HtmlMessage}
import org.apache.spark.monitor.{Monitor, MonitorItem}
import org.apache.spark.monitor.MonitorItem.MonitorItem
import org.apache.spark.scheduler.{SparkListenerEvent, SparkListenerJobEnd, SparkListenerJobStart}
import org.apache.spark.status.JobDataWrapper

class ApplicationIdleMonitor extends JobMonitor {

  override val item: MonitorItem = MonitorItem.APP_IDLE_WARNER
  val delayThread = Executors.newScheduledThreadPool(1)
  lazy val endureLimit =
    conf.getTimeAsMs(s"${Monitor.PREFIX}.${item.toString.toLowerCase}.timeout", "1h")
  private var idleTimeout: AtomicReference[ScheduledFuture[_]] = new AtomicReference()

  private def getActiveJobNum(): Int = {
//    appStore.count(classOf[JobDataWrapper], "completionTime", -1L)
    kvStore
      .view(classOf[JobDataWrapper])
      .reverse()
      .asScala
      .map(_.info)
      .filter(_.status == JobExecutionStatus.RUNNING)
      .size
  }

  private def stopIdleTimeout(): Unit = {
    val idleTimeout = this.idleTimeout.getAndSet(null)
    if (idleTimeout != null) {
      idleTimeout.cancel(false)
    }
  }

  private def setupIdleTimeout(): Unit = {
    if (getActiveJobNum > 0) return
    val timeoutTask = new Runnable() {
      override def run(): Unit = {
        // scalastyle:off
        val driverUrl = conf
          .get(
            "spark.org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.param.PROXY_URI_BASES")
          .split(",")
          .head
        val a = <h2>Your Spark application</h2>
            <a href={driverUrl}>{driverUrl}</a>
            <h2>has been idle for more than {conf.get(
              s"${Monitor.PREFIX}.${item}.timeout", "1h")}</h2>
            <h2>Please shut it down promptly.</h2>
        val message = new HtmlMessage(title = item, content = a.mkString)
        alarms.foreach(_.alarm(message))
        // scalastyle:on
      }
    }

    val timeout = delayThread
      .scheduleWithFixedDelay(timeoutTask, endureLimit, endureLimit, TimeUnit.MILLISECONDS)
    // If there's already an idle task registered, then cancel the new one.
    if (!this.idleTimeout.compareAndSet(null, timeout)) {
      timeout.cancel(false)
    }
    // If a new client connected while the idle task was being set up, then stop the task.
    if (getActiveJobNum > 0) stopIdleTimeout()
  }

  override def watchOut(event: SparkListenerEvent): Option[AlertMessage] = {
    event match {
      case env: SparkListenerJobStart =>
        stopIdleTimeout
        Option.empty
      case env: SparkListenerJobEnd =>
        setupIdleTimeout
        Option.empty
      case _ =>
        Option.empty
    }
  }
} 
Example 20
Source File: SQLAppStatusStore.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.execution.ui

import java.lang.{Long => JLong}
import java.util.Date

import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer

import com.fasterxml.jackson.annotation.JsonIgnore
import com.fasterxml.jackson.databind.annotation.JsonDeserialize

import org.apache.spark.JobExecutionStatus
import org.apache.spark.status.KVUtils.KVIndexParam
import org.apache.spark.util.kvstore.{KVIndex, KVStore}


class SparkPlanGraphNodeWrapper(
    val node: SparkPlanGraphNode,
    val cluster: SparkPlanGraphClusterWrapper) {

  def toSparkPlanGraphNode(): SparkPlanGraphNode = {
    assert(node == null ^ cluster == null, "Exactly one of node or cluster must be set.")
    if (node != null) node else cluster.toSparkPlanGraphCluster()
  }

}

case class SQLPlanMetric(
    name: String,
    accumulatorId: Long,
    metricType: String) 
Example 21
Source File: JobProgressUtil.scala    From Linkis   with Apache License 2.0
package com.webank.wedatasphere.linkis.engine.spark.utils

import java.text.NumberFormat

import com.webank.wedatasphere.linkis.common.utils.Logging
import com.webank.wedatasphere.linkis.protocol.engine.JobProgressInfo
import org.apache.commons.lang.time.DateFormatUtils
import org.apache.spark.{JobExecutionStatus, SparkContext, SparkJobInfo}


object JobProgressUtil extends Logging{
  def progress(sc: SparkContext, jobGroup : String):Float = {
    val jobIds = sc.statusTracker.getJobIdsForGroup(jobGroup)
    val jobs = jobIds.flatMap { id => sc.statusTracker.getJobInfo(id) }
    val stages = jobs.flatMap { job =>
      job.stageIds().flatMap(sc.statusTracker.getStageInfo)
    }

    val taskCount = stages.map(_.numTasks).sum
    val completedTaskCount = stages.map(_.numCompletedTasks).sum
    if (taskCount == 0) {
      0f
    } else {
      (completedTaskCount.toDouble / taskCount).toFloat
    }
  }

  def getActiveJobProgressInfo(sc:SparkContext,jobGroup : String):Array[JobProgressInfo] = {
    val jobIds = sc.statusTracker.getJobIdsForGroup(jobGroup)
    val activeJobs = jobIds.flatMap { id => sc.statusTracker.getJobInfo(id) }.filter(_.status() == JobExecutionStatus.RUNNING)
    val progressInfos = activeJobs.map { job =>
      val jobProgressInfo = getJobProgressInfoByStages(job, sc, jobGroup)
      val timestamp = DateFormatUtils.format(System.currentTimeMillis, "yyyy-MM-dd HH:mm:ss")
      val progress = jobProgressInfo.succeedTasks * 1d /  jobProgressInfo.totalTasks
      info(s"${jobProgressInfo.id} numTasks = ${jobProgressInfo.totalTasks}, numCompletedTasks = ${jobProgressInfo.succeedTasks}," +
        s" numActiveTasks = ${jobProgressInfo.runningTasks} , completed:${percentageFormat(progress)}")
      jobProgressInfo
    }
    progressInfos
  }

  def getCompletedJobProgressInfo(sc:SparkContext,jobGroup : String):Array[JobProgressInfo] = {
    val jobIds = sc.statusTracker.getJobIdsForGroup(jobGroup)
    val completedJobs = jobIds.flatMap { id => sc.statusTracker.getJobInfo(id) }.filter(_.status() == JobExecutionStatus.SUCCEEDED)
    val progressInfos = completedJobs.map { job =>
      getJobProgressInfoByStages(job, sc, jobGroup)
    }
    progressInfos
  }

  private  def getJobProgressInfoByStages(job:SparkJobInfo, sc:SparkContext, jobGroup : String) : JobProgressInfo = {
    val stages = job.stageIds().flatMap(sc.statusTracker.getStageInfo)

    var numTasks = 0
    var numActiveTasks = 0
    var numFailedTasks = 0
    var numSucceedTasks = 0
    stages.foreach{stageInfo =>
      if (stageInfo.submissionTime() > 0){
        numTasks += stageInfo.numTasks()
        numActiveTasks += stageInfo.numActiveTasks()
        numFailedTasks += stageInfo.numFailedTasks()
        numSucceedTasks += stageInfo.numCompletedTasks()
      }
    }
    JobProgressInfo(getJobId(job.jobId(), jobGroup), numTasks, numActiveTasks, numFailedTasks, numSucceedTasks)
  }

  private def getJobId( jobId : Int , jobGroup : String ): String = "jobId-" + jobId + "(" + jobGroup + ")"

  private var _percentFormat: NumberFormat = _

  def percentageFormat(decimal: Double): String = {
    if(_percentFormat == null) {
      _percentFormat = NumberFormat.getPercentInstance
      _percentFormat.setMinimumFractionDigits(2)
    }
    _percentFormat.format(decimal)
  }
} 
Example 22
Source File: OneJobResource.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.status.api.v1

import javax.ws.rs.{GET, PathParam, Produces}
import javax.ws.rs.core.MediaType

import org.apache.spark.JobExecutionStatus
import org.apache.spark.ui.SparkUI
import org.apache.spark.ui.jobs.UIData.JobUIData

@Produces(Array(MediaType.APPLICATION_JSON))
private[v1] class OneJobResource(ui: SparkUI) {

  @GET
  def oneJob(@PathParam("jobId") jobId: Int): JobData = {
    val statusToJobs: Seq[(JobExecutionStatus, Seq[JobUIData])] =
      AllJobsResource.getStatusToJobs(ui)
    val jobOpt = statusToJobs.flatMap(_._2).find { jobInfo => jobInfo.jobId == jobId}
    jobOpt.map { job =>
      AllJobsResource.convertJobData(job, ui.jobProgressListener, false)
    }.getOrElse {
      throw new NotFoundException("unknown job: " + jobId)
    }
  }

}