org.apache.spark.scheduler.TaskInfo Scala Examples

The following examples show how to use org.apache.spark.scheduler.TaskInfo. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
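All of the examples below construct TaskInfo directly, so it helps to see the constructor they are calling. The following is a minimal sketch; the parameter names follow the Spark 2.x source and may differ in other Spark versions.

import org.apache.spark.scheduler.{TaskInfo, TaskLocality}

// Constructor order in Spark 2.x: taskId, index, attemptNumber, launchTime,
// executorId, host, taskLocality, speculative. launchTime is epoch
// milliseconds; the suites below pass non-positive values to model tasks
// that have not launched yet.
val info = new TaskInfo(
  0L, 0, 1, System.currentTimeMillis(), "exec-1", "host-1",
  TaskLocality.ANY, false)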
Example 1
Source File: AllStagesResourceSuite.scala    From drizzle-spark (the identical file also appears in the sparkoscope and multi-tenancy-spark projects)    with Apache License 2.0
package org.apache.spark.status.api.v1

import java.util.Date

import scala.collection.mutable.LinkedHashMap

import org.apache.spark.SparkFunSuite
import org.apache.spark.scheduler.{StageInfo, TaskInfo, TaskLocality}
import org.apache.spark.ui.jobs.UIData.{StageUIData, TaskUIData}

class AllStagesResourceSuite extends SparkFunSuite {

  def getFirstTaskLaunchTime(taskLaunchTimes: Seq[Long]): Option[Date] = {
    val tasks = new LinkedHashMap[Long, TaskUIData]
    taskLaunchTimes.zipWithIndex.foreach { case (time, idx) =>
      tasks(idx.toLong) = TaskUIData(
        new TaskInfo(idx, idx, 1, time, "", "", TaskLocality.ANY, false), None)
    }

    val stageUiData = new StageUIData()
    stageUiData.taskData = tasks
    val status = StageStatus.ACTIVE
    val stageInfo = new StageInfo(
      1, 1, "stage 1", 10, Seq.empty, Seq.empty, "details abc")
    val stageData = AllStagesResource.stageUiToStageData(status, stageInfo, stageUiData, false)

    stageData.firstTaskLaunchedTime
  }

  test("firstTaskLaunchedTime when there are no tasks") {
    val result = getFirstTaskLaunchTime(Seq())
    assert(result == None)
  }

  test("firstTaskLaunchedTime when there are tasks but none launched") {
    val result = getFirstTaskLaunchTime(Seq(-100L, -200L, -300L))
    assert(result == None)
  }

  test("firstTaskLaunchedTime when there are tasks and some launched") {
    val result = getFirstTaskLaunchTime(Seq(-100L, 1449255596000L, 1449255597000L))
    assert(result == Some(new Date(1449255596000L)))
  }

} 
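The rule these tests pin down is small enough to state on its own: a task counts as launched only if its launch time is positive, and firstTaskLaunchedTime is the earliest such time. Below is a standalone sketch of that rule (firstLaunched is a hypothetical helper written for illustration, not Spark API).

import java.util.Date

// Non-positive launch times mean "not launched yet" and are ignored.
def firstLaunched(launchTimes: Seq[Long]): Option[Date] =
  launchTimes.filter(_ > 0).reduceOption(_ min _).map(new Date(_))

assert(firstLaunched(Seq.empty).isEmpty)
assert(firstLaunched(Seq(-100L, -200L, -300L)).isEmpty)
assert(firstLaunched(Seq(-100L, 1449255596000L, 1449255597000L))
  == Some(new Date(1449255596000L)))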
Example 2
Source File: UIData.scala    From SparkCore    with Apache License 2.0
package org.apache.spark.ui.jobs

import org.apache.spark.JobExecutionStatus
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
import org.apache.spark.util.collection.OpenHashSet

import scala.collection.mutable.HashMap

private[jobs] object UIData {

  class ExecutorSummary {
    var taskTime : Long = 0
    var failedTasks : Int = 0
    var succeededTasks : Int = 0
    var inputBytes : Long = 0
    var inputRecords : Long = 0
    var outputBytes : Long = 0
    var outputRecords : Long = 0
    var shuffleRead : Long = 0
    var shuffleReadRecords : Long = 0
    var shuffleWrite : Long = 0
    var shuffleWriteRecords : Long = 0
    var memoryBytesSpilled : Long = 0
    var diskBytesSpilled : Long = 0
  }

  class JobUIData(
    var jobId: Int = -1,
    var submissionTime: Option[Long] = None,
    var completionTime: Option[Long] = None,
    var stageIds: Seq[Int] = Seq.empty,
    var jobGroup: Option[String] = None,
    var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN
    // The original file declares further task/stage counter fields here
    // (including an OpenHashSet of completed stage indices); they are
    // truncated in this excerpt.
  )

  case class TaskUIData(
      var taskInfo: TaskInfo,
      var taskMetrics: Option[TaskMetrics] = None,
      var errorMessage: Option[String] = None)
} 
Example 3
Source File: HostTimeSpan.scala    From sparklens    with Apache License 2.0
package com.qubole.sparklens.timespan

import com.qubole.sparklens.common.AggregateMetrics
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.TaskInfo
import org.json4s.DefaultFormats
import org.json4s.JsonAST.JValue

import scala.collection.mutable


class HostTimeSpan(val hostID: String) extends TimeSpan {
  var hostMetrics = new AggregateMetrics()


  override def duration():Option[Long] = {
    Some(super.duration().getOrElse(System.currentTimeMillis() - startTime))
  }

  def updateAggregateTaskMetrics (taskMetrics: TaskMetrics, taskInfo: TaskInfo): Unit = {
    hostMetrics.update(taskMetrics, taskInfo)
  }
  override def getMap(): Map[String, _ <: Any] = {
    implicit val formats = DefaultFormats
    Map("hostID" -> hostID, "hostMetrics" -> hostMetrics.getMap) ++ super.getStartEndTime()
  }

}

object HostTimeSpan {
  def getTimeSpan(json: Map[String, JValue]): mutable.HashMap[String, HostTimeSpan] = {
    implicit val formats = DefaultFormats
    val map = new mutable.HashMap[String, HostTimeSpan]

    json.keys.foreach(key => {
      val value = json(key)
      val timeSpan = new HostTimeSpan((value \ "hostID").extract[String])
      timeSpan.hostMetrics = AggregateMetrics.getAggregateMetrics((value \ "hostMetrics")
        .extract[JValue])
      timeSpan.addStartEnd(value)
      map.put(key, timeSpan)
    })

    map
  }
} 
Example 4
Source File: JobTimeSpan.scala    From sparklens    with Apache License 2.0
package com.qubole.sparklens.timespan

import com.qubole.sparklens.common.{AggregateMetrics, AppContext}
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.TaskInfo
import org.json4s.DefaultFormats
import org.json4s.JsonAST.JValue

import scala.collection.{immutable, mutable}


// The class declaration was truncated in this excerpt; the header below is
// reconstructed from the companion object, which constructs JobTimeSpan with
// a Long jobID and assigns jobMetrics and stageMap.
class JobTimeSpan(val jobID: Long) extends TimeSpan {
  var jobMetrics = new AggregateMetrics()
  var stageMap = new mutable.HashMap[Int, StageTimeSpan]

  def updateAggregateTaskMetrics(taskMetrics: TaskMetrics, taskInfo: TaskInfo): Unit = {
    jobMetrics.update(taskMetrics, taskInfo)
  }

  private def criticalTime(stageID: Int, data: mutable.HashMap[Int, (Seq[Int], Long)]): Long = {
    // Default to (no parents, 0 duration) for stages missing from the map
    val stageData = data.getOrElse(stageID, (List.empty[Int], 0L))
    stageData._2 + {
      if (stageData._1.isEmpty) {
        0L
      } else {
        stageData._1.map(x => criticalTime(x, data)).max
      }
    }
  }

  override def getMap(): Map[String, _ <: Any] = {
    implicit val formats = DefaultFormats

    Map(
      "jobID" -> jobID,
      "jobMetrics" -> jobMetrics.getMap,
      "stageMap" -> AppContext.getMap(stageMap)) ++ super.getStartEndTime()
  }
}

object JobTimeSpan {
  def getTimeSpan(json: Map[String, JValue]): mutable.HashMap[Long, JobTimeSpan] = {
    implicit val formats = DefaultFormats
    val map = new mutable.HashMap[Long, JobTimeSpan]

    json.keys.foreach(key => {
      val value = json(key)
      val timeSpan = new JobTimeSpan((value \ "jobID").extract[Long])

      timeSpan.jobMetrics = AggregateMetrics.getAggregateMetrics((value \ "jobMetrics")
              .extract[JValue])
      timeSpan.stageMap = StageTimeSpan.getTimeSpan((value \ "stageMap").extract[
        immutable.Map[String, JValue]])
      timeSpan.addStartEnd(value)
      map.put(key.toLong, timeSpan)

    })
    map
  }
} 
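criticalTime above is a recursive longest-path computation: a stage's critical time is its own duration plus the maximum critical time among its parent stages. A self-contained check of that logic, reusing the (parentStageIDs, duration) map shape from the signature above:

import scala.collection.mutable

def criticalTime(stageID: Int, data: mutable.HashMap[Int, (Seq[Int], Long)]): Long = {
  val (parents, duration) = data.getOrElse(stageID, (Seq.empty[Int], 0L))
  duration + (if (parents.isEmpty) 0L else parents.map(criticalTime(_, data)).max)
}

val stages = mutable.HashMap[Int, (Seq[Int], Long)](
  1 -> ((Seq.empty[Int], 10L)), // root stage: 10 ms
  2 -> ((Seq(1), 5L)),          // child of stage 1
  3 -> ((Seq(1, 2), 7L)))       // joins stages 1 and 2
assert(criticalTime(3, stages) == 22L) // longest path 3 -> 2 -> 1: 7 + 5 + 10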
Example 5
Source File: ExecutorTimeSpan.scala    From sparklens    with Apache License 2.0
package com.qubole.sparklens.timespan

import com.qubole.sparklens.common.AggregateMetrics
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.TaskInfo
import org.json4s.DefaultFormats
import org.json4s.JsonAST.JValue

import scala.collection.mutable

class ExecutorTimeSpan(val executorID: String,
                       val hostID: String,
                       val cores: Int) extends TimeSpan {
  var executorMetrics = new AggregateMetrics()

  def updateAggregateTaskMetrics (taskMetrics: TaskMetrics, taskInfo: TaskInfo): Unit = {
    executorMetrics.update(taskMetrics, taskInfo)
  }

  override def getMap(): Map[String, _ <: Any] = {
    implicit val formats = DefaultFormats

    Map("executorID" -> executorID, "hostID" -> hostID, "cores" -> cores, "executorMetrics" ->
      executorMetrics.getMap()) ++ super.getStartEndTime()
  }
}

object ExecutorTimeSpan {
  def getTimeSpan(json: Map[String, JValue]): mutable.HashMap[String, ExecutorTimeSpan] = {

    implicit val formats = DefaultFormats
    val map = new mutable.HashMap[String, ExecutorTimeSpan]

    json.keys.foreach(key => {
      val value = json(key)
      val timeSpan = new ExecutorTimeSpan(
        (value \ "executorID").extract[String],
        (value \ "hostID").extract[String],
        (value \ "cores").extract[Int]
      )
      timeSpan.executorMetrics = AggregateMetrics.getAggregateMetrics((value
              \ "executorMetrics").extract[JValue])
      timeSpan.addStartEnd(value)
      map.put(key, timeSpan)
    })
    map
  }
} 
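HostTimeSpan and ExecutorTimeSpan are both fed from Spark's listener events, which deliver a TaskInfo with every task completion. The sketch below shows one plausible wiring; TimeSpanListener is a hypothetical name for illustration, and the real plumbing lives in sparklens's own listener classes.

import com.qubole.sparklens.timespan.ExecutorTimeSpan
import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}

import scala.collection.mutable

// Hypothetical listener: on each task end, route the task's metrics and
// TaskInfo to the matching executor-level aggregate.
class TimeSpanListener(executorMap: mutable.HashMap[String, ExecutorTimeSpan])
    extends SparkListener {
  override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
    val info = taskEnd.taskInfo
    if (taskEnd.taskMetrics != null) { // metrics can be null for failed tasks
      executorMap.get(info.executorId).foreach { span =>
        span.updateAggregateTaskMetrics(taskEnd.taskMetrics, info)
      }
    }
  }
}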
Example 6
Source File: UIData.scala    From spark1.52 (near-identical copies of this file also appear in the iolap and BigDatalog projects)    with Apache License 2.0
package org.apache.spark.ui.jobs

import org.apache.spark.JobExecutionStatus
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
import org.apache.spark.util.collection.OpenHashSet

import scala.collection.mutable
import scala.collection.mutable.HashMap

private[spark] object UIData {

  class ExecutorSummary {
    var taskTime : Long = 0 // task time
    var failedTasks : Int = 0 // number of failed tasks
    var succeededTasks : Int = 0 // number of succeeded tasks
    var inputBytes : Long = 0
    var inputRecords : Long = 0
    var outputBytes : Long = 0
    var outputRecords : Long = 0
    var shuffleRead : Long = 0
    var shuffleReadRecords : Long = 0
    var shuffleWrite : Long = 0
    var shuffleWriteRecords : Long = 0
    var memoryBytesSpilled : Long = 0
    var diskBytesSpilled : Long = 0
  }

  class JobUIData(
    var jobId: Int = -1,
    var submissionTime: Option[Long] = None, // submission time
    var completionTime: Option[Long] = None, // completion time
    var stageIds: Seq[Int] = Seq.empty,
    var jobGroup: Option[String] = None,
    var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN
    // The original file declares further task/stage counter fields here
    // (including an OpenHashSet of completed stage indices); they are
    // truncated in this excerpt.
  )

  case class TaskUIData(
      var taskInfo: TaskInfo,
      var taskMetrics: Option[TaskMetrics] = None,
      var errorMessage: Option[String] = None)

  case class ExecutorUIData(
      val startTime: Long,
      var finishTime: Option[Long] = None,
      var finishReason: Option[String] = None)
} 