org.apache.spark.executor.TaskMetrics Scala Examples
The following examples show how to use org.apache.spark.executor.TaskMetrics.
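Most of the examples below either read TaskMetrics from scheduler events (listeners and UI code) or build TaskMetrics.empty when constructing tasks in tests. As a quick orientation, here is a minimal sketch of the most common consumption pattern, reading the metrics of each finished task from a SparkListenerTaskEnd event, much like the PerfListener examples further down. It assumes a Spark 2.x-style API; the listener class name and counter fields are made up for illustration.

import org.apache.spark.SparkContext
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}

// Hypothetical listener that accumulates a few TaskMetrics fields as tasks finish.
// Listener events are delivered on a single listener-bus thread, so plain vars suffice here.
class SimpleTaskMetricsListener extends SparkListener {
  var totalExecutorRunTime = 0L
  var totalJvmGCTime = 0L
  var totalRecordsRead = 0L

  override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
    val metrics: TaskMetrics = taskEnd.taskMetrics
    if (metrics != null) { // metrics can be null, e.g. when a task fails before reporting
      totalExecutorRunTime += metrics.executorRunTime
      totalJvmGCTime += metrics.jvmGCTime
      totalRecordsRead += metrics.inputMetrics.recordsRead
    }
  }
}

// Usage (sketch): register the listener before running jobs.
// val sc: SparkContext = ...
// sc.addSparkListener(new SimpleTaskMetricsListener)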
Example 1
Source File: TaskResult.scala From spark1.52 with Apache License 2.0
package org.apache.spark.scheduler

import java.io._
import java.nio.ByteBuffer

import scala.collection.Map
import scala.collection.mutable

import org.apache.spark.SparkEnv
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.storage.BlockId
import org.apache.spark.util.Utils

// Task result. Also contains updates to accumulator variables.
private[spark] sealed trait TaskResult[T]

  def value(): T = {
    if (valueObjectDeserialized) {
      valueObject
    } else {
      // This should not run when holding a lock because it may cost dozens of seconds for a large
      // value.
      val resultSer = SparkEnv.get.serializer.newInstance()
      valueObject = resultSer.deserialize(valueBytes)
      valueObjectDeserialized = true
      valueObject
    }
  }
}
Example 2
Source File: StagePageSuite.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.ui import javax.servlet.http.HttpServletRequest import scala.xml.Node import org.mockito.Mockito.{RETURNS_SMART_NULLS, mock, when} import org.apache.spark._ import org.apache.spark.executor.TaskMetrics import org.apache.spark.scheduler._ import org.apache.spark.storage.StorageStatusListener import org.apache.spark.ui.exec.ExecutorsListener import org.apache.spark.ui.jobs.{JobProgressListener, StagePage, StagesTab} import org.apache.spark.ui.scope.RDDOperationGraphListener import org.apache.spark.util.Utils class StagePageSuite extends SparkFunSuite with LocalSparkContext { private val peakExecutionMemory = 10 test("peak execution memory should displayed") { val conf = new SparkConf(false) val html = renderStagePage(conf).toString().toLowerCase val targetString = "peak execution memory" assert(html.contains(targetString)) } test("SPARK-10543: peak execution memory should be per-task rather than cumulative") { val conf = new SparkConf(false) val html = renderStagePage(conf).toString().toLowerCase // verify min/25/50/75/max show task value not cumulative values assert(html.contains(s"<td>$peakExecutionMemory.0 b</td>" * 5)) } private def renderStagePage(conf: SparkConf): Seq[Node] = { val jobListener = new JobProgressListener(conf, Utils.getCurrentUserName()) val graphListener = new RDDOperationGraphListener(conf) val executorsListener = new ExecutorsListener(new StorageStatusListener(conf), conf) val tab = mock(classOf[StagesTab], RETURNS_SMART_NULLS) val request = mock(classOf[HttpServletRequest]) when(tab.conf).thenReturn(conf) when(tab.progressListener).thenReturn(jobListener) when(tab.operationGraphListener).thenReturn(graphListener) when(tab.executorsListener).thenReturn(executorsListener) when(tab.appName).thenReturn("testing") when(tab.headerTabs).thenReturn(Seq.empty) when(request.getParameter("id")).thenReturn("0") when(request.getParameter("attempt")).thenReturn("0") val page = new StagePage(tab) // Simulate a stage in job progress listener val stageInfo = new StageInfo(0, 0, "dummy", 1, Seq.empty, Seq.empty, "details") // Simulate two tasks to test PEAK_EXECUTION_MEMORY correctness (1 to 2).foreach { taskId => val taskInfo = new TaskInfo(taskId, taskId, 0, 0, "0", "localhost", TaskLocality.ANY, false) jobListener.onStageSubmitted(SparkListenerStageSubmitted(stageInfo)) jobListener.onTaskStart(SparkListenerTaskStart(0, 0, taskInfo)) taskInfo.markFinished(TaskState.FINISHED) val taskMetrics = TaskMetrics.empty taskMetrics.incPeakExecutionMemory(peakExecutionMemory) jobListener.onTaskEnd( SparkListenerTaskEnd(0, 0, "result", Success, taskInfo, taskMetrics)) } jobListener.onStageCompleted(SparkListenerStageCompleted(stageInfo)) page.render(request) } }
Example 3
Source File: HostTimeSpan.scala From sparklens with Apache License 2.0
package com.qubole.sparklens.timespan import com.qubole.sparklens.common.AggregateMetrics import org.apache.spark.executor.TaskMetrics import org.apache.spark.scheduler.TaskInfo import org.json4s.DefaultFormats import org.json4s.JsonAST.JValue import scala.collection.mutable class HostTimeSpan(val hostID: String) extends TimeSpan { var hostMetrics = new AggregateMetrics() override def duration():Option[Long] = { Some(super.duration().getOrElse(System.currentTimeMillis() - startTime)) } def updateAggregateTaskMetrics (taskMetrics: TaskMetrics, taskInfo: TaskInfo): Unit = { hostMetrics.update(taskMetrics, taskInfo) } override def getMap(): Map[String, _ <: Any] = { implicit val formats = DefaultFormats Map("hostID" -> hostID, "hostMetrics" -> hostMetrics.getMap) ++ super.getStartEndTime() } } object HostTimeSpan { def getTimeSpan(json: Map[String, JValue]): mutable.HashMap[String, HostTimeSpan] = { implicit val formats = DefaultFormats val map = new mutable.HashMap[String, HostTimeSpan] json.keys.map(key => { val value = json.get(key).get val timeSpan = new HostTimeSpan((value \ "hostID").extract[String]) timeSpan.hostMetrics = AggregateMetrics.getAggregateMetrics((value \ "hostMetrics") .extract[JValue]) timeSpan.addStartEnd(value) map.put(key, timeSpan) }) map } }
Example 4
Source File: JobTimeSpan.scala From sparklens with Apache License 2.0
package com.qubole.sparklens.timespan import com.qubole.sparklens.common.{AggregateMetrics, AppContext} import org.apache.spark.executor.TaskMetrics import org.apache.spark.scheduler.TaskInfo import org.json4s.DefaultFormats import org.json4s.JsonAST.JValue import scala.collection.{immutable, mutable} private def criticalTime(stageID: Int, data: mutable.HashMap[Int, (Seq[Int], Long)]): Long = { //Provide 0 value for val stageData = data.getOrElse(stageID, (List.empty[Int], 0L)) stageData._2 + { if (stageData._1.size == 0) { 0L }else { stageData._1.map(x => criticalTime(x, data)).max } } } override def getMap(): Map[String, _ <: Any] = { implicit val formats = DefaultFormats Map( "jobID" -> jobID, "jobMetrics" -> jobMetrics.getMap, "stageMap" -> AppContext.getMap(stageMap)) ++ super.getStartEndTime() } } object JobTimeSpan { def getTimeSpan(json: Map[String, JValue]): mutable.HashMap[Long, JobTimeSpan] = { implicit val formats = DefaultFormats val map = new mutable.HashMap[Long, JobTimeSpan] json.keys.map(key => { val value = json.get(key).get.extract[JValue] val timeSpan = new JobTimeSpan((value \ "jobID").extract[Long]) timeSpan.jobMetrics = AggregateMetrics.getAggregateMetrics((value \ "jobMetrics") .extract[JValue]) timeSpan.stageMap = StageTimeSpan.getTimeSpan((value \ "stageMap").extract[ immutable.Map[String, JValue]]) timeSpan.addStartEnd(value) map.put(key.toLong, timeSpan) }) map } }
Example 5
Source File: ExecutorTimeSpan.scala From sparklens with Apache License 2.0
package com.qubole.sparklens.timespan import com.qubole.sparklens.common.AggregateMetrics import org.apache.spark.executor.TaskMetrics import org.apache.spark.scheduler.TaskInfo import org.json4s.DefaultFormats import org.json4s.JsonAST.JValue import scala.collection.mutable class ExecutorTimeSpan(val executorID: String, val hostID: String, val cores: Int) extends TimeSpan { var executorMetrics = new AggregateMetrics() def updateAggregateTaskMetrics (taskMetrics: TaskMetrics, taskInfo: TaskInfo): Unit = { executorMetrics.update(taskMetrics, taskInfo) } override def getMap(): Map[String, _ <: Any] = { implicit val formats = DefaultFormats Map("executorID" -> executorID, "hostID" -> hostID, "cores" -> cores, "executorMetrics" -> executorMetrics.getMap()) ++ super.getStartEndTime() } } object ExecutorTimeSpan { def getTimeSpan(json: Map[String, JValue]): mutable.HashMap[String, ExecutorTimeSpan] = { implicit val formats = DefaultFormats val map = new mutable.HashMap[String, ExecutorTimeSpan] json.keys.map(key => { val value = json.get(key).get val timeSpan = new ExecutorTimeSpan( (value \ "executorID").extract[String], (value \ "hostID").extract[String], (value \ "cores").extract[Int] ) timeSpan.executorMetrics = AggregateMetrics.getAggregateMetrics((value \ "executorMetrics").extract[JValue]) timeSpan.addStartEnd(value) map.put(key, timeSpan) }) map } }
Example 6
Source File: UIData.scala From iolap with Apache License 2.0
package org.apache.spark.ui.jobs import org.apache.spark.JobExecutionStatus import org.apache.spark.executor.TaskMetrics import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo} import org.apache.spark.util.collection.OpenHashSet import scala.collection.mutable.HashMap private[spark] object UIData { class ExecutorSummary { var taskTime : Long = 0 var failedTasks : Int = 0 var succeededTasks : Int = 0 var inputBytes : Long = 0 var inputRecords : Long = 0 var outputBytes : Long = 0 var outputRecords : Long = 0 var shuffleRead : Long = 0 var shuffleReadRecords : Long = 0 var shuffleWrite : Long = 0 var shuffleWriteRecords : Long = 0 var memoryBytesSpilled : Long = 0 var diskBytesSpilled : Long = 0 } class JobUIData( var jobId: Int = -1, var submissionTime: Option[Long] = None, var completionTime: Option[Long] = None, var stageIds: Seq[Int] = Seq.empty, var jobGroup: Option[String] = None, var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN, case class TaskUIData( var taskInfo: TaskInfo, var taskMetrics: Option[TaskMetrics] = None, var errorMessage: Option[String] = None) case class ExecutorUIData( val startTime: Long, var finishTime: Option[Long] = None, var finishReason: Option[String] = None) }
Example 7
Source File: TaskResult.scala From iolap with Apache License 2.0
package org.apache.spark.scheduler import java.io._ import java.nio.ByteBuffer import scala.collection.mutable.Map import org.apache.spark.SparkEnv import org.apache.spark.executor.TaskMetrics import org.apache.spark.storage.BlockId import org.apache.spark.util.Utils // Task result. Also contains updates to accumulator variables. private[spark] sealed trait TaskResult[T] def value(): T = { if (valueObjectDeserialized) { valueObject } else { // This should not run when holding a lock because it may cost dozens of seconds for a large // value. val resultSer = SparkEnv.get.serializer.newInstance() valueObject = resultSer.deserialize(valueBytes) valueObjectDeserialized = true valueObject } } }
Example 8
Source File: TaskContextImpl.scala From iolap with Apache License 2.0
package org.apache.spark import org.apache.spark.executor.TaskMetrics import org.apache.spark.unsafe.memory.TaskMemoryManager import org.apache.spark.util.{TaskCompletionListener, TaskCompletionListenerException} import scala.collection.mutable.ArrayBuffer private[spark] class TaskContextImpl( val stageId: Int, val partitionId: Int, override val taskAttemptId: Long, override val attemptNumber: Int, override val taskMemoryManager: TaskMemoryManager, val runningLocally: Boolean = false, val taskMetrics: TaskMetrics = TaskMetrics.empty) extends TaskContext with Logging { // For backwards-compatibility; this method is now deprecated as of 1.3.0. override def attemptId(): Long = taskAttemptId // List of callback functions to execute when the task completes. @transient private val onCompleteCallbacks = new ArrayBuffer[TaskCompletionListener] // Whether the corresponding task has been killed. @volatile private var interrupted: Boolean = false // Whether the task has completed. @volatile private var completed: Boolean = false override def addTaskCompletionListener(listener: TaskCompletionListener): this.type = { onCompleteCallbacks += listener this } override def addTaskCompletionListener(f: TaskContext => Unit): this.type = { onCompleteCallbacks += new TaskCompletionListener { override def onTaskCompletion(context: TaskContext): Unit = f(context) } this } @deprecated("use addTaskCompletionListener", "1.1.0") override def addOnCompleteCallback(f: () => Unit) { onCompleteCallbacks += new TaskCompletionListener { override def onTaskCompletion(context: TaskContext): Unit = f() } } private[spark] def markInterrupted(): Unit = { interrupted = true } override def isCompleted(): Boolean = completed override def isRunningLocally(): Boolean = runningLocally override def isInterrupted(): Boolean = interrupted }
Example 9
Source File: HeartbeatReceiverSuite.scala From iolap with Apache License 2.0
package org.apache.spark import scala.concurrent.duration._ import scala.language.postfixOps import org.apache.spark.executor.TaskMetrics import org.apache.spark.storage.BlockManagerId import org.mockito.Mockito.{mock, spy, verify, when} import org.mockito.Matchers import org.mockito.Matchers._ import org.apache.spark.scheduler.TaskScheduler import org.apache.spark.util.RpcUtils import org.scalatest.concurrent.Eventually._ class HeartbeatReceiverSuite extends SparkFunSuite with LocalSparkContext { test("HeartbeatReceiver") { sc = spy(new SparkContext("local[2]", "test")) val scheduler = mock(classOf[TaskScheduler]) when(scheduler.executorHeartbeatReceived(any(), any(), any())).thenReturn(true) when(sc.taskScheduler).thenReturn(scheduler) val heartbeatReceiver = new HeartbeatReceiver(sc) sc.env.rpcEnv.setupEndpoint("heartbeat", heartbeatReceiver).send(TaskSchedulerIsSet) eventually(timeout(5 seconds), interval(5 millis)) { assert(heartbeatReceiver.scheduler != null) } val receiverRef = RpcUtils.makeDriverRef("heartbeat", sc.conf, sc.env.rpcEnv) val metrics = new TaskMetrics val blockManagerId = BlockManagerId("executor-1", "localhost", 12345) val response = receiverRef.askWithRetry[HeartbeatResponse]( Heartbeat("executor-1", Array(1L -> metrics), blockManagerId)) verify(scheduler).executorHeartbeatReceived( Matchers.eq("executor-1"), Matchers.eq(Array(1L -> metrics)), Matchers.eq(blockManagerId)) assert(false === response.reregisterBlockManager) } test("HeartbeatReceiver re-register") { sc = spy(new SparkContext("local[2]", "test")) val scheduler = mock(classOf[TaskScheduler]) when(scheduler.executorHeartbeatReceived(any(), any(), any())).thenReturn(false) when(sc.taskScheduler).thenReturn(scheduler) val heartbeatReceiver = new HeartbeatReceiver(sc) sc.env.rpcEnv.setupEndpoint("heartbeat", heartbeatReceiver).send(TaskSchedulerIsSet) eventually(timeout(5 seconds), interval(5 millis)) { assert(heartbeatReceiver.scheduler != null) } val receiverRef = RpcUtils.makeDriverRef("heartbeat", sc.conf, sc.env.rpcEnv) val metrics = new TaskMetrics val blockManagerId = BlockManagerId("executor-1", "localhost", 12345) val response = receiverRef.askWithRetry[HeartbeatResponse]( Heartbeat("executor-1", Array(1L -> metrics), blockManagerId)) verify(scheduler).executorHeartbeatReceived( Matchers.eq("executor-1"), Matchers.eq(Array(1L -> metrics)), Matchers.eq(blockManagerId)) assert(true === response.reregisterBlockManager) } }
Example 10
Source File: UIData.scala From spark1.52 with Apache License 2.0
package org.apache.spark.ui.jobs

import org.apache.spark.JobExecutionStatus
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
import org.apache.spark.util.collection.OpenHashSet

import scala.collection.mutable
import scala.collection.mutable.HashMap

private[spark] object UIData {

  class ExecutorSummary {
    var taskTime : Long = 0 // task time
    var failedTasks : Int = 0 // number of failed tasks
    var succeededTasks : Int = 0 // number of succeeded tasks
    var inputBytes : Long = 0
    var inputRecords : Long = 0
    var outputBytes : Long = 0
    var outputRecords : Long = 0
    var shuffleRead : Long = 0
    var shuffleReadRecords : Long = 0
    var shuffleWrite : Long = 0
    var shuffleWriteRecords : Long = 0
    var memoryBytesSpilled : Long = 0
    var diskBytesSpilled : Long = 0
  }

  class JobUIData(
    var jobId: Int = -1,
    var submissionTime: Option[Long] = None, // submission time
    var completionTime: Option[Long] = None, // completion time
    var stageIds: Seq[Int] = Seq.empty,
    var jobGroup: Option[String] = None,
    var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN,

  case class TaskUIData(
    var taskInfo: TaskInfo,
    var taskMetrics: Option[TaskMetrics] = None,
    var errorMessage: Option[String] = None)

  case class ExecutorUIData(
    val startTime: Long,
    var finishTime: Option[Long] = None,
    var finishReason: Option[String] = None)
}
Example 11
Source File: TaskContextImpl.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark import java.util.Properties import scala.collection.mutable.ArrayBuffer import org.apache.spark.executor.TaskMetrics import org.apache.spark.internal.Logging import org.apache.spark.memory.TaskMemoryManager import org.apache.spark.metrics.MetricsSystem import org.apache.spark.metrics.source.Source import org.apache.spark.util._ private[spark] class TaskContextImpl( val stageId: Int, val partitionId: Int, override val taskAttemptId: Long, override val attemptNumber: Int, override val taskMemoryManager: TaskMemoryManager, localProperties: Properties, @transient private val metricsSystem: MetricsSystem, // The default value is only used in tests. override val taskMetrics: TaskMetrics = TaskMetrics.empty) extends TaskContext with Logging { private[spark] def markInterrupted(): Unit = { interrupted = true } override def isCompleted(): Boolean = completed override def isRunningLocally(): Boolean = false override def isInterrupted(): Boolean = interrupted override def getLocalProperty(key: String): String = localProperties.getProperty(key) override def getMetricsSources(sourceName: String): Seq[Source] = metricsSystem.getSourcesByName(sourceName) private[spark] override def registerAccumulator(a: AccumulatorV2[_, _]): Unit = { taskMetrics.registerAccumulator(a) } }
Example 12
Source File: CacheManagerSuite.scala From spark1.52 with Apache License 2.0
package org.apache.spark

import org.mockito.Mockito._
import org.scalatest.BeforeAndAfter
import org.scalatest.mock.MockitoSugar

import org.apache.spark.executor.{DataReadMethod, TaskMetrics}
import org.apache.spark.rdd.RDD
import org.apache.spark.storage._

// TODO: Test the CacheManager's thread-safety aspects
class CacheManagerSuite extends SparkFunSuite with LocalSparkContext with BeforeAndAfter
  with MockitoSugar {

  var blockManager: BlockManager = _
  var cacheManager: CacheManager = _
  var split: Partition = _
  var rdd: RDD[Int] = _
  var rdd2: RDD[Int] = _
  var rdd3: RDD[Int] = _

  before {
    sc = new SparkContext("local", "test")
    blockManager = mock[BlockManager] // mock the BlockManager
    cacheManager = new CacheManager(blockManager) // references the mocked BlockManager
    split = new Partition { override def index: Int = 0 }
    rdd = new RDD[Int](sc, Nil) {
      override def getPartitions: Array[Partition] = Array(split)
      override val getDependencies = List[Dependency[_]]() // dependencies
      override def compute(split: Partition, context: TaskContext): Iterator[Int] = {
        // println(split.index+"=="+context.taskMetrics().hostname);
        Array(1, 2, 3, 4).iterator // compute
      }
    }
    rdd2 = new RDD[Int](sc, List(new OneToOneDependency(rdd))) { // depends on rdd
      override def getPartitions: Array[Partition] = firstParent[Int].partitions
      override def compute(split: Partition, context: TaskContext): Iterator[Int] =
        firstParent[Int].iterator(split, context)
    }.cache() // cached
    rdd3 = new RDD[Int](sc, List(new OneToOneDependency(rdd2))) { // depends on rdd2
      override def getPartitions: Array[Partition] = firstParent[Int].partitions
      override def compute(split: Partition, context: TaskContext): Iterator[Int] =
        firstParent[Int].iterator(split, context)
    }.cache() // cached
  }

  test("get uncached rdd") {
    // Do not mock this test, because attempting to match Array[Any], which is not covariant,
    // in blockManager.put is a losing battle. You have been warned.
    blockManager = sc.env.blockManager
    cacheManager = sc.env.cacheManager
    val context = TaskContext.empty()
    val computeValue = cacheManager.getOrCompute(rdd, split, context, StorageLevel.MEMORY_ONLY)
    val getValue = blockManager.get(RDDBlockId(rdd.id, split.index))
    assert(computeValue.toList === List(1, 2, 3, 4)) // the computed value
    // getValue is a BlockResult; if it is undefined, the block cached from getOrCompute was not found
    assert(getValue.isDefined, "Block cached from getOrCompute is not found!")
    assert(getValue.get.data.toList === List(1, 2, 3, 4))
  }

  test("get cached rdd") {
    val result = new BlockResult(Array(5, 6, 7).iterator, DataReadMethod.Memory, 12)
    when(blockManager.get(RDDBlockId(0, 0))).thenReturn(Some(result)) // then return
    val context = TaskContext.empty()
    val getValue = blockManager.get(RDDBlockId(rdd.id, split.index))
    println(split.index+"==rddId=="+rdd.id+"==="+getValue.get)
    val value = cacheManager.getOrCompute(rdd, split, context, StorageLevel.MEMORY_ONLY)
    assert(value.toList === List(5, 6, 7))
  }

  test("get uncached local rdd") {
    // Local computation should not persist the resulting value, so don't expect a put().
    when(blockManager.get(RDDBlockId(0, 0))).thenReturn(None) // then return
    val context = new TaskContextImpl(0, 0, 0, 0, null, null, Seq.empty, runningLocally = true)
    val value = cacheManager.getOrCompute(rdd, split, context, StorageLevel.MEMORY_ONLY)
    assert(value.toList === List(1, 2, 3, 4))
  }

  test("verify task metrics updated correctly") {
    cacheManager = sc.env.cacheManager
    val context = TaskContext.empty()
    cacheManager.getOrCompute(rdd3, split, context, StorageLevel.MEMORY_ONLY)
    assert(context.taskMetrics.updatedBlocks.getOrElse(Seq()).size === 2)
  }
}
Example 13
Source File: StagePageSuite.scala From spark1.52 with Apache License 2.0
package org.apache.spark.ui

import javax.servlet.http.HttpServletRequest

import scala.xml.Node

import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS}

import org.apache.spark._
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler._
import org.apache.spark.ui.jobs.{JobProgressListener, StagePage, StagesTab}
import org.apache.spark.ui.scope.RDDOperationGraphListener

class StagePageSuite extends SparkFunSuite with LocalSparkContext {

  // Peak execution memory is only displayed when unsafe is enabled.
  test("peak execution memory only displayed if unsafe is enabled") {
    val unsafeConf = "spark.sql.unsafe.enabled"
    val conf = new SparkConf(false).set(unsafeConf, "true")
    val html = renderStagePage(conf).toString().toLowerCase
    println("===="+html)
    val targetString = "peak execution memory"
    assert(html.contains(targetString))
    // Disable unsafe and make sure it's not there
    val conf2 = new SparkConf(false).set(unsafeConf, "false")
    val html2 = renderStagePage(conf2).toString().toLowerCase
    assert(!html2.contains(targetString))
    // Avoid setting anything; it should be displayed by default
    val conf3 = new SparkConf(false)
    val html3 = renderStagePage(conf3).toString().toLowerCase
    assert(html3.contains(targetString))
  }

  test("SPARK-10543: peak execution memory should be per-task rather than cumulative") {
    val unsafeConf = "spark.sql.unsafe.enabled"
    val conf = new SparkConf(false).set(unsafeConf, "true")
    val html = renderStagePage(conf).toString().toLowerCase
    // verify min/25/50/75/max show task value not cumulative values
    assert(html.contains("<td>10.0 b</td>" * 5))
  }

  private def renderStagePage(conf: SparkConf): Seq[Node] = {
    val jobListener = new JobProgressListener(conf)
    val graphListener = new RDDOperationGraphListener(conf)
    val tab = mock(classOf[StagesTab], RETURNS_SMART_NULLS)
    val request = mock(classOf[HttpServletRequest])
    when(tab.conf).thenReturn(conf)
    when(tab.progressListener).thenReturn(jobListener)
    when(tab.operationGraphListener).thenReturn(graphListener)
    when(tab.appName).thenReturn("testing")
    when(tab.headerTabs).thenReturn(Seq.empty)
    when(request.getParameter("id")).thenReturn("0")
    when(request.getParameter("attempt")).thenReturn("0")
    val page = new StagePage(tab)

    // Simulate a stage in job progress listener
    val stageInfo = new StageInfo(0, 0, "dummy", 1, Seq.empty, Seq.empty, "details")
    // Simulate two tasks to test PEAK_EXECUTION_MEMORY correctness
    (1 to 2).foreach { taskId =>
      val taskInfo = new TaskInfo(taskId, taskId, 0, 0, "0", "localhost", TaskLocality.ANY, false)
      val peakExecutionMemory = 10
      taskInfo.accumulables += new AccumulableInfo(0, InternalAccumulator.PEAK_EXECUTION_MEMORY,
        Some(peakExecutionMemory.toString), (peakExecutionMemory * taskId).toString, true)
      jobListener.onStageSubmitted(SparkListenerStageSubmitted(stageInfo))
      jobListener.onTaskStart(SparkListenerTaskStart(0, 0, taskInfo))
      taskInfo.markSuccessful()
      jobListener.onTaskEnd(
        SparkListenerTaskEnd(0, 0, "result", Success, taskInfo, TaskMetrics.empty))
    }
    jobListener.onStageCompleted(SparkListenerStageCompleted(stageInfo))
    page.render(request)
  }
}
Example 14
Source File: StageInfo.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.scheduler import scala.collection.mutable.HashMap import org.apache.spark.annotation.DeveloperApi import org.apache.spark.executor.TaskMetrics import org.apache.spark.storage.RDDInfo def fromStage( stage: Stage, attemptId: Int, numTasks: Option[Int] = None, taskMetrics: TaskMetrics = null, taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty ): StageInfo = { val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd) val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos new StageInfo( stage.id, attemptId, stage.name, numTasks.getOrElse(stage.numTasks), rddInfos, stage.parents.map(_.id), stage.details, taskMetrics, taskLocalityPreferences) } }
Example 15
Source File: FakeTask.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.scheduler import java.util.Properties import org.apache.spark.{Partition, SparkEnv, TaskContext} import org.apache.spark.executor.TaskMetrics class FakeTask( stageId: Int, partitionId: Int, prefLocs: Seq[TaskLocation] = Nil, serializedTaskMetrics: Array[Byte] = SparkEnv.get.closureSerializer.newInstance().serialize(TaskMetrics.registered).array()) extends Task[Int](stageId, 0, partitionId, new Properties, serializedTaskMetrics) { override def runTask(context: TaskContext): Int = 0 override def preferredLocations: Seq[TaskLocation] = prefLocs } object FakeTask { def createTaskSet(numTasks: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { createTaskSet(numTasks, stageAttemptId = 0, prefLocs: _*) } def createTaskSet(numTasks: Int, stageAttemptId: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { createTaskSet(numTasks, stageId = 0, stageAttemptId, prefLocs: _*) } def createTaskSet(numTasks: Int, stageId: Int, stageAttemptId: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { if (prefLocs.size != 0 && prefLocs.size != numTasks) { throw new IllegalArgumentException("Wrong number of task locations") } val tasks = Array.tabulate[Task[_]](numTasks) { i => new FakeTask(stageId, i, if (prefLocs.size != 0) prefLocs(i) else Nil) } new TaskSet(tasks, stageId, stageAttemptId, priority = 0, null) } def createShuffleMapTaskSet( numTasks: Int, stageId: Int, stageAttemptId: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { if (prefLocs.size != 0 && prefLocs.size != numTasks) { throw new IllegalArgumentException("Wrong number of task locations") } val tasks = Array.tabulate[Task[_]](numTasks) { i => new ShuffleMapTask(stageId, stageAttemptId, null, new Partition { override def index: Int = i }, prefLocs(i), new Properties, SparkEnv.get.closureSerializer.newInstance().serialize(TaskMetrics.registered).array()) } new TaskSet(tasks, stageId, stageAttemptId, priority = 0, null) } }
Example 16
Source File: UIData.scala From BigDatalog with Apache License 2.0
package org.apache.spark.ui.jobs import org.apache.spark.JobExecutionStatus import org.apache.spark.executor.TaskMetrics import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo} import org.apache.spark.util.collection.OpenHashSet import scala.collection.mutable import scala.collection.mutable.HashMap private[spark] object UIData { class ExecutorSummary { var taskTime : Long = 0 var failedTasks : Int = 0 var succeededTasks : Int = 0 var inputBytes : Long = 0 var inputRecords : Long = 0 var outputBytes : Long = 0 var outputRecords : Long = 0 var shuffleRead : Long = 0 var shuffleReadRecords : Long = 0 var shuffleWrite : Long = 0 var shuffleWriteRecords : Long = 0 var memoryBytesSpilled : Long = 0 var diskBytesSpilled : Long = 0 } class JobUIData( var jobId: Int = -1, var submissionTime: Option[Long] = None, var completionTime: Option[Long] = None, var stageIds: Seq[Int] = Seq.empty, var jobGroup: Option[String] = None, var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN, case class TaskUIData( var taskInfo: TaskInfo, var taskMetrics: Option[TaskMetrics] = None, var errorMessage: Option[String] = None) case class ExecutorUIData( val startTime: Long, var finishTime: Option[Long] = None, var finishReason: Option[String] = None) }
Example 17
Source File: TaskResult.scala From BigDatalog with Apache License 2.0
package org.apache.spark.scheduler import java.io._ import java.nio.ByteBuffer import scala.collection.Map import scala.collection.mutable import org.apache.spark.SparkEnv import org.apache.spark.executor.TaskMetrics import org.apache.spark.storage.BlockId import org.apache.spark.util.Utils // Task result. Also contains updates to accumulator variables. private[spark] sealed trait TaskResult[T] def value(): T = { if (valueObjectDeserialized) { valueObject } else { // This should not run when holding a lock because it may cost dozens of seconds for a large // value. val resultSer = SparkEnv.get.serializer.newInstance() valueObject = resultSer.deserialize(valueBytes) valueObjectDeserialized = true valueObject } } }
Example 18
Source File: TaskContextImpl.scala From BigDatalog with Apache License 2.0
package org.apache.spark import scala.collection.mutable.{ArrayBuffer, HashMap} import org.apache.spark.executor.TaskMetrics import org.apache.spark.memory.TaskMemoryManager import org.apache.spark.metrics.MetricsSystem import org.apache.spark.metrics.source.Source import org.apache.spark.util.{TaskCompletionListener, TaskCompletionListenerException} private[spark] class TaskContextImpl( val stageId: Int, val partitionId: Int, override val taskAttemptId: Long, override val attemptNumber: Int, override val taskMemoryManager: TaskMemoryManager, @transient private val metricsSystem: MetricsSystem, internalAccumulators: Seq[Accumulator[Long]], val runningLocally: Boolean = false, val taskMetrics: TaskMetrics = TaskMetrics.empty) extends TaskContext with Logging { // For backwards-compatibility; this method is now deprecated as of 1.3.0. override def attemptId(): Long = taskAttemptId // List of callback functions to execute when the task completes. @transient private val onCompleteCallbacks = new ArrayBuffer[TaskCompletionListener] // Whether the corresponding task has been killed. @volatile private var interrupted: Boolean = false // Whether the task has completed. @volatile private var completed: Boolean = false override def addTaskCompletionListener(listener: TaskCompletionListener): this.type = { onCompleteCallbacks += listener this } override def addTaskCompletionListener(f: TaskContext => Unit): this.type = { onCompleteCallbacks += new TaskCompletionListener { override def onTaskCompletion(context: TaskContext): Unit = f(context) } this } @deprecated("use addTaskCompletionListener", "1.1.0") override def addOnCompleteCallback(f: () => Unit) { onCompleteCallbacks += new TaskCompletionListener { override def onTaskCompletion(context: TaskContext): Unit = f() } } private[spark] def markInterrupted(): Unit = { interrupted = true } override def isCompleted(): Boolean = completed override def isRunningLocally(): Boolean = runningLocally override def isInterrupted(): Boolean = interrupted override def getMetricsSources(sourceName: String): Seq[Source] = metricsSystem.getSourcesByName(sourceName) @transient private val accumulators = new HashMap[Long, Accumulable[_, _]] private[spark] override def registerAccumulator(a: Accumulable[_, _]): Unit = synchronized { accumulators(a.id) = a } private[spark] override def collectInternalAccumulators(): Map[Long, Any] = synchronized { accumulators.filter(_._2.isInternal).mapValues(_.localValue).toMap } private[spark] override def collectAccumulators(): Map[Long, Any] = synchronized { accumulators.mapValues(_.localValue).toMap } //private[spark] override val internalMetricsToAccumulators: Map[String, Accumulator[Long]] = { // Explicitly register internal accumulators here because these are // not captured in the task closure and are already deserialized internalAccumulators.foreach(registerAccumulator) internalAccumulators.map { a => (a.name.get, a) }.toMap } }
Example 19
Source File: CacheManagerSuite.scala From BigDatalog with Apache License 2.0
package org.apache.spark import org.mockito.Mockito._ import org.scalatest.BeforeAndAfter import org.scalatest.mock.MockitoSugar import org.apache.spark.executor.{DataReadMethod, TaskMetrics} import org.apache.spark.rdd.RDD import org.apache.spark.storage._ // TODO: Test the CacheManager's thread-safety aspects class CacheManagerSuite extends SparkFunSuite with LocalSparkContext with BeforeAndAfter with MockitoSugar { var blockManager: BlockManager = _ var cacheManager: CacheManager = _ var split: Partition = _ var rdd: RDD[Int] = _ var rdd2: RDD[Int] = _ var rdd3: RDD[Int] = _ before { sc = new SparkContext("local", "test") blockManager = mock[BlockManager] cacheManager = new CacheManager(blockManager) split = new Partition { override def index: Int = 0 } rdd = new RDD[Int](sc, Nil) { override def getPartitions: Array[Partition] = Array(split) override val getDependencies = List[Dependency[_]]() override def compute(split: Partition, context: TaskContext): Iterator[Int] = Array(1, 2, 3, 4).iterator } rdd2 = new RDD[Int](sc, List(new OneToOneDependency(rdd))) { override def getPartitions: Array[Partition] = firstParent[Int].partitions override def compute(split: Partition, context: TaskContext): Iterator[Int] = firstParent[Int].iterator(split, context) }.cache() rdd3 = new RDD[Int](sc, List(new OneToOneDependency(rdd2))) { override def getPartitions: Array[Partition] = firstParent[Int].partitions override def compute(split: Partition, context: TaskContext): Iterator[Int] = firstParent[Int].iterator(split, context) }.cache() } test("get uncached rdd") { // Do not mock this test, because attempting to match Array[Any], which is not covariant, // in blockManager.put is a losing battle. You have been warned. blockManager = sc.env.blockManager cacheManager = sc.env.cacheManager val context = TaskContext.empty() val computeValue = cacheManager.getOrCompute(rdd, split, context, StorageLevel.MEMORY_ONLY) val getValue = blockManager.get(RDDBlockId(rdd.id, split.index)) assert(computeValue.toList === List(1, 2, 3, 4)) assert(getValue.isDefined, "Block cached from getOrCompute is not found!") assert(getValue.get.data.toList === List(1, 2, 3, 4)) } test("get cached rdd") { val result = new BlockResult(Array(5, 6, 7).iterator, DataReadMethod.Memory, 12) when(blockManager.get(RDDBlockId(0, 0))).thenReturn(Some(result)) val context = TaskContext.empty() val value = cacheManager.getOrCompute(rdd, split, context, StorageLevel.MEMORY_ONLY) assert(value.toList === List(5, 6, 7)) } test("get uncached local rdd") { // Local computation should not persist the resulting value, so don't expect a put(). when(blockManager.get(RDDBlockId(0, 0))).thenReturn(None) val context = new TaskContextImpl(0, 0, 0, 0, null, null, Seq.empty, runningLocally = true) val value = cacheManager.getOrCompute(rdd, split, context, StorageLevel.MEMORY_ONLY) assert(value.toList === List(1, 2, 3, 4)) } test("verify task metrics updated correctly") { cacheManager = sc.env.cacheManager val context = TaskContext.empty() cacheManager.getOrCompute(rdd3, split, context, StorageLevel.MEMORY_ONLY) assert(context.taskMetrics.updatedBlocks.getOrElse(Seq()).size === 2) } }
Example 20
Source File: StagePageSuite.scala From BigDatalog with Apache License 2.0
package org.apache.spark.ui import javax.servlet.http.HttpServletRequest import scala.xml.Node import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS} import org.apache.spark._ import org.apache.spark.executor.TaskMetrics import org.apache.spark.scheduler._ import org.apache.spark.ui.jobs.{JobProgressListener, StagePage, StagesTab} import org.apache.spark.ui.scope.RDDOperationGraphListener class StagePageSuite extends SparkFunSuite with LocalSparkContext { test("peak execution memory only displayed if unsafe is enabled") { val unsafeConf = "spark.sql.unsafe.enabled" val conf = new SparkConf(false).set(unsafeConf, "true") val html = renderStagePage(conf).toString().toLowerCase val targetString = "peak execution memory" assert(html.contains(targetString)) // Disable unsafe and make sure it's not there val conf2 = new SparkConf(false).set(unsafeConf, "false") val html2 = renderStagePage(conf2).toString().toLowerCase assert(!html2.contains(targetString)) // Avoid setting anything; it should be displayed by default val conf3 = new SparkConf(false) val html3 = renderStagePage(conf3).toString().toLowerCase assert(html3.contains(targetString)) } test("SPARK-10543: peak execution memory should be per-task rather than cumulative") { val unsafeConf = "spark.sql.unsafe.enabled" val conf = new SparkConf(false).set(unsafeConf, "true") val html = renderStagePage(conf).toString().toLowerCase // verify min/25/50/75/max show task value not cumulative values assert(html.contains("<td>10.0 b</td>" * 5)) } private def renderStagePage(conf: SparkConf): Seq[Node] = { val jobListener = new JobProgressListener(conf) val graphListener = new RDDOperationGraphListener(conf) val tab = mock(classOf[StagesTab], RETURNS_SMART_NULLS) val request = mock(classOf[HttpServletRequest]) when(tab.conf).thenReturn(conf) when(tab.progressListener).thenReturn(jobListener) when(tab.operationGraphListener).thenReturn(graphListener) when(tab.appName).thenReturn("testing") when(tab.headerTabs).thenReturn(Seq.empty) when(request.getParameter("id")).thenReturn("0") when(request.getParameter("attempt")).thenReturn("0") val page = new StagePage(tab) // Simulate a stage in job progress listener val stageInfo = new StageInfo(0, 0, "dummy", 1, Seq.empty, Seq.empty, "details") // Simulate two tasks to test PEAK_EXECUTION_MEMORY correctness (1 to 2).foreach { taskId => val taskInfo = new TaskInfo(taskId, taskId, 0, 0, "0", "localhost", TaskLocality.ANY, false) val peakExecutionMemory = 10 taskInfo.accumulables += new AccumulableInfo(0, InternalAccumulator.PEAK_EXECUTION_MEMORY, Some(peakExecutionMemory.toString), (peakExecutionMemory * taskId).toString, true) jobListener.onStageSubmitted(SparkListenerStageSubmitted(stageInfo)) jobListener.onTaskStart(SparkListenerTaskStart(0, 0, taskInfo)) taskInfo.markSuccessful() jobListener.onTaskEnd( SparkListenerTaskEnd(0, 0, "result", Success, taskInfo, TaskMetrics.empty)) } jobListener.onStageCompleted(SparkListenerStageCompleted(stageInfo)) page.render(request) } }
Example 21
Source File: StageInfo.scala From sparkoscope with Apache License 2.0
package org.apache.spark.scheduler import scala.collection.mutable.HashMap import org.apache.spark.annotation.DeveloperApi import org.apache.spark.executor.TaskMetrics import org.apache.spark.storage.RDDInfo def fromStage( stage: Stage, attemptId: Int, numTasks: Option[Int] = None, taskMetrics: TaskMetrics = null, taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty ): StageInfo = { val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd) val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos new StageInfo( stage.id, attemptId, stage.name, numTasks.getOrElse(stage.numTasks), rddInfos, stage.parents.map(_.id), stage.details, taskMetrics, taskLocalityPreferences) } }
Example 22
Source File: StageInfo.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.scheduler import scala.collection.mutable.HashMap import org.apache.spark.annotation.DeveloperApi import org.apache.spark.executor.TaskMetrics import org.apache.spark.storage.RDDInfo def fromStage( stage: Stage, attemptId: Int, numTasks: Option[Int] = None, taskMetrics: TaskMetrics = null, taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty ): StageInfo = { val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd) val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos new StageInfo( stage.id, attemptId, stage.name, numTasks.getOrElse(stage.numTasks), rddInfos, stage.parents.map(_.id), stage.details, taskMetrics, taskLocalityPreferences) } }
Example 23
Source File: ResultTask.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.scheduler import java.io._ import java.lang.management.ManagementFactory import java.nio.ByteBuffer import java.util.Properties import org.apache.spark._ import org.apache.spark.broadcast.Broadcast import org.apache.spark.executor.TaskMetrics import org.apache.spark.rdd.RDD private[spark] class ResultTask[T, U]( stageId: Int, stageAttemptId: Int, taskBinary: Broadcast[Array[Byte]], partition: Partition, locs: Seq[TaskLocation], val outputId: Int, localProperties: Properties, serializedTaskMetrics: Array[Byte] = SparkEnv.get.closureSerializer.newInstance().serialize(TaskMetrics.registered).array(), isFutureTask: Boolean = false, depShuffleIds: Option[Seq[Seq[Int]]] = None, depShuffleNumMaps: Option[Seq[Int]] = None, jobId: Option[Int] = None, appId: Option[String] = None, appAttemptId: Option[String] = None) extends Task[U](stageId, stageAttemptId, partition.index, serializedTaskMetrics, localProperties, isFutureTask, depShuffleIds, depShuffleNumMaps, jobId, appId, appAttemptId) with Serializable { var rdd: RDD[T] = null var func: (TaskContext, Iterator[T]) => U = null @transient private[this] val preferredLocs: Seq[TaskLocation] = { if (locs == null) Nil else locs.toSet.toSeq } override def prepTask(): Unit = { // Deserialize the RDD and the func using the broadcast variables. val threadMXBean = ManagementFactory.getThreadMXBean val deserializeStartTime = System.currentTimeMillis() val deserializeStartCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) { threadMXBean.getCurrentThreadCpuTime } else 0L val ser = SparkEnv.get.closureSerializer.newInstance() val (_rdd, _func) = ser.deserialize[(RDD[T], (TaskContext, Iterator[T]) => U)]( ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader) rdd = _rdd func = _func _executorDeserializeTime = System.currentTimeMillis() - deserializeStartTime _executorDeserializeCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) { threadMXBean.getCurrentThreadCpuTime - deserializeStartCpuTime } else 0L } override def runTask(context: TaskContext): U = { // Deserialize the RDD and the func using the broadcast variables. if (func == null || rdd == null) { prepTask() } func(context, rdd.iterator(partition, context)) } // This is only callable on the driver side. override def preferredLocations: Seq[TaskLocation] = preferredLocs override def toString: String = "ResultTask(" + stageId + ", " + partitionId + ")" } object ResultTask { def apply[T, U]( stageId: Int, stageAttemptId: Int, partition: Partition, outputId: Int, localProperties: Properties, internalAccumulatorsSer: Array[Byte], isFutureTask: Boolean, rdd: RDD[T], func: (TaskContext, Iterator[T]) => U): ResultTask[T, U] = { val rt = new ResultTask[T, U](stageId, stageAttemptId, null, partition, Seq.empty, outputId, localProperties, internalAccumulatorsSer, isFutureTask) rt.rdd = rdd rt.func = func rt } }
Example 24
Source File: TaskContextImpl.scala From drizzle-spark with Apache License 2.0
package org.apache.spark import java.util.Properties import scala.collection.mutable.ArrayBuffer import org.apache.spark.executor.TaskMetrics import org.apache.spark.internal.Logging import org.apache.spark.memory.TaskMemoryManager import org.apache.spark.metrics.MetricsSystem import org.apache.spark.metrics.source.Source import org.apache.spark.util._ private[spark] class TaskContextImpl( val stageId: Int, val partitionId: Int, override val taskAttemptId: Long, override val attemptNumber: Int, var _taskMemoryManager: TaskMemoryManager, localProperties: Properties, @transient private val metricsSystem: MetricsSystem, // The default value is only used in tests. override val taskMetrics: TaskMetrics = TaskMetrics.empty, var batchId: Int = 0) extends TaskContext with Logging { private[spark] def markInterrupted(): Unit = { interrupted = true } override def isCompleted(): Boolean = completed override def isRunningLocally(): Boolean = false override def isInterrupted(): Boolean = interrupted override def getLocalProperty(key: String): String = localProperties.getProperty(key) override def getMetricsSources(sourceName: String): Seq[Source] = metricsSystem.getSourcesByName(sourceName) private[spark] override def registerAccumulator(a: AccumulatorV2[_, _]): Unit = { taskMetrics.registerAccumulator(a) } }
Example 25
Source File: StagePageSuite.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.ui import javax.servlet.http.HttpServletRequest import scala.xml.Node import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS} import org.apache.spark._ import org.apache.spark.executor.TaskMetrics import org.apache.spark.scheduler._ import org.apache.spark.storage.StorageStatusListener import org.apache.spark.ui.exec.ExecutorsListener import org.apache.spark.ui.jobs.{JobProgressListener, StagePage, StagesTab} import org.apache.spark.ui.scope.RDDOperationGraphListener class StagePageSuite extends SparkFunSuite with LocalSparkContext { private val peakExecutionMemory = 10 test("peak execution memory only displayed if unsafe is enabled") { val unsafeConf = "spark.sql.unsafe.enabled" val conf = new SparkConf(false).set(unsafeConf, "true") val html = renderStagePage(conf).toString().toLowerCase val targetString = "peak execution memory" assert(html.contains(targetString)) // Disable unsafe and make sure it's not there val conf2 = new SparkConf(false).set(unsafeConf, "false") val html2 = renderStagePage(conf2).toString().toLowerCase assert(!html2.contains(targetString)) // Avoid setting anything; it should be displayed by default val conf3 = new SparkConf(false) val html3 = renderStagePage(conf3).toString().toLowerCase assert(html3.contains(targetString)) } test("SPARK-10543: peak execution memory should be per-task rather than cumulative") { val unsafeConf = "spark.sql.unsafe.enabled" val conf = new SparkConf(false).set(unsafeConf, "true") val html = renderStagePage(conf).toString().toLowerCase // verify min/25/50/75/max show task value not cumulative values assert(html.contains(s"<td>$peakExecutionMemory.0 b</td>" * 5)) } private def renderStagePage(conf: SparkConf): Seq[Node] = { val jobListener = new JobProgressListener(conf) val graphListener = new RDDOperationGraphListener(conf) val executorsListener = new ExecutorsListener(new StorageStatusListener(conf), conf) val tab = mock(classOf[StagesTab], RETURNS_SMART_NULLS) val request = mock(classOf[HttpServletRequest]) when(tab.conf).thenReturn(conf) when(tab.progressListener).thenReturn(jobListener) when(tab.operationGraphListener).thenReturn(graphListener) when(tab.executorsListener).thenReturn(executorsListener) when(tab.appName).thenReturn("testing") when(tab.headerTabs).thenReturn(Seq.empty) when(request.getParameter("id")).thenReturn("0") when(request.getParameter("attempt")).thenReturn("0") val page = new StagePage(tab) // Simulate a stage in job progress listener val stageInfo = new StageInfo(0, 0, "dummy", 1, Seq.empty, Seq.empty, "details") // Simulate two tasks to test PEAK_EXECUTION_MEMORY correctness (1 to 2).foreach { taskId => val taskInfo = new TaskInfo(taskId, taskId, 0, 0, "0", "localhost", TaskLocality.ANY, false) jobListener.onStageSubmitted(SparkListenerStageSubmitted(stageInfo)) jobListener.onTaskStart(SparkListenerTaskStart(0, 0, taskInfo)) taskInfo.markFinished(TaskState.FINISHED) val taskMetrics = TaskMetrics.empty taskMetrics.incPeakExecutionMemory(peakExecutionMemory) jobListener.onTaskEnd( SparkListenerTaskEnd(0, 0, "result", Success, taskInfo, taskMetrics)) } jobListener.onStageCompleted(SparkListenerStageCompleted(stageInfo)) page.render(request) } }
Example 26
Source File: FakeTask.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.scheduler import org.apache.spark.SparkEnv import org.apache.spark.TaskContext import org.apache.spark.executor.TaskMetrics class FakeTask( stageId: Int, partitionId: Int, prefLocs: Seq[TaskLocation] = Nil, serializedTaskMetrics: Array[Byte] = SparkEnv.get.closureSerializer.newInstance().serialize(TaskMetrics.registered).array()) extends Task[Int](stageId, 0, partitionId, serializedTaskMetrics) { override def prepTask(): Unit = {} override def runTask(context: TaskContext): Int = 0 override def preferredLocations: Seq[TaskLocation] = prefLocs } object FakeTask { def createTaskSet(numTasks: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { createTaskSet(numTasks, stageAttemptId = 0, prefLocs: _*) } def createTaskSet(numTasks: Int, stageAttemptId: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { createTaskSet(numTasks, stageId = 0, stageAttemptId, prefLocs: _*) } def createTaskSet(numTasks: Int, stageId: Int, stageAttemptId: Int, prefLocs: Seq[TaskLocation]*): TaskSet = { if (prefLocs.size != 0 && prefLocs.size != numTasks) { throw new IllegalArgumentException("Wrong number of task locations") } val tasks = Array.tabulate[Task[_]](numTasks) { i => new FakeTask(stageId, i, if (prefLocs.size != 0) prefLocs(i) else Nil) } new TaskSet(tasks, stageId, stageAttemptId, priority = 0, null) } }
Example 27
Source File: SplashShuffleFetcherIteratorTest.scala From splash with Apache License 2.0
package org.apache.spark.shuffle import com.memverge.splash.StorageFactoryHolder import org.apache.spark.executor.TaskMetrics import org.apache.spark.storage.{ShuffleBlockId, TestBlockId} import org.assertj.core.api.Assertions.assertThat import org.assertj.core.api.Fail.fail import org.testng.annotations.{AfterMethod, BeforeMethod, Test} @Test(groups = Array("UnitTest", "IntegrationTest")) class SplashShuffleFetcherIteratorTest { private val appId = "SplashShuffleFetcherIteratorTest" private val factory = StorageFactoryHolder.getFactory private var resolver: SplashShuffleBlockResolver = _ @BeforeMethod private def beforeMethod(): Unit = { resolver = new SplashShuffleBlockResolver(appId) } @AfterMethod private def afterMethod(): Unit = { factory.reset() assertThat(factory.getShuffleFileCount(appId)) isEqualTo 0 assertThat(factory.getTmpFileCount) isEqualTo 0 } def testNext(): Unit = { val blocks = List( resolver.putShuffleBlock(2, 1, Array(10L, 20L, 30L)), resolver.putShuffleBlock(2, 2, Array(30L, 15L, 22L))) val fetchers = SplashShuffleFetcherIterator(resolver, blocks.iterator) assertThat(fetchers.hasNext).isTrue val fetcher1 = fetchers.next() assertThat(fetcher1.blockId) isEqualTo ShuffleBlockId(2, 1, 0) assertThat(fetcher1.length) isEqualTo 10 fetcher1.close() val fetcher2 = fetchers.next() assertThat(fetcher2.blockId) isEqualTo ShuffleBlockId(2, 2, 0) assertThat(fetcher2.length) isEqualTo 30 fetcher2.close() } def testDumpOnError(): Unit = { val serializer = TestUtil.kryoSerializer val blocks = List( resolver.putShuffleBlock(3, 1, Array(10L, 20L, 30L)), resolver.putShuffleBlock(3, 2, Array(30L, 15L, 22L))) val fetchers = SplashShuffleFetcherIterator(resolver, blocks.iterator) val iterator = fetchers.flatMap( fetcher => fetcher.asMetricIterator(serializer, TaskMetrics.empty)) try { iterator.next() fail("should have raised an exception.") } catch { case _: Exception => val path = resolver.getDumpFilePath(ShuffleBlockId(3, 2, 0)) assertThat(path.toFile.exists()).isTrue } } def testNoNextValue(): Unit = { val blocks = List(TestBlockId("block-1")) val fetchers = SplashShuffleFetcherIterator(resolver, blocks.iterator) assertThat(fetchers.hasNext).isFalse } def testSkipNonShuffleBlocks(): Unit = { val blocks = List( TestBlockId("block-1"), TestBlockId("block-2"), resolver.putShuffleBlock(4, 2, Array(30L, 15L, 22L))) val fetchers = SplashShuffleFetcherIterator(resolver, blocks.iterator).toArray assertThat(fetchers.length) isEqualTo 1 fetchers.foreach(_.close()) } }
Example 28
Source File: ColumnarSortExec.scala From OAP with Apache License 2.0
package com.intel.sparkColumnarPlugin.execution import com.intel.sparkColumnarPlugin.expression._ import com.intel.sparkColumnarPlugin.vectorized._ import java.util.concurrent.TimeUnit._ import org.apache.spark.{SparkEnv, TaskContext, SparkContext} import org.apache.spark.executor.TaskMetrics import org.apache.spark.sql.execution._ import org.apache.spark.sql.catalyst.expressions.SortOrder import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.rdd.RDD import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics} import org.apache.spark.sql.vectorized.{ColumnarBatch, ColumnVector} class ColumnarSortExec( sortOrder: Seq[SortOrder], global: Boolean, child: SparkPlan, testSpillFrequency: Int = 0) extends SortExec(sortOrder, global, child, testSpillFrequency) { override def supportsColumnar = true // Disable code generation override def supportCodegen: Boolean = false override lazy val metrics = Map( "totalSortTime" -> SQLMetrics .createTimingMetric(sparkContext, "time in sort + shuffle process"), "sortTime" -> SQLMetrics.createTimingMetric(sparkContext, "time in sort process"), "shuffleTime" -> SQLMetrics.createTimingMetric(sparkContext, "time in shuffle process"), "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"), "numOutputBatches" -> SQLMetrics.createMetric(sparkContext, "number of output batches")) override def doExecuteColumnar(): RDD[ColumnarBatch] = { val elapse = longMetric("totalSortTime") val sortTime = longMetric("sortTime") val shuffleTime = longMetric("shuffleTime") val numOutputRows = longMetric("numOutputRows") val numOutputBatches = longMetric("numOutputBatches") child.executeColumnar().mapPartitions { iter => val hasInput = iter.hasNext val res = if (!hasInput) { Iterator.empty } else { val sorter = ColumnarSorter.create( sortOrder, true, child.output, sortTime, numOutputBatches, numOutputRows, shuffleTime, elapse) TaskContext .get() .addTaskCompletionListener[Unit](_ => { sorter.close() }) new CloseableColumnBatchIterator(sorter.createColumnarIterator(iter)) } res } } }
Example 29
Source File: PerfListener.scala From spark-testing-base with Apache License 2.0
package com.holdenkarau.spark.testing

import scala.collection.mutable
import scala.collection.immutable

import org.apache.spark.scheduler._
import org.apache.spark.executor.TaskMetrics

// TODO(holden): See if we can make a more attributable listener

  override def onTaskEnd(taskEnd: SparkListenerTaskEnd) {
    val info = taskEnd.taskInfo
    val metrics = taskEnd.taskMetrics
    updateMetricsForTask(metrics)
  }

  private def updateMetricsForTask(metrics: TaskMetrics): Unit = {
    totalExecutorRunTime += metrics.executorRunTime
    jvmGCTime += metrics.jvmGCTime
    resultSerializationTime += metrics.resultSerializationTime
    metrics.inputMetrics match {
      case Some(inputMetrics) =>
        recordsRead += inputMetrics.recordsRead
      case _ =>
    }
    metrics.outputMetrics match {
      case Some(outputMetrics) =>
        recordsWritten += outputMetrics.recordsWritten
      case _ =>
    }
  }
}
//end::listener[]
Example 30
Source File: PerfListener.scala From spark-testing-base with Apache License 2.0
package com.holdenkarau.spark.testing

import scala.collection.mutable
import scala.collection.immutable

import org.apache.spark.scheduler._
import org.apache.spark.executor.TaskMetrics

// TODO(holden): See if we can make a more attributable listener

  override def onTaskEnd(taskEnd: SparkListenerTaskEnd) {
    val info = taskEnd.taskInfo
    val metrics = taskEnd.taskMetrics
    updateMetricsForTask(metrics)
  }

  private def updateMetricsForTask(metrics: TaskMetrics): Unit = {
    totalExecutorRunTime += metrics.executorRunTime
    jvmGCTime += metrics.jvmGCTime
    resultSerializationTime += metrics.resultSerializationTime
    recordsRead += metrics.inputMetrics.recordsRead
    recordsWritten += metrics.outputMetrics.recordsWritten
  }
}
//end::listener[]
Example 31
Source File: ShuffleMapTask.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.scheduler import java.lang.management.ManagementFactory import java.nio.ByteBuffer import java.util.Properties import scala.language.existentials import org.apache.spark._ import org.apache.spark.broadcast.Broadcast import org.apache.spark.executor.TaskMetrics import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.shuffle.ShuffleWriter import org.apache.spark.storage.BlockManagerId def this(partitionId: Int) { this(0, 0, null, new Partition { override def index: Int = 0 }, null, new Properties, null) } @transient private val preferredLocs: Seq[TaskLocation] = { if (locs == null) Nil else locs.toSet.toSeq } var rdd: RDD[_] = null var dep: ShuffleDependency[_, _, _] = null override def prepTask(): Unit = { // Deserialize the RDD using the broadcast variable. val threadMXBean = ManagementFactory.getThreadMXBean val deserializeStartTime = System.currentTimeMillis() val deserializeStartCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) { threadMXBean.getCurrentThreadCpuTime } else 0L val ser = SparkEnv.get.closureSerializer.newInstance() val (_rdd, _dep) = ser.deserialize[(RDD[_], ShuffleDependency[_, _, _])]( ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader) rdd = _rdd dep = _dep _executorDeserializeTime = System.currentTimeMillis() - deserializeStartTime _executorDeserializeCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) { threadMXBean.getCurrentThreadCpuTime - deserializeStartCpuTime } else 0L } override def runTask(context: TaskContext): MapStatus = { if (dep == null || rdd == null) { prepTask() } var writer: ShuffleWriter[Any, Any] = null try { val manager = SparkEnv.get.shuffleManager writer = manager.getWriter[Any, Any](dep.shuffleHandle, partitionId, context) writer.write(rdd.iterator(partition, context).asInstanceOf[Iterator[_ <: Product2[Any, Any]]]) val status = writer.stop(success = true).get FutureTaskNotifier.taskCompleted(status, partitionId, dep.shuffleId, dep.partitioner.numPartitions, nextStageLocs, metrics.shuffleWriteMetrics, false) status } catch { case e: Exception => try { if (writer != null) { writer.stop(success = false) } } catch { case e: Exception => log.debug("Could not stop writer", e) } throw e } } override def preferredLocations: Seq[TaskLocation] = preferredLocs override def toString: String = "ShuffleMapTask(%d, %d)".format(stageId, partitionId) } object ShuffleMapTask { def apply( stageId: Int, stageAttemptId: Int, partition: Partition, properties: Properties, internalAccumulatorsSer: Array[Byte], isFutureTask: Boolean, rdd: RDD[_], dep: ShuffleDependency[_, _, _], nextStageLocs: Option[Seq[BlockManagerId]]): ShuffleMapTask = { val smt = new ShuffleMapTask(stageId, stageAttemptId, null, partition, null, properties, internalAccumulatorsSer, isFutureTask, nextStageLocs) smt.rdd = rdd smt.dep = dep smt } }
Example 32
Source File: ResultTask.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler

import java.io._
import java.lang.management.ManagementFactory
import java.nio.ByteBuffer
import java.util.Properties

import org.apache.spark._
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.rdd.RDD

private[spark] class ResultTask[T, U](
    stageId: Int,
    stageAttemptId: Int,
    taskBinary: Broadcast[Array[Byte]],
    partition: Partition,
    locs: Seq[TaskLocation],
    val outputId: Int,
    localProperties: Properties,
    metrics: TaskMetrics,
    jobId: Option[Int] = None,
    appId: Option[String] = None,
    appAttemptId: Option[String] = None)
  extends Task[U](stageId, stageAttemptId, partition.index, metrics, localProperties, jobId,
    appId, appAttemptId)
  with Serializable {

  @transient private[this] val preferredLocs: Seq[TaskLocation] = {
    if (locs == null) Nil else locs.toSet.toSeq
  }

  override def runTask(context: TaskContext): U = {
    // Deserialize the RDD and the func using the broadcast variables.
    val threadMXBean = ManagementFactory.getThreadMXBean
    val deserializeStartTime = System.currentTimeMillis()
    val deserializeStartCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) {
      threadMXBean.getCurrentThreadCpuTime
    } else 0L
    val ser = SparkEnv.get.closureSerializer.newInstance()
    val (rdd, func) = ser.deserialize[(RDD[T], (TaskContext, Iterator[T]) => U)](
      ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader)
    _executorDeserializeTime = System.currentTimeMillis() - deserializeStartTime
    _executorDeserializeCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) {
      threadMXBean.getCurrentThreadCpuTime - deserializeStartCpuTime
    } else 0L

    func(context, rdd.iterator(partition, context))
  }

  // This is only callable on the driver side.
  override def preferredLocations: Seq[TaskLocation] = preferredLocs

  override def toString: String = "ResultTask(" + stageId + ", " + partitionId + ")"
}
Example 33
Source File: TaskContextImpl.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark

import java.util.Properties

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.executor.TaskMetrics
import org.apache.spark.internal.Logging
import org.apache.spark.memory.TaskMemoryManager
import org.apache.spark.metrics.MetricsSystem
import org.apache.spark.metrics.source.Source
import org.apache.spark.util._

private[spark] class TaskContextImpl(
    val stageId: Int,
    val partitionId: Int,
    override val taskAttemptId: Long,
    override val attemptNumber: Int,
    override val taskMemoryManager: TaskMemoryManager,
    localProperties: Properties,
    @transient private val metricsSystem: MetricsSystem,
    // The default value is only used in tests.
    override val taskMetrics: TaskMetrics = TaskMetrics.empty)
  extends TaskContext
  with Logging {

  private[spark] def markInterrupted(): Unit = {
    interrupted = true
  }

  override def isCompleted(): Boolean = completed

  override def isRunningLocally(): Boolean = false

  override def isInterrupted(): Boolean = interrupted

  override def getLocalProperty(key: String): String = localProperties.getProperty(key)

  override def getMetricsSources(sourceName: String): Seq[Source] =
    metricsSystem.getSourcesByName(sourceName)

  private[spark] override def registerAccumulator(a: AccumulatorV2[_, _]): Unit = {
    taskMetrics.registerAccumulator(a)
  }
}
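Because taskMetrics defaults to TaskMetrics.empty, tests usually construct the context directly and register accumulators through it. A minimal sketch against the constructor shown above; it must sit in the org.apache.spark package because TaskContextImpl and registerAccumulator are private[spark], the nulls stand in for collaborators the sketch never touches, and TaskContextImplSketch is a made-up object name.

package org.apache.spark

import java.util.Properties

import org.apache.spark.executor.TaskMetrics
import org.apache.spark.util.LongAccumulator

object TaskContextImplSketch {
  def main(args: Array[String]): Unit = {
    // Build a bare-bones task context the way unit tests typically do.
    val taskMetrics = TaskMetrics.empty
    val context = new TaskContextImpl(
      stageId = 0,
      partitionId = 0,
      taskAttemptId = 0L,
      attemptNumber = 0,
      taskMemoryManager = null,   // never used here
      localProperties = new Properties,
      metricsSystem = null,       // never used here
      taskMetrics = taskMetrics)

    val acc = new LongAccumulator
    context.registerAccumulator(acc)          // delegates to taskMetrics.registerAccumulator
    assert(context.taskMetrics eq taskMetrics) // the context exposes the same TaskMetrics object
  }
}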
Example 34
Source File: StagePageSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.ui

import javax.servlet.http.HttpServletRequest

import scala.xml.Node

import org.mockito.Mockito.{mock, when, RETURNS_SMART_NULLS}

import org.apache.spark._
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler._
import org.apache.spark.storage.StorageStatusListener
import org.apache.spark.ui.exec.ExecutorsListener
import org.apache.spark.ui.jobs.{JobProgressListener, StagePage, StagesTab}
import org.apache.spark.ui.scope.RDDOperationGraphListener

class StagePageSuite extends SparkFunSuite with LocalSparkContext {

  private val peakExecutionMemory = 10

  test("peak execution memory should displayed") {
    val conf = new SparkConf(false)
    val html = renderStagePage(conf).toString().toLowerCase
    val targetString = "peak execution memory"
    assert(html.contains(targetString))
  }

  test("SPARK-10543: peak execution memory should be per-task rather than cumulative") {
    val conf = new SparkConf(false)
    val html = renderStagePage(conf).toString().toLowerCase
    // verify min/25/50/75/max show task value not cumulative values
    assert(html.contains(s"<td>$peakExecutionMemory.0 b</td>" * 5))
  }

  private def renderStagePage(conf: SparkConf): Seq[Node] = {
    val jobListener = new JobProgressListener(conf)
    val graphListener = new RDDOperationGraphListener(conf)
    val executorsListener = new ExecutorsListener(new StorageStatusListener(conf), conf)
    val tab = mock(classOf[StagesTab], RETURNS_SMART_NULLS)
    val request = mock(classOf[HttpServletRequest])
    when(tab.conf).thenReturn(conf)
    when(tab.progressListener).thenReturn(jobListener)
    when(tab.operationGraphListener).thenReturn(graphListener)
    when(tab.executorsListener).thenReturn(executorsListener)
    when(tab.appName).thenReturn("testing")
    when(tab.headerTabs).thenReturn(Seq.empty)
    when(request.getParameter("id")).thenReturn("0")
    when(request.getParameter("attempt")).thenReturn("0")
    val page = new StagePage(tab)

    // Simulate a stage in job progress listener
    val stageInfo = new StageInfo(0, 0, "dummy", 1, Seq.empty, Seq.empty, "details")
    // Simulate two tasks to test PEAK_EXECUTION_MEMORY correctness
    (1 to 2).foreach { taskId =>
      val taskInfo = new TaskInfo(taskId, taskId, 0, 0, "0", "localhost", TaskLocality.ANY, false)
      jobListener.onStageSubmitted(SparkListenerStageSubmitted(stageInfo))
      jobListener.onTaskStart(SparkListenerTaskStart(0, 0, taskInfo))
      taskInfo.markFinished(TaskState.FINISHED)
      val taskMetrics = TaskMetrics.empty
      taskMetrics.incPeakExecutionMemory(peakExecutionMemory)
      jobListener.onTaskEnd(
        SparkListenerTaskEnd(0, 0, "result", Success, taskInfo, taskMetrics))
    }
    jobListener.onStageCompleted(SparkListenerStageCompleted(stageInfo))
    page.render(request)
  }
}
Example 35
Source File: UIData.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.jobs

import org.apache.spark.JobExecutionStatus
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
import org.apache.spark.util.collection.OpenHashSet

import scala.collection.mutable.HashMap

private[jobs] object UIData {

  class ExecutorSummary {
    var taskTime: Long = 0
    var failedTasks: Int = 0
    var succeededTasks: Int = 0
    var inputBytes: Long = 0
    var inputRecords: Long = 0
    var outputBytes: Long = 0
    var outputRecords: Long = 0
    var shuffleRead: Long = 0
    var shuffleReadRecords: Long = 0
    var shuffleWrite: Long = 0
    var shuffleWriteRecords: Long = 0
    var memoryBytesSpilled: Long = 0
    var diskBytesSpilled: Long = 0
  }

  class JobUIData(
      var jobId: Int = -1,
      var submissionTime: Option[Long] = None,
      var completionTime: Option[Long] = None,
      var stageIds: Seq[Int] = Seq.empty,
      var jobGroup: Option[String] = None,
      var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN)

  case class TaskUIData(
      var taskInfo: TaskInfo,
      var taskMetrics: Option[TaskMetrics] = None,
      var errorMessage: Option[String] = None)
}
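The ExecutorSummary counters line up one-for-one with the per-task fields on TaskMetrics. A rough sketch of how a UI listener might fold one finished task into a summary, assuming Spark 2.x-style accessors (this SparkCore snapshot wraps some of them in Option) and placed in org.apache.spark.ui.jobs because UIData is private[jobs]; ExecutorSummaryUpdater is a made-up helper name.

package org.apache.spark.ui.jobs

import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.TaskInfo

object ExecutorSummaryUpdater {
  // Fold one *finished* task's metrics into the running per-executor totals.
  def update(summary: UIData.ExecutorSummary, info: TaskInfo, metrics: TaskMetrics): Unit = {
    summary.taskTime += info.duration
    if (info.successful) summary.succeededTasks += 1 else summary.failedTasks += 1
    summary.inputBytes += metrics.inputMetrics.bytesRead
    summary.inputRecords += metrics.inputMetrics.recordsRead
    summary.outputBytes += metrics.outputMetrics.bytesWritten
    summary.outputRecords += metrics.outputMetrics.recordsWritten
    summary.shuffleRead += metrics.shuffleReadMetrics.totalBytesRead
    summary.shuffleReadRecords += metrics.shuffleReadMetrics.recordsRead
    summary.shuffleWrite += metrics.shuffleWriteMetrics.bytesWritten
    summary.shuffleWriteRecords += metrics.shuffleWriteMetrics.recordsWritten
    summary.memoryBytesSpilled += metrics.memoryBytesSpilled
    summary.diskBytesSpilled += metrics.diskBytesSpilled
  }
}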
Example 36
Source File: TaskResult.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler

import java.io._
import java.nio.ByteBuffer

import scala.collection.mutable.Map

import org.apache.spark.SparkEnv
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.storage.BlockId
import org.apache.spark.util.Utils

// Task result. Also contains updates to accumulator variables.
private[spark] sealed trait TaskResult[T]

private[spark] class DirectTaskResult[T](
    var valueBytes: ByteBuffer,
    var accumUpdates: Map[Long, Any],
    var metrics: TaskMetrics)
  extends TaskResult[T] with Externalizable {

  def this() = this(null.asInstanceOf[ByteBuffer], null, null)

  override def writeExternal(out: ObjectOutput): Unit = Utils.tryOrIOException {
    out.writeInt(valueBytes.remaining)
    Utils.writeByteBuffer(valueBytes, out)
    out.writeInt(accumUpdates.size)
    for ((key, value) <- accumUpdates) {
      out.writeLong(key)
      out.writeObject(value)
    }
    out.writeObject(metrics)
  }

  override def readExternal(in: ObjectInput): Unit = Utils.tryOrIOException {
    val blen = in.readInt()
    val byteVal = new Array[Byte](blen)
    in.readFully(byteVal)
    valueBytes = ByteBuffer.wrap(byteVal)

    val numUpdates = in.readInt
    if (numUpdates == 0) {
      accumUpdates = null
    } else {
      accumUpdates = Map()
      for (i <- 0 until numUpdates) {
        accumUpdates(in.readLong()) = in.readObject()
      }
    }
    metrics = in.readObject().asInstanceOf[TaskMetrics]
  }

  def value(): T = {
    val resultSer = SparkEnv.get.serializer.newInstance()
    resultSer.deserialize(valueBytes)
  }
}
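Because DirectTaskResult mixes in Externalizable, plain Java serialization routes through writeExternal and readExternal above. A hypothetical round-trip check, under the assumptions that it compiles inside org.apache.spark.scheduler (the class is private[spark]) and that TaskMetrics.empty serializes as it does when task results are shipped; DirectTaskResultRoundTrip is a made-up name.

package org.apache.spark.scheduler

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer

import scala.collection.mutable

import org.apache.spark.executor.TaskMetrics

object DirectTaskResultRoundTrip {
  def main(args: Array[String]): Unit = {
    val original = new DirectTaskResult[Int](
      ByteBuffer.wrap(Array[Byte](1, 2, 3)),
      mutable.Map[Long, Any](1L -> 42L),
      TaskMetrics.empty)

    // Serialize: ObjectOutputStream calls writeExternal for Externalizable classes.
    val buffer = new ByteArrayOutputStream()
    val out = new ObjectOutputStream(buffer)
    out.writeObject(original)
    out.close()

    // Deserialize and check that the buffer and accumulator map survived the trip.
    val in = new ObjectInputStream(new ByteArrayInputStream(buffer.toByteArray))
    val copy = in.readObject().asInstanceOf[DirectTaskResult[Int]]
    assert(copy.valueBytes.remaining == 3)
    assert(copy.accumUpdates(1L) == 42L)
    assert(copy.metrics != null)
  }
}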
Example 37
Source File: TaskContextImpl.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark

import org.apache.spark.executor.TaskMetrics
import org.apache.spark.util.{TaskCompletionListener, TaskCompletionListenerException}

import scala.collection.mutable.ArrayBuffer

private[spark] class TaskContextImpl(
    val stageId: Int,
    val partitionId: Int,
    override val taskAttemptId: Long,
    override val attemptNumber: Int,
    val runningLocally: Boolean = false,
    val taskMetrics: TaskMetrics = TaskMetrics.empty)
  extends TaskContext
  with Logging {

  // For backwards-compatibility; this method is now deprecated as of 1.3.0.
  override def attemptId(): Long = taskAttemptId

  // List of callback functions to execute when the task completes.
  @transient private val onCompleteCallbacks = new ArrayBuffer[TaskCompletionListener]

  // Whether the corresponding task has been killed.
  @volatile private var interrupted: Boolean = false

  // Whether the task has completed.
  @volatile private var completed: Boolean = false

  override def addTaskCompletionListener(listener: TaskCompletionListener): this.type = {
    onCompleteCallbacks += listener
    this
  }

  override def addTaskCompletionListener(f: TaskContext => Unit): this.type = {
    onCompleteCallbacks += new TaskCompletionListener {
      override def onTaskCompletion(context: TaskContext): Unit = f(context)
    }
    this
  }

  @deprecated("use addTaskCompletionListener", "1.1.0")
  override def addOnCompleteCallback(f: () => Unit) {
    onCompleteCallbacks += new TaskCompletionListener {
      override def onTaskCompletion(context: TaskContext): Unit = f()
    }
  }

  private[spark] def markInterrupted(): Unit = {
    interrupted = true
  }

  override def isCompleted(): Boolean = completed

  override def isRunningLocally(): Boolean = runningLocally

  override def isInterrupted(): Boolean = interrupted
}
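The completion-listener registration shown above is what user code reaches through TaskContext.get() inside a task. A minimal sketch of that pattern, assuming a local-mode SparkContext; the ByteArrayOutputStream is only a stand-in for a real resource handle.

import org.apache.spark.{SparkConf, SparkContext, TaskContext}

object CompletionListenerExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setMaster("local[2]").setAppName("completion-listener"))

    val n = sc.parallelize(1 to 100, 4).mapPartitions { iter =>
      val resource = new java.io.ByteArrayOutputStream()  // stand-in for a real handle
      // Release the resource when the task finishes, whether it succeeds or fails.
      TaskContext.get().addTaskCompletionListener { _ => resource.close() }
      iter.map(_ * 2)
    }.count()

    println(s"processed $n records")
    sc.stop()
  }
}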
Example 38
Source File: StageInfo.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler

import scala.collection.mutable.HashMap

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.storage.RDDInfo

  def fromStage(
      stage: Stage,
      attemptId: Int,
      numTasks: Option[Int] = None,
      taskMetrics: TaskMetrics = null,
      taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty
    ): StageInfo = {
    val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd)
    val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos
    new StageInfo(
      stage.id,
      attemptId,
      stage.name,
      numTasks.getOrElse(stage.numTasks),
      rddInfos,
      stage.parents.map(_.id),
      stage.details,
      taskMetrics,
      taskLocalityPreferences)
  }
}
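The StageInfo built by fromStage is what arrives in SparkListenerStageCompleted events. A minimal sketch of consuming it on the driver side; StageReporter is a made-up name.

import org.apache.spark.scheduler.{SparkListener, SparkListenerStageCompleted}

// Reports basic stage-level information from the StageInfo delivered on the listener bus.
class StageReporter extends SparkListener {
  override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = {
    val info = stageCompleted.stageInfo
    val runtimeMs = for {
      start <- info.submissionTime
      end <- info.completionTime
    } yield end - start
    println(s"stage ${info.stageId} '${info.name}' ran ${info.numTasks} tasks " +
      s"in ${runtimeMs.getOrElse(-1L)} ms")
  }
}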
Example 39
Source File: ResultTask.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler

import java.io._
import java.lang.management.ManagementFactory
import java.nio.ByteBuffer
import java.util.Properties

import org.apache.spark._
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.rdd.RDD

private[spark] class ResultTask[T, U](
    stageId: Int,
    stageAttemptId: Int,
    taskBinary: Broadcast[Array[Byte]],
    partition: Partition,
    locs: Seq[TaskLocation],
    val outputId: Int,
    localProperties: Properties,
    metrics: TaskMetrics,
    jobId: Option[Int] = None,
    appId: Option[String] = None,
    appAttemptId: Option[String] = None)
  extends Task[U](stageId, stageAttemptId, partition.index, metrics, localProperties, jobId,
    appId, appAttemptId)
  with Serializable {

  @transient private[this] val preferredLocs: Seq[TaskLocation] = {
    if (locs == null) Nil else locs.toSet.toSeq
  }

  override def runTask(context: TaskContext, user: String): U = {
    // Deserialize the RDD and the func using the broadcast variables.
    val threadMXBean = ManagementFactory.getThreadMXBean
    val deserializeStartTime = System.currentTimeMillis()
    val deserializeStartCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) {
      threadMXBean.getCurrentThreadCpuTime
    } else 0L
    val ser = SparkEnv.get(user).closureSerializer.newInstance()
    val (rdd, func) = ser.deserialize[(RDD[T], (TaskContext, Iterator[T]) => U)](
      ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader)
    _executorDeserializeTime = System.currentTimeMillis() - deserializeStartTime
    _executorDeserializeCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) {
      threadMXBean.getCurrentThreadCpuTime - deserializeStartCpuTime
    } else 0L

    func(context, rdd.iterator(partition, context))
  }

  // This is only callable on the driver side.
  override def preferredLocations: Seq[TaskLocation] = preferredLocs

  override def toString: String = "ResultTask(" + stageId + ", " + partitionId + ")"
}