org.apache.spark.scheduler.AccumulableInfo Scala Examples
The following examples show how to use org.apache.spark.scheduler.AccumulableInfo.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: SQLMetrics.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.metric import java.text.NumberFormat import java.util.Locale import org.apache.spark.SparkContext import org.apache.spark.scheduler.AccumulableInfo import org.apache.spark.util.{AccumulatorContext, AccumulatorV2, Utils} class SQLMetric(val metricType: String, initValue: Long = 0L) extends AccumulatorV2[Long, Long] { // This is a workaround for SPARK-11013. // We may use -1 as initial value of the accumulator, if the accumulator is valid, we will // update it at the end of task and the value will be at least 0. Then we can filter out the -1 // values before calculate max, min, etc. private[this] var _value = initValue private var _zeroValue = initValue override def copy(): SQLMetric = { val newAcc = new SQLMetric(metricType, _value) newAcc._zeroValue = initValue newAcc } override def reset(): Unit = _value = _zeroValue override def merge(other: AccumulatorV2[Long, Long]): Unit = other match { case o: SQLMetric => _value += o.value case _ => throw new UnsupportedOperationException( s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}") } override def isZero(): Boolean = _value == _zeroValue override def add(v: Long): Unit = _value += v def +=(v: Long): Unit = _value += v override def value: Long = _value // Provide special identifier as metadata so we can tell that this is a `SQLMetric` later override def toInfo(update: Option[Any], value: Option[Any]): AccumulableInfo = { new AccumulableInfo( id, name, update, value, true, true, Some(AccumulatorContext.SQL_ACCUM_IDENTIFIER)) } } object SQLMetrics { private val SUM_METRIC = "sum" private val SIZE_METRIC = "size" private val TIMING_METRIC = "timing" def createMetric(sc: SparkContext, name: String): SQLMetric = { val acc = new SQLMetric(SUM_METRIC) acc.register(sc, name = Some(name), countFailedValues = false) acc } def stringValue(metricsType: String, values: Seq[Long]): String = { if (metricsType == SUM_METRIC) { val numberFormat = NumberFormat.getIntegerInstance(Locale.ENGLISH) numberFormat.format(values.sum) } else { val strFormat: Long => String = if (metricsType == SIZE_METRIC) { Utils.bytesToString } else if (metricsType == TIMING_METRIC) { Utils.msDurationToString } else { throw new IllegalStateException("unexpected metrics type: " + metricsType) } val validValues = values.filter(_ >= 0) val Seq(sum, min, med, max) = { val metric = if (validValues.isEmpty) { Seq.fill(4)(0L) } else { val sorted = validValues.sorted Seq(sorted.sum, sorted(0), sorted(validValues.length / 2), sorted(validValues.length - 1)) } metric.map(strFormat) } s"\n$sum ($min, $med, $max)" } } }
Example 2
Source File: SQLMetrics.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.metric import java.text.NumberFormat import java.util.Locale import org.apache.spark.SparkContext import org.apache.spark.scheduler.AccumulableInfo import org.apache.spark.util.{AccumulatorContext, AccumulatorV2, Utils} class SQLMetric(val metricType: String, initValue: Long = 0L) extends AccumulatorV2[Long, Long] { // This is a workaround for SPARK-11013. // We may use -1 as initial value of the accumulator, if the accumulator is valid, we will // update it at the end of task and the value will be at least 0. Then we can filter out the -1 // values before calculate max, min, etc. private[this] var _value = initValue private var _zeroValue = initValue override def copy(): SQLMetric = { val newAcc = new SQLMetric(metricType, _value) newAcc._zeroValue = initValue newAcc } override def reset(): Unit = _value = _zeroValue override def merge(other: AccumulatorV2[Long, Long]): Unit = other match { case o: SQLMetric => _value += o.value case _ => throw new UnsupportedOperationException( s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}") } override def isZero(): Boolean = _value == _zeroValue override def add(v: Long): Unit = _value += v def +=(v: Long): Unit = _value += v override def value: Long = _value // Provide special identifier as metadata so we can tell that this is a `SQLMetric` later override def toInfo(update: Option[Any], value: Option[Any]): AccumulableInfo = { new AccumulableInfo( id, name, update, value, true, true, Some(AccumulatorContext.SQL_ACCUM_IDENTIFIER)) } } object SQLMetrics { private val SUM_METRIC = "sum" private val SIZE_METRIC = "size" private val TIMING_METRIC = "timing" def createMetric(sc: SparkContext, name: String): SQLMetric = { val acc = new SQLMetric(SUM_METRIC) acc.register(sc, name = Some(name), countFailedValues = false) acc } def stringValue(metricsType: String, values: Seq[Long]): String = { if (metricsType == SUM_METRIC) { val numberFormat = NumberFormat.getIntegerInstance(Locale.US) numberFormat.format(values.sum) } else { val strFormat: Long => String = if (metricsType == SIZE_METRIC) { Utils.bytesToString } else if (metricsType == TIMING_METRIC) { Utils.msDurationToString } else { throw new IllegalStateException("unexpected metrics type: " + metricsType) } val validValues = values.filter(_ >= 0) val Seq(sum, min, med, max) = { val metric = if (validValues.isEmpty) { Seq.fill(4)(0L) } else { val sorted = validValues.sorted Seq(sorted.sum, sorted(0), sorted(validValues.length / 2), sorted(validValues.length - 1)) } metric.map(strFormat) } s"\n$sum ($min, $med, $max)" } } }
Example 3
Source File: UIData.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.jobs import org.apache.spark.JobExecutionStatus import org.apache.spark.executor.TaskMetrics import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo} import org.apache.spark.util.collection.OpenHashSet import scala.collection.mutable.HashMap private[jobs] object UIData { class ExecutorSummary { var taskTime : Long = 0 var failedTasks : Int = 0 var succeededTasks : Int = 0 var inputBytes : Long = 0 var inputRecords : Long = 0 var outputBytes : Long = 0 var outputRecords : Long = 0 var shuffleRead : Long = 0 var shuffleReadRecords : Long = 0 var shuffleWrite : Long = 0 var shuffleWriteRecords : Long = 0 var memoryBytesSpilled : Long = 0 var diskBytesSpilled : Long = 0 } class JobUIData( var jobId: Int = -1, var submissionTime: Option[Long] = None, var completionTime: Option[Long] = None, var stageIds: Seq[Int] = Seq.empty, var jobGroup: Option[String] = None, var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN, case class TaskUIData( var taskInfo: TaskInfo, var taskMetrics: Option[TaskMetrics] = None, var errorMessage: Option[String] = None) }
Example 4
Source File: SQLMetrics.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.metric import java.text.NumberFormat import java.util.Locale import org.apache.spark.SparkContext import org.apache.spark.scheduler.AccumulableInfo import org.apache.spark.util.{AccumulatorContext, AccumulatorV2, Utils} class SQLMetric(val metricType: String, initValue: Long = 0L) extends AccumulatorV2[Long, Long] { // This is a workaround for SPARK-11013. // We may use -1 as initial value of the accumulator, if the accumulator is valid, we will // update it at the end of task and the value will be at least 0. Then we can filter out the -1 // values before calculate max, min, etc. private[this] var _value = initValue private var _zeroValue = initValue override def copy(): SQLMetric = { val newAcc = new SQLMetric(metricType, _value) newAcc._zeroValue = initValue newAcc } override def reset(): Unit = _value = _zeroValue override def merge(other: AccumulatorV2[Long, Long]): Unit = other match { case o: SQLMetric => _value += o.value case _ => throw new UnsupportedOperationException( s"Cannot merge ${this.getClass.getName} with ${other.getClass.getName}") } override def isZero(): Boolean = _value == _zeroValue override def add(v: Long): Unit = _value += v def +=(v: Long): Unit = _value += v override def value: Long = _value // Provide special identifier as metadata so we can tell that this is a `SQLMetric` later override def toInfo(update: Option[Any], value: Option[Any]): AccumulableInfo = { new AccumulableInfo( id, name, update, value, true, true, Some(AccumulatorContext.SQL_ACCUM_IDENTIFIER)) } } object SQLMetrics { private val SUM_METRIC = "sum" private val SIZE_METRIC = "size" private val TIMING_METRIC = "timing" def createMetric(sc: SparkContext, name: String): SQLMetric = { val acc = new SQLMetric(SUM_METRIC) acc.register(sc, name = Some(name), countFailedValues = false) acc } def stringValue(metricsType: String, values: Seq[Long]): String = { if (metricsType == SUM_METRIC) { val numberFormat = NumberFormat.getIntegerInstance(Locale.US) numberFormat.format(values.sum) } else { val strFormat: Long => String = if (metricsType == SIZE_METRIC) { Utils.bytesToString } else if (metricsType == TIMING_METRIC) { Utils.msDurationToString } else { throw new IllegalStateException("unexpected metrics type: " + metricsType) } val validValues = values.filter(_ >= 0) val Seq(sum, min, med, max) = { val metric = if (validValues.isEmpty) { Seq.fill(4)(0L) } else { val sorted = validValues.sorted Seq(sorted.sum, sorted(0), sorted(validValues.length / 2), sorted(validValues.length - 1)) } metric.map(strFormat) } s"\n$sum ($min, $med, $max)" } } }
Example 5
Source File: UIData.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.jobs import org.apache.spark.JobExecutionStatus import org.apache.spark.executor.TaskMetrics import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo} import org.apache.spark.util.collection.OpenHashSet import scala.collection.mutable.HashMap private[spark] object UIData { class ExecutorSummary { var taskTime : Long = 0 var failedTasks : Int = 0 var succeededTasks : Int = 0 var inputBytes : Long = 0 var inputRecords : Long = 0 var outputBytes : Long = 0 var outputRecords : Long = 0 var shuffleRead : Long = 0 var shuffleReadRecords : Long = 0 var shuffleWrite : Long = 0 var shuffleWriteRecords : Long = 0 var memoryBytesSpilled : Long = 0 var diskBytesSpilled : Long = 0 } class JobUIData( var jobId: Int = -1, var submissionTime: Option[Long] = None, var completionTime: Option[Long] = None, var stageIds: Seq[Int] = Seq.empty, var jobGroup: Option[String] = None, var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN, case class TaskUIData( var taskInfo: TaskInfo, var taskMetrics: Option[TaskMetrics] = None, var errorMessage: Option[String] = None) case class ExecutorUIData( val startTime: Long, var finishTime: Option[Long] = None, var finishReason: Option[String] = None) }
Example 6
Source File: UIData.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.jobs import org.apache.spark.JobExecutionStatus import org.apache.spark.executor.TaskMetrics import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo} import org.apache.spark.util.collection.OpenHashSet import scala.collection.mutable import scala.collection.mutable.HashMap private[spark] object UIData { class ExecutorSummary { var taskTime : Long = 0//任务时间 var failedTasks : Int = 0//失败任务数 var succeededTasks : Int = 0//完成任务数 var inputBytes : Long = 0 var inputRecords : Long = 0 var outputBytes : Long = 0 var outputRecords : Long = 0 var shuffleRead : Long = 0 var shuffleReadRecords : Long = 0 var shuffleWrite : Long = 0 var shuffleWriteRecords : Long = 0 var memoryBytesSpilled : Long = 0 var diskBytesSpilled : Long = 0 } class JobUIData( var jobId: Int = -1, var submissionTime: Option[Long] = None,//提交时间 var completionTime: Option[Long] = None,//完成时间 var stageIds: Seq[Int] = Seq.empty, var jobGroup: Option[String] = None, var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN, case class TaskUIData( var taskInfo: TaskInfo, var taskMetrics: Option[TaskMetrics] = None, var errorMessage: Option[String] = None) case class ExecutorUIData( val startTime: Long, var finishTime: Option[Long] = None, var finishReason: Option[String] = None) }
Example 7
Source File: UIData.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.jobs import org.apache.spark.JobExecutionStatus import org.apache.spark.executor.TaskMetrics import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo} import org.apache.spark.util.collection.OpenHashSet import scala.collection.mutable import scala.collection.mutable.HashMap private[spark] object UIData { class ExecutorSummary { var taskTime : Long = 0 var failedTasks : Int = 0 var succeededTasks : Int = 0 var inputBytes : Long = 0 var inputRecords : Long = 0 var outputBytes : Long = 0 var outputRecords : Long = 0 var shuffleRead : Long = 0 var shuffleReadRecords : Long = 0 var shuffleWrite : Long = 0 var shuffleWriteRecords : Long = 0 var memoryBytesSpilled : Long = 0 var diskBytesSpilled : Long = 0 } class JobUIData( var jobId: Int = -1, var submissionTime: Option[Long] = None, var completionTime: Option[Long] = None, var stageIds: Seq[Int] = Seq.empty, var jobGroup: Option[String] = None, var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN, case class TaskUIData( var taskInfo: TaskInfo, var taskMetrics: Option[TaskMetrics] = None, var errorMessage: Option[String] = None) case class ExecutorUIData( val startTime: Long, var finishTime: Option[Long] = None, var finishReason: Option[String] = None) }