org.apache.spark.executor.InputMetrics Scala Examples

The following examples show how to use org.apache.spark.executor.InputMetrics. Each example lists its source file, the project it comes from, and that project's license.
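
For orientation: InputMetrics hangs off a task's TaskMetrics and is reached through TaskContext inside a running task. Its bytesRead and recordsRead accessors are public, while the increment/set methods are private[spark], which is why both example files below declare themselves in the org.apache.spark package. The following is a minimal sketch that only reads the metrics from inside a task; the input path and the local master URL are placeholder values, not part of either project.

import org.apache.spark.{SparkConf, SparkContext, TaskContext}

object InputMetricsPeek {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setAppName("input-metrics-peek").setMaster("local[*]"))
    try {
      sc.textFile("data/sample.txt") // placeholder path
        .mapPartitions { iter =>
          val records = iter.toArray
          // Metrics accumulated so far by this task while reading its input split.
          val metrics = TaskContext.get().taskMetrics().inputMetrics
          println(s"bytesRead=${metrics.bytesRead}, recordsRead=${metrics.recordsRead}")
          records.iterator
        }
        .count()
    } finally {
      sc.stop()
    }
  }
}
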
Example 1
Source File: CarbonInputMetrics.scala    From carbondata   with Apache License 2.0
package org.apache.spark

import java.lang.Long

import org.apache.spark.executor.InputMetrics

import org.apache.carbondata.common.logging.LogServiceFactory
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.TaskMetricsMap
import org.apache.carbondata.hadoop.CarbonMultiBlockSplit
import org.apache.carbondata.spark.InitInputMetrics



class CarbonInputMetrics extends InitInputMetrics {
  @transient val LOGGER = LogServiceFactory.getLogService(this.getClass.getName)
  var inputMetrics: InputMetrics = _
  // bytes already read before this compute by other map RDDs in the lineage
  var existingBytesRead: Long = _
  var recordCount: Long = _
  var inputMetricsInterval: Long = _
  var carbonMultiBlockSplit: CarbonMultiBlockSplit = _

  def initBytesReadCallback(context: TaskContext,
      carbonMultiBlockSplit: CarbonMultiBlockSplit, inputMetricsInterval: Long): Unit = {
    inputMetrics = context.taskMetrics().inputMetrics
    existingBytesRead = inputMetrics.bytesRead
    recordCount = 0L
    this.inputMetricsInterval = inputMetricsInterval
    this.carbonMultiBlockSplit = carbonMultiBlockSplit
  }

  def incrementRecordRead(recordRead: Long): Unit = {
    val value: scala.Long = recordRead
    recordCount = recordCount + value
    if (recordCount > inputMetricsInterval) {
      inputMetrics.synchronized {
        inputMetrics.incRecordsRead(recordCount)
        updateBytesRead()
      }
      recordCount = 0L
    }
  }

  def updateBytesRead(): Unit = {
    inputMetrics
      .setBytesRead(existingBytesRead
                    + TaskMetricsMap.getInstance().getReadBytesSum(Thread.currentThread().getId))
  }

  def updateAndClose(): Unit = {
    if (recordCount > 0L) {
      inputMetrics.synchronized {
        inputMetrics.incRecordsRead(recordCount)
      }
      recordCount = 0L
    }
    // if the file system reports read-bytes metrics (e.g. HDFS)
    if (!TaskMetricsMap.getInstance().isCallbackEmpty(Thread.currentThread().getId)) {
      updateBytesRead()
        // after the update, clear the parent thread's entry from the map.
      TaskMetricsMap.getInstance().removeEntry(Thread.currentThread().getId)
    } else if (carbonMultiBlockSplit.isInstanceOf[CarbonMultiBlockSplit]) {
      // If we can't get the bytes read from the FS stats, fall back to the split size,
      // which may be inaccurate.
      try {
        inputMetrics.incBytesRead(carbonMultiBlockSplit.getLength)
      } catch {
        case e: java.io.IOException =>
          LOGGER.warn("Unable to get input size to set InputMetrics for task:" + e.getMessage)
      }
    }
  }

  override def updateByValue(value: Object): Unit = {
    // no-op: this implementation does not update metrics from individual values
  }
} 
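
A hedged sketch of how the class above might be driven from a record-reading loop. The readWithMetrics helper, the generic Hadoop RecordReader, and the 10000-record flush interval are illustrative assumptions, not CarbonData API names or defaults.

import org.apache.hadoop.mapreduce.RecordReader
import org.apache.spark.{CarbonInputMetrics, TaskContext}

import org.apache.carbondata.hadoop.CarbonMultiBlockSplit

// Hypothetical helper: pump records through any Hadoop-style RecordReader while
// reporting progress through the CarbonInputMetrics class defined above.
object CarbonInputMetricsUsage {
  def readWithMetrics[K, V](context: TaskContext,
      split: CarbonMultiBlockSplit,
      reader: RecordReader[K, V]): Long = {
    val inputMetrics = new CarbonInputMetrics
    // 10000 is an illustrative flush interval, not a CarbonData default.
    inputMetrics.initBytesReadCallback(context, split, 10000L)
    var rows = 0L
    try {
      while (reader.nextKeyValue()) {
        rows += 1
        // Records (and bytes, via TaskMetricsMap) are pushed to Spark's
        // InputMetrics once the accumulated count exceeds the interval.
        inputMetrics.incrementRecordRead(1L)
      }
    } finally {
      // Flush any remaining counts and clear this thread's TaskMetricsMap entry.
      inputMetrics.updateAndClose()
    }
    rows
  }
}
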
Example 2
Source File: ExposedMetrics.scala    From hail   with MIT License
package org.apache.spark

import org.apache.spark.executor.{InputMetrics, OutputMetrics}

object ExposedMetrics {
  def incrementRecord(metrics: InputMetrics): Unit = {
    metrics.incRecordsRead(1)
  }

  def incrementBytes(metrics: InputMetrics, nBytes: Long): Unit = {
    metrics.incBytesRead(nBytes)
  }

  def setBytes(metrics: OutputMetrics, nBytes: Long): Unit = {
    metrics.setBytesWritten(nBytes)
  }

  def setRecords(metrics: OutputMetrics, nRecords: Long): Unit = {
    metrics.setRecordsWritten(nRecords)
  }
}
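
Because the underlying increment and set methods on InputMetrics and OutputMetrics are private[spark], user code reaches them through this shim, which must itself live in the org.apache.spark package. Below is a hedged sketch of wrapping an iterator inside a running task so each element it yields is counted; the CountingIterator helper and the fixed per-record byte estimate are assumptions for illustration, not part of Hail.

import org.apache.spark.{ExposedMetrics, TaskContext}

// Hypothetical helper: decorate an iterator inside a running task so every
// element it yields is reflected in that task's InputMetrics. The constant
// per-record byte estimate is illustrative only.
object CountingIterator {
  def counting[T](it: Iterator[T], estimatedBytesPerRecord: Long): Iterator[T] = {
    val inputMetrics = TaskContext.get().taskMetrics().inputMetrics
    it.map { elem =>
      ExposedMetrics.incrementRecord(inputMetrics)
      ExposedMetrics.incrementBytes(inputMetrics, estimatedBytesPerRecord)
      elem
    }
  }
}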