org.apache.spark.executor.InputMetrics Scala Examples
The following examples show how to use org.apache.spark.executor.InputMetrics.
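Both files below use the same trick: the mutating methods on InputMetrics (incRecordsRead, incBytesRead, setBytesRead) are private[spark], so the code that calls them is declared inside the org.apache.spark package and reaches the metrics object through the task's TaskContext. The sketch below is a minimal illustration of that pattern, assuming a Spark 2.x-style API; the object and method names are invented for this page and appear in neither project.

package org.apache.spark

import org.apache.spark.executor.InputMetrics

// Minimal sketch with hypothetical names: grab the task's InputMetrics from
// the TaskContext and bump its counters. This only compiles from inside the
// org.apache.spark package because the inc* methods are private[spark].
object InputMetricsUsageSketch {
  def recordOneRow(context: TaskContext, rowBytes: Long): Unit = {
    val metrics: InputMetrics = context.taskMetrics().inputMetrics
    metrics.incRecordsRead(1L)
    metrics.incBytesRead(rowBytes)
  }
}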
Example 1
Source File: CarbonInputMetrics.scala, from carbondata (Apache License 2.0)
package org.apache.spark

import java.lang.Long

import org.apache.spark.executor.InputMetrics

import org.apache.carbondata.common.logging.LogServiceFactory
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.TaskMetricsMap
import org.apache.carbondata.hadoop.CarbonMultiBlockSplit
import org.apache.carbondata.spark.InitInputMetrics

class CarbonInputMetrics extends InitInputMetrics {
  @transient val LOGGER = LogServiceFactory.getLogService(this.getClass.getName)
  var inputMetrics: InputMetrics = _
  // bytes read before compute by other map rdds in lineage
  var existingBytesRead: Long = _
  var recordCount: Long = _
  var inputMetricsInterval: Long = _
  var carbonMultiBlockSplit: CarbonMultiBlockSplit = _

  def initBytesReadCallback(context: TaskContext,
      carbonMultiBlockSplit: CarbonMultiBlockSplit, inputMetricsInterval: Long) {
    inputMetrics = context.taskMetrics().inputMetrics
    existingBytesRead = inputMetrics.bytesRead
    recordCount = 0L
    this.inputMetricsInterval = inputMetricsInterval
    this.carbonMultiBlockSplit = carbonMultiBlockSplit
  }

  def incrementRecordRead(recordRead: Long) {
    val value: scala.Long = recordRead
    recordCount = recordCount + value
    if (recordCount > inputMetricsInterval) {
      inputMetrics.synchronized {
        inputMetrics.incRecordsRead(recordCount)
        updateBytesRead()
      }
      recordCount = 0L
    }
  }

  def updateBytesRead(): Unit = {
    inputMetrics.setBytesRead(existingBytesRead +
      TaskMetricsMap.getInstance().getReadBytesSum(Thread.currentThread().getId))
  }

  def updateAndClose() {
    if (recordCount > 0L) {
      inputMetrics.synchronized {
        inputMetrics.incRecordsRead(recordCount)
      }
      recordCount = 0L
    }
    // if metrics supported file system ex: hdfs
    if (!TaskMetricsMap.getInstance().isCallbackEmpty(Thread.currentThread().getId)) {
      updateBytesRead()
      // after update clear parent thread entry from map.
      TaskMetricsMap.getInstance().removeEntry(Thread.currentThread().getId)
    } else if (carbonMultiBlockSplit.isInstanceOf[CarbonMultiBlockSplit]) {
      // If we can't get the bytes read from the FS stats, fall back to the split size,
      // which may be inaccurate.
      try {
        inputMetrics.incBytesRead(carbonMultiBlockSplit.getLength)
      } catch {
        case e: java.io.IOException =>
          LOGGER.warn("Unable to get input size to set InputMetrics for task:" + e.getMessage)
      }
    }
  }

  override def updateByValue(value: Object): Unit = {
  }
}
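As a rough idea of how this class is driven, a reader loop would initialise it once with the TaskContext, the CarbonMultiBlockSplit and an update interval, call incrementRecordRead per record, and flush the remainder with updateAndClose when the iterator is exhausted. The sketch below is an illustration only: the record reader, the split parameter and the 10000-record interval are placeholders, not code taken from CarbonData.

import org.apache.hadoop.mapreduce.RecordReader
import org.apache.spark.{CarbonInputMetrics, TaskContext}
import org.apache.carbondata.hadoop.CarbonMultiBlockSplit

// Hypothetical driver loop: `reader` stands in for whatever record reader
// iterates the split, and 10000L is an arbitrary metrics-update interval.
def readWithMetrics(split: CarbonMultiBlockSplit,
    reader: RecordReader[Void, Object]): Unit = {
  val metrics = new CarbonInputMetrics
  metrics.initBytesReadCallback(TaskContext.get(), split, 10000L)
  while (reader.nextKeyValue()) {
    metrics.incrementRecordRead(1L)  // pushed to Spark once the interval is exceeded
  }
  metrics.updateAndClose()           // flush the remainder and clear the thread's callback entry
}

The interval exists so that the shared inputMetrics object is touched inside a synchronized block only every few thousand records rather than once per row.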
Example 2
Source File: ExposedMetrics.scala, from hail (MIT License)
package org.apache.spark

import org.apache.spark.executor.{InputMetrics, OutputMetrics}

object ExposedMetrics {
  def incrementRecord(metrics: InputMetrics) {
    metrics.incRecordsRead(1)
  }

  def incrementBytes(metrics: InputMetrics, nBytes: Long) {
    metrics.incBytesRead(nBytes)
  }

  def setBytes(metrics: OutputMetrics, nBytes: Long) {
    metrics.setBytesWritten(nBytes)
  }

  def setRecords(metrics: OutputMetrics, nRecords: Long) {
    metrics.setRecordsWritten(nRecords)
  }
}
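The point of this tiny object is visibility: incRecordsRead, incBytesRead, setBytesWritten and setRecordsWritten are private[spark], so Hail places this forwarder inside the org.apache.spark package and calls it from its own readers and writers. The caller below is an illustration with hypothetical names; it assumes the InputMetrics instance has already been obtained by the task, for example as in Example 1.

import org.apache.spark.ExposedMetrics
import org.apache.spark.executor.InputMetrics

// Hypothetical caller living outside org.apache.spark: the InputMetrics
// instance is supplied by the task rather than fetched here.
def countRow(metrics: InputMetrics, rowBytes: Long): Unit = {
  ExposedMetrics.incrementRecord(metrics)           // recordsRead += 1
  ExposedMetrics.incrementBytes(metrics, rowBytes)  // bytesRead += rowBytes
}

Because each method is a one-line forwarder, the only thing the org.apache.spark package contributes is access; all counting policy stays in the calling project.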