org.apache.hadoop.mapreduce.TaskAttemptID Scala Examples
The following examples show how to use org.apache.hadoop.mapreduce.TaskAttemptID.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: ConfOnlyTAC.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.hadoop import org.apache.hadoop.conf.Configuration import org.apache.hadoop.mapreduce.{ Counter, TaskAttemptID, Job, TaskAttemptContext } // This exists just because of a quirk of the record reader api. case class ConfOnlyTAC(_conf: Configuration) extends Job with TaskAttemptContext { // JobContextImpl and JobContext override def getConfiguration: Configuration = _conf // TaskAttemptContext override def getTaskAttemptID: TaskAttemptID = sys.error("not implemented") override def setStatus(msg: String): Unit = sys.error("not implemented") override def getStatus = sys.error("not implemented") override def getProgress: Float = sys.error("not implemented") override def getCounter(counterName: Enum[_]): Counter = sys.error("not implemented") override def getCounter(groupName: String, counterName: String): Counter = sys.error("not implemented") // Progressable override def progress(): Unit = sys.error("not implemented") }
Example 2
Source File: OapIndexCommitProtocolSuite.scala From OAP with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.oap.index import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.Job import org.apache.hadoop.mapreduce.MRJobConfig import org.apache.hadoop.mapreduce.TaskAttemptID import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat import org.apache.hadoop.mapreduce.task.JobContextImpl import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl import org.apache.spark.sql.test.oap.SharedOapContext import org.apache.spark.util.Utils class OapIndexCommitProtocolSuite extends SharedOapContext { test("newTaskTempFile") { val attempt = "attempt_200707121733_0001_m_000000_0" val taskID = TaskAttemptID.forName(attempt) val jobID = taskID.getJobID.toString val outDir = Utils.createTempDir().getAbsolutePath val job = Job.getInstance() FileOutputFormat.setOutputPath(job, new Path(outDir)) val conf = job.getConfiguration() conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt) val jobContext = new JobContextImpl(conf, taskID.getJobID()) val taskContext = new TaskAttemptContextImpl(conf, taskID) val commitProtocol = new OapIndexCommitProtocol(jobID, outDir) // test task temp path val pendingDirName = "_temporary_" + jobID commitProtocol.setupJob(jobContext) commitProtocol.setupTask(taskContext) val tempFile = new Path(commitProtocol.newTaskTempFile(taskContext, None, "test")) val expectedJobAttemptPath = new Path(new Path(outDir, pendingDirName), "0") val expectedTaskWorkPath = new Path(new Path(expectedJobAttemptPath, pendingDirName), attempt) assert(tempFile.getParent == expectedTaskWorkPath) } }
Example 3
Source File: SparkHadoopMapReduceUtil.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.mapreduce import java.lang.{Boolean => JBoolean, Integer => JInteger} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.mapreduce.{JobContext, JobID, TaskAttemptContext, TaskAttemptID} private[spark] trait SparkHadoopMapReduceUtil { def newJobContext(conf: Configuration, jobId: JobID): JobContext = { val klass = firstAvailableClass( "org.apache.hadoop.mapreduce.task.JobContextImpl", // hadoop2, hadoop2-yarn "org.apache.hadoop.mapreduce.JobContext") // hadoop1 val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[JobID]) ctor.newInstance(conf, jobId).asInstanceOf[JobContext] } def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = { val klass = firstAvailableClass( "org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl", // hadoop2, hadoop2-yarn "org.apache.hadoop.mapreduce.TaskAttemptContext") // hadoop1 val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[TaskAttemptID]) ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext] } def newTaskAttemptID( jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = { val klass = Class.forName("org.apache.hadoop.mapreduce.TaskAttemptID") try { // First, attempt to use the old-style constructor that takes a boolean isMap // (not available in YARN) val ctor = klass.getDeclaredConstructor(classOf[String], classOf[Int], classOf[Boolean], classOf[Int], classOf[Int]) ctor.newInstance(jtIdentifier, new JInteger(jobId), new JBoolean(isMap), new JInteger(taskId), new JInteger(attemptId)).asInstanceOf[TaskAttemptID] } catch { case exc: NoSuchMethodException => { // If that failed, look for the new constructor that takes a TaskType (not available in 1.x) val taskTypeClass = Class.forName("org.apache.hadoop.mapreduce.TaskType") .asInstanceOf[Class[Enum[_]]] val taskType = taskTypeClass.getMethod("valueOf", classOf[String]).invoke( taskTypeClass, if(isMap) "MAP" else "REDUCE") val ctor = klass.getDeclaredConstructor(classOf[String], classOf[Int], taskTypeClass, classOf[Int], classOf[Int]) ctor.newInstance(jtIdentifier, new JInteger(jobId), taskType, new JInteger(taskId), new JInteger(attemptId)).asInstanceOf[TaskAttemptID] } } } private def firstAvailableClass(first: String, second: String): Class[_] = { try { Class.forName(first) } catch { case e: ClassNotFoundException => Class.forName(second) } } }
Example 4
Source File: SparkHadoopMapReduceUtil.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.mapreduce import java.lang.{Boolean => JBoolean, Integer => JInteger} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.mapreduce.{JobContext, JobID, TaskAttemptContext, TaskAttemptID} private[spark] trait SparkHadoopMapReduceUtil { def newJobContext(conf: Configuration, jobId: JobID): JobContext = { val klass = firstAvailableClass( "org.apache.hadoop.mapreduce.task.JobContextImpl", // hadoop2, hadoop2-yarn "org.apache.hadoop.mapreduce.JobContext") // hadoop1 val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[JobID]) ctor.newInstance(conf, jobId).asInstanceOf[JobContext] } def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = { val klass = firstAvailableClass( "org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl", // hadoop2, hadoop2-yarn "org.apache.hadoop.mapreduce.TaskAttemptContext") // hadoop1 val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[TaskAttemptID]) ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext] } def newTaskAttemptID( jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int): TaskAttemptID = { val klass = Class.forName("org.apache.hadoop.mapreduce.TaskAttemptID") try { // First, attempt to use the old-style constructor that takes a boolean isMap // (not available in YARN) val ctor = klass.getDeclaredConstructor(classOf[String], classOf[Int], classOf[Boolean], classOf[Int], classOf[Int]) ctor.newInstance(jtIdentifier, new JInteger(jobId), new JBoolean(isMap), new JInteger(taskId), new JInteger(attemptId)).asInstanceOf[TaskAttemptID] } catch { case exc: NoSuchMethodException => { // If that failed, look for the new constructor that takes a TaskType (not available in 1.x) val taskTypeClass = Class.forName("org.apache.hadoop.mapreduce.TaskType") .asInstanceOf[Class[Enum[_]]] val taskType = taskTypeClass.getMethod("valueOf", classOf[String]).invoke( taskTypeClass, if (isMap) "MAP" else "REDUCE") val ctor = klass.getDeclaredConstructor(classOf[String], classOf[Int], taskTypeClass, classOf[Int], classOf[Int]) ctor.newInstance(jtIdentifier, new JInteger(jobId), taskType, new JInteger(taskId), new JInteger(attemptId)).asInstanceOf[TaskAttemptID] } } } private def firstAvailableClass(first: String, second: String): Class[_] = { try { Class.forName(first) } catch { case e: ClassNotFoundException => Class.forName(second) } } }
Example 5
Source File: SparkHadoopMapReduceUtil.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.mapreduce import java.lang.{Boolean => JBoolean, Integer => JInteger} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.mapreduce.{JobContext, JobID, TaskAttemptContext, TaskAttemptID} import org.apache.spark.util.Utils private[spark] trait SparkHadoopMapReduceUtil { def newJobContext(conf: Configuration, jobId: JobID): JobContext = { val klass = firstAvailableClass( "org.apache.hadoop.mapreduce.task.JobContextImpl", // hadoop2, hadoop2-yarn "org.apache.hadoop.mapreduce.JobContext") // hadoop1 val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[JobID]) ctor.newInstance(conf, jobId).asInstanceOf[JobContext] } def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = { val klass = firstAvailableClass( "org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl", // hadoop2, hadoop2-yarn "org.apache.hadoop.mapreduce.TaskAttemptContext") // hadoop1 val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[TaskAttemptID]) ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext] } def newTaskAttemptID( jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int): TaskAttemptID = { val klass = Utils.classForName("org.apache.hadoop.mapreduce.TaskAttemptID") try { // First, attempt to use the old-style constructor that takes a boolean isMap // (not available in YARN) //首先,尝试使用带有boolean isMap的旧式构造函数(在YARN中不可用) val ctor = klass.getDeclaredConstructor(classOf[String], classOf[Int], classOf[Boolean], classOf[Int], classOf[Int]) ctor.newInstance(jtIdentifier, new JInteger(jobId), new JBoolean(isMap), new JInteger(taskId), new JInteger(attemptId)).asInstanceOf[TaskAttemptID] } catch { case exc: NoSuchMethodException => { // If that failed, look for the new constructor that takes a TaskType (not available in 1.x) //如果失败,请查找采用TaskType的新构造函数(在1.x中不可用) val taskTypeClass = Utils.classForName("org.apache.hadoop.mapreduce.TaskType") .asInstanceOf[Class[Enum[_]]] val taskType = taskTypeClass.getMethod("valueOf", classOf[String]).invoke( taskTypeClass, if (isMap) "MAP" else "REDUCE") val ctor = klass.getDeclaredConstructor(classOf[String], classOf[Int], taskTypeClass, classOf[Int], classOf[Int]) ctor.newInstance(jtIdentifier, new JInteger(jobId), taskType, new JInteger(taskId), new JInteger(attemptId)).asInstanceOf[TaskAttemptID] } } } private def firstAvailableClass(first: String, second: String): Class[_] = { try { Utils.classForName(first) } catch { case e: ClassNotFoundException => Utils.classForName(second) } } }
Example 6
Source File: SparkHadoopMapReduceUtil.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.mapreduce import java.lang.{Boolean => JBoolean, Integer => JInteger} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.mapreduce.{JobContext, JobID, TaskAttemptContext, TaskAttemptID} import org.apache.spark.util.Utils private[spark] trait SparkHadoopMapReduceUtil { def newJobContext(conf: Configuration, jobId: JobID): JobContext = { val klass = firstAvailableClass( "org.apache.hadoop.mapreduce.task.JobContextImpl", // hadoop2, hadoop2-yarn "org.apache.hadoop.mapreduce.JobContext") // hadoop1 val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[JobID]) ctor.newInstance(conf, jobId).asInstanceOf[JobContext] } def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = { val klass = firstAvailableClass( "org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl", // hadoop2, hadoop2-yarn "org.apache.hadoop.mapreduce.TaskAttemptContext") // hadoop1 val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[TaskAttemptID]) ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext] } def newTaskAttemptID( jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int): TaskAttemptID = { val klass = Utils.classForName("org.apache.hadoop.mapreduce.TaskAttemptID") try { // First, attempt to use the old-style constructor that takes a boolean isMap // (not available in YARN) val ctor = klass.getDeclaredConstructor(classOf[String], classOf[Int], classOf[Boolean], classOf[Int], classOf[Int]) ctor.newInstance(jtIdentifier, new JInteger(jobId), new JBoolean(isMap), new JInteger(taskId), new JInteger(attemptId)).asInstanceOf[TaskAttemptID] } catch { case exc: NoSuchMethodException => { // If that failed, look for the new constructor that takes a TaskType (not available in 1.x) val taskTypeClass = Utils.classForName("org.apache.hadoop.mapreduce.TaskType") .asInstanceOf[Class[Enum[_]]] val taskType = taskTypeClass.getMethod("valueOf", classOf[String]).invoke( taskTypeClass, if (isMap) "MAP" else "REDUCE") val ctor = klass.getDeclaredConstructor(classOf[String], classOf[Int], taskTypeClass, classOf[Int], classOf[Int]) ctor.newInstance(jtIdentifier, new JInteger(jobId), taskType, new JInteger(taskId), new JInteger(attemptId)).asInstanceOf[TaskAttemptID] } } } private def firstAvailableClass(first: String, second: String): Class[_] = { try { Utils.classForName(first) } catch { case e: ClassNotFoundException => Utils.classForName(second) } } }
Example 7
Source File: PlyOutputWriter.scala From spark-iqmulus with Apache License 2.0 | 5 votes |
package fr.ign.spark.iqmulus.ply import org.apache.spark.sql.types._ import org.apache.hadoop.mapreduce.{ TaskAttemptID, RecordWriter, TaskAttemptContext, JobContext } import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter import java.io.DataOutputStream import org.apache.spark.sql.sources.OutputWriter import org.apache.hadoop.io.{ NullWritable, BytesWritable } import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat import org.apache.hadoop.fs.Path import java.text.NumberFormat import org.apache.spark.sql.{ Row, SQLContext, sources } import fr.ign.spark.iqmulus.RowOutputStream class PlyOutputWriter( name: String, context: TaskAttemptContext, dataSchema: StructType, element: String, littleEndian: Boolean ) extends OutputWriter { private val file = { val path = getDefaultWorkFile(s".ply.$element") val fs = path.getFileSystem(context.getConfiguration) fs.create(path) } private var count = 0L // strip out ids private val schema = StructType(dataSchema.filterNot { Seq("fid", "pid") contains _.name }) private val recordWriter = new RowOutputStream(new DataOutputStream(file), littleEndian, schema, dataSchema) def getDefaultWorkFile(extension: String): Path = { val uniqueWriteJobId = context.getConfiguration.get("spark.sql.sources.writeJobUUID") val taskAttemptId: TaskAttemptID = context.getTaskAttemptID val split = taskAttemptId.getTaskID.getId new Path(name, f"$split%05d-$uniqueWriteJobId$extension") } override def write(row: Row): Unit = { recordWriter.write(row) count += 1 } override def close(): Unit = { recordWriter.close // write header val path = getDefaultWorkFile(".ply.header") val fs = path.getFileSystem(context.getConfiguration) val dos = new java.io.DataOutputStream(fs.create(path)) val header = new PlyHeader(path.toString, littleEndian, Map(element -> ((count, schema)))) header.write(dos) dos.close } }
Example 8
Source File: LasOutputWriter.scala From spark-iqmulus with Apache License 2.0 | 5 votes |
package fr.ign.spark.iqmulus.las import org.apache.spark.sql.types._ import org.apache.hadoop.mapreduce.{ TaskAttemptID, RecordWriter, TaskAttemptContext } import java.io.DataOutputStream import org.apache.spark.sql.sources.OutputWriter import org.apache.spark.deploy.SparkHadoopUtil import org.apache.hadoop.io.{ NullWritable, BytesWritable } import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat import org.apache.hadoop.fs.Path import java.text.NumberFormat import org.apache.spark.sql.{ Row, SQLContext, sources } import fr.ign.spark.iqmulus.RowOutputStream class LasOutputWriter( name: String, context: TaskAttemptContext, dataSchema: StructType, formatOpt: Option[Byte] = None, version: Version = Version(), offset: Array[Double] = Array(0F, 0F, 0F), scale: Array[Double] = Array(0.01F, 0.01F, 0.01F) ) extends OutputWriter { private val file = { val path = getDefaultWorkFile("/1.pdr") val fs = path.getFileSystem(context.getConfiguration) fs.create(path) } private val pmin = Array.fill[Double](3)(Double.PositiveInfinity) private val pmax = Array.fill[Double](3)(Double.NegativeInfinity) private val countByReturn = Array.fill[Long](15)(0) private def count = countByReturn.sum private val format = formatOpt.getOrElse(LasHeader.formatFromSchema(dataSchema)) // todo, extra bytes private val schema = LasHeader.schema(format) private def header = new LasHeader(name, format, count, pmin, pmax, scale, offset, countByReturn) private val recordWriter = new RowOutputStream(new DataOutputStream(file), littleEndian = true, schema, dataSchema) def getDefaultWorkFile(extension: String): Path = { val uniqueWriteJobId = context.getConfiguration.get("spark.sql.sources.writeJobUUID") val taskAttemptId: TaskAttemptID = context.getTaskAttemptID val split = taskAttemptId.getTaskID.getId new Path(name, f"$split%05d-$uniqueWriteJobId$extension") } override def write(row: Row): Unit = { recordWriter.write(row) // gather statistics for the header val x = offset(0) + scale(0) * row.getAs[Int]("x").toDouble val y = offset(1) + scale(1) * row.getAs[Int]("y").toDouble val z = offset(2) + scale(2) * row.getAs[Int]("z").toDouble val ret = row.getAs[Byte]("flags") & 0x3 countByReturn(ret) += 1 pmin(0) = Math.min(pmin(0), x) pmin(1) = Math.min(pmin(1), y) pmin(2) = Math.min(pmin(2), z) pmax(0) = Math.max(pmax(0), x) pmax(1) = Math.max(pmax(1), y) pmax(2) = Math.max(pmax(2), z) } override def close(): Unit = { recordWriter.close // write header val path = getDefaultWorkFile("/0.header") val fs = path.getFileSystem(context.getConfiguration) val dos = new java.io.DataOutputStream(fs.create(path)) header.write(dos) dos.close // copy header and pdf to a final las file (1 per split) org.apache.hadoop.fs.FileUtil.copyMerge( fs, getDefaultWorkFile("/"), fs, getDefaultWorkFile(".las"), true, context.getConfiguration, "" ) } }