org.apache.hadoop.mapred.JobConf Scala Examples
The following examples show how to use org.apache.hadoop.mapred.JobConf.
Follow the link above each example to view the original project or source file.
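For orientation, here is a minimal, self-contained sketch of constructing and configuring a JobConf on its own. The job name is an illustrative value and the property key is a standard Hadoop setting; this is a sketch, not code from any of the projects below.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapred.JobConf

object JobConfDemo {
  def main(args: Array[String]): Unit = {
    // Wrap an existing Hadoop Configuration and set a couple of job properties.
    val jobConf = new JobConf(new Configuration())
    jobConf.setJobName("jobconf-demo")                // illustrative job name
    jobConf.setInt("mapreduce.task.timeout", 600000)  // standard Hadoop property key
    println(jobConf.getJobName)
  }
}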
Example 1
Source File: SerializableJobConf.scala (from sparkoscope, Apache License 2.0)
package org.apache.spark.util

import java.io.{ObjectInputStream, ObjectOutputStream}

import org.apache.hadoop.mapred.JobConf

private[spark] class SerializableJobConf(@transient var value: JobConf) extends Serializable {
  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.defaultWriteObject()
    value.write(out)
  }

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    value = new JobConf(false)
    value.readFields(in)
  }
}
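SerializableJobConf is private[spark], so application code cannot use it directly. The same wrap-and-broadcast pattern can be sketched with Spark's public SerializableWritable instead; everything below other than those two classes (the RDD contents, the check inside the closure) is illustrative.

import org.apache.hadoop.mapred.JobConf
import org.apache.spark.{SerializableWritable, SparkContext}

object BroadcastJobConfSketch {
  def withBroadcastConf(sc: SparkContext): Unit = {
    // Wrap the JobConf so it can be shipped to executors, then broadcast it once.
    val jobConf = new JobConf(sc.hadoopConfiguration)
    val confBroadcast = sc.broadcast(new SerializableWritable(jobConf))
    sc.parallelize(1 to 4).foreach { _ =>
      // Each task reads the deserialized JobConf from the broadcast variable.
      val conf = confBroadcast.value.value
      require(conf != null)
    }
  }
}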
Example 2
Source File: SerializableJobConf.scala (from BigDatalog, Apache License 2.0)
package org.apache.spark.util

import java.io.{ObjectInputStream, ObjectOutputStream}

import org.apache.hadoop.mapred.JobConf

private[spark] class SerializableJobConf(@transient var value: JobConf) extends Serializable {
  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.defaultWriteObject()
    value.write(out)
  }

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    value = new JobConf(false)
    value.readFields(in)
  }
}
Example 3
Source File: TextFileOverwrite.scala (from spark_helper, Apache License 2.0)
package org.apache.spark

import org.apache.spark.rdd.{RDD, HadoopRDD}
import org.apache.spark.util.SerializableConfiguration

import org.apache.hadoop.mapred.{FileInputFormat, JobConf, TextInputFormat}
import org.apache.hadoop.io.{LongWritable, Text}
import org.apache.hadoop.fs.Path

object TextFileOverwrite {

  def textFile(
      paths: Seq[String],
      minPartitions: Int,
      sc: SparkContext
  ): RDD[String] = {

    val confBroadcast = sc.broadcast(new SerializableConfiguration(sc.hadoopConfiguration))

    val setInputPathsFunc =
      (jobConf: JobConf) => FileInputFormat.setInputPaths(jobConf, paths.map(p => new Path(p)): _*)

    new HadoopRDD(
      sc,
      confBroadcast,
      Some(setInputPathsFunc),
      classOf[TextInputFormat],
      classOf[LongWritable],
      classOf[Text],
      minPartitions
    ).map(pair => pair._2.toString)
  }
}
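A call site might look like the following sketch; the application name, input paths, and partition count are illustrative assumptions, not part of the spark_helper project.

import org.apache.spark.{SparkConf, SparkContext, TextFileOverwrite}

object TextFileOverwriteDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("textfile-demo").setMaster("local[*]"))
    // Read two (illustrative) paths into a single RDD[String] with at least 2 partitions.
    val lines = TextFileOverwrite.textFile(Seq("/data/logs/a.txt", "/data/logs/b.txt"), 2, sc)
    println(lines.count())
    sc.stop()
  }
}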
Example 4
Source File: SequenceFileRDDFunctions.scala (from Spark-2.3.1, Apache License 2.0)
package org.apache.spark.rdd

import scala.reflect.ClassTag

import org.apache.hadoop.io.Writable
import org.apache.hadoop.io.compress.CompressionCodec
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapred.SequenceFileOutputFormat

import org.apache.spark.internal.Logging

// (the enclosing SequenceFileRDDFunctions class declaration is elided in this listing)

  def saveAsSequenceFile(
      path: String,
      codec: Option[Class[_ <: CompressionCodec]] = None): Unit = self.withScope {
    def anyToWritable[U <% Writable](u: U): Writable = u

    // TODO We cannot force the return type of `anyToWritable` be same as keyWritableClass and
    // valueWritableClass at the compile time. To implement that, we need to add type parameters to
    // SequenceFileRDDFunctions. however, SequenceFileRDDFunctions is a public class so it will be a
    // breaking change.
    val convertKey = self.keyClass != _keyWritableClass
    val convertValue = self.valueClass != _valueWritableClass

    logInfo("Saving as sequence file of type " +
      s"(${_keyWritableClass.getSimpleName},${_valueWritableClass.getSimpleName})")
    val format = classOf[SequenceFileOutputFormat[Writable, Writable]]
    val jobConf = new JobConf(self.context.hadoopConfiguration)
    if (!convertKey && !convertValue) {
      self.saveAsHadoopFile(path, _keyWritableClass, _valueWritableClass, format, jobConf, codec)
    } else if (!convertKey && convertValue) {
      self.map(x => (x._1, anyToWritable(x._2))).saveAsHadoopFile(
        path, _keyWritableClass, _valueWritableClass, format, jobConf, codec)
    } else if (convertKey && !convertValue) {
      self.map(x => (anyToWritable(x._1), x._2)).saveAsHadoopFile(
        path, _keyWritableClass, _valueWritableClass, format, jobConf, codec)
    } else if (convertKey && convertValue) {
      self.map(x => (anyToWritable(x._1), anyToWritable(x._2))).saveAsHadoopFile(
        path, _keyWritableClass, _valueWritableClass, format, jobConf, codec)
    }
  }
}
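This method backs the public RDD.saveAsSequenceFile API. A minimal sketch of calling it through that API follows; the application name and output paths are illustrative.

import org.apache.hadoop.io.compress.GzipCodec
import org.apache.spark.{SparkConf, SparkContext}

object SequenceFileDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("seqfile-demo").setMaster("local[*]"))
    val pairs = sc.parallelize(Seq(("a", 1), ("b", 2)))
    // The implicit conversion to SequenceFileRDDFunctions converts keys and values to Writables.
    pairs.saveAsSequenceFile("/tmp/seqfile-demo")                                // uncompressed
    pairs.saveAsSequenceFile("/tmp/seqfile-demo-gz", Some(classOf[GzipCodec]))   // compressed
    sc.stop()
  }
}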
Example 5
Source File: SerializableJobConf.scala (from Spark-2.3.1, Apache License 2.0)
package org.apache.spark.util

import java.io.{ObjectInputStream, ObjectOutputStream}

import org.apache.hadoop.mapred.JobConf

private[spark] class SerializableJobConf(@transient var value: JobConf) extends Serializable {
  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.defaultWriteObject()
    value.write(out)
  }

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    value = new JobConf(false)
    value.readFields(in)
  }
}
Example 6
Source File: SerializableJobConf.scala (from spark1.52, Apache License 2.0)
package org.apache.spark.util

import java.io.{ObjectInputStream, ObjectOutputStream}

import org.apache.hadoop.mapred.JobConf

private[spark] class SerializableJobConf(@transient var value: JobConf) extends Serializable {
  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.defaultWriteObject()
    value.write(out)
  }

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    value = new JobConf(false)
    value.readFields(in)
  }
}
Example 7
Source File: RichRDDTest.scala (from TransmogrifAI, BSD 3-Clause "New" or "Revised" License)
package com.salesforce.op.utils.spark

import java.io.File

import com.holdenkarau.spark.testing.RDDGenerator
import com.salesforce.op.test.TestSparkContext
import org.apache.hadoop.io.compress.DefaultCodec
import org.apache.hadoop.mapred.JobConf
import org.joda.time.DateTime
import org.junit.runner.RunWith
import org.scalacheck.Arbitrary
import org.scalatest.PropSpec
import org.scalatest.junit.JUnitRunner
import org.scalatest.prop.PropertyChecks

@RunWith(classOf[JUnitRunner])
class RichRDDTest extends PropSpec with PropertyChecks with TestSparkContext {
  import com.salesforce.op.utils.spark.RichRDD._

  val data = RDDGenerator.genRDD[(Int, Int)](sc)(Arbitrary.arbitrary[(Int, Int)])

  property("save as a text file") {
    forAll(data) { rdd =>
      val out = new File(tempDir, "op-richrdd-" + DateTime.now().getMillis).toString
      rdd.saveAsTextFile(out, None, new JobConf(rdd.context.hadoopConfiguration))
      spark.read.textFile(out).count() shouldBe rdd.count()
    }
  }

  property("save as a compressed text file") {
    forAll(data) { rdd =>
      val out = new File(tempDir, "op-richrdd-" + DateTime.now().getMillis).toString
      rdd.saveAsTextFile(out, Some(classOf[DefaultCodec]), new JobConf(rdd.context.hadoopConfiguration))
      spark.read.textFile(out).count() shouldBe rdd.count()
    }
  }
}
Example 8
Source File: SerializableJobConf.scala (from multi-tenancy-spark, Apache License 2.0)
package org.apache.spark.util

import java.io.{ObjectInputStream, ObjectOutputStream}

import org.apache.hadoop.mapred.JobConf

private[spark] class SerializableJobConf(@transient var value: JobConf) extends Serializable {
  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.defaultWriteObject()
    value.write(out)
  }

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    value = new JobConf(false)
    value.readFields(in)
  }
}
Example 9
Source File: ProtoParquetRDD.scala (from sparksql-protobuf, Apache License 2.0)
package com.github.saurfang.parquet.proto.spark

import com.github.saurfang.parquet.proto.ProtoMessageParquetInputFormat
import com.google.protobuf.AbstractMessage
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapred.{FileInputFormat, JobConf}
import org.apache.parquet.proto.ProtoReadSupport
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.rdd.{NewHadoopRDD, RDD}
import org.apache.spark.{Partition, SparkContext, TaskContext}

import scala.reflect.ClassTag

class ProtoParquetRDD[T <: AbstractMessage : ClassTag](
    sc: SparkContext,
    input: String,
    protoClass: Class[T],
    @transient conf: Configuration
  ) extends RDD[T](sc, Nil) {

  def this(sc: SparkContext, input: String, protoClass: Class[T]) = {
    this(sc, input, protoClass, sc.hadoopConfiguration)
  }

  lazy private[this] val rdd = {
    val jconf = new JobConf(conf)
    FileInputFormat.setInputPaths(jconf, input)
    ProtoReadSupport.setProtobufClass(jconf, protoClass.getName)

    new NewHadoopRDD(sc, classOf[ProtoMessageParquetInputFormat[T]], classOf[Void], protoClass, jconf)
  }

  @DeveloperApi
  override def compute(split: Partition, context: TaskContext): Iterator[T] =
    rdd.compute(split, context).map(_._2)

  override protected def getPartitions: Array[Partition] = rdd.getPartitions
}
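A hedged sketch of using this RDD follows. The caller supplies a protobuf-generated class (for example classOf[AddressBook], which is a placeholder here); the helper name and path argument are illustrative.

import com.github.saurfang.parquet.proto.spark.ProtoParquetRDD
import com.google.protobuf.AbstractMessage
import org.apache.spark.SparkContext

import scala.reflect.ClassTag

object ProtoParquetDemo {
  // Count the records of any protobuf-generated class T stored in a Parquet file.
  def countRecords[T <: AbstractMessage : ClassTag](
      sc: SparkContext,
      path: String,
      protoClass: Class[T]): Long =
    new ProtoParquetRDD(sc, path, protoClass).count()
}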
Example 10
Source File: SerializableJobConf.scala (from drizzle-spark, Apache License 2.0)
package org.apache.spark.util

import java.io.{ObjectInputStream, ObjectOutputStream}

import org.apache.hadoop.mapred.JobConf

private[spark] class SerializableJobConf(@transient var value: JobConf) extends Serializable {
  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.defaultWriteObject()
    value.write(out)
  }

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    value = new JobConf(false)
    value.readFields(in)
  }
}
Example 11
Source File: UnsplittableSequenceFileInputFormatTest.scala (from spark-util, Apache License 2.0)
package org.hammerlab.hadoop.splits

import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.mapred
import org.apache.hadoop.mapred.{ FileInputFormat, JobConf }
import FileInputFormat.setInputPaths
import org.hammerlab.test.Suite
import org.hammerlab.test.resources.File

class UnsplittableSequenceFileInputFormatTest extends Suite {

  test("part files") {
    val ifmt = new UnsplittableSequenceFileInputFormat[NullWritable, NullWritable]

    val jc = new JobConf()
    setInputPaths(jc, File("rdd"))

    val paths =
      ifmt
        .getSplits(jc, 2)
        .map(_.asInstanceOf[mapred.FileSplit])
        .map(FileSplit(_).path)

    paths should be(
      0 to 5 map(
        File("rdd") / PartFileBasename(_)
      )
    )
  }

  test("non-part file error") {
    val ifmt = new UnsplittableSequenceFileInputFormat[NullWritable, NullWritable]

    val jc = new JobConf()
    setInputPaths(jc, File("bad"))

    intercept[IllegalArgumentException] {
      ifmt.getSplits(jc, 2)
    }
    .getMessage should be(s"Bad partition file: error")
  }
}
Example 12
Source File: UnsplittableSequenceFileInputFormat.scala (from spark-util, Apache License 2.0)
package org.hammerlab.hadoop.splits

import java.io.IOException
import java.util

import org.apache.hadoop.fs.{ FileStatus, FileSystem, Path ⇒ HPath }
import org.apache.hadoop.mapred.{ JobConf, SequenceFileInputFormat }
import org.apache.hadoop.mapreduce.JobContext
import org.apache.hadoop.mapreduce.lib.input

import scala.collection.JavaConverters._

// (the enclosing class declaration is elided in this listing)

  override def listStatus(job: JobContext): util.List[FileStatus] =
    super
      .listStatus(job)
      .asScala
      .sortBy {
        _.getPath.getName match {
          case PartFileBasename(idx) ⇒
            idx
          case basename ⇒
            throw new IllegalArgumentException(s"Bad partition file: $basename")
        }
      }
      .asJava
}
Example 13
Source File: HiveMRSuite.scala (from connectors, Apache License 2.0)
package io.delta.hive

import java.io.{Closeable, File}

import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapred.{JobConf, MiniMRCluster}
import org.apache.hadoop.mapreduce.MRJobConfig
import org.apache.hadoop.yarn.conf.YarnConfiguration

class HiveMRSuite extends HiveConnectorTest {

  override val engine: String = "mr"

  override def createCluster(namenode: String, conf: Configuration, tempPath: File): Closeable = {
    val jConf = new JobConf(conf);
    jConf.set("yarn.scheduler.capacity.root.queues", "default");
    jConf.set("yarn.scheduler.capacity.root.default.capacity", "100");
    jConf.setInt(MRJobConfig.MAP_MEMORY_MB, 512);
    jConf.setInt(MRJobConfig.REDUCE_MEMORY_MB, 512);
    jConf.setInt(MRJobConfig.MR_AM_VMEM_MB, 128);
    jConf.setInt(YarnConfiguration.YARN_MINICLUSTER_NM_PMEM_MB, 512);
    jConf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 128);
    jConf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 512);
    val mr = new MiniMRCluster(2, namenode, 1, null, null, jConf)

    new Closeable {
      override def close(): Unit = {
        mr.shutdown()
      }
    }
  }
}
Example 14
Source File: HiveTezSuite.scala (from connectors, Apache License 2.0)
package io.delta.hive

import java.io.{Closeable, File}

import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapreduce.MRJobConfig
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.tez.dag.api.TezConfiguration
import org.apache.tez.runtime.library.api.TezRuntimeConfiguration
import org.apache.tez.test.MiniTezCluster

class HiveTezSuite extends HiveConnectorTest {

  override val engine: String = "tez"

  private var tezConf: Configuration = _

  // scalastyle:off
  // (code between these markers is elided in this listing)
  // scalastyle:on

  override def setupConfiguration(conf: Configuration): Unit = {
    tezConf.asScala.foreach { e =>
      conf.set(e.getKey, e.getValue)
    }
    // Overrides values from the hive/tez-site.
    conf.setInt("hive.tez.container.size", 256)
    conf.setInt(TezConfiguration.TEZ_AM_RESOURCE_MEMORY_MB, 256)
    conf.setInt(TezConfiguration.TEZ_TASK_RESOURCE_MEMORY_MB, 256)
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 24)
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_BUFFER_SIZE_MB, 10)
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.4f)
    conf.setBoolean(TezConfiguration.TEZ_IGNORE_LIB_URIS, true)
  }
}
Example 15
Source File: DeltaRecordReaderWrapper.scala (from connectors, Apache License 2.0)
package io.delta.hive

import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory
import org.apache.hadoop.io.ArrayWritable
import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.io.Writable
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapred.Reporter
import org.apache.parquet.hadoop.ParquetInputFormat
import org.slf4j.LoggerFactory

// (the enclosing class declaration and other members, including `partitionValues`, are elided in this listing)

  private def insertPartitionValues(value: ArrayWritable): Unit = {
    val valueArray = value.get()
    var i = 0
    val n = partitionValues.length
    // Using while loop for better performance since this method is called for each row.
    while (i < n) {
      val partition = partitionValues(i)
      // The schema of `valueArray` is the Hive schema, and it's the same as the Delta
      // schema since we have verified it in `DeltaInputFormat`. Hence, the position of a partition
      // column in `valueArray` is the same as its position in Delta schema.
      valueArray(partition._1) = partition._2
      i += 1
    }
  }
}
Example 16
Source File: HiveInputFormat.scala (from connectors, Apache License 2.0)
package io.delta.hive

import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapred.JobConf

class HiveInputFormat extends org.apache.hadoop.hive.ql.io.HiveInputFormat {

  override def pushProjectionsAndFilters(
      jobConf: JobConf,
      inputFormatClass: Class[_],
      splitPath: Path,
      nonNative: Boolean): Unit = {
    if (inputFormatClass == classOf[DeltaInputFormat]) {
      super.pushProjectionsAndFilters(jobConf, inputFormatClass, splitPath, false)
    } else {
      super.pushProjectionsAndFilters(jobConf, inputFormatClass, splitPath, nonNative)
    }
  }
}
Example 17
Source File: DeltaOutputFormat.scala (from connectors, Apache License 2.0)
package io.delta.hive

import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.io.{ArrayWritable, NullWritable}
import org.apache.hadoop.mapred.{JobConf, OutputFormat, RecordWriter}
import org.apache.hadoop.util.Progressable

class DeltaOutputFormat extends OutputFormat[NullWritable, ArrayWritable] {

  private def writingNotSupported[T](): T = {
    throw new UnsupportedOperationException(
      "Writing to a Delta table in Hive is not supported. Please use Spark to write.")
  }

  override def getRecordWriter(
      ignored: FileSystem,
      job: JobConf,
      name: String,
      progress: Progressable): RecordWriter[NullWritable, ArrayWritable] = writingNotSupported()

  override def checkOutputSpecs(ignored: FileSystem, job: JobConf): Unit = writingNotSupported()
}
Example 18
Source File: PailDataSource.scala (from utils, Apache License 2.0)
package com.indix.utils.spark.pail

import com.backtype.hadoop.pail._
import com.backtype.support.{Utils => PailUtils}
import org.apache.hadoop.io.{BytesWritable, Text}
import org.apache.hadoop.mapred.{InputFormat, JobConf}
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

import scala.reflect.ClassTag

trait PailDataSource {

  implicit class PailBasedReader(sc: SparkContext) {

    def pailFile[R: ClassTag](inputLocation: String): RDD[R] = {
      pailFileWithInfo[R](inputLocation).map(_._2)
    }

    def pailFileWithInfo[R: ClassTag](inputLocation: String) = {
      val pail = new Pail(inputLocation)
      val pailSpec = pail.getSpec
      val inputFormat = pail.getFormat.getInputFormatClass.asSubclass(classOf[InputFormat[PailRecordInfo, BytesWritable]])

      sc.hadoopFile(inputLocation, inputFormat, classOf[PailRecordInfo], classOf[BytesWritable])
        .map { case (recordInfo, recordInBytes) =>
          recordInfo -> pailSpec.getStructure.deserialize(recordInBytes.getBytes).asInstanceOf[R]
        }
    }
  }

  implicit class PailBasedWriter[R: ClassTag](rdd: RDD[R]) {

    def saveAsPail(outputLocation: String, pailSpec: PailSpec) = {
      val jobConf = new JobConf(rdd.context.hadoopConfiguration)
      PailUtils.setObject(jobConf, PailOutputFormat.SPEC_ARG, pailSpec)

      rdd.map { record =>
        val pailStruct = pailSpec.getStructure.asInstanceOf[PailStructure[R]]
        val attr = PailUtils.join(pailStruct.getTarget(record), "/")
        val recordInBytes = pailStruct.serialize(record)
        new Text(attr) -> new BytesWritable(recordInBytes)
      }.saveAsHadoopFile(outputLocation, classOf[Text], classOf[BytesWritable], classOf[PailOutputFormat], jobConf)
    }
  }
}
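A hedged usage sketch of these implicits follows. The record type R and the PailSpec are supplied by the caller, since their setup depends on an application-defined PailStructure; the object name, method name, and paths are illustrative.

import com.backtype.hadoop.pail.PailSpec
import com.indix.utils.spark.pail.PailDataSource
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

import scala.reflect.ClassTag

object PailDemo extends PailDataSource {
  // Read records of type R from one pail and write them back out to another.
  def roundTrip[R: ClassTag](sc: SparkContext, spec: PailSpec): Unit = {
    val records: RDD[R] = sc.pailFile[R]("/data/pail-in")   // illustrative input location
    records.saveAsPail("/data/pail-out", spec)              // illustrative output location
  }
}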
Example 19
Source File: SequenceFileRDDFunctions.scala (from drizzle-spark, Apache License 2.0)
package org.apache.spark.rdd

import scala.reflect.{classTag, ClassTag}

import org.apache.hadoop.io.Writable
import org.apache.hadoop.io.compress.CompressionCodec
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapred.SequenceFileOutputFormat

import org.apache.spark.internal.Logging

// (the enclosing SequenceFileRDDFunctions class declaration is elided in this listing)

  def saveAsSequenceFile(
      path: String,
      codec: Option[Class[_ <: CompressionCodec]] = None): Unit = self.withScope {
    def anyToWritable[U <% Writable](u: U): Writable = u

    // TODO We cannot force the return type of `anyToWritable` be same as keyWritableClass and
    // valueWritableClass at the compile time. To implement that, we need to add type parameters to
    // SequenceFileRDDFunctions. however, SequenceFileRDDFunctions is a public class so it will be a
    // breaking change.
    val convertKey = self.keyClass != keyWritableClass
    val convertValue = self.valueClass != valueWritableClass

    logInfo("Saving as sequence file of type (" +
      keyWritableClass.getSimpleName + "," + valueWritableClass.getSimpleName + ")")
    val format = classOf[SequenceFileOutputFormat[Writable, Writable]]
    val jobConf = new JobConf(self.context.hadoopConfiguration)
    if (!convertKey && !convertValue) {
      self.saveAsHadoopFile(path, keyWritableClass, valueWritableClass, format, jobConf, codec)
    } else if (!convertKey && convertValue) {
      self.map(x => (x._1, anyToWritable(x._2))).saveAsHadoopFile(
        path, keyWritableClass, valueWritableClass, format, jobConf, codec)
    } else if (convertKey && !convertValue) {
      self.map(x => (anyToWritable(x._1), x._2)).saveAsHadoopFile(
        path, keyWritableClass, valueWritableClass, format, jobConf, codec)
    } else if (convertKey && convertValue) {
      self.map(x => (anyToWritable(x._1), anyToWritable(x._2))).saveAsHadoopFile(
        path, keyWritableClass, valueWritableClass, format, jobConf, codec)
    }
  }
}
Example 20
Source File: CarbonCountStar.scala (from carbondata, Apache License 2.0)
package org.apache.spark.sql

import scala.collection.JavaConverters._

import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.execution.LeafExecNode
import org.apache.spark.sql.optimizer.CarbonFilters
import org.apache.spark.sql.types.StringType
import org.apache.spark.unsafe.types.UTF8String

import org.apache.carbondata.core.datastore.impl.FileFactory
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier
import org.apache.carbondata.core.metadata.schema.table.CarbonTable
import org.apache.carbondata.core.mutate.CarbonUpdateUtil
import org.apache.carbondata.core.statusmanager.StageInputCollector
import org.apache.carbondata.core.util.{CarbonProperties, ThreadLocalSessionInfo}
import org.apache.carbondata.hadoop.api.{CarbonInputFormat, CarbonTableInputFormat}
import org.apache.carbondata.hadoop.util.CarbonInputFormatUtil
import org.apache.carbondata.spark.load.DataLoadProcessBuilderOnSpark

case class CarbonCountStar(
    attributesRaw: Seq[Attribute],
    carbonTable: CarbonTable,
    sparkSession: SparkSession,
    outUnsafeRows: Boolean = true) extends LeafExecNode {

  override def doExecute(): RDD[InternalRow] = {
    ThreadLocalSessionInfo
      .setConfigurationToCurrentThread(sparkSession.sessionState.newHadoopConf())
    val absoluteTableIdentifier = carbonTable.getAbsoluteTableIdentifier
    val (job, tableInputFormat) = createCarbonInputFormat(absoluteTableIdentifier)
    CarbonInputFormat.setQuerySegment(job.getConfiguration, carbonTable)

    // get row count
    var rowCount = CarbonUpdateUtil.getRowCount(
      tableInputFormat.getBlockRowCount(
        job,
        carbonTable,
        CarbonFilters.getPartitions(
          Seq.empty,
          sparkSession,
          TableIdentifier(
            carbonTable.getTableName,
            Some(carbonTable.getDatabaseName))).map(_.asJava).orNull,
        false),
      carbonTable)

    if (CarbonProperties.isQueryStageInputEnabled) {
      // check for number of row for stage input
      val splits = StageInputCollector.createInputSplits(carbonTable, job.getConfiguration)
      if (!splits.isEmpty) {
        val df = DataLoadProcessBuilderOnSpark.createInputDataFrame(
          sparkSession, carbonTable, splits.asScala)
        rowCount += df.count()
      }
    }

    val valueRaw =
      attributesRaw.head.dataType match {
        case StringType => Seq(UTF8String.fromString(Long.box(rowCount).toString)).toArray
          .asInstanceOf[Array[Any]]
        case _ => Seq(Long.box(rowCount)).toArray.asInstanceOf[Array[Any]]
      }
    val value = new GenericInternalRow(valueRaw)
    val unsafeProjection = UnsafeProjection.create(output.map(_.dataType).toArray)
    val row = if (outUnsafeRows) unsafeProjection(value) else value
    sparkContext.parallelize(Seq(row))
  }

  override def output: Seq[Attribute] = {
    attributesRaw
  }

  private def createCarbonInputFormat(absoluteTableIdentifier: AbsoluteTableIdentifier
  ): (Job, CarbonTableInputFormat[Array[Object]]) = {
    val carbonInputFormat = new CarbonTableInputFormat[Array[Object]]()
    val jobConf: JobConf = new JobConf(FileFactory.getConfiguration)
    SparkHadoopUtil.get.addCredentials(jobConf)
    CarbonInputFormat.setTableInfo(jobConf, carbonTable.getTableInfo)
    val job = new Job(jobConf)
    FileInputFormat.addInputPath(job, new Path(absoluteTableIdentifier.getTablePath))
    CarbonInputFormat
      .setTransactionalTable(job.getConfiguration, carbonTable.getTableInfo.isTransactionalTable)
    CarbonInputFormatUtil.setIndexJobIfConfigured(job.getConfiguration)
    (job, carbonInputFormat)
  }
}