org.apache.spark.sql.execution.datasources.LogicalRelation Scala Examples
The following examples show how to use org.apache.spark.sql.execution.datasources.LogicalRelation.
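Before the project-specific examples, here is a minimal, self-contained sketch of the pattern most of them share: walking a LogicalPlan and matching its LogicalRelation nodes to reach the underlying relation or catalog table. This is only an illustrative sketch, assuming a local Spark 2.x setup; the object name LogicalRelationDemo and the table name demo_tbl are hypothetical. Matching on the class (rather than the LogicalRelation(...) extractor) sidesteps the fact that the extractor's arity differs across Spark versions, as the examples below show.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.datasources.LogicalRelation

object LogicalRelationDemo {
  // Collect the catalog table names behind every LogicalRelation node in a plan.
  def relationTableNames(plan: LogicalPlan): Seq[String] =
    plan.collect {
      case rel: LogicalRelation => rel.catalogTable.map(_.identifier.unquotedString)
    }.flatten

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("demo").getOrCreate()
    spark.sql("DROP TABLE IF EXISTS demo_tbl")
    spark.range(10).write.saveAsTable("demo_tbl") // "demo_tbl" is a hypothetical table name
    val plan = spark.table("demo_tbl").queryExecution.optimizedPlan
    println(relationTableNames(plan)) // e.g. List(default.demo_tbl)
    spark.stop()
  }
}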
Example 1
Source File: PruneFileSourcePartitionsSuite.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.execution.datasources.{CatalogFileIndex, HadoopFsRelation, LogicalRelation, PruneFileSourcePartitions}
import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types.StructType

class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("PruneFileSourcePartitions", Once, PruneFileSourcePartitions) :: Nil
  }

  test("PruneFileSourcePartitions should not change the output of LogicalRelation") {
    withTable("test") {
      withTempDir { dir =>
        sql(
          s"""
             |CREATE EXTERNAL TABLE test(i int)
             |PARTITIONED BY (p int)
             |STORED AS parquet
             |LOCATION '${dir.getAbsolutePath}'""".stripMargin)

        val tableMeta = spark.sharedState.externalCatalog.getTable("default", "test")
        val catalogFileIndex = new CatalogFileIndex(spark, tableMeta, 0)

        val dataSchema = StructType(tableMeta.schema.filterNot { f =>
          tableMeta.partitionColumnNames.contains(f.name)
        })

        val relation = HadoopFsRelation(
          location = catalogFileIndex,
          partitionSchema = tableMeta.partitionSchema,
          dataSchema = dataSchema,
          bucketSpec = None,
          fileFormat = new ParquetFileFormat(),
          options = Map.empty)(sparkSession = spark)

        val logicalRelation = LogicalRelation(relation, catalogTable = Some(tableMeta))
        val query = Project(Seq('i, 'p), Filter('p === 1, logicalRelation)).analyze

        val optimized = Optimize.execute(query)
        assert(optimized.missingInput.isEmpty)
      }
    }
  }
}
Example 2
Source File: CarbonFileIndexReplaceRule.scala From carbondata with Apache License 2.0
package org.apache.spark.sql.carbondata.execution.datasources import scala.collection.mutable.ArrayBuffer import org.apache.hadoop.fs.Path import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.datasources.{FileIndex, HadoopFsRelation, InMemoryFileIndex, InsertIntoHadoopFsRelationCommand, LogicalRelation} import org.apache.spark.sql.sources.BaseRelation import org.apache.carbondata.core.datastore.filesystem.CarbonFile import org.apache.carbondata.core.datastore.impl.FileFactory import org.apache.carbondata.core.util.CarbonProperties import org.apache.carbondata.core.util.path.CarbonTablePath private def getDataFolders( tableFolder: CarbonFile, dataFolders: ArrayBuffer[CarbonFile]): Unit = { val files = tableFolder.listFiles() files.foreach { f => if (f.isDirectory) { val files = f.listFiles() if (files.nonEmpty && !files(0).isDirectory) { dataFolders += f } else { getDataFolders(f, dataFolders) } } } } }
Example 3
Source File: CarbonUDFTransformRule.scala From carbondata with Apache License 2.0
package org.apache.spark.sql.optimizer import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, PredicateHelper, ScalaUDF} import org.apache.spark.sql.catalyst.plans.logical.{Filter, Join, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.types.StringType import org.apache.carbondata.core.constants.CarbonCommonConstants class CarbonUDFTransformRule extends Rule[LogicalPlan] with PredicateHelper { override def apply(plan: LogicalPlan): LogicalPlan = { pushDownUDFToJoinLeftRelation(plan) } private def pushDownUDFToJoinLeftRelation(plan: LogicalPlan): LogicalPlan = { val output = plan.transform { case proj@Project(cols, Join( left, right, jointype: org.apache.spark.sql.catalyst.plans.JoinType, condition)) => var projectionToBeAdded: Seq[org.apache.spark.sql.catalyst.expressions.Alias] = Seq.empty var udfExists = false val newCols = cols.map { case a@Alias(s: ScalaUDF, name) if name.equalsIgnoreCase(CarbonCommonConstants.POSITION_ID) || name.equalsIgnoreCase(CarbonCommonConstants.CARBON_IMPLICIT_COLUMN_TUPLEID) => udfExists = true projectionToBeAdded :+= a AttributeReference(name, StringType, nullable = true)().withExprId(a.exprId) case other => other } if (udfExists) { val newLeft = left match { case Project(columns, logicalPlan) => Project(columns ++ projectionToBeAdded, logicalPlan) case filter: Filter => Project(filter.output ++ projectionToBeAdded, filter) case relation: LogicalRelation => Project(relation.output ++ projectionToBeAdded, relation) case other => other } Project(newCols, Join(newLeft, right, jointype, condition)) } else { proj } case other => other } output } }
Example 4
Source File: CarbonDataSourceScan.scala From carbondata with Apache License 2.0
package org.apache.spark.sql.execution.strategy

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.expressions.{Attribute, SortOrder}
import org.apache.spark.sql.catalyst.plans.physical.Partitioning
import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
import org.apache.spark.sql.execution.FileSourceScanExec
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation}

class CarbonDataSourceScan(
    override val output: Seq[Attribute],
    val rdd: RDD[InternalRow],
    @transient override val relation: HadoopFsRelation,
    val partitioning: Partitioning,
    val md: Map[String, String],
    identifier: Option[TableIdentifier],
    @transient private val logicalRelation: LogicalRelation)
  extends FileSourceScanExec(
    relation,
    output,
    relation.dataSchema,
    Seq.empty,
    Seq.empty,
    identifier) {

  // added lazy since spark 2.3.2 version (SPARK-PR#21815)
  override lazy val supportsBatch: Boolean = true

  // added lazy since spark 2.3.2 version (SPARK-PR#21815)
  override lazy val (outputPartitioning, outputOrdering): (Partitioning, Seq[SortOrder]) =
    (partitioning, Nil)

  // added lazy since spark 2.3.2 version (SPARK-PR#21815)
  override lazy val metadata: Map[String, String] = md

  override def inputRDDs(): Seq[RDD[InternalRow]] = rdd :: Nil
}
Example 5
Source File: CarbonDataSourceScan.scala From carbondata with Apache License 2.0
package org.apache.spark.sql.execution.strategy

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.expressions.{Attribute, SortOrder}
import org.apache.spark.sql.catalyst.plans.physical.Partitioning
import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
import org.apache.spark.sql.execution.FileSourceScanExec
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation}

class CarbonDataSourceScan(
    override val output: Seq[Attribute],
    val rdd: RDD[InternalRow],
    @transient override val relation: HadoopFsRelation,
    val partitioning: Partitioning,
    val md: Map[String, String],
    identifier: Option[TableIdentifier],
    @transient private val logicalRelation: LogicalRelation)
  extends FileSourceScanExec(
    relation,
    output,
    relation.dataSchema,
    Seq.empty,
    None,
    Seq.empty,
    identifier) {

  // added lazy since spark 2.3.2 version (SPARK-PR#21815)
  override lazy val supportsBatch: Boolean = true

  // added lazy since spark 2.3.2 version (SPARK-PR#21815)
  override lazy val (outputPartitioning, outputOrdering): (Partitioning, Seq[SortOrder]) =
    (partitioning, Nil)

  // added lazy since spark 2.3.2 version (SPARK-PR#21815)
  override lazy val metadata: Map[String, String] = md

  override def inputRDDs(): Seq[RDD[InternalRow]] = rdd :: Nil
}
Example 6
Source File: MVCoalesceTestCase.scala From carbondata with Apache License 2.0
package org.apache.carbondata.view.rewrite import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.test.util.QueryTest import org.scalatest.BeforeAndAfterAll class MVCoalesceTestCase extends QueryTest with BeforeAndAfterAll { override def beforeAll(): Unit = { drop() sql("create table coalesce_test_main(id int,name string,height int,weight int) " + "using carbondata") sql("insert into coalesce_test_main select 1,'tom',170,130") sql("insert into coalesce_test_main select 2,'tom',170,120") sql("insert into coalesce_test_main select 3,'lily',160,100") } def drop(): Unit = { sql("drop table if exists coalesce_test_main") } test("test mv table with coalesce expression on sql not on mv and less groupby cols") { sql("drop materialized view if exists coalesce_test_main_mv") sql("create materialized view coalesce_test_main_mv as " + "select sum(id) as sum_id,name as myname,weight from coalesce_test_main group by name,weight") sql("refresh materialized view coalesce_test_main_mv") val frame = sql("select coalesce(sum(id),0) as sumid,name from coalesce_test_main group by name") assert(TestUtil.verifyMVHit(frame.queryExecution.optimizedPlan, "coalesce_test_main_mv")) checkAnswer(frame, Seq(Row(3, "tom"), Row(3, "lily"))) sql("drop materialized view if exists coalesce_test_main_mv") } test("test mv table with coalesce expression less groupby cols") { sql("drop materialized view if exists coalesce_test_main_mv") val exception: Exception = intercept[UnsupportedOperationException] { sql("create materialized view coalesce_test_main_mv as " + "select coalesce(sum(id),0) as sum_id,name as myname,weight from coalesce_test_main group by name,weight") sql("refresh materialized view coalesce_test_main_mv") } assert("MV doesn't support Coalesce".equals(exception.getMessage)) val frame = sql("select coalesce(sum(id),0) as sumid,name from coalesce_test_main group by name") assert(!TestUtil.verifyMVHit(frame.queryExecution.optimizedPlan, "coalesce_test_main_mv")) checkAnswer(frame, Seq(Row(3, "tom"), Row(3, "lily"))) sql("drop materialized view if exists coalesce_test_main_mv") } test("test mv table with coalesce expression in other expression") { sql("drop materialized view if exists coalesce_test_main_mv") sql("create materialized view coalesce_test_main_mv as " + "select sum(coalesce(id,0)) as sum_id,name as myname,weight from coalesce_test_main group by name,weight") sql("refresh materialized view coalesce_test_main_mv") val frame = sql("select sum(coalesce(id,0)) as sumid,name from coalesce_test_main group by name") assert(TestUtil.verifyMVHit(frame.queryExecution.optimizedPlan, "coalesce_test_main_mv")) checkAnswer(frame, Seq(Row(3, "tom"), Row(3, "lily"))) sql("drop materialized view if exists coalesce_test_main_mv") } override def afterAll(): Unit ={ drop } } object TestUtil { def verifyMVHit(logicalPlan: LogicalPlan, mvName: String): Boolean = { val tables = logicalPlan collect { case l: LogicalRelation => l.catalogTable.get } tables.exists(_.identifier.table.equalsIgnoreCase(mvName)) } }
Example 7
Source File: SqsSource.scala From bahir with Apache License 2.0
package org.apache.spark.sql.streaming.sqs import java.net.URI import org.apache.hadoop.fs.Path import org.apache.spark.internal.Logging import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} import org.apache.spark.sql.execution.datasources.{DataSource, LogicalRelation} import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.execution.streaming.FileStreamSource._ import org.apache.spark.sql.types.StructType class SqsSource(sparkSession: SparkSession, metadataPath: String, options: Map[String, String], override val schema: StructType) extends Source with Logging { private val sourceOptions = new SqsSourceOptions(options) private val hadoopConf = sparkSession.sessionState.newHadoopConf() private val metadataLog = new FileStreamSourceLog(FileStreamSourceLog.VERSION, sparkSession, metadataPath) private var metadataLogCurrentOffset = metadataLog.getLatest().map(_._1).getOrElse(-1L) private val maxFilesPerTrigger = sourceOptions.maxFilesPerTrigger private val maxFileAgeMs: Long = sourceOptions.maxFileAgeMs private val fileFormatClassName = sourceOptions.fileFormatClassName private val shouldSortFiles = sourceOptions.shouldSortFiles private val sqsClient = new SqsClient(sourceOptions, hadoopConf) metadataLog.allFiles().foreach { entry => sqsClient.sqsFileCache.add(entry.path, MessageDescription(entry.timestamp, true, "")) } sqsClient.sqsFileCache.purge() logInfo(s"maxFilesPerBatch = $maxFilesPerTrigger, maxFileAgeMs = $maxFileAgeMs") val batchFiles = sqsClient.sqsFileCache.getUncommittedFiles(maxFilesPerTrigger, shouldSortFiles) if (batchFiles.nonEmpty) { metadataLogCurrentOffset += 1 metadataLog.add(metadataLogCurrentOffset, batchFiles.map { case (path, timestamp, receiptHandle) => FileEntry(path = path, timestamp = timestamp, batchId = metadataLogCurrentOffset) }.toArray) logInfo(s"Log offset set to $metadataLogCurrentOffset with ${batchFiles.size} new files") val messageReceiptHandles = batchFiles.map { case (path, timestamp, receiptHandle) => sqsClient.sqsFileCache.markCommitted(path) logDebug(s"New file: $path") receiptHandle }.toList sqsClient.addToDeleteMessageQueue(messageReceiptHandles) } val numPurged = sqsClient.sqsFileCache.purge() if (!sqsClient.deleteMessageQueue.isEmpty) { sqsClient.deleteMessagesFromQueue() } logTrace( s""" |Number of files selected for batch = ${batchFiles.size} |Number of files purged from tracking map = $numPurged """.stripMargin) FileStreamSourceOffset(metadataLogCurrentOffset) } override def getOffset: Option[Offset] = Some(fetchMaxOffset()).filterNot(_.logOffset == -1) override def commit(end: Offset): Unit = { // No-op for now; SqsSource currently garbage-collects files based on timestamp // and the value of the maxFileAge parameter. } override def stop(): Unit = { if (!sqsClient.sqsScheduler.isTerminated) { sqsClient.sqsScheduler.shutdownNow() } } override def toString: String = s"SqsSource[${sqsClient.sqsUrl}]" }
Example 8
Source File: PlanUtil.scala From spark-druid-olap with Apache License 2.0
package org.apache.spark.sql.util import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.hive.sparklinedata.SPLSessionState import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, SQLContext} import org.json4s._ import org.json4s.jackson.JsonMethods._ import org.sparklinedata.druid.metadata.DruidRelationInfo import org.sparklinedata.druid.{DruidQuery, DruidRelation, QuerySpec, Utils} object PlanUtil { import Utils._ def druidRelationInfo(tableName: String)(implicit sqlContext: SQLContext): Option[DruidRelationInfo] = { sqlContext.table(tableName).logicalPlan.collectFirst { case LogicalRelation(DruidRelation(drInfo, _), _, _) => drInfo } } def dataFrame(drInfo: DruidRelationInfo, dq: DruidQuery)( implicit sqlContext: SQLContext): DataFrame = { val dR = DruidRelation(drInfo, Some(dq))(sqlContext) val lP = LogicalRelation(dR, None) Dataset.ofRows(sqlContext.sparkSession, lP) } @throws(classOf[AnalysisException]) def logicalPlan(dsName: String, dqStr: String, usingHist: Boolean)( implicit sqlContext: SQLContext): LogicalPlan = { val drInfo = druidRelationInfo(dsName) if (!drInfo.isDefined) { throw new AnalysisException(s"Cannot execute a DruidQuery on $dsName") } val dq = new DruidQuery(parse(dqStr).extract[QuerySpec], drInfo.get.options.useSmile(sqlContext), usingHist, drInfo.get.options.numSegmentsPerHistoricalQuery(sqlContext)) val dR = DruidRelation(drInfo.get, Some(dq))(sqlContext) LogicalRelation(dR, None) } def maxCardinalityIsOne(lp: LogicalPlan): Boolean = { var isone = false val aggs = lp.collect {case ag: Aggregate if ag.groupingExpressions.isEmpty => ag} if (aggs.nonEmpty) { isone = !isCardinalityAugmented(lp, aggs.asInstanceOf[Seq[LogicalPlan]]) } isone } }
Example 9
Source File: PlanningTest.scala From spark-druid-olap with Apache License 2.0
package org.apache.spark.sql.sources.druid.test import java.util.TimeZone import com.github.nscala_time.time.Imports._ import org.apache.spark.sql.DataFrame import org.apache.spark.sql.catalyst.expressions.{Expression, PredicateHelper} import org.apache.spark.sql.catalyst.plans.logical.Filter import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.hive.test.sparklinedata.TestHive import org.apache.spark.sql.sources.druid.DruidPlanner import org.scalatest.BeforeAndAfterEach import org.sparklinedata.druid._ import org.sparklinedata.druid.client.test.BaseTest import org.sparklinedata.druid.metadata.DruidRelationInfo trait PlanningTestHelper extends PredicateHelper { System.setProperty("user.timezone", "UTC") TimeZone.setDefault(TimeZone.getTimeZone("UTC")) override def splitConjunctivePredicates(condition: Expression): Seq[Expression] = { super.splitConjunctivePredicates(condition) } } abstract class PlanningTest extends BaseTest with BeforeAndAfterEach with PlanningTestHelper { val dPlanner = new DruidPlanner(TestHive) var tab: DataFrame = _ var drInfo: DruidRelationInfo = _ var dqb: DruidQueryBuilder = _ var iCE: IntervalConditionExtractor = _ var iCE2: SparkIntervalConditionExtractor = _ override def beforeAll() = { super.beforeAll() tab = TestHive.table("orderLineItemPartSupplier") drInfo = tab.queryExecution.optimizedPlan. asInstanceOf[LogicalRelation].relation.asInstanceOf[DruidRelation].info } override protected def beforeEach(): Unit = { dqb = DruidQueryBuilder(drInfo) iCE = new IntervalConditionExtractor(dqb) iCE2 = new SparkIntervalConditionExtractor(dqb) } def validateFilter(filterStr: String, pushedToDruid: Boolean = true, filSpec: Option[FilterSpec] = None, intervals: List[Interval] = List() ): Unit = { val q = tab.where(filterStr) val filter = q.queryExecution.optimizedPlan.asInstanceOf[Filter] val dqbs = dPlanner.translateProjectFilter( Some(dqb), Seq(), splitConjunctivePredicates(filter.condition), true ) if (pushedToDruid) { assert(dqbs.size == 1) val odqb = dqbs(0) assert(odqb.filterSpec == filSpec) assert(odqb.queryIntervals.intervals == intervals) } } }
Example 10
Source File: IUDCommonUtil.scala From carbondata with Apache License 2.0
package org.apache.spark.sql.execution.command.mutation import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan} import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.hive.HiveSessionCatalog import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.util.CarbonProperties def checkIfSegmentListIsSet(sparkSession: SparkSession, logicalPlan: LogicalPlan): Unit = { val carbonProperties = CarbonProperties.getInstance() logicalPlan.foreach { case unresolvedRelation: UnresolvedRelation => val dbAndTb = sparkSession.sessionState.catalog.asInstanceOf[HiveSessionCatalog].getCurrentDatabase + "." + unresolvedRelation.tableIdentifier.table val segmentProperties = carbonProperties .getProperty(CarbonCommonConstants.CARBON_INPUT_SEGMENTS + dbAndTb, "") if (!(segmentProperties.equals("") || segmentProperties.trim.equals("*"))) { throw new MalformedCarbonCommandException("carbon.input.segments." + dbAndTb + "should not be set for table used in DELETE " + "query. Please reset the property to carbon" + ".input.segments." + dbAndTb + "=*") } case logicalRelation: LogicalRelation if (logicalRelation.relation .isInstanceOf[CarbonDatasourceHadoopRelation]) => val dbAndTb = logicalRelation.relation.asInstanceOf[CarbonDatasourceHadoopRelation].carbonTable .getDatabaseName + "." + logicalRelation.relation.asInstanceOf[CarbonDatasourceHadoopRelation].carbonTable .getTableName val sementProperty = carbonProperties .getProperty(CarbonCommonConstants.CARBON_INPUT_SEGMENTS + dbAndTb, "") if (!(sementProperty.equals("") || sementProperty.trim.equals("*"))) { throw new MalformedCarbonCommandException("carbon.input.segments." + dbAndTb + "should not be set for table used in UPDATE " + "query. Please reset the property to carbon" + ".input.segments." + dbAndTb + "=*") } case filter: Filter => filter.subqueries.toList .foreach(subquery => checkIfSegmentListIsSet(sparkSession, subquery)) case _ => } } }
Example 11
Source File: AddSourceToAttributes.scala From jgit-spark-connector with Apache License 2.0
package tech.sourced.engine.rule import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.sources.BaseRelation import org.apache.spark.sql.types.MetadataBuilder import tech.sourced.engine.{GitRelation, MetadataRelation, Sources} import tech.sourced.engine.compat def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { case compat.LogicalRelation(rel @ GitRelation(_, _, _, schemaSource), out, catalogTable) => withMetadata(rel, schemaSource, out, catalogTable) case compat.LogicalRelation( rel @ MetadataRelation(_, _, _, _, schemaSource), out, catalogTable) => withMetadata(rel, schemaSource, out, catalogTable) } private def withMetadata(relation: BaseRelation, schemaSource: Option[String], out: Seq[AttributeReference], catalogTable: Option[CatalogTable]): LogicalRelation = { val processedOut = schemaSource match { case Some(table) => out.map( _.withMetadata(new MetadataBuilder().putString(SOURCE, table).build() ).asInstanceOf[AttributeReference] ) case None => out } compat.LogicalRelation(relation, processedOut, catalogTable) } }
Example 12
Source File: hbaseCommands.scala From Heracles with Apache License 2.0
package org.apache.spark.sql.hbase.execution import org.apache.spark.annotation.DeveloperApi import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias import org.apache.spark.sql.execution.command.RunnableCommand import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.hbase._ import org.apache.spark.sql.hbase.util.DataTypeUtils import org.apache.spark.sql.types._ import scala.collection.mutable.ArrayBuffer @DeveloperApi case class AlterDropColCommand(namespace: String, tableName: String, columnName: String) extends RunnableCommand { def run(sparkSession: SparkSession): Seq[Row] = { sparkSession.sharedState.externalCatalog.asInstanceOf[HBaseCatalog] .alterTableDropNonKey(namespace, tableName, columnName) sparkSession.sharedState.externalCatalog.asInstanceOf[HBaseCatalog].stopAdmin() Seq.empty[Row] } } @DeveloperApi case class AlterAddColCommand(namespace: String, tableName: String, colName: String, colType: String, colFamily: String, colQualifier: String) extends RunnableCommand { def run(sparkSession: SparkSession): Seq[Row] = { val hbaseCatalog = sparkSession.sharedState.externalCatalog.asInstanceOf[HBaseCatalog] hbaseCatalog.alterTableAddNonKey(namespace, tableName, NonKeyColumn(colName, DataTypeUtils.getDataType(colType), colFamily, colQualifier)) hbaseCatalog.stopAdmin() Seq.empty[Row] } } @DeveloperApi case class InsertValueIntoTableCommand(tid: TableIdentifier, valueSeq: Seq[String]) extends RunnableCommand { override def run(sparkSession: SparkSession) = { val relation: HBaseRelation = sparkSession.sessionState.catalog.externalCatalog .asInstanceOf[HBaseCatalog] .getHBaseRelation(tid.database.getOrElse(null), tid.table).getOrElse(null) val bytes = valueSeq.zipWithIndex.map(v => DataTypeUtils.string2TypeData(v._1, relation.schema(v._2).dataType)) val rows = sparkSession.sparkContext.makeRDD(Seq(Row.fromSeq(bytes))) val inputValuesDF = sparkSession.createDataFrame(rows, relation.schema) relation.insert(inputValuesDF, overwrite = false) Seq.empty[Row] } override def output: Seq[Attribute] = Seq.empty }
Example 13
Source File: PruneFileSourcePartitionsSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.hive.execution import org.apache.spark.sql.QueryTest import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.execution.datasources.{CatalogFileIndex, HadoopFsRelation, LogicalRelation, PruneFileSourcePartitions} import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.StructType class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("PruneFileSourcePartitions", Once, PruneFileSourcePartitions) :: Nil } test("PruneFileSourcePartitions should not change the output of LogicalRelation") { withTable("test") { withTempDir { dir => sql( s""" |CREATE EXTERNAL TABLE test(i int) |PARTITIONED BY (p int) |STORED AS parquet |LOCATION '${dir.toURI}'""".stripMargin) val tableMeta = spark.sharedState.externalCatalog.getTable("default", "test") val catalogFileIndex = new CatalogFileIndex(spark, tableMeta, 0) val dataSchema = StructType(tableMeta.schema.filterNot { f => tableMeta.partitionColumnNames.contains(f.name) }) val relation = HadoopFsRelation( location = catalogFileIndex, partitionSchema = tableMeta.partitionSchema, dataSchema = dataSchema, bucketSpec = None, fileFormat = new ParquetFileFormat(), options = Map.empty)(sparkSession = spark) val logicalRelation = LogicalRelation(relation, tableMeta) val query = Project(Seq('i, 'p), Filter('p === 1, logicalRelation)).analyze val optimized = Optimize.execute(query) assert(optimized.missingInput.isEmpty) } } } test("SPARK-20986 Reset table's statistics after PruneFileSourcePartitions rule") { withTable("tbl") { spark.range(10).selectExpr("id", "id % 3 as p").write.partitionBy("p").saveAsTable("tbl") sql(s"ANALYZE TABLE tbl COMPUTE STATISTICS") val tableStats = spark.sessionState.catalog.getTableMetadata(TableIdentifier("tbl")).stats assert(tableStats.isDefined && tableStats.get.sizeInBytes > 0, "tableStats is lost") val df = sql("SELECT * FROM tbl WHERE p = 1") val sizes1 = df.queryExecution.analyzed.collect { case relation: LogicalRelation => relation.catalogTable.get.stats.get.sizeInBytes } assert(sizes1.size === 1, s"Size wrong for:\n ${df.queryExecution}") assert(sizes1(0) == tableStats.get.sizeInBytes) val relations = df.queryExecution.optimizedPlan.collect { case relation: LogicalRelation => relation } assert(relations.size === 1, s"Size wrong for:\n ${df.queryExecution}") val size2 = relations(0).stats.sizeInBytes assert(size2 == relations(0).catalogTable.get.stats.get.sizeInBytes) assert(size2 < tableStats.get.sizeInBytes) } } }
Example 14
Source File: SqlUtils.scala From spark-acid with Apache License 2.0
package org.apache.spark.sql import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} import org.apache.spark.sql.execution.LogicalRDD import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.types.StructType object SqlUtils { def convertToDF(sparkSession: SparkSession, plan : LogicalPlan): DataFrame = { Dataset.ofRows(sparkSession, plan) } def resolveReferences(sparkSession: SparkSession, expr: Expression, planContaining: LogicalPlan, failIfUnresolved: Boolean, exprName: Option[String] = None): Expression = { resolveReferences(sparkSession, expr, Seq(planContaining), failIfUnresolved, exprName) } def resolveReferences(sparkSession: SparkSession, expr: Expression, planContaining: Seq[LogicalPlan], failIfUnresolved: Boolean, exprName: Option[String]): Expression = { val newPlan = FakeLogicalPlan(expr, planContaining) val resolvedExpr = sparkSession.sessionState.analyzer.execute(newPlan) match { case FakeLogicalPlan(resolvedExpr: Expression, _) => // Return even if it did not successfully resolve resolvedExpr case _ => expr // This is unexpected } if (failIfUnresolved) { resolvedExpr.flatMap(_.references).filter(!_.resolved).foreach { attr => { val failedMsg = exprName match { case Some(name) => s"${attr.sql} resolution in $name given these columns: "+ planContaining.flatMap(_.output).map(_.name).mkString(",") case _ => s"${attr.sql} resolution failed given these columns: "+ planContaining.flatMap(_.output).map(_.name).mkString(",") } attr.failAnalysis(failedMsg) } } } resolvedExpr } def hasSparkStopped(sparkSession: SparkSession): Boolean = { sparkSession.sparkContext.stopped.get() } def createDataFrameUsingAttributes(sparkSession: SparkSession, rdd: RDD[Row], schema: StructType, attributes: Seq[Attribute]): DataFrame = { val encoder = RowEncoder(schema) val catalystRows = rdd.map(encoder.toRow) val logicalPlan = LogicalRDD( attributes, catalystRows, isStreaming = false)(sparkSession) Dataset.ofRows(sparkSession, logicalPlan) } def analysisException(cause: String): Throwable = { new AnalysisException(cause) } } case class FakeLogicalPlan(expr: Expression, children: Seq[LogicalPlan]) extends LogicalPlan { override def output: Seq[Attribute] = children.foldLeft(Seq[Attribute]())((out, child) => out ++ child.output) }
Example 15
Source File: DeleteCommand.scala From spark-acid with Apache License 2.0
package com.qubole.spark.datasources.hiveacid.sql.catalyst.plans.command

import com.qubole.spark.hiveacid.HiveAcidErrors
import com.qubole.spark.hiveacid.datasource.HiveAcidRelation
import org.apache.spark.sql.{Column, Row, SparkSession}
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.execution.datasources.LogicalRelation

case class DeleteCommand(
    table: LogicalPlan,
    condition: Expression)
  extends RunnableCommand {

  // We don't want `table` in children as sometimes we don't want to transform it.
  override def children: Seq[LogicalPlan] = Seq(table)
  override def output: Seq[Attribute] = Seq.empty
  override lazy val resolved: Boolean = childrenResolved

  override def run(sparkSession: SparkSession): Seq[Row] = {
    if (children.size != 1) {
      throw new IllegalArgumentException(
        "DELETE command should specify exactly one table, whereas this has: " + children.size)
    }
    children(0) match {
      case LogicalRelation(relation: HiveAcidRelation, _, _, _) => {
        relation.delete(new Column(condition))
      }
      case _ => throw HiveAcidErrors.tableNotAcidException(table.toString())
    }
    Seq.empty[Row]
  }
}
Example 16
Source File: MergeCommand.scala From spark-acid with Apache License 2.0
package com.qubole.spark.datasources.hiveacid.sql.catalyst.plans.command

import com.qubole.spark.hiveacid.HiveAcidErrors
import com.qubole.spark.hiveacid.datasource.HiveAcidRelation
import com.qubole.spark.hiveacid.merge.{MergeCondition, MergeWhenClause, MergeWhenNotInsert}
import org.apache.spark.sql.catalyst.AliasIdentifier
import org.apache.spark.sql.{Row, SparkSession, SqlUtils}
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.execution.datasources.LogicalRelation

case class MergeCommand(targetTable: LogicalPlan,
    sourceTable: LogicalPlan,
    matched: Seq[MergeWhenClause],
    notMatched: Option[MergeWhenClause],
    mergeCondition: MergeCondition,
    sourceAlias: Option[AliasIdentifier],
    targetAlias: Option[AliasIdentifier])
  extends RunnableCommand {

  override def children: Seq[LogicalPlan] = Seq(targetTable, sourceTable)
  override def output: Seq[Attribute] = Seq.empty
  override lazy val resolved: Boolean = childrenResolved

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val insertClause: Option[MergeWhenNotInsert] = notMatched match {
      case Some(i: MergeWhenNotInsert) => Some(i)
      case None => None
      case _ => throw HiveAcidErrors.mergeValidationError("WHEN NOT Clause has to be INSERT CLAUSE")
    }
    children.head match {
      case LogicalRelation(relation: HiveAcidRelation, _, _, _) =>
        relation.merge(SqlUtils.logicalPlanToDataFrame(sparkSession, sourceTable),
          mergeCondition.expression, matched, insertClause, sourceAlias, targetAlias)
      case SubqueryAlias(_, LogicalRelation(relation: HiveAcidRelation, _, _, _)) =>
        relation.merge(SqlUtils.logicalPlanToDataFrame(sparkSession, sourceTable),
          mergeCondition.expression, matched, insertClause, sourceAlias, targetAlias)
      case _ => throw HiveAcidErrors.tableNotAcidException(targetTable.toString())
    }
    Seq.empty
  }
}
Example 17
Source File: UpdateCommand.scala From spark-acid with Apache License 2.0
package com.qubole.spark.datasources.hiveacid.sql.catalyst.plans.command

import com.qubole.spark.hiveacid.HiveAcidErrors
import com.qubole.spark.hiveacid.datasource.HiveAcidRelation
import org.apache.spark.sql.{Column, Row, SparkSession}
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.execution.datasources.LogicalRelation

case class UpdateCommand(
    table: LogicalPlan,
    setExpressions: Map[String, Expression],
    condition: Option[Expression])
  extends RunnableCommand {

  override def children: Seq[LogicalPlan] = Seq(table)
  override def output: Seq[Attribute] = Seq.empty
  override lazy val resolved: Boolean = childrenResolved

  override def run(sparkSession: SparkSession): Seq[Row] = {
    if (children.size != 1) {
      throw new IllegalArgumentException(
        "UPDATE command should have one table to update, whereas this has: " + children.size)
    }
    children(0) match {
      case LogicalRelation(relation: HiveAcidRelation, _, _, _) => {
        val setColumns = setExpressions.mapValues(expr => new Column(expr))
        val updateFilterColumn = condition.map(new Column(_))
        relation.update(updateFilterColumn, setColumns)
      }
      case LogicalRelation(_, _, Some(catalogTable), _) =>
        throw HiveAcidErrors.tableNotAcidException(catalogTable.qualifiedName)
      case _ => throw HiveAcidErrors.tableNotAcidException(table.toString())
    }
    Seq.empty[Row]
  }
}
Example 18
Source File: HiveAcidAutoConvert.scala From spark-acid with Apache License 2.0
package com.qubole.spark.hiveacid import java.util.Locale import com.qubole.spark.datasources.hiveacid.sql.execution.SparkAcidSqlParser import org.apache.spark.sql.{SparkSession, SparkSessionExtensions} import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.catalyst.plans.logical.{Filter, InsertIntoTable, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.execution.datasources.LogicalRelation import com.qubole.spark.hiveacid.datasource.HiveAcidDataSource case class HiveAcidAutoConvert(spark: SparkSession) extends Rule[LogicalPlan] { private def isConvertible(relation: HiveTableRelation): Boolean = { val serde = relation.tableMeta.storage.serde.getOrElse("").toLowerCase(Locale.ROOT) relation.tableMeta.properties.getOrElse("transactional", "false").toBoolean } private def convert(relation: HiveTableRelation): LogicalRelation = { val options = relation.tableMeta.properties ++ relation.tableMeta.storage.properties ++ Map("table" -> relation.tableMeta.qualifiedName) val newRelation = new HiveAcidDataSource().createRelation(spark.sqlContext, options) LogicalRelation(newRelation, isStreaming = false) } override def apply(plan: LogicalPlan): LogicalPlan = { plan resolveOperators { // Write path case InsertIntoTable(r: HiveTableRelation, partition, query, overwrite, ifPartitionNotExists) if query.resolved && DDLUtils.isHiveTable(r.tableMeta) && isConvertible(r) => InsertIntoTable(convert(r), partition, query, overwrite, ifPartitionNotExists) // Read path case relation: HiveTableRelation if DDLUtils.isHiveTable(relation.tableMeta) && isConvertible(relation) => convert(relation) } } } class HiveAcidAutoConvertExtension extends (SparkSessionExtensions => Unit) { def apply(extension: SparkSessionExtensions): Unit = { extension.injectResolutionRule(HiveAcidAutoConvert.apply) extension.injectParser { (session, parser) => SparkAcidSqlParser(parser) } } }
Example 19
Source File: relationMappingSystemTable.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql.catalyst.analysis.systables

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.sources.sql.SqlLikeRelation
import org.apache.spark.sql.types.{StringType, StructType}
import org.apache.spark.sql.{Row, SQLContext}

object RelationMappingSystemTableProvider extends SystemTableProvider with LocalSpark {

  override def execute(): Seq[Row] = {
    sqlContext.tableNames().map { tableName =>
      val plan = sqlContext.catalog.lookupRelation(TableIdentifier(tableName))
      val sqlName = plan.collectFirst {
        case s: SqlLikeRelation => s.relationName
        case LogicalRelation(s: SqlLikeRelation, _) => s.relationName
      }
      Row(tableName, sqlName)
    }
  }
}

object RelationMappingSystemTable extends SchemaEnumeration {
  val sparkName = Field("RELATION_NAME", StringType, nullable = false)
  val providerName = Field("SQL_NAME", StringType, nullable = true)
}
Example 20
Source File: rules.scala From tispark with Apache License 2.0
package org.apache.spark.sql.extensions import com.pingcap.tispark.statistics.StatisticsManager import com.pingcap.tispark.utils.ReflectionUtil._ import com.pingcap.tispark.{MetaManager, TiDBRelation, TiTableReference} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, UnresolvedRelation} import org.apache.spark.sql.catalyst.catalog.TiSessionCatalog import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.{AnalysisException, _} case class TiResolutionRule(getOrCreateTiContext: SparkSession => TiContext)( sparkSession: SparkSession) extends Rule[LogicalPlan] { protected lazy val meta: MetaManager = tiContext.meta private lazy val autoLoad = tiContext.autoLoad private lazy val tiCatalog = tiContext.tiCatalog private lazy val tiSession = tiContext.tiSession private lazy val sqlContext = tiContext.sqlContext protected val tiContext: TiContext = getOrCreateTiContext(sparkSession) protected val resolveTiDBRelation: TableIdentifier => LogicalPlan = tableIdentifier => { val dbName = getDatabaseFromIdentifier(tableIdentifier) val tableName = tableIdentifier.table val table = meta.getTable(dbName, tableName) if (table.isEmpty) { throw new AnalysisException(s"Table or view '$tableName' not found in database '$dbName'") } if (autoLoad) { StatisticsManager.loadStatisticsInfo(table.get) } val sizeInBytes = StatisticsManager.estimateTableSize(table.get) val tiDBRelation = TiDBRelation(tiSession, TiTableReference(dbName, tableName, sizeInBytes), meta)( sqlContext) // Use SubqueryAlias so that projects and joins can correctly resolve // UnresolvedAttributes in JoinConditions, Projects, Filters, etc. newSubqueryAlias(tableName, LogicalRelation(tiDBRelation)) } override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp resolveTiDBRelations protected def resolveTiDBRelations: PartialFunction[LogicalPlan, LogicalPlan] = { case i @ InsertIntoTable(UnresolvedRelation(tableIdentifier), _, _, _, _) if tiCatalog .catalogOf(tableIdentifier.database) .exists(_.isInstanceOf[TiSessionCatalog]) => i.copy(table = EliminateSubqueryAliases(resolveTiDBRelation(tableIdentifier))) case UnresolvedRelation(tableIdentifier) if tiCatalog .catalogOf(tableIdentifier.database) .exists(_.isInstanceOf[TiSessionCatalog]) => resolveTiDBRelation(tableIdentifier) } private def getDatabaseFromIdentifier(tableIdentifier: TableIdentifier): String = tableIdentifier.database.getOrElse(tiCatalog.getCurrentDatabase) } case class TiDDLRule(getOrCreateTiContext: SparkSession => TiContext)(sparkSession: SparkSession) extends Rule[LogicalPlan] { protected lazy val tiContext: TiContext = getOrCreateTiContext(sparkSession) override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { // TODO: support other commands that may concern TiSpark catalog. case sd: ShowDatabasesCommand => TiShowDatabasesCommand(tiContext, sd) case sd: SetDatabaseCommand => TiSetDatabaseCommand(tiContext, sd) case st: ShowTablesCommand => TiShowTablesCommand(tiContext, st) case st: ShowColumnsCommand => TiShowColumnsCommand(tiContext, st) case dt: DescribeTableCommand => TiDescribeTablesCommand(tiContext, dt) case dc: DescribeColumnCommand => TiDescribeColumnCommand(tiContext, dc) case ct: CreateTableLikeCommand => TiCreateTableLikeCommand(tiContext, ct) } }
Example 21
Source File: TiAggregation.scala From tispark with Apache License 2.0
package org.apache.spark.sql

import com.pingcap.tispark.TiDBRelation
import com.pingcap.tispark.utils.ReflectionUtil
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression, NamedExpression}
import org.apache.spark.sql.catalyst.planning.PhysicalOperation
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.execution.datasources.LogicalRelation

object TiAggregation {
  type ReturnType =
    (Seq[NamedExpression], Seq[AggregateExpression], Seq[NamedExpression], LogicalPlan)

  def unapply(plan: LogicalPlan): Option[ReturnType] =
    ReflectionUtil.callTiAggregationImplUnapply(plan)
}

object TiAggregationProjection {
  type ReturnType = (Seq[Expression], LogicalPlan, TiDBRelation, Seq[NamedExpression])

  def unapply(plan: LogicalPlan): Option[ReturnType] =
    plan match {
      // Only push down aggregates projection when all filters can be applied and
      // all projection expressions are column references
      case PhysicalOperation(
          projects, filters, rel @ LogicalRelation(source: TiDBRelation, _, _, _))
          if projects.forall(_.isInstanceOf[Attribute]) =>
        Some((filters, rel, source, projects))
      case _ => Option.empty[ReturnType]
    }
}
Example 22
Source File: PruneFileSourcePartitionsSuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.hive.execution import org.scalatest.Matchers._ import org.apache.spark.sql.QueryTest import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project, ResolvedHint} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.execution.datasources.{CatalogFileIndex, HadoopFsRelation, LogicalRelation, PruneFileSourcePartitions} import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec import org.apache.spark.sql.functions.broadcast import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.StructType class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("PruneFileSourcePartitions", Once, PruneFileSourcePartitions) :: Nil } test("PruneFileSourcePartitions should not change the output of LogicalRelation") { withTable("test") { withTempDir { dir => sql( s""" |CREATE EXTERNAL TABLE test(i int) |PARTITIONED BY (p int) |STORED AS parquet |LOCATION '${dir.toURI}'""".stripMargin) val tableMeta = spark.sharedState.externalCatalog.getTable("default", "test") val catalogFileIndex = new CatalogFileIndex(spark, tableMeta, 0) val dataSchema = StructType(tableMeta.schema.filterNot { f => tableMeta.partitionColumnNames.contains(f.name) }) val relation = HadoopFsRelation( location = catalogFileIndex, partitionSchema = tableMeta.partitionSchema, dataSchema = dataSchema, bucketSpec = None, fileFormat = new ParquetFileFormat(), options = Map.empty)(sparkSession = spark) val logicalRelation = LogicalRelation(relation, tableMeta) val query = Project(Seq('i, 'p), Filter('p === 1, logicalRelation)).analyze val optimized = Optimize.execute(query) assert(optimized.missingInput.isEmpty) } } } test("SPARK-20986 Reset table's statistics after PruneFileSourcePartitions rule") { withTable("tbl") { spark.range(10).selectExpr("id", "id % 3 as p").write.partitionBy("p").saveAsTable("tbl") sql(s"ANALYZE TABLE tbl COMPUTE STATISTICS") val tableStats = spark.sessionState.catalog.getTableMetadata(TableIdentifier("tbl")).stats assert(tableStats.isDefined && tableStats.get.sizeInBytes > 0, "tableStats is lost") val df = sql("SELECT * FROM tbl WHERE p = 1") val sizes1 = df.queryExecution.analyzed.collect { case relation: LogicalRelation => relation.catalogTable.get.stats.get.sizeInBytes } assert(sizes1.size === 1, s"Size wrong for:\n ${df.queryExecution}") assert(sizes1(0) == tableStats.get.sizeInBytes) val relations = df.queryExecution.optimizedPlan.collect { case relation: LogicalRelation => relation } assert(relations.size === 1, s"Size wrong for:\n ${df.queryExecution}") val size2 = relations(0).stats.sizeInBytes assert(size2 == relations(0).catalogTable.get.stats.get.sizeInBytes) assert(size2 < tableStats.get.sizeInBytes) } } test("SPARK-26576 Broadcast hint not applied to partitioned table") { withTable("tbl") { withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { spark.range(10).selectExpr("id", "id % 3 as p").write.partitionBy("p").saveAsTable("tbl") val df = spark.table("tbl") val qe = df.join(broadcast(df), "p").queryExecution qe.optimizedPlan.collect { case _: ResolvedHint 
=> } should have size 1 qe.sparkPlan.collect { case j: BroadcastHashJoinExec => j } should have size 1 } } } }
Example 23
Source File: TemporaryFlagProxyCatalog.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.{Catalog, OverrideCatalog}
import org.apache.spark.sql.catalyst.plans.logical.Subquery
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.sources.Relation

@deprecated("Use org.apache.spark.sql.TemporaryFlagCatalog instead")
trait TemporaryFlagProxyCatalog extends OverrideCatalog {
  abstract override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = {
    val tables = super.getTables(databaseName)
    tables.map {
      case (tableName: String, isTemporary: Boolean) =>
        val tableIdentifier = TableIdentifier(tableName)
        lookupRelation(tableIdentifier) match {
          case Subquery(_, LogicalRelation(relation: Relation, _)) =>
            (tableIdentifier.table, relation.isTemporary)
          case _ => (tableIdentifier.table, isTemporary)
        }
    }
  }
}
Example 24
Source File: LogicalPlanExtractor.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql.execution.tablefunctions import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.sources.sql.SqlLikeRelation import org.apache.spark.sql.util.PlanUtils._ updated.getOrElse(attr) case (attr, default) => attr } val originalTableName = extractName(originalAttribute, preOrderSeq.reverse).getOrElse("") val tableName = extractName(attribute, preOrderSeq).getOrElse("") (tableName, attribute.name, originalTableName, originalAttribute.name) } private def extractName(attribute: Attribute, plans: Seq[LogicalPlan]): Option[String] = plans.filter(_.outputSet.contains(attribute)).collectFirst { case Subquery(alias, _) => alias case r: SqlLikeRelation => r.relationName case LogicalRelation(r: SqlLikeRelation, _) => r.relationName } def tablePart: Seq[Any] = { tableSchema :: Nil } }
Example 25
Source File: Describable.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql.sources.describable

import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.types._

def apply(any: Any): Describable = any match {
  case describable: Describable =>
    describable
  case logicalRelation: LogicalRelation =>
    LogicalRelationDescriber(logicalRelation)
  case logicalPlan: LogicalPlan =>
    DefaultLogicalPlanDescriber(logicalPlan)
  case default =>
    DefaultDescriber(default)
}
Example 26
Source File: TemporaryFlagCatalog.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.Catalog
import org.apache.spark.sql.catalyst.plans.logical.Subquery
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.sources.Relation

trait TemporaryFlagCatalog extends Catalog {
  abstract override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = {
    val tables = super.getTables(databaseName)
    tables.map {
      case (tableName: String, isTemporary: Boolean) =>
        val tableIdentifier = TableIdentifier(tableName)
        lookupRelation(tableIdentifier) match {
          case Subquery(_, LogicalRelation(relation: Relation, _)) =>
            (tableIdentifier.table, relation.isTemporary)
          case _ => (tableIdentifier.table, isTemporary)
        }
    }
  }
}
Example 27
Source File: ResolveAppendCommand.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.datasources.{AppendRunnableCommand, LogicalRelation}
import org.apache.spark.sql.sources.AppendRelation
import org.apache.spark.sql.sources.commands.UnresolvedAppendCommand

case class ResolveAppendCommand(analyzer: Analyzer) extends Rule[LogicalPlan] {
  override def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    case UnresolvedAppendCommand(table, options) =>
      val resolvedTable = analyzer.execute(table)
      resolvedTable.collectFirst {
        case LogicalRelation(appendRelation: AppendRelation, _) =>
          AppendRunnableCommand(appendRelation, options)
      }.getOrElse {
        throw new AnalysisException(s"Cannot append ${resolvedTable.treeString}")
      }
  }
}
Example 28
Source File: PruneFileSourcePartitionsSuite.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.hive.execution import org.apache.spark.sql.QueryTest import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, PruneFileSourcePartitions, TableFileCatalog} import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.StructType class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("PruneFileSourcePartitions", Once, PruneFileSourcePartitions) :: Nil } test("PruneFileSourcePartitions should not change the output of LogicalRelation") { withTable("test") { withTempDir { dir => sql( s""" |CREATE EXTERNAL TABLE test(i int) |PARTITIONED BY (p int) |STORED AS parquet |LOCATION '${dir.getAbsolutePath}'""".stripMargin) val tableMeta = spark.sharedState.externalCatalog.getTable("default", "test") val tableFileCatalog = new TableFileCatalog(spark, tableMeta, 0) val dataSchema = StructType(tableMeta.schema.filterNot { f => tableMeta.partitionColumnNames.contains(f.name) }) val relation = HadoopFsRelation( location = tableFileCatalog, partitionSchema = tableMeta.partitionSchema, dataSchema = dataSchema, bucketSpec = None, fileFormat = new ParquetFileFormat(), options = Map.empty)(sparkSession = spark) val logicalRelation = LogicalRelation(relation, catalogTable = Some(tableMeta)) val query = Project(Seq('i, 'p), Filter('p === 1, logicalRelation)).analyze val optimized = Optimize.execute(query) assert(optimized.missingInput.isEmpty) } } } }
Example 29
Source File: UseAliasesForAggregationsInGroupingsSuite.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.plans.logical.Aggregate import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.sources.BaseRelation import org.apache.spark.sql.types._ import org.scalatest.FunSuite import org.scalatest.mock.MockitoSugar class UseAliasesForAggregationsInGroupingsSuite extends FunSuite with MockitoSugar { val br1 = new BaseRelation { override def sqlContext: SQLContext = mock[SQLContext] override def schema: StructType = StructType(Seq( StructField("name", StringType), StructField("age", IntegerType) )) } val lr1 = LogicalRelation(br1) val nameAtt = lr1.output.find(_.name == "name").get val ageAtt = lr1.output.find(_.name == "age").get test("replace functions in group by") { val avgExpr = avg(ageAtt) val avgAlias = avgExpr as 'avgAlias assertResult( lr1.groupBy(avgAlias.toAttribute)(avgAlias) )(UseAliasesForFunctionsInGroupings( lr1.groupBy(avgExpr)(avgAlias)) ) assertResult( lr1.select(ageAtt) )(UseAliasesForFunctionsInGroupings( lr1.select(ageAtt)) ) intercept[RuntimeException]( UseAliasesForFunctionsInGroupings(Aggregate(Seq(avgExpr), Seq(ageAtt), lr1)) ) } }
Example 30
Source File: RemoveNestedAliasesSuite.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql.catalyst.analysis import com.sap.spark.PlanTest import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.Alias import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.sources.BaseRelation import org.apache.spark.sql.types._ import org.scalatest.FunSuite import org.scalatest.mock.MockitoSugar class RemoveNestedAliasesSuite extends FunSuite with MockitoSugar with PlanTest { val br1 = new BaseRelation { override def sqlContext: SQLContext = mock[SQLContext] override def schema: StructType = StructType(Seq( StructField("name", StringType), StructField("age", IntegerType) )) } val lr1 = LogicalRelation(br1) val nameAtt = lr1.output.find(_.name == "name").get val ageAtt = lr1.output.find(_.name == "age").get test("Replace alias into aliases") { val avgExpr = avg(ageAtt) val avgAlias = avgExpr as 'avgAlias val aliasAlias = avgAlias as 'aliasAlias val aliasAliasAlias = aliasAlias as 'aliasAliasAlias val copiedAlias = Alias(avgExpr, aliasAlias.name)( exprId = aliasAlias.exprId ) val copiedAlias2 = Alias(avgExpr, aliasAliasAlias.name)( exprId = aliasAliasAlias.exprId ) assertResult( lr1.groupBy(avgAlias.toAttribute)(avgAlias) )(RemoveNestedAliases(lr1.groupBy(avgAlias.toAttribute)(avgAlias))) assertResult( lr1.groupBy(copiedAlias.toAttribute)(copiedAlias) )(RemoveNestedAliases(lr1.groupBy(aliasAlias.toAttribute)(aliasAlias))) assertResult( lr1.groupBy(copiedAlias2.toAttribute)(copiedAlias2) )(RemoveNestedAliases(lr1.groupBy(aliasAliasAlias.toAttribute)(aliasAliasAlias))) } test("Replace alias into expressions") { val ageAlias = ageAtt as 'ageAlias val avgExpr = avg(ageAlias) as 'avgAlias val correctedAvgExpr = avg(ageAtt) as 'avgAlias comparePlans( lr1.groupBy(correctedAvgExpr.toAttribute)(correctedAvgExpr), RemoveNestedAliases(lr1.groupBy(avgExpr.toAttribute)(avgExpr)) ) } }
Example 31
Source File: ResolveHierarchySuite.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions.{Attribute, EqualTo}
import org.apache.spark.sql.catalyst.plans.logical.{AdjacencyListHierarchySpec, Hierarchy}
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.sources.BaseRelation
import org.apache.spark.sql.types._
import org.scalatest.FunSuite
import org.scalatest.mock.MockitoSugar

class ResolveHierarchySuite extends FunSuite with MockitoSugar {

  val br1 = new BaseRelation {
    override def sqlContext: SQLContext = mock[SQLContext]
    override def schema: StructType = StructType(Seq(
      StructField("id", IntegerType),
      StructField("parent", IntegerType)
    ))
  }

  val lr1 = LogicalRelation(br1)
  val idAtt = lr1.output.find(_.name == "id").get
  val parentAtt = lr1.output.find(_.name == "parent").get

  test("Check parenthood expression has no conflicting expression IDs and qualifiers") {
    val source = SimpleAnalyzer.execute(lr1.select('id, 'parent).subquery('u))
    assert(source.resolved)

    val hierarchy = Hierarchy(
      AdjacencyListHierarchySpec(source, "v",
        UnresolvedAttribute("u" :: "id" :: Nil) === UnresolvedAttribute("v" :: "id" :: Nil),
        Some('id.isNull), Nil),
      'node
    )

    val resolveHierarchy = ResolveHierarchy(SimpleAnalyzer)
    val resolveReferences = ResolveReferencesWithHierarchies(SimpleAnalyzer)

    // Apply the hierarchy and reference resolution rules repeatedly (a fixed number
    // of passes) so the hierarchy node and its spec become fully resolved.
    val resolvedHierarchy = (0 to 10).foldLeft(hierarchy: Hierarchy) { (h, _) =>
      SimpleAnalyzer.ResolveReferences(
        resolveReferences(resolveHierarchy(h))
      ).asInstanceOf[Hierarchy]
    }

    assert(resolvedHierarchy.node.resolved)
    val resolvedSpec = resolvedHierarchy.spec.asInstanceOf[AdjacencyListHierarchySpec]
    assert(resolvedSpec.parenthoodExp.resolved)
    assert(resolvedSpec.startWhere.forall(_.resolved))
    assert(resolvedHierarchy.childrenResolved)
    assert(resolvedHierarchy.resolved)

    val parenthoodExpression = resolvedSpec.parenthoodExp.asInstanceOf[EqualTo]
    assertResult("u" :: Nil)(parenthoodExpression.left.asInstanceOf[Attribute].qualifiers)
    assertResult("v" :: Nil)(parenthoodExpression.right.asInstanceOf[Attribute].qualifiers)
    assert(parenthoodExpression.right.asInstanceOf[Attribute].exprId !=
      source.output.find(_.name == "id").get.exprId)
  }
}
Example 32
Source File: ResolveAnnotationsSuite.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical.Project
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.sources.BaseRelation
import org.apache.spark.sql.types._
import org.scalatest.FunSuite
import org.scalatest.mock.MockitoSugar
import org.apache.spark.sql.catalyst.dsl.plans._

class ResolveAnnotationsSuite extends FunSuite with MockitoSugar {

  // scalastyle:off magic.number
  val annotatedRel1 = new BaseRelation {
    override def sqlContext: SQLContext = mock[SQLContext]
    override def schema: StructType = StructType(Seq(
      StructField("id1.1", IntegerType,
        metadata = new MetadataBuilder().putLong("key1.1", 11L).build()),
      StructField("id1.2", IntegerType,
        metadata = new MetadataBuilder()
          .putLong("key1.2", 12L)
          .putLong("key1.3", 13).build()))
    )
  }

  val lr1 = LogicalRelation(annotatedRel1)
  val id11Att = lr1.output.find(_.name == "id1.1").get
  val id12Att = lr1.output.find(_.name == "id1.2").get

  val id11AnnotatedAtt = AnnotatedAttribute(id11Att)(
    Map("key1.1" -> Literal.create(100L, LongType), // override the old key
      "newkey" -> Literal.create(200L, LongType))) // define a new key

  val simpleAnnotatedSelect = lr1.select(id11AnnotatedAtt)
}
Example 33
Source File: ResolveCountDistinctStarSuite.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference}
import org.apache.spark.sql.catalyst.plans.logical.Aggregate
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.sources.BaseRelation
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.scalatest.FunSuite
import org.scalatest.Inside._
import org.scalatest.mock.MockitoSugar
import org.apache.spark.sql.catalyst.dsl.plans.DslLogicalPlan
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete, Count}

import scala.collection.mutable.ArrayBuffer

class ResolveCountDistinctStarSuite extends FunSuite with MockitoSugar {

  val persons = new LogicalRelation(new BaseRelation {
    override def sqlContext: SQLContext = mock[SQLContext]
    override def schema: StructType = StructType(Seq(
      StructField("age", IntegerType),
      StructField("name", StringType)
    ))
  })

  test("Count distinct star is resolved correctly") {
    val projection = persons.select(UnresolvedAlias(
      AggregateExpression(Count(UnresolvedStar(None) :: Nil), Complete, true)))
    val stillNotCompletelyResolvedAggregate = SimpleAnalyzer.execute(projection)
    val resolvedAggregate = ResolveCountDistinctStar(SimpleAnalyzer)
      .apply(stillNotCompletelyResolvedAggregate)
    inside(resolvedAggregate) {
      case Aggregate(Nil,
        ArrayBuffer(Alias(AggregateExpression(Count(expressions), Complete, true), _)), _) =>
        assert(expressions.collect {
          case a: AttributeReference => a.name
        }.toSet == Set("name", "age"))
    }
    assert(resolvedAggregate.resolved)
  }
}
Example 34
Source File: PruneFileSourcePartitionsSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.execution.datasources.{CatalogFileIndex, HadoopFsRelation, LogicalRelation, PruneFileSourcePartitions}
import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types.StructType

class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("PruneFileSourcePartitions", Once, PruneFileSourcePartitions) :: Nil
  }

  test("PruneFileSourcePartitions should not change the output of LogicalRelation") {
    withTable("test") {
      withTempDir { dir =>
        sql(
          s"""
            |CREATE EXTERNAL TABLE test(i int)
            |PARTITIONED BY (p int)
            |STORED AS parquet
            |LOCATION '${dir.getAbsolutePath}'""".stripMargin)

        val tableMeta = spark.sharedState.externalCatalog.getTable("default", "test")
        val catalogFileIndex = new CatalogFileIndex(spark, tableMeta, 0)

        val dataSchema = StructType(tableMeta.schema.filterNot { f =>
          tableMeta.partitionColumnNames.contains(f.name)
        })
        val relation = HadoopFsRelation(
          location = catalogFileIndex,
          partitionSchema = tableMeta.partitionSchema,
          dataSchema = dataSchema,
          bucketSpec = None,
          fileFormat = new ParquetFileFormat(),
          options = Map.empty)(sparkSession = spark)

        val logicalRelation = LogicalRelation(relation, catalogTable = Some(tableMeta))
        val query = Project(Seq('i, 'p), Filter('p === 1, logicalRelation)).analyze

        val optimized = Optimize.execute(query)
        assert(optimized.missingInput.isEmpty)
      }
    }
  }
}
Example 35
Source File: LogicalPlanSignatureGenerator.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.mv.plans.util

import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.execution.datasources.LogicalRelation

import org.apache.carbondata.mv.plans._

object CheckSPJG {

  def isSPJG(subplan: LogicalPlan): Boolean = {
    subplan match {
      case a: Aggregate =>
        a.child.collect {
          case Join(_, _, _, _) | Project(_, _) | Filter(_, _) |
               HiveTableRelation(_, _, _) => true
          case l: LogicalRelation => true
          case _ => false
        }.forall(identity)
      case _ => false
    }
  }
}

object LogicalPlanSignatureGenerator extends SignatureGenerator[LogicalPlan] {
  lazy val rule: SignatureRule[LogicalPlan] = LogicalPlanRule

  override def generate(plan: LogicalPlan): Option[Signature] = {
    if (plan.isSPJG) {
      super.generate(plan)
    } else {
      None
    }
  }
}

object LogicalPlanRule extends SignatureRule[LogicalPlan] {

  def apply(plan: LogicalPlan, childSignatures: Seq[Option[Signature]]): Option[Signature] = {
    plan match {
      case l: LogicalRelation =>
        // TODO: implement this (link to BaseRelation)
        None
      case HiveTableRelation(tableMeta, _, _) =>
        Some(Signature(false,
          Set(Seq(tableMeta.database, tableMeta.identifier.table).mkString("."))))
      case l: LocalRelation =>
        // LocalRelation is for unit test cases
        Some(Signature(groupby = false, Set(l.toString())))
      case Filter(_, _) =>
        if (childSignatures.length == 1 &&
            !childSignatures(0).getOrElse(Signature()).groupby) {
          // if (!childSignatures(0).getOrElse(Signature()).groupby) {
          childSignatures(0)
          // }
        } else {
          None
        }
      case Project(_, _) =>
        if (childSignatures.length == 1 &&
            !childSignatures(0).getOrElse(Signature()).groupby) {
          childSignatures(0)
        } else {
          None
        }
      case Join(_, _, _, _) =>
        if (childSignatures.length == 2 &&
            !childSignatures(0).getOrElse(Signature()).groupby &&
            !childSignatures(1).getOrElse(Signature()).groupby) {
          Some(Signature(false,
            childSignatures(0).getOrElse(Signature()).datasets
              .union(childSignatures(1).getOrElse(Signature()).datasets)))
        } else {
          None
        }
      case Aggregate(_, _, _) =>
        if (childSignatures.length == 1 &&
            !childSignatures(0).getOrElse(Signature()).groupby) {
          Some(Signature(true, childSignatures(0).getOrElse(Signature()).datasets))
        } else {
          None
        }
      case _ => None
    }
  }
}
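As a rough, hypothetical usage sketch (not part of the original listing): the snippet below exercises CheckSPJG.isSPJG on plans built with the Catalyst test DSL, reusing the mocked-BaseRelation pattern from the suites above. The object name CheckSPJGExample and the mock setup are assumptions; it presumes ScalaTest/Mockito and a Spark 2.x-era Catalyst on the classpath, matching the code above.

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.sources.BaseRelation
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.scalatest.mock.MockitoSugar

// Hypothetical driver object; not part of the carbondata sources.
object CheckSPJGExample extends MockitoSugar {

  val br = new BaseRelation {
    override def sqlContext: SQLContext = mock[SQLContext]
    override def schema: StructType = StructType(Seq(
      StructField("id", IntegerType),
      StructField("name", StringType)))
  }
  val lr = LogicalRelation(br)
  val idAtt = lr.output.find(_.name == "id").get
  val nameAtt = lr.output.find(_.name == "name").get

  // Aggregate over Project/Filter/LogicalRelation: every node collected under the
  // Aggregate maps to true, so the plan counts as select-project-join-groupby (SPJG).
  val spjg = lr.where(idAtt > 1).select(nameAtt).groupBy(nameAtt)(count(idAtt))
  assert(CheckSPJG.isSPJG(spjg))

  // No Aggregate at the root, so isSPJG falls through to the default case.
  assert(!CheckSPJG.isSPJG(lr.select(nameAtt)))

  // A LocalRelation leaf matches neither relation pattern, which breaks the forall.
  assert(!CheckSPJG.isSPJG(LocalRelation('id.int).groupBy()(count('id))))
}

The same classification drives LogicalPlanSignatureGenerator.generate above: only plans that pass isSPJG are handed to the signature rule, everything else yields None.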
Example 36
Source File: CarbonLoadParams.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command.management

import java.text.SimpleDateFormat
import java.util

import scala.collection.mutable

import org.apache.hadoop.conf.Configuration
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.command.UpdateTableModel
import org.apache.spark.sql.execution.datasources.LogicalRelation

import org.apache.carbondata.core.indexstore.PartitionSpec
import org.apache.carbondata.core.statusmanager.SegmentStatus
import org.apache.carbondata.core.util.CarbonProperties
import org.apache.carbondata.events.OperationContext
import org.apache.carbondata.processing.loading.model.CarbonLoadModel

case class CarbonLoadParams(
    sparkSession: SparkSession,
    tableName: String,
    sizeInBytes: Long,
    isOverwriteTable: Boolean,
    carbonLoadModel: CarbonLoadModel,
    hadoopConf: Configuration,
    logicalPartitionRelation: LogicalRelation,
    dateFormat: SimpleDateFormat,
    timeStampFormat: SimpleDateFormat,
    optionsOriginal: Map[String, String],
    finalPartition: Map[String, Option[String]],
    currPartitions: util.List[PartitionSpec],
    partitionStatus: SegmentStatus,
    var dataFrame: Option[DataFrame],
    scanResultRDD: Option[RDD[InternalRow]],
    updateModel: Option[UpdateTableModel],
    operationContext: OperationContext) {
}