org.apache.spark.sql.catalyst.catalog.CatalogTableType Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.catalog.CatalogTableType.
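CatalogTableType is a small value type in Spark's catalyst catalog with three predefined instances, MANAGED, EXTERNAL, and VIEW; the examples below either set it when building a CatalogTable or branch on it when inspecting table metadata. Before the full examples, here is a minimal sketch of both patterns, assuming nothing beyond the catalyst classes themselves (the database, table, and column names are placeholders):

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.types.StructType

// Build an in-memory table description marked as a managed table (names are placeholders).
val table = CatalogTable(
  identifier = TableIdentifier("people", Some("demo_db")),
  tableType = CatalogTableType.MANAGED,
  storage = CatalogStorageFormat.empty,
  schema = new StructType().add("name", "string").add("age", "int"),
  provider = Some("parquet"))

// Branch on the table type, as several of the commands below do before running.
table.tableType match {
  case CatalogTableType.VIEW     => println("views are handled separately")
  case CatalogTableType.EXTERNAL => println("external table: data lives outside the warehouse")
  case CatalogTableType.MANAGED  => println("managed table: Spark owns the data location")
}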
Example 1
Source File: MetastoreRelationSuite.scala From drizzle-spark with Apache License 2.0 (the same file also appears verbatim in sparkoscope and multi-tenancy-spark)
package org.apache.spark.sql.hive

import org.apache.spark.sql.{QueryTest, Row}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

class MetastoreRelationSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
  test("makeCopy and toJSON should work") {
    val table = CatalogTable(
      identifier = TableIdentifier("test", Some("db")),
      tableType = CatalogTableType.VIEW,
      storage = CatalogStorageFormat.empty,
      schema = StructType(StructField("a", IntegerType, true) :: Nil))
    val relation = MetastoreRelation("db", "test")(table, null)

    // No exception should be thrown
    relation.makeCopy(Array("db", "test"))
    // No exception should be thrown
    relation.toJSON
  }

  test("SPARK-17409: Do Not Optimize Query in CTAS (Hive Serde Table) More Than Once") {
    withTable("bar") {
      withTempView("foo") {
        sql("select 0 as id").createOrReplaceTempView("foo")
        // If we optimize the query in CTAS more than once, the following saveAsTable will fail
        // with the error: `GROUP BY position 0 is not in select list (valid range is [1, 1])`
        sql("CREATE TABLE bar AS SELECT * FROM foo group by id")
        checkAnswer(spark.table("bar"), Row(0) :: Nil)
        val tableMetadata = spark.sessionState.catalog.getTableMetadata(TableIdentifier("bar"))
        assert(tableMetadata.provider == Some("hive"), "the expected table is a Hive serde table")
      }
    }
  }
}
Example 2
Source File: XSQLAnalyzeTableCommand.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.xsql.execution.command

import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.CatalogTableType
import org.apache.spark.sql.execution.command.{CommandUtils, RunnableCommand}
import org.apache.spark.sql.xsql.XSQLSessionCatalog

case class XSQLAnalyzeTableCommand(tableIdent: TableIdentifier, noscan: Boolean = true)
  extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val sessionState = sparkSession.sessionState
    val catalog = sparkSession.sessionState.catalog.asInstanceOf[XSQLSessionCatalog]
    val catalogDB = catalog.getUsedCatalogDatabase(tableIdent.dataSource, tableIdent.database)
    if (catalogDB == None) {
      return Seq.empty[Row]
    }
    val ds = catalogDB.get.dataSourceName
    val db = catalogDB.get.name
    val tableIdentWithDB = TableIdentifier(tableIdent.table, Some(db), Some(ds))
    val tableMeta = catalog.getRawTable(tableIdentWithDB)
    if (tableMeta.tableType == CatalogTableType.VIEW) {
      throw new AnalysisException("ANALYZE TABLE is not supported on views.")
    }

    // Compute stats for the whole table
    val newTotalSize = CommandUtils.calculateTotalSize(sparkSession, tableMeta)
    val newRowCount =
      if (noscan) None else Some(BigInt(sparkSession.table(tableIdentWithDB).count()))

    // Update the metastore if the above statistics of the table are different from those
    // recorded in the metastore.
    val newStats = CommandUtils.compareAndGetNewStats(tableMeta.stats, newTotalSize, newRowCount)
    if (newStats.isDefined) {
      catalog.alterTableStats(tableIdentWithDB, newStats)
    }

    Seq.empty[Row]
  }
}
Example 3
Source File: CatalogSuite.scala From XSQL with Apache License 2.0 (the same file also appears verbatim in Spark-2.3.1)
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.types.StructType

class CatalogSuite extends AnalysisTest {

  test("desc table when owner is set to null") {
    val table = CatalogTable(
      identifier = TableIdentifier("tbl", Some("db1")),
      tableType = CatalogTableType.MANAGED,
      storage = CatalogStorageFormat.empty,
      owner = null,
      schema = new StructType().add("col1", "int").add("col2", "string"),
      provider = Some("parquet"))
    table.toLinkedHashMap
  }
}
Example 4
Source File: AnalyzeTableCommand.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution.command

import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.CatalogTableType

case class AnalyzeTableCommand(
    tableIdent: TableIdentifier,
    noscan: Boolean = true) extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val sessionState = sparkSession.sessionState
    val db = tableIdent.database.getOrElse(sessionState.catalog.getCurrentDatabase)
    val tableIdentWithDB = TableIdentifier(tableIdent.table, Some(db))
    val tableMeta = sessionState.catalog.getTableMetadata(tableIdentWithDB)
    if (tableMeta.tableType == CatalogTableType.VIEW) {
      throw new AnalysisException("ANALYZE TABLE is not supported on views.")
    }

    // Compute stats for the whole table
    val newTotalSize = CommandUtils.calculateTotalSize(sparkSession, tableMeta)
    val newRowCount =
      if (noscan) None else Some(BigInt(sparkSession.table(tableIdentWithDB).count()))

    // Update the metastore if the above statistics of the table are different from those
    // recorded in the metastore.
    val newStats = CommandUtils.compareAndGetNewStats(tableMeta.stats, newTotalSize, newRowCount)
    if (newStats.isDefined) {
      sessionState.catalog.alterTableStats(tableIdentWithDB, newStats)
    }

    Seq.empty[Row]
  }
}
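Example 4 is the implementation behind the ANALYZE TABLE statement, and the CatalogTableType.VIEW check is why views are rejected up front. As a hedged usage sketch, assuming an active SparkSession named spark and an existing table (demo_db.people is a placeholder), the command is normally reached through SQL and its effect shows up in the catalog metadata:

// Size-only statistics (noscan) versus a full scan that also records the row count.
spark.sql("ANALYZE TABLE demo_db.people COMPUTE STATISTICS NOSCAN")
spark.sql("ANALYZE TABLE demo_db.people COMPUTE STATISTICS")

// The updated stats land in the CatalogTable read back by getTableMetadata, as in the command above.
import org.apache.spark.sql.catalyst.TableIdentifier
val meta = spark.sessionState.catalog.getTableMetadata(TableIdentifier("people", Some("demo_db")))
println(meta.stats)  // Option[CatalogStatistics]; populated once the table has been analyzed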
Example 5
Source File: CatalogUtils.scala From spark-atlas-connector with Apache License 2.0
package com.hortonworks.spark.atlas.utils

import java.net.URI

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.types.StructType

object CatalogUtils {

  def createDB(name: String, location: String): CatalogDatabase = {
    CatalogDatabase(name, "", new URI(location), Map.empty)
  }

  def createStorageFormat(
      locationUri: Option[URI] = None,
      inputFormat: Option[String] = None,
      outputFormat: Option[String] = None,
      serd: Option[String] = None,
      compressed: Boolean = false,
      properties: Map[String, String] = Map.empty): CatalogStorageFormat = {
    CatalogStorageFormat(locationUri, inputFormat, outputFormat, serd, compressed, properties)
  }

  def createTable(
      db: String,
      table: String,
      schema: StructType,
      storage: CatalogStorageFormat,
      isHiveTable: Boolean = false): CatalogTable = {
    CatalogTable(
      TableIdentifier(table, Some(db)),
      CatalogTableType.MANAGED,
      storage,
      schema,
      provider = if (isHiveTable) Some("hive") else None)
  }
}
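The helpers in Example 5 are thin wrappers over the catalyst case classes. A sketch of how they might be composed, under the assumption that the location and names are placeholders and that the resulting CatalogTable is only built in memory, not registered with any catalog:

import java.net.URI
import org.apache.spark.sql.catalyst.catalog.CatalogTableType
import org.apache.spark.sql.types.StructType
import com.hortonworks.spark.atlas.utils.CatalogUtils

// Placeholder database and storage location; nothing is written to disk here.
val db = CatalogUtils.createDB("demo_db", "file:/tmp/demo_db")
val storage = CatalogUtils.createStorageFormat(
  locationUri = Some(new URI("file:/tmp/demo_db/people")))
val schema = new StructType().add("name", "string").add("age", "int")

// Builds a MANAGED CatalogTable; pass isHiveTable = true to get provider = Some("hive") instead.
val table = CatalogUtils.createTable("demo_db", "people", schema, storage)
assert(table.tableType == CatalogTableType.MANAGED)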
Example 6
Source File: TestUtils.scala From spark-atlas-connector with Apache License 2.0
package com.hortonworks.spark.atlas

import java.net.URI

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.types.StructType
import com.hortonworks.spark.atlas.utils.SparkUtils
import org.apache.atlas.model.instance.AtlasObjectId

object TestUtils {

  def createDB(name: String, location: String): CatalogDatabase = {
    CatalogDatabase(name, "", new URI(location), Map.empty)
  }

  def createStorageFormat(
      locationUri: Option[URI] = None,
      inputFormat: Option[String] = None,
      outputFormat: Option[String] = None,
      serd: Option[String] = None,
      compressed: Boolean = false,
      properties: Map[String, String] = Map.empty): CatalogStorageFormat = {
    CatalogStorageFormat(locationUri, inputFormat, outputFormat, serd, compressed, properties)
  }

  def createTable(
      db: String,
      table: String,
      schema: StructType,
      storage: CatalogStorageFormat,
      isHiveTable: Boolean = false): CatalogTable = {
    CatalogTable(
      TableIdentifier(table, Some(db)),
      CatalogTableType.MANAGED,
      storage,
      schema,
      provider = if (isHiveTable) Some("hive") else None,
      bucketSpec = None,
      owner = SparkUtils.currUser())
  }

  def assertSubsetOf[T](set: Set[T], subset: Set[T]): Unit = {
    assert(subset.subsetOf(set), s"$subset is not a subset of $set")
  }

  def findEntity(
      entities: Seq[SACAtlasReferenceable],
      objId: AtlasObjectId): Option[SACAtlasReferenceable] = {
    entities.find(p => p.asObjectId == objId)
  }

  def findEntities(
      entities: Seq[SACAtlasReferenceable],
      objIds: Seq[AtlasObjectId]): Seq[SACAtlasReferenceable] = {
    entities.filter(p => objIds.contains(p.asObjectId))
  }
}
Example 7
Source File: CreateDataSourceTableAsSelectHarvesterSuite.scala From spark-atlas-connector with Apache License 2.0
package com.hortonworks.spark.atlas.sql

import scala.util.Random

import com.hortonworks.spark.atlas.types.metadata
import com.hortonworks.spark.atlas.{SACAtlasEntityWithDependencies, WithHiveSupport}
import com.hortonworks.spark.atlas.utils.SparkUtils
import org.apache.atlas.model.instance.AtlasEntity
import org.apache.spark.sql.SaveMode
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTableType}
import org.apache.spark.sql.execution.command.CreateDataSourceTableAsSelectCommand
import org.apache.spark.sql.execution.datasources.DataSource
import org.apache.spark.sql.types.StructType
import org.scalatest.{FunSuite, Matchers}

// This is not leveraging BaseHarvesterSuite, as it doesn't need to be tested with
// both non-remote HMS and remote HMS cases.
class CreateDataSourceTableAsSelectHarvesterSuite
    extends FunSuite with Matchers with WithHiveSupport {

  private val sourceTblName = "source_" + Random.nextInt(100000)

  override protected def beforeAll(): Unit = {
    super.beforeAll()

    sparkSession.sql(s"CREATE TABLE $sourceTblName (name string, age int)")
  }

  test("saveAsTable should have output entity having table details - parquet") {
    testWithProvider("parquet")
  }

  test("saveAsTable should have output entity having table details - hive") {
    val entity = testWithProvider("hive")
    assert(entity.getAttribute("partitionProvider") == "Catalog")
  }

  def testWithProvider(provider: String): AtlasEntity = {
    val destTblName = "dest1_" + Random.nextInt(100000)
    val df = sparkSession.sql(s"SELECT * FROM $sourceTblName")

    // The codes below look after DataFrameWriter.saveAsTable codes as of Spark 2.4.
    // It uses internal APIs for this test. If the compatibility is broken, we should better
    // just remove this test.
    val tableIdent = df.sparkSession.sessionState.sqlParser.parseTableIdentifier(destTblName)
    val storage = DataSource.buildStorageFormatFromOptions(Map("path" -> "/tmp/foo"))
    val tableDesc = CatalogTable(
      identifier = tableIdent,
      tableType = CatalogTableType.EXTERNAL,
      storage = storage,
      schema = new StructType,
      provider = Some(provider),
      partitionColumnNames = Nil,
      bucketSpec = None)

    val cmd = CreateDataSourceTableAsSelectCommand(
      tableDesc,
      SaveMode.ErrorIfExists,
      df.queryExecution.logical,
      Seq("name", "age"))
    val newTable = tableDesc.copy(
      storage = tableDesc.storage.copy(),
      schema = df.schema)
    sparkSession.sessionState.catalog.createTable(
      newTable, ignoreIfExists = false, validateLocation = false)

    val qd = QueryDetail(df.queryExecution, 0L)
    val entities = CommandsHarvester.CreateDataSourceTableAsSelectHarvester.harvest(cmd, qd)
    val processDeps = entities.head.asInstanceOf[SACAtlasEntityWithDependencies].dependencies
    val maybeEntity = processDeps.find(_.typeName == metadata.TABLE_TYPE_STRING)
      .map(_.asInstanceOf[SACAtlasEntityWithDependencies].entity)

    assert(maybeEntity.isDefined, s"Output entity for table [$destTblName] was not found.")
    assert(maybeEntity.get.getAttribute("name") == destTblName)
    assert(maybeEntity.get.getAttribute("owner") == SparkUtils.currUser())
    assert(maybeEntity.get.getAttribute("schemaDesc") == "struct<name:string,age:int>")
    assert(maybeEntity.get.getAttribute("provider") == provider)

    maybeEntity.get
  }
}
Example 8
Source File: DescribeDeltaHistoryCommand.scala From delta with Apache License 2.0
package org.apache.spark.sql.delta.commands

// scalastyle:off import.ordering.noEmptyLine
import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog, DeltaTableIdentifier}
import org.apache.spark.sql.delta.actions.CommitInfo
import org.apache.spark.sql.delta.metering.DeltaLogging
import org.apache.hadoop.fs.Path

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation
import org.apache.spark.sql.catalyst.catalog.CatalogTableType
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics}
import org.apache.spark.sql.execution.command.RunnableCommand

case class DescribeDeltaHistoryCommand(
    path: Option[String],
    tableIdentifier: Option[TableIdentifier],
    limit: Option[Int],
    override val output: Seq[Attribute] = ExpressionEncoder[CommitInfo]().schema.toAttributes)
  extends RunnableCommand with DeltaLogging {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val basePath =
      if (path.nonEmpty) {
        new Path(path.get)
      } else if (tableIdentifier.nonEmpty) {
        val sessionCatalog = sparkSession.sessionState.catalog
        lazy val metadata = sessionCatalog.getTableMetadata(tableIdentifier.get)

        DeltaTableIdentifier(sparkSession, tableIdentifier.get) match {
          case Some(id) if id.path.nonEmpty =>
            new Path(id.path.get)
          case Some(id) if id.table.nonEmpty =>
            new Path(metadata.location)
          case _ =>
            if (metadata.tableType == CatalogTableType.VIEW) {
              throw DeltaErrors.describeViewHistory
            }
            throw DeltaErrors.notADeltaTableException("DESCRIBE HISTORY")
        }
      } else {
        throw DeltaErrors.missingTableIdentifierException("DESCRIBE HISTORY")
      }

    // Max array size
    if (limit.exists(_ > Int.MaxValue - 8)) {
      throw new IllegalArgumentException("Please use a limit less than Int.MaxValue - 8.")
    }

    val deltaLog = DeltaLog.forTable(sparkSession, basePath)
    recordDeltaOperation(deltaLog, "delta.ddl.describeHistory") {
      if (deltaLog.snapshot.version == -1) {
        throw DeltaErrors.notADeltaTableException("DESCRIBE HISTORY")
      }

      import sparkSession.implicits._
      deltaLog.history.getHistory(limit).toDF().collect().toSeq
    }
  }
}
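DescribeDeltaHistoryCommand backs Delta Lake's DESCRIBE HISTORY statement; it resolves the table from either a path or a catalog identifier and uses the CatalogTableType.VIEW check to reject views. A hedged sketch of the usual entry points, assuming an active SparkSession named spark, the Delta Lake library with its SQL extensions enabled, and /tmp/delta/events as a placeholder path to an existing Delta table:

// SQL entry point; the backtick-quoted path is a placeholder.
spark.sql("DESCRIBE HISTORY delta.`/tmp/delta/events` LIMIT 10").show()

// Equivalent Scala API from the Delta Lake library.
import io.delta.tables.DeltaTable
val history = DeltaTable.forPath(spark, "/tmp/delta/events").history(10)
history.select("version", "timestamp", "operation").show()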
Example 9
Source File: AnalyzeTableCommand.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution.command

import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.CatalogTableType

case class AnalyzeTableCommand(
    tableIdent: TableIdentifier,
    noscan: Boolean = true) extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val sessionState = sparkSession.sessionState
    val db = tableIdent.database.getOrElse(sessionState.catalog.getCurrentDatabase)
    val tableIdentWithDB = TableIdentifier(tableIdent.table, Some(db))
    val tableMeta = sessionState.catalog.getTableMetadata(tableIdentWithDB)
    if (tableMeta.tableType == CatalogTableType.VIEW) {
      throw new AnalysisException("ANALYZE TABLE is not supported on views.")
    }

    // Compute stats for the whole table
    val newTotalSize = CommandUtils.calculateTotalSize(sessionState, tableMeta)
    val newRowCount =
      if (noscan) None else Some(BigInt(sparkSession.table(tableIdentWithDB).count()))

    // Update the metastore if the above statistics of the table are different from those
    // recorded in the metastore.
    val newStats = CommandUtils.compareAndGetNewStats(tableMeta.stats, newTotalSize, newRowCount)
    if (newStats.isDefined) {
      sessionState.catalog.alterTableStats(tableIdentWithDB, newStats)
    }

    Seq.empty[Row]
  }
}