org.apache.spark.sql.catalyst.TableIdentifier Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.TableIdentifier.
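Before the examples, here is a minimal sketch (not taken from any of the projects below) of what a TableIdentifier is: a table name plus an optional database name, with quoted and unquoted string forms. The names used are placeholders.

import org.apache.spark.sql.catalyst.TableIdentifier

object TableIdentifierBasics {
  def main(args: Array[String]): Unit = {
    // No database: catalog lookups fall back to the session's current database.
    val bare = TableIdentifier("events")
    // Qualified with a database.
    val qualified = TableIdentifier("events", Some("analytics"))

    println(bare.table)               // events
    println(bare.database)            // None
    println(qualified.quotedString)   // `analytics`.`events`
    println(qualified.unquotedString) // analytics.events
  }
}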
Example 1
Source File: MetastoreRelationSuite.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.hive import org.apache.spark.sql.{QueryTest, Row} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.{IntegerType, StructField, StructType} class MetastoreRelationSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { test("makeCopy and toJSON should work") { val table = CatalogTable( identifier = TableIdentifier("test", Some("db")), tableType = CatalogTableType.VIEW, storage = CatalogStorageFormat.empty, schema = StructType(StructField("a", IntegerType, true) :: Nil)) val relation = MetastoreRelation("db", "test")(table, null) // No exception should be thrown relation.makeCopy(Array("db", "test")) // No exception should be thrown relation.toJSON } test("SPARK-17409: Do Not Optimize Query in CTAS (Hive Serde Table) More Than Once") { withTable("bar") { withTempView("foo") { sql("select 0 as id").createOrReplaceTempView("foo") // If we optimize the query in CTAS more than once, the following saveAsTable will fail // with the error: `GROUP BY position 0 is not in select list (valid range is [1, 1])` sql("CREATE TABLE bar AS SELECT * FROM foo group by id") checkAnswer(spark.table("bar"), Row(0) :: Nil) val tableMetadata = spark.sessionState.catalog.getTableMetadata(TableIdentifier("bar")) assert(tableMetadata.provider == Some("hive"), "the expected table is a Hive serde table") } } } }
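A usage note on the lookup pattern above: when TableIdentifier("bar") carries no database, SessionCatalog resolves it against the current database. The spark-shell sketch below assumes an active SparkSession named spark and a table named bar; it is an illustration, not part of the original test.

import org.apache.spark.sql.catalyst.TableIdentifier

spark.range(1).write.saveAsTable("bar")

// Unqualified: resolved against the current database (usually `default`).
val meta = spark.sessionState.catalog.getTableMetadata(TableIdentifier("bar"))
// The same lookup, explicitly qualified.
val sameMeta = spark.sessionState.catalog.getTableMetadata(TableIdentifier("bar", Some("default")))

println(meta.provider)  // e.g. Some(parquet) for a default data source table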
Example 2
Source File: ListTablesSuite.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.hive import org.scalatest.BeforeAndAfterAll import org.apache.spark.sql.QueryTest import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.hive.test.TestHiveSingleton class ListTablesSuite extends QueryTest with TestHiveSingleton with BeforeAndAfterAll { import hiveContext._ import hiveContext.implicits._ val df = sparkContext.parallelize((1 to 10).map(i => (i, s"str$i"))).toDF("key", "value") override def beforeAll(): Unit = { super.beforeAll() // The catalog in HiveContext is a case insensitive one. sessionState.catalog.createTempView( "ListTablesSuiteTable", df.logicalPlan, overrideIfExists = true) sql("CREATE TABLE HiveListTablesSuiteTable (key int, value string)") sql("CREATE DATABASE IF NOT EXISTS ListTablesSuiteDB") sql("CREATE TABLE ListTablesSuiteDB.HiveInDBListTablesSuiteTable (key int, value string)") } override def afterAll(): Unit = { try { sessionState.catalog.dropTable( TableIdentifier("ListTablesSuiteTable"), ignoreIfNotExists = true, purge = false) sql("DROP TABLE IF EXISTS HiveListTablesSuiteTable") sql("DROP TABLE IF EXISTS ListTablesSuiteDB.HiveInDBListTablesSuiteTable") sql("DROP DATABASE IF EXISTS ListTablesSuiteDB") } finally { super.afterAll() } } test("get all tables of current database") { Seq(tables(), sql("SHOW TABLes")).foreach { case allTables => // We are using default DB. checkAnswer( allTables.filter("tableName = 'listtablessuitetable'"), Row("", "listtablessuitetable", true)) checkAnswer( allTables.filter("tableName = 'hivelisttablessuitetable'"), Row("default", "hivelisttablessuitetable", false)) assert(allTables.filter("tableName = 'hiveindblisttablessuitetable'").count() === 0) } } test("getting all tables with a database name") { Seq(tables("listtablessuiteDb"), sql("SHOW TABLes in listTablesSuitedb")).foreach { case allTables => checkAnswer( allTables.filter("tableName = 'listtablessuitetable'"), Row("", "listtablessuitetable", true)) assert(allTables.filter("tableName = 'hivelisttablessuitetable'").count() === 0) checkAnswer( allTables.filter("tableName = 'hiveindblisttablessuitetable'"), Row("listtablessuitedb", "hiveindblisttablessuitetable", false)) } } }
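The same catalog round trip, reduced to a spark-shell sketch (the view name tmp_view is a placeholder): create a temporary view, list tables, then drop it through SessionCatalog with a TableIdentifier, using the same ignoreIfNotExists/purge flags as the test above.

import org.apache.spark.sql.catalyst.TableIdentifier

spark.range(5).toDF("key").createOrReplaceTempView("tmp_view")

spark.sql("SHOW TABLES").show()   // tmp_view is listed with isTemporary = true

spark.sessionState.catalog.dropTable(
  TableIdentifier("tmp_view"), ignoreIfNotExists = true, purge = false)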
Example 3
Source File: TableIdentifierParserSuite.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.catalyst.parser import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.TableIdentifier class TableIdentifierParserSuite extends SparkFunSuite { import CatalystSqlParser._ // Add "$elem$", "$value$" & "$key$" val hiveNonReservedKeyword = Array("add", "admin", "after", "analyze", "archive", "asc", "before", "bucket", "buckets", "cascade", "change", "cluster", "clustered", "clusterstatus", "collection", "columns", "comment", "compact", "compactions", "compute", "concatenate", "continue", "data", "day", "databases", "datetime", "dbproperties", "deferred", "defined", "delimited", "dependency", "desc", "directories", "directory", "disable", "distribute", "enable", "escaped", "exclusive", "explain", "export", "fields", "file", "fileformat", "first", "format", "formatted", "functions", "hold_ddltime", "hour", "idxproperties", "ignore", "index", "indexes", "inpath", "inputdriver", "inputformat", "items", "jar", "keys", "key_type", "last", "limit", "offset", "lines", "load", "location", "lock", "locks", "logical", "long", "mapjoin", "materialized", "metadata", "minus", "minute", "month", "msck", "noscan", "no_drop", "nulls", "offline", "option", "outputdriver", "outputformat", "overwrite", "owner", "partitioned", "partitions", "plus", "pretty", "principals", "protection", "purge", "read", "readonly", "rebuild", "recordreader", "recordwriter", "reload", "rename", "repair", "replace", "replication", "restrict", "rewrite", "role", "roles", "schemas", "second", "serde", "serdeproperties", "server", "sets", "shared", "show", "show_database", "skewed", "sort", "sorted", "ssl", "statistics", "stored", "streamtable", "string", "struct", "tables", "tblproperties", "temporary", "terminated", "tinyint", "touch", "transactions", "unarchive", "undo", "uniontype", "unlock", "unset", "unsigned", "uri", "use", "utc", "utctimestamp", "view", "while", "year", "work", "transaction", "write", "isolation", "level", "snapshot", "autocommit", "all", "alter", "array", "as", "authorization", "between", "bigint", "binary", "boolean", "both", "by", "create", "cube", "current_date", "current_timestamp", "cursor", "date", "decimal", "delete", "describe", "double", "drop", "exists", "external", "false", "fetch", "float", "for", "grant", "group", "grouping", "import", "in", "insert", "int", "into", "is", "lateral", "like", "local", "none", "null", "of", "order", "out", "outer", "partition", "percent", "procedure", "range", "reads", "revoke", "rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger", "true", "truncate", "update", "user", "using", "values", "with", "regexp", "rlike", "bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float", "int", "smallint", "timestamp", "at") val hiveStrictNonReservedKeyword = Seq("anti", "full", "inner", "left", "semi", "right", "natural", "union", "intersect", "except", "database", "on", "join", "cross", "select", "from", "where", "having", "from", "to", "table", "with", "not") test("table identifier") { // Regular names. assert(TableIdentifier("q") === parseTableIdentifier("q")) assert(TableIdentifier("q", Option("d")) === parseTableIdentifier("d.q")) // Illegal names. 
Seq("", "d.q.g", "t:", "${some.var.x}", "tab:1").foreach { identifier => intercept[ParseException](parseTableIdentifier(identifier)) } } test("quoted identifiers") { assert(TableIdentifier("z", Some("x.y")) === parseTableIdentifier("`x.y`.z")) assert(TableIdentifier("y.z", Some("x")) === parseTableIdentifier("x.`y.z`")) assert(TableIdentifier("z", Some("`x.y`")) === parseTableIdentifier("```x.y```.z")) assert(TableIdentifier("`y.z`", Some("x")) === parseTableIdentifier("x.```y.z```")) assert(TableIdentifier("x.y.z", None) === parseTableIdentifier("`x.y.z`")) } test("table identifier - strict keywords") { // SQL Keywords. hiveStrictNonReservedKeyword.foreach { keyword => assert(TableIdentifier(keyword) === parseTableIdentifier(keyword)) assert(TableIdentifier(keyword) === parseTableIdentifier(s"`$keyword`")) assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.`$keyword`")) } } test("table identifier - non reserved keywords") { // Hive keywords are allowed. hiveNonReservedKeyword.foreach { nonReserved => assert(TableIdentifier(nonReserved) === parseTableIdentifier(nonReserved)) } } test("SPARK-17364 table identifier - contains number") { assert(parseTableIdentifier("123_") == TableIdentifier("123_")) assert(parseTableIdentifier("1a.123_") == TableIdentifier("123_", Some("1a"))) // ".123" should not be treated as token of type DECIMAL_VALUE assert(parseTableIdentifier("a.123A") == TableIdentifier("123A", Some("a"))) // ".123E3" should not be treated as token of type SCIENTIFIC_DECIMAL_VALUE assert(parseTableIdentifier("a.123E3_LIST") == TableIdentifier("123E3_LIST", Some("a"))) // ".123D" should not be treated as token of type DOUBLE_LITERAL assert(parseTableIdentifier("a.123D_LIST") == TableIdentifier("123D_LIST", Some("a"))) // ".123BD" should not be treated as token of type BIGDECIMAL_LITERAL assert(parseTableIdentifier("a.123BD_LIST") == TableIdentifier("123BD_LIST", Some("a"))) } test("SPARK-17832 table identifier - contains backtick") { val complexName = TableIdentifier("`weird`table`name", Some("`d`b`1")) assert(complexName === parseTableIdentifier("```d``b``1`.```weird``table``name`")) assert(complexName === parseTableIdentifier(complexName.quotedString)) intercept[ParseException](parseTableIdentifier(complexName.unquotedString)) // Table identifier contains countious backticks should be treated correctly. val complexName2 = TableIdentifier("x``y", Some("d``b")) assert(complexName2 === parseTableIdentifier(complexName2.quotedString)) } }
Example 4
Source File: ddl.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.execution.datasources import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan} import org.apache.spark.sql.execution.command.RunnableCommand import org.apache.spark.sql.types._ case class CreateTable( tableDesc: CatalogTable, mode: SaveMode, query: Option[LogicalPlan]) extends Command { assert(tableDesc.provider.isDefined, "The table to be created must have a provider.") if (query.isEmpty) { assert( mode == SaveMode.ErrorIfExists || mode == SaveMode.Ignore, "create table without data insertion can only use ErrorIfExists or Ignore as SaveMode.") } override def innerChildren: Seq[QueryPlan[_]] = query.toSeq } class CaseInsensitiveMap(map: Map[String, String]) extends Map[String, String] with Serializable { val baseMap = map.map(kv => kv.copy(_1 = kv._1.toLowerCase)) override def get(k: String): Option[String] = baseMap.get(k.toLowerCase) override def + [B1 >: String](kv: (String, B1)): Map[String, B1] = baseMap + kv.copy(_1 = kv._1.toLowerCase) override def iterator: Iterator[(String, String)] = baseMap.iterator override def -(key: String): Map[String, String] = baseMap - key.toLowerCase }
Example 5
Source File: cache.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.execution.command import org.apache.spark.sql.{Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan case class CacheTableCommand( tableIdent: TableIdentifier, plan: Option[LogicalPlan], isLazy: Boolean) extends RunnableCommand { require(plan.isEmpty || tableIdent.database.isEmpty, "Database name is not allowed in CACHE TABLE AS SELECT") override protected def innerChildren: Seq[QueryPlan[_]] = { plan.toSeq } override def run(sparkSession: SparkSession): Seq[Row] = { plan.foreach { logicalPlan => Dataset.ofRows(sparkSession, logicalPlan).createTempView(tableIdent.quotedString) } sparkSession.catalog.cacheTable(tableIdent.quotedString) if (!isLazy) { // Performs eager caching sparkSession.table(tableIdent).count() } Seq.empty[Row] } } case class UncacheTableCommand(tableIdent: TableIdentifier) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { sparkSession.catalog.uncacheTable(tableIdent.quotedString) Seq.empty[Row] } } case object ClearCacheCommand extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { sparkSession.catalog.clearCache() Seq.empty[Row] } }
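For reference, the SQL statements that reach these commands, as a spark-shell sketch; the view name events is a placeholder.

spark.range(10).createOrReplaceTempView("events")

spark.sql("CACHE TABLE events")           // CacheTableCommand with isLazy = false
assert(spark.catalog.isCached("events"))

spark.sql("UNCACHE TABLE events")         // UncacheTableCommand
spark.sql("CLEAR CACHE")                  // ClearCacheCommand
// "CACHE LAZY TABLE events" would defer materialization (isLazy = true)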
Example 6
Source File: rules.scala From tispark with Apache License 2.0
package org.apache.spark.sql.extensions import com.pingcap.tispark.statistics.StatisticsManager import com.pingcap.tispark.utils.ReflectionUtil._ import com.pingcap.tispark.{MetaManager, TiDBRelation, TiTableReference} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, UnresolvedRelation} import org.apache.spark.sql.catalyst.catalog.TiSessionCatalog import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.{AnalysisException, _} case class TiResolutionRule(getOrCreateTiContext: SparkSession => TiContext)( sparkSession: SparkSession) extends Rule[LogicalPlan] { protected lazy val meta: MetaManager = tiContext.meta private lazy val autoLoad = tiContext.autoLoad private lazy val tiCatalog = tiContext.tiCatalog private lazy val tiSession = tiContext.tiSession private lazy val sqlContext = tiContext.sqlContext protected val tiContext: TiContext = getOrCreateTiContext(sparkSession) protected val resolveTiDBRelation: TableIdentifier => LogicalPlan = tableIdentifier => { val dbName = getDatabaseFromIdentifier(tableIdentifier) val tableName = tableIdentifier.table val table = meta.getTable(dbName, tableName) if (table.isEmpty) { throw new AnalysisException(s"Table or view '$tableName' not found in database '$dbName'") } if (autoLoad) { StatisticsManager.loadStatisticsInfo(table.get) } val sizeInBytes = StatisticsManager.estimateTableSize(table.get) val tiDBRelation = TiDBRelation(tiSession, TiTableReference(dbName, tableName, sizeInBytes), meta)( sqlContext) // Use SubqueryAlias so that projects and joins can correctly resolve // UnresolvedAttributes in JoinConditions, Projects, Filters, etc. newSubqueryAlias(tableName, LogicalRelation(tiDBRelation)) } override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp resolveTiDBRelations protected def resolveTiDBRelations: PartialFunction[LogicalPlan, LogicalPlan] = { case i @ InsertIntoTable(UnresolvedRelation(tableIdentifier), _, _, _, _) if tiCatalog .catalogOf(tableIdentifier.database) .exists(_.isInstanceOf[TiSessionCatalog]) => i.copy(table = EliminateSubqueryAliases(resolveTiDBRelation(tableIdentifier))) case UnresolvedRelation(tableIdentifier) if tiCatalog .catalogOf(tableIdentifier.database) .exists(_.isInstanceOf[TiSessionCatalog]) => resolveTiDBRelation(tableIdentifier) } private def getDatabaseFromIdentifier(tableIdentifier: TableIdentifier): String = tableIdentifier.database.getOrElse(tiCatalog.getCurrentDatabase) } case class TiDDLRule(getOrCreateTiContext: SparkSession => TiContext)(sparkSession: SparkSession) extends Rule[LogicalPlan] { protected lazy val tiContext: TiContext = getOrCreateTiContext(sparkSession) override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp { // TODO: support other commands that may concern TiSpark catalog. case sd: ShowDatabasesCommand => TiShowDatabasesCommand(tiContext, sd) case sd: SetDatabaseCommand => TiSetDatabaseCommand(tiContext, sd) case st: ShowTablesCommand => TiShowTablesCommand(tiContext, st) case st: ShowColumnsCommand => TiShowColumnsCommand(tiContext, st) case dt: DescribeTableCommand => TiDescribeTablesCommand(tiContext, dt) case dc: DescribeColumnCommand => TiDescribeColumnCommand(tiContext, dc) case ct: CreateTableLikeCommand => TiCreateTableLikeCommand(tiContext, ct) } }
Example 7
Source File: parser.scala From tispark with Apache License 2.0
package org.apache.spark.sql.extensions import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.expressions.{Expression, SubqueryExpression} import org.apache.spark.sql.catalyst.parser._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.execution.SparkSqlParser import org.apache.spark.sql.execution.command.{ CacheTableCommand, CreateViewCommand, ExplainCommand, UncacheTableCommand } import org.apache.spark.sql.types.{DataType, StructType} import org.apache.spark.sql.{SparkSession, TiContext} case class TiParser(getOrCreateTiContext: SparkSession => TiContext)( sparkSession: SparkSession, delegate: ParserInterface) extends ParserInterface { private lazy val tiContext = getOrCreateTiContext(sparkSession) private lazy val internal = new SparkSqlParser(sparkSession.sqlContext.conf) private def needQualify(tableIdentifier: TableIdentifier) = tableIdentifier.database.isEmpty && tiContext.sessionCatalog .getTempView(tableIdentifier.table) .isEmpty }
Example 8
Source File: TiConcreteSessionCatalog.scala From tispark with Apache License 2.0
package org.apache.spark.sql.catalyst.catalog import com.pingcap.tispark.utils.ReflectionUtil._ import org.apache.spark.sql.TiContext import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.EmptyFunctionRegistry class TiConcreteSessionCatalog(val tiContext: TiContext)(tiExternalCatalog: ExternalCatalog) extends SessionCatalog(tiExternalCatalog, EmptyFunctionRegistry, tiContext.sqlContext.conf) with TiSessionCatalog { override def catalogOf(database: Option[String]): Option[SessionCatalog] = { val db = database.getOrElse(getCurrentDatabase) if (databaseExists(db)) Some(this) else None } override def databaseExists(db: String): Boolean = callTiDirectExternalCatalogDatabaseExists(tiExternalCatalog, db) override def tableExists(name: TableIdentifier): Boolean = callTiDirectExternalCatalogTableExists( tiExternalCatalog, name.database.getOrElse(getCurrentDatabase), name.table) }
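For comparison, the corresponding calls on Spark's stock SessionCatalog, which the TiSpark catalog above overrides; spark is an active SparkSession and the names are placeholders.

import org.apache.spark.sql.catalyst.TableIdentifier

val catalog = spark.sessionState.catalog

catalog.databaseExists("default")                          // Boolean
catalog.tableExists(TableIdentifier("t", Some("default"))) // qualified lookup
catalog.tableExists(TableIdentifier("t"))                  // falls back to the current database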
Example 9
Source File: HiveExternalCatalogSuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.hive import org.apache.hadoop.conf.Configuration import org.apache.spark.SparkConf import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.types.StructType class HiveExternalCatalogSuite extends ExternalCatalogSuite { private val externalCatalog: HiveExternalCatalog = { val catalog = new HiveExternalCatalog(new SparkConf, new Configuration) catalog.client.reset() catalog } protected override val utils: CatalogTestUtils = new CatalogTestUtils { override val tableInputFormat: String = "org.apache.hadoop.mapred.SequenceFileInputFormat" override val tableOutputFormat: String = "org.apache.hadoop.mapred.SequenceFileOutputFormat" override def newEmptyCatalog(): ExternalCatalog = externalCatalog override val defaultProvider: String = "hive" } protected override def resetState(): Unit = { externalCatalog.client.reset() } import utils._ test("SPARK-18647: do not put provider in table properties for Hive serde table") { val catalog = newBasicCatalog() val hiveTable = CatalogTable( identifier = TableIdentifier("hive_tbl", Some("db1")), tableType = CatalogTableType.MANAGED, storage = storageFormat, schema = new StructType().add("col1", "int").add("col2", "string"), provider = Some("hive")) catalog.createTable(hiveTable, ignoreIfExists = false) val rawTable = externalCatalog.client.getTable("db1", "hive_tbl") assert(!rawTable.properties.contains(HiveExternalCatalog.DATASOURCE_PROVIDER)) assert(DDLUtils.isHiveTable(externalCatalog.getTable("db1", "hive_tbl"))) } Seq("parquet", "hive").foreach { format => test(s"Partition columns should be put at the end of table schema for the format $format") { val catalog = newBasicCatalog() val newSchema = new StructType() .add("col1", "int") .add("col2", "string") .add("partCol1", "int") .add("partCol2", "string") val table = CatalogTable( identifier = TableIdentifier("tbl", Some("db1")), tableType = CatalogTableType.MANAGED, storage = CatalogStorageFormat.empty, schema = new StructType() .add("col1", "int") .add("partCol1", "int") .add("partCol2", "string") .add("col2", "string"), provider = Some(format), partitionColumnNames = Seq("partCol1", "partCol2")) catalog.createTable(table, ignoreIfExists = false) val restoredTable = externalCatalog.getTable("db1", "tbl") assert(restoredTable.schema == newSchema) } } test("SPARK-22306: alter table schema should not erase the bucketing metadata at hive side") { val catalog = newBasicCatalog() externalCatalog.client.runSqlHive( """ |CREATE TABLE db1.t(a string, b string) |CLUSTERED BY (a, b) SORTED BY (a, b) INTO 10 BUCKETS |STORED AS PARQUET """.stripMargin) val newSchema = new StructType().add("a", "string").add("b", "string").add("c", "string") catalog.alterTableDataSchema("db1", "t", newSchema) assert(catalog.getTable("db1", "t").schema == newSchema) val bucketString = externalCatalog.client.runSqlHive("DESC FORMATTED db1.t") .filter(_.contains("Num Buckets")).head assert(bucketString.contains("10")) } test("SPARK-23001: NullPointerException when running desc database") { val catalog = newBasicCatalog() catalog.createDatabase(newDb("dbWithNullDesc").copy(description = null), ignoreIfExists = false) assert(catalog.getDatabase("dbWithNullDesc").description == "") } }
Example 10
Source File: PruneFileSourcePartitionsSuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.hive.execution import org.scalatest.Matchers._ import org.apache.spark.sql.QueryTest import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project, ResolvedHint} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.execution.datasources.{CatalogFileIndex, HadoopFsRelation, LogicalRelation, PruneFileSourcePartitions} import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec import org.apache.spark.sql.functions.broadcast import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.StructType class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("PruneFileSourcePartitions", Once, PruneFileSourcePartitions) :: Nil } test("PruneFileSourcePartitions should not change the output of LogicalRelation") { withTable("test") { withTempDir { dir => sql( s""" |CREATE EXTERNAL TABLE test(i int) |PARTITIONED BY (p int) |STORED AS parquet |LOCATION '${dir.toURI}'""".stripMargin) val tableMeta = spark.sharedState.externalCatalog.getTable("default", "test") val catalogFileIndex = new CatalogFileIndex(spark, tableMeta, 0) val dataSchema = StructType(tableMeta.schema.filterNot { f => tableMeta.partitionColumnNames.contains(f.name) }) val relation = HadoopFsRelation( location = catalogFileIndex, partitionSchema = tableMeta.partitionSchema, dataSchema = dataSchema, bucketSpec = None, fileFormat = new ParquetFileFormat(), options = Map.empty)(sparkSession = spark) val logicalRelation = LogicalRelation(relation, tableMeta) val query = Project(Seq('i, 'p), Filter('p === 1, logicalRelation)).analyze val optimized = Optimize.execute(query) assert(optimized.missingInput.isEmpty) } } } test("SPARK-20986 Reset table's statistics after PruneFileSourcePartitions rule") { withTable("tbl") { spark.range(10).selectExpr("id", "id % 3 as p").write.partitionBy("p").saveAsTable("tbl") sql(s"ANALYZE TABLE tbl COMPUTE STATISTICS") val tableStats = spark.sessionState.catalog.getTableMetadata(TableIdentifier("tbl")).stats assert(tableStats.isDefined && tableStats.get.sizeInBytes > 0, "tableStats is lost") val df = sql("SELECT * FROM tbl WHERE p = 1") val sizes1 = df.queryExecution.analyzed.collect { case relation: LogicalRelation => relation.catalogTable.get.stats.get.sizeInBytes } assert(sizes1.size === 1, s"Size wrong for:\n ${df.queryExecution}") assert(sizes1(0) == tableStats.get.sizeInBytes) val relations = df.queryExecution.optimizedPlan.collect { case relation: LogicalRelation => relation } assert(relations.size === 1, s"Size wrong for:\n ${df.queryExecution}") val size2 = relations(0).stats.sizeInBytes assert(size2 == relations(0).catalogTable.get.stats.get.sizeInBytes) assert(size2 < tableStats.get.sizeInBytes) } } test("SPARK-26576 Broadcast hint not applied to partitioned table") { withTable("tbl") { withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { spark.range(10).selectExpr("id", "id % 3 as p").write.partitionBy("p").saveAsTable("tbl") val df = spark.table("tbl") val qe = df.join(broadcast(df), "p").queryExecution qe.optimizedPlan.collect { case _: ResolvedHint 
=> } should have size 1 qe.sparkPlan.collect { case j: BroadcastHashJoinExec => j } should have size 1 } } } }
Example 11
Source File: ListTablesSuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.hive import org.scalatest.BeforeAndAfterAll import org.apache.spark.sql.QueryTest import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.hive.test.TestHiveSingleton class ListTablesSuite extends QueryTest with TestHiveSingleton with BeforeAndAfterAll { import hiveContext._ import hiveContext.implicits._ val df = sparkContext.parallelize((1 to 10).map(i => (i, s"str$i"))).toDF("key", "value") override def beforeAll(): Unit = { super.beforeAll() // The catalog in HiveContext is a case insensitive one. sessionState.catalog.createTempView( "ListTablesSuiteTable", df.logicalPlan, overrideIfExists = true) sql("CREATE TABLE HiveListTablesSuiteTable (key int, value string)") sql("CREATE DATABASE IF NOT EXISTS ListTablesSuiteDB") sql("CREATE TABLE ListTablesSuiteDB.HiveInDBListTablesSuiteTable (key int, value string)") } override def afterAll(): Unit = { try { sessionState.catalog.dropTable( TableIdentifier("ListTablesSuiteTable"), ignoreIfNotExists = true, purge = false) sql("DROP TABLE IF EXISTS HiveListTablesSuiteTable") sql("DROP TABLE IF EXISTS ListTablesSuiteDB.HiveInDBListTablesSuiteTable") sql("DROP DATABASE IF EXISTS ListTablesSuiteDB") } finally { super.afterAll() } } test("get all tables of current database") { Seq(tables(), sql("SHOW TABLes")).foreach { case allTables => // We are using default DB. checkAnswer( allTables.filter("tableName = 'listtablessuitetable'"), Row("", "listtablessuitetable", true)) checkAnswer( allTables.filter("tableName = 'hivelisttablessuitetable'"), Row("default", "hivelisttablessuitetable", false)) assert(allTables.filter("tableName = 'hiveindblisttablessuitetable'").count() === 0) } } test("getting all tables with a database name") { Seq(tables("listtablessuiteDb"), sql("SHOW TABLes in listTablesSuitedb")).foreach { case allTables => checkAnswer( allTables.filter("tableName = 'listtablessuitetable'"), Row("", "listtablessuitetable", true)) assert(allTables.filter("tableName = 'hivelisttablessuitetable'").count() === 0) checkAnswer( allTables.filter("tableName = 'hiveindblisttablessuitetable'"), Row("listtablessuitedb", "hiveindblisttablessuitetable", false)) } } }
Example 12
Source File: XSQLAnalyzeTableCommand.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.xsql.execution.command import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTableType import org.apache.spark.sql.execution.command.{CommandUtils, RunnableCommand} import org.apache.spark.sql.xsql.XSQLSessionCatalog case class XSQLAnalyzeTableCommand(tableIdent: TableIdentifier, noscan: Boolean = true) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { val sessionState = sparkSession.sessionState val catalog = sparkSession.sessionState.catalog.asInstanceOf[XSQLSessionCatalog] val catalogDB = catalog.getUsedCatalogDatabase(tableIdent.dataSource, tableIdent.database) if (catalogDB == None) { return Seq.empty[Row] } val ds = catalogDB.get.dataSourceName val db = catalogDB.get.name val tableIdentWithDB = TableIdentifier(tableIdent.table, Some(db), Some(ds)) val tableMeta = catalog.getRawTable(tableIdentWithDB) if (tableMeta.tableType == CatalogTableType.VIEW) { throw new AnalysisException("ANALYZE TABLE is not supported on views.") } // Compute stats for the whole table val newTotalSize = CommandUtils.calculateTotalSize(sparkSession, tableMeta) val newRowCount = if (noscan) None else Some(BigInt(sparkSession.table(tableIdentWithDB).count())) // Update the metastore if the above statistics of the table are different from those // recorded in the metastore. val newStats = CommandUtils.compareAndGetNewStats(tableMeta.stats, newTotalSize, newRowCount) if (newStats.isDefined) { catalog.alterTableStats(tableIdentWithDB, newStats) } Seq.empty[Row] } }
Example 13
Source File: TableIdentifierParserSuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.catalyst.parser import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.TableIdentifier class TableIdentifierParserSuite extends SparkFunSuite { import CatalystSqlParser._ // Add "$elem$", "$value$" & "$key$" val hiveNonReservedKeyword = Array("add", "admin", "after", "analyze", "archive", "asc", "before", "bucket", "buckets", "cascade", "change", "cluster", "clustered", "clusterstatus", "collection", "columns", "comment", "compact", "compactions", "compute", "concatenate", "continue", "cost", "data", "day", "databases", "datetime", "dbproperties", "deferred", "defined", "delimited", "dependency", "desc", "directories", "directory", "disable", "distribute", "enable", "escaped", "exclusive", "explain", "export", "fields", "file", "fileformat", "first", "format", "formatted", "functions", "hold_ddltime", "hour", "idxproperties", "ignore", "index", "indexes", "inpath", "inputdriver", "inputformat", "items", "jar", "keys", "key_type", "last", "limit", "offset", "lines", "load", "location", "lock", "locks", "logical", "long", "mapjoin", "materialized", "metadata", "minus", "minute", "month", "msck", "noscan", "no_drop", "nulls", "offline", "option", "outputdriver", "outputformat", "overwrite", "owner", "partitioned", "partitions", "plus", "pretty", "principals", "protection", "purge", "read", "readonly", "rebuild", "recordreader", "recordwriter", "reload", "rename", "repair", "replace", "replication", "restrict", "rewrite", "role", "roles", "schemas", "second", "serde", "serdeproperties", "server", "sets", "shared", "show", "show_database", "skewed", "sort", "sorted", "ssl", "statistics", "stored", "streamtable", "string", "struct", "tables", "tblproperties", "temporary", "terminated", "tinyint", "touch", "transactions", "unarchive", "undo", "uniontype", "unlock", "unset", "unsigned", "uri", "use", "utc", "utctimestamp", "view", "while", "year", "work", "transaction", "write", "isolation", "level", "snapshot", "autocommit", "all", "any", "alter", "array", "as", "authorization", "between", "bigint", "binary", "boolean", "both", "by", "create", "cube", "current_date", "current_timestamp", "cursor", "date", "decimal", "delete", "describe", "double", "drop", "exists", "external", "false", "fetch", "float", "for", "grant", "group", "grouping", "import", "in", "insert", "int", "into", "is", "pivot", "lateral", "like", "local", "none", "null", "of", "order", "out", "outer", "partition", "percent", "procedure", "range", "reads", "revoke", "rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger", "true", "truncate", "update", "user", "values", "with", "regexp", "rlike", "bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float", "int", "smallint", "timestamp", "at", "position", "both", "leading", "trailing", "extract") val hiveStrictNonReservedKeyword = Seq("anti", "full", "inner", "left", "semi", "right", "natural", "union", "intersect", "except", "database", "on", "join", "cross", "select", "from", "where", "having", "from", "to", "table", "with", "not") test("table identifier") { // Regular names. assert(TableIdentifier("q") === parseTableIdentifier("q")) assert(TableIdentifier("q", Option("d")) === parseTableIdentifier("d.q")) // Illegal names. 
Seq("", "d.q.g", "t:", "${some.var.x}", "tab:1").foreach { identifier => intercept[ParseException](parseTableIdentifier(identifier)) } } test("quoted identifiers") { assert(TableIdentifier("z", Some("x.y")) === parseTableIdentifier("`x.y`.z")) assert(TableIdentifier("y.z", Some("x")) === parseTableIdentifier("x.`y.z`")) assert(TableIdentifier("z", Some("`x.y`")) === parseTableIdentifier("```x.y```.z")) assert(TableIdentifier("`y.z`", Some("x")) === parseTableIdentifier("x.```y.z```")) assert(TableIdentifier("x.y.z", None) === parseTableIdentifier("`x.y.z`")) } test("table identifier - strict keywords") { // SQL Keywords. hiveStrictNonReservedKeyword.foreach { keyword => assert(TableIdentifier(keyword) === parseTableIdentifier(keyword)) assert(TableIdentifier(keyword) === parseTableIdentifier(s"`$keyword`")) assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.`$keyword`")) } } test("table identifier - non reserved keywords") { // Hive keywords are allowed. hiveNonReservedKeyword.foreach { nonReserved => assert(TableIdentifier(nonReserved) === parseTableIdentifier(nonReserved)) } } test("SPARK-17364 table identifier - contains number") { assert(parseTableIdentifier("123_") == TableIdentifier("123_")) assert(parseTableIdentifier("1a.123_") == TableIdentifier("123_", Some("1a"))) // ".123" should not be treated as token of type DECIMAL_VALUE assert(parseTableIdentifier("a.123A") == TableIdentifier("123A", Some("a"))) // ".123E3" should not be treated as token of type SCIENTIFIC_DECIMAL_VALUE assert(parseTableIdentifier("a.123E3_LIST") == TableIdentifier("123E3_LIST", Some("a"))) // ".123D" should not be treated as token of type DOUBLE_LITERAL assert(parseTableIdentifier("a.123D_LIST") == TableIdentifier("123D_LIST", Some("a"))) // ".123BD" should not be treated as token of type BIGDECIMAL_LITERAL assert(parseTableIdentifier("a.123BD_LIST") == TableIdentifier("123BD_LIST", Some("a"))) } test("SPARK-17832 table identifier - contains backtick") { val complexName = TableIdentifier("`weird`table`name", Some("`d`b`1")) assert(complexName === parseTableIdentifier("```d``b``1`.```weird``table``name`")) assert(complexName === parseTableIdentifier(complexName.quotedString)) intercept[ParseException](parseTableIdentifier(complexName.unquotedString)) // Table identifier contains countious backticks should be treated correctly. val complexName2 = TableIdentifier("x``y", Some("d``b")) assert(complexName2 === parseTableIdentifier(complexName2.quotedString)) } }
Example 14
Source File: CatalogSuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.types.StructType class CatalogSuite extends AnalysisTest { test("desc table when owner is set to null") { val table = CatalogTable( identifier = TableIdentifier("tbl", Some("db1")), tableType = CatalogTableType.MANAGED, storage = CatalogStorageFormat.empty, owner = null, schema = new StructType().add("col1", "int").add("col2", "string"), provider = Some("parquet")) table.toLinkedHashMap } }
Example 15
Source File: LookupFunctionsSuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.catalyst.analysis import java.net.URI import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.expressions.Alias import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.internal.SQLConf class LookupFunctionsSuite extends PlanTest { test("SPARK-23486: the functionExists for the Persistent function check") { val externalCatalog = new CustomInMemoryCatalog val conf = new SQLConf() val catalog = new SessionCatalog(externalCatalog, FunctionRegistry.builtin, conf) val analyzer = { catalog.createDatabase( CatalogDatabase("default", "", new URI("loc"), Map.empty), ignoreIfExists = false) new Analyzer(catalog, conf) } def table(ref: String): LogicalPlan = UnresolvedRelation(TableIdentifier(ref)) val unresolvedPersistentFunc = UnresolvedFunction("func", Seq.empty, false) val unresolvedRegisteredFunc = UnresolvedFunction("max", Seq.empty, false) val plan = Project( Seq(Alias(unresolvedPersistentFunc, "call1")(), Alias(unresolvedPersistentFunc, "call2")(), Alias(unresolvedPersistentFunc, "call3")(), Alias(unresolvedRegisteredFunc, "call4")(), Alias(unresolvedRegisteredFunc, "call5")()), table("TaBlE")) analyzer.LookupFunctions.apply(plan) assert(externalCatalog.getFunctionExistsCalledTimes == 1) assert(analyzer.LookupFunctions.normalizeFuncName (unresolvedPersistentFunc.name).database == Some("default")) } test("SPARK-23486: the functionExists for the Registered function check") { val externalCatalog = new InMemoryCatalog val conf = new SQLConf() val customerFunctionReg = new CustomerFunctionRegistry val catalog = new SessionCatalog(externalCatalog, customerFunctionReg, conf) val analyzer = { catalog.createDatabase( CatalogDatabase("default", "", new URI("loc"), Map.empty), ignoreIfExists = false) new Analyzer(catalog, conf) } def table(ref: String): LogicalPlan = UnresolvedRelation(TableIdentifier(ref)) val unresolvedRegisteredFunc = UnresolvedFunction("max", Seq.empty, false) val plan = Project( Seq(Alias(unresolvedRegisteredFunc, "call1")(), Alias(unresolvedRegisteredFunc, "call2")()), table("TaBlE")) analyzer.LookupFunctions.apply(plan) assert(customerFunctionReg.getIsRegisteredFunctionCalledTimes == 2) assert(analyzer.LookupFunctions.normalizeFuncName (unresolvedRegisteredFunc.name).database == Some("default")) } } class CustomerFunctionRegistry extends SimpleFunctionRegistry { private var isRegisteredFunctionCalledTimes: Int = 0; override def functionExists(funcN: FunctionIdentifier): Boolean = synchronized { isRegisteredFunctionCalledTimes = isRegisteredFunctionCalledTimes + 1 true } def getIsRegisteredFunctionCalledTimes: Int = isRegisteredFunctionCalledTimes } class CustomInMemoryCatalog extends InMemoryCatalog { private var functionExistsCalledTimes: Int = 0 override def functionExists(db: String, funcName: String): Boolean = synchronized { functionExistsCalledTimes = functionExistsCalledTimes + 1 true } def getFunctionExistsCalledTimes: Int = functionExistsCalledTimes }
Example 16
package org.apache.spark.sql.execution.datasources import java.util.Locale import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.command.{DDLUtils, RunnableCommand} import org.apache.spark.sql.types._ case class CreateTempViewUsing( tableIdent: TableIdentifier, userSpecifiedSchema: Option[StructType], replace: Boolean, global: Boolean, provider: String, options: Map[String, String]) extends RunnableCommand { if (tableIdent.database.isDefined) { throw new AnalysisException( s"Temporary view '$tableIdent' should not have specified a database") } override def argString: String = { s"[tableIdent:$tableIdent " + userSpecifiedSchema.map(_ + " ").getOrElse("") + s"replace:$replace " + s"provider:$provider " + CatalogUtils.maskCredentials(options) } override def run(sparkSession: SparkSession): Seq[Row] = { if (provider.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) { throw new AnalysisException("Hive data source can only be used with tables, " + "you can't use it with CREATE TEMP VIEW USING") } val dataSource = DataSource( sparkSession, userSpecifiedSchema = userSpecifiedSchema, className = provider, options = options) val catalog = sparkSession.sessionState.catalog val viewDefinition = Dataset.ofRows( sparkSession, LogicalRelation(dataSource.resolveRelation())).logicalPlan if (global) { catalog.createGlobalTempView(tableIdent.table, viewDefinition, replace) } else { catalog.createTempView(tableIdent.table, viewDefinition, replace) } Seq.empty[Row] } } case class RefreshTable(tableIdent: TableIdentifier) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { // Refresh the given table's metadata. If this table is cached as an InMemoryRelation, // drop the original cached version and make the new version cached lazily. sparkSession.catalog.refreshTable(tableIdent.quotedString) Seq.empty[Row] } } case class RefreshResource(path: String) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { sparkSession.catalog.refreshByPath(path) Seq.empty[Row] } }
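The user-facing forms that map to these commands, sketched for spark-shell; the view name and CSV path are assumptions for illustration, and the file must exist so the data source can infer a schema.

// CreateTempViewUsing: a temporary view backed directly by a data source.
spark.sql("""
  CREATE OR REPLACE TEMPORARY VIEW people
  USING csv
  OPTIONS (path '/tmp/people.csv', header 'true')
""")

// RefreshTable boils down to this catalog call:
spark.catalog.refreshTable("people")
// RefreshResource to this one:
spark.catalog.refreshByPath("/tmp/people.csv")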
Example 17
Source File: cache.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution.command import org.apache.spark.sql.{Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan case class CacheTableCommand( tableIdent: TableIdentifier, plan: Option[LogicalPlan], isLazy: Boolean) extends RunnableCommand { require(plan.isEmpty || tableIdent.database.isEmpty, "Database name is not allowed in CACHE TABLE AS SELECT") override protected def innerChildren: Seq[QueryPlan[_]] = plan.toSeq override def run(sparkSession: SparkSession): Seq[Row] = { plan.foreach { logicalPlan => Dataset.ofRows(sparkSession, logicalPlan).createTempView(tableIdent.quotedString) } sparkSession.catalog.cacheTable(tableIdent.quotedString) if (!isLazy) { // Performs eager caching sparkSession.table(tableIdent).count() } Seq.empty[Row] } } case class UncacheTableCommand( tableIdent: TableIdentifier, ifExists: Boolean) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { val tableId = tableIdent.quotedString if (!ifExists || sparkSession.catalog.tableExists(tableId)) { sparkSession.catalog.uncacheTable(tableId) } Seq.empty[Row] } } case class ClearCacheCommand() extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { sparkSession.catalog.clearCache() Seq.empty[Row] } override def makeCopy(newArgs: Array[AnyRef]): ClearCacheCommand = ClearCacheCommand() }
Example 18
Source File: AnalyzeTableCommand.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution.command import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTableType case class AnalyzeTableCommand( tableIdent: TableIdentifier, noscan: Boolean = true) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { val sessionState = sparkSession.sessionState val db = tableIdent.database.getOrElse(sessionState.catalog.getCurrentDatabase) val tableIdentWithDB = TableIdentifier(tableIdent.table, Some(db)) val tableMeta = sessionState.catalog.getTableMetadata(tableIdentWithDB) if (tableMeta.tableType == CatalogTableType.VIEW) { throw new AnalysisException("ANALYZE TABLE is not supported on views.") } // Compute stats for the whole table val newTotalSize = CommandUtils.calculateTotalSize(sparkSession, tableMeta) val newRowCount = if (noscan) None else Some(BigInt(sparkSession.table(tableIdentWithDB).count())) // Update the metastore if the above statistics of the table are different from those // recorded in the metastore. val newStats = CommandUtils.compareAndGetNewStats(tableMeta.stats, newTotalSize, newRowCount) if (newStats.isDefined) { sessionState.catalog.alterTableStats(tableIdentWithDB, newStats) } Seq.empty[Row] } }
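An end-to-end sketch of the same flow from spark-shell: run ANALYZE TABLE, then read the statistics back through the catalog with a TableIdentifier. The table name tbl is a placeholder.

import org.apache.spark.sql.catalyst.TableIdentifier

spark.range(100).write.saveAsTable("tbl")

spark.sql("ANALYZE TABLE tbl COMPUTE STATISTICS NOSCAN") // noscan = true: total size only
spark.sql("ANALYZE TABLE tbl COMPUTE STATISTICS")        // full scan: size and row count

val stats = spark.sessionState.catalog.getTableMetadata(TableIdentifier("tbl")).stats
assert(stats.exists(_.sizeInBytes > 0))
assert(stats.flatMap(_.rowCount).contains(BigInt(100)))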
Example 19
Source File: SQLContextExtensionBase.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql.extension import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.{ParserDialect, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry, SimpleFunctionRegistry} import org.apache.spark.sql.catalyst.errors.DialectException import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.datasources.DDLParser import org.apache.spark.sql.extension.OptimizerFactory.ExtendableOptimizerBatch import org.apache.spark.util.Utils import scala.util.Try import scala.util.control.NonFatal override protected def extendedParserDialect: ParserDialect = try { val clazz = Utils.classForName(dialectClassName) clazz.newInstance().asInstanceOf[ParserDialect] } catch { case NonFatal(e) => // Since we didn't find the available SQL Dialect, it will fail even for SET command: // SET spark.sql.dialect=sql; Let's reset as default dialect automatically. val dialect = conf.dialect // reset the sql dialect conf.unsetConf(SQLConf.DIALECT) // throw out the exception, and the default sql dialect will take effect for next query. throw new DialectException( s""" |Instantiating dialect '$dialect' failed. |Reverting to default dialect '${conf.dialect}'""".stripMargin, e) } // (suggestion) make this implicit to FunctionRegistry. protected def registerBuiltins(registry: FunctionRegistry): Unit = { FunctionRegistry.expressions.foreach { case (name, (info, builder)) => registry.registerFunction(name, builder) } } override protected def extendedDdlParser(parser: String => LogicalPlan): DDLParser = new DDLParser(sqlParser.parse(_)) override protected def registerFunctions(registry: FunctionRegistry): Unit = { } }
Example 20
Source File: TemporaryFlagProxyCatalog.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{Catalog, OverrideCatalog} import org.apache.spark.sql.catalyst.plans.logical.Subquery import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.sources.Relation @deprecated("Use org.apache.spark.sql.TemporaryFlagCatalog instead") trait TemporaryFlagProxyCatalog extends OverrideCatalog { abstract override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = { val tables = super.getTables(databaseName) tables.map { case (tableName: String , isTemporary: Boolean) => val tableIdentifier = TableIdentifier(tableName) lookupRelation(tableIdentifier) match { case Subquery(_, LogicalRelation(relation: Relation, _)) => (tableIdentifier.table, relation.isTemporary) case _ => (tableIdentifier.table, isTemporary) } } } }
Example 21
Source File: CreateTableUsingTemporaryAwareCommand.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql.execution.datasources import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.RunnableCommand import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, DatasourceResolver, Row, SQLContext} import org.apache.spark.sql.catalyst.TableIdentifierUtils._ import org.apache.spark.sql.catalyst.CaseSensitivityUtils._ // scalastyle:off method.length private def resolveDataSource(sqlContext: SQLContext, dataSource: Any, tableId: TableIdentifier): ResolvedDataSource = { dataSource match { case drp: PartitionedRelationProvider => if (userSpecifiedSchema.isEmpty) { new ResolvedDataSource(drp.getClass, drp.createRelation( sqlContext, tableId.toSeq, new CaseInsensitiveMap(options), partitioningFunction, partitioningColumns, isTemporary, allowExisting)) } else { new ResolvedDataSource(drp.getClass, drp.createRelation( sqlContext, tableId.toSeq, new CaseInsensitiveMap(options), userSpecifiedSchema.get, partitioningFunction, partitioningColumns, isTemporary, allowExisting)) } case drp: TemporaryAndPersistentSchemaRelationProvider if userSpecifiedSchema.nonEmpty => new ResolvedDataSource(drp.getClass, drp.createRelation( sqlContext, tableId.toSeq, new CaseInsensitiveMap(options), userSpecifiedSchema.get, isTemporary, allowExisting)) case drp: TemporaryAndPersistentRelationProvider => new ResolvedDataSource(drp.getClass, drp.createRelation( sqlContext, tableId.toSeq, new CaseInsensitiveMap(options), isTemporary, allowExisting)) case _ => ResolvedDataSource(sqlContext, userSpecifiedSchema, partitionColumns, provider, options) } } // scalastyle:on method.length }
Example 22
Source File: SqlContextAccessor.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql.execution.datasources import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import scala.language.implicitConversions object SqlContextAccessor { implicit def sqlContextToCatalogAccessable(sqlContext: SQLContext): SqlContextCatalogAccessor = new SqlContextCatalogAccessor(sqlContext) class SqlContextCatalogAccessor(sqlContext: SQLContext) extends SQLContext(sqlContext.sparkContext) { def registerRawPlan(lp: LogicalPlan, tableName: String): Unit = { sqlContext.catalog.registerTable(TableIdentifier(tableName), lp) } } }
Example 23
Source File: DescribeTableUsingCommand.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql.execution.datasources import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.TableIdentifierUtils._ import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.RunnableCommand import org.apache.spark.sql.sources.{DatasourceCatalog, RelationInfo} import org.apache.spark.sql.types.{StringType, StructField, StructType} import org.apache.spark.sql.{DatasourceResolver, Row, SQLContext} private[sql] case class DescribeTableUsingCommand( name: TableIdentifier, provider: String, options: Map[String, String]) extends LogicalPlan with RunnableCommand { override def output: Seq[Attribute] = StructType( StructField("TABLE_NAME", StringType, nullable = false) :: StructField("DDL_STMT", StringType, nullable = false) :: Nil ).toAttributes override def run(sqlContext: SQLContext): Seq[Row] = { // Convert the table name according to the case-sensitivity settings val tableId = name.toSeq val resolver = DatasourceResolver.resolverFor(sqlContext) val catalog = resolver.newInstanceOfTyped[DatasourceCatalog](provider) Seq(catalog .getRelation(sqlContext, tableId, new CaseInsensitiveMap(options)) match { case None => Row("", "") case Some(RelationInfo(relName, _, _, ddl, _)) => Row( relName, ddl.getOrElse("")) }) } }
Example 24
Source File: RegisterAllTablesCommand.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql.execution.datasources import org.apache.spark.sql.catalyst.CaseSensitivityUtils._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.RunnableCommand import org.apache.spark.sql.execution.datasources.SqlContextAccessor._ import org.apache.spark.sql.sources.{LogicalPlanSource, RegisterAllTableRelations} import org.apache.spark.sql.util.CollectionUtils._ import org.apache.spark.sql.{DatasourceResolver, Row, SQLContext} relations.map { case (name, source) => val lp = source.logicalPlan(sqlContext) if (lp.resolved) { sqlContext.validatedSchema(lp.schema).recover { case d: DuplicateFieldsException => throw new RuntimeException( s"Provider '$provider' returned a relation that has duplicate fields.", d) }.get } else { // TODO(AC): With the new view interface, this can be checked logWarning(s"Adding relation $name with potentially unreachable fields.") } name -> lp }.foreach { case (name, plan) => sqlContext.registerRawPlan(plan, name) } } }
Example 25
Source File: AbstractViewCommand.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql.execution.datasources import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.RunnableCommand import org.apache.spark.sql.sources.{AbstractViewProvider, ViewKind} import org.apache.spark.sql.{DatasourceResolver, SQLContext} def withValidProvider[B](sqlContext: SQLContext)(b: AbstractViewProvider[_] => B): B = { val resolver = DatasourceResolver.resolverFor(sqlContext) AbstractViewProvider.matcherFor(kind)(resolver.newInstanceOf(provider)) match { case Some(viewProvider) => b(viewProvider) case _ => throw new ProviderException(provider, "Does not support the " + s"execution of ${this.getClass.getSimpleName}") } } } class ProviderException(val provider: String, val reason: String) extends Exception(s"Exception using provider $provider: $reason")
Example 26
Source File: RegisterTableCommand.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql.execution.datasources import org.apache.spark.sql.catalyst.CaseSensitivityUtils._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.RunnableCommand import org.apache.spark.sql.execution.datasources.SqlContextAccessor._ import org.apache.spark.sql.sources.RegisterAllTableRelations import org.apache.spark.sql.{DatasourceResolver, Row, SQLContext} val relation = resolvedProvider.getTableRelation(tableName, sqlContext, options) relation match { case None => sys.error(s"Relation $tableName was not found in the catalog.") case Some(r) => val lp = r.logicalPlan(sqlContext) if (lp.resolved) { sqlContext.validatedSchema(lp.schema).recover { case d: DuplicateFieldsException => throw new RuntimeException( s"Provider '$provider' returned a relation that has duplicate fields.", d) }.get } else { // TODO(AC): With the new view interface, this can be checked logWarning(s"Adding relation $tableName with potentially unreachable fields.") } sqlContext.registerRawPlan(lp, tableName) } } Seq.empty } }
Example 27
Source File: CreateTablePartitionedByUsing.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql.execution.datasources import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan} import org.apache.spark.sql.types.StructType case class CreateTablePartitionedByUsing(tableIdent: TableIdentifier, userSpecifiedSchema: Option[StructType], provider: String, partitioningFunc: String, partitioningColumns: Seq[String], temporary: Boolean, options: Map[String, String], allowExisting: Boolean, managedIfNoPath: Boolean) extends LogicalPlan with Command { override def output: Seq[Attribute] = Seq.empty override def children: Seq[LogicalPlan] = Seq.empty }
Example 28
Source File: DescCommand.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql.sources.commands.hive import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.types.StringType import org.apache.spark.sql.{Row, SQLContext} case class DescCommand(ident: TableIdentifier) extends HiveRunnableCommand { override protected val commandName: String = s"DESC $ident" override def execute(sqlContext: SQLContext): Seq[Row] = { val plan = sqlContext.catalog.lookupRelation(ident) if (plan.resolved) { plan.schema.map { field => Row(field.name, field.dataType.simpleString, None) } } else { Seq.empty } } override lazy val output: Seq[Attribute] = AttributeReference("col_name", StringType)() :: AttributeReference("data_type", StringType)() :: AttributeReference("comment", StringType)() :: Nil }
Example 29
Source File: AbstractViewProvider.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.sources import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.plans.logical.view.{AbstractView, Persisted} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import scala.reflect._ def name: String } abstract class BaseAbstractViewProvider[A <: AbstractView with Persisted: ClassTag] extends AbstractViewProvider[A] { val tag = implicitly[ClassTag[A]] } object AbstractViewProvider { def matcherFor(kind: ViewKind)(any: Any): Option[AbstractViewProvider[_]] = { val multiProvider = MultiAbstractViewProvider.matcherFor(kind) any match { case provider: AbstractViewProvider[_] if tagMatches(provider.tag) => Some(provider) case multiProvider(provider) => Some(provider) case _ => None } } private def tagMatches[A: ClassTag](tag: ClassTag[_]): Boolean = { classTag[A].runtimeClass.isAssignableFrom(tag.runtimeClass) } } case class CreateViewInput( sqlContext: SQLContext, plan: LogicalPlan, viewSql: String, options: Map[String, String], identifier: TableIdentifier, allowExisting: Boolean) case class DropViewInput( sqlContext: SQLContext, options: Map[String, String], identifier: TableIdentifier, allowNotExisting: Boolean)
Example 30
Source File: RecursiveViewAnalysis.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.plans.logical.view.AbstractView
import org.apache.spark.sql.execution.datasources.AbstractCreateViewCommand

object RecursiveViewAnalysis {
  def apply(plan: LogicalPlan): Unit = {
    plan.foreach {
      case c: AbstractCreateViewCommand if containsViewIdentifier(c.identifier, c.plan) =>
        throw new AnalysisException(s"The view ${c.identifier.table} " +
          s"cannot be defined recursively.")
      case _ =>
    }
  }

  private def containsViewIdentifier(name: TableIdentifier, plan: LogicalPlan): Boolean =
    plan.find {
      case UnresolvedRelation(ident, _) if ident == name => true
      case AbstractView(child) => containsViewIdentifier(name, child)
      case _ => false
    }.isDefined
}
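The recursion check above relies on plain case-class equality between TableIdentifier values. A small sketch of that equality, assuming the Spark 1.6-era UnresolvedRelation signature with an optional alias parameter; the identifiers are hypothetical:

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation

val viewIdent = TableIdentifier("v1", Some("db"))
val relation = UnresolvedRelation(TableIdentifier("v1", Some("db")), alias = None)

// Two identifiers are equal only when both the table name and the database part match.
relation match {
  case UnresolvedRelation(ident, _) =>
    assert(ident == viewIdent)
    assert(ident != TableIdentifier("v1"))   // no database => not equal
}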
Example 31
Source File: TemporaryFlagCatalog.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.Catalog
import org.apache.spark.sql.catalyst.plans.logical.Subquery
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.sources.Relation

trait TemporaryFlagCatalog extends Catalog {
  abstract override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = {
    val tables = super.getTables(databaseName)
    tables.map {
      case (tableName: String, isTemporary: Boolean) =>
        val tableIdentifier = TableIdentifier(tableName)
        lookupRelation(tableIdentifier) match {
          case Subquery(_, LogicalRelation(relation: Relation, _)) =>
            (tableIdentifier.table, relation.isTemporary)
          case _ => (tableIdentifier.table, isTemporary)
        }
    }
  }
}
Example 32
Source File: ResolveDropCommand.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.CaseSensitivityUtils._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.datasources._
import org.apache.spark.sql.sources.commands.UnresolvedDropCommand
import org.apache.spark.sql.sources.{DropRelation, RelationKind, Table}

import scala.util.Try

case class ResolveDropCommand(analyzer: Analyzer, catalog: Catalog)
  extends Rule[LogicalPlan]
  with TableDependencyCalculator {

  private def failAnalysis(reason: String) = throw new AnalysisException(reason)

  override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {
    case UnresolvedDropCommand(kind, allowNotExisting, tableIdent, cascade) =>
      val plan = resolvePlan(kind, tableIdent, allowNotExisting)
      val affected = plan.map { lp =>
        val targetKind = RelationKind.kindOf(lp).getOrElse(Table)
        checkValidKind(kind, tableIdent, targetKind)
        buildDependentsMap(catalog, tableIdent)
      }

      affected.foreach(checkAllowedToDrop(cascade))
      DropRunnableCommand(affected.getOrElse(Map.empty))
  }

  private def getDropRelation(plan: LogicalPlan): Option[DropRelation] = plan.collectFirst {
    case d: LogicalPlan with DropRelation => d
    case LogicalRelation(d: DropRelation, _) => d
  }

  private def resolvePlan(kind: DropTarget,
                          tableIdent: TableIdentifier,
                          allowNotExisting: Boolean): Option[LogicalPlan] = {
    Try(catalog.lookupRelation(tableIdent)).toOption match {
      case Some(plan) => Some(plan)
      case None if allowNotExisting => None
      case None => failAnalysis(
        s"""${kind.targetName.toLowerCase} ${tableIdent.unquotedString} does not exist. To
           |DROP a ${kind.targetName.toLowerCase} regardless of whether it exists or not, use
           |DROP ${kind.targetName.toUpperCase} IF EXISTS.""".stripMargin)
    }
  }

  private def checkAllowedToDrop(cascade: Boolean)
                                (dependents: Map[String, Option[DropRelation]]) = {
    if (dependents.size > 1 && !cascade) {
      failAnalysis("Cannot drop because more than one relation has " +
        s"references to the target relation: ${dependents.keys.mkString(",")}. " +
        s"To force the drop, use 'CASCADE'.")
    }
  }

  private def checkValidKind(kind: DropTarget,
                             tableIdent: TableIdentifier,
                             targetKind: RelationKind): Unit = {
    if (!kind.accepts(targetKind)) {
      failAnalysis(
        s"Relation '${tableIdent.unquotedString}' of kind " +
        s"$targetKind is not a ${kind.targetName}. " +
        s"Please use DROP ${targetKind.name.toUpperCase()} to drop it.")
    }
  }

  private def buildDependentsMap(catalog: Catalog,
                                 identifier: TableIdentifier): Map[String, Option[DropRelation]] = {
    val tables = getTables(catalog, identifier.database)
    val tablesAndDependents = buildDependentsMap(tables)

    def aggregate(acc: Set[TableIdentifier],
                  next: List[TableIdentifier]): Set[TableIdentifier] =
      next match {
        case Nil => acc
        case ident :: rest =>
          val dependents = tablesAndDependents(ident)
          aggregate(acc ++ dependents, rest ++ dependents.diff(acc))
      }

    val dependentsSet = aggregate(Set(identifier), identifier :: Nil)
    dependentsSet.flatMap { dependent =>
      tables.get(dependent).map(dependent.table -> getDropRelation(_))
    }.toMap
  }
}
Example 33
Source File: AbsoluteOverrideCatalog.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import java.util.concurrent.ConcurrentHashMap import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Subquery} import scala.collection.JavaConverters._ abstract override def lookupRelation( tableIdent: TableIdentifier, alias: Option[String] = None): LogicalPlan = { getOverriddenTable(tableIdent) match { case Some(table) => val tableName = getTableName(tableIdent) val tableWithQualifiers = Subquery(tableName, table) // If an alias was specified by the lookup, wrap the plan in a sub-query so that attributes // are properly qualified with this alias. alias.map(a => Subquery(a, tableWithQualifiers)).getOrElse(tableWithQualifiers) case None => throw new NoSuchTableException } } abstract override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = { overrides.keySet().asScala.map(_ -> true).toSeq } override def registerTable(tableIdent: TableIdentifier, plan: LogicalPlan): Unit = { overrides.put(getTableName(tableIdent), plan) } override def unregisterTable(tableIdent: TableIdentifier): Unit = { if (tableIdent.database.isEmpty) { overrides.remove(getTableName(tableIdent)) } } override def unregisterAllTables(): Unit = { overrides.clear() } }
Example 34
Source File: dependenciesSystemTable.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis.systables import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.TableDependencyCalculator import org.apache.spark.sql.sources.{RelationKind, Table} import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} import org.apache.spark.sql.{Row, SQLContext} object DependenciesSystemTableProvider extends SystemTableProvider with LocalSpark { override def execute(): Seq[Row] = { val tables = getTables(sqlContext.catalog) val dependentsMap = buildDependentsMap(tables) def kindOf(tableIdentifier: TableIdentifier): String = tables .get(tableIdentifier) .map(plan => RelationKind.kindOf(plan).getOrElse(Table).name) .getOrElse(DependenciesSystemTable.UnknownType) .toUpperCase dependentsMap.flatMap { case (tableIdent, dependents) => val curKind = kindOf(tableIdent) dependents.map { dependent => val dependentKind = kindOf(dependent) Row( tableIdent.database.orNull, tableIdent.table, curKind, dependent.database.orNull, dependent.table, dependentKind, ReferenceDependency.id) } }.toSeq } override val schema: StructType = DependenciesSystemTable.schema } object DependenciesSystemTable extends SchemaEnumeration { val baseSchemaName = Field("BASE_SCHEMA_NAME", StringType, nullable = true) val baseObjectName = Field("BASE_OBJECT_NAME", StringType, nullable = false) val baseObjectType = Field("BASE_OBJECT_TYPE", StringType, nullable = false) val dependentSchemaName = Field("DEPENDENT_SCHEMA_NAME", StringType, nullable = true) val dependentObjectName = Field("DEPENDENT_OBJECT_NAME", StringType, nullable = false) val dependentObjectType = Field("DEPENDENT_OBJECT_TYPE", StringType, nullable = false) val dependencyType = Field("DEPENDENCY_TYPE", IntegerType, nullable = false) private[DependenciesSystemTable] val UnknownType = "UNKNOWN" }
Example 35
Source File: tablesSystemTable.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis.systables import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.sources._ import org.apache.spark.sql.sources.commands.WithOrigin import org.apache.spark.sql.types.{StringType, StructType} import org.apache.spark.sql.util.CollectionUtils.CaseInsensitiveMap import org.apache.spark.sql.{DatasourceResolver, Row, SQLContext} import org.apache.spark.sql.catalyst.CaseSensitivityUtils._ object TablesSystemTableProvider extends SystemTableProvider with LocalSpark with ProviderBound { override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = DatasourceResolver .resolverFor(sqlContext) .newInstanceOfTyped[DatasourceCatalog](provider) match { case catalog: DatasourceCatalog with DatasourceCatalogPushDown => catalog.getRelations(sqlContext, options, requiredColumns, filters.toSeq.merge) case catalog: DatasourceCatalog => val values = catalog .getRelations(sqlContext, new CaseInsensitiveMap(options)) .map(relationInfo => Row( relationInfo.name, relationInfo.isTemporary.toString.toUpperCase, relationInfo.kind.toUpperCase, relationInfo.provider)) val rows = schema.buildPrunedFilteredScan(requiredColumns, filters)(values) sparkContext.parallelize(rows) } } sealed trait TablesSystemTable extends SystemTable { override def schema: StructType = TablesSystemTable.schema } object TablesSystemTable extends SchemaEnumeration { val tableName = Field("TABLE_NAME", StringType, nullable = false) val isTemporary = Field("IS_TEMPORARY", StringType, nullable = false) val kind = Field("KIND", StringType, nullable = false) val provider = Field("PROVIDER", StringType, nullable = true) }
Example 36
Source File: metadataSystemTable.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis.systables import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.tablefunctions.OutputFormatter import org.apache.spark.sql.sources._ import org.apache.spark.sql.types.{StringType, StructType} import org.apache.spark.sql.{DatasourceResolver, Row, SQLContext} import org.apache.spark.sql.catalyst.CaseSensitivityUtils._ override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = DatasourceResolver .resolverFor(sqlContext) .newInstanceOfTyped[MetadataCatalog](provider) match { case catalog: MetadataCatalog with MetadataCatalogPushDown => catalog.getTableMetadata(sqlContext, options, requiredColumns, filters.toSeq.merge) case catalog => val rows = catalog.getTableMetadata(sqlContext, options).flatMap { tableMetadata => val formatter = new OutputFormatter(tableMetadata.tableName, tableMetadata.metadata) formatter.format().map(Row.fromSeq) } sparkContext.parallelize(schema.buildPrunedFilteredScan(requiredColumns, filters)(rows)) } override def schema: StructType = MetadataSystemTable.schema } object MetadataSystemTable extends SchemaEnumeration { val tableName = Field("TABLE_NAME", StringType, nullable = false) val metadataKey = Field("METADATA_KEY", StringType, nullable = true) val metadataValue = Field("METADATA_VALUE", StringType, nullable = true) }
Example 37
Source File: relationMappingSystemTable.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis.systables

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.sources.sql.SqlLikeRelation
import org.apache.spark.sql.types.{StringType, StructType}
import org.apache.spark.sql.{Row, SQLContext}

object RelationMappingSystemTableProvider extends SystemTableProvider with LocalSpark {

  override def execute(): Seq[Row] = {
    sqlContext.tableNames().map { tableName =>
      val plan = sqlContext.catalog.lookupRelation(TableIdentifier(tableName))
      val sqlName = plan.collectFirst {
        case s: SqlLikeRelation => s.relationName
        case LogicalRelation(s: SqlLikeRelation, _) => s.relationName
      }
      Row(tableName, sqlName)
    }
  }
}

object RelationMappingSystemTable extends SchemaEnumeration {
  val sparkName = Field("RELATION_NAME", StringType, nullable = false)
  val providerName = Field("SQL_NAME", StringType, nullable = true)
}
Example 38
Source File: TableDependencyCalculator.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

    // Excerpt: the enclosing trait and the method providing `tablesAndReferences` are
    // elided here. The fold inverts "table -> referenced tables" into
    // "referenced table -> dependent tables".
    tablesAndReferences.foldLeft(Map.empty[TableIdentifier, Set[TableIdentifier]]
      .withDefaultValue(Set.empty)) {
      case (acc, (ident, references)) =>
        references.foldLeft(acc) {
          case (innerAcc, referenceIdentifier) =>
            innerAcc + (referenceIdentifier -> (innerAcc(referenceIdentifier) + ident))
        }
    }
  }
}
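The fold above builds a reverse-dependency map keyed by TableIdentifier. A self-contained sketch of the same idea on plain collections, using hypothetical identifiers:

import org.apache.spark.sql.catalyst.TableIdentifier

// view1 and view2 both reference the base table t1 (hypothetical names).
val tablesAndReferences: Map[TableIdentifier, Set[TableIdentifier]] = Map(
  TableIdentifier("view1") -> Set(TableIdentifier("t1")),
  TableIdentifier("view2") -> Set(TableIdentifier("t1")))

val dependents = tablesAndReferences.foldLeft(
  Map.empty[TableIdentifier, Set[TableIdentifier]].withDefaultValue(Set.empty)) {
  case (acc, (ident, references)) =>
    references.foldLeft(acc) {
      case (innerAcc, ref) => innerAcc + (ref -> (innerAcc(ref) + ident))
    }
}

// t1 now maps to the relations that depend on it.
assert(dependents(TableIdentifier("t1")) ==
  Set(TableIdentifier("view1"), TableIdentifier("view2")))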
Example 39
Source File: ExtractSQLParserSuite.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import com.sap.spark.PlanTest import org.apache.spark.Logging import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.parser.SapParserDialect import org.scalatest.FunSuite class ExtractSQLParserSuite extends FunSuite with PlanTest with Logging { // scalastyle:off magic.number val t1 = UnresolvedRelation(TableIdentifier("T1")) val parser = new SapParserDialect test("Parse EXTRACT in SELECT") { val result = parser.parse("SELECT a, EXTRACT(YEAR FROM a) FROM T1") val expected = t1.select(AliasUnresolver('a, Year('a)): _*) comparePlans(expected, result) } test("Parse EXTRACT in WHERE") { val result = parser.parse("SELECT 1 FROM T1 WHERE EXTRACT(MONTH FROM a) = 2015") val expected = t1.where(Month('a) === 2015).select(AliasUnresolver(1): _*) comparePlans(expected, result) } test("Parse EXTRACT in GROUP BY") { val result = parser.parse("SELECT 1 FROM T1 GROUP BY EXTRACT(DAY FROM a)") val expected = t1.groupBy(DayOfMonth('a))(AliasUnresolver(1): _*) comparePlans(expected, result) } }
Example 40
Source File: FileSourceScanExecAdapter.scala From OAP with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.oap.adapter

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.execution.FileSourceScanExec
import org.apache.spark.sql.execution.datasources.HadoopFsRelation
import org.apache.spark.sql.types.StructType
import org.apache.spark.util.collection.BitSet

object FileSourceScanExecAdapter {

  def createFileSourceScanExec(
      relation: HadoopFsRelation,
      output: Seq[Attribute],
      requiredSchema: StructType,
      partitionFilters: Seq[Expression],
      optionalBucketSets: Option[BitSet],
      dataFilters: Seq[Expression],
      metastoreTableIdentifier: Option[TableIdentifier]): FileSourceScanExec = {
    FileSourceScanExec(
      relation,
      output,
      requiredSchema,
      partitionFilters,
      optionalBucketSets,
      dataFilters,
      metastoreTableIdentifier)
  }
}
Example 41
Source File: CatalogUtils.scala From spark-atlas-connector with Apache License 2.0 | 5 votes |
package com.hortonworks.spark.atlas.utils

import java.net.URI

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.types.StructType

object CatalogUtils {

  def createDB(name: String, location: String): CatalogDatabase = {
    CatalogDatabase(name, "", new URI(location), Map.empty)
  }

  def createStorageFormat(
      locationUri: Option[URI] = None,
      inputFormat: Option[String] = None,
      outputFormat: Option[String] = None,
      serd: Option[String] = None,
      compressed: Boolean = false,
      properties: Map[String, String] = Map.empty): CatalogStorageFormat = {
    CatalogStorageFormat(locationUri, inputFormat, outputFormat, serd, compressed, properties)
  }

  def createTable(
      db: String,
      table: String,
      schema: StructType,
      storage: CatalogStorageFormat,
      isHiveTable: Boolean = false): CatalogTable = {
    CatalogTable(
      TableIdentifier(table, Some(db)),
      CatalogTableType.MANAGED,
      storage,
      schema,
      provider = if (isHiveTable) Some("hive") else None)
  }
}
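A short sketch of how these helpers might be combined; the database, table, and column names are hypothetical, only the helper signatures come from the example above:

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.types.{IntegerType, StringType, StructType}
import com.hortonworks.spark.atlas.utils.CatalogUtils

val schema = new StructType()
  .add("id", IntegerType)
  .add("name", StringType)

// All storage fields fall back to their defaults here.
val storage = CatalogUtils.createStorageFormat()

val catalogTable =
  CatalogUtils.createTable("sales", "customers", schema, storage, isHiveTable = true)

// The resulting CatalogTable is keyed by a database-qualified TableIdentifier.
assert(catalogTable.identifier == TableIdentifier("customers", Some("sales")))
assert(catalogTable.provider == Some("hive"))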
Example 42
Source File: SparkExtension.scala From spark-atlas-connector with Apache License 2.0 | 5 votes |
package com.hortonworks.spark.atlas.sql

import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.parser.ParserInterface
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.types.{DataType, StructType}
import org.apache.spark.sql.{SparkSession, SparkSessionExtensions}

class SparkExtension extends (SparkSessionExtensions => Unit) {
  def apply(e: SparkSessionExtensions): Unit = {
    e.injectParser(SparkAtlasConnectorParser)
  }
}

case class SparkAtlasConnectorParser(spark: SparkSession, delegate: ParserInterface)
  extends ParserInterface {

  override def parsePlan(sqlText: String): LogicalPlan = {
    SQLQuery.set(sqlText)
    delegate.parsePlan(sqlText)
  }

  override def parseExpression(sqlText: String): Expression =
    delegate.parseExpression(sqlText)

  override def parseTableIdentifier(sqlText: String): TableIdentifier =
    delegate.parseTableIdentifier(sqlText)

  override def parseFunctionIdentifier(sqlText: String): FunctionIdentifier =
    delegate.parseFunctionIdentifier(sqlText)

  override def parseTableSchema(sqlText: String): StructType =
    delegate.parseTableSchema(sqlText)

  override def parseDataType(sqlText: String): DataType =
    delegate.parseDataType(sqlText)
}

object SQLQuery {
  private[this] val sqlQuery = new ThreadLocal[String]

  def get(): String = sqlQuery.get

  def set(s: String): Unit = sqlQuery.set(s)
}
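Session extensions like this one are typically wired in through the standard spark.sql.extensions setting. A sketch of that wiring; the builder call is standard Spark, and the check on SQLQuery assumes the statement is parsed on the calling thread:

import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .master("local[*]")
  .appName("atlas-extension-demo")
  .config("spark.sql.extensions", "com.hortonworks.spark.atlas.sql.SparkExtension")
  .getOrCreate()

// Parsing goes through SparkAtlasConnectorParser, which records the raw SQL text
// in the SQLQuery thread-local before delegating to the built-in parser.
spark.sql("SELECT 1 AS one").collect()
println(com.hortonworks.spark.atlas.sql.SQLQuery.get())   // expected: SELECT 1 AS one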
Example 43
Source File: TestUtils.scala From spark-atlas-connector with Apache License 2.0 | 5 votes |
package com.hortonworks.spark.atlas import java.net.URI import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.types.StructType import com.hortonworks.spark.atlas.utils.SparkUtils import org.apache.atlas.model.instance.AtlasObjectId object TestUtils { def createDB(name: String, location: String): CatalogDatabase = { CatalogDatabase(name, "", new URI(location), Map.empty) } def createStorageFormat( locationUri: Option[URI] = None, inputFormat: Option[String] = None, outputFormat: Option[String] = None, serd: Option[String] = None, compressed: Boolean = false, properties: Map[String, String] = Map.empty): CatalogStorageFormat = { CatalogStorageFormat(locationUri, inputFormat, outputFormat, serd, compressed, properties) } def createTable( db: String, table: String, schema: StructType, storage: CatalogStorageFormat, isHiveTable: Boolean = false): CatalogTable = { CatalogTable( TableIdentifier(table, Some(db)), CatalogTableType.MANAGED, storage, schema, provider = if (isHiveTable) Some("hive") else None, bucketSpec = None, owner = SparkUtils.currUser()) } def assertSubsetOf[T](set: Set[T], subset: Set[T]): Unit = { assert(subset.subsetOf(set), s"$subset is not a subset of $set") } def findEntity( entities: Seq[SACAtlasReferenceable], objId: AtlasObjectId): Option[SACAtlasReferenceable] = { entities.find(p => p.asObjectId == objId) } def findEntities( entities: Seq[SACAtlasReferenceable], objIds: Seq[AtlasObjectId]): Seq[SACAtlasReferenceable] = { entities.filter(p => objIds.contains(p.asObjectId)) } }
Example 44
Source File: HiveExternalCatalogSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive import org.apache.hadoop.conf.Configuration import org.apache.spark.SparkConf import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.types.StructType class HiveExternalCatalogSuite extends ExternalCatalogSuite { private val externalCatalog: HiveExternalCatalog = { val catalog = new HiveExternalCatalog(new SparkConf, new Configuration) catalog.client.reset() catalog } protected override val utils: CatalogTestUtils = new CatalogTestUtils { override val tableInputFormat: String = "org.apache.hadoop.mapred.SequenceFileInputFormat" override val tableOutputFormat: String = "org.apache.hadoop.mapred.SequenceFileOutputFormat" override def newEmptyCatalog(): ExternalCatalog = externalCatalog } protected override def resetState(): Unit = { externalCatalog.client.reset() } import utils._ test("list partitions by filter") { val catalog = newBasicCatalog() val selectedPartitions = catalog.listPartitionsByFilter("db2", "tbl2", Seq('a.int === 1)) assert(selectedPartitions.length == 1) assert(selectedPartitions.head.spec == part1.spec) } test("SPARK-18647: do not put provider in table properties for Hive serde table") { val catalog = newBasicCatalog() val hiveTable = CatalogTable( identifier = TableIdentifier("hive_tbl", Some("db1")), tableType = CatalogTableType.MANAGED, storage = storageFormat, schema = new StructType().add("col1", "int").add("col2", "string"), provider = Some("hive")) catalog.createTable(hiveTable, ignoreIfExists = false) val rawTable = externalCatalog.client.getTable("db1", "hive_tbl") assert(!rawTable.properties.contains(HiveExternalCatalog.DATASOURCE_PROVIDER)) assert(externalCatalog.getTable("db1", "hive_tbl").provider == Some(DDLUtils.HIVE_PROVIDER)) } }
Example 45
Source File: MetastoreRelationSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive import org.apache.spark.sql.{QueryTest, Row} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.{IntegerType, StructField, StructType} class MetastoreRelationSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { test("makeCopy and toJSON should work") { val table = CatalogTable( identifier = TableIdentifier("test", Some("db")), tableType = CatalogTableType.VIEW, storage = CatalogStorageFormat.empty, schema = StructType(StructField("a", IntegerType, true) :: Nil)) val relation = MetastoreRelation("db", "test")(table, null) // No exception should be thrown relation.makeCopy(Array("db", "test")) // No exception should be thrown relation.toJSON } test("SPARK-17409: Do Not Optimize Query in CTAS (Hive Serde Table) More Than Once") { withTable("bar") { withTempView("foo") { sql("select 0 as id").createOrReplaceTempView("foo") // If we optimize the query in CTAS more than once, the following saveAsTable will fail // with the error: `GROUP BY position 0 is not in select list (valid range is [1, 1])` sql("CREATE TABLE bar AS SELECT * FROM foo group by id") checkAnswer(spark.table("bar"), Row(0) :: Nil) val tableMetadata = spark.sessionState.catalog.getTableMetadata(TableIdentifier("bar")) assert(tableMetadata.provider == Some("hive"), "the expected table is a Hive serde table") } } } }
Example 46
Source File: ListTablesSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive import org.scalatest.BeforeAndAfterAll import org.apache.spark.sql.QueryTest import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.hive.test.TestHiveSingleton class ListTablesSuite extends QueryTest with TestHiveSingleton with BeforeAndAfterAll { import hiveContext._ import hiveContext.implicits._ val df = sparkContext.parallelize((1 to 10).map(i => (i, s"str$i"))).toDF("key", "value") override def beforeAll(): Unit = { super.beforeAll() // The catalog in HiveContext is a case insensitive one. sessionState.catalog.createTempView( "ListTablesSuiteTable", df.logicalPlan, overrideIfExists = true) sql("CREATE TABLE HiveListTablesSuiteTable (key int, value string)") sql("CREATE DATABASE IF NOT EXISTS ListTablesSuiteDB") sql("CREATE TABLE ListTablesSuiteDB.HiveInDBListTablesSuiteTable (key int, value string)") } override def afterAll(): Unit = { try { sessionState.catalog.dropTable( TableIdentifier("ListTablesSuiteTable"), ignoreIfNotExists = true, purge = false) sql("DROP TABLE IF EXISTS HiveListTablesSuiteTable") sql("DROP TABLE IF EXISTS ListTablesSuiteDB.HiveInDBListTablesSuiteTable") sql("DROP DATABASE IF EXISTS ListTablesSuiteDB") } finally { super.afterAll() } } test("get all tables of current database") { Seq(tables(), sql("SHOW TABLes")).foreach { case allTables => // We are using default DB. checkAnswer( allTables.filter("tableName = 'listtablessuitetable'"), Row("", "listtablessuitetable", true)) checkAnswer( allTables.filter("tableName = 'hivelisttablessuitetable'"), Row("default", "hivelisttablessuitetable", false)) assert(allTables.filter("tableName = 'hiveindblisttablessuitetable'").count() === 0) } } test("getting all tables with a database name") { Seq(tables("listtablessuiteDb"), sql("SHOW TABLes in listTablesSuitedb")).foreach { case allTables => checkAnswer( allTables.filter("tableName = 'listtablessuitetable'"), Row("", "listtablessuitetable", true)) assert(allTables.filter("tableName = 'hivelisttablessuitetable'").count() === 0) checkAnswer( allTables.filter("tableName = 'hiveindblisttablessuitetable'"), Row("listtablessuitedb", "hiveindblisttablessuitetable", false)) } } }
Example 47
Source File: TableIdentifierParserSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.parser import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.TableIdentifier class TableIdentifierParserSuite extends SparkFunSuite { import CatalystSqlParser._ // Add "$elem$", "$value$" & "$key$" val hiveNonReservedKeyword = Array("add", "admin", "after", "analyze", "archive", "asc", "before", "bucket", "buckets", "cascade", "change", "cluster", "clustered", "clusterstatus", "collection", "columns", "comment", "compact", "compactions", "compute", "concatenate", "continue", "data", "day", "databases", "datetime", "dbproperties", "deferred", "defined", "delimited", "dependency", "desc", "directories", "directory", "disable", "distribute", "enable", "escaped", "exclusive", "explain", "export", "fields", "file", "fileformat", "first", "format", "formatted", "functions", "hold_ddltime", "hour", "idxproperties", "ignore", "index", "indexes", "inpath", "inputdriver", "inputformat", "items", "jar", "keys", "key_type", "last", "limit", "offset", "lines", "load", "location", "lock", "locks", "logical", "long", "mapjoin", "materialized", "metadata", "minus", "minute", "month", "msck", "noscan", "no_drop", "nulls", "offline", "option", "outputdriver", "outputformat", "overwrite", "owner", "partitioned", "partitions", "plus", "pretty", "principals", "protection", "purge", "read", "readonly", "rebuild", "recordreader", "recordwriter", "reload", "rename", "repair", "replace", "replication", "restrict", "rewrite", "role", "roles", "schemas", "second", "serde", "serdeproperties", "server", "sets", "shared", "show", "show_database", "skewed", "sort", "sorted", "ssl", "statistics", "stored", "streamtable", "string", "struct", "tables", "tblproperties", "temporary", "terminated", "tinyint", "touch", "transactions", "unarchive", "undo", "uniontype", "unlock", "unset", "unsigned", "uri", "use", "utc", "utctimestamp", "view", "while", "year", "work", "transaction", "write", "isolation", "level", "snapshot", "autocommit", "all", "alter", "array", "as", "authorization", "between", "bigint", "binary", "boolean", "both", "by", "create", "cube", "current_date", "current_timestamp", "cursor", "date", "decimal", "delete", "describe", "double", "drop", "exists", "external", "false", "fetch", "float", "for", "grant", "group", "grouping", "import", "in", "insert", "int", "into", "is", "lateral", "like", "local", "none", "null", "of", "order", "out", "outer", "partition", "percent", "procedure", "range", "reads", "revoke", "rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger", "true", "truncate", "update", "user", "using", "values", "with", "regexp", "rlike", "bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float", "int", "smallint", "timestamp", "at") val hiveStrictNonReservedKeyword = Seq("anti", "full", "inner", "left", "semi", "right", "natural", "union", "intersect", "except", "database", "on", "join", "cross", "select", "from", "where", "having", "from", "to", "table", "with", "not") test("table identifier") { // Regular names. assert(TableIdentifier("q") === parseTableIdentifier("q")) assert(TableIdentifier("q", Option("d")) === parseTableIdentifier("d.q")) // Illegal names. 
Seq("", "d.q.g", "t:", "${some.var.x}", "tab:1").foreach { identifier => intercept[ParseException](parseTableIdentifier(identifier)) } } test("quoted identifiers") { assert(TableIdentifier("z", Some("x.y")) === parseTableIdentifier("`x.y`.z")) assert(TableIdentifier("y.z", Some("x")) === parseTableIdentifier("x.`y.z`")) assert(TableIdentifier("z", Some("`x.y`")) === parseTableIdentifier("```x.y```.z")) assert(TableIdentifier("`y.z`", Some("x")) === parseTableIdentifier("x.```y.z```")) assert(TableIdentifier("x.y.z", None) === parseTableIdentifier("`x.y.z`")) } test("table identifier - strict keywords") { // SQL Keywords. hiveStrictNonReservedKeyword.foreach { keyword => assert(TableIdentifier(keyword) === parseTableIdentifier(keyword)) assert(TableIdentifier(keyword) === parseTableIdentifier(s"`$keyword`")) assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.`$keyword`")) } } test("table identifier - non reserved keywords") { // Hive keywords are allowed. hiveNonReservedKeyword.foreach { nonReserved => assert(TableIdentifier(nonReserved) === parseTableIdentifier(nonReserved)) } } test("SPARK-17364 table identifier - contains number") { assert(parseTableIdentifier("123_") == TableIdentifier("123_")) assert(parseTableIdentifier("1a.123_") == TableIdentifier("123_", Some("1a"))) // ".123" should not be treated as token of type DECIMAL_VALUE assert(parseTableIdentifier("a.123A") == TableIdentifier("123A", Some("a"))) // ".123E3" should not be treated as token of type SCIENTIFIC_DECIMAL_VALUE assert(parseTableIdentifier("a.123E3_LIST") == TableIdentifier("123E3_LIST", Some("a"))) // ".123D" should not be treated as token of type DOUBLE_LITERAL assert(parseTableIdentifier("a.123D_LIST") == TableIdentifier("123D_LIST", Some("a"))) // ".123BD" should not be treated as token of type BIGDECIMAL_LITERAL assert(parseTableIdentifier("a.123BD_LIST") == TableIdentifier("123BD_LIST", Some("a"))) } test("SPARK-17832 table identifier - contains backtick") { val complexName = TableIdentifier("`weird`table`name", Some("`d`b`1")) assert(complexName === parseTableIdentifier("```d``b``1`.```weird``table``name`")) assert(complexName === parseTableIdentifier(complexName.quotedString)) intercept[ParseException](parseTableIdentifier(complexName.unquotedString)) // Table identifier contains countious backticks should be treated correctly. val complexName2 = TableIdentifier("x``y", Some("d``b")) assert(complexName2 === parseTableIdentifier(complexName2.quotedString)) } }
Example 48
Source File: ddl.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils} import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan} import org.apache.spark.sql.execution.command.RunnableCommand import org.apache.spark.sql.types._ case class CreateTable( tableDesc: CatalogTable, mode: SaveMode, query: Option[LogicalPlan]) extends Command { assert(tableDesc.provider.isDefined, "The table to be created must have a provider.") if (query.isEmpty) { assert( mode == SaveMode.ErrorIfExists || mode == SaveMode.Ignore, "create table without data insertion can only use ErrorIfExists or Ignore as SaveMode.") } override def innerChildren: Seq[QueryPlan[_]] = query.toSeq } case class CreateTempViewUsing( tableIdent: TableIdentifier, userSpecifiedSchema: Option[StructType], replace: Boolean, global: Boolean, provider: String, options: Map[String, String]) extends RunnableCommand { if (tableIdent.database.isDefined) { throw new AnalysisException( s"Temporary view '$tableIdent' should not have specified a database") } override def argString: String = { s"[tableIdent:$tableIdent " + userSpecifiedSchema.map(_ + " ").getOrElse("") + s"replace:$replace " + s"provider:$provider " + CatalogUtils.maskCredentials(options) } def run(sparkSession: SparkSession): Seq[Row] = { val dataSource = DataSource( sparkSession, userSpecifiedSchema = userSpecifiedSchema, className = provider, options = options) val catalog = sparkSession.sessionState.catalog val viewDefinition = Dataset.ofRows( sparkSession, LogicalRelation(dataSource.resolveRelation())).logicalPlan if (global) { catalog.createGlobalTempView(tableIdent.table, viewDefinition, replace) } else { catalog.createTempView(tableIdent.table, viewDefinition, replace) } Seq.empty[Row] } } case class RefreshTable(tableIdent: TableIdentifier) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { // Refresh the given table's metadata. If this table is cached as an InMemoryRelation, // drop the original cached version and make the new version cached lazily. sparkSession.catalog.refreshTable(tableIdent.quotedString) Seq.empty[Row] } } case class RefreshResource(path: String) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { sparkSession.catalog.refreshByPath(path) Seq.empty[Row] } }
Example 49
Source File: cache.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command import org.apache.spark.sql.{Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan case class CacheTableCommand( tableIdent: TableIdentifier, plan: Option[LogicalPlan], isLazy: Boolean) extends RunnableCommand { require(plan.isEmpty || tableIdent.database.isEmpty, "Database name is not allowed in CACHE TABLE AS SELECT") override protected def innerChildren: Seq[QueryPlan[_]] = { plan.toSeq } override def run(sparkSession: SparkSession): Seq[Row] = { plan.foreach { logicalPlan => Dataset.ofRows(sparkSession, logicalPlan).createTempView(tableIdent.quotedString) } sparkSession.catalog.cacheTable(tableIdent.quotedString) if (!isLazy) { // Performs eager caching sparkSession.table(tableIdent).count() } Seq.empty[Row] } } case class UncacheTableCommand( tableIdent: TableIdentifier, ifExists: Boolean) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { val tableId = tableIdent.quotedString try { sparkSession.catalog.uncacheTable(tableId) } catch { case _: NoSuchTableException if ifExists => // don't throw } Seq.empty[Row] } } case object ClearCacheCommand extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { sparkSession.catalog.clearCache() Seq.empty[Row] } }
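For context, CacheTableCommand above is the plan that CACHE TABLE statements resolve to in this Spark version; it can also be constructed directly. A sketch with a hypothetical table name:

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.command.CacheTableCommand

// Equivalent in spirit to: CACHE TABLE events
val eagerCache = CacheTableCommand(TableIdentifier("events"), plan = None, isLazy = false)
// eagerCache.run(sparkSession) caches the table and runs count() to materialize it eagerly.

// Equivalent in spirit to: CACHE LAZY TABLE events
val lazyCache = CacheTableCommand(TableIdentifier("events"), plan = None, isLazy = true)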
Example 50
Source File: DropCacheSIEventListener.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.secondaryindex.events import scala.collection.JavaConverters._ import org.apache.log4j.Logger import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.command.cache.CarbonDropCacheCommand import org.apache.spark.sql.index.CarbonIndexUtil import org.apache.carbondata.common.logging.LogServiceFactory import org.apache.carbondata.core.metadata.index.IndexType import org.apache.carbondata.events.{DropTableCacheEvent, Event, OperationContext, OperationEventListener} object DropCacheSIEventListener extends OperationEventListener { val LOGGER: Logger = LogServiceFactory.getLogService(this.getClass.getCanonicalName) override protected def onEvent(event: Event, operationContext: OperationContext): Unit = { event match { case dropCacheEvent: DropTableCacheEvent => val carbonTable = dropCacheEvent.carbonTable val sparkSession = dropCacheEvent.sparkSession val internalCall = dropCacheEvent.internalCall if (carbonTable.isIndexTable && !internalCall) { throw new UnsupportedOperationException("Operation not allowed on child table.") } val allIndexTables = carbonTable.getIndexTableNames( IndexType.SI.getIndexProviderName) val dbName = carbonTable.getDatabaseName for (indexTableName <- allIndexTables.asScala) { try { val dropCacheCommandForChildTable = CarbonDropCacheCommand( TableIdentifier(indexTableName, Some(dbName)), internalCall = true) dropCacheCommandForChildTable.processMetadata(sparkSession) } catch { case e: Exception => LOGGER.error(s"Clean cache for SI table $indexTableName failed. ", e) } } } } }
Example 51
Source File: StreamingTableStrategy.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.strategy import org.apache.spark.sql.{CarbonEnv, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.{SparkPlan, SparkStrategy} import org.apache.spark.sql.execution.command.{AlterTableAddColumnsCommand, AlterTableChangeColumnCommand, AlterTableRenameCommand} import org.apache.spark.sql.execution.command.mutation.{CarbonProjectForDeleteCommand, CarbonProjectForUpdateCommand} import org.apache.spark.sql.execution.command.schema.{CarbonAlterTableAddColumnCommand, CarbonAlterTableColRenameDataTypeChangeCommand, CarbonAlterTableDropColumnCommand} import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException private def rejectIfStreamingTable(tableIdentifier: TableIdentifier, operation: String): Unit = { var streaming = false try { streaming = CarbonEnv.getCarbonTable( tableIdentifier.database, tableIdentifier.table)(sparkSession) .isStreamingSink } catch { case e: Exception => streaming = false } if (streaming) { throw new MalformedCarbonCommandException( s"$operation is not allowed for streaming table") } } def isCarbonTable(tableIdent: TableIdentifier): Boolean = { CarbonPlanHelper.isCarbonTable(tableIdent, sparkSession) } }
Example 52
Source File: CarbonShowStreamsCommand.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command.stream import java.util.Date import java.util.concurrent.TimeUnit import org.apache.spark.sql.{CarbonEnv, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.execution.command.MetadataCommand import org.apache.spark.sql.types.StringType import org.apache.carbondata.stream.StreamJobManager case class CarbonShowStreamsCommand( tableOp: Option[TableIdentifier] ) extends MetadataCommand { override def output: Seq[Attribute] = { Seq(AttributeReference("Stream Name", StringType, nullable = false)(), AttributeReference("JobId", StringType, nullable = false)(), AttributeReference("Status", StringType, nullable = false)(), AttributeReference("Source", StringType, nullable = false)(), AttributeReference("Sink", StringType, nullable = false)(), AttributeReference("Start Time", StringType, nullable = false)(), AttributeReference("Time Elapse", StringType, nullable = false)()) } override def processMetadata(sparkSession: SparkSession): Seq[Row] = { val jobs = tableOp match { case None => StreamJobManager.getAllJobs.toSeq case Some(table) => val carbonTable = CarbonEnv.getCarbonTable(table.database, table.table)(sparkSession) setAuditTable(carbonTable) StreamJobManager.getAllJobs.filter { job => job.sinkTable.equalsIgnoreCase(carbonTable.getTableName) && job.sinkDb.equalsIgnoreCase(carbonTable.getDatabaseName) }.toSeq } jobs.map { job => val elapsedTime = System.currentTimeMillis() - job.startTime Row( job.streamName, job.streamingQuery.id.toString, if (job.streamingQuery.isActive) "RUNNING" else "FAILED", s"${ job.sourceDb }.${ job.sourceTable }", s"${ job.sinkDb }.${ job.sinkTable }", new Date(job.startTime).toString, String.format( "%s days, %s hours, %s min, %s sec", TimeUnit.MILLISECONDS.toDays(elapsedTime).toString, TimeUnit.MILLISECONDS.toHours(elapsedTime).toString, TimeUnit.MILLISECONDS.toMinutes(elapsedTime).toString, TimeUnit.MILLISECONDS.toSeconds(elapsedTime).toString) ) } } override protected def opName: String = "SHOW STREAMS" }
Example 53
Source File: CarbonDropMVCommand.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command.view import org.apache.log4j.Logger import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.command.AtomicRunnableCommand import org.apache.spark.sql.execution.command.table.CarbonDropTableCommand import org.apache.carbondata.common.logging.LogServiceFactory import org.apache.carbondata.core.datastore.impl.FileFactory import org.apache.carbondata.core.util.CarbonProperties import org.apache.carbondata.events.{OperationContext, OperationListenerBus} import org.apache.carbondata.view.{MVCatalogInSpark, MVManagerInSpark, UpdateMVPostExecutionEvent, UpdateMVPreExecutionEvent} case class CarbonDropMVCommand( databaseNameOption: Option[String], name: String, ifExistsSet: Boolean, forceDrop: Boolean = false) extends AtomicRunnableCommand { private val logger = CarbonDropMVCommand.LOGGER private var dropTableCommand: CarbonDropTableCommand = _ override def processMetadata(session: SparkSession): Seq[Row] = { setAuditInfo(Map("mvName" -> name)) val viewManager = MVManagerInSpark.get(session) try { logger.info("Trying to drop materialized view schema") val databaseName = databaseNameOption.getOrElse(session.sessionState.catalog.getCurrentDatabase) val schema = viewManager.getSchema(databaseName, name) if (schema != null) { // Drop mv status. val databaseLocation = viewManager.getDatabaseLocation(databaseName) val systemDirectoryPath = CarbonProperties.getInstance() .getSystemFolderLocationPerDatabase(FileFactory .getCarbonFile(databaseLocation) .getCanonicalPath) val identifier = TableIdentifier(name, Option(databaseName)) val operationContext = new OperationContext() OperationListenerBus.getInstance().fireEvent( UpdateMVPreExecutionEvent(session, systemDirectoryPath, identifier), operationContext) viewManager.onDrop(databaseName, name) OperationListenerBus.getInstance().fireEvent( UpdateMVPostExecutionEvent(session, systemDirectoryPath, identifier), operationContext) // Drop mv table. val dropTableCommand = CarbonDropTableCommand( ifExistsSet = true, Option(databaseName), name, dropChildTable = true, isInternalCall = true) dropTableCommand.processMetadata(session) // Drop mv schema. try { viewManager.deleteSchema(databaseName, name) } finally { val viewCatalog = viewManager.getCatalog() .asInstanceOf[MVCatalogInSpark] if (viewCatalog != null) { viewCatalog.deregisterSchema(schema.getIdentifier) } } this.dropTableCommand = dropTableCommand } } catch { case exception: Exception => if (!ifExistsSet) { throw exception } } Seq.empty } override def processData(sparkSession: SparkSession): Seq[Row] = { // delete the table folder if (this.dropTableCommand != null) { this.dropTableCommand.processData(sparkSession) } Seq.empty } override protected def opName: String = "DROP MATERIALIZED VIEW" } object CarbonDropMVCommand { private val LOGGER: Logger = LogServiceFactory.getLogService( classOf[CarbonDropMVCommand].getCanonicalName) }
Example 54
Source File: CarbonRefreshMVCommand.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command.view import org.apache.spark.sql.{CarbonEnv, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.command.DataCommand import org.apache.carbondata.common.exceptions.sql.{MalformedMVCommandException, NoSuchMVException} import org.apache.carbondata.core.view.MVStatus import org.apache.carbondata.events.{OperationContext, OperationListenerBus} import org.apache.carbondata.view.{MVManagerInSpark, MVRefresher, RefreshMVPostExecutionEvent, RefreshMVPreExecutionEvent} case class CarbonRefreshMVCommand( databaseNameOption: Option[String], mvName: String) extends DataCommand { override def processData(session: SparkSession): Seq[Row] = { val databaseName = databaseNameOption.getOrElse(session.sessionState.catalog.getCurrentDatabase) val viewManager = MVManagerInSpark.get(session) val schema = try { viewManager.getSchema(databaseName, mvName) } catch { case _: NoSuchMVException => throw new MalformedMVCommandException( s"Materialized view ${ databaseName }.${ mvName } does not exist") } val table = CarbonEnv.getCarbonTable(Option(databaseName), mvName)(session) setAuditTable(table) MVRefresher.refresh(schema, session) // After rebuild successfully enable the MV table. val identifier = TableIdentifier(mvName, Option(databaseName)) val operationContext = new OperationContext() OperationListenerBus.getInstance().fireEvent( RefreshMVPreExecutionEvent(session, identifier), operationContext) viewManager.setStatus(schema.getIdentifier, MVStatus.ENABLED) OperationListenerBus.getInstance().fireEvent( RefreshMVPostExecutionEvent(session, identifier), operationContext) Seq.empty } override protected def opName: String = "REFRESH MATERIALIZED VIEW" }
Example 55
Source File: CarbonShowMVCommand.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command.view import java.util import scala.collection.JavaConverters._ import org.apache.spark.sql.{CarbonEnv, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.execution.command.{Checker, DataCommand} import org.apache.spark.sql.types.{BooleanType, StringType} import org.apache.carbondata.core.view.{MVProperty, MVSchema} import org.apache.carbondata.view.MVManagerInSpark case class CarbonShowMVCommand( databaseNameOption: Option[String], relatedTableIdentifier: Option[TableIdentifier]) extends DataCommand { override def output: Seq[Attribute] = { Seq( AttributeReference("Database", StringType, nullable = false)(), AttributeReference("Name", StringType, nullable = false)(), AttributeReference("Status", StringType, nullable = false)(), AttributeReference("Refresh Mode", StringType, nullable = false)(), AttributeReference("Refresh Trigger Mode", StringType, nullable = false)(), AttributeReference("Properties", StringType, nullable = false)()) } override def processData(session: SparkSession): Seq[Row] = { // Get mv schemas. val schemaList = new util.ArrayList[MVSchema]() val viewManager = MVManagerInSpark.get(session) relatedTableIdentifier match { case Some(table) => val relatedTable = CarbonEnv.getCarbonTable(table)(session) setAuditTable(relatedTable) Checker.validateTableExists(table.database, table.table, session) if (databaseNameOption.isDefined) { schemaList.addAll(viewManager.getSchemasOnTable( databaseNameOption.get, relatedTable)) } else { schemaList.addAll(viewManager.getSchemasOnTable(relatedTable)) } case _ => if (databaseNameOption.isDefined) { schemaList.addAll(viewManager.getSchemas(databaseNameOption.get)) } else { schemaList.addAll(viewManager.getSchemas()) } } // Convert mv schema to row. schemaList.asScala.map { schema => Row( schema.getIdentifier.getDatabaseName, schema.getIdentifier.getTableName, schema.getStatus.name(), schema.getProperties.get(MVProperty.REFRESH_MODE), schema.getProperties.get(MVProperty.REFRESH_TRIGGER_MODE), schema.getPropertiesAsString ) } } override protected def opName: String = "SHOW MATERIALIZED VIEW" }
Example 56
Source File: CarbonDropCacheCommand.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command.cache

import scala.collection.JavaConverters._

import org.apache.spark.sql.{CarbonEnv, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.command.MetadataCommand

import org.apache.carbondata.common.logging.LogServiceFactory
import org.apache.carbondata.core.cache.CacheProvider
import org.apache.carbondata.core.index.{IndexStoreManager, IndexUtil}
import org.apache.carbondata.core.metadata.schema.table.CarbonTable
import org.apache.carbondata.core.util.CarbonProperties
import org.apache.carbondata.events.{DropTableCacheEvent, OperationContext, OperationListenerBus}
import org.apache.carbondata.view.MVManagerInSpark

case class CarbonDropCacheCommand(tableIdentifier: TableIdentifier, internalCall: Boolean = false)
  extends MetadataCommand {

  val LOGGER = LogServiceFactory.getLogService(this.getClass.getCanonicalName)

  override def processMetadata(sparkSession: SparkSession): Seq[Row] = {
    val carbonTable = CarbonEnv.getCarbonTable(tableIdentifier)(sparkSession)
    clearCache(carbonTable, sparkSession)
    Seq.empty
  }

  def clearCache(carbonTable: CarbonTable, sparkSession: SparkSession): Unit = {
    LOGGER.info("Drop cache request received for table " + carbonTable.getTableUniqueName)

    val dropCacheEvent = DropTableCacheEvent(carbonTable, sparkSession, internalCall)
    val operationContext = new OperationContext
    OperationListenerBus.getInstance.fireEvent(dropCacheEvent, operationContext)

    val cache = CacheProvider.getInstance().getCarbonCache

    // Clear cache from the IndexServer
    if (CarbonProperties.getInstance().isDistributedPruningEnabled(carbonTable.getDatabaseName,
      carbonTable.getTableName)) {
      LOGGER.info("Clearing cache from IndexServer")
      IndexUtil.executeClearIndexJob(carbonTable, IndexUtil.DISTRIBUTED_JOB_NAME)
    }
    if (cache != null) {
      LOGGER.info("Clearing cache from driver side")
      IndexStoreManager.getInstance().clearIndex(carbonTable.getAbsoluteTableIdentifier)
    }

    val viewManager = MVManagerInSpark.get(sparkSession)
    val viewsOnTable = viewManager.getSchemasOnTable(carbonTable)
    if (!viewsOnTable.isEmpty) {
      viewsOnTable.asScala.foreach { viewSchema =>
        val viewIdentifier = new TableIdentifier(
          viewSchema.getIdentifier.getTableName,
          Option(viewSchema.getIdentifier.getDatabaseName)
        )
        CarbonDropCacheCommand(viewIdentifier, internalCall = true).run(sparkSession)
      }
    }
    LOGGER.info("Drop cache request served for table " + carbonTable.getTableUniqueName)
  }

  override protected def opName: String = "DROP METACACHE"
}
Example 57
Source File: CarbonShowTablesCommand.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command.table import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.execution.command.MetadataCommand import org.apache.spark.sql.types.{BooleanType, StringType} private[sql] case class CarbonShowTablesCommand ( databaseName: Option[String], tableIdentifierPattern: Option[String]) extends MetadataCommand{ // The result of SHOW TABLES has three columns: database, tableName and isTemporary. override val output: Seq[Attribute] = { AttributeReference("database", StringType, nullable = false)() :: AttributeReference("tableName", StringType, nullable = false)() :: AttributeReference("isTemporary", BooleanType, nullable = false)() :: Nil } override def processMetadata(sparkSession: SparkSession): Seq[Row] = { // Since we need to return a Seq of rows, we will call getTables directly // instead of calling tables in sparkSession. val catalog = sparkSession.sessionState.catalog val db = databaseName.getOrElse(catalog.getCurrentDatabase) val tables = tableIdentifierPattern.map(catalog.listTables(db, _)).getOrElse(catalog.listTables(db)) val externalCatalog = sparkSession.sharedState.externalCatalog // this method checks whether the table is mainTable or MV based on property "isVisible" def isMainTable(tableIdent: TableIdentifier) = { var isMainTable = true try { isMainTable = externalCatalog.getTable(db, tableIdent.table).storage.properties .getOrElse("isVisible", true).toString.toBoolean } catch { case ex: Throwable => // ignore the exception for show tables } isMainTable } // tables will be filtered for all the MVs to show only main tables tables.collect { case tableIdent if isMainTable(tableIdent) => val isTemp = catalog.isTemporaryTable(tableIdent) Row(tableIdent.database.getOrElse("default"), tableIdent.table, isTemp) } } override protected def opName: String = "SHOW TABLES" }
Example 58
Source File: CarbonCreateTableAsSelectCommand.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command.table import org.apache.spark.sql.{CarbonEnv, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.command.AtomicRunnableCommand import org.apache.spark.sql.execution.command.management.{CarbonInsertIntoCommand} import org.apache.carbondata.core.metadata.schema.table.TableInfo case class CarbonCreateTableAsSelectCommand( tableInfo: TableInfo, query: LogicalPlan, ifNotExistsSet: Boolean = false, tableLocation: Option[String] = None) extends AtomicRunnableCommand { var insertIntoCommand: CarbonInsertIntoCommand = _ override def processMetadata(sparkSession: SparkSession): Seq[Row] = { val tableName = tableInfo.getFactTable.getTableName var isTableCreated = false var databaseOpt: Option[String] = None if (tableInfo.getDatabaseName != null) { databaseOpt = Some(tableInfo.getDatabaseName) } val dbName = CarbonEnv.getDatabaseName(databaseOpt)(sparkSession) setAuditTable(dbName, tableName) setAuditInfo(Map("query" -> query.simpleString)) // check if table already exists if (sparkSession.sessionState.catalog .tableExists(TableIdentifier(tableName, Some(dbName)))) { if (!ifNotExistsSet) { throw new TableAlreadyExistsException(dbName, tableName) } } else { // execute command to create carbon table CarbonCreateTableCommand(tableInfo, ifNotExistsSet, tableLocation).run(sparkSession) isTableCreated = true } if (isTableCreated) { val tableName = tableInfo.getFactTable.getTableName var databaseOpt: Option[String] = None if (tableInfo.getDatabaseName != null) { databaseOpt = Some(tableInfo.getDatabaseName) } val dbName = CarbonEnv.getDatabaseName(databaseOpt)(sparkSession) val carbonDataSourceHadoopRelation = CarbonEnv.getInstance(sparkSession).carbonMetaStore .createCarbonDataSourceHadoopRelation(sparkSession, TableIdentifier(tableName, Option(dbName))) // execute command to load data into carbon table insertIntoCommand = CarbonInsertIntoCommand( databaseNameOp = Some(carbonDataSourceHadoopRelation.carbonRelation.databaseName), tableName = carbonDataSourceHadoopRelation.carbonRelation.tableName, options = scala.collection.immutable .Map("fileheader" -> carbonDataSourceHadoopRelation.tableSchema.get.fields.map(_.name).mkString(",")), isOverwriteTable = false, logicalPlan = query, tableInfo = tableInfo) insertIntoCommand.processMetadata(sparkSession) } Seq.empty } override def processData(sparkSession: SparkSession): Seq[Row] = { if (null != insertIntoCommand) { insertIntoCommand.processData(sparkSession) } Seq.empty } override def undoMetadata(sparkSession: SparkSession, exception: Exception): Seq[Row] = { val tableName = tableInfo.getFactTable.getTableName var databaseOpt: Option[String] = None if (tableInfo.getDatabaseName != null) { databaseOpt = Some(tableInfo.getDatabaseName) } val dbName = CarbonEnv.getDatabaseName(databaseOpt)(sparkSession) // drop the created table. CarbonDropTableCommand( ifExistsSet = false, Option(dbName), tableName).run(sparkSession) Seq.empty } override protected def opName: String = "CREATE TABLE AS SELECT" }
Example 59
Source File: CarbonCreateTableLikeCommand.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command.table import java.util import java.util.UUID import org.apache.spark.sql.{CarbonEnv, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.command.MetadataCommand import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException import org.apache.carbondata.common.logging.LogServiceFactory import org.apache.carbondata.core.metadata.schema.{SchemaEvolution, SchemaEvolutionEntry} import org.apache.carbondata.core.metadata.schema.table.{TableInfo, TableSchema} case class CarbonCreateTableLikeCommand( sourceTable: TableIdentifier, targetTable: TableIdentifier, ifNotExists: Boolean = false) extends MetadataCommand { private val LOGGER = LogServiceFactory.getLogService(this.getClass.getName) override def processMetadata(sparkSession: SparkSession): Seq[Row] = { val srcTable = CarbonEnv.getCarbonTable(sourceTable.database, sourceTable.table)(sparkSession) if (!srcTable.isTransactionalTable) { throw new MalformedCarbonCommandException("Unsupported operation on non transactional table") } if (srcTable.isMV) { throw new MalformedCarbonCommandException("Unsupported operation on child table or MV") } // copy schema of source table and update fields to target table val dstTableSchema = srcTable.getTableInfo.getFactTable.clone().asInstanceOf[TableSchema] // remove index information in source table tblProperties dstTableSchema.getTableProperties.remove(srcTable.getTableId) dstTableSchema.setTableName(targetTable.table) dstTableSchema.setTableId(UUID.randomUUID().toString) val schemaEvol: SchemaEvolution = new SchemaEvolution val schEntryList: util.List[SchemaEvolutionEntry] = new util.ArrayList[SchemaEvolutionEntry] schemaEvol.setSchemaEvolutionEntryList(schEntryList) dstTableSchema.setSchemaEvolution(schemaEvol) // build table info for creating table val dstTableInfo = new TableInfo val dstDB = targetTable.database.getOrElse(sparkSession.catalog.currentDatabase) dstTableInfo.setDatabaseName(dstDB) dstTableInfo.setLastUpdatedTime(System.currentTimeMillis()) dstTableInfo.setFactTable(dstTableSchema) CarbonCreateTableCommand(dstTableInfo, ifNotExists).run(sparkSession) } override protected def opName: String = "CREATE TABLE LIKE" }
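The target database above falls back to the session's current database when the TableIdentifier carries none. That qualification step can be expressed on the identifier itself; a small sketch with an invented helper name:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier

object QualifySketch { // hypothetical helper
  // Fill in the missing database part from the current session.
  def qualify(spark: SparkSession, ident: TableIdentifier): TableIdentifier =
    if (ident.database.isDefined) ident
    else ident.copy(database = Some(spark.catalog.currentDatabase))
}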
Example 60
Source File: CarbonAlterTableSetCommand.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command.schema import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.command._ import org.apache.spark.util.AlterTableUtil private[sql] case class CarbonAlterTableSetCommand( tableIdentifier: TableIdentifier, properties: Map[String, String], isView: Boolean) extends MetadataCommand { override def processMetadata(sparkSession: SparkSession): Seq[Row] = { setAuditTable(tableIdentifier.database.getOrElse(sparkSession.catalog.currentDatabase), tableIdentifier.table) AlterTableUtil.modifyTableProperties( tableIdentifier, properties, Nil, set = true)(sparkSession, sparkSession.sessionState.catalog) setAuditInfo(properties) Seq.empty } override protected def opName: String = "ALTER TABLE SET" }
Example 61
Source File: CarbonAlterTableUnsetCommand.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command.schema import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.command._ import org.apache.spark.util.AlterTableUtil private[sql] case class CarbonAlterTableUnsetCommand( tableIdentifier: TableIdentifier, propKeys: Seq[String], ifExists: Boolean, isView: Boolean) extends MetadataCommand { override def processMetadata(sparkSession: SparkSession): Seq[Row] = { setAuditTable(tableIdentifier.database.getOrElse(sparkSession.catalog.currentDatabase), tableIdentifier.table) AlterTableUtil.modifyTableProperties(tableIdentifier, Map.empty[String, String], propKeys, false)(sparkSession, sparkSession.sessionState.catalog) setAuditInfo(Map("unset" -> propKeys.mkString(", "))) Seq.empty } override protected def opName: String = "ALTER TABLE UNSET" }
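Both the SET and UNSET commands route through AlterTableUtil.modifyTableProperties with the same TableIdentifier. One way to observe the effect from a test or the REPL is to read the metadata back through the session catalog; this sketch assumes the changed keys are visible in CatalogTable.properties, which may not hold for every Carbon-managed property.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier

object TablePropsSketch { // hypothetical helper
  // Read a single table property back from the session catalog.
  def property(spark: SparkSession, ident: TableIdentifier, key: String): Option[String] =
    spark.sessionState.catalog.getTableMetadata(ident).properties.get(key)
}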
Example 62
Source File: DropCacheEventListeners.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.listeners import scala.collection.JavaConverters._ import scala.collection.mutable import org.apache.log4j.Logger import org.apache.spark.sql.{CarbonEnv, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.command.cache.{CacheUtil, CarbonDropCacheCommand} import org.apache.carbondata.common.logging.LogServiceFactory import org.apache.carbondata.core.cache.CacheProvider import org.apache.carbondata.core.metadata.index.IndexType import org.apache.carbondata.core.metadata.schema.table.IndexSchema import org.apache.carbondata.core.view.MVSchema import org.apache.carbondata.events.{DropTableCacheEvent, Event, OperationContext, OperationEventListener} import org.apache.carbondata.view.MVManagerInSpark object DropCacheMVEventListener extends OperationEventListener { val LOGGER: Logger = LogServiceFactory.getLogService(this.getClass.getCanonicalName) override protected def onEvent(event: Event, operationContext: OperationContext): Unit = { event match { case dropCacheEvent: DropTableCacheEvent => val carbonTable = dropCacheEvent.carbonTable val cache = CacheProvider.getInstance().getCarbonCache val indexProviderMap = carbonTable.getIndexesMap val bloomIndexProvider = IndexType.BLOOMFILTER.getIndexProviderName if (!indexProviderMap.isEmpty && null != indexProviderMap.get(bloomIndexProvider)) { val bloomIndexes = indexProviderMap.get(bloomIndexProvider) val bloomIndexIterator = bloomIndexes.entrySet().iterator() while (bloomIndexIterator.hasNext) { val bloomIndexEntry = bloomIndexIterator.next() val index = new IndexSchema(bloomIndexEntry.getKey, bloomIndexProvider) index.setProperties(bloomIndexEntry.getValue) try { // Get index keys val indexKeys = CacheUtil.getBloomCacheKeys(carbonTable, index) // remove index keys from cache cache.removeAll(indexKeys.asJava) } catch { case e: Exception => LOGGER.warn( s"Clean cache for Bloom index ${ index.getIndexName } failed.", e) } } } } } }
Example 63
Source File: CarbonExpressions.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Expression, ScalaUDF} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} import org.apache.spark.sql.execution.command.DescribeTableCommand import org.apache.spark.sql.types.DataType object CarbonExpressions { object CarbonScalaUDF { def unapply(expression: Expression): Option[(ScalaUDF)] = { expression match { case a: ScalaUDF => Some(a) case _ => None } } } }
Example 64
Source File: TableLoader.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.util.Properties import scala.collection.{immutable, mutable} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.command.management.CarbonLoadDataCommand import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.datastore.impl.FileFactory import org.apache.carbondata.core.util.CarbonProperties // scalastyle:off object TableLoader { def extractOptions(propertiesFile: String): immutable.Map[String, String] = { val props = new Properties val path = new Path(propertiesFile) val fs = path.getFileSystem(FileFactory.getConfiguration) props.load(fs.open(path)) val elments = props.entrySet().iterator() val map = new mutable.HashMap[String, String]() System.out.println("properties file:") while (elments.hasNext) { val elment = elments.next() System.out.println(s"${elment.getKey}=${elment.getValue}") map.put(elment.getKey.asInstanceOf[String], elment.getValue.asInstanceOf[String]) } immutable.Map(map.toSeq: _*) } def extractStorePath(map: immutable.Map[String, String]): String = { map.get(CarbonCommonConstants.STORE_LOCATION) match { case Some(path) => path case None => throw new Exception(s"${CarbonCommonConstants.STORE_LOCATION} can't be empty") } } def loadTable(spark: SparkSession, dbName: Option[String], tableName: String, inputPaths: String, options: scala.collection.immutable.Map[String, String]): Unit = { CarbonLoadDataCommand(dbName, tableName, inputPaths, Nil, options, false).run(spark) } def main(args: Array[String]): Unit = { if (args.length < 3) { System.err.println("Usage: TableLoader <properties file> <table name> <input files>") System.exit(1) } System.out.println("parameter list:") args.foreach(System.out.println) val map = extractOptions(TableAPIUtil.escape(args(0))) val storePath = extractStorePath(map) System.out.println(s"${CarbonCommonConstants.STORE_LOCATION}:$storePath") val (dbName, tableName) = TableAPIUtil.parseSchemaName(TableAPIUtil.escape(args(1))) System.out.println(s"table name: $dbName.$tableName") val inputPaths = TableAPIUtil.escape(args(2)) val spark = TableAPIUtil.spark(storePath, s"TableLoader: $dbName.$tableName") loadTable(spark, Option(dbName), tableName, inputPaths, map) } }
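TableLoader splits the db.table argument with TableAPIUtil.parseSchemaName. The Catalyst parser can perform the same split by going through a TableIdentifier, which also copes with backtick-quoted names; the fallback to "default" below is an assumption, not TableLoader's behaviour.

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser

object ParseNameSketch { // hypothetical helper
  // Turn a "db.table" (or bare "table") argument into (database, table).
  def parseSchemaName(name: String): (String, String) = {
    val ident: TableIdentifier = CatalystSqlParser.parseTableIdentifier(name)
    (ident.database.getOrElse("default"), ident.table)
  }
}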
Example 65
Source File: CleanFiles.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.util import org.apache.spark.sql.{CarbonEnv, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.carbondata.api.CarbonStore object CleanFiles { def cleanFiles(spark: SparkSession, dbName: String, tableName: String, forceTableClean: Boolean = false): Unit = { TableAPIUtil.validateTableExists(spark, dbName, tableName) val tablePath = CarbonEnv.getTablePath(Some(dbName), tableName)(spark) val carbonTable = if (!forceTableClean) { CarbonEnv.getCarbonTable(Some(dbName), tableName)(spark) } else { null } CarbonStore.cleanFiles( dbName = dbName, tableName = tableName, tablePath = tablePath, carbonTable = carbonTable, forceTableClean = forceTableClean) } def main(args: Array[String]): Unit = { if (args.length < 2) { System.err.println("Usage: CleanFiles <store path> <table name>") System.exit(1) } val storePath = TableAPIUtil.escape(args(0)) val (dbName, tableName) = TableAPIUtil.parseSchemaName(TableAPIUtil.escape(args(1))) var forceTableClean = false if (args.length > 2) { forceTableClean = args(2).toBoolean } val spark = TableAPIUtil.spark(storePath, s"CleanFiles: $dbName.$tableName") cleanFiles(spark, dbName, tableName, forceTableClean) } }
Example 66
Source File: DeleteSegmentById.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.util import org.apache.spark.sql.{CarbonEnv, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.carbondata.api.CarbonStore // scalastyle:off object DeleteSegmentById { def extractSegmentIds(segmentIds: String): Seq[String] = { segmentIds.split(",").toSeq } def deleteSegmentById(spark: SparkSession, dbName: String, tableName: String, segmentIds: Seq[String]): Unit = { TableAPIUtil.validateTableExists(spark, dbName, tableName) val carbonTable = CarbonEnv.getCarbonTable(Some(dbName), tableName)(spark) CarbonStore.deleteLoadById(segmentIds, dbName, tableName, carbonTable) } def main(args: Array[String]): Unit = { if (args.length < 3) { System.err.println( "Usage: DeleteSegmentByID <store path> <table name> <segment id list>") System.exit(1) } val storePath = TableAPIUtil.escape(args(0)) val (dbName, tableName) = TableAPIUtil.parseSchemaName(TableAPIUtil.escape(args(1))) val segmentIds = extractSegmentIds(TableAPIUtil.escape(args(2))) val spark = TableAPIUtil.spark(storePath, s"DeleteSegmentById: $dbName.$tableName") deleteSegmentById(spark, dbName, tableName, segmentIds) } }
Example 67
Source File: DeleteSegmentByDate.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.util import org.apache.spark.sql.{CarbonEnv, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.carbondata.api.CarbonStore // scalastyle:off object DeleteSegmentByDate { def deleteSegmentByDate(spark: SparkSession, dbName: String, tableName: String, dateValue: String): Unit = { TableAPIUtil.validateTableExists(spark, dbName, tableName) val carbonTable = CarbonEnv.getCarbonTable(Some(dbName), tableName)(spark) CarbonStore.deleteLoadByDate(dateValue, dbName, tableName, carbonTable) } def main(args: Array[String]): Unit = { if (args.length < 3) { System.err.println( "Usage: DeleteSegmentByDate <store path> <table name> <before date value>") System.exit(1) } val storePath = TableAPIUtil.escape(args(0)) val (dbName, tableName) = TableAPIUtil.parseSchemaName(TableAPIUtil.escape(args(1))) val dateValue = TableAPIUtil.escape(args(2)) val spark = TableAPIUtil.spark(storePath, s"DeleteSegmentByDate: $dbName.$tableName") deleteSegmentByDate(spark, dbName, tableName, dateValue) } }
Example 68
Source File: Compaction.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.util import org.apache.spark.sql.{CarbonEnv, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.execution.command.AlterTableModel import org.apache.spark.sql.execution.command.management.CarbonAlterTableCompactionCommand import org.apache.spark.sql.util.CarbonException import org.apache.carbondata.core.constants.CarbonCommonConstants // scalastyle:off object Compaction { def compaction(spark: SparkSession, dbName: String, tableName: String, compactionType: String): Unit = { TableAPIUtil.validateTableExists(spark, dbName, tableName) if (compactionType.equalsIgnoreCase(CarbonCommonConstants.MAJOR) || compactionType.equalsIgnoreCase(CarbonCommonConstants.MINOR)) { CarbonAlterTableCompactionCommand(AlterTableModel(Some(dbName), tableName, None, compactionType, Some(System.currentTimeMillis()), "")).run(spark) } else { CarbonException.analysisException("Compaction type is wrong. Please select minor or major.") } } def main(args: Array[String]): Unit = { if (args.length < 3) { System.err.println("Usage: Compaction <store path> <table name> <major|minor>") System.exit(1) } val storePath = TableAPIUtil.escape(args(0)) val (dbName, tableName) = TableAPIUtil.parseSchemaName(TableAPIUtil.escape(args(1))) val compactionType = TableAPIUtil.escape(args(2)) val spark = TableAPIUtil.spark(storePath, s"Compaction: $dbName.$tableName") compaction(spark, dbName, tableName, compactionType) } }
Example 69
Source File: CarbonDataSourceScan.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.strategy import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.expressions.{Attribute, SortOrder} import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier} import org.apache.spark.sql.execution.FileSourceScanExec import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} class CarbonDataSourceScan( override val output: Seq[Attribute], val rdd: RDD[InternalRow], @transient override val relation: HadoopFsRelation, val partitioning: Partitioning, val md: Map[String, String], identifier: Option[TableIdentifier], @transient private val logicalRelation: LogicalRelation) extends FileSourceScanExec( relation, output, relation.dataSchema, Seq.empty, Seq.empty, identifier) { // added lazy since spark 2.3.2 version (SPARK-PR#21815) override lazy val supportsBatch: Boolean = true // added lazy since spark 2.3.2 version (SPARK-PR#21815) override lazy val (outputPartitioning, outputOrdering): (Partitioning, Seq[SortOrder]) = (partitioning, Nil) // added lazy since spark 2.3.2 version (SPARK-PR#21815) override lazy val metadata: Map[String, String] = md override def inputRDDs(): Seq[RDD[InternalRow]] = rdd :: Nil }
Example 70
Source File: SparkSqlAdapter.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.execution.FileSourceScanExec import org.apache.spark.sql.execution.datasources.HadoopFsRelation import org.apache.spark.sql.types.StructType import org.apache.carbondata.core.util.ThreadLocalSessionInfo object SparkSqlAdapter { def initSparkSQL(): Unit = { } def getScanForSegments( @transient relation: HadoopFsRelation, output: Seq[Attribute], outputSchema: StructType, partitionFilters: Seq[Expression], dataFilters: Seq[Expression], tableIdentifier: Option[TableIdentifier] ): FileSourceScanExec = { FileSourceScanExec( relation, output, outputSchema, partitionFilters, dataFilters, tableIdentifier) } def addSparkSessionListener(sparkSession: SparkSession): Unit = { sparkSession.sparkContext.addSparkListener(new SparkListener { override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = { CarbonEnv.carbonEnvMap.remove(sparkSession) ThreadLocalSessionInfo.unsetAll() } }) } }
Example 71
Source File: CarbonDataSourceScan.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.strategy import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.expressions.{Attribute, SortOrder} import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier} import org.apache.spark.sql.execution.FileSourceScanExec import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} class CarbonDataSourceScan( override val output: Seq[Attribute], val rdd: RDD[InternalRow], @transient override val relation: HadoopFsRelation, val partitioning: Partitioning, val md: Map[String, String], identifier: Option[TableIdentifier], @transient private val logicalRelation: LogicalRelation) extends FileSourceScanExec( relation, output, relation.dataSchema, Seq.empty, None, Seq.empty, identifier) { // added lazy since spark 2.3.2 version (SPARK-PR#21815) override lazy val supportsBatch: Boolean = true // added lazy since spark 2.3.2 version (SPARK-PR#21815) override lazy val (outputPartitioning, outputOrdering): (Partitioning, Seq[SortOrder]) = (partitioning, Nil) // added lazy since spark 2.3.2 version (SPARK-PR#21815) override lazy val metadata: Map[String, String] = md override def inputRDDs(): Seq[RDD[InternalRow]] = rdd :: Nil }
Example 72
Source File: SparkSqlAdapter.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.execution.FileSourceScanExec import org.apache.spark.sql.execution.datasources.HadoopFsRelation import org.apache.spark.sql.types.StructType object SparkSqlAdapter { def initSparkSQL(): Unit = { } def getScanForSegments( @transient relation: HadoopFsRelation, output: Seq[Attribute], outputSchema: StructType, partitionFilters: Seq[Expression], dataFilters: Seq[Expression], tableIdentifier: Option[TableIdentifier] ): FileSourceScanExec = { FileSourceScanExec( relation, output, outputSchema, partitionFilters, None, dataFilters, tableIdentifier) } }
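getScanForSegments accepts an Option[TableIdentifier]. When the caller starts from a LogicalRelation, that value is typically derived from the relation's catalog table, if any; a sketch of that derivation with an invented helper name:

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.datasources.LogicalRelation

object ScanIdentifierSketch { // hypothetical helper
  // A LogicalRelation carries a CatalogTable only when it was resolved from the catalog.
  def identifierOf(relation: LogicalRelation): Option[TableIdentifier] =
    relation.catalogTable.map(_.identifier)
}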
Example 73
Source File: AlterTableUpgradeSegmentTest.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.carbondata.restructure import org.apache.spark.sql.CarbonEnv import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.test.util.QueryTest import org.scalatest.BeforeAndAfterAll import org.apache.carbondata.core.metadata.schema.table.CarbonTable import org.apache.carbondata.core.statusmanager.SegmentStatusManager import org.apache.carbondata.core.util.path.CarbonTablePath import org.apache.carbondata.processing.util.CarbonLoaderUtil class AlterTableUpgradeSegmentTest extends QueryTest with BeforeAndAfterAll { override protected def beforeAll(): Unit = { sql("drop table if exists altertest") sql("create table altertest(a string) STORED AS carbondata") sql("insert into altertest select 'k'") sql("insert into altertest select 'tttttt'") } private def removeDataAndIndexSizeFromTableStatus(table: CarbonTable): Unit = { val loadMetaDataDetails = SegmentStatusManager.readTableStatusFile(CarbonTablePath .getTableStatusFilePath(table.getTablePath)) loadMetaDataDetails.foreach { loadMetaDataDetail => loadMetaDataDetail.setIndexSize("0") loadMetaDataDetail.setDataSize("0") } SegmentStatusManager.writeLoadDetailsIntoFile(CarbonTablePath .getTableStatusFilePath(table.getTablePath), loadMetaDataDetails) } test("test alter table upgrade segment test") { val carbonTable = CarbonEnv.getCarbonTable(TableIdentifier("altertest"))(sqlContext.sparkSession) removeDataAndIndexSizeFromTableStatus(carbonTable) val loadMetaDataDetails = SegmentStatusManager.readTableStatusFile(CarbonTablePath .getTableStatusFilePath(carbonTable.getTablePath)) loadMetaDataDetails.foreach(detail => assert(detail.getIndexSize.toInt + detail.getDataSize .toInt == 0)) sql("alter table altertest compact 'upgrade_segment'") val loadMetaDataDetailsNew = SegmentStatusManager.readTableStatusFile(CarbonTablePath .getTableStatusFilePath(carbonTable.getTablePath)) loadMetaDataDetailsNew.foreach{detail => assert(detail.getIndexSize.toInt != 0) assert(detail.getDataSize.toInt != 0)} } override protected def afterAll(): Unit = { sql("drop table if exists altertest") } }
Example 74
Source File: CacheRefreshTestCase.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.spark.testsuite.cloud import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.hive.CarbonHiveIndexMetadataUtil import org.apache.spark.sql.test.util.QueryTest import org.scalatest.BeforeAndAfterAll class CacheRefreshTestCase extends QueryTest with BeforeAndAfterAll { override protected def beforeAll(): Unit = { sql("drop database if exists cachedb cascade") sql("create database cachedb") sql("use cachedb") } override protected def afterAll(): Unit = { sql("use default") sql("drop database if exists cachedb cascade") } test("test cache refresh") { sql("create table tbl_cache1(col1 string, col2 int, col3 int) using carbondata") sql("insert into tbl_cache1 select 'a', 123, 345") CarbonHiveIndexMetadataUtil.invalidateAndDropTable( "cachedb", "tbl_cache1", sqlContext.sparkSession) // discard cached table info in cachedDataSourceTables val tableIdentifier = TableIdentifier("tbl_cache1", Option("cachedb")) sqlContext.sparkSession.sessionState.catalog.refreshTable(tableIdentifier) sql("create table tbl_cache1(col1 string, col2 int, col3 int) using carbondata") sql("delete from tbl_cache1") sql("insert into tbl_cache1 select 'b', 123, 345") checkAnswer(sql("select * from tbl_cache1"), Seq(Row("b", 123, 345))) } }
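The test refreshes cached metadata through the SessionCatalog with a TableIdentifier. The same refresh is reachable through the public Catalog API via the identifier's quoted string; a sketch showing both routes (either one alone is normally enough):

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier

object RefreshSketch { // hypothetical helper
  def refresh(spark: SparkSession, ident: TableIdentifier): Unit = {
    // Internal route, as used in the test above.
    spark.sessionState.catalog.refreshTable(ident)
    // Public route; quotedString keeps unusual names parseable.
    spark.catalog.refreshTable(ident.quotedString)
  }
}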
Example 75
Source File: DescribeDeltaHistoryCommand.scala From delta with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.delta.commands // scalastyle:off import.ordering.noEmptyLine import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog, DeltaTableIdentifier} import org.apache.spark.sql.delta.actions.CommitInfo import org.apache.spark.sql.delta.metering.DeltaLogging import org.apache.hadoop.fs.Path import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.catalog.CatalogTableType import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} import org.apache.spark.sql.execution.command.RunnableCommand case class DescribeDeltaHistoryCommand( path: Option[String], tableIdentifier: Option[TableIdentifier], limit: Option[Int], override val output: Seq[Attribute] = ExpressionEncoder[CommitInfo]().schema.toAttributes) extends RunnableCommand with DeltaLogging { override def run(sparkSession: SparkSession): Seq[Row] = { val basePath = if (path.nonEmpty) { new Path(path.get) } else if (tableIdentifier.nonEmpty) { val sessionCatalog = sparkSession.sessionState.catalog lazy val metadata = sessionCatalog.getTableMetadata(tableIdentifier.get) DeltaTableIdentifier(sparkSession, tableIdentifier.get) match { case Some(id) if id.path.nonEmpty => new Path(id.path.get) case Some(id) if id.table.nonEmpty => new Path(metadata.location) case _ => if (metadata.tableType == CatalogTableType.VIEW) { throw DeltaErrors.describeViewHistory } throw DeltaErrors.notADeltaTableException("DESCRIBE HISTORY") } } else { throw DeltaErrors.missingTableIdentifierException("DESCRIBE HISTORY") } // Max array size if (limit.exists(_ > Int.MaxValue - 8)) { throw new IllegalArgumentException("Please use a limit less than Int.MaxValue - 8.") } val deltaLog = DeltaLog.forTable(sparkSession, basePath) recordDeltaOperation(deltaLog, "delta.ddl.describeHistory") { if (deltaLog.snapshot.version == -1) { throw DeltaErrors.notADeltaTableException("DESCRIBE HISTORY") } import sparkSession.implicits._ deltaLog.history.getHistory(limit).toDF().collect().toSeq } } }
Example 76
Source File: DeltaGenerateCommand.scala From delta with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.delta.commands import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog, DeltaTableIdentifier} import org.apache.spark.sql.delta.hooks.GenerateSymlinkManifest import org.apache.hadoop.fs.Path import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.execution.command.RunnableCommand case class DeltaGenerateCommand(modeName: String, tableId: TableIdentifier) extends RunnableCommand { import DeltaGenerateCommand._ override def run(sparkSession: SparkSession): Seq[Row] = { if (!modeNameToGenerationFunc.contains(modeName)) { throw DeltaErrors.unsupportedGenerateModeException(modeName) } val tablePath = DeltaTableIdentifier(sparkSession, tableId) match { case Some(id) if id.path.isDefined => new Path(id.path.get) case _ => new Path(sparkSession.sessionState.catalog.getTableMetadata(tableId).location) } val deltaLog = DeltaLog.forTable(sparkSession, tablePath) if (deltaLog.snapshot.version < 0) { throw DeltaErrors.notADeltaTableException("GENERATE") } val generationFunc = modeNameToGenerationFunc(modeName) generationFunc(sparkSession, deltaLog) Seq.empty } } object DeltaGenerateCommand { val modeNameToGenerationFunc = CaseInsensitiveMap( Map[String, (SparkSession, DeltaLog) => Unit]( "symlink_format_manifest" -> GenerateSymlinkManifest.generateFullManifest )) }
Example 77
Source File: DeltaUnsupportedOperationsCheck.scala From delta with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.delta import org.apache.spark.sql.delta.catalog.DeltaTableV2 import org.apache.spark.sql.delta.metering.DeltaLogging import org.apache.spark.sql.delta.sources.DeltaSourceUtils import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, V2WriteCommand} import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.execution.datasources.RefreshTable import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation private def checkDeltaTableExists(command: V2WriteCommand, operation: String): Unit = { command.table match { case DeltaRelation(lr) => // the extractor performs the check that we want if this is indeed being called on a Delta // table. It should leave others unchanged if (DeltaFullTable.unapply(lr).isEmpty) { throw DeltaErrors.notADeltaTableException(operation) } case _ => } } }
Example 78
Source File: DeltaTableIdentifier.scala From delta with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.delta import org.apache.spark.sql.delta.sources.DeltaSourceUtils import org.apache.hadoop.fs.Path import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.internal.SQLConf object DeltaTableIdentifier { def apply(spark: SparkSession, identifier: TableIdentifier): Option[DeltaTableIdentifier] = { if (isDeltaPath(spark, identifier)) { Some(DeltaTableIdentifier(path = Option(identifier.table))) } else if (DeltaTableUtils.isDeltaTable(spark, identifier)) { Some(DeltaTableIdentifier(table = Option(identifier))) } else { None } } }
Example 79
Source File: VacuumTableCommand.scala From delta with Apache License 2.0 | 5 votes |
package io.delta.tables.execution import org.apache.hadoop.fs.Path import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog, DeltaTableIdentifier, DeltaTableUtils} import org.apache.spark.sql.delta.commands.VacuumCommand import org.apache.spark.sql.execution.command.RunnableCommand import org.apache.spark.sql.types.StringType case class VacuumTableCommand( path: Option[String], table: Option[TableIdentifier], horizonHours: Option[Double], dryRun: Boolean) extends RunnableCommand { override val output: Seq[Attribute] = Seq(AttributeReference("path", StringType, nullable = true)()) override def run(sparkSession: SparkSession): Seq[Row] = { val pathToVacuum = if (path.nonEmpty) { new Path(path.get) } else if (table.nonEmpty) { DeltaTableIdentifier(sparkSession, table.get) match { case Some(id) if id.path.nonEmpty => new Path(id.path.get) case _ => new Path(sparkSession.sessionState.catalog.getTableMetadata(table.get).location) } } else { throw DeltaErrors.missingTableIdentifierException("VACUUM") } val baseDeltaPath = DeltaTableUtils.findDeltaTableRoot(sparkSession, pathToVacuum) if (baseDeltaPath.isDefined) { if (baseDeltaPath.get != pathToVacuum) { throw DeltaErrors.vacuumBasePathMissingException(baseDeltaPath.get) } } val deltaLog = DeltaLog.forTable(sparkSession, pathToVacuum) if (deltaLog.snapshot.version == -1) { throw DeltaErrors.notADeltaTableException( "VACUUM", DeltaTableIdentifier(path = Some(pathToVacuum.toString))) } VacuumCommand.gc(sparkSession, deltaLog, dryRun, horizonHours).collect() } }
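When VACUUM is given a table name rather than a path, the command falls back to the location stored in the catalog metadata for that TableIdentifier. That resolution step on its own, as a sketch (it assumes the table has a location, which managed and external tables do):

import org.apache.hadoop.fs.Path
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier

object TableLocationSketch { // hypothetical helper
  // Resolve the storage location of a catalog table to a Hadoop Path.
  def locationOf(spark: SparkSession, ident: TableIdentifier): Path =
    new Path(spark.sessionState.catalog.getTableMetadata(ident).location)
}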
Example 80
Source File: DeltaConvert.scala From delta with Apache License 2.0 | 5 votes |
package io.delta.tables.execution import org.apache.spark.sql.delta.commands.ConvertToDeltaCommand import io.delta.tables.DeltaTable import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.types.StructType trait DeltaConvertBase { def executeConvert( spark: SparkSession, tableIdentifier: TableIdentifier, partitionSchema: Option[StructType], deltaPath: Option[String]): DeltaTable = { val cvt = ConvertToDeltaCommand(tableIdentifier, partitionSchema, deltaPath) cvt.run(spark) if (cvt.isCatalogTable(spark.sessionState.analyzer, tableIdentifier)) { DeltaTable.forName(spark, tableIdentifier.toString) } else { DeltaTable.forPath(spark, tableIdentifier.table) } } } object DeltaConvert extends DeltaConvertBase {}
Example 81
Source File: DeltaTableOperations.scala From delta with Apache License 2.0 | 5 votes |
package io.delta.tables.execution import scala.collection.Map import org.apache.spark.sql.delta.{DeltaErrors, DeltaHistoryManager, DeltaLog, PreprocessTableUpdate} import org.apache.spark.sql.delta.commands.{DeleteCommand, DeltaGenerateCommand, VacuumCommand} import org.apache.spark.sql.delta.util.AnalysisHelper import io.delta.tables.DeltaTable import org.apache.spark.sql.{functions, Column, DataFrame, Dataset} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.expressions.{Expression, SubqueryExpression} import org.apache.spark.sql.catalyst.plans.logical._ trait DeltaTableOperations extends AnalysisHelper { self: DeltaTable => protected def executeDelete(condition: Option[Expression]): Unit = improveUnsupportedOpError { val delete = DeleteFromTable(self.toDF.queryExecution.analyzed, condition) toDataset(sparkSession, delete) } protected def executeHistory(deltaLog: DeltaLog, limit: Option[Int]): DataFrame = { val history = new DeltaHistoryManager(deltaLog) val spark = self.toDF.sparkSession spark.createDataFrame(history.getHistory(limit)) } protected def executeGenerate(tblIdentifier: String, mode: String): Unit = { val tableId: TableIdentifier = sparkSession .sessionState .sqlParser .parseTableIdentifier(tblIdentifier) val generate = DeltaGenerateCommand(mode, tableId) generate.run(sparkSession) } protected def executeUpdate( set: Map[String, Column], condition: Option[Column]): Unit = improveUnsupportedOpError { val assignments = set.map { case (targetColName, column) => Assignment(UnresolvedAttribute.quotedString(targetColName), column.expr) }.toSeq val update = UpdateTable(self.toDF.queryExecution.analyzed, assignments, condition.map(_.expr)) toDataset(sparkSession, update) } protected def executeVacuum( deltaLog: DeltaLog, retentionHours: Option[Double]): DataFrame = { VacuumCommand.gc(sparkSession, deltaLog, false, retentionHours) sparkSession.emptyDataFrame } protected def toStrColumnMap(map: Map[String, String]): Map[String, Column] = { map.toSeq.map { case (k, v) => k -> functions.expr(v) }.toMap } protected def sparkSession = self.toDF.sparkSession }
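executeGenerate turns a user-supplied string into a TableIdentifier with the session's SQL parser and hands it to DeltaGenerateCommand (Example 76). A possible direct use of those two steps, assuming the named table is a Delta table registered in the catalog:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.delta.commands.DeltaGenerateCommand

object GenerateSketch { // hypothetical driver
  def generateManifest(spark: SparkSession, tableName: String): Unit = {
    val tableId: TableIdentifier = spark.sessionState.sqlParser.parseTableIdentifier(tableName)
    // "symlink_format_manifest" is the mode registered in DeltaGenerateCommand.
    DeltaGenerateCommand("symlink_format_manifest", tableId).run(spark)
  }
}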
Example 82
Source File: HiveConvertToDeltaSuite.scala From delta with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.delta import org.apache.spark.sql.delta.test.DeltaHiveTest import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.test.SQLTestUtils abstract class HiveConvertToDeltaSuiteBase extends ConvertToDeltaHiveTableTests with SQLTestUtils { override protected def convertToDelta( identifier: String, partitionSchema: Option[String] = None): Unit = { if (partitionSchema.isEmpty) { sql(s"convert to delta $identifier") } else { val stringSchema = partitionSchema.get sql(s"convert to delta $identifier partitioned by ($stringSchema) ") } } override protected def verifyExternalCatalogMetadata(tableName: String): Unit = { val catalogTable = spark.sessionState.catalog.externalCatalog.getTable("default", tableName) // Hive automatically adds some properties val cleanProps = catalogTable.properties.filterKeys(_ != "transient_lastDdlTime") // We can't alter the schema in the catalog at the moment :( assert(cleanProps.isEmpty, s"Table properties weren't empty for table $tableName: $cleanProps") } test("convert a Hive based parquet table") { val tbl = "hive_parquet" withTable(tbl) { sql( s""" |CREATE TABLE $tbl (id int, str string) |PARTITIONED BY (part string) |STORED AS PARQUET """.stripMargin) sql(s"insert into $tbl VALUES (1, 'a', 1)") val catalogTable = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tbl)) assert(catalogTable.provider === Some("hive")) assert(catalogTable.storage.serde.exists(_.contains("parquet"))) convertToDelta(tbl, Some("part string")) checkAnswer( sql(s"select * from delta.`${getPathForTableName(tbl)}`"), Row(1, "a", "1")) verifyExternalCatalogMetadata(tbl) val updatedTable = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tbl)) assert(updatedTable.provider === Some("delta")) } } test("convert a Hive based external parquet table") { val tbl = "hive_parquet" withTempDir { dir => withTable(tbl) { sql( s""" |CREATE EXTERNAL TABLE $tbl (id int, str string) |PARTITIONED BY (part string) |STORED AS PARQUET |LOCATION '${dir.getCanonicalPath}' """.stripMargin) sql(s"insert into $tbl VALUES (1, 'a', 1)") val catalogTable = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tbl)) assert(catalogTable.provider === Some("hive")) assert(catalogTable.storage.serde.exists(_.contains("parquet"))) convertToDelta(tbl, Some("part string")) checkAnswer( sql(s"select * from delta.`${dir.getCanonicalPath}`"), Row(1, "a", "1")) verifyExternalCatalogMetadata(tbl) val updatedTable = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tbl)) assert(updatedTable.provider === Some("delta")) } } } } class HiveConvertToDeltaSuite extends HiveConvertToDeltaSuiteBase with DeltaHiveTest
Example 83
Source File: DeltaSqlParserSuite.scala From delta with Apache License 2.0 | 5 votes |
package io.delta.sql.parser import io.delta.tables.execution.VacuumTableCommand import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.TableIdentifier class DeltaSqlParserSuite extends SparkFunSuite { test("isValidDecimal should recognize a table identifier and not treat them as a decimal") { // Setting `delegate` to `null` is fine. The following tests don't need to touch `delegate`. val parser = new DeltaSqlParser(null) assert(parser.parsePlan("vacuum 123_") === VacuumTableCommand(None, Some(TableIdentifier("123_")), None, false)) assert(parser.parsePlan("vacuum 1a.123_") === VacuumTableCommand(None, Some(TableIdentifier("123_", Some("1a"))), None, false)) assert(parser.parsePlan("vacuum a.123A") === VacuumTableCommand(None, Some(TableIdentifier("123A", Some("a"))), None, false)) assert(parser.parsePlan("vacuum a.123E3_column") === VacuumTableCommand(None, Some(TableIdentifier("123E3_column", Some("a"))), None, false)) assert(parser.parsePlan("vacuum a.123D_column") === VacuumTableCommand(None, Some(TableIdentifier("123D_column", Some("a"))), None, false)) assert(parser.parsePlan("vacuum a.123BD_column") === VacuumTableCommand(None, Some(TableIdentifier("123BD_column", Some("a"))), None, false)) } }
Example 84
Source File: HiveExternalCatalogSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive import org.apache.hadoop.conf.Configuration import org.apache.spark.SparkConf import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.types.StructType import org.apache.spark.util.Utils class HiveExternalCatalogSuite extends ExternalCatalogSuite { private val user = Utils.getCurrentUserName() private val externalCatalog: HiveExternalCatalog = { val catalog = new HiveExternalCatalog(new SparkConf, new Configuration, user) catalog.client.reset() catalog } protected override val utils: CatalogTestUtils = new CatalogTestUtils { override val tableInputFormat: String = "org.apache.hadoop.mapred.SequenceFileInputFormat" override val tableOutputFormat: String = "org.apache.hadoop.mapred.SequenceFileOutputFormat" override def newEmptyCatalog(): ExternalCatalog = externalCatalog } protected override def resetState(): Unit = { externalCatalog.client.reset() } import utils._ test("list partitions by filter") { val catalog = newBasicCatalog() val selectedPartitions = catalog.listPartitionsByFilter("db2", "tbl2", Seq('a.int === 1)) assert(selectedPartitions.length == 1) assert(selectedPartitions.head.spec == part1.spec) } test("SPARK-18647: do not put provider in table properties for Hive serde table") { val catalog = newBasicCatalog() val hiveTable = CatalogTable( identifier = TableIdentifier("hive_tbl", Some("db1")), tableType = CatalogTableType.MANAGED, storage = storageFormat, schema = new StructType().add("col1", "int").add("col2", "string"), provider = Some("hive")) catalog.createTable(hiveTable, ignoreIfExists = false) val rawTable = externalCatalog.client.getTable("db1", "hive_tbl") assert(!rawTable.properties.contains(HiveExternalCatalog.DATASOURCE_PROVIDER)) assert(externalCatalog.getTable("db1", "hive_tbl").provider == Some(DDLUtils.HIVE_PROVIDER)) } }
Example 85
Source File: MetastoreRelationSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive import org.apache.spark.sql.{QueryTest, Row} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.{IntegerType, StructField, StructType} class MetastoreRelationSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { test("makeCopy and toJSON should work") { val table = CatalogTable( identifier = TableIdentifier("test", Some("db")), tableType = CatalogTableType.VIEW, storage = CatalogStorageFormat.empty, schema = StructType(StructField("a", IntegerType, true) :: Nil)) val relation = MetastoreRelation("db", "test")(table, null) // No exception should be thrown relation.makeCopy(Array("db", "test")) // No exception should be thrown relation.toJSON } test("SPARK-17409: Do Not Optimize Query in CTAS (Hive Serde Table) More Than Once") { withTable("bar") { withTempView("foo") { sql("select 0 as id").createOrReplaceTempView("foo") // If we optimize the query in CTAS more than once, the following saveAsTable will fail // with the error: `GROUP BY position 0 is not in select list (valid range is [1, 1])` sql("CREATE TABLE bar AS SELECT * FROM foo group by id") checkAnswer(spark.table("bar"), Row(0) :: Nil) val tableMetadata = spark.sessionState.catalog.getTableMetadata(TableIdentifier("bar")) assert(tableMetadata.provider == Some("hive"), "the expected table is a Hive serde table") } } } }
Example 86
Source File: ListTablesSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive import org.scalatest.BeforeAndAfterAll import org.apache.spark.sql.QueryTest import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.hive.test.TestHiveSingleton class ListTablesSuite extends QueryTest with TestHiveSingleton with BeforeAndAfterAll { import hiveContext._ import hiveContext.implicits._ val df = sparkContext.parallelize((1 to 10).map(i => (i, s"str$i"))).toDF("key", "value") override def beforeAll(): Unit = { super.beforeAll() // The catalog in HiveContext is a case insensitive one. sessionState.catalog.createTempView( "ListTablesSuiteTable", df.logicalPlan, overrideIfExists = true) sql("CREATE TABLE HiveListTablesSuiteTable (key int, value string)") sql("CREATE DATABASE IF NOT EXISTS ListTablesSuiteDB") sql("CREATE TABLE ListTablesSuiteDB.HiveInDBListTablesSuiteTable (key int, value string)") } override def afterAll(): Unit = { try { sessionState.catalog.dropTable( TableIdentifier("ListTablesSuiteTable"), ignoreIfNotExists = true, purge = false) sql("DROP TABLE IF EXISTS HiveListTablesSuiteTable") sql("DROP TABLE IF EXISTS ListTablesSuiteDB.HiveInDBListTablesSuiteTable") sql("DROP DATABASE IF EXISTS ListTablesSuiteDB") } finally { super.afterAll() } } test("get all tables of current database") { Seq(tables(), sql("SHOW TABLes")).foreach { case allTables => // We are using default DB. checkAnswer( allTables.filter("tableName = 'listtablessuitetable'"), Row("", "listtablessuitetable", true)) checkAnswer( allTables.filter("tableName = 'hivelisttablessuitetable'"), Row("default", "hivelisttablessuitetable", false)) assert(allTables.filter("tableName = 'hiveindblisttablessuitetable'").count() === 0) } } test("getting all tables with a database name") { Seq(tables("listtablessuiteDb"), sql("SHOW TABLes in listTablesSuitedb")).foreach { case allTables => checkAnswer( allTables.filter("tableName = 'listtablessuitetable'"), Row("", "listtablessuitetable", true)) assert(allTables.filter("tableName = 'hivelisttablessuitetable'").count() === 0) checkAnswer( allTables.filter("tableName = 'hiveindblisttablessuitetable'"), Row("listtablessuitedb", "hiveindblisttablessuitetable", false)) } } }
Example 87
Source File: TableIdentifierParserSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.parser import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.TableIdentifier class TableIdentifierParserSuite extends SparkFunSuite { import CatalystSqlParser._ // Add "$elem$", "$value$" & "$key$" val hiveNonReservedKeyword = Array("add", "admin", "after", "analyze", "archive", "asc", "before", "bucket", "buckets", "cascade", "change", "cluster", "clustered", "clusterstatus", "collection", "columns", "comment", "compact", "compactions", "compute", "concatenate", "continue", "data", "day", "databases", "datetime", "dbproperties", "deferred", "defined", "delimited", "dependency", "desc", "directories", "directory", "disable", "distribute", "enable", "escaped", "exclusive", "explain", "export", "fields", "file", "fileformat", "first", "format", "formatted", "functions", "hold_ddltime", "hour", "idxproperties", "ignore", "index", "indexes", "inpath", "inputdriver", "inputformat", "items", "jar", "keys", "key_type", "last", "limit", "offset", "lines", "load", "location", "lock", "locks", "logical", "long", "mapjoin", "materialized", "metadata", "minus", "minute", "month", "msck", "noscan", "no_drop", "nulls", "offline", "option", "outputdriver", "outputformat", "overwrite", "owner", "partitioned", "partitions", "plus", "pretty", "principals", "protection", "purge", "read", "readonly", "rebuild", "recordreader", "recordwriter", "reload", "rename", "repair", "replace", "replication", "restrict", "rewrite", "role", "roles", "schemas", "second", "serde", "serdeproperties", "server", "sets", "shared", "show", "show_database", "skewed", "sort", "sorted", "ssl", "statistics", "stored", "streamtable", "string", "struct", "tables", "tblproperties", "temporary", "terminated", "tinyint", "touch", "transactions", "unarchive", "undo", "uniontype", "unlock", "unset", "unsigned", "uri", "use", "utc", "utctimestamp", "view", "while", "year", "work", "transaction", "write", "isolation", "level", "snapshot", "autocommit", "all", "alter", "array", "as", "authorization", "between", "bigint", "binary", "boolean", "both", "by", "create", "cube", "current_date", "current_timestamp", "cursor", "date", "decimal", "delete", "describe", "double", "drop", "exists", "external", "false", "fetch", "float", "for", "grant", "group", "grouping", "import", "in", "insert", "int", "into", "is", "lateral", "like", "local", "none", "null", "of", "order", "out", "outer", "partition", "percent", "procedure", "range", "reads", "revoke", "rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger", "true", "truncate", "update", "user", "using", "values", "with", "regexp", "rlike", "bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float", "int", "smallint", "timestamp", "at") val hiveStrictNonReservedKeyword = Seq("anti", "full", "inner", "left", "semi", "right", "natural", "union", "intersect", "except", "database", "on", "join", "cross", "select", "from", "where", "having", "from", "to", "table", "with", "not") test("table identifier") { // Regular names. assert(TableIdentifier("q") === parseTableIdentifier("q")) assert(TableIdentifier("q", Option("d")) === parseTableIdentifier("d.q")) // Illegal names. 
Seq("", "d.q.g", "t:", "${some.var.x}", "tab:1").foreach { identifier => intercept[ParseException](parseTableIdentifier(identifier)) } } test("quoted identifiers") { assert(TableIdentifier("z", Some("x.y")) === parseTableIdentifier("`x.y`.z")) assert(TableIdentifier("y.z", Some("x")) === parseTableIdentifier("x.`y.z`")) assert(TableIdentifier("z", Some("`x.y`")) === parseTableIdentifier("```x.y```.z")) assert(TableIdentifier("`y.z`", Some("x")) === parseTableIdentifier("x.```y.z```")) assert(TableIdentifier("x.y.z", None) === parseTableIdentifier("`x.y.z`")) } test("table identifier - strict keywords") { // SQL Keywords. hiveStrictNonReservedKeyword.foreach { keyword => assert(TableIdentifier(keyword) === parseTableIdentifier(keyword)) assert(TableIdentifier(keyword) === parseTableIdentifier(s"`$keyword`")) assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.`$keyword`")) } } test("table identifier - non reserved keywords") { // Hive keywords are allowed. hiveNonReservedKeyword.foreach { nonReserved => assert(TableIdentifier(nonReserved) === parseTableIdentifier(nonReserved)) } } test("SPARK-17364 table identifier - contains number") { assert(parseTableIdentifier("123_") == TableIdentifier("123_")) assert(parseTableIdentifier("1a.123_") == TableIdentifier("123_", Some("1a"))) // ".123" should not be treated as token of type DECIMAL_VALUE assert(parseTableIdentifier("a.123A") == TableIdentifier("123A", Some("a"))) // ".123E3" should not be treated as token of type SCIENTIFIC_DECIMAL_VALUE assert(parseTableIdentifier("a.123E3_LIST") == TableIdentifier("123E3_LIST", Some("a"))) // ".123D" should not be treated as token of type DOUBLE_LITERAL assert(parseTableIdentifier("a.123D_LIST") == TableIdentifier("123D_LIST", Some("a"))) // ".123BD" should not be treated as token of type BIGDECIMAL_LITERAL assert(parseTableIdentifier("a.123BD_LIST") == TableIdentifier("123BD_LIST", Some("a"))) } test("SPARK-17832 table identifier - contains backtick") { val complexName = TableIdentifier("`weird`table`name", Some("`d`b`1")) assert(complexName === parseTableIdentifier("```d``b``1`.```weird``table``name`")) assert(complexName === parseTableIdentifier(complexName.quotedString)) intercept[ParseException](parseTableIdentifier(complexName.unquotedString)) // Table identifier contains countious backticks should be treated correctly. val complexName2 = TableIdentifier("x``y", Some("d``b")) assert(complexName2 === parseTableIdentifier(complexName2.quotedString)) } }
Example 88
Source File: ddl.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils} import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan} import org.apache.spark.sql.execution.command.RunnableCommand import org.apache.spark.sql.types._ case class CreateTable( tableDesc: CatalogTable, mode: SaveMode, query: Option[LogicalPlan]) extends Command { assert(tableDesc.provider.isDefined, "The table to be created must have a provider.") if (query.isEmpty) { assert( mode == SaveMode.ErrorIfExists || mode == SaveMode.Ignore, "create table without data insertion can only use ErrorIfExists or Ignore as SaveMode.") } override def innerChildren: Seq[QueryPlan[_]] = query.toSeq } case class CreateTempViewUsing( tableIdent: TableIdentifier, userSpecifiedSchema: Option[StructType], replace: Boolean, global: Boolean, provider: String, options: Map[String, String]) extends RunnableCommand { if (tableIdent.database.isDefined) { throw new AnalysisException( s"Temporary view '$tableIdent' should not have specified a database") } override def argString: String = { s"[tableIdent:$tableIdent " + userSpecifiedSchema.map(_ + " ").getOrElse("") + s"replace:$replace " + s"provider:$provider " + CatalogUtils.maskCredentials(options) } def run(sparkSession: SparkSession): Seq[Row] = { val dataSource = DataSource( sparkSession, userSpecifiedSchema = userSpecifiedSchema, className = provider, options = options) val catalog = sparkSession.sessionState.catalog val viewDefinition = Dataset.ofRows( sparkSession, LogicalRelation(dataSource.resolveRelation())).logicalPlan if (global) { catalog.createGlobalTempView(tableIdent.table, viewDefinition, replace) } else { catalog.createTempView(tableIdent.table, viewDefinition, replace) } Seq.empty[Row] } } case class RefreshTable(tableIdent: TableIdentifier) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { // Refresh the given table's metadata. If this table is cached as an InMemoryRelation, // drop the original cached version and make the new version cached lazily. sparkSession.catalog.refreshTable(tableIdent.quotedString) Seq.empty[Row] } } case class RefreshResource(path: String) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { sparkSession.catalog.refreshByPath(path) Seq.empty[Row] } }
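CreateTempViewUsing rejects identifiers that carry a database, since temporary views are never database-qualified. The guard can be stated on the TableIdentifier alone; the sketch uses require rather than AnalysisException so that it stays valid outside Spark's own sql packages.

import org.apache.spark.sql.catalyst.TableIdentifier

object TempViewGuardSketch { // hypothetical helper
  // Temporary views must not be qualified with a database name.
  def requireUnqualified(ident: TableIdentifier): Unit =
    require(ident.database.isEmpty,
      s"Temporary view '${ident.unquotedString}' should not have specified a database")
}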
Example 89
Source File: cache.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command import org.apache.spark.sql.{Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan case class CacheTableCommand( tableIdent: TableIdentifier, plan: Option[LogicalPlan], isLazy: Boolean) extends RunnableCommand { require(plan.isEmpty || tableIdent.database.isEmpty, "Database name is not allowed in CACHE TABLE AS SELECT") override protected def innerChildren: Seq[QueryPlan[_]] = { plan.toSeq } override def run(sparkSession: SparkSession): Seq[Row] = { plan.foreach { logicalPlan => Dataset.ofRows(sparkSession, logicalPlan).createTempView(tableIdent.quotedString) } sparkSession.catalog.cacheTable(tableIdent.quotedString) if (!isLazy) { // Performs eager caching sparkSession.table(tableIdent).count() } Seq.empty[Row] } } case class UncacheTableCommand( tableIdent: TableIdentifier, ifExists: Boolean) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { val tableId = tableIdent.quotedString try { sparkSession.catalog.uncacheTable(tableId) } catch { case _: NoSuchTableException if ifExists => // don't throw } Seq.empty[Row] } } case object ClearCacheCommand extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { sparkSession.catalog.clearCache() Seq.empty[Row] } }
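CacheTableCommand drives caching through sparkSession.catalog using the identifier's quoted string, and forces materialization with a count() when the cache is not lazy. The same calls are available from user code; a sketch that caches an existing table eagerly and reports the outcome:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier

object CacheSketch { // hypothetical helper
  def cacheEagerly(spark: SparkSession, ident: TableIdentifier): Boolean = {
    val name = ident.quotedString
    spark.catalog.cacheTable(name)
    spark.table(name).count() // force materialization, mirroring the non-lazy branch above
    spark.catalog.isCached(name)
  }
}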
Example 90
Source File: hbaseCommands.scala From Heracles with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hbase.execution import org.apache.spark.annotation.DeveloperApi import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias import org.apache.spark.sql.execution.command.RunnableCommand import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.hbase._ import org.apache.spark.sql.hbase.util.DataTypeUtils import org.apache.spark.sql.types._ import scala.collection.mutable.ArrayBuffer @DeveloperApi case class AlterDropColCommand(namespace: String, tableName: String, columnName: String) extends RunnableCommand { def run(sparkSession: SparkSession): Seq[Row] = { sparkSession.sharedState.externalCatalog.asInstanceOf[HBaseCatalog] .alterTableDropNonKey(namespace, tableName, columnName) sparkSession.sharedState.externalCatalog.asInstanceOf[HBaseCatalog].stopAdmin() Seq.empty[Row] } } @DeveloperApi case class AlterAddColCommand(namespace: String, tableName: String, colName: String, colType: String, colFamily: String, colQualifier: String) extends RunnableCommand { def run(sparkSession: SparkSession): Seq[Row] = { val hbaseCatalog = sparkSession.sharedState.externalCatalog.asInstanceOf[HBaseCatalog] hbaseCatalog.alterTableAddNonKey(namespace, tableName, NonKeyColumn(colName, DataTypeUtils.getDataType(colType), colFamily, colQualifier)) hbaseCatalog.stopAdmin() Seq.empty[Row] } } @DeveloperApi case class InsertValueIntoTableCommand(tid: TableIdentifier, valueSeq: Seq[String]) extends RunnableCommand { override def run(sparkSession: SparkSession) = { val relation: HBaseRelation = sparkSession.sessionState.catalog.externalCatalog .asInstanceOf[HBaseCatalog] .getHBaseRelation(tid.database.getOrElse(null), tid.table).getOrElse(null) val bytes = valueSeq.zipWithIndex.map(v => DataTypeUtils.string2TypeData(v._1, relation.schema(v._2).dataType)) val rows = sparkSession.sparkContext.makeRDD(Seq(Row.fromSeq(bytes))) val inputValuesDF = sparkSession.createDataFrame(rows, relation.schema) relation.insert(inputValuesDF, overwrite = false) Seq.empty[Row] } override def output: Seq[Attribute] = Seq.empty }
Example 91
Source File: InsertIntoHiveDirCommand.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution import scala.language.existentials import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.hive.common.FileUtils import org.apache.hadoop.hive.ql.plan.TableDesc import org.apache.hadoop.hive.serde.serdeConstants import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe import org.apache.hadoop.mapred._ import org.apache.spark.SparkException import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.hive.client.HiveClientImpl case class InsertIntoHiveDirCommand( isLocal: Boolean, storage: CatalogStorageFormat, query: LogicalPlan, overwrite: Boolean, outputColumns: Seq[Attribute]) extends SaveAsHiveFile { override def run(sparkSession: SparkSession, child: SparkPlan): Seq[Row] = { assert(storage.locationUri.nonEmpty) val hiveTable = HiveClientImpl.toHiveTable(CatalogTable( identifier = TableIdentifier(storage.locationUri.get.toString, Some("default")), tableType = org.apache.spark.sql.catalyst.catalog.CatalogTableType.VIEW, storage = storage, schema = query.schema )) hiveTable.getMetadata.put(serdeConstants.SERIALIZATION_LIB, storage.serde.getOrElse(classOf[LazySimpleSerDe].getName)) val tableDesc = new TableDesc( hiveTable.getInputFormatClass, hiveTable.getOutputFormatClass, hiveTable.getMetadata ) val hadoopConf = sparkSession.sessionState.newHadoopConf() val jobConf = new JobConf(hadoopConf) val targetPath = new Path(storage.locationUri.get) val writeToPath = if (isLocal) { val localFileSystem = FileSystem.getLocal(jobConf) localFileSystem.makeQualified(targetPath) } else { val qualifiedPath = FileUtils.makeQualified(targetPath, hadoopConf) val dfs = qualifiedPath.getFileSystem(jobConf) if (!dfs.exists(qualifiedPath)) { dfs.mkdirs(qualifiedPath.getParent) } qualifiedPath } val tmpPath = getExternalTmpPath(sparkSession, hadoopConf, writeToPath) val fileSinkConf = new org.apache.spark.sql.hive.HiveShim.ShimFileSinkDesc( tmpPath.toString, tableDesc, false) try { saveAsHiveFile( sparkSession = sparkSession, plan = child, hadoopConf = hadoopConf, fileSinkConf = fileSinkConf, outputLocation = tmpPath.toString, allColumns = outputColumns) val fs = writeToPath.getFileSystem(hadoopConf) if (overwrite && fs.exists(writeToPath)) { fs.listStatus(writeToPath).foreach { existFile => if (Option(existFile.getPath) != createdTempDir) fs.delete(existFile.getPath, true) } } fs.listStatus(tmpPath).foreach { tmpFile => fs.rename(tmpFile.getPath, writeToPath) } } catch { case e: Throwable => throw new SparkException( "Failed inserting overwrite directory " + storage.locationUri.get, e) } finally { deleteExternalTmpPath(hadoopConf) } Seq.empty[Row] } }
Example 92
Source File: HiveExternalCatalogSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive import org.apache.hadoop.conf.Configuration import org.apache.spark.SparkConf import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.types.StructType class HiveExternalCatalogSuite extends ExternalCatalogSuite { private val externalCatalog: HiveExternalCatalog = { val catalog = new HiveExternalCatalog(new SparkConf, new Configuration) catalog.client.reset() catalog } protected override val utils: CatalogTestUtils = new CatalogTestUtils { override val tableInputFormat: String = "org.apache.hadoop.mapred.SequenceFileInputFormat" override val tableOutputFormat: String = "org.apache.hadoop.mapred.SequenceFileOutputFormat" override def newEmptyCatalog(): ExternalCatalog = externalCatalog override val defaultProvider: String = "hive" } protected override def resetState(): Unit = { externalCatalog.client.reset() } import utils._ test("SPARK-18647: do not put provider in table properties for Hive serde table") { val catalog = newBasicCatalog() val hiveTable = CatalogTable( identifier = TableIdentifier("hive_tbl", Some("db1")), tableType = CatalogTableType.MANAGED, storage = storageFormat, schema = new StructType().add("col1", "int").add("col2", "string"), provider = Some("hive")) catalog.createTable(hiveTable, ignoreIfExists = false) val rawTable = externalCatalog.client.getTable("db1", "hive_tbl") assert(!rawTable.properties.contains(HiveExternalCatalog.DATASOURCE_PROVIDER)) assert(DDLUtils.isHiveTable(externalCatalog.getTable("db1", "hive_tbl"))) } Seq("parquet", "hive").foreach { format => test(s"Partition columns should be put at the end of table schema for the format $format") { val catalog = newBasicCatalog() val newSchema = new StructType() .add("col1", "int") .add("col2", "string") .add("partCol1", "int") .add("partCol2", "string") val table = CatalogTable( identifier = TableIdentifier("tbl", Some("db1")), tableType = CatalogTableType.MANAGED, storage = CatalogStorageFormat.empty, schema = new StructType() .add("col1", "int") .add("partCol1", "int") .add("partCol2", "string") .add("col2", "string"), provider = Some(format), partitionColumnNames = Seq("partCol1", "partCol2")) catalog.createTable(table, ignoreIfExists = false) val restoredTable = externalCatalog.getTable("db1", "tbl") assert(restoredTable.schema == newSchema) } } test("SPARK-22306: alter table schema should not erase the bucketing metadata at hive side") { val catalog = newBasicCatalog() externalCatalog.client.runSqlHive( """ |CREATE TABLE db1.t(a string, b string) |CLUSTERED BY (a, b) SORTED BY (a, b) INTO 10 BUCKETS |STORED AS PARQUET """.stripMargin) val newSchema = new StructType().add("a", "string").add("b", "string").add("c", "string") catalog.alterTableDataSchema("db1", "t", newSchema) assert(catalog.getTable("db1", "t").schema == newSchema) val bucketString = externalCatalog.client.runSqlHive("DESC FORMATTED db1.t") .filter(_.contains("Num Buckets")).head assert(bucketString.contains("10")) } test("SPARK-23001: NullPointerException when running desc database") { val catalog = newBasicCatalog() catalog.createDatabase(newDb("dbWithNullDesc").copy(description = null), ignoreIfExists = false) assert(catalog.getDatabase("dbWithNullDesc").description == "") } }
Example 93
Source File: PruneFileSourcePartitionsSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution import org.apache.spark.sql.QueryTest import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.execution.datasources.{CatalogFileIndex, HadoopFsRelation, LogicalRelation, PruneFileSourcePartitions} import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.StructType class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("PruneFileSourcePartitions", Once, PruneFileSourcePartitions) :: Nil } test("PruneFileSourcePartitions should not change the output of LogicalRelation") { withTable("test") { withTempDir { dir => sql( s""" |CREATE EXTERNAL TABLE test(i int) |PARTITIONED BY (p int) |STORED AS parquet |LOCATION '${dir.toURI}'""".stripMargin) val tableMeta = spark.sharedState.externalCatalog.getTable("default", "test") val catalogFileIndex = new CatalogFileIndex(spark, tableMeta, 0) val dataSchema = StructType(tableMeta.schema.filterNot { f => tableMeta.partitionColumnNames.contains(f.name) }) val relation = HadoopFsRelation( location = catalogFileIndex, partitionSchema = tableMeta.partitionSchema, dataSchema = dataSchema, bucketSpec = None, fileFormat = new ParquetFileFormat(), options = Map.empty)(sparkSession = spark) val logicalRelation = LogicalRelation(relation, tableMeta) val query = Project(Seq('i, 'p), Filter('p === 1, logicalRelation)).analyze val optimized = Optimize.execute(query) assert(optimized.missingInput.isEmpty) } } } test("SPARK-20986 Reset table's statistics after PruneFileSourcePartitions rule") { withTable("tbl") { spark.range(10).selectExpr("id", "id % 3 as p").write.partitionBy("p").saveAsTable("tbl") sql(s"ANALYZE TABLE tbl COMPUTE STATISTICS") val tableStats = spark.sessionState.catalog.getTableMetadata(TableIdentifier("tbl")).stats assert(tableStats.isDefined && tableStats.get.sizeInBytes > 0, "tableStats is lost") val df = sql("SELECT * FROM tbl WHERE p = 1") val sizes1 = df.queryExecution.analyzed.collect { case relation: LogicalRelation => relation.catalogTable.get.stats.get.sizeInBytes } assert(sizes1.size === 1, s"Size wrong for:\n ${df.queryExecution}") assert(sizes1(0) == tableStats.get.sizeInBytes) val relations = df.queryExecution.optimizedPlan.collect { case relation: LogicalRelation => relation } assert(relations.size === 1, s"Size wrong for:\n ${df.queryExecution}") val size2 = relations(0).stats.sizeInBytes assert(size2 == relations(0).catalogTable.get.stats.get.sizeInBytes) assert(size2 < tableStats.get.sizeInBytes) } } }
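Outside of a test suite, the same stats lookup pattern can be used directly. A brief sketch assuming a running SparkSession; the table name is made up and created on the fly:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier

object TableStatsSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("stats-lookup-sketch").getOrCreate()

    spark.range(100).selectExpr("id", "id % 5 AS p")
      .write.partitionBy("p").mode("overwrite").saveAsTable("demo_tbl")
    spark.sql("ANALYZE TABLE demo_tbl COMPUTE STATISTICS")

    // A TableIdentifier without a database resolves against the current database.
    val stats = spark.sessionState.catalog
      .getTableMetadata(TableIdentifier("demo_tbl"))
      .stats
    println(stats.map(_.sizeInBytes))
  }
}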
Example 94
Source File: ListTablesSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive import org.scalatest.BeforeAndAfterAll import org.apache.spark.sql.QueryTest import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.hive.test.TestHiveSingleton class ListTablesSuite extends QueryTest with TestHiveSingleton with BeforeAndAfterAll { import hiveContext._ import hiveContext.implicits._ val df = sparkContext.parallelize((1 to 10).map(i => (i, s"str$i"))).toDF("key", "value") override def beforeAll(): Unit = { super.beforeAll() // The catalog in HiveContext is a case insensitive one. sessionState.catalog.createTempView( "ListTablesSuiteTable", df.logicalPlan, overrideIfExists = true) sql("CREATE TABLE HiveListTablesSuiteTable (key int, value string)") sql("CREATE DATABASE IF NOT EXISTS ListTablesSuiteDB") sql("CREATE TABLE ListTablesSuiteDB.HiveInDBListTablesSuiteTable (key int, value string)") } override def afterAll(): Unit = { try { sessionState.catalog.dropTable( TableIdentifier("ListTablesSuiteTable"), ignoreIfNotExists = true, purge = false) sql("DROP TABLE IF EXISTS HiveListTablesSuiteTable") sql("DROP TABLE IF EXISTS ListTablesSuiteDB.HiveInDBListTablesSuiteTable") sql("DROP DATABASE IF EXISTS ListTablesSuiteDB") } finally { super.afterAll() } } test("get all tables of current database") { Seq(tables(), sql("SHOW TABLes")).foreach { case allTables => // We are using default DB. checkAnswer( allTables.filter("tableName = 'listtablessuitetable'"), Row("", "listtablessuitetable", true)) checkAnswer( allTables.filter("tableName = 'hivelisttablessuitetable'"), Row("default", "hivelisttablessuitetable", false)) assert(allTables.filter("tableName = 'hiveindblisttablessuitetable'").count() === 0) } } test("getting all tables with a database name") { Seq(tables("listtablessuiteDb"), sql("SHOW TABLes in listTablesSuitedb")).foreach { case allTables => checkAnswer( allTables.filter("tableName = 'listtablessuitetable'"), Row("", "listtablessuitetable", true)) assert(allTables.filter("tableName = 'hivelisttablessuitetable'").count() === 0) checkAnswer( allTables.filter("tableName = 'hiveindblisttablessuitetable'"), Row("listtablessuitedb", "hiveindblisttablessuitetable", false)) } } }
Example 95
Source File: TableIdentifierParserSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.parser import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.TableIdentifier class TableIdentifierParserSuite extends SparkFunSuite { import CatalystSqlParser._ // Add "$elem$", "$value$" & "$key$" val hiveNonReservedKeyword = Array("add", "admin", "after", "analyze", "archive", "asc", "before", "bucket", "buckets", "cascade", "change", "cluster", "clustered", "clusterstatus", "collection", "columns", "comment", "compact", "compactions", "compute", "concatenate", "continue", "cost", "data", "day", "databases", "datetime", "dbproperties", "deferred", "defined", "delimited", "dependency", "desc", "directories", "directory", "disable", "distribute", "enable", "escaped", "exclusive", "explain", "export", "fields", "file", "fileformat", "first", "format", "formatted", "functions", "hold_ddltime", "hour", "idxproperties", "ignore", "index", "indexes", "inpath", "inputdriver", "inputformat", "items", "jar", "keys", "key_type", "last", "limit", "offset", "lines", "load", "location", "lock", "locks", "logical", "long", "mapjoin", "materialized", "metadata", "minus", "minute", "month", "msck", "noscan", "no_drop", "nulls", "offline", "option", "outputdriver", "outputformat", "overwrite", "owner", "partitioned", "partitions", "plus", "pretty", "principals", "protection", "purge", "read", "readonly", "rebuild", "recordreader", "recordwriter", "reload", "rename", "repair", "replace", "replication", "restrict", "rewrite", "role", "roles", "schemas", "second", "serde", "serdeproperties", "server", "sets", "shared", "show", "show_database", "skewed", "sort", "sorted", "ssl", "statistics", "stored", "streamtable", "string", "struct", "tables", "tblproperties", "temporary", "terminated", "tinyint", "touch", "transactions", "unarchive", "undo", "uniontype", "unlock", "unset", "unsigned", "uri", "use", "utc", "utctimestamp", "view", "while", "year", "work", "transaction", "write", "isolation", "level", "snapshot", "autocommit", "all", "alter", "array", "as", "authorization", "between", "bigint", "binary", "boolean", "both", "by", "create", "cube", "current_date", "current_timestamp", "cursor", "date", "decimal", "delete", "describe", "double", "drop", "exists", "external", "false", "fetch", "float", "for", "grant", "group", "grouping", "import", "in", "insert", "int", "into", "is", "lateral", "like", "local", "none", "null", "of", "order", "out", "outer", "partition", "percent", "procedure", "range", "reads", "revoke", "rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger", "true", "truncate", "update", "user", "values", "with", "regexp", "rlike", "bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float", "int", "smallint", "timestamp", "at", "position", "both", "leading", "trailing") val hiveStrictNonReservedKeyword = Seq("anti", "full", "inner", "left", "semi", "right", "natural", "union", "intersect", "except", "database", "on", "join", "cross", "select", "from", "where", "having", "from", "to", "table", "with", "not") test("table identifier") { // Regular names. assert(TableIdentifier("q") === parseTableIdentifier("q")) assert(TableIdentifier("q", Option("d")) === parseTableIdentifier("d.q")) // Illegal names. 
Seq("", "d.q.g", "t:", "${some.var.x}", "tab:1").foreach { identifier => intercept[ParseException](parseTableIdentifier(identifier)) } } test("quoted identifiers") { assert(TableIdentifier("z", Some("x.y")) === parseTableIdentifier("`x.y`.z")) assert(TableIdentifier("y.z", Some("x")) === parseTableIdentifier("x.`y.z`")) assert(TableIdentifier("z", Some("`x.y`")) === parseTableIdentifier("```x.y```.z")) assert(TableIdentifier("`y.z`", Some("x")) === parseTableIdentifier("x.```y.z```")) assert(TableIdentifier("x.y.z", None) === parseTableIdentifier("`x.y.z`")) } test("table identifier - strict keywords") { // SQL Keywords. hiveStrictNonReservedKeyword.foreach { keyword => assert(TableIdentifier(keyword) === parseTableIdentifier(keyword)) assert(TableIdentifier(keyword) === parseTableIdentifier(s"`$keyword`")) assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.`$keyword`")) } } test("table identifier - non reserved keywords") { // Hive keywords are allowed. hiveNonReservedKeyword.foreach { nonReserved => assert(TableIdentifier(nonReserved) === parseTableIdentifier(nonReserved)) } } test("SPARK-17364 table identifier - contains number") { assert(parseTableIdentifier("123_") == TableIdentifier("123_")) assert(parseTableIdentifier("1a.123_") == TableIdentifier("123_", Some("1a"))) // ".123" should not be treated as token of type DECIMAL_VALUE assert(parseTableIdentifier("a.123A") == TableIdentifier("123A", Some("a"))) // ".123E3" should not be treated as token of type SCIENTIFIC_DECIMAL_VALUE assert(parseTableIdentifier("a.123E3_LIST") == TableIdentifier("123E3_LIST", Some("a"))) // ".123D" should not be treated as token of type DOUBLE_LITERAL assert(parseTableIdentifier("a.123D_LIST") == TableIdentifier("123D_LIST", Some("a"))) // ".123BD" should not be treated as token of type BIGDECIMAL_LITERAL assert(parseTableIdentifier("a.123BD_LIST") == TableIdentifier("123BD_LIST", Some("a"))) } test("SPARK-17832 table identifier - contains backtick") { val complexName = TableIdentifier("`weird`table`name", Some("`d`b`1")) assert(complexName === parseTableIdentifier("```d``b``1`.```weird``table``name`")) assert(complexName === parseTableIdentifier(complexName.quotedString)) intercept[ParseException](parseTableIdentifier(complexName.unquotedString)) // Table identifier contains countious backticks should be treated correctly. val complexName2 = TableIdentifier("x``y", Some("d``b")) assert(complexName2 === parseTableIdentifier(complexName2.quotedString)) } }
Example 96
Source File: CatalogSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.types.StructType class CatalogSuite extends AnalysisTest { test("desc table when owner is set to null") { val table = CatalogTable( identifier = TableIdentifier("tbl", Some("db1")), tableType = CatalogTableType.MANAGED, storage = CatalogStorageFormat.empty, owner = null, schema = new StructType().add("col1", "int").add("col2", "string"), provider = Some("parquet")) table.toLinkedHashMap } }
Example 97
Source File: ddl.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources import java.util.Locale import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.command.{DDLUtils, RunnableCommand} import org.apache.spark.sql.types._ case class CreateTempViewUsing( tableIdent: TableIdentifier, userSpecifiedSchema: Option[StructType], replace: Boolean, global: Boolean, provider: String, options: Map[String, String]) extends RunnableCommand { if (tableIdent.database.isDefined) { throw new AnalysisException( s"Temporary view '$tableIdent' should not have specified a database") } override def argString: String = { s"[tableIdent:$tableIdent " + userSpecifiedSchema.map(_ + " ").getOrElse("") + s"replace:$replace " + s"provider:$provider " + CatalogUtils.maskCredentials(options) } override def run(sparkSession: SparkSession): Seq[Row] = { if (provider.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) { throw new AnalysisException("Hive data source can only be used with tables, " + "you can't use it with CREATE TEMP VIEW USING") } val dataSource = DataSource( sparkSession, userSpecifiedSchema = userSpecifiedSchema, className = provider, options = options) val catalog = sparkSession.sessionState.catalog val viewDefinition = Dataset.ofRows( sparkSession, LogicalRelation(dataSource.resolveRelation())).logicalPlan if (global) { catalog.createGlobalTempView(tableIdent.table, viewDefinition, replace) } else { catalog.createTempView(tableIdent.table, viewDefinition, replace) } Seq.empty[Row] } } case class RefreshTable(tableIdent: TableIdentifier) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { // Refresh the given table's metadata. If this table is cached as an InMemoryRelation, // drop the original cached version and make the new version cached lazily. sparkSession.catalog.refreshTable(tableIdent.quotedString) Seq.empty[Row] } } case class RefreshResource(path: String) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { sparkSession.catalog.refreshByPath(path) Seq.empty[Row] } }
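A hedged sketch of driving these commands directly rather than through SQL. The path below is a placeholder that must point at existing Parquet data, and a SparkSession named spark is assumed:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.datasources.{CreateTempViewUsing, RefreshTable}

object DdlCommandSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("ddl-command-sketch").getOrCreate()

    // Roughly: CREATE OR REPLACE TEMPORARY VIEW events USING parquet OPTIONS (path '...')
    CreateTempViewUsing(
      tableIdent = TableIdentifier("events"),
      userSpecifiedSchema = None,
      replace = true,
      global = false,
      provider = "parquet",
      options = Map("path" -> "/tmp/events_parquet") // placeholder path
    ).run(spark)

    // Roughly: REFRESH TABLE events
    RefreshTable(TableIdentifier("events")).run(spark)
  }
}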
Example 98
Source File: cache.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command import org.apache.spark.sql.{Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan case class CacheTableCommand( tableIdent: TableIdentifier, plan: Option[LogicalPlan], isLazy: Boolean) extends RunnableCommand { require(plan.isEmpty || tableIdent.database.isEmpty, "Database name is not allowed in CACHE TABLE AS SELECT") override protected def innerChildren: Seq[QueryPlan[_]] = plan.toSeq override def run(sparkSession: SparkSession): Seq[Row] = { plan.foreach { logicalPlan => Dataset.ofRows(sparkSession, logicalPlan).createTempView(tableIdent.quotedString) } sparkSession.catalog.cacheTable(tableIdent.quotedString) if (!isLazy) { // Performs eager caching sparkSession.table(tableIdent).count() } Seq.empty[Row] } } case class UncacheTableCommand( tableIdent: TableIdentifier, ifExists: Boolean) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { val tableId = tableIdent.quotedString if (!ifExists || sparkSession.catalog.tableExists(tableId)) { sparkSession.catalog.uncacheTable(tableId) } Seq.empty[Row] } } case class ClearCacheCommand() extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { // Drops all cached tables and views from the in-memory cache. sparkSession.catalog.clearCache() Seq.empty[Row] } // TreeNode.makeCopy cannot handle a zero-argument constructor, so override it explicitly. override def makeCopy(newArgs: Array[AnyRef]): ClearCacheCommand = ClearCacheCommand() }
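Programmatic use of the cache commands above, assuming a SparkSession named spark is available; this mirrors CACHE TABLE ... AS SELECT followed by UNCACHE TABLE IF EXISTS, with made-up view names:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.command.{CacheTableCommand, UncacheTableCommand}

object CacheCommandSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("cache-command-sketch").getOrCreate()

    // CACHE TABLE cached_nums AS SELECT id, id % 10 AS bucket FROM range(100)
    val plan = spark.range(100).selectExpr("id", "id % 10 AS bucket").queryExecution.logical
    CacheTableCommand(TableIdentifier("cached_nums"), Some(plan), isLazy = false).run(spark)

    // UNCACHE TABLE IF EXISTS cached_nums
    UncacheTableCommand(TableIdentifier("cached_nums"), ifExists = true).run(spark)
  }
}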
Example 99
Source File: AnalyzeTableCommand.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command import org.apache.spark.sql.{AnalysisException, Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.CatalogTableType case class AnalyzeTableCommand( tableIdent: TableIdentifier, noscan: Boolean = true) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { val sessionState = sparkSession.sessionState val db = tableIdent.database.getOrElse(sessionState.catalog.getCurrentDatabase) val tableIdentWithDB = TableIdentifier(tableIdent.table, Some(db)) val tableMeta = sessionState.catalog.getTableMetadata(tableIdentWithDB) if (tableMeta.tableType == CatalogTableType.VIEW) { throw new AnalysisException("ANALYZE TABLE is not supported on views.") } // Compute stats for the whole table val newTotalSize = CommandUtils.calculateTotalSize(sessionState, tableMeta) val newRowCount = if (noscan) None else Some(BigInt(sparkSession.table(tableIdentWithDB).count())) // Update the metastore if the above statistics of the table are different from those // recorded in the metastore. val newStats = CommandUtils.compareAndGetNewStats(tableMeta.stats, newTotalSize, newRowCount) if (newStats.isDefined) { sessionState.catalog.alterTableStats(tableIdentWithDB, newStats) } Seq.empty[Row] } }
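Invoking the command directly does roughly the same work as issuing the SQL. A short sketch assuming a SparkSession named spark and a throwaway table created for the demo:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.command.AnalyzeTableCommand

object AnalyzeTableSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("analyze-sketch").getOrCreate()
    spark.range(1000).write.mode("overwrite").saveAsTable("analyze_demo")

    // ANALYZE TABLE analyze_demo COMPUTE STATISTICS NOSCAN  (size only, no row count)
    AnalyzeTableCommand(TableIdentifier("analyze_demo"), noscan = true).run(spark)

    // ANALYZE TABLE analyze_demo COMPUTE STATISTICS  (also scans to compute the row count)
    AnalyzeTableCommand(TableIdentifier("analyze_demo"), noscan = false).run(spark)

    println(spark.sessionState.catalog.getTableMetadata(TableIdentifier("analyze_demo")).stats)
  }
}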
Example 100
Source File: SparkSchemaProvider.scala From mimir with Apache License 2.0 | 5 votes |
package mimir.data import com.typesafe.scalalogging.LazyLogging import org.apache.spark.sql.{ DataFrame, SaveMode } import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.analysis.{ UnresolvedRelation, NoSuchDatabaseException } import org.apache.spark.sql.execution.command.{ DropTableCommand, CreateDatabaseCommand } import mimir.Database import mimir.algebra._ import mimir.exec.spark.{MimirSpark, RAToSpark, RowIndexPlan} class SparkSchemaProvider(db: Database) extends LogicalPlanSchemaProvider with MaterializedTableProvider with LazyLogging { def listTables(): Seq[ID] = { try { MimirSpark.get.sparkSession .catalog .listTables() .collect() .map { table => ID(table.name) } .toSeq } catch { case _:NoSuchDatabaseException => { logger.warn("Couldn't find database!!! ($sparkDBName)") Seq.empty } } } def tableSchema(table: ID): Option[Seq[(ID, Type)]] = { try { if (MimirSpark.get.sparkSession.catalog.tableExists(table.id)) { Some( MimirSpark.get.sparkSession .catalog .listColumns(table.id) .collect() .map { col => ( ID(col.name), RAToSpark.getMimirType( RAToSpark.dataTypeFromHiveDataTypeString(col.dataType)) ) } .toSeq ) } else { logger.trace(s"$table doesn't exist") None } } catch { case _:NoSuchDatabaseException => { logger.warn("Couldn't find database!!! ($sparkDBName)") None } } } def logicalplan(table: ID): LogicalPlan = { RowIndexPlan( UnresolvedRelation(TableIdentifier(table.id)), tableSchema(table).get ).getPlan(db) } def createStoredTableAs(data: DataFrame, name: ID) { data.persist() .createOrReplaceTempView(name.id) data.write .mode(SaveMode.Overwrite) .saveAsTable(name.id) } def dropStoredTable(name: ID) { DropTableCommand( TableIdentifier(name.id, None),//Option(sparkDBName)), true, false, true ).run(MimirSpark.get.sparkSession) } }
Example 101
Source File: CreateViewAsSelect.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.hive.{HiveMetastoreTypes, HiveContext} import org.apache.spark.sql.{AnalysisException, Row, SQLContext} import org.apache.spark.sql.execution.RunnableCommand import org.apache.spark.sql.hive.client.{HiveColumn, HiveTable} // TODO: Note that this class can NOT canonicalize the view SQL string entirely, which is different // from Hive and may not work for some cases like create view on self join. private[hive] case class CreateViewAsSelect( tableDesc: HiveTable, childSchema: Seq[Attribute], allowExisting: Boolean, orReplace: Boolean) extends RunnableCommand { assert(tableDesc.schema == Nil || tableDesc.schema.length == childSchema.length) assert(tableDesc.viewText.isDefined) val tableIdentifier = TableIdentifier(tableDesc.name, Some(tableDesc.database)) override def run(sqlContext: SQLContext): Seq[Row] = { val hiveContext = sqlContext.asInstanceOf[HiveContext] if (hiveContext.catalog.tableExists(tableIdentifier)) { if (allowExisting) { // view already exists, will do nothing, to keep consistent with Hive } else if (orReplace) { hiveContext.catalog.client.alertView(prepareTable()) } else { throw new AnalysisException(s"View $tableIdentifier already exists. " + "If you want to update the view definition, please use ALTER VIEW AS or " + "CREATE OR REPLACE VIEW AS") } } else { hiveContext.catalog.client.createView(prepareTable()) } Seq.empty[Row] } private def prepareTable(): HiveTable = { // setup column types according to the schema of child. val schema = if (tableDesc.schema == Nil) { childSchema.map { attr => HiveColumn(attr.name, HiveMetastoreTypes.toMetastoreType(attr.dataType), null) } } else { childSchema.zip(tableDesc.schema).map { case (attr, col) => HiveColumn(col.name, HiveMetastoreTypes.toMetastoreType(attr.dataType), col.comment) } } val columnNames = childSchema.map(f => verbose(f.name)) // When user specified column names for view, we should create a project to do the renaming. // When no column name specified, we still need to create a project to declare the columns // we need, to make us more robust to top level `*`s. val projectList = if (tableDesc.schema == Nil) { columnNames.mkString(", ") } else { columnNames.zip(tableDesc.schema.map(f => verbose(f.name))).map { case (name, alias) => s"$name AS $alias" }.mkString(", ") } val viewName = verbose(tableDesc.name) val expandedText = s"SELECT $projectList FROM (${tableDesc.viewText.get}) $viewName" tableDesc.copy(schema = schema, viewText = Some(expandedText)) } // escape backtick with double-backtick in column name and wrap it with backtick. private def verbose(name: String) = s"`${name.replaceAll("`", "``")}`" }
Example 102
Source File: CreateTableAsSelect.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan} import org.apache.spark.sql.execution.RunnableCommand import org.apache.spark.sql.hive.client.{HiveColumn, HiveTable} import org.apache.spark.sql.hive.{HiveContext, HiveMetastoreTypes, MetastoreRelation} import org.apache.spark.sql.{AnalysisException, Row, SQLContext} private[hive] case class CreateTableAsSelect( tableDesc: HiveTable, query: LogicalPlan, allowExisting: Boolean) extends RunnableCommand { val tableIdentifier = TableIdentifier(tableDesc.name, Some(tableDesc.database)) override def children: Seq[LogicalPlan] = Seq(query) override def run(sqlContext: SQLContext): Seq[Row] = { val hiveContext = sqlContext.asInstanceOf[HiveContext] lazy val metastoreRelation: MetastoreRelation = { import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe import org.apache.hadoop.io.Text import org.apache.hadoop.mapred.TextInputFormat val withFormat = tableDesc.copy( inputFormat = tableDesc.inputFormat.orElse(Some(classOf[TextInputFormat].getName)), outputFormat = tableDesc.outputFormat .orElse(Some(classOf[HiveIgnoreKeyTextOutputFormat[Text, Text]].getName)), serde = tableDesc.serde.orElse(Some(classOf[LazySimpleSerDe].getName()))) val withSchema = if (withFormat.schema.isEmpty) { // Hive doesn't support specifying the column list for target table in CTAS // However we don't think SparkSQL should follow that. tableDesc.copy(schema = query.output.map(c => HiveColumn(c.name, HiveMetastoreTypes.toMetastoreType(c.dataType), null))) } else { withFormat } hiveContext.catalog.client.createTable(withSchema) // Get the Metastore Relation hiveContext.catalog.lookupRelation(tableIdentifier, None) match { case r: MetastoreRelation => r } } // TODO ideally, we should get the output data ready first and then // add the relation into catalog, just in case of failure occurs while data // processing. if (hiveContext.catalog.tableExists(tableIdentifier)) { if (allowExisting) { // table already exists, will do nothing, to keep consistent with Hive } else { throw new AnalysisException(s"$tableIdentifier already exists.") } } else { hiveContext.executePlan(InsertIntoTable(metastoreRelation, Map(), query, true, false)).toRdd } Seq.empty[Row] } override def argString: String = { s"[Database:${tableDesc.database}}, TableName: ${tableDesc.name}, InsertIntoHiveTable]" } }
Example 103
Source File: ListTablesSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive import org.scalatest.BeforeAndAfterAll import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.QueryTest import org.apache.spark.sql.Row class ListTablesSuite extends QueryTest with TestHiveSingleton with BeforeAndAfterAll { import hiveContext._ import hiveContext.implicits._ val df = sparkContext.parallelize((1 to 10).map(i => (i, s"str$i"))).toDF("key", "value") override def beforeAll(): Unit = { // The catalog in HiveContext is a case insensitive one. catalog.registerTable(TableIdentifier("ListTablesSuiteTable"), df.logicalPlan) sql("CREATE TABLE HiveListTablesSuiteTable (key int, value string)") sql("CREATE DATABASE IF NOT EXISTS ListTablesSuiteDB") sql("CREATE TABLE ListTablesSuiteDB.HiveInDBListTablesSuiteTable (key int, value string)") } override def afterAll(): Unit = { catalog.unregisterTable(TableIdentifier("ListTablesSuiteTable")) sql("DROP TABLE IF EXISTS HiveListTablesSuiteTable") sql("DROP TABLE IF EXISTS ListTablesSuiteDB.HiveInDBListTablesSuiteTable") sql("DROP DATABASE IF EXISTS ListTablesSuiteDB") } test("get all tables of current database") { Seq(tables(), sql("SHOW TABLes")).foreach { case allTables => // We are using default DB. checkAnswer( allTables.filter("tableName = 'listtablessuitetable'"), Row("listtablessuitetable", true)) checkAnswer( allTables.filter("tableName = 'hivelisttablessuitetable'"), Row("hivelisttablessuitetable", false)) assert(allTables.filter("tableName = 'hiveindblisttablessuitetable'").count() === 0) } } test("getting all tables with a database name") { Seq(tables("listtablessuiteDb"), sql("SHOW TABLes in listTablesSuitedb")).foreach { case allTables => checkAnswer( allTables.filter("tableName = 'listtablessuitetable'"), Row("listtablessuitetable", true)) assert(allTables.filter("tableName = 'hivelisttablessuitetable'").count() === 0) checkAnswer( allTables.filter("tableName = 'hiveindblisttablessuitetable'"), Row("hiveindblisttablessuitetable", false)) } } }
Example 104
Source File: AnalysisTest.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.{TableIdentifier, SimpleCatalystConf} trait AnalysisTest extends PlanTest { val (caseSensitiveAnalyzer, caseInsensitiveAnalyzer) = { val caseSensitiveConf = new SimpleCatalystConf(true) val caseInsensitiveConf = new SimpleCatalystConf(false) val caseSensitiveCatalog = new SimpleCatalog(caseSensitiveConf) val caseInsensitiveCatalog = new SimpleCatalog(caseInsensitiveConf) caseSensitiveCatalog.registerTable(TableIdentifier("TaBlE"), TestRelations.testRelation) caseInsensitiveCatalog.registerTable(TableIdentifier("TaBlE"), TestRelations.testRelation) new Analyzer(caseSensitiveCatalog, EmptyFunctionRegistry, caseSensitiveConf) { override val extendedResolutionRules = EliminateSubQueries :: Nil } -> new Analyzer(caseInsensitiveCatalog, EmptyFunctionRegistry, caseInsensitiveConf) { override val extendedResolutionRules = EliminateSubQueries :: Nil } } protected def getAnalyzer(caseSensitive: Boolean) = { if (caseSensitive) caseSensitiveAnalyzer else caseInsensitiveAnalyzer } protected def checkAnalysis( inputPlan: LogicalPlan, expectedPlan: LogicalPlan, caseSensitive: Boolean = true): Unit = { val analyzer = getAnalyzer(caseSensitive) val actualPlan = analyzer.execute(inputPlan) analyzer.checkAnalysis(actualPlan) comparePlans(actualPlan, expectedPlan) } protected def assertAnalysisSuccess( inputPlan: LogicalPlan, caseSensitive: Boolean = true): Unit = { val analyzer = getAnalyzer(caseSensitive) analyzer.checkAnalysis(analyzer.execute(inputPlan)) } protected def assertAnalysisError( inputPlan: LogicalPlan, expectedErrors: Seq[String], caseSensitive: Boolean = true): Unit = { val analyzer = getAnalyzer(caseSensitive) val e = intercept[AnalysisException] { analyzer.checkAnalysis(analyzer.execute(inputPlan)) } assert(expectedErrors.map(_.toLowerCase).forall(e.getMessage.toLowerCase.contains), s"Expected to throw Exception contains: ${expectedErrors.mkString(", ")}, " + s"actually we get ${e.getMessage}") } }
Example 105
Source File: ListTablesSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import org.scalatest.BeforeAndAfter import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types.{BooleanType, StringType, StructField, StructType} import org.apache.spark.sql.catalyst.TableIdentifier class ListTablesSuite extends QueryTest with BeforeAndAfter with SharedSQLContext { import testImplicits._ private lazy val df = (1 to 10).map(i => (i, s"str$i")).toDF("key", "value") before { df.registerTempTable("ListTablesSuiteTable") } after { sqlContext.catalog.unregisterTable(TableIdentifier("ListTablesSuiteTable")) } test("get all tables") { checkAnswer( sqlContext.tables().filter("tableName = 'ListTablesSuiteTable'"), Row("ListTablesSuiteTable", true)) checkAnswer( sql("SHOW tables").filter("tableName = 'ListTablesSuiteTable'"), Row("ListTablesSuiteTable", true)) sqlContext.catalog.unregisterTable(TableIdentifier("ListTablesSuiteTable")) assert(sqlContext.tables().filter("tableName = 'ListTablesSuiteTable'").count() === 0) } test("getting all Tables with a database name has no impact on returned table names") { checkAnswer( sqlContext.tables("DB").filter("tableName = 'ListTablesSuiteTable'"), Row("ListTablesSuiteTable", true)) checkAnswer( sql("show TABLES in DB").filter("tableName = 'ListTablesSuiteTable'"), Row("ListTablesSuiteTable", true)) sqlContext.catalog.unregisterTable(TableIdentifier("ListTablesSuiteTable")) assert(sqlContext.tables().filter("tableName = 'ListTablesSuiteTable'").count() === 0) } test("query the returned DataFrame of tables") { val expectedSchema = StructType( StructField("tableName", StringType, false) :: StructField("isTemporary", BooleanType, false) :: Nil) Seq(sqlContext.tables(), sql("SHOW TABLes")).foreach { case tableDF => assert(expectedSchema === tableDF.schema) tableDF.registerTempTable("tables") checkAnswer( sql( "SELECT isTemporary, tableName from tables WHERE tableName = 'ListTablesSuiteTable'"), Row(true, "ListTablesSuiteTable") ) checkAnswer( sqlContext.tables().filter("tableName = 'tables'").select("tableName", "isTemporary"), Row("tables", true)) sqlContext.dropTempTable("tables") } } }
Example 106
Source File: CarbonCountStar.scala From carbondata with Apache License 2.0 | 4 votes |
package org.apache.spark.sql import scala.collection.JavaConverters._ import org.apache.hadoop.fs.Path import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapreduce.Job import org.apache.hadoop.mapreduce.lib.input.FileInputFormat import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.execution.LeafExecNode import org.apache.spark.sql.optimizer.CarbonFilters import org.apache.spark.sql.types.StringType import org.apache.spark.unsafe.types.UTF8String import org.apache.carbondata.core.datastore.impl.FileFactory import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier import org.apache.carbondata.core.metadata.schema.table.CarbonTable import org.apache.carbondata.core.mutate.CarbonUpdateUtil import org.apache.carbondata.core.statusmanager.StageInputCollector import org.apache.carbondata.core.util.{CarbonProperties, ThreadLocalSessionInfo} import org.apache.carbondata.hadoop.api.{CarbonInputFormat, CarbonTableInputFormat} import org.apache.carbondata.hadoop.util.CarbonInputFormatUtil import org.apache.carbondata.spark.load.DataLoadProcessBuilderOnSpark case class CarbonCountStar( attributesRaw: Seq[Attribute], carbonTable: CarbonTable, sparkSession: SparkSession, outUnsafeRows: Boolean = true) extends LeafExecNode { override def doExecute(): RDD[InternalRow] = { ThreadLocalSessionInfo .setConfigurationToCurrentThread(sparkSession.sessionState.newHadoopConf()) val absoluteTableIdentifier = carbonTable.getAbsoluteTableIdentifier val (job, tableInputFormat) = createCarbonInputFormat(absoluteTableIdentifier) CarbonInputFormat.setQuerySegment(job.getConfiguration, carbonTable) // get row count var rowCount = CarbonUpdateUtil.getRowCount( tableInputFormat.getBlockRowCount( job, carbonTable, CarbonFilters.getPartitions( Seq.empty, sparkSession, TableIdentifier( carbonTable.getTableName, Some(carbonTable.getDatabaseName))).map(_.asJava).orNull, false), carbonTable) if (CarbonProperties.isQueryStageInputEnabled) { // check for number of row for stage input val splits = StageInputCollector.createInputSplits(carbonTable, job.getConfiguration) if (!splits.isEmpty) { val df = DataLoadProcessBuilderOnSpark.createInputDataFrame( sparkSession, carbonTable, splits.asScala) rowCount += df.count() } } val valueRaw = attributesRaw.head.dataType match { case StringType => Seq(UTF8String.fromString(Long.box(rowCount).toString)).toArray .asInstanceOf[Array[Any]] case _ => Seq(Long.box(rowCount)).toArray.asInstanceOf[Array[Any]] } val value = new GenericInternalRow(valueRaw) val unsafeProjection = UnsafeProjection.create(output.map(_.dataType).toArray) val row = if (outUnsafeRows) unsafeProjection(value) else value sparkContext.parallelize(Seq(row)) } override def output: Seq[Attribute] = { attributesRaw } private def createCarbonInputFormat(absoluteTableIdentifier: AbsoluteTableIdentifier ): (Job, CarbonTableInputFormat[Array[Object]]) = { val carbonInputFormat = new CarbonTableInputFormat[Array[Object]]() val jobConf: JobConf = new JobConf(FileFactory.getConfiguration) SparkHadoopUtil.get.addCredentials(jobConf) CarbonInputFormat.setTableInfo(jobConf, carbonTable.getTableInfo) val job = new Job(jobConf) FileInputFormat.addInputPath(job, new Path(absoluteTableIdentifier.getTablePath)) CarbonInputFormat .setTransactionalTable(job.getConfiguration, carbonTable.getTableInfo.isTransactionalTable) 
CarbonInputFormatUtil.setIndexJobIfConfigured(job.getConfiguration) (job, carbonInputFormat) } }