org.apache.spark.sql.catalyst.TableIdentifier Scala Examples

The following examples show how to use org.apache.spark.sql.catalyst.TableIdentifier. They are drawn from several open-source projects; the originating project, source file, and license are noted above each example.
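Before diving into the examples, here is a minimal orientation sketch (plain Spark, nothing project-specific): a TableIdentifier is simply a table name plus an optional database name, with helpers that render quoted and unquoted SQL forms.

import org.apache.spark.sql.catalyst.TableIdentifier

val unqualified = TableIdentifier("events")               // table only; database resolved later
val qualified   = TableIdentifier("events", Some("logs")) // explicitly qualified with a database

qualified.table          // "events"
qualified.database       // Some("logs")
qualified.quotedString   // `logs`.`events` (backquoted, safe for reserved words and dots)
qualified.unquotedString // logs.events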
Example 1
Source File: MetastoreRelationSuite.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql.hive

import org.apache.spark.sql.{QueryTest, Row}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

class MetastoreRelationSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
  test("makeCopy and toJSON should work") {
    val table = CatalogTable(
      identifier = TableIdentifier("test", Some("db")),
      tableType = CatalogTableType.VIEW,
      storage = CatalogStorageFormat.empty,
      schema = StructType(StructField("a", IntegerType, true) :: Nil))
    val relation = MetastoreRelation("db", "test")(table, null)

    // No exception should be thrown
    relation.makeCopy(Array("db", "test"))
    // No exception should be thrown
    relation.toJSON
  }

  test("SPARK-17409: Do Not Optimize Query in CTAS (Hive Serde Table) More Than Once") {
    withTable("bar") {
      withTempView("foo") {
        sql("select 0 as id").createOrReplaceTempView("foo")
        // If we optimize the query in CTAS more than once, the following saveAsTable will fail
        // with the error: `GROUP BY position 0 is not in select list (valid range is [1, 1])`
        sql("CREATE TABLE bar AS SELECT * FROM foo group by id")
        checkAnswer(spark.table("bar"), Row(0) :: Nil)
        val tableMetadata = spark.sessionState.catalog.getTableMetadata(TableIdentifier("bar"))
        assert(tableMetadata.provider == Some("hive"), "the expected table is a Hive serde table")
      }
    }
  }
} 
Example 2
Source File: ListTablesSuite.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql.hive

import org.scalatest.BeforeAndAfterAll

import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.hive.test.TestHiveSingleton

class ListTablesSuite extends QueryTest with TestHiveSingleton with BeforeAndAfterAll {
  import hiveContext._
  import hiveContext.implicits._

  val df = sparkContext.parallelize((1 to 10).map(i => (i, s"str$i"))).toDF("key", "value")

  override def beforeAll(): Unit = {
    super.beforeAll()
    // The catalog in HiveContext is a case insensitive one.
    sessionState.catalog.createTempView(
      "ListTablesSuiteTable", df.logicalPlan, overrideIfExists = true)
    sql("CREATE TABLE HiveListTablesSuiteTable (key int, value string)")
    sql("CREATE DATABASE IF NOT EXISTS ListTablesSuiteDB")
    sql("CREATE TABLE ListTablesSuiteDB.HiveInDBListTablesSuiteTable (key int, value string)")
  }

  override def afterAll(): Unit = {
    try {
      sessionState.catalog.dropTable(
        TableIdentifier("ListTablesSuiteTable"), ignoreIfNotExists = true, purge = false)
      sql("DROP TABLE IF EXISTS HiveListTablesSuiteTable")
      sql("DROP TABLE IF EXISTS ListTablesSuiteDB.HiveInDBListTablesSuiteTable")
      sql("DROP DATABASE IF EXISTS ListTablesSuiteDB")
    } finally {
      super.afterAll()
    }
  }

  test("get all tables of current database") {
    Seq(tables(), sql("SHOW TABLes")).foreach {
      case allTables =>
        // We are using default DB.
        checkAnswer(
          allTables.filter("tableName = 'listtablessuitetable'"),
          Row("", "listtablessuitetable", true))
        checkAnswer(
          allTables.filter("tableName = 'hivelisttablessuitetable'"),
          Row("default", "hivelisttablessuitetable", false))
        assert(allTables.filter("tableName = 'hiveindblisttablessuitetable'").count() === 0)
    }
  }

  test("getting all tables with a database name") {
    Seq(tables("listtablessuiteDb"), sql("SHOW TABLes in listTablesSuitedb")).foreach {
      case allTables =>
        checkAnswer(
          allTables.filter("tableName = 'listtablessuitetable'"),
          Row("", "listtablessuitetable", true))
        assert(allTables.filter("tableName = 'hivelisttablessuitetable'").count() === 0)
        checkAnswer(
          allTables.filter("tableName = 'hiveindblisttablessuitetable'"),
          Row("listtablessuitedb", "hiveindblisttablessuitetable", false))
    }
  }
} 
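For reference, the same listing is also available through the public catalog API; a small sketch assuming a SparkSession named spark, independent of the Hive test harness above:

// Columns of the returned Dataset[Table]: name, database, description, tableType, isTemporary
spark.catalog.listTables("default").show()

// Temporary views only
spark.catalog.listTables("default").filter(_.isTemporary).show()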
Example 3
Source File: TableIdentifierParserSuite.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql.catalyst.parser

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.TableIdentifier

class TableIdentifierParserSuite extends SparkFunSuite {
  import CatalystSqlParser._

  // Add "$elem$", "$value$" & "$key$"
  val hiveNonReservedKeyword = Array("add", "admin", "after", "analyze", "archive", "asc", "before",
    "bucket", "buckets", "cascade", "change", "cluster", "clustered", "clusterstatus", "collection",
    "columns", "comment", "compact", "compactions", "compute", "concatenate", "continue", "data",
    "day", "databases", "datetime", "dbproperties", "deferred", "defined", "delimited",
    "dependency", "desc", "directories", "directory", "disable", "distribute",
    "enable", "escaped", "exclusive", "explain", "export", "fields", "file", "fileformat", "first",
    "format", "formatted", "functions", "hold_ddltime", "hour", "idxproperties", "ignore", "index",
    "indexes", "inpath", "inputdriver", "inputformat", "items", "jar", "keys", "key_type", "last",
    "limit", "offset", "lines", "load", "location", "lock", "locks", "logical", "long", "mapjoin",
    "materialized", "metadata", "minus", "minute", "month", "msck", "noscan", "no_drop", "nulls",
    "offline", "option", "outputdriver", "outputformat", "overwrite", "owner", "partitioned",
    "partitions", "plus", "pretty", "principals", "protection", "purge", "read", "readonly",
    "rebuild", "recordreader", "recordwriter", "reload", "rename", "repair", "replace",
    "replication", "restrict", "rewrite", "role", "roles", "schemas", "second",
    "serde", "serdeproperties", "server", "sets", "shared", "show", "show_database", "skewed",
    "sort", "sorted", "ssl", "statistics", "stored", "streamtable", "string", "struct", "tables",
    "tblproperties", "temporary", "terminated", "tinyint", "touch", "transactions", "unarchive",
    "undo", "uniontype", "unlock", "unset", "unsigned", "uri", "use", "utc", "utctimestamp",
    "view", "while", "year", "work", "transaction", "write", "isolation", "level",
    "snapshot", "autocommit", "all", "alter", "array", "as", "authorization", "between", "bigint",
    "binary", "boolean", "both", "by", "create", "cube", "current_date", "current_timestamp",
    "cursor", "date", "decimal", "delete", "describe", "double", "drop", "exists", "external",
    "false", "fetch", "float", "for", "grant", "group", "grouping", "import", "in",
    "insert", "int", "into", "is", "lateral", "like", "local", "none", "null",
    "of", "order", "out", "outer", "partition", "percent", "procedure", "range", "reads", "revoke",
    "rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger",
    "true", "truncate", "update", "user", "using", "values", "with", "regexp", "rlike",
    "bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float",
    "int", "smallint", "timestamp", "at")

  val hiveStrictNonReservedKeyword = Seq("anti", "full", "inner", "left", "semi", "right",
    "natural", "union", "intersect", "except", "database", "on", "join", "cross", "select", "from",
    "where", "having", "from", "to", "table", "with", "not")

  test("table identifier") {
    // Regular names.
    assert(TableIdentifier("q") === parseTableIdentifier("q"))
    assert(TableIdentifier("q", Option("d")) === parseTableIdentifier("d.q"))

    // Illegal names.
    Seq("", "d.q.g", "t:", "${some.var.x}", "tab:1").foreach { identifier =>
      intercept[ParseException](parseTableIdentifier(identifier))
    }
  }

  test("quoted identifiers") {
    assert(TableIdentifier("z", Some("x.y")) === parseTableIdentifier("`x.y`.z"))
    assert(TableIdentifier("y.z", Some("x")) === parseTableIdentifier("x.`y.z`"))
    assert(TableIdentifier("z", Some("`x.y`")) === parseTableIdentifier("```x.y```.z"))
    assert(TableIdentifier("`y.z`", Some("x")) === parseTableIdentifier("x.```y.z```"))
    assert(TableIdentifier("x.y.z", None) === parseTableIdentifier("`x.y.z`"))
  }

  test("table identifier - strict keywords") {
    // SQL Keywords.
    hiveStrictNonReservedKeyword.foreach { keyword =>
      assert(TableIdentifier(keyword) === parseTableIdentifier(keyword))
      assert(TableIdentifier(keyword) === parseTableIdentifier(s"`$keyword`"))
      assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.`$keyword`"))
    }
  }

  test("table identifier - non reserved keywords") {
    // Hive keywords are allowed.
    hiveNonReservedKeyword.foreach { nonReserved =>
      assert(TableIdentifier(nonReserved) === parseTableIdentifier(nonReserved))
    }
  }

  test("SPARK-17364 table identifier - contains number") {
    assert(parseTableIdentifier("123_") == TableIdentifier("123_"))
    assert(parseTableIdentifier("1a.123_") == TableIdentifier("123_", Some("1a")))
    // ".123" should not be treated as token of type DECIMAL_VALUE
    assert(parseTableIdentifier("a.123A") == TableIdentifier("123A", Some("a")))
    // ".123E3" should not be treated as token of type SCIENTIFIC_DECIMAL_VALUE
    assert(parseTableIdentifier("a.123E3_LIST") == TableIdentifier("123E3_LIST", Some("a")))
    // ".123D" should not be treated as token of type DOUBLE_LITERAL
    assert(parseTableIdentifier("a.123D_LIST") == TableIdentifier("123D_LIST", Some("a")))
    // ".123BD" should not be treated as token of type BIGDECIMAL_LITERAL
    assert(parseTableIdentifier("a.123BD_LIST") == TableIdentifier("123BD_LIST", Some("a")))
  }

  test("SPARK-17832 table identifier - contains backtick") {
    val complexName = TableIdentifier("`weird`table`name", Some("`d`b`1"))
    assert(complexName === parseTableIdentifier("```d``b``1`.```weird``table``name`"))
    assert(complexName === parseTableIdentifier(complexName.quotedString))
    intercept[ParseException](parseTableIdentifier(complexName.unquotedString))
    // A table identifier containing consecutive backticks should be treated correctly.
    val complexName2 = TableIdentifier("x``y", Some("d``b"))
    assert(complexName2 === parseTableIdentifier(complexName2.quotedString))
  }
} 
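As a usage sketch of the parser exercised above (using only the classes already imported in the example): quotedString escapes dots and backticks, so parsing it yields the original identifier back.

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser

val id = TableIdentifier("my.table", Some("db"))
id.quotedString                                               // `db`.`my.table`
CatalystSqlParser.parseTableIdentifier("db.`my.table`") == id // true
CatalystSqlParser.parseTableIdentifier(id.quotedString) == id // true: quoting round-trips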
Example 4
Source File: ddl.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.CatalogTable
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.types._

case class CreateTable(
    tableDesc: CatalogTable,
    mode: SaveMode,
    query: Option[LogicalPlan]) extends Command {
  assert(tableDesc.provider.isDefined, "The table to be created must have a provider.")

  if (query.isEmpty) {
    assert(
      mode == SaveMode.ErrorIfExists || mode == SaveMode.Ignore,
      "create table without data insertion can only use ErrorIfExists or Ignore as SaveMode.")
  }

  override def innerChildren: Seq[QueryPlan[_]] = query.toSeq
}


class CaseInsensitiveMap(map: Map[String, String]) extends Map[String, String]
  with Serializable {

  val baseMap = map.map(kv => kv.copy(_1 = kv._1.toLowerCase))

  override def get(k: String): Option[String] = baseMap.get(k.toLowerCase)

  override def + [B1 >: String](kv: (String, B1)): Map[String, B1] =
    baseMap + kv.copy(_1 = kv._1.toLowerCase)

  override def iterator: Iterator[(String, String)] = baseMap.iterator

  override def -(key: String): Map[String, String] = baseMap - key.toLowerCase
} 
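A brief usage sketch of the CaseInsensitiveMap defined above (the option values are made up for illustration): keys are lower-cased on insertion and on lookup, so option names match regardless of case.

val options = new CaseInsensitiveMap(Map("Path" -> "/tmp/data", "Header" -> "true"))
options.get("path")    // Some("/tmp/data"): the key was lower-cased when stored
options.get("HEADER")  // Some("true"): the lookup key is lower-cased as well
options.get("missing") // None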
Example 5
Source File: cache.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql.execution.command

import org.apache.spark.sql.{Dataset, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

case class CacheTableCommand(
    tableIdent: TableIdentifier,
    plan: Option[LogicalPlan],
    isLazy: Boolean) extends RunnableCommand {
  require(plan.isEmpty || tableIdent.database.isEmpty,
    "Database name is not allowed in CACHE TABLE AS SELECT")

  override protected def innerChildren: Seq[QueryPlan[_]] = {
    plan.toSeq
  }

  override def run(sparkSession: SparkSession): Seq[Row] = {
    plan.foreach { logicalPlan =>
      Dataset.ofRows(sparkSession, logicalPlan).createTempView(tableIdent.quotedString)
    }
    sparkSession.catalog.cacheTable(tableIdent.quotedString)

    if (!isLazy) {
      // Performs eager caching
      sparkSession.table(tableIdent).count()
    }

    Seq.empty[Row]
  }
}


case class UncacheTableCommand(tableIdent: TableIdentifier) extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    sparkSession.catalog.uncacheTable(tableIdent.quotedString)
    Seq.empty[Row]
  }
}


case object ClearCacheCommand extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    sparkSession.catalog.clearCache()
    Seq.empty[Row]
  }
} 
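These commands back the CACHE TABLE, UNCACHE TABLE and CLEAR CACHE statements; a rough usage sketch, assuming a running SparkSession named spark and an existing view foo:

spark.sql("CACHE TABLE cached_foo AS SELECT * FROM foo")    // eager: CacheTableCommand triggers count()
spark.sql("CACHE LAZY TABLE lazy_foo AS SELECT * FROM foo") // lazy: materialized on first access
spark.sql("UNCACHE TABLE cached_foo")                       // UncacheTableCommand
spark.sql("CLEAR CACHE")                                    // ClearCacheCommand drops all cached entries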
Example 6
Source File: rules.scala    From tispark   with Apache License 2.0
package org.apache.spark.sql.extensions

import com.pingcap.tispark.statistics.StatisticsManager
import com.pingcap.tispark.utils.ReflectionUtil._
import com.pingcap.tispark.{MetaManager, TiDBRelation, TiTableReference}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, UnresolvedRelation}
import org.apache.spark.sql.catalyst.catalog.TiSessionCatalog
import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.command._
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.{AnalysisException, _}

case class TiResolutionRule(getOrCreateTiContext: SparkSession => TiContext)(
    sparkSession: SparkSession)
    extends Rule[LogicalPlan] {
  protected lazy val meta: MetaManager = tiContext.meta
  private lazy val autoLoad = tiContext.autoLoad
  private lazy val tiCatalog = tiContext.tiCatalog
  private lazy val tiSession = tiContext.tiSession
  private lazy val sqlContext = tiContext.sqlContext
  protected val tiContext: TiContext = getOrCreateTiContext(sparkSession)
  protected val resolveTiDBRelation: TableIdentifier => LogicalPlan =
    tableIdentifier => {
      val dbName = getDatabaseFromIdentifier(tableIdentifier)
      val tableName = tableIdentifier.table
      val table = meta.getTable(dbName, tableName)
      if (table.isEmpty) {
        throw new AnalysisException(s"Table or view '$tableName' not found in database '$dbName'")
      }
      if (autoLoad) {
        StatisticsManager.loadStatisticsInfo(table.get)
      }
      val sizeInBytes = StatisticsManager.estimateTableSize(table.get)
      val tiDBRelation =
        TiDBRelation(tiSession, TiTableReference(dbName, tableName, sizeInBytes), meta)(
          sqlContext)
      // Use SubqueryAlias so that projects and joins can correctly resolve
      // UnresolvedAttributes in JoinConditions, Projects, Filters, etc.
      newSubqueryAlias(tableName, LogicalRelation(tiDBRelation))
    }

  override def apply(plan: LogicalPlan): LogicalPlan =
    plan transformUp resolveTiDBRelations

  protected def resolveTiDBRelations: PartialFunction[LogicalPlan, LogicalPlan] = {
    case i @ InsertIntoTable(UnresolvedRelation(tableIdentifier), _, _, _, _)
        if tiCatalog
          .catalogOf(tableIdentifier.database)
          .exists(_.isInstanceOf[TiSessionCatalog]) =>
      i.copy(table = EliminateSubqueryAliases(resolveTiDBRelation(tableIdentifier)))
    case UnresolvedRelation(tableIdentifier)
        if tiCatalog
          .catalogOf(tableIdentifier.database)
          .exists(_.isInstanceOf[TiSessionCatalog]) =>
      resolveTiDBRelation(tableIdentifier)
  }

  private def getDatabaseFromIdentifier(tableIdentifier: TableIdentifier): String =
    tableIdentifier.database.getOrElse(tiCatalog.getCurrentDatabase)
}

case class TiDDLRule(getOrCreateTiContext: SparkSession => TiContext)(sparkSession: SparkSession)
    extends Rule[LogicalPlan] {
  protected lazy val tiContext: TiContext = getOrCreateTiContext(sparkSession)

  override def apply(plan: LogicalPlan): LogicalPlan =
    plan transformUp {
      // TODO: support other commands that may concern TiSpark catalog.
      case sd: ShowDatabasesCommand =>
        TiShowDatabasesCommand(tiContext, sd)
      case sd: SetDatabaseCommand =>
        TiSetDatabaseCommand(tiContext, sd)
      case st: ShowTablesCommand =>
        TiShowTablesCommand(tiContext, st)
      case st: ShowColumnsCommand =>
        TiShowColumnsCommand(tiContext, st)
      case dt: DescribeTableCommand =>
        TiDescribeTablesCommand(tiContext, dt)
      case dc: DescribeColumnCommand =>
        TiDescribeColumnCommand(tiContext, dc)
      case ct: CreateTableLikeCommand =>
        TiCreateTableLikeCommand(tiContext, ct)
    }
} 
Example 7
Source File: parser.scala    From tispark   with Apache License 2.0
package org.apache.spark.sql.extensions

import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.expressions.{Expression, SubqueryExpression}
import org.apache.spark.sql.catalyst.parser._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.execution.SparkSqlParser
import org.apache.spark.sql.execution.command.{
  CacheTableCommand,
  CreateViewCommand,
  ExplainCommand,
  UncacheTableCommand
}
import org.apache.spark.sql.types.{DataType, StructType}
import org.apache.spark.sql.{SparkSession, TiContext}

case class TiParser(getOrCreateTiContext: SparkSession => TiContext)(
    sparkSession: SparkSession,
    delegate: ParserInterface)
    extends ParserInterface {
  private lazy val tiContext = getOrCreateTiContext(sparkSession)
  private lazy val internal = new SparkSqlParser(sparkSession.sqlContext.conf)

  private def needQualify(tableIdentifier: TableIdentifier) =
    tableIdentifier.database.isEmpty && tiContext.sessionCatalog
      .getTempView(tableIdentifier.table)
      .isEmpty
} 
Example 8
Source File: TiConcreteSessionCatalog.scala    From tispark   with Apache License 2.0
package org.apache.spark.sql.catalyst.catalog

import com.pingcap.tispark.utils.ReflectionUtil._
import org.apache.spark.sql.TiContext
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.EmptyFunctionRegistry

class TiConcreteSessionCatalog(val tiContext: TiContext)(tiExternalCatalog: ExternalCatalog)
    extends SessionCatalog(tiExternalCatalog, EmptyFunctionRegistry, tiContext.sqlContext.conf)
    with TiSessionCatalog {

  override def catalogOf(database: Option[String]): Option[SessionCatalog] = {
    val db = database.getOrElse(getCurrentDatabase)
    if (databaseExists(db))
      Some(this)
    else
      None
  }

  override def databaseExists(db: String): Boolean =
    callTiDirectExternalCatalogDatabaseExists(tiExternalCatalog, db)

  override def tableExists(name: TableIdentifier): Boolean =
    callTiDirectExternalCatalogTableExists(
      tiExternalCatalog,
      name.database.getOrElse(getCurrentDatabase),
      name.table)
} 
Example 9
Source File: HiveExternalCatalogSuite.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.hive

import org.apache.hadoop.conf.Configuration

import org.apache.spark.SparkConf
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.execution.command.DDLUtils
import org.apache.spark.sql.types.StructType


class HiveExternalCatalogSuite extends ExternalCatalogSuite {

  private val externalCatalog: HiveExternalCatalog = {
    val catalog = new HiveExternalCatalog(new SparkConf, new Configuration)
    catalog.client.reset()
    catalog
  }

  protected override val utils: CatalogTestUtils = new CatalogTestUtils {
    override val tableInputFormat: String = "org.apache.hadoop.mapred.SequenceFileInputFormat"
    override val tableOutputFormat: String = "org.apache.hadoop.mapred.SequenceFileOutputFormat"
    override def newEmptyCatalog(): ExternalCatalog = externalCatalog
    override val defaultProvider: String = "hive"
  }

  protected override def resetState(): Unit = {
    externalCatalog.client.reset()
  }

  import utils._

  test("SPARK-18647: do not put provider in table properties for Hive serde table") {
    val catalog = newBasicCatalog()
    val hiveTable = CatalogTable(
      identifier = TableIdentifier("hive_tbl", Some("db1")),
      tableType = CatalogTableType.MANAGED,
      storage = storageFormat,
      schema = new StructType().add("col1", "int").add("col2", "string"),
      provider = Some("hive"))
    catalog.createTable(hiveTable, ignoreIfExists = false)

    val rawTable = externalCatalog.client.getTable("db1", "hive_tbl")
    assert(!rawTable.properties.contains(HiveExternalCatalog.DATASOURCE_PROVIDER))
    assert(DDLUtils.isHiveTable(externalCatalog.getTable("db1", "hive_tbl")))
  }

  Seq("parquet", "hive").foreach { format =>
    test(s"Partition columns should be put at the end of table schema for the format $format") {
      val catalog = newBasicCatalog()
      val newSchema = new StructType()
        .add("col1", "int")
        .add("col2", "string")
        .add("partCol1", "int")
        .add("partCol2", "string")
      val table = CatalogTable(
        identifier = TableIdentifier("tbl", Some("db1")),
        tableType = CatalogTableType.MANAGED,
        storage = CatalogStorageFormat.empty,
        schema = new StructType()
          .add("col1", "int")
          .add("partCol1", "int")
          .add("partCol2", "string")
          .add("col2", "string"),
        provider = Some(format),
        partitionColumnNames = Seq("partCol1", "partCol2"))
      catalog.createTable(table, ignoreIfExists = false)

      val restoredTable = externalCatalog.getTable("db1", "tbl")
      assert(restoredTable.schema == newSchema)
    }
  }

  test("SPARK-22306: alter table schema should not erase the bucketing metadata at hive side") {
    val catalog = newBasicCatalog()
    externalCatalog.client.runSqlHive(
      """
        |CREATE TABLE db1.t(a string, b string)
        |CLUSTERED BY (a, b) SORTED BY (a, b) INTO 10 BUCKETS
        |STORED AS PARQUET
      """.stripMargin)

    val newSchema = new StructType().add("a", "string").add("b", "string").add("c", "string")
    catalog.alterTableDataSchema("db1", "t", newSchema)

    assert(catalog.getTable("db1", "t").schema == newSchema)
    val bucketString = externalCatalog.client.runSqlHive("DESC FORMATTED db1.t")
      .filter(_.contains("Num Buckets")).head
    assert(bucketString.contains("10"))
  }

  test("SPARK-23001: NullPointerException when running desc database") {
    val catalog = newBasicCatalog()
    catalog.createDatabase(newDb("dbWithNullDesc").copy(description = null), ignoreIfExists = false)
    assert(catalog.getDatabase("dbWithNullDesc").description == "")
  }
} 
Example 10
Source File: PruneFileSourcePartitionsSuite.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.hive.execution

import org.scalatest.Matchers._

import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project, ResolvedHint}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.execution.datasources.{CatalogFileIndex, HadoopFsRelation, LogicalRelation, PruneFileSourcePartitions}
import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec
import org.apache.spark.sql.functions.broadcast
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types.StructType

class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("PruneFileSourcePartitions", Once, PruneFileSourcePartitions) :: Nil
  }

  test("PruneFileSourcePartitions should not change the output of LogicalRelation") {
    withTable("test") {
      withTempDir { dir =>
        sql(
          s"""
            |CREATE EXTERNAL TABLE test(i int)
            |PARTITIONED BY (p int)
            |STORED AS parquet
            |LOCATION '${dir.toURI}'""".stripMargin)

        val tableMeta = spark.sharedState.externalCatalog.getTable("default", "test")
        val catalogFileIndex = new CatalogFileIndex(spark, tableMeta, 0)

        val dataSchema = StructType(tableMeta.schema.filterNot { f =>
          tableMeta.partitionColumnNames.contains(f.name)
        })
        val relation = HadoopFsRelation(
          location = catalogFileIndex,
          partitionSchema = tableMeta.partitionSchema,
          dataSchema = dataSchema,
          bucketSpec = None,
          fileFormat = new ParquetFileFormat(),
          options = Map.empty)(sparkSession = spark)

        val logicalRelation = LogicalRelation(relation, tableMeta)
        val query = Project(Seq('i, 'p), Filter('p === 1, logicalRelation)).analyze

        val optimized = Optimize.execute(query)
        assert(optimized.missingInput.isEmpty)
      }
    }
  }

  test("SPARK-20986 Reset table's statistics after PruneFileSourcePartitions rule") {
    withTable("tbl") {
      spark.range(10).selectExpr("id", "id % 3 as p").write.partitionBy("p").saveAsTable("tbl")
      sql(s"ANALYZE TABLE tbl COMPUTE STATISTICS")
      val tableStats = spark.sessionState.catalog.getTableMetadata(TableIdentifier("tbl")).stats
      assert(tableStats.isDefined && tableStats.get.sizeInBytes > 0, "tableStats is lost")

      val df = sql("SELECT * FROM tbl WHERE p = 1")
      val sizes1 = df.queryExecution.analyzed.collect {
        case relation: LogicalRelation => relation.catalogTable.get.stats.get.sizeInBytes
      }
      assert(sizes1.size === 1, s"Size wrong for:\n ${df.queryExecution}")
      assert(sizes1(0) == tableStats.get.sizeInBytes)

      val relations = df.queryExecution.optimizedPlan.collect {
        case relation: LogicalRelation => relation
      }
      assert(relations.size === 1, s"Size wrong for:\n ${df.queryExecution}")
      val size2 = relations(0).stats.sizeInBytes
      assert(size2 == relations(0).catalogTable.get.stats.get.sizeInBytes)
      assert(size2 < tableStats.get.sizeInBytes)
    }
  }

  test("SPARK-26576 Broadcast hint not applied to partitioned table") {
    withTable("tbl") {
      withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") {
        spark.range(10).selectExpr("id", "id % 3 as p").write.partitionBy("p").saveAsTable("tbl")
        val df = spark.table("tbl")
        val qe = df.join(broadcast(df), "p").queryExecution
        qe.optimizedPlan.collect { case _: ResolvedHint => } should have size 1
        qe.sparkPlan.collect { case j: BroadcastHashJoinExec => j } should have size 1
      }
    }
  }
} 
Example 11
Source File: ListTablesSuite.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.hive

import org.scalatest.BeforeAndAfterAll

import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.hive.test.TestHiveSingleton

class ListTablesSuite extends QueryTest with TestHiveSingleton with BeforeAndAfterAll {
  import hiveContext._
  import hiveContext.implicits._

  val df = sparkContext.parallelize((1 to 10).map(i => (i, s"str$i"))).toDF("key", "value")

  override def beforeAll(): Unit = {
    super.beforeAll()
    // The catalog in HiveContext is a case insensitive one.
    sessionState.catalog.createTempView(
      "ListTablesSuiteTable", df.logicalPlan, overrideIfExists = true)
    sql("CREATE TABLE HiveListTablesSuiteTable (key int, value string)")
    sql("CREATE DATABASE IF NOT EXISTS ListTablesSuiteDB")
    sql("CREATE TABLE ListTablesSuiteDB.HiveInDBListTablesSuiteTable (key int, value string)")
  }

  override def afterAll(): Unit = {
    try {
      sessionState.catalog.dropTable(
        TableIdentifier("ListTablesSuiteTable"), ignoreIfNotExists = true, purge = false)
      sql("DROP TABLE IF EXISTS HiveListTablesSuiteTable")
      sql("DROP TABLE IF EXISTS ListTablesSuiteDB.HiveInDBListTablesSuiteTable")
      sql("DROP DATABASE IF EXISTS ListTablesSuiteDB")
    } finally {
      super.afterAll()
    }
  }

  test("get all tables of current database") {
    Seq(tables(), sql("SHOW TABLes")).foreach {
      case allTables =>
        // We are using default DB.
        checkAnswer(
          allTables.filter("tableName = 'listtablessuitetable'"),
          Row("", "listtablessuitetable", true))
        checkAnswer(
          allTables.filter("tableName = 'hivelisttablessuitetable'"),
          Row("default", "hivelisttablessuitetable", false))
        assert(allTables.filter("tableName = 'hiveindblisttablessuitetable'").count() === 0)
    }
  }

  test("getting all tables with a database name") {
    Seq(tables("listtablessuiteDb"), sql("SHOW TABLes in listTablesSuitedb")).foreach {
      case allTables =>
        checkAnswer(
          allTables.filter("tableName = 'listtablessuitetable'"),
          Row("", "listtablessuitetable", true))
        assert(allTables.filter("tableName = 'hivelisttablessuitetable'").count() === 0)
        checkAnswer(
          allTables.filter("tableName = 'hiveindblisttablessuitetable'"),
          Row("listtablessuitedb", "hiveindblisttablessuitetable", false))
    }
  }
} 
Example 12
Source File: XSQLAnalyzeTableCommand.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.xsql.execution.command

import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.CatalogTableType
import org.apache.spark.sql.execution.command.{CommandUtils, RunnableCommand}
import org.apache.spark.sql.xsql.XSQLSessionCatalog


case class XSQLAnalyzeTableCommand(tableIdent: TableIdentifier, noscan: Boolean = true)
  extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val sessionState = sparkSession.sessionState
    val catalog = sparkSession.sessionState.catalog.asInstanceOf[XSQLSessionCatalog]
    val catalogDB = catalog.getUsedCatalogDatabase(tableIdent.dataSource, tableIdent.database)
    if (catalogDB == None) {
      return Seq.empty[Row]
    }
    val ds = catalogDB.get.dataSourceName
    val db = catalogDB.get.name
    val tableIdentWithDB = TableIdentifier(tableIdent.table, Some(db), Some(ds))
    val tableMeta = catalog.getRawTable(tableIdentWithDB)
    if (tableMeta.tableType == CatalogTableType.VIEW) {
      throw new AnalysisException("ANALYZE TABLE is not supported on views.")
    }

    // Compute stats for the whole table
    val newTotalSize = CommandUtils.calculateTotalSize(sparkSession, tableMeta)
    val newRowCount =
      if (noscan) None else Some(BigInt(sparkSession.table(tableIdentWithDB).count()))

    // Update the metastore if the above statistics of the table are different from those
    // recorded in the metastore.
    val newStats = CommandUtils.compareAndGetNewStats(tableMeta.stats, newTotalSize, newRowCount)
    if (newStats.isDefined) {
      catalog.alterTableStats(tableIdentWithDB, newStats)
    }

    Seq.empty[Row]
  }
} 
Example 13
Source File: TableIdentifierParserSuite.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.catalyst.parser

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.TableIdentifier

class TableIdentifierParserSuite extends SparkFunSuite {
  import CatalystSqlParser._

  // Add "$elem$", "$value$" & "$key$"
  val hiveNonReservedKeyword = Array("add", "admin", "after", "analyze", "archive", "asc", "before",
    "bucket", "buckets", "cascade", "change", "cluster", "clustered", "clusterstatus", "collection",
    "columns", "comment", "compact", "compactions", "compute", "concatenate", "continue", "cost",
    "data", "day", "databases", "datetime", "dbproperties", "deferred", "defined", "delimited",
    "dependency", "desc", "directories", "directory", "disable", "distribute",
    "enable", "escaped", "exclusive", "explain", "export", "fields", "file", "fileformat", "first",
    "format", "formatted", "functions", "hold_ddltime", "hour", "idxproperties", "ignore", "index",
    "indexes", "inpath", "inputdriver", "inputformat", "items", "jar", "keys", "key_type", "last",
    "limit", "offset", "lines", "load", "location", "lock", "locks", "logical", "long", "mapjoin",
    "materialized", "metadata", "minus", "minute", "month", "msck", "noscan", "no_drop", "nulls",
    "offline", "option", "outputdriver", "outputformat", "overwrite", "owner", "partitioned",
    "partitions", "plus", "pretty", "principals", "protection", "purge", "read", "readonly",
    "rebuild", "recordreader", "recordwriter", "reload", "rename", "repair", "replace",
    "replication", "restrict", "rewrite", "role", "roles", "schemas", "second",
    "serde", "serdeproperties", "server", "sets", "shared", "show", "show_database", "skewed",
    "sort", "sorted", "ssl", "statistics", "stored", "streamtable", "string", "struct", "tables",
    "tblproperties", "temporary", "terminated", "tinyint", "touch", "transactions", "unarchive",
    "undo", "uniontype", "unlock", "unset", "unsigned", "uri", "use", "utc", "utctimestamp",
    "view", "while", "year", "work", "transaction", "write", "isolation", "level", "snapshot",
    "autocommit", "all", "any", "alter", "array", "as", "authorization", "between", "bigint",
    "binary", "boolean", "both", "by", "create", "cube", "current_date", "current_timestamp",
    "cursor", "date", "decimal", "delete", "describe", "double", "drop", "exists", "external",
    "false", "fetch", "float", "for", "grant", "group", "grouping", "import", "in",
    "insert", "int", "into", "is", "pivot", "lateral", "like", "local", "none", "null",
    "of", "order", "out", "outer", "partition", "percent", "procedure", "range", "reads", "revoke",
    "rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger",
    "true", "truncate", "update", "user", "values", "with", "regexp", "rlike",
    "bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float",
    "int", "smallint", "timestamp", "at", "position", "both", "leading", "trailing", "extract")

  val hiveStrictNonReservedKeyword = Seq("anti", "full", "inner", "left", "semi", "right",
    "natural", "union", "intersect", "except", "database", "on", "join", "cross", "select", "from",
    "where", "having", "from", "to", "table", "with", "not")

  test("table identifier") {
    // Regular names.
    assert(TableIdentifier("q") === parseTableIdentifier("q"))
    assert(TableIdentifier("q", Option("d")) === parseTableIdentifier("d.q"))

    // Illegal names.
    Seq("", "d.q.g", "t:", "${some.var.x}", "tab:1").foreach { identifier =>
      intercept[ParseException](parseTableIdentifier(identifier))
    }
  }

  test("quoted identifiers") {
    assert(TableIdentifier("z", Some("x.y")) === parseTableIdentifier("`x.y`.z"))
    assert(TableIdentifier("y.z", Some("x")) === parseTableIdentifier("x.`y.z`"))
    assert(TableIdentifier("z", Some("`x.y`")) === parseTableIdentifier("```x.y```.z"))
    assert(TableIdentifier("`y.z`", Some("x")) === parseTableIdentifier("x.```y.z```"))
    assert(TableIdentifier("x.y.z", None) === parseTableIdentifier("`x.y.z`"))
  }

  test("table identifier - strict keywords") {
    // SQL Keywords.
    hiveStrictNonReservedKeyword.foreach { keyword =>
      assert(TableIdentifier(keyword) === parseTableIdentifier(keyword))
      assert(TableIdentifier(keyword) === parseTableIdentifier(s"`$keyword`"))
      assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.`$keyword`"))
    }
  }

  test("table identifier - non reserved keywords") {
    // Hive keywords are allowed.
    hiveNonReservedKeyword.foreach { nonReserved =>
      assert(TableIdentifier(nonReserved) === parseTableIdentifier(nonReserved))
    }
  }

  test("SPARK-17364 table identifier - contains number") {
    assert(parseTableIdentifier("123_") == TableIdentifier("123_"))
    assert(parseTableIdentifier("1a.123_") == TableIdentifier("123_", Some("1a")))
    // ".123" should not be treated as token of type DECIMAL_VALUE
    assert(parseTableIdentifier("a.123A") == TableIdentifier("123A", Some("a")))
    // ".123E3" should not be treated as token of type SCIENTIFIC_DECIMAL_VALUE
    assert(parseTableIdentifier("a.123E3_LIST") == TableIdentifier("123E3_LIST", Some("a")))
    // ".123D" should not be treated as token of type DOUBLE_LITERAL
    assert(parseTableIdentifier("a.123D_LIST") == TableIdentifier("123D_LIST", Some("a")))
    // ".123BD" should not be treated as token of type BIGDECIMAL_LITERAL
    assert(parseTableIdentifier("a.123BD_LIST") == TableIdentifier("123BD_LIST", Some("a")))
  }

  test("SPARK-17832 table identifier - contains backtick") {
    val complexName = TableIdentifier("`weird`table`name", Some("`d`b`1"))
    assert(complexName === parseTableIdentifier("```d``b``1`.```weird``table``name`"))
    assert(complexName === parseTableIdentifier(complexName.quotedString))
    intercept[ParseException](parseTableIdentifier(complexName.unquotedString))
    // A table identifier containing consecutive backticks should be treated correctly.
    val complexName2 = TableIdentifier("x``y", Some("d``b"))
    assert(complexName2 === parseTableIdentifier(complexName2.quotedString))
  }
} 
Example 14
Source File: CatalogSuite.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.types.StructType


class CatalogSuite extends AnalysisTest {

  test("desc table when owner is set to null") {
    val table = CatalogTable(
      identifier = TableIdentifier("tbl", Some("db1")),
      tableType = CatalogTableType.MANAGED,
      storage = CatalogStorageFormat.empty,
      owner = null,
      schema = new StructType().add("col1", "int").add("col2", "string"),
      provider = Some("parquet"))
    table.toLinkedHashMap
  }
} 
Example 15
Source File: LookupFunctionsSuite.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.catalyst.analysis

import java.net.URI

import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog, SessionCatalog}
import org.apache.spark.sql.catalyst.expressions.Alias
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.internal.SQLConf

class LookupFunctionsSuite extends PlanTest {

  test("SPARK-23486: the functionExists for the Persistent function check") {
    val externalCatalog = new CustomInMemoryCatalog
    val conf = new SQLConf()
    val catalog = new SessionCatalog(externalCatalog, FunctionRegistry.builtin, conf)
    val analyzer = {
      catalog.createDatabase(
        CatalogDatabase("default", "", new URI("loc"), Map.empty),
        ignoreIfExists = false)
      new Analyzer(catalog, conf)
    }

    def table(ref: String): LogicalPlan = UnresolvedRelation(TableIdentifier(ref))
    val unresolvedPersistentFunc = UnresolvedFunction("func", Seq.empty, false)
    val unresolvedRegisteredFunc = UnresolvedFunction("max", Seq.empty, false)
    val plan = Project(
      Seq(Alias(unresolvedPersistentFunc, "call1")(), Alias(unresolvedPersistentFunc, "call2")(),
        Alias(unresolvedPersistentFunc, "call3")(), Alias(unresolvedRegisteredFunc, "call4")(),
        Alias(unresolvedRegisteredFunc, "call5")()),
      table("TaBlE"))
    analyzer.LookupFunctions.apply(plan)

    assert(externalCatalog.getFunctionExistsCalledTimes == 1)
    assert(analyzer.LookupFunctions.normalizeFuncName
      (unresolvedPersistentFunc.name).database == Some("default"))
  }

  test("SPARK-23486: the functionExists for the Registered function check") {
    val externalCatalog = new InMemoryCatalog
    val conf = new SQLConf()
    val customerFunctionReg = new CustomerFunctionRegistry
    val catalog = new SessionCatalog(externalCatalog, customerFunctionReg, conf)
    val analyzer = {
      catalog.createDatabase(
        CatalogDatabase("default", "", new URI("loc"), Map.empty),
        ignoreIfExists = false)
      new Analyzer(catalog, conf)
    }

    def table(ref: String): LogicalPlan = UnresolvedRelation(TableIdentifier(ref))
    val unresolvedRegisteredFunc = UnresolvedFunction("max", Seq.empty, false)
    val plan = Project(
      Seq(Alias(unresolvedRegisteredFunc, "call1")(), Alias(unresolvedRegisteredFunc, "call2")()),
      table("TaBlE"))
    analyzer.LookupFunctions.apply(plan)

    assert(customerFunctionReg.getIsRegisteredFunctionCalledTimes == 2)
    assert(analyzer.LookupFunctions.normalizeFuncName
      (unresolvedRegisteredFunc.name).database == Some("default"))
  }
}

class CustomerFunctionRegistry extends SimpleFunctionRegistry {

  private var isRegisteredFunctionCalledTimes: Int = 0;

  override def functionExists(funcN: FunctionIdentifier): Boolean = synchronized {
    isRegisteredFunctionCalledTimes = isRegisteredFunctionCalledTimes + 1
    true
  }

  def getIsRegisteredFunctionCalledTimes: Int = isRegisteredFunctionCalledTimes
}

class CustomInMemoryCatalog extends InMemoryCatalog {

  private var functionExistsCalledTimes: Int = 0

  override def functionExists(db: String, funcName: String): Boolean = synchronized {
    functionExistsCalledTimes = functionExistsCalledTimes + 1
    true
  }

  def getFunctionExistsCalledTimes: Int = functionExistsCalledTimes
} 
Example 16
Source File: ddl.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import java.util.Locale

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils}
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.command.{DDLUtils, RunnableCommand}
import org.apache.spark.sql.types._


case class CreateTempViewUsing(
    tableIdent: TableIdentifier,
    userSpecifiedSchema: Option[StructType],
    replace: Boolean,
    global: Boolean,
    provider: String,
    options: Map[String, String]) extends RunnableCommand {

  if (tableIdent.database.isDefined) {
    throw new AnalysisException(
      s"Temporary view '$tableIdent' should not have specified a database")
  }

  override def argString: String = {
    s"[tableIdent:$tableIdent " +
      userSpecifiedSchema.map(_ + " ").getOrElse("") +
      s"replace:$replace " +
      s"provider:$provider " +
      CatalogUtils.maskCredentials(options)
  }

  override def run(sparkSession: SparkSession): Seq[Row] = {
    if (provider.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) {
      throw new AnalysisException("Hive data source can only be used with tables, " +
        "you can't use it with CREATE TEMP VIEW USING")
    }

    val dataSource = DataSource(
      sparkSession,
      userSpecifiedSchema = userSpecifiedSchema,
      className = provider,
      options = options)

    val catalog = sparkSession.sessionState.catalog
    val viewDefinition = Dataset.ofRows(
      sparkSession, LogicalRelation(dataSource.resolveRelation())).logicalPlan

    if (global) {
      catalog.createGlobalTempView(tableIdent.table, viewDefinition, replace)
    } else {
      catalog.createTempView(tableIdent.table, viewDefinition, replace)
    }

    Seq.empty[Row]
  }
}

case class RefreshTable(tableIdent: TableIdentifier)
  extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    // Refresh the given table's metadata. If this table is cached as an InMemoryRelation,
    // drop the original cached version and make the new version cached lazily.
    sparkSession.catalog.refreshTable(tableIdent.quotedString)
    Seq.empty[Row]
  }
}

case class RefreshResource(path: String)
  extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    sparkSession.catalog.refreshByPath(path)
    Seq.empty[Row]
  }
} 
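These commands correspond to the following SQL forms; a minimal sketch assuming a SparkSession named spark and a Parquet directory at a hypothetical path:

spark.sql("""
  CREATE TEMPORARY VIEW events
  USING parquet
  OPTIONS (path '/tmp/events')
""")                                // planned as CreateTempViewUsing
spark.sql("REFRESH TABLE events")   // planned as RefreshTable
spark.sql("REFRESH '/tmp/events'")  // planned as RefreshResource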
Example 17
Source File: cache.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.execution.command

import org.apache.spark.sql.{Dataset, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

case class CacheTableCommand(
    tableIdent: TableIdentifier,
    plan: Option[LogicalPlan],
    isLazy: Boolean) extends RunnableCommand {
  require(plan.isEmpty || tableIdent.database.isEmpty,
    "Database name is not allowed in CACHE TABLE AS SELECT")

  override protected def innerChildren: Seq[QueryPlan[_]] = plan.toSeq

  override def run(sparkSession: SparkSession): Seq[Row] = {
    plan.foreach { logicalPlan =>
      Dataset.ofRows(sparkSession, logicalPlan).createTempView(tableIdent.quotedString)
    }
    sparkSession.catalog.cacheTable(tableIdent.quotedString)

    if (!isLazy) {
      // Performs eager caching
      sparkSession.table(tableIdent).count()
    }

    Seq.empty[Row]
  }
}


case class UncacheTableCommand(
    tableIdent: TableIdentifier,
    ifExists: Boolean) extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val tableId = tableIdent.quotedString
    if (!ifExists || sparkSession.catalog.tableExists(tableId)) {
      sparkSession.catalog.uncacheTable(tableId)
    }
    Seq.empty[Row]
  }
}


case class ClearCacheCommand() extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    sparkSession.catalog.clearCache()
    Seq.empty[Row]
  }

  // ClearCacheCommand takes no constructor arguments, so makeCopy ignores newArgs
  // and simply rebuilds an empty instance.
  override def makeCopy(newArgs: Array[AnyRef]): ClearCacheCommand = ClearCacheCommand()
}
Example 18
Source File: AnalyzeTableCommand.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.execution.command

import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.CatalogTableType



case class AnalyzeTableCommand(
    tableIdent: TableIdentifier,
    noscan: Boolean = true) extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val sessionState = sparkSession.sessionState
    val db = tableIdent.database.getOrElse(sessionState.catalog.getCurrentDatabase)
    val tableIdentWithDB = TableIdentifier(tableIdent.table, Some(db))
    val tableMeta = sessionState.catalog.getTableMetadata(tableIdentWithDB)
    if (tableMeta.tableType == CatalogTableType.VIEW) {
      throw new AnalysisException("ANALYZE TABLE is not supported on views.")
    }

    // Compute stats for the whole table
    val newTotalSize = CommandUtils.calculateTotalSize(sparkSession, tableMeta)
    val newRowCount =
      if (noscan) None else Some(BigInt(sparkSession.table(tableIdentWithDB).count()))

    // Update the metastore if the above statistics of the table are different from those
    // recorded in the metastore.
    val newStats = CommandUtils.compareAndGetNewStats(tableMeta.stats, newTotalSize, newRowCount)
    if (newStats.isDefined) {
      sessionState.catalog.alterTableStats(tableIdentWithDB, newStats)
    }

    Seq.empty[Row]
  }
} 
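A short usage sketch of the statement this command implements (assuming a SparkSession named spark and an existing table tbl), plus how the collected statistics can be read back, as in the PruneFileSourcePartitions example earlier:

spark.sql("ANALYZE TABLE tbl COMPUTE STATISTICS NOSCAN") // noscan = true: total size only
spark.sql("ANALYZE TABLE tbl COMPUTE STATISTICS")        // noscan = false: also counts rows

val stats = spark.sessionState.catalog.getTableMetadata(TableIdentifier("tbl")).stats
stats.map(_.sizeInBytes)   // Some(<bytes>) once statistics have been collected
stats.flatMap(_.rowCount)  // Some(<rows>) only after a full (non-NOSCAN) analyze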
Example 19
Source File: SQLContextExtensionBase.scala    From HANAVora-Extensions   with Apache License 2.0
package org.apache.spark.sql.extension

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.{ParserDialect, TableIdentifier}
import org.apache.spark.sql.catalyst.analysis.{Analyzer, FunctionRegistry, SimpleFunctionRegistry}
import org.apache.spark.sql.catalyst.errors.DialectException
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.datasources.DDLParser
import org.apache.spark.sql.extension.OptimizerFactory.ExtendableOptimizerBatch
import org.apache.spark.util.Utils

import scala.util.Try
import scala.util.control.NonFatal


  override protected def extendedParserDialect: ParserDialect =
    try {
      val clazz = Utils.classForName(dialectClassName)
      clazz.newInstance().asInstanceOf[ParserDialect]
    } catch {
      case NonFatal(e) =>
        // The requested SQL dialect could not be instantiated, so even a SET command such as
        // "SET spark.sql.dialect=sql" would fail. Reset to the default dialect automatically.
        val dialect = conf.dialect
        // reset the sql dialect
        conf.unsetConf(SQLConf.DIALECT)
        // throw out the exception, and the default sql dialect will take effect for next query.
        throw new DialectException(
          s"""
              |Instantiating dialect '$dialect' failed.
              |Reverting to default dialect '${conf.dialect}'""".stripMargin, e)
    }

  // (suggestion) make this implicit to FunctionRegistry.
  protected def registerBuiltins(registry: FunctionRegistry): Unit = {
    FunctionRegistry.expressions.foreach {
      case (name, (info, builder)) => registry.registerFunction(name, builder)
    }
  }

  override protected def extendedDdlParser(parser: String => LogicalPlan): DDLParser =
    new DDLParser(sqlParser.parse(_))

  override protected def registerFunctions(registry: FunctionRegistry): Unit = { }

} 
Example 20
Source File: TemporaryFlagProxyCatalog.scala    From HANAVora-Extensions   with Apache License 2.0
package org.apache.spark.sql

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.{Catalog, OverrideCatalog}
import org.apache.spark.sql.catalyst.plans.logical.Subquery
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.sources.Relation


@deprecated("Use org.apache.spark.sql.TemporaryFlagCatalog instead")
trait TemporaryFlagProxyCatalog extends OverrideCatalog {
  abstract override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = {
    val tables = super.getTables(databaseName)
    tables.map {
      case (tableName: String , isTemporary: Boolean) =>
        val tableIdentifier = TableIdentifier(tableName)
        lookupRelation(tableIdentifier) match {
          case Subquery(_, LogicalRelation(relation: Relation, _)) =>
            (tableIdentifier.table, relation.isTemporary)
          case _ => (tableIdentifier.table, isTemporary)
        }
    }
  }
} 
Example 21
Source File: CreateTableUsingTemporaryAwareCommand.scala    From HANAVora-Extensions   with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.RunnableCommand
import org.apache.spark.sql.sources._
import org.apache.spark.sql.types._
import org.apache.spark.sql.{DataFrame, DatasourceResolver, Row, SQLContext}
import org.apache.spark.sql.catalyst.TableIdentifierUtils._
import org.apache.spark.sql.catalyst.CaseSensitivityUtils._


  // scalastyle:off method.length
  private def resolveDataSource(sqlContext: SQLContext,
                                dataSource: Any,
                                tableId: TableIdentifier): ResolvedDataSource = {
    dataSource match {
      case drp: PartitionedRelationProvider =>
        if (userSpecifiedSchema.isEmpty) {
          new ResolvedDataSource(drp.getClass,
            drp.createRelation(
              sqlContext,
              tableId.toSeq,
              new CaseInsensitiveMap(options),
              partitioningFunction,
              partitioningColumns,
              isTemporary,
              allowExisting))
        } else {
          new ResolvedDataSource(drp.getClass,
            drp.createRelation(
              sqlContext,
              tableId.toSeq,
              new CaseInsensitiveMap(options),
              userSpecifiedSchema.get,
              partitioningFunction,
              partitioningColumns,
              isTemporary,
              allowExisting))
        }
      case drp: TemporaryAndPersistentSchemaRelationProvider if userSpecifiedSchema.nonEmpty =>
            new ResolvedDataSource(drp.getClass,
              drp.createRelation(
                sqlContext,
                tableId.toSeq,
                new CaseInsensitiveMap(options),
                userSpecifiedSchema.get,
                isTemporary,
                allowExisting))
      case drp: TemporaryAndPersistentRelationProvider =>
        new ResolvedDataSource(drp.getClass,
          drp.createRelation(
            sqlContext,
            tableId.toSeq,
            new CaseInsensitiveMap(options),
            isTemporary,
            allowExisting))
      case _ => ResolvedDataSource(sqlContext, userSpecifiedSchema,
        partitionColumns, provider, options)
    }
  }
  // scalastyle:on method.length
} 
Example 22
Source File: SqlContextAccessor.scala    From HANAVora-Extensions   with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

import scala.language.implicitConversions


object SqlContextAccessor {
  implicit def sqlContextToCatalogAccessable(sqlContext: SQLContext): SqlContextCatalogAccessor =
    new SqlContextCatalogAccessor(sqlContext)

  class SqlContextCatalogAccessor(sqlContext: SQLContext)
    extends SQLContext(sqlContext.sparkContext) {

    def registerRawPlan(lp: LogicalPlan, tableName: String): Unit = {
      sqlContext.catalog.registerTable(TableIdentifier(tableName), lp)
    }
  }
} 
Example 23
Source File: DescribeTableUsingCommand.scala    From HANAVora-Extensions   with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.TableIdentifierUtils._
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.RunnableCommand
import org.apache.spark.sql.sources.{DatasourceCatalog, RelationInfo}
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{DatasourceResolver, Row, SQLContext}


private[sql]
case class DescribeTableUsingCommand(
    name: TableIdentifier,
    provider: String,
    options: Map[String, String])
  extends LogicalPlan
  with RunnableCommand {

  override def output: Seq[Attribute] = StructType(
    StructField("TABLE_NAME", StringType, nullable = false) ::
    StructField("DDL_STMT", StringType, nullable = false) ::
    Nil
  ).toAttributes

  override def run(sqlContext: SQLContext): Seq[Row] = {
    // Convert the table name according to the case-sensitivity settings
    val tableId = name.toSeq
    val resolver = DatasourceResolver.resolverFor(sqlContext)
    val catalog = resolver.newInstanceOfTyped[DatasourceCatalog](provider)

    Seq(catalog
      .getRelation(sqlContext, tableId, new CaseInsensitiveMap(options)) match {
        case None => Row("", "")
        case Some(RelationInfo(relName, _, _, ddl, _)) => Row(
          relName, ddl.getOrElse(""))
    })
  }
} 
Example 24
Source File: RegisterAllTablesCommand.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql.catalyst.CaseSensitivityUtils._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.RunnableCommand
import org.apache.spark.sql.execution.datasources.SqlContextAccessor._
import org.apache.spark.sql.sources.{LogicalPlanSource, RegisterAllTableRelations}
import org.apache.spark.sql.util.CollectionUtils._
import org.apache.spark.sql.{DatasourceResolver, Row, SQLContext}


    relations.map {
      case (name, source) =>
        val lp = source.logicalPlan(sqlContext)
        if (lp.resolved) {
          sqlContext.validatedSchema(lp.schema).recover {
            case d: DuplicateFieldsException =>
              throw new RuntimeException(
                s"Provider '$provider' returned a relation that has duplicate fields.",
                d)
          }.get
        } else {
          // TODO(AC): With the new view interface, this can be checked
          logWarning(s"Adding relation $name with potentially unreachable fields.")
        }
        name -> lp
    }.foreach {
      case (name, plan) =>
        sqlContext.registerRawPlan(plan, name)
    }
  }
} 
Example 25
Source File: AbstractViewCommand.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.RunnableCommand
import org.apache.spark.sql.sources.{AbstractViewProvider, ViewKind}
import org.apache.spark.sql.{DatasourceResolver, SQLContext}


  def withValidProvider[B](sqlContext: SQLContext)(b: AbstractViewProvider[_] => B): B = {
    val resolver = DatasourceResolver.resolverFor(sqlContext)
    AbstractViewProvider.matcherFor(kind)(resolver.newInstanceOf(provider)) match {
      case Some(viewProvider) =>
        b(viewProvider)
      case _ =>
        throw new ProviderException(provider, "Does not support the " +
          s"execution of ${this.getClass.getSimpleName}")
    }
  }
}

class ProviderException(val provider: String, val reason: String)
  extends Exception(s"Exception using provider $provider: $reason") 
Example 26
Source File: RegisterTableCommand.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql.catalyst.CaseSensitivityUtils._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.RunnableCommand
import org.apache.spark.sql.execution.datasources.SqlContextAccessor._
import org.apache.spark.sql.sources.RegisterAllTableRelations
import org.apache.spark.sql.{DatasourceResolver, Row, SQLContext}


      val relation = resolvedProvider.getTableRelation(tableName, sqlContext, options)

      relation match {
        case None =>
          sys.error(s"Relation $tableName was not found in the catalog.")
        case Some(r) =>
          val lp = r.logicalPlan(sqlContext)
          if (lp.resolved) {
            sqlContext.validatedSchema(lp.schema).recover {
              case d: DuplicateFieldsException =>
                throw new RuntimeException(
                  s"Provider '$provider' returned a relation that has duplicate fields.",
                  d)
            }.get
          } else {
            // TODO(AC): With the new view interface, this can be checked
            logWarning(s"Adding relation $tableName with potentially unreachable fields.")
          }
          sqlContext.registerRawPlan(lp, tableName)
      }
    }
    Seq.empty
  }
} 
Example 27
Source File: CreateTablePartitionedByUsing.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan}
import org.apache.spark.sql.types.StructType


case class CreateTablePartitionedByUsing(tableIdent: TableIdentifier,
                                         userSpecifiedSchema:
                                         Option[StructType],
                                         provider: String,
                                         partitioningFunc: String,
                                         partitioningColumns: Seq[String],
                                         temporary: Boolean,
                                         options: Map[String, String],
                                         allowExisting: Boolean,
                                         managedIfNoPath: Boolean)
  extends LogicalPlan with Command {

  override def output: Seq[Attribute] = Seq.empty
  override def children: Seq[LogicalPlan] = Seq.empty
} 
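A hedged sketch of constructing this plan node by hand; in practice it is produced by the extended DDL parser, and the provider, partitioning function, and option values below are purely illustrative.

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.datasources.CreateTablePartitionedByUsing
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

object CreateTablePartitionedByUsingDemo {
  def main(args: Array[String]): Unit = {
    val plan = CreateTablePartitionedByUsing(
      tableIdent = TableIdentifier("sales", Some("demo")),
      userSpecifiedSchema = Some(StructType(StructField("id", IntegerType) :: Nil)),
      provider = "com.example.some.datasource",   // hypothetical provider class
      partitioningFunc = "hash_pf",               // hypothetical partitioning function
      partitioningColumns = Seq("id"),
      temporary = false,
      options = Map.empty,
      allowExisting = false,
      managedIfNoPath = true)

    // The node is a pure Command: no output attributes, no children.
    println(plan)
  }
}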
Example 28
Source File: DescCommand.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.sources.commands.hive

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.types.StringType
import org.apache.spark.sql.{Row, SQLContext}


case class DescCommand(ident: TableIdentifier) extends HiveRunnableCommand {

  override protected val commandName: String = s"DESC $ident"

  override def execute(sqlContext: SQLContext): Seq[Row] = {
    val plan = sqlContext.catalog.lookupRelation(ident)
    if (plan.resolved) {
      plan.schema.map { field =>
        Row(field.name, field.dataType.simpleString, None)
      }
    } else {
      Seq.empty
    }
  }

  override lazy val output: Seq[Attribute] =
    AttributeReference("col_name", StringType)() ::
    AttributeReference("data_type", StringType)() ::
    AttributeReference("comment", StringType)() :: Nil
} 
Example 29
Source File: AbstractViewProvider.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.sources

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.plans.logical.view.{AbstractView, Persisted}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

import scala.reflect._


  def name: String
}

abstract class BaseAbstractViewProvider[A <: AbstractView with Persisted: ClassTag]
  extends AbstractViewProvider[A] {
  val tag = implicitly[ClassTag[A]]
}

object AbstractViewProvider {
  def matcherFor(kind: ViewKind)(any: Any): Option[AbstractViewProvider[_]] = {
    val multiProvider = MultiAbstractViewProvider.matcherFor(kind)
    any match {
      case provider: AbstractViewProvider[_] if tagMatches(provider.tag) =>
        Some(provider)
      case multiProvider(provider) =>
        Some(provider)
      case _ => None
    }
  }

  private def tagMatches[A: ClassTag](tag: ClassTag[_]): Boolean = {
    classTag[A].runtimeClass.isAssignableFrom(tag.runtimeClass)
  }
}

case class CreateViewInput(
    sqlContext: SQLContext,
    plan: LogicalPlan,
    viewSql: String,
    options: Map[String, String],
    identifier: TableIdentifier,
    allowExisting: Boolean)

case class DropViewInput(
    sqlContext: SQLContext,
    options: Map[String, String],
    identifier: TableIdentifier,
    allowNotExisting: Boolean) 
Example 30
Source File: RecursiveViewAnalysis.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.plans.logical.view.AbstractView
import org.apache.spark.sql.execution.datasources.AbstractCreateViewCommand


object RecursiveViewAnalysis {
  def apply(plan: LogicalPlan): Unit = {
    plan.foreach {
      case c:AbstractCreateViewCommand if containsViewIdentifier(c.identifier, c.plan) =>
        throw new AnalysisException(s"The view ${c.identifier.table} " +
          s"cannot be defined recursively.")
      case _ =>
    }
  }

  private def containsViewIdentifier(name: TableIdentifier,
                                     plan: LogicalPlan): Boolean = plan.find {
    case UnresolvedRelation(ident, _) if ident == name =>
      true
    case AbstractView(child) => containsViewIdentifier(name, child)
    case _ =>
      false
  }.isDefined
} 
Example 31
Source File: TemporaryFlagCatalog.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.Catalog
import org.apache.spark.sql.catalyst.plans.logical.Subquery
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.sources.Relation


trait TemporaryFlagCatalog extends Catalog {
  abstract override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = {
    val tables = super.getTables(databaseName)
    tables.map {
      case (tableName: String , isTemporary: Boolean) =>
        val tableIdentifier = TableIdentifier(tableName)
        lookupRelation(tableIdentifier) match {
          case Subquery(_, LogicalRelation(relation: Relation, _)) =>
            (tableIdentifier.table, relation.isTemporary)
          case _ => (tableIdentifier.table, isTemporary)
        }
    }
  }
} 
Example 32
Source File: ResolveDropCommand.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.CaseSensitivityUtils._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.datasources._
import org.apache.spark.sql.sources.commands.UnresolvedDropCommand
import org.apache.spark.sql.sources.{DropRelation, RelationKind, Table}

import scala.util.Try


case class ResolveDropCommand(analyzer: Analyzer, catalog: Catalog)
  extends Rule[LogicalPlan]
  with TableDependencyCalculator {

  private def failAnalysis(reason: String) = throw new AnalysisException(reason)

  override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {
    case UnresolvedDropCommand(kind, allowNotExisting, tableIdent, cascade) =>
      val plan = resolvePlan(kind, tableIdent, allowNotExisting)

      val affected = plan.map { lp =>
        val targetKind = RelationKind.kindOf(lp).getOrElse(Table)
        checkValidKind(kind, tableIdent, targetKind)
        buildDependentsMap(catalog, tableIdent)
      }

      affected.foreach(checkAllowedToDrop(cascade))
      DropRunnableCommand(affected.getOrElse(Map.empty))
  }

  private def getDropRelation(plan: LogicalPlan): Option[DropRelation] = plan.collectFirst {
    case d: LogicalPlan with DropRelation => d
    case LogicalRelation(d: DropRelation, _) => d
  }

  private def resolvePlan(kind: DropTarget,
                          tableIdent: TableIdentifier,
                          allowNotExisting: Boolean): Option[LogicalPlan] = {
    Try(catalog.lookupRelation(tableIdent)).toOption match {
      case Some(plan) => Some(plan)
      case None if allowNotExisting => None
      case None => failAnalysis(
        s"""${kind.targetName.toLowerCase} ${tableIdent.unquotedString} does not exist. To "
          |DROP a ${kind.targetName.toLowerCase} regardless if it exists of not, use
          |DROP ${kind.targetName.toUpperCase} IF EXISTS.""".stripMargin)
    }
  }

  private def checkAllowedToDrop(cascade: Boolean)
                                (dependents: Map[String, Option[DropRelation]]) = {
    if (dependents.size > 1 && !cascade) {
      failAnalysis("Can not drop because more than one relation has " +
        s"references to the target relation: ${dependents.keys.mkString(",")}. " +
        s"to force drop use 'CASCADE'.")
    }
  }

  private def checkValidKind(kind: DropTarget,
                             tableIdent: TableIdentifier,
                             targetKind: RelationKind): Unit = {
    if (!kind.accepts(targetKind)) {
      failAnalysis(
        s"Relation '${tableIdent.unquotedString} of kind" +
        s"$targetKind is not a ${kind.targetName}. " +
        s"Please use DROP ${targetKind.name.toUpperCase()} to drop it.")
    }
  }

  private def buildDependentsMap(catalog: Catalog,
                                 identifier: TableIdentifier): Map[String, Option[DropRelation]] = {
    val tables = getTables(catalog, identifier.database)
    val tablesAndDependents = buildDependentsMap(tables)

    def aggregate(acc: Set[TableIdentifier],
                  next: List[TableIdentifier]): Set[TableIdentifier] = next match {
      case Nil => acc
      case ident :: rest =>
        val dependents = tablesAndDependents(ident)
        aggregate(acc ++ dependents, rest ++ dependents.diff(acc))
    }

    val dependentsSet = aggregate(Set(identifier), identifier :: Nil)
    dependentsSet.flatMap { dependent =>
      tables.get(dependent).map(dependent.table -> getDropRelation(_))
    }.toMap
  }
} 
Example 33
Source File: AbsoluteOverrideCatalog.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.analysis

import java.util.concurrent.ConcurrentHashMap

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Subquery}
import scala.collection.JavaConverters._


  abstract override def lookupRelation(
                                        tableIdent: TableIdentifier,
                                        alias: Option[String] = None): LogicalPlan = {
    getOverriddenTable(tableIdent) match {
      case Some(table) =>
        val tableName = getTableName(tableIdent)
        val tableWithQualifiers = Subquery(tableName, table)

        // If an alias was specified by the lookup, wrap the plan in a sub-query so that attributes
        // are properly qualified with this alias.
        alias.map(a => Subquery(a, tableWithQualifiers)).getOrElse(tableWithQualifiers)

      case None => throw new NoSuchTableException
    }
  }

  abstract override def getTables(databaseName: Option[String]): Seq[(String, Boolean)] = {
    overrides.keySet().asScala.map(_ -> true).toSeq
  }

  override def registerTable(tableIdent: TableIdentifier, plan: LogicalPlan): Unit = {
    overrides.put(getTableName(tableIdent), plan)
  }

  override def unregisterTable(tableIdent: TableIdentifier): Unit = {
    if (tableIdent.database.isEmpty) {
      overrides.remove(getTableName(tableIdent))
    }
  }

  override def unregisterAllTables(): Unit = {
    overrides.clear()
  }
} 
Example 34
Source File: dependenciesSystemTable.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.analysis.systables
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.TableDependencyCalculator
import org.apache.spark.sql.sources.{RelationKind, Table}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SQLContext}

object DependenciesSystemTableProvider extends SystemTableProvider with LocalSpark {
  
  override def execute(): Seq[Row] = {
    val tables = getTables(sqlContext.catalog)
    val dependentsMap = buildDependentsMap(tables)

    def kindOf(tableIdentifier: TableIdentifier): String =
      tables
        .get(tableIdentifier)
        .map(plan => RelationKind.kindOf(plan).getOrElse(Table).name)
        .getOrElse(DependenciesSystemTable.UnknownType)
        .toUpperCase

    dependentsMap.flatMap {
      case (tableIdent, dependents) =>
        val curKind = kindOf(tableIdent)
        dependents.map { dependent =>
          val dependentKind = kindOf(dependent)
          Row(
            tableIdent.database.orNull,
            tableIdent.table,
            curKind,
            dependent.database.orNull,
            dependent.table,
            dependentKind,
            ReferenceDependency.id)
        }
    }.toSeq
  }

  override val schema: StructType = DependenciesSystemTable.schema
}

object DependenciesSystemTable extends SchemaEnumeration {
  val baseSchemaName = Field("BASE_SCHEMA_NAME", StringType, nullable = true)
  val baseObjectName = Field("BASE_OBJECT_NAME", StringType, nullable = false)
  val baseObjectType = Field("BASE_OBJECT_TYPE", StringType, nullable = false)
  val dependentSchemaName = Field("DEPENDENT_SCHEMA_NAME", StringType, nullable = true)
  val dependentObjectName = Field("DEPENDENT_OBJECT_NAME", StringType, nullable = false)
  val dependentObjectType = Field("DEPENDENT_OBJECT_TYPE", StringType, nullable = false)
  val dependencyType = Field("DEPENDENCY_TYPE", IntegerType, nullable = false)

  private[DependenciesSystemTable] val UnknownType = "UNKNOWN"
} 
Example 35
Source File: tablesSystemTable.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.analysis.systables

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.sources._
import org.apache.spark.sql.sources.commands.WithOrigin
import org.apache.spark.sql.types.{StringType, StructType}
import org.apache.spark.sql.util.CollectionUtils.CaseInsensitiveMap
import org.apache.spark.sql.{DatasourceResolver, Row, SQLContext}
import org.apache.spark.sql.catalyst.CaseSensitivityUtils._

object TablesSystemTableProvider extends SystemTableProvider with LocalSpark with ProviderBound {
  
  override def buildScan(requiredColumns: Array[String],
                         filters: Array[Filter]): RDD[Row] =
    DatasourceResolver
      .resolverFor(sqlContext)
      .newInstanceOfTyped[DatasourceCatalog](provider) match {
      case catalog: DatasourceCatalog with DatasourceCatalogPushDown =>
        catalog.getRelations(sqlContext, options, requiredColumns, filters.toSeq.merge)
      case catalog: DatasourceCatalog =>
        val values =
          catalog
            .getRelations(sqlContext, new CaseInsensitiveMap(options))
            .map(relationInfo => Row(
              relationInfo.name,
              relationInfo.isTemporary.toString.toUpperCase,
              relationInfo.kind.toUpperCase,
              relationInfo.provider))
        val rows = schema.buildPrunedFilteredScan(requiredColumns, filters)(values)
        sparkContext.parallelize(rows)
    }
}

sealed trait TablesSystemTable extends SystemTable {
  override def schema: StructType = TablesSystemTable.schema
}

object TablesSystemTable extends SchemaEnumeration {
  val tableName = Field("TABLE_NAME", StringType, nullable = false)
  val isTemporary = Field("IS_TEMPORARY", StringType, nullable = false)
  val kind = Field("KIND", StringType, nullable = false)
  val provider = Field("PROVIDER", StringType, nullable = true)
} 
Example 36
Source File: metadataSystemTable.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.analysis.systables

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.tablefunctions.OutputFormatter
import org.apache.spark.sql.sources._
import org.apache.spark.sql.types.{StringType, StructType}
import org.apache.spark.sql.{DatasourceResolver, Row, SQLContext}
import org.apache.spark.sql.catalyst.CaseSensitivityUtils._


  override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] =
    DatasourceResolver
      .resolverFor(sqlContext)
      .newInstanceOfTyped[MetadataCatalog](provider) match {
      case catalog: MetadataCatalog with MetadataCatalogPushDown =>
        catalog.getTableMetadata(sqlContext, options, requiredColumns, filters.toSeq.merge)
      case catalog =>
        val rows = catalog.getTableMetadata(sqlContext, options).flatMap { tableMetadata =>
          val formatter = new OutputFormatter(tableMetadata.tableName, tableMetadata.metadata)
          formatter.format().map(Row.fromSeq)
        }
        sparkContext.parallelize(schema.buildPrunedFilteredScan(requiredColumns, filters)(rows))
    }

  override def schema: StructType = MetadataSystemTable.schema
}

object MetadataSystemTable extends SchemaEnumeration {
  val tableName = Field("TABLE_NAME", StringType, nullable = false)
  val metadataKey = Field("METADATA_KEY", StringType, nullable = true)
  val metadataValue = Field("METADATA_VALUE", StringType, nullable = true)
} 
Example 37
Source File: relationMappingSystemTable.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.analysis.systables
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.sources.sql.SqlLikeRelation
import org.apache.spark.sql.types.{StringType, StructType}
import org.apache.spark.sql.{Row, SQLContext}

object RelationMappingSystemTableProvider extends SystemTableProvider with LocalSpark {

  
  override def execute(): Seq[Row] = {
    sqlContext.tableNames().map { tableName =>
      val plan = sqlContext.catalog.lookupRelation(TableIdentifier(tableName))
      val sqlName = plan.collectFirst {
        case s: SqlLikeRelation =>
          s.relationName
        case LogicalRelation(s: SqlLikeRelation, _) =>
          s.relationName
      }
      Row(tableName, sqlName)
    }
  }
}

object RelationMappingSystemTable extends SchemaEnumeration {
  val sparkName = Field("RELATION_NAME", StringType, nullable = false)
  val providerName = Field("SQL_NAME", StringType, nullable = true)
} 
Example 38
Source File: TableDependencyCalculator.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan


    tablesAndReferences.foldLeft(Map.empty[TableIdentifier, Set[TableIdentifier]]
      .withDefaultValue(Set.empty)) {
      case (acc, (ident, references)) =>
        references.foldLeft(acc) {
          case (innerAcc, referenceIdentifier) =>
            innerAcc + (referenceIdentifier -> (innerAcc(referenceIdentifier) + ident))
        }
    }
  }
} 
Example 39
Source File: ExtractSQLParserSuite.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql

import com.sap.spark.PlanTest
import org.apache.spark.Logging
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.parser.SapParserDialect
import org.scalatest.FunSuite

class ExtractSQLParserSuite extends FunSuite with PlanTest with Logging {

  // scalastyle:off magic.number

  val t1 = UnresolvedRelation(TableIdentifier("T1"))
  val parser = new SapParserDialect

  test("Parse EXTRACT in SELECT") {
    val result = parser.parse("SELECT a, EXTRACT(YEAR FROM a) FROM T1")
    val expected = t1.select(AliasUnresolver('a, Year('a)): _*)
    comparePlans(expected, result)
  }

  test("Parse EXTRACT in WHERE") {
    val result = parser.parse("SELECT 1 FROM T1 WHERE EXTRACT(MONTH FROM a) = 2015")
    val expected = t1.where(Month('a) === 2015).select(AliasUnresolver(1): _*)
    comparePlans(expected, result)
  }

  test("Parse EXTRACT in GROUP BY") {
    val result = parser.parse("SELECT 1 FROM T1 GROUP BY EXTRACT(DAY FROM a)")
    val expected = t1.groupBy(DayOfMonth('a))(AliasUnresolver(1): _*)
    comparePlans(expected, result)
  }

} 
Example 40
Source File: FileSourceScanExecAdapter.scala    From OAP   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.oap.adapter

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.execution.FileSourceScanExec
import org.apache.spark.sql.execution.datasources.HadoopFsRelation
import org.apache.spark.sql.types.StructType
import org.apache.spark.util.collection.BitSet

object FileSourceScanExecAdapter {
  def createFileSourceScanExec(
      relation: HadoopFsRelation,
      output: Seq[Attribute],
      requiredSchema: StructType,
      partitionFilters: Seq[Expression],
      optionalBucketSets: Option[BitSet],
      dataFilters: Seq[Expression],
      metastoreTableIdentifier: Option[TableIdentifier]): FileSourceScanExec = {
    FileSourceScanExec(
      relation,
      output,
      requiredSchema,
      partitionFilters,
      optionalBucketSets,
      dataFilters,
      metastoreTableIdentifier)
  }
} 
Example 41
Source File: CatalogUtils.scala    From spark-atlas-connector   with Apache License 2.0 5 votes vote down vote up
package com.hortonworks.spark.atlas.utils

import java.net.URI

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.types.StructType


object CatalogUtils {

  def createDB(name: String, location: String): CatalogDatabase = {
    CatalogDatabase(name, "", new URI(location), Map.empty)
  }

  def createStorageFormat(
        locationUri: Option[URI] = None,
        inputFormat: Option[String] = None,
        outputFormat: Option[String] = None,
        serd: Option[String] = None,
        compressed: Boolean = false,
        properties: Map[String, String] = Map.empty): CatalogStorageFormat = {
      CatalogStorageFormat(locationUri, inputFormat, outputFormat, serd, compressed, properties)
  }

  def createTable(
        db: String,
        table: String,
        schema: StructType,
        storage: CatalogStorageFormat,
        isHiveTable: Boolean = false): CatalogTable = {
      CatalogTable(
        TableIdentifier(table, Some(db)),
        CatalogTableType.MANAGED,
        storage,
        schema,
        provider = if (isHiveTable) Some("hive") else None)
  }

} 
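A short, hedged sketch using the helpers above; the database, table, and location names are illustrative.

import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import com.hortonworks.spark.atlas.utils.CatalogUtils

object CatalogUtilsDemo {
  def main(args: Array[String]): Unit = {
    val db = CatalogUtils.createDB("sales", "file:/tmp/sales.db")
    val storage = CatalogUtils.createStorageFormat(
      inputFormat = Some("org.apache.hadoop.mapred.TextInputFormat"))
    val schema = StructType(
      StructField("id", IntegerType) :: StructField("name", StringType) :: Nil)

    val table = CatalogUtils.createTable("sales", "orders", schema, storage)
    // The resulting CatalogTable carries TableIdentifier("orders", Some("sales")).
    println(db.name + " / " + table.identifier)
  }
}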
Example 42
Source File: SparkExtension.scala    From spark-atlas-connector   with Apache License 2.0 5 votes vote down vote up
package com.hortonworks.spark.atlas.sql

import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.parser.ParserInterface
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.types.{DataType, StructType}
import org.apache.spark.sql.{SparkSession, SparkSessionExtensions}


class SparkExtension extends (SparkSessionExtensions => Unit) {
  def apply(e: SparkSessionExtensions): Unit = {
    e.injectParser(SparkAtlasConnectorParser)
  }
}

case class SparkAtlasConnectorParser(spark: SparkSession, delegate: ParserInterface)
  extends ParserInterface {
  override def parsePlan(sqlText: String): LogicalPlan = {
    SQLQuery.set(sqlText)
    delegate.parsePlan(sqlText)
  }

  override def parseExpression(sqlText: String): Expression =
    delegate.parseExpression(sqlText)

  override def parseTableIdentifier(sqlText: String): TableIdentifier =
    delegate.parseTableIdentifier(sqlText)

  override def parseFunctionIdentifier(sqlText: String): FunctionIdentifier =
    delegate.parseFunctionIdentifier(sqlText)

  override def parseTableSchema(sqlText: String): StructType =
    delegate.parseTableSchema(sqlText)

  override def parseDataType(sqlText: String): DataType =
    delegate.parseDataType(sqlText)
}

object SQLQuery {
  private[this] val sqlQuery = new ThreadLocal[String]
  def get(): String = sqlQuery.get
  def set(s: String): Unit = sqlQuery.set(s)
} 
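A hedged sketch of wiring the extension into a session through the standard spark.sql.extensions mechanism; the master and app name are illustrative, and the thread-local lookup is expected to work because parsing happens on the calling thread.

import org.apache.spark.sql.SparkSession
import com.hortonworks.spark.atlas.sql.SQLQuery

object SparkExtensionDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("atlas-parser-demo")
      .config("spark.sql.extensions", "com.hortonworks.spark.atlas.sql.SparkExtension")
      .getOrCreate()

    // Every statement parsed through this session is first recorded by the
    // injected parser before being delegated to the default one.
    spark.sql("SELECT 1").collect()
    println(SQLQuery.get())  // should print "SELECT 1" on this thread

    spark.stop()
  }
}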
Example 43
Source File: TestUtils.scala    From spark-atlas-connector   with Apache License 2.0 5 votes vote down vote up
package com.hortonworks.spark.atlas

import java.net.URI

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.types.StructType
import com.hortonworks.spark.atlas.utils.SparkUtils
import org.apache.atlas.model.instance.AtlasObjectId

object TestUtils {
  def createDB(name: String, location: String): CatalogDatabase = {
    CatalogDatabase(name, "", new URI(location), Map.empty)
  }

  def createStorageFormat(
      locationUri: Option[URI] = None,
      inputFormat: Option[String] = None,
      outputFormat: Option[String] = None,
      serd: Option[String] = None,
      compressed: Boolean = false,
      properties: Map[String, String] = Map.empty): CatalogStorageFormat = {
    CatalogStorageFormat(locationUri, inputFormat, outputFormat, serd, compressed, properties)
  }

  def createTable(
      db: String,
      table: String,
      schema: StructType,
      storage: CatalogStorageFormat,
      isHiveTable: Boolean = false): CatalogTable = {
    CatalogTable(
      TableIdentifier(table, Some(db)),
      CatalogTableType.MANAGED,
      storage,
      schema,
      provider = if (isHiveTable) Some("hive") else None,
      bucketSpec = None,
      owner = SparkUtils.currUser())
  }

  def assertSubsetOf[T](set: Set[T], subset: Set[T]): Unit = {
    assert(subset.subsetOf(set), s"$subset is not a subset of $set")
  }

  def findEntity(
                  entities: Seq[SACAtlasReferenceable],
                  objId: AtlasObjectId): Option[SACAtlasReferenceable] = {
    entities.find(p => p.asObjectId == objId)
  }

  def findEntities(
                    entities: Seq[SACAtlasReferenceable],
                    objIds: Seq[AtlasObjectId]): Seq[SACAtlasReferenceable] = {
    entities.filter(p => objIds.contains(p.asObjectId))
  }
} 
Example 44
Source File: HiveExternalCatalogSuite.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive

import org.apache.hadoop.conf.Configuration

import org.apache.spark.SparkConf
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.execution.command.DDLUtils
import org.apache.spark.sql.types.StructType


class HiveExternalCatalogSuite extends ExternalCatalogSuite {

  private val externalCatalog: HiveExternalCatalog = {
    val catalog = new HiveExternalCatalog(new SparkConf, new Configuration)
    catalog.client.reset()
    catalog
  }

  protected override val utils: CatalogTestUtils = new CatalogTestUtils {
    override val tableInputFormat: String = "org.apache.hadoop.mapred.SequenceFileInputFormat"
    override val tableOutputFormat: String = "org.apache.hadoop.mapred.SequenceFileOutputFormat"
    override def newEmptyCatalog(): ExternalCatalog = externalCatalog
  }

  protected override def resetState(): Unit = {
    externalCatalog.client.reset()
  }

  import utils._

  test("list partitions by filter") {
    val catalog = newBasicCatalog()
    val selectedPartitions = catalog.listPartitionsByFilter("db2", "tbl2", Seq('a.int === 1))
    assert(selectedPartitions.length == 1)
    assert(selectedPartitions.head.spec == part1.spec)
  }

  test("SPARK-18647: do not put provider in table properties for Hive serde table") {
    val catalog = newBasicCatalog()
    val hiveTable = CatalogTable(
      identifier = TableIdentifier("hive_tbl", Some("db1")),
      tableType = CatalogTableType.MANAGED,
      storage = storageFormat,
      schema = new StructType().add("col1", "int").add("col2", "string"),
      provider = Some("hive"))
    catalog.createTable(hiveTable, ignoreIfExists = false)

    val rawTable = externalCatalog.client.getTable("db1", "hive_tbl")
    assert(!rawTable.properties.contains(HiveExternalCatalog.DATASOURCE_PROVIDER))
    assert(externalCatalog.getTable("db1", "hive_tbl").provider == Some(DDLUtils.HIVE_PROVIDER))
  }
} 
Example 45
Source File: MetastoreRelationSuite.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive

import org.apache.spark.sql.{QueryTest, Row}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

class MetastoreRelationSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
  test("makeCopy and toJSON should work") {
    val table = CatalogTable(
      identifier = TableIdentifier("test", Some("db")),
      tableType = CatalogTableType.VIEW,
      storage = CatalogStorageFormat.empty,
      schema = StructType(StructField("a", IntegerType, true) :: Nil))
    val relation = MetastoreRelation("db", "test")(table, null)

    // No exception should be thrown
    relation.makeCopy(Array("db", "test"))
    // No exception should be thrown
    relation.toJSON
  }

  test("SPARK-17409: Do Not Optimize Query in CTAS (Hive Serde Table) More Than Once") {
    withTable("bar") {
      withTempView("foo") {
        sql("select 0 as id").createOrReplaceTempView("foo")
        // If we optimize the query in CTAS more than once, the following saveAsTable will fail
        // with the error: `GROUP BY position 0 is not in select list (valid range is [1, 1])`
        sql("CREATE TABLE bar AS SELECT * FROM foo group by id")
        checkAnswer(spark.table("bar"), Row(0) :: Nil)
        val tableMetadata = spark.sessionState.catalog.getTableMetadata(TableIdentifier("bar"))
        assert(tableMetadata.provider == Some("hive"), "the expected table is a Hive serde table")
      }
    }
  }
} 
Example 46
Source File: ListTablesSuite.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive

import org.scalatest.BeforeAndAfterAll

import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.hive.test.TestHiveSingleton

class ListTablesSuite extends QueryTest with TestHiveSingleton with BeforeAndAfterAll {
  import hiveContext._
  import hiveContext.implicits._

  val df = sparkContext.parallelize((1 to 10).map(i => (i, s"str$i"))).toDF("key", "value")

  override def beforeAll(): Unit = {
    super.beforeAll()
    // The catalog in HiveContext is a case insensitive one.
    sessionState.catalog.createTempView(
      "ListTablesSuiteTable", df.logicalPlan, overrideIfExists = true)
    sql("CREATE TABLE HiveListTablesSuiteTable (key int, value string)")
    sql("CREATE DATABASE IF NOT EXISTS ListTablesSuiteDB")
    sql("CREATE TABLE ListTablesSuiteDB.HiveInDBListTablesSuiteTable (key int, value string)")
  }

  override def afterAll(): Unit = {
    try {
      sessionState.catalog.dropTable(
        TableIdentifier("ListTablesSuiteTable"), ignoreIfNotExists = true, purge = false)
      sql("DROP TABLE IF EXISTS HiveListTablesSuiteTable")
      sql("DROP TABLE IF EXISTS ListTablesSuiteDB.HiveInDBListTablesSuiteTable")
      sql("DROP DATABASE IF EXISTS ListTablesSuiteDB")
    } finally {
      super.afterAll()
    }
  }

  test("get all tables of current database") {
    Seq(tables(), sql("SHOW TABLes")).foreach {
      case allTables =>
        // We are using default DB.
        checkAnswer(
          allTables.filter("tableName = 'listtablessuitetable'"),
          Row("", "listtablessuitetable", true))
        checkAnswer(
          allTables.filter("tableName = 'hivelisttablessuitetable'"),
          Row("default", "hivelisttablessuitetable", false))
        assert(allTables.filter("tableName = 'hiveindblisttablessuitetable'").count() === 0)
    }
  }

  test("getting all tables with a database name") {
    Seq(tables("listtablessuiteDb"), sql("SHOW TABLes in listTablesSuitedb")).foreach {
      case allTables =>
        checkAnswer(
          allTables.filter("tableName = 'listtablessuitetable'"),
          Row("", "listtablessuitetable", true))
        assert(allTables.filter("tableName = 'hivelisttablessuitetable'").count() === 0)
        checkAnswer(
          allTables.filter("tableName = 'hiveindblisttablessuitetable'"),
          Row("listtablessuitedb", "hiveindblisttablessuitetable", false))
    }
  }
} 
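A hedged sketch of the same listing behaviour through the public catalog API rather than the test harness; the view name is illustrative.

import org.apache.spark.sql.SparkSession

object ListTablesDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("list-tables-demo")
      .getOrCreate()

    spark.range(3).createOrReplaceTempView("tmp_view")

    // Both calls should report tmp_view with isTemporary = true.
    spark.catalog.listTables().show(truncate = false)
    spark.sql("SHOW TABLES").show(truncate = false)

    spark.stop()
  }
}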
Example 47
Source File: TableIdentifierParserSuite.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.parser

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.TableIdentifier

class TableIdentifierParserSuite extends SparkFunSuite {
  import CatalystSqlParser._

  // Add "$elem$", "$value$" & "$key$"
  val hiveNonReservedKeyword = Array("add", "admin", "after", "analyze", "archive", "asc", "before",
    "bucket", "buckets", "cascade", "change", "cluster", "clustered", "clusterstatus", "collection",
    "columns", "comment", "compact", "compactions", "compute", "concatenate", "continue", "data",
    "day", "databases", "datetime", "dbproperties", "deferred", "defined", "delimited",
    "dependency", "desc", "directories", "directory", "disable", "distribute",
    "enable", "escaped", "exclusive", "explain", "export", "fields", "file", "fileformat", "first",
    "format", "formatted", "functions", "hold_ddltime", "hour", "idxproperties", "ignore", "index",
    "indexes", "inpath", "inputdriver", "inputformat", "items", "jar", "keys", "key_type", "last",
    "limit", "offset", "lines", "load", "location", "lock", "locks", "logical", "long", "mapjoin",
    "materialized", "metadata", "minus", "minute", "month", "msck", "noscan", "no_drop", "nulls",
    "offline", "option", "outputdriver", "outputformat", "overwrite", "owner", "partitioned",
    "partitions", "plus", "pretty", "principals", "protection", "purge", "read", "readonly",
    "rebuild", "recordreader", "recordwriter", "reload", "rename", "repair", "replace",
    "replication", "restrict", "rewrite", "role", "roles", "schemas", "second",
    "serde", "serdeproperties", "server", "sets", "shared", "show", "show_database", "skewed",
    "sort", "sorted", "ssl", "statistics", "stored", "streamtable", "string", "struct", "tables",
    "tblproperties", "temporary", "terminated", "tinyint", "touch", "transactions", "unarchive",
    "undo", "uniontype", "unlock", "unset", "unsigned", "uri", "use", "utc", "utctimestamp",
    "view", "while", "year", "work", "transaction", "write", "isolation", "level",
    "snapshot", "autocommit", "all", "alter", "array", "as", "authorization", "between", "bigint",
    "binary", "boolean", "both", "by", "create", "cube", "current_date", "current_timestamp",
    "cursor", "date", "decimal", "delete", "describe", "double", "drop", "exists", "external",
    "false", "fetch", "float", "for", "grant", "group", "grouping", "import", "in",
    "insert", "int", "into", "is", "lateral", "like", "local", "none", "null",
    "of", "order", "out", "outer", "partition", "percent", "procedure", "range", "reads", "revoke",
    "rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger",
    "true", "truncate", "update", "user", "using", "values", "with", "regexp", "rlike",
    "bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float",
    "int", "smallint", "timestamp", "at")

  val hiveStrictNonReservedKeyword = Seq("anti", "full", "inner", "left", "semi", "right",
    "natural", "union", "intersect", "except", "database", "on", "join", "cross", "select", "from",
    "where", "having", "from", "to", "table", "with", "not")

  test("table identifier") {
    // Regular names.
    assert(TableIdentifier("q") === parseTableIdentifier("q"))
    assert(TableIdentifier("q", Option("d")) === parseTableIdentifier("d.q"))

    // Illegal names.
    Seq("", "d.q.g", "t:", "${some.var.x}", "tab:1").foreach { identifier =>
      intercept[ParseException](parseTableIdentifier(identifier))
    }
  }

  test("quoted identifiers") {
    assert(TableIdentifier("z", Some("x.y")) === parseTableIdentifier("`x.y`.z"))
    assert(TableIdentifier("y.z", Some("x")) === parseTableIdentifier("x.`y.z`"))
    assert(TableIdentifier("z", Some("`x.y`")) === parseTableIdentifier("```x.y```.z"))
    assert(TableIdentifier("`y.z`", Some("x")) === parseTableIdentifier("x.```y.z```"))
    assert(TableIdentifier("x.y.z", None) === parseTableIdentifier("`x.y.z`"))
  }

  test("table identifier - strict keywords") {
    // SQL Keywords.
    hiveStrictNonReservedKeyword.foreach { keyword =>
      assert(TableIdentifier(keyword) === parseTableIdentifier(keyword))
      assert(TableIdentifier(keyword) === parseTableIdentifier(s"`$keyword`"))
      assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.`$keyword`"))
    }
  }

  test("table identifier - non reserved keywords") {
    // Hive keywords are allowed.
    hiveNonReservedKeyword.foreach { nonReserved =>
      assert(TableIdentifier(nonReserved) === parseTableIdentifier(nonReserved))
    }
  }

  test("SPARK-17364 table identifier - contains number") {
    assert(parseTableIdentifier("123_") == TableIdentifier("123_"))
    assert(parseTableIdentifier("1a.123_") == TableIdentifier("123_", Some("1a")))
    // ".123" should not be treated as token of type DECIMAL_VALUE
    assert(parseTableIdentifier("a.123A") == TableIdentifier("123A", Some("a")))
    // ".123E3" should not be treated as token of type SCIENTIFIC_DECIMAL_VALUE
    assert(parseTableIdentifier("a.123E3_LIST") == TableIdentifier("123E3_LIST", Some("a")))
    // ".123D" should not be treated as token of type DOUBLE_LITERAL
    assert(parseTableIdentifier("a.123D_LIST") == TableIdentifier("123D_LIST", Some("a")))
    // ".123BD" should not be treated as token of type BIGDECIMAL_LITERAL
    assert(parseTableIdentifier("a.123BD_LIST") == TableIdentifier("123BD_LIST", Some("a")))
  }

  test("SPARK-17832 table identifier - contains backtick") {
    val complexName = TableIdentifier("`weird`table`name", Some("`d`b`1"))
    assert(complexName === parseTableIdentifier("```d``b``1`.```weird``table``name`"))
    assert(complexName === parseTableIdentifier(complexName.quotedString))
    intercept[ParseException](parseTableIdentifier(complexName.unquotedString))
    // Table identifiers containing consecutive backticks should be handled correctly.
    val complexName2 = TableIdentifier("x``y", Some("d``b"))
    assert(complexName2 === parseTableIdentifier(complexName2.quotedString))
  }
} 
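For reference, a small sketch of the same parsing entry point used outside the suite; the identifiers are illustrative.

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser

object ParseTableIdentifierDemo {
  def main(args: Array[String]): Unit = {
    // Plain two-part name: database "db", table "tbl".
    val simple = CatalystSqlParser.parseTableIdentifier("db.tbl")
    assert(simple == TableIdentifier("tbl", Some("db")))

    // Backticks keep a dotted string together as a single name part.
    val quoted = CatalystSqlParser.parseTableIdentifier("`x.y`.z")
    assert(quoted == TableIdentifier("z", Some("x.y")))

    println(simple.quotedString + ", " + quoted.quotedString)
  }
}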
Example 48
Source File: ddl.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils}
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.types._

case class CreateTable(
    tableDesc: CatalogTable,
    mode: SaveMode,
    query: Option[LogicalPlan]) extends Command {
  assert(tableDesc.provider.isDefined, "The table to be created must have a provider.")

  if (query.isEmpty) {
    assert(
      mode == SaveMode.ErrorIfExists || mode == SaveMode.Ignore,
      "create table without data insertion can only use ErrorIfExists or Ignore as SaveMode.")
  }

  override def innerChildren: Seq[QueryPlan[_]] = query.toSeq
}


case class CreateTempViewUsing(
    tableIdent: TableIdentifier,
    userSpecifiedSchema: Option[StructType],
    replace: Boolean,
    global: Boolean,
    provider: String,
    options: Map[String, String]) extends RunnableCommand {

  if (tableIdent.database.isDefined) {
    throw new AnalysisException(
      s"Temporary view '$tableIdent' should not have specified a database")
  }

  override def argString: String = {
    s"[tableIdent:$tableIdent " +
      userSpecifiedSchema.map(_ + " ").getOrElse("") +
      s"replace:$replace " +
      s"provider:$provider " +
      CatalogUtils.maskCredentials(options)
  }

  def run(sparkSession: SparkSession): Seq[Row] = {
    val dataSource = DataSource(
      sparkSession,
      userSpecifiedSchema = userSpecifiedSchema,
      className = provider,
      options = options)

    val catalog = sparkSession.sessionState.catalog
    val viewDefinition = Dataset.ofRows(
      sparkSession, LogicalRelation(dataSource.resolveRelation())).logicalPlan

    if (global) {
      catalog.createGlobalTempView(tableIdent.table, viewDefinition, replace)
    } else {
      catalog.createTempView(tableIdent.table, viewDefinition, replace)
    }

    Seq.empty[Row]
  }
}

case class RefreshTable(tableIdent: TableIdentifier)
  extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    // Refresh the given table's metadata. If this table is cached as an InMemoryRelation,
    // drop the original cached version and make the new version cached lazily.
    sparkSession.catalog.refreshTable(tableIdent.quotedString)
    Seq.empty[Row]
  }
}

case class RefreshResource(path: String)
  extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    sparkSession.catalog.refreshByPath(path)
    Seq.empty[Row]
  }
} 
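A hedged sketch of driving the RefreshTable command above directly; the table name is illustrative, and in normal use the node is produced by the parser for a REFRESH TABLE statement.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.datasources.RefreshTable

object RefreshTableDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("refresh-demo")
      .getOrCreate()

    spark.range(3).write.mode("overwrite").saveAsTable("demo_tbl")

    // Equivalent to spark.sql("REFRESH TABLE demo_tbl").
    RefreshTable(TableIdentifier("demo_tbl")).run(spark)

    spark.stop()
  }
}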
Example 49
Source File: cache.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.command

import org.apache.spark.sql.{Dataset, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

case class CacheTableCommand(
    tableIdent: TableIdentifier,
    plan: Option[LogicalPlan],
    isLazy: Boolean) extends RunnableCommand {
  require(plan.isEmpty || tableIdent.database.isEmpty,
    "Database name is not allowed in CACHE TABLE AS SELECT")

  override protected def innerChildren: Seq[QueryPlan[_]] = {
    plan.toSeq
  }

  override def run(sparkSession: SparkSession): Seq[Row] = {
    plan.foreach { logicalPlan =>
      Dataset.ofRows(sparkSession, logicalPlan).createTempView(tableIdent.quotedString)
    }
    sparkSession.catalog.cacheTable(tableIdent.quotedString)

    if (!isLazy) {
      // Performs eager caching
      sparkSession.table(tableIdent).count()
    }

    Seq.empty[Row]
  }
}


case class UncacheTableCommand(
    tableIdent: TableIdentifier,
    ifExists: Boolean) extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val tableId = tableIdent.quotedString
    try {
      sparkSession.catalog.uncacheTable(tableId)
    } catch {
      case _: NoSuchTableException if ifExists => // don't throw
    }
    Seq.empty[Row]
  }
}


case object ClearCacheCommand extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    sparkSession.catalog.clearCache()
    Seq.empty[Row]
  }
} 
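A hedged sketch of the SQL forms that resolve to the commands above; the view and table names are illustrative, and the optional IF EXISTS clause of UNCACHE TABLE maps to the ifExists flag of UncacheTableCommand.

import org.apache.spark.sql.SparkSession

object CacheCommandsDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("cache-demo")
      .getOrCreate()

    spark.range(10).createOrReplaceTempView("nums")

    spark.sql("CACHE TABLE cached_nums AS SELECT * FROM nums")  // CacheTableCommand, eager
    spark.sql("UNCACHE TABLE cached_nums")                      // UncacheTableCommand
    spark.sql("CLEAR CACHE")                                    // ClearCacheCommand

    spark.stop()
  }
}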
Example 50
Source File: DropCacheSIEventListener.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.secondaryindex.events

import scala.collection.JavaConverters._

import org.apache.log4j.Logger
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.command.cache.CarbonDropCacheCommand
import org.apache.spark.sql.index.CarbonIndexUtil

import org.apache.carbondata.common.logging.LogServiceFactory
import org.apache.carbondata.core.metadata.index.IndexType
import org.apache.carbondata.events.{DropTableCacheEvent, Event, OperationContext, OperationEventListener}

object DropCacheSIEventListener extends OperationEventListener {

  val LOGGER: Logger = LogServiceFactory.getLogService(this.getClass.getCanonicalName)

  override protected def onEvent(event: Event, operationContext: OperationContext): Unit = {
    event match {
      case dropCacheEvent: DropTableCacheEvent =>
        val carbonTable = dropCacheEvent.carbonTable
        val sparkSession = dropCacheEvent.sparkSession
        val internalCall = dropCacheEvent.internalCall
        if (carbonTable.isIndexTable && !internalCall) {
          throw new UnsupportedOperationException("Operation not allowed on child table.")
        }

        val allIndexTables = carbonTable.getIndexTableNames(
          IndexType.SI.getIndexProviderName)
        val dbName = carbonTable.getDatabaseName
        for (indexTableName <- allIndexTables.asScala) {
          try {
            val dropCacheCommandForChildTable =
              CarbonDropCacheCommand(
                TableIdentifier(indexTableName, Some(dbName)),
                internalCall = true)
            dropCacheCommandForChildTable.processMetadata(sparkSession)
          }
          catch {
            case e: Exception =>
              LOGGER.error(s"Clean cache for SI table $indexTableName failed. ", e)
          }
        }

    }
  }
} 
Example 51
Source File: StreamingTableStrategy.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.strategy

import org.apache.spark.sql.{CarbonEnv, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.{SparkPlan, SparkStrategy}
import org.apache.spark.sql.execution.command.{AlterTableAddColumnsCommand, AlterTableChangeColumnCommand, AlterTableRenameCommand}
import org.apache.spark.sql.execution.command.mutation.{CarbonProjectForDeleteCommand, CarbonProjectForUpdateCommand}
import org.apache.spark.sql.execution.command.schema.{CarbonAlterTableAddColumnCommand, CarbonAlterTableColRenameDataTypeChangeCommand, CarbonAlterTableDropColumnCommand}

import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException


  private def rejectIfStreamingTable(tableIdentifier: TableIdentifier, operation: String): Unit = {
    var streaming = false
    try {
      streaming = CarbonEnv.getCarbonTable(
        tableIdentifier.database, tableIdentifier.table)(sparkSession)
        .isStreamingSink
    } catch {
      case e: Exception =>
        streaming = false
    }
    if (streaming) {
      throw new MalformedCarbonCommandException(
        s"$operation is not allowed for streaming table")
    }
  }

  def isCarbonTable(tableIdent: TableIdentifier): Boolean = {
    CarbonPlanHelper.isCarbonTable(tableIdent, sparkSession)
  }
} 
Example 52
Source File: CarbonShowStreamsCommand.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.command.stream

import java.util.Date
import java.util.concurrent.TimeUnit

import org.apache.spark.sql.{CarbonEnv, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.execution.command.MetadataCommand
import org.apache.spark.sql.types.StringType

import org.apache.carbondata.stream.StreamJobManager


case class CarbonShowStreamsCommand(
    tableOp: Option[TableIdentifier]
) extends MetadataCommand {
  override def output: Seq[Attribute] = {
    Seq(AttributeReference("Stream Name", StringType, nullable = false)(),
      AttributeReference("JobId", StringType, nullable = false)(),
      AttributeReference("Status", StringType, nullable = false)(),
      AttributeReference("Source", StringType, nullable = false)(),
      AttributeReference("Sink", StringType, nullable = false)(),
      AttributeReference("Start Time", StringType, nullable = false)(),
      AttributeReference("Time Elapse", StringType, nullable = false)())
  }

  override def processMetadata(sparkSession: SparkSession): Seq[Row] = {
    val jobs = tableOp match {
      case None => StreamJobManager.getAllJobs.toSeq
      case Some(table) =>
        val carbonTable = CarbonEnv.getCarbonTable(table.database, table.table)(sparkSession)
        setAuditTable(carbonTable)
        StreamJobManager.getAllJobs.filter { job =>
          job.sinkTable.equalsIgnoreCase(carbonTable.getTableName) &&
          job.sinkDb.equalsIgnoreCase(carbonTable.getDatabaseName)
        }.toSeq
    }

    jobs.map { job =>
      val elapsedTime = System.currentTimeMillis() - job.startTime
      Row(
        job.streamName,
        job.streamingQuery.id.toString,
        if (job.streamingQuery.isActive) "RUNNING" else "FAILED",
        s"${ job.sourceDb }.${ job.sourceTable }",
        s"${ job.sinkDb }.${ job.sinkTable }",
        new Date(job.startTime).toString,
        String.format(
          "%s days, %s hours, %s min, %s sec",
          TimeUnit.MILLISECONDS.toDays(elapsedTime).toString,
          TimeUnit.MILLISECONDS.toHours(elapsedTime).toString,
          TimeUnit.MILLISECONDS.toMinutes(elapsedTime).toString,
          TimeUnit.MILLISECONDS.toSeconds(elapsedTime).toString)
      )
    }
  }

  override protected def opName: String = "SHOW STREAMS"
} 
Example 53
Source File: CarbonDropMVCommand.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.command.view

import org.apache.log4j.Logger
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.command.AtomicRunnableCommand
import org.apache.spark.sql.execution.command.table.CarbonDropTableCommand

import org.apache.carbondata.common.logging.LogServiceFactory
import org.apache.carbondata.core.datastore.impl.FileFactory
import org.apache.carbondata.core.util.CarbonProperties
import org.apache.carbondata.events.{OperationContext, OperationListenerBus}
import org.apache.carbondata.view.{MVCatalogInSpark, MVManagerInSpark, UpdateMVPostExecutionEvent, UpdateMVPreExecutionEvent}


case class CarbonDropMVCommand(
    databaseNameOption: Option[String],
    name: String,
    ifExistsSet: Boolean,
    forceDrop: Boolean = false)
  extends AtomicRunnableCommand {

  private val logger = CarbonDropMVCommand.LOGGER

  private var dropTableCommand: CarbonDropTableCommand = _

  override def processMetadata(session: SparkSession): Seq[Row] = {
    setAuditInfo(Map("mvName" -> name))
    val viewManager = MVManagerInSpark.get(session)
    try {
      logger.info("Trying to drop materialized view schema")
      val databaseName =
        databaseNameOption.getOrElse(session.sessionState.catalog.getCurrentDatabase)
      val schema = viewManager.getSchema(databaseName, name)
      if (schema != null) {
        // Drop mv status.
        val databaseLocation = viewManager.getDatabaseLocation(databaseName)
        val systemDirectoryPath = CarbonProperties.getInstance()
          .getSystemFolderLocationPerDatabase(FileFactory
            .getCarbonFile(databaseLocation)
            .getCanonicalPath)
        val identifier = TableIdentifier(name, Option(databaseName))
        val operationContext = new OperationContext()
        OperationListenerBus.getInstance().fireEvent(
          UpdateMVPreExecutionEvent(session, systemDirectoryPath, identifier),
          operationContext)
        viewManager.onDrop(databaseName, name)
        OperationListenerBus.getInstance().fireEvent(
          UpdateMVPostExecutionEvent(session, systemDirectoryPath, identifier),
          operationContext)

        // Drop mv table.
        val dropTableCommand = CarbonDropTableCommand(
          ifExistsSet = true,
          Option(databaseName),
          name,
          dropChildTable = true,
          isInternalCall = true)
        dropTableCommand.processMetadata(session)

        // Drop mv schema.
        try {
          viewManager.deleteSchema(databaseName, name)
        } finally {
          val viewCatalog = viewManager.getCatalog()
            .asInstanceOf[MVCatalogInSpark]
          if (viewCatalog != null) {
            viewCatalog.deregisterSchema(schema.getIdentifier)
          }
        }

        this.dropTableCommand = dropTableCommand
      }
    } catch {
      case exception: Exception =>
        if (!ifExistsSet) {
          throw exception
        }
    }
    Seq.empty
  }

  override def processData(sparkSession: SparkSession): Seq[Row] = {
    // delete the table folder
    if (this.dropTableCommand != null) {
      this.dropTableCommand.processData(sparkSession)
    }
    Seq.empty
  }

  override protected def opName: String = "DROP MATERIALIZED VIEW"
}

object CarbonDropMVCommand {

  private val LOGGER: Logger = LogServiceFactory.getLogService(
    classOf[CarbonDropMVCommand].getCanonicalName)

} 
Example 54
Source File: CarbonRefreshMVCommand.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.command.view

import org.apache.spark.sql.{CarbonEnv, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.command.DataCommand

import org.apache.carbondata.common.exceptions.sql.{MalformedMVCommandException, NoSuchMVException}
import org.apache.carbondata.core.view.MVStatus
import org.apache.carbondata.events.{OperationContext, OperationListenerBus}
import org.apache.carbondata.view.{MVManagerInSpark, MVRefresher, RefreshMVPostExecutionEvent, RefreshMVPreExecutionEvent}


case class CarbonRefreshMVCommand(
    databaseNameOption: Option[String],
    mvName: String) extends DataCommand {

  override def processData(session: SparkSession): Seq[Row] = {
    val databaseName =
      databaseNameOption.getOrElse(session.sessionState.catalog.getCurrentDatabase)
    val viewManager = MVManagerInSpark.get(session)
    val schema = try {
      viewManager.getSchema(databaseName, mvName)
    } catch {
      case _: NoSuchMVException =>
        throw new MalformedMVCommandException(
          s"Materialized view ${ databaseName }.${ mvName } does not exist")
    }
    val table = CarbonEnv.getCarbonTable(Option(databaseName), mvName)(session)
    setAuditTable(table)

    MVRefresher.refresh(schema, session)

    // After rebuild successfully enable the MV table.
    val identifier = TableIdentifier(mvName, Option(databaseName))
    val operationContext = new OperationContext()
    OperationListenerBus.getInstance().fireEvent(
      RefreshMVPreExecutionEvent(session, identifier),
      operationContext)
    viewManager.setStatus(schema.getIdentifier, MVStatus.ENABLED)
    OperationListenerBus.getInstance().fireEvent(
      RefreshMVPostExecutionEvent(session, identifier),
      operationContext)
    Seq.empty
  }

  override protected def opName: String = "REFRESH MATERIALIZED VIEW"
} 
Example 55
Source File: CarbonShowMVCommand.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.command.view

import java.util

import scala.collection.JavaConverters._

import org.apache.spark.sql.{CarbonEnv, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.execution.command.{Checker, DataCommand}
import org.apache.spark.sql.types.{BooleanType, StringType}

import org.apache.carbondata.core.view.{MVProperty, MVSchema}
import org.apache.carbondata.view.MVManagerInSpark


case class CarbonShowMVCommand(
    databaseNameOption: Option[String],
    relatedTableIdentifier: Option[TableIdentifier]) extends DataCommand {

  override def output: Seq[Attribute] = {
    Seq(
      AttributeReference("Database", StringType, nullable = false)(),
      AttributeReference("Name", StringType, nullable = false)(),
      AttributeReference("Status", StringType, nullable = false)(),
      AttributeReference("Refresh Mode", StringType, nullable = false)(),
      AttributeReference("Refresh Trigger Mode", StringType, nullable = false)(),
      AttributeReference("Properties", StringType, nullable = false)())
  }

  override def processData(session: SparkSession): Seq[Row] = {
    // Get mv schemas.
    val schemaList = new util.ArrayList[MVSchema]()
    val viewManager = MVManagerInSpark.get(session)
    relatedTableIdentifier match {
      case Some(table) =>
        val relatedTable = CarbonEnv.getCarbonTable(table)(session)
        setAuditTable(relatedTable)
        Checker.validateTableExists(table.database, table.table, session)
        if (databaseNameOption.isDefined) {
          schemaList.addAll(viewManager.getSchemasOnTable(
            databaseNameOption.get,
            relatedTable))
        } else {
          schemaList.addAll(viewManager.getSchemasOnTable(relatedTable))
        }
      case _ =>
        if (databaseNameOption.isDefined) {
          schemaList.addAll(viewManager.getSchemas(databaseNameOption.get))
        } else {
          schemaList.addAll(viewManager.getSchemas())
        }
    }
    // Convert mv schema to row.
    schemaList.asScala.map {
      schema =>
        Row(
          schema.getIdentifier.getDatabaseName,
          schema.getIdentifier.getTableName,
          schema.getStatus.name(),
          schema.getProperties.get(MVProperty.REFRESH_MODE),
          schema.getProperties.get(MVProperty.REFRESH_TRIGGER_MODE),
          schema.getPropertiesAsString
        )
    }
  }

  override protected def opName: String = "SHOW MATERIALIZED VIEW"
} 
Example 56
Source File: CarbonDropCacheCommand.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.command.cache

import scala.collection.JavaConverters._

import org.apache.spark.sql.{CarbonEnv, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.command.MetadataCommand

import org.apache.carbondata.common.logging.LogServiceFactory
import org.apache.carbondata.core.cache.CacheProvider
import org.apache.carbondata.core.index.{IndexStoreManager, IndexUtil}
import org.apache.carbondata.core.metadata.schema.table.CarbonTable
import org.apache.carbondata.core.util.CarbonProperties
import org.apache.carbondata.events.{DropTableCacheEvent, OperationContext, OperationListenerBus}
import org.apache.carbondata.view.MVManagerInSpark

case class CarbonDropCacheCommand(tableIdentifier: TableIdentifier, internalCall: Boolean = false)
  extends MetadataCommand {

  val LOGGER = LogServiceFactory.getLogService(this.getClass.getCanonicalName)

  override def processMetadata(sparkSession: SparkSession): Seq[Row] = {
    val carbonTable = CarbonEnv.getCarbonTable(tableIdentifier)(sparkSession)
    clearCache(carbonTable, sparkSession)
    Seq.empty
  }

  def clearCache(carbonTable: CarbonTable, sparkSession: SparkSession): Unit = {
    LOGGER.info("Drop cache request received for table " + carbonTable.getTableUniqueName)

    val dropCacheEvent = DropTableCacheEvent(carbonTable, sparkSession, internalCall)
    val operationContext = new OperationContext
    OperationListenerBus.getInstance.fireEvent(dropCacheEvent, operationContext)

    val cache = CacheProvider.getInstance().getCarbonCache
    // Clear cache from IndexServer
    if (CarbonProperties.getInstance().isDistributedPruningEnabled(carbonTable.getDatabaseName,
      carbonTable.getTableName)) {
      LOGGER.info("Clearing cache from IndexServer")
      IndexUtil.executeClearIndexJob(carbonTable, IndexUtil.DISTRIBUTED_JOB_NAME)
    }
    if (cache != null) {
      LOGGER.info("Clearing cache from driver side")
      IndexStoreManager.getInstance().clearIndex(carbonTable.getAbsoluteTableIdentifier)
    }
    val viewManager = MVManagerInSpark.get(sparkSession)
    val viewsOnTable = viewManager.getSchemasOnTable(carbonTable)
    if (!viewsOnTable.isEmpty) {
      viewsOnTable.asScala.foreach {
        viewSchema =>
          val viewIdentifier = new TableIdentifier(
            viewSchema.getIdentifier.getTableName,
            Option(viewSchema.getIdentifier.getDatabaseName)
          )
          CarbonDropCacheCommand(viewIdentifier, internalCall = true).run(sparkSession)
      }
    }
    LOGGER.info("Drop cache request served for table " + carbonTable.getTableUniqueName)
  }

  override protected def opName: String = "DROP METACACHE"
} 
Example 57
Source File: CarbonShowTablesCommand.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.command.table

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.execution.command.MetadataCommand
import org.apache.spark.sql.types.{BooleanType, StringType}


private[sql] case class CarbonShowTablesCommand(databaseName: Option[String],
    tableIdentifierPattern: Option[String]) extends MetadataCommand {

  // The result of SHOW TABLES has three columns: database, tableName and isTemporary.
  override val output: Seq[Attribute] = {
    AttributeReference("database", StringType, nullable = false)() ::
    AttributeReference("tableName", StringType, nullable = false)() ::
    AttributeReference("isTemporary", BooleanType, nullable = false)() :: Nil
  }

  override def processMetadata(sparkSession: SparkSession): Seq[Row] = {
    // Since we need to return a Seq of rows, we will call getTables directly
    // instead of calling tables in sparkSession.
    val catalog = sparkSession.sessionState.catalog
    val db = databaseName.getOrElse(catalog.getCurrentDatabase)
    val tables =
      tableIdentifierPattern.map(catalog.listTables(db, _)).getOrElse(catalog.listTables(db))
    val externalCatalog = sparkSession.sharedState.externalCatalog
    // this method checks whether the table is mainTable or MV based on property "isVisible"
    def isMainTable(tableIdent: TableIdentifier) = {
      var isMainTable = true
      try {
        isMainTable = externalCatalog.getTable(db, tableIdent.table).storage.properties
          .getOrElse("isVisible", true).toString.toBoolean
      } catch {
        case _: Throwable =>
          // ignore the exception for show tables
      }
      isMainTable
    }
    // tables will be filtered for all the MVs to show only main tables
    tables.collect {
      case tableIdent if isMainTable(tableIdent) =>
        val isTemp = catalog.isTemporaryTable(tableIdent)
        Row(tableIdent.database.getOrElse("default"), tableIdent.table, isTemp)
    }

  }

  override protected def opName: String = "SHOW TABLES"
} 
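A minimal stand-alone sketch of the session-catalog calls used above (listTables and isTemporaryTable); the object name and the pattern "cache*" are only illustrative assumptions, not part of CarbonData:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier

object ListTablesSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("ListTablesSketch").getOrCreate()
    val catalog = spark.sessionState.catalog
    // List the TableIdentifiers in the current database that match a LIKE-style pattern.
    val tables: Seq[TableIdentifier] = catalog.listTables(catalog.getCurrentDatabase, "cache*")
    tables.foreach { ident =>
      // ident.database is None for temporary views, Some(db) for catalog tables.
      println(s"${ident.database.getOrElse("default")}.${ident.table} " +
        s"temporary=${catalog.isTemporaryTable(ident)}")
    }
    spark.stop()
  }
}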
Example 58
Source File: CarbonCreateTableAsSelectCommand.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.command.table

import org.apache.spark.sql.{CarbonEnv, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.command.AtomicRunnableCommand
import org.apache.spark.sql.execution.command.management.CarbonInsertIntoCommand

import org.apache.carbondata.core.metadata.schema.table.TableInfo


case class CarbonCreateTableAsSelectCommand(
    tableInfo: TableInfo,
    query: LogicalPlan,
    ifNotExistsSet: Boolean = false,
    tableLocation: Option[String] = None) extends AtomicRunnableCommand {

  var insertIntoCommand: CarbonInsertIntoCommand = _

  override def processMetadata(sparkSession: SparkSession): Seq[Row] = {
    val tableName = tableInfo.getFactTable.getTableName
    var isTableCreated = false
    var databaseOpt: Option[String] = None
    if (tableInfo.getDatabaseName != null) {
      databaseOpt = Some(tableInfo.getDatabaseName)
    }
    val dbName = CarbonEnv.getDatabaseName(databaseOpt)(sparkSession)
    setAuditTable(dbName, tableName)
    setAuditInfo(Map("query" -> query.simpleString))
    // check if table already exists
    if (sparkSession.sessionState.catalog
      .tableExists(TableIdentifier(tableName, Some(dbName)))) {
      if (!ifNotExistsSet) {
        throw new TableAlreadyExistsException(dbName, tableName)
      }
    } else {
      // execute command to create carbon table
      CarbonCreateTableCommand(tableInfo, ifNotExistsSet, tableLocation).run(sparkSession)
      isTableCreated = true
    }

    if (isTableCreated) {
      val tableName = tableInfo.getFactTable.getTableName
      var databaseOpt: Option[String] = None
      if (tableInfo.getDatabaseName != null) {
        databaseOpt = Some(tableInfo.getDatabaseName)
      }
      val dbName = CarbonEnv.getDatabaseName(databaseOpt)(sparkSession)
      val carbonDataSourceHadoopRelation = CarbonEnv.getInstance(sparkSession).carbonMetaStore
        .createCarbonDataSourceHadoopRelation(sparkSession,
          TableIdentifier(tableName, Option(dbName)))
      // execute command to load data into carbon table
      insertIntoCommand = CarbonInsertIntoCommand(
        databaseNameOp = Some(carbonDataSourceHadoopRelation.carbonRelation.databaseName),
        tableName = carbonDataSourceHadoopRelation.carbonRelation.tableName,
        options = scala.collection.immutable
          .Map("fileheader" ->
               carbonDataSourceHadoopRelation.tableSchema.get.fields.map(_.name).mkString(",")),
        isOverwriteTable = false,
        logicalPlan = query,
        tableInfo = tableInfo)
      insertIntoCommand.processMetadata(sparkSession)
    }
    Seq.empty
  }

  override def processData(sparkSession: SparkSession): Seq[Row] = {
    if (null != insertIntoCommand) {
      insertIntoCommand.processData(sparkSession)
    }
    Seq.empty
  }

  override def undoMetadata(sparkSession: SparkSession, exception: Exception): Seq[Row] = {
    val tableName = tableInfo.getFactTable.getTableName
    var databaseOpt: Option[String] = None
    if (tableInfo.getDatabaseName != null) {
      databaseOpt = Some(tableInfo.getDatabaseName)
    }
    val dbName = CarbonEnv.getDatabaseName(databaseOpt)(sparkSession)
    // drop the created table.
    CarbonDropTableCommand(
      ifExistsSet = false,
      Option(dbName), tableName).run(sparkSession)
    Seq.empty
  }

  override protected def opName: String = "CREATE TABLE AS SELECT"
} 
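The existence check in processMetadata above follows a common pattern; here is a small sketch of it in isolation (the helper object and method names are hypothetical, not part of CarbonData):

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException

object TableExistenceSketch {
  // Returns true when it is safe to create the table; throws when it already exists
  // and IF NOT EXISTS was not requested, mirroring the CTAS command above.
  def ensureAbsent(spark: SparkSession, db: String, table: String, ifNotExists: Boolean): Boolean = {
    val exists = spark.sessionState.catalog.tableExists(TableIdentifier(table, Some(db)))
    if (exists && !ifNotExists) {
      throw new TableAlreadyExistsException(db, table)
    }
    !exists
  }
}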
Example 59
Source File: CarbonCreateTableLikeCommand.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.command.table

import java.util
import java.util.UUID

import org.apache.spark.sql.{CarbonEnv, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.command.MetadataCommand

import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
import org.apache.carbondata.common.logging.LogServiceFactory
import org.apache.carbondata.core.metadata.schema.{SchemaEvolution, SchemaEvolutionEntry}
import org.apache.carbondata.core.metadata.schema.table.{TableInfo, TableSchema}


case class CarbonCreateTableLikeCommand(
    sourceTable: TableIdentifier,
    targetTable: TableIdentifier,
    ifNotExists: Boolean = false) extends MetadataCommand {

  private val LOGGER = LogServiceFactory.getLogService(this.getClass.getName)

  override def processMetadata(sparkSession: SparkSession): Seq[Row] = {
    val srcTable = CarbonEnv.getCarbonTable(sourceTable.database, sourceTable.table)(sparkSession)
    if (!srcTable.isTransactionalTable) {
      throw new MalformedCarbonCommandException("Unsupported operation on non transactional table")
    }
    if (srcTable.isMV) {
      throw new MalformedCarbonCommandException("Unsupported operation on child table or MV")
    }

    // copy schema of source table and update fields to target table
    val dstTableSchema = srcTable.getTableInfo.getFactTable.clone().asInstanceOf[TableSchema]
    // remove index information in source table tblProperties
    dstTableSchema.getTableProperties.remove(srcTable.getTableId)
    dstTableSchema.setTableName(targetTable.table)
    dstTableSchema.setTableId(UUID.randomUUID().toString)

    val schemaEvol: SchemaEvolution = new SchemaEvolution
    val schEntryList: util.List[SchemaEvolutionEntry] = new util.ArrayList[SchemaEvolutionEntry]
    schemaEvol.setSchemaEvolutionEntryList(schEntryList)
    dstTableSchema.setSchemaEvolution(schemaEvol)

    // build table info for creating table
    val dstTableInfo = new TableInfo
    val dstDB = targetTable.database.getOrElse(sparkSession.catalog.currentDatabase)
    dstTableInfo.setDatabaseName(dstDB)
    dstTableInfo.setLastUpdatedTime(System.currentTimeMillis())
    dstTableInfo.setFactTable(dstTableSchema)

    CarbonCreateTableCommand(dstTableInfo, ifNotExists).run(sparkSession)
  }

  override protected def opName: String = "CREATE TABLE LIKE"
} 
Example 60
Source File: CarbonAlterTableSetCommand.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.command.schema

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.command._
import org.apache.spark.util.AlterTableUtil

private[sql] case class CarbonAlterTableSetCommand(
    tableIdentifier: TableIdentifier,
    properties: Map[String, String],
    isView: Boolean)
  extends MetadataCommand {

  override def processMetadata(sparkSession: SparkSession): Seq[Row] = {
    setAuditTable(tableIdentifier.database.getOrElse(sparkSession.catalog.currentDatabase),
      tableIdentifier.table)
    AlterTableUtil.modifyTableProperties(
      tableIdentifier,
      properties,
      Nil,
      set = true)(sparkSession,
      sparkSession.sessionState.catalog)
    setAuditInfo(properties)
    Seq.empty
  }

  override protected def opName: String = "ALTER TABLE SET"
} 
Example 61
Source File: CarbonAlterTableUnsetCommand.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.command.schema

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.command._
import org.apache.spark.util.AlterTableUtil


private[sql] case class CarbonAlterTableUnsetCommand(
    tableIdentifier: TableIdentifier,
    propKeys: Seq[String],
    ifExists: Boolean,
    isView: Boolean)
  extends MetadataCommand {

  override def processMetadata(sparkSession: SparkSession): Seq[Row] = {
    setAuditTable(tableIdentifier.database.getOrElse(sparkSession.catalog.currentDatabase),
      tableIdentifier.table)
    AlterTableUtil.modifyTableProperties(tableIdentifier, Map.empty[String, String],
      propKeys, set = false)(sparkSession,
      sparkSession.sessionState.catalog)
    setAuditInfo(Map("unset" -> propKeys.mkString(", ")))
    Seq.empty
  }

  override protected def opName: String = "ALTER TABLE UNSET"
} 
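Both ALTER commands above resolve a missing database part against the current session before auditing; a tiny sketch of that resolution (the helper object and method names are hypothetical):

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier

object QualifyIdentifierSketch {
  // Fill in the database of a TableIdentifier from the session's current database
  // when the user did not qualify the table name.
  def qualify(spark: SparkSession, ident: TableIdentifier): TableIdentifier =
    if (ident.database.isDefined) ident
    else ident.copy(database = Some(spark.catalog.currentDatabase))
}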
Example 62
Source File: DropCacheEventListeners.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.listeners

import scala.collection.JavaConverters._
import scala.collection.mutable

import org.apache.log4j.Logger
import org.apache.spark.sql.{CarbonEnv, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.command.cache.{CacheUtil, CarbonDropCacheCommand}

import org.apache.carbondata.common.logging.LogServiceFactory
import org.apache.carbondata.core.cache.CacheProvider
import org.apache.carbondata.core.metadata.index.IndexType
import org.apache.carbondata.core.metadata.schema.table.IndexSchema
import org.apache.carbondata.core.view.MVSchema
import org.apache.carbondata.events.{DropTableCacheEvent, Event, OperationContext, OperationEventListener}
import org.apache.carbondata.view.MVManagerInSpark

object DropCacheMVEventListener extends OperationEventListener {

  val LOGGER: Logger = LogServiceFactory.getLogService(this.getClass.getCanonicalName)

  
  override protected def onEvent(event: Event, operationContext: OperationContext): Unit = {
    event match {
      case dropCacheEvent: DropTableCacheEvent =>
        val carbonTable = dropCacheEvent.carbonTable
        val cache = CacheProvider.getInstance().getCarbonCache
        val indexProviderMap = carbonTable.getIndexesMap
        val bloomIndexProvider = IndexType.BLOOMFILTER.getIndexProviderName
        if (!indexProviderMap.isEmpty && null != indexProviderMap.get(bloomIndexProvider)) {
          val bloomIndexes = indexProviderMap.get(bloomIndexProvider)
          val bloomIndexIterator = bloomIndexes.entrySet().iterator()
          while (bloomIndexIterator.hasNext) {
            val bloomIndexEntry = bloomIndexIterator.next()
            val index = new IndexSchema(bloomIndexEntry.getKey, bloomIndexProvider)
            index.setProperties(bloomIndexEntry.getValue)
            try {
              // Get index keys
              val indexKeys = CacheUtil.getBloomCacheKeys(carbonTable, index)

              // remove index keys from cache
              cache.removeAll(indexKeys.asJava)
            } catch {
              case e: Exception =>
                LOGGER.warn(
                  s"Clean cache for Bloom index ${ index.getIndexName } failed.", e)
            }
          }
        }
    }
  }
} 
Example 63
Source File: CarbonExpressions.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Expression, ScalaUDF}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
import org.apache.spark.sql.execution.command.DescribeTableCommand
import org.apache.spark.sql.types.DataType


// Only the ScalaUDF extractor from CarbonExpressions.scala is shown in this listing;
// the enclosing object is restored here so the snippet compiles on its own.
object CarbonExpressions {

  object CarbonScalaUDF {
    def unapply(expression: Expression): Option[(ScalaUDF)] = {
      expression match {
        case a: ScalaUDF =>
          Some(a)
        case _ =>
          None
      }
    }
  }
} 
Example 64
Source File: TableLoader.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.util

import java.util.Properties

import scala.collection.{immutable, mutable}

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.command.management.CarbonLoadDataCommand

import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.datastore.impl.FileFactory
import org.apache.carbondata.core.util.CarbonProperties


// scalastyle:off
object TableLoader {

  def extractOptions(propertiesFile: String): immutable.Map[String, String] = {
    val props = new Properties
    val path = new Path(propertiesFile)
    val fs = path.getFileSystem(FileFactory.getConfiguration)
    props.load(fs.open(path))
    val elements = props.entrySet().iterator()
    val map = new mutable.HashMap[String, String]()
    System.out.println("properties file:")
    while (elements.hasNext) {
      val element = elements.next()
      System.out.println(s"${element.getKey}=${element.getValue}")
      map.put(element.getKey.asInstanceOf[String], element.getValue.asInstanceOf[String])
    }

    immutable.Map(map.toSeq: _*)
  }

  def extractStorePath(map: immutable.Map[String, String]): String = {
    map.get(CarbonCommonConstants.STORE_LOCATION) match {
      case Some(path) => path
      case None => throw new Exception(s"${CarbonCommonConstants.STORE_LOCATION} can't be empty")
    }
  }

  def loadTable(spark: SparkSession, dbName: Option[String], tableName: String, inputPaths: String,
      options: scala.collection.immutable.Map[String, String]): Unit = {
    CarbonLoadDataCommand(dbName, tableName, inputPaths, Nil, options, false).run(spark)
  }

  def main(args: Array[String]): Unit = {
    if (args.length < 3) {
      System.err.println("Usage: TableLoader <properties file> <table name> <input files>")
      System.exit(1)
    }
    System.out.println("parameter list:")
    args.foreach(System.out.println)
    val map = extractOptions(TableAPIUtil.escape(args(0)))
    val storePath = extractStorePath(map)
    System.out.println(s"${CarbonCommonConstants.STORE_LOCATION}:$storePath")
    val (dbName, tableName) = TableAPIUtil.parseSchemaName(TableAPIUtil.escape(args(1)))
    System.out.println(s"table name: $dbName.$tableName")
    val inputPaths = TableAPIUtil.escape(args(2))

    val spark = TableAPIUtil.spark(storePath, s"TableLoader: $dbName.$tableName")

    loadTable(spark, Option(dbName), tableName, inputPaths, map)
  }

} 
Example 65
Source File: CleanFiles.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.util

import org.apache.spark.sql.{CarbonEnv, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier

import org.apache.carbondata.api.CarbonStore


// The enclosing `object CleanFiles` declaration (elided in this listing) is restored
// here so the snippet compiles on its own.
object CleanFiles {

  def cleanFiles(spark: SparkSession, dbName: String, tableName: String,
     forceTableClean: Boolean = false): Unit = {
    TableAPIUtil.validateTableExists(spark, dbName, tableName)
    val tablePath = CarbonEnv.getTablePath(Some(dbName), tableName)(spark)
    val carbonTable = if (!forceTableClean) {
      CarbonEnv.getCarbonTable(Some(dbName), tableName)(spark)
    } else {
      null
    }
    CarbonStore.cleanFiles(
      dbName = dbName,
      tableName = tableName,
      tablePath = tablePath,
      carbonTable = carbonTable,
      forceTableClean = forceTableClean)
  }

  def main(args: Array[String]): Unit = {

    if (args.length < 2) {
      System.err.println("Usage: CleanFiles <store path> <table name>")
      System.exit(1)
    }

    val storePath = TableAPIUtil.escape(args(0))
    val (dbName, tableName) = TableAPIUtil.parseSchemaName(TableAPIUtil.escape(args(1)))
    var forceTableClean = false
    if (args.length > 2) {
      forceTableClean = args(2).toBoolean
    }
    val spark = TableAPIUtil.spark(storePath, s"CleanFiles: $dbName.$tableName")
    cleanFiles(spark, dbName, tableName, forceTableClean)
  }
} 
Example 66
Source File: DeleteSegmentById.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.util

import org.apache.spark.sql.{CarbonEnv, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier

import org.apache.carbondata.api.CarbonStore


// scalastyle:off
object DeleteSegmentById {

  def extractSegmentIds(segmentIds: String): Seq[String] = {
    segmentIds.split(",").toSeq
  }

  def deleteSegmentById(spark: SparkSession, dbName: String, tableName: String,
      segmentIds: Seq[String]): Unit = {
    TableAPIUtil.validateTableExists(spark, dbName, tableName)
    val carbonTable = CarbonEnv.getCarbonTable(Some(dbName), tableName)(spark)
    CarbonStore.deleteLoadById(segmentIds, dbName, tableName, carbonTable)
  }

  def main(args: Array[String]): Unit = {

    if (args.length < 3) {
      System.err.println(
        "Usage: DeleteSegmentByID <store path> <table name> <segment id list>")
      System.exit(1)
    }

    val storePath = TableAPIUtil.escape(args(0))
    val (dbName, tableName) = TableAPIUtil.parseSchemaName(TableAPIUtil.escape(args(1)))
    val segmentIds = extractSegmentIds(TableAPIUtil.escape(args(2)))
    val spark = TableAPIUtil.spark(storePath, s"DeleteSegmentById: $dbName.$tableName")
    deleteSegmentById(spark, dbName, tableName, segmentIds)
  }
} 
Example 67
Source File: DeleteSegmentByDate.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.util

import org.apache.spark.sql.{CarbonEnv, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier

import org.apache.carbondata.api.CarbonStore


// scalastyle:off
object DeleteSegmentByDate {

  def deleteSegmentByDate(spark: SparkSession, dbName: String, tableName: String,
      dateValue: String): Unit = {
    TableAPIUtil.validateTableExists(spark, dbName, tableName)
    val carbonTable = CarbonEnv.getCarbonTable(Some(dbName), tableName)(spark)
    CarbonStore.deleteLoadByDate(dateValue, dbName, tableName, carbonTable)
  }

  def main(args: Array[String]): Unit = {
    if (args.length < 3) {
      System.err.println(
        "Usage: DeleteSegmentByDate <store path> <table name> <before date value>")
      System.exit(1)
    }

    val storePath = TableAPIUtil.escape(args(0))
    val (dbName, tableName) = TableAPIUtil.parseSchemaName(TableAPIUtil.escape(args(1)))
    val dateValue = TableAPIUtil.escape(args(2))
    val spark = TableAPIUtil.spark(storePath, s"DeleteSegmentByDate: $dbName.$tableName")
    deleteSegmentByDate(spark, dbName, tableName, dateValue)
  }
} 
Example 68
Source File: Compaction.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.util

import org.apache.spark.sql.{CarbonEnv, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.command.AlterTableModel
import org.apache.spark.sql.execution.command.management.CarbonAlterTableCompactionCommand
import org.apache.spark.sql.util.CarbonException

import org.apache.carbondata.core.constants.CarbonCommonConstants


// scalastyle:off
object Compaction {

  def compaction(spark: SparkSession, dbName: String, tableName: String,
      compactionType: String): Unit = {
    TableAPIUtil.validateTableExists(spark, dbName, tableName)
    if (compactionType.equalsIgnoreCase(CarbonCommonConstants.MAJOR) ||
        compactionType.equalsIgnoreCase(CarbonCommonConstants.MINOR)) {
      CarbonAlterTableCompactionCommand(AlterTableModel(Some(dbName),
        tableName,
        None,
        compactionType,
        Some(System.currentTimeMillis()),
        "")).run(spark)
    } else {
      CarbonException.analysisException("Compaction type is wrong. Please select minor or major.")
    }
  }

  def main(args: Array[String]): Unit = {
    if (args.length < 3) {
      System.err.println("Usage: Compaction <store path> <table name> <major|minor>")
      System.exit(1)
    }

    val storePath = TableAPIUtil.escape(args(0))
    val (dbName, tableName) = TableAPIUtil.parseSchemaName(TableAPIUtil.escape(args(1)))
    val compactionType = TableAPIUtil.escape(args(2))
    val spark = TableAPIUtil.spark(storePath, s"Compaction: $dbName.$tableName")
    compaction(spark, dbName, tableName, compactionType)
  }
} 
Example 69
Source File: CarbonDataSourceScan.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.strategy

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.expressions.{Attribute, SortOrder}
import org.apache.spark.sql.catalyst.plans.physical.Partitioning
import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
import org.apache.spark.sql.execution.FileSourceScanExec
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation}


class CarbonDataSourceScan(
    override val output: Seq[Attribute],
    val rdd: RDD[InternalRow],
    @transient override val relation: HadoopFsRelation,
    val partitioning: Partitioning,
    val md: Map[String, String],
    identifier: Option[TableIdentifier],
    @transient private val logicalRelation: LogicalRelation)
  extends FileSourceScanExec(
    relation,
    output,
    relation.dataSchema,
    Seq.empty,
    Seq.empty,
    identifier) {

  // added lazy since spark 2.3.2 version (SPARK-PR#21815)
  override lazy val supportsBatch: Boolean = true

  // added lazy since spark 2.3.2 version (SPARK-PR#21815)
  override lazy val (outputPartitioning, outputOrdering): (Partitioning, Seq[SortOrder]) =
    (partitioning, Nil)

  // added lazy since spark 2.3.2 version (SPARK-PR#21815)
  override lazy val metadata: Map[String, String] = md

  override def inputRDDs(): Seq[RDD[InternalRow]] = rdd :: Nil

} 
Example 70
Source File: SparkSqlAdapter.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql

import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.execution.FileSourceScanExec
import org.apache.spark.sql.execution.datasources.HadoopFsRelation
import org.apache.spark.sql.types.StructType

import org.apache.carbondata.core.util.ThreadLocalSessionInfo

object SparkSqlAdapter {

  def initSparkSQL(): Unit = {
  }

  def getScanForSegments(
      @transient relation: HadoopFsRelation,
      output: Seq[Attribute],
      outputSchema: StructType,
      partitionFilters: Seq[Expression],
      dataFilters: Seq[Expression],
      tableIdentifier: Option[TableIdentifier]
  ): FileSourceScanExec = {
    FileSourceScanExec(
      relation,
      output,
      outputSchema,
      partitionFilters,
      dataFilters,
      tableIdentifier)
  }

  def addSparkSessionListener(sparkSession: SparkSession): Unit = {
    sparkSession.sparkContext.addSparkListener(new SparkListener {
      override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = {
        CarbonEnv.carbonEnvMap.remove(sparkSession)
        ThreadLocalSessionInfo.unsetAll()
      }
    })
  }
} 
Example 71
Source File: CarbonDataSourceScan.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.strategy

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.expressions.{Attribute, SortOrder}
import org.apache.spark.sql.catalyst.plans.physical.Partitioning
import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
import org.apache.spark.sql.execution.FileSourceScanExec
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation}


class CarbonDataSourceScan(
    override val output: Seq[Attribute],
    val rdd: RDD[InternalRow],
    @transient override val relation: HadoopFsRelation,
    val partitioning: Partitioning,
    val md: Map[String, String],
    identifier: Option[TableIdentifier],
    @transient private val logicalRelation: LogicalRelation)
  extends FileSourceScanExec(
    relation,
    output,
    relation.dataSchema,
    Seq.empty,
    None,
    Seq.empty,
    identifier) {

  // added lazy since spark 2.3.2 version (SPARK-PR#21815)
  override lazy val supportsBatch: Boolean = true

  // added lazy since spark 2.3.2 version (SPARK-PR#21815)
  override lazy val (outputPartitioning, outputOrdering): (Partitioning, Seq[SortOrder]) =
    (partitioning, Nil)

  // added lazy since spark 2.3.2 version (SPARK-PR#21815)
  override lazy val metadata: Map[String, String] = md

  override def inputRDDs(): Seq[RDD[InternalRow]] = rdd :: Nil

} 
Example 72
Source File: SparkSqlAdapter.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.execution.FileSourceScanExec
import org.apache.spark.sql.execution.datasources.HadoopFsRelation
import org.apache.spark.sql.types.StructType

object SparkSqlAdapter {

  def initSparkSQL(): Unit = {
  }

  def getScanForSegments(
      @transient relation: HadoopFsRelation,
      output: Seq[Attribute],
      outputSchema: StructType,
      partitionFilters: Seq[Expression],
      dataFilters: Seq[Expression],
      tableIdentifier: Option[TableIdentifier]
  ): FileSourceScanExec = {
    FileSourceScanExec(
      relation,
      output,
      outputSchema,
      partitionFilters,
      None,
      dataFilters,
      tableIdentifier)
  }
} 
Example 73
Source File: AlterTableUpgradeSegmentTest.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.carbondata.restructure

import org.apache.spark.sql.CarbonEnv
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.test.util.QueryTest
import org.scalatest.BeforeAndAfterAll

import org.apache.carbondata.core.metadata.schema.table.CarbonTable
import org.apache.carbondata.core.statusmanager.SegmentStatusManager
import org.apache.carbondata.core.util.path.CarbonTablePath
import org.apache.carbondata.processing.util.CarbonLoaderUtil

class AlterTableUpgradeSegmentTest extends QueryTest with BeforeAndAfterAll {
  override protected def beforeAll(): Unit = {
    sql("drop table if exists altertest")
    sql("create table altertest(a string) STORED AS carbondata")
    sql("insert into altertest select 'k'")
    sql("insert into altertest select 'tttttt'")
  }

  private def removeDataAndIndexSizeFromTableStatus(table: CarbonTable): Unit = {
    val loadMetaDataDetails = SegmentStatusManager.readTableStatusFile(CarbonTablePath
      .getTableStatusFilePath(table.getTablePath))
    loadMetaDataDetails.foreach { loadMetaDataDetail =>
      loadMetaDataDetail.setIndexSize("0")
      loadMetaDataDetail.setDataSize("0")
    }
    SegmentStatusManager.writeLoadDetailsIntoFile(CarbonTablePath
      .getTableStatusFilePath(table.getTablePath), loadMetaDataDetails)
  }

  test("test alter table upgrade segment test") {
    val carbonTable = CarbonEnv.getCarbonTable(TableIdentifier("altertest"))(sqlContext.sparkSession)
    removeDataAndIndexSizeFromTableStatus(carbonTable)
    val loadMetaDataDetails = SegmentStatusManager.readTableStatusFile(CarbonTablePath
      .getTableStatusFilePath(carbonTable.getTablePath))
    loadMetaDataDetails.foreach { detail =>
      assert(detail.getIndexSize.toInt + detail.getDataSize.toInt == 0)
    }
    sql("alter table altertest compact 'upgrade_segment'")
    val loadMetaDataDetailsNew = SegmentStatusManager.readTableStatusFile(CarbonTablePath
      .getTableStatusFilePath(carbonTable.getTablePath))
    loadMetaDataDetailsNew.foreach { detail =>
      assert(detail.getIndexSize.toInt != 0)
      assert(detail.getDataSize.toInt != 0)
    }
  }

  override protected def afterAll(): Unit = {
    sql("drop table if exists altertest")
  }
} 
Example 74
Source File: CacheRefreshTestCase.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.carbondata.spark.testsuite.cloud

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.hive.CarbonHiveIndexMetadataUtil
import org.apache.spark.sql.test.util.QueryTest
import org.scalatest.BeforeAndAfterAll

class CacheRefreshTestCase extends QueryTest with BeforeAndAfterAll {

  override protected def beforeAll(): Unit = {
    sql("drop database if exists cachedb cascade")
    sql("create database cachedb")
    sql("use cachedb")
  }

  override protected def afterAll(): Unit = {
    sql("use default")
    sql("drop database if exists cachedb cascade")
  }

  test("test cache refresh") {
    sql("create table tbl_cache1(col1 string, col2 int, col3 int) using carbondata")
    sql("insert into tbl_cache1 select 'a', 123, 345")
    CarbonHiveIndexMetadataUtil.invalidateAndDropTable(
      "cachedb", "tbl_cache1", sqlContext.sparkSession)
    // discard cached table info in cachedDataSourceTables
    val tableIdentifier = TableIdentifier("tbl_cache1", Option("cachedb"))
    sqlContext.sparkSession.sessionState.catalog.refreshTable(tableIdentifier)
    sql("create table tbl_cache1(col1 string, col2 int, col3 int) using carbondata")
    sql("delete from tbl_cache1")
    sql("insert into tbl_cache1 select 'b', 123, 345")
    checkAnswer(sql("select * from tbl_cache1"),
      Seq(Row("b", 123, 345)))
  }
} 
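A hedged sketch of the cache-refresh step used in the test above, as a stand-alone helper (the object name and parameters are placeholders):

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier

object RefreshTableSketch {
  // Invalidate the cached relation and file listing for a table after its metadata
  // or data changed outside the current session.
  def refresh(spark: SparkSession, db: String, table: String): Unit = {
    val ident = TableIdentifier(table, Some(db))
    spark.sessionState.catalog.refreshTable(ident)
  }
}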
Example 75
Source File: DescribeDeltaHistoryCommand.scala    From delta   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.delta.commands

// scalastyle:off import.ordering.noEmptyLine
import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog, DeltaTableIdentifier}
import org.apache.spark.sql.delta.actions.CommitInfo
import org.apache.spark.sql.delta.metering.DeltaLogging
import org.apache.hadoop.fs.Path

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation
import org.apache.spark.sql.catalyst.catalog.CatalogTableType
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics}
import org.apache.spark.sql.execution.command.RunnableCommand


case class DescribeDeltaHistoryCommand(
    path: Option[String],
    tableIdentifier: Option[TableIdentifier],
    limit: Option[Int],
    override val output: Seq[Attribute] = ExpressionEncoder[CommitInfo]().schema.toAttributes)
  extends RunnableCommand with DeltaLogging {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val basePath =
      if (path.nonEmpty) {
        new Path(path.get)
      } else if (tableIdentifier.nonEmpty) {
        val sessionCatalog = sparkSession.sessionState.catalog
        lazy val metadata = sessionCatalog.getTableMetadata(tableIdentifier.get)

        DeltaTableIdentifier(sparkSession, tableIdentifier.get) match {
          case Some(id) if id.path.nonEmpty =>
            new Path(id.path.get)
          case Some(id) if id.table.nonEmpty =>
            new Path(metadata.location)
          case _ =>
            if (metadata.tableType == CatalogTableType.VIEW) {
              throw DeltaErrors.describeViewHistory
            }
            throw DeltaErrors.notADeltaTableException("DESCRIBE HISTORY")
        }
      } else {
        throw DeltaErrors.missingTableIdentifierException("DESCRIBE HISTORY")
      }

    // Max array size
    if (limit.exists(_ > Int.MaxValue - 8)) {
      throw new IllegalArgumentException("Please use a limit less than Int.MaxValue - 8.")
    }

    val deltaLog = DeltaLog.forTable(sparkSession, basePath)
    recordDeltaOperation(deltaLog, "delta.ddl.describeHistory") {
      if (deltaLog.snapshot.version == -1) {
        throw DeltaErrors.notADeltaTableException("DESCRIBE HISTORY")
      }

      import sparkSession.implicits._
      deltaLog.history.getHistory(limit).toDF().collect().toSeq
    }
  }
} 
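The command above resolves a TableIdentifier to its catalog metadata before reading the Delta log; a minimal sketch of just that lookup (the helper name and error text are assumptions):

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.CatalogTableType

object TableLocationSketch {
  // Look up a table's catalog entry and return its storage location, rejecting views
  // in the same spirit as DESCRIBE HISTORY above.
  def location(spark: SparkSession, ident: TableIdentifier): java.net.URI = {
    val metadata = spark.sessionState.catalog.getTableMetadata(ident)
    require(metadata.tableType != CatalogTableType.VIEW, "views have no storage location")
    metadata.location
  }
}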
Example 76
Source File: DeltaGenerateCommand.scala    From delta   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.delta.commands

import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog, DeltaTableIdentifier}
import org.apache.spark.sql.delta.hooks.GenerateSymlinkManifest
import org.apache.hadoop.fs.Path

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.sql.execution.command.RunnableCommand

case class DeltaGenerateCommand(modeName: String, tableId: TableIdentifier)
  extends RunnableCommand {

  import DeltaGenerateCommand._

  override def run(sparkSession: SparkSession): Seq[Row] = {
    if (!modeNameToGenerationFunc.contains(modeName)) {
      throw DeltaErrors.unsupportedGenerateModeException(modeName)
    }

    val tablePath = DeltaTableIdentifier(sparkSession, tableId) match {
      case Some(id) if id.path.isDefined =>
        new Path(id.path.get)
      case _ =>
        new Path(sparkSession.sessionState.catalog.getTableMetadata(tableId).location)
    }

    val deltaLog = DeltaLog.forTable(sparkSession, tablePath)
    if (deltaLog.snapshot.version < 0) {
      throw DeltaErrors.notADeltaTableException("GENERATE")
    }
    val generationFunc = modeNameToGenerationFunc(modeName)
    generationFunc(sparkSession, deltaLog)
    Seq.empty
  }
}

object DeltaGenerateCommand {
  val modeNameToGenerationFunc = CaseInsensitiveMap(
    Map[String, (SparkSession, DeltaLog) => Unit](
    "symlink_format_manifest" -> GenerateSymlinkManifest.generateFullManifest
  ))
} 
Example 77
Source File: DeltaUnsupportedOperationsCheck.scala    From delta   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.delta

import org.apache.spark.sql.delta.catalog.DeltaTableV2
import org.apache.spark.sql.delta.metering.DeltaLogging
import org.apache.spark.sql.delta.sources.DeltaSourceUtils

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, V2WriteCommand}
import org.apache.spark.sql.execution.command._
import org.apache.spark.sql.execution.datasources.RefreshTable
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation


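  // Note: this is a single member of the DeltaUnsupportedOperationsCheck rule; the
  // enclosing class declaration and its other checks are not included in this listing.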
  private def checkDeltaTableExists(command: V2WriteCommand, operation: String): Unit = {
    command.table match {
      case DeltaRelation(lr) =>
        // the extractor performs the check that we want if this is indeed being called on a Delta
        // table. It should leave others unchanged
        if (DeltaFullTable.unapply(lr).isEmpty) {
          throw DeltaErrors.notADeltaTableException(operation)
        }
      case _ =>
    }
  }
} 
Example 78
Source File: DeltaTableIdentifier.scala    From delta   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.delta

import org.apache.spark.sql.delta.sources.DeltaSourceUtils
import org.apache.hadoop.fs.Path

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.internal.SQLConf


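  // Note: only the companion object's apply method is shown here; the enclosing
  // `object DeltaTableIdentifier` declaration and the isDeltaPath helper it calls
  // are not included in this listing.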
  def apply(spark: SparkSession, identifier: TableIdentifier): Option[DeltaTableIdentifier] = {
    if (isDeltaPath(spark, identifier)) {
      Some(DeltaTableIdentifier(path = Option(identifier.table)))
    } else if (DeltaTableUtils.isDeltaTable(spark, identifier)) {
      Some(DeltaTableIdentifier(table = Option(identifier)))
    } else {
      None
    }
  }
} 
Example 79
Source File: VacuumTableCommand.scala    From delta   with Apache License 2.0 5 votes vote down vote up
package io.delta.tables.execution

import org.apache.hadoop.fs.Path
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog, DeltaTableIdentifier, DeltaTableUtils}
import org.apache.spark.sql.delta.commands.VacuumCommand
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.types.StringType


case class VacuumTableCommand(
    path: Option[String],
    table: Option[TableIdentifier],
    horizonHours: Option[Double],
    dryRun: Boolean) extends RunnableCommand {

  override val output: Seq[Attribute] =
    Seq(AttributeReference("path", StringType, nullable = true)())

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val pathToVacuum =
      if (path.nonEmpty) {
        new Path(path.get)
      } else if (table.nonEmpty) {
        DeltaTableIdentifier(sparkSession, table.get) match {
          case Some(id) if id.path.nonEmpty =>
            new Path(id.path.get)
          case _ =>
            new Path(sparkSession.sessionState.catalog.getTableMetadata(table.get).location)
        }
      } else {
        throw DeltaErrors.missingTableIdentifierException("VACUUM")
      }
    val baseDeltaPath = DeltaTableUtils.findDeltaTableRoot(sparkSession, pathToVacuum)
    if (baseDeltaPath.isDefined) {
      if (baseDeltaPath.get != pathToVacuum) {
        throw DeltaErrors.vacuumBasePathMissingException(baseDeltaPath.get)
      }
    }
    val deltaLog = DeltaLog.forTable(sparkSession, pathToVacuum)
    if (deltaLog.snapshot.version == -1) {
      throw DeltaErrors.notADeltaTableException(
        "VACUUM",
        DeltaTableIdentifier(path = Some(pathToVacuum.toString)))
    }
    VacuumCommand.gc(sparkSession, deltaLog, dryRun, horizonHours).collect()
  }
} 
Example 80
Source File: DeltaConvert.scala    From delta   with Apache License 2.0 5 votes vote down vote up
package io.delta.tables.execution

import org.apache.spark.sql.delta.commands.ConvertToDeltaCommand
import io.delta.tables.DeltaTable

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.types.StructType

trait DeltaConvertBase {
  def executeConvert(
      spark: SparkSession,
      tableIdentifier: TableIdentifier,
      partitionSchema: Option[StructType],
      deltaPath: Option[String]): DeltaTable = {
    val cvt = ConvertToDeltaCommand(tableIdentifier, partitionSchema, deltaPath)
    cvt.run(spark)
    if (cvt.isCatalogTable(spark.sessionState.analyzer, tableIdentifier)) {
      DeltaTable.forName(spark, tableIdentifier.toString)
    } else {
      DeltaTable.forPath(spark, tableIdentifier.table)
    }
  }
}

object DeltaConvert extends DeltaConvertBase {} 
Example 81
Source File: DeltaTableOperations.scala    From delta   with Apache License 2.0 5 votes vote down vote up
package io.delta.tables.execution

import scala.collection.Map

import org.apache.spark.sql.delta.{DeltaErrors, DeltaHistoryManager, DeltaLog, PreprocessTableUpdate}
import org.apache.spark.sql.delta.commands.{DeleteCommand, DeltaGenerateCommand, VacuumCommand}
import org.apache.spark.sql.delta.util.AnalysisHelper
import io.delta.tables.DeltaTable

import org.apache.spark.sql.{functions, Column, DataFrame, Dataset}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
import org.apache.spark.sql.catalyst.expressions.{Expression, SubqueryExpression}
import org.apache.spark.sql.catalyst.plans.logical._


trait DeltaTableOperations extends AnalysisHelper { self: DeltaTable =>

  protected def executeDelete(condition: Option[Expression]): Unit = improveUnsupportedOpError {
    val delete = DeleteFromTable(self.toDF.queryExecution.analyzed, condition)
    toDataset(sparkSession, delete)
  }

  protected def executeHistory(deltaLog: DeltaLog, limit: Option[Int]): DataFrame = {
    val history = new DeltaHistoryManager(deltaLog)
    val spark = self.toDF.sparkSession
    spark.createDataFrame(history.getHistory(limit))
  }

  protected def executeGenerate(tblIdentifier: String, mode: String): Unit = {
    val tableId: TableIdentifier = sparkSession
      .sessionState
      .sqlParser
      .parseTableIdentifier(tblIdentifier)
    val generate = DeltaGenerateCommand(mode, tableId)
    generate.run(sparkSession)
  }

  protected def executeUpdate(
      set: Map[String, Column],
      condition: Option[Column]): Unit = improveUnsupportedOpError {
    val assignments = set.map { case (targetColName, column) =>
      Assignment(UnresolvedAttribute.quotedString(targetColName), column.expr)
    }.toSeq
    val update = UpdateTable(self.toDF.queryExecution.analyzed, assignments, condition.map(_.expr))
    toDataset(sparkSession, update)
  }

  protected def executeVacuum(
      deltaLog: DeltaLog,
      retentionHours: Option[Double]): DataFrame = {
    VacuumCommand.gc(sparkSession, deltaLog, false, retentionHours)
    sparkSession.emptyDataFrame
  }

  protected def toStrColumnMap(map: Map[String, String]): Map[String, Column] = {
    map.toSeq.map { case (k, v) => k -> functions.expr(v) }.toMap
  }

  protected def sparkSession = self.toDF.sparkSession
} 
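executeGenerate above turns a user-supplied string into a TableIdentifier via the session's SQL parser; the same call in isolation (the object name and the sample string are illustrative):

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier

object ParseIdentifierSketch {
  // Parse a possibly backquoted "db.table" string into a TableIdentifier.
  def parse(spark: SparkSession, name: String): TableIdentifier =
    spark.sessionState.sqlParser.parseTableIdentifier(name)

  // e.g. parse(spark, "sales.`2020_orders`") == TableIdentifier("2020_orders", Some("sales"))
}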
Example 82
Source File: HiveConvertToDeltaSuite.scala    From delta   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.delta

import org.apache.spark.sql.delta.test.DeltaHiveTest

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.test.SQLTestUtils

abstract class HiveConvertToDeltaSuiteBase
  extends ConvertToDeltaHiveTableTests
  with SQLTestUtils {

  override protected def convertToDelta(
      identifier: String,
      partitionSchema: Option[String] = None): Unit = {
    if (partitionSchema.isEmpty) {
      sql(s"convert to delta $identifier")
    } else {
      val stringSchema = partitionSchema.get
      sql(s"convert to delta $identifier partitioned by ($stringSchema) ")
    }
  }

  override protected def verifyExternalCatalogMetadata(tableName: String): Unit = {
    val catalogTable = spark.sessionState.catalog.externalCatalog.getTable("default", tableName)
    // Hive automatically adds some properties
    val cleanProps = catalogTable.properties.filterKeys(_ != "transient_lastDdlTime")
    // We can't alter the schema in the catalog at the moment :(
    assert(cleanProps.isEmpty,
      s"Table properties weren't empty for table $tableName: $cleanProps")
  }

  test("convert a Hive based parquet table") {
    val tbl = "hive_parquet"
    withTable(tbl) {
      sql(
        s"""
           |CREATE TABLE $tbl (id int, str string)
           |PARTITIONED BY (part string)
           |STORED AS PARQUET
         """.stripMargin)

      sql(s"insert into $tbl VALUES (1, 'a', 1)")

      val catalogTable = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tbl))
      assert(catalogTable.provider === Some("hive"))
      assert(catalogTable.storage.serde.exists(_.contains("parquet")))

      convertToDelta(tbl, Some("part string"))

      checkAnswer(
        sql(s"select * from delta.`${getPathForTableName(tbl)}`"),
        Row(1, "a", "1"))

      verifyExternalCatalogMetadata(tbl)
      val updatedTable = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tbl))
      assert(updatedTable.provider === Some("delta"))
    }
  }

  test("convert a Hive based external parquet table") {
    val tbl = "hive_parquet"
    withTempDir { dir =>
      withTable(tbl) {
        sql(
          s"""
             |CREATE EXTERNAL TABLE $tbl (id int, str string)
             |PARTITIONED BY (part string)
             |STORED AS PARQUET
             |LOCATION '${dir.getCanonicalPath}'
         """.stripMargin)
        sql(s"insert into $tbl VALUES (1, 'a', 1)")

        val catalogTable = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tbl))
        assert(catalogTable.provider === Some("hive"))
        assert(catalogTable.storage.serde.exists(_.contains("parquet")))

        convertToDelta(tbl, Some("part string"))

        checkAnswer(
          sql(s"select * from delta.`${dir.getCanonicalPath}`"),
          Row(1, "a", "1"))

        verifyExternalCatalogMetadata(tbl)
        val updatedTable = spark.sessionState.catalog.getTableMetadata(TableIdentifier(tbl))
        assert(updatedTable.provider === Some("delta"))
      }
    }
  }
}

class HiveConvertToDeltaSuite extends HiveConvertToDeltaSuiteBase with DeltaHiveTest 
Example 83
Source File: DeltaSqlParserSuite.scala    From delta   with Apache License 2.0 5 votes vote down vote up
package io.delta.sql.parser

import io.delta.tables.execution.VacuumTableCommand

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.TableIdentifier

class DeltaSqlParserSuite extends SparkFunSuite {

  test("isValidDecimal should recognize a table identifier and not treat them as a decimal") {
    // Setting `delegate` to `null` is fine. The following tests don't need to touch `delegate`.
    val parser = new DeltaSqlParser(null)
    assert(parser.parsePlan("vacuum 123_") ===
      VacuumTableCommand(None, Some(TableIdentifier("123_")), None, false))
    assert(parser.parsePlan("vacuum 1a.123_") ===
      VacuumTableCommand(None, Some(TableIdentifier("123_", Some("1a"))), None, false))
    assert(parser.parsePlan("vacuum a.123A") ===
      VacuumTableCommand(None, Some(TableIdentifier("123A", Some("a"))), None, false))
    assert(parser.parsePlan("vacuum a.123E3_column") ===
      VacuumTableCommand(None, Some(TableIdentifier("123E3_column", Some("a"))), None, false))
    assert(parser.parsePlan("vacuum a.123D_column") ===
      VacuumTableCommand(None, Some(TableIdentifier("123D_column", Some("a"))), None, false))
    assert(parser.parsePlan("vacuum a.123BD_column") ===
      VacuumTableCommand(None, Some(TableIdentifier("123BD_column", Some("a"))), None, false))
  }
} 
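For comparison with the parsed results asserted above, a small sketch of building TableIdentifiers directly and printing their string forms (the object and value names are placeholders):

import org.apache.spark.sql.catalyst.TableIdentifier

object IdentifierFormsSketch extends App {
  val unqualified = TableIdentifier("events")                // resolved against the current database
  val qualified   = TableIdentifier("events", Some("logs"))  // explicitly database-qualified

  println(unqualified.unquotedString)  // events
  println(qualified.unquotedString)    // logs.events
  println(qualified.quotedString)      // `logs`.`events`
}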
Example 84
Source File: HiveExternalCatalogSuite.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive

import org.apache.hadoop.conf.Configuration
import org.apache.spark.SparkConf

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.execution.command.DDLUtils
import org.apache.spark.sql.types.StructType
import org.apache.spark.util.Utils


class HiveExternalCatalogSuite extends ExternalCatalogSuite {

  private val user = Utils.getCurrentUserName()

  private val externalCatalog: HiveExternalCatalog = {
    val catalog = new HiveExternalCatalog(new SparkConf, new Configuration, user)
    catalog.client.reset()
    catalog
  }

  protected override val utils: CatalogTestUtils = new CatalogTestUtils {
    override val tableInputFormat: String = "org.apache.hadoop.mapred.SequenceFileInputFormat"
    override val tableOutputFormat: String = "org.apache.hadoop.mapred.SequenceFileOutputFormat"
    override def newEmptyCatalog(): ExternalCatalog = externalCatalog
  }

  protected override def resetState(): Unit = {
    externalCatalog.client.reset()
  }

  import utils._

  test("list partitions by filter") {
    val catalog = newBasicCatalog()
    val selectedPartitions = catalog.listPartitionsByFilter("db2", "tbl2", Seq('a.int === 1))
    assert(selectedPartitions.length == 1)
    assert(selectedPartitions.head.spec == part1.spec)
  }

  test("SPARK-18647: do not put provider in table properties for Hive serde table") {
    val catalog = newBasicCatalog()
    val hiveTable = CatalogTable(
      identifier = TableIdentifier("hive_tbl", Some("db1")),
      tableType = CatalogTableType.MANAGED,
      storage = storageFormat,
      schema = new StructType().add("col1", "int").add("col2", "string"),
      provider = Some("hive"))
    catalog.createTable(hiveTable, ignoreIfExists = false)

    val rawTable = externalCatalog.client.getTable("db1", "hive_tbl")
    assert(!rawTable.properties.contains(HiveExternalCatalog.DATASOURCE_PROVIDER))
    assert(externalCatalog.getTable("db1", "hive_tbl").provider == Some(DDLUtils.HIVE_PROVIDER))
  }
} 
Example 85
Source File: MetastoreRelationSuite.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive

import org.apache.spark.sql.{QueryTest, Row}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

class MetastoreRelationSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
  test("makeCopy and toJSON should work") {
    val table = CatalogTable(
      identifier = TableIdentifier("test", Some("db")),
      tableType = CatalogTableType.VIEW,
      storage = CatalogStorageFormat.empty,
      schema = StructType(StructField("a", IntegerType, true) :: Nil))
    val relation = MetastoreRelation("db", "test")(table, null)

    // No exception should be thrown
    relation.makeCopy(Array("db", "test"))
    // No exception should be thrown
    relation.toJSON
  }

  test("SPARK-17409: Do Not Optimize Query in CTAS (Hive Serde Table) More Than Once") {
    withTable("bar") {
      withTempView("foo") {
        sql("select 0 as id").createOrReplaceTempView("foo")
        // If we optimize the query in CTAS more than once, the following saveAsTable will fail
        // with the error: `GROUP BY position 0 is not in select list (valid range is [1, 1])`
        sql("CREATE TABLE bar AS SELECT * FROM foo group by id")
        checkAnswer(spark.table("bar"), Row(0) :: Nil)
        val tableMetadata = spark.sessionState.catalog.getTableMetadata(TableIdentifier("bar"))
        assert(tableMetadata.provider == Some("hive"), "the expected table is a Hive serde table")
      }
    }
  }
} 
Example 86
Source File: ListTablesSuite.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive

import org.scalatest.BeforeAndAfterAll

import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.hive.test.TestHiveSingleton

class ListTablesSuite extends QueryTest with TestHiveSingleton with BeforeAndAfterAll {
  import hiveContext._
  import hiveContext.implicits._

  val df = sparkContext.parallelize((1 to 10).map(i => (i, s"str$i"))).toDF("key", "value")

  override def beforeAll(): Unit = {
    super.beforeAll()
    // The catalog in HiveContext is a case insensitive one.
    sessionState.catalog.createTempView(
      "ListTablesSuiteTable", df.logicalPlan, overrideIfExists = true)
    sql("CREATE TABLE HiveListTablesSuiteTable (key int, value string)")
    sql("CREATE DATABASE IF NOT EXISTS ListTablesSuiteDB")
    sql("CREATE TABLE ListTablesSuiteDB.HiveInDBListTablesSuiteTable (key int, value string)")
  }

  override def afterAll(): Unit = {
    try {
      sessionState.catalog.dropTable(
        TableIdentifier("ListTablesSuiteTable"), ignoreIfNotExists = true, purge = false)
      sql("DROP TABLE IF EXISTS HiveListTablesSuiteTable")
      sql("DROP TABLE IF EXISTS ListTablesSuiteDB.HiveInDBListTablesSuiteTable")
      sql("DROP DATABASE IF EXISTS ListTablesSuiteDB")
    } finally {
      super.afterAll()
    }
  }

  test("get all tables of current database") {
    Seq(tables(), sql("SHOW TABLes")).foreach {
      case allTables =>
        // We are using default DB.
        checkAnswer(
          allTables.filter("tableName = 'listtablessuitetable'"),
          Row("", "listtablessuitetable", true))
        checkAnswer(
          allTables.filter("tableName = 'hivelisttablessuitetable'"),
          Row("default", "hivelisttablessuitetable", false))
        assert(allTables.filter("tableName = 'hiveindblisttablessuitetable'").count() === 0)
    }
  }

  test("getting all tables with a database name") {
    Seq(tables("listtablessuiteDb"), sql("SHOW TABLes in listTablesSuitedb")).foreach {
      case allTables =>
        checkAnswer(
          allTables.filter("tableName = 'listtablessuitetable'"),
          Row("", "listtablessuitetable", true))
        assert(allTables.filter("tableName = 'hivelisttablessuitetable'").count() === 0)
        checkAnswer(
          allTables.filter("tableName = 'hiveindblisttablessuitetable'"),
          Row("listtablessuitedb", "hiveindblisttablessuitetable", false))
    }
  }
} 
Example 87
Source File: TableIdentifierParserSuite.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.parser

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.TableIdentifier

class TableIdentifierParserSuite extends SparkFunSuite {
  import CatalystSqlParser._

  // Add "$elem$", "$value$" & "$key$"
  val hiveNonReservedKeyword = Array("add", "admin", "after", "analyze", "archive", "asc", "before",
    "bucket", "buckets", "cascade", "change", "cluster", "clustered", "clusterstatus", "collection",
    "columns", "comment", "compact", "compactions", "compute", "concatenate", "continue", "data",
    "day", "databases", "datetime", "dbproperties", "deferred", "defined", "delimited",
    "dependency", "desc", "directories", "directory", "disable", "distribute",
    "enable", "escaped", "exclusive", "explain", "export", "fields", "file", "fileformat", "first",
    "format", "formatted", "functions", "hold_ddltime", "hour", "idxproperties", "ignore", "index",
    "indexes", "inpath", "inputdriver", "inputformat", "items", "jar", "keys", "key_type", "last",
    "limit", "offset", "lines", "load", "location", "lock", "locks", "logical", "long", "mapjoin",
    "materialized", "metadata", "minus", "minute", "month", "msck", "noscan", "no_drop", "nulls",
    "offline", "option", "outputdriver", "outputformat", "overwrite", "owner", "partitioned",
    "partitions", "plus", "pretty", "principals", "protection", "purge", "read", "readonly",
    "rebuild", "recordreader", "recordwriter", "reload", "rename", "repair", "replace",
    "replication", "restrict", "rewrite", "role", "roles", "schemas", "second",
    "serde", "serdeproperties", "server", "sets", "shared", "show", "show_database", "skewed",
    "sort", "sorted", "ssl", "statistics", "stored", "streamtable", "string", "struct", "tables",
    "tblproperties", "temporary", "terminated", "tinyint", "touch", "transactions", "unarchive",
    "undo", "uniontype", "unlock", "unset", "unsigned", "uri", "use", "utc", "utctimestamp",
    "view", "while", "year", "work", "transaction", "write", "isolation", "level",
    "snapshot", "autocommit", "all", "alter", "array", "as", "authorization", "between", "bigint",
    "binary", "boolean", "both", "by", "create", "cube", "current_date", "current_timestamp",
    "cursor", "date", "decimal", "delete", "describe", "double", "drop", "exists", "external",
    "false", "fetch", "float", "for", "grant", "group", "grouping", "import", "in",
    "insert", "int", "into", "is", "lateral", "like", "local", "none", "null",
    "of", "order", "out", "outer", "partition", "percent", "procedure", "range", "reads", "revoke",
    "rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger",
    "true", "truncate", "update", "user", "using", "values", "with", "regexp", "rlike",
    "bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float",
    "int", "smallint", "timestamp", "at")

  val hiveStrictNonReservedKeyword = Seq("anti", "full", "inner", "left", "semi", "right",
    "natural", "union", "intersect", "except", "database", "on", "join", "cross", "select", "from",
    "where", "having", "from", "to", "table", "with", "not")

  test("table identifier") {
    // Regular names.
    assert(TableIdentifier("q") === parseTableIdentifier("q"))
    assert(TableIdentifier("q", Option("d")) === parseTableIdentifier("d.q"))

    // Illegal names.
    Seq("", "d.q.g", "t:", "${some.var.x}", "tab:1").foreach { identifier =>
      intercept[ParseException](parseTableIdentifier(identifier))
    }
  }

  test("quoted identifiers") {
    assert(TableIdentifier("z", Some("x.y")) === parseTableIdentifier("`x.y`.z"))
    assert(TableIdentifier("y.z", Some("x")) === parseTableIdentifier("x.`y.z`"))
    assert(TableIdentifier("z", Some("`x.y`")) === parseTableIdentifier("```x.y```.z"))
    assert(TableIdentifier("`y.z`", Some("x")) === parseTableIdentifier("x.```y.z```"))
    assert(TableIdentifier("x.y.z", None) === parseTableIdentifier("`x.y.z`"))
  }

  test("table identifier - strict keywords") {
    // SQL Keywords.
    hiveStrictNonReservedKeyword.foreach { keyword =>
      assert(TableIdentifier(keyword) === parseTableIdentifier(keyword))
      assert(TableIdentifier(keyword) === parseTableIdentifier(s"`$keyword`"))
      assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.`$keyword`"))
    }
  }

  test("table identifier - non reserved keywords") {
    // Hive keywords are allowed.
    hiveNonReservedKeyword.foreach { nonReserved =>
      assert(TableIdentifier(nonReserved) === parseTableIdentifier(nonReserved))
    }
  }

  test("SPARK-17364 table identifier - contains number") {
    assert(parseTableIdentifier("123_") == TableIdentifier("123_"))
    assert(parseTableIdentifier("1a.123_") == TableIdentifier("123_", Some("1a")))
    // ".123" should not be treated as token of type DECIMAL_VALUE
    assert(parseTableIdentifier("a.123A") == TableIdentifier("123A", Some("a")))
    // ".123E3" should not be treated as token of type SCIENTIFIC_DECIMAL_VALUE
    assert(parseTableIdentifier("a.123E3_LIST") == TableIdentifier("123E3_LIST", Some("a")))
    // ".123D" should not be treated as token of type DOUBLE_LITERAL
    assert(parseTableIdentifier("a.123D_LIST") == TableIdentifier("123D_LIST", Some("a")))
    // ".123BD" should not be treated as token of type BIGDECIMAL_LITERAL
    assert(parseTableIdentifier("a.123BD_LIST") == TableIdentifier("123BD_LIST", Some("a")))
  }

  test("SPARK-17832 table identifier - contains backtick") {
    val complexName = TableIdentifier("`weird`table`name", Some("`d`b`1"))
    assert(complexName === parseTableIdentifier("```d``b``1`.```weird``table``name`"))
    assert(complexName === parseTableIdentifier(complexName.quotedString))
    intercept[ParseException](parseTableIdentifier(complexName.unquotedString))
    // A table identifier containing consecutive backticks should be handled correctly.
    val complexName2 = TableIdentifier("x``y", Some("d``b"))
    assert(complexName2 === parseTableIdentifier(complexName2.quotedString))
  }
} 
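A minimal round-trip sketch of the quoting behaviour the suite above checks: quotedString wraps each part in backticks, so identifiers containing dots or backticks survive re-parsing (the names used here are hypothetical).

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser

object IdentifierQuotingSketch {
  def main(args: Array[String]): Unit = {
    val id = TableIdentifier("weird.table", Some("db"))
    println(id.quotedString)   // `db`.`weird.table`
    println(id.unquotedString) // db.weird.table -- re-parsing this form fails (three name parts)
    // Only the quoted form round-trips back to the same identifier.
    println(CatalystSqlParser.parseTableIdentifier(id.quotedString) == id)
  }
}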
Example 88
Source File: ddl.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils}
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.types._

case class CreateTable(
    tableDesc: CatalogTable,
    mode: SaveMode,
    query: Option[LogicalPlan]) extends Command {
  assert(tableDesc.provider.isDefined, "The table to be created must have a provider.")

  if (query.isEmpty) {
    assert(
      mode == SaveMode.ErrorIfExists || mode == SaveMode.Ignore,
      "create table without data insertion can only use ErrorIfExists or Ignore as SaveMode.")
  }

  override def innerChildren: Seq[QueryPlan[_]] = query.toSeq
}


case class CreateTempViewUsing(
    tableIdent: TableIdentifier,
    userSpecifiedSchema: Option[StructType],
    replace: Boolean,
    global: Boolean,
    provider: String,
    options: Map[String, String]) extends RunnableCommand {

  if (tableIdent.database.isDefined) {
    throw new AnalysisException(
      s"Temporary view '$tableIdent' should not have specified a database")
  }

  override def argString: String = {
    s"[tableIdent:$tableIdent " +
      userSpecifiedSchema.map(_ + " ").getOrElse("") +
      s"replace:$replace " +
      s"provider:$provider " +
      CatalogUtils.maskCredentials(options)
  }

  def run(sparkSession: SparkSession): Seq[Row] = {
    val dataSource = DataSource(
      sparkSession,
      userSpecifiedSchema = userSpecifiedSchema,
      className = provider,
      options = options)

    val catalog = sparkSession.sessionState.catalog
    val viewDefinition = Dataset.ofRows(
      sparkSession, LogicalRelation(dataSource.resolveRelation())).logicalPlan

    if (global) {
      catalog.createGlobalTempView(tableIdent.table, viewDefinition, replace)
    } else {
      catalog.createTempView(tableIdent.table, viewDefinition, replace)
    }

    Seq.empty[Row]
  }
}

case class RefreshTable(tableIdent: TableIdentifier)
  extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    // Refresh the given table's metadata. If this table is cached as an InMemoryRelation,
    // drop the original cached version and make the new version cached lazily.
    sparkSession.catalog.refreshTable(tableIdent.quotedString)
    Seq.empty[Row]
  }
}

case class RefreshResource(path: String)
  extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    sparkSession.catalog.refreshByPath(path)
    Seq.empty[Row]
  }
} 
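A minimal sketch of the SQL that is planned into the commands above (CreateTempViewUsing, RefreshTable); the view name and parquet path are hypothetical.

import org.apache.spark.sql.SparkSession

object TempViewUsingSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("ddl-sketch").getOrCreate()
    spark.range(5).write.mode("overwrite").parquet("/tmp/events")
    // Planned as CreateTempViewUsing(TableIdentifier("events"), ..., provider = "parquet", ...).
    spark.sql("CREATE TEMPORARY VIEW events USING parquet OPTIONS (path '/tmp/events')")
    // Planned as RefreshTable(TableIdentifier("events")).
    spark.sql("REFRESH TABLE events")
    spark.sql("SELECT count(*) FROM events").show()
    spark.stop()
  }
}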
Example 89
Source File: cache.scala    From multi-tenancy-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.command

import org.apache.spark.sql.{Dataset, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

case class CacheTableCommand(
    tableIdent: TableIdentifier,
    plan: Option[LogicalPlan],
    isLazy: Boolean) extends RunnableCommand {
  require(plan.isEmpty || tableIdent.database.isEmpty,
    "Database name is not allowed in CACHE TABLE AS SELECT")

  override protected def innerChildren: Seq[QueryPlan[_]] = {
    plan.toSeq
  }

  override def run(sparkSession: SparkSession): Seq[Row] = {
    plan.foreach { logicalPlan =>
      Dataset.ofRows(sparkSession, logicalPlan).createTempView(tableIdent.quotedString)
    }
    sparkSession.catalog.cacheTable(tableIdent.quotedString)

    if (!isLazy) {
      // Performs eager caching
      sparkSession.table(tableIdent).count()
    }

    Seq.empty[Row]
  }
}


case class UncacheTableCommand(
    tableIdent: TableIdentifier,
    ifExists: Boolean) extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val tableId = tableIdent.quotedString
    try {
      sparkSession.catalog.uncacheTable(tableId)
    } catch {
      case _: NoSuchTableException if ifExists => // don't throw
    }
    Seq.empty[Row]
  }
}


case object ClearCacheCommand extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    sparkSession.catalog.clearCache()
    Seq.empty[Row]
  }
} 
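A minimal sketch of the SQL that reaches CacheTableCommand and UncacheTableCommand; the view name is hypothetical.

import org.apache.spark.sql.SparkSession

object CacheCommandSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("cache-sketch").getOrCreate()
    spark.range(10).createOrReplaceTempView("nums")
    // Planned as CacheTableCommand(TableIdentifier("nums"), None, isLazy = false);
    // the count() inside run() then materialises the cache eagerly.
    spark.sql("CACHE TABLE nums")
    // Planned as UncacheTableCommand(TableIdentifier("nums"), ifExists = false).
    spark.sql("UNCACHE TABLE nums")
    spark.stop()
  }
}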
Example 90
Source File: hbaseCommands.scala    From Heracles   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hbase.execution

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.hbase._
import org.apache.spark.sql.hbase.util.DataTypeUtils
import org.apache.spark.sql.types._

import scala.collection.mutable.ArrayBuffer

@DeveloperApi
case class AlterDropColCommand(namespace: String, tableName: String, columnName: String)
  extends RunnableCommand {

  def run(sparkSession: SparkSession): Seq[Row] = {
    sparkSession.sharedState.externalCatalog.asInstanceOf[HBaseCatalog]
      .alterTableDropNonKey(namespace, tableName, columnName)
    sparkSession.sharedState.externalCatalog.asInstanceOf[HBaseCatalog].stopAdmin()
    Seq.empty[Row]
  }
}

@DeveloperApi
case class AlterAddColCommand(namespace: String,
                              tableName: String,
                              colName: String,
                              colType: String,
                              colFamily: String,
                              colQualifier: String) extends RunnableCommand {

  def run(sparkSession: SparkSession): Seq[Row] = {
    val hbaseCatalog = sparkSession.sharedState.externalCatalog.asInstanceOf[HBaseCatalog]
    hbaseCatalog.alterTableAddNonKey(namespace, tableName,
      NonKeyColumn(colName, DataTypeUtils.getDataType(colType), colFamily, colQualifier))
    hbaseCatalog.stopAdmin()
    Seq.empty[Row]
  }
}

@DeveloperApi
case class InsertValueIntoTableCommand(tid: TableIdentifier, valueSeq: Seq[String])
  extends RunnableCommand {
  override def run(sparkSession: SparkSession) = {
    val relation: HBaseRelation = sparkSession.sessionState.catalog.externalCatalog
      .asInstanceOf[HBaseCatalog]
      .getHBaseRelation(tid.database.getOrElse(null), tid.table).getOrElse(null)

    val bytes = valueSeq.zipWithIndex.map(v =>
      DataTypeUtils.string2TypeData(v._1, relation.schema(v._2).dataType))

    val rows = sparkSession.sparkContext.makeRDD(Seq(Row.fromSeq(bytes)))
    val inputValuesDF = sparkSession.createDataFrame(rows, relation.schema)
    relation.insert(inputValuesDF, overwrite = false)

    Seq.empty[Row]
  }

  override def output: Seq[Attribute] = Seq.empty
} 
Example 91
Source File: InsertIntoHiveDirCommand.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive.execution

import scala.language.existentials

import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.hive.common.FileUtils
import org.apache.hadoop.hive.ql.plan.TableDesc
import org.apache.hadoop.hive.serde.serdeConstants
import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
import org.apache.hadoop.mapred._

import org.apache.spark.SparkException
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable}
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.hive.client.HiveClientImpl


case class InsertIntoHiveDirCommand(
    isLocal: Boolean,
    storage: CatalogStorageFormat,
    query: LogicalPlan,
    overwrite: Boolean,
    outputColumns: Seq[Attribute]) extends SaveAsHiveFile {

  override def run(sparkSession: SparkSession, child: SparkPlan): Seq[Row] = {
    assert(storage.locationUri.nonEmpty)

    val hiveTable = HiveClientImpl.toHiveTable(CatalogTable(
      identifier = TableIdentifier(storage.locationUri.get.toString, Some("default")),
      tableType = org.apache.spark.sql.catalyst.catalog.CatalogTableType.VIEW,
      storage = storage,
      schema = query.schema
    ))
    hiveTable.getMetadata.put(serdeConstants.SERIALIZATION_LIB,
      storage.serde.getOrElse(classOf[LazySimpleSerDe].getName))

    val tableDesc = new TableDesc(
      hiveTable.getInputFormatClass,
      hiveTable.getOutputFormatClass,
      hiveTable.getMetadata
    )

    val hadoopConf = sparkSession.sessionState.newHadoopConf()
    val jobConf = new JobConf(hadoopConf)

    val targetPath = new Path(storage.locationUri.get)
    val writeToPath =
      if (isLocal) {
        val localFileSystem = FileSystem.getLocal(jobConf)
        localFileSystem.makeQualified(targetPath)
      } else {
        val qualifiedPath = FileUtils.makeQualified(targetPath, hadoopConf)
        val dfs = qualifiedPath.getFileSystem(jobConf)
        if (!dfs.exists(qualifiedPath)) {
          dfs.mkdirs(qualifiedPath.getParent)
        }
        qualifiedPath
      }

    val tmpPath = getExternalTmpPath(sparkSession, hadoopConf, writeToPath)
    val fileSinkConf = new org.apache.spark.sql.hive.HiveShim.ShimFileSinkDesc(
      tmpPath.toString, tableDesc, false)

    try {
      saveAsHiveFile(
        sparkSession = sparkSession,
        plan = child,
        hadoopConf = hadoopConf,
        fileSinkConf = fileSinkConf,
        outputLocation = tmpPath.toString,
        allColumns = outputColumns)

      val fs = writeToPath.getFileSystem(hadoopConf)
      if (overwrite && fs.exists(writeToPath)) {
        fs.listStatus(writeToPath).foreach { existFile =>
          if (Option(existFile.getPath) != createdTempDir) fs.delete(existFile.getPath, true)
        }
      }

      fs.listStatus(tmpPath).foreach {
        tmpFile => fs.rename(tmpFile.getPath, writeToPath)
      }
    } catch {
      case e: Throwable =>
        throw new SparkException(
          "Failed inserting overwrite directory " + storage.locationUri.get, e)
    } finally {
      deleteExternalTmpPath(hadoopConf)
    }

    Seq.empty[Row]
  }
} 
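A minimal sketch of the directory-insert syntax that is planned into InsertIntoHiveDirCommand (Hive support is required); the output directory is hypothetical.

import org.apache.spark.sql.SparkSession

object InsertOverwriteDirSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]").appName("dir-sketch").enableHiveSupport().getOrCreate()
    spark.range(5).createOrReplaceTempView("src")
    // STORED AS selects the Hive SerDe path, so the write goes through InsertIntoHiveDirCommand.
    spark.sql(
      """INSERT OVERWRITE LOCAL DIRECTORY '/tmp/hive_dir_out'
        |STORED AS PARQUET
        |SELECT id FROM src""".stripMargin)
    spark.stop()
  }
}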
Example 92
Source File: HiveExternalCatalogSuite.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive

import org.apache.hadoop.conf.Configuration

import org.apache.spark.SparkConf
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.execution.command.DDLUtils
import org.apache.spark.sql.types.StructType


class HiveExternalCatalogSuite extends ExternalCatalogSuite {

  private val externalCatalog: HiveExternalCatalog = {
    val catalog = new HiveExternalCatalog(new SparkConf, new Configuration)
    catalog.client.reset()
    catalog
  }

  protected override val utils: CatalogTestUtils = new CatalogTestUtils {
    override val tableInputFormat: String = "org.apache.hadoop.mapred.SequenceFileInputFormat"
    override val tableOutputFormat: String = "org.apache.hadoop.mapred.SequenceFileOutputFormat"
    override def newEmptyCatalog(): ExternalCatalog = externalCatalog
    override val defaultProvider: String = "hive"
  }

  protected override def resetState(): Unit = {
    externalCatalog.client.reset()
  }

  import utils._

  test("SPARK-18647: do not put provider in table properties for Hive serde table") {
    val catalog = newBasicCatalog()
    val hiveTable = CatalogTable(
      identifier = TableIdentifier("hive_tbl", Some("db1")),
      tableType = CatalogTableType.MANAGED,
      storage = storageFormat,
      schema = new StructType().add("col1", "int").add("col2", "string"),
      provider = Some("hive"))
    catalog.createTable(hiveTable, ignoreIfExists = false)

    val rawTable = externalCatalog.client.getTable("db1", "hive_tbl")
    assert(!rawTable.properties.contains(HiveExternalCatalog.DATASOURCE_PROVIDER))
    assert(DDLUtils.isHiveTable(externalCatalog.getTable("db1", "hive_tbl")))
  }

  Seq("parquet", "hive").foreach { format =>
    test(s"Partition columns should be put at the end of table schema for the format $format") {
      val catalog = newBasicCatalog()
      val newSchema = new StructType()
        .add("col1", "int")
        .add("col2", "string")
        .add("partCol1", "int")
        .add("partCol2", "string")
      val table = CatalogTable(
        identifier = TableIdentifier("tbl", Some("db1")),
        tableType = CatalogTableType.MANAGED,
        storage = CatalogStorageFormat.empty,
        schema = new StructType()
          .add("col1", "int")
          .add("partCol1", "int")
          .add("partCol2", "string")
          .add("col2", "string"),
        provider = Some(format),
        partitionColumnNames = Seq("partCol1", "partCol2"))
      catalog.createTable(table, ignoreIfExists = false)

      val restoredTable = externalCatalog.getTable("db1", "tbl")
      assert(restoredTable.schema == newSchema)
    }
  }

  test("SPARK-22306: alter table schema should not erase the bucketing metadata at hive side") {
    val catalog = newBasicCatalog()
    externalCatalog.client.runSqlHive(
      """
        |CREATE TABLE db1.t(a string, b string)
        |CLUSTERED BY (a, b) SORTED BY (a, b) INTO 10 BUCKETS
        |STORED AS PARQUET
      """.stripMargin)

    val newSchema = new StructType().add("a", "string").add("b", "string").add("c", "string")
    catalog.alterTableDataSchema("db1", "t", newSchema)

    assert(catalog.getTable("db1", "t").schema == newSchema)
    val bucketString = externalCatalog.client.runSqlHive("DESC FORMATTED db1.t")
      .filter(_.contains("Num Buckets")).head
    assert(bucketString.contains("10"))
  }

  test("SPARK-23001: NullPointerException when running desc database") {
    val catalog = newBasicCatalog()
    catalog.createDatabase(newDb("dbWithNullDesc").copy(description = null), ignoreIfExists = false)
    assert(catalog.getDatabase("dbWithNullDesc").description == "")
  }
} 
Example 93
Source File: PruneFileSourcePartitionsSuite.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project}
import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.execution.datasources.{CatalogFileIndex, HadoopFsRelation, LogicalRelation, PruneFileSourcePartitions}
import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types.StructType

class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches = Batch("PruneFileSourcePartitions", Once, PruneFileSourcePartitions) :: Nil
  }

  test("PruneFileSourcePartitions should not change the output of LogicalRelation") {
    withTable("test") {
      withTempDir { dir =>
        sql(
          s"""
            |CREATE EXTERNAL TABLE test(i int)
            |PARTITIONED BY (p int)
            |STORED AS parquet
            |LOCATION '${dir.toURI}'""".stripMargin)

        val tableMeta = spark.sharedState.externalCatalog.getTable("default", "test")
        val catalogFileIndex = new CatalogFileIndex(spark, tableMeta, 0)

        val dataSchema = StructType(tableMeta.schema.filterNot { f =>
          tableMeta.partitionColumnNames.contains(f.name)
        })
        val relation = HadoopFsRelation(
          location = catalogFileIndex,
          partitionSchema = tableMeta.partitionSchema,
          dataSchema = dataSchema,
          bucketSpec = None,
          fileFormat = new ParquetFileFormat(),
          options = Map.empty)(sparkSession = spark)

        val logicalRelation = LogicalRelation(relation, tableMeta)
        val query = Project(Seq('i, 'p), Filter('p === 1, logicalRelation)).analyze

        val optimized = Optimize.execute(query)
        assert(optimized.missingInput.isEmpty)
      }
    }
  }

  test("SPARK-20986 Reset table's statistics after PruneFileSourcePartitions rule") {
    withTable("tbl") {
      spark.range(10).selectExpr("id", "id % 3 as p").write.partitionBy("p").saveAsTable("tbl")
      sql(s"ANALYZE TABLE tbl COMPUTE STATISTICS")
      val tableStats = spark.sessionState.catalog.getTableMetadata(TableIdentifier("tbl")).stats
      assert(tableStats.isDefined && tableStats.get.sizeInBytes > 0, "tableStats is lost")

      val df = sql("SELECT * FROM tbl WHERE p = 1")
      val sizes1 = df.queryExecution.analyzed.collect {
        case relation: LogicalRelation => relation.catalogTable.get.stats.get.sizeInBytes
      }
      assert(sizes1.size === 1, s"Size wrong for:\n ${df.queryExecution}")
      assert(sizes1(0) == tableStats.get.sizeInBytes)

      val relations = df.queryExecution.optimizedPlan.collect {
        case relation: LogicalRelation => relation
      }
      assert(relations.size === 1, s"Size wrong for:\n ${df.queryExecution}")
      val size2 = relations(0).stats.sizeInBytes
      assert(size2 == relations(0).catalogTable.get.stats.get.sizeInBytes)
      assert(size2 < tableStats.get.sizeInBytes)
    }
  }
} 
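A minimal sketch of observing the same pruning outside the optimizer test, by inspecting the physical plan of a partition-filtered query; the table name is hypothetical.

import org.apache.spark.sql.SparkSession

object PartitionPruningSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("prune-sketch").getOrCreate()
    spark.range(10).selectExpr("id", "id % 3 as p")
      .write.mode("overwrite").partitionBy("p").saveAsTable("tbl")
    // The FileScan node should list the predicate on p under PartitionFilters,
    // which is the pruning PruneFileSourcePartitions is responsible for.
    spark.sql("SELECT * FROM tbl WHERE p = 1").explain()
    spark.stop()
  }
}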
Example 94
Source File: ListTablesSuite.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive

import org.scalatest.BeforeAndAfterAll

import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.hive.test.TestHiveSingleton

class ListTablesSuite extends QueryTest with TestHiveSingleton with BeforeAndAfterAll {
  import hiveContext._
  import hiveContext.implicits._

  val df = sparkContext.parallelize((1 to 10).map(i => (i, s"str$i"))).toDF("key", "value")

  override def beforeAll(): Unit = {
    super.beforeAll()
    // The catalog in HiveContext is a case insensitive one.
    sessionState.catalog.createTempView(
      "ListTablesSuiteTable", df.logicalPlan, overrideIfExists = true)
    sql("CREATE TABLE HiveListTablesSuiteTable (key int, value string)")
    sql("CREATE DATABASE IF NOT EXISTS ListTablesSuiteDB")
    sql("CREATE TABLE ListTablesSuiteDB.HiveInDBListTablesSuiteTable (key int, value string)")
  }

  override def afterAll(): Unit = {
    try {
      sessionState.catalog.dropTable(
        TableIdentifier("ListTablesSuiteTable"), ignoreIfNotExists = true, purge = false)
      sql("DROP TABLE IF EXISTS HiveListTablesSuiteTable")
      sql("DROP TABLE IF EXISTS ListTablesSuiteDB.HiveInDBListTablesSuiteTable")
      sql("DROP DATABASE IF EXISTS ListTablesSuiteDB")
    } finally {
      super.afterAll()
    }
  }

  test("get all tables of current database") {
    Seq(tables(), sql("SHOW TABLes")).foreach {
      case allTables =>
        // We are using default DB.
        checkAnswer(
          allTables.filter("tableName = 'listtablessuitetable'"),
          Row("", "listtablessuitetable", true))
        checkAnswer(
          allTables.filter("tableName = 'hivelisttablessuitetable'"),
          Row("default", "hivelisttablessuitetable", false))
        assert(allTables.filter("tableName = 'hiveindblisttablessuitetable'").count() === 0)
    }
  }

  test("getting all tables with a database name") {
    Seq(tables("listtablessuiteDb"), sql("SHOW TABLes in listTablesSuitedb")).foreach {
      case allTables =>
        checkAnswer(
          allTables.filter("tableName = 'listtablessuitetable'"),
          Row("", "listtablessuitetable", true))
        assert(allTables.filter("tableName = 'hivelisttablessuitetable'").count() === 0)
        checkAnswer(
          allTables.filter("tableName = 'hiveindblisttablessuitetable'"),
          Row("listtablessuitedb", "hiveindblisttablessuitetable", false))
    }
  }
} 
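A minimal sketch of the same listing through the public Catalog API instead of SHOW TABLES; the view name is hypothetical.

import org.apache.spark.sql.SparkSession

object ListTablesSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("list-sketch").getOrCreate()
    spark.range(3).createOrReplaceTempView("tmp_view")
    // Temporary views report a null database and isTemporary = true.
    spark.catalog.listTables("default").collect().foreach { t =>
      println(s"${t.database}\t${t.name}\tisTemporary=${t.isTemporary}")
    }
    spark.stop()
  }
}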
Example 95
Source File: TableIdentifierParserSuite.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.parser

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.TableIdentifier

class TableIdentifierParserSuite extends SparkFunSuite {
  import CatalystSqlParser._

  // Add "$elem$", "$value$" & "$key$"
  val hiveNonReservedKeyword = Array("add", "admin", "after", "analyze", "archive", "asc", "before",
    "bucket", "buckets", "cascade", "change", "cluster", "clustered", "clusterstatus", "collection",
    "columns", "comment", "compact", "compactions", "compute", "concatenate", "continue", "cost",
    "data", "day", "databases", "datetime", "dbproperties", "deferred", "defined", "delimited",
    "dependency", "desc", "directories", "directory", "disable", "distribute",
    "enable", "escaped", "exclusive", "explain", "export", "fields", "file", "fileformat", "first",
    "format", "formatted", "functions", "hold_ddltime", "hour", "idxproperties", "ignore", "index",
    "indexes", "inpath", "inputdriver", "inputformat", "items", "jar", "keys", "key_type", "last",
    "limit", "offset", "lines", "load", "location", "lock", "locks", "logical", "long", "mapjoin",
    "materialized", "metadata", "minus", "minute", "month", "msck", "noscan", "no_drop", "nulls",
    "offline", "option", "outputdriver", "outputformat", "overwrite", "owner", "partitioned",
    "partitions", "plus", "pretty", "principals", "protection", "purge", "read", "readonly",
    "rebuild", "recordreader", "recordwriter", "reload", "rename", "repair", "replace",
    "replication", "restrict", "rewrite", "role", "roles", "schemas", "second",
    "serde", "serdeproperties", "server", "sets", "shared", "show", "show_database", "skewed",
    "sort", "sorted", "ssl", "statistics", "stored", "streamtable", "string", "struct", "tables",
    "tblproperties", "temporary", "terminated", "tinyint", "touch", "transactions", "unarchive",
    "undo", "uniontype", "unlock", "unset", "unsigned", "uri", "use", "utc", "utctimestamp",
    "view", "while", "year", "work", "transaction", "write", "isolation", "level",
    "snapshot", "autocommit", "all", "alter", "array", "as", "authorization", "between", "bigint",
    "binary", "boolean", "both", "by", "create", "cube", "current_date", "current_timestamp",
    "cursor", "date", "decimal", "delete", "describe", "double", "drop", "exists", "external",
    "false", "fetch", "float", "for", "grant", "group", "grouping", "import", "in",
    "insert", "int", "into", "is", "lateral", "like", "local", "none", "null",
    "of", "order", "out", "outer", "partition", "percent", "procedure", "range", "reads", "revoke",
    "rollup", "row", "rows", "set", "smallint", "table", "timestamp", "to", "trigger",
    "true", "truncate", "update", "user", "values", "with", "regexp", "rlike",
    "bigint", "binary", "boolean", "current_date", "current_timestamp", "date", "double", "float",
    "int", "smallint", "timestamp", "at", "position", "both", "leading", "trailing")

  val hiveStrictNonReservedKeyword = Seq("anti", "full", "inner", "left", "semi", "right",
    "natural", "union", "intersect", "except", "database", "on", "join", "cross", "select", "from",
    "where", "having", "from", "to", "table", "with", "not")

  test("table identifier") {
    // Regular names.
    assert(TableIdentifier("q") === parseTableIdentifier("q"))
    assert(TableIdentifier("q", Option("d")) === parseTableIdentifier("d.q"))

    // Illegal names.
    Seq("", "d.q.g", "t:", "${some.var.x}", "tab:1").foreach { identifier =>
      intercept[ParseException](parseTableIdentifier(identifier))
    }
  }

  test("quoted identifiers") {
    assert(TableIdentifier("z", Some("x.y")) === parseTableIdentifier("`x.y`.z"))
    assert(TableIdentifier("y.z", Some("x")) === parseTableIdentifier("x.`y.z`"))
    assert(TableIdentifier("z", Some("`x.y`")) === parseTableIdentifier("```x.y```.z"))
    assert(TableIdentifier("`y.z`", Some("x")) === parseTableIdentifier("x.```y.z```"))
    assert(TableIdentifier("x.y.z", None) === parseTableIdentifier("`x.y.z`"))
  }

  test("table identifier - strict keywords") {
    // SQL Keywords.
    hiveStrictNonReservedKeyword.foreach { keyword =>
      assert(TableIdentifier(keyword) === parseTableIdentifier(keyword))
      assert(TableIdentifier(keyword) === parseTableIdentifier(s"`$keyword`"))
      assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.`$keyword`"))
    }
  }

  test("table identifier - non reserved keywords") {
    // Hive keywords are allowed.
    hiveNonReservedKeyword.foreach { nonReserved =>
      assert(TableIdentifier(nonReserved) === parseTableIdentifier(nonReserved))
    }
  }

  test("SPARK-17364 table identifier - contains number") {
    assert(parseTableIdentifier("123_") == TableIdentifier("123_"))
    assert(parseTableIdentifier("1a.123_") == TableIdentifier("123_", Some("1a")))
    // ".123" should not be treated as token of type DECIMAL_VALUE
    assert(parseTableIdentifier("a.123A") == TableIdentifier("123A", Some("a")))
    // ".123E3" should not be treated as token of type SCIENTIFIC_DECIMAL_VALUE
    assert(parseTableIdentifier("a.123E3_LIST") == TableIdentifier("123E3_LIST", Some("a")))
    // ".123D" should not be treated as token of type DOUBLE_LITERAL
    assert(parseTableIdentifier("a.123D_LIST") == TableIdentifier("123D_LIST", Some("a")))
    // ".123BD" should not be treated as token of type BIGDECIMAL_LITERAL
    assert(parseTableIdentifier("a.123BD_LIST") == TableIdentifier("123BD_LIST", Some("a")))
  }

  test("SPARK-17832 table identifier - contains backtick") {
    val complexName = TableIdentifier("`weird`table`name", Some("`d`b`1"))
    assert(complexName === parseTableIdentifier("```d``b``1`.```weird``table``name`"))
    assert(complexName === parseTableIdentifier(complexName.quotedString))
    intercept[ParseException](parseTableIdentifier(complexName.unquotedString))
    // A table identifier containing consecutive backticks should be handled correctly.
    val complexName2 = TableIdentifier("x``y", Some("d``b"))
    assert(complexName2 === parseTableIdentifier(complexName2.quotedString))
  }
} 
Example 96
Source File: CatalogSuite.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.types.StructType


class CatalogSuite extends AnalysisTest {

  test("desc table when owner is set to null") {
    val table = CatalogTable(
      identifier = TableIdentifier("tbl", Some("db1")),
      tableType = CatalogTableType.MANAGED,
      storage = CatalogStorageFormat.empty,
      owner = null,
      schema = new StructType().add("col1", "int").add("col2", "string"),
      provider = Some("parquet"))
    table.toLinkedHashMap
  }
} 
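A minimal sketch of what the test above exercises: toLinkedHashMap backs DESCRIBE TABLE output and must tolerate a null owner (the identifier and schema here are hypothetical).

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
import org.apache.spark.sql.types.StructType

object DescTableSketch {
  def main(args: Array[String]): Unit = {
    val table = CatalogTable(
      identifier = TableIdentifier("tbl", Some("db1")),
      tableType = CatalogTableType.MANAGED,
      storage = CatalogStorageFormat.empty,
      owner = null,
      schema = new StructType().add("col1", "int").add("col2", "string"),
      provider = Some("parquet"))
    // Each entry is a (label, value) pair, e.g. ("Database", "db1"), ("Table", "tbl").
    table.toLinkedHashMap.foreach { case (k, v) => println(s"$k: $v") }
  }
}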
Example 97
Source File: ddl.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.datasources

import java.util.Locale

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils}
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.command.{DDLUtils, RunnableCommand}
import org.apache.spark.sql.types._


case class CreateTempViewUsing(
    tableIdent: TableIdentifier,
    userSpecifiedSchema: Option[StructType],
    replace: Boolean,
    global: Boolean,
    provider: String,
    options: Map[String, String]) extends RunnableCommand {

  if (tableIdent.database.isDefined) {
    throw new AnalysisException(
      s"Temporary view '$tableIdent' should not have specified a database")
  }

  override def argString: String = {
    s"[tableIdent:$tableIdent " +
      userSpecifiedSchema.map(_ + " ").getOrElse("") +
      s"replace:$replace " +
      s"provider:$provider " +
      CatalogUtils.maskCredentials(options)
  }

  override def run(sparkSession: SparkSession): Seq[Row] = {
    if (provider.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) {
      throw new AnalysisException("Hive data source can only be used with tables, " +
        "you can't use it with CREATE TEMP VIEW USING")
    }

    val dataSource = DataSource(
      sparkSession,
      userSpecifiedSchema = userSpecifiedSchema,
      className = provider,
      options = options)

    val catalog = sparkSession.sessionState.catalog
    val viewDefinition = Dataset.ofRows(
      sparkSession, LogicalRelation(dataSource.resolveRelation())).logicalPlan

    if (global) {
      catalog.createGlobalTempView(tableIdent.table, viewDefinition, replace)
    } else {
      catalog.createTempView(tableIdent.table, viewDefinition, replace)
    }

    Seq.empty[Row]
  }
}

case class RefreshTable(tableIdent: TableIdentifier)
  extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    // Refresh the given table's metadata. If this table is cached as an InMemoryRelation,
    // drop the original cached version and make the new version cached lazily.
    sparkSession.catalog.refreshTable(tableIdent.quotedString)
    Seq.empty[Row]
  }
}

case class RefreshResource(path: String)
  extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    sparkSession.catalog.refreshByPath(path)
    Seq.empty[Row]
  }
} 
Example 98
Source File: cache.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.command

import org.apache.spark.sql.{Dataset, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

case class CacheTableCommand(
    tableIdent: TableIdentifier,
    plan: Option[LogicalPlan],
    isLazy: Boolean) extends RunnableCommand {
  require(plan.isEmpty || tableIdent.database.isEmpty,
    "Database name is not allowed in CACHE TABLE AS SELECT")

  override protected def innerChildren: Seq[QueryPlan[_]] = plan.toSeq

  override def run(sparkSession: SparkSession): Seq[Row] = {
    plan.foreach { logicalPlan =>
      Dataset.ofRows(sparkSession, logicalPlan).createTempView(tableIdent.quotedString)
    }
    sparkSession.catalog.cacheTable(tableIdent.quotedString)

    if (!isLazy) {
      // Performs eager caching
      sparkSession.table(tableIdent).count()
    }

    Seq.empty[Row]
  }
}


case class UncacheTableCommand(
    tableIdent: TableIdentifier,
    ifExists: Boolean) extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val tableId = tableIdent.quotedString
    if (!ifExists || sparkSession.catalog.tableExists(tableId)) {
      sparkSession.catalog.uncacheTable(tableId)
    }
    Seq.empty[Row]
  }
}


case class ClearCacheCommand() extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    sparkSession.catalog.clearCache()
    Seq.empty[Row]
  }

  // TreeNode.makeCopy does not support a zero-argument constructor, so it is overridden here.
  override def makeCopy(newArgs: Array[AnyRef]): ClearCacheCommand = ClearCacheCommand()
} 
Example 99
Source File: AnalyzeTableCommand.scala    From Spark-2.3.1   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.command

import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.CatalogTableType



case class AnalyzeTableCommand(
    tableIdent: TableIdentifier,
    noscan: Boolean = true) extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val sessionState = sparkSession.sessionState
    val db = tableIdent.database.getOrElse(sessionState.catalog.getCurrentDatabase)
    val tableIdentWithDB = TableIdentifier(tableIdent.table, Some(db))
    val tableMeta = sessionState.catalog.getTableMetadata(tableIdentWithDB)
    if (tableMeta.tableType == CatalogTableType.VIEW) {
      throw new AnalysisException("ANALYZE TABLE is not supported on views.")
    }

    // Compute stats for the whole table
    val newTotalSize = CommandUtils.calculateTotalSize(sessionState, tableMeta)
    val newRowCount =
      if (noscan) None else Some(BigInt(sparkSession.table(tableIdentWithDB).count()))

    // Update the metastore if the above statistics of the table are different from those
    // recorded in the metastore.
    val newStats = CommandUtils.compareAndGetNewStats(tableMeta.stats, newTotalSize, newRowCount)
    if (newStats.isDefined) {
      sessionState.catalog.alterTableStats(tableIdentWithDB, newStats)
    }

    Seq.empty[Row]
  }
} 
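A minimal sketch of driving AnalyzeTableCommand through SQL and reading back the statistics it records; the table name is hypothetical.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier

object AnalyzeTableSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("analyze-sketch").getOrCreate()
    spark.range(100).write.mode("overwrite").saveAsTable("stats_tbl")
    // NOSCAN computes only sizeInBytes; dropping it also computes the row count.
    spark.sql("ANALYZE TABLE stats_tbl COMPUTE STATISTICS NOSCAN")
    val stats = spark.sessionState.catalog
      .getTableMetadata(TableIdentifier("stats_tbl")).stats
    println(stats)
    spark.stop()
  }
}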
Example 100
Source File: SparkSchemaProvider.scala    From mimir   with Apache License 2.0 5 votes vote down vote up
package mimir.data

import com.typesafe.scalalogging.LazyLogging
import org.apache.spark.sql.{ DataFrame, SaveMode } 
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.analysis.{ UnresolvedRelation, NoSuchDatabaseException }
import org.apache.spark.sql.execution.command.{ DropTableCommand, CreateDatabaseCommand }

import mimir.Database
import mimir.algebra._
import mimir.exec.spark.{MimirSpark, RAToSpark, RowIndexPlan}

class SparkSchemaProvider(db: Database)
  extends LogicalPlanSchemaProvider
  with MaterializedTableProvider
  with LazyLogging
{

  def listTables(): Seq[ID] = 
  {
    try {
      // `sparkDBName` is assumed to be defined elsewhere in this class.
      MimirSpark.get.sparkSession
                .catalog
                .listTables(sparkDBName)
                .collect()
                .map { table => ID(table.name) }
    } catch {
      case _:NoSuchDatabaseException => {
        logger.warn(s"Couldn't find database ($sparkDBName)")
        Seq.empty
      }
    }
  }

  def tableSchema(table: ID): Option[Seq[(ID, Type)]] =
  {
    try {
      if(MimirSpark.get.sparkSession.catalog.tableExists(sparkDBName, table.id)) {
        Some(
          MimirSpark.get.sparkSession
                    .catalog
                    .listColumns(sparkDBName, table.id)
                    .collect()
                    .map { col => (
                        ID(col.name), 
                        RAToSpark.getMimirType( 
                          RAToSpark.dataTypeFromHiveDataTypeString(col.dataType))
                      ) }
        )
      } else { 
        logger.trace(s"$table doesn't exist")
        None 
      }
    } catch {
      case _:NoSuchDatabaseException => {
        logger.warn(s"Couldn't find database ($sparkDBName)")
        None
      }
    }
  }

  def logicalplan(table: ID): LogicalPlan =
  {
    RowIndexPlan(
      UnresolvedRelation(TableIdentifier(table.id)), 
      tableSchema(table).get
    ).getPlan(db)
  }

  def createStoredTableAs(data: DataFrame, name: ID)
  {
    data.persist()
        .createOrReplaceTempView(name.id)
    data.write
        .mode(SaveMode.Overwrite)
        .saveAsTable(name.id)
  }

  def dropStoredTable(name: ID)
  {
    DropTableCommand(
      TableIdentifier(name.id, None),//Option(sparkDBName)), 
      true, false, true
    ).run(MimirSpark.get.sparkSession)
  }
} 
Example 101
Source File: CreateViewAsSelect.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.hive.{HiveMetastoreTypes, HiveContext}
import org.apache.spark.sql.{AnalysisException, Row, SQLContext}
import org.apache.spark.sql.execution.RunnableCommand
import org.apache.spark.sql.hive.client.{HiveColumn, HiveTable}


// TODO: Note that this class can NOT canonicalize the view SQL string entirely, which is different
// from Hive and may not work for some cases like create view on self join.
private[hive] case class CreateViewAsSelect(
    tableDesc: HiveTable,
    childSchema: Seq[Attribute],
    allowExisting: Boolean,
    orReplace: Boolean) extends RunnableCommand {

  assert(tableDesc.schema == Nil || tableDesc.schema.length == childSchema.length)
  assert(tableDesc.viewText.isDefined)

  val tableIdentifier = TableIdentifier(tableDesc.name, Some(tableDesc.database))

  override def run(sqlContext: SQLContext): Seq[Row] = {
    val hiveContext = sqlContext.asInstanceOf[HiveContext]

    if (hiveContext.catalog.tableExists(tableIdentifier)) {
      if (allowExisting) {
        // view already exists, will do nothing, to keep consistent with Hive
      } else if (orReplace) {
        hiveContext.catalog.client.alertView(prepareTable())
      } else {
        throw new AnalysisException(s"View $tableIdentifier already exists. " +
          "If you want to update the view definition, please use ALTER VIEW AS or " +
          "CREATE OR REPLACE VIEW AS")
      }
    } else {
      hiveContext.catalog.client.createView(prepareTable())
    }

    Seq.empty[Row]
  }

  private def prepareTable(): HiveTable = {
    // setup column types according to the schema of child.
    val schema = if (tableDesc.schema == Nil) {
      childSchema.map { attr =>
        HiveColumn(attr.name, HiveMetastoreTypes.toMetastoreType(attr.dataType), null)
      }
    } else {
      childSchema.zip(tableDesc.schema).map { case (attr, col) =>
        HiveColumn(col.name, HiveMetastoreTypes.toMetastoreType(attr.dataType), col.comment)
      }
    }

    val columnNames = childSchema.map(f => verbose(f.name))

    // When user specified column names for view, we should create a project to do the renaming.
    // When no column name specified, we still need to create a project to declare the columns
    // we need, to make us more robust to top level `*`s.
    val projectList = if (tableDesc.schema == Nil) {
      columnNames.mkString(", ")
    } else {
      columnNames.zip(tableDesc.schema.map(f => verbose(f.name))).map {
        case (name, alias) => s"$name AS $alias"
      }.mkString(", ")
    }

    val viewName = verbose(tableDesc.name)

    val expandedText = s"SELECT $projectList FROM (${tableDesc.viewText.get}) $viewName"

    tableDesc.copy(schema = schema, viewText = Some(expandedText))
  }

  // escape backtick with double-backtick in column name and wrap it with backtick.
  private def verbose(name: String) = s"`${name.replaceAll("`", "``")}`"
} 
Example 102
Source File: CreateTableAsSelect.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan}
import org.apache.spark.sql.execution.RunnableCommand
import org.apache.spark.sql.hive.client.{HiveColumn, HiveTable}
import org.apache.spark.sql.hive.{HiveContext, HiveMetastoreTypes, MetastoreRelation}
import org.apache.spark.sql.{AnalysisException, Row, SQLContext}


private[hive]
case class CreateTableAsSelect(
    tableDesc: HiveTable,
    query: LogicalPlan,
    allowExisting: Boolean)
  extends RunnableCommand {

  val tableIdentifier = TableIdentifier(tableDesc.name, Some(tableDesc.database))

  override def children: Seq[LogicalPlan] = Seq(query)

  override def run(sqlContext: SQLContext): Seq[Row] = {
    val hiveContext = sqlContext.asInstanceOf[HiveContext]
    lazy val metastoreRelation: MetastoreRelation = {
      import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
      import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
      import org.apache.hadoop.io.Text
      import org.apache.hadoop.mapred.TextInputFormat

      val withFormat =
        tableDesc.copy(
          inputFormat =
            tableDesc.inputFormat.orElse(Some(classOf[TextInputFormat].getName)),
          outputFormat =
            tableDesc.outputFormat
              .orElse(Some(classOf[HiveIgnoreKeyTextOutputFormat[Text, Text]].getName)),
          serde = tableDesc.serde.orElse(Some(classOf[LazySimpleSerDe].getName())))

      val withSchema = if (withFormat.schema.isEmpty) {
        // Hive doesn't support specifying the column list for target table in CTAS
        // However we don't think SparkSQL should follow that.
        tableDesc.copy(schema =
        query.output.map(c =>
          HiveColumn(c.name, HiveMetastoreTypes.toMetastoreType(c.dataType), null)))
      } else {
        withFormat
      }

      hiveContext.catalog.client.createTable(withSchema)

      // Get the Metastore Relation
      hiveContext.catalog.lookupRelation(tableIdentifier, None) match {
        case r: MetastoreRelation => r
      }
    }
    // TODO ideally, we should get the output data ready first and then
    // add the relation into catalog, just in case of failure occurs while data
    // processing.
    if (hiveContext.catalog.tableExists(tableIdentifier)) {
      if (allowExisting) {
        // table already exists, will do nothing, to keep consistent with Hive
      } else {
        throw new AnalysisException(s"$tableIdentifier already exists.")
      }
    } else {
      hiveContext.executePlan(InsertIntoTable(metastoreRelation, Map(), query, true, false)).toRdd
    }

    Seq.empty[Row]
  }

  override def argString: String = {
    s"[Database:${tableDesc.database}}, TableName: ${tableDesc.name}, InsertIntoHiveTable]"
  }
} 
Example 103
Source File: ListTablesSuite.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive

import org.scalatest.BeforeAndAfterAll

import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.Row

class ListTablesSuite extends QueryTest with TestHiveSingleton with BeforeAndAfterAll {
  import hiveContext._
  import hiveContext.implicits._

  val df = sparkContext.parallelize((1 to 10).map(i => (i, s"str$i"))).toDF("key", "value")

  override def beforeAll(): Unit = {
    // The catalog in HiveContext is a case insensitive one.
    catalog.registerTable(TableIdentifier("ListTablesSuiteTable"), df.logicalPlan)
    sql("CREATE TABLE HiveListTablesSuiteTable (key int, value string)")
    sql("CREATE DATABASE IF NOT EXISTS ListTablesSuiteDB")
    sql("CREATE TABLE ListTablesSuiteDB.HiveInDBListTablesSuiteTable (key int, value string)")
  }

  override def afterAll(): Unit = {
    catalog.unregisterTable(TableIdentifier("ListTablesSuiteTable"))
    sql("DROP TABLE IF EXISTS HiveListTablesSuiteTable")
    sql("DROP TABLE IF EXISTS ListTablesSuiteDB.HiveInDBListTablesSuiteTable")
    sql("DROP DATABASE IF EXISTS ListTablesSuiteDB")
  }

  test("get all tables of current database") {
    Seq(tables(), sql("SHOW TABLes")).foreach {
      case allTables =>
        // We are using default DB.
        checkAnswer(
          allTables.filter("tableName = 'listtablessuitetable'"),
          Row("listtablessuitetable", true))
        checkAnswer(
          allTables.filter("tableName = 'hivelisttablessuitetable'"),
          Row("hivelisttablessuitetable", false))
        assert(allTables.filter("tableName = 'hiveindblisttablessuitetable'").count() === 0)
    }
  }

  test("getting all tables with a database name") {
    Seq(tables("listtablessuiteDb"), sql("SHOW TABLes in listTablesSuitedb")).foreach {
      case allTables =>
        checkAnswer(
          allTables.filter("tableName = 'listtablessuitetable'"),
          Row("listtablessuitetable", true))
        assert(allTables.filter("tableName = 'hivelisttablessuitetable'").count() === 0)
        checkAnswer(
          allTables.filter("tableName = 'hiveindblisttablessuitetable'"),
          Row("hiveindblisttablessuitetable", false))
    }
  }
} 
Example 104
Source File: AnalysisTest.scala    From BigDatalog   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.{TableIdentifier, SimpleCatalystConf}

trait AnalysisTest extends PlanTest {

  val (caseSensitiveAnalyzer, caseInsensitiveAnalyzer) = {
    val caseSensitiveConf = new SimpleCatalystConf(true)
    val caseInsensitiveConf = new SimpleCatalystConf(false)

    val caseSensitiveCatalog = new SimpleCatalog(caseSensitiveConf)
    val caseInsensitiveCatalog = new SimpleCatalog(caseInsensitiveConf)

    caseSensitiveCatalog.registerTable(TableIdentifier("TaBlE"), TestRelations.testRelation)
    caseInsensitiveCatalog.registerTable(TableIdentifier("TaBlE"), TestRelations.testRelation)

    new Analyzer(caseSensitiveCatalog, EmptyFunctionRegistry, caseSensitiveConf) {
      override val extendedResolutionRules = EliminateSubQueries :: Nil
    } ->
    new Analyzer(caseInsensitiveCatalog, EmptyFunctionRegistry, caseInsensitiveConf) {
      override val extendedResolutionRules = EliminateSubQueries :: Nil
    }
  }

  protected def getAnalyzer(caseSensitive: Boolean) = {
    if (caseSensitive) caseSensitiveAnalyzer else caseInsensitiveAnalyzer
  }

  protected def checkAnalysis(
      inputPlan: LogicalPlan,
      expectedPlan: LogicalPlan,
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val actualPlan = analyzer.execute(inputPlan)
    analyzer.checkAnalysis(actualPlan)
    comparePlans(actualPlan, expectedPlan)
  }

  protected def assertAnalysisSuccess(
      inputPlan: LogicalPlan,
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    analyzer.checkAnalysis(analyzer.execute(inputPlan))
  }

  protected def assertAnalysisError(
      inputPlan: LogicalPlan,
      expectedErrors: Seq[String],
      caseSensitive: Boolean = true): Unit = {
    val analyzer = getAnalyzer(caseSensitive)
    val e = intercept[AnalysisException] {
      analyzer.checkAnalysis(analyzer.execute(inputPlan))
    }
    assert(expectedErrors.map(_.toLowerCase).forall(e.getMessage.toLowerCase.contains),
      s"Expected to throw Exception contains: ${expectedErrors.mkString(", ")}, " +
        s"actually we get ${e.getMessage}")
  }
} 
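A side note on the case-sensitivity setup above: TableIdentifier itself is a plain case class, so identifier equality is always case sensitive, and case-insensitive resolution is left to the catalog driven by the analyzer's conf. A small standalone illustration (not part of the original test, runnable in a Scala REPL with spark-catalyst on the classpath):

import org.apache.spark.sql.catalyst.TableIdentifier

val declared = TableIdentifier("TaBlE")
val queried = TableIdentifier("table")

assert(declared != queried)        // case class equality is case sensitive
assert(declared.table == "TaBlE")  // the name is stored exactly as given
assert(declared.database.isEmpty)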
Example 105
Source File: ListTablesSuite.scala    From BigDatalog   with Apache License 2.0 5 votes
package org.apache.spark.sql

import org.scalatest.BeforeAndAfter

import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types.{BooleanType, StringType, StructField, StructType}
import org.apache.spark.sql.catalyst.TableIdentifier

class ListTablesSuite extends QueryTest with BeforeAndAfter with SharedSQLContext {
  import testImplicits._

  private lazy val df = (1 to 10).map(i => (i, s"str$i")).toDF("key", "value")

  before {
    df.registerTempTable("ListTablesSuiteTable")
  }

  after {
    sqlContext.catalog.unregisterTable(TableIdentifier("ListTablesSuiteTable"))
  }

  test("get all tables") {
    checkAnswer(
      sqlContext.tables().filter("tableName = 'ListTablesSuiteTable'"),
      Row("ListTablesSuiteTable", true))

    checkAnswer(
      sql("SHOW tables").filter("tableName = 'ListTablesSuiteTable'"),
      Row("ListTablesSuiteTable", true))

    sqlContext.catalog.unregisterTable(TableIdentifier("ListTablesSuiteTable"))
    assert(sqlContext.tables().filter("tableName = 'ListTablesSuiteTable'").count() === 0)
  }

  test("getting all Tables with a database name has no impact on returned table names") {
    checkAnswer(
      sqlContext.tables("DB").filter("tableName = 'ListTablesSuiteTable'"),
      Row("ListTablesSuiteTable", true))

    checkAnswer(
      sql("show TABLES in DB").filter("tableName = 'ListTablesSuiteTable'"),
      Row("ListTablesSuiteTable", true))

    sqlContext.catalog.unregisterTable(TableIdentifier("ListTablesSuiteTable"))
    assert(sqlContext.tables().filter("tableName = 'ListTablesSuiteTable'").count() === 0)
  }

  test("query the returned DataFrame of tables") {
    val expectedSchema = StructType(
      StructField("tableName", StringType, false) ::
      StructField("isTemporary", BooleanType, false) :: Nil)

    Seq(sqlContext.tables(), sql("SHOW TABLes")).foreach {
      case tableDF =>
        assert(expectedSchema === tableDF.schema)

        tableDF.registerTempTable("tables")
        checkAnswer(
          sql(
            "SELECT isTemporary, tableName from tables WHERE tableName = 'ListTablesSuiteTable'"),
          Row(true, "ListTablesSuiteTable")
        )
        checkAnswer(
          sqlContext.tables().filter("tableName = 'tables'").select("tableName", "isTemporary"),
          Row("tables", true))
        sqlContext.dropTempTable("tables")
    }
  }
} 
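registerTempTable and the direct catalog calls used in this suite are deprecated in newer Spark versions. A minimal sketch of the same setup and teardown through the public API, assuming Spark 2.1 or later; the SparkSession parameter and the helper name withListTablesView are illustrative:

import org.apache.spark.sql.SparkSession

def withListTablesView(spark: SparkSession): Unit = {
  import spark.implicits._
  val df = (1 to 10).map(i => (i, s"str$i")).toDF("key", "value")

  df.createOrReplaceTempView("ListTablesSuiteTable")   // replaces registerTempTable
  assert(spark.catalog.tableExists("ListTablesSuiteTable"))
  spark.catalog.dropTempView("ListTablesSuiteTable")   // replaces catalog.unregisterTable
}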
Example 106
Source File: CarbonCountStar.scala    From carbondata   with Apache License 2.0 4 votes
package org.apache.spark.sql

import scala.collection.JavaConverters._

import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.execution.LeafExecNode
import org.apache.spark.sql.optimizer.CarbonFilters
import org.apache.spark.sql.types.StringType
import org.apache.spark.unsafe.types.UTF8String

import org.apache.carbondata.core.datastore.impl.FileFactory
import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier
import org.apache.carbondata.core.metadata.schema.table.CarbonTable
import org.apache.carbondata.core.mutate.CarbonUpdateUtil
import org.apache.carbondata.core.statusmanager.StageInputCollector
import org.apache.carbondata.core.util.{CarbonProperties, ThreadLocalSessionInfo}
import org.apache.carbondata.hadoop.api.{CarbonInputFormat, CarbonTableInputFormat}
import org.apache.carbondata.hadoop.util.CarbonInputFormatUtil
import org.apache.carbondata.spark.load.DataLoadProcessBuilderOnSpark

case class CarbonCountStar(
    attributesRaw: Seq[Attribute],
    carbonTable: CarbonTable,
    sparkSession: SparkSession,
    outUnsafeRows: Boolean = true) extends LeafExecNode {

  override def doExecute(): RDD[InternalRow] = {
    ThreadLocalSessionInfo
      .setConfigurationToCurrentThread(sparkSession.sessionState.newHadoopConf())
    val absoluteTableIdentifier = carbonTable.getAbsoluteTableIdentifier
    val (job, tableInputFormat) = createCarbonInputFormat(absoluteTableIdentifier)
    CarbonInputFormat.setQuerySegment(job.getConfiguration, carbonTable)

    // get row count
    var rowCount = CarbonUpdateUtil.getRowCount(
      tableInputFormat.getBlockRowCount(
        job,
        carbonTable,
        CarbonFilters.getPartitions(
          Seq.empty,
          sparkSession,
          TableIdentifier(
            carbonTable.getTableName,
            Some(carbonTable.getDatabaseName))).map(_.asJava).orNull, false),
      carbonTable)

    if (CarbonProperties.isQueryStageInputEnabled) {
      // also add the row count of any stage input splits, if present
      val splits = StageInputCollector.createInputSplits(carbonTable, job.getConfiguration)
      if (!splits.isEmpty) {
        val df = DataLoadProcessBuilderOnSpark.createInputDataFrame(
          sparkSession, carbonTable, splits.asScala)
        rowCount += df.count()
      }
    }

    val valueRaw =
      attributesRaw.head.dataType match {
        case StringType => Seq(UTF8String.fromString(Long.box(rowCount).toString)).toArray
          .asInstanceOf[Array[Any]]
        case _ => Seq(Long.box(rowCount)).toArray.asInstanceOf[Array[Any]]
      }
    val value = new GenericInternalRow(valueRaw)
    val unsafeProjection = UnsafeProjection.create(output.map(_.dataType).toArray)
    val row = if (outUnsafeRows) unsafeProjection(value) else value
    sparkContext.parallelize(Seq(row))
  }

  override def output: Seq[Attribute] = {
    attributesRaw
  }

  private def createCarbonInputFormat(absoluteTableIdentifier: AbsoluteTableIdentifier
  ): (Job, CarbonTableInputFormat[Array[Object]]) = {
    val carbonInputFormat = new CarbonTableInputFormat[Array[Object]]()
    val jobConf: JobConf = new JobConf(FileFactory.getConfiguration)
    SparkHadoopUtil.get.addCredentials(jobConf)
    CarbonInputFormat.setTableInfo(jobConf, carbonTable.getTableInfo)
    val job = new Job(jobConf)
    FileInputFormat.addInputPath(job, new Path(absoluteTableIdentifier.getTablePath))
    CarbonInputFormat
      .setTransactionalTable(job.getConfiguration,
        carbonTable.getTableInfo.isTransactionalTable)
    CarbonInputFormatUtil.setIndexJobIfConfigured(job.getConfiguration)
    (job, carbonInputFormat)
  }
}
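The TableIdentifier handed to the partition pruning call in doExecute is derived from the Carbon table metadata. A short sketch of that mapping, with the helper name toTableIdentifier being illustrative:

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.carbondata.core.metadata.schema.table.CarbonTable

// Same construction as in doExecute above: table name plus optional database name.
def toTableIdentifier(carbonTable: CarbonTable): TableIdentifier =
  TableIdentifier(carbonTable.getTableName, Some(carbonTable.getDatabaseName))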