org.apache.spark.sql.catalyst.analysis.UnresolvedRelation Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.analysis.UnresolvedRelation.
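Before the examples, a minimal sketch (not taken from any of the projects below, and assuming the Spark 2.x-era constructor that wraps a TableIdentifier): UnresolvedRelation is the placeholder node the parser produces for a table reference before the analyzer has resolved it against the catalog, which is why most of the examples below pattern-match on it inside analyzer rules, parsers, and plan visitors.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation

object UnresolvedRelationIntroSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("unresolved-relation-sketch")
      .getOrCreate()
    spark.range(3).toDF("id").createOrReplaceTempView("t")

    // A table reference that has not been looked up in the catalog yet.
    val rel = UnresolvedRelation(TableIdentifier("t"))
    println(rel)

    // The parsed (pre-analysis) plan of a query still contains UnresolvedRelation
    // nodes; the analyzer later replaces them with the actual catalog relation.
    val parsed = spark.sql("SELECT id FROM t").queryExecution.logical
    parsed.collect { case u: UnresolvedRelation => u }.foreach(println)

    spark.stop()
  }
}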
Example 1
Source File: rules.scala From tispark with Apache License 2.0
package org.apache.spark.sql.extensions

import com.pingcap.tispark.statistics.StatisticsManager
import com.pingcap.tispark.utils.ReflectionUtil._
import com.pingcap.tispark.{MetaManager, TiDBRelation, TiTableReference}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, UnresolvedRelation}
import org.apache.spark.sql.catalyst.catalog.TiSessionCatalog
import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.command._
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.{AnalysisException, _}

case class TiResolutionRule(getOrCreateTiContext: SparkSession => TiContext)(
    sparkSession: SparkSession)
    extends Rule[LogicalPlan] {
  protected lazy val meta: MetaManager = tiContext.meta
  private lazy val autoLoad = tiContext.autoLoad
  private lazy val tiCatalog = tiContext.tiCatalog
  private lazy val tiSession = tiContext.tiSession
  private lazy val sqlContext = tiContext.sqlContext
  protected val tiContext: TiContext = getOrCreateTiContext(sparkSession)

  protected val resolveTiDBRelation: TableIdentifier => LogicalPlan =
    tableIdentifier => {
      val dbName = getDatabaseFromIdentifier(tableIdentifier)
      val tableName = tableIdentifier.table
      val table = meta.getTable(dbName, tableName)
      if (table.isEmpty) {
        throw new AnalysisException(s"Table or view '$tableName' not found in database '$dbName'")
      }
      if (autoLoad) {
        StatisticsManager.loadStatisticsInfo(table.get)
      }
      val sizeInBytes = StatisticsManager.estimateTableSize(table.get)
      val tiDBRelation =
        TiDBRelation(tiSession, TiTableReference(dbName, tableName, sizeInBytes), meta)(sqlContext)
      // Use SubqueryAlias so that projects and joins can correctly resolve
      // UnresolvedAttributes in JoinConditions, Projects, Filters, etc.
      newSubqueryAlias(tableName, LogicalRelation(tiDBRelation))
    }

  override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp resolveTiDBRelations

  protected def resolveTiDBRelations: PartialFunction[LogicalPlan, LogicalPlan] = {
    case i @ InsertIntoTable(UnresolvedRelation(tableIdentifier), _, _, _, _)
        if tiCatalog
          .catalogOf(tableIdentifier.database)
          .exists(_.isInstanceOf[TiSessionCatalog]) =>
      i.copy(table = EliminateSubqueryAliases(resolveTiDBRelation(tableIdentifier)))
    case UnresolvedRelation(tableIdentifier)
        if tiCatalog
          .catalogOf(tableIdentifier.database)
          .exists(_.isInstanceOf[TiSessionCatalog]) =>
      resolveTiDBRelation(tableIdentifier)
  }

  private def getDatabaseFromIdentifier(tableIdentifier: TableIdentifier): String =
    tableIdentifier.database.getOrElse(tiCatalog.getCurrentDatabase)
}

case class TiDDLRule(getOrCreateTiContext: SparkSession => TiContext)(sparkSession: SparkSession)
    extends Rule[LogicalPlan] {
  protected lazy val tiContext: TiContext = getOrCreateTiContext(sparkSession)

  override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp {
    // TODO: support other commands that may concern TiSpark catalog.
    case sd: ShowDatabasesCommand => TiShowDatabasesCommand(tiContext, sd)
    case sd: SetDatabaseCommand => TiSetDatabaseCommand(tiContext, sd)
    case st: ShowTablesCommand => TiShowTablesCommand(tiContext, st)
    case st: ShowColumnsCommand => TiShowColumnsCommand(tiContext, st)
    case dt: DescribeTableCommand => TiDescribeTablesCommand(tiContext, dt)
    case dc: DescribeColumnCommand => TiDescribeColumnCommand(tiContext, dc)
    case ct: CreateTableLikeCommand => TiCreateTableLikeCommand(tiContext, ct)
  }
}
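For orientation (a hedged sketch, not part of the tispark file above): a Rule[LogicalPlan] such as TiResolutionRule is normally registered through Spark's SparkSessionExtensions hook. The factory argument below is a placeholder for however tispark actually constructs and caches its TiContext.

import org.apache.spark.sql.{SparkSession, SparkSessionExtensions, TiContext}
import org.apache.spark.sql.extensions.TiResolutionRule

// Hypothetical wiring sketch; `tiContextFactory` stands in for the real factory.
class TiResolutionRuleWiringSketch(tiContextFactory: SparkSession => TiContext)
    extends (SparkSessionExtensions => Unit) {
  override def apply(extensions: SparkSessionExtensions): Unit = {
    // Registers the analyzer rule above, so UnresolvedRelation nodes that refer to
    // TiDB tables are rewritten into TiDBRelation-backed plans during resolution.
    extensions.injectResolutionRule(session => TiResolutionRule(tiContextFactory)(session))
  }
}

// Usage (assuming such a factory exists):
//   SparkSession.builder()
//     .withExtensions(new TiResolutionRuleWiringSketch(factory))
//     .getOrCreate()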
Example 2
Source File: parser.scala From tispark with Apache License 2.0
package org.apache.spark.sql.extensions

import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.expressions.{Expression, SubqueryExpression}
import org.apache.spark.sql.catalyst.parser._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier}
import org.apache.spark.sql.execution.SparkSqlParser
import org.apache.spark.sql.execution.command.{
  CacheTableCommand,
  CreateViewCommand,
  ExplainCommand,
  UncacheTableCommand
}
import org.apache.spark.sql.types.{DataType, StructType}
import org.apache.spark.sql.{SparkSession, TiContext}

case class TiParser(getOrCreateTiContext: SparkSession => TiContext)(
    sparkSession: SparkSession,
    delegate: ParserInterface)
    extends ParserInterface {
  private lazy val tiContext = getOrCreateTiContext(sparkSession)
  private lazy val internal = new SparkSqlParser(sparkSession.sqlContext.conf)

  // Only the helper that decides whether a table reference needs qualification
  // is shown here; the ParserInterface method implementations are omitted in this excerpt.
  private def needQualify(tableIdentifier: TableIdentifier) =
    tableIdentifier.database.isEmpty && tiContext.sessionCatalog
      .getTempView(tableIdentifier.table)
      .isEmpty
}
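As with the resolution rule above, a custom ParserInterface like TiParser is typically installed through SparkSessionExtensions.injectParser, whose builder receives both the session and the previously installed parser, matching TiParser's second parameter list. A hedged sketch (the TiContext factory is again only a placeholder):

import org.apache.spark.sql.{SparkSession, SparkSessionExtensions, TiContext}
import org.apache.spark.sql.extensions.TiParser

object TiParserWiringSketch {
  // Hypothetical wiring; `tiContextFactory` is a stand-in for however the
  // real extension obtains its TiContext.
  def install(extensions: SparkSessionExtensions,
      tiContextFactory: SparkSession => TiContext): Unit = {
    extensions.injectParser { (session, delegate) =>
      // `delegate` is the parser installed before this one (SparkSqlParser by default),
      // so statements the custom parser does not handle can fall through to it.
      TiParser(tiContextFactory)(session, delegate)
    }
  }
}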
Example 3
Source File: ExtendedPlanner.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql.extension

import org.apache.spark.Logging
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.{SparkPlan, SparkPlanner}

// Note: the enclosing planner class declaration is omitted in this excerpt;
// the methods below are members of that class.

  def planLaterExt(p: LogicalPlan): SparkPlan = planLater(p)

  def optimizedPlan(p: LogicalPlan): LogicalPlan = sqlContext.executePlan(p).optimizedPlan

  def optimizedRelationLookup(u: UnresolvedRelation): Option[LogicalPlan] = {
    if (sqlContext.catalog.tableExists(u.tableIdentifier)) {
      Some(optimizedPlan(u))
    } else {
      None
    }
  }

  // TODO (AC) Remove this once table-valued function are rebased on top.
  def analyze(p: LogicalPlan): LogicalPlan = sqlContext.analyzer.execute(p)

  override def plan(p: LogicalPlan): Iterator[SparkPlan] = {
    val iter = strategies.view.flatMap({ strategy =>
      val plans = strategy(p)
      if (plans.isEmpty) {
        logTrace(s"Strategy $strategy did not produce plans for $p")
      } else {
        logDebug(s"Strategy $strategy produced a plan for $p: ${plans.head}")
      }
      plans
    }).toIterator
    assert(iter.hasNext, s"No plan for $p")
    iter
  }
}
Example 4
Source File: RecursiveViewAnalysis.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.plans.logical.view.AbstractView
import org.apache.spark.sql.execution.datasources.AbstractCreateViewCommand

object RecursiveViewAnalysis {
  def apply(plan: LogicalPlan): Unit = {
    plan.foreach {
      case c: AbstractCreateViewCommand if containsViewIdentifier(c.identifier, c.plan) =>
        throw new AnalysisException(s"The view ${c.identifier.table} " +
          s"cannot be defined recursively.")
      case _ =>
    }
  }

  private def containsViewIdentifier(name: TableIdentifier, plan: LogicalPlan): Boolean =
    plan.find {
      case UnresolvedRelation(ident, _) if ident == name => true
      case AbstractView(child) => containsViewIdentifier(name, child)
      case _ => false
    }.isDefined
}
Example 5
Source File: ExtractSQLParserSuite.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql

import com.sap.spark.PlanTest
import org.apache.spark.Logging
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.parser.SapParserDialect
import org.scalatest.FunSuite

class ExtractSQLParserSuite extends FunSuite with PlanTest with Logging {

  // scalastyle:off magic.number

  val t1 = UnresolvedRelation(TableIdentifier("T1"))
  val parser = new SapParserDialect

  test("Parse EXTRACT in SELECT") {
    val result = parser.parse("SELECT a, EXTRACT(YEAR FROM a) FROM T1")
    val expected = t1.select(AliasUnresolver('a, Year('a)): _*)
    comparePlans(expected, result)
  }

  test("Parse EXTRACT in WHERE") {
    val result = parser.parse("SELECT 1 FROM T1 WHERE EXTRACT(MONTH FROM a) = 2015")
    val expected = t1.where(Month('a) === 2015).select(AliasUnresolver(1): _*)
    comparePlans(expected, result)
  }

  test("Parse EXTRACT in GROUP BY") {
    val result = parser.parse("SELECT 1 FROM T1 GROUP BY EXTRACT(DAY FROM a)")
    val expected = t1.groupBy(DayOfMonth('a))(AliasUnresolver(1): _*)
    comparePlans(expected, result)
  }
}
Example 6
Source File: CarbonCatalystOperators.scala From carbondata with Apache License 2.0
package org.apache.spark.sql

import scala.collection.mutable

import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Count}
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.types.StringType

abstract class CarbonProfile(attributes: Seq[Attribute]) extends Serializable {
  def isEmpty: Boolean = attributes.isEmpty
}

case class IncludeProfile(attributes: Seq[Attribute]) extends CarbonProfile(attributes)

case class ExcludeProfile(attributes: Seq[Attribute]) extends CarbonProfile(attributes)

case class ProjectForUpdate(
    table: UnresolvedRelation,
    columns: List[String],
    children: Seq[LogicalPlan]) extends LogicalPlan {
  override def output: Seq[Attribute] = Seq.empty
}

case class UpdateTable(
    table: UnresolvedRelation,
    columns: List[String],
    selectStmt: String,
    alias: Option[String] = None,
    filer: String) extends LogicalPlan {
  override def children: Seq[LogicalPlan] = Seq.empty
  override def output: Seq[Attribute] = Seq.empty
}

case class DeleteRecords(
    statement: String,
    alias: Option[String] = None,
    table: UnresolvedRelation) extends LogicalPlan {
  override def children: Seq[LogicalPlan] = Seq.empty
  override def output: Seq[AttributeReference] = Seq.empty
}

// Note: the declaration of the object enclosing this helper is omitted in this excerpt.
  def strictCountStar(groupingExpressions: Seq[Expression],
      partialComputation: Seq[NamedExpression],
      child: LogicalPlan): Boolean = {
    if (groupingExpressions.nonEmpty) {
      return false
    }
    if (partialComputation.isEmpty) {
      return false
    }
    if (partialComputation.size > 1 && partialComputation.nonEmpty) {
      return false
    }
    child collect {
      case cd: Filter => return false
    }
    true
  }
}
Example 7
Source File: IUDCommonUtil.scala From carbondata with Apache License 2.0
package org.apache.spark.sql.execution.command.mutation

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan}
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.hive.HiveSessionCatalog

import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.CarbonProperties

// Note: the declaration of the utility object enclosing this method is omitted in this excerpt.
  def checkIfSegmentListIsSet(sparkSession: SparkSession, logicalPlan: LogicalPlan): Unit = {
    val carbonProperties = CarbonProperties.getInstance()
    logicalPlan.foreach {
      case unresolvedRelation: UnresolvedRelation =>
        val dbAndTb =
          sparkSession.sessionState.catalog.asInstanceOf[HiveSessionCatalog].getCurrentDatabase +
            "." + unresolvedRelation.tableIdentifier.table
        val segmentProperties = carbonProperties
          .getProperty(CarbonCommonConstants.CARBON_INPUT_SEGMENTS + dbAndTb, "")
        if (!(segmentProperties.equals("") || segmentProperties.trim.equals("*"))) {
          throw new MalformedCarbonCommandException("carbon.input.segments." + dbAndTb +
            "should not be set for table used in DELETE " +
            "query. Please reset the property to carbon" +
            ".input.segments." + dbAndTb + "=*")
        }
      case logicalRelation: LogicalRelation if (logicalRelation.relation
        .isInstanceOf[CarbonDatasourceHadoopRelation]) =>
        val dbAndTb =
          logicalRelation.relation.asInstanceOf[CarbonDatasourceHadoopRelation].carbonTable
            .getDatabaseName + "." +
            logicalRelation.relation.asInstanceOf[CarbonDatasourceHadoopRelation].carbonTable
              .getTableName
        val sementProperty = carbonProperties
          .getProperty(CarbonCommonConstants.CARBON_INPUT_SEGMENTS + dbAndTb, "")
        if (!(sementProperty.equals("") || sementProperty.trim.equals("*"))) {
          throw new MalformedCarbonCommandException("carbon.input.segments." + dbAndTb +
            "should not be set for table used in UPDATE " +
            "query. Please reset the property to carbon" +
            ".input.segments." + dbAndTb + "=*")
        }
      case filter: Filter => filter.subqueries.toList
        .foreach(subquery => checkIfSegmentListIsSet(sparkSession, subquery))
      case _ =>
    }
  }
}
Example 8
Source File: CarbonExpressions.scala From carbondata with Apache License 2.0
package org.apache.spark.sql

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Expression, ScalaUDF}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
import org.apache.spark.sql.execution.command.DescribeTableCommand
import org.apache.spark.sql.types.DataType

// Note: only one extractor is shown here; the declaration of the enclosing object
// in CarbonExpressions.scala (and its other extractors) is omitted in this excerpt.
  object CarbonScalaUDF {
    def unapply(expression: Expression): Option[(ScalaUDF)] = {
      expression match {
        case a: ScalaUDF => Some(a)
        case _ => None
      }
    }
  }
}
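Extractor objects like CarbonScalaUDF exist so that analyzer and optimizer rules can pattern-match on specific expression shapes. A small hedged sketch of how such an extractor might be used inside a rule; the rule itself is hypothetical and not part of CarbonData, and the import assumes the extractor is nested in an object named CarbonExpressions, as the file name suggests.

import org.apache.spark.sql.CarbonExpressions.CarbonScalaUDF
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule

// Hypothetical rule that only logs every ScalaUDF it finds; Rule extends Logging,
// which provides logInfo. The expressions themselves are returned unchanged.
object LogScalaUDFs extends Rule[LogicalPlan] {
  override def apply(plan: LogicalPlan): LogicalPlan = {
    plan.transformAllExpressions {
      case CarbonScalaUDF(udf) =>
        logInfo(s"Found ScalaUDF returning ${udf.dataType}")
        udf
    }
  }
}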
Example 9
Source File: SparkSchemaProvider.scala From mimir with Apache License 2.0
package mimir.data

import com.typesafe.scalalogging.LazyLogging
import org.apache.spark.sql.{ DataFrame, SaveMode }
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.analysis.{ UnresolvedRelation, NoSuchDatabaseException }
import org.apache.spark.sql.execution.command.{ DropTableCommand, CreateDatabaseCommand }

import mimir.Database
import mimir.algebra._
import mimir.exec.spark.{MimirSpark, RAToSpark, RowIndexPlan}

class SparkSchemaProvider(db: Database)
  extends LogicalPlanSchemaProvider
  with MaterializedTableProvider
  with LazyLogging {

  // NOTE: this method body is incomplete in the excerpt; the `else` and `catch`
  // branches below belong to code whose opening is not shown in the original listing.
  def listTables(): Seq[ID] = {
    try {
      val tables = MimirSpark.get.sparkSession
        .catalog
        .listTables( table.id)
        .collect()
        .map { col =>
          ( ID(col.name),
            RAToSpark.getMimirType(
              RAToSpark.dataTypeFromHiveDataTypeString(col.dataType)) )
        }
      } else {
        logger.trace(s"$table doesn't exist")
        None
      }
    } catch {
      case _: NoSuchDatabaseException => {
        logger.warn("Couldn't find database!!! ($sparkDBName)")
        None
      }
    }
  }

  def logicalplan(table: ID): LogicalPlan = {
    RowIndexPlan(
      UnresolvedRelation(TableIdentifier(table.id)),
      tableSchema(table).get
    ).getPlan(db)
  }

  def createStoredTableAs(data: DataFrame, name: ID) {
    data.persist()
      .createOrReplaceTempView(name.id)
    data.write
      .mode(SaveMode.Overwrite)
      .saveAsTable(name.id)
  }

  def dropStoredTable(name: ID) {
    DropTableCommand(
      TableIdentifier(name.id, None), //Option(sparkDBName)),
      true, false, true
    ).run(MimirSpark.get.sparkSession)
  }
}
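The mimir example builds a plan around an UnresolvedRelation and relies on later analysis to bind it to the actual table. A minimal hedged sketch of that idea in plain Spark follows: Dataset.ofRows is an internal, package-private API, so the sketch places itself in the org.apache.spark.sql package purely for illustration; the temp view name `people` is an assumption.

package org.apache.spark.sql  // needed to reach the private[sql] Dataset.ofRows

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation

object UnresolvedRelationResolutionSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("unresolved-relation-resolution-sketch")
      .getOrCreate()
    spark.range(5).toDF("id").createOrReplaceTempView("people")

    // Hand-built logical plan: nothing has been checked against the catalog yet.
    val unresolved = UnresolvedRelation(TableIdentifier("people"))

    // Dataset.ofRows triggers analysis, which replaces the UnresolvedRelation with
    // the resolved temp-view plan; the resulting DataFrame behaves like any other.
    val df = Dataset.ofRows(spark, unresolved)
    df.show()

    spark.stop()
  }
}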