org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias Scala Examples

The following examples show how to use org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias. Each example comes from an open-source project; the source file and license are noted in the heading above the code.
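As a quick orientation before the project examples, here is a minimal standalone sketch (not from any of the projects below) showing how a SubqueryAlias wraps a logical plan and how it is unwrapped again by pattern matching. It assumes a local SparkSession and a Spark 2.x build whose SubqueryAlias.apply accepts a plain String alias; all object and view names are illustrative.

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias

object SubqueryAliasSketch extends App {
  val spark = SparkSession.builder().master("local[*]").appName("sketch").getOrCreate()
  import spark.implicits._

  // A temp view's analyzed plan is itself wrapped in a SubqueryAlias.
  Seq((1, "a"), (2, "b")).toDF("id", "name").createOrReplaceTempView("t")
  val plan = spark.table("t").queryExecution.analyzed

  // Wrap the plan under a new alias, then strip the alias by pattern matching.
  val aliased = SubqueryAlias("t2", plan)
  aliased match {
    case SubqueryAlias(_, child) => println(child.treeString)
  }
  spark.stop()
}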
Example 1
Source File: SparkWrapper.scala    From tispark   with Apache License 2.0
package com.pingcap.tispark

import org.apache.spark.sql.catalyst.catalog.{CatalogTable, SessionCatalog}
import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Expression}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
import org.apache.spark.sql.types.{DataType, Metadata}

object SparkWrapper {
  def getVersion: String = {
    "SparkWrapper-2.3"
  }

  def newSubqueryAlias(identifier: String, child: LogicalPlan): SubqueryAlias = {
    SubqueryAlias(identifier, child)
  }

  def newAlias(child: Expression, name: String): Alias = {
    Alias(child, name)()
  }

  def newAttributeReference(
      name: String,
      dataType: DataType,
      nullable: Boolean,
      metadata: Metadata): AttributeReference = {
    AttributeReference(name, dataType, nullable, metadata)()
  }

  def callSessionCatalogCreateTable(
      obj: SessionCatalog,
      tableDefinition: CatalogTable,
      ignoreIfExists: Boolean): Unit = {
    obj.createTable(tableDefinition, ignoreIfExists)
  }
} 
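As a usage sketch (a hypothetical caller, not part of tispark), the wrapper lets calling code build plan nodes without depending on the Spark-version-specific constructor signatures:

import org.apache.spark.sql.SparkSession
import com.pingcap.tispark.SparkWrapper

object SparkWrapperUsage extends App {
  val spark = SparkSession.builder().master("local[*]").appName("usage").getOrCreate()
  import spark.implicits._

  Seq(1, 2, 3).toDF("id").createOrReplaceTempView("t")

  // Alias an analyzed plan through the version-agnostic wrapper.
  val aliased = SparkWrapper.newSubqueryAlias("t2", spark.table("t").queryExecution.analyzed)
  println(SparkWrapper.getVersion) // SparkWrapper-2.3
  println(aliased.treeString)
  spark.stop()
}

The apparent intent of the indirection is that only SparkWrapper differs between the Spark 2.3 and 2.4 builds; compare Example 2 below.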
Example 2
Source File: SparkWrapper.scala    From tispark   with Apache License 2.0
This variant is identical to Example 1 except for the version string: it targets Spark 2.4 rather than 2.3.
package com.pingcap.tispark

import org.apache.spark.sql.catalyst.catalog.{CatalogTable, SessionCatalog}
import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Expression}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
import org.apache.spark.sql.types.{DataType, Metadata}

object SparkWrapper {
  def getVersion: String = {
    "SparkWrapper-2.4"
  }

  def newSubqueryAlias(identifier: String, child: LogicalPlan): SubqueryAlias = {
    SubqueryAlias(identifier, child)
  }

  def newAlias(child: Expression, name: String): Alias = {
    Alias(child, name)()
  }

  def newAttributeReference(
      name: String,
      dataType: DataType,
      nullable: Boolean,
      metadata: Metadata): AttributeReference = {
    AttributeReference(name, dataType, nullable, metadata)()
  }

  def callSessionCatalogCreateTable(
      obj: SessionCatalog,
      tableDefinition: CatalogTable,
      ignoreIfExists: Boolean): Unit = {
    obj.createTable(tableDefinition, ignoreIfExists)
  }
} 
Example 3
Source File: OptimizeHiveMetadataOnlyQuerySuite.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.hive

import org.scalatest.BeforeAndAfter

import org.apache.spark.metrics.source.HiveCatalogMetrics
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.catalyst.expressions.NamedExpression
import org.apache.spark.sql.catalyst.plans.logical.{Distinct, Filter, Project, SubqueryAlias}
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.internal.SQLConf.OPTIMIZER_METADATA_ONLY
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

class OptimizeHiveMetadataOnlyQuerySuite extends QueryTest with TestHiveSingleton
    with BeforeAndAfter with SQLTestUtils {

  import spark.implicits._

  override def beforeAll(): Unit = {
    super.beforeAll()
    sql("CREATE TABLE metadata_only (id bigint, data string) PARTITIONED BY (part int)")
    (0 to 10).foreach(p => sql(s"ALTER TABLE metadata_only ADD PARTITION (part=$p)"))
  }

  override protected def afterAll(): Unit = {
    try {
      sql("DROP TABLE IF EXISTS metadata_only")
    } finally {
      super.afterAll()
    }
  }

  test("SPARK-23877: validate metadata-only query pushes filters to metastore") {
    withSQLConf(OPTIMIZER_METADATA_ONLY.key -> "true") {
      val startCount = HiveCatalogMetrics.METRIC_PARTITIONS_FETCHED.getCount

      // verify the number of matching partitions
      assert(sql("SELECT DISTINCT part FROM metadata_only WHERE part < 5").collect().length === 5)

      // verify that the partition predicate was pushed down to the metastore
      assert(HiveCatalogMetrics.METRIC_PARTITIONS_FETCHED.getCount - startCount === 5)
    }
  }

  test("SPARK-23877: filter on projected expression") {
    withSQLConf(OPTIMIZER_METADATA_ONLY.key -> "true") {
      val startCount = HiveCatalogMetrics.METRIC_PARTITIONS_FETCHED.getCount

      // verify the matching partitions
      val partitions = spark.internalCreateDataFrame(Distinct(Filter(($"x" < 5).expr,
        Project(Seq(($"part" + 1).as("x").expr.asInstanceOf[NamedExpression]),
          spark.table("metadata_only").logicalPlan.asInstanceOf[SubqueryAlias].child)))
          .queryExecution.toRdd, StructType(Seq(StructField("x", IntegerType))))

      checkAnswer(partitions, Seq(1, 2, 3, 4).toDF("x"))

      // verify that the partition predicate was not pushed down to the metastore
      assert(HiveCatalogMetrics.METRIC_PARTITIONS_FETCHED.getCount - startCount == 11)
    }
  }
} 
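Note that the second test reaches the underlying table plan with asInstanceOf[SubqueryAlias].child, which assumes spark.table(...) always returns an alias-wrapped plan. A more defensive variant (a hypothetical helper, not part of the suite) unwraps with a pattern match:

import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}

// Strip a top-level alias if present; otherwise return the plan unchanged.
def stripAlias(plan: LogicalPlan): LogicalPlan = plan match {
  case SubqueryAlias(_, child) => child
  case other => other
}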
Example 4
Source File: CarbonExpressions.scala    From carbondata   with Apache License 2.0
package org.apache.spark.sql

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Expression, ScalaUDF}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
import org.apache.spark.sql.execution.command.DescribeTableCommand
import org.apache.spark.sql.types.DataType


// Pattern-matching helpers over Catalyst expressions used by CarbonData.
object CarbonExpressions {

  // Extractor that matches any ScalaUDF expression.
  object CarbonScalaUDF {
    def unapply(expression: Expression): Option[ScalaUDF] = {
      expression match {
        case a: ScalaUDF =>
          Some(a)
        case _ =>
          None
      }
    }
  }
} 
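A brief usage sketch for the extractor (hypothetical, not from carbondata): because CarbonScalaUDF defines unapply, it can be used directly as a pattern inside Catalyst tree traversals, for example to collect UDF nodes:

import org.apache.spark.sql.catalyst.expressions.Expression

// Count every ScalaUDF in an expression tree via the extractor.
def countUDFs(expr: Expression): Int =
  expr.collect { case CarbonScalaUDF(udf) => udf }.size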
Example 5
Source File: hbaseCommands.scala    From Heracles   with Apache License 2.0
package org.apache.spark.sql.hbase.execution

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.hbase._
import org.apache.spark.sql.hbase.util.DataTypeUtils
import org.apache.spark.sql.types._

import scala.collection.mutable.ArrayBuffer

@DeveloperApi
case class AlterDropColCommand(namespace: String, tableName: String, columnName: String)
  extends RunnableCommand {

  def run(sparkSession: SparkSession): Seq[Row] = {
    sparkSession.sharedState.externalCatalog.asInstanceOf[HBaseCatalog]
      .alterTableDropNonKey(namespace, tableName, columnName)
    sparkSession.sharedState.externalCatalog.asInstanceOf[HBaseCatalog].stopAdmin()
    Seq.empty[Row]
  }
}

@DeveloperApi
case class AlterAddColCommand(namespace: String,
                              tableName: String,
                              colName: String,
                              colType: String,
                              colFamily: String,
                              colQualifier: String) extends RunnableCommand {

  def run(sparkSession: SparkSession): Seq[Row] = {
    val hbaseCatalog = sparkSession.sharedState.externalCatalog.asInstanceOf[HBaseCatalog]
    hbaseCatalog.alterTableAddNonKey(namespace, tableName,
      NonKeyColumn(colName, DataTypeUtils.getDataType(colType), colFamily, colQualifier))
    hbaseCatalog.stopAdmin()
    Seq.empty[Row]
  }
}

@DeveloperApi
case class InsertValueIntoTableCommand(tid: TableIdentifier, valueSeq: Seq[String])
  extends RunnableCommand {
  override def run(sparkSession: SparkSession) = {
    val relation: HBaseRelation = sparkSession.sessionState.catalog.externalCatalog
      .asInstanceOf[HBaseCatalog]
      .getHBaseRelation(tid.database.getOrElse(null), tid.table).getOrElse(null)

    val bytes = valueSeq.zipWithIndex.map(v =>
      DataTypeUtils.string2TypeData(v._1, relation.schema(v._2).dataType))

    val rows = sparkSession.sparkContext.makeRDD(Seq(Row.fromSeq(bytes)))
    val inputValuesDF = sparkSession.createDataFrame(rows, relation.schema)
    relation.insert(inputValuesDF, overwrite = false)

    Seq.empty[Row]
  }

  override def output: Seq[Attribute] = Seq.empty
} 
Example 6
Source File: ReplaceGroup.scala    From starry   with Apache License 2.0
package com.github.passionke.replace

import com.github.passionke.starry.SparkPlanExecutor
import com.github.passionke.baseline.Dumy
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, SubqueryAlias}
import org.apache.spark.sql.execution.QueryExecution
import org.apache.spark.Spark
import org.scalatest.FunSuite


class ReplaceGroup extends FunSuite {

  test("group by") {
    val sparkSession = Spark.sparkSession
    sparkSession.sparkContext.setLogLevel("WARN")
    import sparkSession.implicits._
    val dumys = Seq(Dumy("a", 10, "abc"), Dumy("a", 20, "ass"))
    dumys.toDF().createOrReplaceTempView("a")

    val df = sparkSession.sql(
      """
        |select name, count(1) as cnt
        |from a
        |group by name
      """.stripMargin)

    df.show()
    val sparkPlan = df.queryExecution.sparkPlan
    val logicalPlan = df.queryExecution.analyzed

    val dumy1 = Seq(Dumy("a", 1, "abc"), Dumy("a", 1, "ass"), Dumy("a", 2, "sf"))
    val data = dumy1.toDF().queryExecution.executedPlan.execute().collect()

    val newL = logicalPlan.transform({
      case SubqueryAlias(a, localRelation) if a.equals("a") =>
        SubqueryAlias(a, LocalRelation(localRelation.output, data))
    })

    val ns = sparkSession.newSession()
    val qe = new QueryExecution(ns, newL)
    val start = System.currentTimeMillis()
    val list = SparkPlanExecutor.exec(qe.sparkPlan, ns)
    assert(list.head.getLong(1).equals(3L))
    val end = System.currentTimeMillis()
    println(s"took ${end - start} ms")
  }

} 
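The interesting move here is the transform: the test swaps the rows under the temp view's SubqueryAlias for new data without reparsing the SQL. A generic version of that swap might look like the following hypothetical helper (assuming, as in the test, a Spark version where the alias is a plain String):

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, SubqueryAlias}

// Replace the rows under a named alias, keeping the original output schema.
def swapData(plan: LogicalPlan, alias: String, rows: Seq[InternalRow]): LogicalPlan =
  plan.transform {
    case SubqueryAlias(a, rel: LocalRelation) if a == alias =>
      SubqueryAlias(a, LocalRelation(rel.output, rows))
  }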
Example 7
Source File: MergeCommand.scala    From spark-acid   with Apache License 2.0
package com.qubole.spark.datasources.hiveacid.sql.catalyst.plans.command

import com.qubole.spark.hiveacid.HiveAcidErrors
import com.qubole.spark.hiveacid.datasource.HiveAcidRelation
import com.qubole.spark.hiveacid.merge.{MergeCondition, MergeWhenClause, MergeWhenNotInsert}
import org.apache.spark.sql.catalyst.AliasIdentifier
import org.apache.spark.sql.{Row, SparkSession, SqlUtils}
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.execution.datasources.LogicalRelation

case class MergeCommand(targetTable: LogicalPlan,
                        sourceTable: LogicalPlan,
                        matched: Seq[MergeWhenClause],
                        notMatched: Option[MergeWhenClause],
                        mergeCondition: MergeCondition,
                        sourceAlias: Option[AliasIdentifier],
                        targetAlias: Option[AliasIdentifier])
  extends RunnableCommand {

  override def children: Seq[LogicalPlan] = Seq(targetTable, sourceTable)
  override def output: Seq[Attribute] = Seq.empty
  override lazy val resolved: Boolean = childrenResolved
  override def run(sparkSession: SparkSession): Seq[Row] = {
    val insertClause: Option[MergeWhenNotInsert] = notMatched match {
      case Some(i: MergeWhenNotInsert) => Some(i)
      case None => None
      case _ => throw HiveAcidErrors.mergeValidationError("WHEN NOT Clause has to be INSERT CLAUSE")
    }

    children.head match {
      case LogicalRelation(relation: HiveAcidRelation, _, _ , _) =>
        relation.merge(SqlUtils.logicalPlanToDataFrame(sparkSession, sourceTable),
          mergeCondition.expression, matched, insertClause, sourceAlias, targetAlias)
      case SubqueryAlias(_, LogicalRelation(relation: HiveAcidRelation, _, _, _)) =>
        relation.merge(SqlUtils.logicalPlanToDataFrame(sparkSession, sourceTable),
          mergeCondition.expression, matched, insertClause, sourceAlias, targetAlias)
      case _ => throw HiveAcidErrors.tableNotAcidException(targetTable.toString())
    }

    Seq.empty
  }
}
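Both match arms end in the same merge call; the SubqueryAlias case exists only to see through an alias wrapped around the target relation. That unwrap-or-pass-through pattern can be factored out, as in this hypothetical helper (not part of spark-acid):

import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
import org.apache.spark.sql.execution.datasources.LogicalRelation

// Find the underlying LogicalRelation whether or not the plan is alias-wrapped,
// mirroring the two cases MergeCommand handles above.
def underlyingRelation(plan: LogicalPlan): Option[LogicalRelation] = plan match {
  case lr: LogicalRelation => Some(lr)
  case SubqueryAlias(_, lr: LogicalRelation) => Some(lr)
  case _ => None
}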