org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias.
Follow the link above each example to view the original project or source file.
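Before the examples, here is a minimal, self-contained sketch of the two ways SubqueryAlias shows up in the code below: constructing one to give a plan an alias, and pattern matching one to look through the alias to its child. The SubqueryAliasSketch object is hypothetical (it is not taken from any of the projects below) and assumes the Spark 2.x apply(identifier: String, child: LogicalPlan) signature that the examples use; in newer Spark versions the case class takes an AliasIdentifier instead of a plain String.

// Hypothetical sketch, assuming Spark 2.x catalyst on the classpath.
import org.apache.spark.sql.catalyst.expressions.AttributeReference
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, SubqueryAlias}
import org.apache.spark.sql.types.{LongType, StringType}

object SubqueryAliasSketch {
  def main(args: Array[String]): Unit = {
    // Build a small relation and alias it as "t", the way the analyzer aliases `FROM x t`.
    val id = AttributeReference("id", LongType)()
    val data = AttributeReference("data", StringType)()
    val relation: LogicalPlan = LocalRelation(id, data)
    val aliased: LogicalPlan = SubqueryAlias("t", relation)

    // The alias qualifies the child's output, so attributes resolve as t.id and t.data.
    println(aliased.output.map(_.qualifiedName)) // prints something like List(t.id, t.data)

    // Rules that do not care about the alias simply look through it to the child plan.
    val unwrapped = aliased match {
      case SubqueryAlias(_, child) => child
      case other => other
    }
    println(unwrapped eq relation) // true
  }
}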
Example 1
Source File: SparkWrapper.scala From tispark with Apache License 2.0
package com.pingcap.tispark

import org.apache.spark.sql.catalyst.catalog.{CatalogTable, SessionCatalog}
import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Expression}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
import org.apache.spark.sql.types.{DataType, Metadata}

object SparkWrapper {
  def getVersion: String = {
    "SparkWrapper-2.3"
  }

  def newSubqueryAlias(identifier: String, child: LogicalPlan): SubqueryAlias = {
    SubqueryAlias(identifier, child)
  }

  def newAlias(child: Expression, name: String): Alias = {
    Alias(child, name)()
  }

  def newAttributeReference(
      name: String,
      dataType: DataType,
      nullable: Boolean,
      metadata: Metadata): AttributeReference = {
    AttributeReference(name, dataType, nullable, metadata)()
  }

  def callSessionCatalogCreateTable(
      obj: SessionCatalog,
      tableDefinition: CatalogTable,
      ignoreIfExists: Boolean): Unit = {
    obj.createTable(tableDefinition, ignoreIfExists)
  }
}
Example 2
Source File: SparkWrapper.scala From tispark with Apache License 2.0
package com.pingcap.tispark

import org.apache.spark.sql.catalyst.catalog.{CatalogTable, SessionCatalog}
import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Expression}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
import org.apache.spark.sql.types.{DataType, Metadata}

object SparkWrapper {
  def getVersion: String = {
    "SparkWrapper-2.4"
  }

  def newSubqueryAlias(identifier: String, child: LogicalPlan): SubqueryAlias = {
    SubqueryAlias(identifier, child)
  }

  def newAlias(child: Expression, name: String): Alias = {
    Alias(child, name)()
  }

  def newAttributeReference(
      name: String,
      dataType: DataType,
      nullable: Boolean,
      metadata: Metadata): AttributeReference = {
    AttributeReference(name, dataType, nullable, metadata)()
  }

  def callSessionCatalogCreateTable(
      obj: SessionCatalog,
      tableDefinition: CatalogTable,
      ignoreIfExists: Boolean): Unit = {
    obj.createTable(tableDefinition, ignoreIfExists)
  }
}
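Examples 1 and 2 are identical apart from the version string: tispark apparently compiles the same SparkWrapper facade once against Spark 2.3 and once against Spark 2.4, most likely because SubqueryAlias changed shape between the two releases (a plain String alias in the 2.3 case class, an AliasIdentifier plus a String-based companion apply in 2.4), so the call is source compatible but not binary compatible. The sketch below shows what the explicit, non-facade construction would look like on the 2.4 side; it is an assumption based on the Spark 2.4 signatures, not code from tispark.

// Hedged sketch, assuming Spark 2.4 catalyst: construct the SubqueryAlias
// case class directly with an AliasIdentifier instead of relying on the
// String-based companion apply that SparkWrapper uses.
import org.apache.spark.sql.catalyst.AliasIdentifier
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}

object SparkWrapper24Explicit {
  def newSubqueryAlias(identifier: String, child: LogicalPlan): SubqueryAlias =
    SubqueryAlias(AliasIdentifier(identifier), child)
}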
Example 3
Source File: OptimizeHiveMetadataOnlyQuerySuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.hive

import org.scalatest.BeforeAndAfter

import org.apache.spark.metrics.source.HiveCatalogMetrics
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.catalyst.expressions.NamedExpression
import org.apache.spark.sql.catalyst.plans.logical.{Distinct, Filter, Project, SubqueryAlias}
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.internal.SQLConf.OPTIMIZER_METADATA_ONLY
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

class OptimizeHiveMetadataOnlyQuerySuite extends QueryTest with TestHiveSingleton
    with BeforeAndAfter with SQLTestUtils {

  import spark.implicits._

  override def beforeAll(): Unit = {
    super.beforeAll()
    sql("CREATE TABLE metadata_only (id bigint, data string) PARTITIONED BY (part int)")
    (0 to 10).foreach(p => sql(s"ALTER TABLE metadata_only ADD PARTITION (part=$p)"))
  }

  override protected def afterAll(): Unit = {
    try {
      sql("DROP TABLE IF EXISTS metadata_only")
    } finally {
      super.afterAll()
    }
  }

  test("SPARK-23877: validate metadata-only query pushes filters to metastore") {
    withSQLConf(OPTIMIZER_METADATA_ONLY.key -> "true") {
      val startCount = HiveCatalogMetrics.METRIC_PARTITIONS_FETCHED.getCount

      // verify the number of matching partitions
      assert(sql("SELECT DISTINCT part FROM metadata_only WHERE part < 5").collect().length === 5)

      // verify that the partition predicate was pushed down to the metastore
      assert(HiveCatalogMetrics.METRIC_PARTITIONS_FETCHED.getCount - startCount === 5)
    }
  }

  test("SPARK-23877: filter on projected expression") {
    withSQLConf(OPTIMIZER_METADATA_ONLY.key -> "true") {
      val startCount = HiveCatalogMetrics.METRIC_PARTITIONS_FETCHED.getCount

      // verify the matching partitions
      val partitions = spark.internalCreateDataFrame(Distinct(Filter(($"x" < 5).expr,
        Project(Seq(($"part" + 1).as("x").expr.asInstanceOf[NamedExpression]),
          spark.table("metadata_only").logicalPlan.asInstanceOf[SubqueryAlias].child)))
        .queryExecution.toRdd, StructType(Seq(StructField("x", IntegerType))))

      checkAnswer(partitions, Seq(1, 2, 3, 4).toDF("x"))

      // verify that the partition predicate was not pushed down to the metastore
      assert(HiveCatalogMetrics.METRIC_PARTITIONS_FETCHED.getCount - startCount == 11)
    }
  }
}
Example 4
Source File: CarbonExpressions.scala From carbondata with Apache License 2.0
package org.apache.spark.sql

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Expression, ScalaUDF}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
import org.apache.spark.sql.execution.command.DescribeTableCommand
import org.apache.spark.sql.types.DataType

object CarbonExpressions {

  // Only the CarbonScalaUDF extractor of this file is shown in this excerpt;
  // the remaining extractors (which use the other imports above) are omitted.
  object CarbonScalaUDF {
    def unapply(expression: Expression): Option[(ScalaUDF)] = {
      expression match {
        case a: ScalaUDF => Some(a)
        case _ => None
      }
    }
  }
}
Example 5
Source File: hbaseCommands.scala From Heracles with Apache License 2.0
package org.apache.spark.sql.hbase.execution

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.hbase._
import org.apache.spark.sql.hbase.util.DataTypeUtils
import org.apache.spark.sql.types._

import scala.collection.mutable.ArrayBuffer

@DeveloperApi
case class AlterDropColCommand(namespace: String, tableName: String, columnName: String)
  extends RunnableCommand {

  def run(sparkSession: SparkSession): Seq[Row] = {
    sparkSession.sharedState.externalCatalog.asInstanceOf[HBaseCatalog]
      .alterTableDropNonKey(namespace, tableName, columnName)
    sparkSession.sharedState.externalCatalog.asInstanceOf[HBaseCatalog].stopAdmin()
    Seq.empty[Row]
  }
}

@DeveloperApi
case class AlterAddColCommand(namespace: String,
                              tableName: String,
                              colName: String,
                              colType: String,
                              colFamily: String,
                              colQualifier: String) extends RunnableCommand {

  def run(sparkSession: SparkSession): Seq[Row] = {
    val hbaseCatalog = sparkSession.sharedState.externalCatalog.asInstanceOf[HBaseCatalog]
    hbaseCatalog.alterTableAddNonKey(namespace, tableName,
      NonKeyColumn(colName, DataTypeUtils.getDataType(colType), colFamily, colQualifier))
    hbaseCatalog.stopAdmin()
    Seq.empty[Row]
  }
}

@DeveloperApi
case class InsertValueIntoTableCommand(tid: TableIdentifier, valueSeq: Seq[String])
  extends RunnableCommand {

  override def run(sparkSession: SparkSession) = {
    val relation: HBaseRelation = sparkSession.sessionState.catalog.externalCatalog
      .asInstanceOf[HBaseCatalog]
      .getHBaseRelation(tid.database.getOrElse(null), tid.table).getOrElse(null)

    val bytes = valueSeq.zipWithIndex.map(v =>
      DataTypeUtils.string2TypeData(v._1, relation.schema(v._2).dataType))

    val rows = sparkSession.sparkContext.makeRDD(Seq(Row.fromSeq(bytes)))
    val inputValuesDF = sparkSession.createDataFrame(rows, relation.schema)
    relation.insert(inputValuesDF, overwrite = false)

    Seq.empty[Row]
  }

  override def output: Seq[Attribute] = Seq.empty
}
Example 6
Source File: ReplaceGroup.scala From starry with Apache License 2.0
package com.github.passionke.replace

import com.github.passionke.starry.SparkPlanExecutor
import com.github.passionke.baseline.Dumy
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, SubqueryAlias}
import org.apache.spark.sql.execution.QueryExecution
import org.apache.spark.Spark
import org.scalatest.FunSuite

class ReplaceGroup extends FunSuite {

  test("group by") {
    val sparkSession = Spark.sparkSession
    sparkSession.sparkContext.setLogLevel("WARN")
    import sparkSession.implicits._

    val dumys = Seq(Dumy("a", 10, "abc"), Dumy("a", 20, "ass"))
    dumys.toDF().createOrReplaceTempView("a")

    val df = sparkSession.sql(
      """
        |select name, count(1) as cnt
        |from a
        |group by name
      """.stripMargin)
    df.show()

    val sparkPlan = df.queryExecution.sparkPlan
    val logicalPlan = df.queryExecution.analyzed

    val dumy1 = Seq(Dumy("a", 1, "abc"), Dumy("a", 1, "ass"), Dumy("a", 2, "sf"))
    val data = dumy1.toDF().queryExecution.executedPlan.execute().collect()

    val newL = logicalPlan.transform({
      case SubqueryAlias(a, localRelation) if a.equals("a") =>
        SubqueryAlias(a, LocalRelation(localRelation.output, data))
    })

    val ns = sparkSession.newSession()
    val qe = new QueryExecution(ns, newL)

    val start = System.currentTimeMillis()
    val list = SparkPlanExecutor.exec(qe.sparkPlan, ns)
    assert(list.head.getLong(1).equals(3L))
    val end = System.currentTimeMillis()
    end - start
  }
}
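The core trick in Example 6 is plan surgery: after the query has been analyzed, the LocalRelation behind the temp view "a" is swapped for one that already holds freshly computed rows, and the rewritten plan is executed in a new session without re-parsing the SQL. A stripped-down version of that transform, assuming the Spark 2.3-style String alias implied by the guard a.equals("a"), might look like the hypothetical helper below (it is not part of the starry project):

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, SubqueryAlias}

// Replace whatever sits behind the alias "a" with a LocalRelation carrying
// precomputed rows, keeping the original output attributes so the rest of
// the plan still resolves.
object ReplaceAliasedRelation {
  def apply(plan: LogicalPlan, rows: Seq[InternalRow]): LogicalPlan = plan transform {
    case SubqueryAlias(alias, child) if alias == "a" =>
      SubqueryAlias(alias, LocalRelation(child.output, rows))
  }
}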
Example 7
Source File: MergeCommand.scala From spark-acid with Apache License 2.0
package com.qubole.spark.datasources.hiveacid.sql.catalyst.plans.command

import com.qubole.spark.hiveacid.HiveAcidErrors
import com.qubole.spark.hiveacid.datasource.HiveAcidRelation
import com.qubole.spark.hiveacid.merge.{MergeCondition, MergeWhenClause, MergeWhenNotInsert}
import org.apache.spark.sql.catalyst.AliasIdentifier
import org.apache.spark.sql.{Row, SparkSession, SqlUtils}
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.execution.datasources.LogicalRelation

case class MergeCommand(targetTable: LogicalPlan,
                        sourceTable: LogicalPlan,
                        matched: Seq[MergeWhenClause],
                        notMatched: Option[MergeWhenClause],
                        mergeCondition: MergeCondition,
                        sourceAlias: Option[AliasIdentifier],
                        targetAlias: Option[AliasIdentifier])
  extends RunnableCommand {

  override def children: Seq[LogicalPlan] = Seq(targetTable, sourceTable)
  override def output: Seq[Attribute] = Seq.empty
  override lazy val resolved: Boolean = childrenResolved

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val insertClause: Option[MergeWhenNotInsert] = notMatched match {
      case Some(i: MergeWhenNotInsert) => Some(i)
      case None => None
      case _ => throw HiveAcidErrors.mergeValidationError("WHEN NOT Clause has to be INSERT CLAUSE")
    }

    children.head match {
      case LogicalRelation(relation: HiveAcidRelation, _, _, _) =>
        relation.merge(SqlUtils.logicalPlanToDataFrame(sparkSession, sourceTable),
          mergeCondition.expression, matched, insertClause, sourceAlias, targetAlias)
      case SubqueryAlias(_, LogicalRelation(relation: HiveAcidRelation, _, _, _)) =>
        relation.merge(SqlUtils.logicalPlanToDataFrame(sparkSession, sourceTable),
          mergeCondition.expression, matched, insertClause, sourceAlias, targetAlias)
      case _ => throw HiveAcidErrors.tableNotAcidException(targetTable.toString())
    }

    Seq.empty
  }
}