org.apache.spark.sql.catalyst.expressions.AttributeReference Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.expressions.AttributeReference.
Each example is taken from an open-source project; the source file, project name, and license are noted above the code.
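Before looking at the project examples, here is a minimal, self-contained sketch (written for this page rather than taken from any of the projects below) showing how an AttributeReference is typically constructed. The constructor is curried: the first parameter list describes the column (name, data type, nullability, metadata), while the second carries plan-resolution state such as the ExprId. The exact shape of the second parameter list, for example the qualifier argument, varies slightly between Spark versions.

import org.apache.spark.sql.catalyst.expressions.{AttributeReference, NamedExpression}
import org.apache.spark.sql.types.IntegerType

object AttributeReferenceSketch {
  def main(args: Array[String]): Unit = {
    // First parameter list: the column description; second: resolution state such as the ExprId.
    val a = AttributeReference("a", IntegerType, nullable = true)()
    val b = AttributeReference("b", IntegerType)(exprId = NamedExpression.newExprId)

    // Attributes are compared by ExprId rather than by name; each call above allocated a fresh id.
    println(a.exprId == b.exprId) // false
    println(s"${a.name}: ${a.dataType.simpleString}, nullable = ${a.nullable}") // a: int, nullable = true
  }
}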
Example 1
Source File: SameResultSuite.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.plans import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.{ExprId, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.util._ class SameResultSuite extends SparkFunSuite { val testRelation = LocalRelation('a.int, 'b.int, 'c.int) val testRelation2 = LocalRelation('a.int, 'b.int, 'c.int) def assertSameResult(a: LogicalPlan, b: LogicalPlan, result: Boolean = true): Unit = { val aAnalyzed = a.analyze val bAnalyzed = b.analyze if (aAnalyzed.sameResult(bAnalyzed) != result) { val comparison = sideBySide(aAnalyzed.toString, bAnalyzed.toString).mkString("\n") fail(s"Plans should return sameResult = $result\n$comparison") } } test("relations") { assertSameResult(testRelation, testRelation2) } test("projections") { assertSameResult(testRelation.select('a), testRelation2.select('a)) assertSameResult(testRelation.select('b), testRelation2.select('b)) assertSameResult(testRelation.select('a, 'b), testRelation2.select('a, 'b)) assertSameResult(testRelation.select('b, 'a), testRelation2.select('b, 'a)) assertSameResult(testRelation, testRelation2.select('a), result = false) assertSameResult(testRelation.select('b, 'a), testRelation2.select('a, 'b), result = false) } test("filters") { assertSameResult(testRelation.where('a === 'b), testRelation2.where('a === 'b)) } test("sorts") { assertSameResult(testRelation.orderBy('a.asc), testRelation2.orderBy('a.asc)) } }
Example 2
Source File: QueryPlanSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.plans

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.dsl.plans
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, Literal, NamedExpression}
import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin}
import org.apache.spark.sql.types.IntegerType

class QueryPlanSuite extends SparkFunSuite {

  test("origin remains the same after mapExpressions (SPARK-23823)") {
    CurrentOrigin.setPosition(0, 0)
    val column = AttributeReference("column", IntegerType)(NamedExpression.newExprId)
    val query = plans.DslLogicalPlan(plans.table("table")).select(column)
    CurrentOrigin.reset()

    val mappedQuery = query mapExpressions {
      case _: Expression => Literal(1)
    }

    val mappedOrigin = mappedQuery.expressions.apply(0).origin
    assert(mappedOrigin == Origin.apply(Some(0), Some(0)))
  }
}
Example 3
Source File: LogicalPlanSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.plans import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.types.IntegerType class LogicalPlanSuite extends SparkFunSuite { private var invocationCount = 0 private val function: PartialFunction[LogicalPlan, LogicalPlan] = { case p: Project => invocationCount += 1 p } private val testRelation = LocalRelation() test("transformUp runs on operators") { invocationCount = 0 val plan = Project(Nil, testRelation) plan transformUp function assert(invocationCount === 1) invocationCount = 0 plan transformDown function assert(invocationCount === 1) } test("transformUp runs on operators recursively") { invocationCount = 0 val plan = Project(Nil, Project(Nil, testRelation)) plan transformUp function assert(invocationCount === 2) invocationCount = 0 plan transformDown function assert(invocationCount === 2) } test("transformUp skips all ready resolved plans wrapped in analysis barrier") { invocationCount = 0 val plan = AnalysisBarrier(Project(Nil, Project(Nil, testRelation))) plan transformUp function assert(invocationCount === 0) invocationCount = 0 plan transformDown function assert(invocationCount === 0) } test("transformUp skips partially resolved plans wrapped in analysis barrier") { invocationCount = 0 val plan1 = AnalysisBarrier(Project(Nil, testRelation)) val plan2 = Project(Nil, plan1) plan2 transformUp function assert(invocationCount === 1) invocationCount = 0 plan2 transformDown function assert(invocationCount === 1) } test("isStreaming") { val relation = LocalRelation(AttributeReference("a", IntegerType, nullable = true)()) val incrementalRelation = LocalRelation( Seq(AttributeReference("a", IntegerType, nullable = true)()), isStreaming = true) case class TestBinaryRelation(left: LogicalPlan, right: LogicalPlan) extends BinaryNode { override def output: Seq[Attribute] = left.output ++ right.output } require(relation.isStreaming === false) require(incrementalRelation.isStreaming === true) assert(TestBinaryRelation(relation, relation).isStreaming === false) assert(TestBinaryRelation(incrementalRelation, relation).isStreaming === true) assert(TestBinaryRelation(relation, incrementalRelation).isStreaming === true) assert(TestBinaryRelation(incrementalRelation, incrementalRelation).isStreaming) } }
Example 4
Source File: StatsEstimationTestBase.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.statsEstimation import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, LeafNode, LogicalPlan, Statistics} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{IntegerType, StringType} trait StatsEstimationTestBase extends SparkFunSuite { var originalValue: Boolean = false override def beforeAll(): Unit = { super.beforeAll() // Enable stats estimation based on CBO. originalValue = SQLConf.get.getConf(SQLConf.CBO_ENABLED) SQLConf.get.setConf(SQLConf.CBO_ENABLED, true) } override def afterAll(): Unit = { SQLConf.get.setConf(SQLConf.CBO_ENABLED, originalValue) super.afterAll() } def getColSize(attribute: Attribute, colStat: ColumnStat): Long = attribute.dataType match { // For UTF8String: base + offset + numBytes case StringType => colStat.avgLen + 8 + 4 case _ => colStat.avgLen } def attr(colName: String): AttributeReference = AttributeReference(colName, IntegerType)() case class StatsTestPlan( outputList: Seq[Attribute], rowCount: BigInt, attributeStats: AttributeMap[ColumnStat], size: Option[BigInt] = None) extends LeafNode { override def output: Seq[Attribute] = outputList override def computeStats(): Statistics = Statistics( // If sizeInBytes is useless in testing, we just use a fake value sizeInBytes = size.getOrElse(Int.MaxValue), rowCount = Some(rowCount), attributeStats = attributeStats) }
Example 5
Source File: SparkSQLParser.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import scala.util.parsing.combinator.RegexParsers import org.apache.spark.sql.catalyst.AbstractSparkSQLParser import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.{DescribeFunction, LogicalPlan, ShowFunctions} import org.apache.spark.sql.execution._ import org.apache.spark.sql.types.StringType private[sql] class SparkSQLParser(fallback: String => LogicalPlan) extends AbstractSparkSQLParser { // A parser for the key-value part of the "SET [key = [value ]]" syntax //用于“SET [key = [value]]”语法的键值部分的解析器 private object SetCommandParser extends RegexParsers { private val key: Parser[String] = "(?m)[^=]+".r private val value: Parser[String] = "(?m).*$".r private val output: Seq[Attribute] = Seq(AttributeReference("", StringType, nullable = false)()) private val pair: Parser[LogicalPlan] = (key ~ ("=".r ~> value).?).? ^^ { case None => SetCommand(None) case Some(k ~ v) => SetCommand(Some(k.trim -> v.map(_.trim))) } def apply(input: String): LogicalPlan = parseAll(pair, input) match { case Success(plan, _) => plan case x => sys.error(x.toString) } } protected val AS = Keyword("AS") protected val CACHE = Keyword("CACHE") protected val CLEAR = Keyword("CLEAR") protected val DESCRIBE = Keyword("DESCRIBE") protected val EXTENDED = Keyword("EXTENDED") protected val FUNCTION = Keyword("FUNCTION") protected val FUNCTIONS = Keyword("FUNCTIONS") protected val IN = Keyword("IN") protected val LAZY = Keyword("LAZY") protected val SET = Keyword("SET") protected val SHOW = Keyword("SHOW") protected val TABLE = Keyword("TABLE") protected val TABLES = Keyword("TABLES") protected val UNCACHE = Keyword("UNCACHE") override protected lazy val start: Parser[LogicalPlan] = cache | uncache | set | show | desc | others private lazy val cache: Parser[LogicalPlan] = CACHE ~> LAZY.? ~ (TABLE ~> ident) ~ (AS ~> restInput).? ^^ { case isLazy ~ tableName ~ plan => CacheTableCommand(tableName, plan.map(fallback), isLazy.isDefined) } private lazy val uncache: Parser[LogicalPlan] = ( UNCACHE ~ TABLE ~> ident ^^ { case tableName => UncacheTableCommand(tableName) } | CLEAR ~ CACHE ^^^ ClearCacheCommand ) private lazy val set: Parser[LogicalPlan] = SET ~> restInput ^^ { case input => SetCommandParser(input) } // It can be the following patterns: // SHOW FUNCTIONS;显示函数 // SHOW FUNCTIONS mydb.func1; // SHOW FUNCTIONS func1; // SHOW FUNCTIONS `mydb.a`.`func1.aa`; private lazy val show: Parser[LogicalPlan] = ( SHOW ~> TABLES ~ (IN ~> ident).? ^^ { case _ ~ dbName => ShowTablesCommand(dbName) } | SHOW ~ FUNCTIONS ~> ((ident <~ ".").? ~ (ident | stringLit)).? ^^ { case Some(f) => ShowFunctions(f._1, Some(f._2)) case None => ShowFunctions(None, None) } ) private lazy val desc: Parser[LogicalPlan] = DESCRIBE ~ FUNCTION ~> EXTENDED.? ~ (ident | stringLit) ^^ { case isExtended ~ functionName => DescribeFunction(functionName, isExtended.isDefined) } private lazy val others: Parser[LogicalPlan] = wholeInput ^^ { case input => fallback(input) } }
Example 6
Source File: TestRelations.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.catalyst.expressions.AttributeReference
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
import org.apache.spark.sql.types._

object TestRelations {
  val testRelation = LocalRelation(AttributeReference("a", IntegerType, nullable = true)())

  val testRelation2 = LocalRelation(
    AttributeReference("a", StringType)(),
    AttributeReference("b", StringType)(),
    AttributeReference("c", DoubleType)(),
    AttributeReference("d", DecimalType(10, 2))(),
    AttributeReference("e", ShortType)())

  val nestedRelation = LocalRelation(
    AttributeReference("top", StructType(
      StructField("duplicateField", StringType) ::
        StructField("duplicateField", StringType) ::
        StructField("differentCase", StringType) ::
        StructField("differentcase", StringType) :: Nil
    ))())

  val nestedRelation2 = LocalRelation(
    AttributeReference("top", StructType(
      StructField("aField", StringType) ::
        StructField("bField", StringType) ::
        StructField("cField", StringType) :: Nil
    ))())

  val listRelation = LocalRelation(
    AttributeReference("list", ArrayType(IntegerType))())
}
Example 7
Source File: SameResultSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.plans import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.{ExprId, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.util._ class SameResultSuite extends SparkFunSuite { val testRelation = LocalRelation('a.int, 'b.int, 'c.int) val testRelation2 = LocalRelation('a.int, 'b.int, 'c.int) def assertSameResult(a: LogicalPlan, b: LogicalPlan, result: Boolean = true): Unit = { val aAnalyzed = a.analyze val bAnalyzed = b.analyze if (aAnalyzed.sameResult(bAnalyzed) != result) { val comparison = sideBySide(aAnalyzed.toString, bAnalyzed.toString).mkString("\n") fail(s"Plans should return sameResult = $result\n$comparison") } } test("relations") { assertSameResult(testRelation, testRelation2) } test("projections") { assertSameResult(testRelation.select('a), testRelation2.select('a)) assertSameResult(testRelation.select('b), testRelation2.select('b)) assertSameResult(testRelation.select('a, 'b), testRelation2.select('a, 'b)) assertSameResult(testRelation.select('b, 'a), testRelation2.select('b, 'a)) assertSameResult(testRelation, testRelation2.select('a), result = false) assertSameResult(testRelation.select('b, 'a), testRelation2.select('a, 'b), result = false) } test("filters") { assertSameResult(testRelation.where('a === 'b), testRelation2.where('a === 'b)) } test("sorts") { assertSameResult(testRelation.orderBy('a.asc), testRelation2.orderBy('a.asc)) } }
Example 8
Source File: MetadataIteratorSpec.scala From jgit-spark-connector with Apache License 2.0 | 5 votes |
package tech.sourced.engine.iterator import java.nio.file.Paths import java.util.{Properties, UUID} import org.apache.commons.io.FileUtils import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.types.{Metadata, StringType, StructType} import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} import tech.sourced.engine.{BaseSparkSpec, Schema} class JDBCQueryIteratorSpec extends FlatSpec with Matchers with BeforeAndAfterAll with BaseSparkSpec { private val tmpPath = Paths.get( System.getProperty("java.io.tmpdir"), UUID.randomUUID.toString ) private val dbPath = tmpPath.resolve("test.db") override def beforeAll(): Unit = { super.beforeAll() tmpPath.toFile.mkdir() val rdd = ss.sparkContext.parallelize(Seq( Row("id1"), Row("id2"), Row("id3") )) val properties = new Properties() properties.put("driver", "org.sqlite.JDBC") val df = ss.createDataFrame(rdd, StructType(Seq(Schema.repositories.head))) df.write.jdbc(s"jdbc:sqlite:${dbPath.toString}", "repositories", properties) } override def afterAll(): Unit = { super.afterAll() FileUtils.deleteQuietly(tmpPath.toFile) } "JDBCQueryIterator" should "return all rows for the query" in { val iter = new JDBCQueryIterator( Seq(attr("id")), dbPath.toString, "SELECT id FROM repositories ORDER BY id" ) // calling hasNext more than one time does not cause rows to be lost iter.hasNext iter.hasNext val rows = (for (row <- iter) yield row).toArray rows.length should be(3) rows(0).length should be(1) rows(0)(0).toString should be("id1") rows(1)(0).toString should be("id2") rows(2)(0).toString should be("id3") } private def attr(name: String): Attribute = AttributeReference( name, StringType, nullable = false, Metadata.empty )() }
Example 9
Source File: AddSourceToAttributes.scala From jgit-spark-connector with Apache License 2.0 | 5 votes |
package tech.sourced.engine.rule

import org.apache.spark.sql.catalyst.catalog.CatalogTable
import org.apache.spark.sql.catalyst.expressions.AttributeReference
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.sources.BaseRelation
import org.apache.spark.sql.types.MetadataBuilder
import tech.sourced.engine.{GitRelation, MetadataRelation, Sources}
import tech.sourced.engine.compat

// NOTE: the original listing collapses the enclosing declaration. Judging by the imports
// (Rule, MetadataBuilder), this is a Rule[LogicalPlan]; the object name and the SOURCE
// metadata key below are restored as assumptions so the excerpt is self-contained.
object AddSourceToAttributes extends Rule[LogicalPlan] {

  val SOURCE = "source" // assumed metadata key name; elided in the original excerpt

  def apply(plan: LogicalPlan): LogicalPlan = plan transformUp {
    case compat.LogicalRelation(rel @ GitRelation(_, _, _, schemaSource), out, catalogTable) =>
      withMetadata(rel, schemaSource, out, catalogTable)

    case compat.LogicalRelation(
        rel @ MetadataRelation(_, _, _, _, schemaSource), out, catalogTable) =>
      withMetadata(rel, schemaSource, out, catalogTable)
  }

  private def withMetadata(relation: BaseRelation,
                           schemaSource: Option[String],
                           out: Seq[AttributeReference],
                           catalogTable: Option[CatalogTable]): LogicalRelation = {
    val processedOut = schemaSource match {
      case Some(table) => out.map(
        _.withMetadata(new MetadataBuilder().putString(SOURCE, table).build())
          .asInstanceOf[AttributeReference]
      )
      case None => out
    }

    compat.LogicalRelation(relation, processedOut, catalogTable)
  }
}
Example 10
Source File: compat.scala From jgit-spark-connector with Apache License 2.0 | 5 votes |
package tech.sourced.engine.compat import org.apache.spark.SPARK_VERSION import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.execution.datasources.{ LogicalRelation => SparkLogicalRelation } import org.apache.spark.sql.sources.BaseRelation import scala.reflect.runtime.{universe => ru} private[compat] object Compat { def apply[T](s22: T, s23: T): T = SPARK_VERSION match { case s if s.startsWith("2.2.") => s22 case s if s.startsWith("2.3.") => s23 case _ => throw new RuntimeException(s"Unsupported SPARK_VERSION: $SPARK_VERSION") } lazy val ClassMirror = ru.runtimeMirror(Compat.getClass.getClassLoader) } private[engine] object LogicalRelation { def apply(rel: BaseRelation, out: Seq[AttributeReference], catalog: Option[CatalogTable]): SparkLogicalRelation = applyImpl(rel, out, catalog) private lazy val applyImpl = Compat(applySpark22(_, _, _), applySpark23(_, _, _)) private lazy val typ = ru.typeOf[SparkLogicalRelation] private lazy val classSymbol = Compat.ClassMirror.reflectClass(typ.typeSymbol.asClass) private lazy val ctor = classSymbol.reflectConstructor(typ.decl(ru.termNames.CONSTRUCTOR).asMethod) def applySpark22(rel: BaseRelation, out: Seq[AttributeReference], catalog: Option[CatalogTable]): SparkLogicalRelation = ctor(rel, out, catalog).asInstanceOf[SparkLogicalRelation] def applySpark23(rel: BaseRelation, out: Seq[AttributeReference], catalog: Option[CatalogTable]): SparkLogicalRelation = ctor(rel, out, catalog, false).asInstanceOf[SparkLogicalRelation] def unapply(arg: SparkLogicalRelation) : Option[(BaseRelation, Seq[AttributeReference], Option[CatalogTable])] = unapplyImpl(arg) private lazy val unapplyImpl = Compat(unapplySpark22(_), unapplySpark23(_)) def unapplySpark22(arg: SparkLogicalRelation) : Option[(BaseRelation, Seq[AttributeReference], Option[CatalogTable])] = Some((arg.relation, arg.output, arg.catalogTable)) def unapplySpark23(arg: SparkLogicalRelation) : Option[(BaseRelation, Seq[AttributeReference], Option[CatalogTable])] = { val isStreaming = Compat.ClassMirror .reflect(arg) .reflectField(typ.decl(ru.TermName("isStreaming")).asTerm) .get .asInstanceOf[Boolean] if (isStreaming) { None } else { Some((arg.relation, arg.output, arg.catalogTable)) } } }
Example 11
Source File: SparkSQLParser.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import scala.util.parsing.combinator.RegexParsers import org.apache.spark.sql.catalyst.AbstractSparkSQLParser import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution._ import org.apache.spark.sql.types.StringType private[sql] class SparkSQLParser(fallback: String => LogicalPlan) extends AbstractSparkSQLParser { // A parser for the key-value part of the "SET [key = [value ]]" syntax private object SetCommandParser extends RegexParsers { private val key: Parser[String] = "(?m)[^=]+".r private val value: Parser[String] = "(?m).*$".r private val output: Seq[Attribute] = Seq(AttributeReference("", StringType, nullable = false)()) private val pair: Parser[LogicalPlan] = (key ~ ("=".r ~> value).?).? ^^ { case None => SetCommand(None, output) case Some(k ~ v) => SetCommand(Some(k.trim -> v.map(_.trim)), output) } def apply(input: String): LogicalPlan = parseAll(pair, input) match { case Success(plan, _) => plan case x => sys.error(x.toString) } } protected val AS = Keyword("AS") protected val CACHE = Keyword("CACHE") protected val CLEAR = Keyword("CLEAR") protected val IN = Keyword("IN") protected val LAZY = Keyword("LAZY") protected val SET = Keyword("SET") protected val SHOW = Keyword("SHOW") protected val TABLE = Keyword("TABLE") protected val TABLES = Keyword("TABLES") protected val UNCACHE = Keyword("UNCACHE") override protected lazy val start: Parser[LogicalPlan] = cache | uncache | set | show | others private lazy val cache: Parser[LogicalPlan] = CACHE ~> LAZY.? ~ (TABLE ~> ident) ~ (AS ~> restInput).? ^^ { case isLazy ~ tableName ~ plan => CacheTableCommand(tableName, plan.map(fallback), isLazy.isDefined) } private lazy val uncache: Parser[LogicalPlan] = ( UNCACHE ~ TABLE ~> ident ^^ { case tableName => UncacheTableCommand(tableName) } | CLEAR ~ CACHE ^^^ ClearCacheCommand ) private lazy val set: Parser[LogicalPlan] = SET ~> restInput ^^ { case input => SetCommandParser(input) } private lazy val show: Parser[LogicalPlan] = SHOW ~> TABLES ~ (IN ~> ident).? ^^ { case _ ~ dbName => ShowTablesCommand(dbName) } private lazy val others: Parser[LogicalPlan] = wholeInput ^^ { case input => fallback(input) } }
Example 12
Source File: TestRelations.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.LocalRelation import org.apache.spark.sql.types._ object TestRelations { val testRelation = LocalRelation(AttributeReference("a", IntegerType, nullable = true)()) val testRelation2 = LocalRelation( AttributeReference("a", StringType)(), AttributeReference("b", StringType)(), AttributeReference("c", DoubleType)(), AttributeReference("d", DecimalType(10, 2))(), AttributeReference("e", ShortType)()) val testRelation3 = LocalRelation( AttributeReference("e", ShortType)(), AttributeReference("f", StringType)(), AttributeReference("g", DoubleType)(), AttributeReference("h", DecimalType(10, 2))()) // This is the same with `testRelation3` but only `h` is incompatible type. val testRelation4 = LocalRelation( AttributeReference("e", StringType)(), AttributeReference("f", StringType)(), AttributeReference("g", StringType)(), AttributeReference("h", MapType(IntegerType, IntegerType))()) val nestedRelation = LocalRelation( AttributeReference("top", StructType( StructField("duplicateField", StringType) :: StructField("duplicateField", StringType) :: StructField("differentCase", StringType) :: StructField("differentcase", StringType) :: Nil ))()) val nestedRelation2 = LocalRelation( AttributeReference("top", StructType( StructField("aField", StringType) :: StructField("bField", StringType) :: StructField("cField", StringType) :: Nil ))()) val listRelation = LocalRelation( AttributeReference("list", ArrayType(IntegerType))()) val mapRelation = LocalRelation( AttributeReference("map", MapType(IntegerType, IntegerType))()) }
Example 13
Source File: commands.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command import org.apache.spark.rdd.RDD import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.errors.TreeNodeException import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.debug._ import org.apache.spark.sql.execution.streaming.IncrementalExecution import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types._ case class ExplainCommand( logicalPlan: LogicalPlan, override val output: Seq[Attribute] = Seq(AttributeReference("plan", StringType, nullable = true)()), extended: Boolean = false, codegen: Boolean = false) extends RunnableCommand { // Run through the optimizer to generate the physical plan. override def run(sparkSession: SparkSession): Seq[Row] = try { val queryExecution = if (logicalPlan.isStreaming) { // This is used only by explaining `Dataset/DataFrame` created by `spark.readStream`, so the // output mode does not matter since there is no `Sink`. new IncrementalExecution(sparkSession, logicalPlan, OutputMode.Append(), "<unknown>", 0, 0) } else { sparkSession.sessionState.executePlan(logicalPlan) } val outputString = if (codegen) { codegenString(queryExecution.executedPlan) } else if (extended) { queryExecution.toString } else { queryExecution.simpleString } Seq(Row(outputString)) } catch { case cause: TreeNodeException[_] => ("Error occurred during query planning: \n" + cause.getMessage).split("\n").map(Row(_)) } }
Example 14
Source File: resources.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command

import java.io.File
import java.net.URI

import org.apache.hadoop.fs.Path

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

case class ListJarsCommand(jars: Seq[String] = Seq.empty[String]) extends RunnableCommand {
  override val output: Seq[Attribute] = {
    AttributeReference("Results", StringType, nullable = false)() :: Nil
  }

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val jarList = sparkSession.sparkContext.listJars()
    if (jars.nonEmpty) {
      for {
        jarName <- jars.map(f => new Path(f).getName)
        jarPath <- jarList if jarPath.contains(jarName)
      } yield Row(jarPath)
    } else {
      jarList.map(Row(_))
    }
  }
}
Example 15
Source File: LogicalRelation.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} import org.apache.spark.sql.sources.BaseRelation import org.apache.spark.util.Utils override def newInstance(): this.type = { LogicalRelation( relation, expectedOutputAttributes.map(_.map(_.newInstance())), catalogTable).asInstanceOf[this.type] } override def refresh(): Unit = relation match { case fs: HadoopFsRelation => fs.location.refresh() case _ => // Do nothing. } override def simpleString: String = s"Relation[${Utils.truncatedString(output, ",")}] $relation" }
Example 16
Source File: TestRelations.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.LocalRelation import org.apache.spark.sql.types._ object TestRelations { val testRelation = LocalRelation(AttributeReference("a", IntegerType, nullable = true)()) val testRelation2 = LocalRelation( AttributeReference("a", StringType)(), AttributeReference("b", StringType)(), AttributeReference("c", DoubleType)(), AttributeReference("d", DecimalType(10, 2))(), AttributeReference("e", ShortType)()) val testRelation3 = LocalRelation( AttributeReference("e", ShortType)(), AttributeReference("f", StringType)(), AttributeReference("g", DoubleType)(), AttributeReference("h", DecimalType(10, 2))()) val nestedRelation = LocalRelation( AttributeReference("top", StructType( StructField("duplicateField", StringType) :: StructField("duplicateField", StringType) :: StructField("differentCase", StringType) :: StructField("differentcase", StringType) :: Nil ))()) val nestedRelation2 = LocalRelation( AttributeReference("top", StructType( StructField("aField", StringType) :: StructField("bField", StringType) :: StructField("cField", StringType) :: Nil ))()) val listRelation = LocalRelation( AttributeReference("list", ArrayType(IntegerType))()) val mapRelation = LocalRelation( AttributeReference("map", MapType(IntegerType, IntegerType))()) }
Example 17
Source File: LogicalPlanSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.plans import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.types.IntegerType class LogicalPlanSuite extends SparkFunSuite { private var invocationCount = 0 private val function: PartialFunction[LogicalPlan, LogicalPlan] = { case p: Project => invocationCount += 1 p } private val testRelation = LocalRelation() test("resolveOperator runs on operators") { invocationCount = 0 val plan = Project(Nil, testRelation) plan resolveOperators function assert(invocationCount === 1) } test("resolveOperator runs on operators recursively") { invocationCount = 0 val plan = Project(Nil, Project(Nil, testRelation)) plan resolveOperators function assert(invocationCount === 2) } test("resolveOperator skips all ready resolved plans") { invocationCount = 0 val plan = Project(Nil, Project(Nil, testRelation)) plan.foreach(_.setAnalyzed()) plan resolveOperators function assert(invocationCount === 0) } test("resolveOperator skips partially resolved plans") { invocationCount = 0 val plan1 = Project(Nil, testRelation) val plan2 = Project(Nil, plan1) plan1.foreach(_.setAnalyzed()) plan2 resolveOperators function assert(invocationCount === 1) } test("isStreaming") { val relation = LocalRelation(AttributeReference("a", IntegerType, nullable = true)()) val incrementalRelation = new LocalRelation( Seq(AttributeReference("a", IntegerType, nullable = true)())) { override def isStreaming(): Boolean = true } case class TestBinaryRelation(left: LogicalPlan, right: LogicalPlan) extends BinaryNode { override def output: Seq[Attribute] = left.output ++ right.output } require(relation.isStreaming === false) require(incrementalRelation.isStreaming === true) assert(TestBinaryRelation(relation, relation).isStreaming === false) assert(TestBinaryRelation(incrementalRelation, relation).isStreaming === true) assert(TestBinaryRelation(relation, incrementalRelation).isStreaming === true) assert(TestBinaryRelation(incrementalRelation, incrementalRelation).isStreaming) } }
Example 18
Source File: DruidRelation.scala From spark-druid-olap with Apache License 2.0 | 5 votes |
package org.sparklinedata.druid import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, ExprId} import org.apache.spark.sql.sources.{BaseRelation, TableScan} import org.apache.spark.sql.types.{DataType, StructField, StructType} import org.apache.spark.sql.{Row, SQLContext} import org.joda.time.Interval import org.sparklinedata.druid.metadata.DruidRelationInfo case class DruidOperatorAttribute(exprId : ExprId, name : String, dataType : DataType, tf: String = null) override val needConversion: Boolean = false override def schema: StructType = dQuery.map(_.schema(info)).getOrElse(info.sourceDF(sqlContext).schema) def buildInternalScan : RDD[InternalRow] = dQuery.map(new DruidRDD(sqlContext, info, _)).getOrElse( info.sourceDF(sqlContext).queryExecution.toRdd ) override def buildScan(): RDD[Row] = buildInternalScan.asInstanceOf[RDD[Row]] override def toString : String = { if (dQuery.isDefined) { s"DruidQuery(${System.identityHashCode(dQuery)}): ${Utils.queryToString(dQuery.get)}" } else { info.toString } } }
Example 19
Source File: PostAggregate.scala From spark-druid-olap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.sources.druid import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, NamedExpression} import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.execution.SparkPlan import org.sparklinedata.druid._ class PostAggregate(val druidOpSchema : DruidOperatorSchema) { val dqb = druidOpSchema.dqb private def attrRef(dOpAttr : DruidOperatorAttribute) : AttributeReference = AttributeReference(dOpAttr.name, dOpAttr.dataType)(dOpAttr.exprId) lazy val groupExpressions = dqb.dimensions.map { d => attrRef(druidOpSchema.druidAttrMap(d.outputName)) } def namedGroupingExpressions = groupExpressions private def toSparkAgg(dAggSpec : AggregationSpec) : Option[AggregateFunction] = { val dOpAttr = druidOpSchema.druidAttrMap(dAggSpec.name) dAggSpec match { case FunctionAggregationSpec("count", nm, _) => Some(Sum(attrRef(dOpAttr))) case FunctionAggregationSpec("longSum", nm, _) => Some(Sum(attrRef(dOpAttr))) case FunctionAggregationSpec("doubleSum", nm, _) => Some(Sum(attrRef(dOpAttr))) case FunctionAggregationSpec("longMin", nm, _) => Some(Min(attrRef(dOpAttr))) case FunctionAggregationSpec("doubleMin", nm, _) => Some(Min(attrRef(dOpAttr))) case FunctionAggregationSpec("longMax", nm, _) => Some(Max(attrRef(dOpAttr))) case FunctionAggregationSpec("doubleMax", nm, _) => Some(Max(attrRef(dOpAttr))) case JavascriptAggregationSpec(_, aggnm, _, _, _, _) if aggnm.startsWith("MIN") => Some(Min(attrRef(dOpAttr))) case JavascriptAggregationSpec(_, aggnm, _, _, _, _) if aggnm.startsWith("MAX") => Some(Max(attrRef(dOpAttr))) case JavascriptAggregationSpec(_, aggnm, _, _, _, _) if aggnm.startsWith("SUM") => Some(Sum(attrRef(dOpAttr))) case JavascriptAggregationSpec(_, aggnm, _, _, _, _) if aggnm.startsWith("COUNT") => Some(Sum(attrRef(dOpAttr))) case _ => None } } lazy val aggregatesO : Option[List[NamedExpression]] = Utils.sequence( dqb.aggregations.map { da => val dOpAttr = druidOpSchema.druidAttrMap(da.name) toSparkAgg(da).map { aggFunc => Alias(AggregateExpression(aggFunc, Complete, false), dOpAttr.name)(dOpAttr.exprId) } }) def canBeExecutedInHistorical : Boolean = dqb.canPushToHistorical && aggregatesO.isDefined lazy val resultExpressions = groupExpressions ++ aggregatesO.get lazy val aggregateExpressions = resultExpressions.flatMap { expr => expr.collect { case agg: AggregateExpression => agg } }.distinct lazy val aggregateFunctionToAttribute = aggregateExpressions.map { agg => val aggregateFunction = agg.aggregateFunction val attribute = Alias(aggregateFunction, aggregateFunction.toString)().toAttribute (aggregateFunction, agg.isDistinct) -> attribute }.toMap lazy val rewrittenResultExpressions = resultExpressions.map { expr => expr.transformDown { case aE@AggregateExpression(aggregateFunction, _, isDistinct, _) => // The final aggregation buffer's attributes will be `finalAggregationAttributes`, // so replace each aggregate expression by its corresponding attribute in the set: // aggregateFunctionToAttribute(aggregateFunction, isDistinct) aE.resultAttribute case expression => expression }.asInstanceOf[NamedExpression] } def aggOp(child : SparkPlan) : Seq[SparkPlan] = { org.apache.spark.sql.execution.aggregate.AggUtils.planAggregateWithoutPartial( namedGroupingExpressions, aggregateExpressions, rewrittenResultExpressions, child) } }
Example 20
Source File: DruidOperatorSchema.scala From spark-druid-olap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.sources.druid import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, NamedExpression} import org.apache.spark.sql.types.DataType import org.sparklinedata.druid.{DruidOperatorAttribute, DruidQueryBuilder} lazy val pushedDownExprToDruidAttr : Map[Expression, DruidOperatorAttribute] = buildPushDownDruidAttrsMap private def pushDownExpressionMap : Map[String, (Expression, DataType, DataType, String)] = dqb.outputAttributeMap.filter(t => t._2._1 != null) private def buildPushDownDruidAttrsMap : Map[Expression, DruidOperatorAttribute] = (pushDownExpressionMap map { case (nm, (e, oDT, dDT, tf)) => { (e -> druidAttrMap(nm)) } }) private def buildDruidOpAttr : Map[String, DruidOperatorAttribute] = (dqb.outputAttributeMap map { case (nm, (e, oDT, dDT, tf)) => { val druidEid = e match { case null => NamedExpression.newExprId case n: NamedExpression => n.exprId case _ => NamedExpression.newExprId } (nm -> DruidOperatorAttribute(druidEid, nm, dDT, tf)) } } ) }
Example 21
Source File: VacuumTableCommand.scala From delta with Apache License 2.0 | 5 votes |
package io.delta.tables.execution

import org.apache.hadoop.fs.Path

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog, DeltaTableIdentifier, DeltaTableUtils}
import org.apache.spark.sql.delta.commands.VacuumCommand
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.types.StringType

case class VacuumTableCommand(
    path: Option[String],
    table: Option[TableIdentifier],
    horizonHours: Option[Double],
    dryRun: Boolean) extends RunnableCommand {

  override val output: Seq[Attribute] =
    Seq(AttributeReference("path", StringType, nullable = true)())

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val pathToVacuum =
      if (path.nonEmpty) {
        new Path(path.get)
      } else if (table.nonEmpty) {
        DeltaTableIdentifier(sparkSession, table.get) match {
          case Some(id) if id.path.nonEmpty =>
            new Path(id.path.get)
          case _ =>
            new Path(sparkSession.sessionState.catalog.getTableMetadata(table.get).location)
        }
      } else {
        throw DeltaErrors.missingTableIdentifierException("VACUUM")
      }
    val baseDeltaPath = DeltaTableUtils.findDeltaTableRoot(sparkSession, pathToVacuum)
    if (baseDeltaPath.isDefined) {
      if (baseDeltaPath.get != pathToVacuum) {
        throw DeltaErrors.vacuumBasePathMissingException(baseDeltaPath.get)
      }
    }
    val deltaLog = DeltaLog.forTable(sparkSession, pathToVacuum)
    if (deltaLog.snapshot.version == -1) {
      throw DeltaErrors.notADeltaTableException(
        "VACUUM",
        DeltaTableIdentifier(path = Some(pathToVacuum.toString)))
    }
    VacuumCommand.gc(sparkSession, deltaLog, dryRun, horizonHours).collect()
  }
}
Example 22
Source File: monotonicaggregates.scala From BigDatalog with Apache License 2.0 | 5 votes |
package edu.ucla.cs.wis.bigdatalog.spark.execution.aggregates

import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, AttributeSet, Expression, Greatest, Least, Literal, Unevaluable}
import org.apache.spark.sql.catalyst.expressions.aggregate._
import org.apache.spark.sql.catalyst.util.TypeUtils
import org.apache.spark.sql.types.{AbstractDataType, AnyDataType, DataType}

abstract class MonotonicAggregateFunction extends DeclarativeAggregate with Serializable {}

case class MMax(child: Expression) extends MonotonicAggregateFunction {

  override def children: Seq[Expression] = child :: Nil

  override def nullable: Boolean = true

  // Return data type.
  override def dataType: DataType = child.dataType

  // Expected input data type.
  override def inputTypes: Seq[AbstractDataType] = Seq(AnyDataType)

  override def checkInputDataTypes(): TypeCheckResult =
    TypeUtils.checkForOrderingExpr(child.dataType, "function mmax")

  private lazy val mmax = AttributeReference("mmax", child.dataType)()

  override lazy val aggBufferAttributes: Seq[AttributeReference] = mmax :: Nil

  // NOTE: the original listing spliced the tail of the companion MMin class (Least over
  // `mmin`) into this excerpt. The buffer expressions below are restored to the standard
  // DeclarativeAggregate "max" pattern (Greatest over buffer and input) as an assumption.
  override lazy val initialValues: Seq[Literal] = Seq(
    Literal.create(null, child.dataType)
  )

  override lazy val updateExpressions: Seq[Expression] = Seq(
    Greatest(Seq(mmax, child))
  )

  override lazy val mergeExpressions: Seq[Expression] = Seq(
    Greatest(Seq(mmax.left, mmax.right))
  )

  override lazy val evaluateExpression: AttributeReference = mmax
}

case class MonotonicAggregateExpression(aggregateFunction: MonotonicAggregateFunction,
                                        mode: AggregateMode,
                                        isDistinct: Boolean)
  extends Expression with Unevaluable {

  override def children: Seq[Expression] = aggregateFunction :: Nil

  override def dataType: DataType = aggregateFunction.dataType

  override def foldable: Boolean = false

  override def nullable: Boolean = aggregateFunction.nullable

  override def references: AttributeSet = {
    val childReferences = mode match {
      case Partial | Complete => aggregateFunction.references.toSeq
      case PartialMerge | Final => aggregateFunction.aggBufferAttributes
    }
    AttributeSet(childReferences)
  }

  override def prettyString: String = aggregateFunction.prettyString

  override def toString: String = s"(${aggregateFunction},mode=$mode,isDistinct=$isDistinct)"
}
Example 23
Source File: SqlUtils.scala From spark-acid with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} import org.apache.spark.sql.execution.LogicalRDD import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.types.StructType object SqlUtils { def convertToDF(sparkSession: SparkSession, plan : LogicalPlan): DataFrame = { Dataset.ofRows(sparkSession, plan) } def resolveReferences(sparkSession: SparkSession, expr: Expression, planContaining: LogicalPlan, failIfUnresolved: Boolean, exprName: Option[String] = None): Expression = { resolveReferences(sparkSession, expr, Seq(planContaining), failIfUnresolved, exprName) } def resolveReferences(sparkSession: SparkSession, expr: Expression, planContaining: Seq[LogicalPlan], failIfUnresolved: Boolean, exprName: Option[String]): Expression = { val newPlan = FakeLogicalPlan(expr, planContaining) val resolvedExpr = sparkSession.sessionState.analyzer.execute(newPlan) match { case FakeLogicalPlan(resolvedExpr: Expression, _) => // Return even if it did not successfully resolve resolvedExpr case _ => expr // This is unexpected } if (failIfUnresolved) { resolvedExpr.flatMap(_.references).filter(!_.resolved).foreach { attr => { val failedMsg = exprName match { case Some(name) => s"${attr.sql} resolution in $name given these columns: "+ planContaining.flatMap(_.output).map(_.name).mkString(",") case _ => s"${attr.sql} resolution failed given these columns: "+ planContaining.flatMap(_.output).map(_.name).mkString(",") } attr.failAnalysis(failedMsg) } } } resolvedExpr } def hasSparkStopped(sparkSession: SparkSession): Boolean = { sparkSession.sparkContext.stopped.get() } def createDataFrameUsingAttributes(sparkSession: SparkSession, rdd: RDD[Row], schema: StructType, attributes: Seq[Attribute]): DataFrame = { val encoder = RowEncoder(schema) val catalystRows = rdd.map(encoder.toRow) val logicalPlan = LogicalRDD( attributes, catalystRows, isStreaming = false)(sparkSession) Dataset.ofRows(sparkSession, logicalPlan) } def analysisException(cause: String): Throwable = { new AnalysisException(cause) } } case class FakeLogicalPlan(expr: Expression, children: Seq[LogicalPlan]) extends LogicalPlan { override def output: Seq[Attribute] = children.foldLeft(Seq[Attribute]())((out, child) => out ++ child.output) }
Example 24
Source File: HiveAcidUtils.scala From spark-acid with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive import scala.collection.JavaConverters._ import com.qubole.spark.hiveacid.hive.HiveAcidMetadata import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTablePartition, CatalogUtils} import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, BoundReference, Expression, InterpretedPredicate, PrettyAttribute} object HiveAcidUtils { def prunePartitionsByFilter( hiveAcidMetadata: HiveAcidMetadata, inputPartitions: Seq[CatalogTablePartition], predicates: Option[Expression], defaultTimeZoneId: String): Seq[CatalogTablePartition] = { if (predicates.isEmpty) { inputPartitions } else { val partitionSchema = hiveAcidMetadata.partitionSchema val partitionColumnNames = hiveAcidMetadata.partitionSchema.fieldNames.toSet val nonPartitionPruningPredicates = predicates.filterNot { _.references.map(_.name).toSet.subsetOf(partitionColumnNames) } if (nonPartitionPruningPredicates.nonEmpty) { throw new AnalysisException("Expected only partition pruning predicates: " + nonPartitionPruningPredicates) } val boundPredicate = InterpretedPredicate.create(predicates.get.transform { case att: Attribute => val index = partitionSchema.indexWhere(_.name == att.name) BoundReference(index, partitionSchema(index).dataType, nullable = true) }) inputPartitions.filter { p => boundPredicate.eval(p.toRow(partitionSchema, defaultTimeZoneId)) } } } def convertToCatalogTablePartition(hp: com.qubole.shaded.hadoop.hive.ql.metadata.Partition): CatalogTablePartition = { val apiPartition = hp.getTPartition val properties: Map[String, String] = if (hp.getParameters != null) { hp.getParameters.asScala.toMap } else { Map.empty } CatalogTablePartition( spec = Option(hp.getSpec).map(_.asScala.toMap).getOrElse(Map.empty), storage = CatalogStorageFormat( locationUri = Option(CatalogUtils.stringToURI(apiPartition.getSd.getLocation)), inputFormat = Option(apiPartition.getSd.getInputFormat), outputFormat = Option(apiPartition.getSd.getOutputFormat), serde = Option(apiPartition.getSd.getSerdeInfo.getSerializationLib), compressed = apiPartition.getSd.isCompressed, properties = Option(apiPartition.getSd.getSerdeInfo.getParameters) .map(_.asScala.toMap).orNull), createTime = apiPartition.getCreateTime.toLong * 1000, lastAccessTime = apiPartition.getLastAccessTime.toLong * 1000, parameters = properties, stats = None) // TODO: need to implement readHiveStats } }
Example 25
Source File: ObjectMapper.scala From infinispan-spark with Apache License 2.0 | 5 votes |
package org.infinispan.spark.sql

import java.beans.Introspector

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.CatalystTypeConverters
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, GenericRowWithSchema}
import org.apache.spark.sql.types.StructType

object ObjectMapper {

  def forBean(schema: StructType, beanClass: Class[_]): (AnyRef, Array[String]) => Row = {
    val beanInfo = Introspector.getBeanInfo(beanClass)
    val attrs = schema.fields.map(f => AttributeReference(f.name, f.dataType, f.nullable)())
    val extractors = beanInfo.getPropertyDescriptors.filterNot(_.getName == "class").map(_.getReadMethod)
    val methodsToConverts = extractors.zip(attrs).map {
      case (e, attr) => (e, CatalystTypeConverters.createToCatalystConverter(attr.dataType))
    }
    (from: Any, columns: Array[String]) => {
      if (columns.nonEmpty) {
        from match {
          case _: Array[_] => new GenericRowWithSchema(from.asInstanceOf[Array[Any]], schema)
          case f: Any =>
            val rowSchema = StructType(Array(schema(columns.head)))
            new GenericRowWithSchema(Array(f), rowSchema)
        }
      } else {
        new GenericRowWithSchema(methodsToConverts.map {
          case (e, convert) =>
            val invoke: AnyRef = e.invoke(from)
            convert(invoke)
        }, schema)
      }
    }
  }
}
Example 26
Source File: ExpandSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BoundReference, Alias, Literal} import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types.IntegerType class ExpandSuite extends SparkPlanTest with SharedSQLContext { import testImplicits.localSeqToDataFrameHolder private def testExpand(f: SparkPlan => SparkPlan): Unit = { val input = (1 to 1000).map(Tuple1.apply) val projections = Seq.tabulate(2) { i => Alias(BoundReference(0, IntegerType, false), "id")() :: Alias(Literal(i), "gid")() :: Nil } val attributes = projections.head.map(_.toAttribute) checkAnswer( input.toDF(), plan => Expand(projections, attributes, f(plan)), input.flatMap(i => Seq.tabulate(2)(j => Row(i._1, j))) ) } test("inheriting child row type") { val exprs = AttributeReference("a", IntegerType, false)() :: Nil val plan = Expand(Seq(exprs), exprs, ConvertToUnsafe(LocalTableScan(exprs, Seq.empty))) assert(plan.outputsUnsafeRows, "Expand should inherits the created row type from its child.") } test("expanding UnsafeRows") { testExpand(ConvertToUnsafe) } test("expanding SafeRows") { testExpand(identity) } }
Example 27
Source File: LocalNodeTest.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.local import org.apache.spark.SparkFunSuite import org.apache.spark.sql.SQLConf import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.expressions.{Expression, AttributeReference} import org.apache.spark.sql.types.{IntegerType, StringType} class LocalNodeTest extends SparkFunSuite { protected val conf: SQLConf = new SQLConf protected val kvIntAttributes = Seq( AttributeReference("k", IntegerType)(), AttributeReference("v", IntegerType)()) protected val joinNameAttributes = Seq( AttributeReference("id1", IntegerType)(), AttributeReference("name", StringType)()) protected val joinNicknameAttributes = Seq( AttributeReference("id2", IntegerType)(), AttributeReference("nickname", StringType)()) protected def resolveExpressions( expressions: Seq[Expression], localNode: LocalNode): Seq[Expression] = { require(localNode.expressions.forall(_.resolved)) val inputMap = localNode.output.map { a => (a.name, a) }.toMap expressions.map { expression => expression.transformUp { case UnresolvedAttribute(Seq(u)) => inputMap.getOrElse(u, sys.error(s"Invalid Test: Cannot resolve $u given input $inputMap")) } } } }
Example 28
Source File: ProjectNodeSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.local

import org.apache.spark.sql.catalyst.expressions.{AttributeReference, NamedExpression}
import org.apache.spark.sql.types.{IntegerType, StringType}

class ProjectNodeSuite extends LocalNodeTest {

  private val pieAttributes = Seq(
    AttributeReference("id", IntegerType)(),
    AttributeReference("age", IntegerType)(),
    AttributeReference("name", StringType)())

  private def testProject(inputData: Array[(Int, Int, String)] = Array.empty): Unit = {
    val inputNode = new DummyNode(pieAttributes, inputData)
    val columns = Seq[NamedExpression](inputNode.output(0), inputNode.output(2))
    val projectNode = new ProjectNode(conf, columns, inputNode)
    val expectedOutput = inputData.map { case (id, age, name) => (id, name) }
    val actualOutput = projectNode.collect().map { case row =>
      (row.getInt(0), row.getString(1))
    }
    assert(actualOutput === expectedOutput)
  }

  test("empty") {
    testProject()
  }

  test("basic") {
    testProject((1 to 100).map { i => (i, i + 1, "pie" + i) }.toArray)
  }
}
Example 29
Source File: SparkSQLParser.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution import scala.util.parsing.combinator.RegexParsers import org.apache.spark.sql.catalyst.AbstractSparkSQLParser import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.types.StringType class SparkSQLParser(fallback: String => LogicalPlan) extends AbstractSparkSQLParser { // A parser for the key-value part of the "SET [key = [value ]]" syntax private object SetCommandParser extends RegexParsers { private val key: Parser[String] = "(?m)[^=]+".r private val value: Parser[String] = "(?m).*$".r private val output: Seq[Attribute] = Seq(AttributeReference("", StringType, nullable = false)()) private val pair: Parser[LogicalPlan] = (key ~ ("=".r ~> value).?).? ^^ { case None => SetCommand(None) case Some(k ~ v) => SetCommand(Some(k.trim -> v.map(_.trim))) } def apply(input: String): LogicalPlan = parseAll(pair, input) match { case Success(plan, _) => plan case x => sys.error(x.toString) } } protected val AS = Keyword("AS") protected val CACHE = Keyword("CACHE") protected val CLEAR = Keyword("CLEAR") protected val DESCRIBE = Keyword("DESCRIBE") protected val EXTENDED = Keyword("EXTENDED") protected val FUNCTION = Keyword("FUNCTION") protected val FUNCTIONS = Keyword("FUNCTIONS") protected val IN = Keyword("IN") protected val LAZY = Keyword("LAZY") protected val SET = Keyword("SET") protected val SHOW = Keyword("SHOW") protected val TABLE = Keyword("TABLE") protected val TABLES = Keyword("TABLES") protected val UNCACHE = Keyword("UNCACHE") override protected lazy val start: Parser[LogicalPlan] = cache | uncache | set | show | desc | others private lazy val cache: Parser[LogicalPlan] = CACHE ~> LAZY.? ~ (TABLE ~> ident) ~ (AS ~> restInput).? ^^ { case isLazy ~ tableName ~ plan => CacheTableCommand(tableName, plan.map(fallback), isLazy.isDefined) } private lazy val uncache: Parser[LogicalPlan] = ( UNCACHE ~ TABLE ~> ident ^^ { case tableName => UncacheTableCommand(tableName) } | CLEAR ~ CACHE ^^^ ClearCacheCommand ) private lazy val set: Parser[LogicalPlan] = SET ~> restInput ^^ { case input => SetCommandParser(input) } // It can be the following patterns: // SHOW FUNCTIONS; // SHOW FUNCTIONS mydb.func1; // SHOW FUNCTIONS func1; // SHOW FUNCTIONS `mydb.a`.`func1.aa`; private lazy val show: Parser[LogicalPlan] = ( SHOW ~> TABLES ~ (IN ~> ident).? ^^ { case _ ~ dbName => ShowTablesCommand(dbName) } | SHOW ~ FUNCTIONS ~> ((ident <~ ".").? ~ (ident | stringLit)).? ^^ { case Some(f) => logical.ShowFunctions(f._1, Some(f._2)) case None => logical.ShowFunctions(None, None) } ) private lazy val desc: Parser[LogicalPlan] = DESCRIBE ~ FUNCTION ~> EXTENDED.? ~ (ident | stringLit) ^^ { case isExtended ~ functionName => logical.DescribeFunction(functionName, isExtended.isDefined) } private lazy val others: Parser[LogicalPlan] = wholeInput ^^ { case input => fallback(input) } }
Example 30
Source File: TestRelations.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.LocalRelation import org.apache.spark.sql.types._ object TestRelations { val testRelation = LocalRelation(AttributeReference("a", IntegerType, nullable = true)()) val testRelation2 = LocalRelation( AttributeReference("a", StringType)(), AttributeReference("b", StringType)(), AttributeReference("c", DoubleType)(), AttributeReference("d", DecimalType(10, 2))(), AttributeReference("e", ShortType)()) val nestedRelation = LocalRelation( AttributeReference("top", StructType( StructField("duplicateField", StringType) :: StructField("duplicateField", StringType) :: StructField("differentCase", StringType) :: StructField("differentcase", StringType) :: Nil ))()) val nestedRelation2 = LocalRelation( AttributeReference("top", StructType( StructField("aField", StringType) :: StructField("bField", StringType) :: StructField("cField", StringType) :: Nil ))()) val listRelation = LocalRelation( AttributeReference("list", ArrayType(IntegerType))()) val mapRelation = LocalRelation( AttributeReference("map", MapType(IntegerType, IntegerType))()) }
Example 31
Source File: SameResultSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.plans import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.{ExprId, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.util._ class SameResultSuite extends SparkFunSuite { val testRelation = LocalRelation('a.int, 'b.int, 'c.int) val testRelation2 = LocalRelation('a.int, 'b.int, 'c.int) def assertSameResult(a: LogicalPlan, b: LogicalPlan, result: Boolean = true): Unit = { val aAnalyzed = a.analyze val bAnalyzed = b.analyze if (aAnalyzed.sameResult(bAnalyzed) != result) { val comparison = sideBySide(aAnalyzed.toString, bAnalyzed.toString).mkString("\n") fail(s"Plans should return sameResult = $result\n$comparison") } } test("relations") { assertSameResult(testRelation, testRelation2) } test("projections") { assertSameResult(testRelation.select('a), testRelation2.select('a)) assertSameResult(testRelation.select('b), testRelation2.select('b)) assertSameResult(testRelation.select('a, 'b), testRelation2.select('a, 'b)) assertSameResult(testRelation.select('b, 'a), testRelation2.select('b, 'a)) assertSameResult(testRelation, testRelation2.select('a), result = false) assertSameResult(testRelation.select('b, 'a), testRelation2.select('a, 'b), result = false) } test("filters") { assertSameResult(testRelation.where('a === 'b), testRelation2.where('a === 'b)) } test("sorts") { assertSameResult(testRelation.orderBy('a.asc), testRelation2.orderBy('a.asc)) } }
Example 32
Source File: DeltaInvariantCheckerExec.scala From delta with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.delta.schema import org.apache.spark.sql.delta.DeltaErrors import org.apache.spark.sql.delta.schema.Invariants.NotNull import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, BindReferences, Expression, GetStructField, Literal, SortOrder} import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjection import org.apache.spark.sql.catalyst.plans.physical.Partitioning import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} import org.apache.spark.sql.types.{NullType, StructType} private def buildExtractors(invariant: Invariant): Option[Expression] = { assert(invariant.column.nonEmpty) val topLevelColumn = invariant.column.head val topLevelRefOpt = output.collectFirst { case a: AttributeReference if SchemaUtils.DELTA_COL_RESOLVER(a.name, topLevelColumn) => a } val rejectColumnNotFound = isNullNotOkay(invariant) if (topLevelRefOpt.isEmpty) { if (rejectColumnNotFound) { throw DeltaErrors.notNullInvariantException(invariant) } } if (invariant.column.length == 1) { topLevelRefOpt.map(BindReferences.bindReference[Expression](_, output)) } else { topLevelRefOpt.flatMap { topLevelRef => val boundTopLevel = BindReferences.bindReference[Expression](topLevelRef, output) try { val nested = invariant.column.tail.foldLeft(boundTopLevel) { case (e, fieldName) => e.dataType match { case StructType(fields) => val ordinal = fields.indexWhere(f => SchemaUtils.DELTA_COL_RESOLVER(f.name, fieldName)) if (ordinal == -1) { throw new IndexOutOfBoundsException(s"Not nullable column not found in struct: " + s"${fields.map(_.name).mkString("[", ",", "]")}") } GetStructField(e, ordinal, Some(fieldName)) case _ => throw new UnsupportedOperationException( "Invariants on nested fields other than StructTypes are not supported.") } } Some(nested) } catch { case i: IndexOutOfBoundsException if rejectColumnNotFound => throw InvariantViolationException(invariant, i.getMessage) case _: IndexOutOfBoundsException if !rejectColumnNotFound => None } } } } override protected def doExecute(): RDD[InternalRow] = { if (invariants.isEmpty) return child.execute() val boundRefs = invariants.map { invariant => CheckDeltaInvariant(buildExtractors(invariant).getOrElse(Literal(null, NullType)), invariant) } child.execute().mapPartitionsInternal { rows => val assertions = GenerateUnsafeProjection.generate(boundRefs) rows.map { row => assertions(row) row } } } override def outputOrdering: Seq[SortOrder] = child.outputOrdering override def outputPartitioning: Partitioning = child.outputPartitioning }
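The nested-field handling in buildExtractors boils down to walking a dotted column path through a StructType, resolving each name to a field ordinal and wrapping the expression in GetStructField. Here is a minimal standalone sketch of that walk; it is my own illustration rather than Delta code, and the NestedFieldExtractor name and the use of the built-in case-insensitive resolver are assumptions for the example.

import org.apache.spark.sql.catalyst.analysis.caseInsensitiveResolution
import org.apache.spark.sql.catalyst.expressions.{Expression, GetStructField}
import org.apache.spark.sql.types.StructType

object NestedFieldExtractor {
  // Walk a dotted column path (e.g. Seq("a", "b") for "a.b") down a struct-typed
  // expression, resolving each name to its ordinal and wrapping in GetStructField.
  def extract(base: Expression, path: Seq[String]): Expression =
    path.foldLeft(base) { (expr, fieldName) =>
      expr.dataType match {
        case StructType(fields) =>
          val ordinal = fields.indexWhere(f => caseInsensitiveResolution(f.name, fieldName))
          require(ordinal >= 0,
            s"Field $fieldName not found in ${fields.map(_.name).mkString("[", ",", "]")}")
          GetStructField(expr, ordinal, Some(fieldName))
        case other =>
          throw new UnsupportedOperationException(
            s"Cannot extract field $fieldName from non-struct type $other")
      }
    }
}

// Given an attribute top: struct<a: struct<b: int>>,
// NestedFieldExtractor.extract(top, Seq("a", "b")) produces
// GetStructField(GetStructField(top, 0, Some("a")), 0, Some("b")).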
Example 33
Source File: GroupBitwiseOr.scala From mimir with Apache License 2.0
package mimir.exec.spark.udf import org.apache.spark.sql.catalyst.expressions.aggregate.DeclarativeAggregate import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.util.TypeUtils import org.apache.spark.sql.types.{ DataType, LongType } import org.apache.spark.sql.catalyst.expressions.{ AttributeReference, Literal, BitwiseOr } case class GroupBitwiseOr(child: org.apache.spark.sql.catalyst.expressions.Expression) extends DeclarativeAggregate { override def children: Seq[org.apache.spark.sql.catalyst.expressions.Expression] = child :: Nil override def nullable: Boolean = false // Return data type. override def dataType: DataType = LongType override def checkInputDataTypes(): TypeCheckResult = TypeUtils.checkForOrderingExpr(child.dataType, "function group_bitwise_or") private lazy val group_bitwise_or = AttributeReference("group_bitwise_or", LongType)() override lazy val aggBufferAttributes: Seq[AttributeReference] = group_bitwise_or :: Nil override lazy val initialValues: Seq[Literal] = Seq( Literal.create(0, LongType) ) override lazy val updateExpressions: Seq[ org.apache.spark.sql.catalyst.expressions.Expression] = Seq( BitwiseOr(group_bitwise_or, child) ) override lazy val mergeExpressions: Seq[org.apache.spark.sql.catalyst.expressions.Expression] = { Seq( BitwiseOr(group_bitwise_or.left, group_bitwise_or.right) ) } override lazy val evaluateExpression: AttributeReference = group_bitwise_or }
Example 34
Source File: GroupBitwiseAnd.scala From mimir with Apache License 2.0
package mimir.exec.spark.udf import org.apache.spark.sql.catalyst.expressions.aggregate.DeclarativeAggregate import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.util.TypeUtils import org.apache.spark.sql.types.{ DataType, LongType } import org.apache.spark.sql.catalyst.expressions.{ AttributeReference, Literal, BitwiseAnd } case class GroupBitwiseAnd(child: org.apache.spark.sql.catalyst.expressions.Expression) extends DeclarativeAggregate { override def children: Seq[org.apache.spark.sql.catalyst.expressions.Expression] = child :: Nil override def nullable: Boolean = false // Return data type. override def dataType: DataType = LongType override def checkInputDataTypes(): TypeCheckResult = TypeUtils.checkForOrderingExpr(child.dataType, "function group_bitwise_and") private lazy val group_bitwise_and = AttributeReference("group_bitwise_and", LongType)() override lazy val aggBufferAttributes: Seq[AttributeReference] = group_bitwise_and :: Nil override lazy val initialValues: Seq[Literal] = Seq( Literal.create(0xffffffffffffffffl, LongType) ) override lazy val updateExpressions: Seq[ org.apache.spark.sql.catalyst.expressions.Expression] = Seq( BitwiseAnd(group_bitwise_and, child) ) override lazy val mergeExpressions: Seq[org.apache.spark.sql.catalyst.expressions.Expression] = { Seq( BitwiseAnd(group_bitwise_and.left, group_bitwise_and.right) ) } override lazy val evaluateExpression: AttributeReference = group_bitwise_and }
Example 35
Source File: GroupAnd.scala From mimir with Apache License 2.0
package mimir.exec.spark.udf import org.apache.spark.sql.catalyst.expressions.aggregate.DeclarativeAggregate import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.util.TypeUtils import org.apache.spark.sql.types.{ DataType, BooleanType } import org.apache.spark.sql.catalyst.expressions.{ AttributeReference, Literal, And } case class GroupAnd(child: org.apache.spark.sql.catalyst.expressions.Expression) extends DeclarativeAggregate { override def children: Seq[org.apache.spark.sql.catalyst.expressions.Expression] = child :: Nil override def nullable: Boolean = false // Return data type. override def dataType: DataType = BooleanType override def checkInputDataTypes(): TypeCheckResult = TypeUtils.checkForOrderingExpr(child.dataType, "function group_and") private lazy val group_and = AttributeReference("group_and", BooleanType)() override lazy val aggBufferAttributes: Seq[AttributeReference] = group_and :: Nil override lazy val initialValues: Seq[Literal] = Seq( Literal.create(true, BooleanType) ) override lazy val updateExpressions: Seq[ org.apache.spark.sql.catalyst.expressions.Expression] = Seq( And(group_and, child) ) override lazy val mergeExpressions: Seq[org.apache.spark.sql.catalyst.expressions.Expression] = { Seq( And(group_and.left, group_and.right) ) } override lazy val evaluateExpression: AttributeReference = group_and }
Example 36
Source File: GroupOr.scala From mimir with Apache License 2.0
package mimir.exec.spark.udf import org.apache.spark.sql.catalyst.expressions.aggregate.DeclarativeAggregate import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.util.TypeUtils import org.apache.spark.sql.types.{ DataType, BooleanType } import org.apache.spark.sql.catalyst.expressions.{ AttributeReference, Literal, Or } case class GroupOr(child: org.apache.spark.sql.catalyst.expressions.Expression) extends DeclarativeAggregate { override def children: Seq[org.apache.spark.sql.catalyst.expressions.Expression] = child :: Nil override def nullable: Boolean = false // Return data type. override def dataType: DataType = BooleanType override def checkInputDataTypes(): TypeCheckResult = TypeUtils.checkForOrderingExpr(child.dataType, "function group_or") private lazy val group_or = AttributeReference("group_or", BooleanType)() override lazy val aggBufferAttributes: Seq[AttributeReference] = group_or :: Nil override lazy val initialValues: Seq[Literal] = Seq( Literal.create(false, BooleanType) ) override lazy val updateExpressions: Seq[ org.apache.spark.sql.catalyst.expressions.Expression] = Seq( Or(group_or, child) ) override lazy val mergeExpressions: Seq[org.apache.spark.sql.catalyst.expressions.Expression] = { Seq( Or(group_or.left, group_or.right) ) } override lazy val evaluateExpression: AttributeReference = group_or }
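The four aggregates above (GroupBitwiseOr, GroupBitwiseAnd, GroupAnd, GroupOr) share the same DeclarativeAggregate shape: a single buffer AttributeReference, an identity initial value, and the binary operator applied in both the update and merge expressions. The sketch below is a usage illustration that is not part of mimir: wrapping such an aggregate in a Column via toAggregateExpression lets it be used in DataFrame.agg; the group_or helper and the local SparkSession setup are assumptions made for the example.

import org.apache.spark.sql.{Column, SparkSession}
import org.apache.spark.sql.functions.col
import mimir.exec.spark.udf.GroupOr

object GroupOrUsage {
  // Wrap the catalyst aggregate function into a user-facing Column.
  def group_or(c: Column): Column = new Column(GroupOr(c.expr).toAggregateExpression())

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("group-or-demo").getOrCreate()
    import spark.implicits._

    val df = Seq(("a", false), ("a", true), ("b", false)).toDF("k", "flag")
    // Logically, group "a" folds to true and group "b" folds to false.
    df.groupBy($"k").agg(group_or(col("flag"))).show()

    spark.stop()
  }
}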
Example 37
Source File: BatchEvalPythonExecSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution.python import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import org.apache.spark.api.python.{PythonEvalType, PythonFunction} import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, GreaterThan, In} import org.apache.spark.sql.execution.{FilterExec, InputAdapter, SparkPlanTest, WholeStageCodegenExec} import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types.BooleanType class BatchEvalPythonExecSuite extends SparkPlanTest with SharedSQLContext { import testImplicits.newProductEncoder import testImplicits.localSeqToDatasetHolder override def beforeAll(): Unit = { super.beforeAll() spark.udf.registerPython("dummyPythonUDF", new MyDummyPythonUDF) } override def afterAll(): Unit = { spark.sessionState.functionRegistry.dropFunction(FunctionIdentifier("dummyPythonUDF")) super.afterAll() } test("Python UDF: push down deterministic FilterExec predicates") { val df = Seq(("Hello", 4)).toDF("a", "b") .where("dummyPythonUDF(b) and dummyPythonUDF(a) and a in (3, 4)") val qualifiedPlanNodes = df.queryExecution.executedPlan.collect { case f @ FilterExec( And(_: AttributeReference, _: AttributeReference), InputAdapter(_: BatchEvalPythonExec)) => f case b @ BatchEvalPythonExec(_, _, WholeStageCodegenExec(FilterExec(_: In, _))) => b } assert(qualifiedPlanNodes.size == 2) } test("Nested Python UDF: push down deterministic FilterExec predicates") { val df = Seq(("Hello", 4)).toDF("a", "b") .where("dummyPythonUDF(a, dummyPythonUDF(a, b)) and a in (3, 4)") val qualifiedPlanNodes = df.queryExecution.executedPlan.collect { case f @ FilterExec(_: AttributeReference, InputAdapter(_: BatchEvalPythonExec)) => f case b @ BatchEvalPythonExec(_, _, WholeStageCodegenExec(FilterExec(_: In, _))) => b } assert(qualifiedPlanNodes.size == 2) } test("Python UDF: no push down on non-deterministic") { val df = Seq(("Hello", 4)).toDF("a", "b") .where("b > 4 and dummyPythonUDF(a) and rand() > 0.3") val qualifiedPlanNodes = df.queryExecution.executedPlan.collect { case f @ FilterExec( And(_: AttributeReference, _: GreaterThan), InputAdapter(_: BatchEvalPythonExec)) => f case b @ BatchEvalPythonExec(_, _, WholeStageCodegenExec(_: FilterExec)) => b } assert(qualifiedPlanNodes.size == 2) } test("Python UDF: push down on deterministic predicates after the first non-deterministic") { val df = Seq(("Hello", 4)).toDF("a", "b") .where("dummyPythonUDF(a) and rand() > 0.3 and b > 4") val qualifiedPlanNodes = df.queryExecution.executedPlan.collect { case f @ FilterExec( And(_: AttributeReference, _: GreaterThan), InputAdapter(_: BatchEvalPythonExec)) => f case b @ BatchEvalPythonExec(_, _, WholeStageCodegenExec(_: FilterExec)) => b } assert(qualifiedPlanNodes.size == 2) } test("Python UDF refers to the attributes from more than one child") { val df = Seq(("Hello", 4)).toDF("a", "b") val df2 = Seq(("Hello", 4)).toDF("c", "d") val joinDF = df.crossJoin(df2).where("dummyPythonUDF(a, c) == dummyPythonUDF(d, c)") val qualifiedPlanNodes = joinDF.queryExecution.executedPlan.collect { case b: BatchEvalPythonExec => b } assert(qualifiedPlanNodes.size == 1) } } // This Python UDF is dummy and just for testing. Unable to execute. 
class DummyUDF extends PythonFunction( command = Array[Byte](), envVars = Map("" -> "").asJava, pythonIncludes = ArrayBuffer("").asJava, pythonExec = "", pythonVer = "", broadcastVars = null, accumulator = null) class MyDummyPythonUDF extends UserDefinedPythonFunction( name = "dummyUDF", func = new DummyUDF, dataType = BooleanType, pythonEvalType = PythonEvalType.SQL_BATCHED_UDF, udfDeterministic = true)
Example 38
Source File: resources.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution.command import java.io.File import java.net.URI import org.apache.hadoop.fs.Path import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} case class ListJarsCommand(jars: Seq[String] = Seq.empty[String]) extends RunnableCommand { override val output: Seq[Attribute] = { AttributeReference("Results", StringType, nullable = false)() :: Nil } override def run(sparkSession: SparkSession): Seq[Row] = { val jarList = sparkSession.sparkContext.listJars() if (jars.nonEmpty) { for { jarName <- jars.map(f => new Path(f).getName) jarPath <- jarList if jarPath.contains(jarName) } yield Row(jarPath) } else { jarList.map(Row(_)) } } }
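The heart of ListJarsCommand is the for-comprehension that keeps only the registered jar paths whose file name matches one of the requested jars. The sketch below reproduces that filtering logic on plain sequences; JarFilterDemo and its sample paths are made up for illustration and no SparkSession is needed.

import org.apache.hadoop.fs.Path

object JarFilterDemo {
  def filterJars(requested: Seq[String], registered: Seq[String]): Seq[String] =
    if (requested.nonEmpty) {
      for {
        jarName <- requested.map(f => new Path(f).getName)
        jarPath <- registered
        if jarPath.contains(jarName)
      } yield jarPath
    } else {
      registered
    }

  def main(args: Array[String]): Unit = {
    val registered = Seq("spark://host:1234/jars/lib-a.jar", "spark://host:1234/jars/lib-b.jar")
    // Only lib-a.jar is requested, so only its registered path is returned.
    println(filterJars(Seq("/tmp/lib-a.jar"), registered))
  }
}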
Example 39
Source File: DataSourceV2Relation.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, Statistics} import org.apache.spark.sql.sources.v2.reader._ case class DataSourceV2Relation( output: Seq[AttributeReference], reader: DataSourceReader) extends LeafNode with MultiInstanceRelation with DataSourceReaderHolder { override def canEqual(other: Any): Boolean = other.isInstanceOf[DataSourceV2Relation] override def computeStats(): Statistics = reader match { case r: SupportsReportStatistics => Statistics(sizeInBytes = r.getStatistics.sizeInBytes().orElse(conf.defaultSizeInBytes)) case _ => Statistics(sizeInBytes = conf.defaultSizeInBytes) } override def newInstance(): DataSourceV2Relation = { copy(output = output.map(_.newInstance())) } } class StreamingDataSourceV2Relation( output: Seq[AttributeReference], reader: DataSourceReader) extends DataSourceV2Relation(output, reader) { override def isStreaming: Boolean = true } object DataSourceV2Relation { def apply(reader: DataSourceReader): DataSourceV2Relation = { new DataSourceV2Relation(reader.readSchema().toAttributes, reader) } }
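DataSourceV2Relation.apply derives its output attributes from the reader's schema via StructType.toAttributes, and newInstance re-allocates ExprIds through AttributeReference.newInstance. The short sketch below shows both conversions on a plain schema; it is independent of the DataSourceV2 classes and the object name is illustrative.

import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

object ToAttributesDemo {
  def main(args: Array[String]): Unit = {
    val schema = StructType(Seq(
      StructField("id", IntegerType, nullable = false),
      StructField("name", StringType)))

    // Each StructField becomes an AttributeReference with a freshly allocated ExprId.
    val attrs = schema.toAttributes
    attrs.foreach(a => println(s"${a.name}#${a.exprId.id}: ${a.dataType}"))

    // newInstance() keeps name/type but re-allocates the ExprId, which is what
    // DataSourceV2Relation.newInstance relies on to avoid duplicate attribute ids.
    val fresh = attrs.map(_.newInstance())
    assert(attrs.map(_.name) == fresh.map(_.name))
    assert(attrs.map(_.exprId) != fresh.map(_.exprId))
  }
}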
Example 40
Source File: LogicalRelation.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution.datasources import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.expressions.{AttributeMap, AttributeReference} import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} import org.apache.spark.sql.sources.BaseRelation import org.apache.spark.util.Utils override def newInstance(): LogicalRelation = { this.copy(output = output.map(_.newInstance())) } override def refresh(): Unit = relation match { case fs: HadoopFsRelation => fs.location.refresh() case _ => // Do nothing. } override def simpleString: String = s"Relation[${Utils.truncatedString(output, ",")}] $relation" } object LogicalRelation { def apply(relation: BaseRelation, isStreaming: Boolean = false): LogicalRelation = LogicalRelation(relation, relation.schema.toAttributes, None, isStreaming) def apply(relation: BaseRelation, table: CatalogTable): LogicalRelation = LogicalRelation(relation, relation.schema.toAttributes, Some(table), false) }
Example 41
Source File: EliminateMapObjectsSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.expressions.objects.Invoke import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{DeserializeToObject, LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.types._ class EliminateMapObjectsSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = { Batch("EliminateMapObjects", FixedPoint(50), NullPropagation, SimplifyCasts, EliminateMapObjects) :: Nil } } implicit private def intArrayEncoder = ExpressionEncoder[Array[Int]]() implicit private def doubleArrayEncoder = ExpressionEncoder[Array[Double]]() test("SPARK-20254: Remove unnecessary data conversion for primitive array") { val intObjType = ObjectType(classOf[Array[Int]]) val intInput = LocalRelation('a.array(ArrayType(IntegerType, false))) val intQuery = intInput.deserialize[Array[Int]].analyze val intOptimized = Optimize.execute(intQuery) val intExpected = DeserializeToObject( Invoke(intInput.output(0), "toIntArray", intObjType, Nil, true, false), AttributeReference("obj", intObjType, true)(), intInput) comparePlans(intOptimized, intExpected) val doubleObjType = ObjectType(classOf[Array[Double]]) val doubleInput = LocalRelation('a.array(ArrayType(DoubleType, false))) val doubleQuery = doubleInput.deserialize[Array[Double]].analyze val doubleOptimized = Optimize.execute(doubleQuery) val doubleExpected = DeserializeToObject( Invoke(doubleInput.output(0), "toDoubleArray", doubleObjType, Nil, true, false), AttributeReference("obj", doubleObjType, true)(), doubleInput) comparePlans(doubleOptimized, doubleExpected) } }
Example 42
Source File: StatsEstimationTestBase.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.catalyst.statsEstimation import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, LeafNode, LogicalPlan, Statistics} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{IntegerType, StringType} trait StatsEstimationTestBase extends SparkFunSuite { var originalValue: Boolean = false override def beforeAll(): Unit = { super.beforeAll() // Enable stats estimation based on CBO. originalValue = SQLConf.get.getConf(SQLConf.CBO_ENABLED) SQLConf.get.setConf(SQLConf.CBO_ENABLED, true) } override def afterAll(): Unit = { SQLConf.get.setConf(SQLConf.CBO_ENABLED, originalValue) super.afterAll() } def getColSize(attribute: Attribute, colStat: ColumnStat): Long = attribute.dataType match { // For UTF8String: base + offset + numBytes case StringType => colStat.avgLen.getOrElse(attribute.dataType.defaultSize.toLong) + 8 + 4 case _ => colStat.avgLen.getOrElse(attribute.dataType.defaultSize) } def attr(colName: String): AttributeReference = AttributeReference(colName, IntegerType)() case class StatsTestPlan( outputList: Seq[Attribute], rowCount: BigInt, attributeStats: AttributeMap[ColumnStat], size: Option[BigInt] = None) extends LeafNode { override def output: Seq[Attribute] = outputList override def computeStats(): Statistics = Statistics( // If sizeInBytes is useless in testing, we just use a fake value sizeInBytes = size.getOrElse(Int.MaxValue), rowCount = Some(rowCount), attributeStats = attributeStats) }
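Because StatsTestPlan exposes fixed statistics, a suite mixing in this base can assert estimation results deterministically. The sketch below is hypothetical rather than part of the XSQL sources: SimpleStatsSuite and its column statistics are invented, and it assumes the test base above is available on the classpath.

import org.apache.spark.sql.catalyst.expressions.AttributeMap
import org.apache.spark.sql.catalyst.plans.logical.ColumnStat
import org.apache.spark.sql.catalyst.statsEstimation.StatsEstimationTestBase

class SimpleStatsSuite extends StatsEstimationTestBase {
  test("StatsTestPlan reports the row count it was given") {
    // attr() builds an IntegerType AttributeReference with a fresh ExprId.
    val a = attr("a")
    val plan = StatsTestPlan(
      outputList = Seq(a),
      rowCount = BigInt(100),
      attributeStats = AttributeMap(Seq(a -> ColumnStat(distinctCount = Some(BigInt(10))))))

    // The leaf node surfaces the pinned statistics through LogicalPlan.stats.
    assert(plan.stats.rowCount == Some(BigInt(100)))
  }
}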
Example 43
Source File: DeepDescribeCommand.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql.execution.datasources import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.execution.RunnableCommand import org.apache.spark.sql.sources.describable.Describable import org.apache.spark.sql.sources.describable.FieldLike.StructFieldLike import org.apache.spark.sql.types.StructType import org.apache.spark.sql.{Row, SQLContext} private[sql] case class DeepDescribeCommand( relation: Describable) extends RunnableCommand { override def run(sqlContext: SQLContext): Seq[Row] = { val description = relation.describe() Seq(description match { case r: Row => r case default => Row(default) }) } override def output: Seq[Attribute] = { relation.describeOutput match { case StructType(fields) => fields.map(StructFieldLike.toAttribute) case other => AttributeReference("value", other)() :: Nil } } }
Example 44
Source File: SameResultSuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution import org.apache.spark.sql.{DataFrame, QueryTest} import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Project} import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types.IntegerType class SameResultSuite extends QueryTest with SharedSQLContext { import testImplicits._ test("FileSourceScanExec: different orders of data filters and partition filters") { withTempPath { path => val tmpDir = path.getCanonicalPath spark.range(10) .selectExpr("id as a", "id + 1 as b", "id + 2 as c", "id + 3 as d") .write .partitionBy("a", "b") .parquet(tmpDir) val df = spark.read.parquet(tmpDir) // partition filters: a > 1 AND b < 9 // data filters: c > 1 AND d < 9 val plan1 = getFileSourceScanExec(df.where("a > 1 AND b < 9 AND c > 1 AND d < 9")) val plan2 = getFileSourceScanExec(df.where("b < 9 AND a > 1 AND d < 9 AND c > 1")) assert(plan1.sameResult(plan2)) } } private def getFileSourceScanExec(df: DataFrame): FileSourceScanExec = { df.queryExecution.sparkPlan.find(_.isInstanceOf[FileSourceScanExec]).get .asInstanceOf[FileSourceScanExec] } test("SPARK-20725: partial aggregate should behave correctly for sameResult") { val df1 = spark.range(10).agg(sum($"id")) val df2 = spark.range(10).agg(sum($"id")) assert(df1.queryExecution.executedPlan.sameResult(df2.queryExecution.executedPlan)) val df3 = spark.range(10).agg(sumDistinct($"id")) val df4 = spark.range(10).agg(sumDistinct($"id")) assert(df3.queryExecution.executedPlan.sameResult(df4.queryExecution.executedPlan)) } test("Canonicalized result is case-insensitive") { val a = AttributeReference("A", IntegerType)() val b = AttributeReference("B", IntegerType)() val planUppercase = Project(Seq(a), LocalRelation(a, b)) val c = AttributeReference("a", IntegerType)() val d = AttributeReference("b", IntegerType)() val planLowercase = Project(Seq(c), LocalRelation(c, d)) assert(planUppercase.sameResult(planLowercase)) } }
Example 45
Source File: resources.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution.command import java.io.File import java.net.URI import org.apache.hadoop.fs.Path import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} case class ListJarsCommand(jars: Seq[String] = Seq.empty[String]) extends RunnableCommand { override val output: Seq[Attribute] = { AttributeReference("Results", StringType, nullable = false)() :: Nil } override def run(sparkSession: SparkSession): Seq[Row] = { val jarList = sparkSession.sparkContext.listJars() if (jars.nonEmpty) { for { jarName <- jars.map(f => new Path(f).getName) jarPath <- jarList if jarPath.contains(jarName) } yield Row(jarPath) } else { jarList.map(Row(_)) } } }
Example 46
Source File: DataSourceV2Strategy.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution.datasources.v2 import scala.collection.mutable import org.apache.spark.sql.{sources, Strategy} import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, AttributeSet, Expression} import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, Repartition} import org.apache.spark.sql.execution.{FilterExec, ProjectExec, SparkPlan} import org.apache.spark.sql.execution.datasources.DataSourceStrategy import org.apache.spark.sql.execution.streaming.continuous.{ContinuousCoalesceExec, WriteToContinuousDataSource, WriteToContinuousDataSourceExec} import org.apache.spark.sql.sources.v2.reader.{DataSourceReader, SupportsPushDownFilters, SupportsPushDownRequiredColumns} import org.apache.spark.sql.sources.v2.reader.streaming.ContinuousReader object DataSourceV2Strategy extends Strategy { // TODO: nested column pruning. private def pruneColumns( reader: DataSourceReader, relation: DataSourceV2Relation, exprs: Seq[Expression]): Seq[AttributeReference] = { reader match { case r: SupportsPushDownRequiredColumns => val requiredColumns = AttributeSet(exprs.flatMap(_.references)) val neededOutput = relation.output.filter(requiredColumns.contains) if (neededOutput != relation.output) { r.pruneColumns(neededOutput.toStructType) val nameToAttr = relation.output.map(_.name).zip(relation.output).toMap r.readSchema().toAttributes.map { // We have to keep the attribute id during transformation. a => a.withExprId(nameToAttr(a.name).exprId) } } else { relation.output } case _ => relation.output } } override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case PhysicalOperation(project, filters, relation: DataSourceV2Relation) => val reader = relation.newReader() // `pushedFilters` will be pushed down and evaluated in the underlying data sources. // `postScanFilters` need to be evaluated after the scan. // `postScanFilters` and `pushedFilters` can overlap, e.g. the parquet row group filter. 
val (pushedFilters, postScanFilters) = pushFilters(reader, filters) val output = pruneColumns(reader, relation, project ++ postScanFilters) logInfo( s""" |Pushing operators to ${relation.source.getClass} |Pushed Filters: ${pushedFilters.mkString(", ")} |Post-Scan Filters: ${postScanFilters.mkString(",")} |Output: ${output.mkString(", ")} """.stripMargin) val scan = DataSourceV2ScanExec( output, relation.source, relation.options, pushedFilters, reader) val filterCondition = postScanFilters.reduceLeftOption(And) val withFilter = filterCondition.map(FilterExec(_, scan)).getOrElse(scan) // always add the projection, which will produce unsafe rows required by some operators ProjectExec(project, withFilter) :: Nil case r: StreamingDataSourceV2Relation => // ensure there is a projection, which will produce unsafe rows required by some operators ProjectExec(r.output, DataSourceV2ScanExec(r.output, r.source, r.options, r.pushedFilters, r.reader)) :: Nil case WriteToDataSourceV2(writer, query) => WriteToDataSourceV2Exec(writer, planLater(query)) :: Nil case AppendData(r: DataSourceV2Relation, query, _) => WriteToDataSourceV2Exec(r.newWriter(), planLater(query)) :: Nil case WriteToContinuousDataSource(writer, query) => WriteToContinuousDataSourceExec(writer, planLater(query)) :: Nil case Repartition(1, false, child) => val isContinuous = child.collectFirst { case StreamingDataSourceV2Relation(_, _, _, r: ContinuousReader) => r }.isDefined if (isContinuous) { ContinuousCoalesceExec(1, planLater(child)) :: Nil } else { Nil } case _ => Nil } }
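The subtle part of pruneColumns is keeping the original ExprIds after the reader reports its pruned schema, so that expressions elsewhere in the plan still resolve against the same attribute ids. The sketch below isolates that re-alignment step; PreserveExprIds and realign are invented names used only for illustration.

import org.apache.spark.sql.catalyst.expressions.AttributeReference
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

object PreserveExprIds {
  // Re-derive attributes from the pruned schema, then copy back the ExprId that
  // the surviving column had before pruning.
  def realign(original: Seq[AttributeReference], prunedSchema: StructType): Seq[AttributeReference] = {
    val nameToAttr = original.map(a => a.name -> a).toMap
    prunedSchema.toAttributes.map(a => a.withExprId(nameToAttr(a.name).exprId))
  }

  def main(args: Array[String]): Unit = {
    val original = Seq(
      AttributeReference("id", IntegerType)(),
      AttributeReference("name", StringType)())
    val pruned = StructType(Seq(StructField("name", StringType)))

    val realigned = realign(original, pruned)
    // The surviving column keeps the ExprId it had before pruning.
    assert(realigned.head.exprId == original(1).exprId)
  }
}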
Example 47
Source File: LogicalRelation.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution.datasources import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.expressions.{AttributeMap, AttributeReference} import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} import org.apache.spark.sql.sources.BaseRelation import org.apache.spark.util.Utils override def newInstance(): LogicalRelation = { this.copy(output = output.map(_.newInstance())) } override def refresh(): Unit = relation match { case fs: HadoopFsRelation => fs.location.refresh() case _ => // Do nothing. } override def simpleString: String = s"Relation[${Utils.truncatedString(output, ",")}] $relation" } object LogicalRelation { def apply(relation: BaseRelation, isStreaming: Boolean = false): LogicalRelation = LogicalRelation(relation, relation.schema.toAttributes, None, isStreaming) def apply(relation: BaseRelation, table: CatalogTable): LogicalRelation = LogicalRelation(relation, relation.schema.toAttributes, Some(table), false) }
Example 48
Source File: EliminateMapObjectsSuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.expressions.objects.Invoke import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{DeserializeToObject, LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.types._ class EliminateMapObjectsSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = { Batch("EliminateMapObjects", FixedPoint(50), NullPropagation, SimplifyCasts, EliminateMapObjects) :: Nil } } implicit private def intArrayEncoder = ExpressionEncoder[Array[Int]]() implicit private def doubleArrayEncoder = ExpressionEncoder[Array[Double]]() test("SPARK-20254: Remove unnecessary data conversion for primitive array") { val intObjType = ObjectType(classOf[Array[Int]]) val intInput = LocalRelation('a.array(ArrayType(IntegerType, false))) val intQuery = intInput.deserialize[Array[Int]].analyze val intOptimized = Optimize.execute(intQuery) val intExpected = DeserializeToObject( Invoke(intInput.output(0), "toIntArray", intObjType, Nil, true, false), AttributeReference("obj", intObjType, true)(), intInput) comparePlans(intOptimized, intExpected) val doubleObjType = ObjectType(classOf[Array[Double]]) val doubleInput = LocalRelation('a.array(ArrayType(DoubleType, false))) val doubleQuery = doubleInput.deserialize[Array[Double]].analyze val doubleOptimized = Optimize.execute(doubleQuery) val doubleExpected = DeserializeToObject( Invoke(doubleInput.output(0), "toDoubleArray", doubleObjType, Nil, true, false), AttributeReference("obj", doubleObjType, true)(), doubleInput) comparePlans(doubleOptimized, doubleExpected) } }
Example 49
Source File: TestRelations.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.LocalRelation import org.apache.spark.sql.types._ object TestRelations { val testRelation = LocalRelation(AttributeReference("a", IntegerType, nullable = true)()) val testRelation2 = LocalRelation( AttributeReference("a", StringType)(), AttributeReference("b", StringType)(), AttributeReference("c", DoubleType)(), AttributeReference("d", DecimalType(10, 2))(), AttributeReference("e", ShortType)()) val testRelation3 = LocalRelation( AttributeReference("e", ShortType)(), AttributeReference("f", StringType)(), AttributeReference("g", DoubleType)(), AttributeReference("h", DecimalType(10, 2))()) // This is the same with `testRelation3` but only `h` is incompatible type. val testRelation4 = LocalRelation( AttributeReference("e", StringType)(), AttributeReference("f", StringType)(), AttributeReference("g", StringType)(), AttributeReference("h", MapType(IntegerType, IntegerType))()) val nestedRelation = LocalRelation( AttributeReference("top", StructType( StructField("duplicateField", StringType) :: StructField("duplicateField", StringType) :: StructField("differentCase", StringType) :: StructField("differentcase", StringType) :: Nil ))()) val nestedRelation2 = LocalRelation( AttributeReference("top", StructType( StructField("aField", StringType) :: StructField("bField", StringType) :: StructField("cField", StringType) :: Nil ))()) val listRelation = LocalRelation( AttributeReference("list", ArrayType(IntegerType))()) val mapRelation = LocalRelation( AttributeReference("map", MapType(IntegerType, IntegerType))()) }
Example 50
Source File: QueryPlanSuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.catalyst.plans import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.dsl.plans import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, Literal, NamedExpression} import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin} import org.apache.spark.sql.types.IntegerType class QueryPlanSuite extends SparkFunSuite { test("origin remains the same after mapExpressions (SPARK-23823)") { CurrentOrigin.setPosition(0, 0) val column = AttributeReference("column", IntegerType)(NamedExpression.newExprId) val query = plans.DslLogicalPlan(plans.table("table")).select(column) CurrentOrigin.reset() val mappedQuery = query mapExpressions { case _: Expression => Literal(1) } val mappedOrigin = mappedQuery.expressions.apply(0).origin assert(mappedOrigin == Origin.apply(Some(0), Some(0))) } }
Example 51
Source File: LogicalPlanSuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.catalyst.plans import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference, Literal, NamedExpression} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.types.IntegerType class LogicalPlanSuite extends SparkFunSuite { private var invocationCount = 0 private val function: PartialFunction[LogicalPlan, LogicalPlan] = { case p: Project => invocationCount += 1 p } private val testRelation = LocalRelation() test("transformUp runs on operators") { invocationCount = 0 val plan = Project(Nil, testRelation) plan transformUp function assert(invocationCount === 1) invocationCount = 0 plan transformDown function assert(invocationCount === 1) } test("transformUp runs on operators recursively") { invocationCount = 0 val plan = Project(Nil, Project(Nil, testRelation)) plan transformUp function assert(invocationCount === 2) invocationCount = 0 plan transformDown function assert(invocationCount === 2) } test("isStreaming") { val relation = LocalRelation(AttributeReference("a", IntegerType, nullable = true)()) val incrementalRelation = LocalRelation( Seq(AttributeReference("a", IntegerType, nullable = true)()), isStreaming = true) case class TestBinaryRelation(left: LogicalPlan, right: LogicalPlan) extends BinaryNode { override def output: Seq[Attribute] = left.output ++ right.output } require(relation.isStreaming === false) require(incrementalRelation.isStreaming === true) assert(TestBinaryRelation(relation, relation).isStreaming === false) assert(TestBinaryRelation(incrementalRelation, relation).isStreaming === true) assert(TestBinaryRelation(relation, incrementalRelation).isStreaming === true) assert(TestBinaryRelation(incrementalRelation, incrementalRelation).isStreaming) } test("transformExpressions works with a Stream") { val id1 = NamedExpression.newExprId val id2 = NamedExpression.newExprId val plan = Project(Stream( Alias(Literal(1), "a")(exprId = id1), Alias(Literal(2), "b")(exprId = id2)), OneRowRelation()) val result = plan.transformExpressions { case Literal(v: Int, IntegerType) if v != 1 => Literal(v + 1, IntegerType) } val expected = Project(Stream( Alias(Literal(1), "a")(exprId = id1), Alias(Literal(3), "b")(exprId = id2)), OneRowRelation()) assert(result.sameResult(expected)) } }
Example 52
Source File: HierarchyAnalysis.scala From HANAVora-Extensions with Apache License 2.0
package org.apache.spark.sql import org.apache.spark.sql.catalyst.analysis.Catalog import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.types.Node import org.apache.spark.sql.catalyst.expressions.EqualTo import org.apache.spark.sql.catalyst.expressions.AttributeReference private[this] def supportsExpression(expr: Expression, plan: LogicalPlan): Unit = { expr match { case np: NodePredicate => (np.left, np.right) match { case (l: AttributeReference, r: AttributeReference) => val hl = getReferencedHierarchy(plan, l.exprId) val hr = getReferencedHierarchy(plan, r.exprId) if (hl.identifier != hr.identifier) { throw new AnalysisException(MIXED_NODES_ERROR.format(np.symbol)) } case _ => // OK } case _ => // OK } expr.children.foreach(e => supportsExpression(e, plan)) } private def getReferencedHierarchy(plan: LogicalPlan, exprId: ExprId): Hierarchy = { plan.collectFirst { case h@Hierarchy(_, a) if a.exprId.equals(exprId) => h }.get } }
Example 53
Source File: StreamingIncrementCommand.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.xsql.execution.command import java.util.Locale import org.apache.spark.SparkException import org.apache.spark.sql.{Dataset, Row, SparkSession} import org.apache.spark.sql.catalyst.encoders.RowEncoder import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode} import org.apache.spark.sql.catalyst.streaming.InternalOutputModes import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.command.RunnableCommand import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.streaming.StreamingRelationV2 import org.apache.spark.sql.sources.v2.StreamWriteSupport import org.apache.spark.sql.streaming.{OutputMode, Trigger} import org.apache.spark.sql.xsql.DataSourceManager._ import org.apache.spark.sql.xsql.StreamingSinkType case class StreamingIncrementCommand(plan: LogicalPlan) extends RunnableCommand { private var outputMode: OutputMode = OutputMode.Append // dummy override def output: Seq[AttributeReference] = Seq.empty // dummy override def producedAttributes: AttributeSet = plan.producedAttributes override def run(sparkSession: SparkSession): Seq[Row] = { import StreamingSinkType._ val qe = new QueryExecution(sparkSession, new ConstructedStreaming(plan)) val df = new Dataset(sparkSession, qe, RowEncoder(qe.analyzed.schema)) plan.collectLeaves.head match { case StreamingRelationV2(_, _, extraOptions, _, _) => val source = extraOptions.getOrElse(STREAMING_SINK_TYPE, DEFAULT_STREAMING_SINK) val sinkOptions = extraOptions.filter(_._1.startsWith(STREAMING_SINK_PREFIX)).map { kv => val key = kv._1.substring(STREAMING_SINK_PREFIX.length) (key, kv._2) } StreamingSinkType.withName(source.toUpperCase(Locale.ROOT)) match { case CONSOLE => case TEXT | PARQUET | ORC | JSON | CSV => if (sinkOptions.get(STREAMING_SINK_PATH) == None) { throw new SparkException("Sink type is file, must config path") } case KAFKA => if (sinkOptions.get(STREAMING_SINK_BOOTSTRAP_SERVERS) == None) { throw new SparkException("Sink type is kafka, must config bootstrap servers") } if (sinkOptions.get(STREAMING_SINK_TOPIC) == None) { throw new SparkException("Sink type is kafka, must config kafka topic") } case _ => throw new SparkException( "Sink type is invalid, " + s"select from ${StreamingSinkType.values}") } val ds = DataSource.lookupDataSource(source, sparkSession.sessionState.conf) val disabledSources = sparkSession.sqlContext.conf.disabledV2StreamingWriters.split(",") val sink = ds.newInstance() match { case w: StreamWriteSupport if !disabledSources.contains(w.getClass.getCanonicalName) => w case _ => val ds = DataSource( sparkSession, className = source, options = sinkOptions.toMap, partitionColumns = Nil) ds.createSink(InternalOutputModes.Append) } val outputMode = InternalOutputModes( extraOptions.getOrElse(STREAMING_OUTPUT_MODE, DEFAULT_STREAMING_OUTPUT_MODE)) val duration = extraOptions.getOrElse(STREAMING_TRIGGER_DURATION, DEFAULT_STREAMING_TRIGGER_DURATION) val trigger = extraOptions.getOrElse(STREAMING_TRIGGER_TYPE, DEFAULT_STREAMING_TRIGGER_TYPE) match { case STREAMING_MICRO_BATCH_TRIGGER => Trigger.ProcessingTime(duration) case STREAMING_ONCE_TRIGGER => Trigger.Once() case STREAMING_CONTINUOUS_TRIGGER => Trigger.Continuous(duration) } val query = sparkSession.sessionState.streamingQueryManager.startQuery( extraOptions.get("queryName"), extraOptions.get(STREAMING_CHECKPOINT_LOCATION), df, sinkOptions.toMap, sink, 
outputMode, useTempCheckpointLocation = source == DEFAULT_STREAMING_SINK, recoverFromCheckpointLocation = true, trigger = trigger) query.awaitTermination() } // dummy Seq.empty } } case class ConstructedStreaming(child: LogicalPlan) extends UnaryNode { override def output: Seq[Attribute] = child.output }
Example 54
Source File: databases.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.xsql.execution.command import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.execution.command.RunnableCommand import org.apache.spark.sql.types.StringType import org.apache.spark.sql.xsql.XSQLSessionCatalog case class XSQLSetDatabaseCommand(dataSourceName: Option[String], databaseName: String) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { val catalog = sparkSession.sessionState.catalog.asInstanceOf[XSQLSessionCatalog] if (dataSourceName.isEmpty) { catalog.setCurrentDatabase(databaseName) } else { catalog.setCurrentDatabase(dataSourceName.get, databaseName) } Seq.empty[Row] } }
Example 55
Source File: datasources.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.xsql.execution.command import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.execution.command.RunnableCommand import org.apache.spark.sql.types.StringType import org.apache.spark.sql.xsql.XSQLSessionCatalog case class XSQLShowDatasourcesCommand(datasourcePattern: Option[String]) extends RunnableCommand { override val output: Seq[Attribute] = { AttributeReference("dataSourceName", StringType, nullable = false)() :: Nil } override def run(sparkSession: SparkSession): Seq[Row] = { val catalog = sparkSession.sessionState.catalog.asInstanceOf[XSQLSessionCatalog] val datasources = datasourcePattern .map { pattern => catalog.listDatasources(pattern) } .getOrElse(catalog.listDatasources()) datasources.map { d => Row(d) } } } case class XSQLAddDatasourceCommand(dataSourceName: String, properties: Map[String, String]) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { val catalog = sparkSession.sessionState.catalog.asInstanceOf[XSQLSessionCatalog] catalog.addDataSource(dataSourceName, properties) Seq.empty[Row] } } case class XSQLRemoveDatasourceCommand(dataSourceName: String, ifExists: Boolean) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { val catalog = sparkSession.sessionState.catalog.asInstanceOf[XSQLSessionCatalog] catalog.removeDataSource(dataSourceName, ifExists) Seq.empty[Row] } } case class XSQLRefreshDatasourceCommand(dataSourceName: String) extends RunnableCommand { override def run(sparkSession: SparkSession): Seq[Row] = { val catalog = sparkSession.sessionState.catalog.asInstanceOf[XSQLSessionCatalog] catalog.refreshDataSource(dataSourceName) Seq.empty[Row] } }
Example 56
Source File: SparkWrapper.scala From tispark with Apache License 2.0
package com.pingcap.tispark import org.apache.spark.sql.catalyst.catalog.{CatalogTable, SessionCatalog} import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Expression} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} import org.apache.spark.sql.types.{DataType, Metadata} object SparkWrapper { def getVersion: String = { "SparkWrapper-2.4" } def newSubqueryAlias(identifier: String, child: LogicalPlan): SubqueryAlias = { SubqueryAlias(identifier, child) } def newAlias(child: Expression, name: String): Alias = { Alias(child, name)() } def newAttributeReference( name: String, dataType: DataType, nullable: Boolean, metadata: Metadata): AttributeReference = { AttributeReference(name, dataType, nullable, metadata)() } def callSessionCatalogCreateTable( obj: SessionCatalog, tableDefinition: CatalogTable, ignoreIfExists: Boolean): Unit = { obj.createTable(tableDefinition, ignoreIfExists) } }
Example 57
Source File: SparkWrapper.scala From tispark with Apache License 2.0
package com.pingcap.tispark import org.apache.spark.sql.catalyst.catalog.{CatalogTable, SessionCatalog} import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, Expression} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias} import org.apache.spark.sql.types.{DataType, Metadata} object SparkWrapper { def getVersion: String = { "SparkWrapper-2.3" } def newSubqueryAlias(identifier: String, child: LogicalPlan): SubqueryAlias = { SubqueryAlias(identifier, child) } def newAlias(child: Expression, name: String): Alias = { Alias(child, name)() } def newAttributeReference( name: String, dataType: DataType, nullable: Boolean, metadata: Metadata): AttributeReference = { AttributeReference(name, dataType, nullable, metadata)() } def callSessionCatalogCreateTable( obj: SessionCatalog, tableDefinition: CatalogTable, ignoreIfExists: Boolean): Unit = { obj.createTable(tableDefinition, ignoreIfExists) } }
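The two SparkWrapper objects above expose the same surface and differ only in the Spark version they are compiled against, so calling code can stay version-agnostic. A small usage sketch, not taken from the TiSpark sources (SparkWrapperUsage is an invented name):

import com.pingcap.tispark.SparkWrapper
import org.apache.spark.sql.types.{LongType, Metadata}

object SparkWrapperUsage {
  def main(args: Array[String]): Unit = {
    // Reports which wrapper build is on the classpath, e.g. "SparkWrapper-2.4".
    println(SparkWrapper.getVersion)

    // Build an AttributeReference without depending on constructor differences
    // between Spark releases.
    val attr = SparkWrapper.newAttributeReference(
      name = "id", dataType = LongType, nullable = false, metadata = Metadata.empty)
    println(s"${attr.name}: ${attr.dataType}, nullable=${attr.nullable}")
  }
}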
Example 58
Source File: commands.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.execution.command import org.apache.spark.rdd.RDD import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.errors.TreeNodeException import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.debug._ import org.apache.spark.sql.execution.streaming.IncrementalExecution import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.types._ case class ExplainCommand( logicalPlan: LogicalPlan, override val output: Seq[Attribute] = Seq(AttributeReference("plan", StringType, nullable = true)()), extended: Boolean = false, codegen: Boolean = false) extends RunnableCommand { // Run through the optimizer to generate the physical plan. override def run(sparkSession: SparkSession): Seq[Row] = try { val queryExecution = if (logicalPlan.isStreaming) { // This is used only by explaining `Dataset/DataFrame` created by `spark.readStream`, so the // output mode does not matter since there is no `Sink`. new IncrementalExecution(sparkSession, logicalPlan, OutputMode.Append(), "<unknown>", 0) } else { sparkSession.sessionState.executePlan(logicalPlan) } val outputString = if (codegen) { codegenString(queryExecution.executedPlan) } else if (extended) { queryExecution.toString } else { queryExecution.simpleString } Seq(Row(outputString)) } catch { case cause: TreeNodeException[_] => ("Error occurred during query planning: \n" + cause.getMessage).split("\n").map(Row(_)) } }
Example 59
Source File: resources.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.execution.command import java.io.File import java.net.URI import org.apache.hadoop.fs.Path import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} case class ListJarsCommand(jars: Seq[String] = Seq.empty[String]) extends RunnableCommand { override val output: Seq[Attribute] = { AttributeReference("Results", StringType, nullable = false)() :: Nil } override def run(sparkSession: SparkSession): Seq[Row] = { val jarList = sparkSession.sparkContext.listJars() if (jars.nonEmpty) { for { jarName <- jars.map(f => new Path(f).getName) jarPath <- jarList if jarPath.contains(jarName) } yield Row(jarPath) } else { jarList.map(Row(_)) } } }
Example 60
Source File: LogicalRelation.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.execution.datasources import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.catalog.CatalogTable import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} import org.apache.spark.sql.sources.BaseRelation import org.apache.spark.util.Utils override def newInstance(): this.type = { LogicalRelation( relation, expectedOutputAttributes.map(_.map(_.newInstance())), catalogTable).asInstanceOf[this.type] } override def refresh(): Unit = relation match { case fs: HadoopFsRelation => fs.location.refresh() case _ => // Do nothing. } override def simpleString: String = s"Relation[${Utils.truncatedString(output, ",")}] $relation" }
Example 61
Source File: TestRelations.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.plans.logical.LocalRelation import org.apache.spark.sql.types._ object TestRelations { val testRelation = LocalRelation(AttributeReference("a", IntegerType, nullable = true)()) val testRelation2 = LocalRelation( AttributeReference("a", StringType)(), AttributeReference("b", StringType)(), AttributeReference("c", DoubleType)(), AttributeReference("d", DecimalType(10, 2))(), AttributeReference("e", ShortType)()) val testRelation3 = LocalRelation( AttributeReference("e", ShortType)(), AttributeReference("f", StringType)(), AttributeReference("g", DoubleType)(), AttributeReference("h", DecimalType(10, 2))()) val nestedRelation = LocalRelation( AttributeReference("top", StructType( StructField("duplicateField", StringType) :: StructField("duplicateField", StringType) :: StructField("differentCase", StringType) :: StructField("differentcase", StringType) :: Nil ))()) val nestedRelation2 = LocalRelation( AttributeReference("top", StructType( StructField("aField", StringType) :: StructField("bField", StringType) :: StructField("cField", StringType) :: Nil ))()) val listRelation = LocalRelation( AttributeReference("list", ArrayType(IntegerType))()) val mapRelation = LocalRelation( AttributeReference("map", MapType(IntegerType, IntegerType))()) }
Example 62
Source File: resources.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.execution.command import java.io.File import java.net.URI import org.apache.hadoop.fs.Path import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} case class ListJarsCommand(jars: Seq[String] = Seq.empty[String]) extends RunnableCommand { override val output: Seq[Attribute] = { AttributeReference("Results", StringType, nullable = false)() :: Nil } override def run(sparkSession: SparkSession): Seq[Row] = { val jarList = sparkSession.sparkContext.listJars() if (jars.nonEmpty) { for { jarName <- jars.map(f => new Path(f).getName) jarPath <- jarList if jarPath.contains(jarName) } yield Row(jarPath) } else { jarList.map(Row(_)) } } }
Example 63
Source File: CarbonUDFTransformRule.scala From carbondata with Apache License 2.0
package org.apache.spark.sql.optimizer import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, PredicateHelper, ScalaUDF} import org.apache.spark.sql.catalyst.plans.logical.{Filter, Join, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.types.StringType import org.apache.carbondata.core.constants.CarbonCommonConstants class CarbonUDFTransformRule extends Rule[LogicalPlan] with PredicateHelper { override def apply(plan: LogicalPlan): LogicalPlan = { pushDownUDFToJoinLeftRelation(plan) } private def pushDownUDFToJoinLeftRelation(plan: LogicalPlan): LogicalPlan = { val output = plan.transform { case proj@Project(cols, Join( left, right, jointype: org.apache.spark.sql.catalyst.plans.JoinType, condition)) => var projectionToBeAdded: Seq[org.apache.spark.sql.catalyst.expressions.Alias] = Seq.empty var udfExists = false val newCols = cols.map { case a@Alias(s: ScalaUDF, name) if name.equalsIgnoreCase(CarbonCommonConstants.POSITION_ID) || name.equalsIgnoreCase(CarbonCommonConstants.CARBON_IMPLICIT_COLUMN_TUPLEID) => udfExists = true projectionToBeAdded :+= a AttributeReference(name, StringType, nullable = true)().withExprId(a.exprId) case other => other } if (udfExists) { val newLeft = left match { case Project(columns, logicalPlan) => Project(columns ++ projectionToBeAdded, logicalPlan) case filter: Filter => Project(filter.output ++ projectionToBeAdded, filter) case relation: LogicalRelation => Project(relation.output ++ projectionToBeAdded, relation) case other => other } Project(newCols, Join(newLeft, right, jointype, condition)) } else { proj } case other => other } output } }
Example 64
Source File: CarbonExplainCommand.scala From carbondata with Apache License 2.0
package org.apache.spark.sql.execution.command.table import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan, Union} import org.apache.spark.sql.execution.command.{ExplainCommand, MetadataCommand} import org.apache.spark.sql.types.StringType import org.apache.carbondata.core.profiler.ExplainCollector case class CarbonExplainCommand( child: LogicalPlan, override val output: Seq[Attribute] = Seq(AttributeReference("plan", StringType, nullable = true)())) extends MetadataCommand { override def processMetadata(sparkSession: SparkSession): Seq[Row] = { val explainCommand = child.asInstanceOf[ExplainCommand] setAuditInfo(Map("query" -> explainCommand.logicalPlan.simpleString)) val isCommand = explainCommand.logicalPlan match { case _: Command => true case Union(childern) if childern.forall(_.isInstanceOf[Command]) => true case _ => false } if (explainCommand.logicalPlan.isStreaming || isCommand) { explainCommand.run(sparkSession) } else { CarbonExplainCommand.collectProfiler(explainCommand, sparkSession) ++ explainCommand.run(sparkSession) } } override protected def opName: String = "EXPLAIN" } case class CarbonInternalExplainCommand( explainCommand: ExplainCommand, override val output: Seq[Attribute] = Seq(AttributeReference("plan", StringType, nullable = true)())) extends MetadataCommand { override def processMetadata(sparkSession: SparkSession): Seq[Row] = { CarbonExplainCommand .collectProfiler(explainCommand, sparkSession) ++ explainCommand.run(sparkSession) } override protected def opName: String = "Carbon EXPLAIN" } object CarbonExplainCommand { def collectProfiler( explain: ExplainCommand, sparkSession: SparkSession): Seq[Row] = { try { ExplainCollector.setup() if (ExplainCollector.enabled()) { val queryExecution = sparkSession.sessionState.executePlan(explain.logicalPlan) queryExecution.toRdd.partitions // For count(*) queries the explain collector will be disabled, so profiler // informations not required in such scenarios. if (null == ExplainCollector.getFormatedOutput) { Seq.empty } Seq(Row("== CarbonData Profiler ==\n" + ExplainCollector.getFormatedOutput)) } else { Seq.empty } } finally { ExplainCollector.remove() } } }
Example 65
Source File: CarbonShowTablesCommand.scala From carbondata with Apache License 2.0
package org.apache.spark.sql.execution.command.table import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.execution.command.MetadataCommand import org.apache.spark.sql.types.{BooleanType, StringType} private[sql] case class CarbonShowTablesCommand ( databaseName: Option[String], tableIdentifierPattern: Option[String]) extends MetadataCommand{ // The result of SHOW TABLES has three columns: database, tableName and isTemporary. override val output: Seq[Attribute] = { AttributeReference("database", StringType, nullable = false)() :: AttributeReference("tableName", StringType, nullable = false)() :: AttributeReference("isTemporary", BooleanType, nullable = false)() :: Nil } override def processMetadata(sparkSession: SparkSession): Seq[Row] = { // Since we need to return a Seq of rows, we will call getTables directly // instead of calling tables in sparkSession. val catalog = sparkSession.sessionState.catalog val db = databaseName.getOrElse(catalog.getCurrentDatabase) val tables = tableIdentifierPattern.map(catalog.listTables(db, _)).getOrElse(catalog.listTables(db)) val externalCatalog = sparkSession.sharedState.externalCatalog // this method checks whether the table is mainTable or MV based on property "isVisible" def isMainTable(tableIdent: TableIdentifier) = { var isMainTable = true try { isMainTable = externalCatalog.getTable(db, tableIdent.table).storage.properties .getOrElse("isVisible", true).toString.toBoolean } catch { case ex: Throwable => // ignore the exception for show tables } isMainTable } // tables will be filtered for all the MVs to show only main tables tables.collect { case tableIdent if isMainTable(tableIdent) => val isTemp = catalog.isTemporaryTable(tableIdent) Row(tableIdent.database.getOrElse("default"), tableIdent.table, isTemp) } } override protected def opName: String = "SHOW TABLES" }
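As in the other commands on this page, the result schema is declared up front as a Seq of AttributeReference, and every returned Row must line up positionally with those attributes. The stripped-down sketch below shows just that contract; it is an illustration outside CarbonData and ShowTablesOutputSketch is an invented name.

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.types.{BooleanType, StringType, StructType}

object ShowTablesOutputSketch {
  // The declared result schema: one AttributeReference per output column.
  val output: Seq[Attribute] =
    AttributeReference("database", StringType, nullable = false)() ::
    AttributeReference("tableName", StringType, nullable = false)() ::
    AttributeReference("isTemporary", BooleanType, nullable = false)() :: Nil

  def main(args: Array[String]): Unit = {
    // Rows are positional: each value must match the attribute at the same index.
    val rows = Seq(Row("default", "t1", false), Row("default", "t2", true))

    val schema = StructType.fromAttributes(output)
    println(schema.treeString)
    require(rows.forall(_.length == output.length), "row arity must match the declared output")
  }
}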
Example 66
Source File: CarbonCliCommand.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command.management

import java.util

import scala.collection.JavaConverters._

import org.apache.spark.sql.{CarbonEnv, Row, SparkSession}
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.execution.command.{Checker, DataCommand}
import org.apache.spark.sql.types.StringType

import org.apache.carbondata.tool.CarbonCli

case class CarbonCliCommand(
    databaseNameOp: Option[String],
    tableName: String,
    commandOptions: String) extends DataCommand {

  override def output: Seq[Attribute] = {
    Seq(AttributeReference("CarbonCli", StringType, nullable = false)())
  }

  override def processData(sparkSession: SparkSession): Seq[Row] = {
    Checker.validateTableExists(databaseNameOp, tableName, sparkSession)
    val carbonTable = CarbonEnv.getCarbonTable(databaseNameOp, tableName)(sparkSession)
    setAuditTable(carbonTable)
    setAuditInfo(Map("options" -> commandOptions))

    val commandArgs: Seq[String] = commandOptions.split("\\s+").map(_.trim)
    // If "-p" was not passed explicitly, append the table path for the
    // sub-commands that need it ("summary" and "benchmark").
    val finalCommands = if (commandArgs.exists(_.equalsIgnoreCase("-p"))) {
      commandArgs
    } else {
      val needPath = commandArgs.exists { command =>
        command.equalsIgnoreCase("summary") || command.equalsIgnoreCase("benchmark")
      }
      if (needPath) {
        commandArgs ++ Seq("-p", carbonTable.getTablePath)
      } else {
        commandArgs
      }
    }

    val summaryOutput = new util.ArrayList[String]()
    CarbonCli.run(finalCommands.toArray, summaryOutput, false)
    summaryOutput.asScala.map(x => Row(x))
  }

  override protected def opName: String = "CLI"
}
Example 67
Source File: CarbonShowMVCommand.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command.view

import java.util

import scala.collection.JavaConverters._

import org.apache.spark.sql.{CarbonEnv, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.execution.command.{Checker, DataCommand}
import org.apache.spark.sql.types.{BooleanType, StringType}

import org.apache.carbondata.core.view.{MVProperty, MVSchema}
import org.apache.carbondata.view.MVManagerInSpark

case class CarbonShowMVCommand(
    databaseNameOption: Option[String],
    relatedTableIdentifier: Option[TableIdentifier]) extends DataCommand {

  override def output: Seq[Attribute] = {
    Seq(
      AttributeReference("Database", StringType, nullable = false)(),
      AttributeReference("Name", StringType, nullable = false)(),
      AttributeReference("Status", StringType, nullable = false)(),
      AttributeReference("Refresh Mode", StringType, nullable = false)(),
      AttributeReference("Refresh Trigger Mode", StringType, nullable = false)(),
      AttributeReference("Properties", StringType, nullable = false)())
  }

  override def processData(session: SparkSession): Seq[Row] = {
    // Get mv schemas.
    val schemaList = new util.ArrayList[MVSchema]()
    val viewManager = MVManagerInSpark.get(session)
    relatedTableIdentifier match {
      case Some(table) =>
        val relatedTable = CarbonEnv.getCarbonTable(table)(session)
        setAuditTable(relatedTable)
        Checker.validateTableExists(table.database, table.table, session)
        if (databaseNameOption.isDefined) {
          schemaList.addAll(viewManager.getSchemasOnTable(databaseNameOption.get, relatedTable))
        } else {
          schemaList.addAll(viewManager.getSchemasOnTable(relatedTable))
        }
      case _ =>
        if (databaseNameOption.isDefined) {
          schemaList.addAll(viewManager.getSchemas(databaseNameOption.get))
        } else {
          schemaList.addAll(viewManager.getSchemas())
        }
    }
    // Convert mv schema to row.
    schemaList.asScala.map { schema =>
      Row(
        schema.getIdentifier.getDatabaseName,
        schema.getIdentifier.getTableName,
        schema.getStatus.name(),
        schema.getProperties.get(MVProperty.REFRESH_MODE),
        schema.getProperties.get(MVProperty.REFRESH_TRIGGER_MODE),
        schema.getPropertiesAsString
      )
    }
  }

  override protected def opName: String = "SHOW MATERIALIZED VIEW"
}
Example 68
Source File: CarbonShowStreamsCommand.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command.stream

import java.util.Date
import java.util.concurrent.TimeUnit

import org.apache.spark.sql.{CarbonEnv, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.execution.command.MetadataCommand
import org.apache.spark.sql.types.StringType

import org.apache.carbondata.stream.StreamJobManager

case class CarbonShowStreamsCommand(tableOp: Option[TableIdentifier]) extends MetadataCommand {

  override def output: Seq[Attribute] = {
    Seq(
      AttributeReference("Stream Name", StringType, nullable = false)(),
      AttributeReference("JobId", StringType, nullable = false)(),
      AttributeReference("Status", StringType, nullable = false)(),
      AttributeReference("Source", StringType, nullable = false)(),
      AttributeReference("Sink", StringType, nullable = false)(),
      AttributeReference("Start Time", StringType, nullable = false)(),
      AttributeReference("Time Elapse", StringType, nullable = false)())
  }

  override def processMetadata(sparkSession: SparkSession): Seq[Row] = {
    val jobs = tableOp match {
      case None => StreamJobManager.getAllJobs.toSeq
      case Some(table) =>
        val carbonTable = CarbonEnv.getCarbonTable(table.database, table.table)(sparkSession)
        setAuditTable(carbonTable)
        StreamJobManager.getAllJobs.filter { job =>
          job.sinkTable.equalsIgnoreCase(carbonTable.getTableName) &&
          job.sinkDb.equalsIgnoreCase(carbonTable.getDatabaseName)
        }.toSeq
    }

    jobs.map { job =>
      val elapsedTime = System.currentTimeMillis() - job.startTime
      Row(
        job.streamName,
        job.streamingQuery.id.toString,
        if (job.streamingQuery.isActive) "RUNNING" else "FAILED",
        s"${job.sourceDb}.${job.sourceTable}",
        s"${job.sinkDb}.${job.sinkTable}",
        new Date(job.startTime).toString,
        String.format(
          "%s days, %s hours, %s min, %s sec",
          TimeUnit.MILLISECONDS.toDays(elapsedTime).toString,
          TimeUnit.MILLISECONDS.toHours(elapsedTime).toString,
          TimeUnit.MILLISECONDS.toMinutes(elapsedTime).toString,
          TimeUnit.MILLISECONDS.toSeconds(elapsedTime).toString)
      )
    }
  }

  override protected def opName: String = "SHOW STREAMS"
}
Example 69
Source File: ExpressionHelper.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.mv.plans.modular

import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, ExprId, Expression, NamedExpression}
import org.apache.spark.sql.types.{DataType, Metadata}

object ExpressionHelper {

  def createReference(
      name: String,
      dataType: DataType,
      nullable: Boolean,
      metadata: Metadata,
      exprId: ExprId,
      qualifier: Option[String],
      attrRef: NamedExpression = null): AttributeReference = {
    val qf = if (qualifier.nonEmpty) Seq(qualifier.get) else Seq.empty
    AttributeReference(name, dataType, nullable, metadata)(exprId, qf)
  }

  def createAlias(
      child: Expression,
      name: String,
      exprId: ExprId,
      qualifier: Option[String]): Alias = {
    val qf = if (qualifier.nonEmpty) Seq(qualifier.get) else Seq.empty
    Alias(child, name)(exprId, qf, None)
  }

  def getTheLastQualifier(reference: AttributeReference): String = {
    reference.qualifier.reverse.head
  }
}
Example 70
Source File: ExpressionHelper.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.mv.plans.modular

import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference, ExprId, Expression, NamedExpression}
import org.apache.spark.sql.types.{DataType, Metadata}

object ExpressionHelper {

  def createReference(
      name: String,
      dataType: DataType,
      nullable: Boolean,
      metadata: Metadata,
      exprId: ExprId,
      qualifier: Option[String],
      attrRef: NamedExpression = null): AttributeReference = {
    AttributeReference(name, dataType, nullable, metadata)(exprId, qualifier)
  }

  def createAlias(
      child: Expression,
      name: String,
      exprId: ExprId = NamedExpression.newExprId,
      qualifier: Option[String] = None,
      explicitMetadata: Option[Metadata] = None,
      namedExpr: Option[NamedExpression] = None): Alias = {
    Alias(child, name)(exprId, qualifier, explicitMetadata)
  }

  def getTheLastQualifier(reference: AttributeReference): String = {
    reference.qualifier.head
  }
}
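Examples 69 and 70 are two variants of the same helper, kept separate because the second (curried) parameter list of AttributeReference and Alias changed across Spark releases: older 2.x versions take the qualifier as an Option[String], while later versions (roughly 2.4 onward) take a Seq[String], which is why the first variant unwraps the Option into a Seq. A hedged sketch of constructing a qualified reference on a Seq-based version (the names are illustrative):

import org.apache.spark.sql.catalyst.expressions.{AttributeReference, NamedExpression}
import org.apache.spark.sql.types.{IntegerType, Metadata}

// Sketch, assuming a Spark version whose qualifier parameter is Seq[String];
// on older 2.x releases pass Some("people") instead of Seq("people").
val ageRef = AttributeReference("age", IntegerType, nullable = true, Metadata.empty)(
  NamedExpression.newExprId,
  Seq("people"))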
Example 71
Source File: commands.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
import org.apache.spark.sql.catalyst.errors.TreeNodeException
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.debug._
import org.apache.spark.sql.execution.streaming.IncrementalExecution
import org.apache.spark.sql.streaming.OutputMode
import org.apache.spark.sql.types._

case class ExplainCommand(
    logicalPlan: LogicalPlan,
    override val output: Seq[Attribute] =
      Seq(AttributeReference("plan", StringType, nullable = true)()),
    extended: Boolean = false,
    codegen: Boolean = false)
  extends RunnableCommand {

  // Run through the optimizer to generate the physical plan.
  override def run(sparkSession: SparkSession): Seq[Row] = try {
    val queryExecution = if (logicalPlan.isStreaming) {
      // This is used only by explaining `Dataset/DataFrame` created by `spark.readStream`, so the
      // output mode does not matter since there is no `Sink`.
      new IncrementalExecution(sparkSession, logicalPlan, OutputMode.Append(), "<unknown>", 0, 0)
    } else {
      sparkSession.sessionState.executePlan(logicalPlan)
    }
    val outputString = if (codegen) {
      codegenString(queryExecution.executedPlan)
    } else if (extended) {
      queryExecution.toString
    } else {
      queryExecution.simpleString
    }
    Seq(Row(outputString))
  } catch {
    case cause: TreeNodeException[_] =>
      ("Error occurred during query planning: \n" + cause.getMessage).split("\n").map(Row(_))
  }
}
Example 72
Source File: LogicalPlanSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.plans

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.types.IntegerType

class LogicalPlanSuite extends SparkFunSuite {
  private var invocationCount = 0
  private val function: PartialFunction[LogicalPlan, LogicalPlan] = {
    case p: Project =>
      invocationCount += 1
      p
  }

  private val testRelation = LocalRelation()

  test("resolveOperator runs on operators") {
    invocationCount = 0
    val plan = Project(Nil, testRelation)
    plan resolveOperators function

    assert(invocationCount === 1)
  }

  test("resolveOperator runs on operators recursively") {
    invocationCount = 0
    val plan = Project(Nil, Project(Nil, testRelation))
    plan resolveOperators function

    assert(invocationCount === 2)
  }

  test("resolveOperator skips already resolved plans") {
    invocationCount = 0
    val plan = Project(Nil, Project(Nil, testRelation))
    plan.foreach(_.setAnalyzed())
    plan resolveOperators function

    assert(invocationCount === 0)
  }

  test("resolveOperator skips partially resolved plans") {
    invocationCount = 0
    val plan1 = Project(Nil, testRelation)
    val plan2 = Project(Nil, plan1)
    plan1.foreach(_.setAnalyzed())
    plan2 resolveOperators function

    assert(invocationCount === 1)
  }

  test("isStreaming") {
    val relation = LocalRelation(AttributeReference("a", IntegerType, nullable = true)())
    val incrementalRelation = new LocalRelation(
      Seq(AttributeReference("a", IntegerType, nullable = true)())) {
      override def isStreaming(): Boolean = true
    }

    case class TestBinaryRelation(left: LogicalPlan, right: LogicalPlan) extends BinaryNode {
      override def output: Seq[Attribute] = left.output ++ right.output
    }

    require(relation.isStreaming === false)
    require(incrementalRelation.isStreaming === true)
    assert(TestBinaryRelation(relation, relation).isStreaming === false)
    assert(TestBinaryRelation(incrementalRelation, relation).isStreaming === true)
    assert(TestBinaryRelation(relation, incrementalRelation).isStreaming === true)
    assert(TestBinaryRelation(incrementalRelation, incrementalRelation).isStreaming)
  }
}
Example 73
Source File: LogicalRelation.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation
import org.apache.spark.sql.catalyst.catalog.CatalogTable
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference}
import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics}
import org.apache.spark.sql.sources.BaseRelation
import org.apache.spark.util.Utils

// Note: the enclosing LogicalRelation case class declaration and most of its members
// are omitted from this excerpt; only the overrides below were extracted.

  override def newInstance(): this.type = {
    LogicalRelation(
      relation,
      expectedOutputAttributes.map(_.map(_.newInstance())),
      catalogTable).asInstanceOf[this.type]
  }

  override def refresh(): Unit = relation match {
    case fs: HadoopFsRelation => fs.location.refresh()
    case _ => // Do nothing.
  }

  override def simpleString: String = s"Relation[${Utils.truncatedString(output, ",")}] $relation"
}
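The newInstance() override above re-creates the relation's output attributes so the same table can appear twice in one plan (for example in a self-join) without duplicate expression ids. A small sketch of what newInstance does on a single attribute:

import org.apache.spark.sql.catalyst.expressions.AttributeReference
import org.apache.spark.sql.types.IntegerType

val a = AttributeReference("a", IntegerType)()
// newInstance keeps the name, type and nullability but allocates a fresh ExprId,
// which is what MultiInstanceRelation implementations rely on.
val a2 = a.newInstance()
assert(a2.name == a.name && a2.exprId != a.exprId)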
Example 74
Source File: TestRelations.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.catalyst.expressions.AttributeReference
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
import org.apache.spark.sql.types._

object TestRelations {
  val testRelation = LocalRelation(AttributeReference("a", IntegerType, nullable = true)())

  val testRelation2 = LocalRelation(
    AttributeReference("a", StringType)(),
    AttributeReference("b", StringType)(),
    AttributeReference("c", DoubleType)(),
    AttributeReference("d", DecimalType(10, 2))(),
    AttributeReference("e", ShortType)())

  val testRelation3 = LocalRelation(
    AttributeReference("e", ShortType)(),
    AttributeReference("f", StringType)(),
    AttributeReference("g", DoubleType)(),
    AttributeReference("h", DecimalType(10, 2))())

  val nestedRelation = LocalRelation(
    AttributeReference("top", StructType(
      StructField("duplicateField", StringType) ::
        StructField("duplicateField", StringType) ::
        StructField("differentCase", StringType) ::
        StructField("differentcase", StringType) :: Nil
    ))())

  val nestedRelation2 = LocalRelation(
    AttributeReference("top", StructType(
      StructField("aField", StringType) ::
        StructField("bField", StringType) ::
        StructField("cField", StringType) :: Nil
    ))())

  val listRelation = LocalRelation(
    AttributeReference("list", ArrayType(IntegerType))())

  val mapRelation = LocalRelation(
    AttributeReference("map", MapType(IntegerType, IntegerType))())
}
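These relations are convenient fixtures for analyzer tests because every AttributeReference(...)() call allocates its own ExprId. A small sketch of building one such relation and inspecting its output (the column names here are illustrative):

import org.apache.spark.sql.catalyst.expressions.AttributeReference
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
import org.apache.spark.sql.types.{IntegerType, StringType}

// Two columns with distinct, freshly allocated expression ids.
val relation = LocalRelation(
  AttributeReference("id", IntegerType, nullable = false)(),
  AttributeReference("name", StringType)())

val id = relation.output.head
assert(id.name == "id" && !id.nullable)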
Example 75
Source File: LogicalPlanSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.plans

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.types.IntegerType

class LogicalPlanSuite extends SparkFunSuite {
  private var invocationCount = 0
  private val function: PartialFunction[LogicalPlan, LogicalPlan] = {
    case p: Project =>
      invocationCount += 1
      p
  }

  private val testRelation = LocalRelation()

  test("resolveOperator runs on operators") {
    invocationCount = 0
    val plan = Project(Nil, testRelation)
    plan resolveOperators function

    assert(invocationCount === 1)
  }

  test("resolveOperator runs on operators recursively") {
    invocationCount = 0
    val plan = Project(Nil, Project(Nil, testRelation))
    plan resolveOperators function

    assert(invocationCount === 2)
  }

  test("resolveOperator skips already resolved plans") {
    invocationCount = 0
    val plan = Project(Nil, Project(Nil, testRelation))
    plan.foreach(_.setAnalyzed())
    plan resolveOperators function

    assert(invocationCount === 0)
  }

  test("resolveOperator skips partially resolved plans") {
    invocationCount = 0
    val plan1 = Project(Nil, testRelation)
    val plan2 = Project(Nil, plan1)
    plan1.foreach(_.setAnalyzed())
    plan2 resolveOperators function

    assert(invocationCount === 1)
  }

  test("isStreaming") {
    val relation = LocalRelation(AttributeReference("a", IntegerType, nullable = true)())
    val incrementalRelation = new LocalRelation(
      Seq(AttributeReference("a", IntegerType, nullable = true)())) {
      override def isStreaming(): Boolean = true
    }

    case class TestBinaryRelation(left: LogicalPlan, right: LogicalPlan) extends BinaryNode {
      override def output: Seq[Attribute] = left.output ++ right.output
    }

    require(relation.isStreaming === false)
    require(incrementalRelation.isStreaming === true)
    assert(TestBinaryRelation(relation, relation).isStreaming === false)
    assert(TestBinaryRelation(incrementalRelation, relation).isStreaming === true)
    assert(TestBinaryRelation(relation, incrementalRelation).isStreaming === true)
    assert(TestBinaryRelation(incrementalRelation, incrementalRelation).isStreaming)
  }
}
Example 76
Source File: HiveClientSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.client

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hive.conf.HiveConf

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal}
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.types.IntegerType

class HiveClientSuite extends SparkFunSuite {
  private val clientBuilder = new HiveClientBuilder

  private val tryDirectSqlKey = HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL.varname

  test(s"getPartitionsByFilter returns all partitions when $tryDirectSqlKey=false") {
    val testPartitionCount = 5

    val storageFormat = CatalogStorageFormat(
      locationUri = None,
      inputFormat = None,
      outputFormat = None,
      serde = None,
      compressed = false,
      properties = Map.empty)

    val hadoopConf = new Configuration()
    hadoopConf.setBoolean(tryDirectSqlKey, false)
    val client = clientBuilder.buildClient(HiveUtils.hiveExecutionVersion, hadoopConf)
    client.runSqlHive("CREATE TABLE test (value INT) PARTITIONED BY (part INT)")

    val partitions = (1 to testPartitionCount).map { part =>
      CatalogTablePartition(Map("part" -> part.toString), storageFormat)
    }
    client.createPartitions("default", "test", partitions, ignoreIfExists = false)

    // With direct SQL disabled, the filter cannot be pushed to the metastore,
    // so all partitions are returned regardless of the predicate.
    val filteredPartitions = client.getPartitionsByFilter(
      client.getTable("default", "test"),
      Seq(EqualTo(AttributeReference("part", IntegerType)(), Literal(3))))

    assert(filteredPartitions.size == testPartitionCount)
  }
}
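The partition filter in the test above is an ordinary Catalyst predicate built from an AttributeReference over the partition column and a Literal. A minimal sketch of that construction (the column name "part" mirrors the test; nothing else is implied about the Hive client API):

import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal}
import org.apache.spark.sql.types.IntegerType

// A `part = 3` predicate of the kind passed to getPartitionsByFilter.
val part = AttributeReference("part", IntegerType)()
val predicate = EqualTo(part, Literal(3))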
Example 77
Source File: ResolveCountDistinctStarSuite.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.catalyst.dsl.plans.DslLogicalPlan
import org.apache.spark.sql.catalyst.expressions.{Alias, AttributeReference}
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete, Count}
import org.apache.spark.sql.catalyst.plans.logical.Aggregate
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.sources.BaseRelation
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.scalatest.FunSuite
import org.scalatest.Inside._
import org.scalatest.mock.MockitoSugar

class ResolveCountDistinctStarSuite extends FunSuite with MockitoSugar {
  val persons = new LogicalRelation(new BaseRelation {
    override def sqlContext: SQLContext = mock[SQLContext]

    override def schema: StructType = StructType(Seq(
      StructField("age", IntegerType),
      StructField("name", StringType)
    ))
  })

  test("Count distinct star is resolved correctly") {
    val projection = persons.select(UnresolvedAlias(
      AggregateExpression(Count(UnresolvedStar(None) :: Nil), Complete, true)))
    val stillNotCompletelyResolvedAggregate = SimpleAnalyzer.execute(projection)
    val resolvedAggregate = ResolveCountDistinctStar(SimpleAnalyzer)
      .apply(stillNotCompletelyResolvedAggregate)
    inside(resolvedAggregate) {
      case Aggregate(Nil,
          ArrayBuffer(Alias(AggregateExpression(Count(expressions), Complete, true), _)), _) =>
        assert(expressions.collect {
          case a: AttributeReference => a.name
        }.toSet == Set("name", "age"))
    }
    assert(resolvedAggregate.resolved)
  }
}
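The assertion in the test above relies on Catalyst expressions being trees, so collect can gather every AttributeReference reachable from an expression. A self-contained sketch of that idiom (the expression itself is made up for illustration):

import org.apache.spark.sql.catalyst.expressions.{Add, AttributeReference, Literal}
import org.apache.spark.sql.types.IntegerType

val a = AttributeReference("a", IntegerType)()
val b = AttributeReference("b", IntegerType)()
val expr = Add(a, Add(b, Literal(1)))

// collect walks the whole expression tree, so both column names are found.
val referenced = expr.collect { case ref: AttributeReference => ref.name }.toSet
assert(referenced == Set("a", "b"))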
Example 78
Source File: LogicalPlanExtractorSuite.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.tablefunctions

import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, ExprId}
import org.apache.spark.sql.catalyst.plans.logical.Project
import org.apache.spark.sql.types._
import org.scalatest.FunSuite

class LogicalPlanExtractorSuite extends FunSuite {
  def attr(name: String, dataType: DataType, id: Int, nullable: Boolean = false): Attribute = {
    AttributeReference(name, dataType, nullable)(ExprId(id))
  }

  val attributes = Seq(attr("foo", IntegerType, 0), attr("bar", StringType, 1))

  test("tablePart") {
    val project = Project(attributes, null)
    val tablePart = new LogicalPlanExtractor(project).tablePart
    assert(tablePart == "" :: Nil)
  }
}
Example 79
Source File: ResolveHierarchy.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.catalyst.expressions.AttributeReference
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.types.NodeType

// Note: the enclosing rule's class declaration (which provides the `analyzer` used below)
// is omitted from this excerpt; only the private helpers were extracted.

  private[this] def resolveHierarchyNodeAttr(h: Hierarchy): Hierarchy = {
    val a = h.node
    h.copy(node = AttributeReference(a.name, NodeType, nullable = false)())
  }

  private[this] def resolveSpec(h: Hierarchy): Hierarchy = h match {
    case h @ Hierarchy(spec: LevelBasedHierarchySpec, _) =>
      h.copy(spec = spec.copy(levels = spec.levels.map {
        case u @ UnresolvedAttribute(nameParts) =>
          spec.resolveSpec(nameParts, analyzer.resolver).getOrElse(u)
        case other => other
      }))
    case h @ Hierarchy(spec: AdjacencyListHierarchySpec, _) =>
      h.copy(spec = spec.copy(parenthoodExp = spec.parenthoodExp.mapChildren {
        case u @ UnresolvedAttribute(nameParts) =>
          spec.resolveSpec(nameParts, analyzer.resolver).getOrElse(u)
        case other => other
      }))
  }
}
Example 80
Source File: UseAliasesForFunctionsInGroupings.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference}
import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, LogicalPlan, Subquery}
import org.apache.spark.sql.catalyst.rules.Rule

object UseAliasesForFunctionsInGroupings extends Rule[LogicalPlan] {

  def apply(plan: LogicalPlan): LogicalPlan =
    plan transformUp {
      case agg @ Aggregate(groupingExpressions, aggregateExpressions, child) =>
        // Replace every non-attribute grouping expression with the attribute of the
        // alias that the select list defines for the same expression.
        val fixedGroupingExpressions = groupingExpressions.map {
          case e: AttributeReference => e
          case e =>
            val aliasOpt = aggregateExpressions.find {
              case Alias(aliasChild, aliasName) => aliasChild == e
              case _ => false
            }
            aliasOpt match {
              case Some(alias) => alias.toAttribute
              case None => sys.error(s"Cannot resolve Alias for $e")
            }
        }
        agg.copy(groupingExpressions = fixedGroupingExpressions)
    }
}
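The rule above swaps a grouping expression for alias.toAttribute, which for a resolved Alias yields an AttributeReference carrying the alias's name and ExprId. A small sketch of that conversion (the expression and names are illustrative):

import org.apache.spark.sql.catalyst.expressions.{Add, Alias, AttributeReference, Literal}
import org.apache.spark.sql.types.IntegerType

val a = AttributeReference("a", IntegerType)()
val aliased = Alias(Add(a, Literal(1)), "a_plus_1")()

// For a resolved alias, toAttribute is an AttributeReference sharing the alias's ExprId.
val attr = aliased.toAttribute.asInstanceOf[AttributeReference]
assert(attr.name == "a_plus_1" && attr.exprId == aliased.exprId)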
Example 81
Source File: DescCommand.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.sources.commands.hive

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.types.StringType
import org.apache.spark.sql.{Row, SQLContext}

case class DescCommand(ident: TableIdentifier) extends HiveRunnableCommand {

  override protected val commandName: String = s"DESC $ident"

  override def execute(sqlContext: SQLContext): Seq[Row] = {
    val plan = sqlContext.catalog.lookupRelation(ident)
    if (plan.resolved) {
      plan.schema.map { field =>
        Row(field.name, field.dataType.simpleString, None)
      }
    } else {
      Seq.empty
    }
  }

  override lazy val output: Seq[Attribute] =
    AttributeReference("col_name", StringType)() ::
    AttributeReference("data_type", StringType)() ::
    AttributeReference("comment", StringType)() :: Nil
}