org.apache.spark.sql.types.BooleanType Scala Examples
The following examples show how to use org.apache.spark.sql.types.BooleanType.
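As a quick orientation before the project examples, here is a minimal, self-contained sketch of BooleanType in use. The object name, column names, and sample rows are illustrative (they are not taken from any project below); the sketch only shows a BooleanType field declared in a schema, used as a filter predicate, and inspected at runtime.

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types.{BooleanType, StringType, StructField, StructType}

object BooleanTypeSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("BooleanTypeSketch").master("local").getOrCreate()

    // A schema with a non-nullable BooleanType column.
    val schema = StructType(
      StructField("name", StringType, nullable = false) ::
      StructField("active", BooleanType, nullable = false) :: Nil)

    val rows = spark.sparkContext.parallelize(Seq(Row("a", true), Row("b", false)))
    val df = spark.createDataFrame(rows, schema)

    // A boolean column can be used directly as a filter predicate.
    import spark.implicits._
    df.filter($"active").show()

    // The declared data type of a column can be inspected at runtime.
    assert(df.schema("active").dataType == BooleanType)

    spark.stop()
  }
}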
Example 1
Source File: ListTablesSuite.scala From iolap with Apache License 2.0
package org.apache.spark.sql

import org.scalatest.BeforeAndAfter

import org.apache.spark.sql.test.TestSQLContext
import org.apache.spark.sql.test.TestSQLContext._
import org.apache.spark.sql.types.{BooleanType, StringType, StructField, StructType}

class ListTablesSuite extends QueryTest with BeforeAndAfter {

  import org.apache.spark.sql.test.TestSQLContext.implicits._

  val df = sparkContext.parallelize((1 to 10).map(i => (i, s"str$i"))).toDF("key", "value")

  before {
    df.registerTempTable("ListTablesSuiteTable")
  }

  after {
    catalog.unregisterTable(Seq("ListTablesSuiteTable"))
  }

  test("get all tables") {
    checkAnswer(
      tables().filter("tableName = 'ListTablesSuiteTable'"),
      Row("ListTablesSuiteTable", true))

    checkAnswer(
      sql("SHOW tables").filter("tableName = 'ListTablesSuiteTable'"),
      Row("ListTablesSuiteTable", true))

    catalog.unregisterTable(Seq("ListTablesSuiteTable"))
    assert(tables().filter("tableName = 'ListTablesSuiteTable'").count() === 0)
  }

  test("getting all Tables with a database name has no impact on returned table names") {
    checkAnswer(
      tables("DB").filter("tableName = 'ListTablesSuiteTable'"),
      Row("ListTablesSuiteTable", true))

    checkAnswer(
      sql("show TABLES in DB").filter("tableName = 'ListTablesSuiteTable'"),
      Row("ListTablesSuiteTable", true))

    catalog.unregisterTable(Seq("ListTablesSuiteTable"))
    assert(tables().filter("tableName = 'ListTablesSuiteTable'").count() === 0)
  }

  test("query the returned DataFrame of tables") {
    val expectedSchema = StructType(
      StructField("tableName", StringType, false) ::
      StructField("isTemporary", BooleanType, false) :: Nil)

    Seq(tables(), sql("SHOW TABLes")).foreach {
      case tableDF =>
        assert(expectedSchema === tableDF.schema)

        tableDF.registerTempTable("tables")
        checkAnswer(
          sql("SELECT isTemporary, tableName from tables WHERE tableName = 'ListTablesSuiteTable'"),
          Row(true, "ListTablesSuiteTable")
        )
        checkAnswer(
          tables().filter("tableName = 'tables'").select("tableName", "isTemporary"),
          Row("tables", true))
        dropTempTable("tables")
    }
  }
}
Example 2
Source File: DiscreteDistributionBuilder.scala From seahorse-workflow-executor with Apache License 2.0
package io.deepsense.deeplang.doperables.dataframe.report.distribution.discrete

import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{BooleanType, StringType, StructField}

import io.deepsense.deeplang.doperables.dataframe.report.DataFrameReportGenerator
import io.deepsense.deeplang.doperables.dataframe.report.distribution.{DistributionBuilder, NoDistributionReasons}
import io.deepsense.deeplang.doperables.report.ReportUtils
import io.deepsense.deeplang.utils.aggregators.Aggregator
import io.deepsense.deeplang.utils.aggregators.AggregatorBatch.BatchedResult
import io.deepsense.reportlib.model.{DiscreteDistribution, Distribution, NoDistribution}

case class DiscreteDistributionBuilder(
    categories: Aggregator[Option[scala.collection.mutable.Map[String, Long]], Row],
    missing: Aggregator[Long, Row],
    field: StructField)
  extends DistributionBuilder {

  def allAggregators: Seq[Aggregator[_, Row]] = Seq(categories, missing)

  override def build(results: BatchedResult): Distribution = {
    val categoriesMap = results.forAggregator(categories)
    val nullsCount = results.forAggregator(missing)

    categoriesMap match {
      case Some(occurrencesMap) => {
        val labels = field.dataType match {
          case StringType => occurrencesMap.keys.toSeq.sorted
          // We always want two labels, even when all elements are true or false
          case BooleanType => Seq(false.toString, true.toString)
        }
        val counts = labels.map(occurrencesMap.getOrElse(_, 0L))
        DiscreteDistribution(
          field.name,
          s"Discrete distribution for ${field.name} column",
          nullsCount,
          labels.map(ReportUtils.shortenLongStrings(_, DataFrameReportGenerator.StringPreviewMaxLength)),
          counts)
      }
      case None => NoDistribution(
        field.name,
        NoDistributionReasons.TooManyDistinctCategoricalValues
      )
    }
  }
}
Example 3
Source File: SpatialJoin.scala From Simba with Apache License 2.0
package org.apache.spark.sql.simba.plans

import org.apache.spark.sql.simba.expression.{InCircleRange, InKNN}
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.catalyst.plans.logical.{BinaryNode, LogicalPlan}
import org.apache.spark.sql.types.BooleanType

case class SpatialJoin(left: LogicalPlan, right: LogicalPlan, joinType: SpatialJoinType,
                       condition: Option[Expression]) extends BinaryNode {
  override def output: Seq[Attribute] = {
    joinType match {
      case KNNJoin =>
        require(condition.get.isInstanceOf[InKNN])
        left.output ++ right.output
      case ZKNNJoin =>
        require(condition.get.isInstanceOf[InKNN])
        left.output ++ right.output
      case DistanceJoin =>
        require(condition.get.isInstanceOf[InCircleRange])
        left.output ++ right.output.map(_.withNullability(true))
      case _ =>
        left.output ++ right.output
    }
  }

  def selfJoinResolved: Boolean = left.outputSet.intersect(right.outputSet).isEmpty

  // Joins are only resolved if they don't introduce ambiguous expression ids.
  override lazy val resolved: Boolean = {
    childrenResolved &&
      expressions.forall(_.resolved) &&
      selfJoinResolved &&
      condition.forall(_.dataType == BooleanType)
  }
}
Example 4
Source File: A_1_BasicOperation.scala From wow-spark with MIT License
package com.sev7e0.wow.structured_streaming

import java.sql.Timestamp

import org.apache.spark.sql.types.{BooleanType, StringType, StructType, TimestampType}
import org.apache.spark.sql.{Dataset, SparkSession}

object A_1_BasicOperation {

  // To use a timestamp field, the case class must use java.sql.Timestamp,
  // which Catalyst treats as TimestampType.
  case class DeviceData(device: String, deviceType: String, signal: Double, time: Timestamp)

  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName(A_1_BasicOperation.getClass.getName)
      .master("local")
      .getOrCreate()

    val timeStructType = new StructType().add("device", StringType)
      .add("deviceType", StringType)
      .add("signal", BooleanType)
      .add("time", TimestampType)

    val dataFrame = spark.read.json("src/main/resources/sparkresource/device.json")
    import spark.implicits._
    val ds: Dataset[DeviceData] = dataFrame.as[DeviceData]

    // Untyped (SQL-like) query
    dataFrame.select("device").where("signal>10").show()
    // Typed query
    ds.filter(_.signal > 10).map(_.device).show()

    // Untyped groupBy with a count
    dataFrame.groupBy("deviceType").count().show()

    import org.apache.spark.sql.expressions.scalalang.typed
    // Typed aggregation: compute the average signal per device type
    ds.groupByKey(_.deviceType).agg(typed.avg(_.signal)).show()

    // Alternatively, create a temporary view and query it with SQL
    dataFrame.createOrReplaceTempView("device")
    spark.sql("select * from device").show()

    // Use isStreaming to check whether the DataFrame carries streaming data
    println(dataFrame.isStreaming)
  }
}
Example 5
Source File: ListTablesSuite.scala From BigDatalog with Apache License 2.0
package org.apache.spark.sql

import org.scalatest.BeforeAndAfter

import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types.{BooleanType, StringType, StructField, StructType}
import org.apache.spark.sql.catalyst.TableIdentifier

class ListTablesSuite extends QueryTest with BeforeAndAfter with SharedSQLContext {
  import testImplicits._

  private lazy val df = (1 to 10).map(i => (i, s"str$i")).toDF("key", "value")

  before {
    df.registerTempTable("ListTablesSuiteTable")
  }

  after {
    sqlContext.catalog.unregisterTable(TableIdentifier("ListTablesSuiteTable"))
  }

  test("get all tables") {
    checkAnswer(
      sqlContext.tables().filter("tableName = 'ListTablesSuiteTable'"),
      Row("ListTablesSuiteTable", true))

    checkAnswer(
      sql("SHOW tables").filter("tableName = 'ListTablesSuiteTable'"),
      Row("ListTablesSuiteTable", true))

    sqlContext.catalog.unregisterTable(TableIdentifier("ListTablesSuiteTable"))
    assert(sqlContext.tables().filter("tableName = 'ListTablesSuiteTable'").count() === 0)
  }

  test("getting all Tables with a database name has no impact on returned table names") {
    checkAnswer(
      sqlContext.tables("DB").filter("tableName = 'ListTablesSuiteTable'"),
      Row("ListTablesSuiteTable", true))

    checkAnswer(
      sql("show TABLES in DB").filter("tableName = 'ListTablesSuiteTable'"),
      Row("ListTablesSuiteTable", true))

    sqlContext.catalog.unregisterTable(TableIdentifier("ListTablesSuiteTable"))
    assert(sqlContext.tables().filter("tableName = 'ListTablesSuiteTable'").count() === 0)
  }

  test("query the returned DataFrame of tables") {
    val expectedSchema = StructType(
      StructField("tableName", StringType, false) ::
      StructField("isTemporary", BooleanType, false) :: Nil)

    Seq(sqlContext.tables(), sql("SHOW TABLes")).foreach {
      case tableDF =>
        assert(expectedSchema === tableDF.schema)

        tableDF.registerTempTable("tables")
        checkAnswer(
          sql(
            "SELECT isTemporary, tableName from tables WHERE tableName = 'ListTablesSuiteTable'"),
          Row(true, "ListTablesSuiteTable")
        )
        checkAnswer(
          sqlContext.tables().filter("tableName = 'tables'").select("tableName", "isTemporary"),
          Row("tables", true))
        sqlContext.dropTempTable("tables")
    }
  }
}
Example 6
Source File: MySQLDialect.scala From BigDatalog with Apache License 2.0
package org.apache.spark.sql.jdbc

import java.sql.Types

import org.apache.spark.sql.types.{BooleanType, LongType, DataType, MetadataBuilder}

private case object MySQLDialect extends JdbcDialect {

  override def canHandle(url : String): Boolean = url.startsWith("jdbc:mysql")

  override def getCatalystType(
      sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = {
    if (sqlType == Types.VARBINARY && typeName.equals("BIT") && size != 1) {
      // This could instead be a BinaryType if we'd rather return bit-vectors of up to 64 bits as
      // byte arrays instead of longs.
      md.putLong("binarylong", 1)
      Option(LongType)
    } else if (sqlType == Types.BIT && typeName.equals("TINYINT")) {
      Option(BooleanType)
    } else None
  }

  override def quoteIdentifier(colName: String): String = {
    s"`$colName`"
  }

  override def getTableExistsQuery(table: String): String = {
    s"SELECT 1 FROM $table LIMIT 1"
  }
}
Example 7
Source File: GroupAnd.scala From mimir with Apache License 2.0
package mimir.exec.spark.udf

import org.apache.spark.sql.catalyst.expressions.aggregate.DeclarativeAggregate
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.util.TypeUtils
import org.apache.spark.sql.types.{ DataType, BooleanType }
import org.apache.spark.sql.catalyst.expressions.{ AttributeReference, Literal, And }

case class GroupAnd(child: org.apache.spark.sql.catalyst.expressions.Expression)
  extends DeclarativeAggregate {

  override def children: Seq[org.apache.spark.sql.catalyst.expressions.Expression] = child :: Nil

  override def nullable: Boolean = false

  // Return data type.
  override def dataType: DataType = BooleanType

  override def checkInputDataTypes(): TypeCheckResult =
    TypeUtils.checkForOrderingExpr(child.dataType, "function group_and")

  private lazy val group_and = AttributeReference("group_and", BooleanType)()

  override lazy val aggBufferAttributes: Seq[AttributeReference] = group_and :: Nil

  override lazy val initialValues: Seq[Literal] = Seq(
    Literal.create(true, BooleanType)
  )

  override lazy val updateExpressions: Seq[org.apache.spark.sql.catalyst.expressions.Expression] = Seq(
    And(group_and, child)
  )

  override lazy val mergeExpressions: Seq[org.apache.spark.sql.catalyst.expressions.Expression] = {
    Seq(
      And(group_and.left, group_and.right)
    )
  }

  override lazy val evaluateExpression: AttributeReference = group_and
}
Example 8
Source File: GroupOr.scala From mimir with Apache License 2.0
package mimir.exec.spark.udf

import org.apache.spark.sql.catalyst.expressions.aggregate.DeclarativeAggregate
import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
import org.apache.spark.sql.catalyst.util.TypeUtils
import org.apache.spark.sql.types.{ DataType, BooleanType }
import org.apache.spark.sql.catalyst.expressions.{ AttributeReference, Literal, Or }

case class GroupOr(child: org.apache.spark.sql.catalyst.expressions.Expression)
  extends DeclarativeAggregate {

  override def children: Seq[org.apache.spark.sql.catalyst.expressions.Expression] = child :: Nil

  override def nullable: Boolean = false

  // Return data type.
  override def dataType: DataType = BooleanType

  override def checkInputDataTypes(): TypeCheckResult =
    TypeUtils.checkForOrderingExpr(child.dataType, "function group_or")

  private lazy val group_or = AttributeReference("group_or", BooleanType)()

  override lazy val aggBufferAttributes: Seq[AttributeReference] = group_or :: Nil

  override lazy val initialValues: Seq[Literal] = Seq(
    Literal.create(false, BooleanType)
  )

  override lazy val updateExpressions: Seq[org.apache.spark.sql.catalyst.expressions.Expression] = Seq(
    Or(group_or, child)
  )

  override lazy val mergeExpressions: Seq[org.apache.spark.sql.catalyst.expressions.Expression] = {
    Seq(
      Or(group_or.left, group_or.right)
    )
  }

  override lazy val evaluateExpression: AttributeReference = group_or
}
Example 9
Source File: BatchEvalPythonExecSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution.python

import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer

import org.apache.spark.api.python.{PythonEvalType, PythonFunction}
import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, GreaterThan, In}
import org.apache.spark.sql.execution.{FilterExec, InputAdapter, SparkPlanTest, WholeStageCodegenExec}
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types.BooleanType

class BatchEvalPythonExecSuite extends SparkPlanTest with SharedSQLContext {
  import testImplicits.newProductEncoder
  import testImplicits.localSeqToDatasetHolder

  override def beforeAll(): Unit = {
    super.beforeAll()
    spark.udf.registerPython("dummyPythonUDF", new MyDummyPythonUDF)
  }

  override def afterAll(): Unit = {
    spark.sessionState.functionRegistry.dropFunction(FunctionIdentifier("dummyPythonUDF"))
    super.afterAll()
  }

  test("Python UDF: push down deterministic FilterExec predicates") {
    val df = Seq(("Hello", 4)).toDF("a", "b")
      .where("dummyPythonUDF(b) and dummyPythonUDF(a) and a in (3, 4)")
    val qualifiedPlanNodes = df.queryExecution.executedPlan.collect {
      case f @ FilterExec(
          And(_: AttributeReference, _: AttributeReference),
          InputAdapter(_: BatchEvalPythonExec)) => f
      case b @ BatchEvalPythonExec(_, _, WholeStageCodegenExec(FilterExec(_: In, _))) => b
    }
    assert(qualifiedPlanNodes.size == 2)
  }

  test("Nested Python UDF: push down deterministic FilterExec predicates") {
    val df = Seq(("Hello", 4)).toDF("a", "b")
      .where("dummyPythonUDF(a, dummyPythonUDF(a, b)) and a in (3, 4)")
    val qualifiedPlanNodes = df.queryExecution.executedPlan.collect {
      case f @ FilterExec(_: AttributeReference, InputAdapter(_: BatchEvalPythonExec)) => f
      case b @ BatchEvalPythonExec(_, _, WholeStageCodegenExec(FilterExec(_: In, _))) => b
    }
    assert(qualifiedPlanNodes.size == 2)
  }

  test("Python UDF: no push down on non-deterministic") {
    val df = Seq(("Hello", 4)).toDF("a", "b")
      .where("b > 4 and dummyPythonUDF(a) and rand() > 0.3")
    val qualifiedPlanNodes = df.queryExecution.executedPlan.collect {
      case f @ FilterExec(
          And(_: AttributeReference, _: GreaterThan),
          InputAdapter(_: BatchEvalPythonExec)) => f
      case b @ BatchEvalPythonExec(_, _, WholeStageCodegenExec(_: FilterExec)) => b
    }
    assert(qualifiedPlanNodes.size == 2)
  }

  test("Python UDF: push down on deterministic predicates after the first non-deterministic") {
    val df = Seq(("Hello", 4)).toDF("a", "b")
      .where("dummyPythonUDF(a) and rand() > 0.3 and b > 4")
    val qualifiedPlanNodes = df.queryExecution.executedPlan.collect {
      case f @ FilterExec(
          And(_: AttributeReference, _: GreaterThan),
          InputAdapter(_: BatchEvalPythonExec)) => f
      case b @ BatchEvalPythonExec(_, _, WholeStageCodegenExec(_: FilterExec)) => b
    }
    assert(qualifiedPlanNodes.size == 2)
  }

  test("Python UDF refers to the attributes from more than one child") {
    val df = Seq(("Hello", 4)).toDF("a", "b")
    val df2 = Seq(("Hello", 4)).toDF("c", "d")
    val joinDF = df.crossJoin(df2).where("dummyPythonUDF(a, c) == dummyPythonUDF(d, c)")
    val qualifiedPlanNodes = joinDF.queryExecution.executedPlan.collect {
      case b: BatchEvalPythonExec => b
    }
    assert(qualifiedPlanNodes.size == 1)
  }
}

// This Python UDF is dummy and just for testing. Unable to execute.
class DummyUDF extends PythonFunction(
  command = Array[Byte](),
  envVars = Map("" -> "").asJava,
  pythonIncludes = ArrayBuffer("").asJava,
  pythonExec = "",
  pythonVer = "",
  broadcastVars = null,
  accumulator = null)

class MyDummyPythonUDF extends UserDefinedPythonFunction(
  name = "dummyUDF",
  func = new DummyUDF,
  dataType = BooleanType,
  pythonEvalType = PythonEvalType.SQL_BATCHED_UDF,
  udfDeterministic = true)
Example 10
Source File: MySQLDialect.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.jdbc

import java.sql.Types

import org.apache.spark.sql.types.{BooleanType, DataType, LongType, MetadataBuilder}

private case object MySQLDialect extends JdbcDialect {

  override def canHandle(url : String): Boolean = url.startsWith("jdbc:mysql")

  override def getCatalystType(
      sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = {
    if (sqlType == Types.VARBINARY && typeName.equals("BIT") && size != 1) {
      // This could instead be a BinaryType if we'd rather return bit-vectors of up to 64 bits as
      // byte arrays instead of longs.
      md.putLong("binarylong", 1)
      Option(LongType)
    } else if (sqlType == Types.BIT && typeName.equals("TINYINT")) {
      Option(BooleanType)
    } else None
  }

  override def quoteIdentifier(colName: String): String = {
    s"`$colName`"
  }

  override def getTableExistsQuery(table: String): String = {
    s"SELECT 1 FROM $table LIMIT 1"
  }

  override def isCascadingTruncateTable(): Option[Boolean] = Some(false)
}
Example 11
Source File: LikeSimplificationSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.types.{BooleanType, StringType}

class LikeSimplificationSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("Like Simplification", Once,
        LikeSimplification) :: Nil
  }

  val testRelation = LocalRelation('a.string)

  test("simplify Like into StartsWith") {
    val originalQuery =
      testRelation
        .where(('a like "abc%") || ('a like "abc\\%"))

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer = testRelation
      .where(StartsWith('a, "abc") || ('a like "abc\\%"))
      .analyze

    comparePlans(optimized, correctAnswer)
  }

  test("simplify Like into EndsWith") {
    val originalQuery =
      testRelation
        .where('a like "%xyz")

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer = testRelation
      .where(EndsWith('a, "xyz"))
      .analyze

    comparePlans(optimized, correctAnswer)
  }

  test("simplify Like into startsWith and EndsWith") {
    val originalQuery =
      testRelation
        .where(('a like "abc\\%def") || ('a like "abc%def"))

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer = testRelation
      .where(('a like "abc\\%def") ||
        (Length('a) >= 6 && (StartsWith('a, "abc") && EndsWith('a, "def"))))
      .analyze

    comparePlans(optimized, correctAnswer)
  }

  test("simplify Like into Contains") {
    val originalQuery =
      testRelation
        .where(('a like "%mn%") || ('a like "%mn\\%"))

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer = testRelation
      .where(Contains('a, "mn") || ('a like "%mn\\%"))
      .analyze

    comparePlans(optimized, correctAnswer)
  }

  test("simplify Like into EqualTo") {
    val originalQuery =
      testRelation
        .where(('a like "") || ('a like "abc"))

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer = testRelation
      .where(('a === "") || ('a === "abc"))
      .analyze

    comparePlans(optimized, correctAnswer)
  }

  test("null pattern") {
    val originalQuery = testRelation.where('a like Literal(null, StringType)).analyze
    val optimized = Optimize.execute(originalQuery)
    comparePlans(optimized, testRelation.where(Literal(null, BooleanType)).analyze)
  }
}
Example 12
Source File: ListTablesSuite.scala From spark1.52 with Apache License 2.0
package org.apache.spark.sql

import org.scalatest.BeforeAndAfter

import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types.{BooleanType, StringType, StructField, StructType}

// List tables test suite
class ListTablesSuite extends QueryTest with BeforeAndAfter with SharedSQLContext {
  import testImplicits._

  private lazy val df = (1 to 10).map(i => (i, s"str$i")).toDF("key", "value")

  before {
    df.registerTempTable("ListTablesSuiteTable")
  }

  after {
    ctx.catalog.unregisterTable(Seq("ListTablesSuiteTable"))
  }

  test("get all tables") { // get all tables
    ctx.tables("DB").show()
    checkAnswer(
      // look up the table name within the given database
      ctx.tables("DB").filter("tableName = 'ListTablesSuiteTable'"),
      Row("ListTablesSuiteTable", true))

    checkAnswer(
      // look up the table name with a SHOW TABLES command
      sql("show TABLES in DB").filter("tableName = 'ListTablesSuiteTable'"),
      Row("ListTablesSuiteTable", true))

    ctx.catalog.unregisterTable(Seq("ListTablesSuiteTable"))
    assert(ctx.tables().filter("tableName = 'ListTablesSuiteTable'").count() === 0)
  }

  test("query the returned DataFrame of tables") { // query the returned DataFrame of tables
    // StructType represents a table, StructField represents a column
    val expectedSchema = StructType(
      StructField("tableName", StringType, false) ::
      StructField("isTemporary", BooleanType, false) :: Nil)

    Seq(ctx.tables(), sql("SHOW TABLes")).foreach {
      case tableDF =>
        assert(expectedSchema === tableDF.schema)

        tableDF.registerTempTable("tables")
        checkAnswer(
          sql(
            "SELECT isTemporary, tableName from tables WHERE tableName = 'ListTablesSuiteTable'"),
          Row(true, "ListTablesSuiteTable")
        )
        checkAnswer(
          ctx.tables().filter("tableName = 'tables'").select("tableName", "isTemporary"),
          Row("tables", true))
        ctx.dropTempTable("tables")
    }
  }
}
Example 13
Source File: TypeQualifiersSuite.scala From kyuubi with Apache License 2.0
package yaooqinn.kyuubi.schema

import org.apache.hive.service.cli.thrift.TCLIServiceConstants
import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.types.{BooleanType, DecimalType}

import yaooqinn.kyuubi.utils.ReflectUtils

class TypeQualifiersSuite extends SparkFunSuite {

  test("type qualifier basic tests") {
    val typeQualifiers1 = TypeQualifiers.fromTypeInfo(new DecimalType(10, 9))
    val typeQualifiers2 = TypeQualifiers.fromTypeInfo(BooleanType)

    assert(ReflectUtils.getFieldValue(typeQualifiers1, "precision") === Some(10))
    assert(ReflectUtils.getFieldValue(typeQualifiers1, "scale") === Some(9))
    assert(ReflectUtils.getFieldValue(typeQualifiers2, "precision") === None)
    assert(ReflectUtils.getFieldValue(typeQualifiers2, "scale") === None)

    assert(typeQualifiers1.toTTypeQualifiers
      .getQualifiers.get(TCLIServiceConstants.PRECISION).getI32Value === 10)
    assert(typeQualifiers1.toTTypeQualifiers
      .getQualifiers.get(TCLIServiceConstants.SCALE).getI32Value === 9)
    assert(!typeQualifiers1.toTTypeQualifiers
      .getQualifiers.containsKey(TCLIServiceConstants.CHARACTER_MAXIMUM_LENGTH))

    assert(typeQualifiers2.toTTypeQualifiers.getQualifiers.isEmpty)
    assert(!typeQualifiers2.toTTypeQualifiers
      .getQualifiers.containsKey(TCLIServiceConstants.PRECISION))
    assert(!typeQualifiers2.toTTypeQualifiers
      .getQualifiers.containsKey(TCLIServiceConstants.SCALE))

    ReflectUtils.invokeMethod(
      typeQualifiers2,
      "yaooqinn$kyuubi$schema$TypeQualifiers$$setPrecision",
      Seq(classOf[Int]),
      Seq(Integer.valueOf(8)))
    ReflectUtils.invokeMethod(
      typeQualifiers2,
      "yaooqinn$kyuubi$schema$TypeQualifiers$$setScale",
      Seq(classOf[Int]),
      Seq(Integer.valueOf(8)))

    assert(typeQualifiers2.toTTypeQualifiers
      .getQualifiers.get(TCLIServiceConstants.PRECISION).getI32Value === 8)
    assert(typeQualifiers2.toTTypeQualifiers
      .getQualifiers.get(TCLIServiceConstants.SCALE).getI32Value === 8)
  }
}
Example 14
Source File: DiscreteDistributionBuilder.scala From seahorse with Apache License 2.0
package ai.deepsense.deeplang.doperables.dataframe.report.distribution.discrete

import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{BooleanType, StringType, StructField}

import ai.deepsense.deeplang.doperables.dataframe.report.DataFrameReportGenerator
import ai.deepsense.deeplang.doperables.dataframe.report.distribution.{DistributionBuilder, NoDistributionReasons}
import ai.deepsense.deeplang.doperables.report.ReportUtils
import ai.deepsense.deeplang.utils.aggregators.Aggregator
import ai.deepsense.deeplang.utils.aggregators.AggregatorBatch.BatchedResult
import ai.deepsense.reportlib.model.{DiscreteDistribution, Distribution, NoDistribution}

case class DiscreteDistributionBuilder(
    categories: Aggregator[Option[scala.collection.mutable.Map[String, Long]], Row],
    missing: Aggregator[Long, Row],
    field: StructField)
  extends DistributionBuilder {

  def allAggregators: Seq[Aggregator[_, Row]] = Seq(categories, missing)

  override def build(results: BatchedResult): Distribution = {
    val categoriesMap = results.forAggregator(categories)
    val nullsCount = results.forAggregator(missing)

    categoriesMap match {
      case Some(occurrencesMap) => {
        val labels = field.dataType match {
          case StringType => occurrencesMap.keys.toSeq.sorted
          // We always want two labels, even when all elements are true or false
          case BooleanType => Seq(false.toString, true.toString)
        }
        val counts = labels.map(occurrencesMap.getOrElse(_, 0L))
        DiscreteDistribution(
          field.name,
          s"Discrete distribution for ${field.name} column",
          nullsCount,
          labels.map(ReportUtils.shortenLongStrings(_, DataFrameReportGenerator.StringPreviewMaxLength)),
          counts)
      }
      case None => NoDistribution(
        field.name,
        NoDistributionReasons.TooManyDistinctCategoricalValues
      )
    }
  }
}
Example 15
Source File: MySQLDialect.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.jdbc

import java.sql.Types

import org.apache.spark.sql.types.{BooleanType, DataType, LongType, MetadataBuilder}

private case object MySQLDialect extends JdbcDialect {

  override def canHandle(url : String): Boolean = url.startsWith("jdbc:mysql")

  override def getCatalystType(
      sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = {
    if (sqlType == Types.VARBINARY && typeName.equals("BIT") && size != 1) {
      // This could instead be a BinaryType if we'd rather return bit-vectors of up to 64 bits as
      // byte arrays instead of longs.
      md.putLong("binarylong", 1)
      Option(LongType)
    } else if (sqlType == Types.BIT && typeName.equals("TINYINT")) {
      Option(BooleanType)
    } else None
  }

  override def quoteIdentifier(colName: String): String = {
    s"`$colName`"
  }

  override def getTableExistsQuery(table: String): String = {
    s"SELECT 1 FROM $table LIMIT 1"
  }

  override def isCascadingTruncateTable(): Option[Boolean] = Some(false)
}
Example 16
Source File: MySQLDialect.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.jdbc

import java.sql.Types

import org.apache.spark.sql.types.{BooleanType, DataType, LongType, MetadataBuilder}

private case object MySQLDialect extends JdbcDialect {

  override def canHandle(url : String): Boolean = url.startsWith("jdbc:mysql")

  override def getCatalystType(
      sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = {
    if (sqlType == Types.VARBINARY && typeName.equals("BIT") && size != 1) {
      // This could instead be a BinaryType if we'd rather return bit-vectors of up to 64 bits as
      // byte arrays instead of longs.
      md.putLong("binarylong", 1)
      Option(LongType)
    } else if (sqlType == Types.BIT && typeName.equals("TINYINT")) {
      Option(BooleanType)
    } else None
  }

  override def quoteIdentifier(colName: String): String = {
    s"`$colName`"
  }

  override def getTableExistsQuery(table: String): String = {
    s"SELECT 1 FROM $table LIMIT 1"
  }

  override def isCascadingTruncateTable(): Option[Boolean] = Some(false)
}
Example 17
Source File: CarbonShowTablesCommand.scala From carbondata with Apache License 2.0
package org.apache.spark.sql.execution.command.table

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.execution.command.MetadataCommand
import org.apache.spark.sql.types.{BooleanType, StringType}

private[sql] case class CarbonShowTablesCommand(
    databaseName: Option[String],
    tableIdentifierPattern: Option[String]) extends MetadataCommand {

  // The result of SHOW TABLES has three columns: database, tableName and isTemporary.
  override val output: Seq[Attribute] = {
    AttributeReference("database", StringType, nullable = false)() ::
    AttributeReference("tableName", StringType, nullable = false)() ::
    AttributeReference("isTemporary", BooleanType, nullable = false)() :: Nil
  }

  override def processMetadata(sparkSession: SparkSession): Seq[Row] = {
    // Since we need to return a Seq of rows, we will call getTables directly
    // instead of calling tables in sparkSession.
    val catalog = sparkSession.sessionState.catalog
    val db = databaseName.getOrElse(catalog.getCurrentDatabase)
    val tables =
      tableIdentifierPattern.map(catalog.listTables(db, _)).getOrElse(catalog.listTables(db))
    val externalCatalog = sparkSession.sharedState.externalCatalog
    // this method checks whether the table is mainTable or MV based on property "isVisible"
    def isMainTable(tableIdent: TableIdentifier) = {
      var isMainTable = true
      try {
        isMainTable = externalCatalog.getTable(db, tableIdent.table).storage.properties
          .getOrElse("isVisible", true).toString.toBoolean
      } catch {
        case ex: Throwable =>
          // ignore the exception for show tables
      }
      isMainTable
    }
    // tables will be filtered for all the MVs to show only main tables
    tables.collect {
      case tableIdent if isMainTable(tableIdent) =>
        val isTemp = catalog.isTemporaryTable(tableIdent)
        Row(tableIdent.database.getOrElse("default"), tableIdent.table, isTemp)
    }
  }

  override protected def opName: String = "SHOW TABLES"
}
Example 18
Source File: CarbonShowMVCommand.scala From carbondata with Apache License 2.0
package org.apache.spark.sql.execution.command.view

import java.util

import scala.collection.JavaConverters._

import org.apache.spark.sql.{CarbonEnv, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.execution.command.{Checker, DataCommand}
import org.apache.spark.sql.types.{BooleanType, StringType}

import org.apache.carbondata.core.view.{MVProperty, MVSchema}
import org.apache.carbondata.view.MVManagerInSpark

case class CarbonShowMVCommand(
    databaseNameOption: Option[String],
    relatedTableIdentifier: Option[TableIdentifier]) extends DataCommand {

  override def output: Seq[Attribute] = {
    Seq(
      AttributeReference("Database", StringType, nullable = false)(),
      AttributeReference("Name", StringType, nullable = false)(),
      AttributeReference("Status", StringType, nullable = false)(),
      AttributeReference("Refresh Mode", StringType, nullable = false)(),
      AttributeReference("Refresh Trigger Mode", StringType, nullable = false)(),
      AttributeReference("Properties", StringType, nullable = false)())
  }

  override def processData(session: SparkSession): Seq[Row] = {
    // Get mv schemas.
    val schemaList = new util.ArrayList[MVSchema]()
    val viewManager = MVManagerInSpark.get(session)
    relatedTableIdentifier match {
      case Some(table) =>
        val relatedTable = CarbonEnv.getCarbonTable(table)(session)
        setAuditTable(relatedTable)
        Checker.validateTableExists(table.database, table.table, session)
        if (databaseNameOption.isDefined) {
          schemaList.addAll(viewManager.getSchemasOnTable(
            databaseNameOption.get,
            relatedTable))
        } else {
          schemaList.addAll(viewManager.getSchemasOnTable(relatedTable))
        }
      case _ =>
        if (databaseNameOption.isDefined) {
          schemaList.addAll(viewManager.getSchemas(databaseNameOption.get))
        } else {
          schemaList.addAll(viewManager.getSchemas())
        }
    }
    // Convert mv schema to row.
    schemaList.asScala.map { schema =>
      Row(
        schema.getIdentifier.getDatabaseName,
        schema.getIdentifier.getTableName,
        schema.getStatus.name(),
        schema.getProperties.get(MVProperty.REFRESH_MODE),
        schema.getProperties.get(MVProperty.REFRESH_TRIGGER_MODE),
        schema.getPropertiesAsString
      )
    }
  }

  override protected def opName: String = "SHOW MATERIALIZED VIEW"
}
Example 19
Source File: MySQLDialect.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.jdbc

import java.sql.Types

import org.apache.spark.sql.types.{BooleanType, DataType, LongType, MetadataBuilder}

private case object MySQLDialect extends JdbcDialect {

  override def canHandle(url : String): Boolean = url.startsWith("jdbc:mysql")

  override def getCatalystType(
      sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = {
    if (sqlType == Types.VARBINARY && typeName.equals("BIT") && size != 1) {
      // This could instead be a BinaryType if we'd rather return bit-vectors of up to 64 bits as
      // byte arrays instead of longs.
      md.putLong("binarylong", 1)
      Option(LongType)
    } else if (sqlType == Types.BIT && typeName.equals("TINYINT")) {
      Option(BooleanType)
    } else None
  }

  override def quoteIdentifier(colName: String): String = {
    s"`$colName`"
  }

  override def getTableExistsQuery(table: String): String = {
    s"SELECT 1 FROM $table LIMIT 1"
  }

  override def isCascadingTruncateTable(): Option[Boolean] = Some(false)
}
Example 20
Source File: BooleanTypeDataFiberReaderWriterSuite.scala From OAP with Apache License 2.0
package org.apache.spark.sql.execution.datasources.oap.io

import org.apache.spark.sql.execution.vectorized.{Dictionary, OnHeapColumnVector}
import org.apache.spark.sql.types.BooleanType

class BooleanTypeDataFiberReaderWriterSuite extends DataFiberReaderWriterSuite {

  protected def dictionary: Dictionary =
    throw new UnsupportedOperationException("Boolean Type not support dic encode")

  test("no dic no nulls") {
    val column = new OnHeapColumnVector(total, BooleanType)
    (0 until total).foreach(i => column.putBoolean(i, i % 2 == 0))
    fiberCache = ParquetDataFiberWriter.dumpToCache(column, total)
    val address = fiberCache.getBaseOffset

    // read use batch api
    val ret1 = new OnHeapColumnVector(total, BooleanType)
    val reader = ParquetDataFiberReader(address, BooleanType, total)
    reader.readBatch(start, num, ret1)
    (0 until num).foreach(i => assert(ret1.getBoolean(i) == (((i + start) % 2) == 0)))

    // read use random access api
    val ret2 = new OnHeapColumnVector(total, BooleanType)
    reader.readBatch(rowIdList, ret2)
    ints.indices.foreach(i => assert(ret2.getBoolean(i) == ((ints(i) % 2) == 0)))
  }

  test("no dic all nulls") {
    val column = new OnHeapColumnVector(total, BooleanType)
    column.putNulls(0, total)
    fiberCache = ParquetDataFiberWriter.dumpToCache(column, total)
    val address = fiberCache.getBaseOffset

    // read use batch api
    val ret1 = new OnHeapColumnVector(total, BooleanType)
    val reader = ParquetDataFiberReader(address, BooleanType, total)
    reader.readBatch(start, num, ret1)
    (0 until num).foreach(i => assert(ret1.isNullAt(i)))

    // read use random access api
    val ret2 = new OnHeapColumnVector(total, BooleanType)
    reader.readBatch(rowIdList, ret2)
    ints.indices.foreach(i => assert(ret2.isNullAt(i)))
  }

  test("no dic") {
    val column = new OnHeapColumnVector(total, BooleanType)
    (0 until total).foreach(i => {
      if (i % 3 == 0) column.putNull(i)
      else column.putBoolean(i, i % 2 == 0)
    })
    fiberCache = ParquetDataFiberWriter.dumpToCache(column, total)
    val address = fiberCache.getBaseOffset

    // read use batch api
    val ret1 = new OnHeapColumnVector(total, BooleanType)
    val reader = ParquetDataFiberReader(address, BooleanType, total)
    reader.readBatch(start, num, ret1)
    (0 until num).foreach(i => {
      if ((i + start) % 3 == 0) assert(ret1.isNullAt(i))
      else assert(ret1.getBoolean(i) == (((i + start) % 2) == 0))
    })

    // read use random access api
    val ret2 = new OnHeapColumnVector(total, BooleanType)
    reader.readBatch(rowIdList, ret2)
    ints.indices.foreach(i => {
      if ((i + start) % 3 == 0) assert(ret2.isNullAt(i))
      else assert(ret2.getBoolean(i) == ((ints(i) % 2) == 0))
    })
  }
}
Example 21
Source File: InteractionOp.scala From mleap with Apache License 2.0
package org.apache.spark.ml.bundle.ops.feature

import ml.bundle.DataShape
import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl._
import ml.combust.bundle.op.{OpModel, OpNode}
import ml.combust.mleap.core.annotation.SparkCode
import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NominalAttribute}
import org.apache.spark.ml.bundle._
import org.apache.spark.ml.feature.Interaction
import org.apache.spark.ml.linalg.VectorUDT
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.mleap.TypeConverters._
import ml.combust.mleap.runtime.types.BundleTypeConverters._
import org.apache.spark.sql.types.{BooleanType, NumericType}

class InteractionOp extends SimpleSparkOp[Interaction] {
  override val Model: OpModel[SparkBundleContext, Interaction] = new OpModel[SparkBundleContext, Interaction] {
    override val klazz: Class[Interaction] = classOf[Interaction]

    override def opName: String = Bundle.BuiltinOps.feature.interaction

    override def store(model: Model, obj: Interaction)
                      (implicit context: BundleContext[SparkBundleContext]): Model = {
      assert(context.context.dataset.isDefined, BundleHelper.sampleDataframeMessage(klazz))

      val dataset = context.context.dataset.get
      val spec = buildSpec(obj.getInputCols, dataset)
      val inputShapes = obj.getInputCols.map(v => sparkToMleapDataShape(dataset.schema(v), dataset): DataShape)

      val m = model.withValue("num_inputs", Value.int(spec.length)).
        withValue("input_shapes", Value.dataShapeList(inputShapes))

      spec.zipWithIndex.foldLeft(m) {
        case (m2, (numFeatures, index)) => m2.withValue(s"num_features$index", Value.intList(numFeatures))
      }
    }

    override def load(model: Model)
                     (implicit context: BundleContext[SparkBundleContext]): Interaction = {
      // No need to do anything here, everything is handled through Spark meta data
      new Interaction()
    }

    @SparkCode(uri = "https://github.com/apache/spark/blob/branch-2.1/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala")
    private def buildSpec(inputCols: Array[String], dataset: DataFrame): Array[Array[Int]] = {
      def getNumFeatures(attr: Attribute): Int = {
        attr match {
          case nominal: NominalAttribute =>
            math.max(1, nominal.getNumValues.getOrElse(
              throw new IllegalArgumentException("Nominal features must have attr numValues defined.")))
          case _ =>
            1 // numeric feature
        }
      }

      inputCols.map(dataset.schema.apply).map { f =>
        f.dataType match {
          case _: NumericType | BooleanType =>
            Array(getNumFeatures(Attribute.fromStructField(f)))
          case _: VectorUDT =>
            val attrs = AttributeGroup.fromStructField(f).attributes.getOrElse(
              throw new IllegalArgumentException("Vector attributes must be defined for interaction."))
            attrs.map(getNumFeatures)
        }
      }
    }
  }

  override def sparkLoad(uid: String, shape: NodeShape, model: Interaction): Interaction = {
    new Interaction(uid = uid)
  }

  override def sparkInputs(obj: Interaction): Seq[ParamSpec] = {
    Seq("input" -> obj.inputCols)
  }

  override def sparkOutputs(obj: Interaction): Seq[SimpleParamSpec] = {
    Seq("output" -> obj.outputCol)
  }
}
Example 22
Source File: HttpStreamServerClientTest.scala From spark-http-stream with BSD 2-Clause "Simplified" License
import org.apache.spark.SparkConf
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.sql.Row
import org.apache.spark.sql.execution.streaming.http.HttpStreamClient
import org.junit.Assert
import org.junit.Test
import org.apache.spark.sql.types.LongType
import org.apache.spark.sql.types.IntegerType
import org.apache.spark.sql.types.DoubleType
import org.apache.spark.sql.types.BooleanType
import org.apache.spark.sql.types.FloatType
import org.apache.spark.sql.types.StringType
import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.types.StructField
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.ByteType
import org.apache.spark.sql.execution.streaming.http.HttpStreamServer
import org.apache.spark.sql.execution.streaming.http.StreamPrinter
import org.apache.spark.sql.execution.streaming.http.HttpStreamServerSideException

class HttpStreamServerClientTest {
  val ROWS1 = Array(Row("hello1", 1, true, 0.1f, 0.1d, 1L, '1'.toByte),
    Row("hello2", 2, false, 0.2f, 0.2d, 2L, '2'.toByte),
    Row("hello3", 3, true, 0.3f, 0.3d, 3L, '3'.toByte));

  val ROWS2 = Array(Row("hello"), Row("world"), Row("bye"), Row("world"));

  @Test
  def testHttpStreamIO() {
    //starts a http server
    val kryoSerializer = new KryoSerializer(new SparkConf());
    val server = HttpStreamServer.start("/xxxx", 8080);

    val spark = SparkSession.builder.appName("testHttpTextSink").master("local[4]")
      .getOrCreate();
    spark.conf.set("spark.sql.streaming.checkpointLocation", "/tmp/");

    val sqlContext = spark.sqlContext;
    import spark.implicits._
    //add a local message buffer to server, with 2 topics registered
    server.withBuffer()
      .addListener(new StreamPrinter())
      .createTopic[(String, Int, Boolean, Float, Double, Long, Byte)]("topic-1")
      .createTopic[String]("topic-2");

    val client = HttpStreamClient.connect("http://localhost:8080/xxxx");
    //tests schema of topics
    val schema1 = client.fetchSchema("topic-1");
    Assert.assertArrayEquals(Array[Object](StringType, IntegerType, BooleanType, FloatType, DoubleType, LongType, ByteType),
      schema1.fields.map(_.dataType).asInstanceOf[Array[Object]]);

    val schema2 = client.fetchSchema("topic-2");
    Assert.assertArrayEquals(Array[Object](StringType),
      schema2.fields.map(_.dataType).asInstanceOf[Array[Object]]);

    //prepare to consume messages
    val sid1 = client.subscribe("topic-1")._1;
    val sid2 = client.subscribe("topic-2")._1;

    //produces some data
    client.sendRows("topic-1", 1, ROWS1);

    val sid4 = client.subscribe("topic-1")._1;
    val sid5 = client.subscribe("topic-2")._1;

    client.sendRows("topic-2", 1, ROWS2);

    //consumes data
    val fetched = client.fetchStream(sid1).map(_.originalRow);
    Assert.assertArrayEquals(ROWS1.asInstanceOf[Array[Object]], fetched.asInstanceOf[Array[Object]]);
    //it is empty now
    Assert.assertArrayEquals(Array[Object](), client.fetchStream(sid1).map(_.originalRow).asInstanceOf[Array[Object]]);
    Assert.assertArrayEquals(ROWS2.asInstanceOf[Array[Object]], client.fetchStream(sid2).map(_.originalRow).asInstanceOf[Array[Object]]);
    Assert.assertArrayEquals(Array[Object](), client.fetchStream(sid4).map(_.originalRow).asInstanceOf[Array[Object]]);
    Assert.assertArrayEquals(ROWS2.asInstanceOf[Array[Object]], client.fetchStream(sid5).map(_.originalRow).asInstanceOf[Array[Object]]);
    Assert.assertArrayEquals(Array[Object](), client.fetchStream(sid5).map(_.originalRow).asInstanceOf[Array[Object]]);

    client.unsubscribe(sid4);
    try {
      client.fetchStream(sid4);
      //exception should be thrown, because subscriber id is invalidated
      Assert.assertTrue(false);
    } catch {
      case e: Throwable ⇒
        e.printStackTrace();
        Assert.assertEquals(classOf[HttpStreamServerSideException], e.getClass);
    }

    server.stop();
  }
}
Example 23
Source File: subquery.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.{expressions, InternalRow}
import org.apache.spark.sql.catalyst.expressions.{Expression, ExprId, InSet, Literal, PlanExpression}
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{BooleanType, DataType, StructType}

case class ReuseSubquery(conf: SQLConf) extends Rule[SparkPlan] {

  def apply(plan: SparkPlan): SparkPlan = {
    if (!conf.exchangeReuseEnabled) {
      return plan
    }
    // Build a hash map using schema of subqueries to avoid O(N*N) sameResult calls.
    val subqueries = mutable.HashMap[StructType, ArrayBuffer[SubqueryExec]]()
    plan transformAllExpressions {
      case sub: ExecSubqueryExpression =>
        val sameSchema = subqueries.getOrElseUpdate(sub.plan.schema, ArrayBuffer[SubqueryExec]())
        val sameResult = sameSchema.find(_.sameResult(sub.plan))
        if (sameResult.isDefined) {
          sub.withNewPlan(sameResult.get)
        } else {
          sameSchema += sub.plan
          sub
        }
    }
  }
}
Example 24
Source File: MySQLDialect.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.jdbc

import java.sql.Types

import org.apache.spark.sql.types.{BooleanType, DataType, LongType, MetadataBuilder}

private case object MySQLDialect extends JdbcDialect {

  override def canHandle(url : String): Boolean = url.startsWith("jdbc:mysql")

  override def getCatalystType(
      sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = {
    if (sqlType == Types.VARBINARY && typeName.equals("BIT") && size != 1) {
      // This could instead be a BinaryType if we'd rather return bit-vectors of up to 64 bits as
      // byte arrays instead of longs.
      md.putLong("binarylong", 1)
      Option(LongType)
    } else if (sqlType == Types.BIT && typeName.equals("TINYINT")) {
      Option(BooleanType)
    } else None
  }

  override def quoteIdentifier(colName: String): String = {
    s"`$colName`"
  }

  override def getTableExistsQuery(table: String): String = {
    s"SELECT 1 FROM $table LIMIT 1"
  }

  override def isCascadingTruncateTable(): Option[Boolean] = Some(false)
}
Example 25
Source File: LikeSimplificationSuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules._
import org.apache.spark.sql.types.{BooleanType, StringType}

class LikeSimplificationSuite extends PlanTest {

  object Optimize extends RuleExecutor[LogicalPlan] {
    val batches =
      Batch("Like Simplification", Once,
        LikeSimplification) :: Nil
  }

  val testRelation = LocalRelation('a.string)

  test("simplify Like into StartsWith") {
    val originalQuery =
      testRelation
        .where(('a like "abc%") || ('a like "abc\\%"))

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer = testRelation
      .where(StartsWith('a, "abc") || ('a like "abc\\%"))
      .analyze

    comparePlans(optimized, correctAnswer)
  }

  test("simplify Like into EndsWith") {
    val originalQuery =
      testRelation
        .where('a like "%xyz")

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer = testRelation
      .where(EndsWith('a, "xyz"))
      .analyze

    comparePlans(optimized, correctAnswer)
  }

  test("simplify Like into startsWith and EndsWith") {
    val originalQuery =
      testRelation
        .where(('a like "abc\\%def") || ('a like "abc%def"))

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer = testRelation
      .where(('a like "abc\\%def") ||
        (Length('a) >= 6 && (StartsWith('a, "abc") && EndsWith('a, "def"))))
      .analyze

    comparePlans(optimized, correctAnswer)
  }

  test("simplify Like into Contains") {
    val originalQuery =
      testRelation
        .where(('a like "%mn%") || ('a like "%mn\\%"))

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer = testRelation
      .where(Contains('a, "mn") || ('a like "%mn\\%"))
      .analyze

    comparePlans(optimized, correctAnswer)
  }

  test("simplify Like into EqualTo") {
    val originalQuery =
      testRelation
        .where(('a like "") || ('a like "abc"))

    val optimized = Optimize.execute(originalQuery.analyze)
    val correctAnswer = testRelation
      .where(('a === "") || ('a === "abc"))
      .analyze

    comparePlans(optimized, correctAnswer)
  }

  test("null pattern") {
    val originalQuery = testRelation.where('a like Literal(null, StringType)).analyze
    val optimized = Optimize.execute(originalQuery)
    comparePlans(optimized, testRelation.where(Literal(null, BooleanType)).analyze)
  }
}
Example 26
Source File: ExprValueSuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions.codegen

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.types.BooleanType

class ExprValueSuite extends SparkFunSuite {

  test("TrueLiteral and FalseLiteral should be LiteralValue") {
    val trueLit = TrueLiteral
    val falseLit = FalseLiteral

    assert(trueLit.value == "true")
    assert(falseLit.value == "false")

    assert(trueLit.isPrimitive)
    assert(falseLit.isPrimitive)

    assert(trueLit === JavaCode.literal("true", BooleanType))
    assert(falseLit === JavaCode.literal("false", BooleanType))
  }
}
Example 27
Source File: subquery.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.execution

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.{expressions, InternalRow}
import org.apache.spark.sql.catalyst.expressions.{Expression, ExprId, InSet, Literal, PlanExpression}
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{BooleanType, DataType, StructType}

case class ReuseSubquery(conf: SQLConf) extends Rule[SparkPlan] {

  def apply(plan: SparkPlan): SparkPlan = {
    if (!conf.exchangeReuseEnabled) {
      return plan
    }
    // Build a hash map using schema of exchanges to avoid O(N*N) sameResult calls.
    val subqueries = mutable.HashMap[StructType, ArrayBuffer[SubqueryExec]]()
    plan transformAllExpressions {
      case sub: ExecSubqueryExpression =>
        val sameSchema = subqueries.getOrElseUpdate(sub.plan.schema, ArrayBuffer[SubqueryExec]())
        val sameResult = sameSchema.find(_.sameResult(sub.plan))
        if (sameResult.isDefined) {
          sub.withNewPlan(sameResult.get)
        } else {
          sameSchema += sub.plan
          sub
        }
    }
  }
}