org.apache.spark.sql.hive.test.TestHive Scala Examples
The following examples show how to use org.apache.spark.sql.hive.test.TestHive.
You can go to the original project or source file by following the links above each example.
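Before the suites below, here is a minimal, hypothetical sketch of the typical usage pattern they share. It is not taken from any of the listed projects; it only assumes the spark-hive test artifact (which provides TestHive and its bundled sample table `src`) is on the test classpath, as it is for every example on this page.

// Minimal sketch (assumption: the spark-hive test jar is on the test classpath).
// TestHive exposes a Hive-enabled context backed by a temporary metastore/warehouse,
// so a suite can run HiveQL against the bundled sample table `src` without extra setup.
import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.hive.test.TestHive._  // brings sql(...), table(...) and implicits into scope

class TestHiveSmokeSuite extends org.scalatest.FunSuite {
  test("query the bundled src table") {
    // Referencing `src` triggers TestHive to load the small kv1.txt test data set.
    val rows = TestHive.sql("SELECT key, value FROM src LIMIT 3").collect()
    assert(rows.length <= 3)
  }
}

The examples that follow use exactly this pattern, differing mainly in which Spark fork they come from and which feature of the Hive integration they exercise.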
Example 1
Source File: HiveTypeCoercionSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.catalyst.expressions.{Cast, EqualTo}
import org.apache.spark.sql.execution.Project
import org.apache.spark.sql.hive.test.TestHive

class HiveTypeCoercionSuite extends HiveComparisonTest {
  val baseTypes = Seq("1", "1.0", "1L", "1S", "1Y", "'1'")

  baseTypes.foreach { i =>
    baseTypes.foreach { j =>
      createQueryTest(s"$i + $j", s"SELECT $i + $j FROM src LIMIT 1")
    }
  }

  val nullVal = "null"
  baseTypes.init.foreach { i =>
    createQueryTest(s"case when then $i else $nullVal end ",
      s"SELECT case when true then $i else $nullVal end FROM src limit 1")
    createQueryTest(s"case when then $nullVal else $i end ",
      s"SELECT case when true then $nullVal else $i end FROM src limit 1")
  }

  // A boolean cast applied to an already-boolean value should be removed
  test("[SPARK-2210] boolean cast on boolean value should be removed") {
    val q = "select cast(cast(key=0 as boolean) as boolean) from src"
    val project = TestHive.sql(q).queryExecution.executedPlan.collect {
      case e: Project => e
    }.head

    // No cast expression introduced
    project.transformAllExpressions {
      case c: Cast =>
        fail(s"unexpected cast $c")
        c
    }

    // Only one equality check
    var numEquals = 0
    project.transformAllExpressions {
      case e: EqualTo =>
        numEquals += 1
        e
    }
    assert(numEquals === 1)
  }
}
Example 2
Source File: HiveTableScanSuite.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.Row
import org.apache.spark.sql.functions._
import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.hive.test.TestHive._
import org.apache.spark.sql.hive.test.TestHive.implicits._
import org.apache.spark.util.Utils

class HiveTableScanSuite extends HiveComparisonTest {

  createQueryTest("partition_based_table_scan_with_different_serde",
    """
      |CREATE TABLE part_scan_test (key STRING, value STRING) PARTITIONED BY (ds STRING)
      |ROW FORMAT SERDE
      |'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'
      |STORED AS RCFILE;
      |
      |FROM src
      |INSERT INTO TABLE part_scan_test PARTITION (ds='2010-01-01')
      |SELECT 100,100 LIMIT 1;
      |
      |ALTER TABLE part_scan_test SET SERDE
      |'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe';
      |
      |FROM src INSERT INTO TABLE part_scan_test PARTITION (ds='2010-01-02')
      |SELECT 200,200 LIMIT 1;
      |
      |SELECT * from part_scan_test;
    """.stripMargin)

  // In unit test, kv1.txt is a small file and will be loaded as table src
  // Since the small file will be considered as a single split, we assume
  // Hive / SparkSQL HQL has the same output even for SORT BY
  createQueryTest("file_split_for_small_table",
    """
      |SELECT key, value FROM src SORT BY key, value
    """.stripMargin)

  test("Spark-4041: lowercase issue") {
    TestHive.sql("CREATE TABLE tb (KEY INT, VALUE STRING) STORED AS ORC")
    TestHive.sql("insert into table tb select key, value from src")
    TestHive.sql("select KEY from tb where VALUE='just_for_test' limit 5").collect()
    TestHive.sql("drop table tb")
  }

  test("Spark-4077: timestamp query for null value") {
    TestHive.sql("DROP TABLE IF EXISTS timestamp_query_null")
    TestHive.sql(
      """
        CREATE EXTERNAL TABLE timestamp_query_null (time TIMESTAMP,id INT)
        ROW FORMAT DELIMITED
        FIELDS TERMINATED BY ','
        LINES TERMINATED BY '\n'
      """.stripMargin)
    val location =
      Utils.getSparkClassLoader.getResource("data/files/issue-4077-data.txt").getFile()

    TestHive.sql(s"LOAD DATA LOCAL INPATH '$location' INTO TABLE timestamp_query_null")
    assert(TestHive.sql("SELECT time from timestamp_query_null limit 2").collect()
      === Array(Row(java.sql.Timestamp.valueOf("2014-12-11 00:00:00")), Row(null)))
    TestHive.sql("DROP TABLE timestamp_query_null")
  }

  test("Spark-4959 Attributes are case sensitive when using a select query from a projection") {
    sql("create table spark_4959 (col1 string)")
    sql("""insert into table spark_4959 select "hi" from src limit 1""")
    table("spark_4959").select(
      'col1.as("CaseSensitiveColName"),
      'col1.as("CaseSensitiveColName2")).registerTempTable("spark_4959_2")

    assert(sql("select CaseSensitiveColName from spark_4959_2").head() === Row("hi"))
    assert(sql("select casesensitivecolname from spark_4959_2").head() === Row("hi"))
  }
}
Example 3
Source File: HiveSerDeSuite.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.scalatest.BeforeAndAfterAll

import org.apache.spark.sql.hive.test.TestHive

class HiveSerDeSuite extends HiveComparisonTest with BeforeAndAfterAll {

  override def beforeAll(): Unit = {
    import TestHive._
    import org.apache.hadoop.hive.serde2.RegexSerDe
    super.beforeAll()
    TestHive.cacheTables = false
    sql(s"""CREATE TABLE IF NOT EXISTS sales (key STRING, value INT)
           |ROW FORMAT SERDE '${classOf[RegexSerDe].getCanonicalName}'
           |WITH SERDEPROPERTIES ("input.regex" = "([^ ]*)\t([^ ]*)")
         """.stripMargin)
    sql(s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/sales.txt")}' INTO TABLE sales")
  }

  // table sales is not a cache table, and will be clear after reset
  createQueryTest("Read with RegexSerDe", "SELECT * FROM sales", false)

  createQueryTest(
    "Read and write with LazySimpleSerDe (tab separated)",
    "SELECT * from serdeins")

  createQueryTest("Read with AvroSerDe", "SELECT * FROM episodes")

  createQueryTest("Read Partitioned with AvroSerDe", "SELECT * FROM episodes_part")
}
Example 4
Source File: HiveTypeCoercionSuite.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.catalyst.expressions.{Cast, EqualTo}
import org.apache.spark.sql.execution.Project
import org.apache.spark.sql.hive.test.TestHive

class HiveTypeCoercionSuite extends HiveComparisonTest {
  val baseTypes = Seq("1", "1.0", "1L", "1S", "1Y", "'1'")

  baseTypes.foreach { i =>
    baseTypes.foreach { j =>
      createQueryTest(s"$i + $j", s"SELECT $i + $j FROM src LIMIT 1")
    }
  }

  val nullVal = "null"
  baseTypes.init.foreach { i =>
    createQueryTest(s"case when then $i else $nullVal end ",
      s"SELECT case when true then $i else $nullVal end FROM src limit 1")
    createQueryTest(s"case when then $nullVal else $i end ",
      s"SELECT case when true then $nullVal else $i end FROM src limit 1")
  }

  test("[SPARK-2210] boolean cast on boolean value should be removed") {
    val q = "select cast(cast(key=0 as boolean) as boolean) from src"
    val project = TestHive.sql(q).queryExecution.executedPlan.collect {
      case e: Project => e
    }.head

    // No cast expression introduced
    project.transformAllExpressions {
      case c: Cast =>
        fail(s"unexpected cast $c")
        c
    }

    // Only one equality check
    var numEquals = 0
    project.transformAllExpressions {
      case e: EqualTo =>
        numEquals += 1
        e
    }
    assert(numEquals === 1)
  }

  test("COALESCE with different types") {
    intercept[RuntimeException] {
      TestHive.sql("""SELECT COALESCE(1, true, "abc") FROM src limit 1""").collect()
    }
  }
}
Example 5
Source File: ListTablesSuite.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive

import org.scalatest.BeforeAndAfterAll

import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.hive.test.TestHive._
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.Row

class ListTablesSuite extends QueryTest with BeforeAndAfterAll {

  import org.apache.spark.sql.hive.test.TestHive.implicits._

  val df = sparkContext.parallelize((1 to 10).map(i => (i, s"str$i"))).toDF("key", "value")

  override def beforeAll(): Unit = {
    // The catalog in HiveContext is a case insensitive one.
    catalog.registerTable(Seq("ListTablesSuiteTable"), df.logicalPlan)
    catalog.registerTable(Seq("ListTablesSuiteDB", "InDBListTablesSuiteTable"), df.logicalPlan)
    sql("CREATE TABLE HiveListTablesSuiteTable (key int, value string)")
    sql("CREATE DATABASE IF NOT EXISTS ListTablesSuiteDB")
    sql("CREATE TABLE ListTablesSuiteDB.HiveInDBListTablesSuiteTable (key int, value string)")
  }

  override def afterAll(): Unit = {
    catalog.unregisterTable(Seq("ListTablesSuiteTable"))
    catalog.unregisterTable(Seq("ListTablesSuiteDB", "InDBListTablesSuiteTable"))
    sql("DROP TABLE IF EXISTS HiveListTablesSuiteTable")
    sql("DROP TABLE IF EXISTS ListTablesSuiteDB.HiveInDBListTablesSuiteTable")
    sql("DROP DATABASE IF EXISTS ListTablesSuiteDB")
  }

  test("get all tables of current database") {
    Seq(tables(), sql("SHOW TABLes")).foreach {
      case allTables =>
        // We are using default DB.
        checkAnswer(
          allTables.filter("tableName = 'listtablessuitetable'"),
          Row("listtablessuitetable", true))
        assert(allTables.filter("tableName = 'indblisttablessuitetable'").count() === 0)
        checkAnswer(
          allTables.filter("tableName = 'hivelisttablessuitetable'"),
          Row("hivelisttablessuitetable", false))
        assert(allTables.filter("tableName = 'hiveindblisttablessuitetable'").count() === 0)
    }
  }

  test("getting all tables with a database name") {
    Seq(tables("listtablessuiteDb"), sql("SHOW TABLes in listTablesSuitedb")).foreach {
      case allTables =>
        checkAnswer(
          allTables.filter("tableName = 'listtablessuitetable'"),
          Row("listtablessuitetable", true))
        checkAnswer(
          allTables.filter("tableName = 'indblisttablessuitetable'"),
          Row("indblisttablessuitetable", true))
        assert(allTables.filter("tableName = 'hivelisttablessuitetable'").count() === 0)
        checkAnswer(
          allTables.filter("tableName = 'hiveindblisttablessuitetable'"),
          Row("hiveindblisttablessuitetable", false))
    }
  }
}
Example 6
Source File: HiveDataFrameAnalyticsSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive

import org.apache.spark.sql.{DataFrame, QueryTest}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.hive.test.TestHive._
import org.apache.spark.sql.hive.test.TestHive.implicits._
import org.scalatest.BeforeAndAfterAll

// TODO ideally we should put the test suite into the package `sql`, as
// `hive` package is optional in compiling, however, `SQLContext.sql` doesn't
// support the `cube` or `rollup` yet.
// Hive DataFrame analytics suite
class HiveDataFrameAnalyticsSuite extends QueryTest with BeforeAndAfterAll {
  private var testData: DataFrame = _

  override def beforeAll() {
    testData = Seq((1, 2), (2, 4)).toDF("a", "b")
    TestHive.registerDataFrameAsTable(testData, "mytable")
  }

  override def afterAll(): Unit = {
    TestHive.dropTempTable("mytable")
  }

  test("cube") {
    checkAnswer(
      testData.cube($"a" + $"b", $"b").agg(sum($"a" - $"b")),
      sql("select a + b, b, sum(a - b) from mytable group by a + b, b with cube").collect()
    )

    checkAnswer(
      testData.cube("a", "b").agg(sum("b")),
      sql("select a, b, sum(b) from mytable group by a, b with cube").collect()
    )
  }
}
Example 7
Source File: HiveParquetSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive

import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.execution.datasources.parquet.ParquetTest
import org.apache.spark.sql.{QueryTest, Row, SQLContext}

case class Cases(lower: String, UPPER: String)

class HiveParquetSuite extends QueryTest with ParquetTest {
  private val ctx = TestHive
  override def _sqlContext: SQLContext = ctx

  // Case-insensitive attribute names
  test("Case insensitive attribute names") {
    withParquetTable((1 to 4).map(i => Cases(i.toString, i.toString)), "cases") {
      val expected = (1 to 4).map(i => Row(i.toString))
      checkAnswer(sql("SELECT upper FROM cases"), expected)
      checkAnswer(sql("SELECT LOWER FROM cases"), expected)
    }
  }

  // SELECT query against a Parquet table
  test("SELECT on Parquet table") {
    val data = (1 to 4).map(i => (i, s"val_$i"))
    withParquetTable(data, "t") {
      checkAnswer(sql("SELECT * FROM t"), data.map(Row.fromTuple))
    }
  }

  // Simple column projection + filter on a Parquet table
  test("Simple column projection + filter on Parquet table") {
    withParquetTable((1 to 4).map(i => (i % 2 == 0, i, s"val_$i")), "t") {
      checkAnswer(
        sql("SELECT `_1`, `_3` FROM t WHERE `_1` = true"),
        Seq(Row(true, "val_2"), Row(true, "val_4")))
    }
  }

  // Converting a Hive table to a Parquet table via saveAsParquetFile
  test("Converting Hive to Parquet Table via saveAsParquetFile") {
    withTempPath { dir =>
      sql("SELECT * FROM src").write.parquet(dir.getCanonicalPath)
      ctx.read.parquet(dir.getCanonicalPath).registerTempTable("p")
      withTempTable("p") {
        checkAnswer(
          sql("SELECT * FROM src ORDER BY key"),
          sql("SELECT * from p ORDER BY key").collect().toSeq)
      }
    }
  }

  // INSERT OVERWRITE into a Parquet table
  test("INSERT OVERWRITE TABLE Parquet table") {
    withParquetTable((1 to 10).map(i => (i, s"val_$i")), "t") {
      withTempPath { file =>
        sql("SELECT * FROM t LIMIT 1").write.parquet(file.getCanonicalPath)
        ctx.read.parquet(file.getCanonicalPath).registerTempTable("p")
        withTempTable("p") {
          // let's do three overwrites for good measure
          sql("INSERT OVERWRITE TABLE p SELECT * FROM t")
          sql("INSERT OVERWRITE TABLE p SELECT * FROM t")
          sql("INSERT OVERWRITE TABLE p SELECT * FROM t")
          checkAnswer(sql("SELECT * FROM p"), sql("SELECT * FROM t").collect().toSeq)
        }
      }
    }
  }
}
Example 8
Source File: HiveTableScanSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.Row
import org.apache.spark.sql.functions._
import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.hive.test.TestHive._
import org.apache.spark.sql.hive.test.TestHive.implicits._
import org.apache.spark.util.Utils

class HiveTableScanSuite extends HiveComparisonTest {

  // Partition-based table scan with different SerDes
  createQueryTest("partition_based_table_scan_with_different_serde",
    """
      |CREATE TABLE part_scan_test (key STRING, value STRING) PARTITIONED BY (ds STRING)
      |ROW FORMAT SERDE
      |'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'
      |STORED AS RCFILE;
      |
      |FROM src
      |INSERT INTO TABLE part_scan_test PARTITION (ds='2010-01-01')
      |SELECT 100,100 LIMIT 1;
      |
      |ALTER TABLE part_scan_test SET SERDE
      |'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe';
      |
      |FROM src INSERT INTO TABLE part_scan_test PARTITION (ds='2010-01-02')
      |SELECT 200,200 LIMIT 1;
      |
      |SELECT * from part_scan_test;
    """.stripMargin)

  // In unit test, kv1.txt is a small file and will be loaded as table src
  // Since the small file will be considered as a single split, we assume
  // Hive / SparkSQL HQL has the same output even for SORT BY
  // File split for a small table
  createQueryTest("file_split_for_small_table",
    """
      |SELECT key, value FROM src SORT BY key, value
    """.stripMargin)

  // Lowercase issue
  test("Spark-4041: lowercase issue") {
    TestHive.sql("CREATE TABLE tb (KEY INT, VALUE STRING) STORED AS ORC")
    TestHive.sql("insert into table tb select key, value from src")
    TestHive.sql("select KEY from tb where VALUE='just_for_test' limit 5").collect()
    TestHive.sql("drop table tb")
  }

  // Timestamp query for a null value
  test("Spark-4077: timestamp query for null value") {
    TestHive.sql("DROP TABLE IF EXISTS timestamp_query_null")
    TestHive.sql(
      """
        CREATE EXTERNAL TABLE timestamp_query_null (time TIMESTAMP,id INT)
        ROW FORMAT DELIMITED
        FIELDS TERMINATED BY ','
        LINES TERMINATED BY '\n'
      """.stripMargin)
    val location =
      Utils.getSparkClassLoader.getResource("data/files/issue-4077-data.txt").getFile()

    TestHive.sql(s"LOAD DATA LOCAL INPATH '$location' INTO TABLE timestamp_query_null")
    assert(TestHive.sql("SELECT time from timestamp_query_null limit 2").collect()
      === Array(Row(java.sql.Timestamp.valueOf("2014-12-11 00:00:00")), Row(null)))
    TestHive.sql("DROP TABLE timestamp_query_null")
  }

  // Attributes are case sensitive when using a select query from a projection
  test("Spark-4959 Attributes are case sensitive when using a select query from a projection") {
    sql("create table spark_4959 (col1 string)")
    sql("""insert into table spark_4959 select "hi" from src limit 1""")
    table("spark_4959").select(
      'col1.as("CaseSensitiveColName"),
      'col1.as("CaseSensitiveColName2")).registerTempTable("spark_4959_2")

    assert(sql("select CaseSensitiveColName from spark_4959_2").head() === Row("hi"))
    assert(sql("select casesensitivecolname from spark_4959_2").head() === Row("hi"))
  }
}
Example 9
Source File: HiveSerDeSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.scalatest.BeforeAndAfterAll

import org.apache.spark.sql.hive.test.TestHive

class HiveSerDeSuite extends HiveComparisonTest with BeforeAndAfterAll {

  override def beforeAll(): Unit = {
    import TestHive._
    import org.apache.hadoop.hive.serde2.RegexSerDe
    super.beforeAll()
    TestHive.cacheTables = false
    sql(s"""CREATE TABLE IF NOT EXISTS sales (key STRING, value INT)
           |ROW FORMAT SERDE '${classOf[RegexSerDe].getCanonicalName}'
           |WITH SERDEPROPERTIES ("input.regex" = "([^ ]*)\t([^ ]*)")
         """.stripMargin)
    sql(s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/sales.txt")}' INTO TABLE sales")
  }

  // table sales is not a cache table, and will be clear after reset
  createQueryTest("Read with RegexSerDe", "SELECT * FROM sales", false)

  createQueryTest(
    "Read and write with LazySimpleSerDe (tab separated)",
    "SELECT * from serdeins")

  createQueryTest("Read with AvroSerDe", "SELECT * FROM episodes")

  createQueryTest("Read Partitioned with AvroSerDe", "SELECT * FROM episodes_part")
}
Example 10
Source File: HiveMetastoreCatalogSuite.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.test.ExamplePointUDT
import org.apache.spark.sql.types.StructType

class HiveMetastoreCatalogSuite extends SparkFunSuite {

  test("struct field should accept underscore in sub-column name") {
    val metastr = "struct<a: int, b_1: string, c: string>"

    val datatype = HiveMetastoreTypes.toDataType(metastr)
    assert(datatype.isInstanceOf[StructType])
  }

  test("udt to metastore type conversion") {
    val udt = new ExamplePointUDT
    assert(HiveMetastoreTypes.toMetastoreType(udt) ===
      HiveMetastoreTypes.toMetastoreType(udt.sqlType))
  }

  test("duplicated metastore relations") {
    import TestHive.implicits._
    val df = TestHive.sql("SELECT * FROM src")
    println(df.queryExecution)
    df.as('a).join(df.as('b), $"a.key" === $"b.key")
  }
}
Example 11
Source File: HivePlanTest.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.functions._
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.catalyst.plans.logical
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.hive.test.TestHive

class HivePlanTest extends QueryTest {
  import TestHive._
  import TestHive.implicits._

  // UDF constant folding
  test("udf constant folding") {
    Seq.empty[Tuple1[Int]].toDF("a").registerTempTable("t")
    val optimized = sql("SELECT cos(null) FROM t").queryExecution.optimizedPlan
    val correctAnswer = sql("SELECT cast(null as double) FROM t").queryExecution.optimizedPlan

    comparePlans(optimized, correctAnswer)
  }

  // Window expressions sharing the same partition by and order by clause
  test("window expressions sharing the same partition by and order by clause") {
    val df = Seq.empty[(Int, String, Int, Int)].toDF("id", "grp", "seq", "val")

    val window = Window.
      partitionBy($"grp").
      orderBy($"val")

    val query = df.select(
      $"id",
      sum($"val").over(window.rowsBetween(-1, 1)),
      sum($"val").over(window.rangeBetween(-1, 1))
    )

    val plan = query.queryExecution.analyzed
    assert(plan.collect { case w: logical.Window => w }.size === 1,
      "Should have only 1 Window operator.")
  }
}
Example 12
Source File: ListTablesSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive

import org.scalatest.BeforeAndAfterAll

import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.hive.test.TestHive._
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.Row

class ListTablesSuite extends QueryTest with BeforeAndAfterAll {

  import org.apache.spark.sql.hive.test.TestHive.implicits._

  val df = sparkContext.parallelize((1 to 10).map(i => (i, s"str$i"))).toDF("key", "value")

  override def beforeAll(): Unit = {
    // The catalog in HiveContext is a case insensitive one.
    catalog.registerTable(Seq("ListTablesSuiteTable"), df.logicalPlan)
    sql("CREATE TABLE HiveListTablesSuiteTable (key int, value string)")
    sql("CREATE DATABASE IF NOT EXISTS ListTablesSuiteDB")
    sql("CREATE TABLE ListTablesSuiteDB.HiveInDBListTablesSuiteTable (key int, value string)")
  }

  override def afterAll(): Unit = {
    catalog.unregisterTable(Seq("ListTablesSuiteTable"))
    sql("DROP TABLE IF EXISTS HiveListTablesSuiteTable")
    sql("DROP TABLE IF EXISTS ListTablesSuiteDB.HiveInDBListTablesSuiteTable")
    sql("DROP DATABASE IF EXISTS ListTablesSuiteDB")
  }

  // Get all tables of the current database
  test("get all tables of current database") {
    Seq(tables(), sql("SHOW TABLes")).foreach {
      case allTables =>
        // We are using default DB.
        checkAnswer(
          allTables.filter("tableName = 'listtablessuitetable'"),
          Row("listtablessuitetable", true))
        checkAnswer(
          allTables.filter("tableName = 'hivelisttablessuitetable'"),
          Row("hivelisttablessuitetable", false))
        assert(allTables.filter("tableName = 'hiveindblisttablessuitetable'").count() === 0)
    }
  }

  // Get all tables given a database name
  test("getting all tables with a database name") {
    Seq(tables("listtablessuiteDb"), sql("SHOW TABLes in listTablesSuitedb")).foreach {
      case allTables =>
        checkAnswer(
          allTables.filter("tableName = 'listtablessuitetable'"),
          Row("listtablessuitetable", true))
        assert(allTables.filter("tableName = 'hivelisttablessuitetable'").count() === 0)
        checkAnswer(
          allTables.filter("tableName = 'hiveindblisttablessuitetable'"),
          Row("hiveindblisttablessuitetable", false))
    }
  }
}
Example 13
Source File: CommitFailureTestRelationSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.sources

import org.apache.hadoop.fs.Path

import org.apache.spark.{SparkException, SparkFunSuite}
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.test.SQLTestUtils

class CommitFailureTestRelationSuite extends SparkFunSuite with SQLTestUtils {
  override def _sqlContext: SQLContext = TestHive
  private val sqlContext = _sqlContext

  // When committing a task, `CommitFailureTestSource` throws an exception for testing purpose.
  val dataSourceName: String = classOf[CommitFailureTestSource].getCanonicalName

  // A commitTask() failure should fall back to abortTask()
  test("SPARK-7684: commitTask() failure should fallback to abortTask()") {
    withTempPath { file =>
      // Here we coalesce partition number to 1 to ensure that only a single task is issued. This
      // prevents race condition happened when FileOutputCommitter tries to remove the `_temporary`
      // directory while committing/aborting the job. See SPARK-8513 for more details.
      val df = sqlContext.range(0, 10).coalesce(1)
      intercept[SparkException] {
        df.write.format(dataSourceName).save(file.getCanonicalPath)
      }

      val fs = new Path(file.getCanonicalPath).getFileSystem(SparkHadoopUtil.get.conf)
      assert(!fs.exists(new Path(file.getCanonicalPath, "_temporary")))
    }
  }
}
Example 14
Source File: HiveTypeCoercionSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.catalyst.expressions.{Cast, EqualTo}
import org.apache.spark.sql.execution.ProjectExec
import org.apache.spark.sql.hive.test.TestHive

class HiveTypeCoercionSuite extends HiveComparisonTest {
  val baseTypes = Seq(
    ("1", "1"), ("1.0", "CAST(1.0 AS DOUBLE)"), ("1L", "1L"),
    ("1S", "1S"), ("1Y", "1Y"), ("'1'", "'1'"))

  baseTypes.foreach { case (ni, si) =>
    baseTypes.foreach { case (nj, sj) =>
      createQueryTest(s"$ni + $nj", s"SELECT $si + $sj FROM src LIMIT 1")
    }
  }

  val nullVal = "null"
  baseTypes.init.foreach { case (i, s) =>
    createQueryTest(s"case when then $i else $nullVal end ",
      s"SELECT case when true then $s else $nullVal end FROM src limit 1")
    createQueryTest(s"case when then $nullVal else $i end ",
      s"SELECT case when true then $nullVal else $s end FROM src limit 1")
  }

  test("[SPARK-2210] boolean cast on boolean value should be removed") {
    val q = "select cast(cast(key=0 as boolean) as boolean) from src"
    val project = TestHive.sql(q).queryExecution.sparkPlan.collect {
      case e: ProjectExec => e
    }.head

    // No cast expression introduced
    project.transformAllExpressions {
      case c: Cast =>
        fail(s"unexpected cast $c")
        c
    }

    // Only one equality check
    var numEquals = 0
    project.transformAllExpressions {
      case e: EqualTo =>
        numEquals += 1
        e
    }
    assert(numEquals === 1)
  }
}
Example 15
Source File: HiveTableScanSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.Row
import org.apache.spark.sql.functions._
import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.hive.test.TestHive._
import org.apache.spark.sql.hive.test.TestHive.implicits._
import org.apache.spark.util.Utils

class HiveTableScanSuite extends HiveComparisonTest {

  createQueryTest("partition_based_table_scan_with_different_serde",
    """
      |CREATE TABLE part_scan_test (key STRING, value STRING) PARTITIONED BY (ds STRING)
      |ROW FORMAT SERDE
      |'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'
      |STORED AS RCFILE;
      |
      |FROM src
      |INSERT INTO TABLE part_scan_test PARTITION (ds='2010-01-01')
      |SELECT 100,100 LIMIT 1;
      |
      |ALTER TABLE part_scan_test SET SERDE
      |'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe';
      |
      |FROM src INSERT INTO TABLE part_scan_test PARTITION (ds='2010-01-02')
      |SELECT 200,200 LIMIT 1;
      |
      |SELECT * from part_scan_test;
    """.stripMargin)

  // In unit test, kv1.txt is a small file and will be loaded as table src
  // Since the small file will be considered as a single split, we assume
  // Hive / SparkSQL HQL has the same output even for SORT BY
  createQueryTest("file_split_for_small_table",
    """
      |SELECT key, value FROM src SORT BY key, value
    """.stripMargin)

  test("Spark-4041: lowercase issue") {
    TestHive.sql("CREATE TABLE tb (KEY INT, VALUE STRING) STORED AS ORC")
    TestHive.sql("insert into table tb select key, value from src")
    TestHive.sql("select KEY from tb where VALUE='just_for_test' limit 5").collect()
    TestHive.sql("drop table tb")
  }

  test("Spark-4077: timestamp query for null value") {
    TestHive.sql("DROP TABLE IF EXISTS timestamp_query_null")
    TestHive.sql(
      """
        CREATE EXTERNAL TABLE timestamp_query_null (time TIMESTAMP,id INT)
        ROW FORMAT DELIMITED
        FIELDS TERMINATED BY ','
        LINES TERMINATED BY '\n'
      """.stripMargin)
    val location =
      Utils.getSparkClassLoader.getResource("data/files/issue-4077-data.txt").getFile()

    TestHive.sql(s"LOAD DATA LOCAL INPATH '$location' INTO TABLE timestamp_query_null")
    assert(TestHive.sql("SELECT time from timestamp_query_null limit 2").collect()
      === Array(Row(java.sql.Timestamp.valueOf("2014-12-11 00:00:00")), Row(null)))
    TestHive.sql("DROP TABLE timestamp_query_null")
  }

  test("Spark-4959 Attributes are case sensitive when using a select query from a projection") {
    sql("create table spark_4959 (col1 string)")
    sql("""insert into table spark_4959 select "hi" from src limit 1""")
    table("spark_4959").select(
      'col1.as("CaseSensitiveColName"),
      'col1.as("CaseSensitiveColName2")).registerTempTable("spark_4959_2")

    assert(sql("select CaseSensitiveColName from spark_4959_2").head() === Row("hi"))
    assert(sql("select casesensitivecolname from spark_4959_2").head() === Row("hi"))
  }
}
Example 16
Source File: HiveSerDeSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.scalatest.BeforeAndAfterAll

import org.apache.spark.sql.hive.test.TestHive

class HiveSerDeSuite extends HiveComparisonTest with BeforeAndAfterAll {

  override def beforeAll(): Unit = {
    import TestHive._
    import org.apache.hadoop.hive.serde2.RegexSerDe
    super.beforeAll()
    TestHive.cacheTables = false
    sql(s"""CREATE TABLE IF NOT EXISTS sales (key STRING, value INT)
           |ROW FORMAT SERDE '${classOf[RegexSerDe].getCanonicalName}'
           |WITH SERDEPROPERTIES ("input.regex" = "([^ ]*)\t([^ ]*)")
         """.stripMargin)
    sql(s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/sales.txt")}' INTO TABLE sales")
  }

  // table sales is not a cache table, and will be clear after reset
  createQueryTest("Read with RegexSerDe", "SELECT * FROM sales", false)

  createQueryTest(
    "Read and write with LazySimpleSerDe (tab separated)",
    "SELECT * from serdeins")

  createQueryTest("Read with AvroSerDe", "SELECT * FROM episodes")

  createQueryTest("Read Partitioned with AvroSerDe", "SELECT * FROM episodes_part")
}
Example 17
Source File: HiveTypeCoercionSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.catalyst.expressions.{Cast, EqualTo}
import org.apache.spark.sql.execution.Project
import org.apache.spark.sql.hive.test.TestHive

class HiveTypeCoercionSuite extends HiveComparisonTest {
  val baseTypes = Seq("1", "1.0", "1L", "1S", "1Y", "'1'")

  baseTypes.foreach { i =>
    baseTypes.foreach { j =>
      createQueryTest(s"$i + $j", s"SELECT $i + $j FROM src LIMIT 1")
    }
  }

  val nullVal = "null"
  baseTypes.init.foreach { i =>
    createQueryTest(s"case when then $i else $nullVal end ",
      s"SELECT case when true then $i else $nullVal end FROM src limit 1")
    createQueryTest(s"case when then $nullVal else $i end ",
      s"SELECT case when true then $nullVal else $i end FROM src limit 1")
  }

  test("[SPARK-2210] boolean cast on boolean value should be removed") {
    val q = "select cast(cast(key=0 as boolean) as boolean) from src"
    val project = TestHive.sql(q).queryExecution.executedPlan.collect {
      case e: Project => e
    }.head

    // No cast expression introduced
    project.transformAllExpressions {
      case c: Cast =>
        fail(s"unexpected cast $c")
        c
    }

    // Only one equality check
    var numEquals = 0
    project.transformAllExpressions {
      case e: EqualTo =>
        numEquals += 1
        e
    }
    assert(numEquals === 1)
  }
}
Example 18
Source File: HiveOperatorQueryableSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.{Row, QueryTest}
import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton}

class HiveOperatorQueryableSuite extends QueryTest with TestHiveSingleton {
  import hiveContext._

  test("SPARK-5324 query result of describe command") {
    hiveContext.loadTestTable("src")

    // register a describe command to be a temp table
    sql("desc src").registerTempTable("mydesc")
    checkAnswer(
      sql("desc mydesc"),
      Seq(
        Row("col_name", "string", "name of the column"),
        Row("data_type", "string", "data type of the column"),
        Row("comment", "string", "comment of the column")))

    checkAnswer(
      sql("select * from mydesc"),
      Seq(
        Row("key", "int", null),
        Row("value", "string", null)))

    checkAnswer(
      sql("select col_name, data_type, comment from mydesc"),
      Seq(
        Row("key", "int", null),
        Row("value", "string", null)))
  }
}
Example 19
Source File: RangerSparkMaskingExtensionTest.scala From spark-ranger with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.RangerSparkTestUtils._
import org.apache.spark.sql.catalyst.expressions.Alias
import org.apache.spark.sql.catalyst.plans.logical.{Project, RangerSparkMasking}
import org.scalatest.FunSuite

class RangerSparkMaskingExtensionTest extends FunSuite {

  private val spark = TestHive.sparkSession

  test("data masking for bob show last 4") {
    val extension = RangerSparkMaskingExtension(spark)
    val plan = spark.sql("select * from src").queryExecution.optimizedPlan
    println(plan)
    withUser("bob") {
      val newPlan = extension.apply(plan)
      assert(newPlan.isInstanceOf[Project])
      val project = newPlan.asInstanceOf[Project]
      val key = project.projectList.head
      assert(key.name === "key", "no affect on un masking attribute")
      val value = project.projectList.tail
      assert(value.head.name === "value", "attibute name should be unchanged")
      assert(value.head.asInstanceOf[Alias].child.sql ===
        "mask_show_last_n(`value`, 4, 'x', 'x', 'x', -1, '1')")
    }

    withUser("alice") {
      val newPlan = extension.apply(plan)
      assert(newPlan === RangerSparkMasking(plan))
    }
  }
}
Example 20
Source File: HiveTypeCoercionSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.catalyst.expressions.{Cast, EqualTo}
import org.apache.spark.sql.execution.ProjectExec
import org.apache.spark.sql.hive.test.TestHive

class HiveTypeCoercionSuite extends HiveComparisonTest {
  val baseTypes = Seq(
    ("1", "1"), ("1.0", "CAST(1.0 AS DOUBLE)"), ("1L", "1L"),
    ("1S", "1S"), ("1Y", "1Y"), ("'1'", "'1'"))

  baseTypes.foreach { case (ni, si) =>
    baseTypes.foreach { case (nj, sj) =>
      createQueryTest(s"$ni + $nj", s"SELECT $si + $sj FROM src LIMIT 1")
    }
  }

  val nullVal = "null"
  baseTypes.init.foreach { case (i, s) =>
    createQueryTest(s"case when then $i else $nullVal end ",
      s"SELECT case when true then $s else $nullVal end FROM src limit 1")
    createQueryTest(s"case when then $nullVal else $i end ",
      s"SELECT case when true then $nullVal else $s end FROM src limit 1")
  }

  test("[SPARK-2210] boolean cast on boolean value should be removed") {
    val q = "select cast(cast(key=0 as boolean) as boolean) from src"
    val project = TestHive.sql(q).queryExecution.sparkPlan.collect {
      case e: ProjectExec => e
    }.head

    // No cast expression introduced
    project.transformAllExpressions {
      case c: Cast =>
        fail(s"unexpected cast $c")
        c
    }

    // Only one equality check
    var numEquals = 0
    project.transformAllExpressions {
      case e: EqualTo =>
        numEquals += 1
        e
    }
    assert(numEquals === 1)
  }
}
Example 21
Source File: HiveTypeCoercionSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.catalyst.expressions.{Cast, EqualTo}
import org.apache.spark.sql.execution.ProjectExec
import org.apache.spark.sql.hive.test.TestHive

class HiveTypeCoercionSuite extends HiveComparisonTest {
  val baseTypes = Seq(
    ("1", "1"), ("1.0", "CAST(1.0 AS DOUBLE)"), ("1L", "1L"),
    ("1S", "1S"), ("1Y", "1Y"), ("'1'", "'1'"))

  baseTypes.foreach { case (ni, si) =>
    baseTypes.foreach { case (nj, sj) =>
      createQueryTest(s"$ni + $nj", s"SELECT $si + $sj FROM src LIMIT 1")
    }
  }

  val nullVal = "null"
  baseTypes.init.foreach { case (i, s) =>
    createQueryTest(s"case when then $i else $nullVal end ",
      s"SELECT case when true then $s else $nullVal end FROM src limit 1")
    createQueryTest(s"case when then $nullVal else $i end ",
      s"SELECT case when true then $nullVal else $s end FROM src limit 1")
  }

  test("[SPARK-2210] boolean cast on boolean value should be removed") {
    val q = "select cast(cast(key=0 as boolean) as boolean) from src"
    val project = TestHive.sql(q).queryExecution.sparkPlan.collect {
      case e: ProjectExec => e
    }.head

    // No cast expression introduced
    project.transformAllExpressions {
      case c: Cast =>
        fail(s"unexpected cast $c")
        c
    }

    // Only one equality check
    var numEquals = 0
    project.transformAllExpressions {
      case e: EqualTo =>
        numEquals += 1
        e
    }
    assert(numEquals === 1)
  }
}
Example 22
Source File: HivemallOpsSuite.scala From hivemall-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming

import org.apache.spark.ml.feature.HmLabeledPoint
import org.apache.spark.sql.Row
import org.apache.spark.sql.hive.HivemallOps._
import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.types._
import org.apache.spark.streaming.HivemallStreamingOps._

class HivemallOpsSuite extends TestSuiteBase {

  import org.apache.spark.sql.hive.test.TestHive.implicits._
  import org.apache.spark.streaming.HivemallOpsSuite._

  test("streaming") {
    withStreamingContext(new StreamingContext(TestHive.sparkContext, Milliseconds(500))) { ssc =>
      val input = Seq(
        Seq(HmLabeledPoint(features = "1:0.6" :: "2:0.1" :: Nil)),
        Seq(HmLabeledPoint(features = "2:0.9" :: Nil)),
        Seq(HmLabeledPoint(features = "1:0.2" :: Nil)),
        Seq(HmLabeledPoint(features = "2:0.1" :: Nil)),
        Seq(HmLabeledPoint(features = "0:0.6" :: "2:0.4" :: Nil))
      )

      val stream = new TestInputStream[HmLabeledPoint](ssc, input, 2)

      // Apply predictions on input streams
      val prediction = stream.predict { testDf =>
        val testDf_exploded = testDf
          .explode_array($"features")
          .select(rowid(), extract_feature($"feature"), extract_weight($"feature"))
        val predictDf = testDf_exploded
          .join(model, testDf_exploded("feature") === model("feature"), "LEFT_OUTER")
          .select($"rowid", ($"weight" * $"value").as("value"))
          .groupby("rowid").sum("value")
          .select($"rowid", sigmoid($"SUM(value)"))
        assert(predictDf.count > 0)
        predictDf
      }

      // Dummy output stream
      prediction.foreachRDD { _ => {} }
    }
  }
}

object HivemallOpsSuite {
  implicit val sqlContext = TestHive

  val model = TestHive.createDataFrame(
    TestHive.sparkContext.parallelize(
      Row(0, 0.3f) :: Row(1, 0.1f) :: Row(2, 0.6f) :: Row(3, 0.2f) :: Nil
    ),
    StructType(
      StructField("feature", IntegerType, true) ::
      StructField("weight", FloatType, true) ::
      Nil)
  )
}
Example 23
Source File: OapQuerySuite.scala From OAP with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import java.util.{Locale, TimeZone}

import org.scalatest.{BeforeAndAfter, Ignore}

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.internal.SQLConf

// Ignore because in separate package will encounter problem with shaded spark source.
@Ignore
class OapQuerySuite extends HiveComparisonTest with BeforeAndAfter {
  private lazy val originalTimeZone = TimeZone.getDefault
  private lazy val originalLocale = Locale.getDefault
  import org.apache.spark.sql.hive.test.TestHive._

  // Note: invoke TestHive will create a SparkContext which can't be configured by us.
  // So be careful this may affect current using SparkContext and cause strange problem.
  private lazy val originalCrossJoinEnabled = TestHive.conf.crossJoinEnabled

  override def beforeAll() {
    super.beforeAll()
    TestHive.setCacheTables(true)
    // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*)
    TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles"))
    // Add Locale setting
    Locale.setDefault(Locale.US)
    // Ensures that cross joins are enabled so that we can test them
    TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, true)
    TestHive.setConf(HiveUtils.CONVERT_METASTORE_PARQUET, true)
  }

  override def afterAll() {
    try {
      TestHive.setCacheTables(false)
      TimeZone.setDefault(originalTimeZone)
      Locale.setDefault(originalLocale)
      sql("DROP TEMPORARY FUNCTION IF EXISTS udtf_count2")
      TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, originalCrossJoinEnabled)
    } finally {
      super.afterAll()
    }
  }

  private def assertDupIndex(body: => Unit): Unit = {
    val e = intercept[AnalysisException] { body }
    assert(e.getMessage.toLowerCase.contains("exists"))
  }

  test("create hive table in parquet format") {
    try {
      sql("create table p_table (key int, val string) stored as parquet")
      sql("insert overwrite table p_table select * from src")
      sql("create oindex if not exists p_index on p_table(key)")
      assert(sql("select val from p_table where key = 238")
        .collect().head.getString(0) == "val_238")
    } finally {
      sql("drop oindex p_index on p_table")
      sql("drop table p_table")
    }
  }

  test("create duplicate hive table in parquet format") {
    try {
      sql("create table p_table1 (key int, val string) stored as parquet")
      sql("insert overwrite table p_table1 select * from src")
      sql("create oindex p_index on p_table1(key)")
      assertDupIndex { sql("create oindex p_index on p_table1(key)") }
    } finally {
      sql("drop oindex p_index on p_table1")
    }
  }
}
Example 24
Source File: HiveSerDeSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.scalatest.BeforeAndAfterAll

import org.apache.spark.sql.hive.test.TestHive

class HiveSerDeSuite extends HiveComparisonTest with BeforeAndAfterAll {

  override def beforeAll(): Unit = {
    import TestHive._
    import org.apache.hadoop.hive.serde2.RegexSerDe
    super.beforeAll()
    TestHive.setCacheTables(false)
    sql(s"""CREATE TABLE IF NOT EXISTS sales (key STRING, value INT)
           |ROW FORMAT SERDE '${classOf[RegexSerDe].getCanonicalName}'
           |WITH SERDEPROPERTIES ("input.regex" = "([^ ]*)\t([^ ]*)")
         """.stripMargin)
    sql(s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/sales.txt")}' INTO TABLE sales")
  }

  // table sales is not a cache table, and will be clear after reset
  createQueryTest("Read with RegexSerDe", "SELECT * FROM sales", false)

  createQueryTest(
    "Read and write with LazySimpleSerDe (tab separated)",
    "SELECT * from serdeins")

  createQueryTest("Read with AvroSerDe", "SELECT * FROM episodes")

  createQueryTest("Read Partitioned with AvroSerDe", "SELECT * FROM episodes_part")
}
Example 25
Source File: HiveTypeCoercionSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.catalyst.expressions.{Cast, EqualTo}
import org.apache.spark.sql.execution.ProjectExec
import org.apache.spark.sql.hive.test.TestHive

class HiveTypeCoercionSuite extends HiveComparisonTest {
  val baseTypes = Seq(
    ("1", "1"), ("1.0", "CAST(1.0 AS DOUBLE)"), ("1L", "1L"),
    ("1S", "1S"), ("1Y", "1Y"), ("'1'", "'1'"))

  baseTypes.foreach { case (ni, si) =>
    baseTypes.foreach { case (nj, sj) =>
      createQueryTest(s"$ni + $nj", s"SELECT $si + $sj FROM src LIMIT 1")
    }
  }

  val nullVal = "null"
  baseTypes.init.foreach { case (i, s) =>
    createQueryTest(s"case when then $i else $nullVal end ",
      s"SELECT case when true then $s else $nullVal end FROM src limit 1")
    createQueryTest(s"case when then $nullVal else $i end ",
      s"SELECT case when true then $nullVal else $s end FROM src limit 1")
  }

  test("[SPARK-2210] boolean cast on boolean value should be removed") {
    val q = "select cast(cast(key=0 as boolean) as boolean) from src"
    val project = TestHive.sql(q).queryExecution.sparkPlan.collect {
      case e: ProjectExec => e
    }.head

    // No cast expression introduced
    project.transformAllExpressions {
      case c: Cast =>
        fail(s"unexpected cast $c")
        c
    }

    // Only one equality check
    var numEquals = 0
    project.transformAllExpressions {
      case e: EqualTo =>
        numEquals += 1
        e
    }
    assert(numEquals === 1)
  }
}
Example 26
Source File: DatasetPerformanceSuite.scala From spark-sql-perf with Apache License 2.0 | 5 votes |
package com.databricks.spark.sql.perf

import org.apache.spark.sql.hive.test.TestHive
import org.scalatest.FunSuite

class DatasetPerformanceSuite extends FunSuite {

  ignore("run benchmark") {
    TestHive // Init HiveContext
    val benchmark = new DatasetPerformance() {
      override val numLongs = 100
    }
    import benchmark._
    val exp = runExperiment(allBenchmarks)
    exp.waitForFinish(10000)
  }
}
Example 27
Source File: RangerSparkPlanOmitStrategyTest.scala From spark-ranger with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution

import org.apache.spark.sql.catalyst.plans.logical.{RangerSparkMasking, RangerSparkRowFilter}
import org.apache.spark.sql.hive.test.TestHive
import org.scalatest.FunSuite

class RangerSparkPlanOmitStrategyTest extends FunSuite {

  private val spark = TestHive.sparkSession

  test("ranger spark plan omit strategy") {
    val strategy = RangerSparkPlanOmitStrategy(spark)
    val df = spark.range(0, 5)
    val plan1 = df.queryExecution.optimizedPlan
    assert(strategy.apply(plan1) === Nil)
    val plan2 = RangerSparkRowFilter(plan1)
    assert(strategy.apply(plan2) === PlanLater(plan1) :: Nil)
    val plan3 = RangerSparkMasking(plan1)
    assert(strategy.apply(plan3) === PlanLater(plan1) :: Nil)
    val plan4 = RangerSparkMasking(plan2)
    assert(strategy.apply(plan4) === PlanLater(plan2) :: Nil)
    val plan5 = RangerSparkRowFilter(plan3)
    assert(strategy.apply(plan5) === PlanLater(plan3) :: Nil)
  }
}
Example 28
Source File: HiveSerDeSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.scalatest.BeforeAndAfterAll

import org.apache.spark.sql.hive.test.TestHive

class HiveSerDeSuite extends HiveComparisonTest with BeforeAndAfterAll {

  override def beforeAll(): Unit = {
    import TestHive._
    import org.apache.hadoop.hive.serde2.RegexSerDe
    super.beforeAll()
    TestHive.setCacheTables(false)
    sql(s"""CREATE TABLE IF NOT EXISTS sales (key STRING, value INT)
           |ROW FORMAT SERDE '${classOf[RegexSerDe].getCanonicalName}'
           |WITH SERDEPROPERTIES ("input.regex" = "([^ ]*)\t([^ ]*)")
         """.stripMargin)
    sql(s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/sales.txt")}' INTO TABLE sales")
  }

  // table sales is not a cache table, and will be clear after reset
  createQueryTest("Read with RegexSerDe", "SELECT * FROM sales", false)

  createQueryTest(
    "Read and write with LazySimpleSerDe (tab separated)",
    "SELECT * from serdeins")

  createQueryTest("Read with AvroSerDe", "SELECT * FROM episodes")

  createQueryTest("Read Partitioned with AvroSerDe", "SELECT * FROM episodes_part")
}
Example 29
Source File: RangerSparkRowFilterExtensionTest.scala From spark-ranger with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.optimizer

import org.apache.spark.sql.hive.test.TestHive
import org.scalatest.FunSuite

import org.apache.spark.sql.RangerSparkTestUtils._
import org.apache.spark.sql.catalyst.plans.logical.{Filter, RangerSparkRowFilter}

class RangerSparkRowFilterExtensionTest extends FunSuite {

  private val spark = TestHive.sparkSession

  test("ranger spark row filter extension") {
    val extension = RangerSparkRowFilterExtension(spark)
    val plan = spark.sql("select * from src").queryExecution.optimizedPlan
    println(plan)
    withUser("bob") {
      val newPlan = extension.apply(plan)
      assert(newPlan.isInstanceOf[RangerSparkRowFilter])
      val filters = newPlan.collect { case f: Filter => f }
      assert(filters.nonEmpty, "ranger row level filters should be applied automatically")
      println(newPlan)
    }
    withUser("alice") {
      val newPlan = extension.apply(plan)
      assert(newPlan.isInstanceOf[RangerSparkRowFilter])
      val filters = newPlan.collect { case f: Filter => f }
      assert(filters.isEmpty, "alice does not have implicit filters")
      println(newPlan)
    }
  }
}
Example 30
Source File: DeltaHiveTest.scala From delta with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.delta.test

import org.apache.spark.sql.delta.catalog.DeltaCatalog
import io.delta.sql.DeltaSparkSessionExtension
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.{SparkContext, SparkFunSuite}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.hive.test.{TestHive, TestHiveContext}
import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
import org.apache.spark.sql.test.SQLTestUtils

trait DeltaHiveTest extends SparkFunSuite with BeforeAndAfterAll { self: SQLTestUtils =>

  private var _session: SparkSession = _
  private var _hiveContext: TestHiveContext = _
  private var _sc: SparkContext = _

  override def beforeAll(): Unit = {
    val conf = TestHive.sparkSession.sparkContext.getConf.clone()
    TestHive.sparkSession.stop()
    conf.set(SQLConf.V2_SESSION_CATALOG_IMPLEMENTATION.key, classOf[DeltaCatalog].getName)
    conf.set(StaticSQLConf.SPARK_SESSION_EXTENSIONS.key,
      classOf[DeltaSparkSessionExtension].getName)
    _sc = new SparkContext("local", this.getClass.getName, conf)
    _hiveContext = new TestHiveContext(_sc)
    _session = _hiveContext.sparkSession
    SparkSession.setActiveSession(_session)
    super.beforeAll()
  }

  override protected def spark: SparkSession = _session

  override def afterAll(): Unit = {
    try {
      _hiveContext.reset()
    } finally {
      _sc.stop()
    }
  }
}
Example 31
Source File: HiveSerDeSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.scalatest.BeforeAndAfterAll

import org.apache.spark.sql.hive.test.TestHive

class HiveSerDeSuite extends HiveComparisonTest with BeforeAndAfterAll {

  override def beforeAll(): Unit = {
    import TestHive._
    import org.apache.hadoop.hive.serde2.RegexSerDe
    super.beforeAll()
    TestHive.setCacheTables(false)
    sql(s"""CREATE TABLE IF NOT EXISTS sales (key STRING, value INT)
           |ROW FORMAT SERDE '${classOf[RegexSerDe].getCanonicalName}'
           |WITH SERDEPROPERTIES ("input.regex" = "([^ ]*)\t([^ ]*)")
         """.stripMargin)
    sql(s"LOAD DATA LOCAL INPATH '${getHiveFile("data/files/sales.txt")}' INTO TABLE sales")
  }

  // table sales is not a cache table, and will be clear after reset
  createQueryTest("Read with RegexSerDe", "SELECT * FROM sales", false)

  createQueryTest(
    "Read and write with LazySimpleSerDe (tab separated)",
    "SELECT * from serdeins")

  createQueryTest("Read with AvroSerDe", "SELECT * FROM episodes")

  createQueryTest("Read Partitioned with AvroSerDe", "SELECT * FROM episodes_part")
}
Example 32
Source File: HiveTypeCoercionSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.catalyst.expressions.{Cast, EqualTo}
import org.apache.spark.sql.execution.ProjectExec
import org.apache.spark.sql.hive.test.TestHive

class HiveTypeCoercionSuite extends HiveComparisonTest {
  val baseTypes = Seq(
    ("1", "1"), ("1.0", "CAST(1.0 AS DOUBLE)"), ("1L", "1L"),
    ("1S", "1S"), ("1Y", "1Y"), ("'1'", "'1'"))

  baseTypes.foreach { case (ni, si) =>
    baseTypes.foreach { case (nj, sj) =>
      createQueryTest(s"$ni + $nj", s"SELECT $si + $sj FROM src LIMIT 1")
    }
  }

  val nullVal = "null"
  baseTypes.init.foreach { case (i, s) =>
    createQueryTest(s"case when then $i else $nullVal end ",
      s"SELECT case when true then $s else $nullVal end FROM src limit 1")
    createQueryTest(s"case when then $nullVal else $i end ",
      s"SELECT case when true then $nullVal else $s end FROM src limit 1")
  }

  test("[SPARK-2210] boolean cast on boolean value should be removed") {
    val q = "select cast(cast(key=0 as boolean) as boolean) from src"
    val project = TestHive.sql(q).queryExecution.sparkPlan.collect {
      case e: ProjectExec => e
    }.head

    // No cast expression introduced
    project.transformAllExpressions {
      case c: Cast =>
        fail(s"unexpected cast $c")
        c
    }

    // Only one equality check
    var numEquals = 0
    project.transformAllExpressions {
      case e: EqualTo =>
        numEquals += 1
        e
    }
    assert(numEquals === 1)
  }
}
Example 33
Source File: HiveDataFrameAnalyticsSuite.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive

import org.apache.spark.sql.{DataFrame, QueryTest}
import org.apache.spark.sql.functions._
import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.hive.test.TestHive._
import org.apache.spark.sql.hive.test.TestHive.implicits._
import org.scalatest.BeforeAndAfterAll

// TODO ideally we should put the test suite into the package `sql`, as
// `hive` package is optional in compiling, however, `SQLContext.sql` doesn't
// support the `cube` or `rollup` yet.
class HiveDataFrameAnalyticsSuite extends QueryTest with BeforeAndAfterAll {
  private var testData: DataFrame = _

  override def beforeAll() {
    testData = Seq((1, 2), (2, 4)).toDF("a", "b")
    TestHive.registerDataFrameAsTable(testData, "mytable")
  }

  override def afterAll(): Unit = {
    TestHive.dropTempTable("mytable")
  }

  test("rollup") {
    checkAnswer(
      testData.rollup($"a" + $"b", $"b").agg(sum($"a" - $"b")),
      sql("select a + b, b, sum(a - b) from mytable group by a + b, b with rollup").collect()
    )

    checkAnswer(
      testData.rollup("a", "b").agg(sum("b")),
      sql("select a, b, sum(b) from mytable group by a, b with rollup").collect()
    )
  }

  test("cube") {
    checkAnswer(
      testData.cube($"a" + $"b", $"b").agg(sum($"a" - $"b")),
      sql("select a + b, b, sum(a - b) from mytable group by a + b, b with cube").collect()
    )

    checkAnswer(
      testData.cube("a", "b").agg(sum("b")),
      sql("select a, b, sum(b) from mytable group by a, b with cube").collect()
    )
  }
}
Example 34
Source File: QueryPartitionSuite.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive

import com.google.common.io.Files

import org.apache.spark.sql.{QueryTest, _}
import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.hive.test.TestHive._
import org.apache.spark.util.Utils

class QueryPartitionSuite extends QueryTest {
  import org.apache.spark.sql.hive.test.TestHive.implicits._

  test("SPARK-5068: query data when path doesn't exist") {
    val testData = TestHive.sparkContext.parallelize(
      (1 to 10).map(i => TestData(i, i.toString))).toDF()
    testData.registerTempTable("testData")

    val tmpDir = Files.createTempDir()
    // create the table for test
    sql(s"CREATE TABLE table_with_partition(key int,value string) " +
      s"PARTITIONED by (ds string) location '${tmpDir.toURI.toString}' ")
    sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='1') " +
      "SELECT key,value FROM testData")
    sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='2') " +
      "SELECT key,value FROM testData")
    sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='3') " +
      "SELECT key,value FROM testData")
    sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='4') " +
      "SELECT key,value FROM testData")

    // test for the exist path
    checkAnswer(sql("select key,value from table_with_partition"),
      testData.toSchemaRDD.collect ++ testData.toSchemaRDD.collect
        ++ testData.toSchemaRDD.collect ++ testData.toSchemaRDD.collect)

    // delete the path of one partition
    tmpDir.listFiles
      .find { f => f.isDirectory && f.getName().startsWith("ds=") }
      .foreach { f => Utils.deleteRecursively(f) }

    // test for after delete the path
    checkAnswer(sql("select key,value from table_with_partition"),
      testData.toSchemaRDD.collect ++ testData.toSchemaRDD.collect
        ++ testData.toSchemaRDD.collect)

    sql("DROP TABLE table_with_partition")
    sql("DROP TABLE createAndInsertTest")
  }
}
Example 35
Source File: OrcTest.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.orc

import java.io.File

import scala.reflect.ClassTag
import scala.reflect.runtime.universe.TypeTag

import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.sql._

private[sql] trait OrcTest extends SQLTestUtils {
  protected def hiveContext = sqlContext.asInstanceOf[HiveContext]

  import sqlContext.sparkContext
  import sqlContext.implicits._

  protected def withOrcTable[T <: Product: ClassTag: TypeTag]
      (data: Seq[T], tableName: String)
      (f: => Unit): Unit = {
    withOrcDataFrame(data) { df =>
      hiveContext.registerDataFrameAsTable(df, tableName)
      withTempTable(tableName)(f)
    }
  }

  protected def makeOrcFile[T <: Product: ClassTag: TypeTag](
      data: Seq[T], path: File): Unit = {
    data.toDF().write.format("orc").mode(SaveMode.Overwrite).save(path.getCanonicalPath)
  }

  protected def makeOrcFile[T <: Product: ClassTag: TypeTag](
      df: DataFrame, path: File): Unit = {
    df.write.format("orc").mode(SaveMode.Overwrite).save(path.getCanonicalPath)
  }
}
Example 36
Source File: HiveParquetSuite.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive

import org.apache.spark.sql.catalyst.expressions.Row
import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.parquet.ParquetTest
import org.apache.spark.sql.{QueryTest, SQLConf}

case class Cases(lower: String, UPPER: String)

class HiveParquetSuite extends QueryTest with ParquetTest {
  val sqlContext = TestHive

  import sqlContext._

  def run(prefix: String): Unit = {
    test(s"$prefix: Case insensitive attribute names") {
      withParquetTable((1 to 4).map(i => Cases(i.toString, i.toString)), "cases") {
        val expected = (1 to 4).map(i => Row(i.toString))
        checkAnswer(sql("SELECT upper FROM cases"), expected)
        checkAnswer(sql("SELECT LOWER FROM cases"), expected)
      }
    }

    test(s"$prefix: SELECT on Parquet table") {
      val data = (1 to 4).map(i => (i, s"val_$i"))
      withParquetTable(data, "t") {
        checkAnswer(sql("SELECT * FROM t"), data.map(Row.fromTuple))
      }
    }

    test(s"$prefix: Simple column projection + filter on Parquet table") {
      withParquetTable((1 to 4).map(i => (i % 2 == 0, i, s"val_$i")), "t") {
        checkAnswer(
          sql("SELECT `_1`, `_3` FROM t WHERE `_1` = true"),
          Seq(Row(true, "val_2"), Row(true, "val_4")))
      }
    }

    test(s"$prefix: Converting Hive to Parquet Table via saveAsParquetFile") {
      withTempPath { dir =>
        sql("SELECT * FROM src").write.parquet(dir.getCanonicalPath)
        read.parquet(dir.getCanonicalPath).registerTempTable("p")
        withTempTable("p") {
          checkAnswer(
            sql("SELECT * FROM src ORDER BY key"),
            sql("SELECT * from p ORDER BY key").collect().toSeq)
        }
      }
    }

    test(s"$prefix: INSERT OVERWRITE TABLE Parquet table") {
      withParquetTable((1 to 10).map(i => (i, s"val_$i")), "t") {
        withTempPath { file =>
          sql("SELECT * FROM t LIMIT 1").write.parquet(file.getCanonicalPath)
          read.parquet(file.getCanonicalPath).registerTempTable("p")
          withTempTable("p") {
            // let's do three overwrites for good measure
            sql("INSERT OVERWRITE TABLE p SELECT * FROM t")
            sql("INSERT OVERWRITE TABLE p SELECT * FROM t")
            sql("INSERT OVERWRITE TABLE p SELECT * FROM t")
            checkAnswer(sql("SELECT * FROM p"), sql("SELECT * FROM t").collect().toSeq)
          }
        }
      }
    }
  }

  withSQLConf(SQLConf.PARQUET_USE_DATA_SOURCE_API -> "true") {
    run("Parquet data source enabled")
  }

  withSQLConf(SQLConf.PARQUET_USE_DATA_SOURCE_API -> "false") {
    run("Parquet data source disabled")
  }
}