org.apache.hadoop.hive.metastore.api.FieldSchema Scala Examples
The following examples show how to use org.apache.hadoop.hive.metastore.api.FieldSchema.
You can go to the original project or source file by following the links above each example.
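Before the examples, a quick orientation: FieldSchema is the Thrift struct the Hive metastore uses to describe a single column as a (name, Hive type string, comment) triple. A minimal sketch of constructing and reading one (illustrative only, not taken from any of the projects below):

import org.apache.hadoop.hive.metastore.api.FieldSchema

// Three-argument constructor: column name, Hive type string, comment.
val col = new FieldSchema("user_id", "bigint", "surrogate key")
println(s"${col.getName} ${col.getType} ${col.getComment}")

// The no-arg constructor plus setters, as used by the FiltersSuite examples below.
val partCol = new FieldSchema()
partCol.setName("ds")
partCol.setType("string")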
Example 1
Source File: DescribeHiveTableCommand.scala From iolap with Apache License 2.0
package org.apache.spark.sql.hive.execution

import scala.collection.JavaConversions._

import org.apache.hadoop.hive.metastore.api.FieldSchema

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.sql.catalyst.expressions.{Attribute, Row}
import org.apache.spark.sql.execution.{SparkPlan, RunnableCommand}
import org.apache.spark.sql.hive.{HiveContext, MetastoreRelation}
import org.apache.spark.sql.hive.HiveShim
import org.apache.spark.sql.SQLContext

private[hive] case class DescribeHiveTableCommand(
    table: MetastoreRelation,
    override val output: Seq[Attribute],
    isExtended: Boolean) extends RunnableCommand {

  override def run(sqlContext: SQLContext): Seq[Row] = {
    // Trying to mimic the format of Hive's output. But not exactly the same.
    var results: Seq[(String, String, String)] = Nil

    val columns: Seq[FieldSchema] = table.hiveQlTable.getCols
    val partitionColumns: Seq[FieldSchema] = table.hiveQlTable.getPartCols

    results ++= columns.map(field => (field.getName, field.getType, field.getComment))

    if (partitionColumns.nonEmpty) {
      val partColumnInfo =
        partitionColumns.map(field => (field.getName, field.getType, field.getComment))
      results ++=
        partColumnInfo ++
          Seq(("# Partition Information", "", "")) ++
          Seq((s"# ${output.get(0).name}", output.get(1).name, output.get(2).name)) ++
          partColumnInfo
    }

    if (isExtended) {
      results ++= Seq(("Detailed Table Information", table.hiveQlTable.getTTable.toString, ""))
    }

    results.map { case (name, dataType, comment) =>
      Row(name, dataType, comment)
    }
  }
}
Example 2
Source File: Column.scala From flamy with Apache License 2.0
package com.flaminem.flamy.model

import com.flaminem.flamy.model.columns.{ColumnValue, ConstantValue, NoValue}
import com.flaminem.flamy.parsing.model.ColumnDependency
import org.apache.hadoop.hive.metastore.api.FieldSchema

// NOTE: only a fragment of the Column class body is shown in this listing; the snippet below
// is the tail of a toString-style method and is truncated at the top.
    val valueString: String = value match {
      case NoValue => ""
      case cd: ColumnDependency if cd.columnName == columnName => ""
      case v => s"=$value"
    }
    rawColumnName + valueString
  } else {
    val name: String = "name=" + this.rawColumnName
    val valueString: String = if (value.isEmpty) "" else s", value=$value"
    val typeString: String = if (columnType.isEmpty) "" else s", type=${columnType.get}"
    val commentString: String = if (comment.isEmpty) "" else s", comment=${comment.get}"
    "Column(" + name + valueString + typeString + commentString + ")"
  }
  }
}
Example 3
Source File: SparkSQLDriver.scala From BigDatalog with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.util.{Arrays, ArrayList => JArrayList, List => JList}

import org.apache.log4j.LogManager
import org.apache.spark.sql.AnalysisException

import scala.collection.JavaConverters._

import org.apache.commons.lang3.exception.ExceptionUtils
import org.apache.hadoop.hive.metastore.api.{FieldSchema, Schema}
import org.apache.hadoop.hive.ql.Driver
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse

import org.apache.spark.Logging
import org.apache.spark.sql.hive.{HiveContext, HiveMetastoreTypes}

private[hive] class SparkSQLDriver(
    val context: HiveContext = SparkSQLEnv.hiveContext)
  extends Driver
  with Logging {

  private[hive] var tableSchema: Schema = _
  private[hive] var hiveResponse: Seq[String] = _

  override def init(): Unit = {
  }

  private def getResultSetSchema(query: context.QueryExecution): Schema = {
    val analyzed = query.analyzed
    logDebug(s"Result Schema: ${analyzed.output}")
    if (analyzed.output.isEmpty) {
      new Schema(Arrays.asList(new FieldSchema("Response code", "string", "")), null)
    } else {
      val fieldSchemas = analyzed.output.map { attr =>
        new FieldSchema(attr.name, HiveMetastoreTypes.toMetastoreType(attr.dataType), "")
      }
      new Schema(fieldSchemas.asJava, null)
    }
  }

  override def run(command: String): CommandProcessorResponse = {
    // TODO unify the error code
    try {
      context.sparkContext.setJobDescription(command)
      val execution = context.executePlan(context.sql(command).logicalPlan)
      hiveResponse = execution.stringResult()
      tableSchema = getResultSetSchema(execution)
      new CommandProcessorResponse(0)
    } catch {
      case ae: AnalysisException =>
        logDebug(s"Failed in [$command]", ae)
        new CommandProcessorResponse(1, ExceptionUtils.getStackTrace(ae), null, ae)
      case cause: Throwable =>
        logError(s"Failed in [$command]", cause)
        new CommandProcessorResponse(1, ExceptionUtils.getStackTrace(cause), null, cause)
    }
  }

  override def close(): Int = {
    hiveResponse = null
    tableSchema = null
    0
  }

  override def getResults(res: JList[_]): Boolean = {
    if (hiveResponse == null) {
      false
    } else {
      res.asInstanceOf[JArrayList[String]].addAll(hiveResponse.asJava)
      hiveResponse = null
      true
    }
  }

  override def getSchema: Schema = tableSchema

  override def destroy() {
    super.destroy()
    hiveResponse = null
    tableSchema = null
  }
}
Example 4
Source File: FiltersSuite.scala From BigDatalog with Apache License 2.0
package org.apache.spark.sql.hive.client

import java.util.Collections

import org.apache.hadoop.hive.metastore.api.FieldSchema
import org.apache.hadoop.hive.serde.serdeConstants

import org.apache.spark.{Logging, SparkFunSuite}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types._

class FiltersSuite extends SparkFunSuite with Logging {
  private val shim = new Shim_v0_13

  private val testTable = new org.apache.hadoop.hive.ql.metadata.Table("default", "test")
  private val varCharCol = new FieldSchema()
  varCharCol.setName("varchar")
  varCharCol.setType(serdeConstants.VARCHAR_TYPE_NAME)
  testTable.setPartCols(Collections.singletonList(varCharCol))

  filterTest("string filter",
    (a("stringcol", StringType) > Literal("test")) :: Nil,
    "stringcol > \"test\"")

  filterTest("string filter backwards",
    (Literal("test") > a("stringcol", StringType)) :: Nil,
    "\"test\" > stringcol")

  filterTest("int filter",
    (a("intcol", IntegerType) === Literal(1)) :: Nil,
    "intcol = 1")

  filterTest("int filter backwards",
    (Literal(1) === a("intcol", IntegerType)) :: Nil,
    "1 = intcol")

  filterTest("int and string filter",
    (Literal(1) === a("intcol", IntegerType)) :: (Literal("a") === a("strcol", IntegerType)) :: Nil,
    "1 = intcol and \"a\" = strcol")

  filterTest("skip varchar",
    (Literal("") === a("varchar", StringType)) :: Nil,
    "")

  private def filterTest(name: String, filters: Seq[Expression], result: String) = {
    test(name) {
      val converted = shim.convertFilters(testTable, filters)
      if (converted != result) {
        fail(
          s"Expected filters ${filters.mkString(",")} to convert to '$result' but got '$converted'")
      }
    }
  }

  private def a(name: String, dataType: DataType) = AttributeReference(name, dataType)()
}
Example 5
Source File: DescribeHiveTableCommand.scala From BigDatalog with Apache License 2.0
package org.apache.spark.sql.hive.execution

import scala.collection.JavaConverters._

import org.apache.hadoop.hive.metastore.api.FieldSchema

import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.execution.RunnableCommand
import org.apache.spark.sql.hive.MetastoreRelation
import org.apache.spark.sql.{Row, SQLContext}

private[hive] case class DescribeHiveTableCommand(
    table: MetastoreRelation,
    override val output: Seq[Attribute],
    isExtended: Boolean) extends RunnableCommand {

  override def run(sqlContext: SQLContext): Seq[Row] = {
    // Trying to mimic the format of Hive's output. But not exactly the same.
    var results: Seq[(String, String, String)] = Nil

    val columns: Seq[FieldSchema] = table.hiveQlTable.getCols.asScala
    val partitionColumns: Seq[FieldSchema] = table.hiveQlTable.getPartCols.asScala

    results ++= columns.map(field => (field.getName, field.getType, field.getComment))

    if (partitionColumns.nonEmpty) {
      val partColumnInfo =
        partitionColumns.map(field => (field.getName, field.getType, field.getComment))
      results ++=
        partColumnInfo ++
          Seq(("# Partition Information", "", "")) ++
          Seq((s"# ${output(0).name}", output(1).name, output(2).name)) ++
          partColumnInfo
    }

    if (isExtended) {
      results ++= Seq(("Detailed Table Information", table.hiveQlTable.getTTable.toString, ""))
    }

    results.map { case (name, dataType, comment) =>
      Row(name, dataType, comment)
    }
  }
}
Example 6
Source File: SparkSQLDriver.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.util.{ArrayList => JArrayList, Arrays, List => JList}

import scala.collection.JavaConverters._

import org.apache.commons.lang3.exception.ExceptionUtils
import org.apache.hadoop.hive.metastore.api.{FieldSchema, Schema}
import org.apache.hadoop.hive.ql.Driver
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse

import org.apache.spark.internal.Logging
import org.apache.spark.sql.{AnalysisException, SQLContext}
import org.apache.spark.sql.execution.{QueryExecution, SQLExecution}

private[hive] class SparkSQLDriver(val context: SQLContext = SparkSQLEnv.sqlContext)
  extends Driver
  with Logging {

  private[hive] var tableSchema: Schema = _
  private[hive] var hiveResponse: Seq[String] = _

  override def init(): Unit = {
  }

  private def getResultSetSchema(query: QueryExecution): Schema = {
    val analyzed = query.analyzed
    logDebug(s"Result Schema: ${analyzed.output}")
    if (analyzed.output.isEmpty) {
      new Schema(Arrays.asList(new FieldSchema("Response code", "string", "")), null)
    } else {
      val fieldSchemas = analyzed.output.map { attr =>
        new FieldSchema(attr.name, attr.dataType.catalogString, "")
      }
      new Schema(fieldSchemas.asJava, null)
    }
  }

  override def run(command: String): CommandProcessorResponse = {
    // TODO unify the error code
    try {
      context.sparkContext.setJobDescription(command)
      val execution = context.sessionState.executePlan(context.sql(command).logicalPlan)
      hiveResponse = SQLExecution.withNewExecutionId(context.sparkSession, execution) {
        execution.hiveResultString()
      }
      tableSchema = getResultSetSchema(execution)
      new CommandProcessorResponse(0)
    } catch {
      case ae: AnalysisException =>
        logDebug(s"Failed in [$command]", ae)
        new CommandProcessorResponse(1, ExceptionUtils.getStackTrace(ae), null, ae)
      case cause: Throwable =>
        logError(s"Failed in [$command]", cause)
        new CommandProcessorResponse(1, ExceptionUtils.getStackTrace(cause), null, cause)
    }
  }

  override def close(): Int = {
    hiveResponse = null
    tableSchema = null
    0
  }

  override def getResults(res: JList[_]): Boolean = {
    if (hiveResponse == null) {
      false
    } else {
      res.asInstanceOf[JArrayList[String]].addAll(hiveResponse.asJava)
      hiveResponse = null
      true
    }
  }

  override def getSchema: Schema = tableSchema

  override def destroy() {
    super.destroy()
    hiveResponse = null
    tableSchema = null
  }
}
Example 7
Source File: FiltersSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.hive.client

import java.util.Collections

import org.apache.hadoop.hive.metastore.api.FieldSchema
import org.apache.hadoop.hive.serde.serdeConstants

import org.apache.spark.SparkFunSuite
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._

class FiltersSuite extends SparkFunSuite with Logging with PlanTest {
  private val shim = new Shim_v0_13

  private val testTable = new org.apache.hadoop.hive.ql.metadata.Table("default", "test")
  private val varCharCol = new FieldSchema()
  varCharCol.setName("varchar")
  varCharCol.setType(serdeConstants.VARCHAR_TYPE_NAME)
  testTable.setPartCols(Collections.singletonList(varCharCol))

  filterTest("string filter",
    (a("stringcol", StringType) > Literal("test")) :: Nil,
    "stringcol > \"test\"")

  filterTest("string filter backwards",
    (Literal("test") > a("stringcol", StringType)) :: Nil,
    "\"test\" > stringcol")

  filterTest("int filter",
    (a("intcol", IntegerType) === Literal(1)) :: Nil,
    "intcol = 1")

  filterTest("int filter backwards",
    (Literal(1) === a("intcol", IntegerType)) :: Nil,
    "1 = intcol")

  filterTest("int and string filter",
    (Literal(1) === a("intcol", IntegerType)) :: (Literal("a") === a("strcol", IntegerType)) :: Nil,
    "1 = intcol and \"a\" = strcol")

  filterTest("skip varchar",
    (Literal("") === a("varchar", StringType)) :: Nil,
    "")

  filterTest("SPARK-19912 String literals should be escaped for Hive metastore partition pruning",
    (a("stringcol", StringType) === Literal("p1\" and q=\"q1")) ::
      (Literal("p2\" and q=\"q2") === a("stringcol", StringType)) :: Nil,
    """stringcol = 'p1" and q="q1' and 'p2" and q="q2' = stringcol""")

  private def filterTest(name: String, filters: Seq[Expression], result: String) = {
    test(name) {
      withSQLConf(SQLConf.ADVANCED_PARTITION_PREDICATE_PUSHDOWN.key -> "true") {
        val converted = shim.convertFilters(testTable, filters)
        if (converted != result) {
          fail(s"Expected ${filters.mkString(",")} to convert to '$result' but got '$converted'")
        }
      }
    }
  }

  test("turn on/off ADVANCED_PARTITION_PREDICATE_PUSHDOWN") {
    import org.apache.spark.sql.catalyst.dsl.expressions._
    Seq(true, false).foreach { enabled =>
      withSQLConf(SQLConf.ADVANCED_PARTITION_PREDICATE_PUSHDOWN.key -> enabled.toString) {
        val filters =
          (Literal(1) === a("intcol", IntegerType) ||
            Literal(2) === a("intcol", IntegerType)) :: Nil
        val converted = shim.convertFilters(testTable, filters)
        if (enabled) {
          assert(converted == "(1 = intcol or 2 = intcol)")
        } else {
          assert(converted.isEmpty)
        }
      }
    }
  }

  private def a(name: String, dataType: DataType) = AttributeReference(name, dataType)()
}
Example 8
Source File: SparkSQLDriver.scala From spark1.52 with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.util.{ArrayList => JArrayList, List => JList}

import scala.collection.JavaConversions._

import org.apache.commons.lang3.exception.ExceptionUtils
import org.apache.hadoop.hive.metastore.api.{FieldSchema, Schema}
import org.apache.hadoop.hive.ql.Driver
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse

import org.apache.spark.Logging
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.hive.{HiveContext, HiveMetastoreTypes}

private[hive] class SparkSQLDriver(
    val context: HiveContext = SparkSQLEnv.hiveContext)
  extends Driver
  with Logging {

  private[hive] var tableSchema: Schema = _
  private[hive] var hiveResponse: Seq[String] = _

  override def init(): Unit = {
  }

  private def getResultSetSchema(query: context.QueryExecution): Schema = {
    val analyzed = query.analyzed
    logDebug(s"Result Schema: ${analyzed.output}")
    if (analyzed.output.size == 0) {
      new Schema(new FieldSchema("Response code", "string", "") :: Nil, null)
    } else {
      val fieldSchemas = analyzed.output.map { attr =>
        new FieldSchema(attr.name, HiveMetastoreTypes.toMetastoreType(attr.dataType), "")
      }
      new Schema(fieldSchemas, null)
    }
  }

  override def run(command: String): CommandProcessorResponse = {
    // TODO unify the error code
    try {
      context.sparkContext.setJobDescription(command)
      val execution = context.executePlan(context.sql(command).logicalPlan)
      hiveResponse = execution.stringResult()
      tableSchema = getResultSetSchema(execution)
      new CommandProcessorResponse(0)
    } catch {
      case ae: AnalysisException =>
        logDebug(s"Failed in [$command]", ae)
        new CommandProcessorResponse(1, ExceptionUtils.getStackTrace(ae), null, ae)
      case cause: Throwable =>
        logError(s"Failed in [$command]", cause)
        new CommandProcessorResponse(1, ExceptionUtils.getStackTrace(cause), null, cause)
    }
  }

  override def close(): Int = {
    hiveResponse = null
    tableSchema = null
    0
  }

  override def getResults(res: JList[_]): Boolean = {
    if (hiveResponse == null) {
      false
    } else {
      res.asInstanceOf[JArrayList[String]].addAll(hiveResponse)
      hiveResponse = null
      true
    }
  }

  override def getSchema: Schema = tableSchema

  override def destroy() {
    super.destroy()
    hiveResponse = null
    tableSchema = null
  }
}
Example 9
Source File: FiltersSuite.scala From spark1.52 with Apache License 2.0
package org.apache.spark.sql.hive.client

import scala.collection.JavaConversions._

import org.apache.hadoop.hive.metastore.api.FieldSchema
import org.apache.hadoop.hive.serde.serdeConstants

import org.apache.spark.{Logging, SparkFunSuite}
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types._

class FiltersSuite extends SparkFunSuite with Logging {
  private val shim = new Shim_v0_13

  private val testTable = new org.apache.hadoop.hive.ql.metadata.Table("default", "test")
  private val varCharCol = new FieldSchema()
  varCharCol.setName("varchar")
  varCharCol.setType(serdeConstants.VARCHAR_TYPE_NAME)
  testTable.setPartCols(varCharCol :: Nil)

  // string filter
  filterTest("string filter",
    (a("stringcol", StringType) > Literal("test")) :: Nil,
    "stringcol > \"test\"")

  // string filter backwards
  filterTest("string filter backwards",
    (Literal("test") > a("stringcol", StringType)) :: Nil,
    "\"test\" > stringcol")

  // int filter
  filterTest("int filter",
    (a("intcol", IntegerType) === Literal(1)) :: Nil,
    "intcol = 1")

  // int filter backwards
  filterTest("int filter backwards",
    (Literal(1) === a("intcol", IntegerType)) :: Nil,
    "1 = intcol")

  filterTest("int and string filter",
    (Literal(1) === a("intcol", IntegerType)) :: (Literal("a") === a("strcol", IntegerType)) :: Nil,
    "1 = intcol and \"a\" = strcol")

  filterTest("skip varchar",
    (Literal("") === a("varchar", StringType)) :: Nil,
    "")

  private def filterTest(name: String, filters: Seq[Expression], result: String) = {
    test(name) {
      val converted = shim.convertFilters(testTable, filters)
      if (converted != result) {
        fail(
          s"Expected filters ${filters.mkString(",")} to convert to '$result' but got '$converted'")
      }
    }
  }

  private def a(name: String, dataType: DataType) = AttributeReference(name, dataType)()
}
Example 10
Source File: DescribeHiveTableCommand.scala From spark1.52 with Apache License 2.0
package org.apache.spark.sql.hive.execution

import scala.collection.JavaConversions._

import org.apache.hadoop.hive.metastore.api.FieldSchema

import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.execution.RunnableCommand
import org.apache.spark.sql.hive.MetastoreRelation
import org.apache.spark.sql.{Row, SQLContext}

private[hive] case class DescribeHiveTableCommand(
    table: MetastoreRelation,
    override val output: Seq[Attribute],
    isExtended: Boolean) extends RunnableCommand {

  override def run(sqlContext: SQLContext): Seq[Row] = {
    // Trying to mimic the format of Hive's output. But not exactly the same.
    var results: Seq[(String, String, String)] = Nil

    val columns: Seq[FieldSchema] = table.hiveQlTable.getCols
    val partitionColumns: Seq[FieldSchema] = table.hiveQlTable.getPartCols

    results ++= columns.map(field => (field.getName, field.getType, field.getComment))

    if (partitionColumns.nonEmpty) {
      val partColumnInfo =
        partitionColumns.map(field => (field.getName, field.getType, field.getComment))
      results ++=
        partColumnInfo ++
          Seq(("# Partition Information", "", "")) ++
          Seq((s"# ${output.get(0).name}", output.get(1).name, output.get(2).name)) ++
          partColumnInfo
    }

    if (isExtended) {
      results ++= Seq(("Detailed Table Information", table.hiveQlTable.getTTable.toString, ""))
    }

    results.map { case (name, dataType, comment) =>
      Row(name, dataType, comment)
    }
  }
}
Example 11
Source File: AbstractSparkSQLDriver.scala From iolap with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import scala.collection.JavaConversions._

import org.apache.commons.lang3.exception.ExceptionUtils
import org.apache.hadoop.hive.metastore.api.{FieldSchema, Schema}
import org.apache.hadoop.hive.ql.Driver
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse

import org.apache.spark.Logging
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.hive.{HiveContext, HiveMetastoreTypes}

private[hive] abstract class AbstractSparkSQLDriver(
    val context: HiveContext = SparkSQLEnv.hiveContext)
  extends Driver
  with Logging {

  private[hive] var tableSchema: Schema = _
  private[hive] var hiveResponse: Seq[String] = _

  override def init(): Unit = {
  }

  private def getResultSetSchema(query: context.QueryExecution): Schema = {
    val analyzed = query.analyzed
    logDebug(s"Result Schema: ${analyzed.output}")
    if (analyzed.output.size == 0) {
      new Schema(new FieldSchema("Response code", "string", "") :: Nil, null)
    } else {
      val fieldSchemas = analyzed.output.map { attr =>
        new FieldSchema(attr.name, HiveMetastoreTypes.toMetastoreType(attr.dataType), "")
      }
      new Schema(fieldSchemas, null)
    }
  }

  override def run(command: String): CommandProcessorResponse = {
    // TODO unify the error code
    try {
      context.sparkContext.setJobDescription(command)
      val execution = context.executePlan(context.sql(command).logicalPlan)
      hiveResponse = execution.stringResult()
      tableSchema = getResultSetSchema(execution)
      new CommandProcessorResponse(0)
    }
  }

  def runWrapper(command: String): CommandProcessorResponseWrapper = try {
    val result = run(command)
    new CommandProcessorResponseWrapper(result, null)
  } catch {
    case ae: AnalysisException =>
      logDebug(s"Failed in [$command]", ae)
      new CommandProcessorResponseWrapper(new CommandProcessorResponse(1,
        ExceptionUtils.getStackTrace(ae), null), ae)
    case cause: Throwable =>
      logError(s"Failed in [$command]", cause)
      new CommandProcessorResponseWrapper(new CommandProcessorResponse(1,
        ExceptionUtils.getStackTrace(cause), null), cause)
  }

  override def close(): Int = {
    hiveResponse = null
    tableSchema = null
    0
  }

  override def getSchema: Schema = tableSchema

  override def destroy() {
    super.destroy()
    hiveResponse = null
    tableSchema = null
  }
}

private[hive] case class CommandProcessorResponseWrapper(
    rc: CommandProcessorResponse,
    cause: Throwable)
Example 12
Source File: FiltersSuite.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.hive.client

import java.util.Collections

import org.apache.hadoop.hive.metastore.api.FieldSchema
import org.apache.hadoop.hive.serde.serdeConstants

import org.apache.spark.SparkFunSuite
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types._

class FiltersSuite extends SparkFunSuite with Logging {
  private val shim = new Shim_v0_13

  private val testTable = new org.apache.hadoop.hive.ql.metadata.Table("default", "test")
  private val varCharCol = new FieldSchema()
  varCharCol.setName("varchar")
  varCharCol.setType(serdeConstants.VARCHAR_TYPE_NAME)
  testTable.setPartCols(Collections.singletonList(varCharCol))

  filterTest("string filter",
    (a("stringcol", StringType) > Literal("test")) :: Nil,
    "stringcol > \"test\"")

  filterTest("string filter backwards",
    (Literal("test") > a("stringcol", StringType)) :: Nil,
    "\"test\" > stringcol")

  filterTest("int filter",
    (a("intcol", IntegerType) === Literal(1)) :: Nil,
    "intcol = 1")

  filterTest("int filter backwards",
    (Literal(1) === a("intcol", IntegerType)) :: Nil,
    "1 = intcol")

  filterTest("int and string filter",
    (Literal(1) === a("intcol", IntegerType)) :: (Literal("a") === a("strcol", IntegerType)) :: Nil,
    "1 = intcol and \"a\" = strcol")

  filterTest("skip varchar",
    (Literal("") === a("varchar", StringType)) :: Nil,
    "")

  private def filterTest(name: String, filters: Seq[Expression], result: String) = {
    test(name) {
      val converted = shim.convertFilters(testTable, filters)
      if (converted != result) {
        fail(
          s"Expected filters ${filters.mkString(",")} to convert to '$result' but got '$converted'")
      }
    }
  }

  private def a(name: String, dataType: DataType) = AttributeReference(name, dataType)()
}
Example 13
Source File: SparkSQLDriver.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.util.{Arrays, ArrayList => JArrayList, List => JList}

import scala.collection.JavaConverters._

import org.apache.commons.lang3.exception.ExceptionUtils
import org.apache.hadoop.hive.metastore.api.{FieldSchema, Schema}
import org.apache.hadoop.hive.ql.Driver
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse

import org.apache.spark.internal.Logging
import org.apache.spark.sql.{AnalysisException, SQLContext, SparkSession}
import org.apache.spark.sql.execution.QueryExecution

private[hive] class SparkSQLDriver(val sparkSession: SparkSession = SparkSQLEnv.sparkSession)
  extends Driver
  with Logging {

  private[hive] var tableSchema: Schema = _
  private[hive] var hiveResponse: Seq[String] = _

  override def init(): Unit = {
  }

  private def getResultSetSchema(query: QueryExecution): Schema = {
    val analyzed = query.analyzed
    logDebug(s"Result Schema: ${analyzed.output}")
    if (analyzed.output.isEmpty) {
      new Schema(Arrays.asList(new FieldSchema("Response code", "string", "")), null)
    } else {
      val fieldSchemas = analyzed.output.map { attr =>
        new FieldSchema(attr.name, attr.dataType.catalogString, "")
      }
      new Schema(fieldSchemas.asJava, null)
    }
  }

  override def run(command: String): CommandProcessorResponse = {
    // TODO unify the error code
    try {
      sparkSession.sparkContext.setJobDescription(command)
      val execution = sparkSession.sessionState.executePlan(sparkSession.sql(command).logicalPlan)
      hiveResponse = execution.hiveResultString()
      tableSchema = getResultSetSchema(execution)
      new CommandProcessorResponse(0)
    } catch {
      case ae: AnalysisException =>
        logDebug(s"Failed in [$command]", ae)
        new CommandProcessorResponse(1, ExceptionUtils.getStackTrace(ae), null, ae)
      case cause: Throwable =>
        logError(s"Failed in [$command]", cause)
        new CommandProcessorResponse(1, ExceptionUtils.getStackTrace(cause), null, cause)
    }
  }

  override def close(): Int = {
    hiveResponse = null
    tableSchema = null
    0
  }

  override def getResults(res: JList[_]): Boolean = {
    if (hiveResponse == null) {
      false
    } else {
      res.asInstanceOf[JArrayList[String]].addAll(hiveResponse.asJava)
      hiveResponse = null
      true
    }
  }

  override def getSchema: Schema = tableSchema

  override def destroy() {
    super.destroy()
    hiveResponse = null
    tableSchema = null
  }
}
Example 14
Source File: FiltersSuite.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.hive.client

import java.util.Collections

import org.apache.hadoop.hive.metastore.api.FieldSchema
import org.apache.hadoop.hive.serde.serdeConstants

import org.apache.spark.SparkFunSuite
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types._

class FiltersSuite extends SparkFunSuite with Logging {
  private val shim = new Shim_v0_13

  private val testTable = new org.apache.hadoop.hive.ql.metadata.Table("default", "test")
  private val varCharCol = new FieldSchema()
  varCharCol.setName("varchar")
  varCharCol.setType(serdeConstants.VARCHAR_TYPE_NAME)
  testTable.setPartCols(Collections.singletonList(varCharCol))

  filterTest("string filter",
    (a("stringcol", StringType) > Literal("test")) :: Nil,
    "stringcol > \"test\"")

  filterTest("string filter backwards",
    (Literal("test") > a("stringcol", StringType)) :: Nil,
    "\"test\" > stringcol")

  filterTest("int filter",
    (a("intcol", IntegerType) === Literal(1)) :: Nil,
    "intcol = 1")

  filterTest("int filter backwards",
    (Literal(1) === a("intcol", IntegerType)) :: Nil,
    "1 = intcol")

  filterTest("int and string filter",
    (Literal(1) === a("intcol", IntegerType)) :: (Literal("a") === a("strcol", IntegerType)) :: Nil,
    "1 = intcol and \"a\" = strcol")

  filterTest("skip varchar",
    (Literal("") === a("varchar", StringType)) :: Nil,
    "")

  private def filterTest(name: String, filters: Seq[Expression], result: String) = {
    test(name) {
      val converted = shim.convertFilters(testTable, filters)
      if (converted != result) {
        fail(
          s"Expected filters ${filters.mkString(",")} to convert to '$result' but got '$converted'")
      }
    }
  }

  private def a(name: String, dataType: DataType) = AttributeReference(name, dataType)()
}
Example 15
Source File: CarbonHiveMetastoreListener.scala From carbondata with Apache License 2.0
package org.apache.carbondata.hive

import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hive.metastore.MetaStorePreEventListener
import org.apache.hadoop.hive.metastore.api.{FieldSchema, MetaException}
import org.apache.hadoop.hive.metastore.events._
import org.apache.hadoop.hive.metastore.events.PreEventContext.PreEventType._

import org.apache.spark.sql.types.{DataType, StructField, StructType}

class CarbonHiveMetastoreListener(conf: Configuration) extends MetaStorePreEventListener(conf) {

  override def onEvent(preEventContext: PreEventContext): Unit = {
    preEventContext.getEventType match {
      case CREATE_TABLE =>
        val table = preEventContext.asInstanceOf[PreCreateTableEvent].getTable
        val tableProps = table.getParameters
        if (tableProps != null &&
            (tableProps.get("spark.sql.sources.provider") == "org.apache.spark.sql.CarbonSource"
              || tableProps.get("spark.sql.sources.provider").equalsIgnoreCase("carbondata"))) {
          val numSchemaParts = tableProps.get("spark.sql.sources.schema.numParts")
          if (numSchemaParts != null && !numSchemaParts.isEmpty) {
            val parts = (0 until numSchemaParts.toInt).map { index =>
              val part = tableProps.get(s"spark.sql.sources.schema.part.${index}")
              if (part == null) {
                throw new MetaException(s"spark.sql.sources.schema.part.${index} is missing!")
              }
              part
            }
            // Stick all parts back to a single schema string.
            val schema = DataType.fromJson(parts.mkString).asInstanceOf[StructType]
            val hiveSchema = schema.map(toHiveColumn).asJava
            table.getSd.setCols(hiveSchema)
            table.getSd.setInputFormat("org.apache.carbondata.hive.MapredCarbonInputFormat")
            table.getSd.setOutputFormat("org.apache.carbondata.hive.MapredCarbonOutputFormat")
            val serdeInfo = table.getSd.getSerdeInfo
            serdeInfo.setSerializationLib("org.apache.carbondata.hive.CarbonHiveSerDe")
            val tablePath = serdeInfo.getParameters.get("tablePath")
            if (tablePath != null) {
              table.getSd.setLocation(tablePath)
            }
          }
        }
      case ALTER_TABLE =>
        val table = preEventContext.asInstanceOf[PreAlterTableEvent].getNewTable
        val tableProps = table.getParameters
        if (tableProps != null &&
            (tableProps.get("spark.sql.sources.provider") == "org.apache.spark.sql.CarbonSource"
              || tableProps.get("spark.sql.sources.provider").equalsIgnoreCase("carbondata"))) {
          val numSchemaParts = tableProps.get("spark.sql.sources.schema.numParts")
          if (numSchemaParts != null && !numSchemaParts.isEmpty) {
            val schemaParts = (0 until numSchemaParts.toInt).map { index =>
              val schemaPart = tableProps.get(s"spark.sql.sources.schema.part.$index")
              if (schemaPart == null) {
                throw new MetaException(s"spark.sql.sources.schema.part.$index is missing!")
              }
              schemaPart
            }
            // Stick all schemaParts back to a single schema string.
            val schema = DataType.fromJson(schemaParts.mkString).asInstanceOf[StructType]
            val hiveSchema = schema.map(toHiveColumn).asJava
            table.getSd.setCols(hiveSchema)
          }
        }
      case _ =>
        // do nothing
    }
  }

  private def toHiveColumn(c: StructField): FieldSchema = {
    val typeString = if (c.metadata.contains("HIVE_TYPE_STRING")) {
      c.metadata.getString("HIVE_TYPE_STRING")
    } else {
      c.dataType.catalogString
    }
    new FieldSchema(c.name, typeString, c.getComment().orNull)
  }
}
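The listener above converts the Spark schema into Hive FieldSchema objects and stores them on the table's StorageDescriptor via table.getSd.setCols(...). Reading them back uses the same metastore API; a minimal sketch (the describeCols helper is illustrative and not part of CarbonData):

import scala.collection.JavaConverters._
import org.apache.hadoop.hive.metastore.api.{FieldSchema, Table}

// Print the FieldSchema columns stored in a metastore Table's StorageDescriptor.
def describeCols(table: Table): Unit = {
  table.getSd.getCols.asScala.foreach { (fs: FieldSchema) =>
    println(s"${fs.getName}\t${fs.getType}\t${Option(fs.getComment).getOrElse("")}")
  }
}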
Example 16
Source File: SparkSQLDriver.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.util.{ArrayList => JArrayList, Arrays, List => JList}

import scala.collection.JavaConverters._

import org.apache.commons.lang3.exception.ExceptionUtils
import org.apache.hadoop.hive.metastore.api.{FieldSchema, Schema}
import org.apache.hadoop.hive.ql.Driver
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse

import org.apache.spark.internal.Logging
import org.apache.spark.sql.{AnalysisException, SQLContext}
import org.apache.spark.sql.execution.QueryExecution

private[hive] class SparkSQLDriver(val context: SQLContext = SparkSQLEnv.sqlContext)
  extends Driver
  with Logging {

  private[hive] var tableSchema: Schema = _
  private[hive] var hiveResponse: Seq[String] = _

  override def init(): Unit = {
  }

  private def getResultSetSchema(query: QueryExecution): Schema = {
    val analyzed = query.analyzed
    logDebug(s"Result Schema: ${analyzed.output}")
    if (analyzed.output.isEmpty) {
      new Schema(Arrays.asList(new FieldSchema("Response code", "string", "")), null)
    } else {
      val fieldSchemas = analyzed.output.map { attr =>
        new FieldSchema(attr.name, attr.dataType.catalogString, "")
      }
      new Schema(fieldSchemas.asJava, null)
    }
  }

  override def run(command: String): CommandProcessorResponse = {
    // TODO unify the error code
    try {
      context.sparkContext.setJobDescription(command)
      val execution = context.sessionState.executePlan(context.sql(command).logicalPlan)
      hiveResponse = execution.hiveResultString()
      tableSchema = getResultSetSchema(execution)
      new CommandProcessorResponse(0)
    } catch {
      case ae: AnalysisException =>
        logDebug(s"Failed in [$command]", ae)
        new CommandProcessorResponse(1, ExceptionUtils.getStackTrace(ae), null, ae)
      case cause: Throwable =>
        logError(s"Failed in [$command]", cause)
        new CommandProcessorResponse(1, ExceptionUtils.getStackTrace(cause), null, cause)
    }
  }

  override def close(): Int = {
    hiveResponse = null
    tableSchema = null
    0
  }

  override def getResults(res: JList[_]): Boolean = {
    if (hiveResponse == null) {
      false
    } else {
      res.asInstanceOf[JArrayList[String]].addAll(hiveResponse.asJava)
      hiveResponse = null
      true
    }
  }

  override def getSchema: Schema = tableSchema

  override def destroy() {
    super.destroy()
    hiveResponse = null
    tableSchema = null
  }
}
Example 17
Source File: FiltersSuite.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.hive.client

import java.util.Collections

import org.apache.hadoop.hive.metastore.api.FieldSchema
import org.apache.hadoop.hive.serde.serdeConstants

import org.apache.spark.SparkFunSuite
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.types._

class FiltersSuite extends SparkFunSuite with Logging {
  private val shim = new Shim_v0_13

  private val testTable = new org.apache.hadoop.hive.ql.metadata.Table("default", "test")
  private val varCharCol = new FieldSchema()
  varCharCol.setName("varchar")
  varCharCol.setType(serdeConstants.VARCHAR_TYPE_NAME)
  testTable.setPartCols(Collections.singletonList(varCharCol))

  filterTest("string filter",
    (a("stringcol", StringType) > Literal("test")) :: Nil,
    "stringcol > \"test\"")

  filterTest("string filter backwards",
    (Literal("test") > a("stringcol", StringType)) :: Nil,
    "\"test\" > stringcol")

  filterTest("int filter",
    (a("intcol", IntegerType) === Literal(1)) :: Nil,
    "intcol = 1")

  filterTest("int filter backwards",
    (Literal(1) === a("intcol", IntegerType)) :: Nil,
    "1 = intcol")

  filterTest("int and string filter",
    (Literal(1) === a("intcol", IntegerType)) :: (Literal("a") === a("strcol", IntegerType)) :: Nil,
    "1 = intcol and \"a\" = strcol")

  filterTest("skip varchar",
    (Literal("") === a("varchar", StringType)) :: Nil,
    "")

  private def filterTest(name: String, filters: Seq[Expression], result: String) = {
    test(name) {
      val converted = shim.convertFilters(testTable, filters)
      if (converted != result) {
        fail(
          s"Expected filters ${filters.mkString(",")} to convert to '$result' but got '$converted'")
      }
    }
  }

  private def a(name: String, dataType: DataType) = AttributeReference(name, dataType)()
}
Example 18
Source File: SparkSQLDriver.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.util.{ArrayList => JArrayList, Arrays, List => JList}

import scala.collection.JavaConverters._

import org.apache.commons.lang3.exception.ExceptionUtils
import org.apache.hadoop.hive.metastore.api.{FieldSchema, Schema}
import org.apache.hadoop.hive.ql.Driver
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse

import org.apache.spark.internal.Logging
import org.apache.spark.sql.{AnalysisException, SQLContext}
import org.apache.spark.sql.execution.{QueryExecution, SQLExecution}

private[hive] class SparkSQLDriver(val context: SQLContext = SparkSQLEnv.sqlContext)
  extends Driver
  with Logging {

  private[hive] var tableSchema: Schema = _
  private[hive] var hiveResponse: Seq[String] = _

  override def init(): Unit = {
  }

  private def getResultSetSchema(query: QueryExecution): Schema = {
    val analyzed = query.analyzed
    logDebug(s"Result Schema: ${analyzed.output}")
    if (analyzed.output.isEmpty) {
      new Schema(Arrays.asList(new FieldSchema("Response code", "string", "")), null)
    } else {
      val fieldSchemas = analyzed.output.map { attr =>
        new FieldSchema(attr.name, attr.dataType.catalogString, "")
      }
      new Schema(fieldSchemas.asJava, null)
    }
  }

  override def run(command: String): CommandProcessorResponse = {
    // TODO unify the error code
    try {
      context.sparkContext.setJobDescription(command)
      val execution = context.sessionState.executePlan(context.sql(command).logicalPlan)
      hiveResponse = SQLExecution.withNewExecutionId(context.sparkSession, execution) {
        execution.hiveResultString()
      }
      tableSchema = getResultSetSchema(execution)
      new CommandProcessorResponse(0)
    } catch {
      case ae: AnalysisException =>
        logDebug(s"Failed in [$command]", ae)
        new CommandProcessorResponse(1, ExceptionUtils.getStackTrace(ae), null, ae)
      case cause: Throwable =>
        logError(s"Failed in [$command]", cause)
        new CommandProcessorResponse(1, ExceptionUtils.getStackTrace(cause), null, cause)
    }
  }

  override def close(): Int = {
    hiveResponse = null
    tableSchema = null
    0
  }

  override def getResults(res: JList[_]): Boolean = {
    if (hiveResponse == null) {
      false
    } else {
      res.asInstanceOf[JArrayList[String]].addAll(hiveResponse.asJava)
      hiveResponse = null
      true
    }
  }

  override def getSchema: Schema = tableSchema

  override def destroy() {
    super.destroy()
    hiveResponse = null
    tableSchema = null
  }
}
Example 19
Source File: FiltersSuite.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.hive.client

import java.util.Collections

import org.apache.hadoop.hive.metastore.api.FieldSchema
import org.apache.hadoop.hive.serde.serdeConstants

import org.apache.spark.SparkFunSuite
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.PlanTest
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._

class FiltersSuite extends SparkFunSuite with Logging with PlanTest {
  private val shim = new Shim_v0_13

  private val testTable = new org.apache.hadoop.hive.ql.metadata.Table("default", "test")
  private val varCharCol = new FieldSchema()
  varCharCol.setName("varchar")
  varCharCol.setType(serdeConstants.VARCHAR_TYPE_NAME)
  testTable.setPartCols(Collections.singletonList(varCharCol))

  filterTest("string filter",
    (a("stringcol", StringType) > Literal("test")) :: Nil,
    "stringcol > \"test\"")

  filterTest("string filter backwards",
    (Literal("test") > a("stringcol", StringType)) :: Nil,
    "\"test\" > stringcol")

  filterTest("int filter",
    (a("intcol", IntegerType) === Literal(1)) :: Nil,
    "intcol = 1")

  filterTest("int filter backwards",
    (Literal(1) === a("intcol", IntegerType)) :: Nil,
    "1 = intcol")

  filterTest("int and string filter",
    (Literal(1) === a("intcol", IntegerType)) :: (Literal("a") === a("strcol", IntegerType)) :: Nil,
    "1 = intcol and \"a\" = strcol")

  filterTest("skip varchar",
    (Literal("") === a("varchar", StringType)) :: Nil,
    "")

  filterTest("SPARK-19912 String literals should be escaped for Hive metastore partition pruning",
    (a("stringcol", StringType) === Literal("p1\" and q=\"q1")) ::
      (Literal("p2\" and q=\"q2") === a("stringcol", StringType)) :: Nil,
    """stringcol = 'p1" and q="q1' and 'p2" and q="q2' = stringcol""")

  filterTest("SPARK-24879 null literals should be ignored for IN constructs",
    (a("intcol", IntegerType) in (Literal(1), Literal(null))) :: Nil,
    "(intcol = 1)")

  // Applying the predicate `x IN (NULL)` should return an empty set, but since this optimization
  // will be applied by Catalyst, this filter converter does not need to account for this.
  filterTest("SPARK-24879 IN predicates with only NULLs will not cause a NPE",
    (a("intcol", IntegerType) in Literal(null)) :: Nil,
    "")

  filterTest("typecast null literals should not be pushed down in simple predicates",
    (a("intcol", IntegerType) === Literal(null, IntegerType)) :: Nil,
    "")

  private def filterTest(name: String, filters: Seq[Expression], result: String) = {
    test(name) {
      withSQLConf(SQLConf.ADVANCED_PARTITION_PREDICATE_PUSHDOWN.key -> "true") {
        val converted = shim.convertFilters(testTable, filters)
        if (converted != result) {
          fail(s"Expected ${filters.mkString(",")} to convert to '$result' but got '$converted'")
        }
      }
    }
  }

  test("turn on/off ADVANCED_PARTITION_PREDICATE_PUSHDOWN") {
    import org.apache.spark.sql.catalyst.dsl.expressions._
    Seq(true, false).foreach { enabled =>
      withSQLConf(SQLConf.ADVANCED_PARTITION_PREDICATE_PUSHDOWN.key -> enabled.toString) {
        val filters =
          (Literal(1) === a("intcol", IntegerType) ||
            Literal(2) === a("intcol", IntegerType)) :: Nil
        val converted = shim.convertFilters(testTable, filters)
        if (enabled) {
          assert(converted == "(1 = intcol or 2 = intcol)")
        } else {
          assert(converted.isEmpty)
        }
      }
    }
  }

  private def a(name: String, dataType: DataType) = AttributeReference(name, dataType)()
}
Example 20
Source File: HiveSchemaFns.scala From eel-sdk with Apache License 2.0
package io.eels.component.hive

import com.sksamuel.exts.Logging
import io.eels.schema._
import org.apache.hadoop.hive.metastore.api.FieldSchema

// createReader FrameSchema from hive FieldSchemas
// see https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types
object HiveSchemaFns extends Logging {

  private val CharRegex = "char\\((.*?)\\)".r
  private val VarcharRegex = "varchar\\((.*?)\\)".r
  private val DecimalRegex = "decimal\\((\\d+),(\\d+)\\)".r
  private val StructRegex = "struct<(.*?)>".r

  // everything up to the type seperator, then letters (which is the datatype), with an optional type params
  private val StructElementRegex = "(.*?)\\:([a-z]+)(\\(.*?\\))?,?".r

  private val ArrayRegex = "array<(.*?)>".r

  def fromHiveField(fieldSchema: FieldSchema): Field =
    fromHive(fieldSchema.getName, fieldSchema.getType, fieldSchema.getComment)

  def fromHive(name: String, typeInfo: String, comment: String): Field = {
    val dataType = fromHiveType(typeInfo)
    Field(name, dataType, true).withComment(comment)
  }

  // converts a hive type string into an eel DataType.
  def fromHiveType(descriptor: String): DataType = descriptor match {
    case ArrayRegex(element) =>
      val elementType = fromHiveType(element)
      ArrayType(elementType)
    case "bigint" => LongType.Signed
    case "binary" => BinaryType
    case "boolean" => BooleanType
    case CharRegex(size) => CharType(size.toInt)
    case DecimalRegex(precision, scale) => DecimalType(Precision(precision.toInt), Scale(scale.toInt))
    case "date" => DateType
    case "double" => DoubleType
    case "float" => FloatType
    case "int" => IntType.Signed
    case "smallint" => ShortType.Signed
    case "string" => StringType
    case "timestamp" => TimestampMillisType
    case "tinyint" => ByteType.Signed
    case StructRegex(struct) =>
      val fields = StructElementRegex.findAllMatchIn(struct).map { pattern =>
        val name = pattern.group(1)
        val datatypeString = pattern.group(2) + Option(pattern.group(3)).getOrElse("")
        val dataType = fromHiveType(datatypeString)
        Field(name, dataType, true)
      }
      StructType(fields.toVector)
    case VarcharRegex(size) => VarcharType(size.toInt)
    case _ => sys.error(s"Unsupported hive type [$descriptor]")
  }

  // converts an eel Schema into a seq of hive FieldSchema's
  def toHiveFields(schema: StructType): Vector[FieldSchema] = schema.fields.map(toHiveField)

  // converts an eel field into a hive FieldSchema
  def toHiveField(field: Field): FieldSchema =
    new FieldSchema(field.name.toLowerCase(), toHiveType(field), field.comment.orNull)

  def toHiveType(field: Field): String = toHiveType(field.dataType)

  def toHiveType(dataType: DataType): String = dataType match {
    case ArrayType(elementType) => "array<" + toHiveType(elementType) + ">"
    case BigIntType => sys.error("Hive does not support java BigIntegers, use long or decimal")
    case BinaryType => "binary"
    case _: ByteType => "tinyint"
    case BooleanType => "boolean"
    case CharType(size) => s"varchar($size)"
    case DateType => "date"
    case DecimalType(precision, scale) => s"decimal(${precision.value},${scale.value})"
    case DoubleType => "double"
    case _: EnumType =>
      logger.warn("Hive does not support enum types; this field will be written as a varchar(255)")
      "varchar(255)"
    case FloatType => "float"
    case _: IntType => "int"
    case _: LongType => "bigint"
    case _: ShortType => "smallint"
    case StringType => "string"
    case StructType(fields) =>
      s"struct<${fields.map(toHiveField).map(field => s"${field.getName}:${field.getType}").mkString(",")}>"
    case TimestampMillisType => "timestamp"
    case VarcharType(size) => s"varchar($size)"
    case _ => sys.error(s"No conversion from eel type [$dataType] to hive type")
  }

  def toStructDDL(fields: Vector[Field]): String = {
    val types = fields.map { it => it.name + ":" + toHiveType(it) }.mkString(",")
    s"struct<$types>"
  }
}
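A short usage sketch for the helpers above (assuming the eel-sdk classes from this example are on the classpath; the column values are illustrative):

import io.eels.schema.{Field, StringType}
import org.apache.hadoop.hive.metastore.api.FieldSchema

// Hive metastore column -> eel field
val eelField: Field = HiveSchemaFns.fromHiveField(new FieldSchema("city", "string", "billing city"))

// eel field -> Hive metastore column
val hiveField: FieldSchema = HiveSchemaFns.toHiveField(Field("city", StringType, true))
println(s"${hiveField.getName}: ${hiveField.getType}")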
Example 21
Source File: SparkSQLDriver.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver

import java.util.{ArrayList => JArrayList, Arrays, List => JList}

import scala.collection.JavaConverters._

import org.apache.commons.lang3.exception.ExceptionUtils
import org.apache.hadoop.hive.metastore.api.{FieldSchema, Schema}
import org.apache.hadoop.hive.ql.Driver
import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse

import org.apache.spark.internal.Logging
import org.apache.spark.sql.{AnalysisException, SQLContext}
import org.apache.spark.sql.execution.QueryExecution

private[hive] class SparkSQLDriver(val context: SQLContext = SparkSQLEnv.sqlContext)
  extends Driver
  with Logging {

  private[hive] var tableSchema: Schema = _
  private[hive] var hiveResponse: Seq[String] = _

  override def init(): Unit = {
  }

  private def getResultSetSchema(query: QueryExecution): Schema = {
    val analyzed = query.analyzed
    logDebug(s"Result Schema: ${analyzed.output}")
    if (analyzed.output.isEmpty) {
      new Schema(Arrays.asList(new FieldSchema("Response code", "string", "")), null)
    } else {
      val fieldSchemas = analyzed.output.map { attr =>
        new FieldSchema(attr.name, attr.dataType.catalogString, "")
      }
      new Schema(fieldSchemas.asJava, null)
    }
  }

  override def run(command: String): CommandProcessorResponse = {
    // TODO unify the error code
    try {
      context.sparkContext.setJobDescription(command)
      val execution = context.sessionState.executePlan(context.sql(command).logicalPlan)
      hiveResponse = execution.hiveResultString()
      tableSchema = getResultSetSchema(execution)
      new CommandProcessorResponse(0)
    } catch {
      case ae: AnalysisException =>
        logDebug(s"Failed in [$command]", ae)
        new CommandProcessorResponse(1, ExceptionUtils.getStackTrace(ae), null, ae)
      case cause: Throwable =>
        logError(s"Failed in [$command]", cause)
        new CommandProcessorResponse(1, ExceptionUtils.getStackTrace(cause), null, cause)
    }
  }

  override def close(): Int = {
    hiveResponse = null
    tableSchema = null
    0
  }

  override def getResults(res: JList[_]): Boolean = {
    if (hiveResponse == null) {
      false
    } else {
      res.asInstanceOf[JArrayList[String]].addAll(hiveResponse.asJava)
      hiveResponse = null
      true
    }
  }

  override def getSchema: Schema = tableSchema

  override def destroy() {
    super.destroy()
    hiveResponse = null
    tableSchema = null
  }
}