org.apache.spark.sql.hive.HiveUtils Scala Examples
The following examples show how to use org.apache.spark.sql.hive.HiveUtils.
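Before the project-specific examples, here is a minimal sketch of the two HiveUtils entry points that recur below: newTemporaryConfiguration, which produces settings for a throwaway Derby-backed metastore, and newClientForMetadata, which builds a HiveClient from a SparkConf and Hadoop configuration. HiveUtils is package-private to Spark, so the sketch is declared under an org.apache.spark package, just like the real examples; the object name, the package suffix, and the listDatabases call are illustrative assumptions rather than code from any of the projects listed here.

package org.apache.spark.sql.hive.example

import org.apache.spark.SparkConf
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.sql.hive.HiveUtils

// Hypothetical demo object; not taken from any of the projects listed below.
object HiveUtilsQuickStart {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("hive-utils-demo").setMaster("local[*]")
    val hadoopConf = SparkHadoopUtil.get.newConfiguration(sparkConf)

    // Point the metastore client at a temporary, Derby-backed warehouse so the
    // demo never touches a real metastore (same pattern as HiveCliSessionStateSuite below).
    HiveUtils.newTemporaryConfiguration(useInMemoryDerby = true).foreach {
      case (key, value) => hadoopConf.set(key, value)
    }

    // Build a HiveClient for metadata operations and list the visible databases.
    val client = HiveUtils.newClientForMetadata(sparkConf, hadoopConf)
    println(s"Hive client version: ${client.version}")
    println(s"Databases: ${client.listDatabases("*").mkString(", ")}")
  }
}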
Example 1
Source File: SparkSQLSessionManager.scala From sparkoscope with Apache License 2.0

package org.apache.spark.sql.hive.thriftserver

import java.util.concurrent.Executors

import org.apache.commons.logging.Log
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.hive.service.cli.SessionHandle
import org.apache.hive.service.cli.session.SessionManager
import org.apache.hive.service.cli.thrift.TProtocolVersion
import org.apache.hive.service.server.HiveServer2

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.{HiveSessionState, HiveUtils}
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._
import org.apache.spark.sql.hive.thriftserver.server.SparkSQLOperationManager

private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: SQLContext)
  extends SessionManager(hiveServer)
  with ReflectedCompositeService {

  private lazy val sparkSqlOperationManager = new SparkSQLOperationManager()

  override def init(hiveConf: HiveConf) {
    setSuperField(this, "hiveConf", hiveConf)

    // Create operation log root directory, if operation logging is enabled
    if (hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED)) {
      invoke(classOf[SessionManager], this, "initOperationLogRootDir")
    }

    val backgroundPoolSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_THREADS)
    setSuperField(this, "backgroundOperationPool",
      Executors.newFixedThreadPool(backgroundPoolSize))
    getAncestorField[Log](this, 3, "LOG").info(
      s"HiveServer2: Async execution pool size $backgroundPoolSize")

    setSuperField(this, "operationManager", sparkSqlOperationManager)
    addService(sparkSqlOperationManager)
    initCompositeService(hiveConf)
  }

  override def openSession(
      protocol: TProtocolVersion,
      username: String,
      passwd: String,
      ipAddress: String,
      sessionConf: java.util.Map[String, String],
      withImpersonation: Boolean,
      delegationToken: String): SessionHandle = {
    val sessionHandle =
      super.openSession(protocol, username, passwd, ipAddress, sessionConf, withImpersonation,
        delegationToken)
    val session = super.getSession(sessionHandle)
    HiveThriftServer2.listener.onSessionCreated(
      session.getIpAddress, sessionHandle.getSessionId.toString, session.getUsername)
    val sessionState = sqlContext.sessionState.asInstanceOf[HiveSessionState]
    val ctx = if (sessionState.hiveThriftServerSingleSession) {
      sqlContext
    } else {
      sqlContext.newSession()
    }
    ctx.setConf("spark.sql.hive.version", HiveUtils.hiveExecutionVersion)
    if (sessionConf != null && sessionConf.containsKey("use:database")) {
      ctx.sql(s"use ${sessionConf.get("use:database")}")
    }
    sparkSqlOperationManager.sessionToContexts.put(sessionHandle, ctx)
    sessionHandle
  }

  override def closeSession(sessionHandle: SessionHandle) {
    HiveThriftServer2.listener.onSessionClosed(sessionHandle.getSessionId.toString)
    super.closeSession(sessionHandle)
    sparkSqlOperationManager.sessionToActivePool.remove(sessionHandle)
    sparkSqlOperationManager.sessionToContexts.remove(sessionHandle)
  }
}
Example 2
Source File: SparkSQLOperationsMenager.scala From bdg-sequila with Apache License 2.0

package org.apache.spark.sql.hive.thriftserver.server

import java.util.{Map => JMap}
import java.util.concurrent.ConcurrentHashMap

import org.apache.hive.service.cli._
import org.apache.hive.service.cli.operation.{ExecuteStatementOperation, Operation, OperationManager}
import org.apache.hive.service.cli.session.HiveSession

import org.apache.spark.internal.Logging
import org.apache.spark.sql.{SQLContext, SequilaSession}
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.thriftserver.{ReflectionUtils, SparkExecuteStatementOperationSeq}
import org.apache.spark.sql.internal.SQLConf

private[thriftserver] class SparkSQLOperationManagerSeq(ss: SequilaSession)
  extends OperationManager with Logging {

  val handleToOperation = ReflectionUtils
    .getSuperField[JMap[OperationHandle, Operation]](this, "handleToOperation")

  val sessionToActivePool = new ConcurrentHashMap[SessionHandle, String]()
  val sessionToContexts = new ConcurrentHashMap[SessionHandle, SQLContext]()

  override def newExecuteStatementOperation(
      parentSession: HiveSession,
      statement: String,
      confOverlay: JMap[String, String],
      async: Boolean): ExecuteStatementOperation = synchronized {
    val sqlContext = sessionToContexts.get(parentSession.getSessionHandle)
    require(sqlContext != null, s"Session handle: ${parentSession.getSessionHandle} has not been" +
      s" initialized or had already closed.")
    val conf = sqlContext.sessionState.conf
    val hiveSessionState = parentSession.getSessionState
    setConfMap(conf, hiveSessionState.getOverriddenConfigurations)
    setConfMap(conf, hiveSessionState.getHiveVariables)
    val runInBackground = async && conf.getConf(HiveUtils.HIVE_THRIFT_SERVER_ASYNC)
    val operation = new SparkExecuteStatementOperationSeq(parentSession, statement, confOverlay,
      runInBackground)(ss, sessionToActivePool)
    handleToOperation.put(operation.getHandle, operation)
    logDebug(s"Created Operation for $statement with session=$parentSession, " +
      s"runInBackground=$runInBackground")
    operation
  }

  def setConfMap(conf: SQLConf, confMap: java.util.Map[String, String]): Unit = {
    val iterator = confMap.entrySet().iterator()
    while (iterator.hasNext) {
      val kv = iterator.next()
      conf.setConfString(kv.getKey, kv.getValue)
    }
  }
}
Example 3
Source File: SparkSQLSessionManager.scala From bdg-sequila with Apache License 2.0

package org.apache.spark.sql.hive.thriftserver

import java.util.concurrent.Executors

import org.apache.commons.logging.Log
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.hive.service.cli.SessionHandle
import org.apache.hive.service.cli.session.SessionManager
import org.apache.hive.service.cli.thrift.TProtocolVersion
import org.apache.hive.service.server.HiveServer2

import org.apache.spark.sql.{SQLContext, SequilaSession}
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._
import org.apache.spark.sql.hive.thriftserver.server.SparkSQLOperationManagerSeq

private[hive] class SparkSQLSessionManagerSeq(hiveServer: HiveServer2, ss: SequilaSession)
  extends SessionManager(hiveServer)
  with ReflectedCompositeService {

  private lazy val sparkSqlOperationManager = new SparkSQLOperationManagerSeq(ss)

  override def init(hiveConf: HiveConf) {
    setSuperField(this, "operationManager", sparkSqlOperationManager)
    super.init(hiveConf)
  }

  override def openSession(
      protocol: TProtocolVersion,
      username: String,
      passwd: String,
      ipAddress: String,
      sessionConf: java.util.Map[String, String],
      withImpersonation: Boolean,
      delegationToken: String): SessionHandle = {
    val sessionHandle =
      super.openSession(protocol, username, passwd, ipAddress, sessionConf, withImpersonation,
        delegationToken)
    val session = super.getSession(sessionHandle)
    HiveThriftServer2Seq.listener.onSessionCreated(
      session.getIpAddress, sessionHandle.getSessionId.toString, session.getUsername)
    val ctx = if (ss.sqlContext.conf.hiveThriftServerSingleSession) {
      ss.sqlContext
    } else {
      ss.sqlContext.newSession()
    }
    // ctx.setConf(HiveUtils.FAKE_HIVE_VERSION.key, HiveUtils.builtinHiveVersion)
    if (sessionConf != null && sessionConf.containsKey("use:database")) {
      ctx.sql(s"use ${sessionConf.get("use:database")}")
    }
    sparkSqlOperationManager.sessionToContexts.put(sessionHandle, ctx)
    sessionHandle
  }

  override def closeSession(sessionHandle: SessionHandle) {
    HiveThriftServer2Seq.listener.onSessionClosed(sessionHandle.getSessionId.toString)
    super.closeSession(sessionHandle)
    sparkSqlOperationManager.sessionToActivePool.remove(sessionHandle)
    sparkSqlOperationManager.sessionToContexts.remove(sessionHandle)
  }
}
Example 4
Source File: SequilaThriftServer.scala From bdg-sequila with Apache License 2.0

package org.apache.spark.sql.hive.thriftserver

import java.io.File

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.internal.Logging
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.thriftserver.HiveThriftServer2.HiveThriftServer2Listener
import org.apache.spark.sql.hive.thriftserver.HiveThriftServer2Seq.HiveThriftServer2ListenerSeq
import org.apache.spark.sql.hive.thriftserver._
import org.apache.spark.sql.{SQLContext, SequilaSession, SparkSession}
import org.biodatageeks.sequila.utils.{SequilaRegister, UDFRegister}
import org.apache.spark.sql.hive.thriftserver.ui.{ThriftServerTab, ThriftServerTabSeq}

object SequilaThriftServer extends Logging {
  var uiTab: Option[ThriftServerTabSeq] = None
  var listener: HiveThriftServer2ListenerSeq = _

  @DeveloperApi
  def startWithContext(ss: SequilaSession): Unit = {
    // System.setSecurityManager(null)
    val server = new HiveThriftServer2Seq(ss)

    val executionHive = HiveUtils.newClientForExecution(
      ss.sqlContext.sparkContext.conf,
      ss.sparkContext.hadoopConfiguration)

    server.init(executionHive.conf)
    server.start()
    listener = new HiveThriftServer2ListenerSeq(server, ss.sqlContext.conf)
    ss.sqlContext.sparkContext.addSparkListener(listener)
    uiTab = if (ss.sqlContext.sparkContext.getConf.getBoolean("spark.ui.enabled", true)) {
      Some(new ThriftServerTabSeq(ss.sqlContext.sparkContext, listener))
    } else {
      None
    }
  }

  def main(args: Array[String]): Unit = {
    // System.setSecurityManager(null)
    val spark = SparkSession
      .builder
      .config("spark.sql.hive.thriftServer.singleSession", "true")
      .config("spark.sql.warehouse.dir",
        sys.env.getOrElse("SEQ_METASTORE_LOCATION", System.getProperty("user.dir")))
      // .config("spark.hadoop.hive.metastore.uris", "thrift://localhost:9083")
      .enableHiveSupport()
      // .master("local[1]")
      .getOrCreate

    val ss = new SequilaSession(spark)
    UDFRegister.register(ss)
    SequilaRegister.register(ss)

    HiveThriftServer2Seq.startWithContext(ss)
  }
}
Example 5
Source File: HiveCliSessionStateSuite.scala From Spark-2.3.1 with Apache License 2.0

package org.apache.spark.sql.hive.thriftserver

import org.apache.hadoop.hive.cli.CliSessionState
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.ql.session.SessionState

import org.apache.spark.{SparkConf, SparkFunSuite}
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.sql.hive.HiveUtils

class HiveCliSessionStateSuite extends SparkFunSuite {

  def withSessionClear(f: () => Unit): Unit = {
    try f finally SessionState.detachSession()
  }

  test("CliSessionState will be reused") {
    withSessionClear { () =>
      val hiveConf = new HiveConf(classOf[SessionState])
      HiveUtils.newTemporaryConfiguration(useInMemoryDerby = false).foreach {
        case (key, value) => hiveConf.set(key, value)
      }
      val sessionState: SessionState = new CliSessionState(hiveConf)
      SessionState.start(sessionState)
      val s1 = SessionState.get
      val sparkConf = new SparkConf()
      val hadoopConf = SparkHadoopUtil.get.newConfiguration(sparkConf)
      val s2 = HiveUtils.newClientForMetadata(sparkConf, hadoopConf).getState
      assert(s1 === s2)
      assert(s2.isInstanceOf[CliSessionState])
    }
  }

  test("SessionState will not be reused") {
    withSessionClear { () =>
      val sparkConf = new SparkConf()
      val hadoopConf = SparkHadoopUtil.get.newConfiguration(sparkConf)
      HiveUtils.newTemporaryConfiguration(useInMemoryDerby = false).foreach {
        case (key, value) => hadoopConf.set(key, value)
      }
      val hiveClient = HiveUtils.newClientForMetadata(sparkConf, hadoopConf)
      val s1 = hiveClient.getState
      val s2 = hiveClient.newSession().getState
      assert(s1 !== s2)
    }
  }
}
Example 6
Source File: SparkSQLEnv.scala From Spark-2.3.1 with Apache License 2.0

package org.apache.spark.sql.hive.thriftserver

import java.io.PrintStream

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.internal.Logging
import org.apache.spark.sql.{SparkSession, SQLContext}
import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils}
import org.apache.spark.util.Utils

private[hive] object SparkSQLEnv extends Logging {

  var sqlContext: SQLContext = _
  var sparkContext: SparkContext = _

  // The init() method that creates sparkContext and sqlContext is not shown in this excerpt.

  def stop() {
    logDebug("Shutting down Spark SQL Environment")
    // Stop the SparkContext
    if (SparkSQLEnv.sparkContext != null) {
      sparkContext.stop()
      sparkContext = null
      sqlContext = null
    }
  }
}
Example 7
Source File: SparkSQLOperationManager.scala From Spark-2.3.1 with Apache License 2.0

package org.apache.spark.sql.hive.thriftserver.server

import java.util.{Map => JMap}
import java.util.concurrent.ConcurrentHashMap

import org.apache.hive.service.cli._
import org.apache.hive.service.cli.operation.{ExecuteStatementOperation, Operation, OperationManager}
import org.apache.hive.service.cli.session.HiveSession

import org.apache.spark.internal.Logging
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.thriftserver.{ReflectionUtils, SparkExecuteStatementOperation}
import org.apache.spark.sql.internal.SQLConf

private[thriftserver] class SparkSQLOperationManager()
  extends OperationManager with Logging {

  val handleToOperation = ReflectionUtils
    .getSuperField[JMap[OperationHandle, Operation]](this, "handleToOperation")

  val sessionToActivePool = new ConcurrentHashMap[SessionHandle, String]()
  val sessionToContexts = new ConcurrentHashMap[SessionHandle, SQLContext]()

  override def newExecuteStatementOperation(
      parentSession: HiveSession,
      statement: String,
      confOverlay: JMap[String, String],
      async: Boolean): ExecuteStatementOperation = synchronized {
    val sqlContext = sessionToContexts.get(parentSession.getSessionHandle)
    require(sqlContext != null, s"Session handle: ${parentSession.getSessionHandle} has not been" +
      s" initialized or had already closed.")
    val conf = sqlContext.sessionState.conf
    val hiveSessionState = parentSession.getSessionState
    setConfMap(conf, hiveSessionState.getOverriddenConfigurations)
    setConfMap(conf, hiveSessionState.getHiveVariables)
    val runInBackground = async && conf.getConf(HiveUtils.HIVE_THRIFT_SERVER_ASYNC)
    val operation = new SparkExecuteStatementOperation(parentSession, statement, confOverlay,
      runInBackground)(sqlContext, sessionToActivePool)
    handleToOperation.put(operation.getHandle, operation)
    logDebug(s"Created Operation for $statement with session=$parentSession, " +
      s"runInBackground=$runInBackground")
    operation
  }

  def setConfMap(conf: SQLConf, confMap: java.util.Map[String, String]): Unit = {
    val iterator = confMap.entrySet().iterator()
    while (iterator.hasNext) {
      val kv = iterator.next()
      conf.setConfString(kv.getKey, kv.getValue)
    }
  }
}
Example 8
Source File: SparkSQLSessionManager.scala From Spark-2.3.1 with Apache License 2.0

package org.apache.spark.sql.hive.thriftserver

import java.util.concurrent.Executors

import org.apache.commons.logging.Log
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.hive.service.cli.SessionHandle
import org.apache.hive.service.cli.session.SessionManager
import org.apache.hive.service.cli.thrift.TProtocolVersion
import org.apache.hive.service.server.HiveServer2

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._
import org.apache.spark.sql.hive.thriftserver.server.SparkSQLOperationManager

private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: SQLContext)
  extends SessionManager(hiveServer)
  with ReflectedCompositeService {

  private lazy val sparkSqlOperationManager = new SparkSQLOperationManager()

  override def init(hiveConf: HiveConf) {
    setSuperField(this, "operationManager", sparkSqlOperationManager)
    super.init(hiveConf)
  }

  override def openSession(
      protocol: TProtocolVersion,
      username: String,
      passwd: String,
      ipAddress: String,
      sessionConf: java.util.Map[String, String],
      withImpersonation: Boolean,
      delegationToken: String): SessionHandle = {
    val sessionHandle =
      super.openSession(protocol, username, passwd, ipAddress, sessionConf, withImpersonation,
        delegationToken)
    val session = super.getSession(sessionHandle)
    HiveThriftServer2.listener.onSessionCreated(
      session.getIpAddress, sessionHandle.getSessionId.toString, session.getUsername)
    val ctx = if (sqlContext.conf.hiveThriftServerSingleSession) {
      sqlContext
    } else {
      sqlContext.newSession()
    }
    ctx.setConf(HiveUtils.FAKE_HIVE_VERSION.key, HiveUtils.builtinHiveVersion)
    if (sessionConf != null && sessionConf.containsKey("use:database")) {
      ctx.sql(s"use ${sessionConf.get("use:database")}")
    }
    sparkSqlOperationManager.sessionToContexts.put(sessionHandle, ctx)
    sessionHandle
  }

  override def closeSession(sessionHandle: SessionHandle) {
    HiveThriftServer2.listener.onSessionClosed(sessionHandle.getSessionId.toString)
    super.closeSession(sessionHandle)
    sparkSqlOperationManager.sessionToActivePool.remove(sessionHandle)
    sparkSqlOperationManager.sessionToContexts.remove(sessionHandle)
  }
}
Example 9
Source File: HiveVersionSuite.scala From Spark-2.3.1 with Apache License 2.0

package org.apache.spark.sql.hive.client

import org.apache.hadoop.conf.Configuration
import org.scalactic.source.Position
import org.scalatest.Tag

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.hive.HiveUtils

private[client] abstract class HiveVersionSuite(version: String) extends SparkFunSuite {
  protected var client: HiveClient = null

  protected def buildClient(hadoopConf: Configuration): HiveClient = {
    // Hive changed the default of datanucleus.schema.autoCreateAll from true to false and
    // hive.metastore.schema.verification from false to true since 2.0
    // For details, see the JIRA HIVE-6113 and HIVE-12463
    if (version == "2.0" || version == "2.1") {
      hadoopConf.set("datanucleus.schema.autoCreateAll", "true")
      hadoopConf.set("hive.metastore.schema.verification", "false")
    }
    HiveClientBuilder
      .buildClient(version, hadoopConf, HiveUtils.formatTimeVarsForHiveClient(hadoopConf))
  }

  override def suiteName: String = s"${super.suiteName}($version)"

  override protected def test(testName: String, testTags: Tag*)(testFun: => Any)
      (implicit pos: Position): Unit = {
    super.test(s"$version: $testName", testTags: _*)(testFun)
  }
}
Example 10
Source File: SparkSQLUtils.scala From kyuubi with Apache License 2.0

package org.apache.spark.sql

import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.types.DataType

object SparkSQLUtils {

  def toHiveString(a: (Any, DataType)): String = {
    HiveUtils.toHiveString(a)
  }

  def getUserJarClassLoader(sparkSession: SparkSession): ClassLoader = {
    sparkSession.sharedState.jarClassLoader
  }

  def parsePlan(sparkSession: SparkSession, statement: String): LogicalPlan = {
    sparkSession.sessionState.sqlParser.parsePlan(statement)
  }

  def toDataFrame(sparkSession: SparkSession, plan: LogicalPlan): DataFrame = {
    Dataset.ofRows(sparkSession, plan)
  }

  def initializeMetaStoreClient(sparkSession: SparkSession): Seq[String] = {
    sparkSession.sessionState.catalog.listDatabases("default")
  }
}
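For orientation, here is a short, hedged sketch of how these helpers could be driven from an ordinary Hive-enabled session. Only the SparkSQLUtils signatures come from the object above; the demo object, the session setup, and the sample statement are illustrative assumptions.

import org.apache.spark.sql.{SparkSession, SparkSQLUtils}
import org.apache.spark.sql.types.StringType

// Hypothetical driver; only the SparkSQLUtils calls are taken from the example above.
object SparkSQLUtilsDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("spark-sql-utils-demo")
      .master("local[*]")
      .enableHiveSupport()
      .getOrCreate()

    // Parse a statement into a logical plan, materialize it as a DataFrame,
    // and render the first column of the first row in Hive's string format.
    val plan = SparkSQLUtils.parsePlan(spark, "SELECT 'hello' AS greeting")
    val df = SparkSQLUtils.toDataFrame(spark, plan)
    val rendered = SparkSQLUtils.toHiveString((df.collect().head.get(0), StringType))
    println(rendered)

    spark.stop()
  }
}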
Example 11
Source File: SparkSQLEnv.scala From sparkoscope with Apache License 2.0

package org.apache.spark.sql.hive.thriftserver

import java.io.PrintStream

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.internal.Logging
import org.apache.spark.sql.{SparkSession, SQLContext}
import org.apache.spark.sql.hive.{HiveSessionState, HiveUtils}
import org.apache.spark.util.Utils

private[hive] object SparkSQLEnv extends Logging {

  var sqlContext: SQLContext = _
  var sparkContext: SparkContext = _

  // The init() method that creates sparkContext and sqlContext is not shown in this excerpt.

  def stop() {
    logDebug("Shutting down Spark SQL Environment")
    // Stop the SparkContext
    if (SparkSQLEnv.sparkContext != null) {
      sparkContext.stop()
      sparkContext = null
      sqlContext = null
    }
  }
}
Example 12
Source File: SparkSQLSessionManager.scala From drizzle-spark with Apache License 2.0

package org.apache.spark.sql.hive.thriftserver

import java.util.concurrent.Executors

import org.apache.commons.logging.Log
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.hive.service.cli.SessionHandle
import org.apache.hive.service.cli.session.SessionManager
import org.apache.hive.service.cli.thrift.TProtocolVersion
import org.apache.hive.service.server.HiveServer2

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.{HiveSessionState, HiveUtils}
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._
import org.apache.spark.sql.hive.thriftserver.server.SparkSQLOperationManager

private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: SQLContext)
  extends SessionManager(hiveServer)
  with ReflectedCompositeService {

  private lazy val sparkSqlOperationManager = new SparkSQLOperationManager()

  override def init(hiveConf: HiveConf) {
    setSuperField(this, "hiveConf", hiveConf)

    // Create operation log root directory, if operation logging is enabled
    if (hiveConf.getBoolVar(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED)) {
      invoke(classOf[SessionManager], this, "initOperationLogRootDir")
    }

    val backgroundPoolSize = hiveConf.getIntVar(ConfVars.HIVE_SERVER2_ASYNC_EXEC_THREADS)
    setSuperField(this, "backgroundOperationPool",
      Executors.newFixedThreadPool(backgroundPoolSize))
    getAncestorField[Log](this, 3, "LOG").info(
      s"HiveServer2: Async execution pool size $backgroundPoolSize")

    setSuperField(this, "operationManager", sparkSqlOperationManager)
    addService(sparkSqlOperationManager)
    initCompositeService(hiveConf)
  }

  override def openSession(
      protocol: TProtocolVersion,
      username: String,
      passwd: String,
      ipAddress: String,
      sessionConf: java.util.Map[String, String],
      withImpersonation: Boolean,
      delegationToken: String): SessionHandle = {
    val sessionHandle =
      super.openSession(protocol, username, passwd, ipAddress, sessionConf, withImpersonation,
        delegationToken)
    val session = super.getSession(sessionHandle)
    HiveThriftServer2.listener.onSessionCreated(
      session.getIpAddress, sessionHandle.getSessionId.toString, session.getUsername)
    val sessionState = sqlContext.sessionState.asInstanceOf[HiveSessionState]
    val ctx = if (sessionState.hiveThriftServerSingleSession) {
      sqlContext
    } else {
      sqlContext.newSession()
    }
    ctx.setConf("spark.sql.hive.version", HiveUtils.hiveExecutionVersion)
    if (sessionConf != null && sessionConf.containsKey("use:database")) {
      ctx.sql(s"use ${sessionConf.get("use:database")}")
    }
    sparkSqlOperationManager.sessionToContexts.put(sessionHandle, ctx)
    sessionHandle
  }

  override def closeSession(sessionHandle: SessionHandle) {
    HiveThriftServer2.listener.onSessionClosed(sessionHandle.getSessionId.toString)
    super.closeSession(sessionHandle)
    sparkSqlOperationManager.sessionToActivePool.remove(sessionHandle)
    sparkSqlOperationManager.sessionToContexts.remove(sessionHandle)
  }
}
Example 13
Source File: HiveClientSuite.scala From sparkoscope with Apache License 2.0

package org.apache.spark.sql.hive.client

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hive.conf.HiveConf

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.catalog._
import org.apache.spark.sql.catalyst.expressions.{AttributeReference, EqualTo, Literal}
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.types.IntegerType

class HiveClientSuite extends SparkFunSuite {
  private val clientBuilder = new HiveClientBuilder

  private val tryDirectSqlKey = HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL.varname

  test(s"getPartitionsByFilter returns all partitions when $tryDirectSqlKey=false") {
    val testPartitionCount = 5

    val storageFormat = CatalogStorageFormat(
      locationUri = None,
      inputFormat = None,
      outputFormat = None,
      serde = None,
      compressed = false,
      properties = Map.empty)

    val hadoopConf = new Configuration()
    hadoopConf.setBoolean(tryDirectSqlKey, false)
    val client = clientBuilder.buildClient(HiveUtils.hiveExecutionVersion, hadoopConf)
    client.runSqlHive("CREATE TABLE test (value INT) PARTITIONED BY (part INT)")

    val partitions = (1 to testPartitionCount).map { part =>
      CatalogTablePartition(Map("part" -> part.toString), storageFormat)
    }
    client.createPartitions(
      "default", "test", partitions, ignoreIfExists = false)

    val filteredPartitions = client.getPartitionsByFilter(client.getTable("default", "test"),
      Seq(EqualTo(AttributeReference("part", IntegerType)(), Literal(3))))

    assert(filteredPartitions.size == testPartitionCount)
  }
}
Example 14
Source File: OapQuerySuite.scala From OAP with Apache License 2.0

package org.apache.spark.sql.hive.execution

import java.util.{Locale, TimeZone}

import org.scalatest.{BeforeAndAfter, Ignore}

import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.test.TestHive
import org.apache.spark.sql.internal.SQLConf

// Ignore because in separate package will encounter problem with shaded spark source.
@Ignore
class OapQuerySuite extends HiveComparisonTest with BeforeAndAfter {
  private lazy val originalTimeZone = TimeZone.getDefault
  private lazy val originalLocale = Locale.getDefault

  import org.apache.spark.sql.hive.test.TestHive._

  // Note: invoke TestHive will create a SparkContext which can't be configured by us.
  // So be careful this may affect current using SparkContext and cause strange problem.
  private lazy val originalCrossJoinEnabled = TestHive.conf.crossJoinEnabled

  override def beforeAll() {
    super.beforeAll()
    TestHive.setCacheTables(true)
    // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*)
    TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles"))
    // Add Locale setting
    Locale.setDefault(Locale.US)
    // Ensures that cross joins are enabled so that we can test them
    TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, true)
    TestHive.setConf(HiveUtils.CONVERT_METASTORE_PARQUET, true)
  }

  override def afterAll() {
    try {
      TestHive.setCacheTables(false)
      TimeZone.setDefault(originalTimeZone)
      Locale.setDefault(originalLocale)
      sql("DROP TEMPORARY FUNCTION IF EXISTS udtf_count2")
      TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, originalCrossJoinEnabled)
    } finally {
      super.afterAll()
    }
  }

  private def assertDupIndex(body: => Unit): Unit = {
    val e = intercept[AnalysisException] { body }
    assert(e.getMessage.toLowerCase.contains("exists"))
  }

  test("create hive table in parquet format") {
    try {
      sql("create table p_table (key int, val string) stored as parquet")
      sql("insert overwrite table p_table select * from src")
      sql("create oindex if not exists p_index on p_table(key)")
      assert(sql("select val from p_table where key = 238")
        .collect().head.getString(0) == "val_238")
    } finally {
      sql("drop oindex p_index on p_table")
      sql("drop table p_table")
    }
  }

  test("create duplicate hive table in parquet format") {
    try {
      sql("create table p_table1 (key int, val string) stored as parquet")
      sql("insert overwrite table p_table1 select * from src")
      sql("create oindex p_index on p_table1(key)")
      assertDupIndex { sql("create oindex p_index on p_table1(key)") }
    } finally {
      sql("drop oindex p_index on p_table1")
    }
  }
}
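The HiveUtils.CONVERT_METASTORE_PARQUET entry toggled in beforeAll() corresponds to the spark.sql.hive.convertMetastoreParquet setting. Below is a minimal sketch of flipping it on a plain Hive-enabled session; the session, table, and query are illustrative assumptions, and the OAP-specific oindex commands from the test are not reproduced.

import org.apache.spark.sql.SparkSession

// Hypothetical stand-alone example; only the configuration key mirrors the test above.
object ConvertMetastoreParquetDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("convert-metastore-parquet-demo")
      .master("local[*]")
      .enableHiveSupport()
      .getOrCreate()

    // When true, Hive metastore Parquet tables are read with Spark's built-in
    // Parquet support instead of the Hive SerDe.
    spark.conf.set("spark.sql.hive.convertMetastoreParquet", "true")

    spark.sql("CREATE TABLE IF NOT EXISTS p_table (key INT, val STRING) STORED AS PARQUET")
    spark.sql("SELECT COUNT(*) FROM p_table").show()

    spark.stop()
  }
}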
Example 15
Source File: OapEnv.scala From OAP with Apache License 2.0

package org.apache.spark.sql.hive.thriftserver

import java.io.PrintStream

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.internal.Logging
import org.apache.spark.sql.{SparkSession, SQLContext}
import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils}
import org.apache.spark.sql.oap.listener.OapListener
import org.apache.spark.sql.oap.ui.OapTab
import org.apache.spark.util.Utils

private[spark] object OapEnv extends Logging {
  logDebug("Initializing Oap Env")

  var initialized: Boolean = false
  var sparkSession: SparkSession = _

  // This is to enable certain OAP features, like UI, even
  // in non-Spark SQL CLI/ThriftServer conditions
  def initWithoutCreatingSparkSession(): Unit = synchronized {
    if (!initialized && !Utils.isTesting) {
      val sc = SparkContext.getOrCreate()
      sc.addSparkListener(new OapListener)
      this.sparkSession = SparkSession.getActiveSession.get
      sc.ui.foreach(new OapTab(_))
      initialized = true
    }
  }
}
Example 16
Source File: HiveCliSessionStateSuite.scala From XSQL with Apache License 2.0

package org.apache.spark.sql.hive.thriftserver

import org.apache.hadoop.hive.cli.CliSessionState
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.ql.session.SessionState

import org.apache.spark.{SparkConf, SparkFunSuite}
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.sql.hive.HiveUtils

class HiveCliSessionStateSuite extends SparkFunSuite {

  def withSessionClear(f: () => Unit): Unit = {
    try f finally SessionState.detachSession()
  }

  test("CliSessionState will be reused") {
    withSessionClear { () =>
      val hiveConf = new HiveConf(classOf[SessionState])
      HiveUtils.newTemporaryConfiguration(useInMemoryDerby = false).foreach {
        case (key, value) => hiveConf.set(key, value)
      }
      val sessionState: SessionState = new CliSessionState(hiveConf)
      SessionState.start(sessionState)
      val s1 = SessionState.get
      val sparkConf = new SparkConf()
      val hadoopConf = SparkHadoopUtil.get.newConfiguration(sparkConf)
      val s2 = HiveUtils.newClientForMetadata(sparkConf, hadoopConf).getState
      assert(s1 === s2)
      assert(s2.isInstanceOf[CliSessionState])
    }
  }

  test("SessionState will not be reused") {
    withSessionClear { () =>
      val sparkConf = new SparkConf()
      val hadoopConf = SparkHadoopUtil.get.newConfiguration(sparkConf)
      HiveUtils.newTemporaryConfiguration(useInMemoryDerby = false).foreach {
        case (key, value) => hadoopConf.set(key, value)
      }
      val hiveClient = HiveUtils.newClientForMetadata(sparkConf, hadoopConf)
      val s1 = hiveClient.getState
      val s2 = hiveClient.newSession().getState
      assert(s1 !== s2)
    }
  }
}
Example 17
Source File: SparkSQLEnv.scala From XSQL with Apache License 2.0

package org.apache.spark.sql.hive.thriftserver

import java.io.PrintStream

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.internal.Logging
import org.apache.spark.sql.{SparkSession, SQLContext}
import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils}
import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
import org.apache.spark.util.Utils

private[hive] object SparkSQLEnv extends Logging {

  var sqlContext: SQLContext = _
  var sparkContext: SparkContext = _

  // The init() method that creates sparkContext and sqlContext is not shown in this excerpt.

  def stop() {
    logDebug("Shutting down Spark SQL Environment")
    // Stop the SparkContext
    if (SparkSQLEnv.sparkContext != null) {
      sparkContext.stop()
      sparkContext = null
      sqlContext = null
    }
  }
}
Example 18
Source File: SparkSQLOperationManager.scala From XSQL with Apache License 2.0

package org.apache.spark.sql.hive.thriftserver.server

import java.util.{Map => JMap}
import java.util.concurrent.ConcurrentHashMap

import org.apache.hive.service.cli._
import org.apache.hive.service.cli.operation.{ExecuteStatementOperation, Operation, OperationManager}
import org.apache.hive.service.cli.session.HiveSession

import org.apache.spark.internal.Logging
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.thriftserver.{ReflectionUtils, SparkExecuteStatementOperation}
import org.apache.spark.sql.internal.SQLConf

private[thriftserver] class SparkSQLOperationManager()
  extends OperationManager with Logging {

  val handleToOperation = ReflectionUtils
    .getSuperField[JMap[OperationHandle, Operation]](this, "handleToOperation")

  val sessionToActivePool = new ConcurrentHashMap[SessionHandle, String]()
  val sessionToContexts = new ConcurrentHashMap[SessionHandle, SQLContext]()

  override def newExecuteStatementOperation(
      parentSession: HiveSession,
      statement: String,
      confOverlay: JMap[String, String],
      async: Boolean): ExecuteStatementOperation = synchronized {
    val sqlContext = sessionToContexts.get(parentSession.getSessionHandle)
    require(sqlContext != null, s"Session handle: ${parentSession.getSessionHandle} has not been" +
      s" initialized or had already closed.")
    val conf = sqlContext.sessionState.conf
    val hiveSessionState = parentSession.getSessionState
    setConfMap(conf, hiveSessionState.getOverriddenConfigurations)
    setConfMap(conf, hiveSessionState.getHiveVariables)
    val runInBackground = async && conf.getConf(HiveUtils.HIVE_THRIFT_SERVER_ASYNC)
    val operation = new SparkExecuteStatementOperation(parentSession, statement, confOverlay,
      runInBackground)(sqlContext, sessionToActivePool)
    handleToOperation.put(operation.getHandle, operation)
    logDebug(s"Created Operation for $statement with session=$parentSession, " +
      s"runInBackground=$runInBackground")
    operation
  }

  def setConfMap(conf: SQLConf, confMap: java.util.Map[String, String]): Unit = {
    val iterator = confMap.entrySet().iterator()
    while (iterator.hasNext) {
      val kv = iterator.next()
      conf.setConfString(kv.getKey, kv.getValue)
    }
  }
}
Example 19
Source File: SparkSQLSessionManager.scala From XSQL with Apache License 2.0

package org.apache.spark.sql.hive.thriftserver

import java.util.concurrent.Executors

import org.apache.commons.logging.Log
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.conf.HiveConf.ConfVars
import org.apache.hive.service.cli.SessionHandle
import org.apache.hive.service.cli.session.SessionManager
import org.apache.hive.service.cli.thrift.TProtocolVersion
import org.apache.hive.service.server.HiveServer2

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.HiveUtils
import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._
import org.apache.spark.sql.hive.thriftserver.server.SparkSQLOperationManager

private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: SQLContext)
  extends SessionManager(hiveServer)
  with ReflectedCompositeService {

  private lazy val sparkSqlOperationManager = new SparkSQLOperationManager()

  override def init(hiveConf: HiveConf) {
    setSuperField(this, "operationManager", sparkSqlOperationManager)
    super.init(hiveConf)
  }

  override def openSession(
      protocol: TProtocolVersion,
      username: String,
      passwd: String,
      ipAddress: String,
      sessionConf: java.util.Map[String, String],
      withImpersonation: Boolean,
      delegationToken: String): SessionHandle = {
    val sessionHandle =
      super.openSession(protocol, username, passwd, ipAddress, sessionConf, withImpersonation,
        delegationToken)
    val session = super.getSession(sessionHandle)
    HiveThriftServer2.listener.onSessionCreated(
      session.getIpAddress, sessionHandle.getSessionId.toString, session.getUsername)
    val ctx = if (sqlContext.conf.hiveThriftServerSingleSession) {
      sqlContext
    } else {
      sqlContext.newSession()
    }
    ctx.setConf(HiveUtils.FAKE_HIVE_VERSION.key, HiveUtils.builtinHiveVersion)
    if (sessionConf != null && sessionConf.containsKey("use:database")) {
      ctx.sql(s"use ${sessionConf.get("use:database")}")
    }
    sparkSqlOperationManager.sessionToContexts.put(sessionHandle, ctx)
    sessionHandle
  }

  override def closeSession(sessionHandle: SessionHandle) {
    HiveThriftServer2.listener.onSessionClosed(sessionHandle.getSessionId.toString)
    super.closeSession(sessionHandle)
    sparkSqlOperationManager.sessionToActivePool.remove(sessionHandle)
    sparkSqlOperationManager.sessionToContexts.remove(sessionHandle)
  }
}
Example 20
Source File: HiveVersionSuite.scala From XSQL with Apache License 2.0

package org.apache.spark.sql.hive.client

import org.apache.hadoop.conf.Configuration
import org.scalactic.source.Position
import org.scalatest.Tag

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.hive.HiveUtils

private[client] abstract class HiveVersionSuite(version: String) extends SparkFunSuite {
  override protected val enableAutoThreadAudit = false
  protected var client: HiveClient = null

  protected def buildClient(
      hadoopConf: Configuration,
      sharesHadoopClasses: Boolean = true): HiveClient = {
    // Hive changed the default of datanucleus.schema.autoCreateAll from true to false and
    // hive.metastore.schema.verification from false to true since 2.0
    // For details, see the JIRA HIVE-6113 and HIVE-12463
    if (version == "2.0" || version == "2.1" || version == "2.2" || version == "2.3") {
      hadoopConf.set("datanucleus.schema.autoCreateAll", "true")
      hadoopConf.set("hive.metastore.schema.verification", "false")
    }
    HiveClientBuilder.buildClient(
      version,
      hadoopConf,
      HiveUtils.formatTimeVarsForHiveClient(hadoopConf),
      sharesHadoopClasses = sharesHadoopClasses)
  }

  override def suiteName: String = s"${super.suiteName}($version)"

  override protected def test(testName: String, testTags: Tag*)(testFun: => Any)
      (implicit pos: Position): Unit = {
    super.test(s"$version: $testName", testTags: _*)(testFun)
  }
}
Example 21
Source File: SparkSQLEnv.scala From drizzle-spark with Apache License 2.0

package org.apache.spark.sql.hive.thriftserver

import java.io.PrintStream

import scala.collection.JavaConverters._

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.internal.Logging
import org.apache.spark.sql.{SparkSession, SQLContext}
import org.apache.spark.sql.hive.{HiveSessionState, HiveUtils}
import org.apache.spark.util.Utils

private[hive] object SparkSQLEnv extends Logging {

  var sqlContext: SQLContext = _
  var sparkContext: SparkContext = _

  // The init() method that creates sparkContext and sqlContext is not shown in this excerpt.

  def stop() {
    logDebug("Shutting down Spark SQL Environment")
    // Stop the SparkContext
    if (SparkSQLEnv.sparkContext != null) {
      sparkContext.stop()
      sparkContext = null
      sqlContext = null
    }
  }
}