org.apache.spark.sql.catalyst.plans.logical.InsertIntoTable Scala Examples
The following examples show how to use org.apache.spark.sql.catalyst.plans.logical.InsertIntoTable.
Each example is taken from an open-source project; the source file and its license are noted in the heading above the code.
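Before the project examples, here is a minimal, hypothetical sketch of building and executing an InsertIntoTable node by hand. It assumes the Spark 2.3/2.4 constructor shape used in Example 7 (table, partition, query, overwrite, ifPartitionNotExists); the database, table, and source names are placeholders.

// Hypothetical sketch: build an INSERT OVERWRITE logical plan and run it.
// Assumes a Spark 2.3/2.4-style InsertIntoTable(table, partition, query,
// overwrite, ifPartitionNotExists); "warehouse.events" and "staging_events"
// are placeholder names.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.plans.logical.InsertIntoTable

val spark = SparkSession.builder().enableHiveSupport().getOrCreate()

// The query whose rows will be inserted, as an analyzed logical plan.
val sourcePlan = spark.table("staging_events").queryExecution.analyzed

// Unresolved target relation; the analyzer resolves it against the catalog.
val target = UnresolvedRelation(TableIdentifier("events", Some("warehouse")))

val insert = InsertIntoTable(
  table = target,
  partition = Map.empty,          // no static partition values
  query = sourcePlan,
  overwrite = true,               // INSERT OVERWRITE semantics
  ifPartitionNotExists = false)

// executePlan(...).toRdd forces execution, as the CTAS commands below do.
spark.sessionState.executePlan(insert).toRdd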
Example 1
Source File: CreateHiveTableAsSelectCommand.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.hive.execution

import scala.util.control.NonFatal

import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
import org.apache.spark.sql.catalyst.catalog.CatalogTable
import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.hive.MetastoreRelation

case class CreateHiveTableAsSelectCommand(
    tableDesc: CatalogTable,
    query: LogicalPlan,
    ignoreIfExists: Boolean)
  extends RunnableCommand {

  private val tableIdentifier = tableDesc.identifier

  override def innerChildren: Seq[LogicalPlan] = Seq(query)

  override def run(sparkSession: SparkSession): Seq[Row] = {
    lazy val metastoreRelation: MetastoreRelation = {
      import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
      import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
      import org.apache.hadoop.io.Text
      import org.apache.hadoop.mapred.TextInputFormat

      val withFormat = tableDesc.withNewStorage(
        inputFormat =
          tableDesc.storage.inputFormat.orElse(Some(classOf[TextInputFormat].getName)),
        outputFormat =
          tableDesc.storage.outputFormat
            .orElse(Some(classOf[HiveIgnoreKeyTextOutputFormat[Text, Text]].getName)),
        serde = tableDesc.storage.serde.orElse(Some(classOf[LazySimpleSerDe].getName)),
        compressed = tableDesc.storage.compressed)

      val withSchema = if (withFormat.schema.isEmpty) {
        // Hive doesn't support specifying the column list for target table in CTAS
        // However we don't think SparkSQL should follow that.
        tableDesc.copy(schema = query.output.toStructType)
      } else {
        withFormat
      }

      sparkSession.sessionState.catalog.createTable(withSchema, ignoreIfExists = false)

      // Get the Metastore Relation
      sparkSession.sessionState.catalog.lookupRelation(tableIdentifier) match {
        case r: MetastoreRelation => r
      }
    }
    // TODO ideally, we should get the output data ready first and then
    // add the relation into catalog, just in case of failure occurs while data
    // processing.
    if (sparkSession.sessionState.catalog.tableExists(tableIdentifier)) {
      if (ignoreIfExists) {
        // table already exists, will do nothing, to keep consistent with Hive
      } else {
        throw new AnalysisException(s"$tableIdentifier already exists.")
      }
    } else {
      try {
        sparkSession.sessionState.executePlan(InsertIntoTable(
          metastoreRelation, Map(), query, overwrite = true, ifNotExists = false)).toRdd
      } catch {
        case NonFatal(e) =>
          // drop the created table.
          sparkSession.sessionState.catalog.dropTable(tableIdentifier,
            ignoreIfNotExists = true, purge = false)
          throw e
      }
    }

    Seq.empty[Row]
  }

  override def argString: String = {
    s"[Database:${tableDesc.database}}, " +
    s"TableName: ${tableDesc.identifier.table}, " +
    s"InsertIntoHiveTable]"
  }
}
Example 2
Source File: rules.scala From tispark with Apache License 2.0
package org.apache.spark.sql.extensions

import com.pingcap.tispark.statistics.StatisticsManager
import com.pingcap.tispark.utils.ReflectionUtil._
import com.pingcap.tispark.{MetaManager, TiDBRelation, TiTableReference}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, UnresolvedRelation}
import org.apache.spark.sql.catalyst.catalog.TiSessionCatalog
import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.command._
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.{AnalysisException, _}

case class TiResolutionRule(getOrCreateTiContext: SparkSession => TiContext)(
    sparkSession: SparkSession)
  extends Rule[LogicalPlan] {
  protected lazy val meta: MetaManager = tiContext.meta
  private lazy val autoLoad = tiContext.autoLoad
  private lazy val tiCatalog = tiContext.tiCatalog
  private lazy val tiSession = tiContext.tiSession
  private lazy val sqlContext = tiContext.sqlContext
  protected val tiContext: TiContext = getOrCreateTiContext(sparkSession)

  protected val resolveTiDBRelation: TableIdentifier => LogicalPlan =
    tableIdentifier => {
      val dbName = getDatabaseFromIdentifier(tableIdentifier)
      val tableName = tableIdentifier.table
      val table = meta.getTable(dbName, tableName)
      if (table.isEmpty) {
        throw new AnalysisException(
          s"Table or view '$tableName' not found in database '$dbName'")
      }
      if (autoLoad) {
        StatisticsManager.loadStatisticsInfo(table.get)
      }
      val sizeInBytes = StatisticsManager.estimateTableSize(table.get)
      val tiDBRelation =
        TiDBRelation(tiSession, TiTableReference(dbName, tableName, sizeInBytes), meta)(
          sqlContext)
      // Use SubqueryAlias so that projects and joins can correctly resolve
      // UnresolvedAttributes in JoinConditions, Projects, Filters, etc.
      newSubqueryAlias(tableName, LogicalRelation(tiDBRelation))
    }

  override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp resolveTiDBRelations

  protected def resolveTiDBRelations: PartialFunction[LogicalPlan, LogicalPlan] = {
    case i @ InsertIntoTable(UnresolvedRelation(tableIdentifier), _, _, _, _)
        if tiCatalog
          .catalogOf(tableIdentifier.database)
          .exists(_.isInstanceOf[TiSessionCatalog]) =>
      i.copy(table = EliminateSubqueryAliases(resolveTiDBRelation(tableIdentifier)))
    case UnresolvedRelation(tableIdentifier)
        if tiCatalog
          .catalogOf(tableIdentifier.database)
          .exists(_.isInstanceOf[TiSessionCatalog]) =>
      resolveTiDBRelation(tableIdentifier)
  }

  private def getDatabaseFromIdentifier(tableIdentifier: TableIdentifier): String =
    tableIdentifier.database.getOrElse(tiCatalog.getCurrentDatabase)
}

case class TiDDLRule(getOrCreateTiContext: SparkSession => TiContext)(sparkSession: SparkSession)
  extends Rule[LogicalPlan] {
  protected lazy val tiContext: TiContext = getOrCreateTiContext(sparkSession)

  override def apply(plan: LogicalPlan): LogicalPlan = plan transformUp {
    // TODO: support other commands that may concern TiSpark catalog.
    case sd: ShowDatabasesCommand =>
      TiShowDatabasesCommand(tiContext, sd)
    case sd: SetDatabaseCommand =>
      TiSetDatabaseCommand(tiContext, sd)
    case st: ShowTablesCommand =>
      TiShowTablesCommand(tiContext, st)
    case st: ShowColumnsCommand =>
      TiShowColumnsCommand(tiContext, st)
    case dt: DescribeTableCommand =>
      TiDescribeTablesCommand(tiContext, dt)
    case dc: DescribeColumnCommand =>
      TiDescribeColumnCommand(tiContext, dc)
    case ct: CreateTableLikeCommand =>
      TiCreateTableLikeCommand(tiContext, ct)
  }
}
Example 3
Source File: CreateHiveTableAsSelectCommand.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.hive.execution

import scala.util.control.NonFatal

import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
import org.apache.spark.sql.catalyst.catalog.CatalogTable
import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan, OverwriteOptions}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.hive.MetastoreRelation

case class CreateHiveTableAsSelectCommand(
    tableDesc: CatalogTable,
    query: LogicalPlan,
    ignoreIfExists: Boolean)
  extends RunnableCommand {

  private val tableIdentifier = tableDesc.identifier

  override def innerChildren: Seq[LogicalPlan] = Seq(query)

  override def run(sparkSession: SparkSession): Seq[Row] = {
    lazy val metastoreRelation: MetastoreRelation = {
      import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
      import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
      import org.apache.hadoop.io.Text
      import org.apache.hadoop.mapred.TextInputFormat

      val withFormat = tableDesc.withNewStorage(
        inputFormat =
          tableDesc.storage.inputFormat.orElse(Some(classOf[TextInputFormat].getName)),
        outputFormat =
          tableDesc.storage.outputFormat
            .orElse(Some(classOf[HiveIgnoreKeyTextOutputFormat[Text, Text]].getName)),
        serde = tableDesc.storage.serde.orElse(Some(classOf[LazySimpleSerDe].getName)),
        compressed = tableDesc.storage.compressed)

      val withSchema = if (withFormat.schema.isEmpty) {
        // Hive doesn't support specifying the column list for target table in CTAS
        // However we don't think SparkSQL should follow that.
        tableDesc.copy(schema = query.output.toStructType)
      } else {
        withFormat
      }

      sparkSession.sessionState.catalog.createTable(withSchema, ignoreIfExists = false)

      // Get the Metastore Relation
      sparkSession.sessionState.catalog.lookupRelation(tableIdentifier) match {
        case r: MetastoreRelation => r
      }
    }
    // TODO ideally, we should get the output data ready first and then
    // add the relation into catalog, just in case of failure occurs while data
    // processing.
    if (sparkSession.sessionState.catalog.tableExists(tableIdentifier)) {
      if (ignoreIfExists) {
        // table already exists, will do nothing, to keep consistent with Hive
      } else {
        throw new AnalysisException(s"$tableIdentifier already exists.")
      }
    } else {
      try {
        sparkSession.sessionState.executePlan(InsertIntoTable(
          metastoreRelation, Map(), query, overwrite = OverwriteOptions(true),
          ifNotExists = false)).toRdd
      } catch {
        case NonFatal(e) =>
          // drop the created table.
          sparkSession.sessionState.catalog.dropTable(tableIdentifier,
            ignoreIfNotExists = true, purge = false)
          throw e
      }
    }

    Seq.empty[Row]
  }

  override def argString: String = {
    s"[Database:${tableDesc.database}}, " +
    s"TableName: ${tableDesc.identifier.table}, " +
    s"InsertIntoHiveTable]"
  }
}
Example 4
Source File: CreateHiveTableAsSelectCommand.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.hive.execution

import scala.util.control.NonFatal

import org.apache.spark.sql.{AnalysisException, Dataset, Row, SparkSession}
import org.apache.spark.sql.catalyst.catalog.CatalogTable
import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan, OverwriteOptions}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.hive.MetastoreRelation

case class CreateHiveTableAsSelectCommand(
    tableDesc: CatalogTable,
    query: LogicalPlan,
    ignoreIfExists: Boolean)
  extends RunnableCommand {

  private val tableIdentifier = tableDesc.identifier

  override def innerChildren: Seq[LogicalPlan] = Seq(query)

  override def run(sparkSession: SparkSession): Seq[Row] = {
    lazy val metastoreRelation: MetastoreRelation = {
      import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
      import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
      import org.apache.hadoop.io.Text
      import org.apache.hadoop.mapred.TextInputFormat

      val withFormat = tableDesc.withNewStorage(
        inputFormat =
          tableDesc.storage.inputFormat.orElse(Some(classOf[TextInputFormat].getName)),
        outputFormat =
          tableDesc.storage.outputFormat
            .orElse(Some(classOf[HiveIgnoreKeyTextOutputFormat[Text, Text]].getName)),
        serde = tableDesc.storage.serde.orElse(Some(classOf[LazySimpleSerDe].getName)),
        compressed = tableDesc.storage.compressed)

      val withSchema = if (withFormat.schema.isEmpty) {
        // Hive doesn't support specifying the column list for target table in CTAS
        // However we don't think SparkSQL should follow that.
        tableDesc.copy(schema = query.output.toStructType)
      } else {
        withFormat
      }

      sparkSession.sessionState.catalog.createTable(withSchema, ignoreIfExists = false)

      // Get the Metastore Relation
      sparkSession.sessionState.catalog.lookupRelation(tableIdentifier) match {
        case r: MetastoreRelation => r
      }
    }
    // TODO ideally, we should get the output data ready first and then
    // add the relation into catalog, just in case of failure occurs while data
    // processing.
    if (sparkSession.sessionState.catalog.tableExists(tableIdentifier)) {
      if (ignoreIfExists) {
        // table already exists, will do nothing, to keep consistent with Hive
      } else {
        throw new AnalysisException(s"$tableIdentifier already exists.")
      }
    } else {
      try {
        sparkSession.sessionState.executePlan(InsertIntoTable(
          metastoreRelation, Map(), query, overwrite = OverwriteOptions(true),
          ifNotExists = false)).toRdd
      } catch {
        case NonFatal(e) =>
          // drop the created table.
          sparkSession.sessionState.catalog.dropTable(tableIdentifier,
            ignoreIfNotExists = true, purge = false)
          throw e
      }
    }

    Seq.empty[Row]
  }

  override def argString: String = {
    s"[Database:${tableDesc.database}}, " +
    s"TableName: ${tableDesc.identifier.table}, " +
    s"InsertIntoHiveTable]"
  }
}
Example 5
Source File: CreateTableAsSelect.scala From iolap with Apache License 2.0
package org.apache.spark.sql.hive.execution

import org.apache.spark.annotation.Experimental
import org.apache.spark.sql.{AnalysisException, SQLContext}
import org.apache.spark.sql.catalyst.expressions.Row
import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan}
import org.apache.spark.sql.execution.RunnableCommand
import org.apache.spark.sql.hive.client.{HiveTable, HiveColumn}
import org.apache.spark.sql.hive.{HiveContext, MetastoreRelation, HiveMetastoreTypes}

private[hive]
case class CreateTableAsSelect(
    tableDesc: HiveTable,
    query: LogicalPlan,
    allowExisting: Boolean)
  extends RunnableCommand {

  def database: String = tableDesc.database
  def tableName: String = tableDesc.name

  override def run(sqlContext: SQLContext): Seq[Row] = {
    val hiveContext = sqlContext.asInstanceOf[HiveContext]
    lazy val metastoreRelation: MetastoreRelation = {
      import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
      import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
      import org.apache.hadoop.io.Text
      import org.apache.hadoop.mapred.TextInputFormat

      val withSchema = tableDesc.copy(
        schema = query.output.map(c =>
          HiveColumn(c.name, HiveMetastoreTypes.toMetastoreType(c.dataType), null)),
        inputFormat =
          tableDesc.inputFormat.orElse(Some(classOf[TextInputFormat].getName)),
        outputFormat =
          tableDesc.outputFormat
            .orElse(Some(classOf[HiveIgnoreKeyTextOutputFormat[Text, Text]].getName)),
        serde = tableDesc.serde.orElse(Some(classOf[LazySimpleSerDe].getName())))
      hiveContext.catalog.client.createTable(withSchema)

      // Get the Metastore Relation
      hiveContext.catalog.lookupRelation(Seq(database, tableName), None) match {
        case r: MetastoreRelation => r
      }
    }
    // TODO ideally, we should get the output data ready first and then
    // add the relation into catalog, just in case of failure occurs while data
    // processing.
    if (hiveContext.catalog.tableExists(Seq(database, tableName))) {
      if (allowExisting) {
        // table already exists, will do nothing, to keep consistent with Hive
      } else {
        throw new AnalysisException(s"$database.$tableName already exists.")
      }
    } else {
      hiveContext.executePlan(InsertIntoTable(metastoreRelation, Map(), query, true, false)).toRdd
    }

    Seq.empty[Row]
  }

  override def argString: String = {
    s"[Database:$database, TableName: $tableName, InsertIntoHiveTable]\n" + query.toString
  }
}
Example 6
Source File: CreateTableAsSelect.scala From BigDatalog with Apache License 2.0
package org.apache.spark.sql.hive.execution

import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan}
import org.apache.spark.sql.execution.RunnableCommand
import org.apache.spark.sql.hive.client.{HiveColumn, HiveTable}
import org.apache.spark.sql.hive.{HiveContext, HiveMetastoreTypes, MetastoreRelation}
import org.apache.spark.sql.{AnalysisException, Row, SQLContext}

private[hive]
case class CreateTableAsSelect(
    tableDesc: HiveTable,
    query: LogicalPlan,
    allowExisting: Boolean)
  extends RunnableCommand {

  val tableIdentifier = TableIdentifier(tableDesc.name, Some(tableDesc.database))

  override def children: Seq[LogicalPlan] = Seq(query)

  override def run(sqlContext: SQLContext): Seq[Row] = {
    val hiveContext = sqlContext.asInstanceOf[HiveContext]
    lazy val metastoreRelation: MetastoreRelation = {
      import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
      import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
      import org.apache.hadoop.io.Text
      import org.apache.hadoop.mapred.TextInputFormat

      val withFormat = tableDesc.copy(
        inputFormat =
          tableDesc.inputFormat.orElse(Some(classOf[TextInputFormat].getName)),
        outputFormat =
          tableDesc.outputFormat
            .orElse(Some(classOf[HiveIgnoreKeyTextOutputFormat[Text, Text]].getName)),
        serde = tableDesc.serde.orElse(Some(classOf[LazySimpleSerDe].getName())))

      val withSchema = if (withFormat.schema.isEmpty) {
        // Hive doesn't support specifying the column list for target table in CTAS
        // However we don't think SparkSQL should follow that.
        tableDesc.copy(schema =
          query.output.map(c =>
            HiveColumn(c.name, HiveMetastoreTypes.toMetastoreType(c.dataType), null)))
      } else {
        withFormat
      }

      hiveContext.catalog.client.createTable(withSchema)

      // Get the Metastore Relation
      hiveContext.catalog.lookupRelation(tableIdentifier, None) match {
        case r: MetastoreRelation => r
      }
    }
    // TODO ideally, we should get the output data ready first and then
    // add the relation into catalog, just in case of failure occurs while data
    // processing.
    if (hiveContext.catalog.tableExists(tableIdentifier)) {
      if (allowExisting) {
        // table already exists, will do nothing, to keep consistent with Hive
      } else {
        throw new AnalysisException(s"$tableIdentifier already exists.")
      }
    } else {
      hiveContext.executePlan(InsertIntoTable(metastoreRelation, Map(), query, true, false)).toRdd
    }

    Seq.empty[Row]
  }

  override def argString: String = {
    s"[Database:${tableDesc.database}}, TableName: ${tableDesc.name}, InsertIntoHiveTable]"
  }
}
Example 7
Source File: HiveAcidAutoConvert.scala From spark-acid with Apache License 2.0
package com.qubole.spark.hiveacid

import java.util.Locale

import com.qubole.spark.datasources.hiveacid.sql.execution.SparkAcidSqlParser
import org.apache.spark.sql.{SparkSession, SparkSessionExtensions}
import org.apache.spark.sql.catalyst.catalog.HiveTableRelation
import org.apache.spark.sql.catalyst.plans.logical.{Filter, InsertIntoTable, LogicalPlan}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.command.DDLUtils
import org.apache.spark.sql.execution.datasources.LogicalRelation
import com.qubole.spark.hiveacid.datasource.HiveAcidDataSource

case class HiveAcidAutoConvert(spark: SparkSession) extends Rule[LogicalPlan] {

  private def isConvertible(relation: HiveTableRelation): Boolean = {
    val serde = relation.tableMeta.storage.serde.getOrElse("").toLowerCase(Locale.ROOT)
    relation.tableMeta.properties.getOrElse("transactional", "false").toBoolean
  }

  private def convert(relation: HiveTableRelation): LogicalRelation = {
    val options = relation.tableMeta.properties ++
      relation.tableMeta.storage.properties ++ Map("table" -> relation.tableMeta.qualifiedName)

    val newRelation = new HiveAcidDataSource().createRelation(spark.sqlContext, options)
    LogicalRelation(newRelation, isStreaming = false)
  }

  override def apply(plan: LogicalPlan): LogicalPlan = {
    plan resolveOperators {
      // Write path
      case InsertIntoTable(r: HiveTableRelation, partition, query, overwrite, ifPartitionNotExists)
          if query.resolved && DDLUtils.isHiveTable(r.tableMeta) && isConvertible(r) =>
        InsertIntoTable(convert(r), partition, query, overwrite, ifPartitionNotExists)

      // Read path
      case relation: HiveTableRelation
          if DDLUtils.isHiveTable(relation.tableMeta) && isConvertible(relation) =>
        convert(relation)
    }
  }
}

class HiveAcidAutoConvertExtension extends (SparkSessionExtensions => Unit) {
  def apply(extension: SparkSessionExtensions): Unit = {
    extension.injectResolutionRule(HiveAcidAutoConvert.apply)
    extension.injectParser { (session, parser) =>
      SparkAcidSqlParser(parser)
    }
  }
}
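A rule like this is normally wired in through Spark's standard spark.sql.extensions hook. Below is a minimal sketch of enabling the extension class declared in the example; the config key is standard Spark, the class name comes from the code above, and the application, database, and table names are placeholders.

// Sketch: register HiveAcidAutoConvertExtension when building the session.
// spark.sql.extensions is a standard Spark config; the class name comes from
// the example above, while the app, database, and table names are placeholders.
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("hive-acid-demo")
  .enableHiveSupport()
  .config("spark.sql.extensions", "com.qubole.spark.hiveacid.HiveAcidAutoConvertExtension")
  .getOrCreate()

// With the resolution rule injected, reads and writes on transactional Hive
// tables are rewritten to go through HiveAcidDataSource automatically.
spark.sql("SELECT count(*) FROM acid_db.transactional_table").show()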