org.apache.spark.sql.execution.command.RunnableCommand Scala Examples
The following examples show how to use org.apache.spark.sql.execution.command.RunnableCommand.
Each example lists its source file, originating project, and license.
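Every example on this page follows the same pattern: a case class extends RunnableCommand, optionally declares its result schema through output, and does its work eagerly on the driver inside run(sparkSession), returning the result as a Seq[Row]. As an orientation, here is a minimal sketch of that pattern; the command name, package, and greeting column are invented for illustration and are not part of any of the projects below.

package com.example.commands // hypothetical package, for illustration only

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.types.StringType

// Minimal sketch of the RunnableCommand pattern used by the examples below:
// declare the output schema and perform the actual work eagerly in run().
case class ShowGreetingCommand(name: String) extends RunnableCommand {

  // One nullable string column named "greeting".
  override val output: Seq[Attribute] =
    Seq(AttributeReference("greeting", StringType, nullable = true)())

  // run() executes on the driver; whatever it returns becomes the rows
  // of the resulting DataFrame.
  override def run(sparkSession: SparkSession): Seq[Row] = {
    Seq(Row(s"Hello, $name"))
  }
}

In Spark 2.x, commands like this are produced by the parser or by extension rules and are executed eagerly when the enclosing Dataset is constructed, which is why run returns plain rows rather than an RDD.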
Example 1
Source File: RangerShowTablesCommand.scala From spark-ranger with Apache License 2.0
package org.apache.spark.sql.execution

import org.apache.ranger.authorization.spark.authorizer.{RangerSparkAuthorizer, SparkPrivilegeObject, SparkPrivilegeObjectType}
import org.apache.spark.sql.execution.command.{RunnableCommand, ShowTablesCommand}
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.expressions.Attribute

case class RangerShowTablesCommand(child: ShowTablesCommand) extends RunnableCommand {

  override val output: Seq[Attribute] = child.output

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val rows = child.run(sparkSession)
    rows.filter(r => RangerSparkAuthorizer.isAllowed(toSparkPrivilegeObject(r)))
  }

  private def toSparkPrivilegeObject(row: Row): SparkPrivilegeObject = {
    val database = row.getString(0)
    val table = row.getString(1)
    new SparkPrivilegeObject(SparkPrivilegeObjectType.TABLE_OR_VIEW, database, table)
  }
}
Example 2
Source File: UpdateCommand.scala From spark-acid with Apache License 2.0
package com.qubole.spark.datasources.hiveacid.sql.catalyst.plans.command

import com.qubole.spark.hiveacid.HiveAcidErrors
import com.qubole.spark.hiveacid.datasource.HiveAcidRelation
import org.apache.spark.sql.{Column, Row, SparkSession}
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.execution.datasources.LogicalRelation

case class UpdateCommand(
    table: LogicalPlan,
    setExpressions: Map[String, Expression],
    condition: Option[Expression]) extends RunnableCommand {

  override def children: Seq[LogicalPlan] = Seq(table)
  override def output: Seq[Attribute] = Seq.empty
  override lazy val resolved: Boolean = childrenResolved

  override def run(sparkSession: SparkSession): Seq[Row] = {
    if (children.size != 1) {
      throw new IllegalArgumentException(
        "UPDATE command should have one table to update, whereas this has: " + children.size)
    }
    children(0) match {
      case LogicalRelation(relation: HiveAcidRelation, _, _, _) => {
        val setColumns = setExpressions.mapValues(expr => new Column(expr))
        val updateFilterColumn = condition.map(new Column(_))
        relation.update(updateFilterColumn, setColumns)
      }
      case LogicalRelation(_, _, Some(catalogTable), _) =>
        throw HiveAcidErrors.tableNotAcidException(catalogTable.qualifiedName)
      case _ => throw HiveAcidErrors.tableNotAcidException(table.toString())
    }
    Seq.empty[Row]
  }
}
Example 3
Source File: MergeCommand.scala From spark-acid with Apache License 2.0
package com.qubole.spark.datasources.hiveacid.sql.catalyst.plans.command

import com.qubole.spark.hiveacid.HiveAcidErrors
import com.qubole.spark.hiveacid.datasource.HiveAcidRelation
import com.qubole.spark.hiveacid.merge.{MergeCondition, MergeWhenClause, MergeWhenNotInsert}
import org.apache.spark.sql.catalyst.AliasIdentifier
import org.apache.spark.sql.{Row, SparkSession, SqlUtils}
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, SubqueryAlias}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.execution.datasources.LogicalRelation

case class MergeCommand(targetTable: LogicalPlan,
                        sourceTable: LogicalPlan,
                        matched: Seq[MergeWhenClause],
                        notMatched: Option[MergeWhenClause],
                        mergeCondition: MergeCondition,
                        sourceAlias: Option[AliasIdentifier],
                        targetAlias: Option[AliasIdentifier])
  extends RunnableCommand {

  override def children: Seq[LogicalPlan] = Seq(targetTable, sourceTable)
  override def output: Seq[Attribute] = Seq.empty
  override lazy val resolved: Boolean = childrenResolved

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val insertClause: Option[MergeWhenNotInsert] = notMatched match {
      case Some(i: MergeWhenNotInsert) => Some(i)
      case None => None
      case _ =>
        throw HiveAcidErrors.mergeValidationError("WHEN NOT Clause has to be INSERT CLAUSE")
    }

    children.head match {
      case LogicalRelation(relation: HiveAcidRelation, _, _, _) =>
        relation.merge(SqlUtils.logicalPlanToDataFrame(sparkSession, sourceTable),
          mergeCondition.expression, matched, insertClause, sourceAlias, targetAlias)
      case SubqueryAlias(_, LogicalRelation(relation: HiveAcidRelation, _, _, _)) =>
        relation.merge(SqlUtils.logicalPlanToDataFrame(sparkSession, sourceTable),
          mergeCondition.expression, matched, insertClause, sourceAlias, targetAlias)
      case _ => throw HiveAcidErrors.tableNotAcidException(targetTable.toString())
    }
    Seq.empty
  }
}
Example 4
Source File: DeleteCommand.scala From spark-acid with Apache License 2.0
package com.qubole.spark.datasources.hiveacid.sql.catalyst.plans.command

import com.qubole.spark.hiveacid.HiveAcidErrors
import com.qubole.spark.hiveacid.datasource.HiveAcidRelation
import org.apache.spark.sql.{Column, Row, SparkSession}
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.execution.datasources.LogicalRelation

case class DeleteCommand(
    table: LogicalPlan,
    condition: Expression) extends RunnableCommand {

  // We don't want `table` in children as sometimes we don't want to transform it.
  override def children: Seq[LogicalPlan] = Seq(table)
  override def output: Seq[Attribute] = Seq.empty
  override lazy val resolved: Boolean = childrenResolved

  override def run(sparkSession: SparkSession): Seq[Row] = {
    if (children.size != 1) {
      throw new IllegalArgumentException(
        "DELETE command should specify exactly one table, whereas this has: " + children.size)
    }
    children(0) match {
      case LogicalRelation(relation: HiveAcidRelation, _, _, _) => {
        relation.delete(new Column(condition))
      }
      case _ => throw HiveAcidErrors.tableNotAcidException(table.toString())
    }
    Seq.empty[Row]
  }
}
Example 5
Source File: SaveIntoDataSourceCommand.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql.{Dataset, Row, SaveMode, SparkSession}
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.sources.CreatableRelationProvider

case class SaveIntoDataSourceCommand(
    query: LogicalPlan,
    dataSource: CreatableRelationProvider,
    options: Map[String, String],
    mode: SaveMode) extends RunnableCommand {

  override protected def innerChildren: Seq[QueryPlan[_]] = Seq(query)

  override def run(sparkSession: SparkSession): Seq[Row] = {
    dataSource.createRelation(
      sparkSession.sqlContext, mode, options, Dataset.ofRows(sparkSession, query))

    Seq.empty[Row]
  }

  override def simpleString: String = {
    val redacted = SQLConf.get.redactOptions(options)
    s"SaveIntoDataSourceCommand ${dataSource}, ${redacted}, ${mode}"
  }
}
Example 6
Source File: ddl.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import java.util.Locale

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils}
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.command.{DDLUtils, RunnableCommand}
import org.apache.spark.sql.types._

case class CreateTempViewUsing(
    tableIdent: TableIdentifier,
    userSpecifiedSchema: Option[StructType],
    replace: Boolean,
    global: Boolean,
    provider: String,
    options: Map[String, String]) extends RunnableCommand {

  if (tableIdent.database.isDefined) {
    throw new AnalysisException(
      s"Temporary view '$tableIdent' should not have specified a database")
  }

  override def argString: String = {
    s"[tableIdent:$tableIdent " +
      userSpecifiedSchema.map(_ + " ").getOrElse("") +
      s"replace:$replace " +
      s"provider:$provider " +
      CatalogUtils.maskCredentials(options)
  }

  override def run(sparkSession: SparkSession): Seq[Row] = {
    if (provider.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) {
      throw new AnalysisException("Hive data source can only be used with tables, " +
        "you can't use it with CREATE TEMP VIEW USING")
    }

    val dataSource = DataSource(
      sparkSession,
      userSpecifiedSchema = userSpecifiedSchema,
      className = provider,
      options = options)

    val catalog = sparkSession.sessionState.catalog
    val viewDefinition = Dataset.ofRows(
      sparkSession, LogicalRelation(dataSource.resolveRelation())).logicalPlan

    if (global) {
      catalog.createGlobalTempView(tableIdent.table, viewDefinition, replace)
    } else {
      catalog.createTempView(tableIdent.table, viewDefinition, replace)
    }

    Seq.empty[Row]
  }
}

case class RefreshTable(tableIdent: TableIdentifier) extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    // Refresh the given table's metadata. If this table is cached as an InMemoryRelation,
    // drop the original cached version and make the new version cached lazily.
    sparkSession.catalog.refreshTable(tableIdent.quotedString)
    Seq.empty[Row]
  }
}

case class RefreshResource(path: String) extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    sparkSession.catalog.refreshByPath(path)
    Seq.empty[Row]
  }
}
Example 7
Source File: InsertIntoDataSourceCommand.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.sources.InsertableRelation

case class InsertIntoDataSourceCommand(
    logicalRelation: LogicalRelation,
    query: LogicalPlan,
    overwrite: Boolean) extends RunnableCommand {

  override protected def innerChildren: Seq[QueryPlan[_]] = Seq(query)

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val relation = logicalRelation.relation.asInstanceOf[InsertableRelation]
    val data = Dataset.ofRows(sparkSession, query)
    // Apply the schema of the existing table to the new data.
    val df = sparkSession.internalCreateDataFrame(data.queryExecution.toRdd, logicalRelation.schema)
    relation.insert(df, overwrite)

    // Re-cache all cached plans(including this relation itself, if it's cached) that refer to this
    // data source relation.
    sparkSession.sharedState.cacheManager.recacheByPlan(sparkSession, logicalRelation)

    Seq.empty[Row]
  }
}
Example 8
Source File: hbaseCommands.scala From Heracles with Apache License 2.0
package org.apache.spark.sql.hbase.execution

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.hbase._
import org.apache.spark.sql.hbase.util.DataTypeUtils
import org.apache.spark.sql.types._

import scala.collection.mutable.ArrayBuffer

@DeveloperApi
case class AlterDropColCommand(namespace: String, tableName: String, columnName: String)
  extends RunnableCommand {

  def run(sparkSession: SparkSession): Seq[Row] = {
    sparkSession.sharedState.externalCatalog.asInstanceOf[HBaseCatalog]
      .alterTableDropNonKey(namespace, tableName, columnName)
    sparkSession.sharedState.externalCatalog.asInstanceOf[HBaseCatalog].stopAdmin()
    Seq.empty[Row]
  }
}

@DeveloperApi
case class AlterAddColCommand(namespace: String,
                              tableName: String,
                              colName: String,
                              colType: String,
                              colFamily: String,
                              colQualifier: String) extends RunnableCommand {

  def run(sparkSession: SparkSession): Seq[Row] = {
    val hbaseCatalog = sparkSession.sharedState.externalCatalog.asInstanceOf[HBaseCatalog]
    hbaseCatalog.alterTableAddNonKey(namespace, tableName,
      NonKeyColumn(colName, DataTypeUtils.getDataType(colType), colFamily, colQualifier))
    hbaseCatalog.stopAdmin()
    Seq.empty[Row]
  }
}

@DeveloperApi
case class InsertValueIntoTableCommand(tid: TableIdentifier, valueSeq: Seq[String])
  extends RunnableCommand {

  override def run(sparkSession: SparkSession) = {
    val relation: HBaseRelation = sparkSession.sessionState.catalog.externalCatalog
      .asInstanceOf[HBaseCatalog]
      .getHBaseRelation(tid.database.getOrElse(null), tid.table).getOrElse(null)

    val bytes = valueSeq.zipWithIndex.map(v =>
      DataTypeUtils.string2TypeData(v._1, relation.schema(v._2).dataType))

    val rows = sparkSession.sparkContext.makeRDD(Seq(Row.fromSeq(bytes)))
    val inputValuesDF = sparkSession.createDataFrame(rows, relation.schema)
    relation.insert(inputValuesDF, overwrite = false)

    Seq.empty[Row]
  }

  override def output: Seq[Attribute] = Seq.empty
}
Example 9
Source File: ddl.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils}
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.types._

case class CreateTable(
    tableDesc: CatalogTable,
    mode: SaveMode,
    query: Option[LogicalPlan]) extends Command {
  assert(tableDesc.provider.isDefined, "The table to be created must have a provider.")

  if (query.isEmpty) {
    assert(
      mode == SaveMode.ErrorIfExists || mode == SaveMode.Ignore,
      "create table without data insertion can only use ErrorIfExists or Ignore as SaveMode.")
  }

  override def innerChildren: Seq[QueryPlan[_]] = query.toSeq
}

case class CreateTempViewUsing(
    tableIdent: TableIdentifier,
    userSpecifiedSchema: Option[StructType],
    replace: Boolean,
    global: Boolean,
    provider: String,
    options: Map[String, String]) extends RunnableCommand {

  if (tableIdent.database.isDefined) {
    throw new AnalysisException(
      s"Temporary view '$tableIdent' should not have specified a database")
  }

  override def argString: String = {
    s"[tableIdent:$tableIdent " +
      userSpecifiedSchema.map(_ + " ").getOrElse("") +
      s"replace:$replace " +
      s"provider:$provider " +
      CatalogUtils.maskCredentials(options)
  }

  def run(sparkSession: SparkSession): Seq[Row] = {
    val dataSource = DataSource(
      sparkSession,
      userSpecifiedSchema = userSpecifiedSchema,
      className = provider,
      options = options)

    val catalog = sparkSession.sessionState.catalog
    val viewDefinition = Dataset.ofRows(
      sparkSession, LogicalRelation(dataSource.resolveRelation())).logicalPlan

    if (global) {
      catalog.createGlobalTempView(tableIdent.table, viewDefinition, replace)
    } else {
      catalog.createTempView(tableIdent.table, viewDefinition, replace)
    }

    Seq.empty[Row]
  }
}

case class RefreshTable(tableIdent: TableIdentifier) extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    // Refresh the given table's metadata. If this table is cached as an InMemoryRelation,
    // drop the original cached version and make the new version cached lazily.
    sparkSession.catalog.refreshTable(tableIdent.quotedString)
    Seq.empty[Row]
  }
}

case class RefreshResource(path: String) extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    sparkSession.catalog.refreshByPath(path)
    Seq.empty[Row]
  }
}
Example 10
Source File: InsertIntoDataSourceCommand.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OverwriteOptions}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.sources.InsertableRelation

case class InsertIntoDataSourceCommand(
    logicalRelation: LogicalRelation,
    query: LogicalPlan,
    overwrite: OverwriteOptions) extends RunnableCommand {

  override protected def innerChildren: Seq[QueryPlan[_]] = Seq(query)

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val relation = logicalRelation.relation.asInstanceOf[InsertableRelation]
    val data = Dataset.ofRows(sparkSession, query)
    // Apply the schema of the existing table to the new data.
    val df = sparkSession.internalCreateDataFrame(data.queryExecution.toRdd, logicalRelation.schema)
    relation.insert(df, overwrite.enabled)

    // Invalidate the cache.
    sparkSession.sharedState.cacheManager.invalidateCache(logicalRelation)

    Seq.empty[Row]
  }
}
Example 11
Source File: CreateHiveTableAsSelectCommand.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.hive.execution

import scala.util.control.NonFatal

import org.apache.spark.sql.{AnalysisException, Dataset, Row, SparkSession}
import org.apache.spark.sql.catalyst.catalog.CatalogTable
import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan, OverwriteOptions}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.hive.MetastoreRelation

case class CreateHiveTableAsSelectCommand(
    tableDesc: CatalogTable,
    query: LogicalPlan,
    ignoreIfExists: Boolean) extends RunnableCommand {

  private val tableIdentifier = tableDesc.identifier

  override def innerChildren: Seq[LogicalPlan] = Seq(query)

  override def run(sparkSession: SparkSession): Seq[Row] = {
    lazy val metastoreRelation: MetastoreRelation = {
      import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
      import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
      import org.apache.hadoop.io.Text
      import org.apache.hadoop.mapred.TextInputFormat

      val withFormat = tableDesc.withNewStorage(
        inputFormat = tableDesc.storage.inputFormat.orElse(Some(classOf[TextInputFormat].getName)),
        outputFormat = tableDesc.storage.outputFormat
          .orElse(Some(classOf[HiveIgnoreKeyTextOutputFormat[Text, Text]].getName)),
        serde = tableDesc.storage.serde.orElse(Some(classOf[LazySimpleSerDe].getName)),
        compressed = tableDesc.storage.compressed)

      val withSchema = if (withFormat.schema.isEmpty) {
        // Hive doesn't support specifying the column list for target table in CTAS
        // However we don't think SparkSQL should follow that.
        tableDesc.copy(schema = query.output.toStructType)
      } else {
        withFormat
      }

      sparkSession.sessionState.catalog.createTable(withSchema, ignoreIfExists = false)

      // Get the Metastore Relation
      sparkSession.sessionState.catalog.lookupRelation(tableIdentifier) match {
        case r: MetastoreRelation => r
      }
    }
    // TODO ideally, we should get the output data ready first and then
    // add the relation into catalog, just in case of failure occurs while data
    // processing.
    if (sparkSession.sessionState.catalog.tableExists(tableIdentifier)) {
      if (ignoreIfExists) {
        // table already exists, will do nothing, to keep consistent with Hive
      } else {
        throw new AnalysisException(s"$tableIdentifier already exists.")
      }
    } else {
      try {
        sparkSession.sessionState.executePlan(InsertIntoTable(
          metastoreRelation, Map(), query, overwrite = OverwriteOptions(true),
          ifNotExists = false)).toRdd
      } catch {
        case NonFatal(e) =>
          // drop the created table.
          sparkSession.sessionState.catalog.dropTable(tableIdentifier, ignoreIfNotExists = true,
            purge = false)
          throw e
      }
    }

    Seq.empty[Row]
  }

  override def argString: String = {
    s"[Database:${tableDesc.database}}, " +
      s"TableName: ${tableDesc.identifier.table}, " +
      s"InsertIntoHiveTable]"
  }
}
Example 12
Source File: DruidMetadataCommands.scala From spark-druid-olap with Apache License 2.0
package org.apache.spark.sql.sparklinedata.commands

import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.hive.sparklinedata.SPLSessionState
import org.apache.spark.sql.sources.druid.{DruidPlanner, DruidQueryCostModel}
import org.apache.spark.sql.types._
import org.apache.spark.sql.util.PlanUtil
import org.apache.spark.sql.{Row, SQLContext, SparkSession}
import org.joda.time.Interval
import org.sparklinedata.druid.metadata.{DruidMetadataCache, DruidRelationName, DruidRelationOptions}

case class ClearMetadata(druidHost: Option[String]) extends RunnableCommand {

  override val output: Seq[Attribute] = {
    val schema = StructType(
      StructField("", StringType, nullable = true) :: Nil)
    schema.toAttributes
  }

  override def run(sparkSession: SparkSession): Seq[Row] = {
    if (druidHost.isDefined) {
      DruidMetadataCache.clearCache(druidHost.get)
    } else {
      DruidMetadataCache.clearCache
    }
    Seq(Row(""))
  }
}

case class ExplainDruidRewrite(sql: String) extends RunnableCommand {

  override val output: Seq[Attribute] = {
    val schema = StructType(
      StructField("", StringType, nullable = true) :: Nil)
    schema.toAttributes
  }

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val qe = sparkSession.sessionState.executeSql(sql)

    qe.sparkPlan.toString().split("\n").map(Row(_)).toSeq ++
      Seq(Row("")) ++
      DruidPlanner.getDruidRDDs(qe.sparkPlan).flatMap { dR =>
        val druidDSIntervals = dR.drDSIntervals
        val druidDSFullName = dR.drFullName
        val druidDSOptions = dR.drOptions
        val inputEstimate = dR.inputEstimate
        val outputEstimate = dR.outputEstimate
        s"""DruidQuery(${System.identityHashCode(dR.dQuery)}) details ::
           |${DruidQueryCostModel.computeMethod(
               sparkSession.sqlContext,
               druidDSIntervals,
               druidDSFullName,
               druidDSOptions,
               inputEstimate,
               outputEstimate,
               dR.dQuery.q)
            }
         """.stripMargin.split("\n").map(Row(_))
      }
  }
}
Example 13
Source File: VacuumTableCommand.scala From delta with Apache License 2.0
package io.delta.tables.execution

import org.apache.hadoop.fs.Path
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog, DeltaTableIdentifier, DeltaTableUtils}
import org.apache.spark.sql.delta.commands.VacuumCommand
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.types.StringType

case class VacuumTableCommand(
    path: Option[String],
    table: Option[TableIdentifier],
    horizonHours: Option[Double],
    dryRun: Boolean) extends RunnableCommand {

  override val output: Seq[Attribute] =
    Seq(AttributeReference("path", StringType, nullable = true)())

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val pathToVacuum =
      if (path.nonEmpty) {
        new Path(path.get)
      } else if (table.nonEmpty) {
        DeltaTableIdentifier(sparkSession, table.get) match {
          case Some(id) if id.path.nonEmpty =>
            new Path(id.path.get)
          case _ =>
            new Path(sparkSession.sessionState.catalog.getTableMetadata(table.get).location)
        }
      } else {
        throw DeltaErrors.missingTableIdentifierException("VACUUM")
      }
    val baseDeltaPath = DeltaTableUtils.findDeltaTableRoot(sparkSession, pathToVacuum)
    if (baseDeltaPath.isDefined) {
      if (baseDeltaPath.get != pathToVacuum) {
        throw DeltaErrors.vacuumBasePathMissingException(baseDeltaPath.get)
      }
    }
    val deltaLog = DeltaLog.forTable(sparkSession, pathToVacuum)
    if (deltaLog.snapshot.version == -1) {
      throw DeltaErrors.notADeltaTableException(
        "VACUUM",
        DeltaTableIdentifier(path = Some(pathToVacuum.toString)))
    }
    VacuumCommand.gc(sparkSession, deltaLog, dryRun, horizonHours).collect()
  }
}
Example 14
Source File: DeltaGenerateCommand.scala From delta with Apache License 2.0
package org.apache.spark.sql.delta.commands

import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog, DeltaTableIdentifier}
import org.apache.spark.sql.delta.hooks.GenerateSymlinkManifest
import org.apache.hadoop.fs.Path

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.sql.execution.command.RunnableCommand

case class DeltaGenerateCommand(modeName: String, tableId: TableIdentifier)
  extends RunnableCommand {

  import DeltaGenerateCommand._

  override def run(sparkSession: SparkSession): Seq[Row] = {
    if (!modeNameToGenerationFunc.contains(modeName)) {
      throw DeltaErrors.unsupportedGenerateModeException(modeName)
    }
    val tablePath = DeltaTableIdentifier(sparkSession, tableId) match {
      case Some(id) if id.path.isDefined =>
        new Path(id.path.get)
      case _ =>
        new Path(sparkSession.sessionState.catalog.getTableMetadata(tableId).location)
    }
    val deltaLog = DeltaLog.forTable(sparkSession, tablePath)
    if (deltaLog.snapshot.version < 0) {
      throw DeltaErrors.notADeltaTableException("GENERATE")
    }
    val generationFunc = modeNameToGenerationFunc(modeName)
    generationFunc(sparkSession, deltaLog)
    Seq.empty
  }
}

object DeltaGenerateCommand {
  val modeNameToGenerationFunc = CaseInsensitiveMap(
    Map[String, (SparkSession, DeltaLog) => Unit](
      "symlink_format_manifest" -> GenerateSymlinkManifest.generateFullManifest
    ))
}
Example 15
Source File: DescribeDeltaHistoryCommand.scala From delta with Apache License 2.0
package org.apache.spark.sql.delta.commands

// scalastyle:off import.ordering.noEmptyLine
import org.apache.spark.sql.delta.{DeltaErrors, DeltaLog, DeltaTableIdentifier}
import org.apache.spark.sql.delta.actions.CommitInfo
import org.apache.spark.sql.delta.metering.DeltaLogging
import org.apache.hadoop.fs.Path

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation
import org.apache.spark.sql.catalyst.catalog.CatalogTableType
import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics}
import org.apache.spark.sql.execution.command.RunnableCommand

case class DescribeDeltaHistoryCommand(
    path: Option[String],
    tableIdentifier: Option[TableIdentifier],
    limit: Option[Int],
    override val output: Seq[Attribute] = ExpressionEncoder[CommitInfo]().schema.toAttributes)
  extends RunnableCommand with DeltaLogging {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val basePath =
      if (path.nonEmpty) {
        new Path(path.get)
      } else if (tableIdentifier.nonEmpty) {
        val sessionCatalog = sparkSession.sessionState.catalog
        lazy val metadata = sessionCatalog.getTableMetadata(tableIdentifier.get)

        DeltaTableIdentifier(sparkSession, tableIdentifier.get) match {
          case Some(id) if id.path.nonEmpty => new Path(id.path.get)
          case Some(id) if id.table.nonEmpty => new Path(metadata.location)
          case _ =>
            if (metadata.tableType == CatalogTableType.VIEW) {
              throw DeltaErrors.describeViewHistory
            }
            throw DeltaErrors.notADeltaTableException("DESCRIBE HISTORY")
        }
      } else {
        throw DeltaErrors.missingTableIdentifierException("DESCRIBE HISTORY")
      }

    // Max array size
    if (limit.exists(_ > Int.MaxValue - 8)) {
      throw new IllegalArgumentException("Please use a limit less than Int.MaxValue - 8.")
    }

    val deltaLog = DeltaLog.forTable(sparkSession, basePath)
    recordDeltaOperation(deltaLog, "delta.ddl.describeHistory") {
      if (deltaLog.snapshot.version == -1) {
        throw DeltaErrors.notADeltaTableException("DESCRIBE HISTORY")
      }

      import sparkSession.implicits._
      deltaLog.history.getHistory(limit).toDF().collect().toSeq
    }
  }
}
Example 16
Source File: RangerShowDatabasesCommand.scala From spark-ranger with Apache License 2.0
package org.apache.spark.sql.execution

import org.apache.ranger.authorization.spark.authorizer.{RangerSparkAuthorizer, SparkPrivilegeObject, SparkPrivilegeObjectType}
import org.apache.spark.sql.execution.command.{RunnableCommand, ShowDatabasesCommand}
import org.apache.spark.sql.{Row, SparkSession}

case class RangerShowDatabasesCommand(child: ShowDatabasesCommand) extends RunnableCommand {

  override val output = child.output

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val rows = child.run(sparkSession)
    rows.filter(r => RangerSparkAuthorizer.isAllowed(toSparkPrivilegeObject(r)))
  }

  private def toSparkPrivilegeObject(row: Row): SparkPrivilegeObject = {
    val database = row.getString(0)
    new SparkPrivilegeObject(SparkPrivilegeObjectType.DATABASE, database, database)
  }
}
Example 17
Source File: CreateHiveTableAsSelectCommand.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.hive.execution

import scala.util.control.NonFatal

import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
import org.apache.spark.sql.catalyst.catalog.CatalogTable
import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.hive.MetastoreRelation

case class CreateHiveTableAsSelectCommand(
    tableDesc: CatalogTable,
    query: LogicalPlan,
    ignoreIfExists: Boolean) extends RunnableCommand {

  private val tableIdentifier = tableDesc.identifier

  override def innerChildren: Seq[LogicalPlan] = Seq(query)

  override def run(sparkSession: SparkSession): Seq[Row] = {
    lazy val metastoreRelation: MetastoreRelation = {
      import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
      import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
      import org.apache.hadoop.io.Text
      import org.apache.hadoop.mapred.TextInputFormat

      val withFormat = tableDesc.withNewStorage(
        inputFormat = tableDesc.storage.inputFormat.orElse(Some(classOf[TextInputFormat].getName)),
        outputFormat = tableDesc.storage.outputFormat
          .orElse(Some(classOf[HiveIgnoreKeyTextOutputFormat[Text, Text]].getName)),
        serde = tableDesc.storage.serde.orElse(Some(classOf[LazySimpleSerDe].getName)),
        compressed = tableDesc.storage.compressed)

      val withSchema = if (withFormat.schema.isEmpty) {
        // Hive doesn't support specifying the column list for target table in CTAS
        // However we don't think SparkSQL should follow that.
        tableDesc.copy(schema = query.output.toStructType)
      } else {
        withFormat
      }

      sparkSession.sessionState.catalog.createTable(withSchema, ignoreIfExists = false)

      // Get the Metastore Relation
      sparkSession.sessionState.catalog.lookupRelation(tableIdentifier) match {
        case r: MetastoreRelation => r
      }
    }
    // TODO ideally, we should get the output data ready first and then
    // add the relation into catalog, just in case of failure occurs while data
    // processing.
    if (sparkSession.sessionState.catalog.tableExists(tableIdentifier)) {
      if (ignoreIfExists) {
        // table already exists, will do nothing, to keep consistent with Hive
      } else {
        throw new AnalysisException(s"$tableIdentifier already exists.")
      }
    } else {
      try {
        sparkSession.sessionState.executePlan(InsertIntoTable(
          metastoreRelation, Map(), query, overwrite = true, ifNotExists = false)).toRdd
      } catch {
        case NonFatal(e) =>
          // drop the created table.
          sparkSession.sessionState.catalog.dropTable(tableIdentifier, ignoreIfNotExists = true,
            purge = false)
          throw e
      }
    }

    Seq.empty[Row]
  }

  override def argString: String = {
    s"[Database:${tableDesc.database}}, " +
      s"TableName: ${tableDesc.identifier.table}, " +
      s"InsertIntoHiveTable]"
  }
}
Example 18
Source File: CarbonResetCommand.scala From carbondata with Apache License 2.0
package org.apache.spark.sql.hive.execution.command

import org.apache.spark.sql.{CarbonEnv, Row, SparkSession}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.command.{ResetCommand, RunnableCommand}

case class CarbonResetCommand() extends RunnableCommand {
  override val output = ResetCommand.output

  override def run(sparkSession: SparkSession): Seq[Row] = {
    CarbonEnv.getInstance(sparkSession).carbonSessionInfo.getSessionParams.clear()
    ResetCommand.run(sparkSession)
  }
}

object MatchResetCommand {
  def unapply(plan: LogicalPlan): Option[LogicalPlan] = {
    plan match {
      case r@ResetCommand =>
        Some(plan)
      case _ =>
        None
    }
  }
}
Example 19
Source File: ddl.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils}
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.types._

case class CreateTable(
    tableDesc: CatalogTable,
    mode: SaveMode,
    query: Option[LogicalPlan]) extends Command {
  assert(tableDesc.provider.isDefined, "The table to be created must have a provider.")

  if (query.isEmpty) {
    assert(
      mode == SaveMode.ErrorIfExists || mode == SaveMode.Ignore,
      "create table without data insertion can only use ErrorIfExists or Ignore as SaveMode.")
  }

  override def innerChildren: Seq[QueryPlan[_]] = query.toSeq
}

case class CreateTempViewUsing(
    tableIdent: TableIdentifier,
    userSpecifiedSchema: Option[StructType],
    replace: Boolean,
    global: Boolean,
    provider: String,
    options: Map[String, String]) extends RunnableCommand {

  if (tableIdent.database.isDefined) {
    throw new AnalysisException(
      s"Temporary view '$tableIdent' should not have specified a database")
  }

  override def argString: String = {
    s"[tableIdent:$tableIdent " +
      userSpecifiedSchema.map(_ + " ").getOrElse("") +
      s"replace:$replace " +
      s"provider:$provider " +
      CatalogUtils.maskCredentials(options)
  }

  def run(sparkSession: SparkSession): Seq[Row] = {
    val dataSource = DataSource(
      sparkSession,
      userSpecifiedSchema = userSpecifiedSchema,
      className = provider,
      options = options)

    val catalog = sparkSession.sessionState.catalog
    val viewDefinition = Dataset.ofRows(
      sparkSession, LogicalRelation(dataSource.resolveRelation())).logicalPlan

    if (global) {
      catalog.createGlobalTempView(tableIdent.table, viewDefinition, replace)
    } else {
      catalog.createTempView(tableIdent.table, viewDefinition, replace)
    }

    Seq.empty[Row]
  }
}

case class RefreshTable(tableIdent: TableIdentifier) extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    // Refresh the given table's metadata. If this table is cached as an InMemoryRelation,
    // drop the original cached version and make the new version cached lazily.
    sparkSession.catalog.refreshTable(tableIdent.quotedString)
    Seq.empty[Row]
  }
}

case class RefreshResource(path: String) extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    sparkSession.catalog.refreshByPath(path)
    Seq.empty[Row]
  }
}
Example 20
Source File: InsertIntoDataSourceCommand.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, OverwriteOptions}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.sources.InsertableRelation

case class InsertIntoDataSourceCommand(
    logicalRelation: LogicalRelation,
    query: LogicalPlan,
    overwrite: OverwriteOptions) extends RunnableCommand {

  override protected def innerChildren: Seq[QueryPlan[_]] = Seq(query)

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val relation = logicalRelation.relation.asInstanceOf[InsertableRelation]
    val data = Dataset.ofRows(sparkSession, query)
    // Apply the schema of the existing table to the new data.
    val df = sparkSession.internalCreateDataFrame(data.queryExecution.toRdd, logicalRelation.schema)
    relation.insert(df, overwrite.enabled)

    // Invalidate the cache.
    sparkSession.sharedState.cacheManager.invalidateCache(logicalRelation)

    Seq.empty[Row]
  }
}
Example 21
Source File: CreateHiveTableAsSelectCommand.scala From sparkoscope with Apache License 2.0
package org.apache.spark.sql.hive.execution

import scala.util.control.NonFatal

import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
import org.apache.spark.sql.catalyst.catalog.CatalogTable
import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoTable, LogicalPlan, OverwriteOptions}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.hive.MetastoreRelation

case class CreateHiveTableAsSelectCommand(
    tableDesc: CatalogTable,
    query: LogicalPlan,
    ignoreIfExists: Boolean) extends RunnableCommand {

  private val tableIdentifier = tableDesc.identifier

  override def innerChildren: Seq[LogicalPlan] = Seq(query)

  override def run(sparkSession: SparkSession): Seq[Row] = {
    lazy val metastoreRelation: MetastoreRelation = {
      import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
      import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
      import org.apache.hadoop.io.Text
      import org.apache.hadoop.mapred.TextInputFormat

      val withFormat = tableDesc.withNewStorage(
        inputFormat = tableDesc.storage.inputFormat.orElse(Some(classOf[TextInputFormat].getName)),
        outputFormat = tableDesc.storage.outputFormat
          .orElse(Some(classOf[HiveIgnoreKeyTextOutputFormat[Text, Text]].getName)),
        serde = tableDesc.storage.serde.orElse(Some(classOf[LazySimpleSerDe].getName)),
        compressed = tableDesc.storage.compressed)

      val withSchema = if (withFormat.schema.isEmpty) {
        // Hive doesn't support specifying the column list for target table in CTAS
        // However we don't think SparkSQL should follow that.
        tableDesc.copy(schema = query.output.toStructType)
      } else {
        withFormat
      }

      sparkSession.sessionState.catalog.createTable(withSchema, ignoreIfExists = false)

      // Get the Metastore Relation
      sparkSession.sessionState.catalog.lookupRelation(tableIdentifier) match {
        case r: MetastoreRelation => r
      }
    }
    // TODO ideally, we should get the output data ready first and then
    // add the relation into catalog, just in case of failure occurs while data
    // processing.
    if (sparkSession.sessionState.catalog.tableExists(tableIdentifier)) {
      if (ignoreIfExists) {
        // table already exists, will do nothing, to keep consistent with Hive
      } else {
        throw new AnalysisException(s"$tableIdentifier already exists.")
      }
    } else {
      try {
        sparkSession.sessionState.executePlan(InsertIntoTable(
          metastoreRelation, Map(), query, overwrite = OverwriteOptions(true),
          ifNotExists = false)).toRdd
      } catch {
        case NonFatal(e) =>
          // drop the created table.
          sparkSession.sessionState.catalog.dropTable(tableIdentifier, ignoreIfNotExists = true,
            purge = false)
          throw e
      }
    }

    Seq.empty[Row]
  }

  override def argString: String = {
    s"[Database:${tableDesc.database}}, " +
      s"TableName: ${tableDesc.identifier.table}, " +
      s"InsertIntoHiveTable]"
  }
}
Example 22
Source File: SaveIntoDataSourceCommand.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql.{Dataset, Row, SaveMode, SparkSession}
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.sources.CreatableRelationProvider

case class SaveIntoDataSourceCommand(
    query: LogicalPlan,
    dataSource: CreatableRelationProvider,
    options: Map[String, String],
    mode: SaveMode) extends RunnableCommand {

  override protected def innerChildren: Seq[QueryPlan[_]] = Seq(query)

  override def run(sparkSession: SparkSession): Seq[Row] = {
    dataSource.createRelation(
      sparkSession.sqlContext, mode, options, Dataset.ofRows(sparkSession, query))

    Seq.empty[Row]
  }

  override def simpleString: String = {
    val redacted = SQLConf.get.redactOptions(options)
    s"SaveIntoDataSourceCommand ${dataSource}, ${redacted}, ${mode}"
  }
}
Example 23
Source File: ddl.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import java.util.Locale

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogUtils}
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.command.{DDLUtils, RunnableCommand}
import org.apache.spark.sql.types._

case class CreateTempViewUsing(
    tableIdent: TableIdentifier,
    userSpecifiedSchema: Option[StructType],
    replace: Boolean,
    global: Boolean,
    provider: String,
    options: Map[String, String]) extends RunnableCommand {

  if (tableIdent.database.isDefined) {
    throw new AnalysisException(
      s"Temporary view '$tableIdent' should not have specified a database")
  }

  override def argString: String = {
    s"[tableIdent:$tableIdent " +
      userSpecifiedSchema.map(_ + " ").getOrElse("") +
      s"replace:$replace " +
      s"provider:$provider " +
      CatalogUtils.maskCredentials(options)
  }

  override def run(sparkSession: SparkSession): Seq[Row] = {
    if (provider.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) {
      throw new AnalysisException("Hive data source can only be used with tables, " +
        "you can't use it with CREATE TEMP VIEW USING")
    }

    val dataSource = DataSource(
      sparkSession,
      userSpecifiedSchema = userSpecifiedSchema,
      className = provider,
      options = options)

    val catalog = sparkSession.sessionState.catalog
    val viewDefinition = Dataset.ofRows(
      sparkSession, LogicalRelation(dataSource.resolveRelation())).logicalPlan

    if (global) {
      catalog.createGlobalTempView(tableIdent.table, viewDefinition, replace)
    } else {
      catalog.createTempView(tableIdent.table, viewDefinition, replace)
    }

    Seq.empty[Row]
  }
}

case class RefreshTable(tableIdent: TableIdentifier) extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    // Refresh the given table's metadata. If this table is cached as an InMemoryRelation,
    // drop the original cached version and make the new version cached lazily.
    sparkSession.catalog.refreshTable(tableIdent.quotedString)
    Seq.empty[Row]
  }
}

case class RefreshResource(path: String) extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    sparkSession.catalog.refreshByPath(path)
    Seq.empty[Row]
  }
}
Example 24
Source File: InsertIntoDataSourceCommand.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.sources.InsertableRelation

case class InsertIntoDataSourceCommand(
    logicalRelation: LogicalRelation,
    query: LogicalPlan,
    overwrite: Boolean) extends RunnableCommand {

  override protected def innerChildren: Seq[QueryPlan[_]] = Seq(query)

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val relation = logicalRelation.relation.asInstanceOf[InsertableRelation]
    val data = Dataset.ofRows(sparkSession, query)
    // Data has been casted to the target relation's schema by the PreprocessTableInsertion rule.
    relation.insert(data, overwrite)

    // Re-cache all cached plans(including this relation itself, if it's cached) that refer to this
    // data source relation.
    sparkSession.sharedState.cacheManager.recacheByPlan(sparkSession, logicalRelation)

    Seq.empty[Row]
  }
}
Example 25
Source File: StreamingIncrementCommand.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.xsql.execution.command

import java.util.Locale

import org.apache.spark.SparkException
import org.apache.spark.sql.{Dataset, Row, SparkSession}
import org.apache.spark.sql.catalyst.encoders.RowEncoder
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode}
import org.apache.spark.sql.catalyst.streaming.InternalOutputModes
import org.apache.spark.sql.execution.QueryExecution
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.execution.datasources.DataSource
import org.apache.spark.sql.execution.streaming.StreamingRelationV2
import org.apache.spark.sql.sources.v2.StreamWriteSupport
import org.apache.spark.sql.streaming.{OutputMode, Trigger}
import org.apache.spark.sql.xsql.DataSourceManager._
import org.apache.spark.sql.xsql.StreamingSinkType

case class StreamingIncrementCommand(plan: LogicalPlan) extends RunnableCommand {

  private var outputMode: OutputMode = OutputMode.Append
  // dummy
  override def output: Seq[AttributeReference] = Seq.empty
  // dummy
  override def producedAttributes: AttributeSet = plan.producedAttributes

  override def run(sparkSession: SparkSession): Seq[Row] = {
    import StreamingSinkType._
    val qe = new QueryExecution(sparkSession, new ConstructedStreaming(plan))
    val df = new Dataset(sparkSession, qe, RowEncoder(qe.analyzed.schema))
    plan.collectLeaves.head match {
      case StreamingRelationV2(_, _, extraOptions, _, _) =>
        val source = extraOptions.getOrElse(STREAMING_SINK_TYPE, DEFAULT_STREAMING_SINK)
        val sinkOptions = extraOptions.filter(_._1.startsWith(STREAMING_SINK_PREFIX)).map { kv =>
          val key = kv._1.substring(STREAMING_SINK_PREFIX.length)
          (key, kv._2)
        }
        StreamingSinkType.withName(source.toUpperCase(Locale.ROOT)) match {
          case CONSOLE =>
          case TEXT | PARQUET | ORC | JSON | CSV =>
            if (sinkOptions.get(STREAMING_SINK_PATH) == None) {
              throw new SparkException("Sink type is file, must config path")
            }
          case KAFKA =>
            if (sinkOptions.get(STREAMING_SINK_BOOTSTRAP_SERVERS) == None) {
              throw new SparkException("Sink type is kafka, must config bootstrap servers")
            }
            if (sinkOptions.get(STREAMING_SINK_TOPIC) == None) {
              throw new SparkException("Sink type is kafka, must config kafka topic")
            }
          case _ =>
            throw new SparkException(
              "Sink type is invalid, " + s"select from ${StreamingSinkType.values}")
        }
        val ds = DataSource.lookupDataSource(source, sparkSession.sessionState.conf)
        val disabledSources = sparkSession.sqlContext.conf.disabledV2StreamingWriters.split(",")
        val sink = ds.newInstance() match {
          case w: StreamWriteSupport if !disabledSources.contains(w.getClass.getCanonicalName) =>
            w
          case _ =>
            val ds = DataSource(
              sparkSession,
              className = source,
              options = sinkOptions.toMap,
              partitionColumns = Nil)
            ds.createSink(InternalOutputModes.Append)
        }
        val outputMode = InternalOutputModes(
          extraOptions.getOrElse(STREAMING_OUTPUT_MODE, DEFAULT_STREAMING_OUTPUT_MODE))
        val duration =
          extraOptions.getOrElse(STREAMING_TRIGGER_DURATION, DEFAULT_STREAMING_TRIGGER_DURATION)
        val trigger =
          extraOptions.getOrElse(STREAMING_TRIGGER_TYPE, DEFAULT_STREAMING_TRIGGER_TYPE) match {
            case STREAMING_MICRO_BATCH_TRIGGER => Trigger.ProcessingTime(duration)
            case STREAMING_ONCE_TRIGGER => Trigger.Once()
            case STREAMING_CONTINUOUS_TRIGGER => Trigger.Continuous(duration)
          }
        val query = sparkSession.sessionState.streamingQueryManager.startQuery(
          extraOptions.get("queryName"),
          extraOptions.get(STREAMING_CHECKPOINT_LOCATION),
          df,
          sinkOptions.toMap,
          sink,
          outputMode,
          useTempCheckpointLocation = source == DEFAULT_STREAMING_SINK,
          recoverFromCheckpointLocation = true,
          trigger = trigger)
        query.awaitTermination()
    }
    // dummy
    Seq.empty
  }
}

case class ConstructedStreaming(child: LogicalPlan) extends UnaryNode {
  override def output: Seq[Attribute] = child.output
}
Example 26
Source File: XSQLAnalyzeTableCommand.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.xsql.execution.command

import org.apache.spark.sql.{AnalysisException, Row, SparkSession}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.CatalogTableType
import org.apache.spark.sql.execution.command.{CommandUtils, RunnableCommand}
import org.apache.spark.sql.xsql.XSQLSessionCatalog

case class XSQLAnalyzeTableCommand(tableIdent: TableIdentifier, noscan: Boolean = true)
  extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val sessionState = sparkSession.sessionState
    val catalog = sparkSession.sessionState.catalog.asInstanceOf[XSQLSessionCatalog]
    val catalogDB = catalog.getUsedCatalogDatabase(tableIdent.dataSource, tableIdent.database)
    if (catalogDB == None) {
      return Seq.empty[Row]
    }
    val ds = catalogDB.get.dataSourceName
    val db = catalogDB.get.name
    val tableIdentWithDB = TableIdentifier(tableIdent.table, Some(db), Some(ds))
    val tableMeta = catalog.getRawTable(tableIdentWithDB)
    if (tableMeta.tableType == CatalogTableType.VIEW) {
      throw new AnalysisException("ANALYZE TABLE is not supported on views.")
    }

    // Compute stats for the whole table
    val newTotalSize = CommandUtils.calculateTotalSize(sparkSession, tableMeta)
    val newRowCount =
      if (noscan) None else Some(BigInt(sparkSession.table(tableIdentWithDB).count()))

    // Update the metastore if the above statistics of the table are different from those
    // recorded in the metastore.
    val newStats = CommandUtils.compareAndGetNewStats(tableMeta.stats, newTotalSize, newRowCount)
    if (newStats.isDefined) {
      catalog.alterTableStats(tableIdentWithDB, newStats)
    }

    Seq.empty[Row]
  }
}
Example 27
Source File: databases.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.xsql.execution.command

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.types.StringType
import org.apache.spark.sql.xsql.XSQLSessionCatalog

case class XSQLSetDatabaseCommand(dataSourceName: Option[String], databaseName: String)
  extends RunnableCommand {

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val catalog = sparkSession.sessionState.catalog.asInstanceOf[XSQLSessionCatalog]
    if (dataSourceName.isEmpty) {
      catalog.setCurrentDatabase(databaseName)
    } else {
      catalog.setCurrentDatabase(dataSourceName.get, databaseName)
    }
    Seq.empty[Row]
  }
}
Example 28
Source File: datasources.scala From XSQL with Apache License 2.0
package org.apache.spark.sql.xsql.execution.command

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.types.StringType
import org.apache.spark.sql.xsql.XSQLSessionCatalog

case class XSQLShowDatasourcesCommand(datasourcePattern: Option[String]) extends RunnableCommand {
  override val output: Seq[Attribute] = {
    AttributeReference("dataSourceName", StringType, nullable = false)() :: Nil
  }

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val catalog = sparkSession.sessionState.catalog.asInstanceOf[XSQLSessionCatalog]
    val datasources = datasourcePattern
      .map { pattern =>
        catalog.listDatasources(pattern)
      }
      .getOrElse(catalog.listDatasources())
    datasources.map { d =>
      Row(d)
    }
  }
}

case class XSQLAddDatasourceCommand(dataSourceName: String, properties: Map[String, String])
  extends RunnableCommand {
  override def run(sparkSession: SparkSession): Seq[Row] = {
    val catalog = sparkSession.sessionState.catalog.asInstanceOf[XSQLSessionCatalog]
    catalog.addDataSource(dataSourceName, properties)
    Seq.empty[Row]
  }
}

case class XSQLRemoveDatasourceCommand(dataSourceName: String, ifExists: Boolean)
  extends RunnableCommand {
  override def run(sparkSession: SparkSession): Seq[Row] = {
    val catalog = sparkSession.sessionState.catalog.asInstanceOf[XSQLSessionCatalog]
    catalog.removeDataSource(dataSourceName, ifExists)
    Seq.empty[Row]
  }
}

case class XSQLRefreshDatasourceCommand(dataSourceName: String) extends RunnableCommand {
  override def run(sparkSession: SparkSession): Seq[Row] = {
    val catalog = sparkSession.sessionState.catalog.asInstanceOf[XSQLSessionCatalog]
    catalog.refreshDataSource(dataSourceName)
    Seq.empty[Row]
  }
}
Example 29
Source File: ddl.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.catalog.CatalogTable
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan}
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.types._

case class CreateTable(
    tableDesc: CatalogTable,
    mode: SaveMode,
    query: Option[LogicalPlan]) extends Command {
  assert(tableDesc.provider.isDefined, "The table to be created must have a provider.")

  if (query.isEmpty) {
    assert(
      mode == SaveMode.ErrorIfExists || mode == SaveMode.Ignore,
      "create table without data insertion can only use ErrorIfExists or Ignore as SaveMode.")
  }

  override def innerChildren: Seq[QueryPlan[_]] = query.toSeq
}

class CaseInsensitiveMap(map: Map[String, String]) extends Map[String, String]
  with Serializable {

  val baseMap = map.map(kv => kv.copy(_1 = kv._1.toLowerCase))

  override def get(k: String): Option[String] = baseMap.get(k.toLowerCase)

  override def + [B1 >: String](kv: (String, B1)): Map[String, B1] =
    baseMap + kv.copy(_1 = kv._1.toLowerCase)

  override def iterator: Iterator[(String, String)] = baseMap.iterator

  override def -(key: String): Map[String, String] = baseMap - key.toLowerCase
}
Example 30
Source File: InsertIntoHadoopFsRelationCommand.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import java.io.IOException

import org.apache.hadoop.fs.Path

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.command.RunnableCommand

        )) {
          throw new IOException(s"Unable to clear output " +
            s"directory $qualifiedOutputPath prior to writing to it")
        }
        true
      case (SaveMode.Append, _) | (SaveMode.Overwrite, _) | (SaveMode.ErrorIfExists, false) =>
        true
      case (SaveMode.Ignore, exists) =>
        !exists
      case (s, exists) =>
        throw new IllegalStateException(s"unsupported save mode $s ($exists)")
    }
    // If we are appending data to an existing dir.
    val isAppend = pathExists && (mode == SaveMode.Append)

    if (doInsertion) {
      WriteOutput.write(
        sparkSession,
        query,
        fileFormat,
        qualifiedOutputPath,
        hadoopConf,
        partitionColumns,
        bucketSpec,
        refreshFunction,
        options,
        isAppend)
    } else {
      logInfo("Skipping insertion into a relation that already exists.")
    }

    Seq.empty[Row]
  }
}
Example 31
Source File: InsertIntoDataSourceCommand.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.plans.QueryPlan
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.command.RunnableCommand
import org.apache.spark.sql.sources.InsertableRelation

case class InsertIntoDataSourceCommand(
    logicalRelation: LogicalRelation,
    query: LogicalPlan,
    overwrite: Boolean) extends RunnableCommand {

  override protected def innerChildren: Seq[QueryPlan[_]] = Seq(query)

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val relation = logicalRelation.relation.asInstanceOf[InsertableRelation]
    val data = Dataset.ofRows(sparkSession, query)
    // Apply the schema of the existing table to the new data.
    val df = sparkSession.internalCreateDataFrame(data.queryExecution.toRdd, logicalRelation.schema)
    relation.insert(df, overwrite)

    // Invalidate the cache.
    sparkSession.sharedState.cacheManager.invalidateCache(logicalRelation)

    Seq.empty[Row]
  }
}