org.apache.spark.sql.internal.SessionState Scala Examples
The following examples show how to use org.apache.spark.sql.internal.SessionState, Spark SQL's per-session bundle of configuration, catalog, analyzer, optimizer and planner.
They are collected from several open-source projects; the header above each example names the project and source file it was taken from.
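Most of the examples below pull an existing SessionState out of a SparkSession rather than constructing one by hand. As a point of reference, here is a minimal, self-contained sketch, not taken from any of the listed projects, of where a SessionState comes from and the two members the streaming examples lean on, conf and newHadoopConf(). The object name SessionStateDemo and the package org.apache.spark.sql.demo are illustrative; the package only needs to sit under org.apache.spark.sql because SessionState is a private[sql] API.

package org.apache.spark.sql.demo

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.internal.SessionState

object SessionStateDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("session-state-demo")
      .getOrCreate()

    // SessionState bundles the per-session SQLConf, catalog, analyzer, optimizer and planner.
    val state: SessionState = spark.sessionState
    println(state.conf.numShufflePartitions)

    // newHadoopConf() returns a Hadoop Configuration with the session's SQL settings applied;
    // several StateStoreRDD examples below broadcast exactly this object to the executors.
    val hadoopConf = state.newHadoopConf()
    println(hadoopConf.get("fs.defaultFS"))

    spark.stop()
  }
}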
Example 1
Source File: StateStoreRDD.scala, from sparkoscope (Apache License 2.0)
package org.apache.spark.sql.execution.streaming.state

import scala.reflect.ClassTag

import org.apache.spark.{Partition, TaskContext}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.internal.SessionState
import org.apache.spark.sql.types.StructType
import org.apache.spark.util.SerializableConfiguration

// Runs storeUpdateFunction against each partition of dataRDD, handing it the versioned,
// per-partition StateStore identified by (checkpointLocation, operatorId, partition index).
class StateStoreRDD[T: ClassTag, U: ClassTag](
    dataRDD: RDD[T],
    storeUpdateFunction: (StateStore, Iterator[T]) => Iterator[U],
    checkpointLocation: String,
    operatorId: Long,
    storeVersion: Long,
    keySchema: StructType,
    valueSchema: StructType,
    sessionState: SessionState,
    @transient private val storeCoordinator: Option[StateStoreCoordinatorRef])
  extends RDD[U](dataRDD) {

  private val storeConf = new StateStoreConf(sessionState.conf)

  // A Hadoop Configuration can be about 10 KB, which is pretty big, so broadcast it
  private val confBroadcast = dataRDD.context.broadcast(
    new SerializableConfiguration(sessionState.newHadoopConf()))

  override protected def getPartitions: Array[Partition] = dataRDD.partitions

  override def getPreferredLocations(partition: Partition): Seq[String] = {
    val storeId = StateStoreId(checkpointLocation, operatorId, partition.index)
    storeCoordinator.flatMap(_.getLocation(storeId)).toSeq
  }

  override def compute(partition: Partition, ctxt: TaskContext): Iterator[U] = {
    var store: StateStore = null
    val storeId = StateStoreId(checkpointLocation, operatorId, partition.index)
    store = StateStore.get(
      storeId, keySchema, valueSchema, storeVersion, storeConf, confBroadcast.value.value)
    val inputIter = dataRDD.iterator(partition, ctxt)
    storeUpdateFunction(store, inputIter)
  }
}
Example 2
Source File: TestSQLContext.scala, from Spark-2.3.1 (Apache License 2.0)
package org.apache.spark.sql.test

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.internal.{SessionState, SessionStateBuilder, SQLConf, WithTestConf}

// The enclosing object is not shown in the original excerpt; it is the TestSQLContext
// referenced by TestSQLSessionStateBuilder below.
private[sql] object TestSQLContext {

  val overrideConfs: Map[String, String] =
    Map(
      // Fewer shuffle partitions to speed up testing.
      SQLConf.SHUFFLE_PARTITIONS.key -> "5")
}

private[sql] class TestSQLSessionStateBuilder(
    session: SparkSession,
    state: Option[SessionState])
  extends SessionStateBuilder(session, state) with WithTestConf {
  override def overrideConfs: Map[String, String] = TestSQLContext.overrideConfs
  override def newBuilder: NewBuilder = new TestSQLSessionStateBuilder(_, _)
}
Example 3
Source File: package.scala, from Spark-2.3.1 (Apache License 2.0)
package org.apache.spark.sql.execution.streaming

import scala.reflect.ClassTag

import org.apache.spark.TaskContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.internal.SessionState
import org.apache.spark.sql.types.StructType

package object state {

  implicit class StateStoreOps[T: ClassTag](dataRDD: RDD[T]) {

    private[streaming] def mapPartitionsWithStateStore[U: ClassTag](
        stateInfo: StatefulOperatorStateInfo,
        keySchema: StructType,
        valueSchema: StructType,
        indexOrdinal: Option[Int],
        sessionState: SessionState,
        storeCoordinator: Option[StateStoreCoordinatorRef])(
        storeUpdateFunction: (StateStore, Iterator[T]) => Iterator[U]): StateStoreRDD[T, U] = {

      val cleanedF = dataRDD.sparkContext.clean(storeUpdateFunction)
      val wrappedF = (store: StateStore, iter: Iterator[T]) => {
        // Abort the state store in case of error
        TaskContext.get().addTaskCompletionListener(_ => {
          if (!store.hasCommitted) store.abort()
        })
        cleanedF(store, iter)
      }
      new StateStoreRDD(
        dataRDD,
        wrappedF,
        stateInfo.checkpointLocation,
        stateInfo.queryRunId,
        stateInfo.operatorId,
        stateInfo.storeVersion,
        keySchema,
        valueSchema,
        indexOrdinal,
        sessionState,
        storeCoordinator)
    }
  }
}
Example 4
Source File: StateStoreRDD.scala, from Spark-2.3.1 (Apache License 2.0)
package org.apache.spark.sql.execution.streaming.state

import java.util.UUID

import scala.reflect.ClassTag

import org.apache.spark.{Partition, TaskContext}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.internal.SessionState
import org.apache.spark.sql.types.StructType
import org.apache.spark.util.SerializableConfiguration

// The original excerpt starts at getPreferredLocations; the class declaration, fields and
// getPartitions are filled in to match the constructor call shown in Example 3.
class StateStoreRDD[T: ClassTag, U: ClassTag](
    dataRDD: RDD[T],
    storeUpdateFunction: (StateStore, Iterator[T]) => Iterator[U],
    checkpointLocation: String,
    queryRunId: UUID,
    operatorId: Long,
    storeVersion: Long,
    keySchema: StructType,
    valueSchema: StructType,
    indexOrdinal: Option[Int],
    sessionState: SessionState,
    @transient private val storeCoordinator: Option[StateStoreCoordinatorRef])
  extends RDD[U](dataRDD) {

  private val storeConf = new StateStoreConf(sessionState.conf)

  // A Hadoop Configuration can be about 10 KB, which is pretty big, so broadcast it
  private val hadoopConfBroadcast = dataRDD.context.broadcast(
    new SerializableConfiguration(sessionState.newHadoopConf()))

  override protected def getPartitions: Array[Partition] = dataRDD.partitions

  override def getPreferredLocations(partition: Partition): Seq[String] = {
    val stateStoreProviderId = StateStoreProviderId(
      StateStoreId(checkpointLocation, operatorId, partition.index), queryRunId)
    storeCoordinator.flatMap(_.getLocation(stateStoreProviderId)).toSeq
  }

  override def compute(partition: Partition, ctxt: TaskContext): Iterator[U] = {
    var store: StateStore = null
    val storeProviderId = StateStoreProviderId(
      StateStoreId(checkpointLocation, operatorId, partition.index), queryRunId)
    store = StateStore.get(
      storeProviderId, keySchema, valueSchema, indexOrdinal, storeVersion,
      storeConf, hadoopConfBroadcast.value.value)
    val inputIter = dataRDD.iterator(partition, ctxt)
    storeUpdateFunction(store, inputIter)
  }
}
Example 5
Source File: HiveSessionStateBuilder.scala, from Spark-2.3.1 (Apache License 2.0)
package org.apache.spark.sql.hive

import org.apache.spark.annotation.{Experimental, InterfaceStability}
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.analysis.Analyzer
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.SparkPlanner
import org.apache.spark.sql.execution.datasources._
import org.apache.spark.sql.hive.client.HiveClient
import org.apache.spark.sql.internal.{BaseSessionStateBuilder, SessionResourceLoader, SessionState}

// The excerpt omits the class declaration and most of the builder's overrides (catalog,
// analyzer, resource loader); the declaration is restored here so the code below is complete.
class HiveSessionStateBuilder(session: SparkSession, parentState: Option[SessionState] = None)
  extends BaseSessionStateBuilder(session, parentState) {

  override protected def planner: SparkPlanner = {
    new SparkPlanner(session.sparkContext, conf, experimentalMethods) with HiveStrategies {
      override val sparkSession: SparkSession = session

      override def extraPlanningStrategies: Seq[Strategy] =
        super.extraPlanningStrategies ++ customPlanningStrategies ++ Seq(HiveTableScans, Scripts)
    }
  }

  override protected def newBuilder: NewBuilder = new HiveSessionStateBuilder(_, _)
}

class HiveSessionResourceLoader(
    session: SparkSession,
    client: HiveClient)
  extends SessionResourceLoader(session) {
  override def addJar(path: String): Unit = {
    client.addJar(path)
    super.addJar(path)
  }
}
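In normal use the builder above is never instantiated by hand: SparkSession picks it reflectively when spark.sql.catalogImplementation is set to hive, which is exactly what enableHiveSupport() does. A minimal sketch of that wiring, assuming the spark-hive module is on the classpath (the object and app names are illustrative):

import org.apache.spark.sql.SparkSession

object HiveSessionStateDemo {
  def main(args: Array[String]): Unit = {
    // enableHiveSupport() sets spark.sql.catalogImplementation=hive, so the lazily built
    // sessionState comes from HiveSessionStateBuilder rather than the default SessionStateBuilder.
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("hive-session-state")
      .enableHiveSupport()
      .getOrCreate()

    println(spark.conf.get("spark.sql.catalogImplementation")) // "hive"
    println(spark.sessionState.catalog.getClass.getName)       // a Hive-aware session catalog

    spark.stop()
  }
}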
Example 6
Source File: HBaseSparkSession.scala, from Heracles (Apache License 2.0)
package org.apache.spark.sql.hbase

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.spark.SparkContext
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.analysis.Analyzer
import org.apache.spark.sql.catalyst.catalog.ExternalCatalog
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.datasources._
import org.apache.spark.sql.execution.SparkPlanner
import org.apache.spark.sql.hbase.execution.{HBaseSourceAnalysis, HBaseStrategies}
import org.apache.spark.sql.internal.{BaseSessionStateBuilder, SQLConf, SessionState, SharedState}

class HBaseSparkSession(sc: SparkContext) extends SparkSession(sc) { self =>

  def this(sparkContext: JavaSparkContext) = this(sparkContext.sc)

  @transient
  override lazy val sessionState: SessionState = new HBaseSessionStateBuilder(this).build()

  HBaseConfiguration.merge(
    sc.hadoopConfiguration, HBaseConfiguration.create(sc.hadoopConfiguration))

  @transient
  override lazy val sharedState: SharedState = new HBaseSharedState(sc, this.sqlContext)
}

class HBaseSessionStateBuilder(
    session: SparkSession,
    parentState: Option[SessionState] = None)
  extends BaseSessionStateBuilder(session) {

  override lazy val conf: SQLConf = new HBaseSQLConf

  override protected def newBuilder: NewBuilder = new HBaseSessionStateBuilder(_, _)

  override lazy val experimentalMethods: ExperimentalMethods = {
    val result = new ExperimentalMethods
    result.extraStrategies = Seq(
      (new SparkPlanner(session.sparkContext, conf, new ExperimentalMethods)
        with HBaseStrategies).HBaseDataSource)
    result
  }

  override lazy val analyzer: Analyzer = {
    new Analyzer(catalog, conf) {
      override val extendedResolutionRules: Seq[Rule[LogicalPlan]] =
        new FindDataSourceTable(session) +:
        new ResolveSQLOnFile(session) +:
        customResolutionRules

      override val postHocResolutionRules: Seq[Rule[LogicalPlan]] =
        PreprocessTableCreation(session) +:
        PreprocessTableInsertion(conf) +:
        DataSourceAnalysis(conf) +:
        HBaseSourceAnalysis(session) +:
        customPostHocResolutionRules

      override val extendedCheckRules = customCheckRules
    }
  }
}

class HBaseSharedState(sc: SparkContext, sqlContext: SQLContext) extends SharedState(sc) {
  override lazy val externalCatalog: ExternalCatalog =
    new HBaseCatalog(sqlContext, sc.hadoopConfiguration)
}
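The HBase and Hive examples share one pattern: subclass BaseSessionStateBuilder, override the pieces you care about (conf, analyzer, extra planning strategies), and point newBuilder back at your own class so cloned sessions rebuild the same state. The stripped-down sketch below shows just that skeleton; MySessionStateBuilder, NoopStrategy and the package org.apache.spark.sql.custom are illustrative names, not part of any project on this page, and the package sits under org.apache.spark.sql because the builder API is internal.

package org.apache.spark.sql.custom

import org.apache.spark.sql.{SparkSession, Strategy}
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.internal.{BaseSessionStateBuilder, SessionState}

// A do-nothing strategy, standing in for something real like HBaseStrategies.HBaseDataSource.
object NoopStrategy extends Strategy {
  override def apply(plan: LogicalPlan): Seq[SparkPlan] = Nil
}

class MySessionStateBuilder(
    session: SparkSession,
    parentState: Option[SessionState] = None)
  extends BaseSessionStateBuilder(session, parentState) {

  // Extra strategies are folded into the planner built by BaseSessionStateBuilder.
  override protected def customPlanningStrategies: Seq[Strategy] = Seq(NoopStrategy)

  // Lets SparkSession.newSession()/cloneSession() rebuild this state with the same builder.
  override protected def newBuilder: NewBuilder = new MySessionStateBuilder(_, _)
}

object MySessionStateBuilderDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[2]").appName("custom-state").getOrCreate()
    // Build a SessionState directly from the builder, as HBaseSparkSession does above.
    val state: SessionState = new MySessionStateBuilder(spark).build()
    println(state.planner.strategies.contains(NoopStrategy)) // true
    spark.stop()
  }
}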
Example 7
Source File: package.scala, from multi-tenancy-spark (Apache License 2.0)
package org.apache.spark.sql.execution.streaming

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.internal.SessionState
import org.apache.spark.sql.types.StructType

package object state {

  implicit class StateStoreOps[T: ClassTag](dataRDD: RDD[T]) {

    private[streaming] def mapPartitionsWithStateStore[U: ClassTag](
        checkpointLocation: String,
        operatorId: Long,
        storeVersion: Long,
        keySchema: StructType,
        valueSchema: StructType,
        sessionState: SessionState,
        storeCoordinator: Option[StateStoreCoordinatorRef])(
        storeUpdateFunction: (StateStore, Iterator[T]) => Iterator[U]): StateStoreRDD[T, U] = {
      val cleanedF = dataRDD.sparkContext.clean(storeUpdateFunction)
      new StateStoreRDD(
        dataRDD,
        cleanedF,
        checkpointLocation,
        operatorId,
        storeVersion,
        keySchema,
        valueSchema,
        sessionState,
        storeCoordinator)
    }
  }
}
Example 8
Source File: StateStoreRDD.scala, from multi-tenancy-spark (Apache License 2.0)
package org.apache.spark.sql.execution.streaming.state

import scala.reflect.ClassTag

import org.apache.spark.{Partition, TaskContext}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.internal.SessionState
import org.apache.spark.sql.types.StructType
import org.apache.spark.util.SerializableConfiguration

class StateStoreRDD[T: ClassTag, U: ClassTag](
    dataRDD: RDD[T],
    storeUpdateFunction: (StateStore, Iterator[T]) => Iterator[U],
    checkpointLocation: String,
    operatorId: Long,
    storeVersion: Long,
    keySchema: StructType,
    valueSchema: StructType,
    sessionState: SessionState,
    @transient private val storeCoordinator: Option[StateStoreCoordinatorRef])
  extends RDD[U](dataRDD) {

  private val storeConf = new StateStoreConf(sessionState.conf)

  // A Hadoop Configuration can be about 10 KB, which is pretty big, so broadcast it
  private val confBroadcast = dataRDD.context.broadcast(
    new SerializableConfiguration(sessionState.newHadoopConf()))

  override protected def getPartitions: Array[Partition] = dataRDD.partitions

  override def getPreferredLocations(partition: Partition): Seq[String] = {
    val storeId = StateStoreId(checkpointLocation, operatorId, partition.index)
    storeCoordinator.flatMap(_.getLocation(storeId)).toSeq
  }

  override def compute(partition: Partition, ctxt: TaskContext): Iterator[U] = {
    var store: StateStore = null
    val storeId = StateStoreId(checkpointLocation, operatorId, partition.index)
    store = StateStore.get(
      storeId, keySchema, valueSchema, storeVersion, storeConf, confBroadcast.value.value)
    val inputIter = dataRDD.iterator(partition, ctxt)
    storeUpdateFunction(store, inputIter)
  }
}
Example 9
Source File: package.scala, from sparkoscope (Apache License 2.0)
package org.apache.spark.sql.execution.streaming

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.internal.SessionState
import org.apache.spark.sql.types.StructType

package object state {

  implicit class StateStoreOps[T: ClassTag](dataRDD: RDD[T]) {

    private[streaming] def mapPartitionsWithStateStore[U: ClassTag](
        checkpointLocation: String,
        operatorId: Long,
        storeVersion: Long,
        keySchema: StructType,
        valueSchema: StructType,
        sessionState: SessionState,
        storeCoordinator: Option[StateStoreCoordinatorRef])(
        storeUpdateFunction: (StateStore, Iterator[T]) => Iterator[U]): StateStoreRDD[T, U] = {
      val cleanedF = dataRDD.sparkContext.clean(storeUpdateFunction)
      new StateStoreRDD(
        dataRDD,
        cleanedF,
        checkpointLocation,
        operatorId,
        storeVersion,
        keySchema,
        valueSchema,
        sessionState,
        storeCoordinator)
    }
  }
}
Example 10
Source File: StateStoreRDD.scala, from drizzle-spark (Apache License 2.0)
package org.apache.spark.sql.execution.streaming.state

import scala.reflect.ClassTag

import org.apache.spark.{Partition, TaskContext}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.internal.SessionState
import org.apache.spark.sql.types.StructType
import org.apache.spark.util.SerializableConfiguration

class StateStoreRDD[T: ClassTag, U: ClassTag](
    dataRDD: RDD[T],
    storeUpdateFunction: (StateStore, Iterator[T]) => Iterator[U],
    checkpointLocation: String,
    operatorId: Long,
    storeVersion: Long,
    keySchema: StructType,
    valueSchema: StructType,
    sessionState: SessionState,
    @transient private val storeCoordinator: Option[StateStoreCoordinatorRef])
  extends RDD[U](dataRDD) {

  private val storeConf = new StateStoreConf(sessionState.conf)

  // A Hadoop Configuration can be about 10 KB, which is pretty big, so broadcast it
  private val confBroadcast = dataRDD.context.broadcast(
    new SerializableConfiguration(sessionState.newHadoopConf()))

  override protected def getPartitions: Array[Partition] = dataRDD.partitions

  override def getPreferredLocations(partition: Partition): Seq[String] = {
    val storeId = StateStoreId(checkpointLocation, operatorId, partition.index)
    storeCoordinator.flatMap(_.getLocation(storeId)).toSeq
  }

  override def compute(partition: Partition, ctxt: TaskContext): Iterator[U] = {
    var store: StateStore = null
    val storeId = StateStoreId(checkpointLocation, operatorId, partition.index)
    store = StateStore.get(
      storeId, keySchema, valueSchema, storeVersion, storeConf, confBroadcast.value.value)
    val inputIter = dataRDD.iterator(partition, ctxt)
    storeUpdateFunction(store, inputIter)
  }
}
Example 11
Source File: TestSQLContext.scala, from XSQL (Apache License 2.0)
package org.apache.spark.sql.test

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.internal.{SessionState, SessionStateBuilder, SQLConf, WithTestConf}

// The enclosing object is not shown in the original excerpt; it is the TestSQLContext
// referenced by TestSQLSessionStateBuilder below.
private[sql] object TestSQLContext {

  val overrideConfs: Map[String, String] =
    Map(
      // Fewer shuffle partitions to speed up testing.
      SQLConf.SHUFFLE_PARTITIONS.key -> "5")
}

private[sql] class TestSQLSessionStateBuilder(
    session: SparkSession,
    state: Option[SessionState])
  extends SessionStateBuilder(session, state) with WithTestConf {
  override def overrideConfs: Map[String, String] = TestSQLContext.overrideConfs
  override def newBuilder: NewBuilder = new TestSQLSessionStateBuilder(_, _)
}
Example 12
Source File: package.scala, from XSQL (Apache License 2.0)
package org.apache.spark.sql.execution.streaming

import scala.reflect.ClassTag

import org.apache.spark.TaskContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.internal.SessionState
import org.apache.spark.sql.types.StructType

package object state {

  implicit class StateStoreOps[T: ClassTag](dataRDD: RDD[T]) {

    private[streaming] def mapPartitionsWithStateStore[U: ClassTag](
        stateInfo: StatefulOperatorStateInfo,
        keySchema: StructType,
        valueSchema: StructType,
        indexOrdinal: Option[Int],
        sessionState: SessionState,
        storeCoordinator: Option[StateStoreCoordinatorRef])(
        storeUpdateFunction: (StateStore, Iterator[T]) => Iterator[U]): StateStoreRDD[T, U] = {

      val cleanedF = dataRDD.sparkContext.clean(storeUpdateFunction)
      val wrappedF = (store: StateStore, iter: Iterator[T]) => {
        // Abort the state store in case of error
        TaskContext.get().addTaskCompletionListener[Unit](_ => {
          if (!store.hasCommitted) store.abort()
        })
        cleanedF(store, iter)
      }
      new StateStoreRDD(
        dataRDD,
        wrappedF,
        stateInfo.checkpointLocation,
        stateInfo.queryRunId,
        stateInfo.operatorId,
        stateInfo.storeVersion,
        keySchema,
        valueSchema,
        indexOrdinal,
        sessionState,
        storeCoordinator)
    }
  }
}
Example 13
Source File: StateStoreRDD.scala, from XSQL (Apache License 2.0)
package org.apache.spark.sql.execution.streaming.state

import java.util.UUID

import scala.reflect.ClassTag

import org.apache.spark.{Partition, TaskContext}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.execution.streaming.StreamExecution
import org.apache.spark.sql.execution.streaming.continuous.EpochTracker
import org.apache.spark.sql.internal.SessionState
import org.apache.spark.sql.types.StructType
import org.apache.spark.util.SerializableConfiguration

// The original excerpt starts at getPreferredLocations; the class declaration, fields and
// getPartitions are filled in to match the constructor call shown in Example 12.
class StateStoreRDD[T: ClassTag, U: ClassTag](
    dataRDD: RDD[T],
    storeUpdateFunction: (StateStore, Iterator[T]) => Iterator[U],
    checkpointLocation: String,
    queryRunId: UUID,
    operatorId: Long,
    storeVersion: Long,
    keySchema: StructType,
    valueSchema: StructType,
    indexOrdinal: Option[Int],
    sessionState: SessionState,
    @transient private val storeCoordinator: Option[StateStoreCoordinatorRef])
  extends RDD[U](dataRDD) {

  private val storeConf = new StateStoreConf(sessionState.conf)

  // A Hadoop Configuration can be about 10 KB, which is pretty big, so broadcast it
  private val hadoopConfBroadcast = dataRDD.context.broadcast(
    new SerializableConfiguration(sessionState.newHadoopConf()))

  override protected def getPartitions: Array[Partition] = dataRDD.partitions

  override def getPreferredLocations(partition: Partition): Seq[String] = {
    val stateStoreProviderId = StateStoreProviderId(
      StateStoreId(checkpointLocation, operatorId, partition.index), queryRunId)
    storeCoordinator.flatMap(_.getLocation(stateStoreProviderId)).toSeq
  }

  override def compute(partition: Partition, ctxt: TaskContext): Iterator[U] = {
    var store: StateStore = null
    val storeProviderId = StateStoreProviderId(
      StateStoreId(checkpointLocation, operatorId, partition.index), queryRunId)

    // If we're in continuous processing mode, we should get the store version for the current
    // epoch rather than the one at planning time.
    val isContinuous = Option(ctxt.getLocalProperty(StreamExecution.IS_CONTINUOUS_PROCESSING))
      .map(_.toBoolean).getOrElse(false)
    val currentVersion = if (isContinuous) {
      val epoch = EpochTracker.getCurrentEpoch
      assert(epoch.isDefined, "Current epoch must be defined for continuous processing streams.")
      epoch.get
    } else {
      storeVersion
    }

    store = StateStore.get(
      storeProviderId, keySchema, valueSchema, indexOrdinal, currentVersion,
      storeConf, hadoopConfBroadcast.value.value)
    val inputIter = dataRDD.iterator(partition, ctxt)
    storeUpdateFunction(store, inputIter)
  }
}
Example 14
Source File: XSQLTestSparkSession.scala, from XSQL (Apache License 2.0)
package org.apache.spark.sql.xsql.test

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.internal.SessionState
import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
import org.apache.spark.sql.test.TestSparkSession
import org.apache.spark.sql.xsql.XSQLSessionStateBuilder

class XSQLTestSparkSession(sc: SparkContext) extends TestSparkSession(sc) { self =>

  def this(sparkConf: SparkConf) {
    this(
      new SparkContext(
        "local[2]",
        "test-sql-context",
        sparkConf.set("spark.sql.testkey", "true").set(CATALOG_IMPLEMENTATION, "xsql")))
  }

  @transient
  override lazy val sessionState: SessionState = {
    new XSQLSessionStateBuilder(this, None).build()
  }
}
Example 15
Source File: HiveSessionStateBuilder.scala, from XSQL (Apache License 2.0)
package org.apache.spark.sql.hive

import org.apache.spark.annotation.{Experimental, InterfaceStability}
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.analysis.Analyzer
import org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.SparkPlanner
import org.apache.spark.sql.execution.datasources._
import org.apache.spark.sql.hive.client.HiveClient
import org.apache.spark.sql.internal.{BaseSessionStateBuilder, SessionResourceLoader, SessionState}

// The excerpt omits the class declaration and most of the builder's overrides (catalog,
// analyzer, resource loader); the declaration is restored here so the code below is complete.
class HiveSessionStateBuilder(session: SparkSession, parentState: Option[SessionState] = None)
  extends BaseSessionStateBuilder(session, parentState) {

  override protected def planner: SparkPlanner = {
    new SparkPlanner(session.sparkContext, conf, experimentalMethods) with HiveStrategies {
      override val sparkSession: SparkSession = session

      override def extraPlanningStrategies: Seq[Strategy] =
        super.extraPlanningStrategies ++ customPlanningStrategies ++ Seq(HiveTableScans, Scripts)
    }
  }

  override protected def newBuilder: NewBuilder = new HiveSessionStateBuilder(_, _)
}

class HiveSessionResourceLoader(
    session: SparkSession,
    clientBuilder: () => HiveClient)
  extends SessionResourceLoader(session) {
  private lazy val client = clientBuilder()
  override def addJar(path: String): Unit = {
    client.addJar(path)
    super.addJar(path)
  }
}
Example 16
Source File: TestSparkSession.scala, from spark-alchemy (Apache License 2.0)
package org.apache.spark.sql.test

import org.apache.spark.sql.{SparkSession, SparkSessionExtensions}
import org.apache.spark.sql.internal.{SQLConf, SessionState, SessionStateBuilder, WithTestConf}
import org.apache.spark.{SparkConf, SparkContext}

// The enclosing object is not shown in the original excerpt; it is the TestSQLContext
// referenced by TestSQLSessionStateBuilder below.
private[sql] object TestSQLContext {

  val overrideConfs: Map[String, String] =
    Map(
      // Fewer shuffle partitions to speed up testing.
      SQLConf.SHUFFLE_PARTITIONS.key -> "3"
    )
}

private[sql] class TestSQLSessionStateBuilder(
    session: SparkSession,
    state: Option[SessionState])
  extends SessionStateBuilder(session, state) with WithTestConf {
  override def overrideConfs: Map[String, String] = TestSQLContext.overrideConfs
  override def newBuilder: NewBuilder = new TestSQLSessionStateBuilder(_, _)
}
Example 17
Source File: MatfastSessionState.scala, from MatRel (Apache License 2.0)
package org.apache.spark.sql.matfast

import org.apache.spark.sql.matfast.execution.MatfastPlanner
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, FileSourceStrategy}
import org.apache.spark.sql.{execution => sparkexecution, _}
import org.apache.spark.sql.internal.SessionState
import org.apache.spark.sql.matfast.plans.MatfastOptimizer

import scala.collection.immutable

private[matfast] class MatfastSessionState(matfastSession: MatfastSession)
  extends SessionState(matfastSession) { self =>

  protected[matfast] lazy val matfastConf = new MatfastConf

  protected[matfast] def getSQLOptimizer = optimizer

  protected[matfast] lazy val matfastOptimizer: MatfastOptimizer = new MatfastOptimizer

  protected[matfast] val matfastPlanner: sparkexecution.SparkPlanner = {
    new MatfastPlanner(matfastSession, conf, experimentalMethods.extraStrategies)
  }

  override def executePlan(plan: LogicalPlan) =
    new execution.QueryExecution(matfastSession, plan)

  def setConf(key: String, value: String): Unit = {
    if (key.startsWith("matfast.")) matfastConf.setConfString(key, value)
    else conf.setConfString(key, value)
  }

  def getConf(key: String): String = {
    if (key.startsWith("matfast.")) matfastConf.getConfString(key)
    else conf.getConfString(key)
  }

  def getConf(key: String, defaultValue: String): String = {
    // Route matfast.* keys to matfastConf, mirroring setConf and getConf(key) above.
    if (key.startsWith("matfast.")) matfastConf.getConfString(key, defaultValue)
    else conf.getConfString(key, defaultValue)
  }

  def getAllConfs: immutable.Map[String, String] = {
    conf.getAllConfs ++ matfastConf.getAllConfs
  }
}
Example 18
Source File: package.scala, from drizzle-spark (Apache License 2.0)
package org.apache.spark.sql.execution.streaming

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.internal.SessionState
import org.apache.spark.sql.types.StructType

package object state {

  implicit class StateStoreOps[T: ClassTag](dataRDD: RDD[T]) {

    private[streaming] def mapPartitionsWithStateStore[U: ClassTag](
        checkpointLocation: String,
        operatorId: Long,
        storeVersion: Long,
        keySchema: StructType,
        valueSchema: StructType,
        sessionState: SessionState,
        storeCoordinator: Option[StateStoreCoordinatorRef])(
        storeUpdateFunction: (StateStore, Iterator[T]) => Iterator[U]): StateStoreRDD[T, U] = {
      val cleanedF = dataRDD.sparkContext.clean(storeUpdateFunction)
      new StateStoreRDD(
        dataRDD,
        cleanedF,
        checkpointLocation,
        operatorId,
        storeVersion,
        keySchema,
        valueSchema,
        sessionState,
        storeCoordinator)
    }
  }
}