org.apache.spark.internal.config.ConfigBuilder Scala Examples
The following examples show how to use org.apache.spark.internal.config.ConfigBuilder.
Example 1
Source File: Monitor.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.monitor

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.SparkConf
import org.apache.spark.alarm.{Alarm, AlertMessage}
import org.apache.spark.alarm.AlertType.AlertType
import org.apache.spark.internal.config.ConfigBuilder
import org.apache.spark.monitor.MonitorItem.MonitorItem
import org.apache.spark.scheduler.SparkListenerEvent
import org.apache.spark.status.AppStatusStore
import org.apache.spark.util.kvstore.KVStore

/**
 * A listener-side check that inspects Spark listener events and forwards any resulting
 * [[AlertMessage]] to the alarms bound to it.
 *
 * Implementations supply `alertType`, `item` and `watchOut`. The `bind` overloads attach
 * alarms, a [[KVStore]] or a [[SparkConf]] and return this monitor so calls can be chained.
 */
trait Monitor {

  val alertType: Seq[AlertType]
  val item: MonitorItem

  /** Alarms notified by `onEvent`; filled through the `bind` overloads. */
  val alarms: ArrayBuffer[Alarm] = ArrayBuffer()

  // The three fields below stay null until the matching bind(...) overload is called.
  var kvStore: KVStore = null
  var appStore: AppStatusStore = null
  var conf: SparkConf = null

  /**
   * Examines one listener event.
   *
   * @param event the event to inspect
   * @return a message to deliver to the bound alarms, or None when nothing should be sent
   */
  def watchOut(event: SparkListenerEvent): Option[AlertMessage]

  /** Registers a single alarm and returns this monitor. */
  def bind(alarm: Alarm): Monitor = {
    alarms.append(alarm)
    this
  }

  /** Registers every alarm in `alarms` and returns this monitor. */
  def bind(alarms: Seq[Alarm]): Monitor = {
    this.alarms.appendAll(alarms)
    this
  }

  /** Stores `kvStore`, wraps it in a new [[AppStatusStore]], and returns this monitor. */
  def bind(kvStore: KVStore): Monitor = {
    this.kvStore = kvStore
    this.appStore = new AppStatusStore(kvStore)
    this
  }

  /** Stores `conf` and returns this monitor. */
  def bind(conf: SparkConf): Monitor = {
    this.conf = conf
    this
  }

  /**
   * Runs `watchOut` on the event and, when it produces a message, passes that message to
   * every bound alarm.
   */
  def onEvent(event: SparkListenerEvent): Unit = {
    // Option.foreach replaces the isDefined / get pair; nothing happens for None.
    watchOut(event).foreach { message =>
      alarms.foreach(_.alarm(message))
    }
  }
}

object Monitor {

  // NOTE(review): the two empty strings look like class names that were lost from this copy
  // of the file; they are kept as-is here. Confirm the intended values against upstream.
  val commonClasses = Seq(
    "",
    "org.apache.spark.repl.Main",
    "",
    "org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver")

  val dateFormats = Seq("yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd")

  val PREFIX = "spark.monitor"

  // Names of all MonitorItem values. Declared before MONITOR_ITEMS because the error message
  // below is built eagerly while that entry is being constructed.
  private val monitorItemNames: Set[String] = MonitorItem.values.iterator.map(_.toString).toSet

  // NOTE(review): the arguments of checkValue were missing in this copy of the file. They are
  // reconstructed as "every configured name is a MonitorItem name", which matches the
  // upper-casing transform and the "must be one of" message; confirm against upstream.
  private[spark] val MONITOR_ITEMS = ConfigBuilder("spark.monitor.items")
    .internal()
    .doc("choose monitors to open, split with `,`")
    .stringConf
    .transform(_.toUpperCase)
    .toSequence
    .checkValue(
      items => items.forall(monitorItemNames.contains),
      s"must be one of ${monitorItemNames.mkString(", ")}")
    .createWithDefault(Seq.empty)
}

/** The monitor kinds that can be named in `spark.monitor.items`. */
object MonitorItem extends Enumeration {
  type MonitorItem = Value
  val SQL_CHANGE_NOTIFIER = Value
  val APP_FINISH_NOTIFIER, EXECUTOR_NUM_NOTIFIER, DATASKEW_NOTIFIER, EXECUTOR_MEMORY_ADVISER = Value
  val SPARK_APPLICATION_SUMMARY, APP_IDLE_WARNER = Value
}
Example 2
Source File: RemoteShuffleConf.scala From OAP with Apache License 2.0 | 5 votes |
package org.apache.spark.shuffle.remote

import org.apache.spark.internal.config.{ConfigBuilder, ConfigEntry}

/**
 * Configuration entries under the `spark.shuffle.remote.*` namespace.
 *
 * Each entry is declared with its key, documentation string, value type and default.
 */
object RemoteShuffleConf {

  val STORAGE_MASTER_URI: ConfigEntry[String] =
    ConfigBuilder("spark.shuffle.remote.storageMasterUri")
      .doc("Contact this storage master while persisting shuffle files")
      .stringConf
      .createWithDefault("hdfs://localhost:9001")

  val STORAGE_HDFS_MASTER_UI_PORT: ConfigEntry[String] =
    ConfigBuilder("spark.shuffle.remote.hdfs.storageMasterUIPort")
      .doc("Contact this UI port to retrieve HDFS configurations")
      .stringConf
      .createWithDefault("50070")

  val SHUFFLE_FILES_ROOT_DIRECTORY: ConfigEntry[String] =
    ConfigBuilder("spark.shuffle.remote.filesRootDirectory")
      .doc("Use this as the root directory for shuffle files")
      .stringConf
      .createWithDefault("/shuffle")

  val DFS_REPLICATION: ConfigEntry[Int] =
    ConfigBuilder("spark.shuffle.remote.hdfs.replication")
      .doc("The default replication of remote storage system, will override dfs.replication" +
        " when HDFS is used as shuffling storage")
      .intConf
      .createWithDefault(3)

  val REMOTE_OPTIMIZED_SHUFFLE_ENABLED: ConfigEntry[Boolean] =
    ConfigBuilder("spark.shuffle.remote.optimizedPathEnabled")
      .doc("Enable using unsafe-optimized shuffle writer")
      .internal()
      .booleanConf
      .createWithDefault(true)

  // Doc text fix: a space was missing between "(hash-based)" and "shuffle" where two
  // literals are concatenated.
  val REMOTE_BYPASS_MERGE_THRESHOLD: ConfigEntry[Int] =
    ConfigBuilder("spark.shuffle.remote.bypassMergeThreshold")
      .doc("Remote shuffle manager uses this threshold to decide using bypass-merge(hash-based)" +
        " shuffle or not, a new configuration is introduced(and it's -1 by default) because we" +
        " want to explicitly make disabling hash-based shuffle writer as the default behavior." +
        " When memory is relatively sufficient, using sort-based shuffle writer in remote shuffle" +
        " is often more efficient than the hash-based one. Because the bypass-merge shuffle " +
        "writer proceeds I/O of 3x total shuffle size: 1 time for read I/O and 2 times for write" +
        " I/Os, and this can be an even larger overhead under remote shuffle, the 3x shuffle size" +
        " is gone through network, arriving at remote storage system.")
      .intConf
      .createWithDefault(-1)

  // Doc text fix: a space was missing between "(read them" and "from storage".
  val REMOTE_INDEX_CACHE_SIZE: ConfigEntry[String] =
    ConfigBuilder("spark.shuffle.remote.index.cache.size")
      .doc("This index file cache resides in each executor. If it's a positive value, index " +
        "cache will be turned on: instead of reading index files directly from remote storage" +
        ", a reducer will fetch the index files from the executors that write them through" +
        " network. And those executors will return the index files kept in cache. (read them" +
        " from storage if needed)")
      .stringConf
      .createWithDefault("0")

  // Doc text fix: a space was missing between "for" and "index files".
  val NUM_TRANSFER_SERVICE_THREADS: ConfigEntry[Int] =
    ConfigBuilder("spark.shuffle.remote.numIndexReadThreads")
      .doc("The maximum number of server/client threads used in RemoteShuffleTransferService for" +
        " index files transferring")
      .intConf
      .createWithDefault(3)

  // The default is the processor count reported by the JVM when this object is initialized.
  val NUM_CONCURRENT_FETCH: ConfigEntry[Int] =
    ConfigBuilder("spark.shuffle.remote.numReadThreads")
      .doc("The maximum number of concurrent reading threads fetching shuffle data blocks")
      .intConf
      .createWithDefault(Runtime.getRuntime.availableProcessors())

  val REUSE_FILE_HANDLE: ConfigEntry[Boolean] =
    ConfigBuilder("spark.shuffle.remote.reuseFileHandle")
      .doc("By switching on this feature, the file handles returned by Filesystem open operations" +
        " will be cached/reused inside an executor(across different rounds of reduce tasks)," +
        " eliminating open overhead. This should improve the reduce stage performance only when" +
        " file open operations occupy majority of the time, e.g. There is a large number of" +
        " shuffle blocks, each reading a fairly small block of data, and there is no other" +
        " compute in the reduce stage.")
      .booleanConf
      .createWithDefault(false)

  val DATA_FETCH_EAGER_REQUIREMENT: ConfigEntry[Boolean] =
    ConfigBuilder("spark.shuffle.remote.eagerRequirementDataFetch")
      .doc("With eager requirement = false, a shuffle block will be counted ready and served for" +
        " compute until all content of the block is put in Spark's local memory. With eager " +
        "requirement = true, a shuffle block will be served to later compute after the bytes " +
        "required is fetched and put in memory")
      .booleanConf
      .createWithDefault(false)
}
Example 3
Source File: config.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.mesos

import java.util.concurrent.TimeUnit

import org.apache.spark.internal.config.ConfigBuilder

/** Configuration entries declared for the Mesos deploy package. */
package object config {

  /** Time entry read in seconds; its default is given as the string "30s". */
  private[spark] val SHUFFLE_CLEANER_INTERVAL_S =
    ConfigBuilder("spark.shuffle.cleaner.interval")
      .timeConf(TimeUnit.SECONDS)
      .createWithDefaultString("30s")

  /** String entry whose default is "NONE". */
  private[spark] val RECOVERY_MODE =
    ConfigBuilder("spark.deploy.recoveryMode")
      .stringConf
      .createWithDefault("NONE")

  /** Optional string entry; no default value is defined. */
  private[spark] val DISPATCHER_WEBUI_URL =
    ConfigBuilder("spark.mesos.dispatcher.webui.url")
      .doc("Set the Spark Mesos dispatcher webui_url for interacting with the framework. " +
        "If unset it will point to Spark's internal web UI.")
      .stringConf
      .createOptional

  /** Optional string entry; no default value is defined. */
  private[spark] val ZOOKEEPER_URL =
    ConfigBuilder("spark.deploy.zookeeper.url")
      .doc("When `spark.deploy.recoveryMode` is set to ZOOKEEPER, " +
        "this configuration is used to set the zookeeper URL to connect to.")
      .stringConf
      .createOptional

  /** Optional string entry; no default value is defined. */
  private[spark] val HISTORY_SERVER_URL =
    ConfigBuilder("spark.mesos.dispatcher.historyServer.url")
      .doc("Set the URL of the history server. " +
        "The dispatcher will then link each driver to its entry in the history server.")
      .stringConf
      .createOptional
}
Example 4
Source File: config.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.mesos

import java.util.concurrent.TimeUnit

import org.apache.spark.internal.config.ConfigBuilder

/** Configuration entries declared for the Mesos deploy package. */
package object config {

  /** Time entry read in seconds; its default is given as the string "30s". */
  private[spark] val SHUFFLE_CLEANER_INTERVAL_S =
    ConfigBuilder("spark.shuffle.cleaner.interval")
      .timeConf(TimeUnit.SECONDS)
      .createWithDefaultString("30s")

  /** String entry whose default is "NONE". */
  private[spark] val RECOVERY_MODE =
    ConfigBuilder("spark.deploy.recoveryMode")
      .stringConf
      .createWithDefault("NONE")

  /** Optional string entry; no default value is defined. */
  private[spark] val DISPATCHER_WEBUI_URL =
    ConfigBuilder("spark.mesos.dispatcher.webui.url")
      .doc("Set the Spark Mesos dispatcher webui_url for interacting with the framework. " +
        "If unset it will point to Spark's internal web UI.")
      .stringConf
      .createOptional

  /** Optional string entry; no default value is defined. */
  private[spark] val ZOOKEEPER_URL =
    ConfigBuilder("spark.deploy.zookeeper.url")
      .doc("When `spark.deploy.recoveryMode` is set to ZOOKEEPER, " +
        "this configuration is used to set the zookeeper URL to connect to.")
      .stringConf
      .createOptional

  /** Optional string entry; no default value is defined. */
  private[spark] val HISTORY_SERVER_URL =
    ConfigBuilder("spark.mesos.dispatcher.historyServer.url")
      .doc("Set the URL of the history server. " +
        "The dispatcher will then link each driver to its entry in the history server.")
      .stringConf
      .createOptional
}
Example 5
Source File: package.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive

import org.apache.spark.internal.config.ConfigBuilder

/** Configuration entries declared for the Hive SQL package. */
package object config {

  /**
   * Comma-separated user names, exposed as a sequence of strings; defaults to an empty list.
   *
   * The doc text previously ran "real user" and "who" together because the concatenated
   * literals had no separating space; the `s` interpolators were dropped since nothing is
   * interpolated.
   */
  private[hive] val PROXY_USERS = ConfigBuilder("spark.sql.proxy.users")
    .doc("Comma separated string of user names for Spark Thrift Server to initializing " +
      "different SparkContext. These users must have rights to impersonate the real user " +
      "who start the driver side jvm.")
    .stringConf
    .toSequence
    .createWithDefault(Nil)
}
Example 6
Source File: config.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.history

import java.util.concurrent.TimeUnit

import org.apache.spark.internal.config.ConfigBuilder
// Restored: the import target was missing in this copy although ByteUnit is used below.
import org.apache.spark.network.util.ByteUnit

/** Configuration entries declared for the history server package. */
private[spark] object config {

  val DEFAULT_LOG_DIR = "file:/tmp/spark-events"

  /** String entry that falls back to DEFAULT_LOG_DIR. */
  val EVENT_LOG_DIR = ConfigBuilder("spark.history.fs.logDirectory")
    .stringConf
    .createWithDefault(DEFAULT_LOG_DIR)

  /** Time entry read in seconds; its default is given as the string "7d". */
  val MAX_LOG_AGE_S = ConfigBuilder("spark.history.fs.cleaner.maxAge")
    .timeConf(TimeUnit.SECONDS)
    .createWithDefaultString("7d")

  // NOTE(review): the key was an empty string in this copy of the file. It is restored to
  // the history-store path key documented for the Spark history server; confirm upstream.
  val LOCAL_STORE_DIR = ConfigBuilder("spark.history.store.path")
    .doc("Local directory where to cache application history information. By default this is " +
      "not set, meaning all history information will be kept in memory.")
    .stringConf
    .createOptional

  // NOTE(review): the key was an empty string in this copy of the file. It is restored to
  // the history-store disk usage key documented for the Spark history server; confirm upstream.
  val MAX_LOCAL_DISK_USAGE = ConfigBuilder("spark.history.store.maxDiskUsage")
    .bytesConf(ByteUnit.BYTE)
    .createWithDefaultString("10g")

  val HISTORY_SERVER_UI_PORT = ConfigBuilder("spark.history.ui.port")
    .doc("Web UI port to bind Spark History Server")
    .intConf
    .createWithDefault(18080)
}