org.apache.spark.internal.config.ConfigBuilder Scala Examples
The following examples show how to use org.apache.spark.internal.config.ConfigBuilder.
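All of the examples follow the same pattern: ConfigBuilder(key) starts the builder, a type method (stringConf, intConf, booleanConf, timeConf, bytesConf) fixes the value type, optional calls such as doc, internal, transform, toSequence and checkValue refine the entry, and a create* method produces a ConfigEntry. The minimal sketch below illustrates the pattern; the spark.example.maxRetries key and the ExampleConfig object are made up for illustration and are not taken from any of the projects cited here.

package org.apache.spark.example

import org.apache.spark.SparkConf
import org.apache.spark.internal.config.{ConfigBuilder, ConfigEntry}

// Hypothetical config holder: ConfigBuilder is private[spark], so definitions
// like this must live somewhere under the org.apache.spark package tree.
private[spark] object ExampleConfig {

  val MAX_RETRIES: ConfigEntry[Int] =
    ConfigBuilder("spark.example.maxRetries")
      .doc("How many times to retry the example operation before giving up")
      .intConf
      .checkValue(_ >= 0, "spark.example.maxRetries must not be negative")
      .createWithDefault(3)

  // Resolving the value: SparkConf.get(entry) is also private[spark], so it is
  // only callable from Spark-internal code; external code reads the plain key.
  def maxRetries(conf: SparkConf): Int = conf.get(MAX_RETRIES)
}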
Example 1
Source File: Monitor.scala From XSQL with Apache License 2.0
package org.apache.spark.monitor

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.SparkConf
import org.apache.spark.alarm.{Alarm, AlertMessage}
import org.apache.spark.alarm.AlertType.AlertType
import org.apache.spark.internal.config.ConfigBuilder
import org.apache.spark.monitor.MonitorItem.MonitorItem
import org.apache.spark.scheduler.SparkListenerEvent
import org.apache.spark.status.AppStatusStore
import org.apache.spark.util.kvstore.KVStore

trait Monitor {
  val alertType: Seq[AlertType]
  val item: MonitorItem
  val alarms: ArrayBuffer[Alarm] = ArrayBuffer()
  var kvStore: KVStore = null
  var appStore: AppStatusStore = null
  var conf: SparkConf = null

  def watchOut(event: SparkListenerEvent): Option[AlertMessage]

  def bind(alarm: Alarm): Monitor = {
    alarms.append(alarm)
    this
  }

  def bind(alarms: Seq[Alarm]): Monitor = {
    this.alarms.appendAll(alarms)
    this
  }

  def bind(kvStore: KVStore): Monitor = {
    this.kvStore = kvStore
    this.appStore = new AppStatusStore(kvStore)
    this
  }

  def bind(conf: SparkConf): Monitor = {
    this.conf = conf
    this
  }

  def onEvent(event: SparkListenerEvent): Unit = {
    val message = watchOut(event)
    if (message.isDefined) {
      alarms.foreach(_.alarm(message.get))
    }
  }
}

object Monitor {
  val commonClasses = Seq(
    "org.apache.spark.sql.xsql.shell.SparkXSQLShell",
    "org.apache.spark.repl.Main",
    "org.apache.spark.sql.hive.xitong.shell.SparkHiveShell",
    "org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver")
  val dateFormats = Seq("yyyy-MM-dd", "yyyy/MM/dd", "yyyyMMdd")
  val PREFIX = "spark.monitor"

  private[spark] val MONITOR_ITEMS = ConfigBuilder("spark.monitor.items")
    .internal()
    .doc("choose monitors to open, split with `,`")
    .stringConf
    .transform(_.toUpperCase)
    .toSequence
    .checkValue(
      _.toSet.subsetOf(MonitorItem.values.map(_.toString)),
      s"must be one of ${MonitorItem.values.map(_.toString)}")
    .createWithDefault(Seq.empty)
}

object MonitorItem extends Enumeration {
  type MonitorItem = Value
  val SQL_CHANGE_NOTIFIER = Value
  val APP_FINISH_NOTIFIER, EXECUTOR_NUM_NOTIFIER, DATASKEW_NOTIFIER, EXECUTOR_MEMORY_ADVISER = Value
  val SPARK_APPLICATION_SUMMARY, APP_IDLE_WARNER = Value
}
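A hypothetical consumer of the MONITOR_ITEMS entry could look like the sketch below; the MonitorConfigReader helper is illustrative and not part of the XSQL sources. The transform(_.toUpperCase) and checkValue calls above guarantee that every resolved string maps onto a MonitorItem value.

package org.apache.spark.monitor

import org.apache.spark.SparkConf
import org.apache.spark.monitor.MonitorItem.MonitorItem

// Hypothetical helper, not part of the XSQL sources: resolve which monitors are
// enabled. SparkConf.get(entry) is private[spark], which is why this object sits
// in the same package tree as Monitor itself.
private[spark] object MonitorConfigReader {
  def enabledItems(conf: SparkConf): Set[MonitorItem] =
    conf.get(Monitor.MONITOR_ITEMS).map(MonitorItem.withName).toSet
}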
Example 2
Source File: RemoteShuffleConf.scala From OAP with Apache License 2.0
package org.apache.spark.shuffle.remote

import org.apache.spark.internal.config.{ConfigBuilder, ConfigEntry}

object RemoteShuffleConf {

  val STORAGE_MASTER_URI: ConfigEntry[String] =
    ConfigBuilder("spark.shuffle.remote.storageMasterUri")
      .doc("Contact this storage master while persisting shuffle files")
      .stringConf
      .createWithDefault("hdfs://localhost:9001")

  val STORAGE_HDFS_MASTER_UI_PORT: ConfigEntry[String] =
    ConfigBuilder("spark.shuffle.remote.hdfs.storageMasterUIPort")
      .doc("Contact this UI port to retrieve HDFS configurations")
      .stringConf
      .createWithDefault("50070")

  val SHUFFLE_FILES_ROOT_DIRECTORY: ConfigEntry[String] =
    ConfigBuilder("spark.shuffle.remote.filesRootDirectory")
      .doc("Use this as the root directory for shuffle files")
      .stringConf
      .createWithDefault("/shuffle")

  val DFS_REPLICATION: ConfigEntry[Int] =
    ConfigBuilder("spark.shuffle.remote.hdfs.replication")
      .doc("The default replication of the remote storage system; overrides dfs.replication" +
        " when HDFS is used as shuffle storage")
      .intConf
      .createWithDefault(3)

  val REMOTE_OPTIMIZED_SHUFFLE_ENABLED: ConfigEntry[Boolean] =
    ConfigBuilder("spark.shuffle.remote.optimizedPathEnabled")
      .doc("Enable using unsafe-optimized shuffle writer")
      .internal()
      .booleanConf
      .createWithDefault(true)

  val REMOTE_BYPASS_MERGE_THRESHOLD: ConfigEntry[Int] =
    ConfigBuilder("spark.shuffle.remote.bypassMergeThreshold")
      .doc("Remote shuffle manager uses this threshold to decide whether to use bypass-merge" +
        " (hash-based) shuffle. A new configuration is introduced (-1 by default) because we" +
        " want to explicitly make disabling the hash-based shuffle writer the default behavior." +
        " When memory is relatively sufficient, the sort-based shuffle writer in remote shuffle" +
        " is often more efficient than the hash-based one, because the bypass-merge shuffle" +
        " writer performs I/O of 3x the total shuffle size: 1x for read I/O and 2x for write" +
        " I/O. This can be an even larger overhead under remote shuffle, since the 3x shuffle" +
        " size goes through the network to the remote storage system.")
      .intConf
      .createWithDefault(-1)

  val REMOTE_INDEX_CACHE_SIZE: ConfigEntry[String] =
    ConfigBuilder("spark.shuffle.remote.index.cache.size")
      .doc("This index file cache resides in each executor. If it's a positive value, the index" +
        " cache will be turned on: instead of reading index files directly from remote storage," +
        " a reducer will fetch the index files over the network from the executors that wrote" +
        " them, and those executors will return the index files kept in cache (reading them" +
        " from storage if needed).")
      .stringConf
      .createWithDefault("0")

  val NUM_TRANSFER_SERVICE_THREADS: ConfigEntry[Int] =
    ConfigBuilder("spark.shuffle.remote.numIndexReadThreads")
      .doc("The maximum number of server/client threads used in RemoteShuffleTransferService" +
        " for index file transfers")
      .intConf
      .createWithDefault(3)

  val NUM_CONCURRENT_FETCH: ConfigEntry[Int] =
    ConfigBuilder("spark.shuffle.remote.numReadThreads")
      .doc("The maximum number of concurrent reading threads fetching shuffle data blocks")
      .intConf
      .createWithDefault(Runtime.getRuntime.availableProcessors())

  val REUSE_FILE_HANDLE: ConfigEntry[Boolean] =
    ConfigBuilder("spark.shuffle.remote.reuseFileHandle")
      .doc("When this feature is switched on, the file handles returned by FileSystem open" +
        " operations will be cached/reused inside an executor (across different rounds of" +
        " reduce tasks), eliminating open overhead. This should improve reduce stage" +
        " performance only when file open operations occupy the majority of the time, e.g." +
        " there is a large number of shuffle blocks, each reading a fairly small block of" +
        " data, and there is no other compute in the reduce stage.")
      .booleanConf
      .createWithDefault(false)

  val DATA_FETCH_EAGER_REQUIREMENT: ConfigEntry[Boolean] =
    ConfigBuilder("spark.shuffle.remote.eagerRequirementDataFetch")
      .doc("With eager requirement = false, a shuffle block is counted ready and served for" +
        " compute only after all content of the block is put in Spark's local memory. With" +
        " eager requirement = true, a shuffle block is served to later compute as soon as the" +
        " required bytes are fetched and put in memory.")
      .booleanConf
      .createWithDefault(false)
}
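Because every entry above is created with a default, none of these keys is mandatory; a job would typically override only a few of them. The sketch below is illustrative only; the RemoteShuffleOverrides object and the values are examples, not tuning recommendations.

import org.apache.spark.SparkConf

object RemoteShuffleOverrides {
  // Illustrative overrides: the keys come from RemoteShuffleConf above,
  // the values are examples only, not tuning recommendations.
  def conf(): SparkConf = new SparkConf()
    .set("spark.shuffle.remote.storageMasterUri", "hdfs://namenode:9001")
    .set("spark.shuffle.remote.bypassMergeThreshold", "200")
    .set("spark.shuffle.remote.reuseFileHandle", "true")
}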
Example 3
Source File: config.scala From sparkoscope with Apache License 2.0
package org.apache.spark.deploy.mesos

import java.util.concurrent.TimeUnit

import org.apache.spark.internal.config.ConfigBuilder

package object config {

  private[spark] val SHUFFLE_CLEANER_INTERVAL_S =
    ConfigBuilder("spark.shuffle.cleaner.interval")
      .timeConf(TimeUnit.SECONDS)
      .createWithDefaultString("30s")

  private[spark] val RECOVERY_MODE =
    ConfigBuilder("spark.deploy.recoveryMode")
      .stringConf
      .createWithDefault("NONE")

  private[spark] val DISPATCHER_WEBUI_URL =
    ConfigBuilder("spark.mesos.dispatcher.webui.url")
      .doc("Set the Spark Mesos dispatcher webui_url for interacting with the " +
        "framework. If unset it will point to Spark's internal web UI.")
      .stringConf
      .createOptional

  private[spark] val ZOOKEEPER_URL =
    ConfigBuilder("spark.deploy.zookeeper.url")
      .doc("When `spark.deploy.recoveryMode` is set to ZOOKEEPER, this " +
        "configuration is used to set the zookeeper URL to connect to.")
      .stringConf
      .createOptional

  private[spark] val HISTORY_SERVER_URL =
    ConfigBuilder("spark.mesos.dispatcher.historyServer.url")
      .doc("Set the URL of the history server. The dispatcher will then " +
        "link each driver to its entry in the history server.")
      .stringConf
      .createOptional
}
Example 4
Source File: config.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.deploy.mesos

import java.util.concurrent.TimeUnit

import org.apache.spark.internal.config.ConfigBuilder

package object config {

  private[spark] val SHUFFLE_CLEANER_INTERVAL_S =
    ConfigBuilder("spark.shuffle.cleaner.interval")
      .timeConf(TimeUnit.SECONDS)
      .createWithDefaultString("30s")

  private[spark] val RECOVERY_MODE =
    ConfigBuilder("spark.deploy.recoveryMode")
      .stringConf
      .createWithDefault("NONE")

  private[spark] val DISPATCHER_WEBUI_URL =
    ConfigBuilder("spark.mesos.dispatcher.webui.url")
      .doc("Set the Spark Mesos dispatcher webui_url for interacting with the " +
        "framework. If unset it will point to Spark's internal web UI.")
      .stringConf
      .createOptional

  private[spark] val ZOOKEEPER_URL =
    ConfigBuilder("spark.deploy.zookeeper.url")
      .doc("When `spark.deploy.recoveryMode` is set to ZOOKEEPER, this " +
        "configuration is used to set the zookeeper URL to connect to.")
      .stringConf
      .createOptional

  private[spark] val HISTORY_SERVER_URL =
    ConfigBuilder("spark.mesos.dispatcher.historyServer.url")
      .doc("Set the URL of the history server. The dispatcher will then " +
        "link each driver to its entry in the history server.")
      .stringConf
      .createOptional
}
Example 5
Source File: package.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.hive

import org.apache.spark.internal.config.ConfigBuilder

package object config {

  private[hive] val PROXY_USERS =
    ConfigBuilder("spark.sql.proxy.users")
      .doc("Comma-separated string of user names for which Spark Thrift Server initializes " +
        "different SparkContexts. These users must have the right to impersonate the real " +
        "user who started the driver-side JVM.")
      .stringConf
      .toSequence
      .createWithDefault(Nil)
}
Example 6
Source File: config.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.deploy.history

import java.util.concurrent.TimeUnit

import org.apache.spark.internal.config.ConfigBuilder
import org.apache.spark.network.util.ByteUnit

private[spark] object config {

  val DEFAULT_LOG_DIR = "file:/tmp/spark-events"

  val EVENT_LOG_DIR = ConfigBuilder("spark.history.fs.logDirectory")
    .stringConf
    .createWithDefault(DEFAULT_LOG_DIR)

  val MAX_LOG_AGE_S = ConfigBuilder("spark.history.fs.cleaner.maxAge")
    .timeConf(TimeUnit.SECONDS)
    .createWithDefaultString("7d")

  val LOCAL_STORE_DIR = ConfigBuilder("spark.history.store.path")
    .doc("Local directory where to cache application history information. By default this is " +
      "not set, meaning all history information will be kept in memory.")
    .stringConf
    .createOptional

  val MAX_LOCAL_DISK_USAGE = ConfigBuilder("spark.history.store.maxDiskUsage")
    .bytesConf(ByteUnit.BYTE)
    .createWithDefaultString("10g")

  val HISTORY_SERVER_UI_PORT = ConfigBuilder("spark.history.ui.port")
    .doc("Web UI port to bind Spark History Server")
    .intConf
    .createWithDefault(18080)
}
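Note that timeConf and bytesConf entries accept unit suffixes when users set them, so overrides such as the following resolve to seconds and bytes respectively. The HistoryServerOverrides object and its values are illustrative only.

import org.apache.spark.SparkConf

object HistoryServerOverrides {
  // Illustrative only: "12h" is parsed by the timeConf entry into seconds,
  // "20g" by the bytesConf entry into bytes.
  def conf(): SparkConf = new SparkConf()
    .set("spark.history.fs.cleaner.maxAge", "12h")
    .set("spark.history.store.maxDiskUsage", "20g")
}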