org.apache.spark.launcher.SparkLauncher Scala Examples
The following examples show how to use org.apache.spark.launcher.SparkLauncher.
Each example links back to the project and source file it was taken from.
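Before the project examples, here is a minimal sketch of the typical launcher pattern, assuming a placeholder application jar, main class, and master URL; it illustrates the API and is not taken from any of the projects below.

import org.apache.spark.launcher.{SparkAppHandle, SparkLauncher}

object MinimalLauncherSketch {
  def main(args: Array[String]): Unit = {
    // Placeholder jar path, main class and master URL -- replace with real values.
    val handle: SparkAppHandle = new SparkLauncher()
      .setAppResource("/path/to/my-app.jar")
      .setMainClass("com.example.MyApp")
      .setMaster("local[*]")
      .setConf(SparkLauncher.DRIVER_MEMORY, "1g")
      .startApplication()

    // Poll until the application reaches a terminal state (FINISHED, FAILED, KILLED, LOST).
    while (!handle.getState.isFinal) {
      Thread.sleep(1000)
    }
    println(s"Final state: ${handle.getState}")
  }
}

Note that the launcher needs to locate spark-submit, so either the SPARK_HOME environment variable must be set or setSparkHome(...) must be called on the builder.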
Example 1
Source File: Launcher.scala From sparkplug with MIT License
package springnz.sparkplug.client

import java.net.{ URLEncoder, InetAddress }

import better.files._
import com.typesafe.config.{ ConfigRenderOptions, Config }
import org.apache.spark.launcher.SparkLauncher
import springnz.sparkplug.util.{ BuilderOps, ConfigUtils, Logging, Pimpers }

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future
import scala.util.{ Properties, Try }

object Launcher extends Logging {
  import BuilderOps._
  import Pimpers._

  def startProcess(launcher: SparkLauncher): Future[Unit] = {
    val processFuture = Future {
      launcher.launch()
    }.withErrorLog("Failed to launch: ")
    processFuture.flatMap {
      process ⇒ executeProcess(process)
    }
  }

  private def executeProcess(process: Process): Future[Unit] = Future {
    val outStream = scala.io.Source.fromInputStream(process.getInputStream)
    for (line ← outStream.getLines()) {
      log.info(line)
    }
    val errorStream = scala.io.Source.fromInputStream(process.getErrorStream)
    for (line ← errorStream.getLines()) {
      log.info(line)
    }
    process.waitFor()
  }

  def launch(clientAkkaAddress: String,
    jarPath: File,
    mainJarPattern: String,
    mainClass: String,
    sparkConfig: Config,
    akkaRemoteConfig: Option[Config],
    sendJars: Boolean = true): Try[Future[Unit]] = Try {

    val fullExtraJarFolder = jarPath.pathAsString

    val sparkHome = Properties.envOrNone("SPARK_HOME")
    val sparkMaster = Properties.envOrElse("SPARK_MASTER",
      s"spark://${InetAddress.getLocalHost.getHostAddress}:7077")
    log.debug(s"Spark master set to: $sparkMaster")

    // TODO: enable this functionality (need Spark 1.5 for this)
    // val sparkArgs: Array[String] = config.getString("spark.submit.sparkargs").split(' ')

    if (!sparkMaster.startsWith("local[") && !sparkHome.isDefined)
      throw new RuntimeException("If 'SPARK_MASTER' is not set to local, 'SPARK_HOME' must be set.")

    val appName = mainClass.split('.').last
    val mainJar = jarPath.glob(mainJarPattern).collectFirst { case f ⇒ f.pathAsString }

    val configVars: Seq[(String, String)] = ConfigUtils.configFields(sparkConfig).toSeq

    val akkaRemoteConfigString = akkaRemoteConfig.map { config ⇒
      val configString = config.root().render(ConfigRenderOptions.concise())
      URLEncoder.encode(configString, "UTF-8")
    }

    val launcher = (new SparkLauncher)
      .setIfSome[String](mainJar) { (l, mj) ⇒ l.setAppResource(mj) }
      .setMainClass(mainClass)
      .setAppName(appName)
      .setMaster(sparkMaster)
      .setIfSome[String](sparkHome) { (l, sh) ⇒ l.setSparkHome(sh) }
      .addAppArgs("appName", appName)
      .addAppArgs("clientAkkaAddress", clientAkkaAddress)
      .setIfSome(akkaRemoteConfigString) { (l, config) ⇒ l.addAppArgs("remoteAkkaConfig", config) }
      .setFoldLeft(configVars) { case (launcher, (key, value)) ⇒ launcher.setConf(key, value) }
      .setDeployMode(sparkConfig.getString("spark.deploymode"))

    val extraJarFiles = jarPath.glob("*.jar")
      .map { case f ⇒ f.pathAsString }
      .filterNot(_.contains("/akka-"))

    val launcherWithJars =
      if (sendJars)
        extraJarFiles.foldLeft(launcher) { case (l, jarFile) ⇒ l.addJar(jarFile) }
      else if (extraJarFiles.length == 0)
        launcher
      else launcher
        .setConf(SparkLauncher.DRIVER_EXTRA_CLASSPATH, s"$fullExtraJarFolder/*")
        .setConf(SparkLauncher.EXECUTOR_EXTRA_CLASSPATH, s"$fullExtraJarFolder/*")

    startProcess(launcherWithJars)
  }
}
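Example 1 calls launch() and then drains the child process's stdout and stderr by hand. As a point of comparison, a hedged sketch of the listener-based alternative that SparkLauncher also offers (startApplication with a SparkAppHandle.Listener) could look like the following; the println calls stand in for real logging and are not part of sparkplug.

import org.apache.spark.launcher.{SparkAppHandle, SparkLauncher}

object ListenerLaunchSketch {
  // Sketch only: attach a listener instead of draining the process streams by hand.
  def startWithListener(launcher: SparkLauncher): SparkAppHandle =
    launcher.startApplication(new SparkAppHandle.Listener {
      override def stateChanged(handle: SparkAppHandle): Unit =
        println(s"State changed: ${handle.getState} (appId=${handle.getAppId})")
      override def infoChanged(handle: SparkAppHandle): Unit =
        println(s"Info changed for appId=${handle.getAppId}")
    })
}

With startApplication() the child's output is redirected through java.util.logging rather than exposed as raw streams, which is what Example 5 below relies on when it filters the OutputRedirector prefix out of its log files.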
Example 2
Source File: FlowLauncher.scala From piflow with BSD 2-Clause "Simplified" License
package cn.piflow.util

import java.io.File
import java.util.Date
import java.util.concurrent.CountDownLatch

import cn.piflow.Flow
import org.apache.hadoop.security.SecurityUtil
import org.apache.http.client.methods.{CloseableHttpResponse, HttpPut}
import org.apache.http.entity.StringEntity
import org.apache.http.impl.client.HttpClients
import org.apache.http.util.EntityUtils
import org.apache.spark.launcher.SparkLauncher

object FlowLauncher {

  def launch(flow: Flow): SparkLauncher = {

    var flowJson = flow.getFlowJson()
    println("FlowLauncher json:" + flowJson)

    val flowJsonencryptAES = SecurityUtil.encryptAES(flowJson)

    var appId: String = ""
    val countDownLatch = new CountDownLatch(1)
    val launcher = new SparkLauncher
    val sparkLauncher = launcher
      .setAppName(flow.getFlowName())
      .setMaster(PropertyUtil.getPropertyValue("spark.master"))
      //.setDeployMode(PropertyUtil.getPropertyValue("spark.deploy.mode"))
      .setAppResource(ConfigureUtil.getPiFlowBundlePath())
      .setVerbose(true)
      .setConf("spark.driver.memory", flow.getDriverMemory())
      .setConf("spark.executor.instances", flow.getExecutorNum())
      .setConf("spark.executor.memory", flow.getExecutorMem())
      .setConf("spark.executor.cores", flow.getExecutorCores())
      .addFile(PropertyUtil.getConfigureFile())
      .addFile(ServerIpUtil.getServerIpFile())
      .setMainClass("cn.piflow.api.StartFlowMain")
      .addAppArgs(flowJsonencryptAES)

    val sparkMaster = PropertyUtil.getPropertyValue("spark.master")
    if (sparkMaster.equals("yarn")) {
      sparkLauncher.setDeployMode(PropertyUtil.getPropertyValue("spark.deploy.mode"))
      sparkLauncher.setConf("spark.hadoop.yarn.resourcemanager.hostname",
        PropertyUtil.getPropertyValue("yarn.resourcemanager.hostname"))
    }

    // add other jars for application
    val classPath = PropertyUtil.getClassPath()
    val classPathFile = new File(classPath)
    if (classPathFile.exists()) {
      FileUtil.getJarFile(new File(classPath)).foreach(f => {
        println(f.getPath)
        sparkLauncher.addJar(f.getPath)
      })
    }

    val scalaPath = PropertyUtil.getScalaPath()
    val scalaPathFile = new File(scalaPath)
    if (scalaPathFile.exists()) {
      FileUtil.getJarFile(new File(scalaPath)).foreach(f => {
        println(f.getPath)
        sparkLauncher.addJar(f.getPath)
      })
    }

    sparkLauncher
  }

  def stop(appID: String) = {

    println("Stop Flow !!!!!!!!!!!!!!!!!!!!!!!!!!")

    // yarn application kill appId
    val url = ConfigureUtil.getYarnResourceManagerWebAppAddress() + appID + "/state"
    val client = HttpClients.createDefault()
    val put: HttpPut = new HttpPut(url)
    val body = "{\"state\":\"KILLED\"}"
    put.addHeader("Content-Type", "application/json")
    put.setEntity(new StringEntity(body))
    val response: CloseableHttpResponse = client.execute(put)
    val entity = response.getEntity
    val str = EntityUtils.toString(entity, "UTF-8")

    // update db
    println("Update flow state after Stop Flow !!!!!!!!!!!!!!!!!!!!!!!!!!")
    H2Util.updateFlowState(appID, FlowState.KILLED)
    H2Util.updateFlowFinishedTime(appID, new Date().toString)

    "ok"
  }
}
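Example 2 kills a running flow through the YARN ResourceManager REST API. When the application was started with startApplication() and a SparkAppHandle is still available, the handle itself can request shutdown; the sketch below is an alternative under that assumption and is not part of piflow.

import java.util.concurrent.TimeUnit

import org.apache.spark.launcher.SparkAppHandle

object StopSketch {
  // Sketch only: ask the application to stop, then fall back to kill() if it
  // has not reached a final state within the given timeout.
  def stopOrKill(handle: SparkAppHandle, timeoutSeconds: Int = 30): Unit = {
    handle.stop()
    var waited = 0
    while (!handle.getState.isFinal && waited < timeoutSeconds) {
      TimeUnit.SECONDS.sleep(1)
      waited += 1
    }
    if (!handle.getState.isFinal) {
      handle.kill()
    }
  }
}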
Example 3
Source File: BaseInteractiveServletSpec.scala From incubator-livy with Apache License 2.0
package org.apache.livy.server.interactive

import java.io.File
import java.nio.file.Files

import org.apache.commons.io.FileUtils
import org.apache.spark.launcher.SparkLauncher

import org.apache.livy.LivyConf
import org.apache.livy.rsc.RSCConf
import org.apache.livy.server.BaseSessionServletSpec
import org.apache.livy.sessions.{Kind, SessionKindModule, Spark}

abstract class BaseInteractiveServletSpec
  extends BaseSessionServletSpec[InteractiveSession, InteractiveRecoveryMetadata] {

  mapper.registerModule(new SessionKindModule())

  protected var tempDir: File = _

  override def afterAll(): Unit = {
    super.afterAll()
    if (tempDir != null) {
      scala.util.Try(FileUtils.deleteDirectory(tempDir))
      tempDir = null
    }
  }

  override protected def createConf(): LivyConf = synchronized {
    if (tempDir == null) {
      tempDir = Files.createTempDirectory("client-test").toFile()
    }
    super.createConf()
      .set(LivyConf.SESSION_STAGING_DIR, tempDir.toURI().toString())
      .set(LivyConf.REPL_JARS, "dummy.jar")
      .set(LivyConf.LIVY_SPARK_VERSION, sys.env("LIVY_SPARK_VERSION"))
      .set(LivyConf.LIVY_SPARK_SCALA_VERSION, sys.env("LIVY_SCALA_VERSION"))
  }

  protected def createRequest(
      inProcess: Boolean = true,
      extraConf: Map[String, String] = Map(),
      kind: Kind = Spark): CreateInteractiveRequest = {
    val classpath = sys.props("java.class.path")
    val request = new CreateInteractiveRequest()
    request.kind = kind
    request.name = None
    request.conf = extraConf ++ Map(
      RSCConf.Entry.LIVY_JARS.key() -> "",
      RSCConf.Entry.CLIENT_IN_PROCESS.key() -> inProcess.toString,
      SparkLauncher.SPARK_MASTER -> "local",
      SparkLauncher.DRIVER_EXTRA_CLASSPATH -> classpath,
      SparkLauncher.EXECUTOR_EXTRA_CLASSPATH -> classpath
    )
    request
  }
}
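Example 3 never builds a launcher itself; it reuses SparkLauncher's public constants as plain spark.* configuration keys in the session request. The small self-checking sketch below spells out what those constants resolve to.

import org.apache.spark.launcher.SparkLauncher

object LauncherConstantsSketch {
  // The launcher constants are ordinary spark.* configuration keys.
  val constants: Seq[(String, String)] = Seq(
    SparkLauncher.SPARK_MASTER -> "spark.master",
    SparkLauncher.DRIVER_MEMORY -> "spark.driver.memory",
    SparkLauncher.DRIVER_EXTRA_CLASSPATH -> "spark.driver.extraClassPath",
    SparkLauncher.EXECUTOR_EXTRA_CLASSPATH -> "spark.executor.extraClassPath"
  )

  def main(args: Array[String]): Unit =
    constants.foreach { case (key, expected) => assert(key == expected) }
}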
Example 4
Source File: ReplDriverSuite.scala From incubator-livy with Apache License 2.0
package org.apache.livy.repl

import java.net.URI
import java.util.concurrent.TimeUnit

import scala.concurrent.duration._
import scala.language.postfixOps

import org.apache.spark.launcher.SparkLauncher
import org.json4s._
import org.json4s.jackson.JsonMethods._
import org.scalatest.FunSuite
import org.scalatest.concurrent.Eventually._

import org.apache.livy._
import org.apache.livy.rsc.{PingJob, RSCClient, RSCConf}
import org.apache.livy.sessions.Spark

class ReplDriverSuite extends FunSuite with LivyBaseUnitTestSuite {

  private implicit val formats = DefaultFormats

  test("start a repl session using the rsc") {
    val client = new LivyClientBuilder()
      .setConf(SparkLauncher.DRIVER_MEMORY, "512m")
      .setConf(SparkLauncher.DRIVER_EXTRA_CLASSPATH, sys.props("java.class.path"))
      .setConf(SparkLauncher.EXECUTOR_EXTRA_CLASSPATH, sys.props("java.class.path"))
      .setConf(RSCConf.Entry.LIVY_JARS.key(), "")
      .setURI(new URI("rsc:/"))
      .setConf(RSCConf.Entry.DRIVER_CLASS.key(), classOf[ReplDriver].getName())
      .setConf(RSCConf.Entry.SESSION_KIND.key(), Spark.toString)
      .build()
      .asInstanceOf[RSCClient]

    try {
      // This is sort of what InteractiveSession.scala does to detect an idle session.
      client.submit(new PingJob()).get(60, TimeUnit.SECONDS)

      val statementId = client.submitReplCode("1 + 1", "spark").get
      eventually(timeout(30 seconds), interval(100 millis)) {
        val rawResult =
          client.getReplJobResults(statementId, 1).get(10, TimeUnit.SECONDS).statements(0)
        val result = rawResult.output
        assert((parse(result) \ Session.STATUS).extract[String] === Session.OK)
      }
    } finally {
      client.stop(true)
    }
  }
}
Example 5
Source File: OutputInterceptorFactory.scala From seahorse with Apache License 2.0
package ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.outputintercepting

import java.io.File
import java.text.SimpleDateFormat
import java.util.logging._
import java.util.{Calendar, UUID}

import com.google.inject.Inject
import com.google.inject.name.Named
import org.apache.spark.launcher.SparkLauncher

import ai.deepsense.commons.models.ClusterDetails

case class OutputInterceptorHandle private [outputintercepting] (
    private val logger: Logger,
    private val childProcLoggerName: String,
    private val loggerFileHandler: FileHandler) {

  def attachTo(sparkLauncher: SparkLauncher): Unit = {
    sparkLauncher.setConf(
      "spark.launcher.childProcLoggerName", childProcLoggerName
    )
  }

  def writeOutput(text: String): Unit = {
    logger.info(text)
  }

  def close(): Unit = {
    loggerFileHandler.close()
  }
}

class OutputInterceptorFactory @Inject()(
    @Named("session-executor.spark-applications-logs-dir") val executorsLogDirectory: String) {

  def prepareInterceptorWritingToFiles(clusterDetails: ClusterDetails): OutputInterceptorHandle = {
    new File(executorsLogDirectory).mkdirs()

    val childProcLoggerName = s"WE-app-${UUID.randomUUID()}"
    val logger = Logger.getLogger(childProcLoggerName)

    val fileName = {
      val time = Calendar.getInstance().getTime()
      // Colons are not allowed in Windows filenames
      val format = new SimpleDateFormat("yyyy-MM-dd_HH-mm-ss")
      val formattedTime = format.format(time)
      val illegalFileNameCharactersRegExp = "[^a-zA-Z0-9.-]"
      s"$formattedTime-${clusterDetails.name.replaceAll(illegalFileNameCharactersRegExp, "_")}.log"
    }
    val fileHandler = new FileHandler(s"$executorsLogDirectory/$fileName")
    fileHandler.setFormatter(new SimpleFormaterWithoutOutputRedirectorNoise)
    logger.addHandler(fileHandler)
    sys.addShutdownHook {
      fileHandler.close()
    }
    OutputInterceptorHandle(logger, childProcLoggerName, fileHandler)
  }

  class SimpleFormaterWithoutOutputRedirectorNoise extends Formatter {

    val simpleFormatter = new SimpleFormatter

    override def format(logRecord: LogRecord): String = {
      val formatted = simpleFormatter.format(logRecord)
      val redirectorNoise = "org.apache.spark.launcher.OutputRedirector redirect\nINFO: "
      val beginningOfRedirectorNoise = formatted.indexOf(redirectorNoise)

      val endOfRedirectorNoise =
        if (beginningOfRedirectorNoise > 0) {
          beginningOfRedirectorNoise + redirectorNoise.length
        } else {
          0
        }

      formatted.substring(endOfRedirectorNoise)
    }
  }
}
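The handle produced in Example 5 only has an effect if it is attached before the application is started, because spark.launcher.childProcLoggerName names the java.util.logging logger that the launcher's output redirector writes to. A usage sketch follows; the wiring is an assumption for illustration and is not taken from Seahorse.

import ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.outputintercepting.OutputInterceptorHandle
import org.apache.spark.launcher.{SparkAppHandle, SparkLauncher}

object InterceptorUsageSketch {
  // Sketch only: attach the interceptor before starting, so child-process output
  // redirected by the launcher lands in the per-session log file.
  def launchWithInterceptor(launcher: SparkLauncher,
                            interceptor: OutputInterceptorHandle): SparkAppHandle = {
    interceptor.attachTo(launcher)
    // interceptor.close() should be called once the session has finished.
    launcher.startApplication()
  }
}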
Example 6
Source File: YarnSparkLauncher.scala From seahorse with Apache License 2.0
package ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.clusters

import scalaz.Validation

import org.apache.spark.launcher.SparkLauncher

import ai.deepsense.commons.models.ClusterDetails
import ai.deepsense.sessionmanager.service.sessionspawner.SessionConfig
import ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.SparkLauncherConfig
import ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.clusters.SeahorseSparkLauncher.RichSparkLauncher
import ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.executor.{CommonEnv, SessionExecutorArgs}
import ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.spark.SparkArgumentParser.SparkOptionsMultiMap
import ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.spark.SparkArgumentParser.SparkOptionsOp

private [clusters] object YarnSparkLauncher {
  import scala.collection.JavaConversions._

  def apply(applicationArgs: Seq[String],
            config: SparkLauncherConfig,
            clusterConfig: ClusterDetails,
            args: SparkOptionsMultiMap): SparkLauncher = {
    val updatedArgs = args
      .updateConfOptions("spark.yarn.dist.archives", sparkRArchivePath(config.sparkHome))
      .updateConfOptions("spark.yarn.dist.archives", pySparkArchivePath(config.sparkHome))
    new SparkLauncher(env(config, clusterConfig))
      .setSparkArgs(updatedArgs)
      .setVerbose(true)
      .setMainClass(config.className)
      .setMaster("yarn")
      .setDeployMode("client")
      .setAppResource(config.weJarPath)
      .setSparkHome(config.sparkHome)
      .setAppName("Seahorse Workflow Executor")
      .addAppArgs(applicationArgs: _*)
      .addFile(config.weDepsPath)
      .setConf("spark.driver.host", clusterConfig.userIP)
      .setConf("spark.executorEnv.PYTHONPATH", s"${config.weDepsFileName}:pyspark")
      .setConf("spark.yarn.appMasterEnv.PYSPARK_PYTHON", config.pythonDriverBinary)
  }

  private def pySparkArchivePath(sparkHome: String, linkName: String = "#pyspark") =
    sparkHome + "/python/lib/pyspark.zip" + linkName

  private def sparkRArchivePath(sparkHome: String, linkName: String = "#sparkr") =
    sparkHome + "/R/lib/sparkr.zip" + linkName

  private def env(
      config: SparkLauncherConfig,
      clusterConfig: ClusterDetails) = CommonEnv(config, clusterConfig) ++ Map(
    "HADOOP_CONF_DIR" -> clusterConfig.uri,
    "SPARK_YARN_MODE" -> "true"
  )
}
Example 7
Source File: LocalSparkLauncher.scala From seahorse with Apache License 2.0
package ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.clusters

import org.apache.spark.launcher.SparkLauncher

import ai.deepsense.commons.models.ClusterDetails
import ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.SparkLauncherConfig
import ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.clusters.SeahorseSparkLauncher.RichSparkLauncher
import ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.executor.CommonEnv
import ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.spark.SparkArgumentParser._

private [clusters] object LocalSparkLauncher {

  import scala.collection.JavaConversions._

  def apply(applicationArgs: Seq[String],
            config: SparkLauncherConfig,
            clusterConfig: ClusterDetails,
            args: SparkOptionsMultiMap): SparkLauncher = {
    new SparkLauncher(env(config, clusterConfig))
      .setSparkArgs(args)
      .setVerbose(true)
      .setMainClass(config.className)
      .setMaster("local[*]")
      .setDeployMode("client")
      .setAppResource(config.weJarPath)
      .setAppName("Seahorse Workflow Executor")
      .addAppArgs(applicationArgs: _*)
      .addFile(config.weDepsPath)
      .setConf("spark.executorEnv.PYTHONPATH", config.weDepsPath)
      .setConfDefault("spark.default.parallelism", args, parallelism.toString)
  }

  private def env(config: SparkLauncherConfig, clusterConfig: ClusterDetails) =
    CommonEnv(config, clusterConfig) ++ Map(
      // For local cluster driver python binary IS executors python binary
      "PYSPARK_PYTHON" -> config.pythonDriverBinary
    )

  private lazy val parallelism = {
    val availableCoresNumber: Int = Runtime.getRuntime.availableProcessors()
    math.min(availableCoresNumber, 4)
  }
}
Example 8
Source File: StandaloneSparkLauncher.scala From seahorse with Apache License 2.0
package ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.clusters

import scalaz.Validation

import org.apache.spark.launcher.SparkLauncher

import ai.deepsense.commons.models.ClusterDetails
import ai.deepsense.sessionmanager.service.sessionspawner.SessionConfig
import ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.SparkLauncherConfig
import ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.clusters.SeahorseSparkLauncher.RichSparkLauncher
import ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.executor.{CommonEnv, SessionExecutorArgs}
import ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.spark.SparkArgumentParser
import ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.spark.SparkArgumentParser._

private [clusters] object StandaloneSparkLauncher {

  import scala.collection.JavaConversions._

  def apply(applicationArgs: Seq[String],
            config: SparkLauncherConfig,
            clusterConfig: ClusterDetails,
            args: SparkOptionsMultiMap): SparkLauncher = {
    new SparkLauncher(CommonEnv(config, clusterConfig))
      .setSparkArgs(args)
      .setVerbose(true)
      .setMainClass(config.className)
      .setMaster(clusterConfig.uri)
      .setDeployMode("client")
      .setAppResource(config.weJarPath)
      .setAppName("Seahorse Workflow Executor")
      .addAppArgs(applicationArgs: _*)
      .addFile(config.weDepsPath)
      .setConf("spark.driver.host", clusterConfig.userIP)
      .setConf("spark.executorEnv.PYTHONPATH", config.weDepsPath)
  }
}
Example 9
Source File: MesosSparkLauncher.scala From seahorse with Apache License 2.0
package ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.clusters

import ai.deepsense.commons.buildinfo.BuildInfo
import ai.deepsense.commons.models.ClusterDetails
import ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.SparkLauncherConfig
import ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.clusters.SeahorseSparkLauncher.RichSparkLauncher
import ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.executor.CommonEnv
import ai.deepsense.sessionmanager.service.sessionspawner.sparklauncher.spark.SparkArgumentParser.SparkOptionsMultiMap
import org.apache.spark.launcher.SparkLauncher

private [clusters] object MesosSparkLauncher {

  import scala.collection.JavaConversions._

  val sparkVersion = BuildInfo.sparkVersion
  val hadoopVersion = BuildInfo.hadoopVersion

  def apply(applicationArgs: Seq[String],
            config: SparkLauncherConfig,
            clusterConfig: ClusterDetails,
            args: SparkOptionsMultiMap): SparkLauncher = {
    new SparkLauncher(env(config, clusterConfig))
      .setSparkArgs(args)
      .setVerbose(true)
      .setMainClass(config.className)
      .setMaster(clusterConfig.uri)
      .setDeployMode("client")
      .setAppResource(config.weJarPath)
      .setSparkHome(config.sparkHome)
      .setAppName("Seahorse Workflow Executor")
      .addAppArgs(applicationArgs: _*)
      .addFile(config.weDepsPath)
      .setConf("spark.executor.uri",
        s"http://d3kbcqa49mib13.cloudfront.net/spark-$sparkVersion-bin-hadoop$hadoopVersion.tgz")
  }

  private def env(
      config: SparkLauncherConfig,
      clusterConfig: ClusterDetails) = CommonEnv(config, clusterConfig) ++ Map(
    "MESOS_NATIVE_JAVA_LIBRARY" -> "/usr/lib/libmesos.so",
    "LIBPROCESS_IP" -> clusterConfig.userIP,
    "LIBPROCESS_ADVERTISE_IP" -> clusterConfig.userIP
  )
}
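Examples 6 through 9 all pass a child-process environment map to the SparkLauncher(java.util.Map) constructor, relying on the scala.collection.JavaConversions implicits to turn a Scala Map into a Java one. A small sketch of the same idea with the explicit JavaConverters API is shown below; the environment contents are placeholders.

import scala.collection.JavaConverters._

import org.apache.spark.launcher.SparkLauncher

object EnvLauncherSketch {
  // Sketch only: the entries of this map are added to the environment of the
  // spark-submit child process started by the launcher.
  def withEnv(extraEnv: Map[String, String]): SparkLauncher =
    new SparkLauncher(extraEnv.asJava)
}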