org.apache.hadoop.yarn.conf.YarnConfiguration Scala Examples
The following examples show how to use org.apache.hadoop.yarn.conf.YarnConfiguration.
The examples are collected from several open-source projects; the source file and the project it comes from are noted above each example.
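Before the project examples, here is a minimal, self-contained sketch of the basic YarnConfiguration API: constructing one (optionally wrapping an existing Hadoop Configuration), reading a property with its built-in default, and overriding a value programmatically. This is only a sketch, not taken from any of the projects below; it assumes yarn-site.xml / core-site.xml are on the classpath if cluster-specific values are expected, otherwise the shipped defaults from yarn-default.xml apply.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.yarn.conf.YarnConfiguration

object YarnConfigurationSketch {
  def main(args: Array[String]): Unit = {
    // Loads yarn-default.xml and, if present on the classpath, yarn-site.xml.
    val yarnConf = new YarnConfiguration()

    // A YarnConfiguration can also wrap an existing Hadoop Configuration so that
    // YARN-specific resources and defaults are layered on top of it.
    val wrapped = new YarnConfiguration(new Configuration())

    // Read a YARN setting, falling back to the library constant if it is unset.
    val maxAttempts = yarnConf.getInt(
      YarnConfiguration.RM_AM_MAX_ATTEMPTS,
      YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS)

    // Override a setting programmatically, e.g. register an auxiliary service.
    wrapped.set(YarnConfiguration.NM_AUX_SERVICES, "spark_shuffle")

    println(s"yarn.resourcemanager.am.max-attempts = $maxAttempts")
  }
}

The same pattern, wrapping an existing Hadoop configuration in a YarnConfiguration and reading YARN keys with their defaults, recurs throughout the Spark examples below.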
Example 1
Source File: YarnShuffleIntegrationSuite.scala (from sparkoscope, Apache License 2.0)
package org.apache.spark.deploy.yarn

import java.io.File
import java.nio.charset.StandardCharsets

import com.google.common.io.Files
import org.apache.commons.io.FileUtils
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.scalatest.Matchers

import org.apache.spark._
import org.apache.spark.internal.Logging
import org.apache.spark.network.shuffle.ShuffleTestAccessor
import org.apache.spark.network.yarn.{YarnShuffleService, YarnTestAccessor}
import org.apache.spark.tags.ExtendedYarnTest

@ExtendedYarnTest
class YarnShuffleIntegrationSuite extends BaseYarnClusterSuite {

  override def newYarnConfig(): YarnConfiguration = {
    val yarnConfig = new YarnConfiguration()
    yarnConfig.set(YarnConfiguration.NM_AUX_SERVICES, "spark_shuffle")
    yarnConfig.set(YarnConfiguration.NM_AUX_SERVICE_FMT.format("spark_shuffle"),
      classOf[YarnShuffleService].getCanonicalName)
    yarnConfig.set("spark.shuffle.service.port", "0")
    yarnConfig
  }

  test("external shuffle service") {
    val shuffleServicePort = YarnTestAccessor.getShuffleServicePort
    val shuffleService = YarnTestAccessor.getShuffleServiceInstance

    val registeredExecFile = YarnTestAccessor.getRegisteredExecutorFile(shuffleService)

    logInfo("Shuffle service port = " + shuffleServicePort)
    val result = File.createTempFile("result", null, tempDir)
    val finalState = runSpark(
      false,
      mainClassName(YarnExternalShuffleDriver.getClass),
      appArgs = Seq(result.getAbsolutePath(), registeredExecFile.getAbsolutePath),
      extraConf = Map(
        "spark.shuffle.service.enabled" -> "true",
        "spark.shuffle.service.port" -> shuffleServicePort.toString
      )
    )
    checkResult(finalState, result)
    assert(YarnTestAccessor.getRegisteredExecutorFile(shuffleService).exists())
  }
}

private object YarnExternalShuffleDriver extends Logging with Matchers {

  val WAIT_TIMEOUT_MILLIS = 10000

  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      // scalastyle:off println
      System.err.println(
        s"""
        |Invalid command line: ${args.mkString(" ")}
        |
        |Usage: ExternalShuffleDriver [result file] [registered exec file]
        """.stripMargin)
      // scalastyle:on println
      System.exit(1)
    }

    val sc = new SparkContext(new SparkConf()
      .setAppName("External Shuffle Test"))
    val conf = sc.getConf
    val status = new File(args(0))
    val registeredExecFile = new File(args(1))
    logInfo("shuffle service executor file = " + registeredExecFile)
    var result = "failure"
    val execStateCopy = new File(registeredExecFile.getAbsolutePath + "_dup")
    try {
      val data = sc.parallelize(0 until 100, 10).map { x => (x % 10) -> x }.reduceByKey{ _ + _ }.
        collect().toSet
      sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
      data should be ((0 until 10).map{x => x -> (x * 10 + 450)}.toSet)
      result = "success"
      // only one process can open a leveldb file at a time, so we copy the files
      FileUtils.copyDirectory(registeredExecFile, execStateCopy)
      assert(!ShuffleTestAccessor.reloadRegisteredExecutors(execStateCopy).isEmpty)
    } finally {
      sc.stop()
      FileUtils.deleteDirectory(execStateCopy)
      Files.write(result, status, StandardCharsets.UTF_8)
    }
  }
}
Example 2
Source File: YarnShuffleIntegrationSuite.scala (from BigDatalog, Apache License 2.0)
package org.apache.spark.deploy.yarn

import java.io.File

import com.google.common.base.Charsets.UTF_8
import com.google.common.io.Files
import org.apache.commons.io.FileUtils
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.scalatest.Matchers

import org.apache.spark._
import org.apache.spark.network.shuffle.ShuffleTestAccessor
import org.apache.spark.network.yarn.{YarnShuffleService, YarnTestAccessor}
import org.apache.spark.tags.ExtendedYarnTest

@ExtendedYarnTest
class YarnShuffleIntegrationSuite extends BaseYarnClusterSuite {

  override def newYarnConfig(): YarnConfiguration = {
    val yarnConfig = new YarnConfiguration()
    yarnConfig.set(YarnConfiguration.NM_AUX_SERVICES, "spark_shuffle")
    yarnConfig.set(YarnConfiguration.NM_AUX_SERVICE_FMT.format("spark_shuffle"),
      classOf[YarnShuffleService].getCanonicalName)
    yarnConfig.set("spark.shuffle.service.port", "0")
    yarnConfig
  }

  test("external shuffle service") {
    val shuffleServicePort = YarnTestAccessor.getShuffleServicePort
    val shuffleService = YarnTestAccessor.getShuffleServiceInstance

    val registeredExecFile = YarnTestAccessor.getRegisteredExecutorFile(shuffleService)

    logInfo("Shuffle service port = " + shuffleServicePort)
    val result = File.createTempFile("result", null, tempDir)
    val finalState = runSpark(
      false,
      mainClassName(YarnExternalShuffleDriver.getClass),
      appArgs = Seq(result.getAbsolutePath(), registeredExecFile.getAbsolutePath),
      extraConf = Map(
        "spark.shuffle.service.enabled" -> "true",
        "spark.shuffle.service.port" -> shuffleServicePort.toString
      )
    )
    checkResult(finalState, result)
    assert(YarnTestAccessor.getRegisteredExecutorFile(shuffleService).exists())
  }
}

private object YarnExternalShuffleDriver extends Logging with Matchers {

  val WAIT_TIMEOUT_MILLIS = 10000

  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      // scalastyle:off println
      System.err.println(
        s"""
        |Invalid command line: ${args.mkString(" ")}
        |
        |Usage: ExternalShuffleDriver [result file] [registered exec file]
        """.stripMargin)
      // scalastyle:on println
      System.exit(1)
    }

    val sc = new SparkContext(new SparkConf()
      .setAppName("External Shuffle Test"))
    val conf = sc.getConf
    val status = new File(args(0))
    val registeredExecFile = new File(args(1))
    logInfo("shuffle service executor file = " + registeredExecFile)
    var result = "failure"
    val execStateCopy = new File(registeredExecFile.getAbsolutePath + "_dup")
    try {
      val data = sc.parallelize(0 until 100, 10).map { x => (x % 10) -> x }.reduceByKey{ _ + _ }.
        collect().toSet
      sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
      data should be ((0 until 10).map{x => x -> (x * 10 + 450)}.toSet)
      result = "success"
      // only one process can open a leveldb file at a time, so we copy the files
      FileUtils.copyDirectory(registeredExecFile, execStateCopy)
      assert(!ShuffleTestAccessor.reloadRegisteredExecutors(execStateCopy).isEmpty)
    } finally {
      sc.stop()
      FileUtils.deleteDirectory(execStateCopy)
      Files.write(result, status, UTF_8)
    }
  }
}
Example 3
Source File: YarnClusterSchedulerBackend.scala (from BigDatalog, Apache License 2.0)
package org.apache.spark.scheduler.cluster

import org.apache.hadoop.yarn.api.ApplicationConstants.Environment
import org.apache.hadoop.yarn.conf.YarnConfiguration

import org.apache.spark.SparkContext
import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil
import org.apache.spark.scheduler.TaskSchedulerImpl
import org.apache.spark.util.Utils

private[spark] class YarnClusterSchedulerBackend(
    scheduler: TaskSchedulerImpl,
    sc: SparkContext)
  extends YarnSchedulerBackend(scheduler, sc) {

  override def start() {
    super.start()
    totalExpectedExecutors = YarnSparkHadoopUtil.getInitialTargetExecutorNumber(sc.conf)
  }

  override def applicationId(): String =
    // In YARN Cluster mode, the application ID is expected to be set, so log an error if it's
    // not found.
    sc.getConf.getOption("spark.yarn.app.id").getOrElse {
      logError("Application ID is not set.")
      super.applicationId
    }

  override def applicationAttemptId(): Option[String] =
    // In YARN Cluster mode, the attempt ID is expected to be set, so log an error if it's
    // not found.
    sc.getConf.getOption("spark.yarn.app.attemptId").orElse {
      logError("Application attempt ID is not set.")
      super.applicationAttemptId
    }

  override def getDriverLogUrls: Option[Map[String, String]] = {
    var driverLogs: Option[Map[String, String]] = None
    try {
      val yarnConf = new YarnConfiguration(sc.hadoopConfiguration)
      val containerId = YarnSparkHadoopUtil.get.getContainerId
      val httpAddress = System.getenv(Environment.NM_HOST.name()) +
        ":" + System.getenv(Environment.NM_HTTP_PORT.name())
      // lookup appropriate http scheme for container log urls
      val yarnHttpPolicy = yarnConf.get(
        YarnConfiguration.YARN_HTTP_POLICY_KEY,
        YarnConfiguration.YARN_HTTP_POLICY_DEFAULT
      )
      val user = Utils.getCurrentUserName()
      val httpScheme = if (yarnHttpPolicy == "HTTPS_ONLY") "https://" else "http://"
      val baseUrl = s"$httpScheme$httpAddress/node/containerlogs/$containerId/$user"
      logDebug(s"Base URL for logs: $baseUrl")
      driverLogs = Some(Map(
        "stderr" -> s"$baseUrl/stderr?start=-4096",
        "stdout" -> s"$baseUrl/stdout?start=-4096"))
    } catch {
      case e: Exception =>
        logInfo("Error while building AM log links, so AM" +
          " logs link will not appear in application UI", e)
    }
    driverLogs
  }
}
Example 4
Source File: ApplicationMasterSuite.scala (from Spark-2.3.1, Apache License 2.0)
package org.apache.spark.deploy.yarn

import org.apache.hadoop.yarn.conf.YarnConfiguration

import org.apache.spark.{SparkConf, SparkFunSuite}

class ApplicationMasterSuite extends SparkFunSuite {

  test("history url with hadoop and spark substitutions") {
    val host = "rm.host.com"
    val port = 18080
    val sparkConf = new SparkConf()

    sparkConf.set("spark.yarn.historyServer.address",
      "http://${hadoopconf-yarn.resourcemanager.hostname}:${spark.history.ui.port}")
    val yarnConf = new YarnConfiguration()
    yarnConf.set("yarn.resourcemanager.hostname", host)
    val appId = "application_123_1"
    val attemptId = appId + "_1"

    val shsAddr = ApplicationMaster
      .getHistoryServerAddress(sparkConf, yarnConf, appId, attemptId)

    assert(shsAddr === s"http://${host}:${port}/history/${appId}/${attemptId}")
  }
}
Example 5
Source File: LocalityPlacementStrategySuite.scala (from Spark-2.3.1, Apache License 2.0)
package org.apache.spark.deploy.yarn

import scala.collection.JavaConverters._
import scala.collection.mutable.{HashMap, HashSet, Set}

import org.apache.hadoop.yarn.api.records._
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.mockito.Mockito._

import org.apache.spark.{SparkConf, SparkFunSuite}

class LocalityPlacementStrategySuite extends SparkFunSuite {

  test("handle large number of containers and tasks (SPARK-18750)") {
    // Run the test in a thread with a small stack size, since the original issue
    // surfaced as a StackOverflowError.
    var error: Throwable = null

    val runnable = new Runnable() {
      override def run(): Unit = try {
        runTest()
      } catch {
        case e: Throwable => error = e
      }
    }

    val thread = new Thread(new ThreadGroup("test"), runnable, "test-thread", 32 * 1024)
    thread.start()
    thread.join()

    assert(error === null)
  }

  private def runTest(): Unit = {
    val yarnConf = new YarnConfiguration()

    // The numbers below have been chosen to balance being large enough to replicate the
    // original issue while not taking too long to run when the issue is fixed. The main
    // goal is to create enough requests for localized containers (so there should be many
    // tasks on several hosts that have no allocated containers).

    val resource = Resource.newInstance(8 * 1024, 4)
    val strategy = new LocalityPreferredContainerPlacementStrategy(new SparkConf(),
      yarnConf, resource, new MockResolver())

    val totalTasks = 32 * 1024
    val totalContainers = totalTasks / 16
    val totalHosts = totalContainers / 16

    val mockId = mock(classOf[ContainerId])
    val hosts = (1 to totalHosts).map { i => (s"host_$i", totalTasks % i) }.toMap
    val containers = (1 to totalContainers).map { i => mockId }
    val count = containers.size / hosts.size / 2

    val hostToContainerMap = new HashMap[String, Set[ContainerId]]()
    hosts.keys.take(hosts.size / 2).zipWithIndex.foreach { case (host, i) =>
      val hostContainers = new HashSet[ContainerId]()
      containers.drop(count * i).take(i).foreach { c => hostContainers += c }
      hostToContainerMap(host) = hostContainers
    }

    strategy.localityOfRequestedContainers(containers.size * 2, totalTasks, hosts,
      hostToContainerMap, Nil)
  }
}
Example 6
Source File: YarnClusterSchedulerBackend.scala (from Spark-2.3.1, Apache License 2.0)
package org.apache.spark.scheduler.cluster

import org.apache.hadoop.yarn.api.ApplicationConstants.Environment
import org.apache.hadoop.yarn.conf.YarnConfiguration

import org.apache.spark.SparkContext
import org.apache.spark.deploy.yarn.{ApplicationMaster, YarnSparkHadoopUtil}
import org.apache.spark.scheduler.TaskSchedulerImpl
import org.apache.spark.util.Utils

private[spark] class YarnClusterSchedulerBackend(
    scheduler: TaskSchedulerImpl,
    sc: SparkContext)
  extends YarnSchedulerBackend(scheduler, sc) {

  override def start() {
    val attemptId = ApplicationMaster.getAttemptId
    bindToYarn(attemptId.getApplicationId(), Some(attemptId))
    super.start()
    totalExpectedExecutors = SchedulerBackendUtils.getInitialTargetExecutorNumber(sc.conf)
  }

  override def getDriverLogUrls: Option[Map[String, String]] = {
    var driverLogs: Option[Map[String, String]] = None
    try {
      val yarnConf = new YarnConfiguration(sc.hadoopConfiguration)
      val containerId = YarnSparkHadoopUtil.getContainerId
      val httpAddress = System.getenv(Environment.NM_HOST.name()) +
        ":" + System.getenv(Environment.NM_HTTP_PORT.name())
      // lookup appropriate http scheme for container log urls
      val yarnHttpPolicy = yarnConf.get(
        YarnConfiguration.YARN_HTTP_POLICY_KEY,
        YarnConfiguration.YARN_HTTP_POLICY_DEFAULT
      )
      val user = Utils.getCurrentUserName()
      val httpScheme = if (yarnHttpPolicy == "HTTPS_ONLY") "https://" else "http://"
      val baseUrl = s"$httpScheme$httpAddress/node/containerlogs/$containerId/$user"
      logDebug(s"Base URL for logs: $baseUrl")
      driverLogs = Some(Map(
        "stdout" -> s"$baseUrl/stdout?start=-4096",
        "stderr" -> s"$baseUrl/stderr?start=-4096"))
    } catch {
      case e: Exception =>
        logInfo("Error while building AM log links, so AM" +
          " logs link will not appear in application UI", e)
    }
    driverLogs
  }
}
Example 7
Source File: YarnRMClient.scala (from Spark-2.3.1, Apache License 2.0)
package org.apache.spark.deploy.yarn

import scala.collection.JavaConverters._

import org.apache.hadoop.yarn.api.records._
import org.apache.hadoop.yarn.client.api.AMRMClient
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.hadoop.yarn.webapp.util.WebAppUtils

import org.apache.spark.{SecurityManager, SparkConf}
import org.apache.spark.deploy.yarn.config._
import org.apache.spark.internal.Logging
import org.apache.spark.rpc.RpcEndpointRef
import org.apache.spark.util.Utils

  def getMaxRegAttempts(sparkConf: SparkConf, yarnConf: YarnConfiguration): Int = {
    val sparkMaxAttempts = sparkConf.get(MAX_APP_ATTEMPTS).map(_.toInt)
    val yarnMaxAttempts = yarnConf.getInt(
      YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS)
    sparkMaxAttempts match {
      case Some(x) => if (x <= yarnMaxAttempts) x else yarnMaxAttempts
      case None => yarnMaxAttempts
    }
  }
}
Example 8
Source File: YarnClusterSchedulerBackend.scala (from spark1.52, Apache License 2.0)
package org.apache.spark.scheduler.cluster

import org.apache.hadoop.yarn.api.ApplicationConstants.Environment
import org.apache.hadoop.yarn.conf.YarnConfiguration

import org.apache.spark.SparkContext
import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil
import org.apache.spark.scheduler.TaskSchedulerImpl
import org.apache.spark.util.Utils

private[spark] class YarnClusterSchedulerBackend(
    scheduler: TaskSchedulerImpl,
    sc: SparkContext)
  extends YarnSchedulerBackend(scheduler, sc) {

  override def start() {
    super.start()
    totalExpectedExecutors = YarnSparkHadoopUtil.getInitialTargetExecutorNumber(sc.conf)
  }

  override def applicationId(): String =
    // In YARN Cluster mode, the application ID is expected to be set, so log an error if it's
    // not found.
    sc.getConf.getOption("spark.yarn.app.id").getOrElse {
      logError("Application ID is not set.")
      super.applicationId
    }

  override def applicationAttemptId(): Option[String] =
    // In YARN Cluster mode, the attempt ID is expected to be set, so log an error if it's
    // not found.
    sc.getConf.getOption("spark.yarn.app.attemptId").orElse {
      logError("Application attempt ID is not set.")
      super.applicationAttemptId
    }

  override def getDriverLogUrls: Option[Map[String, String]] = {
    var driverLogs: Option[Map[String, String]] = None
    try {
      val yarnConf = new YarnConfiguration(sc.hadoopConfiguration)
      val containerId = YarnSparkHadoopUtil.get.getContainerId
      val httpAddress = System.getenv(Environment.NM_HOST.name()) +
        ":" + System.getenv(Environment.NM_HTTP_PORT.name())
      // lookup appropriate http scheme for container log urls
      val yarnHttpPolicy = yarnConf.get(
        YarnConfiguration.YARN_HTTP_POLICY_KEY,
        YarnConfiguration.YARN_HTTP_POLICY_DEFAULT
      )
      val user = Utils.getCurrentUserName()
      val httpScheme = if (yarnHttpPolicy == "HTTPS_ONLY") "https://" else "http://"
      val baseUrl = s"$httpScheme$httpAddress/node/containerlogs/$containerId/$user"
      logDebug(s"Base URL for logs: $baseUrl")
      driverLogs = Some(Map(
        "stderr" -> s"$baseUrl/stderr?start=-4096",
        "stdout" -> s"$baseUrl/stdout?start=-4096"))
    } catch {
      case e: Exception =>
        logInfo("Error while building AM log links, so AM" +
          " logs link will not appear in application UI", e)
    }
    driverLogs
  }
}
Example 9
Source File: KyuubiHadoopUtil.scala (from kyuubi, Apache License 2.0)
package yaooqinn.kyuubi.utils

import java.security.PrivilegedExceptionAction
import java.util.EnumSet

import scala.collection.JavaConverters._
import scala.util.control.NonFatal

import org.apache.hadoop.security.UserGroupInformation
import org.apache.hadoop.yarn.api.records.YarnApplicationState._
import org.apache.hadoop.yarn.client.api.YarnClient
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.kyuubi.Logging
import org.apache.spark.KyuubiSparkUtil

private[kyuubi] object KyuubiHadoopUtil extends Logging {

  private def createYarnClient: YarnClient = {
    val c = YarnClient.createYarnClient()
    c.init(new YarnConfiguration())
    c.start()
    c
  }

  def killYarnAppByName(appName: String): Unit = {
    val client = createYarnClient
    client.getApplications(Set("SPARK").asJava, EnumSet.of(ACCEPTED, SUBMITTED, RUNNING)).asScala
      .filter(applicationReport => applicationReport.getName.equals(appName))
      .foreach { applicationReport =>
        client.killApplication(applicationReport.getApplicationId)
      }
  }

  def doAs[T](user: UserGroupInformation)(f: => T): T = {
    try {
      user.doAs(new PrivilegedExceptionAction[T] {
        override def run(): T = f
      })
    } catch {
      case NonFatal(e) =>
        throw KyuubiSparkUtil.findCause(e)
    }
  }

  def doAsAndLogNonFatal(user: UserGroupInformation)(f: => Unit): Unit = {
    try {
      doAs(user)(f)
    } catch {
      case NonFatal(e) =>
        error(s"Failed to operate as ${user.getShortUserName}", e)
    }
  }

  def doAsRealUser[T](f: => T): T = {
    val currentUser = UserGroupInformation.getCurrentUser
    val realUser = Option(currentUser.getRealUser).getOrElse(currentUser)
    doAs(realUser)(f)
  }
}
Example 10
Source File: YarnShuffleIntegrationSuite.scala (from multi-tenancy-spark, Apache License 2.0)
package org.apache.spark.deploy.yarn

import java.io.File
import java.nio.charset.StandardCharsets

import com.google.common.io.Files
import org.apache.commons.io.FileUtils
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.scalatest.Matchers

import org.apache.spark._
import org.apache.spark.internal.Logging
import org.apache.spark.network.shuffle.ShuffleTestAccessor
import org.apache.spark.network.yarn.{YarnShuffleService, YarnTestAccessor}
import org.apache.spark.tags.ExtendedYarnTest

@ExtendedYarnTest
class YarnShuffleIntegrationSuite extends BaseYarnClusterSuite {

  override def newYarnConfig(): YarnConfiguration = {
    val yarnConfig = new YarnConfiguration()
    yarnConfig.set(YarnConfiguration.NM_AUX_SERVICES, "spark_shuffle")
    yarnConfig.set(YarnConfiguration.NM_AUX_SERVICE_FMT.format("spark_shuffle"),
      classOf[YarnShuffleService].getCanonicalName)
    yarnConfig.set("spark.shuffle.service.port", "0")
    yarnConfig
  }

  test("external shuffle service") {
    val shuffleServicePort = YarnTestAccessor.getShuffleServicePort
    val shuffleService = YarnTestAccessor.getShuffleServiceInstance

    val registeredExecFile = YarnTestAccessor.getRegisteredExecutorFile(shuffleService)

    logInfo("Shuffle service port = " + shuffleServicePort)
    val result = File.createTempFile("result", null, tempDir)
    val finalState = runSpark(
      false,
      mainClassName(YarnExternalShuffleDriver.getClass),
      appArgs = Seq(result.getAbsolutePath(), registeredExecFile.getAbsolutePath),
      extraConf = Map(
        "spark.shuffle.service.enabled" -> "true",
        "spark.shuffle.service.port" -> shuffleServicePort.toString
      )
    )
    checkResult(finalState, result)
    assert(YarnTestAccessor.getRegisteredExecutorFile(shuffleService).exists())
  }
}

private object YarnExternalShuffleDriver extends Logging with Matchers {

  val WAIT_TIMEOUT_MILLIS = 10000

  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      // scalastyle:off println
      System.err.println(
        s"""
        |Invalid command line: ${args.mkString(" ")}
        |
        |Usage: ExternalShuffleDriver [result file] [registered exec file]
        """.stripMargin)
      // scalastyle:on println
      System.exit(1)
    }

    val sc = new SparkContext(new SparkConf()
      .setAppName("External Shuffle Test"))
    val conf = sc.getConf
    val status = new File(args(0))
    val registeredExecFile = new File(args(1))
    logInfo("shuffle service executor file = " + registeredExecFile)
    var result = "failure"
    val execStateCopy = new File(registeredExecFile.getAbsolutePath + "_dup")
    try {
      val data = sc.parallelize(0 until 100, 10).map { x => (x % 10) -> x }.reduceByKey{ _ + _ }.
        collect().toSet
      sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
      data should be ((0 until 10).map{x => x -> (x * 10 + 450)}.toSet)
      result = "success"
      // only one process can open a leveldb file at a time, so we copy the files
      FileUtils.copyDirectory(registeredExecFile, execStateCopy)
      assert(!ShuffleTestAccessor.reloadRegisteredExecutors(execStateCopy).isEmpty)
    } finally {
      sc.stop()
      FileUtils.deleteDirectory(execStateCopy)
      Files.write(result, status, StandardCharsets.UTF_8)
    }
  }
}
Example 11
Source File: YarnClusterSchedulerBackend.scala (from multi-tenancy-spark, Apache License 2.0)
package org.apache.spark.scheduler.cluster

import org.apache.hadoop.yarn.api.ApplicationConstants.Environment
import org.apache.hadoop.yarn.conf.YarnConfiguration

import org.apache.spark.SparkContext
import org.apache.spark.deploy.yarn.{ApplicationMaster, YarnSparkHadoopUtil}
import org.apache.spark.scheduler.TaskSchedulerImpl
import org.apache.spark.util.Utils

private[spark] class YarnClusterSchedulerBackend(
    scheduler: TaskSchedulerImpl,
    sc: SparkContext)
  extends YarnSchedulerBackend(scheduler, sc) {

  override def start() {
    val attemptId = ApplicationMaster.getAttemptId
    bindToYarn(attemptId.getApplicationId(), Some(attemptId))
    super.start()
    totalExpectedExecutors = YarnSparkHadoopUtil.getInitialTargetExecutorNumber(sc.conf)
  }

  override def getDriverLogUrls: Option[Map[String, String]] = {
    var driverLogs: Option[Map[String, String]] = None
    try {
      val yarnConf = new YarnConfiguration(sc.hadoopConfiguration)
      val containerId = YarnSparkHadoopUtil.get.getContainerId
      val httpAddress = System.getenv(Environment.NM_HOST.name()) +
        ":" + System.getenv(Environment.NM_HTTP_PORT.name())
      // lookup appropriate http scheme for container log urls
      val yarnHttpPolicy = yarnConf.get(
        YarnConfiguration.YARN_HTTP_POLICY_KEY,
        YarnConfiguration.YARN_HTTP_POLICY_DEFAULT
      )
      val user = Utils.getCurrentUserName()
      val httpScheme = if (yarnHttpPolicy == "HTTPS_ONLY") "https://" else "http://"
      val baseUrl = s"$httpScheme$httpAddress/node/containerlogs/$containerId/$user"
      logDebug(s"Base URL for logs: $baseUrl")
      driverLogs = Some(Map(
        "stdout" -> s"$baseUrl/stdout?start=-4096",
        "stderr" -> s"$baseUrl/stderr?start=-4096"))
    } catch {
      case e: Exception =>
        logInfo("Error while building AM log links, so AM" +
          " logs link will not appear in application UI", e)
    }
    driverLogs
  }
}
Example 12
Source File: YarnRMClient.scala (from drizzle-spark, Apache License 2.0)
package org.apache.spark.deploy.yarn

import java.util.{List => JList}

import scala.collection.JavaConverters._
import scala.util.Try

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.yarn.api.records._
import org.apache.hadoop.yarn.client.api.AMRMClient
import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.hadoop.yarn.webapp.util.WebAppUtils

import org.apache.spark.{SecurityManager, SparkConf}
import org.apache.spark.deploy.yarn.config._
import org.apache.spark.internal.Logging
import org.apache.spark.rpc.RpcEndpointRef
import org.apache.spark.util.Utils

  def getMaxRegAttempts(sparkConf: SparkConf, yarnConf: YarnConfiguration): Int = {
    val sparkMaxAttempts = sparkConf.get(MAX_APP_ATTEMPTS).map(_.toInt)
    val yarnMaxAttempts = yarnConf.getInt(
      YarnConfiguration.RM_AM_MAX_ATTEMPTS, YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS)
    val retval: Int = sparkMaxAttempts match {
      case Some(x) => if (x <= yarnMaxAttempts) x else yarnMaxAttempts
      case None => yarnMaxAttempts
    }

    retval
  }
}
Example 13
Source File: YarnClusterSchedulerBackend.scala (from sparkoscope, Apache License 2.0)
package org.apache.spark.scheduler.cluster

import org.apache.hadoop.yarn.api.ApplicationConstants.Environment
import org.apache.hadoop.yarn.conf.YarnConfiguration

import org.apache.spark.SparkContext
import org.apache.spark.deploy.yarn.{ApplicationMaster, YarnSparkHadoopUtil}
import org.apache.spark.scheduler.TaskSchedulerImpl
import org.apache.spark.util.Utils

private[spark] class YarnClusterSchedulerBackend(
    scheduler: TaskSchedulerImpl,
    sc: SparkContext)
  extends YarnSchedulerBackend(scheduler, sc) {

  override def start() {
    val attemptId = ApplicationMaster.getAttemptId
    bindToYarn(attemptId.getApplicationId(), Some(attemptId))
    super.start()
    totalExpectedExecutors = YarnSparkHadoopUtil.getInitialTargetExecutorNumber(sc.conf)
  }

  override def getDriverLogUrls: Option[Map[String, String]] = {
    var driverLogs: Option[Map[String, String]] = None
    try {
      val yarnConf = new YarnConfiguration(sc.hadoopConfiguration)
      val containerId = YarnSparkHadoopUtil.get.getContainerId
      val httpAddress = System.getenv(Environment.NM_HOST.name()) +
        ":" + System.getenv(Environment.NM_HTTP_PORT.name())
      // lookup appropriate http scheme for container log urls
      val yarnHttpPolicy = yarnConf.get(
        YarnConfiguration.YARN_HTTP_POLICY_KEY,
        YarnConfiguration.YARN_HTTP_POLICY_DEFAULT
      )
      val user = Utils.getCurrentUserName()
      val httpScheme = if (yarnHttpPolicy == "HTTPS_ONLY") "https://" else "http://"
      val baseUrl = s"$httpScheme$httpAddress/node/containerlogs/$containerId/$user"
      logDebug(s"Base URL for logs: $baseUrl")
      driverLogs = Some(Map(
        "stdout" -> s"$baseUrl/stdout?start=-4096",
        "stderr" -> s"$baseUrl/stderr?start=-4096"))
    } catch {
      case e: Exception =>
        logInfo("Error while building AM log links, so AM" +
          " logs link will not appear in application UI", e)
    }
    driverLogs
  }
}
Example 14
Source File: AppMasterResolver.scala (from incubator-retired-gearpump, Apache License 2.0)
package org.apache.gearpump.experiments.yarn.client

import java.io.IOException
import java.net.{HttpURLConnection, URL}
import java.nio.charset.StandardCharsets

import akka.actor.{ActorRef, ActorSystem}
import org.apache.commons.io.IOUtils
import org.apache.gearpump.experiments.yarn.glue.Records.{ApplicationId, ApplicationReport}
import org.apache.gearpump.experiments.yarn.glue.YarnClient
import org.apache.gearpump.util.{AkkaHelper, LogUtil}
import org.apache.hadoop.hdfs.web.URLConnectionFactory
import org.apache.hadoop.yarn.conf.YarnConfiguration

import scala.util.Try

class AppMasterResolver(yarnClient: YarnClient, system: ActorSystem) {
  val LOG = LogUtil.getLogger(getClass)
  val RETRY_INTERVAL_MS = 3000 // ms

  def resolve(appId: ApplicationId, timeoutSeconds: Int = 30): ActorRef = {
    val appMaster = retry(connect(appId), 1 + timeoutSeconds * 1000 / RETRY_INTERVAL_MS)
    appMaster
  }

  private def connect(appId: ApplicationId): ActorRef = {
    val report = yarnClient.getApplicationReport(appId)
    AppMasterResolver.resolveAppMasterAddress(report, system)
  }

  private def retry(fun: => ActorRef, times: Int): ActorRef = {
    var index = 0
    var result: ActorRef = null
    while (index < times && result == null) {
      Thread.sleep(RETRY_INTERVAL_MS)
      index += 1
      val tryConnect = Try(fun)
      if (tryConnect.isFailure) {
        LOG.error(s"Failed to connect YarnAppMaster(tried $index)... " +
          tryConnect.failed.get.getMessage)
      } else {
        result = tryConnect.get
      }
    }
    result
  }
}

object AppMasterResolver {
  val LOG = LogUtil.getLogger(getClass)

  def resolveAppMasterAddress(report: ApplicationReport, system: ActorSystem): ActorRef = {
    val appMasterPath = s"${report.getTrackingURL}/supervisor-actor-path"
    LOG.info(s"appMasterPath=$appMasterPath")

    val connectionFactory: URLConnectionFactory = URLConnectionFactory
      .newDefaultURLConnectionFactory(new YarnConfiguration())
    val url: URL = new URL(appMasterPath)
    val connection: HttpURLConnection = connectionFactory.openConnection(url)
      .asInstanceOf[HttpURLConnection]
    connection.setInstanceFollowRedirects(true)

    try {
      connection.connect()
    } catch {
      case e: IOException =>
        LOG.error(s"Failed to connect to AppMaster" + e.getMessage)
    }

    val status = connection.getResponseCode
    if (status == 200) {
      val stream: java.io.InputStream = connection.getInputStream
      val response = IOUtils.toString(stream, StandardCharsets.UTF_8)
      LOG.info("Successfully resolved AppMaster address: " + response)
      connection.disconnect()
      AkkaHelper.actorFor(system, response)
    } else {
      connection.disconnect()
      throw new IOException("Fail to resolve AppMaster address, please make sure " +
        s"${report.getTrackingURL} is accessible...")
    }
  }
}
Example 15
Source File: ContainerLaunchContext.scala (from incubator-retired-gearpump, Apache License 2.0)
package org.apache.gearpump.experiments.yarn.glue

import java.io.File
import java.nio.ByteBuffer

import scala.collection.JavaConverters._

import org.apache.hadoop.fs.{FileSystem => YarnFileSystem, Path}
import org.apache.hadoop.io.DataOutputBuffer
import org.apache.hadoop.mapreduce.security.TokenCache
import org.apache.hadoop.security.UserGroupInformation
import org.apache.hadoop.yarn.api.ApplicationConstants.Environment
import org.apache.hadoop.yarn.api.records._
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.hadoop.yarn.util.ConverterUtils
import org.slf4j.Logger

import org.apache.gearpump.util.LogUtil

private[glue] object ContainerLaunchContext {

  private val LOG: Logger = LogUtil.getLogger(getClass)

  def apply(yarnConf: YarnConfiguration, command: String, packagePath: String, configPath: String)
    : ContainerLaunchContext = {
    val context = Records.newRecord(classOf[ContainerLaunchContext])
    context.setCommands(Seq(command).asJava)
    context.setEnvironment(getAppEnv(yarnConf).asJava)
    context.setTokens(getToken(yarnConf, packagePath, configPath))
    context.setLocalResources(getAMLocalResourcesMap(yarnConf, packagePath, configPath).asJava)
    context
  }

  private def getFs(yarnConf: YarnConfiguration) = YarnFileSystem.get(yarnConf)

  private def getAppEnv(yarnConf: YarnConfiguration): Map[String, String] = {
    val classPaths = yarnConf.getStrings(
      YarnConfiguration.YARN_APPLICATION_CLASSPATH,
      YarnConfiguration.DEFAULT_YARN_APPLICATION_CLASSPATH.mkString(File.pathSeparator))
    val allPaths = Option(classPaths).getOrElse(Array(""))

    allPaths :+ Environment.PWD.$() + File.separator + "*" + File.pathSeparator

    Map(Environment.CLASSPATH.name -> allPaths.map(_.trim).mkString(File.pathSeparator))
  }

  private def getAMLocalResourcesMap(
      yarnConf: YarnConfiguration, packagePath: String, configPath: String)
    : Map[String, LocalResource] = {
    val fs = getFs(yarnConf)

    Map(
      "pack" -> newYarnAppResource(fs, new Path(packagePath),
        LocalResourceType.ARCHIVE, LocalResourceVisibility.APPLICATION),
      "conf" -> newYarnAppResource(fs, new Path(configPath),
        LocalResourceType.FILE, LocalResourceVisibility.APPLICATION))
  }

  private def newYarnAppResource(
      fs: YarnFileSystem, path: Path,
      resourceType: LocalResourceType, vis: LocalResourceVisibility): LocalResource = {
    val qualified = fs.makeQualified(path)
    val status = fs.getFileStatus(qualified)
    val resource = Records.newRecord(classOf[LocalResource])
    resource.setType(resourceType)
    resource.setVisibility(vis)
    resource.setResource(ConverterUtils.getYarnUrlFromPath(qualified))
    resource.setTimestamp(status.getModificationTime)
    resource.setSize(status.getLen)
    resource
  }

  private def getToken(yc: YarnConfiguration, packagePath: String, configPath: String)
    : ByteBuffer = {
    val credentials = UserGroupInformation.getCurrentUser.getCredentials
    val dob = new DataOutputBuffer
    val dirs = Array(new Path(packagePath), new Path(configPath))
    TokenCache.obtainTokensForNamenodes(credentials, dirs, yc)
    credentials.writeTokenStorageToStream(dob)
    ByteBuffer.wrap(dob.getData)
  }
}
Example 16
Source File: ParquetWriterTaskSpec.scala (from gearpump-examples, Apache License 2.0)
package io.gearpump.examples.kafka_hdfs_pipeline

import akka.actor.ActorSystem
import org.apache.avro.Schema
import io.gearpump.Message
import io.gearpump.cluster.UserConfig
import io.gearpump.streaming.MockUtil
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.parquet.avro.{AvroParquetReader, AvroParquetWriter}
import org.apache.parquet.hadoop.ParquetReader
import org.apache.parquet.hadoop.api.ReadSupport
import org.mockito.Mockito
import org.mockito.Mockito._
import org.scalatest.prop.PropertyChecks
import org.scalatest.{BeforeAndAfterAll, Matchers, PropSpec}

class ParquetWriterTaskSpec extends PropSpec with PropertyChecks with Matchers with BeforeAndAfterAll {

  implicit var system: ActorSystem = ActorSystem("PipeLineSpec")
  val context = MockUtil.mockTaskContext
  val appName = "KafkaHdfsPipeLine"
  when(context.appName).thenReturn(appName)
  val fs = FileSystem.get(new YarnConfiguration)
  val homeDir = fs.getHomeDirectory.toUri.getPath
  val parquetDir = new Path(homeDir, "gearpump") + "/parquet/"
  val parquetPath = parquetDir + appName + ".parquet"
  val parquetCrc = parquetDir + "." + appName + ".parquet.crc"
  val parquetWriter = Mockito.mock(classOf[AvroParquetWriter[SpaceShuttleRecord]])

  val anomaly = 0.252
  val now = System.currentTimeMillis

  val userConfig = UserConfig.empty.withString(ParquetWriterTask.PARQUET_OUTPUT_DIRECTORY, "/parquet")

  override def afterAll(): Unit = {
    List(parquetPath, parquetCrc, parquetDir).foreach(new java.io.File(_).delete)
    system.shutdown()
  }

  property("ParquetWriterTask should initialize with local parquet file opened for writing") {
    val parquetWriterTask = new ParquetWriterTask(context, userConfig)
    val path = parquetWriterTask.absolutePath.stripPrefix("file:")
    assert(parquetPath.equals(path))
    parquetWriterTask.onStop
  }

  property("ParquetWriterTask should write records to a parquet file") {
    val message = Message(SpaceShuttleRecord(now, anomaly), now)
    val parquetWriterTask = new ParquetWriterTask(context, userConfig)
    parquetWriterTask.parquetWriter = parquetWriter
    parquetWriterTask.onNext(message)
    verify(parquetWriterTask.parquetWriter).write(message.msg.asInstanceOf[SpaceShuttleRecord])
    parquetWriterTask.onStop
  }

  property("ParquetWriterTask should have verifiable written record") {
    val message = Message(SpaceShuttleRecord(now, anomaly), now)
    val parquetWriterTask = new ParquetWriterTask(context, userConfig)
    parquetWriterTask.onNext(message)
    parquetWriterTask.onStop
    val reader = new AvroParquetReader[SpaceShuttleRecord](new Path(parquetPath))
    val record = reader.read()
    assert(message.msg.asInstanceOf[SpaceShuttleRecord].anomaly == record.anomaly)
    assert(message.msg.asInstanceOf[SpaceShuttleRecord].ts == record.ts)
  }
}
Example 17
Source File: ParquetWriterTask.scala (from gearpump-examples, Apache License 2.0)
package io.gearpump.examples.kafka_hdfs_pipeline

import org.apache.avro.Schema
import io.gearpump.Message
import io.gearpump.cluster.UserConfig
import io.gearpump.examples.kafka_hdfs_pipeline.ParquetWriterTask._
import io.gearpump.streaming.task.{StartTime, Task, TaskContext}
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.parquet.avro.AvroParquetWriter

import scala.util.{Failure, Success, Try}

class ParquetWriterTask(taskContext : TaskContext, config: UserConfig)
  extends Task(taskContext, config) {
  val outputFileName = taskContext.appName + ".parquet"
  val absolutePath = Option(getHdfs + config.getString(PARQUET_OUTPUT_DIRECTORY).getOrElse("/parquet") +
    "/" + outputFileName).map(deleteFile(_)).get
  val outputPath = new Path(absolutePath)
  var parquetWriter = new AvroParquetWriter[SpaceShuttleRecord](outputPath, SpaceShuttleRecord.SCHEMA$)

  def getYarnConf = new YarnConfiguration
  def getFs = FileSystem.get(getYarnConf)
  def getHdfs = new Path(getFs.getHomeDirectory, "gearpump")

  private def deleteFile(fileName: String): String = {
    val file = new Path(fileName)
    getFs.exists(file) match {
      case true =>
        getFs.delete(file, false)
      case false =>
    }
    fileName
  }

  override def onStart(startTime: StartTime): Unit = {
    LOG.info(s"ParquetWriter.onStart $absolutePath")
  }

  override def onNext(msg: Message): Unit = {
    Try({
      parquetWriter.write(msg.msg.asInstanceOf[SpaceShuttleRecord])
    }) match {
      case Success(ok) =>
      case Failure(throwable) =>
        LOG.error(s"failed ${throwable.getMessage}")
    }
  }

  override def onStop(): Unit = {
    LOG.info("ParquetWriter.onStop")
    parquetWriter.close()
  }
}

object ParquetWriterTask {
  val PARQUET_OUTPUT_DIRECTORY = "parquet.output.directory"
  val PARQUET_WRITER = "parquet.writer"
}
Example 18
Source File: RMCallbackHandler.scala (from DataXServer, Apache License 2.0)
package org.tianlangstudio.data.hamal.yarn

import java.io.File
import java.util.{Collections, List}

import org.tianlangstudio.data.hamal.core.{Constants, HamalConf}
import org.tianlangstudio.data.hamal.core.HamalConf
//import java.util.Collections

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path, FileContext}
import org.apache.hadoop.yarn.api.records._
import org.apache.hadoop.yarn.client.api.{AMRMClient, NMClient}
import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.hadoop.yarn.util.{ConverterUtils, Records}

import scala.jdk.CollectionConverters._
//import scala.collection.JavaConverters._

/**
 * Created by zhuhq on 2016/4/29.
 */
class RMCallbackHandler(
    nmClient: NMClient,
    containerCmd: Container => String,
    hamalConf: HamalConf,
    yarnConfiguration: Configuration) extends AMRMClientAsync.CallbackHandler {
  private val logging = org.slf4j.LoggerFactory.getLogger(classOf[RMCallbackHandler])

  override def onContainersCompleted(statuses: List[ContainerStatus]): Unit = {
    for (containerStatus <- statuses.asScala) {
      logging.info(s"containerId:${containerStatus} exitStatus:${containerStatus}")
    }
  }

  override def onError(e: Throwable): Unit = {
    logging.error("on error", e)
  }

  override def getProgress: Float = {
    0
  }

  override def onShutdownRequest(): Unit = {
    logging.info("on shutdown request")
  }

  override def onNodesUpdated(updatedNodes: List[NodeReport]): Unit = {
    logging.info("on nodes updated")
    for (nodeReport <- updatedNodes.asScala) {
      logging.info(s"node id:${nodeReport} node labels:${nodeReport}");
    }
  }

  override def onContainersAllocated(containers: List[Container]): Unit = {
    logging.info("on containers allocated");
    for (container: Container <- containers.asScala) {
      try {
        // Launch container by create ContainerLaunchContext
        val ctx = Records.newRecord(classOf[ContainerLaunchContext]);
        //ctx.setCommands(Collections.singletonList(""" echo "begin";sleep 900;echo "end"; """))
        ctx.setCommands(Collections.singletonList(containerCmd(container)))
        val packagePath = hamalConf.getString(Constants.DATAX_EXECUTOR_FILE, "executor.zip");
        val archiveStat = FileSystem.get(yarnConfiguration).getFileStatus(new Path(packagePath))
        val packageUrl = ConverterUtils.getYarnUrlFromPath(
          FileContext.getFileContext.makeQualified(new Path(packagePath)));
        val packageResource = Records.newRecord[LocalResource](classOf[LocalResource])
        packageResource.setResource(packageUrl);
        packageResource.setSize(archiveStat.getLen);
        packageResource.setTimestamp(archiveStat.getModificationTime);
        packageResource.setType(LocalResourceType.ARCHIVE);
        packageResource.setVisibility(LocalResourceVisibility.APPLICATION)

        ctx.setLocalResources(Collections.singletonMap(
          Constants.DATAX_EXECUTOR_ARCHIVE_FILE_NAME, packageResource))
        logging.info("[AM] Launching container " + container.getId());
        nmClient.startContainer(container, ctx);
      } catch {
        case ex: Exception =>
          logging.info("[AM] Error launching container " + container.getId() + " " + ex);
      }
    }
  }
}
Example 19
Source File: HiveMRSuite.scala (from connectors, Apache License 2.0)
package io.delta.hive

import java.io.{Closeable, File}

import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapred.{JobConf, MiniMRCluster}
import org.apache.hadoop.mapreduce.MRJobConfig
import org.apache.hadoop.yarn.conf.YarnConfiguration

class HiveMRSuite extends HiveConnectorTest {

  override val engine: String = "mr"

  override def createCluster(namenode: String, conf: Configuration, tempPath: File): Closeable = {
    val jConf = new JobConf(conf);
    jConf.set("yarn.scheduler.capacity.root.queues", "default");
    jConf.set("yarn.scheduler.capacity.root.default.capacity", "100");
    jConf.setInt(MRJobConfig.MAP_MEMORY_MB, 512);
    jConf.setInt(MRJobConfig.REDUCE_MEMORY_MB, 512);
    jConf.setInt(MRJobConfig.MR_AM_VMEM_MB, 128);
    jConf.setInt(YarnConfiguration.YARN_MINICLUSTER_NM_PMEM_MB, 512);
    jConf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 128);
    jConf.setInt(YarnConfiguration.RM_SCHEDULER_MAXIMUM_ALLOCATION_MB, 512);
    val mr = new MiniMRCluster(2, namenode, 1, null, null, jConf)

    new Closeable {
      override def close(): Unit = {
        mr.shutdown()
      }
    }
  }
}
Example 20
Source File: HiveTezSuite.scala (from connectors, Apache License 2.0)
package io.delta.hive

import java.io.{Closeable, File}

import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapreduce.MRJobConfig
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.tez.dag.api.TezConfiguration
import org.apache.tez.runtime.library.api.TezRuntimeConfiguration
import org.apache.tez.test.MiniTezCluster

class HiveTezSuite extends HiveConnectorTest {

  override val engine: String = "tez"

  private var tezConf: Configuration = _

  // scalastyle:off
  // scalastyle:on

  override def setupConfiguration(conf: Configuration): Unit = {
    tezConf.asScala.foreach { e =>
      conf.set(e.getKey, e.getValue)
    }
    // Overrides values from the hive/tez-site.
    conf.setInt("hive.tez.container.size", 256)
    conf.setInt(TezConfiguration.TEZ_AM_RESOURCE_MEMORY_MB, 256)
    conf.setInt(TezConfiguration.TEZ_TASK_RESOURCE_MEMORY_MB, 256)
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_MB, 24)
    conf.setInt(TezRuntimeConfiguration.TEZ_RUNTIME_UNORDERED_OUTPUT_BUFFER_SIZE_MB, 10)
    conf.setFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0.4f)
    conf.setBoolean(TezConfiguration.TEZ_IGNORE_LIB_URIS, true)
  }
}
Example 21
Source File: YarnShuffleIntegrationSuite.scala (from drizzle-spark, Apache License 2.0)
package org.apache.spark.deploy.yarn

import java.io.File
import java.nio.charset.StandardCharsets

import com.google.common.io.Files
import org.apache.commons.io.FileUtils
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.scalatest.Matchers

import org.apache.spark._
import org.apache.spark.internal.Logging
import org.apache.spark.network.shuffle.ShuffleTestAccessor
import org.apache.spark.network.yarn.{YarnShuffleService, YarnTestAccessor}
import org.apache.spark.tags.ExtendedYarnTest

@ExtendedYarnTest
class YarnShuffleIntegrationSuite extends BaseYarnClusterSuite {

  override def newYarnConfig(): YarnConfiguration = {
    val yarnConfig = new YarnConfiguration()
    yarnConfig.set(YarnConfiguration.NM_AUX_SERVICES, "spark_shuffle")
    yarnConfig.set(YarnConfiguration.NM_AUX_SERVICE_FMT.format("spark_shuffle"),
      classOf[YarnShuffleService].getCanonicalName)
    yarnConfig.set("spark.shuffle.service.port", "0")
    yarnConfig
  }

  test("external shuffle service") {
    val shuffleServicePort = YarnTestAccessor.getShuffleServicePort
    val shuffleService = YarnTestAccessor.getShuffleServiceInstance

    val registeredExecFile = YarnTestAccessor.getRegisteredExecutorFile(shuffleService)

    logInfo("Shuffle service port = " + shuffleServicePort)
    val result = File.createTempFile("result", null, tempDir)
    val finalState = runSpark(
      false,
      mainClassName(YarnExternalShuffleDriver.getClass),
      appArgs = Seq(result.getAbsolutePath(), registeredExecFile.getAbsolutePath),
      extraConf = Map(
        "spark.shuffle.service.enabled" -> "true",
        "spark.shuffle.service.port" -> shuffleServicePort.toString
      )
    )
    checkResult(finalState, result)
    assert(YarnTestAccessor.getRegisteredExecutorFile(shuffleService).exists())
  }
}

private object YarnExternalShuffleDriver extends Logging with Matchers {

  val WAIT_TIMEOUT_MILLIS = 10000

  def main(args: Array[String]): Unit = {
    if (args.length != 2) {
      // scalastyle:off println
      System.err.println(
        s"""
        |Invalid command line: ${args.mkString(" ")}
        |
        |Usage: ExternalShuffleDriver [result file] [registered exec file]
        """.stripMargin)
      // scalastyle:on println
      System.exit(1)
    }

    val sc = new SparkContext(new SparkConf()
      .setAppName("External Shuffle Test"))
    val conf = sc.getConf
    val status = new File(args(0))
    val registeredExecFile = new File(args(1))
    logInfo("shuffle service executor file = " + registeredExecFile)
    var result = "failure"
    val execStateCopy = new File(registeredExecFile.getAbsolutePath + "_dup")
    try {
      val data = sc.parallelize(0 until 100, 10).map { x => (x % 10) -> x }.reduceByKey{ _ + _ }.
        collect().toSet
      sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
      data should be ((0 until 10).map{x => x -> (x * 10 + 450)}.toSet)
      result = "success"
      // only one process can open a leveldb file at a time, so we copy the files
      FileUtils.copyDirectory(registeredExecFile, execStateCopy)
      assert(!ShuffleTestAccessor.reloadRegisteredExecutors(execStateCopy).isEmpty)
    } finally {
      sc.stop()
      FileUtils.deleteDirectory(execStateCopy)
      Files.write(result, status, StandardCharsets.UTF_8)
    }
  }
}
Example 22
Source File: YarnClusterSchedulerBackend.scala (from drizzle-spark, Apache License 2.0)
package org.apache.spark.scheduler.cluster

import org.apache.hadoop.yarn.api.ApplicationConstants.Environment
import org.apache.hadoop.yarn.conf.YarnConfiguration

import org.apache.spark.SparkContext
import org.apache.spark.deploy.yarn.{ApplicationMaster, YarnSparkHadoopUtil}
import org.apache.spark.scheduler.TaskSchedulerImpl
import org.apache.spark.util.Utils

private[spark] class YarnClusterSchedulerBackend(
    scheduler: TaskSchedulerImpl,
    sc: SparkContext)
  extends YarnSchedulerBackend(scheduler, sc) {

  override def start() {
    val attemptId = ApplicationMaster.getAttemptId
    bindToYarn(attemptId.getApplicationId(), Some(attemptId))
    super.start()
    totalExpectedExecutors = YarnSparkHadoopUtil.getInitialTargetExecutorNumber(sc.conf)
  }

  override def getDriverLogUrls: Option[Map[String, String]] = {
    var driverLogs: Option[Map[String, String]] = None
    try {
      val yarnConf = new YarnConfiguration(sc.hadoopConfiguration)
      val containerId = YarnSparkHadoopUtil.get.getContainerId
      val httpAddress = System.getenv(Environment.NM_HOST.name()) +
        ":" + System.getenv(Environment.NM_HTTP_PORT.name())
      // lookup appropriate http scheme for container log urls
      val yarnHttpPolicy = yarnConf.get(
        YarnConfiguration.YARN_HTTP_POLICY_KEY,
        YarnConfiguration.YARN_HTTP_POLICY_DEFAULT
      )
      val user = Utils.getCurrentUserName()
      val httpScheme = if (yarnHttpPolicy == "HTTPS_ONLY") "https://" else "http://"
      val baseUrl = s"$httpScheme$httpAddress/node/containerlogs/$containerId/$user"
      logDebug(s"Base URL for logs: $baseUrl")
      driverLogs = Some(Map(
        "stderr" -> s"$baseUrl/stderr?start=-4096",
        "stdout" -> s"$baseUrl/stdout?start=-4096"))
    } catch {
      case e: Exception =>
        logInfo("Error while building AM log links, so AM" +
          " logs link will not appear in application UI", e)
    }
    driverLogs
  }
}