org.apache.hadoop.security.Credentials Scala Examples

The following examples show how to use org.apache.hadoop.security.Credentials. Each example notes the open source project and source file it was taken from in the header above the code.
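
Before the project-specific examples, here is a minimal, self-contained sketch of the core Credentials API: adding a secret key, round-tripping the credentials through Hadoop's token storage stream format, and attaching them to the current UserGroupInformation. The object name and key alias are illustrative only.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}
import java.nio.charset.StandardCharsets

import org.apache.hadoop.io.Text
import org.apache.hadoop.security.{Credentials, UserGroupInformation}

object CredentialsBasics {
  def main(args: Array[String]): Unit = {
    val creds = new Credentials()

    // Store an arbitrary secret under an alias (Spark stores its IO encryption key this way).
    creds.addSecretKey(new Text("example.secret"), "s3cr3t".getBytes(StandardCharsets.UTF_8))

    // Serialize the credentials in the Hadoop token storage format and read them back.
    val out = new ByteArrayOutputStream()
    creds.writeTokenStorageToStream(new DataOutputStream(out))

    val restored = new Credentials()
    restored.readTokenStorageStream(new DataInputStream(new ByteArrayInputStream(out.toByteArray)))

    // Attach the credentials to the current user so later Hadoop calls can pick them up.
    UserGroupInformation.getCurrentUser.addCredentials(restored)

    println(s"tokens=${restored.numberOfTokens}, secret keys=${restored.numberOfSecretKeys}")
  }
}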
Example 1
Source File: HDFSCredentialProvider.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.deploy.yarn.security

import java.io.{ByteArrayInputStream, DataInputStream}

import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier
import org.apache.hadoop.mapred.Master
import org.apache.hadoop.security.Credentials

import org.apache.spark.{SparkConf, SparkException}
import org.apache.spark.deploy.yarn.config._
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config._

private[security] class HDFSCredentialProvider extends ServiceCredentialProvider with Logging {
  // Token renewal interval. This is set on the first call to obtainCredentials;
  // None means no token renewer was specified, so the renewal interval cannot be determined.
  private var tokenRenewalInterval: Option[Long] = null

  override val serviceName: String = "hdfs"

  override def obtainCredentials(
      hadoopConf: Configuration,
      sparkConf: SparkConf,
      creds: Credentials): Option[Long] = {
    // NameNodes to access, used to fetch delegation tokens from the different FileSystems
    nnsToAccess(hadoopConf, sparkConf).foreach { dst =>
      val dstFs = dst.getFileSystem(hadoopConf)
      logInfo("getting token for namenode: " + dst)
      dstFs.addDelegationTokens(getTokenRenewer(hadoopConf), creds)
    }

    // Compute the token renewal interval if it has not been set yet; this happens only once.
    if (tokenRenewalInterval == null) {
      tokenRenewalInterval = getTokenRenewalInterval(hadoopConf, sparkConf)
    }

    // Get the time of next renewal.
    tokenRenewalInterval.map { interval =>
      creds.getAllTokens.asScala
        .filter(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
        .map { t =>
          val identifier = new DelegationTokenIdentifier()
          identifier.readFields(new DataInputStream(new ByteArrayInputStream(t.getIdentifier)))
          identifier.getIssueDate + interval
        }.foldLeft(0L)(math.max)
    }
  }

  private def getTokenRenewalInterval(
      hadoopConf: Configuration, sparkConf: SparkConf): Option[Long] = {
    // We cannot use tokens generated with 'yarn' as the renewer; trying to renew them fails
    // with an access control error. So create new tokens with the logged-in user as the
    // renewer and derive the renewal interval from those.
    sparkConf.get(PRINCIPAL).map { renewer =>
      val creds = new Credentials()
      nnsToAccess(hadoopConf, sparkConf).foreach { dst =>
        val dstFs = dst.getFileSystem(hadoopConf)
        dstFs.addDelegationTokens(renewer, creds)
      }
      val t = creds.getAllTokens.asScala
        .filter(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
        .head
      val newExpiration = t.renew(hadoopConf)
      val identifier = new DelegationTokenIdentifier()
      identifier.readFields(new DataInputStream(new ByteArrayInputStream(t.getIdentifier)))
      val interval = newExpiration - identifier.getIssueDate
      logInfo(s"Renewal Interval is $interval")
      interval
    }
  }

  private def getTokenRenewer(conf: Configuration): String = {
    val delegTokenRenewer = Master.getMasterPrincipal(conf)
    logDebug("delegation token renewer is: " + delegTokenRenewer)
    if (delegTokenRenewer == null || delegTokenRenewer.length() == 0) {
      val errorMessage = "Can't get Master Kerberos principal for use as renewer"
      logError(errorMessage)
      throw new SparkException(errorMessage)
    }

    delegTokenRenewer
  }

  private def nnsToAccess(hadoopConf: Configuration, sparkConf: SparkConf): Set[Path] = {
    sparkConf.get(NAMENODES_TO_ACCESS).map(new Path(_)).toSet +
      sparkConf.get(STAGING_DIR).map(new Path(_))
        .getOrElse(FileSystem.get(hadoopConf).getHomeDirectory)
  }
} 
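
This provider is not usually called on its own: the YARN client builds a single Credentials object, lets every registered ServiceCredentialProvider populate it, and then attaches the result to the submitting user. The sketch below is a simplified, hypothetical driver loop illustrating that flow; it is not Spark's actual client code, and the object name is made up.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.{Credentials, UserGroupInformation}

import org.apache.spark.SparkConf
import org.apache.spark.deploy.yarn.security.ServiceCredentialProvider

object CollectTokensSketch {
  // Ask every provider that needs tokens to add them to a shared Credentials object,
  // then expose the collected tokens to the current user.
  def collect(
      providers: Seq[ServiceCredentialProvider],
      hadoopConf: Configuration,
      sparkConf: SparkConf): Credentials = {
    val creds = new Credentials()
    providers
      .filter(_.credentialsRequired(hadoopConf))
      .foreach { provider =>
        // The returned Option[Long] (time of the next renewal) is ignored in this sketch.
        provider.obtainCredentials(hadoopConf, sparkConf, creds)
      }
    UserGroupInformation.getCurrentUser.addCredentials(creds)
    creds
  }
}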
Example 2
Source File: HBaseCredentialProvider.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.deploy.yarn.security

import scala.reflect.runtime.universe
import scala.util.control.NonFatal

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.Credentials
import org.apache.hadoop.security.token.{Token, TokenIdentifier}

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging

private[security] class HBaseCredentialProvider extends ServiceCredentialProvider with Logging {

  override def serviceName: String = "hbase"

  override def obtainCredentials(
      hadoopConf: Configuration,
      sparkConf: SparkConf,
      creds: Credentials): Option[Long] = {
    try {
      val mirror = universe.runtimeMirror(getClass.getClassLoader)
      val obtainToken = mirror.classLoader.
        loadClass("org.apache.hadoop.hbase.security.token.TokenUtil").
        getMethod("obtainToken", classOf[Configuration])

      logDebug("Attempting to fetch HBase security token.")
      val token = obtainToken.invoke(null, hbaseConf(hadoopConf))
        .asInstanceOf[Token[_ <: TokenIdentifier]]
      logInfo(s"Get token from HBase: ${token.toString}")
      creds.addToken(token.getService, token)
    } catch {
      case NonFatal(e) =>
        logDebug(s"Failed to get token from service $serviceName", e)
    }

    None
  }

  override def credentialsRequired(hadoopConf: Configuration): Boolean = {
    hbaseConf(hadoopConf).get("hbase.security.authentication") == "kerberos"
  }

  private def hbaseConf(conf: Configuration): Configuration = {
    try {
      val mirror = universe.runtimeMirror(getClass.getClassLoader)
      val confCreate = mirror.classLoader.
        loadClass("org.apache.hadoop.hbase.HBaseConfiguration").
        getMethod("create", classOf[Configuration])
      confCreate.invoke(null, conf).asInstanceOf[Configuration]
    } catch {
      case NonFatal(e) =>
        logDebug("Fail to invoke HBaseConfiguration", e)
        conf
    }
  }
} 
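
A note on the design of this provider: TokenUtil and HBaseConfiguration are loaded through reflection so that the Spark YARN module does not need HBase on its compile-time classpath. If hbase-client were available at compile time, the reflective calls would correspond roughly to the direct sketch below (a sketch only; note that TokenUtil.obtainToken(Configuration) is deprecated in newer HBase releases in favour of a Connection-based variant).

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.security.token.TokenUtil
import org.apache.hadoop.security.Credentials

object HBaseTokenDirect {
  // Direct, non-reflective equivalent of HBaseCredentialProvider.obtainCredentials,
  // assuming hbase-client is on the compile classpath.
  def obtainHBaseToken(hadoopConf: Configuration, creds: Credentials): Unit = {
    val hbaseConf = HBaseConfiguration.create(hadoopConf)
    val token = TokenUtil.obtainToken(hbaseConf)
    creds.addToken(token.getService, token)
  }
}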
Example 3
Source File: IOEncryptionSuite.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.deploy.yarn

import java.io._
import java.nio.charset.StandardCharsets
import java.security.PrivilegedExceptionAction
import java.util.UUID

import org.apache.hadoop.security.{Credentials, UserGroupInformation}
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, Matchers}

import org.apache.spark._
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.internal.config._
import org.apache.spark.serializer._
import org.apache.spark.storage._

class IOEncryptionSuite extends SparkFunSuite with Matchers with BeforeAndAfterAll
  with BeforeAndAfterEach {
  private[this] val blockId = new TempShuffleBlockId(UUID.randomUUID())
  private[this] val conf = new SparkConf()
  private[this] val ugi = UserGroupInformation.createUserForTesting("testuser", Array("testgroup"))
  private[this] val serializer = new KryoSerializer(conf)

  override def beforeAll(): Unit = {
    System.setProperty("SPARK_YARN_MODE", "true")
    ugi.doAs(new PrivilegedExceptionAction[Unit]() {
      override def run(): Unit = {
        conf.set(IO_ENCRYPTION_ENABLED, true)
        val creds = new Credentials()
        SecurityManager.initIOEncryptionKey(conf, creds)
        SparkHadoopUtil.get.addCurrentUserCredentials(creds)
      }
    })
  }

  override def afterAll(): Unit = {
    SparkEnv.set(null)
    System.clearProperty("SPARK_YARN_MODE")
  }

  override def beforeEach(): Unit = {
    super.beforeEach()
  }

  override def afterEach(): Unit = {
    super.afterEach()
    conf.set("spark.shuffle.compress", false.toString)
    conf.set("spark.shuffle.spill.compress", false.toString)
  }

  test("IO encryption read and write") {
    ugi.doAs(new PrivilegedExceptionAction[Unit] {
      override def run(): Unit = {
        conf.set(IO_ENCRYPTION_ENABLED, true)
        conf.set("spark.shuffle.compress", false.toString)
        conf.set("spark.shuffle.spill.compress", false.toString)
        testYarnIOEncryptionWriteRead()
      }
    })
  }

  test("IO encryption read and write with shuffle compression enabled") {
    ugi.doAs(new PrivilegedExceptionAction[Unit] {
      override def run(): Unit = {
        conf.set(IO_ENCRYPTION_ENABLED, true)
        conf.set("spark.shuffle.compress", true.toString)
        conf.set("spark.shuffle.spill.compress", true.toString)
        testYarnIOEncryptionWriteRead()
      }
    })
  }

  private[this] def testYarnIOEncryptionWriteRead(): Unit = {
    val plainStr = "hello world"
    val outputStream = new ByteArrayOutputStream()
    val serializerManager = new SerializerManager(serializer, conf)
    val wrappedOutputStream = serializerManager.wrapStream(blockId, outputStream)
    wrappedOutputStream.write(plainStr.getBytes(StandardCharsets.UTF_8))
    wrappedOutputStream.close()

    val encryptedBytes = outputStream.toByteArray
    val encryptedStr = new String(encryptedBytes)
    assert(plainStr !== encryptedStr)

    val inputStream = new ByteArrayInputStream(encryptedBytes)
    val wrappedInputStream = serializerManager.wrapStream(blockId, inputStream)
    val decryptedBytes = new Array[Byte](1024)
    val len = wrappedInputStream.read(decryptedBytes)
    val decryptedStr = new String(decryptedBytes, 0, len, StandardCharsets.UTF_8)
    assert(decryptedStr === plainStr)
  }
} 
Example 4
Source File: CryptoStreamUtilsSuite.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.security

import java.security.PrivilegedExceptionAction

import org.apache.hadoop.security.{Credentials, UserGroupInformation}

import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite}
import org.apache.spark.internal.config._
import org.apache.spark.security.CryptoStreamUtils._

class CryptoStreamUtilsSuite extends SparkFunSuite {
  val ugi = UserGroupInformation.createUserForTesting("testuser", Array("testgroup"))

  test("Crypto configuration conversion") {
    val sparkKey1 = s"${SPARK_IO_ENCRYPTION_COMMONS_CONFIG_PREFIX}a.b.c"
    val sparkVal1 = "val1"
    val cryptoKey1 = s"${COMMONS_CRYPTO_CONF_PREFIX}a.b.c"

    val sparkKey2 = SPARK_IO_ENCRYPTION_COMMONS_CONFIG_PREFIX.stripSuffix(".") + "A.b.c"
    val sparkVal2 = "val2"
    val cryptoKey2 = s"${COMMONS_CRYPTO_CONF_PREFIX}A.b.c"
    val conf = new SparkConf()
    conf.set(sparkKey1, sparkVal1)
    conf.set(sparkKey2, sparkVal2)
    val props = CryptoStreamUtils.toCryptoConf(conf)
    assert(props.getProperty(cryptoKey1) === sparkVal1)
    assert(!props.containsKey(cryptoKey2))
  }

  test("Shuffle encryption is disabled by default") {
    ugi.doAs(new PrivilegedExceptionAction[Unit]() {
      override def run(): Unit = {
        val credentials = UserGroupInformation.getCurrentUser.getCredentials()
        val conf = new SparkConf()
        initCredentials(conf, credentials)
        assert(credentials.getSecretKey(SPARK_IO_TOKEN) === null)
      }
    })
  }

  test("Shuffle encryption key length should be 128 by default") {
    ugi.doAs(new PrivilegedExceptionAction[Unit]() {
      override def run(): Unit = {
        val credentials = UserGroupInformation.getCurrentUser.getCredentials()
        val conf = new SparkConf()
        conf.set(IO_ENCRYPTION_ENABLED, true)
        initCredentials(conf, credentials)
        val key = credentials.getSecretKey(SPARK_IO_TOKEN)
        assert(key !== null)
        val actual = key.length * (java.lang.Byte.SIZE)
        assert(actual === 128)
      }
    })
  }

  test("Initial credentials with key length in 256") {
    ugi.doAs(new PrivilegedExceptionAction[Unit]() {
      override def run(): Unit = {
        val credentials = UserGroupInformation.getCurrentUser.getCredentials()
        val conf = new SparkConf()
        conf.set(IO_ENCRYPTION_KEY_SIZE_BITS, 256)
        conf.set(IO_ENCRYPTION_ENABLED, true)
        initCredentials(conf, credentials)
        val key = credentials.getSecretKey(SPARK_IO_TOKEN)
        assert(key !== null)
        val actual = key.length * (java.lang.Byte.SIZE)
        assert(actual === 256)
      }
    })
  }

  test("Initial credentials with invalid key length") {
    ugi.doAs(new PrivilegedExceptionAction[Unit]() {
      override def run(): Unit = {
        val credentials = UserGroupInformation.getCurrentUser.getCredentials()
        val conf = new SparkConf()
        conf.set(IO_ENCRYPTION_KEY_SIZE_BITS, 328)
        conf.set(IO_ENCRYPTION_ENABLED, true)
        val thrown = intercept[IllegalArgumentException] {
          initCredentials(conf, credentials)
        }
      }
    })
  }

  private[this] def initCredentials(conf: SparkConf, credentials: Credentials): Unit = {
    if (conf.get(IO_ENCRYPTION_ENABLED)) {
      SecurityManager.initIOEncryptionKey(conf, credentials)
    }
  }
} 
Example 5
Source File: HBaseCredentialProvider.scala    From sparkoscope   with Apache License 2.0
package org.apache.spark.deploy.yarn.security

import scala.reflect.runtime.universe
import scala.util.control.NonFatal

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.Credentials
import org.apache.hadoop.security.token.{Token, TokenIdentifier}

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging

private[security] class HBaseCredentialProvider extends ServiceCredentialProvider with Logging {

  override def serviceName: String = "hbase"

  override def obtainCredentials(
      hadoopConf: Configuration,
      sparkConf: SparkConf,
      creds: Credentials): Option[Long] = {
    try {
      val mirror = universe.runtimeMirror(getClass.getClassLoader)
      val obtainToken = mirror.classLoader.
        loadClass("org.apache.hadoop.hbase.security.token.TokenUtil").
        getMethod("obtainToken", classOf[Configuration])

      logDebug("Attempting to fetch HBase security token.")
      val token = obtainToken.invoke(null, hbaseConf(hadoopConf))
        .asInstanceOf[Token[_ <: TokenIdentifier]]
      logInfo(s"Get token from HBase: ${token.toString}")
      creds.addToken(token.getService, token)
    } catch {
      case NonFatal(e) =>
        logDebug(s"Failed to get token from service $serviceName", e)
    }

    None
  }

  override def credentialsRequired(hadoopConf: Configuration): Boolean = {
    hbaseConf(hadoopConf).get("hbase.security.authentication") == "kerberos"
  }

  private def hbaseConf(conf: Configuration): Configuration = {
    try {
      val mirror = universe.runtimeMirror(getClass.getClassLoader)
      val confCreate = mirror.classLoader.
        loadClass("org.apache.hadoop.hbase.HBaseConfiguration").
        getMethod("create", classOf[Configuration])
      confCreate.invoke(null, conf).asInstanceOf[Configuration]
    } catch {
      case NonFatal(e) =>
        logDebug("Fail to invoke HBaseConfiguration", e)
        conf
    }
  }
} 
Example 6
Source File: HDFSCredentialProvider.scala    From sparkoscope   with Apache License 2.0
package org.apache.spark.deploy.yarn.security

import java.io.{ByteArrayInputStream, DataInputStream}

import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier
import org.apache.hadoop.mapred.Master
import org.apache.hadoop.security.Credentials

import org.apache.spark.{SparkConf, SparkException}
import org.apache.spark.deploy.yarn.config._
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config._

private[security] class HDFSCredentialProvider extends ServiceCredentialProvider with Logging {
  // Token renewal interval. This is set on the first call to obtainCredentials;
  // None means no token renewer was specified, so the renewal interval cannot be determined.
  private var tokenRenewalInterval: Option[Long] = null

  override val serviceName: String = "hdfs"

  override def obtainCredentials(
      hadoopConf: Configuration,
      sparkConf: SparkConf,
      creds: Credentials): Option[Long] = {
    // NameNodes to access, used to fetch delegation tokens from the different FileSystems
    nnsToAccess(hadoopConf, sparkConf).foreach { dst =>
      val dstFs = dst.getFileSystem(hadoopConf)
      logInfo("getting token for namenode: " + dst)
      dstFs.addDelegationTokens(getTokenRenewer(hadoopConf), creds)
    }

    // Compute the token renewal interval if it has not been set yet; this happens only once.
    if (tokenRenewalInterval == null) {
      tokenRenewalInterval = getTokenRenewalInterval(hadoopConf, sparkConf)
    }

    // Get the time of next renewal.
    tokenRenewalInterval.map { interval =>
      creds.getAllTokens.asScala
        .filter(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
        .map { t =>
          val identifier = new DelegationTokenIdentifier()
          identifier.readFields(new DataInputStream(new ByteArrayInputStream(t.getIdentifier)))
          identifier.getIssueDate + interval
        }.foldLeft(0L)(math.max)
    }
  }

  private def getTokenRenewalInterval(
      hadoopConf: Configuration, sparkConf: SparkConf): Option[Long] = {
    // We cannot use tokens generated with 'yarn' as the renewer; trying to renew them fails
    // with an access control error. So create new tokens with the logged-in user as the
    // renewer and derive the renewal interval from those.
    sparkConf.get(PRINCIPAL).flatMap { renewer =>
      val creds = new Credentials()
      nnsToAccess(hadoopConf, sparkConf).foreach { dst =>
        val dstFs = dst.getFileSystem(hadoopConf)
        dstFs.addDelegationTokens(renewer, creds)
      }
      val hdfsToken = creds.getAllTokens.asScala
        .find(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND)
      hdfsToken.map { t =>
        val newExpiration = t.renew(hadoopConf)
        val identifier = new DelegationTokenIdentifier()
        identifier.readFields(new DataInputStream(new ByteArrayInputStream(t.getIdentifier)))
        val interval = newExpiration - identifier.getIssueDate
        logInfo(s"Renewal Interval is $interval")
        interval
      }
    }
  }

  private def getTokenRenewer(conf: Configuration): String = {
    val delegTokenRenewer = Master.getMasterPrincipal(conf)
    logDebug("delegation token renewer is: " + delegTokenRenewer)
    if (delegTokenRenewer == null || delegTokenRenewer.length() == 0) {
      val errorMessage = "Can't get Master Kerberos principal for use as renewer"
      logError(errorMessage)
      throw new SparkException(errorMessage)
    }

    delegTokenRenewer
  }

  private def nnsToAccess(hadoopConf: Configuration, sparkConf: SparkConf): Set[Path] = {
    sparkConf.get(NAMENODES_TO_ACCESS).map(new Path(_)).toSet +
      sparkConf.get(STAGING_DIR).map(new Path(_))
        .getOrElse(FileSystem.get(hadoopConf).getHomeDirectory)
  }
} 
Example 7
Source File: HBaseCredentialProvider.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.deploy.yarn.security

import scala.reflect.runtime.universe
import scala.util.control.NonFatal

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.Credentials
import org.apache.hadoop.security.token.{Token, TokenIdentifier}

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging

private[security] class HBaseCredentialProvider extends ServiceCredentialProvider with Logging {

  override def serviceName: String = "hbase"

  override def obtainCredentials(
      hadoopConf: Configuration,
      sparkConf: SparkConf,
      creds: Credentials): Option[Long] = {
    try {
      val mirror = universe.runtimeMirror(getClass.getClassLoader)
      val obtainToken = mirror.classLoader.
        loadClass("org.apache.hadoop.hbase.security.token.TokenUtil").
        getMethod("obtainToken", classOf[Configuration])

      logDebug("Attempting to fetch HBase security token.")
      val token = obtainToken.invoke(null, hbaseConf(hadoopConf))
        .asInstanceOf[Token[_ <: TokenIdentifier]]
      logInfo(s"Get token from HBase: ${token.toString}")
      creds.addToken(token.getService, token)
    } catch {
      case NonFatal(e) =>
        logDebug(s"Failed to get token from service $serviceName", e)
    }

    None
  }

  override def credentialsRequired(hadoopConf: Configuration): Boolean = {
    hbaseConf(hadoopConf).get("hbase.security.authentication") == "kerberos"
  }

  private def hbaseConf(conf: Configuration): Configuration = {
    try {
      val mirror = universe.runtimeMirror(getClass.getClassLoader)
      val confCreate = mirror.classLoader.
        loadClass("org.apache.hadoop.hbase.HBaseConfiguration").
        getMethod("create", classOf[Configuration])
      confCreate.invoke(null, conf).asInstanceOf[Configuration]
    } catch {
      case NonFatal(e) =>
        logDebug("Fail to invoke HBaseConfiguration", e)
        conf
    }
  }
} 
Example 8
Source File: ExecutorDelegationTokenUpdater.scala    From iolap   with Apache License 2.0
package org.apache.spark.deploy.yarn

import java.util.concurrent.{Executors, TimeUnit}

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.security.{Credentials, UserGroupInformation}

import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.{Logging, SparkConf}
import org.apache.spark.util.{ThreadUtils, Utils}

import scala.util.control.NonFatal

private[spark] class ExecutorDelegationTokenUpdater(
    sparkConf: SparkConf,
    hadoopConf: Configuration) extends Logging {

  @volatile private var lastCredentialsFileSuffix = 0

  private val credentialsFile = sparkConf.get("spark.yarn.credentials.file")

  private val delegationTokenRenewer =
    Executors.newSingleThreadScheduledExecutor(
      ThreadUtils.namedThreadFactory("Delegation Token Refresh Thread"))

  // On the executor, this thread wakes up and picks up new tokens from HDFS, if any.
  private val executorUpdaterRunnable =
    new Runnable {
      override def run(): Unit = Utils.logUncaughtExceptions(updateCredentialsIfRequired())
    }

  def updateCredentialsIfRequired(): Unit = {
    try {
      val credentialsFilePath = new Path(credentialsFile)
      val remoteFs = FileSystem.get(hadoopConf)
      SparkHadoopUtil.get.listFilesSorted(
        remoteFs, credentialsFilePath.getParent,
        credentialsFilePath.getName, SparkHadoopUtil.SPARK_YARN_CREDS_TEMP_EXTENSION)
        .lastOption.foreach { credentialsStatus =>
        val suffix = SparkHadoopUtil.get.getSuffixForCredentialsPath(credentialsStatus.getPath)
        if (suffix > lastCredentialsFileSuffix) {
          logInfo("Reading new delegation tokens from " + credentialsStatus.getPath)
          val newCredentials = getCredentialsFromHDFSFile(remoteFs, credentialsStatus.getPath)
          lastCredentialsFileSuffix = suffix
          UserGroupInformation.getCurrentUser.addCredentials(newCredentials)
          logInfo("Tokens updated from credentials file.")
        } else {
          // Check every hour to see if new credentials arrived.
          logInfo("Updated delegation tokens were expected, but the driver has not updated the " +
            "tokens yet, will check again in an hour.")
          delegationTokenRenewer.schedule(executorUpdaterRunnable, 1, TimeUnit.HOURS)
          return
        }
      }
      val timeFromNowToRenewal =
        SparkHadoopUtil.get.getTimeFromNowToRenewal(
          sparkConf, 0.8, UserGroupInformation.getCurrentUser.getCredentials)
      if (timeFromNowToRenewal <= 0) {
        executorUpdaterRunnable.run()
      } else {
        logInfo(s"Scheduling token refresh from HDFS in $timeFromNowToRenewal millis.")
        delegationTokenRenewer.schedule(
          executorUpdaterRunnable, timeFromNowToRenewal, TimeUnit.MILLISECONDS)
      }
    } catch {
      // Since the file may get deleted while we are reading it, catch the Exception and come
      // back in an hour to try again
      case NonFatal(e) =>
        logWarning("Error while trying to update credentials, will try again in 1 hour", e)
        delegationTokenRenewer.schedule(executorUpdaterRunnable, 1, TimeUnit.HOURS)
    }
  }

  private def getCredentialsFromHDFSFile(remoteFs: FileSystem, tokenPath: Path): Credentials = {
    val stream = remoteFs.open(tokenPath)
    try {
      val newCredentials = new Credentials()
      newCredentials.readTokenStorageStream(stream)
      newCredentials
    } finally {
      stream.close()
    }
  }

  def stop(): Unit = {
    delegationTokenRenewer.shutdown()
  }

} 
Example 9
Source File: HiveTokenCollector.scala    From kyuubi   with Apache License 2.0
package yaooqinn.kyuubi.session.security

import scala.util.control.NonFatal

import org.apache.commons.lang3.StringUtils
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier
import org.apache.hadoop.hive.ql.metadata.Hive
import org.apache.hadoop.io.Text
import org.apache.hadoop.security.{Credentials, UserGroupInformation}
import org.apache.hadoop.security.token.Token
import org.apache.kyuubi.Logging
import org.apache.spark.SparkConf

import yaooqinn.kyuubi.utils.KyuubiHadoopUtil
import yaooqinn.kyuubi.utils.KyuubiHiveUtil._

private[security] object HiveTokenCollector extends TokenCollector with Logging {

  override def obtainTokens(conf: SparkConf): Unit = {
    try {
      val c = hiveConf(conf)
      val principal = c.getTrimmed(METASTORE_PRINCIPAL)
      val uris = c.getTrimmed(URIS)
      require(StringUtils.isNotEmpty(principal), METASTORE_PRINCIPAL + " Undefined")
      require(StringUtils.isNotEmpty(uris), URIS + " Undefined")
      val currentUser = UserGroupInformation.getCurrentUser.getUserName
      val credentials = new Credentials()
      KyuubiHadoopUtil.doAsRealUser {
        val hive = Hive.get(c, true)
        info(s"Getting token from Hive Metastore for owner $currentUser via $principal")
        val tokenString = hive.getDelegationToken(currentUser, principal)
        val token = new Token[DelegationTokenIdentifier]
        token.decodeFromUrlString(tokenString)
        info(s"Got " + DelegationTokenIdentifier.stringifyToken(token))
        credentials.addToken(new Text("hive.metastore.delegation.token"), token)
      }
      UserGroupInformation.getCurrentUser.addCredentials(credentials)
    } catch {
      case NonFatal(e) =>
        error("Failed to get token from hive metatore service", e)
    } finally {
      Hive.closeCurrent()
    }
  }

  override def tokensRequired(conf: SparkConf): Boolean = {
    UserGroupInformation.isSecurityEnabled && StringUtils.isNotBlank(hiveConf(conf).get(URIS))
  }
} 
Example 10
Source File: HDFSTokenCollector.scala    From kyuubi   with Apache License 2.0
package yaooqinn.kyuubi.session.security

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.mapred.Master
import org.apache.hadoop.security.{Credentials, UserGroupInformation}
import org.apache.kyuubi.Logging
import org.apache.spark.KyuubiSparkUtil._
import org.apache.spark.SparkConf

import yaooqinn.kyuubi.service.ServiceException


private[security] object HDFSTokenCollector extends TokenCollector with Logging {

  private def hadoopFStoAccess(conf: SparkConf, hadoopConf: Configuration): Set[FileSystem] = {
    val fileSystems = conf.getOption(ACCESS_FSS)
      .orElse(conf.getOption(ACCESS_NNS)) match {
      case Some(nns) => nns.split(",").map(new Path(_).getFileSystem(hadoopConf)).toSet
      case _ => Set.empty[FileSystem]
    }

    fileSystems +
      conf.getOption(STAGING_DIR).map(new Path(_).getFileSystem(hadoopConf))
        .getOrElse(FileSystem.get(hadoopConf))
  }

  private def renewer(hadoopConf: Configuration): String = {
    val tokenRenewer = Master.getMasterPrincipal(hadoopConf)
    debug("Delegation token renewer is: " + tokenRenewer)

    if (tokenRenewer == null || tokenRenewer.length() == 0) {
      val errorMessage = "Can't get Master Kerberos principal for use as renewer."
      error(errorMessage)
      throw new ServiceException(errorMessage)
    }
    tokenRenewer
  }

  override def obtainTokens(conf: SparkConf): Unit = try {
    val hadoopConf = newConfiguration(conf)
    val tokenRenewer = renewer(hadoopConf)
    val creds = new Credentials()
    hadoopFStoAccess(conf, hadoopConf).foreach { fs =>
      fs.addDelegationTokens(tokenRenewer, creds)
    }
    UserGroupInformation.getCurrentUser.addCredentials(creds)
  } catch {
    case e: Exception =>
      error("Failed to obtain HDFS tokens", e)
  }
} 
Example 11
Source File: ExecutorDelegationTokenUpdater.scala    From spark1.52   with Apache License 2.0
package org.apache.spark.deploy.yarn

import java.util.concurrent.{Executors, TimeUnit}

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.security.{Credentials, UserGroupInformation}

import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.{Logging, SparkConf}
import org.apache.spark.util.{ThreadUtils, Utils}

import scala.util.control.NonFatal

private[spark] class ExecutorDelegationTokenUpdater(
    sparkConf: SparkConf,
    hadoopConf: Configuration) extends Logging {

  @volatile private var lastCredentialsFileSuffix = 0

  private val credentialsFile = sparkConf.get("spark.yarn.credentials.file")
  private val freshHadoopConf =
    SparkHadoopUtil.get.getConfBypassingFSCache(
      hadoopConf, new Path(credentialsFile).toUri.getScheme)

  private val delegationTokenRenewer =
    Executors.newSingleThreadScheduledExecutor(
      ThreadUtils.namedThreadFactory("Delegation Token Refresh Thread"))

  // On the executor, this thread wakes up and picks up new tokens from HDFS, if any.
  private val executorUpdaterRunnable =
    new Runnable {
      override def run(): Unit = Utils.logUncaughtExceptions(updateCredentialsIfRequired())
    }

  def updateCredentialsIfRequired(): Unit = {
    try {
      val credentialsFilePath = new Path(credentialsFile)
      val remoteFs = FileSystem.get(freshHadoopConf)
      SparkHadoopUtil.get.listFilesSorted(
        remoteFs, credentialsFilePath.getParent,
        credentialsFilePath.getName, SparkHadoopUtil.SPARK_YARN_CREDS_TEMP_EXTENSION)
        .lastOption.foreach { credentialsStatus =>
        val suffix = SparkHadoopUtil.get.getSuffixForCredentialsPath(credentialsStatus.getPath)
        if (suffix > lastCredentialsFileSuffix) {
          logInfo("Reading new delegation tokens from " + credentialsStatus.getPath)
          val newCredentials = getCredentialsFromHDFSFile(remoteFs, credentialsStatus.getPath)
          lastCredentialsFileSuffix = suffix
          UserGroupInformation.getCurrentUser.addCredentials(newCredentials)
          logInfo("Tokens updated from credentials file.")
        } else {
          // Check every hour to see if new credentials arrived.
          logInfo("Updated delegation tokens were expected, but the driver has not updated the " +
            "tokens yet, will check again in an hour.")
          delegationTokenRenewer.schedule(executorUpdaterRunnable, 1, TimeUnit.HOURS)
          return
        }
      }
      val timeFromNowToRenewal =
        SparkHadoopUtil.get.getTimeFromNowToRenewal(
          sparkConf, 0.8, UserGroupInformation.getCurrentUser.getCredentials)
      if (timeFromNowToRenewal <= 0) {
        executorUpdaterRunnable.run()
      } else {
        logInfo(s"Scheduling token refresh from HDFS in $timeFromNowToRenewal millis.")
        delegationTokenRenewer.schedule(
          executorUpdaterRunnable, timeFromNowToRenewal, TimeUnit.MILLISECONDS)
      }
    } catch {
      // Since the file may get deleted while we are reading it, catch the Exception and come
      // back in an hour to try again
      case NonFatal(e) =>
        logWarning("Error while trying to update credentials, will try again in 1 hour", e)
        delegationTokenRenewer.schedule(executorUpdaterRunnable, 1, TimeUnit.HOURS)
    }
  }

  private def getCredentialsFromHDFSFile(remoteFs: FileSystem, tokenPath: Path): Credentials = {
    val stream = remoteFs.open(tokenPath)
    try {
      val newCredentials = new Credentials()
      newCredentials.readTokenStorageStream(stream)
      newCredentials
    } finally {
      stream.close()
    }
  }

  def stop(): Unit = {
    delegationTokenRenewer.shutdown()
  }

} 
Example 12
Source File: YARNHadoopDelegationTokenManager.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.deploy.yarn.security

import java.util.ServiceLoader

import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.security.Credentials

import org.apache.spark.SparkConf
import org.apache.spark.deploy.security.HadoopDelegationTokenManager
import org.apache.spark.internal.Logging
import org.apache.spark.util.Utils


private[yarn] class YARNHadoopDelegationTokenManager(
    sparkConf: SparkConf,
    hadoopConf: Configuration,
    fileSystems: Configuration => Set[FileSystem]) extends Logging {

  // Delegates to the built-in providers (HDFS, Hive, HBase) shipped with Spark core.
  private val delegationTokenManager =
    new HadoopDelegationTokenManager(sparkConf, hadoopConf, fileSystems)

  // Public for testing.
  val credentialProviders = getCredentialProviders

  def obtainDelegationTokens(hadoopConf: Configuration, creds: Credentials): Long = {
    val superInterval = delegationTokenManager.obtainDelegationTokens(hadoopConf, creds)

    credentialProviders.values.flatMap { provider =>
      if (provider.credentialsRequired(hadoopConf)) {
        provider.obtainCredentials(hadoopConf, sparkConf, creds)
      } else {
        logDebug(s"Service ${provider.serviceName} does not require a token." +
          s" Check your configuration to see if security is disabled or not.")
        None
      }
    }.foldLeft(superInterval)(math.min)
  }

  private def getCredentialProviders: Map[String, ServiceCredentialProvider] = {
    val providers = loadCredentialProviders

    providers.
      filter { p => delegationTokenManager.isServiceEnabled(p.serviceName) }
      .map { p => (p.serviceName, p) }
      .toMap
  }

  private def loadCredentialProviders: List[ServiceCredentialProvider] = {
    ServiceLoader.load(classOf[ServiceCredentialProvider], Utils.getContextOrSparkClassLoader)
      .asScala
      .toList
  }
} 
Example 13
Source File: YARNHadoopDelegationTokenManagerSuite.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.deploy.yarn.security

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.Credentials
import org.scalatest.Matchers

import org.apache.spark.{SparkConf, SparkFunSuite}
import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil

class YARNHadoopDelegationTokenManagerSuite extends SparkFunSuite with Matchers {
  private var credentialManager: YARNHadoopDelegationTokenManager = null
  private var sparkConf: SparkConf = null
  private var hadoopConf: Configuration = null

  override def beforeAll(): Unit = {
    super.beforeAll()
    sparkConf = new SparkConf()
    hadoopConf = new Configuration()
  }

  test("Correctly loads credential providers") {
    credentialManager = new YARNHadoopDelegationTokenManager(
      sparkConf,
      hadoopConf,
      conf => YarnSparkHadoopUtil.hadoopFSsToAccess(sparkConf, conf))

    credentialManager.credentialProviders.get("yarn-test") should not be (None)
  }
}

class YARNTestCredentialProvider extends ServiceCredentialProvider {
  override def serviceName: String = "yarn-test"

  override def credentialsRequired(conf: Configuration): Boolean = true

  override def obtainCredentials(
      hadoopConf: Configuration,
      sparkConf: SparkConf,
      creds: Credentials): Option[Long] = None
} 
Example 14
Source File: HBaseDelegationTokenProvider.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.deploy.security

import scala.reflect.runtime.universe
import scala.util.control.NonFatal

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.Credentials
import org.apache.hadoop.security.token.{Token, TokenIdentifier}

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.util.Utils

private[security] class HBaseDelegationTokenProvider
  extends HadoopDelegationTokenProvider with Logging {

  override def serviceName: String = "hbase"

  override def obtainDelegationTokens(
      hadoopConf: Configuration,
      sparkConf: SparkConf,
      creds: Credentials): Option[Long] = {
    try {
      val mirror = universe.runtimeMirror(Utils.getContextOrSparkClassLoader)
      val obtainToken = mirror.classLoader.
        loadClass("org.apache.hadoop.hbase.security.token.TokenUtil").
        getMethod("obtainToken", classOf[Configuration])

      logDebug("Attempting to fetch HBase security token.")
      val token = obtainToken.invoke(null, hbaseConf(hadoopConf))
        .asInstanceOf[Token[_ <: TokenIdentifier]]
      logInfo(s"Get token from HBase: ${token.toString}")
      creds.addToken(token.getService, token)
    } catch {
      case NonFatal(e) =>
        logDebug(s"Failed to get token from service $serviceName", e)
    }

    None
  }

  override def delegationTokensRequired(
      sparkConf: SparkConf,
      hadoopConf: Configuration): Boolean = {
    hbaseConf(hadoopConf).get("hbase.security.authentication") == "kerberos"
  }

  private def hbaseConf(conf: Configuration): Configuration = {
    try {
      val mirror = universe.runtimeMirror(Utils.getContextOrSparkClassLoader)
      val confCreate = mirror.classLoader.
        loadClass("org.apache.hadoop.hbase.HBaseConfiguration").
        getMethod("create", classOf[Configuration])
      confCreate.invoke(null, conf).asInstanceOf[Configuration]
    } catch {
      case NonFatal(e) =>
        logDebug("Fail to invoke HBaseConfiguration", e)
        conf
    }
  }
} 
Example 15
Source File: ExecutorDelegationTokenUpdater.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.deploy.yarn

import java.util.concurrent.{Executors, TimeUnit}

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.security.{Credentials, UserGroupInformation}

import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.{Logging, SparkConf}
import org.apache.spark.util.{ThreadUtils, Utils}

import scala.util.control.NonFatal

private[spark] class ExecutorDelegationTokenUpdater(
    sparkConf: SparkConf,
    hadoopConf: Configuration) extends Logging {

  @volatile private var lastCredentialsFileSuffix = 0

  private val credentialsFile = sparkConf.get("spark.yarn.credentials.file")
  private val freshHadoopConf =
    SparkHadoopUtil.get.getConfBypassingFSCache(
      hadoopConf, new Path(credentialsFile).toUri.getScheme)

  private val delegationTokenRenewer =
    Executors.newSingleThreadScheduledExecutor(
      ThreadUtils.namedThreadFactory("Delegation Token Refresh Thread"))

  // On the executor, this thread wakes up and picks up new tokens from HDFS, if any.
  private val executorUpdaterRunnable =
    new Runnable {
      override def run(): Unit = Utils.logUncaughtExceptions(updateCredentialsIfRequired())
    }

  def updateCredentialsIfRequired(): Unit = {
    try {
      val credentialsFilePath = new Path(credentialsFile)
      val remoteFs = FileSystem.get(freshHadoopConf)
      SparkHadoopUtil.get.listFilesSorted(
        remoteFs, credentialsFilePath.getParent,
        credentialsFilePath.getName, SparkHadoopUtil.SPARK_YARN_CREDS_TEMP_EXTENSION)
        .lastOption.foreach { credentialsStatus =>
        val suffix = SparkHadoopUtil.get.getSuffixForCredentialsPath(credentialsStatus.getPath)
        if (suffix > lastCredentialsFileSuffix) {
          logInfo("Reading new delegation tokens from " + credentialsStatus.getPath)
          val newCredentials = getCredentialsFromHDFSFile(remoteFs, credentialsStatus.getPath)
          lastCredentialsFileSuffix = suffix
          UserGroupInformation.getCurrentUser.addCredentials(newCredentials)
          logInfo("Tokens updated from credentials file.")
        } else {
          // Check every hour to see if new credentials arrived.
          logInfo("Updated delegation tokens were expected, but the driver has not updated the " +
            "tokens yet, will check again in an hour.")
          delegationTokenRenewer.schedule(executorUpdaterRunnable, 1, TimeUnit.HOURS)
          return
        }
      }
      val timeFromNowToRenewal =
        SparkHadoopUtil.get.getTimeFromNowToRenewal(
          sparkConf, 0.8, UserGroupInformation.getCurrentUser.getCredentials)
      if (timeFromNowToRenewal <= 0) {
        // We just checked for new credentials but none were there, wait a minute and retry.
        // This handles the shutdown case where the staging directory may have been removed(see
        // SPARK-12316 for more details).
        delegationTokenRenewer.schedule(executorUpdaterRunnable, 1, TimeUnit.MINUTES)
      } else {
        logInfo(s"Scheduling token refresh from HDFS in $timeFromNowToRenewal millis.")
        delegationTokenRenewer.schedule(
          executorUpdaterRunnable, timeFromNowToRenewal, TimeUnit.MILLISECONDS)
      }
    } catch {
      // Since the file may get deleted while we are reading it, catch the Exception and come
      // back in an hour to try again
      case NonFatal(e) =>
        logWarning("Error while trying to update credentials, will try again in 1 hour", e)
        delegationTokenRenewer.schedule(executorUpdaterRunnable, 1, TimeUnit.HOURS)
    }
  }

  private def getCredentialsFromHDFSFile(remoteFs: FileSystem, tokenPath: Path): Credentials = {
    val stream = remoteFs.open(tokenPath)
    try {
      val newCredentials = new Credentials()
      newCredentials.readTokenStorageStream(stream)
      newCredentials
    } finally {
      stream.close()
    }
  }

  def stop(): Unit = {
    delegationTokenRenewer.shutdown()
  }

}