org.apache.spark.scheduler.SparkListener Scala Examples

The following examples show how to use org.apache.spark.scheduler.SparkListener. They are drawn from open-source projects; the source file, project, and license are noted above each example.
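Before diving into the examples, here is a minimal sketch (the object name, Spark master, and log messages are illustrative and not taken from any project below) of how a SparkListener is typically defined and registered on a SparkContext:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd, SparkListenerJobStart}

object ListenerDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("listener-demo"))

    // Register a listener that logs job boundaries.
    sc.addSparkListener(new SparkListener {
      override def onJobStart(jobStart: SparkListenerJobStart): Unit =
        println(s"Job ${jobStart.jobId} started with ${jobStart.stageInfos.size} stage(s)")
      override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit =
        println(s"Job ${jobEnd.jobId} finished: ${jobEnd.jobResult}")
    })

    sc.parallelize(1 to 10).count() // triggers the callbacks above
    sc.stop()
  }
}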
Example 1
Source File: CleanupUtil.scala    From hazelcast-spark   with Apache License 2.0
package com.hazelcast.spark.connector.util

import com.hazelcast.spark.connector.util.ConnectionUtil.closeAll
import org.apache.spark.SparkContext
import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd, SparkListenerJobStart}

object CleanupUtil {

  val jobIds: collection.mutable.Map[Int, Seq[Int]] = collection.mutable.Map[Int, Seq[Int]]()
  val cleanupJobRddName: String = "HazelcastResourceCleanupJob"

  def addCleanupListener(sc: SparkContext): Unit = {
    sc.addSparkListener(new SparkListener {
      override def onJobStart(jobStart: SparkListenerJobStart): Unit = {
        this.synchronized {
          jobStart.stageInfos.foreach(info => {
            info.rddInfos.foreach(rdd => {
              if (!cleanupJobRddName.equals(rdd.name)) {
                val ids: Seq[Int] = info.rddInfos.map(_.id)
                val maybeIds: Option[Seq[Int]] = jobIds.get(jobStart.jobId)
                if (maybeIds.isDefined) {
                  jobIds.put(jobStart.jobId, ids ++ maybeIds.get)
                } else {
                  jobIds.put(jobStart.jobId, ids)
                }
              }
            })
          })
        }
      }

      override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = {
        this.synchronized {
          if (jobIds.contains(jobEnd.jobId)) {
            try {
              val workers = sc.getConf.getInt("spark.executor.instances", sc.getExecutorStorageStatus.length)
              val rddId: Option[Seq[Int]] = jobIds.get(jobEnd.jobId)
              if (rddId.isDefined) {
                sc.parallelize(1 to workers, workers).setName(cleanupJobRddName).foreachPartition(it ⇒ closeAll(rddId.get))
              }
              jobIds -= jobEnd.jobId
            } catch {
              case e: Exception => // ignore failures during resource cleanup
            }
          }
        }
      }
    })
  }


} 
Example 2
Source File: OapListener.scala    From OAP   with Apache License 2.0
package org.apache.spark.sql.oap.listener

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent}
import org.apache.spark.sql.oap.OapRuntime

@DeveloperApi
case class SparkListenerCustomInfoUpdate(
    hostName: String,
    executorId: String,
    clazzName: String,
    customizedInfo: String) extends SparkListenerEvent {
  override def logEvent: Boolean = false
}

class OapListener extends SparkListener {
  override def onOtherEvent(event: SparkListenerEvent): Unit = event match {
    case customInfo: SparkListenerCustomInfoUpdate =>
      if (customInfo.clazzName.contains("OapFiberCacheHeartBeatMessager")) {
        OapRuntime.getOrCreate.fiberSensor.updateLocations(customInfo)
      } else if (customInfo.clazzName.contains("FiberCacheManagerMessager")) {
        OapRuntime.getOrCreate.fiberSensor.updateMetrics(customInfo)
      }
    case _ =>
  }
} 
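Custom events such as SparkListenerCustomInfoUpdate arrive through onOtherEvent rather than a dedicated callback. The sketch below shows the same pattern with made-up names; note that posting a custom event onto Spark's internal listener bus is private[spark] API, which is why OAP declares these classes under an org.apache.spark package:

import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent}

// Hypothetical custom event; anything extending SparkListenerEvent can be
// matched on in onOtherEvent once it reaches the listener bus.
case class SparkListenerDemoEvent(payload: String) extends SparkListenerEvent

class DemoListener extends SparkListener {
  override def onOtherEvent(event: SparkListenerEvent): Unit = event match {
    case SparkListenerDemoEvent(payload) => println(s"received: $payload")
    case _ => // ignore all other events
  }
}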
Example 3
Source File: ProfilerListener.scala    From carbondata   with Apache License 2.0
package org.apache.spark.sql.profiler

import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent, SparkListenerTaskEnd, SparkListenerTaskGettingResult, SparkListenerTaskStart}
import org.apache.spark.sql.execution.ui.{SparkListenerSQLExecutionEnd, SparkListenerSQLExecutionStart}


private[profiler] class ProfilerListener extends SparkListener {
  override def onOtherEvent(event: SparkListenerEvent): Unit = {
    Profiler.invokeIfEnable {
      event match {
        case executionStart: SparkListenerSQLExecutionStart =>
          Profiler.addExecutionMessage(
            executionStart.executionId,
            ExecutionStart(
              executionStart.executionId,
              executionStart.time,
              executionStart.physicalPlanDescription
            ))
        case executionEnd: SparkListenerSQLExecutionEnd =>
          Profiler.send(
            ExecutionEnd(
              executionEnd.executionId,
              executionEnd.time
            )
          )
        case _ =>
      }
    }
  }
} 
Example 4
Source File: OutputMetricsTest.scala    From memsql-spark-connector   with Apache License 2.0
package com.memsql.spark

import com.github.mrpowers.spark.daria.sql.SparkSessionExt._
import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}
import org.apache.spark.sql.types.{IntegerType, StringType}

class OutputMetricsTest extends IntegrationSuiteBase {
  it("records written") {
    var outputWritten = 0L
    spark.sparkContext.addSparkListener(new SparkListener() {
      override def onTaskEnd(taskEnd: SparkListenerTaskEnd) {
        val metrics = taskEnd.taskMetrics
        outputWritten += metrics.outputMetrics.recordsWritten
      }
    })

    val numRows = 100000
    val df1 = spark.createDF(
      List.range(0, numRows),
      List(("id", IntegerType, true))
    )

    df1.repartition(30)

    df1.write
      .format("memsql")
      .save("metricsInts")

    assert(outputWritten == numRows)
    outputWritten = 0

    val df2 = spark.createDF(
      List("st1", "", null),
      List(("st", StringType, true))
    )

    df2.write
      .format("memsql")
      .save("metricsStrings")

    assert(outputWritten == 3)
  }
} 
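A note on the listener above: onTaskEnd is invoked on the listener bus thread, asynchronously from the test body, so a thread-safe counter is a reasonable precaution when accumulating metrics this way. A minimal variant of the same counter, assuming the surrounding IntegrationSuiteBase still provides the spark session:

import java.util.concurrent.atomic.AtomicLong

import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}

// Thread-safe counterpart of the mutable var used in the test above.
val outputWritten = new AtomicLong(0L)
spark.sparkContext.addSparkListener(new SparkListener {
  override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit =
    outputWritten.addAndGet(taskEnd.taskMetrics.outputMetrics.recordsWritten)
})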
Example 5
Source File: JobProcessor.scala    From sparkplug   with MIT License
package springnz.sparkplug.executor

import akka.actor._
import org.apache.spark.SparkContext
import org.apache.spark.scheduler.SparkListener
import springnz.sparkplug.core.SparkPlugin
import springnz.sparkplug.executor.InternalMessageTypes.RoutedRequest
import springnz.sparkplug.executor.MessageTypes.{ JobFailure, JobRequest, JobSuccess }
import springnz.sparkplug.util.Logging

import scala.concurrent.Future
import scala.util._

object JobProcessor {
  // Uses its own dispatcher for bulkheading
  def props(implicit sparkContext: SparkContext): Props = Props(new JobProcessor).withDispatcher("akka.sparkplug-job-dispatcher")
}

class JobProcessor(implicit sparkContext: SparkContext) extends Actor with Logging {

  def executeJob(job: JobRequest, originator: ActorRef) = {
    val factoryName = job.factoryClassName

    implicit val ec = context.dispatcher
    val f: Future[Any] = Future {
      log.info(s"Loading and instantiating job '$factoryName'.")
      val factoryAny = Class.forName(factoryName).newInstance()
      val operation = factoryAny.asInstanceOf[SparkPlugin].apply(job.data)

      log.info(s"Executing job '$factoryName'.")
      val result = operation.run(sparkContext)
      log.info(s"Job '$factoryName' finished.")

      // TODO: do something about the SparkListener (not sure what)
      val listener = new SparkListener {}
      sparkContext.addSparkListener(listener)

      result
    }
    f.onComplete {
      case Success(result) ⇒
        log.info(s"Successfully processed job $job. Notifying client.")
        log.debug(s"Job result: $result")
        originator ! JobSuccess(job, result)

      case Failure(reason) ⇒
        log.error(s"Job processing of job $job failed. Notifying client", reason)
        originator ! JobFailure(job, reason)
    }
  }

  def receive = {
    case RoutedRequest(job, originator: ActorRef) ⇒
      log.info(s"JobProcessor ${self.path.toString} routed request to process job ${job}")
      executeJob(job, originator)
  }
} 
Example 6
Source File: PulsarContinuousTest.scala    From pulsar-spark   with Apache License 2.0
package org.apache.spark.sql.pulsar

import java.util.concurrent.atomic.AtomicInteger

import scala.language.reflectiveCalls

import org.apache.spark.SparkContext
import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd, SparkListenerTaskStart}
import org.apache.spark.sql.execution.streaming.continuous.ContinuousExecution
import org.apache.spark.sql.streaming.Trigger
import org.apache.spark.sql.test.TestSparkSession

trait PulsarContinuousTest extends PulsarSourceTest {

  override val defaultTrigger = Trigger.Continuous(1000)
  override val defaultUseV2Sink = true

  // We need more than the default local[2] to be able to schedule all partitions simultaneously.
  override protected def createSparkSession =
    new TestSparkSession(
      new SparkContext(
        "local[10]",
        "continuous-stream-test-sql-context",
        sparkConf.set("spark.sql.testkey", "true")))

  // Continuous processing tasks end asynchronously, so test that they actually end.
  private val tasksEndedListener = new SparkListener() {
    val activeTaskIdCount = new AtomicInteger(0)

    override def onTaskStart(start: SparkListenerTaskStart): Unit = {
      activeTaskIdCount.incrementAndGet()
    }

    override def onTaskEnd(end: SparkListenerTaskEnd): Unit = {
      activeTaskIdCount.decrementAndGet()
    }
  }

  override def beforeEach(): Unit = {
    super.beforeEach()
    spark.sparkContext.addSparkListener(tasksEndedListener)
  }

  override def afterEach(): Unit = {
    eventually(timeout(streamingTimeout)) {
      assert(tasksEndedListener.activeTaskIdCount.get() == 0)
    }
    spark.sparkContext.removeSparkListener(tasksEndedListener)
    super.afterEach()
  }

  test("ensure continuous stream is being used") {
    val query = spark.readStream
      .format("rate")
      .option("numPartitions", "1")
      .option("rowsPerSecond", "1")
      .load()

    testStream(query)(
      Execute(q => assert(q.isInstanceOf[ContinuousExecution]))
    )
  }
} 
Example 7
Source File: QueryGuardListener.scala    From gimel   with Apache License 2.0
package com.paypal.gimel.common.query.guard

import java.util.concurrent.atomic.AtomicBoolean

import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd, SparkListenerJobStart, SparkListenerStageCompleted}
import org.apache.spark.sql.SparkSession
import org.joda.time.{DateTime, Instant}

import com.paypal.gimel.common.conf.{QueryGuardConfigs, QueryGuardConstants}
import com.paypal.gimel.common.utilities.GenericUtils
import com.paypal.gimel.logger.Logger

class QueryGuardListener[E >: QueryGuardDelayedEvent](spark: SparkSession,
                                                      discoveryType: String =
                                                        "job")
    extends SparkListener
    with Producer[E] {
  private val logger = new Logger(this.getClass.getName)
  private val stopped = new AtomicBoolean(true)
  private val HEADER: String = "[DISCOVERY] "
  private var _consumers: Seq[Consumer[E]] = Seq.empty

  override def onJobStart(jobStart: SparkListenerJobStart) {
    logger.info(
      s"${HEADER}Job[${jobStart.jobId}] started with ${jobStart.stageInfos.size} stages @ ${Instant.now()}"
    )
    if (!stopped.get) {
      val job = JobSubmitted(
        jobStart.jobId,
        discoveryType,
        System.currentTimeMillis(),
        jobTtl = GenericUtils.getValue(
          spark.conf,
          QueryGuardConfigs.JOB_TTL,
          QueryGuardConstants.DEFAULT_JOB_TTL
        ),
        delayTtl = GenericUtils.getValue(
          spark.conf,
          QueryGuardConfigs.DELAY_TTL,
          QueryGuardConstants.DEFAULT_DELAY_TTL
        )
      )
      logger.info(
        s"${HEADER}Proceeding to queue in Job[${jobStart.jobId}] onto QueryGuard"
      )
      publish(job)
    } else {
      logger.info(
        s"${HEADER}As QueryGuardListener is ${stopped.get()}," +
          s" unable to queue in Job[${jobStart.jobId}]"
      )
    }
  }

  override def publish(queryGuardEvent: E): Unit = {
    for (consumer <- _consumers) {
      consumer.consume(queryGuardEvent)
    }
  }

  override def onStageCompleted(
    stageCompleted: SparkListenerStageCompleted
  ): Unit = {
    logger.info(
      s"Stage ${stageCompleted.stageInfo.stageId} completed with ${stageCompleted.stageInfo.numTasks} tasks."
    )
  }

  override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit = {
    logger.info(
      s"Job[${jobEnd.jobId}] completed at ${new DateTime(jobEnd.time)}" +
        s" with result -> ${jobEnd.jobResult}"
    )
    super.onJobEnd(jobEnd)
  }

  override def registerConsumers(consumers: Seq[Consumer[E]]): Unit = {
    _consumers = consumers
  }

  def start(): Unit = {
    // toggle stopped to false
    stopped.set(false)
    logger.info(s"${HEADER}Started QueryGuardListener: $stopped")
  }

  def stop(): Unit = {
    // toggle stopped to true
    stopped.compareAndSet(false, true)
    logger.info(s"${HEADER}Stopped QueryGuardListener: $stopped")
  }
} 
Example 8
Source File: package.scala    From infinispan-spark   with Apache License 2.0
package org.infinispan.spark

import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd}
import org.apache.spark.streaming.dstream.DStream
import org.infinispan.client.hotrod.RemoteCacheManager
import org.infinispan.spark.config.ConnectorConfiguration
import org.infinispan.spark.rdd.RemoteCacheManagerBuilder

package object stream {

   implicit class InfinispanDStream[K, V](stream: DStream[(K, V)]) {

      private def getCacheManager(configuration: ConnectorConfiguration): RemoteCacheManager = {
         val rcm = RemoteCacheManagerBuilder.create(configuration)
         stream.context.sparkContext.addSparkListener(new SparkListener {
            override def onJobEnd(jobEnd: SparkListenerJobEnd) = rcm.stop()
         })
         rcm
      }

      def writeToInfinispan(configuration: ConnectorConfiguration) = {
         val rcm = getCacheManager(configuration)
         val cache = getCache(configuration, rcm)
         val topologyConfig = getCacheTopology(cache.getCacheTopologyInfo)
         configuration.setServerList(topologyConfig)
         stream.foreachRDD(_.writeToInfinispan(configuration))
      }
   }

} 
Example 9
Source File: LogUrlsStandaloneSuite.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.deploy

import java.net.URL

import scala.collection.mutable
import scala.io.Source

import org.apache.spark.scheduler.cluster.ExecutorInfo
import org.apache.spark.scheduler.{SparkListenerExecutorAdded, SparkListener}
import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkFunSuite}
import org.apache.spark.util.SparkConfWithEnv

class LogUrlsStandaloneSuite extends SparkFunSuite with LocalSparkContext {

  
  private val WAIT_TIMEOUT_MILLIS = 10000

  test("verify that correct log urls get propagated from workers") {
    sc = new SparkContext("local-cluster[2,1,1024]", "test")

    val listener = new SaveExecutorInfo
    sc.addSparkListener(listener)

    // Trigger a job so that executors get added
    sc.parallelize(1 to 100, 4).map(_.toString).count()

    sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
    listener.addedExecutorInfos.values.foreach { info =>
      assert(info.logUrlMap.nonEmpty)
      // Browse to each URL to check that it's valid
      info.logUrlMap.foreach { case (logType, logUrl) =>
        val html = Source.fromURL(logUrl).mkString
        assert(html.contains(s"$logType log page"))
      }
    }
  }

  test("verify that log urls reflect SPARK_PUBLIC_DNS (SPARK-6175)") {
    val SPARK_PUBLIC_DNS = "public_dns"
    val conf = new SparkConfWithEnv(Map("SPARK_PUBLIC_DNS" -> SPARK_PUBLIC_DNS)).set(
      "spark.extraListeners", classOf[SaveExecutorInfo].getName)
    sc = new SparkContext("local-cluster[2,1,1024]", "test", conf)

    // Trigger a job so that executors get added
    sc.parallelize(1 to 100, 4).map(_.toString).count()

    sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
    val listeners = sc.listenerBus.findListenersByClass[SaveExecutorInfo]
    assert(listeners.size === 1)
    val listener = listeners(0)
    listener.addedExecutorInfos.values.foreach { info =>
      assert(info.logUrlMap.nonEmpty)
      info.logUrlMap.values.foreach { logUrl =>
        assert(new URL(logUrl).getHost === SPARK_PUBLIC_DNS)
      }
    }
  }
}

private[spark] class SaveExecutorInfo extends SparkListener {
  val addedExecutorInfos = mutable.Map[String, ExecutorInfo]()

  override def onExecutorAdded(executor: SparkListenerExecutorAdded) {
    addedExecutorInfos(executor.executorId) = executor.executorInfo
  }
} 
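The second test above registers SaveExecutorInfo through the spark.extraListeners configuration rather than calling addSparkListener; any SparkListener reachable on the classpath with a zero-argument (or SparkConf-taking) constructor can be attached this way. A minimal sketch with illustrative class and application names:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd}

// Needs a no-arg constructor so Spark can instantiate it from configuration.
class ShutdownLogger extends SparkListener {
  override def onApplicationEnd(end: SparkListenerApplicationEnd): Unit =
    println(s"application ended at ${end.time}")
}

val conf = new SparkConf()
  .setMaster("local[2]")
  .setAppName("extra-listeners-demo")
  .set("spark.extraListeners", classOf[ShutdownLogger].getName)
val sc = new SparkContext(conf)
sc.stop()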
Example 10
Source File: KinesisContinuousTest.scala    From kinesis-sql   with Apache License 2.0
package org.apache.spark.sql.kinesis

import java.util.concurrent.atomic.AtomicInteger

import org.scalatest.time.SpanSugar._

import org.apache.spark.SparkContext
import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd, SparkListenerTaskStart}
import org.apache.spark.sql.streaming.Trigger
import org.apache.spark.sql.test.TestSparkSession

trait KinesisContinuousTest extends KinesisSourceTest {
  override val defaultTrigger = Trigger.Continuous("1 hour")
  override val defaultUseV2Sink = true

  override val streamingTimeout = 120.seconds

  override protected def createSparkSession = new TestSparkSession(
    new SparkContext(
      "local[10]",
      "continuous-stream-test-sql-context",
      sparkConf.set("spark.sql.testkey", "true")))

  // Continuous processing tasks end asynchronously, so test that they actually end.
  private val tasksEndedListener = new SparkListener() {
    val activeTaskIdCount = new AtomicInteger(0)

    override def onTaskStart(start: SparkListenerTaskStart): Unit = {
      activeTaskIdCount.incrementAndGet()
    }

    override def onTaskEnd(end: SparkListenerTaskEnd): Unit = {
      activeTaskIdCount.decrementAndGet()
    }
  }

  override def beforeEach(): Unit = {
    super.beforeEach()
    spark.sparkContext.addSparkListener(tasksEndedListener)
  }

  override def afterEach(): Unit = {
    eventually(timeout(streamingTimeout)) {
      assert(tasksEndedListener.activeTaskIdCount.get() == 0)
    }
    spark.sparkContext.removeSparkListener(tasksEndedListener)
    super.afterEach()
  }

} 
Example 11
Source File: LogUrlsStandaloneSuite.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.deploy

import java.net.URL

import scala.collection.mutable
import scala.io.Source

import org.apache.spark.{LocalSparkContext, SparkContext, SparkFunSuite}
import org.apache.spark.scheduler.{SparkListener, SparkListenerExecutorAdded}
import org.apache.spark.scheduler.cluster.ExecutorInfo
import org.apache.spark.util.SparkConfWithEnv

class LogUrlsStandaloneSuite extends SparkFunSuite with LocalSparkContext {

  
  private val WAIT_TIMEOUT_MILLIS = 10000

  test("verify that correct log urls get propagated from workers") {
    sc = new SparkContext("local-cluster[2,1,1024]", "test")

    val listener = new SaveExecutorInfo
    sc.addSparkListener(listener)

    // Trigger a job so that executors get added
    sc.parallelize(1 to 100, 4).map(_.toString).count()

    sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
    listener.addedExecutorInfos.values.foreach { info =>
      assert(info.logUrlMap.nonEmpty)
      // Browse to each URL to check that it's valid
      info.logUrlMap.foreach { case (logType, logUrl) =>
        val html = Source.fromURL(logUrl).mkString
        assert(html.contains(s"$logType log page"))
      }
    }
  }

  test("verify that log urls reflect SPARK_PUBLIC_DNS (SPARK-6175)") {
    val SPARK_PUBLIC_DNS = "public_dns"
    val conf = new SparkConfWithEnv(Map("SPARK_PUBLIC_DNS" -> SPARK_PUBLIC_DNS)).set(
      "spark.extraListeners", classOf[SaveExecutorInfo].getName)
    sc = new SparkContext("local-cluster[2,1,1024]", "test", conf)

    // Trigger a job so that executors get added
    sc.parallelize(1 to 100, 4).map(_.toString).count()

    sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
    val listeners = sc.listenerBus.findListenersByClass[SaveExecutorInfo]
    assert(listeners.size === 1)
    val listener = listeners(0)
    listener.addedExecutorInfos.values.foreach { info =>
      assert(info.logUrlMap.nonEmpty)
      info.logUrlMap.values.foreach { logUrl =>
        assert(new URL(logUrl).getHost === SPARK_PUBLIC_DNS)
      }
    }
  }
}

private[spark] class SaveExecutorInfo extends SparkListener {
  val addedExecutorInfos = mutable.Map[String, ExecutorInfo]()

  override def onExecutorAdded(executor: SparkListenerExecutorAdded) {
    addedExecutorInfos(executor.executorId) = executor.executorInfo
  }
} 
Example 12
Source File: SQLHistoryServerPlugin.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.sql.execution.ui

import org.apache.spark.SparkConf
import org.apache.spark.scheduler.SparkListener
import org.apache.spark.status.{AppHistoryServerPlugin, ElementTrackingStore}
import org.apache.spark.ui.SparkUI

class SQLHistoryServerPlugin extends AppHistoryServerPlugin {
  override def createListeners(conf: SparkConf, store: ElementTrackingStore): Seq[SparkListener] = {
    Seq(new SQLAppStatusListener(conf, store, live = false))
  }

  override def setupUI(ui: SparkUI): Unit = {
    val sqlStatusStore = new SQLAppStatusStore(ui.store.store)
    if (sqlStatusStore.executionsCount() > 0) {
      new SQLTab(sqlStatusStore, ui)
    }
  }
} 
Example 13
Source File: KafkaContinuousTest.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.sql.kafka010

import java.util.concurrent.atomic.AtomicInteger

import org.apache.spark.SparkContext
import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd, SparkListenerTaskStart}
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
import org.apache.spark.sql.execution.streaming.StreamExecution
import org.apache.spark.sql.execution.streaming.continuous.ContinuousExecution
import org.apache.spark.sql.streaming.Trigger
import org.apache.spark.sql.test.TestSparkSession

// Trait to configure StreamTest for kafka continuous execution tests.
trait KafkaContinuousTest extends KafkaSourceTest {
  override val defaultTrigger = Trigger.Continuous(1000)
  override val defaultUseV2Sink = true

  // We need more than the default local[2] to be able to schedule all partitions simultaneously.
  override protected def createSparkSession = new TestSparkSession(
    new SparkContext(
      "local[10]",
      "continuous-stream-test-sql-context",
      sparkConf.set("spark.sql.testkey", "true")))

  // In addition to setting the partitions in Kafka, we have to wait until the query has
  // reconfigured to the new count so the test framework can hook in properly.
  override protected def setTopicPartitions(
      topic: String, newCount: Int, query: StreamExecution) = {
    testUtils.addPartitions(topic, newCount)
    eventually(timeout(streamingTimeout)) {
      assert(
        query.lastExecution.logical.collectFirst {
          case DataSourceV2Relation(_, r: KafkaContinuousReader) => r
        }.exists(_.knownPartitions.size == newCount),
        s"query never reconfigured to $newCount partitions")
    }
  }

  // Continuous processing tasks end asynchronously, so test that they actually end.
  private val tasksEndedListener = new SparkListener() {
    val activeTaskIdCount = new AtomicInteger(0)

    override def onTaskStart(start: SparkListenerTaskStart): Unit = {
      activeTaskIdCount.incrementAndGet()
    }

    override def onTaskEnd(end: SparkListenerTaskEnd): Unit = {
      activeTaskIdCount.decrementAndGet()
    }
  }

  override def beforeEach(): Unit = {
    super.beforeEach()
    spark.sparkContext.addSparkListener(tasksEndedListener)
  }

  override def afterEach(): Unit = {
    eventually(timeout(streamingTimeout)) {
      assert(tasksEndedListener.activeTaskIdCount.get() == 0)
    }
    spark.sparkContext.removeSparkListener(tasksEndedListener)
    super.afterEach()
  }


  test("ensure continuous stream is being used") {
    val query = spark.readStream
      .format("rate")
      .option("numPartitions", "1")
      .option("rowsPerSecond", "1")
      .load()

    testStream(query)(
      Execute(q => assert(q.isInstanceOf[ContinuousExecution]))
    )
  }
} 
Example 14
Source File: LogUrlsStandaloneSuite.scala    From iolap   with Apache License 2.0
package org.apache.spark.deploy

import java.net.URL

import scala.collection.JavaConversions._
import scala.collection.mutable
import scala.io.Source

import org.apache.spark.scheduler.cluster.ExecutorInfo
import org.apache.spark.scheduler.{SparkListenerExecutorAdded, SparkListener}
import org.apache.spark.{LocalSparkContext, SparkConf, SparkContext, SparkFunSuite}

class LogUrlsStandaloneSuite extends SparkFunSuite with LocalSparkContext {

  
  private val WAIT_TIMEOUT_MILLIS = 10000

  test("verify that correct log urls get propagated from workers") {
    sc = new SparkContext("local-cluster[2,1,512]", "test")

    val listener = new SaveExecutorInfo
    sc.addSparkListener(listener)

    // Trigger a job so that executors get added
    sc.parallelize(1 to 100, 4).map(_.toString).count()

    sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
    listener.addedExecutorInfos.values.foreach { info =>
      assert(info.logUrlMap.nonEmpty)
      // Browse to each URL to check that it's valid
      info.logUrlMap.foreach { case (logType, logUrl) =>
        val html = Source.fromURL(logUrl).mkString
        assert(html.contains(s"$logType log page"))
      }
    }
  }

  test("verify that log urls reflect SPARK_PUBLIC_DNS (SPARK-6175)") {
    val SPARK_PUBLIC_DNS = "public_dns"
    class MySparkConf extends SparkConf(false) {
      override def getenv(name: String): String = {
        if (name == "SPARK_PUBLIC_DNS") SPARK_PUBLIC_DNS
        else super.getenv(name)
      }

      override def clone: SparkConf = {
        new MySparkConf().setAll(getAll)
      }
    }
    val conf = new MySparkConf().set(
      "spark.extraListeners", classOf[SaveExecutorInfo].getName)
    sc = new SparkContext("local-cluster[2,1,512]", "test", conf)

    // Trigger a job so that executors get added
    sc.parallelize(1 to 100, 4).map(_.toString).count()

    sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
    val listeners = sc.listenerBus.findListenersByClass[SaveExecutorInfo]
    assert(listeners.size === 1)
    val listener = listeners(0)
    listener.addedExecutorInfos.values.foreach { info =>
      assert(info.logUrlMap.nonEmpty)
      info.logUrlMap.values.foreach { logUrl =>
        assert(new URL(logUrl).getHost === SPARK_PUBLIC_DNS)
      }
    }
  }
}

private[spark] class SaveExecutorInfo extends SparkListener {
  val addedExecutorInfos = mutable.Map[String, ExecutorInfo]()

  override def onExecutorAdded(executor: SparkListenerExecutorAdded) {
    addedExecutorInfos(executor.executorId) = executor.executorInfo
  }
} 
Example 15
Source File: LogUrlsStandaloneSuite.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.deploy

import java.net.URL

import scala.collection.mutable
import scala.io.Source

import org.apache.spark.{LocalSparkContext, SparkContext, SparkFunSuite}
import org.apache.spark.scheduler.{SparkListener, SparkListenerExecutorAdded}
import org.apache.spark.scheduler.cluster.ExecutorInfo
import org.apache.spark.util.SparkConfWithEnv

class LogUrlsStandaloneSuite extends SparkFunSuite with LocalSparkContext {

  
  private val WAIT_TIMEOUT_MILLIS = 10000

  test("verify that correct log urls get propagated from workers") {
    sc = new SparkContext("local-cluster[2,1,1024]", "test")

    val listener = new SaveExecutorInfo
    sc.addSparkListener(listener)

    // Trigger a job so that executors get added
    sc.parallelize(1 to 100, 4).map(_.toString).count()

    sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
    listener.addedExecutorInfos.values.foreach { info =>
      assert(info.logUrlMap.nonEmpty)
      // Browse to each URL to check that it's valid
      info.logUrlMap.foreach { case (logType, logUrl) =>
        val html = Source.fromURL(logUrl).mkString
        assert(html.contains(s"$logType log page"))
      }
    }
  }

  test("verify that log urls reflect SPARK_PUBLIC_DNS (SPARK-6175)") {
    val SPARK_PUBLIC_DNS = "public_dns"
    val conf = new SparkConfWithEnv(Map("SPARK_PUBLIC_DNS" -> SPARK_PUBLIC_DNS)).set(
      "spark.extraListeners", classOf[SaveExecutorInfo].getName)
    sc = new SparkContext("local-cluster[2,1,1024]", "test", conf)

    // Trigger a job so that executors get added
    sc.parallelize(1 to 100, 4).map(_.toString).count()

    sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
    val listeners = sc.listenerBus.findListenersByClass[SaveExecutorInfo]
    assert(listeners.size === 1)
    val listener = listeners(0)
    listener.addedExecutorInfos.values.foreach { info =>
      assert(info.logUrlMap.nonEmpty)
      info.logUrlMap.values.foreach { logUrl =>
        assert(new URL(logUrl).getHost === SPARK_PUBLIC_DNS)
      }
    }
  }
}

private[spark] class SaveExecutorInfo extends SparkListener {
  val addedExecutorInfos = mutable.Map[String, ExecutorInfo]()

  override def onExecutorAdded(executor: SparkListenerExecutorAdded) {
    addedExecutorInfos(executor.executorId) = executor.executorInfo
  }
} 
Example 16
Source File: MergeIntoAccumulatorSuite.scala    From delta   with Apache License 2.0
package org.apache.spark.sql.delta

import java.util.concurrent.atomic.AtomicReference

import scala.collection.JavaConverters._

import org.apache.spark.sql.delta.commands.MergeIntoCommand
import org.apache.spark.sql.delta.test.DeltaSQLCommandTest

import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.status.TaskDataWrapper
import org.apache.spark.util.JsonProtocol


class MergeIntoAccumulatorSuite extends QueryTest with SharedSparkSession with DeltaSQLCommandTest {

  import testImplicits._

  private def runTestMergeCommand(): Unit = {
    // Run a simple merge command
    withTempView("source") {
      withTempDir { tempDir =>
        val tempPath = tempDir.getCanonicalPath
        Seq((1, 1), (0, 3)).toDF("key", "value").createOrReplaceTempView("source")
        Seq((2, 2), (1, 4)).toDF("key", "value").write.format("delta").save(tempPath)
        spark.sql(s"""
          |MERGE INTO delta.`$tempPath` target
          |USING source src
          |ON src.key = target.key
          |WHEN MATCHED THEN UPDATE SET *
          |WHEN NOT MATCHED THEN INSERT *
          |""".stripMargin)
      }
    }
  }

  test("accumulators used by MERGE should not be tracked by Spark UI") {
    runTestMergeCommand()

    // Make sure all Spark events generated by the above command have been processed
    spark.sparkContext.listenerBus.waitUntilEmpty(30000)

    val store = spark.sparkContext.statusStore.store
    val iter = store.view(classOf[TaskDataWrapper]).closeableIterator()
    try {
      // Collect all accumulator names tracked by Spark UI.
      val accumNames = iter.asScala.toVector.flatMap { task =>
        task.accumulatorUpdates.map(_.name)
      }.toSet
      // Verify accumulators used by MergeIntoCommand are not tracked.
      assert(!accumNames.contains(MergeIntoCommand.TOUCHED_FILES_ACCUM_NAME))
    } finally {
      iter.close()
    }
  }

} 
Example 17
Source File: SparkSqlAdapter.scala    From carbondata   with Apache License 2.0
package org.apache.spark.sql

import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.execution.FileSourceScanExec
import org.apache.spark.sql.execution.datasources.HadoopFsRelation
import org.apache.spark.sql.types.StructType

import org.apache.carbondata.core.util.ThreadLocalSessionInfo

object SparkSqlAdapter {

  def initSparkSQL(): Unit = {
  }

  def getScanForSegments(
      @transient relation: HadoopFsRelation,
      output: Seq[Attribute],
      outputSchema: StructType,
      partitionFilters: Seq[Expression],
      dataFilters: Seq[Expression],
      tableIdentifier: Option[TableIdentifier]
  ): FileSourceScanExec = {
    FileSourceScanExec(
      relation,
      output,
      outputSchema,
      partitionFilters,
      dataFilters,
      tableIdentifier)
  }

  def addSparkSessionListener(sparkSession: SparkSession): Unit = {
    sparkSession.sparkContext.addSparkListener(new SparkListener {
      override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = {
        CarbonEnv.carbonEnvMap.remove(sparkSession)
        ThreadLocalSessionInfo.unsetAll()
      }
    })
  }
} 
Example 18
Source File: MVManagerInSpark.scala    From carbondata   with Apache License 2.0 5 votes vote down vote up
package org.apache.carbondata.view

import java.util

import org.apache.spark.scheduler.{SparkListener, SparkListenerApplicationEnd}
import org.apache.spark.sql.{CarbonEnv, CarbonUtils, SparkSession}

import org.apache.carbondata.core.constants.CarbonCommonConstants
import org.apache.carbondata.core.util.ThreadLocalSessionInfo
import org.apache.carbondata.core.view.MVManager

class MVManagerInSpark(session: SparkSession) extends MVManager {
  override def getDatabases: util.List[String] = {
    CarbonUtils.threadSet(CarbonCommonConstants.DISABLE_SQL_REWRITE, "true")
    try {
      val databaseList = session.catalog.listDatabases()
      val databaseNameList = new util.ArrayList[String]()
      for (database <- databaseList.collect()) {
        databaseNameList.add(database.name)
      }
      databaseNameList
    } finally {
      CarbonUtils.threadUnset(CarbonCommonConstants.DISABLE_SQL_REWRITE)
    }
  }

  override def getDatabaseLocation(databaseName: String): String = {
    CarbonEnv.getDatabaseLocation(databaseName, session)
  }
}

object MVManagerInSpark {

  private val MANAGER_MAP_BY_SESSION =
    new util.HashMap[SparkSession, MVManagerInSpark]()

  def get(session: SparkSession): MVManagerInSpark = {
    var viewManager = MANAGER_MAP_BY_SESSION.get(session)
    if (viewManager == null) {
      MANAGER_MAP_BY_SESSION.synchronized {
        viewManager = MANAGER_MAP_BY_SESSION.get(session)
        if (viewManager == null) {
          viewManager = new MVManagerInSpark(session)
          MANAGER_MAP_BY_SESSION.put(session, viewManager)
          session.sparkContext.addSparkListener(new SparkListener {
            override def onApplicationEnd(applicationEnd: SparkListenerApplicationEnd): Unit = {
              CarbonEnv.carbonEnvMap.remove(session)
              ThreadLocalSessionInfo.unsetAll()
            }
          })
        }
      }
    }
    viewManager
  }

} 
Example 19
Source File: LogUrlsStandaloneSuite.scala    From SparkCore   with Apache License 2.0
package org.apache.spark.deploy

import java.net.URL

import scala.collection.mutable
import scala.io.Source

import org.scalatest.FunSuite

import org.apache.spark.scheduler.cluster.ExecutorInfo
import org.apache.spark.scheduler.{SparkListenerExecutorAdded, SparkListener}
import org.apache.spark.{SparkConf, SparkContext, LocalSparkContext}

class LogUrlsStandaloneSuite extends FunSuite with LocalSparkContext {

  
  private val WAIT_TIMEOUT_MILLIS = 10000

  test("verify that correct log urls get propagated from workers") {
    sc = new SparkContext("local-cluster[2,1,512]", "test")

    val listener = new SaveExecutorInfo
    sc.addSparkListener(listener)

    // Trigger a job so that executors get added
    sc.parallelize(1 to 100, 4).map(_.toString).count()

    assert(sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS))
    listener.addedExecutorInfos.values.foreach { info =>
      assert(info.logUrlMap.nonEmpty)
      // Browse to each URL to check that it's valid
      info.logUrlMap.foreach { case (logType, logUrl) =>
        val html = Source.fromURL(logUrl).mkString
        assert(html.contains(s"$logType log page"))
      }
    }
  }

  test("verify that log urls reflect SPARK_PUBLIC_DNS (SPARK-6175)") {
    val SPARK_PUBLIC_DNS = "public_dns"
    class MySparkConf extends SparkConf(false) {
      override def getenv(name: String) = {
        if (name == "SPARK_PUBLIC_DNS") SPARK_PUBLIC_DNS
        else super.getenv(name)
      }

      override def clone: SparkConf = {
        new MySparkConf().setAll(getAll)
      }
    }
    val conf = new MySparkConf()
    sc = new SparkContext("local-cluster[2,1,512]", "test", conf)

    val listener = new SaveExecutorInfo
    sc.addSparkListener(listener)

    // Trigger a job so that executors get added
    sc.parallelize(1 to 100, 4).map(_.toString).count()

    assert(sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS))
    listener.addedExecutorInfos.values.foreach { info =>
      assert(info.logUrlMap.nonEmpty)
      info.logUrlMap.values.foreach { logUrl =>
        assert(new URL(logUrl).getHost === SPARK_PUBLIC_DNS)
      }
    }
  }

  private class SaveExecutorInfo extends SparkListener {
    val addedExecutorInfos = mutable.Map[String, ExecutorInfo]()

    override def onExecutorAdded(executor: SparkListenerExecutorAdded) {
      addedExecutorInfos(executor.executorId) = executor.executorInfo
    }
  }
} 
Example 20
Source File: LogUrlsStandaloneSuite.scala    From sparkoscope   with Apache License 2.0
package org.apache.spark.deploy

import java.net.URL

import scala.collection.mutable
import scala.io.Source

import org.apache.spark.{LocalSparkContext, SparkContext, SparkFunSuite}
import org.apache.spark.scheduler.{SparkListener, SparkListenerExecutorAdded}
import org.apache.spark.scheduler.cluster.ExecutorInfo
import org.apache.spark.util.SparkConfWithEnv

class LogUrlsStandaloneSuite extends SparkFunSuite with LocalSparkContext {

  
  private val WAIT_TIMEOUT_MILLIS = 10000

  test("verify that correct log urls get propagated from workers") {
    sc = new SparkContext("local-cluster[2,1,1024]", "test")

    val listener = new SaveExecutorInfo
    sc.addSparkListener(listener)

    // Trigger a job so that executors get added
    sc.parallelize(1 to 100, 4).map(_.toString).count()

    sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
    listener.addedExecutorInfos.values.foreach { info =>
      assert(info.logUrlMap.nonEmpty)
      // Browse to each URL to check that it's valid
      info.logUrlMap.foreach { case (logType, logUrl) =>
        val html = Source.fromURL(logUrl).mkString
        assert(html.contains(s"$logType log page"))
      }
    }
  }

  test("verify that log urls reflect SPARK_PUBLIC_DNS (SPARK-6175)") {
    val SPARK_PUBLIC_DNS = "public_dns"
    val conf = new SparkConfWithEnv(Map("SPARK_PUBLIC_DNS" -> SPARK_PUBLIC_DNS)).set(
      "spark.extraListeners", classOf[SaveExecutorInfo].getName)
    sc = new SparkContext("local-cluster[2,1,1024]", "test", conf)

    // Trigger a job so that executors get added
    sc.parallelize(1 to 100, 4).map(_.toString).count()

    sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
    val listeners = sc.listenerBus.findListenersByClass[SaveExecutorInfo]
    assert(listeners.size === 1)
    val listener = listeners(0)
    listener.addedExecutorInfos.values.foreach { info =>
      assert(info.logUrlMap.nonEmpty)
      info.logUrlMap.values.foreach { logUrl =>
        assert(new URL(logUrl).getHost === SPARK_PUBLIC_DNS)
      }
    }
  }
}

private[spark] class SaveExecutorInfo extends SparkListener {
  val addedExecutorInfos = mutable.Map[String, ExecutorInfo]()

  override def onExecutorAdded(executor: SparkListenerExecutorAdded) {
    addedExecutorInfos(executor.executorId) = executor.executorInfo
  }
} 
Example 21
Source File: SparkAtlasEventTracker.scala    From spark-atlas-connector   with Apache License 2.0
package com.hortonworks.spark.atlas

import com.google.common.annotations.VisibleForTesting
import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent}
import org.apache.spark.sql.catalyst.catalog.ExternalCatalogEvent
import org.apache.spark.sql.execution.QueryExecution
import org.apache.spark.sql.util.QueryExecutionListener
import com.hortonworks.spark.atlas.sql._
import com.hortonworks.spark.atlas.ml.MLPipelineEventProcessor
import com.hortonworks.spark.atlas.utils.Logging

class SparkAtlasEventTracker(atlasClient: AtlasClient, atlasClientConf: AtlasClientConf)
    extends SparkListener with QueryExecutionListener with Logging {

  def this(atlasClientConf: AtlasClientConf) = {
    this(AtlasClient.atlasClient(atlasClientConf), atlasClientConf)
  }

  def this() {
    this(new AtlasClientConf)
  }

  private val enabled: Boolean = AtlasUtils.isSacEnabled(atlasClientConf)

  // Processor to handle DDL related events
  @VisibleForTesting
  private[atlas] val catalogEventTracker =
    new SparkCatalogEventProcessor(atlasClient, atlasClientConf)
  catalogEventTracker.startThread()

  // Processor to handle DML related events
  private val executionPlanTracker = new SparkExecutionPlanProcessor(atlasClient, atlasClientConf)
  executionPlanTracker.startThread()

  private val mlEventTracker = new MLPipelineEventProcessor(atlasClient, atlasClientConf)
  mlEventTracker.startThread()

  override def onOtherEvent(event: SparkListenerEvent): Unit = {
    if (!enabled) {
      // No op if SAC is disabled
      return
    }

    // We only care about SQL related events.
    event match {
      case e: ExternalCatalogEvent => catalogEventTracker.pushEvent(e)
      case e: SparkListenerEvent if e.getClass.getName.contains("org.apache.spark.ml") =>
        mlEventTracker.pushEvent(e)
      case _ => // Ignore other events
    }
  }

  override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = {
    if (!enabled) {
      // No op if SAC is disabled
      return
    }

    if (qe.logical.isStreaming) {
      // streaming query will be tracked via SparkAtlasStreamingQueryEventTracker
      return
    }

    val qd = QueryDetail.fromQueryExecutionListener(qe, durationNs)
    executionPlanTracker.pushEvent(qd)
  }

  override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit = {
    // No-op: SAC is only one of the registered listeners.
  }

} 
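Since SparkAtlasEventTracker is both a SparkListener and a QueryExecutionListener, it is normally attached through configuration on both buses. A hedged sketch of the relevant settings (consult the connector's own documentation for the authoritative deployment instructions):

import org.apache.spark.sql.SparkSession

// Assumption: the connector jar is on the classpath and Atlas client settings
// are provided separately; the class name matches the source file above.
val spark = SparkSession.builder()
  .appName("atlas-tracker-demo")
  .config("spark.extraListeners",
    "com.hortonworks.spark.atlas.SparkAtlasEventTracker")
  .config("spark.sql.queryExecutionListeners",
    "com.hortonworks.spark.atlas.SparkAtlasEventTracker")
  .getOrCreate()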
Example 22
Source File: SQLHistoryServerPlugin.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.execution.ui

import org.apache.spark.SparkConf
import org.apache.spark.scheduler.SparkListener
import org.apache.spark.status.{AppHistoryServerPlugin, ElementTrackingStore}
import org.apache.spark.ui.SparkUI

class SQLHistoryServerPlugin extends AppHistoryServerPlugin {
  override def createListeners(conf: SparkConf, store: ElementTrackingStore): Seq[SparkListener] = {
    Seq(new SQLAppStatusListener(conf, store, live = false))
  }

  override def setupUI(ui: SparkUI): Unit = {
    val sqlStatusStore = new SQLAppStatusStore(ui.store.store)
    if (sqlStatusStore.executionsCount() > 0) {
      new SQLTab(sqlStatusStore, ui)
    }
  }
} 
Example 23
Source File: PDCacheInvalidateListener.scala    From tispark   with Apache License 2.0
package com.pingcap.tispark.listener

import java.util.logging.Logger

import com.pingcap.tispark.accumulator.CacheInvalidateAccumulator
import com.pingcap.tispark.handler.CacheInvalidateEventHandler
import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd}

class PDCacheInvalidateListener(
    accumulator: CacheInvalidateAccumulator,
    handler: CacheInvalidateEventHandler)
    extends SparkListener {
  private final val logger: Logger = Logger.getLogger(getClass.getName)

  override def onJobEnd(jobEnd: SparkListenerJobEnd): Unit =
    if (accumulator != null && !accumulator.isZero && handler != null) {
      synchronized {
        if (!accumulator.isZero) {
          val events = accumulator.value
          logger.info(
            s"Receiving ${events.size} cache invalidation request(s) from job ${jobEnd.jobId} at driver. " +
              s"This indicates that there's exception(s) thrown in executor node when communicating with " +
              s"TiKV, checkout executors' log for more information.")
          events.foreach(handler.handle)
        }
      }
    }
} 
Example 24
Source File: LogUrlsStandaloneSuite.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.deploy

import java.net.URL

import scala.collection.mutable
import scala.io.Source

import org.apache.spark.{LocalSparkContext, SparkContext, SparkFunSuite}
import org.apache.spark.scheduler.{SparkListener, SparkListenerExecutorAdded}
import org.apache.spark.scheduler.cluster.ExecutorInfo
import org.apache.spark.util.SparkConfWithEnv

class LogUrlsStandaloneSuite extends SparkFunSuite with LocalSparkContext {

  
  private val WAIT_TIMEOUT_MILLIS = 10000

  test("verify that correct log urls get propagated from workers") {
    sc = new SparkContext("local-cluster[2,1,1024]", "test")

    val listener = new SaveExecutorInfo
    sc.addSparkListener(listener)

    // Trigger a job so that executors get added
    sc.parallelize(1 to 100, 4).map(_.toString).count()

    sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
    listener.addedExecutorInfos.values.foreach { info =>
      assert(info.logUrlMap.nonEmpty)
      // Browse to each URL to check that it's valid
      info.logUrlMap.foreach { case (logType, logUrl) =>
        val html = Source.fromURL(logUrl).mkString
        assert(html.contains(s"$logType log page"))
      }
    }
  }

  test("verify that log urls reflect SPARK_PUBLIC_DNS (SPARK-6175)") {
    val SPARK_PUBLIC_DNS = "public_dns"
    val conf = new SparkConfWithEnv(Map("SPARK_PUBLIC_DNS" -> SPARK_PUBLIC_DNS)).set(
      "spark.extraListeners", classOf[SaveExecutorInfo].getName)
    sc = new SparkContext("local-cluster[2,1,1024]", "test", conf)

    // Trigger a job so that executors get added
    sc.parallelize(1 to 100, 4).map(_.toString).count()

    sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
    val listeners = sc.listenerBus.findListenersByClass[SaveExecutorInfo]
    assert(listeners.size === 1)
    val listener = listeners(0)
    listener.addedExecutorInfos.values.foreach { info =>
      assert(info.logUrlMap.nonEmpty)
      info.logUrlMap.values.foreach { logUrl =>
        assert(new URL(logUrl).getHost === SPARK_PUBLIC_DNS)
      }
    }
  }
}

private[spark] class SaveExecutorInfo extends SparkListener {
  val addedExecutorInfos = mutable.Map[String, ExecutorInfo]()

  override def onExecutorAdded(executor: SparkListenerExecutorAdded) {
    addedExecutorInfos(executor.executorId) = executor.executorInfo
  }
} 
Example 25
Source File: StreamingQueryListenerBus.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.sql.execution.streaming

import org.apache.spark.scheduler.{LiveListenerBus, SparkListener, SparkListenerEvent}
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.util.ListenerBus


  // Note: the enclosing class declaration of StreamingQueryListenerBus (a SparkListener
  // that also mixes in ListenerBus[StreamingQueryListener, StreamingQueryListener.Event])
  // is elided in this excerpt.
  def post(event: StreamingQueryListener.Event) {
    event match {
      case s: QueryStartedEvent =>
        postToAll(s)
      case _ =>
        sparkListenerBus.post(event)
    }
  }

  override def onOtherEvent(event: SparkListenerEvent): Unit = {
    event match {
      case e: StreamingQueryListener.Event =>
        postToAll(e)
      case _ =>
    }
  }

  override protected def doPostEvent(
      listener: StreamingQueryListener,
      event: StreamingQueryListener.Event): Unit = {
    event match {
      case queryStarted: QueryStartedEvent =>
        listener.onQueryStarted(queryStarted)
      case queryProgress: QueryProgressEvent =>
        listener.onQueryProgress(queryProgress)
      case queryTerminated: QueryTerminatedEvent =>
        listener.onQueryTerminated(queryTerminated)
      case _ =>
    }
  }

} 
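From application code, streaming query events are usually consumed by registering a StreamingQueryListener with the session's StreamingQueryManager rather than by touching this internal bus. A minimal sketch:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener._

val spark = SparkSession.builder().master("local[2]").appName("streaming-listener-demo").getOrCreate()

spark.streams.addListener(new StreamingQueryListener {
  override def onQueryStarted(event: QueryStartedEvent): Unit =
    println(s"query started: ${event.id}")
  override def onQueryProgress(event: QueryProgressEvent): Unit =
    println(s"rows/sec: ${event.progress.processedRowsPerSecond}")
  override def onQueryTerminated(event: QueryTerminatedEvent): Unit =
    println(s"query terminated: ${event.id}")
})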
Example 26
Source File: StreamingQueryListenerBus.scala    From XSQL   with Apache License 2.0
package org.apache.spark.sql.execution.streaming

import java.util.UUID

import scala.collection.mutable

import org.apache.spark.scheduler.{LiveListenerBus, SparkListener, SparkListenerEvent}
import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.util.ListenerBus


  // Note: the enclosing StreamingQueryListenerBus class declaration and the
  // activeQueryRunIds bookkeeping referenced below are elided in this excerpt.
  override protected def doPostEvent(
      listener: StreamingQueryListener,
      event: StreamingQueryListener.Event): Unit = {
    def shouldReport(runId: UUID): Boolean = {
      activeQueryRunIds.synchronized { activeQueryRunIds.contains(runId) }
    }

    event match {
      case queryStarted: QueryStartedEvent =>
        if (shouldReport(queryStarted.runId)) {
          listener.onQueryStarted(queryStarted)
        }
      case queryProgress: QueryProgressEvent =>
        if (shouldReport(queryProgress.progress.runId)) {
          listener.onQueryProgress(queryProgress)
        }
      case queryTerminated: QueryTerminatedEvent =>
        if (shouldReport(queryTerminated.runId)) {
          listener.onQueryTerminated(queryTerminated)
        }
      case _ =>
    }
  }
}

object StreamingQueryListenerBus {
  val STREAM_EVENT_QUERY = "streams"
}