org.apache.spark.scheduler.SparkListenerTaskStart Scala Examples
The following examples show how to use org.apache.spark.scheduler.SparkListenerTaskStart.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: ProfilerListener.scala From carbondata with Apache License 2.0 | 6 votes |
package org.apache.spark.sql.profiler

import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent, SparkListenerTaskEnd, SparkListenerTaskGettingResult, SparkListenerTaskStart}
import org.apache.spark.sql.execution.ui.{SparkListenerSQLExecutionEnd, SparkListenerSQLExecutionStart}

/**
 * Spark listener that relays SQL execution start/end events to `Profiler`.
 *
 * Only `onOtherEvent` is overridden; every other callback keeps the
 * behaviour inherited from `SparkListener`.
 */
private[profiler] class ProfilerListener extends SparkListener {

  /**
   * Handles a listener event inside `Profiler.invokeIfEnable`.
   *
   * A `SparkListenerSQLExecutionStart` is registered through
   * `Profiler.addExecutionMessage`; a `SparkListenerSQLExecutionEnd` is passed
   * to `Profiler.send`. Any other event type is ignored.
   *
   * @param event the event delivered by the listener bus
   */
  override def onOtherEvent(event: SparkListenerEvent): Unit =
    Profiler.invokeIfEnable {
      event match {
        case started: SparkListenerSQLExecutionStart =>
          val id = started.executionId
          val message = ExecutionStart(id, started.time, started.physicalPlanDescription)
          Profiler.addExecutionMessage(id, message)

        case finished: SparkListenerSQLExecutionEnd =>
          Profiler.send(ExecutionEnd(finished.executionId, finished.time))

        case _ => // not a SQL execution start/end event: nothing to record
      }
    }
}
Example 2
Source File: KafkaContinuousTest.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.kafka010

import java.util.concurrent.atomic.AtomicInteger

import org.apache.spark.SparkContext
import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd, SparkListenerTaskStart}
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
import org.apache.spark.sql.execution.streaming.StreamExecution
import org.apache.spark.sql.execution.streaming.continuous.ContinuousExecution
import org.apache.spark.sql.streaming.Trigger
import org.apache.spark.sql.test.TestSparkSession

// Trait to configure StreamTest for kafka continuous execution tests.
trait KafkaContinuousTest extends KafkaSourceTest {
  override val defaultTrigger = Trigger.Continuous(1000)
  override val defaultUseV2Sink = true

  // We need more than the default local[2] to be able to schedule all partitions simultaneously.
  override protected def createSparkSession = new TestSparkSession(
    new SparkContext(
      "local[10]",
      "continuous-stream-test-sql-context",
      sparkConf.set("spark.sql.testkey", "true")))

  // In addition to setting the partitions in Kafka, we have to wait until the query has
  // reconfigured to the new count so the test framework can hook in properly.
  override protected def setTopicPartitions(
      topic: String, newCount: Int, query: StreamExecution) = {
    testUtils.addPartitions(topic, newCount)
    eventually(timeout(streamingTimeout)) {
      assert(
        query.lastExecution.logical.collectFirst {
          case DataSourceV2Relation(_, r: KafkaContinuousReader) => r
        }.exists(_.knownPartitions.size == newCount),
        s"query never reconfigured to $newCount partitions")
    }
  }

  // Continuous processing tasks end asynchronously, so test that they actually end.
  // The counter goes up on every task start and down on every task end, so it is
  // zero exactly when every task observed by this listener has finished.
  private val tasksEndedListener = new SparkListener() {
    val activeTaskIdCount = new AtomicInteger(0)

    override def onTaskStart(start: SparkListenerTaskStart): Unit = {
      activeTaskIdCount.incrementAndGet()
    }

    override def onTaskEnd(end: SparkListenerTaskEnd): Unit = {
      activeTaskIdCount.decrementAndGet()
    }
  }

  /** Registers the task-tracking listener after the parent's per-test setup. */
  override def beforeEach(): Unit = {
    super.beforeEach()
    spark.sparkContext.addSparkListener(tasksEndedListener)
  }

  /**
   * Waits (up to `streamingTimeout`) for all tracked tasks to end, then
   * unregisters the listener and runs the parent's per-test teardown.
   *
   * The cleanup steps sit in `finally` blocks so that a failed or timed-out
   * assertion still removes the listener and still invokes `super.afterEach()`.
   * Without this, a failing test would leave the listener registered and the
   * next `beforeEach` would register the same instance a second time.
   */
  override def afterEach(): Unit = {
    try {
      eventually(timeout(streamingTimeout)) {
        assert(tasksEndedListener.activeTaskIdCount.get() == 0)
      }
    } finally {
      try {
        spark.sparkContext.removeSparkListener(tasksEndedListener)
      } finally {
        super.afterEach()
      }
    }
  }

  test("ensure continuous stream is being used") {
    val query = spark.readStream
      .format("rate")
      .option("numPartitions", "1")
      .option("rowsPerSecond", "1")
      .load()

    testStream(query)(
      Execute(q => assert(q.isInstanceOf[ContinuousExecution]))
    )
  }
}
Example 3
Source File: KinesisContinuousTest.scala From kinesis-sql with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.kinesis

import java.util.concurrent.atomic.AtomicInteger

import org.scalatest.time.SpanSugar._

import org.apache.spark.SparkContext
import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd, SparkListenerTaskStart}
import org.apache.spark.sql.streaming.Trigger
import org.apache.spark.sql.test.TestSparkSession

/**
 * Test configuration for running the Kinesis source tests with a continuous
 * trigger, plus a per-test check that every started task has ended.
 */
trait KinesisContinuousTest extends KinesisSourceTest {
  override val defaultTrigger = Trigger.Continuous("1 hour")
  override val defaultUseV2Sink = true
  override val streamingTimeout = 120.seconds

  override protected def createSparkSession = new TestSparkSession(
    new SparkContext(
      "local[10]",
      "continuous-stream-test-sql-context",
      sparkConf.set("spark.sql.testkey", "true")))

  // Continuous processing tasks end asynchronously, so test that they actually end.
  // The counter goes up on every task start and down on every task end, so it is
  // zero exactly when every task observed by this listener has finished.
  private val tasksEndedListener = new SparkListener() {
    val activeTaskIdCount = new AtomicInteger(0)

    override def onTaskStart(start: SparkListenerTaskStart): Unit = {
      activeTaskIdCount.incrementAndGet()
    }

    override def onTaskEnd(end: SparkListenerTaskEnd): Unit = {
      activeTaskIdCount.decrementAndGet()
    }
  }

  /** Registers the task-tracking listener after the parent's per-test setup. */
  override def beforeEach(): Unit = {
    super.beforeEach()
    spark.sparkContext.addSparkListener(tasksEndedListener)
  }

  /**
   * Waits (up to `streamingTimeout`) for all tracked tasks to end, then
   * unregisters the listener and runs the parent's per-test teardown.
   *
   * The cleanup steps sit in `finally` blocks so that a failed or timed-out
   * assertion still removes the listener and still invokes `super.afterEach()`.
   * Without this, a failing test would leave the listener registered and the
   * next `beforeEach` would register the same instance a second time.
   */
  override def afterEach(): Unit = {
    try {
      eventually(timeout(streamingTimeout)) {
        assert(tasksEndedListener.activeTaskIdCount.get() == 0)
      }
    } finally {
      try {
        spark.sparkContext.removeSparkListener(tasksEndedListener)
      } finally {
        super.afterEach()
      }
    }
  }
}
Example 4
Source File: PulsarContinuousTest.scala From pulsar-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.pulsar

import java.util.concurrent.atomic.AtomicInteger

import scala.language.reflectiveCalls

import org.apache.spark.SparkContext
import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd, SparkListenerTaskStart}
import org.apache.spark.sql.execution.streaming.continuous.ContinuousExecution
import org.apache.spark.sql.streaming.Trigger
import org.apache.spark.sql.test.TestSparkSession

/**
 * Test configuration for running the Pulsar source tests with a continuous
 * trigger, plus a per-test check that every started task has ended.
 */
trait PulsarContinuousTest extends PulsarSourceTest {
  override val defaultTrigger = Trigger.Continuous(1000)
  override val defaultUseV2Sink = true

  // We need more than the default local[2] to be able to schedule all partitions simultaneously.
  override protected def createSparkSession = new TestSparkSession(
    new SparkContext(
      "local[10]",
      "continuous-stream-test-sql-context",
      sparkConf.set("spark.sql.testkey", "true")))

  // Continuous processing tasks end asynchronously, so test that they actually end.
  // The counter goes up on every task start and down on every task end, so it is
  // zero exactly when every task observed by this listener has finished.
  private val tasksEndedListener = new SparkListener() {
    val activeTaskIdCount = new AtomicInteger(0)

    override def onTaskStart(start: SparkListenerTaskStart): Unit = {
      activeTaskIdCount.incrementAndGet()
    }

    override def onTaskEnd(end: SparkListenerTaskEnd): Unit = {
      activeTaskIdCount.decrementAndGet()
    }
  }

  /** Registers the task-tracking listener after the parent's per-test setup. */
  override def beforeEach(): Unit = {
    super.beforeEach()
    spark.sparkContext.addSparkListener(tasksEndedListener)
  }

  /**
   * Waits (up to `streamingTimeout`) for all tracked tasks to end, then
   * unregisters the listener and runs the parent's per-test teardown.
   *
   * The cleanup steps sit in `finally` blocks so that a failed or timed-out
   * assertion still removes the listener and still invokes `super.afterEach()`.
   * Without this, a failing test would leave the listener registered and the
   * next `beforeEach` would register the same instance a second time.
   */
  override def afterEach(): Unit = {
    try {
      eventually(timeout(streamingTimeout)) {
        assert(tasksEndedListener.activeTaskIdCount.get() == 0)
      }
    } finally {
      try {
        spark.sparkContext.removeSparkListener(tasksEndedListener)
      } finally {
        super.afterEach()
      }
    }
  }

  test("ensure continuous stream is being used") {
    val query = spark.readStream
      .format("rate")
      .option("numPartitions", "1")
      .option("rowsPerSecond", "1")
      .load()

    testStream(query)(
      Execute(q => assert(q.isInstanceOf[ContinuousExecution]))
    )
  }
}