org.apache.spark.scheduler.SparkListenerTaskEnd Scala Examples
The following examples show how to use org.apache.spark.scheduler.SparkListenerTaskEnd.
Each example is taken from an open-source project; the source file, project, and license are noted above it.
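Before the project examples, here is a minimal, self-contained sketch of the pattern they all share: register a SparkListener whose onTaskEnd(taskEnd: SparkListenerTaskEnd) callback reads per-task metrics or counts finished tasks. The object name, the metrics chosen, and the Thread.sleep used in place of a proper wait are illustrative assumptions, not code from any of the projects below.

import java.util.concurrent.atomic.AtomicLong

import org.apache.spark.SparkContext
import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}

// Illustrative sketch: aggregate per-task metrics from SparkListenerTaskEnd events.
object TaskEndMetricsSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local[2]", "task-end-sketch")

    val finishedTasks = new AtomicLong(0)
    val totalRunTimeMs = new AtomicLong(0)

    // onTaskEnd fires once for every task that finishes, on the asynchronous listener bus.
    sc.addSparkListener(new SparkListener {
      override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
        finishedTasks.incrementAndGet()
        if (taskEnd.taskMetrics != null) {
          totalRunTimeMs.addAndGet(taskEnd.taskMetrics.executorRunTime)
        }
      }
    })

    // Run a small job so some tasks complete.
    sc.parallelize(1 to 1000, 4).count()

    // Crude stand-in for the waits used in the test suites below (eventually / waitUntilEmpty):
    // give the asynchronous listener bus a moment to deliver the remaining events.
    Thread.sleep(1000)
    println(s"tasks finished: ${finishedTasks.get()}, executor run time: ${totalRunTimeMs.get()} ms")

    sc.stop()
  }
}

Because listener events are delivered asynchronously, the test suites below wait (with eventually or waitUntilEmpty) before asserting on the values collected by their listeners.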
Example 1
Source File: ProfilerListener.scala (from carbondata, Apache License 2.0)
A SparkListener that forwards SQL execution start and end events to CarbonData's Profiler through onOtherEvent.
package org.apache.spark.sql.profiler

import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent, SparkListenerTaskEnd, SparkListenerTaskGettingResult, SparkListenerTaskStart}
import org.apache.spark.sql.execution.ui.{SparkListenerSQLExecutionEnd, SparkListenerSQLExecutionStart}

private[profiler] class ProfilerListener extends SparkListener {

  override def onOtherEvent(event: SparkListenerEvent): Unit = {
    Profiler.invokeIfEnable {
      event match {
        case executionStart: SparkListenerSQLExecutionStart =>
          Profiler.addExecutionMessage(
            executionStart.executionId,
            ExecutionStart(
              executionStart.executionId,
              executionStart.time,
              executionStart.physicalPlanDescription
            ))
        case executionEnd: SparkListenerSQLExecutionEnd =>
          Profiler.send(
            ExecutionEnd(
              executionEnd.executionId,
              executionEnd.time
            )
          )
        case _ =>
      }
    }
  }
}
Example 2
Source File: OutputMetricsTest.scala (from memsql-spark-connector, Apache License 2.0)
This test sums outputMetrics.recordsWritten in onTaskEnd and asserts that the listener sees exactly the number of rows written through the connector.
package com.memsql.spark

import com.github.mrpowers.spark.daria.sql.SparkSessionExt._
import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}
import org.apache.spark.sql.types.{IntegerType, StringType}

class OutputMetricsTest extends IntegrationSuiteBase {
  it("records written") {
    var outputWritten = 0L
    spark.sparkContext.addSparkListener(new SparkListener() {
      override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = {
        val metrics = taskEnd.taskMetrics
        outputWritten += metrics.outputMetrics.recordsWritten
      }
    })

    val numRows = 100000
    val df1 = spark.createDF(
      List.range(0, numRows),
      List(("id", IntegerType, true))
    )
    df1.repartition(30)
    df1.write
      .format("memsql")
      .save("metricsInts")
    assert(outputWritten == numRows)

    outputWritten = 0
    val df2 = spark.createDF(
      List("st1", "", null),
      List(("st", StringType, true))
    )
    df2.write
      .format("memsql")
      .save("metricsStrings")
    assert(outputWritten == 3)
  }
}
Example 3
Source File: MergeIntoAccumulatorSuite.scala (from delta, Apache License 2.0)
This suite runs a MERGE command and then inspects the task data tracked by the Spark UI to verify that the internal accumulator used by MergeIntoCommand is not exposed there.
package org.apache.spark.sql.delta

import java.util.concurrent.atomic.AtomicReference

import scala.collection.JavaConverters._

import org.apache.spark.sql.delta.commands.MergeIntoCommand
import org.apache.spark.sql.delta.test.DeltaSQLCommandTest
import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.status.TaskDataWrapper
import org.apache.spark.util.JsonProtocol

class MergeIntoAccumulatorSuite
  extends QueryTest
  with SharedSparkSession
  with DeltaSQLCommandTest {

  import testImplicits._

  private def runTestMergeCommand(): Unit = {
    // Run a simple merge command
    withTempView("source") {
      withTempDir { tempDir =>
        val tempPath = tempDir.getCanonicalPath
        Seq((1, 1), (0, 3)).toDF("key", "value").createOrReplaceTempView("source")
        Seq((2, 2), (1, 4)).toDF("key", "value").write.format("delta").save(tempPath)
        spark.sql(s"""
          |MERGE INTO delta.`$tempPath` target
          |USING source src
          |ON src.key = target.key
          |WHEN MATCHED THEN UPDATE SET *
          |WHEN NOT MATCHED THEN INSERT *
          |""".stripMargin)
      }
    }
  }

  test("accumulators used by MERGE should not be tracked by Spark UI") {
    runTestMergeCommand()

    // Make sure all Spark events generated by the above command have been processed
    spark.sparkContext.listenerBus.waitUntilEmpty(30000)

    val store = spark.sparkContext.statusStore.store
    val iter = store.view(classOf[TaskDataWrapper]).closeableIterator()
    try {
      // Collect all accumulator names tracked by Spark UI.
      val accumNames = iter.asScala.toVector.flatMap { task =>
        task.accumulatorUpdates.map(_.name)
      }.toSet
      // Verify accumulators used by MergeIntoCommand are not tracked.
      assert(!accumNames.contains(MergeIntoCommand.TOUCHED_FILES_ACCUM_NAME))
    } finally {
      iter.close()
    }
  }
}
Example 4
Source File: KafkaContinuousTest.scala (from Spark-2.3.1, Apache License 2.0)
A test trait for Kafka continuous-processing streams; it counts active tasks in onTaskStart/onTaskEnd so that afterEach can verify the asynchronously ending tasks have actually finished.
package org.apache.spark.sql.kafka010

import java.util.concurrent.atomic.AtomicInteger

import org.apache.spark.SparkContext
import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd, SparkListenerTaskStart}
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation
import org.apache.spark.sql.execution.streaming.StreamExecution
import org.apache.spark.sql.execution.streaming.continuous.ContinuousExecution
import org.apache.spark.sql.streaming.Trigger
import org.apache.spark.sql.test.TestSparkSession

// Trait to configure StreamTest for kafka continuous execution tests.
trait KafkaContinuousTest extends KafkaSourceTest {
  override val defaultTrigger = Trigger.Continuous(1000)
  override val defaultUseV2Sink = true

  // We need more than the default local[2] to be able to schedule all partitions simultaneously.
  override protected def createSparkSession = new TestSparkSession(
    new SparkContext(
      "local[10]",
      "continuous-stream-test-sql-context",
      sparkConf.set("spark.sql.testkey", "true")))

  // In addition to setting the partitions in Kafka, we have to wait until the query has
  // reconfigured to the new count so the test framework can hook in properly.
  override protected def setTopicPartitions(
      topic: String, newCount: Int, query: StreamExecution) = {
    testUtils.addPartitions(topic, newCount)
    eventually(timeout(streamingTimeout)) {
      assert(
        query.lastExecution.logical.collectFirst {
          case DataSourceV2Relation(_, r: KafkaContinuousReader) => r
        }.exists(_.knownPartitions.size == newCount),
        s"query never reconfigured to $newCount partitions")
    }
  }

  // Continuous processing tasks end asynchronously, so test that they actually end.
  private val tasksEndedListener = new SparkListener() {
    val activeTaskIdCount = new AtomicInteger(0)

    override def onTaskStart(start: SparkListenerTaskStart): Unit = {
      activeTaskIdCount.incrementAndGet()
    }

    override def onTaskEnd(end: SparkListenerTaskEnd): Unit = {
      activeTaskIdCount.decrementAndGet()
    }
  }

  override def beforeEach(): Unit = {
    super.beforeEach()
    spark.sparkContext.addSparkListener(tasksEndedListener)
  }

  override def afterEach(): Unit = {
    eventually(timeout(streamingTimeout)) {
      assert(tasksEndedListener.activeTaskIdCount.get() == 0)
    }
    spark.sparkContext.removeSparkListener(tasksEndedListener)
    super.afterEach()
  }

  test("ensure continuous stream is being used") {
    val query = spark.readStream
      .format("rate")
      .option("numPartitions", "1")
      .option("rowsPerSecond", "1")
      .load()

    testStream(query)(
      Execute(q => assert(q.isInstanceOf[ContinuousExecution]))
    )
  }
}
Example 5
Source File: KinesisContinuousTest.scala (from kinesis-sql, Apache License 2.0)
The same active-task counting pattern applied to the Kinesis connector's continuous-processing tests.
package org.apache.spark.sql.kinesis

import java.util.concurrent.atomic.AtomicInteger

import org.scalatest.time.SpanSugar._

import org.apache.spark.SparkContext
import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd, SparkListenerTaskStart}
import org.apache.spark.sql.streaming.Trigger
import org.apache.spark.sql.test.TestSparkSession

trait KinesisContinuousTest extends KinesisSourceTest {
  override val defaultTrigger = Trigger.Continuous("1 hour")
  override val defaultUseV2Sink = true

  override val streamingTimeout = 120.seconds

  override protected def createSparkSession = new TestSparkSession(
    new SparkContext(
      "local[10]",
      "continuous-stream-test-sql-context",
      sparkConf.set("spark.sql.testkey", "true")))

  // Continuous processing tasks end asynchronously, so test that they actually end.
  private val tasksEndedListener = new SparkListener() {
    val activeTaskIdCount = new AtomicInteger(0)

    override def onTaskStart(start: SparkListenerTaskStart): Unit = {
      activeTaskIdCount.incrementAndGet()
    }

    override def onTaskEnd(end: SparkListenerTaskEnd): Unit = {
      activeTaskIdCount.decrementAndGet()
    }
  }

  override def beforeEach(): Unit = {
    super.beforeEach()
    spark.sparkContext.addSparkListener(tasksEndedListener)
  }

  override def afterEach(): Unit = {
    eventually(timeout(streamingTimeout)) {
      assert(tasksEndedListener.activeTaskIdCount.get() == 0)
    }
    spark.sparkContext.removeSparkListener(tasksEndedListener)
    super.afterEach()
  }
}
Example 6
Source File: PulsarContinuousTest.scala (from pulsar-spark, Apache License 2.0)
The same active-task counting pattern applied to the Pulsar connector's continuous-processing tests.
package org.apache.spark.sql.pulsar

import java.util.concurrent.atomic.AtomicInteger

import scala.language.reflectiveCalls

import org.apache.spark.SparkContext
import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd, SparkListenerTaskStart}
import org.apache.spark.sql.execution.streaming.continuous.ContinuousExecution
import org.apache.spark.sql.streaming.Trigger
import org.apache.spark.sql.test.TestSparkSession

trait PulsarContinuousTest extends PulsarSourceTest {
  override val defaultTrigger = Trigger.Continuous(1000)
  override val defaultUseV2Sink = true

  // We need more than the default local[2] to be able to schedule all partitions simultaneously.
  override protected def createSparkSession = new TestSparkSession(
    new SparkContext(
      "local[10]",
      "continuous-stream-test-sql-context",
      sparkConf.set("spark.sql.testkey", "true")))

  // Continuous processing tasks end asynchronously, so test that they actually end.
  private val tasksEndedListener = new SparkListener() {
    val activeTaskIdCount = new AtomicInteger(0)

    override def onTaskStart(start: SparkListenerTaskStart): Unit = {
      activeTaskIdCount.incrementAndGet()
    }

    override def onTaskEnd(end: SparkListenerTaskEnd): Unit = {
      activeTaskIdCount.decrementAndGet()
    }
  }

  override def beforeEach(): Unit = {
    super.beforeEach()
    spark.sparkContext.addSparkListener(tasksEndedListener)
  }

  override def afterEach(): Unit = {
    eventually(timeout(streamingTimeout)) {
      assert(tasksEndedListener.activeTaskIdCount.get() == 0)
    }
    spark.sparkContext.removeSparkListener(tasksEndedListener)
    super.afterEach()
  }

  test("ensure continuous stream is being used") {
    val query = spark.readStream
      .format("rate")
      .option("numPartitions", "1")
      .option("rowsPerSecond", "1")
      .load()

    testStream(query)(
      Execute(q => assert(q.isInstanceOf[ContinuousExecution]))
    )
  }
}