org.apache.spark.util.ManualClock Scala Example

Source File: ProcessingTimeExecutorSuite.scala From drizzle-spark with Apache License 2.0

5 votes

package org.apache.spark.sql.execution.streaming

import java.util.concurrent.{CountDownLatch, TimeUnit}

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.streaming.ProcessingTime
import org.apache.spark.util.{Clock, ManualClock, SystemClock}

class ProcessingTimeExecutorSuite extends SparkFunSuite {

  test("nextBatchTime") {
    val processingTimeExecutor = ProcessingTimeExecutor(ProcessingTime(100))
    assert(processingTimeExecutor.nextBatchTime(0) === 100)
    assert(processingTimeExecutor.nextBatchTime(1) === 100)
    assert(processingTimeExecutor.nextBatchTime(99) === 100)
    assert(processingTimeExecutor.nextBatchTime(100) === 200)
    assert(processingTimeExecutor.nextBatchTime(101) === 200)
    assert(processingTimeExecutor.nextBatchTime(150) === 200)
  }

  test("calling nextBatchTime with the result of a previous call should return the next interval") {
    val intervalMS = 100
    val processingTimeExecutor = ProcessingTimeExecutor(ProcessingTime(intervalMS))

    val ITERATION = 10
    var nextBatchTime: Long = 0
    for (it <- 1 to ITERATION) {
      nextBatchTime = processingTimeExecutor.nextBatchTime(nextBatchTime)
    }

    // nextBatchTime should be 1000
    assert(nextBatchTime === intervalMS * ITERATION)
  }

  private def testBatchTermination(intervalMs: Long): Unit = {
    var batchCounts = 0
    val processingTimeExecutor = ProcessingTimeExecutor(ProcessingTime(intervalMs))
    processingTimeExecutor.execute(() => {
      batchCounts += 1
      // If the batch termination works well, batchCounts should be 3 after `execute`
      batchCounts < 3
    })
    assert(batchCounts === 3)
  }

  test("batch termination") {
    testBatchTermination(0)
    testBatchTermination(10)
  }

  test("notifyBatchFallingBehind") {
    val clock = new ManualClock()
    @volatile var batchFallingBehindCalled = false
    val latch = new CountDownLatch(1)
    val t = new Thread() {
      override def run(): Unit = {
        val processingTimeExecutor = new ProcessingTimeExecutor(ProcessingTime(100), clock) {
          override def notifyBatchFallingBehind(realElapsedTimeMs: Long): Unit = {
            batchFallingBehindCalled = true
          }
        }
        processingTimeExecutor.execute(() => {
          latch.countDown()
          clock.waitTillTime(200)
          false
        })
      }
    }
    t.start()
    // Wait until the batch is running so that we don't call `advance` too early
    assert(latch.await(10, TimeUnit.SECONDS), "the batch has not yet started in 10 seconds")
    clock.advance(200)
    t.join()
    assert(batchFallingBehindCalled === true)
  }
}

Source File: RecurringTimerSuite.scala From drizzle-spark with Apache License 2.0

5 votes

package org.apache.spark.streaming.util

import java.util.concurrent.ConcurrentLinkedQueue

import scala.collection.JavaConverters._
import scala.concurrent.duration._

import org.scalatest.PrivateMethodTester
import org.scalatest.concurrent.Eventually._

import org.apache.spark.SparkFunSuite
import org.apache.spark.util.ManualClock

class RecurringTimerSuite extends SparkFunSuite with PrivateMethodTester {

  test("basic") {
    val clock = new ManualClock()
    val results = new ConcurrentLinkedQueue[Long]()
    val timer = new RecurringTimer(clock, 100, time => {
      results.add(time)
    }, "RecurringTimerSuite-basic")
    timer.start(0)
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(results.asScala.toSeq === Seq(0L))
    }
    clock.advance(100)
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(results.asScala.toSeq === Seq(0L, 100L))
    }
    clock.advance(200)
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(results.asScala.toSeq === Seq(0L, 100L, 200L, 300L))
    }
    assert(timer.stop(interruptTimer = true) === 300L)
  }

  test("SPARK-10224: call 'callback' after stopping") {
    val clock = new ManualClock()
    val results = new ConcurrentLinkedQueue[Long]
    val timer = new RecurringTimer(clock, 100, time => {
      results.add(time)
    }, "RecurringTimerSuite-SPARK-10224")
    timer.start(0)
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(results.asScala.toSeq === Seq(0L))
    }
    @volatile var lastTime = -1L
    // Now RecurringTimer is waiting for the next interval
    val thread = new Thread {
      override def run(): Unit = {
        lastTime = timer.stop(interruptTimer = false)
      }
    }
    thread.start()
    val stopped = PrivateMethod[RecurringTimer]('stopped)
    // Make sure the `stopped` field has been changed
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(timer.invokePrivate(stopped()) === true)
    }
    clock.advance(200)
    // When RecurringTimer is awake from clock.waitTillTime, it will call `callback` once.
    // Then it will find `stopped` is true and exit the loop, but it should call `callback` again
    // before exiting its internal thread.
    thread.join()
    assert(results.asScala.toSeq === Seq(0L, 100L, 200L))
    assert(lastTime === 200L)
  }
}

Source File: ProcessingTimeExecutorSuite.scala From sparkoscope with Apache License 2.0

5 votes

package org.apache.spark.sql.execution.streaming

import java.util.concurrent.{CountDownLatch, TimeUnit}

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.streaming.ProcessingTime
import org.apache.spark.util.{Clock, ManualClock, SystemClock}

class ProcessingTimeExecutorSuite extends SparkFunSuite {

  test("nextBatchTime") {
    val processingTimeExecutor = ProcessingTimeExecutor(ProcessingTime(100))
    assert(processingTimeExecutor.nextBatchTime(0) === 100)
    assert(processingTimeExecutor.nextBatchTime(1) === 100)
    assert(processingTimeExecutor.nextBatchTime(99) === 100)
    assert(processingTimeExecutor.nextBatchTime(100) === 200)
    assert(processingTimeExecutor.nextBatchTime(101) === 200)
    assert(processingTimeExecutor.nextBatchTime(150) === 200)
  }

  test("calling nextBatchTime with the result of a previous call should return the next interval") {
    val intervalMS = 100
    val processingTimeExecutor = ProcessingTimeExecutor(ProcessingTime(intervalMS))

    val ITERATION = 10
    var nextBatchTime: Long = 0
    for (it <- 1 to ITERATION) {
      nextBatchTime = processingTimeExecutor.nextBatchTime(nextBatchTime)
    }

    // nextBatchTime should be 1000
    assert(nextBatchTime === intervalMS * ITERATION)
  }

  private def testBatchTermination(intervalMs: Long): Unit = {
    var batchCounts = 0
    val processingTimeExecutor = ProcessingTimeExecutor(ProcessingTime(intervalMs))
    processingTimeExecutor.execute(() => {
      batchCounts += 1
      // If the batch termination works well, batchCounts should be 3 after `execute`
      batchCounts < 3
    })
    assert(batchCounts === 3)
  }

  test("batch termination") {
    testBatchTermination(0)
    testBatchTermination(10)
  }

  test("notifyBatchFallingBehind") {
    val clock = new ManualClock()
    @volatile var batchFallingBehindCalled = false
    val latch = new CountDownLatch(1)
    val t = new Thread() {
      override def run(): Unit = {
        val processingTimeExecutor = new ProcessingTimeExecutor(ProcessingTime(100), clock) {
          override def notifyBatchFallingBehind(realElapsedTimeMs: Long): Unit = {
            batchFallingBehindCalled = true
          }
        }
        processingTimeExecutor.execute(() => {
          latch.countDown()
          clock.waitTillTime(200)
          false
        })
      }
    }
    t.start()
    // Wait until the batch is running so that we don't call `advance` too early
    assert(latch.await(10, TimeUnit.SECONDS), "the batch has not yet started in 10 seconds")
    clock.advance(200)
    t.join()
    assert(batchFallingBehindCalled === true)
  }
}

Source File: RecurringTimerSuite.scala From sparkoscope with Apache License 2.0

5 votes

package org.apache.spark.streaming.util

import java.util.concurrent.ConcurrentLinkedQueue

import scala.collection.JavaConverters._
import scala.concurrent.duration._

import org.scalatest.PrivateMethodTester
import org.scalatest.concurrent.Eventually._

import org.apache.spark.SparkFunSuite
import org.apache.spark.util.ManualClock

class RecurringTimerSuite extends SparkFunSuite with PrivateMethodTester {

  test("basic") {
    val clock = new ManualClock()
    val results = new ConcurrentLinkedQueue[Long]()
    val timer = new RecurringTimer(clock, 100, time => {
      results.add(time)
    }, "RecurringTimerSuite-basic")
    timer.start(0)
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(results.asScala.toSeq === Seq(0L))
    }
    clock.advance(100)
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(results.asScala.toSeq === Seq(0L, 100L))
    }
    clock.advance(200)
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(results.asScala.toSeq === Seq(0L, 100L, 200L, 300L))
    }
    assert(timer.stop(interruptTimer = true) === 300L)
  }

  test("SPARK-10224: call 'callback' after stopping") {
    val clock = new ManualClock()
    val results = new ConcurrentLinkedQueue[Long]
    val timer = new RecurringTimer(clock, 100, time => {
      results.add(time)
    }, "RecurringTimerSuite-SPARK-10224")
    timer.start(0)
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(results.asScala.toSeq === Seq(0L))
    }
    @volatile var lastTime = -1L
    // Now RecurringTimer is waiting for the next interval
    val thread = new Thread {
      override def run(): Unit = {
        lastTime = timer.stop(interruptTimer = false)
      }
    }
    thread.start()
    val stopped = PrivateMethod[RecurringTimer]('stopped)
    // Make sure the `stopped` field has been changed
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(timer.invokePrivate(stopped()) === true)
    }
    clock.advance(200)
    // When RecurringTimer is awake from clock.waitTillTime, it will call `callback` once.
    // Then it will find `stopped` is true and exit the loop, but it should call `callback` again
    // before exiting its internal thread.
    thread.join()
    assert(results.asScala.toSeq === Seq(0L, 100L, 200L))
    assert(lastTime === 200L)
  }
}

Source File: ProcessingTimeExecutorSuite.scala From multi-tenancy-spark with Apache License 2.0

5 votes

package org.apache.spark.sql.execution.streaming

import java.util.concurrent.{CountDownLatch, TimeUnit}

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.streaming.ProcessingTime
import org.apache.spark.util.{Clock, ManualClock, SystemClock}

class ProcessingTimeExecutorSuite extends SparkFunSuite {

  test("nextBatchTime") {
    val processingTimeExecutor = ProcessingTimeExecutor(ProcessingTime(100))
    assert(processingTimeExecutor.nextBatchTime(0) === 100)
    assert(processingTimeExecutor.nextBatchTime(1) === 100)
    assert(processingTimeExecutor.nextBatchTime(99) === 100)
    assert(processingTimeExecutor.nextBatchTime(100) === 200)
    assert(processingTimeExecutor.nextBatchTime(101) === 200)
    assert(processingTimeExecutor.nextBatchTime(150) === 200)
  }

  test("calling nextBatchTime with the result of a previous call should return the next interval") {
    val intervalMS = 100
    val processingTimeExecutor = ProcessingTimeExecutor(ProcessingTime(intervalMS))

    val ITERATION = 10
    var nextBatchTime: Long = 0
    for (it <- 1 to ITERATION) {
      nextBatchTime = processingTimeExecutor.nextBatchTime(nextBatchTime)
    }

    // nextBatchTime should be 1000
    assert(nextBatchTime === intervalMS * ITERATION)
  }

  private def testBatchTermination(intervalMs: Long): Unit = {
    var batchCounts = 0
    val processingTimeExecutor = ProcessingTimeExecutor(ProcessingTime(intervalMs))
    processingTimeExecutor.execute(() => {
      batchCounts += 1
      // If the batch termination works well, batchCounts should be 3 after `execute`
      batchCounts < 3
    })
    assert(batchCounts === 3)
  }

  test("batch termination") {
    testBatchTermination(0)
    testBatchTermination(10)
  }

  test("notifyBatchFallingBehind") {
    val clock = new ManualClock()
    @volatile var batchFallingBehindCalled = false
    val latch = new CountDownLatch(1)
    val t = new Thread() {
      override def run(): Unit = {
        val processingTimeExecutor = new ProcessingTimeExecutor(ProcessingTime(100), clock) {
          override def notifyBatchFallingBehind(realElapsedTimeMs: Long): Unit = {
            batchFallingBehindCalled = true
          }
        }
        processingTimeExecutor.execute(() => {
          latch.countDown()
          clock.waitTillTime(200)
          false
        })
      }
    }
    t.start()
    // Wait until the batch is running so that we don't call `advance` too early
    assert(latch.await(10, TimeUnit.SECONDS), "the batch has not yet started in 10 seconds")
    clock.advance(200)
    t.join()
    assert(batchFallingBehindCalled === true)
  }
}

Source File: RecurringTimerSuite.scala From multi-tenancy-spark with Apache License 2.0

5 votes

package org.apache.spark.streaming.util

import java.util.concurrent.ConcurrentLinkedQueue

import scala.collection.JavaConverters._
import scala.concurrent.duration._

import org.scalatest.PrivateMethodTester
import org.scalatest.concurrent.Eventually._

import org.apache.spark.SparkFunSuite
import org.apache.spark.util.ManualClock

class RecurringTimerSuite extends SparkFunSuite with PrivateMethodTester {

  test("basic") {
    val clock = new ManualClock()
    val results = new ConcurrentLinkedQueue[Long]()
    val timer = new RecurringTimer(clock, 100, time => {
      results.add(time)
    }, "RecurringTimerSuite-basic")
    timer.start(0)
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(results.asScala.toSeq === Seq(0L))
    }
    clock.advance(100)
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(results.asScala.toSeq === Seq(0L, 100L))
    }
    clock.advance(200)
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(results.asScala.toSeq === Seq(0L, 100L, 200L, 300L))
    }
    assert(timer.stop(interruptTimer = true) === 300L)
  }

  test("SPARK-10224: call 'callback' after stopping") {
    val clock = new ManualClock()
    val results = new ConcurrentLinkedQueue[Long]
    val timer = new RecurringTimer(clock, 100, time => {
      results.add(time)
    }, "RecurringTimerSuite-SPARK-10224")
    timer.start(0)
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(results.asScala.toSeq === Seq(0L))
    }
    @volatile var lastTime = -1L
    // Now RecurringTimer is waiting for the next interval
    val thread = new Thread {
      override def run(): Unit = {
        lastTime = timer.stop(interruptTimer = false)
      }
    }
    thread.start()
    val stopped = PrivateMethod[RecurringTimer]('stopped)
    // Make sure the `stopped` field has been changed
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(timer.invokePrivate(stopped()) === true)
    }
    clock.advance(200)
    // When RecurringTimer is awake from clock.waitTillTime, it will call `callback` once.
    // Then it will find `stopped` is true and exit the loop, but it should call `callback` again
    // before exiting its internal thread.
    thread.join()
    assert(results.asScala.toSeq === Seq(0L, 100L, 200L))
    assert(lastTime === 200L)
  }
}

Source File: FlumePollingStreamSuite.scala From spark1.52 with Apache License 2.0

5 votes

package org.apache.spark.streaming.flume

import java.net.InetSocketAddress

import scala.collection.JavaConversions._
import scala.collection.mutable.{SynchronizedBuffer, ArrayBuffer}
import scala.concurrent.duration._
import scala.language.postfixOps

import com.google.common.base.Charsets.UTF_8
import org.scalatest.BeforeAndAfter
import org.scalatest.concurrent.Eventually._

import org.apache.spark.{Logging, SparkConf, SparkFunSuite}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.{Seconds, TestOutputStream, StreamingContext}
import org.apache.spark.util.{ManualClock, Utils}

  private def testMultipleTimes(test: () => Unit): Unit = {
    var testPassed = false
    var attempt = 0
    while (!testPassed && attempt < maxAttempts) {
      try {
        test()
        testPassed = true
      } catch {
        case e: Exception if Utils.isBindCollision(e) =>
          logWarning("Exception when running flume polling test: " + e)
          attempt += 1
      }
    }
    assert(testPassed, s"Test failed after $attempt attempts!")
  }

  private def testFlumePolling(): Unit = {
    try {
      val port = utils.startSingleSink()

      writeAndVerify(Seq(port))
      utils.assertChannelsAreEmpty()
    } finally {
      utils.close()
    }
  }

  private def testFlumePollingMultipleHost(): Unit = {
    try {
      val ports = utils.startMultipleSinks()
      writeAndVerify(ports)
      utils.assertChannelsAreEmpty()
    } finally {
      utils.close()
    }
  }

  def writeAndVerify(sinkPorts: Seq[Int]): Unit = {
    // Set up the streaming context and input streams
    //设置流上下文和输入流
    val ssc = new StreamingContext(conf, batchDuration)
    val addresses = sinkPorts.map(port => new InetSocketAddress("localhost", port))
    val flumeStream: ReceiverInputDStream[SparkFlumeEvent] =
      FlumeUtils.createPollingStream(ssc, addresses, StorageLevel.MEMORY_AND_DISK,
        utils.eventsPerBatch, 5)
    val outputBuffer = new ArrayBuffer[Seq[SparkFlumeEvent]]
      with SynchronizedBuffer[Seq[SparkFlumeEvent]]
    val outputStream = new TestOutputStream(flumeStream, outputBuffer)
    outputStream.register()

    ssc.start()
    try {
      utils.sendDatAndEnsureAllDataHasBeenReceived()
      val clock = ssc.scheduler.clock.asInstanceOf[ManualClock]
      clock.advance(batchDuration.milliseconds)

      // The eventually is required to ensure that all data in the batch has been processed.
      //最终需要确保批处理中的所有数据已被处理
      eventually(timeout(10 seconds), interval(100 milliseconds)) {
        val flattenOutputBuffer = outputBuffer.flatten
        val headers = flattenOutputBuffer.map(_.event.getHeaders.map {
          case kv => (kv._1.toString, kv._2.toString)
        }).map(mapAsJavaMap)
        val bodies = flattenOutputBuffer.map(e => new String(e.event.getBody.array(), UTF_8))
        utils.assertOutput(headers, bodies)
      }
    } finally {
      ssc.stop()
    }
  }

}

Source File: RecurringTimerSuite.scala From Spark-2.3.1 with Apache License 2.0

5 votes

package org.apache.spark.streaming.util

import java.util.concurrent.ConcurrentLinkedQueue

import scala.collection.JavaConverters._
import scala.concurrent.duration._

import org.scalatest.PrivateMethodTester
import org.scalatest.concurrent.Eventually._

import org.apache.spark.SparkFunSuite
import org.apache.spark.util.ManualClock

class RecurringTimerSuite extends SparkFunSuite with PrivateMethodTester {

  test("basic") {
    val clock = new ManualClock()
    val results = new ConcurrentLinkedQueue[Long]()
    val timer = new RecurringTimer(clock, 100, time => {
      results.add(time)
    }, "RecurringTimerSuite-basic")
    timer.start(0)
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(results.asScala.toSeq === Seq(0L))
    }
    clock.advance(100)
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(results.asScala.toSeq === Seq(0L, 100L))
    }
    clock.advance(200)
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(results.asScala.toSeq === Seq(0L, 100L, 200L, 300L))
    }
    assert(timer.stop(interruptTimer = true) === 300L)
  }

  test("SPARK-10224: call 'callback' after stopping") {
    val clock = new ManualClock()
    val results = new ConcurrentLinkedQueue[Long]
    val timer = new RecurringTimer(clock, 100, time => {
      results.add(time)
    }, "RecurringTimerSuite-SPARK-10224")
    timer.start(0)
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(results.asScala.toSeq === Seq(0L))
    }
    @volatile var lastTime = -1L
    // Now RecurringTimer is waiting for the next interval
    val thread = new Thread {
      override def run(): Unit = {
        lastTime = timer.stop(interruptTimer = false)
      }
    }
    thread.start()
    val stopped = PrivateMethod[RecurringTimer]('stopped)
    // Make sure the `stopped` field has been changed
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(timer.invokePrivate(stopped()) === true)
    }
    clock.advance(200)
    // When RecurringTimer is awake from clock.waitTillTime, it will call `callback` once.
    // Then it will find `stopped` is true and exit the loop, but it should call `callback` again
    // before exiting its internal thread.
    thread.join()
    assert(results.asScala.toSeq === Seq(0L, 100L, 200L))
    assert(lastTime === 200L)
  }
}

Source File: RecurringTimerSuite.scala From BigDatalog with Apache License 2.0

5 votes

package org.apache.spark.streaming.util

import scala.collection.mutable
import scala.concurrent.duration._

import org.scalatest.PrivateMethodTester
import org.scalatest.concurrent.Eventually._

import org.apache.spark.SparkFunSuite
import org.apache.spark.util.ManualClock

class RecurringTimerSuite extends SparkFunSuite with PrivateMethodTester {

  test("basic") {
    val clock = new ManualClock()
    val results = new mutable.ArrayBuffer[Long]() with mutable.SynchronizedBuffer[Long]
    val timer = new RecurringTimer(clock, 100, time => {
      results += time
    }, "RecurringTimerSuite-basic")
    timer.start(0)
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(results === Seq(0L))
    }
    clock.advance(100)
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(results === Seq(0L, 100L))
    }
    clock.advance(200)
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(results === Seq(0L, 100L, 200L, 300L))
    }
    assert(timer.stop(interruptTimer = true) === 300L)
  }

  test("SPARK-10224: call 'callback' after stopping") {
    val clock = new ManualClock()
    val results = new mutable.ArrayBuffer[Long]() with mutable.SynchronizedBuffer[Long]
    val timer = new RecurringTimer(clock, 100, time => {
      results += time
    }, "RecurringTimerSuite-SPARK-10224")
    timer.start(0)
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(results === Seq(0L))
    }
    @volatile var lastTime = -1L
    // Now RecurringTimer is waiting for the next interval
    val thread = new Thread {
      override def run(): Unit = {
        lastTime = timer.stop(interruptTimer = false)
      }
    }
    thread.start()
    val stopped = PrivateMethod[RecurringTimer]('stopped)
    // Make sure the `stopped` field has been changed
    eventually(timeout(10.seconds), interval(10.millis)) {
      assert(timer.invokePrivate(stopped()) === true)
    }
    clock.advance(200)
    // When RecurringTimer is awake from clock.waitTillTime, it will call `callback` once.
    // Then it will find `stopped` is true and exit the loop, but it should call `callback` again
    // before exiting its internal thread.
    thread.join()
    assert(results === Seq(0L, 100L, 200L))
    assert(lastTime === 200L)
  }
}

org.apache.spark.util.ManualClock Scala Examples