org.apache.spark.streaming.Duration Scala Examples
The following examples show how to use org.apache.spark.streaming.Duration.
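All of the examples below pass Duration values into the streaming API, so a quick orientation on the class itself may help. The following sketch is not taken from any of the listed projects; it only illustrates the construction helpers and arithmetic that the examples rely on.

import org.apache.spark.streaming.{Duration, Minutes, Seconds}

// Durations are millisecond counts with convenience constructors.
val batchInterval: Duration = Seconds(2)            // equivalent to Duration(2000)
val windowDuration: Duration = Minutes(1)           // equivalent to Duration(60000)

// Basic arithmetic and inspection.
val remember: Duration = windowDuration + batchInterval
println(remember.milliseconds)                      // 62000
println(windowDuration.isMultipleOf(batchInterval)) // true: 60000 ms is a multiple of 2000 ms

Durations parameterize the StreamingContext batch interval, window and slide sizes, and checkpoint intervals, as the examples below show.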
Example 1
Source File: KinesisInputDStream.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.kinesis

import scala.reflect.ClassTag

import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream
import com.amazonaws.services.kinesis.model.Record

import org.apache.spark.rdd.RDD
import org.apache.spark.storage.{BlockId, StorageLevel}
import org.apache.spark.streaming.{Duration, StreamingContext, Time}
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.receiver.Receiver
import org.apache.spark.streaming.scheduler.ReceivedBlockInfo

private[kinesis] class KinesisInputDStream[T: ClassTag](
    _ssc: StreamingContext,
    streamName: String,
    endpointUrl: String,
    regionName: String,
    initialPositionInStream: InitialPositionInStream,
    checkpointAppName: String,
    checkpointInterval: Duration,
    storageLevel: StorageLevel,
    messageHandler: Record => T,
    awsCredentialsOption: Option[SerializableAWSCredentials]
  ) extends ReceiverInputDStream[T](_ssc) {

  private[streaming]
  override def createBlockRDD(time: Time, blockInfos: Seq[ReceivedBlockInfo]): RDD[T] = {
    // This returns true even for when blockInfos is empty
    val allBlocksHaveRanges = blockInfos.map { _.metadataOption }.forall(_.nonEmpty)

    if (allBlocksHaveRanges) {
      // Create a KinesisBackedBlockRDD, even when there are no blocks
      val blockIds = blockInfos.map { _.blockId.asInstanceOf[BlockId] }.toArray
      val seqNumRanges = blockInfos.map {
        _.metadataOption.get.asInstanceOf[SequenceNumberRanges] }.toArray
      val isBlockIdValid = blockInfos.map { _.isBlockIdValid() }.toArray
      logDebug(s"Creating KinesisBackedBlockRDD for $time with ${seqNumRanges.length} " +
        s"seq number ranges: ${seqNumRanges.mkString(", ")} ")
      new KinesisBackedBlockRDD(
        context.sc, regionName, endpointUrl, blockIds, seqNumRanges,
        isBlockIdValid = isBlockIdValid,
        retryTimeoutMs = ssc.graph.batchDuration.milliseconds.toInt,
        messageHandler = messageHandler,
        awsCredentialsOption = awsCredentialsOption)
    } else {
      logWarning("Kinesis sequence number information was not present with some block metadata," +
        " it may not be possible to recover from failures")
      super.createBlockRDD(time, blockInfos)
    }
  }

  override def getReceiver(): Receiver[T] = {
    new KinesisReceiver(streamName, endpointUrl, regionName, initialPositionInStream,
      checkpointAppName, checkpointInterval, storageLevel, messageHandler, awsCredentialsOption)
  }
}
Example 2
Source File: UnionDStream.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag

import org.apache.spark.SparkException
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}

private[streaming]
class UnionDStream[T: ClassTag](parents: Array[DStream[T]])
  extends DStream[T](parents.head.ssc) {

  require(parents.length > 0, "List of DStreams to union is empty")
  require(parents.map(_.ssc).distinct.length == 1, "Some of the DStreams have different contexts")
  require(parents.map(_.slideDuration).distinct.length == 1,
    "Some of the DStreams have different slide durations")

  override def dependencies: List[DStream[_]] = parents.toList

  override def slideDuration: Duration = parents.head.slideDuration

  override def compute(validTime: Time): Option[RDD[T]] = {
    val rdds = new ArrayBuffer[RDD[T]]()
    parents.map(_.getOrCompute(validTime)).foreach {
      case Some(rdd) => rdds += rdd
      case None => throw new SparkException("Could not generate RDD from a parent for unifying at" +
        s" time $validTime")
    }
    if (rdds.nonEmpty) {
      Some(ssc.sc.union(rdds))
    } else {
      None
    }
  }
}
Example 3
Source File: ForEachDStream.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}
import org.apache.spark.streaming.scheduler.Job

private[streaming]
class ForEachDStream[T: ClassTag] (
    parent: DStream[T],
    foreachFunc: (RDD[T], Time) => Unit,
    displayInnerRDDOps: Boolean
  ) extends DStream[Unit](parent.ssc) {

  override def dependencies: List[DStream[_]] = List(parent)

  override def slideDuration: Duration = parent.slideDuration

  override def compute(validTime: Time): Option[RDD[Unit]] = None

  override def generateJob(time: Time): Option[Job] = {
    parent.getOrCompute(time) match {
      case Some(rdd) =>
        val jobFunc = () => createRDDWithLocalProperties(time, displayInnerRDDOps) {
          foreachFunc(rdd, time)
        }
        Some(new Job(time, jobFunc))
      case None => None
    }
  }
}
Example 4
Source File: FlatMappedDStream.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}

private[streaming]
class FlatMappedDStream[T: ClassTag, U: ClassTag](
    parent: DStream[T],
    flatMapFunc: T => TraversableOnce[U]
  ) extends DStream[U](parent.ssc) {

  override def dependencies: List[DStream[_]] = List(parent)

  override def slideDuration: Duration = parent.slideDuration

  override def compute(validTime: Time): Option[RDD[U]] = {
    parent.getOrCompute(validTime).map(_.flatMap(flatMapFunc))
  }
}
Example 5
Source File: WindowedDStream.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming._
import org.apache.spark.streaming.Duration

private[streaming]
class WindowedDStream[T: ClassTag](
    parent: DStream[T],
    _windowDuration: Duration,
    _slideDuration: Duration)
  extends DStream[T](parent.ssc) {

  if (!_windowDuration.isMultipleOf(parent.slideDuration)) {
    throw new Exception("The window duration of windowed DStream (" + _windowDuration + ") " +
      "must be a multiple of the slide duration of parent DStream (" + parent.slideDuration + ")")
  }

  if (!_slideDuration.isMultipleOf(parent.slideDuration)) {
    throw new Exception("The slide duration of windowed DStream (" + _slideDuration + ") " +
      "must be a multiple of the slide duration of parent DStream (" + parent.slideDuration + ")")
  }

  // Persist parent level by default, as those RDDs are going to be obviously reused.
  parent.persist(StorageLevel.MEMORY_ONLY_SER)

  def windowDuration: Duration = _windowDuration

  override def dependencies: List[DStream[_]] = List(parent)

  override def slideDuration: Duration = _slideDuration

  override def parentRememberDuration: Duration = rememberDuration + windowDuration

  override def persist(level: StorageLevel): DStream[T] = {
    // Do not let this windowed DStream be persisted as windowed (union-ed) RDDs share underlying
    // RDDs and persisting the windowed RDDs would store numerous copies of the underlying data.
    // Instead control the persistence of the parent DStream.
    parent.persist(level)
    this
  }

  override def compute(validTime: Time): Option[RDD[T]] = {
    val currentWindow = new Interval(validTime - windowDuration + parent.slideDuration, validTime)
    val rddsInWindow = parent.slice(currentWindow)
    Some(ssc.sc.union(rddsInWindow))
  }
}
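The checks at the top of WindowedDStream encode the usual rule: the window and slide Durations must be multiples of the parent stream's slide duration. A minimal, hypothetical user-side sketch of how those Durations are normally supplied through the public windowing API (the socket source and all names are illustrative, not part of the example above):

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

// Sketch only: both window (30 s) and slide (10 s) are multiples of the
// 5-second batch interval, so the constructor checks above are satisfied.
val conf = new SparkConf().setMaster("local[2]").setAppName("WindowSketch")
val ssc = new StreamingContext(conf, Seconds(5))
val lines = ssc.socketTextStream("localhost", 9999)
val windowedCounts = lines
  .flatMap(_.split(" "))
  .map((_, 1))
  .reduceByKeyAndWindow(_ + _, Seconds(30), Seconds(10))
windowedCounts.print()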
Example 6
Source File: ShuffledDStream.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.reflect.ClassTag

import org.apache.spark.Partitioner
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}

private[streaming]
class ShuffledDStream[K: ClassTag, V: ClassTag, C: ClassTag](
    parent: DStream[(K, V)],
    createCombiner: V => C,
    mergeValue: (C, V) => C,
    mergeCombiner: (C, C) => C,
    partitioner: Partitioner,
    mapSideCombine: Boolean = true
  ) extends DStream[(K, C)] (parent.ssc) {

  override def dependencies: List[DStream[_]] = List(parent)

  override def slideDuration: Duration = parent.slideDuration

  override def compute(validTime: Time): Option[RDD[(K, C)]] = {
    parent.getOrCompute(validTime) match {
      case Some(rdd) => Some(rdd.combineByKey[C](
          createCombiner, mergeValue, mergeCombiner, partitioner, mapSideCombine))
      case None => None
    }
  }
}
Example 7
Source File: FilteredDStream.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}

private[streaming]
class FilteredDStream[T: ClassTag](
    parent: DStream[T],
    filterFunc: T => Boolean
  ) extends DStream[T](parent.ssc) {

  override def dependencies: List[DStream[_]] = List(parent)

  override def slideDuration: Duration = parent.slideDuration

  override def compute(validTime: Time): Option[RDD[T]] = {
    parent.getOrCompute(validTime).map(_.filter(filterFunc))
  }
}
Example 8
Source File: FlatMapValuedDStream.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}

private[streaming]
class FlatMapValuedDStream[K: ClassTag, V: ClassTag, U: ClassTag](
    parent: DStream[(K, V)],
    flatMapValueFunc: V => TraversableOnce[U]
  ) extends DStream[(K, U)](parent.ssc) {

  override def dependencies: List[DStream[_]] = List(parent)

  override def slideDuration: Duration = parent.slideDuration

  override def compute(validTime: Time): Option[RDD[(K, U)]] = {
    parent.getOrCompute(validTime).map(_.flatMapValues[U](flatMapValueFunc))
  }
}
Example 9
Source File: MapValuedDStream.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}

private[streaming]
class MapValuedDStream[K: ClassTag, V: ClassTag, U: ClassTag](
    parent: DStream[(K, V)],
    mapValueFunc: V => U
  ) extends DStream[(K, U)](parent.ssc) {

  override def dependencies: List[DStream[_]] = List(parent)

  override def slideDuration: Duration = parent.slideDuration

  override def compute(validTime: Time): Option[RDD[(K, U)]] = {
    parent.getOrCompute(validTime).map(_.mapValues[U](mapValueFunc))
  }
}
Example 10
Source File: TransformedDStream.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.reflect.ClassTag

import org.apache.spark.SparkException
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}

private[streaming]
class TransformedDStream[U: ClassTag] (
    parents: Seq[DStream[_]],
    transformFunc: (Seq[RDD[_]], Time) => RDD[U]
  ) extends DStream[U](parents.head.ssc) {

  require(parents.nonEmpty, "List of DStreams to transform is empty")
  require(parents.map(_.ssc).distinct.size == 1, "Some of the DStreams have different contexts")
  require(parents.map(_.slideDuration).distinct.size == 1,
    "Some of the DStreams have different slide durations")

  override def dependencies: List[DStream[_]] = parents.toList

  override def slideDuration: Duration = parents.head.slideDuration

  override def compute(validTime: Time): Option[RDD[U]] = {
    val parentRDDs = parents.map { parent => parent.getOrCompute(validTime).getOrElse(
      // Guard out against parent DStream that return None instead of Some(rdd) to avoid NPE
      throw new SparkException(s"Couldn't generate RDD from parent at time $validTime"))
    }
    val transformedRDD = transformFunc(parentRDDs, validTime)
    if (transformedRDD == null) {
      throw new SparkException("Transform function must not return null. " +
        "Return SparkContext.emptyRDD() instead to represent no element " +
        "as the result of transformation.")
    }
    Some(transformedRDD)
  }

  override protected[streaming] def createRDDWithLocalProperties[U](
      time: Time,
      displayInnerRDDOps: Boolean)(body: => U): U = {
    super.createRDDWithLocalProperties(time, displayInnerRDDOps = true)(body)
  }
}
Example 11
Source File: MappedDStream.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}

private[streaming]
class MappedDStream[T: ClassTag, U: ClassTag] (
    parent: DStream[T],
    mapFunc: T => U
  ) extends DStream[U](parent.ssc) {

  override def dependencies: List[DStream[_]] = List(parent)

  override def slideDuration: Duration = parent.slideDuration

  override def compute(validTime: Time): Option[RDD[U]] = {
    parent.getOrCompute(validTime).map(_.map[U](mapFunc))
  }
}
Example 12
Source File: MapPartitionedDStream.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}

private[streaming]
class MapPartitionedDStream[T: ClassTag, U: ClassTag](
    parent: DStream[T],
    mapPartFunc: Iterator[T] => Iterator[U],
    preservePartitioning: Boolean
  ) extends DStream[U](parent.ssc) {

  override def dependencies: List[DStream[_]] = List(parent)

  override def slideDuration: Duration = parent.slideDuration

  override def compute(validTime: Time): Option[RDD[U]] = {
    parent.getOrCompute(validTime).map(_.mapPartitions[U](mapPartFunc, preservePartitioning))
  }
}
Example 13
Source File: RateEstimator.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.scheduler.rate

import org.apache.spark.SparkConf
import org.apache.spark.streaming.Duration

// NOTE: this excerpt omits the surrounding declarations from the original file.
// The RateEstimator trait and the companion object wrapping create are restored
// here in abbreviated form so the snippet is well-formed.
private[streaming] trait RateEstimator extends Serializable {
  def compute(
      time: Long,
      elements: Long,
      processingDelay: Long,
      schedulingDelay: Long): Option[Double]
}

object RateEstimator {

  def create(conf: SparkConf, batchInterval: Duration): RateEstimator =
    conf.get("spark.streaming.backpressure.rateEstimator", "pid") match {
      case "pid" =>
        val proportional = conf.getDouble("spark.streaming.backpressure.pid.proportional", 1.0)
        val integral = conf.getDouble("spark.streaming.backpressure.pid.integral", 0.2)
        val derived = conf.getDouble("spark.streaming.backpressure.pid.derived", 0.0)
        val minRate = conf.getDouble("spark.streaming.backpressure.pid.minRate", 100)
        new PIDRateEstimator(batchInterval.milliseconds, proportional, integral, derived, minRate)

      case estimator =>
        throw new IllegalArgumentException(s"Unknown rate estimator: $estimator")
    }
}
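RateEstimator.create is invoked internally by the streaming scheduler, with the batch interval passed in as a Duration; application code usually reaches the same knobs through configuration. A hedged sketch of the relevant settings, mirroring the keys read in the code above ("pid" is the only value that create handles; the numbers are illustrative, not recommendations):

import org.apache.spark.SparkConf

// Minimal sketch: enable backpressure and tune the PID estimator that
// RateEstimator.create builds from these keys.
val conf = new SparkConf()
  .set("spark.streaming.backpressure.enabled", "true")
  .set("spark.streaming.backpressure.rateEstimator", "pid")
  .set("spark.streaming.backpressure.pid.proportional", "1.0")
  .set("spark.streaming.backpressure.pid.integral", "0.2")
  .set("spark.streaming.backpressure.pid.derived", "0.0")
  .set("spark.streaming.backpressure.pid.minRate", "100")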
Example 14
Source File: InputInfoTrackerSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.scheduler

import org.scalatest.BeforeAndAfter

import org.apache.spark.{SparkConf, SparkFunSuite}
import org.apache.spark.streaming.{Duration, StreamingContext, Time}

class InputInfoTrackerSuite extends SparkFunSuite with BeforeAndAfter {

  private var ssc: StreamingContext = _

  before {
    val conf = new SparkConf().setMaster("local[2]").setAppName("DirectStreamTacker")
    if (ssc == null) {
      ssc = new StreamingContext(conf, Duration(1000))
    }
  }

  after {
    if (ssc != null) {
      ssc.stop()
      ssc = null
    }
  }

  test("test report and get InputInfo from InputInfoTracker") {
    val inputInfoTracker = new InputInfoTracker(ssc)

    val streamId1 = 0
    val streamId2 = 1
    val time = Time(0L)
    val inputInfo1 = StreamInputInfo(streamId1, 100L)
    val inputInfo2 = StreamInputInfo(streamId2, 300L)
    inputInfoTracker.reportInfo(time, inputInfo1)
    inputInfoTracker.reportInfo(time, inputInfo2)

    val batchTimeToInputInfos = inputInfoTracker.getInfo(time)
    assert(batchTimeToInputInfos.size == 2)
    assert(batchTimeToInputInfos.keys === Set(streamId1, streamId2))
    assert(batchTimeToInputInfos(streamId1) === inputInfo1)
    assert(batchTimeToInputInfos(streamId2) === inputInfo2)
    assert(inputInfoTracker.getInfo(time)(streamId1) === inputInfo1)
  }

  test("test cleanup InputInfo from InputInfoTracker") {
    val inputInfoTracker = new InputInfoTracker(ssc)

    val streamId1 = 0
    val inputInfo1 = StreamInputInfo(streamId1, 100L)
    val inputInfo2 = StreamInputInfo(streamId1, 300L)
    inputInfoTracker.reportInfo(Time(0), inputInfo1)
    inputInfoTracker.reportInfo(Time(1), inputInfo2)

    inputInfoTracker.cleanup(Time(0))
    assert(inputInfoTracker.getInfo(Time(0))(streamId1) === inputInfo1)
    assert(inputInfoTracker.getInfo(Time(1))(streamId1) === inputInfo2)

    inputInfoTracker.cleanup(Time(1))
    assert(inputInfoTracker.getInfo(Time(0)).get(streamId1) === None)
    assert(inputInfoTracker.getInfo(Time(1))(streamId1) === inputInfo2)
  }
}
Example 15
Source File: TestUpdateStateByKey.scala From spark-dev with GNU General Public License v3.0 | 5 votes |
package examples.streaming

import org.apache.spark.streaming.{StreamingContext, Duration}
import org.apache.spark.SparkConf

object TestUpdateStateByKey {
  val checkpointDir: String = "hdfs://localhost:9000/user/hduser/spark-chkpt"

  def main(args: Array[String]): Unit = {
    val ssc = StreamingContext.getOrCreate(checkpointDir, createFunc _)

    ssc.start()
    ssc.awaitTermination()
  }

  def updateFunc(values: Seq[Int], state: Option[Int]): Option[Int] = {
    Some(values.size + state.getOrElse(0))
  }

  def createFunc(): StreamingContext = {
    val ssc = new StreamingContext(
      new SparkConf().setAppName("TestUpdateStateByKeyJob"), Duration(2000))

    ssc.checkpoint(checkpointDir)

    ssc.socketTextStream("localhost", 9999)
      .flatMap(_.split(" "))
      .map((_, 1))
      .updateStateByKey(updateFunc _)
      .checkpoint(Duration(10000))
      .print()

    ssc
  }
}
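A detail worth noting in this example: both the batch interval (Duration(2000)) and the per-stream checkpoint interval (Duration(10000)) are Durations, and Spark checks at start-up that the checkpoint interval is a multiple of the stream's slide duration. The variant below is a sketch of the same createFunc written with the Seconds helper instead of raw milliseconds; the method name is hypothetical and the directory is reused from the example.

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

// Sketch only: Seconds(2) and Seconds(10) are equivalent to Duration(2000)
// and Duration(10000); the 10 s checkpoint interval remains a multiple of
// the 2 s batch interval, which Spark validates when the context starts.
def createFuncWithSeconds(): StreamingContext = {
  val ssc = new StreamingContext(
    new SparkConf().setAppName("TestUpdateStateByKeyJob"), Seconds(2))
  ssc.checkpoint("hdfs://localhost:9000/user/hduser/spark-chkpt")
  ssc.socketTextStream("localhost", 9999)
    .flatMap(_.split(" "))
    .map((_, 1))
    .updateStateByKey((values: Seq[Int], state: Option[Int]) =>
      Some(values.size + state.getOrElse(0)))
    .checkpoint(Seconds(10))
    .print()
  ssc
}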
Example 16
Source File: StreamingLifeCycle.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.spark.streaming

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.streaming.{Duration, Seconds, StreamingContext}

private[spark] trait StreamingLifeCycle {
  val timeOut: Long
  protected[this] val batchDuration: Duration = Seconds(1)

  val streamingContext = new StreamingContext(
    new SparkConf()
      .setMaster("local[4]")
      .setAppName("StreamingStats")
      .set("spark.default.parallelism", "4")
      .set("spark.rdd.compress", "true")
      .set("spark.executor.memory", "8g")
      .set("spark.shuffle.spill", "true")
      .set("spark.shuffle.spill.compress", "true")
      .set("spark.io.compression.codec", "lzf"),
    Seconds(2)
  )

  def sparkContext: SparkContext = streamingContext.sparkContext

  def start: Unit = streamingContext.start

  def terminate: Unit = {
    streamingContext.stop(true, true)
    streamingContext.awaitTerminationOrTimeout(timeOut)
  }
}

// ---------------------------------------------- EOF --------------------------------
Example 17
Source File: SpartaWorkflow.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta.driver

import com.stratio.sparta.driver.factory.SparkContextFactory._
import com.stratio.sparta.driver.schema.SchemaHelper
import com.stratio.sparta.driver.stage._
import com.stratio.sparta.sdk.pipeline.input.Input
import com.stratio.sparta.sdk.utils.AggregationTime
import com.stratio.sparta.serving.core.helpers.PolicyHelper
import com.stratio.sparta.serving.core.models.policy._
import com.stratio.sparta.serving.core.utils.CheckpointUtils
import org.apache.curator.framework.CuratorFramework
import org.apache.spark.streaming.{Duration, StreamingContext}

class SpartaWorkflow(val policy: PolicyModel, val curatorFramework: CuratorFramework)
  extends CheckpointUtils
    with InputStage with OutputStage with ParserStage with CubeStage
    with RawDataStage with TriggerStage with ZooKeeperError {

  clearError()

  private val ReflectionUtils = PolicyHelper.ReflectionUtils
  private val outputs = outputStage(ReflectionUtils)
  private var input: Option[Input] = None

  def setup(): Unit = {
    input.foreach(input => input.setUp())
    outputs.foreach(output => output.setUp())
  }

  def cleanUp(): Unit = {
    input.foreach(input => input.cleanUp())
    outputs.foreach(output => output.cleanUp())
  }

  def streamingStages(): StreamingContext = {
    clearError()

    val checkpointPolicyPath = checkpointPath(policy)
    val window = AggregationTime.parseValueToMilliSeconds(policy.sparkStreamingWindow)
    val ssc = sparkStreamingInstance(Duration(window), checkpointPolicyPath, policy.remember)
    if (input.isEmpty)
      input = Option(createInput(ssc.get, ReflectionUtils))
    val inputDStream = inputStreamStage(ssc.get, input.get)

    saveRawData(policy.rawData, inputDStream, outputs)

    policy.transformations.foreach { transformationsModel =>
      val parserSchemas = SchemaHelper.getSchemasFromTransformations(
        transformationsModel.transformationsPipe, Input.InitSchema)
      val (parsers, writerOptions) = parserStage(ReflectionUtils, parserSchemas)
      val parsedData = ParserStage.applyParsers(
        inputDStream, parsers, parserSchemas.values.last, outputs, writerOptions)

      triggersStreamStage(parserSchemas.values.last, parsedData, outputs, window)
      cubesStreamStage(ReflectionUtils, parserSchemas.values.last, parsedData, outputs)
    }

    ssc.get
  }
}

object SpartaWorkflow {

  def apply(policy: PolicyModel, curatorFramework: CuratorFramework): SpartaWorkflow =
    new SpartaWorkflow(policy, curatorFramework)
}
Example 18
Source File: SparkContextFactoryTest.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta.driver.test.factory

import com.stratio.sparta.driver.factory.SparkContextFactory
import com.stratio.sparta.serving.core.config.SpartaConfig
import com.stratio.sparta.serving.core.helpers.PolicyHelper
import com.typesafe.config.ConfigFactory
import org.apache.spark.streaming.Duration
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{BeforeAndAfterAll, FlatSpec, _}

@RunWith(classOf[JUnitRunner])
class SparkContextFactoryTest extends FlatSpec with ShouldMatchers with BeforeAndAfterAll {
  self: FlatSpec =>

  override def afterAll {
    SparkContextFactory.destroySparkContext()
  }

  trait WithConfig {

    val config = SpartaConfig.initConfig("sparta.local")
    val wrongConfig = ConfigFactory.empty
    val seconds = 6
    val batchDuraction = Duration(seconds)
    val specificConfig = Map("spark.driver.allowMultipleContexts" -> "true") ++
      PolicyHelper.getSparkConfFromProps(config.get)
  }

  "SparkContextFactorySpec" should "fails when properties is missing" in new WithConfig {
    an[Exception] should be thrownBy SparkContextFactory.sparkStandAloneContextInstance(
      Map.empty[String, String], Seq())
  }

  it should "create and reuse same context" in new WithConfig {
    val sc = SparkContextFactory.sparkStandAloneContextInstance(specificConfig, Seq())
    val otherSc = SparkContextFactory.sparkStandAloneContextInstance(specificConfig, Seq())
    sc should be equals (otherSc)
    SparkContextFactory.destroySparkContext()
  }

  it should "create and reuse same SparkSession" in new WithConfig {
    val sc = SparkContextFactory.sparkStandAloneContextInstance(specificConfig, Seq())
    val sqc = SparkContextFactory.sparkSessionInstance
    sqc shouldNot be equals (null)
    val otherSqc = SparkContextFactory.sparkSessionInstance
    sqc should be equals (otherSqc)
    SparkContextFactory.destroySparkContext()
  }

  it should "create and reuse same SparkStreamingContext" in new WithConfig {
    val checkpointDir = "checkpoint/SparkContextFactorySpec"
    val sc = SparkContextFactory.sparkStandAloneContextInstance(specificConfig, Seq())
    val ssc = SparkContextFactory.sparkStreamingInstance(batchDuraction, checkpointDir, None)
    ssc shouldNot be equals (None)
    val otherSsc = SparkContextFactory.sparkStreamingInstance(batchDuraction, checkpointDir, None)
    ssc should be equals (otherSsc)
  }
}
Example 19
Source File: KinesisInputDStream.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.kinesis

import scala.reflect.ClassTag

import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream
import com.amazonaws.services.kinesis.model.Record

import org.apache.spark.rdd.RDD
import org.apache.spark.storage.{BlockId, StorageLevel}
import org.apache.spark.streaming.{Duration, StreamingContext, Time}
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.receiver.Receiver
import org.apache.spark.streaming.scheduler.ReceivedBlockInfo

private[kinesis] class KinesisInputDStream[T: ClassTag](
    _ssc: StreamingContext,
    streamName: String,
    endpointUrl: String,
    regionName: String,
    initialPositionInStream: InitialPositionInStream,
    checkpointAppName: String,
    checkpointInterval: Duration,
    storageLevel: StorageLevel,
    messageHandler: Record => T,
    awsCredentialsOption: Option[SerializableAWSCredentials]
  ) extends ReceiverInputDStream[T](_ssc) {

  private[streaming]
  override def createBlockRDD(time: Time, blockInfos: Seq[ReceivedBlockInfo]): RDD[T] = {
    // This returns true even for when blockInfos is empty
    val allBlocksHaveRanges = blockInfos.map { _.metadataOption }.forall(_.nonEmpty)

    if (allBlocksHaveRanges) {
      // Create a KinesisBackedBlockRDD, even when there are no blocks
      val blockIds = blockInfos.map { _.blockId.asInstanceOf[BlockId] }.toArray
      val seqNumRanges = blockInfos.map {
        _.metadataOption.get.asInstanceOf[SequenceNumberRanges] }.toArray
      val isBlockIdValid = blockInfos.map { _.isBlockIdValid() }.toArray
      logDebug(s"Creating KinesisBackedBlockRDD for $time with ${seqNumRanges.length} " +
        s"seq number ranges: ${seqNumRanges.mkString(", ")} ")
      new KinesisBackedBlockRDD(
        context.sc, regionName, endpointUrl, blockIds, seqNumRanges,
        isBlockIdValid = isBlockIdValid,
        retryTimeoutMs = ssc.graph.batchDuration.milliseconds.toInt,
        messageHandler = messageHandler,
        awsCredentialsOption = awsCredentialsOption)
    } else {
      logWarning("Kinesis sequence number information was not present with some block metadata," +
        " it may not be possible to recover from failures")
      super.createBlockRDD(time, blockInfos)
    }
  }

  override def getReceiver(): Receiver[T] = {
    new KinesisReceiver(streamName, endpointUrl, regionName, initialPositionInStream,
      checkpointAppName, checkpointInterval, storageLevel, messageHandler, awsCredentialsOption)
  }
}
Example 20
Source File: UnionDStream.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag

import org.apache.spark.SparkException
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}

private[streaming]
class UnionDStream[T: ClassTag](parents: Array[DStream[T]])
  extends DStream[T](parents.head.ssc) {

  require(parents.length > 0, "List of DStreams to union is empty")
  require(parents.map(_.ssc).distinct.length == 1, "Some of the DStreams have different contexts")
  require(parents.map(_.slideDuration).distinct.length == 1,
    "Some of the DStreams have different slide durations")

  override def dependencies: List[DStream[_]] = parents.toList

  override def slideDuration: Duration = parents.head.slideDuration

  override def compute(validTime: Time): Option[RDD[T]] = {
    val rdds = new ArrayBuffer[RDD[T]]()
    parents.map(_.getOrCompute(validTime)).foreach {
      case Some(rdd) => rdds += rdd
      case None => throw new SparkException("Could not generate RDD from a parent for unifying at" +
        s" time $validTime")
    }
    if (rdds.nonEmpty) {
      Some(ssc.sc.union(rdds))
    } else {
      None
    }
  }
}
Example 21
Source File: ForEachDStream.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}
import org.apache.spark.streaming.scheduler.Job

private[streaming]
class ForEachDStream[T: ClassTag] (
    parent: DStream[T],
    foreachFunc: (RDD[T], Time) => Unit,
    displayInnerRDDOps: Boolean
  ) extends DStream[Unit](parent.ssc) {

  override def dependencies: List[DStream[_]] = List(parent)

  override def slideDuration: Duration = parent.slideDuration

  override def compute(validTime: Time): Option[RDD[Unit]] = None

  override def generateJob(time: Time): Option[Job] = {
    parent.getOrCompute(time) match {
      case Some(rdd) =>
        val jobFunc = () => createRDDWithLocalProperties(time, displayInnerRDDOps) {
          foreachFunc(rdd, time)
        }
        Some(new Job(time, jobFunc))
      case None => None
    }
  }
}
Example 22
Source File: FlatMappedDStream.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}

private[streaming]
class FlatMappedDStream[T: ClassTag, U: ClassTag](
    parent: DStream[T],
    flatMapFunc: T => TraversableOnce[U]
  ) extends DStream[U](parent.ssc) {

  override def dependencies: List[DStream[_]] = List(parent)

  override def slideDuration: Duration = parent.slideDuration

  override def compute(validTime: Time): Option[RDD[U]] = {
    parent.getOrCompute(validTime).map(_.flatMap(flatMapFunc))
  }
}
Example 23
Source File: WindowedDStream.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming._
import org.apache.spark.streaming.Duration

private[streaming]
class WindowedDStream[T: ClassTag](
    parent: DStream[T],
    _windowDuration: Duration,
    _slideDuration: Duration)
  extends DStream[T](parent.ssc) {

  if (!_windowDuration.isMultipleOf(parent.slideDuration)) {
    throw new Exception("The window duration of windowed DStream (" + _windowDuration + ") " +
      "must be a multiple of the slide duration of parent DStream (" + parent.slideDuration + ")")
  }

  if (!_slideDuration.isMultipleOf(parent.slideDuration)) {
    throw new Exception("The slide duration of windowed DStream (" + _slideDuration + ") " +
      "must be a multiple of the slide duration of parent DStream (" + parent.slideDuration + ")")
  }

  // Persist parent level by default, as those RDDs are going to be obviously reused.
  parent.persist(StorageLevel.MEMORY_ONLY_SER)

  def windowDuration: Duration = _windowDuration

  override def dependencies: List[DStream[_]] = List(parent)

  override def slideDuration: Duration = _slideDuration

  override def parentRememberDuration: Duration = rememberDuration + windowDuration

  override def persist(level: StorageLevel): DStream[T] = {
    // Do not let this windowed DStream be persisted as windowed (union-ed) RDDs share underlying
    // RDDs and persisting the windowed RDDs would store numerous copies of the underlying data.
    // Instead control the persistence of the parent DStream.
    parent.persist(level)
    this
  }

  override def compute(validTime: Time): Option[RDD[T]] = {
    val currentWindow = new Interval(validTime - windowDuration + parent.slideDuration, validTime)
    val rddsInWindow = parent.slice(currentWindow)
    Some(ssc.sc.union(rddsInWindow))
  }
}
Example 24
Source File: ShuffledDStream.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.reflect.ClassTag

import org.apache.spark.Partitioner
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}

private[streaming]
class ShuffledDStream[K: ClassTag, V: ClassTag, C: ClassTag](
    parent: DStream[(K, V)],
    createCombiner: V => C,
    mergeValue: (C, V) => C,
    mergeCombiner: (C, C) => C,
    partitioner: Partitioner,
    mapSideCombine: Boolean = true
  ) extends DStream[(K, C)] (parent.ssc) {

  override def dependencies: List[DStream[_]] = List(parent)

  override def slideDuration: Duration = parent.slideDuration

  override def compute(validTime: Time): Option[RDD[(K, C)]] = {
    parent.getOrCompute(validTime) match {
      case Some(rdd) => Some(rdd.combineByKey[C](
          createCombiner, mergeValue, mergeCombiner, partitioner, mapSideCombine))
      case None => None
    }
  }
}
Example 25
Source File: FilteredDStream.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}

private[streaming]
class FilteredDStream[T: ClassTag](
    parent: DStream[T],
    filterFunc: T => Boolean
  ) extends DStream[T](parent.ssc) {

  override def dependencies: List[DStream[_]] = List(parent)

  override def slideDuration: Duration = parent.slideDuration

  override def compute(validTime: Time): Option[RDD[T]] = {
    parent.getOrCompute(validTime).map(_.filter(filterFunc))
  }
}
Example 26
Source File: FlatMapValuedDStream.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}

private[streaming]
class FlatMapValuedDStream[K: ClassTag, V: ClassTag, U: ClassTag](
    parent: DStream[(K, V)],
    flatMapValueFunc: V => TraversableOnce[U]
  ) extends DStream[(K, U)](parent.ssc) {

  override def dependencies: List[DStream[_]] = List(parent)

  override def slideDuration: Duration = parent.slideDuration

  override def compute(validTime: Time): Option[RDD[(K, U)]] = {
    parent.getOrCompute(validTime).map(_.flatMapValues[U](flatMapValueFunc))
  }
}
Example 27
Source File: MapValuedDStream.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}

private[streaming]
class MapValuedDStream[K: ClassTag, V: ClassTag, U: ClassTag](
    parent: DStream[(K, V)],
    mapValueFunc: V => U
  ) extends DStream[(K, U)](parent.ssc) {

  override def dependencies: List[DStream[_]] = List(parent)

  override def slideDuration: Duration = parent.slideDuration

  override def compute(validTime: Time): Option[RDD[(K, U)]] = {
    parent.getOrCompute(validTime).map(_.mapValues[U](mapValueFunc))
  }
}
Example 28
Source File: TransformedDStream.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.reflect.ClassTag

import org.apache.spark.SparkException
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}

private[streaming]
class TransformedDStream[U: ClassTag] (
    parents: Seq[DStream[_]],
    transformFunc: (Seq[RDD[_]], Time) => RDD[U]
  ) extends DStream[U](parents.head.ssc) {

  require(parents.nonEmpty, "List of DStreams to transform is empty")
  require(parents.map(_.ssc).distinct.size == 1, "Some of the DStreams have different contexts")
  require(parents.map(_.slideDuration).distinct.size == 1,
    "Some of the DStreams have different slide durations")

  override def dependencies: List[DStream[_]] = parents.toList

  override def slideDuration: Duration = parents.head.slideDuration

  override def compute(validTime: Time): Option[RDD[U]] = {
    val parentRDDs = parents.map { parent => parent.getOrCompute(validTime).getOrElse(
      // Guard out against parent DStream that return None instead of Some(rdd) to avoid NPE
      throw new SparkException(s"Couldn't generate RDD from parent at time $validTime"))
    }
    val transformedRDD = transformFunc(parentRDDs, validTime)
    if (transformedRDD == null) {
      throw new SparkException("Transform function must not return null. " +
        "Return SparkContext.emptyRDD() instead to represent no element " +
        "as the result of transformation.")
    }
    Some(transformedRDD)
  }

  override protected[streaming] def createRDDWithLocalProperties[U](
      time: Time,
      displayInnerRDDOps: Boolean)(body: => U): U = {
    super.createRDDWithLocalProperties(time, displayInnerRDDOps = true)(body)
  }
}
Example 29
Source File: MappedDStream.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}

private[streaming]
class MappedDStream[T: ClassTag, U: ClassTag] (
    parent: DStream[T],
    mapFunc: T => U
  ) extends DStream[U](parent.ssc) {

  override def dependencies: List[DStream[_]] = List(parent)

  override def slideDuration: Duration = parent.slideDuration

  override def compute(validTime: Time): Option[RDD[U]] = {
    parent.getOrCompute(validTime).map(_.map[U](mapFunc))
  }
}
Example 30
Source File: MapPartitionedDStream.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}

private[streaming]
class MapPartitionedDStream[T: ClassTag, U: ClassTag](
    parent: DStream[T],
    mapPartFunc: Iterator[T] => Iterator[U],
    preservePartitioning: Boolean
  ) extends DStream[U](parent.ssc) {

  override def dependencies: List[DStream[_]] = List(parent)

  override def slideDuration: Duration = parent.slideDuration

  override def compute(validTime: Time): Option[RDD[U]] = {
    parent.getOrCompute(validTime).map(_.mapPartitions[U](mapPartFunc, preservePartitioning))
  }
}
Example 31
Source File: RateEstimator.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.scheduler.rate

import org.apache.spark.SparkConf
import org.apache.spark.streaming.Duration

// NOTE: this excerpt omits the surrounding declarations from the original file.
// The RateEstimator trait and the companion object wrapping create are restored
// here in abbreviated form so the snippet is well-formed.
private[streaming] trait RateEstimator extends Serializable {
  def compute(
      time: Long,
      elements: Long,
      processingDelay: Long,
      schedulingDelay: Long): Option[Double]
}

object RateEstimator {

  def create(conf: SparkConf, batchInterval: Duration): RateEstimator =
    conf.get("spark.streaming.backpressure.rateEstimator", "pid") match {
      case "pid" =>
        val proportional = conf.getDouble("spark.streaming.backpressure.pid.proportional", 1.0)
        val integral = conf.getDouble("spark.streaming.backpressure.pid.integral", 0.2)
        val derived = conf.getDouble("spark.streaming.backpressure.pid.derived", 0.0)
        val minRate = conf.getDouble("spark.streaming.backpressure.pid.minRate", 100)
        new PIDRateEstimator(batchInterval.milliseconds, proportional, integral, derived, minRate)

      case estimator =>
        throw new IllegalArgumentException(s"Unknown rate estimator: $estimator")
    }
}
Example 32
Source File: InputInfoTrackerSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.scheduler

import org.scalatest.BeforeAndAfter

import org.apache.spark.{SparkConf, SparkFunSuite}
import org.apache.spark.streaming.{Duration, StreamingContext, Time}

class InputInfoTrackerSuite extends SparkFunSuite with BeforeAndAfter {

  private var ssc: StreamingContext = _

  before {
    val conf = new SparkConf().setMaster("local[2]").setAppName("DirectStreamTacker")
    if (ssc == null) {
      ssc = new StreamingContext(conf, Duration(1000))
    }
  }

  after {
    if (ssc != null) {
      ssc.stop()
      ssc = null
    }
  }

  test("test report and get InputInfo from InputInfoTracker") {
    val inputInfoTracker = new InputInfoTracker(ssc)

    val streamId1 = 0
    val streamId2 = 1
    val time = Time(0L)
    val inputInfo1 = StreamInputInfo(streamId1, 100L)
    val inputInfo2 = StreamInputInfo(streamId2, 300L)
    inputInfoTracker.reportInfo(time, inputInfo1)
    inputInfoTracker.reportInfo(time, inputInfo2)

    val batchTimeToInputInfos = inputInfoTracker.getInfo(time)
    assert(batchTimeToInputInfos.size == 2)
    assert(batchTimeToInputInfos.keys === Set(streamId1, streamId2))
    assert(batchTimeToInputInfos(streamId1) === inputInfo1)
    assert(batchTimeToInputInfos(streamId2) === inputInfo2)
    assert(inputInfoTracker.getInfo(time)(streamId1) === inputInfo1)
  }

  test("test cleanup InputInfo from InputInfoTracker") {
    val inputInfoTracker = new InputInfoTracker(ssc)

    val streamId1 = 0
    val inputInfo1 = StreamInputInfo(streamId1, 100L)
    val inputInfo2 = StreamInputInfo(streamId1, 300L)
    inputInfoTracker.reportInfo(Time(0), inputInfo1)
    inputInfoTracker.reportInfo(Time(1), inputInfo2)

    inputInfoTracker.cleanup(Time(0))
    assert(inputInfoTracker.getInfo(Time(0))(streamId1) === inputInfo1)
    assert(inputInfoTracker.getInfo(Time(1))(streamId1) === inputInfo2)

    inputInfoTracker.cleanup(Time(1))
    assert(inputInfoTracker.getInfo(Time(0)).get(streamId1) === None)
    assert(inputInfoTracker.getInfo(Time(1))(streamId1) === inputInfo2)
  }
}
Example 33
Source File: SharedStreamingContext.scala From sscheck with Apache License 2.0 | 5 votes |
package es.ucm.fdi.sscheck.spark.streaming

import org.apache.spark.streaming.{StreamingContext, Duration}
import org.slf4j.LoggerFactory

import scala.util.Try

import es.ucm.fdi.sscheck.spark.SharedSparkContext

trait SharedStreamingContext extends SharedSparkContext {
  // cannot use private[this] due to https://issues.scala-lang.org/browse/SI-8087
  // @transient private[this] val logger = Logger(LoggerFactory.getLogger("SharedStreamingContext"))
  @transient private val logger = LoggerFactory.getLogger("SharedStreamingContext")

  // NOTE: this excerpt was truncated at the source. The _ssc field and the method
  // signature below are assumed scaffolding (names may differ in the original file),
  // added only so the retained shutdown logic is well-formed.
  @transient protected[this] var _ssc: Option[StreamingContext] = None

  def closeStreamingContext(stopSparkContext: Boolean = false): Unit = {
    _ssc.foreach { ssc =>
      Try {
        ssc.stop(stopSparkContext=false, stopGracefully=false)
      } recover {
        case _ => {
          logger.warn("second attempt forcing stop of test Spark Streaming context")
          ssc.stop(stopSparkContext=false, stopGracefully=false)
        }
      }
      _ssc = None
    }
    if (stopSparkContext) {
      super[SharedSparkContext].close()
    }
  }
}
Example 34
Source File: StreamingFormulaDemo1.scala From sscheck with Apache License 2.0 | 5 votes |
package es.ucm.fdi.sscheck.spark.demo

import org.junit.runner.RunWith
import org.specs2.runner.JUnitRunner
import org.specs2.ScalaCheck
import org.specs2.Specification
import org.specs2.matcher.ResultMatchers
import org.scalacheck.Arbitrary.arbitrary

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.Duration
import org.apache.spark.streaming.dstream.DStream

import es.ucm.fdi.sscheck.spark.streaming.SharedStreamingContextBeforeAfterEach
import es.ucm.fdi.sscheck.prop.tl.{Formula,DStreamTLProperty}
import es.ucm.fdi.sscheck.prop.tl.Formula._
import es.ucm.fdi.sscheck.gen.{PDStreamGen,BatchGen}

@RunWith(classOf[JUnitRunner])
class StreamingFormulaDemo1
  extends Specification
  with DStreamTLProperty
  with ResultMatchers
  with ScalaCheck {

  // Spark configuration
  override def sparkMaster : String = "local[*]"
  override def batchDuration = Duration(150)
  override def defaultParallelism = 4

  def is =
    sequential ^ s2"""
    Simple demo Specs2 example for ScalaCheck properties with temporal
    formulas on Spark Streaming programs
      - where a simple property for DStream.count is a success ${countForallAlwaysProp(_.count)}
      - where a faulty implementation of the DStream.count is detected ${countForallAlwaysProp(faultyCount) must beFailing}
    """

  def faultyCount(ds : DStream[Double]) : DStream[Long] =
    ds.count.transform(_.map(_ - 1))

  def countForallAlwaysProp(testSubject : DStream[Double] => DStream[Long]) = {
    type U = (RDD[Double], RDD[Long])
    val (inBatch, transBatch) = ((_ : U)._1, (_ : U)._2)
    val numBatches = 10
    val formula : Formula[U] = always { (u : U) =>
      transBatch(u).count === 1 and
      inBatch(u).count === transBatch(u).first
    } during numBatches

    val gen = BatchGen.always(BatchGen.ofNtoM(10, 50, arbitrary[Double]), numBatches)

    forAllDStream(
      gen)(
      testSubject)(
      formula)
  }.set(minTestsOk = 10).verbose

}
Example 35
Source File: StreamingFormulaDemo2.scala From sscheck with Apache License 2.0 | 5 votes |
package es.ucm.fdi.sscheck.spark.demo

import org.junit.runner.RunWith
import org.specs2.runner.JUnitRunner
import org.specs2.ScalaCheck
import org.specs2.Specification
import org.specs2.matcher.ResultMatchers
import org.scalacheck.Arbitrary.arbitrary
import org.scalacheck.Gen

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.Duration
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.dstream.DStream._

import scalaz.syntax.std.boolean._

import es.ucm.fdi.sscheck.spark.streaming.SharedStreamingContextBeforeAfterEach
import es.ucm.fdi.sscheck.prop.tl.{Formula,DStreamTLProperty}
import es.ucm.fdi.sscheck.prop.tl.Formula._
import es.ucm.fdi.sscheck.gen.{PDStreamGen,BatchGen}
import es.ucm.fdi.sscheck.gen.BatchGenConversions._
import es.ucm.fdi.sscheck.gen.PDStreamGenConversions._
import es.ucm.fdi.sscheck.matcher.specs2.RDDMatchers._

@RunWith(classOf[JUnitRunner])
class StreamingFormulaDemo2
  extends Specification
  with DStreamTLProperty
  with ResultMatchers
  with ScalaCheck {

  // Spark configuration
  override def sparkMaster : String = "local[*]"
  override def batchDuration = Duration(300)
  override def defaultParallelism = 3
  override def enableCheckpointing = true

  def is =
    sequential ^ s2"""
    Check process to persistently detect and ban bad users
      - where a stateful implementation extracts the banned users correctly ${checkExtractBannedUsersList(listBannedUsers)}
      - where a trivial implementation ${checkExtractBannedUsersList(statelessListBannedUsers) must beFailing}
    """

  type UserId = Long

  def listBannedUsers(ds : DStream[(UserId, Boolean)]) : DStream[UserId] =
    ds.updateStateByKey((flags : Seq[Boolean], maybeFlagged : Option[Unit]) =>
      maybeFlagged match {
        case Some(_) => maybeFlagged
        case None => flags.contains(false) option {()}
      }
    ).transform(_.keys)

  def statelessListBannedUsers(ds : DStream[(UserId, Boolean)]) : DStream[UserId] =
    ds.map(_._1)

  def checkExtractBannedUsersList(testSubject : DStream[(UserId, Boolean)] => DStream[UserId]) = {
    val batchSize = 20
    val (headTimeout, tailTimeout, nestedTimeout) = (10, 10, 5)
    val (badId, ids) = (15L, Gen.choose(1L, 50L))
    val goodBatch = BatchGen.ofN(batchSize, ids.map((_, true)))
    val badBatch = goodBatch + BatchGen.ofN(1, (badId, false))
    val gen = BatchGen.until(goodBatch, badBatch, headTimeout) ++
              BatchGen.always(Gen.oneOf(goodBatch, badBatch), tailTimeout)

    type U = (RDD[(UserId, Boolean)], RDD[UserId])
    val (inBatch, outBatch) = ((_ : U)._1, (_ : U)._2)

    val formula = {
      val badInput = at(inBatch)(_ should existsRecord(_ == (badId, false)))
      val allGoodInputs = at(inBatch)(_ should foreachRecord(_._2 == true))
      val noIdBanned = at(outBatch)(_.isEmpty)
      val badIdBanned = at(outBatch)(_ should existsRecord(_ == badId))

      ( ( allGoodInputs and noIdBanned ) until badIdBanned on headTimeout ) and
      ( always { badInput ==> (always(badIdBanned) during nestedTimeout) } during tailTimeout )
    }

    forAllDStream(
      gen)(
      testSubject)(
      formula)
  }.set(minTestsOk = 10).verbose

}
Example 36
Source File: SimpleStreamingFormulas.scala From sscheck with Apache License 2.0 | 5 votes |
package es.ucm.fdi.sscheck.spark.simple

import org.junit.runner.RunWith
import org.specs2.runner.JUnitRunner
import org.specs2.matcher.ResultMatchers
import org.scalacheck.Arbitrary.arbitrary

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.Duration
import org.apache.spark.streaming.dstream.DStream

import es.ucm.fdi.sscheck.spark.streaming.SharedStreamingContextBeforeAfterEach
import es.ucm.fdi.sscheck.prop.tl.{Formula,DStreamTLProperty}
import es.ucm.fdi.sscheck.prop.tl.Formula._
import es.ucm.fdi.sscheck.matcher.specs2.RDDMatchers._
import es.ucm.fdi.sscheck.gen.{PDStreamGen,BatchGen}
import org.scalacheck.Gen
import es.ucm.fdi.sscheck.gen.PDStream
import es.ucm.fdi.sscheck.gen.Batch

@RunWith(classOf[JUnitRunner])
class SimpleStreamingFormulas
  extends org.specs2.Specification
  with DStreamTLProperty
  with org.specs2.ScalaCheck {

  // Spark configuration
  override def sparkMaster : String = "local[*]"
  override def batchDuration = Duration(50)
  override def defaultParallelism = 4

  def is =
    sequential ^ s2"""
    Simple demo Specs2 example for ScalaCheck properties with temporal
    formulas on Spark Streaming programs
      - Given a stream of integers
        When we filter out negative numbers
        Then we get only numbers greater or equal to zero $filterOutNegativeGetGeqZero
      - where time increments for each batch $timeIncreasesMonotonically
    """

  def filterOutNegativeGetGeqZero = {
    type U = (RDD[Int], RDD[Int])
    val numBatches = 10
    val gen = BatchGen.always(BatchGen.ofNtoM(10, 50, arbitrary[Int]),
                              numBatches)
    val formula = always(nowTime[U]{ (letter, time) =>
      val (_input, output) = letter
      output should foreachRecord {_ >= 0}
    }) during numBatches

    forAllDStream(
      gen)(
      _.filter{ x => !(x < 0)})(
      formula)
  }.set(minTestsOk = 50).verbose

  def timeIncreasesMonotonically = {
    type U = (RDD[Int], RDD[Int])
    val numBatches = 10
    val gen = BatchGen.always(BatchGen.ofNtoM(10, 50, arbitrary[Int]))

    val formula = always(nextTime[U]{ (letter, time) =>
      nowTime[U]{ (nextLetter, nextTime) =>
        time.millis <= nextTime.millis
      }
    }) during numBatches-1

    forAllDStream(
      gen)(
      identity[DStream[Int]])(
      formula)
  }.set(minTestsOk = 10).verbose
}
Example 37
Source File: SharedStreamingContextBeforeAfterEachTest.scala From sscheck with Apache License 2.0 | 5 votes |
package es.ucm.fdi.sscheck.spark.streaming

import org.junit.runner.RunWith
import org.specs2.runner.JUnitRunner
import org.specs2.execute.Result

import org.apache.spark.streaming.Duration
import org.apache.spark.rdd.RDD

import scala.collection.mutable.Queue
import scala.concurrent.duration._

import org.slf4j.LoggerFactory

import es.ucm.fdi.sscheck.matcher.specs2.RDDMatchers._

// sbt "test-only es.ucm.fdi.sscheck.spark.streaming.SharedStreamingContextBeforeAfterEachTest"

@RunWith(classOf[JUnitRunner])
class SharedStreamingContextBeforeAfterEachTest
  extends org.specs2.Specification
  with org.specs2.matcher.MustThrownExpectations
  with org.specs2.matcher.ResultMatchers
  with SharedStreamingContextBeforeAfterEach {

  // cannot use private[this] due to https://issues.scala-lang.org/browse/SI-8087
  @transient private val logger = LoggerFactory.getLogger("SharedStreamingContextBeforeAfterEachTest")

  // Spark configuration
  override def sparkMaster : String = "local[5]"
  override def batchDuration = Duration(250)
  override def defaultParallelism = 3
  override def enableCheckpointing = false // as queueStream doesn't support checkpointing

  def is =
    sequential ^ s2"""
    Simple test for SharedStreamingContextBeforeAfterEach
      where a simple queueStream test must be successful $successfulSimpleQueueStreamTest
      where a simple queueStream test can also fail $failingSimpleQueueStreamTest
    """

  def successfulSimpleQueueStreamTest = simpleQueueStreamTest(expectedCount = 0)
  def failingSimpleQueueStreamTest = simpleQueueStreamTest(expectedCount = 1) must beFailing

  def simpleQueueStreamTest(expectedCount : Int) : Result = {
    val record = "hola"
    val batches = Seq.fill(5)(Seq.fill(10)(record))
    val queue = new Queue[RDD[String]]
    queue ++= batches.map(batch => sc.parallelize(batch, numSlices = defaultParallelism))
    val inputDStream = ssc.queueStream(queue, oneAtATime = true)
    val sizesDStream = inputDStream.map(_.length)

    var batchCount = 0
    // NOTE wrapping assertions with a Result object is needed
    // to avoid the Spark Streaming runtime capturing the exceptions
    // from failing assertions
    var result : Result = ok
    inputDStream.foreachRDD { rdd =>
      batchCount += 1
      println(s"completed batch number $batchCount: ${rdd.collect.mkString(",")}")
      result = result and {
        rdd.filter(_!= record).count() === expectedCount
        rdd should existsRecord(_ == "hola")
      }
    }
    sizesDStream.foreachRDD { rdd =>
      result = result and {
        rdd should foreachRecord(record.length)(len => _ == len)
      }
    }

    // should only start the dstream after all the transformations and actions have been defined
    ssc.start()

    // wait for completion of batches.length batches
    StreamingContextUtils.awaitForNBatchesCompleted(batches.length, atMost = 10 seconds)(ssc)

    result
  }
}
Example 38
Source File: ScalaCheckStreamingTest.scala From sscheck with Apache License 2.0 | 5 votes |
package es.ucm.fdi.sscheck.spark.streaming

import org.junit.runner.RunWith
import org.specs2.runner.JUnitRunner
import org.specs2.ScalaCheck
import org.specs2.execute.{AsResult, Result}

import org.scalacheck.{Prop, Gen}
import org.scalacheck.Arbitrary.arbitrary

import org.apache.spark._
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration}
import org.apache.spark.streaming.dstream.DStream

import es.ucm.fdi.sscheck.prop.tl.Formula._
import es.ucm.fdi.sscheck.prop.tl.DStreamTLProperty
import es.ucm.fdi.sscheck.matcher.specs2.RDDMatchers._

@RunWith(classOf[JUnitRunner])
class ScalaCheckStreamingTest
  extends org.specs2.Specification
  with DStreamTLProperty
  with org.specs2.matcher.ResultMatchers
  with ScalaCheck {

  override def sparkMaster : String = "local[5]"
  override def batchDuration = Duration(350)
  override def defaultParallelism = 4

  def is =
    sequential ^ s2"""
    Simple properties for Spark Streaming
      - where the first property is a success $prop1
      - where a simple property for DStream.count is a success ${countProp(_.count)}
      - where a faulty implementation of the DStream.count is detected ${countProp(faultyCount) must beFailing}
    """

  def prop1 = {
    val batchSize = 30
    val numBatches = 10
    val dsgenSeqSeq1 = {
      val zeroSeqSeq = Gen.listOfN(numBatches, Gen.listOfN(batchSize, 0))
      val oneSeqSeq = Gen.listOfN(numBatches, Gen.listOfN(batchSize, 1))
      Gen.oneOf(zeroSeqSeq, oneSeqSeq)
    }
    type U = (RDD[Int], RDD[Int])

    forAllDStream[Int, Int](
      "inputDStream" |: dsgenSeqSeq1)(
      (inputDs : DStream[Int]) => {
        val transformedDs = inputDs.map(_+1)
        transformedDs
      })(always ((u : U) => {
          val (inputBatch, transBatch) = u
          inputBatch.count === batchSize and
          inputBatch.count === transBatch.count and
          (inputBatch.intersection(transBatch).isEmpty should beTrue) and
          ( inputBatch should foreachRecord(_ == 0) or
            (inputBatch should foreachRecord(_ == 1))
          )
        }) during numBatches
      )}.set(minTestsOk = 10).verbose

  def faultyCount(ds : DStream[Double]) : DStream[Long] =
    ds.count.transform(_.map(_ - 1))

  def countProp(testSubject : DStream[Double] => DStream[Long]) = {
    type U = (RDD[Double], RDD[Long])
    val numBatches = 10
    forAllDStream[Double, Long](
      Gen.listOfN(numBatches, Gen.listOfN(30, arbitrary[Double])))(
      testSubject
      )(always ((u : U) => {
         val (inputBatch, transBatch) = u
         transBatch.count === 1 and
         inputBatch.count === transBatch.first
      }) during numBatches
    )}.set(minTestsOk = 10).verbose

}
Example 39
Source File: AMQPServerStreamSuite.scala From streaming-amqp with Apache License 2.0 | 5 votes |
package io.radanalytics.streaming.amqp

import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.amqp.AMQPUtils
import org.apache.spark.streaming.{Duration, Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkFunSuite}
import org.scalatest.BeforeAndAfter
import org.scalatest.concurrent.Eventually

import scala.concurrent.duration._

class AMQPServerStreamSuite extends SparkFunSuite with Eventually with BeforeAndAfter {

  private val batchDuration: Duration = Seconds(1)
  private val master: String = "local[2]"
  private val appName: String = this.getClass().getSimpleName()
  private val address: String = "my_address"
  private val checkpointDir: String = "/tmp/spark-streaming-amqp-tests"

  private var conf: SparkConf = _
  private var ssc: StreamingContext = _
  private var amqpTestUtils: AMQPTestUtils = _

  before {
    conf = new SparkConf().setMaster(master).setAppName(appName)
    conf.set("spark.streaming.receiver.writeAheadLog.enable", "true")
    ssc = new StreamingContext(conf, batchDuration)
    ssc.checkpoint(checkpointDir)

    amqpTestUtils = new AMQPTestUtils()
    amqpTestUtils.setup()
  }

  after {
    if (ssc != null) {
      ssc.stop()
    }

    if (amqpTestUtils != null) {
      amqpTestUtils.teardown()
    }
  }

  test("AMQP receive server") {
    val sendMessage = "Spark Streaming & AMQP"
    val max = 10
    val delay = 100l

    amqpTestUtils.startAMQPServer(sendMessage, max, delay)

    val converter = new AMQPBodyFunction[String]

    val receiveStream =
      AMQPUtils.createStream(ssc, amqpTestUtils.host, amqpTestUtils.port,
        amqpTestUtils.username, amqpTestUtils.password, address, converter, StorageLevel.MEMORY_ONLY)

    var receivedMessage: List[String] = List()
    receiveStream.foreachRDD(rdd => {
      if (!rdd.isEmpty()) {
        receivedMessage = receivedMessage ::: rdd.collect().toList
      }
    })

    ssc.start()

    eventually(timeout(10000 milliseconds), interval(1000 milliseconds)) {
      assert(receivedMessage.length == max)
    }
    ssc.stop()

    amqpTestUtils.stopAMQPServer()
  }
}
Example 40
Source File: StreamToStreamWindowJoin.scala From spark-cep with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.streaming.examples

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.streaming.StreamSQLContext
import org.apache.spark.streaming.{Duration, StreamingContext}
import org.apache.spark.streaming.dstream.ConstantInputDStream

object StreamToStreamWindowJoin {
  case class User(id: Int, name: String)

  def main(args: Array[String]): Unit = {
    val ssc = new StreamingContext("local[10]", "test", Duration(3000))
    val sc = ssc.sparkContext
    val streamSqlContext = new StreamSQLContext(ssc, new SQLContext(sc))
    import streamSqlContext._

    val userRDD1 = sc.parallelize(1 to 10).map(i => User(i / 2, s"$i"))
    val userStream1 = new ConstantInputDStream[User](ssc, userRDD1)
    streamSqlContext.registerDStreamAsTable(userStream1, "user1")

    val userRDD2 = sc.parallelize(1 to 10).map(i => User(i / 5, s"$i"))
    val userStream2 = new ConstantInputDStream[User](ssc, userRDD2)
    registerDStreamAsTable(userStream2, "user2")

    sql(
      """
        |SELECT * FROM
        |user1 OVER (WINDOW '9' SECONDS, SLIDE '6' SECONDS) AS u
        |JOIN
        |user2 OVER (WINDOW '9' SECONDS, SLIDE '6' SECONDS) AS v
        |on u.id = v.id
        |WHERE u.id > 1 and u.id < 3 and v.id > 1 and v.id < 3
      """.stripMargin)
      .foreachRDD { r => r.foreach(println) }

    ssc.start()
    ssc.awaitTerminationOrTimeout(18 * 1000)
    ssc.stop()
  }
}
Example 41
Source File: UdfEnabledQuery.scala From spark-cep with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.streaming.examples

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.streaming.StreamSQLContext
import org.apache.spark.streaming.{Duration, StreamingContext}
import org.apache.spark.streaming.dstream.ConstantInputDStream

object UdfEnabledQuery {
  case class SingleWord(word: String)

  def main(args: Array[String]): Unit = {
    val ssc = new StreamingContext("local[10]", "test", Duration(3000))
    val sc = ssc.sparkContext
    val streamSqlContext = new StreamSQLContext(ssc, new SQLContext(sc))
    import streamSqlContext._
    val dummyRDD = sc.parallelize(1 to 100).map(i => SingleWord(s"$i"))
    val dummyStream = new ConstantInputDStream[SingleWord](ssc, dummyRDD)
    registerDStreamAsTable(dummyStream, "test")

    streamSqlContext.udf.register("IsEven", (word: String) => {
      val number = word.toInt
      if (number % 2 == 0) {
        "even number"
      } else {
        "odd number"
      }
    })

    sql("SELECT IsEven(word) FROM test").foreachRDD { r => r.foreach(println) }

    ssc.start()
    ssc.awaitTerminationOrTimeout(30 * 1000)
    ssc.stop()
  }
}
Example 42
Source File: WordCountQuery.scala From spark-cep with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.streaming.examples import org.apache.spark.sql.SQLContext import org.apache.spark.sql.streaming.StreamSQLContext import org.apache.spark.streaming.{Duration, StreamingContext} import org.apache.spark.streaming.dstream.ConstantInputDStream object WordCountQuery { case class SingleWord(word: String) def main(args: Array[String]): Unit = { val ssc = new StreamingContext("local[10]", "test", Duration(3000)) val sc = ssc.sparkContext val streamSqlContext = new StreamSQLContext(ssc, new SQLContext(sc)) import streamSqlContext._ val dummyRDD = sc.parallelize(1 to 10).map(i => SingleWord(s"$i")) val dummyStream = new ConstantInputDStream[SingleWord](ssc, dummyRDD) registerDStreamAsTable(dummyStream, "test") sql( """ |SELECT t.word, COUNT(t.word) |FROM (SELECT * FROM test) OVER (WINDOW '9' SECONDS, SLIDE '3' SECONDS) AS t |GROUP BY t.word """.stripMargin) .map(_.copy()).print() ssc.start() ssc.awaitTerminationOrTimeout(18 * 1000) ssc.stop() } }
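The windowed GROUP BY above has a rough equivalent in the core DStream API using reduceByKeyAndWindow; the sketch below assumes a hypothetical DStream[String] of words:

import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.dstream.DStream

// Sketch: count occurrences per key over a 9-second window sliding every 3 seconds.
def windowedCounts(words: DStream[String]): DStream[(String, Long)] =
  words.map(w => (w, 1L))
    .reduceByKeyAndWindow(_ + _, Seconds(9), Seconds(3))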
Example 43
Source File: StreamToStreamJoin.scala From spark-cep with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.streaming.examples import org.apache.spark.sql.SQLContext import org.apache.spark.sql.streaming.StreamSQLContext import org.apache.spark.streaming.dstream.ConstantInputDStream import org.apache.spark.streaming.{Duration, StreamingContext} object StreamToStreamJoin { case class User(id: Int, name: String) def main(args: Array[String]): Unit = { val ssc = new StreamingContext("local[10]", "test", Duration(3000)) val sc = ssc.sparkContext val streamSqlContext = new StreamSQLContext(ssc, new SQLContext(sc)) import streamSqlContext._ val userRDD1 = sc.parallelize(1 to 100).map(i => User(i / 2, s"$i")) val userStream1 = new ConstantInputDStream[User](ssc, userRDD1) registerDStreamAsTable(userStream1, "user1") val userRDD2 = sc.parallelize(1 to 100).map(i => User(i / 5, s"$i")) val userStream2 = new ConstantInputDStream[User](ssc, userRDD2) registerDStreamAsTable(userStream2, "user2") sql("SELECT * FROM user1 JOIN user2 ON user1.id = user2.id").map(_.copy()).print() ssc.start() ssc.awaitTerminationOrTimeout(30 * 1000) ssc.stop() } }
Example 44
Source File: UdtfEnableQuery.scala From spark-cep with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.streaming.examples import org.apache.spark.sql.hive.HiveContext import org.apache.spark.sql.streaming.StreamSQLContext import org.apache.spark.streaming.{Duration, StreamingContext} import org.apache.spark.streaming.dstream.ConstantInputDStream object UdtfEnabledQuery { case class People(name: String, items: Array[String]) def main(args: Array[String]): Unit = { val ssc = new StreamingContext("local[10]", "test", Duration(3000)) val sc = ssc.sparkContext val hiveContext = new HiveContext(sc) val streamSqlContext = new StreamSQLContext(ssc, hiveContext) import hiveContext.implicits._ import streamSqlContext.createSchemaDStream val dummyRDD = sc.makeRDD(1 to 3).map(i => People(s"jack$i", Array("book", "gun"))) val dummyStream = new ConstantInputDStream[People](ssc, dummyRDD) streamSqlContext.registerDStreamAsTable(dummyStream, "people") streamSqlContext.sql( """SELECT | name, | item |FROM | people | lateral view explode(items) items AS item""".stripMargin).map(_.copy()).print() ssc.start() ssc.awaitTerminationOrTimeout(30 * 1000) ssc.stop() } }
Example 45
Source File: StreamToTableJoin.scala From spark-cep with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.streaming.examples import org.apache.spark.sql.SQLContext import org.apache.spark.sql.streaming.StreamSQLContext import org.apache.spark.streaming.dstream.ConstantInputDStream import org.apache.spark.streaming.{Duration, StreamingContext} object StreamToTableJoin { case class User(id: Int, name: String) def main(args: Array[String]): Unit = { val ssc = new StreamingContext("local[10]", "test", Duration(3000)) val sc = ssc.sparkContext val streamSqlContext = new StreamSQLContext(ssc, new SQLContext(sc)) val userRDD1 = sc.parallelize(1 to 100).map(i => User(i / 2, s"$i")) val userStream1 = streamSqlContext.createSchemaDStream( new ConstantInputDStream[User](ssc, userRDD1)) streamSqlContext.registerDStreamAsTable(userStream1, "user1") val user2Df = streamSqlContext.sqlContext.createDataFrame( sc.parallelize(1 to 100).map(i => User(i / 5, s"$i"))) user2Df.registerTempTable("user2") val userRDD3 = sc.parallelize(1 to 100).map(i => User(i / 10, s"$i")) val userStream3 = streamSqlContext.createSchemaDStream( new ConstantInputDStream[User](ssc, userRDD3)) streamSqlContext.registerDStreamAsTable(userStream3, "user3") streamSqlContext.sql( """ |SELECT * FROM user1 a, user2 b, user3 c |WHERE a.id = b.id AND a.id = c.id """.stripMargin) .foreachRDD { r => r.foreach(println) } ssc.start() ssc.awaitTerminationOrTimeout(30 * 1000) ssc.stop() } }
Example 46
Source File: JsonInputStreamQuery.scala From spark-cep with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.streaming.examples import scala.collection.mutable.SynchronizedQueue import scala.io.Source import org.apache.spark.rdd.RDD import org.apache.spark.sql.SQLContext import org.apache.spark.sql.streaming.StreamSQLContext import org.apache.spark.streaming.{Duration, StreamingContext} object JsonInputStreamQuery { def main(args: Array[String]): Unit = { val ssc = new StreamingContext("local[10]", "test", Duration(3000)) val sc = ssc.sparkContext val streamSqlContext = new StreamSQLContext(ssc, new SQLContext(sc)) import streamSqlContext._ // Here we read data line by line from a given file and then put it into a queue DStream. // You can replace any kind of String type DStream here including kafka DStream. val queue = new SynchronizedQueue[RDD[String]]() Source.fromFile("src/main/resources/student.json").getLines().foreach(msg => queue.enqueue(sc.parallelize(List(msg)))) val queueDStream = ssc.queueStream[String](queue) // We can infer the schema of json automatically by using inferJsonSchema val schema = streamSqlContext.inferJsonSchema("src/main/resources/student.json") streamSqlContext.registerDStreamAsTable( streamSqlContext.jsonDStream(queueDStream, schema), "jsonTable") sql("SELECT * FROM jsonTable").print() ssc.start() ssc.awaitTerminationOrTimeout(30 * 1000) ssc.stop() } }
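Aside from spark-cep's jsonDStream helper, the same queue-backed stream can be turned into a DataFrame per micro-batch with the plain SQLContext JSON reader. A sketch, reusing queueDStream and streamSqlContext from the listing above; note that DataFrameReader.json(RDD[String]) is deprecated in newer Spark versions:

// Sketch: parse each micro-batch of JSON strings with the underlying SQLContext.
queueDStream.foreachRDD { rdd =>
  if (!rdd.isEmpty()) {
    val df = streamSqlContext.sqlContext.read.json(rdd)   // infer schema per batch
    df.show()
  }
}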
Example 47
Source File: UdafEnableQuery.scala From spark-cep with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.streaming.examples import scala.collection.mutable.ListBuffer import org.apache.spark.sql.hive.HiveContext import org.apache.spark.sql.streaming.StreamSQLContext import org.apache.spark.streaming.{Duration, StreamingContext} import org.apache.spark.streaming.dstream.ConstantInputDStream object UdafEnabledQuery { case class Data(name: String, money: Int) def main(args: Array[String]): Unit = { val ssc = new StreamingContext("local[10]", "test", Duration(3000)) val sc = ssc.sparkContext val hiveContext = new HiveContext(sc) val streamSQlContext = new StreamSQLContext(ssc, hiveContext) val dummyRDD = sc.makeRDD(1 to 10).map(i => Data(s"jack$i", i)) val dummyStream = new ConstantInputDStream[Data](ssc, dummyRDD) val schemaStream = streamSQlContext.createSchemaDStream(dummyStream) streamSQlContext.registerDStreamAsTable(schemaStream, "data") val resultList = ListBuffer[String]() streamSQlContext.sql( """SELECT | percentile(money,0.8), | stddev_pop(money) |FROM data """.stripMargin).map(_.copy()).print() ssc.start() ssc.awaitTerminationOrTimeout(30 * 1000) ssc.stop() } }
Example 48
Source File: KinesisInputDStream.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.kinesis import scala.reflect.ClassTag import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream import com.amazonaws.services.kinesis.model.Record import org.apache.spark.rdd.RDD import org.apache.spark.storage.{BlockId, StorageLevel} import org.apache.spark.streaming.{Duration, StreamingContext, Time} import org.apache.spark.streaming.dstream.ReceiverInputDStream import org.apache.spark.streaming.receiver.Receiver import org.apache.spark.streaming.scheduler.ReceivedBlockInfo private[kinesis] class KinesisInputDStream[T: ClassTag]( _ssc: StreamingContext, streamName: String, endpointUrl: String, regionName: String, initialPositionInStream: InitialPositionInStream, checkpointAppName: String, checkpointInterval: Duration, storageLevel: StorageLevel, messageHandler: Record => T, awsCredentialsOption: Option[SerializableAWSCredentials] ) extends ReceiverInputDStream[T](_ssc) { private[streaming] override def createBlockRDD(time: Time, blockInfos: Seq[ReceivedBlockInfo]): RDD[T] = { // This returns true even for when blockInfos is empty val allBlocksHaveRanges = blockInfos.map { _.metadataOption }.forall(_.nonEmpty) if (allBlocksHaveRanges) { // Create a KinesisBackedBlockRDD, even when there are no blocks val blockIds = blockInfos.map { _.blockId.asInstanceOf[BlockId] }.toArray val seqNumRanges = blockInfos.map { _.metadataOption.get.asInstanceOf[SequenceNumberRanges] }.toArray val isBlockIdValid = blockInfos.map { _.isBlockIdValid() }.toArray logDebug(s"Creating KinesisBackedBlockRDD for $time with ${seqNumRanges.length} " + s"seq number ranges: ${seqNumRanges.mkString(", ")} ") new KinesisBackedBlockRDD( context.sc, regionName, endpointUrl, blockIds, seqNumRanges, isBlockIdValid = isBlockIdValid, retryTimeoutMs = ssc.graph.batchDuration.milliseconds.toInt, messageHandler = messageHandler, awsCredentialsOption = awsCredentialsOption) } else { logWarning("Kinesis sequence number information was not present with some block metadata," + " it may not be possible to recover from failures") super.createBlockRDD(time, blockInfos) } } override def getReceiver(): Receiver[T] = { new KinesisReceiver(streamName, endpointUrl, regionName, initialPositionInStream, checkpointAppName, checkpointInterval, storageLevel, messageHandler, awsCredentialsOption) } }
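KinesisInputDStream is package-private; applications normally reach it through KinesisUtils.createStream in spark-streaming-kinesis-asl. The sketch below uses one of the overloads as documented for Spark 2.x, with placeholder application, stream, endpoint and region names; verify the exact overload against your Spark version:

import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kinesis.KinesisUtils

// Sketch only: returns a ReceiverInputDStream[Array[Byte]] backed by KinesisInputDStream.
def kinesisBytes(ssc: StreamingContext) =
  KinesisUtils.createStream(
    ssc, "myKinesisApp", "myStream",
    "https://kinesis.us-east-1.amazonaws.com", "us-east-1",
    InitialPositionInStream.LATEST,
    Seconds(2),                      // checkpointInterval, typically the batch interval
    StorageLevel.MEMORY_AND_DISK_2)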
Example 49
Source File: UnionDStream.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag import org.apache.spark.SparkException import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} private[streaming] class UnionDStream[T: ClassTag](parents: Array[DStream[T]]) extends DStream[T](parents.head.ssc) { require(parents.length > 0, "List of DStreams to union is empty") require(parents.map(_.ssc).distinct.length == 1, "Some of the DStreams have different contexts") require(parents.map(_.slideDuration).distinct.length == 1, "Some of the DStreams have different slide durations") override def dependencies: List[DStream[_]] = parents.toList override def slideDuration: Duration = parents.head.slideDuration override def compute(validTime: Time): Option[RDD[T]] = { val rdds = new ArrayBuffer[RDD[T]]() parents.map(_.getOrCompute(validTime)).foreach { case Some(rdd) => rdds += rdd case None => throw new SparkException("Could not generate RDD from a parent for unifying at" + s" time $validTime") } if (rdds.nonEmpty) { Some(ssc.sc.union(rdds)) } else { None } } }
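UnionDStream itself is internal; user code reaches it through StreamingContext.union or DStream.union, as in this short sketch:

import scala.reflect.ClassTag
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream

// Sketch: the public entry points that end up constructing a UnionDStream.
def unionAll[T: ClassTag](ssc: StreamingContext, streams: Seq[DStream[T]]): DStream[T] =
  ssc.union(streams)            // equivalent to streams.reduce(_ union _)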
Example 50
Source File: ForEachDStream.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.streaming.scheduler.Job private[streaming] class ForEachDStream[T: ClassTag] ( parent: DStream[T], foreachFunc: (RDD[T], Time) => Unit, displayInnerRDDOps: Boolean ) extends DStream[Unit](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[Unit]] = None override def generateJob(time: Time): Option[Job] = { parent.getOrCompute(time) match { case Some(rdd) => val jobFunc = () => createRDDWithLocalProperties(time, displayInnerRDDOps) { foreachFunc(rdd, time) } Some(new Job(time, jobFunc)) case None => None } } }
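ForEachDStream is created by DStream.foreachRDD, the terminal action that registers an output operation. A minimal sketch of the two public overloads:

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.Time
import org.apache.spark.streaming.dstream.DStream

// Sketch: foreachRDD builds a ForEachDStream under the hood.
def logBatchSizes(stream: DStream[String]): Unit = {
  stream.foreachRDD((rdd: RDD[String]) => println(s"batch size: ${rdd.count()}"))
  stream.foreachRDD((rdd: RDD[String], time: Time) =>
    println(s"batch at $time has ${rdd.count()} records"))
}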
Example 51
Source File: FlatMappedDStream.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} private[streaming] class FlatMappedDStream[T: ClassTag, U: ClassTag]( parent: DStream[T], flatMapFunc: T => TraversableOnce[U] ) extends DStream[U](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[U]] = { parent.getOrCompute(validTime).map(_.flatMap(flatMapFunc)) } }
Example 52
Source File: WindowedDStream.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming._ import org.apache.spark.streaming.Duration private[streaming] class WindowedDStream[T: ClassTag]( parent: DStream[T], _windowDuration: Duration, _slideDuration: Duration) extends DStream[T](parent.ssc) { if (!_windowDuration.isMultipleOf(parent.slideDuration)) { throw new Exception("The window duration of windowed DStream (" + _windowDuration + ") " + "must be a multiple of the slide duration of parent DStream (" + parent.slideDuration + ")") } if (!_slideDuration.isMultipleOf(parent.slideDuration)) { throw new Exception("The slide duration of windowed DStream (" + _slideDuration + ") " + "must be a multiple of the slide duration of parent DStream (" + parent.slideDuration + ")") } // Persist parent level by default, as those RDDs are going to be obviously reused. parent.persist(StorageLevel.MEMORY_ONLY_SER) def windowDuration: Duration = _windowDuration override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = _slideDuration override def parentRememberDuration: Duration = rememberDuration + windowDuration override def persist(level: StorageLevel): DStream[T] = { // Do not let this windowed DStream be persisted as windowed (union-ed) RDDs share underlying // RDDs and persisting the windowed RDDs would store numerous copies of the underlying data. // Instead control the persistence of the parent DStream. parent.persist(level) this } override def compute(validTime: Time): Option[RDD[T]] = { val currentWindow = new Interval(validTime - windowDuration + parent.slideDuration, validTime) val rddsInWindow = parent.slice(currentWindow) Some(ssc.sc.union(rddsInWindow)) } }
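Both durations must be multiples of the parent's slide duration, which is exactly what the two checks at the top of the class enforce. A quick sketch of the Duration.isMultipleOf contract and the corresponding public window call:

import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.dstream.DStream

// Sketch: the contract enforced by WindowedDStream for a parent with a 2-second slide duration.
val batch = Seconds(2)
assert(Seconds(10).isMultipleOf(batch))    // 10s window over a 2s batch is accepted
assert(!Seconds(5).isMultipleOf(batch))    // 5s window over a 2s batch would throw

def tenSecondWindow[T](stream: DStream[T]): DStream[T] =
  stream.window(Seconds(10), Seconds(2))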
Example 53
Source File: ShuffledDStream.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.Partitioner import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} private[streaming] class ShuffledDStream[K: ClassTag, V: ClassTag, C: ClassTag]( parent: DStream[(K, V)], createCombiner: V => C, mergeValue: (C, V) => C, mergeCombiner: (C, C) => C, partitioner: Partitioner, mapSideCombine: Boolean = true ) extends DStream[(K, C)] (parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[(K, C)]] = { parent.getOrCompute(validTime) match { case Some(rdd) => Some(rdd.combineByKey[C]( createCombiner, mergeValue, mergeCombiner, partitioner, mapSideCombine)) case None => None } } }
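ShuffledDStream is reached via the pair-DStream operations (reduceByKey, groupByKey, combineByKey). A sketch of two such calls on a hypothetical keyed stream:

import org.apache.spark.HashPartitioner
import org.apache.spark.streaming.dstream.DStream

// Sketch: pair-DStream operations that build a ShuffledDStream internally.
def sumByKey(pairs: DStream[(String, Int)]): DStream[(String, Int)] =
  pairs.reduceByKey(_ + _)

def collectByKey(pairs: DStream[(String, Int)]): DStream[(String, List[Int])] =
  pairs.combineByKey[List[Int]](
    v => List(v),                  // createCombiner
    (acc, v) => v :: acc,          // mergeValue
    (a, b) => a ::: b,             // mergeCombiner
    new HashPartitioner(4))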
Example 54
Source File: FilteredDStream.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} private[streaming] class FilteredDStream[T: ClassTag]( parent: DStream[T], filterFunc: T => Boolean ) extends DStream[T](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[T]] = { parent.getOrCompute(validTime).map(_.filter(filterFunc)) } }
Example 55
Source File: FlatMapValuedDStream.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} private[streaming] class FlatMapValuedDStream[K: ClassTag, V: ClassTag, U: ClassTag]( parent: DStream[(K, V)], flatMapValueFunc: V => TraversableOnce[U] ) extends DStream[(K, U)](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[(K, U)]] = { parent.getOrCompute(validTime).map(_.flatMapValues[U](flatMapValueFunc)) } }
Example 56
Source File: MapValuedDStream.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} private[streaming] class MapValuedDStream[K: ClassTag, V: ClassTag, U: ClassTag]( parent: DStream[(K, V)], mapValueFunc: V => U ) extends DStream[(K, U)](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[(K, U)]] = { parent.getOrCompute(validTime).map(_.mapValues[U](mapValueFunc)) } }
Example 57
Source File: TransformedDStream.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.SparkException import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} private[streaming] class TransformedDStream[U: ClassTag] ( parents: Seq[DStream[_]], transformFunc: (Seq[RDD[_]], Time) => RDD[U] ) extends DStream[U](parents.head.ssc) { require(parents.nonEmpty, "List of DStreams to transform is empty") require(parents.map(_.ssc).distinct.size == 1, "Some of the DStreams have different contexts") require(parents.map(_.slideDuration).distinct.size == 1, "Some of the DStreams have different slide durations") override def dependencies: List[DStream[_]] = parents.toList override def slideDuration: Duration = parents.head.slideDuration override def compute(validTime: Time): Option[RDD[U]] = { val parentRDDs = parents.map { parent => parent.getOrCompute(validTime).getOrElse( // Guard out against parent DStream that return None instead of Some(rdd) to avoid NPE throw new SparkException(s"Couldn't generate RDD from parent at time $validTime")) } val transformedRDD = transformFunc(parentRDDs, validTime) if (transformedRDD == null) { throw new SparkException("Transform function must not return null. " + "Return SparkContext.emptyRDD() instead to represent no element " + "as the result of transformation.") } Some(transformedRDD) } override protected[streaming] def createRDDWithLocalProperties[U]( time: Time, displayInnerRDDOps: Boolean)(body: => U): U = { super.createRDDWithLocalProperties(time, displayInnerRDDOps = true)(body) } }
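TransformedDStream is built by DStream.transform (and transformWith), which lets you apply arbitrary RDD-to-RDD functions per batch. A minimal sketch:

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.Time
import org.apache.spark.streaming.dstream.DStream

// Sketch: transform() is the public API backed by TransformedDStream.
def dropEmptyLines(lines: DStream[String]): DStream[String] =
  lines.transform((rdd: RDD[String], time: Time) => rdd.filter(_.nonEmpty))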
Example 58
Source File: MappedDStream.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} private[streaming] class MappedDStream[T: ClassTag, U: ClassTag] ( parent: DStream[T], mapFunc: T => U ) extends DStream[U](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[U]] = { parent.getOrCompute(validTime).map(_.map[U](mapFunc)) } }
Example 59
Source File: MapPartitionedDStream.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} private[streaming] class MapPartitionedDStream[T: ClassTag, U: ClassTag]( parent: DStream[T], mapPartFunc: Iterator[T] => Iterator[U], preservePartitioning: Boolean ) extends DStream[U](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[U]] = { parent.getOrCompute(validTime).map(_.mapPartitions[U](mapPartFunc, preservePartitioning)) } }
Example 60
Source File: RateEstimator.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.scheduler.rate

import org.apache.spark.SparkConf
import org.apache.spark.streaming.Duration

// Enclosing object restored so the braces balance; the RateEstimator trait declaration
// is not part of this excerpt.
object RateEstimator {

  def create(conf: SparkConf, batchInterval: Duration): RateEstimator =
    conf.get("spark.streaming.backpressure.rateEstimator", "pid") match {
      case "pid" =>
        val proportional = conf.getDouble("spark.streaming.backpressure.pid.proportional", 1.0)
        val integral = conf.getDouble("spark.streaming.backpressure.pid.integral", 0.2)
        val derived = conf.getDouble("spark.streaming.backpressure.pid.derived", 0.0)
        val minRate = conf.getDouble("spark.streaming.backpressure.pid.minRate", 100)
        new PIDRateEstimator(batchInterval.milliseconds, proportional, integral, derived, minRate)

      case estimator =>
        throw new IllegalArgumentException(s"Unknown rate estimator: $estimator")
    }
}
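The "pid" branch is driven entirely by SparkConf keys. A sketch of enabling backpressure and tuning the PID gains; the pid.* keys are the ones read by the factory above, and spark.streaming.backpressure.enabled is the standard switch that turns rate control on:

import org.apache.spark.SparkConf

// Sketch: configuration read by RateEstimator.create, plus the backpressure switch.
val conf = new SparkConf()
  .set("spark.streaming.backpressure.enabled", "true")
  .set("spark.streaming.backpressure.rateEstimator", "pid")
  .set("spark.streaming.backpressure.pid.proportional", "1.0")
  .set("spark.streaming.backpressure.pid.integral", "0.2")
  .set("spark.streaming.backpressure.pid.derived", "0.0")
  .set("spark.streaming.backpressure.pid.minRate", "100")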
Example 61
Source File: InputInfoTrackerSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.scheduler import org.scalatest.BeforeAndAfter import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.streaming.{Duration, StreamingContext, Time} class InputInfoTrackerSuite extends SparkFunSuite with BeforeAndAfter { private var ssc: StreamingContext = _ before { val conf = new SparkConf().setMaster("local[2]").setAppName("DirectStreamTacker") if (ssc == null) { ssc = new StreamingContext(conf, Duration(1000)) } } after { if (ssc != null) { ssc.stop() ssc = null } } test("test report and get InputInfo from InputInfoTracker") { val inputInfoTracker = new InputInfoTracker(ssc) val streamId1 = 0 val streamId2 = 1 val time = Time(0L) val inputInfo1 = StreamInputInfo(streamId1, 100L) val inputInfo2 = StreamInputInfo(streamId2, 300L) inputInfoTracker.reportInfo(time, inputInfo1) inputInfoTracker.reportInfo(time, inputInfo2) val batchTimeToInputInfos = inputInfoTracker.getInfo(time) assert(batchTimeToInputInfos.size == 2) assert(batchTimeToInputInfos.keys === Set(streamId1, streamId2)) assert(batchTimeToInputInfos(streamId1) === inputInfo1) assert(batchTimeToInputInfos(streamId2) === inputInfo2) assert(inputInfoTracker.getInfo(time)(streamId1) === inputInfo1) } test("test cleanup InputInfo from InputInfoTracker") { val inputInfoTracker = new InputInfoTracker(ssc) val streamId1 = 0 val inputInfo1 = StreamInputInfo(streamId1, 100L) val inputInfo2 = StreamInputInfo(streamId1, 300L) inputInfoTracker.reportInfo(Time(0), inputInfo1) inputInfoTracker.reportInfo(Time(1), inputInfo2) inputInfoTracker.cleanup(Time(0)) assert(inputInfoTracker.getInfo(Time(0))(streamId1) === inputInfo1) assert(inputInfoTracker.getInfo(Time(1))(streamId1) === inputInfo2) inputInfoTracker.cleanup(Time(1)) assert(inputInfoTracker.getInfo(Time(0)).get(streamId1) === None) assert(inputInfoTracker.getInfo(Time(1))(streamId1) === inputInfo2) } }
Example 62
Source File: UnionDStream.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import org.apache.spark.rdd.UnionRDD import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag private[streaming] class UnionDStream[T: ClassTag](parents: Array[DStream[T]]) extends DStream[T](parents.head.ssc) { require(parents.length > 0, "List of DStreams to union is empty") require(parents.map(_.ssc).distinct.size == 1, "Some of the DStreams have different contexts") require(parents.map(_.slideDuration).distinct.size == 1, "Some of the DStreams have different slide durations") override def dependencies: List[DStream[_]] = parents.toList override def slideDuration: Duration = parents.head.slideDuration override def compute(validTime: Time): Option[RDD[T]] = { val rdds = new ArrayBuffer[RDD[T]]() parents.map(_.getOrCompute(validTime)).foreach { case Some(rdd) => rdds += rdd case None => throw new Exception("Could not generate RDD from a parent for unifying at time " + validTime) } if (rdds.size > 0) { Some(new UnionRDD(ssc.sc, rdds)) } else { None } } }
Example 63
Source File: ForEachDStream.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.streaming.scheduler.Job import scala.reflect.ClassTag private[streaming] class ForEachDStream[T: ClassTag] ( parent: DStream[T], foreachFunc: (RDD[T], Time) => Unit ) extends DStream[Unit](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[Unit]] = None override def generateJob(time: Time): Option[Job] = { parent.getOrCompute(time) match { case Some(rdd) => val jobFunc = () => createRDDWithLocalProperties(time) { ssc.sparkContext.setCallSite(creationSite) foreachFunc(rdd, time) } Some(new Job(time, jobFunc)) case None => None } } }
Example 64
Source File: FlatMappedDStream.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import scala.reflect.ClassTag private[streaming] class FlatMappedDStream[T: ClassTag, U: ClassTag]( parent: DStream[T], flatMapFunc: T => Traversable[U] ) extends DStream[U](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[U]] = { parent.getOrCompute(validTime).map(_.flatMap(flatMapFunc)) } }
Example 65
Source File: WindowedDStream.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.rdd.{PartitionerAwareUnionRDD, RDD, UnionRDD} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming._ import org.apache.spark.streaming.Duration import scala.reflect.ClassTag private[streaming] class WindowedDStream[T: ClassTag]( parent: DStream[T], _windowDuration: Duration, _slideDuration: Duration) extends DStream[T](parent.ssc) { if (!_windowDuration.isMultipleOf(parent.slideDuration)) { throw new Exception("The window duration of windowed DStream (" + _windowDuration + ") " + "must be a multiple of the slide duration of parent DStream (" + parent.slideDuration + ")") } if (!_slideDuration.isMultipleOf(parent.slideDuration)) { throw new Exception("The slide duration of windowed DStream (" + _slideDuration + ") " + "must be a multiple of the slide duration of parent DStream (" + parent.slideDuration + ")") } // Persist parent level by default, as those RDDs are going to be obviously reused. parent.persist(StorageLevel.MEMORY_ONLY_SER) def windowDuration: Duration = _windowDuration override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = _slideDuration override def parentRememberDuration: Duration = rememberDuration + windowDuration override def persist(level: StorageLevel): DStream[T] = { // Do not let this windowed DStream be persisted as windowed (union-ed) RDDs share underlying // RDDs and persisting the windowed RDDs would store numerous copies of the underlying data. // Instead control the persistence of the parent DStream. parent.persist(level) this } override def compute(validTime: Time): Option[RDD[T]] = { val currentWindow = new Interval(validTime - windowDuration + parent.slideDuration, validTime) val rddsInWindow = parent.slice(currentWindow) val windowRDD = if (rddsInWindow.flatMap(_.partitioner).distinct.length == 1) { logDebug("Using partition aware union for windowing at " + validTime) new PartitionerAwareUnionRDD(ssc.sc, rddsInWindow) } else { logDebug("Using normal union for windowing at " + validTime) new UnionRDD(ssc.sc, rddsInWindow) } Some(windowRDD) } }
Example 66
Source File: ShuffledDStream.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.Partitioner import org.apache.spark.rdd.RDD import org.apache.spark.SparkContext._ import org.apache.spark.streaming.{Duration, Time} import scala.reflect.ClassTag private[streaming] class ShuffledDStream[K: ClassTag, V: ClassTag, C: ClassTag]( parent: DStream[(K, V)], createCombiner: V => C, mergeValue: (C, V) => C, mergeCombiner: (C, C) => C, partitioner: Partitioner, mapSideCombine: Boolean = true ) extends DStream[(K, C)] (parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[(K, C)]] = { parent.getOrCompute(validTime) match { case Some(rdd) => Some(rdd.combineByKey[C]( createCombiner, mergeValue, mergeCombiner, partitioner, mapSideCombine)) case None => None } } }
Example 67
Source File: FilteredDStream.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import scala.reflect.ClassTag private[streaming] class FilteredDStream[T: ClassTag]( parent: DStream[T], filterFunc: T => Boolean ) extends DStream[T](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[T]] = { parent.getOrCompute(validTime).map(_.filter(filterFunc)) } }
Example 68
Source File: FlatMapValuedDStream.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import org.apache.spark.SparkContext._ import scala.reflect.ClassTag private[streaming] class FlatMapValuedDStream[K: ClassTag, V: ClassTag, U: ClassTag]( parent: DStream[(K, V)], flatMapValueFunc: V => TraversableOnce[U] ) extends DStream[(K, U)](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[(K, U)]] = { parent.getOrCompute(validTime).map(_.flatMapValues[U](flatMapValueFunc)) } }
Example 69
Source File: MapValuedDStream.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import org.apache.spark.SparkContext._ import scala.reflect.ClassTag private[streaming] class MapValuedDStream[K: ClassTag, V: ClassTag, U: ClassTag]( parent: DStream[(K, V)], mapValueFunc: V => U ) extends DStream[(K, U)](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[(K, U)]] = { parent.getOrCompute(validTime).map(_.mapValues[U](mapValueFunc)) } }
Example 70
Source File: TransformedDStream.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.rdd.{PairRDDFunctions, RDD} import org.apache.spark.streaming.{Duration, Time} import scala.reflect.ClassTag private[streaming] class TransformedDStream[U: ClassTag] ( parents: Seq[DStream[_]], transformFunc: (Seq[RDD[_]], Time) => RDD[U] ) extends DStream[U](parents.head.ssc) { require(parents.length > 0, "List of DStreams to transform is empty") require(parents.map(_.ssc).distinct.size == 1, "Some of the DStreams have different contexts") require(parents.map(_.slideDuration).distinct.size == 1, "Some of the DStreams have different slide durations") override def dependencies: List[DStream[_]] = parents.toList override def slideDuration: Duration = parents.head.slideDuration override def compute(validTime: Time): Option[RDD[U]] = { val parentRDDs = parents.map(_.getOrCompute(validTime).orNull).toSeq Some(transformFunc(parentRDDs, validTime)) } }
Example 71
Source File: MappedDStream.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import scala.reflect.ClassTag private[streaming] class MappedDStream[T: ClassTag, U: ClassTag] ( parent: DStream[T], mapFunc: T => U ) extends DStream[U](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[U]] = { parent.getOrCompute(validTime).map(_.map[U](mapFunc)) } }
Example 72
Source File: MapPartitionedDStream.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import scala.reflect.ClassTag private[streaming] class MapPartitionedDStream[T: ClassTag, U: ClassTag]( parent: DStream[T], mapPartFunc: Iterator[T] => Iterator[U], preservePartitioning: Boolean ) extends DStream[U](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[U]] = { parent.getOrCompute(validTime).map(_.mapPartitions[U](mapPartFunc, preservePartitioning)) } }
Example 73
Source File: InputInfoTrackerSuite.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.scheduler import org.scalatest.BeforeAndAfter import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.streaming.{Time, Duration, StreamingContext} class InputInfoTrackerSuite extends SparkFunSuite with BeforeAndAfter { private var ssc: StreamingContext = _ before { val conf = new SparkConf().setMaster("local[2]").setAppName("DirectStreamTacker") if (ssc == null) { ssc = new StreamingContext(conf, Duration(1000)) } } after { if (ssc != null) { ssc.stop() ssc = null } } test("test report and get InputInfo from InputInfoTracker") { val inputInfoTracker = new InputInfoTracker(ssc) val streamId1 = 0 val streamId2 = 1 val time = Time(0L) val inputInfo1 = InputInfo(streamId1, 100L) val inputInfo2 = InputInfo(streamId2, 300L) inputInfoTracker.reportInfo(time, inputInfo1) inputInfoTracker.reportInfo(time, inputInfo2) val batchTimeToInputInfos = inputInfoTracker.getInfo(time) assert(batchTimeToInputInfos.size == 2) assert(batchTimeToInputInfos.keys === Set(streamId1, streamId2)) assert(batchTimeToInputInfos(streamId1) === inputInfo1) assert(batchTimeToInputInfos(streamId2) === inputInfo2) assert(inputInfoTracker.getInfo(time)(streamId1) === inputInfo1) } test("test cleanup InputInfo from InputInfoTracker") { val inputInfoTracker = new InputInfoTracker(ssc) val streamId1 = 0 val inputInfo1 = InputInfo(streamId1, 100L) val inputInfo2 = InputInfo(streamId1, 300L) inputInfoTracker.reportInfo(Time(0), inputInfo1) inputInfoTracker.reportInfo(Time(1), inputInfo2) inputInfoTracker.cleanup(Time(0)) assert(inputInfoTracker.getInfo(Time(0))(streamId1) === inputInfo1) assert(inputInfoTracker.getInfo(Time(1))(streamId1) === inputInfo2) inputInfoTracker.cleanup(Time(1)) assert(inputInfoTracker.getInfo(Time(0)).get(streamId1) === None) assert(inputInfoTracker.getInfo(Time(1))(streamId1) === inputInfo2) } }
Example 74
Source File: KinesisInputDStream.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.kinesis import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream import org.apache.spark.rdd.RDD import org.apache.spark.storage.{BlockId, StorageLevel} import org.apache.spark.streaming.dstream.ReceiverInputDStream import org.apache.spark.streaming.receiver.Receiver import org.apache.spark.streaming.scheduler.ReceivedBlockInfo import org.apache.spark.streaming.{Duration, StreamingContext, Time} private[kinesis] class KinesisInputDStream( @transient _ssc: StreamingContext, streamName: String, endpointUrl: String, regionName: String, initialPositionInStream: InitialPositionInStream, checkpointAppName: String, checkpointInterval: Duration, storageLevel: StorageLevel, awsCredentialsOption: Option[SerializableAWSCredentials] ) extends ReceiverInputDStream[Array[Byte]](_ssc) { private[streaming] override def createBlockRDD(time: Time, blockInfos: Seq[ReceivedBlockInfo]): RDD[Array[Byte]] = { // This returns true even for when blockInfos is empty val allBlocksHaveRanges = blockInfos.map { _.metadataOption }.forall(_.nonEmpty) if (allBlocksHaveRanges) { // Create a KinesisBackedBlockRDD, even when there are no blocks val blockIds = blockInfos.map { _.blockId.asInstanceOf[BlockId] }.toArray val seqNumRanges = blockInfos.map { _.metadataOption.get.asInstanceOf[SequenceNumberRanges] }.toArray val isBlockIdValid = blockInfos.map { _.isBlockIdValid() }.toArray logDebug(s"Creating KinesisBackedBlockRDD for $time with ${seqNumRanges.length} " + s"seq number ranges: ${seqNumRanges.mkString(", ")} ") new KinesisBackedBlockRDD( context.sc, regionName, endpointUrl, blockIds, seqNumRanges, isBlockIdValid = isBlockIdValid, retryTimeoutMs = ssc.graph.batchDuration.milliseconds.toInt, awsCredentialsOption = awsCredentialsOption) } else { logWarning("Kinesis sequence number information was not present with some block metadata," + " it may not be possible to recover from failures") super.createBlockRDD(time, blockInfos) } } override def getReceiver(): Receiver[Array[Byte]] = { new KinesisReceiver(streamName, endpointUrl, regionName, initialPositionInStream, checkpointAppName, checkpointInterval, storageLevel, awsCredentialsOption) } }
Example 75
Source File: UnionDStream.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import org.apache.spark.rdd.UnionRDD import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag private[streaming] class UnionDStream[T: ClassTag](parents: Array[DStream[T]]) extends DStream[T](parents.head.ssc) { require(parents.length > 0, "List of DStreams to union is empty") require(parents.map(_.ssc).distinct.size == 1, "Some of the DStreams have different contexts") require(parents.map(_.slideDuration).distinct.size == 1, "Some of the DStreams have different slide durations") override def dependencies: List[DStream[_]] = parents.toList override def slideDuration: Duration = parents.head.slideDuration override def compute(validTime: Time): Option[RDD[T]] = { val rdds = new ArrayBuffer[RDD[T]]() parents.map(_.getOrCompute(validTime)).foreach { case Some(rdd) => rdds += rdd case None => throw new Exception("Could not generate RDD from a parent for unifying at time " + validTime) } if (rdds.size > 0) { Some(new UnionRDD(ssc.sc, rdds)) } else { None } } }
Example 76
Source File: ForEachDStream.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}
import org.apache.spark.streaming.scheduler.Job

import scala.reflect.ClassTag

private[streaming]
// one instance per output DStream
class ForEachDStream[T: ClassTag] (
    parent: DStream[T],
    foreachFunc: (RDD[T], Time) => Unit
  ) extends DStream[Unit](parent.ssc) {

  override def dependencies: List[DStream[_]] = List(parent)

  // slide duration
  override def slideDuration: Duration = parent.slideDuration

  // None is declared as an object rather than a class: use None when there is no value and
  // Some to wrap a value that is present; both are subtypes of Option.
  override def compute(validTime: Time): Option[RDD[Unit]] = None

  override def generateJob(time: Time): Option[Job] = {
    parent.getOrCompute(time) match {
      case Some(rdd) =>
        val jobFunc = () => createRDDWithLocalProperties(time) {
          ssc.sparkContext.setCallSite(creationSite)
          foreachFunc(rdd, time)
        }
        Some(new Job(time, jobFunc))
      // None when there is no RDD for this batch, Some(job) otherwise.
      case None => None
    }
  }
}
Example 77
Source File: FlatMappedDStream.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import scala.reflect.ClassTag private[streaming] class FlatMappedDStream[T: ClassTag, U: ClassTag]( parent: DStream[T], flatMapFunc: T => Traversable[U] ) extends DStream[U](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[U]] = { parent.getOrCompute(validTime).map(_.flatMap(flatMapFunc)) } }
Example 78
Source File: WindowedDStream.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import org.apache.spark.rdd.{PartitionerAwareUnionRDD, RDD, UnionRDD}
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming._
import org.apache.spark.streaming.Duration

import scala.reflect.ClassTag

private[streaming]
class WindowedDStream[T: ClassTag](
    parent: DStream[T],
    _windowDuration: Duration,
    _slideDuration: Duration)
  extends DStream[T](parent.ssc) {

  if (!_windowDuration.isMultipleOf(parent.slideDuration)) {
    throw new Exception("The window duration of windowed DStream (" + _windowDuration + ") " +
      "must be a multiple of the slide duration of parent DStream (" + parent.slideDuration + ")")
  }

  if (!_slideDuration.isMultipleOf(parent.slideDuration)) {
    throw new Exception("The slide duration of windowed DStream (" + _slideDuration + ") " +
      "must be a multiple of the slide duration of parent DStream (" + parent.slideDuration + ")")
  }

  // Persist parent level by default, as those RDDs are going to be obviously reused.
  parent.persist(StorageLevel.MEMORY_ONLY_SER)

  def windowDuration: Duration = _windowDuration

  override def dependencies: List[DStream[_]] = List(parent)

  override def slideDuration: Duration = _slideDuration

  override def parentRememberDuration: Duration = rememberDuration + windowDuration

  override def persist(level: StorageLevel): DStream[T] = {
    // Do not let this windowed DStream be persisted as windowed (union-ed) RDDs share underlying
    // RDDs and persisting the windowed RDDs would store numerous copies of the underlying data.
    // Instead control the persistence of the parent DStream.
    parent.persist(level)
    this
  }

  override def compute(validTime: Time): Option[RDD[T]] = {
    val currentWindow = new Interval(validTime - windowDuration + parent.slideDuration, validTime)
    val rddsInWindow = parent.slice(currentWindow)
    val windowRDD = if (rddsInWindow.flatMap(_.partitioner).distinct.length == 1) {
      logDebug("Using partition aware union for windowing at " + validTime)
      new PartitionerAwareUnionRDD(ssc.sc, rddsInWindow)
    } else {
      logDebug("Using normal union for windowing at " + validTime)
      new UnionRDD(ssc.sc, rddsInWindow)
    }
    Some(windowRDD)
  }
}
Example 79
Source File: ShuffledDStream.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.Partitioner import org.apache.spark.rdd.RDD import org.apache.spark.SparkContext._ import org.apache.spark.streaming.{Duration, Time} import scala.reflect.ClassTag private[streaming] class ShuffledDStream[K: ClassTag, V: ClassTag, C: ClassTag]( parent: DStream[(K, V)], createCombiner: V => C, mergeValue: (C, V) => C, mergeCombiner: (C, C) => C, partitioner: Partitioner, mapSideCombine: Boolean = true ) extends DStream[(K, C)] (parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[(K, C)]] = { parent.getOrCompute(validTime) match { case Some(rdd) => Some(rdd.combineByKey[C]( createCombiner, mergeValue, mergeCombiner, partitioner, mapSideCombine)) case None => None } } }
Example 80
Source File: FilteredDStream.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import scala.reflect.ClassTag private[streaming] class FilteredDStream[T: ClassTag]( parent: DStream[T], filterFunc: T => Boolean ) extends DStream[T](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[T]] = { parent.getOrCompute(validTime).map(_.filter(filterFunc)) } }
Example 81
Source File: FlatMapValuedDStream.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import org.apache.spark.SparkContext._ import scala.reflect.ClassTag private[streaming] class FlatMapValuedDStream[K: ClassTag, V: ClassTag, U: ClassTag]( parent: DStream[(K, V)], flatMapValueFunc: V => TraversableOnce[U] ) extends DStream[(K, U)](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[(K, U)]] = { parent.getOrCompute(validTime).map(_.flatMapValues[U](flatMapValueFunc)) } }
Example 82
Source File: MapValuedDStream.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import org.apache.spark.SparkContext._ import scala.reflect.ClassTag private[streaming] class MapValuedDStream[K: ClassTag, V: ClassTag, U: ClassTag]( parent: DStream[(K, V)], mapValueFunc: V => U ) extends DStream[(K, U)](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[(K, U)]] = { parent.getOrCompute(validTime).map(_.mapValues[U](mapValueFunc)) } }
Example 83
Source File: TransformedDStream.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.rdd.{PairRDDFunctions, RDD} import org.apache.spark.streaming.{Duration, Time} import scala.reflect.ClassTag private[streaming] class TransformedDStream[U: ClassTag] ( parents: Seq[DStream[_]], transformFunc: (Seq[RDD[_]], Time) => RDD[U] ) extends DStream[U](parents.head.ssc) { require(parents.length > 0, "List of DStreams to transform is empty") require(parents.map(_.ssc).distinct.size == 1, "Some of the DStreams have different contexts") require(parents.map(_.slideDuration).distinct.size == 1, "Some of the DStreams have different slide durations") override def dependencies: List[DStream[_]] = parents.toList override def slideDuration: Duration = parents.head.slideDuration override def compute(validTime: Time): Option[RDD[U]] = { val parentRDDs = parents.map(_.getOrCompute(validTime).orNull).toSeq Some(transformFunc(parentRDDs, validTime)) } }
Example 84
Source File: MappedDStream.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import scala.reflect.ClassTag private[streaming] class MappedDStream[T: ClassTag, U: ClassTag] ( parent: DStream[T], mapFunc: T => U ) extends DStream[U](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[U]] = { parent.getOrCompute(validTime).map(_.map[U](mapFunc)) } }
Example 85
Source File: MapPartitionedDStream.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import scala.reflect.ClassTag private[streaming] class MapPartitionedDStream[T: ClassTag, U: ClassTag]( parent: DStream[T], mapPartFunc: Iterator[T] => Iterator[U], preservePartitioning: Boolean ) extends DStream[U](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[U]] = { parent.getOrCompute(validTime).map(_.mapPartitions[U](mapPartFunc, preservePartitioning)) } }
Example 86
Source File: RateEstimator.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.scheduler.rate

import org.apache.spark.SparkConf
import org.apache.spark.streaming.Duration

// Enclosing object restored so the braces balance; the RateEstimator trait declaration
// is not part of this excerpt.
object RateEstimator {

  def create(conf: SparkConf, batchInterval: Duration): RateEstimator =
    conf.get("spark.streaming.backpressure.rateEstimator", "pid") match {
      case "pid" =>
        val proportional = conf.getDouble("spark.streaming.backpressure.pid.proportional", 1.0)
        val integral = conf.getDouble("spark.streaming.backpressure.pid.integral", 0.2)
        val derived = conf.getDouble("spark.streaming.backpressure.pid.derived", 0.0)
        val minRate = conf.getDouble("spark.streaming.backpressure.pid.minRate", 100)
        new PIDRateEstimator(batchInterval.milliseconds, proportional, integral, derived, minRate)

      case estimator =>
        throw new IllegalArgumentException(s"Unknown rate estimator: $estimator")
    }
}
Example 87
Source File: FnContext.scala From mist with Apache License 2.0 | 5 votes |
package mist.api import mist.api.data.JsMap import org.apache.spark.SparkContext import org.apache.spark.streaming.Duration sealed trait FnContext{ val params: JsMap } case class FullFnContext( sc: SparkContext, streamingDuration: Duration, info: RuntimeJobInfo, params: JsMap ) extends FnContext object FnContext { def onlyInput(in: JsMap): FnContext = new FnContext { override val params: JsMap = in } def apply( sc: SparkContext, params: JsMap, streamingDuration: Duration = Duration(1000), info: RuntimeJobInfo = RuntimeJobInfo.Unknown): FullFnContext = FullFnContext(sc, streamingDuration, info, params) } object FnContextBuilder { def create(sc: SparkContext, params: JsMap): FullFnContext = FnContext(sc, params) def create( sc: SparkContext, params: JsMap, streamingDuration: Duration): FullFnContext = FnContext(sc, params, streamingDuration) def create( sc: SparkContext, params: JsMap, streamingDuration: Duration, info: RuntimeJobInfo ): FullFnContext = FnContext(sc, params, streamingDuration, info) }
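The FnContext companion above defaults streamingDuration to Duration(1000). A minimal sketch of building a FullFnContext with a custom streaming duration via that factory; how the JsMap of parameters is produced is left to the caller:

import mist.api.{FnContext, FullFnContext}
import mist.api.data.JsMap
import org.apache.spark.SparkContext
import org.apache.spark.streaming.Seconds

// Sketch: construct a FullFnContext with a 30-second streaming duration.
def contextFor(sc: SparkContext, params: JsMap): FullFnContext =
  FnContext(sc, params, streamingDuration = Seconds(30))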
Example 88
Source File: UnionDStream.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag import org.apache.spark.SparkException import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} private[streaming] class UnionDStream[T: ClassTag](parents: Array[DStream[T]]) extends DStream[T](parents.head.ssc) { require(parents.length > 0, "List of DStreams to union is empty") require(parents.map(_.ssc).distinct.length == 1, "Some of the DStreams have different contexts") require(parents.map(_.slideDuration).distinct.length == 1, "Some of the DStreams have different slide durations") override def dependencies: List[DStream[_]] = parents.toList override def slideDuration: Duration = parents.head.slideDuration override def compute(validTime: Time): Option[RDD[T]] = { val rdds = new ArrayBuffer[RDD[T]]() parents.map(_.getOrCompute(validTime)).foreach { case Some(rdd) => rdds += rdd case None => throw new SparkException("Could not generate RDD from a parent for unifying at" + s" time $validTime") } if (rdds.nonEmpty) { Some(ssc.sc.union(rdds)) } else { None } } }
Example 89
Source File: ForEachDStream.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.streaming.scheduler.Job private[streaming] class ForEachDStream[T: ClassTag] ( parent: DStream[T], foreachFunc: (RDD[T], Time) => Unit, displayInnerRDDOps: Boolean ) extends DStream[Unit](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[Unit]] = None override def generateJob(time: Time): Option[Job] = { parent.getOrCompute(time) match { case Some(rdd) => val jobFunc = () => createRDDWithLocalProperties(time, displayInnerRDDOps) { foreachFunc(rdd, time) } Some(new Job(time, jobFunc)) case None => None } } }
Example 90
Source File: FlatMappedDStream.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} private[streaming] class FlatMappedDStream[T: ClassTag, U: ClassTag]( parent: DStream[T], flatMapFunc: T => TraversableOnce[U] ) extends DStream[U](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[U]] = { parent.getOrCompute(validTime).map(_.flatMap(flatMapFunc)) } }
Example 91
Source File: WindowedDStream.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming._ import org.apache.spark.streaming.Duration private[streaming] class WindowedDStream[T: ClassTag]( parent: DStream[T], _windowDuration: Duration, _slideDuration: Duration) extends DStream[T](parent.ssc) { if (!_windowDuration.isMultipleOf(parent.slideDuration)) { throw new Exception("The window duration of windowed DStream (" + _windowDuration + ") " + "must be a multiple of the slide duration of parent DStream (" + parent.slideDuration + ")") } if (!_slideDuration.isMultipleOf(parent.slideDuration)) { throw new Exception("The slide duration of windowed DStream (" + _slideDuration + ") " + "must be a multiple of the slide duration of parent DStream (" + parent.slideDuration + ")") } // Persist parent level by default, as those RDDs are going to be obviously reused. parent.persist(StorageLevel.MEMORY_ONLY_SER) def windowDuration: Duration = _windowDuration override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = _slideDuration override def parentRememberDuration: Duration = rememberDuration + windowDuration override def persist(level: StorageLevel): DStream[T] = { // Do not let this windowed DStream be persisted as windowed (union-ed) RDDs share underlying // RDDs and persisting the windowed RDDs would store numerous copies of the underlying data. // Instead control the persistence of the parent DStream. parent.persist(level) this } override def compute(validTime: Time): Option[RDD[T]] = { val currentWindow = new Interval(validTime - windowDuration + parent.slideDuration, validTime) val rddsInWindow = parent.slice(currentWindow) Some(ssc.sc.union(rddsInWindow)) } }
Example 92
Source File: ShuffledDStream.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.Partitioner import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} private[streaming] class ShuffledDStream[K: ClassTag, V: ClassTag, C: ClassTag]( parent: DStream[(K, V)], createCombiner: V => C, mergeValue: (C, V) => C, mergeCombiner: (C, C) => C, partitioner: Partitioner, mapSideCombine: Boolean = true ) extends DStream[(K, C)] (parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[(K, C)]] = { parent.getOrCompute(validTime) match { case Some(rdd) => Some(rdd.combineByKey[C]( createCombiner, mergeValue, mergeCombiner, partitioner, mapSideCombine)) case None => None } } }
Example 93
Source File: FilteredDStream.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} private[streaming] class FilteredDStream[T: ClassTag]( parent: DStream[T], filterFunc: T => Boolean ) extends DStream[T](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[T]] = { parent.getOrCompute(validTime).map(_.filter(filterFunc)) } }
Example 94
Source File: FlatMapValuedDStream.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} private[streaming] class FlatMapValuedDStream[K: ClassTag, V: ClassTag, U: ClassTag]( parent: DStream[(K, V)], flatMapValueFunc: V => TraversableOnce[U] ) extends DStream[(K, U)](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[(K, U)]] = { parent.getOrCompute(validTime).map(_.flatMapValues[U](flatMapValueFunc)) } }
Example 95
Source File: MapValuedDStream.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} private[streaming] class MapValuedDStream[K: ClassTag, V: ClassTag, U: ClassTag]( parent: DStream[(K, V)], mapValueFunc: V => U ) extends DStream[(K, U)](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[(K, U)]] = { parent.getOrCompute(validTime).map(_.mapValues[U](mapValueFunc)) } }
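MapValuedDStream and the preceding FlatMapValuedDStream back the mapValues and flatMapValues operations on pair DStreams; both transform only the values and leave the keys and partitioning untouched. A minimal sketch (the "user:item1 item2 ..." input format is an illustrative assumption):

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Duration, StreamingContext}

object MapValuesExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("MapValuesExample")
    val ssc = new StreamingContext(conf, Duration(1000))
    // Hypothetical input: "user:item1 item2 ..." lines on a local socket.
    val pairs = ssc.socketTextStream("localhost", 9999).map { line =>
      val Array(user, items) = line.split(":", 2)
      (user, items)
    }
    val itemCounts = pairs.mapValues(_.split(" ").length)   // goes through MapValuedDStream
    val items = pairs.flatMapValues(_.split(" ").toSeq)     // goes through FlatMapValuedDStream
    itemCounts.print()
    items.print()
    ssc.start()
    ssc.awaitTermination()
  }
}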
Example 96
Source File: TransformedDStream.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.SparkException import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} private[streaming] class TransformedDStream[U: ClassTag] ( parents: Seq[DStream[_]], transformFunc: (Seq[RDD[_]], Time) => RDD[U] ) extends DStream[U](parents.head.ssc) { require(parents.nonEmpty, "List of DStreams to transform is empty") require(parents.map(_.ssc).distinct.size == 1, "Some of the DStreams have different contexts") require(parents.map(_.slideDuration).distinct.size == 1, "Some of the DStreams have different slide durations") override def dependencies: List[DStream[_]] = parents.toList override def slideDuration: Duration = parents.head.slideDuration override def compute(validTime: Time): Option[RDD[U]] = { // For each parent stream, get its RDD for the current batch time. val parentRDDs = parents.map { parent => parent.getOrCompute(validTime).getOrElse( // Guard out against parent DStream that return None instead of Some(rdd) to avoid NPE throw new SparkException(s"Couldn't generate RDD from parent at time $validTime")) } val transformedRDD = transformFunc(parentRDDs, validTime) if (transformedRDD == null) { throw new SparkException("Transform function must not return null. " + "Return SparkContext.emptyRDD() instead to represent no element " + "as the result of transformation.") } Some(transformedRDD) } override protected[streaming] def createRDDWithLocalProperties[U]( time: Time, displayInnerRDDOps: Boolean)(body: => U): U = { super.createRDDWithLocalProperties(time, displayInnerRDDOps = true)(body) } }
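TransformedDStream is created by DStream.transform and transformWith; the compute method above is also why a transform function must return an empty RDD rather than null. A minimal sketch (the dedup logic is illustrative):

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Duration, StreamingContext}

object TransformExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("TransformExample")
    val ssc = new StreamingContext(conf, Duration(1000))
    val lines = ssc.socketTextStream("localhost", 9999)
    // transform hands each batch RDD to an arbitrary RDD-to-RDD function; return an
    // empty RDD (never null) to signal that a batch produced no output.
    val deduped = lines.transform { rdd =>
      if (rdd.isEmpty()) rdd.sparkContext.emptyRDD[String] else rdd.distinct()
    }
    deduped.print()
    ssc.start()
    ssc.awaitTermination()
  }
}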
Example 97
Source File: MappedDStream.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} private[streaming] class MappedDStream[T: ClassTag, U: ClassTag] ( parent: DStream[T], mapFunc: T => U ) extends DStream[U](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[U]] = { parent.getOrCompute(validTime).map(_.map[U](mapFunc)) } }
Example 98
Source File: MapPartitionedDStream.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} private[streaming] class MapPartitionedDStream[T: ClassTag, U: ClassTag]( parent: DStream[T], mapPartFunc: Iterator[T] => Iterator[U], preservePartitioning: Boolean ) extends DStream[U](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[U]] = { parent.getOrCompute(validTime).map(_.mapPartitions[U](mapPartFunc, preservePartitioning)) } }
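MapPartitionedDStream backs DStream.mapPartitions, which is useful when per-partition setup should be amortized over all records in a partition. A minimal sketch (the length computation stands in for real per-partition work):

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Duration, StreamingContext}

object MapPartitionsExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("MapPartitionsExample")
    val ssc = new StreamingContext(conf, Duration(1000))
    val lines = ssc.socketTextStream("localhost", 9999)
    // mapPartitions receives a whole partition iterator at once; preservePartitioning
    // defaults to false, matching the flag forwarded to MapPartitionedDStream.
    val lengths = lines.mapPartitions(iter => iter.map(_.length))
    lengths.print()
    ssc.start()
    ssc.awaitTermination()
  }
}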
Example 99
Source File: RateEstimator.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.scheduler.rate import org.apache.spark.SparkConf import org.apache.spark.streaming.Duration object RateEstimator { def create(conf: SparkConf, batchInterval: Duration): RateEstimator = conf.get("spark.streaming.backpressure.rateEstimator", "pid") match { case "pid" => val proportional = conf.getDouble("spark.streaming.backpressure.pid.proportional", 1.0) val integral = conf.getDouble("spark.streaming.backpressure.pid.integral", 0.2) val derived = conf.getDouble("spark.streaming.backpressure.pid.derived", 0.0) val minRate = conf.getDouble("spark.streaming.backpressure.pid.minRate", 100) new PIDRateEstimator(batchInterval.milliseconds, proportional, integral, derived, minRate) case estimator => throw new IllegalArgumentException(s"Unknown rate estimator: $estimator") } }
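The factory above is driven entirely by SparkConf keys. A sketch of a configuration that selects the PID estimator; the values shown simply restate the defaults read by the factory and are illustrative:

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Duration, StreamingContext}

object BackpressureConfig {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local[2]")
      .setAppName("BackpressureConfig")
      .set("spark.streaming.backpressure.enabled", "true")
      // Keys read by RateEstimator.create; the values here mirror its defaults.
      .set("spark.streaming.backpressure.rateEstimator", "pid")
      .set("spark.streaming.backpressure.pid.proportional", "1.0")
      .set("spark.streaming.backpressure.pid.integral", "0.2")
      .set("spark.streaming.backpressure.pid.derived", "0.0")
      .set("spark.streaming.backpressure.pid.minRate", "100")
    // The batch interval set here is the Duration that backpressure rate estimation works against.
    val ssc = new StreamingContext(conf, Duration(2000))
    // ... define input streams and output operations, then ssc.start() ...
  }
}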
Example 100
Source File: InputInfoTrackerSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.scheduler import org.scalatest.BeforeAndAfter import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.streaming.{Duration, StreamingContext, Time} class InputInfoTrackerSuite extends SparkFunSuite with BeforeAndAfter { private var ssc: StreamingContext = _ before { val conf = new SparkConf().setMaster("local[2]").setAppName("DirectStreamTacker") if (ssc == null) { ssc = new StreamingContext(conf, Duration(1000)) } } after { if (ssc != null) { ssc.stop() ssc = null } } test("test report and get InputInfo from InputInfoTracker") { val inputInfoTracker = new InputInfoTracker(ssc) val streamId1 = 0 val streamId2 = 1 val time = Time(0L) val inputInfo1 = StreamInputInfo(streamId1, 100L) val inputInfo2 = StreamInputInfo(streamId2, 300L) inputInfoTracker.reportInfo(time, inputInfo1) inputInfoTracker.reportInfo(time, inputInfo2) val batchTimeToInputInfos = inputInfoTracker.getInfo(time) assert(batchTimeToInputInfos.size == 2) assert(batchTimeToInputInfos.keys === Set(streamId1, streamId2)) assert(batchTimeToInputInfos(streamId1) === inputInfo1) assert(batchTimeToInputInfos(streamId2) === inputInfo2) assert(inputInfoTracker.getInfo(time)(streamId1) === inputInfo1) } test("test cleanup InputInfo from InputInfoTracker") { val inputInfoTracker = new InputInfoTracker(ssc) val streamId1 = 0 val inputInfo1 = StreamInputInfo(streamId1, 100L) val inputInfo2 = StreamInputInfo(streamId1, 300L) inputInfoTracker.reportInfo(Time(0), inputInfo1) inputInfoTracker.reportInfo(Time(1), inputInfo2) inputInfoTracker.cleanup(Time(0)) assert(inputInfoTracker.getInfo(Time(0))(streamId1) === inputInfo1) assert(inputInfoTracker.getInfo(Time(1))(streamId1) === inputInfo2) inputInfoTracker.cleanup(Time(1)) assert(inputInfoTracker.getInfo(Time(0)).get(streamId1) === None) assert(inputInfoTracker.getInfo(Time(1))(streamId1) === inputInfo2) } }
Example 101
Source File: KinesisInputDStream.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.kinesis import scala.reflect.ClassTag import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream import com.amazonaws.services.kinesis.model.Record import org.apache.spark.rdd.RDD import org.apache.spark.storage.{BlockId, StorageLevel} import org.apache.spark.streaming.dstream.ReceiverInputDStream import org.apache.spark.streaming.receiver.Receiver import org.apache.spark.streaming.scheduler.ReceivedBlockInfo import org.apache.spark.streaming.{Duration, StreamingContext, Time} private[kinesis] class KinesisInputDStream[T: ClassTag]( @transient _ssc: StreamingContext, streamName: String, endpointUrl: String, regionName: String, initialPositionInStream: InitialPositionInStream, checkpointAppName: String, checkpointInterval: Duration, storageLevel: StorageLevel, messageHandler: Record => T, awsCredentialsOption: Option[SerializableAWSCredentials] ) extends ReceiverInputDStream[T](_ssc) { private[streaming] override def createBlockRDD(time: Time, blockInfos: Seq[ReceivedBlockInfo]): RDD[T] = { // This returns true even for when blockInfos is empty val allBlocksHaveRanges = blockInfos.map { _.metadataOption }.forall(_.nonEmpty) if (allBlocksHaveRanges) { // Create a KinesisBackedBlockRDD, even when there are no blocks val blockIds = blockInfos.map { _.blockId.asInstanceOf[BlockId] }.toArray val seqNumRanges = blockInfos.map { _.metadataOption.get.asInstanceOf[SequenceNumberRanges] }.toArray val isBlockIdValid = blockInfos.map { _.isBlockIdValid() }.toArray logDebug(s"Creating KinesisBackedBlockRDD for $time with ${seqNumRanges.length} " + s"seq number ranges: ${seqNumRanges.mkString(", ")} ") new KinesisBackedBlockRDD( context.sc, regionName, endpointUrl, blockIds, seqNumRanges, isBlockIdValid = isBlockIdValid, retryTimeoutMs = ssc.graph.batchDuration.milliseconds.toInt, messageHandler = messageHandler, awsCredentialsOption = awsCredentialsOption) } else { logWarning("Kinesis sequence number information was not present with some block metadata," + " it may not be possible to recover from failures") super.createBlockRDD(time, blockInfos) } } override def getReceiver(): Receiver[T] = { new KinesisReceiver(streamName, endpointUrl, regionName, initialPositionInStream, checkpointAppName, checkpointInterval, storageLevel, messageHandler, awsCredentialsOption) } }
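Applications do not instantiate KinesisInputDStream directly; in this Spark lineage the stream is normally obtained through KinesisUtils.createStream, which takes the checkpoint interval as a Duration. A sketch, assuming the createStream overload below is available in the bundled spark-streaming-kinesis-asl module (stream name, endpoint URL, and region are placeholders):

import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream
import org.apache.spark.SparkConf
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming.{Duration, StreamingContext}
import org.apache.spark.streaming.kinesis.KinesisUtils

object KinesisExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[4]").setAppName("KinesisExample")
    val batchInterval = Duration(2000)
    val ssc = new StreamingContext(conf, batchInterval)
    // checkpointInterval is the Duration handed to KinesisInputDStream above;
    // reusing the batch interval is a common choice.
    val bytes = KinesisUtils.createStream(
      ssc, "KinesisExample", "myStreamName", "https://kinesis.us-east-1.amazonaws.com",
      "us-east-1", InitialPositionInStream.LATEST, batchInterval,
      StorageLevel.MEMORY_AND_DISK_2)
    bytes.map(new String(_)).print()
    ssc.start()
    ssc.awaitTermination()
  }
}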
Example 102
Source File: UnionDStream.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag import org.apache.spark.SparkException import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import org.apache.spark.rdd.UnionRDD private[streaming] class UnionDStream[T: ClassTag](parents: Array[DStream[T]]) extends DStream[T](parents.head.ssc) { require(parents.length > 0, "List of DStreams to union is empty") require(parents.map(_.ssc).distinct.size == 1, "Some of the DStreams have different contexts") require(parents.map(_.slideDuration).distinct.size == 1, "Some of the DStreams have different slide durations") override def dependencies: List[DStream[_]] = parents.toList override def slideDuration: Duration = parents.head.slideDuration override def compute(validTime: Time): Option[RDD[T]] = { val rdds = new ArrayBuffer[RDD[T]]() parents.map(_.getOrCompute(validTime)).foreach { case Some(rdd) => rdds += rdd case None => throw new SparkException("Could not generate RDD from a parent for unifying at" + s" time $validTime") } if (rdds.size > 0) { Some(new UnionRDD(ssc.sc, rdds)) } else { None } } }
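UnionDStream requires all parents to share the same StreamingContext and slide Duration; it is reached through DStream.union or StreamingContext.union. A minimal sketch with two sources (ports are illustrative):

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Duration, StreamingContext}

object UnionExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[4]").setAppName("UnionExample")
    val ssc = new StreamingContext(conf, Duration(1000))
    // Both parents come from the same context, so their slide Durations match.
    val a = ssc.socketTextStream("localhost", 9999)
    val b = ssc.socketTextStream("localhost", 9998)
    val unioned = ssc.union(Seq(a, b)) // builds a UnionDStream over both parents
    unioned.count().print()
    ssc.start()
    ssc.awaitTermination()
  }
}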
Example 103
Source File: ForEachDStream.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.streaming.scheduler.Job import scala.reflect.ClassTag private[streaming] class ForEachDStream[T: ClassTag] ( parent: DStream[T], foreachFunc: (RDD[T], Time) => Unit, displayInnerRDDOps: Boolean ) extends DStream[Unit](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[Unit]] = None override def generateJob(time: Time): Option[Job] = { parent.getOrCompute(time) match { case Some(rdd) => val jobFunc = () => createRDDWithLocalProperties(time, displayInnerRDDOps) { foreachFunc(rdd, time) } Some(new Job(time, jobFunc)) case None => None } } }
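ForEachDStream is the output DStream registered by foreachRDD; its generateJob wraps the user closure into a Job for each batch. A minimal sketch:

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Duration, StreamingContext}

object ForeachRDDExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("ForeachRDDExample")
    val ssc = new StreamingContext(conf, Duration(1000))
    val lines = ssc.socketTextStream("localhost", 9999)
    // foreachRDD registers a ForEachDStream; the closure runs once per batch time.
    lines.foreachRDD { (rdd, time) =>
      println(s"Batch $time contained ${rdd.count()} records")
    }
    ssc.start()
    ssc.awaitTermination()
  }
}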
Example 104
Source File: FlatMappedDStream.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import scala.reflect.ClassTag private[streaming] class FlatMappedDStream[T: ClassTag, U: ClassTag]( parent: DStream[T], flatMapFunc: T => Traversable[U] ) extends DStream[U](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[U]] = { parent.getOrCompute(validTime).map(_.flatMap(flatMapFunc)) } }
Example 105
Source File: WindowedDStream.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.rdd.{PartitionerAwareUnionRDD, RDD, UnionRDD} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming._ import org.apache.spark.streaming.Duration import scala.reflect.ClassTag private[streaming] class WindowedDStream[T: ClassTag]( parent: DStream[T], _windowDuration: Duration, _slideDuration: Duration) extends DStream[T](parent.ssc) { if (!_windowDuration.isMultipleOf(parent.slideDuration)) { throw new Exception("The window duration of windowed DStream (" + _windowDuration + ") " + "must be a multiple of the slide duration of parent DStream (" + parent.slideDuration + ")") } if (!_slideDuration.isMultipleOf(parent.slideDuration)) { throw new Exception("The slide duration of windowed DStream (" + _slideDuration + ") " + "must be a multiple of the slide duration of parent DStream (" + parent.slideDuration + ")") } // Persist parent level by default, as those RDDs are going to be obviously reused. parent.persist(StorageLevel.MEMORY_ONLY_SER) def windowDuration: Duration = _windowDuration override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = _slideDuration override def parentRememberDuration: Duration = rememberDuration + windowDuration override def persist(level: StorageLevel): DStream[T] = { // Do not let this windowed DStream be persisted as windowed (union-ed) RDDs share underlying // RDDs and persisting the windowed RDDs would store numerous copies of the underlying data. // Instead control the persistence of the parent DStream. parent.persist(level) this } override def compute(validTime: Time): Option[RDD[T]] = { val currentWindow = new Interval(validTime - windowDuration + parent.slideDuration, validTime) val rddsInWindow = parent.slice(currentWindow) val windowRDD = if (rddsInWindow.flatMap(_.partitioner).distinct.length == 1) { logDebug("Using partition aware union for windowing at " + validTime) new PartitionerAwareUnionRDD(ssc.sc, rddsInWindow) } else { logDebug("Using normal union for windowing at " + validTime) new UnionRDD(ssc.sc, rddsInWindow) } Some(windowRDD) } }
Example 106
Source File: ShuffledDStream.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.Partitioner import org.apache.spark.rdd.RDD import org.apache.spark.SparkContext._ import org.apache.spark.streaming.{Duration, Time} import scala.reflect.ClassTag private[streaming] class ShuffledDStream[K: ClassTag, V: ClassTag, C: ClassTag]( parent: DStream[(K, V)], createCombiner: V => C, mergeValue: (C, V) => C, mergeCombiner: (C, C) => C, partitioner: Partitioner, mapSideCombine: Boolean = true ) extends DStream[(K, C)] (parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[(K, C)]] = { parent.getOrCompute(validTime) match { case Some(rdd) => Some(rdd.combineByKey[C]( createCombiner, mergeValue, mergeCombiner, partitioner, mapSideCombine)) case None => None } } }
Example 107
Source File: FilteredDStream.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import scala.reflect.ClassTag private[streaming] class FilteredDStream[T: ClassTag]( parent: DStream[T], filterFunc: T => Boolean ) extends DStream[T](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[T]] = { parent.getOrCompute(validTime).map(_.filter(filterFunc)) } }
Example 108
Source File: FlatMapValuedDStream.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import org.apache.spark.SparkContext._ import scala.reflect.ClassTag private[streaming] class FlatMapValuedDStream[K: ClassTag, V: ClassTag, U: ClassTag]( parent: DStream[(K, V)], flatMapValueFunc: V => TraversableOnce[U] ) extends DStream[(K, U)](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[(K, U)]] = { parent.getOrCompute(validTime).map(_.flatMapValues[U](flatMapValueFunc)) } }
Example 109
Source File: MapValuedDStream.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import org.apache.spark.SparkContext._ import scala.reflect.ClassTag private[streaming] class MapValuedDStream[K: ClassTag, V: ClassTag, U: ClassTag]( parent: DStream[(K, V)], mapValueFunc: V => U ) extends DStream[(K, U)](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[(K, U)]] = { parent.getOrCompute(validTime).map(_.mapValues[U](mapValueFunc)) } }
Example 110
Source File: TransformedDStream.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.SparkException import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} private[streaming] class TransformedDStream[U: ClassTag] ( parents: Seq[DStream[_]], transformFunc: (Seq[RDD[_]], Time) => RDD[U] ) extends DStream[U](parents.head.ssc) { require(parents.length > 0, "List of DStreams to transform is empty") require(parents.map(_.ssc).distinct.size == 1, "Some of the DStreams have different contexts") require(parents.map(_.slideDuration).distinct.size == 1, "Some of the DStreams have different slide durations") override def dependencies: List[DStream[_]] = parents.toList override def slideDuration: Duration = parents.head.slideDuration override def compute(validTime: Time): Option[RDD[U]] = { val parentRDDs = parents.map { parent => parent.getOrCompute(validTime).getOrElse( // Guard out against parent DStream that return None instead of Some(rdd) to avoid NPE throw new SparkException(s"Couldn't generate RDD from parent at time $validTime")) } val transformedRDD = transformFunc(parentRDDs, validTime) if (transformedRDD == null) { throw new SparkException("Transform function must not return null. " + "Return SparkContext.emptyRDD() instead to represent no element " + "as the result of transformation.") } Some(transformedRDD) } override protected[streaming] def createRDDWithLocalProperties[U]( time: Time, displayInnerRDDOps: Boolean)(body: => U): U = { super.createRDDWithLocalProperties(time, displayInnerRDDOps = true)(body) } }
Example 111
Source File: MappedDStream.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import scala.reflect.ClassTag private[streaming] class MappedDStream[T: ClassTag, U: ClassTag] ( parent: DStream[T], mapFunc: T => U ) extends DStream[U](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[U]] = { parent.getOrCompute(validTime).map(_.map[U](mapFunc)) } }
Example 112
Source File: MapPartitionedDStream.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import scala.reflect.ClassTag private[streaming] class MapPartitionedDStream[T: ClassTag, U: ClassTag]( parent: DStream[T], mapPartFunc: Iterator[T] => Iterator[U], preservePartitioning: Boolean ) extends DStream[U](parent.ssc) { override def dependencies: List[DStream[_]] = List(parent) override def slideDuration: Duration = parent.slideDuration override def compute(validTime: Time): Option[RDD[U]] = { parent.getOrCompute(validTime).map(_.mapPartitions[U](mapPartFunc, preservePartitioning)) } }
Example 113
Source File: RateEstimator.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.scheduler.rate import org.apache.spark.SparkConf import org.apache.spark.streaming.Duration object RateEstimator { def create(conf: SparkConf, batchInterval: Duration): RateEstimator = conf.get("spark.streaming.backpressure.rateEstimator", "pid") match { case "pid" => val proportional = conf.getDouble("spark.streaming.backpressure.pid.proportional", 1.0) val integral = conf.getDouble("spark.streaming.backpressure.pid.integral", 0.2) val derived = conf.getDouble("spark.streaming.backpressure.pid.derived", 0.0) val minRate = conf.getDouble("spark.streaming.backpressure.pid.minRate", 100) new PIDRateEstimator(batchInterval.milliseconds, proportional, integral, derived, minRate) case estimator => throw new IllegalArgumentException(s"Unknown rate estimator: $estimator") } }
Example 114
Source File: InputInfoTrackerSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.scheduler import org.scalatest.BeforeAndAfter import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.streaming.{Time, Duration, StreamingContext} class InputInfoTrackerSuite extends SparkFunSuite with BeforeAndAfter { private var ssc: StreamingContext = _ before { val conf = new SparkConf().setMaster("local[2]").setAppName("DirectStreamTacker") if (ssc == null) { ssc = new StreamingContext(conf, Duration(1000)) } } after { if (ssc != null) { ssc.stop() ssc = null } } test("test report and get InputInfo from InputInfoTracker") { val inputInfoTracker = new InputInfoTracker(ssc) val streamId1 = 0 val streamId2 = 1 val time = Time(0L) val inputInfo1 = StreamInputInfo(streamId1, 100L) val inputInfo2 = StreamInputInfo(streamId2, 300L) inputInfoTracker.reportInfo(time, inputInfo1) inputInfoTracker.reportInfo(time, inputInfo2) val batchTimeToInputInfos = inputInfoTracker.getInfo(time) assert(batchTimeToInputInfos.size == 2) assert(batchTimeToInputInfos.keys === Set(streamId1, streamId2)) assert(batchTimeToInputInfos(streamId1) === inputInfo1) assert(batchTimeToInputInfos(streamId2) === inputInfo2) assert(inputInfoTracker.getInfo(time)(streamId1) === inputInfo1) } test("test cleanup InputInfo from InputInfoTracker") { val inputInfoTracker = new InputInfoTracker(ssc) val streamId1 = 0 val inputInfo1 = StreamInputInfo(streamId1, 100L) val inputInfo2 = StreamInputInfo(streamId1, 300L) inputInfoTracker.reportInfo(Time(0), inputInfo1) inputInfoTracker.reportInfo(Time(1), inputInfo2) inputInfoTracker.cleanup(Time(0)) assert(inputInfoTracker.getInfo(Time(0))(streamId1) === inputInfo1) assert(inputInfoTracker.getInfo(Time(1))(streamId1) === inputInfo2) inputInfoTracker.cleanup(Time(1)) assert(inputInfoTracker.getInfo(Time(0)).get(streamId1) === None) assert(inputInfoTracker.getInfo(Time(1))(streamId1) === inputInfo2) } }
Example 115
Source File: FixWindow.scala From Swallow with Apache License 2.0 | 5 votes |
package com.intel.hibench.sparkbench.streaming.application import com.intel.hibench.common.streaming.UserVisitParser import com.intel.hibench.common.streaming.metrics.KafkaReporter import com.intel.hibench.sparkbench.streaming.util.SparkBenchConfig import org.apache.spark.streaming.Duration import org.apache.spark.streaming.dstream.DStream class FixWindow(duration: Long, slideStep: Long) extends BenchBase { override def process(lines: DStream[(Long, String)], config: SparkBenchConfig): Unit = { val reportTopic = config.reporterTopic val brokerList = config.brokerList lines.window(Duration(duration), Duration(slideStep)).map{ case (inTime, line) => { val uv = UserVisitParser.parse(line) (uv.getIp, (inTime, 1)) } }.reduceByKey((value, result) => { // maintain the min time of this window and count record number (Math.min(value._1, result._1), value._2 + result._2) }).foreachRDD( rdd => rdd.foreachPartition( results => { // report back to kafka val reporter = new KafkaReporter(reportTopic, brokerList) val outTime = System.currentTimeMillis() results.foreach(res => { (1 to (res._2._2)).foreach { _ => reporter.report(res._2._1, outTime) if(config.debugMode) { println("Event: " + res._2._1 + ", " + outTime) } } }) })) } }
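The windowed (min-arrival-time, record-count) aggregation used by FixWindow can be reproduced without the HiBench harness. A sketch with a hypothetical keyed socket source; the "ip,timestampMillis" input format is an assumption made for illustration:

import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Duration, StreamingContext}

object MinTimeCountExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("MinTimeCountExample")
    val ssc = new StreamingContext(conf, Duration(1000))
    // Hypothetical input: "ip,timestampMillis" lines on a local socket.
    val events = ssc.socketTextStream("localhost", 9999).map { line =>
      val Array(ip, ts) = line.split(",", 2)
      (ip, (ts.trim.toLong, 1))
    }
    // Same reduce as FixWindow: keep the earliest in-time and count records per key.
    val perWindow = events
      .window(Duration(10000), Duration(5000))
      .reduceByKey((a, b) => (math.min(a._1, b._1), a._2 + b._2))
    perWindow.print()
    ssc.start()
    ssc.awaitTermination()
  }
}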