scala.collection.mutable.Queue Scala Examples
The following examples show how to use scala.collection.mutable.Queue.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
Example 1
Source File: SqlApp.scala From BigDatalog with Apache License 2.0 | 5 votes |
// scalastyle:off println
package main.scala

import scala.collection.mutable.{ListBuffer, Queue}

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext

/** Row shape of the temporary `people` table queried below. */
case class Person(name: String, age: Int)

/**
 * Spark SQL smoke test: registers 100 generated `Person` rows as the temp table
 * `people`, selects the names whose age is in 13..19 and exits with status -1
 * unless exactly 7 rows come back.
 */
object SparkSqlExample {

  def main(args: Array[String]): Unit = {
    // The master is taken from SPARK_AUDIT_MASTER when that variable is set;
    // otherwise no master is set on the conf here.
    val conf = sys.env.get("SPARK_AUDIT_MASTER") match {
      case Some(master) => new SparkConf().setAppName("Simple Sql App").setMaster(master)
      case None => new SparkConf().setAppName("Simple Sql App")
    }
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    import sqlContext.implicits._
    import sqlContext._

    val people = sc.makeRDD(1 to 100, 10).map(x => Person(s"Name$x", x)).toDF()
    people.registerTempTable("people")
    val teenagers = sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
    val teenagerNames = teenagers.map(t => "Name: " + t(0)).collect()
    teenagerNames.foreach(println)

    /** Prints `failureMsg` and terminates the JVM with status -1 when `f` is false. */
    def test(f: => Boolean, failureMsg: String) = {
      if (!f) {
        println(failureMsg)
        System.exit(-1)
      }
    }

    // Render the array contents explicitly: concatenating an Array to a String
    // would only print its JVM identity (e.g. "[Ljava.lang.String;@1b2c3d").
    test(teenagerNames.size == 7,
      "Unexpected number of selected elements: " + teenagerNames.mkString("[", ", ", "]"))
    println("Test succeeded")
    sc.stop()
  }
}
// scalastyle:on println
Example 2
Source File: CurrentPersistenceIdsSource.scala From akka-persistence-redis with Apache License 2.0 | 5 votes |
package akka
package persistence
package query
package journal
package redis

import akka.persistence.redis._
import RedisKeys._

import _root_.redis._
import api.pubsub._

import akka.actor._
import SupervisorStrategy._
import akka.stream._
import akka.stream.stage._

import scala.concurrent.duration._
import scala.collection.mutable.Queue
import scala.util.{ Success, Failure }
import scala.reflect._

import com.typesafe.config.Config

/**
 * Source stage that emits the members of the Redis set stored under `identifiersKey`.
 *
 * The set is read page by page with SSCAN. Elements of a page are buffered and pushed
 * one per downstream pull; the stage completes once the buffer is empty and the scan
 * cursor has come back to 0.
 *
 * @param redis client used to issue the SSCAN commands
 */
private class CurrentPersistenceIdsSource(redis: RedisClient) extends GraphStage[SourceShape[String]] {

  val out: Outlet[String] = Outlet("CurrentPersistenceIdsSource")

  override val shape: SourceShape[String] = SourceShape(out)

  override def createLogic(inheritedAttributes: Attributes): GraphStageLogic =
    new GraphStageLogicWithLogging(shape) {

      // true until the first page has been received
      private var start = true
      // cursor to pass to the next SSCAN call
      private var index = 0
      private val buffer = Queue.empty[String]

      implicit def ec = materializer.executionContext

      private val StringSeq = classTag[Seq[String]]

      // Both callbacks are created once, by the stage itself, and only *invoked* from the
      // Future's thread. Previously a new callback was allocated on every pull, and the
      // failure callback was even created (and the error logged) from the Future's thread.
      private val onPage = getAsyncCallback[Cursor[Seq[String]]] {
        case Cursor(idx, StringSeq(data)) =>
          // save the index for further initialization if needed
          index = idx
          // it is not the start anymore
          start = false
          // enqueue received data
          buffer ++= data
          // SSCAN may return an empty page together with a non-zero cursor; in that
          // case keep scanning instead of completing the stage prematurely.
          fetchOrDeliver()
      }

      private val onScanFailure = getAsyncCallback[Throwable] { t =>
        log.error(t, "Error while querying persistence identifiers")
        failStage(t)
      }

      setHandler(out, new OutHandler {
        override def onPull(): Unit = fetchOrDeliver()
      })

      /** Requests the next page when nothing is buffered and the scan is not finished; otherwise delivers. */
      private def fetchOrDeliver(): Unit = {
        if (buffer.isEmpty && (start || index > 0)) {
          redis.sscan[String](identifiersKey, index).onComplete {
            case Success(cursor) => onPage.invoke(cursor)
            case Failure(t)      => onScanFailure.invoke(t)
          }
        } else {
          deliver()
        }
      }

      /** Pushes one buffered element, or completes the stage when the buffer is empty. */
      private def deliver(): Unit = {
        if (buffer.nonEmpty) {
          val elem = buffer.dequeue
          push(out, elem)
        } else {
          // we're done here, goodbye
          completeStage()
        }
      }
    }
}
Example 3
Source File: StreamingApp.scala From spark1.52 with Apache License 2.0 | 5 votes |
// scalastyle:off println
package main.scala

import scala.collection.mutable.{ListBuffer, Queue}

import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming._

/**
 * Spark Streaming smoke test: feeds three RDDs (100, 1000 and 10000 elements) through a
 * queue stream, waits five seconds, and exits with status -1 unless three non-empty RDDs
 * with exactly those counts were observed.
 */
object SparkStreamingExample {

  def main(args: Array[String]): Unit = {
    // Difference between System.getenv() and System.getProperties():
    //  - System.getenv() returns system environment variables (these can be set, for
    //    example, in the ".bashrc" file of the current user's home directory);
    //  - System.getProperties() returns Java process properties, which are passed with
    //    the "-D" command-line option.
    val appConf = new SparkConf().setAppName("Simple Streaming App")
    val conf = sys.env.get("SPARK_AUDIT_MASTER").fold(appConf)(master => appConf.setMaster(master))

    val ssc = new StreamingContext(conf, Seconds(1))
    val seen = ListBuffer[RDD[Int]]()

    // One RDD per expected count, each split into 10 partitions.
    val rdd1 = ssc.sparkContext.makeRDD(1 to 100, 10)
    val rdd2 = ssc.sparkContext.makeRDD(1 to 1000, 10)
    val rdd3 = ssc.sparkContext.makeRDD(1 to 10000, 10)

    val stream = ssc.queueStream(Queue(rdd1, rdd2, rdd3))
    stream.foreachRDD(rdd => seen += rdd)
    ssc.start()
    Thread.sleep(5000)

    /** Prints `failureMsg` and terminates the JVM with status -1 when `f` is false. */
    def test(f: => Boolean, failureMsg: String) = {
      if (!f) {
        println(failureMsg)
        System.exit(-1)
      }
    }

    // Batches for which the queue had nothing left yield empty RDDs; ignore those.
    val rddCounts = seen.map(_.count()).filter(_ > 0)
    test(rddCounts.length == 3, "Did not collect three RDD's from stream")
    test(rddCounts.toSet == Set(100, 1000, 10000), "Did not find expected streams")

    println("Test succeeded")

    ssc.stop()
  }
}
// scalastyle:on println
Example 4
Source File: SqlApp.scala From spark1.52 with Apache License 2.0 | 5 votes |
// scalastyle:off println
package main.scala

import scala.collection.mutable.{ListBuffer, Queue}

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext

/** Row shape of the temporary `people` table queried below. */
case class Person(name: String, age: Int)

/**
 * Spark SQL smoke test: builds a 100-row `people` temp table, queries the names with
 * age between 13 and 19 inclusive, and exits with status -1 unless 7 names are returned.
 */
object SparkSqlExample {

  def main(args: Array[String]): Unit = {
    // SPARK_AUDIT_MASTER, when present, selects the master; otherwise none is set here.
    val conf = sys.env.get("SPARK_AUDIT_MASTER") match {
      case Some(master) => new SparkConf().setAppName("Simple Sql App").setMaster(master)
      case None => new SparkConf().setAppName("Simple Sql App")
    }
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    import sqlContext.implicits._
    import sqlContext._

    val people = sc.makeRDD(1 to 100, 10).map(x => Person(s"Name$x", x)).toDF()
    people.registerTempTable("people")
    val teenagers = sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
    val teenagerNames = teenagers.map(t => "Name: " + t(0)).collect()
    teenagerNames.foreach(println)

    /** Prints `failureMsg` and terminates the JVM with status -1 when `f` is false. */
    def test(f: => Boolean, failureMsg: String) = {
      if (!f) {
        println(failureMsg)
        System.exit(-1)
      }
    }

    // mkString shows the selected names; plain `+ teenagerNames` would print only the
    // array's JVM identity string.
    test(teenagerNames.size == 7,
      "Unexpected number of selected elements: " + teenagerNames.mkString("[", ", ", "]"))
    println("Test succeeded")
    sc.stop()
  }
}
// scalastyle:on println
Example 5
Source File: HiveApp.scala From spark1.52 with Apache License 2.0 | 5 votes |
// scalastyle:off println
package main.scala

import scala.collection.mutable.{ListBuffer, Queue}

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.hive.HiveContext

case class Person(name: String, age: Int)

/**
 * Hive-on-Spark smoke test: recreates table `src`, loads the local file `data.txt` into
 * it, selects the rows with 0 <= key < 5 and exits with status -1 unless 5 rows come back.
 */
object SparkSqlExample {

  def main(args: Array[String]): Unit = {
    // Difference between System.getenv() and System.getProperties():
    //  - System.getenv() returns system environment variables (these can be set, for
    //    example, in the ".bashrc" file of the current user's home directory);
    //  - System.getProperties() returns Java process properties, which are passed with
    //    the "-D" command-line option.
    val conf = sys.env.get("SPARK_AUDIT_MASTER") match {
      case Some(master) => new SparkConf().setAppName("Simple Sql App").setMaster(master)
      case None => new SparkConf().setAppName("Simple Sql App")
    }
    val sc = new SparkContext(conf)
    val hiveContext = new HiveContext(sc)

    import hiveContext._

    sql("DROP TABLE IF EXISTS src")
    sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
    sql("LOAD DATA LOCAL INPATH 'data.txt' INTO TABLE src")
    val results = sql("FROM src SELECT key, value WHERE key >= 0 AND KEY < 5").collect()
    results.foreach(println)

    /** Prints `failureMsg` and terminates the JVM with status -1 when `f` is false. */
    def test(f: => Boolean, failureMsg: String) = {
      if (!f) {
        println(failureMsg)
        System.exit(-1)
      }
    }

    // Render the rows explicitly; concatenating the Array itself would print only its
    // JVM identity string instead of the selected elements.
    test(results.size == 5,
      "Unexpected number of selected elements: " + results.mkString("[", ", ", "]"))
    println("Test succeeded")
    sc.stop()
  }
}
// scalastyle:on println
Example 6
Source File: SparkSqlExample.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.examples.sql

import scala.collection.mutable.{ ListBuffer, Queue }

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext

/** Row shape of the temporary `people` table queried below. */
case class Person(name: String, age: Int)

/**
 * Spark SQL example: registers 100 generated `Person` rows as the temp table `people`,
 * selects name and age for ages 13..19, prints them, and exits with status -1 unless
 * exactly 7 rows were selected. Falls back to the "local" master when
 * SPARK_AUDIT_MASTER is not set.
 */
object SparkSqlExample {

  def main(args: Array[String]): Unit = {
    // Difference between System.getenv() and System.getProperties():
    //  - System.getenv() returns system environment variables (these can be set, for
    //    example, in the ".bashrc" file of the current user's home directory);
    //  - System.getProperties() returns Java process properties, which are passed with
    //    the "-D" command-line option.
    // Print every environment variable as a (name,value) pair, e.g. (JAVA_HOME,D:\jdk\jdk17_64).
    sys.env.foreach(println _)
    val conf = sys.env.get("SPARK_AUDIT_MASTER") match {
      case Some(master) => new SparkConf().setAppName("Simple Sql App").setMaster(master)
      case None => new SparkConf().setAppName("Simple Sql App").setMaster("local")
    }
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    import sqlContext.implicits._
    import sqlContext._

    // Build an RDD and convert it into a DataFrame.
    val people = sc.makeRDD(1 to 100, 10).map(x => Person(s"Name$x", x)).toDF()
    // Register the DataFrame so it can be referenced from SQL.
    people.registerTempTable("people")
    // Returns a DataFrame with the people aged 13 to 19 inclusive.
    val teenagers = sql("SELECT name,age FROM people WHERE age >= 13 AND age <= 19")
    // One display string per selected row, built from its name and age columns.
    val teenagerNames = teenagers.map(t => "Name: " + t(0) + " age:" + t(1)).collect()
    teenagerNames.foreach(println)

    /** Prints `failureMsg` and terminates the JVM with status -1 when `f` is false. */
    def test(f: => Boolean, failureMsg: String) = {
      if (!f) {
        println(failureMsg)
        System.exit(-1)
      }
    }

    // Render the array contents explicitly: concatenating the Array itself would print
    // only its JVM identity string instead of the selected elements.
    test(teenagerNames.size == 7,
      "Unexpected number of selected elements: " + teenagerNames.mkString("[", ", ", "]"))
    println("Test succeeded")
    sc.stop()
  }
}
Example 7
Source File: QueueInputDStream.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import java.io.{NotSerializableException, ObjectInputStream, ObjectOutputStream}

import scala.collection.mutable.{ArrayBuffer, Queue}
import scala.reflect.ClassTag

import org.apache.spark.rdd.{RDD, UnionRDD}
import org.apache.spark.streaming.{Time, StreamingContext}

/**
 * Input stream whose batches are taken from a caller-supplied mutable queue of RDDs.
 *
 * @param ssc        streaming context this stream belongs to
 * @param queue      queue the RDDs are dequeued from
 * @param oneAtATime when true at most one RDD is consumed per batch; otherwise the whole
 *                   queue is drained and the RDDs are unioned
 * @param defaultRDD RDD returned for a batch when the queue is empty; may be null
 */
private[streaming]
class QueueInputDStream[T: ClassTag](
    @transient ssc: StreamingContext,
    val queue: Queue[RDD[T]],
    oneAtATime: Boolean,
    defaultRDD: RDD[T]
  ) extends InputDStream[T](ssc) {

  override def start() { }

  override def stop() { }

  /** Deserialization is rejected: this stream cannot be restored from a checkpoint. */
  private def readObject(in: ObjectInputStream): Unit = {
    throw new NotSerializableException("queueStream doesn't support checkpointing. " +
      "Please don't use queueStream when checkpointing is enabled.")
  }

  /** Writes nothing and only logs a warning. */
  private def writeObject(oos: ObjectOutputStream): Unit = {
    logWarning("queueStream doesn't support checkpointing")
  }

  /**
   * Produces the RDD for one batch.
   *
   * @return the dequeued RDD (or the union of all queued RDDs), `defaultRDD` when the
   *         queue was empty and a default was given, otherwise `None`
   */
  override def compute(validTime: Time): Option[RDD[T]] = {
    val buffer = new ArrayBuffer[RDD[T]]()
    // mutable.Queue is not thread-safe and producers append to it from other threads
    // while the stream is running, so drain it while holding the queue's monitor.
    // Producers are expected to hold the same monitor when enqueuing.
    queue.synchronized {
      if (oneAtATime && queue.nonEmpty) {
        buffer += queue.dequeue()
      } else {
        buffer ++= queue.dequeueAll(_ => true)
      }
    }
    if (buffer.nonEmpty) {
      if (oneAtATime) {
        Some(buffer.head)
      } else {
        Some(new UnionRDD(ssc.sc, buffer.toSeq))
      }
    } else if (defaultRDD != null) {
      Some(defaultRDD)
    } else {
      None
    }
  }

}
Example 8
Source File: StreamingListener.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.scheduler

import scala.collection.mutable.Queue

import org.apache.spark.util.Distribution
import org.apache.spark.annotation.DeveloperApi

/**
 * Streaming listener that keeps the most recently completed batches and, after every
 * completed batch, prints the distribution of their total delay and processing time.
 *
 * @param numBatchInfos maximum number of completed batches retained for the statistics
 */
@DeveloperApi
class StatsReportListener(numBatchInfos: Int = 10) extends StreamingListener {
  // Queue containing latest completed batches
  val batchInfos = new Queue[BatchInfo]()

  /** Records the completed batch, evicts the oldest one when over capacity, then prints stats. */
  override def onBatchCompleted(batchStarted: StreamingListenerBatchCompleted): Unit = {
    batchInfos += batchStarted.batchInfo
    if (batchInfos.size > numBatchInfos) {
      batchInfos.dequeue()
    }
    printStats()
  }

  /** Prints the total-delay and processing-time distributions of the retained batches. */
  def printStats(): Unit = {
    showMillisDistribution("Total delay: ", info => info.totalDelay)
    showMillisDistribution("Processing time: ", info => info.processingDelay)
  }

  /** Prints, under `heading`, the distribution of the metric extracted by `getMetric`. */
  def showMillisDistribution(heading: String, getMetric: BatchInfo => Option[Long]): Unit = {
    val distribution = extractDistribution(getMetric)
    org.apache.spark.scheduler.StatsReportListener.showMillisDistribution(heading, distribution)
  }

  /** Builds a distribution from the batches for which `getMetric` yields a value. */
  def extractDistribution(getMetric: BatchInfo => Option[Long]): Option[Distribution] = {
    val samples = batchInfos.flatMap(info => getMetric(info).map(_.toDouble))
    Distribution(samples)
  }
}
Example 9
Source File: QueueStream.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.examples.streaming

import scala.collection.mutable.Queue

import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Example that pushes 30 RDDs, one per second, into a queue-backed input stream and
 * prints, for every batch, how many of its values fall into each residue class modulo 10.
 */
object QueueStream {

  def main(args: Array[String]): Unit = {

    StreamingExamples.setStreamingLogLevels()
    // Create the context
    val ssc = new StreamingContext(new SparkConf().setAppName("QueueStream"), Seconds(1))

    // Create the queue through which RDDs can be pushed to
    // a QueueInputDStream
    val rddQueue = new Queue[RDD[Int]]()

    // Create the QueueInputDStream and use it do some processing
    ssc.queueStream(rddQueue)
      .map(x => (x % 10, 1))
      .reduceByKey(_ + _)
      .print()
    ssc.start()

    // Create and push some RDDs into rddQueue
    (1 to 30).foreach { _ =>
      // Appends happen while holding the queue's monitor.
      rddQueue.synchronized {
        rddQueue += ssc.sparkContext.makeRDD(1 to 1000, 10)
      }
      Thread.sleep(1000)
    }
    ssc.stop()
  }
}
Example 10
Source File: QueueInputDStream.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import java.io.{NotSerializableException, ObjectInputStream, ObjectOutputStream}

import scala.collection.mutable.{ArrayBuffer, Queue}
import scala.reflect.ClassTag

import org.apache.spark.rdd.{RDD, UnionRDD}
import org.apache.spark.streaming.{StreamingContext, Time}

/**
 * Input stream whose batches are taken from a caller-supplied mutable queue of RDDs.
 *
 * @param ssc        streaming context this stream belongs to
 * @param queue      queue the RDDs are dequeued from (accessed under its own monitor)
 * @param oneAtATime when true at most one RDD is consumed per batch; otherwise the whole
 *                   queue is drained and the RDDs are unioned
 * @param defaultRDD RDD returned for a batch when the queue is empty; may be null
 */
private[streaming]
class QueueInputDStream[T: ClassTag](
    ssc: StreamingContext,
    val queue: Queue[RDD[T]],
    oneAtATime: Boolean,
    defaultRDD: RDD[T]
  ) extends InputDStream[T](ssc) {

  override def start(): Unit = { }

  override def stop(): Unit = { }

  /** Deserialization is rejected: this stream cannot be restored from a checkpoint. */
  private def readObject(in: ObjectInputStream): Unit = {
    throw new NotSerializableException("queueStream doesn't support checkpointing. " +
      "Please don't use queueStream when checkpointing is enabled.")
  }

  /** Writes nothing and only logs a warning. */
  private def writeObject(oos: ObjectOutputStream): Unit = {
    logWarning("queueStream doesn't support checkpointing")
  }

  /**
   * Produces the RDD for one batch: the dequeued RDD (or the union of everything queued),
   * else `defaultRDD` when one was given, else an empty RDD.
   */
  override def compute(validTime: Time): Option[RDD[T]] = {
    val drained = new ArrayBuffer[RDD[T]]()
    queue.synchronized {
      if (oneAtATime && queue.nonEmpty) {
        drained += queue.dequeue()
      } else {
        drained ++= queue
        queue.clear()
      }
    }

    if (drained.isEmpty) {
      // Nothing was queued for this batch: fall back to the default, then to an empty RDD.
      if (defaultRDD != null) Some(defaultRDD) else Some(ssc.sparkContext.emptyRDD[T])
    } else if (oneAtATime) {
      Some(drained.head)
    } else {
      Some(new UnionRDD(context.sc, drained.toSeq))
    }
  }

}
Example 11
Source File: StreamingApp.scala From BigDatalog with Apache License 2.0 | 5 votes |
// scalastyle:off println
package main.scala

import scala.collection.mutable.{ListBuffer, Queue}

import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming._

/**
 * Spark Streaming smoke test: streams three queued RDDs (100, 1000 and 10000 elements),
 * waits five seconds and exits with status -1 unless exactly those three non-empty RDDs
 * were seen.
 */
object SparkStreamingExample {

  def main(args: Array[String]): Unit = {
    // SPARK_AUDIT_MASTER, when present, selects the master; otherwise none is set here.
    val appConf = new SparkConf().setAppName("Simple Streaming App")
    val conf = sys.env.get("SPARK_AUDIT_MASTER").fold(appConf)(master => appConf.setMaster(master))

    val ssc = new StreamingContext(conf, Seconds(1))
    val seen = ListBuffer[RDD[Int]]()

    val rdd1 = ssc.sparkContext.makeRDD(1 to 100, 10)
    val rdd2 = ssc.sparkContext.makeRDD(1 to 1000, 10)
    val rdd3 = ssc.sparkContext.makeRDD(1 to 10000, 10)

    val stream = ssc.queueStream(Queue(rdd1, rdd2, rdd3))
    // Remember every RDD the stream hands out so the counts can be checked afterwards.
    stream.foreachRDD(rdd => seen += rdd)
    ssc.start()
    Thread.sleep(5000)

    /** Prints `failureMsg` and terminates the JVM with status -1 when `f` is false. */
    def test(f: => Boolean, failureMsg: String) = {
      if (!f) {
        println(failureMsg)
        System.exit(-1)
      }
    }

    // Only non-empty RDDs count towards the expectation.
    val rddCounts = seen.map(_.count()).filter(_ > 0)
    test(rddCounts.length == 3, "Did not collect three RDD's from stream")
    test(rddCounts.toSet == Set(100, 1000, 10000), "Did not find expected streams")

    println("Test succeeded")

    ssc.stop()
  }
}
// scalastyle:on println
Example 12
Source File: StreamingListener.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.scheduler

import scala.collection.mutable.Queue

import org.apache.spark.util.Distribution
import org.apache.spark.annotation.DeveloperApi

/**
 * Listener that retains up to `numBatchInfos` of the latest completed batches and prints
 * the distribution of their total delay and processing time after each completion.
 *
 * @param numBatchInfos maximum number of completed batches kept for the statistics
 */
@DeveloperApi
class StatsReportListener(numBatchInfos: Int = 10) extends StreamingListener {
  // Queue containing latest completed batches
  val batchInfos = new Queue[BatchInfo]()

  /** Appends the completed batch, drops the oldest entry when over capacity, prints stats. */
  override def onBatchCompleted(batchStarted: StreamingListenerBatchCompleted): Unit = {
    batchInfos += batchStarted.batchInfo
    val overCapacity = batchInfos.size > numBatchInfos
    if (overCapacity) batchInfos.dequeue()
    printStats()
  }

  /** Prints both tracked metrics for the retained batches. */
  def printStats(): Unit = {
    showMillisDistribution("Total delay: ", batch => batch.totalDelay)
    showMillisDistribution("Processing time: ", batch => batch.processingDelay)
  }

  /** Prints, under `heading`, the distribution of the values selected by `getMetric`. */
  def showMillisDistribution(heading: String, getMetric: BatchInfo => Option[Long]): Unit = {
    val distribution = extractDistribution(getMetric)
    org.apache.spark.scheduler.StatsReportListener.showMillisDistribution(heading, distribution)
  }

  /** Collects the defined metric values as doubles and wraps them in a distribution. */
  def extractDistribution(getMetric: BatchInfo => Option[Long]): Option[Distribution] = {
    val millis = batchInfos.flatMap(batch => getMetric(batch).map(_.toDouble))
    Distribution(millis)
  }
}
Example 13
Source File: HiveApp.scala From BigDatalog with Apache License 2.0 | 5 votes |
// scalastyle:off println
package main.scala

import scala.collection.mutable.{ListBuffer, Queue}

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.hive.HiveContext

case class Person(name: String, age: Int)

/**
 * Hive-on-Spark smoke test: recreates table `src`, loads the local file `data.txt` into
 * it, selects the rows with 0 <= key < 5 and exits with status -1 unless 5 rows come back.
 */
object SparkSqlExample {

  def main(args: Array[String]): Unit = {
    // SPARK_AUDIT_MASTER, when present, selects the master; otherwise none is set here.
    val conf = sys.env.get("SPARK_AUDIT_MASTER") match {
      case Some(master) => new SparkConf().setAppName("Simple Sql App").setMaster(master)
      case None => new SparkConf().setAppName("Simple Sql App")
    }
    val sc = new SparkContext(conf)
    val hiveContext = new HiveContext(sc)

    import hiveContext._

    sql("DROP TABLE IF EXISTS src")
    sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
    sql("LOAD DATA LOCAL INPATH 'data.txt' INTO TABLE src")
    val results = sql("FROM src SELECT key, value WHERE key >= 0 AND KEY < 5").collect()
    results.foreach(println)

    /** Prints `failureMsg` and terminates the JVM with status -1 when `f` is false. */
    def test(f: => Boolean, failureMsg: String) = {
      if (!f) {
        println(failureMsg)
        System.exit(-1)
      }
    }

    // Render the rows explicitly; concatenating the Array itself would print only its
    // JVM identity string instead of the selected elements.
    test(results.size == 5,
      "Unexpected number of selected elements: " + results.mkString("[", ", ", "]"))
    println("Test succeeded")
    sc.stop()
  }
}
// scalastyle:on println
Example 14
Source File: QueueInputDStream.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import java.io.{NotSerializableException, ObjectInputStream, ObjectOutputStream}

import scala.collection.mutable.{ArrayBuffer, Queue}
import scala.reflect.ClassTag

import org.apache.spark.rdd.{RDD, UnionRDD}
import org.apache.spark.streaming.{Time, StreamingContext}

/**
 * Input stream whose batches are taken from a caller-supplied mutable queue of RDDs.
 *
 * @param ssc        streaming context this stream belongs to
 * @param queue      queue the RDDs are dequeued from
 * @param oneAtATime when true at most one RDD is consumed per batch; otherwise the whole
 *                   queue is drained and the RDDs are unioned
 * @param defaultRDD RDD returned for a batch when the queue is empty; may be null
 */
private[streaming]
class QueueInputDStream[T: ClassTag](
    ssc: StreamingContext,
    val queue: Queue[RDD[T]],
    oneAtATime: Boolean,
    defaultRDD: RDD[T]
  ) extends InputDStream[T](ssc) {

  override def start() { }

  override def stop() { }

  /** Deserialization is rejected: this stream cannot be restored from a checkpoint. */
  private def readObject(in: ObjectInputStream): Unit = {
    throw new NotSerializableException("queueStream doesn't support checkpointing. " +
      "Please don't use queueStream when checkpointing is enabled.")
  }

  /** Writes nothing and only logs a warning. */
  private def writeObject(oos: ObjectOutputStream): Unit = {
    logWarning("queueStream doesn't support checkpointing")
  }

  /**
   * Produces the RDD for one batch: the dequeued RDD (or the union of everything queued),
   * else `defaultRDD` when one was given, else an empty RDD.
   */
  override def compute(validTime: Time): Option[RDD[T]] = {
    val buffer = new ArrayBuffer[RDD[T]]()
    // mutable.Queue is not thread-safe and producers append to it from other threads
    // while the stream is running, so drain it while holding the queue's monitor.
    // Producers are expected to hold the same monitor when enqueuing.
    queue.synchronized {
      if (oneAtATime && queue.nonEmpty) {
        buffer += queue.dequeue()
      } else {
        buffer ++= queue.dequeueAll(_ => true)
      }
    }
    if (buffer.nonEmpty) {
      if (oneAtATime) {
        Some(buffer.head)
      } else {
        Some(new UnionRDD(context.sc, buffer.toSeq))
      }
    } else if (defaultRDD != null) {
      Some(defaultRDD)
    } else {
      Some(ssc.sparkContext.emptyRDD)
    }
  }

}
Example 15
Source File: SolverPoolFactory.scala From inox with Apache License 2.0 | 5 votes |
package inox
package solvers
package combinators

import scala.collection.mutable.Queue

/**
 * Solver factory that hands out solvers from a pool backed by another factory.
 *
 * The pool is filled with `poolMaxSize` solvers on construction. Reclaimed solvers are
 * reset and put back; a solver that cannot be reset is released to the underlying
 * factory and replaced by a fresh one.
 */
trait SolverPoolFactory extends SolverFactory { self =>

  val factory: SolverFactory
  val program: factory.program.type = factory.program

  type S = factory.S

  val name = "Pool(" + factory.name + ")"

  var poolSize    = 0
  val poolMaxSize = 5

  // Solvers ready to be handed out, and solvers currently handed out.
  private[this] val availables = Queue[S]()
  private[this] var inUse      = Set[S]()

  /** Returns a pooled solver, creating (and counting) a new one when the pool is empty. */
  def getNewSolver(): S = {
    if (availables.isEmpty) {
      poolSize += 1
      availables += factory.getNewSolver()
    }

    val s = availables.dequeue()
    inUse += s
    s
  }

  /**
   * Returns `s` to the pool. The solver is reset exactly once (the previous second
   * `reset()` call and the no-op `asInstanceOf[S]` cast were redundant). If it cannot be
   * reset it is freed, handed back to the underlying factory, and replaced by a new solver.
   */
  override def reclaim(s: S) = {
    try {
      s.reset()
      inUse -= s
      availables += s
    } catch {
      case _: CantResetException =>
        inUse -= s
        s.free()
        factory.reclaim(s)
        availables += factory.getNewSolver()
    }
  }

  /** Fills the pool with `poolMaxSize` fresh solvers. */
  def init(): Unit = {
    for (i <- 1 to poolMaxSize) {
      availables += factory.getNewSolver()
    }

    poolSize = poolMaxSize
  }

  /** Hands every pooled solver, available or in use, back to the underlying factory. */
  override def shutdown(): Unit = {
    for (s <- availables ++ inUse) {
      factory.reclaim(s)
    }

    availables.clear()
    inUse = Set()
  }

  init()
}

object SolverPoolFactory {

  /** Wraps `sf` in a pool whose `factory` member is statically known to be `sf`. */
  def apply(sf: SolverFactory): SolverPoolFactory { val factory: sf.type } = new {
    val factory: sf.type = sf
  } with SolverPoolFactory
}
Example 16
Source File: package.scala From Waves with MIT License | 5 votes |
package com.wavesplatform.lang.v1

import cats.Id
import cats.implicits._
import com.wavesplatform.lang.v1.compiler.Terms._
import com.wavesplatform.lang.v1.task.TaskM
import com.wavesplatform.lang.v1.task.imports._

import scala.annotation.tailrec
import scala.collection.mutable.Queue

package object compiler {
  type CompileM[A] = TaskM[CompilerContext, CompilationError, A]

  implicit class EiExt[A](ei: Either[CompilationError, A]) {
    /** Lifts the `Either` into `CompileM`: a `Left` is raised as an error, a `Right` is wrapped as a pure value. */
    def toCompileM: CompileM[A] =
      ei.fold(
        raiseError[Id, CompilerContext, CompilationError, A],
        _.pure[CompileM]
      )
  }

  /**
   * Tells whether a `BLOCK` node occurs in `e`, looking through `GETTER`, `LET_BLOCK`,
   * `IF` and `FUNCTION_CALL` nodes.
   *
   * Expressions are visited through a work queue. A node of any other kind is skipped
   * and the traversal continues with the remaining queued expressions. Previously the
   * first such node ended the search with `false`, so e.g. a `BLOCK` passed as the second
   * argument of a function call whose first argument is a constant was missed.
   * NOTE(review): callers that relied on the old early exit will now see `true` for such
   * expressions; confirm this is acceptable where the result gates script validation.
   */
  def containsBlockV2(e: EXPR): Boolean = {
    @tailrec
    def horTraversal(queue: Queue[EXPR]): Boolean = {
      queue.headOption match {
        case Some(expr) =>
          expr match {
            case BLOCK(_, _)                => true
            case GETTER(expr1, _)           => horTraversal(queue.tail += expr1)
            case LET_BLOCK(let, body)       => horTraversal(queue.tail ++ Queue(let.value, body))
            case IF(expr1, expr2, expr3)    => horTraversal(queue.tail ++ Queue(expr1, expr2, expr3))
            case FUNCTION_CALL(_, exprList) => horTraversal(queue.tail ++ exprList)
            // Not a BLOCK and nothing to descend into: keep checking the rest of the queue.
            case _                          => horTraversal(queue.tail)
          }
        case None => false
      }
    }
    horTraversal(Queue(e))
  }

  /**
   * Tells whether an `ARR` node occurs in `e`, looking through `BLOCK` (both `LET` and
   * `FUNC` declarations), `LET_BLOCK`, `GETTER`, `IF` and `FUNCTION_CALL` nodes.
   *
   * As in [[containsBlockV2]], a node of any other kind no longer stops the search; the
   * traversal goes on with the remaining queued expressions.
   */
  def containsArray(e: EXPR): Boolean = {
    @tailrec
    def horTraversal(queue: Queue[EXPR]): Boolean = {
      queue.headOption match {
        case Some(expr) =>
          expr match {
            case ARR(_)                     => true
            case BLOCK(let: LET, body)      => horTraversal(queue.tail ++ Queue(let.value, body))
            case BLOCK(func: FUNC, body)    => horTraversal(queue.tail ++ Queue(func.body, body))
            case LET_BLOCK(let, body)       => horTraversal(queue.tail ++ Queue(let.value, body))
            case GETTER(expr1, _)           => horTraversal(queue.tail += expr1)
            case IF(expr1, expr2, expr3)    => horTraversal(queue.tail ++ Queue(expr1, expr2, expr3))
            case FUNCTION_CALL(_, exprList) => horTraversal(queue.tail ++ exprList)
            // Not an ARR and nothing to descend into: keep checking the rest of the queue.
            case _                          => horTraversal(queue.tail)
          }
        case None => false
      }
    }
    horTraversal(Queue(e))
  }
}
Example 17
Source File: ThreadPoolSchedulerProvider.scala From scala-game-library with MIT License | 5 votes |
package sgl.util

import java.util.concurrent.Executors
import java.io.{StringWriter, PrintWriter}

import scala.collection.mutable.Queue

/** Provides a `Scheduler` that runs `ChunkedTask`s taken from a shared, lock-protected queue. */
trait ThreadPoolSchedulerProvider extends SchedulerProvider {
  this: LoggingProvider =>

  private implicit val Tag = Logger.Tag("threadpool-scheduler")

  class ThreadPoolScheduler extends Scheduler {
    private val pool = Executors.newFixedThreadPool(4)

    // Pending tasks; every access goes through taskQueueLock.
    private val tasks: Queue[ChunkedTask] = new Queue
    private val taskQueueLock = new Object

    private var r1: ChunksRunner = null
    private var r2: ChunksRunner = null
    private var r3: ChunksRunner = null
    private var r4: ChunksRunner = null

    /** Appends `task` to the shared queue. */
    override def schedule(task: ChunkedTask): Unit = {
      taskQueueLock.synchronized {
        tasks.enqueue(task)
      }
    }

    /** Shuts the pool down and asks every existing runner to leave its loop. */
    def shutdown(): Unit = {
      pool.shutdown()

      // Need to check for null because we could have skipped resume.
      if(r1 != null)
        r1.shouldStop = true
      if(r2 != null)
        r2.shouldStop = true
      if(r3 != null)
        r3.shouldStop = true
      if(r4 != null)
        r4.shouldStop = true
    }

    // Simple Runnable class that picks up the first available ChunkedTask and
    // run one chunk of it.
    // Note that if there is only one ChunkedTask in the queue, there will only
    // be one busy Thread at a time as ChunkedTask are assumed to be sequentials.
    // In order to optimize the use of the thread pool, one should try to split
    // parallel work into several independent ChunkedTask.
    class ChunksRunner extends Runnable {
      // Written by the thread calling shutdown() and polled by the thread executing
      // run(); @volatile makes the write visible to the polling loop, which could
      // otherwise keep spinning on a stale value.
      @volatile var shouldStop = false

      override def run(): Unit = {
        while(!shouldStop) {
          val next = taskQueueLock.synchronized {
            if(tasks.isEmpty) None else Some(tasks.dequeue())
          }
          next match {
            // Nothing to do: back off briefly before polling the queue again.
            case None => Thread.sleep(50)
            case Some(task) => {
              logger.debug("Executing some ChunkedTask from the task queue.")
              try {
                task.doRun(5L)
                // An unfinished task goes back to the end of the queue.
                if(task.status != ChunkedTask.Completed)
                  taskQueueLock.synchronized { tasks.enqueue(task) }
              } catch {
                // A failing task is logged and dropped (it is not re-enqueued), so the
                // runner keeps serving the remaining tasks.
                case (e: Throwable) => {
                  logger.error(s"Unexpected error while executing task ${task.name}: ${e.getMessage}")
                  val sw = new StringWriter()
                  val pw = new PrintWriter(sw, true)
                  e.printStackTrace(pw)
                  logger.error(sw.toString)
                }
              }
            }
          }
        }
      }
    }
  }

  override val Scheduler = new ThreadPoolScheduler
}
Example 18
Source File: SchedulerProvider.scala From scala-game-library with MIT License | 5 votes |
package sgl
package util

    /**
     * Runs queued tasks for roughly `ms` milliseconds.
     *
     * Tasks are taken from the front of `taskQueue` and each is given a slice of at most
     * 5 ms (less when under 5 ms of the budget remain); a task still in progress after
     * its slice is put back at the end of the queue.
     *
     * @param ms time budget in milliseconds
     * @return true when the queue is empty on exit
     */
    def run(ms: Long): Boolean = {
      logger.trace("Running SingleThreadScheduler with taskQueue size of: " + taskQueue.size)
      val nanosPerMilli = 1000L * 1000L
      val deadline = System.nanoTime + ms * nanosPerMilli

      var budget = deadline - System.nanoTime
      while (budget > 0 && taskQueue.nonEmpty) {
        val sliceMs = math.min(budget / nanosPerMilli, 5L)
        val current = taskQueue.dequeue()
        current.doRun(sliceMs)
        if (current.status == ChunkedTask.InProgress) {
          taskQueue.enqueue(current)
        }
        budget = deadline - System.nanoTime
      }
      taskQueue.isEmpty
    }

  }

  override val Scheduler = new SingleThreadScheduler

}
Example 19
Source File: SWNearestNeighbors.scala From streamDM with Apache License 2.0 | 5 votes |
package org.apache.spark.streamdm.outlier

import com.github.javacliparser.{FlagOption, IntOption}
import org.apache.spark.internal.Logging
import org.apache.spark.streamdm.core.Example
import org.apache.spark.streamdm.core.specification.ExampleSpecification
import org.apache.spark.streaming.dstream.DStream

import scala.collection.mutable.Queue

  /**
   * Scores `example` by its average distance to the examples currently in `window`.
   *
   * @param example the example to score
   * @return the mean of the distances between the input of `example` and the input of
   *         every example in `window`, or 0.0 when the window is empty
   */
  def outlierness(example: Example): Double = {
    val distances = window.map(p => p.in.distanceTo(example.in))

    if (distances.isEmpty) {
      0.0
    } else {
      val meanDistance = distances.reduce(_ + _) / distances.size
      if (debug) {
        val inputIndices = example.in.getFeatureIndexArray().map(_._1).mkString(";")
        val outputIndices = example.out.getFeatureIndexArray().map(_._1).mkString(" ")
        logInfo("outlierness, %f, {%s}, %s, %d".format(meanDistance, inputIndices, outputIndices, distances.size))
      }
      meanDistance
    }
  }
}
Example 20
Source File: LogisticStreaming.scala From Apache-Spark-2x-Machine-Learning-Cookbook with MIT License | 5 votes |
package spark.ml.cookbook.chapter13

import org.apache.log4j.{Level, Logger}
import org.apache.spark.mllib.classification.StreamingLogisticRegressionWithSGD
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.regression.StreamingLinearRegressionWithSGD
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.streaming.{Seconds, StreamingContext}

import scala.collection.mutable.Queue

/**
 * Streaming logistic regression example: trains on 80% of the Pima Indians diabetes data
 * pushed through one queue stream, then pushes the remaining 20% through a second queue
 * stream in two halves and prints the (label, prediction) pairs.
 */
object LogisticStreaming {

  def main(args: Array[String]): Unit = {
    Logger.getLogger("org").setLevel(Level.ERROR)
    Logger.getRootLogger.setLevel(Level.WARN)

    val spark = SparkSession
      .builder
      .master("local[*]")
      .appName("Logistic Streaming App")
      .config("spark.sql.warehouse.dir", ".")
      .getOrCreate()

    import spark.implicits._

    val ssc = new StreamingContext(spark.sparkContext, Seconds(2))

    val rawDF = spark.read
      .text("../data/sparkml2/chapter13/pima-indians-diabetes.data").as[String]

    // Each line is comma-separated: every column but the last is a feature, the last is the label.
    val buf = rawDF.rdd.map(value => {
      val data = value.split(",")
      (data.init.toSeq, data.last)
    })

    // The element type is statically (Seq[String], String); no (erased) type tests needed.
    val lps = buf.map { case (feature, label) =>
      val featureVector = feature.map(_.toDouble).toArray[Double]
      LabeledPoint(label.toDouble, Vectors.dense(featureVector))
    }

    val trainQueue = new Queue[RDD[LabeledPoint]]()
    val testQueue = new Queue[RDD[LabeledPoint]]()

    val trainingStream = ssc.queueStream(trainQueue)
    val testStream = ssc.queueStream(testQueue)

    val numFeatures = 8
    val model = new StreamingLogisticRegressionWithSGD()
      .setInitialWeights(Vectors.zeros(numFeatures))
      .setNumIterations(15)
      .setStepSize(0.5)
      .setMiniBatchFraction(0.25)

    model.trainOn(trainingStream)

    val result = model.predictOnValues(testStream.map(lp => (lp.label, lp.features)))
    result.map { case (label: Double, prediction: Double) => (label, prediction) }.print()

    ssc.start()

    val Array(trainData, test) = lps.randomSplit(Array(.80, .20))

    // The queues are read by the running streaming job, and mutable.Queue is not
    // thread-safe: push while holding the queue's monitor, as the queue-backed input
    // stream does when it drains the queue.
    trainQueue.synchronized {
      trainQueue += trainData
    }
    Thread.sleep(4000)

    val testGroups = test.randomSplit(Array(.50, .50))
    testGroups.foreach(group => {
      testQueue.synchronized {
        testQueue += group
      }
      Thread.sleep(2000)
    })

    ssc.stop()
  }
}
Example 21
Source File: KMeansStreaming.scala From Apache-Spark-2x-Machine-Learning-Cookbook with MIT License | 5 votes |
package spark.ml.cookbook.chapter13

import org.apache.log4j.{Level, Logger}
import org.apache.spark.mllib.clustering.StreamingKMeans
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.{Seconds, StreamingContext}

import scala.collection.mutable.Queue

/**
 * Streaming k-means example on the iris data: trains a 3-cluster model on 80% of the
 * labeled points pushed through one queue stream, then pushes the held-out 20% through a
 * second queue stream in four groups and prints (prediction, label, label name) per point.
 */
object KMeansStreaming {

  def main(args: Array[String]): Unit = {
    Logger.getLogger("org").setLevel(Level.ERROR)

    val spark = SparkSession
      .builder
      .master("local[*]")
      .appName("KMean Streaming App")
      .config("spark.sql.warehouse.dir", ".")
      .config("spark.executor.memory", "2g")
      .getOrCreate()

    val ssc = new StreamingContext(spark.sparkContext, Seconds(1))

    Logger.getRootLogger.setLevel(Level.WARN)

    val irisData = IrisData.readFromFile(spark.sparkContext)
    val lookup = IrisData.buildLabelLookup(irisData)

    val trainQueue = new Queue[RDD[LabeledPoint]]()
    val testQueue = new Queue[RDD[LabeledPoint]]()

    val trainingStream = ssc.queueStream(trainQueue)
    val testStream = ssc.queueStream(testQueue)

    val model = new StreamingKMeans().setK(3)
      .setDecayFactor(1.0)
      .setRandomCenters(4, 0.0)

    model.trainOn(trainingStream.map(lp => lp.features))

    val values = model.predictOnValues(testStream.map(lp => (lp.label, lp.features)))
    values.foreachRDD(n => n.foreach(v => {
      // v is (label, predicted cluster); printed as one tuple.
      println((v._2, v._1, lookup(v._1.toLong)))
    }))

    ssc.start()

    val irisLabelPoints = irisData.map(record => IrisData.toLabelPoints(record))
    val Array(trainData, test) = irisLabelPoints.randomSplit(Array(.80, .20))

    // Train on the training split only. Pushing the full data set (as before) also
    // trained the model on the points that are later used for testing, and left
    // `trainData` unused.
    // The queues are read by the running streaming job, so push under the queue's monitor.
    trainQueue.synchronized {
      trainQueue += trainData
    }
    Thread.sleep(2000)

    val testGroups = test.randomSplit(Array(.25, .25, .25, .25))
    testGroups.foreach(group => {
      testQueue.synchronized {
        testQueue += group
      }
      println("-" * 25)
      Thread.sleep(1000)
    })

    ssc.stop()
  }
}
Example 22
Source File: ArtisinalStreamingTest.scala From spark-testing-base with Apache License 2.0 | 5 votes |
package com.holdenkarau.spark.testing

import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.Queue

import org.apache.spark.streaming._
import org.apache.spark.streaming.dstream._
import org.apache.spark._
import org.apache.spark.rdd.RDD
import org.apache.spark.SparkContext._

import org.scalatest.FunSuite
import org.scalatest.exceptions.TestFailedException

/**
 * Shows how to test a streaming transformation by hand: the input is fed through a queue
 * stream and the output is collected into a local buffer with `foreachRDD`.
 */
class ArtisinalStreamingTest extends FunSuite with SharedSparkContext {
  // tag::createQueueStream[]
  /**
   * Builds a queue stream over two small RDDs of strings.
   *
   * @return the created input stream (previously it was assigned to a local value and
   *         discarded, so the method returned `Unit`)
   */
  def makeSimpleQueueStream(ssc: StreamingContext): InputDStream[String] = {
    val input = List(List("hi"), List("happy pandas", "sad pandas"))
      .map(sc.parallelize(_))
    ssc.queueStream(Queue(input:_*))
  }
  // end::createQueueStream[]

  // tag::HAPPY_PANDA[]
  test("artisinal streaming test") {
    val ssc = new StreamingContext(sc, Seconds(1))
    val input = List(List("hi"), List("happy pandas", "sad pandas"))
      .map(sc.parallelize(_))
    // Note: does not work for windowing or checkpointing
    val idstream = ssc.queueStream(Queue(input:_*))
    val tdstream = idstream.filter(_.contains("pandas"))
    val result = ArrayBuffer[String]()
    tdstream.foreachRDD{(rdd: RDD[String], _) =>
      result ++= rdd.collect()
    }
    val startTime = System.currentTimeMillis()
    // Compared against a millisecond difference below, i.e. 108000 ms.
    // NOTE(review): the 60 * 60 * 30 form suggests another unit was intended - confirm.
    val maxWaitTime = 60 * 60 * 30
    ssc.start()
    // Poll until both expected elements arrived or the wait budget is used up.
    while (result.size < 2 && System.currentTimeMillis() - startTime < maxWaitTime) {
      ssc.awaitTerminationOrTimeout(50)
    }
    ssc.stop(stopSparkContext = false)
    assert(List("happy pandas", "sad pandas") === result.toList)
  }
  // end::HAPPY_PANDA[]
}
Example 23
Source File: QueueInputDStream.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import java.io.{NotSerializableException, ObjectInputStream, ObjectOutputStream}

import scala.collection.mutable.{ArrayBuffer, Queue}
import scala.reflect.ClassTag

import org.apache.spark.rdd.{RDD, UnionRDD}
import org.apache.spark.streaming.{StreamingContext, Time}

/**
 * Input stream whose batches are taken from a caller-supplied queue of RDDs.
 *
 * @param ssc        streaming context the stream belongs to
 * @param queue      queue the RDDs are pulled from; `compute` locks on it
 * @param oneAtATime when true, at most one queued RDD is consumed per batch;
 *                   otherwise every queued RDD is drained and unioned
 * @param defaultRDD RDD returned when nothing is queued; may be null, in which
 *                   case an empty RDD is returned instead
 */
private[streaming]
class QueueInputDStream[T: ClassTag](
    ssc: StreamingContext,
    val queue: Queue[RDD[T]],
    oneAtATime: Boolean,
    defaultRDD: RDD[T]
  ) extends InputDStream[T](ssc) {

  override def start(): Unit = {}

  override def stop(): Unit = {}

  /** Always fails: this stream rejects deserialization. */
  private def readObject(in: ObjectInputStream): Unit = {
    throw new NotSerializableException("queueStream doesn't support checkpointing. " +
      "Please don't use queueStream when checkpointing is enabled.")
  }

  /** Writes nothing; only logs a warning. */
  private def writeObject(oos: ObjectOutputStream): Unit = {
    logWarning("queueStream doesn't support checkpointing")
  }

  /**
   * Produces the RDD for one batch from whatever is currently queued.
   *
   * @param validTime batch time (not used by this implementation)
   * @return always a `Some`: the dequeued RDD, the union of all drained RDDs,
   *         `defaultRDD`, or an empty RDD
   */
  override def compute(validTime: Time): Option[RDD[T]] = {
    val pending = new ArrayBuffer[RDD[T]]()

    queue.synchronized {
      val takeSingle = oneAtATime && queue.nonEmpty
      if (takeSingle) {
        pending += queue.dequeue()
      } else {
        pending ++= queue
        queue.clear()
      }
    }

    val batch: RDD[T] =
      if (pending.isEmpty) {
        if (defaultRDD != null) defaultRDD else ssc.sparkContext.emptyRDD[T]
      } else if (oneAtATime) {
        pending.head
      } else {
        new UnionRDD(context.sc, pending.toSeq)
      }

    Some(batch)
  }
}
Example 24
Source File: StreamingListener.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.scheduler

import scala.collection.mutable.Queue

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.util.Distribution

/**
 * Streaming listener that remembers the most recently completed batches and
 * prints delay distributions after every completed batch.
 *
 * @param numBatchInfos number of batch infos retained; one older entry is
 *                      dropped whenever the queue grows beyond this size
 */
@DeveloperApi
class StatsReportListener(numBatchInfos: Int = 10) extends StreamingListener {
  // Queue containing latest completed batches
  val batchInfos = new Queue[BatchInfo]()

  /** Records the completed batch, trims the history, and prints the stats. */
  override def onBatchCompleted(batchStarted: StreamingListenerBatchCompleted): Unit = {
    batchInfos += batchStarted.batchInfo
    if (batchInfos.size > numBatchInfos) {
      batchInfos.dequeue()
    }
    printStats()
  }

  /** Prints the total-delay and processing-time distributions. */
  def printStats(): Unit = {
    showMillisDistribution("Total delay: ", _.totalDelay)
    showMillisDistribution("Processing time: ", _.processingDelay)
  }

  /**
   * Prints the distribution of one metric over the retained batches.
   *
   * @param heading   text passed through as the heading
   * @param getMetric extracts the metric from a batch, if it is defined
   */
  def showMillisDistribution(heading: String, getMetric: BatchInfo => Option[Long]): Unit = {
    val distribution = extractDistribution(getMetric)
    org.apache.spark.scheduler.StatsReportListener.showMillisDistribution(heading, distribution)
  }

  /**
   * Builds a distribution from the metric values that are defined on the
   * retained batches.
   *
   * @param getMetric extracts the metric from a batch, if it is defined
   * @return whatever `Distribution.apply` yields for the collected values
   */
  def extractDistribution(getMetric: BatchInfo => Option[Long]): Option[Distribution] = {
    val samples = batchInfos.flatMap(info => getMetric(info)).map(_.toDouble)
    Distribution(samples)
  }
}
Example 25
Source File: TestableQueueInputDStream.scala From SparkUnitTestingExamples with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming

import java.io.{ObjectInputStream, ObjectOutputStream}

import org.apache.spark.rdd.{RDD, UnionRDD}
import org.apache.spark.streaming.dstream.InputDStream

import scala.collection.mutable.{ArrayBuffer, Queue}
import scala.reflect.ClassTag

/**
 * Queue-backed input stream for tests. Both serialization hooks only log a
 * warning; neither throws.
 *
 * @param ssc        streaming context the stream belongs to
 * @param queue      queue the RDDs are pulled from; `compute` locks on it
 * @param oneAtATime when true, at most one queued RDD is consumed per batch;
 *                   otherwise every queued RDD is drained and unioned
 * @param defaultRDD RDD returned when nothing is queued; may be null, in which
 *                   case an empty RDD is returned instead
 */
class TestableQueueInputDStream[T: ClassTag](
    ssc: StreamingContext,
    val queue: Queue[RDD[T]],
    oneAtATime: Boolean,
    defaultRDD: RDD[T]
  ) extends InputDStream[T](ssc) {

  override def start(): Unit = {}

  override def stop(): Unit = {}

  /** Reads nothing; only logs a warning. */
  private def readObject(in: ObjectInputStream): Unit = {
    logWarning("queueStream doesn't support checkpointing")
  }

  /** Writes nothing; only logs a warning. */
  private def writeObject(oos: ObjectOutputStream): Unit = {
    logWarning("queueStream doesn't support checkpointing")
  }

  /**
   * Produces the RDD for one batch from whatever is currently queued.
   *
   * @param validTime batch time (not used by this implementation)
   * @return always a `Some`: the dequeued RDD, the union of all drained RDDs,
   *         `defaultRDD`, or an empty RDD
   */
  override def compute(validTime: Time): Option[RDD[T]] = {
    val pending = new ArrayBuffer[RDD[T]]()

    queue.synchronized {
      val takeSingle = oneAtATime && queue.nonEmpty
      if (takeSingle) {
        pending += queue.dequeue()
      } else {
        pending ++= queue
        queue.clear()
      }
    }

    val batch: RDD[T] =
      if (pending.isEmpty) {
        if (defaultRDD != null) defaultRDD else ssc.sparkContext.emptyRDD[T]
      } else if (oneAtATime) {
        pending.head
      } else {
        new UnionRDD(context.sc, pending.toSeq)
      }

    Some(batch)
  }
}
Example 26
Source File: StreamingUnitTest.scala From SparkUnitTestingExamples with Apache License 2.0 | 5 votes |
package com.cloudera.sa.spark.unittest.streaming

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming._
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.{SparkConf, SparkContext}
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}

import scala.collection.mutable.Queue

/**
 * Streaming word-count test: four small batches are replayed one at a time
 * through a `TestableQueueInputDStream`, and the running total for the key
 * "a" is checked after each of the first four batches.
 */
class StreamingUnitTest extends FunSuite with
BeforeAndAfterEach with BeforeAndAfterAll {

  @transient var sc: SparkContext = null
  @transient var ssc: StreamingContext = null

  /** Creates the local Spark context and a 200 ms streaming context. */
  override def beforeAll(): Unit = {
    val sparkConfig = new SparkConf()
    sparkConfig.set("spark.broadcast.compress", "false")
    sparkConfig.set("spark.shuffle.compress", "false")
    sparkConfig.set("spark.shuffle.spill.compress", "false")
    sparkConfig.set("spark.io.compression.codec", "lzf")
    sc = new SparkContext("local[2]", "unit test", sparkConfig)
    ssc = new StreamingContext(sc, Milliseconds(200))
  }

  /**
   * Stops the streaming context before the Spark context, so the started
   * streaming job is shut down and not left running. Both fields may still
   * be null if `beforeAll` failed part-way.
   */
  override def afterAll(): Unit = {
    Option(ssc).foreach(_.stop(stopSparkContext = false))
    Option(sc).foreach(_.stop())
  }

  test("Streaming word count") {

    val firstBatchRDD = sc.parallelize(Seq("a", "b", "c"))
    val secondBatchRDD = sc.parallelize(Seq("a", "e"))
    val thirdBatchRDD = sc.parallelize(Seq("b", "c", "e", "f"))
    val fourthBatchRDD = sc.parallelize(Seq("a", "e"))

    val queue = new Queue[RDD[String]]

    queue += firstBatchRDD
    queue += secondBatchRDD
    queue += thirdBatchRDD
    queue += fourthBatchRDD

    println(queue)

    val startTime = System.currentTimeMillis()

    val dstream = new TestableQueueInputDStream(ssc, queue, true, sc.makeRDD(Seq[String](), 1))
    //ssc.queueStream(queue)

    dstream.checkpoint(Seconds(100))

    val batchTotals: DStream[(String, Int)] = dstream.map(r => (r, 1)).reduceByKey(_ + _)

    // Running total per key: keep the previous state when a batch has no new
    // values for the key, otherwise add the batch's sum to it.
    val streamTotals = batchTotals.updateStateByKey(
      (seq: Seq[Int], opt: Option[Int]) => {
        if (seq.isEmpty) opt else Some(opt.getOrElse(0) + seq.sum)
      })

    // An output operation with an empty body is registered on the stream.
    streamTotals.foreachRDD(rdd => {

    })

    ssc.checkpoint("./tmp")
    ssc.start()
    ssc.awaitTerminationOrTimeout(2000)

    val endTime = System.currentTimeMillis()

    val rddList = streamTotals.slice(new Time(startTime), new Time(endTime))

    // Expected running count of "a" after batches 1..4: a appears in batches 1, 2 and 4.
    val expectedCountsOfA = Seq(1, 2, 2, 3)
    expectedCountsOfA.zipWithIndex.foreach { case (expected, batchIndex) =>
      val totals = rddList(batchIndex).collect()
      totals.foreach(println)
      assert(totals.filter(r => r._1.equals("a"))(0)._2 == expected)
    }
  }
}
Example 27
Source File: FIFO.scala From spatial with MIT License | 5 votes |
package spatial.lang

import argon._
import forge.tags._
import spatial.node._
import spatial.metadata.memory._

import scala.collection.mutable.Queue

/**
 * Staged FIFO memory. Reads stage a `FIFODeq` node and writes stage a
 * `FIFOEnq` node; the address argument of the typeclass methods is ignored.
 */
@ref class FIFO[A:Bits] extends Top[FIFO[A]]
         with LocalMem1[A,FIFO]
         with Ref[Queue[Any],FIFO[A]] {
  val A: Bits[A] = Bits[A]
  val evMem: FIFO[A] <:< LocalMem[A,FIFO] = implicitly[FIFO[A] <:< LocalMem[A,FIFO]]

  /** Sets the `isNoFission` flag on this FIFO and returns it. */
  def noduplicate: FIFO[A] = {
    this.isNoFission = true
    me
  }

  // --- Typeclass Methods
  @rig def __read(addr: Seq[Idx], ens: Set[Bit]): A = stage(FIFODeq(this,ens))
  @rig def __write(data: A, addr: Seq[Idx], ens: Set[Bit]): Void = stage(FIFOEnq(this,data,ens))
  @rig def __reset(ens: Set[Bit]): Void = void
}

object FIFO {
  /** Stages a new FIFO with the given depth. */
  @api def apply[A:Bits](depth: I32): FIFO[A] = stage(FIFONew(depth))

  /** Same staging as `apply`, exposed with the `@rig` annotation. */
  @rig def alloc[A:Bits](depth: I32): FIFO[A] = stage(FIFONew(depth))

  /** Stages a dequeue from `fifo`, taking the `Bits` evidence from the FIFO itself. */
  @rig def deq[A](fifo: FIFO[A], ens: Set[Bit] = Set.empty): A = {
    implicit val bitsA: Bits[A] = fifo.A
    stage(FIFODeq(fifo, ens))
  }

  /** Stages an enqueue of `data` into `fifo`, taking the `Bits` evidence from the FIFO itself. */
  @rig def enq[A](fifo: FIFO[A], data: Bits[A], ens: Set[Bit] = Set.empty): Void = {
    implicit val bitsA: Bits[A] = fifo.A
    stage(FIFOEnq(fifo,data,ens))
  }
}
Example 28
Source File: StreamIn.scala From spatial with MIT License | 5 votes |
package spatial.lang

import argon._
import forge.tags._
import spatial.node._

import scala.collection.mutable.Queue

/**
 * Staged read-only input stream. Reading stages a `StreamInRead` node;
 * writing is reported as an error.
 */
@ref class StreamIn[A:Bits] extends LocalMem0[A,StreamIn]
         with RemoteMem[A,StreamIn]
         with Ref[Queue[Any],StreamIn[A]] {
  val A: Bits[A] = Bits[A]
  private implicit val evA: A <:< Bits[A] = Bits[A].box
  override val evMem = implicitly[StreamIn[A] <:< (LocalMem[A,StreamIn] with RemoteMem[A,StreamIn])]

  /** Stages a read with no enable bits. */
  @api def value(): A = stage(StreamInRead(this,Set.empty))

  /** Stages a read guarded by the single enable bit `en`. */
  @api def value(en: Bit): A = stage(StreamInRead(this,Set(en)))

  // --- Typeclass Methods
  /** Delegates to the no-argument `value()`; `addr` and `ens` are not used. */
  @rig def __read(addr: Seq[Idx], ens: Set[Bit]): A = this.value()

  /** Writing is not supported: reports the error and returns an error value. */
  @rig def __write(data: A, addr: Seq[Idx], ens: Set[Bit] ): Void = {
    error(ctx, "Cannot write to StreamIn")
    error(ctx)
    err[Void]("Cannot write to StreamIn")
  }

  @rig def __reset(ens: Set[Bit]): Void = void
}

object StreamIn {
  /** Stages a new input stream attached to `bus`. */
  @api def apply[A:Bits](bus: Bus): StreamIn[A] = stage(StreamInNew[A](bus))
}
Example 29
Source File: LineBuffer.scala From spatial with MIT License | 5 votes |
package spatial.lang

import argon._
import forge.tags._
import spatial.node._

import scala.collection.mutable.Queue

/**
 * Staged line buffer. Reads stage a `LineBufferRead` node and writes stage a
 * `LineBufferEnq` node.
 */
@ref class LineBuffer[A:Bits] extends Top[LineBuffer[A]]
         with LocalMem2[A,LineBuffer]
         with Ref[Queue[Any],LineBuffer[A]] {
  val A: Bits[A] = Bits[A]
  val evMem: LineBuffer[A] <:< LocalMem[A,LineBuffer] = implicitly[LineBuffer[A] <:< LocalMem[A,LineBuffer]]

  // --- Typeclass Methods
  @rig def __read(addr: Seq[Idx], ens: Set[Bit]): A = stage(LineBufferRead(this,addr,ens))

  /** A one-element address gets a leading 0 index prepended before the enqueue is staged. */
  @rig def __write(data: A, addr: Seq[Idx], ens: Set[Bit]): Void = {
    if (addr.size == 1) stage(LineBufferEnq(this,data,Seq(0.to[I32]) ++ addr,ens))
    else stage(LineBufferEnq(this,data,addr,ens))
  }

  @rig def __reset(ens: Set[Bit]): Void = void

  /** Stages a `DenseTransfer` from `dram` into this buffer, marked as a load. */
  @api def load(dram: DRAM1[A]): Void = {
    stage(DenseTransfer(dram,me,isLoad = true))
  }
}

object LineBuffer {
  /** Stages a new line buffer with a stride of 1. */
  @api def apply[A:Bits](rows: I32, cols: I32): LineBuffer[A] = stage(LineBufferNew(rows,cols,1))

  /** Stages a new line buffer with an explicit stride. */
  @api def strided[A:Bits](rows: I32, cols: I32, stride: I32): LineBuffer[A] = stage(LineBufferNew(rows,cols,stride))
}
Example 30
Source File: MergeBuffer.scala From spatial with MIT License | 5 votes |
package spatial.lang

import argon._
import forge.tags._
import spatial.node._
import spatial.metadata.memory._

import scala.collection.mutable.Queue

/**
 * Staged merge buffer. Each public operation stages the corresponding
 * `MergeBuffer*` node with the enable set `Set(true)`.
 */
@ref class MergeBuffer[A:Bits] extends Top[MergeBuffer[A]]
         with LocalMem1[A,MergeBuffer]
         with Ref[Queue[Any],MergeBuffer[A]] {
  val A: Bits[A] = Bits[A]
  val evMem: MergeBuffer[A] <:< LocalMem[A,MergeBuffer] = implicitly[MergeBuffer[A] <:< LocalMem[A,MergeBuffer]]

  /** Stages an enqueue of `data` on the given way. */
  @api def enq(way: Int, data: A): Void = stage(MergeBufferEnq(this, way, data, Set(true)))

  /** Stages a bound for the given way. */
  @api def bound(way: Int, bound: I32): Void = stage(MergeBufferBound(this, way, bound, Set(true)))

  /** Stages an init with the given bit. */
  @api def init(init: Bit): Void = stage(MergeBufferInit(this, init, Set(true)))

  /** Stages a dequeue. */
  @api def deq(): A = stage(MergeBufferDeq(this, Set(true)))

  /** Typeclass read: stages a dequeue with the caller's enables; `addr` is not used. */
  @rig def __read(addr: Seq[Idx], ens: Set[Bit]): A = stage(MergeBufferDeq(this,ens))

  /** Typeclass write: stages nothing. */
  @rig def __write(data: A, addr: Seq[Idx], ens: Set[Bit]): Void = void

  @rig def __reset(ens: Set[Bit]): Void = void
}

object MergeBuffer {
  /** Stages a new merge buffer and sets its `isWriteBuffer` and `isMustMerge` flags. */
  @api def apply[A:Bits](ways: I32, par: I32): MergeBuffer[A] = {
    val buffer = stage(MergeBufferNew(ways, par))
    buffer.isWriteBuffer = true
    buffer.isMustMerge = true
    buffer
  }
}
Example 31
Source File: StreamOut.scala From spatial with MIT License | 5 votes |
package spatial.lang

import argon._
import forge.tags._
import spatial.node._

import scala.collection.mutable.Queue

/**
 * Staged write-only output stream. Writing stages a `StreamOutWrite` node;
 * reading is reported as an error.
 */
@ref class StreamOut[A:Bits] extends LocalMem0[A,StreamOut]
         with RemoteMem[A,StreamOut]
         with Ref[Queue[Any],StreamOut[A]] {
  val A: Bits[A] = Bits[A]
  private implicit val evA: A <:< Bits[A] = Bits[A].box
  override val evMem = implicitly[StreamOut[A] <:< (LocalMem[A,StreamOut] with RemoteMem[A,StreamOut])]

  /** Stages a write of `data` with no enable bits. */
  @api def :=(data: A): Void = stage(StreamOutWrite(this,data,Set.empty))

  /** Stages a write of `data` guarded by the single enable bit `en`. */
  @api def :=(data: A, en: Bit): Void = stage(StreamOutWrite(this,data,Set(en)))

  // --- Typeclass Methods
  /** Reading is not supported: reports the error and returns an error value. */
  @rig def __read(addr: Seq[Idx], ens: Set[Bit]): A = {
    error(ctx, "Cannot read from StreamOut")
    error(ctx)
    err[A]("Cannot read from StreamOut")
  }

  /** Typeclass write: stages a write with the caller's enables; `addr` is not used. */
  @rig def __write(data: A, addr: Seq[Idx], ens: Set[Bit] ): Void = stage(StreamOutWrite(this,data,ens))

  @rig def __reset(ens: Set[Bit]): Void = void
}

object StreamOut {
  /** Stages a new output stream attached to `bus`. */
  @api def apply[A:Bits](bus: Bus): StreamOut[A] = stage(StreamOutNew[A](bus))
}
Example 32
Source File: BufferedIterator.scala From scallion with Apache License 2.0 | 5 votes |
package scallion.util.internal

import scala.collection.mutable.Queue

/**
 * Iterator fed from the outside through `add` / `addAll` and closed with
 * `end`. `hasNext` and `next` block while the buffer is empty and `end` has
 * not been called. All methods synchronize on this instance.
 */
class BufferedIterator[A] extends Iterator[A] {

  private val elements = new Queue[A]()
  private var ended = false

  /** Constant description; does not inspect the buffer. */
  override def toString = "<iterator>"

  /** Waits until an element is buffered or `end` was called. Caller must hold the lock. */
  private def awaitElementOrEnd(): Unit = {
    while (elements.isEmpty && !ended) {
      wait()
    }
  }

  /** Blocks until an element is available or the iterator was ended. */
  override def hasNext: Boolean = synchronized {
    awaitElementOrEnd()
    elements.nonEmpty
  }

  /**
   * Blocks like `hasNext`, then removes and returns the oldest element.
   * If the iterator was ended with nothing buffered, the failure of
   * dequeuing from an empty queue propagates.
   */
  override def next(): A = synchronized {
    awaitElementOrEnd()
    elements.dequeue()
  }

  /** Appends one element and wakes all waiting threads. */
  def add(elem: A): Unit = synchronized {
    elements += elem
    notifyAll()
  }

  /** Appends all elements in order and wakes all waiting threads. */
  def addAll(elems: Seq[A]): Unit = synchronized {
    elems.foreach(elements += _)
    notifyAll()
  }

  /** Marks the iterator as ended and wakes all waiting threads. */
  def end(): Unit = synchronized {
    ended = true
    notifyAll()
  }
}
Example 33
Source File: QueueStream.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.examples.streaming

import scala.collection.mutable.Queue

import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Example that feeds a streaming job from an in-memory queue of RDDs and
 * prints, for every batch, how many values fall into each residue class
 * modulo 10.
 */
object QueueStream {

  /**
   * Runs the example: starts the streaming job, pushes 30 RDDs into the queue
   * at one-second intervals, then stops the context.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {

    StreamingExamples.setStreamingLogLevels()

    // Create the context
    val sparkConf = new SparkConf().setAppName("QueueStream")
    val ssc = new StreamingContext(sparkConf, Seconds(1))

    // Queue through which RDDs are handed to the queue-backed input stream
    val rddQueue = new Queue[RDD[Int]]()

    // Build the input stream on the queue and count values per (x % 10)
    val reducedStream = ssc.queueStream(rddQueue)
      .map(x => (x % 10, 1))
      .reduceByKey(_ + _)
    reducedStream.print()
    ssc.start()

    // Push one RDD per second; the queue is locked while it is modified
    (1 to 30).foreach { _ =>
      rddQueue.synchronized {
        rddQueue += ssc.sparkContext.makeRDD(1 to 1000, 10)
      }
      Thread.sleep(1000)
    }
    ssc.stop()
  }
}
Example 34
Source File: QueueStream.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.examples.streaming

import scala.collection.mutable.Queue

import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Example that feeds a streaming job from an in-memory queue of RDDs and
 * prints, for every batch, how many values fall into each residue class
 * modulo 10.
 */
object QueueStream {

  /**
   * Runs the example: starts the streaming job, pushes 30 RDDs into the queue
   * at one-second intervals, then stops the context.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {

    StreamingExamples.setStreamingLogLevels()

    // Create the context
    val sparkConf = new SparkConf().setAppName("QueueStream")
    val ssc = new StreamingContext(sparkConf, Seconds(1))

    // Queue through which RDDs are handed to the queue-backed input stream
    val rddQueue = new Queue[RDD[Int]]()

    // Build the input stream on the queue and count values per (x % 10)
    val reducedStream = ssc.queueStream(rddQueue)
      .map(x => (x % 10, 1))
      .reduceByKey(_ + _)
    reducedStream.print()
    ssc.start()

    // Push one RDD per second; the queue is locked while it is modified
    (1 to 30).foreach { _ =>
      rddQueue.synchronized {
        rddQueue += ssc.sparkContext.makeRDD(1 to 1000, 10)
      }
      Thread.sleep(1000)
    }
    ssc.stop()
  }
}
Example 35
Source File: QueueInputDStream.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import java.io.{NotSerializableException, ObjectInputStream, ObjectOutputStream}

import scala.collection.mutable.{ArrayBuffer, Queue}
import scala.reflect.ClassTag

import org.apache.spark.rdd.{RDD, UnionRDD}
import org.apache.spark.streaming.{StreamingContext, Time}

/**
 * Input stream whose batches are taken from a caller-supplied queue of RDDs.
 *
 * @param ssc        streaming context the stream belongs to
 * @param queue      queue the RDDs are pulled from; `compute` locks on it
 * @param oneAtATime when true, at most one queued RDD is consumed per batch;
 *                   otherwise every queued RDD is drained and unioned
 * @param defaultRDD RDD returned when nothing is queued; may be null, in which
 *                   case an empty RDD is returned instead
 */
private[streaming]
class QueueInputDStream[T: ClassTag](
    ssc: StreamingContext,
    val queue: Queue[RDD[T]],
    oneAtATime: Boolean,
    defaultRDD: RDD[T]
  ) extends InputDStream[T](ssc) {

  override def start(): Unit = {}

  override def stop(): Unit = {}

  /** Always fails: this stream rejects deserialization. */
  private def readObject(in: ObjectInputStream): Unit = {
    throw new NotSerializableException("queueStream doesn't support checkpointing. " +
      "Please don't use queueStream when checkpointing is enabled.")
  }

  /** Writes nothing; only logs a warning. */
  private def writeObject(oos: ObjectOutputStream): Unit = {
    logWarning("queueStream doesn't support checkpointing")
  }

  /**
   * Produces the RDD for one batch from whatever is currently queued.
   *
   * @param validTime batch time (not used by this implementation)
   * @return always a `Some`: the dequeued RDD, the union of all drained RDDs,
   *         `defaultRDD`, or an empty RDD
   */
  override def compute(validTime: Time): Option[RDD[T]] = {
    val pending = new ArrayBuffer[RDD[T]]()

    queue.synchronized {
      val takeSingle = oneAtATime && queue.nonEmpty
      if (takeSingle) {
        pending += queue.dequeue()
      } else {
        pending ++= queue
        queue.clear()
      }
    }

    val batch: RDD[T] =
      if (pending.isEmpty) {
        if (defaultRDD != null) defaultRDD else ssc.sparkContext.emptyRDD[T]
      } else if (oneAtATime) {
        pending.head
      } else {
        new UnionRDD(context.sc, pending.toSeq)
      }

    Some(batch)
  }
}
Example 36
Source File: SharedStreamingContextBeforeAfterEachTest.scala From sscheck with Apache License 2.0 | 5 votes |
package es.ucm.fdi.sscheck.spark.streaming

import org.junit.runner.RunWith
import org.specs2.runner.JUnitRunner
import org.specs2.execute.Result

import org.apache.spark.streaming.Duration
import org.apache.spark.rdd.RDD

import scala.collection.mutable.Queue
import scala.concurrent.duration._

import org.slf4j.LoggerFactory

import es.ucm.fdi.sscheck.matcher.specs2.RDDMatchers._

// sbt "test-only es.ucm.fdi.sscheck.spark.streaming.SharedStreamingContextBeforeAfterEachTest"

/**
 * specs2 specification exercising `SharedStreamingContextBeforeAfterEach`
 * with a queue stream: the same scenario is run once with an expectation
 * that holds and once with one that does not, and the latter is required to
 * produce a failing result.
 */
@RunWith(classOf[JUnitRunner])
class SharedStreamingContextBeforeAfterEachTest
  extends org.specs2.Specification
  with org.specs2.matcher.MustThrownExpectations
  with org.specs2.matcher.ResultMatchers
  with SharedStreamingContextBeforeAfterEach {

  // cannot use private[this] due to https://issues.scala-lang.org/browse/SI-8087
  @transient private val logger = LoggerFactory.getLogger("SharedStreamingContextBeforeAfterEachTest")

  // Spark configuration
  override def sparkMaster : String = "local[5]"
  override def batchDuration = Duration(250)
  override def defaultParallelism = 3
  override def enableCheckpointing = false // as queueStream doesn't support checkpointing

  // Examples run sequentially.
  def is =
    sequential ^ s2""" Simple test for SharedStreamingContextBeforeAfterEach where a simple queueStream test must be successful $successfulSimpleQueueStreamTest where a simple queueStream test can also fail $failingSimpleQueueStreamTest """

  // Every record equals "hola", so zero differing records is the expectation that holds.
  def successfulSimpleQueueStreamTest = simpleQueueStreamTest(expectedCount = 0)

  // Expecting one differing record cannot hold; the example passes only if the result is a failure.
  def failingSimpleQueueStreamTest = simpleQueueStreamTest(expectedCount = 1) must beFailing

  /**
   * Streams 5 batches of 10 identical records through a queue stream and
   * accumulates per-batch assertions into a single Result.
   *
   * @param expectedCount number of records per batch expected to differ from
   *                      the generated record
   * @return the conjunction of the per-batch assertion results
   */
  def simpleQueueStreamTest(expectedCount : Int) : Result = {
    val record = "hola"
    val batches = Seq.fill(5)(Seq.fill(10)(record))
    val queue = new Queue[RDD[String]]
    queue ++= batches.map(batch => sc.parallelize(batch, numSlices = defaultParallelism))
    val inputDStream = ssc.queueStream(queue, oneAtATime = true)
    val sizesDStream = inputDStream.map(_.length)

    var batchCount = 0
    // NOTE wrapping assertions with a Result object is needed
    // to avoid the Spark Streaming runtime capturing the exceptions
    // from failing assertions
    var result : Result = ok
    inputDStream.foreachRDD { rdd =>
      batchCount += 1
      println(s"completed batch number $batchCount: ${rdd.collect.mkString(",")}")
      result = result and {
        rdd.filter(_!= record).count() === expectedCount
        rdd should existsRecord(_ == "hola")
      }
    }
    // Every record length in every batch must equal the length of the generated record.
    sizesDStream.foreachRDD { rdd =>
      result = result and {
        rdd should foreachRecord(record.length)(len => _ == len)
      }
    }

    // should only start the dstream after all the transformations and actions have been defined
    ssc.start()

    // wait for completion of batches.length batches
    StreamingContextUtils.awaitForNBatchesCompleted(batches.length, atMost = 10 seconds)(ssc)

    result
  }
}
Example 37
Source File: QueueStream.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.examples.streaming

import scala.collection.mutable.Queue

import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Seconds, StreamingContext}

/**
 * Example that feeds a streaming job from an in-memory queue of RDDs and
 * prints, for every batch, how many values fall into each residue class
 * modulo 10.
 */
object QueueStream {

  /**
   * Runs the example: starts the streaming job, pushes 30 RDDs into the queue
   * at one-second intervals, then stops the context.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {

    StreamingExamples.setStreamingLogLevels()

    // Create the context
    val sparkConf = new SparkConf().setAppName("QueueStream")
    val ssc = new StreamingContext(sparkConf, Seconds(1))

    // Queue through which RDDs are handed to the queue-backed input stream
    val rddQueue = new Queue[RDD[Int]]()

    // Build the input stream on the queue and count values per (x % 10)
    val reducedStream = ssc.queueStream(rddQueue)
      .map(x => (x % 10, 1))
      .reduceByKey(_ + _)
    reducedStream.print()
    ssc.start()

    // Push one RDD per second; the queue is locked while it is modified
    (1 to 30).foreach { _ =>
      rddQueue.synchronized {
        rddQueue += ssc.sparkContext.makeRDD(1 to 1000, 10)
      }
      Thread.sleep(1000)
    }
    ssc.stop()
  }
}
Example 38
Source File: QueueInputDStream.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import java.io.{NotSerializableException, ObjectInputStream, ObjectOutputStream}

import scala.collection.mutable.{ArrayBuffer, Queue}
import scala.reflect.ClassTag

import org.apache.spark.rdd.{RDD, UnionRDD}
import org.apache.spark.streaming.{StreamingContext, Time}

/**
 * Input stream whose batches are taken from a caller-supplied queue of RDDs.
 *
 * @param ssc        streaming context the stream belongs to
 * @param queue      queue the RDDs are pulled from; `compute` locks on it
 * @param oneAtATime when true, at most one queued RDD is consumed per batch;
 *                   otherwise every queued RDD is drained and unioned
 * @param defaultRDD RDD returned when nothing is queued; may be null, in which
 *                   case an empty RDD is returned instead
 */
private[streaming]
class QueueInputDStream[T: ClassTag](
    ssc: StreamingContext,
    val queue: Queue[RDD[T]],
    oneAtATime: Boolean,
    defaultRDD: RDD[T]
  ) extends InputDStream[T](ssc) {

  override def start(): Unit = {}

  override def stop(): Unit = {}

  /** Always fails: this stream rejects deserialization. */
  private def readObject(in: ObjectInputStream): Unit = {
    throw new NotSerializableException("queueStream doesn't support checkpointing. " +
      "Please don't use queueStream when checkpointing is enabled.")
  }

  /** Writes nothing; only logs a warning. */
  private def writeObject(oos: ObjectOutputStream): Unit = {
    logWarning("queueStream doesn't support checkpointing")
  }

  /**
   * Produces the RDD for one batch from whatever is currently queued.
   *
   * @param validTime batch time (not used by this implementation)
   * @return always a `Some`: the dequeued RDD, the union of all drained RDDs,
   *         `defaultRDD`, or an empty RDD
   */
  override def compute(validTime: Time): Option[RDD[T]] = {
    val pending = new ArrayBuffer[RDD[T]]()

    queue.synchronized {
      val takeSingle = oneAtATime && queue.nonEmpty
      if (takeSingle) {
        pending += queue.dequeue()
      } else {
        pending ++= queue
        queue.clear()
      }
    }

    val batch: RDD[T] =
      if (pending.isEmpty) {
        if (defaultRDD != null) defaultRDD else ssc.sparkContext.emptyRDD[T]
      } else if (oneAtATime) {
        pending.head
      } else {
        new UnionRDD(context.sc, pending.toSeq)
      }

    Some(batch)
  }
}
Example 39
Source File: StreamingApp.scala From iolap with Apache License 2.0 | 5 votes |
package main.scala

import scala.collection.mutable.{ListBuffer, Queue}

import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming._

/**
 * Smoke test for queue-backed streaming: three RDDs of known sizes are
 * streamed, every RDD seen by the output operation is recorded, and the
 * recorded non-empty sizes are checked after a fixed wait.
 */
object SparkStreamingExample {

  /**
   * Runs the smoke test. Exits the JVM with status -1 on the first failed
   * check; prints "Test succeeded" otherwise.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Prints the message and terminates the JVM when the condition is false.
    def test(f: => Boolean, failureMsg: String) = {
      if (!f) {
        println(failureMsg)
        System.exit(-1)
      }
    }

    // The master is set only when SPARK_AUDIT_MASTER is present in the environment.
    val baseConf = new SparkConf().setAppName("Simple Streaming App")
    val conf = sys.env.get("SPARK_AUDIT_MASTER")
      .map(master => baseConf.setMaster(master))
      .getOrElse(baseConf)

    val ssc = new StreamingContext(conf, Seconds(1))
    val context = ssc.sparkContext
    val seen = ListBuffer[RDD[Int]]()

    val rdd1 = context.makeRDD(1 to 100, 10)
    val rdd2 = context.makeRDD(1 to 1000, 10)
    val rdd3 = context.makeRDD(1 to 10000, 10)

    val stream = ssc.queueStream(Queue(rdd1, rdd2, rdd3))
    stream.foreachRDD(rdd => seen += rdd)

    ssc.start()
    Thread.sleep(5000)

    val rddCounts = seen.map(_.count()).filter(_ > 0)
    test(rddCounts.length == 3, "Did not collect three RDD's from stream")
    test(rddCounts.toSet == Set(100, 1000, 10000), "Did not find expected streams")

    println("Test succeeded")

    ssc.stop()
  }
}
Example 40
Source File: SqlApp.scala From iolap with Apache License 2.0 | 5 votes |
package main.scala

import scala.collection.mutable.{ListBuffer, Queue}

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext

/** Row type of the `people` temp table. */
case class Person(name: String, age: Int)

/**
 * Smoke test for Spark SQL: registers 100 generated people as a temp table,
 * selects those aged 13 to 19 inclusive, and checks that exactly 7 names are
 * returned.
 */
object SparkSqlExample {

  /**
   * Runs the smoke test. Exits the JVM with status -1 when the check fails;
   * prints "Test succeeded" otherwise.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // The master is set only when SPARK_AUDIT_MASTER is present in the environment.
    val conf = sys.env.get("SPARK_AUDIT_MASTER") match {
      case Some(master) => new SparkConf().setAppName("Simple Sql App").setMaster(master)
      case None => new SparkConf().setAppName("Simple Sql App")
    }
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    import sqlContext.implicits._
    import sqlContext._

    val people = sc.makeRDD(1 to 100, 10).map(x => Person(s"Name$x", x)).toDF()
    people.registerTempTable("people")
    val teenagers = sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
    val teenagerNames = teenagers.map(t => "Name: " + t(0)).collect()
    teenagerNames.foreach(println)

    // Prints the message and terminates the JVM when the condition is false.
    def test(f: => Boolean, failureMsg: String) = {
      if (!f) {
        println(failureMsg)
        System.exit(-1)
      }
    }

    // mkString: concatenating the array itself would print its JVM identity, not its contents.
    test(teenagerNames.size == 7,
      "Unexpected number of selected elements: " + teenagerNames.mkString(", "))
    println("Test succeeded")
    sc.stop()
  }
}
Example 41
Source File: HiveApp.scala From iolap with Apache License 2.0 | 5 votes |
package main.scala

import scala.collection.mutable.{ListBuffer, Queue}

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.hive.HiveContext

case class Person(name: String, age: Int)

/**
 * Smoke test for Hive support: recreates the `src` table, loads `data.txt`
 * into it, selects the rows with 0 <= key < 5, and checks that exactly 5
 * rows are returned.
 */
object SparkSqlExample {

  /**
   * Runs the smoke test. Exits the JVM with status -1 when the check fails;
   * prints "Test succeeded" otherwise.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // The master is set only when SPARK_AUDIT_MASTER is present in the environment.
    val conf = sys.env.get("SPARK_AUDIT_MASTER") match {
      case Some(master) => new SparkConf().setAppName("Simple Sql App").setMaster(master)
      case None => new SparkConf().setAppName("Simple Sql App")
    }
    val sc = new SparkContext(conf)
    val hiveContext = new HiveContext(sc)

    import hiveContext._

    sql("DROP TABLE IF EXISTS src")
    sql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
    sql("LOAD DATA LOCAL INPATH 'data.txt' INTO TABLE src")
    val results = sql("FROM src SELECT key, value WHERE key >= 0 AND KEY < 5").collect()
    results.foreach(println)

    // Prints the message and terminates the JVM when the condition is false.
    def test(f: => Boolean, failureMsg: String) = {
      if (!f) {
        println(failureMsg)
        System.exit(-1)
      }
    }

    // mkString: concatenating the array itself would print its JVM identity, not its contents.
    test(results.size == 5,
      "Unexpected number of selected elements: " + results.mkString(", "))
    println("Test succeeded")
    sc.stop()
  }
}
Example 42
Source File: QueueInputDStream.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream

import java.io.{NotSerializableException, ObjectInputStream, ObjectOutputStream}

import scala.collection.mutable.{ArrayBuffer, Queue}
import scala.reflect.ClassTag

import org.apache.spark.rdd.{RDD, UnionRDD}
import org.apache.spark.streaming.{Time, StreamingContext}

/**
 * Input stream whose batches are taken from a caller-supplied queue of RDDs.
 *
 * @param ssc        streaming context the stream belongs to
 * @param queue      queue the RDDs are pulled from; `compute` locks on it
 * @param oneAtATime when true, at most one queued RDD is consumed per batch;
 *                   otherwise every queued RDD is drained and unioned
 * @param defaultRDD RDD returned when nothing is queued; may be null, in which
 *                   case no RDD is produced for the batch
 */
private[streaming]
class QueueInputDStream[T: ClassTag](
    @transient ssc: StreamingContext,
    val queue: Queue[RDD[T]],
    oneAtATime: Boolean,
    defaultRDD: RDD[T]
  ) extends InputDStream[T](ssc) {

  override def start(): Unit = {}

  override def stop(): Unit = {}

  /** Always fails: this stream rejects deserialization. */
  private def readObject(in: ObjectInputStream): Unit = {
    throw new NotSerializableException("queueStream doesn't support checkpointing. " +
      "Please don't use queueStream when checkpointing is enabled.")
  }

  /** Writes nothing; only logs a warning. */
  private def writeObject(oos: ObjectOutputStream): Unit = {
    logWarning("queueStream doesn't support checkpointing")
  }

  /**
   * Produces the RDD for one batch from whatever is currently queued.
   *
   * @param validTime batch time (not used by this implementation)
   * @return the dequeued RDD, the union of all drained RDDs, `defaultRDD`
   *         when nothing was queued, or `None` when `defaultRDD` is null too
   */
  override def compute(validTime: Time): Option[RDD[T]] = {
    val buffer = new ArrayBuffer[RDD[T]]()
    // The queue is a plain mutable collection that the caller keeps pushing
    // to from its own thread; take its monitor so the emptiness check and the
    // dequeue happen as one step. Producers must lock the same queue object.
    queue.synchronized {
      if (oneAtATime && queue.nonEmpty) {
        buffer += queue.dequeue()
      } else {
        buffer ++= queue.dequeueAll(_ => true)
      }
    }
    if (buffer.nonEmpty) {
      if (oneAtATime) {
        Some(buffer.head)
      } else {
        Some(new UnionRDD(ssc.sc, buffer.toSeq))
      }
    } else {
      // Option(...) maps a null default to None.
      Option(defaultRDD)
    }
  }
}