Example 1
Source File: IOCommon.scala From Swallow with Apache License 2.0 | 5 votes |
// NOTE(review): the package clause of this file was lost when the listing was extracted
// (the line began with a bare `package`); restore the real package name before compiling.

import java.io.{File, FileInputStream, IOException, InputStreamReader}
import java.util.Properties

import org.apache.hadoop.io.compress.CompressionCodec
import org.apache.hadoop.io.{NullWritable, Text}
import org.apache.hadoop.mapred.SequenceFileOutputFormat
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkContext, SparkException}

import scala.collection.JavaConversions._
import scala.collection.mutable.HashMap
import scala.reflect.ClassTag
import scala.reflect.runtime.universe.TypeTag

/**
 * Reads and writes benchmark data sets through the given SparkContext, choosing the on-disk
 * format ("Text" or "Sequence") from the properties held by the [[IOCommon]] companion.
 *
 * @param sc the SparkContext used for all reads
 */
class IOCommon(val sc:SparkContext) {

  /**
   * Loads `filename` as an RDD of strings.
   *
   * @param filename     path handed to `textFile` / `sequenceFile`
   * @param force_format when defined, overrides the `sparkbench.inputformat` property;
   *                     when neither is set the format defaults to "Text"
   * @throws UnsupportedOperationException if the resolved format is neither "Text" nor "Sequence"
   */
  def load[T:ClassTag:TypeTag](filename:String, force_format:Option[String]=None) = {
    val input_format = force_format.getOrElse(
      IOCommon.getProperty("sparkbench.inputformat").getOrElse("Text"))

    input_format match {
      case "Text" =>
        sc.textFile(filename)

      case "Sequence" =>
        // Only the value of each (NullWritable, Text) record carries data.
        sc.sequenceFile[NullWritable, Text](filename).map(_._2.toString)

      case _ => throw new UnsupportedOperationException(s"Unknown input format: $input_format")
    }
  }

  /**
   * Saves `data` to `filename` in the format named by property `prefix`, compressed with the
   * codec class named by property `prefix + ".codec"` when that property is set.
   *
   * @throws UnsupportedOperationException if the configured format is neither "Text" nor "Sequence"
   */
  def save(filename:String, data:RDD[_], prefix:String) = {
    val output_format = IOCommon.getProperty(prefix).getOrElse("Text")
    val output_format_codec =
      loadClassByName[CompressionCodec](IOCommon.getProperty(prefix + ".codec"))

    output_format match {
      case "Text" =>
        if (output_format_codec.isEmpty)  data.saveAsTextFile(filename)
        else data.saveAsTextFile(filename, output_format_codec.get)

      case "Sequence" =>
        // Every element is written as the Text value of a record with a NullWritable key.
        val sequence_data = data.map(x => (NullWritable.get(), new Text(x.toString)))
        if (output_format_codec.isEmpty) {
          sequence_data.saveAsHadoopFile[SequenceFileOutputFormat[NullWritable, Text]](filename)
        } else {
          sequence_data.saveAsHadoopFile[SequenceFileOutputFormat[NullWritable, Text]](filename,
            output_format_codec.get)
        }

      case _ => throw new UnsupportedOperationException(s"Unknown output format: $output_format")
    }
  }

  /** Saves `data` using the `sparkbench.outputformat` property family. */
  def save(filename:String, data:RDD[_]):Unit = save(filename, data, "sparkbench.outputformat")

  /** Resolves an optional class name to the runtime class of a freshly created instance. */
  private def loadClassByName[T](name:Option[String]) = {
    if (!name.isEmpty) Some(Class.forName(name.get)
      .newInstance.asInstanceOf[T].getClass) else None
  }

  /** Reflectively invokes the zero-argument method `method_name` on `obj`. */
  private def callMethod[T, R](obj:T, method_name:String) =
    obj.getClass.getMethod(method_name).invoke(obj).asInstanceOf[R]
}

/**
 * Process-wide benchmark configuration, loaded once from the comma-separated list of
 * properties files named by the `SPARKBENCH_PROPERTIES_FILES` environment variable.
 */
object IOCommon {
  private val sparkbench_conf: HashMap[String, String] =
    getPropertiesFromFile(System.getenv("SPARKBENCH_PROPERTIES_FILES"))

  /**
   * Loads and merges the given properties files; later files override earlier ones.
   *
   * @param filenames comma-separated file paths; surrounding whitespace and blank entries are
   *                  ignored, and `null` (e.g. an unset environment variable) yields an empty map
   * @return all properties with trimmed values, minus those whose value is "none" (any case)
   * @throws IllegalArgumentException if a listed file does not exist or is not a regular file
   * @throws SparkException           if reading a file fails
   */
  def getPropertiesFromFile(filenames: String): HashMap[String, String] = {
    val result = new HashMap[String, String]
    // System.getenv returns null for an unset variable; treat that as "no files".
    val paths = Option(filenames).getOrElse("").split(',').map(_.trim).filter(_.nonEmpty)
    paths.foreach { filename =>
      val file = new File(filename)
      require(file.exists, s"Properties file $file does not exist")
      require(file.isFile, s"Properties file $file is not a normal file")

      val inReader = new InputStreamReader(new FileInputStream(file), "UTF-8")
      try {
        val properties = new Properties()
        properties.load(inReader)
        result ++= properties.stringPropertyNames()
          .map(k => (k, properties.getProperty(k).trim)).toMap
      } catch {
        case e: IOException =>
          val message = s"Failed when loading Sparkbench properties file $file"
          throw new SparkException(message, e)
      } finally {
        inReader.close()
      }
    }

    // A value of "none" means "explicitly unset".
    result.filter{case (key, value) => value.toLowerCase != "none"}
  }

  /** Looks up a single configuration value. */
  def getProperty(key:String):Option[String] = sparkbench_conf.get(key)

  /** Prints every loaded property as `key<TAB><TAB>value`. */
  def dumpProperties(): Unit = sparkbench_conf
      .foreach{case (key, value)=> println(s"$key\t\t$value")}
}
Example 2
Source File: UniqueTermAccumulator.scala From sparkpipe-core with Apache License 2.0 | 5 votes |
package software.uncharted.sparkpipe.ops.core.dataframe.text.util

import org.apache.spark.sql.Row
import org.apache.spark.util.AccumulatorV2
import scala.collection.mutable.HashMap

/**
 * Accumulator that counts, per term, how many times it appears across all sequences
 * passed to [[add]].
 *
 * @param result  backing map from term to its running count
 * @param touched whether this accumulator has received any data since construction/reset
 */
private[text] class UniqueTermAccumulator(
  private var result: HashMap[String, Int],
  private var touched: Boolean = false
) extends AccumulatorV2[Seq[String], HashMap[String, Int]] {

  def this() = this(new HashMap[String, Int]())

  /** Increments the count of every term in `in` (a term repeated in `in` is counted each time). */
  override def add(in: Seq[String]): Unit = {
    // Previously the flag was never set, so isZero stayed true after data had been added.
    if (in.nonEmpty) {
      touched = true
    }
    in.foreach { w =>
      result.put(w, result.getOrElse(w, 0) + 1)
    }
  }

  /** Returns an independent accumulator holding the same counts and the same touched state. */
  override def copy(): AccumulatorV2[Seq[String], HashMap[String, Int]] = {
    val clone = new HashMap[String, Int]()
    result.foreach(kv => clone.put(kv._1, kv._2))
    new UniqueTermAccumulator(clone, touched)
  }

  /** True only when no data has been recorded and the backing map is empty. */
  override def isZero(): Boolean = {
    !touched && result.isEmpty
  }

  /** Adds the per-term counts of `other` into this accumulator. */
  override def merge(other: AccumulatorV2[Seq[String], HashMap[String, Int]]): Unit = {
    val incoming = other.value
    if (incoming.nonEmpty) {
      touched = true
    }
    incoming.foreach { t =>
      result.put(t._1, result.getOrElse(t._1, 0) + t._2)
    }
  }

  /** Drops all counts and returns the accumulator to its zero state. */
  override def reset(): Unit = {
    result.clear()
    touched = false
  }

  /** The live backing map (not a copy). */
  override def value: HashMap[String, Int] = {
    result
  }
}
Example 3
Source File: Mapper.scala From Scalaprof with GNU General Public License v2.0 | 5 votes |
package edu.neu.coe.scala.mapreduce

import akka.actor.{ Actor, ActorLogging, ActorRef }
import scala.collection.mutable.HashMap
import scala.util._

/**
 * A Mapper variant that tolerates failures: successful results are grouped by key and the
 * exceptions from failed elements are returned alongside them.
 */
class Mapper_Forgiving[K1,V1,K2,V2](f: (K1,V1)=>(K2,V2)) extends Mapper[K1,V1,K2,V2](f) {

  /**
   * Splits the mapped results into a key -> values map and a list of failures.
   *
   * Values for a key are prepended, so the most recently seen value comes first;
   * failures are kept in the order they were encountered.
   */
  override def prepareReply(v2k2ts: Seq[Try[(K2,V2)]]) = {
    val v2sK2m = HashMap[K2,Seq[V2]]() // mutable
    // Seq is immutable, so the appended result must be re-assigned; the previous
    // `xs :+ x` discarded it and every failure was silently dropped.
    var xs: Seq[Throwable] = Seq[Throwable]()
    // presumably Master.sequence turns a Try into an Either (it is matched on Right/Left below)
    for (v2k2t <- v2k2ts; v2k2e = Master.sequence(v2k2t))
      v2k2e match {
        case Right((k2,v2)) => v2sK2m put(k2, v2+:(v2sK2m get(k2) getOrElse(Nil)))
        case Left(x) => xs = xs :+ x
      }
    (v2sK2m.toMap, xs.toSeq)
  }
}

/** Message wrapping the key/value pairs to be processed. */
case class Incoming[K, V](m: Seq[(K,V)]) {
  override def toString = s"Incoming: with ${m.size} elements"
}

object Incoming {
  /** Wraps plain values as pairs whose key is null (cast to K). */
  def sequence[K,V](vs: Seq[V]): Incoming[K,V] = Incoming((vs zip Stream.continually(null.asInstanceOf[K])).map{_.swap})

  /** Wraps the entries of a map. */
  def map[K, V](vKm: Map[K,V]): Incoming[K,V] = Incoming(vKm.toSeq)
}

object Mapper {
}
Example 4
Source File: LocalKMeans.scala From drizzle-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples import java.util.Random import scala.collection.mutable.HashMap import scala.collection.mutable.HashSet import breeze.linalg.{squaredDistance, DenseVector, Vector} object LocalKMeans { val N = 1000 val R = 1000 // Scaling factor val D = 10 val K = 10 val convergeDist = 0.001 val rand = new Random(42) def generateData: Array[DenseVector[Double]] = { def generatePoint(i: Int): DenseVector[Double] = { DenseVector.fill(D) {rand.nextDouble * R} } Array.tabulate(N)(generatePoint) } def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = { var index = 0 var bestIndex = 0 var closest = Double.PositiveInfinity for (i <- 1 to centers.size) { val vCurr = centers.get(i).get val tempDist = squaredDistance(p, vCurr) if (tempDist < closest) { closest = tempDist bestIndex = i } } bestIndex } def showWarning() { System.err.println( """WARN: This is a naive implementation of KMeans Clustering and is given as an example! |Please use |for more conventional use. 
""".stripMargin) } def main(args: Array[String]) { showWarning() val data = generateData var points = new HashSet[Vector[Double]] var kPoints = new HashMap[Int, Vector[Double]] var tempDist = 1.0 while (points.size < K) { points.add(data(rand.nextInt(N))) } val iter = points.iterator for (i <- 1 to points.size) { kPoints.put(i, } println("Initial centers: " + kPoints) while(tempDist > convergeDist) { var closest = (p => (closestPoint(p, kPoints), (p, 1))) var mappings = closest.groupBy[Int] (x => x._1) var pointStats = { pair => pair._2.reduceLeft [(Int, (Vector[Double], Int))] { case ((id1, (p1, c1)), (id2, (p2, c2))) => (id1, (p1 + p2, c1 + c2)) } } var newPoints = {mapping => (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))} tempDist = 0.0 for (mapping <- newPoints) { tempDist += squaredDistance(kPoints.get(mapping._1).get, mapping._2) } for (newP <- newPoints) { kPoints.put(newP._1, newP._2) } } println("Final centers: " + kPoints) } } // scalastyle:on println
Example 5
Source File: JsonUtils.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.kafka010 import import scala.collection.mutable.HashMap import scala.util.control.NonFatal import org.apache.kafka.common.TopicPartition import org.json4s.NoTypeHints import org.json4s.jackson.Serialization def partitionOffsets(partitionOffsets: Map[TopicPartition, Long]): String = { val result = new HashMap[String, HashMap[Int, Long]]() partitionOffsets.foreach { case (tp, off) => val parts = result.getOrElse(tp.topic, new HashMap[Int, Long]) parts += tp.partition -> off result += tp.topic -> parts } Serialization.write(result) } }
Example 6
Source File: DStreamCheckpointData.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import{IOException, ObjectInputStream, ObjectOutputStream} import scala.collection.mutable.HashMap import scala.reflect.ClassTag import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.internal.Logging import org.apache.spark.streaming.Time import org.apache.spark.util.Utils private[streaming] class DStreamCheckpointData[T: ClassTag](dstream: DStream[T]) extends Serializable with Logging { protected val data = new HashMap[Time, AnyRef]() // Mapping of the batch time to the checkpointed RDD file of that time @transient private var timeToCheckpointFile = new HashMap[Time, String] // Mapping of the batch time to the time of the oldest checkpointed RDD // in that batch's checkpoint data @transient private var timeToOldestCheckpointFileTime = new HashMap[Time, Time] @transient private var fileSystem: FileSystem = null protected[streaming] def currentCheckpointFiles = data.asInstanceOf[HashMap[Time, String]] def restore() { // Create RDDs from the checkpoint data currentCheckpointFiles.foreach { case(time, file) => logInfo("Restoring checkpointed RDD for time " + time + " from file '" + file + "'") dstream.generatedRDDs += ((time, dstream.context.sparkContext.checkpointFile[T](file))) } } override def toString: String = { "[\n" + currentCheckpointFiles.size + " checkpoint files \n" + currentCheckpointFiles.mkString("\n") + "\n]" } @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { logDebug(this.getClass().getSimpleName + ".writeObject used") if (dstream.context.graph != null) { dstream.context.graph.synchronized { if (dstream.context.graph.checkpointInProgress) { oos.defaultWriteObject() } else { val msg = "Object of " + this.getClass.getName + " is being serialized " + " possibly as a part of closure of an RDD operation. This is because " + " the DStream object is being referred to from within the closure. 
" + " Please rewrite the RDD operation inside this DStream to avoid this. " + " This has been enforced to avoid bloating of Spark tasks " + " with unnecessary objects." throw new } } } else { throw new "Graph is unexpectedly null when DStream is being serialized.") } } @throws(classOf[IOException]) private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { logDebug(this.getClass().getSimpleName + ".readObject used") ois.defaultReadObject() timeToOldestCheckpointFileTime = new HashMap[Time, Time] timeToCheckpointFile = new HashMap[Time, String] } }
Example 7
Source File: MasterWebUI.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.master.ui import scala.collection.mutable.HashMap import org.eclipse.jetty.servlet.ServletContextHandler import org.apache.spark.deploy.master.Master import org.apache.spark.internal.Logging import org.apache.spark.ui.{SparkUI, WebUI} import org.apache.spark.ui.JettyUtils._ def initialize() { val masterPage = new MasterPage(this) attachPage(new ApplicationPage(this)) attachPage(masterPage) attachHandler(createStaticHandler(MasterWebUI.STATIC_RESOURCE_DIR, "/static")) attachHandler(createRedirectHandler( "/app/kill", "/", masterPage.handleAppKillRequest, httpMethods = Set("POST"))) attachHandler(createRedirectHandler( "/driver/kill", "/", masterPage.handleDriverKillRequest, httpMethods = Set("POST"))) } def addProxyTargets(id: String, target: String): Unit = { var endTarget = target.stripSuffix("/") val handler = createProxyHandler("/proxy/" + id, endTarget) attachHandler(handler) proxyHandlers(id) = handler } def removeProxyTargets(id: String): Unit = { proxyHandlers.remove(id).foreach(detachHandler) } } private[master] object MasterWebUI { private val STATIC_RESOURCE_DIR = SparkUI.STATIC_RESOURCE_DIR }
Example 8
Source File: PoolTable.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package import import scala.collection.mutable.HashMap import scala.xml.Node import org.apache.spark.scheduler.{Schedulable, StageInfo} import org.apache.spark.ui.UIUtils private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) { private val listener = parent.progressListener def toNodeSeq: Seq[Node] = { listener.synchronized { poolTable(poolRow, pools) } } private def poolTable( makeRow: (Schedulable, HashMap[String, HashMap[Int, StageInfo]]) => Seq[Node], rows: Seq[Schedulable]): Seq[Node] = { <table class="table table-bordered table-striped table-condensed sortable table-fixed"> <thead> <th>Pool Name</th> <th>Minimum Share</th> <th>Pool Weight</th> <th>Active Stages</th> <th>Running Tasks</th> <th>SchedulingMode</th> </thead> <tbody> { => makeRow(r, listener.poolToActiveStages))} </tbody> </table> } private def poolRow( p: Schedulable, poolToActiveStages: HashMap[String, HashMap[Int, StageInfo]]): Seq[Node] = { val activeStages = poolToActiveStages.get( match { case Some(stages) => stages.size case None => 0 } val href = "%s/stages/pool?poolname=%s" .format(UIUtils.prependBaseUri(parent.basePath), URLEncoder.encode(, "UTF-8")) <tr> <td> <a href={href}>{}</a> </td> <td>{p.minShare}</td> <td>{p.weight}</td> <td>{activeStages}</td> <td>{p.runningTasks}</td> <td>{p.schedulingMode}</td> </tr> } }
Example 9
Source File: ConfigReader.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.internal.config import java.util.{Map => JMap} import java.util.regex.Pattern import scala.collection.mutable.HashMap import scala.util.matching.Regex private object ConfigReader { private val REF_RE = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r } def substitute(input: String): String = substitute(input, Set()) private def substitute(input: String, usedRefs: Set[String]): String = { if (input != null) { ConfigReader.REF_RE.replaceAllIn(input, { m => val prefix = val name = val ref = if (prefix == null) name else s"$prefix:$name" require(!usedRefs.contains(ref), s"Circular reference in $input: $ref") val replacement = bindings.get(prefix) .flatMap(_.get(name)) .map { v => substitute(v, usedRefs + ref) } .getOrElse(m.matched) Regex.quoteReplacement(replacement) }) } else { input } } }
Example 10
Source File: StageInfo.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import scala.collection.mutable.HashMap import org.apache.spark.annotation.DeveloperApi import org.apache.spark.executor.TaskMetrics import def fromStage( stage: Stage, attemptId: Int, numTasks: Option[Int] = None, taskMetrics: TaskMetrics = null, taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty ): StageInfo = { val ancestorRddInfos = val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos new StageInfo(, attemptId,, numTasks.getOrElse(stage.numTasks), rddInfos,, stage.details, taskMetrics, taskLocalityPreferences) } }
Example 11
Source File: GroupedCountEvaluator.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.partial

import scala.collection.Map
import scala.collection.mutable.HashMap
import scala.reflect.ClassTag

import org.apache.spark.util.collection.OpenHashMap

/**
 * Approximate evaluator that merges per-task count maps and reports, for each key, either the
 * exact count (once every output has been merged) or a bound computed by CountEvaluator.
 *
 * @param totalOutputs number of task results expected in total
 * @param confidence   confidence level passed to `CountEvaluator.bound`
 */
private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] {

  private var outputsMerged = 0
  private val sums = new OpenHashMap[T, Long]()   // Sum of counts for each key

  /** Adds one task's per-key counts into the running sums. */
  override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]): Unit = {
    outputsMerged += 1
    taskResult.foreach { case (key, value) =>
      // Insert `value` for a new key, otherwise add it to the existing sum.
      sums.changeValue(key, value, _ + value)
    }
  }

  /** Current per-key estimate given the outputs merged so far. */
  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      // Everything merged: the sums are exact, so low == high == mean with confidence 1.0.
      sums.map { case (key, sum) => (key, new BoundedDouble(sum, 1.0, sum, sum)) }.toMap
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      // Fraction of outputs seen so far, used to extrapolate each partial sum.
      val p = outputsMerged.toDouble / totalOutputs
      sums.map { case (key, sum) => (key, CountEvaluator.bound(confidence, sum, p)) }.toMap
    }
  }
}
Example 12
Source File: MasterWebUISuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.master.ui

import java.io.DataOutputStream
import java.net.{HttpURLConnection, URL}
import java.nio.charset.StandardCharsets
import java.util.Date

import scala.collection.mutable.HashMap

import org.mockito.Mockito.{mock, times, verify, when}
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite}
import org.apache.spark.deploy.DeployMessages.{KillDriverResponse, RequestKillDriver}
import org.apache.spark.deploy.DeployTestUtils._
import org.apache.spark.deploy.master._
import org.apache.spark.rpc.{RpcEndpointRef, RpcEnv}

/** Exercises the kill-application and kill-driver endpoints of MasterWebUI against a mocked Master. */
class MasterWebUISuite extends SparkFunSuite with BeforeAndAfterAll {

  val conf = new SparkConf
  val securityMgr = new SecurityManager(conf)
  val rpcEnv = mock(classOf[RpcEnv])
  val master = mock(classOf[Master])
  val masterEndpointRef = mock(classOf[RpcEndpointRef])
  when(master.securityMgr).thenReturn(securityMgr)
  when(master.conf).thenReturn(conf)
  when(master.rpcEnv).thenReturn(rpcEnv)
  when(master.self).thenReturn(masterEndpointRef)
  val masterWebUI = new MasterWebUI(master, 0)

  override def beforeAll(): Unit = {
    super.beforeAll()
    masterWebUI.bind()
  }

  override def afterAll(): Unit = {
    // Always let the parent suite clean up, even if stopping the UI fails.
    try {
      masterWebUI.stop()
    } finally {
      super.afterAll()
    }
  }

  test("kill application") {
    val appDesc = createAppDesc()
    // use new start date so it isn't filtered by UI
    val activeApp = new ApplicationInfo(
      new Date().getTime, "app-0", appDesc, new Date(), null, Int.MaxValue)

    // NOTE(review): `activeApp.id` was dropped by the listing extraction and is restored here
    // and in the POST body below — confirm against the upstream file.
    when(master.idToApp).thenReturn(HashMap[String, ApplicationInfo]((activeApp.id, activeApp)))

    val url = s"http://localhost:${masterWebUI.boundPort}/app/kill/"
    val body = convPostDataToString(Map(("id", activeApp.id), ("terminate", "true")))
    val conn = sendHttpRequest(url, "POST", body)
    conn.getResponseCode

    // Verify the master was called to remove the active app
    verify(master, times(1)).removeApplication(activeApp, ApplicationState.KILLED)
  }

  test("kill driver") {
    val activeDriverId = "driver-0"
    val url = s"http://localhost:${masterWebUI.boundPort}/driver/kill/"
    val body = convPostDataToString(Map(("id", activeDriverId), ("terminate", "true")))
    val conn = sendHttpRequest(url, "POST", body)
    conn.getResponseCode

    // Verify that master was asked to kill driver with the correct id
    verify(masterEndpointRef, times(1)).ask[KillDriverResponse](RequestKillDriver(activeDriverId))
  }

  /** Joins the pairs as `name=value` segments separated by `&` (no URL-encoding is applied). */
  private def convPostDataToString(data: Map[String, String]): String = {
    (for ((name, value) <- data) yield s"$name=$value").mkString("&")
  }

  /**
   * Opens a connection to `url` with the given HTTP method and, when `body` is non-empty,
   * writes it as a UTF-8 form-encoded payload. The response is not read here; callers
   * trigger the exchange via the returned connection.
   */
  private def sendHttpRequest(
      url: String,
      method: String,
      body: String = ""): HttpURLConnection = {
    val conn = new URL(url).openConnection().asInstanceOf[HttpURLConnection]
    conn.setRequestMethod(method)
    if (body.nonEmpty) {
      // Encode once so the declared length matches the bytes actually written
      // (character count and byte count differ for non-ASCII input).
      val payload = body.getBytes(StandardCharsets.UTF_8)
      conn.setDoOutput(true)
      conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded")
      conn.setRequestProperty("Content-Length", Integer.toString(payload.length))
      val out = new DataOutputStream(conn.getOutputStream)
      try {
        out.write(payload)
      } finally {
        out.close()
      }
    }
    conn
  }
}
Example 13
Source File: CMaxTableSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package import import import import import scala.collection.mutable.HashMap import scala.util.Random class CMaxTableSpec extends TorchSpec { "A CMaxTable Module" should "generate correct output and grad" in { torchCheck() val seed = 100 RNG.setSeed(seed) val module = new CMaxTable[Double]() val input1 = Tensor[Double](5).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](5).apply1(e => Random.nextDouble()) val gradOutput = Tensor[Double](5).apply1(e => Random.nextDouble()) val input = new Table() input(1.toDouble) = input1 input(2.toDouble) = input2 val start = System.nanoTime() val output = module.forward(input) val gradInput = module.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start val code = "torch.manualSeed(" + seed + ")\n" + "module = nn.CMaxTable()\n" + "output = module:forward(input)\n" + "gradInput = module:backward(input,gradOutput)" val (luaTime, torchResult) =, Map("input" -> input, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be(output) luaOutput2 should be (gradInput) println("Test case : CMaxTable, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 14
Source File: L1HingeEmbeddingCriterionSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package import import import import import scala.collection.mutable.HashMap import scala.util.Random class L1HingeEmbeddingCriterionSpec extends TorchSpec { "A L1HingeEmbeddingCriterion" should "generate correct output and grad with y == 1 " in { torchCheck() val seed = 2 RNG.setSeed(seed) val module = new L1HingeEmbeddingCriterion[Double](0.6) val input1 = Tensor[Double](2).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](2).apply1(e => Random.nextDouble()) val input = new Table() input(1.0) = input1 input(2.0) = input2 val target = Tensor[Double](1) target(Array(1)) = 1.0 val start = System.nanoTime() val output = module.forward(input, target) val gradInput = module.backward(input, target) val end = System.nanoTime() val scalaTime = end - start val code = "torch.manualSeed(" + seed + ")\n" + "module = nn.L1HingeEmbeddingCriterion(0.6)\n" + "output = module:forward(input, 1)\n" + "gradInput = module:backward(input, 1)\n" val (luaTime, torchResult) =, Map("input" -> input), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Double] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be(output) luaOutput2 should be (gradInput) println("Test case : L1HingeEmbeddingCriterion, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } "A L1HingeEmbeddingCriterion" should "generate correct output and grad with y == -1 " in { torchCheck() val seed = 2 RNG.setSeed(seed) val module = new L1HingeEmbeddingCriterion[Double](0.6) val input1 = Tensor[Double](2).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](2).apply1(e => Random.nextDouble()) val input = new Table() input(1.0) = input1 input(2.0) = input2 val target = Tensor[Double](1) target(Array(1)) = -1.0 val start = System.nanoTime() val output = module.forward(input, target) val gradInput = module.backward(input, target) val end = System.nanoTime() val scalaTime = end - start val code = "torch.manualSeed(" + seed + ")\n" + "module = 
nn.L1HingeEmbeddingCriterion(0.6)\n" + "output = module:forward(input, -1.0)\n" + "gradInput = module:backward(input, -1.0)\n" val (luaTime, torchResult) =, Map("input" -> input), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Double] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be(output) luaOutput2 should be (gradInput) println("Test case : L1HingeEmbeddingCriterion, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 15
Source File: CDivTableSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package import import import import import scala.collection.mutable.HashMap import scala.util.Random class CDivTableSpec extends TorchSpec { "A CDivTable Module" should "generate correct output and grad" in { torchCheck() val seed = 100 RNG.setSeed(seed) val module = new CDivTable[Double]() val input1 = Tensor[Double](5).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](5).apply1(e => Random.nextDouble()) val gradOutput = Tensor[Double](5).apply1(e => Random.nextDouble()) val input = new Table() input(1.toDouble) = input1 input(2.toDouble) = input2 val start = System.nanoTime() val output = module.forward(input) val gradInput = module.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start val code = "torch.manualSeed(" + seed + ")\n" + "module = nn.CDivTable()\n" + "output = module:forward(input)\n" + "gradInput = module:backward(input,gradOutput)" val (luaTime, torchResult) =, Map("input" -> input, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be (output) luaOutput2 should be (gradInput) println("Test case : CDivTable, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 16
Source File: CMulTableSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package import import import import import scala.collection.mutable.HashMap import scala.util.Random class CMulTableSpec extends TorchSpec { "A CMulTable Module" should "generate correct output and grad" in { torchCheck() val seed = 100 RNG.setSeed(seed) val module = new CMulTable[Double]() val input1 = Tensor[Double](5).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](5).apply1(e => Random.nextDouble()) val gradOutput = Tensor[Double](5).apply1(e => Random.nextDouble()) val input = new Table() input(1.toDouble) = input1 input(2.toDouble) = input2 val start = System.nanoTime() val output = module.forward(input) val gradInput = module.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start val code = "torch.manualSeed(" + seed + ")\n" + "module = nn.CMulTable()\n" + "output = module:forward(input)\n" + "gradInput = module:backward(input,gradOutput)" val (luaTime, torchResult) =, Map("input" -> input, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be (output) luaOutput2 should be (gradInput) println("Test case : CMinTable, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 17
Source File: CosineDistanceSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package import import import import import scala.collection.mutable.HashMap import scala.util.Random class CosineDistanceSpec extends TorchSpec { "A CosineDistance " should "generate correct output and grad" in { torchCheck() val seed = 100 RNG.setSeed(seed) val input1 = Tensor[Double](3).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](3).apply1(e => Random.nextDouble()) val gradOutput = Tensor[Double](1).apply1(e => Random.nextDouble()) val input = new Table() input(1.0) = input1 input(2.0) = input2 val code = "torch.manualSeed(" + seed + ")\n" + "module = nn.CosineDistance()\n" + "output = module:forward(input)\n" + "gradInput = module:backward(input,gradOutput)\n" val (luaTime, torchResult) =, Map("input" -> input, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] val module = new CosineDistance[Double]() val start = System.nanoTime() val output = module.forward(input) val gradInput = module.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start output should be(luaOutput1) luaOutput2 should be (gradInput) println("Test case : CosineDistance, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 18
Source File: CosineEmbeddingCriterionSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package import import{Storage, Tensor} import import{RandomGenerator, Table} import scala.collection.mutable.HashMap import scala.util.Random class CosineEmbeddingCriterionSpec extends TorchSpec { "A CosineEmbeddingCriterion Module" should "generate correct output and grad" in { torchCheck() val seed = 100 RNG.setSeed(seed) val module = new CosineEmbeddingCriterion[Double](0.2) val input1 = Tensor[Double](5).apply1(e => RandomGenerator.RNG.uniform(0, 2)) val input2 = Tensor[Double](5).apply1(e => RandomGenerator.RNG.uniform(0, 1)) val input = new Table() input(1.0) = input1 input(2.0) = input2 val target = new Table() val target1 = Tensor[Double](Storage(Array(-0.5))) target(1.toDouble) = target1 val start = System.nanoTime() val output = module.forward(input, target) val gradInput = module.backward(input, target) val end = System.nanoTime() val scalaTime = end - start val code = "torch.manualSeed(" + seed + ")\n" + "module = nn.CosineEmbeddingCriterion(0.2)\n" + "_idx = module._idx\n" + "_outputs = module._outputs\n" + "buffer = module.buffer\n" + "output = module:forward(input, -0.5)\n" + "gradInput = module:backward(input, -0.5)\n" val (luaTime, torchResult) =, Map("input" -> input), Array("output", "gradInput", "_idx", "buffer", "_outputs")) val luaOutput1 = torchResult("output").asInstanceOf[Double] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be(output) luaOutput2 should be (gradInput) println("Test case : CrossEntropyCriterion, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 19
Source File: CosineDistanceCriterionSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package import import{Storage, Tensor} import import{RandomGenerator, Table} import scala.collection.mutable.HashMap import scala.util.Random class CosineDistanceCriterionSpec extends TorchSpec { "A CosineDistanceCriterionSpec Module" should "generate correct output and grad" in { torchCheck() val seed = 100 RNG.setSeed(seed) val module = CosineDistanceCriterion[Double](false) val input1 = Tensor[Double](5).apply1(e => RandomGenerator.RNG.uniform(0, 2)) val input2 = Tensor[Double](5).apply1(e => RandomGenerator.RNG.uniform(0, 1)) val input = new Table() input(1.0) = input1 input(2.0) = input2 val target = new Table() val target1 = Tensor[Double](Storage(Array(1.0))) target(1.toDouble) = target1 val start = System.nanoTime() val output = module.forward(input1, input2) val gradInput = module.backward(input1, input2) val end = System.nanoTime() val scalaTime = end - start val code = "torch.manualSeed(" + seed + ")\n" + "module = nn.CosineEmbeddingCriterion(0.0)\n" + "_idx = module._idx\n" + "_outputs = module._outputs\n" + "buffer = module.buffer\n" + "output = module:forward(input, 1.0)\n" + "gradInput = module:backward(input, 1.0)\n" val (luaTime, torchResult) =, Map("input" -> input), Array("output", "gradInput", "_idx", "buffer", "_outputs")) val luaOutput1 = torchResult("output").asInstanceOf[Double] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be(output) luaOutput2[Tensor[Double]](1) should be (gradInput.squeeze()) println("Test case : CrossEntropyCriterion, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 20
Source File: NarrowTableSpec.scala From BigDL with Apache License 2.0 | 5 votes |
/*
 * Spec for NarrowTable with two cases:
 *  1. NarrowTable(1, 2): forward/backward run 10 times on a three-entry input table and
 *     are compared with the Lua `nn.NarrowTable(1, 2)` results.
 *  2. NarrowTable(2, -2): no Lua comparison; asserts that output entries 1 and 2 equal
 *     input entries 2 and 3, and that gradInput entries 2 and 3 equal gradOutput entries
 *     1 and 2.
 *
 * NOTE(review): the package name, several import targets and the expression between
 * `val (luaTime, torchResult) =` and `, Map(...)` are absent from this listing (presumably
 * lost in extraction); restore them from the upstream file before compiling.
 * NOTE(review): in the second case `i`, `start`, `end` and `scalaTime` are computed but
 * never read.
 */
package import import import{T, Table} import scala.collection.mutable.HashMap import scala.util.Random class NarrowTableSpec extends TorchSpec { "A NarrowTable Module " should "generate correct output and grad" in { torchCheck() val module = new NarrowTable[Double](1, 2) val input = T() input(1.0) = Tensor[Double](2, 3).apply1(e => Random.nextDouble()) input(2.0) = Tensor[Double](2, 1).apply1(e => Random.nextDouble()) input(3.0) = Tensor[Double](2, 2).apply1(e => Random.nextDouble()) val gradOutput = T() gradOutput(1.0) = Tensor[Double](5, 3).apply1(e => Random.nextDouble()) gradOutput(2.0) = Tensor[Double](2, 5).apply1(e => Random.nextDouble()) val code = "module = nn.NarrowTable(1, 2)\n" + "local i = 0\n" + "while i < 10 do\n" + "output = module:forward(input)\n" + "gradInput = module:backward(input, gradOutput)\n" + "i = i + 1\n" + "end" val (luaTime, torchResult) =, Map("input" -> input, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Table] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] val start = System.nanoTime() var i = 0 var output = T() var gradInput = T() while (i < 10) { output = module.forward(input) gradInput = module.backward(input, gradOutput) i += 1 } val end = System.nanoTime() val scalaTime = end - start luaOutput1 should be (output) luaOutput2 should be (gradInput) println("Test case : NarrowTable, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } "A NarrowTable Module with negative length" should "generate correct output and grad" in { torchCheck() val module = new NarrowTable[Double](2, -2) val input = T() input(1.0) = Tensor[Double](2, 3).apply1(e => Random.nextDouble()) input(2.0) = Tensor[Double](2, 1).apply1(e => Random.nextDouble()) input(3.0) = Tensor[Double](2, 2).apply1(e => Random.nextDouble()) input(4.0) = Tensor[Double](2, 2).apply1(e => Random.nextDouble()) val gradOutput = T() gradOutput(1.0) = Tensor[Double](5, 3).apply1(e => 
Random.nextDouble()) gradOutput(2.0) = Tensor[Double](2, 5).apply1(e => Random.nextDouble()) val start = System.nanoTime() var i = 0 var output = T() var gradInput = T() output = module.forward(input) gradInput = module.backward(input, gradOutput) i += 1 val end = System.nanoTime() val scalaTime = end - start val gradInput1 = gradInput[Tensor[Double]](2.0) val gradInput2 = gradInput[Tensor[Double]](3.0) val expectedGradInput1 = gradOutput[Tensor[Double]](1.0) val expectedGradInput2 = gradOutput[Tensor[Double]](2.0) val output1 = output[Tensor[Double]](1.0) val output2 = output[Tensor[Double]](2.0) val expectedOutput1 = input[Tensor[Double]](2.0) val expectedOutput2 = input[Tensor[Double]](3.0) output1 should be (expectedOutput1) output2 should be (expectedOutput2) gradInput1 should be (expectedGradInput1) gradInput2 should be (expectedGradInput2) } }
Example 21
Source File: MarginRankingCriterionSpec.scala From BigDL with Apache License 2.0 | 5 votes |
/*
 * Torch-parity spec for MarginRankingCriterion with two cases: a single-value target of
 * -1.0 (Lua is called with the literal -1) and a 5-element tensor target (Lua receives
 * `target1` under the name "target"). Each case asserts that the Lua output and gradInput
 * equal the Scala results.
 *
 * NOTE(review): the package name, several import targets and the expression between
 * `val (luaTime, torchResult) =` and `, Map(...)` are absent from this listing (presumably
 * lost in extraction); restore them from the upstream file before compiling.
 * NOTE(review): in the first case "target" is passed to Lua but the Lua code never reads it.
 */
package import import{Storage, Tensor} import import scala.collection.mutable.HashMap import scala.util.Random class MarginRankingCriterionSpec extends TorchSpec { "A MarginRankingCriterion " should "generate correct output and grad with only value" in { torchCheck() val mse = new MarginRankingCriterion[Double]() val input1 = Tensor[Double](5).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](5).apply1(e => Random.nextDouble()) val input = new Table() input(1.toDouble) = input1 input(2.toDouble) = input2 val target = new Table() val target1 = Tensor[Double](Storage(Array(-1.0))) target(1.toDouble) = target1 val start = System.nanoTime() val output = mse.forward(input, target) val gradInput = mse.backward(input, target) val end = System.nanoTime() val scalaTime = end - start val code = "mse = nn.MarginRankingCriterion()\n" + "output = mse:forward(input,-1)\n" + "gradInput = mse:backward(input,-1)" val (luaTime, torchResult) =, Map("input" -> input, "target" -> target), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Double] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be (output) gradInput should equal (luaOutput2) println("Test case : MarginRankingCriterion, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } "A MarginRankingCriterion " should "generate correct output and grad with Tensor target" in { torchCheck() val mse = new MarginRankingCriterion[Double]() val input1 = Tensor[Double](5).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](5).apply1(e => Random.nextDouble()) val input = new Table() input(1.toDouble) = input1 input(2.toDouble) = input2 val target = new Table() val target1 = Tensor[Double](5).apply1(e => Random.nextDouble()) target(1.toDouble) = target1 val start = System.nanoTime() val output = mse.forward(input, target) val gradInput = mse.backward(input, target) val end = System.nanoTime() val scalaTime = end - start val code = "mse = 
nn.MarginRankingCriterion()\n" + "output = mse:forward(input, target)\n" + "gradInput = mse:backward(input, target)" val (luaTime, torchResult) =, Map("input" -> input, "target" -> target1), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Double] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be (output) gradInput should equal (luaOutput2) println("Test case : MarginRankingCriterion, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 22
Source File: MaskedSelectSpec.scala From BigDL with Apache License 2.0 | 5 votes |
/*
 * Torch-parity spec for MaskedSelect: the Scala side feeds a Table of a random 2x2 tensor
 * and a 2x2 mask with ones on the diagonal; the Lua side builds the same mask as
 * `torch.ByteTensor({{1, 0}, {0, 1}})` and converts `gradInput[2]` to double before it is
 * returned. Asserts that output and gradInput match.
 *
 * NOTE(review): the package name, the import targets and the expression between
 * `val (luaTime, torchResult) =` and `, Map(...)` are absent from this listing (presumably
 * lost in extraction); restore them from the upstream file before compiling.
 */
package import import import import scala.collection.mutable.HashMap import scala.util.Random class MaskedSelectSpec extends TorchSpec { "A MaskedSelect Module " should "generate correct output and grad" in { torchCheck() val module = new MaskedSelect[Double]() val input1 = Tensor[Double](2, 2).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](2, 2) input2(Array(1, 1)) = 1 input2(Array(1, 2)) = 0 input2(Array(2, 1)) = 0 input2(Array(2, 2)) = 1 val input = new Table() input(1.0) = input1 input(2.0) = input2 val gradOutput = Tensor[Double](5).apply1(e => Random.nextDouble()) val code = "module = nn.MaskedSelect()\n" + "mask = torch.ByteTensor({{1, 0}, {0, 1}})\n" + "output = module:forward({input1, mask})\n" + "gradInput = module:backward({input1, mask}, gradOutput)\n" + "gradInput[2] = gradInput[2]:double()" val (luaTime, torchResult) =, Map("input1" -> input1, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] val start = System.nanoTime() val output = module.forward(input) val gradInput = module.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start output should be (luaOutput1) gradInput should equal (luaOutput2) println("Test case : MaskedSelect, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 23
Source File: CMinTableSpec.scala From BigDL with Apache License 2.0 | 5 votes |
/*
 * Torch-parity spec for CMinTable: runs forward/backward on a Table of two random
 * 5-element tensors and asserts that the results equal those of Lua `nn.CMinTable()`.
 *
 * NOTE(review): the package name, the import targets and the expression between
 * `val (luaTime, torchResult) =` and `, Map(...)` are absent from this listing (presumably
 * lost in extraction); restore them from the upstream file before compiling.
 * NOTE(review): the test is named "A CMaxTable Module" although it exercises CMinTable;
 * looks like a copy-paste slip, confirm and rename.
 * NOTE(review): RNG.setSeed(seed) is called, but the inputs are drawn from
 * scala.util.Random, so the seed does not visibly affect them.
 */
package import import import import import scala.collection.mutable.HashMap import scala.util.Random class CMinTableSpec extends TorchSpec { "A CMaxTable Module" should "generate correct output and grad" in { torchCheck() val seed = 100 RNG.setSeed(seed) val module = new CMinTable[Double]() val input1 = Tensor[Double](5).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](5).apply1(e => Random.nextDouble()) val gradOutput = Tensor[Double](5).apply1(e => Random.nextDouble()) val input = new Table() input(1.toDouble) = input1 input(2.toDouble) = input2 val start = System.nanoTime() val output = module.forward(input) val gradInput = module.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start val code = "torch.manualSeed(" + seed + ")\n" + "module = nn.CMinTable()\n" + "output = module:forward(input)\n" + "gradInput = module:backward(input,gradOutput)\n" val (luaTime, torchResult) =, Map("input" -> input, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be (output) luaOutput2 should be (gradInput) println("Test case : CMinTable, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 24
Source File: MixtureTableSpec.scala From BigDL with Apache License 2.0 | 5 votes |
/*
 * Torch-parity spec for MixtureTable with two cases: experts supplied as a Table of three
 * 5x6 tensors, and experts supplied as a single 5x3x6 tensor. Both use a 5x3 gater input
 * and a 5x6 gradOutput, and assert that output and gradInput equal the Lua
 * `nn.MixtureTable()` results.
 *
 * NOTE(review): the package name, the import targets and the expression between
 * `val (luaTime, torchResult) =` and `, Map(...)` are absent from this listing (presumably
 * lost in extraction); restore them from the upstream file before compiling.
 * NOTE(review): in the first case `expertInput` is created but never used.
 */
package import import import import scala.collection.mutable.HashMap import scala.util.Random class MixtureTableSpec extends TorchSpec { "A MixtureTable " should "generate correct output and grad with table expertInput" in { torchCheck() val mse = new MixtureTable[Double] val expertInput = Tensor[Double](5, 3, 6).apply1(e => Random.nextDouble()) val expertTable = new Table() expertTable(1.0) = Tensor[Double](5, 6).apply1(e => Random.nextDouble()) expertTable(2.0) = Tensor[Double](5, 6).apply1(e => Random.nextDouble()) expertTable(3.0) = Tensor[Double](5, 6).apply1(e => Random.nextDouble()) val input1 = Tensor[Double](5, 3).apply1(e => Random.nextDouble()) val gradOutput = Tensor[Double](5, 6).apply1(e => Random.nextDouble()) val input = new Table() input(1.0) = input1 input(2.0) = expertTable val start = System.nanoTime() val output = mse.forward(input) val gradInput = mse.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start val code = "mse = nn.MixtureTable()\n" + "input = {input1, expertTable}\n" + "output = mse:forward(input)\n" + "gradInput = mse:backward(input,gradOutput)" val (luaTime, torchResult) =, Map("input1" -> input1, "expertTable" -> expertTable, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] output should be (luaOutput1) luaOutput2 should be (gradInput) println("Test case : MixtureTable, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } "A MixtureTable " should "generate correct output and grad with tensor expertInput" in { torchCheck() val mse = new MixtureTable[Double] val expertInput = Tensor[Double](5, 3, 6).apply1(e => Random.nextDouble()) val input1 = Tensor[Double](5, 3).apply1(e => Random.nextDouble()) val gradOutput = Tensor[Double](5, 6).apply1(e => Random.nextDouble()) val input = new Table() input(1.0) = input1 input(2.0) = expertInput val code = "mse = 
nn.MixtureTable()\n" + "output = mse:forward(input)\n" + "gradInput = mse:backward(input,gradOutput)\n" + "size = mse.size\n" + "dim = mse.dim" val (luaTime, torchResult) =, Map("input" -> input, "gradOutput" -> gradOutput), Array("output", "gradInput", "size", "dim")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] val start = System.nanoTime() val output = mse.forward(input) val gradInput = mse.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start output should be (luaOutput1) gradInput should be (luaOutput2) println("Test case : MixtureTable, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 25
Source File: IndexSpec.scala From BigDL with Apache License 2.0 | 5 votes |
/*
 * Torch-parity spec for Index with two cases: Index(1) on a 3-element tensor with indices
 * {1, 2, 2, 3}, and Index(2) on a 3x3 tensor with indices {1, 2, 3, 1}. The Scala index
 * tensor is filled by hand to match the `torch.LongTensor` literal used on the Lua side;
 * each case asserts that output and gradInput match.
 *
 * NOTE(review): the package name, the import targets and the expression between
 * `val (luaTime, torchResult) =` and `, Map(...)` are absent from this listing (presumably
 * lost in extraction); restore them from the upstream file before compiling.
 */
package import import import import import scala.collection.mutable.HashMap import scala.util.Random class IndexSpec extends TorchSpec { "A Index " should "generate correct output and grad with one dimension" in { torchCheck() val seed = 100 RNG.setSeed(seed) val input1 = Tensor[Double](3).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](4) input2(Array(1)) = 1 input2(Array(2)) = 2 input2(Array(3)) = 2 input2(Array(4)) = 3 val gradOutput = Tensor[Double](4).apply1(e => Random.nextDouble()) val input = new Table() input(1.toDouble) = input1 input(2.toDouble) = input2 val code = "torch.manualSeed(" + seed + ")\n" + "input = {input1, torch.LongTensor{1, 2, 2, 3}}\n" + "module = nn.Index(1)\n" + "output = module:forward(input)\n" + "gradInput = module:backward(input,gradOutput)\n" val (luaTime, torchResult) =, Map("input1" -> input1, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] val module = new Index[Double](1) val start = System.nanoTime() val output = module.forward(input) val gradInput = module.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start output should be(luaOutput1) luaOutput2 should be (gradInput) println("Test case : Index, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } "A Index " should "generate correct output and grad with two dimension" in { torchCheck() val seed = 100 RNG.setSeed(seed) val input1 = Tensor[Double](3, 3).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](4) input2(Array(1)) = 1 input2(Array(2)) = 2 input2(Array(3)) = 3 input2(Array(4)) = 1 val gradOutput = Tensor[Double](3, 4).apply1(e => Random.nextDouble()) val input = new Table() input(1.toDouble) = input1 input(2.toDouble) = input2 val code = "torch.manualSeed(" + seed + ")\n" + "input = {input1, torch.LongTensor{1, 2, 3, 1}}\n" + "module = nn.Index(2)\n" + "output = 
module:forward(input)\n" + "gradInput = module:backward(input,gradOutput)\n" val (luaTime, torchResult) =, Map("input1" -> input1, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] val module = new Index[Double](2) val start = System.nanoTime() val output = module.forward(input) val gradInput = module.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start output should be(luaOutput1) luaOutput2 should be (gradInput) println("Test case : Index, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 26
Source File: CSubTableSpec.scala From BigDL with Apache License 2.0 | 5 votes |
/*
 * Torch-parity spec for CSubTable: runs forward/backward on a Table of two random
 * 5-element tensors and asserts that the results equal those of Lua `nn.CSubTable()`.
 *
 * NOTE(review): the package name, the import targets and the expression between
 * `val (luaTime, torchResult) =` and `, Map(...)` are absent from this listing (presumably
 * lost in extraction); restore them from the upstream file before compiling.
 * NOTE(review): the test is named "A CDivTable Module" although it exercises CSubTable;
 * looks like a copy-paste slip, confirm and rename.
 */
package import import import import import scala.collection.mutable.HashMap import scala.util.Random class CSubTableSpec extends TorchSpec { "A CDivTable Module" should "generate correct output and grad" in { torchCheck() val seed = 100 RNG.setSeed(seed) val module = new CSubTable[Double]() val input1 = Tensor[Double](5).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](5).apply1(e => Random.nextDouble()) val gradOutput = Tensor[Double](5).apply1(e => Random.nextDouble()) val input = new Table() input(1.toDouble) = input1 input(2.toDouble) = input2 val start = System.nanoTime() val output = module.forward(input) val gradInput = module.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start val code = "torch.manualSeed(" + seed + ")\n" + "module = nn.CSubTable()\n" + "output = module:forward(input)\n" + "gradInput = module:backward(input,gradOutput)" val (luaTime, torchResult) =, Map("input" -> input, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be(output) luaOutput2 should be (gradInput) println("Test case : CSubTable, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 27
Source File: KeyBinder.scala From slide-desktop with GNU General Public License v2.0 | 5 votes |
package gui

import java.awt.event.{KeyEvent, KeyListener}

import scala.collection.mutable.HashMap

/**
 * Key listener that fires [[onKeysDown]] once every key code in `keyCodes` is held down at
 * the same time.
 *
 * After the callback fires, the record of pressed keys is cleared, so every key of the
 * chord has to be pressed again before the callback fires a second time.
 *
 * @param keyCodes the `KeyEvent` key codes that make up the chord; with no codes, every
 *                 key press fires the callback
 */
abstract class KeyBinder(val keyCodes: Int*) extends KeyListener {

  /** Keys currently held down; a key is removed again when it is released. */
  private val keyMap: HashMap[Int, Boolean] = new HashMap[Int, Boolean]

  override def keyTyped(e: KeyEvent): Unit = {}

  override def keyPressed(e: KeyEvent): Unit = {
    keyMap.put(e.getKeyCode, true)
    if (getKeysDown) onKeysDown()
  }

  override def keyReleased(e: KeyEvent): Unit = {
    keyMap.remove(e.getKeyCode)
    ()
  }

  /**
   * Reports whether the whole chord is down, and resets the pressed-key record when it is.
   *
   * Written with `forall`/`getOrElse` rather than a `return` inside a `foreach` lambda
   * (a non-local return, implemented by throwing) and `Option.get`.
   */
  private def getKeysDown: Boolean = {
    val allDown = keyCodes.forall(key => keyMap.getOrElse(key, false))
    if (allDown) keyMap.clear()
    allDown
  }

  /** Invoked when all configured keys are down at once. */
  def onKeysDown(): Unit
}
Example 28
Source File: TopElementsAggregator.scala From salt-core with Apache License 2.0 | 5 votes |
package software.uncharted.salt.core.analytic.collection

import software.uncharted.salt.core.analytic.Aggregator
import scala.collection.Map
import scala.collection.mutable.HashMap
import scala.collection.mutable.ListBuffer
import scala.collection.mutable.{Map => MutableMap}
import scala.collection.mutable.PriorityQueue
import scala.reflect.ClassTag

/**
 * Aggregator that counts how often each element occurs and reports the most frequent ones.
 *
 * Input records are sequences of elements, the intermediate value is an element -> count
 * map, and the finished value holds at most `elementLimit` (element, count) pairs, highest
 * count first.
 *
 * @param elementLimit maximum number of (element, count) pairs returned by `finish`
 * @tparam ET element type
 */
class TopElementsAggregator[ET: ClassTag](elementLimit: Int)
  extends Aggregator[Seq[ET], Map[ET, Int], List[(ET, Int)]] {

  def default(): Map[ET, Int] = Map[ET, Int]()

  /**
   * Returns `counts` itself when it is already mutable, otherwise a mutable copy.
   * The type arguments are erased at runtime, hence the `@unchecked` annotations.
   */
  private def asMutable(counts: Map[ET, Int]): MutableMap[ET, Int] = counts match {
    case alreadyMutable: MutableMap[ET @unchecked, Int @unchecked] => alreadyMutable
    case _ =>
      val copy = new HashMap[ET, Int]()
      copy ++= counts
      copy
  }

  /**
   * Adds one occurrence per element of `next` to the running counts.
   *
   * A mutable `current` is updated in place and returned; an immutable one is copied
   * first. When `next` is empty, `current` is returned untouched.
   */
  override def add(current: Map[ET, Int], next: Option[Seq[ET]]): Map[ET, Int] =
    next.fold(current) { elements =>
      val counts = asMutable(current)
      elements.foreach(element => counts.put(element, counts.getOrElse(element, 0) + 1))
      counts
    }

  /**
   * Sums two count maps.
   *
   * If either input is mutable the other is merged into it (preferring `left`); if neither
   * is, `left` is copied into a fresh mutable map and `right` is merged into that.
   */
  override def merge(left: Map[ET, Int], right: Map[ET, Int]): Map[ET, Int] = {
    val (to, from) = (left, right) match {
      case (mutableLeft: MutableMap[ET @unchecked, Int @unchecked], _) => (mutableLeft, right)
      case (_, mutableRight: MutableMap[ET @unchecked, Int @unchecked]) => (mutableRight, left)
      case _ => (asMutable(left), right)
    }
    from.foreach { case (element, count) =>
      to.put(element, to.getOrElse(element, 0) + count)
    }
    to
  }

  /**
   * Returns up to `elementLimit` entries with the highest counts, highest first.
   */
  override def finish(intermediate: Map[ET, Int]): List[(ET, Int)] = {
    // The ordering argument was unreadable in the listing (`()( a => a._2 ))`); ordering by
    // the count is the only reading that fits the surviving lambda and parentheses.
    val byCount = new PriorityQueue[(ET, Int)]()(Ordering.by((entry: (ET, Int)) => entry._2))
    byCount ++= intermediate
    val result = new ListBuffer[(ET, Int)]
    for (_ <- 0 until Math.min(elementLimit, byCount.size)) {
      result.append(byCount.dequeue())
    }
    result.toList
  }
}
Example 29
Source File: ConfManager.scala From HadoopLearning with MIT License | 5 votes |
package com.utils

import java.util.regex.Pattern

import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.mutable.HashMap

/**
 * Spark Streaming configuration helpers.
 *
 * @author liumm
 * @since 2018-07-27 20:27
 */
object ConfManager {

  /**
   * Maximum number of records written to storage per batch.
   */
  val maxRecords = 1000

  /**
   * Builds the Kafka settings for a stream.
   *
   * @param streamConf stream configuration supplying brokers, group id and topic pattern
   * @return the consumer configuration together with the compiled topic pattern
   */
  def kafkaParam(streamConf: StreamConf): (Map[String, Object], Pattern) = {
    (getConsumerConfig(streamConf.brokers, streamConf.groupId), Pattern.compile(streamConf.topics))
  }

  /**
   * Builds the string-valued Kafka parameters used for metadata lookups.
   *
   * @param streamConf stream configuration supplying brokers and group id
   */
  def kafkaParamForMetadata(streamConf: StreamConf): Map[String, String] =
    Map[String, String](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> streamConf.brokers,
      // NOTE(review): the key is an empty string in this listing; it presumably named a
      // broker-list property that was lost in extraction. Confirm against upstream.
      "" -> streamConf.brokers,
      // NOTE(review): "smallest" is the legacy spelling; the current consumer documents
      // earliest/latest/none. Confirm which client reads this map.
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "smallest",
      ConsumerConfig.GROUP_ID_CONFIG -> streamConf.groupId
    )

  /**
   * Builds the Kafka consumer configuration.
   *
   * @return the Kafka consumer configuration
   */
  private def getConsumerConfig(brokers: String, groupId: String): Map[String, Object] =
    Map[String, Object](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokers,
      ConsumerConfig.GROUP_ID_CONFIG -> groupId,
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
      // Integer.valueOf replaces the deprecated `new Integer(...)` constructor.
      ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG -> Integer.valueOf(3 * 1024 * 1024),
      ConsumerConfig.MAX_POLL_RECORDS_CONFIG -> Integer.valueOf(100),
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "latest",
      // Turn off Kafka's automatic offset commit.
      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> java.lang.Boolean.FALSE
    )

  /** Creates a StreamConf populated with the hard-coded ZooKeeper, broker, group and topic values. */
  def newStreamConf(): StreamConf = {
    val conf = new StreamConf()
    conf.zkUrl = "hdp01:2181"
    conf.brokers = "hdp01:9092"
    conf.groupId = "liumm_group"
    conf.topics = "i57_.*"
    conf
  }
}
Example 30
Source File: GlobalPerformer.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
/*
 * Actor for an orchestration's global performers.
 *
 * Message handling visible here:
 *  - GlobalPerformer.PRINT_GLOBAL: forwards FeyGenericActor.PRINT_PATH to the actors
 *    selected by "*".
 *  - Terminated(actor): reports the termination to the monitoring actor, logs an error,
 *    unwatches and stops every child, then throws RestartGlobalPerformers.
 *  - GetRoutees: ignored.
 *  - anything else: logged as a warning.
 *
 * `loadClazzFromJar` delegates to Utils.loadActorClassFromJar and logs before rethrowing on
 * failure. The companion object holds the `activeGlobalPerformers` registry and the
 * PRINT_GLOBAL message.
 *
 * NOTE(review): an import target and the expressions inside both `${}` interpolations are
 * absent from this listing (presumably lost in extraction); restore them from the upstream
 * file before compiling.
 * NOTE(review): `global_metadata`, `globalPerformers`, `ensemblesSpec` and
 * `loadClazzFromJar` have no visible use here; the listing may be an excerpt.
 */
package org.apache.iota.fey import import{Actor, ActorLogging, ActorRef, OneForOneStrategy, Props, Terminated} import akka.routing._ import play.api.libs.json.JsObject import scala.collection.mutable.HashMap import scala.concurrent.duration._ protected class GlobalPerformer(val orchestrationID: String, val orchestrationName: String, val globalPerformers: List[JsObject], val ensemblesSpec : List[JsObject]) extends Actor with ActorLogging{ val monitoring_actor = FEY_MONITOR.actorRef var global_metadata: Map[String, Performer] = Map.empty[String, Performer] override def receive: Receive = { case GlobalPerformer.PRINT_GLOBAL => context.actorSelection(s"*") ! FeyGenericActor.PRINT_PATH case Terminated(actor) => monitoring_actor ! Monitor.TERMINATE(actor.path.toString, Utils.getTimestamp) log.error(s"DEAD Global Performers ${}") context.children.foreach{ child => context.unwatch(child) context.stop(child) } throw new RestartGlobalPerformers(s"DEAD Global Performer ${}") case GetRoutees => //Discard case x => log.warning(s"Message $x not treated by Global Performers") } private def loadClazzFromJar(classPath: String, jarLocation: String, jarName: String):Class[FeyGenericActor] = { try { Utils.loadActorClassFromJar(jarLocation,classPath,jarName) }catch { case e: Exception => log.error(e,s"Could not load class $classPath from jar $jarLocation. Please, check the Jar repository path as well the jar name") throw e } } } object GlobalPerformer{ val activeGlobalPerformers:HashMap[String, Map[String, ActorRef]] = HashMap.empty[String, Map[String, ActorRef]] case object PRINT_GLOBAL }
Example 31
Source File: TokenAuthorizingInterceptor.scala From meteorite-core with Apache License 2.0 | 5 votes |
// NOTE(review): the package name and one import target are absent from this listing
// (presumably lost in extraction). The incomplete `package` and `import` clauses below are
// kept as found and must be restored from the upstream file before compiling.
package
import java.lang.reflect.Method
import
import java.util
import scala.collection.mutable.HashMap
import scala.collection.mutable.ListBuffer
import scala.collection.JavaConversions._
import TokenAuthorizingInterceptor._
import scala.collection.JavaConverters._

object TokenAuthorizingInterceptor {

  /**
   * Splits each space-separated role string into a list of role names.
   *
   * @param rolesMap key -> "role1 role2 ..." entries
   * @return a mutable map from the same keys to the individual role names
   */
  private def parseRolesMap(rolesMap: Map[String, String]): scala.collection.mutable.HashMap[String, List[String]] = {
    val parsed = new scala.collection.mutable.HashMap[String, List[String]]()
    for ((key, value) <- rolesMap) {
      parsed.put(key, value.split(" ").toList)
    }
    parsed
  }
}

/**
 * Authorizing interceptor driven by configurable method -> roles, user -> roles and global
 * role settings.
 */
class TokenAuthorizingInterceptor(uniqueId: Boolean) extends TokenAbstractAutorizingInInterceptor(uniqueId) {

  private val methodRolesMap = new HashMap[String, List[String]]()

  private var userRolesMap = new scala.collection.mutable.HashMap[String, List[String]]

  private var globalRoles = new scala.collection.mutable.ListBuffer[String]

  private var checkConfiguredRolesOnly: Boolean = _

  def this() = this(true)

  /**
   * Decides whether the current user holds one of `roles`.
   *
   * Unless `checkConfiguredRolesOnly` is set, the superclass check must pass first. When a
   * user-roles map is configured, the user must be listed there with at least one of the
   * requested roles. Users missing from the map are rejected: the previous `== null` test
   * on the lookup result could never be true, so such users reached `Option.get` and hit a
   * `NoSuchElementException`.
   */
  protected override def isUserInRole(sc: SecurityContext, roles: util.List[String], deny: Boolean): Boolean = {
    if (!checkConfiguredRolesOnly && !super.isUserInRole(sc, roles, deny)) {
      false
    } else if (userRolesMap.nonEmpty) {
      userRolesMap
        .get(sc.getUserPrincipal.getName)
        .exists(userRoles => roles.asScala.exists(userRoles.contains))
    } else {
      !checkConfiguredRolesOnly
    }
  }

  /**
   * Builds "returnType name(paramType...)" for `method`.
   *
   * The signature used to be built and then discarded in favour of the bare method name;
   * it is now returned. `getExpectedRoles` still falls back to the bare name, so
   * configurations keyed by method name keep working.
   */
  private def createMethodSig(method: Method): String = {
    val b = new StringBuilder(method.getReturnType.getName)
    b.append(' ').append(method.getName).append('(')
    for (cls <- method.getParameterTypes) {
      b.append(cls.getName)
    }
    b.append(')')
    b.toString
  }

  /**
   * Looks up the roles required for `method`: by full signature first, then by method name,
   * and finally the global roles.
   */
  protected override def getExpectedRoles(method: Method): util.List[String] =
    methodRolesMap
      .get(createMethodSig(method))
      .orElse(methodRolesMap.get(method.getName))
      .getOrElse(globalRoles.toList)
      .asJava

  /** Adds method -> "role1 role2 ..." entries to the method-roles map. */
  def setMethodRolesMap(rolesMap: java.util.Map[String, String]): Unit = {
    methodRolesMap ++= parseRolesMap(rolesMap.asScala.toMap)
    ()
  }

  /** Replaces the user-roles map with the parsed user -> "role1 role2 ..." entries. */
  def setUserRolesMap(rolesMap: java.util.Map[String, String]): Unit =
    userRolesMap = parseRolesMap(rolesMap.asScala.toMap)

  /** Replaces the global roles with the space-separated names in `roles`. */
  def setGlobalRoles(roles: String): Unit =
    globalRoles = ListBuffer(roles.split(" "): _*)

  def setCheckConfiguredRolesOnly(checkConfiguredRolesOnly: Boolean): Unit =
    this.checkConfiguredRolesOnly = checkConfiguredRolesOnly
}
Example 32
Source File: KernelMatrix.scala From keystone with Apache License 2.0 | 5 votes |
package keystoneml.nodes.learning

import scala.collection.mutable.HashMap
import scala.reflect.ClassTag

import breeze.linalg._
import org.apache.spark.rdd.RDD

import keystoneml.utils.{MatrixUtils, Stats}
import keystoneml.workflow.{Transformer, LabelEstimator}

/**
 * Kernel matrix whose column blocks are produced on demand by `kernelGen` over `data`.
 *
 * @param kernelGen   transformer used to compute a column block and its diagonal block
 * @param data        data set the kernel is computed over
 * @param cacheKernel when true, computed blocks are remembered and reused
 */
class BlockKernelMatrix[T: ClassTag](
    val kernelGen: KernelTransformer[T],
    val data: RDD[T],
    val cacheKernel: Boolean)
  extends KernelMatrix {

  val colBlockCache = HashMap.empty[Seq[Int], RDD[DenseMatrix[Double]]]

  val diagBlockCache = HashMap.empty[Seq[Int], DenseMatrix[Double]]

  /** Computes both blocks for `idxs` and records them when caching is enabled. */
  private def computeBlocks(idxs: Seq[Int]): (RDD[DenseMatrix[Double]], DenseMatrix[Double]) = {
    val blocks = kernelGen.computeKernel(data, idxs)
    if (cacheKernel) {
      colBlockCache += (idxs -> blocks._1)
      diagBlockCache += (idxs -> blocks._2)
    }
    blocks
  }

  /** Returns the column block for `colIdxs`, from the cache when present. */
  def apply(colIdxs: Seq[Int]): RDD[DenseMatrix[Double]] =
    colBlockCache.get(colIdxs) match {
      case Some(cached) => cached
      case None => computeBlocks(colIdxs)._1
    }

  /** Unpersists the cached column block for `colIdxs`, but only when caching is disabled. */
  def unpersist(colIdxs: Seq[Int]): Unit = {
    if (!cacheKernel) {
      colBlockCache.get(colIdxs).foreach(block => block.unpersist(true))
    }
  }

  /** Returns the diagonal block for `idxs`, from the cache when present. */
  def diagBlock(idxs: Seq[Int]): DenseMatrix[Double] =
    diagBlockCache.get(idxs) match {
      case Some(cached) => cached
      case None => computeBlocks(idxs)._2
    }
}
Example 33
Source File: ParameterTest.scala From maha with Apache License 2.0 | 5 votes |
/*
 * Tests for Parameter serialization:
 *  - serializeParameters: a map of 13 parameter values is serialized; the result length
 *    must equal the map size, and each entry is checked as a JString or JBool against the
 *    original value.
 *  - deserializeParameters: a JSON document with 13 fields is parsed and each resulting
 *    parameter value is checked.
 *
 * NOTE(review): the package name, an import target, the right-hand side of `val newMap`
 * and the argument of `result.getOrElse()` are absent from this listing (presumably lost in
 * extraction); restore them from the upstream file before compiling.
 * NOTE(review): the host name literals are empty strings in both tests; they may also have
 * been stripped. Confirm.
 */
// Copyright 2017, Yahoo Holdings Inc. // Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms. package import import{Engine, HiveEngine} import org.json4s._ import org.json4s.jackson.JsonMethods import org.scalatest.{FunSuite, Matchers} import scala.collection.mutable.HashMap class ParameterTest extends FunSuite with Matchers { test("SerializeParameters should serialize a map of parameters into a List") { val map_parameters = new HashMap[Parameter, ParameterValue[_]] map_parameters.put(Parameter.ReportFormat, ReportFormatValue(ReportFormatType.CSVFormat)) map_parameters.put(Parameter.DryRun, DryRunValue(false)) map_parameters.put(Parameter.GeneratedQuery, GeneratedQueryValue("GeneratedQuery")) map_parameters.put(Parameter.QueryEngine, QueryEngineValue(Engine.from("hive").get)) map_parameters.put(Parameter.Debug, DebugValue(true)) map_parameters.put(Parameter.RequestId, RequestIdValue("RequestId")) map_parameters.put(Parameter.UserId, UserIdValue("UserId")) map_parameters.put(Parameter.TimeZone, TimeZoneValue("TimeZone")) map_parameters.put(Parameter.Schema, SchemaValue("Schema")) map_parameters.put(Parameter.Distinct, DistinctValue(true)) map_parameters.put(Parameter.JobName, JobNameValue("tools_1")) map_parameters.put(Parameter.RegistryName, RegistryNameValue("mahaRegistry")) map_parameters.put(Parameter.HostName, HostNameValue("")) val result = Parameter.serializeParameters(map_parameters.toMap) result.length shouldBe map_parameters.size val newMap => t._1 -> t._2).toMap for((k,v) <- map_parameters) { newMap.get(k.entryName).get match{ case JString(x) => v.value match { case CSVFormat => x shouldBe "csv" case HiveEngine => x shouldBe "Hive" case _ => x shouldBe v.value } case JBool(x) => x shouldBe v.value case _ => fail } } } test("DeserializeParameters should deserialize a JSON into a Map of parameter values") { val inputJson= """ |{ | "Report-Format": "csv", | "Dry-Run": false, | "Generated-Query": 
"Generated-Query", | "Query-Engine": "oracle", | "debug": true, | "Request-Id": "Request-Id", | "User-Id": "User-Id", | "TimeZone": "TimeZone", | "Schema": "Schema", | "Distinct": true, | "Job-Name": "Job-Name", | "RegistryName": "mahaRegistry", | "HostName": "" |} |""".stripMargin val result = Parameter.deserializeParameters(JsonMethods.parse(inputJson)) result.getOrElse() match{ case m: Map[Parameter, ParameterValue[_]] => { m.size shouldBe 13 m.get(Parameter.ReportFormat).get shouldBe ReportFormatValue(ReportFormatType.CSVFormat) m.get(Parameter.DryRun).get shouldBe DryRunValue(false) m.get(Parameter.GeneratedQuery).get shouldBe GeneratedQueryValue("Generated-Query") m.get(Parameter.QueryEngine).get shouldBe QueryEngineValue(Engine.from("oracle").get) m.get(Parameter.Debug).get shouldBe DebugValue(true) m.get(Parameter.RequestId).get shouldBe RequestIdValue("Request-Id") m.get(Parameter.UserId).get shouldBe UserIdValue("User-Id") m.get(Parameter.TimeZone).get shouldBe TimeZoneValue("TimeZone") m.get(Parameter.Schema).get shouldBe SchemaValue("Schema") m.get(Parameter.Distinct).get shouldBe DistinctValue(true) m.get(Parameter.JobName).get shouldBe JobNameValue("Job-Name") m.get(Parameter.RegistryName).get shouldBe RegistryNameValue("mahaRegistry") m.get(Parameter.HostName).get shouldBe HostNameValue("") } case _ => fail } } }
Example 34
Source File: depgraph.scala From sbt-blockade with Apache License 2.0 | 5 votes |
/*
 * Package object `depgraph`: a small dependency-graph model (ModuleId, Module, Edge, ModuleGraph)
 * plus SbtUpdateReport.fromConfigurationReport, which builds a ModuleGraph from an sbt
 * ConfigurationReport and the root module id.
 * ModuleGraph.createMap groups the edges into a map from module id to the modules bound to it;
 * dependencyMap uses the edges as given, reverseDependencyMap swaps each edge first.
 * NOTE(review): this listing is collapsed and extraction-damaged. One import, the `name` argument
 * of the implicit `id` conversion, the result expression of `moduleEdges`, the `license` value,
 * the receiver of the callers-to-edges lambda, and the tails of `modules`, `createMap` and `roots`
 * are missing. Restore them from the upstream file before compiling.
 */
//: ---------------------------------------------------------------------------- //: Copyright 2015 Johannes Rudolph //: //: Distributed under the Apache 2.0 License, please see the NOTICE //: file in the root of the project for further details. //: ---------------------------------------------------------------------------- package object depgraph { import import sbt._ import scala.collection.mutable.{HashMap, MultiMap, Set} import scala.language.reflectiveCalls object SbtUpdateReport { type OrganizationArtifactReport = { def modules: Seq[ModuleReport] } def fromConfigurationReport(report: ConfigurationReport, rootInfo: sbt.ModuleID): ModuleGraph = { implicit def id(sbtId: sbt.ModuleID): ModuleId = ModuleId(sbtId.organization,, sbtId.revision) def moduleEdges(orgArt: OrganizationArtifactReport): Seq[(Module, Seq[Edge])] = { val chosenVersion = orgArt.modules.find(!_.evicted).map(_.module.revision) } def moduleEdge(chosenVersion: Option[String])(report: ModuleReport): (Module, Seq[Edge]) = { val evictedByVersion = if (report.evicted) chosenVersion else None val jarFile = report.artifacts.find(_._1.`type` == "jar").orElse(report.artifacts.find(_._1.extension == "jar")).map(_._2) (Module( id = report.module, license =, evictedByVersion = evictedByVersion, jarFile = jarFile, error = report.problem ), ⇒ Edge(caller.caller, report.module))) } val (nodes, edges) = report.details.flatMap(moduleEdges).unzip val root = Module(rootInfo) ModuleGraph(root +: nodes, edges.flatten) } } type Edge = (ModuleId, ModuleId) def Edge(from: ModuleId, to: ModuleId): Edge = from -> to case class ModuleId(organisation: String, name: String, version: String) { def idString: String = organisation + ":" + name + ":" + version } case class Module(id: ModuleId, license: Option[String] = None, extraInfo: String = "", evictedByVersion: Option[String] = None, jarFile: Option[File] = None, error: Option[String] = None) { def hadError: Boolean = error.isDefined def isUsed: Boolean = !isEvicted def
isEvicted: Boolean = evictedByVersion.isDefined } case class ModuleGraph(nodes: Seq[Module], edges: Seq[Edge]) { lazy val modules: Map[ModuleId, Module] = ⇒ (, n)).toMap def module(id: ModuleId): Module = modules(id) lazy val dependencyMap: Map[ModuleId, Seq[Module]] = createMap(identity) lazy val reverseDependencyMap: Map[ModuleId, Seq[Module]] = createMap { case (a, b) ⇒ (b, a) } def createMap(bindingFor: ((ModuleId, ModuleId)) ⇒ (ModuleId, ModuleId)): Map[ModuleId, Seq[Module]] = { val m = new HashMap[ModuleId, Set[Module]] with MultiMap[ModuleId, Module] edges.foreach { entry ⇒ val (f, t) = bindingFor(entry) m.addBinding(f, module(t)) } m.toMap.mapValues(_.toSeq.sortBy( } def roots: Seq[Module] = nodes.filter(n ⇒ !edges.exists(_._2 == def isEmpty: Boolean = nodes.isEmpty } }
Example 35
Source File: Checksum.scala From schedoscope with Apache License 2.0 | 5 votes |
package org.schedoscope.dsl.transformations

import java.security.MessageDigest

import org.apache.hadoop.fs.{FileStatus, Path}
import org.schedoscope.Schedoscope
import org.schedoscope.scheduler.driver.FilesystemDriver._

import scala.Array.canBuildFrom
import scala.collection.mutable.HashMap
import scala.util.control.NonFatal

/**
 * Checksum helpers: per-file checksums obtained from the file system, a cache of checksums for
 * lists of resources, and an MD5 digest over arbitrary strings.
 */
object Checksum {

  // A `def`, so every call obtains its own MessageDigest instance.
  private def md5 = MessageDigest.getInstance("MD5")

  /** Expands the glob `path`; a `null` result from `globStatus` is mapped to an empty array. */
  private def listFiles(path: String): Array[FileStatus] =
    Option(fileSystem(path, Schedoscope.settings.hadoopConf).globStatus(new Path(path)))
      .getOrElse(Array.empty[FileStatus])

  /**
   * Checksum of a single file.
   *
   * Returns "null-checksum" for a `null` path. Returns the path itself for `.jar` paths, when
   * the file system reports no checksum, or when the lookup fails.
   */
  private def fileChecksum(path: String): String =
    if (path == null)
      "null-checksum"
    else if (path.endsWith(".jar"))
      path
    else
      try {
        val checksum = fileSystem(path, Schedoscope.settings.hadoopConf).getFileChecksum(new Path(path))
        if (checksum == null) path else checksum.toString
      } catch {
        // Best effort: a non-fatal failure degrades to the path; fatal JVM errors propagate.
        case NonFatal(_) => path
      }

  /**
   * Sorted checksums of the given paths.
   *
   * @param paths     files or directories
   * @param recursive when true, directories are expanded (via the glob `path/\*`) and descended
   *                  into; when false, directories contribute nothing
   */
  def fileChecksums(paths: List[String], recursive: Boolean): List[String] =
    paths.flatMap { path =>
      if (fileSystem(path, Schedoscope.settings.hadoopConf).isFile(new Path(path)))
        List(fileChecksum(path))
      else if (recursive)
        fileChecksums(listFiles(path + "/*").map(_.getPath.toString).toList, recursive)
      else
        Nil
    }.sorted

  /** Cache of recursive checksums, keyed by the exact list of resources requested. */
  val resourceHashCache = new HashMap[List[String], List[String]]()

  /** Recursive checksums of `resources`, computed once per distinct list and then cached. */
  def resourceHashes(resources: List[String]): List[String] = synchronized {
    resourceHashCache.getOrElseUpdate(resources, fileChecksums(resources, true))
  }

  /** Digest returned when there is nothing to digest. */
  val defaultDigest = "0"

  /**
   * Upper-case hex MD5 of the sorted, concatenated input strings, or [[defaultDigest]] when no
   * strings are given.
   */
  def digest(stringsToDigest: String*): String =
    if (stringsToDigest.isEmpty)
      defaultDigest
    else
      // Each char is narrowed to a single byte. This is kept as is so existing digests stay stable.
      md5
        .digest(stringsToDigest.sorted.mkString.toCharArray().map(_.toByte))
        .map("%02X" format _)
        .mkString

  object SchemaChecksum {
    val checksumProperty = "schema.checksum"
  }

  object TransformationChecksum {
    val checksumProperty = "transformation.checksum"
    val timestampProperty = "transformation.timestamp"
  }

}
Example 36
Source File: BackOffSupervision.scala From schedoscope with Apache License 2.0 | 5 votes |
/*
 * manageActorLifecycle keeps one back-off state per managed actor in actorBackOffWaitTime, keyed
 * by the actor's path without address, and returns the delay to wait before the next tick:
 *  - actor already tracked: store its nextBackOff, log a warning, return its backOffWaitTime;
 *  - actor not yet tracked: register an initial ExponentialBackOff built from backOffSlotTime and
 *    backOffMinimumDelay, log at debug level, return 0 millis.
 * NOTE(review): the enclosing class header and the definitions of `log`, `managerName` and
 * `actorBackOffWaitTime` are not part of this listing, and one import lost its package prefix
 * (`import{ActorRef, ActorSystem}`, presumably akka.actor). Confirm against the upstream file.
 */
package org.schedoscope.scheduler.utils import{ActorRef, ActorSystem} import org.slf4j.LoggerFactory import scala.collection.mutable.HashMap import scala.concurrent.duration.{FiniteDuration, _} def manageActorLifecycle(managedActor: ActorRef, backOffSlotTime: FiniteDuration = null, backOffMinimumDelay: FiniteDuration = null): FiniteDuration = { val managedActorName = managedActor.path.toStringWithoutAddress if (actorBackOffWaitTime.contains(managedActorName)) { val newBackOff = actorBackOffWaitTime(managedActorName).nextBackOff actorBackOffWaitTime.put(managedActorName, newBackOff) log.warn(s"$managerName: Set new back-off waiting " + s"time to value ${newBackOff.backOffWaitTime} for rebooted actor ${managedActorName}; " + s"(retries=${newBackOff.retries}, resets=${newBackOff.resets}, total-retries=${newBackOff.totalRetries})") //schedule tick response based on backoff newBackOff.backOffWaitTime } else { val backOff = ExponentialBackOff(backOffSlotTime = backOffSlotTime, constantDelay = backOffMinimumDelay) log.debug(s"$managerName: Set initial back-off waiting " + s"time to value ${backOff.backOffWaitTime} for booted actor ${managedActorName}; " + s"(retries=${backOff.retries}, resets=${backOff.resets}, total-retries=${backOff.totalRetries})") actorBackOffWaitTime.put(managedActorName, backOff) //schedule immediate tick response 0 millis } } }
Example 37
Source File: Database.scala From schedoscope with Apache License 2.0 | 5 votes |
package org.schedoscope.test

import java.sql.{Connection, ResultSet, Statement}

import org.schedoscope.dsl.{FieldLike, View}
import org.schedoscope.schema.ddl.HiveQl

import scala.collection.mutable.{HashMap, ListBuffer}
import scala.util.control.NonFatal

/**
 * Reads the rows of a view through a JDBC connection.
 *
 * @param conn JDBC connection the queries run on
 * @param url  connection URL (not used by the methods in this class)
 */
class Database(conn: Connection, url: String) {

  /**
   * Runs `query` and converts every result row into a map from field name to deserialized value.
   *
   * Columns are read by position, in the order of `v.fields`, as strings and then passed through
   * `ViewSerDe.deserializeField` with the field's type.
   *
   * @param v            view whose fields describe the result columns
   * @param query        query to execute
   * @param orderByField when defined, rows are sorted by the string form of that field's value;
   *                     `null` values sort as the empty string
   * @return one map per row
   */
  def selectForViewByQuery(v: View, query: String, orderByField: Option[FieldLike[_]]): List[Map[String, Any]] = {
    val res = ListBuffer[Map[String, Any]]()
    var statement: Statement = null
    var rs: ResultSet = null

    try {
      statement = conn.createStatement()
      rs = statement.executeQuery(query)

      while (rs.next()) {
        val row = HashMap[String, Any]()
        v.fields.view.zipWithIndex.foreach { case (field, idx) =>
          // JDBC columns are 1-based
          row.put(field.n, ViewSerDe.deserializeField(field.t, rs.getString(idx + 1)))
        }
        res.append(row.toMap)
      }
    } finally {
      closeQuietly(rs)
      closeQuietly(statement)
    }

    orderByField match {
      case Some(f) => res.sortBy(row => Option(row(f.n)).fold("")(_.toString)).toList
      case None => res.toList
    }
  }

  /** Selects all rows of the view using the query produced by `HiveQl.selectAll`. */
  def selectView(v: View, orderByField: Option[FieldLike[_]]): List[Map[String, Any]] =
    selectForViewByQuery(v, HiveQl.selectAll(v), orderByField)

  // Closes a JDBC resource if it was opened, ignoring non-fatal failures while closing.
  private def closeQuietly(resource: AutoCloseable): Unit =
    if (resource != null)
      try resource.close()
      catch {
        case NonFatal(_) =>
      }

}
Example 38
Source File: AETest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
/*
 * FlatSpec test for the auto-encoder with a single hidden layer. It builds an AEConfig from the
 * constants declared in the test, loads price data for the listed symbols, trains
 * AE(config, 8, xv.toVector) and then either prints four entries of the first synapse of the
 * resulting model or fails when the model or its synapses are missing.
 * NOTE(review): this listing is collapsed and extraction-damaged. Two imports and the receiver of
 * the `val prices` expression are missing, so the origin of `DataSource` and `close` cannot be
 * seen here. Restore them from the upstream file before compiling.
 */
package org.scalaml.unsupervised.dl.autoencoder import org.scalaml.{Logging, Resource} import org.scalaml.Predef.DblVec import import import org.scalatest.{FlatSpec, Matchers} final class AETest extends FlatSpec with Matchers with Logging with Resource { protected val name: String = "Auto-Encoder" it should s"$name single hidden layer" in { show( "Single hidden layer") val REL_PATH = "unsupervised/ae/" val ALPHA = 0.8 val ETA = 0.05 val NUM_EPOCHS = 2500 val EPS = 1e-6 val THRESHOLD = 0.25 val LAMBDA = 0.18 val BETA = 0.3 val symbols = Array[String]( "FXE", "FXA", "SPY", "GLD", "FXB", "FXF", "FXC", "FXY", "CYB" ) val STUDIES = List[Array[String]]( Array[String]("FXY", "FXC", "GLD", "FXA"), Array[String]("FXE", "FXF", "FXB", "CYB"), Array[String]("FXE", "FXC", "GLD", "FXA", "FXY", "FXB"), Array[String]("FXC", "FXY", "FXA"), Array[String]("CYB", "GLD", "FXY"), symbols ) def index: Map[String, Int] = { import scala.collection.mutable.HashMap symbols.zipWithIndex./:(HashMap[String, Int]())((mp, si) => mp += ((si._1, si._2))).toMap } val path: String = getPath(REL_PATH).getOrElse(".") val prices = => DataSource(s"$path$s.csv", true, true, 1)) .map( _.flatMap(_.get(close))).filter(_.isSuccess).map(_.get) val config = AEConfig(ALPHA, ETA, LAMBDA, BETA, NUM_EPOCHS, EPS) val obs = symbols.flatMap( index.get(_)).map(prices(_).toArray) val xv = obs.tail.transpose.dropRight(1) val ae = AE(config, 8, xv.toVector) ae.model match { case Some(aeModel) => if(aeModel.synapses.nonEmpty) { val inputSynapse = aeModel.synapses.head show(s"$name output synapse(0)(0) ${inputSynapse(0)(0)}") show(s"$name output synapse(0)(1) ${inputSynapse(0)(1)}") show(s"$name output synapse(1)(0) ${inputSynapse(1)(0)}") show(s"$name output synapse(1)(1) ${inputSynapse(1)(1)}") } else fail(s"$name Model weights with improper size") case None => fail(s"$name could not generate a model") } } } // --------------------------------- EOF
----------------------------------------------------------------------------
Example 39
Source File: ParallelismTest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
/*
 * FlatSpec test that benchmarks Scala parallel collections against their sequential
 * counterparts. For arrays and for maps it fills a `ratios` array over evalRange
 * (Range(1, NUM_TASKS)) for a map step and a filter step, and asserts that the average of
 * `ratios.tail` is below 1.0. `display` plots a ratio series with LinePlot.
 * NOTE(review): this listing is collapsed and extraction-damaged. In both tests the second
 * argument of the first `ratios.update(n, ...)` call is missing, and the collapsed `//` comments
 * swallow the code that follows them on the same physical line. Restore the line structure and
 * the missing calls from the upstream file before compiling.
 */
package org.scalaml.scalability.scala import org.scalaml.Logging import org.scalatest.{FlatSpec, Matchers} final class ParallelismTest extends FlatSpec with Matchers with Logging { import scala.collection.mutable.HashMap import scala.collection.parallel.mutable.{ParArray, ParHashMap} import scala.util.Random protected[this] val name: String = "Scala parallel collections" final private val SZ = 100000 final private val NUM_TASKS = 8 final private val evalRange = Range(1, NUM_TASKS) final private val TIMES = 20 // Arbitrary map function final val mapF = (x: Double) => Math.sin(x * 0.01) + Math.exp(-x) // Arbitrary filter function final val filterF = (x: Double) => x > 0.8 // Arbitrary reduce function final val reduceF = (x: Double, y: Double) => (x + y) * x it should s"$name: arrays" in { show(s"Evaluation of arrays") // Generate random vector for both the non-parallel and parallel array val data = Array.fill(SZ)(Random.nextDouble) val pData = ParArray.fill(SZ)(Random.nextDouble) // Initialized and execute the benchmark for the parallel array val benchmark = new ParallelArray[Double](data, pData, TIMES) val ratios = new Array[Double](NUM_TASKS) evalRange.foreach(n => ratios.update(n, val resultMap = ratios.tail resultMap.sum / resultMap.size < 1.0 should be(true) display(resultMap, "") evalRange.foreach(n => ratios.update(n, benchmark.filter(filterF)(n))) val resultfilter = ratios.tail resultfilter.sum / resultfilter.size < 1.0 should be(true) display(resultfilter, "ParArray.filter") } it should s"$name: maps" in { show("Evaluation of maps") val mapData = new HashMap[Int, Double] Range(0, SZ).foreach(n => mapData.put(n, Random.nextDouble)) val parMapData = new ParHashMap[Int, Double] Range(0, SZ).foreach(n => parMapData.put(n, Random.nextDouble)) // Initialized and execute the benchmark for the parallel map val benchmark = new ParallelMap[Double](mapData.toMap, parMapData, TIMES) val ratios = new Array[Double](NUM_TASKS) evalRange.foreach(n => ratios.update(n, val
resultMap = ratios.tail resultMap.sum / resultMap.size < 1.0 should be(true) display(resultMap, "") evalRange.foreach(n => ratios.update(n, benchmark.filter(filterF)(n))) val resultfilter = ratios.tail resultfilter.sum / resultfilter.size < 1.0 should be(true) } private def display(x: Array[Double], label: String): Unit = { import org.scalaml.plots.{Legend, LightPlotTheme, LinePlot} val labels = Legend( name, "Scala parallel collections", s"Scala parallel computation for $label", "Relative timing" ) LinePlot.display(x.toVector, labels, new LightPlotTheme) } } // ------------------------------------------- EOF --------------------------------------------------
Example 40
Source File: OrderedClustering.scala From nn_coref with GNU General Public License v3.0 | 5 votes |
package edu.berkeley.nlp.coref

import scala.collection.mutable.HashMap
import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer

/**
 * A partition of mention indices into clusters; each cluster lists its mentions in order.
 *
 * @param clusters the clusters; taken together they must contain exactly the indices
 *                 `0 until n`, otherwise construction fails with an IllegalArgumentException
 */
class OrderedClustering(val clusters: Seq[Seq[Int]]) {

  // Elements must be consecutive integers from 0 up to n
  private val allIndicesSorted = clusters.flatten.sorted
  require(allIndicesSorted.sameElements(0 until allIndicesSorted.size), allIndicesSorted)

  // mention index -> the cluster that contains it
  private val mentionToClusterMap: Map[Int, Seq[Int]] =
    (for (cluster <- clusters; i <- cluster) yield i -> cluster).toMap

  /** The cluster containing `idx`. Throws NoSuchElementException for an unknown index. */
  def getCluster(idx: Int) = mentionToClusterMap(idx)

  def isSingleton(idx: Int) = mentionToClusterMap(idx).size == 1

  /** True when `idx` is the first mention of its cluster. */
  def startsCluster(idx: Int) = mentionToClusterMap(idx)(0) == idx

  def areInSameCluster(idx1: Int, idx2: Int) = mentionToClusterMap(idx1).contains(idx2)

  /** The mention right before `idx` in its cluster, or -1 when `idx` starts the cluster. */
  def getImmediateAntecedent(idx: Int) = {
    val cluster = mentionToClusterMap(idx)
    val mentIdxInCluster = cluster.indexOf(idx)
    if (mentIdxInCluster == 0) -1 else cluster(mentIdxInCluster - 1)
  }

  /** All mentions before `idx` in its cluster. */
  def getAllAntecedents(idx: Int) = {
    val cluster = mentionToClusterMap(idx)
    cluster.slice(0, cluster.indexOf(idx))
  }

  /** All mentions after `idx` in its cluster. */
  def getAllConsequents(idx: Int) = {
    val cluster = mentionToClusterMap(idx)
    cluster.slice(cluster.indexOf(idx) + 1, cluster.size)
  }

  // Needed for output printing
  /** Position in `clusters` of the last cluster equal to the one holding `idx`; 0 if none. */
  def getClusterIdx(idx: Int) =
    math.max(0, clusters.lastIndexWhere(c => c.sameElements(mentionToClusterMap(idx))))

  /**
   * Restricts the clustering to `mentIdxsToKeep` and renumbers the kept mentions by their
   * position in that sequence. Clusters left empty are dropped.
   */
  def getSubclustering(mentIdxsToKeep: Seq[Int]): OrderedClustering = {
    // Doubles as the membership test, so each mention is looked up in O(1).
    val oldIndicesToNewIndicesMap = mentIdxsToKeep.zipWithIndex.toMap
    val filteredConvertedClusters = clusters.map(_.collect(oldIndicesToNewIndicesMap))
    new OrderedClustering(filteredConvertedClusters.filter(_.nonEmpty))
  }
}

object OrderedClustering {

  /**
   * Builds a clustering from one cluster id per mention (mention `i` has id `clusterIds(i)`).
   * Clusters are ordered by their first mention.
   */
  def createFromClusterIds(clusterIds: Seq[Int]): OrderedClustering = {
    val mentIdAndClusterId = clusterIds.zipWithIndex.map(_.swap)
    val clustersUnsorted = mentIdAndClusterId.groupBy(_._2).values
    val finalClusters = clustersUnsorted.toSeq
      .sortBy(_.head)
      .map(clusterWithClusterId => clusterWithClusterId.map(_._1))
    new OrderedClustering(finalClusters)
  }

  /**
   * Builds a clustering from backpointers: mention `i` starts a new cluster when
   * `backpointers(i) == i`, otherwise it joins the cluster of `backpointers(i)`, which must
   * refer to an earlier mention.
   */
  def createFromBackpointers(backpointers: Seq[Int]): OrderedClustering = {
    val clusters = new ArrayBuffer[ArrayBuffer[Int]]()
    val mentionToCluster = new HashMap[Int, ArrayBuffer[Int]]()
    for (i <- 0 until backpointers.size) {
      val cluster =
        if (backpointers(i) == i) {
          val fresh = ArrayBuffer(i)
          clusters += fresh
          fresh
        } else {
          val existing = mentionToCluster(backpointers(i))
          existing += i
          existing
        }
      mentionToCluster.put(i, cluster)
    }
    // Hand out an immutable snapshot rather than the working buffers.
    new OrderedClustering(clusters.map(_.toVector).toVector)
  }
}
Example 41
Source File: FeatureIndexer.scala From jigg with Apache License 2.0 | 5 votes |
// NOTE(review): the package name was missing from this listing; `jigg.ml` is assumed. Confirm
// against the upstream file. The FeatureIndexer trait extended below is not part of the listing.
package jigg.ml

/**
 * Feature indexer that maps a feature to `|hasher(feature) % maxFeatureSize|`, so indices always
 * lie in `[0, maxFeatureSize)`. Instances are created through the companion's `apply`.
 *
 * @param maxFeatureSize number of distinct indices
 * @param hasher         hash function applied to each feature
 */
@SerialVersionUID(1L)
class HashedFeatureIndexer[Feature] private(
  val maxFeatureSize: Int,
  val hasher: (Feature => Int)) extends FeatureIndexer[Feature] {

  def size = maxFeatureSize

  // Take the remainder before the absolute value: math.abs(Int.MinValue) is still negative, so
  // abs-then-modulo could return a negative index. Every other hash maps to the same index as
  // before.
  def getIndex(key: Feature) = math.abs(hasher(key) % maxFeatureSize)
}

object HashedFeatureIndexer {

  /**
   * Creates an indexer whose size is the largest prime strictly below `maxFeatureSize`.
   *
   * @param maxFeatureSize exclusive upper bound for the index space; must be greater than 2,
   *                       otherwise no prime lies below it and a NoSuchElementException is thrown
   * @param hasher         hash function, `hashCode` by default
   */
  def apply[Feature](
    maxFeatureSize: Int = (2 << 23),
    hasher: (Feature => Int) = {f: Feature => f.hashCode()}) = {
    val biggestPrimeBelow = primes.takeWhile(maxFeatureSize > _).last
    new HashedFeatureIndexer[Feature](biggestPrimeBelow, hasher)
  }

  // Lazily generated primes: 2 followed by the odd numbers that survive trial division.
  private lazy val primes = 2 #:: sieve(3)

  // First odd candidate at or after n with no prime divisor up to its square root, then the rest.
  private def sieve(n: Int): Stream[Int] =
    if (primes.takeWhile(p => p * p <= n).exists(n % _ == 0)) sieve(n + 2)
    else n #:: sieve(n + 2)
}
Example 42
Source File: OutputCategoryList.scala From jigg with Apache License 2.0 | 5 votes |
/*
 * Command-line tool that counts how often each category occurs in the training sentences and
 * writes two tab-separated lists, sorted by descending count: ./category.lst (per category) and
 * ./category.nofeature.lst (counts merged by `cat.toStringNoFeature`). Each output row holds the
 * count, the category, the part of speech at the example position and a highlighted context
 * window of up to five tokens on either side.
 * For each category the example kept is replaced whenever a longer sentence containing it is seen.
 * NOTE(review): this listing is collapsed and extraction-damaged. One import, the right-hand
 * sides of `val bank` and `val cat`, and possibly the escape characters inside the highlight
 * string are missing; the collapsed `//` comment in `highlight` also swallows the code after it.
 * Restore these from the upstream file before compiling.
 */
package jigg.nlp.ccg import import scala.collection.mutable.ArrayBuffer import scala.sys.process.Process import scala.collection.mutable.HashMap import lexicon._ import breeze.config.CommandLineParser object OutputCategoryList { case class Params( bank: Opts.BankInfo, dict: Opts.DictParams ) case class CategoryInfo(sentence: GoldSuperTaggedSentence, position: Int, num: Int = 1) { def increment(): CategoryInfo = this.copy(num = num + 1) def replace(_sentence: GoldSuperTaggedSentence, _p: Int) = CategoryInfo(_sentence, _p, num + 1) } def main(args:Array[String]) = { val params = CommandLineParser.readIn[Params](args) val dict = new JapaneseDictionary(params.dict.categoryDictinoary) val bank =, dict) val trainSentences: Array[GoldSuperTaggedSentence] = bank.trainSentences val stats = new HashMap[Category, CategoryInfo] trainSentences foreach { sentence => (0 until sentence.size) foreach { i => val cat = stats.get(cat) match { case Some(info) => if (sentence.size > info.sentence.size) stats += ((cat, info.replace(sentence, i))) else stats += ((cat, info.increment())) case None => stats += ((cat, CategoryInfo(sentence, i))) case _ => } } } def highlight(sentence: Sentence, i: Int) = { val tokens = sentence.wordSeq // tokens.take(i).mkString("") + s"\\x1b[1;31m{${tokens(i)}}\\x1b[0m" + tokens.drop(i+1).mkString("") tokens.slice(i-5, i).mkString("") + s"[01;31m${tokens(i)}[00m" + tokens.slice(i+1, i+6).mkString("") } var fw = new FileWriter("./category.lst") stats.toSeq.sortBy(_._2.num).reverse.foreach { case (cat, CategoryInfo(sentence, i, num)) => fw.write("%s\t%s\t%s\t%s\n" .format(num, cat, sentence.pos(i), highlight(sentence, i))) } fw.flush fw.close val noFeatureCategories = new HashMap[String, CategoryInfo] stats foreach { case (cat, CategoryInfo(sentence, i, numWithFeat)) => val noFeature = cat.toStringNoFeature noFeatureCategories.get(noFeature) match { case Some(exist) => val newNum = numWithFeat + exist.num val newInfo = exist.copy(num = newNum)
noFeatureCategories += (noFeature -> newInfo) case None => noFeatureCategories += (noFeature -> CategoryInfo(sentence, i, numWithFeat)) case _ => } } fw = new FileWriter("./category.nofeature.lst") noFeatureCategories.toSeq.sortBy(_._2.num).reverse.foreach { case (cat, CategoryInfo(sentence, i, num)) => fw.write("%s\t%s\t%s\t%s\n" .format(num, cat, sentence.pos(i), highlight(sentence, i))) } fw.flush fw.close } }
Example 43
Source File: HeadFinder.scala From jigg with Apache License 2.0 | 5 votes |
/*
 * Head-direction lookup for binary nodes.
 *  - HeadFinder: the interface; `get` returns the Direction for a (left, right) pair of NodeInfo.
 *  - EnglishHeadFinder: looks the pair up in a map keyed by two Ints and falls back to Left;
 *    createFromParseTrees fills that map from the NonterminalLabel direction of every BinaryTree.
 *  - JapaneseHeadFinder: returns Left when the right node's first PoS value equals the `Symbol`
 *    constant, otherwise Right.
 * NOTE(review): this listing is collapsed and extraction-damaged. The key expressions passed to
 * `children2dir.get(...)` and used in `map += (...) -> dir` are missing. Restore them from the
 * upstream file before compiling.
 */
package jigg.nlp.ccg.parser import scala.collection.mutable.HashMap import jigg.nlp.ccg.lexicon.{PoS, JapanesePoS, Category} import jigg.nlp.ccg.lexicon.Direction._ trait HeadFinder extends Serializable { type NodeInfo = HeadFinder.NodeInfo def get(left:NodeInfo, right:NodeInfo): Direction } object HeadFinder { case class NodeInfo(pos:PoS, category:Category, headCategory:Category) } case class EnglishHeadFinder(children2dir: Map[(Int, Int), Direction]) extends HeadFinder { def get(left:NodeInfo, right:NodeInfo) = children2dir.get(, match { case Some(dir) => dir case _ => Left } } object EnglishHeadFinder { import jigg.nlp.ccg.lexicon.{ParseTree, NodeLabel, BinaryTree, NonterminalLabel} def createFromParseTrees(trees: Seq[ParseTree[NodeLabel]]): EnglishHeadFinder = { val map = new HashMap[(Int, Int), Direction] trees.foreach { _.foreachTree { _ match { case BinaryTree(left, right, NonterminalLabel(dir, _, _)) => map += (, -> dir case _ => }}} EnglishHeadFinder(map.toMap) } } object JapaneseHeadFinder extends HeadFinder { val Symbol = "記号" def get(left:NodeInfo, right:NodeInfo) = { val leftPos = left.pos.first.v val rightPos = right.pos.first.v if (rightPos == Symbol) Left else Right } }
Example 44
Source File: Rule.scala From jigg with Apache License 2.0 | 5 votes |
/*
 * Grammar rules used by the parser.
 *  - Rule: `unify` returns the candidate (parent category, rule type) pairs for two children,
 *    `raise` does the same for a single child; both return None when no rule applies.
 *  - CFGRule: a Rule backed by two lookup tables (binary and unary).
 *  - CFGRule.extractRulesFromDerivations: walks every point of every derivation and collects the
 *    (parent category, rule type) pairs seen for each unary child and each binary child pair.
 * NOTE(review): this listing is collapsed and extraction-damaged. The prefix of one import, the
 * lookup keys in `unify`, `raise` and both `getOrElseUpdate` calls, and the receivers of the two
 * `{ case (k, v) => k -> v.toArray }` conversions are missing; the collapsed `//` comments also
 * swallow the code after them. Restore these from the upstream file before compiling.
 */
package jigg.nlp.ccg.parser import jigg.nlp.ccg.lexicon.{Category, Derivation, Point, UnaryChildPoint, BinaryChildrenPoints, AppliedRule} import scala.collection.mutable.{HashMap, HashSet} import{ObjectOutputStream, ObjectInputStream} trait Rule { def unify(left:Category, right:Category): Option[Array[(Category, String)]] def raise(child:Category): Option[Array[(Category, String)]] def headFinder:HeadFinder } // rules are restricted to CFG rules extracted from the training CCGBank case class CFGRule(val binaryRules:Map[(Int,Int), Array[(Category, String)]], // category ids -> (category, ruleType) val unaryRules:Map[Int, Array[(Category, String)]], override val headFinder:HeadFinder) extends Rule { def unify(left:Category, right:Category):Option[Array[(Category, String)]] = binaryRules.get((, def raise(child:Category):Option[Array[(Category, String)]] = unaryRules.get( } object CFGRule { def extractRulesFromDerivations(derivations: Array[Derivation], headFinder:HeadFinder): CFGRule = { val binaryRules = new HashMap[(Int, Int), HashSet[(Category, String)]] val unaryRules = new HashMap[Int, HashSet[(Category, String)]] derivations.foreach { deriv => deriv.foreachPoint({ point:Point => deriv.get(point) match { case Some(AppliedRule(UnaryChildPoint(child), ruleType)) => val parents = unaryRules.getOrElseUpdate(, new HashSet[(Category, String)]) parents += ((point.category, ruleType)) case Some(AppliedRule(BinaryChildrenPoints(left, right), ruleType)) => val parents = binaryRules.getOrElseUpdate((,, new HashSet[(Category, String)]) parents += ((point.category, ruleType)) case _ => }}) } new CFGRule( { case (k, v) => k -> v.toArray }.toMap, { case (k, v) => k -> v.toArray }.toMap, headFinder) } }
Example 45
Source File: SuperTaggerModel.scala From jigg with Apache License 2.0 | 5 votes |
/*
 * SuperTaggerModel bundles everything a trained supertagger needs: the dictionary, the
 * feature-to-index map, the weight vector and the feature extractors.
 *  - reduceFeatures drops features whose weight is 0 and renumbers the remaining ones densely;
 *    it casts `weights` to GrowableWeightVector[Float], so it only works for that implementation.
 *  - mkMultiTaggerTrainer / mkMultiTagger / mkClassifier build tagger components on the model.
 *  - The companion saves and loads a model through jigg.util.IOUtil object streams.
 * NOTE(review): this listing is collapsed and extraction-damaged. One import and the constructor
 * argument of `new FixedWeightVector[Float](...)` are missing; the collapsed `//` comments also
 * swallow the code after them. Restore these from the upstream file before compiling.
 */
package jigg.nlp.ccg import tagger.{LF=>Feature, MaxEntMultiTagger, MaxEntMultiTaggerTrainer, FeatureExtractors} import lexicon._ import import scala.collection.mutable.HashMap case class SuperTaggerModel( dict: Dictionary, featureMap: HashMap[Feature, Int], weights: WeightVec, extractors: FeatureExtractors) { self => def reduceFeatures(): SuperTaggerModel = { val buffer = weights.asInstanceOf[GrowableWeightVector[Float]].array // 0 1.0 2.0 0 0 1.0 ... val activeIdxs = buffer.zipWithIndex filter (_._1 != 0) map (_._2) // 1 2 5 println(s"# features reduced from ${buffer.size} to ${activeIdxs.size}") val idxMap = activeIdxs.zipWithIndex.toMap // {1->0, 2->1 5->2} val newFeatureMap = featureMap collect { case (f, oldIdx) if idxMap.isDefinedAt(oldIdx) => (f, idxMap(oldIdx)) } val newWeights = new FixedWeightVector[Float]( this copy (featureMap = newFeatureMap, weights = newWeights) } def mkMultiTaggerTrainer(classifierTrainer: OnlineLogLinearTrainer[Int]) = new MaxEntMultiTaggerTrainer(mkIndexer(), extractors, classifierTrainer, dict) def mkMultiTagger() = new MaxEntMultiTagger(mkIndexer(), extractors, mkClassifier(), dict) def mkClassifier() = new LogLinearClassifier[Int] { override val weights = self.weights } private def mkIndexer() = new ExactFeatureIndexer(featureMap) } object SuperTaggerModel { def saveTo(path: String, model: SuperTaggerModel) = { System.err.println("Saving tagger model to " + path) val os = jigg.util.IOUtil.openBinOut(path) os.writeObject(model) os.close } def loadFrom(path: String): SuperTaggerModel = { jigg.util.LogUtil.track("Loading supertagger model ...") { val in = jigg.util.IOUtil.openBinIn(path) val model = in.readObject.asInstanceOf[SuperTaggerModel] in.close model } } }
Example 46
Source File: CategoryManager.scala From jigg with Apache License 2.0 | 5 votes |
package jigg.nlp.ccg.lexicon

import scala.collection.mutable.HashMap
import scala.collection.mutable.ArrayBuffer

/** Manager that assigns ids to categories and looks categories up by their string form. */
class CategoryManager extends StringBaseNumberedManager[Category] with OptionReturner[Category] {

  /** Returns a copy of `original` carrying a fresh id; sub-categories are registered first. */
  override def createWithId(original:Category): Category = original match {
    case AtomicCategory(_, base, avm) =>
      AtomicCategory(newId, base, avm)
    case ComplexCategory(_, left, right, slash) =>
      // Order matters: both children go through assignID before newId is evaluated.
      val registeredLeft = assignID(left)
      val registeredRight = assignID(right)
      ComplexCategory(newId, registeredLeft, registeredRight, slash)
  }

  /**
   * Looks `str` up among the registered strings first; on a miss, parses it into a canonical
   * instance and looks that up in the canonical map.
   */
  override def getOrNone(str:String): Option[Category] =
    str2objIndex.get(str)
      .map(i => objects(i))
      .orElse(canonicalMap.get(createCanonicalInstance(str)))

  override def createCanonicalInstance(str:String): Category = JapaneseCategoryParser.parse(str)

  // This is used when candidate shift category is empty
  // It sometimes happen if for example, PoS not registered in the dictionary is detected.
  val unkCategory = getOrCreate("UNK")
}
Example 47
Source File: CategoryDictionary.scala From jigg with Apache License 2.0 | 5 votes |
/*
 * CategoryDictionary maps a (word, PoS)-derived key to the candidate categories seen for it, with
 * a second map keyed by a PoS-derived key as fallback for unknown words.
 *  - getCandidates: candidates for the word key, else for the unknown key, else an empty array.
 *  - registCandidates / registUnkCandiates: merge new candidates into the respective map,
 *    keeping each category once.
 *  - resetWithSentences: counts every key over the sentences, then registers candidates per
 *    token; word-level registration only happens for keys counted at least `unkThreathold` times.
 * The four subclasses differ only in how `key` and `unkKey` are derived.
 * NOTE(review): this listing is collapsed and extraction-damaged. The arguments of both `Array(`
 * calls in resetWithSentences and the bodies of every `key` / `unkKey` override are missing.
 * Restore them from the upstream file before compiling.
 */
package jigg.nlp.ccg.lexicon import scala.collection.mutable.HashMap @SerialVersionUID(1L) sealed trait CategoryDictionary extends Serializable { type Key type UnkKey val categoryMap = new HashMap[Key, Array[Category]] val unkCategoryMap = new HashMap[UnkKey, Array[Category]] def key(word:Word, pos:PoS):Key def unkKey(pos:PoS):UnkKey def getCandidates(word:Word, pos:PoS):Array[Category] = categoryMap.get(key(word, pos)) match { case Some(categories) => categories case None => unkCategoryMap.get(unkKey(pos)) match { case Some(categories) => categories case None => Array[Category]() } } def registCandidates(word:Word, pos:PoS, candidates:Array[Category]) = key(word, pos) match { case k => categoryMap += (k -> (categoryMap.get(k) match { case Some(alreadyExist) => (candidates ++ alreadyExist).distinct case None => candidates.distinct })) } def registUnkCandiates(pos:PoS, candidates:Array[Category]) = unkKey(pos) match { case k => unkCategoryMap += (k -> (unkCategoryMap.get(k) match { case Some(alreadyExist) => (candidates ++ alreadyExist).distinct case None => candidates.distinct })) } def resetWithSentences(sentences: Seq[GoldSuperTaggedSentence], unkThreathold: Int) = { val counts = new HashMap[Key, Int] sentences foreach { sentence => (0 until sentence.size) foreach { i => val k = key(sentence.base(i), sentence.pos(i)) counts.getOrElseUpdate(k, 0) counts(k) += 1 }} sentences foreach { sentence => (0 until sentence.size) foreach { i => val k = key(sentence.base(i), sentence.pos(i)) if (counts(k) >= unkThreathold) registCandidates(sentence.base(i), sentence.pos(i), Array( registUnkCandiates(sentence.pos(i), Array( }} } } class Word2CategoryDictionary extends CategoryDictionary { type Key = Int type UnkKey = Int override def key(word:Word, pos:PoS) = override def unkKey(pos:PoS) = } class WordPoS2CategoryDictionary extends CategoryDictionary { type Key = (Int, Int) type UnkKey = Int override def key(word:Word, pos:PoS) = (, override def unkKey(pos:PoS) = } class
WordSecondFineTag2CategoryDictionary extends CategoryDictionary { override type Key = (Int, Int) override type UnkKey = Int override def key(word:Word, pos:PoS) = (, override def unkKey(pos:PoS) = } class WordSecondWithConj2CategoryDictionary extends CategoryDictionary { override type Key = (Int, Int) override type UnkKey = Int override def key(word:Word, pos:PoS) = (, override def unkKey(pos:PoS) = }
Example 48
Source File: CategoryFeature.scala From jigg with Apache License 2.0 | 5 votes |
package jigg.nlp.ccg.lexicon

/** Feature structure attached to a category, exposed as key/value pairs. */
trait CategoryFeature {
  def kvs: Seq[(String, String)]
  def unify(lhs: CategoryFeature): Boolean = false // TODO: implement
}

/**
 * Feature structure of a Japanese category.
 *
 * @param values one value per key of the companion's key list, in key order; "" marks an
 *               absent value
 */
@SerialVersionUID(-8236395926230742650L)
case class JPCategoryFeature(values: Seq[String]) extends CategoryFeature {
  import JPCategoryFeature._

  override def kvs = keys zip values

  /** Comma-separated `key=value` pairs, skipping keys without a value. */
  override def toString = kvs.filter(_._2 != "").map { case (k, v) => k + "=" + v }.mkString(",")
}

object JPCategoryFeature {
  // This is a hard-coded mapping of feature structure of Japanese category.
  private val k2vals = Map(
    "mod" -> Array("adv", "adn", "nm"),
    "form" -> Array("attr", "base", "cont", "hyp", "imp", "beg", "stem", "ta", "te", "pre", "r", "neg", "s", "da"),
    "case" -> Array("ga", "o", "ni", "to", "nc", "caus"),
    "fin" -> Array("f", "t"))

  private val keys = k2vals.keys.toSeq

  // value -> position of its key in `keys`
  private val v2keyIdx = {
    val key2idx = keys.zipWithIndex.toMap
    k2vals.flatMap { case (key, vals) => vals.map { v => v -> key2idx(key) } }
  }

  /** Matches `key=value` and captures the value. */
  val kvpair = """\w+=(\w+)""".r

  /**
   * Builds a feature from values given either bare ("ga") or as `key=value` ("case=ga").
   * Empty strings and values starting with 'X' are skipped. A value that is not in the
   * hard-coded mapping raises NoSuchElementException.
   */
  def createFromValues(values: Seq[String]) = values match {
    case Seq() => emptyFeature
    case _ =>
      val sortedValues = Array.fill(keys.size)("")
      values.filter(_ != "").foreach { value =>
        val v = value match { case kvpair(captured) => captured; case bare => bare }
        if (v(0) != 'X') sortedValues(v2keyIdx(v)) = v
      }
      JPCategoryFeature(sortedValues)
  }

  // We cache this because most categories don't have a feature
  private val emptyFeature = JPCategoryFeature(Array.fill(keys.size)(""))
}

/** Feature structure of an English category: plain values keyed by their position. */
case class EnCategoryFeature(values: Seq[String]) extends CategoryFeature {
  override def kvs = values.zipWithIndex.map { case (v, k) => (k.toString, v) }

  override def toString = values.mkString(",")
}

object EnCategoryFeature {
  /** Builds a feature with the values sorted in ascending order. */
  def createFromValues(values: Seq[String]) = EnCategoryFeature(values.sortWith(_ < _))
}
Example 49
Source File: LocalKMeans.scala From sparkoscope with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet

import breeze.linalg.{squaredDistance, DenseVector, Vector}

/**
 * Naive, single-process K-means over N random D-dimensional points, using K centers picked at
 * random from the data. Iterates until the summed squared movement of the centers drops to
 * `convergeDist` or below.
 */
object LocalKMeans {
  val N = 1000
  val R = 1000 // Scaling factor
  val D = 10
  val K = 10
  val convergeDist = 0.001
  val rand = new Random(42)

  /** N points with D coordinates each, every coordinate drawn uniformly from [0, R). */
  def generateData: Array[DenseVector[Double]] = {
    def generatePoint(i: Int): DenseVector[Double] = {
      DenseVector.fill(D) {rand.nextDouble * R}
    }
    Array.tabulate(N)(generatePoint)
  }

  /**
   * Key of the center closest to `p` by squared distance.
   *
   * @param centers centers keyed 1 to `centers.size`
   * @return the key of the closest center, or 0 when `centers` is empty
   */
  def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = {
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val vCurr = centers(i)
      val tempDist = squaredDistance(p, vCurr)
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning(): Unit = {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use org.apache.spark.ml.clustering.KMeans
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]): Unit = {

    showWarning()

    val data = generateData
    val points = new HashSet[Vector[Double]]
    val kPoints = new HashMap[Int, Vector[Double]]
    var tempDist = 1.0

    // Draw random data points until K distinct initial centers have been collected.
    while (points.size < K) {
      points.add(data(rand.nextInt(N)))
    }

    val iter = points.iterator
    for (i <- 1 to points.size) {
      kPoints.put(i, iter.next())
    }

    println("Initial centers: " + kPoints)

    while (tempDist > convergeDist) {
      // (closest center key, (point, 1)) for every data point
      val closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      val mappings = closest.groupBy[Int] (x => x._1)

      // Per center: (key, (sum of assigned points, number of assigned points))
      val pointStats = mappings.map { pair =>
        pair._2.reduceLeft [(Int, (Vector[Double], Int))] {
          case ((id1, (p1, c1)), (id2, (p2, c2))) => (id1, (p1 + p2, c1 + c2))
        }
      }

      // New center = mean of the points assigned to it
      val newPoints = pointStats.map {mapping =>
        (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))}

      tempDist = 0.0
      for (mapping <- newPoints) {
        tempDist += squaredDistance(kPoints(mapping._1), mapping._2)
      }

      for (newP <- newPoints) {
        kPoints.put(newP._1, newP._2)
      }
    }

    println("Final centers: " + kPoints)
  }
}
// scalastyle:on println
Example 50
Source File: JsonUtils.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.kafka010

import scala.collection.mutable.HashMap
import scala.util.control.NonFatal

import org.apache.kafka.common.TopicPartition
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization

  // NOTE(review): the enclosing object declaration is not visible in this excerpt; the final
  // closing brace below belongs to it.

  /**
   * Serialises per-partition offsets as JSON grouped by topic, i.e. topic -> (partition -> offset).
   * Partitions are visited in (topic, partition) order.
   */
  def partitionOffsets(partitionOffsets: Map[TopicPartition, Long]): String = {
    implicit val byTopicThenPartition: Ordering[TopicPartition] =
      Ordering.by((tp: TopicPartition) => (tp.topic, tp.partition))

    val offsetsByTopic = new HashMap[String, HashMap[Int, Long]]()
    // sort for more determinism
    for (tp <- partitionOffsets.keySet.toSeq.sorted) {
      val topicOffsets = offsetsByTopic.getOrElseUpdate(tp.topic, new HashMap[Int, Long])
      topicOffsets += tp.partition -> partitionOffsets(tp)
    }
    Serialization.write(offsetsByTopic)
  }
}
Example 51
Source File: MasterWebUI.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.master.ui

import scala.collection.mutable.HashMap

import org.eclipse.jetty.servlet.ServletContextHandler

import org.apache.spark.deploy.master.Master
import org.apache.spark.internal.Logging
import org.apache.spark.ui.{SparkUI, WebUI}
import org.apache.spark.ui.JettyUtils._

  // NOTE(review): the enclosing class declaration (and its `proxyHandlers` field) is not
  // visible in this excerpt.

  /** Attaches the application and master pages, the static handler and the two kill redirects. */
  def initialize(): Unit = {
    val masterPage = new MasterPage(this)
    attachPage(new ApplicationPage(this))
    attachPage(masterPage)
    attachHandler(createStaticHandler(MasterWebUI.STATIC_RESOURCE_DIR, "/static"))

    // Both kill endpoints accept POST only and redirect back to "/".
    val appKillHandler = createRedirectHandler(
      "/app/kill", "/", masterPage.handleAppKillRequest, httpMethods = Set("POST"))
    attachHandler(appKillHandler)
    val driverKillHandler = createRedirectHandler(
      "/driver/kill", "/", masterPage.handleDriverKillRequest, httpMethods = Set("POST"))
    attachHandler(driverKillHandler)
  }

  /** Attaches a proxy handler under `/proxy/<id>` for `target` (minus one trailing slash). */
  def addProxyTargets(id: String, target: String): Unit = {
    val proxyHandler = createProxyHandler("/proxy/" + id, target.stripSuffix("/"))
    attachHandler(proxyHandler)
    proxyHandlers(id) = proxyHandler
  }

  /** Detaches and forgets the proxy handler registered for `id`, if there is one. */
  def removeProxyTargets(id: String): Unit = {
    for (registered <- proxyHandlers.remove(id)) {
      detachHandler(registered)
    }
  }
}

private[master] object MasterWebUI {
  private val STATIC_RESOURCE_DIR = SparkUI.STATIC_RESOURCE_DIR
}
Example 52
Source File: PoolTable.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.jobs

import java.net.URLEncoder

import scala.collection.mutable.HashMap
import scala.xml.Node

import org.apache.spark.scheduler.{Schedulable, StageInfo}
import org.apache.spark.ui.UIUtils

/** Renders the given scheduler pools as an HTML table, one row per pool. */
private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) {
  private val listener = parent.progressListener

  /** Builds the table while holding the listener's lock. */
  def toNodeSeq: Seq[Node] = {
    listener.synchronized {
      poolTable(poolRow, pools)
    }
  }

  private def poolTable(
      makeRow: (Schedulable, HashMap[String, HashMap[Int, StageInfo]]) => Seq[Node],
      rows: Seq[Schedulable]): Seq[Node] = {
    <table class="table table-bordered table-striped table-condensed sortable table-fixed">
      <thead>
        <th>Pool Name</th>
        <th>Minimum Share</th>
        <th>Pool Weight</th>
        <th>Active Stages</th>
        <th>Running Tasks</th>
        <th>SchedulingMode</th>
      </thead>
      <tbody>
        {rows.map(r => makeRow(r, listener.poolToActiveStages))}
      </tbody>
    </table>
  }

  private def poolRow(
      p: Schedulable,
      poolToActiveStages: HashMap[String, HashMap[Int, StageInfo]]): Seq[Node] = {
    // A pool with no entry in the map has no active stages.
    val activeStages = poolToActiveStages.get(p.name).map(_.size).getOrElse(0)
    val href = "%s/stages/pool?poolname=%s"
      .format(UIUtils.prependBaseUri(parent.basePath), URLEncoder.encode(p.name, "UTF-8"))
    <tr>
      <td>
        <a href={href}>{p.name}</a>
      </td>
      <td>{p.minShare}</td>
      <td>{p.weight}</td>
      <td>{activeStages}</td>
      <td>{p.runningTasks}</td>
      <td>{p.schedulingMode}</td>
    </tr>
  }
}
Example 53
Source File: ConfigReader.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.internal.config

import java.util.{Map => JMap}
import java.util.regex.Pattern

import scala.collection.mutable.HashMap
import scala.util.matching.Regex

private object ConfigReader {

  // Matches `${name}` or `${prefix:name}`; group 1 is the optional prefix, group 2 the name.
  private val REF_RE = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r

}

  // NOTE(review): the enclosing class declaration (and its `bindings` field) is not visible
  // in this excerpt; the final closing brace below belongs to it.

  /** Expands every `${...}` reference in `input`; a null input is returned unchanged. */
  def substitute(input: String): String = substitute(input, Set())

  /**
   * Recursive worker for [[substitute]].
   *
   * @param usedRefs references already being expanded on this path; meeting one again
   *                 fails the `require` with a "Circular reference" message
   */
  private def substitute(input: String, usedRefs: Set[String]): String = {
    if (input != null) {
      ConfigReader.REF_RE.replaceAllIn(input, { m =>
        val prefix = m.group(1) // null when the reference has no "prefix:" part
        val name = m.group(2)
        val ref = if (prefix == null) name else s"$prefix:$name"
        require(!usedRefs.contains(ref), s"Circular reference in $input: $ref")

        // References that cannot be resolved are left in the output verbatim.
        val replacement = bindings.get(prefix)
          .flatMap(_.get(name))
          .map { v => substitute(v, usedRefs + ref) }
          .getOrElse(m.matched)
        Regex.quoteReplacement(replacement)
      })
    } else {
      input
    }
  }

}
Example 54
Source File: StageInfo.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler

import scala.collection.mutable.HashMap

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.storage.RDDInfo

  // NOTE(review): the StageInfo class and the header of its companion object are not visible
  // in this excerpt; the final closing brace below belongs to the enclosing definition.

  /**
   * Builds a StageInfo from a Stage.
   *
   * The RDD list holds the stage's own RDD followed by its narrow ancestors. When `numTasks`
   * is empty, `stage.numTasks` is used.
   */
  def fromStage(
      stage: Stage,
      attemptId: Int,
      numTasks: Option[Int] = None,
      taskMetrics: TaskMetrics = null,
      taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty
    ): StageInfo = {
    val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd)
    val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos
    new StageInfo(
      stage.id,
      attemptId,
      stage.name,
      numTasks.getOrElse(stage.numTasks),
      rddInfos,
      stage.parents.map(_.id),
      stage.details,
      taskMetrics,
      taskLocalityPreferences)
  }
}
Example 55
Source File: GroupedCountEvaluator.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.partial

import scala.collection.Map
import scala.collection.mutable.HashMap
import scala.reflect.ClassTag

import org.apache.spark.util.collection.OpenHashMap

/**
 * Approximate evaluator for per-key counts.
 *
 * Partial per-key counts are summed as task outputs arrive; `currentResult` turns the running
 * sums into bounded estimates based on the fraction of outputs merged so far.
 */
private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] {

  private var outputsMerged = 0
  private val sums = new OpenHashMap[T, Long]()   // Sum of counts for each key

  /** Adds one task's per-key counts to the running sums. */
  override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]): Unit = {
    outputsMerged += 1
    taskResult.foreach { case (key, value) =>
      sums.changeValue(key, value, _ + value)
    }
  }

  /**
   * Exact values (confidence 1.0) once every output is merged, an empty map before the first
   * merge, otherwise bounds computed by `CountEvaluator.bound`.
   */
  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      sums.map { case (key, sum) => (key, new BoundedDouble(sum, 1.0, sum, sum)) }.toMap
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val p = outputsMerged.toDouble / totalOutputs
      sums.map { case (key, sum) => (key, CountEvaluator.bound(confidence, sum, p)) }.toMap
    }
  }
}
Example 56
Source File: MasterWebUISuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.master.ui

import java.io.DataOutputStream
import java.net.{HttpURLConnection, URL}
import java.nio.charset.StandardCharsets
import java.util.Date

import scala.collection.mutable.HashMap

import org.mockito.Mockito.{mock, times, verify, when}
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite}
import org.apache.spark.deploy.DeployMessages.{KillDriverResponse, RequestKillDriver}
import org.apache.spark.deploy.DeployTestUtils._
import org.apache.spark.deploy.master._
import org.apache.spark.rpc.{RpcEndpointRef, RpcEnv}

/** Exercises the master UI's kill endpoints against a mocked Master. */
class MasterWebUISuite extends SparkFunSuite with BeforeAndAfterAll {

  val conf = new SparkConf
  val securityMgr = new SecurityManager(conf)
  val rpcEnv = mock(classOf[RpcEnv])
  val master = mock(classOf[Master])
  val masterEndpointRef = mock(classOf[RpcEndpointRef])
  when(master.securityMgr).thenReturn(securityMgr)
  when(master.conf).thenReturn(conf)
  when(master.rpcEnv).thenReturn(rpcEnv)
  when(master.self).thenReturn(masterEndpointRef)
  val masterWebUI = new MasterWebUI(master, 0)

  override def beforeAll(): Unit = {
    super.beforeAll()
    masterWebUI.bind()
  }

  override def afterAll(): Unit = {
    masterWebUI.stop()
    super.afterAll()
  }

  test("kill application") {
    val appDesc = createAppDesc()
    // use new start date so it isn't filtered by UI
    val activeApp = new ApplicationInfo(
      new Date().getTime, "app-0", appDesc, new Date(), null, Int.MaxValue)

    when(master.idToApp).thenReturn(HashMap[String, ApplicationInfo]((activeApp.id, activeApp)))

    val url = s"http://localhost:${masterWebUI.boundPort}/app/kill/"
    val body = convPostDataToString(Map(("id", activeApp.id), ("terminate", "true")))
    val conn = sendHttpRequest(url, "POST", body)
    conn.getResponseCode

    // Verify the master was called to remove the active app
    verify(master, times(1)).removeApplication(activeApp, ApplicationState.KILLED)
  }

  test("kill driver") {
    val activeDriverId = "driver-0"
    val url = s"http://localhost:${masterWebUI.boundPort}/driver/kill/"
    val body = convPostDataToString(Map(("id", activeDriverId), ("terminate", "true")))
    val conn = sendHttpRequest(url, "POST", body)
    conn.getResponseCode

    // Verify that master was asked to kill driver with the correct id
    verify(masterEndpointRef, times(1)).ask[KillDriverResponse](RequestKillDriver(activeDriverId))
  }

  /** Joins the pairs as `name=value&name=value`; values are not URL-encoded. */
  private def convPostDataToString(data: Map[String, String]): String = {
    (for ((name, value) <- data) yield s"$name=$value").mkString("&")
  }

  /**
   * Opens a connection to `url` with the given method and, when `body` is non-empty, writes it
   * as a form-encoded request body. The response is not read here.
   */
  private def sendHttpRequest(
      url: String,
      method: String,
      body: String = ""): HttpURLConnection = {
    val conn = new URL(url).openConnection().asInstanceOf[HttpURLConnection]
    conn.setRequestMethod(method)
    if (body.nonEmpty) {
      // Content-Length counts bytes, so measure the encoded payload rather than the String.
      val payload = body.getBytes(StandardCharsets.UTF_8)
      conn.setDoOutput(true)
      conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded")
      conn.setRequestProperty("Content-Length", Integer.toString(payload.length))
      val out = new DataOutputStream(conn.getOutputStream)
      try {
        out.write(payload)
      } finally {
        out.close()
      }
    }
    conn
  }
}
Example 57
Source File: exercise02.scala From scala-for-the-Impatient with MIT License | 5 votes |
import scala.collection.mutable.{ListBuffer, HashMap}

/** Maps each character of `str` to the ascending list of positions where it occurs. */
def mapStrIndex(str:String)={
  val positions = new HashMap[Char,ListBuffer[Int]]()
  for ((ch, idx) <- str.zipWithIndex) {
    // First sighting creates the buffer; every sighting appends its index.
    positions.getOrElseUpdate(ch, ListBuffer[Int]()) += idx
  }
  positions
}

println(mapStrIndex("Mississippi"))
Example 58
Source File: exercise01.scala From scala-for-the-Impatient with MIT License | 5 votes |
import scala.collection.SortedSet
import scala.collection.mutable.HashMap

/** Maps each character of `str` to the sorted set of positions where it occurs. */
def mapStrIndex(str:String)={
  val positions = new HashMap[Char,SortedSet[Int]]()
  for ((ch, idx) <- str.zipWithIndex) {
    // The sets are immutable, so store the grown copy back under the same key.
    positions(ch) = positions.get(ch).fold(SortedSet(idx))(_ + idx)
  }
  positions
}

println(mapStrIndex("Mississippi"))
Example 59
Source File: BlockStoreShuffleFetcher.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.shuffle.hash

import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.HashMap
import scala.util.{Failure, Success, Try}

import org.apache.spark._
import org.apache.spark.serializer.Serializer
import org.apache.spark.shuffle.FetchFailedException
import org.apache.spark.storage.{BlockId, BlockManagerId, ShuffleBlockFetcherIterator, ShuffleBlockId}
import org.apache.spark.util.CompletionIterator

private[hash] object BlockStoreShuffleFetcher extends Logging {

  /**
   * Returns an iterator over all records of reduce partition `reduceId` of shuffle `shuffleId`.
   *
   * Map output locations are grouped by block manager address before fetching. A failed
   * shuffle block surfaces as a FetchFailedException, any other failed block as a
   * SparkException. Each record handed out increments the shuffle read record count.
   */
  def fetch[T](
      shuffleId: Int,
      reduceId: Int,
      context: TaskContext,
      serializer: Serializer)
    : Iterator[T] =
  {
    logDebug("Fetching outputs for shuffle %d, reduce %d".format(shuffleId, reduceId))
    val blockManager = SparkEnv.get.blockManager

    val startTime = System.currentTimeMillis
    val statuses = SparkEnv.get.mapOutputTracker.getServerStatuses(shuffleId, reduceId)
    logDebug("Fetching map output location for shuffle %d, reduce %d took %d ms".format(
      shuffleId, reduceId, System.currentTimeMillis - startTime))

    // address -> (index into `statuses`, size) for every map output stored at that address.
    val splitsByAddress = new HashMap[BlockManagerId, ArrayBuffer[(Int, Long)]]
    for (((address, size), index) <- statuses.zipWithIndex) {
      splitsByAddress.getOrElseUpdate(address, ArrayBuffer()) += ((index, size))
    }

    val blocksByAddress: Seq[(BlockManagerId, Seq[(BlockId, Long)])] = splitsByAddress.toSeq.map {
      case (address, splits) =>
        (address, splits.map(s => (ShuffleBlockId(shuffleId, s._1, reduceId), s._2)))
    }

    def unpackBlock(blockPair: (BlockId, Try[Iterator[Any]])) : Iterator[T] = {
      val blockId = blockPair._1
      val blockOption = blockPair._2
      blockOption match {
        case Success(block) => {
          block.asInstanceOf[Iterator[T]]
        }
        case Failure(e) => {
          blockId match {
            case ShuffleBlockId(shufId, mapId, _) =>
              val address = statuses(mapId.toInt)._1
              throw new FetchFailedException(address, shufId.toInt, mapId.toInt, reduceId, e)
            case _ =>
              throw new SparkException(
                "Failed to get block " + blockId + ", which is not a shuffle block", e)
          }
        }
      }
    }

    val blockFetcherItr = new ShuffleBlockFetcherIterator(
      context,
      SparkEnv.get.blockManager.shuffleClient,
      blockManager,
      blocksByAddress,
      serializer,
      SparkEnv.get.conf.getLong("spark.reducer.maxMbInFlight", 48) * 1024 * 1024)
    val itr = blockFetcherItr.flatMap(unpackBlock)

    val completionIter = CompletionIterator[T, Iterator[T]](itr, {
      context.taskMetrics.updateShuffleReadMetrics()
    })

    new InterruptibleIterator[T](context, completionIter) {
      val readMetrics = context.taskMetrics.createShuffleReadMetricsForDependency()
      override def next(): T = {
        readMetrics.incRecordsRead(1)
        delegate.next()
      }
    }
  }
}
Example 60
Source File: ExecutorsTab.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.exec

import scala.collection.mutable.HashMap

import org.apache.spark.ExceptionFailure
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.scheduler._
import org.apache.spark.storage.StorageStatusListener
import org.apache.spark.ui.{SparkUI, SparkUITab}

/** The "executors" UI tab; the thread-dump page is attached only when it is enabled. */
private[ui] class ExecutorsTab(parent: SparkUI) extends SparkUITab(parent, "executors") {
  val listener = parent.executorsListener
  val sc = parent.sc
  val threadDumpEnabled =
    sc.isDefined && parent.conf.getBoolean("spark.ui.threadDumpsEnabled", true)

  attachPage(new ExecutorsPage(this, threadDumpEnabled))
  if (threadDumpEnabled) {
    attachPage(new ExecutorThreadDumpPage(this))
  }
}

/**
 * :: DeveloperApi ::
 * Listener that accumulates per-executor task counts, durations and I/O totals, keyed by
 * executor id. Every event handler runs synchronized on this listener.
 */
@DeveloperApi
class ExecutorsListener(storageStatusListener: StorageStatusListener) extends SparkListener {
  val executorToTasksActive = HashMap[String, Int]()
  val executorToTasksComplete = HashMap[String, Int]()
  val executorToTasksFailed = HashMap[String, Int]()
  val executorToDuration = HashMap[String, Long]()
  val executorToInputBytes = HashMap[String, Long]()
  val executorToInputRecords = HashMap[String, Long]()
  val executorToOutputBytes = HashMap[String, Long]()
  val executorToOutputRecords = HashMap[String, Long]()
  val executorToShuffleRead = HashMap[String, Long]()
  val executorToShuffleWrite = HashMap[String, Long]()
  val executorToLogUrls = HashMap[String, Map[String, String]]()

  def storageStatusList = storageStatusListener.storageStatusList

  override def onExecutorAdded(executorAdded: SparkListenerExecutorAdded) = synchronized {
    val eid = executorAdded.executorId
    executorToLogUrls(eid) = executorAdded.executorInfo.logUrlMap
  }

  override def onTaskStart(taskStart: SparkListenerTaskStart) = synchronized {
    val eid = taskStart.taskInfo.executorId
    executorToTasksActive(eid) = executorToTasksActive.getOrElse(eid, 0) + 1
  }

  override def onTaskEnd(taskEnd: SparkListenerTaskEnd) = synchronized {
    val info = taskEnd.taskInfo
    if (info != null) {
      val eid = info.executorId
      // Default of 1 keeps the count at 0 if the matching start event was never recorded.
      executorToTasksActive(eid) = executorToTasksActive.getOrElse(eid, 1) - 1
      executorToDuration(eid) = executorToDuration.getOrElse(eid, 0L) + info.duration
      taskEnd.reason match {
        case e: ExceptionFailure =>
          executorToTasksFailed(eid) = executorToTasksFailed.getOrElse(eid, 0) + 1
        case _ =>
          executorToTasksComplete(eid) = executorToTasksComplete.getOrElse(eid, 0) + 1
      }

      // Update shuffle read/write
      val metrics = taskEnd.taskMetrics
      if (metrics != null) {
        metrics.inputMetrics.foreach { inputMetrics =>
          executorToInputBytes(eid) =
            executorToInputBytes.getOrElse(eid, 0L) + inputMetrics.bytesRead
          executorToInputRecords(eid) =
            executorToInputRecords.getOrElse(eid, 0L) + inputMetrics.recordsRead
        }
        metrics.outputMetrics.foreach { outputMetrics =>
          executorToOutputBytes(eid) =
            executorToOutputBytes.getOrElse(eid, 0L) + outputMetrics.bytesWritten
          executorToOutputRecords(eid) =
            executorToOutputRecords.getOrElse(eid, 0L) + outputMetrics.recordsWritten
        }
        metrics.shuffleReadMetrics.foreach { shuffleRead =>
          executorToShuffleRead(eid) =
            executorToShuffleRead.getOrElse(eid, 0L) + shuffleRead.remoteBytesRead
        }
        metrics.shuffleWriteMetrics.foreach { shuffleWrite =>
          executorToShuffleWrite(eid) =
            executorToShuffleWrite.getOrElse(eid, 0L) + shuffleWrite.shuffleBytesWritten
        }
      }
    }
  }

}
Example 61
Source File: UIData.scala From SparkCore with Apache License 2.0 | 5 votes |
package import org.apache.spark.JobExecutionStatus import org.apache.spark.executor.TaskMetrics import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo} import org.apache.spark.util.collection.OpenHashSet import scala.collection.mutable.HashMap private[jobs] object UIData { class ExecutorSummary { var taskTime : Long = 0 var failedTasks : Int = 0 var succeededTasks : Int = 0 var inputBytes : Long = 0 var inputRecords : Long = 0 var outputBytes : Long = 0 var outputRecords : Long = 0 var shuffleRead : Long = 0 var shuffleReadRecords : Long = 0 var shuffleWrite : Long = 0 var shuffleWriteRecords : Long = 0 var memoryBytesSpilled : Long = 0 var diskBytesSpilled : Long = 0 } class JobUIData( var jobId: Int = -1, var submissionTime: Option[Long] = None, var completionTime: Option[Long] = None, var stageIds: Seq[Int] = Seq.empty, var jobGroup: Option[String] = None, var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN, case class TaskUIData( var taskInfo: TaskInfo, var taskMetrics: Option[TaskMetrics] = None, var errorMessage: Option[String] = None) }
Example 62
Source File: PoolTable.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.jobs

import scala.collection.mutable.HashMap
import scala.xml.Node

import org.apache.spark.scheduler.{Schedulable, StageInfo}
import org.apache.spark.ui.UIUtils

/** Renders the given scheduler pools as an HTML table, one row per pool. */
private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) {
  private val listener = parent.listener

  /** Builds the table while holding the listener's lock. */
  def toNodeSeq: Seq[Node] = {
    listener.synchronized {
      poolTable(poolRow, pools)
    }
  }

  private def poolTable(
      makeRow: (Schedulable, HashMap[String, HashMap[Int, StageInfo]]) => Seq[Node],
      rows: Seq[Schedulable]): Seq[Node] = {
    <table class="table table-bordered table-striped table-condensed sortable table-fixed">
      <thead>
        <th>Pool Name</th>
        <th>Minimum Share</th>
        <th>Pool Weight</th>
        <th>Active Stages</th>
        <th>Running Tasks</th>
        <th>SchedulingMode</th>
      </thead>
      <tbody>
        {rows.map(r => makeRow(r, listener.poolToActiveStages))}
      </tbody>
    </table>
  }

  private def poolRow(
      p: Schedulable,
      poolToActiveStages: HashMap[String, HashMap[Int, StageInfo]]): Seq[Node] = {
    // A pool with no entry in the map has no active stages.
    val activeStages = poolToActiveStages.get(p.name).map(_.size).getOrElse(0)
    // Encode the pool name so characters such as '&', '#' or spaces cannot break the query.
    val href = "%s/stages/pool?poolname=%s"
      .format(UIUtils.prependBaseUri(parent.basePath), java.net.URLEncoder.encode(p.name, "UTF-8"))
    <tr>
      <td>
        <a href={href}>{p.name}</a>
      </td>
      <td>{p.minShare}</td>
      <td>{p.weight}</td>
      <td>{activeStages}</td>
      <td>{p.runningTasks}</td>
      <td>{p.schedulingMode}</td>
    </tr>
  }
}
Example 63
Source File: StageInfo.scala From SparkCore with Apache License 2.0 | 5 votes
package org.apache.spark.scheduler

import scala.collection.mutable.HashMap

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.storage.RDDInfo

  // NOTE(review): the StageInfo class and the header of its companion object are not visible
  // in this excerpt; the final closing brace below belongs to the enclosing definition.

  /**
   * Builds a StageInfo from a Stage.
   *
   * The RDD list holds the stage's own RDD followed by its narrow ancestors. When `numTasks`
   * is empty, `stage.numTasks` is used.
   */
  def fromStage(stage: Stage, numTasks: Option[Int] = None): StageInfo = {
    val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd)
    val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos
    new StageInfo(
      stage.id,
      stage.attemptId,
      stage.name,
      numTasks.getOrElse(stage.numTasks),
      rddInfos,
      stage.details)
  }
}
Source File: GroupedSumEvaluator.scala From SparkCore with Apache License 2.0 | 5 votes
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConversions.mapAsScalaMap
import scala.collection.Map
import scala.collection.mutable.HashMap

import org.apache.spark.util.StatCounter

/**
 * Approximate evaluator for per-key sums.
 *
 * Per-key StatCounters are merged as task outputs arrive; `currentResult` turns them into
 * bounded sum estimates.
 */
private[spark] class GroupedSumEvaluator[T](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] {

  var outputsMerged = 0
  var sums = new JHashMap[T, StatCounter]   // Sum of counts for each key

  /** Folds one task's counters into `sums`; a key seen for the first time is stored as-is. */
  override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]): Unit = {
    outputsMerged += 1
    val iter = taskResult.entrySet.iterator()
    while (iter.hasNext) {
      val entry = iter.next()
      val old = sums.get(entry.getKey)
      if (old != null) {
        old.merge(entry.getValue)
      } else {
        sums.put(entry.getKey, entry.getValue)
      }
    }
  }

  /**
   * Exact sums (confidence 1.0) once every output is merged, an empty map before the first
   * merge, otherwise an estimate with an interval around it. The Java map is handed back
   * through the imported `mapAsScalaMap` conversion.
   */
  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val sum = entry.getValue.sum
        result.put(entry.getKey, new BoundedDouble(sum, 1.0, sum, sum))
      }
      result
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val p = outputsMerged.toDouble / totalOutputs
      val studentTCacher = new StudentTCacher(confidence)
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val counter = entry.getValue
        val meanEstimate = counter.mean
        val meanVar = counter.sampleVariance / counter.count
        val countEstimate = (counter.count + 1 - p) / p
        val countVar = (counter.count + 1) * (1 - p) / (p * p)
        val sumEstimate = meanEstimate * countEstimate
        val sumVar = (meanEstimate * meanEstimate * countVar) +
                     (countEstimate * countEstimate * meanVar) +
                     (meanVar * countVar)
        val sumStdev = math.sqrt(sumVar)
        val confFactor = studentTCacher.get(counter.count)
        val low = sumEstimate - confFactor * sumStdev
        val high = sumEstimate + confFactor * sumStdev
        result.put(entry.getKey, new BoundedDouble(sumEstimate, confidence, low, high))
      }
      result
    }
  }
}
Source File: GroupedCountEvaluator.scala From SparkCore with Apache License 2.0 | 5 votes
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConversions.mapAsScalaMap
import scala.collection.Map
import scala.collection.mutable.HashMap
import scala.reflect.ClassTag

import org.apache.commons.math3.distribution.NormalDistribution

import org.apache.spark.util.collection.OpenHashMap

/**
 * Approximate evaluator for per-key counts: sums partial counts as task outputs are merged
 * and reports either exact values or an estimate with a normal-distribution interval.
 */
private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[OpenHashMap[T,Long], Map[T, BoundedDouble]] {

  var outputsMerged = 0
  var sums = new OpenHashMap[T,Long]()   // Sum of counts for each key

  /** Adds one task's per-key counts to the running sums. */
  override def merge(outputId: Int, taskResult: OpenHashMap[T,Long]): Unit = {
    outputsMerged += 1
    taskResult.foreach { case (key, count) =>
      sums.changeValue(key, count, _ + count)
    }
  }

  /**
   * Exact counts (confidence 1.0) once every output is merged, an empty map before the first
   * merge, otherwise a per-key estimate with a symmetric interval around the mean.
   */
  override def currentResult(): Map[T, BoundedDouble] = outputsMerged match {
    case merged if merged == totalOutputs =>
      val exact = new JHashMap[T, BoundedDouble](sums.size)
      sums.foreach { case (key, sum) =>
        exact.put(key, new BoundedDouble(sum, 1.0, sum, sum))
      }
      exact

    case 0 =>
      new HashMap[T, BoundedDouble]

    case merged =>
      val p = merged.toDouble / totalOutputs
      val confFactor =
        new NormalDistribution().inverseCumulativeProbability(1 - (1 - confidence) / 2)
      val estimates = new JHashMap[T, BoundedDouble](sums.size)
      sums.foreach { case (key, sum) =>
        val mean = (sum + 1 - p) / p
        val stdev = math.sqrt((sum + 1) * (1 - p) / (p * p))
        val halfWidth = confFactor * stdev
        estimates.put(key, new BoundedDouble(mean, confidence, mean - halfWidth, mean + halfWidth))
      }
      estimates
  }
}
Source File: GroupedMeanEvaluator.scala From SparkCore with Apache License 2.0 | 5 votes
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConversions.mapAsScalaMap
import scala.collection.Map
import scala.collection.mutable.HashMap

import org.apache.spark.util.StatCounter

/**
 * Approximate evaluator for per-key means.
 *
 * Per-key StatCounters are merged as task outputs arrive; `currentResult` turns them into
 * bounded mean estimates.
 */
private[spark] class GroupedMeanEvaluator[T](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] {

  var outputsMerged = 0
  var sums = new JHashMap[T, StatCounter]   // Sum of counts for each key

  /** Folds one task's counters into `sums`; a key seen for the first time is stored as-is. */
  override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]): Unit = {
    outputsMerged += 1
    val iter = taskResult.entrySet.iterator()
    while (iter.hasNext) {
      val entry = iter.next()
      val old = sums.get(entry.getKey)
      if (old != null) {
        old.merge(entry.getValue)
      } else {
        sums.put(entry.getKey, entry.getValue)
      }
    }
  }

  /**
   * Exact means (confidence 1.0) once every output is merged, an empty map before the first
   * merge, otherwise the running mean with an interval from `StudentTCacher`. The Java map is
   * handed back through the imported `mapAsScalaMap` conversion.
   */
  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val mean = entry.getValue.mean
        result.put(entry.getKey, new BoundedDouble(mean, 1.0, mean, mean))
      }
      result
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val studentTCacher = new StudentTCacher(confidence)
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val counter = entry.getValue
        val mean = counter.mean
        val stdev = math.sqrt(counter.sampleVariance / counter.count)
        val confFactor = studentTCacher.get(counter.count)
        val low = mean - confFactor * stdev
        val high = mean + confFactor * stdev
        result.put(entry.getKey, new BoundedDouble(mean, confidence, low, high))
      }
      result
    }
  }
}
Source File: filter_errorcode.scala From scalabpe with Apache License 2.0 | 5 votes
package scalabpe.plugin

import scala.collection.mutable.HashMap
import scala.xml.Node

import scalabpe.core.DummyActor
import scalabpe.core.HashMapStringAny
import scalabpe.core.Logging
import scalabpe.core.Request
import scalabpe.core.Response
import scalabpe.core.ResponseFilter
import scalabpe.core.Router

/** Names of the body fields that carry a service's result code and result message. */
class ErrorCodeDefine(val resultCodeName: String, val resultMsgName: String);

/**
 * Response filter that copies the response code into the configured result-code field and,
 * for non-zero codes, looks up a message through `router` and stores it in the configured
 * result-message field. Services without a `Service` entry in `cfgNode` are left untouched.
 */
class ErrorDescResponseFilter(val router: Router, val cfgNode: Node) extends ResponseFilter with Logging {

    val cfgs = new HashMap[Int, ErrorCodeDefine]()
    var localCacheServiceId = 0
    val dummyActor = new DummyActor()

    init

    /** Reads `@localCacheServiceId` and one ErrorCodeDefine per `Service` child of `cfgNode`. */
    def init() {

        var s = (cfgNode \ "@localCacheServiceId").toString
        if (s != "")
            localCacheServiceId = s.toInt

        val serviceNodes = (cfgNode \ "Service")
        for (p <- serviceNodes) {

            val serviceId = (p \ "@serviceId").toString.toInt
            val resultCodeName = (p \ "@resultCodeField").toString
            val resultMsgName = (p \ "@resultMsgField").toString

            cfgs.put(serviceId, new ErrorCodeDefine(resultCodeName, resultMsgName))

            // log.info("serviceId=%d,resultCodeName=%s,resultMsgName=%s".format(serviceId,resultCodeName,resultMsgName))
        }

        log.info("errorcode response filter created")
    }

    /**
     * Fills the result-code field when it is absent, then, for a non-zero code with no message
     * yet, sends a request with message id 1 to `localCacheServiceId` and stores a non-empty
     * "resultMsg" from the reply. Mutates `res.body` in place.
     */
    def filter(res: Response, req: Request): Unit = {

        // log.info("error response filter called, res={}",res.toString)

        val rd = cfgs.getOrElse(res.serviceId, null)
        if (rd == null) return

        if (rd.resultCodeName != "") {
            if (res.body.getOrElse(rd.resultCodeName, null) == null) {
                res.body.put(rd.resultCodeName, res.code)
            }
        }

        if (res.code == 0) return
        if (rd.resultMsgName == "") return
        if (res.body.getOrElse(rd.resultMsgName, null) != null) return

        val body = new HashMapStringAny()
        body.put("resultCode", res.code)

        // Named differently from the `req` parameter so the incoming request is not shadowed.
        val lookupReq = new Request(
            res.requestId + ":$",
            Router.DO_NOT_REPLY,
            res.sequence,
            res.encoding,
            localCacheServiceId,
            1,
            new HashMapStringAny(),
            body,
            dummyActor)

        val invokeResult = router.send(lookupReq)
        if (invokeResult == null) return

        val resultMsg = invokeResult.s("resultMsg", "")
        if (resultMsg != "")
            res.body.put(rd.resultMsgName, resultMsg)
    }

}
Source File: httpserverplugin_staticfile.scala From scalabpe with Apache License 2.0 | 5 votes
package scalabpe.plugin.http

import java.io.File
import java.net.URLEncoder
import java.text.SimpleDateFormat
import java.util.Calendar
import java.util.GregorianCalendar
import java.util.Locale
import java.util.TimeZone

import scala.collection.mutable.HashMap

import org.jboss.netty.handler.codec.http.HttpHeaders

import scalabpe.core.HashMapStringAny

/**
 * HTTP server plugin that serves a file named in the response body and fills in the ETag,
 * cache and Content-Disposition headers from other body fields.
 */
class StaticFilePlugin extends HttpServerPlugin with HttpServerStaticFilePlugin {

    val ETAG_TAG = "etag"
    val EXPIRE_TAG = "expire"
    val ATTACHMENT = "attachment"
    val FILENAME = "filename"

    val HTTP_DATE_FORMAT = "EEE, dd MMM yyyy HH:mm:ss zzz";
    val HTTP_DATE_GMT_TIMEZONE = "GMT";

    // SimpleDateFormat is not thread-safe, so each thread gets its own instance.
    val df_tl = new ThreadLocal[SimpleDateFormat]() {
        override def initialValue(): SimpleDateFormat = {
            val df = new SimpleDateFormat(HTTP_DATE_FORMAT, Locale.US)
            df.setTimeZone(TimeZone.getTimeZone(HTTP_DATE_GMT_TIMEZONE));
            df
        }
    }

    /**
     * Returns the path in `body("filename")`, or null when it is empty or the file does not
     * exist.
     *
     * Side effects: may add ETag, Cache-Control, Expires and Content-Disposition to `headers`,
     * and stores the lower-cased file extension in `body("__file_ext__")`. An `expire` of 0 or
     * -1 means "no-cache"; any other value is a max-age in seconds. The attachment header is
     * added unless `body("attachment")` is set to something other than "1".
     */
    def generateStaticFile(serviceId: Int, msgId: Int, errorCode: Int, errorMessage: String,
                           body: HashMapStringAny, pluginParam: String,
                           headers: HashMap[String, String]): String = {

        val filename = body.ns(FILENAME)
        if (filename == "" || !new File(filename).exists()) {
            return null
        }

        if (body.ns(ETAG_TAG) != "") {
            headers.put("ETag", body.ns(ETAG_TAG))
        }

        if (body.ns(EXPIRE_TAG) != "") {
            body.i(EXPIRE_TAG) match {
                case 0 | -1 =>
                    headers.put(HttpHeaders.Names.CACHE_CONTROL, "no-cache")
                case n => // seconds
                    val time = new GregorianCalendar();
                    time.add(Calendar.SECOND, n);
                    headers.put(HttpHeaders.Names.EXPIRES, df_tl.get.format(time.getTime()));
                    headers.put(HttpHeaders.Names.CACHE_CONTROL, "max-age=" + n);
            }
        }

        val ext = parseExt(filename)
        if (ext != "")
            body.put("__file_ext__", ext)

        if (body.ns(ATTACHMENT, "1") == "1") {
            val v = "attachment; filename=\"%s\"".format(URLEncoder.encode(parseFilename(filename), "UTF-8"))
            headers.put("Content-Disposition", v)
        }

        filename
    }

    /** Part of `name` after the last "/", or `name` itself when there is no "/". */
    def parseFilename(name: String): String = {
        val p = name.lastIndexOf("/")
        if (p < 0) return name
        name.substring(p + 1)
    }

    /** Lower-cased part of `name` after the last ".", or "" when there is no ".". */
    def parseExt(name: String): String = {
        val p = name.lastIndexOf(".")
        if (p < 0) return ""
        name.substring(p + 1).toLowerCase()
    }

}
Source File: format_flow.scala From scalabpe with Apache License 2.0 | 5 votes
package scalabpe

// NOTE(review): the target of this import is missing in SOURCE — restore it from upstream.
import
import scala.collection.mutable.HashMap
import scala.collection.mutable.ArrayBuffer
// NOTE(review): the targets of the next two imports are missing in SOURCE — restore them from
// upstream.
import
import
import scala.xml._
import scala.collection.mutable._
import scalabpe.core._
import org.apache.commons.lang.StringUtils
import Tools._

/** Command-line tool that walks a directory and processes every `.flow` file in it. */
object FormatFlowTool {

    /** Prints the usage text. */
    def help() {
        println(
""" usage: scalabpe.FormatFlowTool [options] dirname options: -h|--help 帮助信息 """)
    }

    /**
     * Collects the non-option arguments under the "files" key.
     * Returns null for -h/--help or for any other argument starting with "-".
     */
    def parseArgs(args:Array[String]):HashMapStringAny = {
        val map = HashMapStringAny()
        var i = 0
        val files = ArrayBufferString()
        while(i < args.size) {
            args(i) match {
                case "-h" | "--help" =>
                    return null
                case s if s.startsWith("-") =>
                    println("invalid option "+s)
                    return null
                case _ =>
                    files += args(i)
                    i += 1
            }
        }
        map.put("files",files)
        map
    }

    /**
     * Entry point: prints help when the arguments are invalid or no directory is given;
     * otherwise processes the first directory argument.
     */
    def main(args:Array[String]) {

        var params = parseArgs(args)
        if( params == null ) {
            help()
            return
        }

        var files = params.nls("files")
        if( files.size == 0 ) {
            help()
            return
        }

        var dir = files(0)
        // Fall back to compose_conf/<dir> when <dir> itself does not exist.
        if( !new File(dir).exists() ) {
            val p1 = "compose_conf"+File.separator+dir
            if( new File(p1).exists ) {
                dir = p1
            } else {
                println("not a valid dir, dir="+dir)
                return
            }
        }

        processDir(dir,params)
    }

    /** Calls processFile for every entry of `dir` whose name ends with ".flow". */
    def processDir(dir:String,params:HashMapStringAny) {
        val files = new File(dir).listFiles.filter(_.getName.endsWith(".flow"))
        for(f <- files ) {
            processFile(dir,f.getName,params)
        }
    }

    /** Reads the lines of one flow file; nothing is done with them yet. */
    def processFile(dir:String,f:String,params:HashMapStringAny) {
        val lines = readAllLines(dir+File.separator+f)
        // TODO
    }

}
Source File: GraphMap.scala From stellar-random-walk with Apache License 2.0 | 5 votes
package au.csiro.data61.randomwalk.algorithm

import scala.collection.mutable
import scala.collection.mutable.{ArrayBuffer, HashMap}

  // NOTE(review): the enclosing definition and the fields used below are not visible in this
  // excerpt; the final closing brace below belongs to it.

  /** Zeroes both counters and empties every backing collection. */
  def reset: Unit = {
    indexCounter = 0
    offsetCounter = 0
    srcVertexMap.clear()
    offsets.clear()
    lengths.clear()
    edges.clear()
    vertexPartitionMap.clear
  }

  /**
   * Neighbours of vertex `vid` as stored in `edges`.
   *
   * Returns null when `vid` is unknown and an empty array when its stored index is -1.
   */
  def getNeighbors(vid: Int): Array[(Int, Float)] = {
    srcVertexMap.get(vid) match {
      case None =>
        null
      case Some(slot) if slot == -1 =>
        Array.empty[(Int, Float)]
      case Some(slot) =>
        val from = offsets(slot)
        val until = from + lengths(slot)
        edges.slice(from, until).toArray
    }
  }
}
Source File: HeaderEnum.scala From testchipip with BSD 3-Clause "New" or "Revised" License | 5 votes
package testchipip

import chisel3._
import chisel3.util.log2Up
import scala.collection.mutable.{HashMap, ListBuffer}

/** Named integer constants that can be emitted as C `#define` lines or used as UInt literals. */
class HeaderEnum(val prefix: String) {
  val h = new HashMap[String,Int]

  /** One `#define <PREFIX>_<NAME> <value>` line per entry, ordered by value. */
  def makeHeader(): String = {
    val byValue = h.toSeq.sortBy(_._2)
    val defines = for ((name, value) <- byValue)
      yield s"#define ${prefix.toUpperCase}_${name.toUpperCase} $value\n"
    defines.mkString
  }

  /** UInt literal for entry `s`, as wide as `log2Up` of the number of entries. */
  def apply(s: String): UInt = {
    val width = log2Up(h.size)
    h(s).U(width.W)
  }
}

object HeaderEnum {
  val contents = new ListBuffer[String]

  /**
   * Builds an enum whose names are numbered from 0 in argument order and records its header
   * text in `contents` unless an identical header is already there.
   */
  def apply(prefix: String, names: String*): HeaderEnum = {
    val created = new HeaderEnum(prefix)
    for ((name, index) <- names.zipWithIndex) {
      created.h.put(name, index)
    }
    val headerText = created.makeHeader()
    val alreadyRecorded = HeaderEnum.contents.contains(headerText)
    if (!alreadyRecorded) HeaderEnum.contents += headerText
    created
  }
}
Source File: Mapper.scala From CSYE7200_Old with MIT License | 5 votes
package edu.neu.coe.csye7200.mapreduce

// NOTE(review): the qualifier of this import was lost in extraction (`import{Actor, ...}`).
// akka.actor is assumed because it is the usual home of these three names; confirm.
import akka.actor.{Actor, ActorLogging, ActorRef}
import scala.collection.mutable
import scala.collection.mutable.HashMap
import scala.util._

/**
 * A mapper that tolerates failures: successful results are grouped by key and failures are
 * collected and returned alongside them.
 */
class Mapper_Forgiving[K1,V1,K2,V2](f: (K1,V1)=>(K2,V2)) extends Mapper[K1,V1,K2,V2](f) {

  /**
   * Splits the attempted results into grouped successes and collected failures.
   *
   * @param v2k2ts one `Try` per attempted mapping
   * @return the values grouped by key (most recently seen value first) and the throwables of
   *         the failed attempts in encounter order
   */
  override def prepareReply(v2k2ts: Seq[Try[(K2,V2)]]) = {
    val v2sK2m = mutable.HashMap[K2,Seq[V2]]()
    // A buffer is required here: the previous immutable Seq combined with `xs :+ x` built a new
    // sequence and threw it away, so the returned failure list was always empty.
    val xs = mutable.ListBuffer[Throwable]()
    for (v2k2t <- v2k2ts; v2k2e = Master.sequence(v2k2t))
      v2k2e match {
        case Right((k2,v2)) => v2sK2m.put(k2, v2 +: v2sK2m.getOrElse(k2, Nil))
        case Left(x) => xs += x
      }
    // Typed as Seq so the inferred result type is the same as before the fix.
    val failures: Seq[Throwable] = xs.toList
    (v2sK2m.toMap, failures)
  }
}

/** Message carrying the key/value pairs to be mapped. */
case class Incoming[K, V](m: Seq[(K,V)]) {
  override def toString = s"Incoming: with ${m.size} elements"
}

object Incoming {
  /** Wraps plain values, pairing each one with a null key. */
  def sequence[K,V](vs: Seq[V]): Incoming[K,V] =
    Incoming((vs zip Stream.continually(null.asInstanceOf[K])).map{_.swap})

  /** Wraps the entries of a map. */
  def map[K, V](vKm: Map[K,V]): Incoming[K,V] = Incoming(vKm.toSeq)
}

object Mapper {
}
Source File: Labels.scala From jgo with GNU General Public License v3.0 | 5 votes
// NOTE(review): the listing reads `package package parser.stmts`; the name of the first package
// clause was lost in extraction and could not be recovered from the visible code.
package parser.stmts

import parser.exprs._
import parser.scoped._
import parser.funcs._
import interm._
import types._
import symbol._
import codeseq._
import instr._
import scala.collection.mutable.{HashMap, HashSet, ListBuffer}
import scala.{collection => coll}
import coll.{immutable => imm}

/**
 * Tracks label definitions and uses so that duplicate definitions and uses of labels that are
 * never defined can be reported.
 */
trait Labels {
  /** Names that have been defined. */
  private val seenDefs = HashSet[String]()
  /** Names used before (or without) a definition, with the positions of those uses. */
  private val unseenDefs = HashMap[String, ListBuffer[Pos]]()
  /** One label object per name, shared by definitions and uses. */
  private val lbls = HashMap[String, UserLabel]()

  /** The label object for `name`, created on first request. */
  private def labelFor(name: String): UserLabel =
    lbls.getOrElseUpdate(name, new UserLabel(name))

  /**
   * Records the definition of `name`.
   *
   * @return the name paired with its label, or with a problem when it was already defined
   */
  def defLabel(name: String, pos: Pos): (String, Err[UserLabel]) = {
    val outcome: Err[UserLabel] =
      if (seenDefs(name)) {
        problem("label %s already defined", name)(pos)
      } else {
        seenDefs += name
        unseenDefs -= name
        result(labelFor(name))
      }
    (name, outcome)
  }

  /** Records a use of `name` at `pos` (remembered as pending if not yet defined). */
  def useLabel(pos: Pos, name: String): UserLabel = {
    val alreadyDefined = seenDefs contains name
    if (!alreadyDefined) {
      val pendingUses = unseenDefs.getOrElseUpdate(name, new ListBuffer)
      pendingUses += pos
    }
    labelFor(name)
  }

  /** Builds the code for a goto targeting `name`. */
  def procGoto(pos: Pos, name: String): Err[CodeBuilder] =
    result(Goto(useLabel(pos, name)))

  /** Chains one problem per recorded use of a label that was never defined. */
  def checkForUndefedLabels: Err[Unit] = {
    val danglingUses = unseenDefs.iterator.flatMap { case (lblName, positions) =>
      positions.iterator.map(pos => (lblName, pos))
    }
    danglingUses.foldLeft[Err[Unit]](result(())) { case (issues, (lblName, pos)) =>
      issues then problem("target label not found: %s", lblName)(pos)
    }
  }
}
Source File: LocalKMeans.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet

import breeze.linalg.{squaredDistance, DenseVector, Vector}

/** Single-machine k-means over randomly generated points. */
object LocalKMeans {
  val N = 1000
  val R = 1000 // Scaling factor
  val D = 10
  val K = 10
  val convergeDist = 0.001
  val rand = new Random(42)

  /** Generates N points of dimension D with coordinates in [0, R). */
  def generateData: Array[DenseVector[Double]] = {
    def generatePoint(i: Int): DenseVector[Double] = {
      DenseVector.fill(D) {rand.nextDouble * R}
    }
    Array.tabulate(N)(generatePoint)
  }

  /**
   * Finds the centre nearest to `p`.
   *
   * @param centers centres keyed 1..centers.size
   * @return the key of the nearest centre, or 0 when `centers` is empty
   */
  def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = {
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val vCurr = centers.get(i).get
      val tempDist = squaredDistance(p, vCurr)
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning(): Unit = {
    // NOTE(review): the text after "Please use" was lost in extraction; the literal is kept as
    // it appears in the listing.
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use
        |for more conventional use.
      """.stripMargin)
  }

  /**
   * Picks K distinct data points as initial centres, then repeatedly reassigns points and
   * recomputes centres until the total squared movement is at most `convergeDist`.
   */
  def main(args: Array[String]): Unit = {

    showWarning()

    val data = generateData
    val points = new HashSet[Vector[Double]]
    val kPoints = new HashMap[Int, Vector[Double]]
    var tempDist = 1.0

    while (points.size < K) {
      points.add(data(rand.nextInt(N)))
    }

    // Restored: `iter` is created for exactly this loop and was otherwise unused.
    val iter = points.iterator
    for (i <- 1 to points.size) {
      kPoints.put(i, iter.next())
    }

    println("Initial centers: " + kPoints)

    while (tempDist > convergeDist) {
      // Restored receivers (`data.map`, `mappings.map`, `pointStats.map`): every data point is
      // tagged with its nearest centre, grouped by centre, summed, then averaged.
      val closest = data.map(p => (closestPoint(p, kPoints), (p, 1)))

      val mappings = closest.groupBy[Int](x => x._1)

      val pointStats = mappings.map { pair =>
        pair._2.reduceLeft[(Int, (Vector[Double], Int))] {
          case ((id1, (p1, c1)), (id2, (p2, c2))) => (id1, (p1 + p2, c1 + c2))
        }
      }

      val newPoints = pointStats.map { mapping =>
        (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))
      }

      tempDist = 0.0
      for (mapping <- newPoints) {
        tempDist += squaredDistance(kPoints.get(mapping._1).get, mapping._2)
      }

      for (newP <- newPoints) {
        kPoints.put(newP._1, newP._2)
      }
    }

    println("Final centers: " + kPoints)
  }
}
// scalastyle:on println
Source File: JsonUtils.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes
package org.apache.spark.sql.kafka010

import scala.collection.mutable.HashMap
import scala.util.control.NonFatal

import org.apache.kafka.common.TopicPartition
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization

  // NOTE(review): the declaration that encloses this method is not part of this listing; only
  // its closing brace is visible at the end.

  /**
   * Serializes per-partition offsets as nested JSON: topic -> partition -> offset.
   * Partitions are visited in (topic, partition) order so the output is more deterministic.
   */
  def partitionOffsets(partitionOffsets: Map[TopicPartition, Long]): String = {
    val byTopic = new HashMap[String, HashMap[Int, Long]]()
    val ordered = partitionOffsets.keySet.toSeq.sortBy(tp => (tp.topic, tp.partition))
    for (tp <- ordered) {
      val offset = partitionOffsets(tp)
      val perPartition = byTopic.getOrElseUpdate(tp.topic, new HashMap[Int, Long])
      perPartition.update(tp.partition, offset)
    }
    Serialization.write(byTopic)
  }
}
Source File: ThriftServerMonitor.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes
package org.apache.spark.sql.hive.thriftserver.monitor

import scala.collection.mutable.HashMap

import org.apache.spark.SparkException
import org.apache.spark.internal.Logging
import org.apache.spark.sql.hive.thriftserver.ui.ThriftServerTab

/** Per-user registry of thrift-server listeners and UI tabs. */
object ThriftServerMonitor extends Logging {

  private[this] val uiTabs = new HashMap[String, ThriftServerTab]()

  private[this] val listeners = new HashMap[String, ThriftServerListener]()

  /** Registers (or replaces) the listener of `user`. */
  def setListener(user: String, sparkListener: ThriftServerListener): Unit = {
    listeners.put(user, sparkListener)
  }

  /**
   * @return the listener registered for `user`
   * @throws SparkException when no listener is registered for `user`
   */
  def getListener(user: String): ThriftServerListener = {
    listeners.getOrElse(user, throw new SparkException(s"Listener does not init for user[$user]"))
  }

  /** Registers (or replaces) the UI tab of `user`. */
  def addUITab(user: String, ui: ThriftServerTab): Unit = {
    uiTabs.put(user, ui)
  }

  /**
   * Forgets the listener of `user` and detaches and forgets the user's UI tab, if any.
   *
   * The tab is removed from the registry as well as detached: previously it stayed registered
   * after being detached, so the entry was retained indefinitely and detachAllUITabs would
   * detach it a second time.
   */
  def detachUITab(user: String): Unit = {
    listeners.remove(user)
    uiTabs.remove(user).foreach(_.detach())
  }

  /** Detaches every tab that is still registered. */
  def detachAllUITabs(): Unit = {
    uiTabs.values.foreach(_.detach())
  }
}
Source File: MasterWebUI.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes
package org.apache.spark.deploy.master.ui

import scala.collection.mutable.HashMap

import org.eclipse.jetty.servlet.ServletContextHandler

import org.apache.spark.deploy.master.Master
import org.apache.spark.internal.Logging
import org.apache.spark.ui.{SparkUI, WebUI}
import org.apache.spark.ui.JettyUtils._

  // NOTE(review): the class header that encloses these members is not part of this listing;
  // only its closing brace is visible below.

  /** Attaches the pages, the static-resource handler and the two POST-only kill endpoints. */
  def initialize(): Unit = {
    val masterPage = new MasterPage(this)
    attachPage(new ApplicationPage(this))
    attachPage(masterPage)

    val staticHandler = createStaticHandler(MasterWebUI.STATIC_RESOURCE_DIR, "/static")
    attachHandler(staticHandler)

    val killAppHandler = createRedirectHandler(
      "/app/kill", "/", masterPage.handleAppKillRequest, httpMethods = Set("POST"))
    attachHandler(killAppHandler)

    val killDriverHandler = createRedirectHandler(
      "/driver/kill", "/", masterPage.handleDriverKillRequest, httpMethods = Set("POST"))
    attachHandler(killDriverHandler)
  }

  /** Attaches a proxy handler under `/proxy/<id>` pointing at `target` (trailing "/" removed). */
  def addProxyTargets(id: String, target: String): Unit = {
    val handler = createProxyHandler("/proxy/" + id, target.stripSuffix("/"))
    attachHandler(handler)
    proxyHandlers.update(id, handler)
  }

  /** Detaches and forgets the proxy handler registered for `id`, if there is one. */
  def removeProxyTargets(id: String): Unit =
    for (handler <- proxyHandlers.remove(id)) detachHandler(handler)
}

private[master] object MasterWebUI {
  private val STATIC_RESOURCE_DIR = SparkUI.STATIC_RESOURCE_DIR
}
Source File: PoolTable.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes
// NOTE(review): this listing lost text during extraction: the package name, one import target,
// and several expressions inside the class (each marked below). The code is left exactly as
// found; it will not compile until the missing pieces are restored from the original file.
package
import
import scala.collection.mutable.HashMap
import scala.xml.Node
import org.apache.spark.scheduler.{Schedulable, StageInfo}
import org.apache.spark.ui.UIUtils

/** Renders the scheduler pools as an HTML table, one row per pool. */
private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) {
  private val listener = parent.progressListener

  /** Builds the table while holding the listener's lock. */
  def toNodeSeq: Seq[Node] = {
    listener.synchronized {
      poolTable(poolRow, pools)
    }
  }

  // Renders the table header and, presumably, one `makeRow` result per element of `rows`.
  // NOTE(review): the expression in front of `=> makeRow(...)` is missing; a map over `rows`
  // binding `r` is the likely original - confirm.
  private def poolTable(
      makeRow: (Schedulable, HashMap[String, HashMap[Int, StageInfo]]) => Seq[Node],
      rows: Seq[Schedulable]): Seq[Node] = {
    <table class="table table-bordered table-striped table-condensed sortable table-fixed">
      <thead>
        <th>Pool Name</th>
        <th>Minimum Share</th>
        <th>Pool Weight</th>
        <th>Active Stages</th>
        <th>Running Tasks</th>
        <th>SchedulingMode</th>
      </thead>
      <tbody>
        { => makeRow(r, listener.poolToActiveStages))}
      </tbody>
    </table>
  }

  // Renders one pool: a link to its page, then min share, weight, active-stage count (0 when
  // the pool has no entry in `poolToActiveStages`), running tasks and scheduling mode.
  // NOTE(review): the lookup key, the argument of URLEncoder.encode and the link text are
  // missing; all three presumably referred to the pool's name - confirm.
  private def poolRow(
      p: Schedulable,
      poolToActiveStages: HashMap[String, HashMap[Int, StageInfo]]): Seq[Node] = {
    val activeStages = poolToActiveStages.get( match {
      case Some(stages) => stages.size
      case None => 0
    }
    val href = "%s/stages/pool?poolname=%s"
      .format(UIUtils.prependBaseUri(parent.basePath, sparkUser = parent.sparkUser), URLEncoder.encode(, "UTF-8"))
    <tr>
      <td>
        <a href={href}>{}</a>
      </td>
      <td>{p.minShare}</td>
      <td>{p.weight}</td>
      <td>{activeStages}</td>
      <td>{p.runningTasks}</td>
      <td>{p.schedulingMode}</td>
    </tr>
  }
}
Source File: ConfigReader.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes
package org.apache.spark.internal.config

import java.util.{Map => JMap}
import java.util.regex.Pattern

import scala.collection.mutable.HashMap
import scala.util.matching.Regex

private object ConfigReader {

  /** Matches `${name}` and `${prefix:name}`; group 1 is the optional prefix, group 2 the name. */
  private val REF_RE = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r

}

  // NOTE(review): the class header that encloses the methods below (and declares `bindings`)
  // is not part of this listing; only its closing brace is visible at the end.

  /** Expands every `${...}` reference in `input`; see the two-argument overload. */
  def substitute(input: String): String = substitute(input, Set())

  /**
   * Expands references recursively.
   *
   * @param input    text to expand; `null` is returned unchanged
   * @param usedRefs references already being expanded on this path
   * @return `input` with each resolvable reference replaced by its (expanded) value;
   *         references that cannot be resolved are left as written
   * @throws IllegalArgumentException when a reference refers back to itself
   */
  private def substitute(input: String, usedRefs: Set[String]): String = {
    if (input != null) {
      ConfigReader.REF_RE.replaceAllIn(input, { m =>
        // Restored from the pattern: it has exactly two capture groups, and `prefix` is
        // null-checked below, which matches the optional first group.
        val prefix = m.group(1)
        val name = m.group(2)
        val ref = if (prefix == null) name else s"$prefix:$name"
        require(!usedRefs.contains(ref), s"Circular reference in $input: $ref")

        val replacement = bindings.get(prefix)
          .flatMap(_.get(name))
          .map { v => substitute(v, usedRefs + ref) }
          .getOrElse(m.matched)
        // The replacement is inserted literally, so `$` and `\` in it must be quoted.
        Regex.quoteReplacement(replacement)
      })
    } else {
      input
    }
  }

}
Source File: StageInfo.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes
// NOTE(review): this listing lost text during extraction: one import target, the declaration
// enclosing `fromStage` (only its closing brace remains), and several expressions inside the
// method (marked below). The code is left exactly as found and does not compile as shown.
package org.apache.spark.scheduler
import scala.collection.mutable.HashMap
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.executor.TaskMetrics
import

  /**
   * Builds a StageInfo describing `stage`.
   *
   * The task count is `numTasks` when given, otherwise `stage.numTasks`. The RDD infos start
   * with the stage's own RDD and are followed by `ancestorRddInfos`.
   */
  def fromStage(
      stage: Stage,
      attemptId: Int,
      numTasks: Option[Int] = None,
      taskMetrics: TaskMetrics = null,
      taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty
    ): StageInfo = {
    // NOTE(review): the right-hand side of `ancestorRddInfos` is missing.
    val ancestorRddInfos =
    val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos
    // NOTE(review): three constructor arguments are missing (the empty slots between commas).
    new StageInfo(, attemptId,, numTasks.getOrElse(stage.numTasks), rddInfos,, stage.details, taskMetrics, taskLocalityPreferences)
  }
}
Source File: GroupedCountEvaluator.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes
package org.apache.spark.partial

import scala.collection.Map
import scala.collection.mutable.HashMap
import scala.reflect.ClassTag

import org.apache.spark.util.collection.OpenHashMap

/**
 * Approximate evaluator for per-key counts: merges the counts of finished outputs and turns
 * them into bounded estimates.
 *
 * @param totalOutputs number of outputs expected in total
 * @param confidence   confidence passed to the bound computation for partial results
 */
private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] {

  private var outputsMerged = 0
  private val sums = new OpenHashMap[T, Long]()   // Sum of counts for each key

  /** Adds the per-key counts of one finished output to the running sums. */
  override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]): Unit = {
    outputsMerged += 1
    taskResult.foreach { case (key, value) =>
      sums.changeValue(key, value, _ + value)
    }
  }

  /**
   * @return exact counts once every output has been merged, an empty map before the first
   *         merge, and otherwise bounds from `CountEvaluator.bound` for the merged fraction
   */
  override def currentResult(): Map[T, BoundedDouble] = {
    // The `sums.map` receiver of both branches was lost in extraction; `sums` is the only
    // (key, count) collection in scope.
    if (outputsMerged == totalOutputs) {
      sums.map { case (key, sum) => (key, new BoundedDouble(sum, 1.0, sum, sum)) }.toMap
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val p = outputsMerged.toDouble / totalOutputs
      sums.map { case (key, sum) => (key, CountEvaluator.bound(confidence, sum, p)) }.toMap
    }
  }
}
Source File: MasterWebUISuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes
// NOTE(review): this listing lost text during extraction: one import target, the qualifier of
// the `{HttpURLConnection, URL}` import, and two expressions in the "kill application" test
// (marked below). The code is left exactly as found and does not compile as shown.
package org.apache.spark.deploy.master.ui
import
import{HttpURLConnection, URL}
import java.nio.charset.StandardCharsets
import java.util.Date
import scala.collection.mutable.HashMap
import org.mockito.Mockito.{mock, times, verify, when}
import org.scalatest.BeforeAndAfterAll
import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite}
import org.apache.spark.deploy.DeployMessages.{KillDriverResponse, RequestKillDriver}
import org.apache.spark.deploy.DeployTestUtils._
import org.apache.spark.deploy.master._
import org.apache.spark.rpc.{RpcEndpointRef, RpcEnv}

/** Exercises the kill endpoints of MasterWebUI against a mocked Master. */
class MasterWebUISuite extends SparkFunSuite with BeforeAndAfterAll {

  val conf = new SparkConf
  val securityMgr = new SecurityManager(conf)
  val rpcEnv = mock(classOf[RpcEnv])
  val master = mock(classOf[Master])
  val masterEndpointRef = mock(classOf[RpcEndpointRef])
  // The mocked master hands out the real conf/security manager and the mocked RPC pieces.
  when(master.securityMgr).thenReturn(securityMgr)
  when(master.conf).thenReturn(conf)
  when(master.rpcEnv).thenReturn(rpcEnv)
  when(master.self).thenReturn(masterEndpointRef)
  // Port 0 is requested; the tests read the actual port from `boundPort`.
  val masterWebUI = new MasterWebUI(master, 0)

  override def beforeAll() {
    super.beforeAll()
    masterWebUI.bind()
  }

  override def afterAll() {
    masterWebUI.stop()
    super.afterAll()
  }

  test("kill application") {
    val appDesc = createAppDesc()
    // use new start date so it isn't filtered by UI
    val activeApp = new ApplicationInfo(
      new Date().getTime, "app-0", appDesc, new Date(), null, Int.MaxValue)

    // NOTE(review): the map key is missing here and the "id" value is missing below;
    // presumably both were the id of `activeApp` - confirm.
    when(master.idToApp).thenReturn(HashMap[String, ApplicationInfo]((, activeApp)))

    val url = s"http://localhost:${masterWebUI.boundPort}/app/kill/"
    val body = convPostDataToString(Map(("id",, ("terminate", "true")))
    val conn = sendHttpRequest(url, "POST", body)
    // Reading the response code forces the request to be sent.
    conn.getResponseCode

    // Verify the master was called to remove the active app
    verify(master, times(1)).removeApplication(activeApp, ApplicationState.KILLED)
  }

  test("kill driver") {
    val activeDriverId = "driver-0"
    val url = s"http://localhost:${masterWebUI.boundPort}/driver/kill/"
    val body = convPostDataToString(Map(("id", activeDriverId), 
("terminate", "true")))
    val conn = sendHttpRequest(url, "POST", body)
    // Reading the response code forces the request to be sent.
    conn.getResponseCode

    // Verify that master was asked to kill driver with the correct id
    verify(masterEndpointRef, times(1)).ask[KillDriverResponse](RequestKillDriver(activeDriverId))
  }

  /** Joins the entries as `name=value` pairs separated by `&` (values are not URL-encoded). */
  private def convPostDataToString(data: Map[String, String]): String = {
    (for ((name, value) <- data) yield s"$name=$value").mkString("&")
  }

  /**
   * Opens a connection to `url` with the given method and, when `body` is non-empty, writes it
   * as a form-encoded request body. The response is not read here.
   */
  private def sendHttpRequest(
      url: String,
      method: String,
      body: String = ""): HttpURLConnection = {
    val conn = new URL(url).openConnection().asInstanceOf[HttpURLConnection]
    conn.setRequestMethod(method)
    if (body.nonEmpty) {
      conn.setDoOutput(true)
      conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded")
      // NOTE(review): the length is counted in characters while the body is written as UTF-8
      // bytes; the two agree only for ASCII bodies such as the ones used in this suite.
      conn.setRequestProperty("Content-Length", Integer.toString(body.length))
      val out = new DataOutputStream(conn.getOutputStream)
      out.write(body.getBytes(StandardCharsets.UTF_8))
      out.close()
    }
    conn
  }
}
Source File: LocalKMeans.scala From iolap with Apache License 2.0 | 5 votes
package org.apache.spark.examples

import java.util.Random

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet

import breeze.linalg.{Vector, DenseVector, squaredDistance}

import org.apache.spark.SparkContext._

/** Single-machine k-means over randomly generated points. */
object LocalKMeans {
  val N = 1000
  val R = 1000    // Scaling factor
  val D = 10
  val K = 10
  val convergeDist = 0.001
  val rand = new Random(42)

  /** Generates N points of dimension D with coordinates in [0, R). */
  def generateData: Array[DenseVector[Double]] = {
    def generatePoint(i: Int): DenseVector[Double] = {
      DenseVector.fill(D){rand.nextDouble * R}
    }
    Array.tabulate(N)(generatePoint)
  }

  /**
   * Finds the centre nearest to `p`.
   *
   * @param centers centres keyed 1..centers.size
   * @return the key of the nearest centre, or 0 when `centers` is empty
   */
  def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = {
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val vCurr = centers.get(i).get
      val tempDist = squaredDistance(p, vCurr)
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning(): Unit = {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use the KMeans method found in org.apache.spark.mllib.clustering
        |for more conventional use.
      """.stripMargin)
  }

  /**
   * Picks K distinct data points as initial centres, then repeatedly reassigns points and
   * recomputes centres until the total squared movement is at most `convergeDist`.
   */
  def main(args: Array[String]): Unit = {

    showWarning()

    val data = generateData
    val points = new HashSet[Vector[Double]]
    val kPoints = new HashMap[Int, Vector[Double]]
    var tempDist = 1.0

    while (points.size < K) {
      points.add(data(rand.nextInt(N)))
    }

    // Restored: `iter` is created for exactly this loop and was otherwise unused.
    val iter = points.iterator
    for (i <- 1 to points.size) {
      kPoints.put(i, iter.next())
    }

    println("Initial centers: " + kPoints)

    while (tempDist > convergeDist) {
      // Restored receivers (`data.map`, `mappings.map`, `pointStats.map`): every data point is
      // tagged with its nearest centre, grouped by centre, summed, then averaged.
      val closest = data.map(p => (closestPoint(p, kPoints), (p, 1)))

      val mappings = closest.groupBy[Int](x => x._1)

      val pointStats = mappings.map { pair =>
        pair._2.reduceLeft[(Int, (Vector[Double], Int))] {
          case ((id1, (x1, y1)), (id2, (x2, y2))) => (id1, (x1 + x2, y1 + y2))
        }
      }

      val newPoints = pointStats.map { mapping =>
        (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))
      }

      tempDist = 0.0
      for (mapping <- newPoints) {
        tempDist += squaredDistance(kPoints.get(mapping._1).get, mapping._2)
      }

      for (newP <- newPoints) {
        kPoints.put(newP._1, newP._2)
      }
    }

    println("Final centers: " + kPoints)
  }
}
Source File: BlockStoreShuffleFetcher.scala From iolap with Apache License 2.0 | 5 votes
// NOTE(review): this listing lost text during extraction: the qualifier of the
// `{BlockId, ...}` import and three expressions inside `fetch` (marked below). The code is left
// exactly as found and does not compile as shown.
package org.apache.spark.shuffle.hash
import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.HashMap
import scala.util.{Failure, Success, Try}
import org.apache.spark._
import org.apache.spark.serializer.Serializer
import org.apache.spark.shuffle.FetchFailedException
import{BlockId, BlockManagerId, ShuffleBlockFetcherIterator, ShuffleBlockId}
import org.apache.spark.util.CompletionIterator

private[hash] object BlockStoreShuffleFetcher extends Logging {

  /**
   * Returns an iterator over the records of one reduce partition of a shuffle.
   *
   * The map-output statuses are looked up, grouped by block-manager address, fetched through
   * a ShuffleBlockFetcherIterator and unpacked; a failed fetch of a shuffle block is raised as
   * FetchFailedException.
   */
  def fetch[T](
      shuffleId: Int,
      reduceId: Int,
      context: TaskContext,
      serializer: Serializer)
    : Iterator[T] =
  {
    logDebug("Fetching outputs for shuffle %d, reduce %d".format(shuffleId, reduceId))
    val blockManager = SparkEnv.get.blockManager

    val startTime = System.currentTimeMillis
    val statuses = SparkEnv.get.mapOutputTracker.getServerStatuses(shuffleId, reduceId)
    logDebug("Fetching map output location for shuffle %d, reduce %d took %d ms".format(
      shuffleId, reduceId, System.currentTimeMillis - startTime))

    // Group (map index, size) pairs by the address that serves them.
    val splitsByAddress = new HashMap[BlockManagerId, ArrayBuffer[(Int, Long)]]
    for (((address, size), index) <- statuses.zipWithIndex) {
      splitsByAddress.getOrElseUpdate(address, ArrayBuffer()) += ((index, size))
    }

    // NOTE(review): two receivers are missing in the next statement - presumably a map over
    // `splitsByAddress` and an inner map over `splits` binding `s` - confirm.
    val blocksByAddress: Seq[(BlockManagerId, Seq[(BlockId, Long)])] = {
      case (address, splits) =>
        (address, => (ShuffleBlockId(shuffleId, s._1, reduceId), s._2)))
    }

    // Turns one fetch result into its records, or throws for a failed fetch.
    def unpackBlock(blockPair: (BlockId, Try[Iterator[Any]])) : Iterator[T] = {
      val blockId = blockPair._1
      val blockOption = blockPair._2
      blockOption match {
        case Success(block) => {
          block.asInstanceOf[Iterator[T]]
        }
        case Failure(e) => {
          blockId match {
            case ShuffleBlockId(shufId, mapId, _) =>
              // The map id indexes `statuses`, which gives the address that failed.
              val address = statuses(mapId.toInt)._1
              throw new FetchFailedException(address, shufId.toInt, mapId.toInt, reduceId, e)
            case _ =>
              throw new SparkException(
                "Failed to get block " + blockId + ", which is not a shuffle block", e)
          }
        }
      }
    }

    val blockFetcherItr = new ShuffleBlockFetcherIterator(
      context,
      SparkEnv.get.blockManager.shuffleClient, 
blockManager,
      blocksByAddress,
      serializer,
      // Note: we use getSizeAsMb when no suffix is provided for backwards compatibility
      SparkEnv.get.conf.getSizeAsMb("spark.reducer.maxSizeInFlight", "48m") * 1024 * 1024)
    val itr = blockFetcherItr.flatMap(unpackBlock)

    // Shuffle-read metrics are updated once the underlying iterator is exhausted.
    val completionIter = CompletionIterator[T, Iterator[T]](itr, {
      context.taskMetrics.updateShuffleReadMetrics()
    })

    new InterruptibleIterator[T](context, completionIter) {
      val readMetrics = context.taskMetrics.createShuffleReadMetricsForDependency()
      // NOTE(review): the expression that produces the next element is missing after the
      // metrics update; as shown, the method body does not yield a T.
      override def next(): T = {
        readMetrics.incRecordsRead(1)
      }
    }
  }
}
Source File: ExecutorsTab.scala From iolap with Apache License 2.0 | 5 votes
// NOTE(review): this listing lost text during extraction: the qualifier of the
// `{StorageStatus, StorageStatusListener}` import, one whole import target, and the right-hand
// side of `val sc` (marked below). The code is left exactly as found and does not compile as
// shown.
package org.apache.spark.ui.exec
import scala.collection.mutable.HashMap
import org.apache.spark.ExceptionFailure
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.scheduler._
import{StorageStatus, StorageStatusListener}
import org.apache.spark.ui.{SparkUI, SparkUITab}
import

/** The "executors" tab; adds a thread-dump page when thread dumps are enabled. */
private[ui] class ExecutorsTab(parent: SparkUI) extends SparkUITab(parent, "executors") {
  val listener = parent.executorsListener
  // NOTE(review): the right-hand side of `sc` is missing; `sc.isDefined` below shows it is
  // used as an optional value.
  val sc =
  val threadDumpEnabled =
    sc.isDefined && parent.conf.getBoolean("spark.ui.threadDumpsEnabled", true)

  attachPage(new ExecutorsPage(this, threadDumpEnabled))
  if (threadDumpEnabled) {
    attachPage(new ExecutorThreadDumpPage(this))
  }
}

/**
 * Accumulates per-executor counters (task counts, duration, input/output and shuffle volumes)
 * from scheduler events. Every event handler runs under this object's lock.
 */
@DeveloperApi
class ExecutorsListener(storageStatusListener: StorageStatusListener) extends SparkListener {
  // All maps are keyed by executor id.
  val executorToTasksActive = HashMap[String, Int]()
  val executorToTasksComplete = HashMap[String, Int]()
  val executorToTasksFailed = HashMap[String, Int]()
  val executorToDuration = HashMap[String, Long]()
  val executorToInputBytes = HashMap[String, Long]()
  val executorToInputRecords = HashMap[String, Long]()
  val executorToOutputBytes = HashMap[String, Long]()
  val executorToOutputRecords = HashMap[String, Long]()
  val executorToShuffleRead = HashMap[String, Long]()
  val executorToShuffleWrite = HashMap[String, Long]()
  val executorToLogUrls = HashMap[String, Map[String, String]]()
  val executorIdToData = HashMap[String, ExecutorUIData]()

  def storageStatusList: Seq[StorageStatus] = storageStatusListener.storageStatusList

  /** Records the log URLs and start time of a newly added executor. */
  override def onExecutorAdded(executorAdded: SparkListenerExecutorAdded): Unit = synchronized {
    val eid = executorAdded.executorId
    executorToLogUrls(eid) = executorAdded.executorInfo.logUrlMap
    executorIdToData(eid) = ExecutorUIData(executorAdded.time)
  }

  /** Records the finish time and reason of a removed executor. */
  override def onExecutorRemoved(
      executorRemoved: SparkListenerExecutorRemoved): Unit = synchronized {
    val eid = executorRemoved.executorId
    // NOTE(review): this lookup throws if no "added" event was recorded for `eid`.
    val uiData = executorIdToData(eid)
    uiData.finishTime = Some(executorRemoved.time)
    uiData.finishReason = 
Some(executorRemoved.reason)
  }

  /** Increments the active-task count of the task's executor. */
  override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = synchronized {
    val eid = taskStart.taskInfo.executorId
    executorToTasksActive(eid) = executorToTasksActive.getOrElse(eid, 0) + 1
  }

  /**
   * Updates the executor's counters for a finished task: one fewer active task, added
   * duration, one more failed task (for ExceptionFailure) or completed task (anything else),
   * and any input/output/shuffle volumes present in the task metrics.
   */
  override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = synchronized {
    val info = taskEnd.taskInfo
    if (info != null) {
      val eid = info.executorId
      // The default of 1 makes the count 0 for an executor with no recorded start.
      executorToTasksActive(eid) = executorToTasksActive.getOrElse(eid, 1) - 1
      executorToDuration(eid) = executorToDuration.getOrElse(eid, 0L) + info.duration
      taskEnd.reason match {
        case e: ExceptionFailure =>
          executorToTasksFailed(eid) = executorToTasksFailed.getOrElse(eid, 0) + 1
        case _ =>
          executorToTasksComplete(eid) = executorToTasksComplete.getOrElse(eid, 0) + 1
      }

      // Update shuffle read/write
      val metrics = taskEnd.taskMetrics
      if (metrics != null) {
        metrics.inputMetrics.foreach { inputMetrics =>
          executorToInputBytes(eid) =
            executorToInputBytes.getOrElse(eid, 0L) + inputMetrics.bytesRead
          executorToInputRecords(eid) =
            executorToInputRecords.getOrElse(eid, 0L) + inputMetrics.recordsRead
        }
        metrics.outputMetrics.foreach { outputMetrics =>
          executorToOutputBytes(eid) =
            executorToOutputBytes.getOrElse(eid, 0L) + outputMetrics.bytesWritten
          executorToOutputRecords(eid) =
            executorToOutputRecords.getOrElse(eid, 0L) + outputMetrics.recordsWritten
        }
        // Only remote bytes are added to the shuffle-read total.
        metrics.shuffleReadMetrics.foreach { shuffleRead =>
          executorToShuffleRead(eid) =
            executorToShuffleRead.getOrElse(eid, 0L) + shuffleRead.remoteBytesRead
        }
        metrics.shuffleWriteMetrics.foreach { shuffleWrite =>
          executorToShuffleWrite(eid) =
            executorToShuffleWrite.getOrElse(eid, 0L) + shuffleWrite.shuffleBytesWritten
        }
      }
    }
  }
}
Source File: UIData.scala From iolap with Apache License 2.0 | 5 votes
// NOTE(review): this listing lost text during extraction: the package name and the tail of the
// JobUIData parameter list (marked below). The code is left exactly as found and does not
// compile as shown.
package
import org.apache.spark.JobExecutionStatus
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
import org.apache.spark.util.collection.OpenHashSet
import scala.collection.mutable.HashMap

/** Mutable holders for the data shown in the UI. */
private[spark] object UIData {

  /** Per-executor totals; every field starts at 0. */
  class ExecutorSummary {
    var taskTime : Long = 0
    var failedTasks : Int = 0
    var succeededTasks : Int = 0
    var inputBytes : Long = 0
    var inputRecords : Long = 0
    var outputBytes : Long = 0
    var outputRecords : Long = 0
    var shuffleRead : Long = 0
    var shuffleReadRecords : Long = 0
    var shuffleWrite : Long = 0
    var shuffleWriteRecords : Long = 0
    var memoryBytesSpilled : Long = 0
    var diskBytesSpilled : Long = 0
  }

  /** UI state of one job; all fields are mutable and have defaults. */
  class JobUIData(
    var jobId: Int = -1,
    var submissionTime: Option[Long] = None,
    var completionTime: Option[Long] = None,
    var stageIds: Seq[Int] = Seq.empty,
    var jobGroup: Option[String] = None,
    var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN,
    // NOTE(review): the remaining parameters and the closing parenthesis of JobUIData are
    // missing; the next definition starts inside the unterminated parameter list.

  /** UI state of one task: its info plus optional metrics and error message. */
  case class TaskUIData(
      var taskInfo: TaskInfo,
      var taskMetrics: Option[TaskMetrics] = None,
      var errorMessage: Option[String] = None)

  /** Lifetime of one executor: start time plus finish time and reason once known. */
  case class ExecutorUIData(
      val startTime: Long,
      var finishTime: Option[Long] = None,
      var finishReason: Option[String] = None)
}
Source File: PoolTable.scala From iolap with Apache License 2.0 | 5 votes
// NOTE(review): this listing lost text during extraction: the package name and several
// expressions inside the class (each marked below). The code is left exactly as found; it will
// not compile until the missing pieces are restored from the original file.
package
import scala.collection.mutable.HashMap
import scala.xml.Node
import org.apache.spark.scheduler.{Schedulable, StageInfo}
import org.apache.spark.ui.UIUtils

/** Renders the scheduler pools as an HTML table, one row per pool. */
private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) {
  private val listener = parent.progressListener

  /** Builds the table while holding the listener's lock. */
  def toNodeSeq: Seq[Node] = {
    listener.synchronized {
      poolTable(poolRow, pools)
    }
  }

  // Renders the table header and, presumably, one `makeRow` result per element of `rows`.
  // NOTE(review): the expression in front of `=> makeRow(...)` is missing; a map over `rows`
  // binding `r` is the likely original - confirm.
  private def poolTable(
      makeRow: (Schedulable, HashMap[String, HashMap[Int, StageInfo]]) => Seq[Node],
      rows: Seq[Schedulable]): Seq[Node] = {
    <table class="table table-bordered table-striped table-condensed sortable table-fixed">
      <thead>
        <th>Pool Name</th>
        <th>Minimum Share</th>
        <th>Pool Weight</th>
        <th>Active Stages</th>
        <th>Running Tasks</th>
        <th>SchedulingMode</th>
      </thead>
      <tbody>
        { => makeRow(r, listener.poolToActiveStages))}
      </tbody>
    </table>
  }

  // Renders one pool: a link to its page, then min share, weight, active-stage count (0 when
  // the pool has no entry in `poolToActiveStages`), running tasks and scheduling mode.
  // NOTE(review): the lookup key, the second argument of `format` (with its closing
  // parenthesis) and the link text are missing; all three presumably referred to the pool's
  // name - confirm.
  private def poolRow(
      p: Schedulable,
      poolToActiveStages: HashMap[String, HashMap[Int, StageInfo]]): Seq[Node] = {
    val activeStages = poolToActiveStages.get( match {
      case Some(stages) => stages.size
      case None => 0
    }
    val href = "%s/stages/pool?poolname=%s"
      .format(UIUtils.prependBaseUri(parent.basePath),
    <tr>
      <td>
        <a href={href}>{}</a>
      </td>
      <td>{p.minShare}</td>
      <td>{p.weight}</td>
      <td>{activeStages}</td>
      <td>{p.runningTasks}</td>
      <td>{p.schedulingMode}</td>
    </tr>
  }
}
Source File: StageInfo.scala From iolap with Apache License 2.0 | 5 votes
// NOTE(review): this listing lost text during extraction: one import target, the declaration
// enclosing `fromStage` (only its closing brace remains), and several expressions inside the
// method (marked below). The code is left exactly as found and does not compile as shown.
package org.apache.spark.scheduler
import scala.collection.mutable.HashMap
import org.apache.spark.annotation.DeveloperApi
import

  /**
   * Builds a StageInfo describing `stage`.
   *
   * The task count is `numTasks` when given, otherwise `stage.numTasks`. The RDD infos start
   * with the stage's own RDD and are followed by `ancestorRddInfos`.
   */
  def fromStage(stage: Stage, numTasks: Option[Int] = None): StageInfo = {
    // NOTE(review): the right-hand side of `ancestorRddInfos` is missing.
    val ancestorRddInfos =
    val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos
    // NOTE(review): three constructor arguments are missing (the empty slots between commas).
    new StageInfo(, stage.attemptId,, numTasks.getOrElse(stage.numTasks), rddInfos,, stage.details)
  }
}
Source File: GroupedSumEvaluator.scala From iolap with Apache License 2.0 | 5 votes
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConversions.mapAsScalaMap
import scala.collection.Map
import scala.collection.mutable.HashMap

import org.apache.spark.util.StatCounter

/**
 * Approximate evaluator for per-key sums: merges the StatCounters of finished outputs and
 * turns them into bounded estimates of each key's total.
 *
 * The Java maps built in `currentResult` are returned as Scala maps through the imported
 * `mapAsScalaMap` conversion, which also provides the `result(key) = value` syntax.
 *
 * @param totalOutputs number of outputs expected in total
 * @param confidence   confidence level reported with partial results
 */
private[spark] class GroupedSumEvaluator[T](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] {

  var outputsMerged = 0
  var sums = new JHashMap[T, StatCounter]   // Sum of counts for each key

  /** Folds the per-key counters of one finished output into the running counters. */
  override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]): Unit = {
    outputsMerged += 1
    val iter = taskResult.entrySet.iterator()
    while (iter.hasNext) {
      // `iter.next()` was lost in extraction (here and in the loops below); it is the only way
      // to obtain an entry from `iter` and to advance the loop.
      val entry = iter.next()
      val old = sums.get(entry.getKey)
      if (old != null) {
        old.merge(entry.getValue)
      } else {
        sums.put(entry.getKey, entry.getValue)
      }
    }
  }

  /**
   * @return exact sums once every output has been merged, an empty map before the first
   *         merge, and otherwise an estimate with a confidence interval per key
   */
  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val sum = entry.getValue.sum
        result(entry.getKey) = new BoundedDouble(sum, 1.0, sum, sum)
      }
      result
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      // Fraction of outputs seen so far.
      val p = outputsMerged.toDouble / totalOutputs
      val studentTCacher = new StudentTCacher(confidence)
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val counter = entry.getValue
        val meanEstimate = counter.mean
        val meanVar = counter.sampleVariance / counter.count
        val countEstimate = (counter.count + 1 - p) / p
        val countVar = (counter.count + 1) * (1 - p) / (p * p)
        // The sum is estimated as mean * count; its variance combines both variances.
        val sumEstimate = meanEstimate * countEstimate
        val sumVar = (meanEstimate * meanEstimate * countVar) +
                     (countEstimate * countEstimate * meanVar) +
                     (meanVar * countVar)
        val sumStdev = math.sqrt(sumVar)
        val confFactor = studentTCacher.get(counter.count)
        val low = sumEstimate - confFactor * sumStdev
        val high = sumEstimate + confFactor * sumStdev
        result(entry.getKey) = new BoundedDouble(sumEstimate, confidence, low, high)
      }
      result
    }
  }
}
Source File: GroupedCountEvaluator.scala From iolap with Apache License 2.0 | 5 votes
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConversions.mapAsScalaMap
import scala.collection.Map
import scala.collection.mutable.HashMap
import scala.reflect.ClassTag

import org.apache.commons.math3.distribution.NormalDistribution

import org.apache.spark.util.collection.OpenHashMap

/**
 * Approximate evaluator for per-key counts: merges the counts of finished outputs and turns
 * them into bounded estimates.
 *
 * The Java maps built in `currentResult` are returned as Scala maps through the imported
 * `mapAsScalaMap` conversion, which also provides the `bounds(key) = value` syntax.
 *
 * @param totalOutputs number of outputs expected in total
 * @param confidence   confidence level reported with partial results
 */
private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] {

  var outputsMerged = 0
  var sums = new OpenHashMap[T, Long]()   // Sum of counts for each key

  /** Adds the per-key counts of one finished output to the running sums. */
  override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]): Unit = {
    outputsMerged += 1
    for ((key, count) <- taskResult) {
      sums.changeValue(key, count, _ + count)
    }
  }

  /**
   * @return exact counts once every output has been merged, an empty map before the first
   *         merge, and otherwise an estimate with a normal-approximation interval per key
   */
  override def currentResult(): Map[T, BoundedDouble] = outputsMerged match {
    case merged if merged == totalOutputs =>
      val bounds = new JHashMap[T, BoundedDouble](sums.size)
      for ((key, total) <- sums) {
        bounds(key) = new BoundedDouble(total, 1.0, total, total)
      }
      bounds

    case 0 =>
      new HashMap[T, BoundedDouble]

    case _ =>
      // Fraction of outputs seen so far.
      val p = outputsMerged.toDouble / totalOutputs
      val confFactor = new NormalDistribution().
        inverseCumulativeProbability(1 - (1 - confidence) / 2)
      val bounds = new JHashMap[T, BoundedDouble](sums.size)
      for ((key, total) <- sums) {
        val mean = (total + 1 - p) / p
        val stdev = math.sqrt((total + 1) * (1 - p) / (p * p))
        val halfWidth = confFactor * stdev
        bounds(key) = new BoundedDouble(mean, confidence, mean - halfWidth, mean + halfWidth)
      }
      bounds
  }
}
Source File: GroupedMeanEvaluator.scala From iolap with Apache License 2.0 | 5 votes
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConversions.mapAsScalaMap
import scala.collection.Map
import scala.collection.mutable.HashMap

import org.apache.spark.util.StatCounter

/**
 * Approximate evaluator for per-key means: merges the StatCounters of finished outputs and
 * turns them into bounded estimates of each key's mean.
 *
 * The Java maps built in `currentResult` are returned as Scala maps through the imported
 * `mapAsScalaMap` conversion, which also provides the `result(key) = value` syntax.
 *
 * @param totalOutputs number of outputs expected in total
 * @param confidence   confidence level reported with partial results
 */
private[spark] class GroupedMeanEvaluator[T](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] {

  var outputsMerged = 0
  var sums = new JHashMap[T, StatCounter]   // Sum of counts for each key

  /** Folds the per-key counters of one finished output into the running counters. */
  override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]): Unit = {
    outputsMerged += 1
    val iter = taskResult.entrySet.iterator()
    while (iter.hasNext) {
      // `iter.next()` was lost in extraction (here and in the loops below); it is the only way
      // to obtain an entry from `iter` and to advance the loop.
      val entry = iter.next()
      val old = sums.get(entry.getKey)
      if (old != null) {
        old.merge(entry.getValue)
      } else {
        sums.put(entry.getKey, entry.getValue)
      }
    }
  }

  /**
   * @return exact means once every output has been merged, an empty map before the first
   *         merge, and otherwise the current mean with a confidence interval per key
   */
  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val mean = entry.getValue.mean
        result(entry.getKey) = new BoundedDouble(mean, 1.0, mean, mean)
      }
      result
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val studentTCacher = new StudentTCacher(confidence)
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val counter = entry.getValue
        val mean = counter.mean
        // Standard error of the mean.
        val stdev = math.sqrt(counter.sampleVariance / counter.count)
        val confFactor = studentTCacher.get(counter.count)
        val low = mean - confFactor * stdev
        val high = mean + confFactor * stdev
        result(entry.getKey) = new BoundedDouble(mean, confidence, low, high)
      }
      result
    }
  }
}
Source File: FeatureSelection.scala From aerosolve with Apache License 2.0 | 5 votes
// NOTE(review): the package name and the two java.io imports below were stripped by the
// extraction and are restored on a best-effort basis — confirm against the upstream file.
package com.airbnb.aerosolve.training

import java.io.BufferedWriter
import java.io.OutputStreamWriter
import java.util

import com.airbnb.aerosolve.core.{ModelRecord, ModelHeader, FeatureVector, Example}
import com.airbnb.aerosolve.core.models.LinearModel
import com.airbnb.aerosolve.core.util.Util
import com.typesafe.config.Config
import org.slf4j.{LoggerFactory, Logger}
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.rdd.RDD

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet
import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.Buffer
import scala.collection.JavaConversions._
import scala.collection.JavaConverters._
import scala.util.Random
import scala.math.abs
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.fs.Path
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

object FeatureSelection {
  private final val log: Logger = LoggerFactory.getLogger("FeatureSelection")

  /** Pseudo-feature under which the totals over all examples are accumulated. */
  val allKey : (String, String) = ("$ALL", "$POS")

  /**
   * Given an RDD, computes the pointwise mutual information between the positive label
   * and the discrete features.
   *
   * @param examples     input examples, converted with `LinearRankerUtils.makePointwise`
   * @param config       configuration forwarded to `makePointwise`
   * @param key          configuration key forwarded to `makePointwise`
   * @param rankKey      float-feature family whose first value is compared to `posThreshold`
   * @param posThreshold an example is positive when that value is strictly greater than this
   * @param minPosCount  features with fewer positive occurrences than this are dropped
   * @param newCrosses   when true, also counts every pair of features (i, j) with
   *                     `i._1 < j._1` as a synthetic "<NEW>" crossed feature
   * @return for each surviving feature, `log2(P(feature | positive) / P(feature))`
   * @throws NoSuchElementException if the `allKey` totals row does not survive the
   *                                `minPosCount` filter (e.g. no examples at all)
   */
  def pointwiseMutualInformation(examples : RDD[Example],
                                 config : Config,
                                 key : String,
                                 rankKey : String,
                                 posThreshold : Double,
                                 minPosCount : Double,
                                 newCrosses : Boolean) : RDD[((String, String), Double)] = {
    val pointwise = LinearRankerUtils.makePointwise(examples, config, key, rankKey)
    val featureCounts = pointwise
      .mapPartitions(part => {
        // Value tuple is (occurrences, occurrences in positive examples).
        val output = scala.collection.mutable.HashMap[(String, String), (Double, Double)]()
        part.foreach(example => {
          val featureVector = example.example.get(0)
          val isPos =
            if (featureVector.floatFeatures.get(rankKey).asScala.head._2 > posThreshold) 1.0
            else 0.0

          // Adds one occurrence (and one positive occurrence if applicable) to a counter.
          def bump(counter : (String, String)) : Unit = {
            val (total, positives) = output.getOrElse(counter, (0.0, 0.0))
            output.put(counter, (total + 1.0, positives + 1.0 * isPos))
          }

          bump(allKey)
          val exampleFeatures : Array[(String, String)] =
            LinearRankerUtils.getFeatures(featureVector)
          if (newCrosses) {
            for (i <- exampleFeatures; j <- exampleFeatures if i._1 < j._1) {
              bump(("%s<NEW>%s".format(i._1, j._1), "%s<NEW>%s".format(i._2, j._2)))
            }
          }
          exampleFeatures.foreach(bump)
        })
        output.iterator
      })
      .reduceByKey((a, b) => (a._1 + b._1, a._2 + b._2))
      .filter(x => x._2._2 >= minPosCount)

    // Totals over all examples; `head` fails if the totals row was filtered out above.
    val allCount = featureCounts.filter(x => x._1.equals(allKey)).take(1).head
    val (allTotal, allPositives) = allCount._2

    featureCounts.map(x => {
      val prob = x._2._1 / allTotal
      val probPos = x._2._2 / allPositives
      (x._1, math.log(probPos / prob) / math.log(2.0))
    })
  }

  /**
   * For each feature family (first component of the key) keeps the single entry whose
   * value has the largest absolute magnitude.
   */
  def maxEntropy(input : RDD[((String, String), Double)]) : RDD[((String, String), Double)] = {
    input
      .map(x => (x._1._1, (x._1._2, x._2)))
      .reduceByKey((a, b) => if (math.abs(a._2) > math.abs(b._2)) a else b)
      .map(x => ((x._1, x._2._1), x._2._2))
  }
}
Source File: package.scala From kyuubi with Apache License 2.0 | 5 votes
package yaooqinn.kyuubi

import scala.collection.mutable.HashMap

import org.apache.hadoop.fs.permission.FsPermission

/** Shared constants and type aliases for the YARN deployment code. */
package object yarn {

  /** Mutable map of environment variable name to value. */
  type EnvMap = HashMap[String, String]

  val KYUUBI_YARN_APP_NAME = "KYUUBI SERVER"
  val KYUUBI_YARN_APP_TYPE = "KYUUBI"

  // Staging directory for any temporary jars or files
  val KYUUBI_STAGING: String = ".kyuubiStaging"

  // Staging directory is private! -> rwx--------
  val STAGING_DIR_PERMISSION: FsPermission =
    FsPermission.createImmutable(Integer.parseInt("700", 8).toShort)

  // App files are world-wide readable and owner writable -> rw-r--r--
  val APP_FILE_PERMISSION: FsPermission =
    FsPermission.createImmutable(Integer.parseInt("644", 8).toShort)

  val SPARK_CONF_DIR = "__spark_conf__"

  // NOTE(review): this literal was extracted as "" — the file name appears to have been
  // stripped. Restored to the conventional Spark-on-YARN name; confirm against upstream.
  val SPARK_CONF_FILE = "__spark_conf__.properties"

  // Subdirectory in the conf directory containing Hadoop config files.
  val HADOOP_CONF_DIR = "__hadoop_conf__"

  // File containing the conf archive in the AM. See prepareLocalResources().
  val SPARK_CONF_ARCHIVE: String = SPARK_CONF_DIR + ".zip"

  val SPARK_LIB_DIR = "__spark_libs__"

  val LOCAL_SCHEME = "local"
}
Source File: KyuubiDistributedCacheManager.scala From kyuubi with Apache License 2.0 | 5 votes
package org.apache.spark.deploy.yarn import import scala.collection.mutable.{HashMap, Map} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} import org.apache.hadoop.yarn.api.records.{LocalResource, LocalResourceType} def addResource( fs: FileSystem, conf: Configuration, destPath: Path, localResources: HashMap[String, LocalResource], resourceType: LocalResourceType, link: String, statCache: Map[URI, FileStatus]): Unit = { cacheManager.addResource(fs, conf, destPath, localResources, resourceType, link, statCache, appMasterOnly = true) } }
Source File: KyuubiDistributedCacheManagerSuite.scala From kyuubi with Apache License 2.0 | 5 votes
package org.apache.spark.deploy.yarn

import java.net.URI

import scala.collection.mutable.{HashMap, Map}

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}
import org.apache.hadoop.yarn.api.records.{LocalResource, LocalResourceType, LocalResourceVisibility}
import org.apache.hadoop.yarn.util.ConverterUtils
import org.apache.spark.{KyuubiSparkUtil, SparkFunSuite}
import org.mockito.Mockito.when
import org.scalatest.mock.MockitoSugar

import yaooqinn.kyuubi.utils.ReflectUtils

/**
 * Tests for `KyuubiDistributedCacheManager.addResource`, run against a cache manager
 * whose visibility lookup is stubbed out.
 */
class KyuubiDistributedCacheManagerSuite extends SparkFunSuite with MockitoSugar {

  /** Cache manager that always reports PRIVATE visibility. */
  class MockClientDistributedCacheManager extends ClientDistributedCacheManager {
    override def getVisibility(conf: Configuration, uri: URI, statCache: Map[URI, FileStatus]):
        LocalResourceVisibility = {
      LocalResourceVisibility.PRIVATE
    }
  }

  test("add resource") {
    val fs = mock[FileSystem]
    val conf = new Configuration()
    val destPath = new Path("file:///")
    val localResources = HashMap[String, LocalResource]()
    val statCache = HashMap[URI, FileStatus]()
    val status = new FileStatus()
    when(fs.getFileStatus(destPath)).thenReturn(status)
    val fileLink = "link"
    // Swap the stubbed cache manager into the object under test via reflection.
    ReflectUtils.setFieldValue(
      KyuubiDistributedCacheManager, "cacheManager", new MockClientDistributedCacheManager)
    KyuubiDistributedCacheManager.addResource(
      fs, conf, destPath, localResources, LocalResourceType.FILE, fileLink, statCache)
    val res = localResources(fileLink)
    assert(res.getVisibility === LocalResourceVisibility.PRIVATE)
    assert(ConverterUtils.getPathFromYarnURL(res.getResource) === destPath)
    assert(res.getSize === 0)
    assert(res.getTimestamp === 0)
    assert(res.getType === LocalResourceType.FILE)

    // Second resource: a status with non-default length and modification time.
    val status2 = new FileStatus(
      10, false, 1, 1024, 10,
      10, null, KyuubiSparkUtil.getCurrentUserName, null, new Path("/tmp/testing2"))
    val destPath2 = new Path("file:///")
    when(fs.getFileStatus(destPath2)).thenReturn(status2)
    val fileLink2 = "link2"
    KyuubiDistributedCacheManager.addResource(
      fs, conf, destPath2, localResources, LocalResourceType.FILE, fileLink2, statCache)
    val res2 = localResources(fileLink2)
    assert(res2.getVisibility === LocalResourceVisibility.PRIVATE)
    assert(ConverterUtils.getPathFromYarnURL(res2.getResource) === destPath2)
    assert(res2.getSize === 10)
    assert(res2.getTimestamp === 10)
    assert(res2.getType === LocalResourceType.FILE)
  }

  test("add resource when link null") {
    val distMgr = new MockClientDistributedCacheManager()
    val fs = mock[FileSystem]
    val conf = new Configuration()
    val destPath = new Path("file:///")
    ReflectUtils.setFieldValue(KyuubiDistributedCacheManager, "cacheManager", distMgr)
    val localResources = HashMap[String, LocalResource]()
    val statCache = HashMap[URI, FileStatus]()
    when(fs.getFileStatus(destPath)).thenReturn(new FileStatus())
    // A null link must be rejected and must leave the resource map untouched.
    intercept[Exception] {
      KyuubiDistributedCacheManager.addResource(
        fs, conf, destPath, localResources, LocalResourceType.FILE, null, statCache)
    }
    assert(localResources.get("link") === None)
    assert(localResources.size === 0)
  }

  test("test addResource archive") {
    val distMgr = new MockClientDistributedCacheManager()
    ReflectUtils.setFieldValue(KyuubiDistributedCacheManager, "cacheManager", distMgr)
    val fs = mock[FileSystem]
    val conf = new Configuration()
    val destPath = new Path("file:///")
    val localResources = HashMap[String, LocalResource]()
    val statCache = HashMap[URI, FileStatus]()
    val realFileStatus = new FileStatus(10, false, 1, 1024, 10, 10, null, "testOwner",
      null, new Path("/tmp/testing"))
    when(fs.getFileStatus(destPath)).thenReturn(realFileStatus)
    KyuubiDistributedCacheManager.addResource(
      fs, conf, destPath, localResources, LocalResourceType.ARCHIVE, "link", statCache)
    val resource = localResources("link")
    assert(resource.getVisibility === LocalResourceVisibility.PRIVATE)
    assert(ConverterUtils.getPathFromYarnURL(resource.getResource) === destPath)
    assert(resource.getTimestamp === 10)
    assert(resource.getSize === 10)
    assert(resource.getType === LocalResourceType.ARCHIVE)
  }
}
Source File: UIData.scala From spark1.52 with Apache License 2.0 | 5 votes
package import org.apache.spark.JobExecutionStatus import org.apache.spark.executor.TaskMetrics import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo} import org.apache.spark.util.collection.OpenHashSet import scala.collection.mutable import scala.collection.mutable.HashMap private[spark] object UIData { class ExecutorSummary { var taskTime : Long = 0//任务时间 var failedTasks : Int = 0//失败任务数 var succeededTasks : Int = 0//完成任务数 var inputBytes : Long = 0 var inputRecords : Long = 0 var outputBytes : Long = 0 var outputRecords : Long = 0 var shuffleRead : Long = 0 var shuffleReadRecords : Long = 0 var shuffleWrite : Long = 0 var shuffleWriteRecords : Long = 0 var memoryBytesSpilled : Long = 0 var diskBytesSpilled : Long = 0 } class JobUIData( var jobId: Int = -1, var submissionTime: Option[Long] = None,//提交时间 var completionTime: Option[Long] = None,//完成时间 var stageIds: Seq[Int] = Seq.empty, var jobGroup: Option[String] = None, var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN, case class TaskUIData( var taskInfo: TaskInfo, var taskMetrics: Option[TaskMetrics] = None, var errorMessage: Option[String] = None) case class ExecutorUIData( val startTime: Long, var finishTime: Option[Long] = None, var finishReason: Option[String] = None) }
Source File: PoolTable.scala From spark1.52 with Apache License 2.0 | 5 votes
// NOTE(review): the package name was stripped by the extraction; restored from the
// `private[ui]` qualifier and the unimported `StagesTab` reference — confirm upstream.
package org.apache.spark.ui.jobs

import java.net.URLEncoder

import scala.collection.mutable.HashMap
import scala.xml.Node

import org.apache.spark.scheduler.{Schedulable, StageInfo}
import org.apache.spark.ui.UIUtils

/**
 * Renders the given scheduler pools as an HTML table, one row per pool.
 *
 * @param pools  pools to render
 * @param parent tab supplying the progress listener and the base path for links
 */
private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) {
  private val listener = parent.progressListener

  /** Builds the table while holding the listener's lock. */
  def toNodeSeq: Seq[Node] = {
    listener.synchronized {
      poolTable(poolRow, pools)
    }
  }

  /** Table skeleton; `makeRow` renders each entry of `rows` into the body. */
  private def poolTable(
      makeRow: (Schedulable, HashMap[String, HashMap[Int, StageInfo]]) => Seq[Node],
      rows: Seq[Schedulable]): Seq[Node] = {
    <table class="table table-bordered table-striped table-condensed sortable table-fixed">
      <thead>
        <th>Pool Name</th>
        <th>Minimum Share</th>
        <th>Pool Weight</th>
        <th>Active Stages</th>
        <th>Running Tasks</th>
        <th>SchedulingMode</th>
      </thead>
      <tbody>
        {rows.map(r => makeRow(r, listener.poolToActiveStages))}
      </tbody>
    </table>
  }

  /** Renders one pool as a table row linking to that pool's stage page. */
  private def poolRow(
      p: Schedulable,
      poolToActiveStages: HashMap[String, HashMap[Int, StageInfo]]): Seq[Node] = {
    // A pool with no entry in the map has no active stages.
    val activeStages = poolToActiveStages.get(p.name).map(_.size).getOrElse(0)
    // The pool name goes into a query string, so it must be URL-encoded.
    val href = "%s/stages/pool?poolname=%s"
      .format(UIUtils.prependBaseUri(parent.basePath), URLEncoder.encode(p.name, "UTF-8"))
    <tr>
      <td>
        <a href={href}>{p.name}</a>
      </td>
      <td>{p.minShare}</td>
      <td>{p.weight}</td>
      <td>{activeStages}</td>
      <td>{p.runningTasks}</td>
      <td>{p.schedulingMode}</td>
    </tr>
  }
}
Source File: StageInfo.scala From spark1.52 with Apache License 2.0 | 5 votes
package org.apache.spark.scheduler import scala.collection.mutable.HashMap import org.apache.spark.annotation.DeveloperApi import def fromStage( stage: Stage, attemptId: Int, //None被声明为一个对象,而不是一个类,在没有值的时候,使用None,如果有值可以引用,就使用Some来包含这个值,都是Option的子类 numTasks: Option[Int] = None, taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty ): StageInfo = { //方法获取RDD的所有直接或间接的NarrowDependency的RDD //RDDInfo.fromRdd创建RDDInfo信息,包括RDD父依赖关系 val ancestorRddInfos = //对当前stage的RDD也生成RDDInfo,然后所有生成的RDDInfo合并到rddInfos val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos new StageInfo(, attemptId,, numTasks.getOrElse(stage.numTasks), rddInfos,, stage.details, taskLocalityPreferences) } }
Source File: GroupedSumEvaluator.scala From spark1.52 with Apache License 2.0 | 5 votes
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConversions.mapAsScalaMap
import scala.collection.Map
import scala.collection.mutable.HashMap

import org.apache.spark.util.StatCounter

/**
 * Approximate evaluator for per-key sums.
 *
 * Task results (one Java map of key -> `StatCounter` per output) are merged into `sums`;
 * `currentResult()` turns the accumulated counters into one `BoundedDouble` per key.
 *
 * @param totalOutputs number of task outputs expected in total
 * @param confidence   confidence level attached to the bounds of a partial result
 */
private[spark] class GroupedSumEvaluator[T](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] {

  /** Number of task outputs merged so far. */
  var outputsMerged = 0

  /** Merged statistics counter for each key. */
  var sums = new JHashMap[T, StatCounter]

  /**
   * Merges each entry of `taskResult` into the counter already held for its key, or
   * stores the incoming counter itself when the key is new.
   */
  override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]): Unit = {
    outputsMerged += 1
    val iter = taskResult.entrySet.iterator()
    while (iter.hasNext) {
      val entry = iter.next()
      // java.util.HashMap.get returns null for an absent key; wrap it instead of testing null.
      Option(sums.get(entry.getKey)) match {
        case Some(existing) => existing.merge(entry.getValue)
        case None => sums.put(entry.getKey, entry.getValue)
      }
    }
  }

  /**
   * Returns the current estimate for every key seen so far.
   *
   *  - all outputs merged: the sum itself, with confidence 1.0 and low == high == sum;
   *  - nothing merged yet: an empty map;
   *  - otherwise: (estimated mean) * (estimated count), with a variance combined from the
   *    variances of the two estimates and a factor from `StudentTCacher`.
   */
  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val sum = entry.getValue.sum
        result(entry.getKey) = new BoundedDouble(sum, 1.0, sum, sum)
      }
      result
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      // Fraction of the outputs that have been observed so far.
      val p = outputsMerged.toDouble / totalOutputs
      val studentTCacher = new StudentTCacher(confidence)
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val counter = entry.getValue
        val meanEstimate = counter.mean
        val meanVar = counter.sampleVariance / counter.count
        val countEstimate = (counter.count + 1 - p) / p
        val countVar = (counter.count + 1) * (1 - p) / (p * p)
        val sumEstimate = meanEstimate * countEstimate
        val sumVar = (meanEstimate * meanEstimate * countVar) +
                     (countEstimate * countEstimate * meanVar) +
                     (meanVar * countVar)
        val sumStdev = math.sqrt(sumVar)
        val confFactor = studentTCacher.get(counter.count)
        val low = sumEstimate - confFactor * sumStdev
        val high = sumEstimate + confFactor * sumStdev
        result(entry.getKey) = new BoundedDouble(sumEstimate, confidence, low, high)
      }
      result
    }
  }
}
Source File: GroupedCountEvaluator.scala From spark1.52 with Apache License 2.0 | 5 votes
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConversions.mapAsScalaMap
import scala.collection.Map
import scala.collection.mutable.HashMap
import scala.reflect.ClassTag

import org.apache.commons.math3.distribution.NormalDistribution

import org.apache.spark.util.collection.OpenHashMap

/**
 * Approximate evaluator for per-key counts.
 *
 * Task results (one `OpenHashMap` of key -> count per output) are folded into `sums`;
 * `currentResult()` turns the accumulated counts into one `BoundedDouble` per key.
 *
 * @param totalOutputs number of task outputs expected in total
 * @param confidence   confidence level attached to the bounds of a partial result
 */
private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] {

  /** Number of task outputs merged so far. */
  var outputsMerged = 0

  /** Running count for each key over all merged outputs. */
  var sums = new OpenHashMap[T, Long]()

  /** Adds every (key, count) pair of `taskResult` into the running per-key counts. */
  override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]): Unit = {
    outputsMerged += 1
    taskResult.foreach { case (key, value) =>
      sums.changeValue(key, value, _ + value)
    }
  }

  /**
   * Returns the current estimate for every key seen so far.
   *
   *  - all outputs merged: the exact count, with confidence 1.0 and low == high == count;
   *  - nothing merged yet: an empty map;
   *  - otherwise: an estimate scaled by the fraction of outputs merged, bounded with a
   *    normal-distribution quantile for the requested confidence.
   */
  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      sums.map { case (key, sum) => (key, new BoundedDouble(sum, 1.0, sum, sum)) }.toMap
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      // Fraction of the outputs that have been observed so far.
      val p = outputsMerged.toDouble / totalOutputs
      // Two-sided quantile of the standard normal for the requested confidence.
      val confFactor = new NormalDistribution().
        inverseCumulativeProbability(1 - (1 - confidence) / 2)
      sums.map { case (key, sum) =>
        val mean = (sum + 1 - p) / p
        val variance = (sum + 1) * (1 - p) / (p * p)
        val stdev = math.sqrt(variance)
        val low = mean - confFactor * stdev
        val high = mean + confFactor * stdev
        (key, new BoundedDouble(mean, confidence, low, high))
      }.toMap
    }
  }
}
Source File: GroupedMeanEvaluator.scala From spark1.52 with Apache License 2.0 | 5 votes
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConversions.mapAsScalaMap
import scala.collection.Map
import scala.collection.mutable.HashMap

import org.apache.spark.util.StatCounter

/**
 * Approximate evaluator for per-key means.
 *
 * Task results (one Java map of key -> `StatCounter` per output) are merged into `sums`;
 * `currentResult()` turns the accumulated counters into one `BoundedDouble` per key.
 *
 * @param totalOutputs number of task outputs expected in total
 * @param confidence   confidence level attached to the bounds of a partial result
 */
private[spark] class GroupedMeanEvaluator[T](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] {

  /** Number of task outputs merged so far. */
  var outputsMerged = 0

  /** Merged statistics counter for each key. */
  var sums = new JHashMap[T, StatCounter]

  /**
   * Merges each entry of `taskResult` into the counter already held for its key, or
   * stores the incoming counter itself when the key is new.
   */
  override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]): Unit = {
    outputsMerged += 1
    val iter = taskResult.entrySet.iterator()
    while (iter.hasNext) {
      val entry = iter.next()
      // java.util.HashMap.get returns null for an absent key; wrap it instead of testing null.
      Option(sums.get(entry.getKey)) match {
        case Some(existing) => existing.merge(entry.getValue)
        case None => sums.put(entry.getKey, entry.getValue)
      }
    }
  }

  /**
   * Returns the current estimate for every key seen so far.
   *
   *  - all outputs merged: the mean itself, with confidence 1.0 and low == high == mean;
   *  - nothing merged yet: an empty map;
   *  - otherwise: the mean with bounds of `confFactor * sqrt(sampleVariance / count)`,
   *    where `confFactor` comes from `StudentTCacher` for the key's sample count.
   */
  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val mean = entry.getValue.mean
        result(entry.getKey) = new BoundedDouble(mean, 1.0, mean, mean)
      }
      result
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val studentTCacher = new StudentTCacher(confidence)
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val counter = entry.getValue
        val mean = counter.mean
        val stdev = math.sqrt(counter.sampleVariance / counter.count)
        val confFactor = studentTCacher.get(counter.count)
        val low = mean - confFactor * stdev
        val high = mean + confFactor * stdev
        result(entry.getKey) = new BoundedDouble(mean, confidence, low, high)
      }
      result
    }
  }
}
Source File: Mapper.scala From CSYE7200 with MIT License | 5 votes
package edu.neu.coe.csye7200.mapreduce

import akka.actor.{Actor, ActorLogging, ActorRef}

import scala.collection.mutable
import scala.collection.mutable.HashMap
import scala.util._

/**
 * A mapper whose reply carries both the successfully mapped values, grouped by key, and
 * the exceptions raised while mapping the rest.
 *
 * @param f the mapping function from an input pair to an output pair
 */
class Mapper_Forgiving[K1,V1,K2,V2](f: (K1,V1)=>(K2,V2)) extends Mapper[K1,V1,K2,V2](f) {

  /**
   * Splits the attempted mappings into successes and failures.
   *
   * @param v2k2ts one `Try` per input element
   * @return the successful values grouped by key (most recently seen value first), and
   *         the failures in the order they occurred
   */
  override def prepareReply(v2k2ts: Seq[Try[(K2,V2)]]): (Map[K2, Seq[V2]], Seq[Throwable]) = {
    val valuesByKey = mutable.HashMap[K2,Seq[V2]]()
    // Must be a mutable buffer: appending to an immutable Seq with `:+` returns a new
    // collection and would silently drop every failure.
    val failures = mutable.ListBuffer[Throwable]()
    // CONSIDER using traverse
    for (v2k2t <- v2k2ts) Master.sequence(v2k2t) match {
      case Right((k2,v2)) => valuesByKey.put(k2, v2 +: valuesByKey.getOrElse(k2, Nil))
      case Left(x) => failures += x
    }
    (valuesByKey.toMap, failures.toList)
  }
}

/** Message wrapping a sequence of key/value pairs. */
case class Incoming[K, V](m: Seq[(K,V)]) {
  override def toString: String = s"Incoming: with ${m.size} elements"
}

object Incoming {
  /** Wraps bare values, pairing each one with a null key. */
  def sequence[K,V](vs: Seq[V]): Incoming[K,V] = Incoming(vs.map(v => (null.asInstanceOf[K], v)))

  /** Wraps the entries of a map. */
  def map[K, V](vKm: Map[K,V]): Incoming[K,V] = Incoming(vKm.toSeq)
}

object Mapper {
}
Source File: LocalKMeans.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet

import breeze.linalg.{squaredDistance, DenseVector, Vector}

/**
 * K-means clustering run locally on randomly generated data.
 */
object LocalKMeans {
  val N = 1000
  val R = 1000 // Scaling factor
  val D = 10
  val K = 10
  val convergeDist = 0.001
  val rand = new Random(42)

  /** Generates N points of dimension D with coordinates drawn from [0, R). */
  def generateData: Array[DenseVector[Double]] = {
    def generatePoint(i: Int): DenseVector[Double] = {
      DenseVector.fill(D) {rand.nextDouble * R}
    }
    Array.tabulate(N)(generatePoint)
  }

  /**
   * Returns the key of the center closest to `p` by squared distance.
   *
   * Looks up keys `1 to centers.size`, so `centers` must be keyed that way; returns 0
   * when `centers` is empty.
   */
  def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = {
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val vCurr = centers(i)
      val tempDist = squaredDistance(p, vCurr)
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  /** Prints a notice on stderr that this implementation is for illustration only. */
  def showWarning(): Unit = {
    // NOTE(review): the class name on the second line was stripped by the extraction and
    // has been restored on a best-effort basis — confirm against upstream.
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use org.apache.spark.ml.clustering.KMeans
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]): Unit = {

    showWarning()

    val data = generateData
    val points = new HashSet[Vector[Double]]
    val kPoints = new HashMap[Int, Vector[Double]]
    var tempDist = 1.0

    // Pick K distinct data points as the initial centers.
    while (points.size < K) {
      points.add(data(rand.nextInt(N)))
    }

    val iter = points.iterator
    for (i <- 1 to points.size) {
      kPoints.put(i, iter.next())
    }

    println(s"Initial centers: $kPoints")

    // Iterate until the centers move by no more than convergeDist in total.
    while(tempDist > convergeDist) {
      // Tag every point with the key of its closest center and a count of 1.
      val closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      val mappings = closest.groupBy[Int] (x => x._1)

      // Per center: sum of the assigned points and how many there are.
      val pointStats = mappings.map { pair =>
        pair._2.reduceLeft [(Int, (Vector[Double], Int))] {
          case ((id1, (p1, c1)), (id2, (p2, c2))) => (id1, (p1 + p2, c1 + c2))
        }
      }

      // New center = sum of assigned points / number of assigned points.
      val newPoints = pointStats.map {mapping =>
        (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))}

      tempDist = 0.0
      for (mapping <- newPoints) {
        tempDist += squaredDistance(kPoints(mapping._1), mapping._2)
      }

      for (newP <- newPoints) {
        kPoints.put(newP._1, newP._2)
      }
    }

    println(s"Final centers: $kPoints")
  }
}
// scalastyle:on println
Source File: LocalityPlacementStrategySuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes
package org.apache.spark.deploy.yarn

import scala.collection.JavaConverters._
import scala.collection.mutable.{HashMap, HashSet, Set}

import org.apache.hadoop.yarn.api.records._
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.mockito.Mockito._

import org.apache.spark.{SparkConf, SparkFunSuite}

/** Regression test for locality-aware container placement with large inputs. */
class LocalityPlacementStrategySuite extends SparkFunSuite {

  test("handle large number of containers and tasks (SPARK-18750)") {
    // Run the test in a thread with a small stack size, since the original issue
    // surfaced as a StackOverflowError.
    var error: Throwable = null

    val runnable = new Runnable() {
      override def run(): Unit = {
        try {
          runTest()
        } catch {
          // Deliberately catches Throwable: the failure being guarded against is an Error.
          case e: Throwable => error = e
        }
      }
    }

    val group = new ThreadGroup("test")
    val thread = new Thread(group, runnable, "test-thread", 32 * 1024)
    thread.start()
    thread.join()

    assert(error === null)
  }

  /** Builds a large placement request and runs the strategy on it once. */
  private def runTest(): Unit = {
    val yarnConf = new YarnConfiguration()

    // The numbers below have been chosen to balance being large enough to replicate the
    // original issue while not taking too long to run when the issue is fixed. The main
    // goal is to create enough requests for localized containers (so there should be many
    // tasks on several hosts that have no allocated containers).

    val resource = Resource.newInstance(8 * 1024, 4)
    val strategy = new LocalityPreferredContainerPlacementStrategy(new SparkConf(),
      yarnConf, resource, new MockResolver())

    val totalTasks = 32 * 1024
    val totalContainers = totalTasks / 16
    val totalHosts = totalContainers / 16

    val mockId = mock(classOf[ContainerId])
    val hosts = (1 to totalHosts).map { i => (s"host_$i", totalTasks % i) }.toMap
    // Every container slot shares the same mocked id.
    val containers = IndexedSeq.fill(totalContainers)(mockId)
    val count = containers.size / hosts.size / 2

    // Only the first half of the hosts get any allocated containers.
    val hostToContainerMap = new HashMap[String, Set[ContainerId]]()
    for ((host, i) <- hosts.keys.take(hosts.size / 2).zipWithIndex) {
      val hostContainers = new HashSet[ContainerId]()
      hostContainers ++= containers.slice(count * i, count * i + i)
      hostToContainerMap(host) = hostContainers
    }

    strategy.localityOfRequestedContainers(containers.size * 2, totalTasks, hosts,
      hostToContainerMap, Nil)
  }
}
Source File: JsonUtils.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes
package org.apache.spark.sql.kafka010 import scala.collection.mutable.HashMap import scala.util.control.NonFatal import org.apache.kafka.common.TopicPartition import org.json4s.NoTypeHints import org.json4s.jackson.Serialization def partitionOffsets(partitionOffsets: Map[TopicPartition, Long]): String = { val result = new HashMap[String, HashMap[Int, Long]]() implicit val ordering = new Ordering[TopicPartition] { override def compare(x: TopicPartition, y: TopicPartition): Int = { Ordering.Tuple2[String, Int].compare((x.topic, x.partition), (y.topic, y.partition)) } } val partitions = partitionOffsets.keySet.toSeq.sorted // sort for more determinism partitions.foreach { tp => val off = partitionOffsets(tp) val parts = result.getOrElse(tp.topic, new HashMap[Int, Long]) parts += tp.partition -> off result += tp.topic -> parts } Serialization.write(result) } }
Source File: StageInfo.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes
package org.apache.spark.scheduler import scala.collection.mutable.HashMap import org.apache.spark.annotation.DeveloperApi import org.apache.spark.executor.TaskMetrics import def fromStage( stage: Stage, attemptId: Int, numTasks: Option[Int] = None, taskMetrics: TaskMetrics = null, taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty ): StageInfo = { val ancestorRddInfos = val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos new StageInfo(, attemptId,, numTasks.getOrElse(stage.numTasks), rddInfos,, stage.details, taskMetrics, taskLocalityPreferences) } }
Source File: GroupedCountEvaluator.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes
package org.apache.spark.partial

import scala.collection.Map
import scala.collection.mutable.HashMap
import scala.reflect.ClassTag

import org.apache.spark.util.collection.OpenHashMap

/**
 * Approximate evaluator for per-key counts.
 *
 * Task results (one `OpenHashMap` of key -> count per output) are folded into `sums`;
 * `currentResult()` turns the accumulated counts into one `BoundedDouble` per key.
 *
 * @param totalOutputs number of task outputs expected in total
 * @param confidence   confidence level passed to `CountEvaluator.bound` for partial results
 */
private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] {

  /** Number of task outputs merged so far. */
  private var outputsMerged = 0

  /** Running count for each key over all merged outputs. */
  private val sums = new OpenHashMap[T, Long]()

  /** Adds every (key, count) pair of `taskResult` into the running per-key counts. */
  override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]): Unit = {
    outputsMerged += 1
    taskResult.foreach { case (key, value) =>
      sums.changeValue(key, value, _ + value)
    }
  }

  /**
   * Returns the current estimate for every key seen so far.
   *
   *  - all outputs merged: the exact count, with confidence 1.0 and low == high == count;
   *  - nothing merged yet: an empty map;
   *  - otherwise: whatever `CountEvaluator.bound` returns for the key's count and the
   *    fraction of outputs merged.
   */
  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      sums.map { case (key, sum) => (key, new BoundedDouble(sum, 1.0, sum, sum)) }.toMap
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      // Fraction of the outputs that have been observed so far.
      val p = outputsMerged.toDouble / totalOutputs
      sums.map { case (key, sum) => (key, CountEvaluator.bound(confidence, sum, p)) }.toMap
    }
  }
}
Source File: MasterWebUISuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes
package org.apache.spark.deploy.master.ui

import java.io.DataOutputStream
import java.net.{HttpURLConnection, URL}
import java.nio.charset.StandardCharsets
import java.util.Date

import scala.collection.mutable.HashMap

import org.mockito.Mockito.{mock, times, verify, when}
import org.scalatest.BeforeAndAfterAll

import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite}
import org.apache.spark.deploy.DeployMessages.{KillDriverResponse, RequestKillDriver}
import org.apache.spark.deploy.DeployTestUtils._
import org.apache.spark.deploy.master._
import org.apache.spark.rpc.{RpcEndpointRef, RpcEnv}

/** Tests the kill endpoints of the master web UI against a mocked `Master`. */
class MasterWebUISuite extends SparkFunSuite with BeforeAndAfterAll {

  val conf = new SparkConf
  val securityMgr = new SecurityManager(conf)
  val rpcEnv = mock(classOf[RpcEnv])
  val master = mock(classOf[Master])
  val masterEndpointRef = mock(classOf[RpcEndpointRef])
  when(master.securityMgr).thenReturn(securityMgr)
  when(master.conf).thenReturn(conf)
  when(master.rpcEnv).thenReturn(rpcEnv)
  when(master.self).thenReturn(masterEndpointRef)
  val masterWebUI = new MasterWebUI(master, 0)

  override def beforeAll(): Unit = {
    super.beforeAll()
    masterWebUI.bind()
  }

  override def afterAll(): Unit = {
    masterWebUI.stop()
    super.afterAll()
  }

  test("kill application") {
    val appDesc = createAppDesc()
    // use new start date so it isn't filtered by UI
    val activeApp = new ApplicationInfo(
      new Date().getTime, "app-0", appDesc, new Date(), null, Int.MaxValue)

    when(master.idToApp).thenReturn(HashMap[String, ApplicationInfo]((activeApp.id, activeApp)))

    val url = s"http://localhost:${masterWebUI.boundPort}/app/kill/"
    val body = convPostDataToString(Map(("id", activeApp.id), ("terminate", "true")))
    val conn = sendHttpRequest(url, "POST", body)
    // Reading the response code is what actually sends the request.
    conn.getResponseCode

    // Verify the master was called to remove the active app
    verify(master, times(1)).removeApplication(activeApp, ApplicationState.KILLED)
  }

  test("kill driver") {
    val activeDriverId = "driver-0"
    val url = s"http://localhost:${masterWebUI.boundPort}/driver/kill/"
    val body = convPostDataToString(Map(("id", activeDriverId), ("terminate", "true")))
    val conn = sendHttpRequest(url, "POST", body)
    // Reading the response code is what actually sends the request.
    conn.getResponseCode

    // Verify that master was asked to kill driver with the correct id
    verify(masterEndpointRef, times(1)).ask[KillDriverResponse](RequestKillDriver(activeDriverId))
  }

  /** Joins the pairs as `name=value` separated by `&`; values are not URL-encoded. */
  private def convPostDataToString(data: Map[String, String]): String = {
    (for ((name, value) <- data) yield s"$name=$value").mkString("&")
  }

  /**
   * Opens a connection to `url` with the given method and, if `body` is non-empty,
   * writes it as a form-encoded request body.
   *
   * @return the connection; the caller triggers the exchange by reading from it
   */
  private def sendHttpRequest(
      url: String,
      method: String,
      body: String = ""): HttpURLConnection = {
    val conn = new URL(url).openConnection().asInstanceOf[HttpURLConnection]
    conn.setRequestMethod(method)
    if (body.nonEmpty) {
      // Content-Length counts bytes, so size the encoded payload rather than the string.
      val payload = body.getBytes(StandardCharsets.UTF_8)
      conn.setDoOutput(true)
      conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded")
      conn.setRequestProperty("Content-Length", Integer.toString(payload.length))
      val out = new DataOutputStream(conn.getOutputStream)
      try {
        out.write(payload)
      } finally {
        out.close()
      }
    }
    conn
  }
}
Source File: TaskDescriptionSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes
package org.apache.spark.scheduler

import java.io.{ByteArrayOutputStream, DataOutputStream, UTFDataFormatException}
import java.nio.ByteBuffer
import java.util.Properties

import scala.collection.mutable.HashMap

import org.apache.spark.SparkFunSuite

/** Round-trip test for `TaskDescription.encode` / `TaskDescription.decode`. */
class TaskDescriptionSuite extends SparkFunSuite {
  test("encoding and then decoding a TaskDescription results in the same TaskDescription") {
    val originalFiles = new HashMap[String, Long]()
    originalFiles.put("fileUrl1", 1824)
    originalFiles.put("fileUrl2", 2)

    val originalJars = new HashMap[String, Long]()
    originalJars.put("jar1", 3)

    val originalProperties = new Properties()
    originalProperties.put("property1", "18")
    originalProperties.put("property2", "test value")
    // SPARK-19796 -- large property values (like a large job description for a long sql query)
    // can cause problems for DataOutputStream, make sure we handle correctly
    val sb = new StringBuilder()
    (0 to 10000).foreach(_ => sb.append("1234567890"))
    val largeString = sb.toString()
    originalProperties.put("property3", largeString)
    // make sure we've got a good test case
    intercept[UTFDataFormatException] {
      val out = new DataOutputStream(new ByteArrayOutputStream())
      try {
        out.writeUTF(largeString)
      } finally {
        out.close()
      }
    }

    // Create a dummy byte buffer for the task.
    val taskBuffer = ByteBuffer.wrap(Array[Byte](1, 2, 3, 4))

    val originalTaskDescription = new TaskDescription(
      taskId = 1520589,
      attemptNumber = 2,
      executorId = "testExecutor",
      name = "task for test",
      index = 19,
      originalFiles,
      originalJars,
      originalProperties,
      taskBuffer
    )

    val serializedTaskDescription = TaskDescription.encode(originalTaskDescription)
    val decodedTaskDescription = TaskDescription.decode(serializedTaskDescription)

    // Make sure that all of the fields in the decoded task description match the original.
    assert(decodedTaskDescription.taskId === originalTaskDescription.taskId)
    assert(decodedTaskDescription.attemptNumber === originalTaskDescription.attemptNumber)
    assert(decodedTaskDescription.executorId === originalTaskDescription.executorId)
    assert(decodedTaskDescription.name === originalTaskDescription.name)
    assert(decodedTaskDescription.index === originalTaskDescription.index)
    assert(decodedTaskDescription.addedFiles.equals(originalFiles))
    assert(decodedTaskDescription.addedJars.equals(originalJars))
    assert(decodedTaskDescription.properties.equals(originalTaskDescription.properties))
    assert(decodedTaskDescription.serializedTask.equals(taskBuffer))
  }
}
Source File: KinesisSourceOffset.scala From kinesis-sql with Apache License 2.0 | 5 votes
package org.apache.spark.sql.kinesis

import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization
import scala.collection.mutable.HashMap
import scala.util.control.NonFatal

import org.apache.spark.sql.execution.streaming.Offset
import org.apache.spark.sql.execution.streaming.SerializedOffset
import org.apache.spark.sql.sources.v2.reader.streaming.{Offset => OffsetV2, PartitionOffset}

  // NOTE(review): the header of the enclosing definition (presumably
  // `object KinesisSourceOffset {`, together with the implicit json4s formats that
  // `Serialization.read` needs) is not part of this excerpt -- confirm against the full file.

  /**
   * Parses a JSON document into a KinesisSourceOffset.
   *
   * The JSON is read as a map of maps. The "metadata" entry supplies "batchId" and
   * "streamName"; every other top-level key is treated as a shard id whose value carries
   * "iteratorType" and "iteratorPosition".
   *
   * @param json the serialized offset
   * @throws IllegalArgumentException wrapping any non-fatal failure (malformed JSON,
   *         missing key, non-numeric batch id)
   */
  def apply(json: String): KinesisSourceOffset = {
    try {
      // `Serialization.read` was dropped by text extraction and is restored here.
      val readObj = Serialization.read[ Map[ String, Map[ String, String ] ] ](json)
      val metadata = readObj.get("metadata")
      val shardInfoMap: Map[String, ShardInfo ] = readObj.filter(_._1 != "metadata").map {
        case (shardId, value) => shardId.toString -> new ShardInfo(shardId.toString,
          value.get("iteratorType").get,
          value.get("iteratorPosition").get)
      }.toMap
      KinesisSourceOffset(
        new ShardOffsets(
          // `metadata` is an Option: `.get` unwraps it, then ("batchId") indexes the inner map.
          metadata.get("batchId").toLong,
          metadata.get("streamName"),
          shardInfoMap))
    } catch {
      case NonFatal(x) => throw new IllegalArgumentException(x)
    }
  }

  /**
   * Indexes shard infos by their shard id.
   *
   * @param shardInfos the shards to index
   * @return a map from `shardId` to the corresponding ShardInfo; later duplicates win
   */
  def getMap(shardInfos: Array[ShardInfo]): Map[String, ShardInfo] = {
    // `shardInfos.map` was dropped by text extraction and is restored here.
    shardInfos.map {
      s: ShardInfo => (s.shardId -> s)
    }.toMap
  }

}
Source File: LocalKMeans.scala From BigDatalog with Apache License 2.0 | 5 votes
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet

import breeze.linalg.{Vector, DenseVector, squaredDistance}

import org.apache.spark.SparkContext._

/**
 * Naive, single-process K-means over randomly generated points, intended as an example.
 *
 * N points of dimension D are drawn with a fixed-seed generator, K of them are picked as
 * initial centers, and centers are recomputed until their total squared movement in one
 * iteration drops to `convergeDist` or below.
 */
object LocalKMeans {
  val N = 1000
  val R = 1000     // Scaling factor
  val D = 10
  val K = 10
  val convergeDist = 0.001
  val rand = new Random(42)

  /** Generates N points, each with D coordinates drawn from `rand` and scaled by R. */
  def generateData: Array[DenseVector[Double]] = {
    def generatePoint(i: Int): DenseVector[Double] = {
      DenseVector.fill(D){rand.nextDouble * R}
    }
    Array.tabulate(N)(generatePoint)
  }

  /**
   * Returns the key (1-based) of the center nearest to `p` by squared distance.
   *
   * @param p       the point to classify
   * @param centers current centers, keyed 1..centers.size
   * @return the key of the closest center, or 0 when `centers` is empty
   */
  def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = {
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val vCurr = centers(i)
      val tempDist = squaredDistance(p, vCurr)
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  /** Prints a notice to stderr that this implementation is for illustration only. */
  def showWarning(): Unit = {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use the KMeans method found in org.apache.spark.mllib.clustering
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]): Unit = {

    showWarning()

    val data = generateData
    val points = new HashSet[Vector[Double]]
    val kPoints = new HashMap[Int, Vector[Double]]
    var tempDist = 1.0

    // Draw until K distinct points have been collected (the set drops duplicates).
    while (points.size < K) {
      points.add(data(rand.nextInt(N)))
    }

    // NOTE(review): `iter.next()`, `data.map`, `mappings.map` and `pointStats.map` below were
    // dropped by text extraction and are restored from the surrounding data flow.
    val iter = points.iterator
    for (i <- 1 to points.size) {
      kPoints.put(i, iter.next())
    }

    println("Initial centers: " + kPoints)

    while (tempDist > convergeDist) {
      // Tag every point with the key of its nearest center and a count of 1.
      val closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      val mappings = closest.groupBy[Int] (x => x._1)

      // Per center: sum of the assigned points and how many there were.
      val pointStats = mappings.map { pair =>
        pair._2.reduceLeft [(Int, (Vector[Double], Int))] {
          case ((id1, (p1, c1)), (id2, (p2, c2))) => (id1, (p1 + p2, c1 + c2))
        }
      }

      // New center = sum / count.
      val newPoints = pointStats.map {mapping =>
        (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))}

      // Total squared movement of the centers in this iteration.
      tempDist = 0.0
      for (mapping <- newPoints) {
        tempDist += squaredDistance(kPoints(mapping._1), mapping._2)
      }

      for (newP <- newPoints) {
        kPoints.put(newP._1, newP._2)
      }
    }

    println("Final centers: " + kPoints)
  }
}
// scalastyle:on println
Source File: RecursivePlanDetails.scala From BigDatalog with Apache License 2.0 | 5 votes
package edu.ucla.cs.wis.bigdatalog.spark

import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

import scala.collection.mutable.HashMap

/**
 * Serializable holder for three name-to-plan registries: base relations (private, reached
 * only through the methods below), recursive relations and aggregate relations (both public).
 */
class RecursivePlanDetails extends Serializable {
  private val baseRelationsByName = HashMap.empty[String, LogicalPlan]
  val recursiveRelations = HashMap.empty[String, LogicalPlan]
  val aggregateRelations = HashMap.empty[String, LogicalPlan]

  /**
   * Registers (or replaces) the plan stored under `name`.
   *
   * @return the plan previously stored under `name`, if any
   */
  def addBaseRelation(name: String, obj: LogicalPlan): Option[LogicalPlan] =
    baseRelationsByName.put(name, obj)

  /** True when a base relation has been registered under `name`. */
  def containsBaseRelation(name: String): Boolean =
    baseRelationsByName.get(name).isDefined
}
Source File: CachedRDDManager.scala From BigDatalog with Apache License 2.0 | 5 votes
package edu.ucla.cs.wis.bigdatalog.spark.execution.recursion

import org.apache.spark.Logging
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel

import scala.collection.mutable.{HashMap, HashSet, Set}

/**
 * Tracks which RDDs were persisted during which iteration so that they can be unpersisted
 * once they fall a configurable number of iterations behind.
 *
 * @param defaultStorageLevel storage level applied to every RDD handed to `persist`
 */
class CachedRDDManager(defaultStorageLevel: StorageLevel)
  extends Logging with Serializable {

  // NOTE(review): the `StorageLevel` import above was emptied by text extraction and is
  // restored as org.apache.spark.storage.StorageLevel -- confirm against the full file.

  /** RDDs persisted so far, grouped by the iteration that was current when they were added. */
  val iterationToRDDMap = new HashMap[Int, HashSet[RDD[_]]]
  var currentIteration : Int = 0

  /** Persists `rdd` under the current iteration without a memory checkpoint. */
  def persist(rdd: RDD[_]): Unit = {
    persist(rdd, false)
  }

  /**
   * Records `rdd` under the current iteration and persists it at the default storage level.
   *
   * @param doMemoryCheckpoint when true, also calls `rdd.memoryCheckpoint()`
   */
  def persist(rdd: RDD[_], doMemoryCheckpoint: Boolean): Unit = {
    iterationToRDDMap.getOrElseUpdate(currentIteration, new HashSet[RDD[_]]).add(rdd)
    rdd.persist(defaultStorageLevel)

    if (doMemoryCheckpoint)
      rdd.memoryCheckpoint()
  }

  /**
   * Unpersists the RDDs recorded `iterationsBackToRemove` iterations ago (if that iteration
   * exists), logs the elapsed time, and advances `currentIteration` by one.
   */
  def cleanUpIteration(iterationsBackToRemove: Int = 2): Unit = {
    val start = System.currentTimeMillis()
    if (currentIteration >= iterationsBackToRemove) {
      val iterationId = currentIteration - iterationsBackToRemove
      // `remove` already reports absence via Option, so no separate `contains` lookup.
      iterationToRDDMap.remove(iterationId).foreach { rdds =>
        if (rdds.nonEmpty)
          logInfo("Unpersisting "+rdds.size+" rdds for iteration " + iterationId)
        rdds.foreach(rdd => rdd.unpersist(false))
      }
    }
    logInfo("CleanUpIteration took " + (System.currentTimeMillis() - start) + " ms")
    currentIteration += 1
  }

  /** Unpersists and forgets every RDD recorded for `iterationId`; no-op if none. */
  def cleanUpIterationById(iterationId: Int): Unit = {
    iterationToRDDMap.remove(iterationId).foreach { rdds =>
      rdds.foreach(rdd => rdd.unpersist(false))
    }
  }

  def incrementIteration(): Unit = { currentIteration += 1 }

  /** Forgets all recorded RDDs WITHOUT unpersisting them. */
  def clear(): Unit = {
    iterationToRDDMap.clear()
  }

  /**
   * Unpersists every recorded RDD that is not in `remainCached`, then forgets all of them.
   *
   * @param remainCached RDDs that must stay persisted
   */
  def clear(remainCached: Seq[RDD[_]]): Unit = {
    iterationToRDDMap.keySet.foreach(key =>
      logInfo("key: " + key + " value: " + iterationToRDDMap.get(key)))

    iterationToRDDMap.values.foreach { rdds =>
      rdds.foreach(rdd => if (!remainCached.contains(rdd)) rdd.unpersist(false))
    }

    iterationToRDDMap.clear()
  }

  /**
   * Unpersists each given RDD and removes it from the iteration it was recorded under,
   * dropping that iteration's entry once it becomes empty. RDDs that were never recorded
   * are left untouched.
   */
  def unpersist(rdds: Set[RDD[_]]): Unit = {
    for (rdd <- rdds) {
      iterationToRDDMap.synchronized {
        // rdd should only be in 1 iteration, so the first match is enough
        iterationToRDDMap.find(entry => entry._2.contains(rdd)).foreach { entry =>
          entry._2.remove(rdd)
          rdd.unpersist(false)
          if (entry._2.isEmpty)
            iterationToRDDMap.remove(entry._1)
        }
      }
    }
  }

  /** One line per iteration, in ascending order: `<iteration>:<set of RDDs>`. */
  override def toString: String = {
    iterationToRDDMap.toSeq
      .sortBy(_._1)
      .map { case (iteration, rdds) => iteration + ":" + rdds + "\n" }
      .mkString
  }
}
Source File: RelationCatalog.scala From BigDatalog with Apache License 2.0 | 5 votes
package edu.ucla.cs.wis.bigdatalog.spark

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.types.StructType

import scala.collection.mutable.HashMap

/** Name-indexed directory of relations, each described by a RelationInfo. */
class RelationCatalog extends Serializable {
  val directory = HashMap.empty[String, RelationInfo]

  /**
   * Registers a relation with the given schema.
   *
   * If `name` is already present the existing entry is kept and only its RDD is overwritten.
   *
   * NOTE(review): the replacement RDD comes from a freshly built RelationInfo whose RDD was
   * never set, so this path resets the existing entry's RDD to null. That may or may not
   * be the intent of "update rdd if already present" -- confirm with callers.
   */
  def addRelation(name : String, schema : StructType) : Unit = {
    val relationInfo = new RelationInfo().setSchema(schema)
    directory.get(name) match {
      case Some(oldRelationInfo) =>
        // update rdd if already present.  Schema should not change
        oldRelationInfo.setRDD(relationInfo.getRDD())
      case None => directory.put(name, relationInfo)
    }
  }

  /** Sets the RDD of relation `name`, creating a schema-less entry if none exists. */
  def setRDD(name : String, rdd : RDD[InternalRow]) : Unit = {
    directory.get(name) match {
      case Some(oldRelationInfo) => oldRelationInfo.setRDD(rdd)
      case None => directory.put(name, new RelationInfo().setRDD(rdd))
    }
  }

  /** Returns the entry for `name`, or null when there is none (callers rely on null). */
  def getRelationInfo(name : String) : RelationInfo = {
    directory.getOrElse(name, null)
  }

  /** Removes the whole entry for `name` (schema and RDD), if present. */
  def removeRDD(name : String) : Unit = {
    directory.remove(name)
  }

  def clear() : Unit = {
    directory.clear()
  }

  /** Concatenation of every (name, info) pair's string form, with no separator. */
  override def toString(): String = {
    directory.iterator.map(_.toString()).mkString
  }
}

/** Mutable pair of schema and RDD for one relation; either may be null until set. */
class RelationInfo() extends Serializable {
  private var schema : StructType = _
  private var rdd : RDD[InternalRow] = _

  def getSchema() : StructType = schema

  /** Stores the schema and returns this instance for chaining. */
  def setSchema(schema : StructType) : RelationInfo = {
    this.schema = schema
    this
  }

  def getRDD() : RDD[InternalRow] = rdd

  /** Stores the RDD and returns this instance for chaining. */
  def setRDD(rdd : RDD[InternalRow]) : RelationInfo = {
    this.rdd = rdd
    this
  }

  /** "schema: <schema>", followed by " RDD" only when an RDD has been set. */
  override def toString() : String = {
    // An `if` without `else` evaluates to Unit when the condition is false, which used to
    // append "()" to the text; the explicit empty branch avoids that.
    "schema: " + this.schema + (if (rdd != null) " RDD" else "")
  }
}
Source File: UIData.scala From BigDatalog with Apache License 2.0 | 5 votes
// NOTE(review): the package name after `package` was lost in text extraction; the
// `private[spark]` qualifier below implies it is somewhere under org.apache.spark -- confirm.
package
import org.apache.spark.JobExecutionStatus
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo}
import org.apache.spark.util.collection.OpenHashSet

import scala.collection.mutable
import scala.collection.mutable.HashMap

/** Container for the mutable data classes declared below. */
private[spark] object UIData {

  /** Per-executor running totals; every counter starts at zero. */
  class ExecutorSummary {
    var taskTime : Long = 0
    var failedTasks : Int = 0
    var succeededTasks : Int = 0
    var inputBytes : Long = 0
    var inputRecords : Long = 0
    var outputBytes : Long = 0
    var outputRecords : Long = 0
    var shuffleRead : Long = 0
    var shuffleReadRecords : Long = 0
    var shuffleWrite : Long = 0
    var shuffleWriteRecords : Long = 0
    var memoryBytesSpilled : Long = 0
    var diskBytesSpilled : Long = 0
  }

  /**
   * Mutable record describing one job.
   *
   * NOTE(review): this excerpt cuts the parameter list off after `status` (no closing
   * parenthesis); the remaining parameters and anything declared between this class and
   * TaskUIData are not visible here.
   */
  class JobUIData(
    var jobId: Int = -1,
    var submissionTime: Option[Long] = None,
    var completionTime: Option[Long] = None,
    var stageIds: Seq[Int] = Seq.empty,
    var jobGroup: Option[String] = None,
    var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN,

  /** A task's info plus, once known, its metrics and error message. */
  case class TaskUIData(
      var taskInfo: TaskInfo,
      var taskMetrics: Option[TaskMetrics] = None,
      var errorMessage: Option[String] = None)

  /** Executor start time plus, once known, its finish time and finish reason. */
  case class ExecutorUIData(
      val startTime: Long,
      var finishTime: Option[Long] = None,
      var finishReason: Option[String] = None)
}
Source File: PoolTable.scala From BigDatalog with Apache License 2.0 | 5 votes
// NOTE(review): the package name, the URLEncoder import, `rows.map(r => ...)` and every
// `p.name` below were dropped by text extraction. They are restored from what the code
// needs (URLEncoder.encode is called, `rows` and `r` are otherwise unbound, the pool is
// looked up in a String-keyed map); confirm the package against the full file.
package org.apache.spark.ui.jobs

import java.net.URLEncoder

import scala.collection.mutable.HashMap
import scala.xml.Node

import org.apache.spark.scheduler.{Schedulable, StageInfo}
import org.apache.spark.ui.UIUtils

/**
 * Renders the given pools as an HTML table, one row per pool.
 *
 * @param pools  pools to list
 * @param parent owning tab; supplies the progress listener and the base path for links
 */
private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) {
  private val listener = parent.progressListener

  /** Builds the table while holding the listener's lock. */
  def toNodeSeq: Seq[Node] = {
    listener.synchronized {
      poolTable(poolRow, pools)
    }
  }

  /**
   * Emits the table header and one row per entry of `rows`.
   *
   * @param makeRow renders one pool given the listener's pool-to-active-stages map
   * @param rows    pools to render
   */
  private def poolTable(
      makeRow: (Schedulable, HashMap[String, HashMap[Int, StageInfo]]) => Seq[Node],
      rows: Seq[Schedulable]): Seq[Node] = {
    <table class="table table-bordered table-striped table-condensed sortable table-fixed">
      <thead>
        <th>Pool Name</th>
        <th>Minimum Share</th>
        <th>Pool Weight</th>
        <th>Active Stages</th>
        <th>Running Tasks</th>
        <th>SchedulingMode</th>
      </thead>
      <tbody>
        {rows.map(r => makeRow(r, listener.poolToActiveStages))}
      </tbody>
    </table>
  }

  /**
   * Renders one pool as a table row whose name links to that pool's stage page.
   *
   * @param p                  the pool to render
   * @param poolToActiveStages active stages keyed by pool name, then by stage id
   */
  private def poolRow(
      p: Schedulable,
      poolToActiveStages: HashMap[String, HashMap[Int, StageInfo]]): Seq[Node] = {
    // A pool with no entry in the map has zero active stages.
    val activeStages = poolToActiveStages.get(p.name).map(_.size).getOrElse(0)
    val href = "%s/stages/pool?poolname=%s"
      .format(UIUtils.prependBaseUri(parent.basePath), URLEncoder.encode(p.name, "UTF-8"))
    <tr>
      <td>
        <a href={href}>{p.name}</a>
      </td>
      <td>{p.minShare}</td>
      <td>{p.weight}</td>
      <td>{activeStages}</td>
      <td>{p.runningTasks}</td>
      <td>{p.schedulingMode}</td>
    </tr>
  }
}
Source File: StageInfo.scala From BigDatalog with Apache License 2.0 | 5 votes
package org.apache.spark.scheduler

import scala.collection.mutable.HashMap

import org.apache.spark.annotation.DeveloperApi
// NOTE(review): the target of the next `import` was lost in text extraction; `RDDInfo` is
// used below without a visible import, so it is presumably the missing name -- confirm.
import

  // NOTE(review): the header of the enclosing definition is not part of this excerpt.

  /**
   * Builds a StageInfo from a Stage.
   *
   * @param stage                   the stage to describe
   * @param attemptId               passed through to the StageInfo constructor
   * @param numTasks                overrides `stage.numTasks` when defined
   * @param taskLocalityPreferences passed through to the StageInfo constructor
   *
   * NOTE(review): several expressions in this body were dropped by text extraction -- the
   * right-hand side of `ancestorRddInfos` and three constructor arguments (positions 1, 3
   * and 6). The body does not compile as shown; restore them from the original file.
   */
  def fromStage(
      stage: Stage,
      attemptId: Int,
      numTasks: Option[Int] = None,
      taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty
    ): StageInfo = {
    val ancestorRddInfos = /* expression missing from the excerpt */
    val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos
    new StageInfo(
      /* argument missing */ ,
      attemptId,
      /* argument missing */ ,
      numTasks.getOrElse(stage.numTasks),
      rddInfos,
      /* argument missing */ ,
      stage.details,
      taskLocalityPreferences)
  }
}
Source File: FixedPointJobDefinition.scala From BigDatalog with Apache License 2.0 | 5 votes
package org.apache.spark.scheduler.fixedpoint

import org.apache.spark.TaskContext
import org.apache.spark.rdd.RDD

import scala.collection.mutable.{HashSet, HashMap, Set}

/**
 * Mutable description of a fixed-point job.
 *
 * @param setupIteration   callback taking this definition and an RDD and returning an RDD
 * @param cleanupIteration callback taking an Int
 */
class FixedPointJobDefinition(
    val setupIteration: (FixedPointJobDefinition, RDD[_]) => RDD[_],
    val cleanupIteration: (Int) => Unit) {

  // Both references start out null until assigned.
  var _fixedPointEvaluator: (TaskContext, Iterator[_]) => Boolean = _
  var finalRDD: RDD[_] = _

  // for all and delta rdd id for FixedPointResultTask execution on worker
  var rddIds = Array[Int]()

  /** Stores the evaluator function. */
  def fixedPointEvaluator(fixedPointEvaluator: (TaskContext, Iterator[_]) => Boolean) = {
    this._fixedPointEvaluator = fixedPointEvaluator
  }

  /** The stored evaluator, cast to a function with an unspecified result type. */
  def getfixedPointEvaluator = _fixedPointEvaluator.asInstanceOf[(TaskContext, Iterator[_]) => _]

  def getFinalRDD: RDD[_] = this.finalRDD

  /** Replaces `rddIds` with the four ids, in parameter order. */
  def setRDDIds(newAllRDDId: Int,
                oldAllRDDId: Int,
                newDeltaPrimeRDDId: Int,
                oldDeltaPrimeRDDId: Int): Unit = {
    rddIds = Array[Int](newAllRDDId, oldAllRDDId, newDeltaPrimeRDDId, oldDeltaPrimeRDDId)
  }
}
Source File: GroupedSumEvaluator.scala From BigDatalog with Apache License 2.0 | 5 votes
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConverters._
import scala.collection.Map
import scala.collection.mutable.HashMap

import org.apache.spark.util.StatCounter

/**
 * Approximate evaluator for per-key sums: merges per-task StatCounter maps and reports,
 * for every key, an estimated sum with a confidence interval.
 *
 * @param totalOutputs number of task outputs expected in total
 * @param confidence   confidence level passed to StudentTCacher and reported in each bound
 */
private[spark] class GroupedSumEvaluator[T](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] {

  var outputsMerged = 0
  var sums = new JHashMap[T, StatCounter]   // Merged StatCounter for each key

  // NOTE(review): the three `iter.next()` calls in this class were dropped by text
  // extraction and are restored; nothing else can bind `entry` inside the loops.

  /** Folds one task's per-key counters into the running totals. */
  override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]): Unit = {
    outputsMerged += 1
    val iter = taskResult.entrySet.iterator()
    while (iter.hasNext) {
      val entry = iter.next()
      val old = sums.get(entry.getKey)
      if (old != null) {
        old.merge(entry.getValue)
      } else {
        // First sighting of this key: the task's counter object itself is stored.
        sums.put(entry.getKey, entry.getValue)
      }
    }
  }

  /**
   * Current estimate per key.
   *
   *  - all outputs merged: the exact sum, reported with confidence 1.0 and zero width;
   *  - nothing merged yet: an empty map;
   *  - otherwise: an extrapolated sum with bounds derived from the merged fraction.
   */
  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val sum = entry.getValue.sum
        result.put(entry.getKey, new BoundedDouble(sum, 1.0, sum, sum))
      }
      result.asScala
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val p = outputsMerged.toDouble / totalOutputs   // fraction of outputs merged so far
      val studentTCacher = new StudentTCacher(confidence)
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val counter = entry.getValue
        val meanEstimate = counter.mean
        val meanVar = counter.sampleVariance / counter.count
        val countEstimate = (counter.count + 1 - p) / p
        val countVar = (counter.count + 1) * (1 - p) / (p * p)
        val sumEstimate = meanEstimate * countEstimate
        val sumVar = (meanEstimate * meanEstimate * countVar) +
                     (countEstimate * countEstimate * meanVar) +
                     (meanVar * countVar)
        val sumStdev = math.sqrt(sumVar)
        val confFactor = studentTCacher.get(counter.count)
        val low = sumEstimate - confFactor * sumStdev
        val high = sumEstimate + confFactor * sumStdev
        result.put(entry.getKey, new BoundedDouble(sumEstimate, confidence, low, high))
      }
      result.asScala
    }
  }
}
Source File: GroupedCountEvaluator.scala From BigDatalog with Apache License 2.0 | 5 votes
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConverters._
import scala.collection.Map
import scala.collection.mutable.HashMap
import scala.reflect.ClassTag

import org.apache.commons.math3.distribution.NormalDistribution

import org.apache.spark.util.collection.OpenHashMap

/**
 * Approximate evaluator for per-key counts: accumulates per-task count maps and reports,
 * for every key, an estimated count with a confidence interval.
 *
 * @param totalOutputs number of task outputs expected in total
 * @param confidence   confidence level used for, and reported in, each bound
 */
private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] {

  var outputsMerged = 0
  var sums = new OpenHashMap[T, Long]()   // Sum of counts for each key

  /** Adds one task's per-key counts to the running totals. */
  override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]) {
    outputsMerged += 1
    taskResult.foreach { kv =>
      // Insert the count for a new key, otherwise add it to the stored total.
      sums.changeValue(kv._1, kv._2, _ + kv._2)
    }
  }

  /**
   * Current estimate per key.
   *
   *  - all outputs merged: the exact count, with confidence 1.0 and zero width;
   *  - nothing merged yet: an empty map;
   *  - otherwise: an extrapolated count with bounds derived from the merged fraction.
   */
  override def currentResult(): Map[T, BoundedDouble] = {
    outputsMerged match {
      case merged if merged == totalOutputs =>
        val exact = new JHashMap[T, BoundedDouble](sums.size)
        sums.foreach { kv =>
          val sum = kv._2
          exact.put(kv._1, new BoundedDouble(sum, 1.0, sum, sum))
        }
        exact.asScala

      case 0 =>
        new HashMap[T, BoundedDouble]

      case _ =>
        val p = outputsMerged.toDouble / totalOutputs   // fraction of outputs merged so far
        val confFactor = new NormalDistribution().
          inverseCumulativeProbability(1 - (1 - confidence) / 2)
        val estimates = new JHashMap[T, BoundedDouble](sums.size)
        sums.foreach { kv =>
          val sum = kv._2
          val mean = (sum + 1 - p) / p
          val variance = (sum + 1) * (1 - p) / (p * p)
          val halfWidth = confFactor * math.sqrt(variance)
          estimates.put(kv._1,
            new BoundedDouble(mean, confidence, mean - halfWidth, mean + halfWidth))
        }
        estimates.asScala
    }
  }
}
Source File: GroupedMeanEvaluator.scala From BigDatalog with Apache License 2.0 | 5 votes
package org.apache.spark.partial

import java.util.{HashMap => JHashMap}

import scala.collection.JavaConverters._
import scala.collection.Map
import scala.collection.mutable.HashMap

import org.apache.spark.util.StatCounter

/**
 * Approximate evaluator for per-key means: merges per-task StatCounter maps and reports,
 * for every key, the mean with a confidence interval.
 *
 * @param totalOutputs number of task outputs expected in total
 * @param confidence   confidence level passed to StudentTCacher and reported in each bound
 */
private[spark] class GroupedMeanEvaluator[T](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] {

  var outputsMerged = 0
  var sums = new JHashMap[T, StatCounter]   // Merged StatCounter for each key

  // NOTE(review): the three `iter.next()` calls in this class were dropped by text
  // extraction and are restored; nothing else can bind `entry` inside the loops.

  /** Folds one task's per-key counters into the running totals. */
  override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]): Unit = {
    outputsMerged += 1
    val iter = taskResult.entrySet.iterator()
    while (iter.hasNext) {
      val entry = iter.next()
      val old = sums.get(entry.getKey)
      if (old != null) {
        old.merge(entry.getValue)
      } else {
        // First sighting of this key: the task's counter object itself is stored.
        sums.put(entry.getKey, entry.getValue)
      }
    }
  }

  /**
   * Current estimate per key.
   *
   *  - all outputs merged: the exact mean, with confidence 1.0 and zero width;
   *  - nothing merged yet: an empty map;
   *  - otherwise: the mean so far, bounded using the standard error of the mean.
   */
  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val mean = entry.getValue.mean
        result.put(entry.getKey, new BoundedDouble(mean, 1.0, mean, mean))
      }
      result.asScala
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val studentTCacher = new StudentTCacher(confidence)
      val result = new JHashMap[T, BoundedDouble](sums.size)
      val iter = sums.entrySet.iterator()
      while (iter.hasNext) {
        val entry = iter.next()
        val counter = entry.getValue
        val mean = counter.mean
        val stdev = math.sqrt(counter.sampleVariance / counter.count)
        val confFactor = studentTCacher.get(counter.count)
        val low = mean - confFactor * stdev
        val high = mean + confFactor * stdev
        result.put(entry.getKey, new BoundedDouble(mean, confidence, low, high))
      }
      result.asScala
    }
  }
}
Source File: TaskContextImpl.scala From BigDatalog with Apache License 2.0 | 5 votes
package org.apache.spark

import scala.collection.mutable.{ArrayBuffer, HashMap}

import org.apache.spark.executor.TaskMetrics
import org.apache.spark.memory.TaskMemoryManager
import org.apache.spark.metrics.MetricsSystem
import org.apache.spark.metrics.source.Source
import org.apache.spark.util.{TaskCompletionListener, TaskCompletionListenerException}

/**
 * TaskContext implementation holding a task's identifiers, its completion callbacks, its
 * interruption/completion flags and the accumulators registered while it runs.
 *
 * NOTE(review): nothing in this excerpt ever sets `completed` or invokes the registered
 * callbacks; the code that does so was presumably elided -- confirm against the full file.
 */
private[spark] class TaskContextImpl(
    val stageId: Int,
    val partitionId: Int,
    override val taskAttemptId: Long,
    override val attemptNumber: Int,
    override val taskMemoryManager: TaskMemoryManager,
    @transient private val metricsSystem: MetricsSystem,
    internalAccumulators: Seq[Accumulator[Long]],
    val runningLocally: Boolean = false,
    val taskMetrics: TaskMetrics = TaskMetrics.empty)
  extends TaskContext
  with Logging {

  // For backwards-compatibility; this method is now deprecated as of 1.3.0.
  override def attemptId(): Long = taskAttemptId

  // List of callback functions to execute when the task completes.
  @transient private val onCompleteCallbacks = new ArrayBuffer[TaskCompletionListener]

  // Whether the corresponding task has been killed.
  @volatile private var interrupted: Boolean = false

  // Whether the task has completed.
  @volatile private var completed: Boolean = false

  /** Registers a listener and returns this context for chaining. */
  override def addTaskCompletionListener(listener: TaskCompletionListener): this.type = {
    onCompleteCallbacks += listener
    this
  }

  /** Wraps `f` in a TaskCompletionListener, registers it, and returns this context. */
  override def addTaskCompletionListener(f: TaskContext => Unit): this.type = {
    onCompleteCallbacks += new TaskCompletionListener {
      override def onTaskCompletion(context: TaskContext): Unit = f(context)
    }
    this
  }

  @deprecated("use addTaskCompletionListener", "1.1.0")
  override def addOnCompleteCallback(f: () => Unit): Unit = {
    onCompleteCallbacks += new TaskCompletionListener {
      override def onTaskCompletion(context: TaskContext): Unit = f()
    }
  }

  private[spark] def markInterrupted(): Unit = {
    interrupted = true
  }

  override def isCompleted(): Boolean = completed

  override def isRunningLocally(): Boolean = runningLocally

  override def isInterrupted(): Boolean = interrupted

  override def getMetricsSources(sourceName: String): Seq[Source] =
    metricsSystem.getSourcesByName(sourceName)

  // Accumulators registered with this context, keyed by a Long taken from the accumulator.
  @transient private val accumulators = new HashMap[Long, Accumulable[_, _]]

  /** Records `a` under its id, replacing any accumulator already stored under that id. */
  private[spark] override def registerAccumulator(a: Accumulable[_, _]): Unit = synchronized {
    // NOTE(review): the map key was dropped by text extraction; restored as `a.id`, the
    // Long-valued identifier the map's key type calls for -- confirm.
    accumulators(a.id) = a
  }

  /** Local values of the registered accumulators flagged as internal, keyed by id. */
  private[spark] override def collectInternalAccumulators(): Map[Long, Any] = synchronized {
    accumulators.filter(_._2.isInternal).mapValues(_.localValue).toMap
  }

  /** Local values of every registered accumulator, keyed by id. */
  private[spark] override def collectAccumulators(): Map[Long, Any] = synchronized {
    accumulators.mapValues(_.localValue).toMap
  }

  // NOTE(review): in the excerpt the `private[spark]` modifier is commented out, and
  // `internalAccumulators.map` plus the tuple key were dropped by text extraction. The key
  // is restored as `a.name.get` (the result map is keyed by String) -- confirm both.
  //private[spark]
  override val internalMetricsToAccumulators: Map[String, Accumulator[Long]] = {
    // Explicitly register internal accumulators here because these are
    // not captured in the task closure and are already deserialized
    internalAccumulators.foreach(registerAccumulator)
    internalAccumulators.map { a => (a.name.get, a) }.toMap
  }
}
Source File: LocalKMeans.scala From learning-spark with Apache License 2.0 | 5 votes
package org.apache.spark.examples

import java.util.Random

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet

import breeze.linalg.{Vector, DenseVector, squaredDistance}

import org.apache.spark.SparkContext._

/**
 * Naive, single-process K-means over randomly generated points, intended as an example.
 *
 * N points of dimension D are drawn with a fixed-seed generator, K of them are picked as
 * initial centers, and centers are recomputed until their total squared movement in one
 * iteration drops to `convergeDist` or below.
 */
object LocalKMeans {
  val N = 1000
  val R = 1000     // Scaling factor
  val D = 10
  val K = 10
  val convergeDist = 0.001
  val rand = new Random(42)

  /** Generates N points, each with D coordinates drawn from `rand` and scaled by R. */
  def generateData = {
    def generatePoint(i: Int) = {
      DenseVector.fill(D){rand.nextDouble * R}
    }
    Array.tabulate(N)(generatePoint)
  }

  /**
   * Returns the key (1-based) of the center nearest to `p` by squared distance.
   *
   * @param p       the point to classify
   * @param centers current centers, keyed 1..centers.size
   * @return the key of the closest center, or 0 when `centers` is empty
   */
  def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = {
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val vCurr = centers(i)
      val tempDist = squaredDistance(p, vCurr)
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  /** Prints a notice to stderr that this implementation is for illustration only. */
  def showWarning(): Unit = {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use the KMeans method found in org.apache.spark.mllib.clustering
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]): Unit = {

    showWarning()

    val data = generateData
    val points = new HashSet[Vector[Double]]
    val kPoints = new HashMap[Int, Vector[Double]]
    var tempDist = 1.0

    // Draw until K distinct points have been collected (the set drops duplicates).
    while (points.size < K) {
      points.add(data(rand.nextInt(N)))
    }

    // NOTE(review): `iter.next()`, `data.map`, `mappings.map` and `pointStats.map` below were
    // dropped by text extraction and are restored from the surrounding data flow.
    val iter = points.iterator
    for (i <- 1 to points.size) {
      kPoints.put(i, iter.next())
    }

    println("Initial centers: " + kPoints)

    while (tempDist > convergeDist) {
      // Tag every point with the key of its nearest center and a count of 1.
      val closest = data.map (p => (closestPoint(p, kPoints), (p, 1)))

      val mappings = closest.groupBy[Int] (x => x._1)

      // Per center: sum of the assigned points and how many there were.
      val pointStats = mappings.map { pair =>
        pair._2.reduceLeft [(Int, (Vector[Double], Int))] {
          case ((id1, (x1, y1)), (id2, (x2, y2))) => (id1, (x1 + x2, y1 + y2))
        }
      }

      // New center = sum / count.
      val newPoints = pointStats.map {mapping =>
        (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))}

      // Total squared movement of the centers in this iteration.
      tempDist = 0.0
      for (mapping <- newPoints) {
        tempDist += squaredDistance(kPoints(mapping._1), mapping._2)
      }

      for (newP <- newPoints) {
        kPoints.put(newP._1, newP._2)
      }
    }

    println("Final centers: " + kPoints)
  }
}
Source File: CollectionExample.scala From Scala-and-Spark-for-Big-Data-Analytics with MIT License | 5 votes
package com.chapter3.ScalaFP

import scala.collection._
import scala.collection.mutable.Buffer
import scala.collection.mutable.HashMap

/**
 * Tour of collection constructors followed by three `map` transformations whose results
 * are printed.
 */
object CollectionExample {
  def main(args: Array[String]): Unit = {
    val x = 10
    val y = 15
    val z = 19

    // Each expression below builds a collection and discards it; they only demonstrate
    // the constructor syntax.
    Traversable(1, 2, 3)
    Iterable("x", "y", "z")
    Map("x" -> 10, "y" -> 13, "z" -> 17)
    Set("Red", "Green", "Blue")
    SortedSet("Hello,", "world!")
    Buffer(x, y, z)
    IndexedSeq(0.0, 1.0, 2.0)
    LinearSeq(x, y, z)
    List(2, 6, 10)
    HashMap("x" -> 20, "y" -> 19, "z" -> 16)

    // Add one to every element.
    val list = List(1, 2, 3).map(n => n + 1)
    println(list)

    // Double every element.
    val set = Set(1, 2, 3).map(n => n * 2)
    println(set)

    // Triple every element.
    val list2 = List(x, y, z).map(_ * 3)
    println(list2)
  }
}
Source File: NFAStructure.scala From piglet with Apache License 2.0 | 5 votes
package dbis.piglet.cep.nfa

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag
import scala.collection.mutable.HashMap
import dbis.piglet.backends.{SchemaClass => Event}
import scala.collection.mutable.ListBuffer

  // NOTE(review): the header of the enclosing class (presumably `NFAStructure[T]`, given the
  // return type of `clone`) and its fields are not part of this excerpt.

  /**
   * Appends `event` to this structure's events and moves to the destination state of
   * `currentEdge`; marks the structure complete when that state is a FinalState.
   *
   * @param event       the event to record
   * @param currentEdge the edge being followed; its `destState` becomes the current state
   */
  def addEvent(event: T, currentEdge: ForwardEdge[T]): Unit = {
    events += event
    // Disabled related-value update, kept as found (part of it was lost in extraction):
    //if (relatedValue != null) {
    // relatedValue.get( match {
    // case Some(x) => x.foreach (r => r.updateValue(event))
    //case None => Nil
    //}
    //}
    currenState = currentEdge.destState
    if (currenState.isInstanceOf[FinalState[T]])
      complete = true
  }

  /**
   * Creates a new structure on the same controller and copies `complete` and the current
   * state into it.
   *
   * NOTE(review): the excerpt is garbled after the `currenState` assignment -- the operands
   * around the stray `=` tokens were lost in extraction, so the remaining copy steps are
   * unknown and the body does not compile as shown; restore them from the original file.
   */
  override def clone(): NFAStructure[T] = {
    val copyStr = new NFAStructure[T](this.nfaController)
    copyStr.complete = this.complete
    copyStr.currenState = this.currenState
    /* left-hand side missing */ = /* right-hand side missing */
    // = (remnant of a commented-out line whose text was lost)
    copyStr
  }
}
Source File: CorefUtils.scala From berkeley-doc-summarizer with GNU General Public License v3.0 | 5 votes
package edu.berkeley.nlp.summ

import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.HashMap

import edu.berkeley.nlp.entity.DepConstTree
import edu.berkeley.nlp.entity.coref.Mention
import edu.berkeley.nlp.entity.coref.PronounDictionary
import edu.berkeley.nlp.entity.coref.MentionType
import edu.berkeley.nlp.entity.coref.CorefDoc
import edu.berkeley.nlp.entity.GUtil
import edu.berkeley.nlp.futile.math.SloppyMath

/** Helpers for coreference documents: antecedent scoring, mention remapping and spans. */
object CorefUtils {

  /** Index of the highest-posterior antecedent for the mention at `index`. */
  def getAntecedent(corefDoc: CorefDoc, predictor: edu.berkeley.nlp.entity.coref.PairwiseScorer, index: Int) = {
    GUtil.argMaxIdx(computePosteriors(corefDoc, predictor, Seq(index))(0))
  }

  /**
   * For each requested mention index `idx`, scores every candidate antecedent 0..idx and
   * normalizes the scores in log space by subtracting their log-sum.
   *
   * @return one array per entry of `indicesOfInterest`, in the same order, of length idx + 1
   */
  def computePosteriors(corefDoc: CorefDoc, predictor: edu.berkeley.nlp.entity.coref.PairwiseScorer, indicesOfInterest: Seq[Int]): Array[Array[Double]] = {
    val docGraph = new edu.berkeley.nlp.entity.coref.DocumentGraph(corefDoc, false)
    indicesOfInterest.map { idx =>
      val rawScores = Array.tabulate(idx+1)(antIdx => predictor.score(docGraph, idx, antIdx, false).toDouble)
      val logNormalizer = rawScores.foldLeft(Double.NegativeInfinity)(SloppyMath.logAdd(_, _))
      rawScores.map(score => score - logNormalizer)
    }.toArray
  }

  /**
   * Returns a copy of `ment` whose mention type is recomputed from its head token:
   * single-token demonstratives, then single-token pronouns, then heads tagged NNS/NNPS,
   * and NOMINAL for everything else.
   */
  def remapMentionType(ment: Mention) = {
    // Evaluated lazily (as defs) so each lookup happens only when its branch is reached.
    def isSingleToken = ment.endIdx - ment.startIdx == 1
    def headWord = ment.rawDoc.words(ment.sentIdx)(ment.headIdx)
    def headPos = ment.rawDoc.pos(ment.sentIdx)(ment.headIdx)

    val newMentionType =
      if (isSingleToken && PronounDictionary.isDemonstrative(headWord)) MentionType.DEMONSTRATIVE
      else if (isSingleToken && PronounDictionary.isPronLc(headWord)) MentionType.PRONOMINAL
      // NOTE(review): "NNS" is the Penn Treebank tag for plural common nouns ("NNP" is the
      // singular proper noun); mapping NNS to PROPER looks unintended -- confirm before changing.
      else if (headPos == "NNS" || headPos == "NNPS") MentionType.PROPER
      else MentionType.NOMINAL

    new Mention(ment.rawDoc,
                ment.mentIdx,
                ment.sentIdx,
                ment.startIdx,
                ment.endIdx,
                ment.headIdx,
                ment.allHeadIndices,
                ment.isCoordinated,
                newMentionType,
                ment.nerString,
                ment.number,
                ment.gender)
  }

  /** The words of the mention's sentence between `startIdx` (inclusive) and `endIdx` (exclusive). */
  def getMentionText(ment: Mention) = ment.rawDoc.words(ment.sentIdx).slice(ment.startIdx, ment.endIdx)

  /**
   * Smallest NER chunk in the mention's sentence that contains the mention head.
   *
   * @return (start, end) of that chunk, or None when no chunk covers the head
   */
  def getMentionNerSpan(ment: Mention): Option[(Int,Int)] = {
    val headIdx = ment.headIdx
    val covering = ment.rawDoc.nerChunks(ment.sentIdx).filter(chunk => chunk.start <= headIdx && headIdx < chunk.end)
    if (covering.isEmpty) {
      None
    } else {
      // First chunk of minimal width, i.e. what a stable sort by width would put first.
      val tightest = covering.minBy(chunk => chunk.end - chunk.start)
      Some(tightest.start -> tightest.end)
    }
  }

  /** Same as the map-based overload, using the tree's child-to-parent dependency map. */
  def getSpanHeads(tree: DepConstTree, startIdx: Int, endIdx: Int): Seq[Int] =
    getSpanHeads(tree.childParentDepMap, startIdx, endIdx)

  /**
   * Indices in [startIdx, endIdx) whose dependency parent lies outside that span.
   * If the span is a constituent, only one index should qualify.
   */
  def getSpanHeads(childParentDepMap: HashMap[Int,Int], startIdx: Int, endIdx: Int): Seq[Int] = {
    val outsidePointing = new ArrayBuffer[Int]
    outsidePointing ++= (startIdx until endIdx).filter { i =>
      val parent = childParentDepMap(i)
      parent < startIdx || parent >= endIdx
    }
    outsidePointing
  }

  /**
   * True when `str` canonicalizes to one of "i", "you", "he", "she".
   * N.B. "we" and "they" are deliberately not checked because they might be used in
   * inanimate cases.
   */
  def isDefinitelyPerson(str: String): Boolean = {
    val canonical = PronounDictionary.canonicalize(str)
    Seq("i", "you", "he", "she").exists(pronoun => canonical == pronoun)
  }
}