scala.collection.mutable.HashMap Scala Examples
The following examples show how to use scala.collection.mutable.HashMap.
Each example links back to its original project and source file.
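Before working through the project examples, the short, self-contained sketch below shows the mutable HashMap operations that recur throughout them: counting with getOrElse/put, bulk insertion with ++=, nested maps via getOrElseUpdate, and merging two maps. All names in it (HashMapBasics, counts, nested, and the sample data) are illustrative and do not come from any of the projects listed.

import scala.collection.mutable.HashMap

// Minimal sketch of the HashMap operations that recur in the examples below.
// All names here are illustrative, not taken from any project on this page.
object HashMapBasics {
  def main(args: Array[String]): Unit = {
    val counts = new HashMap[String, Int]()

    // Count occurrences: read with a default, then write back.
    Seq("a", "b", "a", "c", "a").foreach { w =>
      counts.put(w, counts.getOrElse(w, 0) + 1)
    }

    // Bulk-add pairs from another collection.
    counts ++= Map("d" -> 1, "e" -> 2)

    // getOrElseUpdate inserts the default only when the key is absent.
    val nested = new HashMap[String, HashMap[Int, Long]]()
    nested.getOrElseUpdate("topic", new HashMap[Int, Long]) += 0 -> 42L

    // Merge another map into this one, summing values on key collisions.
    val other = Map("a" -> 10, "z" -> 1)
    other.foreach { case (k, v) => counts.put(k, counts.getOrElse(k, 0) + v) }

    println(counts)  // e.g. HashMap(a -> 13, b -> 1, c -> 1, d -> 1, e -> 2, z -> 1)
    println(nested)  // HashMap(topic -> HashMap(0 -> 42))
  }
}

The same read-with-default-then-write pattern appears in Examples 2, 5, and 28 below.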
Example 1
Source File: IOCommon.scala From Swallow with Apache License 2.0 | 5 votes |
package com.intel.hibench.sparkbench.common

import java.io.{File, FileInputStream, IOException, InputStreamReader}
import java.util.Properties

import org.apache.hadoop.io.compress.CompressionCodec
import org.apache.hadoop.io.{NullWritable, Text}
import org.apache.hadoop.mapred.SequenceFileOutputFormat
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkContext, SparkException}

import scala.collection.JavaConversions._
import scala.collection.mutable.HashMap
import scala.reflect.ClassTag
import scala.reflect.runtime.universe.TypeTag

class IOCommon(val sc: SparkContext) {

  def load[T: ClassTag: TypeTag](filename: String, force_format: Option[String] = None) = {
    val input_format = force_format.getOrElse(
      IOCommon.getProperty("sparkbench.inputformat").getOrElse("Text"))
    input_format match {
      case "Text" =>
        sc.textFile(filename)
      case "Sequence" =>
        sc.sequenceFile[NullWritable, Text](filename).map(_._2.toString)
      case _ =>
        throw new UnsupportedOperationException(s"Unknown input format: $input_format")
    }
  }

  def save(filename: String, data: RDD[_], prefix: String) = {
    val output_format = IOCommon.getProperty(prefix).getOrElse("Text")
    val output_format_codec =
      loadClassByName[CompressionCodec](IOCommon.getProperty(prefix + ".codec"))

    output_format match {
      case "Text" =>
        if (output_format_codec.isEmpty) data.saveAsTextFile(filename)
        else data.saveAsTextFile(filename, output_format_codec.get)
      case "Sequence" =>
        val sequence_data = data.map(x => (NullWritable.get(), new Text(x.toString)))
        if (output_format_codec.isEmpty) {
          sequence_data.saveAsHadoopFile[SequenceFileOutputFormat[NullWritable, Text]](filename)
        } else {
          sequence_data.saveAsHadoopFile[SequenceFileOutputFormat[NullWritable, Text]](
            filename, output_format_codec.get)
        }
      case _ =>
        throw new UnsupportedOperationException(s"Unknown output format: $output_format")
    }
  }

  def save(filename: String, data: RDD[_]): Unit = save(filename, data, "sparkbench.outputformat")

  private def loadClassByName[T](name: Option[String]) = {
    if (!name.isEmpty) Some(Class.forName(name.get)
      .newInstance.asInstanceOf[T].getClass) else None
  }

  private def callMethod[T, R](obj: T, method_name: String) =
    obj.getClass.getMethod(method_name).invoke(obj).asInstanceOf[R]
}

object IOCommon {

  private val sparkbench_conf: HashMap[String, String] =
    getPropertiesFromFile(System.getenv("SPARKBENCH_PROPERTIES_FILES"))

  def getPropertiesFromFile(filenames: String): HashMap[String, String] = {
    val result = new HashMap[String, String]
    filenames.split(',').filter(_.stripMargin.length > 0).foreach { filename =>
      val file = new File(filename)
      require(file.exists, s"Properties file $file does not exist")
      require(file.isFile, s"Properties file $file is not a normal file")

      val inReader = new InputStreamReader(new FileInputStream(file), "UTF-8")
      try {
        val properties = new Properties()
        properties.load(inReader)
        result ++= properties.stringPropertyNames()
          .map(k => (k, properties(k).trim)).toMap
      } catch {
        case e: IOException =>
          val message = s"Failed when loading Sparkbench properties file $file"
          throw new SparkException(message, e)
      } finally {
        inReader.close()
      }
    }
    result.filter { case (key, value) => value.toLowerCase != "none" }
  }

  def getProperty(key: String): Option[String] = sparkbench_conf.get(key)

  def dumpProperties(): Unit = sparkbench_conf
    .foreach { case (key, value) => println(s"$key\t\t$value") }
}
Example 2
Source File: UniqueTermAccumulator.scala From sparkpipe-core with Apache License 2.0 | 5 votes |
package software.uncharted.sparkpipe.ops.core.dataframe.text.util

import org.apache.spark.sql.Row
import org.apache.spark.util.AccumulatorV2

import scala.collection.mutable.HashMap

private[text] class UniqueTermAccumulator(
  private var result: HashMap[String, Int],
  private var touched: Boolean = false
) extends AccumulatorV2[Seq[String], HashMap[String, Int]] {

  def this() {
    this(new HashMap[String, Int]())
  }

  override def add(in: Seq[String]): Unit = {
    in.foreach(w => {
      result.put(w, result.getOrElse(w, 0) + 1)
    })
  }

  override def copy(): AccumulatorV2[Seq[String], HashMap[String, Int]] = {
    val clone = new HashMap[String, Int]()
    result.foreach(kv => clone.put(kv._1, kv._2))
    new UniqueTermAccumulator(clone, false)
  }

  override def isZero(): Boolean = {
    !touched
  }

  override def merge(other: AccumulatorV2[Seq[String], HashMap[String, Int]]): Unit = {
    other.value.foreach(t => {
      result.put(t._1, result.getOrElse(t._1, 0) + t._2)
    })
  }

  override def reset(): Unit = {
    result.clear
    touched = false
  }

  override def value: HashMap[String, Int] = {
    result
  }
}
Example 3
Source File: Mapper.scala From Scalaprof with GNU General Public License v2.0 | 5 votes |
package edu.neu.coe.scala.mapreduce

import akka.actor.{ Actor, ActorLogging, ActorRef }

import scala.collection.mutable.HashMap
import scala.util._

class Mapper_Forgiving[K1, V1, K2, V2](f: (K1, V1) => (K2, V2)) extends Mapper[K1, V1, K2, V2](f) {

  override def prepareReply(v2k2ts: Seq[Try[(K2, V2)]]) = {
    val v2sK2m = HashMap[K2, Seq[V2]]() // mutable
    val xs = Seq[Throwable]() // mutable
    for (v2k2t <- v2k2ts; v2k2e = Master.sequence(v2k2t)) v2k2e match {
      case Right((k2, v2)) => v2sK2m put(k2, v2 +: (v2sK2m get (k2) getOrElse (Nil)))
      case Left(x) => xs :+ x
    }
    (v2sK2m.toMap, xs.toSeq)
  }
}

case class Incoming[K, V](m: Seq[(K, V)]) {
  override def toString = s"Incoming: with ${m.size} elements"
}

object Incoming {
  def sequence[K, V](vs: Seq[V]): Incoming[K, V] =
    Incoming((vs zip Stream.continually(null.asInstanceOf[K])).map { _.swap })

  def map[K, V](vKm: Map[K, V]): Incoming[K, V] = Incoming(vKm.toSeq)
}

object Mapper {
}
Example 4
Source File: LocalKMeans.scala From drizzle-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println
package org.apache.spark.examples

import java.util.Random

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet

import breeze.linalg.{squaredDistance, DenseVector, Vector}

object LocalKMeans {
  val N = 1000
  val R = 1000 // Scaling factor
  val D = 10
  val K = 10
  val convergeDist = 0.001
  val rand = new Random(42)

  def generateData: Array[DenseVector[Double]] = {
    def generatePoint(i: Int): DenseVector[Double] = {
      DenseVector.fill(D) {rand.nextDouble * R}
    }
    Array.tabulate(N)(generatePoint)
  }

  def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = {
    var index = 0
    var bestIndex = 0
    var closest = Double.PositiveInfinity

    for (i <- 1 to centers.size) {
      val vCurr = centers.get(i).get
      val tempDist = squaredDistance(p, vCurr)
      if (tempDist < closest) {
        closest = tempDist
        bestIndex = i
      }
    }

    bestIndex
  }

  def showWarning() {
    System.err.println(
      """WARN: This is a naive implementation of KMeans Clustering and is given as an example!
        |Please use org.apache.spark.ml.clustering.KMeans
        |for more conventional use.
      """.stripMargin)
  }

  def main(args: Array[String]) {

    showWarning()

    val data = generateData
    var points = new HashSet[Vector[Double]]
    var kPoints = new HashMap[Int, Vector[Double]]
    var tempDist = 1.0

    while (points.size < K) {
      points.add(data(rand.nextInt(N)))
    }

    val iter = points.iterator
    for (i <- 1 to points.size) {
      kPoints.put(i, iter.next())
    }

    println("Initial centers: " + kPoints)

    while (tempDist > convergeDist) {
      var closest = data.map(p => (closestPoint(p, kPoints), (p, 1)))

      var mappings = closest.groupBy[Int](x => x._1)

      var pointStats = mappings.map { pair =>
        pair._2.reduceLeft[(Int, (Vector[Double], Int))] {
          case ((id1, (p1, c1)), (id2, (p2, c2))) => (id1, (p1 + p2, c1 + c2))
        }
      }

      var newPoints = pointStats.map { mapping =>
        (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))
      }

      tempDist = 0.0
      for (mapping <- newPoints) {
        tempDist += squaredDistance(kPoints.get(mapping._1).get, mapping._2)
      }

      for (newP <- newPoints) {
        kPoints.put(newP._1, newP._2)
      }
    }

    println("Final centers: " + kPoints)
  }
}
// scalastyle:on println
Example 5
Source File: JsonUtils.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.kafka010

import java.io.Writer

import scala.collection.mutable.HashMap
import scala.util.control.NonFatal

import org.apache.kafka.common.TopicPartition
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization

  def partitionOffsets(partitionOffsets: Map[TopicPartition, Long]): String = {
    val result = new HashMap[String, HashMap[Int, Long]]()
    partitionOffsets.foreach { case (tp, off) =>
      val parts = result.getOrElse(tp.topic, new HashMap[Int, Long])
      parts += tp.partition -> off
      result += tp.topic -> parts
    }
    Serialization.write(result)
  }
}
Example 6
Source File: DStreamCheckpointData.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import java.io.{IOException, ObjectInputStream, ObjectOutputStream} import scala.collection.mutable.HashMap import scala.reflect.ClassTag import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.internal.Logging import org.apache.spark.streaming.Time import org.apache.spark.util.Utils private[streaming] class DStreamCheckpointData[T: ClassTag](dstream: DStream[T]) extends Serializable with Logging { protected val data = new HashMap[Time, AnyRef]() // Mapping of the batch time to the checkpointed RDD file of that time @transient private var timeToCheckpointFile = new HashMap[Time, String] // Mapping of the batch time to the time of the oldest checkpointed RDD // in that batch's checkpoint data @transient private var timeToOldestCheckpointFileTime = new HashMap[Time, Time] @transient private var fileSystem: FileSystem = null protected[streaming] def currentCheckpointFiles = data.asInstanceOf[HashMap[Time, String]] def restore() { // Create RDDs from the checkpoint data currentCheckpointFiles.foreach { case(time, file) => logInfo("Restoring checkpointed RDD for time " + time + " from file '" + file + "'") dstream.generatedRDDs += ((time, dstream.context.sparkContext.checkpointFile[T](file))) } } override def toString: String = { "[\n" + currentCheckpointFiles.size + " checkpoint files \n" + currentCheckpointFiles.mkString("\n") + "\n]" } @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { logDebug(this.getClass().getSimpleName + ".writeObject used") if (dstream.context.graph != null) { dstream.context.graph.synchronized { if (dstream.context.graph.checkpointInProgress) { oos.defaultWriteObject() } else { val msg = "Object of " + this.getClass.getName + " is being serialized " + " possibly as a part of closure of an RDD operation. This is because " + " the DStream object is being referred to from within the closure. " + " Please rewrite the RDD operation inside this DStream to avoid this. " + " This has been enforced to avoid bloating of Spark tasks " + " with unnecessary objects." throw new java.io.NotSerializableException(msg) } } } else { throw new java.io.NotSerializableException( "Graph is unexpectedly null when DStream is being serialized.") } } @throws(classOf[IOException]) private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { logDebug(this.getClass().getSimpleName + ".readObject used") ois.defaultReadObject() timeToOldestCheckpointFileTime = new HashMap[Time, Time] timeToCheckpointFile = new HashMap[Time, String] } }
Example 7
Source File: MasterWebUI.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.master.ui

import scala.collection.mutable.HashMap

import org.eclipse.jetty.servlet.ServletContextHandler

import org.apache.spark.deploy.master.Master
import org.apache.spark.internal.Logging
import org.apache.spark.ui.{SparkUI, WebUI}
import org.apache.spark.ui.JettyUtils._

  def initialize() {
    val masterPage = new MasterPage(this)
    attachPage(new ApplicationPage(this))
    attachPage(masterPage)
    attachHandler(createStaticHandler(MasterWebUI.STATIC_RESOURCE_DIR, "/static"))
    attachHandler(createRedirectHandler(
      "/app/kill", "/", masterPage.handleAppKillRequest, httpMethods = Set("POST")))
    attachHandler(createRedirectHandler(
      "/driver/kill", "/", masterPage.handleDriverKillRequest, httpMethods = Set("POST")))
  }

  def addProxyTargets(id: String, target: String): Unit = {
    var endTarget = target.stripSuffix("/")
    val handler = createProxyHandler("/proxy/" + id, endTarget)
    attachHandler(handler)
    proxyHandlers(id) = handler
  }

  def removeProxyTargets(id: String): Unit = {
    proxyHandlers.remove(id).foreach(detachHandler)
  }
}

private[master] object MasterWebUI {
  private val STATIC_RESOURCE_DIR = SparkUI.STATIC_RESOURCE_DIR
}
Example 8
Source File: PoolTable.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.jobs import java.net.URLEncoder import scala.collection.mutable.HashMap import scala.xml.Node import org.apache.spark.scheduler.{Schedulable, StageInfo} import org.apache.spark.ui.UIUtils private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) { private val listener = parent.progressListener def toNodeSeq: Seq[Node] = { listener.synchronized { poolTable(poolRow, pools) } } private def poolTable( makeRow: (Schedulable, HashMap[String, HashMap[Int, StageInfo]]) => Seq[Node], rows: Seq[Schedulable]): Seq[Node] = { <table class="table table-bordered table-striped table-condensed sortable table-fixed"> <thead> <th>Pool Name</th> <th>Minimum Share</th> <th>Pool Weight</th> <th>Active Stages</th> <th>Running Tasks</th> <th>SchedulingMode</th> </thead> <tbody> {rows.map(r => makeRow(r, listener.poolToActiveStages))} </tbody> </table> } private def poolRow( p: Schedulable, poolToActiveStages: HashMap[String, HashMap[Int, StageInfo]]): Seq[Node] = { val activeStages = poolToActiveStages.get(p.name) match { case Some(stages) => stages.size case None => 0 } val href = "%s/stages/pool?poolname=%s" .format(UIUtils.prependBaseUri(parent.basePath), URLEncoder.encode(p.name, "UTF-8")) <tr> <td> <a href={href}>{p.name}</a> </td> <td>{p.minShare}</td> <td>{p.weight}</td> <td>{activeStages}</td> <td>{p.runningTasks}</td> <td>{p.schedulingMode}</td> </tr> } }
Example 9
Source File: ConfigReader.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.internal.config

import java.util.{Map => JMap}
import java.util.regex.Pattern

import scala.collection.mutable.HashMap
import scala.util.matching.Regex

private object ConfigReader {
  private val REF_RE = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r
}

  def substitute(input: String): String = substitute(input, Set())

  private def substitute(input: String, usedRefs: Set[String]): String = {
    if (input != null) {
      ConfigReader.REF_RE.replaceAllIn(input, { m =>
        val prefix = m.group(1)
        val name = m.group(2)
        val ref = if (prefix == null) name else s"$prefix:$name"
        require(!usedRefs.contains(ref), s"Circular reference in $input: $ref")

        val replacement = bindings.get(prefix)
          .flatMap(_.get(name))
          .map { v => substitute(v, usedRefs + ref) }
          .getOrElse(m.matched)
        Regex.quoteReplacement(replacement)
      })
    } else {
      input
    }
  }
}
Example 10
Source File: StageInfo.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler

import scala.collection.mutable.HashMap

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.storage.RDDInfo

  def fromStage(
      stage: Stage,
      attemptId: Int,
      numTasks: Option[Int] = None,
      taskMetrics: TaskMetrics = null,
      taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty
    ): StageInfo = {
    val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd)
    val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos
    new StageInfo(
      stage.id,
      attemptId,
      stage.name,
      numTasks.getOrElse(stage.numTasks),
      rddInfos,
      stage.parents.map(_.id),
      stage.details,
      taskMetrics,
      taskLocalityPreferences)
  }
}
Example 11
Source File: GroupedCountEvaluator.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.partial

import scala.collection.Map
import scala.collection.mutable.HashMap
import scala.reflect.ClassTag

import org.apache.spark.util.collection.OpenHashMap

private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double)
  extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] {

  private var outputsMerged = 0
  private val sums = new OpenHashMap[T, Long]() // Sum of counts for each key

  override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]): Unit = {
    outputsMerged += 1
    taskResult.foreach { case (key, value) =>
      sums.changeValue(key, value, _ + value)
    }
  }

  override def currentResult(): Map[T, BoundedDouble] = {
    if (outputsMerged == totalOutputs) {
      sums.map { case (key, sum) => (key, new BoundedDouble(sum, 1.0, sum, sum)) }.toMap
    } else if (outputsMerged == 0) {
      new HashMap[T, BoundedDouble]
    } else {
      val p = outputsMerged.toDouble / totalOutputs
      sums.map { case (key, sum) => (key, CountEvaluator.bound(confidence, sum, p)) }.toMap
    }
  }
}
Example 12
Source File: MasterWebUISuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.master.ui import java.io.DataOutputStream import java.net.{HttpURLConnection, URL} import java.nio.charset.StandardCharsets import java.util.Date import scala.collection.mutable.HashMap import org.mockito.Mockito.{mock, times, verify, when} import org.scalatest.BeforeAndAfterAll import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.deploy.DeployMessages.{KillDriverResponse, RequestKillDriver} import org.apache.spark.deploy.DeployTestUtils._ import org.apache.spark.deploy.master._ import org.apache.spark.rpc.{RpcEndpointRef, RpcEnv} class MasterWebUISuite extends SparkFunSuite with BeforeAndAfterAll { val conf = new SparkConf val securityMgr = new SecurityManager(conf) val rpcEnv = mock(classOf[RpcEnv]) val master = mock(classOf[Master]) val masterEndpointRef = mock(classOf[RpcEndpointRef]) when(master.securityMgr).thenReturn(securityMgr) when(master.conf).thenReturn(conf) when(master.rpcEnv).thenReturn(rpcEnv) when(master.self).thenReturn(masterEndpointRef) val masterWebUI = new MasterWebUI(master, 0) override def beforeAll() { super.beforeAll() masterWebUI.bind() } override def afterAll() { masterWebUI.stop() super.afterAll() } test("kill application") { val appDesc = createAppDesc() // use new start date so it isn't filtered by UI val activeApp = new ApplicationInfo( new Date().getTime, "app-0", appDesc, new Date(), null, Int.MaxValue) when(master.idToApp).thenReturn(HashMap[String, ApplicationInfo]((activeApp.id, activeApp))) val url = s"http://localhost:${masterWebUI.boundPort}/app/kill/" val body = convPostDataToString(Map(("id", activeApp.id), ("terminate", "true"))) val conn = sendHttpRequest(url, "POST", body) conn.getResponseCode // Verify the master was called to remove the active app verify(master, times(1)).removeApplication(activeApp, ApplicationState.KILLED) } test("kill driver") { val activeDriverId = "driver-0" val url = s"http://localhost:${masterWebUI.boundPort}/driver/kill/" val body = convPostDataToString(Map(("id", activeDriverId), ("terminate", "true"))) val conn = sendHttpRequest(url, "POST", body) conn.getResponseCode // Verify that master was asked to kill driver with the correct id verify(masterEndpointRef, times(1)).ask[KillDriverResponse](RequestKillDriver(activeDriverId)) } private def convPostDataToString(data: Map[String, String]): String = { (for ((name, value) <- data) yield s"$name=$value").mkString("&") } private def sendHttpRequest( url: String, method: String, body: String = ""): HttpURLConnection = { val conn = new URL(url).openConnection().asInstanceOf[HttpURLConnection] conn.setRequestMethod(method) if (body.nonEmpty) { conn.setDoOutput(true) conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded") conn.setRequestProperty("Content-Length", Integer.toString(body.length)) val out = new DataOutputStream(conn.getOutputStream) out.write(body.getBytes(StandardCharsets.UTF_8)) out.close() } conn } }
Example 13
Source File: CMaxTableSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.integration.torch import com.intel.analytics.bigdl.nn.CMaxTable import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.utils.RandomGenerator._ import com.intel.analytics.bigdl.utils.Table import scala.collection.mutable.HashMap import scala.util.Random @com.intel.analytics.bigdl.tags.Serial class CMaxTableSpec extends TorchSpec { "A CMaxTable Module" should "generate correct output and grad" in { torchCheck() val seed = 100 RNG.setSeed(seed) val module = new CMaxTable[Double]() val input1 = Tensor[Double](5).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](5).apply1(e => Random.nextDouble()) val gradOutput = Tensor[Double](5).apply1(e => Random.nextDouble()) val input = new Table() input(1.toDouble) = input1 input(2.toDouble) = input2 val start = System.nanoTime() val output = module.forward(input) val gradInput = module.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start val code = "torch.manualSeed(" + seed + ")\n" + "module = nn.CMaxTable()\n" + "output = module:forward(input)\n" + "gradInput = module:backward(input,gradOutput)" val (luaTime, torchResult) = TH.run(code, Map("input" -> input, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be(output) luaOutput2 should be (gradInput) println("Test case : CMaxTable, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 14
Source File: L1HingeEmbeddingCriterionSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.integration.torch import com.intel.analytics.bigdl.nn.L1HingeEmbeddingCriterion import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.utils.RandomGenerator._ import com.intel.analytics.bigdl.utils.Table import scala.collection.mutable.HashMap import scala.util.Random @com.intel.analytics.bigdl.tags.Serial class L1HingeEmbeddingCriterionSpec extends TorchSpec { "A L1HingeEmbeddingCriterion" should "generate correct output and grad with y == 1 " in { torchCheck() val seed = 2 RNG.setSeed(seed) val module = new L1HingeEmbeddingCriterion[Double](0.6) val input1 = Tensor[Double](2).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](2).apply1(e => Random.nextDouble()) val input = new Table() input(1.0) = input1 input(2.0) = input2 val target = Tensor[Double](1) target(Array(1)) = 1.0 val start = System.nanoTime() val output = module.forward(input, target) val gradInput = module.backward(input, target) val end = System.nanoTime() val scalaTime = end - start val code = "torch.manualSeed(" + seed + ")\n" + "module = nn.L1HingeEmbeddingCriterion(0.6)\n" + "output = module:forward(input, 1)\n" + "gradInput = module:backward(input, 1)\n" val (luaTime, torchResult) = TH.run(code, Map("input" -> input), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Double] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be(output) luaOutput2 should be (gradInput) println("Test case : L1HingeEmbeddingCriterion, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } "A L1HingeEmbeddingCriterion" should "generate correct output and grad with y == -1 " in { torchCheck() val seed = 2 RNG.setSeed(seed) val module = new L1HingeEmbeddingCriterion[Double](0.6) val input1 = Tensor[Double](2).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](2).apply1(e => Random.nextDouble()) val input = new Table() input(1.0) = input1 input(2.0) = input2 val target = Tensor[Double](1) target(Array(1)) = -1.0 val start = System.nanoTime() val output = module.forward(input, target) val gradInput = module.backward(input, target) val end = System.nanoTime() val scalaTime = end - start val code = "torch.manualSeed(" + seed + ")\n" + "module = nn.L1HingeEmbeddingCriterion(0.6)\n" + "output = module:forward(input, -1.0)\n" + "gradInput = module:backward(input, -1.0)\n" val (luaTime, torchResult) = TH.run(code, Map("input" -> input), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Double] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be(output) luaOutput2 should be (gradInput) println("Test case : L1HingeEmbeddingCriterion, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 15
Source File: CDivTableSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.integration.torch import com.intel.analytics.bigdl.nn.CDivTable import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.utils.RandomGenerator._ import com.intel.analytics.bigdl.utils.Table import scala.collection.mutable.HashMap import scala.util.Random @com.intel.analytics.bigdl.tags.Serial class CDivTableSpec extends TorchSpec { "A CDivTable Module" should "generate correct output and grad" in { torchCheck() val seed = 100 RNG.setSeed(seed) val module = new CDivTable[Double]() val input1 = Tensor[Double](5).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](5).apply1(e => Random.nextDouble()) val gradOutput = Tensor[Double](5).apply1(e => Random.nextDouble()) val input = new Table() input(1.toDouble) = input1 input(2.toDouble) = input2 val start = System.nanoTime() val output = module.forward(input) val gradInput = module.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start val code = "torch.manualSeed(" + seed + ")\n" + "module = nn.CDivTable()\n" + "output = module:forward(input)\n" + "gradInput = module:backward(input,gradOutput)" val (luaTime, torchResult) = TH.run(code, Map("input" -> input, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be (output) luaOutput2 should be (gradInput) println("Test case : CDivTable, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 16
Source File: CMulTableSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.integration.torch import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.utils.RandomGenerator._ import com.intel.analytics.bigdl.utils.Table import com.intel.analytics.bigdl.nn.CMulTable import scala.collection.mutable.HashMap import scala.util.Random @com.intel.analytics.bigdl.tags.Serial class CMulTableSpec extends TorchSpec { "A CMulTable Module" should "generate correct output and grad" in { torchCheck() val seed = 100 RNG.setSeed(seed) val module = new CMulTable[Double]() val input1 = Tensor[Double](5).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](5).apply1(e => Random.nextDouble()) val gradOutput = Tensor[Double](5).apply1(e => Random.nextDouble()) val input = new Table() input(1.toDouble) = input1 input(2.toDouble) = input2 val start = System.nanoTime() val output = module.forward(input) val gradInput = module.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start val code = "torch.manualSeed(" + seed + ")\n" + "module = nn.CMulTable()\n" + "output = module:forward(input)\n" + "gradInput = module:backward(input,gradOutput)" val (luaTime, torchResult) = TH.run(code, Map("input" -> input, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be (output) luaOutput2 should be (gradInput) println("Test case : CMinTable, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 17
Source File: CosineDistanceSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.integration.torch import com.intel.analytics.bigdl.nn.CosineDistance import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.utils.RandomGenerator._ import com.intel.analytics.bigdl.utils.Table import scala.collection.mutable.HashMap import scala.util.Random @com.intel.analytics.bigdl.tags.Serial class CosineDistanceSpec extends TorchSpec { "A CosineDistance " should "generate correct output and grad" in { torchCheck() val seed = 100 RNG.setSeed(seed) val input1 = Tensor[Double](3).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](3).apply1(e => Random.nextDouble()) val gradOutput = Tensor[Double](1).apply1(e => Random.nextDouble()) val input = new Table() input(1.0) = input1 input(2.0) = input2 val code = "torch.manualSeed(" + seed + ")\n" + "module = nn.CosineDistance()\n" + "output = module:forward(input)\n" + "gradInput = module:backward(input,gradOutput)\n" val (luaTime, torchResult) = TH.run(code, Map("input" -> input, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] val module = new CosineDistance[Double]() val start = System.nanoTime() val output = module.forward(input) val gradInput = module.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start output should be(luaOutput1) luaOutput2 should be (gradInput) println("Test case : CosineDistance, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 18
Source File: CosineEmbeddingCriterionSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.integration.torch import com.intel.analytics.bigdl.nn.CosineEmbeddingCriterion import com.intel.analytics.bigdl.tensor.{Storage, Tensor} import com.intel.analytics.bigdl.utils.RandomGenerator._ import com.intel.analytics.bigdl.utils.{RandomGenerator, Table} import scala.collection.mutable.HashMap import scala.util.Random @com.intel.analytics.bigdl.tags.Serial class CosineEmbeddingCriterionSpec extends TorchSpec { "A CosineEmbeddingCriterion Module" should "generate correct output and grad" in { torchCheck() val seed = 100 RNG.setSeed(seed) val module = new CosineEmbeddingCriterion[Double](0.2) val input1 = Tensor[Double](5).apply1(e => RandomGenerator.RNG.uniform(0, 2)) val input2 = Tensor[Double](5).apply1(e => RandomGenerator.RNG.uniform(0, 1)) val input = new Table() input(1.0) = input1 input(2.0) = input2 val target = new Table() val target1 = Tensor[Double](Storage(Array(-0.5))) target(1.toDouble) = target1 val start = System.nanoTime() val output = module.forward(input, target) val gradInput = module.backward(input, target) val end = System.nanoTime() val scalaTime = end - start val code = "torch.manualSeed(" + seed + ")\n" + "module = nn.CosineEmbeddingCriterion(0.2)\n" + "_idx = module._idx\n" + "_outputs = module._outputs\n" + "buffer = module.buffer\n" + "output = module:forward(input, -0.5)\n" + "gradInput = module:backward(input, -0.5)\n" val (luaTime, torchResult) = TH.run(code, Map("input" -> input), Array("output", "gradInput", "_idx", "buffer", "_outputs")) val luaOutput1 = torchResult("output").asInstanceOf[Double] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be(output) luaOutput2 should be (gradInput) println("Test case : CrossEntropyCriterion, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 19
Source File: CosineDistanceCriterionSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.integration.torch import com.intel.analytics.bigdl.nn.CosineDistanceCriterion import com.intel.analytics.bigdl.tensor.{Storage, Tensor} import com.intel.analytics.bigdl.utils.RandomGenerator._ import com.intel.analytics.bigdl.utils.{RandomGenerator, Table} import scala.collection.mutable.HashMap import scala.util.Random @com.intel.analytics.bigdl.tags.Serial class CosineDistanceCriterionSpec extends TorchSpec { "A CosineDistanceCriterionSpec Module" should "generate correct output and grad" in { torchCheck() val seed = 100 RNG.setSeed(seed) val module = CosineDistanceCriterion[Double](false) val input1 = Tensor[Double](5).apply1(e => RandomGenerator.RNG.uniform(0, 2)) val input2 = Tensor[Double](5).apply1(e => RandomGenerator.RNG.uniform(0, 1)) val input = new Table() input(1.0) = input1 input(2.0) = input2 val target = new Table() val target1 = Tensor[Double](Storage(Array(1.0))) target(1.toDouble) = target1 val start = System.nanoTime() val output = module.forward(input1, input2) val gradInput = module.backward(input1, input2) val end = System.nanoTime() val scalaTime = end - start val code = "torch.manualSeed(" + seed + ")\n" + "module = nn.CosineEmbeddingCriterion(0.0)\n" + "_idx = module._idx\n" + "_outputs = module._outputs\n" + "buffer = module.buffer\n" + "output = module:forward(input, 1.0)\n" + "gradInput = module:backward(input, 1.0)\n" val (luaTime, torchResult) = TH.run(code, Map("input" -> input), Array("output", "gradInput", "_idx", "buffer", "_outputs")) val luaOutput1 = torchResult("output").asInstanceOf[Double] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be(output) luaOutput2[Tensor[Double]](1) should be (gradInput.squeeze()) println("Test case : CrossEntropyCriterion, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 20
Source File: NarrowTableSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.integration.torch import com.intel.analytics.bigdl.nn.NarrowTable import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.utils.{T, Table} import scala.collection.mutable.HashMap import scala.util.Random @com.intel.analytics.bigdl.tags.Serial class NarrowTableSpec extends TorchSpec { "A NarrowTable Module " should "generate correct output and grad" in { torchCheck() val module = new NarrowTable[Double](1, 2) val input = T() input(1.0) = Tensor[Double](2, 3).apply1(e => Random.nextDouble()) input(2.0) = Tensor[Double](2, 1).apply1(e => Random.nextDouble()) input(3.0) = Tensor[Double](2, 2).apply1(e => Random.nextDouble()) val gradOutput = T() gradOutput(1.0) = Tensor[Double](5, 3).apply1(e => Random.nextDouble()) gradOutput(2.0) = Tensor[Double](2, 5).apply1(e => Random.nextDouble()) val code = "module = nn.NarrowTable(1, 2)\n" + "local i = 0\n" + "while i < 10 do\n" + "output = module:forward(input)\n" + "gradInput = module:backward(input, gradOutput)\n" + "i = i + 1\n" + "end" val (luaTime, torchResult) = TH.run(code, Map("input" -> input, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Table] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] val start = System.nanoTime() var i = 0 var output = T() var gradInput = T() while (i < 10) { output = module.forward(input) gradInput = module.backward(input, gradOutput) i += 1 } val end = System.nanoTime() val scalaTime = end - start luaOutput1 should be (output) luaOutput2 should be (gradInput) println("Test case : NarrowTable, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } "A NarrowTable Module with negative length" should "generate correct output and grad" in { torchCheck() val module = new NarrowTable[Double](2, -2) val input = T() input(1.0) = Tensor[Double](2, 3).apply1(e => Random.nextDouble()) input(2.0) = Tensor[Double](2, 1).apply1(e => Random.nextDouble()) input(3.0) = Tensor[Double](2, 2).apply1(e => Random.nextDouble()) input(4.0) = Tensor[Double](2, 2).apply1(e => Random.nextDouble()) val gradOutput = T() gradOutput(1.0) = Tensor[Double](5, 3).apply1(e => Random.nextDouble()) gradOutput(2.0) = Tensor[Double](2, 5).apply1(e => Random.nextDouble()) val start = System.nanoTime() var i = 0 var output = T() var gradInput = T() output = module.forward(input) gradInput = module.backward(input, gradOutput) i += 1 val end = System.nanoTime() val scalaTime = end - start val gradInput1 = gradInput[Tensor[Double]](2.0) val gradInput2 = gradInput[Tensor[Double]](3.0) val expectedGradInput1 = gradOutput[Tensor[Double]](1.0) val expectedGradInput2 = gradOutput[Tensor[Double]](2.0) val output1 = output[Tensor[Double]](1.0) val output2 = output[Tensor[Double]](2.0) val expectedOutput1 = input[Tensor[Double]](2.0) val expectedOutput2 = input[Tensor[Double]](3.0) output1 should be (expectedOutput1) output2 should be (expectedOutput2) gradInput1 should be (expectedGradInput1) gradInput2 should be (expectedGradInput2) } }
Example 21
Source File: MarginRankingCriterionSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.integration.torch import com.intel.analytics.bigdl.nn.MarginRankingCriterion import com.intel.analytics.bigdl.tensor.{Storage, Tensor} import com.intel.analytics.bigdl.utils.Table import scala.collection.mutable.HashMap import scala.util.Random @com.intel.analytics.bigdl.tags.Serial class MarginRankingCriterionSpec extends TorchSpec { "A MarginRankingCriterion " should "generate correct output and grad with only value" in { torchCheck() val mse = new MarginRankingCriterion[Double]() val input1 = Tensor[Double](5).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](5).apply1(e => Random.nextDouble()) val input = new Table() input(1.toDouble) = input1 input(2.toDouble) = input2 val target = new Table() val target1 = Tensor[Double](Storage(Array(-1.0))) target(1.toDouble) = target1 val start = System.nanoTime() val output = mse.forward(input, target) val gradInput = mse.backward(input, target) val end = System.nanoTime() val scalaTime = end - start val code = "mse = nn.MarginRankingCriterion()\n" + "output = mse:forward(input,-1)\n" + "gradInput = mse:backward(input,-1)" val (luaTime, torchResult) = TH.run(code, Map("input" -> input, "target" -> target), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Double] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be (output) gradInput should equal (luaOutput2) println("Test case : MarginRankingCriterion, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } "A MarginRankingCriterion " should "generate correct output and grad with Tensor target" in { torchCheck() val mse = new MarginRankingCriterion[Double]() val input1 = Tensor[Double](5).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](5).apply1(e => Random.nextDouble()) val input = new Table() input(1.toDouble) = input1 input(2.toDouble) = input2 val target = new Table() val target1 = Tensor[Double](5).apply1(e => Random.nextDouble()) target(1.toDouble) = target1 val start = System.nanoTime() val output = mse.forward(input, target) val gradInput = mse.backward(input, target) val end = System.nanoTime() val scalaTime = end - start val code = "mse = nn.MarginRankingCriterion()\n" + "output = mse:forward(input, target)\n" + "gradInput = mse:backward(input, target)" val (luaTime, torchResult) = TH.run(code, Map("input" -> input, "target" -> target1), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Double] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be (output) gradInput should equal (luaOutput2) println("Test case : MarginRankingCriterion, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 22
Source File: MaskedSelectSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.integration.torch import com.intel.analytics.bigdl.nn.MaskedSelect import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.utils.Table import scala.collection.mutable.HashMap import scala.util.Random @com.intel.analytics.bigdl.tags.Serial class MaskedSelectSpec extends TorchSpec { "A MaskedSelect Module " should "generate correct output and grad" in { torchCheck() val module = new MaskedSelect[Double]() val input1 = Tensor[Double](2, 2).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](2, 2) input2(Array(1, 1)) = 1 input2(Array(1, 2)) = 0 input2(Array(2, 1)) = 0 input2(Array(2, 2)) = 1 val input = new Table() input(1.0) = input1 input(2.0) = input2 val gradOutput = Tensor[Double](5).apply1(e => Random.nextDouble()) val code = "module = nn.MaskedSelect()\n" + "mask = torch.ByteTensor({{1, 0}, {0, 1}})\n" + "output = module:forward({input1, mask})\n" + "gradInput = module:backward({input1, mask}, gradOutput)\n" + "gradInput[2] = gradInput[2]:double()" val (luaTime, torchResult) = TH.run(code, Map("input1" -> input1, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] val start = System.nanoTime() val output = module.forward(input) val gradInput = module.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start output should be (luaOutput1) gradInput should equal (luaOutput2) println("Test case : MaskedSelect, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 23
Source File: CMinTableSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.integration.torch import com.intel.analytics.bigdl.nn.CMinTable import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.utils.RandomGenerator._ import com.intel.analytics.bigdl.utils.Table import scala.collection.mutable.HashMap import scala.util.Random @com.intel.analytics.bigdl.tags.Serial class CMinTableSpec extends TorchSpec { "A CMaxTable Module" should "generate correct output and grad" in { torchCheck() val seed = 100 RNG.setSeed(seed) val module = new CMinTable[Double]() val input1 = Tensor[Double](5).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](5).apply1(e => Random.nextDouble()) val gradOutput = Tensor[Double](5).apply1(e => Random.nextDouble()) val input = new Table() input(1.toDouble) = input1 input(2.toDouble) = input2 val start = System.nanoTime() val output = module.forward(input) val gradInput = module.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start val code = "torch.manualSeed(" + seed + ")\n" + "module = nn.CMinTable()\n" + "output = module:forward(input)\n" + "gradInput = module:backward(input,gradOutput)\n" val (luaTime, torchResult) = TH.run(code, Map("input" -> input, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be (output) luaOutput2 should be (gradInput) println("Test case : CMinTable, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 24
Source File: MixtureTableSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.integration.torch import com.intel.analytics.bigdl.nn.MixtureTable import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.utils.Table import scala.collection.mutable.HashMap import scala.util.Random @com.intel.analytics.bigdl.tags.Serial class MixtureTableSpec extends TorchSpec { "A MixtureTable " should "generate correct output and grad with table expertInput" in { torchCheck() val mse = new MixtureTable[Double] val expertInput = Tensor[Double](5, 3, 6).apply1(e => Random.nextDouble()) val expertTable = new Table() expertTable(1.0) = Tensor[Double](5, 6).apply1(e => Random.nextDouble()) expertTable(2.0) = Tensor[Double](5, 6).apply1(e => Random.nextDouble()) expertTable(3.0) = Tensor[Double](5, 6).apply1(e => Random.nextDouble()) val input1 = Tensor[Double](5, 3).apply1(e => Random.nextDouble()) val gradOutput = Tensor[Double](5, 6).apply1(e => Random.nextDouble()) val input = new Table() input(1.0) = input1 input(2.0) = expertTable val start = System.nanoTime() val output = mse.forward(input) val gradInput = mse.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start val code = "mse = nn.MixtureTable()\n" + "input = {input1, expertTable}\n" + "output = mse:forward(input)\n" + "gradInput = mse:backward(input,gradOutput)" val (luaTime, torchResult) = TH.run(code, Map("input1" -> input1, "expertTable" -> expertTable, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] output should be (luaOutput1) luaOutput2 should be (gradInput) println("Test case : MixtureTable, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } "A MixtureTable " should "generate correct output and grad with tensor expertInput" in { torchCheck() val mse = new MixtureTable[Double] val expertInput = Tensor[Double](5, 3, 6).apply1(e => Random.nextDouble()) val input1 = Tensor[Double](5, 3).apply1(e => Random.nextDouble()) val gradOutput = Tensor[Double](5, 6).apply1(e => Random.nextDouble()) val input = new Table() input(1.0) = input1 input(2.0) = expertInput val code = "mse = nn.MixtureTable()\n" + "output = mse:forward(input)\n" + "gradInput = mse:backward(input,gradOutput)\n" + "size = mse.size\n" + "dim = mse.dim" val (luaTime, torchResult) = TH.run(code, Map("input" -> input, "gradOutput" -> gradOutput), Array("output", "gradInput", "size", "dim")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] val start = System.nanoTime() val output = mse.forward(input) val gradInput = mse.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start output should be (luaOutput1) gradInput should be (luaOutput2) println("Test case : MixtureTable, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 25
Source File: IndexSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.integration.torch import com.intel.analytics.bigdl.nn.Index import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.utils.RandomGenerator._ import com.intel.analytics.bigdl.utils.Table import scala.collection.mutable.HashMap import scala.util.Random @com.intel.analytics.bigdl.tags.Serial class IndexSpec extends TorchSpec { "A Index " should "generate correct output and grad with one dimension" in { torchCheck() val seed = 100 RNG.setSeed(seed) val input1 = Tensor[Double](3).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](4) input2(Array(1)) = 1 input2(Array(2)) = 2 input2(Array(3)) = 2 input2(Array(4)) = 3 val gradOutput = Tensor[Double](4).apply1(e => Random.nextDouble()) val input = new Table() input(1.toDouble) = input1 input(2.toDouble) = input2 val code = "torch.manualSeed(" + seed + ")\n" + "input = {input1, torch.LongTensor{1, 2, 2, 3}}\n" + "module = nn.Index(1)\n" + "output = module:forward(input)\n" + "gradInput = module:backward(input,gradOutput)\n" val (luaTime, torchResult) = TH.run(code, Map("input1" -> input1, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] val module = new Index[Double](1) val start = System.nanoTime() val output = module.forward(input) val gradInput = module.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start output should be(luaOutput1) luaOutput2 should be (gradInput) println("Test case : Index, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } "A Index " should "generate correct output and grad with two dimension" in { torchCheck() val seed = 100 RNG.setSeed(seed) val input1 = Tensor[Double](3, 3).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](4) input2(Array(1)) = 1 input2(Array(2)) = 2 input2(Array(3)) = 3 input2(Array(4)) = 1 val gradOutput = Tensor[Double](3, 4).apply1(e => Random.nextDouble()) val input = new Table() input(1.toDouble) = input1 input(2.toDouble) = input2 val code = "torch.manualSeed(" + seed + ")\n" + "input = {input1, torch.LongTensor{1, 2, 3, 1}}\n" + "module = nn.Index(2)\n" + "output = module:forward(input)\n" + "gradInput = module:backward(input,gradOutput)\n" val (luaTime, torchResult) = TH.run(code, Map("input1" -> input1, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] val module = new Index[Double](2) val start = System.nanoTime() val output = module.forward(input) val gradInput = module.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start output should be(luaOutput1) luaOutput2 should be (gradInput) println("Test case : Index, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 26
Source File: CSubTableSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.integration.torch import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.utils.RandomGenerator._ import com.intel.analytics.bigdl.utils.Table import com.intel.analytics.bigdl.nn.CSubTable import scala.collection.mutable.HashMap import scala.util.Random @com.intel.analytics.bigdl.tags.Serial class CSubTableSpec extends TorchSpec { "A CDivTable Module" should "generate correct output and grad" in { torchCheck() val seed = 100 RNG.setSeed(seed) val module = new CSubTable[Double]() val input1 = Tensor[Double](5).apply1(e => Random.nextDouble()) val input2 = Tensor[Double](5).apply1(e => Random.nextDouble()) val gradOutput = Tensor[Double](5).apply1(e => Random.nextDouble()) val input = new Table() input(1.toDouble) = input1 input(2.toDouble) = input2 val start = System.nanoTime() val output = module.forward(input) val gradInput = module.backward(input, gradOutput) val end = System.nanoTime() val scalaTime = end - start val code = "torch.manualSeed(" + seed + ")\n" + "module = nn.CSubTable()\n" + "output = module:forward(input)\n" + "gradInput = module:backward(input,gradOutput)" val (luaTime, torchResult) = TH.run(code, Map("input" -> input, "gradOutput" -> gradOutput), Array("output", "gradInput")) val luaOutput1 = torchResult("output").asInstanceOf[Tensor[Double]] val luaOutput2 = torchResult("gradInput").asInstanceOf[Table] luaOutput1 should be(output) luaOutput2 should be (gradInput) println("Test case : CSubTable, Torch : " + luaTime + " s, Scala : " + scalaTime / 1e9 + " s") } }
Example 27
Source File: KeyBinder.scala From slide-desktop with GNU General Public License v2.0 | 5 votes |
package gui

import java.awt.event.{KeyEvent, KeyListener}

import scala.collection.mutable.HashMap

abstract class KeyBinder(val keyCodes: Int*) extends KeyListener {
  private val keyMap: HashMap[Int, Boolean] = new HashMap[Int, Boolean]

  override def keyTyped(e: KeyEvent): Unit = {}

  override def keyPressed(e: KeyEvent): Unit = {
    keyMap.put(e.getKeyCode, true)
    if (getKeysDown)
      onKeysDown()
  }

  override def keyReleased(e: KeyEvent): Unit = keyMap.remove(e.getKeyCode)

  private def getKeysDown: Boolean = {
    this.keyCodes.foreach(key =>
      if (keyMap.contains(key)) {
        if (!keyMap.get(key).get) return false
      } else return false
    )
    keyMap.clear()
    true
  }

  def onKeysDown(): Unit
}
Example 28
Source File: TopElementsAggregator.scala From salt-core with Apache License 2.0 | 5 votes |
package software.uncharted.salt.core.analytic.collection

import software.uncharted.salt.core.analytic.Aggregator

import scala.collection.Map
import scala.collection.mutable.HashMap
import scala.collection.mutable.ListBuffer
import scala.collection.mutable.{Map => MutableMap}
import scala.collection.mutable.PriorityQueue
import scala.reflect.ClassTag

class TopElementsAggregator[ET: ClassTag](elementLimit: Int)
  extends Aggregator[Seq[ET], Map[ET, Int], List[(ET, Int)]] {

  def default(): Map[ET, Int] = {
    Map[ET, Int]()
  }

  override def add(current: Map[ET, Int], next: Option[Seq[ET]]): Map[ET, Int] = {
    if (next.isDefined) {
      // If our current map is mutable, add new data in directly.
      // If not, convert to a mutable map, and then add data in
      val sum = current match {
        case hm: MutableMap[ET, Int] => hm
        case _ => {
          // The current value isn't itself a mutable hashmap yet; convert to one.
          val hm = new HashMap[ET, Int]()
          hm ++= current
          hm
        }
      }
      next.get.foreach(t => sum.put(t, sum.getOrElse(t, 0) + 1))
      sum
    } else {
      current
    }
  }

  override def merge(left: Map[ET, Int], right: Map[ET, Int]): Map[ET, Int] = {
    // If either input map is mutable, merge the other into it.
    // If neither is, convert one to mutable, and add the other into it.
    val (to, from) = left match {
      case hm: MutableMap[ET, Int] => (hm, right)
      case _ => right match {
        case hm: MutableMap[ET, Int] => (hm, left)
        case _ =>
          val hm = new HashMap[ET, Int]()
          hm ++= left
          (hm, right)
      }
    }
    from.foreach(t => {
      to.put(t._1, to.getOrElse(t._1, 0) + t._2)
    })
    to
  }

  override def finish(intermediate: Map[ET, Int]): List[(ET, Int)] = {
    val x = new PriorityQueue[(ET, Int)]()(Ordering.by(a => a._2))
    intermediate.foreach(t => {
      x.enqueue(t)
    })
    var result = new ListBuffer[(ET, Int)]
    for (i <- 0 until Math.min(elementLimit, x.size)) {
      result.append(x.dequeue)
    }
    result.toList
  }
}
Example 29
Source File: ConfManager.scala From HadoopLearning with MIT License | 5 votes |
package com.utils

import java.util.regex.Pattern

import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.mutable.HashMap

/**
 * Spark Streaming configuration helper.
 *
 * @author liumm
 * @since 2018-07-27 20:27
 */
object ConfManager {

  /**
   * Maximum number of records written per batch.
   */
  val maxRecords = 1000

  /**
   * Build the Kafka configuration.
   *
   * @param streamConf
   * @return
   */
  def kafkaParam(streamConf: StreamConf): (Map[String, Object], Pattern) = {
    (getConsumerConfig(streamConf.brokers, streamConf.groupId), Pattern.compile(streamConf.topics))
  }

  def kafkaParamForMetadata(streamConf: StreamConf): Map[String, String] = {
    val kafkaParams = new HashMap[String, String]()
    kafkaParams += (ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> streamConf.brokers)
    kafkaParams += ("metadata.broker.list" -> streamConf.brokers)
    kafkaParams += (ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "smallest")
    kafkaParams += (ConsumerConfig.GROUP_ID_CONFIG -> streamConf.groupId)
    kafkaParams.toMap
  }

  /**
   * Generate the Kafka consumer configuration.
   *
   * @return the Kafka consumer configuration map
   */
  private def getConsumerConfig(brokers: String, groupId: String): Map[String, Object] = {
    val kafkaParams = new HashMap[String, Object]()
    kafkaParams += (ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokers)
    kafkaParams += (ConsumerConfig.GROUP_ID_CONFIG -> groupId)
    kafkaParams += (ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer])
    kafkaParams += (ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer])
    kafkaParams += (ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG -> new Integer(3 * 1024 * 1024))
    kafkaParams += (ConsumerConfig.MAX_POLL_RECORDS_CONFIG -> new Integer(100))
    kafkaParams += (ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "latest")
    // Disable Kafka's automatic offset commits.
    kafkaParams += (ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> (false: java.lang.Boolean))
    kafkaParams.toMap
  }

  def newStreamConf() = {
    val conf = new StreamConf()
    conf.zkUrl = "hdp01:2181"
    conf.brokers = "hdp01:9092"
    conf.groupId = "liumm_group"
    conf.topics = "i57_.*"
    conf
  }
}
Example 30
Source File: GlobalPerformer.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
package org.apache.iota.fey

import akka.actor.SupervisorStrategy.Restart
import akka.actor.{Actor, ActorLogging, ActorRef, OneForOneStrategy, Props, Terminated}
import akka.routing._
import play.api.libs.json.JsObject

import scala.collection.mutable.HashMap
import scala.concurrent.duration._

protected class GlobalPerformer(val orchestrationID: String,
                                val orchestrationName: String,
                                val globalPerformers: List[JsObject],
                                val ensemblesSpec: List[JsObject]) extends Actor with ActorLogging {

  val monitoring_actor = FEY_MONITOR.actorRef
  var global_metadata: Map[String, Performer] = Map.empty[String, Performer]

  override def receive: Receive = {

    case GlobalPerformer.PRINT_GLOBAL =>
      context.actorSelection(s"*") ! FeyGenericActor.PRINT_PATH

    case Terminated(actor) =>
      monitoring_actor ! Monitor.TERMINATE(actor.path.toString, Utils.getTimestamp)
      log.error(s"DEAD Global Performers ${actor.path.name}")
      context.children.foreach { child =>
        context.unwatch(child)
        context.stop(child)
      }
      throw new RestartGlobalPerformers(s"DEAD Global Performer ${actor.path.name}")

    case GetRoutees => //Discard

    case x => log.warning(s"Message $x not treated by Global Performers")
  }

  private def loadClazzFromJar(classPath: String, jarLocation: String, jarName: String): Class[FeyGenericActor] = {
    try {
      Utils.loadActorClassFromJar(jarLocation, classPath, jarName)
    } catch {
      case e: Exception =>
        log.error(e, s"Could not load class $classPath from jar $jarLocation. Please, check the Jar repository path as well the jar name")
        throw e
    }
  }
}

object GlobalPerformer {

  val activeGlobalPerformers: HashMap[String, Map[String, ActorRef]] =
    HashMap.empty[String, Map[String, ActorRef]]

  case object PRINT_GLOBAL
}
Example 31
Source File: TokenAuthorizingInterceptor.scala From meteorite-core with Apache License 2.0 | 5 votes |
package bi.meteorite.core.security.authorization

import java.lang.reflect.Method
import java.util

import org.apache.cxf.security.SecurityContext

import scala.collection.JavaConversions._
import scala.collection.JavaConverters._
import scala.collection.mutable.HashMap
import scala.collection.mutable.ListBuffer

import TokenAuthorizingInterceptor._

object TokenAuthorizingInterceptor {

  private def parseRolesMap(rolesMap: Map[String, String]): scala.collection.mutable.HashMap[String, List[String]] = {
    val map = new scala.collection.mutable.HashMap[String, List[String]]()
    for ((key, value) <- rolesMap) {
      map.put(key, value.split(" ").toList)
    }
    map
  }
}

class TokenAuthorizingInterceptor(uniqueId: Boolean) extends TokenAbstractAutorizingInInterceptor(uniqueId) {

  private val methodRolesMap = new HashMap[String, List[String]]()

  private var userRolesMap = new scala.collection.mutable.HashMap[String, List[String]]

  private var globalRoles = new scala.collection.mutable.ListBuffer[String]

  private var checkConfiguredRolesOnly: Boolean = _

  def this() {
    this(true)
  }

  protected override def isUserInRole(sc: SecurityContext, roles: util.List[String], deny: Boolean): Boolean = {
    if (!checkConfiguredRolesOnly && !super.isUserInRole(sc, roles, deny)) {
      return false
    }
    if (userRolesMap.nonEmpty) {
      // The principal must hold at least one of the expected roles
      val userRoles = userRolesMap.get(sc.getUserPrincipal.getName)
      if (userRoles.isEmpty) {
        return false
      }
      for (role <- roles if userRoles.get.contains(role)) {
        return true
      }
      false
    } else {
      !checkConfiguredRolesOnly
    }
  }

  private def createMethodSig(method: Method): String = {
    val b = new StringBuilder(method.getReturnType.getName)
    b.append(' ').append(method.getName).append('(')
    for (cls <- method.getParameterTypes) {
      b.append(cls.getName)
    }
    b.append(')')
    b.toString
  }

  protected override def getExpectedRoles(method: Method): util.List[String] = {
    var roles = methodRolesMap.get(createMethodSig(method))
    if (roles.isEmpty) {
      roles = methodRolesMap.get(method.getName)
    }
    if (roles.isEmpty) {
      globalRoles.toList
    } else {
      roles.get
    }
  }

  def setMethodRolesMap(rolesMap: java.util.Map[String, String]) =
    methodRolesMap.putAll(parseRolesMap(rolesMap.asScala.toMap))

  def setUserRolesMap(rolesMap: java.util.Map[String, String]) =
    userRolesMap = parseRolesMap(rolesMap.asScala.toMap)

  def setGlobalRoles(roles: String) =
    globalRoles = roles.split(" ").to[ListBuffer]

  def setCheckConfiguredRolesOnly(checkConfiguredRolesOnly: Boolean) =
    this.checkConfiguredRolesOnly = checkConfiguredRolesOnly
}
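A hedged wiring sketch for the interceptor using only the setters defined above; the method and role names are illustrative, and TokenAbstractAutorizingInInterceptor is assumed to be available via meteorite-core:

import java.util.{HashMap => JHashMap}
import bi.meteorite.core.security.authorization.TokenAuthorizingInterceptor

object InterceptorWiring {
  def build(): TokenAuthorizingInterceptor = {
    val interceptor = new TokenAuthorizingInterceptor()

    // Method name (or full signature) -> space-separated role list
    val methodRoles = new JHashMap[String, String]()
    methodRoles.put("getUsers", "ROLE_ADMIN ROLE_VIEWER")
    methodRoles.put("deleteUser", "ROLE_ADMIN")
    interceptor.setMethodRolesMap(methodRoles)

    // Fallback roles for methods with no explicit entry
    interceptor.setGlobalRoles("ROLE_ADMIN")
    interceptor.setCheckConfiguredRolesOnly(true)
    interceptor
  }
}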
Example 32
Source File: KernelMatrix.scala From keystone with Apache License 2.0 | 5 votes |
package keystoneml.nodes.learning import scala.collection.mutable.HashMap import scala.reflect.ClassTag import breeze.linalg._ import org.apache.spark.rdd.RDD import keystoneml.utils.{MatrixUtils, Stats} import keystoneml.workflow.{Transformer, LabelEstimator} class BlockKernelMatrix[T: ClassTag]( val kernelGen: KernelTransformer[T], val data: RDD[T], val cacheKernel: Boolean) extends KernelMatrix { val colBlockCache = HashMap.empty[Seq[Int], RDD[DenseMatrix[Double]]] val diagBlockCache = HashMap.empty[Seq[Int], DenseMatrix[Double]] def apply(colIdxs: Seq[Int]): RDD[DenseMatrix[Double]] = { if (colBlockCache.contains(colIdxs)) { colBlockCache(colIdxs) } else { val (kBlock, diagBlock) = kernelGen.computeKernel(data, colIdxs) if (cacheKernel) { colBlockCache += (colIdxs -> kBlock) diagBlockCache += (colIdxs -> diagBlock) } kBlock } } def unpersist(colIdxs: Seq[Int]): Unit = { if (colBlockCache.contains(colIdxs) && !cacheKernel) { colBlockCache(colIdxs).unpersist(true) } } def diagBlock(idxs: Seq[Int]): DenseMatrix[Double] = { if (!diagBlockCache.contains(idxs)) { val (kBlock, diagBlock) = kernelGen.computeKernel(data, idxs) if (cacheKernel) { colBlockCache += (idxs -> kBlock) diagBlockCache += (idxs -> diagBlock) } diagBlock } else { diagBlockCache(idxs) } } }
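The apply and diagBlock methods above implement a compute-or-cache pattern keyed by the requested column indices. A minimal standalone sketch of the same idiom, with the RDD-backed kernel computation replaced by a plain placeholder function; BlockCache and compute are illustrative names, not KeystoneML API:

import scala.collection.mutable.HashMap

// Cache-or-compute helper in the spirit of BlockKernelMatrix, without the Spark dependency
class BlockCache[B](compute: Seq[Int] => B) {
  private val cache = HashMap.empty[Seq[Int], B]

  def apply(colIdxs: Seq[Int], cacheBlock: Boolean): B =
    if (cacheBlock) cache.getOrElseUpdate(colIdxs, compute(colIdxs))
    else cache.getOrElse(colIdxs, compute(colIdxs))

  def evict(colIdxs: Seq[Int]): Unit = cache.remove(colIdxs)
}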
Example 33
Source File: ParameterTest.scala From maha with Apache License 2.0 | 5 votes |
// Copyright 2017, Yahoo Holdings Inc. // Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms. package com.yahoo.maha.core.request import com.yahoo.maha.core.request.ReportFormatType.CSVFormat import com.yahoo.maha.core.{Engine, HiveEngine} import org.json4s._ import org.json4s.jackson.JsonMethods import org.scalatest.{FunSuite, Matchers} import scala.collection.mutable.HashMap class ParameterTest extends FunSuite with Matchers { test("SerializeParameters should serialize a map of parameters into a List") { val map_parameters = new HashMap[Parameter, ParameterValue[_]] map_parameters.put(Parameter.ReportFormat, ReportFormatValue(ReportFormatType.CSVFormat)) map_parameters.put(Parameter.DryRun, DryRunValue(false)) map_parameters.put(Parameter.GeneratedQuery, GeneratedQueryValue("GeneratedQuery")) map_parameters.put(Parameter.QueryEngine, QueryEngineValue(Engine.from("hive").get)) map_parameters.put(Parameter.Debug, DebugValue(true)) map_parameters.put(Parameter.RequestId, RequestIdValue("RequestId")) map_parameters.put(Parameter.UserId, UserIdValue("UserId")) map_parameters.put(Parameter.TimeZone, TimeZoneValue("TimeZone")) map_parameters.put(Parameter.Schema, SchemaValue("Schema")) map_parameters.put(Parameter.Distinct, DistinctValue(true)) map_parameters.put(Parameter.JobName, JobNameValue("tools_1")) map_parameters.put(Parameter.RegistryName, RegistryNameValue("mahaRegistry")) map_parameters.put(Parameter.HostName, HostNameValue("127.0.0.1")) val result = Parameter.serializeParameters(map_parameters.toMap) result.length shouldBe map_parameters.size val newMap = result.map(t=> t._1 -> t._2).toMap for((k,v) <- map_parameters) { newMap.get(k.entryName).get match{ case JString(x) => v.value match { case CSVFormat => x shouldBe "csv" case HiveEngine => x shouldBe "Hive" case _ => x shouldBe v.value } case JBool(x) => x shouldBe v.value case _ => fail } } } test("DeserializeParameters should deserialize a JSON into a Map of parameter values") { val inputJson= """ |{ | "Report-Format": "csv", | "Dry-Run": false, | "Generated-Query": "Generated-Query", | "Query-Engine": "oracle", | "debug": true, | "Request-Id": "Request-Id", | "User-Id": "User-Id", | "TimeZone": "TimeZone", | "Schema": "Schema", | "Distinct": true, | "Job-Name": "Job-Name", | "RegistryName": "mahaRegistry", | "HostName": "127.0.0.1" |} |""".stripMargin val result = Parameter.deserializeParameters(JsonMethods.parse(inputJson)) result.getOrElse() match{ case m: Map[Parameter, ParameterValue[_]] => { m.size shouldBe 13 m.get(Parameter.ReportFormat).get shouldBe ReportFormatValue(ReportFormatType.CSVFormat) m.get(Parameter.DryRun).get shouldBe DryRunValue(false) m.get(Parameter.GeneratedQuery).get shouldBe GeneratedQueryValue("Generated-Query") m.get(Parameter.QueryEngine).get shouldBe QueryEngineValue(Engine.from("oracle").get) m.get(Parameter.Debug).get shouldBe DebugValue(true) m.get(Parameter.RequestId).get shouldBe RequestIdValue("Request-Id") m.get(Parameter.UserId).get shouldBe UserIdValue("User-Id") m.get(Parameter.TimeZone).get shouldBe TimeZoneValue("TimeZone") m.get(Parameter.Schema).get shouldBe SchemaValue("Schema") m.get(Parameter.Distinct).get shouldBe DistinctValue(true) m.get(Parameter.JobName).get shouldBe JobNameValue("Job-Name") m.get(Parameter.RegistryName).get shouldBe RegistryNameValue("mahaRegistry") m.get(Parameter.HostName).get shouldBe HostNameValue("127.0.0.1") } case _ => fail } } }
Example 34
Source File: depgraph.scala From sbt-blockade with Apache License 2.0 | 5 votes |
//: ---------------------------------------------------------------------------- //: Copyright 2015 Johannes Rudolph //: //: Distributed under the Apache 2.0 License, please see the NOTICE //: file in the root of the project for further details. //: ---------------------------------------------------------------------------- package verizon.build object depgraph { import java.io.File import sbt._ import scala.collection.mutable.{HashMap, MultiMap, Set} import scala.language.reflectiveCalls object SbtUpdateReport { type OrganizationArtifactReport = { def modules: Seq[ModuleReport] } def fromConfigurationReport(report: ConfigurationReport, rootInfo: sbt.ModuleID): ModuleGraph = { implicit def id(sbtId: sbt.ModuleID): ModuleId = ModuleId(sbtId.organization, sbtId.name, sbtId.revision) def moduleEdges(orgArt: OrganizationArtifactReport): Seq[(Module, Seq[Edge])] = { val chosenVersion = orgArt.modules.find(!_.evicted).map(_.module.revision) orgArt.modules.map(moduleEdge(chosenVersion)) } def moduleEdge(chosenVersion: Option[String])(report: ModuleReport): (Module, Seq[Edge]) = { val evictedByVersion = if (report.evicted) chosenVersion else None val jarFile = report.artifacts.find(_._1.`type` == "jar").orElse(report.artifacts.find(_._1.extension == "jar")).map(_._2) (Module( id = report.module, license = report.licenses.headOption.map(_._1), evictedByVersion = evictedByVersion, jarFile = jarFile, error = report.problem ), report.callers.map(caller ⇒ Edge(caller.caller, report.module))) } val (nodes, edges) = report.details.flatMap(moduleEdges).unzip val root = Module(rootInfo) ModuleGraph(root +: nodes, edges.flatten) } } type Edge = (ModuleId, ModuleId) def Edge(from: ModuleId, to: ModuleId): Edge = from -> to case class ModuleId(organisation: String, name: String, version: String) { def idString: String = organisation + ":" + name + ":" + version } case class Module(id: ModuleId, license: Option[String] = None, extraInfo: String = "", evictedByVersion: Option[String] = None, jarFile: Option[File] = None, error: Option[String] = None) { def hadError: Boolean = error.isDefined def isUsed: Boolean = !isEvicted def isEvicted: Boolean = evictedByVersion.isDefined } case class ModuleGraph(nodes: Seq[Module], edges: Seq[Edge]) { lazy val modules: Map[ModuleId, Module] = nodes.map(n ⇒ (n.id, n)).toMap def module(id: ModuleId): Module = modules(id) lazy val dependencyMap: Map[ModuleId, Seq[Module]] = createMap(identity) lazy val reverseDependencyMap: Map[ModuleId, Seq[Module]] = createMap { case (a, b) ⇒ (b, a) } def createMap(bindingFor: ((ModuleId, ModuleId)) ⇒ (ModuleId, ModuleId)): Map[ModuleId, Seq[Module]] = { val m = new HashMap[ModuleId, Set[Module]] with MultiMap[ModuleId, Module] edges.foreach { entry ⇒ val (f, t) = bindingFor(entry) m.addBinding(f, module(t)) } m.toMap.mapValues(_.toSeq.sortBy(_.id.idString)).withDefaultValue(Nil) } def roots: Seq[Module] = nodes.filter(n ⇒ !edges.exists(_._2 == n.id)).sortBy(_.id.idString) def isEmpty: Boolean = nodes.isEmpty } }
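createMap builds its dependency maps by mixing MultiMap into a mutable HashMap of Sets, so addBinding accumulates several values per key. A self-contained sketch of that idiom; the module strings are made up:

import scala.collection.mutable.{HashMap, MultiMap, Set}

object MultiMapSketch extends App {
  // One key, many values: addBinding grows a Set per key
  val callers = new HashMap[String, Set[String]] with MultiMap[String, String]
  callers.addBinding("org:core:1.0", "org:app:2.1")
  callers.addBinding("org:core:1.0", "org:tools:0.9")
  callers.addBinding("org:util:3.2", "org:app:2.1")

  // e.g. Map(org:core:1.0 -> List(org:app:2.1, org:tools:0.9), org:util:3.2 -> List(org:app:2.1))
  println(callers.mapValues(_.toSeq.sorted).toMap)
}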
Example 35
Source File: Checksum.scala From schedoscope with Apache License 2.0 | 5 votes |
package org.schedoscope.dsl.transformations import java.security.MessageDigest import org.apache.hadoop.fs.{FileStatus, Path} import org.schedoscope.Schedoscope import org.schedoscope.scheduler.driver.FilesystemDriver._ import scala.Array.canBuildFrom import scala.collection.mutable.HashMap object Checksum { private def md5 = MessageDigest.getInstance("MD5") private def listFiles(path: String): Array[FileStatus] = { val files = fileSystem(path, Schedoscope.settings.hadoopConf).globStatus(new Path(path)) if (files != null) files else Array() } private def fileChecksum(path: String) = if (path == null) "null-checksum" else if (path.endsWith(".jar")) path else try { val cs = fileSystem(path, Schedoscope.settings.hadoopConf).getFileChecksum(new Path(path)) if (cs == null) path else cs.toString() } catch { case _: Throwable => path } def fileChecksums(paths: List[String], recursive: Boolean): List[String] = paths.flatMap(path => { if (fileSystem(path, Schedoscope.settings.hadoopConf).isFile(new Path(path))) List(fileChecksum(path)) else if (recursive) fileChecksums(listFiles(path + "/*").map(f => f.getPath.toString()).toList, recursive) else List() }).sorted val resourceHashCache = new HashMap[List[String], List[String]]() def resourceHashes(resources: List[String]): List[String] = synchronized { resourceHashCache.getOrElseUpdate(resources, fileChecksums(resources, true)) } val defaultDigest = "0" def digest(stringsToDigest: String*): String = if (stringsToDigest.isEmpty) defaultDigest else md5.digest(stringsToDigest.sorted.mkString.toCharArray().map(_.toByte)).map("%02X" format _).mkString object SchemaChecksum { val checksumProperty = "schema.checksum" } object TransformationChecksum { val checksumProperty = "transformation.checksum" val timestampProperty = "transformation.timestamp" } }
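resourceHashes memoizes checksum computations per resource list with getOrElseUpdate inside a synchronized block, so each distinct list is hashed once. A generic sketch of the same idiom; expensive is a stand-in for a call such as fileChecksums:

import scala.collection.mutable.HashMap

object MemoSketch {
  private val cache = new HashMap[List[String], List[String]]()

  // Stand-in for an expensive, deterministic computation keyed by its input list
  private def expensive(paths: List[String]): List[String] = paths.map(p => s"checksum-of-$p")

  def cached(paths: List[String]): List[String] = synchronized {
    cache.getOrElseUpdate(paths, expensive(paths))
  }
}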
Example 36
Source File: BackOffSupervision.scala From schedoscope with Apache License 2.0 | 5 votes |
package org.schedoscope.scheduler.utils import akka.actor.{ActorRef, ActorSystem} import org.slf4j.LoggerFactory import scala.collection.mutable.HashMap import scala.concurrent.duration.{FiniteDuration, _} def manageActorLifecycle(managedActor: ActorRef, backOffSlotTime: FiniteDuration = null, backOffMinimumDelay: FiniteDuration = null): FiniteDuration = { val managedActorName = managedActor.path.toStringWithoutAddress if (actorBackOffWaitTime.contains(managedActorName)) { val newBackOff = actorBackOffWaitTime(managedActorName).nextBackOff actorBackOffWaitTime.put(managedActorName, newBackOff) log.warn(s"$managerName: Set new back-off waiting " + s"time to value ${newBackOff.backOffWaitTime} for rebooted actor ${managedActorName}; " + s"(retries=${newBackOff.retries}, resets=${newBackOff.resets}, total-retries=${newBackOff.totalRetries})") //schedule tick response based on backoff newBackOff.backOffWaitTime } else { val backOff = ExponentialBackOff(backOffSlotTime = backOffSlotTime, constantDelay = backOffMinimumDelay) log.debug(s"$managerName: Set initial back-off waiting " + s"time to value ${backOff.backOffWaitTime} for booted actor ${managedActorName}; " + s"(retries=${backOff.retries}, resets=${backOff.resets}, total-retries=${backOff.totalRetries})") actorBackOffWaitTime.put(managedActorName, backOff) //schedule immediate tick response 0 millis } } }
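manageActorLifecycle keeps one back-off record per actor path and grows the delay on every restart, returning zero delay on first boot. A simplified, self-contained sketch of that bookkeeping; BackOff here is a toy stand-in for the project's ExponentialBackOff:

import scala.collection.mutable.HashMap
import scala.concurrent.duration._

object BackOffSketch {
  // Toy back-off state: doubles the wait on every restart
  final case class BackOff(waitTime: FiniteDuration, retries: Int) {
    def next: BackOff = BackOff(waitTime * 2, retries + 1)
  }

  private val waitTimes = new HashMap[String, BackOff]()

  def nextDelay(actorPath: String, slot: FiniteDuration): FiniteDuration =
    waitTimes.get(actorPath) match {
      case Some(state) =>
        val bumped = state.next
        waitTimes.put(actorPath, bumped)
        bumped.waitTime          // restarted actor: wait longer each time
      case None =>
        waitTimes.put(actorPath, BackOff(slot, 0))
        0.millis                 // first boot: respond immediately
    }
}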
Example 37
Source File: Database.scala From schedoscope with Apache License 2.0 | 5 votes |
package org.schedoscope.test import java.sql.{Connection, ResultSet, Statement} import org.schedoscope.dsl.{FieldLike, View} import org.schedoscope.schema.ddl.HiveQl import scala.collection.mutable.{HashMap, ListBuffer} class Database(conn: Connection, url: String) { def selectForViewByQuery(v: View, query: String, orderByField: Option[FieldLike[_]]): List[Map[String, Any]] = { val res = ListBuffer[Map[String, Any]]() var statement: Statement = null var rs: ResultSet = null try { statement = conn.createStatement() rs = statement.executeQuery(query) while (rs.next()) { val row = HashMap[String, Any]() v.fields.view.zipWithIndex.foreach(f => { row.put(f._1.n, ViewSerDe.deserializeField(f._1.t, rs.getString(f._2 + 1))) }) res.append(row.toMap) } } finally { if (rs != null) try { rs.close() } catch { case _: Throwable => } if (statement != null) try { statement.close() } catch { case _: Throwable => } } orderByField match { case Some(f) => res.sortBy { _ (f.n) match { case null => "" case other => other.toString } } toList case None => res.toList } } def selectView(v: View, orderByField: Option[FieldLike[_]]): List[Map[String, Any]] = selectForViewByQuery(v, HiveQl.selectAll(v), orderByField) }
Example 38
Source File: AETest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.unsupervised.dl.autoencoder import org.scalaml.{Logging, Resource} import org.scalaml.Predef.DblVec import org.scalaml.trading.GoogleFinancials.close import org.scalaml.workflow.data.DataSource import org.scalatest.{FlatSpec, Matchers} final class AETest extends FlatSpec with Matchers with Logging with Resource { protected val name: String = "Auto-Encoder" it should s"$name single hidden layer" in { show( "Single hidden layer") val REL_PATH = "unsupervised/ae/" val ALPHA = 0.8 val ETA = 0.05 val NUM_EPOCHS = 2500 val EPS = 1e-6 val THRESHOLD = 0.25 val LAMBDA = 0.18 val BETA = 0.3 val symbols = Array[String]( "FXE", "FXA", "SPY", "GLD", "FXB", "FXF", "FXC", "FXY", "CYB" ) val STUDIES = List[Array[String]]( Array[String]("FXY", "FXC", "GLD", "FXA"), Array[String]("FXE", "FXF", "FXB", "CYB"), Array[String]("FXE", "FXC", "GLD", "FXA", "FXY", "FXB"), Array[String]("FXC", "FXY", "FXA"), Array[String]("CYB", "GLD", "FXY"), symbols ) def index: Map[String, Int] = { import scala.collection.mutable.HashMap symbols.zipWithIndex./:(HashMap[String, Int]())((mp, si) => mp += ((si._1, si._2))).toMap } val path: String = getPath(REL_PATH).getOrElse(".") val prices = symbols.map(s => DataSource(s"$path$s.csv", true, true, 1)) .map( _.flatMap(_.get(close))).filter(_.isSuccess).map(_.get) val config = AEConfig(ALPHA, ETA, LAMBDA, BETA, NUM_EPOCHS, EPS) val obs = symbols.flatMap( index.get(_)).map(prices(_).toArray) val xv = obs.tail.transpose.dropRight(1) val ae = AE(config, 8, xv.toVector) ae.model match { case Some(aeModel) => if(aeModel.synapses.nonEmpty) { val inputSynapse = aeModel.synapses.head show(s"$name output synapse(0)(0) ${inputSynapse(0)(0)}") show(s"$name output synapse(0)(1) ${inputSynapse(0)(1)}") show(s"$name output synapse(1)(0) ${inputSynapse(1)(0)}") show(s"$name output synapse(1)(1) ${inputSynapse(1)(1)}") } else fail(s"$name Model weights with improper size") case None => fail(s"$name could not generate a model") } } } // --------------------------------- EOF ----------------------------------------------------------------------------
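The index helper above folds the symbol array into a mutable HashMap and then freezes it; zipWithIndex.toMap produces the same mapping directly. A small sketch contrasting the two, with the symbol list trimmed for brevity:

import scala.collection.mutable.HashMap

object IndexSketch extends App {
  val symbols = Array("FXE", "FXA", "SPY")

  // Fold into a mutable HashMap, then freeze it, as in the test above
  val viaFold: Map[String, Int] =
    symbols.zipWithIndex./:(HashMap[String, Int]())((mp, si) => mp += ((si._1, si._2))).toMap

  // Equivalent one-liner on the immutable side
  val direct: Map[String, Int] = symbols.zipWithIndex.toMap

  assert(viaFold == direct)
}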
Example 39
Source File: ParallelismTest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.scalability.scala import org.scalaml.Logging import org.scalatest.{FlatSpec, Matchers} final class ParallelismTest extends FlatSpec with Matchers with Logging { import scala.collection.mutable.HashMap import scala.collection.parallel.mutable.{ParArray, ParHashMap} import scala.util.Random protected[this] val name: String = "Scala parallel collections" final private val SZ = 100000 final private val NUM_TASKS = 8 final private val evalRange = Range(1, NUM_TASKS) final private val TIMES = 20 // Arbitrary map function final val mapF = (x: Double) => Math.sin(x * 0.01) + Math.exp(-x) // Arbitrary filter function final val filterF = (x: Double) => x > 0.8 // Arbitrary reduce function final val reduceF = (x: Double, y: Double) => (x + y) * x it should s"$name: arrays" in { show(s"Evaluation of arrays") // Generate random vector for both the non-parallel and parallel array val data = Array.fill(SZ)(Random.nextDouble) val pData = ParArray.fill(SZ)(Random.nextDouble) // Initialized and execute the benchmark for the parallel array val benchmark = new ParallelArray[Double](data, pData, TIMES) val ratios = new Array[Double](NUM_TASKS) evalRange.foreach(n => ratios.update(n, benchmark.map(mapF)(n))) val resultMap = ratios.tail resultMap.sum / resultMap.size < 1.0 should be(true) display(resultMap, "ParArray.map") evalRange.foreach(n => ratios.update(n, benchmark.filter(filterF)(n))) val resultfilter = ratios.tail resultfilter.sum / resultfilter.size < 1.0 should be(true) display(resultfilter, "ParArray.filter") } it should s"$name: maps" in { show("Evaluation of maps") val mapData = new HashMap[Int, Double] Range(0, SZ).foreach(n => mapData.put(n, Random.nextDouble)) val parMapData = new ParHashMap[Int, Double] Range(0, SZ).foreach(n => parMapData.put(n, Random.nextDouble)) // Initialized and execute the benchmark for the parallel map val benchmark = new ParallelMap[Double](mapData.toMap, parMapData, TIMES) val ratios = new Array[Double](NUM_TASKS) evalRange.foreach(n => ratios.update(n, benchmark.map(mapF)(n))) val resultMap = ratios.tail resultMap.sum / resultMap.size < 1.0 should be(true) display(resultMap, "ParMap.map") evalRange.foreach(n => ratios.update(n, benchmark.filter(filterF)(n))) val resultfilter = ratios.tail resultfilter.sum / resultfilter.size < 1.0 should be(true) } private def display(x: Array[Double], label: String): Unit = { import org.scalaml.plots.{Legend, LightPlotTheme, LinePlot} val labels = Legend( name, "Scala parallel collections", s"Scala parallel computation for $label", "Relative timing" ) LinePlot.display(x.toVector, labels, new LightPlotTheme) } } // ------------------------------------------- EOF --------------------------------------------------
Example 40
Source File: OrderedClustering.scala From nn_coref with GNU General Public License v3.0 | 5 votes |
package edu.berkeley.nlp.coref import scala.collection.mutable.HashMap import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer class OrderedClustering(val clusters: Seq[Seq[Int]]) { // Elements must be consecutive integers from 0 up to n private val allIndicesSorted = clusters.foldLeft(new ArrayBuffer[Int])(_ ++ _).sorted; require(allIndicesSorted.sameElements((0 until allIndicesSorted.size).toSeq), allIndicesSorted); private val mentionToClusterMap = new HashMap[Int,Seq[Int]]; for (cluster <- clusters) { for (i <- cluster) { mentionToClusterMap.put(i, cluster); } } def getCluster(idx: Int) = mentionToClusterMap(idx); def isSingleton(idx: Int) = mentionToClusterMap(idx).size == 1; def startsCluster(idx: Int) = mentionToClusterMap(idx)(0) == idx; def areInSameCluster(idx1: Int, idx2: Int) = mentionToClusterMap(idx1).contains(idx2); def getImmediateAntecedent(idx: Int) = { val cluster = mentionToClusterMap(idx); val mentIdxInCluster = cluster.indexOf(idx); if (mentIdxInCluster == 0) { -1 } else { cluster(mentIdxInCluster - 1); } } def getAllAntecedents(idx: Int) = { val cluster = mentionToClusterMap(idx); cluster.slice(0, cluster.indexOf(idx)); } def getAllConsequents(idx: Int) = { val cluster = mentionToClusterMap(idx); cluster.slice(cluster.indexOf(idx) + 1, cluster.size); } // Needed for output printing def getClusterIdx(idx: Int) = { var clusterIdx = 0; for (i <- 0 until clusters.size) { if (clusters(i).sameElements(mentionToClusterMap(idx))) { clusterIdx = i; } } clusterIdx; } def getSubclustering(mentIdxsToKeep: Seq[Int]): OrderedClustering = { val oldIndicesToNewIndicesMap = new HashMap[Int,Int](); (0 until mentIdxsToKeep.size).map(i => oldIndicesToNewIndicesMap.put(mentIdxsToKeep(i), i)); val filteredConvertedClusters = clusters.map(cluster => cluster.filter(mentIdxsToKeep.contains(_)).map(mentIdx => oldIndicesToNewIndicesMap(mentIdx))); val filteredConvertedClustersNoEmpties = filteredConvertedClusters.filter(cluster => !cluster.isEmpty); new OrderedClustering(filteredConvertedClustersNoEmpties); } } object OrderedClustering { def createFromClusterIds(clusterIds: Seq[Int]) = { val mentIdAndClusterId = (0 until clusterIds.size).map(i => (i, clusterIds(i))); val clustersUnsorted = mentIdAndClusterId.groupBy(_._2).values; val finalClusters = clustersUnsorted.toSeq.sortBy(_.head).map(clusterWithClusterId => clusterWithClusterId.map(_._1)); new OrderedClustering(finalClusters.toSeq); } def createFromBackpointers(backpointers: Seq[Int]) = { var nextClusterID = 0; val clusters = new ArrayBuffer[ArrayBuffer[Int]](); val mentionToCluster = new HashMap[Int,ArrayBuffer[Int]](); for (i <- 0 until backpointers.size) { if (backpointers(i) == i) { val cluster = ArrayBuffer(i); clusters += cluster; mentionToCluster.put(i, cluster); } else { val cluster = mentionToCluster(backpointers(i)); cluster += i; mentionToCluster.put(i, cluster); } } new OrderedClustering(clusters); } }
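A short sketch of building a clustering from backpointers with the companion object above; indices 0 through 4 stand for a made-up five-mention document:

import edu.berkeley.nlp.coref.OrderedClustering

object ClusteringSketch extends App {
  // Mentions 0, 1 and 4 form one cluster (1 and 4 point back to 0); 2 and 3 form another
  val clustering = OrderedClustering.createFromBackpointers(Seq(0, 0, 2, 2, 0))

  println(clustering.getCluster(4))             // ArrayBuffer(0, 1, 4)
  println(clustering.areInSameCluster(1, 4))    // true
  println(clustering.getImmediateAntecedent(4)) // 1
  println(clustering.isSingleton(2))            // false
}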
Example 41
Source File: FeatureIndexer.scala From jigg with Apache License 2.0 | 5 votes |
package jigg.ml @SerialVersionUID(1L) class HashedFeatureIndexer[Feature] private( val maxFeatureSize: Int, val hasher: (Feature => Int)) extends FeatureIndexer[Feature] { def size = maxFeatureSize def getIndex(key: Feature) = (math.abs(hasher(key)) % maxFeatureSize) } object HashedFeatureIndexer { def apply[Feature]( maxFeatureSize: Int = (2 << 23), hasher: (Feature => Int) = {f: Feature => f.hashCode()}) = { val biggestPrimeBelow = primes.takeWhile(maxFeatureSize > _).last new HashedFeatureIndexer[Feature](biggestPrimeBelow, hasher) } private lazy val primes = 2 #:: sieve(3) private def sieve(n: Int): Stream[Int] = if (primes.takeWhile(p => p*p <= n).exists(n % _ == 0)) sieve(n + 2) else n #:: sieve(n + 2) }
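A brief usage sketch: feature hashing maps an unbounded feature space into a fixed index range (rounded down to a prime), so distinct features may collide. The feature strings and size are illustrative:

import jigg.ml.HashedFeatureIndexer

object HashingSketch extends App {
  // Hash string features into at most ~2^20 slots with the default hashCode-based hasher
  val indexer = HashedFeatureIndexer[String](maxFeatureSize = 1 << 20)

  val i = indexer.getIndex("word=bank_pos=NN")
  val j = indexer.getIndex("word=river_pos=NN")
  assert(i >= 0 && i < indexer.size && j >= 0 && j < indexer.size)
}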
Example 42
Source File: OutputCategoryList.scala From jigg with Apache License 2.0 | 5 votes |
package jigg.nlp.ccg import java.io.FileWriter import scala.collection.mutable.ArrayBuffer import scala.sys.process.Process import scala.collection.mutable.HashMap import lexicon._ import breeze.config.CommandLineParser object OutputCategoryList { case class Params( bank: Opts.BankInfo, dict: Opts.DictParams ) case class CategoryInfo(sentence: GoldSuperTaggedSentence, position: Int, num: Int = 1) { def increment(): CategoryInfo = this.copy(num = num + 1) def replace(_sentence: GoldSuperTaggedSentence, _p: Int) = CategoryInfo(_sentence, _p, num + 1) } def main(args:Array[String]) = { val params = CommandLineParser.readIn[Params](args) val dict = new JapaneseDictionary(params.dict.categoryDictinoary) val bank = CCGBank.select(params.bank, dict) val trainSentences: Array[GoldSuperTaggedSentence] = bank.trainSentences val stats = new HashMap[Category, CategoryInfo] trainSentences foreach { sentence => (0 until sentence.size) foreach { i => val cat = sentence.cat(i) stats.get(cat) match { case Some(info) => if (sentence.size > info.sentence.size) stats += ((cat, info.replace(sentence, i))) else stats += ((cat, info.increment())) case None => stats += ((cat, CategoryInfo(sentence, i))) case _ => } } } def highlight(sentence: Sentence, i: Int) = { val tokens = sentence.wordSeq // tokens.take(i).mkString("") + s"\\x1b[1;31m{${tokens(i)}}\\x1b[0m" + tokens.drop(i+1).mkString("") tokens.slice(i-5, i).mkString("") + s"[01;31m${tokens(i)}[00m" + tokens.slice(i+1, i+6).mkString("") } var fw = new FileWriter("./category.lst") stats.toSeq.sortBy(_._2.num).reverse.foreach { case (cat, CategoryInfo(sentence, i, num)) => fw.write("%s\t%s\t%s\t%s\n" .format(num, cat, sentence.pos(i), highlight(sentence, i))) } fw.flush fw.close val noFeatureCategories = new HashMap[String, CategoryInfo] stats foreach { case (cat, CategoryInfo(sentence, i, numWithFeat)) => val noFeature = cat.toStringNoFeature noFeatureCategories.get(noFeature) match { case Some(exist) => val newNum = numWithFeat + exist.num val newInfo = exist.copy(num = newNum) noFeatureCategories += (noFeature -> newInfo) case None => noFeatureCategories += (noFeature -> CategoryInfo(sentence, i, numWithFeat)) case _ => } } fw = new FileWriter("./category.nofeature.lst") noFeatureCategories.toSeq.sortBy(_._2.num).reverse.foreach { case (cat, CategoryInfo(sentence, i, num)) => fw.write("%s\t%s\t%s\t%s\n" .format(num, cat, sentence.pos(i), highlight(sentence, i))) } fw.flush fw.close } }
Example 43
Source File: HeadFinder.scala From jigg with Apache License 2.0 | 5 votes |
package jigg.nlp.ccg.parser import scala.collection.mutable.HashMap import jigg.nlp.ccg.lexicon.{PoS, JapanesePoS, Category} import jigg.nlp.ccg.lexicon.Direction._ trait HeadFinder extends Serializable { type NodeInfo = HeadFinder.NodeInfo def get(left:NodeInfo, right:NodeInfo): Direction } object HeadFinder { case class NodeInfo(pos:PoS, category:Category, headCategory:Category) } case class EnglishHeadFinder(children2dir: Map[(Int, Int), Direction]) extends HeadFinder { def get(left:NodeInfo, right:NodeInfo) = children2dir.get(left.category.id, right.category.id) match { case Some(dir) => dir case _ => Left } } object EnglishHeadFinder { import jigg.nlp.ccg.lexicon.{ParseTree, NodeLabel, BinaryTree, NonterminalLabel} def createFromParseTrees(trees: Seq[ParseTree[NodeLabel]]): EnglishHeadFinder = { val map = new HashMap[(Int, Int), Direction] trees.foreach { _.foreachTree { _ match { case BinaryTree(left, right, NonterminalLabel(dir, _, _)) => map += (left.label.category.id, right.label.category.id) -> dir case _ => }}} EnglishHeadFinder(map.toMap) } } object JapaneseHeadFinder extends HeadFinder { val Symbol = "記号" def get(left:NodeInfo, right:NodeInfo) = { val leftPos = left.pos.first.v val rightPos = right.pos.first.v if (rightPos == Symbol) Left else Right } }
Example 44
Source File: Rule.scala From jigg with Apache License 2.0 | 5 votes |
package jigg.nlp.ccg.parser import jigg.nlp.ccg.lexicon.{Category, Derivation, Point, UnaryChildPoint, BinaryChildrenPoints, AppliedRule} import scala.collection.mutable.{HashMap, HashSet} import java.io.{ObjectOutputStream, ObjectInputStream} trait Rule { def unify(left:Category, right:Category): Option[Array[(Category, String)]] def raise(child:Category): Option[Array[(Category, String)]] def headFinder:HeadFinder } // rules are restricted to CFG rules extracted from the training CCGBank case class CFGRule(val binaryRules:Map[(Int,Int), Array[(Category, String)]], // category ids -> (category, ruleType) val unaryRules:Map[Int, Array[(Category, String)]], override val headFinder:HeadFinder) extends Rule { def unify(left:Category, right:Category):Option[Array[(Category, String)]] = binaryRules.get((left.id, right.id)) def raise(child:Category):Option[Array[(Category, String)]] = unaryRules.get(child.id) } object CFGRule { def extractRulesFromDerivations(derivations: Array[Derivation], headFinder:HeadFinder): CFGRule = { val binaryRules = new HashMap[(Int, Int), HashSet[(Category, String)]] val unaryRules = new HashMap[Int, HashSet[(Category, String)]] derivations.foreach { deriv => deriv.foreachPoint({ point:Point => deriv.get(point) match { case Some(AppliedRule(UnaryChildPoint(child), ruleType)) => val parents = unaryRules.getOrElseUpdate(child.category.id, new HashSet[(Category, String)]) parents += ((point.category, ruleType)) case Some(AppliedRule(BinaryChildrenPoints(left, right), ruleType)) => val parents = binaryRules.getOrElseUpdate((left.category.id, right.category.id), new HashSet[(Category, String)]) parents += ((point.category, ruleType)) case _ => }}) } new CFGRule(binaryRules.map { case (k, v) => k -> v.toArray }.toMap, unaryRules.map { case (k, v) => k -> v.toArray }.toMap, headFinder) } }
Example 45
Source File: SuperTaggerModel.scala From jigg with Apache License 2.0 | 5 votes |
package jigg.nlp.ccg import tagger.{LF=>Feature, MaxEntMultiTagger, MaxEntMultiTaggerTrainer, FeatureExtractors} import lexicon._ import jigg.ml._ import scala.collection.mutable.HashMap case class SuperTaggerModel( dict: Dictionary, featureMap: HashMap[Feature, Int], weights: WeightVec, extractors: FeatureExtractors) { self => def reduceFeatures(): SuperTaggerModel = { val buffer = weights.asInstanceOf[GrowableWeightVector[Float]].array // 0 1.0 2.0 0 0 1.0 ... val activeIdxs = buffer.zipWithIndex filter (_._1 != 0) map (_._2) // 1 2 5 println(s"# features reduced from ${buffer.size} to ${activeIdxs.size}") val idxMap = activeIdxs.zipWithIndex.toMap // {1->0, 2->1 5->2} val newFeatureMap = featureMap collect { case (f, oldIdx) if idxMap.isDefinedAt(oldIdx) => (f, idxMap(oldIdx)) } val newWeights = new FixedWeightVector[Float](activeIdxs.map(buffer).toArray) this copy (featureMap = newFeatureMap, weights = newWeights) } def mkMultiTaggerTrainer(classifierTrainer: OnlineLogLinearTrainer[Int]) = new MaxEntMultiTaggerTrainer(mkIndexer(), extractors, classifierTrainer, dict) def mkMultiTagger() = new MaxEntMultiTagger(mkIndexer(), extractors, mkClassifier(), dict) def mkClassifier() = new LogLinearClassifier[Int] { override val weights = self.weights } private def mkIndexer() = new ExactFeatureIndexer(featureMap) } object SuperTaggerModel { def saveTo(path: String, model: SuperTaggerModel) = { System.err.println("Saving tagger model to " + path) val os = jigg.util.IOUtil.openBinOut(path) os.writeObject(model) os.close } def loadFrom(path: String): SuperTaggerModel = { jigg.util.LogUtil.track("Loading supertagger model ...") { val in = jigg.util.IOUtil.openBinIn(path) val model = in.readObject.asInstanceOf[SuperTaggerModel] in.close model } } }
Example 46
Source File: CategoryManager.scala From jigg with Apache License 2.0 | 5 votes |
package jigg.nlp.ccg.lexicon import scala.collection.mutable.HashMap import scala.collection.mutable.ArrayBuffer class CategoryManager extends StringBaseNumberedManager[Category] with OptionReturner[Category] { override def createWithId(original:Category): Category = original match { case AtomicCategory(id, base, avm) => AtomicCategory(newId, base, avm) case ComplexCategory(id, left, right, slash) => val leftWithId = assignID(left) val rightWithId = assignID(right) ComplexCategory(newId, leftWithId, rightWithId, slash) } override def getOrNone(str:String): Option[Category] = str2objIndex.get(str) match { case Some(i) => Some(objects(i)) case None => canonicalMap.get(createCanonicalInstance(str)) } override def createCanonicalInstance(str:String): Category = JapaneseCategoryParser.parse(str) // This is used when candidate shift category is empty // It sometimes happen if for example, PoS not registered in the dictionary is detected. val unkCategory = getOrCreate("UNK") }
Example 47
Source File: CategoryDictionary.scala From jigg with Apache License 2.0 | 5 votes |
package jigg.nlp.ccg.lexicon import scala.collection.mutable.HashMap @SerialVersionUID(1L) sealed trait CategoryDictionary extends Serializable { type Key type UnkKey val categoryMap = new HashMap[Key, Array[Category]] val unkCategoryMap = new HashMap[UnkKey, Array[Category]] def key(word:Word, pos:PoS):Key def unkKey(pos:PoS):UnkKey def getCandidates(word:Word, pos:PoS):Array[Category] = categoryMap.get(key(word, pos)) match { case Some(categories) => categories case None => unkCategoryMap.get(unkKey(pos)) match { case Some(categories) => categories case None => Array[Category]() } } def registCandidates(word:Word, pos:PoS, candidates:Array[Category]) = key(word, pos) match { case k => categoryMap += (k -> (categoryMap.get(k) match { case Some(alreadyExist) => (candidates ++ alreadyExist).distinct case None => candidates.distinct })) } def registUnkCandiates(pos:PoS, candidates:Array[Category]) = unkKey(pos) match { case k => unkCategoryMap += (k -> (unkCategoryMap.get(k) match { case Some(alreadyExist) => (candidates ++ alreadyExist).distinct case None => candidates.distinct })) } def resetWithSentences(sentences: Seq[GoldSuperTaggedSentence], unkThreathold: Int) = { val counts = new HashMap[Key, Int] sentences foreach { sentence => (0 until sentence.size) foreach { i => val k = key(sentence.base(i), sentence.pos(i)) counts.getOrElseUpdate(k, 0) counts(k) += 1 }} sentences foreach { sentence => (0 until sentence.size) foreach { i => val k = key(sentence.base(i), sentence.pos(i)) if (counts(k) >= unkThreathold) registCandidates(sentence.base(i), sentence.pos(i), Array(sentence.cat(i))) registUnkCandiates(sentence.pos(i), Array(sentence.cat(i))) }} } } class Word2CategoryDictionary extends CategoryDictionary { type Key = Int type UnkKey = Int override def key(word:Word, pos:PoS) = word.id override def unkKey(pos:PoS) = pos.id } class WordPoS2CategoryDictionary extends CategoryDictionary { type Key = (Int, Int) type UnkKey = Int override def key(word:Word, pos:PoS) = (word.id, pos.id) override def unkKey(pos:PoS) = pos.id } class WordSecondFineTag2CategoryDictionary extends CategoryDictionary { override type Key = (Int, Int) override type UnkKey = Int override def key(word:Word, pos:PoS) = (word.id, pos.second.id) override def unkKey(pos:PoS) = pos.second.id } class WordSecondWithConj2CategoryDictionary extends CategoryDictionary { override type Key = (Int, Int) override type UnkKey = Int override def key(word:Word, pos:PoS) = (word.id, pos.secondWithConj.id) override def unkKey(pos:PoS) = pos.secondWithConj.id }
Example 48
Source File: CategoryFeature.scala From jigg with Apache License 2.0 | 5 votes |
package jigg.nlp.ccg.lexicon trait CategoryFeature { def kvs: Seq[(String, String)] def unify(lhs: CategoryFeature): Boolean = false // TODO: implement } @SerialVersionUID(-8236395926230742650L) case class JPCategoryFeature(values: Seq[String]) extends CategoryFeature { import JPCategoryFeature._ override def kvs = keys zip values override def toString = kvs.filter(_._2 != "").map { case (k, v) => k + "=" + v }.mkString(",") } object JPCategoryFeature { // This is a hard-coded mapping of feature structure of Japanese category. private val k2vals = Map( "mod" -> Array("adv", "adn", "nm"), "form" -> Array("attr", "base", "cont", "hyp", "imp", "beg", "stem", "ta", "te", "pre", "r", "neg", "s", "da"), "case" -> Array("ga", "o", "ni", "to", "nc", "caus"), "fin" -> Array("f", "t")) private val keys = k2vals.keys.toSeq private val v2keyIdx = { val key2idx = keys.zipWithIndex.toMap k2vals.flatMap { case (key, vals) => vals.map { v => v -> key2idx(key) } } } val kvpair = """\w+=(\w+)""".r def createFromValues(values: Seq[String]) = values match { case Seq() => emptyFeature case _ => val sortedValues = Array.fill(keys.size)("") values.filter(_!="").foreach { value => val v = value match { case kvpair(v) => v; case v => v } if (v(0) != 'X') v2keyIdx(v) match { case i => sortedValues(i) = v } } JPCategoryFeature(sortedValues) } // We cache this because most categories don't have a feature private val emptyFeature = JPCategoryFeature(Array.fill(keys.size)("")) } case class EnCategoryFeature(values: Seq[String]) extends CategoryFeature { override def kvs = values.zipWithIndex.map { case (v, k) => (k.toString, v) } override def toString = values.mkString(",") } object EnCategoryFeature { def createFromValues(values: Seq[String]) = EnCategoryFeature(values.sortWith(_ < _)) }
Example 49
Source File: LocalKMeans.scala From sparkoscope with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples import java.util.Random import scala.collection.mutable.HashMap import scala.collection.mutable.HashSet import breeze.linalg.{squaredDistance, DenseVector, Vector} object LocalKMeans { val N = 1000 val R = 1000 // Scaling factor val D = 10 val K = 10 val convergeDist = 0.001 val rand = new Random(42) def generateData: Array[DenseVector[Double]] = { def generatePoint(i: Int): DenseVector[Double] = { DenseVector.fill(D) {rand.nextDouble * R} } Array.tabulate(N)(generatePoint) } def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = { var index = 0 var bestIndex = 0 var closest = Double.PositiveInfinity for (i <- 1 to centers.size) { val vCurr = centers.get(i).get val tempDist = squaredDistance(p, vCurr) if (tempDist < closest) { closest = tempDist bestIndex = i } } bestIndex } def showWarning() { System.err.println( """WARN: This is a naive implementation of KMeans Clustering and is given as an example! |Please use org.apache.spark.ml.clustering.KMeans |for more conventional use. """.stripMargin) } def main(args: Array[String]) { showWarning() val data = generateData var points = new HashSet[Vector[Double]] var kPoints = new HashMap[Int, Vector[Double]] var tempDist = 1.0 while (points.size < K) { points.add(data(rand.nextInt(N))) } val iter = points.iterator for (i <- 1 to points.size) { kPoints.put(i, iter.next()) } println("Initial centers: " + kPoints) while(tempDist > convergeDist) { var closest = data.map (p => (closestPoint(p, kPoints), (p, 1))) var mappings = closest.groupBy[Int] (x => x._1) var pointStats = mappings.map { pair => pair._2.reduceLeft [(Int, (Vector[Double], Int))] { case ((id1, (p1, c1)), (id2, (p2, c2))) => (id1, (p1 + p2, c1 + c2)) } } var newPoints = pointStats.map {mapping => (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))} tempDist = 0.0 for (mapping <- newPoints) { tempDist += squaredDistance(kPoints.get(mapping._1).get, mapping._2) } for (newP <- newPoints) { kPoints.put(newP._1, newP._2) } } println("Final centers: " + kPoints) } } // scalastyle:on println
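closestPoint expects the centers keyed 1 through K in a mutable HashMap. A tiny hedged sketch calling it directly with two hand-picked centers, assuming breeze is on the classpath:

import breeze.linalg.{DenseVector, Vector}
import scala.collection.mutable.HashMap
import org.apache.spark.examples.LocalKMeans

object ClosestPointSketch extends App {
  val centers = new HashMap[Int, Vector[Double]]
  centers.put(1, DenseVector(0.0, 0.0))
  centers.put(2, DenseVector(10.0, 10.0))

  // The point (9, 9) is nearest to center 2
  println(LocalKMeans.closestPoint(DenseVector(9.0, 9.0), centers)) // 2
}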
Example 50
Source File: JsonUtils.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.kafka010 import scala.collection.mutable.HashMap import scala.util.control.NonFatal import org.apache.kafka.common.TopicPartition import org.json4s.NoTypeHints import org.json4s.jackson.Serialization def partitionOffsets(partitionOffsets: Map[TopicPartition, Long]): String = { val result = new HashMap[String, HashMap[Int, Long]]() implicit val ordering = new Ordering[TopicPartition] { override def compare(x: TopicPartition, y: TopicPartition): Int = { Ordering.Tuple2[String, Int].compare((x.topic, x.partition), (y.topic, y.partition)) } } val partitions = partitionOffsets.keySet.toSeq.sorted // sort for more determinism partitions.foreach { tp => val off = partitionOffsets(tp) val parts = result.getOrElse(tp.topic, new HashMap[Int, Long]) parts += tp.partition -> off result += tp.topic -> parts } Serialization.write(result) } }
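partitionOffsets groups offsets into a topic -> (partition -> offset) HashMap before handing it to json4s. Spark keeps this helper package-private, so the sketch below re-creates the grouping standalone; the topics and offsets are made up:

import scala.collection.mutable.HashMap
import org.apache.kafka.common.TopicPartition
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization

object OffsetsJsonSketch extends App {
  implicit val formats = Serialization.formats(NoTypeHints)

  val partitionOffsets = Map(
    new TopicPartition("events", 0) -> 25L,
    new TopicPartition("events", 1) -> 17L,
    new TopicPartition("audit", 0) -> 3L)

  // Same nesting as above: topic -> (partition -> offset)
  val result = new HashMap[String, HashMap[Int, Long]]()
  partitionOffsets.foreach { case (tp, off) =>
    val parts = result.getOrElse(tp.topic, new HashMap[Int, Long])
    parts += tp.partition -> off
    result += tp.topic -> parts
  }

  // e.g. {"audit":{"0":3},"events":{"0":25,"1":17}}
  println(Serialization.write(result))
}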
Example 51
Source File: MasterWebUI.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.master.ui import scala.collection.mutable.HashMap import org.eclipse.jetty.servlet.ServletContextHandler import org.apache.spark.deploy.master.Master import org.apache.spark.internal.Logging import org.apache.spark.ui.{SparkUI, WebUI} import org.apache.spark.ui.JettyUtils._ def initialize() { val masterPage = new MasterPage(this) attachPage(new ApplicationPage(this)) attachPage(masterPage) attachHandler(createStaticHandler(MasterWebUI.STATIC_RESOURCE_DIR, "/static")) attachHandler(createRedirectHandler( "/app/kill", "/", masterPage.handleAppKillRequest, httpMethods = Set("POST"))) attachHandler(createRedirectHandler( "/driver/kill", "/", masterPage.handleDriverKillRequest, httpMethods = Set("POST"))) } def addProxyTargets(id: String, target: String): Unit = { var endTarget = target.stripSuffix("/") val handler = createProxyHandler("/proxy/" + id, endTarget) attachHandler(handler) proxyHandlers(id) = handler } def removeProxyTargets(id: String): Unit = { proxyHandlers.remove(id).foreach(detachHandler) } } private[master] object MasterWebUI { private val STATIC_RESOURCE_DIR = SparkUI.STATIC_RESOURCE_DIR }
Example 52
Source File: PoolTable.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.jobs import java.net.URLEncoder import scala.collection.mutable.HashMap import scala.xml.Node import org.apache.spark.scheduler.{Schedulable, StageInfo} import org.apache.spark.ui.UIUtils private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) { private val listener = parent.progressListener def toNodeSeq: Seq[Node] = { listener.synchronized { poolTable(poolRow, pools) } } private def poolTable( makeRow: (Schedulable, HashMap[String, HashMap[Int, StageInfo]]) => Seq[Node], rows: Seq[Schedulable]): Seq[Node] = { <table class="table table-bordered table-striped table-condensed sortable table-fixed"> <thead> <th>Pool Name</th> <th>Minimum Share</th> <th>Pool Weight</th> <th>Active Stages</th> <th>Running Tasks</th> <th>SchedulingMode</th> </thead> <tbody> {rows.map(r => makeRow(r, listener.poolToActiveStages))} </tbody> </table> } private def poolRow( p: Schedulable, poolToActiveStages: HashMap[String, HashMap[Int, StageInfo]]): Seq[Node] = { val activeStages = poolToActiveStages.get(p.name) match { case Some(stages) => stages.size case None => 0 } val href = "%s/stages/pool?poolname=%s" .format(UIUtils.prependBaseUri(parent.basePath), URLEncoder.encode(p.name, "UTF-8")) <tr> <td> <a href={href}>{p.name}</a> </td> <td>{p.minShare}</td> <td>{p.weight}</td> <td>{activeStages}</td> <td>{p.runningTasks}</td> <td>{p.schedulingMode}</td> </tr> } }
Example 53
Source File: ConfigReader.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.internal.config import java.util.{Map => JMap} import java.util.regex.Pattern import scala.collection.mutable.HashMap import scala.util.matching.Regex private object ConfigReader { private val REF_RE = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r } def substitute(input: String): String = substitute(input, Set()) private def substitute(input: String, usedRefs: Set[String]): String = { if (input != null) { ConfigReader.REF_RE.replaceAllIn(input, { m => val prefix = m.group(1) val name = m.group(2) val ref = if (prefix == null) name else s"$prefix:$name" require(!usedRefs.contains(ref), s"Circular reference in $input: $ref") val replacement = bindings.get(prefix) .flatMap(_.get(name)) .map { v => substitute(v, usedRefs + ref) } .getOrElse(m.matched) Regex.quoteReplacement(replacement) }) } else { input } } }
Example 54
Source File: StageInfo.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import scala.collection.mutable.HashMap import org.apache.spark.annotation.DeveloperApi import org.apache.spark.executor.TaskMetrics import org.apache.spark.storage.RDDInfo def fromStage( stage: Stage, attemptId: Int, numTasks: Option[Int] = None, taskMetrics: TaskMetrics = null, taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty ): StageInfo = { val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd) val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos new StageInfo( stage.id, attemptId, stage.name, numTasks.getOrElse(stage.numTasks), rddInfos, stage.parents.map(_.id), stage.details, taskMetrics, taskLocalityPreferences) } }
Example 55
Source File: GroupedCountEvaluator.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.partial import scala.collection.Map import scala.collection.mutable.HashMap import scala.reflect.ClassTag import org.apache.spark.util.collection.OpenHashMap private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double) extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] { private var outputsMerged = 0 private val sums = new OpenHashMap[T, Long]() // Sum of counts for each key override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]): Unit = { outputsMerged += 1 taskResult.foreach { case (key, value) => sums.changeValue(key, value, _ + value) } } override def currentResult(): Map[T, BoundedDouble] = { if (outputsMerged == totalOutputs) { sums.map { case (key, sum) => (key, new BoundedDouble(sum, 1.0, sum, sum)) }.toMap } else if (outputsMerged == 0) { new HashMap[T, BoundedDouble] } else { val p = outputsMerged.toDouble / totalOutputs sums.map { case (key, sum) => (key, CountEvaluator.bound(confidence, sum, p)) }.toMap } } }
Example 56
Source File: MasterWebUISuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.master.ui import java.io.DataOutputStream import java.net.{HttpURLConnection, URL} import java.nio.charset.StandardCharsets import java.util.Date import scala.collection.mutable.HashMap import org.mockito.Mockito.{mock, times, verify, when} import org.scalatest.BeforeAndAfterAll import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.deploy.DeployMessages.{KillDriverResponse, RequestKillDriver} import org.apache.spark.deploy.DeployTestUtils._ import org.apache.spark.deploy.master._ import org.apache.spark.rpc.{RpcEndpointRef, RpcEnv} class MasterWebUISuite extends SparkFunSuite with BeforeAndAfterAll { val conf = new SparkConf val securityMgr = new SecurityManager(conf) val rpcEnv = mock(classOf[RpcEnv]) val master = mock(classOf[Master]) val masterEndpointRef = mock(classOf[RpcEndpointRef]) when(master.securityMgr).thenReturn(securityMgr) when(master.conf).thenReturn(conf) when(master.rpcEnv).thenReturn(rpcEnv) when(master.self).thenReturn(masterEndpointRef) val masterWebUI = new MasterWebUI(master, 0) override def beforeAll() { super.beforeAll() masterWebUI.bind() } override def afterAll() { masterWebUI.stop() super.afterAll() } test("kill application") { val appDesc = createAppDesc() // use new start date so it isn't filtered by UI val activeApp = new ApplicationInfo( new Date().getTime, "app-0", appDesc, new Date(), null, Int.MaxValue) when(master.idToApp).thenReturn(HashMap[String, ApplicationInfo]((activeApp.id, activeApp))) val url = s"http://localhost:${masterWebUI.boundPort}/app/kill/" val body = convPostDataToString(Map(("id", activeApp.id), ("terminate", "true"))) val conn = sendHttpRequest(url, "POST", body) conn.getResponseCode // Verify the master was called to remove the active app verify(master, times(1)).removeApplication(activeApp, ApplicationState.KILLED) } test("kill driver") { val activeDriverId = "driver-0" val url = s"http://localhost:${masterWebUI.boundPort}/driver/kill/" val body = convPostDataToString(Map(("id", activeDriverId), ("terminate", "true"))) val conn = sendHttpRequest(url, "POST", body) conn.getResponseCode // Verify that master was asked to kill driver with the correct id verify(masterEndpointRef, times(1)).ask[KillDriverResponse](RequestKillDriver(activeDriverId)) } private def convPostDataToString(data: Map[String, String]): String = { (for ((name, value) <- data) yield s"$name=$value").mkString("&") } private def sendHttpRequest( url: String, method: String, body: String = ""): HttpURLConnection = { val conn = new URL(url).openConnection().asInstanceOf[HttpURLConnection] conn.setRequestMethod(method) if (body.nonEmpty) { conn.setDoOutput(true) conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded") conn.setRequestProperty("Content-Length", Integer.toString(body.length)) val out = new DataOutputStream(conn.getOutputStream) out.write(body.getBytes(StandardCharsets.UTF_8)) out.close() } conn } }
Example 57
Source File: exercise02.scala From scala-for-the-Impatient with MIT License | 5 votes |
import scala.collection.mutable.{HashMap, ListBuffer}

def mapStrIndex(str: String) = {
  val indexMap = new HashMap[Char, ListBuffer[Int]]()
  var i = 0
  str.toCharArray.foreach { c =>
    indexMap.get(c) match {
      case Some(result) => result += i
      case None => indexMap += (c -> ListBuffer(i))
    }
    i += 1
  }
  indexMap
}

println(mapStrIndex("Mississippi"))
Example 58
Source File: exercise01.scala From scala-for-the-Impatient with MIT License | 5 votes |
import scala.collection.SortedSet
import scala.collection.mutable.HashMap

def mapStrIndex(str: String) = {
  val indexMap = new HashMap[Char, SortedSet[Int]]()
  var i = 0
  str.toCharArray.foreach { c =>
    indexMap.get(c) match {
      case Some(result) => indexMap(c) = result + i
      case None => indexMap += (c -> SortedSet(i))
    }
    i += 1
  }
  indexMap
}

println(mapStrIndex("Mississippi"))
Example 59
Source File: BlockStoreShuffleFetcher.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.shuffle.hash import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.HashMap import scala.util.{Failure, Success, Try} import org.apache.spark._ import org.apache.spark.serializer.Serializer import org.apache.spark.shuffle.FetchFailedException import org.apache.spark.storage.{BlockId, BlockManagerId, ShuffleBlockFetcherIterator, ShuffleBlockId} import org.apache.spark.util.CompletionIterator private[hash] object BlockStoreShuffleFetcher extends Logging { def fetch[T]( shuffleId: Int, reduceId: Int, context: TaskContext, serializer: Serializer) : Iterator[T] = { logDebug("Fetching outputs for shuffle %d, reduce %d".format(shuffleId, reduceId)) val blockManager = SparkEnv.get.blockManager val startTime = System.currentTimeMillis val statuses = SparkEnv.get.mapOutputTracker.getServerStatuses(shuffleId, reduceId) logDebug("Fetching map output location for shuffle %d, reduce %d took %d ms".format( shuffleId, reduceId, System.currentTimeMillis - startTime)) val splitsByAddress = new HashMap[BlockManagerId, ArrayBuffer[(Int, Long)]] for (((address, size), index) <- statuses.zipWithIndex) { splitsByAddress.getOrElseUpdate(address, ArrayBuffer()) += ((index, size)) } val blocksByAddress: Seq[(BlockManagerId, Seq[(BlockId, Long)])] = splitsByAddress.toSeq.map { case (address, splits) => (address, splits.map(s => (ShuffleBlockId(shuffleId, s._1, reduceId), s._2))) } def unpackBlock(blockPair: (BlockId, Try[Iterator[Any]])) : Iterator[T] = { val blockId = blockPair._1 val blockOption = blockPair._2 blockOption match { case Success(block) => { block.asInstanceOf[Iterator[T]] } case Failure(e) => { blockId match { case ShuffleBlockId(shufId, mapId, _) => val address = statuses(mapId.toInt)._1 throw new FetchFailedException(address, shufId.toInt, mapId.toInt, reduceId, e) case _ => throw new SparkException( "Failed to get block " + blockId + ", which is not a shuffle block", e) } } } } val blockFetcherItr = new ShuffleBlockFetcherIterator( context, SparkEnv.get.blockManager.shuffleClient, blockManager, blocksByAddress, serializer, SparkEnv.get.conf.getLong("spark.reducer.maxMbInFlight", 48) * 1024 * 1024) val itr = blockFetcherItr.flatMap(unpackBlock) val completionIter = CompletionIterator[T, Iterator[T]](itr, { context.taskMetrics.updateShuffleReadMetrics() }) new InterruptibleIterator[T](context, completionIter) { val readMetrics = context.taskMetrics.createShuffleReadMetricsForDependency() override def next(): T = { readMetrics.incRecordsRead(1) delegate.next() } } } }
Example 60
Source File: ExecutorsTab.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.exec import scala.collection.mutable.HashMap import org.apache.spark.ExceptionFailure import org.apache.spark.annotation.DeveloperApi import org.apache.spark.scheduler._ import org.apache.spark.storage.StorageStatusListener import org.apache.spark.ui.{SparkUI, SparkUITab} private[ui] class ExecutorsTab(parent: SparkUI) extends SparkUITab(parent, "executors") { val listener = parent.executorsListener val sc = parent.sc val threadDumpEnabled = sc.isDefined && parent.conf.getBoolean("spark.ui.threadDumpsEnabled", true) attachPage(new ExecutorsPage(this, threadDumpEnabled)) if (threadDumpEnabled) { attachPage(new ExecutorThreadDumpPage(this)) } } @DeveloperApi class ExecutorsListener(storageStatusListener: StorageStatusListener) extends SparkListener { val executorToTasksActive = HashMap[String, Int]() val executorToTasksComplete = HashMap[String, Int]() val executorToTasksFailed = HashMap[String, Int]() val executorToDuration = HashMap[String, Long]() val executorToInputBytes = HashMap[String, Long]() val executorToInputRecords = HashMap[String, Long]() val executorToOutputBytes = HashMap[String, Long]() val executorToOutputRecords = HashMap[String, Long]() val executorToShuffleRead = HashMap[String, Long]() val executorToShuffleWrite = HashMap[String, Long]() val executorToLogUrls = HashMap[String, Map[String, String]]() def storageStatusList = storageStatusListener.storageStatusList override def onExecutorAdded(executorAdded: SparkListenerExecutorAdded) = synchronized { val eid = executorAdded.executorId executorToLogUrls(eid) = executorAdded.executorInfo.logUrlMap } override def onTaskStart(taskStart: SparkListenerTaskStart) = synchronized { val eid = taskStart.taskInfo.executorId executorToTasksActive(eid) = executorToTasksActive.getOrElse(eid, 0) + 1 } override def onTaskEnd(taskEnd: SparkListenerTaskEnd) = synchronized { val info = taskEnd.taskInfo if (info != null) { val eid = info.executorId executorToTasksActive(eid) = executorToTasksActive.getOrElse(eid, 1) - 1 executorToDuration(eid) = executorToDuration.getOrElse(eid, 0L) + info.duration taskEnd.reason match { case e: ExceptionFailure => executorToTasksFailed(eid) = executorToTasksFailed.getOrElse(eid, 0) + 1 case _ => executorToTasksComplete(eid) = executorToTasksComplete.getOrElse(eid, 0) + 1 } // Update shuffle read/write val metrics = taskEnd.taskMetrics if (metrics != null) { metrics.inputMetrics.foreach { inputMetrics => executorToInputBytes(eid) = executorToInputBytes.getOrElse(eid, 0L) + inputMetrics.bytesRead executorToInputRecords(eid) = executorToInputRecords.getOrElse(eid, 0L) + inputMetrics.recordsRead } metrics.outputMetrics.foreach { outputMetrics => executorToOutputBytes(eid) = executorToOutputBytes.getOrElse(eid, 0L) + outputMetrics.bytesWritten executorToOutputRecords(eid) = executorToOutputRecords.getOrElse(eid, 0L) + outputMetrics.recordsWritten } metrics.shuffleReadMetrics.foreach { shuffleRead => executorToShuffleRead(eid) = executorToShuffleRead.getOrElse(eid, 0L) + shuffleRead.remoteBytesRead } metrics.shuffleWriteMetrics.foreach { shuffleWrite => executorToShuffleWrite(eid) = executorToShuffleWrite.getOrElse(eid, 0L) + shuffleWrite.shuffleBytesWritten } } } } }
Example 61
Source File: UIData.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.jobs import org.apache.spark.JobExecutionStatus import org.apache.spark.executor.TaskMetrics import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo} import org.apache.spark.util.collection.OpenHashSet import scala.collection.mutable.HashMap private[jobs] object UIData { class ExecutorSummary { var taskTime : Long = 0 var failedTasks : Int = 0 var succeededTasks : Int = 0 var inputBytes : Long = 0 var inputRecords : Long = 0 var outputBytes : Long = 0 var outputRecords : Long = 0 var shuffleRead : Long = 0 var shuffleReadRecords : Long = 0 var shuffleWrite : Long = 0 var shuffleWriteRecords : Long = 0 var memoryBytesSpilled : Long = 0 var diskBytesSpilled : Long = 0 } class JobUIData( var jobId: Int = -1, var submissionTime: Option[Long] = None, var completionTime: Option[Long] = None, var stageIds: Seq[Int] = Seq.empty, var jobGroup: Option[String] = None, var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN, case class TaskUIData( var taskInfo: TaskInfo, var taskMetrics: Option[TaskMetrics] = None, var errorMessage: Option[String] = None) }
Example 62
Source File: PoolTable.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.jobs import scala.collection.mutable.HashMap import scala.xml.Node import org.apache.spark.scheduler.{Schedulable, StageInfo} import org.apache.spark.ui.UIUtils private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) { private val listener = parent.listener def toNodeSeq: Seq[Node] = { listener.synchronized { poolTable(poolRow, pools) } } private def poolTable( makeRow: (Schedulable, HashMap[String, HashMap[Int, StageInfo]]) => Seq[Node], rows: Seq[Schedulable]): Seq[Node] = { <table class="table table-bordered table-striped table-condensed sortable table-fixed"> <thead> <th>Pool Name</th> <th>Minimum Share</th> <th>Pool Weight</th> <th>Active Stages</th> <th>Running Tasks</th> <th>SchedulingMode</th> </thead> <tbody> {rows.map(r => makeRow(r, listener.poolToActiveStages))} </tbody> </table> } private def poolRow( p: Schedulable, poolToActiveStages: HashMap[String, HashMap[Int, StageInfo]]): Seq[Node] = { val activeStages = poolToActiveStages.get(p.name) match { case Some(stages) => stages.size case None => 0 } val href = "%s/stages/pool?poolname=%s" .format(UIUtils.prependBaseUri(parent.basePath), p.name) <tr> <td> <a href={href}>{p.name}</a> </td> <td>{p.minShare}</td> <td>{p.weight}</td> <td>{activeStages}</td> <td>{p.runningTasks}</td> <td>{p.schedulingMode}</td> </tr> } }
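PoolTable reads a nested HashMap[String, HashMap[Int, StageInfo]] and falls back to 0 when a pool has no active stages. A stripped-down sketch of that nested lookup, with Strings standing in for StageInfo and invented pool names:

import scala.collection.mutable.HashMap

object PoolLookupSketch {
  val poolToActiveStages = HashMap[String, HashMap[Int, String]]()
  poolToActiveStages("default") = HashMap(1 -> "stage-1", 2 -> "stage-2")

  // same shape as poolRow above: size of the inner map, or 0 for an unknown pool
  def activeStageCount(pool: String): Int =
    poolToActiveStages.get(pool).map(_.size).getOrElse(0)

  def main(args: Array[String]): Unit = {
    println(activeStageCount("default"))    // 2
    println(activeStageCount("production")) // 0
  }
}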
Example 63
Source File: StageInfo.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import scala.collection.mutable.HashMap import org.apache.spark.annotation.DeveloperApi import org.apache.spark.storage.RDDInfo def fromStage(stage: Stage, numTasks: Option[Int] = None): StageInfo = { val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd) val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos new StageInfo( stage.id, stage.attemptId, stage.name, numTasks.getOrElse(stage.numTasks), rddInfos, stage.details) } }
Example 64
Source File: GroupedSumEvaluator.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.partial import java.util.{HashMap => JHashMap} import scala.collection.JavaConversions.mapAsScalaMap import scala.collection.Map import scala.collection.mutable.HashMap import org.apache.spark.util.StatCounter private[spark] class GroupedSumEvaluator[T](totalOutputs: Int, confidence: Double) extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] { var outputsMerged = 0 var sums = new JHashMap[T, StatCounter] // Sum of counts for each key override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]) { outputsMerged += 1 val iter = taskResult.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val old = sums.get(entry.getKey) if (old != null) { old.merge(entry.getValue) } else { sums.put(entry.getKey, entry.getValue) } } } override def currentResult(): Map[T, BoundedDouble] = { if (outputsMerged == totalOutputs) { val result = new JHashMap[T, BoundedDouble](sums.size) val iter = sums.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val sum = entry.getValue.sum result(entry.getKey) = new BoundedDouble(sum, 1.0, sum, sum) } result } else if (outputsMerged == 0) { new HashMap[T, BoundedDouble] } else { val p = outputsMerged.toDouble / totalOutputs val studentTCacher = new StudentTCacher(confidence) val result = new JHashMap[T, BoundedDouble](sums.size) val iter = sums.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val counter = entry.getValue val meanEstimate = counter.mean val meanVar = counter.sampleVariance / counter.count val countEstimate = (counter.count + 1 - p) / p val countVar = (counter.count + 1) * (1 - p) / (p * p) val sumEstimate = meanEstimate * countEstimate val sumVar = (meanEstimate * meanEstimate * countVar) + (countEstimate * countEstimate * meanVar) + (meanVar * countVar) val sumStdev = math.sqrt(sumVar) val confFactor = studentTCacher.get(counter.count) val low = sumEstimate - confFactor * sumStdev val high = sumEstimate + confFactor * sumStdev result(entry.getKey) = new BoundedDouble(sumEstimate, confidence, low, high) } result } } }
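The evaluator merges per-key StatCounters coming from each task, creating an entry the first time a key is seen and merging afterwards. A Scala-collections sketch of that merge step, using getOrElseUpdate in place of the Java-map get/null check; the Stat class below is only a stand-in for Spark's StatCounter:

import scala.collection.mutable.HashMap

object StatMergeSketch {
  // minimal stand-in for org.apache.spark.util.StatCounter
  final class Stat(var count: Long = 0L, var sum: Double = 0.0) {
    def merge(other: Stat): Unit = { count += other.count; sum += other.sum }
  }

  val sums = HashMap[String, Stat]()

  def merge(taskResult: Map[String, Stat]): Unit =
    taskResult.foreach { case (key, stat) =>
      sums.getOrElseUpdate(key, new Stat()).merge(stat)
    }

  def main(args: Array[String]): Unit = {
    merge(Map("a" -> new Stat(2, 3.0)))
    merge(Map("a" -> new Stat(1, 1.5), "b" -> new Stat(1, 4.0)))
    println(sums("a").sum) // 4.5
  }
}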
Example 65
Source File: GroupedCountEvaluator.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.partial import java.util.{HashMap => JHashMap} import scala.collection.JavaConversions.mapAsScalaMap import scala.collection.Map import scala.collection.mutable.HashMap import scala.reflect.ClassTag import org.apache.commons.math3.distribution.NormalDistribution import org.apache.spark.util.collection.OpenHashMap private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double) extends ApproximateEvaluator[OpenHashMap[T,Long], Map[T, BoundedDouble]] { var outputsMerged = 0 var sums = new OpenHashMap[T,Long]() // Sum of counts for each key override def merge(outputId: Int, taskResult: OpenHashMap[T,Long]) { outputsMerged += 1 taskResult.foreach { case (key, value) => sums.changeValue(key, value, _ + value) } } override def currentResult(): Map[T, BoundedDouble] = { if (outputsMerged == totalOutputs) { val result = new JHashMap[T, BoundedDouble](sums.size) sums.foreach { case (key, sum) => result(key) = new BoundedDouble(sum, 1.0, sum, sum) } result } else if (outputsMerged == 0) { new HashMap[T, BoundedDouble] } else { val p = outputsMerged.toDouble / totalOutputs val confFactor = new NormalDistribution(). inverseCumulativeProbability(1 - (1 - confidence) / 2) val result = new JHashMap[T, BoundedDouble](sums.size) sums.foreach { case (key, sum) => val mean = (sum + 1 - p) / p val variance = (sum + 1) * (1 - p) / (p * p) val stdev = math.sqrt(variance) val low = mean - confFactor * stdev val high = mean + confFactor * stdev result(key) = new BoundedDouble(mean, confidence, low, high) } result } } }
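With only a fraction p of the outputs merged, the count for each key is extrapolated as (sum + 1 - p) / p with a normal-approximation interval. A plain-Scala sketch of that arithmetic, hard-coding the 95% z-value instead of calling commons-math3; the (mean, low, high) tuple stands in for BoundedDouble:

import scala.collection.mutable.HashMap

object PartialCountSketch {
  def estimate(sums: HashMap[String, Long],
               outputsMerged: Int,
               totalOutputs: Int): Map[String, (Double, Double, Double)] = {
    val p = outputsMerged.toDouble / totalOutputs
    val confFactor = 1.96 // approx. inverseCumulativeProbability(0.975) for 95% confidence
    sums.map { case (key, sum) =>
      val mean = (sum + 1 - p) / p
      val stdev = math.sqrt((sum + 1) * (1 - p) / (p * p))
      key -> (mean, mean - confFactor * stdev, mean + confFactor * stdev)
    }.toMap
  }

  def main(args: Array[String]): Unit = {
    val sums = HashMap("cat" -> 40L, "dog" -> 10L) // invented partial counts
    println(estimate(sums, outputsMerged = 2, totalOutputs = 4))
  }
}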
Example 66
Source File: GroupedMeanEvaluator.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.partial import java.util.{HashMap => JHashMap} import scala.collection.JavaConversions.mapAsScalaMap import scala.collection.Map import scala.collection.mutable.HashMap import org.apache.spark.util.StatCounter private[spark] class GroupedMeanEvaluator[T](totalOutputs: Int, confidence: Double) extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] { var outputsMerged = 0 var sums = new JHashMap[T, StatCounter] // Sum of counts for each key override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]) { outputsMerged += 1 val iter = taskResult.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val old = sums.get(entry.getKey) if (old != null) { old.merge(entry.getValue) } else { sums.put(entry.getKey, entry.getValue) } } } override def currentResult(): Map[T, BoundedDouble] = { if (outputsMerged == totalOutputs) { val result = new JHashMap[T, BoundedDouble](sums.size) val iter = sums.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val mean = entry.getValue.mean result(entry.getKey) = new BoundedDouble(mean, 1.0, mean, mean) } result } else if (outputsMerged == 0) { new HashMap[T, BoundedDouble] } else { val studentTCacher = new StudentTCacher(confidence) val result = new JHashMap[T, BoundedDouble](sums.size) val iter = sums.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val counter = entry.getValue val mean = counter.mean val stdev = math.sqrt(counter.sampleVariance / counter.count) val confFactor = studentTCacher.get(counter.count) val low = mean - confFactor * stdev val high = mean + confFactor * stdev result(entry.getKey) = new BoundedDouble(mean, confidence, low, high) } result } } }
Example 67
Source File: filter_errorcode.scala From scalabpe with Apache License 2.0 | 5 votes |
package scalabpe.plugin import scala.collection.mutable.HashMap import scala.xml.Node import scalabpe.core.DummyActor import scalabpe.core.HashMapStringAny import scalabpe.core.Logging import scalabpe.core.Request import scalabpe.core.Response import scalabpe.core.ResponseFilter import scalabpe.core.Router class ErrorCodeDefine(val resultCodeName: String, val resultMsgName: String); class ErrorDescResponseFilter(val router: Router, val cfgNode: Node) extends ResponseFilter with Logging { val cfgs = new HashMap[Int, ErrorCodeDefine]() var localCacheServiceId = 0 val dummyActor = new DummyActor() init def init() { var s = (cfgNode \ "@localCacheServiceId").toString if (s != "") localCacheServiceId = s.toInt val serviceNodes = (cfgNode \ "Service") for (p <- serviceNodes) { val serviceId = (p \ "@serviceId").toString.toInt val resultCodeName = (p \ "@resultCodeField").toString val resultMsgName = (p \ "@resultMsgField").toString cfgs.put(serviceId, new ErrorCodeDefine(resultCodeName, resultMsgName)) // log.info("serviceId=%d,resultCodeName=%s,resultMsgName=%s".format(serviceId,resultCodeName,resultMsgName)) } log.info("errorcode response filter created") } def filter(res: Response, req: Request): Unit = { // log.info("error response filter called, res={}",res.toString) val rd = cfgs.getOrElse(res.serviceId, null) if (rd == null) return if (rd.resultCodeName != "") { if (res.body.getOrElse(rd.resultCodeName, null) == null) { res.body.put(rd.resultCodeName, res.code) } } if (res.code == 0) return if (rd.resultMsgName == "") return if (res.body.getOrElse(rd.resultMsgName, null) != null) return val body = new HashMapStringAny() body.put("resultCode", res.code) val req = new Request( res.requestId + ":$", Router.DO_NOT_REPLY, res.sequence, res.encoding, localCacheServiceId, 1, new HashMapStringAny(), body, dummyActor) val invokeResult = router.send(req) if (invokeResult == null) return val resultMsg = invokeResult.s("resultMsg", "") if (resultMsg != "") res.body.put(rd.resultMsgName, resultMsg) } }
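The filter keys its per-service configuration by serviceId and silently skips services that were never configured. A small sketch of that registry-lookup pattern; the service ids and field names are invented, and Option replaces the getOrElse(.., null) check used above:

import scala.collection.mutable.HashMap

object ErrorCodeRegistrySketch {
  final case class ErrorCodeDefine(resultCodeName: String, resultMsgName: String)

  val cfgs = new HashMap[Int, ErrorCodeDefine]()
  cfgs.put(100, ErrorCodeDefine("resultCode", "resultMsg")) // hypothetical serviceId 100

  def lookup(serviceId: Int): Option[ErrorCodeDefine] = cfgs.get(serviceId)

  def main(args: Array[String]): Unit = {
    println(lookup(100)) // Some(ErrorCodeDefine(resultCode,resultMsg))
    println(lookup(999)) // None: this service was never configured
  }
}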
Example 68
Source File: httpserverplugin_staticfile.scala From scalabpe with Apache License 2.0 | 5 votes |
package scalabpe.plugin.http import java.io.File import java.net.URLEncoder import java.text.SimpleDateFormat import java.util.Calendar import java.util.GregorianCalendar import java.util.Locale import java.util.TimeZone import scala.collection.mutable.HashMap import org.jboss.netty.handler.codec.http.HttpHeaders import scalabpe.core.HashMapStringAny class StaticFilePlugin extends HttpServerPlugin with HttpServerStaticFilePlugin { val ETAG_TAG = "etag" val EXPIRE_TAG = "expire" val ATTACHMENT = "attachment" val FILENAME = "filename" val HTTP_DATE_FORMAT = "EEE, dd MMM yyyy HH:mm:ss zzz"; val HTTP_DATE_GMT_TIMEZONE = "GMT"; val df_tl = new ThreadLocal[SimpleDateFormat]() { override def initialValue(): SimpleDateFormat = { val df = new SimpleDateFormat(HTTP_DATE_FORMAT, Locale.US) df.setTimeZone(TimeZone.getTimeZone(HTTP_DATE_GMT_TIMEZONE)); df } } def generateStaticFile(serviceId: Int, msgId: Int, errorCode: Int, errorMessage: String, body: HashMapStringAny, pluginParam: String, headers: HashMap[String, String]): String = { if (body.ns(FILENAME) == "") { return null } val filename = body.ns(FILENAME) if (!new File(filename).exists()) { return null } if (body.ns(ETAG_TAG) != "") { headers.put("ETag", body.ns(ETAG_TAG)) } if (body.ns(EXPIRE_TAG) != "") { body.i(EXPIRE_TAG) match { case 0 | -1 => headers.put(HttpHeaders.Names.CACHE_CONTROL, "no-cache") case n => // seconds val time = new GregorianCalendar(); time.add(Calendar.SECOND, n); headers.put(HttpHeaders.Names.EXPIRES, df_tl.get.format(time.getTime())); headers.put(HttpHeaders.Names.CACHE_CONTROL, "max-age=" + n); } } val ext = parseExt(filename) if (ext != "") body.put("__file_ext__", ext) if (body.ns(ATTACHMENT, "1") == "1") { val filename = body.ns(FILENAME) val v = "attachment; filename=\"%s\"".format(URLEncoder.encode(parseFilename(filename), "UTF-8")) headers.put("Content-Disposition", v) } filename } def parseFilename(name: String): String = { val p = name.lastIndexOf("/") if (p < 0) return name name.substring(p + 1) } def parseExt(name: String): String = { val p = name.lastIndexOf(".") if (p < 0) return "" name.substring(p + 1).toLowerCase() } }
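generateStaticFile collects response headers into a mutable HashMap[String, String], switching between no-cache and max-age depending on the expire value. A sketch of just that cache-header decision, with the Expires date handling left out:

import scala.collection.mutable.HashMap

object CacheHeaderSketch {
  // mirrors the expire handling above: 0 or -1 disables caching, n > 0 sets max-age
  def cacheHeaders(expireSeconds: Int): HashMap[String, String] = {
    val headers = new HashMap[String, String]()
    expireSeconds match {
      case 0 | -1 => headers.put("Cache-Control", "no-cache")
      case n      => headers.put("Cache-Control", "max-age=" + n)
    }
    headers
  }

  def main(args: Array[String]): Unit = {
    println(cacheHeaders(-1)) // e.g. Map(Cache-Control -> no-cache)
    println(cacheHeaders(60)) // e.g. Map(Cache-Control -> max-age=60)
  }
}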
Example 69
Source File: format_flow.scala From scalabpe with Apache License 2.0 | 5 votes |
package scalabpe import java.io._ import scala.collection.mutable.HashMap import scala.collection.mutable.ArrayBuffer import scala.io.Source import org.apache.commons.io.FileUtils import scala.xml._ import scala.collection.mutable._ import scalabpe.core._ import org.apache.commons.lang.StringUtils import Tools._ object FormatFlowTool { def help() { println( """ usage: scalabpe.FormatFlowTool [options] dirname options: -h|--help show this help message """) } def parseArgs(args:Array[String]):HashMapStringAny = { val map = HashMapStringAny() var i = 0 val files = ArrayBufferString() while(i < args.size) { args(i) match { case "-h" | "--help" => return null case s if s.startsWith("-") => println("invalid option "+s) return null case _ => files += args(i) i += 1 } } map.put("files",files) map } def main(args:Array[String]) { var params = parseArgs(args) if( params == null ) { help() return } var files = params.nls("files") if( files.size == 0 ) { help() return } var dir = files(0) if( !new File(dir).exists() ) { val p1 = "compose_conf"+File.separator+dir if( new File(p1).exists ) { dir = p1 } else { println("not a valid dir, dir="+dir) return } } processDir(dir,params) } def processDir(dir:String,params:HashMapStringAny) { val files = new File(dir).listFiles.filter(_.getName.endsWith(".flow")) for(f <- files ) { processFile(dir,f.getName,params) } } def processFile(dir:String,f:String,params:HashMapStringAny) { val lines = readAllLines(dir+File.separator+f) // TODO } }
Example 70
Source File: GraphMap.scala From stellar-random-walk with Apache License 2.0 | 5 votes |
package au.csiro.data61.randomwalk.algorithm import scala.collection.mutable import scala.collection.mutable.{ArrayBuffer, HashMap} def reset { indexCounter = 0 offsetCounter = 0 srcVertexMap.clear() offsets.clear() lengths.clear() edges.clear() vertexPartitionMap.clear } def getNeighbors(vid: Int): Array[(Int, Float)] = { srcVertexMap.get(vid) match { case Some(index) => if (index == -1) { return Array.empty[(Int, Float)] } val offset = offsets(index) val length = lengths(index) edges.slice(offset, offset + length).toArray case None => null } } }
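getNeighbors resolves a vertex id through srcVertexMap and then slices a flat, CSR-style edge array using the recorded offset and length. A condensed, self-contained sketch of that layout with made-up vertices; the index == -1 case is omitted and an unknown vertex yields an empty array rather than null:

import scala.collection.mutable.{ArrayBuffer, HashMap}

object GraphMapSketch {
  val srcVertexMap = HashMap[Int, Int]()  // vertex id -> index into offsets/lengths
  val offsets = ArrayBuffer[Int]()
  val lengths = ArrayBuffer[Int]()
  val edges = ArrayBuffer[(Int, Float)]() // all adjacency lists, concatenated

  def addVertex(vid: Int, neighbors: Seq[(Int, Float)]): Unit = {
    srcVertexMap(vid) = offsets.length
    offsets += edges.length
    lengths += neighbors.length
    edges ++= neighbors
  }

  def getNeighbors(vid: Int): Array[(Int, Float)] =
    srcVertexMap.get(vid) match {
      case Some(index) => edges.slice(offsets(index), offsets(index) + lengths(index)).toArray
      case None        => Array.empty[(Int, Float)]
    }

  def main(args: Array[String]): Unit = {
    addVertex(1, Seq((2, 0.5f), (3, 1.0f)))
    println(getNeighbors(1).toList) // List((2,0.5), (3,1.0))
    println(getNeighbors(9).toList) // List()
  }
}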
Example 71
Source File: HeaderEnum.scala From testchipip with BSD 3-Clause "New" or "Revised" License | 5 votes |
package testchipip import chisel3._ import chisel3.util.log2Up import scala.collection.mutable.{HashMap, ListBuffer} class HeaderEnum(val prefix: String) { val h = new HashMap[String,Int] def makeHeader(): String = { h.toSeq.sortBy(_._2).map { case (n,i) => s"#define ${prefix.toUpperCase}_${n.toUpperCase} $i\n" } mkString } def apply(s: String): UInt = h(s).U(log2Up(h.size).W) } object HeaderEnum { val contents = new ListBuffer[String] def apply(prefix: String, names: String*): HeaderEnum = { val e = new HeaderEnum(prefix) names.zipWithIndex.foreach { case (n,i) => e.h.put(n,i) } val header = e.makeHeader() if(!HeaderEnum.contents.contains(header)) HeaderEnum.contents += header e } }
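HeaderEnum is a name-to-index HashMap that can also emit the matching C #define lines. A Chisel-free sketch of the header-generation half; the prefix and names are invented:

import scala.collection.mutable.HashMap

object HeaderGenSketch {
  // same idea as HeaderEnum.makeHeader, minus the UInt conversion
  def makeHeader(prefix: String, names: Seq[String]): String = {
    val h = new HashMap[String, Int]()
    names.zipWithIndex.foreach { case (n, i) => h.put(n, i) }
    h.toSeq.sortBy(_._2).map { case (n, i) =>
      s"#define ${prefix.toUpperCase}_${n.toUpperCase} $i\n"
    }.mkString
  }

  def main(args: Array[String]): Unit =
    print(makeHeader("cmd", Seq("read", "write", "flush")))
  // #define CMD_READ 0
  // #define CMD_WRITE 1
  // #define CMD_FLUSH 2
}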
Example 72
Source File: Mapper.scala From CSYE7200_Old with MIT License | 5 votes |
package edu.neu.coe.csye7200.mapreduce import akka.actor.{Actor, ActorLogging, ActorRef} import scala.collection.mutable import scala.collection.mutable.HashMap import scala.util._ class Mapper_Forgiving[K1,V1,K2,V2](f: (K1,V1)=>(K2,V2)) extends Mapper[K1,V1,K2,V2](f) { override def prepareReply(v2k2ts: Seq[Try[(K2,V2)]]) = { val v2sK2m = mutable.HashMap[K2,Seq[V2]]() // mutable val xs = mutable.ListBuffer[Throwable]() // mutable, so appended failures are actually retained for (v2k2t <- v2k2ts; v2k2e = Master.sequence(v2k2t)) v2k2e match { case Right((k2,v2)) => v2sK2m put(k2, v2+:v2sK2m.getOrElse((k2), (Nil))) case Left(x) => xs += x } (v2sK2m.toMap, xs) } } case class Incoming[K, V](m: Seq[(K,V)]) { override def toString = s"Incoming: with ${m.size} elements" } object Incoming { def sequence[K,V](vs: Seq[V]): Incoming[K,V] = Incoming((vs zip Stream.continually(null.asInstanceOf[K])).map{_.swap}) def map[K, V](vKm: Map[K,V]): Incoming[K,V] = Incoming(vKm.toSeq) } object Mapper { }
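prepareReply folds a sequence of (key, value) results into a HashMap of key to values by prepending to whatever is already stored for that key. A minimal sketch of that group-by step on its own:

import scala.collection.mutable.HashMap

object GroupByKeySketch {
  def groupPairs[K, V](pairs: Seq[(K, V)]): Map[K, Seq[V]] = {
    val grouped = HashMap[K, Seq[V]]()
    for ((k, v) <- pairs) grouped.put(k, v +: grouped.getOrElse(k, Nil))
    grouped.toMap
  }

  def main(args: Array[String]): Unit =
    println(groupPairs(Seq("a" -> 1, "b" -> 2, "a" -> 3)))
  // e.g. Map(a -> List(3, 1), b -> List(2))
}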
Example 73
Source File: Labels.scala From jgo with GNU General Public License v3.0 | 5 votes |
package jgo.tools.compiler package parser.stmts import parser.exprs._ import parser.scoped._ import parser.funcs._ import interm._ import types._ import symbol._ import codeseq._ import instr._ import scala.collection.mutable.{HashMap, HashSet, ListBuffer} import scala.{collection => coll} import coll.{immutable => imm} trait Labels { private val seenDefs = HashSet[String]() private val unseenDefs = HashMap[String, ListBuffer[Pos]]() private val lbls = HashMap[String, UserLabel]() def defLabel(name: String, pos: Pos): (String, Err[UserLabel]) = if (seenDefs contains name) (name, problem("label %s already defined", name)(pos)) else { seenDefs += name unseenDefs -= name val label = lbls getOrElseUpdate (name, new UserLabel(name)) (name, result(label)) } def useLabel(pos: Pos, name: String): UserLabel = { if (!(seenDefs contains name)) unseenDefs.getOrElseUpdate(name, new ListBuffer) += pos lbls getOrElseUpdate (name, new UserLabel(name)) } def procGoto(pos: Pos, name: String): Err[CodeBuilder] = { result(Goto(useLabel(pos, name))) } def checkForUndefedLabels: Err[Unit] = { var issues: Err[Unit] = result(()) for ((lblName, positions) <- unseenDefs; pos <- positions) { issues = issues then problem("target label not found: %s", lblName)(pos) } issues } }
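defLabel and useLabel both go through getOrElseUpdate, so exactly one UserLabel instance is created per name regardless of whether the definition or a goto is seen first. A tiny sketch of that interning pattern with a plain Label class:

import scala.collection.mutable.HashMap

object LabelInternSketch {
  final class Label(val name: String)

  val lbls = HashMap[String, Label]()

  // returns the same Label instance for repeated lookups of one name
  def labelFor(name: String): Label = lbls.getOrElseUpdate(name, new Label(name))

  def main(args: Array[String]): Unit = {
    val a = labelFor("loop")
    val b = labelFor("loop")
    println(a eq b) // true: created once, then reused
  }
}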
Example 74
Source File: LocalKMeans.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples import java.util.Random import scala.collection.mutable.HashMap import scala.collection.mutable.HashSet import breeze.linalg.{squaredDistance, DenseVector, Vector} object LocalKMeans { val N = 1000 val R = 1000 // Scaling factor val D = 10 val K = 10 val convergeDist = 0.001 val rand = new Random(42) def generateData: Array[DenseVector[Double]] = { def generatePoint(i: Int): DenseVector[Double] = { DenseVector.fill(D) {rand.nextDouble * R} } Array.tabulate(N)(generatePoint) } def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = { var index = 0 var bestIndex = 0 var closest = Double.PositiveInfinity for (i <- 1 to centers.size) { val vCurr = centers.get(i).get val tempDist = squaredDistance(p, vCurr) if (tempDist < closest) { closest = tempDist bestIndex = i } } bestIndex } def showWarning() { System.err.println( """WARN: This is a naive implementation of KMeans Clustering and is given as an example! |Please use org.apache.spark.ml.clustering.KMeans |for more conventional use. """.stripMargin) } def main(args: Array[String]) { showWarning() val data = generateData var points = new HashSet[Vector[Double]] var kPoints = new HashMap[Int, Vector[Double]] var tempDist = 1.0 while (points.size < K) { points.add(data(rand.nextInt(N))) } val iter = points.iterator for (i <- 1 to points.size) { kPoints.put(i, iter.next()) } println("Initial centers: " + kPoints) while(tempDist > convergeDist) { var closest = data.map (p => (closestPoint(p, kPoints), (p, 1))) var mappings = closest.groupBy[Int] (x => x._1) var pointStats = mappings.map { pair => pair._2.reduceLeft [(Int, (Vector[Double], Int))] { case ((id1, (p1, c1)), (id2, (p2, c2))) => (id1, (p1 + p2, c1 + c2)) } } var newPoints = pointStats.map {mapping => (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))} tempDist = 0.0 for (mapping <- newPoints) { tempDist += squaredDistance(kPoints.get(mapping._1).get, mapping._2) } for (newP <- newPoints) { kPoints.put(newP._1, newP._2) } } println("Final centers: " + kPoints) } } // scalastyle:on println
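closestPoint scans the centers stored in a HashMap keyed 1..K and keeps the key of the smallest squared distance. A one-dimensional, breeze-free sketch of the same scan with invented centers:

import scala.collection.mutable.HashMap

object ClosestCenterSketch {
  def closestCenter(p: Double, centers: HashMap[Int, Double]): Int = {
    var bestIndex = 0
    var closest = Double.PositiveInfinity
    for ((i, c) <- centers) {
      val dist = (p - c) * (p - c)
      if (dist < closest) { closest = dist; bestIndex = i }
    }
    bestIndex
  }

  def main(args: Array[String]): Unit = {
    val centers = HashMap(1 -> 0.0, 2 -> 5.0, 3 -> 10.0)
    println(closestCenter(6.2, centers)) // 2
  }
}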
Example 75
Source File: JsonUtils.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.kafka010 import scala.collection.mutable.HashMap import scala.util.control.NonFatal import org.apache.kafka.common.TopicPartition import org.json4s.NoTypeHints import org.json4s.jackson.Serialization def partitionOffsets(partitionOffsets: Map[TopicPartition, Long]): String = { val result = new HashMap[String, HashMap[Int, Long]]() implicit val ordering = new Ordering[TopicPartition] { override def compare(x: TopicPartition, y: TopicPartition): Int = { Ordering.Tuple2[String, Int].compare((x.topic, x.partition), (y.topic, y.partition)) } } val partitions = partitionOffsets.keySet.toSeq.sorted // sort for more determinism partitions.foreach { tp => val off = partitionOffsets(tp) val parts = result.getOrElse(tp.topic, new HashMap[Int, Long]) parts += tp.partition -> off result += tp.topic -> parts } Serialization.write(result) } }
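partitionOffsets regroups a flat map keyed by (topic, partition) into a nested HashMap keyed by topic and then partition before handing it to json4s. A sketch of the regrouping step alone, with plain tuples standing in for TopicPartition:

import scala.collection.mutable.HashMap

object RegroupSketch {
  def regroup(offsets: Map[(String, Int), Long]): HashMap[String, HashMap[Int, Long]] = {
    val result = new HashMap[String, HashMap[Int, Long]]()
    offsets.foreach { case ((topic, partition), off) =>
      val parts = result.getOrElse(topic, new HashMap[Int, Long])
      parts += partition -> off
      result += topic -> parts
    }
    result
  }

  def main(args: Array[String]): Unit =
    println(regroup(Map(("t", 0) -> 10L, ("t", 1) -> 20L, ("u", 0) -> 5L)))
  // e.g. Map(t -> Map(0 -> 10, 1 -> 20), u -> Map(0 -> 5))
}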
Example 76
Source File: ThriftServerMonitor.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.thriftserver.monitor import scala.collection.mutable.HashMap import org.apache.spark.SparkException import org.apache.spark.internal.Logging import org.apache.spark.sql.hive.thriftserver.ui.ThriftServerTab object ThriftServerMonitor extends Logging { private[this] val uiTabs = new HashMap[String, ThriftServerTab]() private[this] val listeners = new HashMap[String, ThriftServerListener]() def setListener(user: String, sparkListener: ThriftServerListener): Unit = { listeners.put(user, sparkListener) } def getListener(user: String): ThriftServerListener = { listeners.getOrElse(user, throw new SparkException(s"Listener does not init for user[$user]")) } def addUITab(user: String, ui: ThriftServerTab): Unit = { uiTabs.put(user, ui) } def detachUITab(user: String): Unit = { listeners.remove(user) uiTabs.get(user).foreach(_.detach()) } def detachAllUITabs(): Unit = { uiTabs.values.foreach(_.detach()) } }
Example 77
Source File: MasterWebUI.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.master.ui import scala.collection.mutable.HashMap import org.eclipse.jetty.servlet.ServletContextHandler import org.apache.spark.deploy.master.Master import org.apache.spark.internal.Logging import org.apache.spark.ui.{SparkUI, WebUI} import org.apache.spark.ui.JettyUtils._ def initialize() { val masterPage = new MasterPage(this) attachPage(new ApplicationPage(this)) attachPage(masterPage) attachHandler(createStaticHandler(MasterWebUI.STATIC_RESOURCE_DIR, "/static")) attachHandler(createRedirectHandler( "/app/kill", "/", masterPage.handleAppKillRequest, httpMethods = Set("POST"))) attachHandler(createRedirectHandler( "/driver/kill", "/", masterPage.handleDriverKillRequest, httpMethods = Set("POST"))) } def addProxyTargets(id: String, target: String): Unit = { var endTarget = target.stripSuffix("/") val handler = createProxyHandler("/proxy/" + id, endTarget) attachHandler(handler) proxyHandlers(id) = handler } def removeProxyTargets(id: String): Unit = { proxyHandlers.remove(id).foreach(detachHandler) } } private[master] object MasterWebUI { private val STATIC_RESOURCE_DIR = SparkUI.STATIC_RESOURCE_DIR }
Example 78
Source File: PoolTable.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.jobs import java.net.URLEncoder import scala.collection.mutable.HashMap import scala.xml.Node import org.apache.spark.scheduler.{Schedulable, StageInfo} import org.apache.spark.ui.UIUtils private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) { private val listener = parent.progressListener def toNodeSeq: Seq[Node] = { listener.synchronized { poolTable(poolRow, pools) } } private def poolTable( makeRow: (Schedulable, HashMap[String, HashMap[Int, StageInfo]]) => Seq[Node], rows: Seq[Schedulable]): Seq[Node] = { <table class="table table-bordered table-striped table-condensed sortable table-fixed"> <thead> <th>Pool Name</th> <th>Minimum Share</th> <th>Pool Weight</th> <th>Active Stages</th> <th>Running Tasks</th> <th>SchedulingMode</th> </thead> <tbody> {rows.map(r => makeRow(r, listener.poolToActiveStages))} </tbody> </table> } private def poolRow( p: Schedulable, poolToActiveStages: HashMap[String, HashMap[Int, StageInfo]]): Seq[Node] = { val activeStages = poolToActiveStages.get(p.name) match { case Some(stages) => stages.size case None => 0 } val href = "%s/stages/pool?poolname=%s" .format(UIUtils.prependBaseUri(parent.basePath, sparkUser = parent.sparkUser), URLEncoder.encode(p.name, "UTF-8")) <tr> <td> <a href={href}>{p.name}</a> </td> <td>{p.minShare}</td> <td>{p.weight}</td> <td>{activeStages}</td> <td>{p.runningTasks}</td> <td>{p.schedulingMode}</td> </tr> } }
Example 79
Source File: ConfigReader.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.internal.config import java.util.{Map => JMap} import java.util.regex.Pattern import scala.collection.mutable.HashMap import scala.util.matching.Regex private object ConfigReader { private val REF_RE = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r } def substitute(input: String): String = substitute(input, Set()) private def substitute(input: String, usedRefs: Set[String]): String = { if (input != null) { ConfigReader.REF_RE.replaceAllIn(input, { m => val prefix = m.group(1) val name = m.group(2) val ref = if (prefix == null) name else s"$prefix:$name" require(!usedRefs.contains(ref), s"Circular reference in $input: $ref") val replacement = bindings.get(prefix) .flatMap(_.get(name)) .map { v => substitute(v, usedRefs + ref) } .getOrElse(m.matched) Regex.quoteReplacement(replacement) }) } else { input } } }
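substitute resolves ${prefix:name} references by looking the name up in the bound maps and recursing, while the usedRefs set guards against cycles. A self-contained sketch of that loop over a single HashMap of bindings, dropping the prefix handling; the binding names are invented:

import scala.collection.mutable.HashMap
import scala.util.matching.Regex

object SubstitutionSketch {
  private val RefRe = "\\$\\{(\\w+)\\}".r

  def substitute(input: String, bindings: HashMap[String, String], used: Set[String] = Set()): String =
    RefRe.replaceAllIn(input, m => {
      val name = m.group(1)
      require(!used.contains(name), s"Circular reference in $input: $name")
      val replacement = bindings.get(name)
        .map(v => substitute(v, bindings, used + name))
        .getOrElse(m.matched) // unknown references are left untouched
      Regex.quoteReplacement(replacement)
    })

  def main(args: Array[String]): Unit = {
    val bindings = HashMap("user" -> "alice", "home" -> "/home/${user}")
    println(substitute("dir=${home}/data", bindings)) // dir=/home/alice/data
  }
}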
Example 80
Source File: StageInfo.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import scala.collection.mutable.HashMap import org.apache.spark.annotation.DeveloperApi import org.apache.spark.executor.TaskMetrics import org.apache.spark.storage.RDDInfo def fromStage( stage: Stage, attemptId: Int, numTasks: Option[Int] = None, taskMetrics: TaskMetrics = null, taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty ): StageInfo = { val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd) val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos new StageInfo( stage.id, attemptId, stage.name, numTasks.getOrElse(stage.numTasks), rddInfos, stage.parents.map(_.id), stage.details, taskMetrics, taskLocalityPreferences) } }
Example 81
Source File: GroupedCountEvaluator.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.partial import scala.collection.Map import scala.collection.mutable.HashMap import scala.reflect.ClassTag import org.apache.spark.util.collection.OpenHashMap private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double) extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] { private var outputsMerged = 0 private val sums = new OpenHashMap[T, Long]() // Sum of counts for each key override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]): Unit = { outputsMerged += 1 taskResult.foreach { case (key, value) => sums.changeValue(key, value, _ + value) } } override def currentResult(): Map[T, BoundedDouble] = { if (outputsMerged == totalOutputs) { sums.map { case (key, sum) => (key, new BoundedDouble(sum, 1.0, sum, sum)) }.toMap } else if (outputsMerged == 0) { new HashMap[T, BoundedDouble] } else { val p = outputsMerged.toDouble / totalOutputs sums.map { case (key, sum) => (key, CountEvaluator.bound(confidence, sum, p)) }.toMap } } }
Example 82
Source File: MasterWebUISuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.master.ui import java.io.DataOutputStream import java.net.{HttpURLConnection, URL} import java.nio.charset.StandardCharsets import java.util.Date import scala.collection.mutable.HashMap import org.mockito.Mockito.{mock, times, verify, when} import org.scalatest.BeforeAndAfterAll import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.deploy.DeployMessages.{KillDriverResponse, RequestKillDriver} import org.apache.spark.deploy.DeployTestUtils._ import org.apache.spark.deploy.master._ import org.apache.spark.rpc.{RpcEndpointRef, RpcEnv} class MasterWebUISuite extends SparkFunSuite with BeforeAndAfterAll { val conf = new SparkConf val securityMgr = new SecurityManager(conf) val rpcEnv = mock(classOf[RpcEnv]) val master = mock(classOf[Master]) val masterEndpointRef = mock(classOf[RpcEndpointRef]) when(master.securityMgr).thenReturn(securityMgr) when(master.conf).thenReturn(conf) when(master.rpcEnv).thenReturn(rpcEnv) when(master.self).thenReturn(masterEndpointRef) val masterWebUI = new MasterWebUI(master, 0) override def beforeAll() { super.beforeAll() masterWebUI.bind() } override def afterAll() { masterWebUI.stop() super.afterAll() } test("kill application") { val appDesc = createAppDesc() // use new start date so it isn't filtered by UI val activeApp = new ApplicationInfo( new Date().getTime, "app-0", appDesc, new Date(), null, Int.MaxValue) when(master.idToApp).thenReturn(HashMap[String, ApplicationInfo]((activeApp.id, activeApp))) val url = s"http://localhost:${masterWebUI.boundPort}/app/kill/" val body = convPostDataToString(Map(("id", activeApp.id), ("terminate", "true"))) val conn = sendHttpRequest(url, "POST", body) conn.getResponseCode // Verify the master was called to remove the active app verify(master, times(1)).removeApplication(activeApp, ApplicationState.KILLED) } test("kill driver") { val activeDriverId = "driver-0" val url = s"http://localhost:${masterWebUI.boundPort}/driver/kill/" val body = convPostDataToString(Map(("id", activeDriverId), ("terminate", "true"))) val conn = sendHttpRequest(url, "POST", body) conn.getResponseCode // Verify that master was asked to kill driver with the correct id verify(masterEndpointRef, times(1)).ask[KillDriverResponse](RequestKillDriver(activeDriverId)) } private def convPostDataToString(data: Map[String, String]): String = { (for ((name, value) <- data) yield s"$name=$value").mkString("&") } private def sendHttpRequest( url: String, method: String, body: String = ""): HttpURLConnection = { val conn = new URL(url).openConnection().asInstanceOf[HttpURLConnection] conn.setRequestMethod(method) if (body.nonEmpty) { conn.setDoOutput(true) conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded") conn.setRequestProperty("Content-Length", Integer.toString(body.length)) val out = new DataOutputStream(conn.getOutputStream) out.write(body.getBytes(StandardCharsets.UTF_8)) out.close() } conn } }
Example 83
Source File: LocalKMeans.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.examples import java.util.Random import scala.collection.mutable.HashMap import scala.collection.mutable.HashSet import breeze.linalg.{Vector, DenseVector, squaredDistance} import org.apache.spark.SparkContext._ object LocalKMeans { val N = 1000 val R = 1000 // Scaling factor val D = 10 val K = 10 val convergeDist = 0.001 val rand = new Random(42) def generateData: Array[DenseVector[Double]] = { def generatePoint(i: Int): DenseVector[Double] = { DenseVector.fill(D){rand.nextDouble * R} } Array.tabulate(N)(generatePoint) } def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = { var index = 0 var bestIndex = 0 var closest = Double.PositiveInfinity for (i <- 1 to centers.size) { val vCurr = centers.get(i).get val tempDist = squaredDistance(p, vCurr) if (tempDist < closest) { closest = tempDist bestIndex = i } } bestIndex } def showWarning() { System.err.println( """WARN: This is a naive implementation of KMeans Clustering and is given as an example! |Please use the KMeans method found in org.apache.spark.mllib.clustering |for more conventional use. """.stripMargin) } def main(args: Array[String]) { showWarning() val data = generateData var points = new HashSet[Vector[Double]] var kPoints = new HashMap[Int, Vector[Double]] var tempDist = 1.0 while (points.size < K) { points.add(data(rand.nextInt(N))) } val iter = points.iterator for (i <- 1 to points.size) { kPoints.put(i, iter.next()) } println("Initial centers: " + kPoints) while(tempDist > convergeDist) { var closest = data.map (p => (closestPoint(p, kPoints), (p, 1))) var mappings = closest.groupBy[Int] (x => x._1) var pointStats = mappings.map { pair => pair._2.reduceLeft [(Int, (Vector[Double], Int))] { case ((id1, (x1, y1)), (id2, (x2, y2))) => (id1, (x1 + x2, y1 + y2)) } } var newPoints = pointStats.map {mapping => (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))} tempDist = 0.0 for (mapping <- newPoints) { tempDist += squaredDistance(kPoints.get(mapping._1).get, mapping._2) } for (newP <- newPoints) { kPoints.put(newP._1, newP._2) } } println("Final centers: " + kPoints) } }
Example 84
Source File: BlockStoreShuffleFetcher.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.shuffle.hash import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.HashMap import scala.util.{Failure, Success, Try} import org.apache.spark._ import org.apache.spark.serializer.Serializer import org.apache.spark.shuffle.FetchFailedException import org.apache.spark.storage.{BlockId, BlockManagerId, ShuffleBlockFetcherIterator, ShuffleBlockId} import org.apache.spark.util.CompletionIterator private[hash] object BlockStoreShuffleFetcher extends Logging { def fetch[T]( shuffleId: Int, reduceId: Int, context: TaskContext, serializer: Serializer) : Iterator[T] = { logDebug("Fetching outputs for shuffle %d, reduce %d".format(shuffleId, reduceId)) val blockManager = SparkEnv.get.blockManager val startTime = System.currentTimeMillis val statuses = SparkEnv.get.mapOutputTracker.getServerStatuses(shuffleId, reduceId) logDebug("Fetching map output location for shuffle %d, reduce %d took %d ms".format( shuffleId, reduceId, System.currentTimeMillis - startTime)) val splitsByAddress = new HashMap[BlockManagerId, ArrayBuffer[(Int, Long)]] for (((address, size), index) <- statuses.zipWithIndex) { splitsByAddress.getOrElseUpdate(address, ArrayBuffer()) += ((index, size)) } val blocksByAddress: Seq[(BlockManagerId, Seq[(BlockId, Long)])] = splitsByAddress.toSeq.map { case (address, splits) => (address, splits.map(s => (ShuffleBlockId(shuffleId, s._1, reduceId), s._2))) } def unpackBlock(blockPair: (BlockId, Try[Iterator[Any]])) : Iterator[T] = { val blockId = blockPair._1 val blockOption = blockPair._2 blockOption match { case Success(block) => { block.asInstanceOf[Iterator[T]] } case Failure(e) => { blockId match { case ShuffleBlockId(shufId, mapId, _) => val address = statuses(mapId.toInt)._1 throw new FetchFailedException(address, shufId.toInt, mapId.toInt, reduceId, e) case _ => throw new SparkException( "Failed to get block " + blockId + ", which is not a shuffle block", e) } } } } val blockFetcherItr = new ShuffleBlockFetcherIterator( context, SparkEnv.get.blockManager.shuffleClient, blockManager, blocksByAddress, serializer, // Note: we use getSizeAsMb when no suffix is provided for backwards compatibility SparkEnv.get.conf.getSizeAsMb("spark.reducer.maxSizeInFlight", "48m") * 1024 * 1024) val itr = blockFetcherItr.flatMap(unpackBlock) val completionIter = CompletionIterator[T, Iterator[T]](itr, { context.taskMetrics.updateShuffleReadMetrics() }) new InterruptibleIterator[T](context, completionIter) { val readMetrics = context.taskMetrics.createShuffleReadMetricsForDependency() override def next(): T = { readMetrics.incRecordsRead(1) delegate.next() } } } }
Example 85
Source File: ExecutorsTab.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.exec import scala.collection.mutable.HashMap import org.apache.spark.ExceptionFailure import org.apache.spark.annotation.DeveloperApi import org.apache.spark.scheduler._ import org.apache.spark.storage.{StorageStatus, StorageStatusListener} import org.apache.spark.ui.{SparkUI, SparkUITab} import org.apache.spark.ui.jobs.UIData.ExecutorUIData private[ui] class ExecutorsTab(parent: SparkUI) extends SparkUITab(parent, "executors") { val listener = parent.executorsListener val sc = parent.sc val threadDumpEnabled = sc.isDefined && parent.conf.getBoolean("spark.ui.threadDumpsEnabled", true) attachPage(new ExecutorsPage(this, threadDumpEnabled)) if (threadDumpEnabled) { attachPage(new ExecutorThreadDumpPage(this)) } } @DeveloperApi class ExecutorsListener(storageStatusListener: StorageStatusListener) extends SparkListener { val executorToTasksActive = HashMap[String, Int]() val executorToTasksComplete = HashMap[String, Int]() val executorToTasksFailed = HashMap[String, Int]() val executorToDuration = HashMap[String, Long]() val executorToInputBytes = HashMap[String, Long]() val executorToInputRecords = HashMap[String, Long]() val executorToOutputBytes = HashMap[String, Long]() val executorToOutputRecords = HashMap[String, Long]() val executorToShuffleRead = HashMap[String, Long]() val executorToShuffleWrite = HashMap[String, Long]() val executorToLogUrls = HashMap[String, Map[String, String]]() val executorIdToData = HashMap[String, ExecutorUIData]() def storageStatusList: Seq[StorageStatus] = storageStatusListener.storageStatusList override def onExecutorAdded(executorAdded: SparkListenerExecutorAdded): Unit = synchronized { val eid = executorAdded.executorId executorToLogUrls(eid) = executorAdded.executorInfo.logUrlMap executorIdToData(eid) = ExecutorUIData(executorAdded.time) } override def onExecutorRemoved( executorRemoved: SparkListenerExecutorRemoved): Unit = synchronized { val eid = executorRemoved.executorId val uiData = executorIdToData(eid) uiData.finishTime = Some(executorRemoved.time) uiData.finishReason = Some(executorRemoved.reason) } override def onTaskStart(taskStart: SparkListenerTaskStart): Unit = synchronized { val eid = taskStart.taskInfo.executorId executorToTasksActive(eid) = executorToTasksActive.getOrElse(eid, 0) + 1 } override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = synchronized { val info = taskEnd.taskInfo if (info != null) { val eid = info.executorId executorToTasksActive(eid) = executorToTasksActive.getOrElse(eid, 1) - 1 executorToDuration(eid) = executorToDuration.getOrElse(eid, 0L) + info.duration taskEnd.reason match { case e: ExceptionFailure => executorToTasksFailed(eid) = executorToTasksFailed.getOrElse(eid, 0) + 1 case _ => executorToTasksComplete(eid) = executorToTasksComplete.getOrElse(eid, 0) + 1 } // Update shuffle read/write val metrics = taskEnd.taskMetrics if (metrics != null) { metrics.inputMetrics.foreach { inputMetrics => executorToInputBytes(eid) = executorToInputBytes.getOrElse(eid, 0L) + inputMetrics.bytesRead executorToInputRecords(eid) = executorToInputRecords.getOrElse(eid, 0L) + inputMetrics.recordsRead } metrics.outputMetrics.foreach { outputMetrics => executorToOutputBytes(eid) = executorToOutputBytes.getOrElse(eid, 0L) + outputMetrics.bytesWritten executorToOutputRecords(eid) = executorToOutputRecords.getOrElse(eid, 0L) + outputMetrics.recordsWritten } metrics.shuffleReadMetrics.foreach { shuffleRead => executorToShuffleRead(eid) = executorToShuffleRead.getOrElse(eid, 0L) + 
shuffleRead.remoteBytesRead } metrics.shuffleWriteMetrics.foreach { shuffleWrite => executorToShuffleWrite(eid) = executorToShuffleWrite.getOrElse(eid, 0L) + shuffleWrite.shuffleBytesWritten } } } } }
Example 86
Source File: UIData.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.jobs import org.apache.spark.JobExecutionStatus import org.apache.spark.executor.TaskMetrics import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo} import org.apache.spark.util.collection.OpenHashSet import scala.collection.mutable.HashMap private[spark] object UIData { class ExecutorSummary { var taskTime : Long = 0 var failedTasks : Int = 0 var succeededTasks : Int = 0 var inputBytes : Long = 0 var inputRecords : Long = 0 var outputBytes : Long = 0 var outputRecords : Long = 0 var shuffleRead : Long = 0 var shuffleReadRecords : Long = 0 var shuffleWrite : Long = 0 var shuffleWriteRecords : Long = 0 var memoryBytesSpilled : Long = 0 var diskBytesSpilled : Long = 0 } class JobUIData( var jobId: Int = -1, var submissionTime: Option[Long] = None, var completionTime: Option[Long] = None, var stageIds: Seq[Int] = Seq.empty, var jobGroup: Option[String] = None, var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN, case class TaskUIData( var taskInfo: TaskInfo, var taskMetrics: Option[TaskMetrics] = None, var errorMessage: Option[String] = None) case class ExecutorUIData( val startTime: Long, var finishTime: Option[Long] = None, var finishReason: Option[String] = None) }
Example 87
Source File: PoolTable.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.jobs import scala.collection.mutable.HashMap import scala.xml.Node import org.apache.spark.scheduler.{Schedulable, StageInfo} import org.apache.spark.ui.UIUtils private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) { private val listener = parent.progressListener def toNodeSeq: Seq[Node] = { listener.synchronized { poolTable(poolRow, pools) } } private def poolTable( makeRow: (Schedulable, HashMap[String, HashMap[Int, StageInfo]]) => Seq[Node], rows: Seq[Schedulable]): Seq[Node] = { <table class="table table-bordered table-striped table-condensed sortable table-fixed"> <thead> <th>Pool Name</th> <th>Minimum Share</th> <th>Pool Weight</th> <th>Active Stages</th> <th>Running Tasks</th> <th>SchedulingMode</th> </thead> <tbody> {rows.map(r => makeRow(r, listener.poolToActiveStages))} </tbody> </table> } private def poolRow( p: Schedulable, poolToActiveStages: HashMap[String, HashMap[Int, StageInfo]]): Seq[Node] = { val activeStages = poolToActiveStages.get(p.name) match { case Some(stages) => stages.size case None => 0 } val href = "%s/stages/pool?poolname=%s" .format(UIUtils.prependBaseUri(parent.basePath), p.name) <tr> <td> <a href={href}>{p.name}</a> </td> <td>{p.minShare}</td> <td>{p.weight}</td> <td>{activeStages}</td> <td>{p.runningTasks}</td> <td>{p.schedulingMode}</td> </tr> } }
Example 88
Source File: StageInfo.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import scala.collection.mutable.HashMap import org.apache.spark.annotation.DeveloperApi import org.apache.spark.storage.RDDInfo def fromStage(stage: Stage, numTasks: Option[Int] = None): StageInfo = { val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd) val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos new StageInfo( stage.id, stage.attemptId, stage.name, numTasks.getOrElse(stage.numTasks), rddInfos, stage.parents.map(_.id), stage.details) } }
Example 89
Source File: GroupedSumEvaluator.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.partial import java.util.{HashMap => JHashMap} import scala.collection.JavaConversions.mapAsScalaMap import scala.collection.Map import scala.collection.mutable.HashMap import org.apache.spark.util.StatCounter private[spark] class GroupedSumEvaluator[T](totalOutputs: Int, confidence: Double) extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] { var outputsMerged = 0 var sums = new JHashMap[T, StatCounter] // Sum of counts for each key override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]) { outputsMerged += 1 val iter = taskResult.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val old = sums.get(entry.getKey) if (old != null) { old.merge(entry.getValue) } else { sums.put(entry.getKey, entry.getValue) } } } override def currentResult(): Map[T, BoundedDouble] = { if (outputsMerged == totalOutputs) { val result = new JHashMap[T, BoundedDouble](sums.size) val iter = sums.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val sum = entry.getValue.sum result(entry.getKey) = new BoundedDouble(sum, 1.0, sum, sum) } result } else if (outputsMerged == 0) { new HashMap[T, BoundedDouble] } else { val p = outputsMerged.toDouble / totalOutputs val studentTCacher = new StudentTCacher(confidence) val result = new JHashMap[T, BoundedDouble](sums.size) val iter = sums.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val counter = entry.getValue val meanEstimate = counter.mean val meanVar = counter.sampleVariance / counter.count val countEstimate = (counter.count + 1 - p) / p val countVar = (counter.count + 1) * (1 - p) / (p * p) val sumEstimate = meanEstimate * countEstimate val sumVar = (meanEstimate * meanEstimate * countVar) + (countEstimate * countEstimate * meanVar) + (meanVar * countVar) val sumStdev = math.sqrt(sumVar) val confFactor = studentTCacher.get(counter.count) val low = sumEstimate - confFactor * sumStdev val high = sumEstimate + confFactor * sumStdev result(entry.getKey) = new BoundedDouble(sumEstimate, confidence, low, high) } result } } }
Example 90
Source File: GroupedCountEvaluator.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.partial import java.util.{HashMap => JHashMap} import scala.collection.JavaConversions.mapAsScalaMap import scala.collection.Map import scala.collection.mutable.HashMap import scala.reflect.ClassTag import org.apache.commons.math3.distribution.NormalDistribution import org.apache.spark.util.collection.OpenHashMap private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double) extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] { var outputsMerged = 0 var sums = new OpenHashMap[T, Long]() // Sum of counts for each key override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]) { outputsMerged += 1 taskResult.foreach { case (key, value) => sums.changeValue(key, value, _ + value) } } override def currentResult(): Map[T, BoundedDouble] = { if (outputsMerged == totalOutputs) { val result = new JHashMap[T, BoundedDouble](sums.size) sums.foreach { case (key, sum) => result(key) = new BoundedDouble(sum, 1.0, sum, sum) } result } else if (outputsMerged == 0) { new HashMap[T, BoundedDouble] } else { val p = outputsMerged.toDouble / totalOutputs val confFactor = new NormalDistribution(). inverseCumulativeProbability(1 - (1 - confidence) / 2) val result = new JHashMap[T, BoundedDouble](sums.size) sums.foreach { case (key, sum) => val mean = (sum + 1 - p) / p val variance = (sum + 1) * (1 - p) / (p * p) val stdev = math.sqrt(variance) val low = mean - confFactor * stdev val high = mean + confFactor * stdev result(key) = new BoundedDouble(mean, confidence, low, high) } result } } }
Example 91
Source File: GroupedMeanEvaluator.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.partial import java.util.{HashMap => JHashMap} import scala.collection.JavaConversions.mapAsScalaMap import scala.collection.Map import scala.collection.mutable.HashMap import org.apache.spark.util.StatCounter private[spark] class GroupedMeanEvaluator[T](totalOutputs: Int, confidence: Double) extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] { var outputsMerged = 0 var sums = new JHashMap[T, StatCounter] // Sum of counts for each key override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]) { outputsMerged += 1 val iter = taskResult.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val old = sums.get(entry.getKey) if (old != null) { old.merge(entry.getValue) } else { sums.put(entry.getKey, entry.getValue) } } } override def currentResult(): Map[T, BoundedDouble] = { if (outputsMerged == totalOutputs) { val result = new JHashMap[T, BoundedDouble](sums.size) val iter = sums.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val mean = entry.getValue.mean result(entry.getKey) = new BoundedDouble(mean, 1.0, mean, mean) } result } else if (outputsMerged == 0) { new HashMap[T, BoundedDouble] } else { val studentTCacher = new StudentTCacher(confidence) val result = new JHashMap[T, BoundedDouble](sums.size) val iter = sums.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val counter = entry.getValue val mean = counter.mean val stdev = math.sqrt(counter.sampleVariance / counter.count) val confFactor = studentTCacher.get(counter.count) val low = mean - confFactor * stdev val high = mean + confFactor * stdev result(entry.getKey) = new BoundedDouble(mean, confidence, low, high) } result } } }
Example 92
Source File: FeatureSelection.scala From aerosolve with Apache License 2.0 | 5 votes |
package com.airbnb.aerosolve.training import java.io.BufferedWriter import java.io.OutputStreamWriter import java.util import com.airbnb.aerosolve.core.{ModelRecord, ModelHeader, FeatureVector, Example} import com.airbnb.aerosolve.core.models.LinearModel import com.airbnb.aerosolve.core.util.Util import com.typesafe.config.Config import org.slf4j.{LoggerFactory, Logger} import org.apache.spark.SparkContext import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD import scala.collection.mutable.HashMap import scala.collection.mutable.HashSet import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.Buffer import scala.collection.JavaConversions._ import scala.collection.JavaConverters._ import scala.util.Random import scala.math.abs import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.fs.Path import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path object FeatureSelection { private final val log: Logger = LoggerFactory.getLogger("FeatureSelection") val allKey : (String, String) = ("$ALL", "$POS") // Given a RDD compute the pointwise mutual information between // the positive label and the discrete features. def pointwiseMutualInformation(examples : RDD[Example], config : Config, key : String, rankKey : String, posThreshold : Double, minPosCount : Double, newCrosses : Boolean) : RDD[((String, String), Double)] = { val pointwise = LinearRankerUtils.makePointwise(examples, config, key, rankKey) val features = pointwise .mapPartitions(part => { // The tuple2 is var, var | positive val output = scala.collection.mutable.HashMap[(String, String), (Double, Double)]() part.foreach(example =>{ val featureVector = example.example.get(0) val isPos = if (featureVector.floatFeatures.get(rankKey).asScala.head._2 > posThreshold) 1.0 else 0.0 val all : (Double, Double) = output.getOrElse(allKey, (0.0, 0.0)) output.put(allKey, (all._1 + 1.0, all._2 + 1.0 * isPos)) val features : Array[(String, String)] = LinearRankerUtils.getFeatures(featureVector) if (newCrosses) { for (i <- features) { for (j <- features) { if (i._1 < j._1) { val key = ("%s<NEW>%s".format(i._1, j._1), "%s<NEW>%s".format(i._2, j._2)) val x = output.getOrElse(key, (0.0, 0.0)) output.put(key, (x._1 + 1.0, x._2 + 1.0 * isPos)) } } } } for (feature <- features) { val x = output.getOrElse(feature, (0.0, 0.0)) output.put(feature, (x._1 + 1.0, x._2 + 1.0 * isPos)) } }) output.iterator }) .reduceByKey((a, b) => (a._1 + b._1, a._2 + b._2)) .filter(x => x._2._2 >= minPosCount) val allCount = features.filter(x => x._1.equals(allKey)).take(1).head features.map(x => { val prob = x._2._1 / allCount._2._1 val probPos = x._2._2 / allCount._2._2 (x._1, math.log(probPos / prob) / math.log(2.0)) }) } // Returns the maximum entropy per family def maxEntropy(input : RDD[((String, String), Double)]) : RDD[((String, String), Double)] = { input .map(x => (x._1._1, (x._1._2, x._2))) .reduceByKey((a, b) => if (math.abs(a._2) > math.abs(b._2)) a else b) .map(x => ((x._1, x._2._1), x._2._2)) } }
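Inside each partition the job accumulates a (count, positiveCount) pair per feature, plus a global pair under allKey, and the pointwise mutual information is log2(P(feature | positive) / P(feature)). A local, RDD-free sketch of that accumulation and formula; the feature tuples and labels are invented and the minPosCount filter is omitted:

import scala.collection.mutable.HashMap

object PmiSketch {
  val allKey: (String, String) = ("$ALL", "$POS")

  def pmi(examples: Seq[(Seq[(String, String)], Boolean)]): Map[(String, String), Double] = {
    val counts = HashMap[(String, String), (Double, Double)]() // (count, positive count)
    examples.foreach { case (features, isPos) =>
      val pos = if (isPos) 1.0 else 0.0
      val all = counts.getOrElse(allKey, (0.0, 0.0))
      counts(allKey) = (all._1 + 1.0, all._2 + pos)
      features.foreach { f =>
        val x = counts.getOrElse(f, (0.0, 0.0))
        counts(f) = (x._1 + 1.0, x._2 + pos)
      }
    }
    val (totalAll, totalPos) = counts(allKey)
    counts.collect { case (f, (n, nPos)) if f != allKey =>
      f -> math.log((nPos / totalPos) / (n / totalAll)) / math.log(2.0)
    }.toMap
  }

  def main(args: Array[String]): Unit =
    println(pmi(Seq(
      (Seq(("color", "red")), true),
      (Seq(("color", "red")), false),
      (Seq(("color", "blue")), true)
    )))
}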
Example 93
Source File: package.scala From kyuubi with Apache License 2.0 | 5 votes |
package yaooqinn.kyuubi import scala.collection.mutable.HashMap import org.apache.hadoop.fs.permission.FsPermission package object yarn { type EnvMap = HashMap[String, String] val KYUUBI_YARN_APP_NAME = "KYUUBI SERVER" val KYUUBI_YARN_APP_TYPE = "KYUUBI" // Staging directory for any temporary jars or files val KYUUBI_STAGING: String = ".kyuubiStaging" // Staging directory is private! -> rwx-------- val STAGING_DIR_PERMISSION: FsPermission = FsPermission.createImmutable(Integer.parseInt("700", 8).toShort) // App files are world-wide readable and owner writable -> rw-r--r-- val APP_FILE_PERMISSION: FsPermission = FsPermission.createImmutable(Integer.parseInt("644", 8).toShort) val SPARK_CONF_DIR = "__spark_conf__" val SPARK_CONF_FILE = "__spark_conf__.properties" // Subdirectory in the conf directory containing Hadoop config files. val HADOOP_CONF_DIR = "__hadoop_conf__" // File containing the conf archive in the AM. See prepareLocalResources(). val SPARK_CONF_ARCHIVE: String = SPARK_CONF_DIR + ".zip" val SPARK_LIB_DIR = "__spark_libs__" val LOCAL_SCHEME = "local" }
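EnvMap is just a type alias for mutable.HashMap[String, String]. A sketch of how such an alias might be filled when assembling a launch environment; the helper and the CLASSPATH entries are illustrative only, not part of Kyuubi:

import scala.collection.mutable.HashMap

object EnvMapSketch {
  type EnvMap = HashMap[String, String]

  // append with ':' when the variable already has a value, PATH-style
  def addPathEntry(env: EnvMap, key: String, value: String): Unit = {
    env(key) = env.get(key).map(_ + ":" + value).getOrElse(value)
  }

  def main(args: Array[String]): Unit = {
    val env: EnvMap = new HashMap[String, String]()
    addPathEntry(env, "CLASSPATH", "/opt/kyuubi/jars/*")
    addPathEntry(env, "CLASSPATH", "/etc/hadoop/conf")
    println(env("CLASSPATH")) // /opt/kyuubi/jars/*:/etc/hadoop/conf
  }
}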
Example 94
Source File: KyuubiDistributedCacheManager.scala From kyuubi with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.yarn import java.net.URI import scala.collection.mutable.{HashMap, Map} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} import org.apache.hadoop.yarn.api.records.{LocalResource, LocalResourceType} def addResource( fs: FileSystem, conf: Configuration, destPath: Path, localResources: HashMap[String, LocalResource], resourceType: LocalResourceType, link: String, statCache: Map[URI, FileStatus]): Unit = { cacheManager.addResource(fs, conf, destPath, localResources, resourceType, link, statCache, appMasterOnly = true) } }
Example 95
Source File: KyuubiDistributedCacheManagerSuite.scala From kyuubi with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.yarn import java.net.URI import scala.collection.mutable.{HashMap, Map} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} import org.apache.hadoop.yarn.api.records.{LocalResource, LocalResourceType, LocalResourceVisibility} import org.apache.hadoop.yarn.util.ConverterUtils import org.apache.spark.{KyuubiSparkUtil, SparkFunSuite} import org.mockito.Mockito.when import org.scalatest.mock.MockitoSugar import yaooqinn.kyuubi.utils.ReflectUtils class KyuubiDistributedCacheManagerSuite extends SparkFunSuite with MockitoSugar { class MockClientDistributedCacheManager extends ClientDistributedCacheManager { override def getVisibility(conf: Configuration, uri: URI, statCache: Map[URI, FileStatus]): LocalResourceVisibility = { LocalResourceVisibility.PRIVATE } } test("add resource") { val fs = mock[FileSystem] val conf = new Configuration() val destPath = new Path("file:///foo.bar.com:8080/tmp/testing") val localResources = HashMap[String, LocalResource]() val statCache = HashMap[URI, FileStatus]() val status = new FileStatus() when(fs.getFileStatus(destPath)).thenReturn(status) val fileLink = "link" ReflectUtils.setFieldValue( KyuubiDistributedCacheManager, "cacheManager", new MockClientDistributedCacheManager) KyuubiDistributedCacheManager.addResource( fs, conf, destPath, localResources, LocalResourceType.FILE, fileLink, statCache) val res = localResources(fileLink) assert(res.getVisibility === LocalResourceVisibility.PRIVATE) assert(ConverterUtils.getPathFromYarnURL(res.getResource) === destPath) assert(res.getSize === 0) assert(res.getTimestamp === 0) assert(res.getType === LocalResourceType.FILE) val status2 = new FileStatus( 10, false, 1, 1024, 10, 10, null, KyuubiSparkUtil.getCurrentUserName, null, new Path("/tmp/testing2")) val destPath2 = new Path("file:///foo.bar.com:8080/tmp/testing2") when(fs.getFileStatus(destPath2)).thenReturn(status2) val fileLink2 = "link2" KyuubiDistributedCacheManager.addResource( fs, conf, destPath2, localResources, LocalResourceType.FILE, fileLink2, statCache) val res2 = localResources(fileLink2) assert(res2.getVisibility === LocalResourceVisibility.PRIVATE) assert(ConverterUtils.getPathFromYarnURL(res2.getResource) === destPath2) assert(res2.getSize === 10) assert(res2.getTimestamp === 10) assert(res2.getType === LocalResourceType.FILE) } test("add resource when link null") { val distMgr = new MockClientDistributedCacheManager() val fs = mock[FileSystem] val conf = new Configuration() val destPath = new Path("file:///foo.bar.com:8080/tmp/testing") ReflectUtils.setFieldValue(KyuubiDistributedCacheManager, "cacheManager", distMgr) val localResources = HashMap[String, LocalResource]() val statCache = HashMap[URI, FileStatus]() when(fs.getFileStatus(destPath)).thenReturn(new FileStatus()) intercept[Exception] { KyuubiDistributedCacheManager.addResource( fs, conf, destPath, localResources, LocalResourceType.FILE, null, statCache) } assert(localResources.get("link") === None) assert(localResources.size === 0) } test("test addResource archive") { val distMgr = new MockClientDistributedCacheManager() ReflectUtils.setFieldValue(KyuubiDistributedCacheManager, "cacheManager", distMgr) val fs = mock[FileSystem] val conf = new Configuration() val destPath = new Path("file:///foo.bar.com:8080/tmp/testing") val localResources = HashMap[String, LocalResource]() val statCache = HashMap[URI, FileStatus]() val realFileStatus = new FileStatus(10, false, 1, 1024, 10, 10, null, "testOwner", null, new Path("/tmp/testing")) when(fs.getFileStatus(destPath)).thenReturn(realFileStatus) KyuubiDistributedCacheManager.addResource( fs, conf, destPath, localResources, LocalResourceType.ARCHIVE, "link", statCache) val resource = localResources("link") assert(resource.getVisibility === LocalResourceVisibility.PRIVATE) assert(ConverterUtils.getPathFromYarnURL(resource.getResource) === destPath) assert(resource.getTimestamp === 10) assert(resource.getSize === 10) assert(resource.getType === LocalResourceType.ARCHIVE) } }
Example 96
Source File: UIData.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.jobs import org.apache.spark.JobExecutionStatus import org.apache.spark.executor.TaskMetrics import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo} import org.apache.spark.util.collection.OpenHashSet import scala.collection.mutable import scala.collection.mutable.HashMap private[spark] object UIData { class ExecutorSummary { var taskTime : Long = 0 // task time var failedTasks : Int = 0 // number of failed tasks var succeededTasks : Int = 0 // number of succeeded tasks var inputBytes : Long = 0 var inputRecords : Long = 0 var outputBytes : Long = 0 var outputRecords : Long = 0 var shuffleRead : Long = 0 var shuffleReadRecords : Long = 0 var shuffleWrite : Long = 0 var shuffleWriteRecords : Long = 0 var memoryBytesSpilled : Long = 0 var diskBytesSpilled : Long = 0 } class JobUIData( var jobId: Int = -1, var submissionTime: Option[Long] = None, // submission time var completionTime: Option[Long] = None, // completion time var stageIds: Seq[Int] = Seq.empty, var jobGroup: Option[String] = None, var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN, case class TaskUIData( var taskInfo: TaskInfo, var taskMetrics: Option[TaskMetrics] = None, var errorMessage: Option[String] = None) case class ExecutorUIData( val startTime: Long, var finishTime: Option[Long] = None, var finishReason: Option[String] = None) }
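UI classes like ExecutorSummary are typically held in a mutable HashMap keyed by executor id so counters can be updated in place as task events arrive. The sketch below illustrates that getOrElseUpdate pattern with a trimmed-down summary class; ExecSummary and the two fields kept here are illustrative, not the full Spark class.

import scala.collection.mutable.HashMap

// Minimal stand-in for ExecutorSummary; only two of the counters are kept.
class ExecSummary { var taskTime: Long = 0L; var succeededTasks: Int = 0 }

object ExecutorSummarySketch {
  def main(args: Array[String]): Unit = {
    val byExecutor = HashMap[String, ExecSummary]()
    // getOrElseUpdate creates the summary the first time an executor id is seen.
    val s = byExecutor.getOrElseUpdate("exec-1", new ExecSummary)
    s.taskTime += 120L
    s.succeededTasks += 1
    println(s"exec-1 ran ${byExecutor("exec-1").succeededTasks} task(s)")
  }
}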
Example 97
Source File: PoolTable.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.jobs import scala.collection.mutable.HashMap import scala.xml.Node import org.apache.spark.scheduler.{Schedulable, StageInfo} import org.apache.spark.ui.UIUtils private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) { private val listener = parent.progressListener def toNodeSeq: Seq[Node] = { listener.synchronized { poolTable(poolRow, pools) } } private def poolTable( makeRow: (Schedulable, HashMap[String, HashMap[Int, StageInfo]]) => Seq[Node], rows: Seq[Schedulable]): Seq[Node] = { <table class="table table-bordered table-striped table-condensed sortable table-fixed"> <thead> <th>Pool Name</th> <th>Minimum Share</th> <th>Pool Weight</th> <th>Active Stages</th> <th>Running Tasks</th> <th>SchedulingMode</th> </thead> <tbody> {rows.map(r => makeRow(r, listener.poolToActiveStages))} </tbody> </table> } private def poolRow( p: Schedulable, poolToActiveStages: HashMap[String, HashMap[Int, StageInfo]]): Seq[Node] = { val activeStages = poolToActiveStages.get(p.name) match { case Some(stages) => stages.size case None => 0 } val href = "%s/stages/pool?poolname=%s" .format(UIUtils.prependBaseUri(parent.basePath), p.name) <tr> <td> <a href={href}>{p.name}</a> </td> <td>{p.minShare}</td> <td>{p.weight}</td> <td>{activeStages}</td> <td>{p.runningTasks}</td> <td>{p.schedulingMode}</td> </tr> } }
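PoolTable reads a nested HashMap[String, HashMap[Int, StageInfo]] and treats a missing pool as having zero active stages. A small sketch of that Option-based lookup, with stage names standing in for StageInfo objects:

import scala.collection.mutable.HashMap

object PoolLookupSketch {
  def main(args: Array[String]): Unit = {
    // Simplified: stage id -> stage name stands in for Int -> StageInfo.
    val poolToActiveStages = HashMap[String, HashMap[Int, String]]()
    poolToActiveStages("default") = HashMap(1 -> "stage-1", 2 -> "stage-2")

    // Same shape as poolRow: get returns an Option, missing pools count as 0.
    val activeStages = poolToActiveStages.get("default") match {
      case Some(stages) => stages.size
      case None         => 0
    }
    println(s"default pool has $activeStages active stage(s)")
  }
}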
Example 98
Source File: StageInfo.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import scala.collection.mutable.HashMap import org.apache.spark.annotation.DeveloperApi import org.apache.spark.storage.RDDInfo def fromStage( stage: Stage, attemptId: Int, // None is an object rather than a class: use None when there is no value and Some to wrap a value when there is one; both are subclasses of Option numTasks: Option[Int] = None, taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty ): StageInfo = { // getNarrowAncestors returns every RDD this stage's RDD depends on through direct or indirect narrow dependencies // RDDInfo.fromRdd builds an RDDInfo, including the RDD's parent dependency information val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd) // also build an RDDInfo for the current stage's RDD, then combine all RDDInfos into rddInfos val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos new StageInfo( stage.id, attemptId, stage.name, numTasks.getOrElse(stage.numTasks), rddInfos, stage.parents.map(_.id), stage.details, taskLocalityPreferences) } }
Example 99
Source File: GroupedSumEvaluator.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.partial import java.util.{HashMap => JHashMap} import scala.collection.JavaConversions.mapAsScalaMap import scala.collection.Map import scala.collection.mutable.HashMap import org.apache.spark.util.StatCounter private[spark] class GroupedSumEvaluator[T](totalOutputs: Int, confidence: Double) extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] { var outputsMerged = 0 var sums = new JHashMap[T, StatCounter] // Sum of counts for each key override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]) { outputsMerged += 1 val iter = taskResult.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val old = sums.get(entry.getKey) if (old != null) { old.merge(entry.getValue) } else { sums.put(entry.getKey, entry.getValue) } } } override def currentResult(): Map[T, BoundedDouble] = { if (outputsMerged == totalOutputs) { val result = new JHashMap[T, BoundedDouble](sums.size) val iter = sums.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val sum = entry.getValue.sum result(entry.getKey) = new BoundedDouble(sum, 1.0, sum, sum) } result } else if (outputsMerged == 0) { new HashMap[T, BoundedDouble] } else { val p = outputsMerged.toDouble / totalOutputs val studentTCacher = new StudentTCacher(confidence) val result = new JHashMap[T, BoundedDouble](sums.size) val iter = sums.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val counter = entry.getValue val meanEstimate = counter.mean val meanVar = counter.sampleVariance / counter.count val countEstimate = (counter.count + 1 - p) / p val countVar = (counter.count + 1) * (1 - p) / (p * p) val sumEstimate = meanEstimate * countEstimate val sumVar = (meanEstimate * meanEstimate * countVar) + (countEstimate * countEstimate * meanVar) + (meanVar * countVar) val sumStdev = math.sqrt(sumVar) val confFactor = studentTCacher.get(counter.count) val low = sumEstimate - confFactor * sumStdev val high = sumEstimate + confFactor * sumStdev result(entry.getKey) = new BoundedDouble(sumEstimate, confidence, low, high) } result } } }
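The evaluator's merge step folds each task's per-key partial results into a running map, inserting the key on first sight and merging otherwise. A simplified sketch of that merge-or-insert step, using plain Double sums instead of StatCounter:

import scala.collection.mutable.HashMap

object GroupedMergeSketch {
  // Merge one task's per-key partial sums into the running totals,
  // the same merge-or-insert step the evaluator performs per entry.
  def merge(totals: HashMap[String, Double], taskResult: Map[String, Double]): Unit =
    taskResult.foreach { case (k, v) =>
      totals(k) = totals.getOrElse(k, 0.0) + v
    }

  def main(args: Array[String]): Unit = {
    val totals = HashMap[String, Double]()
    merge(totals, Map("a" -> 1.5, "b" -> 2.0))
    merge(totals, Map("a" -> 0.5))
    println(totals) // HashMap(a -> 2.0, b -> 2.0)
  }
}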
Example 100
Source File: GroupedCountEvaluator.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.partial import java.util.{HashMap => JHashMap} import scala.collection.JavaConversions.mapAsScalaMap import scala.collection.Map import scala.collection.mutable.HashMap import scala.reflect.ClassTag import org.apache.commons.math3.distribution.NormalDistribution import org.apache.spark.util.collection.OpenHashMap private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double) extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] { var outputsMerged = 0 var sums = new OpenHashMap[T, Long]() // Sum of counts for each key override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]) { outputsMerged += 1 taskResult.foreach { case (key, value) => sums.changeValue(key, value, _ + value) } } override def currentResult(): Map[T, BoundedDouble] = { if (outputsMerged == totalOutputs) { val result = new JHashMap[T, BoundedDouble](sums.size) sums.foreach { case (key, sum) => result(key) = new BoundedDouble(sum, 1.0, sum, sum) } result } else if (outputsMerged == 0) { new HashMap[T, BoundedDouble] } else { val p = outputsMerged.toDouble / totalOutputs val confFactor = new NormalDistribution(). inverseCumulativeProbability(1 - (1 - confidence) / 2) val result = new JHashMap[T, BoundedDouble](sums.size) sums.foreach { case (key, sum) => val mean = (sum + 1 - p) / p val variance = (sum + 1) * (1 - p) / (p * p) val stdev = math.sqrt(variance) val low = mean - confFactor * stdev val high = mean + confFactor * stdev result(key) = new BoundedDouble(mean, confidence, low, high) } result } } }
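When only a fraction p of the outputs has been merged, the evaluator extrapolates each partial count to an estimate with a normal-approximation confidence interval. The sketch below reproduces those mean/variance formulas with a fixed z value of 1.96 standing in for the exact quantile that the real code obtains from commons-math:

object CountExtrapolationSketch {
  // p is the fraction of outputs seen so far; sum is the partial count for one key.
  // Mirrors the mean/variance formulas in GroupedCountEvaluator above.
  def bound(sum: Long, p: Double): (Double, Double, Double) = {
    val mean = (sum + 1 - p) / p
    val stdev = math.sqrt((sum + 1) * (1 - p) / (p * p))
    val z = 1.96 // ~ normal quantile for 95% confidence; the real code computes it exactly
    (mean, mean - z * stdev, mean + z * stdev)
  }

  def main(args: Array[String]): Unit = {
    val (mean, low, high) = bound(40L, 0.5)
    println(f"estimate $mean%.1f in [$low%.1f, $high%.1f]")
  }
}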
Example 101
Source File: GroupedMeanEvaluator.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.partial import java.util.{HashMap => JHashMap} import scala.collection.JavaConversions.mapAsScalaMap import scala.collection.Map import scala.collection.mutable.HashMap import org.apache.spark.util.StatCounter private[spark] class GroupedMeanEvaluator[T](totalOutputs: Int, confidence: Double) extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] { var outputsMerged = 0 var sums = new JHashMap[T, StatCounter] // Sum of counts for each key override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]) { outputsMerged += 1 val iter = taskResult.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val old = sums.get(entry.getKey) if (old != null) { old.merge(entry.getValue) } else { sums.put(entry.getKey, entry.getValue) } } } override def currentResult(): Map[T, BoundedDouble] = { if (outputsMerged == totalOutputs) { val result = new JHashMap[T, BoundedDouble](sums.size) val iter = sums.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val mean = entry.getValue.mean result(entry.getKey) = new BoundedDouble(mean, 1.0, mean, mean) } result } else if (outputsMerged == 0) { new HashMap[T, BoundedDouble] } else { val studentTCacher = new StudentTCacher(confidence) val result = new JHashMap[T, BoundedDouble](sums.size) val iter = sums.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val counter = entry.getValue val mean = counter.mean val stdev = math.sqrt(counter.sampleVariance / counter.count) val confFactor = studentTCacher.get(counter.count) val low = mean - confFactor * stdev val high = mean + confFactor * stdev result(entry.getKey) = new BoundedDouble(mean, confidence, low, high) } result } } }
Example 102
Source File: Mapper.scala From CSYE7200 with MIT License | 5 votes |
package edu.neu.coe.csye7200.mapreduce import akka.actor.{Actor, ActorLogging, ActorRef} import scala.collection.mutable import scala.collection.mutable.HashMap import scala.util._ class Mapper_Forgiving[K1,V1,K2,V2](f: (K1,V1)=>(K2,V2)) extends Mapper[K1,V1,K2,V2](f) { override def prepareReply(v2k2ts: Seq[Try[(K2,V2)]]): (Map[K2, Seq[V2]], Seq[Throwable]) = { val v2sK2m = mutable.HashMap[K2,Seq[V2]]() // mutable val xs = Seq[Throwable]() // mutable // CONSIDER using traverse for (v2k2t <- v2k2ts; v2k2e = Master.sequence(v2k2t)) v2k2e match { case Right((k2,v2)) => v2sK2m put(k2, v2+:v2sK2m.getOrElse(k2, Nil)) case Left(x) => xs :+ x } (v2sK2m.toMap, xs) } } case class Incoming[K, V](m: Seq[(K,V)]) { override def toString = s"Incoming: with ${m.size} elements" } object Incoming { def sequence[K,V](vs: Seq[V]): Incoming[K,V] = Incoming((vs zip Stream.continually(null.asInstanceOf[K])).map{_.swap}) def map[K, V](vKm: Map[K,V]): Incoming[K,V] = Incoming(vKm.toSeq) } object Mapper { }
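prepareReply groups successful (K2, V2) pairs by key into a mutable HashMap while collecting failures on the side. Note that in the code above xs is an immutable Seq and the result of xs :+ x is discarded, so throwables are silently dropped; the sketch below uses a ListBuffer so the errors actually accumulate. It is a standalone illustration, not the Mapper class itself:

import scala.collection.mutable
import scala.util.{Failure, Success, Try}

object PrepareReplySketch {
  // Group successful (K, V) pairs by key and collect failures separately.
  def prepareReply[K, V](results: Seq[Try[(K, V)]]): (Map[K, Seq[V]], Seq[Throwable]) = {
    val grouped = mutable.HashMap[K, Seq[V]]()
    val errors  = mutable.ListBuffer[Throwable]()
    results.foreach {
      case Success((k, v)) => grouped(k) = v +: grouped.getOrElse(k, Nil)
      case Failure(x)      => errors += x
    }
    (grouped.toMap, errors.toList)
  }

  def main(args: Array[String]): Unit = {
    val in = Seq(Success("a" -> 1), Success("a" -> 2), Failure(new RuntimeException("bad")))
    println(prepareReply(in)) // (Map(a -> List(2, 1)), List(java.lang.RuntimeException: bad))
  }
}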
Example 103
Source File: LocalKMeans.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples import java.util.Random import scala.collection.mutable.HashMap import scala.collection.mutable.HashSet import breeze.linalg.{squaredDistance, DenseVector, Vector} object LocalKMeans { val N = 1000 val R = 1000 // Scaling factor val D = 10 val K = 10 val convergeDist = 0.001 val rand = new Random(42) def generateData: Array[DenseVector[Double]] = { def generatePoint(i: Int): DenseVector[Double] = { DenseVector.fill(D) {rand.nextDouble * R} } Array.tabulate(N)(generatePoint) } def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = { var bestIndex = 0 var closest = Double.PositiveInfinity for (i <- 1 to centers.size) { val vCurr = centers(i) val tempDist = squaredDistance(p, vCurr) if (tempDist < closest) { closest = tempDist bestIndex = i } } bestIndex } def showWarning() { System.err.println( """WARN: This is a naive implementation of KMeans Clustering and is given as an example! |Please use org.apache.spark.ml.clustering.KMeans |for more conventional use. """.stripMargin) } def main(args: Array[String]) { showWarning() val data = generateData val points = new HashSet[Vector[Double]] val kPoints = new HashMap[Int, Vector[Double]] var tempDist = 1.0 while (points.size < K) { points.add(data(rand.nextInt(N))) } val iter = points.iterator for (i <- 1 to points.size) { kPoints.put(i, iter.next()) } println(s"Initial centers: $kPoints") while(tempDist > convergeDist) { val closest = data.map (p => (closestPoint(p, kPoints), (p, 1))) val mappings = closest.groupBy[Int] (x => x._1) val pointStats = mappings.map { pair => pair._2.reduceLeft [(Int, (Vector[Double], Int))] { case ((id1, (p1, c1)), (id2, (p2, c2))) => (id1, (p1 + p2, c1 + c2)) } } var newPoints = pointStats.map {mapping => (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))} tempDist = 0.0 for (mapping <- newPoints) { tempDist += squaredDistance(kPoints(mapping._1), mapping._2) } for (newP <- newPoints) { kPoints.put(newP._1, newP._2) } } println(s"Final centers: $kPoints") } } // scalastyle:on println
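The k-means loop keeps its centers in a HashMap[Int, Vector[Double]] keyed 1..K and scans them for the nearest one. A self-contained version of that scan using plain arrays instead of breeze vectors (squaredDistance here is a simple re-implementation, not the breeze function):

import scala.collection.mutable.HashMap

object ClosestCenterSketch {
  def squaredDistance(a: Array[Double], b: Array[Double]): Double =
    a.zip(b).map { case (x, y) => (x - y) * (x - y) }.sum

  // Same scan as closestPoint above, with plain arrays instead of breeze vectors.
  def closestPoint(p: Array[Double], centers: HashMap[Int, Array[Double]]): Int = {
    var bestIndex = 0
    var closest = Double.PositiveInfinity
    for (i <- 1 to centers.size) {
      val d = squaredDistance(p, centers(i))
      if (d < closest) { closest = d; bestIndex = i }
    }
    bestIndex
  }

  def main(args: Array[String]): Unit = {
    val centers = HashMap(1 -> Array(0.0, 0.0), 2 -> Array(5.0, 5.0))
    println(closestPoint(Array(4.0, 4.5), centers)) // 2
  }
}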
Example 104
Source File: LocalityPlacementStrategySuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.yarn import scala.collection.JavaConverters._ import scala.collection.mutable.{HashMap, HashSet, Set} import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.conf.YarnConfiguration import org.mockito.Mockito._ import org.apache.spark.{SparkConf, SparkFunSuite} class LocalityPlacementStrategySuite extends SparkFunSuite { test("handle large number of containers and tasks (SPARK-18750)") { // Run the test in a thread with a small stack size, since the original issue // surfaced as a StackOverflowError. var error: Throwable = null val runnable = new Runnable() { override def run(): Unit = try { runTest() } catch { case e: Throwable => error = e } } val thread = new Thread(new ThreadGroup("test"), runnable, "test-thread", 32 * 1024) thread.start() thread.join() assert(error === null) } private def runTest(): Unit = { val yarnConf = new YarnConfiguration() // The numbers below have been chosen to balance being large enough to replicate the // original issue while not taking too long to run when the issue is fixed. The main // goal is to create enough requests for localized containers (so there should be many // tasks on several hosts that have no allocated containers). val resource = Resource.newInstance(8 * 1024, 4) val strategy = new LocalityPreferredContainerPlacementStrategy(new SparkConf(), yarnConf, resource, new MockResolver()) val totalTasks = 32 * 1024 val totalContainers = totalTasks / 16 val totalHosts = totalContainers / 16 val mockId = mock(classOf[ContainerId]) val hosts = (1 to totalHosts).map { i => (s"host_$i", totalTasks % i) }.toMap val containers = (1 to totalContainers).map { i => mockId } val count = containers.size / hosts.size / 2 val hostToContainerMap = new HashMap[String, Set[ContainerId]]() hosts.keys.take(hosts.size / 2).zipWithIndex.foreach { case (host, i) => val hostContainers = new HashSet[ContainerId]() containers.drop(count * i).take(i).foreach { c => hostContainers += c } hostToContainerMap(host) = hostContainers } strategy.localityOfRequestedContainers(containers.size * 2, totalTasks, hosts, hostToContainerMap, Nil) } }
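The suite builds hostToContainerMap, a HashMap from host name to a mutable Set of container ids, before handing it to the placement strategy. A small sketch of that map construction with strings standing in for YARN ContainerIds:

import scala.collection.mutable.{HashMap, HashSet, Set}

object HostContainerMapSketch {
  def main(args: Array[String]): Unit = {
    // Strings stand in for YARN ContainerIds; the shape of the map matches the suite above.
    val containers = (1 to 6).map(i => s"container_$i")
    val hostToContainerMap = new HashMap[String, Set[String]]()
    Seq("host_1", "host_2").zipWithIndex.foreach { case (host, i) =>
      val hostContainers = new HashSet[String]()
      containers.drop(3 * i).take(3).foreach(hostContainers += _)
      hostToContainerMap(host) = hostContainers
    }
    hostToContainerMap.foreach { case (h, cs) => println(s"$h -> $cs") }
  }
}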
Example 105
Source File: JsonUtils.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.kafka010 import scala.collection.mutable.HashMap import scala.util.control.NonFatal import org.apache.kafka.common.TopicPartition import org.json4s.NoTypeHints import org.json4s.jackson.Serialization def partitionOffsets(partitionOffsets: Map[TopicPartition, Long]): String = { val result = new HashMap[String, HashMap[Int, Long]]() implicit val ordering = new Ordering[TopicPartition] { override def compare(x: TopicPartition, y: TopicPartition): Int = { Ordering.Tuple2[String, Int].compare((x.topic, x.partition), (y.topic, y.partition)) } } val partitions = partitionOffsets.keySet.toSeq.sorted // sort for more determinism partitions.foreach { tp => val off = partitionOffsets(tp) val parts = result.getOrElse(tp.topic, new HashMap[Int, Long]) parts += tp.partition -> off result += tp.topic -> parts } Serialization.write(result) } }
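partitionOffsets flattens a Map[TopicPartition, Long] into a nested HashMap keyed by topic and then by partition, sorting first so the JSON output is deterministic. A sketch of that grouping with (topic, partition) tuples in place of Kafka's TopicPartition:

import scala.collection.mutable.HashMap

object PartitionOffsetsSketch {
  // Group (topic, partition) -> offset into topic -> (partition -> offset),
  // the same nesting JsonUtils.partitionOffsets builds before serializing.
  def group(offsets: Map[(String, Int), Long]): HashMap[String, HashMap[Int, Long]] = {
    val result = new HashMap[String, HashMap[Int, Long]]()
    offsets.toSeq.sortBy { case ((t, p), _) => (t, p) }.foreach { case ((topic, part), off) =>
      val parts = result.getOrElse(topic, new HashMap[Int, Long])
      parts += part -> off
      result += topic -> parts
    }
    result
  }

  def main(args: Array[String]): Unit = {
    println(group(Map(("t1", 0) -> 10L, ("t1", 1) -> 20L, ("t2", 0) -> 5L)))
  }
}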
Example 106
Source File: StageInfo.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import scala.collection.mutable.HashMap import org.apache.spark.annotation.DeveloperApi import org.apache.spark.executor.TaskMetrics import org.apache.spark.storage.RDDInfo def fromStage( stage: Stage, attemptId: Int, numTasks: Option[Int] = None, taskMetrics: TaskMetrics = null, taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty ): StageInfo = { val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd) val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos new StageInfo( stage.id, attemptId, stage.name, numTasks.getOrElse(stage.numTasks), rddInfos, stage.parents.map(_.id), stage.details, taskMetrics, taskLocalityPreferences) } }
Example 107
Source File: GroupedCountEvaluator.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.partial import scala.collection.Map import scala.collection.mutable.HashMap import scala.reflect.ClassTag import org.apache.spark.util.collection.OpenHashMap private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double) extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] { private var outputsMerged = 0 private val sums = new OpenHashMap[T, Long]() // Sum of counts for each key override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]): Unit = { outputsMerged += 1 taskResult.foreach { case (key, value) => sums.changeValue(key, value, _ + value) } } override def currentResult(): Map[T, BoundedDouble] = { if (outputsMerged == totalOutputs) { sums.map { case (key, sum) => (key, new BoundedDouble(sum, 1.0, sum, sum)) }.toMap } else if (outputsMerged == 0) { new HashMap[T, BoundedDouble] } else { val p = outputsMerged.toDouble / totalOutputs sums.map { case (key, sum) => (key, CountEvaluator.bound(confidence, sum, p)) }.toMap } } }
Example 108
Source File: MasterWebUISuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.master.ui import java.io.DataOutputStream import java.net.{HttpURLConnection, URL} import java.nio.charset.StandardCharsets import java.util.Date import scala.collection.mutable.HashMap import org.mockito.Mockito.{mock, times, verify, when} import org.scalatest.BeforeAndAfterAll import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.deploy.DeployMessages.{KillDriverResponse, RequestKillDriver} import org.apache.spark.deploy.DeployTestUtils._ import org.apache.spark.deploy.master._ import org.apache.spark.rpc.{RpcEndpointRef, RpcEnv} class MasterWebUISuite extends SparkFunSuite with BeforeAndAfterAll { val conf = new SparkConf val securityMgr = new SecurityManager(conf) val rpcEnv = mock(classOf[RpcEnv]) val master = mock(classOf[Master]) val masterEndpointRef = mock(classOf[RpcEndpointRef]) when(master.securityMgr).thenReturn(securityMgr) when(master.conf).thenReturn(conf) when(master.rpcEnv).thenReturn(rpcEnv) when(master.self).thenReturn(masterEndpointRef) val masterWebUI = new MasterWebUI(master, 0) override def beforeAll() { super.beforeAll() masterWebUI.bind() } override def afterAll() { masterWebUI.stop() super.afterAll() } test("kill application") { val appDesc = createAppDesc() // use new start date so it isn't filtered by UI val activeApp = new ApplicationInfo( new Date().getTime, "app-0", appDesc, new Date(), null, Int.MaxValue) when(master.idToApp).thenReturn(HashMap[String, ApplicationInfo]((activeApp.id, activeApp))) val url = s"http://localhost:${masterWebUI.boundPort}/app/kill/" val body = convPostDataToString(Map(("id", activeApp.id), ("terminate", "true"))) val conn = sendHttpRequest(url, "POST", body) conn.getResponseCode // Verify the master was called to remove the active app verify(master, times(1)).removeApplication(activeApp, ApplicationState.KILLED) } test("kill driver") { val activeDriverId = "driver-0" val url = s"http://localhost:${masterWebUI.boundPort}/driver/kill/" val body = convPostDataToString(Map(("id", activeDriverId), ("terminate", "true"))) val conn = sendHttpRequest(url, "POST", body) conn.getResponseCode // Verify that master was asked to kill driver with the correct id verify(masterEndpointRef, times(1)).ask[KillDriverResponse](RequestKillDriver(activeDriverId)) } private def convPostDataToString(data: Map[String, String]): String = { (for ((name, value) <- data) yield s"$name=$value").mkString("&") } private def sendHttpRequest( url: String, method: String, body: String = ""): HttpURLConnection = { val conn = new URL(url).openConnection().asInstanceOf[HttpURLConnection] conn.setRequestMethod(method) if (body.nonEmpty) { conn.setDoOutput(true) conn.setRequestProperty("Content-Type", "application/x-www-form-urlencoded") conn.setRequestProperty("Content-Length", Integer.toString(body.length)) val out = new DataOutputStream(conn.getOutputStream) out.write(body.getBytes(StandardCharsets.UTF_8)) out.close() } conn } }
Example 109
Source File: TaskDescriptionSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.io.{ByteArrayOutputStream, DataOutputStream, UTFDataFormatException} import java.nio.ByteBuffer import java.util.Properties import scala.collection.mutable.HashMap import org.apache.spark.SparkFunSuite class TaskDescriptionSuite extends SparkFunSuite { test("encoding and then decoding a TaskDescription results in the same TaskDescription") { val originalFiles = new HashMap[String, Long]() originalFiles.put("fileUrl1", 1824) originalFiles.put("fileUrl2", 2) val originalJars = new HashMap[String, Long]() originalJars.put("jar1", 3) val originalProperties = new Properties() originalProperties.put("property1", "18") originalProperties.put("property2", "test value") // SPARK-19796 -- large property values (like a large job description for a long sql query) // can cause problems for DataOutputStream, make sure we handle correctly val sb = new StringBuilder() (0 to 10000).foreach(_ => sb.append("1234567890")) val largeString = sb.toString() originalProperties.put("property3", largeString) // make sure we've got a good test case intercept[UTFDataFormatException] { val out = new DataOutputStream(new ByteArrayOutputStream()) try { out.writeUTF(largeString) } finally { out.close() } } // Create a dummy byte buffer for the task. val taskBuffer = ByteBuffer.wrap(Array[Byte](1, 2, 3, 4)) val originalTaskDescription = new TaskDescription( taskId = 1520589, attemptNumber = 2, executorId = "testExecutor", name = "task for test", index = 19, originalFiles, originalJars, originalProperties, taskBuffer ) val serializedTaskDescription = TaskDescription.encode(originalTaskDescription) val decodedTaskDescription = TaskDescription.decode(serializedTaskDescription) // Make sure that all of the fields in the decoded task description match the original. assert(decodedTaskDescription.taskId === originalTaskDescription.taskId) assert(decodedTaskDescription.attemptNumber === originalTaskDescription.attemptNumber) assert(decodedTaskDescription.executorId === originalTaskDescription.executorId) assert(decodedTaskDescription.name === originalTaskDescription.name) assert(decodedTaskDescription.index === originalTaskDescription.index) assert(decodedTaskDescription.addedFiles.equals(originalFiles)) assert(decodedTaskDescription.addedJars.equals(originalJars)) assert(decodedTaskDescription.properties.equals(originalTaskDescription.properties)) assert(decodedTaskDescription.serializedTask.equals(taskBuffer)) } }
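The assertions rely on the fact that mutable HashMaps compare by value, so the decoded addedFiles/addedJars maps are equal to the originals as long as they hold the same entries. A tiny sketch of that equality check:

import scala.collection.mutable.HashMap

object HashMapEqualitySketch {
  def main(args: Array[String]): Unit = {
    val originalFiles = new HashMap[String, Long]()
    originalFiles.put("fileUrl1", 1824L)
    originalFiles.put("fileUrl2", 2L)

    // A decoded copy with the same entries compares equal by value,
    // which is what the addedFiles/addedJars assertions above rely on.
    val decodedFiles = HashMap("fileUrl2" -> 2L, "fileUrl1" -> 1824L)
    assert(decodedFiles.equals(originalFiles))
    println("maps are equal: " + (decodedFiles == originalFiles))
  }
}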
Example 110
Source File: KinesisSourceOffset.scala From kinesis-sql with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.kinesis import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import scala.collection.mutable.HashMap import scala.util.control.NonFatal import org.apache.spark.sql.execution.streaming.Offset import org.apache.spark.sql.execution.streaming.SerializedOffset import org.apache.spark.sql.sources.v2.reader.streaming.{Offset => OffsetV2, PartitionOffset} def apply(json: String): KinesisSourceOffset = { try { val readObj = Serialization.read[ Map[ String, Map[ String, String ] ] ](json) val metadata = readObj.get("metadata") val shardInfoMap: Map[String, ShardInfo ] = readObj.filter(_._1 != "metadata").map { case (shardId, value) => shardId.toString -> new ShardInfo(shardId.toString, value.get("iteratorType").get, value.get("iteratorPosition").get) }.toMap KinesisSourceOffset( new ShardOffsets( metadata.get("batchId").toLong, metadata.get("streamName"), shardInfoMap)) } catch { case NonFatal(x) => throw new IllegalArgumentException(x) } } def getMap(shardInfos: Array[ShardInfo]): Map[String, ShardInfo] = { shardInfos.map { s: ShardInfo => (s.shardId -> s) }.toMap } }
Example 111
Source File: LocalKMeans.scala From BigDatalog with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples import java.util.Random import scala.collection.mutable.HashMap import scala.collection.mutable.HashSet import breeze.linalg.{Vector, DenseVector, squaredDistance} import org.apache.spark.SparkContext._ object LocalKMeans { val N = 1000 val R = 1000 // Scaling factor val D = 10 val K = 10 val convergeDist = 0.001 val rand = new Random(42) def generateData: Array[DenseVector[Double]] = { def generatePoint(i: Int): DenseVector[Double] = { DenseVector.fill(D){rand.nextDouble * R} } Array.tabulate(N)(generatePoint) } def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = { var index = 0 var bestIndex = 0 var closest = Double.PositiveInfinity for (i <- 1 to centers.size) { val vCurr = centers.get(i).get val tempDist = squaredDistance(p, vCurr) if (tempDist < closest) { closest = tempDist bestIndex = i } } bestIndex } def showWarning() { System.err.println( """WARN: This is a naive implementation of KMeans Clustering and is given as an example! |Please use the KMeans method found in org.apache.spark.mllib.clustering |for more conventional use. """.stripMargin) } def main(args: Array[String]) { showWarning() val data = generateData var points = new HashSet[Vector[Double]] var kPoints = new HashMap[Int, Vector[Double]] var tempDist = 1.0 while (points.size < K) { points.add(data(rand.nextInt(N))) } val iter = points.iterator for (i <- 1 to points.size) { kPoints.put(i, iter.next()) } println("Initial centers: " + kPoints) while(tempDist > convergeDist) { var closest = data.map (p => (closestPoint(p, kPoints), (p, 1))) var mappings = closest.groupBy[Int] (x => x._1) var pointStats = mappings.map { pair => pair._2.reduceLeft [(Int, (Vector[Double], Int))] { case ((id1, (p1, c1)), (id2, (p2, c2))) => (id1, (p1 + p2, c1 + c2)) } } var newPoints = pointStats.map {mapping => (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))} tempDist = 0.0 for (mapping <- newPoints) { tempDist += squaredDistance(kPoints.get(mapping._1).get, mapping._2) } for (newP <- newPoints) { kPoints.put(newP._1, newP._2) } } println("Final centers: " + kPoints) } } // scalastyle:on println
Example 112
Source File: RecursivePlanDetails.scala From BigDatalog with Apache License 2.0 | 5 votes |
package edu.ucla.cs.wis.bigdatalog.spark import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import scala.collection.mutable.HashMap class RecursivePlanDetails extends Serializable { private val baseRelationsByName = new HashMap[String, LogicalPlan] val recursiveRelations = new HashMap[String, LogicalPlan] val aggregateRelations = new HashMap[String, LogicalPlan] def addBaseRelation(name: String, obj: LogicalPlan) = { baseRelationsByName.put(name, obj) } def containsBaseRelation(name: String): Boolean = { baseRelationsByName.contains(name) } }
Example 113
Source File: CachedRDDManager.scala From BigDatalog with Apache License 2.0 | 5 votes |
package edu.ucla.cs.wis.bigdatalog.spark.execution.recursion import org.apache.spark.Logging import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel import scala.collection.mutable.{HashMap, HashSet, Set} class CachedRDDManager(defaultStorageLevel: StorageLevel) extends Logging with Serializable { val iterationToRDDMap = new HashMap[Int, HashSet[RDD[_]]] var currentIteration : Int = 0 def persist(rdd: RDD[_]): Unit = { persist(rdd, false) } def persist(rdd: RDD[_], doMemoryCheckpoint: Boolean): Unit = { iterationToRDDMap.getOrElseUpdate(currentIteration, new HashSet[RDD[_]]).add(rdd) rdd.persist(defaultStorageLevel) if (doMemoryCheckpoint) rdd.memoryCheckpoint() } def cleanUpIteration(iterationsBackToRemove: Int = 2) = { val start = System.currentTimeMillis() if (currentIteration >= iterationsBackToRemove) { val iterationId = currentIteration - iterationsBackToRemove if (iterationToRDDMap.contains(iterationId)) { val rdds: HashSet[RDD[_]] = iterationToRDDMap.remove(iterationId).get if (rdds.nonEmpty) logInfo("Unpersisting "+rdds.size+" rdds for iteration " + iterationId) rdds.foreach(rdd => rdd.unpersist(false)) } } logInfo("CleanUpIteration took " + (System.currentTimeMillis() - start) + " ms") currentIteration += 1 } def cleanUpIterationById(iterationId: Int) = { if (iterationToRDDMap.contains(iterationId)) { val rdds: HashSet[RDD[_]] = iterationToRDDMap.remove(iterationId).get rdds.foreach(rdd => rdd.unpersist(false)) } } def incrementIteration() { currentIteration += 1} def clear() = { iterationToRDDMap.clear() } def clear(remainCached: Seq[RDD[_]]) = { iterationToRDDMap.keySet.foreach(key => logInfo("key: " + key + " value: " + iterationToRDDMap.get(key))) iterationToRDDMap.keySet .foreach(key => iterationToRDDMap.get(key) .foreach(value => value.foreach(item => {if (!remainCached.contains(item)) item.unpersist(false)}))) iterationToRDDMap.clear() } def unpersist(rdds: Set[RDD[_]]) = { for (rdd <- rdds) { iterationToRDDMap.synchronized { // rdd should only be in 1 iteration val iterations = iterationToRDDMap.filter(x => x._2.contains(rdd)) if (iterations.nonEmpty) { val iteration = iterations.head iteration._2.remove(rdd) rdd.unpersist(false) if (iteration._2.isEmpty) iterationToRDDMap.remove(iteration._1) } } } } override def toString = { val output = new StringBuilder iterationToRDDMap.keySet.toSeq.sorted .foreach(iteration => { val rdds = iterationToRDDMap.get(iteration) rdds.foreach(rdd => output.append(iteration + ":" + rdd + "\n")) }) output.toString() } }
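CachedRDDManager groups the RDDs persisted in each iteration under an iteration number with getOrElseUpdate, and later removes whole iterations to unpersist them. The sketch below mimics that bookkeeping with strings standing in for RDDs:

import scala.collection.mutable.{HashMap, HashSet}

object IterationRegistrySketch {
  def main(args: Array[String]): Unit = {
    // Strings stand in for RDDs; the bookkeeping mirrors CachedRDDManager.
    val iterationToRDDMap = new HashMap[Int, HashSet[String]]
    var currentIteration = 0

    def persist(rdd: String): Unit =
      iterationToRDDMap.getOrElseUpdate(currentIteration, new HashSet[String]).add(rdd)

    persist("rdd-a"); persist("rdd-b")
    currentIteration += 1
    persist("rdd-c")

    // Drop everything registered two iterations back (here: nothing yet).
    iterationToRDDMap.remove(currentIteration - 2).foreach(_.foreach(r => println(s"unpersist $r")))
    println(iterationToRDDMap)
  }
}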
Example 114
Source File: RelationCatalog.scala From BigDatalog with Apache License 2.0 | 5 votes |
package edu.ucla.cs.wis.bigdatalog.spark import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.types.StructType import scala.collection.mutable.HashMap class RelationCatalog extends Serializable { val directory = HashMap.empty[String, RelationInfo] def addRelation(name : String, schema : StructType) : Unit = { val relationInfo = new RelationInfo().setSchema(schema) directory.get(name) match { case Some(oldRelationInfo) => // update rdd if already present. Schema should not change oldRelationInfo.setRDD(relationInfo.getRDD()) case None => directory.put(name, relationInfo) } } def setRDD(name : String, rdd : RDD[InternalRow]) : Unit = { directory.get(name) match { case Some(oldRelationInfo) => oldRelationInfo.setRDD(rdd) case None => directory.put(name, new RelationInfo().setRDD(rdd)) } } def getRelationInfo(name : String) : RelationInfo = { if (directory.contains(name)) directory(name) else null } def removeRDD(name : String) : Unit = { directory.remove(name) } def clear() : Unit = { directory.clear() } override def toString(): String = { val output = new StringBuilder() directory.iterator.foreach(f => output.append(f.toString())) output.toString() } } class RelationInfo() extends Serializable { private var schema : StructType = _ private var rdd : RDD[InternalRow] = _ def getSchema() : StructType = schema def setSchema(schema : StructType) : RelationInfo = { this.schema = schema this } def getRDD() : RDD[InternalRow] = rdd def setRDD(rdd : RDD[InternalRow]) : RelationInfo = { this.rdd = rdd this } override def toString() : String = { "schema: " + this.schema + (if (rdd != null) " RDD") } }
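RelationCatalog keeps its directory in a HashMap and uses get with a match to either update an existing entry or insert a new one. A trimmed sketch of that update-or-insert shape, with a one-field Info class standing in for RelationInfo:

import scala.collection.mutable.HashMap

object CatalogSketch {
  // Simplified RelationInfo: just an optional payload instead of schema + RDD.
  final class Info(var payload: Option[String] = None)

  val directory = HashMap.empty[String, Info]

  // Update-or-insert, the same get/match shape used by RelationCatalog.setRDD.
  def set(name: String, payload: String): Unit = directory.get(name) match {
    case Some(info) => info.payload = Some(payload)
    case None       => directory.put(name, new Info(Some(payload)))
  }

  def main(args: Array[String]): Unit = {
    set("edges", "rdd#1")
    set("edges", "rdd#2") // updates in place
    println(directory("edges").payload) // Some(rdd#2)
  }
}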
Example 115
Source File: UIData.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.jobs import org.apache.spark.JobExecutionStatus import org.apache.spark.executor.TaskMetrics import org.apache.spark.scheduler.{AccumulableInfo, TaskInfo} import org.apache.spark.util.collection.OpenHashSet import scala.collection.mutable import scala.collection.mutable.HashMap private[spark] object UIData { class ExecutorSummary { var taskTime : Long = 0 var failedTasks : Int = 0 var succeededTasks : Int = 0 var inputBytes : Long = 0 var inputRecords : Long = 0 var outputBytes : Long = 0 var outputRecords : Long = 0 var shuffleRead : Long = 0 var shuffleReadRecords : Long = 0 var shuffleWrite : Long = 0 var shuffleWriteRecords : Long = 0 var memoryBytesSpilled : Long = 0 var diskBytesSpilled : Long = 0 } class JobUIData( var jobId: Int = -1, var submissionTime: Option[Long] = None, var completionTime: Option[Long] = None, var stageIds: Seq[Int] = Seq.empty, var jobGroup: Option[String] = None, var status: JobExecutionStatus = JobExecutionStatus.UNKNOWN, case class TaskUIData( var taskInfo: TaskInfo, var taskMetrics: Option[TaskMetrics] = None, var errorMessage: Option[String] = None) case class ExecutorUIData( val startTime: Long, var finishTime: Option[Long] = None, var finishReason: Option[String] = None) }
Example 116
Source File: PoolTable.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.jobs import java.net.URLEncoder import scala.collection.mutable.HashMap import scala.xml.Node import org.apache.spark.scheduler.{Schedulable, StageInfo} import org.apache.spark.ui.UIUtils private[ui] class PoolTable(pools: Seq[Schedulable], parent: StagesTab) { private val listener = parent.progressListener def toNodeSeq: Seq[Node] = { listener.synchronized { poolTable(poolRow, pools) } } private def poolTable( makeRow: (Schedulable, HashMap[String, HashMap[Int, StageInfo]]) => Seq[Node], rows: Seq[Schedulable]): Seq[Node] = { <table class="table table-bordered table-striped table-condensed sortable table-fixed"> <thead> <th>Pool Name</th> <th>Minimum Share</th> <th>Pool Weight</th> <th>Active Stages</th> <th>Running Tasks</th> <th>SchedulingMode</th> </thead> <tbody> {rows.map(r => makeRow(r, listener.poolToActiveStages))} </tbody> </table> } private def poolRow( p: Schedulable, poolToActiveStages: HashMap[String, HashMap[Int, StageInfo]]): Seq[Node] = { val activeStages = poolToActiveStages.get(p.name) match { case Some(stages) => stages.size case None => 0 } val href = "%s/stages/pool?poolname=%s" .format(UIUtils.prependBaseUri(parent.basePath), URLEncoder.encode(p.name, "UTF-8")) <tr> <td> <a href={href}>{p.name}</a> </td> <td>{p.minShare}</td> <td>{p.weight}</td> <td>{activeStages}</td> <td>{p.runningTasks}</td> <td>{p.schedulingMode}</td> </tr> } }
Example 117
Source File: StageInfo.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import scala.collection.mutable.HashMap import org.apache.spark.annotation.DeveloperApi import org.apache.spark.storage.RDDInfo def fromStage( stage: Stage, attemptId: Int, numTasks: Option[Int] = None, taskLocalityPreferences: Seq[Seq[TaskLocation]] = Seq.empty ): StageInfo = { val ancestorRddInfos = stage.rdd.getNarrowAncestors.map(RDDInfo.fromRdd) val rddInfos = Seq(RDDInfo.fromRdd(stage.rdd)) ++ ancestorRddInfos new StageInfo( stage.id, attemptId, stage.name, numTasks.getOrElse(stage.numTasks), rddInfos, stage.parents.map(_.id), stage.details, taskLocalityPreferences) } }
Example 118
Source File: FixedPointJobDefinition.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler.fixedpoint import org.apache.spark.TaskContext import org.apache.spark.rdd.RDD import scala.collection.mutable.{HashSet, HashMap, Set} class FixedPointJobDefinition(val setupIteration: (FixedPointJobDefinition, RDD[_]) => RDD[_], val cleanupIteration: (Int) => Unit) { var _fixedPointEvaluator: (TaskContext, Iterator[_]) => Boolean = null var finalRDD: RDD[_] = null var rddIds = Array.empty[Int] // for all and delta rdd id for FixedPointResultTask execution on worker def fixedPointEvaluator(fixedPointEvaluator: (TaskContext, Iterator[_]) => Boolean) = { _fixedPointEvaluator = fixedPointEvaluator } def getfixedPointEvaluator = _fixedPointEvaluator.asInstanceOf[(TaskContext, Iterator[_]) => _] def getFinalRDD: RDD[_] = finalRDD def setRDDIds(newAllRDDId: Int, oldAllRDDId: Int, newDeltaPrimeRDDId: Int, oldDeltaPrimeRDDId: Int): Unit = { rddIds = Array(newAllRDDId, oldAllRDDId, newDeltaPrimeRDDId, oldDeltaPrimeRDDId) } }
Example 119
Source File: GroupedSumEvaluator.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.partial import java.util.{HashMap => JHashMap} import scala.collection.JavaConverters._ import scala.collection.Map import scala.collection.mutable.HashMap import org.apache.spark.util.StatCounter private[spark] class GroupedSumEvaluator[T](totalOutputs: Int, confidence: Double) extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] { var outputsMerged = 0 var sums = new JHashMap[T, StatCounter] // Sum of counts for each key override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]) { outputsMerged += 1 val iter = taskResult.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val old = sums.get(entry.getKey) if (old != null) { old.merge(entry.getValue) } else { sums.put(entry.getKey, entry.getValue) } } } override def currentResult(): Map[T, BoundedDouble] = { if (outputsMerged == totalOutputs) { val result = new JHashMap[T, BoundedDouble](sums.size) val iter = sums.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val sum = entry.getValue.sum result.put(entry.getKey, new BoundedDouble(sum, 1.0, sum, sum)) } result.asScala } else if (outputsMerged == 0) { new HashMap[T, BoundedDouble] } else { val p = outputsMerged.toDouble / totalOutputs val studentTCacher = new StudentTCacher(confidence) val result = new JHashMap[T, BoundedDouble](sums.size) val iter = sums.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val counter = entry.getValue val meanEstimate = counter.mean val meanVar = counter.sampleVariance / counter.count val countEstimate = (counter.count + 1 - p) / p val countVar = (counter.count + 1) * (1 - p) / (p * p) val sumEstimate = meanEstimate * countEstimate val sumVar = (meanEstimate * meanEstimate * countVar) + (countEstimate * countEstimate * meanVar) + (meanVar * countVar) val sumStdev = math.sqrt(sumVar) val confFactor = studentTCacher.get(counter.count) val low = sumEstimate - confFactor * sumStdev val high = sumEstimate + confFactor * sumStdev result.put(entry.getKey, new BoundedDouble(sumEstimate, confidence, low, high)) } result.asScala } } }
Example 120
Source File: GroupedCountEvaluator.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.partial import java.util.{HashMap => JHashMap} import scala.collection.JavaConverters._ import scala.collection.Map import scala.collection.mutable.HashMap import scala.reflect.ClassTag import org.apache.commons.math3.distribution.NormalDistribution import org.apache.spark.util.collection.OpenHashMap private[spark] class GroupedCountEvaluator[T : ClassTag](totalOutputs: Int, confidence: Double) extends ApproximateEvaluator[OpenHashMap[T, Long], Map[T, BoundedDouble]] { var outputsMerged = 0 var sums = new OpenHashMap[T, Long]() // Sum of counts for each key override def merge(outputId: Int, taskResult: OpenHashMap[T, Long]) { outputsMerged += 1 taskResult.foreach { case (key, value) => sums.changeValue(key, value, _ + value) } } override def currentResult(): Map[T, BoundedDouble] = { if (outputsMerged == totalOutputs) { val result = new JHashMap[T, BoundedDouble](sums.size) sums.foreach { case (key, sum) => result.put(key, new BoundedDouble(sum, 1.0, sum, sum)) } result.asScala } else if (outputsMerged == 0) { new HashMap[T, BoundedDouble] } else { val p = outputsMerged.toDouble / totalOutputs val confFactor = new NormalDistribution(). inverseCumulativeProbability(1 - (1 - confidence) / 2) val result = new JHashMap[T, BoundedDouble](sums.size) sums.foreach { case (key, sum) => val mean = (sum + 1 - p) / p val variance = (sum + 1) * (1 - p) / (p * p) val stdev = math.sqrt(variance) val low = mean - confFactor * stdev val high = mean + confFactor * stdev result.put(key, new BoundedDouble(mean, confidence, low, high)) } result.asScala } } }
Example 121
Source File: GroupedMeanEvaluator.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.partial import java.util.{HashMap => JHashMap} import scala.collection.JavaConverters._ import scala.collection.Map import scala.collection.mutable.HashMap import org.apache.spark.util.StatCounter private[spark] class GroupedMeanEvaluator[T](totalOutputs: Int, confidence: Double) extends ApproximateEvaluator[JHashMap[T, StatCounter], Map[T, BoundedDouble]] { var outputsMerged = 0 var sums = new JHashMap[T, StatCounter] // Sum of counts for each key override def merge(outputId: Int, taskResult: JHashMap[T, StatCounter]) { outputsMerged += 1 val iter = taskResult.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val old = sums.get(entry.getKey) if (old != null) { old.merge(entry.getValue) } else { sums.put(entry.getKey, entry.getValue) } } } override def currentResult(): Map[T, BoundedDouble] = { if (outputsMerged == totalOutputs) { val result = new JHashMap[T, BoundedDouble](sums.size) val iter = sums.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val mean = entry.getValue.mean result.put(entry.getKey, new BoundedDouble(mean, 1.0, mean, mean)) } result.asScala } else if (outputsMerged == 0) { new HashMap[T, BoundedDouble] } else { val studentTCacher = new StudentTCacher(confidence) val result = new JHashMap[T, BoundedDouble](sums.size) val iter = sums.entrySet.iterator() while (iter.hasNext) { val entry = iter.next() val counter = entry.getValue val mean = counter.mean val stdev = math.sqrt(counter.sampleVariance / counter.count) val confFactor = studentTCacher.get(counter.count) val low = mean - confFactor * stdev val high = mean + confFactor * stdev result.put(entry.getKey, new BoundedDouble(mean, confidence, low, high)) } result.asScala } } }
Example 122
Source File: TaskContextImpl.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark import scala.collection.mutable.{ArrayBuffer, HashMap} import org.apache.spark.executor.TaskMetrics import org.apache.spark.memory.TaskMemoryManager import org.apache.spark.metrics.MetricsSystem import org.apache.spark.metrics.source.Source import org.apache.spark.util.{TaskCompletionListener, TaskCompletionListenerException} private[spark] class TaskContextImpl( val stageId: Int, val partitionId: Int, override val taskAttemptId: Long, override val attemptNumber: Int, override val taskMemoryManager: TaskMemoryManager, @transient private val metricsSystem: MetricsSystem, internalAccumulators: Seq[Accumulator[Long]], val runningLocally: Boolean = false, val taskMetrics: TaskMetrics = TaskMetrics.empty) extends TaskContext with Logging { // For backwards-compatibility; this method is now deprecated as of 1.3.0. override def attemptId(): Long = taskAttemptId // List of callback functions to execute when the task completes. @transient private val onCompleteCallbacks = new ArrayBuffer[TaskCompletionListener] // Whether the corresponding task has been killed. @volatile private var interrupted: Boolean = false // Whether the task has completed. @volatile private var completed: Boolean = false override def addTaskCompletionListener(listener: TaskCompletionListener): this.type = { onCompleteCallbacks += listener this } override def addTaskCompletionListener(f: TaskContext => Unit): this.type = { onCompleteCallbacks += new TaskCompletionListener { override def onTaskCompletion(context: TaskContext): Unit = f(context) } this } @deprecated("use addTaskCompletionListener", "1.1.0") override def addOnCompleteCallback(f: () => Unit) { onCompleteCallbacks += new TaskCompletionListener { override def onTaskCompletion(context: TaskContext): Unit = f() } } private[spark] def markInterrupted(): Unit = { interrupted = true } override def isCompleted(): Boolean = completed override def isRunningLocally(): Boolean = runningLocally override def isInterrupted(): Boolean = interrupted override def getMetricsSources(sourceName: String): Seq[Source] = metricsSystem.getSourcesByName(sourceName) @transient private val accumulators = new HashMap[Long, Accumulable[_, _]] private[spark] override def registerAccumulator(a: Accumulable[_, _]): Unit = synchronized { accumulators(a.id) = a } private[spark] override def collectInternalAccumulators(): Map[Long, Any] = synchronized { accumulators.filter(_._2.isInternal).mapValues(_.localValue).toMap } private[spark] override def collectAccumulators(): Map[Long, Any] = synchronized { accumulators.mapValues(_.localValue).toMap } //private[spark] override val internalMetricsToAccumulators: Map[String, Accumulator[Long]] = { // Explicitly register internal accumulators here because these are // not captured in the task closure and are already deserialized internalAccumulators.foreach(registerAccumulator) internalAccumulators.map { a => (a.name.get, a) }.toMap } }
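TaskContextImpl registers accumulators in a HashMap[Long, Accumulable[_, _]] and guards both registration and the snapshot in synchronized blocks. A minimal sketch of that synchronized registry, with a simplified Acc case class in place of Spark's Accumulable:

import scala.collection.mutable.HashMap

object AccumulatorRegistrySketch {
  // Simplified accumulator: just an id and a local value.
  final case class Acc(id: Long, localValue: Long)

  private val accumulators = new HashMap[Long, Acc]

  // Registration and snapshotting are synchronized, as in TaskContextImpl.
  def register(a: Acc): Unit = synchronized { accumulators(a.id) = a }
  def collect(): Map[Long, Long] = synchronized { accumulators.mapValues(_.localValue).toMap }

  def main(args: Array[String]): Unit = {
    register(Acc(1L, 10L)); register(Acc(2L, 5L))
    println(collect()) // Map(1 -> 10, 2 -> 5)
  }
}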
Example 123
Source File: LocalKMeans.scala From learning-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.examples import java.util.Random import scala.collection.mutable.HashMap import scala.collection.mutable.HashSet import breeze.linalg.{Vector, DenseVector, squaredDistance} import org.apache.spark.SparkContext._ object LocalKMeans { val N = 1000 val R = 1000 // Scaling factor val D = 10 val K = 10 val convergeDist = 0.001 val rand = new Random(42) def generateData = { def generatePoint(i: Int) = { DenseVector.fill(D){rand.nextDouble * R} } Array.tabulate(N)(generatePoint) } def closestPoint(p: Vector[Double], centers: HashMap[Int, Vector[Double]]): Int = { var index = 0 var bestIndex = 0 var closest = Double.PositiveInfinity for (i <- 1 to centers.size) { val vCurr = centers.get(i).get val tempDist = squaredDistance(p, vCurr) if (tempDist < closest) { closest = tempDist bestIndex = i } } bestIndex } def showWarning() { System.err.println( """WARN: This is a naive implementation of KMeans Clustering and is given as an example! |Please use the KMeans method found in org.apache.spark.mllib.clustering |for more conventional use. """.stripMargin) } def main(args: Array[String]) { showWarning() val data = generateData var points = new HashSet[Vector[Double]] var kPoints = new HashMap[Int, Vector[Double]] var tempDist = 1.0 while (points.size < K) { points.add(data(rand.nextInt(N))) } val iter = points.iterator for (i <- 1 to points.size) { kPoints.put(i, iter.next()) } println("Initial centers: " + kPoints) while(tempDist > convergeDist) { var closest = data.map (p => (closestPoint(p, kPoints), (p, 1))) var mappings = closest.groupBy[Int] (x => x._1) var pointStats = mappings.map { pair => pair._2.reduceLeft [(Int, (Vector[Double], Int))] { case ((id1, (x1, y1)), (id2, (x2, y2))) => (id1, (x1 + x2, y1 + y2)) } } var newPoints = pointStats.map {mapping => (mapping._1, mapping._2._1 * (1.0 / mapping._2._2))} tempDist = 0.0 for (mapping <- newPoints) { tempDist += squaredDistance(kPoints.get(mapping._1).get, mapping._2) } for (newP <- newPoints) { kPoints.put(newP._1, newP._2) } } println("Final centers: " + kPoints) } }
Example 124
Source File: CollectionExample.scala From Scala-and-Spark-for-Big-Data-Analytics with MIT License | 5 votes |
package com.chapter3.ScalaFP import scala.collection._ import scala.collection.mutable.Buffer import scala.collection.mutable.HashMap object CollectionExample { def main(args: Array[String]) { val x = 10 val y = 15 val z = 19 Traversable(1, 2, 3) Iterable("x", "y", "z") Map("x" -> 10, "y" -> 13, "z" -> 17) Set("Red", "Green", "Blue") SortedSet("Hello,", "world!") Buffer(x, y, z) IndexedSeq(0.0, 1.0, 2.0) LinearSeq(x, y, z) List(2, 6, 10) HashMap("x" -> 20, "y" -> 19, "z" -> 16) val list = List(1, 2, 3) map (_ + 1) println(list) val set = Set(1, 2, 3) map (_ * 2) println(set) val list2 = List(x, y, z).map(x => x * 3) println(list2) } }
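Since mutable.HashMap is the one collection this page focuses on, a few of its everyday operations are worth spelling out; the following sketch shows creation, insertion, in-place update, lookup, and removal:

import scala.collection.mutable.HashMap

object HashMapBasics {
  def main(args: Array[String]): Unit = {
    val m = HashMap("x" -> 20, "y" -> 19, "z" -> 16)
    m += ("w" -> 7)                    // add an entry
    m("x") = 21                        // update in place
    println(m.get("y"))                // Some(19)
    println(m.getOrElse("missing", 0)) // 0
    m -= "z"                           // remove an entry
    println(m)
  }
}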
Example 125
Source File: NFAStructure.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.cep.nfa import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag import scala.collection.mutable.HashMap import dbis.piglet.backends.{SchemaClass => Event} import scala.collection.mutable.ListBuffer def addEvent(event: T, currentEdge: ForwardEdge[T]): Unit = { events += event //if (relatedValue != null) { // relatedValue.get(currentEdge.name.get) match { // case Some(x) => x.foreach (r => r.updateValue(event)) //case None => Nil //} //} currenState = currentEdge.destState if (currenState.isInstanceOf[FinalState[T]]) complete = true } override def clone(): NFAStructure[T] = { val copyStr = new NFAStructure[T](this.nfaController) copyStr.complete = this.complete copyStr.currenState = this.currenState copyStr.events = this.events.clone() //copyStr.events = this.events copyStr } }
Example 126
Source File: CorefUtils.scala From berkeley-doc-summarizer with GNU General Public License v3.0 | 5 votes |
package edu.berkeley.nlp.summ import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.HashMap import edu.berkeley.nlp.entity.DepConstTree import edu.berkeley.nlp.entity.coref.Mention import edu.berkeley.nlp.entity.coref.PronounDictionary import edu.berkeley.nlp.entity.coref.MentionType import edu.berkeley.nlp.entity.coref.CorefDoc import edu.berkeley.nlp.entity.GUtil import edu.berkeley.nlp.futile.math.SloppyMath object CorefUtils { def getAntecedent(corefDoc: CorefDoc, predictor: edu.berkeley.nlp.entity.coref.PairwiseScorer, index: Int) = { val posteriors = computePosteriors(corefDoc, predictor, Seq(index)) GUtil.argMaxIdx(posteriors(0)) } def computePosteriors(corefDoc: CorefDoc, predictor: edu.berkeley.nlp.entity.coref.PairwiseScorer, indicesOfInterest: Seq[Int]): Array[Array[Double]] = { val docGraph = new edu.berkeley.nlp.entity.coref.DocumentGraph(corefDoc, false) Array.tabulate(indicesOfInterest.size)(idxIdxOfInterest => { val idx = indicesOfInterest(idxIdxOfInterest) val scores = Array.tabulate(idx+1)(antIdx => predictor.score(docGraph, idx, antIdx, false).toDouble) val logNormalizer = scores.foldLeft(Double.NegativeInfinity)(SloppyMath.logAdd(_, _)) for (antIdx <- 0 until scores.size) { scores(antIdx) = scores(antIdx) - logNormalizer } scores }) } def remapMentionType(ment: Mention) = { val newMentionType = if (ment.endIdx - ment.startIdx == 1 && PronounDictionary.isDemonstrative(ment.rawDoc.words(ment.sentIdx)(ment.headIdx))) { MentionType.DEMONSTRATIVE; } else if (ment.endIdx - ment.startIdx == 1 && PronounDictionary.isPronLc(ment.rawDoc.words(ment.sentIdx)(ment.headIdx))) { MentionType.PRONOMINAL; } else if (ment.rawDoc.pos(ment.sentIdx)(ment.headIdx) == "NNS" || ment.rawDoc.pos(ment.sentIdx)(ment.headIdx) == "NNPS") { MentionType.PROPER; } else { MentionType.NOMINAL; } new Mention(ment.rawDoc, ment.mentIdx, ment.sentIdx, ment.startIdx, ment.endIdx, ment.headIdx, ment.allHeadIndices, ment.isCoordinated, newMentionType, ment.nerString, ment.number, ment.gender) } def getMentionText(ment: Mention) = ment.rawDoc.words(ment.sentIdx).slice(ment.startIdx, ment.endIdx) def getMentionNerSpan(ment: Mention): Option[(Int,Int)] = { // Smallest NER chunk that contains the head val conllDoc = ment.rawDoc val matchingChunks = conllDoc.nerChunks(ment.sentIdx).filter(chunk => chunk.start <= ment.headIdx && ment.headIdx < chunk.end); if (!matchingChunks.isEmpty) { val smallestChunk = matchingChunks.sortBy(chunk => chunk.end - chunk.start).head; Some(smallestChunk.start -> smallestChunk.end) } else { None } } def getSpanHeads(tree: DepConstTree, startIdx: Int, endIdx: Int): Seq[Int] = getSpanHeads(tree.childParentDepMap, startIdx, endIdx); def getSpanHeads(childParentDepMap: HashMap[Int,Int], startIdx: Int, endIdx: Int): Seq[Int] = { // If it's a constituent, only one should have a head outside val outsidePointing = new ArrayBuffer[Int]; for (i <- startIdx until endIdx) { val ptr = childParentDepMap(i); if (ptr < startIdx || ptr >= endIdx) { outsidePointing += i; } } outsidePointing } def isDefinitelyPerson(str: String): Boolean = { val canonicalization = PronounDictionary.canonicalize(str) // N.B. Don't check "we" or "they" because those might be used in inanimate cases canonicalization == "i" || canonicalization == "you" || canonicalization == "he" || canonicalization == "she" } }
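getSpanHeads walks a HashMap[Int, Int] from child token index to parent index and keeps the tokens whose parent lies outside the span. A standalone sketch of that rule on a tiny hand-built dependency map:

import scala.collection.mutable.{ArrayBuffer, HashMap}

object SpanHeadsSketch {
  // Tokens in [startIdx, endIdx) whose parent lies outside the span are its heads,
  // the same rule getSpanHeads applies to the dependency map.
  def spanHeads(childParent: HashMap[Int, Int], startIdx: Int, endIdx: Int): Seq[Int] = {
    val outsidePointing = new ArrayBuffer[Int]
    for (i <- startIdx until endIdx) {
      val ptr = childParent(i)
      if (ptr < startIdx || ptr >= endIdx) outsidePointing += i
    }
    outsidePointing
  }

  def main(args: Array[String]): Unit = {
    // 5 tokens; token 2 is the root of the span [1, 4), pointing to token 4 outside it.
    val deps = HashMap(0 -> 2, 1 -> 2, 2 -> 4, 3 -> 2, 4 -> 4)
    println(spanHeads(deps, 1, 4)) // ArrayBuffer(2)
  }
}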