scala.collection.mutable Scala Examples
The following examples show how to use scala.collection.mutable.
The project, source file, and license for each example are noted in the heading above it.
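Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below; all names are illustrative) showing the three mutable collections that recur throughout them: ArrayBuffer, Map, and Set.

import scala.collection.mutable

object MutableCollectionsDemo extends App {
  // Growable buffer, often used below to accumulate pipeline stages or metrics
  val metrics = mutable.ArrayBuffer[String]()
  metrics += "first"
  metrics ++= Seq("second", "third")

  // Mutable map, often used below as a simple in-memory key-value store
  val kvs = mutable.Map.empty[String, Any]
  kvs("answer") = 42
  println(kvs.get("answer")) // Some(42)

  // Mutable set, often used below to track in-progress keys
  val inProgress = mutable.Set[String]()
  inProgress += "job-1"
  inProgress -= "job-1"

  println(metrics.mkString(", ")) // first, second, third
}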
Example 1
Source File: KVStore.scala From Freasy-Monad with MIT License | 6 votes |
package examples.cats

import cats.Id
import cats.free.Free
import freasymonad.cats.free

import scala.collection.mutable
import scala.concurrent.duration.Duration
import scala.concurrent.{Await, Future}

@free trait KVStore {
  type KVStoreF[A] = Free[GrammarADT, A]
  sealed trait GrammarADT[A]

  def put[T](key: String, value: T): KVStoreF[Unit]
  def get[T](key: String): KVStoreF[Option[T]]
  def delete(key: String): KVStoreF[Unit]

  def update[T](key: String, f: T => T): KVStoreF[Unit] =
    for {
      vMaybe <- get[T](key)
      _      <- vMaybe.map(v => put[T](key, f(v))).getOrElse(Free.pure(()))
    } yield ()
}

object Main extends App {
  import KVStore.ops._

  def program: KVStoreF[Option[Int]] =
    for {
      _ <- put("wild-cats", 2)
      _ <- update[Int]("wild-cats", _ + 12)
      _ <- put("tame-cats", 5)
      n <- get[Int]("wild-cats")
      _ <- delete("tame-cats")
    } yield n

  val idInterpreter = new KVStore.Interp[Id] {
    val kvs = mutable.Map.empty[String, Any]

    def get[T](key: String): Id[Option[T]] = {
      println(s"get($key)")
      kvs.get(key).map(_.asInstanceOf[T])
    }

    def put[T](key: String, value: T): Id[Unit] = {
      println(s"put($key, $value)")
      kvs(key) = value
    }

    def delete(key: String): Id[Unit] = {
      println(s"delete($key)")
      kvs.remove(key)
    }
  }
  val resId: Id[Option[Int]] = idInterpreter.run(program)

  import cats.implicits.catsStdInstancesForFuture
  import scala.concurrent.ExecutionContext.Implicits.global

  val futureInterpreter = new KVStore.Interp[Future] {
    val kvs = mutable.Map.empty[String, Any]

    def get[T](key: String): Future[Option[T]] = Future {
      println(s"get($key)")
      kvs.get(key).map(_.asInstanceOf[T])
    }

    def put[T](key: String, value: T): Future[Unit] = Future {
      println(s"put($key, $value)")
      kvs(key) = value
    }

    def delete(key: String): Future[Unit] = Future {
      println(s"delete($key)")
      kvs.remove(key)
    }
  }
  val resFuture: Future[Option[Int]] = futureInterpreter.run(program)
  Await.ready(resFuture, Duration.Inf)
}
Example 2
Source File: MNISTBenchmark.scala From spark-knn with Apache License 2.0 | 6 votes |
package com.github.saurfang.spark.ml.knn.examples import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml.classification.{KNNClassifier, NaiveKNNClassifier} import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator import org.apache.spark.ml.param.{IntParam, ParamMap} import org.apache.spark.ml.tuning.{Benchmarker, ParamGridBuilder} import org.apache.spark.ml.util.Identifiable import org.apache.spark.ml.{Pipeline, Transformer} import org.apache.spark.mllib.util.MLUtils import org.apache.spark.sql.types.StructType import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} import org.apache.log4j import scala.collection.mutable object MNISTBenchmark { val logger = log4j.Logger.getLogger(getClass) def main(args: Array[String]) { val ns = if(args.isEmpty) (2500 to 10000 by 2500).toArray else args(0).split(',').map(_.toInt) val path = if(args.length >= 2) args(1) else "data/mnist/mnist.bz2" val numPartitions = if(args.length >= 3) args(2).toInt else 10 val models = if(args.length >=4) args(3).split(',') else Array("tree","naive") val spark = SparkSession.builder().getOrCreate() val sc = spark.sparkContext import spark.implicits._ //read in raw label and features val rawDataset = MLUtils.loadLibSVMFile(sc, path) .zipWithIndex() .filter(_._2 < ns.max) .sortBy(_._2, numPartitions = numPartitions) .keys .toDF() // convert "features" from mllib.linalg.Vector to ml.linalg.Vector val dataset = MLUtils.convertVectorColumnsToML(rawDataset) .cache() dataset.count() //force persist val limiter = new Limiter() val knn = new KNNClassifier() .setTopTreeSize(numPartitions * 10) .setFeaturesCol("features") .setPredictionCol("prediction") .setK(1) val naiveKNN = new NaiveKNNClassifier() val pipeline = new Pipeline() .setStages(Array(limiter, knn)) val naivePipeline = new Pipeline() .setStages(Array(limiter, naiveKNN)) val paramGrid = new ParamGridBuilder() .addGrid(limiter.n, ns) .build() val bm = new Benchmarker() .setEvaluator(new MulticlassClassificationEvaluator) .setEstimatorParamMaps(paramGrid) .setNumTimes(3) val metrics = mutable.ArrayBuffer[String]() if(models.contains("tree")) { val bmModel = bm.setEstimator(pipeline).fit(dataset) metrics += s"knn: ${bmModel.avgTrainingRuntimes.toSeq} / ${bmModel.avgEvaluationRuntimes.toSeq}" } if(models.contains("naive")) { val naiveBMModel = bm.setEstimator(naivePipeline).fit(dataset) metrics += s"naive: ${naiveBMModel.avgTrainingRuntimes.toSeq} / ${naiveBMModel.avgEvaluationRuntimes.toSeq}" } logger.info(metrics.mkString("\n")) } } class Limiter(override val uid: String) extends Transformer { def this() = this(Identifiable.randomUID("limiter")) val n: IntParam = new IntParam(this, "n", "number of rows to limit") def setN(value: Int): this.type = set(n, value) // hack to maintain number of partitions (otherwise it collapses to 1 which is unfair for naiveKNN) override def transform(dataset: Dataset[_]): DataFrame = dataset.limit($(n)).repartition(dataset.rdd.partitions.length).toDF() override def copy(extra: ParamMap): Transformer = defaultCopy(extra) @DeveloperApi override def transformSchema(schema: StructType): StructType = schema }
Example 3
Source File: RandomForestPipeline.scala From Machine-Learning-with-Spark-Second-Edition with MIT License | 6 votes |
package org.sparksamples.classification.stumbleupon import org.apache.log4j.Logger import org.apache.spark.ml.classification.RandomForestClassifier import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator import org.apache.spark.ml.feature.{StringIndexer, VectorAssembler} import org.apache.spark.ml.{Pipeline, PipelineStage} import org.apache.spark.sql.DataFrame import scala.collection.mutable object RandomForestPipeline { @transient lazy val logger = Logger.getLogger(getClass.getName) def randomForestPipeline(vectorAssembler: VectorAssembler, dataFrame: DataFrame) = { val Array(training, test) = dataFrame.randomSplit(Array(0.9, 0.1), seed = 12345) // Set up Pipeline val stages = new mutable.ArrayBuffer[PipelineStage]() val labelIndexer = new StringIndexer() .setInputCol("label") .setOutputCol("indexedLabel") stages += labelIndexer val rf = new RandomForestClassifier() .setFeaturesCol(vectorAssembler.getOutputCol) .setLabelCol("indexedLabel") .setNumTrees(20) .setMaxDepth(5) .setMaxBins(32) .setMinInstancesPerNode(1) .setMinInfoGain(0.0) .setCacheNodeIds(false) .setCheckpointInterval(10) stages += vectorAssembler stages += rf val pipeline = new Pipeline().setStages(stages.toArray) // Fit the Pipeline val startTime = System.nanoTime() //val model = pipeline.fit(training) val model = pipeline.fit(dataFrame) val elapsedTime = (System.nanoTime() - startTime) / 1e9 println(s"Training time: $elapsedTime seconds") //val holdout = model.transform(test).select("prediction","label") val holdout = model.transform(dataFrame).select("prediction","label") // Select (prediction, true label) and compute test error val evaluator = new MulticlassClassificationEvaluator() .setLabelCol("label") .setPredictionCol("prediction") .setMetricName("accuracy") val mAccuracy = evaluator.evaluate(holdout) println("Test set accuracy = " + mAccuracy) } }
Example 4
Source File: GradientBoostedTreePipeline.scala From Machine-Learning-with-Spark-Second-Edition with MIT License | 6 votes |
package org.sparksamples.classification.stumbleupon import org.apache.log4j.Logger import org.apache.spark.ml.classification.GBTClassifier import org.apache.spark.ml.feature.{StringIndexer, VectorAssembler} import org.apache.spark.ml.{Pipeline, PipelineStage} import org.apache.spark.mllib.evaluation.{MulticlassMetrics, RegressionMetrics} import org.apache.spark.sql.DataFrame import scala.collection.mutable object GradientBoostedTreePipeline { @transient lazy val logger = Logger.getLogger(getClass.getName) def gradientBoostedTreePipeline(vectorAssembler: VectorAssembler, dataFrame: DataFrame) = { val Array(training, test) = dataFrame.randomSplit(Array(0.9, 0.1), seed = 12345) // Set up Pipeline val stages = new mutable.ArrayBuffer[PipelineStage]() val labelIndexer = new StringIndexer() .setInputCol("label") .setOutputCol("indexedLabel") stages += labelIndexer val gbt = new GBTClassifier() .setFeaturesCol(vectorAssembler.getOutputCol) .setLabelCol("indexedLabel") .setMaxIter(10) stages += vectorAssembler stages += gbt val pipeline = new Pipeline().setStages(stages.toArray) // Fit the Pipeline val startTime = System.nanoTime() //val model = pipeline.fit(training) val model = pipeline.fit(dataFrame) val elapsedTime = (System.nanoTime() - startTime) / 1e9 println(s"Training time: $elapsedTime seconds") //val holdout = model.transform(test).select("prediction","label") val holdout = model.transform(dataFrame).select("prediction","label") // have to do a type conversion for RegressionMetrics val rm = new RegressionMetrics(holdout.rdd.map(x => (x(0).asInstanceOf[Double], x(1).asInstanceOf[Double]))) logger.info("Test Metrics") logger.info("Test Explained Variance:") logger.info(rm.explainedVariance) logger.info("Test R^2 Coef:") logger.info(rm.r2) logger.info("Test MSE:") logger.info(rm.meanSquaredError) logger.info("Test RMSE:") logger.info(rm.rootMeanSquaredError) val predictions = model.transform(test).select("prediction").rdd.map(_.getDouble(0)) val labels = model.transform(test).select("label").rdd.map(_.getDouble(0)) val accuracy = new MulticlassMetrics(predictions.zip(labels)).precision println(s" Accuracy : $accuracy") holdout.rdd.map(x => x(0).asInstanceOf[Double]).repartition(1).saveAsTextFile("/home/ubuntu/work/ml-resources/spark-ml/results/GBT.xls") savePredictions(holdout, test, rm, "/home/ubuntu/work/ml-resources/spark-ml/results/GBT.csv") } def savePredictions(predictions:DataFrame, testRaw:DataFrame, regressionMetrics: RegressionMetrics, filePath:String) = { predictions .coalesce(1) .write.format("com.databricks.spark.csv") .option("header", "true") .save(filePath) } }
Example 5
Source File: SqlUnitTest.scala From SparkUnitTestingExamples with Apache License 2.0 | 6 votes |
package com.cloudera.sa.spark.unittest.sql import org.apache.spark.sql.Row import org.apache.spark.sql.hive.HiveContext import org.apache.spark.{SparkConf, SparkContext} import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} import scala.collection.mutable class SqlUnitTest extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll{ @transient var sc: SparkContext = null @transient var hiveContext: HiveContext = null override def beforeAll(): Unit = { val envMap = Map[String,String](("Xmx", "512m")) val sparkConfig = new SparkConf() sparkConfig.set("spark.broadcast.compress", "false") sparkConfig.set("spark.shuffle.compress", "false") sparkConfig.set("spark.shuffle.spill.compress", "false") sparkConfig.set("spark.io.compression.codec", "lzf") sc = new SparkContext("local[2]", "unit test", sparkConfig) hiveContext = new HiveContext(sc) } override def afterAll(): Unit = { sc.stop() } test("Test table creation and summing of counts") { val personRDD = sc.parallelize(Seq(Row("ted", 42, "blue"), Row("tj", 11, "green"), Row("andrew", 9, "green"))) hiveContext.sql("create table person (name string, age int, color string)") val emptyDataFrame = hiveContext.sql("select * from person limit 0") val personDataFrame = hiveContext.createDataFrame(personRDD, emptyDataFrame.schema) personDataFrame.registerTempTable("tempPerson") val ageSumDataFrame = hiveContext.sql("select sum(age) from tempPerson") val localAgeSum = ageSumDataFrame.take(10) assert(localAgeSum(0).get(0) == 62, "The sum of age should equal 62 but it equaled " + localAgeSum(0).get(0)) } }
Example 6
Source File: BasicShabondiTest.scala From ohara with Apache License 2.0 | 5 votes |
package oharastream.ohara.shabondi import java.util import java.util.concurrent.{ExecutorService, Executors} import com.google.common.util.concurrent.ThreadFactoryBuilder import com.typesafe.scalalogging.Logger import oharastream.ohara.common.data.Row import oharastream.ohara.common.setting.TopicKey import oharastream.ohara.common.util.{CommonUtils, Releasable} import oharastream.ohara.kafka.TopicAdmin import oharastream.ohara.shabondi.common.ShabondiUtils import oharastream.ohara.shabondi.sink.SinkConfig import oharastream.ohara.shabondi.source.SourceConfig import oharastream.ohara.testing.WithBroker import org.junit.After import scala.collection.{immutable, mutable} import scala.concurrent.{ExecutionContext, Future} import scala.jdk.CollectionConverters._ private[shabondi] abstract class BasicShabondiTest extends WithBroker { protected val log = Logger(this.getClass()) protected val brokerProps = testUtil.brokersConnProps protected val topicAdmin: TopicAdmin = TopicAdmin.of(brokerProps) protected val newThreadPool: () => ExecutorService = () => Executors.newCachedThreadPool(new ThreadFactoryBuilder().setNameFormat(this.getClass.getSimpleName + "-").build()) protected val countRows: (util.Queue[Row], Long, ExecutionContext) => Future[Long] = (queue, executionTime, ec) => Future { log.debug("countRows begin...") val baseTime = System.currentTimeMillis() var count = 0L var running = true while (running) { val row = queue.poll() if (row != null) count += 1 else Thread.sleep(100) running = (System.currentTimeMillis() - baseTime) < executionTime } log.debug("countRows done") count }(ec) protected def createTopicKey = TopicKey.of("default", CommonUtils.randomString(5)) protected def createTestTopic(topicKey: TopicKey): Unit = topicAdmin.topicCreator .numberOfPartitions(1) .numberOfReplications(1.toShort) .topicKey(topicKey) .create protected def defaultSourceConfig( sourceToTopics: Seq[TopicKey] = Seq.empty[TopicKey] ): SourceConfig = { import ShabondiDefinitions._ val args = mutable.ArrayBuffer( GROUP_DEFINITION.key + "=" + CommonUtils.randomString(5), NAME_DEFINITION.key + "=" + CommonUtils.randomString(3), SHABONDI_CLASS_DEFINITION.key + "=" + classOf[ShabondiSource].getName, CLIENT_PORT_DEFINITION.key + "=8080", BROKERS_DEFINITION.key + "=" + testUtil.brokersConnProps ) if (sourceToTopics.nonEmpty) args += s"${SOURCE_TO_TOPICS_DEFINITION.key}=${TopicKey.toJsonString(sourceToTopics.asJava)}" val rawConfig = ShabondiUtils.parseArgs(args.toArray) new SourceConfig(rawConfig) } protected def defaultSinkConfig( sinkFromTopics: Seq[TopicKey] = Seq.empty[TopicKey] ): SinkConfig = { import ShabondiDefinitions._ val args = mutable.ArrayBuffer( GROUP_DEFINITION.key + "=" + CommonUtils.randomString(5), NAME_DEFINITION.key + "=" + CommonUtils.randomString(3), SHABONDI_CLASS_DEFINITION.key + "=" + classOf[ShabondiSink].getName, CLIENT_PORT_DEFINITION.key + "=8080", BROKERS_DEFINITION.key + "=" + testUtil.brokersConnProps ) if (sinkFromTopics.nonEmpty) args += s"${SINK_FROM_TOPICS_DEFINITION.key}=${TopicKey.toJsonString(sinkFromTopics.asJava)}" val rawConfig = ShabondiUtils.parseArgs(args.toArray) new SinkConfig(rawConfig) } protected def singleRow(columnSize: Int, rowId: Int = 0): Row = KafkaSupport.singleRow(columnSize, rowId) protected def multipleRows(rowSize: Int): immutable.Iterable[Row] = KafkaSupport.multipleRows(rowSize) @After def tearDown(): Unit = { Releasable.close(topicAdmin) } }
Example 7
Source File: PerformanceReport.scala From ohara with Apache License 2.0 | 5 votes |
package oharastream.ohara.it.performance import java.util.Objects import oharastream.ohara.common.setting.ObjectKey import oharastream.ohara.common.util.CommonUtils import scala.collection.immutable.ListMap import scala.collection.mutable trait PerformanceReport { def records: Map[Long, Map[String, Double]] } object PerformanceReport { def builder = new Builder final class Builder private[PerformanceReport] extends oharastream.ohara.common.pattern.Builder[PerformanceReport] { private[this] var key: ObjectKey = _ private[this] var className: String = _ private[this] val records = mutable.Map[Long, Map[String, Double]]() def connectorKey(key: ObjectKey): Builder = { this.key = Objects.requireNonNull(key) this } def className(className: String): Builder = { this.className = CommonUtils.requireNonEmpty(className) this } def resetValue(duration: Long, header: String): Builder = { records.put(duration, Map(header -> 0.0)) this } def record(duration: Long, header: String, value: Double): Builder = { val record = records.getOrElse(duration, Map(header -> 0.0)) records.put( duration, record + (header -> (record.getOrElse(header, 0.0) + value)) ) this } override def build: PerformanceReport = new PerformanceReport { override val className: String = CommonUtils.requireNonEmpty(Builder.this.className) override val records: Map[Long, Map[String, Double]] = ListMap( Builder.this.records.toSeq.sortBy(_._1)((x: Long, y: Long) => y.compare(x)): _* ) override def key: ObjectKey = Objects.requireNonNull(Builder.this.key) } } }
Example 8
Source File: ServiceKeyHolder.scala From ohara with Apache License 2.0 | 5 votes |
package oharastream.ohara.it

import java.util.concurrent.TimeUnit

import oharastream.ohara.agent.container.ContainerClient
import oharastream.ohara.common.setting.ObjectKey
import oharastream.ohara.common.util.{CommonUtils, Releasable}
import com.typesafe.scalalogging.Logger

import scala.collection.mutable
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.Duration
import scala.concurrent.{Await, Future}

// NOTE: this listing is an excerpt of ServiceKeyHolder — the enclosing class and the definitions of
// finalClose, KEEP_CONTAINERS, clusterKey, excludedNodes, client, LOG and result(...) are not shown.
    if (!finalClose || !KEEP_CONTAINERS)
      result(client.containers())
        .filter(
          container =>
            clusterKey.exists(key => container.name.contains(key.group()) && container.name.contains(key.name()))
        )
        .filterNot(container => excludedNodes.contains(container.nodeName))
        .foreach { container =>
          try {
            println(s"[-----------------------------------${container.name}-----------------------------------]")
            // Before 10 minutes container log. Avoid the OutOfMemory of Java heap
            val containerLogs =
              try result(client.log(container.name, Option(600)))
              catch {
                case e: Throwable =>
                  s"failed to fetch the logs for container:${container.name}. caused by:${e.getMessage}"
              }
            println(containerLogs)
            println("[------------------------------------------------------------------------------------]")
            result(client.forceRemove(container.name))
          } catch {
            case e: Throwable => LOG.error(s"failed to remove container ${container.name}", e)
          }
        }
    finally Releasable.close(client)
}
Example 9
Source File: ArgumentsBuilder.scala From ohara with Apache License 2.0 | 5 votes |
package oharastream.ohara.agent

import oharastream.ohara.agent.ArgumentsBuilder.FileAppender
import oharastream.ohara.common.util.CommonUtils
import spray.json.{JsNull, JsNumber, JsString, JsValue}

import scala.collection.mutable

// NOTE: this listing is an excerpt — the opening of `trait ArgumentsBuilder` (which, judging from the
// companion object below, also declares `def file(path: String): FileAppender`) is not shown.
  def mainConfigFile(path: String): ArgumentsBuilder

  override def build: Seq[String]
}

object ArgumentsBuilder {
  trait FileAppender {
    private[this] val props = mutable.Buffer[String]()
    def append(prop: Int): FileAppender    = append(prop.toString)
    def append(prop: String): FileAppender = append(Seq(prop))
    def append(props: Seq[String]): FileAppender = {
      this.props ++= props
      this
    }
    def append(key: String, value: Boolean): FileAppender = append(s"$key=$value")
    def append(key: String, value: Short): FileAppender   = append(s"$key=$value")
    def append(key: String, value: Int): FileAppender     = append(s"$key=$value")
    def append(key: String, value: String): FileAppender  = append(s"$key=$value")
    def append(key: String, value: JsValue): FileAppender = append(
      key,
      value match {
        case JsString(value) => value
        case JsNumber(value) => value.toString
        case JsNull          => throw new IllegalArgumentException(s"JsNull is not legal")
        case _               => value.toString()
      }
    )

    def done: ArgumentsBuilder = done(props.toSeq)

    protected def done(props: Seq[String]): ArgumentsBuilder
  }

  def apply(): ArgumentsBuilder = new ArgumentsBuilder {
    private[this] val files                  = mutable.Map[String, Seq[String]]()
    private[this] var mainConfigFile: String = _

    override def build: Seq[String] =
      if (CommonUtils.isEmpty(mainConfigFile))
        throw new IllegalArgumentException("you have to define the main configs")
      else
        // format: --file path=line0,line1 --file path1=line0,line1
        // NOTED: the path and props must be in different line. otherwise, k8s will merge them into single line and our
        // script will fail to parse the command-line arguments
        files.flatMap {
          case (path, props) => Seq("--file", s"$path=${props.mkString(",")}")
        }.toSeq ++ Seq("--config", mainConfigFile)

    override def file(path: String): FileAppender = (props: Seq[String]) => {
      this.files += (path -> props)
      this
    }

    override def mainConfigFile(path: String): ArgumentsBuilder = {
      this.mainConfigFile = CommonUtils.requireNonEmpty(path)
      this
    }
  }
}
Example 10
Source File: ClusterRequest.scala From ohara with Apache License 2.0 | 5 votes |
package oharastream.ohara.client.configurator

import oharastream.ohara.common.annotations.Optional
import oharastream.ohara.common.setting.ObjectKey
import oharastream.ohara.common.util.CommonUtils
import spray.json.DefaultJsonProtocol._
import spray.json.{JsArray, JsNumber, JsObject, JsString, JsValue}

import scala.jdk.CollectionConverters._
import scala.collection.mutable

// NOTE: the opening declaration (trait ClusterRequest) is omitted in the original listing; it is restored here.
trait ClusterRequest {

  protected def key: ObjectKey = ObjectKey.of(
    settings.get(GROUP_KEY).map(_.convertTo[String]).getOrElse(GROUP_DEFAULT),
    settings(NAME_KEY).convertTo[String]
  )

  protected val settings: mutable.Map[String, JsValue] = mutable.Map()

  @Optional("default key is a random string. But it is required in updating")
  def key(key: ObjectKey): ClusterRequest.this.type = {
    setting(NAME_KEY, JsString(key.name()))
    setting(GROUP_KEY, JsString(key.group()))
  }

  @Optional("default name is a random string. But it is required in updating")
  def name(name: String): ClusterRequest.this.type =
    setting(NAME_KEY, JsString(CommonUtils.requireNonEmpty(name)))

  @Optional("default is GROUP_DEFAULT")
  def group(group: String): ClusterRequest.this.type =
    setting(GROUP_KEY, JsString(CommonUtils.requireNonEmpty(group)))

  def nodeName(nodeName: String): ClusterRequest.this.type =
    nodeNames(Set(CommonUtils.requireNonEmpty(nodeName)))

  def nodeNames(nodeNames: Set[String]): ClusterRequest.this.type =
    setting(NODE_NAMES_KEY, JsArray(CommonUtils.requireNonEmpty(nodeNames.asJava).asScala.map(JsString(_)).toVector))

  @Optional("default value is empty array")
  def routes(routes: Map[String, String]): ClusterRequest.this.type =
    setting(ROUTES_KEY, JsObject(routes.map {
      case (k, v) => k -> JsString(v)
    }))

  @Optional("default value is 1024")
  def initHeap(sizeInMB: Int): ClusterRequest.this.type =
    setting(INIT_HEAP_KEY, JsNumber(CommonUtils.requirePositiveInt(sizeInMB)))

  @Optional("default value is 1024")
  def maxHeap(sizeInMB: Int): ClusterRequest.this.type =
    setting(MAX_HEAP_KEY, JsNumber(CommonUtils.requirePositiveInt(sizeInMB)))

  @Optional("extra settings is empty by default")
  def setting(key: String, value: JsValue): ClusterRequest.this.type =
    settings(Map(key -> value))

  @Optional("extra settings is empty by default")
  def settings(settings: Map[String, JsValue]): ClusterRequest.this.type = {
    // We don't have to check the settings is empty here for the following reasons:
    // 1) we may want to use the benefit of default creation without specify settings
    // 2) actual checking will be done in the json parser phase of creation or update
    this.settings ++= settings
    this
  }
}
Example 11
Source File: ConnectionUtil.scala From hazelcast-spark with Apache License 2.0 | 5 votes |
package com.hazelcast.spark.connector.util import com.hazelcast.client.HazelcastClient import com.hazelcast.client.config.{ClientConfig, XmlClientConfigBuilder} import com.hazelcast.core.HazelcastInstance import com.hazelcast.spark.connector.conf.SerializableConf import scala.collection.{JavaConversions, mutable} object ConnectionUtil { private[connector] val instances = mutable.Map[String, HazelcastInstance]() def getHazelcastConnection(member: String, rddId: Int, conf: SerializableConf): HazelcastInstance = { def createClientInstance: HazelcastInstance = { val client: HazelcastInstance = HazelcastClient.newHazelcastClient(createClientConfig(conf, member)) instances.put(member + "#" + rddId, client) client } this.synchronized { val maybeInstance: Option[HazelcastInstance] = instances.get(member + "#" + rddId) if (maybeInstance.isEmpty) { createClientInstance } else { val instance: HazelcastInstance = maybeInstance.get if (instance.getLifecycleService.isRunning) { instance } else { createClientInstance } } } } def closeHazelcastConnection(member: String, rddId: Int): Unit = { this.synchronized { val maybeInstance: Option[HazelcastInstance] = instances.get(member + "#" + rddId) if (maybeInstance.isDefined) { val instance: HazelcastInstance = maybeInstance.get if (instance.getLifecycleService.isRunning) { instance.getLifecycleService.shutdown() } instances.remove(member + "#" + rddId) } } } def closeAll(rddIds: Seq[Int]): Unit = { this.synchronized { instances.keys.foreach({ key => { val instanceRddId: String = key.split("#")(1) if (rddIds.contains(instanceRddId.toInt)) { val instance: HazelcastInstance = instances.get(key).get if (instance.getLifecycleService.isRunning) { instance.shutdown() } instances.remove(key) } } }) } } private def createClientConfig(conf: SerializableConf, member: String): ClientConfig = { var config: ClientConfig = null if (conf.xmlPath != null) { config = new XmlClientConfigBuilder(conf.xmlPath).build() } else { config = new ClientConfig config.getGroupConfig.setName(conf.groupName) config.getGroupConfig.setPassword(conf.groupPass) config.getNetworkConfig.setAddresses(JavaConversions.seqAsJavaList(member.split(","))) } config } }
Example 12
Source File: InMemoryState.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.ledger.on.memory import java.util.concurrent.locks.StampedLock import com.daml.ledger.on.memory.InMemoryState._ import com.daml.ledger.participant.state.kvutils.Bytes import com.daml.ledger.participant.state.kvutils.api.LedgerRecord import com.daml.ledger.participant.state.v1.Offset import com.google.protobuf.ByteString import scala.collection.mutable import scala.concurrent.{ExecutionContext, Future, blocking} private[memory] class InMemoryState private (log: MutableLog, state: MutableState) { private val lockCurrentState = new StampedLock() @volatile private var lastLogEntryIndex = 0 def readLog[A](action: ImmutableLog => A): A = action(log) // `log` is mutable, but the interface is immutable def newHeadSinceLastWrite(): Int = lastLogEntryIndex def write[A](action: (MutableLog, MutableState) => Future[A])( implicit executionContext: ExecutionContext ): Future[A] = for { stamp <- Future { blocking { lockCurrentState.writeLock() } } result <- action(log, state) .andThen { case _ => lastLogEntryIndex = log.size - 1 lockCurrentState.unlock(stamp) } } yield result } object InMemoryState { type ImmutableLog = IndexedSeq[LedgerRecord] type ImmutableState = collection.Map[StateKey, StateValue] type MutableLog = mutable.Buffer[LedgerRecord] with ImmutableLog type MutableState = mutable.Map[StateKey, StateValue] with ImmutableState type StateKey = Bytes type StateValue = Bytes // The first element will never be read because begin offsets are exclusive. private val Beginning = LedgerRecord(Offset.beforeBegin, ByteString.EMPTY, ByteString.EMPTY) def empty = new InMemoryState( log = mutable.ArrayBuffer(Beginning), state = mutable.Map.empty, ) }
Example 13
Source File: InMemoryLedgerStateOperations.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.on.memory

import com.daml.ledger.on.memory.InMemoryState.MutableLog
import com.daml.ledger.participant.state.kvutils.KVOffset
import com.daml.ledger.participant.state.kvutils.api.LedgerRecord
import com.daml.ledger.participant.state.v1.Offset
import com.daml.ledger.validator.BatchingLedgerStateOperations
import com.daml.ledger.validator.LedgerStateOperations.{Key, Value}

import scala.collection.mutable
import scala.concurrent.{ExecutionContext, Future}

private[memory] final class InMemoryLedgerStateOperations(
    log: InMemoryState.MutableLog,
    state: InMemoryState.MutableState,
)(implicit executionContext: ExecutionContext)
    extends BatchingLedgerStateOperations[Index] {
  import InMemoryLedgerStateOperations.appendEntry

  override def readState(keys: Seq[Key]): Future[Seq[Option[Value]]] =
    Future.successful(keys.map(state.get))

  override def writeState(keyValuePairs: Seq[(Key, Value)]): Future[Unit] = {
    state ++= keyValuePairs
    Future.unit
  }

  override def appendToLog(key: Key, value: Value): Future[Index] =
    Future.successful(appendEntry(log, LedgerRecord(_, key, value)))
}

object InMemoryLedgerStateOperations {
  def apply()(implicit executionContext: ExecutionContext): InMemoryLedgerStateOperations = {
    val inMemoryState = mutable.Map.empty[Key, Value]
    val inMemoryLog = mutable.ArrayBuffer[LedgerRecord]()
    new InMemoryLedgerStateOperations(inMemoryLog, inMemoryState)
  }

  private[memory] def appendEntry(log: MutableLog, createEntry: Offset => LedgerRecord): Index = {
    val entryAtIndex = log.size
    val offset = KVOffset.fromLong(entryAtIndex.toLong)
    val entry = createEntry(offset)
    log += entry
    entryAtIndex
  }
}
Example 14
Source File: LogCollector.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.platform.testing import ch.qos.logback.classic.Level import ch.qos.logback.classic.spi.ILoggingEvent import ch.qos.logback.core.AppenderBase import scala.beans.BeanProperty import scala.collection.concurrent.TrieMap import scala.collection.mutable import scala.reflect.ClassTag object LogCollector { private val log = TrieMap .empty[String, TrieMap[String, mutable.Builder[(Level, String), Vector[(Level, String)]]]] def read[Test, Logger]( implicit test: ClassTag[Test], logger: ClassTag[Logger]): IndexedSeq[(Level, String)] = log .get(test.runtimeClass.getName) .flatMap(_.get(logger.runtimeClass.getName)) .fold(IndexedSeq.empty[(Level, String)])(_.result()) def clear[Test](implicit test: ClassTag[Test]): Unit = { log.remove(test.runtimeClass.getName) () } } final class LogCollector extends AppenderBase[ILoggingEvent] { @BeanProperty var test: String = _ override def append(e: ILoggingEvent): Unit = { if (test == null) { addError("Test identifier undefined, skipping logging") } else { val log = LogCollector.log .getOrElseUpdate(test, TrieMap.empty) .getOrElseUpdate(e.getLoggerName, Vector.newBuilder) val _ = log.synchronized { log += e.getLevel -> e.getMessage } } } }
Example 15
Source File: PostgresqlSqlLedgerReaderWriterIntegrationSpec.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.on.sql

import com.daml.testing.postgresql.PostgresAroundAll

import scala.collection.mutable

class PostgresqlSqlLedgerReaderWriterIntegrationSpec
    extends SqlLedgerReaderWriterIntegrationSpecBase("SQL implementation using PostgreSQL")
    with PostgresAroundAll {

  private val databases: mutable.Map[String, String] = mutable.Map.empty

  override protected def jdbcUrl(id: String): String = {
    if (!databases.contains(id)) {
      val database = createNewDatabase(id)
      databases += id -> database.url
    }
    databases(id)
  }
}
Example 16
Source File: AsyncForwardingListener.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.api.auth.interceptor

import io.grpc.ServerCall

import scala.collection.mutable

abstract class AsyncForwardingListener[ReqT] extends ServerCall.Listener[ReqT] {
  protected type Listener = ServerCall.Listener[ReqT]

  private[this] val lock = new Object
  private[this] val stash: mutable.ListBuffer[Listener => Unit] = new mutable.ListBuffer
  private[this] var nextListener: Option[Listener] = None

  private def enqueueOrProcess(msg: Listener => Unit): Unit = lock.synchronized {
    if (nextListener.isDefined) {
      msg(nextListener.get)
    } else {
      stash.append(msg)
    }
  }

  protected def setNextListener(listener: Listener): Unit = lock.synchronized {
    nextListener = Some(listener)
    stash.foreach(msg => msg(listener))
  }

  // All methods that need to be forwarded
  override def onHalfClose(): Unit = enqueueOrProcess(i => i.onHalfClose())
  override def onCancel(): Unit = enqueueOrProcess(i => i.onCancel())
  override def onComplete(): Unit = enqueueOrProcess(i => i.onComplete())
  override def onReady(): Unit = enqueueOrProcess(i => i.onReady())
  override def onMessage(message: ReqT): Unit = enqueueOrProcess(i => i.onMessage(message))
}
Example 17
Source File: CachingDamlLedgerStateReader.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.ledger.validator.caching import com.daml.caching.Cache import com.daml.ledger.participant.state.kvutils.DamlKvutils.{DamlStateKey, DamlStateValue} import com.daml.ledger.validator.LedgerStateOperations.Key import com.daml.ledger.validator.{ DamlLedgerStateReader, LedgerStateReader, RawToDamlLedgerStateReaderAdapter, StateKeySerializationStrategy } import scala.collection.mutable import scala.concurrent.{ExecutionContext, Future} class CachingDamlLedgerStateReader( val cache: Cache[DamlStateKey, DamlStateValue], shouldCache: DamlStateKey => Boolean, keySerializationStrategy: StateKeySerializationStrategy, delegate: DamlLedgerStateReader)(implicit executionContext: ExecutionContext) extends DamlLedgerStateReader with QueryableReadSet { private val readSet = mutable.Set.empty[DamlStateKey] override def getReadSet: Set[Key] = this.synchronized { readSet.map(keySerializationStrategy.serializeStateKey).toSet } override def readState(keys: Seq[DamlStateKey]): Future[Seq[Option[DamlStateValue]]] = { this.synchronized { readSet ++= keys } @SuppressWarnings(Array("org.wartremover.warts.Any")) // Required to make `.view` work. val cachedValues = keys.view .map(key => key -> cache.getIfPresent(key)) .filter(_._2.isDefined) .toMap val keysToRead = keys.toSet -- cachedValues.keySet if (keysToRead.nonEmpty) { delegate .readState(keysToRead.toSeq) .map { readStateValues => val readValues = keysToRead.zip(readStateValues).toMap readValues.collect { case (key, Some(value)) if shouldCache(key) => cache.put(key, value) } val all = cachedValues ++ readValues keys.map(all(_)) } } else { Future { keys.map(cachedValues(_)) } } } } object CachingDamlLedgerStateReader { private[validator] def apply( cache: Cache[DamlStateKey, DamlStateValue], cachingPolicy: CacheUpdatePolicy, ledgerStateOperations: LedgerStateReader, keySerializationStrategy: StateKeySerializationStrategy)( implicit executionContext: ExecutionContext): CachingDamlLedgerStateReader = { new CachingDamlLedgerStateReader( cache, cachingPolicy.shouldCacheOnRead, keySerializationStrategy, new RawToDamlLedgerStateReaderAdapter(ledgerStateOperations, keySerializationStrategy)) } }
Example 18
Source File: CommitContext.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.participant.state.kvutils.committer

import com.daml.ledger.participant.state.kvutils.DamlKvutils.{
  DamlLogEntryId,
  DamlStateKey,
  DamlStateValue
}
import com.daml.ledger.participant.state.kvutils.{DamlStateMap, Err}
import com.daml.ledger.participant.state.v1.ParticipantId
import com.daml.lf.data.Time.Timestamp
import org.slf4j.LoggerFactory

import scala.collection.mutable

// NOTE: this listing is an excerpt of CommitContext — the enclosing declaration and the definitions
// of `outputOrder`, `outputs`, `inputs` and `logger` are not shown in the original.
  def getOutputs: Iterable[(DamlStateKey, DamlStateValue)] =
    outputOrder
      .map(key => key -> outputs(key))
      .filterNot {
        case (key, value) if inputAlreadyContains(key, value) =>
          logger.trace("Identical output found for key {}", key)
          true
        case _ => false
      }

  private def inputAlreadyContains(key: DamlStateKey, value: DamlStateValue): Boolean =
    inputs.get(key).exists(_.contains(value))
}
Example 19
Source File: FileBasedLedgerDataExporter.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.ledger.participant.state.kvutils.export import java.io.DataOutputStream import java.time.Instant import java.util.concurrent.locks.StampedLock import com.daml.ledger.participant.state.v1.ParticipantId import com.daml.ledger.validator.LedgerStateOperations.{Key, Value} import com.google.protobuf.ByteString import scala.collection.mutable import scala.collection.mutable.ListBuffer class FileBasedLedgerDataExporter(output: DataOutputStream) extends LedgerDataExporter { import FileBasedLedgerDataExporter._ private val outputLock = new StampedLock private[export] val correlationIdMapping = mutable.Map.empty[String, String] private[export] val inProgressSubmissions = mutable.Map.empty[String, SubmissionInfo] private[export] val bufferedKeyValueDataPerCorrelationId = mutable.Map.empty[String, mutable.ListBuffer[(Key, Value)]] def addSubmission( submissionEnvelope: ByteString, correlationId: String, recordTimeInstant: Instant, participantId: ParticipantId): Unit = this.synchronized { inProgressSubmissions.put( correlationId, SubmissionInfo(submissionEnvelope, correlationId, recordTimeInstant, participantId)) () } def addParentChild(parentCorrelationId: String, childCorrelationId: String): Unit = this.synchronized { correlationIdMapping.put(childCorrelationId, parentCorrelationId) () } def addToWriteSet(correlationId: String, data: Iterable[(Key, Value)]): Unit = this.synchronized { correlationIdMapping .get(correlationId) .foreach { parentCorrelationId => val keyValuePairs = bufferedKeyValueDataPerCorrelationId .getOrElseUpdate(parentCorrelationId, ListBuffer.empty) keyValuePairs.appendAll(data) bufferedKeyValueDataPerCorrelationId.put(parentCorrelationId, keyValuePairs) } } def finishedProcessing(correlationId: String): Unit = { val (submissionInfo, bufferedData) = this.synchronized { ( inProgressSubmissions.get(correlationId), bufferedKeyValueDataPerCorrelationId.get(correlationId)) } submissionInfo.foreach { submission => bufferedData.foreach(writeSubmissionData(submission, _)) this.synchronized { inProgressSubmissions.remove(correlationId) bufferedKeyValueDataPerCorrelationId.remove(correlationId) correlationIdMapping .collect { case (key, value) if value == correlationId => key } .foreach(correlationIdMapping.remove) } } } private def writeSubmissionData( submissionInfo: SubmissionInfo, writeSet: ListBuffer[(Key, Value)]): Unit = { val stamp = outputLock.writeLock() try { Serialization.serializeEntry(submissionInfo, writeSet, output) output.flush() } finally { outputLock.unlock(stamp) } } } object FileBasedLedgerDataExporter { case class SubmissionInfo( submissionEnvelope: ByteString, correlationId: String, recordTimeInstant: Instant, participantId: ParticipantId) type WriteSet = Seq[(Key, Value)] }
Example 20
Source File: Relation.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.lf.data

import scala.collection.{mutable, immutable}

object Relation {

  // NOTE: this definition and specifically inversion assumes
  // that the values related to an A are non-empty
  // we treat
  //  - the empty relation mapping
  //  - and a Map that maps everything to the empty set
  // as the same
  // this fits our purposes for the moment
  type Relation[A, B] = immutable.Map[A, Set[B]]

  object Relation {
    def merge[A, B](r: Relation[A, B], pair: (A, Set[B])): Relation[A, B] =
      r.updated(pair._1, r.getOrElse(pair._1, Set.empty[B]).union(pair._2))

    def union[A, B](r1: Relation[A, B], r2: Relation[A, B]): Relation[A, B] =
      r2.foldLeft(r1)(merge)

    def diff[A, B](r1: Relation[A, B], r2: Relation[A, B]): Relation[A, B] =
      r1.map { case (a, bs) => a -> r2.get(a).fold(bs)(bs diff _) }

    def invert[A, B](relation: Relation[A, B]): Relation[B, A] = {
      val result = mutable.Map[B, Set[A]]() withDefaultValue Set()
      relation.foreach {
        case (a, bs) =>
          bs.foreach(b => result(b) = result(b) + a)
      }
      result.toMap
    }

    def flatten[A, B](relation: Relation[A, B]): Iterator[(A, B)] =
      for {
        kvs <- relation.iterator
        value <- kvs._2
      } yield (kvs._1, value)

    def mapKeys[A, K, B](r: Relation[A, B])(f: A => K): Relation[K, B] =
      r.map { case (a, b) => f(a) -> b }
  }
}
Example 21
Source File: DummyBackingStore.scala From iotchain with MIT License | 5 votes |
package jbok.network.http.server.authentication

import cats.data.OptionT
import cats.effect.IO
import tsec.authentication.BackingStore

import scala.collection.mutable

object DummyBackingStore {
  def apply[I, V](getId: V => I): BackingStore[IO, I, V] = new BackingStore[IO, I, V] {
    private val storageMap = mutable.HashMap.empty[I, V]

    def put(elem: V): IO[V] = {
      val map = storageMap.put(getId(elem), elem)
      if (map.isEmpty) IO.pure(elem)
      else IO.raiseError(new IllegalArgumentException)
    }

    def get(id: I): OptionT[IO, V] =
      OptionT.fromOption[IO](storageMap.get(id))

    def update(v: V): IO[V] = {
      storageMap.update(getId(v), v)
      IO.pure(v)
    }

    def delete(id: I): IO[Unit] =
      storageMap.remove(id) match {
        case Some(_) => IO.unit
        case None    => IO.raiseError(new IllegalArgumentException)
      }
  }
}
Example 22
Source File: TopNList.scala From Spark.TableStatsExample with Apache License 2.0 | 5 votes |
package com.cloudera.sa.examples.tablestats.model

import scala.collection.mutable

class TopNList(val maxSize: Int) extends Serializable {
  val topNCountsForColumnArray = new mutable.ArrayBuffer[(Any, Long)]
  var lowestColumnCountIndex: Int = -1
  var lowestValue = Long.MaxValue

  def add(newValue: Any, newCount: Long): Unit = {
    if (topNCountsForColumnArray.length < maxSize - 1) {
      topNCountsForColumnArray += ((newValue, newCount))
    } else if (topNCountsForColumnArray.length == maxSize) {
      updateLowestValue
    } else {
      if (newCount > lowestValue) {
        topNCountsForColumnArray.insert(lowestColumnCountIndex, (newValue, newCount))
        updateLowestValue
      }
    }
  }

  def updateLowestValue: Unit = {
    var index = 0

    topNCountsForColumnArray.foreach { r =>
      if (r._2 < lowestValue) {
        lowestValue = r._2
        lowestColumnCountIndex = index
      }
      index += 1
    }
  }

  override def toString = s"TopNList(topNCountsForColumnArray=$topNCountsForColumnArray)"
}
Example 23
Source File: FirstPassStatsModel.scala From Spark.TableStatsExample with Apache License 2.0 | 5 votes |
package com.cloudera.sa.examples.tablestats.model

import scala.collection.mutable

class FirstPassStatsModel extends Serializable {
  var columnStatsMap = new mutable.HashMap[Integer, ColumnStats]

  def +=(colIndex: Int, colValue: Any, colCount: Long): Unit = {
    columnStatsMap.getOrElseUpdate(colIndex, new ColumnStats) += (colValue, colCount)
  }

  def +=(firstPassStatsModel: FirstPassStatsModel): Unit = {
    firstPassStatsModel.columnStatsMap.foreach { e =>
      val columnStats = columnStatsMap.getOrElse(e._1, null)
      if (columnStats != null) {
        columnStats += (e._2)
      } else {
        columnStatsMap += ((e._1, e._2))
      }
    }
  }

  override def toString = s"FirstPassStatsModel(columnStatsMap=$columnStatsMap)"
}
Example 24
Source File: ConfigurableDataGeneratorMain.scala From Spark.TableStatsExample with Apache License 2.0 | 5 votes |
package com.cloudera.sa.examples.tablestats import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.GenericRow import org.apache.spark.sql.types.{StringType, LongType, StructField, StructType} import org.apache.spark.{SparkContext, SparkConf} import scala.collection.mutable import scala.util.Random object ConfigurableDataGeneratorMain { def main(args: Array[String]): Unit = { if (args.length == 0) { println("ConfigurableDataGeneratorMain <outputPath> <numberOfColumns> <numberOfRecords> <numberOfPartitions> <local>") return } val outputPath = args(0) val numberOfColumns = args(1).toInt val numberOfRecords = args(2).toInt val numberOfPartitions = args(3).toInt val runLocal = (args.length == 5 && args(4).equals("L")) var sc: SparkContext = null if (runLocal) { val sparkConfig = new SparkConf() sparkConfig.set("spark.broadcast.compress", "false") sparkConfig.set("spark.shuffle.compress", "false") sparkConfig.set("spark.shuffle.spill.compress", "false") sc = new SparkContext("local", "test", sparkConfig) } else { val sparkConfig = new SparkConf().setAppName("ConfigurableDataGeneratorMain") sc = new SparkContext(sparkConfig) } val sqlContext = new org.apache.spark.sql.SQLContext(sc) //Part A val rowRDD = sc.parallelize( (0 until numberOfPartitions).map( i => i), numberOfPartitions) //Part B val megaDataRDD = rowRDD.flatMap( r => { val random = new Random() val dataRange = (0 until numberOfRecords/numberOfPartitions).iterator dataRange.map[Row]( x => { val values = new mutable.ArrayBuffer[Any] for (i <- 0 until numberOfColumns) { if (i % 2 == 0) { values.+=(random.nextInt(100).toLong) } else { values.+=(random.nextInt(100).toString) } } new GenericRow(values.toArray) }) }) //Part C val schema = StructType( (0 until numberOfColumns).map( i => { if (i % 2 == 0) { StructField("longColumn_" + i, LongType, true) } else { StructField("stringColumn_" + i, StringType, true) } }) ) val df = sqlContext.createDataFrame(megaDataRDD, schema) df.saveAsParquetFile(outputPath) //Part D sc.stop() } }
Example 25
Source File: TableStatsSinglePathMain.scala From Spark.TableStatsExample with Apache License 2.0 | 5 votes |
package com.cloudera.sa.examples.tablestats import com.cloudera.sa.examples.tablestats.model.{FirstPassStatsModel} import org.apache.spark._ import org.apache.spark.sql.DataFrame import scala.collection.mutable object TableStatsSinglePathMain { def main(args: Array[String]): Unit = { if (args.length == 0) { println("TableStatsSinglePathMain <inputPath>") return } val inputPath = args(0) val runLocal = (args.length == 2 && args(1).equals("L")) var sc:SparkContext = null if (runLocal) { val sparkConfig = new SparkConf() sparkConfig.set("spark.broadcast.compress", "false") sparkConfig.set("spark.shuffle.compress", "false") sparkConfig.set("spark.shuffle.spill.compress", "false") sc = new SparkContext("local", "TableStatsSinglePathMain", sparkConfig) } else { val sparkConfig = new SparkConf().setAppName("TableStatsSinglePathMain") sc = new SparkContext(sparkConfig) } val sqlContext = new org.apache.spark.sql.SQLContext(sc) //Part A var df = sqlContext.parquetFile(inputPath) //Part B val firstPassStats = getFirstPassStat( df) //Part E println(firstPassStats) //Part F sc.stop() } def getFirstPassStat(df: DataFrame): FirstPassStatsModel = { val schema = df.schema //Part B.1 val columnValueCounts = df.flatMap(r => (0 until schema.length).map { idx => //((columnIdx, cellValue), count) ((idx, r.get(idx)), 1l) } ).reduceByKey(_ + _) //This is like word count //Part C val firstPassStats = columnValueCounts.mapPartitions[FirstPassStatsModel]{it => val firstPassStatsModel = new FirstPassStatsModel() it.foreach{ case ((columnIdx, columnVal), count) => firstPassStatsModel += (columnIdx, columnVal, count) } Iterator(firstPassStatsModel) }.reduce { (a, b) => //Part D a += (b) a } firstPassStats } }
Example 26
Source File: MarkersQueue.scala From kmq with Apache License 2.0 | 5 votes |
package com.softwaremill.kmq.redelivery import com.softwaremill.kmq.{EndMarker, MarkerKey, MarkerValue, StartMarker} import scala.collection.mutable class MarkersQueue(disableRedeliveryBefore: Offset) { private val markersInProgress = mutable.Set[MarkerKey]() private val markersByTimestamp = new mutable.PriorityQueue[AttributedMarkerKey[Timestamp]]()(bySmallestAttributeOrdering) private val markersByOffset = new mutable.PriorityQueue[AttributedMarkerKey[Offset]]()(bySmallestAttributeOrdering) private var redeliveryEnabled = false def handleMarker(markerOffset: Offset, k: MarkerKey, v: MarkerValue, t: Timestamp) { if (markerOffset >= disableRedeliveryBefore) { redeliveryEnabled = true } v match { case s: StartMarker => markersByOffset.enqueue(AttributedMarkerKey(k, markerOffset)) markersByTimestamp.enqueue(AttributedMarkerKey(k, t+s.getRedeliverAfter)) markersInProgress += k case _: EndMarker => markersInProgress -= k } } def markersToRedeliver(now: Timestamp): List[MarkerKey] = { removeEndedMarkers(markersByTimestamp) var toRedeliver = List.empty[MarkerKey] if (redeliveryEnabled) { while (shouldRedeliverMarkersQueueHead(now)) { val queueHead = markersByTimestamp.dequeue() // the first marker, if any, is not ended for sure (b/c of the cleanup that's done at the beginning), // but subsequent markers don't have to be. if (markersInProgress.contains(queueHead.key)) { toRedeliver ::= queueHead.key } // not removing from markersInProgress - until we are sure the message is redelivered (the redeliverer // sends an end marker when this is done) - the marker needs to stay for minimum-offset calculations to be // correct } } toRedeliver } def smallestMarkerOffset(): Option[Offset] = { removeEndedMarkers(markersByOffset) markersByOffset.headOption.map(_.attr) } private def removeEndedMarkers[T](queue: mutable.PriorityQueue[AttributedMarkerKey[T]]): Unit = { while (isHeadEnded(queue)) { queue.dequeue() } } private def isHeadEnded[T](queue: mutable.PriorityQueue[AttributedMarkerKey[T]]): Boolean = { queue.headOption.exists(e => !markersInProgress.contains(e.key)) } private def shouldRedeliverMarkersQueueHead(now: Timestamp): Boolean = { markersByTimestamp.headOption match { case None => false case Some(m) => now >= m.attr } } private case class AttributedMarkerKey[T](key: MarkerKey, attr: T) private def bySmallestAttributeOrdering[T: Ordering]: Ordering[AttributedMarkerKey[T]] = new Ordering[AttributedMarkerKey[T]] { override def compare(x: AttributedMarkerKey[T], y: AttributedMarkerKey[T]): Int = { - implicitly[Ordering[T]].compare(x.attr, y.attr) } } }
Example 27
Source File: MergeSort.scala From chymyst-core with Apache License 2.0 | 5 votes |
package io.chymyst.benchmark // Make all imports explicit, just to see what is the entire set of required imports. // Do not optimize imports in this file! import io.chymyst.jc.{+, FixedPool, M, m, B, b, go, Reaction, ReactionInfo, InputMoleculeInfo, AllMatchersAreTrivial, OutputMoleculeInfo, site, EmitMultiple} import io.chymyst.jc.ConsoleErrorsAndWarningsReporter import scala.annotation.tailrec import scala.collection.mutable object MergeSort { type Coll[T] = IndexedSeq[T] def arrayMerge[T: Ordering](arr1: Coll[T], arr2: Coll[T]): Coll[T] = { val result = new mutable.ArraySeq[T](arr1.length + arr2.length) // just to allocate space def isLess(x: T, y: T) = implicitly[Ordering[T]].compare(x, y) < 0 // Will now modify the `result` array in place. @tailrec def mergeRec(i1: Int, i2: Int, i: Int): Unit = { if (i1 == arr1.length && i2 == arr2.length) () else { val (x, newI1, newI2) = if (i1 < arr1.length && (i2 == arr2.length || isLess(arr1(i1), arr2(i2)))) (arr1(i1), i1 + 1, i2) else (arr2(i2), i1, i2 + 1) result(i) = x mergeRec(newI1, newI2, i + 1) } } mergeRec(0, 0, 0) result.toIndexedSeq } def performMergeSort[T: Ordering](array: Coll[T], threads: Int = 8): Coll[T] = { val finalResult = m[Coll[T]] val getFinalResult = b[Unit, Coll[T]] val reactionPool = FixedPool(threads) val pool2 = FixedPool(threads) site(pool2)( go { case finalResult(arr) + getFinalResult(_, r) => r(arr) } ) // The `mergesort()` molecule will start the chain reactions at one level lower. val mergesort = m[(Coll[T], M[Coll[T]])] site(reactionPool)( go { case mergesort((arr, resultToYield)) => if (arr.length <= 1) resultToYield(arr) else { val (part1, part2) = arr.splitAt(arr.length / 2) // The `sorted1()` and `sorted2()` molecules will carry the sorted results from the lower level. val sorted1 = m[Coll[T]] val sorted2 = m[Coll[T]] site(reactionPool)( go { case sorted1(x) + sorted2(y) => resultToYield(arrayMerge(x, y)) } ) // emit `mergesort` with the lower-level `sorted` result molecules mergesort((part1, sorted1)) + mergesort((part2, sorted2)) } } ) // Sort our array: emit `mergesort()` at top level. mergesort((array, finalResult)) val result = getFinalResult() reactionPool.shutdownNow() pool2.shutdownNow() result } }
Example 28
package devbox.common

import Util.permsetRw
import upickle.default.{ReadWriter, macroRW}

import java.security.MessageDigest

import os.{Path, StatInfo}

import scala.collection.mutable

// NOTE: this listing is an excerpt — the `sealed trait Sig` declaration and the opening of its
// companion object are not shown in the original.
  def compute(p: Path, buffer: Array[Byte], fileType: os.FileType) = {
    fileType match {
      case os.FileType.Other => None
      case os.FileType.SymLink => Some(Symlink(os.readLink(p).toString))
      case os.FileType.Dir => Some(Dir(os.perms(p).toInt()))
      case os.FileType.File =>
        val digest = MessageDigest.getInstance("MD5")
        val chunks = mutable.ArrayBuffer.empty[Bytes]
        var size = 0L
        for ((buffer, n) <- os.read.chunks(p, buffer)) {
          size += n
          digest.reset()
          digest.update(buffer, 0, n)
          chunks.append(new Bytes(digest.digest()))
        }
        Some(File(os.perms(p).toInt, chunks.toSeq, size))
    }
  }

  case class File(perms: os.PermSet, blockHashes: Seq[Bytes], size: Long) extends Sig
  object File {
    implicit val rw: ReadWriter[File] = macroRW
  }

  case class Dir(perms: os.PermSet) extends Sig
  object Dir {
    implicit val rw: ReadWriter[Dir] = macroRW
  }

  case class Symlink(dest: String) extends Sig
  object Symlink {
    implicit val rw: ReadWriter[Symlink] = macroRW
  }

  implicit val rw: ReadWriter[Sig] = macroRW
}
Example 29
Source File: Materializer.scala From sjsonnet with Apache License 2.0 | 5 votes |
package sjsonnet import sjsonnet.Expr.{FieldName, Member, ObjBody} import sjsonnet.Expr.Member.Visibility import upickle.core.Visitor import scala.collection.mutable object Materializer { def apply(v: Val)(implicit evaluator: EvalScope): ujson.Value = apply0(v, ujson.Value) def stringify(v: Val)(implicit evaluator: EvalScope): String = { apply0(v, new sjsonnet.Renderer()).toString } def apply0[T](v: Val, visitor: Visitor[T, T]) (implicit evaluator: EvalScope): T = try { v match { case Val.True => visitor.visitTrue(-1) case Val.False => visitor.visitFalse(-1) case Val.Null => visitor.visitNull(-1) case Val.Num(n) => visitor.visitFloat64(n, -1) case Val.Str(s) => visitor.visitString(s, -1) case Val.Arr(xs) => val arrVisitor = visitor.visitArray(xs.length, -1) for(x <- xs) { arrVisitor.visitValue( apply0(x.force, visitor), -1 ) } arrVisitor.visitEnd(-1) case obj: Val.Obj => obj.triggerAllAsserts(obj) val keysUnsorted = obj.getVisibleKeys().toArray val keys = if (!evaluator.preserveOrder) keysUnsorted.sortBy(_._1) else keysUnsorted val objVisitor = visitor.visitObject(keys.length , -1) for(t <- keys) { val (k, hidden) = t if (!hidden){ objVisitor.visitKeyValue(objVisitor.visitKey(-1).visitString(k, -1)) objVisitor.visitValue( apply0( obj.value(k, -1)(evaluator.emptyMaterializeFileScope, implicitly), visitor ), -1 ) } } objVisitor.visitEnd(-1) case f: Val.Func => apply0( f.apply(Nil, "(memory)", -1)(evaluator.emptyMaterializeFileScope, implicitly), visitor ) } }catch {case e: StackOverflowError => throw Error.Delegate("Stackoverflow while materializing, possibly due to recursive value") } def reverse(v: ujson.Value): Val = v match{ case ujson.True => Val.True case ujson.False => Val.False case ujson.Null => Val.Null case ujson.Num(n) => Val.Num(n) case ujson.Str(s) => Val.Str(s) case ujson.Arr(xs) => Val.Arr(xs.map(x => Val.Lazy(reverse(x))).toArray[Val.Lazy]) case ujson.Obj(xs) => val builder = mutable.LinkedHashMap.newBuilder[String, Val.Obj.Member] for(x <- xs){ val v = Val.Obj.Member(false, Visibility.Normal, (_: Val.Obj, _: Option[Val.Obj], _, _) => reverse(x._2) ) builder += (x._1 -> v) } new Val.Obj(builder.result(), _ => (), None) } def toExpr(v: ujson.Value): Expr = v match{ case ujson.True => Expr.True(0) case ujson.False => Expr.False(0) case ujson.Null => Expr.Null(0) case ujson.Num(n) => Expr.Num(0, n) case ujson.Str(s) => Expr.Str(0, s) case ujson.Arr(xs) => Expr.Arr(0, xs.map(toExpr).toArray[Expr]) case ujson.Obj(kvs) => Expr.Obj(0, ObjBody.MemberList( for((k, v) <- kvs.toArray) yield Member.Field(0, FieldName.Fixed(k), false, None, Visibility.Normal, toExpr(v)) ) ) } }
Example 30
Source File: SjsonnetMain.scala From sjsonnet with Apache License 2.0 | 5 votes |
package sjsonnet import scala.collection.mutable import scala.scalajs.js import scala.scalajs.js.annotation.{JSExport, JSExportTopLevel} @JSExportTopLevel("SjsonnetMain") object SjsonnetMain { def createParseCache() = collection.mutable.Map[String, fastparse.Parsed[(Expr, Map[String, Int])]]() @JSExport def interpret(text: String, extVars: js.Any, tlaVars: js.Any, wd0: String, importer: js.Function2[String, String, js.Array[String]], preserveOrder: Boolean = false): js.Any = { val interp = new Interpreter( mutable.Map.empty, ujson.WebJson.transform(extVars, ujson.Value).obj.toMap, ujson.WebJson.transform(tlaVars, ujson.Value).obj.toMap, JsVirtualPath(wd0), importer = (wd, path) => { importer(wd.asInstanceOf[JsVirtualPath].path, path) match{ case null => None case arr => Some((JsVirtualPath(arr(0)), arr(1))) } }, preserveOrder ) interp.interpret0(text, JsVirtualPath("(memory)"), ujson.WebJson.Builder) match{ case Left(msg) => throw new js.JavaScriptException(msg) case Right(v) => v } } } case class JsVirtualPath(path: String) extends Path{ def relativeToString(p: Path): String = p match{ case other: JsVirtualPath if path.startsWith(other.path) => path.drop(other.path.length) case _ => path } def debugRead(): Option[String] = None def parent(): Path = JsVirtualPath(path.split('/').dropRight(1).mkString("/")) def segmentCount(): Int = path.split('/').length def last: String = path.split('/').last def /(s: String): Path = JsVirtualPath(path + "/" + s) }
Example 31
Source File: SparkTC.scala From drizzle-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples import scala.collection.mutable import scala.util.Random import org.apache.spark.sql.SparkSession object SparkTC { val numEdges = 200 val numVertices = 100 val rand = new Random(42) def generateGraph: Seq[(Int, Int)] = { val edges: mutable.Set[(Int, Int)] = mutable.Set.empty while (edges.size < numEdges) { val from = rand.nextInt(numVertices) val to = rand.nextInt(numVertices) if (from != to) edges.+=((from, to)) } edges.toSeq } def main(args: Array[String]) { val spark = SparkSession .builder .appName("SparkTC") .getOrCreate() val slices = if (args.length > 0) args(0).toInt else 2 var tc = spark.sparkContext.parallelize(generateGraph, slices).cache() // Linear transitive closure: each round grows paths by one edge, // by joining the graph's edges with the already-discovered paths. // e.g. join the path (y, z) from the TC with the edge (x, y) from // the graph to obtain the path (x, z). // Because join() joins on keys, the edges are stored in reversed order. val edges = tc.map(x => (x._2, x._1)) // This join is iterated until a fixed point is reached. var oldCount = 0L var nextCount = tc.count() do { oldCount = nextCount // Perform the join, obtaining an RDD of (y, (z, x)) pairs, // then project the result to obtain the new (x, z) paths. tc = tc.union(tc.join(edges).map(x => (x._2._2, x._2._1))).distinct().cache() nextCount = tc.count() } while (nextCount != oldCount) println("TC has " + tc.count() + " edges.") spark.stop() } } // scalastyle:on println
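Note: generateGraph above leans on mutable.Set to discard duplicate edges until the requested count is reached. The same loop in a runnable standalone form, with smaller illustrative sizes:

import scala.collection.mutable
import scala.util.Random

object RandomEdgeSetDemo {
  def main(args: Array[String]): Unit = {
    val rand = new Random(42)
    val edges = mutable.Set.empty[(Int, Int)]
    // The set silently drops duplicates, so looping until the target size is reached
    // yields exactly that many distinct edges (assuming enough possible pairs exist).
    while (edges.size < 10) {
      val from = rand.nextInt(5)
      val to = rand.nextInt(5)
      if (from != to) edges += ((from, to))
    }
    println(edges.mkString(", "))
  }
}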
Example 32
Source File: ParamGridBuilder.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.tuning

import scala.annotation.varargs
import scala.collection.mutable

import org.apache.spark.annotation.Since
import org.apache.spark.ml.param._

@Since("1.2.0")
class ParamGridBuilder {

  // Grid of parameter values; populated by the addGrid/baseOn methods, which are
  // elided from this excerpt.
  private val paramGrid = mutable.Map.empty[Param[_], Iterable[_]]

  @Since("1.2.0")
  def build(): Array[ParamMap] = {
    var paramMaps = Array(new ParamMap)
    paramGrid.foreach { case (param, values) =>
      val newParamMaps = values.flatMap { v =>
        paramMaps.map(_.copy.put(param.asInstanceOf[Param[Any]], v))
      }
      paramMaps = newParamMaps.toArray
    }
    paramMaps
  }
}
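Note: build() above is a cartesian-product expansion driven by the mutable grid map. A sketch of the same expansion using plain Scala Maps in place of Param and ParamMap (all names here are illustrative):

import scala.collection.mutable

object GridExpansionDemo {
  def main(args: Array[String]): Unit = {
    // Same expansion idea as build() above, with plain Maps standing in for ParamMap.
    val grid = mutable.Map[String, Iterable[Any]]("maxIter" -> Seq(10, 20), "reg" -> Seq(0.0, 0.1))
    var combos = Array(Map.empty[String, Any])
    grid.foreach { case (name, values) =>
      combos = values.flatMap(v => combos.map(_ + (name -> v))).toArray
    }
    combos.foreach(println) // all 4 combinations
  }
}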
Example 33
Source File: HashingTF.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.feature

import java.lang.{Iterable => JavaIterable}

import scala.collection.JavaConverters._
import scala.collection.mutable

import org.apache.spark.SparkException
import org.apache.spark.annotation.Since
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.rdd.RDD
import org.apache.spark.unsafe.hash.Murmur3_x86_32._
import org.apache.spark.unsafe.types.UTF8String
import org.apache.spark.util.Utils

object HashingTF {

  // Seed used by the murmur3 term hashing below. The companion HashingTF class and
  // its transform methods are elided from this excerpt.
  private val seed = 42

  private[spark] def murmur3Hash(term: Any): Int = {
    term match {
      case null => seed
      case b: Boolean => hashInt(if (b) 1 else 0, seed)
      case b: Byte => hashInt(b, seed)
      case s: Short => hashInt(s, seed)
      case i: Int => hashInt(i, seed)
      case l: Long => hashLong(l, seed)
      case f: Float => hashInt(java.lang.Float.floatToIntBits(f), seed)
      case d: Double => hashLong(java.lang.Double.doubleToLongBits(d), seed)
      case s: String =>
        val utf8 = UTF8String.fromString(s)
        hashUnsafeBytes(utf8.getBaseObject, utf8.getBaseOffset, utf8.numBytes(), seed)
      case _ => throw new SparkException("HashingTF with murmur3 algorithm does not " +
        s"support type ${term.getClass.getCanonicalName} of input data.")
    }
  }
}
Example 34
Source File: SlidingRDD.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.rdd import scala.collection.mutable import scala.reflect.ClassTag import org.apache.spark.{Partition, TaskContext} import org.apache.spark.rdd.RDD private[mllib] class SlidingRDDPartition[T](val idx: Int, val prev: Partition, val tail: Seq[T], val offset: Int) extends Partition with Serializable { override val index: Int = idx } private[mllib] class SlidingRDD[T: ClassTag](@transient val parent: RDD[T], val windowSize: Int, val step: Int) extends RDD[Array[T]](parent) { require(windowSize > 0 && step > 0 && !(windowSize == 1 && step == 1), "Window size and step must be greater than 0, " + s"and they cannot be both 1, but got windowSize = $windowSize and step = $step.") override def compute(split: Partition, context: TaskContext): Iterator[Array[T]] = { val part = split.asInstanceOf[SlidingRDDPartition[T]] (firstParent[T].iterator(part.prev, context) ++ part.tail) .drop(part.offset) .sliding(windowSize, step) .withPartial(false) .map(_.toArray) } override def getPreferredLocations(split: Partition): Seq[String] = firstParent[T].preferredLocations(split.asInstanceOf[SlidingRDDPartition[T]].prev) override def getPartitions: Array[Partition] = { val parentPartitions = parent.partitions val n = parentPartitions.length if (n == 0) { Array.empty } else if (n == 1) { Array(new SlidingRDDPartition[T](0, parentPartitions(0), Seq.empty, 0)) } else { val w1 = windowSize - 1 // Get partition sizes and first w1 elements. val (sizes, heads) = parent.mapPartitions { iter => val w1Array = iter.take(w1).toArray Iterator.single((w1Array.length + iter.length, w1Array)) }.collect().unzip val partitions = mutable.ArrayBuffer.empty[SlidingRDDPartition[T]] var i = 0 var cumSize = 0 var partitionIndex = 0 while (i < n) { val mod = cumSize % step val offset = if (mod == 0) 0 else step - mod val size = sizes(i) if (offset < size) { val tail = mutable.ListBuffer.empty[T] // Keep appending to the current tail until it has w1 elements. var j = i + 1 while (j < n && tail.length < w1) { tail ++= heads(j).take(w1 - tail.length) j += 1 } if (sizes(i) + tail.length >= offset + windowSize) { partitions += new SlidingRDDPartition[T](partitionIndex, parentPartitions(i), tail, offset) partitionIndex += 1 } } cumSize += size i += 1 } partitions.toArray } } // TODO: Override methods such as aggregate, which only requires one Spark job. }
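Note: compute() above appends a tail copied from the following partition before calling sliding(), so windows can cross partition boundaries. A purely local sketch of that windowing step (sizes are illustrative):

import scala.collection.mutable

object SlidingWindowDemo {
  def main(args: Array[String]): Unit = {
    // The partition logic above is a distributed version of this: pull a few elements
    // from the "next" partition (the tail) so windows can cross the boundary.
    val thisPartition = Iterator(1, 2, 3, 4)
    val tail = mutable.ListBuffer(5, 6) // first windowSize - 1 elements of the next partition
    val windows = (thisPartition ++ tail).sliding(3, 1).withPartial(false).map(_.toArray)
    windows.foreach(w => println(w.mkString("[", ", ", "]")))
  }
}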
Example 35
Source File: ParamGridBuilderSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.tuning import scala.collection.mutable import org.apache.spark.SparkFunSuite import org.apache.spark.ml.param.{ParamMap, TestParams} class ParamGridBuilderSuite extends SparkFunSuite { val solver = new TestParams() import solver.{inputCol, maxIter} test("param grid builder") { def validateGrid(maps: Array[ParamMap], expected: mutable.Set[(Int, String)]): Unit = { assert(maps.size === expected.size) maps.foreach { m => val tuple = (m(maxIter), m(inputCol)) assert(expected.contains(tuple)) expected.remove(tuple) } assert(expected.isEmpty) } val maps0 = new ParamGridBuilder() .baseOn(maxIter -> 10) .addGrid(inputCol, Array("input0", "input1")) .build() val expected0 = mutable.Set( (10, "input0"), (10, "input1")) validateGrid(maps0, expected0) val maps1 = new ParamGridBuilder() .baseOn(ParamMap(maxIter -> 5, inputCol -> "input")) // will be overwritten .addGrid(maxIter, Array(10, 20)) .addGrid(inputCol, Array("input0", "input1")) .build() val expected1 = mutable.Set( (10, "input0"), (20, "input0"), (10, "input1"), (20, "input1")) validateGrid(maps1, expected1) } }
Example 36
Source File: EnsembleTestHelper.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.tree import scala.collection.mutable import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.model.TreeEnsembleModel import org.apache.spark.util.StatCounter object EnsembleTestHelper { def validateRegressor( model: TreeEnsembleModel, input: Seq[LabeledPoint], required: Double, metricName: String = "mse") { val predictions = input.map(x => model.predict(x.features)) val errors = predictions.zip(input).map { case (prediction, point) => point.label - prediction } val metric = metricName match { case "mse" => errors.map(err => err * err).sum / errors.size case "mae" => errors.map(math.abs).sum / errors.size } assert(metric <= required, s"validateRegressor calculated $metricName $metric but required $required.") } def generateOrderedLabeledPoints(numFeatures: Int, numInstances: Int): Array[LabeledPoint] = { val arr = new Array[LabeledPoint](numInstances) for (i <- 0 until numInstances) { val label = if (i < numInstances / 10) { 0.0 } else if (i < numInstances / 2) { 1.0 } else if (i < numInstances * 0.9) { 0.0 } else { 1.0 } val features = Array.fill[Double](numFeatures)(i.toDouble) arr(i) = new LabeledPoint(label, Vectors.dense(features)) } arr } }
Example 37
Source File: KPLBasedKinesisTestUtils.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.kinesis import java.nio.ByteBuffer import java.nio.charset.StandardCharsets import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import com.amazonaws.services.kinesis.producer.{KinesisProducer => KPLProducer, KinesisProducerConfiguration, UserRecordResult} import com.google.common.util.concurrent.{FutureCallback, Futures} private[kinesis] class KPLBasedKinesisTestUtils extends KinesisTestUtils { override protected def getProducer(aggregate: Boolean): KinesisDataGenerator = { if (!aggregate) { new SimpleDataGenerator(kinesisClient) } else { new KPLDataGenerator(regionName) } } } private[kinesis] class KPLDataGenerator(regionName: String) extends KinesisDataGenerator { private lazy val producer: KPLProducer = { val conf = new KinesisProducerConfiguration() .setRecordMaxBufferedTime(1000) .setMaxConnections(1) .setRegion(regionName) .setMetricsLevel("none") new KPLProducer(conf) } override def sendData(streamName: String, data: Seq[Int]): Map[String, Seq[(Int, String)]] = { val shardIdToSeqNumbers = new mutable.HashMap[String, ArrayBuffer[(Int, String)]]() data.foreach { num => val str = num.toString val data = ByteBuffer.wrap(str.getBytes(StandardCharsets.UTF_8)) val future = producer.addUserRecord(streamName, str, data) val kinesisCallBack = new FutureCallback[UserRecordResult]() { override def onFailure(t: Throwable): Unit = {} // do nothing override def onSuccess(result: UserRecordResult): Unit = { val shardId = result.getShardId val seqNumber = result.getSequenceNumber() val sentSeqNumbers = shardIdToSeqNumbers.getOrElseUpdate(shardId, new ArrayBuffer[(Int, String)]()) sentSeqNumbers += ((num, seqNumber)) } } Futures.addCallback(future, kinesisCallBack) } producer.flushSync() shardIdToSeqNumbers.toMap } }
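Note: sendData above groups (record, sequence number) pairs per shard with getOrElseUpdate, creating the per-shard buffer on first use. A runnable sketch of that grouping pattern with made-up shard ids:

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

object GroupByShardDemo {
  def main(args: Array[String]): Unit = {
    val shardIdToSeqNumbers = new mutable.HashMap[String, ArrayBuffer[(Int, String)]]()
    val results = Seq(("shard-0", 1, "seq-a"), ("shard-1", 2, "seq-b"), ("shard-0", 3, "seq-c"))
    // getOrElseUpdate creates the per-shard buffer on first sight of a shard id, exactly
    // as the KPL callback above does.
    results.foreach { case (shardId, num, seq) =>
      shardIdToSeqNumbers.getOrElseUpdate(shardId, new ArrayBuffer[(Int, String)]()) += ((num, seq))
    }
    println(shardIdToSeqNumbers.mapValues(_.toList).toMap)
  }
}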
Example 38
Source File: KafkaStreamSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.kafka import scala.collection.mutable import scala.concurrent.duration._ import scala.language.postfixOps import scala.util.Random import kafka.serializer.StringDecoder import org.scalatest.BeforeAndAfterAll import org.scalatest.concurrent.Eventually import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.{Milliseconds, StreamingContext} class KafkaStreamSuite extends SparkFunSuite with Eventually with BeforeAndAfterAll { private var ssc: StreamingContext = _ private var kafkaTestUtils: KafkaTestUtils = _ override def beforeAll(): Unit = { kafkaTestUtils = new KafkaTestUtils kafkaTestUtils.setup() } override def afterAll(): Unit = { if (ssc != null) { ssc.stop() ssc = null } if (kafkaTestUtils != null) { kafkaTestUtils.teardown() kafkaTestUtils = null } } test("Kafka input stream") { val sparkConf = new SparkConf().setMaster("local[4]").setAppName(this.getClass.getSimpleName) ssc = new StreamingContext(sparkConf, Milliseconds(500)) val topic = "topic1" val sent = Map("a" -> 5, "b" -> 3, "c" -> 10) kafkaTestUtils.createTopic(topic) kafkaTestUtils.sendMessages(topic, sent) val kafkaParams = Map("zookeeper.connect" -> kafkaTestUtils.zkAddress, "group.id" -> s"test-consumer-${Random.nextInt(10000)}", "auto.offset.reset" -> "smallest") val stream = KafkaUtils.createStream[String, String, StringDecoder, StringDecoder]( ssc, kafkaParams, Map(topic -> 1), StorageLevel.MEMORY_ONLY) val result = new mutable.HashMap[String, Long]() stream.map(_._2).countByValue().foreachRDD { r => r.collect().foreach { kv => result.synchronized { val count = result.getOrElseUpdate(kv._1, 0) + kv._2 result.put(kv._1, count) } } } ssc.start() eventually(timeout(10000 milliseconds), interval(100 milliseconds)) { assert(result.synchronized { sent === result }) } } }
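Note: the streaming test above accumulates counts in a mutable.HashMap and wraps every read-modify-write in result.synchronized, since plain mutable maps are not thread-safe. A standalone sketch of that pattern using a parallel collection to simulate concurrent updates:

import scala.collection.mutable

object SynchronizedCountDemo {
  def main(args: Array[String]): Unit = {
    val result = new mutable.HashMap[String, Long]()
    val updates = Seq("a" -> 2L, "b" -> 1L, "a" -> 3L)
    // Each read-modify-write is guarded by result.synchronized, mirroring the test above.
    updates.par.foreach { case (key, delta) =>
      result.synchronized {
        val count = result.getOrElseUpdate(key, 0L) + delta
        result.put(key, count)
      }
    }
    println(result.synchronized(result.toMap)) // Map(a -> 5, b -> 1)
  }
}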
Example 39
Source File: UDTRegistration.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.types

import scala.collection.mutable

import org.apache.spark.SparkException
import org.apache.spark.internal.Logging
import org.apache.spark.util.Utils

object UDTRegistration extends Serializable with Logging {

  // userClass -> udtClass mapping. In the full source this map is pre-populated with
  // the built-in ML vector/matrix UDTs and filled further by register(), which is
  // elided from this excerpt.
  private lazy val udtMap: mutable.Map[String, String] = mutable.Map.empty

  def getUDTFor(userClass: String): Option[Class[_]] = {
    udtMap.get(userClass).map { udtClassName =>
      if (Utils.classIsLoadable(udtClassName)) {
        val udtClass = Utils.classForName(udtClassName)
        if (classOf[UserDefinedType[_]].isAssignableFrom(udtClass)) {
          udtClass
        } else {
          throw new SparkException(
            s"${udtClass.getName} is not an UserDefinedType. Please make sure registering " +
              s"an UserDefinedType for ${userClass}")
        }
      } else {
        throw new SparkException(
          s"Can not load in UserDefinedType ${udtClassName} for user class ${userClass}.")
      }
    }
  }
}
Example 40
Source File: collect.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions.aggregate import scala.collection.generic.Growable import scala.collection.mutable import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.types._ @ExpressionDescription( usage = "_FUNC_(expr) - Collects and returns a set of unique elements.") case class CollectSet( child: Expression, mutableAggBufferOffset: Int = 0, inputAggBufferOffset: Int = 0) extends Collect { def this(child: Expression) = this(child, 0, 0) override def checkInputDataTypes(): TypeCheckResult = { if (!child.dataType.existsRecursively(_.isInstanceOf[MapType])) { TypeCheckResult.TypeCheckSuccess } else { TypeCheckResult.TypeCheckFailure("collect_set() cannot have map type data") } } override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate = copy(mutableAggBufferOffset = newMutableAggBufferOffset) override def withNewInputAggBufferOffset(newInputAggBufferOffset: Int): ImperativeAggregate = copy(inputAggBufferOffset = newInputAggBufferOffset) override def prettyName: String = "collect_set" override protected[this] val buffer: mutable.HashSet[Any] = mutable.HashSet.empty }
Example 41
Source File: EquivalentExpressions.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions

import scala.collection.mutable

import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback

class EquivalentExpressions {

  // Wrapper that groups expressions by semantic equality (full implementation elided).
  case class Expr(e: Expression)

  // All equivalence classes discovered so far. The addExpr/addExprTree bookkeeping and
  // lookup methods of the full source are elided from this excerpt.
  private val equivalenceMap = mutable.HashMap.empty[Expr, mutable.MutableList[Expression]]

  def debugString(all: Boolean = false): String = {
    val sb: mutable.StringBuilder = new StringBuilder()
    sb.append("Equivalent expressions:\n")
    equivalenceMap.foreach { case (k, v) =>
      if (all || v.length > 1) {
        sb.append("  " + v.mkString(", ")).append("\n")
      }
    }
    sb.toString()
  }
}
Example 42
Source File: DriverRegistry.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.jdbc import java.sql.{Driver, DriverManager} import scala.collection.mutable import org.apache.spark.internal.Logging import org.apache.spark.util.Utils object DriverRegistry extends Logging { private val wrapperMap: mutable.Map[String, DriverWrapper] = mutable.Map.empty def register(className: String): Unit = { val cls = Utils.getContextOrSparkClassLoader.loadClass(className) if (cls.getClassLoader == null) { logTrace(s"$className has been loaded with bootstrap ClassLoader, wrapper is not required") } else if (wrapperMap.get(className).isDefined) { logTrace(s"Wrapper for $className already exists") } else { synchronized { if (wrapperMap.get(className).isEmpty) { val wrapper = new DriverWrapper(cls.newInstance().asInstanceOf[Driver]) DriverManager.registerDriver(wrapper) wrapperMap(className) = wrapper logTrace(s"Wrapper for $className registered") } } } } }
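Note: register() above uses the check-then-synchronize-then-check idiom so a driver wrapper is created at most once even under concurrent calls. A minimal sketch of the same idiom with a plain String standing in for the wrapped JDBC driver:

import scala.collection.mutable

object RegisterOnceDemo {
  private val wrapperMap: mutable.Map[String, String] = mutable.Map.empty

  // Same check-then-synchronize-then-check pattern as DriverRegistry.register above,
  // with a plain String standing in for the wrapped driver instance.
  def register(className: String): Unit = {
    if (wrapperMap.get(className).isEmpty) {
      synchronized {
        if (wrapperMap.get(className).isEmpty) {
          wrapperMap(className) = s"wrapper-for-$className"
          println(s"registered $className")
        }
      }
    }
  }

  def main(args: Array[String]): Unit = {
    register("org.h2.Driver")
    register("org.h2.Driver") // second call is a no-op
  }
}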
Example 43
Source File: ListingFileCatalog.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources import scala.collection.mutable import org.apache.hadoop.fs._ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.types.StructType class ListingFileCatalog( sparkSession: SparkSession, override val rootPaths: Seq[Path], parameters: Map[String, String], partitionSchema: Option[StructType], fileStatusCache: FileStatusCache = NoopCache) extends PartitioningAwareFileCatalog( sparkSession, parameters, partitionSchema, fileStatusCache) { @volatile private var cachedLeafFiles: mutable.LinkedHashMap[Path, FileStatus] = _ @volatile private var cachedLeafDirToChildrenFiles: Map[Path, Array[FileStatus]] = _ @volatile private var cachedPartitionSpec: PartitionSpec = _ refresh0() override def partitionSpec(): PartitionSpec = { if (cachedPartitionSpec == null) { cachedPartitionSpec = inferPartitioning() } logTrace(s"Partition spec: $cachedPartitionSpec") cachedPartitionSpec } override protected def leafFiles: mutable.LinkedHashMap[Path, FileStatus] = { cachedLeafFiles } override protected def leafDirToChildrenFiles: Map[Path, Array[FileStatus]] = { cachedLeafDirToChildrenFiles } override def refresh(): Unit = { refresh0() fileStatusCache.invalidateAll() } private def refresh0(): Unit = { val files = listLeafFiles(rootPaths) cachedLeafFiles = new mutable.LinkedHashMap[Path, FileStatus]() ++= files.map(f => f.getPath -> f) cachedLeafDirToChildrenFiles = files.toArray.groupBy(_.getPath.getParent) cachedPartitionSpec = null } override def equals(other: Any): Boolean = other match { case hdfs: ListingFileCatalog => rootPaths.toSet == hdfs.rootPaths.toSet case _ => false } override def hashCode(): Int = rootPaths.toSet.hashCode() }
Example 44
Source File: Exchange.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.exchange import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import org.apache.spark.broadcast import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{LeafExecNode, SparkPlan, UnaryExecNode} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType case class ReuseExchange(conf: SQLConf) extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { if (!conf.exchangeReuseEnabled) { return plan } // Build a hash map using schema of exchanges to avoid O(N*N) sameResult calls. val exchanges = mutable.HashMap[StructType, ArrayBuffer[Exchange]]() plan.transformUp { case exchange: Exchange => // the exchanges that have same results usually also have same schemas (same column names). val sameSchema = exchanges.getOrElseUpdate(exchange.schema, ArrayBuffer[Exchange]()) val samePlan = sameSchema.find { e => exchange.sameResult(e) } if (samePlan.isDefined) { // Keep the output of this exchange, the following plans require that to resolve // attributes. ReusedExchangeExec(exchange.output, samePlan.get) } else { sameSchema += exchange exchange } } } }
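Note: ReuseExchange above buckets exchanges by schema in a mutable.HashMap so the expensive sameResult comparison only runs within a small candidate list; ReuseSubquery in the next example follows the same pattern. A sketch of that reuse-by-key idea with a hypothetical Plan stand-in:

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

object ReuseByKeyDemo {
  // Hypothetical stand-in for a plan node: "schema" is the cheap bucketing key,
  // "body" stands in for the expensive sameResult comparison.
  case class Plan(schema: String, body: String)

  def main(args: Array[String]): Unit = {
    val plans = Seq(Plan("int,string", "scan t1"), Plan("int,string", "scan t2"), Plan("int,string", "scan t1"))
    val seen = mutable.HashMap[String, ArrayBuffer[Plan]]()
    // Bucketing by schema first keeps the expensive comparison to a small candidate
    // list, which is the point of the hash map in ReuseExchange above.
    val deduped = plans.map { p =>
      val bucket = seen.getOrElseUpdate(p.schema, ArrayBuffer[Plan]())
      bucket.find(_.body == p.body) match {
        case Some(existing) => existing
        case None => bucket += p; p
      }
    }
    deduped.foreach(println) // the third plan reuses the first instance
  }
}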
Example 45
Source File: subquery.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.{expressions, InternalRow} import org.apache.spark.sql.catalyst.expressions.{Expression, ExprId, InSet, Literal, PlanExpression} import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{BooleanType, DataType, StructType} case class ReuseSubquery(conf: SQLConf) extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { if (!conf.exchangeReuseEnabled) { return plan } // Build a hash map using schema of exchanges to avoid O(N*N) sameResult calls. val subqueries = mutable.HashMap[StructType, ArrayBuffer[SubqueryExec]]() plan transformAllExpressions { case sub: ExecSubqueryExpression => val sameSchema = subqueries.getOrElseUpdate(sub.plan.schema, ArrayBuffer[SubqueryExec]()) val sameResult = sameSchema.find(_.sameResult(sub.plan)) if (sameResult.isDefined) { sub.withNewPlan(sameResult.get) } else { sameSchema += sub.plan sub } } } }
Example 46
Source File: StateStoreCoordinator.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.streaming.state import scala.collection.mutable import org.apache.spark.SparkEnv import org.apache.spark.internal.Logging import org.apache.spark.rpc.{RpcCallContext, RpcEndpointRef, RpcEnv, ThreadSafeRpcEndpoint} import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.util.RpcUtils private class StateStoreCoordinator(override val rpcEnv: RpcEnv) extends ThreadSafeRpcEndpoint with Logging { private val instances = new mutable.HashMap[StateStoreId, ExecutorCacheTaskLocation] override def receive: PartialFunction[Any, Unit] = { case ReportActiveInstance(id, host, executorId) => logDebug(s"Reported state store $id is active at $executorId") instances.put(id, ExecutorCacheTaskLocation(host, executorId)) } override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { case VerifyIfInstanceActive(id, execId) => val response = instances.get(id) match { case Some(location) => location.executorId == execId case None => false } logDebug(s"Verified that state store $id is active: $response") context.reply(response) case GetLocation(id) => val executorId = instances.get(id).map(_.toString) logDebug(s"Got location of the state store $id: $executorId") context.reply(executorId) case DeactivateInstances(checkpointLocation) => val storeIdsToRemove = instances.keys.filter(_.checkpointLocation == checkpointLocation).toSeq instances --= storeIdsToRemove logDebug(s"Deactivating instances related to checkpoint location $checkpointLocation: " + storeIdsToRemove.mkString(", ")) context.reply(true) case StopCoordinator => stop() // Stop before replying to ensure that endpoint name has been deregistered logInfo("StateStoreCoordinator stopped") context.reply(true) } }
Example 47
Source File: BatchUIData.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.ui import scala.collection.mutable import org.apache.spark.streaming.Time import org.apache.spark.streaming.scheduler.{BatchInfo, OutputOperationInfo, StreamInputInfo} import org.apache.spark.streaming.ui.StreamingJobProgressListener._ private[ui] case class OutputOpIdAndSparkJobId(outputOpId: OutputOpId, sparkJobId: SparkJobId) private[ui] case class BatchUIData( val batchTime: Time, val streamIdToInputInfo: Map[Int, StreamInputInfo], val submissionTime: Long, val processingStartTime: Option[Long], val processingEndTime: Option[Long], val outputOperations: mutable.HashMap[OutputOpId, OutputOperationUIData] = mutable.HashMap(), var outputOpIdSparkJobIdPairs: Iterable[OutputOpIdAndSparkJobId] = Seq.empty) { def isFailed: Boolean = numFailedOutputOp != 0 } private[ui] object BatchUIData { def apply(batchInfo: BatchInfo): BatchUIData = { val outputOperations = mutable.HashMap[OutputOpId, OutputOperationUIData]() outputOperations ++= batchInfo.outputOperationInfos.mapValues(OutputOperationUIData.apply) new BatchUIData( batchInfo.batchTime, batchInfo.streamIdToInputInfo, batchInfo.submissionTime, batchInfo.processingStartTime, batchInfo.processingEndTime, outputOperations ) } } private[ui] case class OutputOperationUIData( id: OutputOpId, name: String, description: String, startTime: Option[Long], endTime: Option[Long], failureReason: Option[String]) { def duration: Option[Long] = for (s <- startTime; e <- endTime) yield e - s } private[ui] object OutputOperationUIData { def apply(outputOperationInfo: OutputOperationInfo): OutputOperationUIData = { OutputOperationUIData( outputOperationInfo.id, outputOperationInfo.name, outputOperationInfo.description, outputOperationInfo.startTime, outputOperationInfo.endTime, outputOperationInfo.failureReason ) } }
Example 48
Source File: MetricsConfig.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics import java.io.{FileInputStream, InputStream} import java.util.Properties import scala.collection.JavaConverters._ import scala.collection.mutable import scala.util.matching.Regex import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.util.Utils private[spark] class MetricsConfig(conf: SparkConf) extends Logging { private val DEFAULT_PREFIX = "*" private val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r private val DEFAULT_METRICS_CONF_FILENAME = "metrics.properties" private[metrics] val properties = new Properties() private[metrics] var perInstanceSubProperties: mutable.HashMap[String, Properties] = null private def setDefaultProperties(prop: Properties) { prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet") prop.setProperty("*.sink.servlet.path", "/metrics/json") prop.setProperty("master.sink.servlet.path", "/metrics/master/json") prop.setProperty("applications.sink.servlet.path", "/metrics/applications/json") } private[this] def loadPropertiesFromFile(path: Option[String]): Unit = { var is: InputStream = null try { is = path match { case Some(f) => new FileInputStream(f) case None => Utils.getSparkClassLoader.getResourceAsStream(DEFAULT_METRICS_CONF_FILENAME) } if (is != null) { properties.load(is) } } catch { case e: Exception => val file = path.getOrElse(DEFAULT_METRICS_CONF_FILENAME) logError(s"Error loading configuration file $file", e) } finally { if (is != null) { is.close() } } } }
Example 49
Source File: JavaUtils.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.api.java import java.{util => ju} import java.util.Map.Entry import scala.collection.mutable private[spark] object JavaUtils { def optionToOptional[T](option: Option[T]): Optional[T] = if (option.isDefined) { Optional.of(option.get) } else { Optional.empty[T] } // Workaround for SPARK-3926 / SI-8911 def mapAsSerializableJavaMap[A, B](underlying: collection.Map[A, B]): SerializableMapWrapper[A, B] = new SerializableMapWrapper(underlying) // Implementation is copied from scala.collection.convert.Wrappers.MapWrapper, // but implements java.io.Serializable. It can't just be subclassed to make it // Serializable since the MapWrapper class has no no-arg constructor. This class // doesn't need a no-arg constructor though. class SerializableMapWrapper[A, B](underlying: collection.Map[A, B]) extends ju.AbstractMap[A, B] with java.io.Serializable { self => override def size: Int = underlying.size override def get(key: AnyRef): B = try { underlying.getOrElse(key.asInstanceOf[A], null.asInstanceOf[B]) } catch { case ex: ClassCastException => null.asInstanceOf[B] } override def entrySet: ju.Set[ju.Map.Entry[A, B]] = new ju.AbstractSet[ju.Map.Entry[A, B]] { override def size: Int = self.size override def iterator: ju.Iterator[ju.Map.Entry[A, B]] = new ju.Iterator[ju.Map.Entry[A, B]] { val ui = underlying.iterator var prev : Option[A] = None def hasNext: Boolean = ui.hasNext def next(): Entry[A, B] = { val (k, v) = ui.next() prev = Some(k) new ju.Map.Entry[A, B] { import scala.util.hashing.byteswap32 override def getKey: A = k override def getValue: B = v override def setValue(v1 : B): B = self.put(k, v1) override def hashCode: Int = byteswap32(k.hashCode) + (byteswap32(v.hashCode) << 16) override def equals(other: Any): Boolean = other match { case e: ju.Map.Entry[_, _] => k == e.getKey && v == e.getValue case _ => false } } } def remove() { prev match { case Some(k) => underlying match { case mm: mutable.Map[A, _] => mm.remove(k) prev = None case _ => throw new UnsupportedOperationException("remove") } case _ => throw new IllegalStateException("next must be called at least once before remove") } } } } } }
Example 50
Source File: WorkerInfo.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.master import scala.collection.mutable import org.apache.spark.rpc.RpcEndpointRef import org.apache.spark.util.Utils private[spark] class WorkerInfo( val id: String, val host: String, val port: Int, val cores: Int, val memory: Int, val endpoint: RpcEndpointRef, val webUiAddress: String) extends Serializable { Utils.checkHost(host, "Expected hostname") assert (port > 0) @transient var executors: mutable.HashMap[String, ExecutorDesc] = _ // executorId => info @transient var drivers: mutable.HashMap[String, DriverInfo] = _ // driverId => info @transient var state: WorkerState.Value = _ @transient var coresUsed: Int = _ @transient var memoryUsed: Int = _ @transient var lastHeartbeat: Long = _ init() def coresFree: Int = cores - coresUsed def memoryFree: Int = memory - memoryUsed private def readObject(in: java.io.ObjectInputStream): Unit = Utils.tryOrIOException { in.defaultReadObject() init() } private def init() { executors = new mutable.HashMap drivers = new mutable.HashMap state = WorkerState.ALIVE coresUsed = 0 memoryUsed = 0 lastHeartbeat = System.currentTimeMillis() } def hostPort: String = { assert (port > 0) host + ":" + port } def addExecutor(exec: ExecutorDesc) { executors(exec.fullId) = exec coresUsed += exec.cores memoryUsed += exec.memory } def removeExecutor(exec: ExecutorDesc) { if (executors.contains(exec.fullId)) { executors -= exec.fullId coresUsed -= exec.cores memoryUsed -= exec.memory } } def hasExecutor(app: ApplicationInfo): Boolean = { executors.values.exists(_.application == app) } def addDriver(driver: DriverInfo) { drivers(driver.id) = driver memoryUsed += driver.desc.mem coresUsed += driver.desc.cores } def removeDriver(driver: DriverInfo) { drivers -= driver.id memoryUsed -= driver.desc.mem coresUsed -= driver.desc.cores } def setState(state: WorkerState.Value): Unit = { this.state = state } def isAlive(): Boolean = this.state == WorkerState.ALIVE }
Example 51
Source File: StorageTab.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.storage import scala.collection.mutable import org.apache.spark.annotation.DeveloperApi import org.apache.spark.scheduler._ import org.apache.spark.storage._ import org.apache.spark.ui._ private def updateRDDInfo(updatedBlocks: Seq[(BlockId, BlockStatus)]): Unit = { val rddIdsToUpdate = updatedBlocks.flatMap { case (bid, _) => bid.asRDDId.map(_.rddId) }.toSet val rddInfosToUpdate = _rddInfoMap.values.toSeq.filter { s => rddIdsToUpdate.contains(s.id) } StorageUtils.updateRddInfo(rddInfosToUpdate, activeStorageStatusList) } override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = synchronized { val rddInfos = stageSubmitted.stageInfo.rddInfos rddInfos.foreach { info => _rddInfoMap.getOrElseUpdate(info.id, info).name = info.name } } override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = synchronized { // Remove all partitions that are no longer cached in current completed stage val completedRddIds = stageCompleted.stageInfo.rddInfos.map(r => r.id).toSet _rddInfoMap.retain { case (id, info) => !completedRddIds.contains(id) || info.numCachedPartitions > 0 } } override def onUnpersistRDD(unpersistRDD: SparkListenerUnpersistRDD): Unit = synchronized { _rddInfoMap.remove(unpersistRDD.rddId) } override def onBlockUpdated(blockUpdated: SparkListenerBlockUpdated): Unit = { super.onBlockUpdated(blockUpdated) val blockId = blockUpdated.blockUpdatedInfo.blockId val storageLevel = blockUpdated.blockUpdatedInfo.storageLevel val memSize = blockUpdated.blockUpdatedInfo.memSize val diskSize = blockUpdated.blockUpdatedInfo.diskSize val blockStatus = BlockStatus(storageLevel, memSize, diskSize) updateRDDInfo(Seq((blockId, blockStatus))) } }
Example 52
Source File: TaskDescription.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.nio.ByteBuffer import scala.collection.mutable import scala.collection.mutable.HashSet import scala.util.control.NonFatal import org.apache.spark._ import org.apache.spark.internal.Logging import org.apache.spark.serializer.SerializerInstance import org.apache.spark.util.SerializableBuffer private[spark] class TaskDescription( val taskId: Long, val attemptNumber: Int, val executorId: String, val name: String, val index: Int, // Index within this task's TaskSet val isFutureTask: Boolean, @transient private val _task: Task[_], @transient private val _addedFiles: mutable.Map[String, Long], @transient private val _addedJars: mutable.Map[String, Long], @transient private val _ser: SerializerInstance) extends Serializable with Logging { // Because ByteBuffers are not serializable, wrap the task in a SerializableBuffer private var buffer: SerializableBuffer = _ def prepareSerializedTask(): Unit = { if (_task != null) { val serializedTask: ByteBuffer = try { Task.serializeWithDependencies(_task, _addedFiles, _addedJars, _ser) } catch { // If the task cannot be serialized, then there is not point in re-attempting // the task as it will always fail. So just abort the task set. case NonFatal(e) => val msg = s"Failed to serialize the task $taskId, not attempting to retry it." logError(msg, e) // FIXME(shivaram): We dont have a handle to the taskSet here to abort it. throw new TaskNotSerializableException(e) } if (serializedTask.limit > TaskSetManager.TASK_SIZE_TO_WARN_KB * 1024) { logWarning(s"Stage ${_task.stageId} contains a task of very large size " + s"(${serializedTask.limit / 1024} KB). The maximum recommended task size is " + s"${TaskSetManager.TASK_SIZE_TO_WARN_KB} KB.") } buffer = new SerializableBuffer(serializedTask) } else { buffer = new SerializableBuffer(ByteBuffer.allocate(0)) } } def serializedTask: ByteBuffer = buffer.value override def toString: String = "TaskDescription(TID=%d, index=%d)".format(taskId, index) }
Example 53
Source File: StorageStatusListener.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.storage import scala.collection.mutable import org.apache.spark.SparkConf import org.apache.spark.annotation.DeveloperApi import org.apache.spark.scheduler._ private def updateStorageStatus(unpersistedRDDId: Int) { storageStatusList.foreach { storageStatus => storageStatus.rddBlocksById(unpersistedRDDId).foreach { case (blockId, _) => storageStatus.removeBlock(blockId) } } } override def onUnpersistRDD(unpersistRDD: SparkListenerUnpersistRDD): Unit = synchronized { updateStorageStatus(unpersistRDD.rddId) } override def onBlockManagerAdded(blockManagerAdded: SparkListenerBlockManagerAdded) { synchronized { val blockManagerId = blockManagerAdded.blockManagerId val executorId = blockManagerId.executorId val maxMem = blockManagerAdded.maxMem val storageStatus = new StorageStatus(blockManagerId, maxMem) executorIdToStorageStatus(executorId) = storageStatus // Try to remove the dead storage status if same executor register the block manager twice. deadExecutorStorageStatus.zipWithIndex.find(_._1.blockManagerId.executorId == executorId) .foreach(toRemoveExecutor => deadExecutorStorageStatus.remove(toRemoveExecutor._2)) } } override def onBlockManagerRemoved(blockManagerRemoved: SparkListenerBlockManagerRemoved) { synchronized { val executorId = blockManagerRemoved.blockManagerId.executorId executorIdToStorageStatus.remove(executorId).foreach { status => deadExecutorStorageStatus += status } if (deadExecutorStorageStatus.size > retainedDeadExecutors) { deadExecutorStorageStatus.trimStart(1) } } } override def onBlockUpdated(blockUpdated: SparkListenerBlockUpdated): Unit = { val executorId = blockUpdated.blockUpdatedInfo.blockManagerId.executorId val blockId = blockUpdated.blockUpdatedInfo.blockId val storageLevel = blockUpdated.blockUpdatedInfo.storageLevel val memSize = blockUpdated.blockUpdatedInfo.memSize val diskSize = blockUpdated.blockUpdatedInfo.diskSize val blockStatus = BlockStatus(storageLevel, memSize, diskSize) updateStorageStatus(executorId, Seq((blockId, blockStatus))) } }
Example 54
Source File: BlockStatusListener.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.storage import scala.collection.mutable import org.apache.spark.scheduler._ private[spark] case class BlockUIData( blockId: BlockId, location: String, storageLevel: StorageLevel, memSize: Long, diskSize: Long) private[spark] case class ExecutorStreamBlockStatus( executorId: String, location: String, blocks: Seq[BlockUIData]) { def totalMemSize: Long = blocks.map(_.memSize).sum def totalDiskSize: Long = blocks.map(_.diskSize).sum def numStreamBlocks: Int = blocks.size } private[spark] class BlockStatusListener extends SparkListener { private val blockManagers = new mutable.HashMap[BlockManagerId, mutable.HashMap[BlockId, BlockUIData]] override def onBlockUpdated(blockUpdated: SparkListenerBlockUpdated): Unit = { val blockId = blockUpdated.blockUpdatedInfo.blockId if (!blockId.isInstanceOf[StreamBlockId]) { // Now we only monitor StreamBlocks return } val blockManagerId = blockUpdated.blockUpdatedInfo.blockManagerId val storageLevel = blockUpdated.blockUpdatedInfo.storageLevel val memSize = blockUpdated.blockUpdatedInfo.memSize val diskSize = blockUpdated.blockUpdatedInfo.diskSize synchronized { // Drop the update info if the block manager is not registered blockManagers.get(blockManagerId).foreach { blocksInBlockManager => if (storageLevel.isValid) { blocksInBlockManager.put(blockId, BlockUIData( blockId, blockManagerId.hostPort, storageLevel, memSize, diskSize) ) } else { // If isValid is not true, it means we should drop the block. blocksInBlockManager -= blockId } } } } override def onBlockManagerAdded(blockManagerAdded: SparkListenerBlockManagerAdded): Unit = { synchronized { blockManagers.put(blockManagerAdded.blockManagerId, mutable.HashMap()) } } override def onBlockManagerRemoved( blockManagerRemoved: SparkListenerBlockManagerRemoved): Unit = synchronized { blockManagers -= blockManagerRemoved.blockManagerId } def allExecutorStreamBlockStatus: Seq[ExecutorStreamBlockStatus] = synchronized { blockManagers.map { case (blockManagerId, blocks) => ExecutorStreamBlockStatus( blockManagerId.executorId, blockManagerId.hostPort, blocks.values.toSeq) }.toSeq } }
Example 55
Source File: BlockReplicationPolicy.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.storage

import scala.collection.mutable
import scala.util.Random

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.internal.Logging

@DeveloperApi
class RandomBlockReplicationPolicy extends BlockReplicationPolicy with Logging {

  // prioritize(), which uses this helper to pick replication peers, is elided from
  // this excerpt.

  private def getSampleIds(n: Int, m: Int, r: Random): List[Int] = {
    val indices = (n - m + 1 to n).foldLeft(Set.empty[Int]) { case (set, i) =>
      val t = r.nextInt(i) + 1
      if (set.contains(t)) set + i else set + t
    }
    // we shuffle the result to ensure a random arrangement within the sample
    // to avoid any bias from set implementations
    r.shuffle(indices.map(_ - 1).toList)
  }
}
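Note: getSampleIds above is Robert Floyd's algorithm for drawing m distinct indices from 0 until n without materializing the whole range. The same logic copied into a runnable standalone object (names are illustrative) so its behavior can be checked directly:

import scala.util.Random

object FloydSampleDemo {
  // Same algorithm as getSampleIds above: m distinct indices drawn from 0 until n.
  def sample(n: Int, m: Int, r: Random): List[Int] = {
    val indices = (n - m + 1 to n).foldLeft(Set.empty[Int]) { case (set, i) =>
      val t = r.nextInt(i) + 1
      if (set.contains(t)) set + i else set + t
    }
    r.shuffle(indices.map(_ - 1).toList)
  }

  def main(args: Array[String]): Unit = {
    val picked = sample(n = 10, m = 3, r = new Random(0))
    println(picked) // three distinct values in 0..9
    assert(picked.distinct.size == 3 && picked.forall(i => i >= 0 && i < 10))
  }
}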
Example 56
Source File: LogUrlsStandaloneSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy import java.net.URL import scala.collection.mutable import scala.io.Source import org.apache.spark.{LocalSparkContext, SparkContext, SparkFunSuite} import org.apache.spark.scheduler.{SparkListener, SparkListenerExecutorAdded} import org.apache.spark.scheduler.cluster.ExecutorInfo import org.apache.spark.util.SparkConfWithEnv class LogUrlsStandaloneSuite extends SparkFunSuite with LocalSparkContext { private val WAIT_TIMEOUT_MILLIS = 10000 test("verify that correct log urls get propagated from workers") { sc = new SparkContext("local-cluster[2,1,1024]", "test") val listener = new SaveExecutorInfo sc.addSparkListener(listener) // Trigger a job so that executors get added sc.parallelize(1 to 100, 4).map(_.toString).count() sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) listener.addedExecutorInfos.values.foreach { info => assert(info.logUrlMap.nonEmpty) // Browse to each URL to check that it's valid info.logUrlMap.foreach { case (logType, logUrl) => val html = Source.fromURL(logUrl).mkString assert(html.contains(s"$logType log page")) } } } test("verify that log urls reflect SPARK_PUBLIC_DNS (SPARK-6175)") { val SPARK_PUBLIC_DNS = "public_dns" val conf = new SparkConfWithEnv(Map("SPARK_PUBLIC_DNS" -> SPARK_PUBLIC_DNS)).set( "spark.extraListeners", classOf[SaveExecutorInfo].getName) sc = new SparkContext("local-cluster[2,1,1024]", "test", conf) // Trigger a job so that executors get added sc.parallelize(1 to 100, 4).map(_.toString).count() sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) val listeners = sc.listenerBus.findListenersByClass[SaveExecutorInfo] assert(listeners.size === 1) val listener = listeners(0) listener.addedExecutorInfos.values.foreach { info => assert(info.logUrlMap.nonEmpty) info.logUrlMap.values.foreach { logUrl => assert(new URL(logUrl).getHost === SPARK_PUBLIC_DNS) } } } } private[spark] class SaveExecutorInfo extends SparkListener { val addedExecutorInfos = mutable.Map[String, ExecutorInfo]() override def onExecutorAdded(executor: SparkListenerExecutorAdded) { addedExecutorInfos(executor.executorId) = executor.executorInfo } }
Example 57
Source File: CustomRecoveryModeFactory.scala From drizzle-spark with Apache License 2.0 | 5 votes |
// This file is placed in different package to make sure all of these components work well // when they are outside of org.apache.spark. package other.supplier import java.nio.ByteBuffer import scala.collection.mutable import scala.reflect.ClassTag import org.apache.spark.SparkConf import org.apache.spark.deploy.master._ import org.apache.spark.serializer.Serializer class CustomRecoveryModeFactory( conf: SparkConf, serializer: Serializer ) extends StandaloneRecoveryModeFactory(conf, serializer) { CustomRecoveryModeFactory.instantiationAttempts += 1 override def read[T: ClassTag](prefix: String): Seq[T] = { CustomPersistenceEngine.readAttempts += 1 val results = for ((name, bytes) <- data; if name.startsWith(prefix)) yield serializer.newInstance().deserialize[T](ByteBuffer.wrap(bytes)) results.toSeq } } object CustomPersistenceEngine { @volatile var persistAttempts = 0 @volatile var unpersistAttempts = 0 @volatile var readAttempts = 0 @volatile var lastInstance: Option[CustomPersistenceEngine] = None } class CustomLeaderElectionAgent(val masterInstance: LeaderElectable) extends LeaderElectionAgent { masterInstance.electedLeader() }
Example 58
Source File: SparkListenerWithClusterSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import scala.collection.mutable import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll} import org.apache.spark.{LocalSparkContext, SparkContext, SparkFunSuite} import org.apache.spark.scheduler.cluster.ExecutorInfo val WAIT_TIMEOUT_MILLIS = 10000 before { sc = new SparkContext("local-cluster[2,1,1024]", "SparkListenerSuite") } test("SparkListener sends executor added message") { val listener = new SaveExecutorInfo sc.addSparkListener(listener) // This test will check if the number of executors received by "SparkListener" is same as the // number of all executors, so we need to wait until all executors are up sc.jobProgressListener.waitUntilExecutorsUp(2, 60000) val rdd1 = sc.parallelize(1 to 100, 4) val rdd2 = rdd1.map(_.toString) rdd2.setName("Target RDD") rdd2.count() sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) assert(listener.addedExecutorInfo.size == 2) assert(listener.addedExecutorInfo("0").totalCores == 1) assert(listener.addedExecutorInfo("1").totalCores == 1) } private class SaveExecutorInfo extends SparkListener { val addedExecutorInfo = mutable.Map[String, ExecutorInfo]() override def onExecutorAdded(executor: SparkListenerExecutorAdded) { addedExecutorInfo(executor.executorId) = executor.executorInfo } } }
Example 59
Source File: TimeStampedHashMapSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.util import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.util.Random import org.apache.spark.SparkFunSuite class TimeStampedHashMapSuite extends SparkFunSuite { // Test the testMap function - a Scala HashMap should obviously pass testMap(new mutable.HashMap[String, String]()) // Test TimeStampedHashMap basic functionality testMap(new TimeStampedHashMap[String, String]()) testMapThreadSafety(new TimeStampedHashMap[String, String]()) test("TimeStampedHashMap - clearing by timestamp") { // clearing by insertion time val map = new TimeStampedHashMap[String, String](updateTimeStampOnGet = false) map("k1") = "v1" assert(map("k1") === "v1") Thread.sleep(10) val threshTime = System.currentTimeMillis assert(map.getTimestamp("k1").isDefined) assert(map.getTimestamp("k1").get < threshTime) map.clearOldValues(threshTime) assert(map.get("k1") === None) // clearing by modification time val map1 = new TimeStampedHashMap[String, String](updateTimeStampOnGet = true) map1("k1") = "v1" map1("k2") = "v2" assert(map1("k1") === "v1") Thread.sleep(10) val threshTime1 = System.currentTimeMillis Thread.sleep(10) assert(map1("k2") === "v2") // access k2 to update its access time to > threshTime assert(map1.getTimestamp("k1").isDefined) assert(map1.getTimestamp("k1").get < threshTime1) assert(map1.getTimestamp("k2").isDefined) assert(map1.getTimestamp("k2").get >= threshTime1) map1.clearOldValues(threshTime1) // should only clear k1 assert(map1.get("k1") === None) assert(map1.get("k2").isDefined) } def testMapThreadSafety(hashMapConstructor: => mutable.Map[String, String]) { def newMap() = hashMapConstructor val name = newMap().getClass.getSimpleName val testMap = newMap() @volatile var error = false def getRandomKey(m: mutable.Map[String, String]): Option[String] = { val keys = testMap.keysIterator.toSeq if (keys.nonEmpty) { Some(keys(Random.nextInt(keys.size))) } else { None } } val threads = (1 to 25).map(i => new Thread() { override def run() { try { for (j <- 1 to 1000) { Random.nextInt(3) match { case 0 => testMap(Random.nextString(10)) = Random.nextDouble().toString // put case 1 => getRandomKey(testMap).map(testMap.get) // get case 2 => getRandomKey(testMap).map(testMap.remove) // remove } } } catch { case t: Throwable => error = true throw t } } }) test(name + " - threading safety test") { threads.foreach(_.start()) threads.foreach(_.join()) assert(!error) } } }
Example 60
Source File: BlockReplicationPolicySuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.storage import scala.collection.mutable import org.scalatest.{BeforeAndAfter, Matchers} import org.apache.spark.{LocalSparkContext, SparkFunSuite} class BlockReplicationPolicySuite extends SparkFunSuite with Matchers with BeforeAndAfter with LocalSparkContext { // Implicitly convert strings to BlockIds for test clarity. private implicit def StringToBlockId(value: String): BlockId = new TestBlockId(value) test(s"block replication - random block replication policy") { val numBlockManagers = 10 val storeSize = 1000 val blockManagers = (1 to numBlockManagers).map { i => BlockManagerId(s"store-$i", "localhost", 1000 + i, None) } val candidateBlockManager = BlockManagerId("test-store", "localhost", 1000, None) val replicationPolicy = new RandomBlockReplicationPolicy val blockId = "test-block" (1 to 10).foreach {numReplicas => logDebug(s"Num replicas : $numReplicas") val randomPeers = replicationPolicy.prioritize( candidateBlockManager, blockManagers, mutable.HashSet.empty[BlockManagerId], blockId, numReplicas ) logDebug(s"Random peers : ${randomPeers.mkString(", ")}") assert(randomPeers.toSet.size === numReplicas) // choosing n peers out of n val secondPass = replicationPolicy.prioritize( candidateBlockManager, randomPeers, mutable.HashSet.empty[BlockManagerId], blockId, numReplicas ) logDebug(s"Random peers : ${secondPass.mkString(", ")}") assert(secondPass.toSet.size === numReplicas) } } }
Example 61
Source File: ParameterOperations.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.parameters import com.intel.analytics.bigdl._ import com.intel.analytics.bigdl.dataset.{DistributedDataSet, MiniBatch} import org.apache.spark.rdd.RDD import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.optim.DistriOptimizer.Cache import com.intel.analytics.bigdl.optim.Metrics import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.Table import org.apache.spark.broadcast.Broadcast import scala.collection.mutable private[bigdl] class L2NormClippingProcessor(l2NormThreshold: Double) extends ParameterProcessor { override def collectGlobalData[T](models: RDD[Cache[T]], parameters: AllReduceParameter[T], metrics: Metrics, state: Table)(implicit ev: TensorNumeric[T]) : Unit = { val numFinishedModel = state.get[Int]("numFinishedModel").get val parallelism = state.get[Int]("parallelism").get val isGradientUpdated = state.get[Boolean]("isGradientUpdated").get val sumSquare = models.mapPartitions(modelIter => { if (!isGradientUpdated) { val getG = System.nanoTime() parameters.aggregateGradientPartition(numFinishedModel) metrics.add("aggregrateGradientParition average executor", System.nanoTime() - getG) } val sum = Util.getSumsquareInParallel(parameters.gradientPartition, parallelism) Iterator.single(sum) }).reduce(_ + _) state("isGradientUpdated") = true state("l2Norm") = math.sqrt(sumSquare) } override def processParameters[T](parameters: AllReduceParameter[T], modelCache: Cache[T], state: Table)(implicit ev: TensorNumeric[T]): Unit = { val l2Norm = state.get[Double]("l2Norm").get if (l2Norm > l2NormThreshold) { val scale = ev.fromType[Double](l2Norm / l2NormThreshold) parameters.gradientPartition.div(scale) } } override def processParameters[T](model: Module[T], state: Table)(implicit ev: TensorNumeric[T]): Unit = { val parallelism = state.get[Int]("parallelism").get val gradients = model.getParameters()._2 val l2Norm = math.sqrt(Util.getSumsquareInParallel(gradients, parallelism)) if (l2Norm > l2NormThreshold) { val scale = ev.fromType[Double](l2Norm / l2NormThreshold) gradients.div(scale) } } }
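Note: processParameters above rescales the gradient by l2Norm / l2NormThreshold whenever the global L2 norm exceeds the threshold, which caps the clipped norm at exactly the threshold. A plain-Scala sketch of that arithmetic on a two-element gradient:

object L2ClipDemo {
  def main(args: Array[String]): Unit = {
    val threshold = 1.0
    val gradients = Array(3.0, 4.0)                        // L2 norm = 5.0
    val l2Norm = math.sqrt(gradients.map(g => g * g).sum)
    // Same scaling rule as processParameters above: divide by norm / threshold so the
    // clipped gradient has L2 norm equal to the threshold.
    val clipped = if (l2Norm > threshold) gradients.map(_ / (l2Norm / threshold)) else gradients
    println(clipped.mkString(", "))                        // 0.6, 0.8
  }
}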
Example 62
Source File: IRToBlas.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.utils.intermediate import com.intel.analytics.bigdl._ import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity, TensorModule} import com.intel.analytics.bigdl.optim.DistriOptimizer._ import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.{Node, ReflectionUtils, T} import scala.collection.mutable import scala.reflect.ClassTag abstract class ConvertBase[T, D] { def cloneNode(allNodes: Array[Node[T]], nodeMap: mutable.HashMap[Node[T], Node[D]]): Unit = { allNodes.foreach(node => { node.nextNodesAndEdges.foreach(nextNodeAndEdge => { if (nodeMap.contains(nextNodeAndEdge._1)) { nodeMap.get(node).get.add(nodeMap.get(nextNodeAndEdge._1).get, nextNodeAndEdge._2) } }) }) // sort previous node nodeMap.toArray.foreach(node => { // if node has more than one previous nodes, we have to consider nodes order if (node._1.prevNodesAndEdges.length > 1) { node._2.removePrevEdges() node._1.prevNodesAndEdges.foreach(prevNodeAndEdge => { if (nodeMap.contains(prevNodeAndEdge._1)) { node._2.from(nodeMap.get(prevNodeAndEdge._1).get, prevNodeAndEdge._2) } }) } }) } def convertLayerCheck(layer: T) : Boolean def convertLayer(layer : T) : D def convertingCheck(allNodes: Array[Node[T]]) : Boolean = { var convert = true allNodes.foreach(node => { if (!convertLayerCheck(node.element)) { logger.info(s"${node.element} convertion failed") convert = false } }) convert } def convert(allNodes: Array[Node[T]]): mutable.HashMap[Node[T], Node[D]] = { val nodeMap = new mutable.HashMap[Node[T], Node[D]]() allNodes.foreach(node => { nodeMap.put(node, new Node(convertLayer(node.element))) }) cloneNode(allNodes, nodeMap) nodeMap } } private[bigdl] class IRToBlas[T: ClassTag] extends ConvertBase[IRElement[T], Module[T]]{ private def className(layer: IRElement[T]): String = { val name = layer.getOp().name s"com.intel.analytics.bigdl.nn.${name.substring(2)}" } override def convertLayerCheck(layer: IRElement[T]): Boolean = { ReflectionUtils.findClass(className(layer)) != null || layer.getOp().isInstanceOf[IRGeneralModule[T]] } override def convertLayer(layer : IRElement[T]) : Module[T] = { if (layer.getOp().isInstanceOf[IRGeneralModule[T]]) { return layer.getOp().asInstanceOf[IRGeneralModule[T]].model } ReflectionUtils.reflectFromIR(layer, Class.forName(className(layer))) } } private[bigdl] object IRToBlas { def apply[T: ClassTag](implicit ev: TensorNumeric[T]): IRToBlas[T] = new IRToBlas }
Example 63
Source File: Types.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.utils.serializer import com.google.protobuf.ByteString import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity} import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric.{NumericBoolean, NumericChar, NumericDouble, NumericFloat, NumericInt, NumericLong, NumericString} import com.intel.analytics.bigdl.utils.tf.TFTensorNumeric.NumericByteString import com.intel.analytics.bigdl.serialization.Bigdl.BigDLModule import scala.collection.mutable import scala.reflect.ClassTag trait StorageType object ProtoStorageType extends StorageType object BigDLStorage extends StorageType case class SerializeContext[T: ClassTag](moduleData: ModuleData[T], storages: mutable.HashMap[Int, Any], storageType: StorageType, copyWeightAndBias : Boolean = true, groupType : String = null) case class DeserializeContext(bigdlModule : BigDLModule, storages: mutable.HashMap[Int, Any], storageType: StorageType, copyWeightAndBias : Boolean = true) case class SerializeResult(bigDLModule: BigDLModule.Builder, storages: mutable.HashMap[Int, Any]) case class ModuleData[T: ClassTag](module : AbstractModule[Activity, Activity, T], pre : Seq[String], next : Seq[String]) object BigDLDataType extends Enumeration{ type BigDLDataType = Value val FLOAT, DOUBLE, CHAR, BOOL, STRING, INT, SHORT, LONG, BYTESTRING, BYTE = Value } object SerConst { val MAGIC_NO = 3721 val DIGEST_TYPE = "MD5" val GLOBAL_STORAGE = "global_storage" val MODULE_TAGES = "module_tags" val MODULE_NUMERICS = "module_numerics" val GROUP_TYPE = "group_type" } object ClassTagMapper { def apply(tpe : String): ClassTag[_] = { tpe match { case "Float" => scala.reflect.classTag[Float] case "Double" => scala.reflect.classTag[Double] case "Char" => scala.reflect.classTag[Char] case "Boolean" => scala.reflect.classTag[Boolean] case "String" => scala.reflect.classTag[String] case "Int" => scala.reflect.classTag[Int] case "Long" => scala.reflect.classTag[Long] case "com.google.protobuf.ByteString" => scala.reflect.classTag[ByteString] } } def apply(classTag: ClassTag[_]): String = classTag.toString } object TensorNumericMapper { def apply(tpe : String): TensorNumeric[_] = { tpe match { case "Float" => NumericFloat case "Double" => NumericDouble case "Char" => NumericChar case "Boolean" => NumericBoolean case "String" => NumericString case "Int" => NumericInt case "Long" => NumericLong case "ByteString" => NumericByteString } } def apply(tensorNumeric: TensorNumeric[_]): String = { tensorNumeric match { case NumericFloat => "Float" case NumericDouble => "Double" case NumericChar => "Char" case NumericBoolean => "Boolean" case NumericString => "String" case NumericInt => "Int" case NumericLong => "Long" case NumericByteString => "ByteString" } } }
Example 64
Source File: TrainSummary.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.visualization

import com.intel.analytics.bigdl.optim.Trigger
import com.intel.analytics.bigdl.visualization.tensorboard.{FileReader, FileWriter}

import scala.collection.mutable

class TrainSummary(
    logDir: String,
    appName: String) extends Summary(logDir, appName) {

  // Per-tag summary triggers. The setSummaryTrigger method and the default entries of
  // the full source are elided from this excerpt.
  private val triggers = mutable.HashMap[String, Trigger]()

  def getSummaryTrigger(tag: String): Option[Trigger] = {
    if (triggers.contains(tag)) {
      Some(triggers(tag))
    } else {
      None
    }
  }

  private[bigdl] def getScalarTriggers(): Iterator[(String, Trigger)] = {
    triggers.filter(!_._1.equals("Parameters")).toIterator
  }
}

object TrainSummary {
  def apply(logDir: String, appName: String): TrainSummary = {
    new TrainSummary(logDir, appName)
  }
}
Example 65
Source File: FrameManager.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.nn

import java.util.concurrent.atomic.AtomicInteger

import com.intel.analytics.bigdl.nn.Graph.ModuleNode
import com.intel.analytics.bigdl.nn.tf.{Exit, MergeOps, NextIteration}

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

// The FrameManager class itself, which tracks the frame each graph node executes in,
// is elided from this excerpt; only the nested Frame definition survives.
object FrameManager {

  class Frame[T] private[FrameManager] (
      val name: String,
      val parent: Option[Frame[T]]
  ) {
    // Sync all next iteration nodes execution
    private[bigdl] var barrier: AtomicInteger = new AtomicInteger(0)
    // User can use NextIteration to sync execution. This is a list of those type of nodes
    private[bigdl] val waitingNodes: ArrayBuffer[ModuleNode[T]] = new ArrayBuffer[ModuleNode[T]]()
    // Nodes that should be refreshed in an iteration of the frame
    private[bigdl] val nodes: ArrayBuffer[ModuleNode[T]] = new ArrayBuffer[ModuleNode[T]]()
  }
}
Example 66
Source File: SerializerSpecHelper.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.utils.serializer import java.io.{File} import java.lang.reflect.Modifier import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity} import com.intel.analytics.bigdl.nn.ops.{Exp => ExpOps, Pow => PowOps, Select => SelectOps, Sum => SumOps, Tile => TileOps} import com.intel.analytics.bigdl.nn.tf.{DecodeGif => DecodeGifOps, DecodeJpeg => DecodeJpegOps, DecodePng => DecodePngOps, DecodeRaw => DecodeRawOps} import com.intel.analytics.bigdl.utils.RandomGenerator.RNG import com.intel.analytics.bigdl.utils.tf.loaders.{Pack => _} import com.intel.analytics.bigdl.utils.{Shape => KShape} import org.reflections.Reflections import org.reflections.scanners.SubTypesScanner import org.reflections.util.{ClasspathHelper, ConfigurationBuilder, FilterBuilder} import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} import scala.collection.JavaConverters._ import scala.collection.mutable abstract class SerializerSpecHelper extends FlatSpec with Matchers with BeforeAndAfterAll{ val postFix = "bigdl" val excludedClass = new mutable.HashSet[String]() val excludedPackage = new mutable.HashSet[String]() private val expected = new mutable.HashSet[String]() val tested = new mutable.HashSet[String]() private var executedCount = 0 protected def getPackage(): String = "" protected def addExcludedClass(): Unit = {} protected def addExcludedPackage(): Unit = {} protected def getExpected(): mutable.Set[String] = expected override protected def beforeAll() = { addExcludedClass addExcludedPackage val filterBuilder = new FilterBuilder() excludedPackage.foreach(filterBuilder.excludePackage(_)) val reflections = new Reflections(new ConfigurationBuilder() .filterInputsBy(filterBuilder) .setUrls(ClasspathHelper.forPackage(getPackage())) .setScanners(new SubTypesScanner())) val subTypes = reflections.getSubTypesOf(classOf[AbstractModule[_, _, _]]) .asScala.filter(sub => !Modifier.isAbstract(sub.getModifiers)). filter(sub => !excludedClass.contains(sub.getName)) subTypes.foreach(sub => expected.add(sub.getName)) } protected def runSerializationTest(module : AbstractModule[_, _, Float], input : Activity, cls: Class[_] = null) : Unit = { runSerializationTestWithMultiClass(module, input, if (cls == null) Array(module.getClass) else Array(cls)) } protected def runSerializationTestWithMultiClass(module : AbstractModule[_, _, Float], input : Activity, classes: Array[Class[_]]) : Unit = { val name = module.getName val serFile = File.createTempFile(name, postFix) val originForward = module.evaluate().forward(input) ModulePersister.saveToFile[Float](serFile.getAbsolutePath, null, module.evaluate(), true) RNG.setSeed(1000) val loadedModule = ModuleLoader.loadFromFile[Float](serFile.getAbsolutePath) val afterLoadForward = loadedModule.forward(input) if (serFile.exists) { serFile.delete } afterLoadForward should be (originForward) classes.foreach(cls => { if (getExpected.contains(cls.getName)) { tested.add(cls.getName) } }) } override protected def afterAll() = { println(s"total ${getExpected.size}, remaining ${getExpected.size - tested.size}") tested.filter(!getExpected.contains(_)).foreach(t => { println(s"$t do not need to be tested") }) getExpected.foreach(exp => { require(tested.contains(exp), s" $exp not included in the test!") }) } }
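The spec helper above drives its coverage check with mutable.HashSets: expected is filled by reflection, tested by each serialization test, and afterAll compares the two. The bookkeeping reduced to its essentials (class names below are invented examples):

import scala.collection.mutable

object CoverageSketch {
  private val expected = mutable.HashSet("nn.Linear", "nn.ReLU", "nn.Reverse")
  private val tested   = mutable.HashSet.empty[String]

  def markTested(className: String): Unit =
    if (expected.contains(className)) tested += className

  def main(args: Array[String]): Unit = {
    markTested("nn.Linear")
    markTested("nn.Dropout")   // not in expected, ignored
    println(s"total ${expected.size}, remaining ${expected.size - tested.size}")   // total 3, remaining 2
    expected.diff(tested).foreach(cls => println(s"$cls not included in the test!"))
  }
}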
Example 67
Source File: ReverseSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.nn import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.utils.RandomGenerator import com.intel.analytics.bigdl.utils.serializer.ModuleSerializationTest import org.scalatest.{FlatSpec, Matchers} import scala.collection.mutable import scala.util.Random @com.intel.analytics.bigdl.tags.Serial class ReverseSpec extends FlatSpec with Matchers { "A Reverse()" should "generate correct output and grad for Tensor input dim1 inplace" in { def randomn(): Double = RandomGenerator.RNG.uniform(-10, 10) val layer = new Reverse[Double](1) val input = Tensor[Double](4, 3) input.apply1(x => randomn()) val expectedOutput = Tensor[Double]().resizeAs(input) expectedOutput.select(1, 1).copy(input(4)) expectedOutput.select(1, 2).copy(input(3)) expectedOutput.select(1, 3).copy(input(2)) expectedOutput.select(1, 4).copy(input(1)) val gradOutput = Tensor[Double](4, 3) gradOutput.apply1(x => randomn()) val expectedGradInput = Tensor[Double]().resizeAs(gradOutput) expectedGradInput(1).copy(gradOutput(4)) expectedGradInput(2).copy(gradOutput(3)) expectedGradInput(3).copy(gradOutput(2)) expectedGradInput(4).copy(gradOutput(1)) val output = layer.forward(input) val gradInput = layer.backward(input, gradOutput) output should be (expectedOutput) gradInput should be (expectedGradInput) } "A Reverse()" should "generate correct output and grad for Tensor input dim1" in { def randomn(): Double = RandomGenerator.RNG.uniform(-10, 10) val layer = new Reverse[Double](1) val input = Tensor[Double](3, 3, 3) input.apply1(x => randomn()) val expectedOutput = Tensor[Double]().resizeAs(input) expectedOutput(1).copy(input(3)) expectedOutput(2).copy(input(2)) expectedOutput(3).copy(input(1)) val gradOutput = Tensor[Double](3, 3, 3) gradOutput.apply1(x => randomn()) val expectedGradInput = Tensor[Double]().resizeAs(gradOutput) expectedGradInput(1).copy(gradOutput(3)) expectedGradInput(2).copy(gradOutput(2)) expectedGradInput(3).copy(gradOutput(1)) val output = layer.forward(input) val gradInput = layer.backward(input, gradOutput) output should be (expectedOutput) gradInput should be (expectedGradInput) } "A Reverse()" should "generate correct output and grad for Tensor input dim2" in { def randomn(): Double = RandomGenerator.RNG.uniform(-10, 10) val layer = new Reverse[Double](2) val input = Tensor[Double](3, 3, 3) input.apply1(x => randomn()) val expectedOutput = Tensor[Double]().resizeAs(input) expectedOutput.select(2, 1).copy(input.select(2, 3)) expectedOutput.select(2, 2).copy(input.select(2, 2)) expectedOutput.select(2, 3).copy(input.select(2, 1)) val gradOutput = Tensor[Double](3, 3, 3) gradOutput.apply1(x => randomn()) val expectedGradInput = Tensor[Double]().resizeAs(gradOutput) expectedGradInput.select(2, 1).copy(gradOutput.select(2, 3)) expectedGradInput.select(2, 2).copy(gradOutput.select(2, 2)) expectedGradInput.select(2, 3).copy(gradOutput.select(2, 1)) val output = layer.forward(input) val gradInput = layer.backward(input, gradOutput) output should be (expectedOutput) gradInput should be (expectedGradInput) } } class ReverseSerialTest extends ModuleSerializationTest { override def test(): Unit = { val reverse = Reverse[Float]().setName("reverse") val input = Tensor[Float](10).apply1(_ => Random.nextFloat()) runSerializationTest(reverse, input) } }
Example 68
Source File: VMContext.scala From scala-json with Apache License 2.0 | 5 votes |
package json.shadow import json._ import json.internal.DefaultVMContext.PrimitiveArray import json.internal.PrimitiveJArray.Builder import json.internal.{PrimitiveJArray, SimpleStringBuilder, BaseVMContext, JValueObjectDeserializer} import scala.collection.immutable.StringOps import scala.collection.mutable import scala.reflect.ClassTag object VMContext extends BaseVMContext { def newVMStringBuilder: SimpleStringBuilder = new SimpleStringBuilder { val builder = new StringBuilder(128) def append(str: String): internal.SimpleStringBuilder = { builder append str this } def append(char: Char): SimpleStringBuilder = { builder.append(char) this } def ensureCapacity(cap: Int): Unit = builder.ensureCapacity(cap) def result(): String = builder.result() } val localMapper = new ThreadLocal[JValueObjectDeserializer] { override protected def initialValue: JValueObjectDeserializer = new JValueObjectDeserializer } //TODO: do these need to be specialized? def createPrimitiveArray[ case '\b' => sb.append("\\b") case '\t' => sb.append("\\t") case '\n' => sb.append("\\n") case '\f' => sb.append("\\f") case '\r' => sb.append("\\r") case c if c < ' ' => val t = "000" + Integer.toHexString(c) sb.append("\\u" + t.substring(t.length() - 4)) case c => sb.append(c) } } sb.append('"') sb } def newJValueFromArray(arr: Array[_]): JArray = { import json.accessors._ arr match { case x: Array[Byte] => new PrimitiveJArray[Byte](wrapPrimitiveArray(x)) case x: Array[Short] => new PrimitiveJArray[Short](wrapPrimitiveArray(x)) case x: Array[Int] => new PrimitiveJArray[Int](wrapPrimitiveArray(x)) case x: Array[Long] => new PrimitiveJArray[Long](wrapPrimitiveArray(x)) case x: Array[Double] => new PrimitiveJArray[Double](wrapPrimitiveArray(x)) case x: Array[Float] => new PrimitiveJArray[Float](wrapPrimitiveArray(x)) case x: Array[Boolean] => new PrimitiveJArray[Boolean](wrapPrimitiveArray(x)) } } def extractPrimitiveJArray[T: ClassTag: PrimitiveJArray.Builder](x: Iterable[T]): Option[JArray] = { val builder = implicitly[PrimitiveJArray.Builder[T]] x match { case x: mutable.WrappedArray[T] => Some(newJValueFromArray(x.array)) case x: IndexedSeq[T] => Some(builder.createFrom(x)) case _ => None } } }
Example 69
Source File: VMContext.scala From scala-json with Apache License 2.0 | 5 votes |
package json.shadow import json._ import json.internal.DefaultVMContext.PrimitiveArray import json.internal.PrimitiveJArray.Builder import json.internal.{JanssonDeserializer, PrimitiveJArray, SimpleStringBuilder, BaseVMContext} import scala.collection.immutable.StringOps import scala.collection.mutable import scala.reflect.ClassTag object VMContext extends BaseVMContext { def newVMStringBuilder: SimpleStringBuilder = new SimpleStringBuilder { val builder = new StringBuilder(128) def append(str: String): internal.SimpleStringBuilder = { builder append str this } def append(char: Char): SimpleStringBuilder = { builder.append(char) this } def ensureCapacity(cap: Int): Unit = builder.ensureCapacity(cap) def result(): String = builder.result() } //TODO: do these need to be specialized? def createPrimitiveArray[T: ClassTag](from: Array[T]): PrimitiveArray[T] = from def fromString(str: String): JValue = { JanssonDeserializer.parseString(str) } def fromAny(value: Any): JValue = JValue.fromAnyInternal(value) final def quoteJSONString(string: String, sb: SimpleStringBuilder): SimpleStringBuilder = { require(string != null) sb.ensureCapacity(string.length) sb.append(JanssonDeserializer.serializeString(string)) sb } def newJValueFromArray(arr: Array[_]): JArray = { import json.accessors._ arr match { case x: Array[Byte] => new PrimitiveJArray[Byte](wrapPrimitiveArray(x)) case x: Array[Short] => new PrimitiveJArray[Short](wrapPrimitiveArray(x)) case x: Array[Int] => new PrimitiveJArray[Int](wrapPrimitiveArray(x)) case x: Array[Long] => new PrimitiveJArray[Long](wrapPrimitiveArray(x)) case x: Array[Double] => new PrimitiveJArray[Double](wrapPrimitiveArray(x)) case x: Array[Float] => new PrimitiveJArray[Float](wrapPrimitiveArray(x)) case x: Array[Boolean] => new PrimitiveJArray[Boolean](wrapPrimitiveArray(x)) } } def extractPrimitiveJArray[T: ClassTag: PrimitiveJArray.Builder](x: Iterable[T]): Option[JArray] = { val builder = implicitly[PrimitiveJArray.Builder[T]] x match { case x: mutable.WrappedArray[T] => Some(newJValueFromArray(x.array)) case x: IndexedSeq[T] => Some(builder.createFrom(x)) case _ => None } } }
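Both VMContext implementations above special-case mutable.WrappedArray so that an Iterable backed by a primitive array can hand over its backing array without copying. A small sketch of that dispatch, assuming Scala 2.12 where implicitly wrapping an Array produces a mutable.WrappedArray (on 2.13 the default wrapper is immutable.ArraySeq instead):

import scala.collection.mutable

object WrappedArraySketch {
  // Return the backing array when the Iterable is just a wrapped Array, otherwise copy.
  def backingArray(xs: Iterable[Int]): Array[Int] = xs match {
    case wrapped: mutable.WrappedArray[Int] => wrapped.array   // zero-copy path
    case other                              => other.toArray   // fallback copy
  }

  def main(args: Array[String]): Unit = {
    val raw: Array[Int] = Array(1, 2, 3)
    val asIterable: Iterable[Int] = raw            // implicit wrapping into WrappedArray
    println(backingArray(asIterable) eq raw)       // true: same underlying array
    println(backingArray(List(4, 5, 6)).mkString(","))
  }
}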
Example 70
Source File: frontier.scala From aima-scala with MIT License | 5 votes |
package aima.core.search.uninformed import aima.core.search.{Frontier, SearchNode} import scala.collection.immutable.{Queue, Iterable} import scala.collection.mutable import scala.util.Try class FIFOQueueFrontier[State, Action, Node <: SearchNode[State, Action]](queue: Queue[Node], stateSet: Set[State]) extends Frontier[State, Action, Node] { self => def this(n: Node) = this(Queue(n), Set(n.state)) def removeLeaf: Option[(Node, Frontier[State, Action, Node])] = queue.dequeueOption.map { case (leaf, updatedQueue) => (leaf, new FIFOQueueFrontier[State, Action, Node](updatedQueue, stateSet - leaf.state)) } def addAll(iterable: Iterable[Node]): Frontier[State, Action, Node] = new FIFOQueueFrontier(queue.enqueueAll(iterable), stateSet ++ iterable.map(_.state)) def contains(state: State): Boolean = stateSet.contains(state) def replaceByState(node: Node): Frontier[State, Action, Node] = { if (contains(node.state)) { new FIFOQueueFrontier(queue.filterNot(_.state == node.state).enqueue(node), stateSet) } else { self } } def getNode(state: State): Option[Node] = { if (contains(state)) { queue.find(_.state == state) } else { None } } def add(node: Node): Frontier[State, Action, Node] = new FIFOQueueFrontier[State, Action, Node](queue.enqueue(node), stateSet + node.state) } class PriorityQueueHashSetFrontier[State, Action, Node <: SearchNode[State, Action]]( queue: mutable.PriorityQueue[Node], stateMap: mutable.Map[State, Node] ) extends Frontier[State, Action, Node] { self => def this(n: Node, costNodeOrdering: Ordering[Node]) = this(mutable.PriorityQueue(n)(costNodeOrdering), mutable.Map(n.state -> n)) def removeLeaf: Option[(Node, Frontier[State, Action, Node])] = Try { val leaf = queue.dequeue stateMap -= leaf.state (leaf, self) }.toOption def addAll(iterable: Iterable[Node]): Frontier[State, Action, Node] = { iterable.foreach { costNode => queue += costNode stateMap += (costNode.state -> costNode) } self } def contains(state: State): Boolean = stateMap.contains(state) def replaceByState(node: Node): Frontier[State, Action, Node] = { if (contains(node.state)) { val updatedElems = node :: queue.toList.filterNot(_.state == node.state) queue.clear() queue.enqueue(updatedElems: _*) stateMap += (node.state -> node) } self } def getNode(state: State): Option[Node] = { if (contains(state)) { queue.find(_.state == state) } else { None } } def add(node: Node): Frontier[State, Action, Node] = { val costNode = node queue.enqueue(costNode) stateMap += (node.state -> costNode) self } }
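PriorityQueueHashSetFrontier above pairs a mutable.PriorityQueue (ordered by path cost) with a mutable.Map index so that contains and getNode do not have to scan the queue. The core of that pairing, reduced to (state, cost) tuples as nodes:

import scala.collection.mutable

object PriorityFrontierSketch {
  type Node = (String, Int)   // (state, path cost)

  def main(args: Array[String]): Unit = {
    // cheapest node first, so reverse the natural (max-heap) ordering on cost
    val queue    = mutable.PriorityQueue.empty[Node](Ordering.by[Node, Int](_._2).reverse)
    val stateMap = mutable.Map.empty[String, Node]

    def add(node: Node): Unit = { queue.enqueue(node); stateMap += node._1 -> node }

    add(("A", 5)); add(("B", 2)); add(("C", 9))

    val leaf = queue.dequeue()      // removeLeaf: pop the cheapest node...
    stateMap -= leaf._1             // ...and keep the index in sync
    println(leaf)                   // (B,2)
    println(stateMap.contains("B")) // false
    println(stateMap.contains("C")) // true
  }
}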
Example 71
Source File: LabeledGraph.scala From aima-scala with MIT License | 5 votes |
package aima.core.environment.map2d final class LabeledGraph[Vertex, Edge] { import scala.collection.mutable val globalEdgeLookup = new mutable.LinkedHashMap[Vertex, mutable.LinkedHashMap[Vertex, Edge]]() // TODO: get rid of mutability; ListMap should work val vertexLabelsList = new mutable.ArrayBuffer[Vertex]() // TODO: get rid of mutability def addVertex(v: Vertex): Unit = { checkForNewVertex(v) () } def set(from: Vertex, to: Vertex, edge: Edge): Unit = { val localEdgeLookup = checkForNewVertex(from) localEdgeLookup.put(to, edge) checkForNewVertex(to) () } def remove(from: Vertex, to: Vertex): Unit = { val localEdgeLookup = globalEdgeLookup.get(from) localEdgeLookup.foreach(l => l.remove(to)) } def get(from: Vertex, to: Vertex): Option[Edge] = { val localEdgeLookup = globalEdgeLookup.get(from) localEdgeLookup.flatMap(_.get(to)) } def successors(v: Vertex): List[Vertex] = { val localEdgeLookup = globalEdgeLookup.get(v) localEdgeLookup.toList.flatMap(_.keySet.toList) } def vertexLabels = vertexLabelsList.toList def isVertexLabel(v: Vertex): Boolean = globalEdgeLookup.get(v).isDefined def clear(): Unit = { vertexLabelsList.clear() globalEdgeLookup.clear() } private def checkForNewVertex(v: Vertex): mutable.LinkedHashMap[Vertex, Edge] = { val maybeExisting = globalEdgeLookup.get(v) maybeExisting match { case None => val m = new mutable.LinkedHashMap[Vertex, Edge] globalEdgeLookup.put(v, m) vertexLabelsList.append(v) m case Some(existing) => existing } } }
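LabeledGraph stores its adjacency as a LinkedHashMap of LinkedHashMaps, so both the vertex set and each vertex's outgoing edges keep insertion order. The same nested lookup in a few lines (the city names are just example data):

import scala.collection.mutable

object AdjacencySketch {
  private val edges = mutable.LinkedHashMap.empty[String, mutable.LinkedHashMap[String, Double]]

  def set(from: String, to: String, weight: Double): Unit =
    edges.getOrElseUpdate(from, mutable.LinkedHashMap.empty) += to -> weight

  def get(from: String, to: String): Option[Double] = edges.get(from).flatMap(_.get(to))

  def successors(v: String): List[String] = edges.get(v).toList.flatMap(_.keys)

  def main(args: Array[String]): Unit = {
    set("Arad", "Sibiu", 140)
    set("Arad", "Timisoara", 118)
    println(get("Arad", "Sibiu"))   // Some(140.0)
    println(successors("Arad"))     // List(Sibiu, Timisoara) - insertion order preserved
  }
}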
Example 72
Source File: ExpiringMap.scala From mantis with Apache License 2.0 | 5 votes |
package io.iohk.ethereum.jsonrpc import java.time.temporal.ChronoUnit import java.time.Duration import io.iohk.ethereum.jsonrpc.ExpiringMap.ValueWithDuration import scala.collection.mutable import scala.util.Try object ExpiringMap { case class ValueWithDuration[V](value: V, expiration: Duration) def empty[K, V](defaultElementRetentionTime: Duration): ExpiringMap[K, V] = new ExpiringMap(mutable.Map.empty, defaultElementRetentionTime) } //TODO: Make class thread safe class ExpiringMap[K, V] private (val underlying: mutable.Map[K, ValueWithDuration[V]], val defaultRetentionTime: Duration) { private val maxHoldDuration = ChronoUnit.CENTURIES.getDuration def addFor(k: K, v: V, duration: Duration): ExpiringMap[K, V] = { underlying += k -> ValueWithDuration(v, Try(currentPlus(duration)).getOrElse(currentPlus(maxHoldDuration))) this } def add(k: K, v: V, duration: Duration): ExpiringMap[K, V] = { addFor(k, v, duration) } def addForever(k: K, v: V): ExpiringMap[K, V] = addFor(k, v, maxHoldDuration) def add(k: K, v: V): ExpiringMap[K, V] = addFor(k, v, defaultRetentionTime) def remove(k: K): ExpiringMap[K, V] = { underlying -= k this } def get(k: K): Option[V] = { underlying.get(k).flatMap(value => if (isNotExpired(value)) Some(value.value) else { remove(k) None } ) } private def isNotExpired(value: ValueWithDuration[V]) = currentNanoDuration().minus(value.expiration).isNegative private def currentPlus(duration: Duration) = currentNanoDuration().plus(duration) private def currentNanoDuration() = Duration.ofNanos(System.nanoTime()) }
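ExpiringMap keeps a mutable.Map whose values remember their own deadline; expired entries are not swept eagerly but removed when get touches them. The same lazy-expiry idea in a few lines, using epoch milliseconds rather than java.time.Duration (the key and TTL below are arbitrary):

import scala.collection.mutable

object ExpiringCacheSketch {
  private val cache = mutable.Map.empty[String, (String, Long)]   // key -> (value, expiresAtMillis)

  def put(key: String, value: String, ttlMillis: Long): Unit =
    cache += key -> (value, System.currentTimeMillis() + ttlMillis)

  def get(key: String): Option[String] = cache.get(key).flatMap { case (value, deadline) =>
    if (System.currentTimeMillis() < deadline) Some(value)
    else { cache -= key; None }   // expired: drop it on access
  }

  def main(args: Array[String]): Unit = {
    put("filter-id", "0x10", ttlMillis = 50L)
    println(get("filter-id"))   // Some(0x10)
    Thread.sleep(80L)
    println(get("filter-id"))   // None
  }
}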
Example 73
Source File: Entities.scala From ArchiveSpark with MIT License | 5 votes |
package org.archive.archivespark.functions import java.util.Properties import edu.stanford.nlp.ling.CoreAnnotations.{NamedEntityTagAnnotation, SentencesAnnotation, TextAnnotation, TokensAnnotation} import edu.stanford.nlp.ling.CoreLabel import edu.stanford.nlp.pipeline.{Annotation, StanfordCoreNLP} import edu.stanford.nlp.util.CoreMap import org.archive.archivespark.model._ import org.archive.archivespark.model.dataloads.ByteLoad import org.archive.archivespark.model.pointers.DependentFieldPointer import scala.collection.JavaConverters._ import scala.collection.mutable object EntitiesNamespace { def get: DependentFieldPointer[ByteLoad.Root, String] = HtmlText.mapIdentity("entities").get[String]("entities") } class Entities private (properties: Properties, tagFieldMapping: Seq[(String, String)]) extends BoundEnrichFunc[ByteLoad.Root, String, String](EntitiesNamespace.get) { override def defaultField: String = "" override def fields: Seq[String] = tagFieldMapping.map { case (tag, field) => field } @transient lazy val pipeline: StanfordCoreNLP = new StanfordCoreNLP(properties) override def derive(source: TypedEnrichable[String], derivatives: Derivatives): Unit = { val doc = new Annotation(source.get) pipeline.annotate(doc) val sentences: mutable.Seq[CoreMap] = doc.get(classOf[SentencesAnnotation]).asScala val mentions = sentences.flatMap { sentence => val tokens: mutable.Buffer[CoreLabel] = sentence.get(classOf[TokensAnnotation]).asScala tokens.map { token => val word: String = token.get(classOf[TextAnnotation]) val ne: String = token.get(classOf[NamedEntityTagAnnotation]) (ne, word) } }.groupBy{case (ne, word) => ne.toLowerCase}.mapValues(items => items.map{case (ne, word) => word}.toSet) for ((tag, _) <- tagFieldMapping) derivatives.setNext(MultiValueEnrichable(mentions.getOrElse(tag.toLowerCase, Set()).toSeq)) } } object EntitiesConstants { val DefaultTagFieldMapping: Seq[(String, String)] = Seq( "PERSON" -> "persons", "ORGANIZATION" -> "organizations", "LOCATION" -> "locations", "DATE" -> "dates" ) val DefaultProps: Properties = new Properties() {{ setProperty("annotators", "tokenize, ssplit, pos, lemma, ner") setProperty("tokenize.class", "PTBTokenizer") setProperty("tokenize.language", "en") setProperty("ner.useSUTime", "false") setProperty("ner.applyNumericClassifiers", "false") }} } object Entities extends Entities(EntitiesConstants.DefaultProps, EntitiesConstants.DefaultTagFieldMapping) { def apply() = new Entities(EntitiesConstants.DefaultProps, EntitiesConstants.DefaultTagFieldMapping) def apply(tagFieldMapping: (String, String)*) = new Entities(EntitiesConstants.DefaultProps, tagFieldMapping) def apply(props: Properties) = new Entities(props, EntitiesConstants.DefaultTagFieldMapping) def apply(props: Properties, tagFieldMapping: (String, String)*) = new Entities(props, tagFieldMapping) def apply(language: String, tagFieldMapping: Seq[(String, String)] = EntitiesConstants.DefaultTagFieldMapping): Entities = { val props = EntitiesConstants.DefaultProps props.setProperty("tokenize.language", language) new Entities(props, tagFieldMapping) } }
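After annotation, the enrich function above collects (tag, word) pairs from the mutable buffers returned by asScala and groups them into a tag-to-words map. The grouping step on its own, detached from CoreNLP (the token pairs below are fabricated):

import scala.collection.mutable

object MentionGroupingSketch {
  def main(args: Array[String]): Unit = {
    // fabricated (tag, word) pairs; in the listing these come from CoreNLP annotations
    val tokens = mutable.Buffer(
      ("PERSON", "Ada"), ("LOCATION", "Paris"), ("PERSON", "Ada"), ("DATE", "1843"))

    val mentions: Map[String, Set[String]] =
      tokens
        .groupBy { case (tag, _) => tag.toLowerCase }
        .map { case (tag, items) => tag -> items.map { case (_, word) => word }.toSet }
        .toMap

    println(mentions.getOrElse("person", Set()))   // Set(Ada)
    println(mentions.getOrElse("money", Set()))    // Set()
  }
}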
Example 74
Source File: PermutationStrategy.scala From scalismo-faces with Apache License 2.0 | 5 votes |
package scalismo.faces.numerics import breeze.linalg.CSCMatrix import scala.collection.mutable object CSCMatrixGraph { def degree(n: Int, A: CSCMatrix[Double]): Int = { require(n >= 0 && n < A.cols, "invalid node number") val cStart = A.colPtrs(n) val cEnd = A.colPtrs(n + 1) cEnd - cStart - 1 // remove diagonal entry } def neighbours(n: Int, A: CSCMatrix[Double]): Array[Int] = { require(n >= 0 && n < A.cols, "invalid node number") val cStart = A.colPtrs(n) val cEnd = A.colPtrs(n + 1) A.rowIndices.slice(cStart, cEnd) } }
Example 75
Source File: EntityAwarePredictor.scala From low-rank-logic with MIT License | 5 votes |
package uclmr import uclmr.EntityAwareEvaluation.Entity import uclmr.FactorizationUtil.{PredictedFact, Row} import ml.wolfe.util.Util import scala.collection.mutable class EntityAwarePredictor(val embeddings: ProbLogicEmbeddings, val entities: Map[Any, Entity]) { val distanceCache = new mutable.HashMap[(String, String), Double]() def closest(candidates: Iterable[String], target: String) = { if (candidates.isEmpty) ("NA", Double.PositiveInfinity) else candidates.map(pred => { val dist = distanceCache.getOrElseUpdate(pred -> target, embeddings.embeddings(target).distance(embeddings.embeddings(pred))) pred -> dist }).minBy(_._2) } def farthest(candidates: Iterable[String], target: String) = { if (candidates.isEmpty) ("NA", Double.PositiveInfinity) else candidates.map(pred => { val dist = distanceCache.getOrElseUpdate(pred -> target, embeddings.embeddings(target).distance(embeddings.embeddings(pred))) pred -> dist }).maxBy(_._2) } def predictAll(row: Row, targetRelations:Seq[String], useFilter:Boolean = true) = { targetRelations.map(predict(row,_,useFilter)) } import EntityAwareEvaluation._ def predict(row: Row, target: String, useFilter:Boolean = true) = { val arg1 = entities(row.arg1) val arg2 = entities(row.arg2) val targetEmbedding = embeddings.embeddings(target) def filterObs(obs:Iterable[String]) = if (useFilter) obs.filter(targetEmbedding.observationFilter) else obs def asProb(pair:(String,Double)) = pair.copy(_2 = Util.sig(targetEmbedding.bias - pair._2)) //find best unary predicate for arg1 val arg1Result = closest(filterObs(arg1.asArg1), target) //find best unary predicate for arg2 val arg2Result = closest(filterObs(arg2.asArg2), target) //find best binary predicate as observation val relResult = closest(filterObs(row.relations.view.map(_._1)), target) val (predictor, score) = Iterator(arg1Result, arg2Result, relResult).maxBy(_._2) val prob = Util.sig(targetEmbedding.bias - score) EntityAwarePrediction( PredictedFact(row, target, prob), predictor, asProb(arg1Result), asProb(arg2Result), asProb(relResult) ) } } case class EntityAwarePrediction(fact: PredictedFact, predictor: String, arg1Result: (String, Double), arg2Result: (String, Double), relResult: (String, Double)) { override def toString = { s""" |$fact | Predictor: $predictor | Arg1: $arg1Result | Arg2: $arg2Result | Rel: $relResult """.stripMargin } }
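The predictor above memoises pairwise distances in a mutable.HashMap via getOrElseUpdate, so each (predicate, target) distance is computed at most once. The caching pattern in isolation; the "distance" here is a dummy computation, not the embedding distance from the listing:

import scala.collection.mutable

object DistanceCacheSketch {
  private val distanceCache = mutable.HashMap.empty[(String, String), Double]
  private var computations  = 0

  // dummy stand-in for an embedding distance that is expensive to compute
  private def expensiveDistance(a: String, b: String): Double = {
    computations += 1
    math.abs(a.length - b.length).toDouble
  }

  def distance(a: String, b: String): Double =
    distanceCache.getOrElseUpdate(a -> b, expensiveDistance(a, b))

  def main(args: Array[String]): Unit = {
    distance("worksFor", "employedBy")
    distance("worksFor", "employedBy")   // second call is served from the cache
    println(computations)                // 1
  }
}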
Example 76
Source File: TextSegmentor.scala From topwords with GNU General Public License v3.0 | 5 votes |
package io.github.qf6101.topwords import scala.collection.mutable protected def segment(splitPositions: List[Int]): String = { // return text itself if it has only one character if (T.length <= 1 || splitPositions.length == 0) return T // copy the characters one by one plus the splitters in the boundary positions val splitPosStack = mutable.Stack[Int]().pushAll(splitPositions.reverse) var currSplitPos = splitPosStack.pop() - 1 val splitResult = new StringBuilder() T.zipWithIndex.foreach { case (c, idx) => splitResult += c if (idx == currSplitPos) { splitResult += splitter currSplitPos = if (splitPosStack.pop() - 1 else -1 } } splitResult.toString() } }
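The segmenter above pops boundary positions from a mutable.Stack while copying the characters once. A self-contained version of that loop, with the text and splitter character made explicit:

import scala.collection.mutable

object SegmentSketch {
  // Insert `splitter` after each (1-based) position in `splitPositions`.
  def segment(text: String, splitPositions: List[Int], splitter: Char = '|'): String = {
    if (text.length <= 1 || splitPositions.isEmpty) return text
    val stack = mutable.Stack[Int]()
    stack.pushAll(splitPositions.reverse)          // smallest position ends up on top
    var next = stack.pop() - 1
    val sb = new StringBuilder
    text.zipWithIndex.foreach { case (c, idx) =>
      sb += c
      if (idx == next) {
        sb += splitter
        next = if (stack.nonEmpty) stack.pop() - 1 else -1
      }
    }
    sb.toString()
  }

  def main(args: Array[String]): Unit =
    println(segment("topwords", List(3, 5)))       // top|wo|rds
}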
Example 77
Source File: PasswordInfoDAO.scala From crm-seed with Apache License 2.0 | 5 votes |
package com.dataengi.crm.identities.daos import com.google.inject.Singleton import com.mohiva.play.silhouette.api.LoginInfo import com.mohiva.play.silhouette.api.util.PasswordInfo import com.mohiva.play.silhouette.persistence.daos.DelegableAuthInfoDAO import scala.collection.mutable import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.Future trait PasswordInfoDAO extends DelegableAuthInfoDAO[PasswordInfo] @Singleton class InMemoryPasswordInfoDAOImpl extends PasswordInfoDAO { val passwords = mutable.HashMap.empty[LoginInfo, PasswordInfo] def remove(loginInfo: LoginInfo): Future[Unit] = { Future.successful( passwords.remove(loginInfo) ) } }
Example 78
Source File: B3FormatPropagation.scala From opencensus-scala with Apache License 2.0 | 5 votes |
package io.opencensus.scala.http.propagation import io.opencensus.trace.propagation.TextFormat.{Getter, Setter} import io.opencensus.trace.{Span, SpanContext} import scala.collection.{immutable, mutable} import scala.util.Try trait B3FormatPropagation[Header, Request] extends Propagation[Header, Request] { def headerValue(req: Request, key: String): Option[String] def createHeader(key: String, value: String): Header override def extractContext(request: Request): Try[SpanContext] = Try(b3Format.extract(request, HeaderGetter)) private type HttpHeaderBuilder = mutable.ArrayBuffer[Header] private object HeaderSetter extends Setter[HttpHeaderBuilder] { override def put( carrier: HttpHeaderBuilder, key: String, value: String ): Unit = { carrier += createHeader(key, value) } } private object HeaderGetter extends Getter[Request] { override def get(carrier: Request, key: String): String = headerValue(carrier, key).orNull } }
Example 79
Source File: GrokHelper.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.s2jobs.utils import io.thekraken.grok.api.Grok import org.apache.s2graph.s2jobs.Logger import org.apache.spark.SparkFiles import org.apache.spark.sql.Row import scala.collection.mutable object GrokHelper extends Logger { private val grokPool:mutable.Map[String, Grok] = mutable.Map.empty def getGrok(name:String, patternFiles:Seq[String], patterns:Map[String, String], compilePattern:String):Grok = { if (grokPool.get(name).isEmpty) { println(s"Grok '$name' initialized..") val grok = new Grok() patternFiles.foreach { patternFile => val filePath = SparkFiles.get(patternFile) println(s"[Grok][$name] add pattern file : $patternFile ($filePath)") grok.addPatternFromFile(filePath) } patterns.foreach { case (name, pattern) => println(s"[Grok][$name] add pattern : $name ($pattern)") grok.addPattern(name, pattern) } grok.compile(compilePattern) println(s"[Grok][$name] patterns: ${grok.getPatterns}") grokPool.put(name, grok) } grokPool(name) } def grokMatch(text:String)(implicit grok:Grok):Option[Map[String, String]] = { import scala.collection.JavaConverters._ val m = grok.`match`(text) m.captures() val rstMap = m.toMap.asScala.toMap .filter(_._2 != null) .map{ case (k, v) => k -> v.toString} if (rstMap.isEmpty) None else Some(rstMap) } def grokMatchWithSchema(text:String)(implicit grok:Grok, keys:Array[String]):Option[Row] = { import scala.collection.JavaConverters._ val m = grok.`match`(text) m.captures() val rstMap = m.toMap.asScala.toMap if (rstMap.isEmpty) None else { val l = keys.map { key => rstMap.getOrElse(key, null)} Some(Row.fromSeq(l)) } } }
Example 80
Source File: Job.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.s2jobs import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.s2graph.s2jobs.task._ import scala.collection.mutable class Job(ss:SparkSession, jobDesc:JobDescription) extends Serializable with Logger { private val dfMap = mutable.Map[String, DataFrame]() def run() = { // source jobDesc.sources.foreach{ source => val df = source.toDF(ss) if (source.conf.cache.getOrElse(false) && !df.isStreaming) df.cache() dfMap.put(source.conf.name, df) } logger.info(s"valid source DF set : ${dfMap.keySet}") // process var processRst:Seq[(String, DataFrame)] = Nil do { processRst = getValidProcess(jobDesc.processes) processRst.foreach { case (name, df) => dfMap.put(name, df)} } while(processRst.nonEmpty) logger.info(s"valid named DF set : ${dfMap.keySet}") // sinks jobDesc.sinks.foreach { s => val inputDFs = s.conf.inputs.flatMap{ input => dfMap.get(input)} if (inputDFs.isEmpty) throw new IllegalArgumentException(s"sink has not valid inputs (${s.conf.name})") // use only first input s.write(inputDFs.head) } // if stream query exist if (ss.streams.active.length > 0) ss.streams.awaitAnyTermination() } private def getValidProcess(processes:Seq[Process]):Seq[(String, DataFrame)] = { val dfKeys = dfMap.keySet processes.filter{ p => val existAllInput = p.conf.inputs.forall{ input => dfKeys(input) } !dfKeys(p.conf.name) && existAllInput } .map { p => val inputMap = p.conf.inputs.map{ input => (input, dfMap(input)) }.toMap val df = p.execute(ss, inputMap) if (p.conf.cache.getOrElse(false) && !df.isStreaming) df.cache() p.conf.name -> df } } }
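The Job above resolves its processes iteratively, registering each produced DataFrame in a mutable.Map and looping until no process with satisfied inputs remains. A Spark-free sketch of that fixed-point loop, with a String standing in for a DataFrame and made-up process names:

import scala.collection.mutable

object DependencyResolutionSketch {
  final case class Proc(name: String, inputs: Seq[String])

  def main(args: Array[String]): Unit = {
    // name -> produced result; a String stands in for a DataFrame here
    val registry  = mutable.Map("source" -> "raw data")
    val processes = Seq(Proc("clean", Seq("source")), Proc("join", Seq("clean", "source")))

    var resolved: Seq[(String, String)] = Nil
    do {
      // a process is runnable once all of its inputs are registered and it hasn't run yet
      resolved = processes
        .filter(p => !registry.contains(p.name) && p.inputs.forall(registry.contains))
        .map(p => p.name -> s"result of ${p.name}(${p.inputs.mkString(",")})")
      resolved.foreach { case (name, value) => registry.put(name, value) }
    } while (resolved.nonEmpty)

    println(registry.keySet.toList.sorted)   // List(clean, join, source)
  }
}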
Example 81
Source File: WalLogUDAFTest.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.s2jobs.wal.udafs import org.apache.s2graph.s2jobs.wal.utils.BoundedPriorityQueue import org.scalatest._ import scala.collection.mutable import scala.util.Random class WalLogUDAFTest extends FunSuite with Matchers { test("mergeTwoSeq") { val prev: Array[Int] = Array(3, 2, 1) val cur: Array[Int] = Array(4, 2, 2) val ls = WalLogUDAF.mergeTwoSeq(prev, cur, 10) println(ls.size) ls.foreach { x => println(x) } } test("addToTopK test.") { import WalLogUDAF._ val numOfTest = 100 val numOfNums = 100 val maxNum = 10 (0 until numOfTest).foreach { testNum => val maxSize = 1 + Random.nextInt(numOfNums) val pq = new BoundedPriorityQueue[Int](maxSize) val arr = (0 until numOfNums).map(x => Random.nextInt(maxNum)) var result: mutable.Seq[Int] = mutable.ArrayBuffer.empty[Int] arr.foreach { i => pq += i result = addToTopK(result, maxSize, i) } result.sorted shouldBe pq.toSeq.sorted } } }
Example 82
Source File: S2GraphVariables.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.features import java.util import java.util.Optional import scala.collection.JavaConversions._ import org.apache.tinkerpop.gremlin.structure.Graph class S2GraphVariables extends Graph.Variables { import scala.collection.mutable private val variables = mutable.Map.empty[String, Any] override def set(key: String, value: scala.Any): Unit = { if (key == null) throw Graph.Variables.Exceptions.variableKeyCanNotBeNull() if (key.isEmpty) throw Graph.Variables.Exceptions.variableKeyCanNotBeEmpty() if (value == null) throw Graph.Variables.Exceptions.variableValueCanNotBeNull() variables.put(key, value) } override def keys(): util.Set[String] = variables.keySet override def remove(key: String): Unit = { if (key == null) throw Graph.Variables.Exceptions.variableKeyCanNotBeNull() if (key.isEmpty) throw Graph.Variables.Exceptions.variableKeyCanNotBeEmpty() variables.remove(key) } override def get[R](key: String): Optional[R] = { if (key == null) throw Graph.Variables.Exceptions.variableKeyCanNotBeNull() if (key.isEmpty) throw Graph.Variables.Exceptions.variableKeyCanNotBeEmpty() variables.get(key) match { case None => Optional.empty() case Some(value) => if (value == null) Optional.empty() else Optional.of(value.asInstanceOf[R]) } } override def toString: String = { s"variables[size:${variables.keys.size()}]" } }
Example 83
Source File: RocksVertexFetcher.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.storage.rocks import com.typesafe.config.Config import org.apache.hadoop.hbase.util.Bytes import org.apache.s2graph.core._ import org.apache.s2graph.core.schema.ServiceColumn import org.apache.s2graph.core.storage.rocks.RocksStorage.{qualifier, table} import org.apache.s2graph.core.storage.{SKeyValue, StorageIO, StorageSerDe} import org.apache.s2graph.core.types.HBaseType import org.rocksdb.RocksDB import scala.collection.mutable.ArrayBuffer import scala.concurrent.{ExecutionContext, Future} class RocksVertexFetcher(val graph: S2GraphLike, val config: Config, val db: RocksDB, val vdb: RocksDB, val serDe: StorageSerDe, val io: StorageIO) extends VertexFetcher { private def fetchKeyValues(queryRequest: QueryRequest, vertex: S2VertexLike)(implicit ec: ExecutionContext): Future[Seq[SKeyValue]] = { val rpc = RocksStorage.buildRequest(queryRequest, vertex) RocksStorage.fetchKeyValues(vdb, db, rpc) } override def fetchVertices(vertexQueryParam: VertexQueryParam)(implicit ec: ExecutionContext): Future[Seq[S2VertexLike]] = { def fromResult(kvs: Seq[SKeyValue], version: String): Seq[S2VertexLike] = { if (kvs.isEmpty) Nil else serDe.vertexDeserializer(version).fromKeyValues(kvs, None).toSeq.filter(vertexQueryParam.where.get.filter) } val vertices = vertexQueryParam.vertexIds.map(vId => graph.elementBuilder.newVertex(vId)) val futures = vertices.map { vertex => val queryParam = QueryParam.Empty val q = Query.toQuery(Seq(vertex), Seq(queryParam)) val queryRequest = QueryRequest(q, stepIdx = -1, vertex, queryParam) fetchKeyValues(queryRequest, vertex).map { kvs => fromResult(kvs, vertex.serviceColumn.schemaVersion) } recoverWith { case ex: Throwable => Future.successful(Nil) } } Future.sequence(futures).map(_.flatten) } override def fetchVerticesAll()(implicit ec: ExecutionContext) = { import scala.collection.mutable val vertices = new ArrayBuffer[S2VertexLike]() ServiceColumn.findAll().groupBy(_.service.hTableName).toSeq.foreach { case (hTableName, columns) => val distinctColumns = columns.toSet val iter = vdb.newIterator() val buffer = mutable.ListBuffer.empty[SKeyValue] var oldVertexIdBytes = Array.empty[Byte] var minusPos = 0 try { iter.seekToFirst() while (iter.isValid) { val row = iter.key() if (!Bytes.equals(oldVertexIdBytes, 0, oldVertexIdBytes.length - minusPos, row, 0, row.length - 1)) { if (buffer.nonEmpty) serDe.vertexDeserializer(schemaVer = HBaseType.DEFAULT_VERSION).fromKeyValues(buffer, None) .filter(v => distinctColumns(v.serviceColumn)) .foreach { vertex => vertices += vertex } oldVertexIdBytes = row minusPos = 1 buffer.clear() } val kv = SKeyValue(table, iter.key(), SKeyValue.VertexCf, qualifier, iter.value(), System.currentTimeMillis()) buffer += kv iter.next() } if (buffer.nonEmpty) serDe.vertexDeserializer(schemaVer = HBaseType.DEFAULT_VERSION).fromKeyValues(buffer, None) .filter(v => distinctColumns(v.serviceColumn)) .foreach { vertex => vertices += vertex } } finally { iter.close() } } Future.successful(vertices) } }
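fetchVerticesAll above walks a RocksDB iterator and buffers consecutive key-values that belong to the same vertex row in a mutable.ListBuffer before decoding them. A database-free sketch of that "group consecutive rows" buffering, with invented row keys and payloads:

import scala.collection.mutable

object ConsecutiveGroupingSketch {
  def main(args: Array[String]): Unit = {
    // (rowKey, payload) pairs as an iterator would yield them, already sorted by rowKey
    val rows = Seq(("v1", "kv-a"), ("v1", "kv-b"), ("v2", "kv-c"), ("v3", "kv-d"), ("v3", "kv-e"))

    val groups = mutable.ArrayBuffer.empty[List[String]]
    val buffer = mutable.ListBuffer.empty[String]
    var currentKey: Option[String] = None

    rows.foreach { case (key, payload) =>
      if (!currentKey.contains(key)) {            // row key changed: flush the buffer
        if (buffer.nonEmpty) groups += buffer.toList
        buffer.clear()
        currentKey = Some(key)
      }
      buffer += payload
    }
    if (buffer.nonEmpty) groups += buffer.toList  // flush the trailing group

    println(groups)   // ArrayBuffer(List(kv-a, kv-b), List(kv-c), List(kv-d, kv-e))
  }
}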
Example 84
Source File: GraphToETLStreaming.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.counter.loader.stream import kafka.producer.KeyedMessage import kafka.serializer.StringDecoder import org.apache.s2graph.core.GraphUtil import org.apache.s2graph.counter.config.S2CounterConfig import org.apache.s2graph.counter.loader.config.StreamingConfig import org.apache.s2graph.spark.config.S2ConfigFactory import org.apache.s2graph.spark.spark.{WithKafka, SparkApp, HashMapParam} import org.apache.spark.streaming.Durations._ import org.apache.spark.streaming.kafka.KafkaRDDFunctions.rddToKafkaRDDFunctions import scala.collection.mutable import scala.collection.mutable.{HashMap => MutableHashMap} object GraphToETLStreaming extends SparkApp with WithKafka { lazy val config = S2ConfigFactory.config lazy val s2Config = new S2CounterConfig(config) lazy val className = getClass.getName.stripSuffix("$") lazy val producer = getProducer[String, String](StreamingConfig.KAFKA_BROKERS) override def run(): Unit = { validateArgument("interval", "topic") val (intervalInSec, topic) = (seconds(args(0).toLong), args(1)) val groupId = buildKafkaGroupId(topic, "graph_to_etl") val kafkaParam = Map( // "auto.offset.reset" -> "smallest", "group.id" -> groupId, "metadata.broker.list" -> StreamingConfig.KAFKA_BROKERS, "zookeeper.connect" -> StreamingConfig.KAFKA_ZOOKEEPER, "zookeeper.connection.timeout.ms" -> "10000" ) val conf = sparkConf(s"$topic: $className") val ssc = streamingContext(conf, intervalInSec) val sc = ssc.sparkContext val acc = sc.accumulable(MutableHashMap.empty[String, Long], "Throughput")(HashMapParam[String, Long](_ + _)) val stream = getStreamHelper(kafkaParam).createStream[String, String, StringDecoder, StringDecoder](ssc, topic.split(',').toSet) stream.foreachRDD { rdd => rdd.foreachPartitionWithOffsetRange { case (osr, part) => val m = MutableHashMap.empty[Int, mutable.MutableList[String]] for { (k, v) <- part line <- GraphUtil.parseString(v) } { try { val sp = GraphUtil.split(line) // get partition key by target vertex id val partKey = getPartKey(sp(4), 20) val values = m.getOrElse(partKey, mutable.MutableList.empty[String]) values += line m.update(partKey, values) } catch { case ex: Throwable => log.error(s"$ex: $line") } } m.foreach { case (k, v) => v.grouped(1000).foreach { grouped => producer.send(new KeyedMessage[String, String](StreamingConfig.KAFKA_TOPIC_ETL, null, k, grouped.mkString("\n"))) } } getStreamHelper(kafkaParam).commitConsumerOffset(osr) } } ssc.start() ssc.awaitTermination() } }
Example 85
Source File: QueueActor.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.rest.play.actors import java.util.concurrent.TimeUnit import akka.actor._ import org.apache.s2graph.core.ExceptionHandler._ import org.apache.s2graph.core.utils.logger import org.apache.s2graph.core.{ExceptionHandler, S2Graph, GraphElement} import org.apache.s2graph.rest.play.actors.Protocol.FlushAll import org.apache.s2graph.rest.play.config.Config import play.api.Play.current import play.api.libs.concurrent.Akka import scala.collection.mutable import scala.concurrent.duration.Duration object Protocol { case object Flush case object FlushAll } object QueueActor { var router: ActorRef = _ // Akka.system.actorOf(props(), name = "queueActor") def init(s2: S2Graph, walLogHandler: ExceptionHandler) = { router = Akka.system.actorOf(props(s2, walLogHandler)) } def shutdown() = { router ! FlushAll Akka.system.shutdown() Thread.sleep(Config.ASYNC_HBASE_CLIENT_FLUSH_INTERVAL * 2) } def props(s2: S2Graph, walLogHandler: ExceptionHandler): Props = Props(classOf[QueueActor], s2, walLogHandler) } class QueueActor(s2: S2Graph, walLogHandler: ExceptionHandler) extends Actor with ActorLogging { import Protocol._ implicit val ec = context.system.dispatcher // logger.error(s"QueueActor: $self") val queue = mutable.Queue.empty[GraphElement] var queueSize = 0L val maxQueueSize = Config.LOCAL_QUEUE_ACTOR_MAX_QUEUE_SIZE val timeUnitInMillis = 10 val rateLimitTimeStep = 1000 / timeUnitInMillis val rateLimit = Config.LOCAL_QUEUE_ACTOR_RATE_LIMIT / rateLimitTimeStep context.system.scheduler.schedule(Duration.Zero, Duration(timeUnitInMillis, TimeUnit.MILLISECONDS), self, Flush) override def receive: Receive = { case element: GraphElement => if (queueSize > maxQueueSize) { walLogHandler.enqueue(toKafkaMessage(Config.KAFKA_FAIL_TOPIC, element, None)) } else { queueSize += 1L queue.enqueue(element) } case Flush => val elementsToFlush = if (queue.size < rateLimit) queue.dequeueAll(_ => true) else (0 until rateLimit).map(_ => queue.dequeue()) val flushSize = elementsToFlush.size queueSize -= elementsToFlush.length s2.mutateElements(elementsToFlush) if (flushSize > 0) { logger.info(s"flush: $flushSize, $queueSize") } case FlushAll => s2.mutateElements(queue) context.stop(self) case _ => logger.error("unknown protocol") } }
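On every Flush tick, QueueActor drains its mutable.Queue either completely or in rate-limited chunks. The dequeue logic in isolation, with Ints instead of GraphElements and an arbitrary rate limit:

import scala.collection.mutable

object RateLimitedDrainSketch {
  def main(args: Array[String]): Unit = {
    val queue     = mutable.Queue((1 to 25): _*)
    val rateLimit = 10

    // drain everything if the backlog is small, otherwise take at most rateLimit elements
    def flush(): Seq[Int] =
      if (queue.size < rateLimit) queue.dequeueAll(_ => true)
      else (0 until rateLimit).map(_ => queue.dequeue())

    println(flush().size)   // 10
    println(flush().size)   // 10
    println(flush().size)   // 5
    println(queue.isEmpty)  // true
  }
}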
Example 86
Source File: CreateExchangeTransactionActor.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.actors.tx import akka.actor.{Actor, ActorRef, Props} import com.wavesplatform.dex.actors.tx.CreateExchangeTransactionActor.OrderExecutedObserved import com.wavesplatform.dex.domain.account.Address import com.wavesplatform.dex.domain.utils.ScorexLogging import com.wavesplatform.dex.model.Events.{ExchangeTransactionCreated, OrderExecuted} import com.wavesplatform.dex.model.ExchangeTransactionCreator.CreateTransaction import play.api.libs.json.Json import scala.collection.mutable class CreateExchangeTransactionActor(createTransaction: CreateTransaction, recipients: List[ActorRef]) extends Actor with ScorexLogging { private val pendingEvents = mutable.Set.empty[OrderExecuted] override def preStart(): Unit = context.system.eventStream.subscribe(self, classOf[OrderExecutedObserved]) override def receive: Receive = { case OrderExecutedObserved(sender, event) => val sameOwner = event.counter.order.sender == event.submitted.order.sender log.debug(s"Execution observed at $sender for OrderExecuted(${event.submitted.order.id()}, ${event.counter.order .id()}), amount=${event.executedAmount})${if (sameOwner) " Same owner for both orders" else ""}") if (sameOwner || pendingEvents.contains(event)) { import event.{counter, submitted} createTransaction(event) match { case Right(tx) => log.info(s"Created transaction: $tx") val created = ExchangeTransactionCreated(tx) recipients.foreach(_ ! created) case Left(ex) => log.warn( s"""Can't create tx: $ex |o1: (amount=${submitted.amount}, fee=${submitted.fee}): ${Json.prettyPrint(submitted.order.json())} |o2: (amount=${counter.amount}, fee=${counter.fee}): ${Json.prettyPrint(counter.order.json())}""".stripMargin ) } pendingEvents -= event } else pendingEvents += event } } object CreateExchangeTransactionActor { val name = "create-exchange-tx" case class OrderExecutedObserved(sender: Address, event: OrderExecuted) def props(createTransaction: CreateTransaction, recipients: List[ActorRef]): Props = Props(new CreateExchangeTransactionActor(createTransaction, recipients)) }
Example 87
Source File: AddressDirectoryActor.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.actors.address import akka.actor.{Actor, ActorRef, Props, SupervisorStrategy, Terminated} import com.wavesplatform.dex.db.OrderDB import com.wavesplatform.dex.domain.account.Address import com.wavesplatform.dex.domain.utils.{EitherExt2, ScorexLogging} import com.wavesplatform.dex.history.HistoryRouter._ import com.wavesplatform.dex.model.Events import com.wavesplatform.dex.model.Events.OrderCancelFailed import scala.collection.mutable class AddressDirectoryActor(orderDB: OrderDB, addressActorProps: (Address, Boolean) => Props, historyRouter: Option[ActorRef]) extends Actor with ScorexLogging { import AddressDirectoryActor._ import context._ private var startSchedules: Boolean = false private[this] val children = mutable.AnyRefMap.empty[Address, ActorRef] override def supervisorStrategy: SupervisorStrategy = SupervisorStrategy.stoppingStrategy private def createAddressActor(address: Address): ActorRef = { log.debug(s"Creating address actor for $address") watch(actorOf(addressActorProps(address, startSchedules), address.toString)) } private def forward(address: Address, msg: Any): Unit = (children get address, msg) match { case (None, _: AddressActor.Message.BalanceChanged) => case _ => children getOrElseUpdate (address, createAddressActor(address)) forward msg } override def receive: Receive = { case Envelope(address, cmd) => forward(address, cmd) case e @ Events.OrderAdded(lo, timestamp) => forward(lo.order.sender, e) historyRouter foreach { _ ! SaveOrder(lo, timestamp) } case e: Events.OrderExecuted => import e.{counter, submitted} forward(submitted.order.sender, e) if (counter.order.sender != submitted.order.sender) forward(counter.order.sender, e) historyRouter foreach { _ ! SaveEvent(e) } case e: Events.OrderCanceled => forward(e.acceptedOrder.order.sender, e) historyRouter foreach { _ ! SaveEvent(e) } case e: OrderCancelFailed => orderDB.get(e.id) match { case Some(order) => forward(order.sender.toAddress, e) case None => log.warn(s"The order '${e.id}' not found") } case StartSchedules => if (!startSchedules) { startSchedules = true context.children.foreach(_ ! StartSchedules) } case Terminated(child) => val addressString = child.path.name val address = Address.fromString(addressString).explicitGet() children.remove(address) log.warn(s"Address handler for $addressString terminated") } } object AddressDirectoryActor { case class Envelope(address: Address, cmd: AddressActor.Message) case object StartSchedules }
Example 88
Source File: OrderBookSideSnapshotCodecs.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.codecs import java.math.BigInteger import java.nio.ByteBuffer import com.google.common.primitives.{Ints, Longs} import com.wavesplatform.dex.codecs.ByteBufferCodecs.ByteBufferExt import com.wavesplatform.dex.domain.model.Price import com.wavesplatform.dex.domain.order.{Order, OrderType} import com.wavesplatform.dex.model.{BuyLimitOrder, LimitOrder, OrderBookSideSnapshot, SellLimitOrder} import scala.collection.mutable object OrderBookSideSnapshotCodecs { def encode(dest: mutable.ArrayBuilder[Byte], snapshot: OrderBookSideSnapshot): Unit = { dest ++= Ints.toByteArray(snapshot.size) snapshot.foreach { case (price, xs) => dest ++= Longs.toByteArray(price) dest ++= Ints.toByteArray(xs.size) xs.foreach(encodeLoV2(dest, _)) } } def decode(bb: ByteBuffer): OrderBookSideSnapshot = { val snapshotSize = bb.getInt val r = Map.newBuilder[Price, Seq[LimitOrder]] (1 to snapshotSize).foreach { _ => val price = bb.getLong val levelSize = bb.getInt val limitOrders = (1 to levelSize).map(_ => decodeLo(bb)) r += price -> limitOrders } r.result() } def encodeLoV1(dest: mutable.ArrayBuilder[Byte], lo: LimitOrder): Unit = { dest ++= lo.order.orderType.bytes dest ++= Longs.toByteArray(lo.amount) dest ++= Longs.toByteArray(lo.fee) dest += lo.order.version val orderBytes = lo.order.bytes() dest ++= Ints.toByteArray(orderBytes.length) dest ++= orderBytes } def encodeLoV2(dest: mutable.ArrayBuilder[Byte], lo: LimitOrder): Unit = { val avgWeighedPriceNominatorBytes = lo.avgWeighedPriceNominator.toByteArray dest += 2 encodeLoV1(dest, lo) dest ++= Ints.toByteArray(avgWeighedPriceNominatorBytes.length) dest ++= avgWeighedPriceNominatorBytes } def decodeLo(bb: ByteBuffer): LimitOrder = { val header = bb.get val version = if (header == 2) 2 else 1 val orderType = if (version == 1) header else bb.get val amount = bb.getLong val fee = bb.getLong val orderVersion = bb.get val order = Order.fromBytes(orderVersion, bb.getBytes) val avgWeighedPriceNominator = if (version == 2) new BigInteger(bb.getBytes) else { val filledAmount = order.amount - amount (BigInt(order.price) * filledAmount).bigInteger } OrderType(orderType) match { case OrderType.SELL => SellLimitOrder(amount, fee, order, avgWeighedPriceNominator) case OrderType.BUY => BuyLimitOrder(amount, fee, order, avgWeighedPriceNominator) } } }
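The codec above serialises into a mutable.ArrayBuilder[Byte], appending whole byte arrays with ++= and materialising the result once at the end. A tiny sketch of that builder-based encoding, with java.nio.ByteBuffer standing in for the Guava Ints/Longs helpers:

import java.nio.ByteBuffer
import scala.collection.mutable

object ByteEncodingSketch {
  private def intBytes(i: Int): Array[Byte]   = ByteBuffer.allocate(4).putInt(i).array()
  private def longBytes(l: Long): Array[Byte] = ByteBuffer.allocate(8).putLong(l).array()

  // encode a price level: price, element count, then each amount
  def encode(price: Long, amounts: Seq[Long]): Array[Byte] = {
    val dest = mutable.ArrayBuilder.make[Byte]
    dest ++= longBytes(price)
    dest ++= intBytes(amounts.size)
    amounts.foreach(a => dest ++= longBytes(a))
    dest.result()
  }

  def main(args: Array[String]): Unit =
    println(encode(price = 100L, amounts = Seq(1L, 2L, 3L)).length)   // 8 + 4 + 3*8 = 36
}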
Example 89
Source File: OrderBookSnapshot.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.model import java.nio.ByteBuffer import com.wavesplatform.dex.codecs.OrderBookSideSnapshotCodecs import scala.collection.mutable case class OrderBookSnapshot(bids: OrderBookSideSnapshot, asks: OrderBookSideSnapshot, lastTrade: Option[LastTrade]) object OrderBookSnapshot { val empty: OrderBookSnapshot = OrderBookSnapshot(bids = Map.empty, asks = Map.empty, None) def serialize(dest: mutable.ArrayBuilder[Byte], x: OrderBookSnapshot): Unit = { OrderBookSideSnapshotCodecs.encode(dest, x.bids) OrderBookSideSnapshotCodecs.encode(dest, x.asks) x.lastTrade match { case None => dest += 0 case Some(lastTrade) => dest += 1 LastTrade.serialize(dest, lastTrade) } } def fromBytes(bb: ByteBuffer): OrderBookSnapshot = OrderBookSnapshot( OrderBookSideSnapshotCodecs.decode(bb), OrderBookSideSnapshotCodecs.decode(bb), bb.get match { case 0 => None case 1 => Some(LastTrade.fromBytes(bb)) case x => throw new RuntimeException(s"Can't deserialize Option as $x") } ) }
Example 90
Source File: HistoryMessagesBatchSender.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.history import akka.actor.{Actor, Cancellable} import com.wavesplatform.dex.history.HistoryRouter.{HistoryMsg, StopAccumulate} import scala.collection.mutable import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration._ import scala.reflect.ClassTag abstract class HistoryMessagesBatchSender[M <: HistoryMsg: ClassTag] extends Actor { val batchLinger: Long val batchEntries: Long def createAndSendBatch(batchBuffer: Iterable[M]): Unit private val batchBuffer: mutable.Set[M] = mutable.Set.empty[M] private def scheduleStopAccumulating: Cancellable = context.system.scheduler.scheduleOnce(batchLinger.millis, self, StopAccumulate) private def sendBatch(): Unit = { if (batchBuffer.nonEmpty) { createAndSendBatch(batchBuffer) batchBuffer.clear() } } def receive: Receive = awaitingHistoryMessages private def awaitingHistoryMessages: Receive = { case msg: M => scheduleStopAccumulating context become accumulateBuffer(scheduleStopAccumulating) batchBuffer += msg } private def accumulateBuffer(scheduledStop: Cancellable): Receive = { case msg: M => if (batchBuffer.size == batchEntries) { scheduledStop.cancel() sendBatch() context become accumulateBuffer(scheduleStopAccumulating) } batchBuffer += msg case StopAccumulate => sendBatch(); context become awaitingHistoryMessages } }
Example 91
Source File: OrderHistoryStub.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.model import akka.actor.{ActorRef, ActorSystem, Props} import com.wavesplatform.dex.actors.SpendableBalancesActor import com.wavesplatform.dex.actors.address.{AddressActor, AddressDirectoryActor} import com.wavesplatform.dex.db.{EmptyOrderDB, TestOrderDB} import com.wavesplatform.dex.domain.account.Address import com.wavesplatform.dex.domain.asset.Asset import com.wavesplatform.dex.domain.bytes.ByteStr import com.wavesplatform.dex.error.ErrorFormatterContext import com.wavesplatform.dex.queue.QueueEventWithMeta import com.wavesplatform.dex.time.Time import scala.collection.mutable import scala.concurrent.Future class OrderHistoryStub(system: ActorSystem, time: Time, maxActiveOrders: Int, maxFinalizedOrders: Int) { private implicit val efc: ErrorFormatterContext = (_: Asset) => 8 private val refs = mutable.AnyRefMap.empty[Address, ActorRef] private val orders = mutable.AnyRefMap.empty[ByteStr, Address] private val spendableBalances: (Address, Set[Asset]) => Future[Map[Asset, Long]] = (_, _) => Future.successful(Map.empty[Asset, Long]) private val allAssetsSpendableBalances: Address => Future[Map[Asset, Long]] = _ => Future.successful(Map.empty[Asset, Long]) private val spendableBalanceActor = system.actorOf(Props(new SpendableBalancesActor(spendableBalances, allAssetsSpendableBalances, addressDir))) def createAddressActor(address: Address, enableSchedules: Boolean): Props = { Props( new AddressActor( address, time, new TestOrderDB(maxFinalizedOrders), (_, _) => Future.successful(Right(())), e => Future.successful { Some(QueueEventWithMeta(0, 0, e)) }, enableSchedules, spendableBalanceActor, AddressActor.Settings.default.copy(maxActiveOrders = maxActiveOrders) ) ) } private def actorFor(ao: AcceptedOrder): ActorRef = refs.getOrElseUpdate( ao.order.sender, system.actorOf(createAddressActor(ao.order.sender, enableSchedules = true)) ) lazy val addressDir = system.actorOf( Props( new AddressDirectoryActor( EmptyOrderDB, createAddressActor, None ) ) ) def ref(sender: Address): ActorRef = refs(sender) def ref(orderId: ByteStr): ActorRef = refs(orders(orderId)) def process(event: Events.Event): Unit = event match { case oa: Events.OrderAdded => orders += oa.order.order.id() -> oa.order.order.sender actorFor(oa.order) ! oa case ox: Events.OrderExecuted => orders += ox.submitted.order.id() -> ox.submitted.order.sender orders += ox.counter.order.id() -> ox.counter.order.sender actorFor(ox.counter) ! ox actorFor(ox.submitted) ! ox case oc: Events.OrderCanceled => actorFor(oc.acceptedOrder) ! oc } def processAll(events: Events.Event*): Unit = events.foreach(process) }
Example 92
Source File: AffinityPropagationSuite.scala From SparkAffinityPropagation with MIT License | 5 votes |
package org.viirya.spark.ml import scala.collection.mutable import org.scalatest.{BeforeAndAfterAll, FunSuite, Suite} import org.viirya.spark.ml.AffinityPropagation._ import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.graphx.{Edge, Graph} class AffinityPropagationSuite extends FunSuite with BeforeAndAfterAll { self: Suite => @transient var sc: SparkContext = _ override def beforeAll() { super.beforeAll() val conf = new SparkConf() .setMaster("local[2]") .setAppName("AffinityPropagationUnitTest") sc = new SparkContext(conf) } override def afterAll() { try { if (sc != null) { sc.stop() } sc = null } finally { super.afterAll() } } test("affinity propagation") { val similarities = Seq[(Long, Long, Double)]( (0, 1, 1.0), (1, 0, 1.0), (0, 2, 1.0), (2, 0, 1.0), (0, 3, 1.0), (3, 0, 1.0), (1, 2, 1.0), (2, 1, 1.0), (2, 3, 1.0), (3, 2, 1.0)) val expected = Array( Array(0.0, 1.0/3.0, 1.0/3.0, 1.0/3.0), Array(1.0/2.0, 0.0, 1.0/2.0, 0.0), Array(1.0/3.0, 1.0/3.0, 0.0, 1.0/3.0), Array(1.0/2.0, 0.0, 1.0/2.0, 0.0)) val s = constructGraph(sc.parallelize(similarities, 2), true, false) s.edges.collect().foreach { case Edge(i, j, x) => assert(math.abs(x.similarity - expected(i.toInt)(j.toInt)) < 1e-14) } } }
Example 93
Source File: RunCypher.scala From piflow with BSD 2-Clause "Simplified" License | 5 votes |
package cn.piflow.bundle.neo4j import cn.piflow.conf.bean.PropertyDescriptor import cn.piflow.conf.util.{ImageUtil, MapUtil} import cn.piflow.conf.{ConfigurableStop, Port, StopGroup} import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext} import org.neo4j.driver.v1._ import scala.collection.mutable class RunCypher extends ConfigurableStop{ override val authorEmail: String = "[email protected]" override val description: String = "Run cql on neo4j" override val inportList: List[String] =List(Port.DefaultPort) override val outportList: List[String] = List(Port.DefaultPort) var url : String =_ var userName : String =_ var password : String =_ var cql : String = "" override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = { var driver: Driver = GraphDatabase.driver(url, AuthTokens.basic(userName, password)) var session: Session = null try { session = driver.session() session.run(cql) } finally { session.close() driver.close() } } override def setProperties(map: Map[String, Any]): Unit = { url = MapUtil.get(map,"url").asInstanceOf[String] userName = MapUtil.get(map,"userName").asInstanceOf[String] password = MapUtil.get(map,"password").asInstanceOf[String] cql = MapUtil.get(map,"cql").asInstanceOf[String] } override def getPropertyDescriptor(): List[PropertyDescriptor] = { var descriptor : List[PropertyDescriptor] = List() val url=new PropertyDescriptor().name("url") .displayName("url") .description("The url of neo4j") .defaultValue("") .required(true) .example("bolt://0.0.1.1:7687") descriptor = url :: descriptor val userName=new PropertyDescriptor() .name("userName") .displayName("UserName") .description("The user of neo4j") .defaultValue("") .required(true) .example("neo4j") descriptor = userName :: descriptor val password=new PropertyDescriptor() .name("password") .displayName("Password") .description("The password of neo4j") .defaultValue("") .required(true) .sensitive(true) .example("123456") descriptor = password :: descriptor val cql=new PropertyDescriptor() .name("cql") .displayName("cql") .description(" The Cypher") .defaultValue("") .required(true) .example("match(n:user) where n.userid ='11' set n.userclass =5") descriptor = cql :: descriptor descriptor } override def getIcon(): Array[Byte] = { ImageUtil.getImage("icon/neo4j/RunCypher.png") } override def getGroup(): List[String] = { List(StopGroup.Neo4jGroup) } override def initialize(ctx: ProcessContext): Unit = { } }
Example 94
Source File: WriteToKafka.scala From piflow with BSD 2-Clause "Simplified" License | 5 votes |
package cn.piflow.bundle.kafka import java.util import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext} import cn.piflow.conf._ import cn.piflow.conf.bean.PropertyDescriptor import cn.piflow.conf.util.{ImageUtil, MapUtil} import java.util.Properties import org.apache.spark.sql.SparkSession import org.apache.kafka.clients.producer.KafkaProducer import org.apache.kafka.clients.producer.Producer import org.apache.kafka.clients.producer.ProducerRecord import scala.collection.mutable class WriteToKafka extends ConfigurableStop{ val description: String = "Write data to kafka" val inportList: List[String] = List(Port.DefaultPort) val outportList: List[String] = List(Port.DefaultPort) var kafka_host:String =_ var topic:String=_ def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = { val spark = pec.get[SparkSession]() val df = in.read() val properties:Properties = new Properties() properties.put("bootstrap.servers", kafka_host) properties.put("acks", "all") //properties.put("retries", 0) //properties.put("batch.size", 16384) //properties.put("linger.ms", 1) //properties.put("buffer.memory", 33554432) properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") var producer:Producer[String,String] = new KafkaProducer[String,String](properties) df.collect().foreach(row=>{ //var hm:util.HashMap[String,String]=new util.HashMap() //row.schema.fields.foreach(f=>(if(!f.name.equals(column_name)&&row.getAs(f.name)!=null)hm.put(f.name,row.getAs(f.name).asInstanceOf[String]))) var res:List[String]=List() row.schema.fields.foreach(f=>{ if(row.getAs(f.name)==null)res="None"::res else{ res=row.getAs(f.name).asInstanceOf[String]::res } }) val s:String=res.reverse.mkString(",") val record=new ProducerRecord[String,String](topic,s) producer.send(record) }) producer.close() } def initialize(ctx: ProcessContext): Unit = { } def setProperties(map: Map[String, Any]): Unit = { kafka_host=MapUtil.get(map,key="kafka_host").asInstanceOf[String] //port=Integer.parseInt(MapUtil.get(map,key="port").toString) topic=MapUtil.get(map,key="topic").asInstanceOf[String] } override def getPropertyDescriptor(): List[PropertyDescriptor] = { var descriptor : List[PropertyDescriptor] = List() val kafka_host = new PropertyDescriptor().name("kafka_host").displayName("KAFKA_HOST").defaultValue("").required(true) val topic = new PropertyDescriptor().name("topic").displayName("TOPIC").defaultValue("").required(true) descriptor = kafka_host :: descriptor descriptor = topic :: descriptor descriptor } override def getIcon(): Array[Byte] = { ImageUtil.getImage("icon/kafka/WriteToKafka.png") } override def getGroup(): List[String] = { List(StopGroup.KafkaGroup.toString) } override val authorEmail: String = "[email protected]" }
Example 95
Source File: StopBean.scala From piflow with BSD 2-Clause "Simplified" License | 5 votes |
package cn.piflow.conf.bean import java.lang.ClassNotFoundException import cn.piflow.conf.{ConfigurableIncrementalStop, ConfigurableStop} import cn.piflow.conf.util.{ClassUtil, MapUtil} import scala.collection.mutable class StopBean { var flowName : String = _ var uuid : String = _ var name : String = _ var bundle : String = _ var properties : Map[String, String] = _ var customizedProperties : Map[String, String] = _ def init(flowName : String, map:Map[String,Any]) = { this.flowName = flowName this.uuid = MapUtil.get(map,"uuid").asInstanceOf[String] this.name = MapUtil.get(map,"name").asInstanceOf[String] this.bundle = MapUtil.get(map,"bundle").asInstanceOf[String] this.properties = MapUtil.get(map, "properties").asInstanceOf[Map[String, String]] if(map.contains("customizedProperties")){ this.customizedProperties = MapUtil.get(map, "customizedProperties").asInstanceOf[Map[String, String]] }else{ this.customizedProperties = Map[String, String]() } } def constructStop() : ConfigurableStop = { try{ val stop = ClassUtil.findConfigurableStop(this.bundle) //init ConfigurableIncrementalStop if( stop.isInstanceOf[ConfigurableIncrementalStop]){ stop.asInstanceOf[ConfigurableIncrementalStop].init(flowName, name) var startValue : String = stop.asInstanceOf[ConfigurableIncrementalStop].readIncrementalStart() if(startValue == null || startValue == ""){ if(this.properties.contains("incrementalStart")){ startValue = MapUtil.get(this.properties,"incrementalStart").asInstanceOf[String] }else{ throw new Exception("You must set incrementalStart value!") } } //replace the tag of incremental Field in properties val newProperties: scala.collection.mutable.Map[String, String] = scala.collection.mutable.Map() val it = this.properties.keysIterator while(it.hasNext){ val key = it.next() var value = this.properties(key) value = value.replaceAll("#~#", "'" + startValue + "'") newProperties(key) = value } stop.setProperties(newProperties.toMap) }else { stop.setProperties(this.properties) } stop.setCustomizedProperties(this.customizedProperties) stop }catch { case ex : Exception => throw ex } } } object StopBean { def apply(flowName : String, map : Map[String, Any]): StopBean = { val stopBean = new StopBean() stopBean.init(flowName, map) stopBean } }
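constructStop rewrites the incremental-start tag by copying the properties into a scala.collection.mutable.Map and calling toMap at the end. A self-contained sketch of just that step, with invented property values (only the #~# tag handling mirrors the example):

import scala.collection.mutable

object IncrementalTagSketch extends App {
  // Invented properties; the "#~#" placeholder follows StopBean.constructStop
  val properties = Map("sql" -> "select * from logs where ts > #~#", "user" -> "root")
  val startValue = "2020-01-01 00:00:00"

  val newProperties: mutable.Map[String, String] = mutable.Map()
  properties.foreach { case (key, value) =>
    // Replace the incremental-start tag with the quoted start value
    newProperties(key) = value.replaceAll("#~#", "'" + startValue + "'")
  }

  println(newProperties.toMap)
}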
Example 96
Source File: TestBroadCast.scala From asyspark with MIT License | 5 votes |
package org.apache.spark.examples import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession import scala.collection.mutable object TestBroadCast extends Logging{ val sparkSession = SparkSession.builder().appName("test BoradCast").getOrCreate() val sc = sparkSession.sparkContext def main(args: Array[String]): Unit = { // val data = sc.parallelize(Seq(1 until 10000000)) val num = args(args.length - 2).toInt val times = args(args.length -1).toInt println(num) val start = System.nanoTime() val seq =Seq(1 until num) for(i <- 0 until times) { val start2 = System.nanoTime() val bc = sc.broadcast(seq) val rdd = sc.parallelize(1 until 10, 5) rdd.map(_ => bc.value.take(1)).collect() println((System.nanoTime() - start2)/ 1e6 + "ms") } logInfo((System.nanoTime() - start) / 1e6 + "ms") } def testMap(): Unit ={ val smallRDD = sc.parallelize(Seq(1,2,3)) val bigRDD = sc.parallelize(Seq(1 until 20)) bigRDD.mapPartitions { partition => val hashMap = new mutable.HashMap[Int,Int]() for(ele <- smallRDD) { hashMap(ele) = ele } // some operation here partition } } }
Example 97
Source File: Http.scala From AI with Apache License 2.0 | 5 votes |
package com.bigchange.http

import com.bigchange.log.CLogger
import dispatch.Defaults._
import dispatch._

import scala.collection.mutable
import scala.util.{Failure, Success}

// The enclosing declaration was stripped by the snippet extractor; a hypothetical wrapper
// object (name invented) is restored here so the method reads as a complete unit.
object HttpPoster {

  def post(strUrl: String, parameters: mutable.HashMap[String, String], parse: String): Unit = {
    val post = url(strUrl) << parameters
    val response: Future[String] = Http(post OK as.String)
    response onComplete {
      case Success(content) =>
        // parse(content)
        println("post Success content:" + content)
      case Failure(t) =>
        println("post Failure content:" + t)
    }
  }
}
Example 98
Source File: AggregateActor.scala From AI with Apache License 2.0 | 5 votes |
package com.bigchange.akka.actor import akka.actor.{ActorRef, UntypedActor} import akka.event.Logging import com.bigchange.akka.message.{ReduceData, Result} import scala.collection.mutable class AggregateActor(resultActor: ActorRef) extends UntypedActor { val finalHashMap = new mutable.HashMap[String, Int]() val log = Logging(context.system, this) @scala.throws[Throwable](classOf[Throwable]) override def onReceive(message: Any): Unit = { message match { case data: String => log.info("Aggregate got message:" + data) log.info("Aggregate ok!") case reduceData:ReduceData => aggregateInMemoryReduce(reduceData.reduceHashMap) println("path:" + sender().path) resultActor ! new Result(finalHashMap) // 给ResultActor发送计算结果 case message:Result => println("AggregateActor:" + message.resultValue.toString()) case _ => log.info("map unhandled message") unhandled(message) } } // 聚合 def aggregateInMemoryReduce(reduceMap: mutable.HashMap[String, Int]): Unit = { var count = 0 reduceMap.foreach(x => { if(finalHashMap.contains(x._1)) { count = x._2 count += finalHashMap.get(x._1).get finalHashMap.put(x._1,count) } else { finalHashMap.put(x._1,x._2) } }) } }
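aggregateInMemoryReduce merges one count map into the running totals with an explicit contains check; the same merge is commonly written with getOrElse. A self-contained sketch with made-up counts:

import scala.collection.mutable

object MergeCountsSketch extends App {
  val finalHashMap = mutable.HashMap("spark" -> 2, "akka" -> 1)   // running totals
  val reduceMap    = Map("akka" -> 3, "kafka" -> 1)               // one incoming batch (invented)

  // Add each incoming count to the running total, defaulting to 0 for unseen keys
  reduceMap.foreach { case (word, count) =>
    finalHashMap(word) = finalHashMap.getOrElse(word, 0) + count
  }

  println(finalHashMap)  // akka -> 4, kafka -> 1, spark -> 2 (iteration order is unspecified)
}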
Example 99
Source File: NaiveBayesTest.scala From AI with Apache License 2.0 | 5 votes |
package com.bigchange.test

import com.bigchange.datamining.CustomNaiveBayes

import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.io.Source

// The object declaration was stripped by the snippet extractor; it is restored here from the file name.
object NaiveBayesTest {

  def main(args: Array[String]) {
    // val Array(dataPath) = args
    val data = Source.fromFile("src/main/resources/nbData/i100-i500").getLines().toList
    // also works after packaging: read the corresponding file from inside the jar
    val data2 = Source.fromInputStream(this.getClass.getResourceAsStream("src/main/resources/nbData/i100-i500")).getLines().toList
    // ten-fold cross-validation: (index, List(item1, item2))
    val splitData = data.zipWithIndex.map(x => (x._2 % 10, x._1)).groupBy(_._1).mapValues(x => x.map(_._2))
    val modelMap = new mutable.HashMap[Int, String]()
    val model = CustomNaiveBayes.model(0, splitData)
    var list = List((0, model))
    for (id <- 1 until 10) {
      // train
      val model = CustomNaiveBayes.model(id, splitData)
      list = list ::: List((id, model))
    }
    // classify
    val listP = new ListBuffer[(String, Double)]
    list.foreach(x => {
      println("model:" + x)
      val pred = CustomNaiveBayes.predict(Array("health", "moderate", "moderate1", "yes"), x._2)
      listP.+=(pred)
    })
    println("tobe:" + listP)
    println("tobe:" + listP.max)
  }
}
Example 100
Source File: Stackoverflow58206168.scala From Binding.scala with MIT License | 5 votes |
package com.thoughtworks.binding
package regression

import Binding._

import scala.collection.mutable
import Binding.BindingInstances.functorSyntax._
import org.scalatest.freespec.AnyFreeSpec
import org.scalatest.matchers.should.Matchers

final class Stackoverflow58206168 extends AnyFreeSpec with Matchers {
  // See https://stackoverflow.com/questions//binding-scala-vars-bind-seems-to-not-work-correctly
  "Binding.scala: Vars.bind seems to not work correctly" in {
    val events = mutable.Buffer.empty[List[Int]]
    val test: Vars[Int] = Vars(1, 2, 3, 4)
    test.all.map {
      events += _.toList
    }.watch()
    test.value.append(1111)
    assert(events == mutable.Buffer(List(1, 2, 3, 4), List(1, 2, 3, 4, 1111)))
  }
}
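The test records each change notification by appending a snapshot to a mutable.Buffer from a watcher callback. A self-contained sketch of the same recording idea without the Binding.scala dependency (the hand-rolled notifyChanged callback is an illustration, not part of the library):

import scala.collection.mutable

object EventRecorderSketch extends App {
  val events = mutable.Buffer.empty[List[Int]]
  val data = mutable.ArrayBuffer(1, 2, 3, 4)

  // A hand-rolled "watcher": call this after every mutation to record a snapshot
  def notifyChanged(): Unit = events += data.toList

  notifyChanged()          // initial state
  data.append(1111)
  notifyChanged()          // state after the append

  println(events)          // two snapshots: List(1, 2, 3, 4) and List(1, 2, 3, 4, 1111)
}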
Example 101
Source File: SolrStreamWriter.scala From spark-solr with Apache License 2.0 | 5 votes |
package com.lucidworks.spark import com.lucidworks.spark.util.{SolrQuerySupport, SolrSupport} import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.execution.streaming.Sink import org.apache.spark.sql.streaming.OutputMode import com.lucidworks.spark.util.ConfigurationConstants._ import org.apache.spark.sql.types.StructType import scala.collection.mutable class SolrStreamWriter( val sparkSession: SparkSession, parameters: Map[String, String], val partitionColumns: Seq[String], val outputMode: OutputMode)( implicit val solrConf : SolrConf = new SolrConf(parameters)) extends Sink with LazyLogging { require(solrConf.getZkHost.isDefined, s"Parameter ${SOLR_ZK_HOST_PARAM} not defined") require(solrConf.getCollection.isDefined, s"Parameter ${SOLR_COLLECTION_PARAM} not defined") val collection : String = solrConf.getCollection.get val zkhost: String = solrConf.getZkHost.get lazy val solrVersion : String = SolrSupport.getSolrVersion(solrConf.getZkHost.get) lazy val uniqueKey: String = SolrQuerySupport.getUniqueKey(zkhost, collection.split(",")(0)) lazy val dynamicSuffixes: Set[String] = SolrQuerySupport.getFieldTypes( Set.empty, SolrSupport.getSolrBaseUrl(zkhost), SolrSupport.getCachedCloudClient(zkhost), collection, skipDynamicExtensions = false) .keySet .filter(f => f.startsWith("*_") || f.endsWith("_*")) .map(f => if (f.startsWith("*_")) f.substring(1) else f.substring(0, f.length-1)) @volatile private var latestBatchId: Long = -1L val acc: SparkSolrAccumulator = new SparkSolrAccumulator val accName = if (solrConf.getAccumulatorName.isDefined) solrConf.getAccumulatorName.get else "Records Written" sparkSession.sparkContext.register(acc, accName) SparkSolrAccumulatorContext.add(accName, acc.id) override def addBatch(batchId: Long, df: DataFrame): Unit = { if (batchId <= latestBatchId) { logger.info(s"Skipping already processed batch $batchId") } else { val rows = df.collect() if (rows.nonEmpty) { val schema: StructType = df.schema val solrClient = SolrSupport.getCachedCloudClient(zkhost) // build up a list of updates to send to the Solr Schema API val fieldsToAddToSolr = SolrRelation.getFieldsToAdd(schema, solrConf, solrVersion, dynamicSuffixes) if (fieldsToAddToSolr.nonEmpty) { SolrRelation.addFieldsForInsert(fieldsToAddToSolr, collection, solrClient) } val solrDocs = rows.toStream.map(row => SolrRelation.convertRowToSolrInputDocument(row, solrConf, uniqueKey)) acc.add(solrDocs.length.toLong) SolrSupport.sendBatchToSolrWithRetry(zkhost, solrClient, collection, solrDocs, solrConf.commitWithin) logger.info(s"Written ${solrDocs.length} documents to Solr collection $collection from batch $batchId") latestBatchId = batchId } } } }
Example 102
Source File: Authentication.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package it.gov.daf.common.authentication import java.util.Date import com.nimbusds.jwt.JWTClaimsSet import org.pac4j.core.profile.{CommonProfile, ProfileManager} import org.pac4j.jwt.config.signature.SecretSignatureConfiguration import org.pac4j.jwt.credentials.authenticator.JwtAuthenticator import org.pac4j.jwt.profile.JwtGenerator import org.pac4j.play.PlayWebContext import org.pac4j.play.store.PlaySessionStore import play.api.Configuration import play.api.mvc.{RequestHeader, Result, Results} import scala.collection.convert.decorateAsScala._ import scala.collection.mutable @SuppressWarnings( Array( "org.wartremover.warts.Throw", "org.wartremover.warts.Var" ) ) object Authentication extends Results { var configuration: Option[Configuration] = None var playSessionStore: Option[PlaySessionStore] = None var secret: Option[String] = None def apply(configuration: Configuration, playSessionStore: PlaySessionStore): Unit = { this.configuration = Some(configuration) this.playSessionStore = Some(playSessionStore) this.secret = this.configuration.flatMap(_.getString("pac4j.jwt_secret")) } def getClaims(requestHeader: RequestHeader): Option[mutable.Map[String, AnyRef]] = { val header: Option[String] = requestHeader.headers.get("Authorization") val token: Option[String] = for { h <- header t <- h.split("Bearer").lastOption } yield t.trim getClaimsFromToken(token) } def getClaimsFromToken(token: Option[String]): Option[mutable.Map[String, AnyRef]] = { val jwtAuthenticator = new JwtAuthenticator() jwtAuthenticator.addSignatureConfiguration(new SecretSignatureConfiguration(secret.getOrElse(throw new Exception("missing secret")))) token.map(jwtAuthenticator.validateTokenAndGetClaims(_).asScala) } def getProfiles(request: RequestHeader): List[CommonProfile] = { val webContext = new PlayWebContext(request, playSessionStore.getOrElse(throw new Exception("missing playSessionStore"))) val profileManager = new ProfileManager[CommonProfile](webContext) profileManager.getAll(true).asScala.toList } def getStringToken: (RequestHeader,Long) => Option[String] = (request: RequestHeader,minutes:Long) => { val generator = new JwtGenerator[CommonProfile](new SecretSignatureConfiguration(secret.getOrElse(throw new Exception("missing secret")))) val profiles = getProfiles(request) val token: Option[String] = profiles.headOption.map(profile => { val expDate = new Date( (new Date).getTime + 1000L*60L*minutes )//*60L*24L val claims = new JWTClaimsSet.Builder().expirationTime(expDate).build() profile.addAttributes(claims.getClaims) generator.generate(profile) }) token } def getToken: (RequestHeader,Long) => Result = (request: RequestHeader, minutes:Long) => { Ok(getStringToken(request,minutes).getOrElse("")) } }
Example 103
Source File: SpotlightLog.scala From dbpedia-spotlight-model with Apache License 2.0 | 5 votes |
package org.dbpedia.spotlight.log import org.apache.commons.logging.{Log, LogFactory} import scala.collection.mutable trait SpotlightLog[T] { def _debug(c:Class[_], msg: T, args: Any*) def _info(c:Class[_], msg: T, args: Any*) def _error(c:Class[_], msg: T, args: Any*) def _fatal(c:Class[_], msg: T, args: Any*) def _trace(c:Class[_], msg: T, args: Any*) def _warn(c:Class[_], msg: T, args: Any*) } object SpotlightLog { def debug[T](c:Class[_], msg: T, args: Any*)(implicit instance: SpotlightLog[T]) = instance._debug(c, msg, args: _*) def info[T](c:Class[_], msg: T, args: Any*)(implicit instance: SpotlightLog[T]) = instance._info(c, msg, args: _*) def error[T](c:Class[_], msg: T, args: Any*)(implicit instance: SpotlightLog[T]) = instance._error(c, msg, args: _*) def fatal[T](c:Class[_], msg: T, args: Any*)(implicit instance: SpotlightLog[T]) = instance._fatal(c, msg, args: _*) def trace[T](c:Class[_], msg: T, args: Any*)(implicit instance: SpotlightLog[T]) = instance._trace(c, msg, args: _*) def warn[T](c:Class[_], msg: T, args: Any*)(implicit instance: SpotlightLog[T]) = instance._warn(c, msg, args: _*) implicit object StringSpotlightLog extends SpotlightLog[String] { val loggers = new mutable.HashMap[Class[_], Log]() def _debug(c:Class[_], msg: String, args: Any*) = { val log = loggers.getOrElseUpdate(c, LogFactory.getLog(c)) if (log.isDebugEnabled) { if(args.size == 0) log.debug(msg) else log.debug(msg.format(args: _*)) } } def _info(c:Class[_], msg: String, args: Any*) = { val log = loggers.getOrElseUpdate(c, LogFactory.getLog(c)) if(log.isInfoEnabled) { if(args.size == 0) log.info(msg) else log.info(msg.format(args: _*)) } } def _error(c:Class[_], msg: String, args: Any*) = { val log = loggers.getOrElseUpdate(c, LogFactory.getLog(c)) if(log.isErrorEnabled) { if(args.size == 0) log.error(msg) else log.error(msg.format(args: _*)) } } def _fatal(c:Class[_], msg: String, args: Any*) = { val log = loggers.getOrElseUpdate(c, LogFactory.getLog(c)) if(log.isFatalEnabled) { if(args.size == 0) log.fatal(msg) else log.fatal(msg.format(args: _*)) } } def _trace(c:Class[_], msg: String, args: Any*) = { val log = loggers.getOrElseUpdate(c, LogFactory.getLog(c)) if(log.isTraceEnabled) { if(args.size == 0) log.trace(msg) else log.trace(msg.format(args: _*)) } } def _warn(c:Class[_], msg: String, args: Any*) = { val log = loggers.getOrElseUpdate(c, LogFactory.getLog(c)) if(log.isWarnEnabled) { if(args.size == 0) log.warn(msg) else log.warn(msg.format(args: _*)) } } } }
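Every logging method above caches the per-class Log via loggers.getOrElseUpdate, so LogFactory.getLog runs only on the first call for a class. The same memoization pattern in a self-contained form, with a placeholder computation instead of a logger:

import scala.collection.mutable

object MemoizeSketch extends App {
  val cache = mutable.HashMap.empty[String, Int]

  def expensiveLookup(key: String): Int = {
    println(s"computing for $key")  // printed only on a cache miss
    key.length
  }

  // The second argument of getOrElseUpdate is by-name: it runs only when the key is absent
  println(cache.getOrElseUpdate("spotlight", expensiveLookup("spotlight")))
  println(cache.getOrElseUpdate("spotlight", expensiveLookup("spotlight")))  // served from the cache
}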
Example 104
Source File: GenerativeContextSimilarity.scala From dbpedia-spotlight-model with Apache License 2.0 | 5 votes |
package org.dbpedia.spotlight.db.similarity import org.dbpedia.spotlight.db.model.{ContextStore, TokenTypeStore} import org.dbpedia.spotlight.model.{DBpediaResource, TokenType} import org.dbpedia.spotlight.util.MathUtil import scala.collection.mutable def p(token: TokenType, res: DBpediaResource, cResAndToken: Int): Double = { val pML = if (cResAndToken == 0 || contextStore.getTotalTokenCount(res) == 0 ) 0.0 else cResAndToken.toDouble / contextStore.getTotalTokenCount(res) val ml = MathUtil.lnproduct(MathUtil.ln(lambda), MathUtil.ln(pML)) val lm = MathUtil.lnproduct(MathUtil.ln(1-lambda), pLM(token)) MathUtil.lnsum( lm, if(ml.isNaN) MathUtil.LOGZERO else ml ) } def intersect(query: Seq[TokenType], res: DBpediaResource): Seq[(TokenType, Int)] = { val (tokens, counts) = contextStore.getRawContextCounts(res) if (tokens.length == 0) { query.map( t => (t, 0)) } else { var j = 0 query.map { t: TokenType => while(j < tokens.length-1 && tokens(j) < t.id) { j += 1 } if(tokens(j) == t.id) (t, counts(j)) else (t, 0) } } } def score(query: Seq[TokenType], candidates: Set[DBpediaResource]): mutable.Map[DBpediaResource, Double] = { val contextScores = mutable.HashMap[DBpediaResource, Double]() candidates.map( res => { contextScores.put( res, MathUtil.lnproduct( intersect(query, res).map({ case(token: TokenType, cResAndToken: Int) =>p(token, res, cResAndToken) }) .filter(s => !MathUtil.isLogZero(s)) ) ) }) contextScores } def nilScore(query: Seq[TokenType]): Double = { MathUtil.lnproduct( query.map{ t: TokenType => MathUtil.lnproduct(MathUtil.ln(1-lambda), pLM(t)) } ) } }
Example 105
Source File: JarLoaderEngineHook.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.enginemanager.hook import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.enginemanager.{Engine, EngineHook} import com.webank.wedatasphere.linkis.enginemanager.conf.EngineManagerConfiguration.ENGINE_UDF_APP_NAME import com.webank.wedatasphere.linkis.protocol.engine.RequestEngine import com.webank.wedatasphere.linkis.rpc.Sender import com.webank.wedatasphere.linkis.udf.api.rpc.{RequestUdfTree, ResponseUdfTree} import com.webank.wedatasphere.linkis.udf.entity.{UDFInfo, UDFTree} import org.apache.commons.collections.CollectionUtils import org.apache.commons.io.FileUtils import org.apache.commons.lang.StringUtils import org.codehaus.jackson.map.ObjectMapper import scala.collection.JavaConversions._ import scala.collection.mutable class JarLoaderEngineHook extends EngineHook with Logging{ override def beforeCreateSession(requestEngine: RequestEngine): RequestEngine = { info("start loading UDFs") val udfInfos = extractUdfInfos(requestEngine).filter{info => info.getUdfType == 0 && info.getExpire == false && StringUtils.isNotBlank(info.getPath) && isJarExists(info) && info.getLoad == true } // add to class path val jars = new mutable.HashSet[String]() udfInfos.foreach{udfInfo => jars.add("file://" + udfInfo.getPath)} val jarPaths = jars.mkString(",") if(StringUtils.isBlank(requestEngine.properties.get("jars"))){ requestEngine.properties.put("jars", jarPaths) } else { requestEngine.properties.put("jars", requestEngine.properties.get("jars") + "," + jarPaths) } info("added jars: " + jarPaths) //jars.foreach(fetchRemoteFile) //info("copied jars.") info("end loading UDFs") requestEngine } override def afterCreatedSession(engine: Engine, requestEngine: RequestEngine): Unit = { } protected def isJarExists(udfInfo: UDFInfo) : Boolean = { true // if(FileUtils.getFile(udfInfo.getPath).exists()){ // true // } else { // info(s"The jar file [${udfInfo.getPath}] of UDF [${udfInfo.getUdfName}] doesn't exist, ignore it.") // false // } } protected def extractUdfInfos(requestEngine: RequestEngine): mutable.ArrayBuffer[UDFInfo] = { val udfInfoBuilder = new mutable.ArrayBuffer[UDFInfo] val userName = requestEngine.user val udfTree = queryUdfRpc(userName) extractUdfInfos(udfInfoBuilder, udfTree, userName) udfInfoBuilder } protected def extractUdfInfos(udfInfoBuilder: mutable.ArrayBuffer[UDFInfo], udfTree: UDFTree, userName: String) : Unit = { if(CollectionUtils.isNotEmpty(udfTree.getUdfInfos)){ for(udfInfo <- udfTree.getUdfInfos){ udfInfoBuilder.append(udfInfo) } } if(CollectionUtils.isNotEmpty(udfTree.getChildrens)){ for(child <- udfTree.getChildrens){ var childInfo = child if(TreeType.specialTypes.contains(child.getUserName)){ childInfo = queryUdfRpc(userName, child.getId, child.getUserName) } else { childInfo = queryUdfRpc(userName, child.getId, TreeType.SELF) } extractUdfInfos(udfInfoBuilder, childInfo, userName) } } } private def queryUdfRpc(userName: String, treeId: Long = -1, treeType: String = "self"): UDFTree = { val udfTree = Sender.getSender(ENGINE_UDF_APP_NAME.getValue) .ask(RequestUdfTree(userName, treeType, treeId, "udf")) .asInstanceOf[ResponseUdfTree] .udfTree //info("got udf tree:" + new ObjectMapper().writer().withDefaultPrettyPrinter().writeValueAsString(udfTree)) udfTree } }
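beforeCreateSession gathers jar paths into a scala.collection.mutable.HashSet so each path is added once before being joined with mkString. A self-contained sketch of that de-duplication step with invented paths:

import scala.collection.mutable

object DedupeJarPathsSketch extends App {
  // Made-up UDF jar paths, with a duplicate on purpose
  val udfPaths = Seq("/udfs/a.jar", "/udfs/b.jar", "/udfs/a.jar")

  val jars = new mutable.HashSet[String]()
  udfPaths.foreach(path => jars.add("file://" + path))

  // Each path appears once; HashSet iteration order is unspecified
  println(jars.mkString(","))
}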
Example 106
Source File: EventGroupFactory.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.resourcemanager.schedule import com.webank.wedatasphere.linkis.common.ServiceInstance import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.resourcemanager.event.metric.MetricRMEvent import com.webank.wedatasphere.linkis.scheduler.queue.parallelqueue.ParallelGroup import com.webank.wedatasphere.linkis.scheduler.queue.{Group, GroupFactory, SchedulerEvent} import scala.collection.mutable abstract class EventGroupFactory extends GroupFactory { def getGroupNameByModule(moduleInstance: ServiceInstance): String def getGroupNameByUser(user: String): String } class EventGroupFactoryImpl extends EventGroupFactory with Logging { private val groupMap = new mutable.HashMap[String, Group]() private val RM_CONTEXT_CONSTRUCTOR_LOCK = new Object() private val maxGroupNum = 100 def getInitCapacity(groupName: String): Int = 100 def getMaxCapacity(groupName: String): Int = 1000 def getBKDRHash2(str: String): Int = { val seed: Int = 131 var hash: Int = 0 for (i <- 0 to str.length - 1) { hash = hash * seed + str.charAt(i) hash = hash & 0x7FFFFFFF info("current hash code result is " + hash.toString) } return hash } override def getGroupNameByModule(moduleInstance: ServiceInstance) = { //val inputStr = moduleInstance.ip+moduleInstance.port.toString+moduleInstance.moduleName //val hash = getBKDRHash2(inputStr) val hash = moduleInstance.hashCode() val groupName = hash % maxGroupNum groupName.toString } override def getGroupNameByUser(user: String) = { //val hash = getBKDRHash2(user) val hash = user.hashCode val groupName = hash % maxGroupNum groupName.toString } override def getOrCreateGroup(groupName: String) = { RM_CONTEXT_CONSTRUCTOR_LOCK.synchronized { if (groupMap.get(groupName).isDefined) { groupMap.get(groupName).get } else { val group = new ParallelGroup(groupName, getInitCapacity(groupName), getMaxCapacity(groupName)) groupMap.put(groupName, group) group } } } override def getGroupNameByEvent(event: SchedulerEvent) = { event match { case metricRMEvent: MetricRMEvent => { "METRIC" } case _ => { val hash = event.hashCode val groupName = hash % maxGroupNum groupName.toString } } } }
Example 107
Source File: EventConsumerManager.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.resourcemanager.schedule import java.util.concurrent.ExecutorService import com.webank.wedatasphere.linkis.common.utils.{Logging, Utils} import com.webank.wedatasphere.linkis.resourcemanager.event.RMEvent import com.webank.wedatasphere.linkis.scheduler.SchedulerContext import com.webank.wedatasphere.linkis.scheduler.listener.ConsumerListener import com.webank.wedatasphere.linkis.scheduler.queue.{ConsumerManager, LoopArrayQueue} import scala.collection.mutable override def getOrCreateConsumer(groupName: String) = { RM_CONTEXT_CONSTRUCTOR_LOCK.synchronized { var tmpConsumer = consumerGroupMap.get(groupName).getOrElse(null) if (tmpConsumer == null) { tmpConsumer = createConsumer(groupName) } tmpConsumer } } override protected def createConsumer(groupName: String) = { val group = schedulerContext.getOrCreateGroupFactory.getOrCreateGroup(groupName) val consumer = new RMEventConsumer(schedulerContext, getOrCreateExecutorService, group) consumer.start() val listener = new RMConsumerListenerImpl listener.setConsumer(consumer) consumer.setConsumeQueue(new LoopArrayQueue(group)) consumer.setRmConsumerListener(listener) consumerGroupMap.put(groupName, consumer) consumerListenerMap.put(groupName, listener) if (consumerListener != null) consumerListener.onConsumerCreated(consumer) consumer } protected def createConsumerFromConsumer(oldConsumer: RMEventConsumer) = { var newConsumer: RMEventConsumer = null if (oldConsumer != null) { info("Create new consumer from old consumer " + oldConsumer.getGroup.getGroupName) val groupName = oldConsumer.getGroup.getGroupName val group = schedulerContext.getOrCreateGroupFactory.getOrCreateGroup(groupName) newConsumer = new RMEventConsumer(schedulerContext, getOrCreateExecutorService, group) newConsumer.start() val listener = new RMConsumerListenerImpl listener.setConsumer(newConsumer) newConsumer.setConsumeQueue(oldConsumer.getConsumeQueue) newConsumer.setRmConsumerListener(listener) consumerListenerMap.update(groupName, listener) if (consumerListener != null) consumerListener.onConsumerCreated(newConsumer) } newConsumer } override def destroyConsumer(groupName: String) = { val tmpConsumer = consumerGroupMap.get(groupName).getOrElse(null) if (tmpConsumer != null) { tmpConsumer.shutdown() consumerGroupMap.remove(groupName) if (consumerListener != null) consumerListener.onConsumerDestroyed(tmpConsumer) } } override def shutdown() = { Utils.tryThrow({ consumerGroupMap.values.toArray.foreach(x => x.shutdown()) executorService.shutdown() })(t => new Exception("ConsumerManager shutdown exception", t)) } override def listConsumers() = consumerGroupMap.values.toArray override def getOrCreateExecutorService: ExecutorService = { RM_CONTEXT_CONSTRUCTOR_LOCK.synchronized { if (executorService == null) { executorService = Utils.newCachedThreadPool(3 * maxParallelismUsers + 1, "Engine-Scheduler-ThreadPool-", true) executorService } else { executorService } } } }
Example 108
Source File: DefaultUserMetaData.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.resourcemanager.service.metadata import java.util import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.protocol.config.RequestQueryAppConfigWithGlobal import com.webank.wedatasphere.linkis.protocol.utils.ProtocolUtils import com.webank.wedatasphere.linkis.resourcemanager.ResourceRequestPolicy._ import com.webank.wedatasphere.linkis.resourcemanager._ import com.webank.wedatasphere.linkis.resourcemanager.exception.RMWarnException import com.webank.wedatasphere.linkis.resourcemanager.utils.RMConfiguration._ import org.springframework.beans.factory.annotation.Autowired import org.springframework.stereotype.Component import scala.collection.mutable @Component class DefaultUserMetaData extends UserMetaData with Logging { @Autowired var moduleResourceRecordService: ModuleResourceRecordService = _ override def getUserAvailableResource(moduleName: String, user: String, creator: String): (UserAvailableResource, UserAvailableResource) = { val policy = moduleResourceRecordService.getModulePolicy(moduleName) val appName = ProtocolUtils.getAppName(moduleName).getOrElse(moduleName) val userModuleAvailableResource = UserAvailableResource(moduleName, generateResource(policy, UserConfiguration.getCacheMap(RequestQueryAppConfigWithGlobal(user, null, appName, true)))) val userCreatorAvailableResource = UserAvailableResource(moduleName, generateResource(policy, UserConfiguration.getCacheMap(RequestQueryAppConfigWithGlobal(user, creator, appName, true)))) info(s"$user available resource of module:$userModuleAvailableResource,on creator available resource:$userCreatorAvailableResource") (userModuleAvailableResource, userCreatorAvailableResource) } override def getUserGlobalInstanceLimit(user: String): Int = { val userConfiguration = UserConfiguration.getCacheMap(RequestQueryAppConfigWithGlobal(user, null, null, true)) USER_AVAILABLE_INSTANCE.getValue(userConfiguration) } def generateResource(policy: ResourceRequestPolicy, userConfiguration: util.Map[String, String]): Resource = policy match { case CPU => new CPUResource(USER_AVAILABLE_CPU.getValue(userConfiguration)) case Memory => new MemoryResource(USER_AVAILABLE_MEMORY.getValue(userConfiguration).toLong) case Load => new LoadResource(USER_AVAILABLE_MEMORY.getValue(userConfiguration).toLong, USER_AVAILABLE_CPU.getValue(userConfiguration)) case Instance => new InstanceResource(USER_AVAILABLE_INSTANCE.getValue(userConfiguration)) case LoadInstance => new LoadInstanceResource(USER_AVAILABLE_MEMORY.getValue(userConfiguration).toLong, USER_AVAILABLE_CPU.getValue(userConfiguration), USER_AVAILABLE_INSTANCE.getValue(userConfiguration)) case Yarn => new YarnResource(USER_AVAILABLE_YARN_INSTANCE_MEMORY.getValue(userConfiguration).toLong, USER_AVAILABLE_YARN_INSTANCE_CPU.getValue(userConfiguration), USER_AVAILABLE_YARN_INSTANCE.getValue(userConfiguration), USER_AVAILABLE_YARN_QUEUE_NAME.getValue(userConfiguration)) case DriverAndYarn => new DriverAndYarnResource(new LoadInstanceResource(USER_AVAILABLE_MEMORY.getValue(userConfiguration).toLong, USER_AVAILABLE_CPU.getValue(userConfiguration), USER_AVAILABLE_INSTANCE.getValue(userConfiguration)), new YarnResource(USER_AVAILABLE_YARN_INSTANCE_MEMORY.getValue(userConfiguration).toLong, USER_AVAILABLE_YARN_INSTANCE_CPU.getValue(userConfiguration), USER_AVAILABLE_YARN_INSTANCE.getValue(userConfiguration), USER_AVAILABLE_YARN_QUEUE_NAME.getValue(userConfiguration))) case Special => new SpecialResource(new java.util.HashMap[String, AnyVal]()) case 
_ => throw new RMWarnException(111003, "not supported resource result policy ") } override def getUserModuleInfo(moduleName: String, user: String): Map[String, Any] = { val appName = ProtocolUtils.getAppName(moduleName).getOrElse(moduleName) val userConfiguration = UserConfiguration.getCacheMap(RequestQueryAppConfigWithGlobal(user, null, appName, true)) val userModuleInfo = new mutable.HashMap[String, Any]() userModuleInfo.put("waitUsed", USER_MODULE_WAIT_USED.getValue(userConfiguration)) userModuleInfo.put("waitReleased", USER_MODULE_WAIT_RELEASE.getValue(userConfiguration)) userModuleInfo.toMap } }
Example 109
Source File: ZookeeperDistributedQueue.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.resourcemanager.notify import java.util.Collections import com.webank.wedatasphere.linkis.common.utils.Logging import org.apache.zookeeper.ZooDefs.Ids import org.apache.zookeeper.{CreateMode, KeeperException, ZKUtil, ZooKeeper} import scala.collection.JavaConversions._ import scala.collection.mutable class ZookeeperDistributedQueue(zk: ZooKeeper, var queueName: String) extends DistributedQueue[Array[Byte]] with Logging { if (!queueName.startsWith("/")) queueName = "/" + queueName try if (zk.exists(queueName, false) == null) zk.create(queueName, new Array[Byte](0), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT) catch { case e: KeeperException => error(s"Failed to create queue[$queueName]: ", e) } override def offer(value: Array[Byte]): Unit = { zk.create(queueName + "/element", value, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT_SEQUENTIAL) } override def poll(): Array[Byte] = { val path = head() if (path == null) return null val value = zk.getData(path, false, null) zk.delete(path, -1) value } override def peek(): Array[Byte] = { val path = head() if (path == null) return null zk.getData(path, false, null) } override def destroy(): Unit = { try if (zk.exists(queueName, false) == null) info(s"Queue[$queueName] already destroyed.") else ZKUtil.deleteRecursive(zk, queueName) catch { case e: KeeperException => error(s"Failed to destroy queue[$queueName]: ", e) } } private def head(): String = { val elements = zk.getChildren(queueName, false) if (elements.size == 0) return null Collections.sort(elements) queueName + "/" + elements.get(0) } override def copyToArray(): Array[Array[Byte]] = { val elements = zk.getChildren(queueName, false) if (elements.size == 0) return new Array[Array[Byte]](0) elements.map({ e => zk.getData(queueName + "/" + e, false, null) }).toArray } def indexOf(bytes: Array[Byte]): String = { val elements = zk.getChildren(queueName, false) elements.find(e => bytes.equals(zk.getData(queueName + "/" + e, false, null))).getOrElse("") } def copyToMap(): mutable.Map[String, Array[Byte]] = { val resultMap = mutable.Map.empty[String, Array[Byte]] val elements = zk.getChildren(queueName, false) if (elements.size == 0) return resultMap elements.map(e => resultMap.put(e, zk.getData(queueName + "/" + e, false, null))) resultMap } def remove(index: String) = if (index.length != 0) zk.delete(queueName + "/" + index, -1) } object ZookeeperDistributedQueue { def apply(queueName: String): ZookeeperDistributedQueue = new ZookeeperDistributedQueue(ZookeeperUtils.getOrCreateZookeeper(), queueName) def apply(zk: ZooKeeper, queueName: String): ZookeeperDistributedQueue = new ZookeeperDistributedQueue(zk, queueName) }
Example 110
Source File: package.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis import java.util import javax.servlet.http.HttpServletRequest import com.webank.wedatasphere.linkis.common.exception.{ErrorException, ExceptionManager, FatalException, WarnException} import com.webank.wedatasphere.linkis.common.utils.Utils import com.webank.wedatasphere.linkis.server.exception.{BDPServerErrorException, NonLoginException} import com.webank.wedatasphere.linkis.server.security.SecurityFilter import org.apache.commons.lang.StringUtils import org.apache.commons.lang.exception.ExceptionUtils import org.slf4j.Logger import scala.collection.{JavaConversions, mutable} package object server { val EXCEPTION_MSG = "errorMsg" type JMap[K, V] = java.util.HashMap[K, V] implicit def getUser(req: HttpServletRequest): String = SecurityFilter.getLoginUsername(req) def validateFailed(message: String): Message = Message(status = 2).setMessage(message) def validate[T](json: util.Map[String, T], keys: String*): Unit = { keys.foreach(k => if(!json.contains(k) || json.get(k) == null || StringUtils.isEmpty(json.get(k).toString)) throw new BDPServerErrorException(11001, s"Verification failed, $k cannot be empty!(验证失败,$k 不能为空!)")) } def error(message: String): Message = Message.error(message) implicit def ok(msg: String): Message = Message.ok(msg) implicit def error(t: Throwable): Message = Message.error(t) implicit def error(e: (String, Throwable)): Message = Message.error(e) implicit def error(msg: String, t: Throwable): Message = Message.error(msg -> t) // def tryCatch[T](tryOp: => T)(catchOp: Throwable => T): T = Utils.tryCatch(tryOp)(catchOp) // def tryCatch(tryOp: => Message)(catchOp: Throwable => Message): Message = Utils.tryCatch(tryOp){ // case nonLogin: NonLoginException => Message.noLogin(msg = nonLogin.getMessage) // case t => catchOp(t) // } def catchMsg(tryOp: => Message)(msg: String)(implicit log: Logger): Message = Utils.tryCatch(tryOp){ case fatal: FatalException => log.error("Fatal Error, system exit...", fatal) System.exit(fatal.getErrCode) Message.error("Fatal Error, system exit...") case nonLogin: NonLoginException => val message = Message.noLogin(nonLogin.getMessage) message.data(EXCEPTION_MSG, nonLogin.toMap) message case error: ErrorException => val cause = error.getCause val errorMsg = cause match { case t: ErrorException => s"error code(错误码): ${t.getErrCode}, error message(错误信息): ${t.getDesc}." case _ => s"error code(错误码): ${error.getErrCode}, error message(错误信息): ${error.getDesc}." } log.error(errorMsg, error) val message = Message.error(errorMsg) message.data(EXCEPTION_MSG, error.toMap) message case warn: WarnException => val warnMsg = s"Warning code(警告码): ${warn.getErrCode}, Warning message(警告信息): ${warn.getDesc}." 
log.warn(warnMsg, warn) val message = Message.warn(warnMsg) message.data(EXCEPTION_MSG, warn.toMap) message case t => log.error(msg, t) val errorMsg = ExceptionUtils.getRootCauseMessage(t) val message = if(StringUtils.isNotEmpty(errorMsg) && "operation failed(操作失败)" != msg) error(msg + "!the reason(原因):" + errorMsg) else if(StringUtils.isNotEmpty(errorMsg)) error(errorMsg) else error(msg) message.data(EXCEPTION_MSG, ExceptionManager.unknownException(message.getMessage)) } def catchIt(tryOp: => Message)(implicit log: Logger): Message = catchMsg(tryOp)("operation failed(操作失败)s") implicit def toScalaBuffer[T](list: util.List[T]): mutable.Buffer[T] = JavaConversions.asScalaBuffer(list) implicit def toScalaMap[K, V](map: util.Map[K, V]): mutable.Map[K, V] = JavaConversions.mapAsScalaMap(map) implicit def toJavaList[T](list: mutable.Buffer[T]): util.List[T] = { val arrayList = new util.ArrayList[T] list.foreach(arrayList.add) arrayList } implicit def toJavaMap[K, V](map: mutable.Map[K, V]): JMap[K, V] = { val hashMap = new util.HashMap[K, V]() map.foreach(m => hashMap.put(m._1, m._2)) hashMap } implicit def toJavaMap[K, V](map: Map[K, V]): JMap[K, V] = { val hashMap = new util.HashMap[K, V]() map.foreach(m => hashMap.put(m._1, m._2)) hashMap } implicit def asString(mapWithKey: (util.Map[String, Object], String)): String = mapWithKey._1.get(mapWithKey._2).asInstanceOf[String] implicit def getString(mapWithKey: (util.Map[String, String], String)): String = mapWithKey._1.get(mapWithKey._2) implicit def asInt(map: util.Map[String, Object], key: String): Int = map.get(key).asInstanceOf[Int] implicit def asBoolean(mapWithKey: (util.Map[String, Object], String)): Boolean = mapWithKey._1.get(mapWithKey._2).asInstanceOf[Boolean] }
Example 111
Source File: ParallelConsumerManager.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.scheduler.queue.parallelqueue import java.util.concurrent.ExecutorService import com.webank.wedatasphere.linkis.common.utils.Utils import com.webank.wedatasphere.linkis.scheduler.listener.ConsumerListener import com.webank.wedatasphere.linkis.scheduler.queue._ import com.webank.wedatasphere.linkis.scheduler.queue.fifoqueue.FIFOUserConsumer import scala.collection.mutable class ParallelConsumerManager(maxParallelismUsers: Int)extends ConsumerManager{ private val UJES_CONTEXT_CONSTRUCTOR_LOCK = new Object() private var consumerListener: Option[ConsumerListener] = None private var executorService: ExecutorService = _ private val consumerGroupMap = new mutable.HashMap[String, FIFOUserConsumer]() override def setConsumerListener(consumerListener: ConsumerListener) = { this.consumerListener = Some(consumerListener) } override def getOrCreateExecutorService = if(executorService != null) executorService else UJES_CONTEXT_CONSTRUCTOR_LOCK.synchronized { if (executorService == null) { executorService = Utils.newCachedThreadPool(5 * maxParallelismUsers + 1, "Engine-Scheduler-ThreadPool-", true) } executorService } override def getOrCreateConsumer(groupName: String) = if(consumerGroupMap.contains(groupName)) consumerGroupMap(groupName) else UJES_CONTEXT_CONSTRUCTOR_LOCK.synchronized { consumerGroupMap.getOrElse(groupName, { val newConsumer = createConsumer(groupName) val group = getSchedulerContext.getOrCreateGroupFactory.getOrCreateGroup(groupName) newConsumer.setGroup(group) newConsumer.setConsumeQueue(new LoopArrayQueue(group)) consumerGroupMap.put(groupName, newConsumer) consumerListener.foreach(_.onConsumerCreated(newConsumer)) newConsumer.start() newConsumer }) } override protected def createConsumer(groupName: String) = { val group = getSchedulerContext.getOrCreateGroupFactory.getOrCreateGroup(groupName) new FIFOUserConsumer(getSchedulerContext, getOrCreateExecutorService, group) } override def destroyConsumer(groupName: String) = consumerGroupMap.get(groupName).foreach { tmpConsumer => tmpConsumer.shutdown() consumerGroupMap.remove(groupName) consumerListener.foreach(_.onConsumerDestroyed(tmpConsumer)) } override def shutdown() = { consumerGroupMap.iterator.foreach(x => x._2.shutdown()) } override def listConsumers() = consumerGroupMap.values.toArray }
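getOrCreateConsumer does a cheap unsynchronized lookup and only enters the lock on a miss, re-checking the map inside it. A stripped-down, self-contained sketch of that caching shape, with a String standing in for the consumer:

import scala.collection.mutable

object GetOrCreateCacheSketch {
  private val LOCK = new Object()
  private val cache = new mutable.HashMap[String, String]()

  // Same shape as getOrCreateConsumer: cheap read first, create under the lock on a miss
  def getOrCreate(groupName: String): String =
    if (cache.contains(groupName)) cache(groupName)
    else LOCK.synchronized {
      // Re-check inside the lock so two racing callers do not both create a value
      cache.getOrElseUpdate(groupName, s"consumer-for-$groupName")
    }

  def main(args: Array[String]): Unit = {
    println(getOrCreate("hduser"))
    println(getOrCreate("hduser"))  // second call returns the cached value
  }
}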
Example 112
Source File: ParallelGroupFactory.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.scheduler.queue.parallelqueue import com.webank.wedatasphere.linkis.scheduler.queue.{Group, GroupFactory, Job, SchedulerEvent} import scala.collection.mutable class ParallelGroupFactory extends GroupFactory{ private val groupMap = new mutable.HashMap[String, Group]() def getInitCapacity(groupName: String): Int= 100 def getMaxCapacity(groupName: String): Int = 1000 private val UJES_CONTEXT_CONSTRUCTOR_LOCK = new Object() override def getOrCreateGroup(groupName: String) = { UJES_CONTEXT_CONSTRUCTOR_LOCK.synchronized { if (groupMap.get(groupName).isDefined) { groupMap.get(groupName).get } else { val group = new ParallelGroup(groupName, getInitCapacity(groupName), getMaxCapacity(groupName)) groupMap.put(groupName, group) group } } } override def getGroupNameByEvent(event: SchedulerEvent) = { val belongList = groupMap.values.filter(x => x.belongTo(event)).map(x => x.getGroupName).toList if(belongList.size > 0){ belongList(0) }else{ "NULL" } } }
Example 113
Source File: FIFOGroupFactory.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.scheduler.queue.fifoqueue import com.webank.wedatasphere.linkis.scheduler.queue.{Group, GroupFactory, Job, SchedulerEvent} import scala.collection.mutable class FIFOGroupFactory extends GroupFactory { private val groupMap = new mutable.HashMap[String, Group]() private val UJES_CONTEXT_CONSTRUCTOR_LOCK = new Object() //Obtained from the database(从数据库获取) def getInitCapacity(groupName: String): Int = 1000 def getMaxCapacity(groupName: String): Int = 10000 override def getOrCreateGroup(groupName: String) = { UJES_CONTEXT_CONSTRUCTOR_LOCK.synchronized { if (groupMap.get(groupName).isDefined) { groupMap.get(groupName).get } else { val group = new FIFOGroup(groupName, getInitCapacity(groupName), getMaxCapacity(groupName)) groupMap.put(groupName, group) group } } } override def getGroupNameByEvent(event: SchedulerEvent) = "FIFOGROUP" }
Example 114
Source File: DWCArgumentsParser.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.common.conf import org.apache.commons.lang.StringUtils import scala.collection.{JavaConversions, mutable} import scala.collection.mutable.ArrayBuffer object DWCArgumentsParser { protected val DWC_CONF = "--dwc-conf" protected val SPRING_CONF = "--spring-conf" private var dwcOptionMap = Map.empty[String, String] private[linkis] def setDWCOptionMap(dwcOptionMap: Map[String, String]) = this.dwcOptionMap = dwcOptionMap def getDWCOptionMap = dwcOptionMap def parse(args: Array[String]): DWCArgumentsParser = { val keyValueRegex = "([^=]+)=(.+)".r var i = 0 val optionParser = new DWCArgumentsParser while(i < args.length) { args(i) match { case DWC_CONF | SPRING_CONF => args(i + 1) match { case keyValueRegex(key, value) => optionParser.setConf(args(i), key, value) i += 1 case _ => throw new IllegalArgumentException("illegal commond line, format: --conf key=value.") } case _ => throw new IllegalArgumentException(s"illegal commond line, ${args(i)} cannot recognize.") } i += 1 } optionParser.validate() optionParser } def formatToArray(optionParser: DWCArgumentsParser): Array[String] = { val options = ArrayBuffer[String]() def write(confMap: Map[String, String], optionType: String): Unit = confMap.foreach { case (key, value) => if (StringUtils.isNotEmpty(key) && StringUtils.isNotEmpty(value)) { options += optionType options += (key + "=" + value) } } write(optionParser.getDWCConfMap, DWC_CONF) write(optionParser.getSpringConfMap, SPRING_CONF) options.toArray } def formatToArray(springOptionMap: Map[String, String], dwcOptionMap: Map[String, String]): Array[String] = formatToArray(new DWCArgumentsParser().setSpringConf(springOptionMap).setDWCConf(dwcOptionMap)) def format(optionParser: DWCArgumentsParser): String = formatToArray(optionParser).mkString(" ") def format(springOptionMap: Map[String, String], dwcOptionMap: Map[String, String]): String = formatToArray(springOptionMap, dwcOptionMap).mkString(" ") def formatSpringOptions(springOptionMap: Map[String, String]): Array[String] = { val options = ArrayBuffer[String]() springOptionMap.foreach { case (key, value) => if (StringUtils.isNotEmpty(key) && StringUtils.isNotEmpty(value)) { options += ("--" + key + "=" + value) } } options.toArray } } class DWCArgumentsParser { import DWCArgumentsParser._ private val dwcOptionMap = new mutable.HashMap[String, String]() private val springOptionMap = new mutable.HashMap[String, String]() def getSpringConfMap = springOptionMap.toMap def getSpringConfs = JavaConversions.mapAsJavaMap(springOptionMap) def getDWCConfMap = dwcOptionMap.toMap def setConf(optionType: String, key: String, value: String) = { optionType match { case DWC_CONF => dwcOptionMap += key -> value case SPRING_CONF => springOptionMap += key -> value } this } def setSpringConf(optionMap: Map[String, String]): DWCArgumentsParser = { if(optionMap != null) this.springOptionMap ++= optionMap this } def setDWCConf(optionMap: Map[String, String]): DWCArgumentsParser = { if(optionMap != null) this.dwcOptionMap ++= optionMap this } def validate() = {} }
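parse walks the argument array and splits each --dwc-conf / --spring-conf pair with a key=value regex before storing it in a mutable map. A self-contained sketch of the same parsing step, reduced to a single invented --conf option type:

import scala.collection.mutable

object KeyValueArgsSketch extends App {
  val keyValueRegex = "([^=]+)=(.+)".r
  // Invented command line
  val argv = Array("--conf", "spark.master=local[2]", "--conf", "spark.app.name=demo")

  val conf = new mutable.HashMap[String, String]()
  var i = 0
  while (i < argv.length) {
    argv(i) match {
      case "--conf" =>
        argv(i + 1) match {
          case keyValueRegex(key, value) => conf(key) = value
          case other => throw new IllegalArgumentException(s"expected key=value, got $other")
        }
        i += 1
      case unknown => throw new IllegalArgumentException(s"cannot recognize $unknown")
    }
    i += 1
  }

  println(conf.toMap)  // spark.master -> local[2], spark.app.name -> demo
}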
Example 115
Source File: AkkaIntroduction.scala From reactive-machine-learning-systems with MIT License | 5 votes |
package com.reactivemachinelearning import akka.actor.SupervisorStrategy.Restart import akka.actor._ import scala.collection.mutable import scala.util.Random object AkkaIntroduction extends App { val system = ActorSystem("voting") val connection = new DatabaseConnection("http://remotedatabase") val writerProps = Props(new VoteWriter(connection)) val writerSuperProps = Props(new WriterSupervisor(writerProps)) val votingSystem = system.actorOf(writerSuperProps) votingSystem ! Vote(1, 5, "nom nom") votingSystem ! Vote(2, 7, "Mikey") votingSystem ! Vote(3, 9, "nom nom") println(connection.votes) } case class Vote(timestamp: Long, voterId: Long, howler: String) class VoteWriter(connection: DatabaseConnection) extends Actor { def receive = { case Vote(timestamp, voterId, howler) => connection.insert(Map("timestamp" -> timestamp, "voterId" -> voterId, "howler" -> howler)) } } class WriterSupervisor(writerProps: Props) extends Actor { override def supervisorStrategy = OneForOneStrategy() { case exception: Exception => Restart } val writer = context.actorOf(writerProps) def receive = { case message => writer forward message } } class DatabaseConnection(url: String) { var votes = new mutable.HashMap[String, Any]() def insert(updateMap: Map[String, Any]) = { if (Random.nextBoolean()) throw new Exception updateMap.foreach { case (key, value) => votes.update(key, value) } } }
Example 116
Source File: _10_MutableCollections.scala From LearningScala with Apache License 2.0 | 5 votes |
package _020_collections object _10_MutableCollections { def main(args: Array[String]): Unit = { println("===== List buffers =====") listBufferExample() println() println("===== Array buffers =====") println(arrayBufferExample()) println() println("===== Mutable Sets =====") mutableSetExample() println() println("===== Mutable Maps =====") mutableMapExample() } private def mutableMapExample(): Unit = { import scala.collection.mutable val map = mutable.Map.empty[String, Int] println(map) map("hello") = 1 map("there") = 2 println(map) println(map("hello")) println("======") val nums = mutable.Map("i" -> 1, "ii" -> 2) println(nums) nums += ("vi" -> 6) println(nums) nums -= "ii" println(nums) nums ++= List("iii" -> 3, "v" -> 5) println(nums) nums --= List("i", "ii") println(nums) println("=====") println(s"nums.size: ${nums.size}") print("nums.contains(\"ii\"): ") println(nums.contains("ii")) print("nums(\"iii\"): ") println(nums("iii")) println(s"nums.keys ==> ${nums.keys}") println(s"nums.keySet ==> ${nums.keySet}") println(s"nums.values ==> ${nums.values}") println(s"nums.isEmpty: ${nums.isEmpty}") } def arrayBufferExample(): List[Int] = { import scala.collection.mutable.ArrayBuffer val ab = ArrayBuffer[Int](10, 20) ab += 30 ab += 40 ab.prepend(5) ab.toList //return immutable } private def listBufferExample(): Unit = { import scala.collection.mutable.ListBuffer val listBuffer = new ListBuffer[Int] listBuffer += 1 listBuffer += 2 println(listBuffer) 3 +=: listBuffer println(listBuffer) val list = listBuffer.toList println(list) } private def mutableSetExample(): Unit = { import scala.collection.mutable val emptySet = mutable.Set.empty[Int] println(emptySet) val nums = mutable.Set(1, 2, 3) println(nums) nums += 5 println(nums) nums -= 3 println(nums) nums ++= List(5, 6) println(nums) nums --= List(1, 2) println(nums) println(nums & Set(1, 3, 5, 7)) // intersection of two sets nums.clear() println(nums) } }
Example 117
Source File: _07_CaughtUpInClosures.scala From LearningScala with Apache License 2.0 | 5 votes |
package _970_scala_puzzlers import scala.collection.mutable object _07_CaughtUpInClosures { val accessors1: mutable.Buffer[() => Int] = mutable.Buffer.empty[() => Int] val accessors2: mutable.Buffer[() => Int] = mutable.Buffer.empty[() => Int] val data = Seq(100, 110, 120) var j = 0 for (i <- data.indices) { accessors1 += (() => data(i)) accessors2 += (() => data(j)) j += 1 } def main(args: Array[String]): Unit = { accessors1.foreach(a1 => println(a1())) // accessors2.foreach(a2 => println(a2())) // throws java.lang.IndexOutOfBoundsException: 3 println("\n===== Solution =====\n") Solution.accessors1.foreach(a1 => println(a1())) Solution.accessors2.foreach(a2 => println(a2())) } object Solution { val accessors1: mutable.Buffer[() => Int] = mutable.Buffer.empty[() => Int] val accessors2: mutable.Buffer[() => Int] = mutable.Buffer.empty[() => Int] val data = Seq(100, 110, 120) var j = 0 for (i <- data.indices) { val currentJ = j accessors1 += (() => data(i)) accessors2 += (() => data(currentJ)) j += 1 } } }
Example 118
Source File: CoreUnitTest.scala From SparkUnitTestingExamples with Apache License 2.0 | 5 votes |
package com.cloudera.sa.spark.unittest.core import org.apache.spark.{SparkConf, SparkContext} import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} import scala.collection.mutable class CoreUnitTest extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll{ @transient var sc: SparkContext = null override def beforeAll(): Unit = { val envMap = Map[String,String](("Xmx", "512m")) val sparkConfig = new SparkConf() sparkConfig.set("spark.broadcast.compress", "false") sparkConfig.set("spark.shuffle.compress", "false") sparkConfig.set("spark.shuffle.spill.compress", "false") sparkConfig.set("spark.io.compression.codec", "lzf") sc = new SparkContext("local[2]", "unit test", sparkConfig) } override def afterAll(): Unit = { sc.stop() } test("Test word count") { val quotesRDD = sc.parallelize(Seq("Courage is not simply one of the virtues, but the form of every virtue at the testing point", "We have a very active testing community which people don't often think about when you have open source", "Program testing can be used to show the presence of bugs, but never to show their absence", "Simple systems are not feasible because they require infinite testing", "Testing leads to failure, and failure leads to understanding")) val wordCountRDD = quotesRDD.flatMap(r => r.split(' ')). map(r => (r.toLowerCase, 1)). reduceByKey((a,b) => a + b) val wordMap = new mutable.HashMap[String, Int]() wordCountRDD.take(100). foreach{case(word, count) => wordMap.put(word, count)} //Note this is better then foreach(r => wordMap.put(r._1, r._2) assert(wordMap.get("to").get == 4, "The word count for 'to' should had been 4 but it was " + wordMap.get("to").get) assert(wordMap.get("testing").get == 5, "The word count for 'testing' should had been 5 but it was " + wordMap.get("testing").get) assert(wordMap.get("is").get == 1, "The word count for 'is' should had been 1 but it was " + wordMap.get("is").get) } }
Example 119
Source File: package.scala From magnolify with Apache License 2.0 | 5 votes |
package magnolify import scala.collection.{mutable, Factory} import scala.util.hashing.MurmurHash3 package object shims { trait Monadic[F[_]] extends mercator.Monadic[F] { def flatMapS[A, B](from: F[A])(fn: A => F[B]): F[B] def mapS[A, B](from: F[A])(fn: A => B): F[B] override def flatMap[A, B](from: F[A])(fn: A => F[B]): F[B] = flatMapS(from)(fn) override def map[A, B](from: F[A])(fn: A => B): F[B] = mapS(from)(fn) } trait FactoryCompat[-A, +C] extends Serializable { def newBuilder: mutable.Builder[A, C] def build(xs: IterableOnce[A]): C = newBuilder.addAll(xs).result() } object FactoryCompat { implicit def fromFactory[A, C](implicit f: Factory[A, C]): FactoryCompat[A, C] = new FactoryCompat[A, C] { override def newBuilder: mutable.Builder[A, C] = f.newBuilder } } object SerializableCanBuildFroms val JavaConverters = scala.jdk.CollectionConverters object MurmurHash3Compat { def seed(data: Int): Int = MurmurHash3.mix(MurmurHash3.productSeed, data) } }
Example 120
Source File: package.scala From magnolify with Apache License 2.0 | 5 votes |
package magnolify import scala.collection.generic.CanBuildFrom import scala.collection.mutable import scala.language.higherKinds import scala.reflect.ClassTag import scala.util.hashing.MurmurHash3 package object shims { trait Monadic[F[_]] extends mercator.Monadic[F] { def flatMapS[A, B](from: F[A])(fn: A => F[B]): F[B] def mapS[A, B](from: F[A])(fn: A => B): F[B] override def flatMap[A, B](from: F[A])(fn: A => F[B]): F[B] = flatMapS(from)(fn) override def map[A, B](from: F[A])(fn: A => B): F[B] = mapS(from)(fn) } trait FactoryCompat[-A, +C] extends Serializable { def newBuilder: mutable.Builder[A, C] def build(xs: TraversableOnce[A]): C = (newBuilder ++= xs).result() } object FactoryCompat extends LowPriorityFactoryCompat1 { private type FC[A, C] = FactoryCompat[A, C] def apply[A, C](f: () => mutable.Builder[A, C]): FC[A, C] = new FactoryCompat[A, C] { override def newBuilder: mutable.Builder[A, C] = f() } implicit def arrayFC[A: ClassTag] = FactoryCompat(() => Array.newBuilder[A]) // Deprecated in 2.13 // implicit def traversableFC[A] = FactoryCompat(() => Traversable.newBuilder[A]) // List <: Iterable // implicit def iterableFC[A] = FactoryCompat(() => Iterable.newBuilder[A]) // List <: Seq // implicit def seqFC[A] = FactoryCompat(() => Seq.newBuilder[A]) // Vector <: IndexedSeq // implicit def indexedSeqFC[A] = FactoryCompat(() => IndexedSeq.newBuilder[A]) } trait LowPriorityFactoryCompat1 extends LowPriorityFactoryCompat2 { implicit def listFC[A] = FactoryCompat(() => List.newBuilder[A]) } trait LowPriorityFactoryCompat2 { implicit def vectorFC[A] = FactoryCompat(() => Vector.newBuilder[A]) // Deprecated in 2.13 // implicit def streamFC[A] = FactoryCompat(() => Stream.newBuilder[A]) } object SerializableCanBuildFroms { private def cbf[A, C](f: () => mutable.Builder[A, C]): CanBuildFrom[C, A, C] = new CanBuildFrom[C, A, C] with Serializable { override def apply(from: C): mutable.Builder[A, C] = f() override def apply(): mutable.Builder[A, C] = f() } implicit def arrayCBF[A: ClassTag] = cbf(() => Array.newBuilder[A]) implicit def traversableCBF[A] = cbf(() => Traversable.newBuilder[A]) implicit def iterableCBF[A] = cbf(() => Iterable.newBuilder[A]) implicit def seqCBF[A] = cbf(() => Seq.newBuilder[A]) implicit def indexedSeqCBF[A] = cbf(() => IndexedSeq.newBuilder[A]) implicit def listCBF[A] = cbf(() => List.newBuilder[A]) implicit def vectorCBF[A] = cbf(() => Vector.newBuilder[A]) implicit def streamCBF[A] = cbf(() => Stream.newBuilder[A]) } val JavaConverters = scala.collection.JavaConverters object MurmurHash3Compat { def seed(data: Int): Int = MurmurHash3.productSeed } }
Example 121
Source File: LogsDefinition.scala From algoliasearch-client-scala with MIT License | 5 votes |
package algolia.definitions import algolia.http.{GET, HttpPayload} import algolia.objects.RequestOptions import algolia.responses.LogType import scala.collection.mutable case class LogsDefinition( offset: Option[Int] = None, length: Option[Int] = None, `type`: Option[LogType] = None, requestOptions: Option[RequestOptions] = None ) extends Definition { type T = LogsDefinition def offset(o: Int): LogsDefinition = copy(offset = Some(o)) def length(l: Int): LogsDefinition = copy(length = Some(l)) def `type`(t: LogType): LogsDefinition = copy(`type` = Some(t)) override def options(requestOptions: RequestOptions): LogsDefinition = copy(requestOptions = Some(requestOptions)) override private[algolia] def build(): HttpPayload = { val queryParameters = mutable.Map[String, String]() offset.map { o => queryParameters.put("offset", o.toString) } length.map { l => queryParameters.put("length", l.toString) } `type`.map { t => queryParameters.put("type", t.name) } HttpPayload( GET, Seq("1", "logs"), queryParameters = Some(queryParameters.toMap), isSearch = false, requestOptions = requestOptions ) } }
Example 122
Source File: IdentList.scala From boopickle with Apache License 2.0 | 5 votes |
package boopickle import scala.collection.mutable private[boopickle] final class IdentListBig(first: IdentList.Entry, size: Int) extends IdentList { // transform the linked list into an array buffer val b = mutable.ArrayBuffer.newBuilder[AnyRef] b.sizeHint(size) var e = first while (e != null) { b += e.obj e = e.next } val entries = b.result() override def apply(idx: Int): AnyRef = { entries(idx) } override def updated(obj: AnyRef): IdentList = { entries += obj this } }
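IdentListBig copies a linked list into an ArrayBuffer through a builder, using sizeHint to pre-allocate. A standalone sketch of that pattern; the Entry class here is a stand-in, not boopickle's.

import scala.collection.mutable

// Illustrative only: copy a singly linked list into an ArrayBuffer, pre-sizing the builder.
object LinkedToBufferSketch {
  final class Entry(val obj: AnyRef, var next: Entry)

  def toBuffer(first: Entry, size: Int): mutable.ArrayBuffer[AnyRef] = {
    val b = mutable.ArrayBuffer.newBuilder[AnyRef]
    b.sizeHint(size)          // avoids repeated resizing while appending
    var e = first
    while (e != null) {
      b += e.obj
      e = e.next
    }
    b.result()
  }

  def main(args: Array[String]): Unit = {
    val tail = new Entry("b", null)
    val head = new Entry("a", tail)
    println(toBuffer(head, 2)) // ArrayBuffer(a, b)
  }
}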
Example 123
Source File: TestAppender.scala From stryker4s with Apache License 2.0 | 5 votes |
package stryker4s.testutil import org.apache.logging.log4j.core._ import org.apache.logging.log4j.core.appender.AbstractAppender import org.apache.logging.log4j.core.config.Property import org.apache.logging.log4j.core.config.plugins._ import scala.collection.mutable import scala.collection.mutable.ListBuffer object TestAppender { val events: mutable.Map[String, ListBuffer[LogEvent]] = new mutable.HashMap[String, ListBuffer[LogEvent]]().withDefaultValue(ListBuffer.empty) def reset(implicit threadName: String): Unit = events(threadName).clear() @PluginFactory def createAppender( @PluginAttribute("name") name: String, @PluginElement("Filter") filter: Filter ): TestAppender = new TestAppender(name, filter) } @Plugin(name = "TestAppender", category = Core.CATEGORY_NAME, elementType = Appender.ELEMENT_TYPE) class TestAppender(name: String, filter: Filter) extends AbstractAppender(name, filter, null, true, Property.EMPTY_ARRAY) { override def append(eventObject: LogEvent): Unit = { // Needs to call .toImmutable because the same object is given every time, with only a mutated message val _ = TestAppender.events(eventObject.getThreadName) += eventObject.toImmutable } }
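The appender keys log events per thread name. Worth noting that withDefaultValue hands every missing key the same shared ListBuffer and never inserts it into the map, while getOrElseUpdate gives each key its own buffer. A minimal sketch of the per-key accumulation idiom, with illustrative names.

import scala.collection.mutable
import scala.collection.mutable.ListBuffer

// Illustrative only: accumulate values per key, one ListBuffer per key.
object PerKeyEventsSketch {
  private val events = mutable.HashMap.empty[String, ListBuffer[String]]

  def append(threadName: String, message: String): Unit =
    // getOrElseUpdate inserts a fresh buffer the first time a key is seen
    events.getOrElseUpdate(threadName, ListBuffer.empty) += message

  def main(args: Array[String]): Unit = {
    append("t1", "a"); append("t1", "b"); append("t2", "c")
    println(events("t1")) // ListBuffer(a, b)
    println(events("t2")) // ListBuffer(c)
  }
}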
Example 124
Source File: DecoupledHandler.scala From chisel-gui with BSD 3-Clause "New" or "Revised" License | 5 votes |
// See README.md for license details. package visualizer.models import scala.collection.mutable object DecoupledHandler { val ReadyName = "_ready" val ValidName = "_valid" val BitsName = "_bits_" val decoupledNames = new mutable.HashSet[String] case class Updater(pattern: String, add: (DecoupledHandler, String) => Unit, isMatch: String => Int) def hasPattern(pattern: String)(s: String): Int = s.indexOf(pattern) def hasPatternAtEnd(pattern: String)(s: String): Int = if (s.endsWith(pattern)) s.indexOf(pattern) else -1 val updaters = Seq( Updater(ReadyName, (d, s) => d.readyNameOpt = Some(s), hasPatternAtEnd(ReadyName)), Updater(ValidName, (d, s) => d.validNameOpt = Some(s), hasPatternAtEnd(ValidName)), Updater(BitsName, (d, s) => d.bits += s, hasPattern(BitsName)) ) var _indexId: Long = -1L def assignIndex(): Long = { _indexId += 1L _indexId } val signalNameToDecouple: mutable.HashMap[String, DecoupledHandler] = new mutable.HashMap() def prefix(s: String, index: Int): String = { s.take(index) } def lookForReadyValidBundles(names: Seq[String]): Unit = { names.sorted.foreach { symbolName => for (updater <- updaters) { val index = updater.isMatch(symbolName) if (index > 0) { val prefix = symbolName.take(index) val decoupledHandler = signalNameToDecouple.getOrElseUpdate(prefix, apply(prefix)) updater.add(decoupledHandler, symbolName) decoupledNames += symbolName } } } // signalNameToDecouple.retain { case (key, d) => d.readyNameOpt.isDefined && d.validNameOpt.isDefined } signalNameToDecouple.retain { case (key, d) => d.validNameOpt.isDefined } } def apply(prefix: String): DecoupledHandler = { DecoupledHandler(assignIndex(), prefix) } } case class DecoupledHandler(indexId: Long, prefix: String) { var readyNameOpt: Option[String] = None var validNameOpt: Option[String] = None val bits: mutable.ArrayBuffer[String] = new mutable.ArrayBuffer() def getChildNames: Seq[String] = { bits ++ readyNameOpt ++ validNameOpt } }
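A usage sketch of the object above, assuming the visualizer.models definitions are compiled (for example pasted into a REPL); the signal names are made up for illustration.

// Usage sketch: group _ready/_valid/_bits_ signals under their common prefix.
// The signal names below are made up.
import visualizer.models.DecoupledHandler

DecoupledHandler.lookForReadyValidBundles(Seq("io_in_ready", "io_in_valid", "io_in_bits_data"))

val handler = DecoupledHandler.signalNameToDecouple("io_in")
handler.getChildNames   // ArrayBuffer(io_in_bits_data, io_in_ready, io_in_valid)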
Example 125
Source File: EnumManager.scala From chisel-gui with BSD 3-Clause "New" or "Revised" License | 5 votes |
package visualizer.models import chisel3.experimental.EnumAnnotations.{EnumComponentAnnotation, EnumDefAnnotation} import firrtl.AnnotationSeq import firrtl.annotations.{CircuitName, ComponentName, ModuleName, Named} import firrtl.options.InputAnnotationFileAnnotation import firrtl.options.phases.GetIncludes import treadle.TreadleTester import scala.collection.mutable object EnumManager { val typeNames: mutable.HashSet[String] = new mutable.HashSet() val targetToTypeName: mutable.HashMap[Named, String] = new mutable.HashMap() val definitions: mutable.HashMap[String, mutable.HashMap[BigInt, String]] = new mutable.HashMap() val signalNameToDefinition: mutable.HashMap[String, mutable.HashMap[BigInt, String]] = new mutable.HashMap() def init(annotationSeq: AnnotationSeq,dataModel: DataModel, tester: TreadleTester): Unit = { val myAnnos = (new GetIncludes).transform(annotationSeq.filter(_.isInstanceOf[InputAnnotationFileAnnotation])) myAnnos.foreach { case EnumDefAnnotation(typeName, definition) => val map = definitions.getOrElseUpdate(typeName, new mutable.HashMap()) map ++= definition.map { case (name, value) => value -> name } case EnumComponentAnnotation(target, typeName) => typeNames += typeName targetToTypeName(target) = typeName case _ => // irrelevant annotation } val engine = tester.engine val symbolTable = engine.symbolTable targetToTypeName.keys.foreach { case tt @ ComponentName(componentName, ModuleName(annoModuleName, _)) => symbolTable.instanceNameToModuleName.foreach { case (instanceName, moduleName) => if (annoModuleName == moduleName) { // this little bit of trickery is because for treadle top level signals don't carry a module name val enumWireName = if (instanceName.isEmpty) { componentName } else { instanceName + "." + componentName } dataModel.nameToSignal.get(enumWireName) match { case Some(_) => signalNameToDefinition(enumWireName) = definitions(targetToTypeName(tt)) case _ => } } } } } def hasEnumDefinition(signalName: String): Boolean = { signalNameToDefinition.contains(signalName) } def getDefinition(signalName: String): Option[mutable.HashMap[BigInt, String]] = { signalNameToDefinition.get(signalName) } }
Example 126
Source File: DecoupledFireRestrictor.scala From chisel-gui with BSD 3-Clause "New" or "Revised" License | 5 votes |
// See README.md for license details. package visualizer.models import org.scalatest.{FreeSpec, Matchers} import scala.collection.mutable object DecoupledFireRestrictor { case class Interval(start: Long, end: Long, value: BigInt) def buildTimeVector(buffer: mutable.ArrayBuffer[Transition]): List[Interval] = { val b = buffer.toList.sliding(2) val newList = b.flatMap { case transition1 :: transition2 :: Nil => List( Interval(transition1.timestamp, transition2.timestamp, transition1.value), Interval(transition1.timestamp, transition2.timestamp, transition1.value) ) case transition :: Nil => List.empty } newList }.toList } class DecoupledFireRestrictorTest extends FreeSpec with Matchers { "select from an array based on values" in {} }
Example 127
Source File: NGrams.scala From featran with Apache License 2.0 | 5 votes |
package com.spotify.featran.transformers import com.spotify.featran.FeatureBuilder import scala.collection.{mutable, SortedMap} def fromSettings( setting: Settings ): Transformer[Seq[String], Set[String], SortedMap[String, Int]] = NGrams(setting.name) } private[featran] class NGrams(name: String, val low: Int, val high: Int, val sep: String) extends NHotEncoder(name, false) { override def prepare(a: Seq[String]): Set[String] = ngrams(a).toSet override def buildFeatures( a: Option[Seq[String]], c: SortedMap[String, Int], fb: FeatureBuilder[_] ): Unit = super.buildFeatures(a.map(ngrams), c, fb) private[transformers] def ngrams(a: Seq[String]): Seq[String] = { val max = if (high == -1) a.length else high val b = Seq.newBuilder[String] var i = low while (i <= max) { if (i == 1) { b ++= a } else if (i <= a.size) { val q = mutable.Queue[String]() var j = 0 val it = a.iterator while (j < i) { q.enqueue(it.next()) j += 1 } b += mkNGram(q, sep) while (it.hasNext) { q.dequeue() q.enqueue(it.next()) b += mkNGram(q, sep) } } i += 1 } b.result() } private def mkNGram(xs: mutable.Queue[String], sep: String): String = { val sb = StringBuilder.newBuilder val i = xs.iterator sb.append(i.next()) while (i.hasNext) { sb.append(sep).append(i.next()) } sb.mkString } }
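The ngrams method above slides a fixed-size window over the tokens with a mutable.Queue, enqueuing the next token and dequeuing the oldest. A standalone sketch of that windowing idiom, simplified to a single n rather than a low/high range.

import scala.collection.mutable

// Illustrative only: build word n-grams with a fixed-size sliding window kept in a mutable.Queue.
object NGramSketch {
  def ngrams(tokens: Seq[String], n: Int, sep: String = " "): Seq[String] = {
    val out = Seq.newBuilder[String]
    val window = mutable.Queue.empty[String]
    tokens.foreach { t =>
      window.enqueue(t)
      if (window.size > n) window.dequeue()     // keep at most n tokens in the window
      if (window.size == n) out += window.mkString(sep)
    }
    out.result()
  }

  def main(args: Array[String]): Unit =
    println(ngrams(Seq("to", "be", "or", "not"), 2)) // List(to be, be or, or not)
}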
Example 128
Source File: MDLPDiscretizer.scala From featran with Apache License 2.0 | 5 votes |
package com.spotify.featran.transformers.mdl import scala.collection.JavaConverters._ import scala.collection.mutable import scala.reflect.ClassTag private[transformers] class MDLPDiscretizer[T: ClassTag]( data: Seq[(T, Double)], stoppingCriterion: Double = MDLPDiscretizer.DefaultStoppingCriterion, minBinPercentage: Double = MDLPDiscretizer.DefaultMinBinPercentage ) extends Serializable { private val labels = { val m = mutable.Map.empty[T, Int] data.foreach { case (k, _) => if (!m.contains(k)) { m(k) = m.size } } m } private def isBoundary(f1: Array[Long], f2: Array[Long]): Boolean = { val l = math.min(f1.length, f2.length) var count = 0 var i = 0 while (i < l && count <= 1) { if (f1(i) + f2(i) != 0) { count += 1 } i += 1 } count > 1 } private def midpoint(x1: Float, x2: Float): Float = (x1 + x2) / 2.0f def discretize(maxBins: Int = MDLPDiscretizer.DefaultMaxBins): Seq[Double] = { val featureValues = new java.util.TreeMap[Float, Array[Long]]() data.foreach { case (label, value) => val key = value.toFloat val i = labels(label) val x = featureValues.get(key) if (x == null) { val y = Array.fill(labels.size)(0L) y(i) = 1L featureValues.put(key, y) } else { x(i) += 1L } } val cutPoint = if (!featureValues.isEmpty) { val it = featureValues.asScala.iterator var (lastX, lastFreqs) = it.next() var result = List.empty[(Float, Array[Long])] var accumFreqs = lastFreqs while (it.hasNext) { val (x, freqs) = it.next() if (isBoundary(freqs, lastFreqs)) { result = (midpoint(x, lastX), accumFreqs) :: result accumFreqs = Array.fill(labels.size)(0L) } lastX = x lastFreqs = freqs MDLUtil.plusI(accumFreqs, freqs) } (lastX, accumFreqs) :: result } else { Nil } val minBinWeight: Long = (minBinPercentage * data.length / 100.0).toLong val finder = new ThresholdFinder(labels.size, stoppingCriterion, maxBins, minBinWeight) finder.findThresholds(cutPoint.sortBy(_._1)).map(_.toDouble) } } private[transformers] object MDLPDiscretizer { val DefaultStoppingCriterion: Double = 0.0 val DefaultMinBinPercentage: Double = 0.0 val DefaultMaxBins: Int = 50 }
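The labels map above assigns each distinct label a dense index equal to the map's current size when the label is first seen. The same idiom in isolation, as an illustrative helper using only the standard library.

import scala.collection.mutable

// Illustrative only: assign dense integer indices to labels in order of first appearance.
object LabelIndexSketch {
  def index[T](labels: Seq[T]): Map[T, Int] = {
    val m = mutable.Map.empty[T, Int]
    labels.foreach { l =>
      if (!m.contains(l)) m(l) = m.size  // next free index = current map size
    }
    m.toMap
  }

  def main(args: Array[String]): Unit =
    println(index(Seq("cat", "dog", "cat", "bird")).toSeq.sortBy(_._2))
    // List((cat,0), (dog,1), (bird,2))
}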
Example 129
Source File: CanBuild.scala From featran with Apache License 2.0 | 5 votes |
package com.spotify.featran import scala.collection.mutable import scala.reflect.ClassTag // Workaround for CanBuildFrom not serializable trait CanBuild[T, M[_]] extends Serializable { def apply(): mutable.Builder[T, M[T]] } object CanBuild { // Collection types in _root_.scala.* implicit def iterableCB[T]: CanBuild[T, Iterable] = new CanBuild[T, Iterable] { override def apply(): mutable.Builder[T, Iterable[T]] = Iterable.newBuilder } implicit def seqCB[T]: CanBuild[T, Seq] = new CanBuild[T, Seq] { override def apply(): mutable.Builder[T, Seq[T]] = Seq.newBuilder } implicit def indexedSeqCB[T]: CanBuild[T, IndexedSeq] = new CanBuild[T, IndexedSeq] { override def apply(): mutable.Builder[T, IndexedSeq[T]] = IndexedSeq.newBuilder } implicit def listCB[T]: CanBuild[T, List] = new CanBuild[T, List] { override def apply(): mutable.Builder[T, List[T]] = List.newBuilder } implicit def vectorCB[T]: CanBuild[T, Vector] = new CanBuild[T, Vector] { override def apply(): mutable.Builder[T, Vector[T]] = Vector.newBuilder } implicit def bufferCB[T]: CanBuild[T, mutable.Buffer] = new CanBuild[T, mutable.Buffer] { override def apply(): mutable.Builder[T, mutable.Buffer[T]] = mutable.Buffer.newBuilder } implicit def floatArrayCB: CanBuild[Float, Array] = new CanBuild[Float, Array] { override def apply(): mutable.Builder[Float, Array[Float]] = Array.newBuilder[Float] } implicit def doubleArrayCB: CanBuild[Double, Array] = new CanBuild[Double, Array] { override def apply(): mutable.Builder[Double, Array[Double]] = Array.newBuilder[Double] } implicit def arrayCB[T: ClassTag]: CanBuild[T, Array] = new CanBuild[T, Array] { override def apply(): mutable.Builder[T, Array[T]] = Array.newBuilder[T] } }
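A usage sketch of the CanBuild type class defined above; the fillN helper is illustrative and not part of featran.

// Usage sketch, e.g. in a REPL with featran on the classpath.
// fillN is an illustrative helper, not part of the library.
import com.spotify.featran.CanBuild

def fillN[T, M[_]](n: Int, x: T)(implicit cb: CanBuild[T, M]): M[T] = {
  val b = cb()                       // obtain a fresh, serializable builder
  var i = 0
  while (i < n) { b += x; i += 1 }
  b.result()
}

fillN[String, Vector](3, "a")        // Vector(a, a, a)   (via vectorCB)
fillN[Double, Array](2, 0.5)         // Array(0.5, 0.5)   (via doubleArrayCB)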
Example 130
Source File: CollectionType.scala From featran with Apache License 2.0 | 5 votes |
package com.spotify.featran import simulacrum._ import scala.collection.mutable import scala.reflect.ClassTag @typeclass trait CollectionType[M[_]] { def pure[A, B: ClassTag](ma: M[A])(a: B): M[B] def map[A, B: ClassTag](ma: M[A])(f: A => B): M[B] def reduce[A](ma: M[A])(f: (A, A) => A): M[A] def cross[A, B: ClassTag](ma: M[A])(mb: M[B]): M[(A, B)] } object CollectionType { implicit def scalaCollectionType[M[_]](implicit cb: CanBuild[_, M], ti: M[_] => Iterable[_] ): CollectionType[M] = new CollectionType[M] { override def map[A, B: ClassTag](ma: M[A])(f: A => B): M[B] = { val builder = cb().asInstanceOf[mutable.Builder[B, M[B]]] ma.asInstanceOf[Iterable[A]].foreach(a => builder += f(a)) builder.result() } override def pure[A, B: ClassTag](ma: M[A])(b: B): M[B] = { val builder = cb().asInstanceOf[mutable.Builder[B, M[B]]] builder += b builder.result() } override def reduce[A](ma: M[A])(f: (A, A) => A): M[A] = { val builder = cb().asInstanceOf[mutable.Builder[A, M[A]]] if (ma.nonEmpty) { builder += ma.asInstanceOf[Iterable[A]].reduce(f) } builder.result() } override def cross[A, B: ClassTag](ma: M[A])(mb: M[B]): M[(A, B)] = { val builder = cb().asInstanceOf[mutable.Builder[(A, B), M[(A, B)]]] if (mb.nonEmpty) { val b = mb.asInstanceOf[Iterable[B]].head ma.asInstanceOf[Iterable[A]].foreach(a => builder += ((a, b))) } builder.result() } } implicit val arrayCollectionType: CollectionType[Array] = new CollectionType[Array] { override def pure[A, B: ClassTag](ma: Array[A])(b: B): Array[B] = Array(b) override def map[A, B: ClassTag](ma: Array[A])(f: A => B): Array[B] = ma.map(f) override def reduce[A](ma: Array[A])(f: (A, A) => A): Array[A] = { // workaround for "No ClassTag available for A" val r = ma.take(1) r(0) = ma.reduce(f) r } override def cross[A, B: ClassTag](ma: Array[A])(mb: Array[B]): Array[(A, B)] = ma.map((_, mb.head)) } }
Example 131
Source File: RichArrayBuffer.scala From swave with Mozilla Public License 2.0 | 5 votes |
package swave.core.util import scala.annotation.tailrec import scala.collection.mutable final class RichArrayBuffer[A](val underlying: mutable.ArrayBuffer[A]) extends AnyVal { def inplaceSortBy[B](f: A ⇒ B)(implicit ord: Ordering[B]): Unit = { val buf = underlying.asInstanceOf[mutable.ArrayBuffer[AnyRef]] val array = buf.toArray java.util.Arrays.sort(array, ord.on(f).asInstanceOf[Ordering[AnyRef]]) buf.clear() buf ++= array () } def removeWhere(f: A ⇒ Boolean): Unit = { @tailrec def rec(ix: Int): Unit = if (ix >= 0) { if (f(underlying(ix))) underlying.remove(ix) rec(ix - 1) } rec(underlying.size - 1) } def removeIfPresent(elem: A): Unit = underlying.indexOf(elem) match { case -1 ⇒ case ix ⇒ { underlying.remove(ix); () } } }
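removeWhere above walks the buffer indices from the end, so removing an element never shifts positions that are still to be visited. The same technique as a standalone sketch.

import scala.collection.mutable

// Illustrative only: delete matching elements from an ArrayBuffer in place.
// Walking the indices from the end means earlier removals never shift the
// positions that remain to be visited.
object RemoveInPlaceSketch {
  def removeWhere[A](buf: mutable.ArrayBuffer[A])(p: A => Boolean): Unit = {
    var ix = buf.size - 1
    while (ix >= 0) {
      if (p(buf(ix))) buf.remove(ix)
      ix -= 1
    }
  }

  def main(args: Array[String]): Unit = {
    val buf = mutable.ArrayBuffer(1, 2, 3, 4, 5)
    removeWhere(buf)(_ % 2 == 0)
    println(buf) // ArrayBuffer(1, 3, 5)
  }
}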
Example 132
Source File: package.scala From swave with Mozilla Public License 2.0 | 5 votes |
package swave.core import java.nio.charset.Charset import com.typesafe.config.Config import scala.concurrent.duration._ import scala.concurrent.Future import scala.collection.mutable import shapeless.HList package object util { private[this] val _identityFunc = (x: Any) ⇒ x def identityFunc[T]: T ⇒ T = _identityFunc.asInstanceOf[T ⇒ T] def identityHash(obj: AnyRef): String = Integer.toHexString(System.identityHashCode(obj)) val dropFunc: Any ⇒ Unit = _ ⇒ () val dropFunc2: (Any, Any) ⇒ Unit = (_, _) ⇒ () val oneIntFunc: Any ⇒ Int = _ ⇒ 1 val UTF8: Charset = Charset.forName("UTF-8") val ASCII: Charset = Charset.forName("US-ASCII") def isPowerOf2(i: Int): Boolean = Integer.lowestOneBit(i) == i def roundUpToPowerOf2(i: Int): Int = 1 << (32 - Integer.numberOfLeadingZeros(i - 1)) def Runnable(body: ⇒ Unit): Runnable = new Runnable { def run(): Unit = body } implicit def richByteArray(array: Array[Byte]): RichByteArray = new RichByteArray(array) implicit def richConfig[T](config: Config): RichConfig = new RichConfig(config) implicit def richDuration(duration: Duration): RichDuration = new RichDuration(duration) implicit def richFiniteDuration(duration: FiniteDuration): RichFiniteDuration = new RichFiniteDuration(duration) implicit def richFuture[T](future: Future[T]): RichFuture[T] = new RichFuture(future) implicit def richHList[L <: HList](list: L): RichHList[L] = new RichHList(list) implicit def richInt(int: Int): RichInt = new RichInt(int) implicit def richList[T](list: List[T]): RichList[T] = new RichList(list) implicit def richLong(long: Long): RichLong = new RichLong(long) implicit def richArrayBuffer[T](seq: mutable.ArrayBuffer[T]): RichArrayBuffer[T] = new RichArrayBuffer(seq) implicit def richRefArray[T <: AnyRef](array: Array[T]): RichRefArray[T] = new RichRefArray(array) implicit def richSeq[T](seq: Seq[T]): RichSeq[T] = new RichSeq(seq) implicit def richString(string: String): RichString = new RichString(string) implicit def richTraversable[T](seq: Traversable[T]): RichTraversable[T] = new RichTraversable(seq) }
Example 133
Source File: PrefixAndTailStage.scala From swave with Mozilla Public License 2.0 | 5 votes |
package swave.core.impl.stages.inout import scala.collection.mutable import swave.core.impl.stages.spout.SubSpoutStage import swave.core.impl.{Inport, Outport} import swave.core.impl.stages.InOutStage import swave.core.macros._ import swave.core._ // format: OFF @StageImplementation private[core] final class PrefixAndTailStage(prefixSize: Int, prefixBuilder: mutable.Builder[Any, AnyRef]) extends InOutStage { requireArg(prefixSize > 0, "`prefixSize` must be > 0") def kind = Stage.Kind.InOut.PrefixAndTail(prefixSize) connectInOutAndSealWith { (in, out) ⇒ region.impl.registerForXStart(this) running(in, out) } def running(in: Inport, out: Outport) = { def awaitingXStart() = state( xStart = () => { in.request(prefixSize.toLong) assemblingPrefix(prefixSize.toLong, false) }) def draining(in: Inport, sub: Outport) = state( intercept = false, request = requestF(in), cancel = stopCancelF(in), onNext = onNextF(sub), onComplete = stopCompleteF(sub), onError = stopErrorF(sub)) }
Example 134
Source File: Infrastructure.scala From swave with Mozilla Public License 2.0 | 5 votes |
package swave.core.graph.impl import scala.collection.mutable import swave.core.graph.{Digraph, Glyph} import swave.core.util._ private[graph] object Infrastructure { type Edge = (Node, Node) final class Node(val id: Int, val vertex: Any) { val preds = new mutable.ArrayBuffer[Node] val succs = new mutable.ArrayBuffer[Node] def isSingle = preds.isEmpty && succs.isEmpty def isRoot = preds.isEmpty def isLeaf = succs.isEmpty def isInOut = preds.size == 1 && succs.size == 1 def isFanIn = preds.size > 1 def isFanOut = succs.size > 1 var isHidden = false var desCount = -1 var inDegree = -1 var xRank: XRank = _ val glyphs = new mutable.ArrayBuffer[Glyph] var attributes = List.empty[AnyRef] override def toString = s"Node(vertex=$vertex, id=$id, rankGroup=${if (xRank != null && xRank.group != null) xRank.group.groupId else "null"}, " + s"attrs=${attributes.mkString("[", ",", "]")}, " + s"preds=${preds.map(_.id).mkString("[", ",", "]")}, " + s"succs=${succs.map(_.id).mkString("[", ",", "]")}" + (if (isHidden) ", hidden)" else ")") def partialCopyWith(newVertex: Any): Node = { val n = new Node(id, newVertex) n.isHidden = isHidden n.desCount = desCount n.inDegree = inDegree n.xRank = xRank glyphs.foreach(g ⇒ n.glyphs += g) n.attributes = attributes n } } final class XRank(val id: Int) { var group: XRankGroup = _ var level = -1 // smaller values -> lay out to the left, higher values -> lay out to the right var preds = List.empty[XRank] var succs = List.empty[XRank] override def toString = s"XRank(id=$id, group=${group.groupId}, level=$level, " + s"preds=[${preds.map(_.id).mkString(",")}], succs=[${succs.map(_.id).mkString(",")}])" } final class XRankGroup(var groupId: Int) { override def equals(that: Any): Boolean = that.isInstanceOf[XRankGroup] && that.asInstanceOf[XRankGroup].groupId == groupId override def hashCode() = groupId } type EdgeAttrs = Map[Edge, Digraph.EdgeAttributes] implicit class RichEdgeAttrs(val underlying: EdgeAttrs) extends AnyVal { def get(edge: Edge): Digraph.EdgeAttributes = underlying.getOrElse(edge, 0) def has(edge: Edge, attrs: Digraph.EdgeAttributes): Boolean = (get(edge) & attrs) != 0 def add(edge: Edge, attrs: Digraph.EdgeAttributes): EdgeAttrs = underlying.updated(edge, get(edge) | attrs) def move(sourceEdge: Edge, targetEdges: List[Edge], filter: Int = Digraph.EdgeAttributes.All): EdgeAttrs = underlying.get(sourceEdge) match { case None ⇒ underlying case Some(flags) ⇒ val filtered = flags & filter val map = if (filtered != 0) targetEdges.foldLeft(underlying)(_ add (_, filtered)) else underlying map - sourceEdge } def printAll() = { for ((edge, flags) ← underlying) println(format(edge) + ": " + flags) println() } } val Root: AnyRefExtractor[Node, Seq[Node]] = AnyRefExtractor(n ⇒ if (n.isRoot) n.succs else null) val Leaf: AnyRefExtractor[Node, Seq[Node]] = AnyRefExtractor(n ⇒ if (n.isLeaf) n.preds else null) val InOut: AnyRefExtractor[Node, (Node, Node)] = AnyRefExtractor(n ⇒ if (n.isInOut) n.preds.head → n.succs.head else null) def format(edge: Edge) = s"[${edge._1.id} -> ${edge._2.id}]" }
Example 135
Source File: XRanking.scala From swave with Mozilla Public License 2.0 | 5 votes |
package swave.core.graph.impl import scala.annotation.tailrec import scala.collection.mutable import Infrastructure._ private[graph] object XRanking { def assignXRanks(rootNodes: Vector[Node], allNodes: Vector[Node]): Unit = { // STEP 1: partition the nodes into group which have identical XRanks // by assigning them the same XRank instance val ranks = new mutable.ArrayBuffer[XRank]; { def createXRank(): XRank = { val rank = new XRank(ranks.size) ranks += rank rank } def _visit(node: Node, rank: XRank): Unit = visit(node, rank) @tailrec def visit(node: Node, rank: XRank): Unit = if (node.xRank eq null) { val nodeRank = if (node.isFanIn) createXRank() else rank node.xRank = nodeRank (node.succs: Seq[Node]) match { case Nil ⇒ // leaf, just backtrack case Seq(next) ⇒ visit(next, nodeRank) case succs ⇒ succs.foreach(_visit(_, createXRank())) } } for (root ← rootNodes) visit(root, createXRank()) } // STEP 2: connect the created XRank instances with edges whereby // an edge from rank `a` to rank `b` means "a should be laid out to the left of b" // which results in the XRank instances being structured into a graph forest { val connectRanks: (Node, Node) ⇒ Node = { (a, b) ⇒ a.xRank.succs ::= b.xRank b.xRank.preds ::= a.xRank b } for (node ← allNodes) { if (node.isFanIn) node.preds.reduceLeft(connectRanks) if (node.isFanOut) node.succs.reduceLeft(connectRanks) } } // STEP3: identify the connected parts of the rank forest (i.e. the rank graphs) // and mark all nodes of a connected part with the same (value equality) XRankGroup { val groupIds = Iterator.from(0) def assingGroup(rank: XRank, group: XRankGroup): Unit = if (rank.group eq null) { rank.group = group rank.preds.foreach(assingGroup(_, group)) } else group.groupId = rank.group.groupId // merge the two groups ranks.withFilter(_.succs.isEmpty).foreach(assingGroup(_, new XRankGroup(groupIds.next()))) } // STEP4: for each XRankGroup: apply a simple layering algorithm { val bitSet = new mutable.BitSet(ranks.size) for (groupRanks ← ranks.groupBy(_.group).valuesIterator) { def assignLevel(rank: XRank, level: Int): Unit = if (!bitSet.contains(rank.id)) { bitSet += rank.id if (level > rank.level) rank.level = level rank.succs.foreach(assignLevel(_, level + 1)) bitSet -= rank.id () } // else println("XRank crossing!") groupRanks.withFilter(_.preds.isEmpty).foreach(assignLevel(_, 0)) def compactLevels(rank: XRank): Boolean = !bitSet.contains(rank.id) && { bitSet += rank.id val minSubRank = if (rank.succs.nonEmpty) rank.succs.minBy(_.level).level else 0 val progress = rank.level < minSubRank - 1 && { rank.level = minSubRank - 1; true } val result = rank.succs.foldRight(progress)(compactLevels(_) || _) bitSet -= rank.id result } val leafs = groupRanks.filter(_.succs.isEmpty) while (leafs.foldRight(false)(compactLevels(_) || _)) () } } } }
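Steps 3 and 4 above use a mutable.BitSet to mark the ranks currently on the recursion stack, which keeps the traversal from looping on cycles. A minimal standalone sketch of that guard; the graph and names are made up.

import scala.collection.mutable

// Illustrative only: walk a possibly cyclic graph depth-first, using a mutable.BitSet
// to mark the nodes currently on the recursion stack so cycles don't recurse forever.
object BitSetDfsSketch {
  def reachable(succs: Map[Int, List[Int]], start: Int): List[Int] = {
    val onStack = new mutable.BitSet
    val seen = mutable.SortedSet.empty[Int]

    def visit(node: Int): Unit =
      if (!onStack.contains(node)) {
        onStack += node                          // mark: currently being visited
        seen += node
        succs.getOrElse(node, Nil).foreach(visit)
        onStack -= node                          // unmark on the way back up
      }

    visit(start)
    seen.toList
  }

  def main(args: Array[String]): Unit = {
    val graph = Map(0 -> List(1, 2), 1 -> List(2), 2 -> List(0)) // 2 -> 0 closes a cycle
    println(reachable(graph, 0)) // List(0, 1, 2)
  }
}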
Example 136
Source File: ProxyCrawler.scala From ProxyCrawler with Apache License 2.0 | 5 votes |
package org.crowdcrawler.proxycrawler import java.io.IOException import java.net.URI import java.security.cert.X509Certificate import com.typesafe.scalalogging.Logger import org.apache.http.client.methods.HttpGet import org.apache.http.impl.client.HttpClients import org.apache.http.ssl.{TrustStrategy, SSLContexts} import org.apache.http.conn.ssl.{NoopHostnameVerifier, SSLConnectionSocketFactory} import org.apache.http.util.EntityUtils import org.crowdcrawler.proxycrawler.crawler.plugins.AbstractPlugin import org.apache.http.HttpHeaders import org.slf4j.LoggerFactory import scala.collection.immutable import scala.collection.mutable class ProxyCrawler(plugins: List[AbstractPlugin]) { *;q=0.8"), (HttpHeaders.ACCEPT_ENCODING, "gzip, deflate, sdch"), (HttpHeaders.ACCEPT_LANGUAGE, "en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4"), (HttpHeaders.CONNECTION, "keep-alive") ) private val CLIENT = { // trust all certificates including self-signed certificates val sslContext = SSLContexts.custom().loadTrustMaterial(null, new TrustStrategy() { def isTrusted(chain: Array[X509Certificate], authType: String) = true }).build() val connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE) HttpClients.custom().setSSLSocketFactory(connectionFactory).build() } def apply(classNames: String*): ProxyCrawler = { val plugins = mutable.ListBuffer.empty[AbstractPlugin] for (className <- classNames) { val clazz = Class.forName("org.crowdcrawler.proxycrawler.crawler.plugins." + className) plugins += clazz.newInstance().asInstanceOf[AbstractPlugin] } new ProxyCrawler(plugins.toList) } private def createRequest(uri: URI, headers: immutable.Map[String, String]): HttpGet = { val request = new HttpGet(uri) for (header <- headers) { request.setHeader(header._1, header._2) } request } }
Example 137
Source File: IpcnOrgPlugin.scala From ProxyCrawler with Apache License 2.0 | 5 votes |
package org.crowdcrawler.proxycrawler.crawler.plugins import org.crowdcrawler.proxycrawler.ProxyInfo import org.jsoup.Jsoup import java.net.URI import java.nio.charset.Charset import scala.collection.mutable final class IpcnOrgPlugin extends AbstractPlugin { val seeds: List[URI] = List( new URI("http://proxy.ipcn.org/proxylist.html"), new URI("http://proxy.ipcn.org/proxylist2.html") ) def extract(html: String): List[ProxyInfo] = { val result = mutable.ListBuffer.empty[ProxyInfo] val doc = Jsoup.parse(html) val preText = doc.select("tr > td > pre").text val rows = preText.split("\n") for (row <- rows) { if (row.matches("[0-9]+(?:\\.[0-9]+){3}:[0-9]+")) { val splitted = row.split(":") val host = splitted(0) val port = splitted(1).toInt result += ProxyInfo(host, port, "HTTP", 0, null, null) } } result.toList } def next(html: String): List[URI] = List() override val responseCharset: Charset = Charset.forName("GB2312") }
Example 138
Source File: CoolProxyNetPlugin.scala From ProxyCrawler with Apache License 2.0 | 5 votes |
package org.crowdcrawler.proxycrawler.crawler.plugins import org.crowdcrawler.proxycrawler.ProxyInfo import org.jsoup.Jsoup import java.net.URI import java.nio.charset.StandardCharsets import sun.misc.BASE64Decoder import scala.collection.mutable import scala.collection.JavaConversions._ import util.control.Breaks._ class CoolProxyNetPlugin extends AbstractPlugin { private final val decoder: BASE64Decoder = new BASE64Decoder val seeds: List[URI] = List(new URI("http://www.cool-proxy.net/proxies/http_proxy_list/page:1")) private def decryptIP(ip: String): String = { val base64Encoded = new StringBuilder for (ch <- ip) { val newChar = if (Character.isAlphabetic(ch)) { if (ch.toLower < 'n') (ch + 13).toChar else (ch - 13).toChar } else { ch } base64Encoded += newChar } val bytes = decoder.decodeBuffer(base64Encoded.toString()) new String(bytes, StandardCharsets.UTF_8) } def extract(html: String): List[ProxyInfo] = { val result = mutable.ListBuffer.empty[ProxyInfo] val doc = Jsoup.parse(html) val rows = doc.select("table > tbody > tr") for (row <- rows) { breakable { val tds = row.select("td") if (tds.isEmpty) break val host = { val hostTmp = tds.get(0).html val startWith = "Base64.decode(str_rot13(\"" val start = hostTmp.indexOf(startWith) if (start == -1) break val end = hostTmp.indexOf("\")))", start) if (end == -1) break val hostEncrypted = hostTmp.substring(start + startWith.length, end) decryptIP(hostEncrypted) } val port = tds.get(1).text.toInt val location = tds.get(3).text val speed = tds.get(8).text.toInt result.add(ProxyInfo(host, port, "HTTP", speed, location, null)) } } result.toList } def next(html: String): List[URI] = { val result = mutable.ListBuffer.empty[URI] val doc = Jsoup.parse(html) val rows = doc.select(".pagination > span > a[href]") for (row <- rows) { val href = row.attr("href") result += new URI("http://www.cool-proxy.net" + href) } result.toList } }
Example 139
Source File: CnProxyComPlugin.scala From ProxyCrawler with Apache License 2.0 | 5 votes |
package org.crowdcrawler.proxycrawler.crawler.plugins import org.crowdcrawler.proxycrawler.ProxyInfo import org.jsoup.Jsoup import java.net.URI import java.nio.charset.Charset import scala.collection.{immutable,mutable} import util.control.Breaks._ private val charNum = immutable.Map( "v" -> "3", "m" -> "4", "a" -> "2", "l" -> "9", "q" -> "0", "b" -> "5", "i" -> "7", "w" -> "6", "r" -> "8", "c" -> "1" ) val seeds: List[URI] = { List( new URI("http://www.cnproxy.com/proxy1.html"), new URI("http://www.cnproxy.com/proxy2.html"), new URI("http://www.cnproxy.com/proxy3.html"), new URI("http://www.cnproxy.com/proxy4.html"), new URI("http://www.cnproxy.com/proxy5.html"), new URI("http://www.cnproxy.com/proxy6.html"), new URI("http://www.cnproxy.com/proxy7.html"), new URI("http://www.cnproxy.com/proxy8.html"), new URI("http://www.cnproxy.com/proxy9.html"), new URI("http://www.cnproxy.com/proxy10.html"), new URI("http://www.cnproxy.com/proxyedu1.html"), new URI("http://www.cnproxy.com/proxyedu2.html") ) } private def decryptPort(encrypted: String): Int = encrypted.split("\\+").map(str => charNum(str)).mkString.toInt def extract(html: String): List[ProxyInfo] = { val result = mutable.ListBuffer.empty[ProxyInfo] val doc = Jsoup.parse(html) val rows = doc.select("#proxylisttb > table").get(2).select("tr") for (i <- 1 until rows.size()) { breakable { // skip the first row val row = rows.get(i) val tds = row.select("td") val host = tds.get(0).text val port = { val pattern = "document.write(\":\"+" val original = tds.get(0).html() val pos1 = original.indexOf(pattern) if (pos1 == -1) break val pos2 = original.indexOf(")</script>", pos1) if (pos2 == -1) break val portStr = original.substring(pos1 + pattern.length, pos2) decryptPort(portStr) } val schema = tds.get(1).text val speeds = tds.get(2).text val speed = { val splitted = speeds.split(",") var sum = 0 for (str <- splitted) { val tmp = str.toInt sum += tmp } sum / splitted.length } val country = tds.get(3).text val proxyInfo = ProxyInfo(host, port, schema, speed, country, null) result += proxyInfo } } result.toList } def next(html: String): List[URI] = List() override val responseCharset: Charset = Charset.forName("GB2312") }
Example 140
Source File: ProxyListOrg.scala From ProxyCrawler with Apache License 2.0 | 5 votes |
package org.crowdcrawler.proxycrawler.crawler.plugins import java.net.URI import org.crowdcrawler.proxycrawler.ProxyInfo import org.jsoup.Jsoup import scala.collection.mutable import scala.collection.JavaConversions._ class ProxyListOrg extends AbstractPlugin { val seeds: List[URI] = List(new URI("https://proxy-list.org/english/index.php?p=1")) def extract(html: String): List[ProxyInfo] = { val result = mutable.ListBuffer.empty[ProxyInfo] val doc = Jsoup.parse(html) val rows = doc.select("div.table-wrap > div > ul") for (row <- rows) { val hostPort = row.select("li.proxy").text() val host = hostPort.split(":")(0) val port = hostPort.split(":")(1).toInt val schema = { val tmp = row.select("li.https").text() if (tmp == "-") "HTTP" else tmp.toUpperCase } val speed = { val tmp = row.select("li.speed").text() if (tmp.contains("kbit")) { (tmp.dropRight(4).toDouble * 1024).toInt } else { 0 } } val location = row.select("li.country-city > div > span.country").first().attr("title") result += ProxyInfo(host, port, schema, speed, location, null) } result.toList } def next(html: String): List[URI] = { val result = mutable.ListBuffer.empty[URI] val rootURL = "https://proxy-list.org/english" val doc = Jsoup.parse(html) val rows = doc.select("div.table-menu > a.item[href]") for (row <- rows) { val href = row.attr("href") result += new URI(rootURL + href.substring(1)) } result.toList } }
Example 141
Source File: SocksProxyNet.scala From ProxyCrawler with Apache License 2.0 | 5 votes |
package org.crowdcrawler.proxycrawler.crawler.plugins import java.net.URI import org.crowdcrawler.proxycrawler.ProxyInfo import org.jsoup.Jsoup import scala.collection.mutable import scala.collection.JavaConversions._ class SocksProxyNet extends AbstractPlugin { val seeds: List[URI] = List(new URI("http://www.socks-proxy.net/")) def extract(html: String): List[ProxyInfo] = { val result = mutable.ListBuffer.empty[ProxyInfo] val doc = Jsoup.parse(html) val rows = doc.select("table#proxylisttable > tbody > tr") for (row <- rows) { val tds = row.select("td") val host = tds.get(0).text val port = tds.get(1).text.toInt val location = tds.get(3).text val schema= tds.get(4).text.toUpperCase result += ProxyInfo(host, port, schema, 0, location, null) } result.toList } def next(html: String): List[URI] = List() }
Example 142
Source File: MemoryRepository.scala From polynote with Apache License 2.0 | 5 votes |
package polynote.testing.repository import java.io.FileNotFoundException import java.net.URI import polynote.kernel.{BaseEnv, GlobalEnv, NotebookRef, TaskB} import polynote.messages._ import polynote.server.repository.NotebookRepository import polynote.testing.kernel.MockNotebookRef import zio.{RIO, Task, UIO, ZIO} import scala.collection.mutable class MemoryRepository extends NotebookRepository { private val notebooks = new mutable.HashMap[String, Notebook]() def notebookExists(path: String): UIO[Boolean] = ZIO.effectTotal(notebooks contains path) def notebookURI(path: String): UIO[Option[URI]] = ZIO.effectTotal(if (notebooks contains path) Option(new URI(s"memory://$path")) else None) def loadNotebook(path: String): Task[Notebook] = ZIO.effectTotal(notebooks.get(path)).get.mapError(err => new FileNotFoundException(path)) def openNotebook(path: String): RIO[BaseEnv with GlobalEnv, NotebookRef] = loadNotebook(path).flatMap(nb => MockNotebookRef(nb, tup => saveNotebook(tup._2))) def saveNotebook(nb: Notebook): UIO[Unit] = ZIO.effectTotal(notebooks.put(nb.path, nb)) def listNotebooks(): UIO[List[String]] = ZIO.effectTotal(notebooks.keys.toList) def createNotebook(path: String, maybeUriOrContent: Option[String]): UIO[String] = ZIO.effectTotal(notebooks.put(path, Notebook(path, ShortList.of(), None))).as(path) def createAndOpen(path: String, notebook: Notebook, version: Int): RIO[BaseEnv with GlobalEnv, NotebookRef] = ZIO.effectTotal(notebooks.put(path, notebook)).flatMap { _ => MockNotebookRef(notebook, tup => saveNotebook(tup._2), version) } def initStorage(): TaskB[Unit] = ZIO.unit def renameNotebook(path: String, newPath: String): Task[String] = loadNotebook(path).map { notebook => notebooks.put(newPath, notebook) notebooks.remove(path) newPath } def copyNotebook(path: String, newPath: String): TaskB[String] = loadNotebook(path).map { notebook => notebooks.put(newPath, notebook) newPath } def deleteNotebook(path: String): TaskB[Unit] = ZIO.effectTotal(notebooks.get(path)).flatMap { case None => ZIO.fail(new FileNotFoundException(path)) case Some(_) => ZIO.effectTotal(notebooks.remove(path)).unit } }
Example 143
Source File: CardinalityProfiler.scala From Mastering-Spark-for-Data-Science with MIT License | 5 votes |
package io.gzet.profilers.field import io.gzet.profilers.Utils import org.apache.spark.sql.functions.col import org.apache.spark.sql.{Dataset, Row} import scala.collection.mutable import scalaz.Scalaz._ case class CardinalityProfiler(topN: Int = 5) { def profile(df: Dataset[Array[String]]): Dataset[CardinalityReport] = { val total = df.sparkSession.sparkContext.broadcast(df.count()) import df.sparkSession.implicits._ val features = Utils.buildColumns(df) val topNValues = features.groupByKey({ field => field }).count().map({ case (field, count) => (field.idx, Map(field.value -> count)) }).groupByKey({ case (column, map) => column }).reduceGroups({ (v1, v2) => val m1 = v1._2 val m2 = v2._2 val m = (m1 |+| m2).toSeq.sortBy(_._2).reverse (v1._1, m.take(math.min(m.size, topN)).toMap) }).map({ case (column, (_, map)) => val top = map.keySet.toArray (column, top) }) .withColumnRenamed("_1", "_topNValues_") .withColumnRenamed("_2", "description") val cardinalities = features.distinct().groupByKey(_.idx).count().map({ case (column, distinctValues) => val cardinality = distinctValues / total.value.toDouble (column, cardinality) }) .withColumnRenamed("_1", "column") .withColumnRenamed("_2", "cardinality") cardinalities.join(topNValues, col("column") === col("_topNValues_")) .drop("_topNValues_") .map({ case Row(column: Int, cardinality: Double, description: mutable.WrappedArray[String]) => CardinalityReport( column, cardinality, description.toArray ) }) } } case class CardinalityReport( field: Int, metricValue: Double, description: Array[String] )
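The reduceGroups step above merges per-value count maps with scalaz's |+|. For reference, the same merge written out with plain collections; the helper name is illustrative and not part of the profiler.

// Illustrative only: merge two count maps by summing counts, as scalaz's |+| does above.
object MergeCountsSketch {
  def merge(m1: Map[String, Long], m2: Map[String, Long]): Map[String, Long] =
    (m1.keySet ++ m2.keySet).iterator
      .map(k => k -> (m1.getOrElse(k, 0L) + m2.getOrElse(k, 0L)))
      .toMap

  def main(args: Array[String]): Unit =
    println(merge(Map("a" -> 2L, "b" -> 1L), Map("b" -> 3L)).toList.sortBy(_._1))
    // List((a,2), (b,4))
}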
Example 144
Source File: StackBootstraping.scala From Mastering-Spark-for-Data-Science with MIT License | 5 votes |
package io.gzet.tagging.stackoverflow import io.gzet.tagging.classifier.Classifier import io.gzet.tagging.html.HtmlHandler import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.apache.spark.sql.{SparkSession, DataFrame, SQLContext} import scala.collection.mutable import scala.xml.{Elem, XML} object StackBootstraping { def parse(spark: SparkSession, posts: RDD[String]): DataFrame = { import spark.sqlContext.implicits._ posts filter { line => line.contains("row Id") } map { line => val xml = XML.loadString(line) (getBody(xml), getTags(xml)) } filter { case (body, tags) => body.isDefined && tags.isDefined } flatMap { case (body, tags) => tags.get.map(tag => (body.get, tag)) } toDF("body", "tag") } private def getBody(xml: Elem): Option[String] = { val bodyAttr = xml.attribute("Body") if (bodyAttr.isDefined) { val html = bodyAttr.get.head.text val htmlHandler = new HtmlHandler() val content = htmlHandler.parseHtml(html) if (content.isDefined) { return content.get.body } } None: Option[String] } private def getTags(xml: Elem): Option[Array[String]] = { val tagsAttr = xml.attribute("Tags") if (tagsAttr.isDefined) { val tagsText = tagsAttr.get.head.text val tags = tagsText .replaceAll("<", "") .replaceAll(">", ",") .split(",") return Some(tags) } None: Option[Array[String]] } def bootstrapNaiveBayes(df: DataFrame, vectorSize: Option[Int]) = { val labeledText = df.rdd map { row => val body = row.getString(0) val labels = row.getAs[mutable.WrappedArray[String]](1) (body, labels.toArray) } Classifier.train(labeledText) } }
Example 145
Source File: OneHotEncoderDemo2.scala From Scala-and-Spark-for-Big-Data-Analytics with MIT License | 5 votes |
package com.chapter11.SparkMachineLearning import org.apache.spark.sql.SparkSession import org.apache.spark.ml.feature.{ OneHotEncoder, StringIndexer } import org.apache.spark.sql.types._ import org.apache.spark.sql._ import org.apache.spark.sql.functions.year import org.apache.spark.ml.{ Pipeline, PipelineStage } import org.apache.spark.ml.classification.{ LogisticRegression, LogisticRegressionModel } import org.apache.spark.ml.feature.StringIndexer import org.apache.spark.sql.{ DataFrame, SparkSession } import scala.collection.mutable import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator object OneHotEncoderDemo2 { def main(args: Array[String]): Unit = { val spark = SparkSession .builder .master("local[*]") .config("spark.sql.warehouse.dir", "E:/Exp/") .appName(s"OneVsRestExample") .getOrCreate() val df = spark.createDataFrame( Seq((0, "Jason", "Germany"), (1, "David", "France"), (2, "Martin", "Spain"), (3, "Jason", "USA"), (4, "Daiel", "UK"), (5, "Moahmed", "Bangladesh"), (6, "David", "Ireland"), (7, "Jason", "Netherlands"))).toDF("id", "name", "address") df.show(false) val indexer = new StringIndexer() .setInputCol("name") .setOutputCol("categoryIndex") .fit(df) val indexed = indexer.transform(df) val encoder = new OneHotEncoder() .setInputCol("categoryIndex") .setOutputCol("categoryVec") val encoded = encoder.transform(indexed) encoded.show() spark.stop() } }
Example 146
Source File: StringIndexerDemo.scala From Scala-and-Spark-for-Big-Data-Analytics with MIT License | 5 votes |
package com.chapter11.SparkMachineLearning import org.apache.spark.sql.SparkSession import org.apache.spark.ml.feature.{ OneHotEncoder, StringIndexer } import org.apache.spark.sql.types._ import org.apache.spark.sql._ import org.apache.spark.sql.functions.year import org.apache.spark.ml.{ Pipeline, PipelineStage } import org.apache.spark.ml.classification.{ LogisticRegression, LogisticRegressionModel } import org.apache.spark.ml.feature.StringIndexer import org.apache.spark.sql.{ DataFrame, SparkSession } import scala.collection.mutable import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator import org.apache.spark.sql._ import org.apache.spark.sql.SQLContext object StringIndexerDemo { def main(args: Array[String]): Unit = { val spark = SparkSession .builder .master("local[*]") .config("spark.sql.warehouse.dir", "E:/Exp/") .appName(s"OneVsRestExample") .getOrCreate() val df = spark.createDataFrame( Seq((0, "Jason", "Germany"), (1, "David", "France"), (2, "Martin", "Spain"), (3, "Jason", "USA"), (4, "Daiel", "UK"), (5, "Moahmed", "Bangladesh"), (6, "David", "Ireland"), (7, "Jason", "Netherlands"))).toDF("id", "name", "address") df.show(false) val indexer = new StringIndexer() .setInputCol("name") .setOutputCol("label") .fit(df) val indexed = indexer.transform(df) indexed.show(false) spark.stop() } }
Example 147
Source File: Load.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.op import java.net.URI import dbis.piglet.Piglet.Lineage import dbis.piglet.expr.{Ref, Value} import dbis.piglet.schema.Schema import dbis.piglet.tools.{CliParams, HDFSService} import scala.collection.mutable import scala.util.{Failure, Success, Try} override def lineageString: String = linStr getOrElse { s"""LOAD%$file%${lastModified match { case None => -1 case Some(Failure(_)) => -2 case Some(Success(v)) => v }}%""" + super.lineageString } override def toString: String = s"""LOAD | out = ${outputs.map(_.name).mkString(",")} | file = ${file.toString} | func = $loaderFunc | outSchema = $schema""".stripMargin override def resolveReferences(mapping: mutable.Map[String, Ref]): Unit = { // we replace only the filename if (file.toString.startsWith("$") && mapping.contains(file.toString)) { mapping(file.toString) match { case Value(v) => val s = v.toString if (s(0) == '"') file = s.substring(1, s.length-1) // file = new URI(s.substring(1, s.length-1)) case _ => } } } }
Example 148
Source File: RDFLoad.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.op import dbis.piglet.schema._ import java.net.URI import org.kiama.rewriting.Rewriter.everything import scala.collection.mutable case class RDFLoad(private val out: Pipe, uri: URI, grouped: Option[String]) extends PigOperator(out) { schema = if (grouped.isDefined) { if (RDFLoad.groupedSchemas.contains(grouped.get)){ Some(RDFLoad.groupedSchemas(grouped.get)) } else { throw new IllegalArgumentException(grouped.get + " is not a valid RDF grouping column") } } else { RDFLoad.plainSchema } def BGPFilterIsReachable: Boolean = { val isBGPFilter: PartialFunction[Any, Boolean] = {case t: Any => t.isInstanceOf[BGPFilter]} everything[Boolean] ("BGPFilterIsReachable", false) { (old: Boolean, newvalue: Boolean) => old || newvalue } (isBGPFilter) (this) } } object RDFLoad { // lazy final val groupedSchemas = { def groupedSchemas = { val m = mutable.Map[String, Schema]() val columns = List[String]("subject", "predicate", "object") for (grouping_column <- columns) { val fields = columns.filterNot(_ == grouping_column).map { Field(_, Types.CharArrayType) }.toArray m(grouping_column) = Schema( BagType( TupleType( Array( Field(grouping_column, Types.CharArrayType), Field("stmts", BagType( TupleType( fields))))))) } m } }
Example 149
Source File: SpatialFilter.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.op import scala.collection.mutable.Map import dbis.piglet.expr.Predicate import dbis.piglet.expr.Ref import dbis.piglet.expr.Expr import dbis.piglet.expr.SpatialFilterPredicate import dbis.piglet.op.IndexMethod.IndexMethod import scala.collection.mutable override def lineageString: String = { s"""SPATIALFILTER%$pred%$idx""" + super.lineageString } override def resolveReferences(mapping: mutable.Map[String, Ref]): Unit = pred.resolveReferences(mapping) override def checkSchemaConformance: Boolean = { schema match { case Some(s) => // if we know the schema we check all named fields pred.traverseAnd(s, Expr.checkExpressionConformance) case None => // if we don't have a schema all expressions should contain only positional fields pred.traverseAnd(null, Expr.containsNoNamedFields) } } override def toString = s"""SPATIALFILTER | out = $outPipeName | in = $inPipeName | schema = $schema | expr = $pred | idx = $idx""".stripMargin }
Example 150
Source File: MacroOp.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.op import dbis.piglet.expr.{NamedField, Ref} import dbis.piglet.op.cmd.DefineMacroCmd import dbis.piglet.plan.InvalidPlanException import dbis.piglet.schema.Schema import scala.collection.mutable import scala.collection.mutable.ListBuffer def buildParameterMapping(cmd: DefineMacroCmd): Unit = { if (cmd.params.isEmpty && params.isDefined || cmd.params.isDefined && params.isEmpty) throw InvalidPlanException(s"macro $macroName: parameter list doesn't match with definition") if (cmd.params.isDefined) { val defs = cmd.params.get val p = params.get if (defs.size != p.size) throw InvalidPlanException(s"macro $macroName: number of parameters doesn't match with definition") for (i <- defs.indices) { paramMapping += ("$" + defs(i) -> p(i)) } } paramMapping += ("$" + cmd.out.name -> NamedField(outPipeName)) } override def lineageString: String = s"""MACRO%$macroName%""" + super.lineageString override def checkSchemaConformance: Boolean = { // TODO true } override def constructSchema: Option[Schema] = { // TODO super.constructSchema } override def toString = s"""MACRO | out = $outPipeName | name = $macroName | params = ${params.map(_.mkString(","))} """.stripMargin }
Example 151
Source File: Filter.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.op import dbis.piglet.expr.{Expr, Predicate, Ref} import scala.collection.mutable override def lineageString: String = { s"""FILTER%$pred%""" + super.lineageString } override def resolveReferences(mapping: mutable.Map[String, Ref]): Unit = pred.resolveReferences(mapping) override def checkSchemaConformance: Boolean = { schema match { case Some(s) => // if we know the schema we check all named fields pred.traverseAnd(s, Expr.checkExpressionConformance) case None => // if we don't have a schema all expressions should contain only positional fields pred.traverseAnd(null, Expr.containsNoNamedFields) } } override def toString = s"""FILTER | out = $outPipeName | in = $inPipeName | schema = $inputSchema | expr = $pred | ${if (windowMode) "window mode" else ""}""".stripMargin }
Example 152
Source File: PlanWriter.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.tools import java.nio.file.{Files, Path, StandardOpenOption} import dbis.piglet.op.{PigOperator, TimingOp} import dbis.piglet.plan.DataflowPlan import dbis.piglet.tools.logging.PigletLogging //import guru.nidi.graphviz.engine.{Format, Graphviz} //import guru.nidi.graphviz.parse.Parser import scala.collection.JavaConverters._ import scala.collection.mutable import scala.collection.mutable.ListBuffer import scala.concurrent.duration.Duration case class Node(id: String, var time: Option[Duration] = None, var label: String = "") { private def mkLabel = { val t = if(time.isDefined) s"\n${time.get.toMillis}ms (${BigDecimal(time.get.toMillis / 1000.0).setScale(2,BigDecimal.RoundingMode.HALF_UP).toDouble}s)" else "" val l = s"$label\n$id\n$t" PlanWriter.quote(l) } override def toString = s"op$id ${if(label.trim().nonEmpty) s"[label=$mkLabel]" else ""}" } case class Edge(from: String, to: String, var label: String = "") { override def toString = s"op$from -> op$to ${if(label.trim().nonEmpty) s"[label=$label]" else "" }" } private def writeDotFile(file: Path, graph: String): Unit = { logger.debug(s"writing dot file to $file") if(Files.notExists(file.getParent)) { Files.createDirectories(file.getParent) } Files.write(file, List(graph).asJava, StandardOpenOption.CREATE, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING) } }
Example 153
Source File: StreamWindowApplyEmitter.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.codegen.flink import dbis.piglet.codegen.CodeEmitter import dbis.piglet.codegen.CodeGenContext import dbis.piglet.op.WindowApply import dbis.piglet.codegen.scala_lang.ScalaEmitter import dbis.piglet.op.OrderBy import dbis.piglet.op.PigOperator import dbis.piglet.op.Pipe import dbis.piglet.op.Distinct import dbis.piglet.op.Empty import dbis.piglet.op.Grouping import dbis.piglet.op.Foreach import dbis.piglet.op.Filter import scala.collection.mutable import dbis.piglet.codegen.scala_lang.FilterEmitter import dbis.piglet.codegen.flink.emitter.StreamFilterEmitter import dbis.piglet.codegen.flink.emitter.StreamForeachEmitter import dbis.piglet.codegen.flink.emitter.StreamDistinctEmitter class StreamWindowApplyEmitter extends CodeEmitter[WindowApply] { override def template: String = """ val <out> = <in>.apply(<func> _)""" override def code(ctx: CodeGenContext, op: WindowApply): String = { render(Map("out" -> op.outPipeName, "in" -> op.inPipeName, "func" -> op.fname)) } override def helper(ctx: CodeGenContext, op: WindowApply): String = { val inSchema = ScalaEmitter.schemaClassName(op.inputSchema.get.className) val outSchema = ScalaEmitter.schemaClassName(op.schema.get.className) var fname, applyBody = "" var lastOp: PigOperator = new Empty(Pipe("empty")) val littleWalker = mutable.Queue(op.inputs.head.producer.outputs.flatMap(_.consumer).toSeq: _*) while (!littleWalker.isEmpty) { val operator = littleWalker.dequeue() operator match { case o @ Filter(_, _, pred, windowMode) if (windowMode) => { val e = new StreamFilterEmitter applyBody += e.windowApply(ctx, o) + "\n" } case o @ Distinct(_, _, windowMode) if (windowMode) => { val e = new StreamDistinctEmitter applyBody += e.windowApply(ctx, o) + "\n" } case o @ OrderBy(_, _, spec, windowMode) if (windowMode) => { val e = new StreamOrderByEmitter applyBody += e.windowApply(ctx, o) + "\n" } case o @ Grouping(_, _, groupExpr, windowMode) if (windowMode) => { val e = new StreamGroupingEmitter applyBody += e.windowApply(ctx, o) + "\n" } case o @ Foreach(_, _, gen, windowMode) if (windowMode) => { fname = "WindowFunc" + o.outPipeName val e = new StreamForeachEmitter applyBody += e.windowApply(ctx, o) return s""" def ${fname}(wi: Window, ts: Iterable[${inSchema}], out: Collector[${outSchema}]) = { | ts |${applyBody} | } """.stripMargin } case _ => } littleWalker ++= operator.outputs.flatMap(_.consumer) if (littleWalker.isEmpty) lastOp = operator } val before = lastOp.inputs.tail.head fname = "WindowFunc" + before.name applyBody += """.foreach { t => out.collect((t)) }""" s""" def ${fname}(wi: Window, ts: Iterable[${inSchema}], out: Collector[${outSchema}]) = { | ts |${applyBody} | } """.stripMargin } } object StreamWindowApplyEmitter { lazy val instance = new StreamWindowApplyEmitter }
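The emitter walks the operators downstream of a window with a mutable.Queue used as a worklist: dequeue one node, process it, enqueue its consumers. A standalone sketch of that traversal over a made-up node type.

import scala.collection.mutable

// Illustrative only: breadth-first walk over a tree of nodes using a mutable.Queue
// as the worklist, mirroring how the emitter above walks downstream operators.
object WorklistSketch {
  final case class Node(name: String, children: List[Node] = Nil)

  def bfsNames(roots: Seq[Node]): List[String] = {
    val out = List.newBuilder[String]
    val worklist = mutable.Queue(roots: _*)
    while (worklist.nonEmpty) {
      val node = worklist.dequeue()
      out += node.name
      worklist ++= node.children     // enqueue the next level
    }
    out.result()
  }

  def main(args: Array[String]): Unit = {
    val tree = Node("filter", List(Node("distinct"), Node("foreach", List(Node("sink")))))
    println(bfsNames(Seq(tree))) // List(filter, distinct, foreach, sink)
  }
}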
Example 154
Source File: CodeMatchers.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.tools import org.scalatest._ import matchers._ import scala.collection.mutable object SnippetMatcher { def matches(snippet: String, template: String): Boolean = { val replacements = mutable.Map[String, String]() val pattern = "\\$_?[0-9]".r val positions = pattern.findAllMatchIn(template) .map(p => p.start) .zipWithIndex .map{ case (p, offset) => p - offset}.toList val keys = pattern.findAllMatchIn(template).map(p => p.toString).toList val pattern2 = "_?[0-9]+".r var offs = 0 for (i <- keys.indices) { // now we look for the number that we use to replace the $i string if (snippet.length < positions(i) + offs + 1) return false pattern2.findFirstIn(snippet.substring(positions(i) + offs)) match { case Some(snip) => replacements += (keys(i) -> snip) // if it was longer than one digit we have to correct the position offs += snip.length - 1 case None => } } var s = template replacements.foreach{case (k, v) => s = s.replace(k, v)} snippet == s } } // Make them easy to import with: // import CodeMatchers._ object CodeMatchers extends CodeMatchers
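A usage sketch of SnippetMatcher above, assuming the file is on the classpath; the snippets are made up.

// Usage sketch: $1 in the template stands for any generated number in the snippet.
// The snippets below are made up.
import dbis.piglet.tools.SnippetMatcher

SnippetMatcher.matches("val x12 = foo(x12)", "val x$1 = foo(x$1)") // true: $1 binds to 12 both times
SnippetMatcher.matches("val x12 = foo(x34)", "val x$1 = foo(x$1)") // false: the two numbers differ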
Example 155
Source File: MutableHeader.scala From scalingua with Apache License 2.0 | 5 votes |
package ru.makkarpov.scalingua.pofile.parse import java.io.File import java_cup.runtime.ComplexSymbolFactory.Location import ru.makkarpov.scalingua.pofile.{MessageFlag, MessageHeader, MessageLocation, PoFile} import scala.collection.mutable class MutableHeader { private var _startLoc: Location = _ private var _endLoc: Location = _ private var comments: mutable.Builder[String, Seq[String]] = _ private var extractedComments: mutable.Builder[String, Seq[String]] = _ private var locations: mutable.Builder[MessageLocation, Seq[MessageLocation]] = _ private var flags: MessageFlag.ValueSet = _ private var tag: Option[String] = _ private def parseComment(cmt: Comment, left: Location, right: Location): Unit = cmt.commentTag match { case ' ' => val str = cmt.comment.trim if (!str.startsWith(PoFile.GeneratedPrefix)) comments += str case '.' => extractedComments += cmt.comment.trim case ':' => // It seems that GNU .po utilities can combine locations in a single line: // #: some.file:123 other.file:456 // but specifications does not specify how to handle spaces in a string. // So ignore there references, Scalingua itself will never produce such lines. val str = cmt.comment.trim val idx = str.lastIndexOf(':') if (idx != -1) { val file = str.substring(0, idx) val line = try str.substring(idx + 1) catch { case _: NumberFormatException => throw ParserException(left, right, "cannot parse line number") } locations += MessageLocation(new File(file), line.toInt) } else { locations += MessageLocation(new File(str), -1) } case ',' => val addFlags = cmt.comment.trim.split(",").flatMap { s => try Some(MessageFlag.withName(s.toLowerCase)) catch { case _: NoSuchElementException => None } } flags = addFlags.foldLeft(flags)(_ + _) case '~' => tag = Some(cmt.comment.trim) case _ => // ignore } def reset(): Unit = { _startLoc = null _endLoc = null comments = Vector.newBuilder extractedComments = Vector.newBuilder locations = Vector.newBuilder flags = MessageFlag.ValueSet() tag = None } def add(cmt: Comment, left: Location, right: Location): Unit = { if (_startLoc == null) { _startLoc = left } _endLoc = right parseComment(cmt, left, right) } def result(): MessageHeader = MessageHeader(comments.result(), extractedComments.result(), locations.result(), flags, tag) }
Example 156
Source File: GDBTableSeekWithNullValues.scala From spark-gdb with Apache License 2.0 | 5 votes |
package com.esri.gdb import scala.collection.mutable class GDBTableSeekWithNullValues(dataBuffer: DataBuffer, fields: Seq[Field], numFieldsWithNullAllowed: Int, indexIter: Iterator[IndexInfo]) extends Iterator[Map[String, Any]] with Serializable { private val nullValueIndicators = new Array[Byte]((numFieldsWithNullAllowed / 8.0).ceil.toInt) def hasNext() = indexIter.hasNext def next() = { val index = indexIter.next() val numBytes = dataBuffer.seek(index.seek).readBytes(4).getInt val byteBuffer = dataBuffer.readBytes(numBytes) 0 until nullValueIndicators.length foreach (nullValueIndicators(_) = byteBuffer.get) var bit = 0 val map = mutable.Map[String, Any]() fields.foreach(field => { if (field.nullable) { val i = bit >> 3 val m = 1 << (bit & 7) bit += 1 if ((nullValueIndicators(i) & m) == 0) { map(field.name) = field.readValue(byteBuffer, index.objectID) } } else { map(field.name) = field.readValue(byteBuffer, index.objectID) } } ) map.toMap } }
Example 157
Source File: GDBTableScanWithNullValues.scala From spark-gdb with Apache License 2.0 | 5 votes |
package com.esri.gdb import scala.collection.mutable class GDBTableScanWithNullValues(dataBuffer: DataBuffer, fields: Seq[Field], maxRows: Int, startID: Int = 0) extends Iterator[Map[String, Any]] with Serializable { val numFieldsWithNullAllowed = fields.count(_.nullable) val nullValueIndicators = new Array[Byte]((numFieldsWithNullAllowed / 8.0).ceil.toInt) var nextRow = 0 var objectID = startID def hasNext() = nextRow < maxRows def next() = { nextRow += 1 objectID += 1 val numBytes = dataBuffer.readBytes(4).getInt val byteBuffer = dataBuffer.readBytes(numBytes) 0 until nullValueIndicators.length foreach (nullValueIndicators(_) = byteBuffer.get) var bit = 0 val map = mutable.Map[String, Any]() fields.foreach(field => { if (field.nullable) { val i = bit >> 3 val m = 1 << (bit & 7) bit += 1 if ((nullValueIndicators(i) & m) == 0) { map(field.name) = field.readValue(byteBuffer, objectID) } } else { map(field.name) = field.readValue(byteBuffer, objectID) } } ) map.toMap } }
Example 158
Source File: MessageBuffer.scala From scala-loci with Apache License 2.0 | 5 votes |
package loci import java.nio.ByteBuffer import java.nio.charset.StandardCharsets import scala.annotation.compileTimeOnly import scala.collection.mutable final class MessageBuffer private (val backingArray: Array[Byte]) extends mutable.IndexedSeq[Byte] { @compileTimeOnly("`backingArrayBuffer` only available in JS") def backingArrayBuffer: Any = ??? @inline def length: Int = backingArray.length @inline def apply(index: Int) = { if (index < 0 || index >= length) throw new IndexOutOfBoundsException(s"index $index") backingArray(index) } @inline def update(index: Int, element: Byte) = { if (index < 0 || index >= length) throw new IndexOutOfBoundsException(s"index $index") backingArray(index) = element } @inline def update(offset: Int, buffer: MessageBuffer, bufferOffset: Int, count: Int) = { if (offset < 0 || bufferOffset < 0 || count < 0 || offset > length - count || bufferOffset > buffer.length - count) throw new IndexOutOfBoundsException( s"offset $offset, length $length, " + s"buffer offset ${bufferOffset}, buffer length ${buffer.length}, count $count") System.arraycopy(buffer.backingArray, bufferOffset, backingArray, offset, count) } @inline def concat(buffer: MessageBuffer): MessageBuffer = { val array = new Array[Byte](length + buffer.length) System.arraycopy(backingArray, 0, array, 0, length) System.arraycopy(buffer.backingArray, 0, array, length, buffer.length) new MessageBuffer(array) } @inline def copy(offset: Int, count: Int): MessageBuffer = { if (offset < 0 || count < 0 || offset > length - count) throw new IndexOutOfBoundsException(s"offset $offset, count $count, length $length") val array = new Array[Byte](count) System.arraycopy(backingArray, offset, array, 0, count) new MessageBuffer(array) } @inline def decodeString(offset: Int, count: Int): String = new String(backingArray, offset, count, StandardCharsets.UTF_8) @inline def decodeString: String = decodeString(0, length) @inline def asByteBuffer: ByteBuffer = ByteBuffer wrap backingArray override def toString: String = MessageBufferEncoding.byteBufferToString(asByteBuffer, 0, length, fatal = true) getOrElse MessageBufferEncoding.messageBufferToHexString(this) } object MessageBuffer { def empty: MessageBuffer = new MessageBuffer(Array.emptyByteArray) def allocate(length: Int): MessageBuffer = new MessageBuffer(new Array(length)) def encodeString(string: String): MessageBuffer = new MessageBuffer(string getBytes StandardCharsets.UTF_8) def wrapByteBuffer(buffer: ByteBuffer): MessageBuffer = if (!buffer.hasArray) { val duplicate = buffer.duplicate duplicate.position(0) duplicate.limit(buffer.capacity) val array = new Array[Byte](duplicate.remaining) duplicate.get(array) new MessageBuffer(array) } else new MessageBuffer(buffer.array) def wrapArray(array: Array[Byte]): MessageBuffer = new MessageBuffer(array) @compileTimeOnly("`wrapArrayBuffer` only available in JS") def wrapArrayBuffer(arrayBuffer: Any): MessageBuffer = ??? }
Example 159
Source File: Value.scala From scala-loci with Apache License 2.0 | 5 votes |
package loci package runtime import scala.collection.mutable object Value { case class Signature(name: String, module: String, path: List[String]) { override def toString: String = if (path.isEmpty) s"$module.$name" else s"${path mkString "."}.$module.$name" } object Signature { def serialize(signature: Signature): String = if (signature.path.isEmpty) s"${signature.module}!${signature.name}" else s"${signature.module}!${signature.path mkString "."}.${signature.name}" def deserialize(signature: String): Signature = { var first = 0 var last = 0 val end = signature.length val buffer = mutable.ListBuffer.empty[String] while (last < end && first < end) signature(last) match { case '!' => first = end case _ => last += 1 } val module = signature.substring(0, last) if (last < end) last += 1 first = last while (last < end) signature(last) match { case '.' => buffer += signature.substring(first, last) last += 1 first = last case '(' | ':' => last = end case _ => last += 1 } Signature(signature.substring(first, end), module, buffer.toList) } } case class Reference(channelName: String, channelAnchor: String, remote: Remote.Reference, system: System) extends transmitter.AbstractionRef { lazy val channel = system.obtainChannel(channelName, channelAnchor, remote) def derive(name: String) = Reference(s"$channelName:$name", channelAnchor, remote, system) override def toString: String = s"[channel:$channelName]$remote" } }
Example 160
Source File: Dispatcher.scala From scala-loci with Apache License 2.0 | 5 votes |
package loci package runtime import scala.collection.mutable import scala.concurrent.ExecutionContext import scala.util.control.NonFatal trait Dispatch[D <: Dispatch[D]] extends Runnable { def blockedBy(dispatch: D): Boolean final def blockedBy(dispatches: compatibility.IterableOnce[D]): Boolean = compatibility.iterable.exists(dispatches) { blockedBy } } trait Undispatchable[D <: Dispatch[D]] { this: Dispatch[D] => final def blockedBy(dispatch: D) = false final def run() = { } } class Dispatcher[D <: Dispatch[D]](implicit context: ExecutionContext) { private val dispatches = mutable.ListBuffer.empty[(D, Boolean)] def dispatch(dispatch: D*): Unit = dispatches synchronized { dispatch foreach { dispatches += _ -> false } next(Seq.empty) } def ignoreDispatched(dispatch: D*): Unit = dispatches synchronized { dispatch foreach { dispatches -= _ -> false } next(Seq.empty) } private def next(executed: compatibility.Iterable[D]): Unit = dispatches synchronized { executed foreach { dispatches -= _ -> true } val pendings = dispatches collect { case (dispatch, true) => dispatch } val dispatchings = mutable.ListBuffer.empty[mutable.ListBuffer[D]] compatibility.listBuffer.mapInPlace(dispatches) { case (dispatch, running) => dispatch match { case _: Undispatchable[D] => pendings += dispatch dispatch -> false case _ => if (!running) { if (!(dispatch blockedBy pendings)) { dispatchings filter dispatch.blockedBy match { case mutable.ListBuffer() => dispatchings += mutable.ListBuffer(dispatch) dispatch -> true case mutable.ListBuffer(dispatching) => dispatching += dispatch dispatch -> true case _ => pendings += dispatch dispatch -> false } } else { pendings += dispatch dispatch -> false } } else dispatch -> true } } dispatchings foreach { dispatching => logging.tracing(context).execute(new Runnable { def run() = { var throwable: Throwable = null dispatching foreach { dispatch => try dispatch.run() catch { case NonFatal(exception) => if (throwable == null) throwable = exception else throwable.addSuppressed(exception) } } next(dispatching) if (throwable != null) throw throwable } }) } } }
Example 161
Source File: AbstractAppender.scala From rollbar-scala with MIT License | 5 votes |
package com.storecove.rollbar.appenders import com.storecove.rollbar.util.FiniteQueue import com.storecove.rollbar.{RollbarNotifier, RollbarNotifierDefaults, RollbarNotifierFactory} import org.slf4j.MDC import scala.collection.JavaConversions._ import scala.collection.{immutable, mutable} trait AbstractAppender { protected val DEFAULT_LOGS_LIMITS = 100 protected var enabled: Boolean = true protected var onlyThrowable: Boolean = true protected var url: String = RollbarNotifierDefaults.defaultUrl protected var apiKey: String = _ protected var environment: String = _ protected var notifyLevelString: String = "ERROR" protected var limit: Int = DEFAULT_LOGS_LIMITS protected val rollbarNotifier: RollbarNotifier = RollbarNotifierFactory.getNotifier(apiKey, environment) protected val logBuffer: FiniteQueue[String] = new FiniteQueue[String](immutable.Queue[String]()) def setNotifyLevel(level: String): Unit protected def notifyLevel: Any = "ERROR" def setEnabled(enabled: Boolean): Unit = this.enabled = enabled def setOnlyThrowable(onlyThrowable: Boolean): Unit = this.onlyThrowable = onlyThrowable def setApiKey(apiKey: String): Unit = { this.apiKey = apiKey rollbarNotifier.setApiKey(apiKey) } def setEnvironment(environment: String): Unit = { this.environment = environment rollbarNotifier.setEnvironment(environment) } def setUrl(url: String): Unit = { this.url = url rollbarNotifier.setUrl(url) } def setLimit(limit: Int): Unit = this.limit = limit def getEnabled: Boolean = enabled def getOnlyThrowable: Boolean = onlyThrowable def getApiKey: String = apiKey def getEnvironment: String = environment def getUrl: String = url def getNotifyLevel: String = notifyLevelString def getLimit: Int = limit protected def getMDCContext: mutable.Map[String, String] = { val mdc = MDC.getCopyOfContextMap if (mdc == null) { mutable.Map.empty[String, String] } else { mapAsScalaMap(mdc) } } }
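getMDCContext above copies SLF4J's MDC into a scala.collection.mutable.Map through the deprecated JavaConversions implicits. A small sketch of the same lookup written against the explicit JavaConverters API instead; the requestId key is invented for illustration:

import org.slf4j.MDC

import scala.collection.JavaConverters._
import scala.collection.mutable

object MdcContextSketch extends App {
  MDC.put("requestId", "42")
  // getCopyOfContextMap may return null when nothing has been put into the MDC yet
  val ctx: mutable.Map[String, String] =
    Option(MDC.getCopyOfContextMap).map(_.asScala).getOrElse(mutable.Map.empty[String, String])
  println(ctx) // Map(requestId -> 42)
}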
Example 162
Source File: Analyser.scala From ClassDependenceAnalyser with GNU General Public License v2.0 | 5 votes |
package com.github.jllk.analyser import java.io.File import java.net.{URLClassLoader, URL} import scala.collection.mutable import scala.collection.mutable.ListBuffer object Analyser { def notCareClass(fullClassName: String): Boolean = fullClassName.startsWith("java") || fullClassName.startsWith("scala") || fullClassName.startsWith("\"[") || (fullClassName.startsWith("android") && !fullClassName.startsWith("android/support")) } class Analyser(private val dependenceJarPath: List[File]) { import Analyser._ def analysis(fullClassName: String): mutable.Set[String] = { val dependentClasses = mutable.Set[String]() val importDependence = analysisImportDependence(fullClassName) importDependence .foreach(c => { dependentClasses += c dependentClasses ++= analysisInheritDependence(c) }) dependentClasses } private def analysisImportDependence(fullClassName: String): List[String] = { val dependentClasses = new ListBuffer[String]() val classpath = dependenceJarPath.map(f => s"-classpath ${f.toPath}") mkString " " val classReport = ProcessUtils.exec(s"javap -verbose $classpath ${fullClassName.replace('.', '/')}") val lines = classReport.split('\n') lines .filter(l => l.contains("= Class") && !l.contains("\"[Ljava/lang/Object;\"")) .foreach(l => dependentClasses += l.substring(l.indexOf("//") + 2).replaceAll(" ", "").replaceAll("/", "\\.").trim()) dependentClasses .filter(notCareClass) .toList } private def analysisInheritDependence(fullClassName: String): List[String] = { val urls = ListBuffer[URL]() dependenceJarPath.foreach(f => urls += f.toURI.toURL) val classLoader = new URLClassLoader(urls.toArray) doClassInheritSearch(fullClassName, classLoader) } private def doClassInheritSearch(fullClassName: String, classLoader: URLClassLoader): List[String] = { if (notCareClass(fullClassName)) { List.empty[String] } else { val dependentClasses = mutable.Set[String]() dependentClasses += fullClassName dependentClasses ++= analysisImportDependence(fullClassName) dependentClasses.foreach(fullClassName => { val targetClass: Either[Class[_], Exception] = try Left(classLoader.loadClass(fullClassName)) catch { case e: ClassNotFoundException => Right(e) case e: Exception => Right(e) } targetClass match { case Left(c) => val superclass = c.getSuperclass if (superclass != null) { dependentClasses ++= doClassInheritSearch(superclass.getName, classLoader) } c.getInterfaces.foreach(i => dependentClasses ++= doClassInheritSearch(i.getName, classLoader)) case Right(e) => println(s"[doClassInheritSearch] exception happened: ${e.getMessage}, please check your dependenceJarPath.") } }) dependentClasses.toList } } }
Example 163
Source File: IOUtils.scala From ClassDependenceAnalyser with GNU General Public License v2.0 | 5 votes |
package com.github.jllk.analyser

import java.io._
import java.util.Set

import scala.collection.JavaConversions._
import scala.collection.mutable

object IOUtils {

  def writeToMainDexList(input: mutable.Set[String]) = {
    require(input != null)
    val output = new PrintWriter(new File("maindexlist.txt"))
    inSafe(output) {
      input.foreach(l => output.println(l.replaceAll("\\.", "/") + ".class"))
    }
  }

  def writeToMainDexList(input: Set[String]) = {
    require(input != null)
    val output = new PrintWriter(new File("maindexlist.txt"))
    inSafe(output) {
      input.foreach(l => output.println(l + ".class"))
    }
  }
}
Example 164
Source File: JobBuilder.scala From lemon-schedule with GNU General Public License v2.0 | 5 votes |
package com.gabry.job.core.builder import java.util.concurrent.TimeUnit import com.gabry.job.core.constant.Constants import com.gabry.job.core.domain.Job import com.gabry.job.core.tools.UIDGenerator import scala.collection.mutable object JobBuilder { def apply(): JobBuilder = new JobBuilder() } class JobBuilder extends Builder[Job]{ private var name:String = _ private var className:String = _ private var cron:String = _ private var dataTimeOffset:Long = 0 private var dataTimeOffsetUnit:TimeUnit = TimeUnit.MINUTES private var parallel:Int = Int.MaxValue private var meta:mutable.Map[String,String] = mutable.HashMap.empty[String,String] private var workerNodes:Array[String] = Array.empty[String] private var cluster:String = Constants.DEFAULT_CLUSTER_NAME private var group:String = Constants.DEFAULT_GROUP_NAME private var startTime:Long = System.currentTimeMillis() private var priority:Int = Int.MaxValue private var retryTimes:Int = 0 private var timeOut:Int = Int.MaxValue private var replaceIfExist:Boolean = false def withName(name:String):this.type = { this.name = name this } def withClass(className:String):this.type = { this.className = className this } def withCron(cron:String):this.type = { this.cron = cron this } def withDataTimeOffset(dataTimeOffset:Long):this.type = { this.dataTimeOffset = dataTimeOffset this } def withDataTimeOffsetUnit(dataTimeOffsetUnit:TimeUnit):this.type = { this.dataTimeOffsetUnit = dataTimeOffsetUnit this } def withParallel(parallel:Int):this.type = { this.parallel = parallel this } def withMeta(meta:Map[String,String]):this.type = { this.meta ++= meta this } def withMeta(key:String,value:String):this.type = { this.meta.put(key,value) this } def withWorkerNodes(workerNodes:Array[String]):this.type = { this.workerNodes = workerNodes this } def withCluster(cluster:String):this.type = { this.cluster = cluster this } def withGroup(group:String):this.type = { this.group = group this } def withStartTime(startTime:Long):this.type = { this.startTime = startTime this } def withPriority(priority:Int):this.type = { this.priority = priority this } def withRetryTimes(retryTimes:Int):this.type = { this.retryTimes = retryTimes this } def withTimeOut(timeOut:Int):this.type = { this.timeOut = timeOut this } def withReplaceIfExist(replaceIfExist:Boolean):this.type = { this.replaceIfExist = replaceIfExist this } override def build():Job = Job(UIDGenerator.globalUIDGenerator.nextUID(),name,className,cron,dataTimeOffset,dataTimeOffsetUnit,parallel,meta.toMap,workerNodes,cluster,group,startTime,priority,retryTimes,timeOut,replaceIfExist) }
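The builder above only mutates private fields, plus a mutable.HashMap for the metadata, until build() assembles the Job in a single call. A short sketch of the intended call chain; the job name, class name and cron expression are invented for illustration:

object JobBuilderDemo extends App {
  // each with* method returns this, so calls chain; withMeta entries accumulate in the internal mutable.HashMap
  val job = JobBuilder()
    .withName("nightly-report")
    .withClass("com.example.ReportTask")
    .withCron("0 0 2 * * ?")
    .withParallel(2)
    .withMeta("owner", "data-team")
    .withRetryTimes(3)
    .build()
  println(job)
}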
Example 165
Source File: DistributedRoughSet.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.spark.preprocessing.rst

  final def runHeuristic[O, T : ClassTag, V[A] <: GSimpleVector[A, V[A]], Sz[B, C <: GVector[C]] <: Supervizable[B, C, Sz]](data: RDD[Sz[O, V[T]]], columnsOfFeats: Seq[Seq[Int]]): mutable.Buffer[Int] = {

    val nbColumns = columnsOfFeats.size
    val dataBC = sc.broadcast(data.collect.par)

    sc.parallelize(0 until 8888, nbColumns).mapPartitionsWithIndex{ (idxp, _) =>
      val dataPerFeat = dataBC.value.map(_.obtainOneBucket(idxp))
      val originalFeatures = columnsOfFeats(idxp)
      val originalFeatIdByTmpFeatId = originalFeatures.zipWithIndex.map(_.swap).toMap
      val allReductSet = roughSet(dataPerFeat)
      allReductSet(Random.nextInt(allReductSet.size)).map(originalFeatIdByTmpFeatId).toIterator
    }
    .collect
    .toBuffer
  }
}
Example 166
Source File: K-Centers.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.clustering.kcenters.dataset @annotation.tailrec def go(cpt: Int, haveAllCentersConverged: Boolean, centers: List[(Int, V)]): List[(Int, V)] = { val preUpdatedCenters = data.groupByKey( cz => obtainNearestCenterID(cz.v, centers, metric) )(encoderInt) .mapGroups(computeCenters)(encoder) .collect .sortBy(_._1) .toList val alignedOldCenters = preUpdatedCenters.map{ case (oldClusterID, _) => centers(oldClusterID) } val updatedCenters = preUpdatedCenters.zipWithIndex.map{ case ((oldClusterID, center), newClusterID) => (newClusterID, center) } val shiftingEnough = areCentersNotMovingEnough(updatedCenters, alignedOldCenters, minShift, metric) if(cpt < maxIterations && !shiftingEnough) { go(cpt + 1, shiftingEnough, updatedCenters) } else { updatedCenters } } immutable.HashMap(go(0, false, centers):_*) } }
Example 167
Source File: ClusterwiseTypes.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.spark.clustering.clusterwise

import scala.collection.{mutable, immutable}
import breeze.linalg.DenseMatrix

trait ClusterwiseTypes {
  final type ClassID = Int
  final type ID = Int
  final type Xvector = Array[Double]
  final type Yvector = Array[Double]
  final type IDXYtest = Seq[(Int, (Xvector, Yvector))]
  final type IDXtest = Seq[(Long, Xvector)]
  final type DSPerClass = Array[(ID, (Xvector, Yvector, ClassID))]
  final type ClassedDS = Array[(Int, DSPerClass)]
  final type IDXDS = Array[mutable.ArrayBuffer[(Int, Xvector)]]
  final type YDS = Array[mutable.ArrayBuffer[Yvector]]
  final type RegPerClass = (Double, DenseMatrix[Double], Array[Double], Array[(Int, Array[Double])])
  final type ClassedDSperGrp = Array[(Int, Array[(Int, Int, Array[(ClassID, Int, Xvector, Yvector)])])]
}
Example 168
Source File: UtilSpark.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.sparktools import scala.language.higherKinds import org.apache.spark.rdd.RDD import org.apache.spark.HashPartitioner import scala.reflect.runtime.universe.TypeTag import scala.util.Random import scala.reflect.ClassTag import scala.collection.{GenSeq, mutable} import org.clustering4ever.preprocessing.Preprocessable import org.clustering4ever.hashing.HashingScalar import org.clustering4ever.vectors.{GVector, ScalarVector} object UtilSpark { type IndexPartition = Int type HasConverged = Boolean type IsOriginalDot = Boolean final def generateDataLocalityOnHashsedDS[ O, Pz[B, C <: GVector[C]] <: Preprocessable[B, C, Pz] ]( rddToPartitioned: RDD[Pz[O, ScalarVector]], nbblocs1: Int, nbBucketRange: Int ): RDD[(IndexPartition, (Pz[O, ScalarVector], IsOriginalDot, HasConverged))] = { val isOriginalPoint = true val hasConverged = true val bucketRange = 1 to nbBucketRange val lshRDD = rddToPartitioned.map((_, isOriginalPoint, !hasConverged)) val localityPerPartitionRDD = lshRDD.mapPartitionsWithIndex{ (idx, it) => val ar = it.toList def rightNeighbourhood = ar.flatMap{ case (cz, _, _) => bucketRange.collect{ case i if(idx + i < nbblocs1) => (idx + i, (cz, !isOriginalPoint, !hasConverged)) } } def leftNeighbourhood = ar.flatMap{ case (cz, _, _) => bucketRange.collect{ case i if(idx - i >= 0) => (idx - i, (cz, !isOriginalPoint, !hasConverged)) } } val composing = if(idx == 0) ar.map((idx, _)) ::: rightNeighbourhood else if(idx == nbblocs1 - 1) ar.map((idx, _)) ::: leftNeighbourhood else ar.map((idx, _)) ::: leftNeighbourhood ::: rightNeighbourhood composing.toIterator }.partitionBy(new HashPartitioner(nbblocs1)) localityPerPartitionRDD } final def generateDataLocalityLD[ O, Pz[B, C <: GVector[C]] <: Preprocessable[B, C, Pz], Hasher <: HashingScalar ]( rddToPartitioned: RDD[Pz[O, ScalarVector]], hashing: Hasher, nbblocs1: Int, nbBucketRange: Int ): RDD[(IndexPartition, (Pz[O, ScalarVector], IsOriginalDot, HasConverged))] = { val hashedRDD = rddToPartitioned.sortBy( cz => hashing.hf(cz.v) , ascending = true, nbblocs1 ) generateDataLocalityOnHashsedDS(hashedRDD, nbblocs1, nbBucketRange) } }
Example 169
Source File: KPPInitializer.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.clustering.kcenters.scala

  final def kppInit[
    O,
    V <: GVector[V],
    Cz[Y, Z <: GVector[Z]] <: Clusterizable[Y, Z, Cz],
    D <: Distance[V]
  ](data: GenSeq[Cz[O, V]], metric: D, k: Int): immutable.HashMap[Int, V] = {

    val centers = mutable.ArrayBuffer(data(Random.nextInt(data.size)).v)

    def obtainNearestCenter(v: V): V = centers.minBy(metric.d(_, v))

    @annotation.tailrec
    def go(i: Int): Unit = {
      val preprocessed = data.map{ cz =>
        val toPow2 = metric.d(cz.v, obtainNearestCenter(cz.v))
        (cz.v, toPow2 * toPow2)
      }
      val phi = preprocessed.aggregate(0D)((agg, e) => agg + e._2, _ + _)
      val probabilities = preprocessed.map{ case (v, toPow2) => (v, toPow2 / phi) }.seq
      val shuffled = Random.shuffle(probabilities)
      centers += Stats.obtainMedianFollowingWeightedDistribution[V](shuffled)
      if(i < k - 2) go(i + 1)
    }

    go(0)

    immutable.HashMap(centers.zipWithIndex.map{ case (center, clusterID) => (clusterID, center) }:_*)
  }
}
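kppInit above is k-means++ style seeding: the growing list of centers lives in a mutable.ArrayBuffer and each new center is drawn from a distribution weighted by the squared distance to the nearest existing center. The same idea on plain Double points, stripped of the library's Clusterizable and Distance abstractions (a standalone sketch, not the library API):

import scala.collection.mutable
import scala.util.Random

object KppSeedingSketch extends App {
  // pick k initial centers from 1-D points, weighting each draw by squared distance to the nearest chosen center
  def kppInit(points: Seq[Double], k: Int): Seq[Double] = {
    val centers = mutable.ArrayBuffer(points(Random.nextInt(points.size)))
    while (centers.size < k) {
      val weights = points.map { p =>
        val d = math.abs(p - centers.minBy(c => math.abs(c - p)))
        (p, d * d)
      }
      val r = Random.nextDouble * weights.map(_._2).sum
      // walk the cumulative weights until r is exceeded
      var acc = 0.0
      val chosen = weights.find { case (_, w) => acc += w; acc >= r }.map(_._1).getOrElse(points.last)
      centers += chosen
    }
    centers.toSeq
  }

  println(kppInit(Seq(0.0, 0.1, 0.2, 5.0, 5.1, 9.9), 3))
}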
Example 170
Source File: K-Means.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.clustering.kcenters.scala

  final def fit[D <: ContinuousDistance, GS[Y] <: GenSeq[Y]](
    data: GS[Array[Double]],
    k: Int,
    metric: D,
    minShift: Double,
    maxIterations: Int
  ): KMeansModel[D] = {
    KMeans(k, metric, minShift, maxIterations, immutable.HashMap.empty[Int, ScalarVector]).fit(scalarToClusterizable(data))
  }
}
Example 171
Source File: EigenValue.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.scala.clustering.tensor val n2 = data.head.cols val timeColumn = DenseMatrix.zeros[Double](m,n2) val timeRow = DenseMatrix.zeros[Double](m,n1) @annotation.tailrec def matriceColumnSet(t:mutable.ArrayBuffer[DenseMatrix[Double]], m:DenseMatrix[Double], c:DenseMatrix[Double], i: Int, j: Int , k: Int): DenseMatrix[Double] = { if (j < t.head.cols && k < t.length) { m(k, j) = t(k)(i, j) matriceColumnSet(t, m, c, i, j, k + 1) } else if (k == t.length && j < t.head.cols) { matriceColumnSet(t, m, c, i, j + 1 , 0) } else if (i < t.head.rows - 1) { c += cov(m) matriceColumnSet(t, m, c, i + 1, 0, 0) } else { c += cov(m) } } @annotation.tailrec def matriceRowSet(t: mutable.ArrayBuffer[DenseMatrix[Double]], m: DenseMatrix[Double], c: DenseMatrix[Double], i: Int, j: Int , k: Int): DenseMatrix[Double] = { if (i < t.head.rows && k < t.length) { m(k, i) = t(k)(i, j) matriceRowSet(t, m, c, i, j, k + 1) } else if (k == t.length && i < t.head.rows) { matriceRowSet(t, m, c, i + 1, j , 0) } else if (j < t.head.cols - 1){ c += cov(m) matriceRowSet(t, m, c, 0, j + 1, 0) } else { c += cov(m) } } val columnMatrix = matriceColumnSet(data, timeColumn, DenseMatrix.zeros[Double](n2,n2), 0, 0, 0 ) val svd.SVD(u1,eigValue,eigVector) = svd(columnMatrix) val columnEigvalue = eigValue.toArray val rowMatrix = matriceRowSet(data, timeRow, DenseMatrix.zeros[Double](n1,n1), 0, 0, 0 ) val svd.SVD(u2,eigValue2,eigVector2) = svd(rowMatrix) val rowEigvalue = eigValue2.toArray Array(rowEigvalue.take(5), columnEigvalue.take(5)) } } object EigenValue extends Serializable { def train(k: Int, data: mutable.ArrayBuffer[DenseMatrix[Double]]) = (new EigenValue(k)).fit(data) }
Example 172
Source File: Statistics.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.stats

  final def obtainMedianFollowingWeightedDistribution[V](distribution: Seq[(V, Double)]): V = {

    val p = scala.util.Random.nextDouble * distribution.foldLeft(0D)((agg, e) => agg + e._2)

    @annotation.tailrec
    def go(accum: Double, i: Int): Int = {
      if(accum < p) go(accum + distribution(i)._2, i + 1)
      else i
    }

    val cpt = go(0D, 0)
    if(cpt == 0) distribution.head._1 else distribution(cpt - 1)._1
  }
}
Example 173
Source File: HashingFunctions.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.hashing

  final def hf(v: Array[Double], j: Int): Double = {
    @annotation.tailrec
    def go(s: Double, i: Int): Double = {
      if(i < v.size) go(s + v(i) * hvs(j)._1(i), i + 1)
      else s
    }
    (go(0D, 0) + hvs(j)._2) / w
  }

  final def obtainBucketPerLevel(v: Array[Double]): immutable.IndexedSeq[Int] = {
    hvs.map{ case (rv, _, hfid) =>
      val bucketID = bucketsLimits.find{ case (th, _) => hf(v, hfid) <= th }
      if(bucketID.isDefined) bucketID.get._2 else buckets
    }
  }
}
Example 174
Source File: BinaryDistanceUtils.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.math.distances

  final def countOccFeat(data: Seq[Array[Int]]): Array[(Int, Int)] = {
    import org.clustering4ever.util.VectorsAddOperationsImplicits._
    val nbTotData = data.size
    val nbOne = data.reduce(SumVectors.sumVectors(_, _))
    val nbZero = nbOne.map(nbTotData - _)
    nbZero.zip(nbOne)
  }

  final def genProb2Feat(nbOccFeatTab: Seq[(Int, Int)], nbTotData: Int): Seq[(Double, Double)] = {
    nbOccFeatTab.map{ case (zero, one) =>
      val totDataMinusOne = nbTotData - 1D
      val product = nbTotData * totDataMinusOne
      (
        (zero * (zero - 1D)) / product,
        (one * (one - 1D)) / product
      )
    }
  }
}
Example 175
Source File: Tree.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.structures.tree

  final def depthTraversal[T](t: Tree[T]) = {
    @annotation.tailrec
    def go(l: List[Tree[T]], ids: mutable.Buffer[Int]): mutable.Buffer[Int] = {
      l match {
        case Nil => ids
        case Leaf(id, v) :: ls => go(ls, ids += id)
        case Node(id, childrens) :: ls => go(childrens ::: ls, ids += id)
      }
    }
    go(List(t), mutable.ArrayBuffer.empty[Int])
  }
}
Example 176
Source File: SortingTools.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.utils

  final def bucketSort(toSort: Array[Double], b: Int) = {

    val buckets = parallel.mutable.ParArray.fill(b)(mutable.ArrayBuffer.empty[Double])
    val m = toSort.max

    @annotation.tailrec
    def go(i: Int) : Unit = {
      if(i < toSort.size) {
        buckets((toSort(i) / m * (b - 1)).toInt) += toSort(i)
        go(i + 1)
      }
    }

    go(0)
    buckets.flatMap(_.sorted)
  }
}
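bucketSort above spreads the values over b buckets according to their size relative to the maximum, then sorts each bucket. The same idea with plain collections instead of ParArray, assuming non-negative input values (a sketch for illustration):

import scala.collection.mutable

object BucketSortSketch extends App {
  def bucketSort(toSort: Array[Double], b: Int): Array[Double] = {
    val buckets = Array.fill(b)(mutable.ArrayBuffer.empty[Double])
    val m = toSort.max
    // values close to the maximum land in the last bucket, small values in the first
    toSort.foreach(x => buckets((x / m * (b - 1)).toInt) += x)
    buckets.flatMap(_.sorted)
  }

  println(bucketSort(Array(0.42, 0.07, 0.9, 0.31, 0.88), 3).mkString(", "))
}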
Example 177
Source File: DiscoverConnexComponents.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.graph

  final def obtainConnexComponents(nodes: immutable.HashSet[Int], neighbors: immutable.HashMap[Int, immutable.HashSet[Int]]): List[List[Int]] = {

    val visited = mutable.HashMap.empty[Int, Int]

    def depthFirstTraverseFunctional(node: Int, clusterID: Int): Unit = {
      val nodeToExplore = immutable.HashSet(node)

      def obtainUnvisitedNeihbors(hs: immutable.HashSet[Int]) = {
        hs.flatMap{ n =>
          val unvisited = neighbors(n).filter( n => !visited.contains(n) )
          visited ++= unvisited.map( uv => (uv, clusterID) )
          unvisited
        }
      }

      @annotation.tailrec
      def go(hs: immutable.HashSet[Int]): immutable.HashSet[Int] = if(!hs.isEmpty) go(obtainUnvisitedNeihbors(hs)) else hs

      go(nodeToExplore)
    }

    var clusterID = 0
    nodes.foreach( n =>
      if(!visited.contains(n)) {
        visited += ((n, clusterID))
        depthFirstTraverseFunctional(n, clusterID)
        clusterID += 1
      }
    )

    val labeledNodes = nodes.toList.map( n => (n, visited(n)) )
    val labels = labeledNodes.map(_._2)
    val connexComponents = labels.map( l => labeledNodes.collect{ case (n, cID) if cID == l => n } )

    connexComponents
  }
}
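obtainConnexComponents above labels every node with a cluster id kept in a mutable.HashMap while repeatedly expanding the unvisited neighbours of each start node. A small usage sketch, assuming the enclosing object is in scope; every node needs an entry in the neighbour map, and the returned list contains one (repeated) component list per node:

import scala.collection.immutable

object ConnexComponentsDemo extends App {
  // two components: {1, 2, 3} and {4, 5}
  val nodes = immutable.HashSet(1, 2, 3, 4, 5)
  val neighbors = immutable.HashMap(
    1 -> immutable.HashSet(2),
    2 -> immutable.HashSet(1, 3),
    3 -> immutable.HashSet(2),
    4 -> immutable.HashSet(5),
    5 -> immutable.HashSet(4)
  )
  println(obtainConnexComponents(nodes, neighbors))
}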
Example 178
Source File: PersistenceRepresentation.scala From vamp with Apache License 2.0 | 5 votes |
package io.vamp.persistence import io.vamp.common.Artifact import io.vamp.common.akka.CommonActorLogging import io.vamp.common.http.OffsetEnvelope import io.vamp.common.notification.NotificationProvider import scala.collection.mutable import scala.language.postfixOps import scala.reflect.ClassTag trait PersistenceRepresentation extends PersistenceApi with AccessGuard { this: CommonActorLogging with NotificationProvider ⇒ private val store: mutable.Map[String, mutable.Map[String, Artifact]] = new mutable.HashMap() protected def info(): Map[String, Any] = Map[String, Any]( "status" → (if (validData) "valid" else "corrupted"), "artifacts" → (store.map { case (key, value) ⇒ key → value.values.size } toMap) ) protected def all(`type`: String): List[Artifact] = store.get(`type`).map(_.values.toList).getOrElse(Nil) protected def all[T <: Artifact](kind: String, page: Int, perPage: Int, filter: T ⇒ Boolean): ArtifactResponseEnvelope = { log.debug(s"In memory representation: all [$kind] of $page per $perPage") val artifacts = all(kind).filter { artifact ⇒ filter(artifact.asInstanceOf[T]) } val total = artifacts.size val (p, pp) = OffsetEnvelope.normalize(page, perPage, ArtifactResponseEnvelope.maxPerPage) val (rp, rpp) = OffsetEnvelope.normalize(total, p, pp, ArtifactResponseEnvelope.maxPerPage) ArtifactResponseEnvelope(artifacts.slice((p - 1) * pp, p * pp), total, rp, rpp) } protected def get[T <: Artifact](name: String, kind: String): Option[T] = { log.debug(s"In memory representation: read [$kind] - $name}") store.get(kind).flatMap(_.get(name)).asInstanceOf[Option[T]] } protected def set[T <: Artifact](artifact: T, kind: String): T = { def put(map: mutable.Map[String, Artifact]) = { map.put(artifact.name, before( artifact )) after(set = true)(artifact) } log.debug(s"In memory representation: set [$kind] - ${artifact.name}") store.get(kind) match { case None ⇒ val map = new mutable.HashMap[String, Artifact]() put(map) store.put(kind, map) case Some(map) ⇒ put(map) } artifact } protected def delete[T <: Artifact](name: String, kind: String): Option[T] = { log.debug(s"In memory representation: delete [$kind] - $name}") store.get(kind) flatMap { map ⇒ val result = map.remove(name).map { artifact ⇒ after[T](set = false)(artifact.asInstanceOf[T]) } if (result.isEmpty) log.debug(s"Artifact not found for deletion: $kind: $name") result } } protected def find[A: ClassTag](p: A ⇒ Boolean, `type`: Class[_ <: Artifact]): Option[A] = { store.get(type2string(`type`)).flatMap { _.find { case (_, artifact: A) ⇒ p(artifact) case _ ⇒ false } } map (_._2.asInstanceOf[A]) } protected def before[T <: Artifact](artifact: T): T = artifact protected def after[T <: Artifact](set: Boolean)(artifact: T): T = artifact }
Example 179
Source File: Percolator.scala From vamp with Apache License 2.0 | 5 votes |
package io.vamp.pulse import akka.actor.{ Actor, ActorRef } import io.vamp.common.akka.CommonActorLogging import io.vamp.model.event.Event import scala.collection.mutable object Percolator { sealed trait PercolatorMessage case class GetPercolator(name: String) extends PercolatorMessage case class RegisterPercolator(name: String, tags: Set[String], `type`: Option[String], message: Any) extends PercolatorMessage case class UnregisterPercolator(name: String) extends PercolatorMessage } trait Percolator { this: Actor with CommonActorLogging ⇒ case class PercolatorEntry(tags: Set[String], `type`: Option[String], actor: ActorRef, message: Any) protected val percolators = mutable.Map[String, PercolatorEntry]() def getPercolator(name: String) = percolators.get(name) def registerPercolator(name: String, tags: Set[String], `type`: Option[String], message: Any) = { percolators.put(name, PercolatorEntry(tags, `type`, sender(), message)) match { case Some(entry) if entry.tags == tags && entry.`type` == `type` ⇒ case _ ⇒ log.info(s"Percolator '$name' has been registered for tags '${tags.mkString(", ")}'.") } } def unregisterPercolator(name: String) = { if (percolators.remove(name).nonEmpty) log.info(s"Percolator successfully removed for '$name'.") } def percolate(publishEventValue: Boolean): (Event ⇒ Event) = { (event: Event) ⇒ percolators.foreach { case (name, percolator) ⇒ if (percolator.tags.forall(event.tags.contains) && (percolator.`type`.isEmpty || percolator.`type`.get == event.`type`)) { log.debug(s"Percolate match for '$name'.") val send = if (publishEventValue) event else event.copy(value = None) percolator.actor ! (percolator.message → send) } } event } }
Example 180
Source File: SseConnector.scala From vamp with Apache License 2.0 | 5 votes |
package io.vamp.common.http import akka.Done import akka.actor.ActorSystem import akka.event.LoggingAdapter import akka.http.scaladsl.model.HttpHeader.ParsingResult.Ok import akka.http.scaladsl.model.sse.ServerSentEvent import akka.http.scaladsl.model.{ HttpHeader, HttpRequest, HttpResponse, Uri } import akka.stream.ActorMaterializer import akka.stream.scaladsl.{ Sink, Source } import io.vamp.common.http.EventSource.EventSource import scala.collection.mutable import scala.concurrent.Future import scala.concurrent.duration.{ FiniteDuration, _ } import scala.language.postfixOps import scala.util.{ Failure, Success } private case class SseConnectionConfig(url: String, headers: List[(String, String)], tlsCheck: Boolean) private case class SseConnectionEntryValue(source: EventSource) trait SseListener { def onEvent(event: ServerSentEvent): Unit } object SseConnector { private val retryDelay: FiniteDuration = 5 second private val listeners: mutable.Map[SseConnectionConfig, Set[SseListener]] = mutable.Map() private val connections: mutable.Map[SseConnectionConfig, Future[Done]] = mutable.Map() def open(url: String, headers: List[(String, String)] = Nil, tlsCheck: Boolean)(listener: SseListener)(implicit system: ActorSystem, logger: LoggingAdapter): Unit = synchronized { val config = SseConnectionConfig(url, headers, tlsCheck) implicit val materializer: ActorMaterializer = ActorMaterializer() listeners.update(config, listeners.getOrElse(config, Set()) + listener) connections.getOrElseUpdate(config, { logger.info(s"Opening SSE connection: $url") EventSource(Uri(url), send(config), None, retryDelay).takeWhile { event ⇒ event.eventType.foreach(t ⇒ logger.info(s"SSE: $t")) val receivers = listeners.getOrElse(config, Set()) receivers.foreach(_.onEvent(event)) val continue = receivers.nonEmpty if (!continue) logger.info(s"Closing SSE connection: $url") continue }.runWith(Sink.ignore) }) } def close(listener: SseListener): Unit = synchronized { listeners.transform((_, v) ⇒ v - listener) } private def send(config: SseConnectionConfig)(request: HttpRequest)(implicit system: ActorSystem, materializer: ActorMaterializer): Future[HttpResponse] = { val httpHeaders = config.headers.map { case (k, v) ⇒ HttpHeader.parse(k, v) } collect { case Ok(h, _) ⇒ h } filterNot request.headers.contains Source.single(request.withHeaders(request.headers ++ httpHeaders) → 1).via(HttpClient.pool[Any](config.url, config.tlsCheck)).map { case (Success(response: HttpResponse), _) ⇒ response case (Failure(f), _) ⇒ throw new RuntimeException(f.getMessage) }.runWith(Sink.head) } }
Example 181
Source File: LogPublisherHub.scala From vamp with Apache License 2.0 | 5 votes |
package io.vamp.common.akka import akka.actor.{ ActorRef, ActorSystem } import ch.qos.logback.classic.filter.ThresholdFilter import ch.qos.logback.classic.spi.ILoggingEvent import ch.qos.logback.classic.{ Level, LoggerContext, Logger ⇒ LogbackLogger } import ch.qos.logback.core.AppenderBase import io.vamp.common.Namespace import org.slf4j.{ Logger, LoggerFactory } import scala.collection.mutable object LogPublisherHub { private val logger = LoggerFactory.getLogger(LogPublisherHub.getClass) private val context = LoggerFactory.getILoggerFactory.asInstanceOf[LoggerContext] private val rootLogger = context.getLogger(Logger.ROOT_LOGGER_NAME) private val sessions: mutable.Map[String, LogPublisher] = new mutable.HashMap() def subscribe(to: ActorRef, level: String, loggerName: Option[String], encoder: (ILoggingEvent) ⇒ AnyRef)(implicit actorSystem: ActorSystem, namespace: Namespace): Unit = { val appenderLevel = Level.toLevel(level, Level.INFO) val appenderLogger = loggerName.map(context.getLogger).getOrElse(rootLogger) val exists = sessions.get(to.toString).exists { publisher ⇒ publisher.level == appenderLevel && publisher.logger.getName == appenderLogger.getName } if (!exists) { unsubscribe(to) if (appenderLevel != Level.OFF) { logger.info(s"Starting log publisher [${appenderLevel.levelStr}] '${appenderLogger.getName}': $to") val publisher = LogPublisher(to, appenderLogger, appenderLevel, encoder) publisher.start() sessions.put(to.toString, publisher) } } } def unsubscribe(to: ActorRef): Unit = { sessions.remove(to.toString).foreach { publisher ⇒ logger.info(s"Stopping log publisher: $to") publisher.stop() } } } private case class LogPublisher(to: ActorRef, logger: LogbackLogger, level: Level, encoder: (ILoggingEvent) ⇒ AnyRef)(implicit actorSystem: ActorSystem, namespace: Namespace) { private val filter = new ThresholdFilter() filter.setLevel(level.levelStr) private val appender = new AppenderBase[ILoggingEvent] { override def append(loggingEvent: ILoggingEvent) = to ! encoder(loggingEvent) } appender.addFilter(filter) appender.setName(to.toString) def start() = { val context = logger.getLoggerContext filter.setContext(context) appender.setContext(context) filter.start() appender.start() logger.addAppender(appender) } def stop() = { appender.stop() filter.stop() logger.detachAppender(appender) } }
Example 182
package io.vamp.common.akka import java.util.concurrent.atomic.AtomicInteger import _root_.akka.pattern.ask import akka.actor._ import akka.util.Timeout import com.typesafe.scalalogging.LazyLogging import io.vamp.common.Namespace import io.vamp.common.util.TextUtil import scala.collection.mutable import scala.concurrent.{ ExecutionContext, Future } import scala.reflect._ object IoC extends LazyLogging { private val counter = new AtomicInteger(0) private val aliases: mutable.Map[String, mutable.Map[Class[_], Class[_]]] = mutable.Map() private val actorRefs: mutable.Map[String, mutable.Map[Class[_], ActorRef]] = mutable.Map() private val namespaceMap: mutable.Map[String, Namespace] = mutable.Map() private val namespaceActors: mutable.Map[String, ActorRef] = mutable.Map() def namespaces: List[Namespace] = namespaceMap.values.toList def alias[FROM: ClassTag](implicit namespace: Namespace): Class[_] = { alias(classTag[FROM].runtimeClass) } def alias(from: Class[_])(implicit namespace: Namespace): Class[_] = { aliases.get(namespace.name).flatMap(_.get(from)).getOrElse(from) } def alias[FROM: ClassTag, TO: ClassTag](implicit namespace: Namespace): Option[Class[_]] = { alias(classTag[FROM].runtimeClass, classTag[TO].runtimeClass) } def alias(from: Class[_], to: Class[_])(implicit namespace: Namespace): Option[Class[_]] = { aliases.getOrElseUpdate(namespace.name, mutable.Map()).put(from, to) } def createActor(clazz: Class[_])(implicit actorSystem: ActorSystem, namespace: Namespace, timeout: Timeout): Future[ActorRef] = { createActor(Props(clazz)) } def createActor[ACTOR: ClassTag](implicit actorSystem: ActorSystem, namespace: Namespace, timeout: Timeout): Future[ActorRef] = { createActor(classTag[ACTOR].runtimeClass) } def createActor[ACTOR: ClassTag](arg: Any, args: Any*)(implicit actorSystem: ActorSystem, namespace: Namespace, timeout: Timeout): Future[ActorRef] = { createActor(Props(classTag[ACTOR].runtimeClass, arg :: args.toList: _*)) } def createActor(props: Props)(implicit actorSystem: ActorSystem, namespace: Namespace, timeout: Timeout): Future[ActorRef] = { logger.info(s"Create Actor ${props.clazz.getSimpleName} for namespace ${namespace.name}") implicit val ec: ExecutionContext = actorSystem.dispatcher (namespaceActor ? props) map { case actorRef: ActorRef ⇒ actorRefs.getOrElseUpdate(namespace.name, mutable.Map()).put(props.clazz, actorRef) aliases.getOrElseUpdate(namespace.name, mutable.Map()).foreach { case (from, to) if to == props.clazz ⇒ actorRefs.getOrElseUpdate(namespace.name, mutable.Map()).put(from, actorRef) case _ ⇒ } actorRef case _ ⇒ throw new RuntimeException(s"Cannot create actor for: ${props.clazz.getSimpleName}") } } def actorFor[ACTOR: ClassTag](implicit actorSystem: ActorSystem, namespace: Namespace): ActorRef = { actorFor(classTag[ACTOR].runtimeClass) } def actorFor(clazz: Class[_])(implicit actorSystem: ActorSystem, namespace: Namespace): ActorRef = { actorRefs.get(namespace.name).flatMap(_.get(alias(clazz))) match { case Some(actorRef) ⇒ actorRef case _ ⇒ throw new RuntimeException(s"No actor reference for: $clazz") } } private def namespaceActor(implicit actorSystem: ActorSystem, namespace: Namespace): ActorRef = { namespaceMap.put(namespace.name, namespace) namespaceActors.getOrElseUpdate(namespace.name, actorSystem.actorOf(Props(new Actor { def receive = { case props: Props ⇒ sender() ! context.actorOf(props, s"${TextUtil.toSnakeCase(props.clazz.getSimpleName)}-${counter.getAndIncrement}") case _ ⇒ } }), namespace.name)) } }
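The registry above leans on mutable.Map.getOrElseUpdate to lazily create one inner map per namespace and on plain get/flatMap for lookups. The pattern in isolation, with strings standing in for namespaces and actor references (a sketch, not the vamp API):

import scala.collection.mutable

object RegistrySketch extends App {
  private val refs: mutable.Map[String, mutable.Map[Class[_], String]] = mutable.Map()

  def register(ns: String, clazz: Class[_], ref: String): Unit =
    refs.getOrElseUpdate(ns, mutable.Map()).put(clazz, ref)

  def lookup(ns: String, clazz: Class[_]): Option[String] =
    refs.get(ns).flatMap(_.get(clazz))

  register("default", classOf[String], "actor-1")
  println(lookup("default", classOf[String])) // Some(actor-1)
  println(lookup("other", classOf[String]))   // None
}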
Example 183
Source File: YamlUtil.scala From vamp with Apache License 2.0 | 5 votes |
package io.vamp.common.util

import org.yaml.snakeyaml.Yaml
import org.yaml.snakeyaml.constructor.Constructor
import org.yaml.snakeyaml.error.YAMLException

import scala.collection.JavaConverters._
import scala.collection.mutable

object YamlUtil {

  def yaml: Yaml = {
    new Yaml(new Constructor() {
      override def getClassForName(name: String): Class[_] = throw new YAMLException("Not supported.")
    })
  }

  def convert(any: Any, preserveOrder: Boolean): Any = any match {
    case source: java.util.Map[_, _] ⇒
      if (preserveOrder) {
        val map = new mutable.LinkedHashMap[String, Any]()
        source.entrySet().asScala.foreach(entry ⇒ map += entry.getKey.toString → convert(entry.getValue, preserveOrder))
        map
      }
      else source.entrySet().asScala.map(entry ⇒ entry.getKey.toString → convert(entry.getValue, preserveOrder)).toMap
    case source: java.util.List[_]     ⇒ source.asScala.map(convert(_, preserveOrder)).toList
    case source: java.lang.Iterable[_] ⇒ source.asScala.map(convert(_, preserveOrder)).toList
    case source                        ⇒ source
  }
}
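convert above switches between a mutable.LinkedHashMap, which preserves the key order of the parsed YAML, and a plain immutable Map when order does not matter. A quick sketch of the difference, assuming YamlUtil is imported:

object YamlUtilDemo extends App {
  val source = new java.util.LinkedHashMap[String, Any]()
  source.put("b", 1)
  source.put("a", 2)

  println(YamlUtil.convert(source, preserveOrder = true))  // LinkedHashMap, keeps b before a
  println(YamlUtil.convert(source, preserveOrder = false)) // unordered immutable Map
}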
Example 184
Source File: MessageResolver.scala From vamp with Apache License 2.0 | 5 votes |
package io.vamp.common.notification import com.typesafe.scalalogging.Logger import org.slf4j.LoggerFactory import org.yaml.snakeyaml.Yaml import scala.collection.JavaConverters._ import scala.collection.mutable import scala.io.Source import scala.language.postfixOps trait MessageResolverProvider { val messageResolver: MessageResolver trait MessageResolver { def resolve(implicit notification: Notification): String } } trait DefaultPackageMessageResolverProvider extends MessageResolverProvider { val messageResolver: MessageResolver = new DefaultPackageMessageResolver() private class DefaultPackageMessageResolver extends MessageResolver { protected case class Message(parts: Seq[String], args: Seq[String]) private val logger = Logger(LoggerFactory.getLogger(classOf[Notification])) private val messages = new mutable.LinkedHashMap[String, mutable.Map[String, Any]]() def resolve(implicit notification: Notification): String = { try { val name = notification.getClass.getSimpleName val messageSource = resolveMessageSource messageSource.get(name) match { case None ⇒ logger.warn(s"No mapping for ${notification.getClass}") defaultMapping(error = false) case Some(value: Message) ⇒ resolveMessageValue(value) case Some(value: Any) ⇒ val message = parseMessage(value.toString) messageSource.put(name, message) resolveMessageValue(message) } } catch { case e: NoSuchMethodException ⇒ val field = e.getMessage.substring(e.getMessage.lastIndexOf('.') + 1, e.getMessage.length - 2) logger.error(s"Message mapping error: field '$field' not defined for ${notification.getClass}") defaultMapping() case e: Exception ⇒ logger.error(e.getMessage, e) defaultMapping() } } protected def defaultMapping(error: Boolean = true)(implicit notification: Notification): String = if (error) "Error." else "Notification." protected def resolveMessageSource(implicit notification: Notification): mutable.Map[String, Any] = { val packageName = notification.getClass.getPackage.toString messages.get(packageName) match { case None ⇒ val reader = Source.fromURL(notification.getClass.getResource("messages.yml")).bufferedReader() try { val input = new Yaml().load(reader).asInstanceOf[java.util.Map[String, Any]].asScala messages.put(packageName, input) input } finally { reader.close() } case Some(map) ⇒ map } } protected def parseMessage(message: String)(implicit notification: Notification): Message = { val pattern = "\\{[^}]+\\}" r val parts = pattern split message val args = (pattern findAllIn message).map(s ⇒ s.substring(1, s.length - 1)).toList Message(parts, args) } protected def resolveMessageValue(message: Message)(implicit notification: Notification): String = { val pi = message.parts.iterator val ai = message.args.iterator val sb = new StringBuilder() while (ai.hasNext) { sb append pi.next sb append ai.next().split('.').foldLeft(notification.asInstanceOf[AnyRef])((arg1, arg2) ⇒ arg1.getClass.getDeclaredMethod(arg2).invoke(arg1)).toString } if (pi.hasNext) sb append pi.next sb.toString() } } }
Example 185
Source File: DataFrameConverterSpec.scala From incubator-toree with Apache License 2.0 | 5 votes |
package org.apache.toree.utils import org.apache.spark.sql.types.StructType import org.apache.spark.sql.{DataFrame, Row} import org.mockito.Mockito._ import org.scalatest.mock.MockitoSugar import org.scalatest.{BeforeAndAfterAll, FunSpec, Matchers} import play.api.libs.json.{JsArray, JsString, Json} import test.utils.SparkContextProvider import scala.collection.mutable class DataFrameConverterSpec extends FunSpec with MockitoSugar with Matchers with BeforeAndAfterAll { lazy val spark = SparkContextProvider.sparkContext override protected def afterAll(): Unit = { spark.stop() super.afterAll() } val dataFrameConverter: DataFrameConverter = new DataFrameConverter val mockDataFrame = mock[DataFrame] val mockRdd = spark.parallelize(Seq(Row(new mutable.WrappedArray.ofRef(Array("test1", "test2")), 2, null))) val mockStruct = mock[StructType] val columns = Seq("foo", "bar").toArray doReturn(mockStruct).when(mockDataFrame).schema doReturn(columns).when(mockStruct).fieldNames doReturn(mockRdd).when(mockDataFrame).rdd describe("DataFrameConverter") { describe("#convert") { it("should convert to a valid JSON object") { val someJson = dataFrameConverter.convert(mockDataFrame, "json") val jsValue = Json.parse(someJson.get) jsValue \ "columns" should be (JsArray(Seq(JsString("foo"), JsString("bar")))) jsValue \ "rows" should be (JsArray(Seq( JsArray(Seq(JsString("[test1, test2]"), JsString("2"), JsString("null"))) ))) } it("should convert to csv") { val csv = dataFrameConverter.convert(mockDataFrame, "csv").get val values = csv.split("\n") values(0) shouldBe "foo,bar" values(1) shouldBe "[test1, test2],2,null" } it("should convert to html") { val html = dataFrameConverter.convert(mockDataFrame, "html").get html.contains("<th>foo</th>") should be(true) html.contains("<th>bar</th>") should be(true) html.contains("<td>[test1, test2]</td>") should be(true) html.contains("<td>2</td>") should be(true) html.contains("<td>null</td>") should be(true) } it("should convert limit the selection") { val someLimited = dataFrameConverter.convert(mockDataFrame, "csv", 1) val limitedLines = someLimited.get.split("\n") limitedLines.length should be(2) } it("should return a Failure for invalid types") { val result = dataFrameConverter.convert(mockDataFrame, "Invalid Type") result.isFailure should be(true) } } } }
Example 186
Source File: CoapSinkTask.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.coap.sink import java.util import com.datamountaineer.streamreactor.connect.coap.configs.{CoapConstants, CoapSettings, CoapSinkConfig} import com.datamountaineer.streamreactor.connect.errors.ErrorPolicyEnum import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter} import com.typesafe.scalalogging.StrictLogging import org.apache.kafka.clients.consumer.OffsetAndMetadata import org.apache.kafka.common.TopicPartition import org.apache.kafka.connect.sink.{SinkRecord, SinkTask} import scala.collection.JavaConverters._ import scala.collection.mutable class CoapSinkTask extends SinkTask with StrictLogging { private val writers = mutable.Map.empty[String, CoapWriter] private val progressCounter = new ProgressCounter private var enableProgress: Boolean = false private val manifest = JarManifest(getClass.getProtectionDomain.getCodeSource.getLocation) override def start(props: util.Map[String, String]): Unit = { logger.info(scala.io.Source.fromInputStream(getClass.getResourceAsStream("/coap-sink-ascii.txt")).mkString + s" $version") logger.info(manifest.printManifest()) val conf = if (context.configs().isEmpty) props else context.configs() val sinkConfig = CoapSinkConfig(conf) enableProgress = sinkConfig.getBoolean(CoapConstants.PROGRESS_COUNTER_ENABLED) val settings = CoapSettings(sinkConfig) //if error policy is retry set retry interval if (settings.head.errorPolicy.getOrElse(ErrorPolicyEnum.THROW).equals(ErrorPolicyEnum.RETRY)) { context.timeout(sinkConfig.getString(CoapConstants.ERROR_RETRY_INTERVAL).toLong) } settings.map(s => (s.kcql.getSource, CoapWriter(s))).map({ case (k, v) => writers.put(k, v) }) } override def put(records: util.Collection[SinkRecord]): Unit = { records.asScala.map(r => writers(r.topic()).write(List(r))) val seq = records.asScala.toVector if (enableProgress) { progressCounter.update(seq) } } override def stop(): Unit = { writers.foreach({ case (t, w) => logger.info(s"Shutting down writer for $t") w.stop() }) progressCounter.empty } override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {} override def version: String = manifest.version() }
Example 187
Source File: ConnectFileMetaDataStore.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.ftp.source import java.time.Instant import java.util import com.typesafe.scalalogging.StrictLogging import org.apache.kafka.connect.storage.OffsetStorageReader import scala.collection.JavaConverters._ import scala.collection.mutable // allows storage and retrieval of meta datas into connect framework class ConnectFileMetaDataStore(offsetStorage: OffsetStorageReader) extends FileMetaDataStore with StrictLogging { // connect offsets aren't directly committed, hence we'll cache them private val cache = mutable.Map[String, FileMetaData]() override def get(path: String): Option[FileMetaData] = cache.get(path).orElse({ val stored = getFromStorage(path) stored.foreach(set(path,_)) stored }) override def set(path: String, fileMetaData: FileMetaData): Unit = { logger.debug(s"ConnectFileMetaDataStore path = ${path}, fileMetaData.offset = ${fileMetaData.offset}, fileMetaData.attribs.size = ${fileMetaData.attribs.size}") cache.put(path, fileMetaData) } // cache couldn't provide us the info. this is a rather expensive operation (?) def getFromStorage(path: String): Option[FileMetaData] = offsetStorage.offset(Map("path" -> path).asJava) match { case null => logger.info(s"meta store storage HASN'T ${path}") None case o => logger.info(s"meta store storage has ${path}") Some(connectOffsetToFileMetas(path, o)) } def fileMetasToConnectPartition(meta:FileMetaData): util.Map[String, String] = { Map("path" -> meta.attribs.path).asJava } def connectOffsetToFileMetas(path:String, o:AnyRef): FileMetaData = { val jm = o.asInstanceOf[java.util.Map[String, AnyRef]] FileMetaData( FileAttributes( path, jm.get("size").asInstanceOf[Long], Instant.ofEpochMilli(jm.get("timestamp").asInstanceOf[Long]) ), jm.get("hash").asInstanceOf[String], Instant.ofEpochMilli(jm.get("firstfetched").asInstanceOf[Long]), Instant.ofEpochMilli(jm.get("lastmodified").asInstanceOf[Long]), Instant.ofEpochMilli(jm.get("lastinspected").asInstanceOf[Long]), jm.asScala.getOrElse("offset", -1L).asInstanceOf[Long] ) } def fileMetasToConnectOffset(meta: FileMetaData): util.Map[String, Any] = { Map("size" -> meta.attribs.size, "timestamp" -> meta.attribs.timestamp.toEpochMilli, "hash" -> meta.hash, "firstfetched" -> meta.firstFetched.toEpochMilli, "lastmodified" -> meta.lastModified.toEpochMilli, "lastinspected" -> meta.lastInspected.toEpochMilli, "offset" -> meta.offset ).asJava } }
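The store above is a read-through cache: get first checks the in-memory mutable.Map and only then falls back to the offset storage, caching whatever it finds there. The shape of that pattern detached from Kafka Connect, with a simulated backing lookup (a sketch for illustration):

import scala.collection.mutable

object ReadThroughCacheSketch extends App {
  val cache = mutable.Map[String, String]()

  // stand-in for the expensive OffsetStorageReader lookup
  def loadFromStorage(path: String): Option[String] =
    if (path == "/data/a") Some("meta-for-a") else None

  def get(path: String): Option[String] =
    cache.get(path).orElse {
      val stored = loadFromStorage(path)
      stored.foreach(cache.put(path, _))
      stored
    }

  println(get("/data/a")) // loaded from storage, then cached
  println(get("/data/a")) // served from the mutable.Map
  println(get("/data/b")) // None
}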
Example 188
Source File: TypeVar.scala From lift with MIT License | 5 votes |
package arithmetic import lift.arithmetic._ import ir._ import ir.ast.Expr import scala.collection.{immutable, mutable} import scala.language.implicitConversions class TypeVar private(range : Range, fixedId: Option[Long] = None) extends ExtensibleVar("", range, fixedId) { override def copy(r: Range) = new TypeVar(r, Some(id)) override def cloneSimplified() = new TypeVar(range, Some(id)) with SimplifiedExpr override def visitAndRebuild(f: (ArithExpr) => ArithExpr): ArithExpr = f(new TypeVar(range.visitAndRebuild(f), Some(id))) override lazy val toString = "tv_" + name + "_" + id } object TypeVar { def apply(range : Range = RangeUnknown) = { new TypeVar(range) } def getTypeVars(expr: Expr) : Set[TypeVar] = { Expr.visitWithState(immutable.HashSet[TypeVar]())(expr, (inExpr, set) => set ++ getTypeVars(inExpr.t)) } def getTypeVars(t: Type) : Set[TypeVar] = { val result = new mutable.HashSet[TypeVar]() Type.visit(t, (ae:ArithExpr) => result ++= getTypeVars(ae) : Unit ) result.toSet } def getTypeVars(expr: ArithExpr) : Set[TypeVar] = { val typeVars = scala.collection.mutable.HashSet[TypeVar]() ArithExpr.visit(expr, { case tv: TypeVar => typeVars += tv case _ => }) typeVars.toSet } }
Example 189
Source File: ConfigureDiskAction.scala From berilia with Apache License 2.0 | 5 votes |
package com.criteo.dev.cluster.aws import com.criteo.dev.cluster.command.SshAction import com.criteo.dev.cluster.{command, _} import com.criteo.dev.cluster.config.AWSConfig import org.slf4j.LoggerFactory import scala.collection.mutable import scala.concurrent.duration.Duration import scala.concurrent.{Await, Future} import scala.concurrent.ExecutionContext.Implicits.global def configureDisk(node: Node) : List[String] = { val result = SshAction(node, "lsblk", returnResult = true).stripLineEnd logger.info(s"Block information on ${node.ip}:") val lines = result.split("\n").map(_.trim) require(lines(0).trim.split("\\s+")(6).equalsIgnoreCase("MOUNTPOINT"), s"Mount point not in expected position in lsblk output: ${lines(0)}") //this is a bit delicate, but assuming the unmounted ones are at the end, //then we will take the ones up to the first one that has a mount entry. val toMount = lines.reverse.takeWhile(l => l.split("\\s+").length <= 6).map(l => l.split("\\s+")(0)) val mountCommands = toMount.zipWithIndex.flatMap { case (tm, i) => List( s"sudo echo -e 'o\\nn\\np\\n1\\n\\n\\nw' | sudo fdisk /dev/$tm", // create one partition (n, p, 1, default start, default end of sector) s"sudo /sbin/mkfs.ext4 /dev/${tm}1", // make fs s"sudo mkdir -p /${GeneralConstants.data}/$i", s"sudo mount /dev/${tm}1 /${GeneralConstants.data}/$i" // mount ) }.toList command.SshMultiAction(node, mountCommands) 0.to(toMount.length - 1).map(i => s"/${GeneralConstants.data}/$i").toList } }
Example 190
Source File: CopyJarAwsCliAction.scala From berilia with Apache License 2.0 | 5 votes |
package com.criteo.dev.cluster.aws import java.io.File import java.net.{URI, URL} import com.criteo.dev.cluster.config.GlobalConfig import com.criteo.dev.cluster._ import com.criteo.dev.cluster.command.RsyncAction import org.jclouds.compute.domain.NodeMetadata.Status import org.slf4j.LoggerFactory import scala.collection.mutable import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.Duration import scala.concurrent.{Await, Future} import sys.process._ @Public object CopyJarAwsCliAction extends CliAction[Unit] { override def command: String = "copy-jar-aws" override def usageArgs: List[Any] = List("instance.id", "source", "destination") override def help: String = "Copies a file from source to destination path to all nodes of a given cluster (if target directory exists)." private val logger = LoggerFactory.getLogger(CopyJarAwsCliAction.getClass) override def applyInternal(args: List[String], conf: GlobalConfig): Unit = { val instanceId = args(0) val cluster = AwsUtilities.getCluster(conf.backCompat, instanceId) if (!cluster.master.getStatus().equals(Status.RUNNING)) { logger.info("No running clusters found matching criteria.") } val source = args(1) val target = args(2) val sourceUri = new URI(source) val targetFile = new File(target) GeneralUtilities.prepareTempDir val sourceFile = sourceUri.getScheme().toLowerCase() match { case "http" => { val path = s"${GeneralUtilities.getTempDir()}/${targetFile.getName}" DevClusterProcess.process(s"curl -o $path $source").!! path } //only localhost supported case "file" => sourceUri.getPath() case _ => throw new IllegalArgumentException("Only http and file supported for sources for now.") } //copy over files in parallel val nodesToCopy = cluster.slaves ++ Set(cluster.master) logger.info(s"Copying to ${nodesToCopy.size} nodes in parallel.") val copyFutures = nodesToCopy.map(u => GeneralUtilities.getFuture { val targetN = NodeFactory.getAwsNode(conf.target.aws, u) val role = if (AwsUtilities.isSlave(u)) "Slave" else "Master" try { RsyncAction( srcPath = sourceFile, targetN = targetN, targetPath = target, sudo = true) s"$role Node ${u.getId()} with ${targetN.ip}: Copy successful." } catch { case e : Exception => s"$role Node ${u.getId()} with ${targetN.ip}: Copy Failed. This is normal if the given directory does not exist on the node." + s" If not expected, check the directory location and try again." } }) val aggCopyFuture = Future.sequence(copyFutures) val result = Await.result(aggCopyFuture, Duration.Inf) result.foreach(r => logger.info(r)) GeneralUtilities.cleanupTempDir } }
Example 191
Source File: MessagingService.scala From korolev with Apache License 2.0 | 5 votes |
package korolev.server.internal.services

import korolev.effect.syntax._
import korolev.effect.{Effect, Queue, Reporter, Stream}
import korolev.server.Request.RequestHeader
import korolev.server.Response
import korolev.server.Response.Status
import korolev.Qsid
import korolev.effect.io.LazyBytes

import scala.collection.mutable

private[korolev] final class MessagingService[F[_]: Effect](reporter: Reporter,
                                                            commonService: CommonService[F],
                                                            sessionsService: SessionsService[F, _, _]) {

  // Note: this excerpt omits several members of the original class, including
  // `commonResponseHeaders` and the `longPollingTopics` map referenced below.

  private val commonGoneResponse = Response(
    status = Response.Status.Gone,
    body = LazyBytes.empty[F],
    headers = commonResponseHeaders
  )

  private def takeTopic(qsid: Qsid) =
    Effect[F].delay {
      if (longPollingTopics.contains(qsid)) longPollingTopics(qsid)
      else throw new Exception(s"There is no long-polling topic matching $qsid")
    }

  private def createTopic(qsid: Qsid) =
    longPollingTopics.synchronized {
      val topic = Queue[F, String]()
      longPollingTopics.put(qsid, topic)
      topic.stream
    }
}
Example 192
Source File: RemoteDomChangesPerformer.scala From korolev with Apache License 2.0 | 5 votes |
package korolev.internal

import korolev.internal.Frontend.ModifyDomProcedure
import levsha.Id
import levsha.impl.DiffRenderContext.ChangesPerformer

import scala.collection.mutable

private[korolev] class RemoteDomChangesPerformer extends ChangesPerformer {

  val buffer: mutable.ArrayBuffer[Any] = mutable.ArrayBuffer.empty[Any]

  def remove(id: Id): Unit = {
    buffer += ModifyDomProcedure.Remove.code
    buffer += id.parent.get.mkString
    buffer += id.mkString
  }

  def createText(id: Id, text: String): Unit = {
    buffer += ModifyDomProcedure.CreateText.code
    buffer += id.parent.get.mkString
    buffer += id.mkString
    buffer += text
  }

  def create(id: Id, xmlNs: String, tag: String): Unit = {
    val parent = id.parent.fold("0")(_.mkString)
    val pXmlns = if (xmlNs eq levsha.XmlNs.html.uri) 0 else xmlNs
    buffer += ModifyDomProcedure.Create.code
    buffer += parent
    buffer += id.mkString
    buffer += pXmlns
    buffer += tag
  }

  def removeStyle(id: Id, name: String): Unit = {
    buffer += ModifyDomProcedure.RemoveStyle.code
    buffer += id.mkString
    buffer += name
  }

  def setStyle(id: Id, name: String, value: String): Unit = {
    buffer += ModifyDomProcedure.SetStyle.code
    buffer += id.mkString
    buffer += name
    buffer += value
  }

  def setAttr(id: Id, xmlNs: String, name: String, value: String): Unit = {
    val pXmlns = if (xmlNs eq levsha.XmlNs.html.uri) 0 else xmlNs
    buffer += ModifyDomProcedure.SetAttr.code
    buffer += id.mkString
    buffer += pXmlns
    buffer += name
    buffer += value
    buffer += false
  }

  def removeAttr(id: Id, xmlNs: String, name: String): Unit = {
    val pXmlns = if (xmlNs eq levsha.XmlNs.html.uri) 0 else xmlNs
    buffer += ModifyDomProcedure.RemoveAttr.code
    buffer += id.mkString
    buffer += pXmlns
    buffer += name
    buffer += false
  }
}
Example 193
Source File: AsyncTable.scala From korolev with Apache License 2.0 | 5 votes |
package korolev.effect

import korolev.effect.AsyncTable.{AlreadyContainsKeyException, RemovedBeforePutException}
import korolev.effect.Effect.Promise

import scala.collection.mutable

final class AsyncTable[F[_]: Effect, K, V](elems: Seq[(K, V)]) {

  private type Callbacks = List[Promise[V]]
  private type Result = Either[Throwable, V]

  private val table = mutable.Map[K, Either[Callbacks, Result]](elems.map { case (k, v) => (k, Right(Right(v))) }: _*)

  def get(key: K): F[V] =
    Effect[F].promise[V] { cb =>
      table.synchronized {
        table.get(key) match {
          case Some(Right(value)) => cb(value)
          case Some(Left(xs)) => table.update(key, Left(cb :: xs))
          case None => table.update(key, Left(cb :: Nil))
        }
      }
    }

  def put(key: K, value: V): F[Unit] =
    putEither(key, Right(value))

  def fail(key: K, error: Throwable): F[Unit] =
    putEither(key, Left(error))

  def putEither(key: K, errorOrValue: Either[Throwable, V]): F[Unit] =
    Effect[F].delay {
      table.synchronized {
        table.remove(key) match {
          case Some(Right(_)) =>
            throw AlreadyContainsKeyException(key)
          case Some(Left(callbacks)) =>
            table.update(key, Right(errorOrValue))
            callbacks.foreach(_(errorOrValue))
          case None =>
            table.update(key, Right(errorOrValue))
        }
      }
    }

  def remove(key: K): F[Unit] =
    Effect[F].delay {
      table.synchronized {
        table.remove(key) match {
          case Some(Left(callbacks)) =>
            val result = Left(RemovedBeforePutException(key))
            callbacks.foreach(_(result))
          case _ => ()
        }
      }
    }
}

object AsyncTable {

  final case class RemovedBeforePutException(key: Any)
    extends Exception(s"Key $key removed before value was put to table.")

  final case class AlreadyContainsKeyException(key: Any)
    extends Exception(s"This table already contains value for $key")

  def apply[F[_]: Effect, K, V](elems: (K, V)*) =
    new AsyncTable[F, K, V](elems)

  def empty[F[_]: Effect, K, V] =
    new AsyncTable[F, K, V](Nil)
}
Example 194
Source File: Queue.scala From korolev with Apache License 2.0 | 5 votes |
package korolev.effect

import scala.collection.mutable

class Queue[F[_]: Effect, T](maxSize: Int) {

  def offer(item: T): F[Unit] =
    Effect[F].delay(offerUnsafe(item))

  def offerUnsafe(item: T): Unit =
    underlyingQueue.synchronized {
      if (underlyingQueue.size == maxSize) {
        // Remove head from queue if max size reached
        underlyingQueue.dequeue()
        ()
      }
      if (pending != null) {
        val cb = pending
        pending = null
        cb(Right(Some(item)))
      } else {
        underlyingQueue.enqueue(item)
        ()
      }
    }

  def close(): F[Unit] =
    Effect[F].delay(closeUnsafe())

  def closeUnsafe(): Unit =
    underlyingQueue.synchronized {
      if (pending != null) {
        val cb = pending
        pending = null
        cb(Right(None))
      }
      closed = true
    }

  def fail(e: Throwable): F[Unit] = Effect[F].delay {
    underlyingQueue.synchronized {
      error = e
      if (pending != null) {
        val cb = pending
        pending = null
        cb(Left(e))
      }
    }
  }

  private final class QueueStream extends Stream[F, T] {

    def pull(): F[Option[T]] = Effect[F].promise { cb =>
      underlyingQueue.synchronized {
        if (error != null) cb(Left(error))
        else if (closed) cb(Right(None))
        else {
          if (underlyingQueue.nonEmpty) {
            val elem = underlyingQueue.dequeue()
            cb(Right(Some(elem)))
          } else {
            pending = cb
          }
        }
      }
    }

    def cancel(): F[Unit] = close()
  }

  val stream: Stream[F, T] = new QueueStream()

  @volatile private var closed = false
  @volatile private var error: Throwable = _
  @volatile private var pending: Effect.Promise[Option[T]] = _

  private val underlyingQueue: mutable.Queue[T] = mutable.Queue.empty[T]
}

object Queue {
  def apply[F[_]: Effect, T](maxSize: Int = Int.MaxValue): Queue[F, T] =
    new Queue[F, T](maxSize)
}
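The essential idea in Queue is the handoff between offerUnsafe and pull: if a consumer is already parked, the new item completes its callback directly; otherwise the item is buffered, evicting the oldest element once maxSize is reached. Below is a standalone sketch of the same pattern built on plain scala.concurrent.Promise rather than korolev's Effect (close/fail handling omitted); it is an illustration, not korolev code:

import scala.collection.mutable
import scala.concurrent.{Future, Promise}

// Same handoff idea as korolev's Queue, using standard-library primitives.
final class TinyQueue[T](maxSize: Int) {
  private val buffer = mutable.Queue.empty[T]
  private var pending: Promise[Option[T]] = _

  def offer(item: T): Unit = synchronized {
    if (pending != null) {
      // A consumer is parked: hand the item over directly.
      val p = pending
      pending = null
      p.success(Some(item))
    } else {
      // Nobody is waiting: buffer the item, dropping the oldest at capacity.
      if (buffer.size == maxSize) buffer.dequeue()
      buffer.enqueue(item)
    }
  }

  def pull(): Future[Option[T]] = synchronized {
    if (buffer.nonEmpty) Future.successful(Some(buffer.dequeue()))
    else {
      pending = Promise[Option[T]]()
      pending.future
    }
  }
}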
Example 195
Source File: MetabrowseTextModelService.scala From metabrowse with Apache License 2.0 | 5 votes |
package metabrowse

import scala.collection.mutable
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future
import monaco.Promise
import monaco.Uri
import monaco.editor.Editor
import monaco.editor.ITextModel
import monaco.services.IReference
import monaco.services.ITextEditorModel
import monaco.services.ITextModelService
import monaco.services.ImmortalReference
import scala.meta.internal.{semanticdb => s}

object MetabrowseTextModelService extends ITextModelService {

  def modelReference(
      filename: String
  ): Future[IReference[ITextEditorModel]] =
    modelDocument(createUri(filename)).map(_.model)

  // TODO(olafur): Move this state out for easier testing.
  private val modelDocumentCache = mutable.Map.empty[ITextModel, s.TextDocument]

  private def document(model: ITextModel) =
    MetabrowseMonacoDocument(
      modelDocumentCache(model),
      new ImmortalReference(ITextEditorModel(model))
    )

  def modelDocument(
      resource: Uri
  ): Future[MetabrowseMonacoDocument] = {
    val model = Editor.getModel(resource)
    if (model != null) {
      Future.successful(document(model))
    } else {
      for {
        Some(doc) <- MetabrowseFetch.document(resource.path)
      } yield {
        val model = Editor.createModel(doc.text, "scala", resource)
        modelDocumentCache(model) = doc
        document(model)
      }
    }
  }

  override def createModelReference(
      resource: Uri
  ): Promise[IReference[ITextEditorModel]] =
    modelDocument(resource).map(_.model).toMonacoPromise
}
Example 196
Source File: SortedBatch.scala From Waves with MIT License | 5 votes |
package com.wavesplatform.database

import com.wavesplatform.common.state.ByteStr
import org.iq80.leveldb.WriteBatch

import scala.collection.mutable

class SortedBatch extends WriteBatch {
  val addedEntries: mutable.Map[ByteStr, Array[Byte]] = mutable.TreeMap[ByteStr, Array[Byte]]()
  val deletedEntries: mutable.Set[ByteStr] = mutable.TreeSet[ByteStr]()

  override def put(bytes: Array[Byte], bytes1: Array[Byte]): WriteBatch = {
    val k = ByteStr(bytes)
    addedEntries.put(k, bytes1)
    deletedEntries.remove(k)
    this
  }

  override def delete(bytes: Array[Byte]): WriteBatch = {
    val k = ByteStr(bytes)
    addedEntries.remove(k)
    deletedEntries.add(k)
    this
  }

  override def close(): Unit = {}
}
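Because addedEntries is a TreeMap keyed by ByteStr, staged writes come back in key order regardless of insertion order, and deleting a previously put key moves it from addedEntries to deletedEntries. A small usage sketch (writes are only staged in memory here; nothing reaches LevelDB until the batch is applied):

import com.wavesplatform.common.state.ByteStr

val batch = new SortedBatch
batch.put(Array[Byte](2), Array[Byte](20))
batch.put(Array[Byte](1), Array[Byte](10))
batch.delete(Array[Byte](2))

// Only key 0x01 is still staged for writing (iteration over addedEntries is
// sorted by key); key 0x02 has moved to the deleted set.
assert(batch.addedEntries.size == 1 && batch.addedEntries.contains(ByteStr(Array[Byte](1))))
assert(batch.deletedEntries.contains(ByteStr(Array[Byte](2))))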
Example 197
Source File: ScatterPlot.scala From Scurses with MIT License | 5 votes |
package net.team2xh.onions.components.widgets

import net.team2xh.onions.Symbols
import net.team2xh.onions.Themes.ColorScheme
import net.team2xh.onions.components.{FramePanel, Widget}
import net.team2xh.onions.utils.{Drawing, Math, Varying}
import net.team2xh.scurses.Scurses

import scala.Numeric.Implicits._
import scala.collection.mutable

case class ScatterPlot[T: Numeric](parent: FramePanel, values: Varying[Seq[(T, T)]],
                                   labelX: String = "", labelY: String = "",
                                   color: Int = 81, showLabels: Boolean = true)
                                  (implicit screen: Scurses) extends Widget(parent, values) {

  val gridSize = 4

  override def redraw(focus: Boolean, theme: ColorScheme): Unit = {

    val (xs, ys) = values.value.unzip
    val maxX = Math.aBitMoreThanMax(xs)
    val maxY = Math.aBitMoreThanMax(ys)
    val minX = Math.aBitLessThanMin(xs)
    val minY = Math.aBitLessThanMin(ys)

    val valuesLength = maxY.toString.length max minY.toString.length
    val x0 = valuesLength + (if (showLabels) 2 else 0)
    val graphWidth = (if (showLabels) innerWidth - 3 else innerWidth - 1) - valuesLength
    val graphHeight = if (showLabels) innerHeight - 3 else innerHeight - 2

    // Draw grid
    Drawing.drawGrid(x0, 0, graphWidth, graphHeight, gridSize, theme.accent1, theme.background,
      showVertical = true, showHorizontal = true)
    // Draw axis values
    Drawing.drawAxisValues(x0 - valuesLength, 0, graphHeight, gridSize, minY, maxY, theme.accent3, theme.background, horizontal = false)
    Drawing.drawAxisValues(x0, graphHeight + 1, graphWidth, gridSize, minX, maxX, theme.accent3, theme.background)
    // Draw labels
    if (showLabels) {
      Drawing.drawAxisLabels(x0, graphWidth, graphHeight, labelX, labelY, theme)
    }

    // Prepare values (we use half vertical resolution)
    val points = mutable.ArrayDeque.fill[Int](graphWidth + 1, graphHeight + 1)(0)
    val charHeight = (maxY - minY).toDouble / graphHeight
    for (value <- values.value) {
      val nx = math.round((graphWidth * (value._1.toDouble - minX)) / (maxX - minX)).toInt
      val ny = graphHeight - math.round((graphHeight * (value._2.toDouble - minY)) / (maxY - minY)).toInt
      val point = points(nx)(ny)
      val isLower = if ((math.round(value._2.toDouble).toInt % charHeight) < (charHeight / 2.0)) 1 else 2
      points(nx).update(ny, point | isLower)
    }

    // Plot values
    for (x <- 0 to graphWidth; y <- 0 to graphHeight) {
      val point = points(x)(y)
      val symbol = point match {
        case 0 => ""
        case 1 => Symbols.BLOCK_UPPER
        case 2 => Symbols.BLOCK_LOWER
        case 3 => Symbols.BLOCK
      }
      if (point != 0)
        screen.put(x0 + x, y, symbol, foreground = color, background = theme.background)
    }
  }

  override def handleKeypress(keypress: Int): Unit = { }

  override def focusable: Boolean = false

  override def innerHeight: Int = parent.innerHeight - 3
}
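ScatterPlot doubles its vertical resolution by packing two samples into each terminal cell: the points array stores a 2-bit mask per cell (1 = upper half, 2 = lower half) and the mask selects the half-block character to print. A standalone sketch of that selection logic, using Unicode half-block literals in place of the Symbols constants:

// 2-bit mask per cell: bit 1 = upper half occupied, bit 2 = lower half occupied.
def symbolFor(mask: Int): String = mask match {
  case 1 => "▀" // upper half only
  case 2 => "▄" // lower half only
  case 3 => "█" // both halves
  case _ => " " // empty cell
}

var cell = 0
cell |= 1                // one sample falls in the upper half of this cell
cell |= 2                // another sample falls in the lower half
println(symbolFor(cell)) // prints the full block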
Example 198
Source File: SubdocLookupAccessor.scala From couchbase-spark-connector with Apache License 2.0 | 5 votes |
package com.couchbase.spark.connection

import java.util.concurrent.TimeUnit

import com.couchbase.client.core.BackpressureException
import com.couchbase.client.core.time.Delay
import com.couchbase.client.java.error.{CouchbaseOutOfMemoryException, TemporaryFailureException}
import com.couchbase.client.java.util.retry.RetryBuilder
import com.couchbase.spark.internal.LazyIterator
import rx.lang.scala.JavaConversions._
import rx.lang.scala.Observable

import scala.collection.mutable
import scala.concurrent.duration.Duration

case class SubdocLookupSpec(id: String, get: Seq[String], exists: Seq[String])

case class SubdocLookupResult(id: String, cas: Long, content: Map[String, Any],
                              exists: Map[String, Boolean])

class SubdocLookupAccessor(cbConfig: CouchbaseConfig,
                           specs: Seq[SubdocLookupSpec],
                           bucketName: String = null,
                           timeout: Option[Duration]) {

  def compute(): Iterator[SubdocLookupResult] = {
    if (specs.isEmpty) {
      return Iterator[SubdocLookupResult]()
    }

    val bucket = CouchbaseConnection().bucket(cbConfig, bucketName).async()
    val maxDelay = cbConfig.retryOpts.maxDelay
    val minDelay = cbConfig.retryOpts.minDelay
    val maxRetries = cbConfig.retryOpts.maxTries
    val kvTimeout = timeout
      .map(_.toMillis)
      .orElse(cbConfig.timeouts.kv)
      .getOrElse(bucket.environment().kvTimeout())

    LazyIterator {
      Observable
        .from(specs)
        .flatMap(spec => {
          var builder = bucket.lookupIn(spec.id)
          spec.exists.foreach(builder.exists(_))
          spec.get.foreach(builder.get(_))
          toScalaObservable(builder.execute().timeout(kvTimeout, TimeUnit.MILLISECONDS)
          ).map(fragment => {
            val content = mutable.Map[String, Any]()
            spec.get.foreach(path => content.put(path, fragment.content(path)))
            val exists = mutable.Map[String, Boolean]()
            spec.exists.foreach(path => exists.put(path, fragment.status(path).isSuccess))
            SubdocLookupResult(spec.id, fragment.cas(), content.toMap, exists.toMap)
          }).retryWhen(
            RetryBuilder
              .anyOf(classOf[TemporaryFailureException], classOf[BackpressureException],
                classOf[CouchbaseOutOfMemoryException])
              .delay(Delay.exponential(TimeUnit.MILLISECONDS, maxDelay, minDelay))
              .max(maxRetries)
              .build())
        })
        .toBlocking
        .toIterable
        .iterator
    }
  }
}
Example 199
Source File: WorkerInfo.scala From aloha with Apache License 2.0 | 5 votes |
package me.jrwang.aloha.scheduler.master

import scala.collection.mutable

import me.jrwang.aloha.common.util.Utils
import me.jrwang.aloha.rpc.RpcEndpointRef

private[aloha] class WorkerInfo(
    val id: String,
    val host: String,
    val port: Int,
    val cores: Int,
    val memory: Int,
    val endpoint: RpcEndpointRef) extends Serializable {

  Utils.checkHost(host)
  assert (port > 0)

  @transient var apps: mutable.HashMap[String, ApplicationInfo] = _ // driverId => info
  @transient var state: WorkerState.Value = _
  @transient var coresUsed: Int = _
  @transient var memoryUsed: Int = _

  @transient var lastHeartBeat: Long = _

  init()

  def coresFree: Int = cores - coresUsed
  def memoryFree: Int = memory - memoryUsed

  private def readObject(in: java.io.ObjectInputStream): Unit = Utils.tryOrIOException {
    in.defaultReadObject()
    init()
  }

  private def init() {
    apps = new mutable.HashMap()
    state = WorkerState.ALIVE
    coresUsed = 0
    memoryUsed = 0
    lastHeartBeat = System.currentTimeMillis()
  }

  def hostPort: String = {
    assert (port > 0)
    host + ":" + port
  }

  def addApplication(app: ApplicationInfo) {
    apps(app.id) = app
    memoryUsed += app.desc.memory
    coresUsed += app.desc.cores
  }

  def removeApplication(app: ApplicationInfo) {
    apps -= app.id
    memoryUsed -= app.desc.memory
    coresUsed -= app.desc.cores
  }

  def setState(state: WorkerState.Value): Unit = {
    this.state = state
  }

  def isAlive(): Boolean = this.state == WorkerState.ALIVE
}
Example 200
Source File: ConfigReader.scala From aloha with Apache License 2.0 | 5 votes |
package me.jrwang.aloha.common.config

import java.util.{Map => JMap}

import scala.collection.mutable
import scala.util.matching.Regex

private object ConfigReader {
  private val REF_REGEX = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r
}

// The enclosing ConfigReader class is elided in this excerpt; only one of its
// private helpers is shown.
  private def getOrDefault(conf: ConfigProvider, key: String): Option[String] = {
    conf.get(key).orElse {
      ConfigEntry.findEntry(key) match {
        case e: ConfigEntryWithDefault[_] => Option(e.defaultValueString)
        case e: ConfigEntryWithDefaultString[_] => Option(e.defaultValueString)
        case e: ConfigEntryWithDefaultFunction[_] => Option(e.defaultValueString)
        case e: FallbackConfigEntry[_] => getOrDefault(conf, e.fallback.key)
        case _ => None
      }
    }
  }
}
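REF_REGEX recognizes variable references of the form ${name} or ${prefix:name}, capturing the optional prefix and the name separately. A quick illustration with the same pattern (the sample keys are made up; Scala's regex extractor anchors to the whole string here):

val RefRegex = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r

"${env:JAVA_HOME}" match {
  case RefRegex(prefix, name) => println(s"prefix=$prefix, name=$name") // prefix=env, name=JAVA_HOME
}

"${aloha.master.host}" match {
  case RefRegex(prefix, name) => println(s"prefix=$prefix, name=$name") // prefix=null, name=aloha.master.host
}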