scala.collection.mutable Scala Examples
The following examples show how to use scala.collection.mutable.
The project, source file, and license for each example are noted in the heading above it.
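Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below; all names are illustrative) showing the three mutable collections that recur throughout them: ArrayBuffer, Map, and Set.

import scala.collection.mutable

object MutableCollectionsDemo extends App {
  // Growable buffer, often used below to accumulate pipeline stages or metrics
  val metrics = mutable.ArrayBuffer[String]()
  metrics += "first"
  metrics ++= Seq("second", "third")

  // Mutable map, often used below as a simple in-memory key-value store
  val kvs = mutable.Map.empty[String, Any]
  kvs("answer") = 42
  println(kvs.get("answer")) // Some(42)

  // Mutable set, often used below to track in-progress keys
  val inProgress = mutable.Set[String]()
  inProgress += "job-1"
  inProgress -= "job-1"

  println(metrics.mkString(", ")) // first, second, third
}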
Example 1
Source File: KVStore.scala From Freasy-Monad with MIT License | 6 votes |
package examples.cats

import cats.Id
import cats.free.Free
import freasymonad.cats.free

import scala.collection.mutable
import scala.concurrent.duration.Duration
import scala.concurrent.{Await, Future}

@free trait KVStore {
  type KVStoreF[A] = Free[GrammarADT, A]
  sealed trait GrammarADT[A]

  def put[T](key: String, value: T): KVStoreF[Unit]
  def get[T](key: String): KVStoreF[Option[T]]
  def delete(key: String): KVStoreF[Unit]

  def update[T](key: String, f: T => T): KVStoreF[Unit] =
    for {
      vMaybe <- get[T](key)
      _      <- vMaybe.map(v => put[T](key, f(v))).getOrElse(Free.pure(()))
    } yield ()
}

object Main extends App {
  import KVStore.ops._

  def program: KVStoreF[Option[Int]] =
    for {
      _ <- put("wild-cats", 2)
      _ <- update[Int]("wild-cats", _ + 12)
      _ <- put("tame-cats", 5)
      n <- get[Int]("wild-cats")
      _ <- delete("tame-cats")
    } yield n

  val idInterpreter = new KVStore.Interp[Id] {
    val kvs = mutable.Map.empty[String, Any]

    def get[T](key: String): Id[Option[T]] = {
      println(s"get($key)")
      kvs.get(key).map(_.asInstanceOf[T])
    }

    def put[T](key: String, value: T): Id[Unit] = {
      println(s"put($key, $value)")
      kvs(key) = value
    }

    def delete(key: String): Id[Unit] = {
      println(s"delete($key)")
      kvs.remove(key)
    }
  }
  val resId: Id[Option[Int]] = idInterpreter.run(program)

  import cats.implicits.catsStdInstancesForFuture
  import scala.concurrent.ExecutionContext.Implicits.global

  val futureInterpreter = new KVStore.Interp[Future] {
    val kvs = mutable.Map.empty[String, Any]

    def get[T](key: String): Future[Option[T]] = Future {
      println(s"get($key)")
      kvs.get(key).map(_.asInstanceOf[T])
    }

    def put[T](key: String, value: T): Future[Unit] = Future {
      println(s"put($key, $value)")
      kvs(key) = value
    }

    def delete(key: String): Future[Unit] = Future {
      println(s"delete($key)")
      kvs.remove(key)
    }
  }
  val resFuture: Future[Option[Int]] = futureInterpreter.run(program)
  Await.ready(resFuture, Duration.Inf)
}
Example 2
Source File: MNISTBenchmark.scala From spark-knn with Apache License 2.0 | 6 votes |
package com.github.saurfang.spark.ml.knn.examples import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml.classification.{KNNClassifier, NaiveKNNClassifier} import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator import org.apache.spark.ml.param.{IntParam, ParamMap} import org.apache.spark.ml.tuning.{Benchmarker, ParamGridBuilder} import org.apache.spark.ml.util.Identifiable import org.apache.spark.ml.{Pipeline, Transformer} import org.apache.spark.mllib.util.MLUtils import org.apache.spark.sql.types.StructType import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} import org.apache.log4j import scala.collection.mutable object MNISTBenchmark { val logger = log4j.Logger.getLogger(getClass) def main(args: Array[String]) { val ns = if(args.isEmpty) (2500 to 10000 by 2500).toArray else args(0).split(',').map(_.toInt) val path = if(args.length >= 2) args(1) else "data/mnist/mnist.bz2" val numPartitions = if(args.length >= 3) args(2).toInt else 10 val models = if(args.length >=4) args(3).split(',') else Array("tree","naive") val spark = SparkSession.builder().getOrCreate() val sc = spark.sparkContext import spark.implicits._ //read in raw label and features val rawDataset = MLUtils.loadLibSVMFile(sc, path) .zipWithIndex() .filter(_._2 < ns.max) .sortBy(_._2, numPartitions = numPartitions) .keys .toDF() // convert "features" from mllib.linalg.Vector to ml.linalg.Vector val dataset = MLUtils.convertVectorColumnsToML(rawDataset) .cache() dataset.count() //force persist val limiter = new Limiter() val knn = new KNNClassifier() .setTopTreeSize(numPartitions * 10) .setFeaturesCol("features") .setPredictionCol("prediction") .setK(1) val naiveKNN = new NaiveKNNClassifier() val pipeline = new Pipeline() .setStages(Array(limiter, knn)) val naivePipeline = new Pipeline() .setStages(Array(limiter, naiveKNN)) val paramGrid = new ParamGridBuilder() .addGrid(limiter.n, ns) .build() val bm = new Benchmarker() .setEvaluator(new MulticlassClassificationEvaluator) .setEstimatorParamMaps(paramGrid) .setNumTimes(3) val metrics = mutable.ArrayBuffer[String]() if(models.contains("tree")) { val bmModel = bm.setEstimator(pipeline).fit(dataset) metrics += s"knn: ${bmModel.avgTrainingRuntimes.toSeq} / ${bmModel.avgEvaluationRuntimes.toSeq}" } if(models.contains("naive")) { val naiveBMModel = bm.setEstimator(naivePipeline).fit(dataset) metrics += s"naive: ${naiveBMModel.avgTrainingRuntimes.toSeq} / ${naiveBMModel.avgEvaluationRuntimes.toSeq}" } logger.info(metrics.mkString("\n")) } } class Limiter(override val uid: String) extends Transformer { def this() = this(Identifiable.randomUID("limiter")) val n: IntParam = new IntParam(this, "n", "number of rows to limit") def setN(value: Int): this.type = set(n, value) // hack to maintain number of partitions (otherwise it collapses to 1 which is unfair for naiveKNN) override def transform(dataset: Dataset[_]): DataFrame = dataset.limit($(n)).repartition(dataset.rdd.partitions.length).toDF() override def copy(extra: ParamMap): Transformer = defaultCopy(extra) @DeveloperApi override def transformSchema(schema: StructType): StructType = schema }
Example 3
Source File: RandomForestPipeline.scala From Machine-Learning-with-Spark-Second-Edition with MIT License | 6 votes |
package org.sparksamples.classification.stumbleupon import org.apache.log4j.Logger import org.apache.spark.ml.classification.RandomForestClassifier import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator import org.apache.spark.ml.feature.{StringIndexer, VectorAssembler} import org.apache.spark.ml.{Pipeline, PipelineStage} import org.apache.spark.sql.DataFrame import scala.collection.mutable object RandomForestPipeline { @transient lazy val logger = Logger.getLogger(getClass.getName) def randomForestPipeline(vectorAssembler: VectorAssembler, dataFrame: DataFrame) = { val Array(training, test) = dataFrame.randomSplit(Array(0.9, 0.1), seed = 12345) // Set up Pipeline val stages = new mutable.ArrayBuffer[PipelineStage]() val labelIndexer = new StringIndexer() .setInputCol("label") .setOutputCol("indexedLabel") stages += labelIndexer val rf = new RandomForestClassifier() .setFeaturesCol(vectorAssembler.getOutputCol) .setLabelCol("indexedLabel") .setNumTrees(20) .setMaxDepth(5) .setMaxBins(32) .setMinInstancesPerNode(1) .setMinInfoGain(0.0) .setCacheNodeIds(false) .setCheckpointInterval(10) stages += vectorAssembler stages += rf val pipeline = new Pipeline().setStages(stages.toArray) // Fit the Pipeline val startTime = System.nanoTime() //val model = pipeline.fit(training) val model = pipeline.fit(dataFrame) val elapsedTime = (System.nanoTime() - startTime) / 1e9 println(s"Training time: $elapsedTime seconds") //val holdout = model.transform(test).select("prediction","label") val holdout = model.transform(dataFrame).select("prediction","label") // Select (prediction, true label) and compute test error val evaluator = new MulticlassClassificationEvaluator() .setLabelCol("label") .setPredictionCol("prediction") .setMetricName("accuracy") val mAccuracy = evaluator.evaluate(holdout) println("Test set accuracy = " + mAccuracy) } }
Example 4
Source File: GradientBoostedTreePipeline.scala From Machine-Learning-with-Spark-Second-Edition with MIT License | 6 votes |
package org.sparksamples.classification.stumbleupon import org.apache.log4j.Logger import org.apache.spark.ml.classification.GBTClassifier import org.apache.spark.ml.feature.{StringIndexer, VectorAssembler} import org.apache.spark.ml.{Pipeline, PipelineStage} import org.apache.spark.mllib.evaluation.{MulticlassMetrics, RegressionMetrics} import org.apache.spark.sql.DataFrame import scala.collection.mutable object GradientBoostedTreePipeline { @transient lazy val logger = Logger.getLogger(getClass.getName) def gradientBoostedTreePipeline(vectorAssembler: VectorAssembler, dataFrame: DataFrame) = { val Array(training, test) = dataFrame.randomSplit(Array(0.9, 0.1), seed = 12345) // Set up Pipeline val stages = new mutable.ArrayBuffer[PipelineStage]() val labelIndexer = new StringIndexer() .setInputCol("label") .setOutputCol("indexedLabel") stages += labelIndexer val gbt = new GBTClassifier() .setFeaturesCol(vectorAssembler.getOutputCol) .setLabelCol("indexedLabel") .setMaxIter(10) stages += vectorAssembler stages += gbt val pipeline = new Pipeline().setStages(stages.toArray) // Fit the Pipeline val startTime = System.nanoTime() //val model = pipeline.fit(training) val model = pipeline.fit(dataFrame) val elapsedTime = (System.nanoTime() - startTime) / 1e9 println(s"Training time: $elapsedTime seconds") //val holdout = model.transform(test).select("prediction","label") val holdout = model.transform(dataFrame).select("prediction","label") // have to do a type conversion for RegressionMetrics val rm = new RegressionMetrics(holdout.rdd.map(x => (x(0).asInstanceOf[Double], x(1).asInstanceOf[Double]))) logger.info("Test Metrics") logger.info("Test Explained Variance:") logger.info(rm.explainedVariance) logger.info("Test R^2 Coef:") logger.info(rm.r2) logger.info("Test MSE:") logger.info(rm.meanSquaredError) logger.info("Test RMSE:") logger.info(rm.rootMeanSquaredError) val predictions = model.transform(test).select("prediction").rdd.map(_.getDouble(0)) val labels = model.transform(test).select("label").rdd.map(_.getDouble(0)) val accuracy = new MulticlassMetrics(predictions.zip(labels)).precision println(s" Accuracy : $accuracy") holdout.rdd.map(x => x(0).asInstanceOf[Double]).repartition(1).saveAsTextFile("/home/ubuntu/work/ml-resources/spark-ml/results/GBT.xls") savePredictions(holdout, test, rm, "/home/ubuntu/work/ml-resources/spark-ml/results/GBT.csv") } def savePredictions(predictions:DataFrame, testRaw:DataFrame, regressionMetrics: RegressionMetrics, filePath:String) = { predictions .coalesce(1) .write.format("com.databricks.spark.csv") .option("header", "true") .save(filePath) } }
Example 5
Source File: SqlUnitTest.scala From SparkUnitTestingExamples with Apache License 2.0 | 6 votes |
package com.cloudera.sa.spark.unittest.sql import org.apache.spark.sql.Row import org.apache.spark.sql.hive.HiveContext import org.apache.spark.{SparkConf, SparkContext} import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} import scala.collection.mutable class SqlUnitTest extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll{ @transient var sc: SparkContext = null @transient var hiveContext: HiveContext = null override def beforeAll(): Unit = { val envMap = Map[String,String](("Xmx", "512m")) val sparkConfig = new SparkConf() sparkConfig.set("spark.broadcast.compress", "false") sparkConfig.set("spark.shuffle.compress", "false") sparkConfig.set("spark.shuffle.spill.compress", "false") sparkConfig.set("spark.io.compression.codec", "lzf") sc = new SparkContext("local[2]", "unit test", sparkConfig) hiveContext = new HiveContext(sc) } override def afterAll(): Unit = { sc.stop() } test("Test table creation and summing of counts") { val personRDD = sc.parallelize(Seq(Row("ted", 42, "blue"), Row("tj", 11, "green"), Row("andrew", 9, "green"))) hiveContext.sql("create table person (name string, age int, color string)") val emptyDataFrame = hiveContext.sql("select * from person limit 0") val personDataFrame = hiveContext.createDataFrame(personRDD, emptyDataFrame.schema) personDataFrame.registerTempTable("tempPerson") val ageSumDataFrame = hiveContext.sql("select sum(age) from tempPerson") val localAgeSum = ageSumDataFrame.take(10) assert(localAgeSum(0).get(0) == 62, "The sum of age should equal 62 but it equaled " + localAgeSum(0).get(0)) } }
Example 6
Source File: BasicShabondiTest.scala From ohara with Apache License 2.0 | 5 votes |
package oharastream.ohara.shabondi import java.util import java.util.concurrent.{ExecutorService, Executors} import com.google.common.util.concurrent.ThreadFactoryBuilder import com.typesafe.scalalogging.Logger import oharastream.ohara.common.data.Row import oharastream.ohara.common.setting.TopicKey import oharastream.ohara.common.util.{CommonUtils, Releasable} import oharastream.ohara.kafka.TopicAdmin import oharastream.ohara.shabondi.common.ShabondiUtils import oharastream.ohara.shabondi.sink.SinkConfig import oharastream.ohara.shabondi.source.SourceConfig import oharastream.ohara.testing.WithBroker import org.junit.After import scala.collection.{immutable, mutable} import scala.concurrent.{ExecutionContext, Future} import scala.jdk.CollectionConverters._ private[shabondi] abstract class BasicShabondiTest extends WithBroker { protected val log = Logger(this.getClass()) protected val brokerProps = testUtil.brokersConnProps protected val topicAdmin: TopicAdmin = TopicAdmin.of(brokerProps) protected val newThreadPool: () => ExecutorService = () => Executors.newCachedThreadPool(new ThreadFactoryBuilder().setNameFormat(this.getClass.getSimpleName + "-").build()) protected val countRows: (util.Queue[Row], Long, ExecutionContext) => Future[Long] = (queue, executionTime, ec) => Future { log.debug("countRows begin...") val baseTime = System.currentTimeMillis() var count = 0L var running = true while (running) { val row = queue.poll() if (row != null) count += 1 else Thread.sleep(100) running = (System.currentTimeMillis() - baseTime) < executionTime } log.debug("countRows done") count }(ec) protected def createTopicKey = TopicKey.of("default", CommonUtils.randomString(5)) protected def createTestTopic(topicKey: TopicKey): Unit = topicAdmin.topicCreator .numberOfPartitions(1) .numberOfReplications(1.toShort) .topicKey(topicKey) .create protected def defaultSourceConfig( sourceToTopics: Seq[TopicKey] = Seq.empty[TopicKey] ): SourceConfig = { import ShabondiDefinitions._ val args = mutable.ArrayBuffer( GROUP_DEFINITION.key + "=" + CommonUtils.randomString(5), NAME_DEFINITION.key + "=" + CommonUtils.randomString(3), SHABONDI_CLASS_DEFINITION.key + "=" + classOf[ShabondiSource].getName, CLIENT_PORT_DEFINITION.key + "=8080", BROKERS_DEFINITION.key + "=" + testUtil.brokersConnProps ) if (sourceToTopics.nonEmpty) args += s"${SOURCE_TO_TOPICS_DEFINITION.key}=${TopicKey.toJsonString(sourceToTopics.asJava)}" val rawConfig = ShabondiUtils.parseArgs(args.toArray) new SourceConfig(rawConfig) } protected def defaultSinkConfig( sinkFromTopics: Seq[TopicKey] = Seq.empty[TopicKey] ): SinkConfig = { import ShabondiDefinitions._ val args = mutable.ArrayBuffer( GROUP_DEFINITION.key + "=" + CommonUtils.randomString(5), NAME_DEFINITION.key + "=" + CommonUtils.randomString(3), SHABONDI_CLASS_DEFINITION.key + "=" + classOf[ShabondiSink].getName, CLIENT_PORT_DEFINITION.key + "=8080", BROKERS_DEFINITION.key + "=" + testUtil.brokersConnProps ) if (sinkFromTopics.nonEmpty) args += s"${SINK_FROM_TOPICS_DEFINITION.key}=${TopicKey.toJsonString(sinkFromTopics.asJava)}" val rawConfig = ShabondiUtils.parseArgs(args.toArray) new SinkConfig(rawConfig) } protected def singleRow(columnSize: Int, rowId: Int = 0): Row = KafkaSupport.singleRow(columnSize, rowId) protected def multipleRows(rowSize: Int): immutable.Iterable[Row] = KafkaSupport.multipleRows(rowSize) @After def tearDown(): Unit = { Releasable.close(topicAdmin) } }
Example 7
Source File: PerformanceReport.scala From ohara with Apache License 2.0 | 5 votes |
package oharastream.ohara.it.performance import java.util.Objects import oharastream.ohara.common.setting.ObjectKey import oharastream.ohara.common.util.CommonUtils import scala.collection.immutable.ListMap import scala.collection.mutable trait PerformanceReport { def records: Map[Long, Map[String, Double]] } object PerformanceReport { def builder = new Builder final class Builder private[PerformanceReport] extends oharastream.ohara.common.pattern.Builder[PerformanceReport] { private[this] var key: ObjectKey = _ private[this] var className: String = _ private[this] val records = mutable.Map[Long, Map[String, Double]]() def connectorKey(key: ObjectKey): Builder = { this.key = Objects.requireNonNull(key) this } def className(className: String): Builder = { this.className = CommonUtils.requireNonEmpty(className) this } def resetValue(duration: Long, header: String): Builder = { records.put(duration, Map(header -> 0.0)) this } def record(duration: Long, header: String, value: Double): Builder = { val record = records.getOrElse(duration, Map(header -> 0.0)) records.put( duration, record + (header -> (record.getOrElse(header, 0.0) + value)) ) this } override def build: PerformanceReport = new PerformanceReport { override val className: String = CommonUtils.requireNonEmpty(Builder.this.className) override val records: Map[Long, Map[String, Double]] = ListMap( Builder.this.records.toSeq.sortBy(_._1)((x: Long, y: Long) => y.compare(x)): _* ) override def key: ObjectKey = Objects.requireNonNull(Builder.this.key) } } }
Example 8
Source File: ServiceKeyHolder.scala From ohara with Apache License 2.0 | 5 votes |
package oharastream.ohara.it

import java.util.concurrent.TimeUnit

import oharastream.ohara.agent.container.ContainerClient
import oharastream.ohara.common.setting.ObjectKey
import oharastream.ohara.common.util.{CommonUtils, Releasable}
import com.typesafe.scalalogging.Logger

import scala.collection.mutable
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.Duration
import scala.concurrent.{Await, Future}

// NOTE: this listing is an excerpt of ServiceKeyHolder — the enclosing class and the definitions of
// finalClose, KEEP_CONTAINERS, clusterKey, excludedNodes, client, LOG and result(...) are not shown.
    if (!finalClose || !KEEP_CONTAINERS)
      result(client.containers())
        .filter(
          container =>
            clusterKey.exists(key => container.name.contains(key.group()) && container.name.contains(key.name()))
        )
        .filterNot(container => excludedNodes.contains(container.nodeName))
        .foreach { container =>
          try {
            println(s"[-----------------------------------${container.name}-----------------------------------]")
            // Before 10 minutes container log. Avoid the OutOfMemory of Java heap
            val containerLogs =
              try result(client.log(container.name, Option(600)))
              catch {
                case e: Throwable =>
                  s"failed to fetch the logs for container:${container.name}. caused by:${e.getMessage}"
              }
            println(containerLogs)
            println("[------------------------------------------------------------------------------------]")
            result(client.forceRemove(container.name))
          } catch {
            case e: Throwable => LOG.error(s"failed to remove container ${container.name}", e)
          }
        }
    finally Releasable.close(client)
}
Example 9
Source File: ArgumentsBuilder.scala From ohara with Apache License 2.0 | 5 votes |
package oharastream.ohara.agent

import oharastream.ohara.agent.ArgumentsBuilder.FileAppender
import oharastream.ohara.common.util.CommonUtils
import spray.json.{JsNull, JsNumber, JsString, JsValue}

import scala.collection.mutable

// NOTE: this listing is an excerpt — the opening of `trait ArgumentsBuilder` (which, judging from the
// companion object below, also declares `def file(path: String): FileAppender`) is not shown.
  def mainConfigFile(path: String): ArgumentsBuilder

  override def build: Seq[String]
}

object ArgumentsBuilder {
  trait FileAppender {
    private[this] val props = mutable.Buffer[String]()
    def append(prop: Int): FileAppender    = append(prop.toString)
    def append(prop: String): FileAppender = append(Seq(prop))
    def append(props: Seq[String]): FileAppender = {
      this.props ++= props
      this
    }
    def append(key: String, value: Boolean): FileAppender = append(s"$key=$value")
    def append(key: String, value: Short): FileAppender   = append(s"$key=$value")
    def append(key: String, value: Int): FileAppender     = append(s"$key=$value")
    def append(key: String, value: String): FileAppender  = append(s"$key=$value")
    def append(key: String, value: JsValue): FileAppender = append(
      key,
      value match {
        case JsString(value) => value
        case JsNumber(value) => value.toString
        case JsNull          => throw new IllegalArgumentException(s"JsNull is not legal")
        case _               => value.toString()
      }
    )

    def done: ArgumentsBuilder = done(props.toSeq)

    protected def done(props: Seq[String]): ArgumentsBuilder
  }

  def apply(): ArgumentsBuilder = new ArgumentsBuilder {
    private[this] val files                  = mutable.Map[String, Seq[String]]()
    private[this] var mainConfigFile: String = _

    override def build: Seq[String] =
      if (CommonUtils.isEmpty(mainConfigFile))
        throw new IllegalArgumentException("you have to define the main configs")
      else
        // format: --file path=line0,line1 --file path1=line0,line1
        // NOTED: the path and props must be in different line. otherwise, k8s will merge them into single line and our
        // script will fail to parse the command-line arguments
        files.flatMap {
          case (path, props) => Seq("--file", s"$path=${props.mkString(",")}")
        }.toSeq ++ Seq("--config", mainConfigFile)

    override def file(path: String): FileAppender = (props: Seq[String]) => {
      this.files += (path -> props)
      this
    }

    override def mainConfigFile(path: String): ArgumentsBuilder = {
      this.mainConfigFile = CommonUtils.requireNonEmpty(path)
      this
    }
  }
}
Example 10
Source File: ClusterRequest.scala From ohara with Apache License 2.0 | 5 votes |
package oharastream.ohara.client.configurator

import oharastream.ohara.common.annotations.Optional
import oharastream.ohara.common.setting.ObjectKey
import oharastream.ohara.common.util.CommonUtils
import spray.json.DefaultJsonProtocol._
import spray.json.{JsArray, JsNumber, JsObject, JsString, JsValue}

import scala.jdk.CollectionConverters._
import scala.collection.mutable

// NOTE: the opening declaration (trait ClusterRequest) is omitted in the original listing; it is restored here.
trait ClusterRequest {

  protected def key: ObjectKey = ObjectKey.of(
    settings.get(GROUP_KEY).map(_.convertTo[String]).getOrElse(GROUP_DEFAULT),
    settings(NAME_KEY).convertTo[String]
  )

  protected val settings: mutable.Map[String, JsValue] = mutable.Map()

  @Optional("default key is a random string. But it is required in updating")
  def key(key: ObjectKey): ClusterRequest.this.type = {
    setting(NAME_KEY, JsString(key.name()))
    setting(GROUP_KEY, JsString(key.group()))
  }

  @Optional("default name is a random string. But it is required in updating")
  def name(name: String): ClusterRequest.this.type =
    setting(NAME_KEY, JsString(CommonUtils.requireNonEmpty(name)))

  @Optional("default is GROUP_DEFAULT")
  def group(group: String): ClusterRequest.this.type =
    setting(GROUP_KEY, JsString(CommonUtils.requireNonEmpty(group)))

  def nodeName(nodeName: String): ClusterRequest.this.type =
    nodeNames(Set(CommonUtils.requireNonEmpty(nodeName)))

  def nodeNames(nodeNames: Set[String]): ClusterRequest.this.type =
    setting(NODE_NAMES_KEY, JsArray(CommonUtils.requireNonEmpty(nodeNames.asJava).asScala.map(JsString(_)).toVector))

  @Optional("default value is empty array")
  def routes(routes: Map[String, String]): ClusterRequest.this.type =
    setting(ROUTES_KEY, JsObject(routes.map {
      case (k, v) => k -> JsString(v)
    }))

  @Optional("default value is 1024")
  def initHeap(sizeInMB: Int): ClusterRequest.this.type =
    setting(INIT_HEAP_KEY, JsNumber(CommonUtils.requirePositiveInt(sizeInMB)))

  @Optional("default value is 1024")
  def maxHeap(sizeInMB: Int): ClusterRequest.this.type =
    setting(MAX_HEAP_KEY, JsNumber(CommonUtils.requirePositiveInt(sizeInMB)))

  @Optional("extra settings is empty by default")
  def setting(key: String, value: JsValue): ClusterRequest.this.type =
    settings(Map(key -> value))

  @Optional("extra settings is empty by default")
  def settings(settings: Map[String, JsValue]): ClusterRequest.this.type = {
    // We don't have to check the settings is empty here for the following reasons:
    // 1) we may want to use the benefit of default creation without specify settings
    // 2) actual checking will be done in the json parser phase of creation or update
    this.settings ++= settings
    this
  }
}
Example 11
Source File: ConnectionUtil.scala From hazelcast-spark with Apache License 2.0 | 5 votes |
package com.hazelcast.spark.connector.util import com.hazelcast.client.HazelcastClient import com.hazelcast.client.config.{ClientConfig, XmlClientConfigBuilder} import com.hazelcast.core.HazelcastInstance import com.hazelcast.spark.connector.conf.SerializableConf import scala.collection.{JavaConversions, mutable} object ConnectionUtil { private[connector] val instances = mutable.Map[String, HazelcastInstance]() def getHazelcastConnection(member: String, rddId: Int, conf: SerializableConf): HazelcastInstance = { def createClientInstance: HazelcastInstance = { val client: HazelcastInstance = HazelcastClient.newHazelcastClient(createClientConfig(conf, member)) instances.put(member + "#" + rddId, client) client } this.synchronized { val maybeInstance: Option[HazelcastInstance] = instances.get(member + "#" + rddId) if (maybeInstance.isEmpty) { createClientInstance } else { val instance: HazelcastInstance = maybeInstance.get if (instance.getLifecycleService.isRunning) { instance } else { createClientInstance } } } } def closeHazelcastConnection(member: String, rddId: Int): Unit = { this.synchronized { val maybeInstance: Option[HazelcastInstance] = instances.get(member + "#" + rddId) if (maybeInstance.isDefined) { val instance: HazelcastInstance = maybeInstance.get if (instance.getLifecycleService.isRunning) { instance.getLifecycleService.shutdown() } instances.remove(member + "#" + rddId) } } } def closeAll(rddIds: Seq[Int]): Unit = { this.synchronized { instances.keys.foreach({ key => { val instanceRddId: String = key.split("#")(1) if (rddIds.contains(instanceRddId.toInt)) { val instance: HazelcastInstance = instances.get(key).get if (instance.getLifecycleService.isRunning) { instance.shutdown() } instances.remove(key) } } }) } } private def createClientConfig(conf: SerializableConf, member: String): ClientConfig = { var config: ClientConfig = null if (conf.xmlPath != null) { config = new XmlClientConfigBuilder(conf.xmlPath).build() } else { config = new ClientConfig config.getGroupConfig.setName(conf.groupName) config.getGroupConfig.setPassword(conf.groupPass) config.getNetworkConfig.setAddresses(JavaConversions.seqAsJavaList(member.split(","))) } config } }
Example 12
Source File: InMemoryState.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.ledger.on.memory import java.util.concurrent.locks.StampedLock import com.daml.ledger.on.memory.InMemoryState._ import com.daml.ledger.participant.state.kvutils.Bytes import com.daml.ledger.participant.state.kvutils.api.LedgerRecord import com.daml.ledger.participant.state.v1.Offset import com.google.protobuf.ByteString import scala.collection.mutable import scala.concurrent.{ExecutionContext, Future, blocking} private[memory] class InMemoryState private (log: MutableLog, state: MutableState) { private val lockCurrentState = new StampedLock() @volatile private var lastLogEntryIndex = 0 def readLog[A](action: ImmutableLog => A): A = action(log) // `log` is mutable, but the interface is immutable def newHeadSinceLastWrite(): Int = lastLogEntryIndex def write[A](action: (MutableLog, MutableState) => Future[A])( implicit executionContext: ExecutionContext ): Future[A] = for { stamp <- Future { blocking { lockCurrentState.writeLock() } } result <- action(log, state) .andThen { case _ => lastLogEntryIndex = log.size - 1 lockCurrentState.unlock(stamp) } } yield result } object InMemoryState { type ImmutableLog = IndexedSeq[LedgerRecord] type ImmutableState = collection.Map[StateKey, StateValue] type MutableLog = mutable.Buffer[LedgerRecord] with ImmutableLog type MutableState = mutable.Map[StateKey, StateValue] with ImmutableState type StateKey = Bytes type StateValue = Bytes // The first element will never be read because begin offsets are exclusive. private val Beginning = LedgerRecord(Offset.beforeBegin, ByteString.EMPTY, ByteString.EMPTY) def empty = new InMemoryState( log = mutable.ArrayBuffer(Beginning), state = mutable.Map.empty, ) }
Example 13
Source File: InMemoryLedgerStateOperations.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.on.memory

import com.daml.ledger.on.memory.InMemoryState.MutableLog
import com.daml.ledger.participant.state.kvutils.KVOffset
import com.daml.ledger.participant.state.kvutils.api.LedgerRecord
import com.daml.ledger.participant.state.v1.Offset
import com.daml.ledger.validator.BatchingLedgerStateOperations
import com.daml.ledger.validator.LedgerStateOperations.{Key, Value}

import scala.collection.mutable
import scala.concurrent.{ExecutionContext, Future}

private[memory] final class InMemoryLedgerStateOperations(
    log: InMemoryState.MutableLog,
    state: InMemoryState.MutableState,
)(implicit executionContext: ExecutionContext)
    extends BatchingLedgerStateOperations[Index] {
  import InMemoryLedgerStateOperations.appendEntry

  override def readState(keys: Seq[Key]): Future[Seq[Option[Value]]] =
    Future.successful(keys.map(state.get))

  override def writeState(keyValuePairs: Seq[(Key, Value)]): Future[Unit] = {
    state ++= keyValuePairs
    Future.unit
  }

  override def appendToLog(key: Key, value: Value): Future[Index] =
    Future.successful(appendEntry(log, LedgerRecord(_, key, value)))
}

object InMemoryLedgerStateOperations {
  def apply()(implicit executionContext: ExecutionContext): InMemoryLedgerStateOperations = {
    val inMemoryState = mutable.Map.empty[Key, Value]
    val inMemoryLog = mutable.ArrayBuffer[LedgerRecord]()
    new InMemoryLedgerStateOperations(inMemoryLog, inMemoryState)
  }

  private[memory] def appendEntry(log: MutableLog, createEntry: Offset => LedgerRecord): Index = {
    val entryAtIndex = log.size
    val offset = KVOffset.fromLong(entryAtIndex.toLong)
    val entry = createEntry(offset)
    log += entry
    entryAtIndex
  }
}
Example 14
Source File: LogCollector.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.platform.testing import ch.qos.logback.classic.Level import ch.qos.logback.classic.spi.ILoggingEvent import ch.qos.logback.core.AppenderBase import scala.beans.BeanProperty import scala.collection.concurrent.TrieMap import scala.collection.mutable import scala.reflect.ClassTag object LogCollector { private val log = TrieMap .empty[String, TrieMap[String, mutable.Builder[(Level, String), Vector[(Level, String)]]]] def read[Test, Logger]( implicit test: ClassTag[Test], logger: ClassTag[Logger]): IndexedSeq[(Level, String)] = log .get(test.runtimeClass.getName) .flatMap(_.get(logger.runtimeClass.getName)) .fold(IndexedSeq.empty[(Level, String)])(_.result()) def clear[Test](implicit test: ClassTag[Test]): Unit = { log.remove(test.runtimeClass.getName) () } } final class LogCollector extends AppenderBase[ILoggingEvent] { @BeanProperty var test: String = _ override def append(e: ILoggingEvent): Unit = { if (test == null) { addError("Test identifier undefined, skipping logging") } else { val log = LogCollector.log .getOrElseUpdate(test, TrieMap.empty) .getOrElseUpdate(e.getLoggerName, Vector.newBuilder) val _ = log.synchronized { log += e.getLevel -> e.getMessage } } } }
Example 15
Source File: PostgresqlSqlLedgerReaderWriterIntegrationSpec.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.on.sql

import com.daml.testing.postgresql.PostgresAroundAll

import scala.collection.mutable

class PostgresqlSqlLedgerReaderWriterIntegrationSpec
    extends SqlLedgerReaderWriterIntegrationSpecBase("SQL implementation using PostgreSQL")
    with PostgresAroundAll {

  private val databases: mutable.Map[String, String] = mutable.Map.empty

  override protected def jdbcUrl(id: String): String = {
    if (!databases.contains(id)) {
      val database = createNewDatabase(id)
      databases += id -> database.url
    }
    databases(id)
  }
}
Example 16
Source File: AsyncForwardingListener.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.api.auth.interceptor

import io.grpc.ServerCall

import scala.collection.mutable

abstract class AsyncForwardingListener[ReqT] extends ServerCall.Listener[ReqT] {
  protected type Listener = ServerCall.Listener[ReqT]

  private[this] val lock = new Object
  private[this] val stash: mutable.ListBuffer[Listener => Unit] = new mutable.ListBuffer
  private[this] var nextListener: Option[Listener] = None

  private def enqueueOrProcess(msg: Listener => Unit): Unit = lock.synchronized {
    if (nextListener.isDefined) {
      msg(nextListener.get)
    } else {
      stash.append(msg)
    }
  }

  protected def setNextListener(listener: Listener): Unit = lock.synchronized {
    nextListener = Some(listener)
    stash.foreach(msg => msg(listener))
  }

  // All methods that need to be forwarded
  override def onHalfClose(): Unit = enqueueOrProcess(i => i.onHalfClose())
  override def onCancel(): Unit = enqueueOrProcess(i => i.onCancel())
  override def onComplete(): Unit = enqueueOrProcess(i => i.onComplete())
  override def onReady(): Unit = enqueueOrProcess(i => i.onReady())
  override def onMessage(message: ReqT): Unit = enqueueOrProcess(i => i.onMessage(message))
}
Example 17
Source File: CachingDamlLedgerStateReader.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.ledger.validator.caching import com.daml.caching.Cache import com.daml.ledger.participant.state.kvutils.DamlKvutils.{DamlStateKey, DamlStateValue} import com.daml.ledger.validator.LedgerStateOperations.Key import com.daml.ledger.validator.{ DamlLedgerStateReader, LedgerStateReader, RawToDamlLedgerStateReaderAdapter, StateKeySerializationStrategy } import scala.collection.mutable import scala.concurrent.{ExecutionContext, Future} class CachingDamlLedgerStateReader( val cache: Cache[DamlStateKey, DamlStateValue], shouldCache: DamlStateKey => Boolean, keySerializationStrategy: StateKeySerializationStrategy, delegate: DamlLedgerStateReader)(implicit executionContext: ExecutionContext) extends DamlLedgerStateReader with QueryableReadSet { private val readSet = mutable.Set.empty[DamlStateKey] override def getReadSet: Set[Key] = this.synchronized { readSet.map(keySerializationStrategy.serializeStateKey).toSet } override def readState(keys: Seq[DamlStateKey]): Future[Seq[Option[DamlStateValue]]] = { this.synchronized { readSet ++= keys } @SuppressWarnings(Array("org.wartremover.warts.Any")) // Required to make `.view` work. val cachedValues = keys.view .map(key => key -> cache.getIfPresent(key)) .filter(_._2.isDefined) .toMap val keysToRead = keys.toSet -- cachedValues.keySet if (keysToRead.nonEmpty) { delegate .readState(keysToRead.toSeq) .map { readStateValues => val readValues = keysToRead.zip(readStateValues).toMap readValues.collect { case (key, Some(value)) if shouldCache(key) => cache.put(key, value) } val all = cachedValues ++ readValues keys.map(all(_)) } } else { Future { keys.map(cachedValues(_)) } } } } object CachingDamlLedgerStateReader { private[validator] def apply( cache: Cache[DamlStateKey, DamlStateValue], cachingPolicy: CacheUpdatePolicy, ledgerStateOperations: LedgerStateReader, keySerializationStrategy: StateKeySerializationStrategy)( implicit executionContext: ExecutionContext): CachingDamlLedgerStateReader = { new CachingDamlLedgerStateReader( cache, cachingPolicy.shouldCacheOnRead, keySerializationStrategy, new RawToDamlLedgerStateReaderAdapter(ledgerStateOperations, keySerializationStrategy)) } }
Example 18
Source File: CommitContext.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.participant.state.kvutils.committer

import com.daml.ledger.participant.state.kvutils.DamlKvutils.{
  DamlLogEntryId,
  DamlStateKey,
  DamlStateValue
}
import com.daml.ledger.participant.state.kvutils.{DamlStateMap, Err}
import com.daml.ledger.participant.state.v1.ParticipantId
import com.daml.lf.data.Time.Timestamp
import org.slf4j.LoggerFactory

import scala.collection.mutable

// NOTE: this listing is an excerpt of CommitContext — the enclosing declaration and the definitions
// of `outputOrder`, `outputs`, `inputs` and `logger` are not shown in the original.
  def getOutputs: Iterable[(DamlStateKey, DamlStateValue)] =
    outputOrder
      .map(key => key -> outputs(key))
      .filterNot {
        case (key, value) if inputAlreadyContains(key, value) =>
          logger.trace("Identical output found for key {}", key)
          true
        case _ => false
      }

  private def inputAlreadyContains(key: DamlStateKey, value: DamlStateValue): Boolean =
    inputs.get(key).exists(_.contains(value))
}
Example 19
Source File: FileBasedLedgerDataExporter.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.ledger.participant.state.kvutils.export import java.io.DataOutputStream import java.time.Instant import java.util.concurrent.locks.StampedLock import com.daml.ledger.participant.state.v1.ParticipantId import com.daml.ledger.validator.LedgerStateOperations.{Key, Value} import com.google.protobuf.ByteString import scala.collection.mutable import scala.collection.mutable.ListBuffer class FileBasedLedgerDataExporter(output: DataOutputStream) extends LedgerDataExporter { import FileBasedLedgerDataExporter._ private val outputLock = new StampedLock private[export] val correlationIdMapping = mutable.Map.empty[String, String] private[export] val inProgressSubmissions = mutable.Map.empty[String, SubmissionInfo] private[export] val bufferedKeyValueDataPerCorrelationId = mutable.Map.empty[String, mutable.ListBuffer[(Key, Value)]] def addSubmission( submissionEnvelope: ByteString, correlationId: String, recordTimeInstant: Instant, participantId: ParticipantId): Unit = this.synchronized { inProgressSubmissions.put( correlationId, SubmissionInfo(submissionEnvelope, correlationId, recordTimeInstant, participantId)) () } def addParentChild(parentCorrelationId: String, childCorrelationId: String): Unit = this.synchronized { correlationIdMapping.put(childCorrelationId, parentCorrelationId) () } def addToWriteSet(correlationId: String, data: Iterable[(Key, Value)]): Unit = this.synchronized { correlationIdMapping .get(correlationId) .foreach { parentCorrelationId => val keyValuePairs = bufferedKeyValueDataPerCorrelationId .getOrElseUpdate(parentCorrelationId, ListBuffer.empty) keyValuePairs.appendAll(data) bufferedKeyValueDataPerCorrelationId.put(parentCorrelationId, keyValuePairs) } } def finishedProcessing(correlationId: String): Unit = { val (submissionInfo, bufferedData) = this.synchronized { ( inProgressSubmissions.get(correlationId), bufferedKeyValueDataPerCorrelationId.get(correlationId)) } submissionInfo.foreach { submission => bufferedData.foreach(writeSubmissionData(submission, _)) this.synchronized { inProgressSubmissions.remove(correlationId) bufferedKeyValueDataPerCorrelationId.remove(correlationId) correlationIdMapping .collect { case (key, value) if value == correlationId => key } .foreach(correlationIdMapping.remove) } } } private def writeSubmissionData( submissionInfo: SubmissionInfo, writeSet: ListBuffer[(Key, Value)]): Unit = { val stamp = outputLock.writeLock() try { Serialization.serializeEntry(submissionInfo, writeSet, output) output.flush() } finally { outputLock.unlock(stamp) } } } object FileBasedLedgerDataExporter { case class SubmissionInfo( submissionEnvelope: ByteString, correlationId: String, recordTimeInstant: Instant, participantId: ParticipantId) type WriteSet = Seq[(Key, Value)] }
Example 20
Source File: Relation.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.lf.data

import scala.collection.{mutable, immutable}

object Relation {

  // NOTE: this definition and specifically inversion assumes
  // that the values related to an A are non-empty
  // we treat
  //  - the empty relation mapping
  //  - and a Map that maps everything to the empty set
  // as the same
  // this fits our purposes for the moment
  type Relation[A, B] = immutable.Map[A, Set[B]]

  object Relation {
    def merge[A, B](r: Relation[A, B], pair: (A, Set[B])): Relation[A, B] =
      r.updated(pair._1, r.getOrElse(pair._1, Set.empty[B]).union(pair._2))

    def union[A, B](r1: Relation[A, B], r2: Relation[A, B]): Relation[A, B] =
      r2.foldLeft(r1)(merge)

    def diff[A, B](r1: Relation[A, B], r2: Relation[A, B]): Relation[A, B] =
      r1.map { case (a, bs) => a -> r2.get(a).fold(bs)(bs diff _) }

    def invert[A, B](relation: Relation[A, B]): Relation[B, A] = {
      val result = mutable.Map[B, Set[A]]() withDefaultValue Set()
      relation.foreach {
        case (a, bs) =>
          bs.foreach(b => result(b) = result(b) + a)
      }
      result.toMap
    }

    def flatten[A, B](relation: Relation[A, B]): Iterator[(A, B)] =
      for {
        kvs <- relation.iterator
        value <- kvs._2
      } yield (kvs._1, value)

    def mapKeys[A, K, B](r: Relation[A, B])(f: A => K): Relation[K, B] =
      r.map { case (a, b) => f(a) -> b }
  }
}
Example 21
Source File: DummyBackingStore.scala From iotchain with MIT License | 5 votes |
package jbok.network.http.server.authentication

import cats.data.OptionT
import cats.effect.IO
import tsec.authentication.BackingStore

import scala.collection.mutable

object DummyBackingStore {
  def apply[I, V](getId: V => I): BackingStore[IO, I, V] = new BackingStore[IO, I, V] {
    private val storageMap = mutable.HashMap.empty[I, V]

    def put(elem: V): IO[V] = {
      val map = storageMap.put(getId(elem), elem)
      if (map.isEmpty) IO.pure(elem)
      else IO.raiseError(new IllegalArgumentException)
    }

    def get(id: I): OptionT[IO, V] =
      OptionT.fromOption[IO](storageMap.get(id))

    def update(v: V): IO[V] = {
      storageMap.update(getId(v), v)
      IO.pure(v)
    }

    def delete(id: I): IO[Unit] =
      storageMap.remove(id) match {
        case Some(_) => IO.unit
        case None    => IO.raiseError(new IllegalArgumentException)
      }
  }
}
Example 22
Source File: TopNList.scala From Spark.TableStatsExample with Apache License 2.0 | 5 votes |
package com.cloudera.sa.examples.tablestats.model

import scala.collection.mutable

class TopNList(val maxSize: Int) extends Serializable {
  val topNCountsForColumnArray = new mutable.ArrayBuffer[(Any, Long)]
  var lowestColumnCountIndex: Int = -1
  var lowestValue = Long.MaxValue

  def add(newValue: Any, newCount: Long): Unit = {
    if (topNCountsForColumnArray.length < maxSize - 1) {
      topNCountsForColumnArray += ((newValue, newCount))
    } else if (topNCountsForColumnArray.length == maxSize) {
      updateLowestValue
    } else {
      if (newCount > lowestValue) {
        topNCountsForColumnArray.insert(lowestColumnCountIndex, (newValue, newCount))
        updateLowestValue
      }
    }
  }

  def updateLowestValue: Unit = {
    var index = 0

    topNCountsForColumnArray.foreach { r =>
      if (r._2 < lowestValue) {
        lowestValue = r._2
        lowestColumnCountIndex = index
      }
      index += 1
    }
  }

  override def toString = s"TopNList(topNCountsForColumnArray=$topNCountsForColumnArray)"
}
Example 23
Source File: FirstPassStatsModel.scala From Spark.TableStatsExample with Apache License 2.0 | 5 votes |
package com.cloudera.sa.examples.tablestats.model

import scala.collection.mutable

class FirstPassStatsModel extends Serializable {
  var columnStatsMap = new mutable.HashMap[Integer, ColumnStats]

  def +=(colIndex: Int, colValue: Any, colCount: Long): Unit = {
    columnStatsMap.getOrElseUpdate(colIndex, new ColumnStats) += (colValue, colCount)
  }

  def +=(firstPassStatsModel: FirstPassStatsModel): Unit = {
    firstPassStatsModel.columnStatsMap.foreach { e =>
      val columnStats = columnStatsMap.getOrElse(e._1, null)
      if (columnStats != null) {
        columnStats += (e._2)
      } else {
        columnStatsMap += ((e._1, e._2))
      }
    }
  }

  override def toString = s"FirstPassStatsModel(columnStatsMap=$columnStatsMap)"
}
Example 24
Source File: ConfigurableDataGeneratorMain.scala From Spark.TableStatsExample with Apache License 2.0 | 5 votes |
package com.cloudera.sa.examples.tablestats import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.GenericRow import org.apache.spark.sql.types.{StringType, LongType, StructField, StructType} import org.apache.spark.{SparkContext, SparkConf} import scala.collection.mutable import scala.util.Random object ConfigurableDataGeneratorMain { def main(args: Array[String]): Unit = { if (args.length == 0) { println("ConfigurableDataGeneratorMain <outputPath> <numberOfColumns> <numberOfRecords> <numberOfPartitions> <local>") return } val outputPath = args(0) val numberOfColumns = args(1).toInt val numberOfRecords = args(2).toInt val numberOfPartitions = args(3).toInt val runLocal = (args.length == 5 && args(4).equals("L")) var sc: SparkContext = null if (runLocal) { val sparkConfig = new SparkConf() sparkConfig.set("spark.broadcast.compress", "false") sparkConfig.set("spark.shuffle.compress", "false") sparkConfig.set("spark.shuffle.spill.compress", "false") sc = new SparkContext("local", "test", sparkConfig) } else { val sparkConfig = new SparkConf().setAppName("ConfigurableDataGeneratorMain") sc = new SparkContext(sparkConfig) } val sqlContext = new org.apache.spark.sql.SQLContext(sc) //Part A val rowRDD = sc.parallelize( (0 until numberOfPartitions).map( i => i), numberOfPartitions) //Part B val megaDataRDD = rowRDD.flatMap( r => { val random = new Random() val dataRange = (0 until numberOfRecords/numberOfPartitions).iterator dataRange.map[Row]( x => { val values = new mutable.ArrayBuffer[Any] for (i <- 0 until numberOfColumns) { if (i % 2 == 0) { values.+=(random.nextInt(100).toLong) } else { values.+=(random.nextInt(100).toString) } } new GenericRow(values.toArray) }) }) //Part C val schema = StructType( (0 until numberOfColumns).map( i => { if (i % 2 == 0) { StructField("longColumn_" + i, LongType, true) } else { StructField("stringColumn_" + i, StringType, true) } }) ) val df = sqlContext.createDataFrame(megaDataRDD, schema) df.saveAsParquetFile(outputPath) //Part D sc.stop() } }
Example 25
Source File: TableStatsSinglePathMain.scala From Spark.TableStatsExample with Apache License 2.0 | 5 votes |
package com.cloudera.sa.examples.tablestats import com.cloudera.sa.examples.tablestats.model.{FirstPassStatsModel} import org.apache.spark._ import org.apache.spark.sql.DataFrame import scala.collection.mutable object TableStatsSinglePathMain { def main(args: Array[String]): Unit = { if (args.length == 0) { println("TableStatsSinglePathMain <inputPath>") return } val inputPath = args(0) val runLocal = (args.length == 2 && args(1).equals("L")) var sc:SparkContext = null if (runLocal) { val sparkConfig = new SparkConf() sparkConfig.set("spark.broadcast.compress", "false") sparkConfig.set("spark.shuffle.compress", "false") sparkConfig.set("spark.shuffle.spill.compress", "false") sc = new SparkContext("local", "TableStatsSinglePathMain", sparkConfig) } else { val sparkConfig = new SparkConf().setAppName("TableStatsSinglePathMain") sc = new SparkContext(sparkConfig) } val sqlContext = new org.apache.spark.sql.SQLContext(sc) //Part A var df = sqlContext.parquetFile(inputPath) //Part B val firstPassStats = getFirstPassStat( df) //Part E println(firstPassStats) //Part F sc.stop() } def getFirstPassStat(df: DataFrame): FirstPassStatsModel = { val schema = df.schema //Part B.1 val columnValueCounts = df.flatMap(r => (0 until schema.length).map { idx => //((columnIdx, cellValue), count) ((idx, r.get(idx)), 1l) } ).reduceByKey(_ + _) //This is like word count //Part C val firstPassStats = columnValueCounts.mapPartitions[FirstPassStatsModel]{it => val firstPassStatsModel = new FirstPassStatsModel() it.foreach{ case ((columnIdx, columnVal), count) => firstPassStatsModel += (columnIdx, columnVal, count) } Iterator(firstPassStatsModel) }.reduce { (a, b) => //Part D a += (b) a } firstPassStats } }
Example 26
Source File: MarkersQueue.scala From kmq with Apache License 2.0 | 5 votes |
package com.softwaremill.kmq.redelivery import com.softwaremill.kmq.{EndMarker, MarkerKey, MarkerValue, StartMarker} import scala.collection.mutable class MarkersQueue(disableRedeliveryBefore: Offset) { private val markersInProgress = mutable.Set[MarkerKey]() private val markersByTimestamp = new mutable.PriorityQueue[AttributedMarkerKey[Timestamp]]()(bySmallestAttributeOrdering) private val markersByOffset = new mutable.PriorityQueue[AttributedMarkerKey[Offset]]()(bySmallestAttributeOrdering) private var redeliveryEnabled = false def handleMarker(markerOffset: Offset, k: MarkerKey, v: MarkerValue, t: Timestamp) { if (markerOffset >= disableRedeliveryBefore) { redeliveryEnabled = true } v match { case s: StartMarker => markersByOffset.enqueue(AttributedMarkerKey(k, markerOffset)) markersByTimestamp.enqueue(AttributedMarkerKey(k, t+s.getRedeliverAfter)) markersInProgress += k case _: EndMarker => markersInProgress -= k } } def markersToRedeliver(now: Timestamp): List[MarkerKey] = { removeEndedMarkers(markersByTimestamp) var toRedeliver = List.empty[MarkerKey] if (redeliveryEnabled) { while (shouldRedeliverMarkersQueueHead(now)) { val queueHead = markersByTimestamp.dequeue() // the first marker, if any, is not ended for sure (b/c of the cleanup that's done at the beginning), // but subsequent markers don't have to be. if (markersInProgress.contains(queueHead.key)) { toRedeliver ::= queueHead.key } // not removing from markersInProgress - until we are sure the message is redelivered (the redeliverer // sends an end marker when this is done) - the marker needs to stay for minimum-offset calculations to be // correct } } toRedeliver } def smallestMarkerOffset(): Option[Offset] = { removeEndedMarkers(markersByOffset) markersByOffset.headOption.map(_.attr) } private def removeEndedMarkers[T](queue: mutable.PriorityQueue[AttributedMarkerKey[T]]): Unit = { while (isHeadEnded(queue)) { queue.dequeue() } } private def isHeadEnded[T](queue: mutable.PriorityQueue[AttributedMarkerKey[T]]): Boolean = { queue.headOption.exists(e => !markersInProgress.contains(e.key)) } private def shouldRedeliverMarkersQueueHead(now: Timestamp): Boolean = { markersByTimestamp.headOption match { case None => false case Some(m) => now >= m.attr } } private case class AttributedMarkerKey[T](key: MarkerKey, attr: T) private def bySmallestAttributeOrdering[T: Ordering]: Ordering[AttributedMarkerKey[T]] = new Ordering[AttributedMarkerKey[T]] { override def compare(x: AttributedMarkerKey[T], y: AttributedMarkerKey[T]): Int = { - implicitly[Ordering[T]].compare(x.attr, y.attr) } } }
Example 27
Source File: MergeSort.scala From chymyst-core with Apache License 2.0 | 5 votes |
package io.chymyst.benchmark // Make all imports explicit, just to see what is the entire set of required imports. // Do not optimize imports in this file! import io.chymyst.jc.{+, FixedPool, M, m, B, b, go, Reaction, ReactionInfo, InputMoleculeInfo, AllMatchersAreTrivial, OutputMoleculeInfo, site, EmitMultiple} import io.chymyst.jc.ConsoleErrorsAndWarningsReporter import scala.annotation.tailrec import scala.collection.mutable object MergeSort { type Coll[T] = IndexedSeq[T] def arrayMerge[T: Ordering](arr1: Coll[T], arr2: Coll[T]): Coll[T] = { val result = new mutable.ArraySeq[T](arr1.length + arr2.length) // just to allocate space def isLess(x: T, y: T) = implicitly[Ordering[T]].compare(x, y) < 0 // Will now modify the `result` array in place. @tailrec def mergeRec(i1: Int, i2: Int, i: Int): Unit = { if (i1 == arr1.length && i2 == arr2.length) () else { val (x, newI1, newI2) = if (i1 < arr1.length && (i2 == arr2.length || isLess(arr1(i1), arr2(i2)))) (arr1(i1), i1 + 1, i2) else (arr2(i2), i1, i2 + 1) result(i) = x mergeRec(newI1, newI2, i + 1) } } mergeRec(0, 0, 0) result.toIndexedSeq } def performMergeSort[T: Ordering](array: Coll[T], threads: Int = 8): Coll[T] = { val finalResult = m[Coll[T]] val getFinalResult = b[Unit, Coll[T]] val reactionPool = FixedPool(threads) val pool2 = FixedPool(threads) site(pool2)( go { case finalResult(arr) + getFinalResult(_, r) => r(arr) } ) // The `mergesort()` molecule will start the chain reactions at one level lower. val mergesort = m[(Coll[T], M[Coll[T]])] site(reactionPool)( go { case mergesort((arr, resultToYield)) => if (arr.length <= 1) resultToYield(arr) else { val (part1, part2) = arr.splitAt(arr.length / 2) // The `sorted1()` and `sorted2()` molecules will carry the sorted results from the lower level. val sorted1 = m[Coll[T]] val sorted2 = m[Coll[T]] site(reactionPool)( go { case sorted1(x) + sorted2(y) => resultToYield(arrayMerge(x, y)) } ) // emit `mergesort` with the lower-level `sorted` result molecules mergesort((part1, sorted1)) + mergesort((part2, sorted2)) } } ) // Sort our array: emit `mergesort()` at top level. mergesort((array, finalResult)) val result = getFinalResult() reactionPool.shutdownNow() pool2.shutdownNow() result } }
Example 28
package devbox.common

import Util.permsetRw
import upickle.default.{ReadWriter, macroRW}

import java.security.MessageDigest

import os.{Path, StatInfo}

import scala.collection.mutable

// NOTE: this listing is an excerpt — the `sealed trait Sig` declaration and the opening of its
// companion object are not shown in the original.
  def compute(p: Path, buffer: Array[Byte], fileType: os.FileType) = {
    fileType match {
      case os.FileType.Other => None
      case os.FileType.SymLink => Some(Symlink(os.readLink(p).toString))
      case os.FileType.Dir => Some(Dir(os.perms(p).toInt()))
      case os.FileType.File =>
        val digest = MessageDigest.getInstance("MD5")
        val chunks = mutable.ArrayBuffer.empty[Bytes]
        var size = 0L
        for ((buffer, n) <- os.read.chunks(p, buffer)) {
          size += n
          digest.reset()
          digest.update(buffer, 0, n)
          chunks.append(new Bytes(digest.digest()))
        }
        Some(File(os.perms(p).toInt, chunks.toSeq, size))
    }
  }

  case class File(perms: os.PermSet, blockHashes: Seq[Bytes], size: Long) extends Sig
  object File {
    implicit val rw: ReadWriter[File] = macroRW
  }

  case class Dir(perms: os.PermSet) extends Sig
  object Dir {
    implicit val rw: ReadWriter[Dir] = macroRW
  }

  case class Symlink(dest: String) extends Sig
  object Symlink {
    implicit val rw: ReadWriter[Symlink] = macroRW
  }

  implicit val rw: ReadWriter[Sig] = macroRW
}
Example 29
Source File: Materializer.scala From sjsonnet with Apache License 2.0 | 5 votes |
package sjsonnet import sjsonnet.Expr.{FieldName, Member, ObjBody} import sjsonnet.Expr.Member.Visibility import upickle.core.Visitor import scala.collection.mutable object Materializer { def apply(v: Val)(implicit evaluator: EvalScope): ujson.Value = apply0(v, ujson.Value) def stringify(v: Val)(implicit evaluator: EvalScope): String = { apply0(v, new sjsonnet.Renderer()).toString } def apply0[T](v: Val, visitor: Visitor[T, T]) (implicit evaluator: EvalScope): T = try { v match { case Val.True => visitor.visitTrue(-1) case Val.False => visitor.visitFalse(-1) case Val.Null => visitor.visitNull(-1) case Val.Num(n) => visitor.visitFloat64(n, -1) case Val.Str(s) => visitor.visitString(s, -1) case Val.Arr(xs) => val arrVisitor = visitor.visitArray(xs.length, -1) for(x <- xs) { arrVisitor.visitValue( apply0(x.force, visitor), -1 ) } arrVisitor.visitEnd(-1) case obj: Val.Obj => obj.triggerAllAsserts(obj) val keysUnsorted = obj.getVisibleKeys().toArray val keys = if (!evaluator.preserveOrder) keysUnsorted.sortBy(_._1) else keysUnsorted val objVisitor = visitor.visitObject(keys.length , -1) for(t <- keys) { val (k, hidden) = t if (!hidden){ objVisitor.visitKeyValue(objVisitor.visitKey(-1).visitString(k, -1)) objVisitor.visitValue( apply0( obj.value(k, -1)(evaluator.emptyMaterializeFileScope, implicitly), visitor ), -1 ) } } objVisitor.visitEnd(-1) case f: Val.Func => apply0( f.apply(Nil, "(memory)", -1)(evaluator.emptyMaterializeFileScope, implicitly), visitor ) } }catch {case e: StackOverflowError => throw Error.Delegate("Stackoverflow while materializing, possibly due to recursive value") } def reverse(v: ujson.Value): Val = v match{ case ujson.True => Val.True case ujson.False => Val.False case ujson.Null => Val.Null case ujson.Num(n) => Val.Num(n) case ujson.Str(s) => Val.Str(s) case ujson.Arr(xs) => Val.Arr(xs.map(x => Val.Lazy(reverse(x))).toArray[Val.Lazy]) case ujson.Obj(xs) => val builder = mutable.LinkedHashMap.newBuilder[String, Val.Obj.Member] for(x <- xs){ val v = Val.Obj.Member(false, Visibility.Normal, (_: Val.Obj, _: Option[Val.Obj], _, _) => reverse(x._2) ) builder += (x._1 -> v) } new Val.Obj(builder.result(), _ => (), None) } def toExpr(v: ujson.Value): Expr = v match{ case ujson.True => Expr.True(0) case ujson.False => Expr.False(0) case ujson.Null => Expr.Null(0) case ujson.Num(n) => Expr.Num(0, n) case ujson.Str(s) => Expr.Str(0, s) case ujson.Arr(xs) => Expr.Arr(0, xs.map(toExpr).toArray[Expr]) case ujson.Obj(kvs) => Expr.Obj(0, ObjBody.MemberList( for((k, v) <- kvs.toArray) yield Member.Field(0, FieldName.Fixed(k), false, None, Visibility.Normal, toExpr(v)) ) ) } }
Example 30
Source File: SjsonnetMain.scala From sjsonnet with Apache License 2.0 | 5 votes |
package sjsonnet import scala.collection.mutable import scala.scalajs.js import scala.scalajs.js.annotation.{JSExport, JSExportTopLevel} @JSExportTopLevel("SjsonnetMain") object SjsonnetMain { def createParseCache() = collection.mutable.Map[String, fastparse.Parsed[(Expr, Map[String, Int])]]() @JSExport def interpret(text: String, extVars: js.Any, tlaVars: js.Any, wd0: String, importer: js.Function2[String, String, js.Array[String]], preserveOrder: Boolean = false): js.Any = { val interp = new Interpreter( mutable.Map.empty, ujson.WebJson.transform(extVars, ujson.Value).obj.toMap, ujson.WebJson.transform(tlaVars, ujson.Value).obj.toMap, JsVirtualPath(wd0), importer = (wd, path) => { importer(wd.asInstanceOf[JsVirtualPath].path, path) match{ case null => None case arr => Some((JsVirtualPath(arr(0)), arr(1))) } }, preserveOrder ) interp.interpret0(text, JsVirtualPath("(memory)"), ujson.WebJson.Builder) match{ case Left(msg) => throw new js.JavaScriptException(msg) case Right(v) => v } } } case class JsVirtualPath(path: String) extends Path{ def relativeToString(p: Path): String = p match{ case other: JsVirtualPath if path.startsWith(other.path) => path.drop(other.path.length) case _ => path } def debugRead(): Option[String] = None def parent(): Path = JsVirtualPath(path.split('/').dropRight(1).mkString("/")) def segmentCount(): Int = path.split('/').length def last: String = path.split('/').last def /(s: String): Path = JsVirtualPath(path + "/" + s) }
Example 31
Source File: SparkTC.scala From drizzle-spark with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples import scala.collection.mutable import scala.util.Random import org.apache.spark.sql.SparkSession object SparkTC { val numEdges = 200 val numVertices = 100 val rand = new Random(42) def generateGraph: Seq[(Int, Int)] = { val edges: mutable.Set[(Int, Int)] = mutable.Set.empty while (edges.size < numEdges) { val from = rand.nextInt(numVertices) val to = rand.nextInt(numVertices) if (from != to) edges.+=((from, to)) } edges.toSeq } def main(args: Array[String]) { val spark = SparkSession .builder .appName("SparkTC") .getOrCreate() val slices = if (args.length > 0) args(0).toInt else 2 var tc = spark.sparkContext.parallelize(generateGraph, slices).cache() // Linear transitive closure: each round grows paths by one edge, // by joining the graph's edges with the already-discovered paths. // e.g. join the path (y, z) from the TC with the edge (x, y) from // the graph to obtain the path (x, z). // Because join() joins on keys, the edges are stored in reversed order. val edges = tc.map(x => (x._2, x._1)) // This join is iterated until a fixed point is reached. var oldCount = 0L var nextCount = tc.count() do { oldCount = nextCount // Perform the join, obtaining an RDD of (y, (z, x)) pairs, // then project the result to obtain the new (x, z) paths. tc = tc.union(tc.join(edges).map(x => (x._2._2, x._2._1))).distinct().cache() nextCount = tc.count() } while (nextCount != oldCount) println("TC has " + tc.count() + " edges.") spark.stop() } } // scalastyle:on println
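Note: generateGraph above leans on mutable.Set to discard duplicate edges until the requested count is reached. The same loop in a runnable standalone form, with smaller illustrative sizes:

import scala.collection.mutable
import scala.util.Random

object RandomEdgeSetDemo {
  def main(args: Array[String]): Unit = {
    val rand = new Random(42)
    val edges = mutable.Set.empty[(Int, Int)]
    // The set silently drops duplicates, so looping until the target size is reached
    // yields exactly that many distinct edges (assuming enough possible pairs exist).
    while (edges.size < 10) {
      val from = rand.nextInt(5)
      val to = rand.nextInt(5)
      if (from != to) edges += ((from, to))
    }
    println(edges.mkString(", "))
  }
}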
Example 32
Source File: ParamGridBuilder.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.tuning

import scala.annotation.varargs
import scala.collection.mutable

import org.apache.spark.annotation.Since
import org.apache.spark.ml.param._

@Since("1.2.0")
class ParamGridBuilder {

  // Grid of parameter values; populated by the addGrid/baseOn methods, which are
  // elided from this excerpt.
  private val paramGrid = mutable.Map.empty[Param[_], Iterable[_]]

  @Since("1.2.0")
  def build(): Array[ParamMap] = {
    var paramMaps = Array(new ParamMap)
    paramGrid.foreach { case (param, values) =>
      val newParamMaps = values.flatMap { v =>
        paramMaps.map(_.copy.put(param.asInstanceOf[Param[Any]], v))
      }
      paramMaps = newParamMaps.toArray
    }
    paramMaps
  }
}
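Note: build() above is a cartesian-product expansion driven by the mutable grid map. A sketch of the same expansion using plain Scala Maps in place of Param and ParamMap (all names here are illustrative):

import scala.collection.mutable

object GridExpansionDemo {
  def main(args: Array[String]): Unit = {
    // Same expansion idea as build() above, with plain Maps standing in for ParamMap.
    val grid = mutable.Map[String, Iterable[Any]]("maxIter" -> Seq(10, 20), "reg" -> Seq(0.0, 0.1))
    var combos = Array(Map.empty[String, Any])
    grid.foreach { case (name, values) =>
      combos = values.flatMap(v => combos.map(_ + (name -> v))).toArray
    }
    combos.foreach(println) // all 4 combinations
  }
}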
Example 33
Source File: HashingTF.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.feature

import java.lang.{Iterable => JavaIterable}

import scala.collection.JavaConverters._
import scala.collection.mutable

import org.apache.spark.SparkException
import org.apache.spark.annotation.Since
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.rdd.RDD
import org.apache.spark.unsafe.hash.Murmur3_x86_32._
import org.apache.spark.unsafe.types.UTF8String
import org.apache.spark.util.Utils

object HashingTF {

  // Seed used by the murmur3 term hashing below. The companion HashingTF class and
  // its transform methods are elided from this excerpt.
  private val seed = 42

  private[spark] def murmur3Hash(term: Any): Int = {
    term match {
      case null => seed
      case b: Boolean => hashInt(if (b) 1 else 0, seed)
      case b: Byte => hashInt(b, seed)
      case s: Short => hashInt(s, seed)
      case i: Int => hashInt(i, seed)
      case l: Long => hashLong(l, seed)
      case f: Float => hashInt(java.lang.Float.floatToIntBits(f), seed)
      case d: Double => hashLong(java.lang.Double.doubleToLongBits(d), seed)
      case s: String =>
        val utf8 = UTF8String.fromString(s)
        hashUnsafeBytes(utf8.getBaseObject, utf8.getBaseOffset, utf8.numBytes(), seed)
      case _ => throw new SparkException("HashingTF with murmur3 algorithm does not " +
        s"support type ${term.getClass.getCanonicalName} of input data.")
    }
  }
}
Example 34
Source File: SlidingRDD.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.rdd import scala.collection.mutable import scala.reflect.ClassTag import org.apache.spark.{Partition, TaskContext} import org.apache.spark.rdd.RDD private[mllib] class SlidingRDDPartition[T](val idx: Int, val prev: Partition, val tail: Seq[T], val offset: Int) extends Partition with Serializable { override val index: Int = idx } private[mllib] class SlidingRDD[T: ClassTag](@transient val parent: RDD[T], val windowSize: Int, val step: Int) extends RDD[Array[T]](parent) { require(windowSize > 0 && step > 0 && !(windowSize == 1 && step == 1), "Window size and step must be greater than 0, " + s"and they cannot be both 1, but got windowSize = $windowSize and step = $step.") override def compute(split: Partition, context: TaskContext): Iterator[Array[T]] = { val part = split.asInstanceOf[SlidingRDDPartition[T]] (firstParent[T].iterator(part.prev, context) ++ part.tail) .drop(part.offset) .sliding(windowSize, step) .withPartial(false) .map(_.toArray) } override def getPreferredLocations(split: Partition): Seq[String] = firstParent[T].preferredLocations(split.asInstanceOf[SlidingRDDPartition[T]].prev) override def getPartitions: Array[Partition] = { val parentPartitions = parent.partitions val n = parentPartitions.length if (n == 0) { Array.empty } else if (n == 1) { Array(new SlidingRDDPartition[T](0, parentPartitions(0), Seq.empty, 0)) } else { val w1 = windowSize - 1 // Get partition sizes and first w1 elements. val (sizes, heads) = parent.mapPartitions { iter => val w1Array = iter.take(w1).toArray Iterator.single((w1Array.length + iter.length, w1Array)) }.collect().unzip val partitions = mutable.ArrayBuffer.empty[SlidingRDDPartition[T]] var i = 0 var cumSize = 0 var partitionIndex = 0 while (i < n) { val mod = cumSize % step val offset = if (mod == 0) 0 else step - mod val size = sizes(i) if (offset < size) { val tail = mutable.ListBuffer.empty[T] // Keep appending to the current tail until it has w1 elements. var j = i + 1 while (j < n && tail.length < w1) { tail ++= heads(j).take(w1 - tail.length) j += 1 } if (sizes(i) + tail.length >= offset + windowSize) { partitions += new SlidingRDDPartition[T](partitionIndex, parentPartitions(i), tail, offset) partitionIndex += 1 } } cumSize += size i += 1 } partitions.toArray } } // TODO: Override methods such as aggregate, which only requires one Spark job. }
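Note: compute() above appends a tail copied from the following partition before calling sliding(), so windows can cross partition boundaries. A purely local sketch of that windowing step (sizes are illustrative):

import scala.collection.mutable

object SlidingWindowDemo {
  def main(args: Array[String]): Unit = {
    // The partition logic above is a distributed version of this: pull a few elements
    // from the "next" partition (the tail) so windows can cross the boundary.
    val thisPartition = Iterator(1, 2, 3, 4)
    val tail = mutable.ListBuffer(5, 6) // first windowSize - 1 elements of the next partition
    val windows = (thisPartition ++ tail).sliding(3, 1).withPartial(false).map(_.toArray)
    windows.foreach(w => println(w.mkString("[", ", ", "]")))
  }
}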
Example 35
Source File: ParamGridBuilderSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.tuning import scala.collection.mutable import org.apache.spark.SparkFunSuite import org.apache.spark.ml.param.{ParamMap, TestParams} class ParamGridBuilderSuite extends SparkFunSuite { val solver = new TestParams() import solver.{inputCol, maxIter} test("param grid builder") { def validateGrid(maps: Array[ParamMap], expected: mutable.Set[(Int, String)]): Unit = { assert(maps.size === expected.size) maps.foreach { m => val tuple = (m(maxIter), m(inputCol)) assert(expected.contains(tuple)) expected.remove(tuple) } assert(expected.isEmpty) } val maps0 = new ParamGridBuilder() .baseOn(maxIter -> 10) .addGrid(inputCol, Array("input0", "input1")) .build() val expected0 = mutable.Set( (10, "input0"), (10, "input1")) validateGrid(maps0, expected0) val maps1 = new ParamGridBuilder() .baseOn(ParamMap(maxIter -> 5, inputCol -> "input")) // will be overwritten .addGrid(maxIter, Array(10, 20)) .addGrid(inputCol, Array("input0", "input1")) .build() val expected1 = mutable.Set( (10, "input0"), (20, "input0"), (10, "input1"), (20, "input1")) validateGrid(maps1, expected1) } }
Example 36
Source File: EnsembleTestHelper.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.tree import scala.collection.mutable import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.model.TreeEnsembleModel import org.apache.spark.util.StatCounter object EnsembleTestHelper { def validateRegressor( model: TreeEnsembleModel, input: Seq[LabeledPoint], required: Double, metricName: String = "mse") { val predictions = input.map(x => model.predict(x.features)) val errors = predictions.zip(input).map { case (prediction, point) => point.label - prediction } val metric = metricName match { case "mse" => errors.map(err => err * err).sum / errors.size case "mae" => errors.map(math.abs).sum / errors.size } assert(metric <= required, s"validateRegressor calculated $metricName $metric but required $required.") } def generateOrderedLabeledPoints(numFeatures: Int, numInstances: Int): Array[LabeledPoint] = { val arr = new Array[LabeledPoint](numInstances) for (i <- 0 until numInstances) { val label = if (i < numInstances / 10) { 0.0 } else if (i < numInstances / 2) { 1.0 } else if (i < numInstances * 0.9) { 0.0 } else { 1.0 } val features = Array.fill[Double](numFeatures)(i.toDouble) arr(i) = new LabeledPoint(label, Vectors.dense(features)) } arr } }
Example 37
Source File: KPLBasedKinesisTestUtils.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.kinesis import java.nio.ByteBuffer import java.nio.charset.StandardCharsets import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import com.amazonaws.services.kinesis.producer.{KinesisProducer => KPLProducer, KinesisProducerConfiguration, UserRecordResult} import com.google.common.util.concurrent.{FutureCallback, Futures} private[kinesis] class KPLBasedKinesisTestUtils extends KinesisTestUtils { override protected def getProducer(aggregate: Boolean): KinesisDataGenerator = { if (!aggregate) { new SimpleDataGenerator(kinesisClient) } else { new KPLDataGenerator(regionName) } } } private[kinesis] class KPLDataGenerator(regionName: String) extends KinesisDataGenerator { private lazy val producer: KPLProducer = { val conf = new KinesisProducerConfiguration() .setRecordMaxBufferedTime(1000) .setMaxConnections(1) .setRegion(regionName) .setMetricsLevel("none") new KPLProducer(conf) } override def sendData(streamName: String, data: Seq[Int]): Map[String, Seq[(Int, String)]] = { val shardIdToSeqNumbers = new mutable.HashMap[String, ArrayBuffer[(Int, String)]]() data.foreach { num => val str = num.toString val data = ByteBuffer.wrap(str.getBytes(StandardCharsets.UTF_8)) val future = producer.addUserRecord(streamName, str, data) val kinesisCallBack = new FutureCallback[UserRecordResult]() { override def onFailure(t: Throwable): Unit = {} // do nothing override def onSuccess(result: UserRecordResult): Unit = { val shardId = result.getShardId val seqNumber = result.getSequenceNumber() val sentSeqNumbers = shardIdToSeqNumbers.getOrElseUpdate(shardId, new ArrayBuffer[(Int, String)]()) sentSeqNumbers += ((num, seqNumber)) } } Futures.addCallback(future, kinesisCallBack) } producer.flushSync() shardIdToSeqNumbers.toMap } }
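Note: sendData above groups (record, sequence number) pairs per shard with getOrElseUpdate, creating the per-shard buffer on first use. A runnable sketch of that grouping pattern with made-up shard ids:

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

object GroupByShardDemo {
  def main(args: Array[String]): Unit = {
    val shardIdToSeqNumbers = new mutable.HashMap[String, ArrayBuffer[(Int, String)]]()
    val results = Seq(("shard-0", 1, "seq-a"), ("shard-1", 2, "seq-b"), ("shard-0", 3, "seq-c"))
    // getOrElseUpdate creates the per-shard buffer on first sight of a shard id, exactly
    // as the KPL callback above does.
    results.foreach { case (shardId, num, seq) =>
      shardIdToSeqNumbers.getOrElseUpdate(shardId, new ArrayBuffer[(Int, String)]()) += ((num, seq))
    }
    println(shardIdToSeqNumbers.mapValues(_.toList).toMap)
  }
}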
Example 38
Source File: KafkaStreamSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.kafka import scala.collection.mutable import scala.concurrent.duration._ import scala.language.postfixOps import scala.util.Random import kafka.serializer.StringDecoder import org.scalatest.BeforeAndAfterAll import org.scalatest.concurrent.Eventually import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.{Milliseconds, StreamingContext} class KafkaStreamSuite extends SparkFunSuite with Eventually with BeforeAndAfterAll { private var ssc: StreamingContext = _ private var kafkaTestUtils: KafkaTestUtils = _ override def beforeAll(): Unit = { kafkaTestUtils = new KafkaTestUtils kafkaTestUtils.setup() } override def afterAll(): Unit = { if (ssc != null) { ssc.stop() ssc = null } if (kafkaTestUtils != null) { kafkaTestUtils.teardown() kafkaTestUtils = null } } test("Kafka input stream") { val sparkConf = new SparkConf().setMaster("local[4]").setAppName(this.getClass.getSimpleName) ssc = new StreamingContext(sparkConf, Milliseconds(500)) val topic = "topic1" val sent = Map("a" -> 5, "b" -> 3, "c" -> 10) kafkaTestUtils.createTopic(topic) kafkaTestUtils.sendMessages(topic, sent) val kafkaParams = Map("zookeeper.connect" -> kafkaTestUtils.zkAddress, "group.id" -> s"test-consumer-${Random.nextInt(10000)}", "auto.offset.reset" -> "smallest") val stream = KafkaUtils.createStream[String, String, StringDecoder, StringDecoder]( ssc, kafkaParams, Map(topic -> 1), StorageLevel.MEMORY_ONLY) val result = new mutable.HashMap[String, Long]() stream.map(_._2).countByValue().foreachRDD { r => r.collect().foreach { kv => result.synchronized { val count = result.getOrElseUpdate(kv._1, 0) + kv._2 result.put(kv._1, count) } } } ssc.start() eventually(timeout(10000 milliseconds), interval(100 milliseconds)) { assert(result.synchronized { sent === result }) } } }
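Note: the streaming test above accumulates counts in a mutable.HashMap and wraps every read-modify-write in result.synchronized, since plain mutable maps are not thread-safe. A standalone sketch of that pattern using a parallel collection to simulate concurrent updates:

import scala.collection.mutable

object SynchronizedCountDemo {
  def main(args: Array[String]): Unit = {
    val result = new mutable.HashMap[String, Long]()
    val updates = Seq("a" -> 2L, "b" -> 1L, "a" -> 3L)
    // Each read-modify-write is guarded by result.synchronized, mirroring the test above.
    updates.par.foreach { case (key, delta) =>
      result.synchronized {
        val count = result.getOrElseUpdate(key, 0L) + delta
        result.put(key, count)
      }
    }
    println(result.synchronized(result.toMap)) // Map(a -> 5, b -> 1)
  }
}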
Example 39
Source File: UDTRegistration.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.types

import scala.collection.mutable

import org.apache.spark.SparkException
import org.apache.spark.internal.Logging
import org.apache.spark.util.Utils

object UDTRegistration extends Serializable with Logging {

  // userClass -> udtClass mapping. In the full source this map is pre-populated with
  // the built-in ML vector/matrix UDTs and filled further by register(), which is
  // elided from this excerpt.
  private lazy val udtMap: mutable.Map[String, String] = mutable.Map.empty

  def getUDTFor(userClass: String): Option[Class[_]] = {
    udtMap.get(userClass).map { udtClassName =>
      if (Utils.classIsLoadable(udtClassName)) {
        val udtClass = Utils.classForName(udtClassName)
        if (classOf[UserDefinedType[_]].isAssignableFrom(udtClass)) {
          udtClass
        } else {
          throw new SparkException(
            s"${udtClass.getName} is not an UserDefinedType. Please make sure registering " +
              s"an UserDefinedType for ${userClass}")
        }
      } else {
        throw new SparkException(
          s"Can not load in UserDefinedType ${udtClassName} for user class ${userClass}.")
      }
    }
  }
}
Example 40
Source File: collect.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions.aggregate import scala.collection.generic.Growable import scala.collection.mutable import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.types._ @ExpressionDescription( usage = "_FUNC_(expr) - Collects and returns a set of unique elements.") case class CollectSet( child: Expression, mutableAggBufferOffset: Int = 0, inputAggBufferOffset: Int = 0) extends Collect { def this(child: Expression) = this(child, 0, 0) override def checkInputDataTypes(): TypeCheckResult = { if (!child.dataType.existsRecursively(_.isInstanceOf[MapType])) { TypeCheckResult.TypeCheckSuccess } else { TypeCheckResult.TypeCheckFailure("collect_set() cannot have map type data") } } override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate = copy(mutableAggBufferOffset = newMutableAggBufferOffset) override def withNewInputAggBufferOffset(newInputAggBufferOffset: Int): ImperativeAggregate = copy(inputAggBufferOffset = newInputAggBufferOffset) override def prettyName: String = "collect_set" override protected[this] val buffer: mutable.HashSet[Any] = mutable.HashSet.empty }
Example 41
Source File: EquivalentExpressions.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions

import scala.collection.mutable

import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback

class EquivalentExpressions {

  // Wrapper that groups expressions by semantic equality (full implementation elided).
  case class Expr(e: Expression)

  // All equivalence classes discovered so far. The addExpr/addExprTree bookkeeping and
  // lookup methods of the full source are elided from this excerpt.
  private val equivalenceMap = mutable.HashMap.empty[Expr, mutable.MutableList[Expression]]

  def debugString(all: Boolean = false): String = {
    val sb: mutable.StringBuilder = new StringBuilder()
    sb.append("Equivalent expressions:\n")
    equivalenceMap.foreach { case (k, v) =>
      if (all || v.length > 1) {
        sb.append("  " + v.mkString(", ")).append("\n")
      }
    }
    sb.toString()
  }
}
Example 42
Source File: DriverRegistry.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.jdbc import java.sql.{Driver, DriverManager} import scala.collection.mutable import org.apache.spark.internal.Logging import org.apache.spark.util.Utils object DriverRegistry extends Logging { private val wrapperMap: mutable.Map[String, DriverWrapper] = mutable.Map.empty def register(className: String): Unit = { val cls = Utils.getContextOrSparkClassLoader.loadClass(className) if (cls.getClassLoader == null) { logTrace(s"$className has been loaded with bootstrap ClassLoader, wrapper is not required") } else if (wrapperMap.get(className).isDefined) { logTrace(s"Wrapper for $className already exists") } else { synchronized { if (wrapperMap.get(className).isEmpty) { val wrapper = new DriverWrapper(cls.newInstance().asInstanceOf[Driver]) DriverManager.registerDriver(wrapper) wrapperMap(className) = wrapper logTrace(s"Wrapper for $className registered") } } } } }
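Note: register() above uses the check-then-synchronize-then-check idiom so a driver wrapper is created at most once even under concurrent calls. A minimal sketch of the same idiom with a plain String standing in for the wrapped JDBC driver:

import scala.collection.mutable

object RegisterOnceDemo {
  private val wrapperMap: mutable.Map[String, String] = mutable.Map.empty

  // Same check-then-synchronize-then-check pattern as DriverRegistry.register above,
  // with a plain String standing in for the wrapped driver instance.
  def register(className: String): Unit = {
    if (wrapperMap.get(className).isEmpty) {
      synchronized {
        if (wrapperMap.get(className).isEmpty) {
          wrapperMap(className) = s"wrapper-for-$className"
          println(s"registered $className")
        }
      }
    }
  }

  def main(args: Array[String]): Unit = {
    register("org.h2.Driver")
    register("org.h2.Driver") // second call is a no-op
  }
}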
Example 43
Source File: ListingFileCatalog.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources import scala.collection.mutable import org.apache.hadoop.fs._ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.types.StructType class ListingFileCatalog( sparkSession: SparkSession, override val rootPaths: Seq[Path], parameters: Map[String, String], partitionSchema: Option[StructType], fileStatusCache: FileStatusCache = NoopCache) extends PartitioningAwareFileCatalog( sparkSession, parameters, partitionSchema, fileStatusCache) { @volatile private var cachedLeafFiles: mutable.LinkedHashMap[Path, FileStatus] = _ @volatile private var cachedLeafDirToChildrenFiles: Map[Path, Array[FileStatus]] = _ @volatile private var cachedPartitionSpec: PartitionSpec = _ refresh0() override def partitionSpec(): PartitionSpec = { if (cachedPartitionSpec == null) { cachedPartitionSpec = inferPartitioning() } logTrace(s"Partition spec: $cachedPartitionSpec") cachedPartitionSpec } override protected def leafFiles: mutable.LinkedHashMap[Path, FileStatus] = { cachedLeafFiles } override protected def leafDirToChildrenFiles: Map[Path, Array[FileStatus]] = { cachedLeafDirToChildrenFiles } override def refresh(): Unit = { refresh0() fileStatusCache.invalidateAll() } private def refresh0(): Unit = { val files = listLeafFiles(rootPaths) cachedLeafFiles = new mutable.LinkedHashMap[Path, FileStatus]() ++= files.map(f => f.getPath -> f) cachedLeafDirToChildrenFiles = files.toArray.groupBy(_.getPath.getParent) cachedPartitionSpec = null } override def equals(other: Any): Boolean = other match { case hdfs: ListingFileCatalog => rootPaths.toSet == hdfs.rootPaths.toSet case _ => false } override def hashCode(): Int = rootPaths.toSet.hashCode() }
Example 44
Source File: Exchange.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.exchange import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import org.apache.spark.broadcast import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{LeafExecNode, SparkPlan, UnaryExecNode} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType case class ReuseExchange(conf: SQLConf) extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { if (!conf.exchangeReuseEnabled) { return plan } // Build a hash map using schema of exchanges to avoid O(N*N) sameResult calls. val exchanges = mutable.HashMap[StructType, ArrayBuffer[Exchange]]() plan.transformUp { case exchange: Exchange => // the exchanges that have same results usually also have same schemas (same column names). val sameSchema = exchanges.getOrElseUpdate(exchange.schema, ArrayBuffer[Exchange]()) val samePlan = sameSchema.find { e => exchange.sameResult(e) } if (samePlan.isDefined) { // Keep the output of this exchange, the following plans require that to resolve // attributes. ReusedExchangeExec(exchange.output, samePlan.get) } else { sameSchema += exchange exchange } } } }
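Note: ReuseExchange above buckets exchanges by schema in a mutable.HashMap so the expensive sameResult comparison only runs within a small candidate list; ReuseSubquery in the next example follows the same pattern. A sketch of that reuse-by-key idea with a hypothetical Plan stand-in:

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

object ReuseByKeyDemo {
  // Hypothetical stand-in for a plan node: "schema" is the cheap bucketing key,
  // "body" stands in for the expensive sameResult comparison.
  case class Plan(schema: String, body: String)

  def main(args: Array[String]): Unit = {
    val plans = Seq(Plan("int,string", "scan t1"), Plan("int,string", "scan t2"), Plan("int,string", "scan t1"))
    val seen = mutable.HashMap[String, ArrayBuffer[Plan]]()
    // Bucketing by schema first keeps the expensive comparison to a small candidate
    // list, which is the point of the hash map in ReuseExchange above.
    val deduped = plans.map { p =>
      val bucket = seen.getOrElseUpdate(p.schema, ArrayBuffer[Plan]())
      bucket.find(_.body == p.body) match {
        case Some(existing) => existing
        case None => bucket += p; p
      }
    }
    deduped.foreach(println) // the third plan reuses the first instance
  }
}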
Example 45
Source File: subquery.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.{expressions, InternalRow} import org.apache.spark.sql.catalyst.expressions.{Expression, ExprId, InSet, Literal, PlanExpression} import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{BooleanType, DataType, StructType} case class ReuseSubquery(conf: SQLConf) extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { if (!conf.exchangeReuseEnabled) { return plan } // Build a hash map using schema of exchanges to avoid O(N*N) sameResult calls. val subqueries = mutable.HashMap[StructType, ArrayBuffer[SubqueryExec]]() plan transformAllExpressions { case sub: ExecSubqueryExpression => val sameSchema = subqueries.getOrElseUpdate(sub.plan.schema, ArrayBuffer[SubqueryExec]()) val sameResult = sameSchema.find(_.sameResult(sub.plan)) if (sameResult.isDefined) { sub.withNewPlan(sameResult.get) } else { sameSchema += sub.plan sub } } } }
Example 46
Source File: StateStoreCoordinator.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.streaming.state import scala.collection.mutable import org.apache.spark.SparkEnv import org.apache.spark.internal.Logging import org.apache.spark.rpc.{RpcCallContext, RpcEndpointRef, RpcEnv, ThreadSafeRpcEndpoint} import org.apache.spark.scheduler.ExecutorCacheTaskLocation import org.apache.spark.util.RpcUtils private class StateStoreCoordinator(override val rpcEnv: RpcEnv) extends ThreadSafeRpcEndpoint with Logging { private val instances = new mutable.HashMap[StateStoreId, ExecutorCacheTaskLocation] override def receive: PartialFunction[Any, Unit] = { case ReportActiveInstance(id, host, executorId) => logDebug(s"Reported state store $id is active at $executorId") instances.put(id, ExecutorCacheTaskLocation(host, executorId)) } override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { case VerifyIfInstanceActive(id, execId) => val response = instances.get(id) match { case Some(location) => location.executorId == execId case None => false } logDebug(s"Verified that state store $id is active: $response") context.reply(response) case GetLocation(id) => val executorId = instances.get(id).map(_.toString) logDebug(s"Got location of the state store $id: $executorId") context.reply(executorId) case DeactivateInstances(checkpointLocation) => val storeIdsToRemove = instances.keys.filter(_.checkpointLocation == checkpointLocation).toSeq instances --= storeIdsToRemove logDebug(s"Deactivating instances related to checkpoint location $checkpointLocation: " + storeIdsToRemove.mkString(", ")) context.reply(true) case StopCoordinator => stop() // Stop before replying to ensure that endpoint name has been deregistered logInfo("StateStoreCoordinator stopped") context.reply(true) } }
Example 47
Source File: BatchUIData.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.ui import scala.collection.mutable import org.apache.spark.streaming.Time import org.apache.spark.streaming.scheduler.{BatchInfo, OutputOperationInfo, StreamInputInfo} import org.apache.spark.streaming.ui.StreamingJobProgressListener._ private[ui] case class OutputOpIdAndSparkJobId(outputOpId: OutputOpId, sparkJobId: SparkJobId) private[ui] case class BatchUIData( val batchTime: Time, val streamIdToInputInfo: Map[Int, StreamInputInfo], val submissionTime: Long, val processingStartTime: Option[Long], val processingEndTime: Option[Long], val outputOperations: mutable.HashMap[OutputOpId, OutputOperationUIData] = mutable.HashMap(), var outputOpIdSparkJobIdPairs: Iterable[OutputOpIdAndSparkJobId] = Seq.empty) { def isFailed: Boolean = numFailedOutputOp != 0 } private[ui] object BatchUIData { def apply(batchInfo: BatchInfo): BatchUIData = { val outputOperations = mutable.HashMap[OutputOpId, OutputOperationUIData]() outputOperations ++= batchInfo.outputOperationInfos.mapValues(OutputOperationUIData.apply) new BatchUIData( batchInfo.batchTime, batchInfo.streamIdToInputInfo, batchInfo.submissionTime, batchInfo.processingStartTime, batchInfo.processingEndTime, outputOperations ) } } private[ui] case class OutputOperationUIData( id: OutputOpId, name: String, description: String, startTime: Option[Long], endTime: Option[Long], failureReason: Option[String]) { def duration: Option[Long] = for (s <- startTime; e <- endTime) yield e - s } private[ui] object OutputOperationUIData { def apply(outputOperationInfo: OutputOperationInfo): OutputOperationUIData = { OutputOperationUIData( outputOperationInfo.id, outputOperationInfo.name, outputOperationInfo.description, outputOperationInfo.startTime, outputOperationInfo.endTime, outputOperationInfo.failureReason ) } }
Example 48
Source File: MetricsConfig.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics import java.io.{FileInputStream, InputStream} import java.util.Properties import scala.collection.JavaConverters._ import scala.collection.mutable import scala.util.matching.Regex import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.util.Utils private[spark] class MetricsConfig(conf: SparkConf) extends Logging { private val DEFAULT_PREFIX = "*" private val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r private val DEFAULT_METRICS_CONF_FILENAME = "metrics.properties" private[metrics] val properties = new Properties() private[metrics] var perInstanceSubProperties: mutable.HashMap[String, Properties] = null private def setDefaultProperties(prop: Properties) { prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet") prop.setProperty("*.sink.servlet.path", "/metrics/json") prop.setProperty("master.sink.servlet.path", "/metrics/master/json") prop.setProperty("applications.sink.servlet.path", "/metrics/applications/json") } private[this] def loadPropertiesFromFile(path: Option[String]): Unit = { var is: InputStream = null try { is = path match { case Some(f) => new FileInputStream(f) case None => Utils.getSparkClassLoader.getResourceAsStream(DEFAULT_METRICS_CONF_FILENAME) } if (is != null) { properties.load(is) } } catch { case e: Exception => val file = path.getOrElse(DEFAULT_METRICS_CONF_FILENAME) logError(s"Error loading configuration file $file", e) } finally { if (is != null) { is.close() } } } }
Example 49
Source File: JavaUtils.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.api.java import java.{util => ju} import java.util.Map.Entry import scala.collection.mutable private[spark] object JavaUtils { def optionToOptional[T](option: Option[T]): Optional[T] = if (option.isDefined) { Optional.of(option.get) } else { Optional.empty[T] } // Workaround for SPARK-3926 / SI-8911 def mapAsSerializableJavaMap[A, B](underlying: collection.Map[A, B]): SerializableMapWrapper[A, B] = new SerializableMapWrapper(underlying) // Implementation is copied from scala.collection.convert.Wrappers.MapWrapper, // but implements java.io.Serializable. It can't just be subclassed to make it // Serializable since the MapWrapper class has no no-arg constructor. This class // doesn't need a no-arg constructor though. class SerializableMapWrapper[A, B](underlying: collection.Map[A, B]) extends ju.AbstractMap[A, B] with java.io.Serializable { self => override def size: Int = underlying.size override def get(key: AnyRef): B = try { underlying.getOrElse(key.asInstanceOf[A], null.asInstanceOf[B]) } catch { case ex: ClassCastException => null.asInstanceOf[B] } override def entrySet: ju.Set[ju.Map.Entry[A, B]] = new ju.AbstractSet[ju.Map.Entry[A, B]] { override def size: Int = self.size override def iterator: ju.Iterator[ju.Map.Entry[A, B]] = new ju.Iterator[ju.Map.Entry[A, B]] { val ui = underlying.iterator var prev : Option[A] = None def hasNext: Boolean = ui.hasNext def next(): Entry[A, B] = { val (k, v) = ui.next() prev = Some(k) new ju.Map.Entry[A, B] { import scala.util.hashing.byteswap32 override def getKey: A = k override def getValue: B = v override def setValue(v1 : B): B = self.put(k, v1) override def hashCode: Int = byteswap32(k.hashCode) + (byteswap32(v.hashCode) << 16) override def equals(other: Any): Boolean = other match { case e: ju.Map.Entry[_, _] => k == e.getKey && v == e.getValue case _ => false } } } def remove() { prev match { case Some(k) => underlying match { case mm: mutable.Map[A, _] => mm.remove(k) prev = None case _ => throw new UnsupportedOperationException("remove") } case _ => throw new IllegalStateException("next must be called at least once before remove") } } } } } }
Example 50
Source File: WorkerInfo.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.master import scala.collection.mutable import org.apache.spark.rpc.RpcEndpointRef import org.apache.spark.util.Utils private[spark] class WorkerInfo( val id: String, val host: String, val port: Int, val cores: Int, val memory: Int, val endpoint: RpcEndpointRef, val webUiAddress: String) extends Serializable { Utils.checkHost(host, "Expected hostname") assert (port > 0) @transient var executors: mutable.HashMap[String, ExecutorDesc] = _ // executorId => info @transient var drivers: mutable.HashMap[String, DriverInfo] = _ // driverId => info @transient var state: WorkerState.Value = _ @transient var coresUsed: Int = _ @transient var memoryUsed: Int = _ @transient var lastHeartbeat: Long = _ init() def coresFree: Int = cores - coresUsed def memoryFree: Int = memory - memoryUsed private def readObject(in: java.io.ObjectInputStream): Unit = Utils.tryOrIOException { in.defaultReadObject() init() } private def init() { executors = new mutable.HashMap drivers = new mutable.HashMap state = WorkerState.ALIVE coresUsed = 0 memoryUsed = 0 lastHeartbeat = System.currentTimeMillis() } def hostPort: String = { assert (port > 0) host + ":" + port } def addExecutor(exec: ExecutorDesc) { executors(exec.fullId) = exec coresUsed += exec.cores memoryUsed += exec.memory } def removeExecutor(exec: ExecutorDesc) { if (executors.contains(exec.fullId)) { executors -= exec.fullId coresUsed -= exec.cores memoryUsed -= exec.memory } } def hasExecutor(app: ApplicationInfo): Boolean = { executors.values.exists(_.application == app) } def addDriver(driver: DriverInfo) { drivers(driver.id) = driver memoryUsed += driver.desc.mem coresUsed += driver.desc.cores } def removeDriver(driver: DriverInfo) { drivers -= driver.id memoryUsed -= driver.desc.mem coresUsed -= driver.desc.cores } def setState(state: WorkerState.Value): Unit = { this.state = state } def isAlive(): Boolean = this.state == WorkerState.ALIVE }
Example 51
Source File: StorageTab.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.ui.storage import scala.collection.mutable import org.apache.spark.annotation.DeveloperApi import org.apache.spark.scheduler._ import org.apache.spark.storage._ import org.apache.spark.ui._ private def updateRDDInfo(updatedBlocks: Seq[(BlockId, BlockStatus)]): Unit = { val rddIdsToUpdate = updatedBlocks.flatMap { case (bid, _) => bid.asRDDId.map(_.rddId) }.toSet val rddInfosToUpdate = _rddInfoMap.values.toSeq.filter { s => rddIdsToUpdate.contains(s.id) } StorageUtils.updateRddInfo(rddInfosToUpdate, activeStorageStatusList) } override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted): Unit = synchronized { val rddInfos = stageSubmitted.stageInfo.rddInfos rddInfos.foreach { info => _rddInfoMap.getOrElseUpdate(info.id, info).name = info.name } } override def onStageCompleted(stageCompleted: SparkListenerStageCompleted): Unit = synchronized { // Remove all partitions that are no longer cached in current completed stage val completedRddIds = stageCompleted.stageInfo.rddInfos.map(r => r.id).toSet _rddInfoMap.retain { case (id, info) => !completedRddIds.contains(id) || info.numCachedPartitions > 0 } } override def onUnpersistRDD(unpersistRDD: SparkListenerUnpersistRDD): Unit = synchronized { _rddInfoMap.remove(unpersistRDD.rddId) } override def onBlockUpdated(blockUpdated: SparkListenerBlockUpdated): Unit = { super.onBlockUpdated(blockUpdated) val blockId = blockUpdated.blockUpdatedInfo.blockId val storageLevel = blockUpdated.blockUpdatedInfo.storageLevel val memSize = blockUpdated.blockUpdatedInfo.memSize val diskSize = blockUpdated.blockUpdatedInfo.diskSize val blockStatus = BlockStatus(storageLevel, memSize, diskSize) updateRDDInfo(Seq((blockId, blockStatus))) } }
Example 52
Source File: TaskDescription.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.nio.ByteBuffer import scala.collection.mutable import scala.collection.mutable.HashSet import scala.util.control.NonFatal import org.apache.spark._ import org.apache.spark.internal.Logging import org.apache.spark.serializer.SerializerInstance import org.apache.spark.util.SerializableBuffer private[spark] class TaskDescription( val taskId: Long, val attemptNumber: Int, val executorId: String, val name: String, val index: Int, // Index within this task's TaskSet val isFutureTask: Boolean, @transient private val _task: Task[_], @transient private val _addedFiles: mutable.Map[String, Long], @transient private val _addedJars: mutable.Map[String, Long], @transient private val _ser: SerializerInstance) extends Serializable with Logging { // Because ByteBuffers are not serializable, wrap the task in a SerializableBuffer private var buffer: SerializableBuffer = _ def prepareSerializedTask(): Unit = { if (_task != null) { val serializedTask: ByteBuffer = try { Task.serializeWithDependencies(_task, _addedFiles, _addedJars, _ser) } catch { // If the task cannot be serialized, then there is not point in re-attempting // the task as it will always fail. So just abort the task set. case NonFatal(e) => val msg = s"Failed to serialize the task $taskId, not attempting to retry it." logError(msg, e) // FIXME(shivaram): We dont have a handle to the taskSet here to abort it. throw new TaskNotSerializableException(e) } if (serializedTask.limit > TaskSetManager.TASK_SIZE_TO_WARN_KB * 1024) { logWarning(s"Stage ${_task.stageId} contains a task of very large size " + s"(${serializedTask.limit / 1024} KB). The maximum recommended task size is " + s"${TaskSetManager.TASK_SIZE_TO_WARN_KB} KB.") } buffer = new SerializableBuffer(serializedTask) } else { buffer = new SerializableBuffer(ByteBuffer.allocate(0)) } } def serializedTask: ByteBuffer = buffer.value override def toString: String = "TaskDescription(TID=%d, index=%d)".format(taskId, index) }
Example 53
Source File: StorageStatusListener.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.storage import scala.collection.mutable import org.apache.spark.SparkConf import org.apache.spark.annotation.DeveloperApi import org.apache.spark.scheduler._ private def updateStorageStatus(unpersistedRDDId: Int) { storageStatusList.foreach { storageStatus => storageStatus.rddBlocksById(unpersistedRDDId).foreach { case (blockId, _) => storageStatus.removeBlock(blockId) } } } override def onUnpersistRDD(unpersistRDD: SparkListenerUnpersistRDD): Unit = synchronized { updateStorageStatus(unpersistRDD.rddId) } override def onBlockManagerAdded(blockManagerAdded: SparkListenerBlockManagerAdded) { synchronized { val blockManagerId = blockManagerAdded.blockManagerId val executorId = blockManagerId.executorId val maxMem = blockManagerAdded.maxMem val storageStatus = new StorageStatus(blockManagerId, maxMem) executorIdToStorageStatus(executorId) = storageStatus // Try to remove the dead storage status if same executor register the block manager twice. deadExecutorStorageStatus.zipWithIndex.find(_._1.blockManagerId.executorId == executorId) .foreach(toRemoveExecutor => deadExecutorStorageStatus.remove(toRemoveExecutor._2)) } } override def onBlockManagerRemoved(blockManagerRemoved: SparkListenerBlockManagerRemoved) { synchronized { val executorId = blockManagerRemoved.blockManagerId.executorId executorIdToStorageStatus.remove(executorId).foreach { status => deadExecutorStorageStatus += status } if (deadExecutorStorageStatus.size > retainedDeadExecutors) { deadExecutorStorageStatus.trimStart(1) } } } override def onBlockUpdated(blockUpdated: SparkListenerBlockUpdated): Unit = { val executorId = blockUpdated.blockUpdatedInfo.blockManagerId.executorId val blockId = blockUpdated.blockUpdatedInfo.blockId val storageLevel = blockUpdated.blockUpdatedInfo.storageLevel val memSize = blockUpdated.blockUpdatedInfo.memSize val diskSize = blockUpdated.blockUpdatedInfo.diskSize val blockStatus = BlockStatus(storageLevel, memSize, diskSize) updateStorageStatus(executorId, Seq((blockId, blockStatus))) } }
Example 54
Source File: BlockStatusListener.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.storage import scala.collection.mutable import org.apache.spark.scheduler._ private[spark] case class BlockUIData( blockId: BlockId, location: String, storageLevel: StorageLevel, memSize: Long, diskSize: Long) private[spark] case class ExecutorStreamBlockStatus( executorId: String, location: String, blocks: Seq[BlockUIData]) { def totalMemSize: Long = blocks.map(_.memSize).sum def totalDiskSize: Long = blocks.map(_.diskSize).sum def numStreamBlocks: Int = blocks.size } private[spark] class BlockStatusListener extends SparkListener { private val blockManagers = new mutable.HashMap[BlockManagerId, mutable.HashMap[BlockId, BlockUIData]] override def onBlockUpdated(blockUpdated: SparkListenerBlockUpdated): Unit = { val blockId = blockUpdated.blockUpdatedInfo.blockId if (!blockId.isInstanceOf[StreamBlockId]) { // Now we only monitor StreamBlocks return } val blockManagerId = blockUpdated.blockUpdatedInfo.blockManagerId val storageLevel = blockUpdated.blockUpdatedInfo.storageLevel val memSize = blockUpdated.blockUpdatedInfo.memSize val diskSize = blockUpdated.blockUpdatedInfo.diskSize synchronized { // Drop the update info if the block manager is not registered blockManagers.get(blockManagerId).foreach { blocksInBlockManager => if (storageLevel.isValid) { blocksInBlockManager.put(blockId, BlockUIData( blockId, blockManagerId.hostPort, storageLevel, memSize, diskSize) ) } else { // If isValid is not true, it means we should drop the block. blocksInBlockManager -= blockId } } } } override def onBlockManagerAdded(blockManagerAdded: SparkListenerBlockManagerAdded): Unit = { synchronized { blockManagers.put(blockManagerAdded.blockManagerId, mutable.HashMap()) } } override def onBlockManagerRemoved( blockManagerRemoved: SparkListenerBlockManagerRemoved): Unit = synchronized { blockManagers -= blockManagerRemoved.blockManagerId } def allExecutorStreamBlockStatus: Seq[ExecutorStreamBlockStatus] = synchronized { blockManagers.map { case (blockManagerId, blocks) => ExecutorStreamBlockStatus( blockManagerId.executorId, blockManagerId.hostPort, blocks.values.toSeq) }.toSeq } }
Example 55
Source File: BlockReplicationPolicy.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.storage

import scala.collection.mutable
import scala.util.Random

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.internal.Logging

@DeveloperApi
class RandomBlockReplicationPolicy extends BlockReplicationPolicy with Logging {

  // prioritize(), which uses this helper to pick replication peers, is elided from
  // this excerpt.

  private def getSampleIds(n: Int, m: Int, r: Random): List[Int] = {
    val indices = (n - m + 1 to n).foldLeft(Set.empty[Int]) { case (set, i) =>
      val t = r.nextInt(i) + 1
      if (set.contains(t)) set + i else set + t
    }
    // we shuffle the result to ensure a random arrangement within the sample
    // to avoid any bias from set implementations
    r.shuffle(indices.map(_ - 1).toList)
  }
}
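Note: getSampleIds above is Robert Floyd's algorithm for drawing m distinct indices from 0 until n without materializing the whole range. The same logic copied into a runnable standalone object (names are illustrative) so its behavior can be checked directly:

import scala.util.Random

object FloydSampleDemo {
  // Same algorithm as getSampleIds above: m distinct indices drawn from 0 until n.
  def sample(n: Int, m: Int, r: Random): List[Int] = {
    val indices = (n - m + 1 to n).foldLeft(Set.empty[Int]) { case (set, i) =>
      val t = r.nextInt(i) + 1
      if (set.contains(t)) set + i else set + t
    }
    r.shuffle(indices.map(_ - 1).toList)
  }

  def main(args: Array[String]): Unit = {
    val picked = sample(n = 10, m = 3, r = new Random(0))
    println(picked) // three distinct values in 0..9
    assert(picked.distinct.size == 3 && picked.forall(i => i >= 0 && i < 10))
  }
}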
Example 56
Source File: LogUrlsStandaloneSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy import java.net.URL import scala.collection.mutable import scala.io.Source import org.apache.spark.{LocalSparkContext, SparkContext, SparkFunSuite} import org.apache.spark.scheduler.{SparkListener, SparkListenerExecutorAdded} import org.apache.spark.scheduler.cluster.ExecutorInfo import org.apache.spark.util.SparkConfWithEnv class LogUrlsStandaloneSuite extends SparkFunSuite with LocalSparkContext { private val WAIT_TIMEOUT_MILLIS = 10000 test("verify that correct log urls get propagated from workers") { sc = new SparkContext("local-cluster[2,1,1024]", "test") val listener = new SaveExecutorInfo sc.addSparkListener(listener) // Trigger a job so that executors get added sc.parallelize(1 to 100, 4).map(_.toString).count() sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) listener.addedExecutorInfos.values.foreach { info => assert(info.logUrlMap.nonEmpty) // Browse to each URL to check that it's valid info.logUrlMap.foreach { case (logType, logUrl) => val html = Source.fromURL(logUrl).mkString assert(html.contains(s"$logType log page")) } } } test("verify that log urls reflect SPARK_PUBLIC_DNS (SPARK-6175)") { val SPARK_PUBLIC_DNS = "public_dns" val conf = new SparkConfWithEnv(Map("SPARK_PUBLIC_DNS" -> SPARK_PUBLIC_DNS)).set( "spark.extraListeners", classOf[SaveExecutorInfo].getName) sc = new SparkContext("local-cluster[2,1,1024]", "test", conf) // Trigger a job so that executors get added sc.parallelize(1 to 100, 4).map(_.toString).count() sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) val listeners = sc.listenerBus.findListenersByClass[SaveExecutorInfo] assert(listeners.size === 1) val listener = listeners(0) listener.addedExecutorInfos.values.foreach { info => assert(info.logUrlMap.nonEmpty) info.logUrlMap.values.foreach { logUrl => assert(new URL(logUrl).getHost === SPARK_PUBLIC_DNS) } } } } private[spark] class SaveExecutorInfo extends SparkListener { val addedExecutorInfos = mutable.Map[String, ExecutorInfo]() override def onExecutorAdded(executor: SparkListenerExecutorAdded) { addedExecutorInfos(executor.executorId) = executor.executorInfo } }
Example 57
Source File: CustomRecoveryModeFactory.scala From drizzle-spark with Apache License 2.0 | 5 votes |
// This file is placed in different package to make sure all of these components work well // when they are outside of org.apache.spark. package other.supplier import java.nio.ByteBuffer import scala.collection.mutable import scala.reflect.ClassTag import org.apache.spark.SparkConf import org.apache.spark.deploy.master._ import org.apache.spark.serializer.Serializer class CustomRecoveryModeFactory( conf: SparkConf, serializer: Serializer ) extends StandaloneRecoveryModeFactory(conf, serializer) { CustomRecoveryModeFactory.instantiationAttempts += 1 override def read[T: ClassTag](prefix: String): Seq[T] = { CustomPersistenceEngine.readAttempts += 1 val results = for ((name, bytes) <- data; if name.startsWith(prefix)) yield serializer.newInstance().deserialize[T](ByteBuffer.wrap(bytes)) results.toSeq } } object CustomPersistenceEngine { @volatile var persistAttempts = 0 @volatile var unpersistAttempts = 0 @volatile var readAttempts = 0 @volatile var lastInstance: Option[CustomPersistenceEngine] = None } class CustomLeaderElectionAgent(val masterInstance: LeaderElectable) extends LeaderElectionAgent { masterInstance.electedLeader() }
Example 58
Source File: SparkListenerWithClusterSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import scala.collection.mutable import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll} import org.apache.spark.{LocalSparkContext, SparkContext, SparkFunSuite} import org.apache.spark.scheduler.cluster.ExecutorInfo val WAIT_TIMEOUT_MILLIS = 10000 before { sc = new SparkContext("local-cluster[2,1,1024]", "SparkListenerSuite") } test("SparkListener sends executor added message") { val listener = new SaveExecutorInfo sc.addSparkListener(listener) // This test will check if the number of executors received by "SparkListener" is same as the // number of all executors, so we need to wait until all executors are up sc.jobProgressListener.waitUntilExecutorsUp(2, 60000) val rdd1 = sc.parallelize(1 to 100, 4) val rdd2 = rdd1.map(_.toString) rdd2.setName("Target RDD") rdd2.count() sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) assert(listener.addedExecutorInfo.size == 2) assert(listener.addedExecutorInfo("0").totalCores == 1) assert(listener.addedExecutorInfo("1").totalCores == 1) } private class SaveExecutorInfo extends SparkListener { val addedExecutorInfo = mutable.Map[String, ExecutorInfo]() override def onExecutorAdded(executor: SparkListenerExecutorAdded) { addedExecutorInfo(executor.executorId) = executor.executorInfo } } }
Example 59
Source File: TimeStampedHashMapSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.util import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.util.Random import org.apache.spark.SparkFunSuite class TimeStampedHashMapSuite extends SparkFunSuite { // Test the testMap function - a Scala HashMap should obviously pass testMap(new mutable.HashMap[String, String]()) // Test TimeStampedHashMap basic functionality testMap(new TimeStampedHashMap[String, String]()) testMapThreadSafety(new TimeStampedHashMap[String, String]()) test("TimeStampedHashMap - clearing by timestamp") { // clearing by insertion time val map = new TimeStampedHashMap[String, String](updateTimeStampOnGet = false) map("k1") = "v1" assert(map("k1") === "v1") Thread.sleep(10) val threshTime = System.currentTimeMillis assert(map.getTimestamp("k1").isDefined) assert(map.getTimestamp("k1").get < threshTime) map.clearOldValues(threshTime) assert(map.get("k1") === None) // clearing by modification time val map1 = new TimeStampedHashMap[String, String](updateTimeStampOnGet = true) map1("k1") = "v1" map1("k2") = "v2" assert(map1("k1") === "v1") Thread.sleep(10) val threshTime1 = System.currentTimeMillis Thread.sleep(10) assert(map1("k2") === "v2") // access k2 to update its access time to > threshTime assert(map1.getTimestamp("k1").isDefined) assert(map1.getTimestamp("k1").get < threshTime1) assert(map1.getTimestamp("k2").isDefined) assert(map1.getTimestamp("k2").get >= threshTime1) map1.clearOldValues(threshTime1) // should only clear k1 assert(map1.get("k1") === None) assert(map1.get("k2").isDefined) } def testMapThreadSafety(hashMapConstructor: => mutable.Map[String, String]) { def newMap() = hashMapConstructor val name = newMap().getClass.getSimpleName val testMap = newMap() @volatile var error = false def getRandomKey(m: mutable.Map[String, String]): Option[String] = { val keys = testMap.keysIterator.toSeq if (keys.nonEmpty) { Some(keys(Random.nextInt(keys.size))) } else { None } } val threads = (1 to 25).map(i => new Thread() { override def run() { try { for (j <- 1 to 1000) { Random.nextInt(3) match { case 0 => testMap(Random.nextString(10)) = Random.nextDouble().toString // put case 1 => getRandomKey(testMap).map(testMap.get) // get case 2 => getRandomKey(testMap).map(testMap.remove) // remove } } } catch { case t: Throwable => error = true throw t } } }) test(name + " - threading safety test") { threads.foreach(_.start()) threads.foreach(_.join()) assert(!error) } } }
Example 60
Source File: BlockReplicationPolicySuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.storage import scala.collection.mutable import org.scalatest.{BeforeAndAfter, Matchers} import org.apache.spark.{LocalSparkContext, SparkFunSuite} class BlockReplicationPolicySuite extends SparkFunSuite with Matchers with BeforeAndAfter with LocalSparkContext { // Implicitly convert strings to BlockIds for test clarity. private implicit def StringToBlockId(value: String): BlockId = new TestBlockId(value) test(s"block replication - random block replication policy") { val numBlockManagers = 10 val storeSize = 1000 val blockManagers = (1 to numBlockManagers).map { i => BlockManagerId(s"store-$i", "localhost", 1000 + i, None) } val candidateBlockManager = BlockManagerId("test-store", "localhost", 1000, None) val replicationPolicy = new RandomBlockReplicationPolicy val blockId = "test-block" (1 to 10).foreach {numReplicas => logDebug(s"Num replicas : $numReplicas") val randomPeers = replicationPolicy.prioritize( candidateBlockManager, blockManagers, mutable.HashSet.empty[BlockManagerId], blockId, numReplicas ) logDebug(s"Random peers : ${randomPeers.mkString(", ")}") assert(randomPeers.toSet.size === numReplicas) // choosing n peers out of n val secondPass = replicationPolicy.prioritize( candidateBlockManager, randomPeers, mutable.HashSet.empty[BlockManagerId], blockId, numReplicas ) logDebug(s"Random peers : ${secondPass.mkString(", ")}") assert(secondPass.toSet.size === numReplicas) } } }
Example 61
Source File: ParameterOperations.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.parameters import com.intel.analytics.bigdl._ import com.intel.analytics.bigdl.dataset.{DistributedDataSet, MiniBatch} import org.apache.spark.rdd.RDD import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.optim.DistriOptimizer.Cache import com.intel.analytics.bigdl.optim.Metrics import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.Table import org.apache.spark.broadcast.Broadcast import scala.collection.mutable private[bigdl] class L2NormClippingProcessor(l2NormThreshold: Double) extends ParameterProcessor { override def collectGlobalData[T](models: RDD[Cache[T]], parameters: AllReduceParameter[T], metrics: Metrics, state: Table)(implicit ev: TensorNumeric[T]) : Unit = { val numFinishedModel = state.get[Int]("numFinishedModel").get val parallelism = state.get[Int]("parallelism").get val isGradientUpdated = state.get[Boolean]("isGradientUpdated").get val sumSquare = models.mapPartitions(modelIter => { if (!isGradientUpdated) { val getG = System.nanoTime() parameters.aggregateGradientPartition(numFinishedModel) metrics.add("aggregrateGradientParition average executor", System.nanoTime() - getG) } val sum = Util.getSumsquareInParallel(parameters.gradientPartition, parallelism) Iterator.single(sum) }).reduce(_ + _) state("isGradientUpdated") = true state("l2Norm") = math.sqrt(sumSquare) } override def processParameters[T](parameters: AllReduceParameter[T], modelCache: Cache[T], state: Table)(implicit ev: TensorNumeric[T]): Unit = { val l2Norm = state.get[Double]("l2Norm").get if (l2Norm > l2NormThreshold) { val scale = ev.fromType[Double](l2Norm / l2NormThreshold) parameters.gradientPartition.div(scale) } } override def processParameters[T](model: Module[T], state: Table)(implicit ev: TensorNumeric[T]): Unit = { val parallelism = state.get[Int]("parallelism").get val gradients = model.getParameters()._2 val l2Norm = math.sqrt(Util.getSumsquareInParallel(gradients, parallelism)) if (l2Norm > l2NormThreshold) { val scale = ev.fromType[Double](l2Norm / l2NormThreshold) gradients.div(scale) } } }
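Note: processParameters above rescales the gradient by l2Norm / l2NormThreshold whenever the global L2 norm exceeds the threshold, which caps the clipped norm at exactly the threshold. A plain-Scala sketch of that arithmetic on a two-element gradient:

object L2ClipDemo {
  def main(args: Array[String]): Unit = {
    val threshold = 1.0
    val gradients = Array(3.0, 4.0)                        // L2 norm = 5.0
    val l2Norm = math.sqrt(gradients.map(g => g * g).sum)
    // Same scaling rule as processParameters above: divide by norm / threshold so the
    // clipped gradient has L2 norm equal to the threshold.
    val clipped = if (l2Norm > threshold) gradients.map(_ / (l2Norm / threshold)) else gradients
    println(clipped.mkString(", "))                        // 0.6, 0.8
  }
}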
Example 62
Source File: IRToBlas.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.utils.intermediate import com.intel.analytics.bigdl._ import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity, TensorModule} import com.intel.analytics.bigdl.optim.DistriOptimizer._ import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.{Node, ReflectionUtils, T} import scala.collection.mutable import scala.reflect.ClassTag abstract class ConvertBase[T, D] { def cloneNode(allNodes: Array[Node[T]], nodeMap: mutable.HashMap[Node[T], Node[D]]): Unit = { allNodes.foreach(node => { node.nextNodesAndEdges.foreach(nextNodeAndEdge => { if (nodeMap.contains(nextNodeAndEdge._1)) { nodeMap.get(node).get.add(nodeMap.get(nextNodeAndEdge._1).get, nextNodeAndEdge._2) } }) }) // sort previous node nodeMap.toArray.foreach(node => { // if node has more than one previous nodes, we have to consider nodes order if (node._1.prevNodesAndEdges.length > 1) { node._2.removePrevEdges() node._1.prevNodesAndEdges.foreach(prevNodeAndEdge => { if (nodeMap.contains(prevNodeAndEdge._1)) { node._2.from(nodeMap.get(prevNodeAndEdge._1).get, prevNodeAndEdge._2) } }) } }) } def convertLayerCheck(layer: T) : Boolean def convertLayer(layer : T) : D def convertingCheck(allNodes: Array[Node[T]]) : Boolean = { var convert = true allNodes.foreach(node => { if (!convertLayerCheck(node.element)) { logger.info(s"${node.element} convertion failed") convert = false } }) convert } def convert(allNodes: Array[Node[T]]): mutable.HashMap[Node[T], Node[D]] = { val nodeMap = new mutable.HashMap[Node[T], Node[D]]() allNodes.foreach(node => { nodeMap.put(node, new Node(convertLayer(node.element))) }) cloneNode(allNodes, nodeMap) nodeMap } } private[bigdl] class IRToBlas[T: ClassTag] extends ConvertBase[IRElement[T], Module[T]]{ private def className(layer: IRElement[T]): String = { val name = layer.getOp().name s"com.intel.analytics.bigdl.nn.${name.substring(2)}" } override def convertLayerCheck(layer: IRElement[T]): Boolean = { ReflectionUtils.findClass(className(layer)) != null || layer.getOp().isInstanceOf[IRGeneralModule[T]] } override def convertLayer(layer : IRElement[T]) : Module[T] = { if (layer.getOp().isInstanceOf[IRGeneralModule[T]]) { return layer.getOp().asInstanceOf[IRGeneralModule[T]].model } ReflectionUtils.reflectFromIR(layer, Class.forName(className(layer))) } } private[bigdl] object IRToBlas { def apply[T: ClassTag](implicit ev: TensorNumeric[T]): IRToBlas[T] = new IRToBlas }
Example 63
Source File: Types.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.utils.serializer import com.google.protobuf.ByteString import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity} import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric.{NumericBoolean, NumericChar, NumericDouble, NumericFloat, NumericInt, NumericLong, NumericString} import com.intel.analytics.bigdl.utils.tf.TFTensorNumeric.NumericByteString import com.intel.analytics.bigdl.serialization.Bigdl.BigDLModule import scala.collection.mutable import scala.reflect.ClassTag trait StorageType object ProtoStorageType extends StorageType object BigDLStorage extends StorageType case class SerializeContext[T: ClassTag](moduleData: ModuleData[T], storages: mutable.HashMap[Int, Any], storageType: StorageType, copyWeightAndBias : Boolean = true, groupType : String = null) case class DeserializeContext(bigdlModule : BigDLModule, storages: mutable.HashMap[Int, Any], storageType: StorageType, copyWeightAndBias : Boolean = true) case class SerializeResult(bigDLModule: BigDLModule.Builder, storages: mutable.HashMap[Int, Any]) case class ModuleData[T: ClassTag](module : AbstractModule[Activity, Activity, T], pre : Seq[String], next : Seq[String]) object BigDLDataType extends Enumeration{ type BigDLDataType = Value val FLOAT, DOUBLE, CHAR, BOOL, STRING, INT, SHORT, LONG, BYTESTRING, BYTE = Value } object SerConst { val MAGIC_NO = 3721 val DIGEST_TYPE = "MD5" val GLOBAL_STORAGE = "global_storage" val MODULE_TAGES = "module_tags" val MODULE_NUMERICS = "module_numerics" val GROUP_TYPE = "group_type" } object ClassTagMapper { def apply(tpe : String): ClassTag[_] = { tpe match { case "Float" => scala.reflect.classTag[Float] case "Double" => scala.reflect.classTag[Double] case "Char" => scala.reflect.classTag[Char] case "Boolean" => scala.reflect.classTag[Boolean] case "String" => scala.reflect.classTag[String] case "Int" => scala.reflect.classTag[Int] case "Long" => scala.reflect.classTag[Long] case "com.google.protobuf.ByteString" => scala.reflect.classTag[ByteString] } } def apply(classTag: ClassTag[_]): String = classTag.toString } object TensorNumericMapper { def apply(tpe : String): TensorNumeric[_] = { tpe match { case "Float" => NumericFloat case "Double" => NumericDouble case "Char" => NumericChar case "Boolean" => NumericBoolean case "String" => NumericString case "Int" => NumericInt case "Long" => NumericLong case "ByteString" => NumericByteString } } def apply(tensorNumeric: TensorNumeric[_]): String = { tensorNumeric match { case NumericFloat => "Float" case NumericDouble => "Double" case NumericChar => "Char" case NumericBoolean => "Boolean" case NumericString => "String" case NumericInt => "Int" case NumericLong => "Long" case NumericByteString => "ByteString" } } }
Example 64
Source File: TrainSummary.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.visualization

import com.intel.analytics.bigdl.optim.Trigger
import com.intel.analytics.bigdl.visualization.tensorboard.{FileReader, FileWriter}

import scala.collection.mutable

class TrainSummary(
    logDir: String,
    appName: String) extends Summary(logDir, appName) {

  // Per-tag summary triggers. The setSummaryTrigger method and the default entries of
  // the full source are elided from this excerpt.
  private val triggers = mutable.HashMap[String, Trigger]()

  def getSummaryTrigger(tag: String): Option[Trigger] = {
    if (triggers.contains(tag)) {
      Some(triggers(tag))
    } else {
      None
    }
  }

  private[bigdl] def getScalarTriggers(): Iterator[(String, Trigger)] = {
    triggers.filter(!_._1.equals("Parameters")).toIterator
  }
}

object TrainSummary {
  def apply(logDir: String, appName: String): TrainSummary = {
    new TrainSummary(logDir, appName)
  }
}
Example 65
Source File: FrameManager.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.nn

import java.util.concurrent.atomic.AtomicInteger

import com.intel.analytics.bigdl.nn.Graph.ModuleNode
import com.intel.analytics.bigdl.nn.tf.{Exit, MergeOps, NextIteration}

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

// The FrameManager class itself, which tracks the frame each graph node executes in,
// is elided from this excerpt; only the nested Frame definition survives.
object FrameManager {

  class Frame[T] private[FrameManager] (
      val name: String,
      val parent: Option[Frame[T]]
  ) {
    // Sync all next iteration nodes execution
    private[bigdl] var barrier: AtomicInteger = new AtomicInteger(0)
    // User can use NextIteration to sync execution. This is a list of those type of nodes
    private[bigdl] val waitingNodes: ArrayBuffer[ModuleNode[T]] = new ArrayBuffer[ModuleNode[T]]()
    // Nodes that should be refreshed in an iteration of the frame
    private[bigdl] val nodes: ArrayBuffer[ModuleNode[T]] = new ArrayBuffer[ModuleNode[T]]()
  }
}
Example 66
Source File: SerializerSpecHelper.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.utils.serializer import java.io.{File} import java.lang.reflect.Modifier import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity} import com.intel.analytics.bigdl.nn.ops.{Exp => ExpOps, Pow => PowOps, Select => SelectOps, Sum => SumOps, Tile => TileOps} import com.intel.analytics.bigdl.nn.tf.{DecodeGif => DecodeGifOps, DecodeJpeg => DecodeJpegOps, DecodePng => DecodePngOps, DecodeRaw => DecodeRawOps} import com.intel.analytics.bigdl.utils.RandomGenerator.RNG import com.intel.analytics.bigdl.utils.tf.loaders.{Pack => _} import com.intel.analytics.bigdl.utils.{Shape => KShape} import org.reflections.Reflections import org.reflections.scanners.SubTypesScanner import org.reflections.util.{ClasspathHelper, ConfigurationBuilder, FilterBuilder} import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} import scala.collection.JavaConverters._ import scala.collection.mutable abstract class SerializerSpecHelper extends FlatSpec with Matchers with BeforeAndAfterAll{ val postFix = "bigdl" val excludedClass = new mutable.HashSet[String]() val excludedPackage = new mutable.HashSet[String]() private val expected = new mutable.HashSet[String]() val tested = new mutable.HashSet[String]() private var executedCount = 0 protected def getPackage(): String = "" protected def addExcludedClass(): Unit = {} protected def addExcludedPackage(): Unit = {} protected def getExpected(): mutable.Set[String] = expected override protected def beforeAll() = { addExcludedClass addExcludedPackage val filterBuilder = new FilterBuilder() excludedPackage.foreach(filterBuilder.excludePackage(_)) val reflections = new Reflections(new ConfigurationBuilder() .filterInputsBy(filterBuilder) .setUrls(ClasspathHelper.forPackage(getPackage())) .setScanners(new SubTypesScanner())) val subTypes = reflections.getSubTypesOf(classOf[AbstractModule[_, _, _]]) .asScala.filter(sub => !Modifier.isAbstract(sub.getModifiers)). filter(sub => !excludedClass.contains(sub.getName)) subTypes.foreach(sub => expected.add(sub.getName)) } protected def runSerializationTest(module : AbstractModule[_, _, Float], input : Activity, cls: Class[_] = null) : Unit = { runSerializationTestWithMultiClass(module, input, if (cls == null) Array(module.getClass) else Array(cls)) } protected def runSerializationTestWithMultiClass(module : AbstractModule[_, _, Float], input : Activity, classes: Array[Class[_]]) : Unit = { val name = module.getName val serFile = File.createTempFile(name, postFix) val originForward = module.evaluate().forward(input) ModulePersister.saveToFile[Float](serFile.getAbsolutePath, null, module.evaluate(), true) RNG.setSeed(1000) val loadedModule = ModuleLoader.loadFromFile[Float](serFile.getAbsolutePath) val afterLoadForward = loadedModule.forward(input) if (serFile.exists) { serFile.delete } afterLoadForward should be (originForward) classes.foreach(cls => { if (getExpected.contains(cls.getName)) { tested.add(cls.getName) } }) } override protected def afterAll() = { println(s"total ${getExpected.size}, remaining ${getExpected.size - tested.size}") tested.filter(!getExpected.contains(_)).foreach(t => { println(s"$t do not need to be tested") }) getExpected.foreach(exp => { require(tested.contains(exp), s" $exp not included in the test!") }) } }
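The spec helper above drives its coverage check with mutable.HashSets: expected is filled by reflection, tested by each serialization test, and afterAll compares the two. The bookkeeping reduced to its essentials (class names below are invented examples):

import scala.collection.mutable

object CoverageSketch {
  private val expected = mutable.HashSet("nn.Linear", "nn.ReLU", "nn.Reverse")
  private val tested   = mutable.HashSet.empty[String]

  def markTested(className: String): Unit =
    if (expected.contains(className)) tested += className

  def main(args: Array[String]): Unit = {
    markTested("nn.Linear")
    markTested("nn.Dropout")   // not in expected, ignored
    println(s"total ${expected.size}, remaining ${expected.size - tested.size}")   // total 3, remaining 2
    expected.diff(tested).foreach(cls => println(s"$cls not included in the test!"))
  }
}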
Example 67
Source File: ReverseSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.nn import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.utils.RandomGenerator import com.intel.analytics.bigdl.utils.serializer.ModuleSerializationTest import org.scalatest.{FlatSpec, Matchers} import scala.collection.mutable import scala.util.Random @com.intel.analytics.bigdl.tags.Serial class ReverseSpec extends FlatSpec with Matchers { "A Reverse()" should "generate correct output and grad for Tensor input dim1 inplace" in { def randomn(): Double = RandomGenerator.RNG.uniform(-10, 10) val layer = new Reverse[Double](1) val input = Tensor[Double](4, 3) input.apply1(x => randomn()) val expectedOutput = Tensor[Double]().resizeAs(input) expectedOutput.select(1, 1).copy(input(4)) expectedOutput.select(1, 2).copy(input(3)) expectedOutput.select(1, 3).copy(input(2)) expectedOutput.select(1, 4).copy(input(1)) val gradOutput = Tensor[Double](4, 3) gradOutput.apply1(x => randomn()) val expectedGradInput = Tensor[Double]().resizeAs(gradOutput) expectedGradInput(1).copy(gradOutput(4)) expectedGradInput(2).copy(gradOutput(3)) expectedGradInput(3).copy(gradOutput(2)) expectedGradInput(4).copy(gradOutput(1)) val output = layer.forward(input) val gradInput = layer.backward(input, gradOutput) output should be (expectedOutput) gradInput should be (expectedGradInput) } "A Reverse()" should "generate correct output and grad for Tensor input dim1" in { def randomn(): Double = RandomGenerator.RNG.uniform(-10, 10) val layer = new Reverse[Double](1) val input = Tensor[Double](3, 3, 3) input.apply1(x => randomn()) val expectedOutput = Tensor[Double]().resizeAs(input) expectedOutput(1).copy(input(3)) expectedOutput(2).copy(input(2)) expectedOutput(3).copy(input(1)) val gradOutput = Tensor[Double](3, 3, 3) gradOutput.apply1(x => randomn()) val expectedGradInput = Tensor[Double]().resizeAs(gradOutput) expectedGradInput(1).copy(gradOutput(3)) expectedGradInput(2).copy(gradOutput(2)) expectedGradInput(3).copy(gradOutput(1)) val output = layer.forward(input) val gradInput = layer.backward(input, gradOutput) output should be (expectedOutput) gradInput should be (expectedGradInput) } "A Reverse()" should "generate correct output and grad for Tensor input dim2" in { def randomn(): Double = RandomGenerator.RNG.uniform(-10, 10) val layer = new Reverse[Double](2) val input = Tensor[Double](3, 3, 3) input.apply1(x => randomn()) val expectedOutput = Tensor[Double]().resizeAs(input) expectedOutput.select(2, 1).copy(input.select(2, 3)) expectedOutput.select(2, 2).copy(input.select(2, 2)) expectedOutput.select(2, 3).copy(input.select(2, 1)) val gradOutput = Tensor[Double](3, 3, 3) gradOutput.apply1(x => randomn()) val expectedGradInput = Tensor[Double]().resizeAs(gradOutput) expectedGradInput.select(2, 1).copy(gradOutput.select(2, 3)) expectedGradInput.select(2, 2).copy(gradOutput.select(2, 2)) expectedGradInput.select(2, 3).copy(gradOutput.select(2, 1)) val output = layer.forward(input) val gradInput = layer.backward(input, gradOutput) output should be (expectedOutput) gradInput should be (expectedGradInput) } } class ReverseSerialTest extends ModuleSerializationTest { override def test(): Unit = { val reverse = Reverse[Float]().setName("reverse") val input = Tensor[Float](10).apply1(_ => Random.nextFloat()) runSerializationTest(reverse, input) } }
Example 68
Source File: VMContext.scala From scala-json with Apache License 2.0 | 5 votes |
package json.shadow import json._ import json.internal.DefaultVMContext.PrimitiveArray import json.internal.PrimitiveJArray.Builder import json.internal.{PrimitiveJArray, SimpleStringBuilder, BaseVMContext, JValueObjectDeserializer} import scala.collection.immutable.StringOps import scala.collection.mutable import scala.reflect.ClassTag object VMContext extends BaseVMContext { def newVMStringBuilder: SimpleStringBuilder = new SimpleStringBuilder { val builder = new StringBuilder(128) def append(str: String): internal.SimpleStringBuilder = { builder append str this } def append(char: Char): SimpleStringBuilder = { builder.append(char) this } def ensureCapacity(cap: Int): Unit = builder.ensureCapacity(cap) def result(): String = builder.result() } val localMapper = new ThreadLocal[JValueObjectDeserializer] { override protected def initialValue: JValueObjectDeserializer = new JValueObjectDeserializer } //TODO: do these need to be specialized? def createPrimitiveArray[ case '\b' => sb.append("\\b") case '\t' => sb.append("\\t") case '\n' => sb.append("\\n") case '\f' => sb.append("\\f") case '\r' => sb.append("\\r") case c if c < ' ' => val t = "000" + Integer.toHexString(c) sb.append("\\u" + t.substring(t.length() - 4)) case c => sb.append(c) } } sb.append('"') sb } def newJValueFromArray(arr: Array[_]): JArray = { import json.accessors._ arr match { case x: Array[Byte] => new PrimitiveJArray[Byte](wrapPrimitiveArray(x)) case x: Array[Short] => new PrimitiveJArray[Short](wrapPrimitiveArray(x)) case x: Array[Int] => new PrimitiveJArray[Int](wrapPrimitiveArray(x)) case x: Array[Long] => new PrimitiveJArray[Long](wrapPrimitiveArray(x)) case x: Array[Double] => new PrimitiveJArray[Double](wrapPrimitiveArray(x)) case x: Array[Float] => new PrimitiveJArray[Float](wrapPrimitiveArray(x)) case x: Array[Boolean] => new PrimitiveJArray[Boolean](wrapPrimitiveArray(x)) } } def extractPrimitiveJArray[T: ClassTag: PrimitiveJArray.Builder](x: Iterable[T]): Option[JArray] = { val builder = implicitly[PrimitiveJArray.Builder[T]] x match { case x: mutable.WrappedArray[T] => Some(newJValueFromArray(x.array)) case x: IndexedSeq[T] => Some(builder.createFrom(x)) case _ => None } } }
Example 69
Source File: VMContext.scala From scala-json with Apache License 2.0 | 5 votes |
package json.shadow import json._ import json.internal.DefaultVMContext.PrimitiveArray import json.internal.PrimitiveJArray.Builder import json.internal.{JanssonDeserializer, PrimitiveJArray, SimpleStringBuilder, BaseVMContext} import scala.collection.immutable.StringOps import scala.collection.mutable import scala.reflect.ClassTag object VMContext extends BaseVMContext { def newVMStringBuilder: SimpleStringBuilder = new SimpleStringBuilder { val builder = new StringBuilder(128) def append(str: String): internal.SimpleStringBuilder = { builder append str this } def append(char: Char): SimpleStringBuilder = { builder.append(char) this } def ensureCapacity(cap: Int): Unit = builder.ensureCapacity(cap) def result(): String = builder.result() } //TODO: do these need to be specialized? def createPrimitiveArray[T: ClassTag](from: Array[T]): PrimitiveArray[T] = from def fromString(str: String): JValue = { JanssonDeserializer.parseString(str) } def fromAny(value: Any): JValue = JValue.fromAnyInternal(value) final def quoteJSONString(string: String, sb: SimpleStringBuilder): SimpleStringBuilder = { require(string != null) sb.ensureCapacity(string.length) sb.append(JanssonDeserializer.serializeString(string)) sb } def newJValueFromArray(arr: Array[_]): JArray = { import json.accessors._ arr match { case x: Array[Byte] => new PrimitiveJArray[Byte](wrapPrimitiveArray(x)) case x: Array[Short] => new PrimitiveJArray[Short](wrapPrimitiveArray(x)) case x: Array[Int] => new PrimitiveJArray[Int](wrapPrimitiveArray(x)) case x: Array[Long] => new PrimitiveJArray[Long](wrapPrimitiveArray(x)) case x: Array[Double] => new PrimitiveJArray[Double](wrapPrimitiveArray(x)) case x: Array[Float] => new PrimitiveJArray[Float](wrapPrimitiveArray(x)) case x: Array[Boolean] => new PrimitiveJArray[Boolean](wrapPrimitiveArray(x)) } } def extractPrimitiveJArray[T: ClassTag: PrimitiveJArray.Builder](x: Iterable[T]): Option[JArray] = { val builder = implicitly[PrimitiveJArray.Builder[T]] x match { case x: mutable.WrappedArray[T] => Some(newJValueFromArray(x.array)) case x: IndexedSeq[T] => Some(builder.createFrom(x)) case _ => None } } }
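Both VMContext implementations above special-case mutable.WrappedArray so that an Iterable backed by a primitive array can hand over its backing array without copying. A small sketch of that dispatch, assuming Scala 2.12 where implicitly wrapping an Array produces a mutable.WrappedArray (on 2.13 the default wrapper is immutable.ArraySeq instead):

import scala.collection.mutable

object WrappedArraySketch {
  // Return the backing array when the Iterable is just a wrapped Array, otherwise copy.
  def backingArray(xs: Iterable[Int]): Array[Int] = xs match {
    case wrapped: mutable.WrappedArray[Int] => wrapped.array   // zero-copy path
    case other                              => other.toArray   // fallback copy
  }

  def main(args: Array[String]): Unit = {
    val raw: Array[Int] = Array(1, 2, 3)
    val asIterable: Iterable[Int] = raw            // implicit wrapping into WrappedArray
    println(backingArray(asIterable) eq raw)       // true: same underlying array
    println(backingArray(List(4, 5, 6)).mkString(","))
  }
}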
Example 70
Source File: frontier.scala From aima-scala with MIT License | 5 votes |
package aima.core.search.uninformed import aima.core.search.{Frontier, SearchNode} import scala.collection.immutable.{Queue, Iterable} import scala.collection.mutable import scala.util.Try class FIFOQueueFrontier[State, Action, Node <: SearchNode[State, Action]](queue: Queue[Node], stateSet: Set[State]) extends Frontier[State, Action, Node] { self => def this(n: Node) = this(Queue(n), Set(n.state)) def removeLeaf: Option[(Node, Frontier[State, Action, Node])] = queue.dequeueOption.map { case (leaf, updatedQueue) => (leaf, new FIFOQueueFrontier[State, Action, Node](updatedQueue, stateSet - leaf.state)) } def addAll(iterable: Iterable[Node]): Frontier[State, Action, Node] = new FIFOQueueFrontier(queue.enqueueAll(iterable), stateSet ++ iterable.map(_.state)) def contains(state: State): Boolean = stateSet.contains(state) def replaceByState(node: Node): Frontier[State, Action, Node] = { if (contains(node.state)) { new FIFOQueueFrontier(queue.filterNot(_.state == node.state).enqueue(node), stateSet) } else { self } } def getNode(state: State): Option[Node] = { if (contains(state)) { queue.find(_.state == state) } else { None } } def add(node: Node): Frontier[State, Action, Node] = new FIFOQueueFrontier[State, Action, Node](queue.enqueue(node), stateSet + node.state) } class PriorityQueueHashSetFrontier[State, Action, Node <: SearchNode[State, Action]]( queue: mutable.PriorityQueue[Node], stateMap: mutable.Map[State, Node] ) extends Frontier[State, Action, Node] { self => def this(n: Node, costNodeOrdering: Ordering[Node]) = this(mutable.PriorityQueue(n)(costNodeOrdering), mutable.Map(n.state -> n)) def removeLeaf: Option[(Node, Frontier[State, Action, Node])] = Try { val leaf = queue.dequeue stateMap -= leaf.state (leaf, self) }.toOption def addAll(iterable: Iterable[Node]): Frontier[State, Action, Node] = { iterable.foreach { costNode => queue += costNode stateMap += (costNode.state -> costNode) } self } def contains(state: State): Boolean = stateMap.contains(state) def replaceByState(node: Node): Frontier[State, Action, Node] = { if (contains(node.state)) { val updatedElems = node :: queue.toList.filterNot(_.state == node.state) queue.clear() queue.enqueue(updatedElems: _*) stateMap += (node.state -> node) } self } def getNode(state: State): Option[Node] = { if (contains(state)) { queue.find(_.state == state) } else { None } } def add(node: Node): Frontier[State, Action, Node] = { val costNode = node queue.enqueue(costNode) stateMap += (node.state -> costNode) self } }
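PriorityQueueHashSetFrontier above pairs a mutable.PriorityQueue (ordered by path cost) with a mutable.Map index so that contains and getNode do not have to scan the queue. The core of that pairing, reduced to (state, cost) tuples as nodes:

import scala.collection.mutable

object PriorityFrontierSketch {
  type Node = (String, Int)   // (state, path cost)

  def main(args: Array[String]): Unit = {
    // cheapest node first, so reverse the natural (max-heap) ordering on cost
    val queue    = mutable.PriorityQueue.empty[Node](Ordering.by[Node, Int](_._2).reverse)
    val stateMap = mutable.Map.empty[String, Node]

    def add(node: Node): Unit = { queue.enqueue(node); stateMap += node._1 -> node }

    add(("A", 5)); add(("B", 2)); add(("C", 9))

    val leaf = queue.dequeue()      // removeLeaf: pop the cheapest node...
    stateMap -= leaf._1             // ...and keep the index in sync
    println(leaf)                   // (B,2)
    println(stateMap.contains("B")) // false
    println(stateMap.contains("C")) // true
  }
}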
Example 71
Source File: LabeledGraph.scala From aima-scala with MIT License | 5 votes |
package aima.core.environment.map2d final class LabeledGraph[Vertex, Edge] { import scala.collection.mutable val globalEdgeLookup = new mutable.LinkedHashMap[Vertex, mutable.LinkedHashMap[Vertex, Edge]]() // TODO: get rid of mutability; ListMap should work val vertexLabelsList = new mutable.ArrayBuffer[Vertex]() // TODO: get rid of mutability def addVertex(v: Vertex): Unit = { checkForNewVertex(v) () } def set(from: Vertex, to: Vertex, edge: Edge): Unit = { val localEdgeLookup = checkForNewVertex(from) localEdgeLookup.put(to, edge) checkForNewVertex(to) () } def remove(from: Vertex, to: Vertex): Unit = { val localEdgeLookup = globalEdgeLookup.get(from) localEdgeLookup.foreach(l => l.remove(to)) } def get(from: Vertex, to: Vertex): Option[Edge] = { val localEdgeLookup = globalEdgeLookup.get(from) localEdgeLookup.flatMap(_.get(to)) } def successors(v: Vertex): List[Vertex] = { val localEdgeLookup = globalEdgeLookup.get(v) localEdgeLookup.toList.flatMap(_.keySet.toList) } def vertexLabels = vertexLabelsList.toList def isVertexLabel(v: Vertex): Boolean = globalEdgeLookup.get(v).isDefined def clear(): Unit = { vertexLabelsList.clear() globalEdgeLookup.clear() } private def checkForNewVertex(v: Vertex): mutable.LinkedHashMap[Vertex, Edge] = { val maybeExisting = globalEdgeLookup.get(v) maybeExisting match { case None => val m = new mutable.LinkedHashMap[Vertex, Edge] globalEdgeLookup.put(v, m) vertexLabelsList.append(v) m case Some(existing) => existing } } }
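LabeledGraph stores its adjacency as a LinkedHashMap of LinkedHashMaps, so both the vertex set and each vertex's outgoing edges keep insertion order. The same nested lookup in a few lines (the city names are just example data):

import scala.collection.mutable

object AdjacencySketch {
  private val edges = mutable.LinkedHashMap.empty[String, mutable.LinkedHashMap[String, Double]]

  def set(from: String, to: String, weight: Double): Unit =
    edges.getOrElseUpdate(from, mutable.LinkedHashMap.empty) += to -> weight

  def get(from: String, to: String): Option[Double] = edges.get(from).flatMap(_.get(to))

  def successors(v: String): List[String] = edges.get(v).toList.flatMap(_.keys)

  def main(args: Array[String]): Unit = {
    set("Arad", "Sibiu", 140)
    set("Arad", "Timisoara", 118)
    println(get("Arad", "Sibiu"))   // Some(140.0)
    println(successors("Arad"))     // List(Sibiu, Timisoara) - insertion order preserved
  }
}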
Example 72
Source File: ExpiringMap.scala From mantis with Apache License 2.0 | 5 votes |
package io.iohk.ethereum.jsonrpc import java.time.temporal.ChronoUnit import java.time.Duration import io.iohk.ethereum.jsonrpc.ExpiringMap.ValueWithDuration import scala.collection.mutable import scala.util.Try object ExpiringMap { case class ValueWithDuration[V](value: V, expiration: Duration) def empty[K, V](defaultElementRetentionTime: Duration): ExpiringMap[K, V] = new ExpiringMap(mutable.Map.empty, defaultElementRetentionTime) } //TODO: Make class thread safe class ExpiringMap[K, V] private (val underlying: mutable.Map[K, ValueWithDuration[V]], val defaultRetentionTime: Duration) { private val maxHoldDuration = ChronoUnit.CENTURIES.getDuration def addFor(k: K, v: V, duration: Duration): ExpiringMap[K, V] = { underlying += k -> ValueWithDuration(v, Try(currentPlus(duration)).getOrElse(currentPlus(maxHoldDuration))) this } def add(k: K, v: V, duration: Duration): ExpiringMap[K, V] = { addFor(k, v, duration) } def addForever(k: K, v: V): ExpiringMap[K, V] = addFor(k, v, maxHoldDuration) def add(k: K, v: V): ExpiringMap[K, V] = addFor(k, v, defaultRetentionTime) def remove(k: K): ExpiringMap[K, V] = { underlying -= k this } def get(k: K): Option[V] = { underlying.get(k).flatMap(value => if (isNotExpired(value)) Some(value.value) else { remove(k) None } ) } private def isNotExpired(value: ValueWithDuration[V]) = currentNanoDuration().minus(value.expiration).isNegative private def currentPlus(duration: Duration) = currentNanoDuration().plus(duration) private def currentNanoDuration() = Duration.ofNanos(System.nanoTime()) }
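ExpiringMap keeps a mutable.Map whose values remember their own deadline; expired entries are not swept eagerly but removed when get touches them. The same lazy-expiry idea in a few lines, using epoch milliseconds rather than java.time.Duration (the key and TTL below are arbitrary):

import scala.collection.mutable

object ExpiringCacheSketch {
  private val cache = mutable.Map.empty[String, (String, Long)]   // key -> (value, expiresAtMillis)

  def put(key: String, value: String, ttlMillis: Long): Unit =
    cache += key -> (value, System.currentTimeMillis() + ttlMillis)

  def get(key: String): Option[String] = cache.get(key).flatMap { case (value, deadline) =>
    if (System.currentTimeMillis() < deadline) Some(value)
    else { cache -= key; None }   // expired: drop it on access
  }

  def main(args: Array[String]): Unit = {
    put("filter-id", "0x10", ttlMillis = 50L)
    println(get("filter-id"))   // Some(0x10)
    Thread.sleep(80L)
    println(get("filter-id"))   // None
  }
}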
Example 73
Source File: Entities.scala From ArchiveSpark with MIT License | 5 votes |
package org.archive.archivespark.functions import java.util.Properties import edu.stanford.nlp.ling.CoreAnnotations.{NamedEntityTagAnnotation, SentencesAnnotation, TextAnnotation, TokensAnnotation} import edu.stanford.nlp.ling.CoreLabel import edu.stanford.nlp.pipeline.{Annotation, StanfordCoreNLP} import edu.stanford.nlp.util.CoreMap import org.archive.archivespark.model._ import org.archive.archivespark.model.dataloads.ByteLoad import org.archive.archivespark.model.pointers.DependentFieldPointer import scala.collection.JavaConverters._ import scala.collection.mutable object EntitiesNamespace { def get: DependentFieldPointer[ByteLoad.Root, String] = HtmlText.mapIdentity("entities").get[String]("entities") } class Entities private (properties: Properties, tagFieldMapping: Seq[(String, String)]) extends BoundEnrichFunc[ByteLoad.Root, String, String](EntitiesNamespace.get) { override def defaultField: String = "" override def fields: Seq[String] = tagFieldMapping.map { case (tag, field) => field } @transient lazy val pipeline: StanfordCoreNLP = new StanfordCoreNLP(properties) override def derive(source: TypedEnrichable[String], derivatives: Derivatives): Unit = { val doc = new Annotation(source.get) pipeline.annotate(doc) val sentences: mutable.Seq[CoreMap] = doc.get(classOf[SentencesAnnotation]).asScala val mentions = sentences.flatMap { sentence => val tokens: mutable.Buffer[CoreLabel] = sentence.get(classOf[TokensAnnotation]).asScala tokens.map { token => val word: String = token.get(classOf[TextAnnotation]) val ne: String = token.get(classOf[NamedEntityTagAnnotation]) (ne, word) } }.groupBy{case (ne, word) => ne.toLowerCase}.mapValues(items => items.map{case (ne, word) => word}.toSet) for ((tag, _) <- tagFieldMapping) derivatives.setNext(MultiValueEnrichable(mentions.getOrElse(tag.toLowerCase, Set()).toSeq)) } } object EntitiesConstants { val DefaultTagFieldMapping: Seq[(String, String)] = Seq( "PERSON" -> "persons", "ORGANIZATION" -> "organizations", "LOCATION" -> "locations", "DATE" -> "dates" ) val DefaultProps: Properties = new Properties() {{ setProperty("annotators", "tokenize, ssplit, pos, lemma, ner") setProperty("tokenize.class", "PTBTokenizer") setProperty("tokenize.language", "en") setProperty("ner.useSUTime", "false") setProperty("ner.applyNumericClassifiers", "false") }} } object Entities extends Entities(EntitiesConstants.DefaultProps, EntitiesConstants.DefaultTagFieldMapping) { def apply() = new Entities(EntitiesConstants.DefaultProps, EntitiesConstants.DefaultTagFieldMapping) def apply(tagFieldMapping: (String, String)*) = new Entities(EntitiesConstants.DefaultProps, tagFieldMapping) def apply(props: Properties) = new Entities(props, EntitiesConstants.DefaultTagFieldMapping) def apply(props: Properties, tagFieldMapping: (String, String)*) = new Entities(props, tagFieldMapping) def apply(language: String, tagFieldMapping: Seq[(String, String)] = EntitiesConstants.DefaultTagFieldMapping): Entities = { val props = EntitiesConstants.DefaultProps props.setProperty("tokenize.language", language) new Entities(props, tagFieldMapping) } }
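After annotation, the enrich function above collects (tag, word) pairs from the mutable buffers returned by asScala and groups them into a tag-to-words map. The grouping step on its own, detached from CoreNLP (the token pairs below are fabricated):

import scala.collection.mutable

object MentionGroupingSketch {
  def main(args: Array[String]): Unit = {
    // fabricated (tag, word) pairs; in the listing these come from CoreNLP annotations
    val tokens = mutable.Buffer(
      ("PERSON", "Ada"), ("LOCATION", "Paris"), ("PERSON", "Ada"), ("DATE", "1843"))

    val mentions: Map[String, Set[String]] =
      tokens
        .groupBy { case (tag, _) => tag.toLowerCase }
        .map { case (tag, items) => tag -> items.map { case (_, word) => word }.toSet }
        .toMap

    println(mentions.getOrElse("person", Set()))   // Set(Ada)
    println(mentions.getOrElse("money", Set()))    // Set()
  }
}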
Example 74
Source File: PermutationStrategy.scala From scalismo-faces with Apache License 2.0 | 5 votes |
package scalismo.faces.numerics import breeze.linalg.CSCMatrix import scala.collection.mutable object CSCMatrixGraph { def degree(n: Int, A: CSCMatrix[Double]): Int = { require(n >= 0 && n < A.cols, "invalid node number") val cStart = A.colPtrs(n) val cEnd = A.colPtrs(n + 1) cEnd - cStart - 1 // remove diagonal entry } def neighbours(n: Int, A: CSCMatrix[Double]): Array[Int] = { require(n >= 0 && n < A.cols, "invalid node number") val cStart = A.colPtrs(n) val cEnd = A.colPtrs(n + 1) A.rowIndices.slice(cStart, cEnd) } }
Example 75
Source File: EntityAwarePredictor.scala From low-rank-logic with MIT License | 5 votes |
package uclmr import uclmr.EntityAwareEvaluation.Entity import uclmr.FactorizationUtil.{PredictedFact, Row} import ml.wolfe.util.Util import scala.collection.mutable class EntityAwarePredictor(val embeddings: ProbLogicEmbeddings, val entities: Map[Any, Entity]) { val distanceCache = new mutable.HashMap[(String, String), Double]() def closest(candidates: Iterable[String], target: String) = { if (candidates.isEmpty) ("NA", Double.PositiveInfinity) else candidates.map(pred => { val dist = distanceCache.getOrElseUpdate(pred -> target, embeddings.embeddings(target).distance(embeddings.embeddings(pred))) pred -> dist }).minBy(_._2) } def farthest(candidates: Iterable[String], target: String) = { if (candidates.isEmpty) ("NA", Double.PositiveInfinity) else candidates.map(pred => { val dist = distanceCache.getOrElseUpdate(pred -> target, embeddings.embeddings(target).distance(embeddings.embeddings(pred))) pred -> dist }).maxBy(_._2) } def predictAll(row: Row, targetRelations:Seq[String], useFilter:Boolean = true) = { targetRelations.map(predict(row,_,useFilter)) } import EntityAwareEvaluation._ def predict(row: Row, target: String, useFilter:Boolean = true) = { val arg1 = entities(row.arg1) val arg2 = entities(row.arg2) val targetEmbedding = embeddings.embeddings(target) def filterObs(obs:Iterable[String]) = if (useFilter) obs.filter(targetEmbedding.observationFilter) else obs def asProb(pair:(String,Double)) = pair.copy(_2 = Util.sig(targetEmbedding.bias - pair._2)) //find best unary predicate for arg1 val arg1Result = closest(filterObs(arg1.asArg1), target) //find best unary predicate for arg2 val arg2Result = closest(filterObs(arg2.asArg2), target) //find best binary predicate as observation val relResult = closest(filterObs(row.relations.view.map(_._1)), target) val (predictor, score) = Iterator(arg1Result, arg2Result, relResult).maxBy(_._2) val prob = Util.sig(targetEmbedding.bias - score) EntityAwarePrediction( PredictedFact(row, target, prob), predictor, asProb(arg1Result), asProb(arg2Result), asProb(relResult) ) } } case class EntityAwarePrediction(fact: PredictedFact, predictor: String, arg1Result: (String, Double), arg2Result: (String, Double), relResult: (String, Double)) { override def toString = { s""" |$fact | Predictor: $predictor | Arg1: $arg1Result | Arg2: $arg2Result | Rel: $relResult """.stripMargin } }
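The predictor above memoises pairwise distances in a mutable.HashMap via getOrElseUpdate, so each (predicate, target) distance is computed at most once. The caching pattern in isolation; the "distance" here is a dummy computation, not the embedding distance from the listing:

import scala.collection.mutable

object DistanceCacheSketch {
  private val distanceCache = mutable.HashMap.empty[(String, String), Double]
  private var computations  = 0

  // dummy stand-in for an embedding distance that is expensive to compute
  private def expensiveDistance(a: String, b: String): Double = {
    computations += 1
    math.abs(a.length - b.length).toDouble
  }

  def distance(a: String, b: String): Double =
    distanceCache.getOrElseUpdate(a -> b, expensiveDistance(a, b))

  def main(args: Array[String]): Unit = {
    distance("worksFor", "employedBy")
    distance("worksFor", "employedBy")   // second call is served from the cache
    println(computations)                // 1
  }
}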
Example 76
Source File: TextSegmentor.scala From topwords with GNU General Public License v3.0 | 5 votes |
package io.github.qf6101.topwords import scala.collection.mutable protected def segment(splitPositions: List[Int]): String = { // return text itself if it has only one character if (T.length <= 1 || splitPositions.length == 0) return T // copy the characters one by one plus the splitters in the boundary positions val splitPosStack = mutable.Stack[Int]().pushAll(splitPositions.reverse) var currSplitPos = splitPosStack.pop() - 1 val splitResult = new StringBuilder() T.zipWithIndex.foreach { case (c, idx) => splitResult += c if (idx == currSplitPos) { splitResult += splitter currSplitPos = if (splitPosStack.pop() - 1 else -1 } } splitResult.toString() } }
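The segmenter above pops boundary positions from a mutable.Stack while copying the characters once. A self-contained version of that loop, with the text and splitter character made explicit:

import scala.collection.mutable

object SegmentSketch {
  // Insert `splitter` after each (1-based) position in `splitPositions`.
  def segment(text: String, splitPositions: List[Int], splitter: Char = '|'): String = {
    if (text.length <= 1 || splitPositions.isEmpty) return text
    val stack = mutable.Stack[Int]()
    stack.pushAll(splitPositions.reverse)          // smallest position ends up on top
    var next = stack.pop() - 1
    val sb = new StringBuilder
    text.zipWithIndex.foreach { case (c, idx) =>
      sb += c
      if (idx == next) {
        sb += splitter
        next = if (stack.nonEmpty) stack.pop() - 1 else -1
      }
    }
    sb.toString()
  }

  def main(args: Array[String]): Unit =
    println(segment("topwords", List(3, 5)))       // top|wo|rds
}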
Example 77
Source File: PasswordInfoDAO.scala From crm-seed with Apache License 2.0 | 5 votes |
package com.dataengi.crm.identities.daos import com.google.inject.Singleton import com.mohiva.play.silhouette.api.LoginInfo import com.mohiva.play.silhouette.api.util.PasswordInfo import com.mohiva.play.silhouette.persistence.daos.DelegableAuthInfoDAO import scala.collection.mutable import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.Future trait PasswordInfoDAO extends DelegableAuthInfoDAO[PasswordInfo] @Singleton class InMemoryPasswordInfoDAOImpl extends PasswordInfoDAO { val passwords = mutable.HashMap.empty[LoginInfo, PasswordInfo] def remove(loginInfo: LoginInfo): Future[Unit] = { Future.successful( passwords.remove(loginInfo) ) } }
Example 78
Source File: B3FormatPropagation.scala From opencensus-scala with Apache License 2.0 | 5 votes |
package io.opencensus.scala.http.propagation import io.opencensus.trace.propagation.TextFormat.{Getter, Setter} import io.opencensus.trace.{Span, SpanContext} import scala.collection.{immutable, mutable} import scala.util.Try trait B3FormatPropagation[Header, Request] extends Propagation[Header, Request] { def headerValue(req: Request, key: String): Option[String] def createHeader(key: String, value: String): Header override def extractContext(request: Request): Try[SpanContext] = Try(b3Format.extract(request, HeaderGetter)) private type HttpHeaderBuilder = mutable.ArrayBuffer[Header] private object HeaderSetter extends Setter[HttpHeaderBuilder] { override def put( carrier: HttpHeaderBuilder, key: String, value: String ): Unit = { carrier += createHeader(key, value) } } private object HeaderGetter extends Getter[Request] { override def get(carrier: Request, key: String): String = headerValue(carrier, key).orNull } }
Example 79
Source File: GrokHelper.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.s2jobs.utils import io.thekraken.grok.api.Grok import org.apache.s2graph.s2jobs.Logger import org.apache.spark.SparkFiles import org.apache.spark.sql.Row import scala.collection.mutable object GrokHelper extends Logger { private val grokPool:mutable.Map[String, Grok] = mutable.Map.empty def getGrok(name:String, patternFiles:Seq[String], patterns:Map[String, String], compilePattern:String):Grok = { if (grokPool.get(name).isEmpty) { println(s"Grok '$name' initialized..") val grok = new Grok() patternFiles.foreach { patternFile => val filePath = SparkFiles.get(patternFile) println(s"[Grok][$name] add pattern file : $patternFile ($filePath)") grok.addPatternFromFile(filePath) } patterns.foreach { case (name, pattern) => println(s"[Grok][$name] add pattern : $name ($pattern)") grok.addPattern(name, pattern) } grok.compile(compilePattern) println(s"[Grok][$name] patterns: ${grok.getPatterns}") grokPool.put(name, grok) } grokPool(name) } def grokMatch(text:String)(implicit grok:Grok):Option[Map[String, String]] = { import scala.collection.JavaConverters._ val m = grok.`match`(text) m.captures() val rstMap = m.toMap.asScala.toMap .filter(_._2 != null) .map{ case (k, v) => k -> v.toString} if (rstMap.isEmpty) None else Some(rstMap) } def grokMatchWithSchema(text:String)(implicit grok:Grok, keys:Array[String]):Option[Row] = { import scala.collection.JavaConverters._ val m = grok.`match`(text) m.captures() val rstMap = m.toMap.asScala.toMap if (rstMap.isEmpty) None else { val l = keys.map { key => rstMap.getOrElse(key, null)} Some(Row.fromSeq(l)) } } }
Example 80
Source File: Job.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.s2jobs import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.s2graph.s2jobs.task._ import scala.collection.mutable class Job(ss:SparkSession, jobDesc:JobDescription) extends Serializable with Logger { private val dfMap = mutable.Map[String, DataFrame]() def run() = { // source jobDesc.sources.foreach{ source => val df = source.toDF(ss) if (source.conf.cache.getOrElse(false) && !df.isStreaming) df.cache() dfMap.put(source.conf.name, df) } logger.info(s"valid source DF set : ${dfMap.keySet}") // process var processRst:Seq[(String, DataFrame)] = Nil do { processRst = getValidProcess(jobDesc.processes) processRst.foreach { case (name, df) => dfMap.put(name, df)} } while(processRst.nonEmpty) logger.info(s"valid named DF set : ${dfMap.keySet}") // sinks jobDesc.sinks.foreach { s => val inputDFs = s.conf.inputs.flatMap{ input => dfMap.get(input)} if (inputDFs.isEmpty) throw new IllegalArgumentException(s"sink has not valid inputs (${s.conf.name})") // use only first input s.write(inputDFs.head) } // if stream query exist if (ss.streams.active.length > 0) ss.streams.awaitAnyTermination() } private def getValidProcess(processes:Seq[Process]):Seq[(String, DataFrame)] = { val dfKeys = dfMap.keySet processes.filter{ p => val existAllInput = p.conf.inputs.forall{ input => dfKeys(input) } !dfKeys(p.conf.name) && existAllInput } .map { p => val inputMap = p.conf.inputs.map{ input => (input, dfMap(input)) }.toMap val df = p.execute(ss, inputMap) if (p.conf.cache.getOrElse(false) && !df.isStreaming) df.cache() p.conf.name -> df } } }
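The Job above resolves its processes iteratively, registering each produced DataFrame in a mutable.Map and looping until no process with satisfied inputs remains. A Spark-free sketch of that fixed-point loop, with a String standing in for a DataFrame and made-up process names:

import scala.collection.mutable

object DependencyResolutionSketch {
  final case class Proc(name: String, inputs: Seq[String])

  def main(args: Array[String]): Unit = {
    // name -> produced result; a String stands in for a DataFrame here
    val registry  = mutable.Map("source" -> "raw data")
    val processes = Seq(Proc("clean", Seq("source")), Proc("join", Seq("clean", "source")))

    var resolved: Seq[(String, String)] = Nil
    do {
      // a process is runnable once all of its inputs are registered and it hasn't run yet
      resolved = processes
        .filter(p => !registry.contains(p.name) && p.inputs.forall(registry.contains))
        .map(p => p.name -> s"result of ${p.name}(${p.inputs.mkString(",")})")
      resolved.foreach { case (name, value) => registry.put(name, value) }
    } while (resolved.nonEmpty)

    println(registry.keySet.toList.sorted)   // List(clean, join, source)
  }
}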
Example 81
Source File: WalLogUDAFTest.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.s2jobs.wal.udafs import org.apache.s2graph.s2jobs.wal.utils.BoundedPriorityQueue import org.scalatest._ import scala.collection.mutable import scala.util.Random class WalLogUDAFTest extends FunSuite with Matchers { test("mergeTwoSeq") { val prev: Array[Int] = Array(3, 2, 1) val cur: Array[Int] = Array(4, 2, 2) val ls = WalLogUDAF.mergeTwoSeq(prev, cur, 10) println(ls.size) ls.foreach { x => println(x) } } test("addToTopK test.") { import WalLogUDAF._ val numOfTest = 100 val numOfNums = 100 val maxNum = 10 (0 until numOfTest).foreach { testNum => val maxSize = 1 + Random.nextInt(numOfNums) val pq = new BoundedPriorityQueue[Int](maxSize) val arr = (0 until numOfNums).map(x => Random.nextInt(maxNum)) var result: mutable.Seq[Int] = mutable.ArrayBuffer.empty[Int] arr.foreach { i => pq += i result = addToTopK(result, maxSize, i) } result.sorted shouldBe pq.toSeq.sorted } } }
Example 82
Source File: S2GraphVariables.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.features import java.util import java.util.Optional import scala.collection.JavaConversions._ import org.apache.tinkerpop.gremlin.structure.Graph class S2GraphVariables extends Graph.Variables { import scala.collection.mutable private val variables = mutable.Map.empty[String, Any] override def set(key: String, value: scala.Any): Unit = { if (key == null) throw Graph.Variables.Exceptions.variableKeyCanNotBeNull() if (key.isEmpty) throw Graph.Variables.Exceptions.variableKeyCanNotBeEmpty() if (value == null) throw Graph.Variables.Exceptions.variableValueCanNotBeNull() variables.put(key, value) } override def keys(): util.Set[String] = variables.keySet override def remove(key: String): Unit = { if (key == null) throw Graph.Variables.Exceptions.variableKeyCanNotBeNull() if (key.isEmpty) throw Graph.Variables.Exceptions.variableKeyCanNotBeEmpty() variables.remove(key) } override def get[R](key: String): Optional[R] = { if (key == null) throw Graph.Variables.Exceptions.variableKeyCanNotBeNull() if (key.isEmpty) throw Graph.Variables.Exceptions.variableKeyCanNotBeEmpty() variables.get(key) match { case None => Optional.empty() case Some(value) => if (value == null) Optional.empty() else Optional.of(value.asInstanceOf[R]) } } override def toString: String = { s"variables[size:${variables.keys.size()}]" } }
Example 83
Source File: RocksVertexFetcher.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.storage.rocks import com.typesafe.config.Config import org.apache.hadoop.hbase.util.Bytes import org.apache.s2graph.core._ import org.apache.s2graph.core.schema.ServiceColumn import org.apache.s2graph.core.storage.rocks.RocksStorage.{qualifier, table} import org.apache.s2graph.core.storage.{SKeyValue, StorageIO, StorageSerDe} import org.apache.s2graph.core.types.HBaseType import org.rocksdb.RocksDB import scala.collection.mutable.ArrayBuffer import scala.concurrent.{ExecutionContext, Future} class RocksVertexFetcher(val graph: S2GraphLike, val config: Config, val db: RocksDB, val vdb: RocksDB, val serDe: StorageSerDe, val io: StorageIO) extends VertexFetcher { private def fetchKeyValues(queryRequest: QueryRequest, vertex: S2VertexLike)(implicit ec: ExecutionContext): Future[Seq[SKeyValue]] = { val rpc = RocksStorage.buildRequest(queryRequest, vertex) RocksStorage.fetchKeyValues(vdb, db, rpc) } override def fetchVertices(vertexQueryParam: VertexQueryParam)(implicit ec: ExecutionContext): Future[Seq[S2VertexLike]] = { def fromResult(kvs: Seq[SKeyValue], version: String): Seq[S2VertexLike] = { if (kvs.isEmpty) Nil else serDe.vertexDeserializer(version).fromKeyValues(kvs, None).toSeq.filter(vertexQueryParam.where.get.filter) } val vertices = vertexQueryParam.vertexIds.map(vId => graph.elementBuilder.newVertex(vId)) val futures = vertices.map { vertex => val queryParam = QueryParam.Empty val q = Query.toQuery(Seq(vertex), Seq(queryParam)) val queryRequest = QueryRequest(q, stepIdx = -1, vertex, queryParam) fetchKeyValues(queryRequest, vertex).map { kvs => fromResult(kvs, vertex.serviceColumn.schemaVersion) } recoverWith { case ex: Throwable => Future.successful(Nil) } } Future.sequence(futures).map(_.flatten) } override def fetchVerticesAll()(implicit ec: ExecutionContext) = { import scala.collection.mutable val vertices = new ArrayBuffer[S2VertexLike]() ServiceColumn.findAll().groupBy(_.service.hTableName).toSeq.foreach { case (hTableName, columns) => val distinctColumns = columns.toSet val iter = vdb.newIterator() val buffer = mutable.ListBuffer.empty[SKeyValue] var oldVertexIdBytes = Array.empty[Byte] var minusPos = 0 try { iter.seekToFirst() while (iter.isValid) { val row = iter.key() if (!Bytes.equals(oldVertexIdBytes, 0, oldVertexIdBytes.length - minusPos, row, 0, row.length - 1)) { if (buffer.nonEmpty) serDe.vertexDeserializer(schemaVer = HBaseType.DEFAULT_VERSION).fromKeyValues(buffer, None) .filter(v => distinctColumns(v.serviceColumn)) .foreach { vertex => vertices += vertex } oldVertexIdBytes = row minusPos = 1 buffer.clear() } val kv = SKeyValue(table, iter.key(), SKeyValue.VertexCf, qualifier, iter.value(), System.currentTimeMillis()) buffer += kv iter.next() } if (buffer.nonEmpty) serDe.vertexDeserializer(schemaVer = HBaseType.DEFAULT_VERSION).fromKeyValues(buffer, None) .filter(v => distinctColumns(v.serviceColumn)) .foreach { vertex => vertices += vertex } } finally { iter.close() } } Future.successful(vertices) } }
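fetchVerticesAll above walks a RocksDB iterator and buffers consecutive key-values that belong to the same vertex row in a mutable.ListBuffer before decoding them. A database-free sketch of that "group consecutive rows" buffering, with invented row keys and payloads:

import scala.collection.mutable

object ConsecutiveGroupingSketch {
  def main(args: Array[String]): Unit = {
    // (rowKey, payload) pairs as an iterator would yield them, already sorted by rowKey
    val rows = Seq(("v1", "kv-a"), ("v1", "kv-b"), ("v2", "kv-c"), ("v3", "kv-d"), ("v3", "kv-e"))

    val groups = mutable.ArrayBuffer.empty[List[String]]
    val buffer = mutable.ListBuffer.empty[String]
    var currentKey: Option[String] = None

    rows.foreach { case (key, payload) =>
      if (!currentKey.contains(key)) {            // row key changed: flush the buffer
        if (buffer.nonEmpty) groups += buffer.toList
        buffer.clear()
        currentKey = Some(key)
      }
      buffer += payload
    }
    if (buffer.nonEmpty) groups += buffer.toList  // flush the trailing group

    println(groups)   // ArrayBuffer(List(kv-a, kv-b), List(kv-c), List(kv-d, kv-e))
  }
}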
Example 84
Source File: GraphToETLStreaming.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.counter.loader.stream import kafka.producer.KeyedMessage import kafka.serializer.StringDecoder import org.apache.s2graph.core.GraphUtil import org.apache.s2graph.counter.config.S2CounterConfig import org.apache.s2graph.counter.loader.config.StreamingConfig import org.apache.s2graph.spark.config.S2ConfigFactory import org.apache.s2graph.spark.spark.{WithKafka, SparkApp, HashMapParam} import org.apache.spark.streaming.Durations._ import org.apache.spark.streaming.kafka.KafkaRDDFunctions.rddToKafkaRDDFunctions import scala.collection.mutable import scala.collection.mutable.{HashMap => MutableHashMap} object GraphToETLStreaming extends SparkApp with WithKafka { lazy val config = S2ConfigFactory.config lazy val s2Config = new S2CounterConfig(config) lazy val className = getClass.getName.stripSuffix("$") lazy val producer = getProducer[String, String](StreamingConfig.KAFKA_BROKERS) override def run(): Unit = { validateArgument("interval", "topic") val (intervalInSec, topic) = (seconds(args(0).toLong), args(1)) val groupId = buildKafkaGroupId(topic, "graph_to_etl") val kafkaParam = Map( // "auto.offset.reset" -> "smallest", "group.id" -> groupId, "metadata.broker.list" -> StreamingConfig.KAFKA_BROKERS, "zookeeper.connect" -> StreamingConfig.KAFKA_ZOOKEEPER, "zookeeper.connection.timeout.ms" -> "10000" ) val conf = sparkConf(s"$topic: $className") val ssc = streamingContext(conf, intervalInSec) val sc = ssc.sparkContext val acc = sc.accumulable(MutableHashMap.empty[String, Long], "Throughput")(HashMapParam[String, Long](_ + _)) val stream = getStreamHelper(kafkaParam).createStream[String, String, StringDecoder, StringDecoder](ssc, topic.split(',').toSet) stream.foreachRDD { rdd => rdd.foreachPartitionWithOffsetRange { case (osr, part) => val m = MutableHashMap.empty[Int, mutable.MutableList[String]] for { (k, v) <- part line <- GraphUtil.parseString(v) } { try { val sp = GraphUtil.split(line) // get partition key by target vertex id val partKey = getPartKey(sp(4), 20) val values = m.getOrElse(partKey, mutable.MutableList.empty[String]) values += line m.update(partKey, values) } catch { case ex: Throwable => log.error(s"$ex: $line") } } m.foreach { case (k, v) => v.grouped(1000).foreach { grouped => producer.send(new KeyedMessage[String, String](StreamingConfig.KAFKA_TOPIC_ETL, null, k, grouped.mkString("\n"))) } } getStreamHelper(kafkaParam).commitConsumerOffset(osr) } } ssc.start() ssc.awaitTermination() } }
Example 85
Source File: QueueActor.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.rest.play.actors import java.util.concurrent.TimeUnit import akka.actor._ import org.apache.s2graph.core.ExceptionHandler._ import org.apache.s2graph.core.utils.logger import org.apache.s2graph.core.{ExceptionHandler, S2Graph, GraphElement} import org.apache.s2graph.rest.play.actors.Protocol.FlushAll import org.apache.s2graph.rest.play.config.Config import play.api.Play.current import play.api.libs.concurrent.Akka import scala.collection.mutable import scala.concurrent.duration.Duration object Protocol { case object Flush case object FlushAll } object QueueActor { var router: ActorRef = _ // Akka.system.actorOf(props(), name = "queueActor") def init(s2: S2Graph, walLogHandler: ExceptionHandler) = { router = Akka.system.actorOf(props(s2, walLogHandler)) } def shutdown() = { router ! FlushAll Akka.system.shutdown() Thread.sleep(Config.ASYNC_HBASE_CLIENT_FLUSH_INTERVAL * 2) } def props(s2: S2Graph, walLogHandler: ExceptionHandler): Props = Props(classOf[QueueActor], s2, walLogHandler) } class QueueActor(s2: S2Graph, walLogHandler: ExceptionHandler) extends Actor with ActorLogging { import Protocol._ implicit val ec = context.system.dispatcher // logger.error(s"QueueActor: $self") val queue = mutable.Queue.empty[GraphElement] var queueSize = 0L val maxQueueSize = Config.LOCAL_QUEUE_ACTOR_MAX_QUEUE_SIZE val timeUnitInMillis = 10 val rateLimitTimeStep = 1000 / timeUnitInMillis val rateLimit = Config.LOCAL_QUEUE_ACTOR_RATE_LIMIT / rateLimitTimeStep context.system.scheduler.schedule(Duration.Zero, Duration(timeUnitInMillis, TimeUnit.MILLISECONDS), self, Flush) override def receive: Receive = { case element: GraphElement => if (queueSize > maxQueueSize) { walLogHandler.enqueue(toKafkaMessage(Config.KAFKA_FAIL_TOPIC, element, None)) } else { queueSize += 1L queue.enqueue(element) } case Flush => val elementsToFlush = if (queue.size < rateLimit) queue.dequeueAll(_ => true) else (0 until rateLimit).map(_ => queue.dequeue()) val flushSize = elementsToFlush.size queueSize -= elementsToFlush.length s2.mutateElements(elementsToFlush) if (flushSize > 0) { logger.info(s"flush: $flushSize, $queueSize") } case FlushAll => s2.mutateElements(queue) context.stop(self) case _ => logger.error("unknown protocol") } }
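On every Flush tick, QueueActor drains its mutable.Queue either completely or in rate-limited chunks. The dequeue logic in isolation, with Ints instead of GraphElements and an arbitrary rate limit:

import scala.collection.mutable

object RateLimitedDrainSketch {
  def main(args: Array[String]): Unit = {
    val queue     = mutable.Queue((1 to 25): _*)
    val rateLimit = 10

    // drain everything if the backlog is small, otherwise take at most rateLimit elements
    def flush(): Seq[Int] =
      if (queue.size < rateLimit) queue.dequeueAll(_ => true)
      else (0 until rateLimit).map(_ => queue.dequeue())

    println(flush().size)   // 10
    println(flush().size)   // 10
    println(flush().size)   // 5
    println(queue.isEmpty)  // true
  }
}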
Example 86
Source File: CreateExchangeTransactionActor.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.actors.tx import akka.actor.{Actor, ActorRef, Props} import com.wavesplatform.dex.actors.tx.CreateExchangeTransactionActor.OrderExecutedObserved import com.wavesplatform.dex.domain.account.Address import com.wavesplatform.dex.domain.utils.ScorexLogging import com.wavesplatform.dex.model.Events.{ExchangeTransactionCreated, OrderExecuted} import com.wavesplatform.dex.model.ExchangeTransactionCreator.CreateTransaction import play.api.libs.json.Json import scala.collection.mutable class CreateExchangeTransactionActor(createTransaction: CreateTransaction, recipients: List[ActorRef]) extends Actor with ScorexLogging { private val pendingEvents = mutable.Set.empty[OrderExecuted] override def preStart(): Unit = context.system.eventStream.subscribe(self, classOf[OrderExecutedObserved]) override def receive: Receive = { case OrderExecutedObserved(sender, event) => val sameOwner = event.counter.order.sender == event.submitted.order.sender log.debug(s"Execution observed at $sender for OrderExecuted(${event.submitted.order.id()}, ${event.counter.order .id()}), amount=${event.executedAmount})${if (sameOwner) " Same owner for both orders" else ""}") if (sameOwner || pendingEvents.contains(event)) { import event.{counter, submitted} createTransaction(event) match { case Right(tx) => log.info(s"Created transaction: $tx") val created = ExchangeTransactionCreated(tx) recipients.foreach(_ ! created) case Left(ex) => log.warn( s"""Can't create tx: $ex |o1: (amount=${submitted.amount}, fee=${submitted.fee}): ${Json.prettyPrint(submitted.order.json())} |o2: (amount=${counter.amount}, fee=${counter.fee}): ${Json.prettyPrint(counter.order.json())}""".stripMargin ) } pendingEvents -= event } else pendingEvents += event } } object CreateExchangeTransactionActor { val name = "create-exchange-tx" case class OrderExecutedObserved(sender: Address, event: OrderExecuted) def props(createTransaction: CreateTransaction, recipients: List[ActorRef]): Props = Props(new CreateExchangeTransactionActor(createTransaction, recipients)) }
Example 87
Source File: AddressDirectoryActor.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.actors.address import akka.actor.{Actor, ActorRef, Props, SupervisorStrategy, Terminated} import com.wavesplatform.dex.db.OrderDB import com.wavesplatform.dex.domain.account.Address import com.wavesplatform.dex.domain.utils.{EitherExt2, ScorexLogging} import com.wavesplatform.dex.history.HistoryRouter._ import com.wavesplatform.dex.model.Events import com.wavesplatform.dex.model.Events.OrderCancelFailed import scala.collection.mutable class AddressDirectoryActor(orderDB: OrderDB, addressActorProps: (Address, Boolean) => Props, historyRouter: Option[ActorRef]) extends Actor with ScorexLogging { import AddressDirectoryActor._ import context._ private var startSchedules: Boolean = false private[this] val children = mutable.AnyRefMap.empty[Address, ActorRef] override def supervisorStrategy: SupervisorStrategy = SupervisorStrategy.stoppingStrategy private def createAddressActor(address: Address): ActorRef = { log.debug(s"Creating address actor for $address") watch(actorOf(addressActorProps(address, startSchedules), address.toString)) } private def forward(address: Address, msg: Any): Unit = (children get address, msg) match { case (None, _: AddressActor.Message.BalanceChanged) => case _ => children getOrElseUpdate (address, createAddressActor(address)) forward msg } override def receive: Receive = { case Envelope(address, cmd) => forward(address, cmd) case e @ Events.OrderAdded(lo, timestamp) => forward(lo.order.sender, e) historyRouter foreach { _ ! SaveOrder(lo, timestamp) } case e: Events.OrderExecuted => import e.{counter, submitted} forward(submitted.order.sender, e) if (counter.order.sender != submitted.order.sender) forward(counter.order.sender, e) historyRouter foreach { _ ! SaveEvent(e) } case e: Events.OrderCanceled => forward(e.acceptedOrder.order.sender, e) historyRouter foreach { _ ! SaveEvent(e) } case e: OrderCancelFailed => orderDB.get(e.id) match { case Some(order) => forward(order.sender.toAddress, e) case None => log.warn(s"The order '${e.id}' not found") } case StartSchedules => if (!startSchedules) { startSchedules = true context.children.foreach(_ ! StartSchedules) } case Terminated(child) => val addressString = child.path.name val address = Address.fromString(addressString).explicitGet() children.remove(address) log.warn(s"Address handler for $addressString terminated") } } object AddressDirectoryActor { case class Envelope(address: Address, cmd: AddressActor.Message) case object StartSchedules }
Example 88
Source File: OrderBookSideSnapshotCodecs.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.codecs import java.math.BigInteger import java.nio.ByteBuffer import com.google.common.primitives.{Ints, Longs} import com.wavesplatform.dex.codecs.ByteBufferCodecs.ByteBufferExt import com.wavesplatform.dex.domain.model.Price import com.wavesplatform.dex.domain.order.{Order, OrderType} import com.wavesplatform.dex.model.{BuyLimitOrder, LimitOrder, OrderBookSideSnapshot, SellLimitOrder} import scala.collection.mutable object OrderBookSideSnapshotCodecs { def encode(dest: mutable.ArrayBuilder[Byte], snapshot: OrderBookSideSnapshot): Unit = { dest ++= Ints.toByteArray(snapshot.size) snapshot.foreach { case (price, xs) => dest ++= Longs.toByteArray(price) dest ++= Ints.toByteArray(xs.size) xs.foreach(encodeLoV2(dest, _)) } } def decode(bb: ByteBuffer): OrderBookSideSnapshot = { val snapshotSize = bb.getInt val r = Map.newBuilder[Price, Seq[LimitOrder]] (1 to snapshotSize).foreach { _ => val price = bb.getLong val levelSize = bb.getInt val limitOrders = (1 to levelSize).map(_ => decodeLo(bb)) r += price -> limitOrders } r.result() } def encodeLoV1(dest: mutable.ArrayBuilder[Byte], lo: LimitOrder): Unit = { dest ++= lo.order.orderType.bytes dest ++= Longs.toByteArray(lo.amount) dest ++= Longs.toByteArray(lo.fee) dest += lo.order.version val orderBytes = lo.order.bytes() dest ++= Ints.toByteArray(orderBytes.length) dest ++= orderBytes } def encodeLoV2(dest: mutable.ArrayBuilder[Byte], lo: LimitOrder): Unit = { val avgWeighedPriceNominatorBytes = lo.avgWeighedPriceNominator.toByteArray dest += 2 encodeLoV1(dest, lo) dest ++= Ints.toByteArray(avgWeighedPriceNominatorBytes.length) dest ++= avgWeighedPriceNominatorBytes } def decodeLo(bb: ByteBuffer): LimitOrder = { val header = bb.get val version = if (header == 2) 2 else 1 val orderType = if (version == 1) header else bb.get val amount = bb.getLong val fee = bb.getLong val orderVersion = bb.get val order = Order.fromBytes(orderVersion, bb.getBytes) val avgWeighedPriceNominator = if (version == 2) new BigInteger(bb.getBytes) else { val filledAmount = order.amount - amount (BigInt(order.price) * filledAmount).bigInteger } OrderType(orderType) match { case OrderType.SELL => SellLimitOrder(amount, fee, order, avgWeighedPriceNominator) case OrderType.BUY => BuyLimitOrder(amount, fee, order, avgWeighedPriceNominator) } } }
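The codec above serialises into a mutable.ArrayBuilder[Byte], appending whole byte arrays with ++= and materialising the result once at the end. A tiny sketch of that builder-based encoding, with java.nio.ByteBuffer standing in for the Guava Ints/Longs helpers:

import java.nio.ByteBuffer
import scala.collection.mutable

object ByteEncodingSketch {
  private def intBytes(i: Int): Array[Byte]   = ByteBuffer.allocate(4).putInt(i).array()
  private def longBytes(l: Long): Array[Byte] = ByteBuffer.allocate(8).putLong(l).array()

  // encode a price level: price, element count, then each amount
  def encode(price: Long, amounts: Seq[Long]): Array[Byte] = {
    val dest = mutable.ArrayBuilder.make[Byte]
    dest ++= longBytes(price)
    dest ++= intBytes(amounts.size)
    amounts.foreach(a => dest ++= longBytes(a))
    dest.result()
  }

  def main(args: Array[String]): Unit =
    println(encode(price = 100L, amounts = Seq(1L, 2L, 3L)).length)   // 8 + 4 + 3*8 = 36
}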
Example 89
Source File: OrderBookSnapshot.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.model import java.nio.ByteBuffer import com.wavesplatform.dex.codecs.OrderBookSideSnapshotCodecs import scala.collection.mutable case class OrderBookSnapshot(bids: OrderBookSideSnapshot, asks: OrderBookSideSnapshot, lastTrade: Option[LastTrade]) object OrderBookSnapshot { val empty: OrderBookSnapshot = OrderBookSnapshot(bids = Map.empty, asks = Map.empty, None) def serialize(dest: mutable.ArrayBuilder[Byte], x: OrderBookSnapshot): Unit = { OrderBookSideSnapshotCodecs.encode(dest, x.bids) OrderBookSideSnapshotCodecs.encode(dest, x.asks) x.lastTrade match { case None => dest += 0 case Some(lastTrade) => dest += 1 LastTrade.serialize(dest, lastTrade) } } def fromBytes(bb: ByteBuffer): OrderBookSnapshot = OrderBookSnapshot( OrderBookSideSnapshotCodecs.decode(bb), OrderBookSideSnapshotCodecs.decode(bb), bb.get match { case 0 => None case 1 => Some(LastTrade.fromBytes(bb)) case x => throw new RuntimeException(s"Can't deserialize Option as $x") } ) }
Example 90
Source File: HistoryMessagesBatchSender.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.history import akka.actor.{Actor, Cancellable} import com.wavesplatform.dex.history.HistoryRouter.{HistoryMsg, StopAccumulate} import scala.collection.mutable import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration._ import scala.reflect.ClassTag abstract class HistoryMessagesBatchSender[M <: HistoryMsg: ClassTag] extends Actor { val batchLinger: Long val batchEntries: Long def createAndSendBatch(batchBuffer: Iterable[M]): Unit private val batchBuffer: mutable.Set[M] = mutable.Set.empty[M] private def scheduleStopAccumulating: Cancellable = context.system.scheduler.scheduleOnce(batchLinger.millis, self, StopAccumulate) private def sendBatch(): Unit = { if (batchBuffer.nonEmpty) { createAndSendBatch(batchBuffer) batchBuffer.clear() } } def receive: Receive = awaitingHistoryMessages private def awaitingHistoryMessages: Receive = { case msg: M => scheduleStopAccumulating context become accumulateBuffer(scheduleStopAccumulating) batchBuffer += msg } private def accumulateBuffer(scheduledStop: Cancellable): Receive = { case msg: M => if (batchBuffer.size == batchEntries) { scheduledStop.cancel() sendBatch() context become accumulateBuffer(scheduleStopAccumulating) } batchBuffer += msg case StopAccumulate => sendBatch(); context become awaitingHistoryMessages } }
Example 91
Source File: OrderHistoryStub.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.model import akka.actor.{ActorRef, ActorSystem, Props} import com.wavesplatform.dex.actors.SpendableBalancesActor import com.wavesplatform.dex.actors.address.{AddressActor, AddressDirectoryActor} import com.wavesplatform.dex.db.{EmptyOrderDB, TestOrderDB} import com.wavesplatform.dex.domain.account.Address import com.wavesplatform.dex.domain.asset.Asset import com.wavesplatform.dex.domain.bytes.ByteStr import com.wavesplatform.dex.error.ErrorFormatterContext import com.wavesplatform.dex.queue.QueueEventWithMeta import com.wavesplatform.dex.time.Time import scala.collection.mutable import scala.concurrent.Future class OrderHistoryStub(system: ActorSystem, time: Time, maxActiveOrders: Int, maxFinalizedOrders: Int) { private implicit val efc: ErrorFormatterContext = (_: Asset) => 8 private val refs = mutable.AnyRefMap.empty[Address, ActorRef] private val orders = mutable.AnyRefMap.empty[ByteStr, Address] private val spendableBalances: (Address, Set[Asset]) => Future[Map[Asset, Long]] = (_, _) => Future.successful(Map.empty[Asset, Long]) private val allAssetsSpendableBalances: Address => Future[Map[Asset, Long]] = _ => Future.successful(Map.empty[Asset, Long]) private val spendableBalanceActor = system.actorOf(Props(new SpendableBalancesActor(spendableBalances, allAssetsSpendableBalances, addressDir))) def createAddressActor(address: Address, enableSchedules: Boolean): Props = { Props( new AddressActor( address, time, new TestOrderDB(maxFinalizedOrders), (_, _) => Future.successful(Right(())), e => Future.successful { Some(QueueEventWithMeta(0, 0, e)) }, enableSchedules, spendableBalanceActor, AddressActor.Settings.default.copy(maxActiveOrders = maxActiveOrders) ) ) } private def actorFor(ao: AcceptedOrder): ActorRef = refs.getOrElseUpdate( ao.order.sender, system.actorOf(createAddressActor(ao.order.sender, enableSchedules = true)) ) lazy val addressDir = system.actorOf( Props( new AddressDirectoryActor( EmptyOrderDB, createAddressActor, None ) ) ) def ref(sender: Address): ActorRef = refs(sender) def ref(orderId: ByteStr): ActorRef = refs(orders(orderId)) def process(event: Events.Event): Unit = event match { case oa: Events.OrderAdded => orders += oa.order.order.id() -> oa.order.order.sender actorFor(oa.order) ! oa case ox: Events.OrderExecuted => orders += ox.submitted.order.id() -> ox.submitted.order.sender orders += ox.counter.order.id() -> ox.counter.order.sender actorFor(ox.counter) ! ox actorFor(ox.submitted) ! ox case oc: Events.OrderCanceled => actorFor(oc.acceptedOrder) ! oc } def processAll(events: Events.Event*): Unit = events.foreach(process) }
Example 92
Source File: AffinityPropagationSuite.scala From SparkAffinityPropagation with MIT License | 5 votes |
package org.viirya.spark.ml import scala.collection.mutable import org.scalatest.{BeforeAndAfterAll, FunSuite, Suite} import org.viirya.spark.ml.AffinityPropagation._ import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.graphx.{Edge, Graph} class AffinityPropagationSuite extends FunSuite with BeforeAndAfterAll { self: Suite => @transient var sc: SparkContext = _ override def beforeAll() { super.beforeAll() val conf = new SparkConf() .setMaster("local[2]") .setAppName("AffinityPropagationUnitTest") sc = new SparkContext(conf) } override def afterAll() { try { if (sc != null) { sc.stop() } sc = null } finally { super.afterAll() } } test("affinity propagation") { val similarities = Seq[(Long, Long, Double)]( (0, 1, 1.0), (1, 0, 1.0), (0, 2, 1.0), (2, 0, 1.0), (0, 3, 1.0), (3, 0, 1.0), (1, 2, 1.0), (2, 1, 1.0), (2, 3, 1.0), (3, 2, 1.0)) val expected = Array( Array(0.0, 1.0/3.0, 1.0/3.0, 1.0/3.0), Array(1.0/2.0, 0.0, 1.0/2.0, 0.0), Array(1.0/3.0, 1.0/3.0, 0.0, 1.0/3.0), Array(1.0/2.0, 0.0, 1.0/2.0, 0.0)) val s = constructGraph(sc.parallelize(similarities, 2), true, false) s.edges.collect().foreach { case Edge(i, j, x) => assert(math.abs(x.similarity - expected(i.toInt)(j.toInt)) < 1e-14) } } }
Example 93
Source File: RunCypher.scala From piflow with BSD 2-Clause "Simplified" License | 5 votes |
package cn.piflow.bundle.neo4j import cn.piflow.conf.bean.PropertyDescriptor import cn.piflow.conf.util.{ImageUtil, MapUtil} import cn.piflow.conf.{ConfigurableStop, Port, StopGroup} import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext} import org.neo4j.driver.v1._ import scala.collection.mutable class RunCypher extends ConfigurableStop{ override val authorEmail: String = "[email protected]" override val description: String = "Run cql on neo4j" override val inportList: List[String] =List(Port.DefaultPort) override val outportList: List[String] = List(Port.DefaultPort) var url : String =_ var userName : String =_ var password : String =_ var cql : String = "" override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = { var driver: Driver = GraphDatabase.driver(url, AuthTokens.basic(userName, password)) var session: Session = null try { session = driver.session() session.run(cql) } finally { session.close() driver.close() } } override def setProperties(map: Map[String, Any]): Unit = { url = MapUtil.get(map,"url").asInstanceOf[String] userName = MapUtil.get(map,"userName").asInstanceOf[String] password = MapUtil.get(map,"password").asInstanceOf[String] cql = MapUtil.get(map,"cql").asInstanceOf[String] } override def getPropertyDescriptor(): List[PropertyDescriptor] = { var descriptor : List[PropertyDescriptor] = List() val url=new PropertyDescriptor().name("url") .displayName("url") .description("The url of neo4j") .defaultValue("") .required(true) .example("bolt://0.0.1.1:7687") descriptor = url :: descriptor val userName=new PropertyDescriptor() .name("userName") .displayName("UserName") .description("The user of neo4j") .defaultValue("") .required(true) .example("neo4j") descriptor = userName :: descriptor val password=new PropertyDescriptor() .name("password") .displayName("Password") .description("The password of neo4j") .defaultValue("") .required(true) .sensitive(true) .example("123456") descriptor = password :: descriptor val cql=new PropertyDescriptor() .name("cql") .displayName("cql") .description(" The Cypher") .defaultValue("") .required(true) .example("match(n:user) where n.userid ='11' set n.userclass =5") descriptor = cql :: descriptor descriptor } override def getIcon(): Array[Byte] = { ImageUtil.getImage("icon/neo4j/RunCypher.png") } override def getGroup(): List[String] = { List(StopGroup.Neo4jGroup) } override def initialize(ctx: ProcessContext): Unit = { } }
Example 94
Source File: WriteToKafka.scala From piflow with BSD 2-Clause "Simplified" License | 5 votes |
package cn.piflow.bundle.kafka import java.util import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext} import cn.piflow.conf._ import cn.piflow.conf.bean.PropertyDescriptor import cn.piflow.conf.util.{ImageUtil, MapUtil} import java.util.Properties import org.apache.spark.sql.SparkSession import org.apache.kafka.clients.producer.KafkaProducer import org.apache.kafka.clients.producer.Producer import org.apache.kafka.clients.producer.ProducerRecord import scala.collection.mutable class WriteToKafka extends ConfigurableStop{ val description: String = "Write data to kafka" val inportList: List[String] = List(Port.DefaultPort) val outportList: List[String] = List(Port.DefaultPort) var kafka_host:String =_ var topic:String=_ def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = { val spark = pec.get[SparkSession]() val df = in.read() val properties:Properties = new Properties() properties.put("bootstrap.servers", kafka_host) properties.put("acks", "all") //properties.put("retries", 0) //properties.put("batch.size", 16384) //properties.put("linger.ms", 1) //properties.put("buffer.memory", 33554432) properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer") properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer") var producer:Producer[String,String] = new KafkaProducer[String,String](properties) df.collect().foreach(row=>{ //var hm:util.HashMap[String,String]=new util.HashMap() //row.schema.fields.foreach(f=>(if(!f.name.equals(column_name)&&row.getAs(f.name)!=null)hm.put(f.name,row.getAs(f.name).asInstanceOf[String]))) var res:List[String]=List() row.schema.fields.foreach(f=>{ if(row.getAs(f.name)==null)res="None"::res else{ res=row.getAs(f.name).asInstanceOf[String]::res } }) val s:String=res.reverse.mkString(",") val record=new ProducerRecord[String,String](topic,s) producer.send(record) }) producer.close() } def initialize(ctx: ProcessContext): Unit = { } def setProperties(map: Map[String, Any]): Unit = { kafka_host=MapUtil.get(map,key="kafka_host").asInstanceOf[String] //port=Integer.parseInt(MapUtil.get(map,key="port").toString) topic=MapUtil.get(map,key="topic").asInstanceOf[String] } override def getPropertyDescriptor(): List[PropertyDescriptor] = { var descriptor : List[PropertyDescriptor] = List() val kafka_host = new PropertyDescriptor().name("kafka_host").displayName("KAFKA_HOST").defaultValue("").required(true) val topic = new PropertyDescriptor().name("topic").displayName("TOPIC").defaultValue("").required(true) descriptor = kafka_host :: descriptor descriptor = topic :: descriptor descriptor } override def getIcon(): Array[Byte] = { ImageUtil.getImage("icon/kafka/WriteToKafka.png") } override def getGroup(): List[String] = { List(StopGroup.KafkaGroup.toString) } override val authorEmail: String = "[email protected]" }
Example 95
Source File: StopBean.scala From piflow with BSD 2-Clause "Simplified" License | 5 votes |
package cn.piflow.conf.bean import java.lang.ClassNotFoundException import cn.piflow.conf.{ConfigurableIncrementalStop, ConfigurableStop} import cn.piflow.conf.util.{ClassUtil, MapUtil} import scala.collection.mutable class StopBean { var flowName : String = _ var uuid : String = _ var name : String = _ var bundle : String = _ var properties : Map[String, String] = _ var customizedProperties : Map[String, String] = _ def init(flowName : String, map:Map[String,Any]) = { this.flowName = flowName this.uuid = MapUtil.get(map,"uuid").asInstanceOf[String] this.name = MapUtil.get(map,"name").asInstanceOf[String] this.bundle = MapUtil.get(map,"bundle").asInstanceOf[String] this.properties = MapUtil.get(map, "properties").asInstanceOf[Map[String, String]] if(map.contains("customizedProperties")){ this.customizedProperties = MapUtil.get(map, "customizedProperties").asInstanceOf[Map[String, String]] }else{ this.customizedProperties = Map[String, String]() } } def constructStop() : ConfigurableStop = { try{ val stop = ClassUtil.findConfigurableStop(this.bundle) //init ConfigurableIncrementalStop if( stop.isInstanceOf[ConfigurableIncrementalStop]){ stop.asInstanceOf[ConfigurableIncrementalStop].init(flowName, name) var startValue : String = stop.asInstanceOf[ConfigurableIncrementalStop].readIncrementalStart() if(startValue == null || startValue == ""){ if(this.properties.contains("incrementalStart")){ startValue = MapUtil.get(this.properties,"incrementalStart").asInstanceOf[String] }else{ throw new Exception("You must set incrementalStart value!") } } //replace the tag of incremental Field in properties val newProperties: scala.collection.mutable.Map[String, String] = scala.collection.mutable.Map() val it = this.properties.keysIterator while(it.hasNext){ val key = it.next() var value = this.properties(key) value = value.replaceAll("#~#", "'" + startValue + "'") newProperties(key) = value } stop.setProperties(newProperties.toMap) }else { stop.setProperties(this.properties) } stop.setCustomizedProperties(this.customizedProperties) stop }catch { case ex : Exception => throw ex } } } object StopBean { def apply(flowName : String, map : Map[String, Any]): StopBean = { val stopBean = new StopBean() stopBean.init(flowName, map) stopBean } }
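constructStop rewrites the incremental-start tag by copying the properties into a scala.collection.mutable.Map and calling toMap at the end. A self-contained sketch of just that step, with invented property values (only the #~# tag handling mirrors the example):

import scala.collection.mutable

object IncrementalTagSketch extends App {
  // Invented properties; the "#~#" placeholder follows StopBean.constructStop
  val properties = Map("sql" -> "select * from logs where ts > #~#", "user" -> "root")
  val startValue = "2020-01-01 00:00:00"

  val newProperties: mutable.Map[String, String] = mutable.Map()
  properties.foreach { case (key, value) =>
    // Replace the incremental-start tag with the quoted start value
    newProperties(key) = value.replaceAll("#~#", "'" + startValue + "'")
  }

  println(newProperties.toMap)
}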
Example 96
Source File: TestBroadCast.scala From asyspark with MIT License | 5 votes |
package org.apache.spark.examples import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession import scala.collection.mutable object TestBroadCast extends Logging{ val sparkSession = SparkSession.builder().appName("test BoradCast").getOrCreate() val sc = sparkSession.sparkContext def main(args: Array[String]): Unit = { // val data = sc.parallelize(Seq(1 until 10000000)) val num = args(args.length - 2).toInt val times = args(args.length -1).toInt println(num) val start = System.nanoTime() val seq =Seq(1 until num) for(i <- 0 until times) { val start2 = System.nanoTime() val bc = sc.broadcast(seq) val rdd = sc.parallelize(1 until 10, 5) rdd.map(_ => bc.value.take(1)).collect() println((System.nanoTime() - start2)/ 1e6 + "ms") } logInfo((System.nanoTime() - start) / 1e6 + "ms") } def testMap(): Unit ={ val smallRDD = sc.parallelize(Seq(1,2,3)) val bigRDD = sc.parallelize(Seq(1 until 20)) bigRDD.mapPartitions { partition => val hashMap = new mutable.HashMap[Int,Int]() for(ele <- smallRDD) { hashMap(ele) = ele } // some operation here partition } } }
Example 97
Source File: Http.scala From AI with Apache License 2.0 | 5 votes |
package com.bigchange.http

import com.bigchange.log.CLogger
import dispatch.Defaults._
import dispatch._

import scala.collection.mutable
import scala.util.{Failure, Success}

// The enclosing declaration was stripped by the snippet extractor; a hypothetical wrapper
// object (name invented) is restored here so the method reads as a complete unit.
object HttpPoster {

  def post(strUrl: String, parameters: mutable.HashMap[String, String], parse: String): Unit = {
    val post = url(strUrl) << parameters
    val response: Future[String] = Http(post OK as.String)
    response onComplete {
      case Success(content) =>
        // parse(content)
        println("post Success content:" + content)
      case Failure(t) =>
        println("post Failure content:" + t)
    }
  }
}
Example 98
Source File: AggregateActor.scala From AI with Apache License 2.0 | 5 votes |
package com.bigchange.akka.actor import akka.actor.{ActorRef, UntypedActor} import akka.event.Logging import com.bigchange.akka.message.{ReduceData, Result} import scala.collection.mutable class AggregateActor(resultActor: ActorRef) extends UntypedActor { val finalHashMap = new mutable.HashMap[String, Int]() val log = Logging(context.system, this) @scala.throws[Throwable](classOf[Throwable]) override def onReceive(message: Any): Unit = { message match { case data: String => log.info("Aggregate got message:" + data) log.info("Aggregate ok!") case reduceData:ReduceData => aggregateInMemoryReduce(reduceData.reduceHashMap) println("path:" + sender().path) resultActor ! new Result(finalHashMap) // 给ResultActor发送计算结果 case message:Result => println("AggregateActor:" + message.resultValue.toString()) case _ => log.info("map unhandled message") unhandled(message) } } // 聚合 def aggregateInMemoryReduce(reduceMap: mutable.HashMap[String, Int]): Unit = { var count = 0 reduceMap.foreach(x => { if(finalHashMap.contains(x._1)) { count = x._2 count += finalHashMap.get(x._1).get finalHashMap.put(x._1,count) } else { finalHashMap.put(x._1,x._2) } }) } }
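aggregateInMemoryReduce merges one count map into the running totals with an explicit contains check; the same merge is commonly written with getOrElse. A self-contained sketch with made-up counts:

import scala.collection.mutable

object MergeCountsSketch extends App {
  val finalHashMap = mutable.HashMap("spark" -> 2, "akka" -> 1)   // running totals
  val reduceMap    = Map("akka" -> 3, "kafka" -> 1)               // one incoming batch (invented)

  // Add each incoming count to the running total, defaulting to 0 for unseen keys
  reduceMap.foreach { case (word, count) =>
    finalHashMap(word) = finalHashMap.getOrElse(word, 0) + count
  }

  println(finalHashMap)  // akka -> 4, kafka -> 1, spark -> 2 (iteration order is unspecified)
}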
Example 99
Source File: NaiveBayesTest.scala From AI with Apache License 2.0 | 5 votes |
package com.bigchange.test

import com.bigchange.datamining.CustomNaiveBayes

import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.io.Source

// The object declaration was stripped by the snippet extractor; it is restored here from the file name.
object NaiveBayesTest {

  def main(args: Array[String]) {
    // val Array(dataPath) = args
    val data = Source.fromFile("src/main/resources/nbData/i100-i500").getLines().toList
    // also works after packaging: read the corresponding file from inside the jar
    val data2 = Source.fromInputStream(this.getClass.getResourceAsStream("src/main/resources/nbData/i100-i500")).getLines().toList
    // ten-fold cross-validation: (index, List(item1, item2))
    val splitData = data.zipWithIndex.map(x => (x._2 % 10, x._1)).groupBy(_._1).mapValues(x => x.map(_._2))
    val modelMap = new mutable.HashMap[Int, String]()
    val model = CustomNaiveBayes.model(0, splitData)
    var list = List((0, model))
    for (id <- 1 until 10) {
      // train
      val model = CustomNaiveBayes.model(id, splitData)
      list = list ::: List((id, model))
    }
    // classify
    val listP = new ListBuffer[(String, Double)]
    list.foreach(x => {
      println("model:" + x)
      val pred = CustomNaiveBayes.predict(Array("health", "moderate", "moderate1", "yes"), x._2)
      listP.+=(pred)
    })
    println("tobe:" + listP)
    println("tobe:" + listP.max)
  }
}
Example 100
Source File: Stackoverflow58206168.scala From Binding.scala with MIT License | 5 votes |
package com.thoughtworks.binding
package regression

import Binding._

import scala.collection.mutable
import Binding.BindingInstances.functorSyntax._
import org.scalatest.freespec.AnyFreeSpec
import org.scalatest.matchers.should.Matchers

final class Stackoverflow58206168 extends AnyFreeSpec with Matchers {
  // See https://stackoverflow.com/questions//binding-scala-vars-bind-seems-to-not-work-correctly
  "Binding.scala: Vars.bind seems to not work correctly" in {
    val events = mutable.Buffer.empty[List[Int]]
    val test: Vars[Int] = Vars(1, 2, 3, 4)
    test.all.map {
      events += _.toList
    }.watch()
    test.value.append(1111)
    assert(events == mutable.Buffer(List(1, 2, 3, 4), List(1, 2, 3, 4, 1111)))
  }
}
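The test records each change notification by appending a snapshot to a mutable.Buffer from a watcher callback. A self-contained sketch of the same recording idea without the Binding.scala dependency (the hand-rolled notifyChanged callback is an illustration, not part of the library):

import scala.collection.mutable

object EventRecorderSketch extends App {
  val events = mutable.Buffer.empty[List[Int]]
  val data = mutable.ArrayBuffer(1, 2, 3, 4)

  // A hand-rolled "watcher": call this after every mutation to record a snapshot
  def notifyChanged(): Unit = events += data.toList

  notifyChanged()          // initial state
  data.append(1111)
  notifyChanged()          // state after the append

  println(events)          // two snapshots: List(1, 2, 3, 4) and List(1, 2, 3, 4, 1111)
}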
Example 101
Source File: SolrStreamWriter.scala From spark-solr with Apache License 2.0 | 5 votes |
package com.lucidworks.spark import com.lucidworks.spark.util.{SolrQuerySupport, SolrSupport} import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.execution.streaming.Sink import org.apache.spark.sql.streaming.OutputMode import com.lucidworks.spark.util.ConfigurationConstants._ import org.apache.spark.sql.types.StructType import scala.collection.mutable class SolrStreamWriter( val sparkSession: SparkSession, parameters: Map[String, String], val partitionColumns: Seq[String], val outputMode: OutputMode)( implicit val solrConf : SolrConf = new SolrConf(parameters)) extends Sink with LazyLogging { require(solrConf.getZkHost.isDefined, s"Parameter ${SOLR_ZK_HOST_PARAM} not defined") require(solrConf.getCollection.isDefined, s"Parameter ${SOLR_COLLECTION_PARAM} not defined") val collection : String = solrConf.getCollection.get val zkhost: String = solrConf.getZkHost.get lazy val solrVersion : String = SolrSupport.getSolrVersion(solrConf.getZkHost.get) lazy val uniqueKey: String = SolrQuerySupport.getUniqueKey(zkhost, collection.split(",")(0)) lazy val dynamicSuffixes: Set[String] = SolrQuerySupport.getFieldTypes( Set.empty, SolrSupport.getSolrBaseUrl(zkhost), SolrSupport.getCachedCloudClient(zkhost), collection, skipDynamicExtensions = false) .keySet .filter(f => f.startsWith("*_") || f.endsWith("_*")) .map(f => if (f.startsWith("*_")) f.substring(1) else f.substring(0, f.length-1)) @volatile private var latestBatchId: Long = -1L val acc: SparkSolrAccumulator = new SparkSolrAccumulator val accName = if (solrConf.getAccumulatorName.isDefined) solrConf.getAccumulatorName.get else "Records Written" sparkSession.sparkContext.register(acc, accName) SparkSolrAccumulatorContext.add(accName, acc.id) override def addBatch(batchId: Long, df: DataFrame): Unit = { if (batchId <= latestBatchId) { logger.info(s"Skipping already processed batch $batchId") } else { val rows = df.collect() if (rows.nonEmpty) { val schema: StructType = df.schema val solrClient = SolrSupport.getCachedCloudClient(zkhost) // build up a list of updates to send to the Solr Schema API val fieldsToAddToSolr = SolrRelation.getFieldsToAdd(schema, solrConf, solrVersion, dynamicSuffixes) if (fieldsToAddToSolr.nonEmpty) { SolrRelation.addFieldsForInsert(fieldsToAddToSolr, collection, solrClient) } val solrDocs = rows.toStream.map(row => SolrRelation.convertRowToSolrInputDocument(row, solrConf, uniqueKey)) acc.add(solrDocs.length.toLong) SolrSupport.sendBatchToSolrWithRetry(zkhost, solrClient, collection, solrDocs, solrConf.commitWithin) logger.info(s"Written ${solrDocs.length} documents to Solr collection $collection from batch $batchId") latestBatchId = batchId } } } }
Example 102
Source File: Authentication.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package it.gov.daf.common.authentication import java.util.Date import com.nimbusds.jwt.JWTClaimsSet import org.pac4j.core.profile.{CommonProfile, ProfileManager} import org.pac4j.jwt.config.signature.SecretSignatureConfiguration import org.pac4j.jwt.credentials.authenticator.JwtAuthenticator import org.pac4j.jwt.profile.JwtGenerator import org.pac4j.play.PlayWebContext import org.pac4j.play.store.PlaySessionStore import play.api.Configuration import play.api.mvc.{RequestHeader, Result, Results} import scala.collection.convert.decorateAsScala._ import scala.collection.mutable @SuppressWarnings( Array( "org.wartremover.warts.Throw", "org.wartremover.warts.Var" ) ) object Authentication extends Results { var configuration: Option[Configuration] = None var playSessionStore: Option[PlaySessionStore] = None var secret: Option[String] = None def apply(configuration: Configuration, playSessionStore: PlaySessionStore): Unit = { this.configuration = Some(configuration) this.playSessionStore = Some(playSessionStore) this.secret = this.configuration.flatMap(_.getString("pac4j.jwt_secret")) } def getClaims(requestHeader: RequestHeader): Option[mutable.Map[String, AnyRef]] = { val header: Option[String] = requestHeader.headers.get("Authorization") val token: Option[String] = for { h <- header t <- h.split("Bearer").lastOption } yield t.trim getClaimsFromToken(token) } def getClaimsFromToken(token: Option[String]): Option[mutable.Map[String, AnyRef]] = { val jwtAuthenticator = new JwtAuthenticator() jwtAuthenticator.addSignatureConfiguration(new SecretSignatureConfiguration(secret.getOrElse(throw new Exception("missing secret")))) token.map(jwtAuthenticator.validateTokenAndGetClaims(_).asScala) } def getProfiles(request: RequestHeader): List[CommonProfile] = { val webContext = new PlayWebContext(request, playSessionStore.getOrElse(throw new Exception("missing playSessionStore"))) val profileManager = new ProfileManager[CommonProfile](webContext) profileManager.getAll(true).asScala.toList } def getStringToken: (RequestHeader,Long) => Option[String] = (request: RequestHeader,minutes:Long) => { val generator = new JwtGenerator[CommonProfile](new SecretSignatureConfiguration(secret.getOrElse(throw new Exception("missing secret")))) val profiles = getProfiles(request) val token: Option[String] = profiles.headOption.map(profile => { val expDate = new Date( (new Date).getTime + 1000L*60L*minutes )//*60L*24L val claims = new JWTClaimsSet.Builder().expirationTime(expDate).build() profile.addAttributes(claims.getClaims) generator.generate(profile) }) token } def getToken: (RequestHeader,Long) => Result = (request: RequestHeader, minutes:Long) => { Ok(getStringToken(request,minutes).getOrElse("")) } }
Example 103
Source File: SpotlightLog.scala From dbpedia-spotlight-model with Apache License 2.0 | 5 votes |
package org.dbpedia.spotlight.log import org.apache.commons.logging.{Log, LogFactory} import scala.collection.mutable trait SpotlightLog[T] { def _debug(c:Class[_], msg: T, args: Any*) def _info(c:Class[_], msg: T, args: Any*) def _error(c:Class[_], msg: T, args: Any*) def _fatal(c:Class[_], msg: T, args: Any*) def _trace(c:Class[_], msg: T, args: Any*) def _warn(c:Class[_], msg: T, args: Any*) } object SpotlightLog { def debug[T](c:Class[_], msg: T, args: Any*)(implicit instance: SpotlightLog[T]) = instance._debug(c, msg, args: _*) def info[T](c:Class[_], msg: T, args: Any*)(implicit instance: SpotlightLog[T]) = instance._info(c, msg, args: _*) def error[T](c:Class[_], msg: T, args: Any*)(implicit instance: SpotlightLog[T]) = instance._error(c, msg, args: _*) def fatal[T](c:Class[_], msg: T, args: Any*)(implicit instance: SpotlightLog[T]) = instance._fatal(c, msg, args: _*) def trace[T](c:Class[_], msg: T, args: Any*)(implicit instance: SpotlightLog[T]) = instance._trace(c, msg, args: _*) def warn[T](c:Class[_], msg: T, args: Any*)(implicit instance: SpotlightLog[T]) = instance._warn(c, msg, args: _*) implicit object StringSpotlightLog extends SpotlightLog[String] { val loggers = new mutable.HashMap[Class[_], Log]() def _debug(c:Class[_], msg: String, args: Any*) = { val log = loggers.getOrElseUpdate(c, LogFactory.getLog(c)) if (log.isDebugEnabled) { if(args.size == 0) log.debug(msg) else log.debug(msg.format(args: _*)) } } def _info(c:Class[_], msg: String, args: Any*) = { val log = loggers.getOrElseUpdate(c, LogFactory.getLog(c)) if(log.isInfoEnabled) { if(args.size == 0) log.info(msg) else log.info(msg.format(args: _*)) } } def _error(c:Class[_], msg: String, args: Any*) = { val log = loggers.getOrElseUpdate(c, LogFactory.getLog(c)) if(log.isErrorEnabled) { if(args.size == 0) log.error(msg) else log.error(msg.format(args: _*)) } } def _fatal(c:Class[_], msg: String, args: Any*) = { val log = loggers.getOrElseUpdate(c, LogFactory.getLog(c)) if(log.isFatalEnabled) { if(args.size == 0) log.fatal(msg) else log.fatal(msg.format(args: _*)) } } def _trace(c:Class[_], msg: String, args: Any*) = { val log = loggers.getOrElseUpdate(c, LogFactory.getLog(c)) if(log.isTraceEnabled) { if(args.size == 0) log.trace(msg) else log.trace(msg.format(args: _*)) } } def _warn(c:Class[_], msg: String, args: Any*) = { val log = loggers.getOrElseUpdate(c, LogFactory.getLog(c)) if(log.isWarnEnabled) { if(args.size == 0) log.warn(msg) else log.warn(msg.format(args: _*)) } } } }
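Every logging method above caches the per-class Log via loggers.getOrElseUpdate, so LogFactory.getLog runs only on the first call for a class. The same memoization pattern in a self-contained form, with a placeholder computation instead of a logger:

import scala.collection.mutable

object MemoizeSketch extends App {
  val cache = mutable.HashMap.empty[String, Int]

  def expensiveLookup(key: String): Int = {
    println(s"computing for $key")  // printed only on a cache miss
    key.length
  }

  // The second argument of getOrElseUpdate is by-name: it runs only when the key is absent
  println(cache.getOrElseUpdate("spotlight", expensiveLookup("spotlight")))
  println(cache.getOrElseUpdate("spotlight", expensiveLookup("spotlight")))  // served from the cache
}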
Example 104
Source File: GenerativeContextSimilarity.scala From dbpedia-spotlight-model with Apache License 2.0 | 5 votes |
package org.dbpedia.spotlight.db.similarity import org.dbpedia.spotlight.db.model.{ContextStore, TokenTypeStore} import org.dbpedia.spotlight.model.{DBpediaResource, TokenType} import org.dbpedia.spotlight.util.MathUtil import scala.collection.mutable def p(token: TokenType, res: DBpediaResource, cResAndToken: Int): Double = { val pML = if (cResAndToken == 0 || contextStore.getTotalTokenCount(res) == 0 ) 0.0 else cResAndToken.toDouble / contextStore.getTotalTokenCount(res) val ml = MathUtil.lnproduct(MathUtil.ln(lambda), MathUtil.ln(pML)) val lm = MathUtil.lnproduct(MathUtil.ln(1-lambda), pLM(token)) MathUtil.lnsum( lm, if(ml.isNaN) MathUtil.LOGZERO else ml ) } def intersect(query: Seq[TokenType], res: DBpediaResource): Seq[(TokenType, Int)] = { val (tokens, counts) = contextStore.getRawContextCounts(res) if (tokens.length == 0) { query.map( t => (t, 0)) } else { var j = 0 query.map { t: TokenType => while(j < tokens.length-1 && tokens(j) < t.id) { j += 1 } if(tokens(j) == t.id) (t, counts(j)) else (t, 0) } } } def score(query: Seq[TokenType], candidates: Set[DBpediaResource]): mutable.Map[DBpediaResource, Double] = { val contextScores = mutable.HashMap[DBpediaResource, Double]() candidates.map( res => { contextScores.put( res, MathUtil.lnproduct( intersect(query, res).map({ case(token: TokenType, cResAndToken: Int) =>p(token, res, cResAndToken) }) .filter(s => !MathUtil.isLogZero(s)) ) ) }) contextScores } def nilScore(query: Seq[TokenType]): Double = { MathUtil.lnproduct( query.map{ t: TokenType => MathUtil.lnproduct(MathUtil.ln(1-lambda), pLM(t)) } ) } }
Example 105
Source File: JarLoaderEngineHook.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.enginemanager.hook import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.enginemanager.{Engine, EngineHook} import com.webank.wedatasphere.linkis.enginemanager.conf.EngineManagerConfiguration.ENGINE_UDF_APP_NAME import com.webank.wedatasphere.linkis.protocol.engine.RequestEngine import com.webank.wedatasphere.linkis.rpc.Sender import com.webank.wedatasphere.linkis.udf.api.rpc.{RequestUdfTree, ResponseUdfTree} import com.webank.wedatasphere.linkis.udf.entity.{UDFInfo, UDFTree} import org.apache.commons.collections.CollectionUtils import org.apache.commons.io.FileUtils import org.apache.commons.lang.StringUtils import org.codehaus.jackson.map.ObjectMapper import scala.collection.JavaConversions._ import scala.collection.mutable class JarLoaderEngineHook extends EngineHook with Logging{ override def beforeCreateSession(requestEngine: RequestEngine): RequestEngine = { info("start loading UDFs") val udfInfos = extractUdfInfos(requestEngine).filter{info => info.getUdfType == 0 && info.getExpire == false && StringUtils.isNotBlank(info.getPath) && isJarExists(info) && info.getLoad == true } // add to class path val jars = new mutable.HashSet[String]() udfInfos.foreach{udfInfo => jars.add("file://" + udfInfo.getPath)} val jarPaths = jars.mkString(",") if(StringUtils.isBlank(requestEngine.properties.get("jars"))){ requestEngine.properties.put("jars", jarPaths) } else { requestEngine.properties.put("jars", requestEngine.properties.get("jars") + "," + jarPaths) } info("added jars: " + jarPaths) //jars.foreach(fetchRemoteFile) //info("copied jars.") info("end loading UDFs") requestEngine } override def afterCreatedSession(engine: Engine, requestEngine: RequestEngine): Unit = { } protected def isJarExists(udfInfo: UDFInfo) : Boolean = { true // if(FileUtils.getFile(udfInfo.getPath).exists()){ // true // } else { // info(s"The jar file [${udfInfo.getPath}] of UDF [${udfInfo.getUdfName}] doesn't exist, ignore it.") // false // } } protected def extractUdfInfos(requestEngine: RequestEngine): mutable.ArrayBuffer[UDFInfo] = { val udfInfoBuilder = new mutable.ArrayBuffer[UDFInfo] val userName = requestEngine.user val udfTree = queryUdfRpc(userName) extractUdfInfos(udfInfoBuilder, udfTree, userName) udfInfoBuilder } protected def extractUdfInfos(udfInfoBuilder: mutable.ArrayBuffer[UDFInfo], udfTree: UDFTree, userName: String) : Unit = { if(CollectionUtils.isNotEmpty(udfTree.getUdfInfos)){ for(udfInfo <- udfTree.getUdfInfos){ udfInfoBuilder.append(udfInfo) } } if(CollectionUtils.isNotEmpty(udfTree.getChildrens)){ for(child <- udfTree.getChildrens){ var childInfo = child if(TreeType.specialTypes.contains(child.getUserName)){ childInfo = queryUdfRpc(userName, child.getId, child.getUserName) } else { childInfo = queryUdfRpc(userName, child.getId, TreeType.SELF) } extractUdfInfos(udfInfoBuilder, childInfo, userName) } } } private def queryUdfRpc(userName: String, treeId: Long = -1, treeType: String = "self"): UDFTree = { val udfTree = Sender.getSender(ENGINE_UDF_APP_NAME.getValue) .ask(RequestUdfTree(userName, treeType, treeId, "udf")) .asInstanceOf[ResponseUdfTree] .udfTree //info("got udf tree:" + new ObjectMapper().writer().withDefaultPrettyPrinter().writeValueAsString(udfTree)) udfTree } }
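beforeCreateSession gathers jar paths into a scala.collection.mutable.HashSet so each path is added once before being joined with mkString. A self-contained sketch of that de-duplication step with invented paths:

import scala.collection.mutable

object DedupeJarPathsSketch extends App {
  // Made-up UDF jar paths, with a duplicate on purpose
  val udfPaths = Seq("/udfs/a.jar", "/udfs/b.jar", "/udfs/a.jar")

  val jars = new mutable.HashSet[String]()
  udfPaths.foreach(path => jars.add("file://" + path))

  // Each path appears once; HashSet iteration order is unspecified
  println(jars.mkString(","))
}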
Example 106
Source File: EventGroupFactory.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.resourcemanager.schedule import com.webank.wedatasphere.linkis.common.ServiceInstance import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.resourcemanager.event.metric.MetricRMEvent import com.webank.wedatasphere.linkis.scheduler.queue.parallelqueue.ParallelGroup import com.webank.wedatasphere.linkis.scheduler.queue.{Group, GroupFactory, SchedulerEvent} import scala.collection.mutable abstract class EventGroupFactory extends GroupFactory { def getGroupNameByModule(moduleInstance: ServiceInstance): String def getGroupNameByUser(user: String): String } class EventGroupFactoryImpl extends EventGroupFactory with Logging { private val groupMap = new mutable.HashMap[String, Group]() private val RM_CONTEXT_CONSTRUCTOR_LOCK = new Object() private val maxGroupNum = 100 def getInitCapacity(groupName: String): Int = 100 def getMaxCapacity(groupName: String): Int = 1000 def getBKDRHash2(str: String): Int = { val seed: Int = 131 var hash: Int = 0 for (i <- 0 to str.length - 1) { hash = hash * seed + str.charAt(i) hash = hash & 0x7FFFFFFF info("current hash code result is " + hash.toString) } return hash } override def getGroupNameByModule(moduleInstance: ServiceInstance) = { //val inputStr = moduleInstance.ip+moduleInstance.port.toString+moduleInstance.moduleName //val hash = getBKDRHash2(inputStr) val hash = moduleInstance.hashCode() val groupName = hash % maxGroupNum groupName.toString } override def getGroupNameByUser(user: String) = { //val hash = getBKDRHash2(user) val hash = user.hashCode val groupName = hash % maxGroupNum groupName.toString } override def getOrCreateGroup(groupName: String) = { RM_CONTEXT_CONSTRUCTOR_LOCK.synchronized { if (groupMap.get(groupName).isDefined) { groupMap.get(groupName).get } else { val group = new ParallelGroup(groupName, getInitCapacity(groupName), getMaxCapacity(groupName)) groupMap.put(groupName, group) group } } } override def getGroupNameByEvent(event: SchedulerEvent) = { event match { case metricRMEvent: MetricRMEvent => { "METRIC" } case _ => { val hash = event.hashCode val groupName = hash % maxGroupNum groupName.toString } } } }
Example 107
Source File: EventConsumerManager.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.resourcemanager.schedule import java.util.concurrent.ExecutorService import com.webank.wedatasphere.linkis.common.utils.{Logging, Utils} import com.webank.wedatasphere.linkis.resourcemanager.event.RMEvent import com.webank.wedatasphere.linkis.scheduler.SchedulerContext import com.webank.wedatasphere.linkis.scheduler.listener.ConsumerListener import com.webank.wedatasphere.linkis.scheduler.queue.{ConsumerManager, LoopArrayQueue} import scala.collection.mutable override def getOrCreateConsumer(groupName: String) = { RM_CONTEXT_CONSTRUCTOR_LOCK.synchronized { var tmpConsumer = consumerGroupMap.get(groupName).getOrElse(null) if (tmpConsumer == null) { tmpConsumer = createConsumer(groupName) } tmpConsumer } } override protected def createConsumer(groupName: String) = { val group = schedulerContext.getOrCreateGroupFactory.getOrCreateGroup(groupName) val consumer = new RMEventConsumer(schedulerContext, getOrCreateExecutorService, group) consumer.start() val listener = new RMConsumerListenerImpl listener.setConsumer(consumer) consumer.setConsumeQueue(new LoopArrayQueue(group)) consumer.setRmConsumerListener(listener) consumerGroupMap.put(groupName, consumer) consumerListenerMap.put(groupName, listener) if (consumerListener != null) consumerListener.onConsumerCreated(consumer) consumer } protected def createConsumerFromConsumer(oldConsumer: RMEventConsumer) = { var newConsumer: RMEventConsumer = null if (oldConsumer != null) { info("Create new consumer from old consumer " + oldConsumer.getGroup.getGroupName) val groupName = oldConsumer.getGroup.getGroupName val group = schedulerContext.getOrCreateGroupFactory.getOrCreateGroup(groupName) newConsumer = new RMEventConsumer(schedulerContext, getOrCreateExecutorService, group) newConsumer.start() val listener = new RMConsumerListenerImpl listener.setConsumer(newConsumer) newConsumer.setConsumeQueue(oldConsumer.getConsumeQueue) newConsumer.setRmConsumerListener(listener) consumerListenerMap.update(groupName, listener) if (consumerListener != null) consumerListener.onConsumerCreated(newConsumer) } newConsumer } override def destroyConsumer(groupName: String) = { val tmpConsumer = consumerGroupMap.get(groupName).getOrElse(null) if (tmpConsumer != null) { tmpConsumer.shutdown() consumerGroupMap.remove(groupName) if (consumerListener != null) consumerListener.onConsumerDestroyed(tmpConsumer) } } override def shutdown() = { Utils.tryThrow({ consumerGroupMap.values.toArray.foreach(x => x.shutdown()) executorService.shutdown() })(t => new Exception("ConsumerManager shutdown exception", t)) } override def listConsumers() = consumerGroupMap.values.toArray override def getOrCreateExecutorService: ExecutorService = { RM_CONTEXT_CONSTRUCTOR_LOCK.synchronized { if (executorService == null) { executorService = Utils.newCachedThreadPool(3 * maxParallelismUsers + 1, "Engine-Scheduler-ThreadPool-", true) executorService } else { executorService } } } }
Example 108
Source File: DefaultUserMetaData.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.resourcemanager.service.metadata import java.util import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.protocol.config.RequestQueryAppConfigWithGlobal import com.webank.wedatasphere.linkis.protocol.utils.ProtocolUtils import com.webank.wedatasphere.linkis.resourcemanager.ResourceRequestPolicy._ import com.webank.wedatasphere.linkis.resourcemanager._ import com.webank.wedatasphere.linkis.resourcemanager.exception.RMWarnException import com.webank.wedatasphere.linkis.resourcemanager.utils.RMConfiguration._ import org.springframework.beans.factory.annotation.Autowired import org.springframework.stereotype.Component import scala.collection.mutable @Component class DefaultUserMetaData extends UserMetaData with Logging { @Autowired var moduleResourceRecordService: ModuleResourceRecordService = _ override def getUserAvailableResource(moduleName: String, user: String, creator: String): (UserAvailableResource, UserAvailableResource) = { val policy = moduleResourceRecordService.getModulePolicy(moduleName) val appName = ProtocolUtils.getAppName(moduleName).getOrElse(moduleName) val userModuleAvailableResource = UserAvailableResource(moduleName, generateResource(policy, UserConfiguration.getCacheMap(RequestQueryAppConfigWithGlobal(user, null, appName, true)))) val userCreatorAvailableResource = UserAvailableResource(moduleName, generateResource(policy, UserConfiguration.getCacheMap(RequestQueryAppConfigWithGlobal(user, creator, appName, true)))) info(s"$user available resource of module:$userModuleAvailableResource,on creator available resource:$userCreatorAvailableResource") (userModuleAvailableResource, userCreatorAvailableResource) } override def getUserGlobalInstanceLimit(user: String): Int = { val userConfiguration = UserConfiguration.getCacheMap(RequestQueryAppConfigWithGlobal(user, null, null, true)) USER_AVAILABLE_INSTANCE.getValue(userConfiguration) } def generateResource(policy: ResourceRequestPolicy, userConfiguration: util.Map[String, String]): Resource = policy match { case CPU => new CPUResource(USER_AVAILABLE_CPU.getValue(userConfiguration)) case Memory => new MemoryResource(USER_AVAILABLE_MEMORY.getValue(userConfiguration).toLong) case Load => new LoadResource(USER_AVAILABLE_MEMORY.getValue(userConfiguration).toLong, USER_AVAILABLE_CPU.getValue(userConfiguration)) case Instance => new InstanceResource(USER_AVAILABLE_INSTANCE.getValue(userConfiguration)) case LoadInstance => new LoadInstanceResource(USER_AVAILABLE_MEMORY.getValue(userConfiguration).toLong, USER_AVAILABLE_CPU.getValue(userConfiguration), USER_AVAILABLE_INSTANCE.getValue(userConfiguration)) case Yarn => new YarnResource(USER_AVAILABLE_YARN_INSTANCE_MEMORY.getValue(userConfiguration).toLong, USER_AVAILABLE_YARN_INSTANCE_CPU.getValue(userConfiguration), USER_AVAILABLE_YARN_INSTANCE.getValue(userConfiguration), USER_AVAILABLE_YARN_QUEUE_NAME.getValue(userConfiguration)) case DriverAndYarn => new DriverAndYarnResource(new LoadInstanceResource(USER_AVAILABLE_MEMORY.getValue(userConfiguration).toLong, USER_AVAILABLE_CPU.getValue(userConfiguration), USER_AVAILABLE_INSTANCE.getValue(userConfiguration)), new YarnResource(USER_AVAILABLE_YARN_INSTANCE_MEMORY.getValue(userConfiguration).toLong, USER_AVAILABLE_YARN_INSTANCE_CPU.getValue(userConfiguration), USER_AVAILABLE_YARN_INSTANCE.getValue(userConfiguration), USER_AVAILABLE_YARN_QUEUE_NAME.getValue(userConfiguration))) case Special => new SpecialResource(new java.util.HashMap[String, AnyVal]()) case 
_ => throw new RMWarnException(111003, "not supported resource result policy ") } override def getUserModuleInfo(moduleName: String, user: String): Map[String, Any] = { val appName = ProtocolUtils.getAppName(moduleName).getOrElse(moduleName) val userConfiguration = UserConfiguration.getCacheMap(RequestQueryAppConfigWithGlobal(user, null, appName, true)) val userModuleInfo = new mutable.HashMap[String, Any]() userModuleInfo.put("waitUsed", USER_MODULE_WAIT_USED.getValue(userConfiguration)) userModuleInfo.put("waitReleased", USER_MODULE_WAIT_RELEASE.getValue(userConfiguration)) userModuleInfo.toMap } }
Example 109
Source File: ZookeeperDistributedQueue.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.resourcemanager.notify import java.util.Collections import com.webank.wedatasphere.linkis.common.utils.Logging import org.apache.zookeeper.ZooDefs.Ids import org.apache.zookeeper.{CreateMode, KeeperException, ZKUtil, ZooKeeper} import scala.collection.JavaConversions._ import scala.collection.mutable class ZookeeperDistributedQueue(zk: ZooKeeper, var queueName: String) extends DistributedQueue[Array[Byte]] with Logging { if (!queueName.startsWith("/")) queueName = "/" + queueName try if (zk.exists(queueName, false) == null) zk.create(queueName, new Array[Byte](0), Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT) catch { case e: KeeperException => error(s"Failed to create queue[$queueName]: ", e) } override def offer(value: Array[Byte]): Unit = { zk.create(queueName + "/element", value, Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT_SEQUENTIAL) } override def poll(): Array[Byte] = { val path = head() if (path == null) return null val value = zk.getData(path, false, null) zk.delete(path, -1) value } override def peek(): Array[Byte] = { val path = head() if (path == null) return null zk.getData(path, false, null) } override def destroy(): Unit = { try if (zk.exists(queueName, false) == null) info(s"Queue[$queueName] already destroyed.") else ZKUtil.deleteRecursive(zk, queueName) catch { case e: KeeperException => error(s"Failed to destroy queue[$queueName]: ", e) } } private def head(): String = { val elements = zk.getChildren(queueName, false) if (elements.size == 0) return null Collections.sort(elements) queueName + "/" + elements.get(0) } override def copyToArray(): Array[Array[Byte]] = { val elements = zk.getChildren(queueName, false) if (elements.size == 0) return new Array[Array[Byte]](0) elements.map({ e => zk.getData(queueName + "/" + e, false, null) }).toArray } def indexOf(bytes: Array[Byte]): String = { val elements = zk.getChildren(queueName, false) elements.find(e => bytes.equals(zk.getData(queueName + "/" + e, false, null))).getOrElse("") } def copyToMap(): mutable.Map[String, Array[Byte]] = { val resultMap = mutable.Map.empty[String, Array[Byte]] val elements = zk.getChildren(queueName, false) if (elements.size == 0) return resultMap elements.map(e => resultMap.put(e, zk.getData(queueName + "/" + e, false, null))) resultMap } def remove(index: String) = if (index.length != 0) zk.delete(queueName + "/" + index, -1) } object ZookeeperDistributedQueue { def apply(queueName: String): ZookeeperDistributedQueue = new ZookeeperDistributedQueue(ZookeeperUtils.getOrCreateZookeeper(), queueName) def apply(zk: ZooKeeper, queueName: String): ZookeeperDistributedQueue = new ZookeeperDistributedQueue(zk, queueName) }
Example 110
Source File: package.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis import java.util import javax.servlet.http.HttpServletRequest import com.webank.wedatasphere.linkis.common.exception.{ErrorException, ExceptionManager, FatalException, WarnException} import com.webank.wedatasphere.linkis.common.utils.Utils import com.webank.wedatasphere.linkis.server.exception.{BDPServerErrorException, NonLoginException} import com.webank.wedatasphere.linkis.server.security.SecurityFilter import org.apache.commons.lang.StringUtils import org.apache.commons.lang.exception.ExceptionUtils import org.slf4j.Logger import scala.collection.{JavaConversions, mutable} package object server { val EXCEPTION_MSG = "errorMsg" type JMap[K, V] = java.util.HashMap[K, V] implicit def getUser(req: HttpServletRequest): String = SecurityFilter.getLoginUsername(req) def validateFailed(message: String): Message = Message(status = 2).setMessage(message) def validate[T](json: util.Map[String, T], keys: String*): Unit = { keys.foreach(k => if(!json.contains(k) || json.get(k) == null || StringUtils.isEmpty(json.get(k).toString)) throw new BDPServerErrorException(11001, s"Verification failed, $k cannot be empty!(验证失败,$k 不能为空!)")) } def error(message: String): Message = Message.error(message) implicit def ok(msg: String): Message = Message.ok(msg) implicit def error(t: Throwable): Message = Message.error(t) implicit def error(e: (String, Throwable)): Message = Message.error(e) implicit def error(msg: String, t: Throwable): Message = Message.error(msg -> t) // def tryCatch[T](tryOp: => T)(catchOp: Throwable => T): T = Utils.tryCatch(tryOp)(catchOp) // def tryCatch(tryOp: => Message)(catchOp: Throwable => Message): Message = Utils.tryCatch(tryOp){ // case nonLogin: NonLoginException => Message.noLogin(msg = nonLogin.getMessage) // case t => catchOp(t) // } def catchMsg(tryOp: => Message)(msg: String)(implicit log: Logger): Message = Utils.tryCatch(tryOp){ case fatal: FatalException => log.error("Fatal Error, system exit...", fatal) System.exit(fatal.getErrCode) Message.error("Fatal Error, system exit...") case nonLogin: NonLoginException => val message = Message.noLogin(nonLogin.getMessage) message.data(EXCEPTION_MSG, nonLogin.toMap) message case error: ErrorException => val cause = error.getCause val errorMsg = cause match { case t: ErrorException => s"error code(错误码): ${t.getErrCode}, error message(错误信息): ${t.getDesc}." case _ => s"error code(错误码): ${error.getErrCode}, error message(错误信息): ${error.getDesc}." } log.error(errorMsg, error) val message = Message.error(errorMsg) message.data(EXCEPTION_MSG, error.toMap) message case warn: WarnException => val warnMsg = s"Warning code(警告码): ${warn.getErrCode}, Warning message(警告信息): ${warn.getDesc}." 
log.warn(warnMsg, warn) val message = Message.warn(warnMsg) message.data(EXCEPTION_MSG, warn.toMap) message case t => log.error(msg, t) val errorMsg = ExceptionUtils.getRootCauseMessage(t) val message = if(StringUtils.isNotEmpty(errorMsg) && "operation failed(操作失败)" != msg) error(msg + "!the reason(原因):" + errorMsg) else if(StringUtils.isNotEmpty(errorMsg)) error(errorMsg) else error(msg) message.data(EXCEPTION_MSG, ExceptionManager.unknownException(message.getMessage)) } def catchIt(tryOp: => Message)(implicit log: Logger): Message = catchMsg(tryOp)("operation failed(操作失败)s") implicit def toScalaBuffer[T](list: util.List[T]): mutable.Buffer[T] = JavaConversions.asScalaBuffer(list) implicit def toScalaMap[K, V](map: util.Map[K, V]): mutable.Map[K, V] = JavaConversions.mapAsScalaMap(map) implicit def toJavaList[T](list: mutable.Buffer[T]): util.List[T] = { val arrayList = new util.ArrayList[T] list.foreach(arrayList.add) arrayList } implicit def toJavaMap[K, V](map: mutable.Map[K, V]): JMap[K, V] = { val hashMap = new util.HashMap[K, V]() map.foreach(m => hashMap.put(m._1, m._2)) hashMap } implicit def toJavaMap[K, V](map: Map[K, V]): JMap[K, V] = { val hashMap = new util.HashMap[K, V]() map.foreach(m => hashMap.put(m._1, m._2)) hashMap } implicit def asString(mapWithKey: (util.Map[String, Object], String)): String = mapWithKey._1.get(mapWithKey._2).asInstanceOf[String] implicit def getString(mapWithKey: (util.Map[String, String], String)): String = mapWithKey._1.get(mapWithKey._2) implicit def asInt(map: util.Map[String, Object], key: String): Int = map.get(key).asInstanceOf[Int] implicit def asBoolean(mapWithKey: (util.Map[String, Object], String)): Boolean = mapWithKey._1.get(mapWithKey._2).asInstanceOf[Boolean] }
Example 111
Source File: ParallelConsumerManager.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.scheduler.queue.parallelqueue import java.util.concurrent.ExecutorService import com.webank.wedatasphere.linkis.common.utils.Utils import com.webank.wedatasphere.linkis.scheduler.listener.ConsumerListener import com.webank.wedatasphere.linkis.scheduler.queue._ import com.webank.wedatasphere.linkis.scheduler.queue.fifoqueue.FIFOUserConsumer import scala.collection.mutable class ParallelConsumerManager(maxParallelismUsers: Int)extends ConsumerManager{ private val UJES_CONTEXT_CONSTRUCTOR_LOCK = new Object() private var consumerListener: Option[ConsumerListener] = None private var executorService: ExecutorService = _ private val consumerGroupMap = new mutable.HashMap[String, FIFOUserConsumer]() override def setConsumerListener(consumerListener: ConsumerListener) = { this.consumerListener = Some(consumerListener) } override def getOrCreateExecutorService = if(executorService != null) executorService else UJES_CONTEXT_CONSTRUCTOR_LOCK.synchronized { if (executorService == null) { executorService = Utils.newCachedThreadPool(5 * maxParallelismUsers + 1, "Engine-Scheduler-ThreadPool-", true) } executorService } override def getOrCreateConsumer(groupName: String) = if(consumerGroupMap.contains(groupName)) consumerGroupMap(groupName) else UJES_CONTEXT_CONSTRUCTOR_LOCK.synchronized { consumerGroupMap.getOrElse(groupName, { val newConsumer = createConsumer(groupName) val group = getSchedulerContext.getOrCreateGroupFactory.getOrCreateGroup(groupName) newConsumer.setGroup(group) newConsumer.setConsumeQueue(new LoopArrayQueue(group)) consumerGroupMap.put(groupName, newConsumer) consumerListener.foreach(_.onConsumerCreated(newConsumer)) newConsumer.start() newConsumer }) } override protected def createConsumer(groupName: String) = { val group = getSchedulerContext.getOrCreateGroupFactory.getOrCreateGroup(groupName) new FIFOUserConsumer(getSchedulerContext, getOrCreateExecutorService, group) } override def destroyConsumer(groupName: String) = consumerGroupMap.get(groupName).foreach { tmpConsumer => tmpConsumer.shutdown() consumerGroupMap.remove(groupName) consumerListener.foreach(_.onConsumerDestroyed(tmpConsumer)) } override def shutdown() = { consumerGroupMap.iterator.foreach(x => x._2.shutdown()) } override def listConsumers() = consumerGroupMap.values.toArray }
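getOrCreateConsumer does a cheap unsynchronized lookup and only enters the lock on a miss, re-checking the map inside it. A stripped-down, self-contained sketch of that caching shape, with a String standing in for the consumer:

import scala.collection.mutable

object GetOrCreateCacheSketch {
  private val LOCK = new Object()
  private val cache = new mutable.HashMap[String, String]()

  // Same shape as getOrCreateConsumer: cheap read first, create under the lock on a miss
  def getOrCreate(groupName: String): String =
    if (cache.contains(groupName)) cache(groupName)
    else LOCK.synchronized {
      // Re-check inside the lock so two racing callers do not both create a value
      cache.getOrElseUpdate(groupName, s"consumer-for-$groupName")
    }

  def main(args: Array[String]): Unit = {
    println(getOrCreate("hduser"))
    println(getOrCreate("hduser"))  // second call returns the cached value
  }
}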
Example 112
Source File: ParallelGroupFactory.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.scheduler.queue.parallelqueue import com.webank.wedatasphere.linkis.scheduler.queue.{Group, GroupFactory, Job, SchedulerEvent} import scala.collection.mutable class ParallelGroupFactory extends GroupFactory{ private val groupMap = new mutable.HashMap[String, Group]() def getInitCapacity(groupName: String): Int= 100 def getMaxCapacity(groupName: String): Int = 1000 private val UJES_CONTEXT_CONSTRUCTOR_LOCK = new Object() override def getOrCreateGroup(groupName: String) = { UJES_CONTEXT_CONSTRUCTOR_LOCK.synchronized { if (groupMap.get(groupName).isDefined) { groupMap.get(groupName).get } else { val group = new ParallelGroup(groupName, getInitCapacity(groupName), getMaxCapacity(groupName)) groupMap.put(groupName, group) group } } } override def getGroupNameByEvent(event: SchedulerEvent) = { val belongList = groupMap.values.filter(x => x.belongTo(event)).map(x => x.getGroupName).toList if(belongList.size > 0){ belongList(0) }else{ "NULL" } } }
Example 113
Source File: FIFOGroupFactory.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.scheduler.queue.fifoqueue import com.webank.wedatasphere.linkis.scheduler.queue.{Group, GroupFactory, Job, SchedulerEvent} import scala.collection.mutable class FIFOGroupFactory extends GroupFactory { private val groupMap = new mutable.HashMap[String, Group]() private val UJES_CONTEXT_CONSTRUCTOR_LOCK = new Object() //Obtained from the database(从数据库获取) def getInitCapacity(groupName: String): Int = 1000 def getMaxCapacity(groupName: String): Int = 10000 override def getOrCreateGroup(groupName: String) = { UJES_CONTEXT_CONSTRUCTOR_LOCK.synchronized { if (groupMap.get(groupName).isDefined) { groupMap.get(groupName).get } else { val group = new FIFOGroup(groupName, getInitCapacity(groupName), getMaxCapacity(groupName)) groupMap.put(groupName, group) group } } } override def getGroupNameByEvent(event: SchedulerEvent) = "FIFOGROUP" }
Example 114
Source File: DWCArgumentsParser.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.common.conf import org.apache.commons.lang.StringUtils import scala.collection.{JavaConversions, mutable} import scala.collection.mutable.ArrayBuffer object DWCArgumentsParser { protected val DWC_CONF = "--dwc-conf" protected val SPRING_CONF = "--spring-conf" private var dwcOptionMap = Map.empty[String, String] private[linkis] def setDWCOptionMap(dwcOptionMap: Map[String, String]) = this.dwcOptionMap = dwcOptionMap def getDWCOptionMap = dwcOptionMap def parse(args: Array[String]): DWCArgumentsParser = { val keyValueRegex = "([^=]+)=(.+)".r var i = 0 val optionParser = new DWCArgumentsParser while(i < args.length) { args(i) match { case DWC_CONF | SPRING_CONF => args(i + 1) match { case keyValueRegex(key, value) => optionParser.setConf(args(i), key, value) i += 1 case _ => throw new IllegalArgumentException("illegal commond line, format: --conf key=value.") } case _ => throw new IllegalArgumentException(s"illegal commond line, ${args(i)} cannot recognize.") } i += 1 } optionParser.validate() optionParser } def formatToArray(optionParser: DWCArgumentsParser): Array[String] = { val options = ArrayBuffer[String]() def write(confMap: Map[String, String], optionType: String): Unit = confMap.foreach { case (key, value) => if (StringUtils.isNotEmpty(key) && StringUtils.isNotEmpty(value)) { options += optionType options += (key + "=" + value) } } write(optionParser.getDWCConfMap, DWC_CONF) write(optionParser.getSpringConfMap, SPRING_CONF) options.toArray } def formatToArray(springOptionMap: Map[String, String], dwcOptionMap: Map[String, String]): Array[String] = formatToArray(new DWCArgumentsParser().setSpringConf(springOptionMap).setDWCConf(dwcOptionMap)) def format(optionParser: DWCArgumentsParser): String = formatToArray(optionParser).mkString(" ") def format(springOptionMap: Map[String, String], dwcOptionMap: Map[String, String]): String = formatToArray(springOptionMap, dwcOptionMap).mkString(" ") def formatSpringOptions(springOptionMap: Map[String, String]): Array[String] = { val options = ArrayBuffer[String]() springOptionMap.foreach { case (key, value) => if (StringUtils.isNotEmpty(key) && StringUtils.isNotEmpty(value)) { options += ("--" + key + "=" + value) } } options.toArray } } class DWCArgumentsParser { import DWCArgumentsParser._ private val dwcOptionMap = new mutable.HashMap[String, String]() private val springOptionMap = new mutable.HashMap[String, String]() def getSpringConfMap = springOptionMap.toMap def getSpringConfs = JavaConversions.mapAsJavaMap(springOptionMap) def getDWCConfMap = dwcOptionMap.toMap def setConf(optionType: String, key: String, value: String) = { optionType match { case DWC_CONF => dwcOptionMap += key -> value case SPRING_CONF => springOptionMap += key -> value } this } def setSpringConf(optionMap: Map[String, String]): DWCArgumentsParser = { if(optionMap != null) this.springOptionMap ++= optionMap this } def setDWCConf(optionMap: Map[String, String]): DWCArgumentsParser = { if(optionMap != null) this.dwcOptionMap ++= optionMap this } def validate() = {} }
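parse walks the argument array and splits each --dwc-conf / --spring-conf pair with a key=value regex before storing it in a mutable map. A self-contained sketch of the same parsing step, reduced to a single invented --conf option type:

import scala.collection.mutable

object KeyValueArgsSketch extends App {
  val keyValueRegex = "([^=]+)=(.+)".r
  // Invented command line
  val argv = Array("--conf", "spark.master=local[2]", "--conf", "spark.app.name=demo")

  val conf = new mutable.HashMap[String, String]()
  var i = 0
  while (i < argv.length) {
    argv(i) match {
      case "--conf" =>
        argv(i + 1) match {
          case keyValueRegex(key, value) => conf(key) = value
          case other => throw new IllegalArgumentException(s"expected key=value, got $other")
        }
        i += 1
      case unknown => throw new IllegalArgumentException(s"cannot recognize $unknown")
    }
    i += 1
  }

  println(conf.toMap)  // spark.master -> local[2], spark.app.name -> demo
}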
Example 115
Source File: AkkaIntroduction.scala From reactive-machine-learning-systems with MIT License | 5 votes |
package com.reactivemachinelearning import akka.actor.SupervisorStrategy.Restart import akka.actor._ import scala.collection.mutable import scala.util.Random object AkkaIntroduction extends App { val system = ActorSystem("voting") val connection = new DatabaseConnection("http://remotedatabase") val writerProps = Props(new VoteWriter(connection)) val writerSuperProps = Props(new WriterSupervisor(writerProps)) val votingSystem = system.actorOf(writerSuperProps) votingSystem ! Vote(1, 5, "nom nom") votingSystem ! Vote(2, 7, "Mikey") votingSystem ! Vote(3, 9, "nom nom") println(connection.votes) } case class Vote(timestamp: Long, voterId: Long, howler: String) class VoteWriter(connection: DatabaseConnection) extends Actor { def receive = { case Vote(timestamp, voterId, howler) => connection.insert(Map("timestamp" -> timestamp, "voterId" -> voterId, "howler" -> howler)) } } class WriterSupervisor(writerProps: Props) extends Actor { override def supervisorStrategy = OneForOneStrategy() { case exception: Exception => Restart } val writer = context.actorOf(writerProps) def receive = { case message => writer forward message } } class DatabaseConnection(url: String) { var votes = new mutable.HashMap[String, Any]() def insert(updateMap: Map[String, Any]) = { if (Random.nextBoolean()) throw new Exception updateMap.foreach { case (key, value) => votes.update(key, value) } } }
Example 116
Source File: _10_MutableCollections.scala From LearningScala with Apache License 2.0 | 5 votes |
package _020_collections object _10_MutableCollections { def main(args: Array[String]): Unit = { println("===== List buffers =====") listBufferExample() println() println("===== Array buffers =====") println(arrayBufferExample()) println() println("===== Mutable Sets =====") mutableSetExample() println() println("===== Mutable Maps =====") mutableMapExample() } private def mutableMapExample(): Unit = { import scala.collection.mutable val map = mutable.Map.empty[String, Int] println(map) map("hello") = 1 map("there") = 2 println(map) println(map("hello")) println("======") val nums = mutable.Map("i" -> 1, "ii" -> 2) println(nums) nums += ("vi" -> 6) println(nums) nums -= "ii" println(nums) nums ++= List("iii" -> 3, "v" -> 5) println(nums) nums --= List("i", "ii") println(nums) println("=====") println(s"nums.size: ${nums.size}") print("nums.contains(\"ii\"): ") println(nums.contains("ii")) print("nums(\"iii\"): ") println(nums("iii")) println(s"nums.keys ==> ${nums.keys}") println(s"nums.keySet ==> ${nums.keySet}") println(s"nums.values ==> ${nums.values}") println(s"nums.isEmpty: ${nums.isEmpty}") } def arrayBufferExample(): List[Int] = { import scala.collection.mutable.ArrayBuffer val ab = ArrayBuffer[Int](10, 20) ab += 30 ab += 40 ab.prepend(5) ab.toList //return immutable } private def listBufferExample(): Unit = { import scala.collection.mutable.ListBuffer val listBuffer = new ListBuffer[Int] listBuffer += 1 listBuffer += 2 println(listBuffer) 3 +=: listBuffer println(listBuffer) val list = listBuffer.toList println(list) } private def mutableSetExample(): Unit = { import scala.collection.mutable val emptySet = mutable.Set.empty[Int] println(emptySet) val nums = mutable.Set(1, 2, 3) println(nums) nums += 5 println(nums) nums -= 3 println(nums) nums ++= List(5, 6) println(nums) nums --= List(1, 2) println(nums) println(nums & Set(1, 3, 5, 7)) // intersection of two sets nums.clear() println(nums) } }
Example 117
Source File: _07_CaughtUpInClosures.scala From LearningScala with Apache License 2.0 | 5 votes |
package _970_scala_puzzlers import scala.collection.mutable object _07_CaughtUpInClosures { val accessors1: mutable.Buffer[() => Int] = mutable.Buffer.empty[() => Int] val accessors2: mutable.Buffer[() => Int] = mutable.Buffer.empty[() => Int] val data = Seq(100, 110, 120) var j = 0 for (i <- data.indices) { accessors1 += (() => data(i)) accessors2 += (() => data(j)) j += 1 } def main(args: Array[String]): Unit = { accessors1.foreach(a1 => println(a1())) // accessors2.foreach(a2 => println(a2())) // throws java.lang.IndexOutOfBoundsException: 3 println("\n===== Solution =====\n") Solution.accessors1.foreach(a1 => println(a1())) Solution.accessors2.foreach(a2 => println(a2())) } object Solution { val accessors1: mutable.Buffer[() => Int] = mutable.Buffer.empty[() => Int] val accessors2: mutable.Buffer[() => Int] = mutable.Buffer.empty[() => Int] val data = Seq(100, 110, 120) var j = 0 for (i <- data.indices) { val currentJ = j accessors1 += (() => data(i)) accessors2 += (() => data(currentJ)) j += 1 } } }
Example 118
Source File: CoreUnitTest.scala From SparkUnitTestingExamples with Apache License 2.0 | 5 votes |
package com.cloudera.sa.spark.unittest.core import org.apache.spark.{SparkConf, SparkContext} import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} import scala.collection.mutable class CoreUnitTest extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll{ @transient var sc: SparkContext = null override def beforeAll(): Unit = { val envMap = Map[String,String](("Xmx", "512m")) val sparkConfig = new SparkConf() sparkConfig.set("spark.broadcast.compress", "false") sparkConfig.set("spark.shuffle.compress", "false") sparkConfig.set("spark.shuffle.spill.compress", "false") sparkConfig.set("spark.io.compression.codec", "lzf") sc = new SparkContext("local[2]", "unit test", sparkConfig) } override def afterAll(): Unit = { sc.stop() } test("Test word count") { val quotesRDD = sc.parallelize(Seq("Courage is not simply one of the virtues, but the form of every virtue at the testing point", "We have a very active testing community which people don't often think about when you have open source", "Program testing can be used to show the presence of bugs, but never to show their absence", "Simple systems are not feasible because they require infinite testing", "Testing leads to failure, and failure leads to understanding")) val wordCountRDD = quotesRDD.flatMap(r => r.split(' ')). map(r => (r.toLowerCase, 1)). reduceByKey((a,b) => a + b) val wordMap = new mutable.HashMap[String, Int]() wordCountRDD.take(100). foreach{case(word, count) => wordMap.put(word, count)} //Note this is better then foreach(r => wordMap.put(r._1, r._2) assert(wordMap.get("to").get == 4, "The word count for 'to' should had been 4 but it was " + wordMap.get("to").get) assert(wordMap.get("testing").get == 5, "The word count for 'testing' should had been 5 but it was " + wordMap.get("testing").get) assert(wordMap.get("is").get == 1, "The word count for 'is' should had been 1 but it was " + wordMap.get("is").get) } }
Example 119
Source File: package.scala From magnolify with Apache License 2.0 | 5 votes |
package magnolify import scala.collection.{mutable, Factory} import scala.util.hashing.MurmurHash3 package object shims { trait Monadic[F[_]] extends mercator.Monadic[F] { def flatMapS[A, B](from: F[A])(fn: A => F[B]): F[B] def mapS[A, B](from: F[A])(fn: A => B): F[B] override def flatMap[A, B](from: F[A])(fn: A => F[B]): F[B] = flatMapS(from)(fn) override def map[A, B](from: F[A])(fn: A => B): F[B] = mapS(from)(fn) } trait FactoryCompat[-A, +C] extends Serializable { def newBuilder: mutable.Builder[A, C] def build(xs: IterableOnce[A]): C = newBuilder.addAll(xs).result() } object FactoryCompat { implicit def fromFactory[A, C](implicit f: Factory[A, C]): FactoryCompat[A, C] = new FactoryCompat[A, C] { override def newBuilder: mutable.Builder[A, C] = f.newBuilder } } object SerializableCanBuildFroms val JavaConverters = scala.jdk.CollectionConverters object MurmurHash3Compat { def seed(data: Int): Int = MurmurHash3.mix(MurmurHash3.productSeed, data) } }
Example 120
Source File: package.scala From magnolify with Apache License 2.0 | 5 votes |
package magnolify import scala.collection.generic.CanBuildFrom import scala.collection.mutable import scala.language.higherKinds import scala.reflect.ClassTag import scala.util.hashing.MurmurHash3 package object shims { trait Monadic[F[_]] extends mercator.Monadic[F] { def flatMapS[A, B](from: F[A])(fn: A => F[B]): F[B] def mapS[A, B](from: F[A])(fn: A => B): F[B] override def flatMap[A, B](from: F[A])(fn: A => F[B]): F[B] = flatMapS(from)(fn) override def map[A, B](from: F[A])(fn: A => B): F[B] = mapS(from)(fn) } trait FactoryCompat[-A, +C] extends Serializable { def newBuilder: mutable.Builder[A, C] def build(xs: TraversableOnce[A]): C = (newBuilder ++= xs).result() } object FactoryCompat extends LowPriorityFactoryCompat1 { private type FC[A, C] = FactoryCompat[A, C] def apply[A, C](f: () => mutable.Builder[A, C]): FC[A, C] = new FactoryCompat[A, C] { override def newBuilder: mutable.Builder[A, C] = f() } implicit def arrayFC[A: ClassTag] = FactoryCompat(() => Array.newBuilder[A]) // Deprecated in 2.13 // implicit def traversableFC[A] = FactoryCompat(() => Traversable.newBuilder[A]) // List <: Iterable // implicit def iterableFC[A] = FactoryCompat(() => Iterable.newBuilder[A]) // List <: Seq // implicit def seqFC[A] = FactoryCompat(() => Seq.newBuilder[A]) // Vector <: IndexedSeq // implicit def indexedSeqFC[A] = FactoryCompat(() => IndexedSeq.newBuilder[A]) } trait LowPriorityFactoryCompat1 extends LowPriorityFactoryCompat2 { implicit def listFC[A] = FactoryCompat(() => List.newBuilder[A]) } trait LowPriorityFactoryCompat2 { implicit def vectorFC[A] = FactoryCompat(() => Vector.newBuilder[A]) // Deprecated in 2.13 // implicit def streamFC[A] = FactoryCompat(() => Stream.newBuilder[A]) } object SerializableCanBuildFroms { private def cbf[A, C](f: () => mutable.Builder[A, C]): CanBuildFrom[C, A, C] = new CanBuildFrom[C, A, C] with Serializable { override def apply(from: C): mutable.Builder[A, C] = f() override def apply(): mutable.Builder[A, C] = f() } implicit def arrayCBF[A: ClassTag] = cbf(() => Array.newBuilder[A]) implicit def traversableCBF[A] = cbf(() => Traversable.newBuilder[A]) implicit def iterableCBF[A] = cbf(() => Iterable.newBuilder[A]) implicit def seqCBF[A] = cbf(() => Seq.newBuilder[A]) implicit def indexedSeqCBF[A] = cbf(() => IndexedSeq.newBuilder[A]) implicit def listCBF[A] = cbf(() => List.newBuilder[A]) implicit def vectorCBF[A] = cbf(() => Vector.newBuilder[A]) implicit def streamCBF[A] = cbf(() => Stream.newBuilder[A]) } val JavaConverters = scala.collection.JavaConverters object MurmurHash3Compat { def seed(data: Int): Int = MurmurHash3.productSeed } }
Example 121
Source File: LogsDefinition.scala From algoliasearch-client-scala with MIT License | 5 votes |
package algolia.definitions import algolia.http.{GET, HttpPayload} import algolia.objects.RequestOptions import algolia.responses.LogType import scala.collection.mutable case class LogsDefinition( offset: Option[Int] = None, length: Option[Int] = None, `type`: Option[LogType] = None, requestOptions: Option[RequestOptions] = None ) extends Definition { type T = LogsDefinition def offset(o: Int): LogsDefinition = copy(offset = Some(o)) def length(l: Int): LogsDefinition = copy(length = Some(l)) def `type`(t: LogType): LogsDefinition = copy(`type` = Some(t)) override def options(requestOptions: RequestOptions): LogsDefinition = copy(requestOptions = Some(requestOptions)) override private[algolia] def build(): HttpPayload = { val queryParameters = mutable.Map[String, String]() offset.map { o => queryParameters.put("offset", o.toString) } length.map { l => queryParameters.put("length", l.toString) } `type`.map { t => queryParameters.put("type", t.name) } HttpPayload( GET, Seq("1", "logs"), queryParameters = Some(queryParameters.toMap), isSearch = false, requestOptions = requestOptions ) } }
Example 122
Source File: IdentList.scala From boopickle with Apache License 2.0 | 5 votes |
package boopickle import scala.collection.mutable private[boopickle] final class IdentListBig(first: IdentList.Entry, size: Int) extends IdentList { // transform the linked list into an array buffer val b = mutable.ArrayBuffer.newBuilder[AnyRef] b.sizeHint(size) var e = first while (e != null) { b += e.obj e = e.next } val entries = b.result() override def apply(idx: Int): AnyRef = { entries(idx) } override def updated(obj: AnyRef): IdentList = { entries += obj this } }
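IdentListBig copies a linked list into an ArrayBuffer through a builder, using sizeHint to pre-allocate. A standalone sketch of that pattern; the Entry class here is a stand-in, not boopickle's.

import scala.collection.mutable

// Illustrative only: copy a singly linked list into an ArrayBuffer, pre-sizing the builder.
object LinkedToBufferSketch {
  final class Entry(val obj: AnyRef, var next: Entry)

  def toBuffer(first: Entry, size: Int): mutable.ArrayBuffer[AnyRef] = {
    val b = mutable.ArrayBuffer.newBuilder[AnyRef]
    b.sizeHint(size)          // avoids repeated resizing while appending
    var e = first
    while (e != null) {
      b += e.obj
      e = e.next
    }
    b.result()
  }

  def main(args: Array[String]): Unit = {
    val tail = new Entry("b", null)
    val head = new Entry("a", tail)
    println(toBuffer(head, 2)) // ArrayBuffer(a, b)
  }
}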
Example 123
Source File: TestAppender.scala From stryker4s with Apache License 2.0 | 5 votes |
package stryker4s.testutil import org.apache.logging.log4j.core._ import org.apache.logging.log4j.core.appender.AbstractAppender import org.apache.logging.log4j.core.config.Property import org.apache.logging.log4j.core.config.plugins._ import scala.collection.mutable import scala.collection.mutable.ListBuffer object TestAppender { val events: mutable.Map[String, ListBuffer[LogEvent]] = new mutable.HashMap[String, ListBuffer[LogEvent]]().withDefaultValue(ListBuffer.empty) def reset(implicit threadName: String): Unit = events(threadName).clear() @PluginFactory def createAppender( @PluginAttribute("name") name: String, @PluginElement("Filter") filter: Filter ): TestAppender = new TestAppender(name, filter) } @Plugin(name = "TestAppender", category = Core.CATEGORY_NAME, elementType = Appender.ELEMENT_TYPE) class TestAppender(name: String, filter: Filter) extends AbstractAppender(name, filter, null, true, Property.EMPTY_ARRAY) { override def append(eventObject: LogEvent): Unit = { // Needs to call .toImmutable because the same object is given every time, with only a mutated message val _ = TestAppender.events(eventObject.getThreadName) += eventObject.toImmutable } }
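The appender keys log events per thread name. Worth noting that withDefaultValue hands every missing key the same shared ListBuffer and never inserts it into the map, while getOrElseUpdate gives each key its own buffer. A minimal sketch of the per-key accumulation idiom, with illustrative names.

import scala.collection.mutable
import scala.collection.mutable.ListBuffer

// Illustrative only: accumulate values per key, one ListBuffer per key.
object PerKeyEventsSketch {
  private val events = mutable.HashMap.empty[String, ListBuffer[String]]

  def append(threadName: String, message: String): Unit =
    // getOrElseUpdate inserts a fresh buffer the first time a key is seen
    events.getOrElseUpdate(threadName, ListBuffer.empty) += message

  def main(args: Array[String]): Unit = {
    append("t1", "a"); append("t1", "b"); append("t2", "c")
    println(events("t1")) // ListBuffer(a, b)
    println(events("t2")) // ListBuffer(c)
  }
}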
Example 124
Source File: DecoupledHandler.scala From chisel-gui with BSD 3-Clause "New" or "Revised" License | 5 votes |
// See README.md for license details. package visualizer.models import scala.collection.mutable object DecoupledHandler { val ReadyName = "_ready" val ValidName = "_valid" val BitsName = "_bits_" val decoupledNames = new mutable.HashSet[String] case class Updater(pattern: String, add: (DecoupledHandler, String) => Unit, isMatch: String => Int) def hasPattern(pattern: String)(s: String): Int = s.indexOf(pattern) def hasPatternAtEnd(pattern: String)(s: String): Int = if (s.endsWith(pattern)) s.indexOf(pattern) else -1 val updaters = Seq( Updater(ReadyName, (d, s) => d.readyNameOpt = Some(s), hasPatternAtEnd(ReadyName)), Updater(ValidName, (d, s) => d.validNameOpt = Some(s), hasPatternAtEnd(ValidName)), Updater(BitsName, (d, s) => d.bits += s, hasPattern(BitsName)) ) var _indexId: Long = -1L def assignIndex(): Long = { _indexId += 1L _indexId } val signalNameToDecouple: mutable.HashMap[String, DecoupledHandler] = new mutable.HashMap() def prefix(s: String, index: Int): String = { s.take(index) } def lookForReadyValidBundles(names: Seq[String]): Unit = { names.sorted.foreach { symbolName => for (updater <- updaters) { val index = updater.isMatch(symbolName) if (index > 0) { val prefix = symbolName.take(index) val decoupledHandler = signalNameToDecouple.getOrElseUpdate(prefix, apply(prefix)) updater.add(decoupledHandler, symbolName) decoupledNames += symbolName } } } // signalNameToDecouple.retain { case (key, d) => d.readyNameOpt.isDefined && d.validNameOpt.isDefined } signalNameToDecouple.retain { case (key, d) => d.validNameOpt.isDefined } } def apply(prefix: String): DecoupledHandler = { DecoupledHandler(assignIndex(), prefix) } } case class DecoupledHandler(indexId: Long, prefix: String) { var readyNameOpt: Option[String] = None var validNameOpt: Option[String] = None val bits: mutable.ArrayBuffer[String] = new mutable.ArrayBuffer() def getChildNames: Seq[String] = { bits ++ readyNameOpt ++ validNameOpt } }
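A usage sketch of the object above, assuming the visualizer.models definitions are compiled (for example pasted into a REPL); the signal names are made up for illustration.

// Usage sketch: group _ready/_valid/_bits_ signals under their common prefix.
// The signal names below are made up.
import visualizer.models.DecoupledHandler

DecoupledHandler.lookForReadyValidBundles(Seq("io_in_ready", "io_in_valid", "io_in_bits_data"))

val handler = DecoupledHandler.signalNameToDecouple("io_in")
handler.getChildNames   // ArrayBuffer(io_in_bits_data, io_in_ready, io_in_valid)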
Example 125
Source File: EnumManager.scala From chisel-gui with BSD 3-Clause "New" or "Revised" License | 5 votes |
package visualizer.models import chisel3.experimental.EnumAnnotations.{EnumComponentAnnotation, EnumDefAnnotation} import firrtl.AnnotationSeq import firrtl.annotations.{CircuitName, ComponentName, ModuleName, Named} import firrtl.options.InputAnnotationFileAnnotation import firrtl.options.phases.GetIncludes import treadle.TreadleTester import scala.collection.mutable object EnumManager { val typeNames: mutable.HashSet[String] = new mutable.HashSet() val targetToTypeName: mutable.HashMap[Named, String] = new mutable.HashMap() val definitions: mutable.HashMap[String, mutable.HashMap[BigInt, String]] = new mutable.HashMap() val signalNameToDefinition: mutable.HashMap[String, mutable.HashMap[BigInt, String]] = new mutable.HashMap() def init(annotationSeq: AnnotationSeq,dataModel: DataModel, tester: TreadleTester): Unit = { val myAnnos = (new GetIncludes).transform(annotationSeq.filter(_.isInstanceOf[InputAnnotationFileAnnotation])) myAnnos.foreach { case EnumDefAnnotation(typeName, definition) => val map = definitions.getOrElseUpdate(typeName, new mutable.HashMap()) map ++= definition.map { case (name, value) => value -> name } case EnumComponentAnnotation(target, typeName) => typeNames += typeName targetToTypeName(target) = typeName case _ => // irrelevant annotation } val engine = tester.engine val symbolTable = engine.symbolTable targetToTypeName.keys.foreach { case tt @ ComponentName(componentName, ModuleName(annoModuleName, _)) => symbolTable.instanceNameToModuleName.foreach { case (instanceName, moduleName) => if (annoModuleName == moduleName) { // this little bit of trickery is because for treadle top level signals don't carry a module name val enumWireName = if (instanceName.isEmpty) { componentName } else { instanceName + "." + componentName } dataModel.nameToSignal.get(enumWireName) match { case Some(_) => signalNameToDefinition(enumWireName) = definitions(targetToTypeName(tt)) case _ => } } } } } def hasEnumDefinition(signalName: String): Boolean = { signalNameToDefinition.contains(signalName) } def getDefinition(signalName: String): Option[mutable.HashMap[BigInt, String]] = { signalNameToDefinition.get(signalName) } }
Example 126
Source File: DecoupledFireRestrictor.scala From chisel-gui with BSD 3-Clause "New" or "Revised" License | 5 votes |
// See README.md for license details. package visualizer.models import org.scalatest.{FreeSpec, Matchers} import scala.collection.mutable object DecoupledFireRestrictor { case class Interval(start: Long, end: Long, value: BigInt) def buildTimeVector(buffer: mutable.ArrayBuffer[Transition]): List[Interval] = { val b = buffer.toList.sliding(2) val newList = b.flatMap { case transition1 :: transition2 :: Nil => List( Interval(transition1.timestamp, transition2.timestamp, transition1.value), Interval(transition1.timestamp, transition2.timestamp, transition1.value) ) case transition :: Nil => List.empty } newList }.toList } class DecoupledFireRestrictorTest extends FreeSpec with Matchers { "select from an array based on values" in {} }
Example 127
Source File: NGrams.scala From featran with Apache License 2.0 | 5 votes |
package com.spotify.featran.transformers import com.spotify.featran.FeatureBuilder import scala.collection.{mutable, SortedMap} def fromSettings( setting: Settings ): Transformer[Seq[String], Set[String], SortedMap[String, Int]] = NGrams(setting.name) } private[featran] class NGrams(name: String, val low: Int, val high: Int, val sep: String) extends NHotEncoder(name, false) { override def prepare(a: Seq[String]): Set[String] = ngrams(a).toSet override def buildFeatures( a: Option[Seq[String]], c: SortedMap[String, Int], fb: FeatureBuilder[_] ): Unit = super.buildFeatures(a.map(ngrams), c, fb) private[transformers] def ngrams(a: Seq[String]): Seq[String] = { val max = if (high == -1) a.length else high val b = Seq.newBuilder[String] var i = low while (i <= max) { if (i == 1) { b ++= a } else if (i <= a.size) { val q = mutable.Queue[String]() var j = 0 val it = a.iterator while (j < i) { q.enqueue(it.next()) j += 1 } b += mkNGram(q, sep) while (it.hasNext) { q.dequeue() q.enqueue(it.next()) b += mkNGram(q, sep) } } i += 1 } b.result() } private def mkNGram(xs: mutable.Queue[String], sep: String): String = { val sb = StringBuilder.newBuilder val i = xs.iterator sb.append(i.next()) while (i.hasNext) { sb.append(sep).append(i.next()) } sb.mkString } }
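The ngrams method above slides a fixed-size window over the tokens with a mutable.Queue, enqueuing the next token and dequeuing the oldest. A standalone sketch of that windowing idiom, simplified to a single n rather than a low/high range.

import scala.collection.mutable

// Illustrative only: build word n-grams with a fixed-size sliding window kept in a mutable.Queue.
object NGramSketch {
  def ngrams(tokens: Seq[String], n: Int, sep: String = " "): Seq[String] = {
    val out = Seq.newBuilder[String]
    val window = mutable.Queue.empty[String]
    tokens.foreach { t =>
      window.enqueue(t)
      if (window.size > n) window.dequeue()     // keep at most n tokens in the window
      if (window.size == n) out += window.mkString(sep)
    }
    out.result()
  }

  def main(args: Array[String]): Unit =
    println(ngrams(Seq("to", "be", "or", "not"), 2)) // List(to be, be or, or not)
}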
Example 128
Source File: MDLPDiscretizer.scala From featran with Apache License 2.0 | 5 votes |
package com.spotify.featran.transformers.mdl import scala.collection.JavaConverters._ import scala.collection.mutable import scala.reflect.ClassTag private[transformers] class MDLPDiscretizer[T: ClassTag]( data: Seq[(T, Double)], stoppingCriterion: Double = MDLPDiscretizer.DefaultStoppingCriterion, minBinPercentage: Double = MDLPDiscretizer.DefaultMinBinPercentage ) extends Serializable { private val labels = { val m = mutable.Map.empty[T, Int] data.foreach { case (k, _) => if (!m.contains(k)) { m(k) = m.size } } m } private def isBoundary(f1: Array[Long], f2: Array[Long]): Boolean = { val l = math.min(f1.length, f2.length) var count = 0 var i = 0 while (i < l && count <= 1) { if (f1(i) + f2(i) != 0) { count += 1 } i += 1 } count > 1 } private def midpoint(x1: Float, x2: Float): Float = (x1 + x2) / 2.0f def discretize(maxBins: Int = MDLPDiscretizer.DefaultMaxBins): Seq[Double] = { val featureValues = new java.util.TreeMap[Float, Array[Long]]() data.foreach { case (label, value) => val key = value.toFloat val i = labels(label) val x = featureValues.get(key) if (x == null) { val y = Array.fill(labels.size)(0L) y(i) = 1L featureValues.put(key, y) } else { x(i) += 1L } } val cutPoint = if (!featureValues.isEmpty) { val it = featureValues.asScala.iterator var (lastX, lastFreqs) = it.next() var result = List.empty[(Float, Array[Long])] var accumFreqs = lastFreqs while (it.hasNext) { val (x, freqs) = it.next() if (isBoundary(freqs, lastFreqs)) { result = (midpoint(x, lastX), accumFreqs) :: result accumFreqs = Array.fill(labels.size)(0L) } lastX = x lastFreqs = freqs MDLUtil.plusI(accumFreqs, freqs) } (lastX, accumFreqs) :: result } else { Nil } val minBinWeight: Long = (minBinPercentage * data.length / 100.0).toLong val finder = new ThresholdFinder(labels.size, stoppingCriterion, maxBins, minBinWeight) finder.findThresholds(cutPoint.sortBy(_._1)).map(_.toDouble) } } private[transformers] object MDLPDiscretizer { val DefaultStoppingCriterion: Double = 0.0 val DefaultMinBinPercentage: Double = 0.0 val DefaultMaxBins: Int = 50 }
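The labels map above assigns each distinct label a dense index equal to the map's current size when the label is first seen. The same idiom in isolation, as an illustrative helper using only the standard library.

import scala.collection.mutable

// Illustrative only: assign dense integer indices to labels in order of first appearance.
object LabelIndexSketch {
  def index[T](labels: Seq[T]): Map[T, Int] = {
    val m = mutable.Map.empty[T, Int]
    labels.foreach { l =>
      if (!m.contains(l)) m(l) = m.size  // next free index = current map size
    }
    m.toMap
  }

  def main(args: Array[String]): Unit =
    println(index(Seq("cat", "dog", "cat", "bird")).toSeq.sortBy(_._2))
    // List((cat,0), (dog,1), (bird,2))
}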
Example 129
Source File: CanBuild.scala From featran with Apache License 2.0 | 5 votes |
package com.spotify.featran import scala.collection.mutable import scala.reflect.ClassTag // Workaround for CanBuildFrom not serializable trait CanBuild[T, M[_]] extends Serializable { def apply(): mutable.Builder[T, M[T]] } object CanBuild { // Collection types in _root_.scala.* implicit def iterableCB[T]: CanBuild[T, Iterable] = new CanBuild[T, Iterable] { override def apply(): mutable.Builder[T, Iterable[T]] = Iterable.newBuilder } implicit def seqCB[T]: CanBuild[T, Seq] = new CanBuild[T, Seq] { override def apply(): mutable.Builder[T, Seq[T]] = Seq.newBuilder } implicit def indexedSeqCB[T]: CanBuild[T, IndexedSeq] = new CanBuild[T, IndexedSeq] { override def apply(): mutable.Builder[T, IndexedSeq[T]] = IndexedSeq.newBuilder } implicit def listCB[T]: CanBuild[T, List] = new CanBuild[T, List] { override def apply(): mutable.Builder[T, List[T]] = List.newBuilder } implicit def vectorCB[T]: CanBuild[T, Vector] = new CanBuild[T, Vector] { override def apply(): mutable.Builder[T, Vector[T]] = Vector.newBuilder } implicit def bufferCB[T]: CanBuild[T, mutable.Buffer] = new CanBuild[T, mutable.Buffer] { override def apply(): mutable.Builder[T, mutable.Buffer[T]] = mutable.Buffer.newBuilder } implicit def floatArrayCB: CanBuild[Float, Array] = new CanBuild[Float, Array] { override def apply(): mutable.Builder[Float, Array[Float]] = Array.newBuilder[Float] } implicit def doubleArrayCB: CanBuild[Double, Array] = new CanBuild[Double, Array] { override def apply(): mutable.Builder[Double, Array[Double]] = Array.newBuilder[Double] } implicit def arrayCB[T: ClassTag]: CanBuild[T, Array] = new CanBuild[T, Array] { override def apply(): mutable.Builder[T, Array[T]] = Array.newBuilder[T] } }
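A usage sketch of the CanBuild type class defined above; the fillN helper is illustrative and not part of featran.

// Usage sketch, e.g. in a REPL with featran on the classpath.
// fillN is an illustrative helper, not part of the library.
import com.spotify.featran.CanBuild

def fillN[T, M[_]](n: Int, x: T)(implicit cb: CanBuild[T, M]): M[T] = {
  val b = cb()                       // obtain a fresh, serializable builder
  var i = 0
  while (i < n) { b += x; i += 1 }
  b.result()
}

fillN[String, Vector](3, "a")        // Vector(a, a, a)   (via vectorCB)
fillN[Double, Array](2, 0.5)         // Array(0.5, 0.5)   (via doubleArrayCB)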
Example 130
Source File: CollectionType.scala From featran with Apache License 2.0 | 5 votes |
package com.spotify.featran import simulacrum._ import scala.collection.mutable import scala.reflect.ClassTag @typeclass trait CollectionType[M[_]] { def pure[A, B: ClassTag](ma: M[A])(a: B): M[B] def map[A, B: ClassTag](ma: M[A])(f: A => B): M[B] def reduce[A](ma: M[A])(f: (A, A) => A): M[A] def cross[A, B: ClassTag](ma: M[A])(mb: M[B]): M[(A, B)] } object CollectionType { implicit def scalaCollectionType[M[_]](implicit cb: CanBuild[_, M], ti: M[_] => Iterable[_] ): CollectionType[M] = new CollectionType[M] { override def map[A, B: ClassTag](ma: M[A])(f: A => B): M[B] = { val builder = cb().asInstanceOf[mutable.Builder[B, M[B]]] ma.asInstanceOf[Iterable[A]].foreach(a => builder += f(a)) builder.result() } override def pure[A, B: ClassTag](ma: M[A])(b: B): M[B] = { val builder = cb().asInstanceOf[mutable.Builder[B, M[B]]] builder += b builder.result() } override def reduce[A](ma: M[A])(f: (A, A) => A): M[A] = { val builder = cb().asInstanceOf[mutable.Builder[A, M[A]]] if (ma.nonEmpty) { builder += ma.asInstanceOf[Iterable[A]].reduce(f) } builder.result() } override def cross[A, B: ClassTag](ma: M[A])(mb: M[B]): M[(A, B)] = { val builder = cb().asInstanceOf[mutable.Builder[(A, B), M[(A, B)]]] if (mb.nonEmpty) { val b = mb.asInstanceOf[Iterable[B]].head ma.asInstanceOf[Iterable[A]].foreach(a => builder += ((a, b))) } builder.result() } } implicit val arrayCollectionType: CollectionType[Array] = new CollectionType[Array] { override def pure[A, B: ClassTag](ma: Array[A])(b: B): Array[B] = Array(b) override def map[A, B: ClassTag](ma: Array[A])(f: A => B): Array[B] = ma.map(f) override def reduce[A](ma: Array[A])(f: (A, A) => A): Array[A] = { // workaround for "No ClassTag available for A" val r = ma.take(1) r(0) = ma.reduce(f) r } override def cross[A, B: ClassTag](ma: Array[A])(mb: Array[B]): Array[(A, B)] = ma.map((_, mb.head)) } }
Example 131
Source File: RichArrayBuffer.scala From swave with Mozilla Public License 2.0 | 5 votes |
package swave.core.util import scala.annotation.tailrec import scala.collection.mutable final class RichArrayBuffer[A](val underlying: mutable.ArrayBuffer[A]) extends AnyVal { def inplaceSortBy[B](f: A ⇒ B)(implicit ord: Ordering[B]): Unit = { val buf = underlying.asInstanceOf[mutable.ArrayBuffer[AnyRef]] val array = buf.toArray java.util.Arrays.sort(array, ord.on(f).asInstanceOf[Ordering[AnyRef]]) buf.clear() buf ++= array () } def removeWhere(f: A ⇒ Boolean): Unit = { @tailrec def rec(ix: Int): Unit = if (ix >= 0) { if (f(underlying(ix))) underlying.remove(ix) rec(ix - 1) } rec(underlying.size - 1) } def removeIfPresent(elem: A): Unit = underlying.indexOf(elem) match { case -1 ⇒ case ix ⇒ { underlying.remove(ix); () } } }
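removeWhere above walks the buffer indices from the end, so removing an element never shifts positions that are still to be visited. The same technique as a standalone sketch.

import scala.collection.mutable

// Illustrative only: delete matching elements from an ArrayBuffer in place.
// Walking the indices from the end means earlier removals never shift the
// positions that remain to be visited.
object RemoveInPlaceSketch {
  def removeWhere[A](buf: mutable.ArrayBuffer[A])(p: A => Boolean): Unit = {
    var ix = buf.size - 1
    while (ix >= 0) {
      if (p(buf(ix))) buf.remove(ix)
      ix -= 1
    }
  }

  def main(args: Array[String]): Unit = {
    val buf = mutable.ArrayBuffer(1, 2, 3, 4, 5)
    removeWhere(buf)(_ % 2 == 0)
    println(buf) // ArrayBuffer(1, 3, 5)
  }
}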
Example 132
Source File: package.scala From swave with Mozilla Public License 2.0 | 5 votes |
package swave.core import java.nio.charset.Charset import com.typesafe.config.Config import scala.concurrent.duration._ import scala.concurrent.Future import scala.collection.mutable import shapeless.HList package object util { private[this] val _identityFunc = (x: Any) ⇒ x def identityFunc[T]: T ⇒ T = _identityFunc.asInstanceOf[T ⇒ T] def identityHash(obj: AnyRef): String = Integer.toHexString(System.identityHashCode(obj)) val dropFunc: Any ⇒ Unit = _ ⇒ () val dropFunc2: (Any, Any) ⇒ Unit = (_, _) ⇒ () val oneIntFunc: Any ⇒ Int = _ ⇒ 1 val UTF8: Charset = Charset.forName("UTF-8") val ASCII: Charset = Charset.forName("US-ASCII") def isPowerOf2(i: Int): Boolean = Integer.lowestOneBit(i) == i def roundUpToPowerOf2(i: Int): Int = 1 << (32 - Integer.numberOfLeadingZeros(i - 1)) def Runnable(body: ⇒ Unit): Runnable = new Runnable { def run(): Unit = body } implicit def richByteArray(array: Array[Byte]): RichByteArray = new RichByteArray(array) implicit def richConfig[T](config: Config): RichConfig = new RichConfig(config) implicit def richDuration(duration: Duration): RichDuration = new RichDuration(duration) implicit def richFiniteDuration(duration: FiniteDuration): RichFiniteDuration = new RichFiniteDuration(duration) implicit def richFuture[T](future: Future[T]): RichFuture[T] = new RichFuture(future) implicit def richHList[L <: HList](list: L): RichHList[L] = new RichHList(list) implicit def richInt(int: Int): RichInt = new RichInt(int) implicit def richList[T](list: List[T]): RichList[T] = new RichList(list) implicit def richLong(long: Long): RichLong = new RichLong(long) implicit def richArrayBuffer[T](seq: mutable.ArrayBuffer[T]): RichArrayBuffer[T] = new RichArrayBuffer(seq) implicit def richRefArray[T <: AnyRef](array: Array[T]): RichRefArray[T] = new RichRefArray(array) implicit def richSeq[T](seq: Seq[T]): RichSeq[T] = new RichSeq(seq) implicit def richString(string: String): RichString = new RichString(string) implicit def richTraversable[T](seq: Traversable[T]): RichTraversable[T] = new RichTraversable(seq) }
Example 133
Source File: PrefixAndTailStage.scala From swave with Mozilla Public License 2.0 | 5 votes |
package swave.core.impl.stages.inout import scala.collection.mutable import swave.core.impl.stages.spout.SubSpoutStage import swave.core.impl.{Inport, Outport} import swave.core.impl.stages.InOutStage import swave.core.macros._ import swave.core._ // format: OFF @StageImplementation private[core] final class PrefixAndTailStage(prefixSize: Int, prefixBuilder: mutable.Builder[Any, AnyRef]) extends InOutStage { requireArg(prefixSize > 0, "`prefixSize` must be > 0") def kind = Stage.Kind.InOut.PrefixAndTail(prefixSize) connectInOutAndSealWith { (in, out) ⇒ region.impl.registerForXStart(this) running(in, out) } def running(in: Inport, out: Outport) = { def awaitingXStart() = state( xStart = () => { in.request(prefixSize.toLong) assemblingPrefix(prefixSize.toLong, false) }) def draining(in: Inport, sub: Outport) = state( intercept = false, request = requestF(in), cancel = stopCancelF(in), onNext = onNextF(sub), onComplete = stopCompleteF(sub), onError = stopErrorF(sub)) }
Example 134
Source File: Infrastructure.scala From swave with Mozilla Public License 2.0 | 5 votes |
package swave.core.graph.impl import scala.collection.mutable import swave.core.graph.{Digraph, Glyph} import swave.core.util._ private[graph] object Infrastructure { type Edge = (Node, Node) final class Node(val id: Int, val vertex: Any) { val preds = new mutable.ArrayBuffer[Node] val succs = new mutable.ArrayBuffer[Node] def isSingle = preds.isEmpty && succs.isEmpty def isRoot = preds.isEmpty def isLeaf = succs.isEmpty def isInOut = preds.size == 1 && succs.size == 1 def isFanIn = preds.size > 1 def isFanOut = succs.size > 1 var isHidden = false var desCount = -1 var inDegree = -1 var xRank: XRank = _ val glyphs = new mutable.ArrayBuffer[Glyph] var attributes = List.empty[AnyRef] override def toString = s"Node(vertex=$vertex, id=$id, rankGroup=${if (xRank != null && xRank.group != null) xRank.group.groupId else "null"}, " + s"attrs=${attributes.mkString("[", ",", "]")}, " + s"preds=${preds.map(_.id).mkString("[", ",", "]")}, " + s"succs=${succs.map(_.id).mkString("[", ",", "]")}" + (if (isHidden) ", hidden)" else ")") def partialCopyWith(newVertex: Any): Node = { val n = new Node(id, newVertex) n.isHidden = isHidden n.desCount = desCount n.inDegree = inDegree n.xRank = xRank glyphs.foreach(g ⇒ n.glyphs += g) n.attributes = attributes n } } final class XRank(val id: Int) { var group: XRankGroup = _ var level = -1 // smaller values -> lay out to the left, higher values -> lay out to the right var preds = List.empty[XRank] var succs = List.empty[XRank] override def toString = s"XRank(id=$id, group=${group.groupId}, level=$level, " + s"preds=[${preds.map(_.id).mkString(",")}], succs=[${succs.map(_.id).mkString(",")}])" } final class XRankGroup(var groupId: Int) { override def equals(that: Any): Boolean = that.isInstanceOf[XRankGroup] && that.asInstanceOf[XRankGroup].groupId == groupId override def hashCode() = groupId } type EdgeAttrs = Map[Edge, Digraph.EdgeAttributes] implicit class RichEdgeAttrs(val underlying: EdgeAttrs) extends AnyVal { def get(edge: Edge): Digraph.EdgeAttributes = underlying.getOrElse(edge, 0) def has(edge: Edge, attrs: Digraph.EdgeAttributes): Boolean = (get(edge) & attrs) != 0 def add(edge: Edge, attrs: Digraph.EdgeAttributes): EdgeAttrs = underlying.updated(edge, get(edge) | attrs) def move(sourceEdge: Edge, targetEdges: List[Edge], filter: Int = Digraph.EdgeAttributes.All): EdgeAttrs = underlying.get(sourceEdge) match { case None ⇒ underlying case Some(flags) ⇒ val filtered = flags & filter val map = if (filtered != 0) targetEdges.foldLeft(underlying)(_ add (_, filtered)) else underlying map - sourceEdge } def printAll() = { for ((edge, flags) ← underlying) println(format(edge) + ": " + flags) println() } } val Root: AnyRefExtractor[Node, Seq[Node]] = AnyRefExtractor(n ⇒ if (n.isRoot) n.succs else null) val Leaf: AnyRefExtractor[Node, Seq[Node]] = AnyRefExtractor(n ⇒ if (n.isLeaf) n.preds else null) val InOut: AnyRefExtractor[Node, (Node, Node)] = AnyRefExtractor(n ⇒ if (n.isInOut) n.preds.head → n.succs.head else null) def format(edge: Edge) = s"[${edge._1.id} -> ${edge._2.id}]" }
Example 135
Source File: XRanking.scala From swave with Mozilla Public License 2.0 | 5 votes |
package swave.core.graph.impl import scala.annotation.tailrec import scala.collection.mutable import Infrastructure._ private[graph] object XRanking { def assignXRanks(rootNodes: Vector[Node], allNodes: Vector[Node]): Unit = { // STEP 1: partition the nodes into group which have identical XRanks // by assigning them the same XRank instance val ranks = new mutable.ArrayBuffer[XRank]; { def createXRank(): XRank = { val rank = new XRank(ranks.size) ranks += rank rank } def _visit(node: Node, rank: XRank): Unit = visit(node, rank) @tailrec def visit(node: Node, rank: XRank): Unit = if (node.xRank eq null) { val nodeRank = if (node.isFanIn) createXRank() else rank node.xRank = nodeRank (node.succs: Seq[Node]) match { case Nil ⇒ // leaf, just backtrack case Seq(next) ⇒ visit(next, nodeRank) case succs ⇒ succs.foreach(_visit(_, createXRank())) } } for (root ← rootNodes) visit(root, createXRank()) } // STEP 2: connect the created XRank instances with edges whereby // an edge from rank `a` to rank `b` means "a should be laid out to the left of b" // which results in the XRank instances being structured into a graph forest { val connectRanks: (Node, Node) ⇒ Node = { (a, b) ⇒ a.xRank.succs ::= b.xRank b.xRank.preds ::= a.xRank b } for (node ← allNodes) { if (node.isFanIn) node.preds.reduceLeft(connectRanks) if (node.isFanOut) node.succs.reduceLeft(connectRanks) } } // STEP3: identify the connected parts of the rank forest (i.e. the rank graphs) // and mark all nodes of a connected part with the same (value equality) XRankGroup { val groupIds = Iterator.from(0) def assingGroup(rank: XRank, group: XRankGroup): Unit = if (rank.group eq null) { rank.group = group rank.preds.foreach(assingGroup(_, group)) } else group.groupId = rank.group.groupId // merge the two groups ranks.withFilter(_.succs.isEmpty).foreach(assingGroup(_, new XRankGroup(groupIds.next()))) } // STEP4: for each XRankGroup: apply a simple layering algorithm { val bitSet = new mutable.BitSet(ranks.size) for (groupRanks ← ranks.groupBy(_.group).valuesIterator) { def assignLevel(rank: XRank, level: Int): Unit = if (!bitSet.contains(rank.id)) { bitSet += rank.id if (level > rank.level) rank.level = level rank.succs.foreach(assignLevel(_, level + 1)) bitSet -= rank.id () } // else println("XRank crossing!") groupRanks.withFilter(_.preds.isEmpty).foreach(assignLevel(_, 0)) def compactLevels(rank: XRank): Boolean = !bitSet.contains(rank.id) && { bitSet += rank.id val minSubRank = if (rank.succs.nonEmpty) rank.succs.minBy(_.level).level else 0 val progress = rank.level < minSubRank - 1 && { rank.level = minSubRank - 1; true } val result = rank.succs.foldRight(progress)(compactLevels(_) || _) bitSet -= rank.id result } val leafs = groupRanks.filter(_.succs.isEmpty) while (leafs.foldRight(false)(compactLevels(_) || _)) () } } } }
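Steps 3 and 4 above use a mutable.BitSet to mark the ranks currently on the recursion stack, which keeps the traversal from looping on cycles. A minimal standalone sketch of that guard; the graph and names are made up.

import scala.collection.mutable

// Illustrative only: walk a possibly cyclic graph depth-first, using a mutable.BitSet
// to mark the nodes currently on the recursion stack so cycles don't recurse forever.
object BitSetDfsSketch {
  def reachable(succs: Map[Int, List[Int]], start: Int): List[Int] = {
    val onStack = new mutable.BitSet
    val seen = mutable.SortedSet.empty[Int]

    def visit(node: Int): Unit =
      if (!onStack.contains(node)) {
        onStack += node                          // mark: currently being visited
        seen += node
        succs.getOrElse(node, Nil).foreach(visit)
        onStack -= node                          // unmark on the way back up
      }

    visit(start)
    seen.toList
  }

  def main(args: Array[String]): Unit = {
    val graph = Map(0 -> List(1, 2), 1 -> List(2), 2 -> List(0)) // 2 -> 0 closes a cycle
    println(reachable(graph, 0)) // List(0, 1, 2)
  }
}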
Example 136
Source File: ProxyCrawler.scala From ProxyCrawler with Apache License 2.0 | 5 votes |
package org.crowdcrawler.proxycrawler import java.io.IOException import java.net.URI import java.security.cert.X509Certificate import com.typesafe.scalalogging.Logger import org.apache.http.client.methods.HttpGet import org.apache.http.impl.client.HttpClients import org.apache.http.ssl.{TrustStrategy, SSLContexts} import org.apache.http.conn.ssl.{NoopHostnameVerifier, SSLConnectionSocketFactory} import org.apache.http.util.EntityUtils import org.crowdcrawler.proxycrawler.crawler.plugins.AbstractPlugin import org.apache.http.HttpHeaders import org.slf4j.LoggerFactory import scala.collection.immutable import scala.collection.mutable class ProxyCrawler(plugins: List[AbstractPlugin]) { *;q=0.8"), (HttpHeaders.ACCEPT_ENCODING, "gzip, deflate, sdch"), (HttpHeaders.ACCEPT_LANGUAGE, "en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4"), (HttpHeaders.CONNECTION, "keep-alive") ) private val CLIENT = { // trust all certificates including self-signed certificates val sslContext = SSLContexts.custom().loadTrustMaterial(null, new TrustStrategy() { def isTrusted(chain: Array[X509Certificate], authType: String) = true }).build() val connectionFactory = new SSLConnectionSocketFactory(sslContext, NoopHostnameVerifier.INSTANCE) HttpClients.custom().setSSLSocketFactory(connectionFactory).build() } def apply(classNames: String*): ProxyCrawler = { val plugins = mutable.ListBuffer.empty[AbstractPlugin] for (className <- classNames) { val clazz = Class.forName("org.crowdcrawler.proxycrawler.crawler.plugins." + className) plugins += clazz.newInstance().asInstanceOf[AbstractPlugin] } new ProxyCrawler(plugins.toList) } private def createRequest(uri: URI, headers: immutable.Map[String, String]): HttpGet = { val request = new HttpGet(uri) for (header <- headers) { request.setHeader(header._1, header._2) } request } }
Example 137
Source File: IpcnOrgPlugin.scala From ProxyCrawler with Apache License 2.0 | 5 votes |
package org.crowdcrawler.proxycrawler.crawler.plugins import org.crowdcrawler.proxycrawler.ProxyInfo import org.jsoup.Jsoup import java.net.URI import java.nio.charset.Charset import scala.collection.mutable final class IpcnOrgPlugin extends AbstractPlugin { val seeds: List[URI] = List( new URI("http://proxy.ipcn.org/proxylist.html"), new URI("http://proxy.ipcn.org/proxylist2.html") ) def extract(html: String): List[ProxyInfo] = { val result = mutable.ListBuffer.empty[ProxyInfo] val doc = Jsoup.parse(html) val preText = doc.select("tr > td > pre").text val rows = preText.split("\n") for (row <- rows) { if (row.matches("[0-9]+(?:\\.[0-9]+){3}:[0-9]+")) { val splitted = row.split(":") val host = splitted(0) val port = splitted(1).toInt result += ProxyInfo(host, port, "HTTP", 0, null, null) } } result.toList } def next(html: String): List[URI] = List() override val responseCharset: Charset = Charset.forName("GB2312") }
Example 138
Source File: CoolProxyNetPlugin.scala From ProxyCrawler with Apache License 2.0 | 5 votes |
package org.crowdcrawler.proxycrawler.crawler.plugins import org.crowdcrawler.proxycrawler.ProxyInfo import org.jsoup.Jsoup import java.net.URI import java.nio.charset.StandardCharsets import sun.misc.BASE64Decoder import scala.collection.mutable import scala.collection.JavaConversions._ import util.control.Breaks._ class CoolProxyNetPlugin extends AbstractPlugin { private final val decoder: BASE64Decoder = new BASE64Decoder val seeds: List[URI] = List(new URI("http://www.cool-proxy.net/proxies/http_proxy_list/page:1")) private def decryptIP(ip: String): String = { val base64Encoded = new StringBuilder for (ch <- ip) { val newChar = if (Character.isAlphabetic(ch)) { if (ch.toLower < 'n') (ch + 13).toChar else (ch - 13).toChar } else { ch } base64Encoded += newChar } val bytes = decoder.decodeBuffer(base64Encoded.toString()) new String(bytes, StandardCharsets.UTF_8) } def extract(html: String): List[ProxyInfo] = { val result = mutable.ListBuffer.empty[ProxyInfo] val doc = Jsoup.parse(html) val rows = doc.select("table > tbody > tr") for (row <- rows) { breakable { val tds = row.select("td") if (tds.isEmpty) break val host = { val hostTmp = tds.get(0).html val startWith = "Base64.decode(str_rot13(\"" val start = hostTmp.indexOf(startWith) if (start == -1) break val end = hostTmp.indexOf("\")))", start) if (end == -1) break val hostEncrypted = hostTmp.substring(start + startWith.length, end) decryptIP(hostEncrypted) } val port = tds.get(1).text.toInt val location = tds.get(3).text val speed = tds.get(8).text.toInt result.add(ProxyInfo(host, port, "HTTP", speed, location, null)) } } result.toList } def next(html: String): List[URI] = { val result = mutable.ListBuffer.empty[URI] val doc = Jsoup.parse(html) val rows = doc.select(".pagination > span > a[href]") for (row <- rows) { val href = row.attr("href") result += new URI("http://www.cool-proxy.net" + href) } result.toList } }
Example 139
Source File: CnProxyComPlugin.scala From ProxyCrawler with Apache License 2.0 | 5 votes |
package org.crowdcrawler.proxycrawler.crawler.plugins import org.crowdcrawler.proxycrawler.ProxyInfo import org.jsoup.Jsoup import java.net.URI import java.nio.charset.Charset import scala.collection.{immutable,mutable} import util.control.Breaks._ private val charNum = immutable.Map( "v" -> "3", "m" -> "4", "a" -> "2", "l" -> "9", "q" -> "0", "b" -> "5", "i" -> "7", "w" -> "6", "r" -> "8", "c" -> "1" ) val seeds: List[URI] = { List( new URI("http://www.cnproxy.com/proxy1.html"), new URI("http://www.cnproxy.com/proxy2.html"), new URI("http://www.cnproxy.com/proxy3.html"), new URI("http://www.cnproxy.com/proxy4.html"), new URI("http://www.cnproxy.com/proxy5.html"), new URI("http://www.cnproxy.com/proxy6.html"), new URI("http://www.cnproxy.com/proxy7.html"), new URI("http://www.cnproxy.com/proxy8.html"), new URI("http://www.cnproxy.com/proxy9.html"), new URI("http://www.cnproxy.com/proxy10.html"), new URI("http://www.cnproxy.com/proxyedu1.html"), new URI("http://www.cnproxy.com/proxyedu2.html") ) } private def decryptPort(encrypted: String): Int = encrypted.split("\\+").map(str => charNum(str)).mkString.toInt def extract(html: String): List[ProxyInfo] = { val result = mutable.ListBuffer.empty[ProxyInfo] val doc = Jsoup.parse(html) val rows = doc.select("#proxylisttb > table").get(2).select("tr") for (i <- 1 until rows.size()) { breakable { // skip the first row val row = rows.get(i) val tds = row.select("td") val host = tds.get(0).text val port = { val pattern = "document.write(\":\"+" val original = tds.get(0).html() val pos1 = original.indexOf(pattern) if (pos1 == -1) break val pos2 = original.indexOf(")</script>", pos1) if (pos2 == -1) break val portStr = original.substring(pos1 + pattern.length, pos2) decryptPort(portStr) } val schema = tds.get(1).text val speeds = tds.get(2).text val speed = { val splitted = speeds.split(",") var sum = 0 for (str <- splitted) { val tmp = str.toInt sum += tmp } sum / splitted.length } val country = tds.get(3).text val proxyInfo = ProxyInfo(host, port, schema, speed, country, null) result += proxyInfo } } result.toList } def next(html: String): List[URI] = List() override val responseCharset: Charset = Charset.forName("GB2312") }
Example 140
Source File: ProxyListOrg.scala From ProxyCrawler with Apache License 2.0 | 5 votes |
package org.crowdcrawler.proxycrawler.crawler.plugins import java.net.URI import org.crowdcrawler.proxycrawler.ProxyInfo import org.jsoup.Jsoup import scala.collection.mutable import scala.collection.JavaConversions._ class ProxyListOrg extends AbstractPlugin { val seeds: List[URI] = List(new URI("https://proxy-list.org/english/index.php?p=1")) def extract(html: String): List[ProxyInfo] = { val result = mutable.ListBuffer.empty[ProxyInfo] val doc = Jsoup.parse(html) val rows = doc.select("div.table-wrap > div > ul") for (row <- rows) { val hostPort = row.select("li.proxy").text() val host = hostPort.split(":")(0) val port = hostPort.split(":")(1).toInt val schema = { val tmp = row.select("li.https").text() if (tmp == "-") "HTTP" else tmp.toUpperCase } val speed = { val tmp = row.select("li.speed").text() if (tmp.contains("kbit")) { (tmp.dropRight(4).toDouble * 1024).toInt } else { 0 } } val location = row.select("li.country-city > div > span.country").first().attr("title") result += ProxyInfo(host, port, schema, speed, location, null) } result.toList } def next(html: String): List[URI] = { val result = mutable.ListBuffer.empty[URI] val rootURL = "https://proxy-list.org/english" val doc = Jsoup.parse(html) val rows = doc.select("div.table-menu > a.item[href]") for (row <- rows) { val href = row.attr("href") result += new URI(rootURL + href.substring(1)) } result.toList } }
Example 141
Source File: SocksProxyNet.scala From ProxyCrawler with Apache License 2.0 | 5 votes |
package org.crowdcrawler.proxycrawler.crawler.plugins import java.net.URI import org.crowdcrawler.proxycrawler.ProxyInfo import org.jsoup.Jsoup import scala.collection.mutable import scala.collection.JavaConversions._ class SocksProxyNet extends AbstractPlugin { val seeds: List[URI] = List(new URI("http://www.socks-proxy.net/")) def extract(html: String): List[ProxyInfo] = { val result = mutable.ListBuffer.empty[ProxyInfo] val doc = Jsoup.parse(html) val rows = doc.select("table#proxylisttable > tbody > tr") for (row <- rows) { val tds = row.select("td") val host = tds.get(0).text val port = tds.get(1).text.toInt val location = tds.get(3).text val schema= tds.get(4).text.toUpperCase result += ProxyInfo(host, port, schema, 0, location, null) } result.toList } def next(html: String): List[URI] = List() }
Example 142
Source File: MemoryRepository.scala From polynote with Apache License 2.0 | 5 votes |
package polynote.testing.repository import java.io.FileNotFoundException import java.net.URI import polynote.kernel.{BaseEnv, GlobalEnv, NotebookRef, TaskB} import polynote.messages._ import polynote.server.repository.NotebookRepository import polynote.testing.kernel.MockNotebookRef import zio.{RIO, Task, UIO, ZIO} import scala.collection.mutable class MemoryRepository extends NotebookRepository { private val notebooks = new mutable.HashMap[String, Notebook]() def notebookExists(path: String): UIO[Boolean] = ZIO.effectTotal(notebooks contains path) def notebookURI(path: String): UIO[Option[URI]] = ZIO.effectTotal(if (notebooks contains path) Option(new URI(s"memory://$path")) else None) def loadNotebook(path: String): Task[Notebook] = ZIO.effectTotal(notebooks.get(path)).get.mapError(err => new FileNotFoundException(path)) def openNotebook(path: String): RIO[BaseEnv with GlobalEnv, NotebookRef] = loadNotebook(path).flatMap(nb => MockNotebookRef(nb, tup => saveNotebook(tup._2))) def saveNotebook(nb: Notebook): UIO[Unit] = ZIO.effectTotal(notebooks.put(nb.path, nb)) def listNotebooks(): UIO[List[String]] = ZIO.effectTotal(notebooks.keys.toList) def createNotebook(path: String, maybeUriOrContent: Option[String]): UIO[String] = ZIO.effectTotal(notebooks.put(path, Notebook(path, ShortList.of(), None))).as(path) def createAndOpen(path: String, notebook: Notebook, version: Int): RIO[BaseEnv with GlobalEnv, NotebookRef] = ZIO.effectTotal(notebooks.put(path, notebook)).flatMap { _ => MockNotebookRef(notebook, tup => saveNotebook(tup._2), version) } def initStorage(): TaskB[Unit] = ZIO.unit def renameNotebook(path: String, newPath: String): Task[String] = loadNotebook(path).map { notebook => notebooks.put(newPath, notebook) notebooks.remove(path) newPath } def copyNotebook(path: String, newPath: String): TaskB[String] = loadNotebook(path).map { notebook => notebooks.put(newPath, notebook) newPath } def deleteNotebook(path: String): TaskB[Unit] = ZIO.effectTotal(notebooks.get(path)).flatMap { case None => ZIO.fail(new FileNotFoundException(path)) case Some(_) => ZIO.effectTotal(notebooks.remove(path)).unit } }
Example 143
Source File: CardinalityProfiler.scala From Mastering-Spark-for-Data-Science with MIT License | 5 votes |
package io.gzet.profilers.field import io.gzet.profilers.Utils import org.apache.spark.sql.functions.col import org.apache.spark.sql.{Dataset, Row} import scala.collection.mutable import scalaz.Scalaz._ case class CardinalityProfiler(topN: Int = 5) { def profile(df: Dataset[Array[String]]): Dataset[CardinalityReport] = { val total = df.sparkSession.sparkContext.broadcast(df.count()) import df.sparkSession.implicits._ val features = Utils.buildColumns(df) val topNValues = features.groupByKey({ field => field }).count().map({ case (field, count) => (field.idx, Map(field.value -> count)) }).groupByKey({ case (column, map) => column }).reduceGroups({ (v1, v2) => val m1 = v1._2 val m2 = v2._2 val m = (m1 |+| m2).toSeq.sortBy(_._2).reverse (v1._1, m.take(math.min(m.size, topN)).toMap) }).map({ case (column, (_, map)) => val top = map.keySet.toArray (column, top) }) .withColumnRenamed("_1", "_topNValues_") .withColumnRenamed("_2", "description") val cardinalities = features.distinct().groupByKey(_.idx).count().map({ case (column, distinctValues) => val cardinality = distinctValues / total.value.toDouble (column, cardinality) }) .withColumnRenamed("_1", "column") .withColumnRenamed("_2", "cardinality") cardinalities.join(topNValues, col("column") === col("_topNValues_")) .drop("_topNValues_") .map({ case Row(column: Int, cardinality: Double, description: mutable.WrappedArray[String]) => CardinalityReport( column, cardinality, description.toArray ) }) } } case class CardinalityReport( field: Int, metricValue: Double, description: Array[String] )
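The reduceGroups step above merges per-value count maps with scalaz's |+|. For reference, the same merge written out with plain collections; the helper name is illustrative and not part of the profiler.

// Illustrative only: merge two count maps by summing counts, as scalaz's |+| does above.
object MergeCountsSketch {
  def merge(m1: Map[String, Long], m2: Map[String, Long]): Map[String, Long] =
    (m1.keySet ++ m2.keySet).iterator
      .map(k => k -> (m1.getOrElse(k, 0L) + m2.getOrElse(k, 0L)))
      .toMap

  def main(args: Array[String]): Unit =
    println(merge(Map("a" -> 2L, "b" -> 1L), Map("b" -> 3L)).toList.sortBy(_._1))
    // List((a,2), (b,4))
}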
Example 144
Source File: StackBootstraping.scala From Mastering-Spark-for-Data-Science with MIT License | 5 votes |
package io.gzet.tagging.stackoverflow import io.gzet.tagging.classifier.Classifier import io.gzet.tagging.html.HtmlHandler import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.apache.spark.sql.{SparkSession, DataFrame, SQLContext} import scala.collection.mutable import scala.xml.{Elem, XML} object StackBootstraping { def parse(spark: SparkSession, posts: RDD[String]): DataFrame = { import spark.sqlContext.implicits._ posts filter { line => line.contains("row Id") } map { line => val xml = XML.loadString(line) (getBody(xml), getTags(xml)) } filter { case (body, tags) => body.isDefined && tags.isDefined } flatMap { case (body, tags) => tags.get.map(tag => (body.get, tag)) } toDF("body", "tag") } private def getBody(xml: Elem): Option[String] = { val bodyAttr = xml.attribute("Body") if (bodyAttr.isDefined) { val html = bodyAttr.get.head.text val htmlHandler = new HtmlHandler() val content = htmlHandler.parseHtml(html) if (content.isDefined) { return content.get.body } } None: Option[String] } private def getTags(xml: Elem): Option[Array[String]] = { val tagsAttr = xml.attribute("Tags") if (tagsAttr.isDefined) { val tagsText = tagsAttr.get.head.text val tags = tagsText .replaceAll("<", "") .replaceAll(">", ",") .split(",") return Some(tags) } None: Option[Array[String]] } def bootstrapNaiveBayes(df: DataFrame, vectorSize: Option[Int]) = { val labeledText = df.rdd map { row => val body = row.getString(0) val labels = row.getAs[mutable.WrappedArray[String]](1) (body, labels.toArray) } Classifier.train(labeledText) } }
Example 145
Source File: OneHotEncoderDemo2.scala From Scala-and-Spark-for-Big-Data-Analytics with MIT License | 5 votes |
package com.chapter11.SparkMachineLearning import org.apache.spark.sql.SparkSession import org.apache.spark.ml.feature.{ OneHotEncoder, StringIndexer } import org.apache.spark.sql.types._ import org.apache.spark.sql._ import org.apache.spark.sql.functions.year import org.apache.spark.ml.{ Pipeline, PipelineStage } import org.apache.spark.ml.classification.{ LogisticRegression, LogisticRegressionModel } import org.apache.spark.ml.feature.StringIndexer import org.apache.spark.sql.{ DataFrame, SparkSession } import scala.collection.mutable import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator object OneHotEncoderDemo2 { def main(args: Array[String]): Unit = { val spark = SparkSession .builder .master("local[*]") .config("spark.sql.warehouse.dir", "E:/Exp/") .appName(s"OneVsRestExample") .getOrCreate() val df = spark.createDataFrame( Seq((0, "Jason", "Germany"), (1, "David", "France"), (2, "Martin", "Spain"), (3, "Jason", "USA"), (4, "Daiel", "UK"), (5, "Moahmed", "Bangladesh"), (6, "David", "Ireland"), (7, "Jason", "Netherlands"))).toDF("id", "name", "address") df.show(false) val indexer = new StringIndexer() .setInputCol("name") .setOutputCol("categoryIndex") .fit(df) val indexed = indexer.transform(df) val encoder = new OneHotEncoder() .setInputCol("categoryIndex") .setOutputCol("categoryVec") val encoded = encoder.transform(indexed) encoded.show() spark.stop() } }
Example 146
Source File: StringIndexerDemo.scala From Scala-and-Spark-for-Big-Data-Analytics with MIT License | 5 votes |
package com.chapter11.SparkMachineLearning import org.apache.spark.sql.SparkSession import org.apache.spark.ml.feature.{ OneHotEncoder, StringIndexer } import org.apache.spark.sql.types._ import org.apache.spark.sql._ import org.apache.spark.sql.functions.year import org.apache.spark.ml.{ Pipeline, PipelineStage } import org.apache.spark.ml.classification.{ LogisticRegression, LogisticRegressionModel } import org.apache.spark.ml.feature.StringIndexer import org.apache.spark.sql.{ DataFrame, SparkSession } import scala.collection.mutable import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator import org.apache.spark.sql._ import org.apache.spark.sql.SQLContext object StringIndexerDemo { def main(args: Array[String]): Unit = { val spark = SparkSession .builder .master("local[*]") .config("spark.sql.warehouse.dir", "E:/Exp/") .appName(s"OneVsRestExample") .getOrCreate() val df = spark.createDataFrame( Seq((0, "Jason", "Germany"), (1, "David", "France"), (2, "Martin", "Spain"), (3, "Jason", "USA"), (4, "Daiel", "UK"), (5, "Moahmed", "Bangladesh"), (6, "David", "Ireland"), (7, "Jason", "Netherlands"))).toDF("id", "name", "address") df.show(false) val indexer = new StringIndexer() .setInputCol("name") .setOutputCol("label") .fit(df) val indexed = indexer.transform(df) indexed.show(false) spark.stop() } }
Example 147
Source File: Load.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.op import java.net.URI import dbis.piglet.Piglet.Lineage import dbis.piglet.expr.{Ref, Value} import dbis.piglet.schema.Schema import dbis.piglet.tools.{CliParams, HDFSService} import scala.collection.mutable import scala.util.{Failure, Success, Try} override def lineageString: String = linStr getOrElse { s"""LOAD%$file%${lastModified match { case None => -1 case Some(Failure(_)) => -2 case Some(Success(v)) => v }}%""" + super.lineageString } override def toString: String = s"""LOAD | out = ${outputs.map(_.name).mkString(",")} | file = ${file.toString} | func = $loaderFunc | outSchema = $schema""".stripMargin override def resolveReferences(mapping: mutable.Map[String, Ref]): Unit = { // we replace only the filename if (file.toString.startsWith("$") && mapping.contains(file.toString)) { mapping(file.toString) match { case Value(v) => val s = v.toString if (s(0) == '"') file = s.substring(1, s.length-1) // file = new URI(s.substring(1, s.length-1)) case _ => } } } }
Example 148
Source File: RDFLoad.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.op import dbis.piglet.schema._ import java.net.URI import org.kiama.rewriting.Rewriter.everything import scala.collection.mutable case class RDFLoad(private val out: Pipe, uri: URI, grouped: Option[String]) extends PigOperator(out) { schema = if (grouped.isDefined) { if (RDFLoad.groupedSchemas.contains(grouped.get)){ Some(RDFLoad.groupedSchemas(grouped.get)) } else { throw new IllegalArgumentException(grouped.get + " is not a valid RDF grouping column") } } else { RDFLoad.plainSchema } def BGPFilterIsReachable: Boolean = { val isBGPFilter: PartialFunction[Any, Boolean] = {case t: Any => t.isInstanceOf[BGPFilter]} everything[Boolean] ("BGPFilterIsReachable", false) { (old: Boolean, newvalue: Boolean) => old || newvalue } (isBGPFilter) (this) } } object RDFLoad { // lazy final val groupedSchemas = { def groupedSchemas = { val m = mutable.Map[String, Schema]() val columns = List[String]("subject", "predicate", "object") for (grouping_column <- columns) { val fields = columns.filterNot(_ == grouping_column).map { Field(_, Types.CharArrayType) }.toArray m(grouping_column) = Schema( BagType( TupleType( Array( Field(grouping_column, Types.CharArrayType), Field("stmts", BagType( TupleType( fields))))))) } m } }
Example 149
Source File: SpatialFilter.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.op import scala.collection.mutable.Map import dbis.piglet.expr.Predicate import dbis.piglet.expr.Ref import dbis.piglet.expr.Expr import dbis.piglet.expr.SpatialFilterPredicate import dbis.piglet.op.IndexMethod.IndexMethod import scala.collection.mutable override def lineageString: String = { s"""SPATIALFILTER%$pred%$idx""" + super.lineageString } override def resolveReferences(mapping: mutable.Map[String, Ref]): Unit = pred.resolveReferences(mapping) override def checkSchemaConformance: Boolean = { schema match { case Some(s) => // if we know the schema we check all named fields pred.traverseAnd(s, Expr.checkExpressionConformance) case None => // if we don't have a schema all expressions should contain only positional fields pred.traverseAnd(null, Expr.containsNoNamedFields) } } override def toString = s"""SPATIALFILTER | out = $outPipeName | in = $inPipeName | schema = $schema | expr = $pred | idx = $idx""".stripMargin }
Example 150
Source File: MacroOp.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.op import dbis.piglet.expr.{NamedField, Ref} import dbis.piglet.op.cmd.DefineMacroCmd import dbis.piglet.plan.InvalidPlanException import dbis.piglet.schema.Schema import scala.collection.mutable import scala.collection.mutable.ListBuffer def buildParameterMapping(cmd: DefineMacroCmd): Unit = { if (cmd.params.isEmpty && params.isDefined || cmd.params.isDefined && params.isEmpty) throw InvalidPlanException(s"macro $macroName: parameter list doesn't match with definition") if (cmd.params.isDefined) { val defs = cmd.params.get val p = params.get if (defs.size != p.size) throw InvalidPlanException(s"macro $macroName: number of parameters doesn't match with definition") for (i <- defs.indices) { paramMapping += ("$" + defs(i) -> p(i)) } } paramMapping += ("$" + cmd.out.name -> NamedField(outPipeName)) } override def lineageString: String = s"""MACRO%$macroName%""" + super.lineageString override def checkSchemaConformance: Boolean = { // TODO true } override def constructSchema: Option[Schema] = { // TODO super.constructSchema } override def toString = s"""MACRO | out = $outPipeName | name = $macroName | params = ${params.map(_.mkString(","))} """.stripMargin }
Example 151
Source File: Filter.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.op import dbis.piglet.expr.{Expr, Predicate, Ref} import scala.collection.mutable override def lineageString: String = { s"""FILTER%$pred%""" + super.lineageString } override def resolveReferences(mapping: mutable.Map[String, Ref]): Unit = pred.resolveReferences(mapping) override def checkSchemaConformance: Boolean = { schema match { case Some(s) => // if we know the schema we check all named fields pred.traverseAnd(s, Expr.checkExpressionConformance) case None => // if we don't have a schema all expressions should contain only positional fields pred.traverseAnd(null, Expr.containsNoNamedFields) } } override def toString = s"""FILTER | out = $outPipeName | in = $inPipeName | schema = $inputSchema | expr = $pred | ${if (windowMode) "window mode" else ""}""".stripMargin }
Example 152
Source File: PlanWriter.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.tools import java.nio.file.{Files, Path, StandardOpenOption} import dbis.piglet.op.{PigOperator, TimingOp} import dbis.piglet.plan.DataflowPlan import dbis.piglet.tools.logging.PigletLogging //import guru.nidi.graphviz.engine.{Format, Graphviz} //import guru.nidi.graphviz.parse.Parser import scala.collection.JavaConverters._ import scala.collection.mutable import scala.collection.mutable.ListBuffer import scala.concurrent.duration.Duration case class Node(id: String, var time: Option[Duration] = None, var label: String = "") { private def mkLabel = { val t = if(time.isDefined) s"\n${time.get.toMillis}ms (${BigDecimal(time.get.toMillis / 1000.0).setScale(2,BigDecimal.RoundingMode.HALF_UP).toDouble}s)" else "" val l = s"$label\n$id\n$t" PlanWriter.quote(l) } override def toString = s"op$id ${if(label.trim().nonEmpty) s"[label=$mkLabel]" else ""}" } case class Edge(from: String, to: String, var label: String = "") { override def toString = s"op$from -> op$to ${if(label.trim().nonEmpty) s"[label=$label]" else "" }" } private def writeDotFile(file: Path, graph: String): Unit = { logger.debug(s"writing dot file to $file") if(Files.notExists(file.getParent)) { Files.createDirectories(file.getParent) } Files.write(file, List(graph).asJava, StandardOpenOption.CREATE, StandardOpenOption.WRITE, StandardOpenOption.TRUNCATE_EXISTING) } }
Example 153
Source File: StreamWindowApplyEmitter.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.codegen.flink import dbis.piglet.codegen.CodeEmitter import dbis.piglet.codegen.CodeGenContext import dbis.piglet.op.WindowApply import dbis.piglet.codegen.scala_lang.ScalaEmitter import dbis.piglet.op.OrderBy import dbis.piglet.op.PigOperator import dbis.piglet.op.Pipe import dbis.piglet.op.Distinct import dbis.piglet.op.Empty import dbis.piglet.op.Grouping import dbis.piglet.op.Foreach import dbis.piglet.op.Filter import scala.collection.mutable import dbis.piglet.codegen.scala_lang.FilterEmitter import dbis.piglet.codegen.flink.emitter.StreamFilterEmitter import dbis.piglet.codegen.flink.emitter.StreamForeachEmitter import dbis.piglet.codegen.flink.emitter.StreamDistinctEmitter class StreamWindowApplyEmitter extends CodeEmitter[WindowApply] { override def template: String = """ val <out> = <in>.apply(<func> _)""" override def code(ctx: CodeGenContext, op: WindowApply): String = { render(Map("out" -> op.outPipeName, "in" -> op.inPipeName, "func" -> op.fname)) } override def helper(ctx: CodeGenContext, op: WindowApply): String = { val inSchema = ScalaEmitter.schemaClassName(op.inputSchema.get.className) val outSchema = ScalaEmitter.schemaClassName(op.schema.get.className) var fname, applyBody = "" var lastOp: PigOperator = new Empty(Pipe("empty")) val littleWalker = mutable.Queue(op.inputs.head.producer.outputs.flatMap(_.consumer).toSeq: _*) while (!littleWalker.isEmpty) { val operator = littleWalker.dequeue() operator match { case o @ Filter(_, _, pred, windowMode) if (windowMode) => { val e = new StreamFilterEmitter applyBody += e.windowApply(ctx, o) + "\n" } case o @ Distinct(_, _, windowMode) if (windowMode) => { val e = new StreamDistinctEmitter applyBody += e.windowApply(ctx, o) + "\n" } case o @ OrderBy(_, _, spec, windowMode) if (windowMode) => { val e = new StreamOrderByEmitter applyBody += e.windowApply(ctx, o) + "\n" } case o @ Grouping(_, _, groupExpr, windowMode) if (windowMode) => { val e = new StreamGroupingEmitter applyBody += e.windowApply(ctx, o) + "\n" } case o @ Foreach(_, _, gen, windowMode) if (windowMode) => { fname = "WindowFunc" + o.outPipeName val e = new StreamForeachEmitter applyBody += e.windowApply(ctx, o) return s""" def ${fname}(wi: Window, ts: Iterable[${inSchema}], out: Collector[${outSchema}]) = { | ts |${applyBody} | } """.stripMargin } case _ => } littleWalker ++= operator.outputs.flatMap(_.consumer) if (littleWalker.isEmpty) lastOp = operator } val before = lastOp.inputs.tail.head fname = "WindowFunc" + before.name applyBody += """.foreach { t => out.collect((t)) }""" s""" def ${fname}(wi: Window, ts: Iterable[${inSchema}], out: Collector[${outSchema}]) = { | ts |${applyBody} | } """.stripMargin } } object StreamWindowApplyEmitter { lazy val instance = new StreamWindowApplyEmitter }
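The emitter walks the operators downstream of a window with a mutable.Queue used as a worklist: dequeue one node, process it, enqueue its consumers. A standalone sketch of that traversal over a made-up node type.

import scala.collection.mutable

// Illustrative only: breadth-first walk over a tree of nodes using a mutable.Queue
// as the worklist, mirroring how the emitter above walks downstream operators.
object WorklistSketch {
  final case class Node(name: String, children: List[Node] = Nil)

  def bfsNames(roots: Seq[Node]): List[String] = {
    val out = List.newBuilder[String]
    val worklist = mutable.Queue(roots: _*)
    while (worklist.nonEmpty) {
      val node = worklist.dequeue()
      out += node.name
      worklist ++= node.children     // enqueue the next level
    }
    out.result()
  }

  def main(args: Array[String]): Unit = {
    val tree = Node("filter", List(Node("distinct"), Node("foreach", List(Node("sink")))))
    println(bfsNames(Seq(tree))) // List(filter, distinct, foreach, sink)
  }
}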
Example 154
Source File: CodeMatchers.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.tools import org.scalatest._ import matchers._ import scala.collection.mutable object SnippetMatcher { def matches(snippet: String, template: String): Boolean = { val replacements = mutable.Map[String, String]() val pattern = "\\$_?[0-9]".r val positions = pattern.findAllMatchIn(template) .map(p => p.start) .zipWithIndex .map{ case (p, offset) => p - offset}.toList val keys = pattern.findAllMatchIn(template).map(p => p.toString).toList val pattern2 = "_?[0-9]+".r var offs = 0 for (i <- keys.indices) { // now we look for the number that we use to replace the $i string if (snippet.length < positions(i) + offs + 1) return false pattern2.findFirstIn(snippet.substring(positions(i) + offs)) match { case Some(snip) => replacements += (keys(i) -> snip) // if it was longer than one digit we have to correct the position offs += snip.length - 1 case None => } } var s = template replacements.foreach{case (k, v) => s = s.replace(k, v)} snippet == s } } // Make them easy to import with: // import CodeMatchers._ object CodeMatchers extends CodeMatchers
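A usage sketch of SnippetMatcher above, assuming the file is on the classpath; the snippets are made up.

// Usage sketch: $1 in the template stands for any generated number in the snippet.
// The snippets below are made up.
import dbis.piglet.tools.SnippetMatcher

SnippetMatcher.matches("val x12 = foo(x12)", "val x$1 = foo(x$1)") // true: $1 binds to 12 both times
SnippetMatcher.matches("val x12 = foo(x34)", "val x$1 = foo(x$1)") // false: the two numbers differ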
Example 155
Source File: MutableHeader.scala From scalingua with Apache License 2.0 | 5 votes |
package ru.makkarpov.scalingua.pofile.parse import java.io.File import java_cup.runtime.ComplexSymbolFactory.Location import ru.makkarpov.scalingua.pofile.{MessageFlag, MessageHeader, MessageLocation, PoFile} import scala.collection.mutable class MutableHeader { private var _startLoc: Location = _ private var _endLoc: Location = _ private var comments: mutable.Builder[String, Seq[String]] = _ private var extractedComments: mutable.Builder[String, Seq[String]] = _ private var locations: mutable.Builder[MessageLocation, Seq[MessageLocation]] = _ private var flags: MessageFlag.ValueSet = _ private var tag: Option[String] = _ private def parseComment(cmt: Comment, left: Location, right: Location): Unit = cmt.commentTag match { case ' ' => val str = cmt.comment.trim if (!str.startsWith(PoFile.GeneratedPrefix)) comments += str case '.' => extractedComments += cmt.comment.trim case ':' => // It seems that GNU .po utilities can combine locations in a single line: // #: some.file:123 other.file:456 // but specifications does not specify how to handle spaces in a string. // So ignore there references, Scalingua itself will never produce such lines. val str = cmt.comment.trim val idx = str.lastIndexOf(':') if (idx != -1) { val file = str.substring(0, idx) val line = try str.substring(idx + 1) catch { case _: NumberFormatException => throw ParserException(left, right, "cannot parse line number") } locations += MessageLocation(new File(file), line.toInt) } else { locations += MessageLocation(new File(str), -1) } case ',' => val addFlags = cmt.comment.trim.split(",").flatMap { s => try Some(MessageFlag.withName(s.toLowerCase)) catch { case _: NoSuchElementException => None } } flags = addFlags.foldLeft(flags)(_ + _) case '~' => tag = Some(cmt.comment.trim) case _ => // ignore } def reset(): Unit = { _startLoc = null _endLoc = null comments = Vector.newBuilder extractedComments = Vector.newBuilder locations = Vector.newBuilder flags = MessageFlag.ValueSet() tag = None } def add(cmt: Comment, left: Location, right: Location): Unit = { if (_startLoc == null) { _startLoc = left } _endLoc = right parseComment(cmt, left, right) } def result(): MessageHeader = MessageHeader(comments.result(), extractedComments.result(), locations.result(), flags, tag) }
Example 156
Source File: GDBTableSeekWithNullValues.scala From spark-gdb with Apache License 2.0 | 5 votes |
package com.esri.gdb import scala.collection.mutable class GDBTableSeekWithNullValues(dataBuffer: DataBuffer, fields: Seq[Field], numFieldsWithNullAllowed: Int, indexIter: Iterator[IndexInfo]) extends Iterator[Map[String, Any]] with Serializable { private val nullValueIndicators = new Array[Byte]((numFieldsWithNullAllowed / 8.0).ceil.toInt) def hasNext() = indexIter.hasNext def next() = { val index = indexIter.next() val numBytes = dataBuffer.seek(index.seek).readBytes(4).getInt val byteBuffer = dataBuffer.readBytes(numBytes) 0 until nullValueIndicators.length foreach (nullValueIndicators(_) = byteBuffer.get) var bit = 0 val map = mutable.Map[String, Any]() fields.foreach(field => { if (field.nullable) { val i = bit >> 3 val m = 1 << (bit & 7) bit += 1 if ((nullValueIndicators(i) & m) == 0) { map(field.name) = field.readValue(byteBuffer, index.objectID) } } else { map(field.name) = field.readValue(byteBuffer, index.objectID) } } ) map.toMap } }
Example 157
Source File: GDBTableScanWithNullValues.scala From spark-gdb with Apache License 2.0 | 5 votes |
package com.esri.gdb import scala.collection.mutable class GDBTableScanWithNullValues(dataBuffer: DataBuffer, fields: Seq[Field], maxRows: Int, startID: Int = 0) extends Iterator[Map[String, Any]] with Serializable { val numFieldsWithNullAllowed = fields.count(_.nullable) val nullValueIndicators = new Array[Byte]((numFieldsWithNullAllowed / 8.0).ceil.toInt) var nextRow = 0 var objectID = startID def hasNext() = nextRow < maxRows def next() = { nextRow += 1 objectID += 1 val numBytes = dataBuffer.readBytes(4).getInt val byteBuffer = dataBuffer.readBytes(numBytes) 0 until nullValueIndicators.length foreach (nullValueIndicators(_) = byteBuffer.get) var bit = 0 val map = mutable.Map[String, Any]() fields.foreach(field => { if (field.nullable) { val i = bit >> 3 val m = 1 << (bit & 7) bit += 1 if ((nullValueIndicators(i) & m) == 0) { map(field.name) = field.readValue(byteBuffer, objectID) } } else { map(field.name) = field.readValue(byteBuffer, objectID) } } ) map.toMap } }
Example 158
Source File: MessageBuffer.scala From scala-loci with Apache License 2.0 | 5 votes |
package loci import java.nio.ByteBuffer import java.nio.charset.StandardCharsets import scala.annotation.compileTimeOnly import scala.collection.mutable final class MessageBuffer private (val backingArray: Array[Byte]) extends mutable.IndexedSeq[Byte] { @compileTimeOnly("`backingArrayBuffer` only available in JS") def backingArrayBuffer: Any = ??? @inline def length: Int = backingArray.length @inline def apply(index: Int) = { if (index < 0 || index >= length) throw new IndexOutOfBoundsException(s"index $index") backingArray(index) } @inline def update(index: Int, element: Byte) = { if (index < 0 || index >= length) throw new IndexOutOfBoundsException(s"index $index") backingArray(index) = element } @inline def update(offset: Int, buffer: MessageBuffer, bufferOffset: Int, count: Int) = { if (offset < 0 || bufferOffset < 0 || count < 0 || offset > length - count || bufferOffset > buffer.length - count) throw new IndexOutOfBoundsException( s"offset $offset, length $length, " + s"buffer offset ${bufferOffset}, buffer length ${buffer.length}, count $count") System.arraycopy(buffer.backingArray, bufferOffset, backingArray, offset, count) } @inline def concat(buffer: MessageBuffer): MessageBuffer = { val array = new Array[Byte](length + buffer.length) System.arraycopy(backingArray, 0, array, 0, length) System.arraycopy(buffer.backingArray, 0, array, length, buffer.length) new MessageBuffer(array) } @inline def copy(offset: Int, count: Int): MessageBuffer = { if (offset < 0 || count < 0 || offset > length - count) throw new IndexOutOfBoundsException(s"offset $offset, count $count, length $length") val array = new Array[Byte](count) System.arraycopy(backingArray, offset, array, 0, count) new MessageBuffer(array) } @inline def decodeString(offset: Int, count: Int): String = new String(backingArray, offset, count, StandardCharsets.UTF_8) @inline def decodeString: String = decodeString(0, length) @inline def asByteBuffer: ByteBuffer = ByteBuffer wrap backingArray override def toString: String = MessageBufferEncoding.byteBufferToString(asByteBuffer, 0, length, fatal = true) getOrElse MessageBufferEncoding.messageBufferToHexString(this) } object MessageBuffer { def empty: MessageBuffer = new MessageBuffer(Array.emptyByteArray) def allocate(length: Int): MessageBuffer = new MessageBuffer(new Array(length)) def encodeString(string: String): MessageBuffer = new MessageBuffer(string getBytes StandardCharsets.UTF_8) def wrapByteBuffer(buffer: ByteBuffer): MessageBuffer = if (!buffer.hasArray) { val duplicate = buffer.duplicate duplicate.position(0) duplicate.limit(buffer.capacity) val array = new Array[Byte](duplicate.remaining) duplicate.get(array) new MessageBuffer(array) } else new MessageBuffer(buffer.array) def wrapArray(array: Array[Byte]): MessageBuffer = new MessageBuffer(array) @compileTimeOnly("`wrapArrayBuffer` only available in JS") def wrapArrayBuffer(arrayBuffer: Any): MessageBuffer = ??? }
Example 159
Source File: Value.scala From scala-loci with Apache License 2.0 | 5 votes |
package loci package runtime import scala.collection.mutable object Value { case class Signature(name: String, module: String, path: List[String]) { override def toString: String = if (path.isEmpty) s"$module.$name" else s"${path mkString "."}.$module.$name" } object Signature { def serialize(signature: Signature): String = if (signature.path.isEmpty) s"${signature.module}!${signature.name}" else s"${signature.module}!${signature.path mkString "."}.${signature.name}" def deserialize(signature: String): Signature = { var first = 0 var last = 0 val end = signature.length val buffer = mutable.ListBuffer.empty[String] while (last < end && first < end) signature(last) match { case '!' => first = end case _ => last += 1 } val module = signature.substring(0, last) if (last < end) last += 1 first = last while (last < end) signature(last) match { case '.' => buffer += signature.substring(first, last) last += 1 first = last case '(' | ':' => last = end case _ => last += 1 } Signature(signature.substring(first, end), module, buffer.toList) } } case class Reference(channelName: String, channelAnchor: String, remote: Remote.Reference, system: System) extends transmitter.AbstractionRef { lazy val channel = system.obtainChannel(channelName, channelAnchor, remote) def derive(name: String) = Reference(s"$channelName:$name", channelAnchor, remote, system) override def toString: String = s"[channel:$channelName]$remote" } }
Example 160
Source File: Dispatcher.scala From scala-loci with Apache License 2.0 | 5 votes |
package loci package runtime import scala.collection.mutable import scala.concurrent.ExecutionContext import scala.util.control.NonFatal trait Dispatch[D <: Dispatch[D]] extends Runnable { def blockedBy(dispatch: D): Boolean final def blockedBy(dispatches: compatibility.IterableOnce[D]): Boolean = compatibility.iterable.exists(dispatches) { blockedBy } } trait Undispatchable[D <: Dispatch[D]] { this: Dispatch[D] => final def blockedBy(dispatch: D) = false final def run() = { } } class Dispatcher[D <: Dispatch[D]](implicit context: ExecutionContext) { private val dispatches = mutable.ListBuffer.empty[(D, Boolean)] def dispatch(dispatch: D*): Unit = dispatches synchronized { dispatch foreach { dispatches += _ -> false } next(Seq.empty) } def ignoreDispatched(dispatch: D*): Unit = dispatches synchronized { dispatch foreach { dispatches -= _ -> false } next(Seq.empty) } private def next(executed: compatibility.Iterable[D]): Unit = dispatches synchronized { executed foreach { dispatches -= _ -> true } val pendings = dispatches collect { case (dispatch, true) => dispatch } val dispatchings = mutable.ListBuffer.empty[mutable.ListBuffer[D]] compatibility.listBuffer.mapInPlace(dispatches) { case (dispatch, running) => dispatch match { case _: Undispatchable[D] => pendings += dispatch dispatch -> false case _ => if (!running) { if (!(dispatch blockedBy pendings)) { dispatchings filter dispatch.blockedBy match { case mutable.ListBuffer() => dispatchings += mutable.ListBuffer(dispatch) dispatch -> true case mutable.ListBuffer(dispatching) => dispatching += dispatch dispatch -> true case _ => pendings += dispatch dispatch -> false } } else { pendings += dispatch dispatch -> false } } else dispatch -> true } } dispatchings foreach { dispatching => logging.tracing(context).execute(new Runnable { def run() = { var throwable: Throwable = null dispatching foreach { dispatch => try dispatch.run() catch { case NonFatal(exception) => if (throwable == null) throwable = exception else throwable.addSuppressed(exception) } } next(dispatching) if (throwable != null) throw throwable } }) } } }
Example 161
Source File: AbstractAppender.scala From rollbar-scala with MIT License | 5 votes |
package com.storecove.rollbar.appenders import com.storecove.rollbar.util.FiniteQueue import com.storecove.rollbar.{RollbarNotifier, RollbarNotifierDefaults, RollbarNotifierFactory} import org.slf4j.MDC import scala.collection.JavaConversions._ import scala.collection.{immutable, mutable} trait AbstractAppender { protected val DEFAULT_LOGS_LIMITS = 100 protected var enabled: Boolean = true protected var onlyThrowable: Boolean = true protected var url: String = RollbarNotifierDefaults.defaultUrl protected var apiKey: String = _ protected var environment: String = _ protected var notifyLevelString: String = "ERROR" protected var limit: Int = DEFAULT_LOGS_LIMITS protected val rollbarNotifier: RollbarNotifier = RollbarNotifierFactory.getNotifier(apiKey, environment) protected val logBuffer: FiniteQueue[String] = new FiniteQueue[String](immutable.Queue[String]()) def setNotifyLevel(level: String): Unit protected def notifyLevel: Any = "ERROR" def setEnabled(enabled: Boolean): Unit = this.enabled = enabled def setOnlyThrowable(onlyThrowable: Boolean): Unit = this.onlyThrowable = onlyThrowable def setApiKey(apiKey: String): Unit = { this.apiKey = apiKey rollbarNotifier.setApiKey(apiKey) } def setEnvironment(environment: String): Unit = { this.environment = environment rollbarNotifier.setEnvironment(environment) } def setUrl(url: String): Unit = { this.url = url rollbarNotifier.setUrl(url) } def setLimit(limit: Int): Unit = this.limit = limit def getEnabled: Boolean = enabled def getOnlyThrowable: Boolean = onlyThrowable def getApiKey: String = apiKey def getEnvironment: String = environment def getUrl: String = url def getNotifyLevel: String = notifyLevelString def getLimit: Int = limit protected def getMDCContext: mutable.Map[String, String] = { val mdc = MDC.getCopyOfContextMap if (mdc == null) { mutable.Map.empty[String, String] } else { mapAsScalaMap(mdc) } } }
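getMDCContext above copies SLF4J's MDC into a scala.collection.mutable.Map through the deprecated JavaConversions implicits. A small sketch of the same lookup written against the explicit JavaConverters API instead; the requestId key is invented for illustration:

import org.slf4j.MDC

import scala.collection.JavaConverters._
import scala.collection.mutable

object MdcContextSketch extends App {
  MDC.put("requestId", "42")
  // getCopyOfContextMap may return null when nothing has been put into the MDC yet
  val ctx: mutable.Map[String, String] =
    Option(MDC.getCopyOfContextMap).map(_.asScala).getOrElse(mutable.Map.empty[String, String])
  println(ctx) // Map(requestId -> 42)
}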
Example 162
Source File: Analyser.scala From ClassDependenceAnalyser with GNU General Public License v2.0 | 5 votes |
package com.github.jllk.analyser import java.io.File import java.net.{URLClassLoader, URL} import scala.collection.mutable import scala.collection.mutable.ListBuffer object Analyser { def notCareClass(fullClassName: String): Boolean = fullClassName.startsWith("java") || fullClassName.startsWith("scala") || fullClassName.startsWith("\"[") || (fullClassName.startsWith("android") && !fullClassName.startsWith("android/support")) } class Analyser(private val dependenceJarPath: List[File]) { import Analyser._ def analysis(fullClassName: String): mutable.Set[String] = { val dependentClasses = mutable.Set[String]() val importDependence = analysisImportDependence(fullClassName) importDependence .foreach(c => { dependentClasses += c dependentClasses ++= analysisInheritDependence(c) }) dependentClasses } private def analysisImportDependence(fullClassName: String): List[String] = { val dependentClasses = new ListBuffer[String]() val classpath = dependenceJarPath.map(f => s"-classpath ${f.toPath}") mkString " " val classReport = ProcessUtils.exec(s"javap -verbose $classpath ${fullClassName.replace('.', '/')}") val lines = classReport.split('\n') lines .filter(l => l.contains("= Class") && !l.contains("\"[Ljava/lang/Object;\"")) .foreach(l => dependentClasses += l.substring(l.indexOf("//") + 2).replaceAll(" ", "").replaceAll("/", "\\.").trim()) dependentClasses .filter(notCareClass) .toList } private def analysisInheritDependence(fullClassName: String): List[String] = { val urls = ListBuffer[URL]() dependenceJarPath.foreach(f => urls += f.toURI.toURL) val classLoader = new URLClassLoader(urls.toArray) doClassInheritSearch(fullClassName, classLoader) } private def doClassInheritSearch(fullClassName: String, classLoader: URLClassLoader): List[String] = { if (notCareClass(fullClassName)) { List.empty[String] } else { val dependentClasses = mutable.Set[String]() dependentClasses += fullClassName dependentClasses ++= analysisImportDependence(fullClassName) dependentClasses.foreach(fullClassName => { val targetClass: Either[Class[_], Exception] = try Left(classLoader.loadClass(fullClassName)) catch { case e: ClassNotFoundException => Right(e) case e: Exception => Right(e) } targetClass match { case Left(c) => val superclass = c.getSuperclass if (superclass != null) { dependentClasses ++= doClassInheritSearch(superclass.getName, classLoader) } c.getInterfaces.foreach(i => dependentClasses ++= doClassInheritSearch(i.getName, classLoader)) case Right(e) => println(s"[doClassInheritSearch] exception happened: ${e.getMessage}, please check your dependenceJarPath.") } }) dependentClasses.toList } } }
Example 163
Source File: IOUtils.scala From ClassDependenceAnalyser with GNU General Public License v2.0 | 5 votes |
package com.github.jllk.analyser

import java.io._
import java.util.Set

import scala.collection.JavaConversions._
import scala.collection.mutable

object IOUtils {

  def writeToMainDexList(input: mutable.Set[String]) = {
    require(input != null)
    val output = new PrintWriter(new File("maindexlist.txt"))
    inSafe(output) {
      input.foreach(l => output.println(l.replaceAll("\\.", "/") + ".class"))
    }
  }

  def writeToMainDexList(input: Set[String]) = {
    require(input != null)
    val output = new PrintWriter(new File("maindexlist.txt"))
    inSafe(output) {
      input.foreach(l => output.println(l + ".class"))
    }
  }
}
Example 164
Source File: JobBuilder.scala From lemon-schedule with GNU General Public License v2.0 | 5 votes |
package com.gabry.job.core.builder import java.util.concurrent.TimeUnit import com.gabry.job.core.constant.Constants import com.gabry.job.core.domain.Job import com.gabry.job.core.tools.UIDGenerator import scala.collection.mutable object JobBuilder { def apply(): JobBuilder = new JobBuilder() } class JobBuilder extends Builder[Job]{ private var name:String = _ private var className:String = _ private var cron:String = _ private var dataTimeOffset:Long = 0 private var dataTimeOffsetUnit:TimeUnit = TimeUnit.MINUTES private var parallel:Int = Int.MaxValue private var meta:mutable.Map[String,String] = mutable.HashMap.empty[String,String] private var workerNodes:Array[String] = Array.empty[String] private var cluster:String = Constants.DEFAULT_CLUSTER_NAME private var group:String = Constants.DEFAULT_GROUP_NAME private var startTime:Long = System.currentTimeMillis() private var priority:Int = Int.MaxValue private var retryTimes:Int = 0 private var timeOut:Int = Int.MaxValue private var replaceIfExist:Boolean = false def withName(name:String):this.type = { this.name = name this } def withClass(className:String):this.type = { this.className = className this } def withCron(cron:String):this.type = { this.cron = cron this } def withDataTimeOffset(dataTimeOffset:Long):this.type = { this.dataTimeOffset = dataTimeOffset this } def withDataTimeOffsetUnit(dataTimeOffsetUnit:TimeUnit):this.type = { this.dataTimeOffsetUnit = dataTimeOffsetUnit this } def withParallel(parallel:Int):this.type = { this.parallel = parallel this } def withMeta(meta:Map[String,String]):this.type = { this.meta ++= meta this } def withMeta(key:String,value:String):this.type = { this.meta.put(key,value) this } def withWorkerNodes(workerNodes:Array[String]):this.type = { this.workerNodes = workerNodes this } def withCluster(cluster:String):this.type = { this.cluster = cluster this } def withGroup(group:String):this.type = { this.group = group this } def withStartTime(startTime:Long):this.type = { this.startTime = startTime this } def withPriority(priority:Int):this.type = { this.priority = priority this } def withRetryTimes(retryTimes:Int):this.type = { this.retryTimes = retryTimes this } def withTimeOut(timeOut:Int):this.type = { this.timeOut = timeOut this } def withReplaceIfExist(replaceIfExist:Boolean):this.type = { this.replaceIfExist = replaceIfExist this } override def build():Job = Job(UIDGenerator.globalUIDGenerator.nextUID(),name,className,cron,dataTimeOffset,dataTimeOffsetUnit,parallel,meta.toMap,workerNodes,cluster,group,startTime,priority,retryTimes,timeOut,replaceIfExist) }
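The builder above only mutates private fields, plus a mutable.HashMap for the metadata, until build() assembles the Job in a single call. A short sketch of the intended call chain; the job name, class name and cron expression are invented for illustration:

object JobBuilderDemo extends App {
  // each with* method returns this, so calls chain; withMeta entries accumulate in the internal mutable.HashMap
  val job = JobBuilder()
    .withName("nightly-report")
    .withClass("com.example.ReportTask")
    .withCron("0 0 2 * * ?")
    .withParallel(2)
    .withMeta("owner", "data-team")
    .withRetryTimes(3)
    .build()
  println(job)
}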
Example 165
Source File: DistributedRoughSet.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.spark.preprocessing.rst

  final def runHeuristic[O, T : ClassTag, V[A] <: GSimpleVector[A, V[A]], Sz[B, C <: GVector[C]] <: Supervizable[B, C, Sz]](data: RDD[Sz[O, V[T]]], columnsOfFeats: Seq[Seq[Int]]): mutable.Buffer[Int] = {

    val nbColumns = columnsOfFeats.size
    val dataBC = sc.broadcast(data.collect.par)

    sc.parallelize(0 until 8888, nbColumns).mapPartitionsWithIndex{ (idxp, _) =>
      val dataPerFeat = dataBC.value.map(_.obtainOneBucket(idxp))
      val originalFeatures = columnsOfFeats(idxp)
      val originalFeatIdByTmpFeatId = originalFeatures.zipWithIndex.map(_.swap).toMap
      val allReductSet = roughSet(dataPerFeat)
      allReductSet(Random.nextInt(allReductSet.size)).map(originalFeatIdByTmpFeatId).toIterator
    }
    .collect
    .toBuffer
  }
}
Example 166
Source File: K-Centers.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.clustering.kcenters.dataset @annotation.tailrec def go(cpt: Int, haveAllCentersConverged: Boolean, centers: List[(Int, V)]): List[(Int, V)] = { val preUpdatedCenters = data.groupByKey( cz => obtainNearestCenterID(cz.v, centers, metric) )(encoderInt) .mapGroups(computeCenters)(encoder) .collect .sortBy(_._1) .toList val alignedOldCenters = preUpdatedCenters.map{ case (oldClusterID, _) => centers(oldClusterID) } val updatedCenters = preUpdatedCenters.zipWithIndex.map{ case ((oldClusterID, center), newClusterID) => (newClusterID, center) } val shiftingEnough = areCentersNotMovingEnough(updatedCenters, alignedOldCenters, minShift, metric) if(cpt < maxIterations && !shiftingEnough) { go(cpt + 1, shiftingEnough, updatedCenters) } else { updatedCenters } } immutable.HashMap(go(0, false, centers):_*) } }
Example 167
Source File: ClusterwiseTypes.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.spark.clustering.clusterwise

import scala.collection.{mutable, immutable}
import breeze.linalg.DenseMatrix

trait ClusterwiseTypes {
  final type ClassID = Int
  final type ID = Int
  final type Xvector = Array[Double]
  final type Yvector = Array[Double]
  final type IDXYtest = Seq[(Int, (Xvector, Yvector))]
  final type IDXtest = Seq[(Long, Xvector)]
  final type DSPerClass = Array[(ID, (Xvector, Yvector, ClassID))]
  final type ClassedDS = Array[(Int, DSPerClass)]
  final type IDXDS = Array[mutable.ArrayBuffer[(Int, Xvector)]]
  final type YDS = Array[mutable.ArrayBuffer[Yvector]]
  final type RegPerClass = (Double, DenseMatrix[Double], Array[Double], Array[(Int, Array[Double])])
  final type ClassedDSperGrp = Array[(Int, Array[(Int, Int, Array[(ClassID, Int, Xvector, Yvector)])])]
}
Example 168
Source File: UtilSpark.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.sparktools import scala.language.higherKinds import org.apache.spark.rdd.RDD import org.apache.spark.HashPartitioner import scala.reflect.runtime.universe.TypeTag import scala.util.Random import scala.reflect.ClassTag import scala.collection.{GenSeq, mutable} import org.clustering4ever.preprocessing.Preprocessable import org.clustering4ever.hashing.HashingScalar import org.clustering4ever.vectors.{GVector, ScalarVector} object UtilSpark { type IndexPartition = Int type HasConverged = Boolean type IsOriginalDot = Boolean final def generateDataLocalityOnHashsedDS[ O, Pz[B, C <: GVector[C]] <: Preprocessable[B, C, Pz] ]( rddToPartitioned: RDD[Pz[O, ScalarVector]], nbblocs1: Int, nbBucketRange: Int ): RDD[(IndexPartition, (Pz[O, ScalarVector], IsOriginalDot, HasConverged))] = { val isOriginalPoint = true val hasConverged = true val bucketRange = 1 to nbBucketRange val lshRDD = rddToPartitioned.map((_, isOriginalPoint, !hasConverged)) val localityPerPartitionRDD = lshRDD.mapPartitionsWithIndex{ (idx, it) => val ar = it.toList def rightNeighbourhood = ar.flatMap{ case (cz, _, _) => bucketRange.collect{ case i if(idx + i < nbblocs1) => (idx + i, (cz, !isOriginalPoint, !hasConverged)) } } def leftNeighbourhood = ar.flatMap{ case (cz, _, _) => bucketRange.collect{ case i if(idx - i >= 0) => (idx - i, (cz, !isOriginalPoint, !hasConverged)) } } val composing = if(idx == 0) ar.map((idx, _)) ::: rightNeighbourhood else if(idx == nbblocs1 - 1) ar.map((idx, _)) ::: leftNeighbourhood else ar.map((idx, _)) ::: leftNeighbourhood ::: rightNeighbourhood composing.toIterator }.partitionBy(new HashPartitioner(nbblocs1)) localityPerPartitionRDD } final def generateDataLocalityLD[ O, Pz[B, C <: GVector[C]] <: Preprocessable[B, C, Pz], Hasher <: HashingScalar ]( rddToPartitioned: RDD[Pz[O, ScalarVector]], hashing: Hasher, nbblocs1: Int, nbBucketRange: Int ): RDD[(IndexPartition, (Pz[O, ScalarVector], IsOriginalDot, HasConverged))] = { val hashedRDD = rddToPartitioned.sortBy( cz => hashing.hf(cz.v) , ascending = true, nbblocs1 ) generateDataLocalityOnHashsedDS(hashedRDD, nbblocs1, nbBucketRange) } }
Example 169
Source File: KPPInitializer.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.clustering.kcenters.scala

  final def kppInit[
    O,
    V <: GVector[V],
    Cz[Y, Z <: GVector[Z]] <: Clusterizable[Y, Z, Cz],
    D <: Distance[V]
  ](data: GenSeq[Cz[O, V]], metric: D, k: Int): immutable.HashMap[Int, V] = {

    val centers = mutable.ArrayBuffer(data(Random.nextInt(data.size)).v)

    def obtainNearestCenter(v: V): V = centers.minBy(metric.d(_, v))

    @annotation.tailrec
    def go(i: Int): Unit = {
      val preprocessed = data.map{ cz =>
        val toPow2 = metric.d(cz.v, obtainNearestCenter(cz.v))
        (cz.v, toPow2 * toPow2)
      }
      val phi = preprocessed.aggregate(0D)((agg, e) => agg + e._2, _ + _)
      val probabilities = preprocessed.map{ case (v, toPow2) => (v, toPow2 / phi) }.seq
      val shuffled = Random.shuffle(probabilities)
      centers += Stats.obtainMedianFollowingWeightedDistribution[V](shuffled)
      if(i < k - 2) go(i + 1)
    }

    go(0)

    immutable.HashMap(centers.zipWithIndex.map{ case (center, clusterID) => (clusterID, center) }:_*)
  }
}
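kppInit above is k-means++ style seeding: the growing list of centers lives in a mutable.ArrayBuffer and each new center is drawn from a distribution weighted by the squared distance to the nearest existing center. The same idea on plain Double points, stripped of the library's Clusterizable and Distance abstractions (a standalone sketch, not the library API):

import scala.collection.mutable
import scala.util.Random

object KppSeedingSketch extends App {
  // pick k initial centers from 1-D points, weighting each draw by squared distance to the nearest chosen center
  def kppInit(points: Seq[Double], k: Int): Seq[Double] = {
    val centers = mutable.ArrayBuffer(points(Random.nextInt(points.size)))
    while (centers.size < k) {
      val weights = points.map { p =>
        val d = math.abs(p - centers.minBy(c => math.abs(c - p)))
        (p, d * d)
      }
      val r = Random.nextDouble * weights.map(_._2).sum
      // walk the cumulative weights until r is exceeded
      var acc = 0.0
      val chosen = weights.find { case (_, w) => acc += w; acc >= r }.map(_._1).getOrElse(points.last)
      centers += chosen
    }
    centers.toSeq
  }

  println(kppInit(Seq(0.0, 0.1, 0.2, 5.0, 5.1, 9.9), 3))
}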
Example 170
Source File: K-Means.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.clustering.kcenters.scala

  final def fit[D <: ContinuousDistance, GS[Y] <: GenSeq[Y]](
    data: GS[Array[Double]],
    k: Int,
    metric: D,
    minShift: Double,
    maxIterations: Int
  ): KMeansModel[D] = {
    KMeans(k, metric, minShift, maxIterations, immutable.HashMap.empty[Int, ScalarVector]).fit(scalarToClusterizable(data))
  }
}
Example 171
Source File: EigenValue.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.scala.clustering.tensor val n2 = data.head.cols val timeColumn = DenseMatrix.zeros[Double](m,n2) val timeRow = DenseMatrix.zeros[Double](m,n1) @annotation.tailrec def matriceColumnSet(t:mutable.ArrayBuffer[DenseMatrix[Double]], m:DenseMatrix[Double], c:DenseMatrix[Double], i: Int, j: Int , k: Int): DenseMatrix[Double] = { if (j < t.head.cols && k < t.length) { m(k, j) = t(k)(i, j) matriceColumnSet(t, m, c, i, j, k + 1) } else if (k == t.length && j < t.head.cols) { matriceColumnSet(t, m, c, i, j + 1 , 0) } else if (i < t.head.rows - 1) { c += cov(m) matriceColumnSet(t, m, c, i + 1, 0, 0) } else { c += cov(m) } } @annotation.tailrec def matriceRowSet(t: mutable.ArrayBuffer[DenseMatrix[Double]], m: DenseMatrix[Double], c: DenseMatrix[Double], i: Int, j: Int , k: Int): DenseMatrix[Double] = { if (i < t.head.rows && k < t.length) { m(k, i) = t(k)(i, j) matriceRowSet(t, m, c, i, j, k + 1) } else if (k == t.length && i < t.head.rows) { matriceRowSet(t, m, c, i + 1, j , 0) } else if (j < t.head.cols - 1){ c += cov(m) matriceRowSet(t, m, c, 0, j + 1, 0) } else { c += cov(m) } } val columnMatrix = matriceColumnSet(data, timeColumn, DenseMatrix.zeros[Double](n2,n2), 0, 0, 0 ) val svd.SVD(u1,eigValue,eigVector) = svd(columnMatrix) val columnEigvalue = eigValue.toArray val rowMatrix = matriceRowSet(data, timeRow, DenseMatrix.zeros[Double](n1,n1), 0, 0, 0 ) val svd.SVD(u2,eigValue2,eigVector2) = svd(rowMatrix) val rowEigvalue = eigValue2.toArray Array(rowEigvalue.take(5), columnEigvalue.take(5)) } } object EigenValue extends Serializable { def train(k: Int, data: mutable.ArrayBuffer[DenseMatrix[Double]]) = (new EigenValue(k)).fit(data) }
Example 172
Source File: Statistics.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.stats

  final def obtainMedianFollowingWeightedDistribution[V](distribution: Seq[(V, Double)]): V = {

    val p = scala.util.Random.nextDouble * distribution.foldLeft(0D)((agg, e) => agg + e._2)

    @annotation.tailrec
    def go(accum: Double, i: Int): Int = {
      if(accum < p) go(accum + distribution(i)._2, i + 1)
      else i
    }

    val cpt = go(0D, 0)
    if(cpt == 0) distribution.head._1 else distribution(cpt - 1)._1
  }
}
Example 173
Source File: HashingFunctions.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.hashing

  final def hf(v: Array[Double], j: Int): Double = {
    @annotation.tailrec
    def go(s: Double, i: Int): Double = {
      if(i < v.size) go(s + v(i) * hvs(j)._1(i), i + 1)
      else s
    }
    (go(0D, 0) + hvs(j)._2) / w
  }

  final def obtainBucketPerLevel(v: Array[Double]): immutable.IndexedSeq[Int] = {
    hvs.map{ case (rv, _, hfid) =>
      val bucketID = bucketsLimits.find{ case (th, _) => hf(v, hfid) <= th }
      if(bucketID.isDefined) bucketID.get._2 else buckets
    }
  }
}
Example 174
Source File: BinaryDistanceUtils.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.math.distances

  final def countOccFeat(data: Seq[Array[Int]]): Array[(Int, Int)] = {
    import org.clustering4ever.util.VectorsAddOperationsImplicits._
    val nbTotData = data.size
    val nbOne = data.reduce(SumVectors.sumVectors(_, _))
    val nbZero = nbOne.map(nbTotData - _)
    nbZero.zip(nbOne)
  }

  final def genProb2Feat(nbOccFeatTab: Seq[(Int, Int)], nbTotData: Int): Seq[(Double, Double)] = {
    nbOccFeatTab.map{ case (zero, one) =>
      val totDataMinusOne = nbTotData - 1D
      val product = nbTotData * totDataMinusOne
      (
        (zero * (zero - 1D)) / product,
        (one * (one - 1D)) / product
      )
    }
  }
}
Example 175
Source File: Tree.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.structures.tree

  final def depthTraversal[T](t: Tree[T]) = {
    @annotation.tailrec
    def go(l: List[Tree[T]], ids: mutable.Buffer[Int]): mutable.Buffer[Int] = {
      l match {
        case Nil => ids
        case Leaf(id, v) :: ls => go(ls, ids += id)
        case Node(id, childrens) :: ls => go(childrens ::: ls, ids += id)
      }
    }
    go(List(t), mutable.ArrayBuffer.empty[Int])
  }
}
Example 176
Source File: SortingTools.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.utils

  final def bucketSort(toSort: Array[Double], b: Int) = {

    val buckets = parallel.mutable.ParArray.fill(b)(mutable.ArrayBuffer.empty[Double])
    val m = toSort.max

    @annotation.tailrec
    def go(i: Int) : Unit = {
      if(i < toSort.size) {
        buckets((toSort(i) / m * (b - 1)).toInt) += toSort(i)
        go(i + 1)
      }
    }

    go(0)
    buckets.flatMap(_.sorted)
  }
}
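bucketSort above spreads the values over b buckets according to their size relative to the maximum, then sorts each bucket. The same idea with plain collections instead of ParArray, assuming non-negative input values (a sketch for illustration):

import scala.collection.mutable

object BucketSortSketch extends App {
  def bucketSort(toSort: Array[Double], b: Int): Array[Double] = {
    val buckets = Array.fill(b)(mutable.ArrayBuffer.empty[Double])
    val m = toSort.max
    // values close to the maximum land in the last bucket, small values in the first
    toSort.foreach(x => buckets((x / m * (b - 1)).toInt) += x)
    buckets.flatMap(_.sorted)
  }

  println(bucketSort(Array(0.42, 0.07, 0.9, 0.31, 0.88), 3).mkString(", "))
}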
Example 177
Source File: DiscoverConnexComponents.scala From Clustering4Ever with Apache License 2.0 | 5 votes |
package org.clustering4ever.graph

  final def obtainConnexComponents(nodes: immutable.HashSet[Int], neighbors: immutable.HashMap[Int, immutable.HashSet[Int]]): List[List[Int]] = {

    val visited = mutable.HashMap.empty[Int, Int]

    def depthFirstTraverseFunctional(node: Int, clusterID: Int): Unit = {
      val nodeToExplore = immutable.HashSet(node)

      def obtainUnvisitedNeihbors(hs: immutable.HashSet[Int]) = {
        hs.flatMap{ n =>
          val unvisited = neighbors(n).filter( n => !visited.contains(n) )
          visited ++= unvisited.map( uv => (uv, clusterID) )
          unvisited
        }
      }

      @annotation.tailrec
      def go(hs: immutable.HashSet[Int]): immutable.HashSet[Int] = if(!hs.isEmpty) go(obtainUnvisitedNeihbors(hs)) else hs

      go(nodeToExplore)
    }

    var clusterID = 0
    nodes.foreach( n =>
      if(!visited.contains(n)) {
        visited += ((n, clusterID))
        depthFirstTraverseFunctional(n, clusterID)
        clusterID += 1
      }
    )

    val labeledNodes = nodes.toList.map( n => (n, visited(n)) )
    val labels = labeledNodes.map(_._2)
    val connexComponents = labels.map( l => labeledNodes.collect{ case (n, cID) if cID == l => n } )

    connexComponents
  }
}
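obtainConnexComponents above labels every node with a cluster id kept in a mutable.HashMap while repeatedly expanding the unvisited neighbours of each start node. A small usage sketch, assuming the enclosing object is in scope; every node needs an entry in the neighbour map, and the returned list contains one (repeated) component list per node:

import scala.collection.immutable

object ConnexComponentsDemo extends App {
  // two components: {1, 2, 3} and {4, 5}
  val nodes = immutable.HashSet(1, 2, 3, 4, 5)
  val neighbors = immutable.HashMap(
    1 -> immutable.HashSet(2),
    2 -> immutable.HashSet(1, 3),
    3 -> immutable.HashSet(2),
    4 -> immutable.HashSet(5),
    5 -> immutable.HashSet(4)
  )
  println(obtainConnexComponents(nodes, neighbors))
}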
Example 178
Source File: PersistenceRepresentation.scala From vamp with Apache License 2.0 | 5 votes |
package io.vamp.persistence import io.vamp.common.Artifact import io.vamp.common.akka.CommonActorLogging import io.vamp.common.http.OffsetEnvelope import io.vamp.common.notification.NotificationProvider import scala.collection.mutable import scala.language.postfixOps import scala.reflect.ClassTag trait PersistenceRepresentation extends PersistenceApi with AccessGuard { this: CommonActorLogging with NotificationProvider ⇒ private val store: mutable.Map[String, mutable.Map[String, Artifact]] = new mutable.HashMap() protected def info(): Map[String, Any] = Map[String, Any]( "status" → (if (validData) "valid" else "corrupted"), "artifacts" → (store.map { case (key, value) ⇒ key → value.values.size } toMap) ) protected def all(`type`: String): List[Artifact] = store.get(`type`).map(_.values.toList).getOrElse(Nil) protected def all[T <: Artifact](kind: String, page: Int, perPage: Int, filter: T ⇒ Boolean): ArtifactResponseEnvelope = { log.debug(s"In memory representation: all [$kind] of $page per $perPage") val artifacts = all(kind).filter { artifact ⇒ filter(artifact.asInstanceOf[T]) } val total = artifacts.size val (p, pp) = OffsetEnvelope.normalize(page, perPage, ArtifactResponseEnvelope.maxPerPage) val (rp, rpp) = OffsetEnvelope.normalize(total, p, pp, ArtifactResponseEnvelope.maxPerPage) ArtifactResponseEnvelope(artifacts.slice((p - 1) * pp, p * pp), total, rp, rpp) } protected def get[T <: Artifact](name: String, kind: String): Option[T] = { log.debug(s"In memory representation: read [$kind] - $name}") store.get(kind).flatMap(_.get(name)).asInstanceOf[Option[T]] } protected def set[T <: Artifact](artifact: T, kind: String): T = { def put(map: mutable.Map[String, Artifact]) = { map.put(artifact.name, before( artifact )) after(set = true)(artifact) } log.debug(s"In memory representation: set [$kind] - ${artifact.name}") store.get(kind) match { case None ⇒ val map = new mutable.HashMap[String, Artifact]() put(map) store.put(kind, map) case Some(map) ⇒ put(map) } artifact } protected def delete[T <: Artifact](name: String, kind: String): Option[T] = { log.debug(s"In memory representation: delete [$kind] - $name}") store.get(kind) flatMap { map ⇒ val result = map.remove(name).map { artifact ⇒ after[T](set = false)(artifact.asInstanceOf[T]) } if (result.isEmpty) log.debug(s"Artifact not found for deletion: $kind: $name") result } } protected def find[A: ClassTag](p: A ⇒ Boolean, `type`: Class[_ <: Artifact]): Option[A] = { store.get(type2string(`type`)).flatMap { _.find { case (_, artifact: A) ⇒ p(artifact) case _ ⇒ false } } map (_._2.asInstanceOf[A]) } protected def before[T <: Artifact](artifact: T): T = artifact protected def after[T <: Artifact](set: Boolean)(artifact: T): T = artifact }
Example 179
Source File: Percolator.scala From vamp with Apache License 2.0 | 5 votes |
package io.vamp.pulse import akka.actor.{ Actor, ActorRef } import io.vamp.common.akka.CommonActorLogging import io.vamp.model.event.Event import scala.collection.mutable object Percolator { sealed trait PercolatorMessage case class GetPercolator(name: String) extends PercolatorMessage case class RegisterPercolator(name: String, tags: Set[String], `type`: Option[String], message: Any) extends PercolatorMessage case class UnregisterPercolator(name: String) extends PercolatorMessage } trait Percolator { this: Actor with CommonActorLogging ⇒ case class PercolatorEntry(tags: Set[String], `type`: Option[String], actor: ActorRef, message: Any) protected val percolators = mutable.Map[String, PercolatorEntry]() def getPercolator(name: String) = percolators.get(name) def registerPercolator(name: String, tags: Set[String], `type`: Option[String], message: Any) = { percolators.put(name, PercolatorEntry(tags, `type`, sender(), message)) match { case Some(entry) if entry.tags == tags && entry.`type` == `type` ⇒ case _ ⇒ log.info(s"Percolator '$name' has been registered for tags '${tags.mkString(", ")}'.") } } def unregisterPercolator(name: String) = { if (percolators.remove(name).nonEmpty) log.info(s"Percolator successfully removed for '$name'.") } def percolate(publishEventValue: Boolean): (Event ⇒ Event) = { (event: Event) ⇒ percolators.foreach { case (name, percolator) ⇒ if (percolator.tags.forall(event.tags.contains) && (percolator.`type`.isEmpty || percolator.`type`.get == event.`type`)) { log.debug(s"Percolate match for '$name'.") val send = if (publishEventValue) event else event.copy(value = None) percolator.actor ! (percolator.message → send) } } event } }
Example 180
Source File: SseConnector.scala From vamp with Apache License 2.0 | 5 votes |
package io.vamp.common.http import akka.Done import akka.actor.ActorSystem import akka.event.LoggingAdapter import akka.http.scaladsl.model.HttpHeader.ParsingResult.Ok import akka.http.scaladsl.model.sse.ServerSentEvent import akka.http.scaladsl.model.{ HttpHeader, HttpRequest, HttpResponse, Uri } import akka.stream.ActorMaterializer import akka.stream.scaladsl.{ Sink, Source } import io.vamp.common.http.EventSource.EventSource import scala.collection.mutable import scala.concurrent.Future import scala.concurrent.duration.{ FiniteDuration, _ } import scala.language.postfixOps import scala.util.{ Failure, Success } private case class SseConnectionConfig(url: String, headers: List[(String, String)], tlsCheck: Boolean) private case class SseConnectionEntryValue(source: EventSource) trait SseListener { def onEvent(event: ServerSentEvent): Unit } object SseConnector { private val retryDelay: FiniteDuration = 5 second private val listeners: mutable.Map[SseConnectionConfig, Set[SseListener]] = mutable.Map() private val connections: mutable.Map[SseConnectionConfig, Future[Done]] = mutable.Map() def open(url: String, headers: List[(String, String)] = Nil, tlsCheck: Boolean)(listener: SseListener)(implicit system: ActorSystem, logger: LoggingAdapter): Unit = synchronized { val config = SseConnectionConfig(url, headers, tlsCheck) implicit val materializer: ActorMaterializer = ActorMaterializer() listeners.update(config, listeners.getOrElse(config, Set()) + listener) connections.getOrElseUpdate(config, { logger.info(s"Opening SSE connection: $url") EventSource(Uri(url), send(config), None, retryDelay).takeWhile { event ⇒ event.eventType.foreach(t ⇒ logger.info(s"SSE: $t")) val receivers = listeners.getOrElse(config, Set()) receivers.foreach(_.onEvent(event)) val continue = receivers.nonEmpty if (!continue) logger.info(s"Closing SSE connection: $url") continue }.runWith(Sink.ignore) }) } def close(listener: SseListener): Unit = synchronized { listeners.transform((_, v) ⇒ v - listener) } private def send(config: SseConnectionConfig)(request: HttpRequest)(implicit system: ActorSystem, materializer: ActorMaterializer): Future[HttpResponse] = { val httpHeaders = config.headers.map { case (k, v) ⇒ HttpHeader.parse(k, v) } collect { case Ok(h, _) ⇒ h } filterNot request.headers.contains Source.single(request.withHeaders(request.headers ++ httpHeaders) → 1).via(HttpClient.pool[Any](config.url, config.tlsCheck)).map { case (Success(response: HttpResponse), _) ⇒ response case (Failure(f), _) ⇒ throw new RuntimeException(f.getMessage) }.runWith(Sink.head) } }
Example 181
Source File: LogPublisherHub.scala From vamp with Apache License 2.0 | 5 votes |
package io.vamp.common.akka import akka.actor.{ ActorRef, ActorSystem } import ch.qos.logback.classic.filter.ThresholdFilter import ch.qos.logback.classic.spi.ILoggingEvent import ch.qos.logback.classic.{ Level, LoggerContext, Logger ⇒ LogbackLogger } import ch.qos.logback.core.AppenderBase import io.vamp.common.Namespace import org.slf4j.{ Logger, LoggerFactory } import scala.collection.mutable object LogPublisherHub { private val logger = LoggerFactory.getLogger(LogPublisherHub.getClass) private val context = LoggerFactory.getILoggerFactory.asInstanceOf[LoggerContext] private val rootLogger = context.getLogger(Logger.ROOT_LOGGER_NAME) private val sessions: mutable.Map[String, LogPublisher] = new mutable.HashMap() def subscribe(to: ActorRef, level: String, loggerName: Option[String], encoder: (ILoggingEvent) ⇒ AnyRef)(implicit actorSystem: ActorSystem, namespace: Namespace): Unit = { val appenderLevel = Level.toLevel(level, Level.INFO) val appenderLogger = loggerName.map(context.getLogger).getOrElse(rootLogger) val exists = sessions.get(to.toString).exists { publisher ⇒ publisher.level == appenderLevel && publisher.logger.getName == appenderLogger.getName } if (!exists) { unsubscribe(to) if (appenderLevel != Level.OFF) { logger.info(s"Starting log publisher [${appenderLevel.levelStr}] '${appenderLogger.getName}': $to") val publisher = LogPublisher(to, appenderLogger, appenderLevel, encoder) publisher.start() sessions.put(to.toString, publisher) } } } def unsubscribe(to: ActorRef): Unit = { sessions.remove(to.toString).foreach { publisher ⇒ logger.info(s"Stopping log publisher: $to") publisher.stop() } } } private case class LogPublisher(to: ActorRef, logger: LogbackLogger, level: Level, encoder: (ILoggingEvent) ⇒ AnyRef)(implicit actorSystem: ActorSystem, namespace: Namespace) { private val filter = new ThresholdFilter() filter.setLevel(level.levelStr) private val appender = new AppenderBase[ILoggingEvent] { override def append(loggingEvent: ILoggingEvent) = to ! encoder(loggingEvent) } appender.addFilter(filter) appender.setName(to.toString) def start() = { val context = logger.getLoggerContext filter.setContext(context) appender.setContext(context) filter.start() appender.start() logger.addAppender(appender) } def stop() = { appender.stop() filter.stop() logger.detachAppender(appender) } }
Example 182
package io.vamp.common.akka import java.util.concurrent.atomic.AtomicInteger import _root_.akka.pattern.ask import akka.actor._ import akka.util.Timeout import com.typesafe.scalalogging.LazyLogging import io.vamp.common.Namespace import io.vamp.common.util.TextUtil import scala.collection.mutable import scala.concurrent.{ ExecutionContext, Future } import scala.reflect._ object IoC extends LazyLogging { private val counter = new AtomicInteger(0) private val aliases: mutable.Map[String, mutable.Map[Class[_], Class[_]]] = mutable.Map() private val actorRefs: mutable.Map[String, mutable.Map[Class[_], ActorRef]] = mutable.Map() private val namespaceMap: mutable.Map[String, Namespace] = mutable.Map() private val namespaceActors: mutable.Map[String, ActorRef] = mutable.Map() def namespaces: List[Namespace] = namespaceMap.values.toList def alias[FROM: ClassTag](implicit namespace: Namespace): Class[_] = { alias(classTag[FROM].runtimeClass) } def alias(from: Class[_])(implicit namespace: Namespace): Class[_] = { aliases.get(namespace.name).flatMap(_.get(from)).getOrElse(from) } def alias[FROM: ClassTag, TO: ClassTag](implicit namespace: Namespace): Option[Class[_]] = { alias(classTag[FROM].runtimeClass, classTag[TO].runtimeClass) } def alias(from: Class[_], to: Class[_])(implicit namespace: Namespace): Option[Class[_]] = { aliases.getOrElseUpdate(namespace.name, mutable.Map()).put(from, to) } def createActor(clazz: Class[_])(implicit actorSystem: ActorSystem, namespace: Namespace, timeout: Timeout): Future[ActorRef] = { createActor(Props(clazz)) } def createActor[ACTOR: ClassTag](implicit actorSystem: ActorSystem, namespace: Namespace, timeout: Timeout): Future[ActorRef] = { createActor(classTag[ACTOR].runtimeClass) } def createActor[ACTOR: ClassTag](arg: Any, args: Any*)(implicit actorSystem: ActorSystem, namespace: Namespace, timeout: Timeout): Future[ActorRef] = { createActor(Props(classTag[ACTOR].runtimeClass, arg :: args.toList: _*)) } def createActor(props: Props)(implicit actorSystem: ActorSystem, namespace: Namespace, timeout: Timeout): Future[ActorRef] = { logger.info(s"Create Actor ${props.clazz.getSimpleName} for namespace ${namespace.name}") implicit val ec: ExecutionContext = actorSystem.dispatcher (namespaceActor ? props) map { case actorRef: ActorRef ⇒ actorRefs.getOrElseUpdate(namespace.name, mutable.Map()).put(props.clazz, actorRef) aliases.getOrElseUpdate(namespace.name, mutable.Map()).foreach { case (from, to) if to == props.clazz ⇒ actorRefs.getOrElseUpdate(namespace.name, mutable.Map()).put(from, actorRef) case _ ⇒ } actorRef case _ ⇒ throw new RuntimeException(s"Cannot create actor for: ${props.clazz.getSimpleName}") } } def actorFor[ACTOR: ClassTag](implicit actorSystem: ActorSystem, namespace: Namespace): ActorRef = { actorFor(classTag[ACTOR].runtimeClass) } def actorFor(clazz: Class[_])(implicit actorSystem: ActorSystem, namespace: Namespace): ActorRef = { actorRefs.get(namespace.name).flatMap(_.get(alias(clazz))) match { case Some(actorRef) ⇒ actorRef case _ ⇒ throw new RuntimeException(s"No actor reference for: $clazz") } } private def namespaceActor(implicit actorSystem: ActorSystem, namespace: Namespace): ActorRef = { namespaceMap.put(namespace.name, namespace) namespaceActors.getOrElseUpdate(namespace.name, actorSystem.actorOf(Props(new Actor { def receive = { case props: Props ⇒ sender() ! context.actorOf(props, s"${TextUtil.toSnakeCase(props.clazz.getSimpleName)}-${counter.getAndIncrement}") case _ ⇒ } }), namespace.name)) } }
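The registry above leans on mutable.Map.getOrElseUpdate to lazily create one inner map per namespace and on plain get/flatMap for lookups. The pattern in isolation, with strings standing in for namespaces and actor references (a sketch, not the vamp API):

import scala.collection.mutable

object RegistrySketch extends App {
  private val refs: mutable.Map[String, mutable.Map[Class[_], String]] = mutable.Map()

  def register(ns: String, clazz: Class[_], ref: String): Unit =
    refs.getOrElseUpdate(ns, mutable.Map()).put(clazz, ref)

  def lookup(ns: String, clazz: Class[_]): Option[String] =
    refs.get(ns).flatMap(_.get(clazz))

  register("default", classOf[String], "actor-1")
  println(lookup("default", classOf[String])) // Some(actor-1)
  println(lookup("other", classOf[String]))   // None
}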
Example 183
Source File: YamlUtil.scala From vamp with Apache License 2.0 | 5 votes |
package io.vamp.common.util

import org.yaml.snakeyaml.Yaml
import org.yaml.snakeyaml.constructor.Constructor
import org.yaml.snakeyaml.error.YAMLException

import scala.collection.JavaConverters._
import scala.collection.mutable

object YamlUtil {

  def yaml: Yaml = {
    new Yaml(new Constructor() {
      override def getClassForName(name: String): Class[_] = throw new YAMLException("Not supported.")
    })
  }

  def convert(any: Any, preserveOrder: Boolean): Any = any match {
    case source: java.util.Map[_, _] ⇒
      if (preserveOrder) {
        val map = new mutable.LinkedHashMap[String, Any]()
        source.entrySet().asScala.foreach(entry ⇒ map += entry.getKey.toString → convert(entry.getValue, preserveOrder))
        map
      }
      else source.entrySet().asScala.map(entry ⇒ entry.getKey.toString → convert(entry.getValue, preserveOrder)).toMap
    case source: java.util.List[_]     ⇒ source.asScala.map(convert(_, preserveOrder)).toList
    case source: java.lang.Iterable[_] ⇒ source.asScala.map(convert(_, preserveOrder)).toList
    case source                        ⇒ source
  }
}
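convert above switches between a mutable.LinkedHashMap, which preserves the key order of the parsed YAML, and a plain immutable Map when order does not matter. A quick sketch of the difference, assuming YamlUtil is imported:

object YamlUtilDemo extends App {
  val source = new java.util.LinkedHashMap[String, Any]()
  source.put("b", 1)
  source.put("a", 2)

  println(YamlUtil.convert(source, preserveOrder = true))  // LinkedHashMap, keeps b before a
  println(YamlUtil.convert(source, preserveOrder = false)) // unordered immutable Map
}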
Example 184
Source File: MessageResolver.scala From vamp with Apache License 2.0 | 5 votes |
package io.vamp.common.notification import com.typesafe.scalalogging.Logger import org.slf4j.LoggerFactory import org.yaml.snakeyaml.Yaml import scala.collection.JavaConverters._ import scala.collection.mutable import scala.io.Source import scala.language.postfixOps trait MessageResolverProvider { val messageResolver: MessageResolver trait MessageResolver { def resolve(implicit notification: Notification): String } } trait DefaultPackageMessageResolverProvider extends MessageResolverProvider { val messageResolver: MessageResolver = new DefaultPackageMessageResolver() private class DefaultPackageMessageResolver extends MessageResolver { protected case class Message(parts: Seq[String], args: Seq[String]) private val logger = Logger(LoggerFactory.getLogger(classOf[Notification])) private val messages = new mutable.LinkedHashMap[String, mutable.Map[String, Any]]() def resolve(implicit notification: Notification): String = { try { val name = notification.getClass.getSimpleName val messageSource = resolveMessageSource messageSource.get(name) match { case None ⇒ logger.warn(s"No mapping for ${notification.getClass}") defaultMapping(error = false) case Some(value: Message) ⇒ resolveMessageValue(value) case Some(value: Any) ⇒ val message = parseMessage(value.toString) messageSource.put(name, message) resolveMessageValue(message) } } catch { case e: NoSuchMethodException ⇒ val field = e.getMessage.substring(e.getMessage.lastIndexOf('.') + 1, e.getMessage.length - 2) logger.error(s"Message mapping error: field '$field' not defined for ${notification.getClass}") defaultMapping() case e: Exception ⇒ logger.error(e.getMessage, e) defaultMapping() } } protected def defaultMapping(error: Boolean = true)(implicit notification: Notification): String = if (error) "Error." else "Notification." protected def resolveMessageSource(implicit notification: Notification): mutable.Map[String, Any] = { val packageName = notification.getClass.getPackage.toString messages.get(packageName) match { case None ⇒ val reader = Source.fromURL(notification.getClass.getResource("messages.yml")).bufferedReader() try { val input = new Yaml().load(reader).asInstanceOf[java.util.Map[String, Any]].asScala messages.put(packageName, input) input } finally { reader.close() } case Some(map) ⇒ map } } protected def parseMessage(message: String)(implicit notification: Notification): Message = { val pattern = "\\{[^}]+\\}" r val parts = pattern split message val args = (pattern findAllIn message).map(s ⇒ s.substring(1, s.length - 1)).toList Message(parts, args) } protected def resolveMessageValue(message: Message)(implicit notification: Notification): String = { val pi = message.parts.iterator val ai = message.args.iterator val sb = new StringBuilder() while (ai.hasNext) { sb append pi.next sb append ai.next().split('.').foldLeft(notification.asInstanceOf[AnyRef])((arg1, arg2) ⇒ arg1.getClass.getDeclaredMethod(arg2).invoke(arg1)).toString } if (pi.hasNext) sb append pi.next sb.toString() } } }
Example 185
Source File: DataFrameConverterSpec.scala From incubator-toree with Apache License 2.0 | 5 votes |
package org.apache.toree.utils import org.apache.spark.sql.types.StructType import org.apache.spark.sql.{DataFrame, Row} import org.mockito.Mockito._ import org.scalatest.mock.MockitoSugar import org.scalatest.{BeforeAndAfterAll, FunSpec, Matchers} import play.api.libs.json.{JsArray, JsString, Json} import test.utils.SparkContextProvider import scala.collection.mutable class DataFrameConverterSpec extends FunSpec with MockitoSugar with Matchers with BeforeAndAfterAll { lazy val spark = SparkContextProvider.sparkContext override protected def afterAll(): Unit = { spark.stop() super.afterAll() } val dataFrameConverter: DataFrameConverter = new DataFrameConverter val mockDataFrame = mock[DataFrame] val mockRdd = spark.parallelize(Seq(Row(new mutable.WrappedArray.ofRef(Array("test1", "test2")), 2, null))) val mockStruct = mock[StructType] val columns = Seq("foo", "bar").toArray doReturn(mockStruct).when(mockDataFrame).schema doReturn(columns).when(mockStruct).fieldNames doReturn(mockRdd).when(mockDataFrame).rdd describe("DataFrameConverter") { describe("#convert") { it("should convert to a valid JSON object") { val someJson = dataFrameConverter.convert(mockDataFrame, "json") val jsValue = Json.parse(someJson.get) jsValue \ "columns" should be (JsArray(Seq(JsString("foo"), JsString("bar")))) jsValue \ "rows" should be (JsArray(Seq( JsArray(Seq(JsString("[test1, test2]"), JsString("2"), JsString("null"))) ))) } it("should convert to csv") { val csv = dataFrameConverter.convert(mockDataFrame, "csv").get val values = csv.split("\n") values(0) shouldBe "foo,bar" values(1) shouldBe "[test1, test2],2,null" } it("should convert to html") { val html = dataFrameConverter.convert(mockDataFrame, "html").get html.contains("<th>foo</th>") should be(true) html.contains("<th>bar</th>") should be(true) html.contains("<td>[test1, test2]</td>") should be(true) html.contains("<td>2</td>") should be(true) html.contains("<td>null</td>") should be(true) } it("should convert limit the selection") { val someLimited = dataFrameConverter.convert(mockDataFrame, "csv", 1) val limitedLines = someLimited.get.split("\n") limitedLines.length should be(2) } it("should return a Failure for invalid types") { val result = dataFrameConverter.convert(mockDataFrame, "Invalid Type") result.isFailure should be(true) } } } }
Example 186
Source File: CoapSinkTask.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.coap.sink import java.util import com.datamountaineer.streamreactor.connect.coap.configs.{CoapConstants, CoapSettings, CoapSinkConfig} import com.datamountaineer.streamreactor.connect.errors.ErrorPolicyEnum import com.datamountaineer.streamreactor.connect.utils.{JarManifest, ProgressCounter} import com.typesafe.scalalogging.StrictLogging import org.apache.kafka.clients.consumer.OffsetAndMetadata import org.apache.kafka.common.TopicPartition import org.apache.kafka.connect.sink.{SinkRecord, SinkTask} import scala.collection.JavaConverters._ import scala.collection.mutable class CoapSinkTask extends SinkTask with StrictLogging { private val writers = mutable.Map.empty[String, CoapWriter] private val progressCounter = new ProgressCounter private var enableProgress: Boolean = false private val manifest = JarManifest(getClass.getProtectionDomain.getCodeSource.getLocation) override def start(props: util.Map[String, String]): Unit = { logger.info(scala.io.Source.fromInputStream(getClass.getResourceAsStream("/coap-sink-ascii.txt")).mkString + s" $version") logger.info(manifest.printManifest()) val conf = if (context.configs().isEmpty) props else context.configs() val sinkConfig = CoapSinkConfig(conf) enableProgress = sinkConfig.getBoolean(CoapConstants.PROGRESS_COUNTER_ENABLED) val settings = CoapSettings(sinkConfig) //if error policy is retry set retry interval if (settings.head.errorPolicy.getOrElse(ErrorPolicyEnum.THROW).equals(ErrorPolicyEnum.RETRY)) { context.timeout(sinkConfig.getString(CoapConstants.ERROR_RETRY_INTERVAL).toLong) } settings.map(s => (s.kcql.getSource, CoapWriter(s))).map({ case (k, v) => writers.put(k, v) }) } override def put(records: util.Collection[SinkRecord]): Unit = { records.asScala.map(r => writers(r.topic()).write(List(r))) val seq = records.asScala.toVector if (enableProgress) { progressCounter.update(seq) } } override def stop(): Unit = { writers.foreach({ case (t, w) => logger.info(s"Shutting down writer for $t") w.stop() }) progressCounter.empty } override def flush(map: util.Map[TopicPartition, OffsetAndMetadata]): Unit = {} override def version: String = manifest.version() }
Example 187
Source File: ConnectFileMetaDataStore.scala From stream-reactor with Apache License 2.0 | 5 votes |
package com.datamountaineer.streamreactor.connect.ftp.source import java.time.Instant import java.util import com.typesafe.scalalogging.StrictLogging import org.apache.kafka.connect.storage.OffsetStorageReader import scala.collection.JavaConverters._ import scala.collection.mutable // allows storage and retrieval of meta datas into connect framework class ConnectFileMetaDataStore(offsetStorage: OffsetStorageReader) extends FileMetaDataStore with StrictLogging { // connect offsets aren't directly committed, hence we'll cache them private val cache = mutable.Map[String, FileMetaData]() override def get(path: String): Option[FileMetaData] = cache.get(path).orElse({ val stored = getFromStorage(path) stored.foreach(set(path,_)) stored }) override def set(path: String, fileMetaData: FileMetaData): Unit = { logger.debug(s"ConnectFileMetaDataStore path = ${path}, fileMetaData.offset = ${fileMetaData.offset}, fileMetaData.attribs.size = ${fileMetaData.attribs.size}") cache.put(path, fileMetaData) } // cache couldn't provide us the info. this is a rather expensive operation (?) def getFromStorage(path: String): Option[FileMetaData] = offsetStorage.offset(Map("path" -> path).asJava) match { case null => logger.info(s"meta store storage HASN'T ${path}") None case o => logger.info(s"meta store storage has ${path}") Some(connectOffsetToFileMetas(path, o)) } def fileMetasToConnectPartition(meta:FileMetaData): util.Map[String, String] = { Map("path" -> meta.attribs.path).asJava } def connectOffsetToFileMetas(path:String, o:AnyRef): FileMetaData = { val jm = o.asInstanceOf[java.util.Map[String, AnyRef]] FileMetaData( FileAttributes( path, jm.get("size").asInstanceOf[Long], Instant.ofEpochMilli(jm.get("timestamp").asInstanceOf[Long]) ), jm.get("hash").asInstanceOf[String], Instant.ofEpochMilli(jm.get("firstfetched").asInstanceOf[Long]), Instant.ofEpochMilli(jm.get("lastmodified").asInstanceOf[Long]), Instant.ofEpochMilli(jm.get("lastinspected").asInstanceOf[Long]), jm.asScala.getOrElse("offset", -1L).asInstanceOf[Long] ) } def fileMetasToConnectOffset(meta: FileMetaData): util.Map[String, Any] = { Map("size" -> meta.attribs.size, "timestamp" -> meta.attribs.timestamp.toEpochMilli, "hash" -> meta.hash, "firstfetched" -> meta.firstFetched.toEpochMilli, "lastmodified" -> meta.lastModified.toEpochMilli, "lastinspected" -> meta.lastInspected.toEpochMilli, "offset" -> meta.offset ).asJava } }
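The store above is a read-through cache: get first checks the in-memory mutable.Map and only then falls back to the offset storage, caching whatever it finds there. The shape of that pattern detached from Kafka Connect, with a simulated backing lookup (a sketch for illustration):

import scala.collection.mutable

object ReadThroughCacheSketch extends App {
  val cache = mutable.Map[String, String]()

  // stand-in for the expensive OffsetStorageReader lookup
  def loadFromStorage(path: String): Option[String] =
    if (path == "/data/a") Some("meta-for-a") else None

  def get(path: String): Option[String] =
    cache.get(path).orElse {
      val stored = loadFromStorage(path)
      stored.foreach(cache.put(path, _))
      stored
    }

  println(get("/data/a")) // loaded from storage, then cached
  println(get("/data/a")) // served from the mutable.Map
  println(get("/data/b")) // None
}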
Example 188
Source File: TypeVar.scala From lift with MIT License | 5 votes |
package arithmetic import lift.arithmetic._ import ir._ import ir.ast.Expr import scala.collection.{immutable, mutable} import scala.language.implicitConversions class TypeVar private(range : Range, fixedId: Option[Long] = None) extends ExtensibleVar("", range, fixedId) { override def copy(r: Range) = new TypeVar(r, Some(id)) override def cloneSimplified() = new TypeVar(range, Some(id)) with SimplifiedExpr override def visitAndRebuild(f: (ArithExpr) => ArithExpr): ArithExpr = f(new TypeVar(range.visitAndRebuild(f), Some(id))) override lazy val toString = "tv_" + name + "_" + id } object TypeVar { def apply(range : Range = RangeUnknown) = { new TypeVar(range) } def getTypeVars(expr: Expr) : Set[TypeVar] = { Expr.visitWithState(immutable.HashSet[TypeVar]())(expr, (inExpr, set) => set ++ getTypeVars(inExpr.t)) } def getTypeVars(t: Type) : Set[TypeVar] = { val result = new mutable.HashSet[TypeVar]() Type.visit(t, (ae:ArithExpr) => result ++= getTypeVars(ae) : Unit ) result.toSet } def getTypeVars(expr: ArithExpr) : Set[TypeVar] = { val typeVars = scala.collection.mutable.HashSet[TypeVar]() ArithExpr.visit(expr, { case tv: TypeVar => typeVars += tv case _ => }) typeVars.toSet } }
Example 189
Source File: ConfigureDiskAction.scala From berilia with Apache License 2.0 | 5 votes |
package com.criteo.dev.cluster.aws import com.criteo.dev.cluster.command.SshAction import com.criteo.dev.cluster.{command, _} import com.criteo.dev.cluster.config.AWSConfig import org.slf4j.LoggerFactory import scala.collection.mutable import scala.concurrent.duration.Duration import scala.concurrent.{Await, Future} import scala.concurrent.ExecutionContext.Implicits.global def configureDisk(node: Node) : List[String] = { val result = SshAction(node, "lsblk", returnResult = true).stripLineEnd logger.info(s"Block information on ${node.ip}:") val lines = result.split("\n").map(_.trim) require(lines(0).trim.split("\\s+")(6).equalsIgnoreCase("MOUNTPOINT"), s"Mount point not in expected position in lsblk output: ${lines(0)}") //this is a bit delicate, but assuming the unmounted ones are at the end, //then we will take the ones up to the first one that has a mount entry. val toMount = lines.reverse.takeWhile(l => l.split("\\s+").length <= 6).map(l => l.split("\\s+")(0)) val mountCommands = toMount.zipWithIndex.flatMap { case (tm, i) => List( s"sudo echo -e 'o\\nn\\np\\n1\\n\\n\\nw' | sudo fdisk /dev/$tm", // create one partition (n, p, 1, default start, default end of sector) s"sudo /sbin/mkfs.ext4 /dev/${tm}1", // make fs s"sudo mkdir -p /${GeneralConstants.data}/$i", s"sudo mount /dev/${tm}1 /${GeneralConstants.data}/$i" // mount ) }.toList command.SshMultiAction(node, mountCommands) 0.to(toMount.length - 1).map(i => s"/${GeneralConstants.data}/$i").toList } }
Example 190
Source File: CopyJarAwsCliAction.scala From berilia with Apache License 2.0 | 5 votes |
package com.criteo.dev.cluster.aws import java.io.File import java.net.{URI, URL} import com.criteo.dev.cluster.config.GlobalConfig import com.criteo.dev.cluster._ import com.criteo.dev.cluster.command.RsyncAction import org.jclouds.compute.domain.NodeMetadata.Status import org.slf4j.LoggerFactory import scala.collection.mutable import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.Duration import scala.concurrent.{Await, Future} import sys.process._ @Public object CopyJarAwsCliAction extends CliAction[Unit] { override def command: String = "copy-jar-aws" override def usageArgs: List[Any] = List("instance.id", "source", "destination") override def help: String = "Copies a file from source to destination path to all nodes of a given cluster (if target directory exists)." private val logger = LoggerFactory.getLogger(CopyJarAwsCliAction.getClass) override def applyInternal(args: List[String], conf: GlobalConfig): Unit = { val instanceId = args(0) val cluster = AwsUtilities.getCluster(conf.backCompat, instanceId) if (!cluster.master.getStatus().equals(Status.RUNNING)) { logger.info("No running clusters found matching criteria.") } val source = args(1) val target = args(2) val sourceUri = new URI(source) val targetFile = new File(target) GeneralUtilities.prepareTempDir val sourceFile = sourceUri.getScheme().toLowerCase() match { case "http" => { val path = s"${GeneralUtilities.getTempDir()}/${targetFile.getName}" DevClusterProcess.process(s"curl -o $path $source").!! path } //only localhost supported case "file" => sourceUri.getPath() case _ => throw new IllegalArgumentException("Only http and file supported for sources for now.") } //copy over files in parallel val nodesToCopy = cluster.slaves ++ Set(cluster.master) logger.info(s"Copying to ${nodesToCopy.size} nodes in parallel.") val copyFutures = nodesToCopy.map(u => GeneralUtilities.getFuture { val targetN = NodeFactory.getAwsNode(conf.target.aws, u) val role = if (AwsUtilities.isSlave(u)) "Slave" else "Master" try { RsyncAction( srcPath = sourceFile, targetN = targetN, targetPath = target, sudo = true) s"$role Node ${u.getId()} with ${targetN.ip}: Copy successful." } catch { case e : Exception => s"$role Node ${u.getId()} with ${targetN.ip}: Copy Failed. This is normal if the given directory does not exist on the node." + s" If not expected, check the directory location and try again." } }) val aggCopyFuture = Future.sequence(copyFutures) val result = Await.result(aggCopyFuture, Duration.Inf) result.foreach(r => logger.info(r)) GeneralUtilities.cleanupTempDir } }
Example 191
Source File: MessagingService.scala From korolev with Apache License 2.0 | 5 votes |
package korolev.server.internal.services

import korolev.effect.syntax._
import korolev.effect.{Effect, Queue, Reporter, Stream}
import korolev.server.Request.RequestHeader
import korolev.server.Response
import korolev.server.Response.Status
import korolev.Qsid
import korolev.effect.io.LazyBytes

import scala.collection.mutable

private[korolev] final class MessagingService[F[_]: Effect](reporter: Reporter,
                                                            commonService: CommonService[F],
                                                            sessionsService: SessionsService[F, _, _]) {

  // Note: this excerpt omits several members of the original class, including
  // `commonResponseHeaders` and the `longPollingTopics` map referenced below.

  private val commonGoneResponse = Response(
    status = Response.Status.Gone,
    body = LazyBytes.empty[F],
    headers = commonResponseHeaders
  )

  private def takeTopic(qsid: Qsid) =
    Effect[F].delay {
      if (longPollingTopics.contains(qsid)) longPollingTopics(qsid)
      else throw new Exception(s"There is no long-polling topic matching $qsid")
    }

  private def createTopic(qsid: Qsid) =
    longPollingTopics.synchronized {
      val topic = Queue[F, String]()
      longPollingTopics.put(qsid, topic)
      topic.stream
    }
}
Example 192
Source File: RemoteDomChangesPerformer.scala From korolev with Apache License 2.0 | 5 votes |
package korolev.internal

import korolev.internal.Frontend.ModifyDomProcedure
import levsha.Id
import levsha.impl.DiffRenderContext.ChangesPerformer

import scala.collection.mutable

private[korolev] class RemoteDomChangesPerformer extends ChangesPerformer {

  val buffer: mutable.ArrayBuffer[Any] = mutable.ArrayBuffer.empty[Any]

  def remove(id: Id): Unit = {
    buffer += ModifyDomProcedure.Remove.code
    buffer += id.parent.get.mkString
    buffer += id.mkString
  }

  def createText(id: Id, text: String): Unit = {
    buffer += ModifyDomProcedure.CreateText.code
    buffer += id.parent.get.mkString
    buffer += id.mkString
    buffer += text
  }

  def create(id: Id, xmlNs: String, tag: String): Unit = {
    val parent = id.parent.fold("0")(_.mkString)
    val pXmlns = if (xmlNs eq levsha.XmlNs.html.uri) 0 else xmlNs
    buffer += ModifyDomProcedure.Create.code
    buffer += parent
    buffer += id.mkString
    buffer += pXmlns
    buffer += tag
  }

  def removeStyle(id: Id, name: String): Unit = {
    buffer += ModifyDomProcedure.RemoveStyle.code
    buffer += id.mkString
    buffer += name
  }

  def setStyle(id: Id, name: String, value: String): Unit = {
    buffer += ModifyDomProcedure.SetStyle.code
    buffer += id.mkString
    buffer += name
    buffer += value
  }

  def setAttr(id: Id, xmlNs: String, name: String, value: String): Unit = {
    val pXmlns = if (xmlNs eq levsha.XmlNs.html.uri) 0 else xmlNs
    buffer += ModifyDomProcedure.SetAttr.code
    buffer += id.mkString
    buffer += pXmlns
    buffer += name
    buffer += value
    buffer += false
  }

  def removeAttr(id: Id, xmlNs: String, name: String): Unit = {
    val pXmlns = if (xmlNs eq levsha.XmlNs.html.uri) 0 else xmlNs
    buffer += ModifyDomProcedure.RemoveAttr.code
    buffer += id.mkString
    buffer += pXmlns
    buffer += name
    buffer += false
  }
}
Example 193
Source File: AsyncTable.scala From korolev with Apache License 2.0 | 5 votes |
package korolev.effect

import korolev.effect.AsyncTable.{AlreadyContainsKeyException, RemovedBeforePutException}
import korolev.effect.Effect.Promise

import scala.collection.mutable

final class AsyncTable[F[_]: Effect, K, V](elems: Seq[(K, V)]) {

  private type Callbacks = List[Promise[V]]
  private type Result = Either[Throwable, V]

  private val table = mutable.Map[K, Either[Callbacks, Result]](elems.map { case (k, v) => (k, Right(Right(v))) }: _*)

  def get(key: K): F[V] =
    Effect[F].promise[V] { cb =>
      table.synchronized {
        table.get(key) match {
          case Some(Right(value)) => cb(value)
          case Some(Left(xs)) => table.update(key, Left(cb :: xs))
          case None => table.update(key, Left(cb :: Nil))
        }
      }
    }

  def put(key: K, value: V): F[Unit] =
    putEither(key, Right(value))

  def fail(key: K, error: Throwable): F[Unit] =
    putEither(key, Left(error))

  def putEither(key: K, errorOrValue: Either[Throwable, V]): F[Unit] =
    Effect[F].delay {
      table.synchronized {
        table.remove(key) match {
          case Some(Right(_)) =>
            throw AlreadyContainsKeyException(key)
          case Some(Left(callbacks)) =>
            table.update(key, Right(errorOrValue))
            callbacks.foreach(_(errorOrValue))
          case None =>
            table.update(key, Right(errorOrValue))
        }
      }
    }

  def remove(key: K): F[Unit] =
    Effect[F].delay {
      table.synchronized {
        table.remove(key) match {
          case Some(Left(callbacks)) =>
            val result = Left(RemovedBeforePutException(key))
            callbacks.foreach(_(result))
          case _ => ()
        }
      }
    }
}

object AsyncTable {

  final case class RemovedBeforePutException(key: Any)
    extends Exception(s"Key $key removed before value was put to table.")

  final case class AlreadyContainsKeyException(key: Any)
    extends Exception(s"This table already contains value for $key")

  def apply[F[_]: Effect, K, V](elems: (K, V)*) =
    new AsyncTable[F, K, V](elems)

  def empty[F[_]: Effect, K, V] =
    new AsyncTable[F, K, V](Nil)
}
Example 194
Source File: Queue.scala From korolev with Apache License 2.0 | 5 votes |
package korolev.effect

import scala.collection.mutable

class Queue[F[_]: Effect, T](maxSize: Int) {

  def offer(item: T): F[Unit] =
    Effect[F].delay(offerUnsafe(item))

  def offerUnsafe(item: T): Unit =
    underlyingQueue.synchronized {
      if (underlyingQueue.size == maxSize) {
        // Remove head from queue if max size reached
        underlyingQueue.dequeue()
        ()
      }
      if (pending != null) {
        val cb = pending
        pending = null
        cb(Right(Some(item)))
      } else {
        underlyingQueue.enqueue(item)
        ()
      }
    }

  def close(): F[Unit] =
    Effect[F].delay(closeUnsafe())

  def closeUnsafe(): Unit =
    underlyingQueue.synchronized {
      if (pending != null) {
        val cb = pending
        pending = null
        cb(Right(None))
      }
      closed = true
    }

  def fail(e: Throwable): F[Unit] = Effect[F].delay {
    underlyingQueue.synchronized {
      error = e
      if (pending != null) {
        val cb = pending
        pending = null
        cb(Left(e))
      }
    }
  }

  private final class QueueStream extends Stream[F, T] {

    def pull(): F[Option[T]] = Effect[F].promise { cb =>
      underlyingQueue.synchronized {
        if (error != null) cb(Left(error))
        else if (closed) cb(Right(None))
        else {
          if (underlyingQueue.nonEmpty) {
            val elem = underlyingQueue.dequeue()
            cb(Right(Some(elem)))
          } else {
            pending = cb
          }
        }
      }
    }

    def cancel(): F[Unit] = close()
  }

  val stream: Stream[F, T] = new QueueStream()

  @volatile private var closed = false
  @volatile private var error: Throwable = _
  @volatile private var pending: Effect.Promise[Option[T]] = _

  private val underlyingQueue: mutable.Queue[T] = mutable.Queue.empty[T]
}

object Queue {
  def apply[F[_]: Effect, T](maxSize: Int = Int.MaxValue): Queue[F, T] =
    new Queue[F, T](maxSize)
}
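The essential idea in Queue is the handoff between offerUnsafe and pull: if a consumer is already parked, the new item completes its callback directly; otherwise the item is buffered, evicting the oldest element once maxSize is reached. Below is a standalone sketch of the same pattern built on plain scala.concurrent.Promise rather than korolev's Effect (close/fail handling omitted); it is an illustration, not korolev code:

import scala.collection.mutable
import scala.concurrent.{Future, Promise}

// Same handoff idea as korolev's Queue, using standard-library primitives.
final class TinyQueue[T](maxSize: Int) {
  private val buffer = mutable.Queue.empty[T]
  private var pending: Promise[Option[T]] = _

  def offer(item: T): Unit = synchronized {
    if (pending != null) {
      // A consumer is parked: hand the item over directly.
      val p = pending
      pending = null
      p.success(Some(item))
    } else {
      // Nobody is waiting: buffer the item, dropping the oldest at capacity.
      if (buffer.size == maxSize) buffer.dequeue()
      buffer.enqueue(item)
    }
  }

  def pull(): Future[Option[T]] = synchronized {
    if (buffer.nonEmpty) Future.successful(Some(buffer.dequeue()))
    else {
      pending = Promise[Option[T]]()
      pending.future
    }
  }
}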
Example 195
Source File: MetabrowseTextModelService.scala From metabrowse with Apache License 2.0 | 5 votes |
package metabrowse

import scala.collection.mutable
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future
import monaco.Promise
import monaco.Uri
import monaco.editor.Editor
import monaco.editor.ITextModel
import monaco.services.IReference
import monaco.services.ITextEditorModel
import monaco.services.ITextModelService
import monaco.services.ImmortalReference
import scala.meta.internal.{semanticdb => s}

object MetabrowseTextModelService extends ITextModelService {

  def modelReference(
      filename: String
  ): Future[IReference[ITextEditorModel]] =
    modelDocument(createUri(filename)).map(_.model)

  // TODO(olafur): Move this state out for easier testing.
  private val modelDocumentCache = mutable.Map.empty[ITextModel, s.TextDocument]

  private def document(model: ITextModel) =
    MetabrowseMonacoDocument(
      modelDocumentCache(model),
      new ImmortalReference(ITextEditorModel(model))
    )

  def modelDocument(
      resource: Uri
  ): Future[MetabrowseMonacoDocument] = {
    val model = Editor.getModel(resource)
    if (model != null) {
      Future.successful(document(model))
    } else {
      for {
        Some(doc) <- MetabrowseFetch.document(resource.path)
      } yield {
        val model = Editor.createModel(doc.text, "scala", resource)
        modelDocumentCache(model) = doc
        document(model)
      }
    }
  }

  override def createModelReference(
      resource: Uri
  ): Promise[IReference[ITextEditorModel]] =
    modelDocument(resource).map(_.model).toMonacoPromise
}
Example 196
Source File: SortedBatch.scala From Waves with MIT License | 5 votes |
package com.wavesplatform.database

import com.wavesplatform.common.state.ByteStr
import org.iq80.leveldb.WriteBatch

import scala.collection.mutable

class SortedBatch extends WriteBatch {
  val addedEntries: mutable.Map[ByteStr, Array[Byte]] = mutable.TreeMap[ByteStr, Array[Byte]]()
  val deletedEntries: mutable.Set[ByteStr] = mutable.TreeSet[ByteStr]()

  override def put(bytes: Array[Byte], bytes1: Array[Byte]): WriteBatch = {
    val k = ByteStr(bytes)
    addedEntries.put(k, bytes1)
    deletedEntries.remove(k)
    this
  }

  override def delete(bytes: Array[Byte]): WriteBatch = {
    val k = ByteStr(bytes)
    addedEntries.remove(k)
    deletedEntries.add(k)
    this
  }

  override def close(): Unit = {}
}
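Because addedEntries is a TreeMap keyed by ByteStr, staged writes come back in key order regardless of insertion order, and deleting a previously put key moves it from addedEntries to deletedEntries. A small usage sketch (writes are only staged in memory here; nothing reaches LevelDB until the batch is applied):

import com.wavesplatform.common.state.ByteStr

val batch = new SortedBatch
batch.put(Array[Byte](2), Array[Byte](20))
batch.put(Array[Byte](1), Array[Byte](10))
batch.delete(Array[Byte](2))

// Only key 0x01 is still staged for writing (iteration over addedEntries is
// sorted by key); key 0x02 has moved to the deleted set.
assert(batch.addedEntries.size == 1 && batch.addedEntries.contains(ByteStr(Array[Byte](1))))
assert(batch.deletedEntries.contains(ByteStr(Array[Byte](2))))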
Example 197
Source File: ScatterPlot.scala From Scurses with MIT License | 5 votes |
package net.team2xh.onions.components.widgets

import net.team2xh.onions.Symbols
import net.team2xh.onions.Themes.ColorScheme
import net.team2xh.onions.components.{FramePanel, Widget}
import net.team2xh.onions.utils.{Drawing, Math, Varying}
import net.team2xh.scurses.Scurses

import scala.Numeric.Implicits._
import scala.collection.mutable

case class ScatterPlot[T: Numeric](parent: FramePanel, values: Varying[Seq[(T, T)]],
                                   labelX: String = "", labelY: String = "",
                                   color: Int = 81, showLabels: Boolean = true)
                                  (implicit screen: Scurses) extends Widget(parent, values) {

  val gridSize = 4

  override def redraw(focus: Boolean, theme: ColorScheme): Unit = {

    val (xs, ys) = values.value.unzip
    val maxX = Math.aBitMoreThanMax(xs)
    val maxY = Math.aBitMoreThanMax(ys)
    val minX = Math.aBitLessThanMin(xs)
    val minY = Math.aBitLessThanMin(ys)

    val valuesLength = maxY.toString.length max minY.toString.length
    val x0 = valuesLength + (if (showLabels) 2 else 0)
    val graphWidth = (if (showLabels) innerWidth - 3 else innerWidth - 1) - valuesLength
    val graphHeight = if (showLabels) innerHeight - 3 else innerHeight - 2

    // Draw grid
    Drawing.drawGrid(x0, 0, graphWidth, graphHeight, gridSize, theme.accent1, theme.background,
      showVertical = true, showHorizontal = true)
    // Draw axis values
    Drawing.drawAxisValues(x0 - valuesLength, 0, graphHeight, gridSize, minY, maxY, theme.accent3, theme.background, horizontal = false)
    Drawing.drawAxisValues(x0, graphHeight + 1, graphWidth, gridSize, minX, maxX, theme.accent3, theme.background)
    // Draw labels
    if (showLabels) {
      Drawing.drawAxisLabels(x0, graphWidth, graphHeight, labelX, labelY, theme)
    }

    // Prepare values (we use half vertical resolution)
    val points = mutable.ArrayDeque.fill[Int](graphWidth + 1, graphHeight + 1)(0)
    val charHeight = (maxY - minY).toDouble / graphHeight
    for (value <- values.value) {
      val nx = math.round((graphWidth * (value._1.toDouble - minX)) / (maxX - minX)).toInt
      val ny = graphHeight - math.round((graphHeight * (value._2.toDouble - minY)) / (maxY - minY)).toInt
      val point = points(nx)(ny)
      val isLower = if ((math.round(value._2.toDouble).toInt % charHeight) < (charHeight / 2.0)) 1 else 2
      points(nx).update(ny, point | isLower)
    }

    // Plot values
    for (x <- 0 to graphWidth; y <- 0 to graphHeight) {
      val point = points(x)(y)
      val symbol = point match {
        case 0 => ""
        case 1 => Symbols.BLOCK_UPPER
        case 2 => Symbols.BLOCK_LOWER
        case 3 => Symbols.BLOCK
      }
      if (point != 0)
        screen.put(x0 + x, y, symbol, foreground = color, background = theme.background)
    }
  }

  override def handleKeypress(keypress: Int): Unit = { }

  override def focusable: Boolean = false

  override def innerHeight: Int = parent.innerHeight - 3
}
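ScatterPlot doubles its vertical resolution by packing two samples into each terminal cell: the points array stores a 2-bit mask per cell (1 = upper half, 2 = lower half) and the mask selects the half-block character to print. A standalone sketch of that selection logic, using Unicode half-block literals in place of the Symbols constants:

// 2-bit mask per cell: bit 1 = upper half occupied, bit 2 = lower half occupied.
def symbolFor(mask: Int): String = mask match {
  case 1 => "▀" // upper half only
  case 2 => "▄" // lower half only
  case 3 => "█" // both halves
  case _ => " " // empty cell
}

var cell = 0
cell |= 1                // one sample falls in the upper half of this cell
cell |= 2                // another sample falls in the lower half
println(symbolFor(cell)) // prints the full block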
Example 198
Source File: SubdocLookupAccessor.scala From couchbase-spark-connector with Apache License 2.0 | 5 votes |
package com.couchbase.spark.connection

import java.util.concurrent.TimeUnit

import com.couchbase.client.core.BackpressureException
import com.couchbase.client.core.time.Delay
import com.couchbase.client.java.error.{CouchbaseOutOfMemoryException, TemporaryFailureException}
import com.couchbase.client.java.util.retry.RetryBuilder
import com.couchbase.spark.internal.LazyIterator
import rx.lang.scala.JavaConversions._
import rx.lang.scala.Observable

import scala.collection.mutable
import scala.concurrent.duration.Duration

case class SubdocLookupSpec(id: String, get: Seq[String], exists: Seq[String])

case class SubdocLookupResult(id: String, cas: Long, content: Map[String, Any],
                              exists: Map[String, Boolean])

class SubdocLookupAccessor(cbConfig: CouchbaseConfig,
                           specs: Seq[SubdocLookupSpec],
                           bucketName: String = null,
                           timeout: Option[Duration]) {

  def compute(): Iterator[SubdocLookupResult] = {
    if (specs.isEmpty) {
      return Iterator[SubdocLookupResult]()
    }

    val bucket = CouchbaseConnection().bucket(cbConfig, bucketName).async()
    val maxDelay = cbConfig.retryOpts.maxDelay
    val minDelay = cbConfig.retryOpts.minDelay
    val maxRetries = cbConfig.retryOpts.maxTries
    val kvTimeout = timeout
      .map(_.toMillis)
      .orElse(cbConfig.timeouts.kv)
      .getOrElse(bucket.environment().kvTimeout())

    LazyIterator {
      Observable
        .from(specs)
        .flatMap(spec => {
          var builder = bucket.lookupIn(spec.id)
          spec.exists.foreach(builder.exists(_))
          spec.get.foreach(builder.get(_))
          toScalaObservable(builder.execute().timeout(kvTimeout, TimeUnit.MILLISECONDS)
          ).map(fragment => {
            val content = mutable.Map[String, Any]()
            spec.get.foreach(path => content.put(path, fragment.content(path)))
            val exists = mutable.Map[String, Boolean]()
            spec.exists.foreach(path => exists.put(path, fragment.status(path).isSuccess))
            SubdocLookupResult(spec.id, fragment.cas(), content.toMap, exists.toMap)
          }).retryWhen(
            RetryBuilder
              .anyOf(classOf[TemporaryFailureException], classOf[BackpressureException],
                classOf[CouchbaseOutOfMemoryException])
              .delay(Delay.exponential(TimeUnit.MILLISECONDS, maxDelay, minDelay))
              .max(maxRetries)
              .build())
        })
        .toBlocking
        .toIterable
        .iterator
    }
  }
}
Example 199
Source File: WorkerInfo.scala From aloha with Apache License 2.0 | 5 votes |
package me.jrwang.aloha.scheduler.master

import scala.collection.mutable

import me.jrwang.aloha.common.util.Utils
import me.jrwang.aloha.rpc.RpcEndpointRef

private[aloha] class WorkerInfo(
    val id: String,
    val host: String,
    val port: Int,
    val cores: Int,
    val memory: Int,
    val endpoint: RpcEndpointRef) extends Serializable {

  Utils.checkHost(host)
  assert (port > 0)

  @transient var apps: mutable.HashMap[String, ApplicationInfo] = _ // driverId => info
  @transient var state: WorkerState.Value = _
  @transient var coresUsed: Int = _
  @transient var memoryUsed: Int = _

  @transient var lastHeartBeat: Long = _

  init()

  def coresFree: Int = cores - coresUsed
  def memoryFree: Int = memory - memoryUsed

  private def readObject(in: java.io.ObjectInputStream): Unit = Utils.tryOrIOException {
    in.defaultReadObject()
    init()
  }

  private def init() {
    apps = new mutable.HashMap()
    state = WorkerState.ALIVE
    coresUsed = 0
    memoryUsed = 0
    lastHeartBeat = System.currentTimeMillis()
  }

  def hostPort: String = {
    assert (port > 0)
    host + ":" + port
  }

  def addApplication(app: ApplicationInfo) {
    apps(app.id) = app
    memoryUsed += app.desc.memory
    coresUsed += app.desc.cores
  }

  def removeApplication(app: ApplicationInfo) {
    apps -= app.id
    memoryUsed -= app.desc.memory
    coresUsed -= app.desc.cores
  }

  def setState(state: WorkerState.Value): Unit = {
    this.state = state
  }

  def isAlive(): Boolean = this.state == WorkerState.ALIVE
}
Example 200
Source File: ConfigReader.scala From aloha with Apache License 2.0 | 5 votes |
package me.jrwang.aloha.common.config

import java.util.{Map => JMap}

import scala.collection.mutable
import scala.util.matching.Regex

private object ConfigReader {
  private val REF_REGEX = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r
}

// The enclosing ConfigReader class is elided in this excerpt; only one of its
// private helpers is shown.
  private def getOrDefault(conf: ConfigProvider, key: String): Option[String] = {
    conf.get(key).orElse {
      ConfigEntry.findEntry(key) match {
        case e: ConfigEntryWithDefault[_] => Option(e.defaultValueString)
        case e: ConfigEntryWithDefaultString[_] => Option(e.defaultValueString)
        case e: ConfigEntryWithDefaultFunction[_] => Option(e.defaultValueString)
        case e: FallbackConfigEntry[_] => getOrDefault(conf, e.fallback.key)
        case _ => None
      }
    }
  }
}
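REF_REGEX recognizes variable references of the form ${name} or ${prefix:name}, capturing the optional prefix and the name separately. A quick illustration with the same pattern (the sample keys are made up; Scala's regex extractor anchors to the whole string here):

val RefRegex = "\\$\\{(?:(\\w+?):)?(\\S+?)\\}".r

"${env:JAVA_HOME}" match {
  case RefRegex(prefix, name) => println(s"prefix=$prefix, name=$name") // prefix=env, name=JAVA_HOME
}

"${aloha.master.host}" match {
  case RefRegex(prefix, name) => println(s"prefix=$prefix, name=$name") // prefix=null, name=aloha.master.host
}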