scala.util.Random Scala Examples
The following examples show how to use scala.util.Random.
Each example is taken from an open-source project; the source file, project name, and license appear in the header above the code.
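Before the project-specific examples, here is a minimal, self-contained sketch of the scala.util.Random calls that recur throughout them (nextInt, shuffle, alphanumeric, seeded instances); the object and value names are illustrative only.

import scala.util.Random

object RandomBasics extends App {
  // The shared Random object is convenient but unseeded: results change between runs.
  val roll: Int = Random.nextInt(6) + 1                    // uniform value in 1..6
  val id: String = Random.alphanumeric.take(8).mkString    // random alphanumeric string
  val order: List[Int] = Random.shuffle(List(1, 2, 3, 4, 5))

  // A seeded instance always produces the same sequence for the same seed,
  // which is what the test suites below rely on for reproducibility.
  val seeded = new Random(42L)
  val fixed: Double = seeded.nextDouble()

  println(s"roll=$roll id=$id order=$order fixed=$fixed")
}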
Example 1
Source File: MockHttpServer.scala From cornichon with Apache License 2.0 | 6 votes |
package com.github.agourlay.cornichon.http.server import java.net.NetworkInterface import com.github.agourlay.cornichon.core.CornichonError import monix.eval.Task import monix.execution.Scheduler import org.http4s.HttpRoutes import org.http4s.server.Router import org.http4s.server.blaze.BlazeServerBuilder import org.http4s.implicits._ import scala.jdk.CollectionConverters._ import scala.concurrent.duration._ import scala.util.Random class MockHttpServer[A](interface: Option[String], port: Option[Range], mockService: HttpRoutes[Task], maxRetries: Int = 5)(useFromAddress: String => Task[A])(implicit scheduler: Scheduler) { private val selectedInterface = interface.getOrElse(bestInterface()) private val randomPortOrder = port.fold(0 :: Nil)(r => Random.shuffle(r.toList)) private val mockRouter = Router("/" -> mockService).orNotFound def useServer(): Task[A] = if (randomPortOrder.isEmpty) Task.raiseError(MockHttpServerError.toException) else startServerTryPorts(randomPortOrder) private def startServerTryPorts(ports: List[Int], retry: Int = 0): Task[A] = startBlazeServer(ports.head).onErrorHandleWith { case _: java.net.BindException if ports.length > 1 => startServerTryPorts(ports.tail, retry) case _: java.net.BindException if retry < maxRetries => val sleepFor = retry + 1 println(s"Could not start server on any port. Retrying in $sleepFor seconds...") startServerTryPorts(randomPortOrder, retry = retry + 1).delayExecution(sleepFor.seconds) } private def startBlazeServer(port: Int): Task[A] = BlazeServerBuilder[Task](executionContext = scheduler) .bindHttp(port, selectedInterface) .withoutBanner .withHttpApp(mockRouter) .withNio2(true) .resource .use(server => useFromAddress(s"http://${server.address.getHostString}:${server.address.getPort}")) private def bestInterface(): String = NetworkInterface.getNetworkInterfaces.asScala .filter(_.isUp) .flatMap(_.getInetAddresses.asScala) .find(_.isSiteLocalAddress) .map(_.getHostAddress) .getOrElse("localhost") } case object MockHttpServerError extends CornichonError { val baseErrorMessage = "the range of ports provided for the HTTP mock is invalid" }
Example 2
Source File: SamplingUtilsSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.util.random

import scala.util.Random

import org.apache.commons.math3.distribution.{BinomialDistribution, PoissonDistribution}

import org.apache.spark.SparkFunSuite

class SamplingUtilsSuite extends SparkFunSuite {

  test("reservoirSampleAndCount") {
    val input = Seq.fill(100)(Random.nextInt())

    // input size < k
    val (sample1, count1) = SamplingUtils.reservoirSampleAndCount(input.iterator, 150)
    assert(count1 === 100)
    assert(input === sample1.toSeq)

    // input size == k
    val (sample2, count2) = SamplingUtils.reservoirSampleAndCount(input.iterator, 100)
    assert(count2 === 100)
    assert(input === sample2.toSeq)

    // input size > k
    val (sample3, count3) = SamplingUtils.reservoirSampleAndCount(input.iterator, 10)
    assert(count3 === 100)
    assert(sample3.length === 10)
  }

  // compute the sampling fraction
  test("computeFraction") {
    // test that the computed fraction guarantees enough data points
    // in the sample with a failure rate <= 0.0001
    val n = 100000

    for (s <- 1 to 15) {
      val frac = SamplingUtils.computeFractionForSampleSize(s, n, true)
      val poisson = new PoissonDistribution(frac * n)
      assert(poisson.inverseCumulativeProbability(0.0001) >= s, "Computed fraction is too low")
    }
    for (s <- List(20, 100, 1000)) {
      val frac = SamplingUtils.computeFractionForSampleSize(s, n, true)
      val poisson = new PoissonDistribution(frac * n)
      assert(poisson.inverseCumulativeProbability(0.0001) >= s, "Computed fraction is too low")
    }
    for (s <- List(1, 10, 100, 1000)) {
      val frac = SamplingUtils.computeFractionForSampleSize(s, n, false)
      val binomial = new BinomialDistribution(n, frac)
      assert(binomial.inverseCumulativeProbability(0.0001) * n >= s, "Computed fraction is too low")
    }
  }
}
Example 3
Source File: VectorSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.util

import scala.util.Random

import org.apache.spark.SparkFunSuite

@deprecated("suppress compile time deprecation warning", "1.0.0")
class VectorSuite extends SparkFunSuite {

  def verifyVector(vector: Vector, expectedLength: Int): Unit = {
    assert(vector.length == expectedLength)
    assert(vector.elements.min > 0.0)
    assert(vector.elements.max < 1.0)
  }

  test("random with default random number generator") { // default random number generator
    val vector100 = Vector.random(100)
    verifyVector(vector100, 100)
  }

  test("random with given random number generator") { // explicitly supplied random number generator
    val vector100 = Vector.random(100, new Random(100))
    verifyVector(vector100, 100)
  }
}
Example 4
Source File: ByteArrayChunkOutputStreamSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.util.io

import scala.util.Random

import org.apache.spark.SparkFunSuite

class ByteArrayChunkOutputStreamSuite extends SparkFunSuite {

  test("empty output") {
    val o = new ByteArrayChunkOutputStream(1024)
    assert(o.toArrays.length === 0)
  }

  test("write a single byte") {
    val o = new ByteArrayChunkOutputStream(1024)
    o.write(10)
    assert(o.toArrays.length === 1)
    assert(o.toArrays.head.toSeq === Seq(10.toByte))
  }

  test("write a single near boundary") {
    val o = new ByteArrayChunkOutputStream(10)
    o.write(new Array[Byte](9))
    o.write(99)
    assert(o.toArrays.length === 1)
    assert(o.toArrays.head(9) === 99.toByte)
  }

  test("write a single at boundary") {
    val o = new ByteArrayChunkOutputStream(10)
    o.write(new Array[Byte](10))
    o.write(99)
    assert(o.toArrays.length === 2)
    assert(o.toArrays(1).length === 1)
    assert(o.toArrays(1)(0) === 99.toByte)
  }

  test("single chunk output") {
    val ref = new Array[Byte](8)
    Random.nextBytes(ref)
    val o = new ByteArrayChunkOutputStream(10)
    o.write(ref)
    val arrays = o.toArrays
    assert(arrays.length === 1)
    assert(arrays.head.length === ref.length)
    assert(arrays.head.toSeq === ref.toSeq)
  }

  test("single chunk output at boundary size") {
    val ref = new Array[Byte](10)
    Random.nextBytes(ref)
    val o = new ByteArrayChunkOutputStream(10)
    o.write(ref)
    val arrays = o.toArrays
    assert(arrays.length === 1)
    assert(arrays.head.length === ref.length)
    assert(arrays.head.toSeq === ref.toSeq)
  }

  test("multiple chunk output") {
    val ref = new Array[Byte](26)
    Random.nextBytes(ref)
    val o = new ByteArrayChunkOutputStream(10)
    o.write(ref)
    val arrays = o.toArrays
    assert(arrays.length === 3)
    assert(arrays(0).length === 10)
    assert(arrays(1).length === 10)
    assert(arrays(2).length === 6)
    assert(arrays(0).toSeq === ref.slice(0, 10))
    assert(arrays(1).toSeq === ref.slice(10, 20))
    assert(arrays(2).toSeq === ref.slice(20, 26))
  }

  test("multiple chunk output at boundary size") {
    val ref = new Array[Byte](30)
    Random.nextBytes(ref)
    val o = new ByteArrayChunkOutputStream(10)
    o.write(ref)
    val arrays = o.toArrays
    assert(arrays.length === 3)
    assert(arrays(0).length === 10)
    assert(arrays(1).length === 10)
    assert(arrays(2).length === 10)
    assert(arrays(0).toSeq === ref.slice(0, 10))
    assert(arrays(1).toSeq === ref.slice(10, 20))
    assert(arrays(2).toSeq === ref.slice(20, 30))
  }
}
Example 5
Source File: Employee.scala From hazelcast-scala with Apache License 2.0 | 5 votes |
package joe.schmoe

import java.util.UUID
import scala.util.Random

case class Employee(id: UUID, name: String, salary: Int, age: Int, active: Boolean)

object Employee {
  def random: Employee = {
    val name = randomString(20)
    val salary = Random.nextInt(480000) + 20001
    val age = Random.nextInt(60) + 20
    val active = Random.nextInt(20) == 0
    new Employee(UUID.randomUUID, name, salary, age, active)
  }
}
Example 6
Source File: ClusterSetup.scala From hazelcast-scala with Apache License 2.0 | 5 votes |
package joe.schmoe import java.util.UUID import com.hazelcast.Scala._ import com.hazelcast.Scala.client._ import com.hazelcast.client.config.ClientConfig import com.hazelcast.config.Config import com.hazelcast.core.HazelcastInstance import com.hazelcast.instance.HazelcastInstanceFactory import scala.concurrent.ExecutionContext import scala.concurrent.duration._ import scala.util.Random trait ClusterSetup { def randName: String = randomString(50) implicit def ec = ExecutionContext.global private[this] var _hzs: Vector[HazelcastInstance] = _ implicit def hzs = _hzs private[this] var _client: HazelcastInstance = _ def client: HazelcastInstance = _client def member: HazelcastInstance = hzs(0) def clusterSize = 3 final val port = 49152 + Random.nextInt(9999) final val memberConfig = new Config final val clientConfig = new ClientConfig def init(): Unit def destroy(): Unit def beforeClass(): Unit = { init() val group = UUID.randomUUID.toString val passw = UUID.randomUUID.toString memberConfig.getNetworkConfig.getJoin.getMulticastConfig.setEnabled(false) memberConfig.getNetworkConfig.getJoin.getTcpIpConfig.setEnabled(true).addMember(s"127.0.0.1:$port") memberConfig.getGroupConfig.setName(group).setPassword(passw) memberConfig.setGracefulShutdownMaxWait(1.second) memberConfig.setPhoneHomeEnabled(false) memberConfig.getMapConfig("default") .setStatisticsEnabled(false) .setMaxSizeConfig(UsedHeapSize(60.gigabytes)) memberConfig.setShutdownHookEnabled(false) _hzs = (0 until clusterSize).map { i => memberConfig.getNetworkConfig.setPort(port + i) memberConfig.newInstance }.toVector clientConfig.getGroupConfig.setName(group).setPassword(passw) (0 until clusterSize).foldLeft(clientConfig.getNetworkConfig) { case (netConf, i) => netConf.addAddress(s"127.0.0.1:${port+i}") } clientConfig.getNetworkConfig.setConnectionAttemptLimit(100) _client = clientConfig.newClient() } def afterClass(): Unit = { destroy() _client.shutdown() HazelcastInstanceFactory.terminateAll() } def timed[T](warmups: Int = 0, unit: TimeUnit = MILLISECONDS)(thunk: => T): (T, Long) = { (0 until warmups).foreach(_ => thunk) val start = System.nanoTime thunk -> unit.convert(System.nanoTime - start, NANOSECONDS) } }
Example 7
Source File: package.scala From hazelcast-scala with Apache License 2.0 | 5 votes |
package joe

import scala.util.Random
import concurrent._
import concurrent.duration._

package object schmoe {

  def randomString(maxLen: Int = 10): String = {
    val minLen = 3
    val len = Random.nextInt(maxLen - minLen) + minLen
    val chars = new Array[Char](len)
    for (i <- 0 until len) {
      chars(i) = Random.nextPrintableChar()
    }
    new String(chars)
  }

  implicit class TestFuture[T](private val f: Future[T]) extends AnyVal {
    def await: T = this.await()
    def await(dur: FiniteDuration = 30.seconds): T = Await.result(f, dur)
  }
}
Example 8
Source File: Main.scala From akka-viz with MIT License | 5 votes |
import akka.actor.{Actor, ActorSystem, Props}
import ask.AskDemo
import fsm.DiningHakkersOnFsm
import postoffice.PostOffice
import restartDemo.RestartDemo
import roulette.RussianRoulette
import spray.SprayDemo
import tree.TreeDemo

import scala.util.Random

object Main extends App {

  DiningHakkersOnFsm.run(ActorSystem("fsm"))
  PostOffice.run(ActorSystem("postoffice"))
  SprayDemo.run(ActorSystem("spray"))
  TreeDemo.run(ActorSystem("tree"))
  new RussianRoulette(5).run(ActorSystem("russianroulette"))
  AskDemo.run(ActorSystem("ask"))
  RestartDemo.run(ActorSystem("restartdemo"))

  val system = ActorSystem("smalldemos")

  val lazyActorProps = Props(new Actor {
    var counter = 0
    override def receive: Receive = {
      case msg =>
        Thread.sleep(Random.nextInt(2000))
        counter += 1
        sender() ! msg
    }
  })

  val lazyActor1 = system.actorOf(lazyActorProps, "lazy1")
  val lazyActor2 = system.actorOf(lazyActorProps, "lazy2")

  for (i <- 0 to 1000) {
    lazyActor1.tell("doit", lazyActor2)
  }
}
Example 9
Source File: actors.scala From akka-viz with MIT License | 5 votes |
package postoffice import java.time.LocalDateTime import akka.actor._ import scala.util.Random class PostOfficeActor(val postOffice: PostOffice) extends Actor with ActorLogging { import PostOffice._ val myClient = context.actorOf(Props(classOf[PostOfficeClientActor]), "client") myClient ! postOffice.city override def receive: Receive = { case p @ Parcel(src, dest, weight) if src == postOffice.city => Thread.sleep(randomDelay) if (weight > WeightLimit) sender() ! Rejected(LocalDateTime.now(), p) else { sender() ! Pickup(LocalDateTime.now(), p) nextOffice(route(src -> dest)) ! p } case p @ Parcel(src, dest, _) if dest == postOffice.city => myClient ! Delivery(LocalDateTime.now(), p) case p @ Parcel(_, dest, _) => Thread.sleep(randomDelay) if (!lostPackage) nextOffice(route(postOffice.city -> dest)) ! p } def nextOffice(route: List[City]): ActorSelection = { val nextCity = route.dropWhile(_ != postOffice.city).drop(1).head val selection: ActorSelection = context.system.actorSelection(s"/user/$nextCity") selection } def lostPackage = Random.nextGaussian() < 0.002 } class PostOfficeClientActor extends Actor with ActorLogging { import PostOffice._ import scala.concurrent.duration._ var city: Option[City] = None override def receive: Actor.Receive = { case c: City => city = Some(c) sendPackage context.become(packageReply) } def packageReply: Actor.Receive = { case Pickup(_, p) => log.debug(s"Sent parcel $p") case Rejected(_, p) => log.debug(s"$p rejected, trying again") sender() ! p.copy(weight = p.weight - 0.02) case d: Delivery => log.debug(s"received $d") sendPackage } def sendPackage = { import context.dispatcher context.system.scheduler.scheduleOnce( randomDelay.milliseconds, context.parent, Parcel(city.get, Random.shuffle(Cities.filterNot(_ == city)).head, Random.nextDouble() * (WeightLimit + 0.10)) ) } }
Example 10
Source File: Player.scala From akka-viz with MIT License | 5 votes |
package roulette

import akka.actor._

import scala.util.Random

class Player extends Actor with ActorLogging {

  var nextGuy: ActorRef = _

  context.become(playerBehaviour)

  override def receive = {
    case _ => ???
  }

  def playerBehaviour: Receive = {
    case Next(ref) =>
      nextGuy = ref

    case Revolver(0) =>
      Thread.sleep(2000)
      if (sender() != nextGuy) {
        sender() ! Next(nextGuy)
        nextGuy.tell(Revolver(Random.nextInt(6)), sender())
      }
      self ! Kill

    case Revolver(x) =>
      Thread.sleep(2000)
      nextGuy ! Revolver(x - 1)
      nextGuy ! "Unhandled message"
  }
}
Example 11
Source File: RussianRoulette.scala From akka-viz with MIT License | 5 votes |
package roulette

import akka.actor.{ActorSystem, Props}

import scala.util.Random

class RussianRoulette(playersNo: Int) {

  def run(system: ActorSystem): Unit = {
    val players = Vector.fill(playersNo)(system.actorOf(Props[Player]))

    for (x <- 0 until playersNo) {
      players(x).tell(Next(players((x + 1) % playersNo)), players(x))
    }

    val firstGuyId = Random.nextInt(playersNo)
    val previousGuyId = (firstGuyId - 1 + playersNo) % playersNo
    val firstGuy = players(firstGuyId)
    val previousGuy = players(previousGuyId)

    firstGuy.tell(Revolver(Random.nextInt(6)), previousGuy)
  }
}
Example 12
Source File: restartDemo.scala From akka-viz with MIT License | 5 votes |
package restartDemo

import akka.actor._

import scala.util.Random

object RestartDemo {
  def run(system: ActorSystem): Unit = {
    system.actorOf(Props[DangerZoneParent], "dangerZoneParent") ! DoIt
  }
}

class DangerZoneParent extends Actor {

  override def supervisorStrategy = OneForOneStrategy() {
    case e: Exception => SupervisorStrategy.Restart
  }

  override def receive: Receive = {
    case DoIt => context.actorOf(Props[DangerZoneActor], "dangerZone")
  }
}

class DangerZoneActor extends Actor with ActorLogging {

  import scala.concurrent.duration._
  import context.dispatcher

  var cancellable: Option[Cancellable] = None

  override def preRestart(reason: Throwable, message: Option[Any]): Unit = {
    cancellable.foreach(_.cancel())
    super.preRestart(reason, message)
  }

  override def preStart(): Unit = {
    cancellable = Some(scheduleRideToTheDangerZone)
  }

  override def receive: Receive = {
    case DangerZone => if (Random.nextBoolean()) rideIntoTheDangerZone
  }

  def rideIntoTheDangerZone: Unit = throw new RuntimeException("the danger zone was too dangerous")

  def scheduleRideToTheDangerZone: Cancellable =
    context.system.scheduler.schedule(10.seconds, 20.seconds, self, DangerZone)
}

case object DangerZone

case object DoIt
Example 13
Source File: MathSteps.scala From cornichon with Apache License 2.0 | 5 votes |
package com.github.agourlay.cornichon.framework.examples.math import com.github.agourlay.cornichon.CornichonFeature import com.github.agourlay.cornichon.core.Step import com.github.agourlay.cornichon.steps.regular.assertStep._ import com.github.agourlay.cornichon.steps.cats.EffectStep import scala.util.Random trait MathSteps { this: CornichonFeature => case class adding_values(arg1: String, arg2: String) { def equals(res: Int) = AssertStep( title = s"value of $arg1 + $arg2 should be $res", action = sc => Assertion.either { for { v1 <- sc.session.get(arg1).map(_.toInt) v2 <- sc.session.get(arg2).map(_.toInt) } yield GenericEqualityAssertion(res, v1 + v2) } ) } def generate_random_int(target: String, max: Int = 10): Step = EffectStep.fromSyncE( title = s"generate random Int into '$target' (max=$max)", effect = _.session.addValue(target, Random.nextInt(max).toString) ) def generate_random_double(target: String): Step = EffectStep.fromSyncE( title = s"generate random Double into '$target'", effect = _.session.addValue(target, Random.nextDouble().toString) ) case class double_value(source: String) { def isBetween(low: Double, high: Double) = AssertStep( title = s"double value of '$source' is between '$low' and '$high'", action = sc => Assertion.either { sc.session.get(source).map(v => BetweenAssertion(low, v.toDouble, high)) } ) } def calculate_point_in_circle(target: String): Step = EffectStep.fromSyncE( title = s"calculate points inside circle", effect = sc => { for { x <- sc.session.get("x").map(_.toDouble) y <- sc.session.get("y").map(_.toDouble) inside = Math.sqrt(x * x + y * y) <= 1 ns <- sc.session.addValue(target, if (inside) "1" else "0") } yield ns } ) def estimate_pi_from_ratio(inside: String, target: String): Step = EffectStep.fromSyncE( title = s"estimate PI from ratio into key '$target'", effect = sc => { sc.session.getHistory(inside).flatMap { insides => val trial = insides.size val estimation = (insides.count(_ == "1").toDouble / trial) * 4 sc.session.addValue(target, estimation.toString) } } ) def is_valid_sum: Step = AssertStep( title = "sum of 'a' + 'b' = 'c'", action = sc => { val s = sc.session GenericEqualityAssertion(s.getUnsafe("c").toInt, s.getUnsafe("a").toInt + s.getUnsafe("b").toInt) } ) }
Example 14
Source File: RandomContext.scala From cornichon with Apache License 2.0 | 5 votes |
package com.github.agourlay.cornichon.core

import java.util.concurrent.atomic.AtomicLong

import scala.util.Random

trait RandomContext {
  val initialSeed: Long
  def nextBoolean(): Boolean
  def nextDouble(): Double
  def nextFloat(): Float
  def nextGaussian(): Double
  def nextInt(): Int
  def nextInt(n: Int): Int
  def nextLong(): Long
  def uniqueLong(): Long
  def nextString(length: Int): String
  def nextPrintableChar(): Char
  def alphanumeric(length: Int): String
  def shuffle[T](xs: Iterable[T]): Iterable[T]
}

// FIXME seededRandom works through internal mutation https://github.com/agourlay/cornichon/issues/303
class MutableRandomContext(seed: Long, seededRandom: Random) extends RandomContext {
  val initialSeed: Long = seed
  def nextBoolean(): Boolean = seededRandom.nextBoolean()
  def nextDouble(): Double = seededRandom.nextDouble()
  def nextFloat(): Float = seededRandom.nextFloat()
  def nextGaussian(): Double = seededRandom.nextGaussian()
  def nextInt(): Int = seededRandom.nextInt()
  def nextInt(n: Int): Int = seededRandom.nextInt(n)
  def nextLong(): Long = seededRandom.nextLong()
  def nextString(length: Int): String = seededRandom.nextString(length)
  def nextPrintableChar(): Char = seededRandom.nextPrintableChar()
  def alphanumeric(length: Int): String = seededRandom.alphanumeric.take(length).mkString("")
  def shuffle[T](xs: Iterable[T]): Iterable[T] = seededRandom.shuffle(xs)

  private val atomicLong = new AtomicLong(1L)
  def uniqueLong(): Long = atomicLong.getAndIncrement()
}

object RandomContext {

  def fromOptSeed(withSeed: Option[Long]): RandomContext = {
    val initialSeed = withSeed.getOrElse(System.currentTimeMillis())
    fromSeed(initialSeed)
  }

  def fromSeed(seed: Long): RandomContext = {
    new MutableRandomContext(seed, new Random(new java.util.Random(seed)))
  }
}
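A brief usage sketch (assuming the RandomContext shown above is on the classpath): seeding makes every derived value reproducible across contexts, while uniqueLong is a per-context counter that is independent of the seed.

import com.github.agourlay.cornichon.core.RandomContext

object RandomContextDemo extends App {
  val rc1 = RandomContext.fromSeed(1234L)
  val rc2 = RandomContext.fromSeed(1234L)

  // Same seed, same draws (each context mutates only its own internal Random).
  assert(rc1.nextInt(100) == rc2.nextInt(100))
  assert(rc1.alphanumeric(5) == rc2.alphanumeric(5))

  // uniqueLong starts at 1 and simply increments, regardless of the seed.
  assert(rc1.uniqueLong() == 1L)
  assert(rc1.uniqueLong() == 2L)
}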
Example 15
Source File: NetworkRandomization.scala From sparkling-graph with BSD 2-Clause "Simplified" License | 5 votes |
package ml.sparkling.graph.examples import ml.sparkling.graph.api.loaders.GraphLoading.LoadGraph import ml.sparkling.graph.loaders.csv.GraphFromCsv.CSV import ml.sparkling.graph.loaders.csv.GraphFromCsv.LoaderParameters.{Delimiter, NoHeader, Partitions, Quotation} import org.apache.spark.graphx._ import org.apache.spark.rdd.RDD import org.apache.spark.{SparkConf, SparkContext} import scala.util.Random object NetworkRandomization { def main(args:Array[String])= { val sparkConf = new SparkConf().setAppName("network-randomization").set("spark.app.id", "sparkling-graph-example") implicit val ctx = new SparkContext(sparkConf) val path=args(0) val pathEmd=args(1) val out=args(2) val loadPartitions=args(3).toInt val graphPartitions=args(4).toInt val graph:Graph[String,String]=LoadGraph.from(CSV(path)) .using(NoHeader) .using(Delimiter(",")) .using(Partitions(loadPartitions)) .using(Quotation("\"")).load[String,String]().partitionBy(PartitionStrategy.EdgePartition2D,graphPartitions) val emd=ctx.textFile(pathEmd,loadPartitions).map(_.split(",").map(v=>v.replaceAll("\"",""))).map(r=>(r.head.toLong,r.tail)) val srcIdsBase: RDD[VertexId] =graph.edges.map(e=>e.srcId) val dstIdsBase=graph.edges.map(e=>e.dstId) val saltDst = 23456789L ; val saltSrc = 123456789L ; def randomize(srcIds:RDD[VertexId],dstIds:RDD[VertexId])= { val randomizedSrc = srcIds.mapPartitionsWithIndex((id, itr) => { val random = new Random(saltSrc + id) itr.map(vId => (random.nextLong(), vId)) }).sortByKey().zipWithIndex().map(t => (t._2, t._1._2)) val randomizedDst = dstIds.mapPartitionsWithIndex((id, itr) => { val random = new Random(saltDst + id) itr.map(vId => (random.nextLong(), vId)) }).sortByKey().zipWithIndex().map(t => (t._2, t._1._2)) randomizedSrc.join(randomizedDst).map { case (index, (src, dst)) => new Edge[Int](src, dst, 1) } } var numOfSame= -1l var lastNumOfSame= -2l var randomizedEdges=randomize(srcIdsBase,dstIdsBase) var withSame=randomizedEdges.filter(t=> t.srcId == t.dstId) while((numOfSame!=lastNumOfSame) && (!withSame.isEmpty())){ val withoutSame=randomizedEdges.filter(t=>t.srcId!=t.dstId) val newRandomized=randomize(withSame.map(_.srcId),withSame.map(_.dstId)) randomizedEdges=withoutSame.union(newRandomized) withSame=newRandomized.filter(e=>e.srcId==e.dstId) lastNumOfSame=numOfSame numOfSame=withSame.count() } val randomizedGraph= Graph(graph.vertices,randomizedEdges) randomizedGraph.outerJoinVertices(emd)((vId,old,newValue)=>newValue.getOrElse(((0 to 4).map(n=>"0").toArray))).triplets.map( edge=>{ (edge.srcId.toString :: edge.dstId.toString :: edge.srcAttr.toList ::: edge.dstAttr.toList).mkString(",") } ).saveAsTextFile(out) } }
Example 16
Source File: EigenvectorCentrality$Test.scala From sparkling-graph with BSD 2-Clause "Simplified" License | 5 votes |
package ml.sparkling.graph.operators.measures.vertex.eigenvector import ml.sparkling.graph.api.operators.measures.VertexMeasureConfiguration import ml.sparkling.graph.operators.MeasureTest import org.apache.spark.SparkContext import org.apache.spark.graphx.Graph import ml.sparkling.graph.operators.OperatorsDSL._ import scala.util.Random class EigenvectorCentrality$Test(implicit sc:SparkContext) extends MeasureTest { "Eigenvector for line graph" should "be correctly calculated" in{ Given("graph") val filePath = getClass.getResource("/graphs/5_nodes_directed") val graph:Graph[Int,Int]=loadGraph(filePath.toString) When("Computes eigenvector") val result=EigenvectorCentrality.compute(graph) Then("Should calculate eigenvector correctly") result.vertices.collect().sortBy{case (vId,data)=>vId}.map{case (vId,data)=>data}.zip(Array( 0d, 0d, 0d, 0d, 0d )).foreach{case (a,b)=>{a should be (b +- 1e-5 )}} graph.unpersist(true) } "Eigenvector for line graph" should "be correctly calculated using DSL" in{ Given("graph") val filePath = getClass.getResource("/graphs/5_nodes_directed") val graph:Graph[Int,Int]=loadGraph(filePath.toString) When("Computes eigenvector") val result=graph.eigenvectorCentrality() Then("Should calculate eigenvector correctly") result.vertices.collect().sortBy{case (vId,data)=>vId}.map{case (vId,data)=>data}.zip(Array( 0d, 0d, 0d, 0d, 0d )).foreach{case (a,b)=>{a should be (b +- 1e-5 )}} graph.unpersist(true) } "Eigenvector for full 4 node directed graph" should "be correctly calculated" in{ Given("graph") val filePath = getClass.getResource("/graphs/4_nodes_full") val graph:Graph[Int,Int]=loadGraph(filePath.toString) When("Computes eigenvector") val result=EigenvectorCentrality.compute(graph) Then("Should calculate eigenvector correctly") result.vertices.collect().sortBy{case (vId,data)=>vId}.map{case (vId,data)=>data}.zip(Array( 0.32128186442503776, 0.5515795539542094, 0.6256715148839718, 0.44841176915201825 )).foreach{case (a,b)=>{a should be (b +- 1e-5 )}} graph.unpersist(true) } "Eigenvector for full 4 node undirected graph" should "be correctly calculated" in{ Given("graph") val filePath = getClass.getResource("/graphs/4_nodes_full") val graph:Graph[Int,Int]=loadGraph(filePath.toString) When("Computes eigenvector") val result=EigenvectorCentrality.compute(graph,VertexMeasureConfiguration[Int,Int](true)) Then("Should calculate eigenvector correctly") result.vertices.collect().sortBy{case (vId,data)=>vId} should equal (Array( (1,0.5), (2,0.5), (3,0.5), (4,0.5) )) graph.unpersist(true) } "Eigenvector " should " take edge weight into account" in{ Given("graph") val filePath = getClass.getResource("/graphs/4_nodes_full") val graph:Graph[Int,Int]=loadGraph(filePath.toString) val graphWeighted=graph.mapEdges(edge=>{ 1.0/(edge.srcId+edge.dstId) }) When("Computes eigenvector") val resultUnweighted=EigenvectorCentrality.compute(graph,VertexMeasureConfiguration[Int,Int](true)) val resultWeighted=EigenvectorCentrality.compute(graphWeighted,VertexMeasureConfiguration[Int,Double](true)) Then("Should calculate eigenvector correctly") resultUnweighted.vertices.collect().sortBy{case (vId,data)=>vId} should not equal ( resultWeighted.vertices.collect().sortBy{case (vId,data)=>vId}) graph.unpersist(true) resultUnweighted.unpersist(true) resultWeighted.unpersist(true) } }
Example 17
Source File: RandomStateSpec.scala From CSYE7200 with MIT License | 5 votes |
package edu.neu.coe.csye7200.asstrs import org.scalatest.{FlatSpec, Matchers} import scala.language.postfixOps import scala.util.Random class RandomStateSpec extends FlatSpec with Matchers { private def stdDev(xs: Seq[Double]): Double = math.sqrt(xs.reduceLeft((a, x) => a + x * x)) / xs.length private def mean(xs: Seq[Double]) = xs.sum / xs.length // XXX Clearly, this doesn't look good. We will soon learn how to write // generic methods like sum and mean. But for now, this is what we've got. def sumU(xs: Seq[UniformDouble]): Double = xs.foldLeft(0.0)((a, x) => (a + x.x)) def meanU(xs: Seq[UniformDouble]) = sumU(xs) / xs.length "RandomState(0L)" should "match case RandomState(4804307197456638271)" in { val r: RandomState[Long] = RandomState(0L) r.next should matchPattern { case JavaRandomState(4804307197456638271L,_) => } } it should "match case RandomState(-1034601897293430941) on next" in { val r: RandomState[Long] = RandomState(0L) r.next.next should matchPattern { case JavaRandomState(-1034601897293430941L,_) => } // why doesn't the following work? // r.next.next.asInstanceOf[JavaRandomState[Long]].g shouldBe identity // e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 } "7th element of RandomState(0)" should "match case RandomState(5082315122564986995L)" in { val lrs = RandomState(0).toStream.slice(6, 7) (lrs head) should matchPattern { case 5082315122564986995L => } } "longToDouble" should "work" in { val max = RandomState.longToDouble(Long.MaxValue) max shouldBe 1.0 +- 1E-6 val min = RandomState.longToDouble(Long.MinValue) min shouldBe -1.0 +- 1E-6 val value = RandomState.longToDouble(3487594572834985L) value shouldBe 3.7812576126163456E-4 +- 1E-6 } "0..1 stream" should "have mean = 0.5" in { val xs = RandomState(0).map(RandomState.longToDouble).map(RandomState.doubleToUniformDouble).toStream take 1001 toList; meanU(xs) shouldBe 0.5 +- 5E-3 } "BetterRandomState" should "have mean = 0.5" in { val xs = BetterRandomState(0,BetterRandomState.hDouble).toStream take 1001 toList; mean(xs) shouldBe 0.5 +- 5E-3 } "map" should "work" in { val rLong: RandomState[Long] = RandomState(0) val rInt = rLong.map(_.toInt) rInt.get shouldBe -723955400 val next = rInt.next next.get shouldBe 406937919 val next2 = next.next next2.get shouldBe 1407270755 } it should "work with map of map" in { val rLong: RandomState[Long] = RandomState(0L) val rInt = rLong.map(_.toInt) val rBoolean = rInt.map(_ % 2 == 0) rBoolean.get shouldBe true } "flatMap" should "work" in { val r1 = RandomState(0) val r2 = r1.flatMap(RandomState(_)) r2.get shouldBe 4804307197456638271L } "for comprehension" should "work" in { val r1 = RandomState(0) val z: RandomState[Double] = for (x <- r1; _ <- RandomState(x)) yield x.toDouble/Long.MaxValue z.get shouldBe -0.5380644352028887 +- 1E-6 } }
Example 18
Source File: Euler.scala From CSYE7200 with MIT License | 5 votes |
package edu.neu.coe.csye7200.greedy

import scala.Stream._
import scala.language.postfixOps
import scala.util.Random

object Euler extends App {
  def e(n: Int): Double = (from(1) map (1.0 / _)).scanLeft(1.0)(_ * _) take n sum

  println(e(20))
}

object RandomStrings extends App {
  val r = Random

  def randomString(r: Random): String = {
    (for (i <- 0 until 6) yield r.nextPrintableChar()).foldLeft("")(_ + _)
  }

  val strings = for (i <- 0 until 5) yield randomString(r)

  println(strings)
}
Example 19
package edu.neu.coe.csye7200.util

import scala.util.Random

class RNG[+A](f: Long => A)(seed: Long) {
  private val random = new Random(seed)
  private lazy val state = random.nextLong

  def next = new RNG(f)(state)

  def value = f(state)
}

object RNG extends App {
  def modulo(n: Int, m: Int) = (n + m) % m

  val random = new Random(0L)
  val r = new RNG(x => modulo(x.toInt, 100))(0L)
  val someInt: Int = 55
  val r1 = r.next
  val r2 = r.next
  // ...
  val rN = r2.next
  val y = rN.value
  assert(y == someInt)
}
Example 20
Source File: BadRandomSpec.scala From CSYE7200 with MIT License | 5 votes |
package edu.neu.coe.csye7200

import org.scalatest.{FlatSpec, Matchers}

import scala.util.Random

class BadRandomSpec extends FlatSpec with Matchers {

  private val random = Random
  random.setSeed(0L)

  "random" should "be predictable" in {
    random.nextInt
    random.nextInt shouldBe (-723955400)
  }

  it should "be order-independent" in {
    random.nextInt shouldBe 1033096058
  }
}
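The spec above is deliberately "bad": both tests draw from one shared, seeded Random, so the expected constants hold only when the tests run in exactly this order. A minimal sketch of the issue, using only the standard library (names are illustrative):

import scala.util.Random

object SeedOrderDemo extends App {
  // One shared seeded instance: the n-th draw depends on every earlier draw,
  // so adding, removing, or reordering callers changes what later callers see.
  val shared = new Random(0L)
  val first = shared.nextInt()
  val second = shared.nextInt()
  assert(first != second)

  // A fresh, locally seeded instance per check removes the order dependency.
  def freshDraw(): Int = new Random(0L).nextInt()
  assert(freshDraw() == freshDraw())
}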
Example 21
Source File: CacheSpec.scala From CSYE7200 with MIT License | 5 votes |
package edu.neu.coe.csye7200.cache

import java.net.URL
import org.scalatest.concurrent.{Futures, ScalaFutures}
import org.scalatest.{FlatSpec, Matchers}

import scala.concurrent.Future
import scala.concurrent.ExecutionContext.Implicits.global
import scala.util.{Random, Try}

class CacheSpec extends FlatSpec with Matchers with Futures with ScalaFutures {

  behavior of "apply"

  val random = Random

  def lookupStock(k: String): Future[Double] = Future {
    random.setSeed(k.hashCode)
    random.nextInt(1000) / 100.0
  }

  it should "work" in {
    val cache = MyCache[String, Double](lookupStock)
    val xf: Future[Double] = cache("MSFT")
    whenReady(xf) { u => u should matchPattern { case x: Double => } }
    xf.value.get.get shouldBe 3.64
  }
}
Example 22
Source File: SortingSpec.scala From CSYE7200 with MIT License | 5 votes |
package edu.neu.coe.csye7200.lbsort

import edu.neu.coe.csye7200.util.RandomState
import org.scalatest.concurrent.{Futures, ScalaFutures}
import org.scalatest.{FlatSpec, Matchers}

import scala.util.Random

class SortingSpec extends FlatSpec with Matchers with Futures with ScalaFutures {

  behavior of "Insertion Sort"

  it should "sort List[Int]" in {
    val list = Array(3, 1, 2)
    Sorting.insertionSort(list)
    list shouldBe Array(1, 2, 3)
  }

  it should "sort List[String]" in {
    val list = Array("b", "c", "a")
    Sorting.insertionSort(list)
    list shouldBe Array("a", "b", "c")
  }

  it should "sort List[Double] using create" in {
    val list = Array(3.0, 1.5, 2.4)
    Sorting.insertionSort(list)
    list shouldBe Array(1.5, 2.4, 3.0)
  }

  behavior of "Quick Sort"

  it should "sort List[Long]" in {
    val list = RandomState(0L).stream.take(100).toArray
    Sorting.quickSort(list)
    list.reverse.take(5) shouldBe Array(9054633673849498218L, 8937230293740383692L, 8613213585075034408L, 8543763135442756639L, 8358116205139703580L)
  }
}
Example 24
Source File: NonSampleCompactor.scala From deequ with Apache License 2.0 | 5 votes |
package com.amazon.deequ.analyzers

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag
import scala.util.Random

// NOTE: only a fragment of the NonSampleCompactor body survives in this listing;
// the enclosing class and method declarations are not shown.
    val output = (offset until len by 2).map(sortedBuffer(_)).toArray
    val tail = findOdd(items)
    items = items % 2
    var newBuffer = ArrayBuffer[T]()
    if (tail.isDefined) {
      newBuffer = newBuffer :+ tail.get
    }
    buffer = newBuffer
    numOfCompress = numOfCompress + 1
    output
  }
}
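The fragment above keeps every second element of a sorted buffer starting at some offset; below is a minimal, illustrative sketch of that compaction idea only (hypothetical names, randomly chosen offset), not the actual Deequ implementation.

import scala.util.Random

object CompactorSketch extends App {
  // Keep every second element of a sorted buffer, starting at a random offset (0 or 1),
  // so the buffer is halved while remaining an unbiased summary of the input.
  def compact[T](sorted: Vector[T]): Vector[T] = {
    val offset = if (Random.nextBoolean()) 1 else 0
    (offset until sorted.length by 2).map(sorted(_)).toVector
  }

  println(compact(Vector(1, 3, 4, 7, 8, 9, 12, 15))) // four survivors; which four depends on the offset
}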
Example 25
Source File: MnistLoader.scala From SparkNet with MIT License | 5 votes |
package loaders import java.io._ import scala.util.Random import libs._ class MnistLoader(path: String) { val height = 28 val width = 28 def getImages(filename: String, train: Boolean): Array[Array[Float]] = { val stream = new FileInputStream(path + filename) val numImages = if (train) 60000 else 10000 val images = new Array[Array[Float]](numImages) val magicNumber = new Array[Byte](4) stream.read(magicNumber) assert(magicNumber.deep == Array[Byte](0, 0, 8, 3).deep) val count = new Array[Byte](4) stream.read(count) assert(count.deep == (if (train) Array[Byte](0, 0, -22, 96).deep else Array[Byte](0, 0, 39, 16).deep)) val imHeight = new Array[Byte](4) stream.read(imHeight) assert(imHeight.deep == Array[Byte](0, 0, 0, 28).deep) val imWidth = new Array[Byte](4) stream.read(imWidth) assert(imWidth.deep == Array[Byte](0, 0, 0, 28).deep) var i = 0 val imageBuffer = new Array[Byte](height * width) while (i < numImages) { stream.read(imageBuffer) images(i) = imageBuffer.map(e => (e.toFloat / 255) - 0.5F) i += 1 } images } def getLabels(filename: String, train: Boolean): Array[Long] = { val stream = new FileInputStream(path + filename) val numLabels = if (train) 60000 else 10000 val magicNumber = new Array[Byte](4) stream.read(magicNumber) assert(magicNumber.deep == Array[Byte](0, 0, 8, 1).deep) val count = new Array[Byte](4) stream.read(count) assert(count.deep == (if (train) Array[Byte](0, 0, -22, 96).deep else Array[Byte](0, 0, 39, 16).deep)) val labels = new Array[Byte](numLabels) stream.read(labels) labels.map(e => (e & 0xFF).toLong) } val trainImages = getImages("train-images-idx3-ubyte", true) val trainLabels = getLabels("train-labels-idx1-ubyte", true) val testImages = getImages("t10k-images-idx3-ubyte", false) val testLabels = getLabels("t10k-labels-idx1-ubyte", false) }
Example 26
Source File: CifarLoader.scala From SparkNet with MIT License | 5 votes |
package loaders import java.io.File import java.io.FileInputStream import scala.util.Random import libs._ class CifarLoader(path: String) { // We hardcode this because these are properties of the CIFAR-10 dataset. val height = 32 val width = 32 val channels = 3 val size = channels * height * width val batchSize = 10000 val nBatches = 5 val nData = nBatches * batchSize val trainImages = new Array[Array[Float]](nData) val trainLabels = new Array[Int](nData) val testImages = new Array[Array[Float]](batchSize) val testLabels = new Array[Int](batchSize) val r = new Random() // val perm = Vector() ++ r.shuffle(1 to (nData - 1) toIterable) val indices = Vector() ++ (0 to nData - 1) toIterable val trainPerm = Vector() ++ r.shuffle(indices) val testPerm = Vector() ++ ((0 to batchSize) toIterable) val d = new File(path) if (!d.exists) { throw new Exception("The path " + path + " does not exist.") } if (!d.isDirectory) { throw new Exception("The path " + path + " is not a directory.") } val cifar10Files = List("data_batch_1.bin", "data_batch_2.bin", "data_batch_3.bin", "data_batch_4.bin", "data_batch_5.bin", "test_batch.bin") for (filename <- cifar10Files) { if (!d.list.contains(filename)) { throw new Exception("The directory " + path + " does not contain all of the Cifar10 data. Please run `bash $SPARKNET_HOME/data/cifar10/get_cifar10.sh` to obtain the Cifar10 data.") } } val fullFileList = d.listFiles.filter(_.getName().split('.').last == "bin").toList val testFile = fullFileList.find(x => x.getName().split('/').last == "test_batch.bin").head val fileList = fullFileList diff List(testFile) for (i <- 0 to nBatches - 1) { readBatch(fileList(i), i, trainImages, trainLabels, trainPerm) } readBatch(testFile, 0, testImages, testLabels, testPerm) val meanImage = new Array[Float](size) for (i <- 0 to nData - 1) { for (j <- 0 to size - 1) { meanImage(j) += trainImages(i)(j).toFloat / nData } } def readBatch(file: File, batch: Int, images: Array[Array[Float]], labels: Array[Int], perm: Vector[Int]) { val buffer = new Array[Byte](1 + size) val inputStream = new FileInputStream(file) var i = 0 var nRead = inputStream.read(buffer) while(nRead != -1) { assert(i < batchSize) labels(perm(batch * batchSize + i)) = (buffer(0) & 0xFF) // convert to unsigned images(perm(batch * batchSize + i)) = new Array[Float](size) var j = 0 while (j < size) { // we access buffer(j + 1) because the 0th position holds the label images(perm(batch * batchSize + i))(j) = buffer(j + 1) & 0xFF j += 1 } nRead = inputStream.read(buffer) i += 1 } } }
Example 27
Source File: SparkTC.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples import scala.collection.mutable import scala.util.Random import org.apache.spark.sql.SparkSession object SparkTC { val numEdges = 200 val numVertices = 100 val rand = new Random(42) def generateGraph: Seq[(Int, Int)] = { val edges: mutable.Set[(Int, Int)] = mutable.Set.empty while (edges.size < numEdges) { val from = rand.nextInt(numVertices) val to = rand.nextInt(numVertices) if (from != to) edges.+=((from, to)) } edges.toSeq } def main(args: Array[String]) { val spark = SparkSession .builder .appName("SparkTC") .getOrCreate() val slices = if (args.length > 0) args(0).toInt else 2 var tc = spark.sparkContext.parallelize(generateGraph, slices).cache() // Linear transitive closure: each round grows paths by one edge, // by joining the graph's edges with the already-discovered paths. // e.g. join the path (y, z) from the TC with the edge (x, y) from // the graph to obtain the path (x, z). // Because join() joins on keys, the edges are stored in reversed order. val edges = tc.map(x => (x._2, x._1)) // This join is iterated until a fixed point is reached. var oldCount = 0L var nextCount = tc.count() do { oldCount = nextCount // Perform the join, obtaining an RDD of (y, (z, x)) pairs, // then project the result to obtain the new (x, z) paths. tc = tc.union(tc.join(edges).map(x => (x._2._2, x._2._1))).distinct().cache() nextCount = tc.count() } while (nextCount != oldCount) println(s"TC has ${tc.count()} edges.") spark.stop() } } // scalastyle:on println
Example 28
Source File: HadoopUtils.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.image import scala.language.existentials import scala.util.Random import org.apache.commons.io.FilenameUtils import org.apache.hadoop.conf.{Configuration, Configured} import org.apache.hadoop.fs.{Path, PathFilter} import org.apache.hadoop.mapreduce.lib.input.FileInputFormat import org.apache.spark.sql.SparkSession private object RecursiveFlag { def withPathFilter[T]( sampleRatio: Double, spark: SparkSession, seed: Long)(f: => T): T = { val sampleImages = sampleRatio < 1 if (sampleImages) { val flagName = FileInputFormat.PATHFILTER_CLASS val hadoopConf = spark.sparkContext.hadoopConfiguration val old = Option(hadoopConf.getClass(flagName, null)) hadoopConf.setDouble(SamplePathFilter.ratioParam, sampleRatio) hadoopConf.setLong(SamplePathFilter.seedParam, seed) hadoopConf.setClass(flagName, classOf[SamplePathFilter], classOf[PathFilter]) try f finally { hadoopConf.unset(SamplePathFilter.ratioParam) hadoopConf.unset(SamplePathFilter.seedParam) old match { case Some(v) => hadoopConf.setClass(flagName, v, classOf[PathFilter]) case None => hadoopConf.unset(flagName) } } } else { f } } }
Example 29
Source File: KMeansDataGenerator.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.util import scala.util.Random import org.apache.spark.SparkContext import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.rdd.RDD @Since("0.8.0") def generateKMeansRDD( sc: SparkContext, numPoints: Int, k: Int, d: Int, r: Double, numPartitions: Int = 2) : RDD[Array[Double]] = { // First, generate some centers val rand = new Random(42) val centers = Array.fill(k)(Array.fill(d)(rand.nextGaussian() * r)) // Then generate points around each center sc.parallelize(0 until numPoints, numPartitions).map { idx => val center = centers(idx % k) val rand2 = new Random(42 + idx) Array.tabulate(d)(i => center(i) + rand2.nextGaussian()) } } @Since("0.8.0") def main(args: Array[String]) { if (args.length < 6) { // scalastyle:off println println("Usage: KMeansGenerator " + "<master> <output_dir> <num_points> <k> <d> <r> [<num_partitions>]") // scalastyle:on println System.exit(1) } val sparkMaster = args(0) val outputPath = args(1) val numPoints = args(2).toInt val k = args(3).toInt val d = args(4).toInt val r = args(5).toDouble val parts = if (args.length >= 7) args(6).toInt else 2 val sc = new SparkContext(sparkMaster, "KMeansDataGenerator") val data = generateKMeansRDD(sc, numPoints, k, d, r, parts) data.map(_.mkString(" ")).saveAsTextFile(outputPath) sc.stop() System.exit(0) } }
Example 30
Source File: LogisticRegressionDataGenerator.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.util import scala.util.Random import org.apache.spark.SparkContext import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.rdd.RDD @Since("0.8.0") def generateLogisticRDD( sc: SparkContext, nexamples: Int, nfeatures: Int, eps: Double, nparts: Int = 2, probOne: Double = 0.5): RDD[LabeledPoint] = { val data = sc.parallelize(0 until nexamples, nparts).map { idx => val rnd = new Random(42 + idx) val y = if (idx % 2 == 0) 0.0 else 1.0 val x = Array.fill[Double](nfeatures) { rnd.nextGaussian() + (y * eps) } LabeledPoint(y, Vectors.dense(x)) } data } @Since("0.8.0") def main(args: Array[String]) { if (args.length != 5) { // scalastyle:off println println("Usage: LogisticRegressionGenerator " + "<master> <output_dir> <num_examples> <num_features> <num_partitions>") // scalastyle:on println System.exit(1) } val sparkMaster: String = args(0) val outputPath: String = args(1) val nexamples: Int = if (args.length > 2) args(2).toInt else 1000 val nfeatures: Int = if (args.length > 3) args(3).toInt else 2 val parts: Int = if (args.length > 4) args(4).toInt else 2 val eps = 3 val sc = new SparkContext(sparkMaster, "LogisticRegressionDataGenerator") val data = generateLogisticRDD(sc, nexamples, nfeatures, eps, parts) data.saveAsTextFile(outputPath) sc.stop() } }
Example 31
Source File: SVMDataGenerator.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.util import scala.util.Random import com.github.fommil.netlib.BLAS.{getInstance => blas} import org.apache.spark.SparkContext import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.rdd.RDD @DeveloperApi @Since("0.8.0") object SVMDataGenerator { @Since("0.8.0") def main(args: Array[String]) { if (args.length < 2) { // scalastyle:off println println("Usage: SVMGenerator " + "<master> <output_dir> [num_examples] [num_features] [num_partitions]") // scalastyle:on println System.exit(1) } val sparkMaster: String = args(0) val outputPath: String = args(1) val nexamples: Int = if (args.length > 2) args(2).toInt else 1000 val nfeatures: Int = if (args.length > 3) args(3).toInt else 2 val parts: Int = if (args.length > 4) args(4).toInt else 2 val sc = new SparkContext(sparkMaster, "SVMGenerator") val globalRnd = new Random(94720) val trueWeights = Array.fill[Double](nfeatures)(globalRnd.nextGaussian()) val data: RDD[LabeledPoint] = sc.parallelize(0 until nexamples, parts).map { idx => val rnd = new Random(42 + idx) val x = Array.fill[Double](nfeatures) { rnd.nextDouble() * 2.0 - 1.0 } val yD = blas.ddot(trueWeights.length, x, 1, trueWeights, 1) + rnd.nextGaussian() * 0.1 val y = if (yD < 0) 0.0 else 1.0 LabeledPoint(y, Vectors.dense(x)) } data.saveAsTextFile(outputPath) sc.stop() } }
Example 32
Source File: RidgeRegressionSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.regression import scala.util.Random import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.util.{LinearDataGenerator, LocalClusterSparkContext, MLlibTestSparkContext} import org.apache.spark.util.Utils private object RidgeRegressionSuite { val model = new RidgeRegressionModel(weights = Vectors.dense(0.1, 0.2, 0.3), intercept = 0.5) } class RidgeRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { def predictionError(predictions: Seq[Double], input: Seq[LabeledPoint]): Double = { predictions.zip(input).map { case (prediction, expected) => (prediction - expected.label) * (prediction - expected.label) }.sum / predictions.size } test("ridge regression can help avoid overfitting") { // For small number of examples and large variance of error distribution, // ridge regression should give smaller generalization error that linear regression. val numExamples = 50 val numFeatures = 20 // Pick weights as random values distributed uniformly in [-0.5, 0.5] val random = new Random(42) val w = Array.fill(numFeatures)(random.nextDouble() - 0.5) // Use half of data for training and other half for validation val data = LinearDataGenerator.generateLinearInput(3.0, w, 2 * numExamples, 42, 10.0) val testData = data.take(numExamples) val validationData = data.takeRight(numExamples) val testRDD = sc.parallelize(testData, 2).cache() val validationRDD = sc.parallelize(validationData, 2).cache() // First run without regularization. val linearReg = new LinearRegressionWithSGD() linearReg.optimizer.setNumIterations(200) .setStepSize(1.0) val linearModel = linearReg.run(testRDD) val linearErr = predictionError( linearModel.predict(validationRDD.map(_.features)).collect(), validationData) val ridgeReg = new RidgeRegressionWithSGD() ridgeReg.optimizer.setNumIterations(200) .setRegParam(0.1) .setStepSize(1.0) val ridgeModel = ridgeReg.run(testRDD) val ridgeErr = predictionError( ridgeModel.predict(validationRDD.map(_.features)).collect(), validationData) // Ridge validation error should be lower than linear regression. assert(ridgeErr < linearErr, "ridgeError (" + ridgeErr + ") was not less than linearError(" + linearErr + ")") } test("model save/load") { val model = RidgeRegressionSuite.model val tempDir = Utils.createTempDir() val path = tempDir.toURI.toString // Save model, load it back, and compare. try { model.save(sc, path) val sameModel = RidgeRegressionModel.load(sc, path) assert(model.weights == sameModel.weights) assert(model.intercept == sameModel.intercept) } finally { Utils.deleteRecursively(tempDir) } } } class RidgeRegressionClusterSuite extends SparkFunSuite with LocalClusterSparkContext { test("task size should be small in both training and prediction") { val m = 4 val n = 200000 val points = sc.parallelize(0 until m, 2).mapPartitionsWithIndex { (idx, iter) => val random = new Random(idx) iter.map(i => LabeledPoint(1.0, Vectors.dense(Array.fill(n)(random.nextDouble())))) }.cache() // If we serialize data directly in the task closure, the size of the serialized task would be // greater than 1MB and hence Spark would throw an error. val model = RidgeRegressionWithSGD.train(points, 2) val predictions = model.predict(points.map(_.features)) } }
Example 33
Source File: KafkaStreamSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.kafka import scala.collection.mutable import scala.concurrent.duration._ import scala.language.postfixOps import scala.util.Random import kafka.serializer.StringDecoder import org.scalatest.BeforeAndAfterAll import org.scalatest.concurrent.Eventually import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.{Milliseconds, StreamingContext} class KafkaStreamSuite extends SparkFunSuite with Eventually with BeforeAndAfterAll { private var ssc: StreamingContext = _ private var kafkaTestUtils: KafkaTestUtils = _ override def beforeAll(): Unit = { kafkaTestUtils = new KafkaTestUtils kafkaTestUtils.setup() } override def afterAll(): Unit = { if (ssc != null) { ssc.stop() ssc = null } if (kafkaTestUtils != null) { kafkaTestUtils.teardown() kafkaTestUtils = null } } test("Kafka input stream") { val sparkConf = new SparkConf().setMaster("local[4]").setAppName(this.getClass.getSimpleName) ssc = new StreamingContext(sparkConf, Milliseconds(500)) val topic = "topic1" val sent = Map("a" -> 5, "b" -> 3, "c" -> 10) kafkaTestUtils.createTopic(topic) kafkaTestUtils.sendMessages(topic, sent) val kafkaParams = Map("zookeeper.connect" -> kafkaTestUtils.zkAddress, "group.id" -> s"test-consumer-${Random.nextInt(10000)}", "auto.offset.reset" -> "smallest") val stream = KafkaUtils.createStream[String, String, StringDecoder, StringDecoder]( ssc, kafkaParams, Map(topic -> 1), StorageLevel.MEMORY_ONLY) val result = new mutable.HashMap[String, Long]() stream.map(_._2).countByValue().foreachRDD { r => r.collect().foreach { kv => result.synchronized { val count = result.getOrElseUpdate(kv._1, 0) + kv._2 result.put(kv._1, count) } } } ssc.start() eventually(timeout(10000 milliseconds), interval(100 milliseconds)) { assert(result.synchronized { sent === result }) } ssc.stop() } }
Example 34
Source File: KafkaClusterSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.kafka import scala.util.Random import kafka.common.TopicAndPartition import org.scalatest.BeforeAndAfterAll import org.apache.spark.SparkFunSuite class KafkaClusterSuite extends SparkFunSuite with BeforeAndAfterAll { private val topic = "kcsuitetopic" + Random.nextInt(10000) private val topicAndPartition = TopicAndPartition(topic, 0) private var kc: KafkaCluster = null private var kafkaTestUtils: KafkaTestUtils = _ override def beforeAll() { kafkaTestUtils = new KafkaTestUtils kafkaTestUtils.setup() kafkaTestUtils.createTopic(topic) kafkaTestUtils.sendMessages(topic, Map("a" -> 1)) kc = new KafkaCluster(Map("metadata.broker.list" -> kafkaTestUtils.brokerAddress)) } override def afterAll() { if (kafkaTestUtils != null) { kafkaTestUtils.teardown() kafkaTestUtils = null } } test("metadata apis") { val leader = kc.findLeaders(Set(topicAndPartition)).right.get(topicAndPartition) val leaderAddress = s"${leader._1}:${leader._2}" assert(leaderAddress === kafkaTestUtils.brokerAddress, "didn't get leader") val parts = kc.getPartitions(Set(topic)).right.get assert(parts(topicAndPartition), "didn't get partitions") val err = kc.getPartitions(Set(topic + "BAD")) assert(err.isLeft, "getPartitions for a nonexistant topic should be an error") } test("leader offset apis") { val earliest = kc.getEarliestLeaderOffsets(Set(topicAndPartition)).right.get assert(earliest(topicAndPartition).offset === 0, "didn't get earliest") val latest = kc.getLatestLeaderOffsets(Set(topicAndPartition)).right.get assert(latest(topicAndPartition).offset === 1, "didn't get latest") } test("consumer offset apis") { val group = "kcsuitegroup" + Random.nextInt(10000) val offset = Random.nextInt(10000) val set = kc.setConsumerOffsets(group, Map(topicAndPartition -> offset)) assert(set.isRight, "didn't set consumer offsets") val get = kc.getConsumerOffsets(group, Set(topicAndPartition)).right.get assert(get(topicAndPartition) === offset, "didn't get consumer offsets") } }
Example 35
Source File: KafkaContinuousSourceSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.kafka010 import java.util.Properties import java.util.concurrent.atomic.AtomicInteger import org.scalatest.time.SpanSugar._ import scala.collection.mutable import scala.util.Random import org.apache.spark.SparkContext import org.apache.spark.sql.{DataFrame, Dataset, ForeachWriter, Row} import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.execution.streaming.StreamExecution import org.apache.spark.sql.execution.streaming.continuous.ContinuousExecution import org.apache.spark.sql.streaming.{StreamTest, Trigger} import org.apache.spark.sql.test.{SharedSQLContext, TestSparkSession} // Run tests in KafkaSourceSuiteBase in continuous execution mode. class KafkaContinuousSourceSuite extends KafkaSourceSuiteBase with KafkaContinuousTest class KafkaContinuousSourceTopicDeletionSuite extends KafkaContinuousTest { import testImplicits._ override val brokerProps = Map("auto.create.topics.enable" -> "false") test("subscribing topic by pattern with topic deletions") { val topicPrefix = newTopic() val topic = topicPrefix + "-seems" val topic2 = topicPrefix + "-bad" testUtils.createTopic(topic, partitions = 5) testUtils.sendMessages(topic, Array("-1")) require(testUtils.getLatestOffsets(Set(topic)).size === 5) val reader = spark .readStream .format("kafka") .option("kafka.bootstrap.servers", testUtils.brokerAddress) .option("kafka.metadata.max.age.ms", "1") .option("subscribePattern", s"$topicPrefix-.*") .option("failOnDataLoss", "false") val kafka = reader.load() .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") .as[(String, String)] val mapped = kafka.map(kv => kv._2.toInt + 1) testStream(mapped)( makeSureGetOffsetCalled, AddKafkaData(Set(topic), 1, 2, 3), CheckAnswer(2, 3, 4), Execute { query => testUtils.deleteTopic(topic) testUtils.createTopic(topic2, partitions = 5) eventually(timeout(streamingTimeout)) { assert( query.lastExecution.logical.collectFirst { case DataSourceV2Relation(_, r: KafkaContinuousReader) => r }.exists { r => // Ensure the new topic is present and the old topic is gone. r.knownPartitions.exists(_.topic == topic2) }, s"query never reconfigured to new topic $topic2") } }, AddKafkaData(Set(topic2), 4, 5, 6), CheckAnswer(2, 3, 4, 5, 6, 7) ) } } class KafkaContinuousSourceStressForDontFailOnDataLossSuite extends KafkaSourceStressForDontFailOnDataLossSuite { override protected def startStream(ds: Dataset[Int]) = { ds.writeStream .format("memory") .queryName("memory") .trigger(Trigger.Continuous("1 second")) .start() } }
Example 36
Source File: UISeleniumSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.thriftserver import scala.util.Random import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.openqa.selenium.WebDriver import org.openqa.selenium.htmlunit.HtmlUnitDriver import org.scalatest.{BeforeAndAfterAll, Matchers} import org.scalatest.concurrent.Eventually._ import org.scalatest.selenium.WebBrowser import org.scalatest.time.SpanSugar._ import org.apache.spark.ui.SparkUICssErrorHandler class UISeleniumSuite extends HiveThriftJdbcTest with WebBrowser with Matchers with BeforeAndAfterAll { implicit var webDriver: WebDriver = _ var server: HiveThriftServer2 = _ val uiPort = 20000 + Random.nextInt(10000) override def mode: ServerMode.Value = ServerMode.binary override def beforeAll(): Unit = { webDriver = new HtmlUnitDriver { getWebClient.setCssErrorHandler(new SparkUICssErrorHandler) } super.beforeAll() } override def afterAll(): Unit = { if (webDriver != null) { webDriver.quit() } super.afterAll() } override protected def serverStartCommand(port: Int) = { val portConf = if (mode == ServerMode.binary) { ConfVars.HIVE_SERVER2_THRIFT_PORT } else { ConfVars.HIVE_SERVER2_THRIFT_HTTP_PORT } s"""$startScript | --master local | --hiveconf hive.root.logger=INFO,console | --hiveconf ${ConfVars.METASTORECONNECTURLKEY}=$metastoreJdbcUri | --hiveconf ${ConfVars.METASTOREWAREHOUSE}=$warehousePath | --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST}=localhost | --hiveconf ${ConfVars.HIVE_SERVER2_TRANSPORT_MODE}=$mode | --hiveconf $portConf=$port | --driver-class-path ${sys.props("java.class.path")} | --conf spark.ui.enabled=true | --conf spark.ui.port=$uiPort """.stripMargin.split("\\s+").toSeq } ignore("thrift server ui test") { withJdbcStatement("test_map") { statement => val baseURL = s"http://localhost:$uiPort" val queries = Seq( "CREATE TABLE test_map(key INT, value STRING)", s"LOAD DATA LOCAL INPATH '${TestData.smallKv}' OVERWRITE INTO TABLE test_map") queries.foreach(statement.execute) eventually(timeout(10 seconds), interval(50 milliseconds)) { go to baseURL find(cssSelector("""ul li a[href*="sql"]""")) should not be None } eventually(timeout(10 seconds), interval(50 milliseconds)) { go to (baseURL + "/sql") find(id("sessionstat")) should not be None find(id("sqlstat")) should not be None // check whether statements exists queries.foreach { line => findAll(cssSelector("""ul table tbody tr td""")).map(_.text).toList should contain (line) } } } } }
Example 37
Source File: RandomDataGeneratorSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import scala.util.Random import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.CatalystTypeConverters import org.apache.spark.sql.types._ def testRandomDataGeneration(dataType: DataType, nullable: Boolean = true): Unit = { val toCatalyst = CatalystTypeConverters.createToCatalystConverter(dataType) val generator = RandomDataGenerator.forType(dataType, nullable, new Random(33)).getOrElse { fail(s"Random data generator was not defined for $dataType") } if (nullable) { assert(Iterator.fill(100)(generator()).contains(null)) } else { assert(!Iterator.fill(100)(generator()).contains(null)) } for (_ <- 1 to 10) { val generatedValue = generator() toCatalyst(generatedValue) } } // Basic types: for ( dataType <- DataTypeTestUtils.atomicTypes; nullable <- Seq(true, false) if !dataType.isInstanceOf[DecimalType]) { test(s"$dataType (nullable=$nullable)") { testRandomDataGeneration(dataType) } } for ( arrayType <- DataTypeTestUtils.atomicArrayTypes if RandomDataGenerator.forType(arrayType.elementType, arrayType.containsNull).isDefined ) { test(s"$arrayType") { testRandomDataGeneration(arrayType) } } val atomicTypesWithDataGenerators = DataTypeTestUtils.atomicTypes.filter(RandomDataGenerator.forType(_).isDefined) // Complex types: for ( keyType <- atomicTypesWithDataGenerators; valueType <- atomicTypesWithDataGenerators // Scala's BigDecimal.hashCode can lead to OutOfMemoryError on Scala 2.10 (see SI-6173) and // Spark can hit NumberFormatException errors when converting certain BigDecimals (SPARK-8802). // For these reasons, we don't support generation of maps with decimal keys. if !keyType.isInstanceOf[DecimalType] ) { val mapType = MapType(keyType, valueType) test(s"$mapType") { testRandomDataGeneration(mapType) } } for ( colOneType <- atomicTypesWithDataGenerators; colTwoType <- atomicTypesWithDataGenerators ) { val structType = StructType(StructField("a", colOneType) :: StructField("b", colTwoType) :: Nil) test(s"$structType") { testRandomDataGeneration(structType) } } test("check size of generated map") { val mapType = MapType(IntegerType, IntegerType) for (seed <- 1 to 1000) { val generator = RandomDataGenerator.forType( mapType, nullable = false, rand = new Random(seed)).get val maps = Seq.fill(100)(generator().asInstanceOf[Map[Int, Int]]) val expectedTotalElements = 100 / 2 * RandomDataGenerator.MAX_MAP_SIZE val deviation = math.abs(maps.map(_.size).sum - expectedTotalElements) assert(deviation.toDouble / expectedTotalElements < 2e-1) } } }
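The generator above is constructed with new Random(33) so every run of the suite sees the same "random" values. A minimal sketch of that seeding idea, independent of Spark's RandomDataGenerator:

import scala.util.Random

object SeededGeneratorSketch extends App {
  // Two Randoms created with the same seed produce identical sequences,
  // which makes randomly generated test data reproducible across runs.
  val a = new Random(33)
  val b = new Random(33)
  val fromA = Seq.fill(5)(a.nextInt(100))
  val fromB = Seq.fill(5)(b.nextInt(100))
  assert(fromA == fromB)
  println(fromA)
}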
Example 38
Source File: MiscExpressionsSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions import scala.util.Random import org.apache.spark.SparkFunSuite import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String class MiscExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("assert_true") { intercept[RuntimeException] { checkEvaluation(AssertTrue(Literal.create(false, BooleanType)), null) } intercept[RuntimeException] { checkEvaluation(AssertTrue(Cast(Literal(0), BooleanType)), null) } intercept[RuntimeException] { checkEvaluation(AssertTrue(Literal.create(null, NullType)), null) } intercept[RuntimeException] { checkEvaluation(AssertTrue(Literal.create(null, BooleanType)), null) } checkEvaluation(AssertTrue(Literal.create(true, BooleanType)), null) checkEvaluation(AssertTrue(Cast(Literal(1), BooleanType)), null) } test("uuid") { def assertIncorrectEval(f: () => Unit): Unit = { intercept[Exception] { f() }.getMessage().contains("Incorrect evaluation") } checkEvaluation(Length(Uuid(Some(0))), 36) val r = new Random() val seed1 = Some(r.nextLong()) val uuid1 = evaluate(Uuid(seed1)).asInstanceOf[UTF8String] checkEvaluation(Uuid(seed1), uuid1.toString) val seed2 = Some(r.nextLong()) val uuid2 = evaluate(Uuid(seed2)).asInstanceOf[UTF8String] assertIncorrectEval(() => checkEvaluationWithoutCodegen(Uuid(seed1), uuid2)) assertIncorrectEval(() => checkEvaluationWithGeneratedMutableProjection(Uuid(seed1), uuid2)) assertIncorrectEval(() => checkEvalutionWithUnsafeProjection(Uuid(seed1), uuid2)) assertIncorrectEval(() => checkEvaluationWithOptimization(Uuid(seed1), uuid2)) } }
Example 39
Source File: RandomUUIDGeneratorSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.util import scala.util.Random import org.apache.spark.SparkFunSuite class RandomUUIDGeneratorSuite extends SparkFunSuite { test("RandomUUIDGenerator should generate version 4, variant 2 UUIDs") { val generator = RandomUUIDGenerator(new Random().nextLong()) for (_ <- 0 to 100) { val uuid = generator.getNextUUID() assert(uuid.version() == 4) assert(uuid.variant() == 2) } } test("UUID from RandomUUIDGenerator should be deterministic") { val r1 = new Random(100) val generator1 = RandomUUIDGenerator(r1.nextLong()) val r2 = new Random(100) val generator2 = RandomUUIDGenerator(r2.nextLong()) val r3 = new Random(101) val generator3 = RandomUUIDGenerator(r3.nextLong()) for (_ <- 0 to 100) { val uuid1 = generator1.getNextUUID() val uuid2 = generator2.getNextUUID() val uuid3 = generator3.getNextUUID() assert(uuid1 == uuid2) assert(uuid1 != uuid3) } } test("Get UTF8String UUID") { val generator = RandomUUIDGenerator(new Random().nextLong()) val utf8StringUUID = generator.getNextUUIDUTF8String() val uuid = java.util.UUID.fromString(utf8StringUUID.toString) assert(uuid.version() == 4 && uuid.variant() == 2 && utf8StringUUID.toString == uuid.toString) } }
Example 40
Source File: TakeOrderedAndProjectSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution import scala.util.Random import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ class TakeOrderedAndProjectSuite extends SparkPlanTest with SharedSQLContext { private var rand: Random = _ private var seed: Long = 0 protected override def beforeAll(): Unit = { super.beforeAll() seed = System.currentTimeMillis() rand = new Random(seed) } private def generateRandomInputData(): DataFrame = { val schema = new StructType() .add("a", IntegerType, nullable = false) .add("b", IntegerType, nullable = false) val inputData = Seq.fill(10000)(Row(rand.nextInt(), rand.nextInt())) spark.createDataFrame(sparkContext.parallelize(Random.shuffle(inputData), 10), schema) } private def noOpFilter(plan: SparkPlan): SparkPlan = FilterExec(Literal(true), plan) val limit = 250 val sortOrder = 'a.desc :: 'b.desc :: Nil test("TakeOrderedAndProject.doExecute without project") { withClue(s"seed = $seed") { checkThatPlansAgree( generateRandomInputData(), input => noOpFilter(TakeOrderedAndProjectExec(limit, sortOrder, input.output, input)), input => GlobalLimitExec(limit, LocalLimitExec(limit, SortExec(sortOrder, true, input))), sortAnswers = false) } } test("TakeOrderedAndProject.doExecute with project") { withClue(s"seed = $seed") { checkThatPlansAgree( generateRandomInputData(), input => noOpFilter( TakeOrderedAndProjectExec(limit, sortOrder, Seq(input.output.last), input)), input => GlobalLimitExec(limit, LocalLimitExec(limit, ProjectExec(Seq(input.output.last), SortExec(sortOrder, true, input)))), sortAnswers = false) } } }
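Note how the suite seeds its Random with System.currentTimeMillis() and then reports the seed through withClue, so a failing run can be replayed with the exact same input. A standalone sketch of that technique (the data and the assertion are purely illustrative):

import scala.util.Random

object LoggedSeedSketch extends App {
  val seed = System.currentTimeMillis()
  val rand = new Random(seed)
  val data = Seq.fill(1000)(rand.nextInt())
  // If this check ever fails, the printed seed lets you rerun with identical data.
  assert(data.nonEmpty, s"unexpected empty input (seed = $seed)")
  println(s"generated ${data.size} values with seed $seed")
}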
Example 41
Source File: ColumnarTestUtils.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericInternalRow = { val row = new GenericInternalRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) case _ => throw new IllegalArgumentException(s"Unknown column type $columnType") }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericInternalRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericInternalRow(1) row(0) = value row } (values, rows) } }
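ColumnarTestUtils leans on the Random companion object for most primitive column types: nextBoolean, nextInt, nextLong, nextFloat, nextDouble, nextString and nextBytes. A condensed standalone sketch of those calls (the widths chosen here are arbitrary, not part of the utility above):

import scala.util.Random

object RandomPrimitivesSketch extends App {
  val bool   = Random.nextBoolean()
  val int    = Random.nextInt()
  val long   = Random.nextLong()
  val float  = Random.nextFloat()
  val double = Random.nextDouble()
  val str    = Random.nextString(Random.nextInt(32)) // string of random length < 32
  val bytes  = new Array[Byte](16)
  Random.nextBytes(bytes)                            // fills the byte array in place
  println(Seq(bool, int, long, float, double, str, bytes.toSeq.take(4)).mkString(", "))
}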
Example 42
Source File: BitArraySuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.util.sketch

import scala.util.Random

import org.scalatest.FunSuite // scalastyle:ignore funsuite

class BitArraySuite extends FunSuite { // scalastyle:ignore funsuite

  test("error case when create BitArray") {
    intercept[IllegalArgumentException](new BitArray(0))
    intercept[IllegalArgumentException](new BitArray(64L * Integer.MAX_VALUE + 1))
  }

  test("bitSize") {
    assert(new BitArray(64).bitSize() == 64)
    // BitArray is word-aligned, so 65~128 bits need 2 long to store, which is 128 bits.
    assert(new BitArray(65).bitSize() == 128)
    assert(new BitArray(127).bitSize() == 128)
    assert(new BitArray(128).bitSize() == 128)
  }

  test("set") {
    val bitArray = new BitArray(64)
    assert(bitArray.set(1)) // Only returns true if the bit changed.
    assert(!bitArray.set(1))
    assert(bitArray.set(2))
  }

  test("normal operation") {
    // use a fixed seed to make the test predictable.
    val r = new Random(37)

    val bitArray = new BitArray(320)
    val indexes = (1 to 100).map(_ => r.nextInt(320).toLong).distinct

    indexes.foreach(bitArray.set)
    indexes.foreach(i => assert(bitArray.get(i)))
    assert(bitArray.cardinality() == indexes.length)
  }

  test("merge") {
    // use a fixed seed to make the test predictable.
    val r = new Random(37)

    val bitArray1 = new BitArray(64 * 6)
    val bitArray2 = new BitArray(64 * 6)

    val indexes1 = (1 to 100).map(_ => r.nextInt(64 * 6).toLong).distinct
    val indexes2 = (1 to 100).map(_ => r.nextInt(64 * 6).toLong).distinct

    indexes1.foreach(bitArray1.set)
    indexes2.foreach(bitArray2.set)

    bitArray1.putAll(bitArray2)
    indexes1.foreach(i => assert(bitArray1.get(i)))
    indexes2.foreach(i => assert(bitArray1.get(i)))
    assert(bitArray1.cardinality() == (indexes1 ++ indexes2).distinct.length)
  }
}
Example 43
Source File: NettyBlockTransferServiceSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.network.netty import scala.util.Random import org.mockito.Mockito.mock import org.scalatest._ import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.network.BlockDataManager class NettyBlockTransferServiceSuite extends SparkFunSuite with BeforeAndAfterEach with Matchers { private var service0: NettyBlockTransferService = _ private var service1: NettyBlockTransferService = _ override def afterEach() { try { if (service0 != null) { service0.close() service0 = null } if (service1 != null) { service1.close() service1 = null } } finally { super.afterEach() } } test("can bind to a random port") { service0 = createService(port = 0) service0.port should not be 0 } test("can bind to two random ports") { service0 = createService(port = 0) service1 = createService(port = 0) service0.port should not be service1.port } test("can bind to a specific port") { val port = 17634 + Random.nextInt(10000) logInfo("random port for test: " + port) service0 = createService(port) verifyServicePort(expectedPort = port, actualPort = service0.port) } test("can bind to a specific port twice and the second increments") { val port = 17634 + Random.nextInt(10000) logInfo("random port for test: " + port) service0 = createService(port) verifyServicePort(expectedPort = port, actualPort = service0.port) service1 = createService(service0.port) // `service0.port` is occupied, so `service1.port` should not be `service0.port` verifyServicePort(expectedPort = service0.port + 1, actualPort = service1.port) } private def verifyServicePort(expectedPort: Int, actualPort: Int): Unit = { actualPort should be >= expectedPort // avoid testing equality in case of simultaneous tests // the default value for `spark.port.maxRetries` is 100 under test actualPort should be <= (expectedPort + 100) } private def createService(port: Int): NettyBlockTransferService = { val conf = new SparkConf() .set("spark.app.id", s"test-${getClass.getName}") val securityManager = new SecurityManager(conf) val blockDataManager = mock(classOf[BlockDataManager]) val service = new NettyBlockTransferService(conf, securityManager, "localhost", "localhost", port, 1) service.init(blockDataManager) service } }
Example 44
Source File: SparkHadoopUtilSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy import java.security.PrivilegedExceptionAction import scala.util.Random import org.apache.hadoop.fs.FileStatus import org.apache.hadoop.fs.permission.{FsAction, FsPermission} import org.apache.hadoop.security.UserGroupInformation import org.scalatest.Matchers import org.apache.spark.SparkFunSuite class SparkHadoopUtilSuite extends SparkFunSuite with Matchers { test("check file permission") { import FsAction._ val testUser = s"user-${Random.nextInt(100)}" val testGroups = Array(s"group-${Random.nextInt(100)}") val testUgi = UserGroupInformation.createUserForTesting(testUser, testGroups) testUgi.doAs(new PrivilegedExceptionAction[Void] { override def run(): Void = { val sparkHadoopUtil = new SparkHadoopUtil // If file is owned by user and user has access permission var status = fileStatus(testUser, testGroups.head, READ_WRITE, READ_WRITE, NONE) sparkHadoopUtil.checkAccessPermission(status, READ) should be(true) sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(true) // If file is owned by user but user has no access permission status = fileStatus(testUser, testGroups.head, NONE, READ_WRITE, NONE) sparkHadoopUtil.checkAccessPermission(status, READ) should be(false) sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(false) val otherUser = s"test-${Random.nextInt(100)}" val otherGroup = s"test-${Random.nextInt(100)}" // If file is owned by user's group and user's group has access permission status = fileStatus(otherUser, testGroups.head, NONE, READ_WRITE, NONE) sparkHadoopUtil.checkAccessPermission(status, READ) should be(true) sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(true) // If file is owned by user's group but user's group has no access permission status = fileStatus(otherUser, testGroups.head, READ_WRITE, NONE, NONE) sparkHadoopUtil.checkAccessPermission(status, READ) should be(false) sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(false) // If file is owned by other user and this user has access permission status = fileStatus(otherUser, otherGroup, READ_WRITE, READ_WRITE, READ_WRITE) sparkHadoopUtil.checkAccessPermission(status, READ) should be(true) sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(true) // If file is owned by other user but this user has no access permission status = fileStatus(otherUser, otherGroup, READ_WRITE, READ_WRITE, NONE) sparkHadoopUtil.checkAccessPermission(status, READ) should be(false) sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(false) null } }) } private def fileStatus( owner: String, group: String, userAction: FsAction, groupAction: FsAction, otherAction: FsAction): FileStatus = { new FileStatus(0L, false, 0, 0L, 0L, 0L, new FsPermission(userAction, groupAction, otherAction), owner, group, null) } }
Example 45
Source File: SamplingUtilsSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.util.random import scala.util.Random import org.apache.commons.math3.distribution.{BinomialDistribution, PoissonDistribution} import org.apache.spark.SparkFunSuite class SamplingUtilsSuite extends SparkFunSuite { test("reservoirSampleAndCount") { val input = Seq.fill(100)(Random.nextInt()) // input size < k val (sample1, count1) = SamplingUtils.reservoirSampleAndCount(input.iterator, 150) assert(count1 === 100) assert(input === sample1.toSeq) // input size == k val (sample2, count2) = SamplingUtils.reservoirSampleAndCount(input.iterator, 100) assert(count2 === 100) assert(input === sample2.toSeq) // input size > k val (sample3, count3) = SamplingUtils.reservoirSampleAndCount(input.iterator, 10) assert(count3 === 100) assert(sample3.length === 10) } test("SPARK-18678 reservoirSampleAndCount with tiny input") { val input = Seq(0, 1) val counts = new Array[Int](input.size) for (i <- 0 until 500) { val (samples, inputSize) = SamplingUtils.reservoirSampleAndCount(input.iterator, 1) assert(inputSize === 2) assert(samples.length === 1) counts(samples.head) += 1 } // If correct, should be true with prob ~ 0.99999707 assert(math.abs(counts(0) - counts(1)) <= 100) } test("computeFraction") { // test that the computed fraction guarantees enough data points // in the sample with a failure rate <= 0.0001 val n = 100000 for (s <- 1 to 15) { val frac = SamplingUtils.computeFractionForSampleSize(s, n, true) val poisson = new PoissonDistribution(frac * n) assert(poisson.inverseCumulativeProbability(0.0001) >= s, "Computed fraction is too low") } for (s <- List(20, 100, 1000)) { val frac = SamplingUtils.computeFractionForSampleSize(s, n, true) val poisson = new PoissonDistribution(frac * n) assert(poisson.inverseCumulativeProbability(0.0001) >= s, "Computed fraction is too low") } for (s <- List(1, 10, 100, 1000)) { val frac = SamplingUtils.computeFractionForSampleSize(s, n, false) val binomial = new BinomialDistribution(n, frac) assert(binomial.inverseCumulativeProbability(0.0001)*n >= s, "Computed fraction is too low") } } }
Example 46
Source File: BlockReplicationPolicySuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.storage import scala.collection.mutable import scala.language.implicitConversions import scala.util.Random import org.scalatest.{BeforeAndAfter, Matchers} import org.apache.spark.{LocalSparkContext, SparkFunSuite} class RandomBlockReplicationPolicyBehavior extends SparkFunSuite with Matchers with BeforeAndAfter with LocalSparkContext { // Implicitly convert strings to BlockIds for test clarity. protected implicit def StringToBlockId(value: String): BlockId = new TestBlockId(value) val replicationPolicy: BlockReplicationPolicy = new RandomBlockReplicationPolicy val blockId = "test-block" protected def generateBlockManagerIds(count: Int, racks: Seq[String]): Seq[BlockManagerId] = { val randomizedRacks: Seq[String] = Random.shuffle( racks ++ racks.length.until(count).map(_ => racks(Random.nextInt(racks.length))) ) (0 until count).map { i => BlockManagerId(s"Exec-$i", s"Host-$i", 10000 + i, Some(randomizedRacks(i))) } } } class TopologyAwareBlockReplicationPolicyBehavior extends RandomBlockReplicationPolicyBehavior { override val replicationPolicy = new BasicBlockReplicationPolicy test("All peers in the same rack") { val racks = Seq("/default-rack") val numBlockManager = 10 (1 to 10).foreach {numReplicas => val peers = generateBlockManagerIds(numBlockManager, racks) val blockManager = BlockManagerId("Driver", "Host-driver", 10001, Some(racks.head)) val prioritizedPeers = replicationPolicy.prioritize( blockManager, peers, mutable.HashSet.empty, blockId, numReplicas ) assert(prioritizedPeers.toSet.size == numReplicas) assert(prioritizedPeers.forall(p => p.host != blockManager.host)) } } test("Peers in 2 racks") { val racks = Seq("/Rack-1", "/Rack-2") (1 to 10).foreach {numReplicas => val peers = generateBlockManagerIds(10, racks) val blockManager = BlockManagerId("Driver", "Host-driver", 9001, Some(racks.head)) val prioritizedPeers = replicationPolicy.prioritize( blockManager, peers, mutable.HashSet.empty, blockId, numReplicas ) assert(prioritizedPeers.toSet.size == numReplicas) val priorityPeers = prioritizedPeers.take(2) assert(priorityPeers.forall(p => p.host != blockManager.host)) if(numReplicas > 1) { // both these conditions should be satisfied when numReplicas > 1 assert(priorityPeers.exists(p => p.topologyInfo == blockManager.topologyInfo)) assert(priorityPeers.exists(p => p.topologyInfo != blockManager.topologyInfo)) } } } }
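generateBlockManagerIds pads the rack list up to the requested size and then runs it through Random.shuffle, so rack assignment is arbitrary while every rack stays represented. A tiny standalone sketch of the same padding-plus-shuffle idea (the rack names are invented for illustration):

import scala.util.Random

object ShuffledRacksSketch extends App {
  val racks = Seq("/rack-1", "/rack-2")
  val count = 7
  // Pad to `count` entries by sampling racks at random, then shuffle the whole list.
  val padded = racks ++ racks.length.until(count).map(_ => racks(Random.nextInt(racks.length)))
  val randomized = Random.shuffle(padded)
  println(randomized.mkString(", "))
}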
Example 47
Source File: DatasetUtils.scala From doddle-model with Apache License 2.0 | 5 votes |
package io.picnicml.doddlemodel.data import breeze.stats.hist import scala.util.Random object DatasetUtils { def splitDatasetWithGroups(x: Features, y: Target, groups: IntVector, proportionTrain: Float = 0.5f): GroupTrainTestSplit = { val numTrain = numberOfTrainExamplesBasedOnProportion(x.rows, proportionTrain) val numSamplesPerGroup = hist(groups, numberOfUniqueGroups(groups)).hist.toArray val (sortedNumSamplesPerGroup, toOriginalGroupIndex) = numSamplesPerGroup.zipWithIndex.sorted.unzip val numGroupsInTrain = sortedNumSamplesPerGroup .foldLeft(List(0)) { case (acc, currGroupSize) => (acc(0) + currGroupSize) :: acc }.reverse.drop(1) .takeWhile(cumulativeNumSamples => cumulativeNumSamples <= numTrain) .length val groupsInTrain = (0 until numGroupsInTrain).map(group => toOriginalGroupIndex(group)) val (trIndices, teIndices) = (0 until groups.length).foldLeft((IndexedSeq[Int](), IndexedSeq[Int]())) { case ((currTrIndices, currTeIndices), groupIndex) => if (groupsInTrain.contains(groups(groupIndex))) (currTrIndices :+ groupIndex, currTeIndices) else (currTrIndices, currTeIndices :+ groupIndex) } GroupTrainTestSplit( x(trIndices, ::).toDenseMatrix, y(trIndices).toDenseVector, groups(trIndices).toDenseVector, x(teIndices, ::).toDenseMatrix, y(teIndices).toDenseVector, groups(teIndices).toDenseVector ) } private def numberOfTrainExamplesBasedOnProportion(numTotal: Int, proportionTrain: Float): Int = { require(proportionTrain > 0.0 && proportionTrain < 1.0, "proportionTrain must be between 0 and 1") val numTrain = (proportionTrain * numTotal.toFloat).toInt require(numTrain > 0 && numTrain < numTotal, "the value of proportionTrain is either too high or too low") numTrain } }
Example 48
Source File: KFoldSplitter.scala From doddle-model with Apache License 2.0 | 5 votes |
package io.picnicml.doddlemodel.modelselection import io.picnicml.doddlemodel.CrossScalaCompat.{LazyListCompat, lazyListCompatFromSeq} import io.picnicml.doddlemodel.data.{Features, IntVector, Target, TrainTestSplit} import scala.util.Random class KFoldSplitter private (val numFolds: Int, val shuffleRows: Boolean) extends DataSplitter { override def splitData(x: Features, y: Target) (implicit rand: Random = new Random()): LazyListCompat[TrainTestSplit] = { require(x.rows >= this.numFolds, "Number of examples must be at least the same as number of folds") val shuffleIndices = if (this.shuffleRows) rand.shuffle((0 until y.length).toIndexedSeq) else 0 until y.length val xShuffled = x(shuffleIndices, ::) val yShuffled = y(shuffleIndices) val splitIndices = this.calculateSplitIndices(x.rows) lazyListCompatFromSeq(splitIndices zip splitIndices.tail) map { case (indexStart, indexEnd) => val trIndices = (0 until indexStart) ++ (indexEnd until x.rows) val teIndices = indexStart until indexEnd TrainTestSplit( xShuffled(trIndices, ::).toDenseMatrix, yShuffled(trIndices).toDenseVector, xShuffled(teIndices, ::).toDenseMatrix, yShuffled(teIndices).toDenseVector ) } } private def calculateSplitIndices(numExamples: Int): List[Int] = { val atLeastNumExamplesPerFold = List.fill(this.numFolds)(numExamples / this.numFolds) val numFoldsWithOneMore = numExamples % this.numFolds val numExamplesPerFold = atLeastNumExamplesPerFold.zipWithIndex map { case (num, i) if i < numFoldsWithOneMore => num + 1 case (num, _) => num } // calculate indices by subtracting number of examples per fold from total number of examples numExamplesPerFold.foldRight(List(numExamples)) { case (num, head :: tail) => head - num :: head :: tail case _ => throw new IllegalStateException() } } override def splitData(x: Features, y: Target, groups: IntVector) (implicit rand: Random): LazyListCompat[TrainTestSplit] = throw new NotImplementedError("KFoldSplitter doesn't split data based on groups") } object KFoldSplitter { def apply(numFolds: Int, shuffleRows: Boolean = true): KFoldSplitter = { require(numFolds > 0, "Number of folds must be positive") new KFoldSplitter(numFolds, shuffleRows) } }
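KFoldSplitter takes its Random as an implicit parameter with a default, so production callers get fresh randomness while tests can pass a seeded instance for deterministic splits. A minimal sketch of that injection pattern, unrelated to the doddle-model types above:

import scala.util.Random

object ImplicitRandomSketch extends App {
  // The implicit Random lets callers control (and seed) the shuffling.
  def shuffledIndices(n: Int)(implicit rand: Random = new Random()): IndexedSeq[Int] =
    rand.shuffle((0 until n).toIndexedSeq)

  println(shuffledIndices(10))                 // fresh randomness by default
  println(shuffledIndices(10)(new Random(42))) // reproducible when seeded, e.g. in tests
}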
Example 49
Source File: GroupKFoldSplitter.scala From doddle-model with Apache License 2.0 | 5 votes |
package io.picnicml.doddlemodel.modelselection import breeze.linalg.argmin import breeze.stats.hist import io.picnicml.doddlemodel.CrossScalaCompat.{LazyListCompat, lazyListCompatFromSeq} import io.picnicml.doddlemodel.data._ import io.picnicml.doddlemodel.modelselection.GroupKFoldSplitter.{TestFolds, TrainTestIndices} import scala.util.Random class GroupKFoldSplitter private (val numFolds: Int) extends DataSplitter { override def splitData(x: Features, y: Target, groups: IntVector) (implicit rand: Random = new Random()): LazyListCompat[TrainTestSplit] = { val testFolds = calculateTestFolds(groups) lazyListCompatFromSeq(0 until numFolds).map { foldIndex => val indices = groups.iterator.foldLeft(TrainTestIndices()) { case (acc, (exampleIndex, group)) => if (testFolds.groupToTestFoldIndex(group) == foldIndex) acc.addToTestIndex(exampleIndex) else acc.addToTrainIndex(exampleIndex) } TrainTestSplit( x(indices.trIndices, ::).toDenseMatrix, y(indices.trIndices).toDenseVector, x(indices.teIndices, ::).toDenseMatrix, y(indices.teIndices).toDenseVector ) } } private def calculateTestFolds(groups: IntVector): TestFolds = { val numGroups = numberOfUniqueGroups(groups) val numSamplesPerGroup = hist(groups, numGroups).hist.toArray implicit val ordering: Ordering[Int] = Ordering.Int.reverse val (sortedNumSamplesPerGroup, toOriginalGroupIndex) = numSamplesPerGroup.zipWithIndex.sorted.unzip sortedNumSamplesPerGroup.zipWithIndex.foldLeft(TestFolds(numFolds, numGroups)) { case (acc, (numSamples, group)) => val smallestFoldIndex = argmin(acc.numTestSamplesPerFold) acc.addNumSamplesToFold(numSamples, smallestFoldIndex) acc.setGroupToTestFoldIndex(toOriginalGroupIndex(group), smallestFoldIndex) acc } } override def splitData(x: Features, y: Target)(implicit rand: Random): LazyListCompat[TrainTestSplit] = throw new NotImplementedError("GroupKFoldSplitter only splits data based on groups") } object GroupKFoldSplitter { def apply(numFolds: Int): GroupKFoldSplitter = { require(numFolds > 0, "Number of folds must be positive") new GroupKFoldSplitter(numFolds) } private case class TrainTestIndices(trIndices: IndexedSeq[Int] = IndexedSeq(), teIndices: IndexedSeq[Int] = IndexedSeq()) { def addToTrainIndex(x: Int): TrainTestIndices = this.copy(trIndices = this.trIndices :+ x) def addToTestIndex(x: Int): TrainTestIndices = this.copy(teIndices = this.teIndices :+ x) } private case class TestFolds(numTestSamplesPerFold: Array[Int], groupToTestFoldIndex: Array[Int]) { def addNumSamplesToFold(numSamples: Int, foldIndex: Int): Unit = this.numTestSamplesPerFold(foldIndex) += numSamples def setGroupToTestFoldIndex(group: Int, foldIndex: Int): Unit = this.groupToTestFoldIndex(group) = foldIndex } private object TestFolds { def apply(numFolds: Int, numGroups: Int): TestFolds = TestFolds(new Array[Int](numFolds), new Array[Int](numGroups)) } }
Example 50
Source File: DatasetUtilsTest.scala From doddle-model with Apache License 2.0 | 5 votes |
package io.picnicml.doddlemodel.data import breeze.linalg.DenseVector import io.picnicml.doddlemodel.TestingUtils import io.picnicml.doddlemodel.data.DatasetUtils.{shuffleDataset, splitDataset, splitDatasetWithGroups} import org.scalactic.{Equality, TolerantNumerics} import scala.util.Random import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers class DatasetUtilsTest extends AnyFlatSpec with Matchers with TestingUtils { implicit val rand: Random = new Random(0) implicit val tolerance: Equality[Float] = TolerantNumerics.tolerantFloatEquality(1.0f) val (x, y, _) = loadIrisDataset "Dataset utils" should "shuffle the dataset" in { val (_, yShuffled) = shuffleDataset(x, y) breezeEqual(y, yShuffled) shouldBe false } they should "split the dataset" in { val split = splitDataset(x, y) split.yTr.length shouldBe 75 split.yTe.length shouldBe 75 } they should "split the dataset with groups" in { val groups = DenseVector((0 until x.rows).map(x => x % 4):_*) val split = splitDatasetWithGroups(x, y, groups, proportionTrain = 0.8f) val groupsTe = split.groupsTe.toArray split.groupsTr.forall(trGroup => !groupsTe.contains(trGroup)) shouldBe true } }
Example 51
Source File: KFoldSplitterTest.scala From doddle-model with Apache License 2.0 | 5 votes |
package io.picnicml.doddlemodel.modelselection import io.picnicml.doddlemodel.TestingUtils import scala.util.Random import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers class KFoldSplitterTest extends AnyFlatSpec with Matchers with TestingUtils { val splitter = KFoldSplitter(numFolds = 3, shuffleRows = false) "KFoldSplitter" should "split 8 examples" in { implicit val rand: Random = new Random() val (x, y) = dummyData(8) val splits = splitter.splitData(x, y) splits.length shouldBe 3 splits(0).yTr.toArray shouldBe (3 to 7).toArray splits(0).yTe.toArray shouldBe (0 to 2).toArray splits(1).yTr.toArray shouldBe Array(0, 1, 2, 6, 7) splits(1).yTe.toArray shouldBe (3 to 5).toArray splits(2).yTr.toArray shouldBe (0 to 5).toArray splits(2).yTe.toArray shouldBe Array(6, 7) } it should "split 9 examples" in { implicit val rand: Random = new Random() val (x, y) = dummyData(9) val splits = splitter.splitData(x, y) splits.length shouldBe 3 splits(0).yTr.toArray shouldBe (3 to 8).toArray splits(0).yTe.toArray shouldBe (0 to 2).toArray splits(1).yTr.toArray shouldBe Array(0, 1, 2, 6, 7, 8) splits(1).yTe.toArray shouldBe (3 to 5).toArray splits(2).yTr.toArray shouldBe (0 to 5).toArray splits(2).yTe.toArray shouldBe (6 to 8).toArray } it should "split 10 examples" in { implicit val rand: Random = new Random() val (x, y) = dummyData(10) val splits = splitter.splitData(x, y) splits.length shouldBe 3 splits(0).yTr.toArray shouldBe (4 to 9).toArray splits(0).yTe.toArray shouldBe (0 to 3).toArray splits(1).yTr.toArray shouldBe Array(0, 1, 2, 3, 7, 8, 9) splits(1).yTe.toArray shouldBe (4 to 6).toArray splits(2).yTr.toArray shouldBe (0 to 6).toArray splits(2).yTe.toArray shouldBe (7 to 9).toArray } }
Example 52
Source File: HyperparameterSearchTest.scala From doddle-model with Apache License 2.0 | 5 votes |
package io.picnicml.doddlemodel.modelselection import io.picnicml.doddlemodel.data.DatasetUtils.shuffleDataset import io.picnicml.doddlemodel.data.loadBreastCancerDataset import io.picnicml.doddlemodel.linear.LogisticRegression import io.picnicml.doddlemodel.linear.LogisticRegression.ev import io.picnicml.doddlemodel.metrics.accuracy import scala.util.Random import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers class HyperparameterSearchTest extends AnyFlatSpec with Matchers { "Hyperparameter search" should "return the best model from available candidates" in { val (x, y, _) = loadBreastCancerDataset implicit val rand: Random = new Random(42) val (xShuffled, yShuffled) = shuffleDataset(x, y) val trIndices = 0 until 400 val teIndices = 400 until x.rows val (xTr, yTr) = (xShuffled(trIndices, ::), yShuffled(trIndices)) val (xTe, yTe) = (xShuffled(teIndices, ::), yShuffled(teIndices)) val cv: CrossValidation = CrossValidation(metric = accuracy, dataSplitter = KFoldSplitter(numFolds = 5)) val search = HyperparameterSearch(numIterations = 100, crossValidation = cv, verbose = false) val grid = (0 until 100).iterator.map(_.toFloat) val underfittedModel = ev.fit(LogisticRegression(lambda = 99.0f), xTr, yTr) val bestModel = search.bestOf(xTr, yTr) { LogisticRegression(lambda = grid.next) } accuracy(yTe, ev.predict(bestModel, xTe)) > accuracy(yTe, ev.predict(underfittedModel, xTe)) shouldBe true } }
Example 53
Source File: Retry.scala From futiles with Apache License 2.0 | 5 votes |
package markatta.futiles

import java.util.concurrent.{ThreadLocalRandom, TimeUnit}

import scala.concurrent.duration.FiniteDuration
import scala.concurrent.{ExecutionContext, Future}
import scala.util.Random

object Retry {

  private val alwaysRetry: Throwable => Boolean = _ => true

  def retryWithBackOff[A](
    times: Int,
    backOffUnit: FiniteDuration,
    shouldRetry: Throwable => Boolean = alwaysRetry
  )(fBlock: => Future[A])(implicit ec: ExecutionContext): Future[A] =
    try {
      if (times <= 1) fBlock
      else retryWithBackOffLoop(times, 1, backOffUnit, shouldRetry)(fBlock)
    } catch {
      // failure to actually create the future
      case x: Throwable => Future.failed(x)
    }

  private def retryWithBackOffLoop[A](
    totalTimes: Int,
    timesTried: Int,
    backOffUnit: FiniteDuration,
    shouldRetry: Throwable => Boolean
  )(fBlock: => Future[A])(implicit ec: ExecutionContext): Future[A] =
    if (totalTimes <= timesTried) fBlock
    else
      fBlock.recoverWith {
        case ex: Throwable if shouldRetry(ex) =>
          val timesTriedNow = timesTried + 1
          val backOff = nextBackOff(timesTriedNow, backOffUnit)
          Timeouts
            .timeout(backOff)(())
            .flatMap(
              _ =>
                retryWithBackOffLoop(
                  totalTimes,
                  timesTriedNow,
                  backOffUnit,
                  shouldRetry
                )(fBlock)
            )
      }

  private[futiles] def nextBackOff(
    tries: Int,
    backOffUnit: FiniteDuration
  ): FiniteDuration = {
    require(tries > 0, "tries should start from 1")
    val rng = new Random(ThreadLocalRandom.current())
    // jitter between 0.5 and 1.5
    val jitter = 0.5 + rng.nextDouble()
    val factor = math.pow(2, tries) * jitter
    FiniteDuration(
      (backOffUnit.toMillis * factor).toLong,
      TimeUnit.MILLISECONDS
    )
  }
}
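The Random usage worth calling out is in nextBackOff: a jitter factor drawn uniformly from [0.5, 1.5) spreads the exponential back-off so many clients retrying at once do not all wake up together. A small illustrative sketch of how that factor grows (standalone, with arbitrary loop bounds):

import scala.util.Random

object JitterSketch extends App {
  val rng = new Random()
  (1 to 5).foreach { tries =>
    val jitter = 0.5 + rng.nextDouble()        // uniform in [0.5, 1.5), as in nextBackOff
    val factor = math.pow(2, tries) * jitter   // exponential growth with random spread
    println(f"try $tries: back-off factor $factor%.2f (jitter $jitter%.2f)")
  }
}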
Example 54
Source File: LocalFSRawFileProvider.scala From mimir with Apache License 2.0 | 5 votes |
package mimir.data.staging import java.net.URL import java.io.{ File, InputStream, OutputStream, FileOutputStream } import java.sql.SQLException import scala.util.Random import com.typesafe.scalalogging.LazyLogging import org.apache.spark.sql.DataFrame import mimir.algebra.ID private def transferBytes(input: InputStream, output: OutputStream): Unit = { val buffer = Array.ofDim[Byte](1024*1024) // 1MB buffer var bytesRead = input.read(buffer) while(bytesRead >= 0) { output.write(buffer, 0, bytesRead) bytesRead = input.read(buffer) } } def stage(input: InputStream, fileExtension: String, nameHint: Option[String]): String = { val file = makeName(fileExtension, nameHint) transferBytes(input, new FileOutputStream(file)) return file.toString } def stage(url: URL, nameHint: Option[String]): String = { val pathComponents = url.getPath.split("/") val nameComponents = pathComponents.reverse.head.split(".") val extension = if(nameComponents.size > 1) { nameComponents.reverse.head } else { "data" } // default to generic 'data' if there's no extension stage(url.openStream(), extension, nameHint) } def stage(input: DataFrame, format: ID, nameHint:Option[String]): String = { val targetFile = makeName(format.id, nameHint).toString input.write .format(format.id) .save(targetFile) return targetFile } def drop(local: String): Unit = { new File(local).delete() } }
Example 55
Source File: HDFSRawFileProvider.scala From mimir with Apache License 2.0 | 5 votes |
package mimir.data.staging import java.net.URL import java.io.{ File, InputStream, OutputStream, FileOutputStream } import java.sql.SQLException import scala.util.Random import com.typesafe.scalalogging.LazyLogging import org.apache.spark.sql.DataFrame import mimir.algebra.ID import mimir.util.HadoopUtils import mimir.exec.spark.MimirSpark private def makeName(extension: String, nameHint: Option[String]): File = { val rand = new Random().alphanumeric // Try 1000 times to create a randomly named file for(i <- 0 until 1000){ val candidate = new File(basePath, nameHint match { case Some(hint) => s"${hint.replaceAll("[^a-zA-Z0-9]", "")}-${rand.take(10).mkString}.${extension}" case None => s"${rand.take(20).mkString}.${extension}" } ) // If the randomly named file doesn't exist, we're done. if(!candidate.exists()){ return candidate } } // Fail after 1000 attempts. throw new SQLException(s"Can't allocate name for $nameHint") } def stage(input: InputStream, fileExtension: String, nameHint: Option[String]): String = { val file = makeName(fileExtension, nameHint) logger.debug("Stage File To HDFS: " +hdfsHome+File.separator+file.toString) //if(!HadoopUtils.fileExistsHDFS(sparkSql.sparkSession.sparkContext, fileName)) HadoopUtils.writeToHDFS(MimirSpark.get.sparkSession.sparkContext, file.getName, input, true) logger.debug("... done\n") return s"$hdfsHome/${file.getName}" } def stage(url: URL, nameHint: Option[String]): String = { val pathComponents = url.getPath.split("/") val nameComponents = pathComponents.reverse.head.split(".") val extension = if(nameComponents.size > 1) { nameComponents.reverse.head } else { "data" } // default to generic 'data' if there's no extension stage(url.openStream(), extension, nameHint) } def stage(input: DataFrame, format: ID, nameHint:Option[String]): String = { val targetFile = makeName(format.id, nameHint).toString input.write .format(format.id) .save(targetFile) return targetFile } def drop(local: String): Unit = { new File(local).delete() } }
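makeName draws from new Random().alphanumeric, an infinite lazy sequence of random letters and digits, and gives up after 1000 attempts if every candidate name is taken. A minimal standalone sketch of that naming scheme (the directory, attempt count and extension here are arbitrary, not mimir's):

import java.io.File
import scala.util.Random

object RandomFileNameSketch extends App {
  // Build a fresh 20-character alphanumeric name per attempt and keep the first unused one.
  def freshName(dir: File, extension: String, attempts: Int = 1000): Option[File] =
    Iterator
      .continually(new File(dir, s"${Random.alphanumeric.take(20).mkString}.$extension"))
      .take(attempts)
      .find(!_.exists())

  println(freshName(new File("/tmp"), "csv"))
}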
Example 56
Source File: CommentModel.scala From mimir with Apache License 2.0 | 5 votes |
package mimir.models; import scala.util.Random import mimir.algebra._ import mimir.util._ import java.sql.SQLException @SerialVersionUID(1001L) class CommentModel(override val name: ID, cols:Seq[ID], colTypes:Seq[Type], comments:Seq[String]) extends Model(name) with Serializable with SourcedFeedback { def getFeedbackKey(idx: Int, args: Seq[PrimitiveValue] ) : ID = ID(s"${args(0).asString}:$idx") def argTypes(idx: Int) = Seq(TRowId()) def varType(idx: Int, args: Seq[Type]) = colTypes(idx) def bestGuess(idx: Int, args: Seq[PrimitiveValue], hints: Seq[PrimitiveValue] ) = { getFeedback(idx, args) match { case Some(v) => v case None => { hints(0) } } } def sample(idx: Int, randomness: Random, args: Seq[PrimitiveValue], hints: Seq[PrimitiveValue]) = hints(0) def reason(idx: Int, args: Seq[PrimitiveValue],hints: Seq[PrimitiveValue]): String = { //println("CommentModel:reason: " + idx + " [ " + args.mkString(",") + " ] [ " + hints.mkString(",") + " ]" ); val rowid = RowIdPrimitive(args(0).asString) val rval = getFeedback(idx, args) match { case Some(v) => s"${getReasonWho(idx,args)} told me that $v is valid for row $rowid" case None => s" ${comments(idx)}" } rval } def feedback(idx: Int, args: Seq[PrimitiveValue], v: PrimitiveValue): Unit = { val rowid = args(0).asString setFeedback(idx, args, v) } def isAcknowledged (idx: Int, args: Seq[PrimitiveValue]): Boolean = hasFeedback(idx, args) def hintTypes(idx: Int): Seq[mimir.algebra.Type] = colTypes //def getDomain(idx: Int, args: Seq[PrimitiveValue], hints:Seq[PrimitiveValue]): Seq[(PrimitiveValue,Double)] = Seq((hints(0), 0.0)) def confidence (idx: Int, args: Seq[PrimitiveValue], hints: Seq[PrimitiveValue]): Double = { val rowid = RowIdPrimitive(args(0).asString) getFeedback(idx,args) match { case Some(v) => { 1.0 } case None => { 0.0 } } } }
Example 57
Source File: MissingKeyModel.scala From mimir with Apache License 2.0 | 5 votes |
package mimir.models; import scala.util.Random import mimir.algebra._ import mimir.util._ @SerialVersionUID(1001L) class MissingKeyModel(override val name: ID, keys:Seq[ID], colTypes:Seq[Type]) extends Model(name) with Serializable with FiniteDiscreteDomain with SourcedFeedback { def getFeedbackKey(idx: Int, args: Seq[PrimitiveValue] ) : ID = ID(s"${args(0).asString}_$idx") def argTypes(idx: Int) = { Seq(TRowId()) } def varType(idx: Int, args: Seq[Type]) = colTypes(idx) def bestGuess(idx: Int, args: Seq[PrimitiveValue], hints: Seq[PrimitiveValue] ) = { //println(s"MissingKeyModel:bestGuess: idx: $idx args: ${args.mkString("[ ",","," ]")} hints: ${hints.mkString("[ ",","," ]")}") getFeedback(idx, args) match { case Some(v) => v case None => hints(0) } } def sample(idx: Int, randomness: Random, args: Seq[PrimitiveValue], hints: Seq[PrimitiveValue]) = { hints(0) } def reason(idx: Int, args: Seq[PrimitiveValue],hints: Seq[PrimitiveValue]): String = { val rowid = RowIdPrimitive(args(0).asString) getFeedback(idx, args) match { case Some(v) => v match { case NullPrimitive() => { s"${getReasonWho(idx,args)} told me that the row of this cell was missing and that the value of this cell is unknown so I have made it NULL." } case i => { s"${getReasonWho(idx,args)} told me that this key was missing because it was in a sequence but not in the query results: $i" } } case None => hints(0) match { case NullPrimitive() => { "I guessed that the row of this cell was missing. The value of this cell is unknown so I have made it NULL." } case i => { s"I guessed that this key was missing because it was in a sequence but not in the query results: $i" } } } } def feedback(idx: Int, args: Seq[PrimitiveValue], v: PrimitiveValue): Unit = { setFeedback(idx, args, v) } def isAcknowledged (idx: Int, args: Seq[PrimitiveValue]): Boolean = { hasFeedback(idx, args) } def hintTypes(idx: Int): Seq[mimir.algebra.Type] = Seq(TAny()) def getDomain(idx: Int, args: Seq[PrimitiveValue], hints:Seq[PrimitiveValue]): Seq[(PrimitiveValue,Double)] = Seq((hints(0), 0.0)) def confidence (idx: Int, args: Seq[PrimitiveValue], hints:Seq[PrimitiveValue]) : Double = { val rowid = RowIdPrimitive(args(0).asString) getFeedback(idx,args) match { case Some(v) => 1.0 case None => 0.0 } } }
Example 58
Source File: DefaultMetaModel.scala From mimir with Apache License 2.0 | 5 votes |
package mimir.models;

import scala.util.Random

import mimir.algebra._
import mimir.util._

@SerialVersionUID(1001L)
class DefaultMetaModel(name: ID, context: String, models: Seq[ID])
  extends Model(name)
  with DataIndependentFeedback
  with NoArgModel
  with FiniteDiscreteDomain
{
  def varType(idx: Int, args: Seq[Type]): Type = TString()

  def bestGuess(idx: Int, args: Seq[PrimitiveValue], hints: Seq[PrimitiveValue]): PrimitiveValue =
    choices(idx).getOrElse( StringPrimitive(models.head.id) )

  def sample(idx: Int, randomness: Random, args: Seq[PrimitiveValue], hints: Seq[PrimitiveValue]): PrimitiveValue =
    StringPrimitive(RandUtils.pickFromList(randomness, models).id)

  def reason(idx: Int, args: Seq[PrimitiveValue], hints: Seq[PrimitiveValue]): String = {
    choices(idx) match {
      case None => {
        val bestChoice = models.head
        val modelString = models.mkString(", ")
        s"I defaulted to guessing with '$bestChoice' (out of $modelString) for $context"
      }
      case Some(choiceStr) =>
        s"${getReasonWho(idx,args)} told me to use $choiceStr for $context"
    }
  }

  def validateChoice(idx: Int, v: PrimitiveValue) = models.contains(v.asString)

  def getDomain(idx: Int, args: Seq[PrimitiveValue], hints: Seq[PrimitiveValue]): Seq[(PrimitiveValue,Double)] =
    models.map( x => (StringPrimitive(x.id), 0.0) )

  def confidence (idx: Int, args: Seq[PrimitiveValue], hints:Seq[PrimitiveValue]) : Double = 1.0/models.size
}
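sample delegates to RandUtils.pickFromList to choose one of the candidate models at random. Without mimir's helper, the same idea is a one-liner over an IndexedSeq; a standalone sketch with a made-up candidate list:

import scala.util.Random

object PickFromListSketch extends App {
  def pickFrom[A](rand: Random, items: IndexedSeq[A]): A =
    items(rand.nextInt(items.length))

  val models = IndexedSeq("TYPE_INFERENCE", "MISSING_VALUE", "COMMENT")
  val rand = new Random(7)
  println(Seq.fill(5)(pickFrom(rand, models)).mkString(", "))
}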
Example 59
Source File: GenRandom.scala From tofu with Apache License 2.0 | 5 votes |
package tofu.generate

import cats.effect.Sync
import cats.syntax.functor._
import simulacrum.typeclass
import tofu.higherKind
import tofu.higherKind.RepresentableK

import scala.util.Random

@typeclass
trait GenRandom[F[_]] {
  def nextLong: F[Long]
  def nextInt(n: Int): F[Int]
}

object GenRandom {
  def nextLong[F[_]](implicit g: GenRandom[F]): F[Long] = g.nextLong
  def nextInt[F[_]](n: Int)(implicit g: GenRandom[F]): F[Int] = g.nextInt(n)

  def instance[I[_]: Sync, F[_]: Sync](seed: Option[Long] = None, secure: Boolean = false): I[GenRandom[F]] = {
    def createStd() = seed.fold(new java.util.Random)(new java.util.Random(_))

    def createSecure() = {
      val rnd = new java.security.SecureRandom()
      seed.foreach(rnd.setSeed)
      rnd
    }

    def random(): java.util.Random = if (secure) createSecure() else createStd()

    for (rnd <- Sync[I].delay(new Random(random()))) yield new ScalaUtil[F](rnd)
  }

  private class ScalaUtil[F[_]](rnd: Random)(implicit F: Sync[F]) extends GenRandom[F] {
    def nextLong: F[Long] = F.delay(rnd.nextLong())
    def nextInt(max: Int): F[Int] = F.delay(rnd.nextInt(max))
  }

  implicit val genRandomRepresentableK: RepresentableK[GenRandom] = higherKind.derived.genRepresentableK[GenRandom]
}
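instance builds the underlying java.util.Random (optionally a seeded SecureRandom) and wraps it in scala.util.Random before suspending calls in F. The wrapping itself needs no effect machinery; a plain standalone sketch of the same construction:

import scala.util.Random

object WrappedRandomSketch extends App {
  def scalaRandom(seed: Option[Long], secure: Boolean): Random = {
    val underlying: java.util.Random =
      if (secure) {
        val r = new java.security.SecureRandom()
        seed.foreach(r.setSeed)
        r
      } else seed.fold(new java.util.Random)(new java.util.Random(_))
    new Random(underlying) // scala.util.Random can wrap any java.util.Random
  }

  println(scalaRandom(seed = Some(42L), secure = false).nextInt(100))
  println(scalaRandom(seed = None, secure = true).nextInt(100))
}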
Example 60
Source File: ExampleApp.scala From kafka4s with Apache License 2.0 | 5 votes |
package example3 import cats.effect._ import cats.implicits._ import fs2.Stream import com.banno.kafka._ import com.banno.kafka.admin._ import com.banno.kafka.consumer._ import com.banno.kafka.producer._ import org.apache.kafka.clients.admin.NewTopic import org.apache.kafka.clients.producer.ProducerRecord import scala.concurrent.duration._ import scala.util.Random final class ExampleApp[F[_]: Concurrent: ContextShift: Timer] { // Change these for your environment as needed val topic = new NewTopic(s"example3", 1, 3.toShort) val kafkaBootstrapServers = "kafka.local:9092,kafka.local:9093" val example: F[Unit] = for { _ <- Sync[F].delay(println("Starting kafka4s example")) _ <- AdminApi.createTopicsIdempotent[F](kafkaBootstrapServers, topic) writeStream = Stream .resource(ProducerApi.resource[F, Int, Int](BootstrapServers(kafkaBootstrapServers))) .flatMap { producer => Stream .awakeDelay[F](1 second) .evalMap { _ => Sync[F].delay(Random.nextInt()).flatMap { i => producer.sendAndForget(new ProducerRecord(topic.name, i, i)) } } } readStream = Stream .resource( ConsumerApi .resource[F, Int, Int]( BootstrapServers(kafkaBootstrapServers), GroupId("example3"), AutoOffsetReset.earliest, EnableAutoCommit(true) ) ) .evalTap(_.subscribe(topic.name)) .flatMap( _.recordStream(1.second) .map(_.value) .filter(_ % 2 == 0) .evalMap(i => Sync[F].delay(println(i))) ) _ <- writeStream .merge(readStream) .onFinalize(Sync[F].delay(println("Finished kafka4s example"))) .compile .drain } yield () } object ExampleApp { def apply[F[_]: Concurrent: ContextShift: Timer] = new ExampleApp[F] }
Example 61
Source File: MyStream.scala From scala-in-practice with Apache License 2.0 | 5 votes |
package chapter7.collections.stream import scala.util.Random trait MyStream[+A] { import MyStream._ def filter(p: A => Boolean): MyStream[A] = { this match { case Cons(h, t) => if (p(h())) cons(h(), t().filter(p)) else t().filter(p) case Empty => empty } } def take(n: Int): MyStream[A] = { if (n > 0) this match { case Cons(h, t) if n == 1 => cons(h(), MyStream.empty) case Cons(h, t) => cons(h(), t().take(n - 1)) case _ => MyStream.empty } else MyStream() } def toList: List[A] = { this match { case Cons(h, t) => h() :: t().toList case Empty => Nil } } } case object Empty extends MyStream[Nothing] case class Cons[+A](h: () => A, t: () => MyStream[A]) extends MyStream[A] object MyStream { def apply[A](elems: A*): MyStream[A] = { if (elems.isEmpty) empty else cons(elems.head, apply(elems.tail: _*)) } def cons[A](hd: => A, tl: => MyStream[A]): MyStream[A] = { lazy val head = hd lazy val tail = tl Cons(() => head, () => tail) } def empty[A]: MyStream[A] = Empty } object MyStreamTest extends App { def randomList = (1 to 50).map(_ => Random.nextInt(100)).toList def isDivisibleBy3(n: Int) = { val isDivisible = n % 3 == 0 println(s"$n $isDivisible") isDivisible } MyStream(randomList: _*).filter(isDivisibleBy3).take(2).toList }
Example 62
Source File: TeeCommandTest.scala From shellbase with Apache License 2.0 | 5 votes |
package com.sumologic.shellbase.commands import java.nio.charset.Charset import java.nio.file.{Files, Path} import com.sumologic.shellbase.CommonWordSpec import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import scala.collection.JavaConverters._ import scala.util.Random @RunWith(classOf[JUnitRunner]) class TeeCommandTest extends CommonWordSpec { "TeeCommand" should { "execute a subcommand and propagate exit code" in { var calls = 0 def callCheck(ret: Boolean)(input: String): Boolean = { input should be("hi") calls += 1 ret } new TeeCommand(callCheck(true)).executeLine(List("`hi`", "-o", getTempFilePath().toString)) should be(true) calls should be(1) new TeeCommand(callCheck(false)).executeLine(List("`hi`", "-o", getTempFilePath().toString)) should be(false) calls should be(2) } "degrade nicely with malformatted input" in { new TeeCommand(_ => true).executeLine(List.empty) should be(false) new TeeCommand(_ => true).executeLine(List("test")) should be(false) } "write output to file, and support append mode" in { def printMessage(str: String): Boolean = { println(str) true } val tempFile = getTempFilePath() new TeeCommand(printMessage).executeLine(List("`hi mom`", "-o", tempFile.toString)) // The first line is the debug line, so everything after is logged readTempFile(tempFile) should be(List("hi mom")) // We should override since not in append mode new TeeCommand(printMessage).executeLine(List("`hi mom 2`", "-o", tempFile.toString)) // The first line is the debug line, so everything after is logged readTempFile(tempFile) should be(List("hi mom 2")) // We have both 2 and 3 since in append move new TeeCommand(printMessage).executeLine(List("`hi mom 3`", "-o", tempFile.toString, "-a")) // The first line is the debug line, so everything after is logged readTempFile(tempFile) should be(List("hi mom 2", "hi mom 3")) } } private def getTempFilePath(): Path = { Files.createTempFile("teecommand", ".tmp") } private def readTempFile(path: Path): List[String] = { Files.readAllLines(path, Charset.defaultCharset()).asScala.filterNot(_.startsWith("Running")).toList } }
Example 63
Source File: DemoController.scala From elastiknn with Apache License 2.0 | 5 votes |
package controllers import com.klibisz.elastiknn.api.Vec import com.klibisz.elastiknn.client.ElastiknnFutureClient import com.klibisz.elastiknn.client.ElastiknnRequests._ import com.sksamuel.elastic4s.ElasticDsl._ import io.circe.generic.auto._ import io.circe.syntax._ import javax.inject._ import models.{Dataset, ExampleWithResults} import play.api.Logging import play.api.libs.circe.Circe import play.api.mvc._ import scala.concurrent.{ExecutionContext, Future} import scala.util.Random @Singleton class DemoController @Inject()(val controllerComponents: ControllerComponents, protected val eknn: ElastiknnFutureClient)( implicit ec: ExecutionContext) extends BaseController with Logging with Circe { def index() = Action { implicit request: Request[AnyContent] => Ok(views.html.index()) } def dataset(permalink: String, queryIdOpt: Option[String]): Action[AnyContent] = Action.async { implicit req => Dataset.defaults.find(_.permalink == permalink) match { case Some(ds) => queryIdOpt match { case Some(queryId) => for { countRes <- eknn.execute(count(ds.examples.head.index)) // This ensures the search requests execute serially. examplesWithResults <- ds.examples.foldLeft(Future(Vector.empty[ExampleWithResults])) { case (accF, ex) => for { acc <- accF q = nearestNeighborsQuery(ex.index, ex.query.withVec(Vec.Indexed(ex.index, queryId, ex.field)), 10, true) response <- eknn.execute(q) hits = response.result.hits.hits.toSeq results <- Future.traverse(hits.map(ds.parseHit))(Future.fromTry) } yield acc :+ ExampleWithResults(ex, q, results, response.result.took) } } yield Ok(views.html.dataset(ds, queryId, countRes.result.count, examplesWithResults)) case None => for { countRes <- eknn.execute(count(ds.examples.head.index)) id = Random.nextInt(countRes.result.count.toInt + 1) } yield Redirect(routes.DemoController.dataset(permalink, Some(id.toString))) } case None => Future.successful(NotFound(views.html.notfound())) } } def datasets(): Action[AnyContent] = Action(Ok(Dataset.defaults.asJson)) def health(): Action[AnyContent] = Action.async { implicit req => for { countResults <- Future.sequence(for { ds <- Dataset.defaults ex <- ds.examples } yield eknn.execute(count(ex.index))) code = if (countResults.forall(_.isSuccess) && countResults.forall(_.result.count > 1000)) 200 else 500 } yield Status(code) } }
Example 64
Source File: Enqueue.scala From elastiknn with Apache License 2.0 | 5 votes |
package com.klibisz.elastiknn.benchmarks import java.io.File import java.nio.file.Files import com.klibisz.elastiknn.benchmarks.codecs._ import io.circe.syntax._ import org.apache.commons.codec.digest.DigestUtils import zio._ import zio.blocking.Blocking import zio.console._ import scala.util.Random object Enqueue extends App { case class Params(datasetsFilter: Set[String] = Set.empty, file: File = new File("/tmp/hashes.txt"), experimentsBucket: String = "", experimentsPrefix: String = "", s3Minio: Boolean = false) private val parser = new scopt.OptionParser[Params]("Build a list of benchmark jobs") { override def showUsageOnError: Option[Boolean] = Some(true) help("help") opt[Seq[String]]("datasetsFilter") .unbounded() .action((s, c) => c.copy(datasetsFilter = s.map(_.toLowerCase).toSet)) opt[String]("experimentsBucket") .action((x, c) => c.copy(experimentsBucket = x)) opt[String]("experimentsPrefix") .action((x, c) => c.copy(experimentsPrefix = x)) opt[String]("file") .action((s, c) => c.copy(file = new File(s))) opt[Boolean]("s3Minio") .action((x, c) => c.copy(s3Minio = x)) } override def run(args: List[String]): URIO[Console, ExitCode] = parser.parse(args, Params()) match { case Some(params) => val experiments = if (params.datasetsFilter.isEmpty) Experiment.defaults else Experiment.defaults.filter(e => params.datasetsFilter.contains(e.dataset.name.toLowerCase)) val s3Client = if (params.s3Minio) S3Utils.minioClient() else S3Utils.defaultClient() val layer = Blocking.live ++ Console.live val logic: ZIO[Console with Blocking, Throwable, Unit] = for { blocking <- ZIO.access[Blocking](_.get) hashesAndEffects = experiments.map { exp => val body = exp.asJson.noSpaces val hash = DigestUtils.md5Hex(body).toLowerCase val key = s"${params.experimentsPrefix}/$hash.json" hash -> blocking.effectBlocking(s3Client.putObject(params.experimentsBucket, key, body)) } _ <- putStrLn(s"Saving ${hashesAndEffects.length} experiments to S3") _ <- ZIO.collectAllParN(10)(hashesAndEffects.map(_._2)) jsonListOfHashes = new Random(0).shuffle(hashesAndEffects).map(_._1).asJson.noSpaces _ <- blocking.effectBlocking(Files.writeString(params.file.toPath, jsonListOfHashes)) } yield () logic.provideLayer(layer).exitCode case None => sys.exit(1) } }
Example 65
Source File: DatasetClient.scala From elastiknn with Apache License 2.0 | 5 votes |
package com.klibisz.elastiknn.benchmarks import java.util.zip.GZIPInputStream import com.amazonaws.services.s3.AmazonS3 import com.klibisz.elastiknn.api.{ElasticsearchCodec, Vec} import com.klibisz.elastiknn.benchmarks.Dataset._ import io.circe import zio._ import zio.stream._ import scala.io.Source import scala.util.Random import scala.util.hashing.MurmurHash3 object DatasetClient { trait Service { def streamTrain(dataset: Dataset, limit: Option[Int] = None): Stream[Throwable, Vec] def streamTest(dataset: Dataset, limit: Option[Int] = None): Stream[Throwable, Vec] } def s3(bucket: String, keyPrefix: String): ZLayer[Has[AmazonS3], Throwable, DatasetClient] = ZLayer.fromService[AmazonS3, Service] { client => new Service { private def stream(dataset: Dataset, name: String, limit: Option[Int]): Stream[Throwable, Vec] = dataset match { case r: RandomSparseBool => implicit val rng: Random = new Random(MurmurHash3.orderedHash(Seq(r.dims, name))) Stream .range(0, if (name == "train") r.train else r.test) .map(_ => Vec.SparseBool.random(r.dims, r.bias)) case r: RandomDenseFloat => implicit val rng: Random = new Random(MurmurHash3.orderedHash(Seq(r.dims, name))) Stream .range(0, if (name == "train") r.train else r.test) .map(_ => Vec.DenseFloat.random(r.dims)) case _ => def parseDecode(s: String): Either[circe.Error, Vec] = ElasticsearchCodec.parse(s).flatMap(j => ElasticsearchCodec.decode[Vec](j.hcursor)) val obj = client.getObject(bucket, s"$keyPrefix/${dataset.name}/${name}.json.gz") val iterManaged = Managed.makeEffect(Source.fromInputStream(new GZIPInputStream(obj.getObjectContent)))(_.close()) val lines = Stream.fromIteratorManaged(iterManaged.map(src => limit.map(n => src.getLines.take(n)).getOrElse(src.getLines()))) val rawJson = lines.map(_.dropWhile(_ != '{')) rawJson.mapM(s => ZIO.fromEither(parseDecode(s))) } override def streamTrain(dataset: Dataset, limit: Option[Int]): Stream[Throwable, Vec] = stream(dataset, "train", limit) override def streamTest(dataset: Dataset, limit: Option[Int]): Stream[Throwable, Vec] = stream(dataset, "test", limit) } } }
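For the synthetic datasets the client derives its Random seed from MurmurHash3.orderedHash over the dimensionality and the split name, so the "train" and "test" streams differ from each other but are identical across runs. A standalone sketch of that seeding trick (vector shape and counts are illustrative only):

import scala.util.Random
import scala.util.hashing.MurmurHash3

object HashSeededRandomSketch extends App {
  def vectorsFor(dims: Int, split: String, n: Int): Seq[Seq[Double]] = {
    // Same (dims, split) always yields the same seed, hence the same vectors.
    val rng = new Random(MurmurHash3.orderedHash(Seq(dims, split)))
    Seq.fill(n)(Seq.fill(dims)(rng.nextDouble()))
  }

  assert(vectorsFor(4, "train", 2) == vectorsFor(4, "train", 2))
  assert(vectorsFor(4, "train", 2) != vectorsFor(4, "test", 2))
  println(vectorsFor(4, "train", 1))
}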
Example 66
Source File: Profiling.scala From elastiknn with Apache License 2.0 | 5 votes |
package com.klibisz.elastiknn

import com.klibisz.elastiknn
import com.klibisz.elastiknn.api.{Mapping, Vec}
import com.klibisz.elastiknn.utils.ArrayUtils

import scala.util.Random

// Simple apps that make it easy to profile hotspots using VisualVM.
// One quirk with using VisualVM for profiling is that it has to be running on the same JVM as the app.
// For me it was enough to follow this comment: https://github.com/oracle/visualvm/issues/130#issuecomment-483898542

object ProfileVectorHashing {
  def main(args: Array[String]): Unit = {
    implicit val r: Random = new Random(100)
    val m = new elastiknn.models.LshFunction.Jaccard(Mapping.JaccardLsh(100, 150, 1))
    val vecs = Vec.SparseBool.randoms(100, 5000)
    while (true) {
      val t0 = System.currentTimeMillis()
      vecs.foreach(v => m(v))
      println(vecs.length * 1.0 / (System.currentTimeMillis() - t0) * 1000)
    }
  }
}

object ProfileSortedIntersection {
  def main(args: Array[String]): Unit = {
    implicit val r: Random = new Random(100)
    val vecs = Vec.SparseBool.randoms(100, 5000)
    while (true) {
      val t0 = System.currentTimeMillis()
      vecs.drop(1).zip(vecs).map {
        case (a, b) => ArrayUtils.sortedIntersectionCount(a.trueIndices, b.trueIndices)
      }
      println(vecs.length * 1.0 / (System.currentTimeMillis() - t0) * 1000)
    }
  }
}

object PairingFunctions {
  def main(args: Array[String]): Unit = {
    // Based on https://stackoverflow.com/a/14051714
    def szudzik(a: Int, b: Int): Int = {
      val c = if (a >= 0) 2 * a else -2 * a - 1
      val d = if (b >= 0) 2 * b else -2 * b - 1
      if (c >= d) c * c + c + d else c + d * d
    }

    val r = new Random(System.currentTimeMillis())
    val n = 10
    var uniq = Set.empty[Int]
    var i = 0
    while (i - uniq.size < 100) {
      var bandHash = r.nextInt(80)
      (0 until n)
        .map(_ => r.nextInt(Int.MaxValue) - r.nextInt(Int.MaxValue))
        .foreach(h => bandHash = szudzik(bandHash, h))
      uniq = uniq + bandHash
      i += 1
      println(s"$i, ${uniq.size}")
    }
  }
}
Example 67
Source File: TestData.scala From elastiknn with Apache License 2.0 | 5 votes |
package com.klibisz.elastiknn.testing import java.io.FileOutputStream import java.util.zip.{GZIPInputStream, GZIPOutputStream} import com.klibisz.elastiknn.api.{Similarity, Vec} import io.circe._ import com.klibisz.elastiknn.api.ElasticsearchCodec._ import com.klibisz.elastiknn.models.ExactSimilarityFunction import io.circe.syntax._ import io.circe.generic.semiauto._ import scala.util.{Random, Try} case class Result(similarity: Similarity, values: Vector[Double]) object Result { implicit val codec: Codec[Result] = deriveCodec[Result] } case class Query(vector: Vec, results: Seq[Result]) object Query { implicit val codec: Codec[Query] = deriveCodec[Query] } case class TestData(corpus: Vector[Vec], queries: Vector[Query]) object TestData { implicit val codec: Codec[TestData] = deriveCodec[TestData] def read(fname: String): TestData = { val resource = getClass.getResource(fname) val gin = new GZIPInputStream(resource.openStream()) val contents = new String(gin.readAllBytes()) gin.close() io.circe.parser.decode[TestData](contents).toTry.get } def write(testData: TestData, fname: String): Unit = { val gout = new GZIPOutputStream(new FileOutputStream(fname)) gout.write(testData.asJson.noSpaces.getBytes()) gout.close() } def genSparseBool(dims: Int, numCorpus: Int, numQueries: Int, numNeighbors: Int)(implicit rng: Random): TestData = { // TODO: have a min and max bias to introduce more variety to the corpus. val corpus = Vec.SparseBool.randoms(dims, numCorpus, 0.2) val queries = Vec.SparseBool.randoms(dims, numQueries, 0.2).map { qv => Query( qv, Seq( Result(Similarity.Jaccard, corpus.map(cv => ExactSimilarityFunction.Jaccard(cv, qv)).sorted.reverse.take(numNeighbors)), Result(Similarity.Hamming, corpus.map(cv => ExactSimilarityFunction.Hamming(cv, qv)).sorted.reverse.take(numNeighbors)) ) ) } TestData(corpus, queries) } def genDenseFloat(dims: Int, numCorpus: Int, numQueries: Int, numNeighbors: Int, unit: Boolean = false)( implicit rng: Random): TestData = { val corpus = Vec.DenseFloat.randoms(dims, numCorpus) val queries = Vec.DenseFloat.randoms(dims, numQueries).map { qv => Query( qv, Seq( Result(Similarity.L1, corpus.map(cv => ExactSimilarityFunction.L1(cv, qv)).sorted.reverse.take(numNeighbors)), Result(Similarity.L2, corpus.map(cv => ExactSimilarityFunction.L2(cv, qv)).sorted.reverse.take(numNeighbors)), Result(Similarity.Angular, corpus.map(cv => ExactSimilarityFunction.Angular(cv, qv)).sorted.reverse.take(numNeighbors)) ) ) } TestData(corpus, queries) } } object Generate { import TestData._ def main(args: Array[String]): Unit = { implicit val rng = new Random(0) val dims = 1024 write(genSparseBool(dims, 5000, 50, 100), "testdata-sparsebool.json.gz") write(genDenseFloat(dims, 5000, 50, 100), "testdata-densefloat.json.gz") write(genDenseFloat(dims, 5000, 50, 100, unit = true), "testdata-densefloat-unit.json.gz") } }
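genSparseBool and genDenseFloat receive the generator as an implicit Random, and Generate.main pins it to new Random(0), so the published test data can be regenerated exactly. The sketch below reduces that implicit-seed pattern to the standard library; randomVec and randomCorpus are hypothetical helpers.

import scala.util.Random

object ImplicitRngSketch {
  // Every generation helper threads the same Random through an implicit parameter.
  def randomVec(dims: Int)(implicit rng: Random): Vector[Float] =
    Vector.fill(dims)(rng.nextFloat())

  def randomCorpus(n: Int, dims: Int)(implicit rng: Random): Vector[Vector[Float]] =
    Vector.fill(n)(randomVec(dims))

  def main(args: Array[String]): Unit = {
    implicit val rng: Random = new Random(0) // one seed controls the whole fixture
    println(randomCorpus(n = 5, dims = 3))
  }
}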
Example 68
Source File: UnsafeSerializationSuite.scala From elastiknn with Apache License 2.0 | 5 votes |
package com.klibisz.elastiknn.storage import org.scalatest.{FunSuite, Matchers} import scala.util.Random class UnsafeSerializationSuite extends FunSuite with Matchers { test("arrays of ints") { val seed = System.currentTimeMillis() val maxLen = 4096 val rng = new Random(seed) for (i <- 0 to 1000) { withClue(s"Failed on iteration $i with seed $seed and max length $maxLen") { // Generate array of random ints. val len = rng.nextInt(maxLen) val iarr = (0 until len).map(_ => rng.nextInt(Int.MaxValue) * (if (rng.nextBoolean()) 1 else -1)).toArray // Serialize and check serialized length. val trimmed = UnsafeSerialization.writeInts(iarr) trimmed should have length (iarr.length * UnsafeSerialization.numBytesInInt) // Deserialize and check. val iarrReadTrimmed = UnsafeSerialization.readInts(trimmed, 0, trimmed.length) iarrReadTrimmed shouldBe iarr // Place in larger array with random offset. val offset = rng.nextInt(maxLen) val embedded = new Array[Byte](offset) ++ trimmed ++ new Array[Byte](rng.nextInt(maxLen)) // Deserialize and check. val iarrReadEmbedded = UnsafeSerialization.readInts(embedded, offset, trimmed.length) iarrReadEmbedded shouldBe iarr } } } test("arrays of floats") { val seed = System.currentTimeMillis() val maxLen = 4096 val rng = new Random(seed) for (i <- 0 to 1000) { withClue(s"Failed on iteration $i with seed $seed and max length $maxLen") { // Generate array of random floats. val len = rng.nextInt(maxLen) val farr = (0 until len).map(_ => rng.nextFloat() * (if (rng.nextBoolean()) Float.MaxValue else Float.MinValue)).toArray // Serialize and check length. val trimmed = UnsafeSerialization.writeFloats(farr) trimmed should have length (farr.length * UnsafeSerialization.numBytesInFloat) // Deserialize and check. val farrTrimmed = UnsafeSerialization.readFloats(trimmed, 0, trimmed.length) farrTrimmed shouldBe farr // Place in larger array with random offset. val offset = rng.nextInt(maxLen) val embedded = new Array[Byte](offset) ++ trimmed ++ new Array[Byte](rng.nextInt(maxLen)) // Deserialize and check. val farrReadEmbedded = UnsafeSerialization.readFloats(embedded, offset, trimmed.length) farrReadEmbedded shouldBe farr } } } test("ints variable length encoding") { UnsafeSerialization.writeInt(127) should have length 1 UnsafeSerialization.writeInt(-127) should have length 1 UnsafeSerialization.writeInt(32767) should have length 2 UnsafeSerialization.writeInt(-32767) should have length 2 } test("ints randomized") { val seed = System.currentTimeMillis() val rng = new Random(seed) for (i <- 0 to 10000) { withClue(s"Failed on iteration $i with seed $seed") { val i = rng.nextInt(Int.MaxValue) * (if (rng.nextBoolean()) 1 else -1) val barr = UnsafeSerialization.writeInt(i) val iRead = UnsafeSerialization.readInt(barr) iRead shouldBe i } } } }
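The suite seeds Random with System.currentTimeMillis() and repeats every check inside withClue(s"... seed $seed ..."), so a failing run prints the seed needed to replay it. A reduced sketch of that report-the-seed pattern without ScalaTest, using the assert message in place of withClue:

import scala.util.Random

object ReportSeedSketch {
  def main(args: Array[String]): Unit = {
    val seed = System.currentTimeMillis() // fresh randomness every run...
    val rng = new Random(seed)            // ...but recorded, so failures can be replayed
    for (i <- 0 until 1000) {
      val n = rng.nextInt(4096)
      // On failure, rerun with new Random(<printed seed>) to reproduce the exact inputs.
      assert(n >= 0 && n < 4096, s"iteration $i failed with seed $seed (got $n)")
    }
  }
}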
Example 69
Source File: MetricsEndpointSpec.scala From prometheus-akka-http with MIT License | 5 votes |
package com.lonelyplanet.prometheus.api import java.io.StringWriter import akka.http.scaladsl.model.HttpCharsets import akka.http.scaladsl.testkit.ScalatestRouteTest import com.lonelyplanet.prometheus.Utils._ import io.prometheus.client.exporter.common.TextFormat import io.prometheus.client.{CollectorRegistry, Histogram} import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers import scala.util.Random class MetricsEndpointSpec extends AnyFlatSpec with Matchers with ScalatestRouteTest { "Metrics endpoint" should "return the correct media type and charset" in { val api = createEndpoint(CollectorRegistry.defaultRegistry) Get("/metrics") ~> api.routes ~> check { mediaType.subType shouldBe "plain" mediaType.isText shouldBe true mediaType.params shouldBe Map("version" -> "0.0.4") charset shouldBe HttpCharsets.`UTF-8` } } it should "return serialized metrics in the prometheus text format" in { val registry = new CollectorRegistry() val api = createEndpoint(registry) val hist = Histogram.build().name(RandomTestName).help(RandomTestHelp).linearBuckets(0, 1, 10).register(registry) hist.observe(Math.abs(Random.nextDouble())) Get("/metrics") ~> api.routes ~> check { val resp = responseAs[String] val writer = new StringWriter() TextFormat.write004(writer, registry.metricFamilySamples()) resp shouldBe writer.toString } } private val RandomTestName = generateRandomStringOfLength(16) private val RandomTestHelp = generateRandomStringOfLength(16) private def createEndpoint(collectorRegistry: CollectorRegistry) = { new MetricsEndpoint(collectorRegistry) } }
Example 70
Source File: PrometheusResponseTimeRecorderSpec.scala From prometheus-akka-http with MIT License | 5 votes |
package com.lonelyplanet.prometheus import io.prometheus.client.{Collector, CollectorRegistry} import org.scalamock.scalatest.MockFactory import com.lonelyplanet.prometheus.Utils._ import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers import scala.concurrent.duration import scala.concurrent.duration.FiniteDuration import scala.util.Random class PrometheusResponseTimeRecorderSpec extends AnyFlatSpec with Matchers with MockFactory { "PrometheusLatencyRecorder" should "register a histogram and record request latencies" in { val registry = new CollectorRegistry() val randomMetricName = generateRandomString val randomMetricHelp = generateRandomString val randomLabelName = generateRandomString val randomEndpointName = generateRandomString val randomLatency = Math.abs(Random.nextInt(10000)) // our random value will end up in the second bucket val buckets = List((randomLatency - 1).toDouble, (randomLatency + 1).toDouble) val recorder = new PrometheusResponseTimeRecorder( randomMetricName, randomMetricHelp, buckets, randomLabelName, registry, duration.MILLISECONDS) recorder.recordResponseTime(randomEndpointName, FiniteDuration(randomLatency, duration.MILLISECONDS)) val first = getBucketValue(registry, randomMetricName, List(randomLabelName), List(randomEndpointName), buckets.head) val second = getBucketValue(registry, randomMetricName, List(randomLabelName), List(randomEndpointName), buckets.last) val positiveInf = getBucketValue(registry, randomMetricName, List(randomLabelName), List(randomEndpointName), Double.PositiveInfinity) first shouldBe 0 second shouldBe 1 positiveInf shouldBe 1 } private def getBucketValue(registry: CollectorRegistry, metricName: String, labelNames: List[String], labelValues: List[String], bucket: Double) = { val name = metricName + "_bucket" // 'le' should be the first label in the list val allLabelNames = (Array("le") ++ labelNames).reverse val allLabelValues = (Array(Collector.doubleToGoString(bucket)) ++ labelValues).reverse registry.getSampleValue(name, allLabelNames, allLabelValues).intValue() } }
Example 71
Source File: ProtoBuffTest.scala From c4proto with Apache License 2.0 | 5 votes |
package ee.cone.c4actor import java.lang.management.ManagementFactory import java.util import java.util.concurrent.{Callable, Executors} import ee.cone.c4actor.AnyAdapter._ import ee.cone.c4actor.AnyOrigProtocol.N_AnyOrig import ee.cone.c4actor.ProtoBuffTestProtocol.{D_TestOrig, D_TestOrigForDecode} import ee.cone.c4di.{c4, c4app} import ee.cone.c4proto._ import scala.collection.immutable import scala.util.Random trait ProtoBuffTestProtocolAppBase @protocol("ProtoBuffTestProtocolApp") object ProtoBuffTestProtocol { @Id(0x1) case class D_TestOrig( @Id(0x2) srcId: String, @Id(0x3) list: List[String], @Id(0x4) byteStr: List[N_AnyOrig] ) @Id(0x5) case class D_TestOrigForDecode( @Id(0x6) srcId: String, @Id(0x7) number: Long ) } @c4app class SeqProtoBuffTestAppBase extends ProtoBuffTestApp @c4app class ParProtoBuffTestAppBase extends ProtoBuffTestApp trait ProtoBuffTestApp extends VMExecutionApp with ExecutableApp with BaseApp with ProtoApp with ProtoBuffTestProtocolApp with AnyOrigProtocolApp class SerializationRunnable(pid: Int, testOrigs: Seq[D_TestOrigForDecode], qAdapterRegistry: QAdapterRegistry) extends Callable[Long] { def call(): Long = { TestCode.test(testOrigs, qAdapterRegistry) } } object TestCode { def test(testOrigs: Seq[D_TestOrigForDecode], qAdapterRegistry: QAdapterRegistry): Long = { val time = System.currentTimeMillis() val encoded: immutable.Seq[N_AnyOrig] = testOrigs.map(encode(qAdapterRegistry)(_)) val testOrigsss: immutable.Seq[D_TestOrig] = encoded.zipWithIndex.map { case (a, b) => D_TestOrig(b.toString, a.toString.split(",").toList, List(a)) } val encoded2: immutable.Seq[N_AnyOrig] = testOrigsss.map(encode(qAdapterRegistry)(_)) val decoded: immutable.Seq[D_TestOrig] = encoded2.map(decode[D_TestOrig](qAdapterRegistry)) // assert (testOrigsss == decoded) val time2 = System.currentTimeMillis() time2 - time } }
Example 72
Source File: SandboxApp.scala From bloom-filter-scala with MIT License | 5 votes |
import java.text.NumberFormat

import bloomfilter.mutable.{CuckooFilter, UnsafeTable8Bit}
import com.google.monitoring.runtime.instrumentation.{AllocationRecorder, Sampler}
import com.twitter.algebird.{BloomFilter => AlgebirdBloomFilter}

import scala.util.Random

object SandboxApp {
  def checkMemory(): Unit = {
    val runtime = Runtime.getRuntime
    val format = NumberFormat.getInstance()
    val sb = new StringBuilder()
    val maxMemory = runtime.maxMemory()
    val allocatedMemory = runtime.totalMemory()
    val freeMemory = runtime.freeMemory()

    sb.append("free memory: " + format.format(freeMemory / 1024) + "\n")
    sb.append("allocated memory: " + format.format(allocatedMemory / 1024) + "\n")
    sb.append("max memory: " + format.format(maxMemory / 1024) + "\n")
    sb.append("total free memory: " + format.format((freeMemory + (maxMemory - allocatedMemory)) / 1024) + "\n")
    System.out.println(sb.toString())
  }

  def main(args: Array[String]): Unit = {
    val sut = CuckooFilter[Long](1000)
    sut.add(8)
    assert(sut.mightContain(8))
    sut.add(10)
    assert(sut.mightContain(10))
    sut.add(8)
    assert(sut.mightContain(8))
    sut.add(10000)
    assert(sut.mightContain(10000))
  }

  def compareAlgebirdFPR(): Unit = {
    val random: Random = new Random()
    val itemsExpected = 10000L
    val falsePositiveRate = 0.1

    var bf = AlgebirdBloomFilter(itemsExpected.toInt, falsePositiveRate, 0).create("")
    val bf2 = bloomfilter.mutable.BloomFilter[String](itemsExpected, falsePositiveRate)

    var i = 0
    while (i < itemsExpected) {
      val str: String = random.nextString(1000)
      bf = bf.+(str)
      bf2.add(str)
      i += 1
    }

    i = 0
    var in, in2 = 0
    while (true) {
      val str = random.nextString(1000)
      if (bf.contains(str).isTrue) {
        in += 1
      }
      if (bf2.mightContain(str)) {
        in2 += 1
      }
      if (i % 1000 == 0) {
        println(s"in: $in; in2: $in2")
      }
      i += 1 // advance the counter so the progress line prints once per 1000 checks
    }
  }

  def checkAllocations(): Unit = {
    val sampler: Sampler = new Sampler() {
      def sampleAllocation(count: Int, desc: String, newObj: Object, size: Long) {
        System.out.println("I just allocated the object " + newObj + " of type " + desc + " whose size is " + size)
        if (count != -1) {
          System.out.println("It's an array of size " + count)
        }
      }
    }

    AllocationRecorder.addSampler(sampler)

    AllocationRecorder.removeSampler(sampler)
  }
}
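compareAlgebirdFPR feeds both filters with random.nextString(1000), which draws arbitrary UTF-16 characters; that is fine as opaque filter input but often unprintable. When readable strings are needed, Random.alphanumeric is the usual alternative, as this small sketch shows (RandomStringSketch is an illustrative name):

import scala.util.Random

object RandomStringSketch {
  def main(args: Array[String]): Unit = {
    val rng = new Random(7)
    // Arbitrary characters, suitable as opaque hash or filter input.
    val raw = rng.nextString(16)
    // Letters and digits only, safe to print or embed in identifiers.
    val printable = rng.alphanumeric.take(16).mkString
    println(s"raw length=${raw.length}, printable=$printable")
  }
}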
Example 73
Source File: StringItemCuckooBenchmark.scala From bloom-filter-scala with MIT License | 5 votes |
package bloomfilter.mutable import java.util.concurrent.TimeUnit import org.openjdk.jmh.annotations.{BenchmarkMode, OperationsPerInvocation, OutputTimeUnit, _} import scala.util.Random @State(Scope.Benchmark) class StringItemCuckooBenchmark { private val itemsExpected = 100000000L private val random = new Random() private var bf: CuckooFilter[String] = _ @Param(Array("1024")) var length: Int = _ private val items = new Array[String](10000) var i = 0 while (i < items.length) { items(i) = random.nextString(length) i += 1 } @Setup(Level.Iteration) def setup(): Unit = { bf = CuckooFilter[String](itemsExpected) } @Benchmark @BenchmarkMode(Array(Mode.SingleShotTime)) @OutputTimeUnit(TimeUnit.NANOSECONDS) @OperationsPerInvocation(10000) def myPut(): Unit = { var i = 0 while (i < items.length) { bf.add(items(i)) i += 1 } } @Benchmark @BenchmarkMode(Array(Mode.Throughput)) @OperationsPerInvocation(10000) def myGet(): Unit = { var i = 0 while (i < items.length) { bf.mightContain(items(i)) i += 1 } } }
Example 74
Source File: SparkTC.scala From BigDatalog with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples import scala.util.Random import scala.collection.mutable import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.SparkContext._ object SparkTC { val numEdges = 200 val numVertices = 100 val rand = new Random(42) def generateGraph: Seq[(Int, Int)] = { val edges: mutable.Set[(Int, Int)] = mutable.Set.empty while (edges.size < numEdges) { val from = rand.nextInt(numVertices) val to = rand.nextInt(numVertices) if (from != to) edges.+=((from, to)) } edges.toSeq } def main(args: Array[String]) { val sparkConf = new SparkConf().setAppName("SparkTC") val spark = new SparkContext(sparkConf) val slices = if (args.length > 0) args(0).toInt else 2 var tc = spark.parallelize(generateGraph, slices).cache() // Linear transitive closure: each round grows paths by one edge, // by joining the graph's edges with the already-discovered paths. // e.g. join the path (y, z) from the TC with the edge (x, y) from // the graph to obtain the path (x, z). // Because join() joins on keys, the edges are stored in reversed order. val edges = tc.map(x => (x._2, x._1)) // This join is iterated until a fixed point is reached. var oldCount = 0L var nextCount = tc.count() do { oldCount = nextCount // Perform the join, obtaining an RDD of (y, (z, x)) pairs, // then project the result to obtain the new (x, z) paths. tc = tc.union(tc.join(edges).map(x => (x._2._2, x._2._1))).distinct().cache() nextCount = tc.count() } while (nextCount != oldCount) println("TC has " + tc.count() + " edges.") spark.stop() } } // scalastyle:on println
Example 75
Source File: LocalKMeans.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.clustering import scala.util.Random import org.apache.spark.Logging import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.linalg.BLAS.{axpy, scal} def kMeansPlusPlus( seed: Int, points: Array[VectorWithNorm], weights: Array[Double], k: Int, maxIterations: Int ): Array[VectorWithNorm] = { val rand = new Random(seed) val dimensions = points(0).vector.size val centers = new Array[VectorWithNorm](k) // Initialize centers by sampling using the k-means++ procedure. centers(0) = pickWeighted(rand, points, weights).toDense for (i <- 1 until k) { // Pick the next center with a probability proportional to cost under current centers val curCenters = centers.view.take(i) val sum = points.view.zip(weights).map { case (p, w) => w * KMeans.pointCost(curCenters, p) }.sum val r = rand.nextDouble() * sum var cumulativeScore = 0.0 var j = 0 while (j < points.length && cumulativeScore < r) { cumulativeScore += weights(j) * KMeans.pointCost(curCenters, points(j)) j += 1 } if (j == 0) { logWarning("kMeansPlusPlus initialization ran out of distinct points for centers." + s" Using duplicate point for center k = $i.") centers(i) = points(0).toDense } else { centers(i) = points(j - 1).toDense } } // Run up to maxIterations iterations of Lloyd's algorithm val oldClosest = Array.fill(points.length)(-1) var iteration = 0 var moved = true while (moved && iteration < maxIterations) { moved = false val counts = Array.fill(k)(0.0) val sums = Array.fill(k)(Vectors.zeros(dimensions)) var i = 0 while (i < points.length) { val p = points(i) val index = KMeans.findClosest(centers, p)._1 axpy(weights(i), p.vector, sums(index)) counts(index) += weights(i) if (index != oldClosest(i)) { moved = true oldClosest(i) = index } i += 1 } // Update centers var j = 0 while (j < k) { if (counts(j) == 0.0) { // Assign center to a random point centers(j) = points(rand.nextInt(points.length)).toDense } else { scal(1.0 / counts(j), sums(j)) centers(j) = new VectorWithNorm(sums(j)) } j += 1 } iteration += 1 } if (iteration == maxIterations) { logInfo(s"Local KMeans++ reached the max number of iterations: $maxIterations.") } else { logInfo(s"Local KMeans++ converged in $iteration iterations.") } centers } private def pickWeighted[T](rand: Random, data: Array[T], weights: Array[Double]): T = { val r = rand.nextDouble() * weights.sum var i = 0 var curWeight = 0.0 while (i < data.length && curWeight < r) { curWeight += weights(i) i += 1 } data(i - 1) } }
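pickWeighted and the k-means++ seeding step share one idiom: draw rand.nextDouble() * totalWeight and walk the cumulative weights until the draw is exceeded. The standalone sketch below isolates that weighted sampling; names are illustrative and the zero-draw guard is an addition for safety.

import scala.util.Random

object WeightedPickSketch {
  // Returns data(i) with probability weights(i) / weights.sum.
  def pickWeighted[T](rand: Random, data: IndexedSeq[T], weights: IndexedSeq[Double]): T = {
    val r = rand.nextDouble() * weights.sum
    var i = 0
    var cumulative = 0.0
    while (i < data.length && cumulative < r) {
      cumulative += weights(i)
      i += 1
    }
    data(math.max(i - 1, 0)) // guards the rare case r == 0.0
  }

  def main(args: Array[String]): Unit = {
    val rand = new Random(42)
    val draws = Seq.fill(10000)(pickWeighted(rand, Vector("a", "b"), Vector(1.0, 3.0)))
    // "b" should appear roughly three times as often as "a".
    println(draws.groupBy(identity).map { case (k, v) => k -> v.size })
  }
}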
Example 76
Source File: KMeansDataGenerator.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.util import scala.util.Random import org.apache.spark.SparkContext import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.rdd.RDD @Since("0.8.0") def generateKMeansRDD( sc: SparkContext, numPoints: Int, k: Int, d: Int, r: Double, numPartitions: Int = 2) : RDD[Array[Double]] = { // First, generate some centers val rand = new Random(42) val centers = Array.fill(k)(Array.fill(d)(rand.nextGaussian() * r)) // Then generate points around each center sc.parallelize(0 until numPoints, numPartitions).map { idx => val center = centers(idx % k) val rand2 = new Random(42 + idx) Array.tabulate(d)(i => center(i) + rand2.nextGaussian()) } } @Since("0.8.0") def main(args: Array[String]) { if (args.length < 6) { // scalastyle:off println println("Usage: KMeansGenerator " + "<master> <output_dir> <num_points> <k> <d> <r> [<num_partitions>]") // scalastyle:on println System.exit(1) } val sparkMaster = args(0) val outputPath = args(1) val numPoints = args(2).toInt val k = args(3).toInt val d = args(4).toInt val r = args(5).toDouble val parts = if (args.length >= 7) args(6).toInt else 2 val sc = new SparkContext(sparkMaster, "KMeansDataGenerator") val data = generateKMeansRDD(sc, numPoints, k, d, r, parts) data.map(_.mkString(" ")).saveAsTextFile(outputPath) System.exit(0) } }
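generateKMeansRDD seeds the centers once with new Random(42) and then gives every point index its own generator, new Random(42 + idx), so the data does not depend on how Spark partitions or orders the indices. A local sketch of that per-index seeding, with no Spark dependency (pointAt is a hypothetical helper):

import scala.util.Random

object PerIndexSeedSketch {
  // Each index gets its own deterministic generator, so results do not depend on
  // the order in which indices are processed (across partitions or threads).
  def pointAt(idx: Int, d: Int, baseSeed: Long = 42L): Seq[Double] = {
    val rng = new Random(baseSeed + idx)
    Seq.fill(d)(rng.nextGaussian())
  }

  def main(args: Array[String]): Unit = {
    val forward = (0 until 4).map(i => pointAt(i, d = 3))
    val backward = (0 until 4).reverse.map(i => pointAt(i, d = 3)).reverse
    assert(forward == backward) // identical points regardless of traversal order
    println(forward)
  }
}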
Example 77
Source File: LogisticRegressionDataGenerator.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.util import scala.util.Random import org.apache.spark.annotation.{Since, DeveloperApi} import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.linalg.Vectors @Since("0.8.0") def generateLogisticRDD( sc: SparkContext, nexamples: Int, nfeatures: Int, eps: Double, nparts: Int = 2, probOne: Double = 0.5): RDD[LabeledPoint] = { val data = sc.parallelize(0 until nexamples, nparts).map { idx => val rnd = new Random(42 + idx) val y = if (idx % 2 == 0) 0.0 else 1.0 val x = Array.fill[Double](nfeatures) { rnd.nextGaussian() + (y * eps) } LabeledPoint(y, Vectors.dense(x)) } data } @Since("0.8.0") def main(args: Array[String]) { if (args.length != 5) { // scalastyle:off println println("Usage: LogisticRegressionGenerator " + "<master> <output_dir> <num_examples> <num_features> <num_partitions>") // scalastyle:on println System.exit(1) } val sparkMaster: String = args(0) val outputPath: String = args(1) val nexamples: Int = if (args.length > 2) args(2).toInt else 1000 val nfeatures: Int = if (args.length > 3) args(3).toInt else 2 val parts: Int = if (args.length > 4) args(4).toInt else 2 val eps = 3 val sc = new SparkContext(sparkMaster, "LogisticRegressionDataGenerator") val data = generateLogisticRDD(sc, nexamples, nfeatures, eps, parts) data.saveAsTextFile(outputPath) sc.stop() } }
Example 78
Source File: SVMDataGenerator.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.util import scala.util.Random import com.github.fommil.netlib.BLAS.{getInstance => blas} import org.apache.spark.SparkContext import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.rdd.RDD @DeveloperApi @Since("0.8.0") object SVMDataGenerator { @Since("0.8.0") def main(args: Array[String]) { if (args.length < 2) { // scalastyle:off println println("Usage: SVMGenerator " + "<master> <output_dir> [num_examples] [num_features] [num_partitions]") // scalastyle:on println System.exit(1) } val sparkMaster: String = args(0) val outputPath: String = args(1) val nexamples: Int = if (args.length > 2) args(2).toInt else 1000 val nfeatures: Int = if (args.length > 3) args(3).toInt else 2 val parts: Int = if (args.length > 4) args(4).toInt else 2 val sc = new SparkContext(sparkMaster, "SVMGenerator") val globalRnd = new Random(94720) val trueWeights = Array.fill[Double](nfeatures + 1)(globalRnd.nextGaussian()) val data: RDD[LabeledPoint] = sc.parallelize(0 until nexamples, parts).map { idx => val rnd = new Random(42 + idx) val x = Array.fill[Double](nfeatures) { rnd.nextDouble() * 2.0 - 1.0 } val yD = blas.ddot(trueWeights.length, x, 1, trueWeights, 1) + rnd.nextGaussian() * 0.1 val y = if (yD < 0) 0.0 else 1.0 LabeledPoint(y, Vectors.dense(x)) } data.saveAsTextFile(outputPath) sc.stop() } }
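The generator draws features with rnd.nextDouble() * 2.0 - 1.0, uniform on [-1, 1), and perturbs the decision value with rnd.nextGaussian() * 0.1, normal noise with standard deviation 0.1. A tiny sketch of those two range transformations (helper names are illustrative):

import scala.util.Random

object RangeTransformSketch {
  def main(args: Array[String]): Unit = {
    val rnd = new Random(94720)
    // nextDouble() is uniform on [0, 1); scale and shift for uniform on [lo, hi).
    def uniform(lo: Double, hi: Double): Double = lo + (hi - lo) * rnd.nextDouble()
    // nextGaussian() is standard normal; scale by sigma and shift by mu.
    def gaussian(mu: Double, sigma: Double): Double = mu + sigma * rnd.nextGaussian()

    val xs = Seq.fill(100000)(uniform(-1.0, 1.0))
    val es = Seq.fill(100000)(gaussian(0.0, 0.1))
    println(s"uniform min=${xs.min} max=${xs.max}") // close to -1 and 1
    println(s"noise mean=${es.sum / es.size}")      // close to 0
  }
}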
Example 79
Source File: RidgeRegressionSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.regression import scala.util.Random import org.jblas.DoubleMatrix import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.util.{LocalClusterSparkContext, LinearDataGenerator, MLlibTestSparkContext} import org.apache.spark.util.Utils private object RidgeRegressionSuite { val model = new RidgeRegressionModel(weights = Vectors.dense(0.1, 0.2, 0.3), intercept = 0.5) } class RidgeRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { def predictionError(predictions: Seq[Double], input: Seq[LabeledPoint]): Double = { predictions.zip(input).map { case (prediction, expected) => (prediction - expected.label) * (prediction - expected.label) }.reduceLeft(_ + _) / predictions.size } test("ridge regression can help avoid overfitting") { // For small number of examples and large variance of error distribution, // ridge regression should give smaller generalization error that linear regression. val numExamples = 50 val numFeatures = 20 org.jblas.util.Random.seed(42) // Pick weights as random values distributed uniformly in [-0.5, 0.5] val w = DoubleMatrix.rand(numFeatures, 1).subi(0.5) // Use half of data for training and other half for validation val data = LinearDataGenerator.generateLinearInput(3.0, w.toArray, 2 * numExamples, 42, 10.0) val testData = data.take(numExamples) val validationData = data.takeRight(numExamples) val testRDD = sc.parallelize(testData, 2).cache() val validationRDD = sc.parallelize(validationData, 2).cache() // First run without regularization. val linearReg = new LinearRegressionWithSGD() linearReg.optimizer.setNumIterations(200) .setStepSize(1.0) val linearModel = linearReg.run(testRDD) val linearErr = predictionError( linearModel.predict(validationRDD.map(_.features)).collect(), validationData) val ridgeReg = new RidgeRegressionWithSGD() ridgeReg.optimizer.setNumIterations(200) .setRegParam(0.1) .setStepSize(1.0) val ridgeModel = ridgeReg.run(testRDD) val ridgeErr = predictionError( ridgeModel.predict(validationRDD.map(_.features)).collect(), validationData) // Ridge validation error should be lower than linear regression. assert(ridgeErr < linearErr, "ridgeError (" + ridgeErr + ") was not less than linearError(" + linearErr + ")") } test("model save/load") { val model = RidgeRegressionSuite.model val tempDir = Utils.createTempDir() val path = tempDir.toURI.toString // Save model, load it back, and compare. try { model.save(sc, path) val sameModel = RidgeRegressionModel.load(sc, path) assert(model.weights == sameModel.weights) assert(model.intercept == sameModel.intercept) } finally { Utils.deleteRecursively(tempDir) } } } class RidgeRegressionClusterSuite extends SparkFunSuite with LocalClusterSparkContext { test("task size should be small in both training and prediction") { val m = 4 val n = 200000 val points = sc.parallelize(0 until m, 2).mapPartitionsWithIndex { (idx, iter) => val random = new Random(idx) iter.map(i => LabeledPoint(1.0, Vectors.dense(Array.fill(n)(random.nextDouble())))) }.cache() // If we serialize data directly in the task closure, the size of the serialized task would be // greater than 1MB and hence Spark would throw an error. val model = RidgeRegressionWithSGD.train(points, 2) val predictions = model.predict(points.map(_.features)) } }
Example 80
Source File: KafkaStreamSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.kafka import scala.collection.mutable import scala.concurrent.duration._ import scala.language.postfixOps import scala.util.Random import kafka.serializer.StringDecoder import org.scalatest.BeforeAndAfterAll import org.scalatest.concurrent.Eventually import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.{Milliseconds, StreamingContext} class KafkaStreamSuite extends SparkFunSuite with Eventually with BeforeAndAfterAll { private var ssc: StreamingContext = _ private var kafkaTestUtils: KafkaTestUtils = _ override def beforeAll(): Unit = { kafkaTestUtils = new KafkaTestUtils kafkaTestUtils.setup() } override def afterAll(): Unit = { if (ssc != null) { ssc.stop() ssc = null } if (kafkaTestUtils != null) { kafkaTestUtils.teardown() kafkaTestUtils = null } } test("Kafka input stream") { val sparkConf = new SparkConf().setMaster("local[4]").setAppName(this.getClass.getSimpleName) ssc = new StreamingContext(sparkConf, Milliseconds(500)) val topic = "topic1" val sent = Map("a" -> 5, "b" -> 3, "c" -> 10) kafkaTestUtils.createTopic(topic) kafkaTestUtils.sendMessages(topic, sent) val kafkaParams = Map("zookeeper.connect" -> kafkaTestUtils.zkAddress, "group.id" -> s"test-consumer-${Random.nextInt(10000)}", "auto.offset.reset" -> "smallest") val stream = KafkaUtils.createStream[String, String, StringDecoder, StringDecoder]( ssc, kafkaParams, Map(topic -> 1), StorageLevel.MEMORY_ONLY) val result = new mutable.HashMap[String, Long]() with mutable.SynchronizedMap[String, Long] stream.map(_._2).countByValue().foreachRDD { r => val ret = r.collect() ret.toMap.foreach { kv => val count = result.getOrElseUpdate(kv._1, 0) + kv._2 result.put(kv._1, count) } } ssc.start() eventually(timeout(10000 milliseconds), interval(100 milliseconds)) { assert(sent === result) } } }
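The test joins a fresh consumer group, s"test-consumer-${Random.nextInt(10000)}", so repeated runs do not resume each other's committed offsets. A small sketch of that random-suffix idiom, with the obvious caveat that collisions are unlikely rather than impossible (testId is an illustrative helper):

import scala.util.Random

object RandomSuffixSketch {
  // Unique-ish identifier for test isolation; use java.util.UUID when collisions must be impossible.
  def testId(prefix: String): String = s"$prefix-${Random.nextInt(10000)}"

  def main(args: Array[String]): Unit = {
    println(testId("test-consumer"))              // e.g. test-consumer-4821
    println(java.util.UUID.randomUUID().toString) // collision-free alternative
  }
}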
Example 81
Source File: KafkaClusterSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.kafka import scala.util.Random import kafka.common.TopicAndPartition import org.scalatest.BeforeAndAfterAll import org.apache.spark.SparkFunSuite class KafkaClusterSuite extends SparkFunSuite with BeforeAndAfterAll { private val topic = "kcsuitetopic" + Random.nextInt(10000) private val topicAndPartition = TopicAndPartition(topic, 0) private var kc: KafkaCluster = null private var kafkaTestUtils: KafkaTestUtils = _ override def beforeAll() { kafkaTestUtils = new KafkaTestUtils kafkaTestUtils.setup() kafkaTestUtils.createTopic(topic) kafkaTestUtils.sendMessages(topic, Map("a" -> 1)) kc = new KafkaCluster(Map("metadata.broker.list" -> kafkaTestUtils.brokerAddress)) } override def afterAll() { if (kafkaTestUtils != null) { kafkaTestUtils.teardown() kafkaTestUtils = null } } test("metadata apis") { val leader = kc.findLeaders(Set(topicAndPartition)).right.get(topicAndPartition) val leaderAddress = s"${leader._1}:${leader._2}" assert(leaderAddress === kafkaTestUtils.brokerAddress, "didn't get leader") val parts = kc.getPartitions(Set(topic)).right.get assert(parts(topicAndPartition), "didn't get partitions") val err = kc.getPartitions(Set(topic + "BAD")) assert(err.isLeft, "getPartitions for a nonexistant topic should be an error") } test("leader offset apis") { val earliest = kc.getEarliestLeaderOffsets(Set(topicAndPartition)).right.get assert(earliest(topicAndPartition).offset === 0, "didn't get earliest") val latest = kc.getLatestLeaderOffsets(Set(topicAndPartition)).right.get assert(latest(topicAndPartition).offset === 1, "didn't get latest") } test("consumer offset apis") { val group = "kcsuitegroup" + Random.nextInt(10000) val offset = Random.nextInt(10000) val set = kc.setConsumerOffsets(group, Map(topicAndPartition -> offset)) assert(set.isRight, "didn't set consumer offsets") val get = kc.getConsumerOffsets(group, Set(topicAndPartition)).right.get assert(get(topicAndPartition) === offset, "didn't get consumer offsets") } }
Example 82
Source File: UISeleniumSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.thriftserver import scala.util.Random import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.openqa.selenium.WebDriver import org.openqa.selenium.htmlunit.HtmlUnitDriver import org.scalatest.{BeforeAndAfterAll, Matchers} import org.scalatest.concurrent.Eventually._ import org.scalatest.selenium.WebBrowser import org.scalatest.time.SpanSugar._ import org.apache.spark.ui.SparkUICssErrorHandler class UISeleniumSuite extends HiveThriftJdbcTest with WebBrowser with Matchers with BeforeAndAfterAll { implicit var webDriver: WebDriver = _ var server: HiveThriftServer2 = _ val uiPort = 20000 + Random.nextInt(10000) override def mode: ServerMode.Value = ServerMode.binary override def beforeAll(): Unit = { webDriver = new HtmlUnitDriver { getWebClient.setCssErrorHandler(new SparkUICssErrorHandler) } super.beforeAll() } override def afterAll(): Unit = { if (webDriver != null) { webDriver.quit() } super.afterAll() } override protected def serverStartCommand(port: Int) = { val portConf = if (mode == ServerMode.binary) { ConfVars.HIVE_SERVER2_THRIFT_PORT } else { ConfVars.HIVE_SERVER2_THRIFT_HTTP_PORT } s"""$startScript | --master local | --hiveconf hive.root.logger=INFO,console | --hiveconf ${ConfVars.METASTORECONNECTURLKEY}=$metastoreJdbcUri | --hiveconf ${ConfVars.METASTOREWAREHOUSE}=$warehousePath | --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST}=localhost | --hiveconf ${ConfVars.HIVE_SERVER2_TRANSPORT_MODE}=$mode | --hiveconf $portConf=$port | --driver-class-path ${sys.props("java.class.path")} | --conf spark.ui.enabled=true | --conf spark.ui.port=$uiPort """.stripMargin.split("\\s+").toSeq } ignore("thrift server ui test") { withJdbcStatement { statement => val baseURL = s"http://localhost:$uiPort" val queries = Seq( "CREATE TABLE test_map(key INT, value STRING)", s"LOAD DATA LOCAL INPATH '${TestData.smallKv}' OVERWRITE INTO TABLE test_map") queries.foreach(statement.execute) eventually(timeout(10 seconds), interval(50 milliseconds)) { go to baseURL find(cssSelector("""ul li a[href*="sql"]""")) should not be None } eventually(timeout(10 seconds), interval(50 milliseconds)) { go to (baseURL + "/sql") find(id("sessionstat")) should not be None find(id("sqlstat")) should not be None // check whether statements exists queries.foreach { line => findAll(cssSelector("""ul table tbody tr td""")).map(_.text).toList should contain (line) } } } } }
Example 83
Source File: GenerateUnsafeRowJoinerSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions.codegen import scala.util.Random import org.apache.spark.SparkFunSuite import org.apache.spark.sql.RandomDataGenerator import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.expressions.UnsafeProjection import org.apache.spark.sql.types._ class GenerateUnsafeRowJoinerSuite extends SparkFunSuite { private val fixed = Seq(IntegerType) private val variable = Seq(IntegerType, StringType) test("simple fixed width types") { testConcat(0, 0, fixed) testConcat(0, 1, fixed) testConcat(1, 0, fixed) testConcat(64, 0, fixed) testConcat(0, 64, fixed) testConcat(64, 64, fixed) } test("randomized fix width types") { for (i <- 0 until 20) { testConcatOnce(Random.nextInt(100), Random.nextInt(100), fixed) } } test("simple variable width types") { testConcat(0, 0, variable) testConcat(0, 1, variable) testConcat(1, 0, variable) testConcat(64, 0, variable) testConcat(0, 64, variable) testConcat(64, 64, variable) } test("randomized variable width types") { for (i <- 0 until 10) { testConcatOnce(Random.nextInt(100), Random.nextInt(100), variable) } } private def testConcat(numFields1: Int, numFields2: Int, candidateTypes: Seq[DataType]): Unit = { for (i <- 0 until 10) { testConcatOnce(numFields1, numFields2, candidateTypes) } } private def testConcatOnce(numFields1: Int, numFields2: Int, candidateTypes: Seq[DataType]) { info(s"schema size $numFields1, $numFields2") val schema1 = RandomDataGenerator.randomSchema(numFields1, candidateTypes) val schema2 = RandomDataGenerator.randomSchema(numFields2, candidateTypes) // Create the converters needed to convert from external row to internal row and to UnsafeRows. val internalConverter1 = CatalystTypeConverters.createToCatalystConverter(schema1) val internalConverter2 = CatalystTypeConverters.createToCatalystConverter(schema2) val converter1 = UnsafeProjection.create(schema1) val converter2 = UnsafeProjection.create(schema2) // Create the input rows, convert them into UnsafeRows. val extRow1 = RandomDataGenerator.forType(schema1, nullable = false).get.apply() val extRow2 = RandomDataGenerator.forType(schema2, nullable = false).get.apply() val row1 = converter1.apply(internalConverter1.apply(extRow1).asInstanceOf[InternalRow]) val row2 = converter2.apply(internalConverter2.apply(extRow2).asInstanceOf[InternalRow]) // Run the joiner. val mergedSchema = StructType(schema1 ++ schema2) val concater = GenerateUnsafeRowJoiner.create(schema1, schema2) val output = concater.join(row1, row2) // Test everything equals ... for (i <- mergedSchema.indices) { if (i < schema1.size) { assert(output.isNullAt(i) === row1.isNullAt(i)) if (!output.isNullAt(i)) { assert(output.get(i, mergedSchema(i).dataType) === row1.get(i, mergedSchema(i).dataType)) } } else { assert(output.isNullAt(i) === row2.isNullAt(i - schema1.size)) if (!output.isNullAt(i)) { assert(output.get(i, mergedSchema(i).dataType) === row2.get(i - schema1.size, mergedSchema(i).dataType)) } } } } }
Example 84
Source File: TakeOrderedAndProjectNodeSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.local import scala.util.Random import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.SortOrder class TakeOrderedAndProjectNodeSuite extends LocalNodeTest { private def testTakeOrderedAndProject(desc: Boolean): Unit = { val limit = 10 val ascOrDesc = if (desc) "desc" else "asc" test(ascOrDesc) { val inputData = Random.shuffle((1 to 100).toList).map { i => (i, i) }.toArray val inputNode = new DummyNode(kvIntAttributes, inputData) val firstColumn = inputNode.output(0) val sortDirection = if (desc) Descending else Ascending val sortOrder = SortOrder(firstColumn, sortDirection) val takeOrderAndProjectNode = new TakeOrderedAndProjectNode( conf, limit, Seq(sortOrder), Some(Seq(firstColumn)), inputNode) val expectedOutput = inputData .map { case (k, _) => k } .sortBy { k => k * (if (desc) -1 else 1) } .take(limit) val actualOutput = takeOrderAndProjectNode.collect().map { row => row.getInt(0) } assert(actualOutput === expectedOutput) } } testTakeOrderedAndProject(desc = false) testTakeOrderedAndProject(desc = true) }
Example 85
Source File: ColumnarTestUtils.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, GenericMutableRow} import org.apache.spark.sql.catalyst.util.{GenericArrayData, ArrayBasedMapData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericMutableRow = { val row = new GenericMutableRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericMutableRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericMutableRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericMutableRow(1) row(0) = value row } (values, rows) } }
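makeRandomValue gathers several Random idioms in one place: Random.nextBytes for raw binary and Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue to land inside a narrower integral range before casting. The reduced sketch below keeps just those two, with the same bounds as the example:

import scala.util.Random

object BoundedValueSketch {
  def main(args: Array[String]): Unit = {
    // Fill an existing array with random bytes, e.g. for fake binary payloads.
    val payload = new Array[Byte](16)
    Random.nextBytes(payload)

    // nextInt(n) is uniform on [0, n); subtracting MaxValue centres the draw on zero
    // so the cast stays inside the target type (here [-127, 127) and [-32767, 32767)).
    val b: Byte = (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte
    val s: Short = (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort

    println(s"payload=${payload.mkString(",")} byte=$b short=$s")
  }
}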
Example 86
Source File: SortSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution import scala.util.Random import org.apache.spark.AccumulatorSuite import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ import org.apache.spark.sql.{RandomDataGenerator, Row} class SortSuite extends SparkPlanTest with SharedSQLContext { import testImplicits.localSeqToDataFrameHolder test("basic sorting using ExternalSort") { val input = Seq( ("Hello", 4, 2.0), ("Hello", 1, 1.0), ("World", 8, 3.0) ) checkAnswer( input.toDF("a", "b", "c"), (child: SparkPlan) => Sort('a.asc :: 'b.asc :: Nil, global = true, child = child), input.sortBy(t => (t._1, t._2)).map(Row.fromTuple), sortAnswers = false) checkAnswer( input.toDF("a", "b", "c"), (child: SparkPlan) => Sort('b.asc :: 'a.asc :: Nil, global = true, child = child), input.sortBy(t => (t._2, t._1)).map(Row.fromTuple), sortAnswers = false) } test("sort followed by limit") { checkThatPlansAgree( (1 to 100).map(v => Tuple1(v)).toDF("a"), (child: SparkPlan) => Limit(10, Sort('a.asc :: Nil, global = true, child = child)), (child: SparkPlan) => Limit(10, ReferenceSort('a.asc :: Nil, global = true, child)), sortAnswers = false ) } test("sorting does not crash for large inputs") { val sortOrder = 'a.asc :: Nil val stringLength = 1024 * 1024 * 2 checkThatPlansAgree( Seq(Tuple1("a" * stringLength), Tuple1("b" * stringLength)).toDF("a").repartition(1), Sort(sortOrder, global = true, _: SparkPlan, testSpillFrequency = 1), ReferenceSort(sortOrder, global = true, _: SparkPlan), sortAnswers = false ) } test("sorting updates peak execution memory") { AccumulatorSuite.verifyPeakExecutionMemorySet(sparkContext, "unsafe external sort") { checkThatPlansAgree( (1 to 100).map(v => Tuple1(v)).toDF("a"), (child: SparkPlan) => Sort('a.asc :: Nil, global = true, child = child), (child: SparkPlan) => ReferenceSort('a.asc :: Nil, global = true, child), sortAnswers = false) } } // Test sorting on different data types for ( dataType <- DataTypeTestUtils.atomicTypes ++ Set(NullType); nullable <- Seq(true, false); sortOrder <- Seq('a.asc :: Nil, 'a.desc :: Nil); randomDataGenerator <- RandomDataGenerator.forType(dataType, nullable) ) { test(s"sorting on $dataType with nullable=$nullable, sortOrder=$sortOrder") { val inputData = Seq.fill(1000)(randomDataGenerator()) val inputDf = sqlContext.createDataFrame( sparkContext.parallelize(Random.shuffle(inputData).map(v => Row(v))), StructType(StructField("a", dataType, nullable = true) :: Nil) ) checkThatPlansAgree( inputDf, p => ConvertToSafe(Sort(sortOrder, global = true, p: SparkPlan, testSpillFrequency = 23)), ReferenceSort(sortOrder, global = true, _: SparkPlan), sortAnswers = false ) } } }
Example 87
Source File: Vector.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.util import scala.language.implicitConversions import scala.util.Random import org.apache.spark.util.random.XORShiftRandom @deprecated("Use Vectors.dense from Spark's mllib.linalg package instead.", "1.0.0") class Vector(val elements: Array[Double]) extends Serializable { def length: Int = elements.length def apply(index: Int): Double = elements(index) def + (other: Vector): Vector = { if (length != other.length) { throw new IllegalArgumentException("Vectors of different length") } Vector(length, i => this(i) + other(i)) } def add(other: Vector): Vector = this + other def - (other: Vector): Vector = { if (length != other.length) { throw new IllegalArgumentException("Vectors of different length") } Vector(length, i => this(i) - other(i)) } def subtract(other: Vector): Vector = this - other def dot(other: Vector): Double = { if (length != other.length) { throw new IllegalArgumentException("Vectors of different length") } var ans = 0.0 var i = 0 while (i < length) { ans += this(i) * other(i) i += 1 } ans } def random(length: Int, random: Random = new XORShiftRandom()): Vector = Vector(length, _ => random.nextDouble()) class Multiplier(num: Double) { def * (vec: Vector): Vector = vec * num } implicit def doubleToMultiplier(num: Double): Multiplier = new Multiplier(num) implicit object VectorAccumParam extends org.apache.spark.AccumulatorParam[Vector] { def addInPlace(t1: Vector, t2: Vector): Vector = t1 + t2 def zero(initialValue: Vector): Vector = Vector.zeros(initialValue.length) } }
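Vector.random takes the generator as a parameter with a default (random: Random = new XORShiftRandom()), so callers can supply their own seeded or alternative generator. A minimal sketch of that injection pattern using only scala.util.Random (randomVector is a hypothetical stand-in):

import scala.util.Random

object InjectedRngSketch {
  // Default is a fresh unseeded generator; tests can pass a seeded one instead.
  def randomVector(length: Int, random: Random = new Random()): Array[Double] =
    Array.fill(length)(random.nextDouble())

  def main(args: Array[String]): Unit = {
    val production = randomVector(5)               // non-deterministic
    val inTest = randomVector(5, new Random(100L)) // reproducible
    assert(inTest.sameElements(randomVector(5, new Random(100L))))
    println(production.mkString(", "))
  }
}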
Example 88
Source File: SamplingUtilsSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.util.random import scala.util.Random import org.apache.commons.math3.distribution.{BinomialDistribution, PoissonDistribution} import org.apache.spark.SparkFunSuite class SamplingUtilsSuite extends SparkFunSuite { test("reservoirSampleAndCount") { val input = Seq.fill(100)(Random.nextInt()) // input size < k val (sample1, count1) = SamplingUtils.reservoirSampleAndCount(input.iterator, 150) assert(count1 === 100) assert(input === sample1.toSeq) // input size == k val (sample2, count2) = SamplingUtils.reservoirSampleAndCount(input.iterator, 100) assert(count2 === 100) assert(input === sample2.toSeq) // input size > k val (sample3, count3) = SamplingUtils.reservoirSampleAndCount(input.iterator, 10) assert(count3 === 100) assert(sample3.length === 10) } test("computeFraction") { // test that the computed fraction guarantees enough data points // in the sample with a failure rate <= 0.0001 val n = 100000 for (s <- 1 to 15) { val frac = SamplingUtils.computeFractionForSampleSize(s, n, true) val poisson = new PoissonDistribution(frac * n) assert(poisson.inverseCumulativeProbability(0.0001) >= s, "Computed fraction is too low") } for (s <- List(20, 100, 1000)) { val frac = SamplingUtils.computeFractionForSampleSize(s, n, true) val poisson = new PoissonDistribution(frac * n) assert(poisson.inverseCumulativeProbability(0.0001) >= s, "Computed fraction is too low") } for (s <- List(1, 10, 100, 1000)) { val frac = SamplingUtils.computeFractionForSampleSize(s, n, false) val binomial = new BinomialDistribution(n, frac) assert(binomial.inverseCumulativeProbability(0.0001)*n >= s, "Computed fraction is too low") } } }
Example 89
Source File: VectorSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.util import scala.util.Random import org.apache.spark.SparkFunSuite @deprecated("suppress compile time deprecation warning", "1.0.0") class VectorSuite extends SparkFunSuite { def verifyVector(vector: Vector, expectedLength: Int): Unit = { assert(vector.length == expectedLength) assert(vector.elements.min > 0.0) assert(vector.elements.max < 1.0) } test("random with default random number generator") { val vector100 = Vector.random(100) verifyVector(vector100, 100) } test("random with given random number generator") { val vector100 = Vector.random(100, new Random(100)) verifyVector(vector100, 100) } }
Example 90
Source File: ByteArrayChunkOutputStreamSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.util.io import scala.util.Random import org.apache.spark.SparkFunSuite class ByteArrayChunkOutputStreamSuite extends SparkFunSuite { test("empty output") { val o = new ByteArrayChunkOutputStream(1024) assert(o.toArrays.length === 0) } test("write a single byte") { val o = new ByteArrayChunkOutputStream(1024) o.write(10) assert(o.toArrays.length === 1) assert(o.toArrays.head.toSeq === Seq(10.toByte)) } test("write a single near boundary") { val o = new ByteArrayChunkOutputStream(10) o.write(new Array[Byte](9)) o.write(99) assert(o.toArrays.length === 1) assert(o.toArrays.head(9) === 99.toByte) } test("write a single at boundary") { val o = new ByteArrayChunkOutputStream(10) o.write(new Array[Byte](10)) o.write(99) assert(o.toArrays.length === 2) assert(o.toArrays(1).length === 1) assert(o.toArrays(1)(0) === 99.toByte) } test("single chunk output") { val ref = new Array[Byte](8) Random.nextBytes(ref) val o = new ByteArrayChunkOutputStream(10) o.write(ref) val arrays = o.toArrays assert(arrays.length === 1) assert(arrays.head.length === ref.length) assert(arrays.head.toSeq === ref.toSeq) } test("single chunk output at boundary size") { val ref = new Array[Byte](10) Random.nextBytes(ref) val o = new ByteArrayChunkOutputStream(10) o.write(ref) val arrays = o.toArrays assert(arrays.length === 1) assert(arrays.head.length === ref.length) assert(arrays.head.toSeq === ref.toSeq) } test("multiple chunk output") { val ref = new Array[Byte](26) Random.nextBytes(ref) val o = new ByteArrayChunkOutputStream(10) o.write(ref) val arrays = o.toArrays assert(arrays.length === 3) assert(arrays(0).length === 10) assert(arrays(1).length === 10) assert(arrays(2).length === 6) assert(arrays(0).toSeq === ref.slice(0, 10)) assert(arrays(1).toSeq === ref.slice(10, 20)) assert(arrays(2).toSeq === ref.slice(20, 26)) } test("multiple chunk output at boundary size") { val ref = new Array[Byte](30) Random.nextBytes(ref) val o = new ByteArrayChunkOutputStream(10) o.write(ref) val arrays = o.toArrays assert(arrays.length === 3) assert(arrays(0).length === 10) assert(arrays(1).length === 10) assert(arrays(2).length === 10) assert(arrays(0).toSeq === ref.slice(0, 10)) assert(arrays(1).toSeq === ref.slice(10, 20)) assert(arrays(2).toSeq === ref.slice(20, 30)) } }
Example 91
Source File: SparkTC.scala From learning-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.examples import scala.util.Random import scala.collection.mutable import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.SparkContext._ object SparkTC { val numEdges = 200 val numVertices = 100 val rand = new Random(42) def generateGraph = { val edges: mutable.Set[(Int, Int)] = mutable.Set.empty while (edges.size < numEdges) { val from = rand.nextInt(numVertices) val to = rand.nextInt(numVertices) if (from != to) edges.+=((from, to)) } edges.toSeq } def main(args: Array[String]) { val sparkConf = new SparkConf().setAppName("SparkTC") val spark = new SparkContext(sparkConf) val slices = if (args.length > 0) args(0).toInt else 2 var tc = spark.parallelize(generateGraph, slices).cache() // Linear transitive closure: each round grows paths by one edge, // by joining the graph's edges with the already-discovered paths. // e.g. join the path (y, z) from the TC with the edge (x, y) from // the graph to obtain the path (x, z). // Because join() joins on keys, the edges are stored in reversed order. val edges = tc.map(x => (x._2, x._1)) // This join is iterated until a fixed point is reached. var oldCount = 0L var nextCount = tc.count() do { oldCount = nextCount // Perform the join, obtaining an RDD of (y, (z, x)) pairs, // then project the result to obtain the new (x, z) paths. tc = tc.union(tc.join(edges).map(x => (x._2._2, x._2._1))).distinct().cache() nextCount = tc.count() } while (nextCount != oldCount) println("TC has " + tc.count() + " edges.") spark.stop() } }
Example 92
Source File: client.scala From zio-saga with MIT License | 5 votes |
package com.vladkopanev.zio.saga.example import zio.{ Task, ZIO } import scala.util.Random package object client { import zio.duration._ def randomSleep(maxTimeout: Int): TaskC[Unit] = for { randomSeconds <- ZIO.effectTotal(Random.nextInt(maxTimeout)) _ <- ZIO.sleep(randomSeconds.seconds) } yield () def randomFail(operationName: String): Task[Unit] = for { randomInt <- ZIO.effectTotal(Random.nextInt(100)) _ <- if (randomInt % 10 == 0) ZIO.fail(new RuntimeException(s"Failed to execute $operationName")) else ZIO.unit } yield () }
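Random.nextInt is a side effect, so the example lifts it into ZIO with ZIO.effectTotal before sleeping or failing on the result. A reduced sketch of that wrapping in the same ZIO 1.x style as the example (EffectfulRandomSketch and flaky are illustrative names):

import zio.{ Task, ZIO }

import scala.util.Random

object EffectfulRandomSketch {
  // Lift the impure call so it runs when the effect runs, not when it is built.
  val randomPercent: ZIO[Any, Nothing, Int] = ZIO.effectTotal(Random.nextInt(100))

  // Fail roughly ten percent of the time, mirroring randomFail above.
  def flaky(operation: String): Task[Unit] =
    randomPercent.flatMap { n =>
      if (n % 10 == 0) ZIO.fail(new RuntimeException(s"Failed to execute $operation"))
      else ZIO.unit
    }
}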
Example 93
Source File: FunctionEqual.scala From scalaprops with MIT License | 5 votes |
package scalaprops import scala.util.Random import scalaz._ object FunctionEqual extends FunctionEqual(5) sealed class FunctionEqual(size: Int) { implicit def f1[A1: Gen, B](implicit B: Equal[B]): Equal[A1 => B] = { val values = Gen[A1].samples(listSize = size, size = size, seed = Random.nextLong()) Equal.equal((x, y) => values.forall(a => B.equal(x(a), y(a)))) } implicit def f2[A1: Gen, A2: Gen, B](implicit B: Equal[B]): Equal[(A1, A2) => B] = f1[(A1, A2), B].contramap(_.tupled) implicit def f3[A1: Gen, A2: Gen, A3: Gen, B](implicit B: Equal[B]): Equal[(A1, A2, A3) => B] = f1[(A1, A2, A3), B].contramap(_.tupled) implicit def f4[A1: Gen, A2: Gen, A3: Gen, A4: Gen, B](implicit B: Equal[B]): Equal[(A1, A2, A3, A4) => B] = f1[(A1, A2, A3, A4), B].contramap(_.tupled) implicit def f5[A1: Gen, A2: Gen, A3: Gen, A4: Gen, A5: Gen, B](implicit B: Equal[B] ): Equal[(A1, A2, A3, A4, A5) => B] = f1[(A1, A2, A3, A4, A5), B].contramap(_.tupled) implicit def f6[A1: Gen, A2: Gen, A3: Gen, A4: Gen, A5: Gen, A6: Gen, B](implicit B: Equal[B] ): Equal[(A1, A2, A3, A4, A5, A6) => B] = f1[(A1, A2, A3, A4, A5, A6), B].contramap(_.tupled) implicit def f7[A1: Gen, A2: Gen, A3: Gen, A4: Gen, A5: Gen, A6: Gen, A7: Gen, B](implicit B: Equal[B] ): Equal[(A1, A2, A3, A4, A5, A6, A7) => B] = f1[(A1, A2, A3, A4, A5, A6, A7), B].contramap(_.tupled) implicit def f8[A1: Gen, A2: Gen, A3: Gen, A4: Gen, A5: Gen, A6: Gen, A7: Gen, A8: Gen, B](implicit B: Equal[B] ): Equal[(A1, A2, A3, A4, A5, A6, A7, A8) => B] = f1[(A1, A2, A3, A4, A5, A6, A7, A8), B].contramap(_.tupled) }
Example 94
Source File: RandTestJVM.scala From scalaprops with MIT License | 5 votes |
package scalaprops import scala.util.Random object RandTestJVM extends Scalaprops { private[this] def chooseLong(rng: Long => Rand) = Property.forAll( Iterator.fill(100000)((Random.nextLong, Random.nextLong, Random.nextLong)).forall { case (seed, y, z) => val r = rng(seed).chooseLong(y, z)._2 val min = math.min(y, z) val max = math.max(y, z) (min <= r) && (r <= max) } ) val chooseLong32 = chooseLong(l => MersenneTwister32.fromSeed(l.toInt)) val chooseLong64 = chooseLong(MersenneTwister64.standard) }
Example 95
Source File: SimulateDistributionSpec.scala From squbs with Apache License 2.0 | 5 votes |
package org.squbs.pattern.timeoutpolicy import org.scalatest.{FlatSpecLike, Matchers} import scala.concurrent.duration._ import scala.concurrent.{Await, Future} import scala.util.{Random, Try} class SimulateDistributionSpec extends FlatSpecLike with Matchers{ "Random.nextGaussian" should "work as expected" in { import scala.concurrent.ExecutionContext.Implicits.global val timeoutPolicy = TimeoutPolicy(Some("test"), initial = 1.seconds, rule = 3.sigma, minSamples = 100, startOverCount = 500) val sigma = 30 val mean = 50 for (i <- 0 until 1000) { val tx = timeoutPolicy.transaction Try{ Await.ready(Future{ val s = (Random.nextGaussian() * sigma + mean).round Thread.sleep(s) }, tx.waitTime) } tx.end() // val metrics = timeoutPolicy.metrics // println(s"average=${metrics.averageTime}, standardDeviation=${metrics.standardDeviation}") } Thread.sleep(5000) val metrics = timeoutPolicy.metrics println(s"average=${metrics.averageTime.toLong}, standardDeviation=${metrics.standardDeviation.toLong}") val succeedPercent = (metrics.totalCount - metrics.timeoutCount) / metrics.totalCount.toDouble println(succeedPercent) println(metrics) } "NegativeExponentialTruncated" should "works fine with TimeoutPolicy " in { negativeExponential(truncate = true) } "NegativeExponentialNotTruncated" should "works fine with TimeoutPolicy " in { negativeExponential(truncate = false) } def negativeExponential(truncate: Boolean): Unit = { val delay = getDelay(truncate = truncate, cycleMin = 20.millis, cycleMean = 30.millis, cycleMax = 50.milliseconds) import scala.concurrent.ExecutionContext.Implicits.global val timeoutPolicy = TimeoutPolicy(Some("test"), initial = 1.seconds, rule = 3.sigma) for (i <- 0 until 1000) { val tx = timeoutPolicy.transaction Try{ Await.ready(Future{ val s = delay().toMillis Thread.sleep(s) }, tx.waitTime) } tx.end() // val metrics = timeoutPolicy.metrics } Thread.sleep(5000) val metrics = timeoutPolicy.metrics println(s"average=${metrics.averageTime.toLong}, standardDeviation=${metrics.standardDeviation.toLong}") val succeedPercent = (metrics.totalCount - metrics.timeoutCount) / metrics.totalCount.toDouble println(succeedPercent) println(metrics) } def getDelay(truncate: Boolean = true, cycleMin: FiniteDuration = 0.seconds, cycleMean: FiniteDuration = 1.seconds, cycleMax: FiniteDuration = 5.seconds): () => FiniteDuration = { val (shift, mean) = if (!truncate) { val shift1 = cycleMin.toNanos val mean1 = cycleMean.toNanos - shift1 (shift1, mean1) } else (0L, cycleMean.toNanos) () => { val delay = if (cycleMean.toNanos > 0) { val x = { val ix = Random.nextDouble() if (ix == 0d) Double.MinPositiveValue else ix } val iDelay = shift + (mean * -Math.log(x)).toLong if (iDelay < cycleMin.toNanos) cycleMin.toNanos else if (iDelay > cycleMax.toNanos) cycleMax.toNanos else iDelay } else 0L delay.nanoseconds } } }
Example 96
Source File: StreamSpecUtil.scala From squbs with Apache License 2.0 | 5 votes |
package org.squbs.pattern.stream import java.io.File import java.nio.file.Files import java.util.concurrent.atomic.AtomicInteger import akka.stream.ThrottleMode import akka.stream.scaladsl._ import com.typesafe.config.ConfigFactory import net.openhft.chronicle.wire.{WireIn, WireOut} import scala.concurrent.duration._ import scala.language.postfixOps import scala.collection.JavaConverters._ import scala.util.Random object StreamSpecUtil { val elementCount = 100000 val failTestAt = elementCount * 3 / 10 val elementsAfterFail = 100 val flowRate = 1000 val flowUnit = 10 millisecond val burstSize = 500 } class StreamSpecUtil[T, S](outputPort: Int = 1) { import StreamSpecUtil._ val outputPorts = outputPort val tempPath: File = Files.createTempDirectory("persistent_queue").toFile val totalProcessed = elementCount + elementsAfterFail val config = ConfigFactory.parseMap { Map( "persist-dir" -> s"${tempPath.getAbsolutePath}", "output-ports" -> s"$outputPorts", "roll-cycle" -> "TEST_SECONDLY".toLowerCase() ).asJava } val in = Source(1 to elementCount) lazy val atomicCounter = Vector.tabulate(outputPorts)(_ => new AtomicInteger(0)) lazy val flowCounter = Flow[Any].map(_ => 1L).reduce(_ + _).toMat(Sink.head)(Keep.right) lazy val merge = Merge[S](outputPorts) lazy val throttle = Flow[S].throttle(flowRate, flowUnit, burstSize, ThrottleMode.shaping) lazy val throttleMore = Flow[S].throttle(flowRate * 9 / 10, flowUnit, burstSize, ThrottleMode.shaping) lazy val head = Sink.head[S] lazy val last = Sink.last[S] val minRandom = 100 lazy val random = Random.nextInt(elementCount - minRandom - 1) + minRandom lazy val filterCounter = new AtomicInteger(0) lazy val filterARandomElement = Flow[Event[T]].map(e => (e, filterCounter.incrementAndGet())).filter(_._2 != random).map(_._1) def commitCounter(outputPortId: Int) = atomicCounter(outputPortId).incrementAndGet() def clean() = delete(tempPath) private def delete(file: File): Unit = { if (file.isDirectory) Option(file.listFiles).map(_.toList).getOrElse(Nil).foreach(delete) file.delete } } case class Person(name: String, age: Int) class PersonSerializer extends QueueSerializer[Person] { override def readElement(wire: WireIn): Option[Person] = { for { name <- Option(wire.read().`object`(classOf[String])) age <- Option(wire.read().int32) } yield { Person(name, age) } } override def writeElement(element: Person, wire: WireOut): Unit = { wire.write().`object`(classOf[String], element.name) wire.write().int32(element.age) } }
Example 97
Source File: RNNEmbeddingExample.scala From ScalNet with Apache License 2.0 | 5 votes |
package org.deeplearning4j.scalnet.examples.dl4j.recurrent import org.deeplearning4j.nn.conf.inputs.InputType import org.deeplearning4j.optimize.listeners.ScoreIterationListener import org.deeplearning4j.scalnet.layers.embeddings.EmbeddingLayer import org.deeplearning4j.scalnet.layers.recurrent.{ GravesLSTM, RnnOutputLayer } import org.deeplearning4j.scalnet.models.NeuralNet import org.nd4j.linalg.activations.Activation import org.nd4j.linalg.dataset.DataSet import org.nd4j.linalg.factory.Nd4j import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction import scala.util.Random object RNNEmbeddingExample extends App { val nClassesIn = 10 val batchSize = 3 val timeSeriesLength = 8 val inEmbedding = Nd4j.create(batchSize, 1, timeSeriesLength) val outLabels = Nd4j.create(batchSize, 4, timeSeriesLength) val seed = 12345 val rand = new Random(seed) val timeSeries: DataSet = { for (i <- 0 until batchSize; j <- 0 until timeSeriesLength) { val classIdx = rand.nextInt(nClassesIn) inEmbedding.putScalar(Array[Int](i, 0, j), classIdx) val labelIdx = rand.nextInt(batchSize + 1) outLabels.putScalar(Array[Int](i, labelIdx, j), 1.0) } new DataSet(inEmbedding, outLabels) } val model: NeuralNet = { val model: NeuralNet = NeuralNet(inputType = InputType.recurrent(3, 8), rngSeed = seed) model.add(EmbeddingLayer(nClassesIn, 5)) model.add(GravesLSTM(5, 7, Activation.SOFTSIGN)) model.add(RnnOutputLayer(7, 4, Activation.SOFTMAX)) model.compile(LossFunction.MCXENT) model } model.fit(timeSeries, 1, List(new ScoreIterationListener(1))) }
Example 98
Source File: BasicRNNExample.scala From ScalNet with Apache License 2.0 | 5 votes |
package org.deeplearning4j.scalnet.examples.dl4j.recurrent import org.deeplearning4j.nn.api.OptimizationAlgorithm import org.deeplearning4j.nn.conf.Updater import org.deeplearning4j.scalnet.layers.recurrent.{ GravesLSTM, RnnOutputLayer } import org.deeplearning4j.scalnet.logging.Logging import org.deeplearning4j.scalnet.models.NeuralNet import org.nd4j.linalg.activations.Activation import org.nd4j.linalg.api.ops.impl.indexaccum.IMax import org.nd4j.linalg.dataset.DataSet import org.nd4j.linalg.factory.Nd4j import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction import scala.util.Random object BasicRNNExample extends App with Logging { // define a sentence to learn. // Add a special character at the beginning so the RNN learns the complete string and ends with the marker. val learningString = "*Der Cottbuser Postkutscher putzt den Cottbuser Postkutschkasten.".toVector val learningChars = learningString.distinct val hiddenSize = 64 val epochs = 200 val seed = 1234 val rand = new Random(seed) val input = Nd4j.zeros(1, learningChars.length, learningString.length) val labels = Nd4j.zeros(1, learningChars.length, learningString.length) val trainingData: DataSet = { learningString.zipWithIndex.foreach { case (currentChar, index) => val nextChar = if (index + 1 > learningString.indices.max) learningString(0) else learningString(index + 1) input.putScalar(Array[Int](0, learningChars.indexOf(currentChar), index), 1) labels.putScalar(Array[Int](0, learningChars.indexOf(nextChar), index), 1) } new DataSet(input, labels) } logger.info("Build model...") val model: NeuralNet = { val model: NeuralNet = NeuralNet(rngSeed = seed, miniBatch = false) model.add(GravesLSTM(learningChars.length, hiddenSize, Activation.TANH)) model.add(GravesLSTM(hiddenSize, hiddenSize, Activation.TANH)) model.add(RnnOutputLayer(hiddenSize, learningChars.length, Activation.SOFTMAX)) model.compile(LossFunction.MCXENT, OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT, Updater.RMSPROP) model } val rnn = model.getNetwork (0 until epochs).foreach { e => rnn.fit(trainingData) rnn.rnnClearPreviousState() val init = Nd4j.zeros(learningChars.length) init.putScalar(learningChars.indexOf(learningString(0)), 1) var output = rnn.rnnTimeStep(init) val predicted: Vector[Char] = learningString.map { _ => val sampledCharacterIdx = Nd4j.getExecutioner.exec(new IMax(output), 1).getInt(0) val nextInput = Nd4j.zeros(learningChars.length) nextInput.putScalar(sampledCharacterIdx, 1) output = rnn.rnnTimeStep(nextInput) learningChars(sampledCharacterIdx) } logger.info(s"Epoch $e - ${predicted.mkString}") } }
Example 99
Source File: RunnersCache.scala From infinispan-spark with Apache License 2.0 | 5 votes |
package org.infinispan.spark.test import org.infinispan.spark.domain.Runner import org.scalatest.{BeforeAndAfterAll, Suite} import scala.util.Random trait RunnersCache extends BeforeAndAfterAll { this: Suite with RemoteTest => protected def getNumEntries: Int override protected def beforeAll(): Unit = { val random = new Random(System.currentTimeMillis()) val MinFinishTime = 3600 val MaxFinishTime = 4500 val MinAge = 15 val MaxAge = 60 (1 to getNumEntries).par.foreach { i => val name = "Runner " + i val finished = if (i % 2 == 0) true else false val finishTime = random.nextInt((MaxFinishTime - MinFinishTime) + 1) + MinFinishTime val age = Integer.valueOf(i * (MaxAge - MinAge) / getNumEntries + MinAge) val runner = new Runner(name, finished, if(finished) finishTime else 0, age) getRemoteCache.put(i, runner) } super.beforeAll() } override protected def afterAll(): Unit = { super.afterAll() } }
Example 100
Source File: WordCache.scala From infinispan-spark with Apache License 2.0 | 5 votes |
package org.infinispan.spark.test import org.scalatest.{BeforeAndAfterAll, Suite} import scala.util.Random trait WordCache extends BeforeAndAfterAll { this: Suite with RemoteTest => // https://github.com/bmarcot/haiku/blob/master/haiku.scala val adjs = List("autumn", "hidden", "bitter", "misty", "silent", "empty", "dry", "dark", "summer", "icy", "delicate", "quiet", "white", "cool", "spring", "winter", "patient", "twilight", "dawn", "crimson", "wispy", "weathered", "blue", "billowing", "broken", "cold", "damp", "falling", "frosty", "green", "long", "late", "lingering", "bold", "little", "morning", "muddy", "old", "red", "rough", "still", "small", "sparkling", "throbbing", "shy", "wandering", "withered", "wild", "black", "holy", "solitary", "fragrant", "aged", "snowy", "proud", "floral", "restless", "divine", "polished", "purple", "lively", "nameless", "puffy", "fluffy", "calm", "young", "golden", "avenging", "ancestral", "ancient", "argent", "reckless", "daunting", "short", "rising", "strong", "timber", "tumbling", "silver", "dusty", "celestial", "cosmic", "crescent", "double", "far", "half", "inner", "milky", "northern", "southern", "eastern", "western", "outer", "terrestrial", "huge", "deep", "epic", "titanic", "mighty", "powerful") val nouns = List("waterfall", "river", "breeze", "moon", "rain", "wind", "sea", "morning", "snow", "lake", "sunset", "pine", "shadow", "leaf", "dawn", "glitter", "forest", "hill", "cloud", "meadow", "glade", "bird", "brook", "butterfly", "bush", "dew", "dust", "field", "flower", "firefly", "feather", "grass", "haze", "mountain", "night", "pond", "darkness", "snowflake", "silence", "sound", "sky", "shape", "surf", "thunder", "violet", "wildflower", "wave", "water", "resonance", "sun", "wood", "dream", "cherry", "tree", "fog", "frost", "voice", "paper", "frog", "smoke", "star", "sierra", "castle", "fortress", "tiger", "day", "sequoia", "cedar", "wrath", "blessing", "spirit", "nova", "storm", "burst", "protector", "drake", "dragon", "knight", "fire", "king", "jungle", "queen", "giant", "elemental", "throne", "game", "weed", "stone", "apogee", "bang", "cluster", "corona", "cosmos", "equinox", "horizon", "light", "nebula", "solstice", "spectrum", "universe", "magnitude", "parallax") protected def getNumEntries: Int private val random = new Random(System.currentTimeMillis()) private def randomWordFrom(l: List[String]) = l(random.nextInt(l.size)) private def pickNouns = (for (_ <- 0 to random.nextInt(3)) yield randomWordFrom(nouns)).mkString(" ") lazy val wordsCache = getRemoteCache[Int,String] override protected def beforeAll(): Unit = { (1 to getNumEntries).par.foreach { i => val contents = Seq(randomWordFrom(adjs), pickNouns).mkString(" ") wordsCache.put(i, contents) } super.beforeAll() } override protected def afterAll(): Unit = { super.afterAll() } }
Example 101
Source File: SeQuiLaAnalyzer.scala From bdg-sequila with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.ResolveTableValuedFunctionsSeq import org.apache.spark.sql.catalyst.catalog.SessionCatalog import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf import scala.util.Random class SeQuiLaAnalyzer(catalog: SessionCatalog, conf: SQLConf) extends Analyzer(catalog, conf, conf.optimizerMaxIterations){ //override val extendedResolutionRules: Seq[Rule[LogicalPlan]] = Seq(ResolveTableValuedFunctionsSeq) // override lazy val batches: Seq[Batch] = Seq( // Batch("Custeom", fixedPoint, ResolveTableValuedFunctionsSeq), // Batch("Hints", fixedPoint, new ResolveHints.ResolveBroadcastHints(conf), // ResolveHints.RemoveAllHints)) var sequilaOptmazationRules: Seq[Rule[LogicalPlan]] = Nil override lazy val batches: Seq[Batch] = Seq( Batch("Hints", fixedPoint, new ResolveHints.ResolveBroadcastHints(conf), ResolveHints.RemoveAllHints), Batch("Simple Sanity Check", Once, LookupFunctions), Batch("Substitution", fixedPoint, CTESubstitution, WindowsSubstitution, EliminateUnions, new SubstituteUnresolvedOrdinals(conf)), Batch("Resolution", fixedPoint, ResolveTableValuedFunctionsSeq :: ResolveRelations :: ResolveReferences :: ResolveCreateNamedStruct :: ResolveDeserializer :: ResolveNewInstance :: ResolveUpCast :: ResolveGroupingAnalytics :: ResolvePivot :: ResolveOrdinalInOrderByAndGroupBy :: ResolveAggAliasInGroupBy :: ResolveMissingReferences :: ExtractGenerator :: ResolveGenerate :: ResolveFunctions :: ResolveAliases :: ResolveSubquery :: ResolveSubqueryColumnAliases :: ResolveWindowOrder :: ResolveWindowFrame :: ResolveNaturalAndUsingJoin :: ExtractWindowExpressions :: GlobalAggregates :: ResolveAggregateFunctions :: TimeWindowing :: ResolveInlineTables(conf) :: ResolveTimeZone(conf) :: TypeCoercion.typeCoercionRules(conf) ++ extendedResolutionRules : _*), Batch("Post-Hoc Resolution", Once, postHocResolutionRules: _*), Batch("SeQuiLa", Once,sequilaOptmazationRules: _*), //SeQuilaOptimization rules Batch("View", Once, AliasViewChild(conf)), Batch("Nondeterministic", Once, PullOutNondeterministic), Batch("UDF", Once, HandleNullInputsForUDF), Batch("FixNullability", Once, FixNullability), Batch("Subquery", Once, UpdateOuterReferences), Batch("Cleanup", fixedPoint, CleanupAliases) ) }
Example 102
Source File: VPTree.scala From traj-sim-spark with Apache License 2.0 | 5 votes |
package edu.utah.cs.index import edu.utah.cs.util.MetricObject import scala.collection.mutable import scala.reflect.ClassTag import scala.util.Random abstract class VPTreeNode[T <: MetricObject: ClassTag] case class VPTreeInternalNode[T <: MetricObject: ClassTag](vp: T, threshold: Double, left: VPTreeNode[T], right: VPTreeNode[T]) extends VPTreeNode[T] case class VPTreeLeafNode[T <: MetricObject: ClassTag](points: Array[T]) extends VPTreeNode[T] case class VPTree[T <: MetricObject: ClassTag](root: VPTreeNode[T]) extends Index with Serializable { private[cs] case class HeapItem(point: T, dis: Double) extends Ordered[HeapItem] { override def compare(that: HeapItem): Int = dis.compare(that.dis) } def knn(query: T, k: Int, dis_threshold: Double = Double.MaxValue): (Array[(T, Double)], Int) = { val pq = mutable.PriorityQueue[HeapItem]() var tau = dis_threshold var checked = 0 def offer(x: HeapItem) = { if (pq.size == k) pq.dequeue() pq.enqueue(x) if (pq.size == k) tau = pq.head.dis } def recursive_knn(node: VPTreeNode[T]) : Unit = { if (node != null) { node match { case VPTreeLeafNode(ps) => checked += ps.length ps.foreach(x => { val dis = query.distance(x) if (dis < tau) offer(HeapItem(x, dis)) }) case VPTreeInternalNode(vp, th, left, right) => val vp_dis = query.distance(vp) checked += 1 if (vp_dis < tau) offer(HeapItem(vp, vp_dis)) if (vp_dis < th) { if (vp_dis - tau <= th) recursive_knn(left) if (vp_dis + tau >= th) recursive_knn(right) } else { if (vp_dis + tau >= th) recursive_knn(right) if (vp_dis - tau <= th) recursive_knn(left) } } } } recursive_knn(root) (pq.dequeueAll.map(x => (x.point, x.dis)).toArray.reverse, checked) } } object VPTree { def buildNode[T <: MetricObject: ClassTag](points: Array[T], leaf_capacity: Int): VPTreeNode[T] = { if (points.isEmpty) { null } else if (points.length < leaf_capacity) { VPTreeLeafNode(points) } else { val n = points.length val vp_id = Random.nextInt(n) val t = points(vp_id) points(vp_id) = points(0) points(0) = t val vp = points.head val ps_with_dis = points.slice(1, n).map(x => (vp.distance(x), x)).sortBy(_._1) val median = Math.ceil((n - 1) / 2.0).toInt - 1 val threshold = ps_with_dis(median)._1 VPTreeInternalNode(vp, threshold, buildNode(ps_with_dis.slice(0, median + 1).map(_._2), leaf_capacity), buildNode(ps_with_dis.slice(median + 1, n).map(_._2), leaf_capacity)) } } def apply[T <: MetricObject: ClassTag](points: Array[T], leaf_capacity: Int = 25): VPTree[T] = { VPTree(buildNode(points, leaf_capacity)) } }
Example 103
Source File: BloomFilter.scala From traj-sim-spark with Apache License 2.0 | 5 votes |
package edu.utah.cs.util

import scala.util.Random

case class BloomFilterMeta(num_bits: Int, num_hashs: Int) {
  val seeds = (1 to num_hashs).map(x => (Random.nextInt(Integer.MAX_VALUE), Random.nextInt(Integer.MAX_VALUE)))
}

object BloomFilter {
  var meta: BloomFilterMeta = null

  private def calcHash(seed: (Int, Int), key: Int) =
    (((seed._1 % meta.num_bits) * (key & meta.num_bits) + seed._2 % meta.num_bits) % meta.num_bits + meta.num_bits) % meta.num_bits

  def put(bf: Array[Int], key: Int): Unit = {
    meta.seeds.foreach(seed => {
      BitArray.set(bf, calcHash(seed, key))
    })
  }

  def mayContains(bf: Array[Int], key: Int): Boolean = {
    meta.seeds.foreach(seed => {
      if (!BitArray.get(bf, calcHash(seed, key))) return false
    })
    true
  }

  def optimalNumBits(num_items: Long, fp_rate: Double): Int = {
    math.ceil(-1 * num_items * math.log(fp_rate) / math.log(2) / math.log(2)).toInt
  }

  def optimalNumHashes(num_items: Long, num_bits: Long): Int = {
    math.ceil(num_bits / num_items * math.log(2)).toInt
  }
}
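The two sizing helpers encode the standard Bloom filter formulas m = -n ln(p) / (ln 2)^2 and k = (m / n) ln 2. A small worked example, assuming the BloomFilter object above (and its BitArray helper) is on the classpath; the item count and target rate are illustrative:

object BloomFilterSizing extends App {
  val items = 1000000L   // expected number of distinct keys
  val fpRate = 0.01      // target false-positive rate

  // Roughly 9.6 million bits for one million keys at a 1% false-positive rate.
  val bits = BloomFilter.optimalNumBits(items, fpRate)
  // Roughly 7 hash functions for that bit count.
  val hashes = BloomFilter.optimalNumHashes(items, bits)

  println(s"bits = $bits, hashes = $hashes")
}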
Example 104
Source File: 5-Future.scala From wow-spark with MIT License | 5 votes |
package com.sev7e0.wow.scala

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future
import scala.util.{Failure, Random, Success}

object UseFuture {

  type CoffeeBeans = String
  type GroundCoffee = String
  type Milk = String
  type FrothedMilk = String
  type Espresso = String
  type Cappuccino = String

  def main(args: Array[String]): Unit = {
    // This import brings the global execution context into scope, providing the implicit ExecutionContext.
    import scala.concurrent.ExecutionContext.Implicits.global

    // A callback (a partial function) can be registered for success or failure separately,
    // but this style is not recommended; prefer onComplete instead.
    grind("ara beans").onSuccess { case ground =>
      Thread.sleep(Random.nextInt(2000))
      println("okay, got my ground coffee")
    }

    // Register a callback with onComplete; the callback receives a Try.
    grind("java beans").onComplete {
      case Success(message)   => println(s"okay, got my ground coffee $message")
      case Failure(exception) => println(exception.getMessage)
    }

    val eventualBoolean: Future[Boolean] = heatWater(Water(50)) flatMap { water =>
      temperatureOkay(water)
    }
    eventualBoolean.foreach(println)

    // The three Futures created before the for-comprehension run in parallel; inside the
    // for-comprehension they are combined sequentially (a for-comprehension is sugar for flatMap/map).
    val eventualCoffee = grind("java beans")
    val eventualWater = heatWater(Water(20))
    val eventualMilk = frothMilk("milk")

    val coffee = for {
      ground <- eventualCoffee
      water <- eventualWater
      milk <- eventualMilk
      okay <- brew(ground, water)
    } yield combine(okay, milk)
    coffee.foreach(println)

    Thread.sleep(10000)
  }

  // Check the water temperature on an asynchronous task.
  def temperatureOkay(water: Water): Future[Boolean] = Future {
    (80 to 85) contains water.temperature
  }

  def grind(coffeeBeans: CoffeeBeans): Future[GroundCoffee] = Future {
    println("start grinding...")
    Thread.sleep(Random.nextInt(2000))
    if (coffeeBeans == "baked beans") throw GrindingException("are you joking?")
    println("finished grinding...")
    s"ground coffee of $coffeeBeans"
  }

  def heatWater(water: Water): Future[Water] = Future {
    println("heating the water now")
    Thread.sleep(Random.nextInt(2000))
    println("hot, it's hot!")
    water.copy(temperature = 85)
  }

  def frothMilk(milk: Milk): Future[FrothedMilk] = Future {
    println("milk frothing system engaged!")
    Thread.sleep(Random.nextInt(2000))
    println("shutting down milk frothing system")
    s"frothed $milk"
  }

  def brew(coffeeBeans: CoffeeBeans, water: Water): Future[Espresso] = Future {
    println("happy brewing :)")
    Thread.sleep(Random.nextInt(2000))
    println("it's brewed!")
    "espresso"
  }

  def combine(espresso: Espresso, frothedMilk: FrothedMilk): Cappuccino = "cappuccino"

  case class Water(temperature: Int)

  case class GrindingException(msg: String) extends Exception(msg)
  case class FrothingException(msg: String) extends Exception(msg)
  case class WaterBoilingException(msg: String) extends Exception(msg)
  case class BrewingException(msg: String) extends Exception(msg)
}
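The parallel-versus-sequential distinction noted in the comments above is easy to verify: a Future starts running as soon as it is created, so Futures created inside a for-comprehension only start after the previous step completes. A minimal standalone sketch; the step duration and names are illustrative:

import scala.concurrent.{Await, Future}
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._

object SequentialVsParallel extends App {
  def slowStep(name: String): Future[String] = Future {
    Thread.sleep(1000)
    name
  }

  // Sequential: each Future is created (and therefore started) inside the for-comprehension,
  // so the total time is roughly three seconds.
  val sequential = for {
    a <- slowStep("a")
    b <- slowStep("b")
    c <- slowStep("c")
  } yield s"$a$b$c"

  // Parallel: the Futures are started up front and the for-comprehension only combines
  // their results, so the total time is roughly one second.
  val fa = slowStep("a")
  val fb = slowStep("b")
  val fc = slowStep("c")
  val parallel = for { a <- fa; b <- fb; c <- fc } yield s"$a$b$c"

  println(Await.result(sequential, 5.seconds))
  println(Await.result(parallel, 5.seconds))
}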
Example 105
Source File: SigSerializerSpecification.scala From sigmastate-interpreter with MIT License | 5 votes |
package sigmastate.serialization import java.util import org.ergoplatform.{ErgoLikeContext, ErgoLikeTransaction} import org.scalacheck.{Arbitrary, Gen} import org.scalatest.Assertion import sigmastate.Values.{SigmaBoolean, SigmaPropConstant, SigmaPropValue, Value} import sigmastate._ import sigmastate.basics.DLogProtocol.ProveDlog import sigmastate.basics.ProveDHTuple import sigmastate.helpers.{ContextEnrichingTestProvingInterpreter, ErgoLikeContextTesting, ErgoLikeTransactionTesting, SigmaTestingCommons} import sigmastate.serialization.generators.ObjectGenerators import sigmastate.utxo.Transformer import scala.util.Random class SigSerializerSpecification extends SigmaTestingCommons with ObjectGenerators { implicit lazy val IR = new TestingIRContext private lazy implicit val arbExprGen: Arbitrary[SigmaBoolean] = Arbitrary(exprTreeGen) private lazy val prover = new ContextEnrichingTestProvingInterpreter() private lazy val interpreterProveDlogGen: Gen[ProveDlog] = Gen.oneOf(prover.dlogSecrets.map(secret => ProveDlog(secret.publicImage.h))) private lazy val interpreterProveDHTGen = Gen.oneOf( prover.dhSecrets .map(_.commonInput) .map(ci => ProveDHTuple(ci.g, ci.h, ci.u, ci.v))) private def exprTreeNodeGen: Gen[SigmaBoolean] = for { left <- exprTreeGen right <- exprTreeGen node <- Gen.oneOf( COR(Seq(left, right)), CAND(Seq(left, right)) ) } yield node private def exprTreeGen: Gen[SigmaBoolean] = Gen.oneOf(interpreterProveDlogGen, interpreterProveDHTGen, Gen.delay(exprTreeNodeGen)) private def isEquivalent(expected: ProofTree, actual: ProofTree): Boolean = (expected, actual) match { case (NoProof, NoProof) => true case (dht1: UncheckedDiffieHellmanTuple, dht2: UncheckedDiffieHellmanTuple) => // `firstMessageOpt` is not serialized dht1.copy(commitmentOpt = None) == dht2 case (sch1: UncheckedSchnorr, sch2: UncheckedSchnorr) => // `firstMessageOpt` is not serialized sch1.copy(commitmentOpt = None) == sch2 case (conj1: UncheckedConjecture, conj2: UncheckedConjecture) => util.Arrays.equals(conj1.challenge, conj2.challenge) && conj1.children.zip(conj2.children).forall(t => isEquivalent(t._1, t._2)) case _ => false } private def roundTrip(uncheckedTree: UncheckedTree, exp: SigmaBoolean): Assertion = { val bytes = SigSerializer.toBytes(uncheckedTree) val parsedUncheckedTree = SigSerializer.parseAndComputeChallenges(exp, bytes) isEquivalent(uncheckedTree, parsedUncheckedTree) shouldBe true } property("SigSerializer no proof round trip") { roundTrip(NoProof, TrivialProp.TrueProp) } property("SigSerializer round trip") { forAll { sb: SigmaBoolean => val expr = sb.toSigmaProp val challenge = Array.fill(32)(Random.nextInt(100).toByte) val ctx = ErgoLikeContextTesting( currentHeight = 1, lastBlockUtxoRoot = AvlTreeData.dummy, minerPubkey = ErgoLikeContextTesting.dummyPubkey, boxesToSpend = IndexedSeq(fakeSelf), spendingTransaction = ErgoLikeTransactionTesting.dummy, self = fakeSelf) // get sigma conjectures out of transformers val prop = prover.reduceToCrypto(ctx, expr).get._1 val proof = prover.prove(expr, ctx, challenge).get.proof val proofTree = SigSerializer.parseAndComputeChallenges(prop, proof) roundTrip(proofTree, prop) } } }
Example 106
Source File: ConcreteCollectionSerializerSpecification.scala From sigmastate-interpreter with MIT License | 5 votes |
package sigmastate.serialization import sigmastate.Values.{FalseLeaf, Constant, TrueLeaf, IntConstant, TaggedInt, ConcreteCollection} import sigmastate._ import sigmastate.eval.Evaluation import sigmastate.lang.Terms._ import scala.util.Random class ConcreteCollectionSerializerSpecification extends TableSerializationSpecification { private def testCollectionWithConstant[T <: SType](tpe: T) = { implicit val wWrapped = wrappedTypeGen(tpe) implicit val tT = Evaluation.stypeToRType(tpe) implicit val tag = tT.classTag forAll { x: Array[T#WrappedType] => roundTripTest(ConcreteCollection[T](x.map(v => Constant(v, tpe)), tpe)) } } property("ConcreteCollection (Constant[SBoolean.type]): Serializer round trip ") { testCollectionWithConstant(SBoolean) } property("ConcreteCollection (Constant): Serializer round trip ") { testCollectionWithConstant(SByte) testCollectionWithConstant(SShort) testCollectionWithConstant(SInt) testCollectionWithConstant(SLong) testCollectionWithConstant(SBigInt) testCollectionWithConstant(SGroupElement) testCollectionWithConstant(SSigmaProp) testCollectionWithConstant(SUnit) testCollectionWithConstant(SBox) testCollectionWithConstant(SAvlTree) } property("ConcreteCollection: Serializer round trip with different types seq") { forAll { (i: IntConstant, ti: TaggedInt) => val seq = Random.shuffle(Seq(i.asIntValue, ti.asIntValue)).toArray roundTripTest(ConcreteCollection.fromSeq(seq)) } } override def objects = Table( ("object", "bytes"), (ConcreteCollection.fromItems(TrueLeaf, FalseLeaf, TrueLeaf), Array[Byte](OpCodes.ConcreteCollectionBooleanConstantCode, 3, 5)) // bits: 00000101 ) tableRoundTripTest("Specific objects serializer round trip") tablePredefinedBytesTest("Specific objects deserialize from predefined bytes") property("ConcreteCollection: deserialize collection of a crazy size") { val bytes = Array[Byte](OpCodes.ConcreteCollectionCode) ++ SigmaSerializer.startWriter().putUInt(Int.MaxValue).toBytes an[IllegalArgumentException] should be thrownBy ValueSerializer.deserialize(bytes) } }
Example 107
Source File: BlockchainSimulationSpecification.scala From sigmastate-interpreter with MIT License | 5 votes |
package sigmastate.utxo.blockchain import java.io.{File, FileWriter} import org.scalacheck.Gen import sigmastate.Values.{BooleanConstant, ErgoTree, GetVarBoolean, TrueLeaf} import sigmastate.helpers.{ContextEnrichingTestProvingInterpreter, ErgoLikeTestProvingInterpreter} import sigmastate.interpreter.ContextExtension import sigmastate.utxo.blockchain.BlockchainSimulationTestingCommons._ import scala.collection.concurrent.TrieMap import scala.util.Random class BlockchainSimulationSpecification extends BlockchainSimulationTestingCommons { implicit lazy val IR = new TestingIRContext property("apply one valid block") { val state = ValidationState.initialState() val miner = new ErgoLikeTestProvingInterpreter() val block = generateBlock(state, miner, 0) val updStateTry = state.applyBlock(block) updStateTry.isSuccess shouldBe true } property("too costly block") { val state = ValidationState.initialState() val miner = new ErgoLikeTestProvingInterpreter() val block = generateBlock(state, miner, 0) val updStateTry = state.applyBlock(block, maxCost = 1) updStateTry.isSuccess shouldBe false } property("apply many blocks") { val state = ValidationState.initialState() val miner = new ErgoLikeTestProvingInterpreter() checkState(state, miner, 0, randomDeepness) } property("apply many blocks with enriched context") { val state = ValidationState.initialState() val miner = new ErgoLikeTestProvingInterpreter() val varId = 1.toByte val prop = GetVarBoolean(varId).get.toSigmaProp // unable to spend boxes without correct context extension an[RuntimeException] should be thrownBy checkState(state, miner, 0, randomDeepness, Some(prop)) // spend boxes with context extension val contextExtension = ContextExtension(Map(varId -> TrueLeaf)) checkState(state, miner, 0, randomDeepness, Some(prop), contextExtension) } ignore(s"benchmarking applying many blocks (!!! ignored)") { val results = new TrieMap[Int, Long] def bench(numberOfBlocks: Int): Unit = { val state = ValidationState.initialState() val miner = new ContextEnrichingTestProvingInterpreter() val (_, time) = (0 until numberOfBlocks).foldLeft(state -> 0L) { case ((s, timeAcc), h) => val b = generateBlock(state, miner, h) val t0 = System.currentTimeMillis() val updStateTry = s.applyBlock(b) val t = System.currentTimeMillis() updStateTry shouldBe 'success updStateTry.get -> (timeAcc + (t - t0)) } println(s"Total time for $numberOfBlocks blocks: $time ms") results.put(numberOfBlocks, time) } bench(100) bench(200) bench(300) bench(400) printResults(results.toMap) def printResults(results: Map[Int, Long]): Unit = { val file = new File("target/bench") file.mkdirs() val writer = new FileWriter(s"target/bench/result.csv", false) val sorted = results.toList.sortBy { case (i, _) => i } val header = sorted.map(_._1).mkString(",") writer.write(s"$header\n") val values = sorted.map(_._2).mkString(",") writer.write(s"$values\n") writer.flush() writer.close() } } }
Example 108
Source File: V3PackageSpec.scala From cosmos with Apache License 2.0 | 5 votes |
package com.mesosphere.universe.v3.model import com.mesosphere.universe import java.nio.ByteBuffer import org.scalatest.FreeSpec import org.scalatest.Matchers import scala.util.Random class V3PackageSpec extends FreeSpec with Matchers { val input = List( // scalastyle:off magic.number ("pkg1", Version("1.0-1"), ReleaseVersion(1)), ("pkg1", Version("1.0-2"), ReleaseVersion(2)), ("pkg1", Version("1.0-3"), ReleaseVersion(3)), ("pkg2", Version("1.0"), ReleaseVersion(1)), ("pkg2", Version("2.0"), ReleaseVersion(2)), ("pkg3", Version("1.0"), ReleaseVersion(3)), ("pkg4", Version("1.0"), ReleaseVersion(4)), ("pkg5", Version("1.0-1"), ReleaseVersion(1)), ("pkg5", Version("2.0-1"), ReleaseVersion(2)), ("pkg5", Version("1.1-1"), ReleaseVersion(3)), ("pkg6", Version("0.0.0.1"), ReleaseVersion(1)), ("pkg6", Version("0.0.0.5"), ReleaseVersion(2)), ("pkg6", Version("0.0.0.2"), ReleaseVersion(3)), ("pkg7", Version("0.0.1"), ReleaseVersion(1)), ("pkg7", Version("0.0.4.2"), ReleaseVersion(10)) // scalastyle:on magic.number ) "V3Package" - { "Ordering should work" in { val expected = input.map(v3Package(_)) val actual = Random.shuffle(expected).sorted actual shouldBe expected } } "V2Package" - { "Ordering should work" in { val expected = input.map(v2Package(_)) val actual = Random.shuffle(expected).sorted actual shouldBe expected } } "PackageDefinition" - { "Ordering should work" in { val expected = input.map(packageDefinition(_)) val actual = Random.shuffle(expected).sorted actual shouldBe expected } } def v3Package(tuple: (String, Version, ReleaseVersion)): V3Package = { val (name, version, relVer) = tuple V3Package( V3PackagingVersion, name, version, relVer, "[email protected]", "doesn't matter" ) } def v2Package(tuple: (String, Version, ReleaseVersion)): V2Package = { val (name, version, relVer) = tuple V2Package( V2PackagingVersion, name, version, relVer, "[email protected]", "doesn't matter", Marathon(ByteBuffer.allocate(0)) ) } def packageDefinition(tuple: (String, Version, ReleaseVersion)): universe.v4.model.PackageDefinition = { if (Random.nextBoolean) { v2Package(tuple) } else { v3Package(tuple) } } }
Example 109
Source File: SemVerSpec.scala From cosmos with Apache License 2.0 | 5 votes |
package com.mesosphere.universe.v3.model

import com.mesosphere.Generators.Implicits._
import org.scalatest.FreeSpec
import org.scalatest.Matchers
import org.scalatest.prop.PropertyChecks
import scala.util.Random

final class SemVerSpec extends FreeSpec with PropertyChecks with Matchers {
  "For all SemVer => String => SemVer" in {
    forAll { (expected: SemVer) =>
      val string = expected.toString
      val actual = SemVer(string).get

      actual shouldBe expected
    }
  }

  "Test semver ordering" in {
    val expected = List(
      "1.0.0-alpha",
      "1.0.0-alpha.1",
      "1.0.0-alpha.beta",
      "1.0.0-beta",
      "1.0.0-beta.2",
      "1.0.0-beta.11",
      "1.0.0-rc.1",
      "1.0.0",
      "1.0.2",
      "1.2.0",
      "1.11.0",
      "1.11.11",
      "2",
      "11.11.11"
    ).map(SemVer(_).get)

    val actual = Random.shuffle(expected).sorted

    actual shouldBe expected
  }
}
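The ordering test above doubles as a reference for semver precedence: pre-release identifiers sort before the final release, and numeric identifiers compare numerically rather than lexically. A minimal sketch that reuses only the SemVer(...).get parsing and the Ordering exercised by the spec; the exact printed form depends on SemVer.toString and is not shown here:

object SemVerOrderingSketch extends App {
  // Parse with the same companion apply used in the spec above.
  val versions = List("2", "1.0.0-rc.1", "1.0.0", "1.11.0").map(SemVer(_).get)

  // sorted relies on the Ordering the spec depends on; the rc build sorts before 1.0.0.
  println(versions.sorted.mkString(", "))
}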
Example 110
Source File: RawCarDataGenerator.scala From cloudflow with Apache License 2.0 | 5 votes |
package connectedcar.streamlets import akka.NotUsed import akka.stream.scaladsl.Source import cloudflow.akkastream.AkkaStreamlet import cloudflow.akkastream.scaladsl.RunnableGraphStreamletLogic import cloudflow.streamlets.StreamletShape import cloudflow.streamlets.avro.AvroOutlet import connectedcar.data.ConnectedCarERecord import scala.util.Random import scala.concurrent.duration._ object RawCarDataGenerator extends AkkaStreamlet { val out = AvroOutlet[ConnectedCarERecord]("out", m ⇒ m.carId.toString + m.timestamp.toString) val shape = StreamletShape.withOutlets(out) override def createLogic = new RunnableGraphStreamletLogic() { override def runnableGraph() = Source .repeat(NotUsed) .map(_ ⇒ generateCarERecord()) // Only keep the record part of the tuple .throttle(1, 1.second) .to(plainSink(out)) } case class Driver(carId: Int, driver: String) val drivers = List( Driver(10001001, "Duncan"), Driver(10001002, "Kiki"), Driver(10001003, "Trevor"), Driver(10001004, "Jeremy"), Driver(10001005, "David"), Driver(10001006, "Nolan"), Driver(10001007, "Adam"), Driver(10001008, "Hywel") ) val status = List("whoosh", "zoom", "vrrroom") def randomDriver(): Driver = drivers(Random.nextInt(8)) //normal temp is 90c - 105c def randomTemp() = 90 + Random.nextInt(16) // battery from 1 - 100% def randomBattery() = 1 + Random.nextInt(100) //power consumption, no idea but 120 - 150 def randomPowerConsumption() = 120 + Random.nextInt(31) //highway speed 60mph - 90mph def randomSpeed() = 60 + Random.nextInt(31) def randomStatus() = status(Random.nextInt(3)) def generateCarERecord(): ConnectedCarERecord = { val driver = randomDriver; ConnectedCarERecord(System.currentTimeMillis, driver.carId, driver.driver, randomBattery, randomTemp, randomPowerConsumption, randomSpeed, randomStatus) } }
Example 111
Source File: CallRecordGeneratorIngress.scala From cloudflow with Apache License 2.0 | 5 votes |
package carly.aggregator import java.sql.Timestamp import scala.util.Random import scala.concurrent.duration._ import org.apache.spark.sql.{ Dataset, SparkSession } import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.LongType import cloudflow.streamlets._ import cloudflow.streamlets.avro._ import cloudflow.spark.sql.SQLImplicits._ import carly.data.CallRecord import cloudflow.spark.{ SparkStreamlet, SparkStreamletLogic } import org.apache.log4j.{ Level, Logger } case class Rate(timestamp: Timestamp, value: Long) class CallRecordGeneratorIngress extends SparkStreamlet { val rootLogger = Logger.getRootLogger() rootLogger.setLevel(Level.ERROR) val RecordsPerSecond = IntegerConfigParameter("records-per-second", "Records per second to process.", Some(50)) override def configParameters = Vector(RecordsPerSecond) val out = AvroOutlet[CallRecord]("out", _.user) val shape = StreamletShape(out) override def createLogic() = new SparkStreamletLogic { val recordsPerSecond = RecordsPerSecond.value override def buildStreamingQueries = { val outStream = DataGenerator.mkData(super.session, recordsPerSecond) writeStream(outStream, out, OutputMode.Append).toQueryExecution } } } object DataGenerator { def mkData(session: SparkSession, recordsPerSecond: Int): Dataset[CallRecord] = { // do we need to expose this through configuration? val MaxTime = 2.hours.toMillis val MaxUsers = 100000 val TS0 = new java.sql.Timestamp(0) val ZeroTimestampProb = 0.05 // error rate // Random Data Generator val usersUdf = udf(() ⇒ "user-" + Random.nextInt(MaxUsers)) val directionUdf = udf(() ⇒ if (Random.nextDouble() < 0.5) "incoming" else "outgoing") // Time-biased randomized filter - 1/2 hour cycles val sinTime: Long ⇒ Double = t ⇒ Math.sin((t / 1000 % 1800) * 1.0 / 1800 * Math.PI) val timeBoundFilter: Long ⇒ Double ⇒ Boolean = t ⇒ prob ⇒ (sinTime(t) + 0.5) > prob val timeFilterUdf = udf((ts: java.sql.Timestamp, rng: Double) ⇒ timeBoundFilter(ts.getTime)(rng)) val zeroTimestampUdf = udf { (ts: java.sql.Timestamp, rng: Double) ⇒ if (rng < ZeroTimestampProb) { TS0 } else { ts } } val rateStream = session.readStream .format("rate") .option("rowsPerSecond", recordsPerSecond) .load() .as[Rate] val randomDataset = rateStream.withColumn("rng", rand()).withColumn("tsRng", rand()) val sampledData = randomDataset .where(timeFilterUdf($"timestamp", $"rng")) .withColumn("user", usersUdf()) .withColumn("other", usersUdf()) .withColumn("direction", directionUdf()) .withColumn("duration", (round(abs(rand()) * MaxTime)).cast(LongType)) .withColumn("updatedTimestamp", zeroTimestampUdf($"timestamp", $"tsRng")) .select($"user", $"other", $"direction", $"duration", $"updatedTimestamp".as("timestamp")) .as[CallRecord] sampledData } }
Example 112
Source File: SparkRandomGenIngress.scala From cloudflow with Apache License 2.0 | 5 votes |
package cloudflow.sparkdoc import scala.util.Random import cloudflow.spark._ import cloudflow.streamlets._ import cloudflow.streamlets.avro._ import cloudflow.spark.sql.SQLImplicits._ import org.apache.spark.sql.Dataset import org.apache.spark.sql.streaming.OutputMode import java.sql.Timestamp class SparkRandomGenDataIngress extends SparkStreamlet { val out = AvroOutlet[Data]("out", d ⇒ d.key) val shape = StreamletShape(out) case class Rate(timestamp: Timestamp, value: Long) override def createLogic() = new SparkStreamletLogic { override def buildStreamingQueries = writeStream(process, out, OutputMode.Append).toQueryExecution private def process: Dataset[Data] = { val recordsPerSecond = 10 val keyGen: () ⇒ String = () ⇒ if (Random.nextDouble() < 0.5) "keyOne" else "keyTwo" val rateStream = session.readStream .format("rate") .option("rowsPerSecond", recordsPerSecond) .load() .as[Rate] rateStream.map { case Rate(_, value) ⇒ Data(keyGen(), value.toInt) } } } }
Example 113
Source File: OrderRepositoryInMemoryInterpreter.scala From scala-pet-store with Apache License 2.0 | 5 votes |
package io.github.pauljamescleary.petstore
package infrastructure.repository.inmemory

import scala.collection.concurrent.TrieMap
import scala.util.Random

import cats._
import cats.implicits._
import domain.orders.{Order, OrderRepositoryAlgebra}

class OrderRepositoryInMemoryInterpreter[F[_]: Applicative] extends OrderRepositoryAlgebra[F] {
  private val cache = new TrieMap[Long, Order]

  private val random = new Random

  def create(order: Order): F[Order] = {
    val toSave = order.copy(id = order.id.orElse(random.nextLong.some))
    toSave.id.foreach(cache.put(_, toSave))
    toSave.pure[F]
  }

  def get(orderId: Long): F[Option[Order]] =
    cache.get(orderId).pure[F]

  def delete(orderId: Long): F[Option[Order]] =
    cache.remove(orderId).pure[F]
}

object OrderRepositoryInMemoryInterpreter {
  def apply[F[_]: Applicative]() =
    new OrderRepositoryInMemoryInterpreter[F]()
}
Example 114
Source File: PetRepositoryInMemoryInterpreter.scala From scala-pet-store with Apache License 2.0 | 5 votes |
package io.github.pauljamescleary.petstore package infrastructure.repository.inmemory import scala.collection.concurrent.TrieMap import scala.util.Random import cats._ import cats.data.NonEmptyList import cats.implicits._ import domain.pets.{Pet, PetRepositoryAlgebra, PetStatus} class PetRepositoryInMemoryInterpreter[F[_]: Applicative] extends PetRepositoryAlgebra[F] { private val cache = new TrieMap[Long, Pet] private val random = new Random def create(pet: Pet): F[Pet] = { val id = random.nextLong val toSave = pet.copy(id = id.some) cache += (id -> pet.copy(id = id.some)) toSave.pure[F] } def update(pet: Pet): F[Option[Pet]] = pet.id.traverse { id => cache.update(id, pet) pet.pure[F] } def get(id: Long): F[Option[Pet]] = cache.get(id).pure[F] def delete(id: Long): F[Option[Pet]] = cache.remove(id).pure[F] def findByNameAndCategory(name: String, category: String): F[Set[Pet]] = cache.values .filter(p => p.name == name && p.category == category) .toSet .pure[F] def list(pageSize: Int, offset: Int): F[List[Pet]] = cache.values.toList.sortBy(_.name).slice(offset, offset + pageSize).pure[F] def findByStatus(statuses: NonEmptyList[PetStatus]): F[List[Pet]] = cache.values.filter(p => statuses.exists(_ == p.status)).toList.pure[F] def findByTag(tags: NonEmptyList[String]): F[List[Pet]] = { val tagSet = tags.toNes cache.values.filter(_.tags.exists(tagSet.contains(_))).toList.pure[F] } } object PetRepositoryInMemoryInterpreter { def apply[F[_]: Applicative]() = new PetRepositoryInMemoryInterpreter[F]() }
Example 115
Source File: CalibrationHistogramCreateBenchmark.scala From noether with Apache License 2.0 | 5 votes |
package com.spotify.noether package benchmark import com.spotify.noether.benchmark.CalibrationHistogramCreateBenchmark.CalibrationHistogramState import org.openjdk.jmh.annotations._ import scala.util.Random object PredictionUtils { def generatePredictions(nbPrediction: Int): Seq[Prediction[Boolean, Double]] = Seq.fill(nbPrediction)(Prediction(Random.nextBoolean(), Random.nextDouble())) } object CalibrationHistogramCreateBenchmark { @State(Scope.Benchmark) class CalibrationHistogramState() { @Param(Array("100", "1000", "3000")) var nbElement = 0 @Param(Array("100", "200", "300")) var nbBucket = 0 @Param(Array("0.1", "0.2", "0.3")) var lowerBound = 0.0 @Param(Array("0.2", "0.4", "0.5")) var upperBound = 0.0 var histogram: CalibrationHistogram = _ @Setup def setup(): Unit = histogram = CalibrationHistogram(lowerBound, upperBound, nbBucket) } } class CalibrationHistogramCreateBenchmark { @Benchmark def createCalibrationHistogram(calibrationHistogramState: CalibrationHistogramState): Double = calibrationHistogramState.histogram.bucketSize }
Example 116
Source File: UISeleniumSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.thriftserver import scala.util.Random import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.openqa.selenium.WebDriver import org.openqa.selenium.htmlunit.HtmlUnitDriver import org.scalatest.{BeforeAndAfterAll, Matchers} import org.scalatest.concurrent.Eventually._ import org.scalatest.selenium.WebBrowser import org.scalatest.time.SpanSugar._ import org.apache.spark.ui.SparkUICssErrorHandler class UISeleniumSuite extends HiveThriftJdbcTest with WebBrowser with Matchers with BeforeAndAfterAll { implicit var webDriver: WebDriver = _ var server: HiveThriftServer2 = _ val uiPort = 20000 + Random.nextInt(10000) override def mode: ServerMode.Value = ServerMode.binary override def beforeAll(): Unit = { webDriver = new HtmlUnitDriver { getWebClient.setCssErrorHandler(new SparkUICssErrorHandler) } super.beforeAll() } override def afterAll(): Unit = { if (webDriver != null) { webDriver.quit() } super.afterAll() } override protected def serverStartCommand(port: Int) = { val portConf = if (mode == ServerMode.binary) { ConfVars.HIVE_SERVER2_THRIFT_PORT } else { ConfVars.HIVE_SERVER2_THRIFT_HTTP_PORT } s"""$startScript | --master local | --hiveconf hive.root.logger=INFO,console | --hiveconf ${ConfVars.METASTORECONNECTURLKEY}=$metastoreJdbcUri | --hiveconf ${ConfVars.METASTOREWAREHOUSE}=$warehousePath | --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST}=localhost | --hiveconf ${ConfVars.HIVE_SERVER2_TRANSPORT_MODE}=$mode | --hiveconf $portConf=$port | --driver-class-path ${sys.props("java.class.path")} | --conf spark.ui.enabled=true | --conf spark.ui.port=$uiPort """.stripMargin.split("\\s+").toSeq } ignore("thrift server ui test") { withJdbcStatement("test_map") { statement => val baseURL = s"http://localhost:$uiPort" val queries = Seq( "CREATE TABLE test_map(key INT, value STRING)", s"LOAD DATA LOCAL INPATH '${TestData.smallKv}' OVERWRITE INTO TABLE test_map") queries.foreach(statement.execute) eventually(timeout(10 seconds), interval(50 milliseconds)) { go to baseURL find(cssSelector("""ul li a[href*="sql"]""")) should not be None } eventually(timeout(10 seconds), interval(50 milliseconds)) { go to (baseURL + "/sql") find(id("sessionstat")) should not be None find(id("sqlstat")) should not be None // check whether statements exists queries.foreach { line => findAll(cssSelector("""ul table tbody tr td""")).map(_.text).toList should contain (line) } } } } }
Example 117
Source File: MiscExpressionsSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions import java.io.PrintStream import scala.util.Random import org.apache.spark.SparkFunSuite import org.apache.spark.sql.types._ class MiscExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("assert_true") { intercept[RuntimeException] { checkEvaluation(AssertTrue(Literal.create(false, BooleanType)), null) } intercept[RuntimeException] { checkEvaluation(AssertTrue(Cast(Literal(0), BooleanType)), null) } intercept[RuntimeException] { checkEvaluation(AssertTrue(Literal.create(null, NullType)), null) } intercept[RuntimeException] { checkEvaluation(AssertTrue(Literal.create(null, BooleanType)), null) } checkEvaluation(AssertTrue(Literal.create(true, BooleanType)), null) checkEvaluation(AssertTrue(Cast(Literal(1), BooleanType)), null) } test("uuid") { checkEvaluation(Length(Uuid(Some(0))), 36) val r = new Random() val seed1 = Some(r.nextLong()) assert(evaluateWithoutCodegen(Uuid(seed1)) === evaluateWithoutCodegen(Uuid(seed1))) assert(evaluateWithGeneratedMutableProjection(Uuid(seed1)) === evaluateWithGeneratedMutableProjection(Uuid(seed1))) assert(evaluateWithUnsafeProjection(Uuid(seed1)) === evaluateWithUnsafeProjection(Uuid(seed1))) val seed2 = Some(r.nextLong()) assert(evaluateWithoutCodegen(Uuid(seed1)) !== evaluateWithoutCodegen(Uuid(seed2))) assert(evaluateWithGeneratedMutableProjection(Uuid(seed1)) !== evaluateWithGeneratedMutableProjection(Uuid(seed2))) assert(evaluateWithUnsafeProjection(Uuid(seed1)) !== evaluateWithUnsafeProjection(Uuid(seed2))) val uuid = Uuid(seed1) assert(uuid.fastEquals(uuid)) assert(!uuid.fastEquals(Uuid(seed1))) assert(!uuid.fastEquals(uuid.freshCopy())) assert(!uuid.fastEquals(Uuid(seed2))) } test("PrintToStderr") { val inputExpr = Literal(1) val systemErr = System.err val (outputEval, outputCodegen) = try { val errorStream = new java.io.ByteArrayOutputStream() System.setErr(new PrintStream(errorStream)) // check without codegen checkEvaluationWithoutCodegen(PrintToStderr(inputExpr), 1) val outputEval = errorStream.toString errorStream.reset() // check with codegen checkEvaluationWithGeneratedMutableProjection(PrintToStderr(inputExpr), 1) val outputCodegen = errorStream.toString (outputEval, outputCodegen) } finally { System.setErr(systemErr) } assert(outputCodegen.contains(s"Result of $inputExpr is 1")) assert(outputEval.contains(s"Result of $inputExpr is 1")) } }
Example 118
Source File: RandomUUIDGeneratorSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.util import scala.util.Random import org.apache.spark.SparkFunSuite class RandomUUIDGeneratorSuite extends SparkFunSuite { test("RandomUUIDGenerator should generate version 4, variant 2 UUIDs") { val generator = RandomUUIDGenerator(new Random().nextLong()) for (_ <- 0 to 100) { val uuid = generator.getNextUUID() assert(uuid.version() == 4) assert(uuid.variant() == 2) } } test("UUID from RandomUUIDGenerator should be deterministic") { val r1 = new Random(100) val generator1 = RandomUUIDGenerator(r1.nextLong()) val r2 = new Random(100) val generator2 = RandomUUIDGenerator(r2.nextLong()) val r3 = new Random(101) val generator3 = RandomUUIDGenerator(r3.nextLong()) for (_ <- 0 to 100) { val uuid1 = generator1.getNextUUID() val uuid2 = generator2.getNextUUID() val uuid3 = generator3.getNextUUID() assert(uuid1 == uuid2) assert(uuid1 != uuid3) } } test("Get UTF8String UUID") { val generator = RandomUUIDGenerator(new Random().nextLong()) val utf8StringUUID = generator.getNextUUIDUTF8String() val uuid = java.util.UUID.fromString(utf8StringUUID.toString) assert(uuid.version() == 4 && uuid.variant() == 2 && utf8StringUUID.toString == uuid.toString) } }
Example 119
Source File: ArrayDataIndexedSeqSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.util import scala.util.Random import org.apache.spark.SparkFunSuite import org.apache.spark.sql.RandomDataGenerator import org.apache.spark.sql.catalyst.encoders.{ExamplePointUDT, RowEncoder} import org.apache.spark.sql.catalyst.expressions.{FromUnsafeProjection, UnsafeArrayData, UnsafeProjection} import org.apache.spark.sql.types._ class ArrayDataIndexedSeqSuite extends SparkFunSuite { private def compArray(arrayData: ArrayData, elementDt: DataType, array: Array[Any]): Unit = { assert(arrayData.numElements == array.length) array.zipWithIndex.map { case (e, i) => if (e != null) { elementDt match { // For NaN, etc. case FloatType | DoubleType => assert(arrayData.get(i, elementDt).equals(e)) case _ => assert(arrayData.get(i, elementDt) === e) } } else { assert(arrayData.isNullAt(i)) } } val seq = arrayData.toSeq[Any](elementDt) array.zipWithIndex.map { case (e, i) => if (e != null) { elementDt match { // For Nan, etc. case FloatType | DoubleType => assert(seq(i).equals(e)) case _ => assert(seq(i) === e) } } else { assert(seq(i) == null) } } intercept[IndexOutOfBoundsException] { seq(-1) }.getMessage().contains("must be between 0 and the length of the ArrayData.") intercept[IndexOutOfBoundsException] { seq(seq.length) }.getMessage().contains("must be between 0 and the length of the ArrayData.") } private def testArrayData(): Unit = { val elementTypes = Seq(BooleanType, ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType, DecimalType.USER_DEFAULT, StringType, BinaryType, DateType, TimestampType, CalendarIntervalType, new ExamplePointUDT()) val arrayTypes = elementTypes.flatMap { elementType => Seq(ArrayType(elementType, containsNull = false), ArrayType(elementType, containsNull = true)) } val random = new Random(100) arrayTypes.foreach { dt => val schema = StructType(StructField("col_1", dt, nullable = false) :: Nil) val row = RandomDataGenerator.randomRow(random, schema) val rowConverter = RowEncoder(schema) val internalRow = rowConverter.toRow(row) val unsafeRowConverter = UnsafeProjection.create(schema) val safeRowConverter = FromUnsafeProjection(schema) val unsafeRow = unsafeRowConverter(internalRow) val safeRow = safeRowConverter(unsafeRow) val genericArrayData = safeRow.getArray(0).asInstanceOf[GenericArrayData] val unsafeArrayData = unsafeRow.getArray(0).asInstanceOf[UnsafeArrayData] val elementType = dt.elementType test("ArrayDataIndexedSeq - UnsafeArrayData - " + dt.toString) { compArray(unsafeArrayData, elementType, unsafeArrayData.toArray[Any](elementType)) } test("ArrayDataIndexedSeq - GenericArrayData - " + dt.toString) { compArray(genericArrayData, elementType, genericArrayData.toArray[Any](elementType)) } } } testArrayData() }
Example 120
Source File: TakeOrderedAndProjectSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution import scala.util.Random import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ class TakeOrderedAndProjectSuite extends SparkPlanTest with SharedSQLContext { private var rand: Random = _ private var seed: Long = 0 protected override def beforeAll(): Unit = { super.beforeAll() seed = System.currentTimeMillis() rand = new Random(seed) } private def generateRandomInputData(): DataFrame = { val schema = new StructType() .add("a", IntegerType, nullable = false) .add("b", IntegerType, nullable = false) val inputData = Seq.fill(10000)(Row(rand.nextInt(), rand.nextInt())) spark.createDataFrame(sparkContext.parallelize(Random.shuffle(inputData), 10), schema) } private def noOpFilter(plan: SparkPlan): SparkPlan = FilterExec(Literal(true), plan) val limit = 250 val sortOrder = 'a.desc :: 'b.desc :: Nil test("TakeOrderedAndProject.doExecute without project") { withClue(s"seed = $seed") { checkThatPlansAgree( generateRandomInputData(), input => noOpFilter(TakeOrderedAndProjectExec(limit, sortOrder, input.output, input)), input => GlobalLimitExec(limit, LocalLimitExec(limit, SortExec(sortOrder, true, input))), sortAnswers = false) } } test("TakeOrderedAndProject.doExecute with project") { withClue(s"seed = $seed") { checkThatPlansAgree( generateRandomInputData(), input => noOpFilter( TakeOrderedAndProjectExec(limit, sortOrder, Seq(input.output.last), input)), input => GlobalLimitExec(limit, LocalLimitExec(limit, ProjectExec(Seq(input.output.last), SortExec(sortOrder, true, input)))), sortAnswers = false) } } }
Example 121
Source File: ColumnarTestUtils.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericInternalRow = { val row = new GenericInternalRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) case _ => throw new IllegalArgumentException(s"Unknown column type $columnType") }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericInternalRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericInternalRow(1) row(0) = value row } (values, rows) } }
Example 122
Source File: hierarchyGen.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.test

import org.apache.spark.sql.types.{LongType, Node}
import org.scalacheck.{Arbitrary, Gen}

import scala.util.Random
import scalaz._
import Scalaz._
import scalaz.scalacheck.ScalazArbitrary._

// scalastyle:off file.size.limit

object HierarchyGen {

  val MIN_SIZE_TREE = 6
  val MAX_SIZE_TREE = 100

  // Shared sequence counter used by next(); the excerpt references it without a
  // declaration, so this definition (and its starting value) is assumed.
  private var currentSeq: Long = 0L

  def next(): Long = {
    synchronized {
      if (currentSeq == Long.MaxValue) {
        currentSeq = Long.MinValue
      }
      val result = currentSeq
      currentSeq += 1
      result
    }
  }

  def arb: Arbitrary[Long] = Arbitrary { gen }

  def gen: Gen[Long] = Gen.resultOf[Int, Long] { x => next() }
}
Example 123
Source File: NoSharingDepot.scala From trucking-iot with Apache License 2.0 | 5 votes |
package com.orendainx.trucking.simulator.depots import akka.actor.{ActorLogging, Props, Stash} import com.orendainx.trucking.simulator.depots.ResourceDepot.{RequestRoute, RequestTruck, ReturnRoute, ReturnTruck} import com.orendainx.trucking.simulator.generators.DataGenerator.NewResource import com.orendainx.trucking.simulator.models._ import com.orendainx.trucking.simulator.services.RouteParser import com.orendainx.trucking.simulator.models.EmptyRoute import com.typesafe.config.Config import scala.util.Random object NoSharingDepot { def props()(implicit config: Config) = Props(new NoSharingDepot()) } class NoSharingDepot(implicit config: Config) extends ResourceDepot with Stash with ActorLogging { private val trucksAvailable = Random.shuffle(1 to config.getInt("resource-depot.trucks-available")).toList.map(Truck).toBuffer private val routesAvailable = RouteParser(config.getString("resource-depot.route-directory")).routes.toBuffer log.info("Trucks and routes initialized and ready for deployment") log.info(s"${trucksAvailable.length} trucks available.") log.info(s"${routesAvailable.length} routes available.") def receive = { case RequestTruck(previous) if previous != EmptyTruck => val ind = trucksAvailable.indexWhere(_ != previous) if (ind >= 0) sender() ! NewResource(trucksAvailable.remove(ind)) else stash() // None available, stash request for later case RequestTruck(_) => if (trucksAvailable.nonEmpty) sender() ! NewResource(trucksAvailable.remove(0)) else stash() case RequestRoute(previous) if previous != EmptyRoute => val ind = routesAvailable.indexWhere(_ != previous) if (ind >= 0) sender() ! NewResource(routesAvailable.remove(ind)) else stash() case RequestRoute(_) => if (routesAvailable.nonEmpty) sender() ! NewResource(routesAvailable.remove(0)) else stash() case ReturnTruck(truck) => trucksAvailable.append(truck) unstashAll() case ReturnRoute(route) => routesAvailable.append(route) unstashAll() } }
Example 124
Source File: DriverFactory.scala From trucking-iot with Apache License 2.0 | 5 votes |
package com.orendainx.trucking.simulator.services

import com.orendainx.trucking.simulator.models.{Driver, DrivingPattern}
import com.typesafe.config.Config

import scala.collection.JavaConverters._
import scala.util.Random

// Enclosing object declaration assumed; the excerpt starts directly at the method.
object DriverFactory {

  def drivers(implicit config: Config): Seq[Driver] = {

    // Generate driving patterns
    val patterns = config.getConfigList("driver.driving-patterns").asScala.map { conf =>
      val name = conf.getString("name")
      (name, DrivingPattern(name, conf.getInt("min-speed"), conf.getInt("max-speed"),
        conf.getInt("spree-frequency"), conf.getInt("spree-length"), conf.getInt("violation-percentage")))
    }.toMap

    // First, initialize all special drivers
    val specialDrivers = config.getConfigList("driver.special-drivers").asScala.map { conf =>
      Driver(conf.getInt("id"), conf.getString("name"), patterns(conf.getString("pattern")))
    }

    // If we need more drivers, generate "normal" drivers. Or if we need to remove some special drivers, do so.
    val driverCount = config.getInt("driver.driver-count")
    if (specialDrivers.lengthCompare(driverCount) < 0)
      specialDrivers ++ ((specialDrivers.length + 1) to driverCount).map { newId =>
        Driver(newId, Random.alphanumeric.take(config.getInt("driver-name-length")).mkString, patterns("normal"))
      }
    else
      specialDrivers.take(driverCount)
  }
}
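The generated "normal" drivers above get their names from Random.alphanumeric, a lazy, infinite sequence of random letters and digits. That call in isolation (the length is an arbitrary choice here):

import scala.util.Random

object RandomNameSketch {
  // Random.alphanumeric yields [A-Za-z0-9] characters forever;
  // take(n).mkString turns the prefix into a fixed-length name.
  def randomName(length: Int = 8): String = Random.alphanumeric.take(length).mkString

  def main(args: Array[String]): Unit =
    println(Seq.fill(3)(randomName()))
}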
Example 125
Source File: AutomaticCoordinator.scala From trucking-iot with Apache License 2.0 | 5 votes |
package com.orendainx.trucking.simulator.coordinators

import akka.actor.{ActorLogging, ActorRef, PoisonPill, Props, Terminated}
import com.orendainx.trucking.simulator.coordinators.AutomaticCoordinator.TickGenerator
import com.orendainx.trucking.simulator.coordinators.GeneratorCoordinator.AcknowledgeTick
import com.orendainx.trucking.simulator.flows.FlowManager
import com.orendainx.trucking.simulator.generators.DataGenerator
import com.typesafe.config.Config

import scala.collection.mutable
import scala.concurrent.duration._
import scala.util.Random

// Companion object declaration assumed; the excerpt starts directly at props.
object AutomaticCoordinator {

  // Tick message for a single generator (shape inferred from its usage below).
  case class TickGenerator(generator: ActorRef)

  def props(eventCount: Int, generators: Seq[ActorRef], flowManager: ActorRef)(implicit config: Config) =
    Props(new AutomaticCoordinator(eventCount, generators, flowManager))
}

class AutomaticCoordinator(eventCount: Int, generators: Seq[ActorRef], flowManager: ActorRef)(implicit config: Config)
  extends GeneratorCoordinator with ActorLogging {

  // For receive messages and an execution context
  import context.dispatcher

  // Event delay settings, and initialize a counter for each data generator
  val eventDelay = config.getInt("generator.event-delay")
  val eventDelayJitter = config.getInt("generator.event-delay-jitter")
  val generateCounters = mutable.Map(generators.map((_, 0)): _*)

  // Insert each new generator into the simulation (at a random scheduled point) and begin "ticking"
  generators.foreach { generator =>
    context.system.scheduler.scheduleOnce(Random.nextInt(eventDelay + eventDelayJitter).milliseconds, self, TickGenerator(generator))
  }

  def receive = {
    case AcknowledgeTick(generator) =>
      self ! TickGenerator(generator) // Each ack triggers another tick

    case TickGenerator(generator) =>
      generateCounters.update(generator, generateCounters(generator) + 1)

      if (generateCounters(generator) <= eventCount) {
        context.system.scheduler.scheduleOnce((eventDelay + Random.nextInt(eventDelayJitter)).milliseconds, generator, DataGenerator.GenerateData)
      } else {
        // Kill the individual generator, since we are done with it.
        generator ! PoisonPill

        // If all other generators have met their count, tell flow manager to shutdown
        if (!generateCounters.values.exists(_ <= eventCount)) {
          flowManager ! FlowManager.ShutdownFlow
          context watch flowManager
        }
      }

    // Once the flow manager and its transmitters terminate, shut it all down
    case Terminated(`flowManager`) =>
      context.system.terminate()
  }
}
Example 126
Source File: TrafficGenerator.scala From trucking-iot with Apache License 2.0 | 5 votes |
package com.orendainx.trucking.simulator.generators

import java.time.Instant

import akka.actor.{ActorLogging, ActorRef, Props, Stash}
import com.orendainx.trucking.commons.models._
import com.orendainx.trucking.simulator.coordinators.GeneratorCoordinator
import com.orendainx.trucking.simulator.depots.ResourceDepot.{RequestRoute, ReturnRoute}
import com.orendainx.trucking.simulator.generators.DataGenerator.{GenerateData, NewResource}
import com.orendainx.trucking.simulator.models._
import com.orendainx.trucking.simulator.transmitters.DataTransmitter.Transmit
import com.orendainx.trucking.simulator.models.{EmptyRoute, Route}
import com.typesafe.config.Config

import scala.collection.mutable
import scala.util.Random

// Companion object declaration assumed; the excerpt starts directly at props.
object TrafficGenerator {

  def props(depot: ActorRef, flowManager: ActorRef)(implicit config: Config) =
    Props(new TrafficGenerator(depot, flowManager))
}

class TrafficGenerator(depot: ActorRef, flowManager: ActorRef)(implicit config: Config)
  extends DataGenerator with Stash with ActorLogging {

  // Some settings
  val NumberOfRoutes = config.getInt("generator.routes-to-simulate")
  val CongestionDelta = config.getInt("generator.congestion.delta")

  var congestionLevel = config.getInt("generator.congestion.start")
  var routes = mutable.Buffer.empty[Route]

  // Request NumberOfRoutes routes
  (1 to NumberOfRoutes).foreach(_ => depot ! RequestRoute(EmptyRoute))

  context become waitingOnDepot

  def waitingOnDepot: Receive = {
    case NewResource(newRoute: Route) =>
      routes += newRoute
      unstashAll()
      context become driverActive
      log.info(s"Received new route: ${newRoute.name}")

    case GenerateData =>
      stash()
      log.debug("Received Tick command while waiting on route. Command stashed for later processing.")
  }

  def driverActive: Receive = {
    case GenerateData =>
      routes.foreach { route =>
        // Create traffic data and emit it
        congestionLevel += -CongestionDelta + Random.nextInt(CongestionDelta * 2 + 1)
        val traffic = TrafficData(Instant.now().toEpochMilli, route.id, congestionLevel)
        flowManager ! Transmit(traffic)
      }

      // Tell the coordinator we've acknowledged the drive command
      sender() ! GeneratorCoordinator.AcknowledgeTick(self)
  }

  def receive = {
    case _ => log.error("This message should never be seen.")
  }

  // When this actor is stopped, release resources it may still be holding onto
  override def postStop(): Unit = routes.foreach(ReturnRoute)
}
Example 127
Source File: AvroRandomGenerator.scala From streamliner-examples with Apache License 2.0 | 5 votes |
package com.memsql.spark.examples.avro import org.apache.avro.Schema import org.apache.avro.generic.GenericData import scala.collection.JavaConversions._ import scala.util.Random class AvroRandomGenerator(inSchema: Schema) { // Avoid nested Records, since our destination is a DataFrame. val MAX_RECURSION_LEVEL: Int = 1 val topSchema: Schema = inSchema val random = new Random def next(schema: Schema = this.topSchema, level: Int = 0): Any = { if (level <= MAX_RECURSION_LEVEL) { schema.getType match { case Schema.Type.RECORD => { val datum = new GenericData.Record(schema) schema.getFields.foreach { x => datum.put(x.pos, next(x.schema, level + 1)) } datum } case Schema.Type.UNION => { val types = schema.getTypes // Generate a value using the first type in the union. // "Random type" is also a valid option. next(types(0), level) } case _ => generateValue(schema.getType) } } else { null } } def generateValue(avroType: Schema.Type): Any = avroType match { case Schema.Type.BOOLEAN => random.nextBoolean case Schema.Type.DOUBLE => random.nextDouble case Schema.Type.FLOAT => random.nextFloat case Schema.Type.INT => random.nextInt case Schema.Type.LONG => random.nextLong case Schema.Type.NULL => null case Schema.Type.STRING => getRandomString case _ => null } def getRandomString(): String = { val length: Int = 5 + random.nextInt(5) (1 to length).map(x => ('a'.toInt + random.nextInt(26)).toChar).mkString } }
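getRandomString above builds a 5-to-9 character lowercase string by offsetting 'a' with nextInt(26); the same trick appears again in the Thrift generator below. In isolation it looks like this (a sketch, not the project's code):

import scala.util.Random

object LowercaseStringSketch {
  val random = new Random

  // Length is uniform in [5, 9]; each character is uniform in 'a'..'z'.
  def getRandomString(): String = {
    val length = 5 + random.nextInt(5)
    (1 to length).map(_ => ('a' + random.nextInt(26)).toChar).mkString
  }

  def main(args: Array[String]): Unit =
    println(Seq.fill(5)(getRandomString()))
}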
Example 128
Source File: ThriftRandomGenerator.scala From streamliner-examples with Apache License 2.0 | 5 votes |
package com.memsql.spark.examples.thrift import collection.JavaConversions._ import java.lang.reflect.Method import java.nio.ByteBuffer import org.apache.thrift.{TBase, TFieldIdEnum} import org.apache.thrift.protocol.{TField, TType} import org.apache.thrift.meta_data._ import scala.util.Random object ThriftRandomGenerator { val random = new Random val MAX_RECURSION_LEVEL = 5 def next[F <: TFieldIdEnum](c: Class[_], level: Int = 0): Any = { if (level > MAX_RECURSION_LEVEL) { return null } val className = c.getName try { val tBaseClass = c.asInstanceOf[Class[TBase[_ <: TBase[_, _], F]]] val instance = tBaseClass.newInstance() val metaDataMap: Map[_ <: TFieldIdEnum, FieldMetaData] = FieldMetaData.getStructMetaDataMap(tBaseClass).toMap metaDataMap.foreach({ case (field, fieldMetaData) => val valueMetaData = fieldMetaData.valueMetaData val value = getValue(valueMetaData, level) instance.setFieldValue(instance.fieldForId(field.getThriftFieldId), value) }) instance } catch { case e: ClassCastException => throw new IllegalArgumentException(s"Class $className is not a subclass of org.apache.thrift.TBase") } } def getValue(valueMetaData: FieldValueMetaData, level: Int): Any = { if (level > MAX_RECURSION_LEVEL) { return null } valueMetaData.`type` match { case TType.BOOL => random.nextBoolean case TType.BYTE => random.nextInt.toByte case TType.I16 => random.nextInt.toShort case TType.I32 => random.nextInt case TType.I64 => random.nextLong case TType.DOUBLE => random.nextInt(5) * 0.25 case TType.ENUM => { val enumClass = valueMetaData.asInstanceOf[EnumMetaData].enumClass getEnumValue(enumClass) } case TType.STRING => { val length: Int = 5 + random.nextInt(5) val s = (1 to length).map(x => ('a'.toInt + random.nextInt(26)).toChar).mkString if (valueMetaData.isBinary) { ByteBuffer.wrap(s.getBytes) } else { s } } case TType.LIST => { val elemMetaData = valueMetaData.asInstanceOf[ListMetaData].elemMetaData val length: Int = 5 + random.nextInt(5) val ret: java.util.List[Any] = (1 to length).map(x => getValue(elemMetaData, level + 1)) ret } case TType.SET => { val elemMetaData = valueMetaData.asInstanceOf[SetMetaData].elemMetaData val length: Int = 5 + random.nextInt(5) val ret: Set[Any] = (1 to length).map(x => getValue(elemMetaData, level + 1)).toSet val javaSet: java.util.Set[Any] = ret javaSet } case TType.MAP => { val mapMetaData = valueMetaData.asInstanceOf[MapMetaData] val keyMetaData = mapMetaData.keyMetaData val mapValueMetaData = mapMetaData.valueMetaData val length: Int = 5 + random.nextInt(5) val ret: Map[Any, Any] = (1 to length).map(_ => { val mapKey = getValue(keyMetaData, level + 1) val mapValue = getValue(mapValueMetaData, level + 1) mapKey -> mapValue }).toMap val javaMap: java.util.Map[Any, Any] = ret javaMap } case TType.STRUCT => { val structClass = valueMetaData.asInstanceOf[StructMetaData].structClass next(structClass, level = level + 1) } case _ => null } } def getEnumValue(enumType: Class[_]): Any = { val enumConstants = enumType.getEnumConstants enumConstants(random.nextInt(enumConstants.length)) } }
Example 129
Source File: JsonTransformSpec.scala From play-json-ops with MIT License | 5 votes |
package play.api.libs.json.ops.v4 import org.scalatest.FlatSpec import org.scalatest.prop.GeneratorDrivenPropertyChecks._ import play.api.libs.json._ import play.api.libs.json.scalacheck.JsValueGenerators import scala.annotation.tailrec import scala.util.Random class JsonTransformSpec extends FlatSpec with CompatibilityImplicits with JsValueGenerators { @tailrec private def verifyAllRedacted(all: Seq[(JsPath, JsValue)]): Unit = { val invalid = all collect { case (path, value) if value != JsonTransform.RedactedValue => path } assert(invalid.isEmpty, s"The following paths are invalid: ${invalid.mkString(", ")}") val nextGen = all flatMap { case (path, JsArray(items)) => items.zipWithIndex map { case (item, i) => (JsPath(path.path :+ IdxPathNode(i)), item) } case (path, JsObject(fields)) => fields map { case (k, v) => (path \ k, v) } case _ => Nil } if (nextGen.nonEmpty) { verifyAllRedacted(nextGen) } } "redactPaths" should "redact selected fields by path at the top level" in { forAll { obj: JsObject => val topLevelPaths: Seq[JsPath] = obj.fields.map(__ \ _._1) whenever(topLevelPaths.nonEmpty) { val redactedPaths: Seq[JsPath] = Random.shuffle(topLevelPaths) take Random.nextInt(topLevelPaths.size) implicit val redactor: JsonTransform[Any] = JsonTransform.redactPaths[Any](redactedPaths) val redacted = obj.transformAs[Any] // Useful for debugging // if (redactedPaths.nonEmpty) { // println(Json.prettyPrint(obj)) // println(s"with redacted paths (${redactedPaths.mkString(", ")}):") // println(Json.prettyPrint(redacted)) // } for (path <- redactedPaths) { assertResult(JsonTransform.RedactedValue) { path.asSingleJson(redacted).get } } } } } "redactAll" should "redact all fields of all paths" in { implicit val redactor: JsonTransform[Any] = JsonTransform.redactAll[Any]() forAll { obj: JsObject => val redacted = obj.transformAs[Any] verifyAllRedacted(Seq(__ -> redacted)) } } }
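The redactPaths test above picks a random subset of the top-level paths by shuffling them and taking a random-length prefix. The same pattern on an arbitrary Seq (a sketch; note that nextInt needs a positive bound, which the whenever guard ensures in the test and the isEmpty check ensures here):

import scala.util.Random

object RandomSubsetSketch {
  // Returns a random subset of `xs` with between 0 and xs.size - 1 elements.
  def randomSubset[A](xs: Seq[A]): Seq[A] =
    if (xs.isEmpty) Seq.empty
    else Random.shuffle(xs).take(Random.nextInt(xs.size))

  def main(args: Array[String]): Unit =
    println(randomSubset(1 to 10))
}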
Example 130
Source File: JsonTransformSpec.scala From play-json-ops with MIT License | 5 votes |
package play.api.libs.json.ops.v4 import org.scalatest.flatspec.AnyFlatSpec import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks._ import play.api.libs.json._ import play.api.libs.json.scalacheck.JsValueGenerators import scala.annotation.tailrec import scala.util.Random class JsonTransformSpec extends AnyFlatSpec with CompatibilityImplicits with JsValueGenerators { @tailrec private def verifyAllRedacted(all: Seq[(JsPath, JsValue)]): Unit = { val invalid = all collect { case (path, value) if value != JsonTransform.RedactedValue => path } assert(invalid.isEmpty, s"The following paths are invalid: ${invalid.mkString(", ")}") val nextGen = all flatMap { case (path, JsArray(items)) => items.zipWithIndex map { case (item, i) => (JsPath(path.path :+ IdxPathNode(i)), item) } case (path, JsObject(fields)) => fields map { case (k, v) => (path \ k, v) } case _ => Nil } if (nextGen.nonEmpty) { verifyAllRedacted(nextGen) } } "redactPaths" should "redact selected fields by path at the top level" in { forAll { obj: JsObject => val topLevelPaths: Seq[JsPath] = obj.fields.map(__ \ _._1) whenever(topLevelPaths.nonEmpty) { val redactedPaths: Seq[JsPath] = Random.shuffle(topLevelPaths) take Random.nextInt(topLevelPaths.size) implicit val redactor: JsonTransform[Any] = JsonTransform.redactPaths[Any](redactedPaths) val redacted = obj.transformAs[Any] // Useful for debugging // if (redactedPaths.nonEmpty) { // println(Json.prettyPrint(obj)) // println(s"with redacted paths (${redactedPaths.mkString(", ")}):") // println(Json.prettyPrint(redacted)) // } for (path <- redactedPaths) { assertResult(JsonTransform.RedactedValue) { path.asSingleJson(redacted).get } } } } } "redactAll" should "redact all fields of all paths" in { implicit val redactor: JsonTransform[Any] = JsonTransform.redactAll[Any]() forAll { obj: JsObject => val redacted = obj.transformAs[Any] verifyAllRedacted(Seq(__ -> redacted)) } } }
Example 131
Source File: JsonTransformSpec.scala From play-json-ops with MIT License | 5 votes |
package play.api.libs.json.ops.v4 import org.scalatest.flatspec.AnyFlatSpec import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks._ import play.api.libs.json._ import play.api.libs.json.scalacheck.JsValueGenerators import scala.annotation.tailrec import scala.util.Random class JsonTransformSpec extends AnyFlatSpec with CompatibilityImplicits with JsValueGenerators { @tailrec private def verifyAllRedacted(all: Seq[(JsPath, JsValue)]): Unit = { val invalid = all collect { case (path, value) if value != JsonTransform.RedactedValue => path } assert(invalid.isEmpty, s"The following paths are invalid: ${invalid.mkString(", ")}") val nextGen = all flatMap { case (path, JsArray(items)) => items.zipWithIndex map { case (item, i) => (JsPath(path.path :+ IdxPathNode(i)), item) } case (path, JsObject(fields)) => fields map { case (k, v) => (path \ k, v) } case _ => Nil } if (nextGen.nonEmpty) { verifyAllRedacted(nextGen) } } "redactPaths" should "redact selected fields by path at the top level" in { forAll { obj: JsObject => val topLevelPaths: Seq[JsPath] = obj.fields.map(__ \ _._1).toSeq whenever(topLevelPaths.nonEmpty) { val redactedPaths: Seq[JsPath] = Random.shuffle(topLevelPaths) take Random.nextInt(topLevelPaths.size) implicit val redactor: JsonTransform[Any] = JsonTransform.redactPaths[Any](redactedPaths) val redacted = obj.transformAs[Any] // Useful for debugging // if (redactedPaths.nonEmpty) { // println(Json.prettyPrint(obj)) // println(s"with redacted paths (${redactedPaths.mkString(", ")}):") // println(Json.prettyPrint(redacted)) // } for (path <- redactedPaths) { assertResult(JsonTransform.RedactedValue) { path.asSingleJson(redacted).get } } } } } "redactAll" should "redact all fields of all paths" in { implicit val redactor: JsonTransform[Any] = JsonTransform.redactAll[Any]() forAll { obj: JsObject => val redacted = obj.transformAs[Any] verifyAllRedacted(Seq(__ -> redacted)) } } }
Example 132
Source File: DataGen.scala From chinese-restaurant-process with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.monsanto.stats.tables.clustering import scala.util.Random object DataGen { private def cannedDataFrom(csvFile: String): scala.collection.immutable.Vector[TopicVectorInput] = { scala.io.Source.fromFile(csvFile).getLines.filter(!_.isEmpty).map { line => val tokens: List[String] = line.split(", ").toList val id = tokens.head.toLong val vecMap: Map[Int, Int] = Map.empty ++ tokens.tail.grouped(2).map { slice => (slice(0).toInt, slice(1).toInt) } TopicVectorInput(id, vecMap) }.toVector } def cannedBigData: scala.collection.immutable.Vector[TopicVectorInput] = cannedDataFrom("canned-data/big-data.csv") // Parens because not functional, has the side affect of generating random numbers def getData(): scala.collection.immutable.Vector[TopicVectorInput] = getDataWithRnd(new Random) def getDataWithRnd(rnd: Random): scala.collection.immutable.Vector[TopicVectorInput] = { import breeze.linalg._ import breeze.stats.distributions._ val numberOfBags = 100000 val vocabularySize = 10000 val tablesCount = 10 val minTableSize = 100 def halves(itemsRemaining: Int, acc: List[Int]): List[Int] = { val newBinSize = itemsRemaining / 2 if(newBinSize <= minTableSize){ itemsRemaining :: acc } else { halves(itemsRemaining - newBinSize, newBinSize :: acc) } } val tablesSizes = halves(numberOfBags, Nil) val sm = 0.0001 val countOfInterestsDist = new Exponential(1/10.0) val topicInterestLevelDist = new Exponential(1/100.0) def gimmieInterests(): DenseVector[Double] = { // Returns interests for one table, length 10,000 val countOfInterests = (countOfInterestsDist.draw() + 1).toInt val interestProbs = Array.fill(vocabularySize)(sm) // size 10,000 array, filled initially with .0001 (0 to countOfInterests).foreach{ _ => // countOfInterests is exponentially distributed interestProbs(rnd.nextInt(vocabularySize)) = topicInterestLevelDist.draw() + 10 } val normalizingConstant = interestProbs.sum.toDouble DenseVector(interestProbs.map( _ / normalizingConstant)) // now they sum to 1 } val tableTopicsDistributions = Array.fill(tablesCount)(Multinomial(gimmieInterests())) // same as xmasM, reguM... val instancePerPersonDist = Gaussian(400, 100) def gimmieAPerson(tableIndex: Long, m: Multinomial[DenseVector[Double],Int]): TopicVectorInput = { // like a bag val instanceCount = Math.abs(instancePerPersonDist.draw()).toInt val instanceTopicIndexes: Map[Int, Int] = Array.fill(instanceCount)(m.draw()).groupBy(i => i).mapValues(_.length) TopicVectorInput(tableIndex, instanceTopicIndexes) } var i = 0L val topicVectors: scala.collection.immutable.Vector[TopicVectorInput] = tablesSizes.zipWithIndex.flatMap { case (tableSize, tableIdx) => Array.fill(tableSize){ gimmieAPerson(tableIdx, tableTopicsDistributions(tableIdx)) } }.map { tv => i += 1 tv.copy(id = i) }.toVector println("###topicVectors.map(_.id).distinct.length: " + topicVectors.map(_.id).distinct.length) assert(topicVectors.map(_.id).distinct.length == 100000) topicVectors } }
Example 133
Source File: BoardStateTests.scala From checkers with Apache License 2.0 | 5 votes |
package checkers.core import checkers.consts._ import checkers.test.TestSuiteBase import utest._ import utest.framework._ import scala.util.Random object BoardStateTests extends TestSuiteBase { private val allPieces = List(LIGHTMAN, DARKMAN, LIGHTKING, DARKKING) private val allSquares = Board.playableSquares.toSet private def shuffledSquares() = Random.shuffle(Board.playableSquares.toList) private def randomSquares(count: Int) = shuffledSquares().take(count) override def tests: Tree[Test] = TestSuite { 'BoardState { 'PlacePieces { val squares = randomSquares(4) val placements = squares.zip(allPieces) val bs = placements.foldLeft(BoardState.empty){ case (result, (square, piece)) => result.updated(square, piece) } // pieces in correct place placements.foreach { case (square, piece) => val occupant = bs.getOccupant(square) assert(occupant == piece) } // all other squares are empty (allSquares -- squares.toSet).foreach { square => assert(bs.isSquareEmpty(square)) } } } } }
Example 134
package benchmarks import scala.annotation.tailrec import scala.util.Random import java.util.concurrent.ExecutorService trait Gen[T] { def apply(dist: List[(Gen.Op, Int)])(implicit s: ExecutorService): T = { val depth = 100 val rnd = new Random(1) import rnd._ val values = dist.collect { case (g: Gen.Value, i) => (g, i) } val transforms = dist.collect { case (g: Gen.Transform, i) => (g, i) } require(values.nonEmpty) def choose[O <: Gen.Op](l: List[(O, Int)]): O = { @tailrec def find(n: Int, prev: Int, l: List[(O, Int)]): O = { l match { case Nil => ??? case (o, i) :: tail => if (prev + i > n) o else find(n, prev + i, tail) } } val max = l.map(_._2).sum find(nextInt(max), 0, l) } val ex = new Exception def genValue: T = choose(values) match { case Gen.Async => async(s.submit(_)) case Gen.Sync => sync case Gen.Failure => failure(ex) } def genTransform(depth: Int, t: T): T = depth match { case 0 => t case _ => choose(transforms) match { case Gen.Map => val i = nextInt genTransform(depth - 1, map(t, _ + i)) case Gen.FlatMap => val d = nextInt(depth) val n = genTransform(depth - d, genValue) genTransform(d, flatMap(t, n)) case Gen.Handle => val i = nextInt genTransform(depth - 1, handle(t, i)) } } genTransform(depth, genValue) } def sync: T def async(schedule: Runnable => Unit): T def failure(ex: Throwable): T def map(t: T, f: Int => Int): T def flatMap(t: T, f: T): T def handle(t: T, i: Int): T } object Gen { sealed trait Op sealed trait Value extends Op case object Async extends Value case object Sync extends Value case object Failure extends Value sealed trait Transform extends Op case object Map extends Transform case object FlatMap extends Transform case object Handle extends Transform }
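The choose helper above implements weighted sampling: it draws nextInt over the total weight and walks the list, accumulating weights until the drawn number falls inside an entry's band. A self-contained sketch of that technique (names and the example distribution are illustrative):

import scala.annotation.tailrec
import scala.util.Random

object WeightedChoiceSketch {
  // Pick one element, with probability proportional to its weight.
  def choose[A](weighted: List[(A, Int)], rnd: Random): A = {
    val max = weighted.map(_._2).sum
    @tailrec
    def find(n: Int, prev: Int, l: List[(A, Int)]): A = l match {
      case Nil            => throw new IllegalStateException("empty distribution")
      case (a, w) :: tail => if (prev + w > n) a else find(n, prev + w, tail)
    }
    find(rnd.nextInt(max), 0, weighted)
  }

  def main(args: Array[String]): Unit = {
    val dist = List("sync" -> 7, "async" -> 2, "failure" -> 1)
    val rnd = new Random(42)
    val draws = Seq.fill(10000)(choose(dist, rnd))
    println(draws.groupBy(identity).map { case (k, v) => k -> v.size }) // roughly 7:2:1
  }
}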
Example 135
Source File: ProduceDemo.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.web.guide.views.frontend.demos import com.github.ghik.silencer.silent import io.udash.web.guide.demos.AutoDemo import io.udash.web.guide.styles.partials.GuideStyles import scalatags.JsDom.all._ object ProduceDemo extends AutoDemo { private val (rendered, source) = { import io.udash._ import io.udash.css.CssView._ import org.scalajs.dom.window import scalatags.JsDom.all._ import scala.util.Random @silent("deprecated") val names = Stream.continually(Stream("John", "Amy", "Bryan", "Diana")).flatten.iterator val name = Property(names.next()) val integers = SeqProperty(1, 2, 3, 4) window.setInterval(() => { name.set(names.next()) val size = integers.get.size val idx = Random.nextInt(size) val amount = Random.nextInt(size - idx) + 1 val count = Random.nextInt(5) integers.replace(idx, amount, Stream.range(idx, idx + amount * count + 1, amount): _*): @silent("deprecated") }, 2000) p( "Name: ", produce(name)(value => b(id := "produce-demo-name")(value).render), br, "Integers: ", span(id := "produce-demo-integers")( produce(integers)(seq => span(GuideStyles.highlightRed)(seq.mkString(",")).render) ) ).render }.withSourceCode override protected def demoWithSource(): (Modifier, Iterator[String]) = { import io.udash.css.CssView._ ( div( id := "produce-demo", GuideStyles.frame )(rendered), source.linesIterator ) } }
Example 136
Source File: RepeatDemo.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.web.guide.views.frontend.demos import com.github.ghik.silencer.silent import io.udash.web.guide.demos.AutoDemo import io.udash.web.guide.styles.partials.GuideStyles import scalatags.JsDom.all._ object RepeatDemo extends AutoDemo { private val (rendered, source) = { import io.udash._ import io.udash.css.CssView._ import org.scalajs.dom.window import scalatags.JsDom.all._ import scala.util.Random val integers = SeqProperty(1, 2, 3, 4) window.setInterval(() => { val size = integers.get.size val idx = Random.nextInt(size) val amount = Random.nextInt(size - idx) + 1 val count = Random.nextInt(5) integers.replace(idx, amount, Stream.range(idx, idx + amount * count + 1, amount): _*): @silent("deprecated") }, 2000) p( "Integers: ", span(id := "repeat-demo-integers")(repeat(integers)(p => span(GuideStyles.highlightRed)(s"${p.get}, ").render )), br, "Integers (produce): ", produce(integers)(seq => span(id := "repeat-demo-integers-produce")( seq.map(p => span(GuideStyles.highlightRed)(s"$p, ")) ).render) ) }.withSourceCode override protected def demoWithSource(): (Modifier, Iterator[String]) = { import io.udash.css.CssView._ (div(id := "repeat-demo", GuideStyles.frame)(rendered), source.linesIterator) } }
Example 137
Source File: TableDemo.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.web.guide.views.ext.demo.bootstrap import io.udash.css.CssView import io.udash.web.guide.demos.AutoDemo import io.udash.web.guide.styles.partials.GuideStyles import scalatags.JsDom.all._ object TableDemo extends AutoDemo with CssView { private val (rendered, source) = { import io.udash._ import io.udash.bootstrap._ import BootstrapStyles.ResponsiveBreakpoint import io.udash.bootstrap.button.{UdashButton, UdashButtonGroup} import io.udash.bootstrap.table.UdashTable import scalatags.JsDom.all._ import scala.util.Random val responsive = Property[Option[ResponsiveBreakpoint]](Some(ResponsiveBreakpoint.All)) val dark = Property(false) val striped = Property(true) val bordered = Property(true) val hover = Property(true) val small = Property(false) val darkButton = UdashButton.toggle(active = dark)("Dark theme") val stripedButton = UdashButton.toggle(active = striped)("Striped") val borderedButton = UdashButton.toggle(active = bordered)("Bordered") val hoverButton = UdashButton.toggle(active = hover)("Hover") val smallButton = UdashButton.toggle(active = small)("Small") val items = SeqProperty(Seq.fill(7, 3)(Random.nextDouble())) val table = UdashTable(items, responsive, dark, striped, bordered, hover, small)( headerFactory = Some(_ => tr(Seq("x", "y", "z").map(header => th(b(header)))).render), rowFactory = (el, nested) => tr( nested(produce(el)(_.map(td(_).render))) ).render ) div( UdashButtonGroup(justified = true.toProperty)( darkButton.render, stripedButton.render, borderedButton.render, hoverButton.render, smallButton.render ), table ).render }.withSourceCode override protected def demoWithSource(): (Modifier, Iterator[String]) = { (rendered.setup(_.applyTags(GuideStyles.frame)), source.linesIterator) } }
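Seq.fill(7, 3)(Random.nextDouble()) above produces a 7-row, 3-column table of fresh random values, because the two-argument overload nests the fills and re-evaluates the expression per cell. A quick illustration:

import scala.util.Random

object NestedFillSketch {
  def main(args: Array[String]): Unit = {
    // Seq.fill(rows, cols)(expr) is equivalent to Seq.fill(rows)(Seq.fill(cols)(expr)):
    // the expression is re-evaluated for every cell, so all 21 values differ.
    val table: Seq[Seq[Double]] = Seq.fill(7, 3)(Random.nextDouble())
    table.foreach(row => println(row.map(d => f"$d%.3f").mkString(" | ")))
  }
}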
Example 138
Source File: AlertsDemo.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.web.guide.views.ext.demo.bootstrap import io.udash.web.guide.demos.AutoDemo import io.udash.web.guide.styles.partials.GuideStyles import scalatags.JsDom.all._ object AlertsDemo extends AutoDemo { private val (rendered, source) = { import io.udash._ import io.udash.bootstrap._ import BootstrapStyles._ import io.udash.bootstrap.alert._ import io.udash.bootstrap.button.UdashButton import io.udash.css.CssView._ import org.scalajs.dom.Element import scalatags.JsDom.all._ import scala.util.Random val dismissed = SeqProperty.blank[String] def contentCentered: Seq[Modifier] = { Seq(Display.flex(), Flex.justifyContent(FlexContentJustification.Center)) } def randomDismissible: Element = { val title = Random.nextLong().toString val alert = DismissibleUdashAlert( alertStyle = Color.values( Random.nextInt(Color.values.size) ).toProperty )(div(title, contentCentered)) alert.dismissed.listen(_ => dismissed.append(title)) alert.render } val alerts = div()( UdashAlert(Color.Info.toProperty)(div("info", contentCentered)), UdashAlert(Color.Success.toProperty)(div("success", contentCentered)), UdashAlert(Color.Warning.toProperty)(div("warning", contentCentered)), UdashAlert(Color.Danger.toProperty)(div("danger", contentCentered)) ).render val create = UdashButton()("Create dismissible alert") create.listen { case _ => alerts.appendChild(randomDismissible) } div( alerts, create, div(Spacing.margin( side = Side.Top, size = SpacingSize.Normal ))( h4("Dismissed: "), div(Card.card, Card.body, Background.color(Color.Light))( produce(dismissed)(seq => ul(seq.map(li(_))).render) ) ) ).render }.withSourceCode override protected def demoWithSource(): (Modifier, Iterator[String]) = { import io.udash.css.CssView._ (rendered.setup(_.applyTags(GuideStyles.frame)), source.linesIterator) } }
Example 139
Source File: ButtonsDemo.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.web.guide.views.ext.demo.bootstrap import io.udash.web.guide.demos.AutoDemo import io.udash.web.guide.styles.partials.GuideStyles import scalatags.JsDom.all._ object ButtonsDemo extends AutoDemo { private val (rendered, source) = { import io.udash._ import io.udash.bootstrap._ import BootstrapStyles._ import io.udash.bootstrap.button.UdashButton import io.udash.css.CssStyle import io.udash.css.CssView._ import scalatags.JsDom.all._ import scala.util.Random val smallBtn = Some(Size.Small).toProperty[Option[Size]] val disabledButtons = Property(Set.empty[Int]) def bottomMargin: CssStyle = { Spacing.margin( side = Side.Bottom, size = SpacingSize.Normal ) } val buttons = Color.values.map(color => UdashButton( color.toProperty, smallBtn, disabled = disabledButtons.transform(_.contains(color.ordinal)) )(_ => Seq[Modifier]( color.name, Spacing.margin(size = SpacingSize.ExtraSmall) )) ) val clicks = SeqProperty[String](Seq.empty) buttons.foreach(_.listen { case UdashButton.ButtonClickEvent(source, _) => clicks.append(source.render.textContent) }) val push = UdashButton( size = Some(Size.Large).toProperty, block = true.toProperty )("Disable random buttons!") push.listen { case UdashButton.ButtonClickEvent(_, _) => clicks.set(Seq.empty) val maxDisabledCount = Random.nextInt(buttons.size + 1) disabledButtons.set(Seq.fill(maxDisabledCount)( Random.nextInt(buttons.size) ).toSet) } div( div(bottomMargin)(push), div( Display.flex(), Flex.justifyContent(FlexContentJustification.Center), bottomMargin )(buttons), h4("Clicks: "), produce(clicks)(seq => ul(Card.card, Card.body, Background.color(Color.Light))(seq.map(li(_))).render ) ).render }.withSourceCode override protected def demoWithSource(): (Modifier, Iterator[String]) = { import io.udash.css.CssView._ (rendered.setup(_.applyTags(GuideStyles.frame)), source.linesIterator) } }
Example 140
Source File: CarouselDemo.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.web.guide.views.ext.demo.bootstrap import io.udash.css.CssView import io.udash.web.guide.demos.AutoDemo import io.udash.web.guide.styles.partials.GuideStyles import scalatags.JsDom.all._ object CarouselDemo extends AutoDemo with CssView { private val (rendered, source) = { import io.udash._ import io.udash.bootstrap._ import io.udash.bootstrap.button.{UdashButton, UdashButtonGroup, UdashButtonToolbar} import io.udash.bootstrap.carousel.UdashCarousel.AnimationOptions import io.udash.bootstrap.carousel.{UdashCarousel, UdashCarouselSlide} import scalatags.JsDom.all._ import scala.concurrent.duration._ import scala.util.Random def randomString(): String = { Random.nextLong().toString } def newSlide(): UdashCarouselSlide = { UdashCarouselSlide( Url("/assets/images/ext/bootstrap/carousel.jpg") )( h3(randomString()), p(randomString()) ) } val slides = SeqProperty((1 to 5).map(_ => newSlide())) val active = Property(false) val animationOptions = active.transform(a => AnimationOptions( interval = 2.seconds, keyboard = false, active = a )) val carousel = UdashCarousel( slides = slides, activeSlide = Property(1), animationOptions = animationOptions ) { case (slide, nested) => nested(produce(slide)(_.render)) } val prevButton = UdashButton()("Prev") val nextButton = UdashButton()("Next") val prependButton = UdashButton()("Prepend") val appendButton = UdashButton()("Append") prevButton.listen { case _ => carousel.previousSlide() } nextButton.listen { case _ => carousel.nextSlide() } prependButton.listen { case _ => slides.prepend(newSlide()) } appendButton.listen { case _ => slides.append(newSlide()) } div( div( UdashButtonToolbar()( UdashButton.toggle(active = active)( "Run animation" ).render, UdashButtonGroup()( prevButton.render, nextButton.render ).render, UdashButtonGroup()( prependButton.render, appendButton.render ).render ) ), div(carousel.render) ).render }.withSourceCode override protected def demoWithSource(): (Modifier, Iterator[String]) = { (rendered.setup(_.applyTags(GuideStyles.frame)), source.linesIterator) } }
Example 141
Source File: SerializationIntegrationTestBase.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.rpc import com.avsystem.commons.serialization.{GenCodec, Input, Output} import io.udash.testing.UdashSharedTest import org.scalactic.source.Position import scala.util.Random class SerializationIntegrationTestBase extends UdashSharedTest with Utils { val repeats = 1000 def tests(implicit pos: Position): Unit = { "serialize and deserialize all types" in { for (i <- 1 to repeats) { def cc() = TestCC(Random.nextInt(), Random.nextLong(), 123, Random.nextBoolean(), Random.nextString(200), List.fill(Random.nextInt(200))('a')) def ncc() = NestedTestCC(Random.nextInt(), cc(), cc()) def dncc(counter: Int = 0): DeepNestedTestCC = if (counter < 200) DeepNestedTestCC(ncc(), dncc(counter + 1)) else DeepNestedTestCC(ncc(), null) val test: DeepNestedTestCC = dncc() val serialized = write(test) val deserialized = read[DeepNestedTestCC](serialized) deserialized should be(test) } } "serialize and deserialize types with custom gencodec" in { implicit def optionGencodec[T: GenCodec]: GenCodec[Option[T]] = new GenCodec[Option[T]] { override def write(output: Output, value: Option[T]): Unit = value match { case Some(v) => implicitly[GenCodec[T]].write(output, v) case None => output.writeNull() } override def read(input: Input): Option[T] = if (input.readNull()) None else Some(implicitly[GenCodec[T]].read(input)) } val testOpts = Seq( None, Some(10L), Some(Long.MaxValue) ) testOpts.foreach(opt => { val serialized = write(opt) val deserialized = read[Option[Long]](serialized) deserialized should be(opt) }) } } }
Example 142
Source File: Utils.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.rpc import com.avsystem.commons.serialization._ import com.avsystem.commons.serialization.json.{JsonStringInput, JsonStringOutput} import scala.util.Random trait Utils { def completeItem() = CompleteItem( unit = (), string = Random.nextString(Random.nextInt(20)), specialString = "\n\f\b\t\r\n\\\"\\\\", char = Random.nextString(1).head, boolean = Random.nextBoolean(), byte = Random.nextInt().toByte, short = Random.nextInt().toShort, int = Random.nextInt(), long = Random.nextLong(), float = Random.nextFloat(), double = Random.nextDouble(), binary = Array.fill(Random.nextInt(20))(Random.nextInt().toByte), list = List.fill(Random.nextInt(20))(Random.nextString(Random.nextInt(20))), set = List.fill(Random.nextInt(20))(Random.nextString(Random.nextInt(20))).toSet, obj = TestCC(Random.nextInt(), Random.nextLong(), Random.nextInt(), Random.nextBoolean(), Random.nextString(Random.nextInt(20)), Nil), map = Map(Seq.fill(Random.nextInt(20))(Random.nextString(20) -> Random.nextInt()): _*) ) implicit val codec: GenCodec[TestCC] = GenCodec.materialize[TestCC] implicit val codecN: GenCodec[NestedTestCC] = GenCodec.materialize[NestedTestCC] implicit val codecDN: GenCodec[DeepNestedTestCC] = new GenCodec[DeepNestedTestCC] { override def read(input: Input): DeepNestedTestCC = { def _read(acc: List[NestedTestCC])(next: Input): DeepNestedTestCC = if (next.readNull()) { acc.foldLeft(null: DeepNestedTestCC)((acc: DeepNestedTestCC, n: NestedTestCC) => DeepNestedTestCC(n, acc)) } else { val obj = next.readObject() val n: NestedTestCC = obj.nextField() match { case in if in.fieldName == "n" => codecN.read(in) } obj.nextField() match { case in if in.fieldName == "nest" => _read(n :: acc)(in) } } _read(Nil)(input) } override def write(output: Output, value: DeepNestedTestCC): Unit = { val obj = output.writeObject() codecN.write(obj.writeField("n"), value.n) val f = obj.writeField("nest") if (value.l != null) this.write(f, value.l) else f.writeNull() obj.finish() } } implicit val codecCI: GenCodec[CompleteItem] = GenCodec.materialize[CompleteItem] def write[T: GenCodec](value: T): JsonStr = JsonStr(JsonStringOutput.write(value)) def read[T: GenCodec](jsonStr: JsonStr): T = JsonStringInput.read[T](jsonStr.json) }
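completeItem above leans on Random.nextString, which draws characters well beyond ASCII (anywhere below the surrogate block in current Scala versions), making it a harsher input for a codec round-trip than alphanumeric strings. A small comparison sketch, standard library only:

import scala.util.Random

object NextStringSketch {
  def main(args: Array[String]): Unit = {
    val s = Random.nextString(10)
    // Print the code points to see how far outside ASCII nextString ranges.
    println(s.map(c => "U+%04X".format(c.toInt)).mkString(" "))
    println(Random.alphanumeric.take(10).mkString) // compare: ASCII letters and digits only
  }
}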
Example 143
Source File: TooltipTestUtils.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.bootstrap.tooltip import io.udash._ import io.udash.testing.AsyncUdashCoreFrontendTest import scala.concurrent.Future import scala.util.Random class TooltipTestUtils extends AsyncUdashCoreFrontendTest { def tooltipTest(companion: TooltipUtils[_ <: Tooltip], expectContent: Boolean): Unit = { "display translated content" in { import io.udash.i18n._ import io.udash.wrappers.jquery._ import scalatags.JsDom.all._ val body = jQ("body") val item = button("btn").render body.append(item) val randMarker = Random.nextInt() implicit val lang = Property(Lang("test")) implicit val tp = new LocalTranslationProvider( Map( Lang("test") -> Bundle(BundleHash("h"), Map("a" -> s"$randMarker:AAA", "b" -> s"$randMarker:BBB")), Lang("test2") -> Bundle(BundleHash("h"), Map("a" -> s"$randMarker:ccc", "b" -> s"$randMarker:ddd")) ) ) val tooltip = companion.apply( title = span(translatedDynamic(TranslationKey.key("a"))(_.apply())).render, content = span(translatedDynamic(TranslationKey.key("b"))(_.apply())).render )(item) def expectedText(): String = if (expectContent) s"$randMarker:AAA$randMarker:BBB" else s"$randMarker:AAA" def secondExpectedText(): String = if (expectContent) s"$randMarker:ccc$randMarker:ddd" else s"$randMarker:ccc" body.text() shouldNot include(expectedText()) body.text() shouldNot include(secondExpectedText()) for { _ <- Future(tooltip.show()) _ <- retrying { body.text() should include(expectedText()) body.text() shouldNot include(secondExpectedText()) } _ <- Future(tooltip.hide()) _ <- retrying { body.text() shouldNot include(expectedText()) body.text() shouldNot include(secondExpectedText()) } _ <- Future(lang.set(Lang("test2"))) _ <- Future(tooltip.show()) _ <- retrying { body.text() shouldNot include(expectedText()) body.text() should include(secondExpectedText()) } _ <- Future(tooltip.hide()) r <- retrying { body.text() shouldNot include(expectedText()) body.text() shouldNot include(secondExpectedText()) } } yield r } } }
Example 144
Source File: BenchmarkUtils.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.benchmarks.properties import com.avsystem.commons._ import io.udash._ import japgolly.scalajs.benchmark.Benchmark import scala.util.Random trait BenchmarkUtils { case class ModelItem(i: Int, s: String, sub: Option[ModelItem]) object ModelItem extends HasModelPropertyCreator[ModelItem] { def random: ModelItem = ModelItem( Random.nextInt(100), Random.nextString(5), Some(ModelItem(Random.nextInt(100), Random.nextString(5), None)) ) } sealed trait ModelWithSeqItem { def i: Int def s: String def seq: BSeq[Int] } case class ModelWithBSeqItem(i: Int, s: String, seq: BSeq[Int]) extends ModelWithSeqItem object ModelWithBSeqItem extends HasModelPropertyCreator[ModelWithBSeqItem] { def random: ModelWithBSeqItem = ModelWithBSeqItem( Random.nextInt(100), Random.nextString(5), 1 to Random.nextInt(100) + 100 ) } case class ModelWithISeqItem(i: Int, s: String, seq: ISeq[Int]) extends ModelWithSeqItem object ModelWithISeqItem extends HasModelPropertyCreator[ModelWithISeqItem] { def random: ModelWithISeqItem = ModelWithISeqItem( Random.nextInt(100), Random.nextString(5), 1 to Random.nextInt(100) + 100 ) } def slowInc(v: Int): Int = { var r = v (1 to 10000).foreach(_ => r += 1) r } def slowDec(v: Int): Int = { var r = v (1 to 10000).foreach(_ => r -= 1) r } def addEmptyListeners[T](p: T)(count: Int, listenOp: T => Unit): Unit = { (1 to count).foreach(_ => listenOp(p)) } def setAndGetValues[T1, T2](p: T1, t: T2)(count: Int, getToSetRatio: Double, setOp: (T1, Int) => Unit, getOp: T2 => Any): Unit = { var counter: Double = 0 (1 to count).foreach { i => setOp(p, i) counter += getToSetRatio while (counter >= 1) { getOp(t) counter -= 1 } } } def replaceElements(p: SeqProperty[Int], i: Int): Unit = { val start = Random.nextInt(p.size / 2) val count = Random.nextInt(p.size / 3) p.replace(start, count, Seq.tabulate(count)(_ + i): _*) } def generateGetSetListenBenchmarks[T1, T2](properties: Seq[(String, () => (T1, T2))])( setsCounts: Seq[Int], getToSetRatios: Seq[Double], listenersCounts: Seq[Int], setAndGetOps: Seq[(String, (T1, Int) => Unit, T2 => Any)], listenOps: Seq[(String, T2 => Unit)] ): Seq[Benchmark[Unit]] = { var id = 0 for { propertyCreator <- properties setAndGetOp <- setAndGetOps listenOp <- listenOps listenersCount <- listenersCounts setsCount <- setsCounts getToSetRatio <- getToSetRatios } yield { val (propertiesDesc, props) = propertyCreator val (setAndGetDesc, setter, getter) = setAndGetOp val (listenerDesc, listener) = listenOp id += 1 Benchmark(s"${"%03d".format(id)}. set and get ($setsCount and ${setsCount * getToSetRatio} times - $setAndGetDesc) on $propertiesDesc with $listenersCount listeners ($listenerDesc)") { val (p, t) = props() addEmptyListeners(t)(listenersCount, listener) setAndGetValues(p, t)(setsCount, getToSetRatio, setter, getter) } } } }
Example 145
Source File: GroupedButtonsBinding.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.bindings.inputs import io.udash._ import org.scalajs.dom.html.{Div, Input => JSInput} import org.scalajs.dom.{Event, Node} import scalatags.JsDom.all._ import scala.util.Random private[inputs] class GroupedButtonsBinding[T]( options: ReadableSeqProperty[T], decorator: Seq[(JSInput, T)] => Seq[Node], inputModifiers: Modifier* )( inputTpe: String, checkedIf: T => ReadableProperty[Boolean], refreshSelection: Seq[T] => Unit, onChange: (JSInput, T) => Event => Unit ) extends InputBinding[Div] { private val groupIdPrefix: Long = Random.nextLong private val buttons = div( produce(options) { opts => kill() refreshSelection(opts) decorator( opts.zipWithIndex.map { case (opt, idx) => val in = input( id := s"$groupIdPrefix-$idx", // default id, can be replaced by `inputModifiers` inputModifiers, tpe := inputTpe, value := idx.toString ).render val selected = checkedIf(opt) propertyListeners += selected.listen(in.checked = _, initUpdate = true) in.onchange = onChange(in, opt) (in, opt) } ) } ).render override def render: Div = buttons }
Example 146
Source File: RetryPolicies.scala From cats-retry with Apache License 2.0 | 5 votes |
package retry import java.util.concurrent.TimeUnit import cats.Applicative import cats.syntax.functor._ import cats.syntax.show._ import cats.instances.finiteDuration._ import cats.instances.int._ import retry.PolicyDecision._ import scala.concurrent.duration.{Duration, FiniteDuration} import scala.util.Random object RetryPolicies { private val LongMax: BigInt = BigInt(Long.MaxValue) def limitRetriesByCumulativeDelay[M[_]: Applicative]( threshold: FiniteDuration, policy: RetryPolicy[M] ): RetryPolicy[M] = { def decideNextRetry(status: RetryStatus): M[PolicyDecision] = policy.decideNextRetry(status).map { case r @ DelayAndRetry(delay) => if (status.cumulativeDelay + delay >= threshold) GiveUp else r case GiveUp => GiveUp } RetryPolicy.withShow[M]( decideNextRetry, show"limitRetriesByCumulativeDelay(threshold=$threshold, $policy)" ) } }
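The cats-retry excerpt above is trimmed to a single combinator; scala.util.Random is imported presumably for the jittered backoff policies that are not shown here. As a library-agnostic illustration (a sketch using only the standard library, not cats-retry's API), "full jitter" draws the actual sleep uniformly between zero and a capped exponential delay:

import scala.concurrent.duration._
import scala.util.Random

object JitterSketch {
  // Exponential backoff capped at `max`, with full jitter applied to the capped delay.
  def fullJitterDelay(base: FiniteDuration, attempt: Int, max: FiniteDuration): FiniteDuration = {
    val exp    = base * math.pow(2.0, attempt.toDouble).toLong
    val capped = if (exp < max) exp else max
    (Random.nextDouble() * capped.toNanos).toLong.nanos
  }

  def main(args: Array[String]): Unit =
    (0 to 5).foreach(a => println(s"attempt $a -> ${fullJitterDelay(100.millis, a, 10.seconds)}"))
}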
Example 147
Source File: JavaIdentifierSpec.scala From tscfg with Apache License 2.0 | 5 votes |
package tscfg import org.specs2.mutable.Specification import org.specs2.specification.core.Fragments import tscfg.generators.java.javaUtil.{javaKeywords, javaIdentifier} import scala.util.Random object javaIdentifierSpec extends Specification { """javaIdentifier""" should { List("foo", "bar_3", "$baz").foldLeft(Fragments.empty) { (res, id) => res.append(s"""keep valid identifier "$id"""" in { javaIdentifier(id) must_== id }) } Random.shuffle(javaKeywords).take(3).foldLeft(Fragments.empty) { (res, kw) => res.append(s"""convert java keyword "$kw" to "${kw}_"""" in { javaIdentifier(kw) must_== kw + "_" }) } List("foo-bar", "foo:bar", "foo#bar").foldLeft(Fragments.empty) { (res, id) => res.append(s"""replace non java id character with '_': "$id" -> "foo_bar"""" in { javaIdentifier(id) must_== "foo_bar" }) } s"""prefix with '_' if first character is valid but not at first position: "21" -> "_21"""" in { javaIdentifier("21") must_== "_21" } } }
Example 148
Source File: scalaIdentifierSpec.scala From tscfg with Apache License 2.0 | 5 votes |
package tscfg import org.specs2.mutable.Specification import org.specs2.specification.core.Fragments import tscfg.generators.scala.ScalaUtil import tscfg.generators.scala.ScalaUtil.scalaReservedWords import scala.util.Random object scalaIdentifierSpec extends Specification { """scalaIdentifier""" should { val scalaUtil: ScalaUtil = new ScalaUtil() import scalaUtil.scalaIdentifier List("foo", "bar_3", "$baz").foldLeft(Fragments.empty) { (res, id) => res.append(s"""keep valid identifier "$id"""" in { scalaIdentifier(id) must_== id }) } Random.shuffle(scalaReservedWords).take(3).foldLeft(Fragments.empty) { (res, w) => val e = "`" +w + "`" res.append(s"""convert scala reserved word "$w" to "$e"""" in { scalaIdentifier(w) must_== e }) } List("foo-bar", "foo:bar", "foo#bar").foldLeft(Fragments.empty) { (res, id) => res.append(s"""replace non scala id character with '_': "$id" -> "foo_bar"""" in { scalaIdentifier(id) must_== "foo_bar" }) } s"""prefix with '_' if first character is valid but not at first position: "21" -> "_21"""" in { scalaIdentifier("21") must_== "_21" } } """scalaIdentifier with useBackticks=true""" should { val scalaUtil: ScalaUtil = new ScalaUtil(useBackticks = true) import scalaUtil.scalaIdentifier List("foo-bar", "foo:bar", "foo#bar").foldLeft(Fragments.empty) { (res, id) => res.append(s"""put non scala id with backticks: "$id" -> "`$id`"""" in { scalaIdentifier(id) must_== s"`$id`" }) } List("0", "1", "3").foldLeft(Fragments.empty) { (res, id) => res.append(s"""put literal number with backticks: "$id" -> "`$id`"""" in { scalaIdentifier(id) must_== s"`$id`" }) } } }
Example 149
Source File: PutRecordAction.scala From aws-kinesis-scala with Apache License 2.0 | 5 votes |
package jp.co.bizreach.kinesisfirehose.action import com.amazonaws.retry.PredefinedRetryPolicies.DEFAULT_MAX_ERROR_RETRY import com.amazonaws.services.kinesisfirehose.model.ServiceUnavailableException import jp.co.bizreach.kinesisfirehose._ import org.slf4j.LoggerFactory import scala.annotation.tailrec import scala.collection.mutable.ArrayBuffer import scala.math._ import scala.util.Random trait PutRecordAction { private val logger = LoggerFactory.getLogger(getClass) def withPutBatchRetry(records: Seq[Array[Byte]], retryLimit: Int = DEFAULT_MAX_ERROR_RETRY) (f: Seq[Array[Byte]] => PutRecordBatchResult): Seq[Either[PutRecordBatchResponseEntry, PutRecordBatchResponseEntry]] = { val buffer = ArrayBuffer[Either[PutRecordBatchResponseEntry, PutRecordBatchResponseEntry]](Nil.padTo(records.size, null): _*) @tailrec def put0(records: Seq[(Array[Byte], Int)], retry: Int = 0): Unit = { val result = f(records.map(_._1)) val failed = records zip result.records flatMap { case ((_, i), entry) if Option(entry.errorCode).isEmpty => buffer(i) = Right(entry) None case ((record, i), entry) => buffer(i) = Left(entry) Some(record -> i) } // success, or exceed the upper limit of the retry if (failed.isEmpty || retry >= retryLimit) () // retry else { Thread.sleep(sleepDuration(retry, retryLimit)) logger.warn(s"Retrying the put requests. Retry count: ${retry + 1}") put0(failed, retry + 1) } } put0(records.zipWithIndex) buffer.toList } def withPutRetry(retryLimit: Int = DEFAULT_MAX_ERROR_RETRY) (f: => PutRecordResult): Either[Throwable, PutRecordResult] = { @tailrec def put0(retry: Int = 0): Either[Throwable, PutRecordResult] = { try Right(f) catch { case e: ServiceUnavailableException => if (retry >= retryLimit) Left(e) else { Thread.sleep(sleepDuration(retry, retryLimit)) logger.warn(s"Retrying the put request. Retry count: ${retry + 1}") put0(retry + 1) } } } put0() } protected def sleepDuration(retry: Int, retryLimit: Int): Long = { // scaling factor val d = 0.5 + Random.nextDouble() * 0.1 // possible seconds val durations = (0 until retryLimit).map(n => pow(2, n) * d) (durations(retry) * 1000).toLong } }
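sleepDuration above precomputes 2^n seconds for each retry slot, scales the whole schedule by one random factor in [0.5, 0.6), and converts the chosen slot to milliseconds; so retry 0 sleeps roughly 500-600 ms, retry 1 roughly 1.0-1.2 s, and so on. Extracted as a standalone function it reads (a sketch mirroring the logic above):

import scala.math._
import scala.util.Random

object SleepDurationSketch {
  // Same shape as the Kinesis helper above: exponential slots, one shared jitter factor.
  def sleepDuration(retry: Int, retryLimit: Int): Long = {
    val d = 0.5 + Random.nextDouble() * 0.1          // scaling factor in [0.5, 0.6)
    val durations = (0 until retryLimit).map(n => pow(2, n) * d)
    (durations(retry) * 1000).toLong                 // milliseconds
  }

  def main(args: Array[String]): Unit =
    (0 until 3).foreach(r => println(s"retry $r -> ${sleepDuration(r, 3)} ms"))
}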
Example 150
Source File: BruteForceTicTacToeStrategy.scala From fx-tictactoe with Apache License 2.0 | 5 votes |
package net.ladstatt.tictactoe

import scala.util.Random

// Enclosing object declaration assumed; the excerpt starts directly at the method.
object BruteForceTicTacToeStrategy {

  def determineMove(game: TicTacToe, potentialMoves: Seq[Seq[TMove]]): TMove = {
    // check if we could win with the next move
    val winningMove = game.lookAhead(PlayerB)
    if (winningMove.isDefined) {
      winningMove.get
    } else {
      // check if there is already an obvious threat from the opponent to win the game
      // if there is, we'll take the move
      val winningMoveForOpponent = game.lookAhead(PlayerA)
      if (winningMoveForOpponent.isDefined) {
        winningMoveForOpponent.get
      } else {
        // prefer the middle center square
        if (potentialMoves.exists {
          case moves => moves.drop(game.movesSoFar.length).head == MiddleCenter
        }) {
          MiddleCenter
        } else {
          // we take the shortest path to win
          val possibilities = potentialMoves.sortWith((a, b) => a.size < b.size)
          val aPathToWin = possibilities.head
          aPathToWin.drop(game.movesSoFar.length).head
        }
      }
    }
  }
}
Example 151
Source File: WhiskAdminCliTestBase.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.core.database import akka.stream.ActorMaterializer import common.{StreamLogging, WskActorSystem} import org.rogach.scallop.throwError import org.scalatest.concurrent.ScalaFutures import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FlatSpec, Matchers} import org.apache.openwhisk.core.cli.{Conf, WhiskAdmin} import org.apache.openwhisk.core.database.test.DbUtils import org.apache.openwhisk.core.entity.WhiskAuthStore import scala.util.Random trait WhiskAdminCliTestBase extends FlatSpec with WskActorSystem with DbUtils with StreamLogging with BeforeAndAfterEach with BeforeAndAfterAll with ScalaFutures with Matchers { implicit val materializer = ActorMaterializer() //Bring in sync the timeout used by ScalaFutures and DBUtils implicit override val patienceConfig: PatienceConfig = PatienceConfig(timeout = dbOpTimeout) protected val authStore = WhiskAuthStore.datastore() //Ensure scalaop does not exit upon validation failure throwError.value = true override def afterEach(): Unit = { cleanup() } override def afterAll(): Unit = { println("Shutting down store connections") authStore.shutdown() super.afterAll() } protected def randomString(len: Int = 5): String = Random.alphanumeric.take(len).mkString protected def resultOk(args: String*): String = WhiskAdmin(new Conf(args.toSeq)) .executeCommand() .futureValue .right .get protected def resultNotOk(args: String*): String = WhiskAdmin(new Conf(args.toSeq)) .executeCommand() .futureValue .left .get .message }
Example 152
Source File: CosmosDBTestSupport.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.core.database.cosmosdb import com.microsoft.azure.cosmosdb.{Database, SqlParameter, SqlParameterCollection, SqlQuerySpec} import org.scalatest.{BeforeAndAfterAll, FlatSpecLike} import pureconfig._ import pureconfig.generic.auto._ import org.apache.openwhisk.core.ConfigKeys import org.apache.openwhisk.core.database.test.behavior.ArtifactStoreTestUtil.storeAvailable import scala.collection.mutable.ListBuffer import scala.util.{Random, Try} trait CosmosDBTestSupport extends FlatSpecLike with BeforeAndAfterAll with RxObservableImplicits { private val dbsToDelete = ListBuffer[Database]() lazy val storeConfigTry = Try { loadConfigOrThrow[CosmosDBConfig](ConfigKeys.cosmosdb) } lazy val client = storeConfig.createClient() val useExistingDB = java.lang.Boolean.getBoolean("whisk.cosmosdb.useExistingDB") def storeConfig = storeConfigTry.get override protected def withFixture(test: NoArgTest) = { assume(storeAvailable(storeConfigTry), "CosmosDB not configured or available") super.withFixture(test) } protected def generateDBName() = { s"travis-${getClass.getSimpleName}-${Random.alphanumeric.take(5).mkString}" } protected def createTestDB() = { if (useExistingDB) { val db = getOrCreateDatabase() println(s"Using existing database ${db.getId}") db } else { val databaseDefinition = new Database databaseDefinition.setId(generateDBName()) val db = client.createDatabase(databaseDefinition, null).blockingResult() dbsToDelete += db println(s"Created database ${db.getId}") db } } private def getOrCreateDatabase(): Database = { client .queryDatabases(querySpec(storeConfig.db), null) .blockingOnlyResult() .getOrElse { client.createDatabase(newDatabase, null).blockingResult() } } protected def querySpec(id: String) = new SqlQuerySpec("SELECT * FROM root r WHERE r.id=@id", new SqlParameterCollection(new SqlParameter("@id", id))) private def newDatabase = { val databaseDefinition = new Database databaseDefinition.setId(storeConfig.db) databaseDefinition } override def afterAll(): Unit = { super.afterAll() if (!useExistingDB) { dbsToDelete.foreach(db => client.deleteDatabase(db.getSelfLink, null).blockingResult()) } client.close() } }
Example 153
Source File: S3AttachmentStoreBehaviorBase.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.core.database.s3 import akka.actor.ActorSystem import akka.stream.ActorMaterializer import org.scalatest.FlatSpec import org.apache.openwhisk.common.Logging import org.apache.openwhisk.core.database.{AttachmentStore, DocumentSerializer} import org.apache.openwhisk.core.database.memory.{MemoryArtifactStoreBehaviorBase, MemoryArtifactStoreProvider} import org.apache.openwhisk.core.database.test.AttachmentStoreBehaviors import org.apache.openwhisk.core.database.test.behavior.ArtifactStoreAttachmentBehaviors import org.apache.openwhisk.core.entity.WhiskEntity import scala.reflect.ClassTag import scala.util.Random trait S3AttachmentStoreBehaviorBase extends FlatSpec with MemoryArtifactStoreBehaviorBase with ArtifactStoreAttachmentBehaviors with AttachmentStoreBehaviors { override lazy val store = makeS3Store[WhiskEntity] override implicit val materializer: ActorMaterializer = ActorMaterializer() override val prefix = s"attachmentTCK_${Random.alphanumeric.take(4).mkString}" override protected def beforeAll(): Unit = { MemoryArtifactStoreProvider.purgeAll() super.beforeAll() } override def getAttachmentStore[D <: DocumentSerializer: ClassTag](): AttachmentStore = makeS3Store[D]() def makeS3Store[D <: DocumentSerializer: ClassTag]()(implicit actorSystem: ActorSystem, logging: Logging, materializer: ActorMaterializer): AttachmentStore }
Example 154
Source File: ActivationStoreCRUDBehaviors.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.core.database.test.behavior import org.apache.openwhisk.common.TransactionId import org.apache.openwhisk.core.database.NoDocumentException import org.apache.openwhisk.core.entity.{ActivationId, WhiskActivation} import scala.util.Random trait ActivationStoreCRUDBehaviors extends ActivationStoreBehaviorBase { protected def checkStoreActivation(activation: WhiskActivation)(implicit transid: TransactionId): Unit = { store(activation, context) shouldBe activation.docinfo } protected def checkDeleteActivation(activation: WhiskActivation)(implicit transid: TransactionId): Unit = { activationStore.delete(ActivationId(activation.docid.asString), context).futureValue shouldBe true } protected def checkGetActivation(activation: WhiskActivation)(implicit transid: TransactionId): Unit = { activationStore.get(ActivationId(activation.docid.asString), context).futureValue shouldBe activation } behavior of s"${storeType}ActivationStore store" it should "put activation and get docinfo" in { implicit val tid: TransactionId = transId() val namespace = s"ns_${Random.alphanumeric.take(4).mkString}" val action = s"action1_${Random.alphanumeric.take(4).mkString}" val activation = newActivation(namespace, action, 1L) checkStoreActivation(activation) } behavior of s"${storeType}ActivationStore delete" it should "deletes existing activation" in { implicit val tid: TransactionId = transId() val namespace = s"ns_${Random.alphanumeric.take(4).mkString}" val action = s"action1_${Random.alphanumeric.take(4).mkString}" val activation = newActivation(namespace, action, 1L) store(activation, context) checkDeleteActivation(activation) } it should "throws NoDocumentException when activation does not exist" in { implicit val tid: TransactionId = transId() activationStore.delete(ActivationId("non-existing-doc"), context).failed.futureValue shouldBe a[NoDocumentException] } behavior of s"${storeType}ActivationStore get" it should "get existing activation matching id" in { implicit val tid: TransactionId = transId() val namespace = s"ns_${Random.alphanumeric.take(4).mkString}" val action = s"action1_${Random.alphanumeric.take(4).mkString}" val activation = newActivation(namespace, action, 1L) store(activation, context) checkGetActivation(activation) } it should "throws NoDocumentException when activation does not exist" in { implicit val tid: TransactionId = transId() activationStore.get(ActivationId("non-existing-doc"), context).failed.futureValue shouldBe a[NoDocumentException] } }
Example 155
Source File: ActivationStoreBehaviorBase.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.core.database.test.behavior import java.time.Instant import akka.stream.ActorMaterializer import common.{StreamLogging, WskActorSystem} import org.apache.openwhisk.common.TransactionId import org.apache.openwhisk.core.database.{ActivationStore, CacheChangeNotification, UserContext} import org.apache.openwhisk.core.database.test.behavior.ArtifactStoreTestUtil.storeAvailable import org.apache.openwhisk.core.entity._ import org.scalatest.concurrent.{IntegrationPatience, ScalaFutures} import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers, Outcome} import scala.collection.mutable.ListBuffer import scala.concurrent.Await import scala.concurrent.duration.Duration import scala.concurrent.duration.DurationInt import scala.language.postfixOps import scala.util.{Random, Try} trait ActivationStoreBehaviorBase extends FlatSpec with ScalaFutures with Matchers with StreamLogging with WskActorSystem with IntegrationPatience with BeforeAndAfterEach { protected implicit val materializer: ActorMaterializer = ActorMaterializer() protected implicit val notifier: Option[CacheChangeNotification] = None def context: UserContext def activationStore: ActivationStore private val docsToDelete = ListBuffer[(UserContext, ActivationId)]() def storeType: String protected def transId() = TransactionId(Random.alphanumeric.take(32).mkString) override def afterEach(): Unit = { cleanup() stream.reset() } override protected def withFixture(test: NoArgTest): Outcome = { assume(storeAvailable(storeAvailableCheck), s"$storeType not configured or available") val outcome = super.withFixture(test) if (outcome.isFailed) { println(logLines.mkString("\n")) } outcome } protected def storeAvailableCheck: Try[Any] = Try(true) //~----------------------------------------< utility methods > protected def store(activation: WhiskActivation, context: UserContext)( implicit transid: TransactionId, notifier: Option[CacheChangeNotification]): DocInfo = { val doc = activationStore.store(activation, context).futureValue docsToDelete.append((context, ActivationId(activation.docid.asString))) doc } protected def newActivation(ns: String, actionName: String, start: Long): WhiskActivation = { WhiskActivation( EntityPath(ns), EntityName(actionName), Subject(), ActivationId.generate(), Instant.ofEpochMilli(start), Instant.ofEpochMilli(start + 1000)) } def cleanup()(implicit timeout: Duration = 10 seconds): Unit = { implicit val tid: TransactionId = transId() docsToDelete.map { e => Try { Await.result(activationStore.delete(e._2, e._1), timeout) } } docsToDelete.clear() } }
Example 156
Source File: ReservoirSampling.scala From Mastering-Scala-Machine-Learning with MIT License | 5 votes |
package org.akozlov.chapter01

import scala.reflect.ClassTag
import scala.util.Random

object ReservoirSampling extends App {
  def reservoirSample[T: ClassTag](input: Iterator[T], k: Int): Array[T] = {
    val reservoir = new Array[T](k)
    // Put the first k elements in the reservoir.
    var i = 0
    while (i < k && input.hasNext) {
      val item = input.next()
      reservoir(i) = item
      i += 1
    }
    if (i < k) {
      // If input size < k, trim the array size.
      reservoir.take(i)
    } else {
      // If input size > k, continue the sampling process.
      // The next element must be kept with probability k / (i + 1), so the random
      // index is drawn from [0, i] inclusive, i.e. nextInt(i + 1), not nextInt(i).
      while (input.hasNext) {
        val item = input.next()
        val replacementIndex = Random.nextInt(i + 1)
        if (replacementIndex < k) {
          reservoir(replacementIndex) = item
        }
        i += 1
      }
      reservoir
    }
  }

  val numLines = 15
  val w = new java.io.FileWriter(new java.io.File("out.txt"))
  val lines = io.Source.fromFile("data/iris/in.txt").getLines
  reservoirSample(lines, numLines).foreach { s =>
    w.write(s + scala.util.Properties.lineSeparator)
  }
  w.close()
}
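A quick way to sanity-check a reservoir sampler is to run it many times with a seeded generator and confirm every input element is kept at roughly the same rate. The sketch below mirrors the algorithm above but takes an explicit Random so the check is reproducible; the object and function names are illustrative only:

import scala.reflect.ClassTag
import scala.util.Random

object ReservoirCheck extends App {
  // Algorithm R with an explicit, seedable generator.
  def sample[T: ClassTag](input: Iterator[T], k: Int, rng: Random): Array[T] = {
    val reservoir = new Array[T](k)
    var i = 0
    while (i < k && input.hasNext) { reservoir(i) = input.next(); i += 1 }
    while (input.hasNext) {
      val item = input.next()
      val j = rng.nextInt(i + 1) // uniform index in [0, i]
      if (j < k) reservoir(j) = item
      i += 1
    }
    if (i < k) reservoir.take(i) else reservoir
  }

  // Sample 3 values out of 0..9, 20000 times: each value should be kept ~6000 times.
  val rng = new Random(42)
  val counts = Array.fill(10)(0)
  (1 to 20000).foreach { _ =>
    sample(Iterator.range(0, 10), 3, rng).foreach(v => counts(v) += 1)
  }
  println(counts.mkString(", "))
}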
Example 157
Source File: FeatureHasherParitySpec.scala From mleap with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.parity.feature import org.apache.spark.ml.Transformer import org.apache.spark.ml.feature.FeatureHasher import org.apache.spark.ml.parity.SparkParityBase import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.types._ import scala.util.Random class FeatureHasherParitySpec extends SparkParityBase { val categories = Seq( "spark", "and", "mleap", "are", "super", "dope", "together" ) def randomRow(): Row = Row(Random.nextDouble(), Random.nextBoolean(), Random.nextInt(20), Random.nextInt(20).toString, Random.shuffle(categories).head) val rows = spark.sparkContext.parallelize(Seq.tabulate(100) { _ => randomRow() }) val schema = new StructType() .add("real", DoubleType, nullable = false) .add("bool", BooleanType, nullable = false) .add("int", IntegerType, nullable = false) .add("stringNum", StringType, nullable = true) .add("string", StringType, nullable = true) override val dataset: DataFrame = spark.sqlContext.createDataFrame(rows, schema) override val sparkTransformer: Transformer = new FeatureHasher() .setInputCols("real", "bool", "int", "stringNum", "string") .setOutputCol("features") .setNumFeatures(1 << 17) .setCategoricalCols(Array("int")) }
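Random.shuffle(categories).head in the spec above picks one element uniformly at random, but it copies and reorders the whole collection first. Indexing with Random.nextInt gives the same distribution with a single bounded draw; a small sketch of both (the category list is copied from the spec, the object name is illustrative):

import scala.util.Random

object PickOne extends App {
  // Vector gives constant-time indexing for the second approach.
  val categories = Vector("spark", "and", "mleap", "are", "super", "dope", "together")

  // Shuffles a copy of the whole collection, then takes the first element.
  val viaShuffle = Random.shuffle(categories).head

  // Same uniform distribution, but only one nextInt call and no copy.
  val viaIndex = categories(Random.nextInt(categories.size))

  println(s"$viaShuffle / $viaIndex")
}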
Example 158
Source File: BundleFileSystemSpec.scala From mleap with Apache License 2.0 | 5 votes |
package ml.combust.bundle.serializer import java.net.URI import java.nio.file.Files import ml.combust.bundle.test.TestSupport._ import ml.combust.bundle.{BundleFile, BundleRegistry} import ml.combust.bundle.test.ops._ import ml.combust.bundle.test.{TestBundleFileSystem, TestContext} import org.scalatest.FunSpec import resource.managed import scala.util.Random class BundleFileSystemSpec extends FunSpec { implicit val testContext = TestContext(BundleRegistry("test-registry"). registerFileSystem(new TestBundleFileSystem)) val randomCoefficients = (0 to 100000).map(v => Random.nextDouble()) val lr = LinearRegression(uid = "linear_regression_example", input = "input_field", output = "output_field", model = LinearModel(coefficients = randomCoefficients, intercept = 44.5)) describe("saving/loading bundle file using test file system") { it("loads/saves using the custom file system") { val tmpDir = Files.createTempDirectory("BundleFileSystemSpec") val uri = new URI(s"test://$tmpDir/test.zip") lr.writeBundle.name("my_bundle").save(uri) val loaded = uri.loadBundle().get assert(loaded.root == lr) } } }
Example 159
Source File: ErrorHandlingSpec.scala From mleap with Apache License 2.0 | 5 votes |
package ml.combust.bundle.serializer import java.io.File import ml.combust.bundle.{BundleFile, BundleRegistry, TestUtil} import ml.combust.bundle.test.TestContext import ml.combust.bundle.test.ops._ import org.scalatest.FunSpec import ml.combust.bundle.test.TestSupport._ import resource._ import scala.util.{Failure, Random} case class UnknownTransformer() extends Transformer { override val uid: String = "haha" } class ErrorHandlingSpec extends FunSpec { implicit val testContext = TestContext(BundleRegistry("test-registry")) val randomCoefficients = (0 to 100000).map(v => Random.nextDouble()) val lr = LinearRegression(uid = "linear_regression_example", input = "input_field", output = "output_field", model = LinearModel(coefficients = randomCoefficients, intercept = 44.5)) val si = StringIndexer(uid = "string_indexer_example", input = "input_string", output = "output_index", model = StringIndexerModel(strings = Seq("hey", "there", "man"))) val pipeline = Pipeline(uid = "my_pipeline", PipelineModel(Seq(si, lr))) describe("with unknown op") { it("returns a failure") { val result = (for(bf <- managed(BundleFile(new File(TestUtil.baseDir, "bad-model.zip")))) yield { UnknownTransformer().writeBundle.save(bf) }).tried.flatMap(identity) assert(result.isFailure) result match { case Failure(error) => assert(error.isInstanceOf[NoSuchElementException]) assert(error.getMessage == "key not found: ml.combust.bundle.serializer.UnknownTransformer") case _ => } } } }
Example 160
Source File: ImputerParitySpec.scala From mleap with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.mleap.parity.feature import org.apache.spark.ml.Transformer import org.apache.spark.ml.mleap.feature.Imputer import org.apache.spark.ml.parity.SparkParityBase import org.apache.spark.sql._ import org.apache.spark.sql.types.{DoubleType, StructType} import scala.util.Random class ImputerParitySpec extends SparkParityBase { def randomRow(): Row = { if(Random.nextBoolean()) { if(Random.nextBoolean()) { Row(23.4) } else { Row(Random.nextDouble()) } } else { Row(33.2) } } val rows = spark.sparkContext.parallelize(Seq.tabulate(100) { i => randomRow() }) val schema = new StructType().add("mv", DoubleType, nullable = true) override val dataset: DataFrame = spark.sqlContext.createDataFrame(rows, schema) override val sparkTransformer: Transformer = new Imputer(uid = "imputer"). setInputCol("mv"). setOutputCol("mv_imputed"). setMissingValue(23.4). setStrategy("mean").fit(dataset) }
Example 161
Source File: JMXRegistryTest.scala From airframe with Apache License 2.0 | 5 votes |
package wvlet.airframe.jmx import wvlet.airspec.AirSpec import wvlet.log.LogSupport import scala.util.Random @JMX(description = "A example MBean object") class SampleMBean { @JMX(description = "free memory size") def freeMemory: Long = { Runtime.getRuntime.freeMemory() } } case class FieldMBean(@JMX a: Int, @JMX b: String) class NestedMBean { @JMX(description = "nested stat") def stat: Stat = { new Stat(Random.nextInt(10), "nested JMX bean") } } case class Stat(@JMX count: Int, @JMX state: String) trait MyJMXApp extends LogSupport {} object MyJMXAppObj class JMXRegistryTest extends AirSpec { val agent = new JMXAgent(new JMXConfig()) override protected def afterAll: Unit = { agent.unregisterAll } def `register a new mbean`: Unit = { val b = new SampleMBean agent.register(b) if (!JMXUtil.isAtLeastJava9) { val m = agent.getMBeanInfo("wvlet.airframe.jmx:name=SampleMBean") debug(m) val a = agent.getMBeanAttribute("wvlet.airframe.jmx:name=SampleMBean", "freeMemory") debug(a) } } def `support class field`: Unit = { val f = new FieldMBean(1, "apple") agent.register(f) if (!JMXUtil.isAtLeastJava9) { val m = agent.getMBeanInfo("wvlet.airframe.jmx:name=FieldMBean") info(m) agent.getMBeanAttribute("wvlet.airframe.jmx:name=FieldMBean", "a") shouldBe 1 agent.getMBeanAttribute("wvlet.airframe.jmx:name=FieldMBean", "b") shouldBe "apple" } } def `handle nested JMX MBean`: Unit = { val n = new NestedMBean agent.register(n) if (!JMXUtil.isAtLeastJava9) { val m = agent.getMBeanInfo("wvlet.airframe.jmx:name=NestedMBean") info(m) agent.getMBeanAttribute("wvlet.airframe.jmx:name=NestedMBean", "stat.count").toString.toInt <= 10 shouldBe true agent.getMBeanAttribute("wvlet.airframe.jmx:name=NestedMBean", "stat.state") shouldBe "nested JMX bean" } } def `avoid double registration`: Unit = { val f = new FieldMBean(1, "apple") agent.register(f) agent.register(f) } def `support complex trait name`: Unit = { agent.register[MyJMXApp](new MyJMXApp {}) } }
Example 162
Source File: LocalAuthSrv.scala From Cortex with GNU Affero General Public License v3.0 | 5 votes |
package org.thp.cortex.services import javax.inject.{Inject, Singleton} import scala.concurrent.{ExecutionContext, Future} import scala.util.Random import play.api.mvc.RequestHeader import akka.stream.Materializer import org.thp.cortex.models.User import org.elastic4play.controllers.Fields import org.elastic4play.services.{AuthCapability, AuthContext, AuthSrv} import org.elastic4play.utils.Hasher import org.elastic4play.{AuthenticationError, AuthorizationError} @Singleton class LocalAuthSrv @Inject()(userSrv: UserSrv, implicit val ec: ExecutionContext, implicit val mat: Materializer) extends AuthSrv { val name = "local" override val capabilities = Set(AuthCapability.changePassword, AuthCapability.setPassword) private[services] def doAuthenticate(user: User, password: String): Boolean = user.password().map(_.split(",", 2)).fold(false) { case Array(seed, pwd) ⇒ val hash = Hasher("SHA-256").fromString(seed + password).head.toString hash == pwd case _ ⇒ false } override def authenticate(username: String, password: String)(implicit request: RequestHeader): Future[AuthContext] = userSrv.get(username).flatMap { user ⇒ if (doAuthenticate(user, password)) userSrv.getFromUser(request, user, name) else Future.failed(AuthenticationError("Authentication failure")) } override def changePassword(username: String, oldPassword: String, newPassword: String)(implicit authContext: AuthContext): Future[Unit] = userSrv.get(username).flatMap { user ⇒ if (doAuthenticate(user, oldPassword)) setPassword(username, newPassword) else Future.failed(AuthorizationError("Authentication failure")) } override def setPassword(username: String, newPassword: String)(implicit authContext: AuthContext): Future[Unit] = { val seed = Random.nextString(10).replace(',', '!') val newHash = seed + "," + Hasher("SHA-256").fromString(seed + newPassword).head.toString userSrv.update(username, Fields.empty.set("password", newHash)).map(_ ⇒ ()) } }
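setPassword above seeds the hash with Random.nextString(10). Two caveats when borrowing this pattern: nextString can produce arbitrary (including unprintable) Unicode characters, and scala.util.Random is not a cryptographically secure generator, so for real salts java.security.SecureRandom is the safer choice. A hedged sketch of an alternative salt generator, not part of Cortex:

import java.security.SecureRandom
import java.util.Base64

object SaltGenerator {
  private val rng = new SecureRandom()

  // 16 random bytes, Base64-encoded: printable, comma-free, and unpredictable,
  // so it still fits the "seed,hash" storage format used above.
  def newSalt(): String = {
    val bytes = new Array[Byte](16)
    rng.nextBytes(bytes)
    Base64.getEncoder.encodeToString(bytes)
  }
}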
Example 163
Source File: KeyAuthSrv.scala From Cortex with GNU Affero General Public License v3.0 | 5 votes |
package org.thp.cortex.services import java.util.Base64 import javax.inject.{Inject, Singleton} import scala.concurrent.{ExecutionContext, Future} import scala.util.Random import play.api.libs.json.JsArray import play.api.mvc.RequestHeader import akka.stream.Materializer import akka.stream.scaladsl.Sink import org.elastic4play.controllers.Fields import org.elastic4play.services.{AuthCapability, AuthContext, AuthSrv} import org.elastic4play.{AuthenticationError, BadRequestError} @Singleton class KeyAuthSrv @Inject()(userSrv: UserSrv, implicit val ec: ExecutionContext, implicit val mat: Materializer) extends AuthSrv { override val name = "key" final protected def generateKey(): String = { val bytes = Array.ofDim[Byte](24) Random.nextBytes(bytes) Base64.getEncoder.encodeToString(bytes) } override val capabilities = Set(AuthCapability.authByKey) override def authenticate(key: String)(implicit request: RequestHeader): Future[AuthContext] = { import org.elastic4play.services.QueryDSL._ // key attribute is sensitive so it is not possible to search on that field userSrv .find("status" ~= "Ok", Some("all"), Nil) ._1 .filter(_.key().contains(key)) .runWith(Sink.headOption) .flatMap { case Some(user) ⇒ userSrv.getFromUser(request, user, name) case None ⇒ Future.failed(AuthenticationError("Authentication failure")) } } override def renewKey(username: String)(implicit authContext: AuthContext): Future[String] = { val newKey = generateKey() userSrv.update(username, Fields.empty.set("key", newKey)).map(_ ⇒ newKey) } override def getKey(username: String)(implicit authContext: AuthContext): Future[String] = userSrv.get(username).map(_.key().getOrElse(throw BadRequestError(s"User $username hasn't key"))) override def removeKey(username: String)(implicit authContext: AuthContext): Future[Unit] = userSrv.update(username, Fields.empty.set("key", JsArray())).map(_ ⇒ ()) }
Example 164
Source File: BasicsEx1Tester.scala From chisel-lab with BSD 2-Clause "Simplified" License | 5 votes |
package exercises

import chisel3._
import chisel3.util._
import chisel3.iotesters.{ChiselFlatSpec, Driver, PeekPokeTester}

class MACorACMPeekPoke(c: MACorACM) extends PeekPokeTester(c) {
  // The parameter c refers to the module we are testing. To access signals from MACorACM
  // the prefix "c." is therefore needed.
  val tests = 50
  import scala.util.Random

  poke(c.io.sel, true) // Set the selector signal to 1/true.B
  for (i <- 0 until tests) { // Loop to run 50 tests
    val in_a = Random.nextInt(16) // Sets the Scala values in_a, in_b and in_c to random integers
    val in_b = Random.nextInt(16) // between 0 and 16, 16 not included. This range is chosen to avoid overflow.
    val in_c = Random.nextInt(16)
    poke(c.io.a, in_a) // Drives the MACorACM inputs a, b and c with the random integer values.
    poke(c.io.b, in_b)
    poke(c.io.c, in_c)
    expect(c.io.z, (in_a * in_b) + in_c) // Tests whether the module under test computes the output
                                         // correctly. If not, an error is thrown.
    step(1) // Advance the simulation by one clock cycle. Not needed for this test.
  }

  poke(c.io.sel, false)
  // This loop tests the other operation the MACorACM module should compute.
  for (i <- 0 until tests) {
    val in_a = Random.nextInt(16)
    val in_b = Random.nextInt(16)
    val in_c = Random.nextInt(16)
    poke(c.io.a, in_a)
    poke(c.io.b, in_b)
    poke(c.io.c, in_c)
    expect(c.io.z, (in_a + in_b) * in_c)
    step(1)
  }
}

object MACorACMTester extends App {
  // If any expect statement fails, Driver returns false and the assert aborts,
  // so the success message below is never printed.
  assert(Driver(() => new MACorACM) { c => new MACorACMPeekPoke(c) })
  println("SUCCESS!!")
}
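The tester above draws its stimuli from the global scala.util.Random singleton without a seed, so a failing run is hard to reproduce. Seeding the generator, either the singleton or a local instance, makes the same input vectors come back on every run; a small sketch, not part of the original lab code:

import scala.util.Random

object SeededStimuli extends App {
  // Option 1: seed the shared singleton once, before any stimuli are generated.
  Random.setSeed(0xC0FFEE)

  // Option 2 (usually safer): a private, seeded instance, so other code touching
  // the singleton cannot disturb the sequence.
  val rng = new Random(42)
  val stimuli = Seq.fill(5)((rng.nextInt(16), rng.nextInt(16), rng.nextInt(16)))

  stimuli.foreach(println) // prints the same five triples on every run
}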
Example 165
Source File: StreamStreamDataGenerator.scala From structured-streaming-application with Apache License 2.0 | 5 votes |
package knolx.kafka import java.util.Properties import akka.actor.ActorSystem import knolx.Config._ import knolx.KnolXLogger import knolx.spark.Stock import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.json4s.jackson.Serialization.write import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.DurationInt import scala.util.Random object StreamStreamDataGenerator extends App with KnolXLogger { val system = ActorSystem("DataStreamer") val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) val producer = new KafkaProducer[String, String](props) val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay") val orderTypes = List("buy", "sell") val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9) val randomCompanyNames = Random.shuffle(companyNames).drop(Random.shuffle((1 to 3).toList).head) implicit val formats = Serialization.formats(NoTypeHints) info("Streaming companies listed into Kafka...") system.scheduler.schedule(0 seconds, 20 seconds) { randomCompanyNames.foreach { name => producer.send(new ProducerRecord[String, String](companiesTopic, name)) } } info("Streaming stocks data into Kafka...") system.scheduler.schedule(0 seconds, 5 seconds) { companyNames.foreach { name => val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head) producer.send(new ProducerRecord[String, String](stocksTopic, write(stock))) } } }
Example 166
Source File: DataStreamer.scala From structured-streaming-application with Apache License 2.0 | 5 votes |
package knolx.kafka import java.util.Properties import akka.actor.ActorSystem import knolx.Config.{bootstrapServer, topic} import knolx.KnolXLogger import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.DurationInt import scala.util.Random object DataStreamer extends App with KnolXLogger { val system = ActorSystem("DataStreamer") val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) val producer = new KafkaProducer[String, String](props) val someWords = List("about", "above", "after", "again", "against") info("Streaming data into Kafka...") system.scheduler.schedule(0 seconds, 200 milliseconds) { Random.shuffle(someWords).headOption.foreach { word => producer.send(new ProducerRecord[String, String](topic, word)) } } }
Example 167
Source File: StreamStaticDataGenerator.scala From structured-streaming-application with Apache License 2.0 | 5 votes |
package knolx.kafka import java.util.Properties import akka.actor.ActorSystem import knolx.Config.{bootstrapServer, topic} import knolx.KnolXLogger import knolx.spark.Stock import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.json4s.jackson.Serialization.write import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.DurationInt import scala.util.Random object StreamStaticDataGenerator extends App with KnolXLogger { val system = ActorSystem("DataStreamer") val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) val producer = new KafkaProducer[String, String](props) val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay") val orderTypes = List("buy", "sell") val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9) implicit val formats = Serialization.formats(NoTypeHints) info("Streaming data into Kafka...") system.scheduler.schedule(0 seconds, 5 seconds) { companyNames.foreach { name => val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head) producer.send(new ProducerRecord[String, String](topic, write(stock))) } } }
Example 168
Source File: MultiDataStreamer.scala From structured-streaming-application with Apache License 2.0 | 5 votes |
package knolx.kafka import java.util.Properties import akka.actor.ActorSystem import knolx.Config.{bootstrapServer, topic} import knolx.KnolXLogger import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.DurationInt import scala.language.postfixOps import scala.util.Random object MultiDataStreamer extends App with KnolXLogger { val system = ActorSystem("DataStreamer") val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) val producer = new KafkaProducer[String, String](props) info("Streaming data into Kafka...") system.scheduler.schedule(0 seconds, 3000 milliseconds) { (1 to Random.nextInt(100)).foreach { id => producer.send(new ProducerRecord[String, String](topic,s"device$id", (Math.random * 2 + 1).toString)) } } }
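MultiDataStreamer mixes Math.random (a JVM-wide java.util.Random) with scala.util.Random.nextInt, so the generated values come from two independent generators and the stream cannot be replayed from a single seed. Keeping everything on one scala.util.Random instance makes the payload seedable; a minimal sketch with a made-up payload, independent of the Kafka plumbing above:

import scala.util.Random

object DeviceReadings extends App {
  val rng = new Random(7L)

  // One generator drives both the device count and the reading in [1.0, 3.0),
  // so the whole batch can be replayed from the single seed above.
  val readings = (1 to rng.nextInt(100)).map { id =>
    s"device$id" -> (rng.nextDouble() * 2 + 1)
  }

  readings.foreach { case (device, value) => println(s"$device=$value") }
}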
Example 169
Source File: TestFlinkGenLast.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.flink.generator import com.amazon.milan.application.ApplicationConfiguration import com.amazon.milan.compiler.flink.testing._ import com.amazon.milan.lang._ import com.amazon.milan.testing.applications._ import org.junit.Assert._ import org.junit.Test import scala.util.Random @Test class TestFlinkGenLast { @Test def test_FlinkGenLast_InFlatMapOfGroupBy_WithOneGroupKeyInInputRecords_OutputsOnlyLastInputRecordToOutput(): Unit = { val input = Stream.of[IntKeyValueRecord].withName("input") val grouped = input.groupBy(r => r.key) def maxByValueAndLast(stream: Stream[IntKeyValueRecord]): Stream[IntKeyValueRecord] = stream.maxBy(r => r.value).last() val output = grouped.flatMap((key, group) => maxByValueAndLast(group)).withName("output") val graph = new StreamGraph(output) val config = new ApplicationConfiguration config.setListSource(input, IntKeyValueRecord(1, 1), IntKeyValueRecord(1, 3), IntKeyValueRecord(1, 2)) // Keep running until we find records in the output file. val results = TestApplicationExecutor.executeApplication( graph, config, 20, r => r.getRecords(output).isEmpty, output) val outputRecords = results.getRecords(output) assertEquals(List(IntKeyValueRecord(1, 3)), outputRecords) } @Test def test_FlinkGenLast_InFlatMapOfGroupBy_With10GroupKeysInInputRecords_With10RecordsPerGroupKey_OutputsOnlyLastRecordInInputForEachGroupKey(): Unit = { val input = Stream.of[IntKeyValueRecord].withName("input") val grouped = input.groupBy(r => r.key) def maxByValueAndLast(stream: Stream[IntKeyValueRecord]): Stream[IntKeyValueRecord] = stream.maxBy(r => r.value).last() val output = grouped.flatMap((key, group) => maxByValueAndLast(group)).withName("output") val graph = new StreamGraph(output) val inputRecords = Random.shuffle(List.tabulate(10)(group => List.tabulate(10)(i => IntKeyValueRecord(group, i))).flatten) val config = new ApplicationConfiguration config.setListSource(input, inputRecords: _*) val results = TestApplicationExecutor.executeApplication( graph, config, 20, r => r.getRecords(output).length < 10, output) val outputRecords = results.getRecords(output).sortBy(_.key) val expectedOutputRecords = List.tabulate(10)(i => inputRecords.filter(_.key == i).maxBy(_.value)) assertEquals(expectedOutputRecords, outputRecords) } }
Example 170
Source File: TestPriorityQueueTypeSerializer.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.flink.types import com.amazon.milan.compiler.flink.runtime.SequenceNumberOrdering import com.amazon.milan.compiler.flink.testing.IntRecord import com.amazon.milan.compiler.flink.testutil._ import com.amazon.milan.compiler.flink.types import org.apache.flink.api.scala._ import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment import org.junit.Assert._ import org.junit.Test import scala.collection.mutable import scala.util.Random @Test class TestPriorityQueueTypeSerializer { @Test def test_PriorityQueueTypeSerializer_Deserialize_WithQueueOfInt_With100RandomItems_ReturnsQueueThatYieldsSameItemsAsOriginal(): Unit = { val typeInfo = new PriorityQueueTypeInformation[Int](createTypeInformation[Int], Ordering.Int) val env = StreamExecutionEnvironment.getExecutionEnvironment val serializer = typeInfo.createSerializer(env.getConfig) val original = new mutable.PriorityQueue[Int]() val rand = new Random(0) val values = List.tabulate(100)(_ => rand.nextInt(100)) original.enqueue(values: _*) val copy = copyWithSerializer(original, serializer) assertEquals(original.length, copy.length) assertEquals(original.dequeueAll.toList, copy.dequeueAll.toList) } @Test def test_PriorityQueueTypeSerializer_Deserialize_AfterRestoring_WithQueueOfInt_With100RandomItems_ReturnsQueueThatYieldsSameItemsAsOriginal(): Unit = { val typeInfo = new PriorityQueueTypeInformation[Int](createTypeInformation[Int], Ordering.Int) val env = StreamExecutionEnvironment.getExecutionEnvironment val serializer = typeInfo.createSerializer(env.getConfig) val snapshot = serializer.snapshotConfiguration() val snapshotCopy = new types.PriorityQueueTypeSerializer.Snapshot[Int]() copyData(snapshot.writeSnapshot, input => snapshotCopy.readSnapshot(snapshot.getCurrentVersion, input, getClass.getClassLoader)) val serializerCopy = snapshotCopy.restoreSerializer() val original = new mutable.PriorityQueue[Int]() val rand = new Random(0) val values = List.tabulate(100)(_ => rand.nextInt(100)) original.enqueue(values: _*) val copy = copyData( output => serializer.serialize(original, output), input => serializerCopy.deserialize(input)) assertEquals(original.length, copy.length) assertEquals(original.dequeueAll.toList, copy.dequeueAll.toList) } @Test def test_PriorityQueueTypeSerializer_Deserialize_AfterRestoring_WithQueueOfRecordWrapperAndSequenceNumberOrdering_With100RandomItems_ReturnsQueueThatYieldsSameItemsAsOriginal(): Unit = { val ordering = new SequenceNumberOrdering[IntRecord, Product] val typeInfo = new PriorityQueueTypeInformation[RecordWrapper[IntRecord, Product]]( RecordWrapperTypeInformation.wrap(createTypeInformation[IntRecord]), ordering) val env = StreamExecutionEnvironment.getExecutionEnvironment val serializer = typeInfo.createSerializer(env.getConfig) val snapshot = serializer.snapshotConfiguration() val snapshotCopy = new types.PriorityQueueTypeSerializer.Snapshot[RecordWrapper[IntRecord, Product]]() copyData(snapshot.writeSnapshot, input => snapshotCopy.readSnapshot(snapshot.getCurrentVersion, input, getClass.getClassLoader)) val serializerCopy = snapshotCopy.restoreSerializer() val original = new mutable.PriorityQueue[RecordWrapper[IntRecord, Product]]()(ordering) val rand = new Random(0) val values = List.tabulate(100)(i => RecordWrapper.wrap(IntRecord(rand.nextInt(100)), i.toLong)) original.enqueue(values: _*) val copy = copyData( output => serializer.serialize(original, output), input => serializerCopy.deserialize(input)) assertEquals(original.length, copy.length) 
assertEquals(original.dequeueAll.toList, copy.dequeueAll.toList) } }
Example 171
Source File: TestKeyedLastByOperator.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.flink.runtime import com.amazon.milan.compiler.flink.testing.{SingletonMemorySinkFunction, _} import com.amazon.milan.compiler.flink.testutil._ import com.amazon.milan.compiler.flink.types.RecordWrapper import org.apache.flink.api.scala._ import org.junit.Assert._ import org.junit.Test import scala.collection.JavaConverters._ import scala.util.Random @Test class TestKeyedLastByOperator { @Test def test_KeyedLastByOperator_WithRandomInputsWithTenKeys_ReturnsOneRecordPerKeyWithMaxValue(): Unit = { val operator: KeyedLastByOperator[IntKeyValueRecord, Tuple1[Int]] = new KeyedLastByOperator[IntKeyValueRecord, Tuple1[Int]](createTypeInformation[IntKeyValueRecord], createTypeInformation[Tuple1[Int]]) { override protected def takeNewValue(newRecord: RecordWrapper[IntKeyValueRecord, Tuple1[Int]], currentRecord: RecordWrapper[IntKeyValueRecord, Tuple1[Int]]): Boolean = { newRecord.value.value > currentRecord.value.value } } val rand = new Random(0) val data = List.tabulate(1000)(_ => { IntKeyValueRecord(rand.nextInt(10), rand.nextInt(100)) }) val env = getTestExecutionEnvironment val input = env.fromCollection(data.asJavaCollection, createTypeInformation[IntKeyValueRecord]).wrap(createTypeInformation[IntKeyValueRecord]) val keySelector = new RecordWrapperKeySelector[IntKeyValueRecord, Tuple1[Int]](createTypeInformation[Tuple1[Int]]) val keyed = input .map(new ModifyRecordKeyMapFunction[IntKeyValueRecord, Product, Tuple1[Int]](createTypeInformation[IntKeyValueRecord], createTypeInformation[Tuple1[Int]]) { override protected def getNewKey(value: IntKeyValueRecord, key: Product): Tuple1[Int] = Tuple1(value.key) }) .keyBy(keySelector, keySelector.getKeyType) val output = keyed.transform( "op", operator.getProducedType, operator) .unwrap() val sink = new SingletonMemorySinkFunction[IntKeyValueRecord]() output.addSink(sink) env.executeThenWaitFor(() => sink.getRecordCount >= 10, 5) val expectedOutput = data.groupBy(_.key).map { case (_, g) => g.maxBy(_.value) }.toList.sortBy(_.key) val actualOutput = sink.getValues.sortBy(_.key) assertEquals(expectedOutput, actualOutput) } }
Example 172
Source File: package.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.flink import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import com.amazon.milan.compiler.flink.runtime.{UnwrapRecordsMapFunction, WrapRecordsMapFunction} import com.amazon.milan.compiler.flink.testing.IntKeyValueRecord import com.amazon.milan.compiler.flink.types.{RecordWrapper, RecordWrapperTypeInformation} import org.apache.flink.api.common.typeinfo.TypeInformation import org.apache.flink.api.common.typeutils.TypeSerializer import org.apache.flink.api.java.typeutils.ResultTypeQueryable import org.apache.flink.core.memory.{DataInputView, DataInputViewStreamWrapper, DataOutputView, DataOutputViewStreamWrapper} import org.apache.flink.streaming.api.TimeCharacteristic import org.apache.flink.streaming.api.datastream.DataStream import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment import scala.language.implicitConversions import scala.util.Random package object testutil { def getTestExecutionEnvironment: StreamExecutionEnvironment = { val env = StreamExecutionEnvironment.getExecutionEnvironment env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime) env.setBufferTimeout(0) env } def copyWithSerializer[T](value: T, serializer: TypeSerializer[T]): T = { val outputStream = new ByteArrayOutputStream() val outputView = new DataOutputViewStreamWrapper(outputStream) serializer.serialize(value, outputView) val bytes = outputStream.toByteArray val inputStream = new ByteArrayInputStream(bytes) val inputView = new DataInputViewStreamWrapper(inputStream) serializer.deserialize(inputView) } def copyData[T](writeValue: DataOutputView => Unit, readValue: DataInputView => T): T = { val outputStream = new ByteArrayOutputStream() val outputView = new DataOutputViewStreamWrapper(outputStream) writeValue(outputView) val bytes = outputStream.toByteArray val inputStream = new ByteArrayInputStream(bytes) val inputView = new DataInputViewStreamWrapper(inputStream) readValue(inputView) } def generateIntKeyValueRecords(recordCount: Int, keyCount: Int, maxValue: Int): List[IntKeyValueRecord] = { val rand = new Random(0) List.tabulate(recordCount)(_ => IntKeyValueRecord(rand.nextInt(keyCount), rand.nextInt(maxValue + 1))) } implicit class WrappedDataStreamExtensions[T >: Null, TKey >: Null <: Product](dataStream: DataStream[RecordWrapper[T, TKey]]) { def unwrap(recordTypeInformation: TypeInformation[T]): DataStream[T] = { val mapper = new UnwrapRecordsMapFunction[T, TKey](recordTypeInformation) this.dataStream.map(mapper) } def unwrap(): DataStream[T] = { val recordType = this.dataStream.getType.asInstanceOf[RecordWrapperTypeInformation[T, TKey]].valueTypeInformation this.unwrap(recordType) } } implicit class DataStreamExtensions[T >: Null](dataStream: DataStream[T]) { def wrap(recordTypeInformation: TypeInformation[T]): DataStream[RecordWrapper[T, Product]] = { val mapper = new WrapRecordsMapFunction[T](recordTypeInformation) this.dataStream.map(mapper) } def wrap(): DataStream[RecordWrapper[T, Product]] = { val recordType = this.dataStream.asInstanceOf[ResultTypeQueryable[T]].getProducedType this.wrap(recordType) } } }
Example 173
Source File: Records.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.scala.testing import com.amazon.milan.Id import scala.util.Random class IntRecord(val recordId: String, val i: Int) { override def toString: String = s"IntRecord($i)" override def equals(obj: Any): Boolean = obj match { case o: IntRecord => this.i == o.i case _ => false } } object IntRecord { def apply(i: Int): IntRecord = new IntRecord(Id.newId(), i) } class KeyValueRecord(val recordId: String, val key: Int, val value: Int) { override def toString: String = s"($key: $value)" override def equals(obj: Any): Boolean = obj match { case o: KeyValueRecord => this.key == o.key && this.value == o.value case _ => false } } object KeyValueRecord { def apply(key: Int, value: Int): KeyValueRecord = new KeyValueRecord(Id.newId(), key, value) def generate(recordCount: Int, maxKey: Int, maxValue: Int): List[KeyValueRecord] = { val rand = new Random() List.tabulate(recordCount)(_ => KeyValueRecord(rand.nextInt(maxKey), rand.nextInt(maxValue))) } }
Example 174
Source File: Example.scala From temperature-machine with Apache License 2.0 | 5 votes |
package bad.robot.temperature.rrd import bad.robot.temperature.rrd.Seconds.{now, secondsToLong} import bad.robot.temperature.server.JsonFile import bad.robot.temperature.task.FixedTimeMeasurement import bad.robot.temperature.{Error, Measurement, SensorReading, Temperature} import scala.concurrent.duration.Duration import scala.util.Random import scalaz.{-\/, \/} object Example extends App { sys.props += ("org.slf4j.simpleLogger.defaultLogLevel" -> "info") val random = new Random() val duration = Duration(1, "days") val start = now() - duration.toSeconds val end = now() val frequency = Duration(30, "seconds") val hosts = List(Host("bedroom"), Host("lounge")) RrdFile(hosts, frequency).create(start - 5) populateRrd(hosts) val xml = Xml(start, start + aDay, hosts) xml.exportJson(JsonFile.filename) xml.exportXml("temperature.xml") Graph.create(start, start + aDay, hosts, "A day") Graph.create(start, start + aDay * 2, hosts, "2 days") Graph.create(start, start + aWeek, hosts, "A week") Graph.create(start, start + aMonth, hosts, "A month") println("Done generating " + duration) def populateRrd(hosts: List[Host]) = { def seed = random.nextInt(30) + random.nextDouble() def smooth = (value: Double) => if (random.nextDouble() > 0.5) value + random.nextDouble() else value - random.nextDouble() val temperatures = Stream.iterate(seed)(smooth).zip(Stream.iterate(seed)(smooth)) val times = Stream.iterate(start)(_ + frequency.toSeconds).takeWhile(_ < end) times.zip(temperatures).foreach({ case (time, (temperature1, temperature2)) => { handleError(RrdUpdate(hosts).apply(FixedTimeMeasurement(time, List( Measurement(hosts(0), time, List( SensorReading("?", Temperature(temperature1)), SensorReading("?", Temperature(temperature1 + 6.3))) )) ))) handleError(RrdUpdate(hosts).apply(FixedTimeMeasurement(time + 1, List( Measurement(hosts(1), time + 1, List( SensorReading("?", Temperature(temperature2)), SensorReading("?", Temperature(temperature2 + 1.3))) )) ))) } }) def handleError(f: => Error \/ Any): Unit = { f match { case -\/(error) => println(error) case _ => () } } } }
Example 175
Source File: UsersControllerSpec.scala From play-quill-jdbc with MIT License | 5 votes |
package controllers import org.scalatest.TestData import org.scalatestplus.play.{OneAppPerTest, PlaySpec} import play.api.Application import play.api.libs.json.Json import play.api.test.FakeRequest import play.api.test.Helpers._ import models.{User, Users} import test._ import scala.util.Random class UsersControllerSpec extends PlaySpec with OneAppPerTest { override def newAppForTest(testData: TestData): Application = fakeApp "GET /users/:id" should { "return 200 OK with body" in { val users = app.injector.instanceOf(classOf[Users]) val name = s"Name${Random.nextLong()}" val user = users.create(User(0, name, true)) val response = route(app, FakeRequest(GET, s"/users/${user.id}")).get status(response) mustBe OK val json = contentAsJson(response) (json \ "name").as[String] mustBe user.name } } "POST /users" should { "return 201 Created with Location header with created resource" in { val name = s"Name${Random.nextLong()}" val userJson = Json.obj("name" -> name, "isActive" -> true) val responseCreated = route(app, FakeRequest(POST, "/users").withJsonBody(userJson)).get status(responseCreated) mustBe CREATED val location = headers(responseCreated).get(LOCATION).get val responseGet = route(app, FakeRequest(GET, location)).get val json = contentAsJson(responseGet) (json \ "name").as[String] mustBe name } } "DELETE /users/:id" should { "return 204 No Content and delete resource" in { val users = app.injector.instanceOf(classOf[Users]) val name = s"Name${Random.nextLong()}" val user = users.create(User(0, name, true)) val response = route(app, FakeRequest(DELETE, s"/users/${user.id}")).get status(response) mustBe NO_CONTENT users.find(user.id) mustBe empty } } "PUT /users/:id" should { "return 204 No Content and update resource" in { val users = app.injector.instanceOf(classOf[Users]) val name = s"Name${Random.nextLong()}" val user = users.create(User(0, name, true)) val updatedName = s"Name${Random.nextLong()}" val updateUserJson = Json.obj("name" -> updatedName, "isActive" -> true) val response = route(app, FakeRequest(PUT, s"/users/${user.id}").withJsonBody(updateUserJson)).get status(response) mustBe NO_CONTENT val updatedUser = users.find(user.id) updatedUser.get.name mustBe updatedName } } }
Example 176
Source File: ExponentialBackOff.scala From schedoscope with Apache License 2.0 | 5 votes |
package org.schedoscope.scheduler.utils import scala.concurrent.duration.{Duration, FiniteDuration} import scala.util.Random case class ExponentialBackOff(backOffSlotTime: FiniteDuration, backOffSlot: Int = 1, backOffWaitTime: FiniteDuration = Duration.Zero, constantDelay: FiniteDuration = Duration.Zero, ceiling: Int = 10, resetOnCeiling: Boolean = false, retries: Int = 0, resets: Int = 0, totalRetries: Long = 0) { private def updateTime = backOffSlotTime * expectedBackOff(backOffSlot) + constantDelay private def expectedBackOff(backOffSlot: Int) = { val rand = new Random().nextInt(backOffSlot + 1) math.round(math.pow(2, rand) - 1) } def nextBackOff: ExponentialBackOff = { if (backOffSlot >= ceiling && resetOnCeiling) // reset copy(backOffSlot = 1, backOffWaitTime = Duration.Zero, resets = resets + 1, retries = 0, totalRetries = totalRetries + 1) else { val newBackOffSlot = if (backOffSlot >= ceiling) ceiling else backOffSlot + 1 // increase 1 collision copy(backOffSlot = newBackOffSlot, backOffWaitTime = updateTime, retries = retries + 1, totalRetries = totalRetries + 1) } } }
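nextBackOff above increments the collision counter and recomputes the wait: expectedBackOff draws a random exponent in [0, backOffSlot] via Random.nextInt(backOffSlot + 1), and the wait is backOffSlotTime * (2^exponent - 1) plus constantDelay, which is binary exponential backoff with jitter. A standalone sketch of the same idea, showing how the expected wait grows as collisions accumulate; the constants and names are illustrative:

import scala.concurrent.duration._
import scala.util.Random

object BackoffDemo extends App {
  val slotTime = 100.millis
  val rng = new Random()

  // After `collisions` failures, pick a random exponent in [0, collisions]
  // and wait (2^exponent - 1) slots.
  def nextWait(collisions: Int): FiniteDuration = {
    val exponent = rng.nextInt(collisions + 1)
    slotTime * (math.pow(2, exponent).toLong - 1)
  }

  (1 to 6).foreach(c => println(s"after $c collisions -> ${nextWait(c)}"))
}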
Example 177
Source File: XcsCover.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.reinforcement.xcs import org.scalaml.trading.Signal import org.scalaml.reinforcement.qlearning.QLState import scala.util.Random import org.scalaml.trading.operator.SOperator import org.scalaml.ga.Quantization import org.scalaml.ga.Gene.Encoding def cover( sensor: XcsSensor, actions: List[XcsAction] )(implicit quant: Quantization[Double], geneBits: Encoding): List[XcsRule] = { import Random._ require( actions.nonEmpty, "XcsCover.cover Cannot generates new rules from undefined list of actions" ) require( actions.nonEmpty && actions.size < MAX_NUM_ACTIONS, s"XcsCover.cover The number of actions per state ${actions.size} if out of range" ) actions./:(List[XcsRule]())((xs, act) => { val signal = Signal(sensor.id, sensor.value, new SOperator(nextInt(Signal.numOperators))) new XcsRule(signal, XcsAction(act, Random)) :: xs }) } } // ------------------------- EOF -----------------------------------------
Example 178
Source File: DataGenerator.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.unsupervised.functionapprox import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import scala.collection.mutable import scala.io.Source import scala.util.Random def apply(sc: SparkContext): RDD[(Float, Float)] = { // See the random noise val r = new Random(System.currentTimeMillis + Random.nextLong) val src = Source.fromFile(sourceName) val input = src.getLines.map(_.split(DELIM)) ./:(mutable.ArrayBuffer[(Float, Float)]())((buf, xy) => { val x = addNoise(xy(0).trim.toFloat, r) val y = addNoise(xy(1).trim.toFloat, r) buf += ((x, y)) }) datasetSize = input.size val data_rdd = sc.makeRDD(input, nTasks) src.close data_rdd } // Original signal + random noise private def addNoise(value: Float, r: Random): Float = value*(1.0 + RATIO*(r.nextDouble - 0.5)).toFloat } // ------------------------------------- EOF ----------------------------------------------
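addNoise above perturbs each value multiplicatively with a uniform draw, value * (1 + RATIO * (r.nextDouble - 0.5)). If Gaussian noise is preferable, Random.nextGaussian (mean 0, standard deviation 1) drops in directly; a brief sketch under that assumption, with illustrative names:

import scala.util.Random

object Noise extends App {
  val rng = new Random(1L)

  // Uniform relative noise in +/- ratio/2 around the value.
  def uniformNoise(value: Float, ratio: Double): Float =
    (value * (1.0 + ratio * (rng.nextDouble() - 0.5))).toFloat

  // Gaussian relative noise with standard deviation `sigma` (as a fraction of the value).
  def gaussianNoise(value: Float, sigma: Double): Float =
    (value * (1.0 + sigma * rng.nextGaussian())).toFloat

  println(uniformNoise(10.0f, 0.1))
  println(gaussianNoise(10.0f, 0.05))
}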
Example 179
Source File: MonteCarloApproximation.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.sampling import scala.util.Random def sum(from: Double, to: Double): Double = { // Get the minimum and maximum values for the function val (min, max) = getBounds(from, to) val width = to - from val height = if (min >= 0.0) max else max - min // compute the enclosing area (rectangle) val outerArea = width * height val randomx = new Random(System.currentTimeMillis) val randomy = new Random(System.currentTimeMillis + 42L) // Monte Carlo simulator for the function def randomSquare: Double = { val numInsideArea = Range(0, numPoints)./:(0)( (s, n) => { val ptx = randomx.nextDouble * width + from val pty = randomy.nextDouble * height // update the seeds randomx.setSeed(randomy.nextLong) randomy.setSeed(randomx.nextLong) s + (if (pty > 0.0 && pty < f(ptx)) 1 else if (pty < 0.0 && pty > f(ptx)) -1 else 0) } ) numInsideArea.toDouble * outerArea / numPoints } randomSquare } // Compute the bounds for the y values of the function private def getBounds(from: Double, to: Double): (Double, Double) = { def updateBounds(y: Double, minMax: (Double,Double)): Int = { var flag = 0x00 if (y < minMax._1) flag += 0x01 if (y > minMax._2) flag += 0x02 flag } // extract the properties for the integration step val numSteps = Math.sqrt(numPoints).floor.toInt val stepSize = (to - from) / numSteps (0 to numSteps)./:((Double.MaxValue, -Double.MaxValue))( (minMax, n) => { val y = f(n * stepSize + from) updateBounds(y, minMax) match { case 0x01 => (y, minMax._2) case 0x02 => (minMax._1, y) case 0x03 => (y, y) case _ => minMax } } ) } } // -------------------------- EOF ------------------------------------------
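MonteCarloApproximation integrates f by hit-or-miss sampling: it bounds the function in a rectangle, scatters numPoints uniform random points, and scales the rectangle area by the signed fraction of points that land under the curve. The smallest version of the same idea is the classic pi estimate, sketched below independently of the book's API:

import scala.util.Random

object MonteCarloPi extends App {
  val rng = new Random(0L)
  val numPoints = 1000000

  // Fraction of random points in the unit square that land inside the quarter
  // circle x^2 + y^2 <= 1 approximates pi/4.
  val hits = (1 to numPoints).count { _ =>
    val x = rng.nextDouble()
    val y = rng.nextDouble()
    x * x + y * y <= 1.0
  }

  println(s"pi ~= ${4.0 * hits / numPoints}")
}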
Example 180
Source File: Bootstrap.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.sampling

import scala.collection.mutable
import scala.util.Random

private[scalaml] final class Bootstrap(
    numSamples: Int,
    s: Vector[Double] => Double,
    inputDistribution: Vector[Double],
    randomizer: Int => Int
) {

  // Statistic s evaluated on each of the numSamples bootstrap replicates.
  lazy val bootstrappedReplicates: Array[Double] =
    (0 until numSamples)./:(mutable.ArrayBuffer[Double]())(
      (buf, _) => buf += createBootstrapSample
    ).toArray

  // One replicate: resample the input distribution with replacement and apply s.
  def createBootstrapSample: Double = s(
    (0 until inputDistribution.size)./:(mutable.ArrayBuffer[Double]())(
      (buf, _) => {
        val randomValueIndex = randomizer(inputDistribution.size)
        buf += inputDistribution(randomValueIndex)
      }
    ).toVector
  )

  lazy val mean = bootstrappedReplicates.reduce(_ + _) / numSamples

  // Standard error of the replicates: sum the squared deviations from the mean
  // (a map/sum so each replicate is compared to the mean exactly once).
  final def error: Double = {
    import Math._
    val sumOfSquaredDiff = bootstrappedReplicates.map(x => (x - mean) * (x - mean)).sum
    sqrt(sumOfSquaredDiff / (numSamples - 1))
  }
}
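The Bootstrap class resamples the input distribution with replacement numSamples times, applies the statistic s to each replicate, and exposes the replicate mean and standard error. A hypothetical usage sketch following the constructor signature above; the data are made up, and the sketch sits in the same package only because the class is private[scalaml]:

package org.scalaml.sampling // Bootstrap is private[scalaml], so the sketch lives in the same package

import scala.util.Random

object BootstrapUsage extends App {
  val rng = new Random(0L)

  // 1000 noisy observations centred on 5.0.
  val observations = Vector.fill(1000)(5.0 + rng.nextGaussian())

  val bootstrap = new Bootstrap(
    numSamples = 200,                             // number of bootstrap replicates
    s = (xs: Vector[Double]) => xs.sum / xs.size, // statistic of interest: the sample mean
    inputDistribution = observations,
    randomizer = n => rng.nextInt(n)              // index chooser used when resampling with replacement
  )

  // The replicate mean should be close to 5.0; error is its bootstrap standard error.
  println(s"mean = ${bootstrap.mean}, std error = ${bootstrap.error}")
}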
Example 181
Source File: KullbackLeiblerTest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.unsupervised.divergence import org.apache.commons.math3.distribution.GammaDistribution import org.scalaml.Logging import org.scalatest.{FlatSpec, Matchers} import scala.util.Random final class KullbackLeiblerTest extends FlatSpec with Matchers with Logging { protected[this] val name = "Kullback Leibler divergence" it should s"$name Kullback Leibler test on two data sets" in { show("$name Kullback Leibler test on two data sets") val numDataPoints = 100000 def gammaDistribution( shape: Double, scale: Double ): Seq[Double] = { val gamma = new GammaDistribution( shape, scale ) Seq.tabulate( numDataPoints )( n => gamma.density( 2.0 * Random.nextDouble ) ) } val kl = new KullbackLeibler[Double]( gammaDistribution( 2.0, 1.0 ), gammaDistribution( 2.0, 1.0 ) ) val divergence = kl.divergence( 100 ) val expectedDivergence = 0.0063 Math.abs( divergence - expectedDivergence ) < 0.001 should be( true ) show( s"$name divergence $divergence" ) val kl2 = new KullbackLeibler[Double]( gammaDistribution( 2.0, 1.0 ), gammaDistribution( 1.0, 0.5 ) ) val divergence2 = kl2.divergence( 100 ) val expectedDivergence2 = 2.655 Math.abs( divergence2 - expectedDivergence2 ) < 0.1 should be( true ) show( s"$name divergence $divergence2" ) } } // ------------------------------------------- EIF ----------------------------------------------
Example 182
Source File: FunctionApproxTest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.unsupervised.functionapprox import org.scalaml.Logging import org.scalaml.Predef.Context.ToDouble import org.scalatest.{FlatSpec, Matchers} import scala.util.Random final class FunctionApproxTest extends FlatSpec with Matchers with Logging { protected[this] val name = "Function Approximation" // Simplest data point definition case class DataPoint( id: String, value: Double ) final val expected = Math.log( _ ) it should s"$name using a non-resizable histogram" in { show(s"$name using a non-resizable histogram") implicit val dataPoint2Double = new ToDouble[DataPoint] { def apply( dataPoint: DataPoint ): Double = dataPoint.value } val input = Array.tabulate( 10000 )( n => { val x = 1.0 + 9.0 * Random.nextDouble ( DataPoint( n.toString, x ), expected( x ) ) } ) val testSample = List[DataPoint]( DataPoint( "2001", 2.8 ), DataPoint( "2002", 5.5 ), DataPoint( "2003", 7.1 ) ) val error2 = error( new HistogramApprox[DataPoint]( 2, input ), testSample ) show( s"$name error 2 $error2" ) val error5 = error( new HistogramApprox[DataPoint]( 5, input ), testSample ) show( s"$name error 5 $error5" ) val error10 = error( new HistogramApprox[DataPoint]( 10, input ), testSample ) show( s"$name error 10 $error10" ) val error25 = error( new HistogramApprox[DataPoint]( 25, input ), testSample ) show( s"$name error 25 $error25" ) val error100 = error( new HistogramApprox[DataPoint]( 100, input ), testSample ) show( s"$name error 100 $error100" ) } private def error( functionApprox: FunctionApprox[DataPoint], testSample: List[DataPoint] ): Double = Math.sqrt( testSample./:( 0.0 )( ( s, dataPoint ) => { val delta = functionApprox.predict( dataPoint ) - expected( dataPoint.value ) s + delta * delta } ) ) } // ----------------------- EOF ----------------------------------------------------
Example 183
Source File: WorkflowTest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.workflow import org.scalaml.Logging import org.scalaml.core.Design.{ConfigDouble, ConfigInt} import org.scalaml.core.ETransform import org.scalaml.Predef._ import org.scalaml.stats.MinMax import org.scalatest.{FlatSpec, Matchers} import scala.util.{Failure, Random, Success, Try} final class WorkflowTest extends FlatSpec with Matchers with Logging { protected[this] val name = "Workflow for data pipeline" it should s"$name Illustration of a monadic workflow" in { val samples: Int = 100 val normRatio = 10 val splits = 4 val g = (x: Double) => Math.log(x + 1.0) + Random.nextDouble val workflow = new Workflow[Double => Double, DblVec, DblVec, Int] with Sampling[Double => Double, DblVec] with Normalization[DblVec, DblVec] with Aggregation[DblVec, Int] { val sampler = new ETransform[Double => Double, DblVec](ConfigInt(samples)) { override def |> : PartialFunction[Double => Double, Try[DblVec]] = { case f: (Double => Double) => Try { val sampled: DblVec = Vector.tabulate(samples)(n => f(n.toDouble / samples)) show(s"$name sampling : ${sampled.mkString(",")}") sampled } } } val normalizer = new ETransform[DblVec, DblVec](ConfigDouble(normRatio)) { override def |> : PartialFunction[DblVec, Try[DblVec]] = { case x: DblVec if x.nonEmpty => Try { val minMax = MinMax[Double](x).map(_.normalize(0.0, 1.0)).getOrElse(Vector.empty[Double]) show(s"$name normalization : ${minMax.mkString(",")}") minMax } } } val aggregator = new ETransform[DblVec, Int](ConfigInt(splits)) { override def |> : PartialFunction[DblVec, Try[Int]] = { case x: DblVec if x.nonEmpty => Try { show(s"$name aggregation") Range(0, x.size).find(x(_) == 1.0).getOrElse(-1) } } } } (workflow |> g) match { case Success(res) => show(s"$name result = ${res.toString}") case Failure(e) => error(s"$name", e) } } } // --------------------------------------- EOF ----------------------------------------------
Example 184
Source File: MetropolisHastingsTest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.sampling import org.scalatest.{FlatSpec, Matchers} import scala.util.Random final class MetropolisHastingsTest extends FlatSpec with Matchers with org.scalaml.Logging { protected[this] val name = "MCMC Metropolis-Hastings" val square = (x: Double) => if(x < 0.0 && x >= 1.0) 0.0 else x val linear = (x: Double) => 2.0*x -1.0 it should s"$name evaluation square signal with 20 iterations and 0.5 initial value" in { show(s"Evaluation square signal with 20 iterations and 0.5 initial value") val numIterations = 20 val initialValue = 0.5 val results = test(numIterations, initialValue) val acceptance = results.acceptedRate(numIterations) acceptance > 0.80 should be (true) show(s"$name ${results.toString}\n$acceptance") } it should s"$name evaluation square signal with 100 iterations and 0.5 initial value" in { show("Evaluation square signal with 100 iterations and 0.5 initial value") val numIterations = 100 val initialValue = 0.5 val results = test(numIterations, initialValue) val acceptance = results.acceptedRate(numIterations) acceptance > 0.80 should be (true) show(s"$name ${results.toString}\n$acceptance") } it should s"$name evaluation square signal with 250 iterations and 0.5 initial value" in { show("Evaluation square signal with 100 iterations and 0.5 initial value") val numIterations = 250 val initialValue = 0.5 val results = test(numIterations, initialValue) val acceptance = results.acceptedRate(numIterations) acceptance > 0.80 should be (true) show(s"$name ${results.toString}\n$acceptance") } it should s"$name evaluation square signal with 250 iterations and 1.0 initial value" in { show("Evaluation square signal with 250 iterations and 1.0 initial value") val numIterations = 250 val initialValue = 1.0 val results = test(numIterations, initialValue) val acceptance = results.acceptedRate(numIterations) acceptance > 0.80 should be (true) show(s"$name ${results.toString}\n$acceptance") } private def test(numIters: Int, initialValue: Double): Trace = { val random = new Random val q = (s: Double, sPrime: Double) => 0.5*(s + sPrime) val proposer = (s: Double) => { val r = random.nextDouble (if(r < 0.2 || r > 0.8) s*r else 1.0) } val mh = new OneMetropolisHastings(square, q, proposer, ()=>random.nextDouble) mh.mcmc(initialValue, numIters) } } // ---------------------------- EOF -----------------------------------------------
Example 185
Source File: BootstrapTest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.sampling

import org.apache.commons.math3.distribution.{NormalDistribution, RealDistribution}
import org.scalaml.Logging
import org.scalatest.{FlatSpec, Matchers}

import scala.collection.mutable.ArrayBuffer
import scala.util.Random

final class BootstrapTest extends FlatSpec with Matchers with Logging {
  protected val name = "Bootstrap sampling replicates"

  final val NumReplicates1 = 256
  final val NumReplicates2 = 1024
  final val NumDataPoints = 10000

  private def bootstrapEvaluation(
    dist: RealDistribution,
    random: Random,
    coefs: (Double, Double),
    numReplicates: Int
  ): (Double, Double) = {

    // Generate the data points x and their density dist(x)
    val input = (0 until NumDataPoints)./:(new ArrayBuffer[(Double, Double)])(
      (buf, _) => {
        val (a, b) = coefs
        val x = a * random.nextDouble - b
        buf += ((x, dist.density(x)))
      }
    ).toVector

    // Bootstrap for the statistics
    val bootstrap = new Bootstrap(
      numReplicates,
      (x: Vector[Double]) => x.sum / x.length,
      input.map(_._2),
      (rLen: Int) => new Random(System.currentTimeMillis).nextInt(rLen)
    )
    (bootstrap.mean, bootstrap.error)
  }

  it should s"$name over an input with the distribution a*r + b, $NumReplicates1 replicates" in {
    import Math._
    show(s"$name over an input with the distribution a*r + b, $NumReplicates1 replicates")

    val (meanNormal, errorNormal) = bootstrapEvaluation(
      new NormalDistribution,
      new scala.util.Random,
      (5.0, 2.5),
      NumReplicates1
    )
    val expectedMean = 0.185
    show(s"$name meanNormal $meanNormal error $errorNormal")

    abs(expectedMean - meanNormal) < 0.05 should be(true)
    abs(errorNormal) < 0.05 should be(true)
  }

  it should s"$name over an input with the distribution a*r + b, $NumReplicates2 replicates" in {
    import Math._
    show(s"$name over an input with the distribution a*r + b, $NumReplicates2 replicates")

    val (meanNormal, errorNormal) = bootstrapEvaluation(
      new NormalDistribution,
      new scala.util.Random,
      (5.0, 2.5),
      NumReplicates2
    )
    val expectedMean = 0.185
    show(s"$name meanNormal $meanNormal error $errorNormal")

    abs(expectedMean - meanNormal) < 0.05 should be(true)
    abs(errorNormal) < 0.05 should be(true)
  }
}

// ----------------------------------- EOF -------------------------------------------
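The test above drives the book's Bootstrap class. For readers who want to see the resampling idea without that dependency, the following standalone sketch (written for this listing, not taken from the project) resamples the data with replacement, evaluates the statistic on each replicate, and reports the mean and standard error of the replicates.

import scala.util.Random

object BootstrapSketch extends App {
  val data = Vector.fill(1000)(Random.nextGaussian)
  val numReplicates = 256

  def mean(xs: Vector[Double]): Double = xs.sum / xs.length

  // Each replicate: resample the data with replacement and compute the statistic on it
  val replicates = Vector.fill(numReplicates) {
    val resampled = Vector.fill(data.length)(data(Random.nextInt(data.length)))
    mean(resampled)
  }

  val bootMean = mean(replicates)
  val bootError = math.sqrt(
    replicates.map(r => (r - bootMean) * (r - bootMean)).sum / numReplicates
  )
  println(s"bootstrap mean = $bootMean, standard error = $bootError")
}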
Example 186
Source File: ParallelismTest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.scalability.scala

import org.scalaml.Logging
import org.scalatest.{FlatSpec, Matchers}

final class ParallelismTest extends FlatSpec with Matchers with Logging {
  import scala.collection.mutable.HashMap
  import scala.collection.parallel.mutable.{ParArray, ParHashMap}
  import scala.util.Random

  protected[this] val name: String = "Scala parallel collections"

  final private val SZ = 100000
  final private val NUM_TASKS = 8
  final private val evalRange = Range(1, NUM_TASKS)
  final private val TIMES = 20

  // Arbitrary map function
  final val mapF = (x: Double) => Math.sin(x * 0.01) + Math.exp(-x)

  // Arbitrary filter function
  final val filterF = (x: Double) => x > 0.8

  // Arbitrary reduce function
  final val reduceF = (x: Double, y: Double) => (x + y) * x

  it should s"$name: arrays" in {
    show("Evaluation of arrays")

    // Generate random data for both the non-parallel and parallel arrays
    val data = Array.fill(SZ)(Random.nextDouble)
    val pData = ParArray.fill(SZ)(Random.nextDouble)

    // Initialize and execute the benchmark for the parallel array
    val benchmark = new ParallelArray[Double](data, pData, TIMES)

    val ratios = new Array[Double](NUM_TASKS)
    evalRange.foreach(n => ratios.update(n, benchmark.map(mapF)(n)))
    val resultMap = ratios.tail
    resultMap.sum / resultMap.size < 1.0 should be(true)
    display(resultMap, "ParArray.map")

    evalRange.foreach(n => ratios.update(n, benchmark.filter(filterF)(n)))
    val resultFilter = ratios.tail
    resultFilter.sum / resultFilter.size < 1.0 should be(true)
    display(resultFilter, "ParArray.filter")
  }

  it should s"$name: maps" in {
    show("Evaluation of maps")

    val mapData = new HashMap[Int, Double]
    Range(0, SZ).foreach(n => mapData.put(n, Random.nextDouble))
    val parMapData = new ParHashMap[Int, Double]
    Range(0, SZ).foreach(n => parMapData.put(n, Random.nextDouble))

    // Initialize and execute the benchmark for the parallel map
    val benchmark = new ParallelMap[Double](mapData.toMap, parMapData, TIMES)

    val ratios = new Array[Double](NUM_TASKS)
    evalRange.foreach(n => ratios.update(n, benchmark.map(mapF)(n)))
    val resultMap = ratios.tail
    resultMap.sum / resultMap.size < 1.0 should be(true)
    display(resultMap, "ParMap.map")

    evalRange.foreach(n => ratios.update(n, benchmark.filter(filterF)(n)))
    val resultFilter = ratios.tail
    resultFilter.sum / resultFilter.size < 1.0 should be(true)
  }

  private def display(x: Array[Double], label: String): Unit = {
    import org.scalaml.plots.{Legend, LightPlotTheme, LinePlot}

    val labels = Legend(
      name,
      "Scala parallel collections",
      s"Scala parallel computation for $label",
      "Relative timing"
    )
    LinePlot.display(x.toVector, labels, new LightPlotTheme)
  }
}

// ------------------------------------------- EOF --------------------------------------------------
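The ParallelArray and ParallelMap benchmark classes used above come from the book's code base. As a rough, self-contained approximation of what they measure, the sketch below (an illustration for this listing, not the book's implementation, and assuming Scala 2.12 where ParArray ships with the standard library) times the same map function over a plain Array and over a ParArray.

import scala.collection.parallel.mutable.ParArray
import scala.util.Random

object ParTimingSketch extends App {
  val size = 1000000
  val data = Array.fill(size)(Random.nextDouble)
  val pData = ParArray.fill(size)(Random.nextDouble)
  val mapF = (x: Double) => Math.sin(x * 0.01) + Math.exp(-x)

  // Wall-clock timing of a single evaluation, in milliseconds
  def timeMs[A](block: => A): Double = {
    val start = System.nanoTime
    block
    (System.nanoTime - start) / 1e6
  }

  val sequential = timeMs(data.map(mapF))
  val parallel = timeMs(pData.map(mapF))
  println(s"sequential: $sequential ms, parallel: $parallel ms, ratio: ${parallel / sequential}")
}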
Example 187
Source File: StreamsTest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.scalability.scala

import java.lang.ref._

import org.apache.log4j.Logger
import org.scalaml.Logging
import org.scalaml.Predef._
import org.scalatest.{FlatSpec, Matchers}

import scala.math._

case class DataPoint(x: DblVec, y: Double)

final class StreamsTest extends FlatSpec with Matchers with Logging {
  import scala.util.Random

  protected[this] val name = "Scala streams"

  it should s"$name huge list" in {
    show(s"$name huge list")

    val input = (0 until 1000000000).toStream
    input(10) should be(10)
  }

  it should s"$name recursion" in {
    show(s"$name recursion")

    def mean(strm: => Stream[Double]): Double = {
      @scala.annotation.tailrec
      def mean(z: Double, count: Int, strm: Stream[Double]): (Double, Int) =
        if (strm.isEmpty) (z, count)
        else mean((1.0 - 1.0 / count) * z + strm.head / count, count + 1, strm.tail)
      mean(0.0, 1, strm)._1
    }

    val input = List[Double](2.0, 5.0, 3.5, 2.0, 5.7, 1.0, 8.0)
    val ave: Double = mean(input.toStream)
    ave should be(3.88 +- 0.05)
  }

  it should s"$name with recycled memory blocks" in {
    show(s"$name with recycled memory blocks")

    type DblVec = Vector[Double]
    val DATASIZE = 20000

    val dot = (s: Double, xy: (Double, Double)) => s + xy._1 * xy._2
    val diff = (x: DblVec, y: DblVec) => x.zip(y).aggregate(0.0)(dot, _ + _)
    val weights = Vector[Double](0.5, 0.7)
    val lossFunction = new LossFunction(diff, weights, DATASIZE)

    // Create a stream of weak references to stream segments of size DATASIZE/10
    val stream = () => new WeakReference(
      Stream.tabulate(DATASIZE)(n =>
        DataPoint(
          Vector[Double](n.toDouble, n * n.toDouble),
          n.toDouble * weights(0) + n * n.toDouble * weights(1) + 0.1 * Random.nextDouble
        ))
    )
    // Compute a simple distance using the dot product
    val totalLoss = sqrt(lossFunction.compute(stream))
    show(s"$name totalLoss ${totalLoss / DATASIZE}")

    val averageLoss = totalLoss / DATASIZE
    averageLoss should be(0.0 +- 0.001)
  }
}

// -------------------------- EOF --------------------------------
Example 188
Source File: TFuturesTest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.scalability.akka

import akka.actor.{ActorSystem, Props}
import akka.pattern.ask
import akka.util.Timeout
import org.scalaml.Logging
import org.scalaml.Predef.DblVec
import org.scalaml.filtering.dft.DFT
import org.scalaml.scalability.akka.message._
import org.scalaml.util.FormatUtils._
import org.scalatest.{FlatSpec, Matchers}

import scala.concurrent.duration.Duration
import scala.util.Random

// The class header and the Duration/Random imports are not shown in the listing;
// the class name follows the source file name TFuturesTest.scala.
final class TFuturesTest extends FlatSpec with Matchers with Logging {
  protected[this] val name: String = "Scala futures"

  private val NUM_WORKERS = 8
  private val NUM_DATA_POINTS = 1000000

  // Synthetic input signal: sum of three harmonics plus noise
  private val h = (x: Double) =>
    2.0 * Math.cos(Math.PI * 0.005 * x) + // simulated first harmonic
      Math.cos(Math.PI * 0.05 * x) + // simulated second harmonic
      0.5 * Math.cos(Math.PI * 0.2 * x) + // simulated third harmonic
      0.2 * Random.nextDouble

  private val TimeOut = 5000L
  private val duration = Duration(TimeOut, "millis")
  implicit val timeout = new Timeout(duration)

  it should s"$name Data transformation futures using Akka actors" in {
    show(s"$name Data transformation futures using Akka actors")

    val actorSystem = ActorSystem("System")
    val xt = Vector.tabulate(NUM_DATA_POINTS)(h(_))

    val master = actorSystem.actorOf(
      Props(new DFTFutures(xt, NUM_WORKERS)),
      "DFTTransform"
    )
    val future = master ? Start()
    Thread.sleep(TimeOut)
    actorSystem.shutdown()
  }
}

// ----------------------------------------------- EOF ---------------------------
Example 189
Source File: DKalmanTest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.filtering.kalman

import org.scalaml.{Logging, Predef, Resource}
import org.scalaml.Predef.DblVec
import org.scalaml.stats.TSeries.zipWithShift
import org.scalaml.trading.YahooFinancials
import org.scalaml.trading.YahooFinancials.adjClose
import org.scalaml.util.Assertable
import org.scalaml.util.FormatUtils.{LONG, format}
import org.scalaml.workflow.data.{DataSink, DataSource}
import org.scalatest.{FlatSpec, Matchers}

import scala.util.{Failure, Random, Success}

final class DKalmanTest extends FlatSpec with Matchers with Logging with Assertable with Resource {
  protected[this] val name: String = "Kalman filter"

  private val OUTPUT_FILE = "output/filtering/dkalman"
  private val RESOURCE_DIR = "filtering/"
  private val NUM_VALUES = 128

  // Noise has to be declared implicitly
  implicit val qrNoise = new QRNoise((0.7, 0.3), (m: Double) => m * Random.nextGaussian)

  // Contract extractor
  private val extractor = YahooFinancials.adjClose :: List[Array[String] => Double]()

  it should s"$name evaluation" in {
    import Predef._
    show(s"$name evaluation")

    // H and P0 are the only components that are independent from
    // input data and smoothing factor. The control matrix B is not defined
    // as there is no external control on the time series.
    val H: DblMatrix = ((0.9, 0.0), (0.0, 0.1))
    val P0: DblMatrix = ((0.4, 0.3), (0.5, 0.4))

    // The evaluation of the filter over the price series is not included in this listing.
  }

  private def display(z: DblVec, x: DblVec, alpha: Double): Unit = {
    import org.scalaml.plots.{Legend, LightPlotTheme, LinePlot}

    val labels = Legend(
      name,
      s"Kalman filter alpha = $alpha",
      s"Kalman with alpha $alpha",
      "y"
    )
    val data = (z, "price") :: (x, "Filtered") :: List[(DblVec, String)]()
    LinePlot.display(data, labels, new LightPlotTheme)
  }
}

// -------------------------------- EOF ----------------------------------------------------
Example 190
Source File: Mixer.scala From Learn-Scala-Programming with MIT License | 5 votes |
package ch12

import akka.actor.typed.{ActorRef, Behavior, SupervisorStrategy}
import akka.actor.typed.scaladsl.Behaviors
import ch12.Bakery.{Groceries, Dough}
import ch12.Chef.Collect

import scala.concurrent.duration.FiniteDuration
import scala.util.Random

object Mixer {
  class MotorOverheatException extends Exception
  class SlowRotationSpeedException extends Exception
  class StrongVibrationException extends Exception

  final case class Mix(groceries: Groceries, sender: ActorRef[Collect])

  def mix(mixTime: FiniteDuration): Behavior[Mix] = Behaviors.receive[Mix] {
    case (ctx, Mix(Groceries(eggs, flour, sugar, chocolate), sender)) =>
      if (Random.nextBoolean()) throw new MotorOverheatException
      Thread.sleep(mixTime.toMillis)
      sender ! Collect(Dough(eggs * 50 + flour + sugar + chocolate), ctx.self)
      Behaviors.stopped
  }

  def controlledMix(mixTime: FiniteDuration): Behavior[Mix] =
    Behaviors
      .supervise(
        Behaviors
          .supervise(Behaviors
            .supervise(mix(mixTime))
            .onFailure[MotorOverheatException](SupervisorStrategy.stop))
          .onFailure[SlowRotationSpeedException](SupervisorStrategy.restart))
      .onFailure[StrongVibrationException](SupervisorStrategy.resume)
}
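The object above only defines behaviors; a minimal way to exercise the nested supervision might look like the sketch below. It is illustrative only: the guardian behavior, the actor and system names, and the grocery quantities (assumed to be four Int fields, matching the pattern match in mix) are assumptions, not part of the book's chapter code.

package ch12

import akka.actor.typed.ActorSystem
import akka.actor.typed.scaladsl.Behaviors
import ch12.Bakery.Groceries
import ch12.Chef.Collect

import scala.concurrent.duration._

object MixerDemo extends App {
  // Guardian that spawns a supervised mixer, sends one Mix command,
  // and stops once the dough comes back
  val guardian = Behaviors.setup[Collect] { ctx =>
    val mixer = ctx.spawn(Mixer.controlledMix(100.millis), "mixer")
    mixer ! Mixer.Mix(Groceries(2, 500, 100, 50), ctx.self)
    Behaviors.receiveMessage { collected =>
      ctx.log.info("Received: {}", collected)
      Behaviors.stopped
    }
  }

  ActorSystem(guardian, "bakery-demo")
}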
Example 191
Source File: BakerySpec.scala From Learn-Scala-Programming with MIT License | 5 votes |
package ch11

import akka.actor.{ActorSystem, Props}
import akka.testkit.{ImplicitSender, TestKit, TestProbe}
import ch11.Cook.RawCookies
import ch11.Manager.ShoppingList
import ch11.Oven.Cookies
import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpecLike}

import scala.concurrent.duration._
import scala.language.postfixOps
import scala.util.Random

class BakerySpec(_system: ActorSystem)
    extends TestKit(_system)
    with Matchers
    with WordSpecLike
    with BeforeAndAfterAll
    with ImplicitSender {

  def this() = this(ActorSystem("BakerySpec"))

  override def afterAll: Unit = shutdown(system)

  "The boy should" should {
    val boyProps = Boy.props(system.actorSelection(testActor.path))
    val boy = system.actorOf(boyProps)

    "forward given ShoppingList to the seller" in {
      val list = ShoppingList(0, 0, 0, 0)
      boy ! list
      within(3 millis, 20 millis) {
        expectMsg(list)
        lastSender shouldBe testActor
      }
    }
    "ignore other message types" in {
      boy ! 'GoHome
      expectNoMessage(500 millis)
    }
  }

  "The baker should" should {
    val parent = TestProbe()
    val baker = parent.childActorOf(Props(classOf[Baker], 0 millis))

    "bake cookies in batches" in {
      val count = Random.nextInt(100)
      baker ! RawCookies(Oven.size * count)
      parent.expectMsgAllOf(List.fill(count)(Cookies(Oven.size)): _*)
    }
  }
}
Example 192
Source File: Main.scala From perf_tester with Apache License 2.0 | 5 votes |
package org.preftester

import java.io.File
import java.nio.file.{Files, Paths}

import com.typesafe.config.{ConfigFactory, ConfigObject, ConfigParseOptions}
import org.perftester.results.renderer.TextRenderer
import org.perftester.results.{ResultReader, RunResult}

import scala.collection.JavaConverters._
import scala.sys.process.Process
import scala.util.{Random, Try}

object Main extends App {
  val baseDir = Paths.get(args.headOption.getOrElse("."))

  case class Configuration(
    reference: String,
    baseScalaVersion: String,
    buildLocally: Boolean,
    jvmOptions: String,
    scalaOptions: String
  ) {
    val scalaVersion =
      if (buildLocally) s"$baseScalaVersion-$reference-SNAPSHOT" else reference
  }

  val config = ConfigFactory.parseFile(
    baseDir.resolve("benchmark.conf").toFile,
    ConfigParseOptions.defaults().setAllowMissing(false)
  )

  val benchmarks = config.getObject("benchmarks").asScala.map {
    case (name, obj: ConfigObject) =>
      def read(name: String, default: String) =
        Try(obj.toConfig.getString(name)).getOrElse(default)

      name -> Configuration(
        reference = read("reference", name),
        baseScalaVersion = read("baseScalaVersion", "2.12.4"),
        buildLocally = read("buildLocally", "false").toBoolean,
        jvmOptions = read("jvmOptions", ""),
        scalaOptions = read("scalaOptions", "")
      )
  }.toSeq

  val iterations = config.getInt("iterations")
  val N = config.getInt("N")
  val M = config.getInt("M")

  val results = (1 to iterations).foldLeft(Map.empty[String, Vector[RunResult]]) {
    case (all, i) =>
      Random.shuffle(benchmarks).foldLeft(all) {
        case (all, (name, benchmark)) =>
          val location = baseDir.resolve(benchmark.scalaVersion)
          val cmd = Seq(s"./run.sh", ".", N, M, benchmark.scalaOptions).map(_.toString)
          println(s"## Run $i for $name")
          val env =
            if (benchmark.jvmOptions.isEmpty) Nil
            else Seq("_JAVA_OPTIONS" -> benchmark.jvmOptions)
          val output = Process(cmd, location.toFile, env: _*).!!
          println(output)
          val resultsDir = location.resolve("output").resolve("profile.txt")
          if (Files.exists(resultsDir)) {
            val result = ResultReader.readResults(name, resultsDir, N)
            val previous = all.getOrElse(name, Vector.empty)
            all + (name -> (previous :+ result))
          } else all
      }
  }

  results.foreach {
    case (name, results) =>
      println(s"########## Result for $name ##########")
      TextRenderer.outputTextResults(iterations, results)
  }
}
Example 193
Source File: TikaParquetParser.scala From project-matt with MIT License | 5 votes |
package org.datafy.aws.app.matt.extras

import java.io.{File, FileOutputStream, IOException, InputStream}
import java.util

import scala.collection.JavaConverters._

import org.xml.sax.{ContentHandler, SAXException}
import org.apache.tika.metadata.Metadata
import org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE
import org.apache.tika.mime.MediaType
import org.apache.tika.parser.{AbstractParser, ParseContext}
import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.parquet.hadoop.ParquetFileReader
import org.apache.parquet.hadoop.ParquetReader
import org.apache.parquet.format.converter.ParquetMetadataConverter
import org.apache.parquet.hadoop.util.HadoopInputFile
import org.apache.parquet.tools.json.JsonRecordFormatter
import org.apache.parquet.tools.read.{SimpleReadSupport, SimpleRecord}
import org.apache.tika.exception.TikaException
import org.apache.tika.sax.XHTMLContentHandler

import scala.util.Random

class TikaParquetParser extends AbstractParser {

  final val PARQUET_RAW = MediaType.application("x-parquet")
  private val SUPPORTED_TYPES: Set[MediaType] = Set(PARQUET_RAW)

  def getSupportedTypes(context: ParseContext): util.Set[MediaType] = {
    SUPPORTED_TYPES.asJava
  }

  @throws(classOf[IOException])
  @throws(classOf[SAXException])
  @throws(classOf[TikaException])
  def parse(stream: InputStream, handler: ContentHandler,
            metadata: Metadata, context: ParseContext): Unit = {
    // create temp file from stream
    val fileNamePrefix = Random.alphanumeric.take(5).mkString
    val tempFile = File.createTempFile(s"parquet-$fileNamePrefix", ".parquet")
    IOUtils.copy(stream, new FileOutputStream(tempFile))

    val conf = new Configuration()
    val path = new Path(tempFile.getAbsolutePath)
    val parquetMetadata = ParquetFileReader.readFooter(conf, path, ParquetMetadataConverter.NO_FILTER)
    var defaultReader: ParquetReader[SimpleRecord] = null

    val columns = parquetMetadata.getFileMetaData.getSchema.getFields
    metadata.set(CONTENT_TYPE, PARQUET_RAW.toString)
    metadata.set("Total Number of Columns", columns.size.toString)
    metadata.set("Parquet Column Names", columns.toString)

    val xhtml = new XHTMLContentHandler(handler, metadata)
    xhtml.startDocument()
    xhtml.startElement("p")

    // ::TODO:: ensure the parquet reader reads all rows, not only the first one
    try {
      defaultReader = ParquetReader.builder(new SimpleReadSupport(),
        new Path(tempFile.getAbsolutePath)).build()
      // read the first record once; calling read() twice would skip a row
      val firstRecord: SimpleRecord = defaultReader.read()
      if (firstRecord != null) {
        val jsonFormatter = JsonRecordFormatter.fromSchema(parquetMetadata.getFileMetaData.getSchema)
        val textContent: String = jsonFormatter.formatRecord(firstRecord)
        xhtml.characters(textContent)
        xhtml.endElement("p")
        xhtml.endDocument()
      }
    } catch {
      case e: Throwable =>
        e.printStackTrace()
        if (defaultReader != null) {
          try {
            defaultReader.close()
          } catch {
            case _: Throwable =>
          }
        }
    } finally {
      if (tempFile != null) tempFile.delete()
    }
  }
}
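A possible way to invoke this parser directly, outside of a Tika AutoDetectParser pipeline, is sketched below. The input path, the object name and the write-limit choice are illustrative assumptions, not part of the project.

import java.io.FileInputStream

import org.apache.tika.metadata.Metadata
import org.apache.tika.parser.ParseContext
import org.apache.tika.sax.BodyContentHandler
import org.datafy.aws.app.matt.extras.TikaParquetParser

object ParquetParseDemo extends App {
  val parser = new TikaParquetParser
  val metadata = new Metadata
  val handler = new BodyContentHandler(-1) // -1 disables the default write limit
  val stream = new FileInputStream("data/sample.parquet") // hypothetical input path

  try parser.parse(stream, handler, metadata, new ParseContext)
  finally stream.close()

  println(metadata.get("Total Number of Columns"))
  println(handler.toString.take(200)) // first part of the extracted JSON text
}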
Example 194
Source File: TikaHadoopOrcParser.scala From project-matt with MIT License | 5 votes |
package org.datafy.aws.app.matt.extras

import java.io.{File, FileOutputStream, IOException, InputStream}
import java.util

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration

import scala.collection.JavaConverters._

import org.apache.hadoop.fs.Path
import org.apache.hadoop.hive.serde2.objectinspector.StructField
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector
import org.apache.orc.OrcFile
import org.apache.orc.OrcFile.ReaderOptions
import org.apache.orc.Reader
import org.apache.orc.RecordReader
import org.apache.tika.exception.TikaException
import org.apache.tika.metadata.Metadata
import org.apache.tika.mime.MediaType
import org.apache.tika.parser.{AbstractParser, ParseContext}
import org.xml.sax.{ContentHandler, SAXException}

import scala.util.Random

class TikaHadoopOrcParser extends AbstractParser {

  final val ORC_RAW = MediaType.application("x-orc")
  private val SUPPORTED_TYPES: Set[MediaType] = Set(ORC_RAW)

  def getSupportedTypes(context: ParseContext): util.Set[MediaType] = {
    SUPPORTED_TYPES.asJava
  }

  @throws(classOf[IOException])
  @throws(classOf[SAXException])
  @throws(classOf[TikaException])
  def parse(stream: InputStream, handler: ContentHandler,
            metadata: Metadata, context: ParseContext): Unit = {
    // create temp file from stream
    try {
      val fileNamePrefix = Random.alphanumeric.take(5).mkString
      val tempFile = File.createTempFile(s"orc-$fileNamePrefix", ".orc")
      IOUtils.copy(stream, new FileOutputStream(tempFile))

      val path = new Path(tempFile.getAbsolutePath)
      val conf = new Configuration()
      val orcReader = OrcFile.createReader(path, new ReaderOptions(conf))
      val records: RecordReader = orcReader.rows()

      val storeRecord = null
      val firstBlockKey = null
    } catch {
      case e: Throwable => e.printStackTrace()
    }
    // val fields =
  }
}
Example 195
Source File: Api.scala From endpoints4s with MIT License | 5 votes |
package sample

import endpoints4s.play.server._

import scala.concurrent.Future
import scala.util.Random

class Api(val playComponents: PlayComponents)
    extends ApiAlg
    with AssetsAlg
    with Endpoints
    with JsonEntitiesFromCodecs
    with Assets
    with BasicAuthentication {

  val routes = routesFromEndpoints(
    index.implementedBy { case (name, age, _) => User(name, age) },
    action.implementedBy(param => ActionResult(index.call(("Julien", 30, "a&b+c")).url)),
    actionFut.implementedByAsync(param =>
      Future.successful(ActionResult(index.call(("Julien", 30, "future")).url))
    ),
    assets.implementedBy(assetsResources()),
    maybe.implementedBy(_ => if (util.Random.nextBoolean()) Some(()) else None),
    auth.implementedBy { credentials =>
      println(s"Authenticated request: ${credentials.username}")
      if (Random.nextBoolean()) Some(()) else None // Randomly return a forbidden
    }
  )
}
Example 196
Source File: Api.scala From endpoints4s with MIT License | 5 votes |
package sample

import endpoints4s.akkahttp.server._

import scala.concurrent.Future
import scala.util.Random

object Api extends ApiAlg with Endpoints with JsonEntitiesFromCodecs with BasicAuthentication {

  import akka.http.scaladsl.server.Directives._

  val routes =
    index.implementedBy { case (name, age, _) => User(name, age) } ~
      action.implementedBy { param => ActionResult("Action") } ~
      actionFut.implementedByAsync { param =>
        Future.successful(ActionResult("Future Action"))
      } ~
      maybe.implementedBy { _ =>
        if (util.Random.nextBoolean()) Some(()) else None
      } ~
      auth.implementedBy { credentials =>
        println(s"Authenticated request: ${credentials.username}")
        if (Random.nextBoolean()) Some(()) else None // Randomly return a forbidden
      }
}
Example 197
Source File: Api.scala From endpoints4s with MIT License | 5 votes |
package sample

import cats.effect.IO
import endpoints4s.http4s.server.{BasicAuthentication, Endpoints, JsonEntitiesFromCodecs}
import org.http4s.HttpRoutes

import scala.util.Random

object Api extends Endpoints[IO] with JsonEntitiesFromCodecs with BasicAuthentication with ApiAlg {

  val router: HttpRoutes[IO] = HttpRoutes.of(
    routesFromEndpoints(
      index.implementedBy { case (name, age, _) => User(name, age) },
      maybe.implementedBy(_ => if (util.Random.nextBoolean()) Some(()) else None) orElse
        action.implementedBy { _ => ActionResult("Action") },
      actionFut.implementedByEffect { _ => IO.pure(ActionResult("Action")) },
      auth.implementedBy { credentials =>
        println(s"Authenticated request: ${credentials.username}")
        if (Random.nextBoolean()) Some(()) else None // Randomly return a forbidden
      }
    )
  )
}
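The router above still needs to be mounted on an HTTP server to be reachable. One way to do that with the Blaze backend is sketched below; this is version-dependent and written as an assumption against the http4s 0.21.x / cats-effect 2 line, not taken from the sample project.

package sample

import cats.effect.{ExitCode, IO, IOApp}
import org.http4s.implicits._
import org.http4s.server.blaze.BlazeServerBuilder

import scala.concurrent.ExecutionContext.global

object Server extends IOApp {
  // Bind the endpoints4s router on localhost:8080 and serve until interrupted
  def run(args: List[String]): IO[ExitCode] =
    BlazeServerBuilder[IO](global)
      .bindHttp(8080, "localhost")
      .withHttpApp(Api.router.orNotFound)
      .serve
      .compile
      .drain
      .as(ExitCode.Success)
}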
Example 198
Source File: LogkafkaAdminUtils.scala From CMAK with Apache License 2.0 | 5 votes |
package kafka.manager.utils

import java.util.Properties

import grizzled.slf4j.Logging
import kafka.manager.model.{Kafka_0_8_2_0, KafkaVersion, ActorModel}
import org.apache.curator.framework.CuratorFramework

import scala.collection.mutable
import scala.util.Random

class LogkafkaAdminUtils(version: KafkaVersion) extends Logging {

  val rand = new Random

  def isDeleteSupported: Boolean = {
    version match {
      case Kafka_0_8_2_0 => true
      case _ => false
    }
  }

  def deleteLogkafka(curator: CuratorFramework,
                     logkafka_id: String,
                     log_path: String,
                     logkafkaConfigOption: Option[ActorModel.LogkafkaConfig]): Unit = {
    logkafkaConfigOption.map { lcg =>
      lcg.config.map { c =>
        val configMap = kafka.manager.utils.Logkafka.parseJsonStr(logkafka_id, c)
        if (!configMap.isEmpty || !(configMap - log_path).isEmpty) {
          writeLogkafkaConfig(curator, logkafka_id, configMap - log_path, -1)
        }
      } getOrElse {
        LogkafkaErrors.LogkafkaIdNotExists(logkafka_id)
      }
    } getOrElse {
      LogkafkaErrors.LogkafkaIdNotExists(logkafka_id)
    }
  }

  def createLogkafka(curator: CuratorFramework,
                     logkafka_id: String,
                     log_path: String,
                     config: Properties = new Properties,
                     logkafkaConfigOption: Option[ActorModel.LogkafkaConfig] = None): Unit = {
    createOrUpdateLogkafkaConfigPathInZK(curator, logkafka_id, log_path, config, logkafkaConfigOption)
  }

  def createOrUpdateLogkafkaConfigPathInZK(curator: CuratorFramework,
                                           logkafka_id: String,
                                           log_path: String,
                                           config: Properties = new Properties,
                                           logkafkaConfigOption: Option[ActorModel.LogkafkaConfig],
                                           update: Boolean = false,
                                           readVersion: Int = -1,
                                           checkConfig: Boolean = true) {
    // validate arguments
    Logkafka.validateLogkafkaId(logkafka_id)
    Logkafka.validatePath(log_path)

    if (checkConfig) {
      LogkafkaNewConfigs.validate(version, config)
    }

    val configMap: mutable.Map[String, String] = {
      import scala.collection.JavaConverters._
      config.asScala
    }
    val newConfigMap = Map(log_path -> Map(configMap.toSeq: _*))

    val logkafkaConfigMap = logkafkaConfigOption.map { lcg =>
      lcg.config.map { c =>
        kafka.manager.utils.Logkafka.parseJsonStr(logkafka_id, c)
      } getOrElse {
        Map.empty
      }
    } getOrElse {
      Map.empty
    }

    if (!update) {
      // write out the config on create, not update, if there is any
      writeLogkafkaConfig(curator, logkafka_id, logkafkaConfigMap ++ newConfigMap, readVersion)
    } else {
      val merged = logkafkaConfigMap.toSeq ++ newConfigMap.toSeq
      val grouped = merged.groupBy(_._1)
      val cleaned = grouped.mapValues(_.map(_._2).fold(Map.empty)(_ ++ _))
      writeLogkafkaConfig(curator, logkafka_id, cleaned, readVersion)
    }
  }

  private def writeLogkafkaConfig(curator: CuratorFramework,
                                  logkafka_id: String,
                                  configMap: Map[String, Map[String, String]],
                                  readVersion: Int = -1) {
    ZkUtils.updatePersistentPath(curator, LogkafkaZkUtils.getLogkafkaConfigPath(logkafka_id),
      toJson(configMap), readVersion)
  }
}
Example 199
Source File: EmbeddedKafkaCustomConfigSpec.scala From embedded-kafka with MIT License | 5 votes |
package net.manub.embeddedkafka

import kafka.server.KafkaConfig
import net.manub.embeddedkafka.EmbeddedKafka._
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig

import scala.language.postfixOps
import scala.util.Random

class EmbeddedKafkaCustomConfigSpec extends EmbeddedKafkaSpecSupport {
  final val TwoMegabytes = 2097152
  final val ThreeMegabytes = 3145728

  "the custom config" should {
    "allow passing additional producer parameters" in {
      val customBrokerConfig = Map(
        KafkaConfig.ReplicaFetchMaxBytesProp -> s"$ThreeMegabytes",
        KafkaConfig.MessageMaxBytesProp -> s"$ThreeMegabytes"
      )

      val customProducerConfig =
        Map(ProducerConfig.MAX_REQUEST_SIZE_CONFIG -> s"$ThreeMegabytes")
      val customConsumerConfig =
        Map(ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG -> s"$ThreeMegabytes")

      implicit val customKafkaConfig: EmbeddedKafkaConfig =
        EmbeddedKafkaConfig(
          customBrokerProperties = customBrokerConfig,
          customProducerProperties = customProducerConfig,
          customConsumerProperties = customConsumerConfig
        )

      val bigMessage = generateMessageOfLength(TwoMegabytes)
      val topic = "big-message-topic"

      withRunningKafka {
        publishStringMessageToKafka(topic, bigMessage)
        consumeFirstStringMessageFrom(topic) shouldBe bigMessage
      }
    }
  }

  def generateMessageOfLength(length: Int): String =
    Iterator.continually(Random.nextPrintableChar) take length mkString
}
Example 200
Source File: package.scala From wix-http-testkit with MIT License | 5 votes |
package com.wix.test

import scala.util.Random

package object random {

  def randomStrOpt: Option[String] = Some(randomStr)

  def randomStr: String = randomStrWith(length = 20)

  def randomStrWith(length: Int): String =
    Random.alphanumeric
      .take(length).mkString

  def randomStrPair = randomStr -> randomStr

  def randomInt: Int = Random.nextInt()

  def randomBytes(length: Int): Array[Byte] = {
    val result = Array.ofDim[Byte](length)
    Random.nextBytes(result)
    result
  }

  def randomInt(from: Int, to: Int): Int = {
    require(math.abs(to.toDouble - from.toDouble) <= Int.MaxValue.toDouble,
      s"Range can't exceed ${Int.MaxValue}")
    from + Random.nextInt(math.max(to - from, 1))
  }

  def randomPort = randomInt(0, 65535)

  def randomPath = "/" + Seq.fill(5)(randomStr).mkString("/")

  def randomParameter = randomStr -> randomStr

  def randomHeader = randomStr -> randomStr
}
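A short usage sketch of the helpers defined in this package object; the object name and the printed values are illustrative only.

import com.wix.test.random._

object RandomDemo extends App {
  val userId = randomStrWith(length = 8)
  val port = randomPort
  val path = randomPath
  val (headerName, headerValue) = randomHeader
  val payload = randomBytes(16)

  println(s"user=$userId port=$port path=$path header=$headerName:$headerValue bytes=${payload.length}")
}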