scala.util.Random Scala Examples
The following examples show how to use scala.util.Random.
Each example is taken from an open-source project; the source file, project name, and license appear in the header above the code.
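Before the project-specific examples, here is a minimal, self-contained sketch of the scala.util.Random calls that recur throughout them (nextInt, shuffle, alphanumeric, seeded instances); the object and value names are illustrative only.

import scala.util.Random

object RandomBasics extends App {
  // The shared Random object is convenient but unseeded: results change between runs.
  val roll: Int = Random.nextInt(6) + 1                    // uniform value in 1..6
  val id: String = Random.alphanumeric.take(8).mkString    // random alphanumeric string
  val order: List[Int] = Random.shuffle(List(1, 2, 3, 4, 5))

  // A seeded instance always produces the same sequence for the same seed,
  // which is what the test suites below rely on for reproducibility.
  val seeded = new Random(42L)
  val fixed: Double = seeded.nextDouble()

  println(s"roll=$roll id=$id order=$order fixed=$fixed")
}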
Example 1
Source File: MockHttpServer.scala From cornichon with Apache License 2.0 | 6 votes |
package com.github.agourlay.cornichon.http.server import java.net.NetworkInterface import com.github.agourlay.cornichon.core.CornichonError import monix.eval.Task import monix.execution.Scheduler import org.http4s.HttpRoutes import org.http4s.server.Router import org.http4s.server.blaze.BlazeServerBuilder import org.http4s.implicits._ import scala.jdk.CollectionConverters._ import scala.concurrent.duration._ import scala.util.Random class MockHttpServer[A](interface: Option[String], port: Option[Range], mockService: HttpRoutes[Task], maxRetries: Int = 5)(useFromAddress: String => Task[A])(implicit scheduler: Scheduler) { private val selectedInterface = interface.getOrElse(bestInterface()) private val randomPortOrder = port.fold(0 :: Nil)(r => Random.shuffle(r.toList)) private val mockRouter = Router("/" -> mockService).orNotFound def useServer(): Task[A] = if (randomPortOrder.isEmpty) Task.raiseError(MockHttpServerError.toException) else startServerTryPorts(randomPortOrder) private def startServerTryPorts(ports: List[Int], retry: Int = 0): Task[A] = startBlazeServer(ports.head).onErrorHandleWith { case _: java.net.BindException if ports.length > 1 => startServerTryPorts(ports.tail, retry) case _: java.net.BindException if retry < maxRetries => val sleepFor = retry + 1 println(s"Could not start server on any port. Retrying in $sleepFor seconds...") startServerTryPorts(randomPortOrder, retry = retry + 1).delayExecution(sleepFor.seconds) } private def startBlazeServer(port: Int): Task[A] = BlazeServerBuilder[Task](executionContext = scheduler) .bindHttp(port, selectedInterface) .withoutBanner .withHttpApp(mockRouter) .withNio2(true) .resource .use(server => useFromAddress(s"http://${server.address.getHostString}:${server.address.getPort}")) private def bestInterface(): String = NetworkInterface.getNetworkInterfaces.asScala .filter(_.isUp) .flatMap(_.getInetAddresses.asScala) .find(_.isSiteLocalAddress) .map(_.getHostAddress) .getOrElse("localhost") } case object MockHttpServerError extends CornichonError { val baseErrorMessage = "the range of ports provided for the HTTP mock is invalid" }
Example 2
Source File: SamplingUtilsSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.util.random

import scala.util.Random

import org.apache.commons.math3.distribution.{BinomialDistribution, PoissonDistribution}

import org.apache.spark.SparkFunSuite

class SamplingUtilsSuite extends SparkFunSuite {

  test("reservoirSampleAndCount") {
    val input = Seq.fill(100)(Random.nextInt())

    // input size < k
    val (sample1, count1) = SamplingUtils.reservoirSampleAndCount(input.iterator, 150)
    assert(count1 === 100)
    assert(input === sample1.toSeq)

    // input size == k
    val (sample2, count2) = SamplingUtils.reservoirSampleAndCount(input.iterator, 100)
    assert(count2 === 100)
    assert(input === sample2.toSeq)

    // input size > k
    val (sample3, count3) = SamplingUtils.reservoirSampleAndCount(input.iterator, 10)
    assert(count3 === 100)
    assert(sample3.length === 10)
  }

  // compute the sampling fraction
  test("computeFraction") {
    // test that the computed fraction guarantees enough data points
    // in the sample with a failure rate <= 0.0001
    val n = 100000

    for (s <- 1 to 15) {
      val frac = SamplingUtils.computeFractionForSampleSize(s, n, true)
      val poisson = new PoissonDistribution(frac * n)
      assert(poisson.inverseCumulativeProbability(0.0001) >= s, "Computed fraction is too low")
    }
    for (s <- List(20, 100, 1000)) {
      val frac = SamplingUtils.computeFractionForSampleSize(s, n, true)
      val poisson = new PoissonDistribution(frac * n)
      assert(poisson.inverseCumulativeProbability(0.0001) >= s, "Computed fraction is too low")
    }
    for (s <- List(1, 10, 100, 1000)) {
      val frac = SamplingUtils.computeFractionForSampleSize(s, n, false)
      val binomial = new BinomialDistribution(n, frac)
      assert(binomial.inverseCumulativeProbability(0.0001) * n >= s, "Computed fraction is too low")
    }
  }
}
Example 3
Source File: VectorSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.util

import scala.util.Random

import org.apache.spark.SparkFunSuite

@deprecated("suppress compile time deprecation warning", "1.0.0")
class VectorSuite extends SparkFunSuite {

  def verifyVector(vector: Vector, expectedLength: Int): Unit = {
    assert(vector.length == expectedLength)
    assert(vector.elements.min > 0.0)
    assert(vector.elements.max < 1.0)
  }

  test("random with default random number generator") { // default random number generator
    val vector100 = Vector.random(100)
    verifyVector(vector100, 100)
  }

  test("random with given random number generator") { // explicitly supplied random number generator
    val vector100 = Vector.random(100, new Random(100))
    verifyVector(vector100, 100)
  }
}
Example 4
Source File: ByteArrayChunkOutputStreamSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.util.io

import scala.util.Random

import org.apache.spark.SparkFunSuite

class ByteArrayChunkOutputStreamSuite extends SparkFunSuite {

  test("empty output") {
    val o = new ByteArrayChunkOutputStream(1024)
    assert(o.toArrays.length === 0)
  }

  test("write a single byte") {
    val o = new ByteArrayChunkOutputStream(1024)
    o.write(10)
    assert(o.toArrays.length === 1)
    assert(o.toArrays.head.toSeq === Seq(10.toByte))
  }

  test("write a single near boundary") {
    val o = new ByteArrayChunkOutputStream(10)
    o.write(new Array[Byte](9))
    o.write(99)
    assert(o.toArrays.length === 1)
    assert(o.toArrays.head(9) === 99.toByte)
  }

  test("write a single at boundary") {
    val o = new ByteArrayChunkOutputStream(10)
    o.write(new Array[Byte](10))
    o.write(99)
    assert(o.toArrays.length === 2)
    assert(o.toArrays(1).length === 1)
    assert(o.toArrays(1)(0) === 99.toByte)
  }

  test("single chunk output") {
    val ref = new Array[Byte](8)
    Random.nextBytes(ref)
    val o = new ByteArrayChunkOutputStream(10)
    o.write(ref)
    val arrays = o.toArrays
    assert(arrays.length === 1)
    assert(arrays.head.length === ref.length)
    assert(arrays.head.toSeq === ref.toSeq)
  }

  test("single chunk output at boundary size") {
    val ref = new Array[Byte](10)
    Random.nextBytes(ref)
    val o = new ByteArrayChunkOutputStream(10)
    o.write(ref)
    val arrays = o.toArrays
    assert(arrays.length === 1)
    assert(arrays.head.length === ref.length)
    assert(arrays.head.toSeq === ref.toSeq)
  }

  test("multiple chunk output") {
    val ref = new Array[Byte](26)
    Random.nextBytes(ref)
    val o = new ByteArrayChunkOutputStream(10)
    o.write(ref)
    val arrays = o.toArrays
    assert(arrays.length === 3)
    assert(arrays(0).length === 10)
    assert(arrays(1).length === 10)
    assert(arrays(2).length === 6)
    assert(arrays(0).toSeq === ref.slice(0, 10))
    assert(arrays(1).toSeq === ref.slice(10, 20))
    assert(arrays(2).toSeq === ref.slice(20, 26))
  }

  test("multiple chunk output at boundary size") {
    val ref = new Array[Byte](30)
    Random.nextBytes(ref)
    val o = new ByteArrayChunkOutputStream(10)
    o.write(ref)
    val arrays = o.toArrays
    assert(arrays.length === 3)
    assert(arrays(0).length === 10)
    assert(arrays(1).length === 10)
    assert(arrays(2).length === 10)
    assert(arrays(0).toSeq === ref.slice(0, 10))
    assert(arrays(1).toSeq === ref.slice(10, 20))
    assert(arrays(2).toSeq === ref.slice(20, 30))
  }
}
Example 5
Source File: Employee.scala From hazelcast-scala with Apache License 2.0 | 5 votes |
package joe.schmoe

import java.util.UUID
import scala.util.Random

case class Employee(id: UUID, name: String, salary: Int, age: Int, active: Boolean)

object Employee {
  def random: Employee = {
    val name = randomString(20)
    val salary = Random.nextInt(480000) + 20001
    val age = Random.nextInt(60) + 20
    val active = Random.nextInt(20) == 0
    new Employee(UUID.randomUUID, name, salary, age, active)
  }
}
Example 6
Source File: ClusterSetup.scala From hazelcast-scala with Apache License 2.0 | 5 votes |
package joe.schmoe import java.util.UUID import com.hazelcast.Scala._ import com.hazelcast.Scala.client._ import com.hazelcast.client.config.ClientConfig import com.hazelcast.config.Config import com.hazelcast.core.HazelcastInstance import com.hazelcast.instance.HazelcastInstanceFactory import scala.concurrent.ExecutionContext import scala.concurrent.duration._ import scala.util.Random trait ClusterSetup { def randName: String = randomString(50) implicit def ec = ExecutionContext.global private[this] var _hzs: Vector[HazelcastInstance] = _ implicit def hzs = _hzs private[this] var _client: HazelcastInstance = _ def client: HazelcastInstance = _client def member: HazelcastInstance = hzs(0) def clusterSize = 3 final val port = 49152 + Random.nextInt(9999) final val memberConfig = new Config final val clientConfig = new ClientConfig def init(): Unit def destroy(): Unit def beforeClass(): Unit = { init() val group = UUID.randomUUID.toString val passw = UUID.randomUUID.toString memberConfig.getNetworkConfig.getJoin.getMulticastConfig.setEnabled(false) memberConfig.getNetworkConfig.getJoin.getTcpIpConfig.setEnabled(true).addMember(s"127.0.0.1:$port") memberConfig.getGroupConfig.setName(group).setPassword(passw) memberConfig.setGracefulShutdownMaxWait(1.second) memberConfig.setPhoneHomeEnabled(false) memberConfig.getMapConfig("default") .setStatisticsEnabled(false) .setMaxSizeConfig(UsedHeapSize(60.gigabytes)) memberConfig.setShutdownHookEnabled(false) _hzs = (0 until clusterSize).map { i => memberConfig.getNetworkConfig.setPort(port + i) memberConfig.newInstance }.toVector clientConfig.getGroupConfig.setName(group).setPassword(passw) (0 until clusterSize).foldLeft(clientConfig.getNetworkConfig) { case (netConf, i) => netConf.addAddress(s"127.0.0.1:${port+i}") } clientConfig.getNetworkConfig.setConnectionAttemptLimit(100) _client = clientConfig.newClient() } def afterClass(): Unit = { destroy() _client.shutdown() HazelcastInstanceFactory.terminateAll() } def timed[T](warmups: Int = 0, unit: TimeUnit = MILLISECONDS)(thunk: => T): (T, Long) = { (0 until warmups).foreach(_ => thunk) val start = System.nanoTime thunk -> unit.convert(System.nanoTime - start, NANOSECONDS) } }
Example 7
Source File: package.scala From hazelcast-scala with Apache License 2.0 | 5 votes |
package joe

import scala.util.Random
import concurrent._
import concurrent.duration._

package object schmoe {

  def randomString(maxLen: Int = 10): String = {
    val minLen = 3
    val len = Random.nextInt(maxLen - minLen) + minLen
    val chars = new Array[Char](len)
    for (i <- 0 until len) {
      chars(i) = Random.nextPrintableChar()
    }
    new String(chars)
  }

  implicit class TestFuture[T](private val f: Future[T]) extends AnyVal {
    def await: T = this.await()
    def await(dur: FiniteDuration = 30.seconds): T = Await.result(f, dur)
  }
}
Example 8
Source File: Main.scala From akka-viz with MIT License | 5 votes |
import akka.actor.{Actor, ActorSystem, Props}
import ask.AskDemo
import fsm.DiningHakkersOnFsm
import postoffice.PostOffice
import restartDemo.RestartDemo
import roulette.RussianRoulette
import spray.SprayDemo
import tree.TreeDemo

import scala.util.Random

object Main extends App {

  DiningHakkersOnFsm.run(ActorSystem("fsm"))
  PostOffice.run(ActorSystem("postoffice"))
  SprayDemo.run(ActorSystem("spray"))
  TreeDemo.run(ActorSystem("tree"))
  new RussianRoulette(5).run(ActorSystem("russianroulette"))
  AskDemo.run(ActorSystem("ask"))
  RestartDemo.run(ActorSystem("restartdemo"))

  val system = ActorSystem("smalldemos")

  val lazyActorProps = Props(new Actor {
    var counter = 0
    override def receive: Receive = {
      case msg =>
        Thread.sleep(Random.nextInt(2000))
        counter += 1
        sender() ! msg
    }
  })

  val lazyActor1 = system.actorOf(lazyActorProps, "lazy1")
  val lazyActor2 = system.actorOf(lazyActorProps, "lazy2")

  for (i <- 0 to 1000) {
    lazyActor1.tell("doit", lazyActor2)
  }
}
Example 9
Source File: actors.scala From akka-viz with MIT License | 5 votes |
package postoffice import java.time.LocalDateTime import akka.actor._ import scala.util.Random class PostOfficeActor(val postOffice: PostOffice) extends Actor with ActorLogging { import PostOffice._ val myClient = context.actorOf(Props(classOf[PostOfficeClientActor]), "client") myClient ! postOffice.city override def receive: Receive = { case p @ Parcel(src, dest, weight) if src == postOffice.city => Thread.sleep(randomDelay) if (weight > WeightLimit) sender() ! Rejected(LocalDateTime.now(), p) else { sender() ! Pickup(LocalDateTime.now(), p) nextOffice(route(src -> dest)) ! p } case p @ Parcel(src, dest, _) if dest == postOffice.city => myClient ! Delivery(LocalDateTime.now(), p) case p @ Parcel(_, dest, _) => Thread.sleep(randomDelay) if (!lostPackage) nextOffice(route(postOffice.city -> dest)) ! p } def nextOffice(route: List[City]): ActorSelection = { val nextCity = route.dropWhile(_ != postOffice.city).drop(1).head val selection: ActorSelection = context.system.actorSelection(s"/user/$nextCity") selection } def lostPackage = Random.nextGaussian() < 0.002 } class PostOfficeClientActor extends Actor with ActorLogging { import PostOffice._ import scala.concurrent.duration._ var city: Option[City] = None override def receive: Actor.Receive = { case c: City => city = Some(c) sendPackage context.become(packageReply) } def packageReply: Actor.Receive = { case Pickup(_, p) => log.debug(s"Sent parcel $p") case Rejected(_, p) => log.debug(s"$p rejected, trying again") sender() ! p.copy(weight = p.weight - 0.02) case d: Delivery => log.debug(s"received $d") sendPackage } def sendPackage = { import context.dispatcher context.system.scheduler.scheduleOnce( randomDelay.milliseconds, context.parent, Parcel(city.get, Random.shuffle(Cities.filterNot(_ == city)).head, Random.nextDouble() * (WeightLimit + 0.10)) ) } }
Example 10
Source File: Player.scala From akka-viz with MIT License | 5 votes |
package roulette

import akka.actor._

import scala.util.Random

class Player extends Actor with ActorLogging {

  var nextGuy: ActorRef = _

  context.become(playerBehaviour)

  override def receive = {
    case _ => ???
  }

  def playerBehaviour: Receive = {
    case Next(ref) =>
      nextGuy = ref

    case Revolver(0) =>
      Thread.sleep(2000)
      if (sender() != nextGuy) {
        sender() ! Next(nextGuy)
        nextGuy.tell(Revolver(Random.nextInt(6)), sender())
      }
      self ! Kill

    case Revolver(x) =>
      Thread.sleep(2000)
      nextGuy ! Revolver(x - 1)
      nextGuy ! "Unhandled message"
  }
}
Example 11
Source File: RussianRoulette.scala From akka-viz with MIT License | 5 votes |
package roulette

import akka.actor.{ActorSystem, Props}

import scala.util.Random

class RussianRoulette(playersNo: Int) {

  def run(system: ActorSystem): Unit = {
    val players = Vector.fill(playersNo)(system.actorOf(Props[Player]))

    for (x <- 0 until playersNo) {
      players(x).tell(Next(players((x + 1) % playersNo)), players(x))
    }

    val firstGuyId = Random.nextInt(playersNo)
    val previousGuyId = (firstGuyId - 1 + playersNo) % playersNo
    val firstGuy = players(firstGuyId)
    val previousGuy = players(previousGuyId)

    firstGuy.tell(Revolver(Random.nextInt(6)), previousGuy)
  }
}
Example 12
Source File: restartDemo.scala From akka-viz with MIT License | 5 votes |
package restartDemo

import akka.actor._

import scala.util.Random

object RestartDemo {
  def run(system: ActorSystem): Unit = {
    system.actorOf(Props[DangerZoneParent], "dangerZoneParent") ! DoIt
  }
}

class DangerZoneParent extends Actor {

  override def supervisorStrategy = OneForOneStrategy() {
    case e: Exception => SupervisorStrategy.Restart
  }

  override def receive: Receive = {
    case DoIt => context.actorOf(Props[DangerZoneActor], "dangerZone")
  }
}

class DangerZoneActor extends Actor with ActorLogging {

  import scala.concurrent.duration._
  import context.dispatcher

  var cancellable: Option[Cancellable] = None

  override def preRestart(reason: Throwable, message: Option[Any]): Unit = {
    cancellable.foreach(_.cancel())
    super.preRestart(reason, message)
  }

  override def preStart(): Unit = {
    cancellable = Some(scheduleRideToTheDangerZone)
  }

  override def receive: Receive = {
    case DangerZone => if (Random.nextBoolean()) rideIntoTheDangerZone
  }

  def rideIntoTheDangerZone: Unit = throw new RuntimeException("the danger zone was too dangerous")

  def scheduleRideToTheDangerZone: Cancellable =
    context.system.scheduler.schedule(10.seconds, 20.seconds, self, DangerZone)
}

case object DangerZone

case object DoIt
Example 13
Source File: MathSteps.scala From cornichon with Apache License 2.0 | 5 votes |
package com.github.agourlay.cornichon.framework.examples.math import com.github.agourlay.cornichon.CornichonFeature import com.github.agourlay.cornichon.core.Step import com.github.agourlay.cornichon.steps.regular.assertStep._ import com.github.agourlay.cornichon.steps.cats.EffectStep import scala.util.Random trait MathSteps { this: CornichonFeature => case class adding_values(arg1: String, arg2: String) { def equals(res: Int) = AssertStep( title = s"value of $arg1 + $arg2 should be $res", action = sc => Assertion.either { for { v1 <- sc.session.get(arg1).map(_.toInt) v2 <- sc.session.get(arg2).map(_.toInt) } yield GenericEqualityAssertion(res, v1 + v2) } ) } def generate_random_int(target: String, max: Int = 10): Step = EffectStep.fromSyncE( title = s"generate random Int into '$target' (max=$max)", effect = _.session.addValue(target, Random.nextInt(max).toString) ) def generate_random_double(target: String): Step = EffectStep.fromSyncE( title = s"generate random Double into '$target'", effect = _.session.addValue(target, Random.nextDouble().toString) ) case class double_value(source: String) { def isBetween(low: Double, high: Double) = AssertStep( title = s"double value of '$source' is between '$low' and '$high'", action = sc => Assertion.either { sc.session.get(source).map(v => BetweenAssertion(low, v.toDouble, high)) } ) } def calculate_point_in_circle(target: String): Step = EffectStep.fromSyncE( title = s"calculate points inside circle", effect = sc => { for { x <- sc.session.get("x").map(_.toDouble) y <- sc.session.get("y").map(_.toDouble) inside = Math.sqrt(x * x + y * y) <= 1 ns <- sc.session.addValue(target, if (inside) "1" else "0") } yield ns } ) def estimate_pi_from_ratio(inside: String, target: String): Step = EffectStep.fromSyncE( title = s"estimate PI from ratio into key '$target'", effect = sc => { sc.session.getHistory(inside).flatMap { insides => val trial = insides.size val estimation = (insides.count(_ == "1").toDouble / trial) * 4 sc.session.addValue(target, estimation.toString) } } ) def is_valid_sum: Step = AssertStep( title = "sum of 'a' + 'b' = 'c'", action = sc => { val s = sc.session GenericEqualityAssertion(s.getUnsafe("c").toInt, s.getUnsafe("a").toInt + s.getUnsafe("b").toInt) } ) }
Example 14
Source File: RandomContext.scala From cornichon with Apache License 2.0 | 5 votes |
package com.github.agourlay.cornichon.core

import java.util.concurrent.atomic.AtomicLong

import scala.util.Random

trait RandomContext {
  val initialSeed: Long
  def nextBoolean(): Boolean
  def nextDouble(): Double
  def nextFloat(): Float
  def nextGaussian(): Double
  def nextInt(): Int
  def nextInt(n: Int): Int
  def nextLong(): Long
  def uniqueLong(): Long
  def nextString(length: Int): String
  def nextPrintableChar(): Char
  def alphanumeric(length: Int): String
  def shuffle[T](xs: Iterable[T]): Iterable[T]
}

// FIXME seededRandom works through internal mutation https://github.com/agourlay/cornichon/issues/303
class MutableRandomContext(seed: Long, seededRandom: Random) extends RandomContext {
  val initialSeed: Long = seed
  def nextBoolean(): Boolean = seededRandom.nextBoolean()
  def nextDouble(): Double = seededRandom.nextDouble()
  def nextFloat(): Float = seededRandom.nextFloat()
  def nextGaussian(): Double = seededRandom.nextGaussian()
  def nextInt(): Int = seededRandom.nextInt()
  def nextInt(n: Int): Int = seededRandom.nextInt(n)
  def nextLong(): Long = seededRandom.nextLong()
  def nextString(length: Int): String = seededRandom.nextString(length)
  def nextPrintableChar(): Char = seededRandom.nextPrintableChar()
  def alphanumeric(length: Int): String = seededRandom.alphanumeric.take(length).mkString("")
  def shuffle[T](xs: Iterable[T]): Iterable[T] = seededRandom.shuffle(xs)

  private val atomicLong = new AtomicLong(1L)
  def uniqueLong(): Long = atomicLong.getAndIncrement()
}

object RandomContext {

  def fromOptSeed(withSeed: Option[Long]): RandomContext = {
    val initialSeed = withSeed.getOrElse(System.currentTimeMillis())
    fromSeed(initialSeed)
  }

  def fromSeed(seed: Long): RandomContext = {
    new MutableRandomContext(seed, new Random(new java.util.Random(seed)))
  }
}
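A brief usage sketch (assuming the RandomContext shown above is on the classpath): seeding makes every derived value reproducible across contexts, while uniqueLong is a per-context counter that is independent of the seed.

import com.github.agourlay.cornichon.core.RandomContext

object RandomContextDemo extends App {
  val rc1 = RandomContext.fromSeed(1234L)
  val rc2 = RandomContext.fromSeed(1234L)

  // Same seed, same draws (each context mutates only its own internal Random).
  assert(rc1.nextInt(100) == rc2.nextInt(100))
  assert(rc1.alphanumeric(5) == rc2.alphanumeric(5))

  // uniqueLong starts at 1 and simply increments, regardless of the seed.
  assert(rc1.uniqueLong() == 1L)
  assert(rc1.uniqueLong() == 2L)
}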
Example 15
Source File: NetworkRandomization.scala From sparkling-graph with BSD 2-Clause "Simplified" License | 5 votes |
package ml.sparkling.graph.examples import ml.sparkling.graph.api.loaders.GraphLoading.LoadGraph import ml.sparkling.graph.loaders.csv.GraphFromCsv.CSV import ml.sparkling.graph.loaders.csv.GraphFromCsv.LoaderParameters.{Delimiter, NoHeader, Partitions, Quotation} import org.apache.spark.graphx._ import org.apache.spark.rdd.RDD import org.apache.spark.{SparkConf, SparkContext} import scala.util.Random object NetworkRandomization { def main(args:Array[String])= { val sparkConf = new SparkConf().setAppName("network-randomization").set("spark.app.id", "sparkling-graph-example") implicit val ctx = new SparkContext(sparkConf) val path=args(0) val pathEmd=args(1) val out=args(2) val loadPartitions=args(3).toInt val graphPartitions=args(4).toInt val graph:Graph[String,String]=LoadGraph.from(CSV(path)) .using(NoHeader) .using(Delimiter(",")) .using(Partitions(loadPartitions)) .using(Quotation("\"")).load[String,String]().partitionBy(PartitionStrategy.EdgePartition2D,graphPartitions) val emd=ctx.textFile(pathEmd,loadPartitions).map(_.split(",").map(v=>v.replaceAll("\"",""))).map(r=>(r.head.toLong,r.tail)) val srcIdsBase: RDD[VertexId] =graph.edges.map(e=>e.srcId) val dstIdsBase=graph.edges.map(e=>e.dstId) val saltDst = 23456789L ; val saltSrc = 123456789L ; def randomize(srcIds:RDD[VertexId],dstIds:RDD[VertexId])= { val randomizedSrc = srcIds.mapPartitionsWithIndex((id, itr) => { val random = new Random(saltSrc + id) itr.map(vId => (random.nextLong(), vId)) }).sortByKey().zipWithIndex().map(t => (t._2, t._1._2)) val randomizedDst = dstIds.mapPartitionsWithIndex((id, itr) => { val random = new Random(saltDst + id) itr.map(vId => (random.nextLong(), vId)) }).sortByKey().zipWithIndex().map(t => (t._2, t._1._2)) randomizedSrc.join(randomizedDst).map { case (index, (src, dst)) => new Edge[Int](src, dst, 1) } } var numOfSame= -1l var lastNumOfSame= -2l var randomizedEdges=randomize(srcIdsBase,dstIdsBase) var withSame=randomizedEdges.filter(t=> t.srcId == t.dstId) while((numOfSame!=lastNumOfSame) && (!withSame.isEmpty())){ val withoutSame=randomizedEdges.filter(t=>t.srcId!=t.dstId) val newRandomized=randomize(withSame.map(_.srcId),withSame.map(_.dstId)) randomizedEdges=withoutSame.union(newRandomized) withSame=newRandomized.filter(e=>e.srcId==e.dstId) lastNumOfSame=numOfSame numOfSame=withSame.count() } val randomizedGraph= Graph(graph.vertices,randomizedEdges) randomizedGraph.outerJoinVertices(emd)((vId,old,newValue)=>newValue.getOrElse(((0 to 4).map(n=>"0").toArray))).triplets.map( edge=>{ (edge.srcId.toString :: edge.dstId.toString :: edge.srcAttr.toList ::: edge.dstAttr.toList).mkString(",") } ).saveAsTextFile(out) } }
Example 16
Source File: EigenvectorCentrality$Test.scala From sparkling-graph with BSD 2-Clause "Simplified" License | 5 votes |
package ml.sparkling.graph.operators.measures.vertex.eigenvector import ml.sparkling.graph.api.operators.measures.VertexMeasureConfiguration import ml.sparkling.graph.operators.MeasureTest import org.apache.spark.SparkContext import org.apache.spark.graphx.Graph import ml.sparkling.graph.operators.OperatorsDSL._ import scala.util.Random class EigenvectorCentrality$Test(implicit sc:SparkContext) extends MeasureTest { "Eigenvector for line graph" should "be correctly calculated" in{ Given("graph") val filePath = getClass.getResource("/graphs/5_nodes_directed") val graph:Graph[Int,Int]=loadGraph(filePath.toString) When("Computes eigenvector") val result=EigenvectorCentrality.compute(graph) Then("Should calculate eigenvector correctly") result.vertices.collect().sortBy{case (vId,data)=>vId}.map{case (vId,data)=>data}.zip(Array( 0d, 0d, 0d, 0d, 0d )).foreach{case (a,b)=>{a should be (b +- 1e-5 )}} graph.unpersist(true) } "Eigenvector for line graph" should "be correctly calculated using DSL" in{ Given("graph") val filePath = getClass.getResource("/graphs/5_nodes_directed") val graph:Graph[Int,Int]=loadGraph(filePath.toString) When("Computes eigenvector") val result=graph.eigenvectorCentrality() Then("Should calculate eigenvector correctly") result.vertices.collect().sortBy{case (vId,data)=>vId}.map{case (vId,data)=>data}.zip(Array( 0d, 0d, 0d, 0d, 0d )).foreach{case (a,b)=>{a should be (b +- 1e-5 )}} graph.unpersist(true) } "Eigenvector for full 4 node directed graph" should "be correctly calculated" in{ Given("graph") val filePath = getClass.getResource("/graphs/4_nodes_full") val graph:Graph[Int,Int]=loadGraph(filePath.toString) When("Computes eigenvector") val result=EigenvectorCentrality.compute(graph) Then("Should calculate eigenvector correctly") result.vertices.collect().sortBy{case (vId,data)=>vId}.map{case (vId,data)=>data}.zip(Array( 0.32128186442503776, 0.5515795539542094, 0.6256715148839718, 0.44841176915201825 )).foreach{case (a,b)=>{a should be (b +- 1e-5 )}} graph.unpersist(true) } "Eigenvector for full 4 node undirected graph" should "be correctly calculated" in{ Given("graph") val filePath = getClass.getResource("/graphs/4_nodes_full") val graph:Graph[Int,Int]=loadGraph(filePath.toString) When("Computes eigenvector") val result=EigenvectorCentrality.compute(graph,VertexMeasureConfiguration[Int,Int](true)) Then("Should calculate eigenvector correctly") result.vertices.collect().sortBy{case (vId,data)=>vId} should equal (Array( (1,0.5), (2,0.5), (3,0.5), (4,0.5) )) graph.unpersist(true) } "Eigenvector " should " take edge weight into account" in{ Given("graph") val filePath = getClass.getResource("/graphs/4_nodes_full") val graph:Graph[Int,Int]=loadGraph(filePath.toString) val graphWeighted=graph.mapEdges(edge=>{ 1.0/(edge.srcId+edge.dstId) }) When("Computes eigenvector") val resultUnweighted=EigenvectorCentrality.compute(graph,VertexMeasureConfiguration[Int,Int](true)) val resultWeighted=EigenvectorCentrality.compute(graphWeighted,VertexMeasureConfiguration[Int,Double](true)) Then("Should calculate eigenvector correctly") resultUnweighted.vertices.collect().sortBy{case (vId,data)=>vId} should not equal ( resultWeighted.vertices.collect().sortBy{case (vId,data)=>vId}) graph.unpersist(true) resultUnweighted.unpersist(true) resultWeighted.unpersist(true) } }
Example 17
Source File: RandomStateSpec.scala From CSYE7200 with MIT License | 5 votes |
package edu.neu.coe.csye7200.asstrs import org.scalatest.{FlatSpec, Matchers} import scala.language.postfixOps import scala.util.Random class RandomStateSpec extends FlatSpec with Matchers { private def stdDev(xs: Seq[Double]): Double = math.sqrt(xs.reduceLeft((a, x) => a + x * x)) / xs.length private def mean(xs: Seq[Double]) = xs.sum / xs.length // XXX Clearly, this doesn't look good. We will soon learn how to write // generic methods like sum and mean. But for now, this is what we've got. def sumU(xs: Seq[UniformDouble]): Double = xs.foldLeft(0.0)((a, x) => (a + x.x)) def meanU(xs: Seq[UniformDouble]) = sumU(xs) / xs.length "RandomState(0L)" should "match case RandomState(4804307197456638271)" in { val r: RandomState[Long] = RandomState(0L) r.next should matchPattern { case JavaRandomState(4804307197456638271L,_) => } } it should "match case RandomState(-1034601897293430941) on next" in { val r: RandomState[Long] = RandomState(0L) r.next.next should matchPattern { case JavaRandomState(-1034601897293430941L,_) => } // why doesn't the following work? // r.next.next.asInstanceOf[JavaRandomState[Long]].g shouldBe identity // e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 } "7th element of RandomState(0)" should "match case RandomState(5082315122564986995L)" in { val lrs = RandomState(0).toStream.slice(6, 7) (lrs head) should matchPattern { case 5082315122564986995L => } } "longToDouble" should "work" in { val max = RandomState.longToDouble(Long.MaxValue) max shouldBe 1.0 +- 1E-6 val min = RandomState.longToDouble(Long.MinValue) min shouldBe -1.0 +- 1E-6 val value = RandomState.longToDouble(3487594572834985L) value shouldBe 3.7812576126163456E-4 +- 1E-6 } "0..1 stream" should "have mean = 0.5" in { val xs = RandomState(0).map(RandomState.longToDouble).map(RandomState.doubleToUniformDouble).toStream take 1001 toList; meanU(xs) shouldBe 0.5 +- 5E-3 } "BetterRandomState" should "have mean = 0.5" in { val xs = BetterRandomState(0,BetterRandomState.hDouble).toStream take 1001 toList; mean(xs) shouldBe 0.5 +- 5E-3 } "map" should "work" in { val rLong: RandomState[Long] = RandomState(0) val rInt = rLong.map(_.toInt) rInt.get shouldBe -723955400 val next = rInt.next next.get shouldBe 406937919 val next2 = next.next next2.get shouldBe 1407270755 } it should "work with map of map" in { val rLong: RandomState[Long] = RandomState(0L) val rInt = rLong.map(_.toInt) val rBoolean = rInt.map(_ % 2 == 0) rBoolean.get shouldBe true } "flatMap" should "work" in { val r1 = RandomState(0) val r2 = r1.flatMap(RandomState(_)) r2.get shouldBe 4804307197456638271L } "for comprehension" should "work" in { val r1 = RandomState(0) val z: RandomState[Double] = for (x <- r1; _ <- RandomState(x)) yield x.toDouble/Long.MaxValue z.get shouldBe -0.5380644352028887 +- 1E-6 } }
Example 18
Source File: Euler.scala From CSYE7200 with MIT License | 5 votes |
package edu.neu.coe.csye7200.greedy

import scala.Stream._
import scala.language.postfixOps
import scala.util.Random

object Euler extends App {
  def e(n: Int): Double = (from(1) map (1.0 / _)).scanLeft(1.0)(_ * _) take n sum

  println(e(20))
}

object RandomStrings extends App {
  val r = Random

  def randomString(r: Random): String = {
    (for (i <- 0 until 6) yield r.nextPrintableChar()).foldLeft("")(_ + _)
  }

  val strings = for (i <- 0 until 5) yield randomString(r)

  println(strings)
}
Example 19
package edu.neu.coe.csye7200.util

import scala.util.Random

class RNG[+A](f: Long => A)(seed: Long) {
  private val random = new Random(seed)
  private lazy val state = random.nextLong

  def next = new RNG(f)(state)

  def value = f(state)
}

object RNG extends App {
  def modulo(n: Int, m: Int) = (n + m) % m

  val random = new Random(0L)
  val r = new RNG(x => modulo(x.toInt, 100))(0L)
  val someInt: Int = 55
  val r1 = r.next
  val r2 = r.next
  // ...
  val rN = r2.next
  val y = rN.value
  assert(y == someInt)
}
Example 20
Source File: BadRandomSpec.scala From CSYE7200 with MIT License | 5 votes |
package edu.neu.coe.csye7200

import org.scalatest.{FlatSpec, Matchers}

import scala.util.Random

class BadRandomSpec extends FlatSpec with Matchers {

  private val random = Random
  random.setSeed(0L)

  "random" should "be predictable" in {
    random.nextInt
    random.nextInt shouldBe (-723955400)
  }

  it should "be order-independent" in {
    random.nextInt shouldBe 1033096058
  }
}
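The spec above is deliberately "bad": both tests draw from one shared, seeded Random, so the expected constants hold only when the tests run in exactly this order. A minimal sketch of the issue, using only the standard library (names are illustrative):

import scala.util.Random

object SeedOrderDemo extends App {
  // One shared seeded instance: the n-th draw depends on every earlier draw,
  // so adding, removing, or reordering callers changes what later callers see.
  val shared = new Random(0L)
  val first = shared.nextInt()
  val second = shared.nextInt()
  assert(first != second)

  // A fresh, locally seeded instance per check removes the order dependency.
  def freshDraw(): Int = new Random(0L).nextInt()
  assert(freshDraw() == freshDraw())
}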
Example 21
Source File: CacheSpec.scala From CSYE7200 with MIT License | 5 votes |
package edu.neu.coe.csye7200.cache

import java.net.URL
import org.scalatest.concurrent.{Futures, ScalaFutures}
import org.scalatest.{FlatSpec, Matchers}

import scala.concurrent.Future
import scala.concurrent.ExecutionContext.Implicits.global
import scala.util.{Random, Try}

class CacheSpec extends FlatSpec with Matchers with Futures with ScalaFutures {

  behavior of "apply"

  val random = Random

  def lookupStock(k: String): Future[Double] = Future {
    random.setSeed(k.hashCode)
    random.nextInt(1000) / 100.0
  }

  it should "work" in {
    val cache = MyCache[String, Double](lookupStock)
    val xf: Future[Double] = cache("MSFT")
    whenReady(xf) { u => u should matchPattern { case x: Double => } }
    xf.value.get.get shouldBe 3.64
  }
}
Example 22
Source File: SortingSpec.scala From CSYE7200 with MIT License | 5 votes |
package edu.neu.coe.csye7200.lbsort

import edu.neu.coe.csye7200.util.RandomState
import org.scalatest.concurrent.{Futures, ScalaFutures}
import org.scalatest.{FlatSpec, Matchers}

import scala.util.Random

class SortingSpec extends FlatSpec with Matchers with Futures with ScalaFutures {

  behavior of "Insertion Sort"

  it should "sort List[Int]" in {
    val list = Array(3, 1, 2)
    Sorting.insertionSort(list)
    list shouldBe Array(1, 2, 3)
  }

  it should "sort List[String]" in {
    val list = Array("b", "c", "a")
    Sorting.insertionSort(list)
    list shouldBe Array("a", "b", "c")
  }

  it should "sort List[Double] using create" in {
    val list = Array(3.0, 1.5, 2.4)
    Sorting.insertionSort(list)
    list shouldBe Array(1.5, 2.4, 3.0)
  }

  behavior of "Quick Sort"

  it should "sort List[Long]" in {
    val list = RandomState(0L).stream.take(100).toArray
    Sorting.quickSort(list)
    list.reverse.take(5) shouldBe Array(9054633673849498218L, 8937230293740383692L, 8613213585075034408L, 8543763135442756639L, 8358116205139703580L)
  }
}
Example 24
Source File: NonSampleCompactor.scala From deequ with Apache License 2.0 | 5 votes |
package com.amazon.deequ.analyzers

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag
import scala.util.Random

// NOTE: only a fragment of the NonSampleCompactor body survives in this listing;
// the enclosing class and method declarations are not shown.
    val output = (offset until len by 2).map(sortedBuffer(_)).toArray
    val tail = findOdd(items)
    items = items % 2
    var newBuffer = ArrayBuffer[T]()
    if (tail.isDefined) {
      newBuffer = newBuffer :+ tail.get
    }
    buffer = newBuffer
    numOfCompress = numOfCompress + 1
    output
  }
}
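The fragment above keeps every second element of a sorted buffer starting at some offset; below is a minimal, illustrative sketch of that compaction idea only (hypothetical names, randomly chosen offset), not the actual Deequ implementation.

import scala.util.Random

object CompactorSketch extends App {
  // Keep every second element of a sorted buffer, starting at a random offset (0 or 1),
  // so the buffer is halved while remaining an unbiased summary of the input.
  def compact[T](sorted: Vector[T]): Vector[T] = {
    val offset = if (Random.nextBoolean()) 1 else 0
    (offset until sorted.length by 2).map(sorted(_)).toVector
  }

  println(compact(Vector(1, 3, 4, 7, 8, 9, 12, 15))) // four survivors; which four depends on the offset
}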
Example 25
Source File: MnistLoader.scala From SparkNet with MIT License | 5 votes |
package loaders import java.io._ import scala.util.Random import libs._ class MnistLoader(path: String) { val height = 28 val width = 28 def getImages(filename: String, train: Boolean): Array[Array[Float]] = { val stream = new FileInputStream(path + filename) val numImages = if (train) 60000 else 10000 val images = new Array[Array[Float]](numImages) val magicNumber = new Array[Byte](4) stream.read(magicNumber) assert(magicNumber.deep == Array[Byte](0, 0, 8, 3).deep) val count = new Array[Byte](4) stream.read(count) assert(count.deep == (if (train) Array[Byte](0, 0, -22, 96).deep else Array[Byte](0, 0, 39, 16).deep)) val imHeight = new Array[Byte](4) stream.read(imHeight) assert(imHeight.deep == Array[Byte](0, 0, 0, 28).deep) val imWidth = new Array[Byte](4) stream.read(imWidth) assert(imWidth.deep == Array[Byte](0, 0, 0, 28).deep) var i = 0 val imageBuffer = new Array[Byte](height * width) while (i < numImages) { stream.read(imageBuffer) images(i) = imageBuffer.map(e => (e.toFloat / 255) - 0.5F) i += 1 } images } def getLabels(filename: String, train: Boolean): Array[Long] = { val stream = new FileInputStream(path + filename) val numLabels = if (train) 60000 else 10000 val magicNumber = new Array[Byte](4) stream.read(magicNumber) assert(magicNumber.deep == Array[Byte](0, 0, 8, 1).deep) val count = new Array[Byte](4) stream.read(count) assert(count.deep == (if (train) Array[Byte](0, 0, -22, 96).deep else Array[Byte](0, 0, 39, 16).deep)) val labels = new Array[Byte](numLabels) stream.read(labels) labels.map(e => (e & 0xFF).toLong) } val trainImages = getImages("train-images-idx3-ubyte", true) val trainLabels = getLabels("train-labels-idx1-ubyte", true) val testImages = getImages("t10k-images-idx3-ubyte", false) val testLabels = getLabels("t10k-labels-idx1-ubyte", false) }
Example 26
Source File: CifarLoader.scala From SparkNet with MIT License | 5 votes |
package loaders import java.io.File import java.io.FileInputStream import scala.util.Random import libs._ class CifarLoader(path: String) { // We hardcode this because these are properties of the CIFAR-10 dataset. val height = 32 val width = 32 val channels = 3 val size = channels * height * width val batchSize = 10000 val nBatches = 5 val nData = nBatches * batchSize val trainImages = new Array[Array[Float]](nData) val trainLabels = new Array[Int](nData) val testImages = new Array[Array[Float]](batchSize) val testLabels = new Array[Int](batchSize) val r = new Random() // val perm = Vector() ++ r.shuffle(1 to (nData - 1) toIterable) val indices = Vector() ++ (0 to nData - 1) toIterable val trainPerm = Vector() ++ r.shuffle(indices) val testPerm = Vector() ++ ((0 to batchSize) toIterable) val d = new File(path) if (!d.exists) { throw new Exception("The path " + path + " does not exist.") } if (!d.isDirectory) { throw new Exception("The path " + path + " is not a directory.") } val cifar10Files = List("data_batch_1.bin", "data_batch_2.bin", "data_batch_3.bin", "data_batch_4.bin", "data_batch_5.bin", "test_batch.bin") for (filename <- cifar10Files) { if (!d.list.contains(filename)) { throw new Exception("The directory " + path + " does not contain all of the Cifar10 data. Please run `bash $SPARKNET_HOME/data/cifar10/get_cifar10.sh` to obtain the Cifar10 data.") } } val fullFileList = d.listFiles.filter(_.getName().split('.').last == "bin").toList val testFile = fullFileList.find(x => x.getName().split('/').last == "test_batch.bin").head val fileList = fullFileList diff List(testFile) for (i <- 0 to nBatches - 1) { readBatch(fileList(i), i, trainImages, trainLabels, trainPerm) } readBatch(testFile, 0, testImages, testLabels, testPerm) val meanImage = new Array[Float](size) for (i <- 0 to nData - 1) { for (j <- 0 to size - 1) { meanImage(j) += trainImages(i)(j).toFloat / nData } } def readBatch(file: File, batch: Int, images: Array[Array[Float]], labels: Array[Int], perm: Vector[Int]) { val buffer = new Array[Byte](1 + size) val inputStream = new FileInputStream(file) var i = 0 var nRead = inputStream.read(buffer) while(nRead != -1) { assert(i < batchSize) labels(perm(batch * batchSize + i)) = (buffer(0) & 0xFF) // convert to unsigned images(perm(batch * batchSize + i)) = new Array[Float](size) var j = 0 while (j < size) { // we access buffer(j + 1) because the 0th position holds the label images(perm(batch * batchSize + i))(j) = buffer(j + 1) & 0xFF j += 1 } nRead = inputStream.read(buffer) i += 1 } } }
Example 27
Source File: SparkTC.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples import scala.collection.mutable import scala.util.Random import org.apache.spark.sql.SparkSession object SparkTC { val numEdges = 200 val numVertices = 100 val rand = new Random(42) def generateGraph: Seq[(Int, Int)] = { val edges: mutable.Set[(Int, Int)] = mutable.Set.empty while (edges.size < numEdges) { val from = rand.nextInt(numVertices) val to = rand.nextInt(numVertices) if (from != to) edges.+=((from, to)) } edges.toSeq } def main(args: Array[String]) { val spark = SparkSession .builder .appName("SparkTC") .getOrCreate() val slices = if (args.length > 0) args(0).toInt else 2 var tc = spark.sparkContext.parallelize(generateGraph, slices).cache() // Linear transitive closure: each round grows paths by one edge, // by joining the graph's edges with the already-discovered paths. // e.g. join the path (y, z) from the TC with the edge (x, y) from // the graph to obtain the path (x, z). // Because join() joins on keys, the edges are stored in reversed order. val edges = tc.map(x => (x._2, x._1)) // This join is iterated until a fixed point is reached. var oldCount = 0L var nextCount = tc.count() do { oldCount = nextCount // Perform the join, obtaining an RDD of (y, (z, x)) pairs, // then project the result to obtain the new (x, z) paths. tc = tc.union(tc.join(edges).map(x => (x._2._2, x._2._1))).distinct().cache() nextCount = tc.count() } while (nextCount != oldCount) println(s"TC has ${tc.count()} edges.") spark.stop() } } // scalastyle:on println
Example 28
Source File: HadoopUtils.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.image import scala.language.existentials import scala.util.Random import org.apache.commons.io.FilenameUtils import org.apache.hadoop.conf.{Configuration, Configured} import org.apache.hadoop.fs.{Path, PathFilter} import org.apache.hadoop.mapreduce.lib.input.FileInputFormat import org.apache.spark.sql.SparkSession private object RecursiveFlag { def withPathFilter[T]( sampleRatio: Double, spark: SparkSession, seed: Long)(f: => T): T = { val sampleImages = sampleRatio < 1 if (sampleImages) { val flagName = FileInputFormat.PATHFILTER_CLASS val hadoopConf = spark.sparkContext.hadoopConfiguration val old = Option(hadoopConf.getClass(flagName, null)) hadoopConf.setDouble(SamplePathFilter.ratioParam, sampleRatio) hadoopConf.setLong(SamplePathFilter.seedParam, seed) hadoopConf.setClass(flagName, classOf[SamplePathFilter], classOf[PathFilter]) try f finally { hadoopConf.unset(SamplePathFilter.ratioParam) hadoopConf.unset(SamplePathFilter.seedParam) old match { case Some(v) => hadoopConf.setClass(flagName, v, classOf[PathFilter]) case None => hadoopConf.unset(flagName) } } } else { f } } }
Example 29
Source File: KMeansDataGenerator.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.util import scala.util.Random import org.apache.spark.SparkContext import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.rdd.RDD @Since("0.8.0") def generateKMeansRDD( sc: SparkContext, numPoints: Int, k: Int, d: Int, r: Double, numPartitions: Int = 2) : RDD[Array[Double]] = { // First, generate some centers val rand = new Random(42) val centers = Array.fill(k)(Array.fill(d)(rand.nextGaussian() * r)) // Then generate points around each center sc.parallelize(0 until numPoints, numPartitions).map { idx => val center = centers(idx % k) val rand2 = new Random(42 + idx) Array.tabulate(d)(i => center(i) + rand2.nextGaussian()) } } @Since("0.8.0") def main(args: Array[String]) { if (args.length < 6) { // scalastyle:off println println("Usage: KMeansGenerator " + "<master> <output_dir> <num_points> <k> <d> <r> [<num_partitions>]") // scalastyle:on println System.exit(1) } val sparkMaster = args(0) val outputPath = args(1) val numPoints = args(2).toInt val k = args(3).toInt val d = args(4).toInt val r = args(5).toDouble val parts = if (args.length >= 7) args(6).toInt else 2 val sc = new SparkContext(sparkMaster, "KMeansDataGenerator") val data = generateKMeansRDD(sc, numPoints, k, d, r, parts) data.map(_.mkString(" ")).saveAsTextFile(outputPath) sc.stop() System.exit(0) } }
Example 30
Source File: LogisticRegressionDataGenerator.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.util import scala.util.Random import org.apache.spark.SparkContext import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.rdd.RDD @Since("0.8.0") def generateLogisticRDD( sc: SparkContext, nexamples: Int, nfeatures: Int, eps: Double, nparts: Int = 2, probOne: Double = 0.5): RDD[LabeledPoint] = { val data = sc.parallelize(0 until nexamples, nparts).map { idx => val rnd = new Random(42 + idx) val y = if (idx % 2 == 0) 0.0 else 1.0 val x = Array.fill[Double](nfeatures) { rnd.nextGaussian() + (y * eps) } LabeledPoint(y, Vectors.dense(x)) } data } @Since("0.8.0") def main(args: Array[String]) { if (args.length != 5) { // scalastyle:off println println("Usage: LogisticRegressionGenerator " + "<master> <output_dir> <num_examples> <num_features> <num_partitions>") // scalastyle:on println System.exit(1) } val sparkMaster: String = args(0) val outputPath: String = args(1) val nexamples: Int = if (args.length > 2) args(2).toInt else 1000 val nfeatures: Int = if (args.length > 3) args(3).toInt else 2 val parts: Int = if (args.length > 4) args(4).toInt else 2 val eps = 3 val sc = new SparkContext(sparkMaster, "LogisticRegressionDataGenerator") val data = generateLogisticRDD(sc, nexamples, nfeatures, eps, parts) data.saveAsTextFile(outputPath) sc.stop() } }
Example 31
Source File: SVMDataGenerator.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.util import scala.util.Random import com.github.fommil.netlib.BLAS.{getInstance => blas} import org.apache.spark.SparkContext import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.rdd.RDD @DeveloperApi @Since("0.8.0") object SVMDataGenerator { @Since("0.8.0") def main(args: Array[String]) { if (args.length < 2) { // scalastyle:off println println("Usage: SVMGenerator " + "<master> <output_dir> [num_examples] [num_features] [num_partitions]") // scalastyle:on println System.exit(1) } val sparkMaster: String = args(0) val outputPath: String = args(1) val nexamples: Int = if (args.length > 2) args(2).toInt else 1000 val nfeatures: Int = if (args.length > 3) args(3).toInt else 2 val parts: Int = if (args.length > 4) args(4).toInt else 2 val sc = new SparkContext(sparkMaster, "SVMGenerator") val globalRnd = new Random(94720) val trueWeights = Array.fill[Double](nfeatures)(globalRnd.nextGaussian()) val data: RDD[LabeledPoint] = sc.parallelize(0 until nexamples, parts).map { idx => val rnd = new Random(42 + idx) val x = Array.fill[Double](nfeatures) { rnd.nextDouble() * 2.0 - 1.0 } val yD = blas.ddot(trueWeights.length, x, 1, trueWeights, 1) + rnd.nextGaussian() * 0.1 val y = if (yD < 0) 0.0 else 1.0 LabeledPoint(y, Vectors.dense(x)) } data.saveAsTextFile(outputPath) sc.stop() } }
Example 32
Source File: RidgeRegressionSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.regression import scala.util.Random import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.util.{LinearDataGenerator, LocalClusterSparkContext, MLlibTestSparkContext} import org.apache.spark.util.Utils private object RidgeRegressionSuite { val model = new RidgeRegressionModel(weights = Vectors.dense(0.1, 0.2, 0.3), intercept = 0.5) } class RidgeRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { def predictionError(predictions: Seq[Double], input: Seq[LabeledPoint]): Double = { predictions.zip(input).map { case (prediction, expected) => (prediction - expected.label) * (prediction - expected.label) }.sum / predictions.size } test("ridge regression can help avoid overfitting") { // For small number of examples and large variance of error distribution, // ridge regression should give smaller generalization error that linear regression. val numExamples = 50 val numFeatures = 20 // Pick weights as random values distributed uniformly in [-0.5, 0.5] val random = new Random(42) val w = Array.fill(numFeatures)(random.nextDouble() - 0.5) // Use half of data for training and other half for validation val data = LinearDataGenerator.generateLinearInput(3.0, w, 2 * numExamples, 42, 10.0) val testData = data.take(numExamples) val validationData = data.takeRight(numExamples) val testRDD = sc.parallelize(testData, 2).cache() val validationRDD = sc.parallelize(validationData, 2).cache() // First run without regularization. val linearReg = new LinearRegressionWithSGD() linearReg.optimizer.setNumIterations(200) .setStepSize(1.0) val linearModel = linearReg.run(testRDD) val linearErr = predictionError( linearModel.predict(validationRDD.map(_.features)).collect(), validationData) val ridgeReg = new RidgeRegressionWithSGD() ridgeReg.optimizer.setNumIterations(200) .setRegParam(0.1) .setStepSize(1.0) val ridgeModel = ridgeReg.run(testRDD) val ridgeErr = predictionError( ridgeModel.predict(validationRDD.map(_.features)).collect(), validationData) // Ridge validation error should be lower than linear regression. assert(ridgeErr < linearErr, "ridgeError (" + ridgeErr + ") was not less than linearError(" + linearErr + ")") } test("model save/load") { val model = RidgeRegressionSuite.model val tempDir = Utils.createTempDir() val path = tempDir.toURI.toString // Save model, load it back, and compare. try { model.save(sc, path) val sameModel = RidgeRegressionModel.load(sc, path) assert(model.weights == sameModel.weights) assert(model.intercept == sameModel.intercept) } finally { Utils.deleteRecursively(tempDir) } } } class RidgeRegressionClusterSuite extends SparkFunSuite with LocalClusterSparkContext { test("task size should be small in both training and prediction") { val m = 4 val n = 200000 val points = sc.parallelize(0 until m, 2).mapPartitionsWithIndex { (idx, iter) => val random = new Random(idx) iter.map(i => LabeledPoint(1.0, Vectors.dense(Array.fill(n)(random.nextDouble())))) }.cache() // If we serialize data directly in the task closure, the size of the serialized task would be // greater than 1MB and hence Spark would throw an error. val model = RidgeRegressionWithSGD.train(points, 2) val predictions = model.predict(points.map(_.features)) } }
Example 33
Source File: KafkaStreamSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.kafka import scala.collection.mutable import scala.concurrent.duration._ import scala.language.postfixOps import scala.util.Random import kafka.serializer.StringDecoder import org.scalatest.BeforeAndAfterAll import org.scalatest.concurrent.Eventually import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.{Milliseconds, StreamingContext} class KafkaStreamSuite extends SparkFunSuite with Eventually with BeforeAndAfterAll { private var ssc: StreamingContext = _ private var kafkaTestUtils: KafkaTestUtils = _ override def beforeAll(): Unit = { kafkaTestUtils = new KafkaTestUtils kafkaTestUtils.setup() } override def afterAll(): Unit = { if (ssc != null) { ssc.stop() ssc = null } if (kafkaTestUtils != null) { kafkaTestUtils.teardown() kafkaTestUtils = null } } test("Kafka input stream") { val sparkConf = new SparkConf().setMaster("local[4]").setAppName(this.getClass.getSimpleName) ssc = new StreamingContext(sparkConf, Milliseconds(500)) val topic = "topic1" val sent = Map("a" -> 5, "b" -> 3, "c" -> 10) kafkaTestUtils.createTopic(topic) kafkaTestUtils.sendMessages(topic, sent) val kafkaParams = Map("zookeeper.connect" -> kafkaTestUtils.zkAddress, "group.id" -> s"test-consumer-${Random.nextInt(10000)}", "auto.offset.reset" -> "smallest") val stream = KafkaUtils.createStream[String, String, StringDecoder, StringDecoder]( ssc, kafkaParams, Map(topic -> 1), StorageLevel.MEMORY_ONLY) val result = new mutable.HashMap[String, Long]() stream.map(_._2).countByValue().foreachRDD { r => r.collect().foreach { kv => result.synchronized { val count = result.getOrElseUpdate(kv._1, 0) + kv._2 result.put(kv._1, count) } } } ssc.start() eventually(timeout(10000 milliseconds), interval(100 milliseconds)) { assert(result.synchronized { sent === result }) } ssc.stop() } }
Example 34
Source File: KafkaClusterSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.kafka import scala.util.Random import kafka.common.TopicAndPartition import org.scalatest.BeforeAndAfterAll import org.apache.spark.SparkFunSuite class KafkaClusterSuite extends SparkFunSuite with BeforeAndAfterAll { private val topic = "kcsuitetopic" + Random.nextInt(10000) private val topicAndPartition = TopicAndPartition(topic, 0) private var kc: KafkaCluster = null private var kafkaTestUtils: KafkaTestUtils = _ override def beforeAll() { kafkaTestUtils = new KafkaTestUtils kafkaTestUtils.setup() kafkaTestUtils.createTopic(topic) kafkaTestUtils.sendMessages(topic, Map("a" -> 1)) kc = new KafkaCluster(Map("metadata.broker.list" -> kafkaTestUtils.brokerAddress)) } override def afterAll() { if (kafkaTestUtils != null) { kafkaTestUtils.teardown() kafkaTestUtils = null } } test("metadata apis") { val leader = kc.findLeaders(Set(topicAndPartition)).right.get(topicAndPartition) val leaderAddress = s"${leader._1}:${leader._2}" assert(leaderAddress === kafkaTestUtils.brokerAddress, "didn't get leader") val parts = kc.getPartitions(Set(topic)).right.get assert(parts(topicAndPartition), "didn't get partitions") val err = kc.getPartitions(Set(topic + "BAD")) assert(err.isLeft, "getPartitions for a nonexistant topic should be an error") } test("leader offset apis") { val earliest = kc.getEarliestLeaderOffsets(Set(topicAndPartition)).right.get assert(earliest(topicAndPartition).offset === 0, "didn't get earliest") val latest = kc.getLatestLeaderOffsets(Set(topicAndPartition)).right.get assert(latest(topicAndPartition).offset === 1, "didn't get latest") } test("consumer offset apis") { val group = "kcsuitegroup" + Random.nextInt(10000) val offset = Random.nextInt(10000) val set = kc.setConsumerOffsets(group, Map(topicAndPartition -> offset)) assert(set.isRight, "didn't set consumer offsets") val get = kc.getConsumerOffsets(group, Set(topicAndPartition)).right.get assert(get(topicAndPartition) === offset, "didn't get consumer offsets") } }
Example 35
Source File: KafkaContinuousSourceSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.kafka010 import java.util.Properties import java.util.concurrent.atomic.AtomicInteger import org.scalatest.time.SpanSugar._ import scala.collection.mutable import scala.util.Random import org.apache.spark.SparkContext import org.apache.spark.sql.{DataFrame, Dataset, ForeachWriter, Row} import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.execution.streaming.StreamExecution import org.apache.spark.sql.execution.streaming.continuous.ContinuousExecution import org.apache.spark.sql.streaming.{StreamTest, Trigger} import org.apache.spark.sql.test.{SharedSQLContext, TestSparkSession} // Run tests in KafkaSourceSuiteBase in continuous execution mode. class KafkaContinuousSourceSuite extends KafkaSourceSuiteBase with KafkaContinuousTest class KafkaContinuousSourceTopicDeletionSuite extends KafkaContinuousTest { import testImplicits._ override val brokerProps = Map("auto.create.topics.enable" -> "false") test("subscribing topic by pattern with topic deletions") { val topicPrefix = newTopic() val topic = topicPrefix + "-seems" val topic2 = topicPrefix + "-bad" testUtils.createTopic(topic, partitions = 5) testUtils.sendMessages(topic, Array("-1")) require(testUtils.getLatestOffsets(Set(topic)).size === 5) val reader = spark .readStream .format("kafka") .option("kafka.bootstrap.servers", testUtils.brokerAddress) .option("kafka.metadata.max.age.ms", "1") .option("subscribePattern", s"$topicPrefix-.*") .option("failOnDataLoss", "false") val kafka = reader.load() .selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)") .as[(String, String)] val mapped = kafka.map(kv => kv._2.toInt + 1) testStream(mapped)( makeSureGetOffsetCalled, AddKafkaData(Set(topic), 1, 2, 3), CheckAnswer(2, 3, 4), Execute { query => testUtils.deleteTopic(topic) testUtils.createTopic(topic2, partitions = 5) eventually(timeout(streamingTimeout)) { assert( query.lastExecution.logical.collectFirst { case DataSourceV2Relation(_, r: KafkaContinuousReader) => r }.exists { r => // Ensure the new topic is present and the old topic is gone. r.knownPartitions.exists(_.topic == topic2) }, s"query never reconfigured to new topic $topic2") } }, AddKafkaData(Set(topic2), 4, 5, 6), CheckAnswer(2, 3, 4, 5, 6, 7) ) } } class KafkaContinuousSourceStressForDontFailOnDataLossSuite extends KafkaSourceStressForDontFailOnDataLossSuite { override protected def startStream(ds: Dataset[Int]) = { ds.writeStream .format("memory") .queryName("memory") .trigger(Trigger.Continuous("1 second")) .start() } }
Example 36
Source File: UISeleniumSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.thriftserver import scala.util.Random import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.openqa.selenium.WebDriver import org.openqa.selenium.htmlunit.HtmlUnitDriver import org.scalatest.{BeforeAndAfterAll, Matchers} import org.scalatest.concurrent.Eventually._ import org.scalatest.selenium.WebBrowser import org.scalatest.time.SpanSugar._ import org.apache.spark.ui.SparkUICssErrorHandler class UISeleniumSuite extends HiveThriftJdbcTest with WebBrowser with Matchers with BeforeAndAfterAll { implicit var webDriver: WebDriver = _ var server: HiveThriftServer2 = _ val uiPort = 20000 + Random.nextInt(10000) override def mode: ServerMode.Value = ServerMode.binary override def beforeAll(): Unit = { webDriver = new HtmlUnitDriver { getWebClient.setCssErrorHandler(new SparkUICssErrorHandler) } super.beforeAll() } override def afterAll(): Unit = { if (webDriver != null) { webDriver.quit() } super.afterAll() } override protected def serverStartCommand(port: Int) = { val portConf = if (mode == ServerMode.binary) { ConfVars.HIVE_SERVER2_THRIFT_PORT } else { ConfVars.HIVE_SERVER2_THRIFT_HTTP_PORT } s"""$startScript | --master local | --hiveconf hive.root.logger=INFO,console | --hiveconf ${ConfVars.METASTORECONNECTURLKEY}=$metastoreJdbcUri | --hiveconf ${ConfVars.METASTOREWAREHOUSE}=$warehousePath | --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST}=localhost | --hiveconf ${ConfVars.HIVE_SERVER2_TRANSPORT_MODE}=$mode | --hiveconf $portConf=$port | --driver-class-path ${sys.props("java.class.path")} | --conf spark.ui.enabled=true | --conf spark.ui.port=$uiPort """.stripMargin.split("\\s+").toSeq } ignore("thrift server ui test") { withJdbcStatement("test_map") { statement => val baseURL = s"http://localhost:$uiPort" val queries = Seq( "CREATE TABLE test_map(key INT, value STRING)", s"LOAD DATA LOCAL INPATH '${TestData.smallKv}' OVERWRITE INTO TABLE test_map") queries.foreach(statement.execute) eventually(timeout(10 seconds), interval(50 milliseconds)) { go to baseURL find(cssSelector("""ul li a[href*="sql"]""")) should not be None } eventually(timeout(10 seconds), interval(50 milliseconds)) { go to (baseURL + "/sql") find(id("sessionstat")) should not be None find(id("sqlstat")) should not be None // check whether statements exists queries.foreach { line => findAll(cssSelector("""ul table tbody tr td""")).map(_.text).toList should contain (line) } } } } }
Example 37
Source File: RandomDataGeneratorSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import scala.util.Random import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.CatalystTypeConverters import org.apache.spark.sql.types._ def testRandomDataGeneration(dataType: DataType, nullable: Boolean = true): Unit = { val toCatalyst = CatalystTypeConverters.createToCatalystConverter(dataType) val generator = RandomDataGenerator.forType(dataType, nullable, new Random(33)).getOrElse { fail(s"Random data generator was not defined for $dataType") } if (nullable) { assert(Iterator.fill(100)(generator()).contains(null)) } else { assert(!Iterator.fill(100)(generator()).contains(null)) } for (_ <- 1 to 10) { val generatedValue = generator() toCatalyst(generatedValue) } } // Basic types: for ( dataType <- DataTypeTestUtils.atomicTypes; nullable <- Seq(true, false) if !dataType.isInstanceOf[DecimalType]) { test(s"$dataType (nullable=$nullable)") { testRandomDataGeneration(dataType) } } for ( arrayType <- DataTypeTestUtils.atomicArrayTypes if RandomDataGenerator.forType(arrayType.elementType, arrayType.containsNull).isDefined ) { test(s"$arrayType") { testRandomDataGeneration(arrayType) } } val atomicTypesWithDataGenerators = DataTypeTestUtils.atomicTypes.filter(RandomDataGenerator.forType(_).isDefined) // Complex types: for ( keyType <- atomicTypesWithDataGenerators; valueType <- atomicTypesWithDataGenerators // Scala's BigDecimal.hashCode can lead to OutOfMemoryError on Scala 2.10 (see SI-6173) and // Spark can hit NumberFormatException errors when converting certain BigDecimals (SPARK-8802). // For these reasons, we don't support generation of maps with decimal keys. if !keyType.isInstanceOf[DecimalType] ) { val mapType = MapType(keyType, valueType) test(s"$mapType") { testRandomDataGeneration(mapType) } } for ( colOneType <- atomicTypesWithDataGenerators; colTwoType <- atomicTypesWithDataGenerators ) { val structType = StructType(StructField("a", colOneType) :: StructField("b", colTwoType) :: Nil) test(s"$structType") { testRandomDataGeneration(structType) } } test("check size of generated map") { val mapType = MapType(IntegerType, IntegerType) for (seed <- 1 to 1000) { val generator = RandomDataGenerator.forType( mapType, nullable = false, rand = new Random(seed)).get val maps = Seq.fill(100)(generator().asInstanceOf[Map[Int, Int]]) val expectedTotalElements = 100 / 2 * RandomDataGenerator.MAX_MAP_SIZE val deviation = math.abs(maps.map(_.size).sum - expectedTotalElements) assert(deviation.toDouble / expectedTotalElements < 2e-1) } } }
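The generator above is constructed with new Random(33) so every run of the suite sees the same "random" values. A minimal sketch of that seeding idea, independent of Spark's RandomDataGenerator:

import scala.util.Random

object SeededGeneratorSketch extends App {
  // Two Randoms created with the same seed produce identical sequences,
  // which makes randomly generated test data reproducible across runs.
  val a = new Random(33)
  val b = new Random(33)
  val fromA = Seq.fill(5)(a.nextInt(100))
  val fromB = Seq.fill(5)(b.nextInt(100))
  assert(fromA == fromB)
  println(fromA)
}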
Example 38
Source File: MiscExpressionsSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions import scala.util.Random import org.apache.spark.SparkFunSuite import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String class MiscExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("assert_true") { intercept[RuntimeException] { checkEvaluation(AssertTrue(Literal.create(false, BooleanType)), null) } intercept[RuntimeException] { checkEvaluation(AssertTrue(Cast(Literal(0), BooleanType)), null) } intercept[RuntimeException] { checkEvaluation(AssertTrue(Literal.create(null, NullType)), null) } intercept[RuntimeException] { checkEvaluation(AssertTrue(Literal.create(null, BooleanType)), null) } checkEvaluation(AssertTrue(Literal.create(true, BooleanType)), null) checkEvaluation(AssertTrue(Cast(Literal(1), BooleanType)), null) } test("uuid") { def assertIncorrectEval(f: () => Unit): Unit = { intercept[Exception] { f() }.getMessage().contains("Incorrect evaluation") } checkEvaluation(Length(Uuid(Some(0))), 36) val r = new Random() val seed1 = Some(r.nextLong()) val uuid1 = evaluate(Uuid(seed1)).asInstanceOf[UTF8String] checkEvaluation(Uuid(seed1), uuid1.toString) val seed2 = Some(r.nextLong()) val uuid2 = evaluate(Uuid(seed2)).asInstanceOf[UTF8String] assertIncorrectEval(() => checkEvaluationWithoutCodegen(Uuid(seed1), uuid2)) assertIncorrectEval(() => checkEvaluationWithGeneratedMutableProjection(Uuid(seed1), uuid2)) assertIncorrectEval(() => checkEvalutionWithUnsafeProjection(Uuid(seed1), uuid2)) assertIncorrectEval(() => checkEvaluationWithOptimization(Uuid(seed1), uuid2)) } }
Example 39
Source File: RandomUUIDGeneratorSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.util import scala.util.Random import org.apache.spark.SparkFunSuite class RandomUUIDGeneratorSuite extends SparkFunSuite { test("RandomUUIDGenerator should generate version 4, variant 2 UUIDs") { val generator = RandomUUIDGenerator(new Random().nextLong()) for (_ <- 0 to 100) { val uuid = generator.getNextUUID() assert(uuid.version() == 4) assert(uuid.variant() == 2) } } test("UUID from RandomUUIDGenerator should be deterministic") { val r1 = new Random(100) val generator1 = RandomUUIDGenerator(r1.nextLong()) val r2 = new Random(100) val generator2 = RandomUUIDGenerator(r2.nextLong()) val r3 = new Random(101) val generator3 = RandomUUIDGenerator(r3.nextLong()) for (_ <- 0 to 100) { val uuid1 = generator1.getNextUUID() val uuid2 = generator2.getNextUUID() val uuid3 = generator3.getNextUUID() assert(uuid1 == uuid2) assert(uuid1 != uuid3) } } test("Get UTF8String UUID") { val generator = RandomUUIDGenerator(new Random().nextLong()) val utf8StringUUID = generator.getNextUUIDUTF8String() val uuid = java.util.UUID.fromString(utf8StringUUID.toString) assert(uuid.version() == 4 && uuid.variant() == 2 && utf8StringUUID.toString == uuid.toString) } }
Example 40
Source File: TakeOrderedAndProjectSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution import scala.util.Random import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ class TakeOrderedAndProjectSuite extends SparkPlanTest with SharedSQLContext { private var rand: Random = _ private var seed: Long = 0 protected override def beforeAll(): Unit = { super.beforeAll() seed = System.currentTimeMillis() rand = new Random(seed) } private def generateRandomInputData(): DataFrame = { val schema = new StructType() .add("a", IntegerType, nullable = false) .add("b", IntegerType, nullable = false) val inputData = Seq.fill(10000)(Row(rand.nextInt(), rand.nextInt())) spark.createDataFrame(sparkContext.parallelize(Random.shuffle(inputData), 10), schema) } private def noOpFilter(plan: SparkPlan): SparkPlan = FilterExec(Literal(true), plan) val limit = 250 val sortOrder = 'a.desc :: 'b.desc :: Nil test("TakeOrderedAndProject.doExecute without project") { withClue(s"seed = $seed") { checkThatPlansAgree( generateRandomInputData(), input => noOpFilter(TakeOrderedAndProjectExec(limit, sortOrder, input.output, input)), input => GlobalLimitExec(limit, LocalLimitExec(limit, SortExec(sortOrder, true, input))), sortAnswers = false) } } test("TakeOrderedAndProject.doExecute with project") { withClue(s"seed = $seed") { checkThatPlansAgree( generateRandomInputData(), input => noOpFilter( TakeOrderedAndProjectExec(limit, sortOrder, Seq(input.output.last), input)), input => GlobalLimitExec(limit, LocalLimitExec(limit, ProjectExec(Seq(input.output.last), SortExec(sortOrder, true, input)))), sortAnswers = false) } } }
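Note how the suite seeds its Random with System.currentTimeMillis() and then reports the seed through withClue, so a failing run can be replayed with the exact same input. A standalone sketch of that technique (the data and the assertion are purely illustrative):

import scala.util.Random

object LoggedSeedSketch extends App {
  val seed = System.currentTimeMillis()
  val rand = new Random(seed)
  val data = Seq.fill(1000)(rand.nextInt())
  // If this check ever fails, the printed seed lets you rerun with identical data.
  assert(data.nonEmpty, s"unexpected empty input (seed = $seed)")
  println(s"generated ${data.size} values with seed $seed")
}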
Example 41
Source File: ColumnarTestUtils.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericInternalRow = { val row = new GenericInternalRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) case _ => throw new IllegalArgumentException(s"Unknown column type $columnType") }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericInternalRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericInternalRow(1) row(0) = value row } (values, rows) } }
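ColumnarTestUtils leans on the Random companion object for most primitive column types: nextBoolean, nextInt, nextLong, nextFloat, nextDouble, nextString and nextBytes. A condensed standalone sketch of those calls (the widths chosen here are arbitrary, not part of the utility above):

import scala.util.Random

object RandomPrimitivesSketch extends App {
  val bool   = Random.nextBoolean()
  val int    = Random.nextInt()
  val long   = Random.nextLong()
  val float  = Random.nextFloat()
  val double = Random.nextDouble()
  val str    = Random.nextString(Random.nextInt(32)) // string of random length < 32
  val bytes  = new Array[Byte](16)
  Random.nextBytes(bytes)                            // fills the byte array in place
  println(Seq(bool, int, long, float, double, str, bytes.toSeq.take(4)).mkString(", "))
}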
Example 42
Source File: BitArraySuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.util.sketch

import scala.util.Random

import org.scalatest.FunSuite // scalastyle:ignore funsuite

class BitArraySuite extends FunSuite { // scalastyle:ignore funsuite

  test("error case when create BitArray") {
    intercept[IllegalArgumentException](new BitArray(0))
    intercept[IllegalArgumentException](new BitArray(64L * Integer.MAX_VALUE + 1))
  }

  test("bitSize") {
    assert(new BitArray(64).bitSize() == 64)
    // BitArray is word-aligned, so 65~128 bits need 2 long to store, which is 128 bits.
    assert(new BitArray(65).bitSize() == 128)
    assert(new BitArray(127).bitSize() == 128)
    assert(new BitArray(128).bitSize() == 128)
  }

  test("set") {
    val bitArray = new BitArray(64)
    assert(bitArray.set(1)) // Only returns true if the bit changed.
    assert(!bitArray.set(1))
    assert(bitArray.set(2))
  }

  test("normal operation") {
    // use a fixed seed to make the test predictable.
    val r = new Random(37)

    val bitArray = new BitArray(320)
    val indexes = (1 to 100).map(_ => r.nextInt(320).toLong).distinct

    indexes.foreach(bitArray.set)
    indexes.foreach(i => assert(bitArray.get(i)))
    assert(bitArray.cardinality() == indexes.length)
  }

  test("merge") {
    // use a fixed seed to make the test predictable.
    val r = new Random(37)

    val bitArray1 = new BitArray(64 * 6)
    val bitArray2 = new BitArray(64 * 6)

    val indexes1 = (1 to 100).map(_ => r.nextInt(64 * 6).toLong).distinct
    val indexes2 = (1 to 100).map(_ => r.nextInt(64 * 6).toLong).distinct

    indexes1.foreach(bitArray1.set)
    indexes2.foreach(bitArray2.set)

    bitArray1.putAll(bitArray2)
    indexes1.foreach(i => assert(bitArray1.get(i)))
    indexes2.foreach(i => assert(bitArray1.get(i)))
    assert(bitArray1.cardinality() == (indexes1 ++ indexes2).distinct.length)
  }
}
Example 43
Source File: NettyBlockTransferServiceSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.network.netty import scala.util.Random import org.mockito.Mockito.mock import org.scalatest._ import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.network.BlockDataManager class NettyBlockTransferServiceSuite extends SparkFunSuite with BeforeAndAfterEach with Matchers { private var service0: NettyBlockTransferService = _ private var service1: NettyBlockTransferService = _ override def afterEach() { try { if (service0 != null) { service0.close() service0 = null } if (service1 != null) { service1.close() service1 = null } } finally { super.afterEach() } } test("can bind to a random port") { service0 = createService(port = 0) service0.port should not be 0 } test("can bind to two random ports") { service0 = createService(port = 0) service1 = createService(port = 0) service0.port should not be service1.port } test("can bind to a specific port") { val port = 17634 + Random.nextInt(10000) logInfo("random port for test: " + port) service0 = createService(port) verifyServicePort(expectedPort = port, actualPort = service0.port) } test("can bind to a specific port twice and the second increments") { val port = 17634 + Random.nextInt(10000) logInfo("random port for test: " + port) service0 = createService(port) verifyServicePort(expectedPort = port, actualPort = service0.port) service1 = createService(service0.port) // `service0.port` is occupied, so `service1.port` should not be `service0.port` verifyServicePort(expectedPort = service0.port + 1, actualPort = service1.port) } private def verifyServicePort(expectedPort: Int, actualPort: Int): Unit = { actualPort should be >= expectedPort // avoid testing equality in case of simultaneous tests // the default value for `spark.port.maxRetries` is 100 under test actualPort should be <= (expectedPort + 100) } private def createService(port: Int): NettyBlockTransferService = { val conf = new SparkConf() .set("spark.app.id", s"test-${getClass.getName}") val securityManager = new SecurityManager(conf) val blockDataManager = mock(classOf[BlockDataManager]) val service = new NettyBlockTransferService(conf, securityManager, "localhost", "localhost", port, 1) service.init(blockDataManager) service } }
Example 44
Source File: SparkHadoopUtilSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy import java.security.PrivilegedExceptionAction import scala.util.Random import org.apache.hadoop.fs.FileStatus import org.apache.hadoop.fs.permission.{FsAction, FsPermission} import org.apache.hadoop.security.UserGroupInformation import org.scalatest.Matchers import org.apache.spark.SparkFunSuite class SparkHadoopUtilSuite extends SparkFunSuite with Matchers { test("check file permission") { import FsAction._ val testUser = s"user-${Random.nextInt(100)}" val testGroups = Array(s"group-${Random.nextInt(100)}") val testUgi = UserGroupInformation.createUserForTesting(testUser, testGroups) testUgi.doAs(new PrivilegedExceptionAction[Void] { override def run(): Void = { val sparkHadoopUtil = new SparkHadoopUtil // If file is owned by user and user has access permission var status = fileStatus(testUser, testGroups.head, READ_WRITE, READ_WRITE, NONE) sparkHadoopUtil.checkAccessPermission(status, READ) should be(true) sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(true) // If file is owned by user but user has no access permission status = fileStatus(testUser, testGroups.head, NONE, READ_WRITE, NONE) sparkHadoopUtil.checkAccessPermission(status, READ) should be(false) sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(false) val otherUser = s"test-${Random.nextInt(100)}" val otherGroup = s"test-${Random.nextInt(100)}" // If file is owned by user's group and user's group has access permission status = fileStatus(otherUser, testGroups.head, NONE, READ_WRITE, NONE) sparkHadoopUtil.checkAccessPermission(status, READ) should be(true) sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(true) // If file is owned by user's group but user's group has no access permission status = fileStatus(otherUser, testGroups.head, READ_WRITE, NONE, NONE) sparkHadoopUtil.checkAccessPermission(status, READ) should be(false) sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(false) // If file is owned by other user and this user has access permission status = fileStatus(otherUser, otherGroup, READ_WRITE, READ_WRITE, READ_WRITE) sparkHadoopUtil.checkAccessPermission(status, READ) should be(true) sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(true) // If file is owned by other user but this user has no access permission status = fileStatus(otherUser, otherGroup, READ_WRITE, READ_WRITE, NONE) sparkHadoopUtil.checkAccessPermission(status, READ) should be(false) sparkHadoopUtil.checkAccessPermission(status, WRITE) should be(false) null } }) } private def fileStatus( owner: String, group: String, userAction: FsAction, groupAction: FsAction, otherAction: FsAction): FileStatus = { new FileStatus(0L, false, 0, 0L, 0L, 0L, new FsPermission(userAction, groupAction, otherAction), owner, group, null) } }
Example 45
Source File: SamplingUtilsSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.util.random import scala.util.Random import org.apache.commons.math3.distribution.{BinomialDistribution, PoissonDistribution} import org.apache.spark.SparkFunSuite class SamplingUtilsSuite extends SparkFunSuite { test("reservoirSampleAndCount") { val input = Seq.fill(100)(Random.nextInt()) // input size < k val (sample1, count1) = SamplingUtils.reservoirSampleAndCount(input.iterator, 150) assert(count1 === 100) assert(input === sample1.toSeq) // input size == k val (sample2, count2) = SamplingUtils.reservoirSampleAndCount(input.iterator, 100) assert(count2 === 100) assert(input === sample2.toSeq) // input size > k val (sample3, count3) = SamplingUtils.reservoirSampleAndCount(input.iterator, 10) assert(count3 === 100) assert(sample3.length === 10) } test("SPARK-18678 reservoirSampleAndCount with tiny input") { val input = Seq(0, 1) val counts = new Array[Int](input.size) for (i <- 0 until 500) { val (samples, inputSize) = SamplingUtils.reservoirSampleAndCount(input.iterator, 1) assert(inputSize === 2) assert(samples.length === 1) counts(samples.head) += 1 } // If correct, should be true with prob ~ 0.99999707 assert(math.abs(counts(0) - counts(1)) <= 100) } test("computeFraction") { // test that the computed fraction guarantees enough data points // in the sample with a failure rate <= 0.0001 val n = 100000 for (s <- 1 to 15) { val frac = SamplingUtils.computeFractionForSampleSize(s, n, true) val poisson = new PoissonDistribution(frac * n) assert(poisson.inverseCumulativeProbability(0.0001) >= s, "Computed fraction is too low") } for (s <- List(20, 100, 1000)) { val frac = SamplingUtils.computeFractionForSampleSize(s, n, true) val poisson = new PoissonDistribution(frac * n) assert(poisson.inverseCumulativeProbability(0.0001) >= s, "Computed fraction is too low") } for (s <- List(1, 10, 100, 1000)) { val frac = SamplingUtils.computeFractionForSampleSize(s, n, false) val binomial = new BinomialDistribution(n, frac) assert(binomial.inverseCumulativeProbability(0.0001)*n >= s, "Computed fraction is too low") } } }
Example 46
Source File: BlockReplicationPolicySuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.storage import scala.collection.mutable import scala.language.implicitConversions import scala.util.Random import org.scalatest.{BeforeAndAfter, Matchers} import org.apache.spark.{LocalSparkContext, SparkFunSuite} class RandomBlockReplicationPolicyBehavior extends SparkFunSuite with Matchers with BeforeAndAfter with LocalSparkContext { // Implicitly convert strings to BlockIds for test clarity. protected implicit def StringToBlockId(value: String): BlockId = new TestBlockId(value) val replicationPolicy: BlockReplicationPolicy = new RandomBlockReplicationPolicy val blockId = "test-block" protected def generateBlockManagerIds(count: Int, racks: Seq[String]): Seq[BlockManagerId] = { val randomizedRacks: Seq[String] = Random.shuffle( racks ++ racks.length.until(count).map(_ => racks(Random.nextInt(racks.length))) ) (0 until count).map { i => BlockManagerId(s"Exec-$i", s"Host-$i", 10000 + i, Some(randomizedRacks(i))) } } } class TopologyAwareBlockReplicationPolicyBehavior extends RandomBlockReplicationPolicyBehavior { override val replicationPolicy = new BasicBlockReplicationPolicy test("All peers in the same rack") { val racks = Seq("/default-rack") val numBlockManager = 10 (1 to 10).foreach {numReplicas => val peers = generateBlockManagerIds(numBlockManager, racks) val blockManager = BlockManagerId("Driver", "Host-driver", 10001, Some(racks.head)) val prioritizedPeers = replicationPolicy.prioritize( blockManager, peers, mutable.HashSet.empty, blockId, numReplicas ) assert(prioritizedPeers.toSet.size == numReplicas) assert(prioritizedPeers.forall(p => p.host != blockManager.host)) } } test("Peers in 2 racks") { val racks = Seq("/Rack-1", "/Rack-2") (1 to 10).foreach {numReplicas => val peers = generateBlockManagerIds(10, racks) val blockManager = BlockManagerId("Driver", "Host-driver", 9001, Some(racks.head)) val prioritizedPeers = replicationPolicy.prioritize( blockManager, peers, mutable.HashSet.empty, blockId, numReplicas ) assert(prioritizedPeers.toSet.size == numReplicas) val priorityPeers = prioritizedPeers.take(2) assert(priorityPeers.forall(p => p.host != blockManager.host)) if(numReplicas > 1) { // both these conditions should be satisfied when numReplicas > 1 assert(priorityPeers.exists(p => p.topologyInfo == blockManager.topologyInfo)) assert(priorityPeers.exists(p => p.topologyInfo != blockManager.topologyInfo)) } } } }
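generateBlockManagerIds pads the rack list up to the requested size and then runs it through Random.shuffle, so rack assignment is arbitrary while every rack stays represented. A tiny standalone sketch of the same padding-plus-shuffle idea (the rack names are invented for illustration):

import scala.util.Random

object ShuffledRacksSketch extends App {
  val racks = Seq("/rack-1", "/rack-2")
  val count = 7
  // Pad to `count` entries by sampling racks at random, then shuffle the whole list.
  val padded = racks ++ racks.length.until(count).map(_ => racks(Random.nextInt(racks.length)))
  val randomized = Random.shuffle(padded)
  println(randomized.mkString(", "))
}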
Example 47
Source File: DatasetUtils.scala From doddle-model with Apache License 2.0 | 5 votes |
package io.picnicml.doddlemodel.data import breeze.stats.hist import scala.util.Random object DatasetUtils { def splitDatasetWithGroups(x: Features, y: Target, groups: IntVector, proportionTrain: Float = 0.5f): GroupTrainTestSplit = { val numTrain = numberOfTrainExamplesBasedOnProportion(x.rows, proportionTrain) val numSamplesPerGroup = hist(groups, numberOfUniqueGroups(groups)).hist.toArray val (sortedNumSamplesPerGroup, toOriginalGroupIndex) = numSamplesPerGroup.zipWithIndex.sorted.unzip val numGroupsInTrain = sortedNumSamplesPerGroup .foldLeft(List(0)) { case (acc, currGroupSize) => (acc(0) + currGroupSize) :: acc }.reverse.drop(1) .takeWhile(cumulativeNumSamples => cumulativeNumSamples <= numTrain) .length val groupsInTrain = (0 until numGroupsInTrain).map(group => toOriginalGroupIndex(group)) val (trIndices, teIndices) = (0 until groups.length).foldLeft((IndexedSeq[Int](), IndexedSeq[Int]())) { case ((currTrIndices, currTeIndices), groupIndex) => if (groupsInTrain.contains(groups(groupIndex))) (currTrIndices :+ groupIndex, currTeIndices) else (currTrIndices, currTeIndices :+ groupIndex) } GroupTrainTestSplit( x(trIndices, ::).toDenseMatrix, y(trIndices).toDenseVector, groups(trIndices).toDenseVector, x(teIndices, ::).toDenseMatrix, y(teIndices).toDenseVector, groups(teIndices).toDenseVector ) } private def numberOfTrainExamplesBasedOnProportion(numTotal: Int, proportionTrain: Float): Int = { require(proportionTrain > 0.0 && proportionTrain < 1.0, "proportionTrain must be between 0 and 1") val numTrain = (proportionTrain * numTotal.toFloat).toInt require(numTrain > 0 && numTrain < numTotal, "the value of proportionTrain is either too high or too low") numTrain } }
Example 48
Source File: KFoldSplitter.scala From doddle-model with Apache License 2.0 | 5 votes |
package io.picnicml.doddlemodel.modelselection import io.picnicml.doddlemodel.CrossScalaCompat.{LazyListCompat, lazyListCompatFromSeq} import io.picnicml.doddlemodel.data.{Features, IntVector, Target, TrainTestSplit} import scala.util.Random class KFoldSplitter private (val numFolds: Int, val shuffleRows: Boolean) extends DataSplitter { override def splitData(x: Features, y: Target) (implicit rand: Random = new Random()): LazyListCompat[TrainTestSplit] = { require(x.rows >= this.numFolds, "Number of examples must be at least the same as number of folds") val shuffleIndices = if (this.shuffleRows) rand.shuffle((0 until y.length).toIndexedSeq) else 0 until y.length val xShuffled = x(shuffleIndices, ::) val yShuffled = y(shuffleIndices) val splitIndices = this.calculateSplitIndices(x.rows) lazyListCompatFromSeq(splitIndices zip splitIndices.tail) map { case (indexStart, indexEnd) => val trIndices = (0 until indexStart) ++ (indexEnd until x.rows) val teIndices = indexStart until indexEnd TrainTestSplit( xShuffled(trIndices, ::).toDenseMatrix, yShuffled(trIndices).toDenseVector, xShuffled(teIndices, ::).toDenseMatrix, yShuffled(teIndices).toDenseVector ) } } private def calculateSplitIndices(numExamples: Int): List[Int] = { val atLeastNumExamplesPerFold = List.fill(this.numFolds)(numExamples / this.numFolds) val numFoldsWithOneMore = numExamples % this.numFolds val numExamplesPerFold = atLeastNumExamplesPerFold.zipWithIndex map { case (num, i) if i < numFoldsWithOneMore => num + 1 case (num, _) => num } // calculate indices by subtracting number of examples per fold from total number of examples numExamplesPerFold.foldRight(List(numExamples)) { case (num, head :: tail) => head - num :: head :: tail case _ => throw new IllegalStateException() } } override def splitData(x: Features, y: Target, groups: IntVector) (implicit rand: Random): LazyListCompat[TrainTestSplit] = throw new NotImplementedError("KFoldSplitter doesn't split data based on groups") } object KFoldSplitter { def apply(numFolds: Int, shuffleRows: Boolean = true): KFoldSplitter = { require(numFolds > 0, "Number of folds must be positive") new KFoldSplitter(numFolds, shuffleRows) } }
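KFoldSplitter takes its Random as an implicit parameter with a default, so production callers get fresh randomness while tests can pass a seeded instance for deterministic splits. A minimal sketch of that injection pattern, unrelated to the doddle-model types above:

import scala.util.Random

object ImplicitRandomSketch extends App {
  // The implicit Random lets callers control (and seed) the shuffling.
  def shuffledIndices(n: Int)(implicit rand: Random = new Random()): IndexedSeq[Int] =
    rand.shuffle((0 until n).toIndexedSeq)

  println(shuffledIndices(10))                 // fresh randomness by default
  println(shuffledIndices(10)(new Random(42))) // reproducible when seeded, e.g. in tests
}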
Example 49
Source File: GroupKFoldSplitter.scala From doddle-model with Apache License 2.0 | 5 votes |
package io.picnicml.doddlemodel.modelselection import breeze.linalg.argmin import breeze.stats.hist import io.picnicml.doddlemodel.CrossScalaCompat.{LazyListCompat, lazyListCompatFromSeq} import io.picnicml.doddlemodel.data._ import io.picnicml.doddlemodel.modelselection.GroupKFoldSplitter.{TestFolds, TrainTestIndices} import scala.util.Random class GroupKFoldSplitter private (val numFolds: Int) extends DataSplitter { override def splitData(x: Features, y: Target, groups: IntVector) (implicit rand: Random = new Random()): LazyListCompat[TrainTestSplit] = { val testFolds = calculateTestFolds(groups) lazyListCompatFromSeq(0 until numFolds).map { foldIndex => val indices = groups.iterator.foldLeft(TrainTestIndices()) { case (acc, (exampleIndex, group)) => if (testFolds.groupToTestFoldIndex(group) == foldIndex) acc.addToTestIndex(exampleIndex) else acc.addToTrainIndex(exampleIndex) } TrainTestSplit( x(indices.trIndices, ::).toDenseMatrix, y(indices.trIndices).toDenseVector, x(indices.teIndices, ::).toDenseMatrix, y(indices.teIndices).toDenseVector ) } } private def calculateTestFolds(groups: IntVector): TestFolds = { val numGroups = numberOfUniqueGroups(groups) val numSamplesPerGroup = hist(groups, numGroups).hist.toArray implicit val ordering: Ordering[Int] = Ordering.Int.reverse val (sortedNumSamplesPerGroup, toOriginalGroupIndex) = numSamplesPerGroup.zipWithIndex.sorted.unzip sortedNumSamplesPerGroup.zipWithIndex.foldLeft(TestFolds(numFolds, numGroups)) { case (acc, (numSamples, group)) => val smallestFoldIndex = argmin(acc.numTestSamplesPerFold) acc.addNumSamplesToFold(numSamples, smallestFoldIndex) acc.setGroupToTestFoldIndex(toOriginalGroupIndex(group), smallestFoldIndex) acc } } override def splitData(x: Features, y: Target)(implicit rand: Random): LazyListCompat[TrainTestSplit] = throw new NotImplementedError("GroupKFoldSplitter only splits data based on groups") } object GroupKFoldSplitter { def apply(numFolds: Int): GroupKFoldSplitter = { require(numFolds > 0, "Number of folds must be positive") new GroupKFoldSplitter(numFolds) } private case class TrainTestIndices(trIndices: IndexedSeq[Int] = IndexedSeq(), teIndices: IndexedSeq[Int] = IndexedSeq()) { def addToTrainIndex(x: Int): TrainTestIndices = this.copy(trIndices = this.trIndices :+ x) def addToTestIndex(x: Int): TrainTestIndices = this.copy(teIndices = this.teIndices :+ x) } private case class TestFolds(numTestSamplesPerFold: Array[Int], groupToTestFoldIndex: Array[Int]) { def addNumSamplesToFold(numSamples: Int, foldIndex: Int): Unit = this.numTestSamplesPerFold(foldIndex) += numSamples def setGroupToTestFoldIndex(group: Int, foldIndex: Int): Unit = this.groupToTestFoldIndex(group) = foldIndex } private object TestFolds { def apply(numFolds: Int, numGroups: Int): TestFolds = TestFolds(new Array[Int](numFolds), new Array[Int](numGroups)) } }
Example 50
Source File: DatasetUtilsTest.scala From doddle-model with Apache License 2.0 | 5 votes |
package io.picnicml.doddlemodel.data import breeze.linalg.DenseVector import io.picnicml.doddlemodel.TestingUtils import io.picnicml.doddlemodel.data.DatasetUtils.{shuffleDataset, splitDataset, splitDatasetWithGroups} import org.scalactic.{Equality, TolerantNumerics} import scala.util.Random import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers class DatasetUtilsTest extends AnyFlatSpec with Matchers with TestingUtils { implicit val rand: Random = new Random(0) implicit val tolerance: Equality[Float] = TolerantNumerics.tolerantFloatEquality(1.0f) val (x, y, _) = loadIrisDataset "Dataset utils" should "shuffle the dataset" in { val (_, yShuffled) = shuffleDataset(x, y) breezeEqual(y, yShuffled) shouldBe false } they should "split the dataset" in { val split = splitDataset(x, y) split.yTr.length shouldBe 75 split.yTe.length shouldBe 75 } they should "split the dataset with groups" in { val groups = DenseVector((0 until x.rows).map(x => x % 4):_*) val split = splitDatasetWithGroups(x, y, groups, proportionTrain = 0.8f) val groupsTe = split.groupsTe.toArray split.groupsTr.forall(trGroup => !groupsTe.contains(trGroup)) shouldBe true } }
Example 51
Source File: KFoldSplitterTest.scala From doddle-model with Apache License 2.0 | 5 votes |
package io.picnicml.doddlemodel.modelselection import io.picnicml.doddlemodel.TestingUtils import scala.util.Random import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers class KFoldSplitterTest extends AnyFlatSpec with Matchers with TestingUtils { val splitter = KFoldSplitter(numFolds = 3, shuffleRows = false) "KFoldSplitter" should "split 8 examples" in { implicit val rand: Random = new Random() val (x, y) = dummyData(8) val splits = splitter.splitData(x, y) splits.length shouldBe 3 splits(0).yTr.toArray shouldBe (3 to 7).toArray splits(0).yTe.toArray shouldBe (0 to 2).toArray splits(1).yTr.toArray shouldBe Array(0, 1, 2, 6, 7) splits(1).yTe.toArray shouldBe (3 to 5).toArray splits(2).yTr.toArray shouldBe (0 to 5).toArray splits(2).yTe.toArray shouldBe Array(6, 7) } it should "split 9 examples" in { implicit val rand: Random = new Random() val (x, y) = dummyData(9) val splits = splitter.splitData(x, y) splits.length shouldBe 3 splits(0).yTr.toArray shouldBe (3 to 8).toArray splits(0).yTe.toArray shouldBe (0 to 2).toArray splits(1).yTr.toArray shouldBe Array(0, 1, 2, 6, 7, 8) splits(1).yTe.toArray shouldBe (3 to 5).toArray splits(2).yTr.toArray shouldBe (0 to 5).toArray splits(2).yTe.toArray shouldBe (6 to 8).toArray } it should "split 10 examples" in { implicit val rand: Random = new Random() val (x, y) = dummyData(10) val splits = splitter.splitData(x, y) splits.length shouldBe 3 splits(0).yTr.toArray shouldBe (4 to 9).toArray splits(0).yTe.toArray shouldBe (0 to 3).toArray splits(1).yTr.toArray shouldBe Array(0, 1, 2, 3, 7, 8, 9) splits(1).yTe.toArray shouldBe (4 to 6).toArray splits(2).yTr.toArray shouldBe (0 to 6).toArray splits(2).yTe.toArray shouldBe (7 to 9).toArray } }
Example 52
Source File: HyperparameterSearchTest.scala From doddle-model with Apache License 2.0 | 5 votes |
package io.picnicml.doddlemodel.modelselection import io.picnicml.doddlemodel.data.DatasetUtils.shuffleDataset import io.picnicml.doddlemodel.data.loadBreastCancerDataset import io.picnicml.doddlemodel.linear.LogisticRegression import io.picnicml.doddlemodel.linear.LogisticRegression.ev import io.picnicml.doddlemodel.metrics.accuracy import scala.util.Random import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers class HyperparameterSearchTest extends AnyFlatSpec with Matchers { "Hyperparameter search" should "return the best model from available candidates" in { val (x, y, _) = loadBreastCancerDataset implicit val rand: Random = new Random(42) val (xShuffled, yShuffled) = shuffleDataset(x, y) val trIndices = 0 until 400 val teIndices = 400 until x.rows val (xTr, yTr) = (xShuffled(trIndices, ::), yShuffled(trIndices)) val (xTe, yTe) = (xShuffled(teIndices, ::), yShuffled(teIndices)) val cv: CrossValidation = CrossValidation(metric = accuracy, dataSplitter = KFoldSplitter(numFolds = 5)) val search = HyperparameterSearch(numIterations = 100, crossValidation = cv, verbose = false) val grid = (0 until 100).iterator.map(_.toFloat) val underfittedModel = ev.fit(LogisticRegression(lambda = 99.0f), xTr, yTr) val bestModel = search.bestOf(xTr, yTr) { LogisticRegression(lambda = grid.next) } accuracy(yTe, ev.predict(bestModel, xTe)) > accuracy(yTe, ev.predict(underfittedModel, xTe)) shouldBe true } }
Example 53
Source File: Retry.scala From futiles with Apache License 2.0 | 5 votes |
package markatta.futiles

import java.util.concurrent.{ThreadLocalRandom, TimeUnit}

import scala.concurrent.duration.FiniteDuration
import scala.concurrent.{ExecutionContext, Future}
import scala.util.Random

object Retry {

  private val alwaysRetry: Throwable => Boolean = _ => true

  def retryWithBackOff[A](
    times: Int,
    backOffUnit: FiniteDuration,
    shouldRetry: Throwable => Boolean = alwaysRetry
  )(fBlock: => Future[A])(implicit ec: ExecutionContext): Future[A] =
    try {
      if (times <= 1) fBlock
      else retryWithBackOffLoop(times, 1, backOffUnit, shouldRetry)(fBlock)
    } catch {
      // failure to actually create the future
      case x: Throwable => Future.failed(x)
    }

  private def retryWithBackOffLoop[A](
    totalTimes: Int,
    timesTried: Int,
    backOffUnit: FiniteDuration,
    shouldRetry: Throwable => Boolean
  )(fBlock: => Future[A])(implicit ec: ExecutionContext): Future[A] =
    if (totalTimes <= timesTried) fBlock
    else
      fBlock.recoverWith {
        case ex: Throwable if shouldRetry(ex) =>
          val timesTriedNow = timesTried + 1
          val backOff = nextBackOff(timesTriedNow, backOffUnit)
          Timeouts
            .timeout(backOff)(())
            .flatMap(
              _ =>
                retryWithBackOffLoop(
                  totalTimes,
                  timesTriedNow,
                  backOffUnit,
                  shouldRetry
                )(fBlock)
            )
      }

  private[futiles] def nextBackOff(
    tries: Int,
    backOffUnit: FiniteDuration
  ): FiniteDuration = {
    require(tries > 0, "tries should start from 1")
    val rng = new Random(ThreadLocalRandom.current())
    // jitter between 0.5 and 1.5
    val jitter = 0.5 + rng.nextDouble()
    val factor = math.pow(2, tries) * jitter
    FiniteDuration(
      (backOffUnit.toMillis * factor).toLong,
      TimeUnit.MILLISECONDS
    )
  }
}
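The Random usage worth calling out is in nextBackOff: a jitter factor drawn uniformly from [0.5, 1.5) spreads the exponential back-off so many clients retrying at once do not all wake up together. A small illustrative sketch of how that factor grows (standalone, with arbitrary loop bounds):

import scala.util.Random

object JitterSketch extends App {
  val rng = new Random()
  (1 to 5).foreach { tries =>
    val jitter = 0.5 + rng.nextDouble()        // uniform in [0.5, 1.5), as in nextBackOff
    val factor = math.pow(2, tries) * jitter   // exponential growth with random spread
    println(f"try $tries: back-off factor $factor%.2f (jitter $jitter%.2f)")
  }
}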
Example 54
Source File: LocalFSRawFileProvider.scala From mimir with Apache License 2.0 | 5 votes |
package mimir.data.staging import java.net.URL import java.io.{ File, InputStream, OutputStream, FileOutputStream } import java.sql.SQLException import scala.util.Random import com.typesafe.scalalogging.LazyLogging import org.apache.spark.sql.DataFrame import mimir.algebra.ID private def transferBytes(input: InputStream, output: OutputStream): Unit = { val buffer = Array.ofDim[Byte](1024*1024) // 1MB buffer var bytesRead = input.read(buffer) while(bytesRead >= 0) { output.write(buffer, 0, bytesRead) bytesRead = input.read(buffer) } } def stage(input: InputStream, fileExtension: String, nameHint: Option[String]): String = { val file = makeName(fileExtension, nameHint) transferBytes(input, new FileOutputStream(file)) return file.toString } def stage(url: URL, nameHint: Option[String]): String = { val pathComponents = url.getPath.split("/") val nameComponents = pathComponents.reverse.head.split(".") val extension = if(nameComponents.size > 1) { nameComponents.reverse.head } else { "data" } // default to generic 'data' if there's no extension stage(url.openStream(), extension, nameHint) } def stage(input: DataFrame, format: ID, nameHint:Option[String]): String = { val targetFile = makeName(format.id, nameHint).toString input.write .format(format.id) .save(targetFile) return targetFile } def drop(local: String): Unit = { new File(local).delete() } }
Example 55
Source File: HDFSRawFileProvider.scala From mimir with Apache License 2.0 | 5 votes |
package mimir.data.staging import java.net.URL import java.io.{ File, InputStream, OutputStream, FileOutputStream } import java.sql.SQLException import scala.util.Random import com.typesafe.scalalogging.LazyLogging import org.apache.spark.sql.DataFrame import mimir.algebra.ID import mimir.util.HadoopUtils import mimir.exec.spark.MimirSpark private def makeName(extension: String, nameHint: Option[String]): File = { val rand = new Random().alphanumeric // Try 1000 times to create a randomly named file for(i <- 0 until 1000){ val candidate = new File(basePath, nameHint match { case Some(hint) => s"${hint.replaceAll("[^a-zA-Z0-9]", "")}-${rand.take(10).mkString}.${extension}" case None => s"${rand.take(20).mkString}.${extension}" } ) // If the randomly named file doesn't exist, we're done. if(!candidate.exists()){ return candidate } } // Fail after 1000 attempts. throw new SQLException(s"Can't allocate name for $nameHint") } def stage(input: InputStream, fileExtension: String, nameHint: Option[String]): String = { val file = makeName(fileExtension, nameHint) logger.debug("Stage File To HDFS: " +hdfsHome+File.separator+file.toString) //if(!HadoopUtils.fileExistsHDFS(sparkSql.sparkSession.sparkContext, fileName)) HadoopUtils.writeToHDFS(MimirSpark.get.sparkSession.sparkContext, file.getName, input, true) logger.debug("... done\n") return s"$hdfsHome/${file.getName}" } def stage(url: URL, nameHint: Option[String]): String = { val pathComponents = url.getPath.split("/") val nameComponents = pathComponents.reverse.head.split(".") val extension = if(nameComponents.size > 1) { nameComponents.reverse.head } else { "data" } // default to generic 'data' if there's no extension stage(url.openStream(), extension, nameHint) } def stage(input: DataFrame, format: ID, nameHint:Option[String]): String = { val targetFile = makeName(format.id, nameHint).toString input.write .format(format.id) .save(targetFile) return targetFile } def drop(local: String): Unit = { new File(local).delete() } }
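makeName draws from new Random().alphanumeric, an infinite lazy sequence of random letters and digits, and gives up after 1000 attempts if every candidate name is taken. A minimal standalone sketch of that naming scheme (the directory, attempt count and extension here are arbitrary, not mimir's):

import java.io.File
import scala.util.Random

object RandomFileNameSketch extends App {
  // Build a fresh 20-character alphanumeric name per attempt and keep the first unused one.
  def freshName(dir: File, extension: String, attempts: Int = 1000): Option[File] =
    Iterator
      .continually(new File(dir, s"${Random.alphanumeric.take(20).mkString}.$extension"))
      .take(attempts)
      .find(!_.exists())

  println(freshName(new File("/tmp"), "csv"))
}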
Example 56
Source File: CommentModel.scala From mimir with Apache License 2.0 | 5 votes |
package mimir.models; import scala.util.Random import mimir.algebra._ import mimir.util._ import java.sql.SQLException @SerialVersionUID(1001L) class CommentModel(override val name: ID, cols:Seq[ID], colTypes:Seq[Type], comments:Seq[String]) extends Model(name) with Serializable with SourcedFeedback { def getFeedbackKey(idx: Int, args: Seq[PrimitiveValue] ) : ID = ID(s"${args(0).asString}:$idx") def argTypes(idx: Int) = Seq(TRowId()) def varType(idx: Int, args: Seq[Type]) = colTypes(idx) def bestGuess(idx: Int, args: Seq[PrimitiveValue], hints: Seq[PrimitiveValue] ) = { getFeedback(idx, args) match { case Some(v) => v case None => { hints(0) } } } def sample(idx: Int, randomness: Random, args: Seq[PrimitiveValue], hints: Seq[PrimitiveValue]) = hints(0) def reason(idx: Int, args: Seq[PrimitiveValue],hints: Seq[PrimitiveValue]): String = { //println("CommentModel:reason: " + idx + " [ " + args.mkString(",") + " ] [ " + hints.mkString(",") + " ]" ); val rowid = RowIdPrimitive(args(0).asString) val rval = getFeedback(idx, args) match { case Some(v) => s"${getReasonWho(idx,args)} told me that $v is valid for row $rowid" case None => s" ${comments(idx)}" } rval } def feedback(idx: Int, args: Seq[PrimitiveValue], v: PrimitiveValue): Unit = { val rowid = args(0).asString setFeedback(idx, args, v) } def isAcknowledged (idx: Int, args: Seq[PrimitiveValue]): Boolean = hasFeedback(idx, args) def hintTypes(idx: Int): Seq[mimir.algebra.Type] = colTypes //def getDomain(idx: Int, args: Seq[PrimitiveValue], hints:Seq[PrimitiveValue]): Seq[(PrimitiveValue,Double)] = Seq((hints(0), 0.0)) def confidence (idx: Int, args: Seq[PrimitiveValue], hints: Seq[PrimitiveValue]): Double = { val rowid = RowIdPrimitive(args(0).asString) getFeedback(idx,args) match { case Some(v) => { 1.0 } case None => { 0.0 } } } }
Example 57
Source File: MissingKeyModel.scala From mimir with Apache License 2.0 | 5 votes |
package mimir.models; import scala.util.Random import mimir.algebra._ import mimir.util._ @SerialVersionUID(1001L) class MissingKeyModel(override val name: ID, keys:Seq[ID], colTypes:Seq[Type]) extends Model(name) with Serializable with FiniteDiscreteDomain with SourcedFeedback { def getFeedbackKey(idx: Int, args: Seq[PrimitiveValue] ) : ID = ID(s"${args(0).asString}_$idx") def argTypes(idx: Int) = { Seq(TRowId()) } def varType(idx: Int, args: Seq[Type]) = colTypes(idx) def bestGuess(idx: Int, args: Seq[PrimitiveValue], hints: Seq[PrimitiveValue] ) = { //println(s"MissingKeyModel:bestGuess: idx: $idx args: ${args.mkString("[ ",","," ]")} hints: ${hints.mkString("[ ",","," ]")}") getFeedback(idx, args) match { case Some(v) => v case None => hints(0) } } def sample(idx: Int, randomness: Random, args: Seq[PrimitiveValue], hints: Seq[PrimitiveValue]) = { hints(0) } def reason(idx: Int, args: Seq[PrimitiveValue],hints: Seq[PrimitiveValue]): String = { val rowid = RowIdPrimitive(args(0).asString) getFeedback(idx, args) match { case Some(v) => v match { case NullPrimitive() => { s"${getReasonWho(idx,args)} told me that the row of this cell was missing and that the value of this cell is unknown so I have made it NULL." } case i => { s"${getReasonWho(idx,args)} told me that this key was missing because it was in a sequence but not in the query results: $i" } } case None => hints(0) match { case NullPrimitive() => { "I guessed that the row of this cell was missing. The value of this cell is unknown so I have made it NULL." } case i => { s"I guessed that this key was missing because it was in a sequence but not in the query results: $i" } } } } def feedback(idx: Int, args: Seq[PrimitiveValue], v: PrimitiveValue): Unit = { setFeedback(idx, args, v) } def isAcknowledged (idx: Int, args: Seq[PrimitiveValue]): Boolean = { hasFeedback(idx, args) } def hintTypes(idx: Int): Seq[mimir.algebra.Type] = Seq(TAny()) def getDomain(idx: Int, args: Seq[PrimitiveValue], hints:Seq[PrimitiveValue]): Seq[(PrimitiveValue,Double)] = Seq((hints(0), 0.0)) def confidence (idx: Int, args: Seq[PrimitiveValue], hints:Seq[PrimitiveValue]) : Double = { val rowid = RowIdPrimitive(args(0).asString) getFeedback(idx,args) match { case Some(v) => 1.0 case None => 0.0 } } }
Example 58
Source File: DefaultMetaModel.scala From mimir with Apache License 2.0 | 5 votes |
package mimir.models;

import scala.util.Random

import mimir.algebra._
import mimir.util._

@SerialVersionUID(1001L)
class DefaultMetaModel(name: ID, context: String, models: Seq[ID])
  extends Model(name)
  with DataIndependentFeedback
  with NoArgModel
  with FiniteDiscreteDomain
{
  def varType(idx: Int, args: Seq[Type]): Type = TString()

  def bestGuess(idx: Int, args: Seq[PrimitiveValue], hints: Seq[PrimitiveValue]): PrimitiveValue =
    choices(idx).getOrElse( StringPrimitive(models.head.id) )

  def sample(idx: Int, randomness: Random, args: Seq[PrimitiveValue], hints: Seq[PrimitiveValue]): PrimitiveValue =
    StringPrimitive(RandUtils.pickFromList(randomness, models).id)

  def reason(idx: Int, args: Seq[PrimitiveValue], hints: Seq[PrimitiveValue]): String = {
    choices(idx) match {
      case None => {
        val bestChoice = models.head
        val modelString = models.mkString(", ")
        s"I defaulted to guessing with '$bestChoice' (out of $modelString) for $context"
      }
      case Some(choiceStr) =>
        s"${getReasonWho(idx,args)} told me to use $choiceStr for $context"
    }
  }

  def validateChoice(idx: Int, v: PrimitiveValue) = models.contains(v.asString)

  def getDomain(idx: Int, args: Seq[PrimitiveValue], hints: Seq[PrimitiveValue]): Seq[(PrimitiveValue,Double)] =
    models.map( x => (StringPrimitive(x.id), 0.0) )

  def confidence (idx: Int, args: Seq[PrimitiveValue], hints:Seq[PrimitiveValue]) : Double = 1.0/models.size
}
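sample delegates to RandUtils.pickFromList to choose one of the candidate models at random. Without mimir's helper, the same idea is a one-liner over an IndexedSeq; a standalone sketch with a made-up candidate list:

import scala.util.Random

object PickFromListSketch extends App {
  def pickFrom[A](rand: Random, items: IndexedSeq[A]): A =
    items(rand.nextInt(items.length))

  val models = IndexedSeq("TYPE_INFERENCE", "MISSING_VALUE", "COMMENT")
  val rand = new Random(7)
  println(Seq.fill(5)(pickFrom(rand, models)).mkString(", "))
}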
Example 59
Source File: GenRandom.scala From tofu with Apache License 2.0 | 5 votes |
package tofu.generate

import cats.effect.Sync
import cats.syntax.functor._
import simulacrum.typeclass
import tofu.higherKind
import tofu.higherKind.RepresentableK

import scala.util.Random

@typeclass
trait GenRandom[F[_]] {
  def nextLong: F[Long]
  def nextInt(n: Int): F[Int]
}

object GenRandom {
  def nextLong[F[_]](implicit g: GenRandom[F]): F[Long] = g.nextLong
  def nextInt[F[_]](n: Int)(implicit g: GenRandom[F]): F[Int] = g.nextInt(n)

  def instance[I[_]: Sync, F[_]: Sync](seed: Option[Long] = None, secure: Boolean = false): I[GenRandom[F]] = {
    def createStd() = seed.fold(new java.util.Random)(new java.util.Random(_))

    def createSecure() = {
      val rnd = new java.security.SecureRandom()
      seed.foreach(rnd.setSeed)
      rnd
    }

    def random(): java.util.Random = if (secure) createSecure() else createStd()

    for (rnd <- Sync[I].delay(new Random(random()))) yield new ScalaUtil[F](rnd)
  }

  private class ScalaUtil[F[_]](rnd: Random)(implicit F: Sync[F]) extends GenRandom[F] {
    def nextLong: F[Long] = F.delay(rnd.nextLong())
    def nextInt(max: Int): F[Int] = F.delay(rnd.nextInt(max))
  }

  implicit val genRandomRepresentableK: RepresentableK[GenRandom] = higherKind.derived.genRepresentableK[GenRandom]
}
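instance builds the underlying java.util.Random (optionally a seeded SecureRandom) and wraps it in scala.util.Random before suspending calls in F. The wrapping itself needs no effect machinery; a plain standalone sketch of the same construction:

import scala.util.Random

object WrappedRandomSketch extends App {
  def scalaRandom(seed: Option[Long], secure: Boolean): Random = {
    val underlying: java.util.Random =
      if (secure) {
        val r = new java.security.SecureRandom()
        seed.foreach(r.setSeed)
        r
      } else seed.fold(new java.util.Random)(new java.util.Random(_))
    new Random(underlying) // scala.util.Random can wrap any java.util.Random
  }

  println(scalaRandom(seed = Some(42L), secure = false).nextInt(100))
  println(scalaRandom(seed = None, secure = true).nextInt(100))
}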
Example 60
Source File: ExampleApp.scala From kafka4s with Apache License 2.0 | 5 votes |
package example3 import cats.effect._ import cats.implicits._ import fs2.Stream import com.banno.kafka._ import com.banno.kafka.admin._ import com.banno.kafka.consumer._ import com.banno.kafka.producer._ import org.apache.kafka.clients.admin.NewTopic import org.apache.kafka.clients.producer.ProducerRecord import scala.concurrent.duration._ import scala.util.Random final class ExampleApp[F[_]: Concurrent: ContextShift: Timer] { // Change these for your environment as needed val topic = new NewTopic(s"example3", 1, 3.toShort) val kafkaBootstrapServers = "kafka.local:9092,kafka.local:9093" val example: F[Unit] = for { _ <- Sync[F].delay(println("Starting kafka4s example")) _ <- AdminApi.createTopicsIdempotent[F](kafkaBootstrapServers, topic) writeStream = Stream .resource(ProducerApi.resource[F, Int, Int](BootstrapServers(kafkaBootstrapServers))) .flatMap { producer => Stream .awakeDelay[F](1 second) .evalMap { _ => Sync[F].delay(Random.nextInt()).flatMap { i => producer.sendAndForget(new ProducerRecord(topic.name, i, i)) } } } readStream = Stream .resource( ConsumerApi .resource[F, Int, Int]( BootstrapServers(kafkaBootstrapServers), GroupId("example3"), AutoOffsetReset.earliest, EnableAutoCommit(true) ) ) .evalTap(_.subscribe(topic.name)) .flatMap( _.recordStream(1.second) .map(_.value) .filter(_ % 2 == 0) .evalMap(i => Sync[F].delay(println(i))) ) _ <- writeStream .merge(readStream) .onFinalize(Sync[F].delay(println("Finished kafka4s example"))) .compile .drain } yield () } object ExampleApp { def apply[F[_]: Concurrent: ContextShift: Timer] = new ExampleApp[F] }
Example 61
Source File: MyStream.scala From scala-in-practice with Apache License 2.0 | 5 votes |
package chapter7.collections.stream import scala.util.Random trait MyStream[+A] { import MyStream._ def filter(p: A => Boolean): MyStream[A] = { this match { case Cons(h, t) => if (p(h())) cons(h(), t().filter(p)) else t().filter(p) case Empty => empty } } def take(n: Int): MyStream[A] = { if (n > 0) this match { case Cons(h, t) if n == 1 => cons(h(), MyStream.empty) case Cons(h, t) => cons(h(), t().take(n - 1)) case _ => MyStream.empty } else MyStream() } def toList: List[A] = { this match { case Cons(h, t) => h() :: t().toList case Empty => Nil } } } case object Empty extends MyStream[Nothing] case class Cons[+A](h: () => A, t: () => MyStream[A]) extends MyStream[A] object MyStream { def apply[A](elems: A*): MyStream[A] = { if (elems.isEmpty) empty else cons(elems.head, apply(elems.tail: _*)) } def cons[A](hd: => A, tl: => MyStream[A]): MyStream[A] = { lazy val head = hd lazy val tail = tl Cons(() => head, () => tail) } def empty[A]: MyStream[A] = Empty } object MyStreamTest extends App { def randomList = (1 to 50).map(_ => Random.nextInt(100)).toList def isDivisibleBy3(n: Int) = { val isDivisible = n % 3 == 0 println(s"$n $isDivisible") isDivisible } MyStream(randomList: _*).filter(isDivisibleBy3).take(2).toList }
Example 62
Source File: TeeCommandTest.scala From shellbase with Apache License 2.0 | 5 votes |
package com.sumologic.shellbase.commands import java.nio.charset.Charset import java.nio.file.{Files, Path} import com.sumologic.shellbase.CommonWordSpec import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import scala.collection.JavaConverters._ import scala.util.Random @RunWith(classOf[JUnitRunner]) class TeeCommandTest extends CommonWordSpec { "TeeCommand" should { "execute a subcommand and propagate exit code" in { var calls = 0 def callCheck(ret: Boolean)(input: String): Boolean = { input should be("hi") calls += 1 ret } new TeeCommand(callCheck(true)).executeLine(List("`hi`", "-o", getTempFilePath().toString)) should be(true) calls should be(1) new TeeCommand(callCheck(false)).executeLine(List("`hi`", "-o", getTempFilePath().toString)) should be(false) calls should be(2) } "degrade nicely with malformatted input" in { new TeeCommand(_ => true).executeLine(List.empty) should be(false) new TeeCommand(_ => true).executeLine(List("test")) should be(false) } "write output to file, and support append mode" in { def printMessage(str: String): Boolean = { println(str) true } val tempFile = getTempFilePath() new TeeCommand(printMessage).executeLine(List("`hi mom`", "-o", tempFile.toString)) // The first line is the debug line, so everything after is logged readTempFile(tempFile) should be(List("hi mom")) // We should override since not in append mode new TeeCommand(printMessage).executeLine(List("`hi mom 2`", "-o", tempFile.toString)) // The first line is the debug line, so everything after is logged readTempFile(tempFile) should be(List("hi mom 2")) // We have both 2 and 3 since in append move new TeeCommand(printMessage).executeLine(List("`hi mom 3`", "-o", tempFile.toString, "-a")) // The first line is the debug line, so everything after is logged readTempFile(tempFile) should be(List("hi mom 2", "hi mom 3")) } } private def getTempFilePath(): Path = { Files.createTempFile("teecommand", ".tmp") } private def readTempFile(path: Path): List[String] = { Files.readAllLines(path, Charset.defaultCharset()).asScala.filterNot(_.startsWith("Running")).toList } }
Example 63
Source File: DemoController.scala From elastiknn with Apache License 2.0 | 5 votes |
package controllers import com.klibisz.elastiknn.api.Vec import com.klibisz.elastiknn.client.ElastiknnFutureClient import com.klibisz.elastiknn.client.ElastiknnRequests._ import com.sksamuel.elastic4s.ElasticDsl._ import io.circe.generic.auto._ import io.circe.syntax._ import javax.inject._ import models.{Dataset, ExampleWithResults} import play.api.Logging import play.api.libs.circe.Circe import play.api.mvc._ import scala.concurrent.{ExecutionContext, Future} import scala.util.Random @Singleton class DemoController @Inject()(val controllerComponents: ControllerComponents, protected val eknn: ElastiknnFutureClient)( implicit ec: ExecutionContext) extends BaseController with Logging with Circe { def index() = Action { implicit request: Request[AnyContent] => Ok(views.html.index()) } def dataset(permalink: String, queryIdOpt: Option[String]): Action[AnyContent] = Action.async { implicit req => Dataset.defaults.find(_.permalink == permalink) match { case Some(ds) => queryIdOpt match { case Some(queryId) => for { countRes <- eknn.execute(count(ds.examples.head.index)) // This ensures the search requests execute serially. examplesWithResults <- ds.examples.foldLeft(Future(Vector.empty[ExampleWithResults])) { case (accF, ex) => for { acc <- accF q = nearestNeighborsQuery(ex.index, ex.query.withVec(Vec.Indexed(ex.index, queryId, ex.field)), 10, true) response <- eknn.execute(q) hits = response.result.hits.hits.toSeq results <- Future.traverse(hits.map(ds.parseHit))(Future.fromTry) } yield acc :+ ExampleWithResults(ex, q, results, response.result.took) } } yield Ok(views.html.dataset(ds, queryId, countRes.result.count, examplesWithResults)) case None => for { countRes <- eknn.execute(count(ds.examples.head.index)) id = Random.nextInt(countRes.result.count.toInt + 1) } yield Redirect(routes.DemoController.dataset(permalink, Some(id.toString))) } case None => Future.successful(NotFound(views.html.notfound())) } } def datasets(): Action[AnyContent] = Action(Ok(Dataset.defaults.asJson)) def health(): Action[AnyContent] = Action.async { implicit req => for { countResults <- Future.sequence(for { ds <- Dataset.defaults ex <- ds.examples } yield eknn.execute(count(ex.index))) code = if (countResults.forall(_.isSuccess) && countResults.forall(_.result.count > 1000)) 200 else 500 } yield Status(code) } }
Example 64
Source File: Enqueue.scala From elastiknn with Apache License 2.0 | 5 votes |
package com.klibisz.elastiknn.benchmarks import java.io.File import java.nio.file.Files import com.klibisz.elastiknn.benchmarks.codecs._ import io.circe.syntax._ import org.apache.commons.codec.digest.DigestUtils import zio._ import zio.blocking.Blocking import zio.console._ import scala.util.Random object Enqueue extends App { case class Params(datasetsFilter: Set[String] = Set.empty, file: File = new File("/tmp/hashes.txt"), experimentsBucket: String = "", experimentsPrefix: String = "", s3Minio: Boolean = false) private val parser = new scopt.OptionParser[Params]("Build a list of benchmark jobs") { override def showUsageOnError: Option[Boolean] = Some(true) help("help") opt[Seq[String]]("datasetsFilter") .unbounded() .action((s, c) => c.copy(datasetsFilter = s.map(_.toLowerCase).toSet)) opt[String]("experimentsBucket") .action((x, c) => c.copy(experimentsBucket = x)) opt[String]("experimentsPrefix") .action((x, c) => c.copy(experimentsPrefix = x)) opt[String]("file") .action((s, c) => c.copy(file = new File(s))) opt[Boolean]("s3Minio") .action((x, c) => c.copy(s3Minio = x)) } override def run(args: List[String]): URIO[Console, ExitCode] = parser.parse(args, Params()) match { case Some(params) => val experiments = if (params.datasetsFilter.isEmpty) Experiment.defaults else Experiment.defaults.filter(e => params.datasetsFilter.contains(e.dataset.name.toLowerCase)) val s3Client = if (params.s3Minio) S3Utils.minioClient() else S3Utils.defaultClient() val layer = Blocking.live ++ Console.live val logic: ZIO[Console with Blocking, Throwable, Unit] = for { blocking <- ZIO.access[Blocking](_.get) hashesAndEffects = experiments.map { exp => val body = exp.asJson.noSpaces val hash = DigestUtils.md5Hex(body).toLowerCase val key = s"${params.experimentsPrefix}/$hash.json" hash -> blocking.effectBlocking(s3Client.putObject(params.experimentsBucket, key, body)) } _ <- putStrLn(s"Saving ${hashesAndEffects.length} experiments to S3") _ <- ZIO.collectAllParN(10)(hashesAndEffects.map(_._2)) jsonListOfHashes = new Random(0).shuffle(hashesAndEffects).map(_._1).asJson.noSpaces _ <- blocking.effectBlocking(Files.writeString(params.file.toPath, jsonListOfHashes)) } yield () logic.provideLayer(layer).exitCode case None => sys.exit(1) } }
Example 65
Source File: DatasetClient.scala From elastiknn with Apache License 2.0 | 5 votes |
package com.klibisz.elastiknn.benchmarks import java.util.zip.GZIPInputStream import com.amazonaws.services.s3.AmazonS3 import com.klibisz.elastiknn.api.{ElasticsearchCodec, Vec} import com.klibisz.elastiknn.benchmarks.Dataset._ import io.circe import zio._ import zio.stream._ import scala.io.Source import scala.util.Random import scala.util.hashing.MurmurHash3 object DatasetClient { trait Service { def streamTrain(dataset: Dataset, limit: Option[Int] = None): Stream[Throwable, Vec] def streamTest(dataset: Dataset, limit: Option[Int] = None): Stream[Throwable, Vec] } def s3(bucket: String, keyPrefix: String): ZLayer[Has[AmazonS3], Throwable, DatasetClient] = ZLayer.fromService[AmazonS3, Service] { client => new Service { private def stream(dataset: Dataset, name: String, limit: Option[Int]): Stream[Throwable, Vec] = dataset match { case r: RandomSparseBool => implicit val rng: Random = new Random(MurmurHash3.orderedHash(Seq(r.dims, name))) Stream .range(0, if (name == "train") r.train else r.test) .map(_ => Vec.SparseBool.random(r.dims, r.bias)) case r: RandomDenseFloat => implicit val rng: Random = new Random(MurmurHash3.orderedHash(Seq(r.dims, name))) Stream .range(0, if (name == "train") r.train else r.test) .map(_ => Vec.DenseFloat.random(r.dims)) case _ => def parseDecode(s: String): Either[circe.Error, Vec] = ElasticsearchCodec.parse(s).flatMap(j => ElasticsearchCodec.decode[Vec](j.hcursor)) val obj = client.getObject(bucket, s"$keyPrefix/${dataset.name}/${name}.json.gz") val iterManaged = Managed.makeEffect(Source.fromInputStream(new GZIPInputStream(obj.getObjectContent)))(_.close()) val lines = Stream.fromIteratorManaged(iterManaged.map(src => limit.map(n => src.getLines.take(n)).getOrElse(src.getLines()))) val rawJson = lines.map(_.dropWhile(_ != '{')) rawJson.mapM(s => ZIO.fromEither(parseDecode(s))) } override def streamTrain(dataset: Dataset, limit: Option[Int]): Stream[Throwable, Vec] = stream(dataset, "train", limit) override def streamTest(dataset: Dataset, limit: Option[Int]): Stream[Throwable, Vec] = stream(dataset, "test", limit) } } }
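For the synthetic datasets the client derives its Random seed from MurmurHash3.orderedHash over the dimensionality and the split name, so the "train" and "test" streams differ from each other but are identical across runs. A standalone sketch of that seeding trick (vector shape and counts are illustrative only):

import scala.util.Random
import scala.util.hashing.MurmurHash3

object HashSeededRandomSketch extends App {
  def vectorsFor(dims: Int, split: String, n: Int): Seq[Seq[Double]] = {
    // Same (dims, split) always yields the same seed, hence the same vectors.
    val rng = new Random(MurmurHash3.orderedHash(Seq(dims, split)))
    Seq.fill(n)(Seq.fill(dims)(rng.nextDouble()))
  }

  assert(vectorsFor(4, "train", 2) == vectorsFor(4, "train", 2))
  assert(vectorsFor(4, "train", 2) != vectorsFor(4, "test", 2))
  println(vectorsFor(4, "train", 1))
}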
Example 66
Source File: Profiling.scala From elastiknn with Apache License 2.0 | 5 votes |
package com.klibisz.elastiknn

import com.klibisz.elastiknn
import com.klibisz.elastiknn.api.{Mapping, Vec}
import com.klibisz.elastiknn.utils.ArrayUtils

import scala.util.Random

// Simple apps that make it easy to profile hotspots using VisualVM.
// One quirk with using VisualVM for profiling is that it has to be running on the same JVM as the app.
// For me it was enough to follow this comment: https://github.com/oracle/visualvm/issues/130#issuecomment-483898542

object ProfileVectorHashing {
  def main(args: Array[String]): Unit = {
    implicit val r: Random = new Random(100)
    val m = new elastiknn.models.LshFunction.Jaccard(Mapping.JaccardLsh(100, 150, 1))
    val vecs = Vec.SparseBool.randoms(100, 5000)
    while (true) {
      val t0 = System.currentTimeMillis()
      vecs.foreach(v => m(v))
      println(vecs.length * 1.0 / (System.currentTimeMillis() - t0) * 1000)
    }
  }
}

object ProfileSortedIntersection {
  def main(args: Array[String]): Unit = {
    implicit val r: Random = new Random(100)
    val vecs = Vec.SparseBool.randoms(100, 5000)
    while (true) {
      val t0 = System.currentTimeMillis()
      vecs.drop(1).zip(vecs).map {
        case (a, b) => ArrayUtils.sortedIntersectionCount(a.trueIndices, b.trueIndices)
      }
      println(vecs.length * 1.0 / (System.currentTimeMillis() - t0) * 1000)
    }
  }
}

object PairingFunctions {
  def main(args: Array[String]): Unit = {
    // Based on https://stackoverflow.com/a/14051714
    def szudzik(a: Int, b: Int): Int = {
      val c = if (a >= 0) 2 * a else -2 * a - 1
      val d = if (b >= 0) 2 * b else -2 * b - 1
      if (c >= d) c * c + c + d else c + d * d
    }

    val r = new Random(System.currentTimeMillis())
    val n = 10
    var uniq = Set.empty[Int]
    var i = 0
    while (i - uniq.size < 100) {
      var bandHash = r.nextInt(80)
      (0 until n)
        .map(_ => r.nextInt(Int.MaxValue) - r.nextInt(Int.MaxValue))
        .foreach(h => bandHash = szudzik(bandHash, h))
      uniq = uniq + bandHash
      i += 1
      println(s"$i, ${uniq.size}")
    }
  }
}
Example 67
Source File: TestData.scala From elastiknn with Apache License 2.0 | 5 votes |
package com.klibisz.elastiknn.testing import java.io.FileOutputStream import java.util.zip.{GZIPInputStream, GZIPOutputStream} import com.klibisz.elastiknn.api.{Similarity, Vec} import io.circe._ import com.klibisz.elastiknn.api.ElasticsearchCodec._ import com.klibisz.elastiknn.models.ExactSimilarityFunction import io.circe.syntax._ import io.circe.generic.semiauto._ import scala.util.{Random, Try} case class Result(similarity: Similarity, values: Vector[Double]) object Result { implicit val codec: Codec[Result] = deriveCodec[Result] } case class Query(vector: Vec, results: Seq[Result]) object Query { implicit val codec: Codec[Query] = deriveCodec[Query] } case class TestData(corpus: Vector[Vec], queries: Vector[Query]) object TestData { implicit val codec: Codec[TestData] = deriveCodec[TestData] def read(fname: String): TestData = { val resource = getClass.getResource(fname) val gin = new GZIPInputStream(resource.openStream()) val contents = new String(gin.readAllBytes()) gin.close() io.circe.parser.decode[TestData](contents).toTry.get } def write(testData: TestData, fname: String): Unit = { val gout = new GZIPOutputStream(new FileOutputStream(fname)) gout.write(testData.asJson.noSpaces.getBytes()) gout.close() } def genSparseBool(dims: Int, numCorpus: Int, numQueries: Int, numNeighbors: Int)(implicit rng: Random): TestData = { // TODO: have a min and max bias to introduce more variety to the corpus. val corpus = Vec.SparseBool.randoms(dims, numCorpus, 0.2) val queries = Vec.SparseBool.randoms(dims, numQueries, 0.2).map { qv => Query( qv, Seq( Result(Similarity.Jaccard, corpus.map(cv => ExactSimilarityFunction.Jaccard(cv, qv)).sorted.reverse.take(numNeighbors)), Result(Similarity.Hamming, corpus.map(cv => ExactSimilarityFunction.Hamming(cv, qv)).sorted.reverse.take(numNeighbors)) ) ) } TestData(corpus, queries) } def genDenseFloat(dims: Int, numCorpus: Int, numQueries: Int, numNeighbors: Int, unit: Boolean = false)( implicit rng: Random): TestData = { val corpus = Vec.DenseFloat.randoms(dims, numCorpus) val queries = Vec.DenseFloat.randoms(dims, numQueries).map { qv => Query( qv, Seq( Result(Similarity.L1, corpus.map(cv => ExactSimilarityFunction.L1(cv, qv)).sorted.reverse.take(numNeighbors)), Result(Similarity.L2, corpus.map(cv => ExactSimilarityFunction.L2(cv, qv)).sorted.reverse.take(numNeighbors)), Result(Similarity.Angular, corpus.map(cv => ExactSimilarityFunction.Angular(cv, qv)).sorted.reverse.take(numNeighbors)) ) ) } TestData(corpus, queries) } } object Generate { import TestData._ def main(args: Array[String]): Unit = { implicit val rng = new Random(0) val dims = 1024 write(genSparseBool(dims, 5000, 50, 100), "testdata-sparsebool.json.gz") write(genDenseFloat(dims, 5000, 50, 100), "testdata-densefloat.json.gz") write(genDenseFloat(dims, 5000, 50, 100, unit = true), "testdata-densefloat-unit.json.gz") } }
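genSparseBool and genDenseFloat receive the generator as an implicit Random, and Generate.main pins it to new Random(0), so the published test data can be regenerated exactly. The sketch below reduces that implicit-seed pattern to the standard library; randomVec and randomCorpus are hypothetical helpers.

import scala.util.Random

object ImplicitRngSketch {
  // Every generation helper threads the same Random through an implicit parameter.
  def randomVec(dims: Int)(implicit rng: Random): Vector[Float] =
    Vector.fill(dims)(rng.nextFloat())

  def randomCorpus(n: Int, dims: Int)(implicit rng: Random): Vector[Vector[Float]] =
    Vector.fill(n)(randomVec(dims))

  def main(args: Array[String]): Unit = {
    implicit val rng: Random = new Random(0) // one seed controls the whole fixture
    println(randomCorpus(n = 5, dims = 3))
  }
}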
Example 68
Source File: UnsafeSerializationSuite.scala From elastiknn with Apache License 2.0 | 5 votes |
package com.klibisz.elastiknn.storage import org.scalatest.{FunSuite, Matchers} import scala.util.Random class UnsafeSerializationSuite extends FunSuite with Matchers { test("arrays of ints") { val seed = System.currentTimeMillis() val maxLen = 4096 val rng = new Random(seed) for (i <- 0 to 1000) { withClue(s"Failed on iteration $i with seed $seed and max length $maxLen") { // Generate array of random ints. val len = rng.nextInt(maxLen) val iarr = (0 until len).map(_ => rng.nextInt(Int.MaxValue) * (if (rng.nextBoolean()) 1 else -1)).toArray // Serialize and check serialized length. val trimmed = UnsafeSerialization.writeInts(iarr) trimmed should have length (iarr.length * UnsafeSerialization.numBytesInInt) // Deserialize and check. val iarrReadTrimmed = UnsafeSerialization.readInts(trimmed, 0, trimmed.length) iarrReadTrimmed shouldBe iarr // Place in larger array with random offset. val offset = rng.nextInt(maxLen) val embedded = new Array[Byte](offset) ++ trimmed ++ new Array[Byte](rng.nextInt(maxLen)) // Deserialize and check. val iarrReadEmbedded = UnsafeSerialization.readInts(embedded, offset, trimmed.length) iarrReadEmbedded shouldBe iarr } } } test("arrays of floats") { val seed = System.currentTimeMillis() val maxLen = 4096 val rng = new Random(seed) for (i <- 0 to 1000) { withClue(s"Failed on iteration $i with seed $seed and max length $maxLen") { // Generate array of random floats. val len = rng.nextInt(maxLen) val farr = (0 until len).map(_ => rng.nextFloat() * (if (rng.nextBoolean()) Float.MaxValue else Float.MinValue)).toArray // Serialize and check length. val trimmed = UnsafeSerialization.writeFloats(farr) trimmed should have length (farr.length * UnsafeSerialization.numBytesInFloat) // Deserialize and check. val farrTrimmed = UnsafeSerialization.readFloats(trimmed, 0, trimmed.length) farrTrimmed shouldBe farr // Place in larger array with random offset. val offset = rng.nextInt(maxLen) val embedded = new Array[Byte](offset) ++ trimmed ++ new Array[Byte](rng.nextInt(maxLen)) // Deserialize and check. val farrReadEmbedded = UnsafeSerialization.readFloats(embedded, offset, trimmed.length) farrReadEmbedded shouldBe farr } } } test("ints variable length encoding") { UnsafeSerialization.writeInt(127) should have length 1 UnsafeSerialization.writeInt(-127) should have length 1 UnsafeSerialization.writeInt(32767) should have length 2 UnsafeSerialization.writeInt(-32767) should have length 2 } test("ints randomized") { val seed = System.currentTimeMillis() val rng = new Random(seed) for (i <- 0 to 10000) { withClue(s"Failed on iteration $i with seed $seed") { val i = rng.nextInt(Int.MaxValue) * (if (rng.nextBoolean()) 1 else -1) val barr = UnsafeSerialization.writeInt(i) val iRead = UnsafeSerialization.readInt(barr) iRead shouldBe i } } } }
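The suite seeds Random with System.currentTimeMillis() and repeats every check inside withClue(s"... seed $seed ..."), so a failing run prints the seed needed to replay it. A reduced sketch of that report-the-seed pattern without ScalaTest, using the assert message in place of withClue:

import scala.util.Random

object ReportSeedSketch {
  def main(args: Array[String]): Unit = {
    val seed = System.currentTimeMillis() // fresh randomness every run...
    val rng = new Random(seed)            // ...but recorded, so failures can be replayed
    for (i <- 0 until 1000) {
      val n = rng.nextInt(4096)
      // On failure, rerun with new Random(<printed seed>) to reproduce the exact inputs.
      assert(n >= 0 && n < 4096, s"iteration $i failed with seed $seed (got $n)")
    }
  }
}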
Example 69
Source File: MetricsEndpointSpec.scala From prometheus-akka-http with MIT License | 5 votes |
package com.lonelyplanet.prometheus.api import java.io.StringWriter import akka.http.scaladsl.model.HttpCharsets import akka.http.scaladsl.testkit.ScalatestRouteTest import com.lonelyplanet.prometheus.Utils._ import io.prometheus.client.exporter.common.TextFormat import io.prometheus.client.{CollectorRegistry, Histogram} import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers import scala.util.Random class MetricsEndpointSpec extends AnyFlatSpec with Matchers with ScalatestRouteTest { "Metrics endpoint" should "return the correct media type and charset" in { val api = createEndpoint(CollectorRegistry.defaultRegistry) Get("/metrics") ~> api.routes ~> check { mediaType.subType shouldBe "plain" mediaType.isText shouldBe true mediaType.params shouldBe Map("version" -> "0.0.4") charset shouldBe HttpCharsets.`UTF-8` } } it should "return serialized metrics in the prometheus text format" in { val registry = new CollectorRegistry() val api = createEndpoint(registry) val hist = Histogram.build().name(RandomTestName).help(RandomTestHelp).linearBuckets(0, 1, 10).register(registry) hist.observe(Math.abs(Random.nextDouble())) Get("/metrics") ~> api.routes ~> check { val resp = responseAs[String] val writer = new StringWriter() TextFormat.write004(writer, registry.metricFamilySamples()) resp shouldBe writer.toString } } private val RandomTestName = generateRandomStringOfLength(16) private val RandomTestHelp = generateRandomStringOfLength(16) private def createEndpoint(collectorRegistry: CollectorRegistry) = { new MetricsEndpoint(collectorRegistry) } }
Example 70
Source File: PrometheusResponseTimeRecorderSpec.scala From prometheus-akka-http with MIT License | 5 votes |
package com.lonelyplanet.prometheus import io.prometheus.client.{Collector, CollectorRegistry} import org.scalamock.scalatest.MockFactory import com.lonelyplanet.prometheus.Utils._ import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers import scala.concurrent.duration import scala.concurrent.duration.FiniteDuration import scala.util.Random class PrometheusResponseTimeRecorderSpec extends AnyFlatSpec with Matchers with MockFactory { "PrometheusLatencyRecorder" should "register a histogram and record request latencies" in { val registry = new CollectorRegistry() val randomMetricName = generateRandomString val randomMetricHelp = generateRandomString val randomLabelName = generateRandomString val randomEndpointName = generateRandomString val randomLatency = Math.abs(Random.nextInt(10000)) // our random value will end up in the second bucket val buckets = List((randomLatency - 1).toDouble, (randomLatency + 1).toDouble) val recorder = new PrometheusResponseTimeRecorder( randomMetricName, randomMetricHelp, buckets, randomLabelName, registry, duration.MILLISECONDS) recorder.recordResponseTime(randomEndpointName, FiniteDuration(randomLatency, duration.MILLISECONDS)) val first = getBucketValue(registry, randomMetricName, List(randomLabelName), List(randomEndpointName), buckets.head) val second = getBucketValue(registry, randomMetricName, List(randomLabelName), List(randomEndpointName), buckets.last) val positiveInf = getBucketValue(registry, randomMetricName, List(randomLabelName), List(randomEndpointName), Double.PositiveInfinity) first shouldBe 0 second shouldBe 1 positiveInf shouldBe 1 } private def getBucketValue(registry: CollectorRegistry, metricName: String, labelNames: List[String], labelValues: List[String], bucket: Double) = { val name = metricName + "_bucket" // 'le' should be the first label in the list val allLabelNames = (Array("le") ++ labelNames).reverse val allLabelValues = (Array(Collector.doubleToGoString(bucket)) ++ labelValues).reverse registry.getSampleValue(name, allLabelNames, allLabelValues).intValue() } }
Example 71
Source File: ProtoBuffTest.scala From c4proto with Apache License 2.0 | 5 votes |
package ee.cone.c4actor import java.lang.management.ManagementFactory import java.util import java.util.concurrent.{Callable, Executors} import ee.cone.c4actor.AnyAdapter._ import ee.cone.c4actor.AnyOrigProtocol.N_AnyOrig import ee.cone.c4actor.ProtoBuffTestProtocol.{D_TestOrig, D_TestOrigForDecode} import ee.cone.c4di.{c4, c4app} import ee.cone.c4proto._ import scala.collection.immutable import scala.util.Random trait ProtoBuffTestProtocolAppBase @protocol("ProtoBuffTestProtocolApp") object ProtoBuffTestProtocol { @Id(0x1) case class D_TestOrig( @Id(0x2) srcId: String, @Id(0x3) list: List[String], @Id(0x4) byteStr: List[N_AnyOrig] ) @Id(0x5) case class D_TestOrigForDecode( @Id(0x6) srcId: String, @Id(0x7) number: Long ) } @c4app class SeqProtoBuffTestAppBase extends ProtoBuffTestApp @c4app class ParProtoBuffTestAppBase extends ProtoBuffTestApp trait ProtoBuffTestApp extends VMExecutionApp with ExecutableApp with BaseApp with ProtoApp with ProtoBuffTestProtocolApp with AnyOrigProtocolApp class SerializationRunnable(pid: Int, testOrigs: Seq[D_TestOrigForDecode], qAdapterRegistry: QAdapterRegistry) extends Callable[Long] { def call(): Long = { TestCode.test(testOrigs, qAdapterRegistry) } } object TestCode { def test(testOrigs: Seq[D_TestOrigForDecode], qAdapterRegistry: QAdapterRegistry): Long = { val time = System.currentTimeMillis() val encoded: immutable.Seq[N_AnyOrig] = testOrigs.map(encode(qAdapterRegistry)(_)) val testOrigsss: immutable.Seq[D_TestOrig] = encoded.zipWithIndex.map { case (a, b) => D_TestOrig(b.toString, a.toString.split(",").toList, List(a)) } val encoded2: immutable.Seq[N_AnyOrig] = testOrigsss.map(encode(qAdapterRegistry)(_)) val decoded: immutable.Seq[D_TestOrig] = encoded2.map(decode[D_TestOrig](qAdapterRegistry)) // assert (testOrigsss == decoded) val time2 = System.currentTimeMillis() time2 - time } }
Example 72
Source File: SandboxApp.scala From bloom-filter-scala with MIT License | 5 votes |
import java.text.NumberFormat

import bloomfilter.mutable.{CuckooFilter, UnsafeTable8Bit}
import com.google.monitoring.runtime.instrumentation.{AllocationRecorder, Sampler}
import com.twitter.algebird.{BloomFilter => AlgebirdBloomFilter}

import scala.util.Random

object SandboxApp {
  def checkMemory(): Unit = {
    val runtime = Runtime.getRuntime
    val format = NumberFormat.getInstance()
    val sb = new StringBuilder()
    val maxMemory = runtime.maxMemory()
    val allocatedMemory = runtime.totalMemory()
    val freeMemory = runtime.freeMemory()

    sb.append("free memory: " + format.format(freeMemory / 1024) + "\n")
    sb.append("allocated memory: " + format.format(allocatedMemory / 1024) + "\n")
    sb.append("max memory: " + format.format(maxMemory / 1024) + "\n")
    sb.append("total free memory: " + format.format((freeMemory + (maxMemory - allocatedMemory)) / 1024) + "\n")
    System.out.println(sb.toString())
  }

  def main(args: Array[String]): Unit = {
    val sut = CuckooFilter[Long](1000)
    sut.add(8)
    assert(sut.mightContain(8))
    sut.add(10)
    assert(sut.mightContain(10))
    sut.add(8)
    assert(sut.mightContain(8))
    sut.add(10000)
    assert(sut.mightContain(10000))
  }

  def compareAlgebirdFPR(): Unit = {
    val random: Random = new Random()
    val itemsExpected = 10000L
    val falsePositiveRate = 0.1

    var bf = AlgebirdBloomFilter(itemsExpected.toInt, falsePositiveRate, 0).create("")
    val bf2 = bloomfilter.mutable.BloomFilter[String](itemsExpected, falsePositiveRate)

    var i = 0
    while (i < itemsExpected) {
      val str: String = random.nextString(1000)
      bf = bf.+(str)
      bf2.add(str)
      i += 1
    }

    i = 0
    var in, in2 = 0
    while (true) {
      val str = random.nextString(1000)
      if (bf.contains(str).isTrue) {
        in += 1
      }
      if (bf2.mightContain(str)) {
        in2 += 1
      }
      if (i % 1000 == 0) {
        println(s"in: $in; in2: $in2")
      }
      i += 1 // advance the counter so the progress line prints once per 1000 checks
    }
  }

  def checkAllocations(): Unit = {
    val sampler: Sampler = new Sampler() {
      def sampleAllocation(count: Int, desc: String, newObj: Object, size: Long) {
        System.out.println("I just allocated the object " + newObj + " of type " + desc + " whose size is " + size)
        if (count != -1) {
          System.out.println("It's an array of size " + count)
        }
      }
    }

    AllocationRecorder.addSampler(sampler)

    AllocationRecorder.removeSampler(sampler)
  }
}
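compareAlgebirdFPR feeds both filters with random.nextString(1000), which draws arbitrary UTF-16 characters; that is fine as opaque filter input but often unprintable. When readable strings are needed, Random.alphanumeric is the usual alternative, as this small sketch shows (RandomStringSketch is an illustrative name):

import scala.util.Random

object RandomStringSketch {
  def main(args: Array[String]): Unit = {
    val rng = new Random(7)
    // Arbitrary characters, suitable as opaque hash or filter input.
    val raw = rng.nextString(16)
    // Letters and digits only, safe to print or embed in identifiers.
    val printable = rng.alphanumeric.take(16).mkString
    println(s"raw length=${raw.length}, printable=$printable")
  }
}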
Example 73
Source File: StringItemCuckooBenchmark.scala From bloom-filter-scala with MIT License | 5 votes |
package bloomfilter.mutable import java.util.concurrent.TimeUnit import org.openjdk.jmh.annotations.{BenchmarkMode, OperationsPerInvocation, OutputTimeUnit, _} import scala.util.Random @State(Scope.Benchmark) class StringItemCuckooBenchmark { private val itemsExpected = 100000000L private val random = new Random() private var bf: CuckooFilter[String] = _ @Param(Array("1024")) var length: Int = _ private val items = new Array[String](10000) var i = 0 while (i < items.length) { items(i) = random.nextString(length) i += 1 } @Setup(Level.Iteration) def setup(): Unit = { bf = CuckooFilter[String](itemsExpected) } @Benchmark @BenchmarkMode(Array(Mode.SingleShotTime)) @OutputTimeUnit(TimeUnit.NANOSECONDS) @OperationsPerInvocation(10000) def myPut(): Unit = { var i = 0 while (i < items.length) { bf.add(items(i)) i += 1 } } @Benchmark @BenchmarkMode(Array(Mode.Throughput)) @OperationsPerInvocation(10000) def myGet(): Unit = { var i = 0 while (i < items.length) { bf.mightContain(items(i)) i += 1 } } }
Example 74
Source File: SparkTC.scala From BigDatalog with Apache License 2.0 | 5 votes |
// scalastyle:off println package org.apache.spark.examples import scala.util.Random import scala.collection.mutable import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.SparkContext._ object SparkTC { val numEdges = 200 val numVertices = 100 val rand = new Random(42) def generateGraph: Seq[(Int, Int)] = { val edges: mutable.Set[(Int, Int)] = mutable.Set.empty while (edges.size < numEdges) { val from = rand.nextInt(numVertices) val to = rand.nextInt(numVertices) if (from != to) edges.+=((from, to)) } edges.toSeq } def main(args: Array[String]) { val sparkConf = new SparkConf().setAppName("SparkTC") val spark = new SparkContext(sparkConf) val slices = if (args.length > 0) args(0).toInt else 2 var tc = spark.parallelize(generateGraph, slices).cache() // Linear transitive closure: each round grows paths by one edge, // by joining the graph's edges with the already-discovered paths. // e.g. join the path (y, z) from the TC with the edge (x, y) from // the graph to obtain the path (x, z). // Because join() joins on keys, the edges are stored in reversed order. val edges = tc.map(x => (x._2, x._1)) // This join is iterated until a fixed point is reached. var oldCount = 0L var nextCount = tc.count() do { oldCount = nextCount // Perform the join, obtaining an RDD of (y, (z, x)) pairs, // then project the result to obtain the new (x, z) paths. tc = tc.union(tc.join(edges).map(x => (x._2._2, x._2._1))).distinct().cache() nextCount = tc.count() } while (nextCount != oldCount) println("TC has " + tc.count() + " edges.") spark.stop() } } // scalastyle:on println
Example 75
Source File: LocalKMeans.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.clustering import scala.util.Random import org.apache.spark.Logging import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.linalg.BLAS.{axpy, scal} def kMeansPlusPlus( seed: Int, points: Array[VectorWithNorm], weights: Array[Double], k: Int, maxIterations: Int ): Array[VectorWithNorm] = { val rand = new Random(seed) val dimensions = points(0).vector.size val centers = new Array[VectorWithNorm](k) // Initialize centers by sampling using the k-means++ procedure. centers(0) = pickWeighted(rand, points, weights).toDense for (i <- 1 until k) { // Pick the next center with a probability proportional to cost under current centers val curCenters = centers.view.take(i) val sum = points.view.zip(weights).map { case (p, w) => w * KMeans.pointCost(curCenters, p) }.sum val r = rand.nextDouble() * sum var cumulativeScore = 0.0 var j = 0 while (j < points.length && cumulativeScore < r) { cumulativeScore += weights(j) * KMeans.pointCost(curCenters, points(j)) j += 1 } if (j == 0) { logWarning("kMeansPlusPlus initialization ran out of distinct points for centers." + s" Using duplicate point for center k = $i.") centers(i) = points(0).toDense } else { centers(i) = points(j - 1).toDense } } // Run up to maxIterations iterations of Lloyd's algorithm val oldClosest = Array.fill(points.length)(-1) var iteration = 0 var moved = true while (moved && iteration < maxIterations) { moved = false val counts = Array.fill(k)(0.0) val sums = Array.fill(k)(Vectors.zeros(dimensions)) var i = 0 while (i < points.length) { val p = points(i) val index = KMeans.findClosest(centers, p)._1 axpy(weights(i), p.vector, sums(index)) counts(index) += weights(i) if (index != oldClosest(i)) { moved = true oldClosest(i) = index } i += 1 } // Update centers var j = 0 while (j < k) { if (counts(j) == 0.0) { // Assign center to a random point centers(j) = points(rand.nextInt(points.length)).toDense } else { scal(1.0 / counts(j), sums(j)) centers(j) = new VectorWithNorm(sums(j)) } j += 1 } iteration += 1 } if (iteration == maxIterations) { logInfo(s"Local KMeans++ reached the max number of iterations: $maxIterations.") } else { logInfo(s"Local KMeans++ converged in $iteration iterations.") } centers } private def pickWeighted[T](rand: Random, data: Array[T], weights: Array[Double]): T = { val r = rand.nextDouble() * weights.sum var i = 0 var curWeight = 0.0 while (i < data.length && curWeight < r) { curWeight += weights(i) i += 1 } data(i - 1) } }
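pickWeighted and the k-means++ seeding step share one idiom: draw rand.nextDouble() * totalWeight and walk the cumulative weights until the draw is exceeded. The standalone sketch below isolates that weighted sampling; names are illustrative and the zero-draw guard is an addition for safety.

import scala.util.Random

object WeightedPickSketch {
  // Returns data(i) with probability weights(i) / weights.sum.
  def pickWeighted[T](rand: Random, data: IndexedSeq[T], weights: IndexedSeq[Double]): T = {
    val r = rand.nextDouble() * weights.sum
    var i = 0
    var cumulative = 0.0
    while (i < data.length && cumulative < r) {
      cumulative += weights(i)
      i += 1
    }
    data(math.max(i - 1, 0)) // guards the rare case r == 0.0
  }

  def main(args: Array[String]): Unit = {
    val rand = new Random(42)
    val draws = Seq.fill(10000)(pickWeighted(rand, Vector("a", "b"), Vector(1.0, 3.0)))
    // "b" should appear roughly three times as often as "a".
    println(draws.groupBy(identity).map { case (k, v) => k -> v.size })
  }
}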
Example 76
Source File: KMeansDataGenerator.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.util import scala.util.Random import org.apache.spark.SparkContext import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.rdd.RDD @Since("0.8.0") def generateKMeansRDD( sc: SparkContext, numPoints: Int, k: Int, d: Int, r: Double, numPartitions: Int = 2) : RDD[Array[Double]] = { // First, generate some centers val rand = new Random(42) val centers = Array.fill(k)(Array.fill(d)(rand.nextGaussian() * r)) // Then generate points around each center sc.parallelize(0 until numPoints, numPartitions).map { idx => val center = centers(idx % k) val rand2 = new Random(42 + idx) Array.tabulate(d)(i => center(i) + rand2.nextGaussian()) } } @Since("0.8.0") def main(args: Array[String]) { if (args.length < 6) { // scalastyle:off println println("Usage: KMeansGenerator " + "<master> <output_dir> <num_points> <k> <d> <r> [<num_partitions>]") // scalastyle:on println System.exit(1) } val sparkMaster = args(0) val outputPath = args(1) val numPoints = args(2).toInt val k = args(3).toInt val d = args(4).toInt val r = args(5).toDouble val parts = if (args.length >= 7) args(6).toInt else 2 val sc = new SparkContext(sparkMaster, "KMeansDataGenerator") val data = generateKMeansRDD(sc, numPoints, k, d, r, parts) data.map(_.mkString(" ")).saveAsTextFile(outputPath) System.exit(0) } }
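generateKMeansRDD seeds the centers once with new Random(42) and then gives every point index its own generator, new Random(42 + idx), so the data does not depend on how Spark partitions or orders the indices. A local sketch of that per-index seeding, with no Spark dependency (pointAt is a hypothetical helper):

import scala.util.Random

object PerIndexSeedSketch {
  // Each index gets its own deterministic generator, so results do not depend on
  // the order in which indices are processed (across partitions or threads).
  def pointAt(idx: Int, d: Int, baseSeed: Long = 42L): Seq[Double] = {
    val rng = new Random(baseSeed + idx)
    Seq.fill(d)(rng.nextGaussian())
  }

  def main(args: Array[String]): Unit = {
    val forward = (0 until 4).map(i => pointAt(i, d = 3))
    val backward = (0 until 4).reverse.map(i => pointAt(i, d = 3)).reverse
    assert(forward == backward) // identical points regardless of traversal order
    println(forward)
  }
}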
Example 77
Source File: LogisticRegressionDataGenerator.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.util import scala.util.Random import org.apache.spark.annotation.{Since, DeveloperApi} import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.linalg.Vectors @Since("0.8.0") def generateLogisticRDD( sc: SparkContext, nexamples: Int, nfeatures: Int, eps: Double, nparts: Int = 2, probOne: Double = 0.5): RDD[LabeledPoint] = { val data = sc.parallelize(0 until nexamples, nparts).map { idx => val rnd = new Random(42 + idx) val y = if (idx % 2 == 0) 0.0 else 1.0 val x = Array.fill[Double](nfeatures) { rnd.nextGaussian() + (y * eps) } LabeledPoint(y, Vectors.dense(x)) } data } @Since("0.8.0") def main(args: Array[String]) { if (args.length != 5) { // scalastyle:off println println("Usage: LogisticRegressionGenerator " + "<master> <output_dir> <num_examples> <num_features> <num_partitions>") // scalastyle:on println System.exit(1) } val sparkMaster: String = args(0) val outputPath: String = args(1) val nexamples: Int = if (args.length > 2) args(2).toInt else 1000 val nfeatures: Int = if (args.length > 3) args(3).toInt else 2 val parts: Int = if (args.length > 4) args(4).toInt else 2 val eps = 3 val sc = new SparkContext(sparkMaster, "LogisticRegressionDataGenerator") val data = generateLogisticRDD(sc, nexamples, nfeatures, eps, parts) data.saveAsTextFile(outputPath) sc.stop() } }
Example 78
Source File: SVMDataGenerator.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.util import scala.util.Random import com.github.fommil.netlib.BLAS.{getInstance => blas} import org.apache.spark.SparkContext import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.rdd.RDD @DeveloperApi @Since("0.8.0") object SVMDataGenerator { @Since("0.8.0") def main(args: Array[String]) { if (args.length < 2) { // scalastyle:off println println("Usage: SVMGenerator " + "<master> <output_dir> [num_examples] [num_features] [num_partitions]") // scalastyle:on println System.exit(1) } val sparkMaster: String = args(0) val outputPath: String = args(1) val nexamples: Int = if (args.length > 2) args(2).toInt else 1000 val nfeatures: Int = if (args.length > 3) args(3).toInt else 2 val parts: Int = if (args.length > 4) args(4).toInt else 2 val sc = new SparkContext(sparkMaster, "SVMGenerator") val globalRnd = new Random(94720) val trueWeights = Array.fill[Double](nfeatures + 1)(globalRnd.nextGaussian()) val data: RDD[LabeledPoint] = sc.parallelize(0 until nexamples, parts).map { idx => val rnd = new Random(42 + idx) val x = Array.fill[Double](nfeatures) { rnd.nextDouble() * 2.0 - 1.0 } val yD = blas.ddot(trueWeights.length, x, 1, trueWeights, 1) + rnd.nextGaussian() * 0.1 val y = if (yD < 0) 0.0 else 1.0 LabeledPoint(y, Vectors.dense(x)) } data.saveAsTextFile(outputPath) sc.stop() } }
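The generator draws features with rnd.nextDouble() * 2.0 - 1.0, uniform on [-1, 1), and perturbs the decision value with rnd.nextGaussian() * 0.1, normal noise with standard deviation 0.1. A tiny sketch of those two range transformations (helper names are illustrative):

import scala.util.Random

object RangeTransformSketch {
  def main(args: Array[String]): Unit = {
    val rnd = new Random(94720)
    // nextDouble() is uniform on [0, 1); scale and shift for uniform on [lo, hi).
    def uniform(lo: Double, hi: Double): Double = lo + (hi - lo) * rnd.nextDouble()
    // nextGaussian() is standard normal; scale by sigma and shift by mu.
    def gaussian(mu: Double, sigma: Double): Double = mu + sigma * rnd.nextGaussian()

    val xs = Seq.fill(100000)(uniform(-1.0, 1.0))
    val es = Seq.fill(100000)(gaussian(0.0, 0.1))
    println(s"uniform min=${xs.min} max=${xs.max}") // close to -1 and 1
    println(s"noise mean=${es.sum / es.size}")      // close to 0
  }
}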
Example 79
Source File: RidgeRegressionSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.regression import scala.util.Random import org.jblas.DoubleMatrix import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.util.{LocalClusterSparkContext, LinearDataGenerator, MLlibTestSparkContext} import org.apache.spark.util.Utils private object RidgeRegressionSuite { val model = new RidgeRegressionModel(weights = Vectors.dense(0.1, 0.2, 0.3), intercept = 0.5) } class RidgeRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { def predictionError(predictions: Seq[Double], input: Seq[LabeledPoint]): Double = { predictions.zip(input).map { case (prediction, expected) => (prediction - expected.label) * (prediction - expected.label) }.reduceLeft(_ + _) / predictions.size } test("ridge regression can help avoid overfitting") { // For small number of examples and large variance of error distribution, // ridge regression should give smaller generalization error that linear regression. val numExamples = 50 val numFeatures = 20 org.jblas.util.Random.seed(42) // Pick weights as random values distributed uniformly in [-0.5, 0.5] val w = DoubleMatrix.rand(numFeatures, 1).subi(0.5) // Use half of data for training and other half for validation val data = LinearDataGenerator.generateLinearInput(3.0, w.toArray, 2 * numExamples, 42, 10.0) val testData = data.take(numExamples) val validationData = data.takeRight(numExamples) val testRDD = sc.parallelize(testData, 2).cache() val validationRDD = sc.parallelize(validationData, 2).cache() // First run without regularization. val linearReg = new LinearRegressionWithSGD() linearReg.optimizer.setNumIterations(200) .setStepSize(1.0) val linearModel = linearReg.run(testRDD) val linearErr = predictionError( linearModel.predict(validationRDD.map(_.features)).collect(), validationData) val ridgeReg = new RidgeRegressionWithSGD() ridgeReg.optimizer.setNumIterations(200) .setRegParam(0.1) .setStepSize(1.0) val ridgeModel = ridgeReg.run(testRDD) val ridgeErr = predictionError( ridgeModel.predict(validationRDD.map(_.features)).collect(), validationData) // Ridge validation error should be lower than linear regression. assert(ridgeErr < linearErr, "ridgeError (" + ridgeErr + ") was not less than linearError(" + linearErr + ")") } test("model save/load") { val model = RidgeRegressionSuite.model val tempDir = Utils.createTempDir() val path = tempDir.toURI.toString // Save model, load it back, and compare. try { model.save(sc, path) val sameModel = RidgeRegressionModel.load(sc, path) assert(model.weights == sameModel.weights) assert(model.intercept == sameModel.intercept) } finally { Utils.deleteRecursively(tempDir) } } } class RidgeRegressionClusterSuite extends SparkFunSuite with LocalClusterSparkContext { test("task size should be small in both training and prediction") { val m = 4 val n = 200000 val points = sc.parallelize(0 until m, 2).mapPartitionsWithIndex { (idx, iter) => val random = new Random(idx) iter.map(i => LabeledPoint(1.0, Vectors.dense(Array.fill(n)(random.nextDouble())))) }.cache() // If we serialize data directly in the task closure, the size of the serialized task would be // greater than 1MB and hence Spark would throw an error. val model = RidgeRegressionWithSGD.train(points, 2) val predictions = model.predict(points.map(_.features)) } }
Example 80
Source File: KafkaStreamSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.kafka import scala.collection.mutable import scala.concurrent.duration._ import scala.language.postfixOps import scala.util.Random import kafka.serializer.StringDecoder import org.scalatest.BeforeAndAfterAll import org.scalatest.concurrent.Eventually import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.{Milliseconds, StreamingContext} class KafkaStreamSuite extends SparkFunSuite with Eventually with BeforeAndAfterAll { private var ssc: StreamingContext = _ private var kafkaTestUtils: KafkaTestUtils = _ override def beforeAll(): Unit = { kafkaTestUtils = new KafkaTestUtils kafkaTestUtils.setup() } override def afterAll(): Unit = { if (ssc != null) { ssc.stop() ssc = null } if (kafkaTestUtils != null) { kafkaTestUtils.teardown() kafkaTestUtils = null } } test("Kafka input stream") { val sparkConf = new SparkConf().setMaster("local[4]").setAppName(this.getClass.getSimpleName) ssc = new StreamingContext(sparkConf, Milliseconds(500)) val topic = "topic1" val sent = Map("a" -> 5, "b" -> 3, "c" -> 10) kafkaTestUtils.createTopic(topic) kafkaTestUtils.sendMessages(topic, sent) val kafkaParams = Map("zookeeper.connect" -> kafkaTestUtils.zkAddress, "group.id" -> s"test-consumer-${Random.nextInt(10000)}", "auto.offset.reset" -> "smallest") val stream = KafkaUtils.createStream[String, String, StringDecoder, StringDecoder]( ssc, kafkaParams, Map(topic -> 1), StorageLevel.MEMORY_ONLY) val result = new mutable.HashMap[String, Long]() with mutable.SynchronizedMap[String, Long] stream.map(_._2).countByValue().foreachRDD { r => val ret = r.collect() ret.toMap.foreach { kv => val count = result.getOrElseUpdate(kv._1, 0) + kv._2 result.put(kv._1, count) } } ssc.start() eventually(timeout(10000 milliseconds), interval(100 milliseconds)) { assert(sent === result) } } }
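The test joins a fresh consumer group, s"test-consumer-${Random.nextInt(10000)}", so repeated runs do not resume each other's committed offsets. A small sketch of that random-suffix idiom, with the obvious caveat that collisions are unlikely rather than impossible (testId is an illustrative helper):

import scala.util.Random

object RandomSuffixSketch {
  // Unique-ish identifier for test isolation; use java.util.UUID when collisions must be impossible.
  def testId(prefix: String): String = s"$prefix-${Random.nextInt(10000)}"

  def main(args: Array[String]): Unit = {
    println(testId("test-consumer"))              // e.g. test-consumer-4821
    println(java.util.UUID.randomUUID().toString) // collision-free alternative
  }
}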
Example 81
Source File: KafkaClusterSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.kafka import scala.util.Random import kafka.common.TopicAndPartition import org.scalatest.BeforeAndAfterAll import org.apache.spark.SparkFunSuite class KafkaClusterSuite extends SparkFunSuite with BeforeAndAfterAll { private val topic = "kcsuitetopic" + Random.nextInt(10000) private val topicAndPartition = TopicAndPartition(topic, 0) private var kc: KafkaCluster = null private var kafkaTestUtils: KafkaTestUtils = _ override def beforeAll() { kafkaTestUtils = new KafkaTestUtils kafkaTestUtils.setup() kafkaTestUtils.createTopic(topic) kafkaTestUtils.sendMessages(topic, Map("a" -> 1)) kc = new KafkaCluster(Map("metadata.broker.list" -> kafkaTestUtils.brokerAddress)) } override def afterAll() { if (kafkaTestUtils != null) { kafkaTestUtils.teardown() kafkaTestUtils = null } } test("metadata apis") { val leader = kc.findLeaders(Set(topicAndPartition)).right.get(topicAndPartition) val leaderAddress = s"${leader._1}:${leader._2}" assert(leaderAddress === kafkaTestUtils.brokerAddress, "didn't get leader") val parts = kc.getPartitions(Set(topic)).right.get assert(parts(topicAndPartition), "didn't get partitions") val err = kc.getPartitions(Set(topic + "BAD")) assert(err.isLeft, "getPartitions for a nonexistant topic should be an error") } test("leader offset apis") { val earliest = kc.getEarliestLeaderOffsets(Set(topicAndPartition)).right.get assert(earliest(topicAndPartition).offset === 0, "didn't get earliest") val latest = kc.getLatestLeaderOffsets(Set(topicAndPartition)).right.get assert(latest(topicAndPartition).offset === 1, "didn't get latest") } test("consumer offset apis") { val group = "kcsuitegroup" + Random.nextInt(10000) val offset = Random.nextInt(10000) val set = kc.setConsumerOffsets(group, Map(topicAndPartition -> offset)) assert(set.isRight, "didn't set consumer offsets") val get = kc.getConsumerOffsets(group, Set(topicAndPartition)).right.get assert(get(topicAndPartition) === offset, "didn't get consumer offsets") } }
Example 82
Source File: UISeleniumSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.thriftserver import scala.util.Random import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.openqa.selenium.WebDriver import org.openqa.selenium.htmlunit.HtmlUnitDriver import org.scalatest.{BeforeAndAfterAll, Matchers} import org.scalatest.concurrent.Eventually._ import org.scalatest.selenium.WebBrowser import org.scalatest.time.SpanSugar._ import org.apache.spark.ui.SparkUICssErrorHandler class UISeleniumSuite extends HiveThriftJdbcTest with WebBrowser with Matchers with BeforeAndAfterAll { implicit var webDriver: WebDriver = _ var server: HiveThriftServer2 = _ val uiPort = 20000 + Random.nextInt(10000) override def mode: ServerMode.Value = ServerMode.binary override def beforeAll(): Unit = { webDriver = new HtmlUnitDriver { getWebClient.setCssErrorHandler(new SparkUICssErrorHandler) } super.beforeAll() } override def afterAll(): Unit = { if (webDriver != null) { webDriver.quit() } super.afterAll() } override protected def serverStartCommand(port: Int) = { val portConf = if (mode == ServerMode.binary) { ConfVars.HIVE_SERVER2_THRIFT_PORT } else { ConfVars.HIVE_SERVER2_THRIFT_HTTP_PORT } s"""$startScript | --master local | --hiveconf hive.root.logger=INFO,console | --hiveconf ${ConfVars.METASTORECONNECTURLKEY}=$metastoreJdbcUri | --hiveconf ${ConfVars.METASTOREWAREHOUSE}=$warehousePath | --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST}=localhost | --hiveconf ${ConfVars.HIVE_SERVER2_TRANSPORT_MODE}=$mode | --hiveconf $portConf=$port | --driver-class-path ${sys.props("java.class.path")} | --conf spark.ui.enabled=true | --conf spark.ui.port=$uiPort """.stripMargin.split("\\s+").toSeq } ignore("thrift server ui test") { withJdbcStatement { statement => val baseURL = s"http://localhost:$uiPort" val queries = Seq( "CREATE TABLE test_map(key INT, value STRING)", s"LOAD DATA LOCAL INPATH '${TestData.smallKv}' OVERWRITE INTO TABLE test_map") queries.foreach(statement.execute) eventually(timeout(10 seconds), interval(50 milliseconds)) { go to baseURL find(cssSelector("""ul li a[href*="sql"]""")) should not be None } eventually(timeout(10 seconds), interval(50 milliseconds)) { go to (baseURL + "/sql") find(id("sessionstat")) should not be None find(id("sqlstat")) should not be None // check whether statements exists queries.foreach { line => findAll(cssSelector("""ul table tbody tr td""")).map(_.text).toList should contain (line) } } } } }
Example 83
Source File: GenerateUnsafeRowJoinerSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions.codegen import scala.util.Random import org.apache.spark.SparkFunSuite import org.apache.spark.sql.RandomDataGenerator import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.expressions.UnsafeProjection import org.apache.spark.sql.types._ class GenerateUnsafeRowJoinerSuite extends SparkFunSuite { private val fixed = Seq(IntegerType) private val variable = Seq(IntegerType, StringType) test("simple fixed width types") { testConcat(0, 0, fixed) testConcat(0, 1, fixed) testConcat(1, 0, fixed) testConcat(64, 0, fixed) testConcat(0, 64, fixed) testConcat(64, 64, fixed) } test("randomized fix width types") { for (i <- 0 until 20) { testConcatOnce(Random.nextInt(100), Random.nextInt(100), fixed) } } test("simple variable width types") { testConcat(0, 0, variable) testConcat(0, 1, variable) testConcat(1, 0, variable) testConcat(64, 0, variable) testConcat(0, 64, variable) testConcat(64, 64, variable) } test("randomized variable width types") { for (i <- 0 until 10) { testConcatOnce(Random.nextInt(100), Random.nextInt(100), variable) } } private def testConcat(numFields1: Int, numFields2: Int, candidateTypes: Seq[DataType]): Unit = { for (i <- 0 until 10) { testConcatOnce(numFields1, numFields2, candidateTypes) } } private def testConcatOnce(numFields1: Int, numFields2: Int, candidateTypes: Seq[DataType]) { info(s"schema size $numFields1, $numFields2") val schema1 = RandomDataGenerator.randomSchema(numFields1, candidateTypes) val schema2 = RandomDataGenerator.randomSchema(numFields2, candidateTypes) // Create the converters needed to convert from external row to internal row and to UnsafeRows. val internalConverter1 = CatalystTypeConverters.createToCatalystConverter(schema1) val internalConverter2 = CatalystTypeConverters.createToCatalystConverter(schema2) val converter1 = UnsafeProjection.create(schema1) val converter2 = UnsafeProjection.create(schema2) // Create the input rows, convert them into UnsafeRows. val extRow1 = RandomDataGenerator.forType(schema1, nullable = false).get.apply() val extRow2 = RandomDataGenerator.forType(schema2, nullable = false).get.apply() val row1 = converter1.apply(internalConverter1.apply(extRow1).asInstanceOf[InternalRow]) val row2 = converter2.apply(internalConverter2.apply(extRow2).asInstanceOf[InternalRow]) // Run the joiner. val mergedSchema = StructType(schema1 ++ schema2) val concater = GenerateUnsafeRowJoiner.create(schema1, schema2) val output = concater.join(row1, row2) // Test everything equals ... for (i <- mergedSchema.indices) { if (i < schema1.size) { assert(output.isNullAt(i) === row1.isNullAt(i)) if (!output.isNullAt(i)) { assert(output.get(i, mergedSchema(i).dataType) === row1.get(i, mergedSchema(i).dataType)) } } else { assert(output.isNullAt(i) === row2.isNullAt(i - schema1.size)) if (!output.isNullAt(i)) { assert(output.get(i, mergedSchema(i).dataType) === row2.get(i - schema1.size, mergedSchema(i).dataType)) } } } } }
Example 84
Source File: TakeOrderedAndProjectNodeSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.local import scala.util.Random import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.SortOrder class TakeOrderedAndProjectNodeSuite extends LocalNodeTest { private def testTakeOrderedAndProject(desc: Boolean): Unit = { val limit = 10 val ascOrDesc = if (desc) "desc" else "asc" test(ascOrDesc) { val inputData = Random.shuffle((1 to 100).toList).map { i => (i, i) }.toArray val inputNode = new DummyNode(kvIntAttributes, inputData) val firstColumn = inputNode.output(0) val sortDirection = if (desc) Descending else Ascending val sortOrder = SortOrder(firstColumn, sortDirection) val takeOrderAndProjectNode = new TakeOrderedAndProjectNode( conf, limit, Seq(sortOrder), Some(Seq(firstColumn)), inputNode) val expectedOutput = inputData .map { case (k, _) => k } .sortBy { k => k * (if (desc) -1 else 1) } .take(limit) val actualOutput = takeOrderAndProjectNode.collect().map { row => row.getInt(0) } assert(actualOutput === expectedOutput) } } testTakeOrderedAndProject(desc = false) testTakeOrderedAndProject(desc = true) }
Example 85
Source File: ColumnarTestUtils.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, GenericMutableRow} import org.apache.spark.sql.catalyst.util.{GenericArrayData, ArrayBasedMapData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericMutableRow = { val row = new GenericMutableRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericMutableRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericMutableRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericMutableRow(1) row(0) = value row } (values, rows) } }
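makeRandomValue gathers several Random idioms in one place: Random.nextBytes for raw binary and Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue to land inside a narrower integral range before casting. The reduced sketch below keeps just those two, with the same bounds as the example:

import scala.util.Random

object BoundedValueSketch {
  def main(args: Array[String]): Unit = {
    // Fill an existing array with random bytes, e.g. for fake binary payloads.
    val payload = new Array[Byte](16)
    Random.nextBytes(payload)

    // nextInt(n) is uniform on [0, n); subtracting MaxValue centres the draw on zero
    // so the cast stays inside the target type (here [-127, 127) and [-32767, 32767)).
    val b: Byte = (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte
    val s: Short = (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort

    println(s"payload=${payload.mkString(",")} byte=$b short=$s")
  }
}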
Example 86
Source File: SortSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution import scala.util.Random import org.apache.spark.AccumulatorSuite import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ import org.apache.spark.sql.{RandomDataGenerator, Row} class SortSuite extends SparkPlanTest with SharedSQLContext { import testImplicits.localSeqToDataFrameHolder test("basic sorting using ExternalSort") { val input = Seq( ("Hello", 4, 2.0), ("Hello", 1, 1.0), ("World", 8, 3.0) ) checkAnswer( input.toDF("a", "b", "c"), (child: SparkPlan) => Sort('a.asc :: 'b.asc :: Nil, global = true, child = child), input.sortBy(t => (t._1, t._2)).map(Row.fromTuple), sortAnswers = false) checkAnswer( input.toDF("a", "b", "c"), (child: SparkPlan) => Sort('b.asc :: 'a.asc :: Nil, global = true, child = child), input.sortBy(t => (t._2, t._1)).map(Row.fromTuple), sortAnswers = false) } test("sort followed by limit") { checkThatPlansAgree( (1 to 100).map(v => Tuple1(v)).toDF("a"), (child: SparkPlan) => Limit(10, Sort('a.asc :: Nil, global = true, child = child)), (child: SparkPlan) => Limit(10, ReferenceSort('a.asc :: Nil, global = true, child)), sortAnswers = false ) } test("sorting does not crash for large inputs") { val sortOrder = 'a.asc :: Nil val stringLength = 1024 * 1024 * 2 checkThatPlansAgree( Seq(Tuple1("a" * stringLength), Tuple1("b" * stringLength)).toDF("a").repartition(1), Sort(sortOrder, global = true, _: SparkPlan, testSpillFrequency = 1), ReferenceSort(sortOrder, global = true, _: SparkPlan), sortAnswers = false ) } test("sorting updates peak execution memory") { AccumulatorSuite.verifyPeakExecutionMemorySet(sparkContext, "unsafe external sort") { checkThatPlansAgree( (1 to 100).map(v => Tuple1(v)).toDF("a"), (child: SparkPlan) => Sort('a.asc :: Nil, global = true, child = child), (child: SparkPlan) => ReferenceSort('a.asc :: Nil, global = true, child), sortAnswers = false) } } // Test sorting on different data types for ( dataType <- DataTypeTestUtils.atomicTypes ++ Set(NullType); nullable <- Seq(true, false); sortOrder <- Seq('a.asc :: Nil, 'a.desc :: Nil); randomDataGenerator <- RandomDataGenerator.forType(dataType, nullable) ) { test(s"sorting on $dataType with nullable=$nullable, sortOrder=$sortOrder") { val inputData = Seq.fill(1000)(randomDataGenerator()) val inputDf = sqlContext.createDataFrame( sparkContext.parallelize(Random.shuffle(inputData).map(v => Row(v))), StructType(StructField("a", dataType, nullable = true) :: Nil) ) checkThatPlansAgree( inputDf, p => ConvertToSafe(Sort(sortOrder, global = true, p: SparkPlan, testSpillFrequency = 23)), ReferenceSort(sortOrder, global = true, _: SparkPlan), sortAnswers = false ) } } }
Example 87
Source File: Vector.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.util import scala.language.implicitConversions import scala.util.Random import org.apache.spark.util.random.XORShiftRandom @deprecated("Use Vectors.dense from Spark's mllib.linalg package instead.", "1.0.0") class Vector(val elements: Array[Double]) extends Serializable { def length: Int = elements.length def apply(index: Int): Double = elements(index) def + (other: Vector): Vector = { if (length != other.length) { throw new IllegalArgumentException("Vectors of different length") } Vector(length, i => this(i) + other(i)) } def add(other: Vector): Vector = this + other def - (other: Vector): Vector = { if (length != other.length) { throw new IllegalArgumentException("Vectors of different length") } Vector(length, i => this(i) - other(i)) } def subtract(other: Vector): Vector = this - other def dot(other: Vector): Double = { if (length != other.length) { throw new IllegalArgumentException("Vectors of different length") } var ans = 0.0 var i = 0 while (i < length) { ans += this(i) * other(i) i += 1 } ans } def random(length: Int, random: Random = new XORShiftRandom()): Vector = Vector(length, _ => random.nextDouble()) class Multiplier(num: Double) { def * (vec: Vector): Vector = vec * num } implicit def doubleToMultiplier(num: Double): Multiplier = new Multiplier(num) implicit object VectorAccumParam extends org.apache.spark.AccumulatorParam[Vector] { def addInPlace(t1: Vector, t2: Vector): Vector = t1 + t2 def zero(initialValue: Vector): Vector = Vector.zeros(initialValue.length) } }
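Vector.random takes the generator as a parameter with a default (random: Random = new XORShiftRandom()), so callers can supply their own seeded or alternative generator. A minimal sketch of that injection pattern using only scala.util.Random (randomVector is a hypothetical stand-in):

import scala.util.Random

object InjectedRngSketch {
  // Default is a fresh unseeded generator; tests can pass a seeded one instead.
  def randomVector(length: Int, random: Random = new Random()): Array[Double] =
    Array.fill(length)(random.nextDouble())

  def main(args: Array[String]): Unit = {
    val production = randomVector(5)               // non-deterministic
    val inTest = randomVector(5, new Random(100L)) // reproducible
    assert(inTest.sameElements(randomVector(5, new Random(100L))))
    println(production.mkString(", "))
  }
}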
Example 88
Source File: SamplingUtilsSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.util.random import scala.util.Random import org.apache.commons.math3.distribution.{BinomialDistribution, PoissonDistribution} import org.apache.spark.SparkFunSuite class SamplingUtilsSuite extends SparkFunSuite { test("reservoirSampleAndCount") { val input = Seq.fill(100)(Random.nextInt()) // input size < k val (sample1, count1) = SamplingUtils.reservoirSampleAndCount(input.iterator, 150) assert(count1 === 100) assert(input === sample1.toSeq) // input size == k val (sample2, count2) = SamplingUtils.reservoirSampleAndCount(input.iterator, 100) assert(count2 === 100) assert(input === sample2.toSeq) // input size > k val (sample3, count3) = SamplingUtils.reservoirSampleAndCount(input.iterator, 10) assert(count3 === 100) assert(sample3.length === 10) } test("computeFraction") { // test that the computed fraction guarantees enough data points // in the sample with a failure rate <= 0.0001 val n = 100000 for (s <- 1 to 15) { val frac = SamplingUtils.computeFractionForSampleSize(s, n, true) val poisson = new PoissonDistribution(frac * n) assert(poisson.inverseCumulativeProbability(0.0001) >= s, "Computed fraction is too low") } for (s <- List(20, 100, 1000)) { val frac = SamplingUtils.computeFractionForSampleSize(s, n, true) val poisson = new PoissonDistribution(frac * n) assert(poisson.inverseCumulativeProbability(0.0001) >= s, "Computed fraction is too low") } for (s <- List(1, 10, 100, 1000)) { val frac = SamplingUtils.computeFractionForSampleSize(s, n, false) val binomial = new BinomialDistribution(n, frac) assert(binomial.inverseCumulativeProbability(0.0001)*n >= s, "Computed fraction is too low") } } }
Example 89
Source File: VectorSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.util import scala.util.Random import org.apache.spark.SparkFunSuite @deprecated("suppress compile time deprecation warning", "1.0.0") class VectorSuite extends SparkFunSuite { def verifyVector(vector: Vector, expectedLength: Int): Unit = { assert(vector.length == expectedLength) assert(vector.elements.min > 0.0) assert(vector.elements.max < 1.0) } test("random with default random number generator") { val vector100 = Vector.random(100) verifyVector(vector100, 100) } test("random with given random number generator") { val vector100 = Vector.random(100, new Random(100)) verifyVector(vector100, 100) } }
Example 90
Source File: ByteArrayChunkOutputStreamSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.util.io import scala.util.Random import org.apache.spark.SparkFunSuite class ByteArrayChunkOutputStreamSuite extends SparkFunSuite { test("empty output") { val o = new ByteArrayChunkOutputStream(1024) assert(o.toArrays.length === 0) } test("write a single byte") { val o = new ByteArrayChunkOutputStream(1024) o.write(10) assert(o.toArrays.length === 1) assert(o.toArrays.head.toSeq === Seq(10.toByte)) } test("write a single near boundary") { val o = new ByteArrayChunkOutputStream(10) o.write(new Array[Byte](9)) o.write(99) assert(o.toArrays.length === 1) assert(o.toArrays.head(9) === 99.toByte) } test("write a single at boundary") { val o = new ByteArrayChunkOutputStream(10) o.write(new Array[Byte](10)) o.write(99) assert(o.toArrays.length === 2) assert(o.toArrays(1).length === 1) assert(o.toArrays(1)(0) === 99.toByte) } test("single chunk output") { val ref = new Array[Byte](8) Random.nextBytes(ref) val o = new ByteArrayChunkOutputStream(10) o.write(ref) val arrays = o.toArrays assert(arrays.length === 1) assert(arrays.head.length === ref.length) assert(arrays.head.toSeq === ref.toSeq) } test("single chunk output at boundary size") { val ref = new Array[Byte](10) Random.nextBytes(ref) val o = new ByteArrayChunkOutputStream(10) o.write(ref) val arrays = o.toArrays assert(arrays.length === 1) assert(arrays.head.length === ref.length) assert(arrays.head.toSeq === ref.toSeq) } test("multiple chunk output") { val ref = new Array[Byte](26) Random.nextBytes(ref) val o = new ByteArrayChunkOutputStream(10) o.write(ref) val arrays = o.toArrays assert(arrays.length === 3) assert(arrays(0).length === 10) assert(arrays(1).length === 10) assert(arrays(2).length === 6) assert(arrays(0).toSeq === ref.slice(0, 10)) assert(arrays(1).toSeq === ref.slice(10, 20)) assert(arrays(2).toSeq === ref.slice(20, 26)) } test("multiple chunk output at boundary size") { val ref = new Array[Byte](30) Random.nextBytes(ref) val o = new ByteArrayChunkOutputStream(10) o.write(ref) val arrays = o.toArrays assert(arrays.length === 3) assert(arrays(0).length === 10) assert(arrays(1).length === 10) assert(arrays(2).length === 10) assert(arrays(0).toSeq === ref.slice(0, 10)) assert(arrays(1).toSeq === ref.slice(10, 20)) assert(arrays(2).toSeq === ref.slice(20, 30)) } }
Example 91
Source File: SparkTC.scala From learning-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.examples import scala.util.Random import scala.collection.mutable import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.SparkContext._ object SparkTC { val numEdges = 200 val numVertices = 100 val rand = new Random(42) def generateGraph = { val edges: mutable.Set[(Int, Int)] = mutable.Set.empty while (edges.size < numEdges) { val from = rand.nextInt(numVertices) val to = rand.nextInt(numVertices) if (from != to) edges.+=((from, to)) } edges.toSeq } def main(args: Array[String]) { val sparkConf = new SparkConf().setAppName("SparkTC") val spark = new SparkContext(sparkConf) val slices = if (args.length > 0) args(0).toInt else 2 var tc = spark.parallelize(generateGraph, slices).cache() // Linear transitive closure: each round grows paths by one edge, // by joining the graph's edges with the already-discovered paths. // e.g. join the path (y, z) from the TC with the edge (x, y) from // the graph to obtain the path (x, z). // Because join() joins on keys, the edges are stored in reversed order. val edges = tc.map(x => (x._2, x._1)) // This join is iterated until a fixed point is reached. var oldCount = 0L var nextCount = tc.count() do { oldCount = nextCount // Perform the join, obtaining an RDD of (y, (z, x)) pairs, // then project the result to obtain the new (x, z) paths. tc = tc.union(tc.join(edges).map(x => (x._2._2, x._2._1))).distinct().cache() nextCount = tc.count() } while (nextCount != oldCount) println("TC has " + tc.count() + " edges.") spark.stop() } }
Example 92
Source File: client.scala From zio-saga with MIT License | 5 votes |
package com.vladkopanev.zio.saga.example import zio.{ Task, ZIO } import scala.util.Random package object client { import zio.duration._ def randomSleep(maxTimeout: Int): TaskC[Unit] = for { randomSeconds <- ZIO.effectTotal(Random.nextInt(maxTimeout)) _ <- ZIO.sleep(randomSeconds.seconds) } yield () def randomFail(operationName: String): Task[Unit] = for { randomInt <- ZIO.effectTotal(Random.nextInt(100)) _ <- if (randomInt % 10 == 0) ZIO.fail(new RuntimeException(s"Failed to execute $operationName")) else ZIO.unit } yield () }
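Random.nextInt is a side effect, so the example lifts it into ZIO with ZIO.effectTotal before sleeping or failing on the result. A reduced sketch of that wrapping in the same ZIO 1.x style as the example (EffectfulRandomSketch and flaky are illustrative names):

import zio.{ Task, ZIO }

import scala.util.Random

object EffectfulRandomSketch {
  // Lift the impure call so it runs when the effect runs, not when it is built.
  val randomPercent: ZIO[Any, Nothing, Int] = ZIO.effectTotal(Random.nextInt(100))

  // Fail roughly ten percent of the time, mirroring randomFail above.
  def flaky(operation: String): Task[Unit] =
    randomPercent.flatMap { n =>
      if (n % 10 == 0) ZIO.fail(new RuntimeException(s"Failed to execute $operation"))
      else ZIO.unit
    }
}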
Example 93
Source File: FunctionEqual.scala From scalaprops with MIT License | 5 votes |
package scalaprops import scala.util.Random import scalaz._ object FunctionEqual extends FunctionEqual(5) sealed class FunctionEqual(size: Int) { implicit def f1[A1: Gen, B](implicit B: Equal[B]): Equal[A1 => B] = { val values = Gen[A1].samples(listSize = size, size = size, seed = Random.nextLong()) Equal.equal((x, y) => values.forall(a => B.equal(x(a), y(a)))) } implicit def f2[A1: Gen, A2: Gen, B](implicit B: Equal[B]): Equal[(A1, A2) => B] = f1[(A1, A2), B].contramap(_.tupled) implicit def f3[A1: Gen, A2: Gen, A3: Gen, B](implicit B: Equal[B]): Equal[(A1, A2, A3) => B] = f1[(A1, A2, A3), B].contramap(_.tupled) implicit def f4[A1: Gen, A2: Gen, A3: Gen, A4: Gen, B](implicit B: Equal[B]): Equal[(A1, A2, A3, A4) => B] = f1[(A1, A2, A3, A4), B].contramap(_.tupled) implicit def f5[A1: Gen, A2: Gen, A3: Gen, A4: Gen, A5: Gen, B](implicit B: Equal[B] ): Equal[(A1, A2, A3, A4, A5) => B] = f1[(A1, A2, A3, A4, A5), B].contramap(_.tupled) implicit def f6[A1: Gen, A2: Gen, A3: Gen, A4: Gen, A5: Gen, A6: Gen, B](implicit B: Equal[B] ): Equal[(A1, A2, A3, A4, A5, A6) => B] = f1[(A1, A2, A3, A4, A5, A6), B].contramap(_.tupled) implicit def f7[A1: Gen, A2: Gen, A3: Gen, A4: Gen, A5: Gen, A6: Gen, A7: Gen, B](implicit B: Equal[B] ): Equal[(A1, A2, A3, A4, A5, A6, A7) => B] = f1[(A1, A2, A3, A4, A5, A6, A7), B].contramap(_.tupled) implicit def f8[A1: Gen, A2: Gen, A3: Gen, A4: Gen, A5: Gen, A6: Gen, A7: Gen, A8: Gen, B](implicit B: Equal[B] ): Equal[(A1, A2, A3, A4, A5, A6, A7, A8) => B] = f1[(A1, A2, A3, A4, A5, A6, A7, A8), B].contramap(_.tupled) }
Example 94
Source File: RandTestJVM.scala From scalaprops with MIT License | 5 votes |
package scalaprops import scala.util.Random object RandTestJVM extends Scalaprops { private[this] def chooseLong(rng: Long => Rand) = Property.forAll( Iterator.fill(100000)((Random.nextLong, Random.nextLong, Random.nextLong)).forall { case (seed, y, z) => val r = rng(seed).chooseLong(y, z)._2 val min = math.min(y, z) val max = math.max(y, z) (min <= r) && (r <= max) } ) val chooseLong32 = chooseLong(l => MersenneTwister32.fromSeed(l.toInt)) val chooseLong64 = chooseLong(MersenneTwister64.standard) }
Example 95
Source File: SimulateDistributionSpec.scala From squbs with Apache License 2.0 | 5 votes |
package org.squbs.pattern.timeoutpolicy import org.scalatest.{FlatSpecLike, Matchers} import scala.concurrent.duration._ import scala.concurrent.{Await, Future} import scala.util.{Random, Try} class SimulateDistributionSpec extends FlatSpecLike with Matchers{ "Random.nextGaussian" should "work as expected" in { import scala.concurrent.ExecutionContext.Implicits.global val timeoutPolicy = TimeoutPolicy(Some("test"), initial = 1.seconds, rule = 3.sigma, minSamples = 100, startOverCount = 500) val sigma = 30 val mean = 50 for (i <- 0 until 1000) { val tx = timeoutPolicy.transaction Try{ Await.ready(Future{ val s = (Random.nextGaussian() * sigma + mean).round Thread.sleep(s) }, tx.waitTime) } tx.end() // val metrics = timeoutPolicy.metrics // println(s"average=${metrics.averageTime}, standardDeviation=${metrics.standardDeviation}") } Thread.sleep(5000) val metrics = timeoutPolicy.metrics println(s"average=${metrics.averageTime.toLong}, standardDeviation=${metrics.standardDeviation.toLong}") val succeedPercent = (metrics.totalCount - metrics.timeoutCount) / metrics.totalCount.toDouble println(succeedPercent) println(metrics) } "NegativeExponentialTruncated" should "works fine with TimeoutPolicy " in { negativeExponential(truncate = true) } "NegativeExponentialNotTruncated" should "works fine with TimeoutPolicy " in { negativeExponential(truncate = false) } def negativeExponential(truncate: Boolean): Unit = { val delay = getDelay(truncate = truncate, cycleMin = 20.millis, cycleMean = 30.millis, cycleMax = 50.milliseconds) import scala.concurrent.ExecutionContext.Implicits.global val timeoutPolicy = TimeoutPolicy(Some("test"), initial = 1.seconds, rule = 3.sigma) for (i <- 0 until 1000) { val tx = timeoutPolicy.transaction Try{ Await.ready(Future{ val s = delay().toMillis Thread.sleep(s) }, tx.waitTime) } tx.end() // val metrics = timeoutPolicy.metrics } Thread.sleep(5000) val metrics = timeoutPolicy.metrics println(s"average=${metrics.averageTime.toLong}, standardDeviation=${metrics.standardDeviation.toLong}") val succeedPercent = (metrics.totalCount - metrics.timeoutCount) / metrics.totalCount.toDouble println(succeedPercent) println(metrics) } def getDelay(truncate: Boolean = true, cycleMin: FiniteDuration = 0.seconds, cycleMean: FiniteDuration = 1.seconds, cycleMax: FiniteDuration = 5.seconds): () => FiniteDuration = { val (shift, mean) = if (!truncate) { val shift1 = cycleMin.toNanos val mean1 = cycleMean.toNanos - shift1 (shift1, mean1) } else (0L, cycleMean.toNanos) () => { val delay = if (cycleMean.toNanos > 0) { val x = { val ix = Random.nextDouble() if (ix == 0d) Double.MinPositiveValue else ix } val iDelay = shift + (mean * -Math.log(x)).toLong if (iDelay < cycleMin.toNanos) cycleMin.toNanos else if (iDelay > cycleMax.toNanos) cycleMax.toNanos else iDelay } else 0L delay.nanoseconds } } }
Example 96
Source File: StreamSpecUtil.scala From squbs with Apache License 2.0 | 5 votes |
package org.squbs.pattern.stream import java.io.File import java.nio.file.Files import java.util.concurrent.atomic.AtomicInteger import akka.stream.ThrottleMode import akka.stream.scaladsl._ import com.typesafe.config.ConfigFactory import net.openhft.chronicle.wire.{WireIn, WireOut} import scala.concurrent.duration._ import scala.language.postfixOps import scala.collection.JavaConverters._ import scala.util.Random object StreamSpecUtil { val elementCount = 100000 val failTestAt = elementCount * 3 / 10 val elementsAfterFail = 100 val flowRate = 1000 val flowUnit = 10 millisecond val burstSize = 500 } class StreamSpecUtil[T, S](outputPort: Int = 1) { import StreamSpecUtil._ val outputPorts = outputPort val tempPath: File = Files.createTempDirectory("persistent_queue").toFile val totalProcessed = elementCount + elementsAfterFail val config = ConfigFactory.parseMap { Map( "persist-dir" -> s"${tempPath.getAbsolutePath}", "output-ports" -> s"$outputPorts", "roll-cycle" -> "TEST_SECONDLY".toLowerCase() ).asJava } val in = Source(1 to elementCount) lazy val atomicCounter = Vector.tabulate(outputPorts)(_ => new AtomicInteger(0)) lazy val flowCounter = Flow[Any].map(_ => 1L).reduce(_ + _).toMat(Sink.head)(Keep.right) lazy val merge = Merge[S](outputPorts) lazy val throttle = Flow[S].throttle(flowRate, flowUnit, burstSize, ThrottleMode.shaping) lazy val throttleMore = Flow[S].throttle(flowRate * 9 / 10, flowUnit, burstSize, ThrottleMode.shaping) lazy val head = Sink.head[S] lazy val last = Sink.last[S] val minRandom = 100 lazy val random = Random.nextInt(elementCount - minRandom - 1) + minRandom lazy val filterCounter = new AtomicInteger(0) lazy val filterARandomElement = Flow[Event[T]].map(e => (e, filterCounter.incrementAndGet())).filter(_._2 != random).map(_._1) def commitCounter(outputPortId: Int) = atomicCounter(outputPortId).incrementAndGet() def clean() = delete(tempPath) private def delete(file: File): Unit = { if (file.isDirectory) Option(file.listFiles).map(_.toList).getOrElse(Nil).foreach(delete) file.delete } } case class Person(name: String, age: Int) class PersonSerializer extends QueueSerializer[Person] { override def readElement(wire: WireIn): Option[Person] = { for { name <- Option(wire.read().`object`(classOf[String])) age <- Option(wire.read().int32) } yield { Person(name, age) } } override def writeElement(element: Person, wire: WireOut): Unit = { wire.write().`object`(classOf[String], element.name) wire.write().int32(element.age) } }
Example 97
Source File: RNNEmbeddingExample.scala From ScalNet with Apache License 2.0 | 5 votes |
package org.deeplearning4j.scalnet.examples.dl4j.recurrent import org.deeplearning4j.nn.conf.inputs.InputType import org.deeplearning4j.optimize.listeners.ScoreIterationListener import org.deeplearning4j.scalnet.layers.embeddings.EmbeddingLayer import org.deeplearning4j.scalnet.layers.recurrent.{ GravesLSTM, RnnOutputLayer } import org.deeplearning4j.scalnet.models.NeuralNet import org.nd4j.linalg.activations.Activation import org.nd4j.linalg.dataset.DataSet import org.nd4j.linalg.factory.Nd4j import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction import scala.util.Random object RNNEmbeddingExample extends App { val nClassesIn = 10 val batchSize = 3 val timeSeriesLength = 8 val inEmbedding = Nd4j.create(batchSize, 1, timeSeriesLength) val outLabels = Nd4j.create(batchSize, 4, timeSeriesLength) val seed = 12345 val rand = new Random(seed) val timeSeries: DataSet = { for (i <- 0 until batchSize; j <- 0 until timeSeriesLength) { val classIdx = rand.nextInt(nClassesIn) inEmbedding.putScalar(Array[Int](i, 0, j), classIdx) val labelIdx = rand.nextInt(batchSize + 1) outLabels.putScalar(Array[Int](i, labelIdx, j), 1.0) } new DataSet(inEmbedding, outLabels) } val model: NeuralNet = { val model: NeuralNet = NeuralNet(inputType = InputType.recurrent(3, 8), rngSeed = seed) model.add(EmbeddingLayer(nClassesIn, 5)) model.add(GravesLSTM(5, 7, Activation.SOFTSIGN)) model.add(RnnOutputLayer(7, 4, Activation.SOFTMAX)) model.compile(LossFunction.MCXENT) model } model.fit(timeSeries, 1, List(new ScoreIterationListener(1))) }
Example 98
Source File: BasicRNNExample.scala From ScalNet with Apache License 2.0 | 5 votes |
package org.deeplearning4j.scalnet.examples.dl4j.recurrent import org.deeplearning4j.nn.api.OptimizationAlgorithm import org.deeplearning4j.nn.conf.Updater import org.deeplearning4j.scalnet.layers.recurrent.{ GravesLSTM, RnnOutputLayer } import org.deeplearning4j.scalnet.logging.Logging import org.deeplearning4j.scalnet.models.NeuralNet import org.nd4j.linalg.activations.Activation import org.nd4j.linalg.api.ops.impl.indexaccum.IMax import org.nd4j.linalg.dataset.DataSet import org.nd4j.linalg.factory.Nd4j import org.nd4j.linalg.lossfunctions.LossFunctions.LossFunction import scala.util.Random object BasicRNNExample extends App with Logging { // define a sentence to learn. // Add a special character at the beginning so the RNN learns the complete string and ends with the marker. val learningString = "*Der Cottbuser Postkutscher putzt den Cottbuser Postkutschkasten.".toVector val learningChars = learningString.distinct val hiddenSize = 64 val epochs = 200 val seed = 1234 val rand = new Random(seed) val input = Nd4j.zeros(1, learningChars.length, learningString.length) val labels = Nd4j.zeros(1, learningChars.length, learningString.length) val trainingData: DataSet = { learningString.zipWithIndex.foreach { case (currentChar, index) => val nextChar = if (index + 1 > learningString.indices.max) learningString(0) else learningString(index + 1) input.putScalar(Array[Int](0, learningChars.indexOf(currentChar), index), 1) labels.putScalar(Array[Int](0, learningChars.indexOf(nextChar), index), 1) } new DataSet(input, labels) } logger.info("Build model...") val model: NeuralNet = { val model: NeuralNet = NeuralNet(rngSeed = seed, miniBatch = false) model.add(GravesLSTM(learningChars.length, hiddenSize, Activation.TANH)) model.add(GravesLSTM(hiddenSize, hiddenSize, Activation.TANH)) model.add(RnnOutputLayer(hiddenSize, learningChars.length, Activation.SOFTMAX)) model.compile(LossFunction.MCXENT, OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT, Updater.RMSPROP) model } val rnn = model.getNetwork (0 until epochs).foreach { e => rnn.fit(trainingData) rnn.rnnClearPreviousState() val init = Nd4j.zeros(learningChars.length) init.putScalar(learningChars.indexOf(learningString(0)), 1) var output = rnn.rnnTimeStep(init) val predicted: Vector[Char] = learningString.map { _ => val sampledCharacterIdx = Nd4j.getExecutioner.exec(new IMax(output), 1).getInt(0) val nextInput = Nd4j.zeros(learningChars.length) nextInput.putScalar(sampledCharacterIdx, 1) output = rnn.rnnTimeStep(nextInput) learningChars(sampledCharacterIdx) } logger.info(s"Epoch $e - ${predicted.mkString}") } }
Example 99
Source File: RunnersCache.scala From infinispan-spark with Apache License 2.0 | 5 votes |
package org.infinispan.spark.test import org.infinispan.spark.domain.Runner import org.scalatest.{BeforeAndAfterAll, Suite} import scala.util.Random trait RunnersCache extends BeforeAndAfterAll { this: Suite with RemoteTest => protected def getNumEntries: Int override protected def beforeAll(): Unit = { val random = new Random(System.currentTimeMillis()) val MinFinishTime = 3600 val MaxFinishTime = 4500 val MinAge = 15 val MaxAge = 60 (1 to getNumEntries).par.foreach { i => val name = "Runner " + i val finished = if (i % 2 == 0) true else false val finishTime = random.nextInt((MaxFinishTime - MinFinishTime) + 1) + MinFinishTime val age = Integer.valueOf(i * (MaxAge - MinAge) / getNumEntries + MinAge) val runner = new Runner(name, finished, if(finished) finishTime else 0, age) getRemoteCache.put(i, runner) } super.beforeAll() } override protected def afterAll(): Unit = { super.afterAll() } }
Example 100
Source File: WordCache.scala From infinispan-spark with Apache License 2.0 | 5 votes |
package org.infinispan.spark.test import org.scalatest.{BeforeAndAfterAll, Suite} import scala.util.Random trait WordCache extends BeforeAndAfterAll { this: Suite with RemoteTest => // https://github.com/bmarcot/haiku/blob/master/haiku.scala val adjs = List("autumn", "hidden", "bitter", "misty", "silent", "empty", "dry", "dark", "summer", "icy", "delicate", "quiet", "white", "cool", "spring", "winter", "patient", "twilight", "dawn", "crimson", "wispy", "weathered", "blue", "billowing", "broken", "cold", "damp", "falling", "frosty", "green", "long", "late", "lingering", "bold", "little", "morning", "muddy", "old", "red", "rough", "still", "small", "sparkling", "throbbing", "shy", "wandering", "withered", "wild", "black", "holy", "solitary", "fragrant", "aged", "snowy", "proud", "floral", "restless", "divine", "polished", "purple", "lively", "nameless", "puffy", "fluffy", "calm", "young", "golden", "avenging", "ancestral", "ancient", "argent", "reckless", "daunting", "short", "rising", "strong", "timber", "tumbling", "silver", "dusty", "celestial", "cosmic", "crescent", "double", "far", "half", "inner", "milky", "northern", "southern", "eastern", "western", "outer", "terrestrial", "huge", "deep", "epic", "titanic", "mighty", "powerful") val nouns = List("waterfall", "river", "breeze", "moon", "rain", "wind", "sea", "morning", "snow", "lake", "sunset", "pine", "shadow", "leaf", "dawn", "glitter", "forest", "hill", "cloud", "meadow", "glade", "bird", "brook", "butterfly", "bush", "dew", "dust", "field", "flower", "firefly", "feather", "grass", "haze", "mountain", "night", "pond", "darkness", "snowflake", "silence", "sound", "sky", "shape", "surf", "thunder", "violet", "wildflower", "wave", "water", "resonance", "sun", "wood", "dream", "cherry", "tree", "fog", "frost", "voice", "paper", "frog", "smoke", "star", "sierra", "castle", "fortress", "tiger", "day", "sequoia", "cedar", "wrath", "blessing", "spirit", "nova", "storm", "burst", "protector", "drake", "dragon", "knight", "fire", "king", "jungle", "queen", "giant", "elemental", "throne", "game", "weed", "stone", "apogee", "bang", "cluster", "corona", "cosmos", "equinox", "horizon", "light", "nebula", "solstice", "spectrum", "universe", "magnitude", "parallax") protected def getNumEntries: Int private val random = new Random(System.currentTimeMillis()) private def randomWordFrom(l: List[String]) = l(random.nextInt(l.size)) private def pickNouns = (for (_ <- 0 to random.nextInt(3)) yield randomWordFrom(nouns)).mkString(" ") lazy val wordsCache = getRemoteCache[Int,String] override protected def beforeAll(): Unit = { (1 to getNumEntries).par.foreach { i => val contents = Seq(randomWordFrom(adjs), pickNouns).mkString(" ") wordsCache.put(i, contents) } super.beforeAll() } override protected def afterAll(): Unit = { super.afterAll() } }
Example 101
Source File: SeQuiLaAnalyzer.scala From bdg-sequila with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.ResolveTableValuedFunctionsSeq import org.apache.spark.sql.catalyst.catalog.SessionCatalog import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.internal.SQLConf import scala.util.Random class SeQuiLaAnalyzer(catalog: SessionCatalog, conf: SQLConf) extends Analyzer(catalog, conf, conf.optimizerMaxIterations){ //override val extendedResolutionRules: Seq[Rule[LogicalPlan]] = Seq(ResolveTableValuedFunctionsSeq) // override lazy val batches: Seq[Batch] = Seq( // Batch("Custeom", fixedPoint, ResolveTableValuedFunctionsSeq), // Batch("Hints", fixedPoint, new ResolveHints.ResolveBroadcastHints(conf), // ResolveHints.RemoveAllHints)) var sequilaOptmazationRules: Seq[Rule[LogicalPlan]] = Nil override lazy val batches: Seq[Batch] = Seq( Batch("Hints", fixedPoint, new ResolveHints.ResolveBroadcastHints(conf), ResolveHints.RemoveAllHints), Batch("Simple Sanity Check", Once, LookupFunctions), Batch("Substitution", fixedPoint, CTESubstitution, WindowsSubstitution, EliminateUnions, new SubstituteUnresolvedOrdinals(conf)), Batch("Resolution", fixedPoint, ResolveTableValuedFunctionsSeq :: ResolveRelations :: ResolveReferences :: ResolveCreateNamedStruct :: ResolveDeserializer :: ResolveNewInstance :: ResolveUpCast :: ResolveGroupingAnalytics :: ResolvePivot :: ResolveOrdinalInOrderByAndGroupBy :: ResolveAggAliasInGroupBy :: ResolveMissingReferences :: ExtractGenerator :: ResolveGenerate :: ResolveFunctions :: ResolveAliases :: ResolveSubquery :: ResolveSubqueryColumnAliases :: ResolveWindowOrder :: ResolveWindowFrame :: ResolveNaturalAndUsingJoin :: ExtractWindowExpressions :: GlobalAggregates :: ResolveAggregateFunctions :: TimeWindowing :: ResolveInlineTables(conf) :: ResolveTimeZone(conf) :: TypeCoercion.typeCoercionRules(conf) ++ extendedResolutionRules : _*), Batch("Post-Hoc Resolution", Once, postHocResolutionRules: _*), Batch("SeQuiLa", Once,sequilaOptmazationRules: _*), //SeQuilaOptimization rules Batch("View", Once, AliasViewChild(conf)), Batch("Nondeterministic", Once, PullOutNondeterministic), Batch("UDF", Once, HandleNullInputsForUDF), Batch("FixNullability", Once, FixNullability), Batch("Subquery", Once, UpdateOuterReferences), Batch("Cleanup", fixedPoint, CleanupAliases) ) }
Example 102
Source File: VPTree.scala From traj-sim-spark with Apache License 2.0 | 5 votes |
package edu.utah.cs.index import edu.utah.cs.util.MetricObject import scala.collection.mutable import scala.reflect.ClassTag import scala.util.Random abstract class VPTreeNode[T <: MetricObject: ClassTag] case class VPTreeInternalNode[T <: MetricObject: ClassTag](vp: T, threshold: Double, left: VPTreeNode[T], right: VPTreeNode[T]) extends VPTreeNode[T] case class VPTreeLeafNode[T <: MetricObject: ClassTag](points: Array[T]) extends VPTreeNode[T] case class VPTree[T <: MetricObject: ClassTag](root: VPTreeNode[T]) extends Index with Serializable { private[cs] case class HeapItem(point: T, dis: Double) extends Ordered[HeapItem] { override def compare(that: HeapItem): Int = dis.compare(that.dis) } def knn(query: T, k: Int, dis_threshold: Double = Double.MaxValue): (Array[(T, Double)], Int) = { val pq = mutable.PriorityQueue[HeapItem]() var tau = dis_threshold var checked = 0 def offer(x: HeapItem) = { if (pq.size == k) pq.dequeue() pq.enqueue(x) if (pq.size == k) tau = pq.head.dis } def recursive_knn(node: VPTreeNode[T]) : Unit = { if (node != null) { node match { case VPTreeLeafNode(ps) => checked += ps.length ps.foreach(x => { val dis = query.distance(x) if (dis < tau) offer(HeapItem(x, dis)) }) case VPTreeInternalNode(vp, th, left, right) => val vp_dis = query.distance(vp) checked += 1 if (vp_dis < tau) offer(HeapItem(vp, vp_dis)) if (vp_dis < th) { if (vp_dis - tau <= th) recursive_knn(left) if (vp_dis + tau >= th) recursive_knn(right) } else { if (vp_dis + tau >= th) recursive_knn(right) if (vp_dis - tau <= th) recursive_knn(left) } } } } recursive_knn(root) (pq.dequeueAll.map(x => (x.point, x.dis)).toArray.reverse, checked) } } object VPTree { def buildNode[T <: MetricObject: ClassTag](points: Array[T], leaf_capacity: Int): VPTreeNode[T] = { if (points.isEmpty) { null } else if (points.length < leaf_capacity) { VPTreeLeafNode(points) } else { val n = points.length val vp_id = Random.nextInt(n) val t = points(vp_id) points(vp_id) = points(0) points(0) = t val vp = points.head val ps_with_dis = points.slice(1, n).map(x => (vp.distance(x), x)).sortBy(_._1) val median = Math.ceil((n - 1) / 2.0).toInt - 1 val threshold = ps_with_dis(median)._1 VPTreeInternalNode(vp, threshold, buildNode(ps_with_dis.slice(0, median + 1).map(_._2), leaf_capacity), buildNode(ps_with_dis.slice(median + 1, n).map(_._2), leaf_capacity)) } } def apply[T <: MetricObject: ClassTag](points: Array[T], leaf_capacity: Int = 25): VPTree[T] = { VPTree(buildNode(points, leaf_capacity)) } }
Example 103
Source File: BloomFilter.scala From traj-sim-spark with Apache License 2.0 | 5 votes |
package edu.utah.cs.util

import scala.util.Random

case class BloomFilterMeta(num_bits: Int, num_hashs: Int) {
  val seeds = (1 to num_hashs).map(x => (Random.nextInt(Integer.MAX_VALUE), Random.nextInt(Integer.MAX_VALUE)))
}

object BloomFilter {
  var meta: BloomFilterMeta = null

  private def calcHash(seed: (Int, Int), key: Int) =
    (((seed._1 % meta.num_bits) * (key & meta.num_bits) + seed._2 % meta.num_bits) % meta.num_bits + meta.num_bits) % meta.num_bits

  def put(bf: Array[Int], key: Int): Unit = {
    meta.seeds.foreach(seed => {
      BitArray.set(bf, calcHash(seed, key))
    })
  }

  def mayContains(bf: Array[Int], key: Int): Boolean = {
    meta.seeds.foreach(seed => {
      if (!BitArray.get(bf, calcHash(seed, key))) return false
    })
    true
  }

  def optimalNumBits(num_items: Long, fp_rate: Double): Int = {
    math.ceil(-1 * num_items * math.log(fp_rate) / math.log(2) / math.log(2)).toInt
  }

  def optimalNumHashes(num_items: Long, num_bits: Long): Int = {
    math.ceil(num_bits / num_items * math.log(2)).toInt
  }
}
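The two sizing helpers encode the standard Bloom filter formulas m = -n ln(p) / (ln 2)^2 and k = (m / n) ln 2. A small worked example, assuming the BloomFilter object above (and its BitArray helper) is on the classpath; the item count and target rate are illustrative:

object BloomFilterSizing extends App {
  val items = 1000000L   // expected number of distinct keys
  val fpRate = 0.01      // target false-positive rate

  // Roughly 9.6 million bits for one million keys at a 1% false-positive rate.
  val bits = BloomFilter.optimalNumBits(items, fpRate)
  // Roughly 7 hash functions for that bit count.
  val hashes = BloomFilter.optimalNumHashes(items, bits)

  println(s"bits = $bits, hashes = $hashes")
}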
Example 104
Source File: 5-Future.scala From wow-spark with MIT License | 5 votes |
package com.sev7e0.wow.scala

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future
import scala.util.{Failure, Random, Success}

object UseFuture {

  type CoffeeBeans = String
  type GroundCoffee = String
  type Milk = String
  type FrothedMilk = String
  type Espresso = String
  type Cappuccino = String

  def main(args: Array[String]): Unit = {
    // This import brings the global execution context into scope, providing the implicit ExecutionContext.
    import scala.concurrent.ExecutionContext.Implicits.global

    // A callback (a partial function) can be registered for success or failure separately,
    // but this style is not recommended; prefer onComplete instead.
    grind("ara beans").onSuccess { case ground =>
      Thread.sleep(Random.nextInt(2000))
      println("okay, got my ground coffee")
    }

    // Register a callback with onComplete; the callback receives a Try.
    grind("java beans").onComplete {
      case Success(message)   => println(s"okay, got my ground coffee $message")
      case Failure(exception) => println(exception.getMessage)
    }

    val eventualBoolean: Future[Boolean] = heatWater(Water(50)) flatMap { water =>
      temperatureOkay(water)
    }
    eventualBoolean.foreach(println)

    // The three Futures created before the for-comprehension run in parallel; inside the
    // for-comprehension they are combined sequentially (a for-comprehension is sugar for flatMap/map).
    val eventualCoffee = grind("java beans")
    val eventualWater = heatWater(Water(20))
    val eventualMilk = frothMilk("milk")

    val coffee = for {
      ground <- eventualCoffee
      water <- eventualWater
      milk <- eventualMilk
      okay <- brew(ground, water)
    } yield combine(okay, milk)
    coffee.foreach(println)

    Thread.sleep(10000)
  }

  // Check the water temperature on an asynchronous task.
  def temperatureOkay(water: Water): Future[Boolean] = Future {
    (80 to 85) contains water.temperature
  }

  def grind(coffeeBeans: CoffeeBeans): Future[GroundCoffee] = Future {
    println("start grinding...")
    Thread.sleep(Random.nextInt(2000))
    if (coffeeBeans == "baked beans") throw GrindingException("are you joking?")
    println("finished grinding...")
    s"ground coffee of $coffeeBeans"
  }

  def heatWater(water: Water): Future[Water] = Future {
    println("heating the water now")
    Thread.sleep(Random.nextInt(2000))
    println("hot, it's hot!")
    water.copy(temperature = 85)
  }

  def frothMilk(milk: Milk): Future[FrothedMilk] = Future {
    println("milk frothing system engaged!")
    Thread.sleep(Random.nextInt(2000))
    println("shutting down milk frothing system")
    s"frothed $milk"
  }

  def brew(coffeeBeans: CoffeeBeans, water: Water): Future[Espresso] = Future {
    println("happy brewing :)")
    Thread.sleep(Random.nextInt(2000))
    println("it's brewed!")
    "espresso"
  }

  def combine(espresso: Espresso, frothedMilk: FrothedMilk): Cappuccino = "cappuccino"

  case class Water(temperature: Int)

  case class GrindingException(msg: String) extends Exception(msg)
  case class FrothingException(msg: String) extends Exception(msg)
  case class WaterBoilingException(msg: String) extends Exception(msg)
  case class BrewingException(msg: String) extends Exception(msg)
}
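The parallel-versus-sequential distinction noted in the comments above is easy to verify: a Future starts running as soon as it is created, so Futures created inside a for-comprehension only start after the previous step completes. A minimal standalone sketch; the step duration and names are illustrative:

import scala.concurrent.{Await, Future}
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration._

object SequentialVsParallel extends App {
  def slowStep(name: String): Future[String] = Future {
    Thread.sleep(1000)
    name
  }

  // Sequential: each Future is created (and therefore started) inside the for-comprehension,
  // so the total time is roughly three seconds.
  val sequential = for {
    a <- slowStep("a")
    b <- slowStep("b")
    c <- slowStep("c")
  } yield s"$a$b$c"

  // Parallel: the Futures are started up front and the for-comprehension only combines
  // their results, so the total time is roughly one second.
  val fa = slowStep("a")
  val fb = slowStep("b")
  val fc = slowStep("c")
  val parallel = for { a <- fa; b <- fb; c <- fc } yield s"$a$b$c"

  println(Await.result(sequential, 5.seconds))
  println(Await.result(parallel, 5.seconds))
}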
Example 105
Source File: SigSerializerSpecification.scala From sigmastate-interpreter with MIT License | 5 votes |
package sigmastate.serialization import java.util import org.ergoplatform.{ErgoLikeContext, ErgoLikeTransaction} import org.scalacheck.{Arbitrary, Gen} import org.scalatest.Assertion import sigmastate.Values.{SigmaBoolean, SigmaPropConstant, SigmaPropValue, Value} import sigmastate._ import sigmastate.basics.DLogProtocol.ProveDlog import sigmastate.basics.ProveDHTuple import sigmastate.helpers.{ContextEnrichingTestProvingInterpreter, ErgoLikeContextTesting, ErgoLikeTransactionTesting, SigmaTestingCommons} import sigmastate.serialization.generators.ObjectGenerators import sigmastate.utxo.Transformer import scala.util.Random class SigSerializerSpecification extends SigmaTestingCommons with ObjectGenerators { implicit lazy val IR = new TestingIRContext private lazy implicit val arbExprGen: Arbitrary[SigmaBoolean] = Arbitrary(exprTreeGen) private lazy val prover = new ContextEnrichingTestProvingInterpreter() private lazy val interpreterProveDlogGen: Gen[ProveDlog] = Gen.oneOf(prover.dlogSecrets.map(secret => ProveDlog(secret.publicImage.h))) private lazy val interpreterProveDHTGen = Gen.oneOf( prover.dhSecrets .map(_.commonInput) .map(ci => ProveDHTuple(ci.g, ci.h, ci.u, ci.v))) private def exprTreeNodeGen: Gen[SigmaBoolean] = for { left <- exprTreeGen right <- exprTreeGen node <- Gen.oneOf( COR(Seq(left, right)), CAND(Seq(left, right)) ) } yield node private def exprTreeGen: Gen[SigmaBoolean] = Gen.oneOf(interpreterProveDlogGen, interpreterProveDHTGen, Gen.delay(exprTreeNodeGen)) private def isEquivalent(expected: ProofTree, actual: ProofTree): Boolean = (expected, actual) match { case (NoProof, NoProof) => true case (dht1: UncheckedDiffieHellmanTuple, dht2: UncheckedDiffieHellmanTuple) => // `firstMessageOpt` is not serialized dht1.copy(commitmentOpt = None) == dht2 case (sch1: UncheckedSchnorr, sch2: UncheckedSchnorr) => // `firstMessageOpt` is not serialized sch1.copy(commitmentOpt = None) == sch2 case (conj1: UncheckedConjecture, conj2: UncheckedConjecture) => util.Arrays.equals(conj1.challenge, conj2.challenge) && conj1.children.zip(conj2.children).forall(t => isEquivalent(t._1, t._2)) case _ => false } private def roundTrip(uncheckedTree: UncheckedTree, exp: SigmaBoolean): Assertion = { val bytes = SigSerializer.toBytes(uncheckedTree) val parsedUncheckedTree = SigSerializer.parseAndComputeChallenges(exp, bytes) isEquivalent(uncheckedTree, parsedUncheckedTree) shouldBe true } property("SigSerializer no proof round trip") { roundTrip(NoProof, TrivialProp.TrueProp) } property("SigSerializer round trip") { forAll { sb: SigmaBoolean => val expr = sb.toSigmaProp val challenge = Array.fill(32)(Random.nextInt(100).toByte) val ctx = ErgoLikeContextTesting( currentHeight = 1, lastBlockUtxoRoot = AvlTreeData.dummy, minerPubkey = ErgoLikeContextTesting.dummyPubkey, boxesToSpend = IndexedSeq(fakeSelf), spendingTransaction = ErgoLikeTransactionTesting.dummy, self = fakeSelf) // get sigma conjectures out of transformers val prop = prover.reduceToCrypto(ctx, expr).get._1 val proof = prover.prove(expr, ctx, challenge).get.proof val proofTree = SigSerializer.parseAndComputeChallenges(prop, proof) roundTrip(proofTree, prop) } } }
Example 106
Source File: ConcreteCollectionSerializerSpecification.scala From sigmastate-interpreter with MIT License | 5 votes |
package sigmastate.serialization import sigmastate.Values.{FalseLeaf, Constant, TrueLeaf, IntConstant, TaggedInt, ConcreteCollection} import sigmastate._ import sigmastate.eval.Evaluation import sigmastate.lang.Terms._ import scala.util.Random class ConcreteCollectionSerializerSpecification extends TableSerializationSpecification { private def testCollectionWithConstant[T <: SType](tpe: T) = { implicit val wWrapped = wrappedTypeGen(tpe) implicit val tT = Evaluation.stypeToRType(tpe) implicit val tag = tT.classTag forAll { x: Array[T#WrappedType] => roundTripTest(ConcreteCollection[T](x.map(v => Constant(v, tpe)), tpe)) } } property("ConcreteCollection (Constant[SBoolean.type]): Serializer round trip ") { testCollectionWithConstant(SBoolean) } property("ConcreteCollection (Constant): Serializer round trip ") { testCollectionWithConstant(SByte) testCollectionWithConstant(SShort) testCollectionWithConstant(SInt) testCollectionWithConstant(SLong) testCollectionWithConstant(SBigInt) testCollectionWithConstant(SGroupElement) testCollectionWithConstant(SSigmaProp) testCollectionWithConstant(SUnit) testCollectionWithConstant(SBox) testCollectionWithConstant(SAvlTree) } property("ConcreteCollection: Serializer round trip with different types seq") { forAll { (i: IntConstant, ti: TaggedInt) => val seq = Random.shuffle(Seq(i.asIntValue, ti.asIntValue)).toArray roundTripTest(ConcreteCollection.fromSeq(seq)) } } override def objects = Table( ("object", "bytes"), (ConcreteCollection.fromItems(TrueLeaf, FalseLeaf, TrueLeaf), Array[Byte](OpCodes.ConcreteCollectionBooleanConstantCode, 3, 5)) // bits: 00000101 ) tableRoundTripTest("Specific objects serializer round trip") tablePredefinedBytesTest("Specific objects deserialize from predefined bytes") property("ConcreteCollection: deserialize collection of a crazy size") { val bytes = Array[Byte](OpCodes.ConcreteCollectionCode) ++ SigmaSerializer.startWriter().putUInt(Int.MaxValue).toBytes an[IllegalArgumentException] should be thrownBy ValueSerializer.deserialize(bytes) } }
Example 107
Source File: BlockchainSimulationSpecification.scala From sigmastate-interpreter with MIT License | 5 votes |
package sigmastate.utxo.blockchain import java.io.{File, FileWriter} import org.scalacheck.Gen import sigmastate.Values.{BooleanConstant, ErgoTree, GetVarBoolean, TrueLeaf} import sigmastate.helpers.{ContextEnrichingTestProvingInterpreter, ErgoLikeTestProvingInterpreter} import sigmastate.interpreter.ContextExtension import sigmastate.utxo.blockchain.BlockchainSimulationTestingCommons._ import scala.collection.concurrent.TrieMap import scala.util.Random class BlockchainSimulationSpecification extends BlockchainSimulationTestingCommons { implicit lazy val IR = new TestingIRContext property("apply one valid block") { val state = ValidationState.initialState() val miner = new ErgoLikeTestProvingInterpreter() val block = generateBlock(state, miner, 0) val updStateTry = state.applyBlock(block) updStateTry.isSuccess shouldBe true } property("too costly block") { val state = ValidationState.initialState() val miner = new ErgoLikeTestProvingInterpreter() val block = generateBlock(state, miner, 0) val updStateTry = state.applyBlock(block, maxCost = 1) updStateTry.isSuccess shouldBe false } property("apply many blocks") { val state = ValidationState.initialState() val miner = new ErgoLikeTestProvingInterpreter() checkState(state, miner, 0, randomDeepness) } property("apply many blocks with enriched context") { val state = ValidationState.initialState() val miner = new ErgoLikeTestProvingInterpreter() val varId = 1.toByte val prop = GetVarBoolean(varId).get.toSigmaProp // unable to spend boxes without correct context extension an[RuntimeException] should be thrownBy checkState(state, miner, 0, randomDeepness, Some(prop)) // spend boxes with context extension val contextExtension = ContextExtension(Map(varId -> TrueLeaf)) checkState(state, miner, 0, randomDeepness, Some(prop), contextExtension) } ignore(s"benchmarking applying many blocks (!!! ignored)") { val results = new TrieMap[Int, Long] def bench(numberOfBlocks: Int): Unit = { val state = ValidationState.initialState() val miner = new ContextEnrichingTestProvingInterpreter() val (_, time) = (0 until numberOfBlocks).foldLeft(state -> 0L) { case ((s, timeAcc), h) => val b = generateBlock(state, miner, h) val t0 = System.currentTimeMillis() val updStateTry = s.applyBlock(b) val t = System.currentTimeMillis() updStateTry shouldBe 'success updStateTry.get -> (timeAcc + (t - t0)) } println(s"Total time for $numberOfBlocks blocks: $time ms") results.put(numberOfBlocks, time) } bench(100) bench(200) bench(300) bench(400) printResults(results.toMap) def printResults(results: Map[Int, Long]): Unit = { val file = new File("target/bench") file.mkdirs() val writer = new FileWriter(s"target/bench/result.csv", false) val sorted = results.toList.sortBy { case (i, _) => i } val header = sorted.map(_._1).mkString(",") writer.write(s"$header\n") val values = sorted.map(_._2).mkString(",") writer.write(s"$values\n") writer.flush() writer.close() } } }
Example 108
Source File: V3PackageSpec.scala From cosmos with Apache License 2.0 | 5 votes |
package com.mesosphere.universe.v3.model import com.mesosphere.universe import java.nio.ByteBuffer import org.scalatest.FreeSpec import org.scalatest.Matchers import scala.util.Random class V3PackageSpec extends FreeSpec with Matchers { val input = List( // scalastyle:off magic.number ("pkg1", Version("1.0-1"), ReleaseVersion(1)), ("pkg1", Version("1.0-2"), ReleaseVersion(2)), ("pkg1", Version("1.0-3"), ReleaseVersion(3)), ("pkg2", Version("1.0"), ReleaseVersion(1)), ("pkg2", Version("2.0"), ReleaseVersion(2)), ("pkg3", Version("1.0"), ReleaseVersion(3)), ("pkg4", Version("1.0"), ReleaseVersion(4)), ("pkg5", Version("1.0-1"), ReleaseVersion(1)), ("pkg5", Version("2.0-1"), ReleaseVersion(2)), ("pkg5", Version("1.1-1"), ReleaseVersion(3)), ("pkg6", Version("0.0.0.1"), ReleaseVersion(1)), ("pkg6", Version("0.0.0.5"), ReleaseVersion(2)), ("pkg6", Version("0.0.0.2"), ReleaseVersion(3)), ("pkg7", Version("0.0.1"), ReleaseVersion(1)), ("pkg7", Version("0.0.4.2"), ReleaseVersion(10)) // scalastyle:on magic.number ) "V3Package" - { "Ordering should work" in { val expected = input.map(v3Package(_)) val actual = Random.shuffle(expected).sorted actual shouldBe expected } } "V2Package" - { "Ordering should work" in { val expected = input.map(v2Package(_)) val actual = Random.shuffle(expected).sorted actual shouldBe expected } } "PackageDefinition" - { "Ordering should work" in { val expected = input.map(packageDefinition(_)) val actual = Random.shuffle(expected).sorted actual shouldBe expected } } def v3Package(tuple: (String, Version, ReleaseVersion)): V3Package = { val (name, version, relVer) = tuple V3Package( V3PackagingVersion, name, version, relVer, "[email protected]", "doesn't matter" ) } def v2Package(tuple: (String, Version, ReleaseVersion)): V2Package = { val (name, version, relVer) = tuple V2Package( V2PackagingVersion, name, version, relVer, "[email protected]", "doesn't matter", Marathon(ByteBuffer.allocate(0)) ) } def packageDefinition(tuple: (String, Version, ReleaseVersion)): universe.v4.model.PackageDefinition = { if (Random.nextBoolean) { v2Package(tuple) } else { v3Package(tuple) } } }
Example 109
Source File: SemVerSpec.scala From cosmos with Apache License 2.0 | 5 votes |
package com.mesosphere.universe.v3.model

import com.mesosphere.Generators.Implicits._
import org.scalatest.FreeSpec
import org.scalatest.Matchers
import org.scalatest.prop.PropertyChecks
import scala.util.Random

final class SemVerSpec extends FreeSpec with PropertyChecks with Matchers {
  "For all SemVer => String => SemVer" in {
    forAll { (expected: SemVer) =>
      val string = expected.toString
      val actual = SemVer(string).get

      actual shouldBe expected
    }
  }

  "Test semver ordering" in {
    val expected = List(
      "1.0.0-alpha",
      "1.0.0-alpha.1",
      "1.0.0-alpha.beta",
      "1.0.0-beta",
      "1.0.0-beta.2",
      "1.0.0-beta.11",
      "1.0.0-rc.1",
      "1.0.0",
      "1.0.2",
      "1.2.0",
      "1.11.0",
      "1.11.11",
      "2",
      "11.11.11"
    ).map(SemVer(_).get)

    val actual = Random.shuffle(expected).sorted

    actual shouldBe expected
  }
}
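The ordering test above doubles as a reference for semver precedence: pre-release identifiers sort before the final release, and numeric identifiers compare numerically rather than lexically. A minimal sketch that reuses only the SemVer(...).get parsing and the Ordering exercised by the spec; the exact printed form depends on SemVer.toString and is not shown here:

object SemVerOrderingSketch extends App {
  // Parse with the same companion apply used in the spec above.
  val versions = List("2", "1.0.0-rc.1", "1.0.0", "1.11.0").map(SemVer(_).get)

  // sorted relies on the Ordering the spec depends on; the rc build sorts before 1.0.0.
  println(versions.sorted.mkString(", "))
}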
Example 110
Source File: RawCarDataGenerator.scala From cloudflow with Apache License 2.0 | 5 votes |
package connectedcar.streamlets import akka.NotUsed import akka.stream.scaladsl.Source import cloudflow.akkastream.AkkaStreamlet import cloudflow.akkastream.scaladsl.RunnableGraphStreamletLogic import cloudflow.streamlets.StreamletShape import cloudflow.streamlets.avro.AvroOutlet import connectedcar.data.ConnectedCarERecord import scala.util.Random import scala.concurrent.duration._ object RawCarDataGenerator extends AkkaStreamlet { val out = AvroOutlet[ConnectedCarERecord]("out", m ⇒ m.carId.toString + m.timestamp.toString) val shape = StreamletShape.withOutlets(out) override def createLogic = new RunnableGraphStreamletLogic() { override def runnableGraph() = Source .repeat(NotUsed) .map(_ ⇒ generateCarERecord()) // Only keep the record part of the tuple .throttle(1, 1.second) .to(plainSink(out)) } case class Driver(carId: Int, driver: String) val drivers = List( Driver(10001001, "Duncan"), Driver(10001002, "Kiki"), Driver(10001003, "Trevor"), Driver(10001004, "Jeremy"), Driver(10001005, "David"), Driver(10001006, "Nolan"), Driver(10001007, "Adam"), Driver(10001008, "Hywel") ) val status = List("whoosh", "zoom", "vrrroom") def randomDriver(): Driver = drivers(Random.nextInt(8)) //normal temp is 90c - 105c def randomTemp() = 90 + Random.nextInt(16) // battery from 1 - 100% def randomBattery() = 1 + Random.nextInt(100) //power consumption, no idea but 120 - 150 def randomPowerConsumption() = 120 + Random.nextInt(31) //highway speed 60mph - 90mph def randomSpeed() = 60 + Random.nextInt(31) def randomStatus() = status(Random.nextInt(3)) def generateCarERecord(): ConnectedCarERecord = { val driver = randomDriver; ConnectedCarERecord(System.currentTimeMillis, driver.carId, driver.driver, randomBattery, randomTemp, randomPowerConsumption, randomSpeed, randomStatus) } }
Example 111
Source File: CallRecordGeneratorIngress.scala From cloudflow with Apache License 2.0 | 5 votes |
package carly.aggregator import java.sql.Timestamp import scala.util.Random import scala.concurrent.duration._ import org.apache.spark.sql.{ Dataset, SparkSession } import org.apache.spark.sql.streaming.OutputMode import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.LongType import cloudflow.streamlets._ import cloudflow.streamlets.avro._ import cloudflow.spark.sql.SQLImplicits._ import carly.data.CallRecord import cloudflow.spark.{ SparkStreamlet, SparkStreamletLogic } import org.apache.log4j.{ Level, Logger } case class Rate(timestamp: Timestamp, value: Long) class CallRecordGeneratorIngress extends SparkStreamlet { val rootLogger = Logger.getRootLogger() rootLogger.setLevel(Level.ERROR) val RecordsPerSecond = IntegerConfigParameter("records-per-second", "Records per second to process.", Some(50)) override def configParameters = Vector(RecordsPerSecond) val out = AvroOutlet[CallRecord]("out", _.user) val shape = StreamletShape(out) override def createLogic() = new SparkStreamletLogic { val recordsPerSecond = RecordsPerSecond.value override def buildStreamingQueries = { val outStream = DataGenerator.mkData(super.session, recordsPerSecond) writeStream(outStream, out, OutputMode.Append).toQueryExecution } } } object DataGenerator { def mkData(session: SparkSession, recordsPerSecond: Int): Dataset[CallRecord] = { // do we need to expose this through configuration? val MaxTime = 2.hours.toMillis val MaxUsers = 100000 val TS0 = new java.sql.Timestamp(0) val ZeroTimestampProb = 0.05 // error rate // Random Data Generator val usersUdf = udf(() ⇒ "user-" + Random.nextInt(MaxUsers)) val directionUdf = udf(() ⇒ if (Random.nextDouble() < 0.5) "incoming" else "outgoing") // Time-biased randomized filter - 1/2 hour cycles val sinTime: Long ⇒ Double = t ⇒ Math.sin((t / 1000 % 1800) * 1.0 / 1800 * Math.PI) val timeBoundFilter: Long ⇒ Double ⇒ Boolean = t ⇒ prob ⇒ (sinTime(t) + 0.5) > prob val timeFilterUdf = udf((ts: java.sql.Timestamp, rng: Double) ⇒ timeBoundFilter(ts.getTime)(rng)) val zeroTimestampUdf = udf { (ts: java.sql.Timestamp, rng: Double) ⇒ if (rng < ZeroTimestampProb) { TS0 } else { ts } } val rateStream = session.readStream .format("rate") .option("rowsPerSecond", recordsPerSecond) .load() .as[Rate] val randomDataset = rateStream.withColumn("rng", rand()).withColumn("tsRng", rand()) val sampledData = randomDataset .where(timeFilterUdf($"timestamp", $"rng")) .withColumn("user", usersUdf()) .withColumn("other", usersUdf()) .withColumn("direction", directionUdf()) .withColumn("duration", (round(abs(rand()) * MaxTime)).cast(LongType)) .withColumn("updatedTimestamp", zeroTimestampUdf($"timestamp", $"tsRng")) .select($"user", $"other", $"direction", $"duration", $"updatedTimestamp".as("timestamp")) .as[CallRecord] sampledData } }
Example 112
Source File: SparkRandomGenIngress.scala From cloudflow with Apache License 2.0 | 5 votes |
package cloudflow.sparkdoc import scala.util.Random import cloudflow.spark._ import cloudflow.streamlets._ import cloudflow.streamlets.avro._ import cloudflow.spark.sql.SQLImplicits._ import org.apache.spark.sql.Dataset import org.apache.spark.sql.streaming.OutputMode import java.sql.Timestamp class SparkRandomGenDataIngress extends SparkStreamlet { val out = AvroOutlet[Data]("out", d ⇒ d.key) val shape = StreamletShape(out) case class Rate(timestamp: Timestamp, value: Long) override def createLogic() = new SparkStreamletLogic { override def buildStreamingQueries = writeStream(process, out, OutputMode.Append).toQueryExecution private def process: Dataset[Data] = { val recordsPerSecond = 10 val keyGen: () ⇒ String = () ⇒ if (Random.nextDouble() < 0.5) "keyOne" else "keyTwo" val rateStream = session.readStream .format("rate") .option("rowsPerSecond", recordsPerSecond) .load() .as[Rate] rateStream.map { case Rate(_, value) ⇒ Data(keyGen(), value.toInt) } } } }
Example 113
Source File: OrderRepositoryInMemoryInterpreter.scala From scala-pet-store with Apache License 2.0 | 5 votes |
package io.github.pauljamescleary.petstore
package infrastructure.repository.inmemory

import scala.collection.concurrent.TrieMap
import scala.util.Random

import cats._
import cats.implicits._
import domain.orders.{Order, OrderRepositoryAlgebra}

class OrderRepositoryInMemoryInterpreter[F[_]: Applicative] extends OrderRepositoryAlgebra[F] {
  private val cache = new TrieMap[Long, Order]

  private val random = new Random

  def create(order: Order): F[Order] = {
    val toSave = order.copy(id = order.id.orElse(random.nextLong.some))
    toSave.id.foreach(cache.put(_, toSave))
    toSave.pure[F]
  }

  def get(orderId: Long): F[Option[Order]] =
    cache.get(orderId).pure[F]

  def delete(orderId: Long): F[Option[Order]] =
    cache.remove(orderId).pure[F]
}

object OrderRepositoryInMemoryInterpreter {
  def apply[F[_]: Applicative]() =
    new OrderRepositoryInMemoryInterpreter[F]()
}
Example 114
Source File: PetRepositoryInMemoryInterpreter.scala From scala-pet-store with Apache License 2.0 | 5 votes |
package io.github.pauljamescleary.petstore package infrastructure.repository.inmemory import scala.collection.concurrent.TrieMap import scala.util.Random import cats._ import cats.data.NonEmptyList import cats.implicits._ import domain.pets.{Pet, PetRepositoryAlgebra, PetStatus} class PetRepositoryInMemoryInterpreter[F[_]: Applicative] extends PetRepositoryAlgebra[F] { private val cache = new TrieMap[Long, Pet] private val random = new Random def create(pet: Pet): F[Pet] = { val id = random.nextLong val toSave = pet.copy(id = id.some) cache += (id -> pet.copy(id = id.some)) toSave.pure[F] } def update(pet: Pet): F[Option[Pet]] = pet.id.traverse { id => cache.update(id, pet) pet.pure[F] } def get(id: Long): F[Option[Pet]] = cache.get(id).pure[F] def delete(id: Long): F[Option[Pet]] = cache.remove(id).pure[F] def findByNameAndCategory(name: String, category: String): F[Set[Pet]] = cache.values .filter(p => p.name == name && p.category == category) .toSet .pure[F] def list(pageSize: Int, offset: Int): F[List[Pet]] = cache.values.toList.sortBy(_.name).slice(offset, offset + pageSize).pure[F] def findByStatus(statuses: NonEmptyList[PetStatus]): F[List[Pet]] = cache.values.filter(p => statuses.exists(_ == p.status)).toList.pure[F] def findByTag(tags: NonEmptyList[String]): F[List[Pet]] = { val tagSet = tags.toNes cache.values.filter(_.tags.exists(tagSet.contains(_))).toList.pure[F] } } object PetRepositoryInMemoryInterpreter { def apply[F[_]: Applicative]() = new PetRepositoryInMemoryInterpreter[F]() }
Example 115
Source File: CalibrationHistogramCreateBenchmark.scala From noether with Apache License 2.0 | 5 votes |
package com.spotify.noether package benchmark import com.spotify.noether.benchmark.CalibrationHistogramCreateBenchmark.CalibrationHistogramState import org.openjdk.jmh.annotations._ import scala.util.Random object PredictionUtils { def generatePredictions(nbPrediction: Int): Seq[Prediction[Boolean, Double]] = Seq.fill(nbPrediction)(Prediction(Random.nextBoolean(), Random.nextDouble())) } object CalibrationHistogramCreateBenchmark { @State(Scope.Benchmark) class CalibrationHistogramState() { @Param(Array("100", "1000", "3000")) var nbElement = 0 @Param(Array("100", "200", "300")) var nbBucket = 0 @Param(Array("0.1", "0.2", "0.3")) var lowerBound = 0.0 @Param(Array("0.2", "0.4", "0.5")) var upperBound = 0.0 var histogram: CalibrationHistogram = _ @Setup def setup(): Unit = histogram = CalibrationHistogram(lowerBound, upperBound, nbBucket) } } class CalibrationHistogramCreateBenchmark { @Benchmark def createCalibrationHistogram(calibrationHistogramState: CalibrationHistogramState): Double = calibrationHistogramState.histogram.bucketSize }
Example 116
Source File: UISeleniumSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.thriftserver import scala.util.Random import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.openqa.selenium.WebDriver import org.openqa.selenium.htmlunit.HtmlUnitDriver import org.scalatest.{BeforeAndAfterAll, Matchers} import org.scalatest.concurrent.Eventually._ import org.scalatest.selenium.WebBrowser import org.scalatest.time.SpanSugar._ import org.apache.spark.ui.SparkUICssErrorHandler class UISeleniumSuite extends HiveThriftJdbcTest with WebBrowser with Matchers with BeforeAndAfterAll { implicit var webDriver: WebDriver = _ var server: HiveThriftServer2 = _ val uiPort = 20000 + Random.nextInt(10000) override def mode: ServerMode.Value = ServerMode.binary override def beforeAll(): Unit = { webDriver = new HtmlUnitDriver { getWebClient.setCssErrorHandler(new SparkUICssErrorHandler) } super.beforeAll() } override def afterAll(): Unit = { if (webDriver != null) { webDriver.quit() } super.afterAll() } override protected def serverStartCommand(port: Int) = { val portConf = if (mode == ServerMode.binary) { ConfVars.HIVE_SERVER2_THRIFT_PORT } else { ConfVars.HIVE_SERVER2_THRIFT_HTTP_PORT } s"""$startScript | --master local | --hiveconf hive.root.logger=INFO,console | --hiveconf ${ConfVars.METASTORECONNECTURLKEY}=$metastoreJdbcUri | --hiveconf ${ConfVars.METASTOREWAREHOUSE}=$warehousePath | --hiveconf ${ConfVars.HIVE_SERVER2_THRIFT_BIND_HOST}=localhost | --hiveconf ${ConfVars.HIVE_SERVER2_TRANSPORT_MODE}=$mode | --hiveconf $portConf=$port | --driver-class-path ${sys.props("java.class.path")} | --conf spark.ui.enabled=true | --conf spark.ui.port=$uiPort """.stripMargin.split("\\s+").toSeq } ignore("thrift server ui test") { withJdbcStatement("test_map") { statement => val baseURL = s"http://localhost:$uiPort" val queries = Seq( "CREATE TABLE test_map(key INT, value STRING)", s"LOAD DATA LOCAL INPATH '${TestData.smallKv}' OVERWRITE INTO TABLE test_map") queries.foreach(statement.execute) eventually(timeout(10 seconds), interval(50 milliseconds)) { go to baseURL find(cssSelector("""ul li a[href*="sql"]""")) should not be None } eventually(timeout(10 seconds), interval(50 milliseconds)) { go to (baseURL + "/sql") find(id("sessionstat")) should not be None find(id("sqlstat")) should not be None // check whether statements exists queries.foreach { line => findAll(cssSelector("""ul table tbody tr td""")).map(_.text).toList should contain (line) } } } } }
Example 117
Source File: MiscExpressionsSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions import java.io.PrintStream import scala.util.Random import org.apache.spark.SparkFunSuite import org.apache.spark.sql.types._ class MiscExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { test("assert_true") { intercept[RuntimeException] { checkEvaluation(AssertTrue(Literal.create(false, BooleanType)), null) } intercept[RuntimeException] { checkEvaluation(AssertTrue(Cast(Literal(0), BooleanType)), null) } intercept[RuntimeException] { checkEvaluation(AssertTrue(Literal.create(null, NullType)), null) } intercept[RuntimeException] { checkEvaluation(AssertTrue(Literal.create(null, BooleanType)), null) } checkEvaluation(AssertTrue(Literal.create(true, BooleanType)), null) checkEvaluation(AssertTrue(Cast(Literal(1), BooleanType)), null) } test("uuid") { checkEvaluation(Length(Uuid(Some(0))), 36) val r = new Random() val seed1 = Some(r.nextLong()) assert(evaluateWithoutCodegen(Uuid(seed1)) === evaluateWithoutCodegen(Uuid(seed1))) assert(evaluateWithGeneratedMutableProjection(Uuid(seed1)) === evaluateWithGeneratedMutableProjection(Uuid(seed1))) assert(evaluateWithUnsafeProjection(Uuid(seed1)) === evaluateWithUnsafeProjection(Uuid(seed1))) val seed2 = Some(r.nextLong()) assert(evaluateWithoutCodegen(Uuid(seed1)) !== evaluateWithoutCodegen(Uuid(seed2))) assert(evaluateWithGeneratedMutableProjection(Uuid(seed1)) !== evaluateWithGeneratedMutableProjection(Uuid(seed2))) assert(evaluateWithUnsafeProjection(Uuid(seed1)) !== evaluateWithUnsafeProjection(Uuid(seed2))) val uuid = Uuid(seed1) assert(uuid.fastEquals(uuid)) assert(!uuid.fastEquals(Uuid(seed1))) assert(!uuid.fastEquals(uuid.freshCopy())) assert(!uuid.fastEquals(Uuid(seed2))) } test("PrintToStderr") { val inputExpr = Literal(1) val systemErr = System.err val (outputEval, outputCodegen) = try { val errorStream = new java.io.ByteArrayOutputStream() System.setErr(new PrintStream(errorStream)) // check without codegen checkEvaluationWithoutCodegen(PrintToStderr(inputExpr), 1) val outputEval = errorStream.toString errorStream.reset() // check with codegen checkEvaluationWithGeneratedMutableProjection(PrintToStderr(inputExpr), 1) val outputCodegen = errorStream.toString (outputEval, outputCodegen) } finally { System.setErr(systemErr) } assert(outputCodegen.contains(s"Result of $inputExpr is 1")) assert(outputEval.contains(s"Result of $inputExpr is 1")) } }
Example 118
Source File: RandomUUIDGeneratorSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.util import scala.util.Random import org.apache.spark.SparkFunSuite class RandomUUIDGeneratorSuite extends SparkFunSuite { test("RandomUUIDGenerator should generate version 4, variant 2 UUIDs") { val generator = RandomUUIDGenerator(new Random().nextLong()) for (_ <- 0 to 100) { val uuid = generator.getNextUUID() assert(uuid.version() == 4) assert(uuid.variant() == 2) } } test("UUID from RandomUUIDGenerator should be deterministic") { val r1 = new Random(100) val generator1 = RandomUUIDGenerator(r1.nextLong()) val r2 = new Random(100) val generator2 = RandomUUIDGenerator(r2.nextLong()) val r3 = new Random(101) val generator3 = RandomUUIDGenerator(r3.nextLong()) for (_ <- 0 to 100) { val uuid1 = generator1.getNextUUID() val uuid2 = generator2.getNextUUID() val uuid3 = generator3.getNextUUID() assert(uuid1 == uuid2) assert(uuid1 != uuid3) } } test("Get UTF8String UUID") { val generator = RandomUUIDGenerator(new Random().nextLong()) val utf8StringUUID = generator.getNextUUIDUTF8String() val uuid = java.util.UUID.fromString(utf8StringUUID.toString) assert(uuid.version() == 4 && uuid.variant() == 2 && utf8StringUUID.toString == uuid.toString) } }
Example 119
Source File: ArrayDataIndexedSeqSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.util import scala.util.Random import org.apache.spark.SparkFunSuite import org.apache.spark.sql.RandomDataGenerator import org.apache.spark.sql.catalyst.encoders.{ExamplePointUDT, RowEncoder} import org.apache.spark.sql.catalyst.expressions.{FromUnsafeProjection, UnsafeArrayData, UnsafeProjection} import org.apache.spark.sql.types._ class ArrayDataIndexedSeqSuite extends SparkFunSuite { private def compArray(arrayData: ArrayData, elementDt: DataType, array: Array[Any]): Unit = { assert(arrayData.numElements == array.length) array.zipWithIndex.map { case (e, i) => if (e != null) { elementDt match { // For NaN, etc. case FloatType | DoubleType => assert(arrayData.get(i, elementDt).equals(e)) case _ => assert(arrayData.get(i, elementDt) === e) } } else { assert(arrayData.isNullAt(i)) } } val seq = arrayData.toSeq[Any](elementDt) array.zipWithIndex.map { case (e, i) => if (e != null) { elementDt match { // For Nan, etc. case FloatType | DoubleType => assert(seq(i).equals(e)) case _ => assert(seq(i) === e) } } else { assert(seq(i) == null) } } intercept[IndexOutOfBoundsException] { seq(-1) }.getMessage().contains("must be between 0 and the length of the ArrayData.") intercept[IndexOutOfBoundsException] { seq(seq.length) }.getMessage().contains("must be between 0 and the length of the ArrayData.") } private def testArrayData(): Unit = { val elementTypes = Seq(BooleanType, ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType, DecimalType.USER_DEFAULT, StringType, BinaryType, DateType, TimestampType, CalendarIntervalType, new ExamplePointUDT()) val arrayTypes = elementTypes.flatMap { elementType => Seq(ArrayType(elementType, containsNull = false), ArrayType(elementType, containsNull = true)) } val random = new Random(100) arrayTypes.foreach { dt => val schema = StructType(StructField("col_1", dt, nullable = false) :: Nil) val row = RandomDataGenerator.randomRow(random, schema) val rowConverter = RowEncoder(schema) val internalRow = rowConverter.toRow(row) val unsafeRowConverter = UnsafeProjection.create(schema) val safeRowConverter = FromUnsafeProjection(schema) val unsafeRow = unsafeRowConverter(internalRow) val safeRow = safeRowConverter(unsafeRow) val genericArrayData = safeRow.getArray(0).asInstanceOf[GenericArrayData] val unsafeArrayData = unsafeRow.getArray(0).asInstanceOf[UnsafeArrayData] val elementType = dt.elementType test("ArrayDataIndexedSeq - UnsafeArrayData - " + dt.toString) { compArray(unsafeArrayData, elementType, unsafeArrayData.toArray[Any](elementType)) } test("ArrayDataIndexedSeq - GenericArrayData - " + dt.toString) { compArray(genericArrayData, elementType, genericArrayData.toArray[Any](elementType)) } } } testArrayData() }
Example 120
Source File: TakeOrderedAndProjectSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution import scala.util.Random import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ class TakeOrderedAndProjectSuite extends SparkPlanTest with SharedSQLContext { private var rand: Random = _ private var seed: Long = 0 protected override def beforeAll(): Unit = { super.beforeAll() seed = System.currentTimeMillis() rand = new Random(seed) } private def generateRandomInputData(): DataFrame = { val schema = new StructType() .add("a", IntegerType, nullable = false) .add("b", IntegerType, nullable = false) val inputData = Seq.fill(10000)(Row(rand.nextInt(), rand.nextInt())) spark.createDataFrame(sparkContext.parallelize(Random.shuffle(inputData), 10), schema) } private def noOpFilter(plan: SparkPlan): SparkPlan = FilterExec(Literal(true), plan) val limit = 250 val sortOrder = 'a.desc :: 'b.desc :: Nil test("TakeOrderedAndProject.doExecute without project") { withClue(s"seed = $seed") { checkThatPlansAgree( generateRandomInputData(), input => noOpFilter(TakeOrderedAndProjectExec(limit, sortOrder, input.output, input)), input => GlobalLimitExec(limit, LocalLimitExec(limit, SortExec(sortOrder, true, input))), sortAnswers = false) } } test("TakeOrderedAndProject.doExecute with project") { withClue(s"seed = $seed") { checkThatPlansAgree( generateRandomInputData(), input => noOpFilter( TakeOrderedAndProjectExec(limit, sortOrder, Seq(input.output.last), input)), input => GlobalLimitExec(limit, LocalLimitExec(limit, ProjectExec(Seq(input.output.last), SortExec(sortOrder, true, input)))), sortAnswers = false) } } }
Example 121
Source File: ColumnarTestUtils.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.columnar import scala.collection.immutable.HashSet import scala.util.Random import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData} import org.apache.spark.sql.types.{AtomicType, Decimal} import org.apache.spark.unsafe.types.UTF8String object ColumnarTestUtils { def makeNullRow(length: Int): GenericInternalRow = { val row = new GenericInternalRow(length) (0 until length).foreach(row.setNullAt) row } def makeRandomValue[JvmType](columnType: ColumnType[JvmType]): JvmType = { def randomBytes(length: Int) = { val bytes = new Array[Byte](length) Random.nextBytes(bytes) bytes } (columnType match { case NULL => null case BOOLEAN => Random.nextBoolean() case BYTE => (Random.nextInt(Byte.MaxValue * 2) - Byte.MaxValue).toByte case SHORT => (Random.nextInt(Short.MaxValue * 2) - Short.MaxValue).toShort case INT => Random.nextInt() case LONG => Random.nextLong() case FLOAT => Random.nextFloat() case DOUBLE => Random.nextDouble() case STRING => UTF8String.fromString(Random.nextString(Random.nextInt(32))) case BINARY => randomBytes(Random.nextInt(32)) case COMPACT_DECIMAL(precision, scale) => Decimal(Random.nextLong() % 100, precision, scale) case LARGE_DECIMAL(precision, scale) => Decimal(Random.nextLong(), precision, scale) case STRUCT(_) => new GenericInternalRow(Array[Any](UTF8String.fromString(Random.nextString(10)))) case ARRAY(_) => new GenericArrayData(Array[Any](Random.nextInt(), Random.nextInt())) case MAP(_) => ArrayBasedMapData( Map(Random.nextInt() -> UTF8String.fromString(Random.nextString(Random.nextInt(32))))) case _ => throw new IllegalArgumentException(s"Unknown column type $columnType") }).asInstanceOf[JvmType] } def makeRandomValues( head: ColumnType[_], tail: ColumnType[_]*): Seq[Any] = makeRandomValues(Seq(head) ++ tail) def makeRandomValues(columnTypes: Seq[ColumnType[_]]): Seq[Any] = { columnTypes.map(makeRandomValue(_)) } def makeUniqueRandomValues[JvmType]( columnType: ColumnType[JvmType], count: Int): Seq[JvmType] = { Iterator.iterate(HashSet.empty[JvmType]) { set => set + Iterator.continually(makeRandomValue(columnType)).filterNot(set.contains).next() }.drop(count).next().toSeq } def makeRandomRow( head: ColumnType[_], tail: ColumnType[_]*): InternalRow = makeRandomRow(Seq(head) ++ tail) def makeRandomRow(columnTypes: Seq[ColumnType[_]]): InternalRow = { val row = new GenericInternalRow(columnTypes.length) makeRandomValues(columnTypes).zipWithIndex.foreach { case (value, index) => row(index) = value } row } def makeUniqueValuesAndSingleValueRows[T <: AtomicType]( columnType: NativeColumnType[T], count: Int): (Seq[T#InternalType], Seq[GenericInternalRow]) = { val values = makeUniqueRandomValues(columnType, count) val rows = values.map { value => val row = new GenericInternalRow(1) row(0) = value row } (values, rows) } }
Example 122
Source File: hierarchyGen.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.test

import org.apache.spark.sql.types.{LongType, Node}
import org.scalacheck.{Arbitrary, Gen}

import scala.util.Random
import scalaz._
import Scalaz._
import scalaz.scalacheck.ScalazArbitrary._

// scalastyle:off file.size.limit

object HierarchyGen {

  val MIN_SIZE_TREE = 6
  val MAX_SIZE_TREE = 100

  // Shared sequence counter used by next(); the excerpt references it without a
  // declaration, so this definition (and its starting value) is assumed.
  private var currentSeq: Long = 0L

  def next(): Long = {
    synchronized {
      if (currentSeq == Long.MaxValue) {
        currentSeq = Long.MinValue
      }
      val result = currentSeq
      currentSeq += 1
      result
    }
  }

  def arb: Arbitrary[Long] = Arbitrary { gen }

  def gen: Gen[Long] = Gen.resultOf[Int, Long] { x => next() }
}
Example 123
Source File: NoSharingDepot.scala From trucking-iot with Apache License 2.0 | 5 votes |
package com.orendainx.trucking.simulator.depots import akka.actor.{ActorLogging, Props, Stash} import com.orendainx.trucking.simulator.depots.ResourceDepot.{RequestRoute, RequestTruck, ReturnRoute, ReturnTruck} import com.orendainx.trucking.simulator.generators.DataGenerator.NewResource import com.orendainx.trucking.simulator.models._ import com.orendainx.trucking.simulator.services.RouteParser import com.orendainx.trucking.simulator.models.EmptyRoute import com.typesafe.config.Config import scala.util.Random object NoSharingDepot { def props()(implicit config: Config) = Props(new NoSharingDepot()) } class NoSharingDepot(implicit config: Config) extends ResourceDepot with Stash with ActorLogging { private val trucksAvailable = Random.shuffle(1 to config.getInt("resource-depot.trucks-available")).toList.map(Truck).toBuffer private val routesAvailable = RouteParser(config.getString("resource-depot.route-directory")).routes.toBuffer log.info("Trucks and routes initialized and ready for deployment") log.info(s"${trucksAvailable.length} trucks available.") log.info(s"${routesAvailable.length} routes available.") def receive = { case RequestTruck(previous) if previous != EmptyTruck => val ind = trucksAvailable.indexWhere(_ != previous) if (ind >= 0) sender() ! NewResource(trucksAvailable.remove(ind)) else stash() // None available, stash request for later case RequestTruck(_) => if (trucksAvailable.nonEmpty) sender() ! NewResource(trucksAvailable.remove(0)) else stash() case RequestRoute(previous) if previous != EmptyRoute => val ind = routesAvailable.indexWhere(_ != previous) if (ind >= 0) sender() ! NewResource(routesAvailable.remove(ind)) else stash() case RequestRoute(_) => if (routesAvailable.nonEmpty) sender() ! NewResource(routesAvailable.remove(0)) else stash() case ReturnTruck(truck) => trucksAvailable.append(truck) unstashAll() case ReturnRoute(route) => routesAvailable.append(route) unstashAll() } }
Example 124
Source File: DriverFactory.scala From trucking-iot with Apache License 2.0 | 5 votes |
package com.orendainx.trucking.simulator.services

import com.orendainx.trucking.simulator.models.{Driver, DrivingPattern}
import com.typesafe.config.Config

import scala.collection.JavaConverters._
import scala.util.Random

// Enclosing object declaration assumed; the excerpt starts directly at the method.
object DriverFactory {

  def drivers(implicit config: Config): Seq[Driver] = {

    // Generate driving patterns
    val patterns = config.getConfigList("driver.driving-patterns").asScala.map { conf =>
      val name = conf.getString("name")
      (name, DrivingPattern(name, conf.getInt("min-speed"), conf.getInt("max-speed"),
        conf.getInt("spree-frequency"), conf.getInt("spree-length"), conf.getInt("violation-percentage")))
    }.toMap

    // First, initialize all special drivers
    val specialDrivers = config.getConfigList("driver.special-drivers").asScala.map { conf =>
      Driver(conf.getInt("id"), conf.getString("name"), patterns(conf.getString("pattern")))
    }

    // If we need more drivers, generate "normal" drivers. Or if we need to remove some special drivers, do so.
    val driverCount = config.getInt("driver.driver-count")
    if (specialDrivers.lengthCompare(driverCount) < 0)
      specialDrivers ++ ((specialDrivers.length + 1) to driverCount).map { newId =>
        Driver(newId, Random.alphanumeric.take(config.getInt("driver-name-length")).mkString, patterns("normal"))
      }
    else
      specialDrivers.take(driverCount)
  }
}
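The generated "normal" drivers above get their names from Random.alphanumeric, a lazy, infinite sequence of random letters and digits. That call in isolation (the length is an arbitrary choice here):

import scala.util.Random

object RandomNameSketch {
  // Random.alphanumeric yields [A-Za-z0-9] characters forever;
  // take(n).mkString turns the prefix into a fixed-length name.
  def randomName(length: Int = 8): String = Random.alphanumeric.take(length).mkString

  def main(args: Array[String]): Unit =
    println(Seq.fill(3)(randomName()))
}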
Example 125
Source File: AutomaticCoordinator.scala From trucking-iot with Apache License 2.0 | 5 votes |
package com.orendainx.trucking.simulator.coordinators

import akka.actor.{ActorLogging, ActorRef, PoisonPill, Props, Terminated}
import com.orendainx.trucking.simulator.coordinators.AutomaticCoordinator.TickGenerator
import com.orendainx.trucking.simulator.coordinators.GeneratorCoordinator.AcknowledgeTick
import com.orendainx.trucking.simulator.flows.FlowManager
import com.orendainx.trucking.simulator.generators.DataGenerator
import com.typesafe.config.Config

import scala.collection.mutable
import scala.concurrent.duration._
import scala.util.Random

// Companion object declaration assumed; the excerpt starts directly at props.
object AutomaticCoordinator {

  // Tick message for a single generator (shape inferred from its usage below).
  case class TickGenerator(generator: ActorRef)

  def props(eventCount: Int, generators: Seq[ActorRef], flowManager: ActorRef)(implicit config: Config) =
    Props(new AutomaticCoordinator(eventCount, generators, flowManager))
}

class AutomaticCoordinator(eventCount: Int, generators: Seq[ActorRef], flowManager: ActorRef)(implicit config: Config)
  extends GeneratorCoordinator with ActorLogging {

  // For receive messages and an execution context
  import context.dispatcher

  // Event delay settings, and initialize a counter for each data generator
  val eventDelay = config.getInt("generator.event-delay")
  val eventDelayJitter = config.getInt("generator.event-delay-jitter")
  val generateCounters = mutable.Map(generators.map((_, 0)): _*)

  // Insert each new generator into the simulation (at a random scheduled point) and begin "ticking"
  generators.foreach { generator =>
    context.system.scheduler.scheduleOnce(Random.nextInt(eventDelay + eventDelayJitter).milliseconds, self, TickGenerator(generator))
  }

  def receive = {
    case AcknowledgeTick(generator) =>
      self ! TickGenerator(generator) // Each ack triggers another tick

    case TickGenerator(generator) =>
      generateCounters.update(generator, generateCounters(generator) + 1)

      if (generateCounters(generator) <= eventCount) {
        context.system.scheduler.scheduleOnce((eventDelay + Random.nextInt(eventDelayJitter)).milliseconds, generator, DataGenerator.GenerateData)
      } else {
        // Kill the individual generator, since we are done with it.
        generator ! PoisonPill

        // If all other generators have met their count, tell flow manager to shutdown
        if (!generateCounters.values.exists(_ <= eventCount)) {
          flowManager ! FlowManager.ShutdownFlow
          context watch flowManager
        }
      }

    // Once the flow manager and its transmitters terminate, shut it all down
    case Terminated(`flowManager`) =>
      context.system.terminate()
  }
}
Example 126
Source File: TrafficGenerator.scala From trucking-iot with Apache License 2.0 | 5 votes |
package com.orendainx.trucking.simulator.generators

import java.time.Instant

import akka.actor.{ActorLogging, ActorRef, Props, Stash}
import com.orendainx.trucking.commons.models._
import com.orendainx.trucking.simulator.coordinators.GeneratorCoordinator
import com.orendainx.trucking.simulator.depots.ResourceDepot.{RequestRoute, ReturnRoute}
import com.orendainx.trucking.simulator.generators.DataGenerator.{GenerateData, NewResource}
import com.orendainx.trucking.simulator.models._
import com.orendainx.trucking.simulator.transmitters.DataTransmitter.Transmit
import com.orendainx.trucking.simulator.models.{EmptyRoute, Route}
import com.typesafe.config.Config

import scala.collection.mutable
import scala.util.Random

// Companion object declaration assumed; the excerpt starts directly at props.
object TrafficGenerator {

  def props(depot: ActorRef, flowManager: ActorRef)(implicit config: Config) =
    Props(new TrafficGenerator(depot, flowManager))
}

class TrafficGenerator(depot: ActorRef, flowManager: ActorRef)(implicit config: Config)
  extends DataGenerator with Stash with ActorLogging {

  // Some settings
  val NumberOfRoutes = config.getInt("generator.routes-to-simulate")
  val CongestionDelta = config.getInt("generator.congestion.delta")

  var congestionLevel = config.getInt("generator.congestion.start")
  var routes = mutable.Buffer.empty[Route]

  // Request NumberOfRoutes routes
  (1 to NumberOfRoutes).foreach(_ => depot ! RequestRoute(EmptyRoute))

  context become waitingOnDepot

  def waitingOnDepot: Receive = {
    case NewResource(newRoute: Route) =>
      routes += newRoute
      unstashAll()
      context become driverActive
      log.info(s"Received new route: ${newRoute.name}")

    case GenerateData =>
      stash()
      log.debug("Received Tick command while waiting on route. Command stashed for later processing.")
  }

  def driverActive: Receive = {
    case GenerateData =>
      routes.foreach { route =>
        // Create traffic data and emit it
        congestionLevel += -CongestionDelta + Random.nextInt(CongestionDelta * 2 + 1)
        val traffic = TrafficData(Instant.now().toEpochMilli, route.id, congestionLevel)
        flowManager ! Transmit(traffic)
      }

      // Tell the coordinator we've acknowledged the drive command
      sender() ! GeneratorCoordinator.AcknowledgeTick(self)
  }

  def receive = {
    case _ => log.error("This message should never be seen.")
  }

  // When this actor is stopped, release resources it may still be holding onto
  override def postStop(): Unit = routes.foreach(ReturnRoute)
}
Example 127
Source File: AvroRandomGenerator.scala From streamliner-examples with Apache License 2.0 | 5 votes |
package com.memsql.spark.examples.avro import org.apache.avro.Schema import org.apache.avro.generic.GenericData import scala.collection.JavaConversions._ import scala.util.Random class AvroRandomGenerator(inSchema: Schema) { // Avoid nested Records, since our destination is a DataFrame. val MAX_RECURSION_LEVEL: Int = 1 val topSchema: Schema = inSchema val random = new Random def next(schema: Schema = this.topSchema, level: Int = 0): Any = { if (level <= MAX_RECURSION_LEVEL) { schema.getType match { case Schema.Type.RECORD => { val datum = new GenericData.Record(schema) schema.getFields.foreach { x => datum.put(x.pos, next(x.schema, level + 1)) } datum } case Schema.Type.UNION => { val types = schema.getTypes // Generate a value using the first type in the union. // "Random type" is also a valid option. next(types(0), level) } case _ => generateValue(schema.getType) } } else { null } } def generateValue(avroType: Schema.Type): Any = avroType match { case Schema.Type.BOOLEAN => random.nextBoolean case Schema.Type.DOUBLE => random.nextDouble case Schema.Type.FLOAT => random.nextFloat case Schema.Type.INT => random.nextInt case Schema.Type.LONG => random.nextLong case Schema.Type.NULL => null case Schema.Type.STRING => getRandomString case _ => null } def getRandomString(): String = { val length: Int = 5 + random.nextInt(5) (1 to length).map(x => ('a'.toInt + random.nextInt(26)).toChar).mkString } }
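getRandomString above builds a 5-to-9 character lowercase string by offsetting 'a' with nextInt(26); the same trick appears again in the Thrift generator below. In isolation it looks like this (a sketch, not the project's code):

import scala.util.Random

object LowercaseStringSketch {
  val random = new Random

  // Length is uniform in [5, 9]; each character is uniform in 'a'..'z'.
  def getRandomString(): String = {
    val length = 5 + random.nextInt(5)
    (1 to length).map(_ => ('a' + random.nextInt(26)).toChar).mkString
  }

  def main(args: Array[String]): Unit =
    println(Seq.fill(5)(getRandomString()))
}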
Example 128
Source File: ThriftRandomGenerator.scala From streamliner-examples with Apache License 2.0 | 5 votes |
package com.memsql.spark.examples.thrift import collection.JavaConversions._ import java.lang.reflect.Method import java.nio.ByteBuffer import org.apache.thrift.{TBase, TFieldIdEnum} import org.apache.thrift.protocol.{TField, TType} import org.apache.thrift.meta_data._ import scala.util.Random object ThriftRandomGenerator { val random = new Random val MAX_RECURSION_LEVEL = 5 def next[F <: TFieldIdEnum](c: Class[_], level: Int = 0): Any = { if (level > MAX_RECURSION_LEVEL) { return null } val className = c.getName try { val tBaseClass = c.asInstanceOf[Class[TBase[_ <: TBase[_, _], F]]] val instance = tBaseClass.newInstance() val metaDataMap: Map[_ <: TFieldIdEnum, FieldMetaData] = FieldMetaData.getStructMetaDataMap(tBaseClass).toMap metaDataMap.foreach({ case (field, fieldMetaData) => val valueMetaData = fieldMetaData.valueMetaData val value = getValue(valueMetaData, level) instance.setFieldValue(instance.fieldForId(field.getThriftFieldId), value) }) instance } catch { case e: ClassCastException => throw new IllegalArgumentException(s"Class $className is not a subclass of org.apache.thrift.TBase") } } def getValue(valueMetaData: FieldValueMetaData, level: Int): Any = { if (level > MAX_RECURSION_LEVEL) { return null } valueMetaData.`type` match { case TType.BOOL => random.nextBoolean case TType.BYTE => random.nextInt.toByte case TType.I16 => random.nextInt.toShort case TType.I32 => random.nextInt case TType.I64 => random.nextLong case TType.DOUBLE => random.nextInt(5) * 0.25 case TType.ENUM => { val enumClass = valueMetaData.asInstanceOf[EnumMetaData].enumClass getEnumValue(enumClass) } case TType.STRING => { val length: Int = 5 + random.nextInt(5) val s = (1 to length).map(x => ('a'.toInt + random.nextInt(26)).toChar).mkString if (valueMetaData.isBinary) { ByteBuffer.wrap(s.getBytes) } else { s } } case TType.LIST => { val elemMetaData = valueMetaData.asInstanceOf[ListMetaData].elemMetaData val length: Int = 5 + random.nextInt(5) val ret: java.util.List[Any] = (1 to length).map(x => getValue(elemMetaData, level + 1)) ret } case TType.SET => { val elemMetaData = valueMetaData.asInstanceOf[SetMetaData].elemMetaData val length: Int = 5 + random.nextInt(5) val ret: Set[Any] = (1 to length).map(x => getValue(elemMetaData, level + 1)).toSet val javaSet: java.util.Set[Any] = ret javaSet } case TType.MAP => { val mapMetaData = valueMetaData.asInstanceOf[MapMetaData] val keyMetaData = mapMetaData.keyMetaData val mapValueMetaData = mapMetaData.valueMetaData val length: Int = 5 + random.nextInt(5) val ret: Map[Any, Any] = (1 to length).map(_ => { val mapKey = getValue(keyMetaData, level + 1) val mapValue = getValue(mapValueMetaData, level + 1) mapKey -> mapValue }).toMap val javaMap: java.util.Map[Any, Any] = ret javaMap } case TType.STRUCT => { val structClass = valueMetaData.asInstanceOf[StructMetaData].structClass next(structClass, level = level + 1) } case _ => null } } def getEnumValue(enumType: Class[_]): Any = { val enumConstants = enumType.getEnumConstants enumConstants(random.nextInt(enumConstants.length)) } }
Example 129
Source File: JsonTransformSpec.scala From play-json-ops with MIT License | 5 votes |
package play.api.libs.json.ops.v4 import org.scalatest.FlatSpec import org.scalatest.prop.GeneratorDrivenPropertyChecks._ import play.api.libs.json._ import play.api.libs.json.scalacheck.JsValueGenerators import scala.annotation.tailrec import scala.util.Random class JsonTransformSpec extends FlatSpec with CompatibilityImplicits with JsValueGenerators { @tailrec private def verifyAllRedacted(all: Seq[(JsPath, JsValue)]): Unit = { val invalid = all collect { case (path, value) if value != JsonTransform.RedactedValue => path } assert(invalid.isEmpty, s"The following paths are invalid: ${invalid.mkString(", ")}") val nextGen = all flatMap { case (path, JsArray(items)) => items.zipWithIndex map { case (item, i) => (JsPath(path.path :+ IdxPathNode(i)), item) } case (path, JsObject(fields)) => fields map { case (k, v) => (path \ k, v) } case _ => Nil } if (nextGen.nonEmpty) { verifyAllRedacted(nextGen) } } "redactPaths" should "redact selected fields by path at the top level" in { forAll { obj: JsObject => val topLevelPaths: Seq[JsPath] = obj.fields.map(__ \ _._1) whenever(topLevelPaths.nonEmpty) { val redactedPaths: Seq[JsPath] = Random.shuffle(topLevelPaths) take Random.nextInt(topLevelPaths.size) implicit val redactor: JsonTransform[Any] = JsonTransform.redactPaths[Any](redactedPaths) val redacted = obj.transformAs[Any] // Useful for debugging // if (redactedPaths.nonEmpty) { // println(Json.prettyPrint(obj)) // println(s"with redacted paths (${redactedPaths.mkString(", ")}):") // println(Json.prettyPrint(redacted)) // } for (path <- redactedPaths) { assertResult(JsonTransform.RedactedValue) { path.asSingleJson(redacted).get } } } } } "redactAll" should "redact all fields of all paths" in { implicit val redactor: JsonTransform[Any] = JsonTransform.redactAll[Any]() forAll { obj: JsObject => val redacted = obj.transformAs[Any] verifyAllRedacted(Seq(__ -> redacted)) } } }
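The redactPaths test above picks a random subset of the top-level paths by shuffling them and taking a random-length prefix. The same pattern on an arbitrary Seq (a sketch; note that nextInt needs a positive bound, which the whenever guard ensures in the test and the isEmpty check ensures here):

import scala.util.Random

object RandomSubsetSketch {
  // Returns a random subset of `xs` with between 0 and xs.size - 1 elements.
  def randomSubset[A](xs: Seq[A]): Seq[A] =
    if (xs.isEmpty) Seq.empty
    else Random.shuffle(xs).take(Random.nextInt(xs.size))

  def main(args: Array[String]): Unit =
    println(randomSubset(1 to 10))
}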
Example 130
Source File: JsonTransformSpec.scala From play-json-ops with MIT License | 5 votes |
package play.api.libs.json.ops.v4 import org.scalatest.flatspec.AnyFlatSpec import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks._ import play.api.libs.json._ import play.api.libs.json.scalacheck.JsValueGenerators import scala.annotation.tailrec import scala.util.Random class JsonTransformSpec extends AnyFlatSpec with CompatibilityImplicits with JsValueGenerators { @tailrec private def verifyAllRedacted(all: Seq[(JsPath, JsValue)]): Unit = { val invalid = all collect { case (path, value) if value != JsonTransform.RedactedValue => path } assert(invalid.isEmpty, s"The following paths are invalid: ${invalid.mkString(", ")}") val nextGen = all flatMap { case (path, JsArray(items)) => items.zipWithIndex map { case (item, i) => (JsPath(path.path :+ IdxPathNode(i)), item) } case (path, JsObject(fields)) => fields map { case (k, v) => (path \ k, v) } case _ => Nil } if (nextGen.nonEmpty) { verifyAllRedacted(nextGen) } } "redactPaths" should "redact selected fields by path at the top level" in { forAll { obj: JsObject => val topLevelPaths: Seq[JsPath] = obj.fields.map(__ \ _._1) whenever(topLevelPaths.nonEmpty) { val redactedPaths: Seq[JsPath] = Random.shuffle(topLevelPaths) take Random.nextInt(topLevelPaths.size) implicit val redactor: JsonTransform[Any] = JsonTransform.redactPaths[Any](redactedPaths) val redacted = obj.transformAs[Any] // Useful for debugging // if (redactedPaths.nonEmpty) { // println(Json.prettyPrint(obj)) // println(s"with redacted paths (${redactedPaths.mkString(", ")}):") // println(Json.prettyPrint(redacted)) // } for (path <- redactedPaths) { assertResult(JsonTransform.RedactedValue) { path.asSingleJson(redacted).get } } } } } "redactAll" should "redact all fields of all paths" in { implicit val redactor: JsonTransform[Any] = JsonTransform.redactAll[Any]() forAll { obj: JsObject => val redacted = obj.transformAs[Any] verifyAllRedacted(Seq(__ -> redacted)) } } }
Example 131
Source File: JsonTransformSpec.scala From play-json-ops with MIT License | 5 votes |
package play.api.libs.json.ops.v4 import org.scalatest.flatspec.AnyFlatSpec import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks._ import play.api.libs.json._ import play.api.libs.json.scalacheck.JsValueGenerators import scala.annotation.tailrec import scala.util.Random class JsonTransformSpec extends AnyFlatSpec with CompatibilityImplicits with JsValueGenerators { @tailrec private def verifyAllRedacted(all: Seq[(JsPath, JsValue)]): Unit = { val invalid = all collect { case (path, value) if value != JsonTransform.RedactedValue => path } assert(invalid.isEmpty, s"The following paths are invalid: ${invalid.mkString(", ")}") val nextGen = all flatMap { case (path, JsArray(items)) => items.zipWithIndex map { case (item, i) => (JsPath(path.path :+ IdxPathNode(i)), item) } case (path, JsObject(fields)) => fields map { case (k, v) => (path \ k, v) } case _ => Nil } if (nextGen.nonEmpty) { verifyAllRedacted(nextGen) } } "redactPaths" should "redact selected fields by path at the top level" in { forAll { obj: JsObject => val topLevelPaths: Seq[JsPath] = obj.fields.map(__ \ _._1).toSeq whenever(topLevelPaths.nonEmpty) { val redactedPaths: Seq[JsPath] = Random.shuffle(topLevelPaths) take Random.nextInt(topLevelPaths.size) implicit val redactor: JsonTransform[Any] = JsonTransform.redactPaths[Any](redactedPaths) val redacted = obj.transformAs[Any] // Useful for debugging // if (redactedPaths.nonEmpty) { // println(Json.prettyPrint(obj)) // println(s"with redacted paths (${redactedPaths.mkString(", ")}):") // println(Json.prettyPrint(redacted)) // } for (path <- redactedPaths) { assertResult(JsonTransform.RedactedValue) { path.asSingleJson(redacted).get } } } } } "redactAll" should "redact all fields of all paths" in { implicit val redactor: JsonTransform[Any] = JsonTransform.redactAll[Any]() forAll { obj: JsObject => val redacted = obj.transformAs[Any] verifyAllRedacted(Seq(__ -> redacted)) } } }
Example 132
Source File: DataGen.scala From chinese-restaurant-process with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.monsanto.stats.tables.clustering import scala.util.Random object DataGen { private def cannedDataFrom(csvFile: String): scala.collection.immutable.Vector[TopicVectorInput] = { scala.io.Source.fromFile(csvFile).getLines.filter(!_.isEmpty).map { line => val tokens: List[String] = line.split(", ").toList val id = tokens.head.toLong val vecMap: Map[Int, Int] = Map.empty ++ tokens.tail.grouped(2).map { slice => (slice(0).toInt, slice(1).toInt) } TopicVectorInput(id, vecMap) }.toVector } def cannedBigData: scala.collection.immutable.Vector[TopicVectorInput] = cannedDataFrom("canned-data/big-data.csv") // Parens because not functional, has the side affect of generating random numbers def getData(): scala.collection.immutable.Vector[TopicVectorInput] = getDataWithRnd(new Random) def getDataWithRnd(rnd: Random): scala.collection.immutable.Vector[TopicVectorInput] = { import breeze.linalg._ import breeze.stats.distributions._ val numberOfBags = 100000 val vocabularySize = 10000 val tablesCount = 10 val minTableSize = 100 def halves(itemsRemaining: Int, acc: List[Int]): List[Int] = { val newBinSize = itemsRemaining / 2 if(newBinSize <= minTableSize){ itemsRemaining :: acc } else { halves(itemsRemaining - newBinSize, newBinSize :: acc) } } val tablesSizes = halves(numberOfBags, Nil) val sm = 0.0001 val countOfInterestsDist = new Exponential(1/10.0) val topicInterestLevelDist = new Exponential(1/100.0) def gimmieInterests(): DenseVector[Double] = { // Returns interests for one table, length 10,000 val countOfInterests = (countOfInterestsDist.draw() + 1).toInt val interestProbs = Array.fill(vocabularySize)(sm) // size 10,000 array, filled initially with .0001 (0 to countOfInterests).foreach{ _ => // countOfInterests is exponentially distributed interestProbs(rnd.nextInt(vocabularySize)) = topicInterestLevelDist.draw() + 10 } val normalizingConstant = interestProbs.sum.toDouble DenseVector(interestProbs.map( _ / normalizingConstant)) // now they sum to 1 } val tableTopicsDistributions = Array.fill(tablesCount)(Multinomial(gimmieInterests())) // same as xmasM, reguM... val instancePerPersonDist = Gaussian(400, 100) def gimmieAPerson(tableIndex: Long, m: Multinomial[DenseVector[Double],Int]): TopicVectorInput = { // like a bag val instanceCount = Math.abs(instancePerPersonDist.draw()).toInt val instanceTopicIndexes: Map[Int, Int] = Array.fill(instanceCount)(m.draw()).groupBy(i => i).mapValues(_.length) TopicVectorInput(tableIndex, instanceTopicIndexes) } var i = 0L val topicVectors: scala.collection.immutable.Vector[TopicVectorInput] = tablesSizes.zipWithIndex.flatMap { case (tableSize, tableIdx) => Array.fill(tableSize){ gimmieAPerson(tableIdx, tableTopicsDistributions(tableIdx)) } }.map { tv => i += 1 tv.copy(id = i) }.toVector println("###topicVectors.map(_.id).distinct.length: " + topicVectors.map(_.id).distinct.length) assert(topicVectors.map(_.id).distinct.length == 100000) topicVectors } }
Example 133
Source File: BoardStateTests.scala From checkers with Apache License 2.0 | 5 votes |
package checkers.core import checkers.consts._ import checkers.test.TestSuiteBase import utest._ import utest.framework._ import scala.util.Random object BoardStateTests extends TestSuiteBase { private val allPieces = List(LIGHTMAN, DARKMAN, LIGHTKING, DARKKING) private val allSquares = Board.playableSquares.toSet private def shuffledSquares() = Random.shuffle(Board.playableSquares.toList) private def randomSquares(count: Int) = shuffledSquares().take(count) override def tests: Tree[Test] = TestSuite { 'BoardState { 'PlacePieces { val squares = randomSquares(4) val placements = squares.zip(allPieces) val bs = placements.foldLeft(BoardState.empty){ case (result, (square, piece)) => result.updated(square, piece) } // pieces in correct place placements.foreach { case (square, piece) => val occupant = bs.getOccupant(square) assert(occupant == piece) } // all other squares are empty (allSquares -- squares.toSet).foreach { square => assert(bs.isSquareEmpty(square)) } } } } }
Example 134
package benchmarks import scala.annotation.tailrec import scala.util.Random import java.util.concurrent.ExecutorService trait Gen[T] { def apply(dist: List[(Gen.Op, Int)])(implicit s: ExecutorService): T = { val depth = 100 val rnd = new Random(1) import rnd._ val values = dist.collect { case (g: Gen.Value, i) => (g, i) } val transforms = dist.collect { case (g: Gen.Transform, i) => (g, i) } require(values.nonEmpty) def choose[O <: Gen.Op](l: List[(O, Int)]): O = { @tailrec def find(n: Int, prev: Int, l: List[(O, Int)]): O = { l match { case Nil => ??? case (o, i) :: tail => if (prev + i > n) o else find(n, prev + i, tail) } } val max = l.map(_._2).sum find(nextInt(max), 0, l) } val ex = new Exception def genValue: T = choose(values) match { case Gen.Async => async(s.submit(_)) case Gen.Sync => sync case Gen.Failure => failure(ex) } def genTransform(depth: Int, t: T): T = depth match { case 0 => t case _ => choose(transforms) match { case Gen.Map => val i = nextInt genTransform(depth - 1, map(t, _ + i)) case Gen.FlatMap => val d = nextInt(depth) val n = genTransform(depth - d, genValue) genTransform(d, flatMap(t, n)) case Gen.Handle => val i = nextInt genTransform(depth - 1, handle(t, i)) } } genTransform(depth, genValue) } def sync: T def async(schedule: Runnable => Unit): T def failure(ex: Throwable): T def map(t: T, f: Int => Int): T def flatMap(t: T, f: T): T def handle(t: T, i: Int): T } object Gen { sealed trait Op sealed trait Value extends Op case object Async extends Value case object Sync extends Value case object Failure extends Value sealed trait Transform extends Op case object Map extends Transform case object FlatMap extends Transform case object Handle extends Transform }
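The choose helper above implements weighted sampling: it draws nextInt over the total weight and walks the list, accumulating weights until the drawn number falls inside an entry's band. A self-contained sketch of that technique (names and the example distribution are illustrative):

import scala.annotation.tailrec
import scala.util.Random

object WeightedChoiceSketch {
  // Pick one element, with probability proportional to its weight.
  def choose[A](weighted: List[(A, Int)], rnd: Random): A = {
    val max = weighted.map(_._2).sum
    @tailrec
    def find(n: Int, prev: Int, l: List[(A, Int)]): A = l match {
      case Nil            => throw new IllegalStateException("empty distribution")
      case (a, w) :: tail => if (prev + w > n) a else find(n, prev + w, tail)
    }
    find(rnd.nextInt(max), 0, weighted)
  }

  def main(args: Array[String]): Unit = {
    val dist = List("sync" -> 7, "async" -> 2, "failure" -> 1)
    val rnd = new Random(42)
    val draws = Seq.fill(10000)(choose(dist, rnd))
    println(draws.groupBy(identity).map { case (k, v) => k -> v.size }) // roughly 7:2:1
  }
}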
Example 135
Source File: ProduceDemo.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.web.guide.views.frontend.demos import com.github.ghik.silencer.silent import io.udash.web.guide.demos.AutoDemo import io.udash.web.guide.styles.partials.GuideStyles import scalatags.JsDom.all._ object ProduceDemo extends AutoDemo { private val (rendered, source) = { import io.udash._ import io.udash.css.CssView._ import org.scalajs.dom.window import scalatags.JsDom.all._ import scala.util.Random @silent("deprecated") val names = Stream.continually(Stream("John", "Amy", "Bryan", "Diana")).flatten.iterator val name = Property(names.next()) val integers = SeqProperty(1, 2, 3, 4) window.setInterval(() => { name.set(names.next()) val size = integers.get.size val idx = Random.nextInt(size) val amount = Random.nextInt(size - idx) + 1 val count = Random.nextInt(5) integers.replace(idx, amount, Stream.range(idx, idx + amount * count + 1, amount): _*): @silent("deprecated") }, 2000) p( "Name: ", produce(name)(value => b(id := "produce-demo-name")(value).render), br, "Integers: ", span(id := "produce-demo-integers")( produce(integers)(seq => span(GuideStyles.highlightRed)(seq.mkString(",")).render) ) ).render }.withSourceCode override protected def demoWithSource(): (Modifier, Iterator[String]) = { import io.udash.css.CssView._ ( div( id := "produce-demo", GuideStyles.frame )(rendered), source.linesIterator ) } }
Example 136
Source File: RepeatDemo.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.web.guide.views.frontend.demos import com.github.ghik.silencer.silent import io.udash.web.guide.demos.AutoDemo import io.udash.web.guide.styles.partials.GuideStyles import scalatags.JsDom.all._ object RepeatDemo extends AutoDemo { private val (rendered, source) = { import io.udash._ import io.udash.css.CssView._ import org.scalajs.dom.window import scalatags.JsDom.all._ import scala.util.Random val integers = SeqProperty(1, 2, 3, 4) window.setInterval(() => { val size = integers.get.size val idx = Random.nextInt(size) val amount = Random.nextInt(size - idx) + 1 val count = Random.nextInt(5) integers.replace(idx, amount, Stream.range(idx, idx + amount * count + 1, amount): _*): @silent("deprecated") }, 2000) p( "Integers: ", span(id := "repeat-demo-integers")(repeat(integers)(p => span(GuideStyles.highlightRed)(s"${p.get}, ").render )), br, "Integers (produce): ", produce(integers)(seq => span(id := "repeat-demo-integers-produce")( seq.map(p => span(GuideStyles.highlightRed)(s"$p, ")) ).render) ) }.withSourceCode override protected def demoWithSource(): (Modifier, Iterator[String]) = { import io.udash.css.CssView._ (div(id := "repeat-demo", GuideStyles.frame)(rendered), source.linesIterator) } }
Example 137
Source File: TableDemo.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.web.guide.views.ext.demo.bootstrap import io.udash.css.CssView import io.udash.web.guide.demos.AutoDemo import io.udash.web.guide.styles.partials.GuideStyles import scalatags.JsDom.all._ object TableDemo extends AutoDemo with CssView { private val (rendered, source) = { import io.udash._ import io.udash.bootstrap._ import BootstrapStyles.ResponsiveBreakpoint import io.udash.bootstrap.button.{UdashButton, UdashButtonGroup} import io.udash.bootstrap.table.UdashTable import scalatags.JsDom.all._ import scala.util.Random val responsive = Property[Option[ResponsiveBreakpoint]](Some(ResponsiveBreakpoint.All)) val dark = Property(false) val striped = Property(true) val bordered = Property(true) val hover = Property(true) val small = Property(false) val darkButton = UdashButton.toggle(active = dark)("Dark theme") val stripedButton = UdashButton.toggle(active = striped)("Striped") val borderedButton = UdashButton.toggle(active = bordered)("Bordered") val hoverButton = UdashButton.toggle(active = hover)("Hover") val smallButton = UdashButton.toggle(active = small)("Small") val items = SeqProperty(Seq.fill(7, 3)(Random.nextDouble())) val table = UdashTable(items, responsive, dark, striped, bordered, hover, small)( headerFactory = Some(_ => tr(Seq("x", "y", "z").map(header => th(b(header)))).render), rowFactory = (el, nested) => tr( nested(produce(el)(_.map(td(_).render))) ).render ) div( UdashButtonGroup(justified = true.toProperty)( darkButton.render, stripedButton.render, borderedButton.render, hoverButton.render, smallButton.render ), table ).render }.withSourceCode override protected def demoWithSource(): (Modifier, Iterator[String]) = { (rendered.setup(_.applyTags(GuideStyles.frame)), source.linesIterator) } }
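Seq.fill(7, 3)(Random.nextDouble()) above produces a 7-row, 3-column table of fresh random values, because the two-argument overload nests the fills and re-evaluates the expression per cell. A quick illustration:

import scala.util.Random

object NestedFillSketch {
  def main(args: Array[String]): Unit = {
    // Seq.fill(rows, cols)(expr) is equivalent to Seq.fill(rows)(Seq.fill(cols)(expr)):
    // the expression is re-evaluated for every cell, so all 21 values differ.
    val table: Seq[Seq[Double]] = Seq.fill(7, 3)(Random.nextDouble())
    table.foreach(row => println(row.map(d => f"$d%.3f").mkString(" | ")))
  }
}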
Example 138
Source File: AlertsDemo.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.web.guide.views.ext.demo.bootstrap import io.udash.web.guide.demos.AutoDemo import io.udash.web.guide.styles.partials.GuideStyles import scalatags.JsDom.all._ object AlertsDemo extends AutoDemo { private val (rendered, source) = { import io.udash._ import io.udash.bootstrap._ import BootstrapStyles._ import io.udash.bootstrap.alert._ import io.udash.bootstrap.button.UdashButton import io.udash.css.CssView._ import org.scalajs.dom.Element import scalatags.JsDom.all._ import scala.util.Random val dismissed = SeqProperty.blank[String] def contentCentered: Seq[Modifier] = { Seq(Display.flex(), Flex.justifyContent(FlexContentJustification.Center)) } def randomDismissible: Element = { val title = Random.nextLong().toString val alert = DismissibleUdashAlert( alertStyle = Color.values( Random.nextInt(Color.values.size) ).toProperty )(div(title, contentCentered)) alert.dismissed.listen(_ => dismissed.append(title)) alert.render } val alerts = div()( UdashAlert(Color.Info.toProperty)(div("info", contentCentered)), UdashAlert(Color.Success.toProperty)(div("success", contentCentered)), UdashAlert(Color.Warning.toProperty)(div("warning", contentCentered)), UdashAlert(Color.Danger.toProperty)(div("danger", contentCentered)) ).render val create = UdashButton()("Create dismissible alert") create.listen { case _ => alerts.appendChild(randomDismissible) } div( alerts, create, div(Spacing.margin( side = Side.Top, size = SpacingSize.Normal ))( h4("Dismissed: "), div(Card.card, Card.body, Background.color(Color.Light))( produce(dismissed)(seq => ul(seq.map(li(_))).render) ) ) ).render }.withSourceCode override protected def demoWithSource(): (Modifier, Iterator[String]) = { import io.udash.css.CssView._ (rendered.setup(_.applyTags(GuideStyles.frame)), source.linesIterator) } }
Example 139
Source File: ButtonsDemo.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.web.guide.views.ext.demo.bootstrap import io.udash.web.guide.demos.AutoDemo import io.udash.web.guide.styles.partials.GuideStyles import scalatags.JsDom.all._ object ButtonsDemo extends AutoDemo { private val (rendered, source) = { import io.udash._ import io.udash.bootstrap._ import BootstrapStyles._ import io.udash.bootstrap.button.UdashButton import io.udash.css.CssStyle import io.udash.css.CssView._ import scalatags.JsDom.all._ import scala.util.Random val smallBtn = Some(Size.Small).toProperty[Option[Size]] val disabledButtons = Property(Set.empty[Int]) def bottomMargin: CssStyle = { Spacing.margin( side = Side.Bottom, size = SpacingSize.Normal ) } val buttons = Color.values.map(color => UdashButton( color.toProperty, smallBtn, disabled = disabledButtons.transform(_.contains(color.ordinal)) )(_ => Seq[Modifier]( color.name, Spacing.margin(size = SpacingSize.ExtraSmall) )) ) val clicks = SeqProperty[String](Seq.empty) buttons.foreach(_.listen { case UdashButton.ButtonClickEvent(source, _) => clicks.append(source.render.textContent) }) val push = UdashButton( size = Some(Size.Large).toProperty, block = true.toProperty )("Disable random buttons!") push.listen { case UdashButton.ButtonClickEvent(_, _) => clicks.set(Seq.empty) val maxDisabledCount = Random.nextInt(buttons.size + 1) disabledButtons.set(Seq.fill(maxDisabledCount)( Random.nextInt(buttons.size) ).toSet) } div( div(bottomMargin)(push), div( Display.flex(), Flex.justifyContent(FlexContentJustification.Center), bottomMargin )(buttons), h4("Clicks: "), produce(clicks)(seq => ul(Card.card, Card.body, Background.color(Color.Light))(seq.map(li(_))).render ) ).render }.withSourceCode override protected def demoWithSource(): (Modifier, Iterator[String]) = { import io.udash.css.CssView._ (rendered.setup(_.applyTags(GuideStyles.frame)), source.linesIterator) } }
Example 140
Source File: CarouselDemo.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.web.guide.views.ext.demo.bootstrap import io.udash.css.CssView import io.udash.web.guide.demos.AutoDemo import io.udash.web.guide.styles.partials.GuideStyles import scalatags.JsDom.all._ object CarouselDemo extends AutoDemo with CssView { private val (rendered, source) = { import io.udash._ import io.udash.bootstrap._ import io.udash.bootstrap.button.{UdashButton, UdashButtonGroup, UdashButtonToolbar} import io.udash.bootstrap.carousel.UdashCarousel.AnimationOptions import io.udash.bootstrap.carousel.{UdashCarousel, UdashCarouselSlide} import scalatags.JsDom.all._ import scala.concurrent.duration._ import scala.util.Random def randomString(): String = { Random.nextLong().toString } def newSlide(): UdashCarouselSlide = { UdashCarouselSlide( Url("/assets/images/ext/bootstrap/carousel.jpg") )( h3(randomString()), p(randomString()) ) } val slides = SeqProperty((1 to 5).map(_ => newSlide())) val active = Property(false) val animationOptions = active.transform(a => AnimationOptions( interval = 2.seconds, keyboard = false, active = a )) val carousel = UdashCarousel( slides = slides, activeSlide = Property(1), animationOptions = animationOptions ) { case (slide, nested) => nested(produce(slide)(_.render)) } val prevButton = UdashButton()("Prev") val nextButton = UdashButton()("Next") val prependButton = UdashButton()("Prepend") val appendButton = UdashButton()("Append") prevButton.listen { case _ => carousel.previousSlide() } nextButton.listen { case _ => carousel.nextSlide() } prependButton.listen { case _ => slides.prepend(newSlide()) } appendButton.listen { case _ => slides.append(newSlide()) } div( div( UdashButtonToolbar()( UdashButton.toggle(active = active)( "Run animation" ).render, UdashButtonGroup()( prevButton.render, nextButton.render ).render, UdashButtonGroup()( prependButton.render, appendButton.render ).render ) ), div(carousel.render) ).render }.withSourceCode override protected def demoWithSource(): (Modifier, Iterator[String]) = { (rendered.setup(_.applyTags(GuideStyles.frame)), source.linesIterator) } }
Example 141
Source File: SerializationIntegrationTestBase.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.rpc import com.avsystem.commons.serialization.{GenCodec, Input, Output} import io.udash.testing.UdashSharedTest import org.scalactic.source.Position import scala.util.Random class SerializationIntegrationTestBase extends UdashSharedTest with Utils { val repeats = 1000 def tests(implicit pos: Position): Unit = { "serialize and deserialize all types" in { for (i <- 1 to repeats) { def cc() = TestCC(Random.nextInt(), Random.nextLong(), 123, Random.nextBoolean(), Random.nextString(200), List.fill(Random.nextInt(200))('a')) def ncc() = NestedTestCC(Random.nextInt(), cc(), cc()) def dncc(counter: Int = 0): DeepNestedTestCC = if (counter < 200) DeepNestedTestCC(ncc(), dncc(counter + 1)) else DeepNestedTestCC(ncc(), null) val test: DeepNestedTestCC = dncc() val serialized = write(test) val deserialized = read[DeepNestedTestCC](serialized) deserialized should be(test) } } "serialize and deserialize types with custom gencodec" in { implicit def optionGencodec[T: GenCodec]: GenCodec[Option[T]] = new GenCodec[Option[T]] { override def write(output: Output, value: Option[T]): Unit = value match { case Some(v) => implicitly[GenCodec[T]].write(output, v) case None => output.writeNull() } override def read(input: Input): Option[T] = if (input.readNull()) None else Some(implicitly[GenCodec[T]].read(input)) } val testOpts = Seq( None, Some(10L), Some(Long.MaxValue) ) testOpts.foreach(opt => { val serialized = write(opt) val deserialized = read[Option[Long]](serialized) deserialized should be(opt) }) } } }
Example 142
Source File: Utils.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.rpc import com.avsystem.commons.serialization._ import com.avsystem.commons.serialization.json.{JsonStringInput, JsonStringOutput} import scala.util.Random trait Utils { def completeItem() = CompleteItem( unit = (), string = Random.nextString(Random.nextInt(20)), specialString = "\n\f\b\t\r\n\\\"\\\\", char = Random.nextString(1).head, boolean = Random.nextBoolean(), byte = Random.nextInt().toByte, short = Random.nextInt().toShort, int = Random.nextInt(), long = Random.nextLong(), float = Random.nextFloat(), double = Random.nextDouble(), binary = Array.fill(Random.nextInt(20))(Random.nextInt().toByte), list = List.fill(Random.nextInt(20))(Random.nextString(Random.nextInt(20))), set = List.fill(Random.nextInt(20))(Random.nextString(Random.nextInt(20))).toSet, obj = TestCC(Random.nextInt(), Random.nextLong(), Random.nextInt(), Random.nextBoolean(), Random.nextString(Random.nextInt(20)), Nil), map = Map(Seq.fill(Random.nextInt(20))(Random.nextString(20) -> Random.nextInt()): _*) ) implicit val codec: GenCodec[TestCC] = GenCodec.materialize[TestCC] implicit val codecN: GenCodec[NestedTestCC] = GenCodec.materialize[NestedTestCC] implicit val codecDN: GenCodec[DeepNestedTestCC] = new GenCodec[DeepNestedTestCC] { override def read(input: Input): DeepNestedTestCC = { def _read(acc: List[NestedTestCC])(next: Input): DeepNestedTestCC = if (next.readNull()) { acc.foldLeft(null: DeepNestedTestCC)((acc: DeepNestedTestCC, n: NestedTestCC) => DeepNestedTestCC(n, acc)) } else { val obj = next.readObject() val n: NestedTestCC = obj.nextField() match { case in if in.fieldName == "n" => codecN.read(in) } obj.nextField() match { case in if in.fieldName == "nest" => _read(n :: acc)(in) } } _read(Nil)(input) } override def write(output: Output, value: DeepNestedTestCC): Unit = { val obj = output.writeObject() codecN.write(obj.writeField("n"), value.n) val f = obj.writeField("nest") if (value.l != null) this.write(f, value.l) else f.writeNull() obj.finish() } } implicit val codecCI: GenCodec[CompleteItem] = GenCodec.materialize[CompleteItem] def write[T: GenCodec](value: T): JsonStr = JsonStr(JsonStringOutput.write(value)) def read[T: GenCodec](jsonStr: JsonStr): T = JsonStringInput.read[T](jsonStr.json) }
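completeItem above leans on Random.nextString, which draws characters well beyond ASCII (anywhere below the surrogate block in current Scala versions), making it a harsher input for a codec round-trip than alphanumeric strings. A small comparison sketch, standard library only:

import scala.util.Random

object NextStringSketch {
  def main(args: Array[String]): Unit = {
    val s = Random.nextString(10)
    // Print the code points to see how far outside ASCII nextString ranges.
    println(s.map(c => "U+%04X".format(c.toInt)).mkString(" "))
    println(Random.alphanumeric.take(10).mkString) // compare: ASCII letters and digits only
  }
}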
Example 143
Source File: TooltipTestUtils.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.bootstrap.tooltip import io.udash._ import io.udash.testing.AsyncUdashCoreFrontendTest import scala.concurrent.Future import scala.util.Random class TooltipTestUtils extends AsyncUdashCoreFrontendTest { def tooltipTest(companion: TooltipUtils[_ <: Tooltip], expectContent: Boolean): Unit = { "display translated content" in { import io.udash.i18n._ import io.udash.wrappers.jquery._ import scalatags.JsDom.all._ val body = jQ("body") val item = button("btn").render body.append(item) val randMarker = Random.nextInt() implicit val lang = Property(Lang("test")) implicit val tp = new LocalTranslationProvider( Map( Lang("test") -> Bundle(BundleHash("h"), Map("a" -> s"$randMarker:AAA", "b" -> s"$randMarker:BBB")), Lang("test2") -> Bundle(BundleHash("h"), Map("a" -> s"$randMarker:ccc", "b" -> s"$randMarker:ddd")) ) ) val tooltip = companion.apply( title = span(translatedDynamic(TranslationKey.key("a"))(_.apply())).render, content = span(translatedDynamic(TranslationKey.key("b"))(_.apply())).render )(item) def expectedText(): String = if (expectContent) s"$randMarker:AAA$randMarker:BBB" else s"$randMarker:AAA" def secondExpectedText(): String = if (expectContent) s"$randMarker:ccc$randMarker:ddd" else s"$randMarker:ccc" body.text() shouldNot include(expectedText()) body.text() shouldNot include(secondExpectedText()) for { _ <- Future(tooltip.show()) _ <- retrying { body.text() should include(expectedText()) body.text() shouldNot include(secondExpectedText()) } _ <- Future(tooltip.hide()) _ <- retrying { body.text() shouldNot include(expectedText()) body.text() shouldNot include(secondExpectedText()) } _ <- Future(lang.set(Lang("test2"))) _ <- Future(tooltip.show()) _ <- retrying { body.text() shouldNot include(expectedText()) body.text() should include(secondExpectedText()) } _ <- Future(tooltip.hide()) r <- retrying { body.text() shouldNot include(expectedText()) body.text() shouldNot include(secondExpectedText()) } } yield r } } }
Example 144
Source File: BenchmarkUtils.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.benchmarks.properties import com.avsystem.commons._ import io.udash._ import japgolly.scalajs.benchmark.Benchmark import scala.util.Random trait BenchmarkUtils { case class ModelItem(i: Int, s: String, sub: Option[ModelItem]) object ModelItem extends HasModelPropertyCreator[ModelItem] { def random: ModelItem = ModelItem( Random.nextInt(100), Random.nextString(5), Some(ModelItem(Random.nextInt(100), Random.nextString(5), None)) ) } sealed trait ModelWithSeqItem { def i: Int def s: String def seq: BSeq[Int] } case class ModelWithBSeqItem(i: Int, s: String, seq: BSeq[Int]) extends ModelWithSeqItem object ModelWithBSeqItem extends HasModelPropertyCreator[ModelWithBSeqItem] { def random: ModelWithBSeqItem = ModelWithBSeqItem( Random.nextInt(100), Random.nextString(5), 1 to Random.nextInt(100) + 100 ) } case class ModelWithISeqItem(i: Int, s: String, seq: ISeq[Int]) extends ModelWithSeqItem object ModelWithISeqItem extends HasModelPropertyCreator[ModelWithISeqItem] { def random: ModelWithISeqItem = ModelWithISeqItem( Random.nextInt(100), Random.nextString(5), 1 to Random.nextInt(100) + 100 ) } def slowInc(v: Int): Int = { var r = v (1 to 10000).foreach(_ => r += 1) r } def slowDec(v: Int): Int = { var r = v (1 to 10000).foreach(_ => r -= 1) r } def addEmptyListeners[T](p: T)(count: Int, listenOp: T => Unit): Unit = { (1 to count).foreach(_ => listenOp(p)) } def setAndGetValues[T1, T2](p: T1, t: T2)(count: Int, getToSetRatio: Double, setOp: (T1, Int) => Unit, getOp: T2 => Any): Unit = { var counter: Double = 0 (1 to count).foreach { i => setOp(p, i) counter += getToSetRatio while (counter >= 1) { getOp(t) counter -= 1 } } } def replaceElements(p: SeqProperty[Int], i: Int): Unit = { val start = Random.nextInt(p.size / 2) val count = Random.nextInt(p.size / 3) p.replace(start, count, Seq.tabulate(count)(_ + i): _*) } def generateGetSetListenBenchmarks[T1, T2](properties: Seq[(String, () => (T1, T2))])( setsCounts: Seq[Int], getToSetRatios: Seq[Double], listenersCounts: Seq[Int], setAndGetOps: Seq[(String, (T1, Int) => Unit, T2 => Any)], listenOps: Seq[(String, T2 => Unit)] ): Seq[Benchmark[Unit]] = { var id = 0 for { propertyCreator <- properties setAndGetOp <- setAndGetOps listenOp <- listenOps listenersCount <- listenersCounts setsCount <- setsCounts getToSetRatio <- getToSetRatios } yield { val (propertiesDesc, props) = propertyCreator val (setAndGetDesc, setter, getter) = setAndGetOp val (listenerDesc, listener) = listenOp id += 1 Benchmark(s"${"%03d".format(id)}. set and get ($setsCount and ${setsCount * getToSetRatio} times - $setAndGetDesc) on $propertiesDesc with $listenersCount listeners ($listenerDesc)") { val (p, t) = props() addEmptyListeners(t)(listenersCount, listener) setAndGetValues(p, t)(setsCount, getToSetRatio, setter, getter) } } } }
Example 145
Source File: GroupedButtonsBinding.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.bindings.inputs import io.udash._ import org.scalajs.dom.html.{Div, Input => JSInput} import org.scalajs.dom.{Event, Node} import scalatags.JsDom.all._ import scala.util.Random private[inputs] class GroupedButtonsBinding[T]( options: ReadableSeqProperty[T], decorator: Seq[(JSInput, T)] => Seq[Node], inputModifiers: Modifier* )( inputTpe: String, checkedIf: T => ReadableProperty[Boolean], refreshSelection: Seq[T] => Unit, onChange: (JSInput, T) => Event => Unit ) extends InputBinding[Div] { private val groupIdPrefix: Long = Random.nextLong private val buttons = div( produce(options) { opts => kill() refreshSelection(opts) decorator( opts.zipWithIndex.map { case (opt, idx) => val in = input( id := s"$groupIdPrefix-$idx", // default id, can be replaced by `inputModifiers` inputModifiers, tpe := inputTpe, value := idx.toString ).render val selected = checkedIf(opt) propertyListeners += selected.listen(in.checked = _, initUpdate = true) in.onchange = onChange(in, opt) (in, opt) } ) } ).render override def render: Div = buttons }
Example 146
Source File: RetryPolicies.scala From cats-retry with Apache License 2.0 | 5 votes |
package retry import java.util.concurrent.TimeUnit import cats.Applicative import cats.syntax.functor._ import cats.syntax.show._ import cats.instances.finiteDuration._ import cats.instances.int._ import retry.PolicyDecision._ import scala.concurrent.duration.{Duration, FiniteDuration} import scala.util.Random object RetryPolicies { private val LongMax: BigInt = BigInt(Long.MaxValue) def limitRetriesByCumulativeDelay[M[_]: Applicative]( threshold: FiniteDuration, policy: RetryPolicy[M] ): RetryPolicy[M] = { def decideNextRetry(status: RetryStatus): M[PolicyDecision] = policy.decideNextRetry(status).map { case r @ DelayAndRetry(delay) => if (status.cumulativeDelay + delay >= threshold) GiveUp else r case GiveUp => GiveUp } RetryPolicy.withShow[M]( decideNextRetry, show"limitRetriesByCumulativeDelay(threshold=$threshold, $policy)" ) } }
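The cats-retry excerpt above is trimmed to a single combinator; scala.util.Random is imported presumably for the jittered backoff policies that are not shown here. As a library-agnostic illustration (a sketch using only the standard library, not cats-retry's API), "full jitter" draws the actual sleep uniformly between zero and a capped exponential delay:

import scala.concurrent.duration._
import scala.util.Random

object JitterSketch {
  // Exponential backoff capped at `max`, with full jitter applied to the capped delay.
  def fullJitterDelay(base: FiniteDuration, attempt: Int, max: FiniteDuration): FiniteDuration = {
    val exp    = base * math.pow(2.0, attempt.toDouble).toLong
    val capped = if (exp < max) exp else max
    (Random.nextDouble() * capped.toNanos).toLong.nanos
  }

  def main(args: Array[String]): Unit =
    (0 to 5).foreach(a => println(s"attempt $a -> ${fullJitterDelay(100.millis, a, 10.seconds)}"))
}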
Example 147
Source File: JavaIdentifierSpec.scala From tscfg with Apache License 2.0 | 5 votes |
package tscfg import org.specs2.mutable.Specification import org.specs2.specification.core.Fragments import tscfg.generators.java.javaUtil.{javaKeywords, javaIdentifier} import scala.util.Random object javaIdentifierSpec extends Specification { """javaIdentifier""" should { List("foo", "bar_3", "$baz").foldLeft(Fragments.empty) { (res, id) => res.append(s"""keep valid identifier "$id"""" in { javaIdentifier(id) must_== id }) } Random.shuffle(javaKeywords).take(3).foldLeft(Fragments.empty) { (res, kw) => res.append(s"""convert java keyword "$kw" to "${kw}_"""" in { javaIdentifier(kw) must_== kw + "_" }) } List("foo-bar", "foo:bar", "foo#bar").foldLeft(Fragments.empty) { (res, id) => res.append(s"""replace non java id character with '_': "$id" -> "foo_bar"""" in { javaIdentifier(id) must_== "foo_bar" }) } s"""prefix with '_' if first character is valid but not at first position: "21" -> "_21"""" in { javaIdentifier("21") must_== "_21" } } }
Example 148
Source File: scalaIdentifierSpec.scala From tscfg with Apache License 2.0 | 5 votes |
package tscfg import org.specs2.mutable.Specification import org.specs2.specification.core.Fragments import tscfg.generators.scala.ScalaUtil import tscfg.generators.scala.ScalaUtil.scalaReservedWords import scala.util.Random object scalaIdentifierSpec extends Specification { """scalaIdentifier""" should { val scalaUtil: ScalaUtil = new ScalaUtil() import scalaUtil.scalaIdentifier List("foo", "bar_3", "$baz").foldLeft(Fragments.empty) { (res, id) => res.append(s"""keep valid identifier "$id"""" in { scalaIdentifier(id) must_== id }) } Random.shuffle(scalaReservedWords).take(3).foldLeft(Fragments.empty) { (res, w) => val e = "`" +w + "`" res.append(s"""convert scala reserved word "$w" to "$e"""" in { scalaIdentifier(w) must_== e }) } List("foo-bar", "foo:bar", "foo#bar").foldLeft(Fragments.empty) { (res, id) => res.append(s"""replace non scala id character with '_': "$id" -> "foo_bar"""" in { scalaIdentifier(id) must_== "foo_bar" }) } s"""prefix with '_' if first character is valid but not at first position: "21" -> "_21"""" in { scalaIdentifier("21") must_== "_21" } } """scalaIdentifier with useBackticks=true""" should { val scalaUtil: ScalaUtil = new ScalaUtil(useBackticks = true) import scalaUtil.scalaIdentifier List("foo-bar", "foo:bar", "foo#bar").foldLeft(Fragments.empty) { (res, id) => res.append(s"""put non scala id with backticks: "$id" -> "`$id`"""" in { scalaIdentifier(id) must_== s"`$id`" }) } List("0", "1", "3").foldLeft(Fragments.empty) { (res, id) => res.append(s"""put literal number with backticks: "$id" -> "`$id`"""" in { scalaIdentifier(id) must_== s"`$id`" }) } } }
Example 149
Source File: PutRecordAction.scala From aws-kinesis-scala with Apache License 2.0 | 5 votes |
package jp.co.bizreach.kinesisfirehose.action import com.amazonaws.retry.PredefinedRetryPolicies.DEFAULT_MAX_ERROR_RETRY import com.amazonaws.services.kinesisfirehose.model.ServiceUnavailableException import jp.co.bizreach.kinesisfirehose._ import org.slf4j.LoggerFactory import scala.annotation.tailrec import scala.collection.mutable.ArrayBuffer import scala.math._ import scala.util.Random trait PutRecordAction { private val logger = LoggerFactory.getLogger(getClass) def withPutBatchRetry(records: Seq[Array[Byte]], retryLimit: Int = DEFAULT_MAX_ERROR_RETRY) (f: Seq[Array[Byte]] => PutRecordBatchResult): Seq[Either[PutRecordBatchResponseEntry, PutRecordBatchResponseEntry]] = { val buffer = ArrayBuffer[Either[PutRecordBatchResponseEntry, PutRecordBatchResponseEntry]](Nil.padTo(records.size, null): _*) @tailrec def put0(records: Seq[(Array[Byte], Int)], retry: Int = 0): Unit = { val result = f(records.map(_._1)) val failed = records zip result.records flatMap { case ((_, i), entry) if Option(entry.errorCode).isEmpty => buffer(i) = Right(entry) None case ((record, i), entry) => buffer(i) = Left(entry) Some(record -> i) } // success, or exceed the upper limit of the retry if (failed.isEmpty || retry >= retryLimit) () // retry else { Thread.sleep(sleepDuration(retry, retryLimit)) logger.warn(s"Retrying the put requests. Retry count: ${retry + 1}") put0(failed, retry + 1) } } put0(records.zipWithIndex) buffer.toList } def withPutRetry(retryLimit: Int = DEFAULT_MAX_ERROR_RETRY) (f: => PutRecordResult): Either[Throwable, PutRecordResult] = { @tailrec def put0(retry: Int = 0): Either[Throwable, PutRecordResult] = { try Right(f) catch { case e: ServiceUnavailableException => if (retry >= retryLimit) Left(e) else { Thread.sleep(sleepDuration(retry, retryLimit)) logger.warn(s"Retrying the put request. Retry count: ${retry + 1}") put0(retry + 1) } } } put0() } protected def sleepDuration(retry: Int, retryLimit: Int): Long = { // scaling factor val d = 0.5 + Random.nextDouble() * 0.1 // possible seconds val durations = (0 until retryLimit).map(n => pow(2, n) * d) (durations(retry) * 1000).toLong } }
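sleepDuration above precomputes 2^n seconds for each retry slot, scales the whole schedule by one random factor in [0.5, 0.6), and converts the chosen slot to milliseconds; so retry 0 sleeps roughly 500-600 ms, retry 1 roughly 1.0-1.2 s, and so on. Extracted as a standalone function it reads (a sketch mirroring the logic above):

import scala.math._
import scala.util.Random

object SleepDurationSketch {
  // Same shape as the Kinesis helper above: exponential slots, one shared jitter factor.
  def sleepDuration(retry: Int, retryLimit: Int): Long = {
    val d = 0.5 + Random.nextDouble() * 0.1          // scaling factor in [0.5, 0.6)
    val durations = (0 until retryLimit).map(n => pow(2, n) * d)
    (durations(retry) * 1000).toLong                 // milliseconds
  }

  def main(args: Array[String]): Unit =
    (0 until 3).foreach(r => println(s"retry $r -> ${sleepDuration(r, 3)} ms"))
}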
Example 150
Source File: BruteForceTicTacToeStrategy.scala From fx-tictactoe with Apache License 2.0 | 5 votes |
package net.ladstatt.tictactoe

import scala.util.Random

// Enclosing object declaration assumed; the excerpt starts directly at the method.
object BruteForceTicTacToeStrategy {

  def determineMove(game: TicTacToe, potentialMoves: Seq[Seq[TMove]]): TMove = {
    // check if we could win with the next move
    val winningMove = game.lookAhead(PlayerB)
    if (winningMove.isDefined) {
      winningMove.get
    } else {
      // check if there is already an obvious threat from the opponent to win the game
      // if there is, we'll take the move
      val winningMoveForOpponent = game.lookAhead(PlayerA)
      if (winningMoveForOpponent.isDefined) {
        winningMoveForOpponent.get
      } else {
        // prefer the middle center square
        if (potentialMoves.exists {
          case moves => moves.drop(game.movesSoFar.length).head == MiddleCenter
        }) {
          MiddleCenter
        } else {
          // we take the shortest path to win
          val possibilities = potentialMoves.sortWith((a, b) => a.size < b.size)
          val aPathToWin = possibilities.head
          aPathToWin.drop(game.movesSoFar.length).head
        }
      }
    }
  }
}
Example 151
Source File: WhiskAdminCliTestBase.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.core.database import akka.stream.ActorMaterializer import common.{StreamLogging, WskActorSystem} import org.rogach.scallop.throwError import org.scalatest.concurrent.ScalaFutures import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FlatSpec, Matchers} import org.apache.openwhisk.core.cli.{Conf, WhiskAdmin} import org.apache.openwhisk.core.database.test.DbUtils import org.apache.openwhisk.core.entity.WhiskAuthStore import scala.util.Random trait WhiskAdminCliTestBase extends FlatSpec with WskActorSystem with DbUtils with StreamLogging with BeforeAndAfterEach with BeforeAndAfterAll with ScalaFutures with Matchers { implicit val materializer = ActorMaterializer() //Bring in sync the timeout used by ScalaFutures and DBUtils implicit override val patienceConfig: PatienceConfig = PatienceConfig(timeout = dbOpTimeout) protected val authStore = WhiskAuthStore.datastore() //Ensure scalaop does not exit upon validation failure throwError.value = true override def afterEach(): Unit = { cleanup() } override def afterAll(): Unit = { println("Shutting down store connections") authStore.shutdown() super.afterAll() } protected def randomString(len: Int = 5): String = Random.alphanumeric.take(len).mkString protected def resultOk(args: String*): String = WhiskAdmin(new Conf(args.toSeq)) .executeCommand() .futureValue .right .get protected def resultNotOk(args: String*): String = WhiskAdmin(new Conf(args.toSeq)) .executeCommand() .futureValue .left .get .message }
Example 152
Source File: CosmosDBTestSupport.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.core.database.cosmosdb import com.microsoft.azure.cosmosdb.{Database, SqlParameter, SqlParameterCollection, SqlQuerySpec} import org.scalatest.{BeforeAndAfterAll, FlatSpecLike} import pureconfig._ import pureconfig.generic.auto._ import org.apache.openwhisk.core.ConfigKeys import org.apache.openwhisk.core.database.test.behavior.ArtifactStoreTestUtil.storeAvailable import scala.collection.mutable.ListBuffer import scala.util.{Random, Try} trait CosmosDBTestSupport extends FlatSpecLike with BeforeAndAfterAll with RxObservableImplicits { private val dbsToDelete = ListBuffer[Database]() lazy val storeConfigTry = Try { loadConfigOrThrow[CosmosDBConfig](ConfigKeys.cosmosdb) } lazy val client = storeConfig.createClient() val useExistingDB = java.lang.Boolean.getBoolean("whisk.cosmosdb.useExistingDB") def storeConfig = storeConfigTry.get override protected def withFixture(test: NoArgTest) = { assume(storeAvailable(storeConfigTry), "CosmosDB not configured or available") super.withFixture(test) } protected def generateDBName() = { s"travis-${getClass.getSimpleName}-${Random.alphanumeric.take(5).mkString}" } protected def createTestDB() = { if (useExistingDB) { val db = getOrCreateDatabase() println(s"Using existing database ${db.getId}") db } else { val databaseDefinition = new Database databaseDefinition.setId(generateDBName()) val db = client.createDatabase(databaseDefinition, null).blockingResult() dbsToDelete += db println(s"Created database ${db.getId}") db } } private def getOrCreateDatabase(): Database = { client .queryDatabases(querySpec(storeConfig.db), null) .blockingOnlyResult() .getOrElse { client.createDatabase(newDatabase, null).blockingResult() } } protected def querySpec(id: String) = new SqlQuerySpec("SELECT * FROM root r WHERE r.id=@id", new SqlParameterCollection(new SqlParameter("@id", id))) private def newDatabase = { val databaseDefinition = new Database databaseDefinition.setId(storeConfig.db) databaseDefinition } override def afterAll(): Unit = { super.afterAll() if (!useExistingDB) { dbsToDelete.foreach(db => client.deleteDatabase(db.getSelfLink, null).blockingResult()) } client.close() } }
Example 153
Source File: S3AttachmentStoreBehaviorBase.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.core.database.s3 import akka.actor.ActorSystem import akka.stream.ActorMaterializer import org.scalatest.FlatSpec import org.apache.openwhisk.common.Logging import org.apache.openwhisk.core.database.{AttachmentStore, DocumentSerializer} import org.apache.openwhisk.core.database.memory.{MemoryArtifactStoreBehaviorBase, MemoryArtifactStoreProvider} import org.apache.openwhisk.core.database.test.AttachmentStoreBehaviors import org.apache.openwhisk.core.database.test.behavior.ArtifactStoreAttachmentBehaviors import org.apache.openwhisk.core.entity.WhiskEntity import scala.reflect.ClassTag import scala.util.Random trait S3AttachmentStoreBehaviorBase extends FlatSpec with MemoryArtifactStoreBehaviorBase with ArtifactStoreAttachmentBehaviors with AttachmentStoreBehaviors { override lazy val store = makeS3Store[WhiskEntity] override implicit val materializer: ActorMaterializer = ActorMaterializer() override val prefix = s"attachmentTCK_${Random.alphanumeric.take(4).mkString}" override protected def beforeAll(): Unit = { MemoryArtifactStoreProvider.purgeAll() super.beforeAll() } override def getAttachmentStore[D <: DocumentSerializer: ClassTag](): AttachmentStore = makeS3Store[D]() def makeS3Store[D <: DocumentSerializer: ClassTag]()(implicit actorSystem: ActorSystem, logging: Logging, materializer: ActorMaterializer): AttachmentStore }
Example 154
Source File: ActivationStoreCRUDBehaviors.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.core.database.test.behavior import org.apache.openwhisk.common.TransactionId import org.apache.openwhisk.core.database.NoDocumentException import org.apache.openwhisk.core.entity.{ActivationId, WhiskActivation} import scala.util.Random trait ActivationStoreCRUDBehaviors extends ActivationStoreBehaviorBase { protected def checkStoreActivation(activation: WhiskActivation)(implicit transid: TransactionId): Unit = { store(activation, context) shouldBe activation.docinfo } protected def checkDeleteActivation(activation: WhiskActivation)(implicit transid: TransactionId): Unit = { activationStore.delete(ActivationId(activation.docid.asString), context).futureValue shouldBe true } protected def checkGetActivation(activation: WhiskActivation)(implicit transid: TransactionId): Unit = { activationStore.get(ActivationId(activation.docid.asString), context).futureValue shouldBe activation } behavior of s"${storeType}ActivationStore store" it should "put activation and get docinfo" in { implicit val tid: TransactionId = transId() val namespace = s"ns_${Random.alphanumeric.take(4).mkString}" val action = s"action1_${Random.alphanumeric.take(4).mkString}" val activation = newActivation(namespace, action, 1L) checkStoreActivation(activation) } behavior of s"${storeType}ActivationStore delete" it should "deletes existing activation" in { implicit val tid: TransactionId = transId() val namespace = s"ns_${Random.alphanumeric.take(4).mkString}" val action = s"action1_${Random.alphanumeric.take(4).mkString}" val activation = newActivation(namespace, action, 1L) store(activation, context) checkDeleteActivation(activation) } it should "throws NoDocumentException when activation does not exist" in { implicit val tid: TransactionId = transId() activationStore.delete(ActivationId("non-existing-doc"), context).failed.futureValue shouldBe a[NoDocumentException] } behavior of s"${storeType}ActivationStore get" it should "get existing activation matching id" in { implicit val tid: TransactionId = transId() val namespace = s"ns_${Random.alphanumeric.take(4).mkString}" val action = s"action1_${Random.alphanumeric.take(4).mkString}" val activation = newActivation(namespace, action, 1L) store(activation, context) checkGetActivation(activation) } it should "throws NoDocumentException when activation does not exist" in { implicit val tid: TransactionId = transId() activationStore.get(ActivationId("non-existing-doc"), context).failed.futureValue shouldBe a[NoDocumentException] } }
Example 155
Source File: ActivationStoreBehaviorBase.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.core.database.test.behavior import java.time.Instant import akka.stream.ActorMaterializer import common.{StreamLogging, WskActorSystem} import org.apache.openwhisk.common.TransactionId import org.apache.openwhisk.core.database.{ActivationStore, CacheChangeNotification, UserContext} import org.apache.openwhisk.core.database.test.behavior.ArtifactStoreTestUtil.storeAvailable import org.apache.openwhisk.core.entity._ import org.scalatest.concurrent.{IntegrationPatience, ScalaFutures} import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers, Outcome} import scala.collection.mutable.ListBuffer import scala.concurrent.Await import scala.concurrent.duration.Duration import scala.concurrent.duration.DurationInt import scala.language.postfixOps import scala.util.{Random, Try} trait ActivationStoreBehaviorBase extends FlatSpec with ScalaFutures with Matchers with StreamLogging with WskActorSystem with IntegrationPatience with BeforeAndAfterEach { protected implicit val materializer: ActorMaterializer = ActorMaterializer() protected implicit val notifier: Option[CacheChangeNotification] = None def context: UserContext def activationStore: ActivationStore private val docsToDelete = ListBuffer[(UserContext, ActivationId)]() def storeType: String protected def transId() = TransactionId(Random.alphanumeric.take(32).mkString) override def afterEach(): Unit = { cleanup() stream.reset() } override protected def withFixture(test: NoArgTest): Outcome = { assume(storeAvailable(storeAvailableCheck), s"$storeType not configured or available") val outcome = super.withFixture(test) if (outcome.isFailed) { println(logLines.mkString("\n")) } outcome } protected def storeAvailableCheck: Try[Any] = Try(true) //~----------------------------------------< utility methods > protected def store(activation: WhiskActivation, context: UserContext)( implicit transid: TransactionId, notifier: Option[CacheChangeNotification]): DocInfo = { val doc = activationStore.store(activation, context).futureValue docsToDelete.append((context, ActivationId(activation.docid.asString))) doc } protected def newActivation(ns: String, actionName: String, start: Long): WhiskActivation = { WhiskActivation( EntityPath(ns), EntityName(actionName), Subject(), ActivationId.generate(), Instant.ofEpochMilli(start), Instant.ofEpochMilli(start + 1000)) } def cleanup()(implicit timeout: Duration = 10 seconds): Unit = { implicit val tid: TransactionId = transId() docsToDelete.map { e => Try { Await.result(activationStore.delete(e._2, e._1), timeout) } } docsToDelete.clear() } }
Example 156
Source File: ReservoirSampling.scala From Mastering-Scala-Machine-Learning with MIT License | 5 votes |
package org.akozlov.chapter01

import scala.reflect.ClassTag
import scala.util.Random

object ReservoirSampling extends App {
  def reservoirSample[T: ClassTag](input: Iterator[T], k: Int): Array[T] = {
    val reservoir = new Array[T](k)
    // Put the first k elements in the reservoir.
    var i = 0
    while (i < k && input.hasNext) {
      val item = input.next()
      reservoir(i) = item
      i += 1
    }
    if (i < k) {
      // If input size < k, trim the array size.
      reservoir.take(i)
    } else {
      // If input size > k, continue the sampling process.
      // The next element must be kept with probability k / (i + 1), so the random
      // index is drawn from [0, i] inclusive, i.e. nextInt(i + 1), not nextInt(i).
      while (input.hasNext) {
        val item = input.next()
        val replacementIndex = Random.nextInt(i + 1)
        if (replacementIndex < k) {
          reservoir(replacementIndex) = item
        }
        i += 1
      }
      reservoir
    }
  }

  val numLines = 15
  val w = new java.io.FileWriter(new java.io.File("out.txt"))
  val lines = io.Source.fromFile("data/iris/in.txt").getLines
  reservoirSample(lines, numLines).foreach { s =>
    w.write(s + scala.util.Properties.lineSeparator)
  }
  w.close()
}
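A quick way to sanity-check a reservoir sampler is to run it many times with a seeded generator and confirm every input element is kept at roughly the same rate. The sketch below mirrors the algorithm above but takes an explicit Random so the check is reproducible; the object and function names are illustrative only:

import scala.reflect.ClassTag
import scala.util.Random

object ReservoirCheck extends App {
  // Algorithm R with an explicit, seedable generator.
  def sample[T: ClassTag](input: Iterator[T], k: Int, rng: Random): Array[T] = {
    val reservoir = new Array[T](k)
    var i = 0
    while (i < k && input.hasNext) { reservoir(i) = input.next(); i += 1 }
    while (input.hasNext) {
      val item = input.next()
      val j = rng.nextInt(i + 1) // uniform index in [0, i]
      if (j < k) reservoir(j) = item
      i += 1
    }
    if (i < k) reservoir.take(i) else reservoir
  }

  // Sample 3 values out of 0..9, 20000 times: each value should be kept ~6000 times.
  val rng = new Random(42)
  val counts = Array.fill(10)(0)
  (1 to 20000).foreach { _ =>
    sample(Iterator.range(0, 10), 3, rng).foreach(v => counts(v) += 1)
  }
  println(counts.mkString(", "))
}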
Example 157
Source File: FeatureHasherParitySpec.scala From mleap with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.parity.feature import org.apache.spark.ml.Transformer import org.apache.spark.ml.feature.FeatureHasher import org.apache.spark.ml.parity.SparkParityBase import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.types._ import scala.util.Random class FeatureHasherParitySpec extends SparkParityBase { val categories = Seq( "spark", "and", "mleap", "are", "super", "dope", "together" ) def randomRow(): Row = Row(Random.nextDouble(), Random.nextBoolean(), Random.nextInt(20), Random.nextInt(20).toString, Random.shuffle(categories).head) val rows = spark.sparkContext.parallelize(Seq.tabulate(100) { _ => randomRow() }) val schema = new StructType() .add("real", DoubleType, nullable = false) .add("bool", BooleanType, nullable = false) .add("int", IntegerType, nullable = false) .add("stringNum", StringType, nullable = true) .add("string", StringType, nullable = true) override val dataset: DataFrame = spark.sqlContext.createDataFrame(rows, schema) override val sparkTransformer: Transformer = new FeatureHasher() .setInputCols("real", "bool", "int", "stringNum", "string") .setOutputCol("features") .setNumFeatures(1 << 17) .setCategoricalCols(Array("int")) }
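Random.shuffle(categories).head in the spec above picks one element uniformly at random, but it copies and reorders the whole collection first. Indexing with Random.nextInt gives the same distribution with a single bounded draw; a small sketch of both (the category list is copied from the spec, the object name is illustrative):

import scala.util.Random

object PickOne extends App {
  // Vector gives constant-time indexing for the second approach.
  val categories = Vector("spark", "and", "mleap", "are", "super", "dope", "together")

  // Shuffles a copy of the whole collection, then takes the first element.
  val viaShuffle = Random.shuffle(categories).head

  // Same uniform distribution, but only one nextInt call and no copy.
  val viaIndex = categories(Random.nextInt(categories.size))

  println(s"$viaShuffle / $viaIndex")
}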
Example 158
Source File: BundleFileSystemSpec.scala From mleap with Apache License 2.0 | 5 votes |
package ml.combust.bundle.serializer import java.net.URI import java.nio.file.Files import ml.combust.bundle.test.TestSupport._ import ml.combust.bundle.{BundleFile, BundleRegistry} import ml.combust.bundle.test.ops._ import ml.combust.bundle.test.{TestBundleFileSystem, TestContext} import org.scalatest.FunSpec import resource.managed import scala.util.Random class BundleFileSystemSpec extends FunSpec { implicit val testContext = TestContext(BundleRegistry("test-registry"). registerFileSystem(new TestBundleFileSystem)) val randomCoefficients = (0 to 100000).map(v => Random.nextDouble()) val lr = LinearRegression(uid = "linear_regression_example", input = "input_field", output = "output_field", model = LinearModel(coefficients = randomCoefficients, intercept = 44.5)) describe("saving/loading bundle file using test file system") { it("loads/saves using the custom file system") { val tmpDir = Files.createTempDirectory("BundleFileSystemSpec") val uri = new URI(s"test://$tmpDir/test.zip") lr.writeBundle.name("my_bundle").save(uri) val loaded = uri.loadBundle().get assert(loaded.root == lr) } } }
Example 159
Source File: ErrorHandlingSpec.scala From mleap with Apache License 2.0 | 5 votes |
package ml.combust.bundle.serializer import java.io.File import ml.combust.bundle.{BundleFile, BundleRegistry, TestUtil} import ml.combust.bundle.test.TestContext import ml.combust.bundle.test.ops._ import org.scalatest.FunSpec import ml.combust.bundle.test.TestSupport._ import resource._ import scala.util.{Failure, Random} case class UnknownTransformer() extends Transformer { override val uid: String = "haha" } class ErrorHandlingSpec extends FunSpec { implicit val testContext = TestContext(BundleRegistry("test-registry")) val randomCoefficients = (0 to 100000).map(v => Random.nextDouble()) val lr = LinearRegression(uid = "linear_regression_example", input = "input_field", output = "output_field", model = LinearModel(coefficients = randomCoefficients, intercept = 44.5)) val si = StringIndexer(uid = "string_indexer_example", input = "input_string", output = "output_index", model = StringIndexerModel(strings = Seq("hey", "there", "man"))) val pipeline = Pipeline(uid = "my_pipeline", PipelineModel(Seq(si, lr))) describe("with unknown op") { it("returns a failure") { val result = (for(bf <- managed(BundleFile(new File(TestUtil.baseDir, "bad-model.zip")))) yield { UnknownTransformer().writeBundle.save(bf) }).tried.flatMap(identity) assert(result.isFailure) result match { case Failure(error) => assert(error.isInstanceOf[NoSuchElementException]) assert(error.getMessage == "key not found: ml.combust.bundle.serializer.UnknownTransformer") case _ => } } } }
Example 160
Source File: ImputerParitySpec.scala From mleap with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.mleap.parity.feature import org.apache.spark.ml.Transformer import org.apache.spark.ml.mleap.feature.Imputer import org.apache.spark.ml.parity.SparkParityBase import org.apache.spark.sql._ import org.apache.spark.sql.types.{DoubleType, StructType} import scala.util.Random class ImputerParitySpec extends SparkParityBase { def randomRow(): Row = { if(Random.nextBoolean()) { if(Random.nextBoolean()) { Row(23.4) } else { Row(Random.nextDouble()) } } else { Row(33.2) } } val rows = spark.sparkContext.parallelize(Seq.tabulate(100) { i => randomRow() }) val schema = new StructType().add("mv", DoubleType, nullable = true) override val dataset: DataFrame = spark.sqlContext.createDataFrame(rows, schema) override val sparkTransformer: Transformer = new Imputer(uid = "imputer"). setInputCol("mv"). setOutputCol("mv_imputed"). setMissingValue(23.4). setStrategy("mean").fit(dataset) }
Example 161
Source File: JMXRegistryTest.scala From airframe with Apache License 2.0 | 5 votes |
package wvlet.airframe.jmx import wvlet.airspec.AirSpec import wvlet.log.LogSupport import scala.util.Random @JMX(description = "A example MBean object") class SampleMBean { @JMX(description = "free memory size") def freeMemory: Long = { Runtime.getRuntime.freeMemory() } } case class FieldMBean(@JMX a: Int, @JMX b: String) class NestedMBean { @JMX(description = "nested stat") def stat: Stat = { new Stat(Random.nextInt(10), "nested JMX bean") } } case class Stat(@JMX count: Int, @JMX state: String) trait MyJMXApp extends LogSupport {} object MyJMXAppObj class JMXRegistryTest extends AirSpec { val agent = new JMXAgent(new JMXConfig()) override protected def afterAll: Unit = { agent.unregisterAll } def `register a new mbean`: Unit = { val b = new SampleMBean agent.register(b) if (!JMXUtil.isAtLeastJava9) { val m = agent.getMBeanInfo("wvlet.airframe.jmx:name=SampleMBean") debug(m) val a = agent.getMBeanAttribute("wvlet.airframe.jmx:name=SampleMBean", "freeMemory") debug(a) } } def `support class field`: Unit = { val f = new FieldMBean(1, "apple") agent.register(f) if (!JMXUtil.isAtLeastJava9) { val m = agent.getMBeanInfo("wvlet.airframe.jmx:name=FieldMBean") info(m) agent.getMBeanAttribute("wvlet.airframe.jmx:name=FieldMBean", "a") shouldBe 1 agent.getMBeanAttribute("wvlet.airframe.jmx:name=FieldMBean", "b") shouldBe "apple" } } def `handle nested JMX MBean`: Unit = { val n = new NestedMBean agent.register(n) if (!JMXUtil.isAtLeastJava9) { val m = agent.getMBeanInfo("wvlet.airframe.jmx:name=NestedMBean") info(m) agent.getMBeanAttribute("wvlet.airframe.jmx:name=NestedMBean", "stat.count").toString.toInt <= 10 shouldBe true agent.getMBeanAttribute("wvlet.airframe.jmx:name=NestedMBean", "stat.state") shouldBe "nested JMX bean" } } def `avoid double registration`: Unit = { val f = new FieldMBean(1, "apple") agent.register(f) agent.register(f) } def `support complex trait name`: Unit = { agent.register[MyJMXApp](new MyJMXApp {}) } }
Example 162
Source File: LocalAuthSrv.scala From Cortex with GNU Affero General Public License v3.0 | 5 votes |
package org.thp.cortex.services import javax.inject.{Inject, Singleton} import scala.concurrent.{ExecutionContext, Future} import scala.util.Random import play.api.mvc.RequestHeader import akka.stream.Materializer import org.thp.cortex.models.User import org.elastic4play.controllers.Fields import org.elastic4play.services.{AuthCapability, AuthContext, AuthSrv} import org.elastic4play.utils.Hasher import org.elastic4play.{AuthenticationError, AuthorizationError} @Singleton class LocalAuthSrv @Inject()(userSrv: UserSrv, implicit val ec: ExecutionContext, implicit val mat: Materializer) extends AuthSrv { val name = "local" override val capabilities = Set(AuthCapability.changePassword, AuthCapability.setPassword) private[services] def doAuthenticate(user: User, password: String): Boolean = user.password().map(_.split(",", 2)).fold(false) { case Array(seed, pwd) ⇒ val hash = Hasher("SHA-256").fromString(seed + password).head.toString hash == pwd case _ ⇒ false } override def authenticate(username: String, password: String)(implicit request: RequestHeader): Future[AuthContext] = userSrv.get(username).flatMap { user ⇒ if (doAuthenticate(user, password)) userSrv.getFromUser(request, user, name) else Future.failed(AuthenticationError("Authentication failure")) } override def changePassword(username: String, oldPassword: String, newPassword: String)(implicit authContext: AuthContext): Future[Unit] = userSrv.get(username).flatMap { user ⇒ if (doAuthenticate(user, oldPassword)) setPassword(username, newPassword) else Future.failed(AuthorizationError("Authentication failure")) } override def setPassword(username: String, newPassword: String)(implicit authContext: AuthContext): Future[Unit] = { val seed = Random.nextString(10).replace(',', '!') val newHash = seed + "," + Hasher("SHA-256").fromString(seed + newPassword).head.toString userSrv.update(username, Fields.empty.set("password", newHash)).map(_ ⇒ ()) } }
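setPassword above seeds the hash with Random.nextString(10). Two caveats when borrowing this pattern: nextString can produce arbitrary (including unprintable) Unicode characters, and scala.util.Random is not a cryptographically secure generator, so for real salts java.security.SecureRandom is the safer choice. A hedged sketch of an alternative salt generator, not part of Cortex:

import java.security.SecureRandom
import java.util.Base64

object SaltGenerator {
  private val rng = new SecureRandom()

  // 16 random bytes, Base64-encoded: printable, comma-free, and unpredictable,
  // so it still fits the "seed,hash" storage format used above.
  def newSalt(): String = {
    val bytes = new Array[Byte](16)
    rng.nextBytes(bytes)
    Base64.getEncoder.encodeToString(bytes)
  }
}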
Example 163
Source File: KeyAuthSrv.scala From Cortex with GNU Affero General Public License v3.0 | 5 votes |
package org.thp.cortex.services import java.util.Base64 import javax.inject.{Inject, Singleton} import scala.concurrent.{ExecutionContext, Future} import scala.util.Random import play.api.libs.json.JsArray import play.api.mvc.RequestHeader import akka.stream.Materializer import akka.stream.scaladsl.Sink import org.elastic4play.controllers.Fields import org.elastic4play.services.{AuthCapability, AuthContext, AuthSrv} import org.elastic4play.{AuthenticationError, BadRequestError} @Singleton class KeyAuthSrv @Inject()(userSrv: UserSrv, implicit val ec: ExecutionContext, implicit val mat: Materializer) extends AuthSrv { override val name = "key" final protected def generateKey(): String = { val bytes = Array.ofDim[Byte](24) Random.nextBytes(bytes) Base64.getEncoder.encodeToString(bytes) } override val capabilities = Set(AuthCapability.authByKey) override def authenticate(key: String)(implicit request: RequestHeader): Future[AuthContext] = { import org.elastic4play.services.QueryDSL._ // key attribute is sensitive so it is not possible to search on that field userSrv .find("status" ~= "Ok", Some("all"), Nil) ._1 .filter(_.key().contains(key)) .runWith(Sink.headOption) .flatMap { case Some(user) ⇒ userSrv.getFromUser(request, user, name) case None ⇒ Future.failed(AuthenticationError("Authentication failure")) } } override def renewKey(username: String)(implicit authContext: AuthContext): Future[String] = { val newKey = generateKey() userSrv.update(username, Fields.empty.set("key", newKey)).map(_ ⇒ newKey) } override def getKey(username: String)(implicit authContext: AuthContext): Future[String] = userSrv.get(username).map(_.key().getOrElse(throw BadRequestError(s"User $username hasn't key"))) override def removeKey(username: String)(implicit authContext: AuthContext): Future[Unit] = userSrv.update(username, Fields.empty.set("key", JsArray())).map(_ ⇒ ()) }
Example 164
Source File: BasicsEx1Tester.scala From chisel-lab with BSD 2-Clause "Simplified" License | 5 votes |
package exercises

import chisel3._
import chisel3.util._
import chisel3.iotesters.{ChiselFlatSpec, Driver, PeekPokeTester}

class MACorACMPeekPoke(c: MACorACM) extends PeekPokeTester(c) {
  // The parameter c refers to the module we are testing. To access signals from MACorACM
  // the prefix "c." is therefore needed.
  val tests = 50
  import scala.util.Random

  poke(c.io.sel, true) // Set the selector signal to 1/true.B
  for (i <- 0 until tests) { // Loop to run 50 tests
    val in_a = Random.nextInt(16) // Sets the Scala values in_a, in_b and in_c to random integers
    val in_b = Random.nextInt(16) // between 0 and 16, 16 not included. This range is chosen to avoid overflow.
    val in_c = Random.nextInt(16)
    poke(c.io.a, in_a) // Drives the MACorACM inputs a, b and c with the random integer values.
    poke(c.io.b, in_b)
    poke(c.io.c, in_c)
    expect(c.io.z, (in_a * in_b) + in_c) // Tests whether the module under test computes the output
                                         // correctly. If not, an error is thrown.
    step(1) // Advance the simulation by one clock cycle. Not needed for this test.
  }

  poke(c.io.sel, false)
  // This loop tests the other operation the MACorACM module should compute.
  for (i <- 0 until tests) {
    val in_a = Random.nextInt(16)
    val in_b = Random.nextInt(16)
    val in_c = Random.nextInt(16)
    poke(c.io.a, in_a)
    poke(c.io.b, in_b)
    poke(c.io.c, in_c)
    expect(c.io.z, (in_a + in_b) * in_c)
    step(1)
  }
}

object MACorACMTester extends App {
  // If any expect statement fails, Driver returns false and the assert aborts,
  // so the success message below is never printed.
  assert(Driver(() => new MACorACM) { c => new MACorACMPeekPoke(c) })
  println("SUCCESS!!")
}
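The tester above draws its stimuli from the global scala.util.Random singleton without a seed, so a failing run is hard to reproduce. Seeding the generator, either the singleton or a local instance, makes the same input vectors come back on every run; a small sketch, not part of the original lab code:

import scala.util.Random

object SeededStimuli extends App {
  // Option 1: seed the shared singleton once, before any stimuli are generated.
  Random.setSeed(0xC0FFEE)

  // Option 2 (usually safer): a private, seeded instance, so other code touching
  // the singleton cannot disturb the sequence.
  val rng = new Random(42)
  val stimuli = Seq.fill(5)((rng.nextInt(16), rng.nextInt(16), rng.nextInt(16)))

  stimuli.foreach(println) // prints the same five triples on every run
}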
Example 165
Source File: StreamStreamDataGenerator.scala From structured-streaming-application with Apache License 2.0 | 5 votes |
package knolx.kafka import java.util.Properties import akka.actor.ActorSystem import knolx.Config._ import knolx.KnolXLogger import knolx.spark.Stock import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.json4s.jackson.Serialization.write import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.DurationInt import scala.util.Random object StreamStreamDataGenerator extends App with KnolXLogger { val system = ActorSystem("DataStreamer") val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) val producer = new KafkaProducer[String, String](props) val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay") val orderTypes = List("buy", "sell") val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9) val randomCompanyNames = Random.shuffle(companyNames).drop(Random.shuffle((1 to 3).toList).head) implicit val formats = Serialization.formats(NoTypeHints) info("Streaming companies listed into Kafka...") system.scheduler.schedule(0 seconds, 20 seconds) { randomCompanyNames.foreach { name => producer.send(new ProducerRecord[String, String](companiesTopic, name)) } } info("Streaming stocks data into Kafka...") system.scheduler.schedule(0 seconds, 5 seconds) { companyNames.foreach { name => val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head) producer.send(new ProducerRecord[String, String](stocksTopic, write(stock))) } } }
Example 166
Source File: DataStreamer.scala From structured-streaming-application with Apache License 2.0 | 5 votes |
package knolx.kafka import java.util.Properties import akka.actor.ActorSystem import knolx.Config.{bootstrapServer, topic} import knolx.KnolXLogger import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.DurationInt import scala.util.Random object DataStreamer extends App with KnolXLogger { val system = ActorSystem("DataStreamer") val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) val producer = new KafkaProducer[String, String](props) val someWords = List("about", "above", "after", "again", "against") info("Streaming data into Kafka...") system.scheduler.schedule(0 seconds, 200 milliseconds) { Random.shuffle(someWords).headOption.foreach { word => producer.send(new ProducerRecord[String, String](topic, word)) } } }
Example 167
Source File: StreamStaticDataGenerator.scala From structured-streaming-application with Apache License 2.0 | 5 votes |
package knolx.kafka import java.util.Properties import akka.actor.ActorSystem import knolx.Config.{bootstrapServer, topic} import knolx.KnolXLogger import knolx.spark.Stock import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.json4s.jackson.Serialization.write import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.DurationInt import scala.util.Random object StreamStaticDataGenerator extends App with KnolXLogger { val system = ActorSystem("DataStreamer") val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) val producer = new KafkaProducer[String, String](props) val companyNames = List("kirloskar", "bajaj", "amul", "dlf", "ebay") val orderTypes = List("buy", "sell") val numberOfSharesList = List(1, 2, 3, 4, 5, 6, 7, 8, 9) implicit val formats = Serialization.formats(NoTypeHints) info("Streaming data into Kafka...") system.scheduler.schedule(0 seconds, 5 seconds) { companyNames.foreach { name => val stock = Stock(name, Random.shuffle(numberOfSharesList).head, Random.shuffle(orderTypes).head) producer.send(new ProducerRecord[String, String](topic, write(stock))) } } }
Example 168
Source File: MultiDataStreamer.scala From structured-streaming-application with Apache License 2.0 | 5 votes |
package knolx.kafka import java.util.Properties import akka.actor.ActorSystem import knolx.Config.{bootstrapServer, topic} import knolx.KnolXLogger import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig, ProducerRecord} import org.apache.kafka.common.serialization.StringSerializer import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.DurationInt import scala.language.postfixOps import scala.util.Random object MultiDataStreamer extends App with KnolXLogger { val system = ActorSystem("DataStreamer") val props = new Properties() props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServer) props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer].getName) val producer = new KafkaProducer[String, String](props) info("Streaming data into Kafka...") system.scheduler.schedule(0 seconds, 3000 milliseconds) { (1 to Random.nextInt(100)).foreach { id => producer.send(new ProducerRecord[String, String](topic,s"device$id", (Math.random * 2 + 1).toString)) } } }
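MultiDataStreamer mixes Math.random (a JVM-wide java.util.Random) with scala.util.Random.nextInt, so the generated values come from two independent generators and the stream cannot be replayed from a single seed. Keeping everything on one scala.util.Random instance makes the payload seedable; a minimal sketch with a made-up payload, independent of the Kafka plumbing above:

import scala.util.Random

object DeviceReadings extends App {
  val rng = new Random(7L)

  // One generator drives both the device count and the reading in [1.0, 3.0),
  // so the whole batch can be replayed from the single seed above.
  val readings = (1 to rng.nextInt(100)).map { id =>
    s"device$id" -> (rng.nextDouble() * 2 + 1)
  }

  readings.foreach { case (device, value) => println(s"$device=$value") }
}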
Example 169
Source File: TestFlinkGenLast.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.flink.generator import com.amazon.milan.application.ApplicationConfiguration import com.amazon.milan.compiler.flink.testing._ import com.amazon.milan.lang._ import com.amazon.milan.testing.applications._ import org.junit.Assert._ import org.junit.Test import scala.util.Random @Test class TestFlinkGenLast { @Test def test_FlinkGenLast_InFlatMapOfGroupBy_WithOneGroupKeyInInputRecords_OutputsOnlyLastInputRecordToOutput(): Unit = { val input = Stream.of[IntKeyValueRecord].withName("input") val grouped = input.groupBy(r => r.key) def maxByValueAndLast(stream: Stream[IntKeyValueRecord]): Stream[IntKeyValueRecord] = stream.maxBy(r => r.value).last() val output = grouped.flatMap((key, group) => maxByValueAndLast(group)).withName("output") val graph = new StreamGraph(output) val config = new ApplicationConfiguration config.setListSource(input, IntKeyValueRecord(1, 1), IntKeyValueRecord(1, 3), IntKeyValueRecord(1, 2)) // Keep running until we find records in the output file. val results = TestApplicationExecutor.executeApplication( graph, config, 20, r => r.getRecords(output).isEmpty, output) val outputRecords = results.getRecords(output) assertEquals(List(IntKeyValueRecord(1, 3)), outputRecords) } @Test def test_FlinkGenLast_InFlatMapOfGroupBy_With10GroupKeysInInputRecords_With10RecordsPerGroupKey_OutputsOnlyLastRecordInInputForEachGroupKey(): Unit = { val input = Stream.of[IntKeyValueRecord].withName("input") val grouped = input.groupBy(r => r.key) def maxByValueAndLast(stream: Stream[IntKeyValueRecord]): Stream[IntKeyValueRecord] = stream.maxBy(r => r.value).last() val output = grouped.flatMap((key, group) => maxByValueAndLast(group)).withName("output") val graph = new StreamGraph(output) val inputRecords = Random.shuffle(List.tabulate(10)(group => List.tabulate(10)(i => IntKeyValueRecord(group, i))).flatten) val config = new ApplicationConfiguration config.setListSource(input, inputRecords: _*) val results = TestApplicationExecutor.executeApplication( graph, config, 20, r => r.getRecords(output).length < 10, output) val outputRecords = results.getRecords(output).sortBy(_.key) val expectedOutputRecords = List.tabulate(10)(i => inputRecords.filter(_.key == i).maxBy(_.value)) assertEquals(expectedOutputRecords, outputRecords) } }
Example 170
Source File: TestPriorityQueueTypeSerializer.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.flink.types import com.amazon.milan.compiler.flink.runtime.SequenceNumberOrdering import com.amazon.milan.compiler.flink.testing.IntRecord import com.amazon.milan.compiler.flink.testutil._ import com.amazon.milan.compiler.flink.types import org.apache.flink.api.scala._ import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment import org.junit.Assert._ import org.junit.Test import scala.collection.mutable import scala.util.Random @Test class TestPriorityQueueTypeSerializer { @Test def test_PriorityQueueTypeSerializer_Deserialize_WithQueueOfInt_With100RandomItems_ReturnsQueueThatYieldsSameItemsAsOriginal(): Unit = { val typeInfo = new PriorityQueueTypeInformation[Int](createTypeInformation[Int], Ordering.Int) val env = StreamExecutionEnvironment.getExecutionEnvironment val serializer = typeInfo.createSerializer(env.getConfig) val original = new mutable.PriorityQueue[Int]() val rand = new Random(0) val values = List.tabulate(100)(_ => rand.nextInt(100)) original.enqueue(values: _*) val copy = copyWithSerializer(original, serializer) assertEquals(original.length, copy.length) assertEquals(original.dequeueAll.toList, copy.dequeueAll.toList) } @Test def test_PriorityQueueTypeSerializer_Deserialize_AfterRestoring_WithQueueOfInt_With100RandomItems_ReturnsQueueThatYieldsSameItemsAsOriginal(): Unit = { val typeInfo = new PriorityQueueTypeInformation[Int](createTypeInformation[Int], Ordering.Int) val env = StreamExecutionEnvironment.getExecutionEnvironment val serializer = typeInfo.createSerializer(env.getConfig) val snapshot = serializer.snapshotConfiguration() val snapshotCopy = new types.PriorityQueueTypeSerializer.Snapshot[Int]() copyData(snapshot.writeSnapshot, input => snapshotCopy.readSnapshot(snapshot.getCurrentVersion, input, getClass.getClassLoader)) val serializerCopy = snapshotCopy.restoreSerializer() val original = new mutable.PriorityQueue[Int]() val rand = new Random(0) val values = List.tabulate(100)(_ => rand.nextInt(100)) original.enqueue(values: _*) val copy = copyData( output => serializer.serialize(original, output), input => serializerCopy.deserialize(input)) assertEquals(original.length, copy.length) assertEquals(original.dequeueAll.toList, copy.dequeueAll.toList) } @Test def test_PriorityQueueTypeSerializer_Deserialize_AfterRestoring_WithQueueOfRecordWrapperAndSequenceNumberOrdering_With100RandomItems_ReturnsQueueThatYieldsSameItemsAsOriginal(): Unit = { val ordering = new SequenceNumberOrdering[IntRecord, Product] val typeInfo = new PriorityQueueTypeInformation[RecordWrapper[IntRecord, Product]]( RecordWrapperTypeInformation.wrap(createTypeInformation[IntRecord]), ordering) val env = StreamExecutionEnvironment.getExecutionEnvironment val serializer = typeInfo.createSerializer(env.getConfig) val snapshot = serializer.snapshotConfiguration() val snapshotCopy = new types.PriorityQueueTypeSerializer.Snapshot[RecordWrapper[IntRecord, Product]]() copyData(snapshot.writeSnapshot, input => snapshotCopy.readSnapshot(snapshot.getCurrentVersion, input, getClass.getClassLoader)) val serializerCopy = snapshotCopy.restoreSerializer() val original = new mutable.PriorityQueue[RecordWrapper[IntRecord, Product]]()(ordering) val rand = new Random(0) val values = List.tabulate(100)(i => RecordWrapper.wrap(IntRecord(rand.nextInt(100)), i.toLong)) original.enqueue(values: _*) val copy = copyData( output => serializer.serialize(original, output), input => serializerCopy.deserialize(input)) assertEquals(original.length, copy.length) 
assertEquals(original.dequeueAll.toList, copy.dequeueAll.toList) } }
Example 171
Source File: TestKeyedLastByOperator.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.flink.runtime import com.amazon.milan.compiler.flink.testing.{SingletonMemorySinkFunction, _} import com.amazon.milan.compiler.flink.testutil._ import com.amazon.milan.compiler.flink.types.RecordWrapper import org.apache.flink.api.scala._ import org.junit.Assert._ import org.junit.Test import scala.collection.JavaConverters._ import scala.util.Random @Test class TestKeyedLastByOperator { @Test def test_KeyedLastByOperator_WithRandomInputsWithTenKeys_ReturnsOneRecordPerKeyWithMaxValue(): Unit = { val operator: KeyedLastByOperator[IntKeyValueRecord, Tuple1[Int]] = new KeyedLastByOperator[IntKeyValueRecord, Tuple1[Int]](createTypeInformation[IntKeyValueRecord], createTypeInformation[Tuple1[Int]]) { override protected def takeNewValue(newRecord: RecordWrapper[IntKeyValueRecord, Tuple1[Int]], currentRecord: RecordWrapper[IntKeyValueRecord, Tuple1[Int]]): Boolean = { newRecord.value.value > currentRecord.value.value } } val rand = new Random(0) val data = List.tabulate(1000)(_ => { IntKeyValueRecord(rand.nextInt(10), rand.nextInt(100)) }) val env = getTestExecutionEnvironment val input = env.fromCollection(data.asJavaCollection, createTypeInformation[IntKeyValueRecord]).wrap(createTypeInformation[IntKeyValueRecord]) val keySelector = new RecordWrapperKeySelector[IntKeyValueRecord, Tuple1[Int]](createTypeInformation[Tuple1[Int]]) val keyed = input .map(new ModifyRecordKeyMapFunction[IntKeyValueRecord, Product, Tuple1[Int]](createTypeInformation[IntKeyValueRecord], createTypeInformation[Tuple1[Int]]) { override protected def getNewKey(value: IntKeyValueRecord, key: Product): Tuple1[Int] = Tuple1(value.key) }) .keyBy(keySelector, keySelector.getKeyType) val output = keyed.transform( "op", operator.getProducedType, operator) .unwrap() val sink = new SingletonMemorySinkFunction[IntKeyValueRecord]() output.addSink(sink) env.executeThenWaitFor(() => sink.getRecordCount >= 10, 5) val expectedOutput = data.groupBy(_.key).map { case (_, g) => g.maxBy(_.value) }.toList.sortBy(_.key) val actualOutput = sink.getValues.sortBy(_.key) assertEquals(expectedOutput, actualOutput) } }
Example 172
Source File: package.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.flink import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import com.amazon.milan.compiler.flink.runtime.{UnwrapRecordsMapFunction, WrapRecordsMapFunction} import com.amazon.milan.compiler.flink.testing.IntKeyValueRecord import com.amazon.milan.compiler.flink.types.{RecordWrapper, RecordWrapperTypeInformation} import org.apache.flink.api.common.typeinfo.TypeInformation import org.apache.flink.api.common.typeutils.TypeSerializer import org.apache.flink.api.java.typeutils.ResultTypeQueryable import org.apache.flink.core.memory.{DataInputView, DataInputViewStreamWrapper, DataOutputView, DataOutputViewStreamWrapper} import org.apache.flink.streaming.api.TimeCharacteristic import org.apache.flink.streaming.api.datastream.DataStream import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment import scala.language.implicitConversions import scala.util.Random package object testutil { def getTestExecutionEnvironment: StreamExecutionEnvironment = { val env = StreamExecutionEnvironment.getExecutionEnvironment env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime) env.setBufferTimeout(0) env } def copyWithSerializer[T](value: T, serializer: TypeSerializer[T]): T = { val outputStream = new ByteArrayOutputStream() val outputView = new DataOutputViewStreamWrapper(outputStream) serializer.serialize(value, outputView) val bytes = outputStream.toByteArray val inputStream = new ByteArrayInputStream(bytes) val inputView = new DataInputViewStreamWrapper(inputStream) serializer.deserialize(inputView) } def copyData[T](writeValue: DataOutputView => Unit, readValue: DataInputView => T): T = { val outputStream = new ByteArrayOutputStream() val outputView = new DataOutputViewStreamWrapper(outputStream) writeValue(outputView) val bytes = outputStream.toByteArray val inputStream = new ByteArrayInputStream(bytes) val inputView = new DataInputViewStreamWrapper(inputStream) readValue(inputView) } def generateIntKeyValueRecords(recordCount: Int, keyCount: Int, maxValue: Int): List[IntKeyValueRecord] = { val rand = new Random(0) List.tabulate(recordCount)(_ => IntKeyValueRecord(rand.nextInt(keyCount), rand.nextInt(maxValue + 1))) } implicit class WrappedDataStreamExtensions[T >: Null, TKey >: Null <: Product](dataStream: DataStream[RecordWrapper[T, TKey]]) { def unwrap(recordTypeInformation: TypeInformation[T]): DataStream[T] = { val mapper = new UnwrapRecordsMapFunction[T, TKey](recordTypeInformation) this.dataStream.map(mapper) } def unwrap(): DataStream[T] = { val recordType = this.dataStream.getType.asInstanceOf[RecordWrapperTypeInformation[T, TKey]].valueTypeInformation this.unwrap(recordType) } } implicit class DataStreamExtensions[T >: Null](dataStream: DataStream[T]) { def wrap(recordTypeInformation: TypeInformation[T]): DataStream[RecordWrapper[T, Product]] = { val mapper = new WrapRecordsMapFunction[T](recordTypeInformation) this.dataStream.map(mapper) } def wrap(): DataStream[RecordWrapper[T, Product]] = { val recordType = this.dataStream.asInstanceOf[ResultTypeQueryable[T]].getProducedType this.wrap(recordType) } } }
Example 173
Source File: Records.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.scala.testing import com.amazon.milan.Id import scala.util.Random class IntRecord(val recordId: String, val i: Int) { override def toString: String = s"IntRecord($i)" override def equals(obj: Any): Boolean = obj match { case o: IntRecord => this.i == o.i case _ => false } } object IntRecord { def apply(i: Int): IntRecord = new IntRecord(Id.newId(), i) } class KeyValueRecord(val recordId: String, val key: Int, val value: Int) { override def toString: String = s"($key: $value)" override def equals(obj: Any): Boolean = obj match { case o: KeyValueRecord => this.key == o.key && this.value == o.value case _ => false } } object KeyValueRecord { def apply(key: Int, value: Int): KeyValueRecord = new KeyValueRecord(Id.newId(), key, value) def generate(recordCount: Int, maxKey: Int, maxValue: Int): List[KeyValueRecord] = { val rand = new Random() List.tabulate(recordCount)(_ => KeyValueRecord(rand.nextInt(maxKey), rand.nextInt(maxValue))) } }
Example 174
Source File: Example.scala From temperature-machine with Apache License 2.0 | 5 votes |
package bad.robot.temperature.rrd import bad.robot.temperature.rrd.Seconds.{now, secondsToLong} import bad.robot.temperature.server.JsonFile import bad.robot.temperature.task.FixedTimeMeasurement import bad.robot.temperature.{Error, Measurement, SensorReading, Temperature} import scala.concurrent.duration.Duration import scala.util.Random import scalaz.{-\/, \/} object Example extends App { sys.props += ("org.slf4j.simpleLogger.defaultLogLevel" -> "info") val random = new Random() val duration = Duration(1, "days") val start = now() - duration.toSeconds val end = now() val frequency = Duration(30, "seconds") val hosts = List(Host("bedroom"), Host("lounge")) RrdFile(hosts, frequency).create(start - 5) populateRrd(hosts) val xml = Xml(start, start + aDay, hosts) xml.exportJson(JsonFile.filename) xml.exportXml("temperature.xml") Graph.create(start, start + aDay, hosts, "A day") Graph.create(start, start + aDay * 2, hosts, "2 days") Graph.create(start, start + aWeek, hosts, "A week") Graph.create(start, start + aMonth, hosts, "A month") println("Done generating " + duration) def populateRrd(hosts: List[Host]) = { def seed = random.nextInt(30) + random.nextDouble() def smooth = (value: Double) => if (random.nextDouble() > 0.5) value + random.nextDouble() else value - random.nextDouble() val temperatures = Stream.iterate(seed)(smooth).zip(Stream.iterate(seed)(smooth)) val times = Stream.iterate(start)(_ + frequency.toSeconds).takeWhile(_ < end) times.zip(temperatures).foreach({ case (time, (temperature1, temperature2)) => { handleError(RrdUpdate(hosts).apply(FixedTimeMeasurement(time, List( Measurement(hosts(0), time, List( SensorReading("?", Temperature(temperature1)), SensorReading("?", Temperature(temperature1 + 6.3))) )) ))) handleError(RrdUpdate(hosts).apply(FixedTimeMeasurement(time + 1, List( Measurement(hosts(1), time + 1, List( SensorReading("?", Temperature(temperature2)), SensorReading("?", Temperature(temperature2 + 1.3))) )) ))) } }) def handleError(f: => Error \/ Any): Unit = { f match { case -\/(error) => println(error) case _ => () } } } }
Example 175
Source File: UsersControllerSpec.scala From play-quill-jdbc with MIT License | 5 votes |
package controllers import org.scalatest.TestData import org.scalatestplus.play.{OneAppPerTest, PlaySpec} import play.api.Application import play.api.libs.json.Json import play.api.test.FakeRequest import play.api.test.Helpers._ import models.{User, Users} import test._ import scala.util.Random class UsersControllerSpec extends PlaySpec with OneAppPerTest { override def newAppForTest(testData: TestData): Application = fakeApp "GET /users/:id" should { "return 200 OK with body" in { val users = app.injector.instanceOf(classOf[Users]) val name = s"Name${Random.nextLong()}" val user = users.create(User(0, name, true)) val response = route(app, FakeRequest(GET, s"/users/${user.id}")).get status(response) mustBe OK val json = contentAsJson(response) (json \ "name").as[String] mustBe user.name } } "POST /users" should { "return 201 Created with Location header with created resource" in { val name = s"Name${Random.nextLong()}" val userJson = Json.obj("name" -> name, "isActive" -> true) val responseCreated = route(app, FakeRequest(POST, "/users").withJsonBody(userJson)).get status(responseCreated) mustBe CREATED val location = headers(responseCreated).get(LOCATION).get val responseGet = route(app, FakeRequest(GET, location)).get val json = contentAsJson(responseGet) (json \ "name").as[String] mustBe name } } "DELETE /users/:id" should { "return 204 No Content and delete resource" in { val users = app.injector.instanceOf(classOf[Users]) val name = s"Name${Random.nextLong()}" val user = users.create(User(0, name, true)) val response = route(app, FakeRequest(DELETE, s"/users/${user.id}")).get status(response) mustBe NO_CONTENT users.find(user.id) mustBe empty } } "PUT /users/:id" should { "return 204 No Content and update resource" in { val users = app.injector.instanceOf(classOf[Users]) val name = s"Name${Random.nextLong()}" val user = users.create(User(0, name, true)) val updatedName = s"Name${Random.nextLong()}" val updateUserJson = Json.obj("name" -> updatedName, "isActive" -> true) val response = route(app, FakeRequest(PUT, s"/users/${user.id}").withJsonBody(updateUserJson)).get status(response) mustBe NO_CONTENT val updatedUser = users.find(user.id) updatedUser.get.name mustBe updatedName } } }
Example 176
Source File: ExponentialBackOff.scala From schedoscope with Apache License 2.0 | 5 votes |
package org.schedoscope.scheduler.utils import scala.concurrent.duration.{Duration, FiniteDuration} import scala.util.Random case class ExponentialBackOff(backOffSlotTime: FiniteDuration, backOffSlot: Int = 1, backOffWaitTime: FiniteDuration = Duration.Zero, constantDelay: FiniteDuration = Duration.Zero, ceiling: Int = 10, resetOnCeiling: Boolean = false, retries: Int = 0, resets: Int = 0, totalRetries: Long = 0) { private def updateTime = backOffSlotTime * expectedBackOff(backOffSlot) + constantDelay private def expectedBackOff(backOffSlot: Int) = { val rand = new Random().nextInt(backOffSlot + 1) math.round(math.pow(2, rand) - 1) } def nextBackOff: ExponentialBackOff = { if (backOffSlot >= ceiling && resetOnCeiling) // reset copy(backOffSlot = 1, backOffWaitTime = Duration.Zero, resets = resets + 1, retries = 0, totalRetries = totalRetries + 1) else { val newBackOffSlot = if (backOffSlot >= ceiling) ceiling else backOffSlot + 1 // increase 1 collision copy(backOffSlot = newBackOffSlot, backOffWaitTime = updateTime, retries = retries + 1, totalRetries = totalRetries + 1) } } }
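nextBackOff above increments the collision counter and recomputes the wait: expectedBackOff draws a random exponent in [0, backOffSlot] via Random.nextInt(backOffSlot + 1), and the wait is backOffSlotTime * (2^exponent - 1) plus constantDelay, which is binary exponential backoff with jitter. A standalone sketch of the same idea, showing how the expected wait grows as collisions accumulate; the constants and names are illustrative:

import scala.concurrent.duration._
import scala.util.Random

object BackoffDemo extends App {
  val slotTime = 100.millis
  val rng = new Random()

  // After `collisions` failures, pick a random exponent in [0, collisions]
  // and wait (2^exponent - 1) slots.
  def nextWait(collisions: Int): FiniteDuration = {
    val exponent = rng.nextInt(collisions + 1)
    slotTime * (math.pow(2, exponent).toLong - 1)
  }

  (1 to 6).foreach(c => println(s"after $c collisions -> ${nextWait(c)}"))
}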
Example 177
Source File: XcsCover.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.reinforcement.xcs import org.scalaml.trading.Signal import org.scalaml.reinforcement.qlearning.QLState import scala.util.Random import org.scalaml.trading.operator.SOperator import org.scalaml.ga.Quantization import org.scalaml.ga.Gene.Encoding def cover( sensor: XcsSensor, actions: List[XcsAction] )(implicit quant: Quantization[Double], geneBits: Encoding): List[XcsRule] = { import Random._ require( actions.nonEmpty, "XcsCover.cover Cannot generates new rules from undefined list of actions" ) require( actions.nonEmpty && actions.size < MAX_NUM_ACTIONS, s"XcsCover.cover The number of actions per state ${actions.size} if out of range" ) actions./:(List[XcsRule]())((xs, act) => { val signal = Signal(sensor.id, sensor.value, new SOperator(nextInt(Signal.numOperators))) new XcsRule(signal, XcsAction(act, Random)) :: xs }) } } // ------------------------- EOF -----------------------------------------
Example 178
Source File: DataGenerator.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.unsupervised.functionapprox import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import scala.collection.mutable import scala.io.Source import scala.util.Random def apply(sc: SparkContext): RDD[(Float, Float)] = { // See the random noise val r = new Random(System.currentTimeMillis + Random.nextLong) val src = Source.fromFile(sourceName) val input = src.getLines.map(_.split(DELIM)) ./:(mutable.ArrayBuffer[(Float, Float)]())((buf, xy) => { val x = addNoise(xy(0).trim.toFloat, r) val y = addNoise(xy(1).trim.toFloat, r) buf += ((x, y)) }) datasetSize = input.size val data_rdd = sc.makeRDD(input, nTasks) src.close data_rdd } // Original signal + random noise private def addNoise(value: Float, r: Random): Float = value*(1.0 + RATIO*(r.nextDouble - 0.5)).toFloat } // ------------------------------------- EOF ----------------------------------------------
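addNoise above perturbs each value multiplicatively with a uniform draw, value * (1 + RATIO * (r.nextDouble - 0.5)). If Gaussian noise is preferable, Random.nextGaussian (mean 0, standard deviation 1) drops in directly; a brief sketch under that assumption, with illustrative names:

import scala.util.Random

object Noise extends App {
  val rng = new Random(1L)

  // Uniform relative noise in +/- ratio/2 around the value.
  def uniformNoise(value: Float, ratio: Double): Float =
    (value * (1.0 + ratio * (rng.nextDouble() - 0.5))).toFloat

  // Gaussian relative noise with standard deviation `sigma` (as a fraction of the value).
  def gaussianNoise(value: Float, sigma: Double): Float =
    (value * (1.0 + sigma * rng.nextGaussian())).toFloat

  println(uniformNoise(10.0f, 0.1))
  println(gaussianNoise(10.0f, 0.05))
}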
Example 179
Source File: MonteCarloApproximation.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.sampling import scala.util.Random def sum(from: Double, to: Double): Double = { // Get the minimum and maximum values for the function val (min, max) = getBounds(from, to) val width = to - from val height = if (min >= 0.0) max else max - min // compute the enclosing area (rectangle) val outerArea = width * height val randomx = new Random(System.currentTimeMillis) val randomy = new Random(System.currentTimeMillis + 42L) // Monte Carlo simulator for the function def randomSquare: Double = { val numInsideArea = Range(0, numPoints)./:(0)( (s, n) => { val ptx = randomx.nextDouble * width + from val pty = randomy.nextDouble * height // update the seeds randomx.setSeed(randomy.nextLong) randomy.setSeed(randomx.nextLong) s + (if (pty > 0.0 && pty < f(ptx)) 1 else if (pty < 0.0 && pty > f(ptx)) -1 else 0) } ) numInsideArea.toDouble * outerArea / numPoints } randomSquare } // Compute the bounds for the y values of the function private def getBounds(from: Double, to: Double): (Double, Double) = { def updateBounds(y: Double, minMax: (Double,Double)): Int = { var flag = 0x00 if (y < minMax._1) flag += 0x01 if (y > minMax._2) flag += 0x02 flag } // extract the properties for the integration step val numSteps = Math.sqrt(numPoints).floor.toInt val stepSize = (to - from) / numSteps (0 to numSteps)./:((Double.MaxValue, -Double.MaxValue))( (minMax, n) => { val y = f(n * stepSize + from) updateBounds(y, minMax) match { case 0x01 => (y, minMax._2) case 0x02 => (minMax._1, y) case 0x03 => (y, y) case _ => minMax } } ) } } // -------------------------- EOF ------------------------------------------
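MonteCarloApproximation integrates f by hit-or-miss sampling: it bounds the function in a rectangle, scatters numPoints uniform random points, and scales the rectangle area by the signed fraction of points that land under the curve. The smallest version of the same idea is the classic pi estimate, sketched below independently of the book's API:

import scala.util.Random

object MonteCarloPi extends App {
  val rng = new Random(0L)
  val numPoints = 1000000

  // Fraction of random points in the unit square that land inside the quarter
  // circle x^2 + y^2 <= 1 approximates pi/4.
  val hits = (1 to numPoints).count { _ =>
    val x = rng.nextDouble()
    val y = rng.nextDouble()
    x * x + y * y <= 1.0
  }

  println(s"pi ~= ${4.0 * hits / numPoints}")
}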
Example 180
Source File: Bootstrap.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.sampling

import scala.collection.mutable
import scala.util.Random

private[scalaml] final class Bootstrap(
    numSamples: Int,
    s: Vector[Double] => Double,
    inputDistribution: Vector[Double],
    randomizer: Int => Int
) {

  // Statistic s evaluated on each of the numSamples bootstrap replicates.
  lazy val bootstrappedReplicates: Array[Double] =
    (0 until numSamples)./:(mutable.ArrayBuffer[Double]())(
      (buf, _) => buf += createBootstrapSample
    ).toArray

  // One replicate: resample the input distribution with replacement and apply s.
  def createBootstrapSample: Double = s(
    (0 until inputDistribution.size)./:(mutable.ArrayBuffer[Double]())(
      (buf, _) => {
        val randomValueIndex = randomizer(inputDistribution.size)
        buf += inputDistribution(randomValueIndex)
      }
    ).toVector
  )

  lazy val mean = bootstrappedReplicates.reduce(_ + _) / numSamples

  // Standard error of the replicates: sum the squared deviations from the mean
  // (a map/sum so each replicate is compared to the mean exactly once).
  final def error: Double = {
    import Math._
    val sumOfSquaredDiff = bootstrappedReplicates.map(x => (x - mean) * (x - mean)).sum
    sqrt(sumOfSquaredDiff / (numSamples - 1))
  }
}
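The Bootstrap class resamples the input distribution with replacement numSamples times, applies the statistic s to each replicate, and exposes the replicate mean and standard error. A hypothetical usage sketch following the constructor signature above; the data are made up, and the sketch sits in the same package only because the class is private[scalaml]:

package org.scalaml.sampling // Bootstrap is private[scalaml], so the sketch lives in the same package

import scala.util.Random

object BootstrapUsage extends App {
  val rng = new Random(0L)

  // 1000 noisy observations centred on 5.0.
  val observations = Vector.fill(1000)(5.0 + rng.nextGaussian())

  val bootstrap = new Bootstrap(
    numSamples = 200,                             // number of bootstrap replicates
    s = (xs: Vector[Double]) => xs.sum / xs.size, // statistic of interest: the sample mean
    inputDistribution = observations,
    randomizer = n => rng.nextInt(n)              // index chooser used when resampling with replacement
  )

  // The replicate mean should be close to 5.0; error is its bootstrap standard error.
  println(s"mean = ${bootstrap.mean}, std error = ${bootstrap.error}")
}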
Example 181
Source File: KullbackLeiblerTest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.unsupervised.divergence import org.apache.commons.math3.distribution.GammaDistribution import org.scalaml.Logging import org.scalatest.{FlatSpec, Matchers} import scala.util.Random final class KullbackLeiblerTest extends FlatSpec with Matchers with Logging { protected[this] val name = "Kullback Leibler divergence" it should s"$name Kullback Leibler test on two data sets" in { show("$name Kullback Leibler test on two data sets") val numDataPoints = 100000 def gammaDistribution( shape: Double, scale: Double ): Seq[Double] = { val gamma = new GammaDistribution( shape, scale ) Seq.tabulate( numDataPoints )( n => gamma.density( 2.0 * Random.nextDouble ) ) } val kl = new KullbackLeibler[Double]( gammaDistribution( 2.0, 1.0 ), gammaDistribution( 2.0, 1.0 ) ) val divergence = kl.divergence( 100 ) val expectedDivergence = 0.0063 Math.abs( divergence - expectedDivergence ) < 0.001 should be( true ) show( s"$name divergence $divergence" ) val kl2 = new KullbackLeibler[Double]( gammaDistribution( 2.0, 1.0 ), gammaDistribution( 1.0, 0.5 ) ) val divergence2 = kl2.divergence( 100 ) val expectedDivergence2 = 2.655 Math.abs( divergence2 - expectedDivergence2 ) < 0.1 should be( true ) show( s"$name divergence $divergence2" ) } } // ------------------------------------------- EIF ----------------------------------------------
Example 182
Source File: FunctionApproxTest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.unsupervised.functionapprox import org.scalaml.Logging import org.scalaml.Predef.Context.ToDouble import org.scalatest.{FlatSpec, Matchers} import scala.util.Random final class FunctionApproxTest extends FlatSpec with Matchers with Logging { protected[this] val name = "Function Approximation" // Simplest data point definition case class DataPoint( id: String, value: Double ) final val expected = Math.log( _ ) it should s"$name using a non-resizable histogram" in { show(s"$name using a non-resizable histogram") implicit val dataPoint2Double = new ToDouble[DataPoint] { def apply( dataPoint: DataPoint ): Double = dataPoint.value } val input = Array.tabulate( 10000 )( n => { val x = 1.0 + 9.0 * Random.nextDouble ( DataPoint( n.toString, x ), expected( x ) ) } ) val testSample = List[DataPoint]( DataPoint( "2001", 2.8 ), DataPoint( "2002", 5.5 ), DataPoint( "2003", 7.1 ) ) val error2 = error( new HistogramApprox[DataPoint]( 2, input ), testSample ) show( s"$name error 2 $error2" ) val error5 = error( new HistogramApprox[DataPoint]( 5, input ), testSample ) show( s"$name error 5 $error5" ) val error10 = error( new HistogramApprox[DataPoint]( 10, input ), testSample ) show( s"$name error 10 $error10" ) val error25 = error( new HistogramApprox[DataPoint]( 25, input ), testSample ) show( s"$name error 25 $error25" ) val error100 = error( new HistogramApprox[DataPoint]( 100, input ), testSample ) show( s"$name error 100 $error100" ) } private def error( functionApprox: FunctionApprox[DataPoint], testSample: List[DataPoint] ): Double = Math.sqrt( testSample./:( 0.0 )( ( s, dataPoint ) => { val delta = functionApprox.predict( dataPoint ) - expected( dataPoint.value ) s + delta * delta } ) ) } // ----------------------- EOF ----------------------------------------------------
Example 183
Source File: WorkflowTest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.workflow import org.scalaml.Logging import org.scalaml.core.Design.{ConfigDouble, ConfigInt} import org.scalaml.core.ETransform import org.scalaml.Predef._ import org.scalaml.stats.MinMax import org.scalatest.{FlatSpec, Matchers} import scala.util.{Failure, Random, Success, Try} final class WorkflowTest extends FlatSpec with Matchers with Logging { protected[this] val name = "Workflow for data pipeline" it should s"$name Illustration of a monadic workflow" in { val samples: Int = 100 val normRatio = 10 val splits = 4 val g = (x: Double) => Math.log(x + 1.0) + Random.nextDouble val workflow = new Workflow[Double => Double, DblVec, DblVec, Int] with Sampling[Double => Double, DblVec] with Normalization[DblVec, DblVec] with Aggregation[DblVec, Int] { val sampler = new ETransform[Double => Double, DblVec](ConfigInt(samples)) { override def |> : PartialFunction[Double => Double, Try[DblVec]] = { case f: (Double => Double) => Try { val sampled: DblVec = Vector.tabulate(samples)(n => f(n.toDouble / samples)) show(s"$name sampling : ${sampled.mkString(",")}") sampled } } } val normalizer = new ETransform[DblVec, DblVec](ConfigDouble(normRatio)) { override def |> : PartialFunction[DblVec, Try[DblVec]] = { case x: DblVec if x.nonEmpty => Try { val minMax = MinMax[Double](x).map(_.normalize(0.0, 1.0)).getOrElse(Vector.empty[Double]) show(s"$name normalization : ${minMax.mkString(",")}") minMax } } } val aggregator = new ETransform[DblVec, Int](ConfigInt(splits)) { override def |> : PartialFunction[DblVec, Try[Int]] = { case x: DblVec if x.nonEmpty => Try { show(s"$name aggregation") Range(0, x.size).find(x(_) == 1.0).getOrElse(-1) } } } } (workflow |> g) match { case Success(res) => show(s"$name result = ${res.toString}") case Failure(e) => error(s"$name", e) } } } // --------------------------------------- EOF ----------------------------------------------
Example 184
Source File: MetropolisHastingsTest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.sampling import org.scalatest.{FlatSpec, Matchers} import scala.util.Random final class MetropolisHastingsTest extends FlatSpec with Matchers with org.scalaml.Logging { protected[this] val name = "MCMC Metropolis-Hastings" val square = (x: Double) => if(x < 0.0 && x >= 1.0) 0.0 else x val linear = (x: Double) => 2.0*x -1.0 it should s"$name evaluation square signal with 20 iterations and 0.5 initial value" in { show(s"Evaluation square signal with 20 iterations and 0.5 initial value") val numIterations = 20 val initialValue = 0.5 val results = test(numIterations, initialValue) val acceptance = results.acceptedRate(numIterations) acceptance > 0.80 should be (true) show(s"$name ${results.toString}\n$acceptance") } it should s"$name evaluation square signal with 100 iterations and 0.5 initial value" in { show("Evaluation square signal with 100 iterations and 0.5 initial value") val numIterations = 100 val initialValue = 0.5 val results = test(numIterations, initialValue) val acceptance = results.acceptedRate(numIterations) acceptance > 0.80 should be (true) show(s"$name ${results.toString}\n$acceptance") } it should s"$name evaluation square signal with 250 iterations and 0.5 initial value" in { show("Evaluation square signal with 100 iterations and 0.5 initial value") val numIterations = 250 val initialValue = 0.5 val results = test(numIterations, initialValue) val acceptance = results.acceptedRate(numIterations) acceptance > 0.80 should be (true) show(s"$name ${results.toString}\n$acceptance") } it should s"$name evaluation square signal with 250 iterations and 1.0 initial value" in { show("Evaluation square signal with 250 iterations and 1.0 initial value") val numIterations = 250 val initialValue = 1.0 val results = test(numIterations, initialValue) val acceptance = results.acceptedRate(numIterations) acceptance > 0.80 should be (true) show(s"$name ${results.toString}\n$acceptance") } private def test(numIters: Int, initialValue: Double): Trace = { val random = new Random val q = (s: Double, sPrime: Double) => 0.5*(s + sPrime) val proposer = (s: Double) => { val r = random.nextDouble (if(r < 0.2 || r > 0.8) s*r else 1.0) } val mh = new OneMetropolisHastings(square, q, proposer, ()=>random.nextDouble) mh.mcmc(initialValue, numIters) } } // ---------------------------- EOF -----------------------------------------------
Example 185
Source File: BootstrapTest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.sampling

import org.apache.commons.math3.distribution.{NormalDistribution, RealDistribution}
import org.scalaml.Logging
import org.scalatest.{FlatSpec, Matchers}

import scala.collection.mutable.ArrayBuffer
import scala.util.Random

final class BootstrapTest extends FlatSpec with Matchers with Logging {
  protected val name = "Bootstrap sampling replicates"

  final val NumReplicates1 = 256
  final val NumReplicates2 = 1024
  final val NumDataPoints = 10000

  private def bootstrapEvaluation(
    dist: RealDistribution,
    random: Random,
    coefs: (Double, Double),
    numReplicates: Int
  ): (Double, Double) = {

    // Generate the data points x and their density dist(x)
    val input = (0 until NumDataPoints)./:(new ArrayBuffer[(Double, Double)])(
      (buf, _) => {
        val (a, b) = coefs
        val x = a * random.nextDouble - b
        buf += ((x, dist.density(x)))
      }
    ).toVector

    // Bootstrap for the statistics
    val bootstrap = new Bootstrap(
      numReplicates,
      (x: Vector[Double]) => x.sum / x.length,
      input.map(_._2),
      (rLen: Int) => new Random(System.currentTimeMillis).nextInt(rLen)
    )
    (bootstrap.mean, bootstrap.error)
  }

  it should s"$name over an input with the distribution a*r + b, $NumReplicates1 replicates" in {
    import Math._
    show(s"$name over an input with the distribution a*r + b, $NumReplicates1 replicates")

    val (meanNormal, errorNormal) = bootstrapEvaluation(
      new NormalDistribution,
      new scala.util.Random,
      (5.0, 2.5),
      NumReplicates1
    )
    val expectedMean = 0.185
    show(s"$name meanNormal $meanNormal error $errorNormal")

    abs(expectedMean - meanNormal) < 0.05 should be(true)
    abs(errorNormal) < 0.05 should be(true)
  }

  it should s"$name over an input with the distribution a*r + b, $NumReplicates2 replicates" in {
    import Math._
    show(s"$name over an input with the distribution a*r + b, $NumReplicates2 replicates")

    val (meanNormal, errorNormal) = bootstrapEvaluation(
      new NormalDistribution,
      new scala.util.Random,
      (5.0, 2.5),
      NumReplicates2
    )
    val expectedMean = 0.185
    show(s"$name meanNormal $meanNormal error $errorNormal")

    abs(expectedMean - meanNormal) < 0.05 should be(true)
    abs(errorNormal) < 0.05 should be(true)
  }
}

// ----------------------------------- EOF -------------------------------------------
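The test above drives the book's Bootstrap class. For readers who want to see the resampling idea without that dependency, the following standalone sketch (written for this listing, not taken from the project) resamples the data with replacement, evaluates the statistic on each replicate, and reports the mean and standard error of the replicates.

import scala.util.Random

object BootstrapSketch extends App {
  val data = Vector.fill(1000)(Random.nextGaussian)
  val numReplicates = 256

  def mean(xs: Vector[Double]): Double = xs.sum / xs.length

  // Each replicate: resample the data with replacement and compute the statistic on it
  val replicates = Vector.fill(numReplicates) {
    val resampled = Vector.fill(data.length)(data(Random.nextInt(data.length)))
    mean(resampled)
  }

  val bootMean = mean(replicates)
  val bootError = math.sqrt(
    replicates.map(r => (r - bootMean) * (r - bootMean)).sum / numReplicates
  )
  println(s"bootstrap mean = $bootMean, standard error = $bootError")
}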
Example 186
Source File: ParallelismTest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.scalability.scala

import org.scalaml.Logging
import org.scalatest.{FlatSpec, Matchers}

final class ParallelismTest extends FlatSpec with Matchers with Logging {
  import scala.collection.mutable.HashMap
  import scala.collection.parallel.mutable.{ParArray, ParHashMap}
  import scala.util.Random

  protected[this] val name: String = "Scala parallel collections"

  final private val SZ = 100000
  final private val NUM_TASKS = 8
  final private val evalRange = Range(1, NUM_TASKS)
  final private val TIMES = 20

  // Arbitrary map function
  final val mapF = (x: Double) => Math.sin(x * 0.01) + Math.exp(-x)

  // Arbitrary filter function
  final val filterF = (x: Double) => x > 0.8

  // Arbitrary reduce function
  final val reduceF = (x: Double, y: Double) => (x + y) * x

  it should s"$name: arrays" in {
    show("Evaluation of arrays")

    // Generate random data for both the non-parallel and parallel arrays
    val data = Array.fill(SZ)(Random.nextDouble)
    val pData = ParArray.fill(SZ)(Random.nextDouble)

    // Initialize and execute the benchmark for the parallel array
    val benchmark = new ParallelArray[Double](data, pData, TIMES)

    val ratios = new Array[Double](NUM_TASKS)
    evalRange.foreach(n => ratios.update(n, benchmark.map(mapF)(n)))
    val resultMap = ratios.tail
    resultMap.sum / resultMap.size < 1.0 should be(true)
    display(resultMap, "ParArray.map")

    evalRange.foreach(n => ratios.update(n, benchmark.filter(filterF)(n)))
    val resultFilter = ratios.tail
    resultFilter.sum / resultFilter.size < 1.0 should be(true)
    display(resultFilter, "ParArray.filter")
  }

  it should s"$name: maps" in {
    show("Evaluation of maps")

    val mapData = new HashMap[Int, Double]
    Range(0, SZ).foreach(n => mapData.put(n, Random.nextDouble))
    val parMapData = new ParHashMap[Int, Double]
    Range(0, SZ).foreach(n => parMapData.put(n, Random.nextDouble))

    // Initialize and execute the benchmark for the parallel map
    val benchmark = new ParallelMap[Double](mapData.toMap, parMapData, TIMES)

    val ratios = new Array[Double](NUM_TASKS)
    evalRange.foreach(n => ratios.update(n, benchmark.map(mapF)(n)))
    val resultMap = ratios.tail
    resultMap.sum / resultMap.size < 1.0 should be(true)
    display(resultMap, "ParMap.map")

    evalRange.foreach(n => ratios.update(n, benchmark.filter(filterF)(n)))
    val resultFilter = ratios.tail
    resultFilter.sum / resultFilter.size < 1.0 should be(true)
  }

  private def display(x: Array[Double], label: String): Unit = {
    import org.scalaml.plots.{Legend, LightPlotTheme, LinePlot}

    val labels = Legend(
      name,
      "Scala parallel collections",
      s"Scala parallel computation for $label",
      "Relative timing"
    )
    LinePlot.display(x.toVector, labels, new LightPlotTheme)
  }
}

// ------------------------------------------- EOF --------------------------------------------------
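The ParallelArray and ParallelMap benchmark classes used above come from the book's code base. As a rough, self-contained approximation of what they measure, the sketch below (an illustration for this listing, not the book's implementation, and assuming Scala 2.12 where ParArray ships with the standard library) times the same map function over a plain Array and over a ParArray.

import scala.collection.parallel.mutable.ParArray
import scala.util.Random

object ParTimingSketch extends App {
  val size = 1000000
  val data = Array.fill(size)(Random.nextDouble)
  val pData = ParArray.fill(size)(Random.nextDouble)
  val mapF = (x: Double) => Math.sin(x * 0.01) + Math.exp(-x)

  // Wall-clock timing of a single evaluation, in milliseconds
  def timeMs[A](block: => A): Double = {
    val start = System.nanoTime
    block
    (System.nanoTime - start) / 1e6
  }

  val sequential = timeMs(data.map(mapF))
  val parallel = timeMs(pData.map(mapF))
  println(s"sequential: $sequential ms, parallel: $parallel ms, ratio: ${parallel / sequential}")
}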
Example 187
Source File: StreamsTest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.scalability.scala

import java.lang.ref._

import org.apache.log4j.Logger
import org.scalaml.Logging
import org.scalaml.Predef._
import org.scalatest.{FlatSpec, Matchers}

import scala.math._

case class DataPoint(x: DblVec, y: Double)

final class StreamsTest extends FlatSpec with Matchers with Logging {
  import scala.util.Random

  protected[this] val name = "Scala streams"

  it should s"$name huge list" in {
    show(s"$name huge list")

    val input = (0 until 1000000000).toStream
    input(10) should be(10)
  }

  it should s"$name recursion" in {
    show(s"$name recursion")

    def mean(strm: => Stream[Double]): Double = {
      @scala.annotation.tailrec
      def mean(z: Double, count: Int, strm: Stream[Double]): (Double, Int) =
        if (strm.isEmpty) (z, count)
        else mean((1.0 - 1.0 / count) * z + strm.head / count, count + 1, strm.tail)
      mean(0.0, 1, strm)._1
    }

    val input = List[Double](2.0, 5.0, 3.5, 2.0, 5.7, 1.0, 8.0)
    val ave: Double = mean(input.toStream)
    ave should be(3.88 +- 0.05)
  }

  it should s"$name with recycled memory blocks" in {
    show(s"$name with recycled memory blocks")

    type DblVec = Vector[Double]
    val DATASIZE = 20000

    val dot = (s: Double, xy: (Double, Double)) => s + xy._1 * xy._2
    val diff = (x: DblVec, y: DblVec) => x.zip(y).aggregate(0.0)(dot, _ + _)
    val weights = Vector[Double](0.5, 0.7)
    val lossFunction = new LossFunction(diff, weights, DATASIZE)

    // Create a stream of weak references to stream segments of size DATASIZE/10
    val stream = () => new WeakReference(
      Stream.tabulate(DATASIZE)(n =>
        DataPoint(
          Vector[Double](n.toDouble, n * n.toDouble),
          n.toDouble * weights(0) + n * n.toDouble * weights(1) + 0.1 * Random.nextDouble
        ))
    )
    // Compute a simple distance using the dot product
    val totalLoss = sqrt(lossFunction.compute(stream))
    show(s"$name totalLoss ${totalLoss / DATASIZE}")

    val averageLoss = totalLoss / DATASIZE
    averageLoss should be(0.0 +- 0.001)
  }
}

// -------------------------- EOF --------------------------------
Example 188
Source File: TFuturesTest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.scalability.akka

import akka.actor.{ActorSystem, Props}
import akka.pattern.ask
import akka.util.Timeout
import org.scalaml.Logging
import org.scalaml.Predef.DblVec
import org.scalaml.filtering.dft.DFT
import org.scalaml.scalability.akka.message._
import org.scalaml.util.FormatUtils._
import org.scalatest.{FlatSpec, Matchers}

import scala.concurrent.duration.Duration
import scala.util.Random

// The class header and the Duration/Random imports are not shown in the listing;
// the class name follows the source file name TFuturesTest.scala.
final class TFuturesTest extends FlatSpec with Matchers with Logging {
  protected[this] val name: String = "Scala futures"

  private val NUM_WORKERS = 8
  private val NUM_DATA_POINTS = 1000000

  // Synthetic input signal: sum of three harmonics plus noise
  private val h = (x: Double) =>
    2.0 * Math.cos(Math.PI * 0.005 * x) + // simulated first harmonic
      Math.cos(Math.PI * 0.05 * x) + // simulated second harmonic
      0.5 * Math.cos(Math.PI * 0.2 * x) + // simulated third harmonic
      0.2 * Random.nextDouble

  private val TimeOut = 5000L
  private val duration = Duration(TimeOut, "millis")
  implicit val timeout = new Timeout(duration)

  it should s"$name Data transformation futures using Akka actors" in {
    show(s"$name Data transformation futures using Akka actors")

    val actorSystem = ActorSystem("System")
    val xt = Vector.tabulate(NUM_DATA_POINTS)(h(_))

    val master = actorSystem.actorOf(
      Props(new DFTFutures(xt, NUM_WORKERS)),
      "DFTTransform"
    )
    val future = master ? Start()
    Thread.sleep(TimeOut)
    actorSystem.shutdown()
  }
}

// ----------------------------------------------- EOF ---------------------------
Example 189
Source File: DKalmanTest.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.filtering.kalman

import org.scalaml.{Logging, Predef, Resource}
import org.scalaml.Predef.DblVec
import org.scalaml.stats.TSeries.zipWithShift
import org.scalaml.trading.YahooFinancials
import org.scalaml.trading.YahooFinancials.adjClose
import org.scalaml.util.Assertable
import org.scalaml.util.FormatUtils.{LONG, format}
import org.scalaml.workflow.data.{DataSink, DataSource}
import org.scalatest.{FlatSpec, Matchers}

import scala.util.{Failure, Random, Success}

final class DKalmanTest extends FlatSpec with Matchers with Logging with Assertable with Resource {
  protected[this] val name: String = "Kalman filter"

  private val OUTPUT_FILE = "output/filtering/dkalman"
  private val RESOURCE_DIR = "filtering/"
  private val NUM_VALUES = 128

  // Noise has to be declared implicitly
  implicit val qrNoise = new QRNoise((0.7, 0.3), (m: Double) => m * Random.nextGaussian)

  // Contract extractor
  private val extractor = YahooFinancials.adjClose :: List[Array[String] => Double]()

  it should s"$name evaluation" in {
    import Predef._
    show(s"$name evaluation")

    // H and P0 are the only components that are independent from
    // input data and smoothing factor. The control matrix B is not defined
    // as there is no external control on the time series.
    val H: DblMatrix = ((0.9, 0.0), (0.0, 0.1))
    val P0: DblMatrix = ((0.4, 0.3), (0.5, 0.4))

    // The evaluation of the filter over the price series is not included in this listing.
  }

  private def display(z: DblVec, x: DblVec, alpha: Double): Unit = {
    import org.scalaml.plots.{Legend, LightPlotTheme, LinePlot}

    val labels = Legend(
      name,
      s"Kalman filter alpha = $alpha",
      s"Kalman with alpha $alpha",
      "y"
    )
    val data = (z, "price") :: (x, "Filtered") :: List[(DblVec, String)]()
    LinePlot.display(data, labels, new LightPlotTheme)
  }
}

// -------------------------------- EOF ----------------------------------------------------
Example 190
Source File: Mixer.scala From Learn-Scala-Programming with MIT License | 5 votes |
package ch12

import akka.actor.typed.{ActorRef, Behavior, SupervisorStrategy}
import akka.actor.typed.scaladsl.Behaviors
import ch12.Bakery.{Groceries, Dough}
import ch12.Chef.Collect

import scala.concurrent.duration.FiniteDuration
import scala.util.Random

object Mixer {
  class MotorOverheatException extends Exception
  class SlowRotationSpeedException extends Exception
  class StrongVibrationException extends Exception

  final case class Mix(groceries: Groceries, sender: ActorRef[Collect])

  def mix(mixTime: FiniteDuration): Behavior[Mix] = Behaviors.receive[Mix] {
    case (ctx, Mix(Groceries(eggs, flour, sugar, chocolate), sender)) =>
      if (Random.nextBoolean()) throw new MotorOverheatException
      Thread.sleep(mixTime.toMillis)
      sender ! Collect(Dough(eggs * 50 + flour + sugar + chocolate), ctx.self)
      Behaviors.stopped
  }

  def controlledMix(mixTime: FiniteDuration): Behavior[Mix] =
    Behaviors
      .supervise(
        Behaviors
          .supervise(Behaviors
            .supervise(mix(mixTime))
            .onFailure[MotorOverheatException](SupervisorStrategy.stop))
          .onFailure[SlowRotationSpeedException](SupervisorStrategy.restart))
      .onFailure[StrongVibrationException](SupervisorStrategy.resume)
}
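The object above only defines behaviors; a minimal way to exercise the nested supervision might look like the sketch below. It is illustrative only: the guardian behavior, the actor and system names, and the grocery quantities (assumed to be four Int fields, matching the pattern match in mix) are assumptions, not part of the book's chapter code.

package ch12

import akka.actor.typed.ActorSystem
import akka.actor.typed.scaladsl.Behaviors
import ch12.Bakery.Groceries
import ch12.Chef.Collect

import scala.concurrent.duration._

object MixerDemo extends App {
  // Guardian that spawns a supervised mixer, sends one Mix command,
  // and stops once the dough comes back
  val guardian = Behaviors.setup[Collect] { ctx =>
    val mixer = ctx.spawn(Mixer.controlledMix(100.millis), "mixer")
    mixer ! Mixer.Mix(Groceries(2, 500, 100, 50), ctx.self)
    Behaviors.receiveMessage { collected =>
      ctx.log.info("Received: {}", collected)
      Behaviors.stopped
    }
  }

  ActorSystem(guardian, "bakery-demo")
}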
Example 191
Source File: BakerySpec.scala From Learn-Scala-Programming with MIT License | 5 votes |
package ch11

import akka.actor.{ActorSystem, Props}
import akka.testkit.{ImplicitSender, TestKit, TestProbe}
import ch11.Cook.RawCookies
import ch11.Manager.ShoppingList
import ch11.Oven.Cookies
import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpecLike}

import scala.concurrent.duration._
import scala.language.postfixOps
import scala.util.Random

class BakerySpec(_system: ActorSystem)
    extends TestKit(_system)
    with Matchers
    with WordSpecLike
    with BeforeAndAfterAll
    with ImplicitSender {

  def this() = this(ActorSystem("BakerySpec"))

  override def afterAll: Unit = shutdown(system)

  "The boy should" should {
    val boyProps = Boy.props(system.actorSelection(testActor.path))
    val boy = system.actorOf(boyProps)

    "forward given ShoppingList to the seller" in {
      val list = ShoppingList(0, 0, 0, 0)
      boy ! list
      within(3 millis, 20 millis) {
        expectMsg(list)
        lastSender shouldBe testActor
      }
    }
    "ignore other message types" in {
      boy ! 'GoHome
      expectNoMessage(500 millis)
    }
  }

  "The baker should" should {
    val parent = TestProbe()
    val baker = parent.childActorOf(Props(classOf[Baker], 0 millis))

    "bake cookies in batches" in {
      val count = Random.nextInt(100)
      baker ! RawCookies(Oven.size * count)
      parent.expectMsgAllOf(List.fill(count)(Cookies(Oven.size)): _*)
    }
  }
}
Example 192
Source File: Main.scala From perf_tester with Apache License 2.0 | 5 votes |
package org.preftester

import java.io.File
import java.nio.file.{Files, Paths}

import com.typesafe.config.{ConfigFactory, ConfigObject, ConfigParseOptions}
import org.perftester.results.renderer.TextRenderer
import org.perftester.results.{ResultReader, RunResult}

import scala.collection.JavaConverters._
import scala.sys.process.Process
import scala.util.{Random, Try}

object Main extends App {
  val baseDir = Paths.get(args.headOption.getOrElse("."))

  case class Configuration(
    reference: String,
    baseScalaVersion: String,
    buildLocally: Boolean,
    jvmOptions: String,
    scalaOptions: String
  ) {
    val scalaVersion =
      if (buildLocally) s"$baseScalaVersion-$reference-SNAPSHOT" else reference
  }

  val config = ConfigFactory.parseFile(
    baseDir.resolve("benchmark.conf").toFile,
    ConfigParseOptions.defaults().setAllowMissing(false)
  )

  val benchmarks = config.getObject("benchmarks").asScala.map {
    case (name, obj: ConfigObject) =>
      def read(name: String, default: String) =
        Try(obj.toConfig.getString(name)).getOrElse(default)

      name -> Configuration(
        reference = read("reference", name),
        baseScalaVersion = read("baseScalaVersion", "2.12.4"),
        buildLocally = read("buildLocally", "false").toBoolean,
        jvmOptions = read("jvmOptions", ""),
        scalaOptions = read("scalaOptions", "")
      )
  }.toSeq

  val iterations = config.getInt("iterations")
  val N = config.getInt("N")
  val M = config.getInt("M")

  val results = (1 to iterations).foldLeft(Map.empty[String, Vector[RunResult]]) {
    case (all, i) =>
      Random.shuffle(benchmarks).foldLeft(all) {
        case (all, (name, benchmark)) =>
          val location = baseDir.resolve(benchmark.scalaVersion)
          val cmd = Seq(s"./run.sh", ".", N, M, benchmark.scalaOptions).map(_.toString)
          println(s"## Run $i for $name")
          val env =
            if (benchmark.jvmOptions.isEmpty) Nil
            else Seq("_JAVA_OPTIONS" -> benchmark.jvmOptions)
          val output = Process(cmd, location.toFile, env: _*).!!
          println(output)
          val resultsDir = location.resolve("output").resolve("profile.txt")
          if (Files.exists(resultsDir)) {
            val result = ResultReader.readResults(name, resultsDir, N)
            val previous = all.getOrElse(name, Vector.empty)
            all + (name -> (previous :+ result))
          } else all
      }
  }

  results.foreach {
    case (name, results) =>
      println(s"########## Result for $name ##########")
      TextRenderer.outputTextResults(iterations, results)
  }
}
Example 193
Source File: TikaParquetParser.scala From project-matt with MIT License | 5 votes |
package org.datafy.aws.app.matt.extras

import java.io.{File, FileOutputStream, IOException, InputStream}
import java.util

import scala.collection.JavaConverters._

import org.xml.sax.{ContentHandler, SAXException}
import org.apache.tika.metadata.Metadata
import org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE
import org.apache.tika.mime.MediaType
import org.apache.tika.parser.{AbstractParser, ParseContext}
import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.parquet.hadoop.ParquetFileReader
import org.apache.parquet.hadoop.ParquetReader
import org.apache.parquet.format.converter.ParquetMetadataConverter
import org.apache.parquet.hadoop.util.HadoopInputFile
import org.apache.parquet.tools.json.JsonRecordFormatter
import org.apache.parquet.tools.read.{SimpleReadSupport, SimpleRecord}
import org.apache.tika.exception.TikaException
import org.apache.tika.sax.XHTMLContentHandler

import scala.util.Random

class TikaParquetParser extends AbstractParser {

  final val PARQUET_RAW = MediaType.application("x-parquet")
  private val SUPPORTED_TYPES: Set[MediaType] = Set(PARQUET_RAW)

  def getSupportedTypes(context: ParseContext): util.Set[MediaType] = {
    SUPPORTED_TYPES.asJava
  }

  @throws(classOf[IOException])
  @throws(classOf[SAXException])
  @throws(classOf[TikaException])
  def parse(stream: InputStream, handler: ContentHandler,
            metadata: Metadata, context: ParseContext): Unit = {
    // create temp file from stream
    val fileNamePrefix = Random.alphanumeric.take(5).mkString
    val tempFile = File.createTempFile(s"parquet-$fileNamePrefix", ".parquet")
    IOUtils.copy(stream, new FileOutputStream(tempFile))

    val conf = new Configuration()
    val path = new Path(tempFile.getAbsolutePath)
    val parquetMetadata = ParquetFileReader.readFooter(conf, path, ParquetMetadataConverter.NO_FILTER)
    var defaultReader: ParquetReader[SimpleRecord] = null

    val columns = parquetMetadata.getFileMetaData.getSchema.getFields
    metadata.set(CONTENT_TYPE, PARQUET_RAW.toString)
    metadata.set("Total Number of Columns", columns.size.toString)
    metadata.set("Parquet Column Names", columns.toString)

    val xhtml = new XHTMLContentHandler(handler, metadata)
    xhtml.startDocument()
    xhtml.startElement("p")

    // ::TODO:: ensure the parquet reader reads all rows, not only the first one
    try {
      defaultReader = ParquetReader.builder(new SimpleReadSupport(),
        new Path(tempFile.getAbsolutePath)).build()
      // read the first record once; calling read() twice would skip a row
      val firstRecord: SimpleRecord = defaultReader.read()
      if (firstRecord != null) {
        val jsonFormatter = JsonRecordFormatter.fromSchema(parquetMetadata.getFileMetaData.getSchema)
        val textContent: String = jsonFormatter.formatRecord(firstRecord)
        xhtml.characters(textContent)
        xhtml.endElement("p")
        xhtml.endDocument()
      }
    } catch {
      case e: Throwable =>
        e.printStackTrace()
        if (defaultReader != null) {
          try {
            defaultReader.close()
          } catch {
            case _: Throwable =>
          }
        }
    } finally {
      if (tempFile != null) tempFile.delete()
    }
  }
}
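A possible way to invoke this parser directly, outside of a Tika AutoDetectParser pipeline, is sketched below. The input path, the object name and the write-limit choice are illustrative assumptions, not part of the project.

import java.io.FileInputStream

import org.apache.tika.metadata.Metadata
import org.apache.tika.parser.ParseContext
import org.apache.tika.sax.BodyContentHandler
import org.datafy.aws.app.matt.extras.TikaParquetParser

object ParquetParseDemo extends App {
  val parser = new TikaParquetParser
  val metadata = new Metadata
  val handler = new BodyContentHandler(-1) // -1 disables the default write limit
  val stream = new FileInputStream("data/sample.parquet") // hypothetical input path

  try parser.parse(stream, handler, metadata, new ParseContext)
  finally stream.close()

  println(metadata.get("Total Number of Columns"))
  println(handler.toString.take(200)) // first part of the extracted JSON text
}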
Example 194
Source File: TikaHadoopOrcParser.scala From project-matt with MIT License | 5 votes |
package org.datafy.aws.app.matt.extras

import java.io.{File, FileOutputStream, IOException, InputStream}
import java.util

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration

import scala.collection.JavaConverters._

import org.apache.hadoop.fs.Path
import org.apache.hadoop.hive.serde2.objectinspector.StructField
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector
import org.apache.orc.OrcFile
import org.apache.orc.OrcFile.ReaderOptions
import org.apache.orc.Reader
import org.apache.orc.RecordReader
import org.apache.tika.exception.TikaException
import org.apache.tika.metadata.Metadata
import org.apache.tika.mime.MediaType
import org.apache.tika.parser.{AbstractParser, ParseContext}
import org.xml.sax.{ContentHandler, SAXException}

import scala.util.Random

class TikaHadoopOrcParser extends AbstractParser {

  final val ORC_RAW = MediaType.application("x-orc")
  private val SUPPORTED_TYPES: Set[MediaType] = Set(ORC_RAW)

  def getSupportedTypes(context: ParseContext): util.Set[MediaType] = {
    SUPPORTED_TYPES.asJava
  }

  @throws(classOf[IOException])
  @throws(classOf[SAXException])
  @throws(classOf[TikaException])
  def parse(stream: InputStream, handler: ContentHandler,
            metadata: Metadata, context: ParseContext): Unit = {
    // create temp file from stream
    try {
      val fileNamePrefix = Random.alphanumeric.take(5).mkString
      val tempFile = File.createTempFile(s"orc-$fileNamePrefix", ".orc")
      IOUtils.copy(stream, new FileOutputStream(tempFile))

      val path = new Path(tempFile.getAbsolutePath)
      val conf = new Configuration()
      val orcReader = OrcFile.createReader(path, new ReaderOptions(conf))
      val records: RecordReader = orcReader.rows()

      val storeRecord = null
      val firstBlockKey = null
    } catch {
      case e: Throwable => e.printStackTrace()
    }
    // val fields =
  }
}
Example 195
Source File: Api.scala From endpoints4s with MIT License | 5 votes |
package sample

import endpoints4s.play.server._

import scala.concurrent.Future
import scala.util.Random

class Api(val playComponents: PlayComponents)
    extends ApiAlg
    with AssetsAlg
    with Endpoints
    with JsonEntitiesFromCodecs
    with Assets
    with BasicAuthentication {

  val routes = routesFromEndpoints(
    index.implementedBy { case (name, age, _) => User(name, age) },
    action.implementedBy(param => ActionResult(index.call(("Julien", 30, "a&b+c")).url)),
    actionFut.implementedByAsync(param =>
      Future.successful(ActionResult(index.call(("Julien", 30, "future")).url))
    ),
    assets.implementedBy(assetsResources()),
    maybe.implementedBy(_ => if (util.Random.nextBoolean()) Some(()) else None),
    auth.implementedBy { credentials =>
      println(s"Authenticated request: ${credentials.username}")
      if (Random.nextBoolean()) Some(()) else None // Randomly return a forbidden
    }
  )
}
Example 196
Source File: Api.scala From endpoints4s with MIT License | 5 votes |
package sample

import endpoints4s.akkahttp.server._

import scala.concurrent.Future
import scala.util.Random

object Api extends ApiAlg with Endpoints with JsonEntitiesFromCodecs with BasicAuthentication {

  import akka.http.scaladsl.server.Directives._

  val routes =
    index.implementedBy { case (name, age, _) => User(name, age) } ~
      action.implementedBy { param => ActionResult("Action") } ~
      actionFut.implementedByAsync { param =>
        Future.successful(ActionResult("Future Action"))
      } ~
      maybe.implementedBy { _ =>
        if (util.Random.nextBoolean()) Some(()) else None
      } ~
      auth.implementedBy { credentials =>
        println(s"Authenticated request: ${credentials.username}")
        if (Random.nextBoolean()) Some(()) else None // Randomly return a forbidden
      }
}
Example 197
Source File: Api.scala From endpoints4s with MIT License | 5 votes |
package sample

import cats.effect.IO
import endpoints4s.http4s.server.{BasicAuthentication, Endpoints, JsonEntitiesFromCodecs}
import org.http4s.HttpRoutes

import scala.util.Random

object Api extends Endpoints[IO] with JsonEntitiesFromCodecs with BasicAuthentication with ApiAlg {

  val router: HttpRoutes[IO] = HttpRoutes.of(
    routesFromEndpoints(
      index.implementedBy { case (name, age, _) => User(name, age) },
      maybe.implementedBy(_ => if (util.Random.nextBoolean()) Some(()) else None) orElse
        action.implementedBy { _ => ActionResult("Action") },
      actionFut.implementedByEffect { _ => IO.pure(ActionResult("Action")) },
      auth.implementedBy { credentials =>
        println(s"Authenticated request: ${credentials.username}")
        if (Random.nextBoolean()) Some(()) else None // Randomly return a forbidden
      }
    )
  )
}
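The router above still needs to be mounted on an HTTP server to be reachable. One way to do that with the Blaze backend is sketched below; this is version-dependent and written as an assumption against the http4s 0.21.x / cats-effect 2 line, not taken from the sample project.

package sample

import cats.effect.{ExitCode, IO, IOApp}
import org.http4s.implicits._
import org.http4s.server.blaze.BlazeServerBuilder

import scala.concurrent.ExecutionContext.global

object Server extends IOApp {
  // Bind the endpoints4s router on localhost:8080 and serve until interrupted
  def run(args: List[String]): IO[ExitCode] =
    BlazeServerBuilder[IO](global)
      .bindHttp(8080, "localhost")
      .withHttpApp(Api.router.orNotFound)
      .serve
      .compile
      .drain
      .as(ExitCode.Success)
}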
Example 198
Source File: LogkafkaAdminUtils.scala From CMAK with Apache License 2.0 | 5 votes |
package kafka.manager.utils

import java.util.Properties

import grizzled.slf4j.Logging
import kafka.manager.model.{Kafka_0_8_2_0, KafkaVersion, ActorModel}
import org.apache.curator.framework.CuratorFramework

import scala.collection.mutable
import scala.util.Random

class LogkafkaAdminUtils(version: KafkaVersion) extends Logging {

  val rand = new Random

  def isDeleteSupported: Boolean = {
    version match {
      case Kafka_0_8_2_0 => true
      case _ => false
    }
  }

  def deleteLogkafka(curator: CuratorFramework,
                     logkafka_id: String,
                     log_path: String,
                     logkafkaConfigOption: Option[ActorModel.LogkafkaConfig]): Unit = {
    logkafkaConfigOption.map { lcg =>
      lcg.config.map { c =>
        val configMap = kafka.manager.utils.Logkafka.parseJsonStr(logkafka_id, c)
        if (!configMap.isEmpty || !(configMap - log_path).isEmpty) {
          writeLogkafkaConfig(curator, logkafka_id, configMap - log_path, -1)
        }
      } getOrElse {
        LogkafkaErrors.LogkafkaIdNotExists(logkafka_id)
      }
    } getOrElse {
      LogkafkaErrors.LogkafkaIdNotExists(logkafka_id)
    }
  }

  def createLogkafka(curator: CuratorFramework,
                     logkafka_id: String,
                     log_path: String,
                     config: Properties = new Properties,
                     logkafkaConfigOption: Option[ActorModel.LogkafkaConfig] = None): Unit = {
    createOrUpdateLogkafkaConfigPathInZK(curator, logkafka_id, log_path, config, logkafkaConfigOption)
  }

  def createOrUpdateLogkafkaConfigPathInZK(curator: CuratorFramework,
                                           logkafka_id: String,
                                           log_path: String,
                                           config: Properties = new Properties,
                                           logkafkaConfigOption: Option[ActorModel.LogkafkaConfig],
                                           update: Boolean = false,
                                           readVersion: Int = -1,
                                           checkConfig: Boolean = true) {
    // validate arguments
    Logkafka.validateLogkafkaId(logkafka_id)
    Logkafka.validatePath(log_path)

    if (checkConfig) {
      LogkafkaNewConfigs.validate(version, config)
    }

    val configMap: mutable.Map[String, String] = {
      import scala.collection.JavaConverters._
      config.asScala
    }
    val newConfigMap = Map(log_path -> Map(configMap.toSeq: _*))

    val logkafkaConfigMap = logkafkaConfigOption.map { lcg =>
      lcg.config.map { c =>
        kafka.manager.utils.Logkafka.parseJsonStr(logkafka_id, c)
      } getOrElse {
        Map.empty
      }
    } getOrElse {
      Map.empty
    }

    if (!update) {
      // write out the config on create, not update, if there is any
      writeLogkafkaConfig(curator, logkafka_id, logkafkaConfigMap ++ newConfigMap, readVersion)
    } else {
      val merged = logkafkaConfigMap.toSeq ++ newConfigMap.toSeq
      val grouped = merged.groupBy(_._1)
      val cleaned = grouped.mapValues(_.map(_._2).fold(Map.empty)(_ ++ _))
      writeLogkafkaConfig(curator, logkafka_id, cleaned, readVersion)
    }
  }

  private def writeLogkafkaConfig(curator: CuratorFramework,
                                  logkafka_id: String,
                                  configMap: Map[String, Map[String, String]],
                                  readVersion: Int = -1) {
    ZkUtils.updatePersistentPath(curator, LogkafkaZkUtils.getLogkafkaConfigPath(logkafka_id),
      toJson(configMap), readVersion)
  }
}
Example 199
Source File: EmbeddedKafkaCustomConfigSpec.scala From embedded-kafka with MIT License | 5 votes |
package net.manub.embeddedkafka

import kafka.server.KafkaConfig
import net.manub.embeddedkafka.EmbeddedKafka._
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.ProducerConfig

import scala.language.postfixOps
import scala.util.Random

class EmbeddedKafkaCustomConfigSpec extends EmbeddedKafkaSpecSupport {
  final val TwoMegabytes = 2097152
  final val ThreeMegabytes = 3145728

  "the custom config" should {
    "allow passing additional producer parameters" in {
      val customBrokerConfig = Map(
        KafkaConfig.ReplicaFetchMaxBytesProp -> s"$ThreeMegabytes",
        KafkaConfig.MessageMaxBytesProp -> s"$ThreeMegabytes"
      )

      val customProducerConfig =
        Map(ProducerConfig.MAX_REQUEST_SIZE_CONFIG -> s"$ThreeMegabytes")
      val customConsumerConfig =
        Map(ConsumerConfig.MAX_PARTITION_FETCH_BYTES_CONFIG -> s"$ThreeMegabytes")

      implicit val customKafkaConfig: EmbeddedKafkaConfig =
        EmbeddedKafkaConfig(
          customBrokerProperties = customBrokerConfig,
          customProducerProperties = customProducerConfig,
          customConsumerProperties = customConsumerConfig
        )

      val bigMessage = generateMessageOfLength(TwoMegabytes)
      val topic = "big-message-topic"

      withRunningKafka {
        publishStringMessageToKafka(topic, bigMessage)
        consumeFirstStringMessageFrom(topic) shouldBe bigMessage
      }
    }
  }

  def generateMessageOfLength(length: Int): String =
    Iterator.continually(Random.nextPrintableChar) take length mkString
}
Example 200
Source File: package.scala From wix-http-testkit with MIT License | 5 votes |
package com.wix.test

import scala.util.Random

package object random {

  def randomStrOpt: Option[String] = Some(randomStr)

  def randomStr: String = randomStrWith(length = 20)

  def randomStrWith(length: Int): String =
    Random.alphanumeric
      .take(length).mkString

  def randomStrPair = randomStr -> randomStr

  def randomInt: Int = Random.nextInt()

  def randomBytes(length: Int): Array[Byte] = {
    val result = Array.ofDim[Byte](length)
    Random.nextBytes(result)
    result
  }

  def randomInt(from: Int, to: Int): Int = {
    require(math.abs(to.toDouble - from.toDouble) <= Int.MaxValue.toDouble,
      s"Range can't exceed ${Int.MaxValue}")
    from + Random.nextInt(math.max(to - from, 1))
  }

  def randomPort = randomInt(0, 65535)

  def randomPath = "/" + Seq.fill(5)(randomStr).mkString("/")

  def randomParameter = randomStr -> randomStr

  def randomHeader = randomStr -> randomStr
}
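A short usage sketch of the helpers defined in this package object; the object name and the printed values are illustrative only.

import com.wix.test.random._

object RandomDemo extends App {
  val userId = randomStrWith(length = 8)
  val port = randomPort
  val path = randomPath
  val (headerName, headerValue) = randomHeader
  val payload = randomBytes(16)

  println(s"user=$userId port=$port path=$path header=$headerName:$headerValue bytes=${payload.length}")
}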