Example 1
Source File: ContainerFactory.scala, from openwhisk (Apache License 2.0), 5 votes
package org.apache.openwhisk.core.containerpool

import org.apache.openwhisk.common.{Logging, TransactionId}
import org.apache.openwhisk.core.WhiskConfig
import org.apache.openwhisk.core.entity.{ByteSize, ExecManifest, ExecutableWhiskAction, InvokerInstanceId}
import org.apache.openwhisk.spi.Spi

import scala.concurrent.Future
import scala.concurrent.duration.FiniteDuration
import scala.math.max

/**
 * Argument settings passed along when containers are created.
 *
 * @param network      network name
 * @param dnsServers   DNS server entries (empty by default)
 * @param dnsSearch    DNS search entries (empty by default)
 * @param dnsOptions   DNS option entries (empty by default)
 * @param extraEnvVars additional environment variables, each written as `KEY=VALUE` or just `KEY`
 * @param extraArgs    additional arguments, keyed by argument name
 */
case class ContainerArgsConfig(network: String,
                               dnsServers: Seq[String] = Seq.empty,
                               dnsSearch: Seq[String] = Seq.empty,
                               dnsOptions: Seq[String] = Seq.empty,
                               extraEnvVars: Seq[String] = Seq.empty,
                               extraArgs: Map[String, Set[String]] = Map.empty) {

  /**
   * `extraEnvVars` parsed into a key/value map.
   *
   * Each entry is split at the first `=` only, so a value may itself contain `=`.
   * An entry without `=` maps to the empty string. If a key occurs more than once,
   * the last occurrence wins (standard `toMap` behaviour).
   */
  val extraEnvVarMap: Map[String, String] =
    extraEnvVars.flatMap { entry =>
      entry.split("=", 2) match {
        case Array(key, value) => Some(key -> value)
        case Array(key)        => Some(key -> "")
        case _                 => None
      }
    }.toMap
}

/**
 * Settings for the container pool.
 *
 * Construction fails with an `IllegalArgumentException` (via `require`) when
 * `concurrentPeekFactor` is outside `(0, 1.0]` or when
 * `prewarmExpirationCheckInterval` is shorter than one whole second.
 *
 * @param userMemory                     memory size setting for the pool
 * @param concurrentPeekFactor           must be > 0 and <= 1.0
 * @param akkaClient                     boolean switch (semantics not visible here)
 * @param prewarmExpirationCheckInterval interval setting; must be at least one second
 */
case class ContainerPoolConfig(userMemory: ByteSize,
                               concurrentPeekFactor: Double,
                               akkaClient: Boolean,
                               prewarmExpirationCheckInterval: FiniteDuration) {
  require(
    concurrentPeekFactor > 0 && concurrentPeekFactor <= 1.0,
    s"concurrentPeekFactor must be > 0 and <= 1.0; was $concurrentPeekFactor")

  // `toSeconds` truncates, so any interval below one second is rejected as well.
  require(prewarmExpirationCheckInterval.toSeconds > 0, "prewarmExpirationCheckInterval must be > 0")
}

/**
 * SPI through which a concrete [[ContainerFactory]] implementation is obtained.
 */
trait ContainerFactoryProvider extends Spi {

  /**
   * Builds a [[ContainerFactory]].
   *
   * NOTE(review): `ActorSystem` is not covered by any import visible in this file;
   * presumably `akka.actor.ActorSystem` - confirm the import is present.
   *
   * @param actorSystem actor system handed to the factory
   * @param logging     logging facility
   * @param config      whisk configuration
   * @param instance    identity of the invoker the factory is created for
   * @param parameters  additional parameters, keyed by name
   * @return the container factory
   */
  def instance(actorSystem: ActorSystem,
               logging: Logging,
               config: WhiskConfig,
               instance: InvokerInstanceId,
               parameters: Map[String, Set[String]]): ContainerFactory
}
Example 2
Source File: MaxAbsScalerModel.scala, from mleap (Apache License 2.0), 5 votes
package ml.combust.mleap.core.feature

import ml.combust.mleap.core.Model
import ml.combust.mleap.core.annotation.SparkCode
import ml.combust.mleap.core.types.{StructType, TensorType}
import{DenseVector, SparseVector, Vector, Vectors}

import scala.math.{max, min}

/**
 * Scales every feature by its maximum absolute value and clamps the result to [-1, 1].
 *
 * @param maxAbs per-feature maximum absolute values; a zero entry is treated as 1 so
 *               the corresponding feature is passed through without division by zero
 */
@SparkCode(uri = "")
case class MaxAbsScalerModel(maxAbs: Vector) extends Model {

  /**
   * Rescales `vector` element-wise.
   *
   * Dense input: every non-NaN value is divided by its feature's max-abs and clamped
   * to [-1, 1]; NaN values are left untouched. Sparse input: only the stored values
   * are rescaled, using the max-abs of the index each value belongs to.
   *
   * @param vector the vector to scale
   * @return a new vector of the same kind; the input is not modified
   */
  def apply(vector: Vector): Vector = {
    // Replace zeros so the divisions below are always well defined.
    val maxAbsUnzero = Vectors.dense(maxAbs.toArray.map(x => if (x == 0) 1.0 else x))

    vector match {
      case DenseVector(values) =>
        val vs = values.clone()
        val size = vs.length
        var i = 0

        while (i < size) {
          if (!values(i).isNaN) {
            val rescale = max(-1.0, min(1.0, values(i) / maxAbsUnzero(i)))
            vs(i) = rescale
          }
          i += 1
        }
        Vectors.dense(vs)

      case SparseVector(size, indices, values) =>
        val vs = values.clone()
        val nnz = vs.length
        var i = 0
        while (i < nnz) {
          val raw = max(-1.0, min(1.0, values(i) / maxAbsUnzero(indices(i))))

          vs(i) = raw
          i += 1
        }
        Vectors.sparse(size, indices, vs)
    }
  }

  /** Input schema: a single field "input" holding a double tensor of `maxAbs.size` elements. */
  override def inputSchema: StructType = StructType("input" -> TensorType.Double(maxAbs.size)).get

  /** Output schema: a single field "output" holding a double tensor of `maxAbs.size` elements. */
  override def outputSchema: StructType = StructType("output" -> TensorType.Double(maxAbs.size)).get
}

Example 3
Source File: MinMaxScalerModel.scala, from mleap (Apache License 2.0), 5 votes
package ml.combust.mleap.core.feature

import ml.combust.mleap.core.Model
import ml.combust.mleap.core.annotation.SparkCode
import ml.combust.mleap.core.types.{StructType, TensorType}
import{DenseVector, SparseVector, Vector, Vectors}

import scala.math.{max, min}

  /**
   * Rescales `vector` into the `[minValue, maxValue]` target range.
   *
   * Every non-NaN value is first normalised with `minArray` / `originalRange` and then
   * mapped onto the target range. A feature whose original range is 0 is normalised
   * to 0.5. NaN values are left untouched.
   *
   * NOTE(review): `minValue`, `maxValue`, `originalRange` and `minArray` are members
   * defined outside the visible part of this file.
   *
   * @param vector the vector to scale
   * @return a dense vector with the rescaled values; the input is copied, not modified
   */
  def apply(vector: Vector): Vector = {
    val scale = maxValue - minValue

    // 0 in sparse vector will probably be rescaled to non-zero
    val values = vector.copy.toArray
    val size = values.length
    var i = 0
    while (i < size) {
      if (!values(i).isNaN) {
        val raw = if (originalRange(i) != 0) (values(i) - minArray(i)) / originalRange(i) else 0.5
        values(i) = raw * scale + minValue
      }
      i += 1
    }
    Vectors.dense(values)
  }

  /**
   * Input schema: a single field named "input" typed as a double tensor whose
   * dimension is `originalRange.length`.
   *
   * NOTE(review): `.get` unwraps the result of `StructType(...)`; presumably this
   * throws if schema construction failed - confirm against `StructType.apply`.
   */
  override def inputSchema: StructType = StructType("input" -> TensorType.Double(originalRange.length)).get

  /**
   * Output schema: a single field named "output" typed as a double tensor whose
   * dimension is `originalRange.length`.
   *
   * NOTE(review): `.get` unwraps the result of `StructType(...)`; presumably this
   * throws if schema construction failed - confirm against `StructType.apply`.
   */
  override def outputSchema: StructType = StructType("output" -> TensorType.Double(originalRange.length)).get

Example 4
Source File: CurrentOffsetCounter.scala, from kafka4s (Apache License 2.0), 5 votes
package com.banno.kafka.metrics.prometheus

import cats.effect.Sync
import cats.implicits._
import org.apache.kafka.clients.consumer.ConsumerRecord
import io.prometheus.client._
import scala.math.max

/** Prometheus counter that tracks the last consumed offset per client, topic and partition. */
object CurrentOffsetCounter {

  /**
   * Registers a counter named `<prefix>_current_offset` and returns a function that
   * advances it to the offset of each record it is given.
   *
   * For every record the returned function reads the counter's current value for the
   * (clientId, topic, partition) label set and increments it by the difference to the
   * record's offset. The difference is floored at 0, so an offset at or below the
   * current value leaves the counter unchanged.
   *
   * @param cr       registry the counter is registered with
   * @param prefix   prefix of the metric name
   * @param clientId value of the `client_id` label
   * @return an effect that performs the registration and yields the per-record update function
   */
  def apply[F[_]](cr: CollectorRegistry, prefix: String, clientId: String)(
      implicit F: Sync[F]
  ): F[ConsumerRecord[_, _] => F[Unit]] =
    F.delay {
        Counter
          .build()
          .name(prefix + "_current_offset")
          .help("Counter for last consumed (not necessarily committed) offset of topic partition.")
          .labelNames("client_id", "topic", "partition")
          .register(cr)
      }
      .map { counter => (record: ConsumerRecord[_, _]) =>
        for {
          value <- F.delay(counter.labels(clientId, record.topic, record.partition.toString).get)
          // Counters may only grow, hence the floor at 0.
          delta = max(0, record.offset.toDouble - value)
          _ <- F.delay(counter.labels(clientId, record.topic, record.partition.toString).inc(delta))
        } yield ()
      }
}
Example 5
Source File: Statistics.scala, from Clustering4Ever (Apache License 2.0), 5 votes
package org.clustering4ever.stats

	/**
	 * Picks one element of `distribution` at random, guided by the weights.
	 *
	 * A threshold `p` is drawn uniformly from [0, sum of weights). The weights are then
	 * accumulated in order and the element whose weight brings the running sum to at
	 * least `p` is returned. If `p` is 0 the first element is returned.
	 *
	 * @param distribution elements paired with their weight
	 * @return the selected element
	 * @throws NoSuchElementException if `distribution` is empty (via `head`)
	 */
	final def obtainMedianFollowingWeightedDistribution[V](distribution: Seq[(V, Double)]): V = {
		val p = scala.util.Random.nextDouble() * distribution.foldLeft(0D)((agg, e) => agg + e._2)
		// Returns the number of elements consumed until the running sum reaches p.
		@annotation.tailrec
		def go(accum: Double, i: Int): Int = {
			if(accum < p) go(accum + distribution(i)._2, i + 1)
			else i
		}
		val cpt = go(0D, 0)
		if(cpt == 0) distribution.head._1 else distribution(cpt - 1)._1
	}
Example 6
Source File: SkewReplication.scala, from spark-skewjoin (Apache License 2.0), 5 votes
package com.tresata.spark.skewjoin

import scala.math.{ min, max }
import org.slf4j.LoggerFactory

/** Strategy that decides how often each side of a join is replicated. */
trait SkewReplication extends Serializable {

  /**
   * @param leftCount     count for the left side
   * @param rightCount    count for the right side
   * @param numPartitions number of partitions
   * @return the pair (left replication, right replication)
   */
  def getReplications(leftCount: Long, rightCount: Long, numPartitions: Int): (Int, Int)
}

/**
 * Replication proportional to the size of the opposite side.
 *
 * The left replication is `rightCount * replicationFactor` and the right replication
 * is `leftCount * replicationFactor`, each truncated to an `Int` and limited to the
 * range [1, numPartitions] (1 wins if `numPartitions` is below 1).
 *
 * @param replicationFactor multiplier applied to the counts
 */
case class DefaultSkewReplication(replicationFactor: Double = 1e-2) extends SkewReplication {
  override def getReplications(leftCount: Long, rightCount: Long, numPartitions: Int): (Int, Int) = (
    max(min((rightCount * replicationFactor).toInt, numPartitions), 1),
    max(min((leftCount * replicationFactor).toInt, numPartitions), 1)
  )
}

/**
 * Moves all replication to the right side: the left replication is always 1 and the
 * right replication is the product of both replications of the wrapped strategy,
 * limited to the range [1, numPartitions].
 */
private case class RightReplication(skewReplication: SkewReplication) extends SkewReplication {
  override def getReplications(leftCount: Long, rightCount: Long, numPartitions: Int): (Int, Int) = {
    val (left, right) = skewReplication.getReplications(leftCount, rightCount, numPartitions)
    // Multiply as Long: the product of two Ints can exceed Int.MaxValue and wrap around.
    val combined = left.toLong * right.toLong
    (1, max(min(combined, numPartitions.toLong), 1L).toInt)
    //(1, right)
  }
}

/**
 * Moves all replication to the left side: the right replication is always 1 and the
 * left replication is the product of both replications of the wrapped strategy,
 * limited to the range [1, numPartitions].
 */
private case class LeftReplication(skewReplication: SkewReplication) extends SkewReplication {
  override def getReplications(leftCount: Long, rightCount: Long, numPartitions: Int): (Int, Int) = {
    val (left, right) = skewReplication.getReplications(leftCount, rightCount, numPartitions)
    // Multiply as Long: the product of two Ints can exceed Int.MaxValue and wrap around.
    val combined = left.toLong * right.toLong
    (max(min(combined, numPartitions.toLong), 1L).toInt, 1)
    //(left, 1)
  }
}

/** Holds the logger shared by all `LoggingSkewReplication` instances. */
private object LoggingSkewReplication {
  private val log = LoggerFactory.getLogger(getClass)
}

/**
 * Decorator that delegates to `skewReplication` and logs whenever a replication value
 * exceeds the largest one seen so far by this instance. The returned replications are
 * those of the wrapped strategy, unchanged.
 *
 * NOTE(review): the running maxima are plain `var`s without synchronization.
 *
 * @param skewReplication the strategy whose results are observed
 */
case class LoggingSkewReplication(skewReplication: SkewReplication) extends SkewReplication {
  import LoggingSkewReplication._
  private var maxLeftReplication = 0
  private var maxRightReplication = 0

  override def getReplications(leftCount: Long, rightCount: Long, numPartitions: Int): (Int, Int) = {
    val (left, right) = skewReplication.getReplications(leftCount, rightCount, numPartitions)
    if (left > maxLeftReplication) {
      // Box explicitly so the (String, Object) overload is selected unambiguously.
      log.info("new max left replication {}", Int.box(left))
      maxLeftReplication = left
    }
    if (right > maxRightReplication) {
      log.info("new max right replication {}", Int.box(right))
      maxRightReplication = right
    }
    (left, right)
  }
}
Example 7
Source File: DocumentSegmenterSuite.scala, from lexrank-summarizer (MIT License), 5 votes
package io.github.karlhigley.lexrank

import scala.math.max

import org.scalatest.FunSuite

/**
 * Tests for `DocumentSegmenter`: sentence splitting and token normalisation over
 * three small documents (3 + 4 + 1 = 8 sentences in total).
 */
class DocumentSegmenterSuite extends FunSuite with TestSparkContext {
  val doc1 = """
    Apache Spark is an open-source cluster computing framework originally developed in the AMPLab at UC Berkeley.
    In contrast to Hadoop's two-stage disk-based MapReduce paradigm, Spark's in-memory primitives provide performance up to 100 times faster for certain applications.
    By allowing user programs to load data into a cluster's memory and query it repeatedly, Spark is well suited to machine learning algorithms.
  """

  val doc2 = """
    Spark requires a cluster manager and a distributed storage system.
    For cluster management, Spark supports standalone (native Spark cluster), Hadoop YARN, or Apache Mesos.
    For distributed storage, Spark can interface with a wide variety, including Hadoop Distributed File System (HDFS), Cassandra, OpenStack Swift, and Amazon S3.
    Spark also supports a pseudo-distributed local mode, usually used only for development or testing purposes, where distributed storage is not required and the local file system can be used instead; in this scenario, Spark is running on a single machine with one executor per CPU core.
  """

  val doc3 = """
    Spark had over 465 contributors in 2014, making it the most active project in the Apache Software Foundation and among Big Data open source projects.
  """

  val segmenter = new DocumentSegmenter

  // Each document gets its position in the list as id.
  val localDocs = List(doc1, doc2, doc3).zipWithIndex.map({ case (text, id) => Document(id.toString, text) })

  test("sentences are segmented reasonably") {
    val documents = sc.parallelize(localDocs)
    val (sentences, tokenized) = segmenter(documents)
    assert(sentences.count() === 8)
  }

  test("tokens are alphabetic and lowercase") {
    val documents = sc.parallelize(localDocs)
    val (sentences, tokenized) = segmenter(documents)
    val tokens = tokenized.flatMap(_.tokens).collect()
    tokens.foreach { t =>
      assert("^[a-z]*$".r.findFirstIn(t).isEmpty === false)
    }
  }

  test("short tokens are removed") {
    val documents = sc.parallelize(localDocs)
    val (sentences, tokenized) = segmenter(documents)
    val tokens = tokenized.flatMap(_.tokens).collect()
    List("is", "an", "the", "at").foreach { s =>
      assert(tokens.find(_ == s).isEmpty === true)
    }
  }
}

Example 8
Source File: SplitNode.scala, from streamDM (Apache License 2.0), 5 votes
package org.apache.spark.streamdm.classifiers.trees.nodes

import org.apache.spark.streamdm.classifiers.trees.{ConditionalTest, Utils}
import org.apache.spark.streamdm.core.Example

import scala.collection.mutable.ArrayBuffer
import scala.math.max

  /**
   * Renders this node and its children as indented text.
   *
   * The output starts with an indentation line (two spaces per `dep` level). For each
   * child it then appends an " if <test description>" line followed by the child's own
   * description.
   *
   * NOTE(review): `dep`, `children` and `conditionalTest` are members defined outside
   * the visible part of this file; `conditionalTest.description()` is assumed to yield
   * one entry per child - confirm.
   *
   * @return the textual description of the subtree rooted at this node
   */
  override def description(): String = {
    val indent = "  " * dep
    val sb = new StringBuilder(indent + "\n")
    val testDes = conditionalTest.description()
    for (i <- children.indices) {
      sb.append(indent + " if " + testDes(i) + "\n")
      sb.append(indent + children(i).description())
    }
    sb.toString()
  }

  /** Short label of the form `level[<dep>] SplitNode`. */
  override def toString(): String = s"level[$dep] SplitNode"

Example 9
Source File: CodebaseAnalyzer.scala, from CodeAnalyzerTutorial (Apache License 2.0), 4 votes
package tutor

import tutor.repo.AnalyzeHistoryRepository
import tutor.utils.{BenchmarkUtil, FileUtil}
import tutor.utils.FileUtil._

import scala.math.max

object CodebaseInfo {

  /** The starting value for aggregation: no files, no lines, no longest file, empty top-10 list. */
  def empty: CodebaseInfo =
    new CodebaseInfo(
      totalFileNums = 0,
      fileTypeNums = Map.empty[String, Int],
      totalLineCount = 0,
      avgLineCount = 0.0,
      longestFileInfo = None,
      top10Files = Seq.empty[SourceCodeInfo])
}

/**
 * Aggregated statistics over a set of source files.
 *
 * @param totalFileNums   number of files aggregated so far
 * @param fileTypeNums    number of files per file extension
 * @param totalLineCount  sum of the line counts of all files
 * @param avgLineCount    `totalLineCount / totalFileNums`
 * @param longestFileInfo the file with the highest line count, if any file was added
 * @param top10Files      up to ten files with the highest line counts, longest first
 */
case class CodebaseInfo(totalFileNums: Int, fileTypeNums: Map[String, Int], totalLineCount: Int, avgLineCount: Double, longestFileInfo: Option[SourceCodeInfo], top10Files: Seq[SourceCodeInfo]) {

  /**
   * Returns a new aggregate that additionally accounts for `sourceCodeInfo`.
   *
   * On equal line counts the previously recorded longest file is kept.
   */
  def +(sourceCodeInfo: SourceCodeInfo): CodebaseInfo = {
    val fileExt = FileUtil.extractExtFileName(sourceCodeInfo.localPath)
    val newFileTypeNums: Map[String, Int] =
      fileTypeNums.updated(fileExt, fileTypeNums.getOrElse(fileExt, 0) + 1)
    val newTotalLineCount = totalLineCount + sourceCodeInfo.lineCount
    val newTotalFileNum = totalFileNums + 1

    // Keep the current record holder unless the new file is strictly longer.
    val newLongest: Option[SourceCodeInfo] =
      longestFileInfo.filter(_.lineCount >= sourceCodeInfo.lineCount).orElse(Some(sourceCodeInfo))

    // `size < 10` also covers the empty list, so `last` is never called on an empty Seq.
    val newTop10: Seq[SourceCodeInfo] =
      if (top10Files.size < 10 || sourceCodeInfo.lineCount > top10Files.last.lineCount) {
        (top10Files :+ sourceCodeInfo).sortBy(_.lineCount).reverse.take(10)
      } else {
        top10Files
      }

    CodebaseInfo(
      newTotalFileNum,
      newFileTypeNums,
      newTotalLineCount,
      // Convert before dividing: Int / Int would silently drop the fractional part.
      newTotalLineCount.toDouble / newTotalFileNum,
      newLongest,
      newTop10)
  }
}

/**
 * Analyzer that scans a folder, processes every file found and folds the per-file
 * results into one [[CodebaseInfo]], which is also handed to `record`.
 *
 * `scan` and `record` are supplied by the self-type mix-ins.
 */
trait CodebaseAnalyzer extends CodebaseAnalyzerInterface {
  this: DirectoryScanner with SourceCodeAnalyzer with AnalyzeHistoryRepository =>

  /**
   * @param path           root folder to analyze
   * @param knownFileTypes file types passed on to `scan`
   * @param ignoreFolders  folders passed on to `scan` as ignored
   * @return `None` when the scan yields no files, otherwise the aggregated result
   */
  override def analyze(path: Path, knownFileTypes: Set[String], ignoreFolders: Set[String]): Option[CodebaseInfo] = {
    val files = BenchmarkUtil.record("scan folders") {
      scan(path, knownFileTypes, ignoreFolders)
    }
    if (files.isEmpty) {
      None
    } else {
      val sourceCodeInfos: Seq[SourceCodeInfo] = BenchmarkUtil.record("processing each file") {
        processSourceFiles(files)
      }
      BenchmarkUtil.record("make last result ##") {
        val codebaseInfo = sourceCodeInfos.foldLeft(CodebaseInfo.empty)(_ + _)
        record(path, codebaseInfo)
        Some(codebaseInfo)
      }
    }
  }

  /** Turns the scanned files into per-file results; the processing strategy is left to implementors. */
  protected def processSourceFiles(files: Seq[Path]): Seq[SourceCodeInfo]
}
