scala.language.existentials Scala Examples
The following examples show how to use scala.language.existentials. Importing this language feature (or compiling with -language:existentials) silences the compiler's feature warning for existential types, such as the explicit T forSome { type T } syntax and existential types introduced by type inference.
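As a quick orientation before the examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the kind of type the import legalizes: a tuple type quantified with forSome, together with its wildcard shorthand.

import scala.language.existentials

object ExistentialsSketch extends App {
  // Explicit forSome syntax: a list paired with one element of the same, unknown type T.
  type Pairing = (List[T], T) forSome { type T }

  val pairings: List[Pairing] = List(
    (List(1, 2, 3), 4),
    (List("a", "b"), "c")
  )

  // Wildcard shorthand: List[_] is List[T] forSome { type T }.
  val anyList: List[_] = pairings.head._1
  println(anyList.length) // prints 3
}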
Example 1
Source File: BlockedRDD.scala From hail with MIT License
package is.hail.sparkextras import is.hail.utils._ import org.apache.spark.rdd.RDD import org.apache.spark.{Dependency, NarrowDependency, Partition, TaskContext} import scala.language.existentials import scala.reflect.ClassTag case class BlockedRDDPartition(@transient rdd: RDD[_], index: Int, first: Int, last: Int) extends Partition { require(first <= last) val parentPartitions: Array[Partition] = range.map(rdd.partitions).toArray def range: Range = first to last } class BlockedRDD[T](@transient var prev: RDD[T], @transient val partFirst: Array[Int], @transient val partLast: Array[Int] )(implicit tct: ClassTag[T]) extends RDD[T](prev.sparkContext, Nil) { assert(partFirst.length == partLast.length) override def getPartitions: Array[Partition] = { Array.tabulate[Partition](partFirst.length)(i => BlockedRDDPartition(prev, i, partFirst(i), partLast(i))) } override def compute(split: Partition, context: TaskContext): Iterator[T] = { val parent = dependencies.head.rdd.asInstanceOf[RDD[T]] split.asInstanceOf[BlockedRDDPartition].parentPartitions.iterator.flatMap(p => parent.iterator(p, context)) } override def getDependencies: Seq[Dependency[_]] = { FastSeq(new NarrowDependency(prev) { def getParents(id: Int): Seq[Int] = partitions(id).asInstanceOf[BlockedRDDPartition].range }) } override def clearDependencies() { super.clearDependencies() prev = null } override def getPreferredLocations(partition: Partition): Seq[String] = { val prevPartitions = prev.partitions val range = partition.asInstanceOf[BlockedRDDPartition].range val locationAvail = range.flatMap(i => prev.preferredLocations(prevPartitions(i))) .groupBy(identity) .mapValues(_.length) if (locationAvail.isEmpty) return FastSeq.empty[String] val m = locationAvail.values.max locationAvail.filter(_._2 == m) .keys .toFastSeq } }
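The existential type in this example is the RDD[_] held by BlockedRDDPartition: the partition keeps a reference to its parent RDD without committing to an element type. Below is a minimal sketch of the same pattern using only the standard library; SliceRef is a hypothetical stand-in for BlockedRDDPartition, not part of hail.

import scala.language.existentials

// SliceRef references a parent collection whose element type is existential (Seq[_]),
// just as BlockedRDDPartition holds an RDD[_].
case class SliceRef(parent: Seq[_], index: Int, first: Int, last: Int) {
  require(first <= last)
  def range: Range = first to last
  def parentSlice: Seq[Any] = range.map(parent(_))
}

object SliceRefDemo extends App {
  val ref = SliceRef(Vector("a", "b", "c", "d"), index = 0, first = 1, last = 2)
  println(ref.parentSlice) // Vector(b, c)
}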
Example 2
Source File: FPTreeSuite.scala From spark1.52 with Apache License 2.0
package org.apache.spark.mllib.fpm import scala.language.existentials import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext class FPTreeSuite extends SparkFunSuite with MLlibTestSparkContext { test("add transaction") {//增加转换 val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) assert(tree.root.children.size == 2) assert(tree.root.children.contains("a")) assert(tree.root.children("a").item.equals("a")) assert(tree.root.children("a").count == 2) assert(tree.root.children.contains("b")) assert(tree.root.children("b").item.equals("b")) assert(tree.root.children("b").count == 1) var child = tree.root.children("a") assert(child.children.size == 1) assert(child.children.contains("b")) assert(child.children("b").item.equals("b")) assert(child.children("b").count == 2) child = child.children("b") assert(child.children.size == 2) assert(child.children.contains("c")) assert(child.children.contains("y")) assert(child.children("c").item.equals("c")) assert(child.children("y").item.equals("y")) assert(child.children("c").count == 1) assert(child.children("y").count == 1) } test("merge tree") {//合并树 val tree1 = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) val tree2 = new FPTree[String] .add(Seq("a", "b")) .add(Seq("a", "b", "c")) .add(Seq("a", "b", "c", "d")) .add(Seq("a", "x")) .add(Seq("a", "x", "y")) .add(Seq("c", "n")) .add(Seq("c", "m")) val tree3 = tree1.merge(tree2) assert(tree3.root.children.size == 3) assert(tree3.root.children("a").count == 7) assert(tree3.root.children("b").count == 1) assert(tree3.root.children("c").count == 2) val child1 = tree3.root.children("a") assert(child1.children.size == 2) assert(child1.children("b").count == 5) assert(child1.children("x").count == 2) val child2 = child1.children("b") assert(child2.children.size == 2) assert(child2.children("y").count == 1) assert(child2.children("c").count == 3) val child3 = child2.children("c") assert(child3.children.size == 1) assert(child3.children("d").count == 1) val child4 = child1.children("x") assert(child4.children.size == 1) assert(child4.children("y").count == 1) val child5 = tree3.root.children("c") assert(child5.children.size == 2) assert(child5.children("n").count == 1) assert(child5.children("m").count == 1) } test("extract freq itemsets") {//频繁项集的提取物 val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("a", "b")) .add(Seq("a")) .add(Seq("b")) .add(Seq("b", "n")) val freqItemsets = tree.extract(3L).map { case (items, count) => (items.toSet, count) }.toSet val expected = Set( (Set("a"), 4L), (Set("b"), 5L), (Set("a", "b"), 3L)) assert(freqItemsets === expected) } }
Example 3
Source File: OutputDataStream.scala From affinity with Apache License 2.0
package io.amient.affinity.core.util import akka.util.Timeout import io.amient.affinity.core.actor.TransactionCoordinator import io.amient.affinity.core.serde.AbstractSerde import io.amient.affinity.core.storage.{LogStorage, LogStorageConf, Record} import scala.concurrent.duration._ import scala.concurrent.{ExecutionContext, Future} import scala.language.{existentials, postfixOps} object OutputDataStream { class TransactionCoordinatorNoop extends TransactionCoordinator { override def _begin(): Future[Unit] = Future.successful(()) override def _commit(): Future[Unit] = Future.successful(()) override def _abort(): Future[Unit] = Future.successful(()) override def append(topic: String, key: Array[Byte], value: Array[Byte], timestamp: Option[Long], partition: Option[Int]): Future[_ <: Comparable[_]] = { Future.successful(0L) } } //create OutputDataStream without transactional support def apply[K, V](keySerde: AbstractSerde[_ >: K], valSerde: AbstractSerde[_ >: V], conf: LogStorageConf): OutputDataStream[K, V] = { new OutputDataStream[K, V](new TransactionCoordinatorNoop, keySerde, valSerde, conf) } } class OutputDataStream[K, V] private[affinity](txn: TransactionCoordinator, keySerde: AbstractSerde[_ >: K], valSerde: AbstractSerde[_ >: V], conf: LogStorageConf) { lazy val storage = LogStorage.newInstanceEnsureExists(conf) lazy private val topic: String = storage.getTopic() implicit val timeout = Timeout(1 minute) //FIXME def append(record: Record[K, V]): Future[_ <: Comparable[_]] = { if (txn.inTransaction()) { txn.append(topic, keySerde.toBytes(record.key), valSerde.toBytes(record.value), Option(record.timestamp), None) } else { val binaryRecord = new Record(keySerde.toBytes(record.key), valSerde.toBytes(record.value), record.timestamp) val jf = storage.append(binaryRecord) Future(jf.get)(ExecutionContext.Implicits.global) } } def delete(key: K): Future[_ <: Comparable[_]] = { if (txn.inTransaction()) { txn.append(topic, keySerde.toBytes(key), null, None, None) } else { val jf = storage.delete(keySerde.toBytes(key)) Future(jf.get)(ExecutionContext.Implicits.global) } } def flush(): Unit = storage.flush() def close(): Unit = { try flush() finally try storage.close() finally { keySerde.close() valSerde.close() } } }
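Note the return type of append and delete: Future[_ <: Comparable[_]] tells callers only that some Comparable value (an offset of one kind or another) will come back, leaving the concrete type existential. Below is a hedged sketch of that return-type pattern with a made-up append function.

import scala.concurrent.{Await, Future}
import scala.concurrent.duration._
import scala.language.existentials

object AppendSketch extends App {
  // Each branch produces a different concrete type; the signature only promises
  // "a Future of some Comparable", mirroring OutputDataStream.append.
  def append(transactional: Boolean): Future[_ <: Comparable[_]] =
    if (transactional) Future.successful(java.lang.Long.valueOf(0L))
    else Future.successful("offset-42")

  println(Await.result(append(transactional = true), 1.second)) // prints 0
}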
Example 4
Source File: EvalConfig.scala From aerosolve with Apache License 2.0
package com.airbnb.common.ml.strategy.config import scala.language.existentials import com.typesafe.config.Config import com.airbnb.common.ml.strategy.data.TrainingData import com.airbnb.common.ml.util.ScalaLogging case class EvalConfig( trainingConfig: TrainingConfig, evalDataQuery: String, holdoutDataQuery: String ) object DirectQueryEvalConfig extends ScalaLogging { def loadConfig[T]( config: Config ): EvalConfig = { val evalDataQuery = config.getString("eval_data_query") val holdoutDataQuery = config.getString("holdout_data_query") logger.info(s"Eval Data Query: $evalDataQuery") EvalConfig( TrainingConfig.loadConfig(config), evalDataQuery, holdoutDataQuery) } }
Example 5
Source File: ShuffleMapTask.scala From iolap with Apache License 2.0
package org.apache.spark.scheduler import java.nio.ByteBuffer import scala.language.existentials import org.apache.spark._ import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD import org.apache.spark.shuffle.ShuffleWriter def this(partitionId: Int) { this(0, null, new Partition { override def index: Int = 0 }, null) } @transient private val preferredLocs: Seq[TaskLocation] = { if (locs == null) Nil else locs.toSet.toSeq } override def runTask(context: TaskContext): MapStatus = { // Deserialize the RDD using the broadcast variable. val deserializeStartTime = System.currentTimeMillis() val ser = SparkEnv.get.closureSerializer.newInstance() val (rdd, dep) = ser.deserialize[(RDD[_], ShuffleDependency[_, _, _])]( ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader) _executorDeserializeTime = System.currentTimeMillis() - deserializeStartTime metrics = Some(context.taskMetrics) var writer: ShuffleWriter[Any, Any] = null try { val manager = SparkEnv.get.shuffleManager writer = manager.getWriter[Any, Any](dep.shuffleHandle, partitionId, context) writer.write(rdd.iterator(partition, context).asInstanceOf[Iterator[_ <: Product2[Any, Any]]]) return writer.stop(success = true).get } catch { case e: Exception => try { if (writer != null) { writer.stop(success = false) } } catch { case e: Exception => log.debug("Could not stop writer", e) } throw e } } override def preferredLocations: Seq[TaskLocation] = preferredLocs override def toString: String = "ShuffleMapTask(%d, %d)".format(stageId, partitionId) }
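In runTask the broadcast task binary is deserialized into an (RDD[_], ShuffleDependency[_, _, _]) pair, so both components carry existential type parameters until they are cast at the point of use. The following is a hedged, standard-library-only sketch of that deserialize-then-cast shape; the payload types are invented and a plain ObjectOutputStream stands in for Spark's closure serializer.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
import scala.language.existentials

object DeserializeSketch extends App {
  // Serialize a (Seq[_], String) pair, standing in for (RDD[_], ShuffleDependency[_, _, _]).
  val bytes: Array[Byte] = {
    val bos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(bos)
    oos.writeObject((Vector(1, 2, 3), "shuffle-dep-0"))
    oos.close()
    bos.toByteArray
  }

  val ois = new ObjectInputStream(new ByteArrayInputStream(bytes))
  // After deserialization the element type of the first component is existential.
  val (data, dep) = ois.readObject().asInstanceOf[(Seq[_], String)]
  println(s"${data.length} elements, dependency $dep") // 3 elements, dependency shuffle-dep-0
}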
Example 6
Source File: FPTreeSuite.scala From iolap with Apache License 2.0
package org.apache.spark.mllib.fpm import scala.language.existentials import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext class FPTreeSuite extends SparkFunSuite with MLlibTestSparkContext { test("add transaction") { val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) assert(tree.root.children.size == 2) assert(tree.root.children.contains("a")) assert(tree.root.children("a").item.equals("a")) assert(tree.root.children("a").count == 2) assert(tree.root.children.contains("b")) assert(tree.root.children("b").item.equals("b")) assert(tree.root.children("b").count == 1) var child = tree.root.children("a") assert(child.children.size == 1) assert(child.children.contains("b")) assert(child.children("b").item.equals("b")) assert(child.children("b").count == 2) child = child.children("b") assert(child.children.size == 2) assert(child.children.contains("c")) assert(child.children.contains("y")) assert(child.children("c").item.equals("c")) assert(child.children("y").item.equals("y")) assert(child.children("c").count == 1) assert(child.children("y").count == 1) } test("merge tree") { val tree1 = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) val tree2 = new FPTree[String] .add(Seq("a", "b")) .add(Seq("a", "b", "c")) .add(Seq("a", "b", "c", "d")) .add(Seq("a", "x")) .add(Seq("a", "x", "y")) .add(Seq("c", "n")) .add(Seq("c", "m")) val tree3 = tree1.merge(tree2) assert(tree3.root.children.size == 3) assert(tree3.root.children("a").count == 7) assert(tree3.root.children("b").count == 1) assert(tree3.root.children("c").count == 2) val child1 = tree3.root.children("a") assert(child1.children.size == 2) assert(child1.children("b").count == 5) assert(child1.children("x").count == 2) val child2 = child1.children("b") assert(child2.children.size == 2) assert(child2.children("y").count == 1) assert(child2.children("c").count == 3) val child3 = child2.children("c") assert(child3.children.size == 1) assert(child3.children("d").count == 1) val child4 = child1.children("x") assert(child4.children.size == 1) assert(child4.children("y").count == 1) val child5 = tree3.root.children("c") assert(child5.children.size == 2) assert(child5.children("n").count == 1) assert(child5.children("m").count == 1) } test("extract freq itemsets") { val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("a", "b")) .add(Seq("a")) .add(Seq("b")) .add(Seq("b", "n")) val freqItemsets = tree.extract(3L).map { case (items, count) => (items.toSet, count) }.toSet val expected = Set( (Set("a"), 4L), (Set("b"), 5L), (Set("a", "b"), 3L)) assert(freqItemsets === expected) } }
Example 7
Source File: NettyBlockRpcServer.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.network.netty import java.nio.ByteBuffer import scala.collection.JavaConverters._ import scala.language.existentials import scala.reflect.ClassTag import org.apache.spark.internal.Logging import org.apache.spark.network.BlockDataManager import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.client.{RpcResponseCallback, TransportClient} import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager} import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, OpenBlocks, StreamHandle, UploadBlock} import org.apache.spark.serializer.Serializer import org.apache.spark.storage.{BlockId, StorageLevel} class NettyBlockRpcServer( appId: String, serializer: Serializer, blockManager: BlockDataManager) extends RpcHandler with Logging { private val streamManager = new OneForOneStreamManager() override def receive( client: TransportClient, rpcMessage: ByteBuffer, responseContext: RpcResponseCallback): Unit = { val message = BlockTransferMessage.Decoder.fromByteBuffer(rpcMessage) logTrace(s"Received request: $message") message match { case openBlocks: OpenBlocks => val blocks: Seq[ManagedBuffer] = openBlocks.blockIds.map(BlockId.apply).map(blockManager.getBlockData) val streamId = streamManager.registerStream(appId, blocks.iterator.asJava) logTrace(s"Registered streamId $streamId with ${blocks.size} buffers") responseContext.onSuccess(new StreamHandle(streamId, blocks.size).toByteBuffer) case uploadBlock: UploadBlock => // StorageLevel and ClassTag are serialized as bytes using our JavaSerializer. val (level: StorageLevel, classTag: ClassTag[_]) = { serializer .newInstance() .deserialize(ByteBuffer.wrap(uploadBlock.metadata)) .asInstanceOf[(StorageLevel, ClassTag[_])] } val data = new NioManagedBuffer(ByteBuffer.wrap(uploadBlock.blockData)) val blockId = BlockId(uploadBlock.blockId) blockManager.putBlockData(blockId, data, level, classTag) responseContext.onSuccess(ByteBuffer.allocate(0)) } } override def getStreamManager(): StreamManager = streamManager }
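In the UploadBlock branch the metadata is deserialized into a (StorageLevel, ClassTag[_]) pair; the ClassTag's type parameter is existential, which is all putBlockData needs. Here is a short sketch of carrying and inspecting a ClassTag[_]; describeBlock is a hypothetical helper, not part of Spark.

import scala.language.existentials
import scala.reflect.{ClassTag, classTag}

object ClassTagSketch extends App {
  // We only know there is *some* ClassTag; its runtimeClass is still usable.
  def describeBlock(meta: (String, ClassTag[_])): String = {
    val (level, tag) = meta
    s"level=$level, elementClass=${tag.runtimeClass.getName}"
  }

  println(describeBlock(("MEMORY_ONLY", classTag[Array[Byte]]))) // level=MEMORY_ONLY, elementClass=[B
}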
Example 8
Source File: FPTreeSuite.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.mllib.fpm import scala.language.existentials import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext class FPTreeSuite extends SparkFunSuite with MLlibTestSparkContext { test("add transaction") { val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) assert(tree.root.children.size == 2) assert(tree.root.children.contains("a")) assert(tree.root.children("a").item.equals("a")) assert(tree.root.children("a").count == 2) assert(tree.root.children.contains("b")) assert(tree.root.children("b").item.equals("b")) assert(tree.root.children("b").count == 1) var child = tree.root.children("a") assert(child.children.size == 1) assert(child.children.contains("b")) assert(child.children("b").item.equals("b")) assert(child.children("b").count == 2) child = child.children("b") assert(child.children.size == 2) assert(child.children.contains("c")) assert(child.children.contains("y")) assert(child.children("c").item.equals("c")) assert(child.children("y").item.equals("y")) assert(child.children("c").count == 1) assert(child.children("y").count == 1) } test("merge tree") { val tree1 = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) val tree2 = new FPTree[String] .add(Seq("a", "b")) .add(Seq("a", "b", "c")) .add(Seq("a", "b", "c", "d")) .add(Seq("a", "x")) .add(Seq("a", "x", "y")) .add(Seq("c", "n")) .add(Seq("c", "m")) val tree3 = tree1.merge(tree2) assert(tree3.root.children.size == 3) assert(tree3.root.children("a").count == 7) assert(tree3.root.children("b").count == 1) assert(tree3.root.children("c").count == 2) val child1 = tree3.root.children("a") assert(child1.children.size == 2) assert(child1.children("b").count == 5) assert(child1.children("x").count == 2) val child2 = child1.children("b") assert(child2.children.size == 2) assert(child2.children("y").count == 1) assert(child2.children("c").count == 3) val child3 = child2.children("c") assert(child3.children.size == 1) assert(child3.children("d").count == 1) val child4 = child1.children("x") assert(child4.children.size == 1) assert(child4.children("y").count == 1) val child5 = tree3.root.children("c") assert(child5.children.size == 2) assert(child5.children("n").count == 1) assert(child5.children("m").count == 1) } test("extract freq itemsets") { val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("a", "b")) .add(Seq("a")) .add(Seq("b")) .add(Seq("b", "n")) val freqItemsets = tree.extract(3L).map { case (items, count) => (items.toSet, count) }.toSet val expected = Set( (Set("a"), 4L), (Set("b"), 5L), (Set("a", "b"), 3L)) assert(freqItemsets === expected) } }
Example 9
Source File: Casts.scala From hail with MIT License
package is.hail.expr.ir import is.hail.asm4s._ import is.hail.types._ import is.hail.types.virtual._ import scala.language.existentials object Casts { private val casts: Map[(Type, Type), (Code[T] => Code[_]) forSome {type T}] = Map( (TInt32, TInt32) -> ((x: Code[Int]) => x), (TInt32, TInt64) -> ((x: Code[Int]) => x.toL), (TInt32, TFloat32) -> ((x: Code[Int]) => x.toF), (TInt32, TFloat64) -> ((x: Code[Int]) => x.toD), (TInt64, TInt32) -> ((x: Code[Long]) => x.toI), (TInt64, TInt64) -> ((x: Code[Long]) => x), (TInt64, TFloat32) -> ((x: Code[Long]) => x.toF), (TInt64, TFloat64) -> ((x: Code[Long]) => x.toD), (TFloat32, TInt32) -> ((x: Code[Float]) => x.toI), (TFloat32, TInt64) -> ((x: Code[Float]) => x.toL), (TFloat32, TFloat32) -> ((x: Code[Float]) => x), (TFloat32, TFloat64) -> ((x: Code[Float]) => x.toD), (TFloat64, TInt32) -> ((x: Code[Double]) => x.toI), (TFloat64, TInt64) -> ((x: Code[Double]) => x.toL), (TFloat64, TFloat32) -> ((x: Code[Double]) => x.toF), (TFloat64, TFloat64) -> ((x: Code[Double]) => x), (TInt32, TCall) -> ((x: Code[Int]) => x)) def get(from: Type, to: Type): Code[_] => Code[_] = casts(from -> to).asInstanceOf[Code[_] => Code[_]] def valid(from: Type, to: Type): Boolean = casts.contains(from -> to) }
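Casts is one of the most explicit uses of the feature on this page: the map's value type is (Code[T] => Code[_]) forSome { type T }, so each entry stores a function whose input type is fixed for that entry but unknown to the map as a whole, and get recovers a callable function with a cast. Below is a simplified sketch of the same idea using plain functions; widenings and the string keys are invented.

import scala.language.existentials

object CastsSketch extends App {
  // Each value is a T => Any for *some* T that differs per entry, mirroring
  // (Code[T] => Code[_]) forSome { type T }.
  private val widenings: Map[(String, String), (T => Any) forSome { type T }] = Map(
    ("Int", "Long") -> ((x: Int) => x.toLong),
    ("Float", "Double") -> ((x: Float) => x.toDouble)
  )

  // Like Casts.get: the caller erases the input type with a cast.
  def get(from: String, to: String): Any => Any =
    widenings(from -> to).asInstanceOf[Any => Any]

  def valid(from: String, to: String): Boolean = widenings.contains(from -> to)

  println(get("Int", "Long")(41)) // 41
}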
Example 10
Source File: BinarySearch.scala From hail with MIT License
package is.hail.expr.ir import is.hail.annotations.{CodeOrdering, Region} import is.hail.asm4s._ import is.hail.types.physical._ import is.hail.utils.FastIndexedSeq import scala.language.existentials class BinarySearch[C](mb: EmitMethodBuilder[C], typ: PContainer, eltType: PType, keyOnly: Boolean) { val elt: PType = typ.elementType val ti: TypeInfo[_] = typeToTypeInfo(elt) val (compare: CodeOrdering.F[Int], equiv: CodeOrdering.F[Boolean], findElt: EmitMethodBuilder[C], t: PType) = if (keyOnly) { val ttype = elt match { case t: PBaseStruct => require(t.size == 2) t case t: PInterval => t.representation.asInstanceOf[PStruct] } val kt = ttype.types(0) val findMB = mb.genEmitMethod("findElt", FastIndexedSeq[ParamType](typeInfo[Long], typeInfo[Boolean], typeToTypeInfo(kt)), typeInfo[Int]) val mk2l = findMB.newLocal[Boolean]() val mk2l1 = mb.newLocal[Boolean]() val comp: CodeOrdering.F[Int] = { case ((mk1: Code[Boolean], k1: Code[_]), (m2: Code[Boolean], v2: Code[Long] @unchecked)) => Code.memoize(v2, "bs_comp_v2") { v2 => val mk2 = Code(mk2l := m2 || ttype.isFieldMissing(v2, 0), mk2l) val k2 = mk2l.mux(defaultValue(kt), Region.loadIRIntermediate(kt)(ttype.fieldOffset(v2, 0))) findMB.getCodeOrdering(eltType, kt, CodeOrdering.Compare())((mk1, k1), (mk2, k2)) } } val ceq: CodeOrdering.F[Boolean] = { case ((mk1: Code[Boolean], k1: Code[_]), (m2: Code[Boolean], v2: Code[Long] @unchecked)) => Code.memoize(v2, "bs_comp_v2") { v2 => val mk2 = Code(mk2l1 := m2 || ttype.isFieldMissing(v2, 0), mk2l1) val k2 = mk2l1.mux(defaultValue(kt), Region.loadIRIntermediate(kt)(ttype.fieldOffset(v2, 0))) mb.getCodeOrdering(eltType, kt, CodeOrdering.Equiv())((mk1, k1), (mk2, k2)) } } (comp, ceq, findMB, kt) } else (mb.getCodeOrdering(eltType, elt, CodeOrdering.Compare()), mb.getCodeOrdering(eltType, elt, CodeOrdering.Equiv()), mb.genEmitMethod("findElt", FastIndexedSeq[ParamType](typeInfo[Long], typeInfo[Boolean], elt.ti), typeInfo[Int]), elt) private[this] val array = findElt.getCodeParam[Long](1) private[this] val m = findElt.getCodeParam[Boolean](2) private[this] val e = findElt.getCodeParam(3)(t.ti) private[this] val len = findElt.newLocal[Int]() private[this] val i = findElt.newLocal[Int]() private[this] val low = findElt.newLocal[Int]() private[this] val high = findElt.newLocal[Int]() def cmp(i: Code[Int]): Code[Int] = Code.memoize(i, "binsearch_cmp_i") { i => compare((m, e), (typ.isElementMissing(array, i), Region.loadIRIntermediate(elt)(typ.elementOffset(array, len, i)))) } // Returns smallest i, 0 <= i < n, for which a(i) >= key, or returns n if a(i) < key for all i findElt.emit(Code( len := typ.loadLength(array), low := 0, high := len, Code.whileLoop(low < high, i := (low + high) / 2, (cmp(i) <= 0).mux( high := i, low := i + 1)), low)) // check missingness of v before calling def getClosestIndex(array: Code[Long], m: Code[Boolean], v: Code[_]): Code[Int] = { findElt.invokeCode[Int](array, m, v) } }
Example 11
Source File: PrunedScanSuite.scala From spark1.52 with Apache License 2.0
package org.apache.spark.sql.sources import scala.language.existentials import org.apache.spark.rdd.RDD import org.apache.spark.sql._ import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ //PrunedScan 可以指定列,其他的列数据源可以不用返回 class PrunedScanSource extends RelationProvider {//提供关系 override def createRelation( sqlContext: SQLContext, parameters: Map[String, String]): BaseRelation = { SimplePrunedScan(parameters("from").toInt, parameters("to").toInt)(sqlContext) } } case class SimplePrunedScan(from: Int, to: Int)(@transient val sqlContext: SQLContext) extends BaseRelation with PrunedScan { override def schema: StructType = StructType(//StructType代表一张表,StructField代表一个字段 StructField("a", IntegerType, nullable = false) :: StructField("b", IntegerType, nullable = false) :: Nil) override def buildScan(requiredColumns: Array[String]): RDD[Row] = { val rowBuilders = requiredColumns.map { case "a" => (i: Int) => Seq(i) case "b" => (i: Int) => { //println(">>>>>>>"+i * 2) Seq(i * 2) } } //parallelize 分区数 sqlContext.sparkContext.parallelize(from to to).map(i => Row.fromSeq(rowBuilders.map(_(i)).reduceOption(_ ++ _).getOrElse(Seq.empty))) } } class PrunedScanSuite extends DataSourceTest with SharedSQLContext { protected override lazy val sql = caseInsensitiveContext.sql _ override def beforeAll(): Unit = { super.beforeAll() sql( """ |CREATE TEMPORARY TABLE oneToTenPruned |USING org.apache.spark.sql.sources.PrunedScanSource |OPTIONS ( | from '1', | to '10' |) """.stripMargin) } def testPruning(sqlString: String, expectedColumns: String*): Unit = { test(s"Columns output ${expectedColumns.mkString(",")}: $sqlString") { val queryExecution = sql(sqlString).queryExecution val rawPlan = queryExecution.executedPlan.collect { case p: execution.PhysicalRDD => p } match { case Seq(p) => p case _ => fail(s"More than one PhysicalRDD found\n$queryExecution") } val rawColumns = rawPlan.output.map(_.name) val rawOutput = rawPlan.execute().first() if (rawColumns != expectedColumns) { fail( s"Wrong column names. Got $rawColumns, Expected $expectedColumns\n" + s"Filters pushed: ${FiltersPushed.list.mkString(",")}\n" + queryExecution) } if (rawOutput.numFields != expectedColumns.size) { fail(s"Wrong output row. Got $rawOutput\n$queryExecution") } } } }
Example 12
Source File: SprayUtilities.scala From mmlspark with MIT License
// Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. package com.microsoft.ml.nbtest import spray.json.{JsArray, JsObject, JsValue, JsonFormat} import scala.language.{existentials, implicitConversions} abstract class SprayOp case class IndexOp(item: Int) extends SprayOp case class FieldOp(value: String) extends SprayOp class SprayUtility(val json: JsValue) { private def parseQuery(q: String): List[SprayOp] = { q.split("." (0)).flatMap { t => if (t.contains("]") & t.contains("]")) { t.split("][".toCharArray).filter(_.length > 0).toSeq match { case Seq(index) => Seq(IndexOp(index.toInt)) case Seq(field, index) => Seq(FieldOp(field), IndexOp(index.toInt)) } } else if (!t.contains("]") & !t.contains("]")) { Seq(FieldOp(t)).asInstanceOf[List[SprayOp]] } else { throw new IllegalArgumentException(s"Cannot parse query: $q") } }.toList } private def selectInternal[T](json: JsValue, ops: List[SprayOp])(implicit format: JsonFormat[T]): T = { ops match { case Nil => json.convertTo[T] case IndexOp(i) :: tail => selectInternal[T](json.asInstanceOf[JsArray].elements(i), tail) case FieldOp(f) :: tail => selectInternal[T](json.asInstanceOf[JsObject].fields(f), tail) case _ => throw new MatchError("This code should be unreachable") } } def select[T](query: String)(implicit format: JsonFormat[T]): T = { selectInternal[T](json, parseQuery(query)) } } object SprayImplicits { implicit def sprayUtilityConverter(s: JsValue): SprayUtility = new SprayUtility(s) implicit def sprayUtilityConversion(s: SprayUtility): JsValue = s.json }
Example 13
Source File: NotebookTests.scala From mmlspark with MIT License
// Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. package com.microsoft.ml.nbtest //TODO temp hack because ij picks up on it test classes by mistake import java.util.concurrent.TimeUnit import com.microsoft.ml.spark.core.test.base.TestBase import com.microsoft.ml.nbtest.DatabricksUtilities._ import scala.concurrent.Await import scala.concurrent.duration.Duration import scala.language.existentials class NotebookTests extends TestBase { test("Databricks Notebooks") { val clusterId = createClusterInPool(ClusterName, PoolId) try { println("Checking if cluster is active") tryWithRetries(Seq.fill(60*15)(1000).toArray){() => assert(isClusterActive(clusterId))} println("Installing libraries") installLibraries(clusterId) tryWithRetries(Seq.fill(60*3)(1000).toArray){() => assert(isClusterActive(clusterId))} println(s"Creating folder $Folder") workspaceMkDir(Folder) println(s"Submitting jobs") val jobIds = NotebookFiles.map(uploadAndSubmitNotebook(clusterId, _)) println(s"Submitted ${jobIds.length} for execution: ${jobIds.toList}") try { val monitors = jobIds.map((runId: Int) => monitorJob(runId, TimeoutInMillis, logLevel = 2)) println(s"Monitoring Jobs...") val failures = monitors .map(Await.ready(_, Duration(TimeoutInMillis.toLong, TimeUnit.MILLISECONDS)).value.get) .filter(_.isFailure) assert(failures.isEmpty) } catch { case t: Throwable => jobIds.foreach { jid => println(s"Cancelling job $jid") cancelRun(jid) } throw t } } finally { deleteCluster(clusterId) } } ignore("list running jobs for convenievce") { val obj = databricksGet("jobs/runs/list?active_only=true&limit=1000") println(obj) } }
Example 14
Source File: ScannerSpec.scala From better-files with MIT License
package better.files import Dsl._ import scala.language.existentials class ScannerSpec extends CommonSpec { def t1 = File.newTemporaryFile() "splitter" should "split" in { val csvSplitter = StringSplitter.on(',') def split(s: String) = csvSplitter.split(s).toList assert(split(",") === List("", "")) assert(split("") === List("")) assert(split("Hello World") === List("Hello World")) assert(split("Hello,World") === List("Hello", "World")) assert(split(",,") === List("", "", "")) assert(split(",Hello,World,") === List("", "Hello", "World", "")) assert(split(",Hello,World") === List("", "Hello", "World")) assert(split("Hello,World,") === List("Hello", "World", "")) } "scanner" should "parse files" in { val data = t1 << s""" | Hello World | 1 2 3 | Ok 23 football """.stripMargin data.scanner() foreach { scanner => assert(scanner.lineNumber() == 0) assert(scanner.next[String] == "Hello") assert(scanner.lineNumber() == 2) assert(scanner.next[String] == "World") assert(scanner.next[Int] == 1) assert(scanner.next[Int] == 2) assert(scanner.lineNumber() == 3) assert(scanner.next[Int] == 3) assert(scanner.nextLine() == " Ok 23 football") assert(!scanner.hasNext) a[NoSuchElementException] should be thrownBy scanner.next() a[NoSuchElementException] should be thrownBy scanner.nextLine() assert(!scanner.hasNext) } data.tokens().toSeq shouldEqual data.newScanner().toSeq } it should "parse longs/booleans" in { val data = for { scanner <- Scanner("10 false").autoClosed } yield scanner.next[(Long, Boolean)] data.get() shouldBe ((10L, false)) } it should "parse custom parsers" in { val file = t1 < """ |Garfield |Woofer """.stripMargin sealed trait Animal case class Dog(name: String) extends Animal case class Cat(name: String) extends Animal implicit val animalParser: Scannable[Animal] = Scannable { scanner => val name = scanner.next[String] if (name == "Garfield") Cat(name) else Dog(name) } file.scanner() foreach { scanner => Seq.fill(2)(scanner.next[Animal]) should contain theSameElementsInOrderAs Seq(Cat("Garfield"), Dog("Woofer")) } } it should "parse empty tokens" in { val scanner = Scanner("hello||world", StringSplitter.on('|')) List.fill(3)(scanner.next[Option[String]]) shouldEqual List(Some("hello"), None, Some("world")) } }
Example 15
Source File: LabelsSelectize.scala From ProductWebUI with Apache License 2.0
package synereo.client.components import shared.models.Label import synereo.client.services.SYNEREOCircuit import scala.language.existentials import japgolly.scalajs.react._ import japgolly.scalajs.react.vdom.prefix_<^._ import org.querki.jquery._ import org.scalajs.dom._ import synereo.client.facades.SynereoSelectizeFacade import scala.language.existentials import scala.scalajs.js )( <.option(^.value := "")("Select"), // props.proxy().render(searchesRootModel => searchesRootModel.se) for (label <- SYNEREOCircuit.zoom(_.searches.searchesModel).value) yield { <.option(^.value := label.text, ^.key := label.uid)(s"#${label.text}") }) } } val component = ReactComponentB[Props]("LabelsSelectize") .initialState(State()) .renderBackend[Backend] .componentDidMount(scope => scope.backend.mounted(scope.props)) .build def apply(props: Props) = component(props) }
Example 16
Source File: ConnectionsLabelsSelectize.scala From ProductWebUI with Apache License 2.0
package synereo.client.components import japgolly.scalajs.react._ import japgolly.scalajs.react.vdom.prefix_<^._ import org.querki.jquery._ import org.scalajs.dom._ import shared.dtos.Connection import synereo.client.facades.SynereoSelectizeFacade import synereo.client.services.SYNEREOCircuit import scala.language.existentials import scala.scalajs.js //scalastyle:off object ConnectionsLabelsSelectize { def getCnxnsAndLabelsFromSelectize(selectizeInputId: String): (Seq[Connection], Seq[String]) = { var selectedConnections = Seq[Connection]() var selectedLabels = Seq[String]() val selector: js.Object = s"#${selectizeInputId} > .selectize-control> .selectize-input > div" $(selector).each((y: Element) => { val dataVal = $(y).attr("data-value").toString try { val cnxn = upickle.default.read[Connection](dataVal) selectedConnections :+= cnxn } catch { case e: Exception => selectedLabels :+= dataVal } }) (selectedConnections, selectedLabels) } def filterLabelStrings(value: Seq[String], character: String): Seq[String] = { value .filter(e => e.charAt(0) == "#" && e.count(_ == character) == 1) .map(_.replace(character, "")).distinct } case class Props(parentIdentifier: String) case class State(maxItems: Int = 7, maxCharLimit: Int = 16, allowNewItemsCreation: Boolean = false) case class Backend(t: BackendScope[Props, State]) { def initializeTagsInput(): Unit = { val state = t.state.runNow() val parentIdentifier = t.props.runNow().parentIdentifier SynereoSelectizeFacade.initilizeSelectize(s"${parentIdentifier}-selectize", state.maxItems, state.maxCharLimit, state.allowNewItemsCreation) } def mounted(props: Props): Callback = Callback { initializeTagsInput() } def render(props: Props, state: State) = { <.select(^.className := "select-state", ^.id := s"${props.parentIdentifier}-selectize", ^.className := "demo-default", ^.placeholder := "Search e.g. @Synereo or #fun")( <.option(^.value := "")("Select"), for (connection <- SYNEREOCircuit.zoom(_.connections).value.connectionsResponse) yield <.option(^.value := upickle.default.write(connection.connection), ^.key := connection.connection.target)(s"@${connection.name}"), for (label <- SYNEREOCircuit.zoom(_.searches).value.searchesModel) yield <.option(^.value := label.text, ^.key := label.uid)(s"#${label.text}") ) } } val component = ReactComponentB[Props]("SearchesConnectionList") .initialState(State()) .renderBackend[Backend] .componentDidMount(scope => scope.backend.mounted(scope.props)) .build def apply(props: Props) = component(props) }
Example 17
Source File: UserPersona.scala From ProductWebUI with Apache License 2.0
package synereo.client.components import diode.react.ModelProxy import japgolly.scalajs.react.{ReactComponentB, _} import japgolly.scalajs.react.vdom.prefix_<^._ import shared.models.UserModel import synereo.client.css.{NewMessageCSS, SynereoCommanStylesCSS} import scala.language.existentials import scalacss.ScalaCssReact._ //scalastyle:off object UserPersona { def getPersona(): String = { "" } case class Props(proxy: ModelProxy[UserModel]) case class Backend(t: BackendScope[Props, _]) { def mounted(props: Props): Callback = Callback { // println("UserPersona is : " + props.proxy.value) } def render(props: Props) = { val model = props.proxy.value <.div(^.className := "row", NewMessageCSS.Style.PersonaContainerDiv)( <.div(^.className := "col-md-2 col-sm-2 col-xs-2", SynereoCommanStylesCSS.Style.paddingLeftZero)( <.img(^.alt := "userImage", ^.src := model.imgSrc, ^.className := "img-responsive", NewMessageCSS.Style.userImage) ), <.div(^.className := "col-md-10", SynereoCommanStylesCSS.Style.paddingLeftZero, SynereoCommanStylesCSS.Style.paddingRightZero)( <.div( <.button(^.className := "btn", ^.`type` := "button", NewMessageCSS.Style.changePersonaBtn)("Change posting persona", <.span(^.className := "caret", ^.color.blue)), <.div(^.className := "pull-right hidden-xs")(MIcon.apply("more_vert", "24")) ) ), <.div(NewMessageCSS.Style.userNameOnDilogue)( <.div(model.name, <.span(Icon.chevronRight), "public", <.span(Icon.share)) ) ) } } val component = ReactComponentB[Props]("UserPersona") .renderBackend[Backend] .componentDidMount(scope => scope.backend.mounted(scope.props)) .build def apply(props: Props) = component(props) }
Example 18
Source File: LabelsSelectize.scala From ProductWebUI with Apache License 2.0
package client.components import client.utils.LabelsUtils import diode.react.ModelProxy import japgolly.scalajs.react._ import japgolly.scalajs.react.vdom.prefix_<^._ import org.denigma.selectize._ import org.querki.jquery._ import org.scalajs.dom._ import client.rootmodel.SearchesRootModel import shared.models.Label import client.sessionitems.SessionItems import scala.collection.mutable.ListBuffer import scala.language.existentials import scala.scalajs.js object LabelsSelectize { def getLabelsTxtFromSelectize(selectizeInputId: String): Seq[String] = { var selectedLabels = Seq[String]() val selector: js.Object = s"#${selectizeInputId} > .selectize-control> .selectize-input > div" if ($(selector).length > 0) { $(selector).each((y: Element) => selectedLabels :+= $(y).attr("data-value").toString) } else { selectedLabels = Nil } selectedLabels } def getLabelsFromSelectizeInput(selectizeInputId: String): Seq[Label] = { var selectedLabels = Seq[Label]() val selector: js.Object = s"#${selectizeInputId} > .selectize-control> .selectize-input > div" $(selector).each((y: Element) => selectedLabels :+= upickle.default.read[Label]($(y).attr("data-value").toString)) selectedLabels } var getSelectedValue = new ListBuffer[String]() case class Props(proxy: ModelProxy[SearchesRootModel], parentIdentifier: String) case class Backend(t: BackendScope[Props, _]) { def initializeTagsInput(parentIdentifier: String): Unit = { val selectState: js.Object = s"#$parentIdentifier > .selectize-control" // println(s"element lenth: ${$(selectState).length}") if ($(selectState).length < 1) { val selectizeInput: js.Object = "#labelsSelectize" // $(selectizeInput).selectize(SelectizeConfig.maxOptions(2)).destroy() // println(s"test : ${$(selectizeInput)}") $(selectizeInput).selectize(SelectizeConfig .create(true) .maxItems(3) .plugins("remove_button")) } } def getSelectedValues = Callback { val selectState: js.Object = "#selectize" val getSelectedValue = $(selectState).find("option").text() //scalastyle:off // println(getSelectedValue) } def mounted(props: Props): Callback = Callback { // println("searches model is = " + props.proxy().searchesModel) initializeTagsInput(props.parentIdentifier) } def render(props: Props) = { val parentDiv: js.Object = s"#${props.parentIdentifier}" // println(s"parent div length ${$(parentDiv).length}") if ($(parentDiv).length == 0) { <.select(^.className := "select-state", ^.id := "labelsSelectize", ^.className := "demo-default", ^.placeholder := "select #label(s)", ^.onChange --> getSelectedValues)( <.option(^.value := "")("Select"), // props.proxy().render(searchesRootModel => searchesRootModel.se) for (label <- props.proxy().searchesModel .filter(e => e.parentUid == "self") .filterNot(e => LabelsUtils.getSystemLabels().contains(e.text))) yield { <.option(^.value := upickle.default.write(label), ^.key := label.uid)(label.text) } ) } else { <.div() } } } val component = ReactComponentB[Props]("SearchesConnectionList") .renderBackend[Backend] .componentDidMount(scope => scope.backend.mounted(scope.props)) .build def apply(props: Props) = component(props) }
Example 19
Source File: Dashboard.scala From scalajs-spa-tutorial with Apache License 2.0
package spatutorial.client.modules import diode.data.Pot import diode.react._ import japgolly.scalajs.react._ import japgolly.scalajs.react.extra.router.RouterCtl import japgolly.scalajs.react.vdom.html_<^._ import spatutorial.client.SPAMain.{Loc, TodoLoc} import spatutorial.client.components._ import scala.util.Random import scala.language.existentials object Dashboard { case class Props(router: RouterCtl[Loc], proxy: ModelProxy[Pot[String]]) case class State(motdWrapper: ReactConnectProxy[Pot[String]]) // create dummy data for the chart val cp = Chart.ChartProps( "Test chart", Chart.BarChart, ChartData( Random.alphanumeric.map(_.toUpper.toString).distinct.take(10), Seq(ChartDataset(Iterator.continually(Random.nextDouble() * 10).take(10).toSeq, "Data1")) ) ) // create the React component for Dashboard private val component = ScalaComponent.builder[Props]("Dashboard") // create and store the connect proxy in state for later use .initialStateFromProps(props => State(props.proxy.connect(m => m))) .renderPS { (_, props, state) => <.div( // header, MessageOfTheDay and chart components <.h2("Dashboard"), state.motdWrapper(Motd(_)), Chart(cp), // create a link to the To Do view <.div(props.router.link(TodoLoc)("Check your todos!")) ) } .build def apply(router: RouterCtl[Loc], proxy: ModelProxy[Pot[String]]) = component(Props(router, proxy)) }
Example 20
Source File: ShuffleMapTask.scala From BigDatalog with Apache License 2.0
package org.apache.spark.scheduler import java.nio.ByteBuffer import scala.language.existentials import org.apache.spark._ import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD import org.apache.spark.shuffle.ShuffleWriter def this(partitionId: Int) { this(0, 0, null, new Partition { override def index: Int = 0 }, null, null) } @transient private val preferredLocs: Seq[TaskLocation] = { if (locs == null) Nil else locs.toSet.toSeq } override def runTask(context: TaskContext): MapStatus = { // Deserialize the RDD using the broadcast variable. val deserializeStartTime = System.currentTimeMillis() val ser = SparkEnv.get.closureSerializer.newInstance() val (rdd, dep) = ser.deserialize[(RDD[_], ShuffleDependency[_, _, _])]( ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader) _executorDeserializeTime = System.currentTimeMillis() - deserializeStartTime metrics = Some(context.taskMetrics) var writer: ShuffleWriter[Any, Any] = null try { val manager = SparkEnv.get.shuffleManager writer = manager.getWriter[Any, Any](dep.shuffleHandle, partitionId, context) writer.write(rdd.iterator(partition, context).asInstanceOf[Iterator[_ <: Product2[Any, Any]]]) writer.stop(success = true).get } catch { case e: Exception => try { if (writer != null) { writer.stop(success = false) } } catch { case e: Exception => log.debug("Could not stop writer", e) } throw e } } override def preferredLocations: Seq[TaskLocation] = preferredLocs override def toString: String = "ShuffleMapTask(%d, %d)".format(stageId, partitionId) }
Example 21
Source File: AnyFormatSpec.scala From scalapb-json4s with Apache License 2.0
package scalapb.json4s import com.google.protobuf.any.{Any => PBAny} import jsontest.anytests.{AnyTest, ManyAnyTest} import org.json4s.jackson.JsonMethods._ import scala.language.existentials import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.must.Matchers class AnyFormatSpec extends AnyFlatSpec with Matchers with JavaAssertions { val RawExample = AnyTest("test") val RawJson = parse(s"""{"field":"test"}""") val AnyExample = PBAny.pack(RawExample) val AnyJson = parse( s"""{"@type":"type.googleapis.com/jsontest.AnyTest","field":"test"}""" ) val CustomPrefixAny = PBAny.pack(RawExample, "example.com/") val CustomPrefixJson = parse( s"""{"@type":"example.com/jsontest.AnyTest","field":"test"}""" ) val ManyExample = ManyAnyTest( Seq( PBAny.pack(AnyTest("1")), PBAny.pack(AnyTest("2")) ) ) val ManyPackedJson = parse( """ |{ | "@type": "type.googleapis.com/jsontest.ManyAnyTest", | "fields": [ | {"@type": "type.googleapis.com/jsontest.AnyTest", "field": "1"}, | {"@type": "type.googleapis.com/jsontest.AnyTest", "field": "2"} | ] |} """.stripMargin ) override def registeredCompanions = Seq(AnyTest, ManyAnyTest) // For clarity def UnregisteredPrinter = JsonFormat.printer def UnregisteredParser = JsonFormat.parser "Any" should "fail to serialize if its respective companion is not registered" in { an[IllegalStateException] must be thrownBy UnregisteredPrinter.toJson( AnyExample ) } "Any" should "fail to deserialize if its respective companion is not registered" in { a[JsonFormatException] must be thrownBy UnregisteredParser.fromJson[PBAny]( AnyJson ) } "Any" should "serialize correctly if its respective companion is registered" in { ScalaJsonPrinter.toJson(AnyExample) must be(AnyJson) } "Any" should "fail to serialize with a custom URL prefix if specified" in { an[IllegalStateException] must be thrownBy ScalaJsonPrinter.toJson( CustomPrefixAny ) } "Any" should "fail to deserialize for a non-Google-prefixed type URL" in { a[JsonFormatException] must be thrownBy ScalaJsonParser.fromJson[PBAny]( CustomPrefixJson ) } "Any" should "deserialize correctly if its respective companion is registered" in { ScalaJsonParser.fromJson[PBAny](AnyJson) must be(AnyExample) } "Any" should "be serialized the same as in Java (and parsed back to original)" in { assertJsonIsSameAsJava(AnyExample) } "Any" should "resolve printers recursively" in { val packed = PBAny.pack(ManyExample) ScalaJsonPrinter.toJson(packed) must be(ManyPackedJson) } "Any" should "resolve parsers recursively" in { ScalaJsonParser.fromJson[PBAny](ManyPackedJson).unpack[ManyAnyTest] must be( ManyExample ) } }
Example 22
Source File: AnyFormat.scala From scalapb-json4s with Apache License 2.0
package scalapb.json4s import com.google.protobuf.any.{Any => PBAny} import org.json4s.JsonAST.{JNothing, JObject, JString, JValue} import scala.language.existentials object AnyFormat { val anyWriter: (Printer, PBAny) => JValue = { case (printer, any) => // Find the companion so it can be used to JSON-serialize the message. Perhaps this can be circumvented by // including the original GeneratedMessage with the Any (at least in memory). val cmp = printer.typeRegistry .findType(any.typeUrl) .getOrElse( throw new IllegalStateException( s"Unknown type ${any.typeUrl} in Any. Add a TypeRegistry that supports this type to the Printer." ) ) // Unpack the message... val message = any.unpack(cmp) // ... and add the @type marker to the resulting JSON printer.toJson(message) match { case JObject(fields) => JObject(("@type" -> JString(any.typeUrl)) +: fields) case value => // Safety net, this shouldn't happen throw new IllegalStateException( s"Message of type ${any.typeUrl} emitted non-object JSON: $value" ) } } val anyParser: (Parser, JValue) => PBAny = { case (parser, obj @ JObject(fields)) => obj \ "@type" match { case JString(typeUrl) => val cmp = parser.typeRegistry .findType(typeUrl) .getOrElse( throw new JsonFormatException( s"Unknown type ${typeUrl} in Any. Add a TypeRegistry that supports this type to the Parser." ) ) val message = parser.fromJson(obj, true)(cmp) PBAny(typeUrl = typeUrl, value = message.toByteString) case JNothing => throw new JsonFormatException(s"Missing type url when parsing $obj") case unknown => throw new JsonFormatException( s"Expected string @type field, got $unknown" ) } case (_, unknown) => throw new JsonFormatException(s"Expected an object, got $unknown") } }
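anyWriter has to resolve the companion from the type registry at runtime and serialize a message whose concrete type is not statically known, which is why the file imports scala.language.existentials. A small hedged sketch of that registry-lookup shape using only standard-library types follows; the registry contents and type URLs are invented.

import scala.language.existentials

object RegistrySketch extends App {
  // A type registry keyed by type URL, loosely mirroring printer.typeRegistry.findType.
  val registry: Map[String, Class[_]] = Map(
    "type.example.com/Text" -> classOf[String],
    "type.example.com/Number" -> classOf[java.lang.Long]
  )

  def resolve(typeUrl: String): Class[_] =
    registry.getOrElse(
      typeUrl,
      throw new IllegalStateException(s"Unknown type $typeUrl in Any.")
    )

  // The resolved Class[_] can still be inspected and used reflectively.
  println(resolve("type.example.com/Text").getSimpleName) // String
}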
Example 23
Source File: ModelSerializabilityTestBase.scala From aloha with MIT License
package com.eharmony.aloha import scala.language.existentials import com.eharmony.aloha import com.eharmony.aloha.models.{Model, SubmodelBase} import org.junit.Assert._ import org.junit.Test import org.reflections.Reflections import scala.collection.JavaConversions.asScalaSet import scala.util.Try import java.lang.reflect.{Method, Modifier} import com.eharmony.aloha.util.Logging abstract class ModelSerializabilityTestBase(pkgs: Seq[String], outFilters: Seq[String]) extends Logging { def this() = this(pkgs = Seq(aloha.pkgName), Seq.empty) @Test def testSerialization(): Unit = { val ref = new Reflections(pkgs:_*) val submodels = ref.getSubTypesOf(classOf[SubmodelBase[_, _, _, _]]).toSeq val models = ref.getSubTypesOf(classOf[Model[_, _]]).toSeq val modelClasses = (models ++ submodels). filterNot { _.isInterface }. filterNot { c => val name = c.getName outFilters.exists(name.matches) } if (modelClasses.isEmpty) { fail(s"No models found to test for Serializability in packages: ${pkgs.mkString(",")}") } else { debug { modelClasses .map(_.getCanonicalName) .mkString("Models tested for Serializability:\n\t", "\n\t", "") } } modelClasses.foreach { c => val m = for { testClass <- getTestClass(c.getCanonicalName) testMethod <- getTestMethod(testClass) method <- ensureTestMethodIsTest(testMethod) } yield method m.left foreach fail } } private[this] implicit class RightMonad[L, R](e: Either[L, R]) { def flatMap[R1](f: R => Either[L, R1]) = e.right.flatMap(f) def map[R1](f: R => R1) = e.right.map(f) } private[this] def getTestClass(modelClassName: String) = { val testName = modelClassName + "Test" Try { Class.forName(testName) } map { Right(_) } getOrElse Left("No test class exists for " + modelClassName) } private[this] def getTestMethod(testClass: Class[_]) = { val testMethodName = "testSerialization" lazy val msg = s"$testMethodName doesn't exist in ${testClass.getCanonicalName}." Try { Option(testClass.getMethod(testMethodName)) } map { case Some(m) => Right(m) case None => Left(msg) } getOrElse Left(msg) } private[this] def ensureTestMethodIsTest(method: Method) = { if (!Modifier.isPublic(method.getModifiers)) Left(s"testSerialization in ${method.getDeclaringClass.getCanonicalName} is not public") if (!method.getDeclaredAnnotations.exists(_.annotationType() == classOf[Test])) Left(s"testSerialization in ${method.getDeclaringClass.getCanonicalName} does not have a @org.junit.Test annotation.") else if (method.getReturnType != classOf[Void] && method.getReturnType != classOf[Unit]) Left(s"testSerialization in ${method.getDeclaringClass.getCanonicalName} is not a void function. It returns: ${method.getReturnType}") else Right(method) } }
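This test works with Class[_] values throughout: model classes discovered by Reflections, test classes loaded via Class.forName, and their methods inspected reflectively. Here is a compact sketch of just the load-and-inspect step; the class name below is only an example, and, as in the real test, the lookup simply fails when no matching test class exists.

import java.lang.reflect.Modifier
import scala.language.existentials
import scala.util.Try

object TestClassSketch extends App {
  // Mirrors getTestClass: try to load "<model>Test" and keep it as a Class[_].
  def testClassFor(modelClassName: String): Either[String, Class[_]] =
    Try(Class.forName(modelClassName + "Test"))
      .map(Right(_))
      .getOrElse(Left("No test class exists for " + modelClassName))

  testClassFor("scala.collection.immutable.List") match {
    case Right(cls) => println(s"${cls.getName} public: ${Modifier.isPublic(cls.getModifiers)}")
    case Left(msg) => println(msg)
  }
}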
Example 24
Source File: fields.scala From aloha with MIT License
package com.eharmony.aloha.semantics.compiled.plugin.schemabased.schema import com.eharmony.aloha.reflect.RefInfo import scala.language.existentials // RECORD, ENUM, ARRAY, MAP, UNION, FIXED, STRING, BYTES, INT, LONG, FLOAT, DOUBLE, BOOLEAN, NULL; sealed trait FieldDesc { def name: String def index: Int def nullable: Boolean } // TODO: Add additional types as necessary. case class RecordField(name: String, index: Int, schema: Schema, refInfo: RefInfo[_], nullable: Boolean) extends FieldDesc case class EnumField(name: String, index: Int, nullable: Boolean) extends FieldDesc case class ListField(name: String, index: Int, elementType: FieldDesc, nullable: Boolean) extends FieldDesc case class StringField(name: String, index: Int, nullable: Boolean) extends FieldDesc case class IntField(name: String, index: Int, nullable: Boolean) extends FieldDesc case class LongField(name: String, index: Int, nullable: Boolean) extends FieldDesc case class FloatField(name: String, index: Int, nullable: Boolean) extends FieldDesc case class DoubleField(name: String, index: Int, nullable: Boolean) extends FieldDesc case class BooleanField(name: String, index: Int, nullable: Boolean) extends FieldDesc
Example 25
Source File: HadoopUtils.scala From spark-images with Apache License 2.0
package org.apache.spark.image import java.nio.file.Paths import org.apache.commons.io.FilenameUtils import scala.sys.process._ import org.apache.hadoop.conf.{Configuration, Configured} import org.apache.hadoop.fs.{Path, PathFilter} import org.apache.hadoop.mapreduce.lib.input.FileInputFormat import org.apache.spark.sql.SparkSession import scala.language.existentials import scala.util.Random object RecursiveFlag { def setPathFilter(value: Option[Class[_]], sampleRatio: Option[Double] = None, spark: SparkSession) : Option[Class[_]] = { val flagName = FileInputFormat.PATHFILTER_CLASS val hadoopConf = spark.sparkContext.hadoopConfiguration val old = Option(hadoopConf.getClass(flagName, null)) if (sampleRatio.isDefined) { hadoopConf.setDouble(SamplePathFilter.ratioParam, sampleRatio.get) } else { hadoopConf.unset(SamplePathFilter.ratioParam) None } value match { case Some(v) => hadoopConf.setClass(flagName, v, classOf[PathFilter]) case None => hadoopConf.unset(flagName) } old } }
Example 26
Source File: JobUtils.scala From fusion-data with Apache License 2.0
package mass.job.util import java.io.File import java.nio.charset.Charset import java.nio.file.{ Files, Path, StandardCopyOption } import java.util.zip.ZipFile import com.typesafe.scalalogging.StrictLogging import helloscala.common.Configuration import helloscala.common.util.{ DigestUtils, Utils } import mass.common.util.FileUtils import mass.core.job.JobConstants import mass.job.JobSettings import mass.message.job._ import mass.model.job.{ JobItem, JobTrigger } import scala.concurrent.{ ExecutionContext, Future } object JobUtils extends StrictLogging { case class JobZipInternal private (configs: Vector[JobCreateReq], entries: Vector[Path]) def uploadJob(jobSettings: JobSettings, req: JobUploadJobReq)(implicit ec: ExecutionContext): Future[JobZip] = Future { val sha256 = DigestUtils.sha256HexFromPath(req.file) val dest = jobSettings.jobSavedDir.resolve(sha256.take(2)).resolve(sha256) val jobZipInternal = parseJobZip(req.file, req.charset, dest.resolve(JobConstants.DIST)) match { case Right(v) => v case Left(e) => throw e } val zipPath = dest.resolve(req.fileName) Files.move(req.file, zipPath, StandardCopyOption.REPLACE_EXISTING) JobZip(zipPath, jobZipInternal.configs, jobZipInternal.entries) } @inline def parseJobZip(file: Path, charset: Charset, dest: Path): Either[Throwable, JobZipInternal] = parseJobZip(file.toFile, charset, dest) def parseJobZip(file: File, charset: Charset, dest: Path): Either[Throwable, JobZipInternal] = Utils.either { import scala.jdk.CollectionConverters._ import scala.language.existentials val zip = new ZipFile(file, charset) try { val (confEntries, fileEntries) = zip .entries() .asScala .filterNot(entry => entry.isDirectory) .span(entry => entry.getName.endsWith(JobConstants.ENDS_SUFFIX) && !entry.isDirectory) val configs = confEntries.map(confEntry => parseJobConf(FileUtils.getString(zip.getInputStream(confEntry), charset, "\n")) match { case Right(config) => config case Left(e) => throw e }) val buf = Array.ofDim[Byte](1024) val entryPaths = fileEntries.map { entry => val entryName = entry.getName val savePath = dest.resolve(entryName) if (!Files.isDirectory(savePath.getParent)) { Files.createDirectories(savePath.getParent) } FileUtils.write(zip.getInputStream(entry), Files.newOutputStream(savePath), buf) // zip entry存磁盘 savePath } JobZipInternal(configs.toVector, entryPaths.toVector) } finally { if (zip ne null) zip.close() } } def parseJobConf(content: String): Either[Throwable, JobCreateReq] = Utils.either { val conf = Configuration.parseString(content) val jobItem = JobItem(conf.getConfiguration("item")) val jobTrigger = JobTrigger(conf.getConfiguration("trigger")) JobCreateReq(conf.get[Option[String]]("key"), jobItem, jobTrigger) } } case class JobZip(zipPath: Path, configs: Vector[JobCreateReq], entries: Vector[Path])
Example 27
Source File: ScheduledTaskManager.scala From incubator-toree with Apache License 2.0
package org.apache.toree.utils import scala.language.existentials import java.util.concurrent._ import java.util.UUID import com.google.common.util.concurrent.ThreadFactoryBuilder import ScheduledTaskManager._ import scala.util.Try def stop() = { _taskMap.clear() _scheduler.shutdown() } } object ScheduledTaskManager { val DefaultMaxThreads = 4 val DefaultExecutionDelay = 10 // 10 milliseconds val DefaultTimeInterval = 100 // 100 milliseconds }
Example 28
Source File: TipTestSuite.scala From inox with Apache License 2.0
package inox package tip import solvers._ import scala.language.existentials class TipTestSuite extends TestSuite with ResourceUtils { override def configurations = Seq( Seq(optSelectedSolvers(Set("nativez3")), optCheckModels(true)), Seq(optSelectedSolvers(Set("smt-z3")), optCheckModels(true)), Seq(optSelectedSolvers(Set("smt-cvc4")), optCheckModels(true)), Seq(optSelectedSolvers(Set("smt-z3")), optCheckModels(true), optAssumeChecked(true)) ) override protected def optionsString(options: Options): String = { "solver=" + options.findOptionOrDefault(optSelectedSolvers).head + (if (options.findOptionOrDefault(optAssumeChecked)) " assumechecked" else "") } private def ignoreSAT(ctx: Context, file: java.io.File): FilterStatus = ctx.options.findOptionOrDefault(optSelectedSolvers).headOption match { case Some(solver) => (solver, file.getName) match { // test containing list of booleans, so CVC4 will crash on this // See http://church.cims.nyu.edu/bugzilla3/show_bug.cgi?id=500 case ("smt-cvc4", "List-fold.tip") => Skip // Z3 and CVC4 binaries are exceedingly slow on these benchmarks case ("smt-z3" | "smt-cvc4", "BinarySearchTreeQuant.scala-2.tip") => Ignore case ("smt-z3" | "smt-cvc4", "ForallAssoc.scala-0.tip") => Ignore // this test only holds when assumeChecked=false case (_, "LambdaEquality2.scala-1.tip") if ctx.options.findOptionOrDefault(optAssumeChecked) => Skip case _ => Test } case _ => Test } private def ignoreUNSAT(ctx: Context, file: java.io.File): FilterStatus = ctx.options.findOptionOrDefault(optSelectedSolvers).headOption match { case Some(solver) => (solver, file.getName) match { // Z3 binary will predictably segfault on certain permutations of this problem case ("smt-z3", "MergeSort2.scala-1.tip") => Ignore // use non-linear operators that aren't supported in CVC4 case ("smt-cvc4", "Instantiation.scala-0.tip") => Skip case ("smt-cvc4", "LetsInForall.tip") => Skip case ("smt-cvc4", "Weird.scala-0.tip") => Skip // this test only holds when assumeChecked=true case (_, "QuickSortFilter.scala-1.tip") if !ctx.options.findOptionOrDefault(optAssumeChecked) => Skip case _ => Test } case _ => Test } private def ignoreUNKNOWN(ctx: Context, file: java.io.File): FilterStatus = ctx.options.findOptionOrDefault(optSelectedSolvers).headOption match { case Some(solver) => (solver, file.getName) match { // non-linear operations are too slow on smt-z3 case ("smt-z3", "Soundness2.scala-0.tip") => Ignore // use non-linear operators that aren't supported in CVC4 case ("smt-cvc4", "Soundness.scala-0.tip") => Skip case ("smt-cvc4", "Soundness2.scala-0.tip") => Skip case _ => Test } case _ => Test } for (file <- resourceFiles("regression/tip/SAT", _.endsWith(".tip"))) { test(s"SAT - ${file.getName}", ignoreSAT(_, file)) { implicit ctx => for ((program, expr) <- Parser(file).parseScript) { assert(SimpleSolverAPI(program.getSolver).solveSAT(expr).isSAT) } } } for (file <- resourceFiles("regression/tip/UNSAT", _.endsWith(".tip"))) { test(s"UNSAT - ${file.getName}", ignoreUNSAT(_, file)) { implicit ctx => for ((program, expr) <- Parser(file).parseScript) { assert(SimpleSolverAPI(program.getSolver).solveSAT(expr).isUNSAT) } } } for (file <- resourceFiles("regression/tip/UNKNOWN", _.endsWith(".tip"))) { test(s"UNKNOWN - ${file.getName}", ignoreUNKNOWN(_, file)) { ctx0 => implicit val ctx = ctx0.copy(options = ctx0.options + optCheckModels(false)) for ((program, expr) <- Parser(file).parseScript) { val api = SimpleSolverAPI(program.getSolver) val res = api.solveSAT(expr) assert(!res.isSAT && !res.isUNSAT) assert(ctx.reporter.errorCount > 0) } } } }
Example 29
Source File: string_formats_yaml.base.scala From play-swagger with MIT License | 5 votes |
package string_formats.yaml import scala.language.existentials import play.api.mvc.{Action, Controller, Results} import play.api.http._ import Results.Status import de.zalando.play.controllers.{PlayBodyParsing, ParsingError, ResultWrapper} import PlayBodyParsing._ import scala.util._ import de.zalando.play.controllers.Base64String import Base64String._ import de.zalando.play.controllers.BinaryString import BinaryString._ import org.joda.time.DateTime import java.util.UUID import org.joda.time.LocalDate import de.zalando.play.controllers.PlayPathBindables trait String_formatsYamlBase extends Controller with PlayBodyParsing { sealed trait GetType[T] extends ResultWrapper[T] case object Get200 extends EmptyReturn(200) private type getActionRequestType = (GetDate_time, GetDate, GetBase64, GetUuid, BinaryString) private type getActionType[T] = getActionRequestType => GetType[T] forSome { type T } private def getParser(acceptedTypes: Seq[String], maxLength: Int = parse.DefaultMaxTextLength) = { def bodyMimeType: Option[MediaType] => String = mediaType => { val requestType = mediaType.toSeq.map { case m: MediaRange => m case MediaType(a,b,c) => new MediaRange(a,b,c,None,Nil) } negotiateContent(requestType, acceptedTypes).orElse(acceptedTypes.headOption).getOrElse("application/json") } import de.zalando.play.controllers.WrappedBodyParsers val customParsers = WrappedBodyParsers.anyParser[BinaryString] anyParser[BinaryString](bodyMimeType, customParsers, "Invalid BinaryString", maxLength) } val getActionConstructor = Action def getAction[T] = (f: getActionType[T]) => (date_time: GetDate_time, date: GetDate, base64: GetBase64, uuid: GetUuid) => getActionConstructor(getParser(Seq[String]())) { request => val providedTypes = Seq[String]("application/json", "application/yaml") negotiateContent(request.acceptedTypes, providedTypes).map { getResponseMimeType => val petId = request.body val result = new GetValidator(date_time, date, base64, uuid, petId).errors match { case e if e.isEmpty => processValidgetRequest(f)((date_time, date, base64, uuid, petId))(getResponseMimeType) case l => implicit val marshaller: Writeable[Seq[ParsingError]] = parsingErrors2Writable(getResponseMimeType) BadRequest(l) } result }.getOrElse(Status(406)("The server doesn't support any of the requested mime types")) } private def processValidgetRequest[T](f: getActionType[T])(request: getActionRequestType)(mimeType: String) = { f(request).toResult(mimeType).getOrElse { Results.NotAcceptable } } abstract class EmptyReturn(override val statusCode: Int = 204) extends ResultWrapper[Results.EmptyContent] with GetType[Results.EmptyContent] { val result = Results.EmptyContent(); val writer = (x: String) => Some(new DefaultWriteables{}.writeableOf_EmptyContent); override def toResult(mimeType: String): Option[play.api.mvc.Result] = Some(Results.NoContent) } case object NotImplementedYet extends ResultWrapper[Results.EmptyContent] with GetType[Results.EmptyContent] { val statusCode = 501; val result = Results.EmptyContent(); val writer = (x: String) => Some(new DefaultWriteables{}.writeableOf_EmptyContent); override def toResult(mimeType: String): Option[play.api.mvc.Result] = Some(Results.NotImplemented) } }
Example 30
Source File: FPTreeSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.fpm import scala.language.existentials import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext class FPTreeSuite extends SparkFunSuite with MLlibTestSparkContext { test("add transaction") { val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) assert(tree.root.children.size == 2) assert(tree.root.children.contains("a")) assert(tree.root.children("a").item.equals("a")) assert(tree.root.children("a").count == 2) assert(tree.root.children.contains("b")) assert(tree.root.children("b").item.equals("b")) assert(tree.root.children("b").count == 1) var child = tree.root.children("a") assert(child.children.size == 1) assert(child.children.contains("b")) assert(child.children("b").item.equals("b")) assert(child.children("b").count == 2) child = child.children("b") assert(child.children.size == 2) assert(child.children.contains("c")) assert(child.children.contains("y")) assert(child.children("c").item.equals("c")) assert(child.children("y").item.equals("y")) assert(child.children("c").count == 1) assert(child.children("y").count == 1) } test("merge tree") { val tree1 = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) val tree2 = new FPTree[String] .add(Seq("a", "b")) .add(Seq("a", "b", "c")) .add(Seq("a", "b", "c", "d")) .add(Seq("a", "x")) .add(Seq("a", "x", "y")) .add(Seq("c", "n")) .add(Seq("c", "m")) val tree3 = tree1.merge(tree2) assert(tree3.root.children.size == 3) assert(tree3.root.children("a").count == 7) assert(tree3.root.children("b").count == 1) assert(tree3.root.children("c").count == 2) val child1 = tree3.root.children("a") assert(child1.children.size == 2) assert(child1.children("b").count == 5) assert(child1.children("x").count == 2) val child2 = child1.children("b") assert(child2.children.size == 2) assert(child2.children("y").count == 1) assert(child2.children("c").count == 3) val child3 = child2.children("c") assert(child3.children.size == 1) assert(child3.children("d").count == 1) val child4 = child1.children("x") assert(child4.children.size == 1) assert(child4.children("y").count == 1) val child5 = tree3.root.children("c") assert(child5.children.size == 2) assert(child5.children("n").count == 1) assert(child5.children("m").count == 1) } test("extract freq itemsets") { val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("a", "b")) .add(Seq("a")) .add(Seq("b")) .add(Seq("b", "n")) val freqItemsets = tree.extract(3L).map { case (items, count) => (items.toSet, count) }.toSet val expected = Set( (Set("a"), 4L), (Set("b"), 5L), (Set("a", "b"), 3L)) assert(freqItemsets === expected) } }
Example 31
Source File: DataRow.scala From flink-elasticsearch-source-connector with Apache License 2.0 | 5 votes |
package com.mnubo.flink.streaming.connectors import org.apache.commons.lang3.ClassUtils import org.apache.flink.api.common.typeinfo.TypeInformation import org.apache.flink.api.java.typeutils.TypeExtractor import scala.language.existentials case class Value(v: Any, name: String, givenTypeInfo: Option[TypeInformation[_]] = None) { require(v != null || givenTypeInfo.isDefined, "You must pass a TypeInformation for null values") val typeInfo = givenTypeInfo match { case Some(ti) => ti case None => TypeExtractor.getForObject(v) } require(isAssignable(v, typeInfo.getTypeClass), s"data element '$v' is not compatible with class ${typeInfo.getTypeClass.getName}") private def isAssignable(value: Any, cl: Class[_]) = { if (value == null && classOf[AnyRef].isAssignableFrom(cl)) true else ClassUtils.isAssignable(value.getClass, cl) } } object Value { def apply(v: Any, name: String, givenTypeInfo: TypeInformation[_]) = { new Value(v, name, Some(givenTypeInfo)) } } class DataRow(private [connectors] val data: Array[Any], private [connectors] val info: DataRowTypeInfo) extends Product with Serializable { require(data != null, "data must not be null") require(info != null, "info must not be null") require(data.length == info.getArity, "data must be of the correct arity") def apply[T](i: Int): T = data(i).asInstanceOf[T] def apply[T](fieldExpression: String): T = apply(info.getFieldIndex(fieldExpression)) override def productElement(n: Int): Any = apply[AnyRef](n) override def productArity = info.getArity override def canEqual(that: Any) = that.isInstanceOf[DataRow] override def equals(that: Any) = canEqual(that) && data.sameElements(that.asInstanceOf[DataRow].data) && info.getFieldNames.sameElements(that.asInstanceOf[DataRow].info.getFieldNames) override def hashCode = { var result = 1 for (element <- data) result = 31 * result + (if (element == null) 0 else element.hashCode) result } override def toString = info.getFieldNames .zip(data.map(v => if (v == null) "null" else v.toString)) .map{case (name, value) => s"$name=$value"} .mkString("DataRow(", ", ", ")") } object DataRow { def apply(data: Value*): DataRow = { require(data != null, "data cannot be null") require(!data.contains(null), "data value cannot be null") new DataRow( data.map(_.v).toArray, new DataRowTypeInfo( data.map(_.name), data.map(_.typeInfo) ) ) } }
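A minimal usage sketch for the DataRow/Value API above, assuming the connector classes (DataRow, Value, DataRowTypeInfo) and Flink are on the classpath; the field names and values are illustrative only:
import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import com.mnubo.flink.streaming.connectors.{DataRow, Value}
// Build a row from heterogeneous values; a null value needs an explicit TypeInformation.
val row = DataRow(
  Value(42, "id"),
  Value("alice", "name"),
  Value(null, "score", BasicTypeInfo.DOUBLE_TYPE_INFO))
val name = row[String]("name") // access by field name
val id = row[Int](0)           // or by position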
Example 32
Source File: RecordTransformer.scala From flink-elasticsearch-source-connector with Apache License 2.0 | 5 votes |
package com.mnubo.flink.streaming.connectors import org.apache.flink.api.common.operators.Keys.ExpressionKeys._ import org.apache.flink.api.common.typeinfo.TypeInformation import scala.annotation.tailrec import scala.language.existentials import scala.reflect.ClassTag sealed trait FieldSpecification extends Serializable case class ExistingField(name: String) extends FieldSpecification case class NewField(name: String, typeInfo: TypeInformation[_]) extends FieldSpecification trait RecordTransformer extends Serializable { val classTag = ClassTag[DataRow](classOf[DataRow]) def typeInfo : DataRowTypeInfo def transform(dataRow: DataRow, values:Any*) : DataRow } class FieldMapperRecordTransformer private[connectors](srcTypeInfo:DataRowTypeInfo, fieldSpecifications: FieldSpecification*) extends RecordTransformer { require(srcTypeInfo != null, s"srcTypeInfo must not be null") require(fieldSpecifications != null, s"fieldSpecifications must not be null") require(fieldSpecifications.nonEmpty, s"fieldSpecifications must not be empty") require(!fieldSpecifications.contains(null), s"fieldSpecifications must not contain any nulls") override val typeInfo = { val (fieldNames, elementTypes) = fieldSpecifications.flatMap { case ExistingField(name) if name == SELECT_ALL_CHAR || name == SELECT_ALL_CHAR_SCALA => srcTypeInfo.getFieldNames.zip(srcTypeInfo.getElementTypes) case ExistingField(name) => Seq(name -> srcTypeInfo.getFieldType(name)) case NewField(name, newFieldTypeInfo) => Seq(name -> newFieldTypeInfo) }.unzip require(fieldNames.length == fieldNames.distinct.length, s"Fields can't have duplicates. Fields were $fieldNames.") new DataRowTypeInfo(fieldNames, elementTypes) } private def newFieldsNames = fieldSpecifications.collect{ case newValue: NewField => newValue.name } override def transform(dataRow: DataRow, values:Any*) : DataRow = { require(dataRow != null, s"dataRow must not be null") require(values != null, s"values must not be null") require(newFieldsNames.length == values.length, s"Must specify values for all new fields and only new fields. New fields are '$newFieldsNames'") val resultValues = new Array[Any](typeInfo.getArity) @tailrec def transform(index:Int, remainingSpecs: Seq[FieldSpecification], remainingValues:Seq[Any]) : DataRow = { if(remainingSpecs.isEmpty) { new DataRow(resultValues, typeInfo) } else { val currentSpec = remainingSpecs.head currentSpec match { case ExistingField(name) if name == SELECT_ALL_CHAR || name == SELECT_ALL_CHAR_SCALA => Array.copy(dataRow.data, 0, resultValues, index, dataRow.data.length) transform(index + dataRow.data.length, remainingSpecs.tail, remainingValues) case ExistingField(name) => resultValues(index) = dataRow(name) transform(index + 1, remainingSpecs.tail, remainingValues) case NewField(name, _) => resultValues(index) = remainingValues.head transform(index + 1, remainingSpecs.tail, remainingValues.tail) } } } transform(0, fieldSpecifications, values) } } object RecordTransformer { def mapFields(srcTypeInfo: DataRowTypeInfo, fieldSpecifications: FieldSpecification*) : RecordTransformer = { new FieldMapperRecordTransformer(srcTypeInfo, fieldSpecifications:_*) } }
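A hedged sketch of how the transformer above might be driven, assuming DataRowTypeInfo can be constructed directly as in the DataRow companion; the field names and the appended value are illustrative:
import org.apache.flink.api.common.typeinfo.BasicTypeInfo
import com.mnubo.flink.streaming.connectors._
// Describe the source row layout, then append one new field to every row.
val srcInfo = new DataRowTypeInfo(
  Seq("id", "name"),
  Seq(BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO))
val transformer = RecordTransformer.mapFields(
  srcInfo,
  ExistingField("id"),
  ExistingField("name"),
  NewField("score", BasicTypeInfo.DOUBLE_TYPE_INFO)) // ExistingField("*") would copy all source fields at once
val enriched = transformer.transform(DataRow(Value(1, "id"), Value("alice", "name")), 0.95)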
Example 33
Source File: DAGSchedulerEvent.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.util.Properties import scala.language.existentials import org.apache.spark._ import org.apache.spark.rdd.RDD import org.apache.spark.util.{AccumulatorV2, CallSite} private[scheduler] case class MapStageSubmitted( jobId: Int, dependency: ShuffleDependency[_, _, _], callSite: CallSite, listener: JobListener, properties: Properties = null) extends DAGSchedulerEvent private[scheduler] case class StageCancelled( stageId: Int, reason: Option[String]) extends DAGSchedulerEvent private[scheduler] case class JobCancelled( jobId: Int, reason: Option[String]) extends DAGSchedulerEvent private[scheduler] case class JobGroupCancelled(groupId: String) extends DAGSchedulerEvent private[scheduler] case object AllJobsCancelled extends DAGSchedulerEvent private[scheduler] case class BeginEvent(task: Task[_], taskInfo: TaskInfo) extends DAGSchedulerEvent private[scheduler] case class GettingResultEvent(taskInfo: TaskInfo) extends DAGSchedulerEvent private[scheduler] case class CompletionEvent( task: Task[_], reason: TaskEndReason, result: Any, accumUpdates: Seq[AccumulatorV2[_, _]], taskInfo: TaskInfo) extends DAGSchedulerEvent private[scheduler] case class ExecutorAdded(execId: String, host: String) extends DAGSchedulerEvent private[scheduler] case class ExecutorLost(execId: String, reason: ExecutorLossReason) extends DAGSchedulerEvent private[scheduler] case class WorkerRemoved(workerId: String, host: String, message: String) extends DAGSchedulerEvent private[scheduler] case class TaskSetFailed(taskSet: TaskSet, reason: String, exception: Option[Throwable]) extends DAGSchedulerEvent private[scheduler] case object ResubmitFailedStages extends DAGSchedulerEvent private[scheduler] case class SpeculativeTaskSubmitted(task: Task[_]) extends DAGSchedulerEvent
Example 34
Source File: NettyBlockRpcServer.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.network.netty import java.nio.ByteBuffer import scala.collection.JavaConverters._ import scala.language.existentials import scala.reflect.ClassTag import org.apache.spark.internal.Logging import org.apache.spark.network.BlockDataManager import org.apache.spark.network.buffer.NioManagedBuffer import org.apache.spark.network.client.{RpcResponseCallback, TransportClient} import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager} import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, OpenBlocks, StreamHandle, UploadBlock} import org.apache.spark.serializer.Serializer import org.apache.spark.storage.{BlockId, StorageLevel} class NettyBlockRpcServer( appId: String, serializer: Serializer, blockManager: BlockDataManager) extends RpcHandler with Logging { private val streamManager = new OneForOneStreamManager() override def receive( client: TransportClient, rpcMessage: ByteBuffer, responseContext: RpcResponseCallback): Unit = { val message = BlockTransferMessage.Decoder.fromByteBuffer(rpcMessage) logTrace(s"Received request: $message") message match { case openBlocks: OpenBlocks => val blocksNum = openBlocks.blockIds.length val blocks = for (i <- (0 until blocksNum).view) yield blockManager.getBlockData(BlockId.apply(openBlocks.blockIds(i))) val streamId = streamManager.registerStream(appId, blocks.iterator.asJava) logTrace(s"Registered streamId $streamId with $blocksNum buffers") responseContext.onSuccess(new StreamHandle(streamId, blocksNum).toByteBuffer) case uploadBlock: UploadBlock => // StorageLevel and ClassTag are serialized as bytes using our JavaSerializer. val (level: StorageLevel, classTag: ClassTag[_]) = { serializer .newInstance() .deserialize(ByteBuffer.wrap(uploadBlock.metadata)) .asInstanceOf[(StorageLevel, ClassTag[_])] } val data = new NioManagedBuffer(ByteBuffer.wrap(uploadBlock.blockData)) val blockId = BlockId(uploadBlock.blockId) blockManager.putBlockData(blockId, data, level, classTag) responseContext.onSuccess(ByteBuffer.allocate(0)) } } override def getStreamManager(): StreamManager = streamManager }
Example 35
Source File: InsertIntoHiveDirCommand.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution import scala.language.existentials import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.hive.common.FileUtils import org.apache.hadoop.hive.ql.plan.TableDesc import org.apache.hadoop.hive.serde.serdeConstants import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe import org.apache.hadoop.mapred._ import org.apache.spark.SparkException import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.hive.client.HiveClientImpl case class InsertIntoHiveDirCommand( isLocal: Boolean, storage: CatalogStorageFormat, query: LogicalPlan, overwrite: Boolean, outputColumns: Seq[Attribute]) extends SaveAsHiveFile { override def run(sparkSession: SparkSession, child: SparkPlan): Seq[Row] = { assert(storage.locationUri.nonEmpty) val hiveTable = HiveClientImpl.toHiveTable(CatalogTable( identifier = TableIdentifier(storage.locationUri.get.toString, Some("default")), tableType = org.apache.spark.sql.catalyst.catalog.CatalogTableType.VIEW, storage = storage, schema = query.schema )) hiveTable.getMetadata.put(serdeConstants.SERIALIZATION_LIB, storage.serde.getOrElse(classOf[LazySimpleSerDe].getName)) val tableDesc = new TableDesc( hiveTable.getInputFormatClass, hiveTable.getOutputFormatClass, hiveTable.getMetadata ) val hadoopConf = sparkSession.sessionState.newHadoopConf() val jobConf = new JobConf(hadoopConf) val targetPath = new Path(storage.locationUri.get) val writeToPath = if (isLocal) { val localFileSystem = FileSystem.getLocal(jobConf) localFileSystem.makeQualified(targetPath) } else { val qualifiedPath = FileUtils.makeQualified(targetPath, hadoopConf) val dfs = qualifiedPath.getFileSystem(jobConf) if (!dfs.exists(qualifiedPath)) { dfs.mkdirs(qualifiedPath.getParent) } qualifiedPath } val tmpPath = getExternalTmpPath(sparkSession, hadoopConf, writeToPath) val fileSinkConf = new org.apache.spark.sql.hive.HiveShim.ShimFileSinkDesc( tmpPath.toString, tableDesc, false) try { saveAsHiveFile( sparkSession = sparkSession, plan = child, hadoopConf = hadoopConf, fileSinkConf = fileSinkConf, outputLocation = tmpPath.toString, allColumns = outputColumns) val fs = writeToPath.getFileSystem(hadoopConf) if (overwrite && fs.exists(writeToPath)) { fs.listStatus(writeToPath).foreach { existFile => if (Option(existFile.getPath) != createdTempDir) fs.delete(existFile.getPath, true) } } fs.listStatus(tmpPath).foreach { tmpFile => fs.rename(tmpFile.getPath, writeToPath) } } catch { case e: Throwable => throw new SparkException( "Failed inserting overwrite directory " + storage.locationUri.get, e) } finally { deleteExternalTmpPath(hadoopConf) } Seq.empty[Row] } }
Example 36
Source File: FPTreeSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.fpm import scala.language.existentials import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext class FPTreeSuite extends SparkFunSuite with MLlibTestSparkContext { test("add transaction") { val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) assert(tree.root.children.size == 2) assert(tree.root.children.contains("a")) assert(tree.root.children("a").item.equals("a")) assert(tree.root.children("a").count == 2) assert(tree.root.children.contains("b")) assert(tree.root.children("b").item.equals("b")) assert(tree.root.children("b").count == 1) var child = tree.root.children("a") assert(child.children.size == 1) assert(child.children.contains("b")) assert(child.children("b").item.equals("b")) assert(child.children("b").count == 2) child = child.children("b") assert(child.children.size == 2) assert(child.children.contains("c")) assert(child.children.contains("y")) assert(child.children("c").item.equals("c")) assert(child.children("y").item.equals("y")) assert(child.children("c").count == 1) assert(child.children("y").count == 1) } test("merge tree") { val tree1 = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) val tree2 = new FPTree[String] .add(Seq("a", "b")) .add(Seq("a", "b", "c")) .add(Seq("a", "b", "c", "d")) .add(Seq("a", "x")) .add(Seq("a", "x", "y")) .add(Seq("c", "n")) .add(Seq("c", "m")) val tree3 = tree1.merge(tree2) assert(tree3.root.children.size == 3) assert(tree3.root.children("a").count == 7) assert(tree3.root.children("b").count == 1) assert(tree3.root.children("c").count == 2) val child1 = tree3.root.children("a") assert(child1.children.size == 2) assert(child1.children("b").count == 5) assert(child1.children("x").count == 2) val child2 = child1.children("b") assert(child2.children.size == 2) assert(child2.children("y").count == 1) assert(child2.children("c").count == 3) val child3 = child2.children("c") assert(child3.children.size == 1) assert(child3.children("d").count == 1) val child4 = child1.children("x") assert(child4.children.size == 1) assert(child4.children("y").count == 1) val child5 = tree3.root.children("c") assert(child5.children.size == 2) assert(child5.children("n").count == 1) assert(child5.children("m").count == 1) } test("extract freq itemsets") { val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("a", "b")) .add(Seq("a")) .add(Seq("b")) .add(Seq("b", "n")) val freqItemsets = tree.extract(3L).map { case (items, count) => (items.toSet, count) }.toSet val expected = Set( (Set("a"), 4L), (Set("b"), 5L), (Set("a", "b"), 3L)) assert(freqItemsets === expected) } }
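The FPTree class exercised in these suites is private[fpm]; applications typically go through the public FPGrowth estimator instead. A minimal sketch, assuming an active SparkContext `sc`:
import org.apache.spark.mllib.fpm.FPGrowth
import org.apache.spark.rdd.RDD
val transactions: RDD[Array[String]] = sc.parallelize(Seq(
  Array("a", "b", "c"), Array("a", "b", "y"), Array("b")))
val model = new FPGrowth().setMinSupport(0.5).setNumPartitions(1).run(transactions)
model.freqItemsets.collect().foreach { itemset =>
  println(itemset.items.mkString("[", ",", "]") + " -> " + itemset.freq)
}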
Example 37
Source File: HadoopUtils.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.image import scala.language.existentials import scala.util.Random import org.apache.commons.io.FilenameUtils import org.apache.hadoop.conf.{Configuration, Configured} import org.apache.hadoop.fs.{Path, PathFilter} import org.apache.hadoop.mapreduce.lib.input.FileInputFormat import org.apache.spark.sql.SparkSession private object RecursiveFlag { def withPathFilter[T]( sampleRatio: Double, spark: SparkSession, seed: Long)(f: => T): T = { val sampleImages = sampleRatio < 1 if (sampleImages) { val flagName = FileInputFormat.PATHFILTER_CLASS val hadoopConf = spark.sparkContext.hadoopConfiguration val old = Option(hadoopConf.getClass(flagName, null)) hadoopConf.setDouble(SamplePathFilter.ratioParam, sampleRatio) hadoopConf.setLong(SamplePathFilter.seedParam, seed) hadoopConf.setClass(flagName, classOf[SamplePathFilter], classOf[PathFilter]) try f finally { hadoopConf.unset(SamplePathFilter.ratioParam) hadoopConf.unset(SamplePathFilter.seedParam) old match { case Some(v) => hadoopConf.setClass(flagName, v, classOf[PathFilter]) case None => hadoopConf.unset(flagName) } } } else { f } } }
Example 38
Source File: IStep.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.step import org.apache.s2graph.core._ import rx.lang.scala.Observable import scala.language.higherKinds import scala.language.existentials trait RxStep[-A, +B] extends (A => Observable[B]) object RxStep { case class VertexFetchStep(g: S2GraphLike) extends RxStep[Seq[S2VertexLike], S2VertexLike] { override def apply(vertices: Seq[S2VertexLike]): Observable[S2VertexLike] = { Observable.from(vertices) } } case class EdgeFetchStep(g: S2GraphLike, qp: QueryParam) extends RxStep[S2VertexLike, S2EdgeLike] { override def apply(v: S2VertexLike): Observable[S2EdgeLike] = { implicit val ec = g.ec val step = org.apache.s2graph.core.Step(Seq(qp)) val q = Query(Seq(v), steps = Vector(step)) val f = g.getEdges(q).map { stepResult => val edges = stepResult.edgeWithScores.map(_.edge) Observable.from(edges) } Observable.from(f).flatten } } private def merge[A, B](steps: RxStep[A, B]*): RxStep[A, B] = new RxStep[A, B] { override def apply(in: A): Observable[B] = steps.map(_.apply(in)).toObservable.flatten } def toObservable(q: Query)(implicit graph: S2GraphLike): Observable[S2EdgeLike] = { val v1: Observable[S2VertexLike] = VertexFetchStep(graph).apply(q.vertices) val serialSteps = q.steps.map { step => val parallelSteps = step.queryParams.map(qp => EdgeFetchStep(graph, qp)) merge(parallelSteps: _*) } v1.flatMap { v => val initOpt = serialSteps.headOption.map(_.apply(v)) initOpt.map { init => serialSteps.tail.foldLeft(init) { case (prev, next) => prev.map(_.tgtForVertex).flatMap(next) } }.getOrElse(Observable.empty) } } }
Example 39
Source File: ArrayBasedMapData.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.util import java.util.{Map => JavaMap} class ArrayBasedMapData(val keyArray: ArrayData, val valueArray: ArrayData) extends MapData { require(keyArray.numElements() == valueArray.numElements()) override def numElements(): Int = keyArray.numElements() override def copy(): MapData = new ArrayBasedMapData(keyArray.copy(), valueArray.copy()) override def toString: String = { s"keys: $keyArray, values: $valueArray" } } object ArrayBasedMapData { def apply( iterator: Iterator[(_, _)], size: Int, keyConverter: (Any) => Any, valueConverter: (Any) => Any): ArrayBasedMapData = { val keys: Array[Any] = new Array[Any](size) val values: Array[Any] = new Array[Any](size) var i = 0 for ((key, value) <- iterator) { keys(i) = keyConverter(key) values(i) = valueConverter(value) i += 1 } ArrayBasedMapData(keys, values) } def apply(keys: Array[_], values: Array[_]): ArrayBasedMapData = { new ArrayBasedMapData(new GenericArrayData(keys), new GenericArrayData(values)) } def toScalaMap(map: ArrayBasedMapData): Map[Any, Any] = { val keys = map.keyArray.asInstanceOf[GenericArrayData].array val values = map.valueArray.asInstanceOf[GenericArrayData].array keys.zip(values).toMap } def toScalaMap(keys: Array[Any], values: Array[Any]): Map[Any, Any] = { keys.zip(values).toMap } def toScalaMap(keys: Seq[Any], values: Seq[Any]): Map[Any, Any] = { keys.zip(values).toMap } def toJavaMap(keys: Array[Any], values: Array[Any]): java.util.Map[Any, Any] = { import scala.collection.JavaConverters._ keys.zip(values).toMap.asJava } }
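A short sketch of the helpers above; note that this is a Catalyst-internal class, so the API may change between Spark versions:
import org.apache.spark.sql.catalyst.util.ArrayBasedMapData
// Build MapData from parallel key/value arrays, then convert back to a Scala Map.
val mapData = ArrayBasedMapData(Array("a", "b"), Array(1, 2))
assert(mapData.numElements() == 2)
val scalaMap = ArrayBasedMapData.toScalaMap(Array[Any]("a", "b"), Array[Any](1, 2))
// scalaMap == Map("a" -> 1, "b" -> 2)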
Example 40
Source File: VLinearRegressionSuite.scala From spark-vlbfgs with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.regression
import scala.language.existentials
import org.apache.spark.SparkFunSuite
import org.apache.spark.ml.feature.Instance
import org.apache.spark.ml.linalg.{Vector, Vectors}
import org.apache.spark.ml.util.TestingUtils._
import org.apache.spark.mllib.util.MLlibTestSparkContext
import org.apache.spark.sql.DataFrame
class VLinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
import testImplicits._
var datasetWithWeight: DataFrame = _
override def beforeAll(): Unit = {
super.beforeAll()
datasetWithWeight = sc.parallelize(Seq(
Instance(17.0, 1.0, Vectors.dense(0.0, 5.0).toSparse),
Instance(19.0, 2.0, Vectors.dense(1.0, 7.0)),
Instance(23.0, 3.0, Vectors.dense(2.0, 11.0)),
Instance(29.0, 4.0, Vectors.dense(3.0, 13.0))
), 2).toDF()
}
test("test on datasetWithWeight") {
def b2s(b: Boolean): String = {
if (b) "w/" else "w/o"
}
for (fitIntercept <- Seq(false, true)) {
for (standardization <- Seq(false, true)) {
for ((reg, elasticNet)<- Seq((0.0, 0.0), (2.3, 0.0), (2.3, 0.5))) {
println()
println(s"# test ${b2s(fitIntercept)} intercept, ${b2s(standardization)} standardization, reg=${reg}, elasticNet=${elasticNet}")
val vtrainer = new VLinearRegression()
.setColsPerBlock(1)
.setRowsPerBlock(1)
.setGeneratingFeatureMatrixBuffer(2)
.setFitIntercept(fitIntercept)
.setStandardization(standardization)
.setRegParam(reg)
.setWeightCol("weight")
.setElasticNetParam(elasticNet)
val vmodel = vtrainer.fit(datasetWithWeight)
// Note that in ml.LinearRegression, when the dataset's numInstances is small,
// the l-bfgs and normal solvers produce slightly different results when reg is non-zero,
// because their std calculations differ by a factor of numInstances / (numInstances - 1).
// This test stays consistent with the l-bfgs solver.
val trainer = new LinearRegression()
.setSolver("l-bfgs") // by default it may use noraml solver so here force set it.
.setFitIntercept(fitIntercept)
.setStandardization(standardization)
.setRegParam(reg)
.setWeightCol("weight")
.setElasticNetParam(elasticNet)
val model = trainer.fit(datasetWithWeight)
logInfo(s"LinearRegression total iterations: ${model.summary.totalIterations}")
println(s"VLinearRegression coefficients: ${vmodel.coefficients.toDense}, intercept: ${vmodel.intercept}\n" +
s"LinearRegression coefficients: ${model.coefficients.toDense}, intercept: ${model.intercept}")
def filterSmallValue(v: Vector) = {
Vectors.dense(v.toArray.map(x => if (math.abs(x) < 1e-6) 0.0 else x))
}
assert(filterSmallValue(vmodel.coefficients) ~== filterSmallValue(model.coefficients) relTol 1e-3)
assert(vmodel.intercept ~== model.intercept relTol 1e-3)
}
}
}
}
}
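Outside of the comparison test, fitting VLinearRegression on its own reduces to the following hedged sketch, assuming a DataFrame of weighted Instances like the `datasetWithWeight` built in beforeAll above:
import org.apache.spark.ml.regression.VLinearRegression
val vlr = new VLinearRegression()
  .setColsPerBlock(1)
  .setRowsPerBlock(1)
  .setFitIntercept(true)
  .setWeightCol("weight")
val vlrModel = vlr.fit(datasetWithWeight)
println(s"coefficients=${vlrModel.coefficients.toDense}, intercept=${vlrModel.intercept}")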
Example 41
Source File: VSoftmaxRegressionSuite.scala From spark-vlbfgs with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.classification
import org.apache.spark.SparkFunSuite
import org.apache.spark.ml.feature.Instance
import org.apache.spark.ml.linalg.{SparseMatrix, Vector, Vectors}
import org.apache.spark.ml.util.TestingUtils._
import org.apache.spark.mllib.util.MLlibTestSparkContext
import org.apache.spark.sql.functions._
import org.apache.spark.sql.{DataFrame, Dataset}
import scala.language.existentials
class VSoftmaxRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
import testImplicits._
private val seed = 42
@transient var multinomialDataset: Dataset[_] = _
private val eps: Double = 1e-5
override def beforeAll(): Unit = {
super.beforeAll()
multinomialDataset = {
val nPoints = 50
val coefficients = Array(
-0.57997, 0.912083, -0.371077, -0.819866, 2.688191,
-0.16624, -0.84355, -0.048509, -0.301789, 4.170682)
val xMean = Array(5.843, 3.057, 3.758, 1.199)
val xVariance = Array(0.6856, 0.1899, 3.116, 0.581)
val testData = LogisticRegressionSuite.generateMultinomialLogisticInput(
coefficients, xMean, xVariance, addIntercept = true, nPoints, seed)
val df = sc.parallelize(testData, 4).toDF().withColumn("weight", rand(seed))
df.cache()
println("softmax test data:")
df.show(10, false)
df
}
}
test("test on multinomialDataset") {
def b2s(b: Boolean): String = {
if (b) "w/" else "w/o"
}
for (standardization <- Seq(false, true)) {
for ((reg, elasticNet) <- Seq((0.0, 0.0), (2.3, 0.0), (0.3, 0.05), (0.01, 1.0))) {
println()
println(s"# test ${b2s(standardization)} standardization, reg=${reg}, elasticNet=${elasticNet}")
val trainer = new LogisticRegression()
.setFamily("multinomial")
.setStandardization(standardization)
.setWeightCol("weight")
.setRegParam(reg)
.setFitIntercept(false)
.setElasticNetParam(elasticNet)
val model = trainer.fit(multinomialDataset)
val vtrainer = new VSoftmaxRegression()
.setColsPerBlock(2)
.setRowsPerBlock(5)
.setColPartitions(2)
.setRowPartitions(3)
.setWeightCol("weight")
.setGeneratingFeatureMatrixBuffer(2)
.setStandardization(standardization)
.setRegParam(reg)
.setElasticNetParam(elasticNet)
val vmodel = vtrainer.fit(multinomialDataset)
println(s"VSoftmaxRegression coefficientMatrix:\n" +
s"${vmodel.coefficientMatrix.asInstanceOf[SparseMatrix].toDense},\n" +
s"ml.SoftmaxRegression coefficientMatrix:\n" +
s"${model.coefficientMatrix}\n")
assert(vmodel.coefficientMatrix ~== model.coefficientMatrix relTol eps)
}
}
}
}
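Similarly, a standalone fit of VSoftmaxRegression on a dataset like the `multinomialDataset` above might look like this sketch:
import org.apache.spark.ml.classification.VSoftmaxRegression
val vsm = new VSoftmaxRegression()
  .setColsPerBlock(2)
  .setRowsPerBlock(5)
  .setWeightCol("weight")
  .setRegParam(0.0)
val vsmModel = vsm.fit(multinomialDataset)
println(vsmModel.coefficientMatrix)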
Example 42
Source File: SortedMapDeserializerModule.scala From mango with Apache License 2.0 | 5 votes |
package com.kakao.shaded.jackson.module.scala.deser
import java.util.AbstractMap
import java.util.Map.Entry
import scala.collection.{mutable, SortedMap}
import scala.collection.immutable.TreeMap
import com.kakao.shaded.jackson.core.JsonParser
import com.kakao.shaded.jackson.databind._
import com.kakao.shaded.jackson.databind.deser.std.{MapDeserializer, ContainerDeserializerBase}
import com.kakao.shaded.jackson.databind.jsontype.TypeDeserializer
import com.kakao.shaded.jackson.databind.`type`.MapLikeType
import com.kakao.shaded.jackson.module.scala.modifiers.MapTypeModifierModule
import deser.{ContextualDeserializer, Deserializers, ValueInstantiator}
import com.kakao.shaded.jackson.module.scala.introspect.OrderingLocator
import scala.language.existentials
private class SortedMapBuilderWrapper[K,V](val builder: mutable.Builder[(K,V), SortedMap[K,V]]) extends AbstractMap[K,V] {
override def put(k: K, v: V) = { builder += ((k,v)); v }
// Isn't used by the deserializer
def entrySet(): java.util.Set[Entry[K, V]] = throw new UnsupportedOperationException
}
private object SortedMapDeserializer {
def orderingFor = OrderingLocator.locate _
def builderFor(cls: Class[_], keyCls: JavaType): mutable.Builder[(AnyRef,AnyRef), SortedMap[AnyRef,AnyRef]] =
if (classOf[TreeMap[_,_]].isAssignableFrom(cls)) TreeMap.newBuilder[AnyRef,AnyRef](orderingFor(keyCls)) else
SortedMap.newBuilder[AnyRef,AnyRef](orderingFor(keyCls))
}
private class SortedMapDeserializer(
collectionType: MapLikeType,
config: DeserializationConfig,
keyDeser: KeyDeserializer,
valueDeser: JsonDeserializer[_],
valueTypeDeser: TypeDeserializer)
extends ContainerDeserializerBase[SortedMap[_,_]](collectionType)
with ContextualDeserializer {
private val javaContainerType =
config.getTypeFactory.constructMapLikeType(classOf[MapBuilderWrapper[_,_]], collectionType.getKeyType, collectionType.getContentType)
private val instantiator =
new ValueInstantiator {
def getValueTypeDesc = collectionType.getRawClass.getCanonicalName
override def canCreateUsingDefault = true
override def createUsingDefault(ctx: DeserializationContext) =
new SortedMapBuilderWrapper[AnyRef,AnyRef](SortedMapDeserializer.builderFor(collectionType.getRawClass, collectionType.getKeyType))
}
private val containerDeserializer =
new MapDeserializer(javaContainerType,instantiator,keyDeser,valueDeser.asInstanceOf[JsonDeserializer[AnyRef]],valueTypeDeser)
override def getContentType = containerDeserializer.getContentType
override def getContentDeserializer = containerDeserializer.getContentDeserializer
override def createContextual(ctxt: DeserializationContext, property: BeanProperty) =
if (keyDeser != null && valueDeser != null) this
else {
val newKeyDeser = Option(keyDeser).getOrElse(ctxt.findKeyDeserializer(collectionType.getKeyType, property))
val newValDeser = Option(valueDeser).getOrElse(ctxt.findContextualValueDeserializer(collectionType.getContentType, property))
new SortedMapDeserializer(collectionType, config, newKeyDeser, newValDeser, valueTypeDeser)
}
override def deserialize(jp: JsonParser, ctxt: DeserializationContext): SortedMap[_,_] = {
containerDeserializer.deserialize(jp,ctxt) match {
case wrapper: SortedMapBuilderWrapper[_,_] => wrapper.builder.result()
}
}
}
private object SortedMapDeserializerResolver extends Deserializers.Base {
private val SORTED_MAP = classOf[collection.SortedMap[_,_]]
override def findMapLikeDeserializer(theType: MapLikeType,
config: DeserializationConfig,
beanDesc: BeanDescription,
keyDeserializer: KeyDeserializer,
elementTypeDeserializer: TypeDeserializer,
elementDeserializer: JsonDeserializer[_]): JsonDeserializer[_] =
if (!SORTED_MAP.isAssignableFrom(theType.getRawClass)) null
else new SortedMapDeserializer(theType,config,keyDeserializer,elementDeserializer,elementTypeDeserializer)
}
trait SortedMapDeserializerModule extends MapTypeModifierModule {
this += (_ addDeserializers SortedMapDeserializerResolver)
}
Example 43
Source File: PropertyDescriptor.scala From mango with Apache License 2.0 | 5 votes |
package com.kakao.shaded.jackson.module.scala
package introspect
import util.Implicits._
import java.lang.reflect.{AccessibleObject, Constructor, Field, Method}
import scala.language.existentials
case class ConstructorParameter(constructor: Constructor[_], index: Int, defaultValueMethod: Option[Method])
case class PropertyDescriptor(name: String,
param: Option[ConstructorParameter],
field: Option[Field],
getter: Option[Method],
setter: Option[Method],
beanGetter: Option[Method],
beanSetter: Option[Method])
{
if (List(field, getter).flatten.isEmpty) throw new IllegalArgumentException("One of field or getter must be defined.")
def findAnnotation[A <: java.lang.annotation.Annotation](implicit mf: Manifest[A]): Option[A] = {
val cls = mf.runtimeClass.asInstanceOf[Class[A]]
lazy val paramAnnotation = (param flatMap { cp =>
val paramAnnos = cp.constructor.getParameterAnnotations
paramAnnos(cp.index).find(cls.isInstance)
}).asInstanceOf[Option[A]]
val getAnno = (o: AccessibleObject) => o.getAnnotation(cls)
lazy val fieldAnnotation = field optMap getAnno
lazy val getterAnnotation = getter optMap getAnno
lazy val beanGetterAnnotation = beanGetter optMap getAnno
paramAnnotation orElse fieldAnnotation orElse getterAnnotation orElse beanGetterAnnotation
}
}
Example 44
Source File: ReplicationFilterSerializer.scala From eventuate with Apache License 2.0 | 5 votes |
package com.rbmhtechnology.eventuate.serializer
import akka.actor.ExtendedActorSystem
import akka.serialization._
import com.rbmhtechnology.eventuate.ReplicationFilter.AndFilter
import com.rbmhtechnology.eventuate.ReplicationFilter.NoFilter
import com.rbmhtechnology.eventuate.ReplicationFilter.OrFilter
import com.rbmhtechnology.eventuate._
import com.rbmhtechnology.eventuate.serializer.ReplicationFilterFormats._
import scala.collection.JavaConverters._
import scala.language.existentials
class ReplicationFilterSerializer(system: ExtendedActorSystem) extends Serializer {
import ReplicationFilterTreeFormat.NodeType._
val payloadSerializer = new DelegatingPayloadSerializer(system)
val AndFilterClass = classOf[AndFilter]
val OrFilterClass = classOf[OrFilter]
val NoFilterClass = NoFilter.getClass
override def identifier: Int = 22564
override def includeManifest: Boolean = true
override def toBinary(o: AnyRef): Array[Byte] = o match {
case NoFilter =>
NoFilterFormat.newBuilder().build().toByteArray
case f: ReplicationFilter =>
filterTreeFormatBuilder(f).build().toByteArray
case _ =>
throw new IllegalArgumentException(s"can't serialize object of type ${o.getClass}")
}
override def fromBinary(bytes: Array[Byte], manifest: Option[Class[_]]): AnyRef = manifest match {
case None => throw new IllegalArgumentException("manifest required")
case Some(clazz) => clazz match {
case NoFilterClass =>
NoFilter
case AndFilterClass | OrFilterClass =>
filterTree(ReplicationFilterTreeFormat.parseFrom(bytes))
case _ =>
throw new IllegalArgumentException(s"can't deserialize object of type ${clazz}")
}
}
// --------------------------------------------------------------------------------
// toBinary helpers
// --------------------------------------------------------------------------------
def filterTreeFormatBuilder(filterTree: ReplicationFilter): ReplicationFilterTreeFormat.Builder = {
val builder = ReplicationFilterTreeFormat.newBuilder()
filterTree match {
case AndFilter(filters) =>
builder.setNodeType(AND)
filters.foreach(filter => builder.addChildren(filterTreeFormatBuilder(filter)))
case OrFilter(filters) =>
builder.setNodeType(OR)
filters.foreach(filter => builder.addChildren(filterTreeFormatBuilder(filter)))
case filter =>
builder.setNodeType(LEAF)
builder.setFilter(payloadSerializer.payloadFormatBuilder(filter))
}
builder
}
// --------------------------------------------------------------------------------
// fromBinary helpers
// --------------------------------------------------------------------------------
def filterTree(filterTreeFormat: ReplicationFilterTreeFormat): ReplicationFilter = {
filterTreeFormat.getNodeType match {
case AND => AndFilter(filterTreeFormat.getChildrenList.asScala.map(filterTree).toList)
case OR => OrFilter(filterTreeFormat.getChildrenList.asScala.map(filterTree).toList)
case LEAF => payloadSerializer.payload(filterTreeFormat.getFilter).asInstanceOf[ReplicationFilter]
}
}
}
Example 45
Source File: BytecodeUtils.scala From graphx-algorithm with GNU General Public License v2.0 | 5 votes |
package org.apache.spark.graphx.util
import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import scala.collection.mutable.HashSet
import scala.language.existentials
import org.apache.spark.util.Utils
import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.{ClassReader, ClassVisitor, MethodVisitor}
import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.Opcodes._
private class MethodInvocationFinder(className: String, methodName: String)
extends ClassVisitor(ASM4) {
val methodsInvoked = new HashSet[(Class[_], String)]
override def visitMethod(access: Int, name: String, desc: String,
sig: String, exceptions: Array[String]): MethodVisitor = {
if (name == methodName) {
new MethodVisitor(ASM4) {
override def visitMethodInsn(op: Int, owner: String, name: String, desc: String) {
if (op == INVOKEVIRTUAL || op == INVOKESPECIAL || op == INVOKESTATIC) {
if (!skipClass(owner)) {
methodsInvoked.add((Class.forName(owner.replace("/", ".")), name))
}
}
}
}
} else {
null
}
}
}
}
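Since MethodInvocationFinder is private to this file, a usage sketch only makes sense alongside it; assuming code placed in the same source file (with the elided skipClass helper in scope, and class files at a bytecode level ASM4 understands), driving it with an ASM ClassReader might look like:
import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.ClassReader
// Collect every method invoked from Option.getOrElse (illustrative target only).
val in = Thread.currentThread.getContextClassLoader.getResourceAsStream("scala/Option.class")
val finder = new MethodInvocationFinder("scala/Option", "getOrElse")
new ClassReader(in).accept(finder, 0)
finder.methodsInvoked.foreach { case (cls, method) =>
  println(s"${cls.getName}.$method")
}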
Example 46
Source File: LoggingState.scala From logging with Apache License 2.0 | 5 votes |
package com.persist.logging
import akka.actor._
import LogActor.{AkkaMessage, LogActorMessage}
import scala.language.existentials
import scala.concurrent.Promise
import scala.collection.mutable
import TimeActorMessages._
private[logging] object LoggingState extends ClassLogging {
// Queue of messages sent before logger is started
private[logging] val msgs = new mutable.Queue[LogActorMessage]()
@volatile var doTrace:Boolean = false
@volatile var doDebug: Boolean = false
@volatile var doInfo: Boolean = true
@volatile var doWarn: Boolean = true
@volatile var doError: Boolean = true
private[logging] var loggingSys: LoggingSystem = null
private[logging] var logger: Option[ActorRef] = None
@volatile private[logging] var loggerStopping = false
private[logging] var doTime: Boolean = false
private[logging] var timeActorOption: Option[ActorRef] = None
// Used to sync akka logging actor shutdown
private[logging] val akkaStopPromise = Promise[Unit]
private[logging] def sendMsg(msg: LogActorMessage) {
if (loggerStopping) {
println(s"*** Log message received after logger shutdown: $msg")
} else {
logger match {
case Some(a) =>
a ! msg
case None =>
msgs.synchronized {
msgs.enqueue(msg)
}
}
}
}
private[logging] def akkaMsg(m: AkkaMessage) {
if (m.msg == "DIE") {
akkaStopPromise.trySuccess(())
} else {
sendMsg(m)
}
}
private[logging] def timeStart(id: RequestId, name: String, uid: String) {
timeActorOption foreach {
case timeActor =>
val time = System.nanoTime() / 1000
timeActor ! TimeStart(id, name, uid, time)
}
}
private[logging] def timeEnd(id: RequestId, name: String, uid: String) {
timeActorOption foreach {
case timeActor =>
val time = System.nanoTime() / 1000
timeActor ! TimeEnd(id, name, uid, time)
}
}
}
Example 47
Source File: Query.scala From finagle-postgres with Apache License 2.0 | 5 votes |
package com.twitter.finagle.postgres.generic
import com.twitter.concurrent.AsyncStream
import scala.collection.immutable.Queue
import com.twitter.finagle.postgres.{Param, PostgresClient, Row}
import com.twitter.util.Future
import scala.language.existentials
case class Query[T](parts: Seq[String], queryParams: Seq[QueryParam], cont: Row => T) {
def stream(client: PostgresClient): AsyncStream[T] = {
val (queryString, params) = impl
client.prepareAndQueryToStream[T](queryString, params: _*)(cont)
}
def run(client: PostgresClient): Future[Seq[T]] =
stream(client).toSeq
def exec(client: PostgresClient): Future[Int] = {
val (queryString, params) = impl
client.prepareAndExecute(queryString, params: _*)
}
def map[U](fn: T => U): Query[U] = copy(cont = cont andThen fn)
def as[U](implicit rowDecoder: RowDecoder[U], columnNamer: ColumnNamer): Query[U] = {
copy(cont = row => rowDecoder(row)(columnNamer))
}
private def impl: (String, Seq[Param[_]]) = {
val (last, placeholders, params) = queryParams.foldLeft((1, Queue.empty[Seq[String]], Queue.empty[Param[_]])) {
case ((start, placeholders, params), next) =>
val nextPlaceholders = next.placeholders(start)
val nextParams = Queue(next.params: _*)
(start + nextParams.length, placeholders enqueue nextPlaceholders, params ++ nextParams)
}
val queryString = parts.zipAll(placeholders, "", Seq.empty).flatMap {
case (part, ph) => Seq(part, ph.mkString(", "))
}.mkString
(queryString, params)
}
}
object Query {
implicit class RowQueryOps(val self: Query[Row]) extends AnyVal {
def ++(that: Query[Row]): Query[Row] = Query[Row](
parts = if(self.parts.length > self.queryParams.length)
(self.parts.dropRight(1) :+ (self.parts.lastOption.getOrElse("") + that.parts.headOption.getOrElse(""))) ++ that.parts.drop(1)
else
self.parts ++ that.parts,
queryParams = self.queryParams ++ that.queryParams,
cont = self.cont
)
def ++(that: String): Query[Row] = Query[Row](
parts = if(self.parts.length > self.queryParams.length)
self.parts.dropRight(1) :+ (self.parts.last + that)
else
self.parts :+ that,
queryParams = self.queryParams,
cont = self.cont
)
}
}
Example 48
Source File: InferShape.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.nn.abstractnn
import com.intel.analytics.bigdl.nn.keras.{Input => KInput, Sequential => KSequential}
import com.intel.analytics.bigdl.nn.{Input => TInput}
import com.intel.analytics.bigdl.utils.Shape
import scala.language.existentials
import scala.reflect.ClassTag
class InvalidLayer(msg: String) extends RuntimeException(msg)
trait InferShape {
private[bigdl] var _inputShapeValue: Shape = null
private[bigdl] var _outputShapeValue: Shape = null
private[bigdl] def inputShapeValue: Shape = _inputShapeValue
private[bigdl] def outputShapeValue: Shape = _outputShapeValue
// scalastyle:off
private[bigdl] def inputShapeValue_=(value: Shape): Unit = {
_inputShapeValue = value
}
private[bigdl] def outputShapeValue_=(value: Shape): Unit = {
_outputShapeValue = value
}
// scalastyle:on
private[bigdl] def computeOutputShape(inputShape: Shape): Shape = {
throw new RuntimeException("Haven't been implemented yet. Do not use it with Keras Layer")
}
private[bigdl] def excludeInvalidLayers[T: ClassTag]
(modules : Seq[AbstractModule[_, _, T]]): Unit = {
val invalidNodes = if (this.isKerasStyle()) {
modules.filter{!_.isKerasStyle()}
} else {
modules.filter{_.isKerasStyle()}
}
if (invalidNodes.length > 0) {
throw new InvalidLayer(s"""Do not mix ${this}(isKerasStyle=${isKerasStyle()}) with Layer
(isKerasStyle=${invalidNodes(0).isKerasStyle()}):
${invalidNodes.mkString(",")}""")
}
}
private[bigdl] def validateInput[T: ClassTag](modules : Seq[AbstractModule[_, _, T]]): Unit = {
if (this.isKerasStyle()) {
require(modules != null && !modules.isEmpty, "Empty input is not allowed")
}
excludeInvalidLayers(modules)
}
}
Example 49
Source File: PythonBigDLValidator.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.python.api
import java.lang.{Boolean => JBoolean}
import java.util.{ArrayList => JArrayList, HashMap => JHashMap, List => JList, Map => JMap}
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.bigdl.utils.Table
import scala.collection.JavaConverters._
import scala.collection.mutable.Map
import scala.language.existentials
import scala.reflect.ClassTag
object PythonBigDLValidator {
def ofFloat(): PythonBigDLValidator[Float] = new PythonBigDLValidator[Float]()
def ofDouble(): PythonBigDLValidator[Double] = new PythonBigDLValidator[Double]()
}
class PythonBigDLValidator[T: ClassTag](implicit ev: TensorNumeric[T]) extends PythonBigDL[T]{
def testDict(): JMap[String, String] = {
return Map("jack" -> "40", "lucy" -> "50").asJava
}
def testDictJTensor(): JMap[String, JTensor] = {
return Map("jack" -> JTensor(Array(1.0f, 2.0f, 3.0f, 4.0f), Array(4, 1), "float")).asJava
}
def testDictJMapJTensor(): JMap[String, JMap[String, JTensor]] = {
val table = new Table()
val tensor = JTensor(Array(1.0f, 2.0f, 3.0f, 4.0f), Array(4, 1), "float")
val result = Map("jack" -> tensor).asJava
table.insert(tensor)
return Map("nested" -> result).asJava
}
def testActivityWithTensor(): JActivity = {
val tensor = Tensor(Array(1.0f, 2.0f, 3.0f, 4.0f), Array(4, 1))
return JActivity(tensor)
}
def testActivityWithTableOfTensor(): JActivity = {
val tensor1 = Tensor(Array(1.0f, 1.0f), Array(2))
val tensor2 = Tensor(Array(2.0f, 2.0f), Array(2))
val tensor3 = Tensor(Array(3.0f, 3.0f), Array(2))
val table = new Table()
table.insert(tensor1)
table.insert(tensor2)
table.insert(tensor3)
return JActivity(table)
}
def testActivityWithTableOfTable(): JActivity = {
val tensor = Tensor(Array(1.0f, 2.0f, 3.0f, 4.0f), Array(4, 1))
val table = new Table()
table.insert(tensor)
val nestedTable = new Table()
nestedTable.insert(table)
nestedTable.insert(table)
return JActivity(nestedTable)
}
}
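These validator helpers are exercised from the Python API tests; from Scala the factory can also be called directly, as in this minimal sketch assuming BigDL is on the classpath:
val validator = PythonBigDLValidator.ofFloat()
println(validator.testDict())        // {jack=40, lucy=50}
println(validator.testDictJTensor()) // map holding a 4x1 JTensor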
Example 50
Source File: TreeSentiment.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.example.treeLSTMSentiment
import com.intel.analytics.bigdl._
import com.intel.analytics.bigdl.nn._
import com.intel.analytics.bigdl.numeric.NumericFloat
import com.intel.analytics.bigdl.tensor.Tensor
import scala.language.existentials
object TreeLSTMSentiment {
def apply(
word2VecTensor: Tensor[Float],
hiddenSize: Int,
classNum: Int,
p: Double = 0.5
): Module[Float] = {
val vocabSize = word2VecTensor.size(1)
val embeddingDim = word2VecTensor.size(2)
val embedding = LookupTable(vocabSize, embeddingDim)
embedding.weight.set(word2VecTensor)
embedding.setScaleW(2)
val treeLSTMModule = Sequential()
.add(BinaryTreeLSTM(
embeddingDim, hiddenSize, withGraph = true))
.add(TimeDistributed(Dropout(p)))
.add(TimeDistributed(Linear(hiddenSize, classNum)))
.add(TimeDistributed(LogSoftMax()))
Sequential()
.add(MapTable(Squeeze(3)))
.add(ParallelTable()
.add(embedding)
.add(Identity()))
.add(treeLSTMModule)
}
}
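A hedged construction sketch for the model factory above; the toy embedding matrix (100-word vocabulary, 50-dimensional vectors) and the layer sizes are illustrative only:
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.numeric.NumericFloat
val word2Vec = Tensor[Float](100, 50).rand() // stands in for a real pre-trained embedding
val model = TreeLSTMSentiment(word2Vec, hiddenSize = 150, classNum = 5)
println(model)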
Example 51
Source File: TextClassifier.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.example.textclassification
import com.intel.analytics.bigdl.example.utils._
import com.intel.analytics.bigdl.nn.{ClassNLLCriterion, _}
import com.intel.analytics.bigdl.utils.{Engine, LoggerFilter, T}
import org.apache.log4j.{Level => Levle4j, Logger => Logger4j}
import org.slf4j.{Logger, LoggerFactory}
import scopt.OptionParser
import scala.collection.mutable.{ArrayBuffer, Map => MMap}
import scala.language.existentials
object TextClassifier {
val log: Logger = LoggerFactory.getLogger(this.getClass)
LoggerFilter.redirectSparkInfoLogs()
Logger4j.getLogger("com.intel.analytics.bigdl.optim").setLevel(Levle4j.INFO)
def main(args: Array[String]): Unit = {
val localParser = new OptionParser[TextClassificationParams]("BigDL Example") {
opt[String]('b', "baseDir")
.required()
.text("Base dir containing the training and word2Vec data")
.action((x, c) => c.copy(baseDir = x))
opt[String]('p', "partitionNum")
.text("you may want to tune the partitionNum if run into spark mode")
.action((x, c) => c.copy(partitionNum = x.toInt))
opt[String]('s', "maxSequenceLength")
.text("maxSequenceLength")
.action((x, c) => c.copy(maxSequenceLength = x.toInt))
opt[String]('w', "maxWordsNum")
.text("maxWordsNum")
.action((x, c) => c.copy(maxWordsNum = x.toInt))
opt[String]('l', "trainingSplit")
.text("trainingSplit")
.action((x, c) => c.copy(trainingSplit = x.toDouble))
opt[String]('z', "batchSize")
.text("batchSize")
.action((x, c) => c.copy(batchSize = x.toInt))
opt[Int]('l', "learningRate")
.text("learningRate")
.action((x, c) => c.copy(learningRate = x))
}
localParser.parse(args, TextClassificationParams()).map { param =>
log.info(s"Current parameters: $param")
val textClassification = new TextClassifier(param)
textClassification.train()
}
}
}
Example 52
Source File: ShuffleMapTask.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler
import java.lang.management.ManagementFactory
import java.nio.ByteBuffer
import java.util.Properties
import scala.language.existentials
import org.apache.spark._
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.executor.TaskMetrics
import org.apache.spark.internal.Logging
import org.apache.spark.rdd.RDD
import org.apache.spark.shuffle.ShuffleWriter
import org.apache.spark.storage.BlockManagerId
def this(partitionId: Int) {
this(0, 0, null, new Partition { override def index: Int = 0 }, null, new Properties, null)
}
@transient private val preferredLocs: Seq[TaskLocation] = {
if (locs == null) Nil else locs.toSet.toSeq
}
var rdd: RDD[_] = null
var dep: ShuffleDependency[_, _, _] = null
override def prepTask(): Unit = {
// Deserialize the RDD using the broadcast variable.
val threadMXBean = ManagementFactory.getThreadMXBean
val deserializeStartTime = System.currentTimeMillis()
val deserializeStartCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) {
threadMXBean.getCurrentThreadCpuTime
} else 0L
val ser = SparkEnv.get.closureSerializer.newInstance()
val (_rdd, _dep) = ser.deserialize[(RDD[_], ShuffleDependency[_, _, _])](
ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader)
rdd = _rdd
dep = _dep
_executorDeserializeTime = System.currentTimeMillis() - deserializeStartTime
_executorDeserializeCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) {
threadMXBean.getCurrentThreadCpuTime - deserializeStartCpuTime
} else 0L
}
override def runTask(context: TaskContext): MapStatus = {
if (dep == null || rdd == null) {
prepTask()
}
var writer: ShuffleWriter[Any, Any] = null
try {
val manager = SparkEnv.get.shuffleManager
writer = manager.getWriter[Any, Any](dep.shuffleHandle, partitionId, context)
writer.write(rdd.iterator(partition, context).asInstanceOf[Iterator[_ <: Product2[Any, Any]]])
val status = writer.stop(success = true).get
FutureTaskNotifier.taskCompleted(status, partitionId, dep.shuffleId,
dep.partitioner.numPartitions, nextStageLocs, metrics.shuffleWriteMetrics, false)
status
} catch {
case e: Exception =>
try {
if (writer != null) {
writer.stop(success = false)
}
} catch {
case e: Exception =>
log.debug("Could not stop writer", e)
}
throw e
}
}
override def preferredLocations: Seq[TaskLocation] = preferredLocs
override def toString: String = "ShuffleMapTask(%d, %d)".format(stageId, partitionId)
}
object ShuffleMapTask {
def apply(
stageId: Int,
stageAttemptId: Int,
partition: Partition,
properties: Properties,
internalAccumulatorsSer: Array[Byte],
isFutureTask: Boolean,
rdd: RDD[_],
dep: ShuffleDependency[_, _, _],
nextStageLocs: Option[Seq[BlockManagerId]]): ShuffleMapTask = {
val smt = new ShuffleMapTask(stageId, stageAttemptId, null, partition, null,
properties, internalAccumulatorsSer, isFutureTask, nextStageLocs)
smt.rdd = rdd
smt.dep = dep
smt
}
}
Example 53
Source File: NettyBlockRpcServer.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.network.netty
import java.nio.ByteBuffer
import scala.collection.JavaConverters._
import scala.language.existentials
import scala.reflect.ClassTag
import org.apache.spark.internal.Logging
import org.apache.spark.network.BlockDataManager
import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer}
import org.apache.spark.network.client.{RpcResponseCallback, TransportClient}
import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager}
import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, MapOutputReady, OpenBlocks, StreamHandle, UploadBlock}
import org.apache.spark.scheduler.MapStatus
import org.apache.spark.serializer.Serializer
import org.apache.spark.storage.{BlockId, StorageLevel}
class NettyBlockRpcServer(
appId: String,
serializer: Serializer,
blockManager: BlockDataManager)
extends RpcHandler with Logging {
private val streamManager = new OneForOneStreamManager()
override def receive(
client: TransportClient,
rpcMessage: ByteBuffer,
responseContext: RpcResponseCallback): Unit = {
val message = BlockTransferMessage.Decoder.fromByteBuffer(rpcMessage)
logTrace(s"Received request: $message")
message match {
case openBlocks: OpenBlocks =>
val blocks: Seq[ManagedBuffer] =
openBlocks.blockIds.map(BlockId.apply).map(blockManager.getBlockData)
val streamId = streamManager.registerStream(appId, blocks.iterator.asJava)
logTrace(s"Registered streamId $streamId with ${blocks.size} buffers")
responseContext.onSuccess(new StreamHandle(streamId, blocks.size).toByteBuffer)
case uploadBlock: UploadBlock =>
// StorageLevel and ClassTag are serialized as bytes using our JavaSerializer.
val (level: StorageLevel, classTag: ClassTag[_]) = {
serializer
.newInstance()
.deserialize(ByteBuffer.wrap(uploadBlock.metadata))
.asInstanceOf[(StorageLevel, ClassTag[_])]
}
val data = new NioManagedBuffer(ByteBuffer.wrap(uploadBlock.blockData))
val blockId = BlockId(uploadBlock.blockId)
blockManager.putBlockData(blockId, data, level, classTag)
responseContext.onSuccess(ByteBuffer.allocate(0))
case mapOutputReady: MapOutputReady =>
val mapStatus: MapStatus =
serializer.newInstance().deserialize(ByteBuffer.wrap(mapOutputReady.serializedMapStatus))
blockManager.mapOutputReady(
mapOutputReady.shuffleId, mapOutputReady.mapId, mapOutputReady.numReduces, mapStatus)
}
}
override def getStreamManager(): StreamManager = streamManager
}
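The import of scala.language.existentials in the example above enables Scala's existential-types feature. As a tiny self-contained illustration of what that feature covers (the object and type names are made up; only the standard library is used):

import scala.language.existentials

object ExistentialSketch {
  // An explicit existential type: a list whose element type is some unknown T.
  type SomeList = List[T] forSome { type T }

  def size(xs: SomeList): Int = xs.length

  def main(args: Array[String]): Unit = {
    println(size(List(1, 2, 3)))   // 3
    println(size(List("a", "b")))  // 2
  }
}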
Example 54
Source File: ArrayBasedMapData.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.util
import java.util.{Map => JavaMap}
class ArrayBasedMapData(val keyArray: ArrayData, val valueArray: ArrayData) extends MapData {
  require(keyArray.numElements() == valueArray.numElements())
  override def numElements(): Int = keyArray.numElements()
  override def copy(): MapData = new ArrayBasedMapData(keyArray.copy(), valueArray.copy())
  override def toString: String = {
    s"keys: $keyArray, values: $valueArray"
  }
}
object ArrayBasedMapData {
  def apply(
      iterator: Iterator[(_, _)],
      size: Int,
      keyConverter: (Any) => Any,
      valueConverter: (Any) => Any): ArrayBasedMapData = {
    val keys: Array[Any] = new Array[Any](size)
    val values: Array[Any] = new Array[Any](size)
    var i = 0
    for ((key, value) <- iterator) {
      keys(i) = keyConverter(key)
      values(i) = valueConverter(value)
      i += 1
    }
    ArrayBasedMapData(keys, values)
  }
  def apply(keys: Array[_], values: Array[_]): ArrayBasedMapData = {
    new ArrayBasedMapData(new GenericArrayData(keys), new GenericArrayData(values))
  }
  def toScalaMap(map: ArrayBasedMapData): Map[Any, Any] = {
    val keys = map.keyArray.asInstanceOf[GenericArrayData].array
    val values = map.valueArray.asInstanceOf[GenericArrayData].array
    keys.zip(values).toMap
  }
  def toScalaMap(keys: Array[Any], values: Array[Any]): Map[Any, Any] = {
    keys.zip(values).toMap
  }
  def toScalaMap(keys: Seq[Any], values: Seq[Any]): Map[Any, Any] = {
    keys.zip(values).toMap
  }
  def toJavaMap(keys: Array[Any], values: Array[Any]): java.util.Map[Any, Any] = {
    import scala.collection.JavaConverters._
    keys.zip(values).toMap.asJava
  }
}
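A possible usage sketch of the iterator-based apply shown above (assumes the Catalyst classes from this example are available; the data and converters are made up):

val entries = Iterator("a" -> 1, "b" -> 2)
val mapData = ArrayBasedMapData(
  entries,
  size = 2,
  keyConverter = identity,
  valueConverter = identity)
println(mapData.numElements())                 // 2
println(ArrayBasedMapData.toScalaMap(mapData)) // Map(a -> 1, b -> 2)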
Example 55
Source File: FPTreeSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.fpm
import scala.language.existentials
import org.apache.spark.SparkFunSuite
import org.apache.spark.mllib.util.MLlibTestSparkContext
class FPTreeSuite extends SparkFunSuite with MLlibTestSparkContext {
test("add transaction") {
val tree = new FPTree[String]
.add(Seq("a", "b", "c"))
.add(Seq("a", "b", "y"))
.add(Seq("b"))
assert(tree.root.children.size == 2)
assert(tree.root.children.contains("a"))
assert(tree.root.children("a").item.equals("a"))
assert(tree.root.children("a").count == 2)
assert(tree.root.children.contains("b"))
assert(tree.root.children("b").item.equals("b"))
assert(tree.root.children("b").count == 1)
var child = tree.root.children("a")
assert(child.children.size == 1)
assert(child.children.contains("b"))
assert(child.children("b").item.equals("b"))
assert(child.children("b").count == 2)
child = child.children("b")
assert(child.children.size == 2)
assert(child.children.contains("c"))
assert(child.children.contains("y"))
assert(child.children("c").item.equals("c"))
assert(child.children("y").item.equals("y"))
assert(child.children("c").count == 1)
assert(child.children("y").count == 1)
}
test("merge tree") {
val tree1 = new FPTree[String]
.add(Seq("a", "b", "c"))
.add(Seq("a", "b", "y"))
.add(Seq("b"))
val tree2 = new FPTree[String]
.add(Seq("a", "b"))
.add(Seq("a", "b", "c"))
.add(Seq("a", "b", "c", "d"))
.add(Seq("a", "x"))
.add(Seq("a", "x", "y"))
.add(Seq("c", "n"))
.add(Seq("c", "m"))
val tree3 = tree1.merge(tree2)
assert(tree3.root.children.size == 3)
assert(tree3.root.children("a").count == 7)
assert(tree3.root.children("b").count == 1)
assert(tree3.root.children("c").count == 2)
val child1 = tree3.root.children("a")
assert(child1.children.size == 2)
assert(child1.children("b").count == 5)
assert(child1.children("x").count == 2)
val child2 = child1.children("b")
assert(child2.children.size == 2)
assert(child2.children("y").count == 1)
assert(child2.children("c").count == 3)
val child3 = child2.children("c")
assert(child3.children.size == 1)
assert(child3.children("d").count == 1)
val child4 = child1.children("x")
assert(child4.children.size == 1)
assert(child4.children("y").count == 1)
val child5 = tree3.root.children("c")
assert(child5.children.size == 2)
assert(child5.children("n").count == 1)
assert(child5.children("m").count == 1)
}
test("extract freq itemsets") {
val tree = new FPTree[String]
.add(Seq("a", "b", "c"))
.add(Seq("a", "b", "y"))
.add(Seq("a", "b"))
.add(Seq("a"))
.add(Seq("b"))
.add(Seq("b", "n"))
val freqItemsets = tree.extract(3L).map { case (items, count) =>
(items.toSet, count)
}.toSet
val expected = Set(
(Set("a"), 4L),
(Set("b"), 5L),
(Set("a", "b"), 3L))
assert(freqItemsets === expected)
}
}
Example 56
package org.dizhang.seqspark.assoc
import breeze.stats.distributions.Gaussian
import org.dizhang.seqspark.stat.HypoTest.{NullModel => NM}
import org.dizhang.seqspark.stat.{Resampling, ScoreTest, WaldTest}
import org.dizhang.seqspark.util.General._
import scala.language.existentials
trait SNV extends AssocMethod {
def nullModel: NM
def x: Encode.Common
def result: AssocMethod.Result
}
object SNV {
def apply(nullModel: NM,
x: Encode.Coding): SNV with AssocMethod.AnalyticTest = {
nullModel match {
case nm: NM.Fitted =>
AnalyticScoreTest(nm, x.asInstanceOf[Encode.Common])
case _ =>
AnalyticWaldTest(nullModel, x.asInstanceOf[Encode.Common])
}
}
def apply(ref: Double, min: Int, max: Int,
nullModel: NM.Fitted,
x: Encode.Coding): ResamplingTest = {
ResamplingTest(ref, min, max, nullModel, x.asInstanceOf[Encode.Common])
}
def getStatistic(nm: NM.Fitted, x: Encode.Coding): Double = {
val st = ScoreTest(nm, x.asInstanceOf[Encode.Common].coding)
math.abs(st.score(0)/st.variance(0,0).sqrt)
}
@SerialVersionUID(7727280101L)
final case class AnalyticScoreTest(nullModel: NM.Fitted,
x: Encode.Common)
extends SNV with AssocMethod.AnalyticTest
{
//val scoreTest = ScoreTest(nullModel, x.coding)
val statistic = getStatistic(nullModel, x)
val pValue = {
val dis = new Gaussian(0.0, 1.0)
Some((1.0 - dis.cdf(statistic)) * 2)
}
def result: AssocMethod.BurdenAnalytic = {
AssocMethod.BurdenAnalytic(x.vars, statistic, pValue, "test=score")
}
}
case class AnalyticWaldTest(nullModel: NM,
x: Encode.Common)
extends SNV with AssocMethod.AnalyticTest
{
private val wt = WaldTest(nullModel, x.coding.toDenseVector)
val statistic = wt.beta(1) / wt.std(1)
val pValue = Some(wt.pValue(oneSided = false).apply(1))
def result = {
AssocMethod.BurdenAnalytic(x.vars, statistic, pValue, s"test=wald;beta=${wt.beta(1)};betaStd=${wt.std(1)}")
}
}
@SerialVersionUID(7727280201L)
final case class ResamplingTest(refStatistic: Double,
min: Int,
max: Int,
nullModel: NM.Fitted,
x: Encode.Common)
extends SNV with AssocMethod.ResamplingTest
{
def pCount = Resampling.Simple(refStatistic, min, max, nullModel, x, getStatistic).pCount
def result: AssocMethod.BurdenResampling = {
AssocMethod.BurdenResampling(x.vars, refStatistic, pCount)
}
}
}
Example 57
Source File: ShuffleMapTask.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler
import java.nio.ByteBuffer
import scala.language.existentials
import org.apache.spark._
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
import org.apache.spark.shuffle.ShuffleWriter
def this(partitionId: Int) {
this(0, null, new Partition { override def index = 0 }, null)
}
@transient private val preferredLocs: Seq[TaskLocation] = {
if (locs == null) Nil else locs.toSet.toSeq
}
override def runTask(context: TaskContext): MapStatus = {
// Deserialize the RDD using the broadcast variable.
val ser = SparkEnv.get.closureSerializer.newInstance()
val (rdd, dep) = ser.deserialize[(RDD[_], ShuffleDependency[_, _, _])](
ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader)
metrics = Some(context.taskMetrics)
var writer: ShuffleWriter[Any, Any] = null
try {
val manager = SparkEnv.get.shuffleManager
writer = manager.getWriter[Any, Any](dep.shuffleHandle, partitionId, context)
writer.write(rdd.iterator(partition, context).asInstanceOf[Iterator[_ <: Product2[Any, Any]]])
return writer.stop(success = true).get
} catch {
case e: Exception =>
try {
if (writer != null) {
writer.stop(success = false)
}
} catch {
case e: Exception =>
log.debug("Could not stop writer", e)
}
throw e
}
}
override def preferredLocations: Seq[TaskLocation] = preferredLocs
override def toString = "ShuffleMapTask(%d, %d)".format(stageId, partitionId)
}
Example 58
Source File: NettyBlockRpcServer.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.network.netty
import java.nio.ByteBuffer
import scala.collection.JavaConverters._
import scala.language.existentials
import scala.reflect.ClassTag
import org.apache.spark.internal.Logging
import org.apache.spark.network.BlockDataManager
import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer}
import org.apache.spark.network.client.{RpcResponseCallback, TransportClient}
import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager}
import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, OpenBlocks, StreamHandle, UploadBlock}
import org.apache.spark.serializer.Serializer
import org.apache.spark.storage.{BlockId, StorageLevel}
class NettyBlockRpcServer(
appId: String,
serializer: Serializer,
blockManager: BlockDataManager)
extends RpcHandler with Logging {
private val streamManager = new OneForOneStreamManager()
override def receive(
client: TransportClient,
rpcMessage: ByteBuffer,
responseContext: RpcResponseCallback): Unit = {
val message = BlockTransferMessage.Decoder.fromByteBuffer(rpcMessage)
logTrace(s"Received request: $message")
message match {
case openBlocks: OpenBlocks =>
val blocks: Seq[ManagedBuffer] =
openBlocks.blockIds.map(BlockId.apply).map(blockManager.getBlockData)
val streamId = streamManager.registerStream(appId, blocks.iterator.asJava)
logTrace(s"Registered streamId $streamId with ${blocks.size} buffers")
responseContext.onSuccess(new StreamHandle(streamId, blocks.size).toByteBuffer)
case uploadBlock: UploadBlock =>
// StorageLevel and ClassTag are serialized as bytes using our JavaSerializer.
val (level: StorageLevel, classTag: ClassTag[_]) = {
serializer
.newInstance()
.deserialize(ByteBuffer.wrap(uploadBlock.metadata))
.asInstanceOf[(StorageLevel, ClassTag[_])]
}
val data = new NioManagedBuffer(ByteBuffer.wrap(uploadBlock.blockData))
val blockId = BlockId(uploadBlock.blockId)
blockManager.putBlockData(blockId, data, level, classTag)
responseContext.onSuccess(ByteBuffer.allocate(0))
}
}
override def getStreamManager(): StreamManager = streamManager
}
Example 59
Source File: FPTreeSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.fpm
import scala.language.existentials
import org.apache.spark.SparkFunSuite
import org.apache.spark.mllib.util.MLlibTestSparkContext
class FPTreeSuite extends SparkFunSuite with MLlibTestSparkContext {
test("add transaction") {
val tree = new FPTree[String]
.add(Seq("a", "b", "c"))
.add(Seq("a", "b", "y"))
.add(Seq("b"))
assert(tree.root.children.size == 2)
assert(tree.root.children.contains("a"))
assert(tree.root.children("a").item.equals("a"))
assert(tree.root.children("a").count == 2)
assert(tree.root.children.contains("b"))
assert(tree.root.children("b").item.equals("b"))
assert(tree.root.children("b").count == 1)
var child = tree.root.children("a")
assert(child.children.size == 1)
assert(child.children.contains("b"))
assert(child.children("b").item.equals("b"))
assert(child.children("b").count == 2)
child = child.children("b")
assert(child.children.size == 2)
assert(child.children.contains("c"))
assert(child.children.contains("y"))
assert(child.children("c").item.equals("c"))
assert(child.children("y").item.equals("y"))
assert(child.children("c").count == 1)
assert(child.children("y").count == 1)
}
test("merge tree") {
val tree1 = new FPTree[String]
.add(Seq("a", "b", "c"))
.add(Seq("a", "b", "y"))
.add(Seq("b"))
val tree2 = new FPTree[String]
.add(Seq("a", "b"))
.add(Seq("a", "b", "c"))
.add(Seq("a", "b", "c", "d"))
.add(Seq("a", "x"))
.add(Seq("a", "x", "y"))
.add(Seq("c", "n"))
.add(Seq("c", "m"))
val tree3 = tree1.merge(tree2)
assert(tree3.root.children.size == 3)
assert(tree3.root.children("a").count == 7)
assert(tree3.root.children("b").count == 1)
assert(tree3.root.children("c").count == 2)
val child1 = tree3.root.children("a")
assert(child1.children.size == 2)
assert(child1.children("b").count == 5)
assert(child1.children("x").count == 2)
val child2 = child1.children("b")
assert(child2.children.size == 2)
assert(child2.children("y").count == 1)
assert(child2.children("c").count == 3)
val child3 = child2.children("c")
assert(child3.children.size == 1)
assert(child3.children("d").count == 1)
val child4 = child1.children("x")
assert(child4.children.size == 1)
assert(child4.children("y").count == 1)
val child5 = tree3.root.children("c")
assert(child5.children.size == 2)
assert(child5.children("n").count == 1)
assert(child5.children("m").count == 1)
}
test("extract freq itemsets") {
val tree = new FPTree[String]
.add(Seq("a", "b", "c"))
.add(Seq("a", "b", "y"))
.add(Seq("a", "b"))
.add(Seq("a"))
.add(Seq("b"))
.add(Seq("b", "n"))
val freqItemsets = tree.extract(3L).map { case (items, count) =>
(items.toSet, count)
}.toSet
val expected = Set(
(Set("a"), 4L),
(Set("b"), 5L),
(Set("a", "b"), 3L))
assert(freqItemsets === expected)
}
}
Example 60
Source File: RpcMessages.scala From spark-monitoring with MIT License | 5 votes |
package org.apache.spark.metrics
import java.util.concurrent.TimeUnit
import com.codahale.metrics.{Clock, Reservoir}
trait MetricMessage[T] {
val namespace: String
val metricName: String
val value: T
}
private[metrics] case class CounterMessage(
override val namespace: String,
override val metricName: String,
override val value: Long
) extends MetricMessage[Long]
private[metrics] case class SettableGaugeMessage[T](
override val namespace: String,
override val metricName: String,
override val value: T
) extends MetricMessage[T]
import scala.language.existentials
private[metrics] case class HistogramMessage(
override val namespace: String,
override val metricName: String,
override val value: Long,
reservoirClass: Class[_ <: Reservoir]
) extends MetricMessage[Long]
private[metrics] case class MeterMessage(
override val namespace: String,
override val metricName: String,
override val value: Long,
clockClass: Class[_ <: Clock]
) extends MetricMessage[Long]
private[metrics] case class TimerMessage(
override val namespace: String,
override val metricName: String,
override val value: Long,
timeUnit: TimeUnit,
reservoirClass: Class[_ <: Reservoir],
clockClass: Class[_ <: Clock]
) extends MetricMessage[Long]
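For illustration only, constructing one of these messages could look like the following sketch (note that the case classes are private[metrics], so a real caller must live in org.apache.spark.metrics; ExponentiallyDecayingReservoir is the standard Dropwizard reservoir implementation):

import com.codahale.metrics.ExponentiallyDecayingReservoir

val histogramUpdate = HistogramMessage(
  namespace = "myapp",
  metricName = "request.size.bytes",
  value = 512L,
  reservoirClass = classOf[ExponentiallyDecayingReservoir]) // a Class[_ <: Reservoir]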
Example 61
Source File: ScheduledTaskManager.scala From incubator-toree with Apache License 2.0 | 5 votes |
package org.apache.toree.utils
import scala.language.existentials
import java.util.concurrent._
import java.util.UUID
import com.google.common.util.concurrent.ThreadFactoryBuilder
import ScheduledTaskManager._
import scala.util.Try
def stop() = {
_taskMap.clear()
_scheduler.shutdown()
}
}
object ScheduledTaskManager {
val DefaultMaxThreads = 4
val DefaultExecutionDelay = 10 // 10 milliseconds
val DefaultTimeInterval = 100 // 100 milliseconds
}
Example 62
Source File: Resampling.scala From seqspark with Apache License 2.0 | 5 votes |
package org.dizhang.seqspark.stat
import breeze.linalg.{DenseVector, shuffle}
import breeze.stats.distributions.Bernoulli
import org.dizhang.seqspark.assoc.Encode
import org.dizhang.seqspark.ds.SemiGroup.PairInt
import org.dizhang.seqspark.stat.HypoTest.NullModel
import scala.language.existentials
def makeNewNullModel: NullModel.Fitted = {
val newY = makeNewY()
val cols = nullModel.xs.cols
NullModel(
newY,
nullModel.xs(::, 1 until cols),
fit = true,
binary = nullModel.binary
).asInstanceOf[NullModel.Fitted]
}
}
Example 63
package org.dizhang.seqspark.assoc
import breeze.linalg._
import org.dizhang.seqspark.stat.HypoTest.{NullModel => NM}
import org.dizhang.seqspark.stat.{Resampling, ScoreTest}
import org.dizhang.seqspark.util.General.RichDouble
import org.slf4j.LoggerFactory
import scala.language.existentials
@SerialVersionUID(7727880001L)
trait VT extends AssocMethod {
def nullModel: NM
def x: Encode.VT
def result: AssocMethod.Result
}
object VT {
val logger = LoggerFactory.getLogger(getClass)
def apply(nullModel: NM,
x: Encode.Coding): VT with AssocMethod.AnalyticTest = {
val nmf = nullModel match {
case NM.Simple(y, b) => NM.Fit(y, b)
case NM.Mutiple(y, c, b) => NM.Fit(y, c, b)
case nm: NM.Fitted => nm
}
AnalyticScoreTest(nmf, x.asInstanceOf[Encode.VT])
}
def apply(ref: Double, min: Int, max: Int,
nullModel: NM.Fitted,
x: Encode.Coding): ResamplingTest = {
ResamplingTest(ref, min, max, nullModel, x.asInstanceOf[Encode.VT])
}
def getStatistic(nm: NM.Fitted, x: Encode.Coding): Double = {
//println(s"scores: ${st.score.toArray.mkString(",")}")
//println(s"variances: ${diag(st.variance).toArray.mkString(",")}")
val m = x.asInstanceOf[Encode.VT].coding
val ts = m.map{sv =>
val st = ScoreTest(nm, sv)
st.score(0)/st.variance(0, 0).sqrt
}
//val ts = st.score :/ diag(st.variance).map(x => x.sqrt)
max(ts)
}
@SerialVersionUID(7727880101L)
final case class AnalyticScoreTest(nullModel: NM.Fitted,
x: Encode.VT)
extends VT with AssocMethod.AnalyticTest
{
val statistic = getStatistic(nullModel, x)
val pValue = None
def result: AssocMethod.VTAnalytic = {
val info = s"MAFs=${x.coding.length}"
AssocMethod.VTAnalytic(x.vars, x.size, statistic, pValue, info)
}
}
@SerialVersionUID(7727880201L)
final case class ResamplingTest(refStatistic: Double,
min: Int,
max: Int,
nullModel: NM.Fitted,
x: Encode.VT)
extends VT with AssocMethod.ResamplingTest
{
def pCount = {
Resampling.Simple(refStatistic, min, max, nullModel, x, getStatistic).pCount
}
def result: AssocMethod.VTResampling =
AssocMethod.VTResampling(x.vars, x.size, refStatistic, pCount)
}
}
Example 64
Source File: Burden.scala From seqspark with Apache License 2.0 | 5 votes |
package org.dizhang.seqspark.assoc
import breeze.linalg.DenseVector
import breeze.stats.distributions.{Gaussian, StudentsT}
import org.dizhang.seqspark.stat.HypoTest.{NullModel => NM}
import org.dizhang.seqspark.stat.{Resampling, ScoreTest, WaldTest}
import org.dizhang.seqspark.util.General._
import scala.language.existentials
@SerialVersionUID(7727280001L)
trait Burden extends AssocMethod {
def nullModel: NM
def x: Encode.Fixed
def result: AssocMethod.Result
}
object Burden {
def apply(nullModel: NM,
x: Encode.Coding): Burden with AssocMethod.AnalyticTest = {
nullModel match {
case nm: NM.Fitted =>
AnalyticScoreTest(nm, x.asInstanceOf[Encode.Fixed])
case _ =>
AnalyticWaldTest(nullModel, x.asInstanceOf[Encode.Fixed])
}
}
def apply(ref: Double, min: Int, max: Int,
nullModel: NM.Fitted,
x: Encode.Coding): ResamplingTest = {
ResamplingTest(ref, min, max, nullModel, x.asInstanceOf[Encode.Fixed])
}
def getStatistic(nm: NM.Fitted, x: Encode.Coding): Double = {
val st = ScoreTest(nm, x.asInstanceOf[Encode.Fixed].coding)
st.score(0)/st.variance(0,0).sqrt
}
def getStatistic(nm: NM, x: DenseVector[Double]): Double = {
val wt = WaldTest(nm, x)
(wt.beta /:/ wt.std).apply(1)
}
@SerialVersionUID(7727280101L)
final case class AnalyticScoreTest(nullModel: NM.Fitted,
x: Encode.Fixed)
extends Burden with AssocMethod.AnalyticTest
{
def geno = x.coding
//val scoreTest = ScoreTest(nullModel, geno)
val statistic = getStatistic(nullModel, x)
val pValue = {
val dis = new Gaussian(0.0, 1.0)
Some(1.0 - dis.cdf(statistic))
}
def result: AssocMethod.BurdenAnalytic = {
AssocMethod.BurdenAnalytic(x.vars, statistic, pValue, "test=score")
}
}
case class AnalyticWaldTest(nullModel: NM,
x: Encode.Fixed) extends Burden with AssocMethod.AnalyticTest {
def geno = x.coding
private val wt = WaldTest(nullModel, x.coding)
val statistic = getStatistic(nullModel, geno)
val pValue = {
val dis = new StudentsT(nullModel.dof - 1)
Some(1.0 - dis.cdf(statistic))
}
def result = {
AssocMethod.BurdenAnalytic(x.vars, statistic, pValue, s"test=wald;beta=${wt.beta(1)};betaStd=${wt.std(1)}")
}
}
@SerialVersionUID(7727280201L)
final case class ResamplingTest(refStatistic: Double,
min: Int,
max: Int,
nullModel: NM.Fitted,
x: Encode.Fixed)
extends Burden with AssocMethod.ResamplingTest
{
def pCount = Resampling.Simple(refStatistic, min, max, nullModel, x, getStatistic).pCount
def result: AssocMethod.BurdenResampling = {
AssocMethod.BurdenResampling(x.vars, refStatistic, pCount)
}
}
}
Example 65
Source File: BytecodeUtils.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.graphx.util
import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import scala.collection.mutable.HashSet
import scala.language.existentials
import org.apache.xbean.asm5.{ClassReader, ClassVisitor, MethodVisitor}
import org.apache.xbean.asm5.Opcodes._
import org.apache.spark.util.Utils
private class MethodInvocationFinder(className: String, methodName: String)
extends ClassVisitor(ASM5) {
val methodsInvoked = new HashSet[(Class[_], String)]
override def visitMethod(access: Int, name: String, desc: String,
sig: String, exceptions: Array[String]): MethodVisitor = {
if (name == methodName) {
new MethodVisitor(ASM5) {
override def visitMethodInsn(
op: Int, owner: String, name: String, desc: String, itf: Boolean) {
if (op == INVOKEVIRTUAL || op == INVOKESPECIAL || op == INVOKESTATIC) {
if (!skipClass(owner)) {
methodsInvoked.add((Utils.classForName(owner.replace("/", ".")), name))
}
}
}
}
} else {
null
}
}
}
}
Example 66
Source File: MyNettyBlockRpcServer.scala From OAP with Apache License 2.0 | 5 votes |
package org.apache.spark.network.netty
import java.nio.ByteBuffer
import scala.language.existentials
import org.apache.spark.SparkEnv
import org.apache.spark.internal.Logging
import org.apache.spark.network.BlockDataManager
import org.apache.spark.network.client.{RpcResponseCallback, StreamCallbackWithID, TransportClient}
import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager}
import org.apache.spark.network.shuffle.protocol._
import org.apache.spark.serializer.Serializer
import org.apache.spark.shuffle.remote.{HadoopFileSegmentManagedBuffer, MessageForHadoopManagedBuffers, RemoteShuffleManager}
import org.apache.spark.shuffle.sort.SortShuffleManager
import org.apache.spark.storage.{BlockId, ShuffleBlockId}
class MyNettyBlockRpcServer(
appId: String,
serializer: Serializer,
blockManager: BlockDataManager)
extends RpcHandler with Logging {
private val streamManager = new OneForOneStreamManager()
override def receive(
client: TransportClient,
rpcMessage: ByteBuffer,
responseContext: RpcResponseCallback): Unit = {
val message = BlockTransferMessage.Decoder.fromByteBuffer(rpcMessage)
logTrace(s"Received request: $message")
message match {
case openBlocks: OpenBlocks =>
val blocksNum = openBlocks.blockIds.length
val isShuffleRequest = (blocksNum > 0) &&
BlockId.apply(openBlocks.blockIds(0)).isInstanceOf[ShuffleBlockId] &&
(SparkEnv.get.conf.get("spark.shuffle.manager", classOf[SortShuffleManager].getName)
== classOf[RemoteShuffleManager].getName)
if (isShuffleRequest) {
val blockIdAndManagedBufferPair =
openBlocks.blockIds.map(block => (block, blockManager.getHostLocalShuffleData(
BlockId.apply(block), Array.empty).asInstanceOf[HadoopFileSegmentManagedBuffer]))
responseContext.onSuccess(new MessageForHadoopManagedBuffers(
blockIdAndManagedBufferPair).toByteBuffer.nioBuffer())
} else {
// This customized Netty RPC server is only served for RemoteShuffle requests,
// Other RPC messages or data chunks transferring should go through
// NettyBlockTransferService' NettyBlockRpcServer
throw new UnsupportedOperationException("MyNettyBlockRpcServer only serves remote" +
" shuffle requests for OpenBlocks")
}
case uploadBlock: UploadBlock =>
throw new UnsupportedOperationException("MyNettyBlockRpcServer doesn't serve UploadBlock")
}
}
override def receiveStream(
client: TransportClient,
messageHeader: ByteBuffer,
responseContext: RpcResponseCallback): StreamCallbackWithID = {
throw new UnsupportedOperationException("MyNettyBlockRpcServer doesn't support receiving" +
" stream")
}
override def getStreamManager(): StreamManager = streamManager
}
Example 67
Source File: RowToVectorBuilder.scala From filo with Apache License 2.0 | 5 votes |
package org.velvia.filo
import java.nio.ByteBuffer
import scala.language.existentials
import scala.language.postfixOps
import scalaxy.loops._
import BuilderEncoder.{EncodingHint, AutoDetect}
case class VectorInfo(name: String, dataType: Class[_])
// To help matching against the ClassTag in the VectorBuilder
private object Classes {
val Boolean = classOf[Boolean]
val Byte = java.lang.Byte.TYPE
val Short = java.lang.Short.TYPE
val Int = java.lang.Integer.TYPE
val Long = java.lang.Long.TYPE
val Float = java.lang.Float.TYPE
val Double = java.lang.Double.TYPE
val String = classOf[String]
val DateTime = classOf[org.joda.time.DateTime]
val SqlTimestamp = classOf[java.sql.Timestamp]
val UTF8 = classOf[ZeroCopyUTF8String]
}
object RowToVectorBuilder {
def convertToBytes(hint: EncodingHint = AutoDetect): Map[String, ByteBuffer] = {
val chunks = builders.map(_.toFiloBuffer(hint))
schema.zip(chunks).map { case (VectorInfo(colName, _), bytes) => (colName, bytes) }.toMap
}
private def unsupportedInput(typ: Any) =
throw new RuntimeException("Unsupported input type " + typ)
}
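Because VectorInfo carries an existential Class[_], one schema can describe columns of different element types and still be matched against the constants in Classes. A small sketch (the schema contents are invented, and Classes is package-private, so this only compiles inside org.velvia.filo):

val schema = Seq(
  VectorInfo("timestamp", Classes.Long),
  VectorInfo("user", Classes.String),
  VectorInfo("score", Classes.Double))

schema.foreach {
  case VectorInfo(name, Classes.String) => println(s"$name: string column")
  case VectorInfo(name, Classes.Long)   => println(s"$name: long column")
  case VectorInfo(name, other)          => println(s"$name: ${other.getName} column")
}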
Example 68
Source File: FlinkScalarFunctionGenerator.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.flink.internal
import com.amazon.milan.compiler.scala.{CodeBlock, DefaultTypeEmitter, ScalarFunctionGenerator, TypeEmitter}
import com.amazon.milan.compiler.flink.generator.FlinkGeneratorException
import com.amazon.milan.compiler.flink.typeutil._
import com.amazon.milan.program.ValueDef
import com.amazon.milan.types._
import com.amazon.milan.typeutil.{TypeDescriptor, types}
import scala.language.existentials
object FlinkScalarFunctionGenerator {
val default = new FlinkScalarFunctionGenerator(new DefaultTypeEmitter)
}
case class FunctionParts(arguments: CodeBlock, returnType: CodeBlock, body: CodeBlock)
class FlinkScalarFunctionGenerator(typeEmitter: TypeEmitter) extends ScalarFunctionGenerator(typeEmitter, ContextualTreeTransformer) {
private class ArrayFieldConversionContext(tupleType: TypeDescriptor[_]) extends ConversionContext {
override def generateSelectTermAndContext(name: String): (String, ConversionContext) = {
if (name == RecordIdFieldName) {
// RecordId is a special field for tuple streams, because it's a property of the ArrayRecord class rather than
// being present in the fields array itself.
(s".$name", createContextForType(types.String))
}
else {
val fieldIndex = this.tupleType.fields.takeWhile(_.name != name).length
if (fieldIndex >= this.tupleType.fields.length) {
throw new FlinkGeneratorException(s"Field '$name' not found.")
}
val fieldType = this.tupleType.fields(fieldIndex).fieldType
(s"($fieldIndex).asInstanceOf[${typeEmitter.getTypeFullName(fieldType)}]", createContextForType(fieldType))
}
}
}
override protected def createContextForArgument(valueDef: ValueDef): ConversionContext = {
// If the record type is a tuple with named fields then this is a tuple stream whose records are stored as
// ArrayRecord objects.
if (valueDef.tpe.isTupleRecord) {
new ArrayArgumentConversionContext(valueDef.name, valueDef.tpe)
}
else {
super.createContextForArgument(valueDef)
}
}
override protected def createContextForType(contextType: TypeDescriptor[_]): ConversionContext = {
// If the context type is a tuple with named fields then term names must be mapped to indices in the ArrayRecord
// objects.
if (contextType.isTupleRecord) {
new ArrayFieldConversionContext(contextType)
}
else {
super.createContextForType(contextType)
}
}
}
Example 69
Source File: TestWindow.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.lang
import java.time.Duration
import com.amazon.milan.lang.aggregation._
import com.amazon.milan.program
import com.amazon.milan.program.{GroupBy, _}
import com.amazon.milan.test.{DateIntRecord, DateKeyValueRecord}
import com.amazon.milan.typeutil.{FieldDescriptor, types}
import org.junit.Assert._
import org.junit.Test
import scala.language.existentials
@Test
class TestWindow {
@Test
def test_TumblingWindow_ReturnsStreamWithCorrectInputNodeAndWindowProperties(): Unit = {
val stream = Stream.of[DateIntRecord]
val windowed = stream.tumblingWindow(r => r.dateTime, Duration.ofHours(1), Duration.ofMinutes(30))
val TumblingWindow(_, dateExtractorFunc, period, offset) = windowed.expr
// If this extraction doesn't throw an exception then the formula is correct.
val FunctionDef(List(ValueDef("r", _)), SelectField(SelectTerm("r"), "dateTime")) = dateExtractorFunc
assertEquals(Duration.ofHours(1), period.asJava)
assertEquals(Duration.ofMinutes(30), offset.asJava)
}
@Test
def test_TumblingWindow_ThenSelectToTuple_ReturnsStreamWithCorrectFieldComputationExpression(): Unit = {
val stream = Stream.of[DateIntRecord]
val grouped = stream.tumblingWindow(r => r.dateTime, Duration.ofHours(1), Duration.ofMinutes(30))
val selected = grouped.select((key, r) => fields(field("max", max(r.i))))
val Aggregate(source, FunctionDef(_, NamedFields(fieldList))) = selected.expr
assertEquals(1, selected.recordType.fields.length)
assertEquals(FieldDescriptor("max", types.Int), selected.recordType.fields.head)
assertEquals(1, fieldList.length)
assertEquals("max", fieldList.head.fieldName)
// If this extraction statement doesn't crash then we're good.
val Max(SelectField(SelectTerm("r"), "i")) = fieldList.head.expr
}
@Test
def test_SlidingWindow_ReturnsStreamWithCorrectInputNodeAndWindowProperties(): Unit = {
val stream = Stream.of[DateIntRecord]
val windowed = stream.slidingWindow(r => r.dateTime, Duration.ofHours(1), Duration.ofMinutes(10), Duration.ofMinutes(30))
val SlidingWindow(_, dateExtractorFunc, size, slide, offset) = windowed.expr
val FunctionDef(List(ValueDef("r", _)), SelectField(SelectTerm("r"), "dateTime")) = dateExtractorFunc
assertEquals(Duration.ofHours(1), size.asJava)
assertEquals(Duration.ofMinutes(10), slide.asJava)
assertEquals(Duration.ofMinutes(30), offset.asJava)
}
@Test
def test_GroupBy_ThenTumblingWindow_ThenSelect_ReturnsStreamWithCorrectInputNodeAndWindowProperties(): Unit = {
val input = Stream.of[DateKeyValueRecord].withId("input")
val output = input.groupBy(r => r.key)
.tumblingWindow(r => r.dateTime, Duration.ofMinutes(5), Duration.ZERO)
.select((windowStart, r) => any(r))
val Aggregate(windowExpr, FunctionDef(List(ValueDef("windowStart", _), ValueDef("r", _)), First(SelectTerm("r")))) = output.expr
val TumblingWindow(groupExpr, FunctionDef(List(ValueDef("r", _)), SelectField(SelectTerm("r"), "dateTime")), program.Duration(300000), program.Duration(0)) = windowExpr
val GroupBy(ExternalStream("input", "input", _), FunctionDef(List(ValueDef("r", _)), SelectField(SelectTerm("r"), "key"))) = groupExpr
}
}
Example 70
Source File: Surface.scala From airframe with Apache License 2.0 | 5 votes |
package wvlet.airframe.surface
import scala.language.existentials
import scala.language.experimental.macros
object Surface {
def of[A]: Surface = macro SurfaceMacros.surfaceOf[A]
def methodsOf[A]: Seq[MethodSurface] = macro SurfaceMacros.methodSurfaceOf[A]
}
trait Surface extends Serializable {
def rawType: Class[_]
def typeArgs: Seq[Surface]
def params: Seq[Parameter]
def name: String
def fullName: String
def dealias: Surface = this
def isOption: Boolean
def isAlias: Boolean
def isPrimitive: Boolean
def isSeq: Boolean = classOf[Seq[_]].isAssignableFrom(rawType)
def objectFactory: Option[ObjectFactory] = None
}
sealed trait ParameterBase extends Serializable {
def name: String
def surface: Surface
def call(obj: Any, x: Any*): Any
}
trait Parameter extends ParameterBase {
def index: Int
def name: String
def getMethodArgDefaultValue(methodOwner: Any): Option[Any] = getDefaultValue
}
trait MethodSurface extends ParameterBase {
def mod: Int
def owner: Surface
def name: String
def args: Seq[MethodParameter]
def surface: Surface = returnType
def returnType: Surface
def isPublic: Boolean = (mod & MethodModifier.PUBLIC) != 0
def isPrivate: Boolean = (mod & MethodModifier.PRIVATE) != 0
def isProtected: Boolean = (mod & MethodModifier.PROTECTED) != 0
def isStatic: Boolean = (mod & MethodModifier.STATIC) != 0
def isFinal: Boolean = (mod & MethodModifier.FINAL) != 0
def isAbstract: Boolean = (mod & MethodModifier.ABSTRACT) != 0
}
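A minimal usage sketch of the Surface.of entry point above (the case class is made up; the printed values reflect the usual airframe-surface behaviour):

import wvlet.airframe.surface.Surface

case class Person(id: Int, name: String)

val s = Surface.of[Person]
println(s.name)               // Person
println(s.params.map(_.name)) // the parameter names, e.g. List(id, name)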
Example 71
Source File: AirframeException.scala From airframe with Apache License 2.0 | 5 votes |
package wvlet.airframe
import wvlet.airframe.surface.Surface
import scala.language.existentials
trait AirframeException extends Exception { self =>
def getCode: String = this.getClass.getSimpleName
override def toString: String = getMessage
}
object AirframeException {
case class MISSING_SESSION(cl: Class[_]) extends AirframeException {
override def getMessage: String =
s"[$getCode] Session is not found inside ${cl}. You may need to define ${cl} as a trait or implement DISupport to inject the current Session."
}
case class CYCLIC_DEPENDENCY(deps: List[Surface], sourceCode: SourceCode) extends AirframeException {
override def getMessage: String = s"[$getCode] ${deps.reverse.mkString(" -> ")} at ${sourceCode}"
}
case class MISSING_DEPENDENCY(stack: List[Surface], sourceCode: SourceCode) extends AirframeException {
override def getMessage: String =
s"[$getCode] Binding for ${stack.head} at ${sourceCode} is not found: ${stack.mkString(" <- ")}"
}
case class SHUTDOWN_FAILURE(cause: Throwable) extends AirframeException {
override def getMessage: String = {
s"[${getCode}] Failure at session shutdown: ${cause.getMessage}"
}
}
case class MULTIPLE_SHUTDOWN_FAILURES(causes: List[Throwable]) extends AirframeException {
override def getMessage: String = {
s"[${getCode}] Multiple failures occurred during session shutdown:\n${causes.map(x => s" - ${x.getMessage}").mkString("\n")}"
}
}
}
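Since getCode falls back on the runtime class's simple name, every subclass renders a self-describing message; a tiny sketch:

import wvlet.airframe.AirframeException.SHUTDOWN_FAILURE

val err = SHUTDOWN_FAILURE(new RuntimeException("boom"))
println(err.getCode)    // SHUTDOWN_FAILURE
println(err.getMessage) // [SHUTDOWN_FAILURE] Failure at session shutdown: boom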
Example 72
Source File: RunServer.scala From mleap with Apache License 2.0 | 5 votes |
package ml.combust.mleap.grpc.server
import java.util.concurrent.{Executors, TimeUnit}
import akka.Done
import akka.actor.{ActorSystem, CoordinatedShutdown}
import akka.stream.{ActorMaterializer, Materializer}
import com.typesafe.config.Config
import com.typesafe.scalalogging.Logger
import io.grpc.ServerBuilder
import ml.combust.mleap.executor.MleapExecutor
import ml.combust.mleap.pb.MleapGrpc
import scala.concurrent.{ExecutionContext, Future}
import scala.language.existentials
import scala.util.{Failure, Success, Try}
class RunServer(config: Config)
(implicit system: ActorSystem) {
private val logger = Logger(classOf[RunServer])
private var coordinator: Option[CoordinatedShutdown] = None
def run(): Unit = {
Try {
logger.info("Starting MLeap gRPC Server")
val coordinator = CoordinatedShutdown(system)
this.coordinator = Some(coordinator)
implicit val materializer: Materializer = ActorMaterializer()
val grpcServerConfig = new GrpcServerConfig(config.getConfig("default"))
val mleapExecutor = MleapExecutor(system)
val port: Int = config.getInt("port")
val threads: Option[Int] = if (config.hasPath("threads")) Some(config.getInt("threads")) else None
val threadCount = threads.getOrElse {
Math.min(Math.max(Runtime.getRuntime.availableProcessors() * 4, 32), 64)
}
logger.info(s"Creating thread pool for server with size $threadCount")
val grpcThreadPool = Executors.newFixedThreadPool(threadCount)
implicit val ec: ExecutionContext = ExecutionContext.fromExecutor(grpcThreadPool)
coordinator.addTask(CoordinatedShutdown.PhaseServiceRequestsDone, "threadPoolShutdownNow") {
() =>
Future {
logger.info("Shutting down gRPC thread pool")
grpcThreadPool.shutdown()
grpcThreadPool.awaitTermination(5, TimeUnit.SECONDS)
Done
}
}
logger.info(s"Creating executor service")
val grpcService: GrpcServer = new GrpcServer(mleapExecutor, grpcServerConfig)
val builder = ServerBuilder.forPort(port)
builder.intercept(new ErrorInterceptor)
builder.addService(MleapGrpc.bindService(grpcService, ec))
val grpcServer = builder.build()
logger.info(s"Starting server on port $port")
grpcServer.start()
coordinator.addTask(CoordinatedShutdown.PhaseServiceUnbind, "grpcServiceShutdown") {
() =>
Future {
logger.info("Shutting down gRPC")
grpcServer.shutdown()
grpcServer.awaitTermination(10, TimeUnit.SECONDS)
Done
}(ExecutionContext.global)
}
coordinator.addTask(CoordinatedShutdown.PhaseServiceStop, "grpcServiceShutdownNow") {
() =>
Future {
if (!grpcServer.isShutdown) {
logger.info("Shutting down gRPC NOW!")
grpcServer.shutdownNow()
grpcServer.awaitTermination(5, TimeUnit.SECONDS)
}
Done
}(ExecutionContext.global)
}
} match {
case Success(_) =>
case Failure(err) =>
logger.error("Error encountered starting server", err)
for (c <- this.coordinator) {
c.run(CoordinatedShutdown.UnknownReason)
}
throw err
}
}
}
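Starting the server from an application entry point might look roughly like this sketch (the config path and actor-system name are invented; the config must provide at least the port key and a default block, as read in run() above):

import akka.actor.ActorSystem
import com.typesafe.config.ConfigFactory

implicit val system: ActorSystem = ActorSystem("mleap-grpc")
val config = ConfigFactory.load().getConfig("mleap.grpc.server") // hypothetical path
new RunServer(config).run()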
Example 73
Source File: Responses.scala From finagle-postgres with Apache License 2.0 | 5 votes |
package com.twitter.finagle.postgres
import java.nio.charset.Charset
import com.twitter.finagle.postgres.messages.{DataRow, Field}
import com.twitter.finagle.postgres.values.ValueDecoder
import com.twitter.util.Try
import Try._
import com.twitter.concurrent.AsyncStream
import com.twitter.finagle.postgres.PostgresClient.TypeSpecifier
import com.twitter.finagle.postgres.codec.NullValue
import io.netty.buffer.ByteBuf
import scala.language.existentials
// capture all common format data for a set of rows to reduce repeated references
case class RowFormat(
indexMap: Map[String, Int],
formats: Array[Short],
oids: Array[Int],
dataTypes: Map[Int, TypeSpecifier],
receives: PartialFunction[String, ValueDecoder[T] forSome {type T}],
charset: Charset
) {
@inline final def recv(index: Int) = dataTypes(oids(index)).receiveFunction
@inline final def defaultDecoder(index: Int) = receives.applyOrElse(recv(index), (_: String) => ValueDecoder.never)
}
trait Row {
def getOption[T](name: String)(implicit decoder: ValueDecoder[T]): Option[T]
def getOption[T](index: Int)(implicit decoder: ValueDecoder[T]): Option[T]
def get[T](name: String)(implicit decoder: ValueDecoder[T]): T
def get[T](index: Int)(implicit decoder: ValueDecoder[T]): T
def getTry[T](name: String)(implicit decoder: ValueDecoder[T]): Try[T]
def getTry[T](index: Int)(implicit decoder: ValueDecoder[T]): Try[T]
def getOrElse[T](name: String, default: => T)(implicit decoder: ValueDecoder[T]): T
def getOrElse[T](index: Int, default: => T)(implicit decoder: ValueDecoder[T]): T
def getAnyOption(name: String): Option[Any]
def getAnyOption(index: Int): Option[Any]
}
object Row {
def apply(values: Array[Option[ByteBuf]], rowFormat: RowFormat): Row = RowImpl(values, rowFormat)
}
object ResultSet {
def apply(
fields: Array[Field],
charset: Charset,
dataRows: AsyncStream[DataRow],
types: Map[Int, TypeSpecifier],
receives: PartialFunction[String, ValueDecoder[T] forSome { type T }]
): ResultSet = {
val (indexMap, formats, oids) = {
val l = fields.length
val stringIndex = new Array[(String, Int)](l)
val formats = new Array[Short](l)
val oids = new Array[Int](l)
var i = 0
while(i < l) {
val Field(name, format, dataType) = fields(i)
stringIndex(i) = (name, i)
formats(i) = format
oids(i) = dataType
i += 1
}
(stringIndex.toMap, formats, oids)
}
val rowFormat = RowFormat(indexMap, formats, oids, types, receives, charset)
val rows = dataRows.map {
dataRow => Row(
values = dataRow.data,
rowFormat = rowFormat
)
}
ResultSet(rows)
}
}
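The Row accessors above come in Option, Try, plain and default-fallback flavours. A hypothetical consumer could use them like this (the column names are invented, and implicit ValueDecoder instances for Int and String are assumed to be supplied by the library):

def describeUser(row: Row): String = {
  val id    = row.get[Int]("id")                         // assumes a non-null value
  val name  = row.getOrElse[String]("name", "<unknown>") // default when the value is absent
  val email = row.getOption[String]("email")             // None when null
  s"$id $name ${email.getOrElse("-")}"
}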