scala.language.existentials Scala Examples
The following examples show how to use scala.language.existentials.
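In every example below the import exists to silence the compiler's existentials feature warning: since Scala 2.10, existential types that cannot be written as simple wildcards (for example a forSome type whose type variable occurs more than once, or an existential type the compiler infers on its own) require import scala.language.existentials or the -language:existentials flag, otherwise -feature reports a warning. A minimal, self-contained sketch of such a declaration (not taken from any of the projects below; the object and value names are made up for illustration):

import scala.language.existentials

object ExistentialsDemo {
  // T occurs twice, so this existential type cannot be expressed with plain
  // wildcards; without the language import (or -language:existentials) the
  // declaration triggers a feature warning under -feature.
  val roundTrips: List[(T => T) forSome { type T }] =
    List((i: Int) => i + 1, (s: String) => s.reverse)

  def main(args: Array[String]): Unit =
    println(roundTrips.size) // prints 2
}

The projects below rely on the same mechanism, e.g. Hail's Map[(Type, Type), (Code[T] => Code[_]) forSome {type T}] in Casts.scala, or Spark code that destructures a deserialized (RDD[_], ShuffleDependency[_, _, _]) tuple and lets the compiler infer existential types for the extracted values.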
Example 1
Source File: BlockedRDD.scala From hail with MIT License | 5 votes |
package is.hail.sparkextras

import is.hail.utils._
import org.apache.spark.rdd.RDD
import org.apache.spark.{Dependency, NarrowDependency, Partition, TaskContext}

import scala.language.existentials
import scala.reflect.ClassTag

case class BlockedRDDPartition(@transient rdd: RDD[_], index: Int, first: Int, last: Int) extends Partition {
  require(first <= last)

  val parentPartitions: Array[Partition] = range.map(rdd.partitions).toArray

  def range: Range = first to last
}

class BlockedRDD[T](@transient var prev: RDD[T],
  @transient val partFirst: Array[Int],
  @transient val partLast: Array[Int]
)(implicit tct: ClassTag[T]) extends RDD[T](prev.sparkContext, Nil) {
  assert(partFirst.length == partLast.length)

  override def getPartitions: Array[Partition] = {
    Array.tabulate[Partition](partFirst.length)(i =>
      BlockedRDDPartition(prev, i, partFirst(i), partLast(i)))
  }

  override def compute(split: Partition, context: TaskContext): Iterator[T] = {
    val parent = dependencies.head.rdd.asInstanceOf[RDD[T]]
    split.asInstanceOf[BlockedRDDPartition].parentPartitions.iterator.flatMap(p =>
      parent.iterator(p, context))
  }

  override def getDependencies: Seq[Dependency[_]] = {
    FastSeq(new NarrowDependency(prev) {
      def getParents(id: Int): Seq[Int] =
        partitions(id).asInstanceOf[BlockedRDDPartition].range
    })
  }

  override def clearDependencies() {
    super.clearDependencies()
    prev = null
  }

  override def getPreferredLocations(partition: Partition): Seq[String] = {
    val prevPartitions = prev.partitions
    val range = partition.asInstanceOf[BlockedRDDPartition].range

    val locationAvail = range.flatMap(i => prev.preferredLocations(prevPartitions(i)))
      .groupBy(identity)
      .mapValues(_.length)

    if (locationAvail.isEmpty)
      return FastSeq.empty[String]

    val m = locationAvail.values.max
    locationAvail.filter(_._2 == m)
      .keys
      .toFastSeq
  }
}
Example 2
Source File: FPTreeSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.fpm

import scala.language.existentials

import org.apache.spark.SparkFunSuite
import org.apache.spark.mllib.util.MLlibTestSparkContext

class FPTreeSuite extends SparkFunSuite with MLlibTestSparkContext {

  test("add transaction") { // add a transaction
    val tree = new FPTree[String]
      .add(Seq("a", "b", "c"))
      .add(Seq("a", "b", "y"))
      .add(Seq("b"))

    assert(tree.root.children.size == 2)
    assert(tree.root.children.contains("a"))
    assert(tree.root.children("a").item.equals("a"))
    assert(tree.root.children("a").count == 2)
    assert(tree.root.children.contains("b"))
    assert(tree.root.children("b").item.equals("b"))
    assert(tree.root.children("b").count == 1)
    var child = tree.root.children("a")
    assert(child.children.size == 1)
    assert(child.children.contains("b"))
    assert(child.children("b").item.equals("b"))
    assert(child.children("b").count == 2)
    child = child.children("b")
    assert(child.children.size == 2)
    assert(child.children.contains("c"))
    assert(child.children.contains("y"))
    assert(child.children("c").item.equals("c"))
    assert(child.children("y").item.equals("y"))
    assert(child.children("c").count == 1)
    assert(child.children("y").count == 1)
  }

  test("merge tree") { // merge two trees
    val tree1 = new FPTree[String]
      .add(Seq("a", "b", "c"))
      .add(Seq("a", "b", "y"))
      .add(Seq("b"))

    val tree2 = new FPTree[String]
      .add(Seq("a", "b"))
      .add(Seq("a", "b", "c"))
      .add(Seq("a", "b", "c", "d"))
      .add(Seq("a", "x"))
      .add(Seq("a", "x", "y"))
      .add(Seq("c", "n"))
      .add(Seq("c", "m"))

    val tree3 = tree1.merge(tree2)

    assert(tree3.root.children.size == 3)
    assert(tree3.root.children("a").count == 7)
    assert(tree3.root.children("b").count == 1)
    assert(tree3.root.children("c").count == 2)
    val child1 = tree3.root.children("a")
    assert(child1.children.size == 2)
    assert(child1.children("b").count == 5)
    assert(child1.children("x").count == 2)
    val child2 = child1.children("b")
    assert(child2.children.size == 2)
    assert(child2.children("y").count == 1)
    assert(child2.children("c").count == 3)
    val child3 = child2.children("c")
    assert(child3.children.size == 1)
    assert(child3.children("d").count == 1)
    val child4 = child1.children("x")
    assert(child4.children.size == 1)
    assert(child4.children("y").count == 1)
    val child5 = tree3.root.children("c")
    assert(child5.children.size == 2)
    assert(child5.children("n").count == 1)
    assert(child5.children("m").count == 1)
  }

  test("extract freq itemsets") { // extract frequent itemsets
    val tree = new FPTree[String]
      .add(Seq("a", "b", "c"))
      .add(Seq("a", "b", "y"))
      .add(Seq("a", "b"))
      .add(Seq("a"))
      .add(Seq("b"))
      .add(Seq("b", "n"))

    val freqItemsets = tree.extract(3L).map { case (items, count) =>
      (items.toSet, count)
    }.toSet
    val expected = Set(
      (Set("a"), 4L),
      (Set("b"), 5L),
      (Set("a", "b"), 3L))
    assert(freqItemsets === expected)
  }
}
Example 3
Source File: OutputDataStream.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.core.util

import akka.util.Timeout
import io.amient.affinity.core.actor.TransactionCoordinator
import io.amient.affinity.core.serde.AbstractSerde
import io.amient.affinity.core.storage.{LogStorage, LogStorageConf, Record}

import scala.concurrent.duration._
import scala.concurrent.{ExecutionContext, Future}
import scala.language.{existentials, postfixOps}

object OutputDataStream {

  class TransactionCoordinatorNoop extends TransactionCoordinator {
    override def _begin(): Future[Unit] = Future.successful(())

    override def _commit(): Future[Unit] = Future.successful(())

    override def _abort(): Future[Unit] = Future.successful(())

    override def append(topic: String, key: Array[Byte], value: Array[Byte],
                        timestamp: Option[Long], partition: Option[Int]): Future[_ <: Comparable[_]] = {
      Future.successful(0L)
    }
  }

  //create OutputDataStream without transactional support
  def apply[K, V](keySerde: AbstractSerde[_ >: K], valSerde: AbstractSerde[_ >: V], conf: LogStorageConf): OutputDataStream[K, V] = {
    new OutputDataStream[K, V](new TransactionCoordinatorNoop, keySerde, valSerde, conf)
  }
}

class OutputDataStream[K, V] private[affinity](txn: TransactionCoordinator,
                                               keySerde: AbstractSerde[_ >: K],
                                               valSerde: AbstractSerde[_ >: V],
                                               conf: LogStorageConf) {

  lazy val storage = LogStorage.newInstanceEnsureExists(conf)

  lazy private val topic: String = storage.getTopic()

  implicit val timeout = Timeout(1 minute) //FIXME

  def append(record: Record[K, V]): Future[_ <: Comparable[_]] = {
    if (txn.inTransaction()) {
      txn.append(topic, keySerde.toBytes(record.key), valSerde.toBytes(record.value), Option(record.timestamp), None)
    } else {
      val binaryRecord = new Record(keySerde.toBytes(record.key), valSerde.toBytes(record.value), record.timestamp)
      val jf = storage.append(binaryRecord)
      Future(jf.get)(ExecutionContext.Implicits.global)
    }
  }

  def delete(key: K): Future[_ <: Comparable[_]] = {
    if (txn.inTransaction()) {
      txn.append(topic, keySerde.toBytes(key), null, None, None)
    } else {
      val jf = storage.delete(keySerde.toBytes(key))
      Future(jf.get)(ExecutionContext.Implicits.global)
    }
  }

  def flush(): Unit = storage.flush()

  def close(): Unit = {
    try flush() finally try storage.close() finally {
      keySerde.close()
      valSerde.close()
    }
  }
}
Example 4
Source File: EvalConfig.scala From aerosolve with Apache License 2.0 | 5 votes |
package com.airbnb.common.ml.strategy.config

import scala.language.existentials

import com.typesafe.config.Config

import com.airbnb.common.ml.strategy.data.TrainingData
import com.airbnb.common.ml.util.ScalaLogging

case class EvalConfig(
    trainingConfig: TrainingConfig,
    evalDataQuery: String,
    holdoutDataQuery: String
)

object DirectQueryEvalConfig extends ScalaLogging {

  def loadConfig[T](
      config: Config
  ): EvalConfig = {
    val evalDataQuery = config.getString("eval_data_query")
    val holdoutDataQuery = config.getString("holdout_data_query")
    logger.info(s"Eval Data Query: $evalDataQuery")

    EvalConfig(
      TrainingConfig.loadConfig(config),
      evalDataQuery,
      holdoutDataQuery)
  }
}
Example 5
Source File: ShuffleMapTask.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler

import java.nio.ByteBuffer

import scala.language.existentials

import org.apache.spark._
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
import org.apache.spark.shuffle.ShuffleWriter

  def this(partitionId: Int) {
    this(0, null, new Partition { override def index: Int = 0 }, null)
  }

  @transient private val preferredLocs: Seq[TaskLocation] = {
    if (locs == null) Nil else locs.toSet.toSeq
  }

  override def runTask(context: TaskContext): MapStatus = {
    // Deserialize the RDD using the broadcast variable.
    val deserializeStartTime = System.currentTimeMillis()
    val ser = SparkEnv.get.closureSerializer.newInstance()
    val (rdd, dep) = ser.deserialize[(RDD[_], ShuffleDependency[_, _, _])](
      ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader)
    _executorDeserializeTime = System.currentTimeMillis() - deserializeStartTime

    metrics = Some(context.taskMetrics)
    var writer: ShuffleWriter[Any, Any] = null
    try {
      val manager = SparkEnv.get.shuffleManager
      writer = manager.getWriter[Any, Any](dep.shuffleHandle, partitionId, context)
      writer.write(rdd.iterator(partition, context).asInstanceOf[Iterator[_ <: Product2[Any, Any]]])
      return writer.stop(success = true).get
    } catch {
      case e: Exception =>
        try {
          if (writer != null) {
            writer.stop(success = false)
          }
        } catch {
          case e: Exception =>
            log.debug("Could not stop writer", e)
        }
        throw e
    }
  }

  override def preferredLocations: Seq[TaskLocation] = preferredLocs

  override def toString: String = "ShuffleMapTask(%d, %d)".format(stageId, partitionId)
}
Example 6
Source File: FPTreeSuite.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.fpm import scala.language.existentials import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext class FPTreeSuite extends SparkFunSuite with MLlibTestSparkContext { test("add transaction") { val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) assert(tree.root.children.size == 2) assert(tree.root.children.contains("a")) assert(tree.root.children("a").item.equals("a")) assert(tree.root.children("a").count == 2) assert(tree.root.children.contains("b")) assert(tree.root.children("b").item.equals("b")) assert(tree.root.children("b").count == 1) var child = tree.root.children("a") assert(child.children.size == 1) assert(child.children.contains("b")) assert(child.children("b").item.equals("b")) assert(child.children("b").count == 2) child = child.children("b") assert(child.children.size == 2) assert(child.children.contains("c")) assert(child.children.contains("y")) assert(child.children("c").item.equals("c")) assert(child.children("y").item.equals("y")) assert(child.children("c").count == 1) assert(child.children("y").count == 1) } test("merge tree") { val tree1 = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) val tree2 = new FPTree[String] .add(Seq("a", "b")) .add(Seq("a", "b", "c")) .add(Seq("a", "b", "c", "d")) .add(Seq("a", "x")) .add(Seq("a", "x", "y")) .add(Seq("c", "n")) .add(Seq("c", "m")) val tree3 = tree1.merge(tree2) assert(tree3.root.children.size == 3) assert(tree3.root.children("a").count == 7) assert(tree3.root.children("b").count == 1) assert(tree3.root.children("c").count == 2) val child1 = tree3.root.children("a") assert(child1.children.size == 2) assert(child1.children("b").count == 5) assert(child1.children("x").count == 2) val child2 = child1.children("b") assert(child2.children.size == 2) assert(child2.children("y").count == 1) assert(child2.children("c").count == 3) val child3 = child2.children("c") assert(child3.children.size == 1) assert(child3.children("d").count == 1) val child4 = child1.children("x") assert(child4.children.size == 1) assert(child4.children("y").count == 1) val child5 = tree3.root.children("c") assert(child5.children.size == 2) assert(child5.children("n").count == 1) assert(child5.children("m").count == 1) } test("extract freq itemsets") { val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("a", "b")) .add(Seq("a")) .add(Seq("b")) .add(Seq("b", "n")) val freqItemsets = tree.extract(3L).map { case (items, count) => (items.toSet, count) }.toSet val expected = Set( (Set("a"), 4L), (Set("b"), 5L), (Set("a", "b"), 3L)) assert(freqItemsets === expected) } }
Example 7
Source File: NettyBlockRpcServer.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.network.netty

import java.nio.ByteBuffer

import scala.collection.JavaConverters._
import scala.language.existentials
import scala.reflect.ClassTag

import org.apache.spark.internal.Logging
import org.apache.spark.network.BlockDataManager
import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer}
import org.apache.spark.network.client.{RpcResponseCallback, TransportClient}
import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager}
import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, OpenBlocks, StreamHandle, UploadBlock}
import org.apache.spark.serializer.Serializer
import org.apache.spark.storage.{BlockId, StorageLevel}

class NettyBlockRpcServer(
    appId: String,
    serializer: Serializer,
    blockManager: BlockDataManager)
  extends RpcHandler with Logging {

  private val streamManager = new OneForOneStreamManager()

  override def receive(
      client: TransportClient,
      rpcMessage: ByteBuffer,
      responseContext: RpcResponseCallback): Unit = {
    val message = BlockTransferMessage.Decoder.fromByteBuffer(rpcMessage)
    logTrace(s"Received request: $message")

    message match {
      case openBlocks: OpenBlocks =>
        val blocks: Seq[ManagedBuffer] =
          openBlocks.blockIds.map(BlockId.apply).map(blockManager.getBlockData)
        val streamId = streamManager.registerStream(appId, blocks.iterator.asJava)
        logTrace(s"Registered streamId $streamId with ${blocks.size} buffers")
        responseContext.onSuccess(new StreamHandle(streamId, blocks.size).toByteBuffer)

      case uploadBlock: UploadBlock =>
        // StorageLevel and ClassTag are serialized as bytes using our JavaSerializer.
        val (level: StorageLevel, classTag: ClassTag[_]) = {
          serializer
            .newInstance()
            .deserialize(ByteBuffer.wrap(uploadBlock.metadata))
            .asInstanceOf[(StorageLevel, ClassTag[_])]
        }
        val data = new NioManagedBuffer(ByteBuffer.wrap(uploadBlock.blockData))
        val blockId = BlockId(uploadBlock.blockId)
        blockManager.putBlockData(blockId, data, level, classTag)
        responseContext.onSuccess(ByteBuffer.allocate(0))
    }
  }

  override def getStreamManager(): StreamManager = streamManager
}
Example 8
Source File: FPTreeSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.fpm import scala.language.existentials import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext class FPTreeSuite extends SparkFunSuite with MLlibTestSparkContext { test("add transaction") { val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) assert(tree.root.children.size == 2) assert(tree.root.children.contains("a")) assert(tree.root.children("a").item.equals("a")) assert(tree.root.children("a").count == 2) assert(tree.root.children.contains("b")) assert(tree.root.children("b").item.equals("b")) assert(tree.root.children("b").count == 1) var child = tree.root.children("a") assert(child.children.size == 1) assert(child.children.contains("b")) assert(child.children("b").item.equals("b")) assert(child.children("b").count == 2) child = child.children("b") assert(child.children.size == 2) assert(child.children.contains("c")) assert(child.children.contains("y")) assert(child.children("c").item.equals("c")) assert(child.children("y").item.equals("y")) assert(child.children("c").count == 1) assert(child.children("y").count == 1) } test("merge tree") { val tree1 = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) val tree2 = new FPTree[String] .add(Seq("a", "b")) .add(Seq("a", "b", "c")) .add(Seq("a", "b", "c", "d")) .add(Seq("a", "x")) .add(Seq("a", "x", "y")) .add(Seq("c", "n")) .add(Seq("c", "m")) val tree3 = tree1.merge(tree2) assert(tree3.root.children.size == 3) assert(tree3.root.children("a").count == 7) assert(tree3.root.children("b").count == 1) assert(tree3.root.children("c").count == 2) val child1 = tree3.root.children("a") assert(child1.children.size == 2) assert(child1.children("b").count == 5) assert(child1.children("x").count == 2) val child2 = child1.children("b") assert(child2.children.size == 2) assert(child2.children("y").count == 1) assert(child2.children("c").count == 3) val child3 = child2.children("c") assert(child3.children.size == 1) assert(child3.children("d").count == 1) val child4 = child1.children("x") assert(child4.children.size == 1) assert(child4.children("y").count == 1) val child5 = tree3.root.children("c") assert(child5.children.size == 2) assert(child5.children("n").count == 1) assert(child5.children("m").count == 1) } test("extract freq itemsets") { val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("a", "b")) .add(Seq("a")) .add(Seq("b")) .add(Seq("b", "n")) val freqItemsets = tree.extract(3L).map { case (items, count) => (items.toSet, count) }.toSet val expected = Set( (Set("a"), 4L), (Set("b"), 5L), (Set("a", "b"), 3L)) assert(freqItemsets === expected) } }
Example 9
Source File: Casts.scala From hail with MIT License | 5 votes |
package is.hail.expr.ir

import is.hail.asm4s._
import is.hail.types._
import is.hail.types.virtual._

import scala.language.existentials

object Casts {
  private val casts: Map[(Type, Type), (Code[T] => Code[_]) forSome {type T}] = Map(
    (TInt32, TInt32) -> ((x: Code[Int]) => x),
    (TInt32, TInt64) -> ((x: Code[Int]) => x.toL),
    (TInt32, TFloat32) -> ((x: Code[Int]) => x.toF),
    (TInt32, TFloat64) -> ((x: Code[Int]) => x.toD),
    (TInt64, TInt32) -> ((x: Code[Long]) => x.toI),
    (TInt64, TInt64) -> ((x: Code[Long]) => x),
    (TInt64, TFloat32) -> ((x: Code[Long]) => x.toF),
    (TInt64, TFloat64) -> ((x: Code[Long]) => x.toD),
    (TFloat32, TInt32) -> ((x: Code[Float]) => x.toI),
    (TFloat32, TInt64) -> ((x: Code[Float]) => x.toL),
    (TFloat32, TFloat32) -> ((x: Code[Float]) => x),
    (TFloat32, TFloat64) -> ((x: Code[Float]) => x.toD),
    (TFloat64, TInt32) -> ((x: Code[Double]) => x.toI),
    (TFloat64, TInt64) -> ((x: Code[Double]) => x.toL),
    (TFloat64, TFloat32) -> ((x: Code[Double]) => x.toF),
    (TFloat64, TFloat64) -> ((x: Code[Double]) => x),
    (TInt32, TCall) -> ((x: Code[Int]) => x))

  def get(from: Type, to: Type): Code[_] => Code[_] =
    casts(from -> to).asInstanceOf[Code[_] => Code[_]]

  def valid(from: Type, to: Type): Boolean =
    casts.contains(from -> to)
}
Example 10
Source File: BinarySearch.scala From hail with MIT License | 5 votes |
package is.hail.expr.ir import is.hail.annotations.{CodeOrdering, Region} import is.hail.asm4s._ import is.hail.types.physical._ import is.hail.utils.FastIndexedSeq import scala.language.existentials class BinarySearch[C](mb: EmitMethodBuilder[C], typ: PContainer, eltType: PType, keyOnly: Boolean) { val elt: PType = typ.elementType val ti: TypeInfo[_] = typeToTypeInfo(elt) val (compare: CodeOrdering.F[Int], equiv: CodeOrdering.F[Boolean], findElt: EmitMethodBuilder[C], t: PType) = if (keyOnly) { val ttype = elt match { case t: PBaseStruct => require(t.size == 2) t case t: PInterval => t.representation.asInstanceOf[PStruct] } val kt = ttype.types(0) val findMB = mb.genEmitMethod("findElt", FastIndexedSeq[ParamType](typeInfo[Long], typeInfo[Boolean], typeToTypeInfo(kt)), typeInfo[Int]) val mk2l = findMB.newLocal[Boolean]() val mk2l1 = mb.newLocal[Boolean]() val comp: CodeOrdering.F[Int] = { case ((mk1: Code[Boolean], k1: Code[_]), (m2: Code[Boolean], v2: Code[Long] @unchecked)) => Code.memoize(v2, "bs_comp_v2") { v2 => val mk2 = Code(mk2l := m2 || ttype.isFieldMissing(v2, 0), mk2l) val k2 = mk2l.mux(defaultValue(kt), Region.loadIRIntermediate(kt)(ttype.fieldOffset(v2, 0))) findMB.getCodeOrdering(eltType, kt, CodeOrdering.Compare())((mk1, k1), (mk2, k2)) } } val ceq: CodeOrdering.F[Boolean] = { case ((mk1: Code[Boolean], k1: Code[_]), (m2: Code[Boolean], v2: Code[Long] @unchecked)) => Code.memoize(v2, "bs_comp_v2") { v2 => val mk2 = Code(mk2l1 := m2 || ttype.isFieldMissing(v2, 0), mk2l1) val k2 = mk2l1.mux(defaultValue(kt), Region.loadIRIntermediate(kt)(ttype.fieldOffset(v2, 0))) mb.getCodeOrdering(eltType, kt, CodeOrdering.Equiv())((mk1, k1), (mk2, k2)) } } (comp, ceq, findMB, kt) } else (mb.getCodeOrdering(eltType, elt, CodeOrdering.Compare()), mb.getCodeOrdering(eltType, elt, CodeOrdering.Equiv()), mb.genEmitMethod("findElt", FastIndexedSeq[ParamType](typeInfo[Long], typeInfo[Boolean], elt.ti), typeInfo[Int]), elt) private[this] val array = findElt.getCodeParam[Long](1) private[this] val m = findElt.getCodeParam[Boolean](2) private[this] val e = findElt.getCodeParam(3)(t.ti) private[this] val len = findElt.newLocal[Int]() private[this] val i = findElt.newLocal[Int]() private[this] val low = findElt.newLocal[Int]() private[this] val high = findElt.newLocal[Int]() def cmp(i: Code[Int]): Code[Int] = Code.memoize(i, "binsearch_cmp_i") { i => compare((m, e), (typ.isElementMissing(array, i), Region.loadIRIntermediate(elt)(typ.elementOffset(array, len, i)))) } // Returns smallest i, 0 <= i < n, for which a(i) >= key, or returns n if a(i) < key for all i findElt.emit(Code( len := typ.loadLength(array), low := 0, high := len, Code.whileLoop(low < high, i := (low + high) / 2, (cmp(i) <= 0).mux( high := i, low := i + 1)), low)) // check missingness of v before calling def getClosestIndex(array: Code[Long], m: Code[Boolean], v: Code[_]): Code[Int] = { findElt.invokeCode[Int](array, m, v) } }
Example 11
Source File: PrunedScanSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.sources

import scala.language.existentials

import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
import org.apache.spark.sql.test.SharedSQLContext
import org.apache.spark.sql.types._

// PrunedScan lets the caller specify the required columns, so the data source
// does not need to return the other columns.
class PrunedScanSource extends RelationProvider { // provides the relation
  override def createRelation(
      sqlContext: SQLContext,
      parameters: Map[String, String]): BaseRelation = {
    SimplePrunedScan(parameters("from").toInt, parameters("to").toInt)(sqlContext)
  }
}

case class SimplePrunedScan(from: Int, to: Int)(@transient val sqlContext: SQLContext)
  extends BaseRelation
  with PrunedScan {

  override def schema: StructType = StructType( // StructType represents a table, StructField a single field
    StructField("a", IntegerType, nullable = false) ::
    StructField("b", IntegerType, nullable = false) :: Nil)

  override def buildScan(requiredColumns: Array[String]): RDD[Row] = {
    val rowBuilders = requiredColumns.map {
      case "a" => (i: Int) => Seq(i)
      case "b" => (i: Int) => {
        //println(">>>>>>>"+i * 2)
        Seq(i * 2)
      }
    }
    // parallelize: number of partitions
    sqlContext.sparkContext.parallelize(from to to).map(i =>
      Row.fromSeq(rowBuilders.map(_(i)).reduceOption(_ ++ _).getOrElse(Seq.empty)))
  }
}

class PrunedScanSuite extends DataSourceTest with SharedSQLContext {
  protected override lazy val sql = caseInsensitiveContext.sql _

  override def beforeAll(): Unit = {
    super.beforeAll()
    sql(
      """
        |CREATE TEMPORARY TABLE oneToTenPruned
        |USING org.apache.spark.sql.sources.PrunedScanSource
        |OPTIONS (
        |  from '1',
        |  to '10'
        |)
      """.stripMargin)
  }

  def testPruning(sqlString: String, expectedColumns: String*): Unit = {
    test(s"Columns output ${expectedColumns.mkString(",")}: $sqlString") {
      val queryExecution = sql(sqlString).queryExecution
      val rawPlan = queryExecution.executedPlan.collect {
        case p: execution.PhysicalRDD => p
      } match {
        case Seq(p) => p
        case _ => fail(s"More than one PhysicalRDD found\n$queryExecution")
      }
      val rawColumns = rawPlan.output.map(_.name)
      val rawOutput = rawPlan.execute().first()

      if (rawColumns != expectedColumns) {
        fail(
          s"Wrong column names. Got $rawColumns, Expected $expectedColumns\n" +
            s"Filters pushed: ${FiltersPushed.list.mkString(",")}\n" +
            queryExecution)
      }

      if (rawOutput.numFields != expectedColumns.size) {
        fail(s"Wrong output row. Got $rawOutput\n$queryExecution")
      }
    }
  }
}
Example 12
Source File: SprayUtilities.scala From mmlspark with MIT License | 5 votes |
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.nbtest

import spray.json.{JsArray, JsObject, JsValue, JsonFormat}

import scala.language.{existentials, implicitConversions}

abstract class SprayOp

case class IndexOp(item: Int) extends SprayOp

case class FieldOp(value: String) extends SprayOp

class SprayUtility(val json: JsValue) {

  private def parseQuery(q: String): List[SprayOp] = {
    q.split("." (0)).flatMap { t =>
      if (t.contains("[") & t.contains("]")) {
        t.split("][".toCharArray).filter(_.length > 0).toSeq match {
          case Seq(index) => Seq(IndexOp(index.toInt))
          case Seq(field, index) => Seq(FieldOp(field), IndexOp(index.toInt))
        }
      } else if (!t.contains("[") & !t.contains("]")) {
        Seq(FieldOp(t)).asInstanceOf[List[SprayOp]]
      } else {
        throw new IllegalArgumentException(s"Cannot parse query: $q")
      }
    }.toList
  }

  private def selectInternal[T](json: JsValue, ops: List[SprayOp])(implicit format: JsonFormat[T]): T = {
    ops match {
      case Nil => json.convertTo[T]
      case IndexOp(i) :: tail =>
        selectInternal[T](json.asInstanceOf[JsArray].elements(i), tail)
      case FieldOp(f) :: tail =>
        selectInternal[T](json.asInstanceOf[JsObject].fields(f), tail)
      case _ => throw new MatchError("This code should be unreachable")
    }
  }

  def select[T](query: String)(implicit format: JsonFormat[T]): T = {
    selectInternal[T](json, parseQuery(query))
  }
}

object SprayImplicits {
  implicit def sprayUtilityConverter(s: JsValue): SprayUtility = new SprayUtility(s)
  implicit def sprayUtilityConversion(s: SprayUtility): JsValue = s.json
}
Example 13
Source File: NotebookTests.scala From mmlspark with MIT License | 5 votes |
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.nbtest

//TODO temp hack because ij picks up on its test classes by mistake

import java.util.concurrent.TimeUnit

import com.microsoft.ml.spark.core.test.base.TestBase
import com.microsoft.ml.nbtest.DatabricksUtilities._

import scala.concurrent.Await
import scala.concurrent.duration.Duration
import scala.language.existentials

class NotebookTests extends TestBase {

  test("Databricks Notebooks") {
    val clusterId = createClusterInPool(ClusterName, PoolId)
    try {
      println("Checking if cluster is active")
      tryWithRetries(Seq.fill(60 * 15)(1000).toArray) { () =>
        assert(isClusterActive(clusterId))
      }
      println("Installing libraries")
      installLibraries(clusterId)
      tryWithRetries(Seq.fill(60 * 3)(1000).toArray) { () =>
        assert(isClusterActive(clusterId))
      }
      println(s"Creating folder $Folder")
      workspaceMkDir(Folder)
      println(s"Submitting jobs")
      val jobIds = NotebookFiles.map(uploadAndSubmitNotebook(clusterId, _))
      println(s"Submitted ${jobIds.length} for execution: ${jobIds.toList}")
      try {
        val monitors = jobIds.map((runId: Int) => monitorJob(runId, TimeoutInMillis, logLevel = 2))
        println(s"Monitoring Jobs...")
        val failures = monitors
          .map(Await.ready(_, Duration(TimeoutInMillis.toLong, TimeUnit.MILLISECONDS)).value.get)
          .filter(_.isFailure)
        assert(failures.isEmpty)
      } catch {
        case t: Throwable =>
          jobIds.foreach { jid =>
            println(s"Cancelling job $jid")
            cancelRun(jid)
          }
          throw t
      }
    } finally {
      deleteCluster(clusterId)
    }
  }

  ignore("list running jobs for convenience") {
    val obj = databricksGet("jobs/runs/list?active_only=true&limit=1000")
    println(obj)
  }
}
Example 14
Source File: ScannerSpec.scala From better-files with MIT License | 5 votes |
package better.files import Dsl._ import scala.language.existentials class ScannerSpec extends CommonSpec { def t1 = File.newTemporaryFile() "splitter" should "split" in { val csvSplitter = StringSplitter.on(',') def split(s: String) = csvSplitter.split(s).toList assert(split(",") === List("", "")) assert(split("") === List("")) assert(split("Hello World") === List("Hello World")) assert(split("Hello,World") === List("Hello", "World")) assert(split(",,") === List("", "", "")) assert(split(",Hello,World,") === List("", "Hello", "World", "")) assert(split(",Hello,World") === List("", "Hello", "World")) assert(split("Hello,World,") === List("Hello", "World", "")) } "scanner" should "parse files" in { val data = t1 << s""" | Hello World | 1 2 3 | Ok 23 football """.stripMargin data.scanner() foreach { scanner => assert(scanner.lineNumber() == 0) assert(scanner.next[String] == "Hello") assert(scanner.lineNumber() == 2) assert(scanner.next[String] == "World") assert(scanner.next[Int] == 1) assert(scanner.next[Int] == 2) assert(scanner.lineNumber() == 3) assert(scanner.next[Int] == 3) assert(scanner.nextLine() == " Ok 23 football") assert(!scanner.hasNext) a[NoSuchElementException] should be thrownBy scanner.next() a[NoSuchElementException] should be thrownBy scanner.nextLine() assert(!scanner.hasNext) } data.tokens().toSeq shouldEqual data.newScanner().toSeq } it should "parse longs/booleans" in { val data = for { scanner <- Scanner("10 false").autoClosed } yield scanner.next[(Long, Boolean)] data.get() shouldBe ((10L, false)) } it should "parse custom parsers" in { val file = t1 < """ |Garfield |Woofer """.stripMargin sealed trait Animal case class Dog(name: String) extends Animal case class Cat(name: String) extends Animal implicit val animalParser: Scannable[Animal] = Scannable { scanner => val name = scanner.next[String] if (name == "Garfield") Cat(name) else Dog(name) } file.scanner() foreach { scanner => Seq.fill(2)(scanner.next[Animal]) should contain theSameElementsInOrderAs Seq(Cat("Garfield"), Dog("Woofer")) } } it should "parse empty tokens" in { val scanner = Scanner("hello||world", StringSplitter.on('|')) List.fill(3)(scanner.next[Option[String]]) shouldEqual List(Some("hello"), None, Some("world")) } }
Example 15
Source File: LabelsSelectize.scala From ProductWebUI with Apache License 2.0 | 5 votes |
package synereo.client.components

import shared.models.Label
import synereo.client.services.SYNEREOCircuit

import scala.language.existentials
import japgolly.scalajs.react._
import japgolly.scalajs.react.vdom.prefix_<^._
import org.querki.jquery._
import org.scalajs.dom._
import synereo.client.facades.SynereoSelectizeFacade

import scala.language.existentials
import scala.scalajs.js

    )(
      <.option(^.value := "")("Select"),
      // props.proxy().render(searchesRootModel => searchesRootModel.se)
      for (label <- SYNEREOCircuit.zoom(_.searches.searchesModel).value) yield {
        <.option(^.value := label.text, ^.key := label.uid)(s"#${label.text}")
      })
    }
  }

  val component = ReactComponentB[Props]("LabelsSelectize")
    .initialState(State())
    .renderBackend[Backend]
    .componentDidMount(scope => scope.backend.mounted(scope.props))
    .build

  def apply(props: Props) = component(props)
}
Example 16
Source File: ConnectionsLabelsSelectize.scala From ProductWebUI with Apache License 2.0 | 5 votes |
package synereo.client.components import japgolly.scalajs.react._ import japgolly.scalajs.react.vdom.prefix_<^._ import org.querki.jquery._ import org.scalajs.dom._ import shared.dtos.Connection import synereo.client.facades.SynereoSelectizeFacade import synereo.client.services.SYNEREOCircuit import scala.language.existentials import scala.scalajs.js //scalastyle:off object ConnectionsLabelsSelectize { def getCnxnsAndLabelsFromSelectize(selectizeInputId: String): (Seq[Connection], Seq[String]) = { var selectedConnections = Seq[Connection]() var selectedLabels = Seq[String]() val selector: js.Object = s"#${selectizeInputId} > .selectize-control> .selectize-input > div" $(selector).each((y: Element) => { val dataVal = $(y).attr("data-value").toString try { val cnxn = upickle.default.read[Connection](dataVal) selectedConnections :+= cnxn } catch { case e: Exception => selectedLabels :+= dataVal } }) (selectedConnections, selectedLabels) } def filterLabelStrings(value: Seq[String], character: String): Seq[String] = { value .filter(e => e.charAt(0) == "#" && e.count(_ == character) == 1) .map(_.replace(character, "")).distinct } case class Props(parentIdentifier: String) case class State(maxItems: Int = 7, maxCharLimit: Int = 16, allowNewItemsCreation: Boolean = false) case class Backend(t: BackendScope[Props, State]) { def initializeTagsInput(): Unit = { val state = t.state.runNow() val parentIdentifier = t.props.runNow().parentIdentifier SynereoSelectizeFacade.initilizeSelectize(s"${parentIdentifier}-selectize", state.maxItems, state.maxCharLimit, state.allowNewItemsCreation) } def mounted(props: Props): Callback = Callback { initializeTagsInput() } def render(props: Props, state: State) = { <.select(^.className := "select-state", ^.id := s"${props.parentIdentifier}-selectize", ^.className := "demo-default", ^.placeholder := "Search e.g. @Synereo or #fun")( <.option(^.value := "")("Select"), for (connection <- SYNEREOCircuit.zoom(_.connections).value.connectionsResponse) yield <.option(^.value := upickle.default.write(connection.connection), ^.key := connection.connection.target)(s"@${connection.name}"), for (label <- SYNEREOCircuit.zoom(_.searches).value.searchesModel) yield <.option(^.value := label.text, ^.key := label.uid)(s"#${label.text}") ) } } val component = ReactComponentB[Props]("SearchesConnectionList") .initialState(State()) .renderBackend[Backend] .componentDidMount(scope => scope.backend.mounted(scope.props)) .build def apply(props: Props) = component(props) }
Example 17
Source File: UserPersona.scala From ProductWebUI with Apache License 2.0 | 5 votes |
package synereo.client.components

import diode.react.ModelProxy
import japgolly.scalajs.react.{ReactComponentB, _}
import japgolly.scalajs.react.vdom.prefix_<^._
import shared.models.UserModel
import synereo.client.css.{NewMessageCSS, SynereoCommanStylesCSS}

import scala.language.existentials
import scalacss.ScalaCssReact._

//scalastyle:off
object UserPersona {

  def getPersona(): String = {
    ""
  }

  case class Props(proxy: ModelProxy[UserModel])

  case class Backend(t: BackendScope[Props, _]) {
    def mounted(props: Props): Callback = Callback {
      // println("UserPersona is : " + props.proxy.value)
    }

    def render(props: Props) = {
      val model = props.proxy.value
      <.div(^.className := "row", NewMessageCSS.Style.PersonaContainerDiv)(
        <.div(^.className := "col-md-2 col-sm-2 col-xs-2", SynereoCommanStylesCSS.Style.paddingLeftZero)(
          <.img(^.alt := "userImage", ^.src := model.imgSrc, ^.className := "img-responsive", NewMessageCSS.Style.userImage)
        ),
        <.div(^.className := "col-md-10", SynereoCommanStylesCSS.Style.paddingLeftZero, SynereoCommanStylesCSS.Style.paddingRightZero)(
          <.div(
            <.button(^.className := "btn", ^.`type` := "button", NewMessageCSS.Style.changePersonaBtn)("Change posting persona",
              <.span(^.className := "caret", ^.color.blue)),
            <.div(^.className := "pull-right hidden-xs")(MIcon.apply("more_vert", "24"))
          )
        ),
        <.div(NewMessageCSS.Style.userNameOnDilogue)(
          <.div(model.name, <.span(Icon.chevronRight), "public", <.span(Icon.share))
        )
      )
    }
  }

  val component = ReactComponentB[Props]("UserPersona")
    .renderBackend[Backend]
    .componentDidMount(scope => scope.backend.mounted(scope.props))
    .build

  def apply(props: Props) = component(props)
}
Example 18
Source File: LabelsSelectize.scala From ProductWebUI with Apache License 2.0 | 5 votes |
package client.components import client.utils.LabelsUtils import diode.react.ModelProxy import japgolly.scalajs.react._ import japgolly.scalajs.react.vdom.prefix_<^._ import org.denigma.selectize._ import org.querki.jquery._ import org.scalajs.dom._ import client.rootmodel.SearchesRootModel import shared.models.Label import client.sessionitems.SessionItems import scala.collection.mutable.ListBuffer import scala.language.existentials import scala.scalajs.js object LabelsSelectize { def getLabelsTxtFromSelectize(selectizeInputId: String): Seq[String] = { var selectedLabels = Seq[String]() val selector: js.Object = s"#${selectizeInputId} > .selectize-control> .selectize-input > div" if ($(selector).length > 0) { $(selector).each((y: Element) => selectedLabels :+= $(y).attr("data-value").toString) } else { selectedLabels = Nil } selectedLabels } def getLabelsFromSelectizeInput(selectizeInputId: String): Seq[Label] = { var selectedLabels = Seq[Label]() val selector: js.Object = s"#${selectizeInputId} > .selectize-control> .selectize-input > div" $(selector).each((y: Element) => selectedLabels :+= upickle.default.read[Label]($(y).attr("data-value").toString)) selectedLabels } var getSelectedValue = new ListBuffer[String]() case class Props(proxy: ModelProxy[SearchesRootModel], parentIdentifier: String) case class Backend(t: BackendScope[Props, _]) { def initializeTagsInput(parentIdentifier: String): Unit = { val selectState: js.Object = s"#$parentIdentifier > .selectize-control" // println(s"element lenth: ${$(selectState).length}") if ($(selectState).length < 1) { val selectizeInput: js.Object = "#labelsSelectize" // $(selectizeInput).selectize(SelectizeConfig.maxOptions(2)).destroy() // println(s"test : ${$(selectizeInput)}") $(selectizeInput).selectize(SelectizeConfig .create(true) .maxItems(3) .plugins("remove_button")) } } def getSelectedValues = Callback { val selectState: js.Object = "#selectize" val getSelectedValue = $(selectState).find("option").text() //scalastyle:off // println(getSelectedValue) } def mounted(props: Props): Callback = Callback { // println("searches model is = " + props.proxy().searchesModel) initializeTagsInput(props.parentIdentifier) } def render(props: Props) = { val parentDiv: js.Object = s"#${props.parentIdentifier}" // println(s"parent div length ${$(parentDiv).length}") if ($(parentDiv).length == 0) { <.select(^.className := "select-state", ^.id := "labelsSelectize", ^.className := "demo-default", ^.placeholder := "select #label(s)", ^.onChange --> getSelectedValues)( <.option(^.value := "")("Select"), // props.proxy().render(searchesRootModel => searchesRootModel.se) for (label <- props.proxy().searchesModel .filter(e => e.parentUid == "self") .filterNot(e => LabelsUtils.getSystemLabels().contains(e.text))) yield { <.option(^.value := upickle.default.write(label), ^.key := label.uid)(label.text) } ) } else { <.div() } } } val component = ReactComponentB[Props]("SearchesConnectionList") .renderBackend[Backend] .componentDidMount(scope => scope.backend.mounted(scope.props)) .build def apply(props: Props) = component(props) }
Example 19
Source File: Dashboard.scala From scalajs-spa-tutorial with Apache License 2.0 | 5 votes |
package spatutorial.client.modules

import diode.data.Pot
import diode.react._
import japgolly.scalajs.react._
import japgolly.scalajs.react.extra.router.RouterCtl
import japgolly.scalajs.react.vdom.html_<^._
import spatutorial.client.SPAMain.{Loc, TodoLoc}
import spatutorial.client.components._

import scala.util.Random
import scala.language.existentials

object Dashboard {

  case class Props(router: RouterCtl[Loc], proxy: ModelProxy[Pot[String]])

  case class State(motdWrapper: ReactConnectProxy[Pot[String]])

  // create dummy data for the chart
  val cp = Chart.ChartProps(
    "Test chart",
    Chart.BarChart,
    ChartData(
      Random.alphanumeric.map(_.toUpper.toString).distinct.take(10),
      Seq(ChartDataset(Iterator.continually(Random.nextDouble() * 10).take(10).toSeq, "Data1"))
    )
  )

  // create the React component for Dashboard
  private val component = ScalaComponent.builder[Props]("Dashboard")
    // create and store the connect proxy in state for later use
    .initialStateFromProps(props => State(props.proxy.connect(m => m)))
    .renderPS { (_, props, state) =>
      <.div(
        // header, MessageOfTheDay and chart components
        <.h2("Dashboard"),
        state.motdWrapper(Motd(_)),
        Chart(cp),
        // create a link to the To Do view
        <.div(props.router.link(TodoLoc)("Check your todos!"))
      )
    }
    .build

  def apply(router: RouterCtl[Loc], proxy: ModelProxy[Pot[String]]) = component(Props(router, proxy))
}
Example 20
Source File: ShuffleMapTask.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler

import java.nio.ByteBuffer

import scala.language.existentials

import org.apache.spark._
import org.apache.spark.broadcast.Broadcast
import org.apache.spark.rdd.RDD
import org.apache.spark.shuffle.ShuffleWriter

  def this(partitionId: Int) {
    this(0, 0, null, new Partition { override def index: Int = 0 }, null, null)
  }

  @transient private val preferredLocs: Seq[TaskLocation] = {
    if (locs == null) Nil else locs.toSet.toSeq
  }

  override def runTask(context: TaskContext): MapStatus = {
    // Deserialize the RDD using the broadcast variable.
    val deserializeStartTime = System.currentTimeMillis()
    val ser = SparkEnv.get.closureSerializer.newInstance()
    val (rdd, dep) = ser.deserialize[(RDD[_], ShuffleDependency[_, _, _])](
      ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader)
    _executorDeserializeTime = System.currentTimeMillis() - deserializeStartTime

    metrics = Some(context.taskMetrics)
    var writer: ShuffleWriter[Any, Any] = null
    try {
      val manager = SparkEnv.get.shuffleManager
      writer = manager.getWriter[Any, Any](dep.shuffleHandle, partitionId, context)
      writer.write(rdd.iterator(partition, context).asInstanceOf[Iterator[_ <: Product2[Any, Any]]])
      writer.stop(success = true).get
    } catch {
      case e: Exception =>
        try {
          if (writer != null) {
            writer.stop(success = false)
          }
        } catch {
          case e: Exception =>
            log.debug("Could not stop writer", e)
        }
        throw e
    }
  }

  override def preferredLocations: Seq[TaskLocation] = preferredLocs

  override def toString: String = "ShuffleMapTask(%d, %d)".format(stageId, partitionId)
}
Example 21
Source File: AnyFormatSpec.scala From scalapb-json4s with Apache License 2.0 | 5 votes |
package scalapb.json4s import com.google.protobuf.any.{Any => PBAny} import jsontest.anytests.{AnyTest, ManyAnyTest} import org.json4s.jackson.JsonMethods._ import scala.language.existentials import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.must.Matchers class AnyFormatSpec extends AnyFlatSpec with Matchers with JavaAssertions { val RawExample = AnyTest("test") val RawJson = parse(s"""{"field":"test"}""") val AnyExample = PBAny.pack(RawExample) val AnyJson = parse( s"""{"@type":"type.googleapis.com/jsontest.AnyTest","field":"test"}""" ) val CustomPrefixAny = PBAny.pack(RawExample, "example.com/") val CustomPrefixJson = parse( s"""{"@type":"example.com/jsontest.AnyTest","field":"test"}""" ) val ManyExample = ManyAnyTest( Seq( PBAny.pack(AnyTest("1")), PBAny.pack(AnyTest("2")) ) ) val ManyPackedJson = parse( """ |{ | "@type": "type.googleapis.com/jsontest.ManyAnyTest", | "fields": [ | {"@type": "type.googleapis.com/jsontest.AnyTest", "field": "1"}, | {"@type": "type.googleapis.com/jsontest.AnyTest", "field": "2"} | ] |} """.stripMargin ) override def registeredCompanions = Seq(AnyTest, ManyAnyTest) // For clarity def UnregisteredPrinter = JsonFormat.printer def UnregisteredParser = JsonFormat.parser "Any" should "fail to serialize if its respective companion is not registered" in { an[IllegalStateException] must be thrownBy UnregisteredPrinter.toJson( AnyExample ) } "Any" should "fail to deserialize if its respective companion is not registered" in { a[JsonFormatException] must be thrownBy UnregisteredParser.fromJson[PBAny]( AnyJson ) } "Any" should "serialize correctly if its respective companion is registered" in { ScalaJsonPrinter.toJson(AnyExample) must be(AnyJson) } "Any" should "fail to serialize with a custom URL prefix if specified" in { an[IllegalStateException] must be thrownBy ScalaJsonPrinter.toJson( CustomPrefixAny ) } "Any" should "fail to deserialize for a non-Google-prefixed type URL" in { a[JsonFormatException] must be thrownBy ScalaJsonParser.fromJson[PBAny]( CustomPrefixJson ) } "Any" should "deserialize correctly if its respective companion is registered" in { ScalaJsonParser.fromJson[PBAny](AnyJson) must be(AnyExample) } "Any" should "be serialized the same as in Java (and parsed back to original)" in { assertJsonIsSameAsJava(AnyExample) } "Any" should "resolve printers recursively" in { val packed = PBAny.pack(ManyExample) ScalaJsonPrinter.toJson(packed) must be(ManyPackedJson) } "Any" should "resolve parsers recursively" in { ScalaJsonParser.fromJson[PBAny](ManyPackedJson).unpack[ManyAnyTest] must be( ManyExample ) } }
Example 22
Source File: AnyFormat.scala From scalapb-json4s with Apache License 2.0 | 5 votes |
package scalapb.json4s

import com.google.protobuf.any.{Any => PBAny}
import org.json4s.JsonAST.{JNothing, JObject, JString, JValue}

import scala.language.existentials

object AnyFormat {
  val anyWriter: (Printer, PBAny) => JValue = {
    case (printer, any) =>
      // Find the companion so it can be used to JSON-serialize the message. Perhaps this can be circumvented by
      // including the original GeneratedMessage with the Any (at least in memory).
      val cmp = printer.typeRegistry
        .findType(any.typeUrl)
        .getOrElse(
          throw new IllegalStateException(
            s"Unknown type ${any.typeUrl} in Any. Add a TypeRegistry that supports this type to the Printer."
          )
        )

      // Unpack the message...
      val message = any.unpack(cmp)

      // ... and add the @type marker to the resulting JSON
      printer.toJson(message) match {
        case JObject(fields) => JObject(("@type" -> JString(any.typeUrl)) +: fields)
        case value =>
          // Safety net, this shouldn't happen
          throw new IllegalStateException(
            s"Message of type ${any.typeUrl} emitted non-object JSON: $value"
          )
      }
  }

  val anyParser: (Parser, JValue) => PBAny = {
    case (parser, obj @ JObject(fields)) =>
      obj \ "@type" match {
        case JString(typeUrl) =>
          val cmp = parser.typeRegistry
            .findType(typeUrl)
            .getOrElse(
              throw new JsonFormatException(
                s"Unknown type ${typeUrl} in Any. Add a TypeRegistry that supports this type to the Parser."
              )
            )
          val message = parser.fromJson(obj, true)(cmp)
          PBAny(typeUrl = typeUrl, value = message.toByteString)

        case JNothing =>
          throw new JsonFormatException(s"Missing type url when parsing $obj")

        case unknown =>
          throw new JsonFormatException(
            s"Expected string @type field, got $unknown"
          )
      }

    case (_, unknown) =>
      throw new JsonFormatException(s"Expected an object, got $unknown")
  }
}
Example 23
Source File: ModelSerializabilityTestBase.scala From aloha with MIT License | 5 votes |
package com.eharmony.aloha import scala.language.existentials import com.eharmony.aloha import com.eharmony.aloha.models.{Model, SubmodelBase} import org.junit.Assert._ import org.junit.Test import org.reflections.Reflections import scala.collection.JavaConversions.asScalaSet import scala.util.Try import java.lang.reflect.{Method, Modifier} import com.eharmony.aloha.util.Logging abstract class ModelSerializabilityTestBase(pkgs: Seq[String], outFilters: Seq[String]) extends Logging { def this() = this(pkgs = Seq(aloha.pkgName), Seq.empty) @Test def testSerialization(): Unit = { val ref = new Reflections(pkgs:_*) val submodels = ref.getSubTypesOf(classOf[SubmodelBase[_, _, _, _]]).toSeq val models = ref.getSubTypesOf(classOf[Model[_, _]]).toSeq val modelClasses = (models ++ submodels). filterNot { _.isInterface }. filterNot { c => val name = c.getName outFilters.exists(name.matches) } if (modelClasses.isEmpty) { fail(s"No models found to test for Serializability in packages: ${pkgs.mkString(",")}") } else { debug { modelClasses .map(_.getCanonicalName) .mkString("Models tested for Serializability:\n\t", "\n\t", "") } } modelClasses.foreach { c => val m = for { testClass <- getTestClass(c.getCanonicalName) testMethod <- getTestMethod(testClass) method <- ensureTestMethodIsTest(testMethod) } yield method m.left foreach fail } } private[this] implicit class RightMonad[L, R](e: Either[L, R]) { def flatMap[R1](f: R => Either[L, R1]) = e.right.flatMap(f) def map[R1](f: R => R1) = e.right.map(f) } private[this] def getTestClass(modelClassName: String) = { val testName = modelClassName + "Test" Try { Class.forName(testName) } map { Right(_) } getOrElse Left("No test class exists for " + modelClassName) } private[this] def getTestMethod(testClass: Class[_]) = { val testMethodName = "testSerialization" lazy val msg = s"$testMethodName doesn't exist in ${testClass.getCanonicalName}." Try { Option(testClass.getMethod(testMethodName)) } map { case Some(m) => Right(m) case None => Left(msg) } getOrElse Left(msg) } private[this] def ensureTestMethodIsTest(method: Method) = { if (!Modifier.isPublic(method.getModifiers)) Left(s"testSerialization in ${method.getDeclaringClass.getCanonicalName} is not public") if (!method.getDeclaredAnnotations.exists(_.annotationType() == classOf[Test])) Left(s"testSerialization in ${method.getDeclaringClass.getCanonicalName} does not have a @org.junit.Test annotation.") else if (method.getReturnType != classOf[Void] && method.getReturnType != classOf[Unit]) Left(s"testSerialization in ${method.getDeclaringClass.getCanonicalName} is not a void function. It returns: ${method.getReturnType}") else Right(method) } }
Example 24
Source File: fields.scala From aloha with MIT License | 5 votes |
package com.eharmony.aloha.semantics.compiled.plugin.schemabased.schema

import com.eharmony.aloha.reflect.RefInfo

import scala.language.existentials

// RECORD, ENUM, ARRAY, MAP, UNION, FIXED, STRING, BYTES, INT, LONG, FLOAT, DOUBLE, BOOLEAN, NULL;

sealed trait FieldDesc {
  def name: String
  def index: Int
  def nullable: Boolean
}

// TODO: Add additional types as necessary.
case class RecordField(name: String, index: Int, schema: Schema, refInfo: RefInfo[_], nullable: Boolean) extends FieldDesc
case class EnumField(name: String, index: Int, nullable: Boolean) extends FieldDesc
case class ListField(name: String, index: Int, elementType: FieldDesc, nullable: Boolean) extends FieldDesc
case class StringField(name: String, index: Int, nullable: Boolean) extends FieldDesc
case class IntField(name: String, index: Int, nullable: Boolean) extends FieldDesc
case class LongField(name: String, index: Int, nullable: Boolean) extends FieldDesc
case class FloatField(name: String, index: Int, nullable: Boolean) extends FieldDesc
case class DoubleField(name: String, index: Int, nullable: Boolean) extends FieldDesc
case class BooleanField(name: String, index: Int, nullable: Boolean) extends FieldDesc
Example 25
Source File: HadoopUtils.scala From spark-images with Apache License 2.0 | 5 votes |
package org.apache.spark.image

import java.nio.file.Paths

import org.apache.commons.io.FilenameUtils

import scala.sys.process._
import org.apache.hadoop.conf.{Configuration, Configured}
import org.apache.hadoop.fs.{Path, PathFilter}
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.spark.sql.SparkSession

import scala.language.existentials
import scala.util.Random

object RecursiveFlag {

  def setPathFilter(value: Option[Class[_]], sampleRatio: Option[Double] = None, spark: SparkSession)
    : Option[Class[_]] = {
    val flagName = FileInputFormat.PATHFILTER_CLASS
    val hadoopConf = spark.sparkContext.hadoopConfiguration
    val old = Option(hadoopConf.getClass(flagName, null))
    if (sampleRatio.isDefined) {
      hadoopConf.setDouble(SamplePathFilter.ratioParam, sampleRatio.get)
    } else {
      hadoopConf.unset(SamplePathFilter.ratioParam)
      None
    }

    value match {
      case Some(v) => hadoopConf.setClass(flagName, v, classOf[PathFilter])
      case None => hadoopConf.unset(flagName)
    }

    old
  }
}
Example 26
Source File: JobUtils.scala From fusion-data with Apache License 2.0 | 5 votes |
package mass.job.util

import java.io.File
import java.nio.charset.Charset
import java.nio.file.{ Files, Path, StandardCopyOption }
import java.util.zip.ZipFile

import com.typesafe.scalalogging.StrictLogging
import helloscala.common.Configuration
import helloscala.common.util.{ DigestUtils, Utils }
import mass.common.util.FileUtils
import mass.core.job.JobConstants
import mass.job.JobSettings
import mass.message.job._
import mass.model.job.{ JobItem, JobTrigger }

import scala.concurrent.{ ExecutionContext, Future }

object JobUtils extends StrictLogging {
  case class JobZipInternal private (configs: Vector[JobCreateReq], entries: Vector[Path])

  def uploadJob(jobSettings: JobSettings, req: JobUploadJobReq)(implicit ec: ExecutionContext): Future[JobZip] =
    Future {
      val sha256 = DigestUtils.sha256HexFromPath(req.file)
      val dest = jobSettings.jobSavedDir.resolve(sha256.take(2)).resolve(sha256)

      val jobZipInternal = parseJobZip(req.file, req.charset, dest.resolve(JobConstants.DIST)) match {
        case Right(v) => v
        case Left(e)  => throw e
      }

      val zipPath = dest.resolve(req.fileName)
      Files.move(req.file, zipPath, StandardCopyOption.REPLACE_EXISTING)
      JobZip(zipPath, jobZipInternal.configs, jobZipInternal.entries)
    }

  @inline def parseJobZip(file: Path, charset: Charset, dest: Path): Either[Throwable, JobZipInternal] =
    parseJobZip(file.toFile, charset, dest)

  def parseJobZip(file: File, charset: Charset, dest: Path): Either[Throwable, JobZipInternal] = Utils.either {
    import scala.jdk.CollectionConverters._
    import scala.language.existentials

    val zip = new ZipFile(file, charset)
    try {
      val (confEntries, fileEntries) = zip
        .entries()
        .asScala
        .filterNot(entry => entry.isDirectory)
        .span(entry => entry.getName.endsWith(JobConstants.ENDS_SUFFIX) && !entry.isDirectory)
      val configs = confEntries.map(confEntry =>
        parseJobConf(FileUtils.getString(zip.getInputStream(confEntry), charset, "\n")) match {
          case Right(config) => config
          case Left(e)       => throw e
        })

      val buf = Array.ofDim[Byte](1024)
      val entryPaths = fileEntries.map { entry =>
        val entryName = entry.getName
        val savePath = dest.resolve(entryName)
        if (!Files.isDirectory(savePath.getParent)) {
          Files.createDirectories(savePath.getParent)
        }
        FileUtils.write(zip.getInputStream(entry), Files.newOutputStream(savePath), buf) // write the zip entry to disk
        savePath
      }

      JobZipInternal(configs.toVector, entryPaths.toVector)
    } finally {
      if (zip ne null) zip.close()
    }
  }

  def parseJobConf(content: String): Either[Throwable, JobCreateReq] = Utils.either {
    val conf = Configuration.parseString(content)
    val jobItem = JobItem(conf.getConfiguration("item"))
    val jobTrigger = JobTrigger(conf.getConfiguration("trigger"))
    JobCreateReq(conf.get[Option[String]]("key"), jobItem, jobTrigger)
  }
}

case class JobZip(zipPath: Path, configs: Vector[JobCreateReq], entries: Vector[Path])
Example 27
Source File: ScheduledTaskManager.scala From incubator-toree with Apache License 2.0 | 5 votes |
package org.apache.toree.utils

import scala.language.existentials

import java.util.concurrent._
import java.util.UUID

import com.google.common.util.concurrent.ThreadFactoryBuilder
import ScheduledTaskManager._

import scala.util.Try

  def stop() = {
    _taskMap.clear()
    _scheduler.shutdown()
  }
}

object ScheduledTaskManager {
  val DefaultMaxThreads = 4
  val DefaultExecutionDelay = 10 // 10 milliseconds
  val DefaultTimeInterval = 100 // 100 milliseconds
}
Example 28
Source File: TipTestSuite.scala From inox with Apache License 2.0 | 5 votes |
package inox package tip import solvers._ import scala.language.existentials class TipTestSuite extends TestSuite with ResourceUtils { override def configurations = Seq( Seq(optSelectedSolvers(Set("nativez3")), optCheckModels(true)), Seq(optSelectedSolvers(Set("smt-z3")), optCheckModels(true)), Seq(optSelectedSolvers(Set("smt-cvc4")), optCheckModels(true)), Seq(optSelectedSolvers(Set("smt-z3")), optCheckModels(true), optAssumeChecked(true)) ) override protected def optionsString(options: Options): String = { "solver=" + options.findOptionOrDefault(optSelectedSolvers).head + (if (options.findOptionOrDefault(optAssumeChecked)) " assumechecked" else "") } private def ignoreSAT(ctx: Context, file: java.io.File): FilterStatus = ctx.options.findOptionOrDefault(optSelectedSolvers).headOption match { case Some(solver) => (solver, file.getName) match { // test containing list of booleans, so CVC4 will crash on this // See http://church.cims.nyu.edu/bugzilla3/show_bug.cgi?id=500 case ("smt-cvc4", "List-fold.tip") => Skip // Z3 and CVC4 binaries are exceedingly slow on these benchmarks case ("smt-z3" | "smt-cvc4", "BinarySearchTreeQuant.scala-2.tip") => Ignore case ("smt-z3" | "smt-cvc4", "ForallAssoc.scala-0.tip") => Ignore // this test only holds when assumeChecked=false case (_, "LambdaEquality2.scala-1.tip") if ctx.options.findOptionOrDefault(optAssumeChecked) => Skip case _ => Test } case _ => Test } private def ignoreUNSAT(ctx: Context, file: java.io.File): FilterStatus = ctx.options.findOptionOrDefault(optSelectedSolvers).headOption match { case Some(solver) => (solver, file.getName) match { // Z3 binary will predictably segfault on certain permutations of this problem case ("smt-z3", "MergeSort2.scala-1.tip") => Ignore // use non-linear operators that aren't supported in CVC4 case ("smt-cvc4", "Instantiation.scala-0.tip") => Skip case ("smt-cvc4", "LetsInForall.tip") => Skip case ("smt-cvc4", "Weird.scala-0.tip") => Skip // this test only holds when assumeChecked=true case (_, "QuickSortFilter.scala-1.tip") if !ctx.options.findOptionOrDefault(optAssumeChecked) => Skip case _ => Test } case _ => Test } private def ignoreUNKNOWN(ctx: Context, file: java.io.File): FilterStatus = ctx.options.findOptionOrDefault(optSelectedSolvers).headOption match { case Some(solver) => (solver, file.getName) match { // non-linear operations are too slow on smt-z3 case ("smt-z3", "Soundness2.scala-0.tip") => Ignore // use non-linear operators that aren't supported in CVC4 case ("smt-cvc4", "Soundness.scala-0.tip") => Skip case ("smt-cvc4", "Soundness2.scala-0.tip") => Skip case _ => Test } case _ => Test } for (file <- resourceFiles("regression/tip/SAT", _.endsWith(".tip"))) { test(s"SAT - ${file.getName}", ignoreSAT(_, file)) { implicit ctx => for ((program, expr) <- Parser(file).parseScript) { assert(SimpleSolverAPI(program.getSolver).solveSAT(expr).isSAT) } } } for (file <- resourceFiles("regression/tip/UNSAT", _.endsWith(".tip"))) { test(s"UNSAT - ${file.getName}", ignoreUNSAT(_, file)) { implicit ctx => for ((program, expr) <- Parser(file).parseScript) { assert(SimpleSolverAPI(program.getSolver).solveSAT(expr).isUNSAT) } } } for (file <- resourceFiles("regression/tip/UNKNOWN", _.endsWith(".tip"))) { test(s"UNKNOWN - ${file.getName}", ignoreUNKNOWN(_, file)) { ctx0 => implicit val ctx = ctx0.copy(options = ctx0.options + optCheckModels(false)) for ((program, expr) <- Parser(file).parseScript) { val api = SimpleSolverAPI(program.getSolver) val res = api.solveSAT(expr) assert(!res.isSAT && !res.isUNSAT) 
assert(ctx.reporter.errorCount > 0) } } } }
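A minimal standalone sketch (not taken from the inox sources) of what the scala.language.existentials import used by suites like this one enables: forSome types, including existentials that plain wildcards cannot express, can be written and inferred without a feature warning.

import scala.language.existentials

object ExistentialBasics {
  // The quantified type T appears twice, so this type cannot be written with
  // wildcards alone; the language import silences the feature warning for it.
  type Pipeline = (List[T], T => String) forSome { type T }

  // Both values hide a different concrete T behind the same type.
  val ints: Pipeline = (List(1, 2, 3), (i: Int) => i.toString)
  val strings: Pipeline = (List("a", "b"), (s: String) => s.toUpperCase)
}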
Example 29
Source File: string_formats_yaml.base.scala From play-swagger with MIT License | 5 votes |
package string_formats.yaml import scala.language.existentials import play.api.mvc.{Action, Controller, Results} import play.api.http._ import Results.Status import de.zalando.play.controllers.{PlayBodyParsing, ParsingError, ResultWrapper} import PlayBodyParsing._ import scala.util._ import de.zalando.play.controllers.Base64String import Base64String._ import de.zalando.play.controllers.BinaryString import BinaryString._ import org.joda.time.DateTime import java.util.UUID import org.joda.time.LocalDate import de.zalando.play.controllers.PlayPathBindables trait String_formatsYamlBase extends Controller with PlayBodyParsing { sealed trait GetType[T] extends ResultWrapper[T] case object Get200 extends EmptyReturn(200) private type getActionRequestType = (GetDate_time, GetDate, GetBase64, GetUuid, BinaryString) private type getActionType[T] = getActionRequestType => GetType[T] forSome { type T } private def getParser(acceptedTypes: Seq[String], maxLength: Int = parse.DefaultMaxTextLength) = { def bodyMimeType: Option[MediaType] => String = mediaType => { val requestType = mediaType.toSeq.map { case m: MediaRange => m case MediaType(a,b,c) => new MediaRange(a,b,c,None,Nil) } negotiateContent(requestType, acceptedTypes).orElse(acceptedTypes.headOption).getOrElse("application/json") } import de.zalando.play.controllers.WrappedBodyParsers val customParsers = WrappedBodyParsers.anyParser[BinaryString] anyParser[BinaryString](bodyMimeType, customParsers, "Invalid BinaryString", maxLength) } val getActionConstructor = Action def getAction[T] = (f: getActionType[T]) => (date_time: GetDate_time, date: GetDate, base64: GetBase64, uuid: GetUuid) => getActionConstructor(getParser(Seq[String]())) { request => val providedTypes = Seq[String]("application/json", "application/yaml") negotiateContent(request.acceptedTypes, providedTypes).map { getResponseMimeType => val petId = request.body val result = new GetValidator(date_time, date, base64, uuid, petId).errors match { case e if e.isEmpty => processValidgetRequest(f)((date_time, date, base64, uuid, petId))(getResponseMimeType) case l => implicit val marshaller: Writeable[Seq[ParsingError]] = parsingErrors2Writable(getResponseMimeType) BadRequest(l) } result }.getOrElse(Status(406)("The server doesn't support any of the requested mime types")) } private def processValidgetRequest[T](f: getActionType[T])(request: getActionRequestType)(mimeType: String) = { f(request).toResult(mimeType).getOrElse { Results.NotAcceptable } } abstract class EmptyReturn(override val statusCode: Int = 204) extends ResultWrapper[Results.EmptyContent] with GetType[Results.EmptyContent] { val result = Results.EmptyContent(); val writer = (x: String) => Some(new DefaultWriteables{}.writeableOf_EmptyContent); override def toResult(mimeType: String): Option[play.api.mvc.Result] = Some(Results.NoContent) } case object NotImplementedYet extends ResultWrapper[Results.EmptyContent] with GetType[Results.EmptyContent] { val statusCode = 501; val result = Results.EmptyContent(); val writer = (x: String) => Some(new DefaultWriteables{}.writeableOf_EmptyContent); override def toResult(mimeType: String): Option[play.api.mvc.Result] = Some(Results.NotImplemented) } }
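A reduced sketch of the getActionType alias above (the trait and names here are invented stand-ins, not the play-swagger API): an existentially quantified result type lets generated code treat handlers uniformly, whatever result type each one wraps.

import scala.language.existentials

trait ResultWrapper[T] { def result: T; def statusCode: Int }

object HandlerSketch {
  // Callers only learn that *some* ResultWrapper comes back.
  type Handler = String => (ResultWrapper[T] forSome { type T })

  val lengthHandler: Handler = (s: String) =>
    new ResultWrapper[Int] { val result = s.length; val statusCode = 200 }

  val code: Int = lengthHandler("hello").statusCode   // 200
}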
Example 30
Source File: FPTreeSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.fpm import scala.language.existentials import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext class FPTreeSuite extends SparkFunSuite with MLlibTestSparkContext { test("add transaction") { val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) assert(tree.root.children.size == 2) assert(tree.root.children.contains("a")) assert(tree.root.children("a").item.equals("a")) assert(tree.root.children("a").count == 2) assert(tree.root.children.contains("b")) assert(tree.root.children("b").item.equals("b")) assert(tree.root.children("b").count == 1) var child = tree.root.children("a") assert(child.children.size == 1) assert(child.children.contains("b")) assert(child.children("b").item.equals("b")) assert(child.children("b").count == 2) child = child.children("b") assert(child.children.size == 2) assert(child.children.contains("c")) assert(child.children.contains("y")) assert(child.children("c").item.equals("c")) assert(child.children("y").item.equals("y")) assert(child.children("c").count == 1) assert(child.children("y").count == 1) } test("merge tree") { val tree1 = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) val tree2 = new FPTree[String] .add(Seq("a", "b")) .add(Seq("a", "b", "c")) .add(Seq("a", "b", "c", "d")) .add(Seq("a", "x")) .add(Seq("a", "x", "y")) .add(Seq("c", "n")) .add(Seq("c", "m")) val tree3 = tree1.merge(tree2) assert(tree3.root.children.size == 3) assert(tree3.root.children("a").count == 7) assert(tree3.root.children("b").count == 1) assert(tree3.root.children("c").count == 2) val child1 = tree3.root.children("a") assert(child1.children.size == 2) assert(child1.children("b").count == 5) assert(child1.children("x").count == 2) val child2 = child1.children("b") assert(child2.children.size == 2) assert(child2.children("y").count == 1) assert(child2.children("c").count == 3) val child3 = child2.children("c") assert(child3.children.size == 1) assert(child3.children("d").count == 1) val child4 = child1.children("x") assert(child4.children.size == 1) assert(child4.children("y").count == 1) val child5 = tree3.root.children("c") assert(child5.children.size == 2) assert(child5.children("n").count == 1) assert(child5.children("m").count == 1) } test("extract freq itemsets") { val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("a", "b")) .add(Seq("a")) .add(Seq("b")) .add(Seq("b", "n")) val freqItemsets = tree.extract(3L).map { case (items, count) => (items.toSet, count) }.toSet val expected = Set( (Set("a"), 4L), (Set("b"), 5L), (Set("a", "b"), 3L)) assert(freqItemsets === expected) } }
Example 31
Source File: DataRow.scala From flink-elasticsearch-source-connector with Apache License 2.0 | 5 votes |
package com.mnubo.flink.streaming.connectors import org.apache.commons.lang3.ClassUtils import org.apache.flink.api.common.typeinfo.TypeInformation import org.apache.flink.api.java.typeutils.TypeExtractor import scala.language.existentials case class Value(v: Any, name: String, givenTypeInfo: Option[TypeInformation[_]] = None) { require(v != null || givenTypeInfo.isDefined, "You must pass a TypeInformation for null values") val typeInfo = givenTypeInfo match { case Some(ti) => ti case None => TypeExtractor.getForObject(v) } require(isAssignable(v, typeInfo.getTypeClass), s"data element '$v' is not compatible with class ${typeInfo.getTypeClass.getName}") private def isAssignable(value: Any, cl: Class[_]) = { if (value == null && classOf[AnyRef].isAssignableFrom(cl)) true else ClassUtils.isAssignable(value.getClass, cl) } } object Value { def apply(v: Any, name: String, givenTypeInfo: TypeInformation[_]) = { new Value(v, name, Some(givenTypeInfo)) } } class DataRow(private [connectors] val data: Array[Any], private [connectors] val info: DataRowTypeInfo) extends Product with Serializable { require(data != null, "data must not be null") require(info != null, "info must not be null") require(data.length == info.getArity, "data must be of the correct arity") def apply[T](i: Int): T = data(i).asInstanceOf[T] def apply[T](fieldExpression: String): T = apply(info.getFieldIndex(fieldExpression)) override def productElement(n: Int): Any = apply[AnyRef](n) override def productArity = info.getArity override def canEqual(that: Any) = that.isInstanceOf[DataRow] override def equals(that: Any) = canEqual(that) && data.sameElements(that.asInstanceOf[DataRow].data) && info.getFieldNames.sameElements(that.asInstanceOf[DataRow].info.getFieldNames) override def hashCode = { var result = 1 for (element <- data) result = 31 * result + (if (element == null) 0 else element.hashCode) result } override def toString = info.getFieldNames .zip(data.map(v => if (v == null) "null" else v.toString)) .map{case (name, value) => s"$name=$value"} .mkString("DataRow(", ", ", ")") } object DataRow { def apply(data: Value*): DataRow = { require(data != null, "data cannot be null") require(!data.contains(null), "data value cannot be null") new DataRow( data.map(_.v).toArray, new DataRowTypeInfo( data.map(_.name), data.map(_.typeInfo) ) ) } }
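A trimmed-down sketch of the Value pattern above, with Flink's TypeInformation replaced by a hypothetical TypeInfo stand-in: a field whose type parameter stays existential still supports a runtime compatibility check against the carried value.

import scala.language.existentials

// Hypothetical stand-in for Flink's TypeInformation[_]
trait TypeInfo[T] { def typeClass: Class[T] }

case class Cell(v: Any, name: String, info: TypeInfo[_]) {
  // The element type is unknown here, but the descriptor still lets us check it.
  def matchesDeclaredType: Boolean = v != null && info.typeClass.isAssignableFrom(v.getClass)
}

object CellSketch {
  val stringInfo: TypeInfo[String] = new TypeInfo[String] { val typeClass = classOf[String] }
  val ok = Cell("hello", "greeting", stringInfo).matchesDeclaredType   // true
}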
Example 32
Source File: RecordTransformer.scala From flink-elasticsearch-source-connector with Apache License 2.0 | 5 votes |
package com.mnubo.flink.streaming.connectors import org.apache.flink.api.common.operators.Keys.ExpressionKeys._ import org.apache.flink.api.common.typeinfo.TypeInformation import scala.annotation.tailrec import scala.language.existentials import scala.reflect.ClassTag sealed trait FieldSpecification extends Serializable case class ExistingField(name: String) extends FieldSpecification case class NewField(name: String, typeInfo: TypeInformation[_]) extends FieldSpecification trait RecordTransformer extends Serializable { val classTag = ClassTag[DataRow](classOf[DataRow]) def typeInfo : DataRowTypeInfo def transform(dataRow: DataRow, values:Any*) : DataRow } class FieldMapperRecordTransformer private[connectors](srcTypeInfo:DataRowTypeInfo, fieldSpecifications: FieldSpecification*) extends RecordTransformer { require(srcTypeInfo != null, s"srcTypeInfo must not be null") require(fieldSpecifications != null, s"fieldSpecifications must not be null") require(fieldSpecifications.nonEmpty, s"fieldSpecifications must not be empty") require(!fieldSpecifications.contains(null), s"fieldSpecifications must not contain any nulls") override val typeInfo = { val (fieldNames, elementTypes) = fieldSpecifications.flatMap { case ExistingField(name) if name == SELECT_ALL_CHAR || name == SELECT_ALL_CHAR_SCALA => srcTypeInfo.getFieldNames.zip(srcTypeInfo.getElementTypes) case ExistingField(name) => Seq(name -> srcTypeInfo.getFieldType(name)) case NewField(name, newFieldTypeInfo) => Seq(name -> newFieldTypeInfo) }.unzip require(fieldNames.length == fieldNames.distinct.length, s"Fields can't have duplicates. Fields were $fieldNames.") new DataRowTypeInfo(fieldNames, elementTypes) } private def newFieldsNames = fieldSpecifications.collect{ case newValue: NewField => newValue.name } override def transform(dataRow: DataRow, values:Any*) : DataRow = { require(dataRow != null, s"dataRow must not be null") require(values != null, s"values must not be null") require(newFieldsNames.length == values.length, s"Must specify values for all new fields and only new fields. New fields are '$newFieldsNames'") val resultValues = new Array[Any](typeInfo.getArity) @tailrec def transform(index:Int, remainingSpecs: Seq[FieldSpecification], remainingValues:Seq[Any]) : DataRow = { if(remainingSpecs.isEmpty) { new DataRow(resultValues, typeInfo) } else { val currentSpec = remainingSpecs.head currentSpec match { case ExistingField(name) if name == SELECT_ALL_CHAR || name == SELECT_ALL_CHAR_SCALA => Array.copy(dataRow.data, 0, resultValues, index, dataRow.data.length) transform(index + dataRow.data.length, remainingSpecs.tail, remainingValues) case ExistingField(name) => resultValues(index) = dataRow(name) transform(index + 1, remainingSpecs.tail, remainingValues) case NewField(name, _) => resultValues(index) = remainingValues.head transform(index + 1, remainingSpecs.tail, remainingValues.tail) } } } transform(0, fieldSpecifications, values) } } object RecordTransformer { def mapFields(srcTypeInfo: DataRowTypeInfo, fieldSpecifications: FieldSpecification*) : RecordTransformer = { new FieldMapperRecordTransformer(srcTypeInfo, fieldSpecifications:_*) } }
Example 33
Source File: DAGSchedulerEvent.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.util.Properties import scala.language.existentials import org.apache.spark._ import org.apache.spark.rdd.RDD import org.apache.spark.util.{AccumulatorV2, CallSite} private[scheduler] case class MapStageSubmitted( jobId: Int, dependency: ShuffleDependency[_, _, _], callSite: CallSite, listener: JobListener, properties: Properties = null) extends DAGSchedulerEvent private[scheduler] case class StageCancelled( stageId: Int, reason: Option[String]) extends DAGSchedulerEvent private[scheduler] case class JobCancelled( jobId: Int, reason: Option[String]) extends DAGSchedulerEvent private[scheduler] case class JobGroupCancelled(groupId: String) extends DAGSchedulerEvent private[scheduler] case object AllJobsCancelled extends DAGSchedulerEvent private[scheduler] case class BeginEvent(task: Task[_], taskInfo: TaskInfo) extends DAGSchedulerEvent private[scheduler] case class GettingResultEvent(taskInfo: TaskInfo) extends DAGSchedulerEvent private[scheduler] case class CompletionEvent( task: Task[_], reason: TaskEndReason, result: Any, accumUpdates: Seq[AccumulatorV2[_, _]], taskInfo: TaskInfo) extends DAGSchedulerEvent private[scheduler] case class ExecutorAdded(execId: String, host: String) extends DAGSchedulerEvent private[scheduler] case class ExecutorLost(execId: String, reason: ExecutorLossReason) extends DAGSchedulerEvent private[scheduler] case class WorkerRemoved(workerId: String, host: String, message: String) extends DAGSchedulerEvent private[scheduler] case class TaskSetFailed(taskSet: TaskSet, reason: String, exception: Option[Throwable]) extends DAGSchedulerEvent private[scheduler] case object ResubmitFailedStages extends DAGSchedulerEvent private[scheduler] case class SpeculativeTaskSubmitted(task: Task[_]) extends DAGSchedulerEvent
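A small sketch in the spirit of the events above (Task here is an assumed stand-in, not Spark's class): case classes in an event hierarchy can carry payloads with existential type parameters, and handlers pattern match on them without ever naming the hidden type.

import scala.language.existentials

// Assumed stand-in for Spark's Task[T]
class Task[T](val id: Int)

sealed trait SchedulerEvent
case class BeginEvent(task: Task[_]) extends SchedulerEvent
case object AllJobsCancelled extends SchedulerEvent

object EventSketch {
  def describe(event: SchedulerEvent): String = event match {
    // Inside the match, the task's result type is some unknown type.
    case BeginEvent(task) => s"task ${task.id} started"
    case AllJobsCancelled => "all jobs cancelled"
  }

  val msg = describe(BeginEvent(new Task[String](7)))   // "task 7 started"
}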
Example 34
Source File: NettyBlockRpcServer.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.network.netty import java.nio.ByteBuffer import scala.collection.JavaConverters._ import scala.language.existentials import scala.reflect.ClassTag import org.apache.spark.internal.Logging import org.apache.spark.network.BlockDataManager import org.apache.spark.network.buffer.NioManagedBuffer import org.apache.spark.network.client.{RpcResponseCallback, TransportClient} import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager} import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, OpenBlocks, StreamHandle, UploadBlock} import org.apache.spark.serializer.Serializer import org.apache.spark.storage.{BlockId, StorageLevel} class NettyBlockRpcServer( appId: String, serializer: Serializer, blockManager: BlockDataManager) extends RpcHandler with Logging { private val streamManager = new OneForOneStreamManager() override def receive( client: TransportClient, rpcMessage: ByteBuffer, responseContext: RpcResponseCallback): Unit = { val message = BlockTransferMessage.Decoder.fromByteBuffer(rpcMessage) logTrace(s"Received request: $message") message match { case openBlocks: OpenBlocks => val blocksNum = openBlocks.blockIds.length val blocks = for (i <- (0 until blocksNum).view) yield blockManager.getBlockData(BlockId.apply(openBlocks.blockIds(i))) val streamId = streamManager.registerStream(appId, blocks.iterator.asJava) logTrace(s"Registered streamId $streamId with $blocksNum buffers") responseContext.onSuccess(new StreamHandle(streamId, blocksNum).toByteBuffer) case uploadBlock: UploadBlock => // StorageLevel and ClassTag are serialized as bytes using our JavaSerializer. val (level: StorageLevel, classTag: ClassTag[_]) = { serializer .newInstance() .deserialize(ByteBuffer.wrap(uploadBlock.metadata)) .asInstanceOf[(StorageLevel, ClassTag[_])] } val data = new NioManagedBuffer(ByteBuffer.wrap(uploadBlock.blockData)) val blockId = BlockId(uploadBlock.blockId) blockManager.putBlockData(blockId, data, level, classTag) responseContext.onSuccess(ByteBuffer.allocate(0)) } } override def getStreamManager(): StreamManager = streamManager }
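A minimal sketch of the UploadBlock branch above, with the deserialized pair simplified to assumed types (a String standing in for StorageLevel): destructuring a tuple whose second element is a ClassTag[_] keeps the tag's type parameter existential.

import scala.language.existentials
import scala.reflect.ClassTag

object MetadataSketch {
  // Assumed stand-in for the deserialized (StorageLevel, ClassTag[_]) pair.
  val metadata: (String, ClassTag[_]) = ("MEMORY_ONLY", implicitly[ClassTag[Array[Byte]]])

  // The tag keeps an unknown type parameter after destructuring.
  val (level: String, tag: ClassTag[_]) = metadata

  val runtimeClass = tag.runtimeClass   // classOf[Array[Byte]], printed as "class [B"
}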
Example 35
Source File: InsertIntoHiveDirCommand.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution import scala.language.existentials import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.hive.common.FileUtils import org.apache.hadoop.hive.ql.plan.TableDesc import org.apache.hadoop.hive.serde.serdeConstants import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe import org.apache.hadoop.mapred._ import org.apache.spark.SparkException import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable} import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.hive.client.HiveClientImpl case class InsertIntoHiveDirCommand( isLocal: Boolean, storage: CatalogStorageFormat, query: LogicalPlan, overwrite: Boolean, outputColumns: Seq[Attribute]) extends SaveAsHiveFile { override def run(sparkSession: SparkSession, child: SparkPlan): Seq[Row] = { assert(storage.locationUri.nonEmpty) val hiveTable = HiveClientImpl.toHiveTable(CatalogTable( identifier = TableIdentifier(storage.locationUri.get.toString, Some("default")), tableType = org.apache.spark.sql.catalyst.catalog.CatalogTableType.VIEW, storage = storage, schema = query.schema )) hiveTable.getMetadata.put(serdeConstants.SERIALIZATION_LIB, storage.serde.getOrElse(classOf[LazySimpleSerDe].getName)) val tableDesc = new TableDesc( hiveTable.getInputFormatClass, hiveTable.getOutputFormatClass, hiveTable.getMetadata ) val hadoopConf = sparkSession.sessionState.newHadoopConf() val jobConf = new JobConf(hadoopConf) val targetPath = new Path(storage.locationUri.get) val writeToPath = if (isLocal) { val localFileSystem = FileSystem.getLocal(jobConf) localFileSystem.makeQualified(targetPath) } else { val qualifiedPath = FileUtils.makeQualified(targetPath, hadoopConf) val dfs = qualifiedPath.getFileSystem(jobConf) if (!dfs.exists(qualifiedPath)) { dfs.mkdirs(qualifiedPath.getParent) } qualifiedPath } val tmpPath = getExternalTmpPath(sparkSession, hadoopConf, writeToPath) val fileSinkConf = new org.apache.spark.sql.hive.HiveShim.ShimFileSinkDesc( tmpPath.toString, tableDesc, false) try { saveAsHiveFile( sparkSession = sparkSession, plan = child, hadoopConf = hadoopConf, fileSinkConf = fileSinkConf, outputLocation = tmpPath.toString, allColumns = outputColumns) val fs = writeToPath.getFileSystem(hadoopConf) if (overwrite && fs.exists(writeToPath)) { fs.listStatus(writeToPath).foreach { existFile => if (Option(existFile.getPath) != createdTempDir) fs.delete(existFile.getPath, true) } } fs.listStatus(tmpPath).foreach { tmpFile => fs.rename(tmpFile.getPath, writeToPath) } } catch { case e: Throwable => throw new SparkException( "Failed inserting overwrite directory " + storage.locationUri.get, e) } finally { deleteExternalTmpPath(hadoopConf) } Seq.empty[Row] } }
Example 36
Source File: FPTreeSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.fpm import scala.language.existentials import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext class FPTreeSuite extends SparkFunSuite with MLlibTestSparkContext { test("add transaction") { val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) assert(tree.root.children.size == 2) assert(tree.root.children.contains("a")) assert(tree.root.children("a").item.equals("a")) assert(tree.root.children("a").count == 2) assert(tree.root.children.contains("b")) assert(tree.root.children("b").item.equals("b")) assert(tree.root.children("b").count == 1) var child = tree.root.children("a") assert(child.children.size == 1) assert(child.children.contains("b")) assert(child.children("b").item.equals("b")) assert(child.children("b").count == 2) child = child.children("b") assert(child.children.size == 2) assert(child.children.contains("c")) assert(child.children.contains("y")) assert(child.children("c").item.equals("c")) assert(child.children("y").item.equals("y")) assert(child.children("c").count == 1) assert(child.children("y").count == 1) } test("merge tree") { val tree1 = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) val tree2 = new FPTree[String] .add(Seq("a", "b")) .add(Seq("a", "b", "c")) .add(Seq("a", "b", "c", "d")) .add(Seq("a", "x")) .add(Seq("a", "x", "y")) .add(Seq("c", "n")) .add(Seq("c", "m")) val tree3 = tree1.merge(tree2) assert(tree3.root.children.size == 3) assert(tree3.root.children("a").count == 7) assert(tree3.root.children("b").count == 1) assert(tree3.root.children("c").count == 2) val child1 = tree3.root.children("a") assert(child1.children.size == 2) assert(child1.children("b").count == 5) assert(child1.children("x").count == 2) val child2 = child1.children("b") assert(child2.children.size == 2) assert(child2.children("y").count == 1) assert(child2.children("c").count == 3) val child3 = child2.children("c") assert(child3.children.size == 1) assert(child3.children("d").count == 1) val child4 = child1.children("x") assert(child4.children.size == 1) assert(child4.children("y").count == 1) val child5 = tree3.root.children("c") assert(child5.children.size == 2) assert(child5.children("n").count == 1) assert(child5.children("m").count == 1) } test("extract freq itemsets") { val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("a", "b")) .add(Seq("a")) .add(Seq("b")) .add(Seq("b", "n")) val freqItemsets = tree.extract(3L).map { case (items, count) => (items.toSet, count) }.toSet val expected = Set( (Set("a"), 4L), (Set("b"), 5L), (Set("a", "b"), 3L)) assert(freqItemsets === expected) } }
Example 37
Source File: HadoopUtils.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.image import scala.language.existentials import scala.util.Random import org.apache.commons.io.FilenameUtils import org.apache.hadoop.conf.{Configuration, Configured} import org.apache.hadoop.fs.{Path, PathFilter} import org.apache.hadoop.mapreduce.lib.input.FileInputFormat import org.apache.spark.sql.SparkSession private object RecursiveFlag { def withPathFilter[T]( sampleRatio: Double, spark: SparkSession, seed: Long)(f: => T): T = { val sampleImages = sampleRatio < 1 if (sampleImages) { val flagName = FileInputFormat.PATHFILTER_CLASS val hadoopConf = spark.sparkContext.hadoopConfiguration val old = Option(hadoopConf.getClass(flagName, null)) hadoopConf.setDouble(SamplePathFilter.ratioParam, sampleRatio) hadoopConf.setLong(SamplePathFilter.seedParam, seed) hadoopConf.setClass(flagName, classOf[SamplePathFilter], classOf[PathFilter]) try f finally { hadoopConf.unset(SamplePathFilter.ratioParam) hadoopConf.unset(SamplePathFilter.seedParam) old match { case Some(v) => hadoopConf.setClass(flagName, v, classOf[PathFilter]) case None => hadoopConf.unset(flagName) } } } else { f } } }
Example 38
Source File: IStep.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.step import org.apache.s2graph.core._ import rx.lang.scala.Observable import scala.language.higherKinds import scala.language.existentials trait RxStep[-A, +B] extends (A => Observable[B]) object RxStep { case class VertexFetchStep(g: S2GraphLike) extends RxStep[Seq[S2VertexLike], S2VertexLike] { override def apply(vertices: Seq[S2VertexLike]): Observable[S2VertexLike] = { Observable.from(vertices) } } case class EdgeFetchStep(g: S2GraphLike, qp: QueryParam) extends RxStep[S2VertexLike, S2EdgeLike] { override def apply(v: S2VertexLike): Observable[S2EdgeLike] = { implicit val ec = g.ec val step = org.apache.s2graph.core.Step(Seq(qp)) val q = Query(Seq(v), steps = Vector(step)) val f = g.getEdges(q).map { stepResult => val edges = stepResult.edgeWithScores.map(_.edge) Observable.from(edges) } Observable.from(f).flatten } } private def merge[A, B](steps: RxStep[A, B]*): RxStep[A, B] = new RxStep[A, B] { override def apply(in: A): Observable[B] = steps.map(_.apply(in)).toObservable.flatten } def toObservable(q: Query)(implicit graph: S2GraphLike): Observable[S2EdgeLike] = { val v1: Observable[S2VertexLike] = VertexFetchStep(graph).apply(q.vertices) val serialSteps = q.steps.map { step => val parallelSteps = step.queryParams.map(qp => EdgeFetchStep(graph, qp)) merge(parallelSteps: _*) } v1.flatMap { v => val initOpt = serialSteps.headOption.map(_.apply(v)) initOpt.map { init => serialSteps.tail.foldLeft(init) { case (prev, next) => prev.map(_.tgtForVertex).flatMap(next) } }.getOrElse(Observable.empty) } } }
Example 39
Source File: ArrayBasedMapData.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.util import java.util.{Map => JavaMap} class ArrayBasedMapData(val keyArray: ArrayData, val valueArray: ArrayData) extends MapData { require(keyArray.numElements() == valueArray.numElements()) override def numElements(): Int = keyArray.numElements() override def copy(): MapData = new ArrayBasedMapData(keyArray.copy(), valueArray.copy()) override def toString: String = { s"keys: $keyArray, values: $valueArray" } } object ArrayBasedMapData { def apply( iterator: Iterator[(_, _)], size: Int, keyConverter: (Any) => Any, valueConverter: (Any) => Any): ArrayBasedMapData = { val keys: Array[Any] = new Array[Any](size) val values: Array[Any] = new Array[Any](size) var i = 0 for ((key, value) <- iterator) { keys(i) = keyConverter(key) values(i) = valueConverter(value) i += 1 } ArrayBasedMapData(keys, values) } def apply(keys: Array[_], values: Array[_]): ArrayBasedMapData = { new ArrayBasedMapData(new GenericArrayData(keys), new GenericArrayData(values)) } def toScalaMap(map: ArrayBasedMapData): Map[Any, Any] = { val keys = map.keyArray.asInstanceOf[GenericArrayData].array val values = map.valueArray.asInstanceOf[GenericArrayData].array keys.zip(values).toMap } def toScalaMap(keys: Array[Any], values: Array[Any]): Map[Any, Any] = { keys.zip(values).toMap } def toScalaMap(keys: Seq[Any], values: Seq[Any]): Map[Any, Any] = { keys.zip(values).toMap } def toJavaMap(keys: Array[Any], values: Array[Any]): java.util.Map[Any, Any] = { import scala.collection.JavaConverters._ keys.zip(values).toMap.asJava } }
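A condensed sketch of the apply(iterator: Iterator[(_, _)], ...) signature above: accepting an iterator of pairs with unknown key and value types and normalising both sides through converters (buffers here instead of the preallocated arrays used above).

import scala.language.existentials
import scala.collection.mutable.ArrayBuffer

object MapDataSketch {
  // Key and value types are unknown; converters normalise them to Any.
  def collect(iterator: Iterator[(_, _)],
              keyConverter: Any => Any,
              valueConverter: Any => Any): (Array[Any], Array[Any]) = {
    val keys = ArrayBuffer.empty[Any]
    val values = ArrayBuffer.empty[Any]
    for ((k, v) <- iterator) {
      keys += keyConverter(k)
      values += valueConverter(v)
    }
    (keys.toArray, values.toArray)
  }

  val (ks, vs) = collect(Iterator("a" -> 1, "b" -> 2), identity, identity)   // ks: a, b  vs: 1, 2
}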
Example 40
Source File: VLinearRegressionSuite.scala From spark-vlbfgs with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.regression import scala.language.existentials import org.apache.spark.SparkFunSuite import org.apache.spark.ml.feature.Instance import org.apache.spark.ml.linalg.{Vector, Vectors} import org.apache.spark.ml.util.TestingUtils._ import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.sql.DataFrame class VLinearRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { import testImplicits._ var datasetWithWeight: DataFrame = _ override def beforeAll(): Unit = { super.beforeAll() datasetWithWeight = sc.parallelize(Seq( Instance(17.0, 1.0, Vectors.dense(0.0, 5.0).toSparse), Instance(19.0, 2.0, Vectors.dense(1.0, 7.0)), Instance(23.0, 3.0, Vectors.dense(2.0, 11.0)), Instance(29.0, 4.0, Vectors.dense(3.0, 13.0)) ), 2).toDF() } test("test on datasetWithWeight") { def b2s(b: Boolean): String = { if (b) "w/" else "w/o" } for (fitIntercept <- Seq(false, true)) { for (standardization <- Seq(false, true)) { for ((reg, elasticNet)<- Seq((0.0, 0.0), (2.3, 0.0), (2.3, 0.5))) { println() println(s"# test ${b2s(fitIntercept)} intercept, ${b2s(standardization)} standardization, reg=${reg}, elasticNet=${elasticNet}") val vtrainer = new VLinearRegression() .setColsPerBlock(1) .setRowsPerBlock(1) .setGeneratingFeatureMatrixBuffer(2) .setFitIntercept(fitIntercept) .setStandardization(standardization) .setRegParam(reg) .setWeightCol("weight") .setElasticNetParam(elasticNet) val vmodel = vtrainer.fit(datasetWithWeight) // Note that in ml.LinearRegression, when datasets numInstanse is small // solver l-bfgs and solver normal will generate slightly different result when reg not zero // because there std calculation result have multiple difference numInstance/(numInstance - 1) // here test keep consistent with l-bfgs solver val trainer = new LinearRegression() .setSolver("l-bfgs") // by default it may use noraml solver so here force set it. .setFitIntercept(fitIntercept) .setStandardization(standardization) .setRegParam(reg) .setWeightCol("weight") .setElasticNetParam(elasticNet) val model = trainer.fit(datasetWithWeight) logInfo(s"LinearRegression total iterations: ${model.summary.totalIterations}") println(s"VLinearRegression coefficients: ${vmodel.coefficients.toDense}, intercept: ${vmodel.intercept}\n" + s"LinearRegression coefficients: ${model.coefficients.toDense}, intercept: ${model.intercept}") def filterSmallValue(v: Vector) = { Vectors.dense(v.toArray.map(x => if (math.abs(x) < 1e-6) 0.0 else x)) } assert(filterSmallValue(vmodel.coefficients) ~== filterSmallValue(model.coefficients) relTol 1e-3) assert(vmodel.intercept ~== model.intercept relTol 1e-3) } } } } }
Example 41
Source File: VSoftmaxRegressionSuite.scala From spark-vlbfgs with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.classification import org.apache.spark.SparkFunSuite import org.apache.spark.ml.feature.Instance import org.apache.spark.ml.linalg.{SparseMatrix, Vector, Vectors} import org.apache.spark.ml.util.TestingUtils._ import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.sql.functions._ import org.apache.spark.sql.{DataFrame, Dataset} import scala.language.existentials class VSoftmaxRegressionSuite extends SparkFunSuite with MLlibTestSparkContext { import testImplicits._ private val seed = 42 @transient var multinomialDataset: Dataset[_] = _ private val eps: Double = 1e-5 override def beforeAll(): Unit = { super.beforeAll() multinomialDataset = { val nPoints = 50 val coefficients = Array( -0.57997, 0.912083, -0.371077, -0.819866, 2.688191, -0.16624, -0.84355, -0.048509, -0.301789, 4.170682) val xMean = Array(5.843, 3.057, 3.758, 1.199) val xVariance = Array(0.6856, 0.1899, 3.116, 0.581) val testData = LogisticRegressionSuite.generateMultinomialLogisticInput( coefficients, xMean, xVariance, addIntercept = true, nPoints, seed) val df = sc.parallelize(testData, 4).toDF().withColumn("weight", rand(seed)) df.cache() println("softmax test data:") df.show(10, false) df } } test("test on multinomialDataset") { def b2s(b: Boolean): String = { if (b) "w/" else "w/o" } for (standardization <- Seq(false, true)) { for ((reg, elasticNet) <- Seq((0.0, 0.0), (2.3, 0.0), (0.3, 0.05), (0.01, 1.0))) { println() println(s"# test ${b2s(standardization)} standardization, reg=${reg}, elasticNet=${elasticNet}") val trainer = new LogisticRegression() .setFamily("multinomial") .setStandardization(standardization) .setWeightCol("weight") .setRegParam(reg) .setFitIntercept(false) .setElasticNetParam(elasticNet) val model = trainer.fit(multinomialDataset) val vtrainer = new VSoftmaxRegression() .setColsPerBlock(2) .setRowsPerBlock(5) .setColPartitions(2) .setRowPartitions(3) .setWeightCol("weight") .setGeneratingFeatureMatrixBuffer(2) .setStandardization(standardization) .setRegParam(reg) .setElasticNetParam(elasticNet) val vmodel = vtrainer.fit(multinomialDataset) println(s"VSoftmaxRegression coefficientMatrix:\n" + s"${vmodel.coefficientMatrix.asInstanceOf[SparseMatrix].toDense},\n" + s"ml.SoftmaxRegression coefficientMatrix:\n" + s"${model.coefficientMatrix}\n") assert(vmodel.coefficientMatrix ~== model.coefficientMatrix relTol eps) } } } }
Example 42
Source File: SortedMapDeserializerModule.scala From mango with Apache License 2.0 | 5 votes |
package com.kakao.shaded.jackson.module.scala.deser import java.util.AbstractMap import java.util.Map.Entry import scala.collection.{mutable, SortedMap} import scala.collection.immutable.TreeMap import com.kakao.shaded.jackson.core.JsonParser import com.kakao.shaded.jackson.databind._ import com.kakao.shaded.jackson.databind.deser.std.{MapDeserializer, ContainerDeserializerBase} import com.kakao.shaded.jackson.databind.jsontype.TypeDeserializer import com.kakao.shaded.jackson.databind.`type`.MapLikeType import com.kakao.shaded.jackson.module.scala.modifiers.MapTypeModifierModule import deser.{ContextualDeserializer, Deserializers, ValueInstantiator} import com.kakao.shaded.jackson.module.scala.introspect.OrderingLocator import scala.language.existentials private class SortedMapBuilderWrapper[K,V](val builder: mutable.Builder[(K,V), SortedMap[K,V]]) extends AbstractMap[K,V] { override def put(k: K, v: V) = { builder += ((k,v)); v } // Isn't used by the deserializer def entrySet(): java.util.Set[Entry[K, V]] = throw new UnsupportedOperationException } private object SortedMapDeserializer { def orderingFor = OrderingLocator.locate _ def builderFor(cls: Class[_], keyCls: JavaType): mutable.Builder[(AnyRef,AnyRef), SortedMap[AnyRef,AnyRef]] = if (classOf[TreeMap[_,_]].isAssignableFrom(cls)) TreeMap.newBuilder[AnyRef,AnyRef](orderingFor(keyCls)) else SortedMap.newBuilder[AnyRef,AnyRef](orderingFor(keyCls)) } private class SortedMapDeserializer( collectionType: MapLikeType, config: DeserializationConfig, keyDeser: KeyDeserializer, valueDeser: JsonDeserializer[_], valueTypeDeser: TypeDeserializer) extends ContainerDeserializerBase[SortedMap[_,_]](collectionType) with ContextualDeserializer { private val javaContainerType = config.getTypeFactory.constructMapLikeType(classOf[MapBuilderWrapper[_,_]], collectionType.getKeyType, collectionType.getContentType) private val instantiator = new ValueInstantiator { def getValueTypeDesc = collectionType.getRawClass.getCanonicalName override def canCreateUsingDefault = true override def createUsingDefault(ctx: DeserializationContext) = new SortedMapBuilderWrapper[AnyRef,AnyRef](SortedMapDeserializer.builderFor(collectionType.getRawClass, collectionType.getKeyType)) } private val containerDeserializer = new MapDeserializer(javaContainerType,instantiator,keyDeser,valueDeser.asInstanceOf[JsonDeserializer[AnyRef]],valueTypeDeser) override def getContentType = containerDeserializer.getContentType override def getContentDeserializer = containerDeserializer.getContentDeserializer override def createContextual(ctxt: DeserializationContext, property: BeanProperty) = if (keyDeser != null && valueDeser != null) this else { val newKeyDeser = Option(keyDeser).getOrElse(ctxt.findKeyDeserializer(collectionType.getKeyType, property)) val newValDeser = Option(valueDeser).getOrElse(ctxt.findContextualValueDeserializer(collectionType.getContentType, property)) new SortedMapDeserializer(collectionType, config, newKeyDeser, newValDeser, valueTypeDeser) } override def deserialize(jp: JsonParser, ctxt: DeserializationContext): SortedMap[_,_] = { containerDeserializer.deserialize(jp,ctxt) match { case wrapper: SortedMapBuilderWrapper[_,_] => wrapper.builder.result() } } } private object SortedMapDeserializerResolver extends Deserializers.Base { private val SORTED_MAP = classOf[collection.SortedMap[_,_]] override def findMapLikeDeserializer(theType: MapLikeType, config: DeserializationConfig, beanDesc: BeanDescription, keyDeserializer: KeyDeserializer, elementTypeDeserializer: 
TypeDeserializer, elementDeserializer: JsonDeserializer[_]): JsonDeserializer[_] = if (!SORTED_MAP.isAssignableFrom(theType.getRawClass)) null else new SortedMapDeserializer(theType,config,keyDeserializer,elementDeserializer,elementTypeDeserializer) } trait SortedMapDeserializerModule extends MapTypeModifierModule { this += (_ addDeserializers SortedMapDeserializerResolver) }
Example 43
Source File: PropertyDescriptor.scala From mango with Apache License 2.0 | 5 votes |
package com.kakao.shaded.jackson.module.scala package introspect import util.Implicits._ import java.lang.reflect.{AccessibleObject, Constructor, Field, Method} import scala.language.existentials case class ConstructorParameter(constructor: Constructor[_], index: Int, defaultValueMethod: Option[Method]) case class PropertyDescriptor(name: String, param: Option[ConstructorParameter], field: Option[Field], getter: Option[Method], setter: Option[Method], beanGetter: Option[Method], beanSetter: Option[Method]) { if (List(field, getter).flatten.isEmpty) throw new IllegalArgumentException("One of field or getter must be defined.") def findAnnotation[A <: java.lang.annotation.Annotation](implicit mf: Manifest[A]): Option[A] = { val cls = mf.runtimeClass.asInstanceOf[Class[A]] lazy val paramAnnotation = (param flatMap { cp => val paramAnnos = cp.constructor.getParameterAnnotations paramAnnos(cp.index).find(cls.isInstance) }).asInstanceOf[Option[A]] val getAnno = (o: AccessibleObject) => o.getAnnotation(cls) lazy val fieldAnnotation = field optMap getAnno lazy val getterAnnotation = getter optMap getAnno lazy val beanGetterAnnotation = beanGetter optMap getAnno paramAnnotation orElse fieldAnnotation orElse getterAnnotation orElse beanGetterAnnotation } }
Example 44
Source File: ReplicationFilterSerializer.scala From eventuate with Apache License 2.0 | 5 votes |
package com.rbmhtechnology.eventuate.serializer import akka.actor.ExtendedActorSystem import akka.serialization._ import com.rbmhtechnology.eventuate.ReplicationFilter.AndFilter import com.rbmhtechnology.eventuate.ReplicationFilter.NoFilter import com.rbmhtechnology.eventuate.ReplicationFilter.OrFilter import com.rbmhtechnology.eventuate._ import com.rbmhtechnology.eventuate.serializer.ReplicationFilterFormats._ import scala.collection.JavaConverters._ import scala.language.existentials class ReplicationFilterSerializer(system: ExtendedActorSystem) extends Serializer { import ReplicationFilterTreeFormat.NodeType._ val payloadSerializer = new DelegatingPayloadSerializer(system) val AndFilterClass = classOf[AndFilter] val OrFilterClass = classOf[OrFilter] val NoFilterClass = NoFilter.getClass override def identifier: Int = 22564 override def includeManifest: Boolean = true override def toBinary(o: AnyRef): Array[Byte] = o match { case NoFilter => NoFilterFormat.newBuilder().build().toByteArray case f: ReplicationFilter => filterTreeFormatBuilder(f).build().toByteArray case _ => throw new IllegalArgumentException(s"can't serialize object of type ${o.getClass}") } override def fromBinary(bytes: Array[Byte], manifest: Option[Class[_]]): AnyRef = manifest match { case None => throw new IllegalArgumentException("manifest required") case Some(clazz) => clazz match { case NoFilterClass => NoFilter case AndFilterClass | OrFilterClass => filterTree(ReplicationFilterTreeFormat.parseFrom(bytes)) case _ => throw new IllegalArgumentException(s"can't deserialize object of type ${clazz}") } } // -------------------------------------------------------------------------------- // toBinary helpers // -------------------------------------------------------------------------------- def filterTreeFormatBuilder(filterTree: ReplicationFilter): ReplicationFilterTreeFormat.Builder = { val builder = ReplicationFilterTreeFormat.newBuilder() filterTree match { case AndFilter(filters) => builder.setNodeType(AND) filters.foreach(filter => builder.addChildren(filterTreeFormatBuilder(filter))) case OrFilter(filters) => builder.setNodeType(OR) filters.foreach(filter => builder.addChildren(filterTreeFormatBuilder(filter))) case filter => builder.setNodeType(LEAF) builder.setFilter(payloadSerializer.payloadFormatBuilder(filter)) } builder } // -------------------------------------------------------------------------------- // fromBinary helpers // -------------------------------------------------------------------------------- def filterTree(filterTreeFormat: ReplicationFilterTreeFormat): ReplicationFilter = { filterTreeFormat.getNodeType match { case AND => AndFilter(filterTreeFormat.getChildrenList.asScala.map(filterTree).toList) case OR => OrFilter(filterTreeFormat.getChildrenList.asScala.map(filterTree).toList) case LEAF => payloadSerializer.payload(filterTreeFormat.getFilter).asInstanceOf[ReplicationFilter] } } }
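A compact sketch of the fromBinary dispatch above (stand-in classes, not the Eventuate filter types): stable identifiers holding Class[_] values can be matched by equality inside an Option[Class[_]] manifest.

import scala.language.existentials

object ManifestDispatchSketch {
  // Assumed stand-ins for the serializer's filter classes.
  val StringClass: Class[_] = classOf[String]
  val IntClass: Class[_] = classOf[Int]

  def describe(manifest: Option[Class[_]]): String = manifest match {
    case None => "manifest required"
    case Some(StringClass) => "a string payload"
    case Some(IntClass) => "an int payload"
    case Some(other) => s"can't handle ${other.getName}"
  }

  val msg = describe(Some(classOf[String]))   // "a string payload"
}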
Example 45
Source File: BytecodeUtils.scala From graphx-algorithm with GNU General Public License v2.0 | 5 votes |
package org.apache.spark.graphx.util import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import scala.collection.mutable.HashSet import scala.language.existentials import org.apache.spark.util.Utils import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.{ClassReader, ClassVisitor, MethodVisitor} import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.Opcodes._ private class MethodInvocationFinder(className: String, methodName: String) extends ClassVisitor(ASM4) { val methodsInvoked = new HashSet[(Class[_], String)] override def visitMethod(access: Int, name: String, desc: String, sig: String, exceptions: Array[String]): MethodVisitor = { if (name == methodName) { new MethodVisitor(ASM4) { override def visitMethodInsn(op: Int, owner: String, name: String, desc: String) { if (op == INVOKEVIRTUAL || op == INVOKESPECIAL || op == INVOKESTATIC) { if (!skipClass(owner)) { methodsInvoked.add((Class.forName(owner.replace("/", ".")), name)) } } } } } else { null } } } }
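A short sketch of the methodsInvoked bookkeeping above: Class.forName yields a Class[_], so tuples of (Class[_], String) collect naturally in a mutable set.

import scala.collection.mutable.HashSet
import scala.language.existentials

object MethodTableSketch {
  val methodsInvoked = new HashSet[(Class[_], String)]

  def record(internalName: String, method: String): Unit = {
    // JVM-internal names use '/', Class.forName expects '.'
    methodsInvoked.add((Class.forName(internalName.replace("/", ".")), method))
  }

  record("java/lang/String", "length")
  // methodsInvoked now contains (classOf[String], "length")
}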
Example 46
Source File: LoggingState.scala From logging with Apache License 2.0 | 5 votes |
package com.persist.logging import akka.actor._ import LogActor.{AkkaMessage, LogActorMessage} import scala.language.existentials import scala.concurrent.Promise import scala.collection.mutable import TimeActorMessages._ private[logging] object LoggingState extends ClassLogging { // Queue of messages sent before logger is started private[logging] val msgs = new mutable.Queue[LogActorMessage]() @volatile var doTrace:Boolean = false @volatile var doDebug: Boolean = false @volatile var doInfo: Boolean = true @volatile var doWarn: Boolean = true @volatile var doError: Boolean = true private[logging] var loggingSys: LoggingSystem = null private[logging] var logger: Option[ActorRef] = None @volatile private[logging] var loggerStopping = false private[logging] var doTime: Boolean = false private[logging] var timeActorOption: Option[ActorRef] = None // Use to sync akka logging actor shutdown private[logging] val akkaStopPromise = Promise[Unit] private[logging] def sendMsg(msg: LogActorMessage) { if (loggerStopping) { println(s"*** Log message received after logger shutdown: $msg") } else { logger match { case Some(a) => a ! msg case None => msgs.synchronized { msgs.enqueue(msg) } } } } private[logging] def akkaMsg(m: AkkaMessage) { if (m.msg == "DIE") { akkaStopPromise.trySuccess(()) } else { sendMsg(m) } } private[logging] def timeStart(id: RequestId, name: String, uid: String) { timeActorOption foreach { case timeActor => val time = System.nanoTime() / 1000 timeActor ! TimeStart(id, name, uid, time) } } private[logging] def timeEnd(id: RequestId, name: String, uid: String) { timeActorOption foreach { case timeActor => val time = System.nanoTime() / 1000 timeActor ! TimeEnd(id, name, uid, time) } } }
Example 47
Source File: Query.scala From finagle-postgres with Apache License 2.0 | 5 votes |
package com.twitter.finagle.postgres.generic import com.twitter.concurrent.AsyncStream import scala.collection.immutable.Queue import com.twitter.finagle.postgres.{Param, PostgresClient, Row} import com.twitter.util.Future import scala.language.existentials case class Query[T](parts: Seq[String], queryParams: Seq[QueryParam], cont: Row => T) { def stream(client: PostgresClient): AsyncStream[T] = { val (queryString, params) = impl client.prepareAndQueryToStream[T](queryString, params: _*)(cont) } def run(client: PostgresClient): Future[Seq[T]] = stream(client).toSeq def exec(client: PostgresClient): Future[Int] = { val (queryString, params) = impl client.prepareAndExecute(queryString, params: _*) } def map[U](fn: T => U): Query[U] = copy(cont = cont andThen fn) def as[U](implicit rowDecoder: RowDecoder[U], columnNamer: ColumnNamer): Query[U] = { copy(cont = row => rowDecoder(row)(columnNamer)) } private def impl: (String, Seq[Param[_]]) = { val (last, placeholders, params) = queryParams.foldLeft((1, Queue.empty[Seq[String]], Queue.empty[Param[_]])) { case ((start, placeholders, params), next) => val nextPlaceholders = next.placeholders(start) val nextParams = Queue(next.params: _*) (start + nextParams.length, placeholders enqueue nextPlaceholders, params ++ nextParams) } val queryString = parts.zipAll(placeholders, "", Seq.empty).flatMap { case (part, ph) => Seq(part, ph.mkString(", ")) }.mkString (queryString, params) } } object Query { implicit class RowQueryOps(val self: Query[Row]) extends AnyVal { def ++(that: Query[Row]): Query[Row] = Query[Row]( parts = if(self.parts.length > self.queryParams.length) (self.parts.dropRight(1) :+ (self.parts.lastOption.getOrElse("") + that.parts.headOption.getOrElse(""))) ++ that.parts.drop(1) else self.parts ++ that.parts, queryParams = self.queryParams ++ that.queryParams, cont = self.cont ) def ++(that: String): Query[Row] = Query[Row]( parts = if(self.parts.length > self.queryParams.length) self.parts.dropRight(1) :+ (self.parts.last + that) else self.parts :+ that, queryParams = self.queryParams, cont = self.cont ) } }
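A stripped-down sketch of the parameter accumulation above, with finagle-postgres' Param replaced by a hypothetical case class: values of different element types collect under the single existential element type Param[_].

import scala.collection.immutable.Queue
import scala.language.existentials

// Hypothetical stand-in for finagle-postgres' Param[T]
case class Param[T](value: T)

object QueryParamSketch {
  // Heterogeneous parameters behind one existential element type.
  val params: Queue[Param[_]] = Queue(Param(42), Param("bob"), Param(3.14))

  val rendered = params.map(_.value.toString).mkString(", ")   // "42, bob, 3.14"
}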
Example 48
Source File: InferShape.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.nn.abstractnn import com.intel.analytics.bigdl.nn.keras.{Input => KInput, Sequential => KSequential} import com.intel.analytics.bigdl.nn.{Input => TInput} import com.intel.analytics.bigdl.utils.Shape import scala.language.existentials import scala.reflect.ClassTag class InvalidLayer(msg: String) extends RuntimeException(msg) trait InferShape { private[bigdl] var _inputShapeValue: Shape = null private[bigdl] var _outputShapeValue: Shape = null private[bigdl] def inputShapeValue: Shape = _inputShapeValue private[bigdl] def outputShapeValue: Shape = _outputShapeValue // scalastyle:off private[bigdl] def inputShapeValue_=(value: Shape): Unit = { _inputShapeValue = value } private[bigdl] def outputShapeValue_=(value: Shape): Unit = { _outputShapeValue = value } // scalastyle:on private[bigdl] def computeOutputShape(inputShape: Shape): Shape = { throw new RuntimeException("Haven't been implemented yet. Do not use it with Keras Layer") } private[bigdl] def excludeInvalidLayers[T: ClassTag] (modules : Seq[AbstractModule[_, _, T]]): Unit = { val invalidNodes = if (this.isKerasStyle()) { modules.filter{!_.isKerasStyle()} } else { modules.filter{_.isKerasStyle()} } if (invalidNodes.length > 0) { throw new InvalidLayer(s"""Do not mix ${this}(isKerasStyle=${isKerasStyle()}) with Layer (isKerasStyle=${invalidNodes(0).isKerasStyle()}): ${invalidNodes.mkString(",")}""") } } private[bigdl] def validateInput[T: ClassTag](modules : Seq[AbstractModule[_, _, T]]): Unit = { if (this.isKerasStyle()) { require(modules != null && !modules.isEmpty, "Empty input is not allowed") } excludeInvalidLayers(modules) } }
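A brief sketch of the validation pattern above, with AbstractModule reduced to an assumed two-line stand-in: a Seq whose elements have wildcard type parameters can still be filtered on ordinary members.

import scala.language.existentials

// Assumed stand-in for BigDL's AbstractModule[A, B, T]
abstract class Module[A, B, T] { def isKerasStyle: Boolean }

object ValidationSketch {
  def invalidLayers[T](expectKeras: Boolean, modules: Seq[Module[_, _, T]]): Seq[Module[_, _, T]] =
    modules.filter(_.isKerasStyle != expectKeras)

  class Dense extends Module[Float, Float, Float] { val isKerasStyle = true }
  class Linear extends Module[Float, Float, Float] { val isKerasStyle = false }

  val offenders = invalidLayers[Float](expectKeras = true, modules = Seq(new Dense, new Linear))   // the Linear layer
}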
Example 49
Source File: PythonBigDLValidator.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.python.api import java.lang.{Boolean => JBoolean} import java.util.{ArrayList => JArrayList, HashMap => JHashMap, List => JList, Map => JMap} import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.Table import scala.collection.JavaConverters._ import scala.collection.mutable.Map import scala.language.existentials import scala.reflect.ClassTag object PythonBigDLValidator { def ofFloat(): PythonBigDLValidator[Float] = new PythonBigDLValidator[Float]() def ofDouble(): PythonBigDLValidator[Double] = new PythonBigDLValidator[Double]() } class PythonBigDLValidator[T: ClassTag](implicit ev: TensorNumeric[T]) extends PythonBigDL[T]{ def testDict(): JMap[String, String] = { return Map("jack" -> "40", "lucy" -> "50").asJava } def testDictJTensor(): JMap[String, JTensor] = { return Map("jack" -> JTensor(Array(1.0f, 2.0f, 3.0f, 4.0f), Array(4, 1), "float")).asJava } def testDictJMapJTensor(): JMap[String, JMap[String, JTensor]] = { val table = new Table() val tensor = JTensor(Array(1.0f, 2.0f, 3.0f, 4.0f), Array(4, 1), "float") val result = Map("jack" -> tensor).asJava table.insert(tensor) return Map("nested" -> result).asJava } def testActivityWithTensor(): JActivity = { val tensor = Tensor(Array(1.0f, 2.0f, 3.0f, 4.0f), Array(4, 1)) return JActivity(tensor) } def testActivityWithTableOfTensor(): JActivity = { val tensor1 = Tensor(Array(1.0f, 1.0f), Array(2)) val tensor2 = Tensor(Array(2.0f, 2.0f), Array(2)) val tensor3 = Tensor(Array(3.0f, 3.0f), Array(2)) val table = new Table() table.insert(tensor1) table.insert(tensor2) table.insert(tensor3) return JActivity(table) } def testActivityWithTableOfTable(): JActivity = { val tensor = Tensor(Array(1.0f, 2.0f, 3.0f, 4.0f), Array(4, 1)) val table = new Table() table.insert(tensor) val nestedTable = new Table() nestedTable.insert(table) nestedTable.insert(table) return JActivity(nestedTable) } }
Example 50
Source File: TreeSentiment.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.example.treeLSTMSentiment import com.intel.analytics.bigdl._ import com.intel.analytics.bigdl.nn._ import com.intel.analytics.bigdl.numeric.NumericFloat import com.intel.analytics.bigdl.tensor.Tensor import scala.language.existentials object TreeLSTMSentiment { def apply( word2VecTensor: Tensor[Float], hiddenSize: Int, classNum: Int, p: Double = 0.5 ): Module[Float] = { val vocabSize = word2VecTensor.size(1) val embeddingDim = word2VecTensor.size(2) val embedding = LookupTable(vocabSize, embeddingDim) embedding.weight.set(word2VecTensor) embedding.setScaleW(2) val treeLSTMModule = Sequential() .add(BinaryTreeLSTM( embeddingDim, hiddenSize, withGraph = true)) .add(TimeDistributed(Dropout(p))) .add(TimeDistributed(Linear(hiddenSize, classNum))) .add(TimeDistributed(LogSoftMax())) Sequential() .add(MapTable(Squeeze(3))) .add(ParallelTable() .add(embedding) .add(Identity())) .add(treeLSTMModule) } }
Example 51
Source File: TextClassifier.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.example.textclassification import com.intel.analytics.bigdl.example.utils._ import com.intel.analytics.bigdl.nn.{ClassNLLCriterion, _} import com.intel.analytics.bigdl.utils.{Engine, LoggerFilter, T} import org.apache.log4j.{Level => Levle4j, Logger => Logger4j} import org.slf4j.{Logger, LoggerFactory} import scopt.OptionParser import scala.collection.mutable.{ArrayBuffer, Map => MMap} import scala.language.existentials object TextClassifier { val log: Logger = LoggerFactory.getLogger(this.getClass) LoggerFilter.redirectSparkInfoLogs() Logger4j.getLogger("com.intel.analytics.bigdl.optim").setLevel(Levle4j.INFO) def main(args: Array[String]): Unit = { val localParser = new OptionParser[TextClassificationParams]("BigDL Example") { opt[String]('b', "baseDir") .required() .text("Base dir containing the training and word2Vec data") .action((x, c) => c.copy(baseDir = x)) opt[String]('p', "partitionNum") .text("you may want to tune the partitionNum if run into spark mode") .action((x, c) => c.copy(partitionNum = x.toInt)) opt[String]('s', "maxSequenceLength") .text("maxSequenceLength") .action((x, c) => c.copy(maxSequenceLength = x.toInt)) opt[String]('w', "maxWordsNum") .text("maxWordsNum") .action((x, c) => c.copy(maxWordsNum = x.toInt)) opt[String]('l', "trainingSplit") .text("trainingSplit") .action((x, c) => c.copy(trainingSplit = x.toDouble)) opt[String]('z', "batchSize") .text("batchSize") .action((x, c) => c.copy(batchSize = x.toInt)) opt[Int]('l', "learningRate") .text("learningRate") .action((x, c) => c.copy(learningRate = x)) } localParser.parse(args, TextClassificationParams()).map { param => log.info(s"Current parameters: $param") val textClassification = new TextClassifier(param) textClassification.train() } } }
Example 52
Source File: ShuffleMapTask.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.lang.management.ManagementFactory import java.nio.ByteBuffer import java.util.Properties import scala.language.existentials import org.apache.spark._ import org.apache.spark.broadcast.Broadcast import org.apache.spark.executor.TaskMetrics import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.shuffle.ShuffleWriter import org.apache.spark.storage.BlockManagerId def this(partitionId: Int) { this(0, 0, null, new Partition { override def index: Int = 0 }, null, new Properties, null) } @transient private val preferredLocs: Seq[TaskLocation] = { if (locs == null) Nil else locs.toSet.toSeq } var rdd: RDD[_] = null var dep: ShuffleDependency[_, _, _] = null override def prepTask(): Unit = { // Deserialize the RDD using the broadcast variable. val threadMXBean = ManagementFactory.getThreadMXBean val deserializeStartTime = System.currentTimeMillis() val deserializeStartCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) { threadMXBean.getCurrentThreadCpuTime } else 0L val ser = SparkEnv.get.closureSerializer.newInstance() val (_rdd, _dep) = ser.deserialize[(RDD[_], ShuffleDependency[_, _, _])]( ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader) rdd = _rdd dep = _dep _executorDeserializeTime = System.currentTimeMillis() - deserializeStartTime _executorDeserializeCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) { threadMXBean.getCurrentThreadCpuTime - deserializeStartCpuTime } else 0L } override def runTask(context: TaskContext): MapStatus = { if (dep == null || rdd == null) { prepTask() } var writer: ShuffleWriter[Any, Any] = null try { val manager = SparkEnv.get.shuffleManager writer = manager.getWriter[Any, Any](dep.shuffleHandle, partitionId, context) writer.write(rdd.iterator(partition, context).asInstanceOf[Iterator[_ <: Product2[Any, Any]]]) val status = writer.stop(success = true).get FutureTaskNotifier.taskCompleted(status, partitionId, dep.shuffleId, dep.partitioner.numPartitions, nextStageLocs, metrics.shuffleWriteMetrics, false) status } catch { case e: Exception => try { if (writer != null) { writer.stop(success = false) } } catch { case e: Exception => log.debug("Could not stop writer", e) } throw e } } override def preferredLocations: Seq[TaskLocation] = preferredLocs override def toString: String = "ShuffleMapTask(%d, %d)".format(stageId, partitionId) } object ShuffleMapTask { def apply( stageId: Int, stageAttemptId: Int, partition: Partition, properties: Properties, internalAccumulatorsSer: Array[Byte], isFutureTask: Boolean, rdd: RDD[_], dep: ShuffleDependency[_, _, _], nextStageLocs: Option[Seq[BlockManagerId]]): ShuffleMapTask = { val smt = new ShuffleMapTask(stageId, stageAttemptId, null, partition, null, properties, internalAccumulatorsSer, isFutureTask, nextStageLocs) smt.rdd = rdd smt.dep = dep smt } }
Example 53
Source File: NettyBlockRpcServer.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.network.netty import java.nio.ByteBuffer import scala.collection.JavaConverters._ import scala.language.existentials import scala.reflect.ClassTag import org.apache.spark.internal.Logging import org.apache.spark.network.BlockDataManager import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.client.{RpcResponseCallback, TransportClient} import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager} import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, MapOutputReady, OpenBlocks, StreamHandle, UploadBlock} import org.apache.spark.scheduler.MapStatus import org.apache.spark.serializer.Serializer import org.apache.spark.storage.{BlockId, StorageLevel} class NettyBlockRpcServer( appId: String, serializer: Serializer, blockManager: BlockDataManager) extends RpcHandler with Logging { private val streamManager = new OneForOneStreamManager() override def receive( client: TransportClient, rpcMessage: ByteBuffer, responseContext: RpcResponseCallback): Unit = { val message = BlockTransferMessage.Decoder.fromByteBuffer(rpcMessage) logTrace(s"Received request: $message") message match { case openBlocks: OpenBlocks => val blocks: Seq[ManagedBuffer] = openBlocks.blockIds.map(BlockId.apply).map(blockManager.getBlockData) val streamId = streamManager.registerStream(appId, blocks.iterator.asJava) logTrace(s"Registered streamId $streamId with ${blocks.size} buffers") responseContext.onSuccess(new StreamHandle(streamId, blocks.size).toByteBuffer) case uploadBlock: UploadBlock => // StorageLevel and ClassTag are serialized as bytes using our JavaSerializer. val (level: StorageLevel, classTag: ClassTag[_]) = { serializer .newInstance() .deserialize(ByteBuffer.wrap(uploadBlock.metadata)) .asInstanceOf[(StorageLevel, ClassTag[_])] } val data = new NioManagedBuffer(ByteBuffer.wrap(uploadBlock.blockData)) val blockId = BlockId(uploadBlock.blockId) blockManager.putBlockData(blockId, data, level, classTag) responseContext.onSuccess(ByteBuffer.allocate(0)) case mapOutputReady: MapOutputReady => val mapStatus: MapStatus = serializer.newInstance().deserialize(ByteBuffer.wrap(mapOutputReady.serializedMapStatus)) blockManager.mapOutputReady( mapOutputReady.shuffleId, mapOutputReady.mapId, mapOutputReady.numReduces, mapStatus) } } override def getStreamManager(): StreamManager = streamManager }
Example 54
Source File: ArrayBasedMapData.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.util import java.util.{Map => JavaMap} class ArrayBasedMapData(val keyArray: ArrayData, val valueArray: ArrayData) extends MapData { require(keyArray.numElements() == valueArray.numElements()) override def numElements(): Int = keyArray.numElements() override def copy(): MapData = new ArrayBasedMapData(keyArray.copy(), valueArray.copy()) override def toString: String = { s"keys: $keyArray, values: $valueArray" } } object ArrayBasedMapData { def apply( iterator: Iterator[(_, _)], size: Int, keyConverter: (Any) => Any, valueConverter: (Any) => Any): ArrayBasedMapData = { val keys: Array[Any] = new Array[Any](size) val values: Array[Any] = new Array[Any](size) var i = 0 for ((key, value) <- iterator) { keys(i) = keyConverter(key) values(i) = valueConverter(value) i += 1 } ArrayBasedMapData(keys, values) } def apply(keys: Array[_], values: Array[_]): ArrayBasedMapData = { new ArrayBasedMapData(new GenericArrayData(keys), new GenericArrayData(values)) } def toScalaMap(map: ArrayBasedMapData): Map[Any, Any] = { val keys = map.keyArray.asInstanceOf[GenericArrayData].array val values = map.valueArray.asInstanceOf[GenericArrayData].array keys.zip(values).toMap } def toScalaMap(keys: Array[Any], values: Array[Any]): Map[Any, Any] = { keys.zip(values).toMap } def toScalaMap(keys: Seq[Any], values: Seq[Any]): Map[Any, Any] = { keys.zip(values).toMap } def toJavaMap(keys: Array[Any], values: Array[Any]): java.util.Map[Any, Any] = { import scala.collection.JavaConverters._ keys.zip(values).toMap.asJava } }
Example 55
Source File: FPTreeSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.fpm import scala.language.existentials import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext class FPTreeSuite extends SparkFunSuite with MLlibTestSparkContext { test("add transaction") { val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) assert(tree.root.children.size == 2) assert(tree.root.children.contains("a")) assert(tree.root.children("a").item.equals("a")) assert(tree.root.children("a").count == 2) assert(tree.root.children.contains("b")) assert(tree.root.children("b").item.equals("b")) assert(tree.root.children("b").count == 1) var child = tree.root.children("a") assert(child.children.size == 1) assert(child.children.contains("b")) assert(child.children("b").item.equals("b")) assert(child.children("b").count == 2) child = child.children("b") assert(child.children.size == 2) assert(child.children.contains("c")) assert(child.children.contains("y")) assert(child.children("c").item.equals("c")) assert(child.children("y").item.equals("y")) assert(child.children("c").count == 1) assert(child.children("y").count == 1) } test("merge tree") { val tree1 = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) val tree2 = new FPTree[String] .add(Seq("a", "b")) .add(Seq("a", "b", "c")) .add(Seq("a", "b", "c", "d")) .add(Seq("a", "x")) .add(Seq("a", "x", "y")) .add(Seq("c", "n")) .add(Seq("c", "m")) val tree3 = tree1.merge(tree2) assert(tree3.root.children.size == 3) assert(tree3.root.children("a").count == 7) assert(tree3.root.children("b").count == 1) assert(tree3.root.children("c").count == 2) val child1 = tree3.root.children("a") assert(child1.children.size == 2) assert(child1.children("b").count == 5) assert(child1.children("x").count == 2) val child2 = child1.children("b") assert(child2.children.size == 2) assert(child2.children("y").count == 1) assert(child2.children("c").count == 3) val child3 = child2.children("c") assert(child3.children.size == 1) assert(child3.children("d").count == 1) val child4 = child1.children("x") assert(child4.children.size == 1) assert(child4.children("y").count == 1) val child5 = tree3.root.children("c") assert(child5.children.size == 2) assert(child5.children("n").count == 1) assert(child5.children("m").count == 1) } test("extract freq itemsets") { val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("a", "b")) .add(Seq("a")) .add(Seq("b")) .add(Seq("b", "n")) val freqItemsets = tree.extract(3L).map { case (items, count) => (items.toSet, count) }.toSet val expected = Set( (Set("a"), 4L), (Set("b"), 5L), (Set("a", "b"), 3L)) assert(freqItemsets === expected) } }
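The suite above exercises Spark's package-private FPTree. As a rough illustration of the counting behaviour it asserts, here is a minimal FP-tree sketch; it is not Spark's implementation, only a standalone model of "each transaction walks one path from the root, incrementing per-node counts".

import scala.collection.mutable

object FPTreeSketch {
  final class Node(val item: String) {
    var count: Long = 0L
    val children: mutable.Map[String, Node] = mutable.Map.empty
  }

  final class Tree {
    val root = new Node("")
    // Each transaction walks one path from the root, incrementing per-node counts.
    def add(transaction: Seq[String]): this.type = {
      var cur = root
      transaction.foreach { item =>
        val child = cur.children.getOrElseUpdate(item, new Node(item))
        child.count += 1
        cur = child
      }
      this
    }
  }

  def main(args: Array[String]): Unit = {
    val tree = new Tree()
      .add(Seq("a", "b", "c"))
      .add(Seq("a", "b", "y"))
      .add(Seq("b"))
    println(tree.root.children("a").count) // 2, as the suite asserts
    println(tree.root.children("b").count) // 1
  }
}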
Example 56
Source File: SNV.scala From seqspark with Apache License 2.0 | 5 votes |
package org.dizhang.seqspark.assoc import breeze.stats.distributions.Gaussian import org.dizhang.seqspark.stat.HypoTest.{NullModel => NM} import org.dizhang.seqspark.stat.{Resampling, ScoreTest, WaldTest} import org.dizhang.seqspark.util.General._ import scala.language.existentials trait SNV extends AssocMethod { def nullModel: NM def x: Encode.Common def result: AssocMethod.Result } object SNV { def apply(nullModel: NM, x: Encode.Coding): SNV with AssocMethod.AnalyticTest = { nullModel match { case nm: NM.Fitted => AnalyticScoreTest(nm, x.asInstanceOf[Encode.Common]) case _ => AnalyticWaldTest(nullModel, x.asInstanceOf[Encode.Common]) } } def apply(ref: Double, min: Int, max: Int, nullModel: NM.Fitted, x: Encode.Coding): ResamplingTest = { ResamplingTest(ref, min, max, nullModel, x.asInstanceOf[Encode.Common]) } def getStatistic(nm: NM.Fitted, x: Encode.Coding): Double = { val st = ScoreTest(nm, x.asInstanceOf[Encode.Common].coding) math.abs(st.score(0)/st.variance(0,0).sqrt) } @SerialVersionUID(7727280101L) final case class AnalyticScoreTest(nullModel: NM.Fitted, x: Encode.Common) extends SNV with AssocMethod.AnalyticTest { //val scoreTest = ScoreTest(nullModel, x.coding) val statistic = getStatistic(nullModel, x) val pValue = { val dis = new Gaussian(0.0, 1.0) Some((1.0 - dis.cdf(statistic)) * 2) } def result: AssocMethod.BurdenAnalytic = { AssocMethod.BurdenAnalytic(x.vars, statistic, pValue, "test=score") } } case class AnalyticWaldTest(nullModel: NM, x: Encode.Common) extends SNV with AssocMethod.AnalyticTest { private val wt = WaldTest(nullModel, x.coding.toDenseVector) val statistic = wt.beta(1) / wt.std(1) val pVaue = Some(wt.pValue(oneSided = false).apply(1)) def result = { AssocMethod.BurdenAnalytic(x.vars, statistic, pVaue, s"test=wald;beta=${wt.beta(1)};betaStd=${wt.std(1)}") } } @SerialVersionUID(7727280201L) final case class ResamplingTest(refStatistic: Double, min: Int, max: Int, nullModel: NM.Fitted, x: Encode.Common) extends SNV with AssocMethod.ResamplingTest { def pCount = Resampling.Simple(refStatistic, min, max, nullModel, x, getStatistic).pCount def result: AssocMethod.BurdenResampling = { AssocMethod.BurdenResampling(x.vars, refStatistic, pCount) } } }
Example 57
Source File: ShuffleMapTask.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.nio.ByteBuffer import scala.language.existentials import org.apache.spark._ import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD import org.apache.spark.shuffle.ShuffleWriter def this(partitionId: Int) { this(0, null, new Partition { override def index = 0 }, null) } @transient private val preferredLocs: Seq[TaskLocation] = { if (locs == null) Nil else locs.toSet.toSeq } override def runTask(context: TaskContext): MapStatus = { // Deserialize the RDD using the broadcast variable. val ser = SparkEnv.get.closureSerializer.newInstance() val (rdd, dep) = ser.deserialize[(RDD[_], ShuffleDependency[_, _, _])]( ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader) metrics = Some(context.taskMetrics) var writer: ShuffleWriter[Any, Any] = null try { val manager = SparkEnv.get.shuffleManager writer = manager.getWriter[Any, Any](dep.shuffleHandle, partitionId, context) writer.write(rdd.iterator(partition, context).asInstanceOf[Iterator[_ <: Product2[Any, Any]]]) return writer.stop(success = true).get } catch { case e: Exception => try { if (writer != null) { writer.stop(success = false) } } catch { case e: Exception => log.debug("Could not stop writer", e) } throw e } } override def preferredLocations: Seq[TaskLocation] = preferredLocs override def toString = "ShuffleMapTask(%d, %d)".format(stageId, partitionId) }
Example 58
Source File: NettyBlockRpcServer.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.network.netty import java.nio.ByteBuffer import scala.collection.JavaConverters._ import scala.language.existentials import scala.reflect.ClassTag import org.apache.spark.internal.Logging import org.apache.spark.network.BlockDataManager import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.client.{RpcResponseCallback, TransportClient} import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager} import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, OpenBlocks, StreamHandle, UploadBlock} import org.apache.spark.serializer.Serializer import org.apache.spark.storage.{BlockId, StorageLevel} class NettyBlockRpcServer( appId: String, serializer: Serializer, blockManager: BlockDataManager) extends RpcHandler with Logging { private val streamManager = new OneForOneStreamManager() override def receive( client: TransportClient, rpcMessage: ByteBuffer, responseContext: RpcResponseCallback): Unit = { val message = BlockTransferMessage.Decoder.fromByteBuffer(rpcMessage) logTrace(s"Received request: $message") message match { case openBlocks: OpenBlocks => val blocks: Seq[ManagedBuffer] = openBlocks.blockIds.map(BlockId.apply).map(blockManager.getBlockData) val streamId = streamManager.registerStream(appId, blocks.iterator.asJava) logTrace(s"Registered streamId $streamId with ${blocks.size} buffers") responseContext.onSuccess(new StreamHandle(streamId, blocks.size).toByteBuffer) case uploadBlock: UploadBlock => // StorageLevel and ClassTag are serialized as bytes using our JavaSerializer. val (level: StorageLevel, classTag: ClassTag[_]) = { serializer .newInstance() .deserialize(ByteBuffer.wrap(uploadBlock.metadata)) .asInstanceOf[(StorageLevel, ClassTag[_])] } val data = new NioManagedBuffer(ByteBuffer.wrap(uploadBlock.blockData)) val blockId = BlockId(uploadBlock.blockId) blockManager.putBlockData(blockId, data, level, classTag) responseContext.onSuccess(ByteBuffer.allocate(0)) } } override def getStreamManager(): StreamManager = streamManager }
Example 59
Source File: FPTreeSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.fpm import scala.language.existentials import org.apache.spark.SparkFunSuite import org.apache.spark.mllib.util.MLlibTestSparkContext class FPTreeSuite extends SparkFunSuite with MLlibTestSparkContext { test("add transaction") { val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) assert(tree.root.children.size == 2) assert(tree.root.children.contains("a")) assert(tree.root.children("a").item.equals("a")) assert(tree.root.children("a").count == 2) assert(tree.root.children.contains("b")) assert(tree.root.children("b").item.equals("b")) assert(tree.root.children("b").count == 1) var child = tree.root.children("a") assert(child.children.size == 1) assert(child.children.contains("b")) assert(child.children("b").item.equals("b")) assert(child.children("b").count == 2) child = child.children("b") assert(child.children.size == 2) assert(child.children.contains("c")) assert(child.children.contains("y")) assert(child.children("c").item.equals("c")) assert(child.children("y").item.equals("y")) assert(child.children("c").count == 1) assert(child.children("y").count == 1) } test("merge tree") { val tree1 = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("b")) val tree2 = new FPTree[String] .add(Seq("a", "b")) .add(Seq("a", "b", "c")) .add(Seq("a", "b", "c", "d")) .add(Seq("a", "x")) .add(Seq("a", "x", "y")) .add(Seq("c", "n")) .add(Seq("c", "m")) val tree3 = tree1.merge(tree2) assert(tree3.root.children.size == 3) assert(tree3.root.children("a").count == 7) assert(tree3.root.children("b").count == 1) assert(tree3.root.children("c").count == 2) val child1 = tree3.root.children("a") assert(child1.children.size == 2) assert(child1.children("b").count == 5) assert(child1.children("x").count == 2) val child2 = child1.children("b") assert(child2.children.size == 2) assert(child2.children("y").count == 1) assert(child2.children("c").count == 3) val child3 = child2.children("c") assert(child3.children.size == 1) assert(child3.children("d").count == 1) val child4 = child1.children("x") assert(child4.children.size == 1) assert(child4.children("y").count == 1) val child5 = tree3.root.children("c") assert(child5.children.size == 2) assert(child5.children("n").count == 1) assert(child5.children("m").count == 1) } test("extract freq itemsets") { val tree = new FPTree[String] .add(Seq("a", "b", "c")) .add(Seq("a", "b", "y")) .add(Seq("a", "b")) .add(Seq("a")) .add(Seq("b")) .add(Seq("b", "n")) val freqItemsets = tree.extract(3L).map { case (items, count) => (items.toSet, count) }.toSet val expected = Set( (Set("a"), 4L), (Set("b"), 5L), (Set("a", "b"), 3L)) assert(freqItemsets === expected) } }
Example 60
Source File: RpcMessages.scala From spark-monitoring with MIT License | 5 votes |
package org.apache.spark.metrics import java.util.concurrent.TimeUnit import com.codahale.metrics.{Clock, Reservoir} trait MetricMessage[T] { val namespace: String val metricName: String val value: T } private[metrics] case class CounterMessage( override val namespace: String, override val metricName: String, override val value: Long ) extends MetricMessage[Long] private[metrics] case class SettableGaugeMessage[T]( override val namespace: String, override val metricName: String, override val value: T ) extends MetricMessage[T] import scala.language.existentials private[metrics] case class HistogramMessage( override val namespace: String, override val metricName: String, override val value: Long, reservoirClass: Class[_ <: Reservoir] ) extends MetricMessage[Long] private[metrics] case class MeterMessage( override val namespace: String, override val metricName: String, override val value: Long, clockClass: Class[_ <: Clock] ) extends MetricMessage[Long] private[metrics] case class TimerMessage( override val namespace: String, override val metricName: String, override val value: Long, timeUnit: TimeUnit, reservoirClass: Class[_ <: Reservoir], clockClass: Class[_ <: Clock] ) extends MetricMessage[Long]
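These messages carry Class[_ <: Reservoir] and Class[_ <: Clock] values rather than instances. A minimal sketch, with a hypothetical Reservoir trait standing in for the Codahale one, shows how a receiver can instantiate such a bounded class reflectively.

import scala.language.existentials

// Hypothetical stand-ins for the Codahale types referenced above.
trait Reservoir { def size: Int }
final class SlidingWindowReservoir extends Reservoir { def size: Int = 128 }

final case class HistogramSpec(metricName: String,
                               value: Long,
                               reservoirClass: Class[_ <: Reservoir])

object BoundedClassSketch {
  // The upper bound on the Class keeps the reflective instantiation well typed.
  def buildReservoir(spec: HistogramSpec): Reservoir =
    spec.reservoirClass.getDeclaredConstructor().newInstance()

  def main(args: Array[String]): Unit = {
    val spec = HistogramSpec("request-latency", 42L, classOf[SlidingWindowReservoir])
    println(buildReservoir(spec).size) // 128
  }
}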
Example 61
Source File: ScheduledTaskManager.scala From incubator-toree with Apache License 2.0 | 5 votes |
package org.apache.toree.utils import scala.language.existentials import java.util.concurrent._ import java.util.UUID import com.google.common.util.concurrent.ThreadFactoryBuilder import ScheduledTaskManager._ import scala.util.Try def stop() = { _taskMap.clear() _scheduler.shutdown() } } object ScheduledTaskManager { val DefaultMaxThreads = 4 val DefaultExecutionDelay = 10 // 10 milliseconds val DefaultTimeInterval = 100 // 100 milliseconds }
Example 62
Source File: Resampling.scala From seqspark with Apache License 2.0 | 5 votes |
package org.dizhang.seqspark.stat import breeze.linalg.{DenseVector, shuffle} import breeze.stats.distributions.Bernoulli import org.dizhang.seqspark.assoc.Encode import org.dizhang.seqspark.ds.SemiGroup.PairInt import org.dizhang.seqspark.stat.HypoTest.NullModel import scala.language.existentials def makeNewNullModel: NullModel.Fitted = { val newY = makeNewY() val cols = nullModel.xs.cols NullModel( newY, nullModel.xs(::, 1 until cols), fit = true, binary = nullModel.binary ).asInstanceOf[NullModel.Fitted] } }
Example 63
Source File: VT.scala From seqspark with Apache License 2.0 | 5 votes |
package org.dizhang.seqspark.assoc import breeze.linalg._ import org.dizhang.seqspark.stat.HypoTest.{NullModel => NM} import org.dizhang.seqspark.stat.{Resampling, ScoreTest} import org.dizhang.seqspark.util.General.RichDouble import org.slf4j.LoggerFactory import scala.language.existentials @SerialVersionUID(7727880001L) trait VT extends AssocMethod { def nullModel: NM def x: Encode.VT def result: AssocMethod.Result } object VT { val logger = LoggerFactory.getLogger(getClass) def apply(nullModel: NM, x: Encode.Coding): VT with AssocMethod.AnalyticTest = { val nmf = nullModel match { case NM.Simple(y, b) => NM.Fit(y, b) case NM.Mutiple(y, c, b) => NM.Fit(y, c, b) case nm: NM.Fitted => nm } AnalyticScoreTest(nmf, x.asInstanceOf[Encode.VT]) } def apply(ref: Double, min: Int, max: Int, nullModel: NM.Fitted, x: Encode.Coding): ResamplingTest = { ResamplingTest(ref, min, max, nullModel, x.asInstanceOf[Encode.VT]) } def getStatistic(nm: NM.Fitted, x: Encode.Coding): Double = { //println(s"scores: ${st.score.toArray.mkString(",")}") //println(s"variances: ${diag(st.variance).toArray.mkString(",")}") val m = x.asInstanceOf[Encode.VT].coding val ts = m.map{sv => val st = ScoreTest(nm, sv) st.score(0)/st.variance(0, 0).sqrt } //val ts = st.score :/ diag(st.variance).map(x => x.sqrt) max(ts) } @SerialVersionUID(7727880101L) final case class AnalyticScoreTest(nullModel: NM.Fitted, x: Encode.VT) extends VT with AssocMethod.AnalyticTest { val statistic = getStatistic(nullModel, x) val pValue = None def result: AssocMethod.VTAnalytic = { val info = s"MAFs=${x.coding.length}" AssocMethod.VTAnalytic(x.vars, x.size, statistic, pValue, info) } } @SerialVersionUID(7727880201L) final case class ResamplingTest(refStatistic: Double, min: Int, max: Int, nullModel: NM.Fitted, x: Encode.VT) extends VT with AssocMethod.ResamplingTest { def pCount = { Resampling.Simple(refStatistic, min, max, nullModel, x, getStatistic).pCount } def result: AssocMethod.VTResampling = AssocMethod.VTResampling(x.vars, x.size, refStatistic, pCount) } }
Example 64
Source File: Burden.scala From seqspark with Apache License 2.0 | 5 votes |
package org.dizhang.seqspark.assoc import breeze.linalg.DenseVector import breeze.stats.distributions.{Gaussian, StudentsT} import org.dizhang.seqspark.stat.HypoTest.{NullModel => NM} import org.dizhang.seqspark.stat.{Resampling, ScoreTest, WaldTest} import org.dizhang.seqspark.util.General._ import scala.language.existentials @SerialVersionUID(7727280001L) trait Burden extends AssocMethod { def nullModel: NM def x: Encode.Fixed def result: AssocMethod.Result } object Burden { def apply(nullModel: NM, x: Encode.Coding): Burden with AssocMethod.AnalyticTest = { nullModel match { case nm: NM.Fitted => AnalyticScoreTest(nm, x.asInstanceOf[Encode.Fixed]) case _ => AnalyticWaldTest(nullModel, x.asInstanceOf[Encode.Fixed]) } } def apply(ref: Double, min: Int, max: Int, nullModel: NM.Fitted, x: Encode.Coding): ResamplingTest = { ResamplingTest(ref, min, max, nullModel, x.asInstanceOf[Encode.Fixed]) } def getStatistic(nm: NM.Fitted, x: Encode.Coding): Double = { val st = ScoreTest(nm, x.asInstanceOf[Encode.Fixed].coding) st.score(0)/st.variance(0,0).sqrt } def getStatistic(nm: NM, x: DenseVector[Double]): Double = { val wt = WaldTest(nm, x) (wt.beta /:/ wt.std).apply(1) } @SerialVersionUID(7727280101L) final case class AnalyticScoreTest(nullModel: NM.Fitted, x: Encode.Fixed) extends Burden with AssocMethod.AnalyticTest { def geno = x.coding //val scoreTest = ScoreTest(nullModel, geno) val statistic = getStatistic(nullModel, x) val pValue = { val dis = new Gaussian(0.0, 1.0) Some(1.0 - dis.cdf(statistic)) } def result: AssocMethod.BurdenAnalytic = { AssocMethod.BurdenAnalytic(x.vars, statistic, pValue, "test=score") } } case class AnalyticWaldTest(nullModel: NM, x: Encode.Fixed) extends Burden with AssocMethod.AnalyticTest { def geno = x.coding private val wt = WaldTest(nullModel, x.coding) val statistic = getStatistic(nullModel, geno) val pValue = { val dis = new StudentsT(nullModel.dof - 1) Some(1.0 - dis.cdf(statistic)) } def result = { AssocMethod.BurdenAnalytic(x.vars, statistic, pValue, s"test=wald;beta=${wt.beta(1)};betaStd=${wt.std(1)}") } } @SerialVersionUID(7727280201L) final case class ResamplingTest(refStatistic: Double, min: Int, max: Int, nullModel: NM.Fitted, x: Encode.Fixed) extends Burden with AssocMethod.ResamplingTest { def pCount = Resampling.Simple(refStatistic, min, max, nullModel, x, getStatistic).pCount def result: AssocMethod.BurdenResampling = { AssocMethod.BurdenResampling(x.vars, refStatistic, pCount) } } }
Example 65
Source File: BytecodeUtils.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.graphx.util import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import scala.collection.mutable.HashSet import scala.language.existentials import org.apache.xbean.asm5.{ClassReader, ClassVisitor, MethodVisitor} import org.apache.xbean.asm5.Opcodes._ import org.apache.spark.util.Utils private class MethodInvocationFinder(className: String, methodName: String) extends ClassVisitor(ASM5) { val methodsInvoked = new HashSet[(Class[_], String)] override def visitMethod(access: Int, name: String, desc: String, sig: String, exceptions: Array[String]): MethodVisitor = { if (name == methodName) { new MethodVisitor(ASM5) { override def visitMethodInsn( op: Int, owner: String, name: String, desc: String, itf: Boolean) { if (op == INVOKEVIRTUAL || op == INVOKESPECIAL || op == INVOKESTATIC) { if (!skipClass(owner)) { methodsInvoked.add((Utils.classForName(owner.replace("/", ".")), name)) } } } } } else { null } } } }
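The finder above accumulates its results in a HashSet[(Class[_], String)]. A minimal sketch of that bookkeeping, independent of ASM and bytecode parsing:

import scala.collection.mutable.HashSet
import scala.language.existentials

object MethodInvocationLogSketch {
  // Invocations are collected as (Class[_], String) pairs, so owners with
  // different type parameters share one set.
  val methodsInvoked = new HashSet[(Class[_], String)]

  def record(owner: Class[_], methodName: String): Unit =
    methodsInvoked.add((owner, methodName))

  def main(args: Array[String]): Unit = {
    record(classOf[java.util.ArrayList[_]], "add")
    record(classOf[String], "length")
    println(methodsInvoked.contains((classOf[String], "length"))) // true
  }
}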
Example 66
Source File: MyNettyBlockRpcServer.scala From OAP with Apache License 2.0 | 5 votes |
package org.apache.spark.network.netty import java.nio.ByteBuffer import scala.language.existentials import org.apache.spark.SparkEnv import org.apache.spark.internal.Logging import org.apache.spark.network.BlockDataManager import org.apache.spark.network.client.{RpcResponseCallback, StreamCallbackWithID, TransportClient} import org.apache.spark.network.server.{OneForOneStreamManager, RpcHandler, StreamManager} import org.apache.spark.network.shuffle.protocol._ import org.apache.spark.serializer.Serializer import org.apache.spark.shuffle.remote.{HadoopFileSegmentManagedBuffer, MessageForHadoopManagedBuffers, RemoteShuffleManager} import org.apache.spark.shuffle.sort.SortShuffleManager import org.apache.spark.storage.{BlockId, ShuffleBlockId} class MyNettyBlockRpcServer( appId: String, serializer: Serializer, blockManager: BlockDataManager) extends RpcHandler with Logging { private val streamManager = new OneForOneStreamManager() override def receive( client: TransportClient, rpcMessage: ByteBuffer, responseContext: RpcResponseCallback): Unit = { val message = BlockTransferMessage.Decoder.fromByteBuffer(rpcMessage) logTrace(s"Received request: $message") message match { case openBlocks: OpenBlocks => val blocksNum = openBlocks.blockIds.length val isShuffleRequest = (blocksNum > 0) && BlockId.apply(openBlocks.blockIds(0)).isInstanceOf[ShuffleBlockId] && (SparkEnv.get.conf.get("spark.shuffle.manager", classOf[SortShuffleManager].getName) == classOf[RemoteShuffleManager].getName) if (isShuffleRequest) { val blockIdAndManagedBufferPair = openBlocks.blockIds.map(block => (block, blockManager.getHostLocalShuffleData( BlockId.apply(block), Array.empty).asInstanceOf[HadoopFileSegmentManagedBuffer])) responseContext.onSuccess(new MessageForHadoopManagedBuffers( blockIdAndManagedBufferPair).toByteBuffer.nioBuffer()) } else { // This customized Netty RPC server is only served for RemoteShuffle requests, // Other RPC messages or data chunks transferring should go through // NettyBlockTransferService' NettyBlockRpcServer throw new UnsupportedOperationException("MyNettyBlockRpcServer only serves remote" + " shuffle requests for OpenBlocks") } case uploadBlock: UploadBlock => throw new UnsupportedOperationException("MyNettyBlockRpcServer doesn't serve UploadBlock") } } override def receiveStream( client: TransportClient, messageHeader: ByteBuffer, responseContext: RpcResponseCallback): StreamCallbackWithID = { throw new UnsupportedOperationException("MyNettyBlockRpcServer doesn't support receiving" + " stream") } override def getStreamManager(): StreamManager = streamManager }
Example 67
Source File: RowToVectorBuilder.scala From filo with Apache License 2.0 | 5 votes |
package org.velvia.filo import java.nio.ByteBuffer import scala.language.existentials import scala.language.postfixOps import scalaxy.loops._ import BuilderEncoder.{EncodingHint, AutoDetect} case class VectorInfo(name: String, dataType: Class[_]) // To help matching against the ClassTag in the VectorBuilder private object Classes { val Boolean = classOf[Boolean] val Byte = java.lang.Byte.TYPE val Short = java.lang.Short.TYPE val Int = java.lang.Integer.TYPE val Long = java.lang.Long.TYPE val Float = java.lang.Float.TYPE val Double = java.lang.Double.TYPE val String = classOf[String] val DateTime = classOf[org.joda.time.DateTime] val SqlTimestamp = classOf[java.sql.Timestamp] val UTF8 = classOf[ZeroCopyUTF8String] } object RowToVectorBuilder { def convertToBytes(hint: EncodingHint = AutoDetect): Map[String, ByteBuffer] = { val chunks = builders.map(_.toFiloBuffer(hint)) schema.zip(chunks).map { case (VectorInfo(colName, _), bytes) => (colName, bytes) }.toMap } private def unsupportedInput(typ: Any) = throw new RuntimeException("Unsupported input type " + typ) }
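VectorInfo pairs a column name with a runtime Class[_], and the Classes object supports matching on it. A minimal sketch of that dispatch follows; the builder names are hypothetical, and VectorInfo is redeclared locally so the sketch is self-contained.

object ClassDispatchSketch {
  // Local stand-in mirroring filo's VectorInfo above.
  final case class VectorInfo(name: String, dataType: Class[_])

  private val IntClass = java.lang.Integer.TYPE
  private val StringClass = classOf[String]

  // A column's runtime Class[_] selects a (hypothetical) builder name.
  def builderFor(info: VectorInfo): String = info.dataType match {
    case IntClass    => s"${info.name}: IntVectorBuilder"
    case StringClass => s"${info.name}: StringVectorBuilder"
    case other       => sys.error(s"Unsupported input type $other")
  }

  def main(args: Array[String]): Unit = {
    println(builderFor(VectorInfo("age", classOf[Int])))     // age: IntVectorBuilder
    println(builderFor(VectorInfo("name", classOf[String]))) // name: StringVectorBuilder
  }
}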
Example 68
Source File: FlinkScalarFunctionGenerator.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.flink.internal import com.amazon.milan.compiler.scala.{CodeBlock, DefaultTypeEmitter, ScalarFunctionGenerator, TypeEmitter} import com.amazon.milan.compiler.flink.generator.FlinkGeneratorException import com.amazon.milan.compiler.flink.typeutil._ import com.amazon.milan.program.ValueDef import com.amazon.milan.types._ import com.amazon.milan.typeutil.{TypeDescriptor, types} import scala.language.existentials object FlinkScalarFunctionGenerator { val default = new FlinkScalarFunctionGenerator(new DefaultTypeEmitter) } case class FunctionParts(arguments: CodeBlock, returnType: CodeBlock, body: CodeBlock) class FlinkScalarFunctionGenerator(typeEmitter: TypeEmitter) extends ScalarFunctionGenerator(typeEmitter, ContextualTreeTransformer) { private class ArrayFieldConversionContext(tupleType: TypeDescriptor[_]) extends ConversionContext { override def generateSelectTermAndContext(name: String): (String, ConversionContext) = { if (name == RecordIdFieldName) { // RecordId is a special field for tuple streams, because it's a property of the ArrayRecord class rather than // being present in the fields array itself. (s".$name", createContextForType(types.String)) } else { val fieldIndex = this.tupleType.fields.takeWhile(_.name != name).length if (fieldIndex >= this.tupleType.fields.length) { throw new FlinkGeneratorException(s"Field '$name' not found.") } val fieldType = this.tupleType.fields(fieldIndex).fieldType (s"($fieldIndex).asInstanceOf[${typeEmitter.getTypeFullName(fieldType)}]", createContextForType(fieldType)) } } } override protected def createContextForArgument(valueDef: ValueDef): ConversionContext = { // If the record type is a tuple with named fields then this is a tuple stream whose records are stored as // ArrayRecord objects. if (valueDef.tpe.isTupleRecord) { new ArrayArgumentConversionContext(valueDef.name, valueDef.tpe) } else { super.createContextForArgument(valueDef) } } override protected def createContextForType(contextType: TypeDescriptor[_]): ConversionContext = { // If the context type is a tuple with named fields then term names must be mapped to indices in the ArrayRecord // objects. if (contextType.isTupleRecord) { new ArrayFieldConversionContext(contextType) } else { super.createContextForType(contextType) } } }
Example 69
Source File: TestWindow.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.lang import java.time.Duration import com.amazon.milan.lang.aggregation._ import com.amazon.milan.program import com.amazon.milan.program.{GroupBy, _} import com.amazon.milan.test.{DateIntRecord, DateKeyValueRecord} import com.amazon.milan.typeutil.{FieldDescriptor, types} import org.junit.Assert._ import org.junit.Test import scala.language.existentials @Test class TestWindow { @Test def test_TumblingWindow_ReturnsStreamWithCorrectInputNodeAndWindowProperties(): Unit = { val stream = Stream.of[DateIntRecord] val windowed = stream.tumblingWindow(r => r.dateTime, Duration.ofHours(1), Duration.ofMinutes(30)) val TumblingWindow(_, dateExtractorFunc, period, offset) = windowed.expr // If this extraction doesn't throw an exception then the formula is correct. val FunctionDef(List(ValueDef("r", _)), SelectField(SelectTerm("r"), "dateTime")) = dateExtractorFunc assertEquals(Duration.ofHours(1), period.asJava) assertEquals(Duration.ofMinutes(30), offset.asJava) } @Test def test_TumblingWindow_ThenSelectToTuple_ReturnsStreamWithCorrectFieldComputationExpression(): Unit = { val stream = Stream.of[DateIntRecord] val grouped = stream.tumblingWindow(r => r.dateTime, Duration.ofHours(1), Duration.ofMinutes(30)) val selected = grouped.select((key, r) => fields(field("max", max(r.i)))) val Aggregate(source, FunctionDef(_, NamedFields(fieldList))) = selected.expr assertEquals(1, selected.recordType.fields.length) assertEquals(FieldDescriptor("max", types.Int), selected.recordType.fields.head) assertEquals(1, fieldList.length) assertEquals("max", fieldList.head.fieldName) // If this extraction statement doesn't crash then we're good. val Max(SelectField(SelectTerm("r"), "i")) = fieldList.head.expr } @Test def test_SlidingWindow_ReturnsStreamWithCorrectInputNodeAndWindowProperties(): Unit = { val stream = Stream.of[DateIntRecord] val windowed = stream.slidingWindow(r => r.dateTime, Duration.ofHours(1), Duration.ofMinutes(10), Duration.ofMinutes(30)) val SlidingWindow(_, dateExtractorFunc, size, slide, offset) = windowed.expr val FunctionDef(List(ValueDef("r", _)), SelectField(SelectTerm("r"), "dateTime")) = dateExtractorFunc assertEquals(Duration.ofHours(1), size.asJava) assertEquals(Duration.ofMinutes(10), slide.asJava) assertEquals(Duration.ofMinutes(30), offset.asJava) } @Test def test_GroupBy_ThenTumblingWindow_ThenSelect_ReturnsStreamWithCorrectInputNodeAndWindowProperties(): Unit = { val input = Stream.of[DateKeyValueRecord].withId("input") val output = input.groupBy(r => r.key) .tumblingWindow(r => r.dateTime, Duration.ofMinutes(5), Duration.ZERO) .select((windowStart, r) => any(r)) val Aggregate(windowExpr, FunctionDef(List(ValueDef("windowStart", _), ValueDef("r", _)), First(SelectTerm("r")))) = output.expr val TumblingWindow(groupExpr, FunctionDef(List(ValueDef("r", _)), SelectField(SelectTerm("r"), "dateTime")), program.Duration(300000), program.Duration(0)) = windowExpr val GroupBy(ExternalStream("input", "input", _), FunctionDef(List(ValueDef("r", _)), SelectField(SelectTerm("r"), "key"))) = groupExpr } }
Example 70
Source File: Surface.scala From airframe with Apache License 2.0 | 5 votes |
package wvlet.airframe.surface import scala.language.existentials import scala.language.experimental.macros object Surface { def of[A]: Surface = macro SurfaceMacros.surfaceOf[A] def methodsOf[A]: Seq[MethodSurface] = macro SurfaceMacros.methodSurfaceOf[A] } trait Surface extends Serializable { def rawType: Class[_] def typeArgs: Seq[Surface] def params: Seq[Parameter] def name: String def fullName: String def dealias: Surface = this def isOption: Boolean def isAlias: Boolean def isPrimitive: Boolean def isSeq: Boolean = classOf[Seq[_]].isAssignableFrom(rawType) def objectFactory: Option[ObjectFactory] = None } sealed trait ParameterBase extends Serializable { def name: String def surface: Surface def call(obj: Any, x: Any*): Any } trait Parameter extends ParameterBase { def index: Int def name: String def getMethodArgDefaultValue(methodOwner: Any): Option[Any] = getDefaultValue } trait MethodSurface extends ParameterBase { def mod: Int def owner: Surface def name: String def args: Seq[MethodParameter] def surface: Surface = returnType def returnType: Surface def isPublic: Boolean = (mod & MethodModifier.PUBLIC) != 0 def isPrivate: Boolean = (mod & MethodModifier.PRIVATE) != 0 def isProtected: Boolean = (mod & MethodModifier.PROTECTED) != 0 def isStatic: Boolean = (mod & MethodModifier.STATIC) != 0 def isFinal: Boolean = (mod & MethodModifier.FINAL) != 0 def isAbstract: Boolean = (mod & MethodModifier.ABSTRACT) != 0 }
Example 71
Source File: AirframeException.scala From airframe with Apache License 2.0 | 5 votes |
package wvlet.airframe import wvlet.airframe.surface.Surface import scala.language.existentials trait AirframeException extends Exception { self => def getCode: String = this.getClass.getSimpleName override def toString: String = getMessage } object AirframeException { case class MISSING_SESSION(cl: Class[_]) extends AirframeException { override def getMessage: String = s"[$getCode] Session is not found inside ${cl}. You may need to define ${cl} as a trait or implement DISupport to inject the current Session." } case class CYCLIC_DEPENDENCY(deps: List[Surface], sourceCode: SourceCode) extends AirframeException { override def getMessage: String = s"[$getCode] ${deps.reverse.mkString(" -> ")} at ${sourceCode}" } case class MISSING_DEPENDENCY(stack: List[Surface], sourceCode: SourceCode) extends AirframeException { override def getMessage: String = s"[$getCode] Binding for ${stack.head} at ${sourceCode} is not found: ${stack.mkString(" <- ")}" } case class SHUTDOWN_FAILURE(cause: Throwable) extends AirframeException { override def getMessage: String = { s"[${getCode}] Failure at session shutdown: ${cause.getMessage}" } } case class MULTIPLE_SHUTDOWN_FAILURES(causes: List[Throwable]) extends AirframeException { override def getMessage: String = { s"[${getCode}] Multiple failures occurred during session shutdown:\n${causes.map(x => s" - ${x.getMessage}").mkString("\n")}" } } }
Example 72
Source File: RunServer.scala From mleap with Apache License 2.0 | 5 votes |
package ml.combust.mleap.grpc.server import java.util.concurrent.{Executors, TimeUnit} import akka.Done import akka.actor.{ActorSystem, CoordinatedShutdown} import akka.stream.{ActorMaterializer, Materializer} import com.typesafe.config.Config import com.typesafe.scalalogging.Logger import io.grpc.ServerBuilder import ml.combust.mleap.executor.MleapExecutor import ml.combust.mleap.pb.MleapGrpc import scala.concurrent.{ExecutionContext, Future} import scala.language.existentials import scala.util.{Failure, Success, Try} class RunServer(config: Config) (implicit system: ActorSystem) { private val logger = Logger(classOf[RunServer]) private var coordinator: Option[CoordinatedShutdown] = None def run(): Unit = { Try { logger.info("Starting MLeap gRPC Server") val coordinator = CoordinatedShutdown(system) this.coordinator = Some(coordinator) implicit val materializer: Materializer = ActorMaterializer() val grpcServerConfig = new GrpcServerConfig(config.getConfig("default")) val mleapExecutor = MleapExecutor(system) val port: Int = config.getInt("port") val threads: Option[Int] = if (config.hasPath("threads")) Some(config.getInt("threads")) else None val threadCount = threads.getOrElse { Math.min(Math.max(Runtime.getRuntime.availableProcessors() * 4, 32), 64) } logger.info(s"Creating thread pool for server with size $threadCount") val grpcThreadPool = Executors.newFixedThreadPool(threadCount) implicit val ec: ExecutionContext = ExecutionContext.fromExecutor(grpcThreadPool) coordinator.addTask(CoordinatedShutdown.PhaseServiceRequestsDone, "threadPoolShutdownNow") { () => Future { logger.info("Shutting down gRPC thread pool") grpcThreadPool.shutdown() grpcThreadPool.awaitTermination(5, TimeUnit.SECONDS) Done } } logger.info(s"Creating executor service") val grpcService: GrpcServer = new GrpcServer(mleapExecutor, grpcServerConfig) val builder = ServerBuilder.forPort(port) builder.intercept(new ErrorInterceptor) builder.addService(MleapGrpc.bindService(grpcService, ec)) val grpcServer = builder.build() logger.info(s"Starting server on port $port") grpcServer.start() coordinator.addTask(CoordinatedShutdown.PhaseServiceUnbind, "grpcServiceShutdown") { () => Future { logger.info("Shutting down gRPC") grpcServer.shutdown() grpcServer.awaitTermination(10, TimeUnit.SECONDS) Done }(ExecutionContext.global) } coordinator.addTask(CoordinatedShutdown.PhaseServiceStop, "grpcServiceShutdownNow") { () => Future { if (!grpcServer.isShutdown) { logger.info("Shutting down gRPC NOW!") grpcServer.shutdownNow() grpcServer.awaitTermination(5, TimeUnit.SECONDS) } Done }(ExecutionContext.global) } } match { case Success(_) => case Failure(err) => logger.error("Error encountered starting server", err) for (c <- this.coordinator) { c.run(CoordinatedShutdown.UnknownReason) } throw err } } }
Example 73
Source File: Responses.scala From finagle-postgres with Apache License 2.0 | 5 votes |
package com.twitter.finagle.postgres import java.nio.charset.Charset import com.twitter.finagle.postgres.messages.{DataRow, Field} import com.twitter.finagle.postgres.values.ValueDecoder import com.twitter.util.Try import Try._ import com.twitter.concurrent.AsyncStream import com.twitter.finagle.postgres.PostgresClient.TypeSpecifier import com.twitter.finagle.postgres.codec.NullValue import io.netty.buffer.ByteBuf import scala.language.existentials // capture all common format data for a set of rows to reduce repeated references case class RowFormat( indexMap: Map[String, Int], formats: Array[Short], oids: Array[Int], dataTypes: Map[Int, TypeSpecifier], receives: PartialFunction[String, ValueDecoder[T] forSome {type T}], charset: Charset ) { @inline final def recv(index: Int) = dataTypes(oids(index)).receiveFunction @inline final def defaultDecoder(index: Int) = receives.applyOrElse(recv(index), (_: String) => ValueDecoder.never) } trait Row { def getOption[T](name: String)(implicit decoder: ValueDecoder[T]): Option[T] def getOption[T](index: Int)(implicit decoder: ValueDecoder[T]): Option[T] def get[T](name: String)(implicit decoder: ValueDecoder[T]): T def get[T](index: Int)(implicit decoder: ValueDecoder[T]): T def getTry[T](name: String)(implicit decoder: ValueDecoder[T]): Try[T] def getTry[T](index: Int)(implicit decoder: ValueDecoder[T]): Try[T] def getOrElse[T](name: String, default: => T)(implicit decoder: ValueDecoder[T]): T def getOrElse[T](index: Int, default: => T)(implicit decoder: ValueDecoder[T]): T def getAnyOption(name: String): Option[Any] def getAnyOption(index: Int): Option[Any] } object Row { def apply(values: Array[Option[ByteBuf]], rowFormat: RowFormat): Row = RowImpl(values, rowFormat) } object ResultSet { def apply( fields: Array[Field], charset: Charset, dataRows: AsyncStream[DataRow], types: Map[Int, TypeSpecifier], receives: PartialFunction[String, ValueDecoder[T] forSome { type T }] ): ResultSet = { val (indexMap, formats, oids) = { val l = fields.length val stringIndex = new Array[(String, Int)](l) val formats = new Array[Short](l) val oids = new Array[Int](l) var i = 0 while(i < l) { val Field(name, format, dataType) = fields(i) stringIndex(i) = (name, i) formats(i) = format oids(i) = dataType i += 1 } (stringIndex.toMap, formats, oids) } val rowFormat = RowFormat(indexMap, formats, oids, types, receives, charset) val rows = dataRows.map { dataRow => Row( values = dataRow.data, rowFormat = rowFormat ) } ResultSet(rows) } }
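The receives field above uses the explicit forSome syntax, the canonical reason for importing scala.language.existentials. A minimal, self-contained sketch of the same shape; ValueDecoder and the receive-function keys here are simplified stand-ins, not the finagle-postgres types.

import scala.language.existentials

object ForSomeRegistrySketch {
  // Simplified stand-in for finagle-postgres' ValueDecoder.
  trait ValueDecoder[T] { def decode(raw: String): T }

  val intDecoder: ValueDecoder[Int] = new ValueDecoder[Int] {
    def decode(raw: String): Int = raw.toInt
  }
  val boolDecoder: ValueDecoder[Boolean] = new ValueDecoder[Boolean] {
    def decode(raw: String): Boolean = raw == "t"
  }

  // Same shape as the `receives` parameter above: the partial function ranges
  // over decoders of *some* element type without being parameterized by it.
  val receives: PartialFunction[String, ValueDecoder[T] forSome { type T }] = {
    case "int4recv" => intDecoder
    case "boolrecv" => boolDecoder
  }

  def main(args: Array[String]): Unit = {
    val decoder = receives("int4recv") // static type: ValueDecoder[T] forSome { type T }
    println(decoder.decode("42"))      // 42 (statically typed as the unknown T)
  }
}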