scala.collection.mutable.ArrayBuffer Scala Examples
The following examples show how to use scala.collection.mutable.ArrayBuffer.
Each example is taken from an open-source project; the source file, project, and license are named in the heading above it.
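Before the project examples, here is a minimal, self-contained sketch of the ArrayBuffer operations that recur throughout them: appending single elements and collections, in-place update, clearing, and copying out. It uses only the standard library; the object name and sample values are illustrative.

```scala
import scala.collection.mutable.ArrayBuffer

object ArrayBufferBasics {
  def main(args: Array[String]): Unit = {
    // Start empty and grow in place.
    val buffer = ArrayBuffer.empty[String]
    buffer += "a"            // append one element
    buffer ++= Seq("b", "c") // append a whole collection
    buffer.append("d")       // equivalent to +=

    // Indexed read and in-place update are constant time.
    buffer(0) = "A"
    println(buffer.mkString(", ")) // A, b, c, d

    // Copy the accumulated result out, then reuse the buffer.
    val snapshot: Array[String] = buffer.toArray
    buffer.clear()
    println(s"${snapshot.length} elements copied, buffer now holds ${buffer.length}")
  }
}
```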
Example 1
Source File: IntegrationTest.scala From kmq with Apache License 2.0 | 6 votes |
```scala
package com.softwaremill.kmq.redelivery

import java.time.Duration
import java.util.Random

import akka.actor.ActorSystem
import akka.kafka.scaladsl.{Consumer, Producer}
import akka.kafka.{ConsumerSettings, ProducerMessage, ProducerSettings, Subscriptions}
import akka.stream.ActorMaterializer
import akka.testkit.TestKit
import com.softwaremill.kmq._
import com.softwaremill.kmq.redelivery.infrastructure.KafkaSpec
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.kafka.clients.producer.{ProducerConfig, ProducerRecord}
import org.apache.kafka.common.serialization.StringDeserializer
import org.scalatest.concurrent.Eventually
import org.scalatest.time.{Seconds, Span}
import org.scalatest.{BeforeAndAfterAll, FlatSpecLike, Matchers}

import scala.collection.mutable.ArrayBuffer

class IntegrationTest extends TestKit(ActorSystem("test-system")) with FlatSpecLike with KafkaSpec
  with BeforeAndAfterAll with Eventually with Matchers {

  implicit val materializer = ActorMaterializer()
  import system.dispatcher

  "KMQ" should "resend message if not committed" in {
    val bootstrapServer = s"localhost:${testKafkaConfig.kafkaPort}"
    val kmqConfig = new KmqConfig("queue", "markers", "kmq_client", "kmq_redelivery",
      Duration.ofSeconds(1).toMillis, 1000)

    val consumerSettings = ConsumerSettings(system, new StringDeserializer, new StringDeserializer)
      .withBootstrapServers(bootstrapServer)
      .withGroupId(kmqConfig.getMsgConsumerGroupId)
      .withProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest")

    val markerProducerSettings = ProducerSettings(system,
      new MarkerKey.MarkerKeySerializer(), new MarkerValue.MarkerValueSerializer())
      .withBootstrapServers(bootstrapServer)
      .withProperty(ProducerConfig.PARTITIONER_CLASS_CONFIG, classOf[ParititionFromMarkerKey].getName)
    val markerProducer = markerProducerSettings.createKafkaProducer()

    val random = new Random()

    lazy val processedMessages = ArrayBuffer[String]()
    lazy val receivedMessages = ArrayBuffer[String]()

    val control = Consumer.committableSource(consumerSettings, Subscriptions.topics(kmqConfig.getMsgTopic)) // 1. get messages from topic
      .map { msg =>
        ProducerMessage.Message(
          new ProducerRecord[MarkerKey, MarkerValue](kmqConfig.getMarkerTopic,
            MarkerKey.fromRecord(msg.record), new StartMarker(kmqConfig.getMsgTimeoutMs)), msg)
      }
      .via(Producer.flow(markerProducerSettings, markerProducer)) // 2. write the "start" marker
      .map(_.message.passThrough)
      .mapAsync(1) { msg =>
        msg.committableOffset.commitScaladsl().map(_ => msg.record) // this should be batched
      }
      .map { msg =>
        receivedMessages += msg.value
        msg
      }
      .filter(_ => random.nextInt(5) != 0)
      .map { processedMessage =>
        processedMessages += processedMessage.value
        new ProducerRecord[MarkerKey, MarkerValue](kmqConfig.getMarkerTopic,
          MarkerKey.fromRecord(processedMessage), EndMarker.INSTANCE)
      }
      .to(Producer.plainSink(markerProducerSettings, markerProducer)) // 5. write "end" markers
      .run()

    val redeliveryHook = RedeliveryTracker.start(new KafkaClients(bootstrapServer), kmqConfig)

    val messages = (0 to 20).map(_.toString)
    messages.foreach(msg => sendToKafka(kmqConfig.getMsgTopic, msg))

    eventually {
      receivedMessages.size should be > processedMessages.size
      processedMessages.sortBy(_.toInt).distinct shouldBe messages
    }(PatienceConfig(timeout = Span(15, Seconds)), implicitly)

    redeliveryHook.close()
    control.shutdown()
  }

  override def afterAll(): Unit = {
    super.afterAll()
    TestKit.shutdownActorSystem(system)
  }
}
```
Example 2
Source File: UndoSnackbarManager.scala From shadowsocksr-android with GNU General Public License v3.0 | 5 votes |
```scala
package com.github.shadowsocks.widget

import android.support.design.widget.Snackbar
import android.view.View
import com.github.shadowsocks.R

import scala.collection.mutable.ArrayBuffer

class UndoSnackbarManager[T](view: View, undo: Iterator[(Int, T)] => Unit,
                             commit: Iterator[(Int, T)] => Unit = null) {
  private val recycleBin = new ArrayBuffer[(Int, T)]
  private val removedCallback = new Snackbar.Callback {
    override def onDismissed(snackbar: Snackbar, event: Int) = {
      event match {
        case Snackbar.Callback.DISMISS_EVENT_SWIPE | Snackbar.Callback.DISMISS_EVENT_MANUAL |
             Snackbar.Callback.DISMISS_EVENT_TIMEOUT =>
          if (commit != null) commit(recycleBin.iterator)
          recycleBin.clear
        case _ =>
      }
      last = null
    }
  }
  private var last: Snackbar = _

  def remove(index: Int, item: T) = {
    recycleBin.append((index, item))
    val count = recycleBin.length
    last = Snackbar
      .make(view, view.getResources.getQuantityString(R.plurals.removed, count, count: Integer), Snackbar.LENGTH_LONG)
      .setCallback(removedCallback).setAction(R.string.undo, (_ => {
        undo(recycleBin.reverseIterator)
        recycleBin.clear
      }): View.OnClickListener)
    last.show
  }

  def flush = if (last != null) last.dismiss
}
```
Example 3
Source File: SinkRouteHandler.scala From ohara with Apache License 2.0 | 5 votes |
package oharastream.ohara.shabondi.sink import java.time.{Duration => JDuration} import java.util.concurrent.TimeUnit import akka.actor.ActorSystem import akka.http.scaladsl.model.{ContentTypes, HttpEntity, StatusCodes} import akka.http.scaladsl.server.{ExceptionHandler, Route} import com.typesafe.scalalogging.Logger import oharastream.ohara.common.data.Row import oharastream.ohara.common.util.Releasable import oharastream.ohara.shabondi.common.{JsonSupport, RouteHandler, ShabondiUtils} import org.apache.commons.lang3.StringUtils import scala.collection.mutable.ArrayBuffer import scala.compat.java8.DurationConverters._ import scala.concurrent.ExecutionContextExecutor import scala.concurrent.duration.Duration import spray.json.DefaultJsonProtocol._ import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ private[shabondi] object SinkRouteHandler { def apply(config: SinkConfig)(implicit actorSystem: ActorSystem) = new SinkRouteHandler(config) } private[shabondi] class SinkRouteHandler(config: SinkConfig)(implicit actorSystem: ActorSystem) extends RouteHandler { implicit private val contextExecutor: ExecutionContextExecutor = actorSystem.dispatcher private val log = Logger(classOf[SinkRouteHandler]) private[sink] val dataGroups = SinkDataGroups(config) def scheduleFreeIdleGroups(interval: JDuration, idleTime: JDuration): Unit = actorSystem.scheduler.scheduleWithFixedDelay(Duration(1, TimeUnit.SECONDS), interval.toScala) { () => { log.trace("scheduled free group, total group: {} ", dataGroups.size) dataGroups.freeIdleGroup(idleTime) } } private val exceptionHandler = ExceptionHandler { case ex: Throwable => log.error(ex.getMessage, ex) complete((StatusCodes.InternalServerError, ex.getMessage)) } private def fullyPollQueue(queue: RowQueue): Seq[Row] = { val buffer = ArrayBuffer.empty[Row] var item: Row = queue.poll() while (item != null) { buffer += item item = queue.poll() } buffer.toSeq } private def apiUrl = ShabondiUtils.apiUrl def route(): Route = handleExceptions(exceptionHandler) { path("groups" / Segment) { groupId => get { if (StringUtils.isAlphanumeric(groupId)) { val group = dataGroups.createIfAbsent(groupId) val result = fullyPollQueue(group.queue).map(row => JsonSupport.toRowData(row)) complete(result) } else { val entity = HttpEntity(ContentTypes.`text/plain(UTF-8)`, "Illegal group name, only accept alpha and numeric.") complete(StatusCodes.NotAcceptable -> entity) } } ~ { complete(StatusCodes.MethodNotAllowed -> s"Unsupported method, please reference: $apiUrl") } } ~ { complete(StatusCodes.NotFound -> s"Please reference: $apiUrl") } } override def close(): Unit = { Releasable.close(dataGroups) } }
Example 4
Source File: CSVConverter.scala From spark-snowflake with Apache License 2.0 | 5 votes |
```scala
package net.snowflake.spark.snowflake

import org.apache.spark.sql.types.StructType

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag

object CSVConverter {
  private final val delimiter = '|'
  private final val quoteChar = '"'

  private[snowflake] def convert[T: ClassTag](
    partition: Iterator[String],
    resultSchema: StructType
  ): Iterator[T] = {
    val converter = Conversions.createRowConverter[T](resultSchema)
    partition.map(s => {
      val fields = ArrayBuffer.empty[String]
      var buff = new StringBuilder

      def addField(): Unit = {
        if (buff.isEmpty) fields.append(null)
        else {
          val field = buff.toString()
          buff = new StringBuilder
          fields.append(field)
        }
      }

      var escaped = false
      var index = 0
      while (index < s.length) {
        escaped = false
        if (s(index) == quoteChar) {
          index += 1
          while (index < s.length && !(escaped && s(index) == delimiter)) {
            if (escaped) {
              escaped = false
              buff.append(s(index))
            } else if (s(index) == quoteChar) escaped = true
            else buff.append(s(index))
            index += 1
          }
          addField()
        } else {
          while (index < s.length && s(index) != delimiter) {
            buff.append(s(index))
            index += 1
          }
          addField()
        }
        index += 1
      }
      addField()
      converter(fields.toArray)
    })
  }
}
```
Example 5
Source File: InterfaceTreeSpec.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.lf.codegen import com.daml.lf.data.ImmArray import com.daml.lf.data.ImmArray.ImmArraySeq import com.daml.lf.data.Ref.{DottedName, QualifiedName, PackageId} import com.daml.lf.iface.{DefDataType, Interface, InterfaceType, Record, Variant} import org.scalatest.{FlatSpec, Matchers} import scala.collection.mutable.ArrayBuffer class InterfaceTreeSpec extends FlatSpec with Matchers { behavior of "InterfaceTree.bfs" it should "traverse an empty tree" in { val interfaceTree = InterfaceTree(Map.empty, Interface(PackageId.assertFromString("packageid"), Map.empty)) interfaceTree.bfs(0)((x, _) => x + 1) shouldEqual 0 } it should "traverse a tree with n elements in bfs order" in { val qualifiedName1 = QualifiedName( DottedName.assertFromSegments(ImmArray("foo").toSeq), DottedName.assertFromSegments(ImmArray("bar").toSeq)) val record1 = InterfaceType.Normal(DefDataType(ImmArraySeq(), Record(ImmArraySeq()))) val qualifiedName2 = QualifiedName( DottedName.assertFromSegments(ImmArray("foo").toSeq), DottedName.assertFromSegments(ImmArray("bar", "baz").toSeq)) val variant1 = InterfaceType.Normal(DefDataType(ImmArraySeq(), Variant(ImmArraySeq()))) val qualifiedName3 = QualifiedName( DottedName.assertFromSegments(ImmArray("foo").toSeq), DottedName.assertFromSegments(ImmArray("qux").toSeq)) val record2 = InterfaceType.Normal(DefDataType(ImmArraySeq(), Record(ImmArraySeq()))) val typeDecls = Map(qualifiedName1 -> record1, qualifiedName2 -> variant1, qualifiedName3 -> record2) val interface = new Interface(PackageId.assertFromString("packageId2"), typeDecls) val tree = InterfaceTree.fromInterface(interface) val result = tree.bfs(ArrayBuffer.empty[InterfaceType])((ab, n) => n match { case ModuleWithContext(interface @ _, modulesLineage @ _, name @ _, module @ _) => ab case TypeWithContext(interface @ _, modulesLineage @ _, typesLineage @ _, name @ _, typ) => ab ++= typ.typ.toList }) result should contain theSameElementsInOrderAs Seq(record1, record2, variant1) } behavior of "InterfaceTree.fromInterface" it should "permit standalone types with multi-component names" in { val bazQuux = QualifiedName( DottedName.assertFromSegments(ImmArray("foo", "bar").toSeq), DottedName.assertFromSegments(ImmArray("baz", "quux").toSeq) ) val record = InterfaceType.Normal(DefDataType(ImmArraySeq(), Record(ImmArraySeq()))) val typeDecls = Map(bazQuux -> record) val interface = new Interface(PackageId.assertFromString("pkgid"), typeDecls) val tree = InterfaceTree.fromInterface(interface) val result = tree.bfs(ArrayBuffer.empty[InterfaceType])((types, n) => n match { case _: ModuleWithContext => types case TypeWithContext(_, _, _, _, tpe) => types ++= tpe.typ.toList }) result.toList shouldBe List(record) } }
Example 6
Source File: SpearmanCorrelation.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.stat.correlation import scala.collection.mutable.ArrayBuffer import org.apache.spark.internal.Logging import org.apache.spark.mllib.linalg.{Matrix, Vector, Vectors} import org.apache.spark.rdd.RDD override def computeCorrelationMatrix(X: RDD[Vector]): Matrix = { // ((columnIndex, value), rowUid) val colBased = X.zipWithUniqueId().flatMap { case (vec, uid) => vec.toArray.view.zipWithIndex.map { case (v, j) => ((j, v), uid) } } // global sort by (columnIndex, value) val sorted = colBased.sortByKey() // assign global ranks (using average ranks for tied values) val globalRanks = sorted.zipWithIndex().mapPartitions { iter => var preCol = -1 var preVal = Double.NaN var startRank = -1.0 var cachedUids = ArrayBuffer.empty[Long] val flush: () => Iterable[(Long, (Int, Double))] = () => { val averageRank = startRank + (cachedUids.size - 1) / 2.0 val output = cachedUids.map { uid => (uid, (preCol, averageRank)) } cachedUids.clear() output } iter.flatMap { case (((j, v), uid), rank) => // If we see a new value or cachedUids is too big, we flush ids with their average rank. if (j != preCol || v != preVal || cachedUids.size >= 10000000) { val output = flush() preCol = j preVal = v startRank = rank cachedUids += uid output } else { cachedUids += uid Iterator.empty } } ++ flush() } // Replace values in the input matrix by their ranks compared with values in the same column. // Note that shifting all ranks in a column by a constant value doesn't affect result. val groupedRanks = globalRanks.groupByKey().map { case (uid, iter) => // sort by column index and then convert values to a vector Vectors.dense(iter.toSeq.sortBy(_._1).map(_._2).toArray) } PearsonCorrelation.computeCorrelationMatrix(groupedRanks) } }
Example 7
Source File: KPLBasedKinesisTestUtils.scala From drizzle-spark with Apache License 2.0 | 5 votes |
```scala
package org.apache.spark.streaming.kinesis

import java.nio.ByteBuffer
import java.nio.charset.StandardCharsets

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

import com.amazonaws.services.kinesis.producer.{KinesisProducer => KPLProducer, KinesisProducerConfiguration, UserRecordResult}
import com.google.common.util.concurrent.{FutureCallback, Futures}

private[kinesis] class KPLBasedKinesisTestUtils extends KinesisTestUtils {
  override protected def getProducer(aggregate: Boolean): KinesisDataGenerator = {
    if (!aggregate) {
      new SimpleDataGenerator(kinesisClient)
    } else {
      new KPLDataGenerator(regionName)
    }
  }
}

private[kinesis] class KPLDataGenerator(regionName: String) extends KinesisDataGenerator {

  private lazy val producer: KPLProducer = {
    val conf = new KinesisProducerConfiguration()
      .setRecordMaxBufferedTime(1000)
      .setMaxConnections(1)
      .setRegion(regionName)
      .setMetricsLevel("none")

    new KPLProducer(conf)
  }

  override def sendData(streamName: String, data: Seq[Int]): Map[String, Seq[(Int, String)]] = {
    val shardIdToSeqNumbers = new mutable.HashMap[String, ArrayBuffer[(Int, String)]]()
    data.foreach { num =>
      val str = num.toString
      val data = ByteBuffer.wrap(str.getBytes(StandardCharsets.UTF_8))
      val future = producer.addUserRecord(streamName, str, data)
      val kinesisCallBack = new FutureCallback[UserRecordResult]() {
        override def onFailure(t: Throwable): Unit = {} // do nothing
        override def onSuccess(result: UserRecordResult): Unit = {
          val shardId = result.getShardId
          val seqNumber = result.getSequenceNumber()
          val sentSeqNumbers = shardIdToSeqNumbers.getOrElseUpdate(shardId,
            new ArrayBuffer[(Int, String)]())
          sentSeqNumbers += ((num, seqNumber))
        }
      }
      Futures.addCallback(future, kinesisCallBack)
    }
    producer.flushSync()
    shardIdToSeqNumbers.toMap
  }
}
```
Example 8
Source File: Exchange.scala From drizzle-spark with Apache License 2.0 | 5 votes |
```scala
package org.apache.spark.sql.execution.exchange

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

import org.apache.spark.broadcast
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.execution.{LeafExecNode, SparkPlan, UnaryExecNode}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.StructType

case class ReuseExchange(conf: SQLConf) extends Rule[SparkPlan] {

  def apply(plan: SparkPlan): SparkPlan = {
    if (!conf.exchangeReuseEnabled) {
      return plan
    }
    // Build a hash map using schema of exchanges to avoid O(N*N) sameResult calls.
    val exchanges = mutable.HashMap[StructType, ArrayBuffer[Exchange]]()
    plan.transformUp {
      case exchange: Exchange =>
        // the exchanges that have same results usually also have same schemas (same column names).
        val sameSchema = exchanges.getOrElseUpdate(exchange.schema, ArrayBuffer[Exchange]())
        val samePlan = sameSchema.find { e =>
          exchange.sameResult(e)
        }
        if (samePlan.isDefined) {
          // Keep the output of this exchange, the following plans require that to resolve
          // attributes.
          ReusedExchangeExec(exchange.output, samePlan.get)
        } else {
          sameSchema += exchange
          exchange
        }
    }
  }
}
```
Example 9
Source File: subquery.scala From drizzle-spark with Apache License 2.0 | 5 votes |
```scala
package org.apache.spark.sql.execution

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.{expressions, InternalRow}
import org.apache.spark.sql.catalyst.expressions.{Expression, ExprId, InSet, Literal, PlanExpression}
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{BooleanType, DataType, StructType}

case class ReuseSubquery(conf: SQLConf) extends Rule[SparkPlan] {

  def apply(plan: SparkPlan): SparkPlan = {
    if (!conf.exchangeReuseEnabled) {
      return plan
    }
    // Build a hash map using schema of exchanges to avoid O(N*N) sameResult calls.
    val subqueries = mutable.HashMap[StructType, ArrayBuffer[SubqueryExec]]()
    plan transformAllExpressions {
      case sub: ExecSubqueryExpression =>
        val sameSchema = subqueries.getOrElseUpdate(sub.plan.schema, ArrayBuffer[SubqueryExec]())
        val sameResult = sameSchema.find(_.sameResult(sub.plan))
        if (sameResult.isDefined) {
          sub.withNewPlan(sameResult.get)
        } else {
          sameSchema += sub.plan
          sub
        }
    }
  }
}
```
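Examples 7, 8, and 9 above all rely on the same idiom: grouping values under a key with `mutable.HashMap.getOrElseUpdate(key, ArrayBuffer())` and appending in place. The sketch below isolates that idiom; the names and sample data are illustrative only, not taken from the projects above.

```scala
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

object GroupingIdiom {
  def main(args: Array[String]): Unit = {
    // Map each key to a growable buffer of the values seen for it.
    val idsByUser = mutable.HashMap[String, ArrayBuffer[Int]]()

    val events = Seq("alice" -> 1, "bob" -> 2, "alice" -> 3)
    for ((user, id) <- events) {
      // Create the per-key buffer on first use, then append in place.
      idsByUser.getOrElseUpdate(user, ArrayBuffer[Int]()) += id
    }

    // Snapshot into an immutable Map once accumulation is done,
    // mirroring the shardIdToSeqNumbers.toMap call in Example 7.
    val grouped: Map[String, Seq[Int]] = idsByUser.map { case (k, v) => k -> v.toSeq }.toMap
    println(grouped) // e.g. Map(alice -> Seq(1, 3), bob -> Seq(2))
  }
}
```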
Example 10
Source File: ApplicationMasterArguments.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.yarn import scala.collection.mutable.ArrayBuffer import org.apache.spark.util.{IntParam, MemoryParam} class ApplicationMasterArguments(val args: Array[String]) { var userJar: String = null var userClass: String = null var primaryPyFile: String = null var primaryRFile: String = null var userArgs: Seq[String] = Nil var propertiesFile: String = null parseArgs(args.toList) private def parseArgs(inputArgs: List[String]): Unit = { val userArgsBuffer = new ArrayBuffer[String]() var args = inputArgs while (!args.isEmpty) { // --num-workers, --worker-memory, and --worker-cores are deprecated since 1.0, // the properties with executor in their names are preferred. args match { case ("--jar") :: value :: tail => userJar = value args = tail case ("--class") :: value :: tail => userClass = value args = tail case ("--primary-py-file") :: value :: tail => primaryPyFile = value args = tail case ("--primary-r-file") :: value :: tail => primaryRFile = value args = tail case ("--arg") :: value :: tail => userArgsBuffer += value args = tail case ("--properties-file") :: value :: tail => propertiesFile = value args = tail case _ => printUsageAndExit(1, args) } } if (primaryPyFile != null && primaryRFile != null) { // scalastyle:off println System.err.println("Cannot have primary-py-file and primary-r-file at the same time") // scalastyle:on println System.exit(-1) } userArgs = userArgsBuffer.toList } def printUsageAndExit(exitCode: Int, unknownParam: Any = null) { // scalastyle:off println if (unknownParam != null) { System.err.println("Unknown/unsupported param " + unknownParam) } System.err.println(""" |Usage: org.apache.spark.deploy.yarn.ApplicationMaster [options] |Options: | --jar JAR_PATH Path to your application's JAR file | --class CLASS_NAME Name of your application's main class | --primary-py-file A main Python file | --primary-r-file A main R file | --arg ARG Argument to be passed to your application's main class. | Multiple invocations are possible, each will be passed in order. | --properties-file FILE Path to a custom Spark properties file. """.stripMargin) // scalastyle:on println System.exit(exitCode) } } object ApplicationMasterArguments { val DEFAULT_NUMBER_EXECUTORS = 2 }
Example 11
Source File: ClientArguments.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.yarn import scala.collection.mutable.ArrayBuffer // TODO: Add code and support for ensuring that yarn resource 'tasks' are location aware ! private[spark] class ClientArguments(args: Array[String]) { var userJar: String = null var userClass: String = null var primaryPyFile: String = null var primaryRFile: String = null var userArgs: ArrayBuffer[String] = new ArrayBuffer[String]() parseArgs(args.toList) private def parseArgs(inputArgs: List[String]): Unit = { var args = inputArgs while (!args.isEmpty) { args match { case ("--jar") :: value :: tail => userJar = value args = tail case ("--class") :: value :: tail => userClass = value args = tail case ("--primary-py-file") :: value :: tail => primaryPyFile = value args = tail case ("--primary-r-file") :: value :: tail => primaryRFile = value args = tail case ("--arg") :: value :: tail => userArgs += value args = tail case Nil => case _ => throw new IllegalArgumentException(getUsageMessage(args)) } } if (primaryPyFile != null && primaryRFile != null) { throw new IllegalArgumentException("Cannot have primary-py-file and primary-r-file" + " at the same time") } } private def getUsageMessage(unknownParam: List[String] = null): String = { val message = if (unknownParam != null) s"Unknown/unsupported param $unknownParam\n" else "" message + s""" |Usage: org.apache.spark.deploy.yarn.Client [options] |Options: | --jar JAR_PATH Path to your application's JAR file (required in yarn-cluster | mode) | --class CLASS_NAME Name of your application's main class (required) | --primary-py-file A main Python file | --primary-r-file A main R file | --arg ARG Argument to be passed to your application's main class. | Multiple invocations are possible, each will be passed in order. """.stripMargin } }
Example 12
Source File: YarnClientSchedulerBackend.scala From drizzle-spark with Apache License 2.0 | 5 votes |
```scala
package org.apache.spark.scheduler.cluster

import scala.collection.mutable.ArrayBuffer

import org.apache.hadoop.yarn.api.records.YarnApplicationState

import org.apache.spark.{SparkContext, SparkException}
import org.apache.spark.deploy.yarn.{Client, ClientArguments, YarnSparkHadoopUtil}
import org.apache.spark.internal.Logging
import org.apache.spark.launcher.SparkAppHandle
import org.apache.spark.scheduler.TaskSchedulerImpl

private[spark] class YarnClientSchedulerBackend(
    scheduler: TaskSchedulerImpl,
    sc: SparkContext)
  extends YarnSchedulerBackend(scheduler, sc)
  with Logging {

  private var client: Client = null
  private var monitorThread: MonitorThread = null

  override def stop() {
    assert(client != null, "Attempted to stop this scheduler before starting it!")
    if (monitorThread != null) {
      monitorThread.stopMonitor()
    }

    // Report a final state to the launcher if one is connected. This is needed since in client
    // mode this backend doesn't let the app monitor loop run to completion, so it does not report
    // the final state itself.
    //
    // Note: there's not enough information at this point to provide a better final state,
    // so assume the application was successful.
    client.reportLauncherState(SparkAppHandle.State.FINISHED)

    super.stop()
    YarnSparkHadoopUtil.get.stopCredentialUpdater()
    client.stop()
    logInfo("Stopped")
  }
}
```
Example 13
Source File: UnionDStream.scala From drizzle-spark with Apache License 2.0 | 5 votes |
```scala
package org.apache.spark.streaming.dstream

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag

import org.apache.spark.SparkException
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}

private[streaming]
class UnionDStream[T: ClassTag](parents: Array[DStream[T]])
  extends DStream[T](parents.head.ssc) {

  require(parents.length > 0, "List of DStreams to union is empty")
  require(parents.map(_.ssc).distinct.length == 1, "Some of the DStreams have different contexts")
  require(parents.map(_.slideDuration).distinct.length == 1,
    "Some of the DStreams have different slide durations")

  override def dependencies: List[DStream[_]] = parents.toList

  override def slideDuration: Duration = parents.head.slideDuration

  override def compute(validTime: Time): Option[RDD[T]] = {
    val rdds = new ArrayBuffer[RDD[T]]()
    parents.map(_.getOrCompute(validTime)).foreach {
      case Some(rdd) => rdds += rdd
      case None => throw new SparkException("Could not generate RDD from a parent for unifying at" +
        s" time $validTime")
    }
    if (rdds.nonEmpty) {
      Some(ssc.sc.union(rdds))
    } else {
      None
    }
  }
}
```
Example 14
Source File: QueueInputDStream.scala From drizzle-spark with Apache License 2.0 | 5 votes |
```scala
package org.apache.spark.streaming.dstream

import java.io.{NotSerializableException, ObjectInputStream, ObjectOutputStream}

import scala.collection.mutable.{ArrayBuffer, Queue}
import scala.reflect.ClassTag

import org.apache.spark.rdd.{RDD, UnionRDD}
import org.apache.spark.streaming.{StreamingContext, Time}

private[streaming]
class QueueInputDStream[T: ClassTag](
    ssc: StreamingContext,
    val queue: Queue[RDD[T]],
    oneAtATime: Boolean,
    defaultRDD: RDD[T]
  ) extends InputDStream[T](ssc) {

  override def start() { }

  override def stop() { }

  private def readObject(in: ObjectInputStream): Unit = {
    throw new NotSerializableException("queueStream doesn't support checkpointing. " +
      "Please don't use queueStream when checkpointing is enabled.")
  }

  private def writeObject(oos: ObjectOutputStream): Unit = {
    logWarning("queueStream doesn't support checkpointing")
  }

  override def compute(validTime: Time): Option[RDD[T]] = {
    val buffer = new ArrayBuffer[RDD[T]]()
    queue.synchronized {
      if (oneAtATime && queue.nonEmpty) {
        buffer += queue.dequeue()
      } else {
        buffer ++= queue
        queue.clear()
      }
    }
    if (buffer.nonEmpty) {
      if (oneAtATime) {
        Some(buffer.head)
      } else {
        Some(new UnionRDD(context.sc, buffer.toSeq))
      }
    } else if (defaultRDD != null) {
      Some(defaultRDD)
    } else {
      Some(ssc.sparkContext.emptyRDD)
    }
  }
}
```
Example 15
Source File: LocalSparkCluster.scala From drizzle-spark with Apache License 2.0 | 5 votes |
```scala
package org.apache.spark.deploy

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.SparkConf
import org.apache.spark.deploy.master.Master
import org.apache.spark.deploy.worker.Worker
import org.apache.spark.internal.Logging
import org.apache.spark.rpc.RpcEnv
import org.apache.spark.util.Utils

    for (workerNum <- 1 to numWorkers) {
      val workerEnv = Worker.startRpcEnvAndEndpoint(localHostname, 0, 0, coresPerWorker,
        memoryPerWorker, masters, null, Some(workerNum), _conf)
      workerRpcEnvs += workerEnv
    }

    masters
  }

  def stop() {
    logInfo("Shutting down local Spark cluster.")
    // Stop the workers before the master so they don't get upset that it disconnected
    workerRpcEnvs.foreach(_.shutdown())
    masterRpcEnvs.foreach(_.shutdown())
    workerRpcEnvs.foreach(_.awaitTermination())
    masterRpcEnvs.foreach(_.awaitTermination())
    masterRpcEnvs.clear()
    workerRpcEnvs.clear()
  }
}
```
Example 16
Source File: TaskResult.scala From drizzle-spark with Apache License 2.0 | 5 votes |
```scala
package org.apache.spark.scheduler

import java.io._
import java.nio.ByteBuffer

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.SparkEnv
import org.apache.spark.serializer.SerializerInstance
import org.apache.spark.storage.BlockId
import org.apache.spark.util.{AccumulatorV2, Utils}

// Task result. Also contains updates to accumulator variables.
private[spark] sealed trait TaskResult[T]

  def value(resultSer: SerializerInstance = null): T = {
    if (valueObjectDeserialized) {
      valueObject
    } else {
      // This should not run when holding a lock because it may cost dozens of seconds for a large
      // value
      val ser = if (resultSer == null) SparkEnv.get.serializer.newInstance() else resultSer
      valueObject = ser.deserialize(valueBytes)
      valueObjectDeserialized = true
      valueObject
    }
  }
}
```
Example 17
Source File: Schedulable.scala From drizzle-spark with Apache License 2.0 | 5 votes |
```scala
package org.apache.spark.scheduler

import java.util.concurrent.ConcurrentLinkedQueue

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.scheduler.SchedulingMode.SchedulingMode

private[spark] trait Schedulable {
  var parent: Pool
  // child queues
  def schedulableQueue: ConcurrentLinkedQueue[Schedulable]
  def schedulingMode: SchedulingMode
  def weight: Int
  def minShare: Int
  def runningTasks: Int
  def priority: Int
  def stageId: Int
  def name: String

  def addSchedulable(schedulable: Schedulable): Unit
  def removeSchedulable(schedulable: Schedulable): Unit
  def getSchedulableByName(name: String): Schedulable
  def executorLost(executorId: String, host: String, reason: ExecutorLossReason): Unit
  def checkSpeculatableTasks(minTimeToSpeculation: Int): Boolean
  def getSortedTaskSetQueue: ArrayBuffer[TaskSetManager]
}
```
Example 18
Source File: ChunkedByteBufferOutputStream.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.util.io import java.io.OutputStream import java.nio.ByteBuffer import scala.collection.mutable.ArrayBuffer import org.apache.spark.storage.StorageUtils private[this] var position = chunkSize private[this] var _size = 0 private[this] var closed: Boolean = false def size: Long = _size override def close(): Unit = { if (!closed) { super.close() closed = true } } override def write(b: Int): Unit = { require(!closed, "cannot write to a closed ChunkedByteBufferOutputStream") allocateNewChunkIfNeeded() chunks(lastChunkIndex).put(b.toByte) position += 1 _size += 1 } override def write(bytes: Array[Byte], off: Int, len: Int): Unit = { require(!closed, "cannot write to a closed ChunkedByteBufferOutputStream") var written = 0 while (written < len) { allocateNewChunkIfNeeded() val thisBatch = math.min(chunkSize - position, len - written) chunks(lastChunkIndex).put(bytes, written + off, thisBatch) written += thisBatch position += thisBatch } _size += len } @inline private def allocateNewChunkIfNeeded(): Unit = { if (position == chunkSize) { chunks += allocator(chunkSize) lastChunkIndex += 1 position = 0 } } def toChunkedByteBuffer: ChunkedByteBuffer = { require(closed, "cannot call toChunkedByteBuffer() unless close() has been called") require(!toChunkedByteBufferWasCalled, "toChunkedByteBuffer() can only be called once") toChunkedByteBufferWasCalled = true if (lastChunkIndex == -1) { new ChunkedByteBuffer(Array.empty[ByteBuffer]) } else { // Copy the first n-1 chunks to the output, and then create an array that fits the last chunk. // An alternative would have been returning an array of ByteBuffers, with the last buffer // bounded to only the last chunk's position. However, given our use case in Spark (to put // the chunks in block manager), only limiting the view bound of the buffer would still // require the block manager to store the whole chunk. val ret = new Array[ByteBuffer](chunks.size) for (i <- 0 until chunks.size - 1) { ret(i) = chunks(i) ret(i).flip() } if (position == chunkSize) { ret(lastChunkIndex) = chunks(lastChunkIndex) ret(lastChunkIndex).flip() } else { ret(lastChunkIndex) = allocator(position) chunks(lastChunkIndex).flip() ret(lastChunkIndex).put(chunks(lastChunkIndex)) ret(lastChunkIndex).flip() StorageUtils.dispose(chunks(lastChunkIndex)) } new ChunkedByteBuffer(ret) } } }
Example 19
Source File: SubtractedRDD.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.util.{HashMap => JHashMap} import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag import org.apache.spark.Dependency import org.apache.spark.OneToOneDependency import org.apache.spark.Partition import org.apache.spark.Partitioner import org.apache.spark.ShuffleDependency import org.apache.spark.SparkEnv import org.apache.spark.TaskContext private[spark] class SubtractedRDD[K: ClassTag, V: ClassTag, W: ClassTag]( @transient var rdd1: RDD[_ <: Product2[K, V]], @transient var rdd2: RDD[_ <: Product2[K, W]], part: Partitioner) extends RDD[(K, V)](rdd1.context, Nil) { override def getDependencies: Seq[Dependency[_]] = { def rddDependency[T1: ClassTag, T2: ClassTag](rdd: RDD[_ <: Product2[T1, T2]]) : Dependency[_] = { if (rdd.partitioner == Some(part)) { logDebug("Adding one-to-one dependency with " + rdd) new OneToOneDependency(rdd) } else { logDebug("Adding shuffle dependency with " + rdd) new ShuffleDependency[T1, T2, Any](rdd, part) } } Seq(rddDependency[K, V](rdd1), rddDependency[K, W](rdd2)) } override def getPartitions: Array[Partition] = { val array = new Array[Partition](part.numPartitions) for (i <- 0 until array.length) { // Each CoGroupPartition will depend on rdd1 and rdd2 array(i) = new CoGroupPartition(i, Seq(rdd1, rdd2).zipWithIndex.map { case (rdd, j) => dependencies(j) match { case s: ShuffleDependency[_, _, _] => None case _ => Some(new NarrowCoGroupSplitDep(rdd, i, rdd.partitions(i))) } }.toArray) } array } override val partitioner = Some(part) override def compute(p: Partition, context: TaskContext): Iterator[(K, V)] = { val partition = p.asInstanceOf[CoGroupPartition] val map = new JHashMap[K, ArrayBuffer[V]] def getSeq(k: K): ArrayBuffer[V] = { val seq = map.get(k) if (seq != null) { seq } else { val seq = new ArrayBuffer[V]() map.put(k, seq) seq } } def integrate(depNum: Int, op: Product2[K, V] => Unit): Unit = { dependencies(depNum) match { case oneToOneDependency: OneToOneDependency[_] => val dependencyPartition = partition.narrowDeps(depNum).get.split oneToOneDependency.rdd.iterator(dependencyPartition, context) .asInstanceOf[Iterator[Product2[K, V]]].foreach(op) case shuffleDependency: ShuffleDependency[_, _, _] => val iter = SparkEnv.get.shuffleManager .getReader( shuffleDependency.shuffleHandle, partition.index, partition.index + 1, context) .read() iter.foreach(op) } } // the first dep is rdd1; add all values to the map integrate(0, t => getSeq(t._1) += t._2) // the second dep is rdd2; remove all of its keys integrate(1, t => map.remove(t._1)) map.asScala.iterator.map(t => t._2.iterator.map((t._1, _))).flatten } override def clearDependencies() { super.clearDependencies() rdd1 = null rdd2 = null } }
Example 20
Source File: UnionRDD.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer import scala.collection.parallel.{ForkJoinTaskSupport, ThreadPoolTaskSupport} import scala.concurrent.forkjoin.ForkJoinPool import scala.reflect.ClassTag import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils private[spark] class UnionPartition[T: ClassTag]( idx: Int, @transient private val rdd: RDD[T], val parentRddIndex: Int, @transient private val parentRddPartitionIndex: Int) extends Partition { var parentPartition: Partition = rdd.partitions(parentRddPartitionIndex) def preferredLocations(): Seq[String] = rdd.preferredLocations(parentPartition) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization parentPartition = rdd.partitions(parentRddPartitionIndex) oos.defaultWriteObject() } } object UnionRDD { private[spark] lazy val partitionEvalTaskSupport = new ForkJoinTaskSupport(new ForkJoinPool(8)) } @DeveloperApi class UnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]]) extends RDD[T](sc, Nil) { // Nil since we implement getDependencies // visible for testing private[spark] val isPartitionListingParallel: Boolean = rdds.length > conf.getInt("spark.rdd.parallelListingThreshold", 10) override def getPartitions: Array[Partition] = { val parRDDs = if (isPartitionListingParallel) { val parArray = rdds.par parArray.tasksupport = UnionRDD.partitionEvalTaskSupport parArray } else { rdds } val array = new Array[Partition](parRDDs.map(_.partitions.length).seq.sum) var pos = 0 for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) { array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index) pos += 1 } array } override def getDependencies: Seq[Dependency[_]] = { val deps = new ArrayBuffer[Dependency[_]] var pos = 0 for (rdd <- rdds) { deps += new RangeDependency(rdd, 0, pos, rdd.partitions.length) pos += rdd.partitions.length } deps } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val part = s.asInstanceOf[UnionPartition[T]] parent[T](part.parentRddIndex).iterator(part.parentPartition, context) } override def getPreferredLocations(s: Partition): Seq[String] = s.asInstanceOf[UnionPartition[T]].preferredLocations() override def clearDependencies() { super.clearDependencies() rdds = null } }
Example 21
Source File: TaskContextImpl.scala From drizzle-spark with Apache License 2.0 | 5 votes |
```scala
package org.apache.spark

import java.util.Properties

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.executor.TaskMetrics
import org.apache.spark.internal.Logging
import org.apache.spark.memory.TaskMemoryManager
import org.apache.spark.metrics.MetricsSystem
import org.apache.spark.metrics.source.Source
import org.apache.spark.util._

private[spark] class TaskContextImpl(
    val stageId: Int,
    val partitionId: Int,
    override val taskAttemptId: Long,
    override val attemptNumber: Int,
    var _taskMemoryManager: TaskMemoryManager,
    localProperties: Properties,
    @transient private val metricsSystem: MetricsSystem,
    // The default value is only used in tests.
    override val taskMetrics: TaskMetrics = TaskMetrics.empty,
    var batchId: Int = 0)
  extends TaskContext
  with Logging {

  private[spark] def markInterrupted(): Unit = {
    interrupted = true
  }

  override def isCompleted(): Boolean = completed

  override def isRunningLocally(): Boolean = false

  override def isInterrupted(): Boolean = interrupted

  override def getLocalProperty(key: String): String = localProperties.getProperty(key)

  override def getMetricsSources(sourceName: String): Seq[Source] =
    metricsSystem.getSourcesByName(sourceName)

  private[spark] override def registerAccumulator(a: AccumulatorV2[_, _]): Unit = {
    taskMetrics.registerAccumulator(a)
  }
}
```
Example 22
Source File: TimeStampedHashMapSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.util import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.util.Random import org.apache.spark.SparkFunSuite class TimeStampedHashMapSuite extends SparkFunSuite { // Test the testMap function - a Scala HashMap should obviously pass testMap(new mutable.HashMap[String, String]()) // Test TimeStampedHashMap basic functionality testMap(new TimeStampedHashMap[String, String]()) testMapThreadSafety(new TimeStampedHashMap[String, String]()) test("TimeStampedHashMap - clearing by timestamp") { // clearing by insertion time val map = new TimeStampedHashMap[String, String](updateTimeStampOnGet = false) map("k1") = "v1" assert(map("k1") === "v1") Thread.sleep(10) val threshTime = System.currentTimeMillis assert(map.getTimestamp("k1").isDefined) assert(map.getTimestamp("k1").get < threshTime) map.clearOldValues(threshTime) assert(map.get("k1") === None) // clearing by modification time val map1 = new TimeStampedHashMap[String, String](updateTimeStampOnGet = true) map1("k1") = "v1" map1("k2") = "v2" assert(map1("k1") === "v1") Thread.sleep(10) val threshTime1 = System.currentTimeMillis Thread.sleep(10) assert(map1("k2") === "v2") // access k2 to update its access time to > threshTime assert(map1.getTimestamp("k1").isDefined) assert(map1.getTimestamp("k1").get < threshTime1) assert(map1.getTimestamp("k2").isDefined) assert(map1.getTimestamp("k2").get >= threshTime1) map1.clearOldValues(threshTime1) // should only clear k1 assert(map1.get("k1") === None) assert(map1.get("k2").isDefined) } def testMapThreadSafety(hashMapConstructor: => mutable.Map[String, String]) { def newMap() = hashMapConstructor val name = newMap().getClass.getSimpleName val testMap = newMap() @volatile var error = false def getRandomKey(m: mutable.Map[String, String]): Option[String] = { val keys = testMap.keysIterator.toSeq if (keys.nonEmpty) { Some(keys(Random.nextInt(keys.size))) } else { None } } val threads = (1 to 25).map(i => new Thread() { override def run() { try { for (j <- 1 to 1000) { Random.nextInt(3) match { case 0 => testMap(Random.nextString(10)) = Random.nextDouble().toString // put case 1 => getRandomKey(testMap).map(testMap.get) // get case 2 => getRandomKey(testMap).map(testMap.remove) // remove } } } catch { case t: Throwable => error = true throw t } } }) test(name + " - threading safety test") { threads.foreach(_.start()) threads.foreach(_.join()) assert(!error) } } }
Example 23
Source File: Predict.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.example.lenetLocal import com.intel.analytics.bigdl.dataset.image.{BytesToGreyImg, GreyImgNormalizer, GreyImgToSample} import com.intel.analytics.bigdl.nn.Module import com.intel.analytics.bigdl.utils.Engine import com.intel.analytics.bigdl.dataset.Sample import com.intel.analytics.bigdl.optim.LocalPredictor import org.apache.log4j.{Level, Logger} import scala.collection.mutable.ArrayBuffer object Predict { Logger.getLogger("org").setLevel(Level.ERROR) Logger.getLogger("akka").setLevel(Level.ERROR) Logger.getLogger("breeze").setLevel(Level.ERROR) import Utils._ def main(args: Array[String]): Unit = { predictParser.parse(args, new PredictParams()).foreach { param => System.setProperty("bigdl.localMode", "true") System.setProperty("bigdl.coreNumber", (param.coreNumber.toString)) Engine.init val validationData = param.folder + "/t10k-images-idx3-ubyte" val validationLabel = param.folder + "/t10k-labels-idx1-ubyte" val rawData = load(validationData, validationLabel) val iter = rawData.iterator val sampleIter = GreyImgToSample()( GreyImgNormalizer(trainMean, trainStd)( BytesToGreyImg(28, 28)(iter))) var samplesBuffer = ArrayBuffer[Sample[Float]]() while (sampleIter.hasNext) { val elem = sampleIter.next().clone() samplesBuffer += elem } val samples = samplesBuffer.toArray val model = Module.load[Float](param.model) val localPredictor = LocalPredictor(model) val result = localPredictor.predict(samples) val result_class = localPredictor.predictClass(samples) result_class.foreach(r => println(s"${r}")) } } }
Example 24
Source File: BatchSampler.scala From BigDL with Apache License 2.0 | 5 votes |
```scala
package com.intel.analytics.bigdl.transform.vision.image.label.roi

import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.transform.vision.image.util.{BboxUtil, BoundingBox}
import com.intel.analytics.bigdl.utils.RandomGenerator._

import scala.collection.mutable.ArrayBuffer

  def generateBatchSamples(label: RoiLabel, batchSamplers: Array[BatchSampler],
    sampledBoxes: ArrayBuffer[BoundingBox]): Unit = {
    sampledBoxes.clear()
    var i = 0
    val unitBox = BoundingBox(0, 0, 1, 1)
    while (i < batchSamplers.length) {
      batchSamplers(i).sample(unitBox, label, sampledBoxes)
      i += 1
    }
  }
}
```
Example 25
Source File: RandomSampler.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.transform.vision.image.label.roi import com.intel.analytics.bigdl.transform.vision.image.{FeatureTransformer, ImageFeature} import com.intel.analytics.bigdl.transform.vision.image.augmentation.Crop import com.intel.analytics.bigdl.transform.vision.image.util.{BoundingBox} import com.intel.analytics.bigdl.utils.RandomGenerator._ import org.opencv.core.Mat import scala.collection.mutable.ArrayBuffer class RandomSampler extends Crop { // random cropping samplers val batchSamplers = Array( new BatchSampler(maxTrials = 1), new BatchSampler(minScale = 0.3, minAspectRatio = 0.5, maxAspectRatio = 2, minOverlap = Some(0.1)), new BatchSampler(minScale = 0.3, minAspectRatio = 0.5, maxAspectRatio = 2, minOverlap = Some(0.3)), new BatchSampler(minScale = 0.3, minAspectRatio = 0.5, maxAspectRatio = 2, minOverlap = Some(0.5)), new BatchSampler(minScale = 0.3, minAspectRatio = 0.5, maxAspectRatio = 2, minOverlap = Some(0.7)), new BatchSampler(minScale = 0.3, minAspectRatio = 0.5, maxAspectRatio = 2, minOverlap = Some(0.9)), new BatchSampler(minScale = 0.3, minAspectRatio = 0.5, maxAspectRatio = 2, maxOverlap = Some(1.0))) def generateRoi(feature: ImageFeature): BoundingBox = { val roiLabel = feature(ImageFeature.label).asInstanceOf[RoiLabel] val boxesBuffer = new ArrayBuffer[BoundingBox]() BatchSampler.generateBatchSamples(roiLabel, batchSamplers, boxesBuffer) // randomly pick up one as input data if (boxesBuffer.nonEmpty) { // Randomly pick a sampled bbox and crop the expand_datum. val index = (RNG.uniform(0, 1) * boxesBuffer.length).toInt boxesBuffer(index) } else { BoundingBox(0, 0, 1, 1) } } } object RandomSampler { def apply(): FeatureTransformer = { new RandomSampler() -> RoiProject() } }
Example 26
Source File: RoiTransformer.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.transform.vision.image.label.roi import com.intel.analytics.bigdl.transform.vision.image.util.{BboxUtil, BoundingBox} import com.intel.analytics.bigdl.transform.vision.image.{FeatureTransformer, ImageFeature} import scala.collection.mutable.ArrayBuffer case class RoiProject(needMeetCenterConstraint: Boolean = true) extends FeatureTransformer { val transformedAnnot = new ArrayBuffer[BoundingBox]() override def transformMat(feature: ImageFeature): Unit = { val imageBoundary = feature[BoundingBox](ImageFeature.boundingBox) if (!imageBoundary.normalized) { imageBoundary.scaleBox(1.0f / feature.getHeight(), 1f / feature.getWidth(), imageBoundary) } val target = feature[RoiLabel](ImageFeature.label) transformedAnnot.clear() // Transform the annotation according to bounding box. var i = 1 while (i <= target.size()) { val gtBoxes = BoundingBox(target.bboxes.valueAt(i, 1), target.bboxes.valueAt(i, 2), target.bboxes.valueAt(i, 3), target.bboxes.valueAt(i, 4)) if (!needMeetCenterConstraint || imageBoundary.meetEmitCenterConstraint(gtBoxes)) { val transformedBox = new BoundingBox() if (imageBoundary.projectBbox(gtBoxes, transformedBox)) { transformedBox.setLabel(target.classes.valueAt(1, i)) transformedBox.setDifficult(target.classes.valueAt(2, i)) transformedAnnot.append(transformedBox) } } i += 1 } // write the transformed annotation back to target target.bboxes.resize(transformedAnnot.length, 4) target.classes.resize(2, transformedAnnot.length) i = 1 while (i <= transformedAnnot.length) { target.bboxes.setValue(i, 1, transformedAnnot(i - 1).x1) target.bboxes.setValue(i, 2, transformedAnnot(i - 1).y1) target.bboxes.setValue(i, 3, transformedAnnot(i - 1).x2) target.bboxes.setValue(i, 4, transformedAnnot(i - 1).y2) target.classes.setValue(1, i, transformedAnnot(i - 1).label) target.classes.setValue(2, i, transformedAnnot(i - 1).difficult) i += 1 } } }
Example 27
Source File: Mean.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.utils.tf.loaders import java.nio.ByteOrder import com.intel.analytics.bigdl.Module import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity} import com.intel.analytics.bigdl.nn.Sequential import com.intel.analytics.bigdl.nn.tf.Mean import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.tf.Context import org.tensorflow.framework.{DataType, NodeDef} import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag class Mean extends TensorflowOpsLoader { import Utils._ override def build[T: ClassTag](nodeDef: NodeDef, byteOrder: ByteOrder , context: Context[T])(implicit ev: TensorNumeric[T]): Module[T] = { val attr = nodeDef.getAttrMap val dataType = getType(attr, "T") val squeeze = !getBoolean(attr, "keep_dims") val dt = dataType match { case DataType.DT_INT8 => "Int" case DataType.DT_INT16 => "Int" case DataType.DT_UINT8 => "Int" case DataType.DT_UINT16 => "Int" case DataType.DT_INT32 => "Int" case DataType.DT_INT64 => "Long" case DataType.DT_FLOAT => "Float" case DataType.DT_DOUBLE => "Double" case _ => throw new UnsupportedOperationException("Data Type: " + dataType + " is not Unsupported yet.") } new MeanLoadTF[T](dt, squeeze) } } class MeanLoadTF[T: ClassTag](val dataType: String, val squeeze: Boolean)(implicit ev: TensorNumeric[T]) extends Adapter[T](Array(2)) { override def build(tensorArrays: Array[Tensor[_]]): AbstractModule[Activity, Activity, T] = { val dims = tensorArrays(0).asInstanceOf[Tensor[Int]] val dim = ArrayBuffer[Int]() val mean = Sequential[T]() for (i <- 1 to dims.size(1)) { dim += dims.valueAt(i) + 1 } dataType match { case "Int" => dim.foreach(i => mean.add(Mean[T, Int](i, squeeze = squeeze))) case "Long" => dim.foreach(i => mean.add(Mean[T, Long](i, squeeze = squeeze))) case "Float" => dim.foreach(i => mean.add(Mean[T, Float](i, squeeze = squeeze))) case "Double" => dim.foreach(i => mean.add(Mean[T, Double](i, squeeze = squeeze))) case _ => throw new UnsupportedOperationException("Data Type: " + dataType + " is not Unsupported yet.") } mean } }
Example 28
Source File: Transpose.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.utils.tf.loaders import java.nio.ByteOrder import com.intel.analytics.bigdl.Module import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity} import com.intel.analytics.bigdl.nn.{Contiguous, Sequential, Transpose => TransposeLayer} import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.tf.Context import org.tensorflow.framework.NodeDef import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag class Transpose extends TensorflowOpsLoader { import Utils._ override def build[T: ClassTag](nodeDef: NodeDef, byteOrder: ByteOrder , context: Context[T])(implicit ev: TensorNumeric[T]): Module[T] = { new TransposeLoadTF[T]() } } object TransposeLoadTF { def permToPair(perm: Array[Int]): Array[(Int, Int)] = { val numToRank = perm.zipWithIndex.toMap val arr = perm.indices.toArray val pairs = ArrayBuffer[(Int, Int)]() def sort(arr: Array[Int], low: Int, high: Int): Unit = { var i = low var j = high val pivot = arr(low + (high - low)/2) while (i <= j) { while (arr(i) < pivot) i += 1 while (arr(j) > pivot) j -= 1 if (i <= j) { exchangeNumbers(arr, i, j) i += 1 j -= 1 } } if (low < j) sort(arr, low, j) if (i < high) sort(arr, i, high) } def exchangeNumbers(arr: Array[Int], i: Int, j: Int): Unit = { val temp = arr(i) arr(i) = arr(j) arr(j) = temp pairs += ((i, j)) } sort(arr.map(numToRank), 0, arr.length-1) pairs.filter(pair => pair._1 != pair._2).toArray } } class TransposeLoadTF[T: ClassTag]()(implicit ev: TensorNumeric[T]) extends Adapter[T](Array(2)) { import TransposeLoadTF._ override def build(tensorArrays: Array[Tensor[_]]): AbstractModule[Activity, Activity, T] = { val perm = tensorArrays(0).asInstanceOf[Tensor[Int]].storage().array() val paris = permToPair(perm) val layer = Sequential() layer.add(TransposeLayer[T](paris.map(x => (x._1 + 1, x._2 + 1)))) layer.add(Contiguous()) layer } }
Example 29
package com.intel.analytics.bigdl.utils.tf.loaders import java.nio.ByteOrder import com.intel.analytics.bigdl.Module import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity} import com.intel.analytics.bigdl.nn.{Padding, Sequential} import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.tf.{Context, TFUtils} import org.tensorflow.framework.NodeDef import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag class Pad extends TensorflowOpsLoader { import Utils._ override def build[T: ClassTag](nodeDef: NodeDef, byteOrder: ByteOrder, context: Context[T])(implicit ev: TensorNumeric[T]): Module[T] = { new PadLoadTF[T]() } } class PadLoadTF[T: ClassTag]()(implicit ev: TensorNumeric[T]) extends Adapter[T](Array(2)) { override def build(tensorArrays: Array[Tensor[_]]): AbstractModule[Activity, Activity, T] = { val paddings = tensorArrays(0).asInstanceOf[Tensor[Int]] val pad = ArrayBuffer[Int]() val padding = Sequential[T]() for(dim <- 1 to paddings.size(1)) { if (paddings.valueAt(dim, 1) != 0 || paddings.valueAt(dim, 2) != 0 ) { if (paddings(Array(dim, 1)) != 0) { padding.add(Padding[T](dim, -paddings.valueAt(dim, 1), 4)) } if (paddings(Array(dim, 2)) != 0) { padding.add(Padding[T](dim, paddings.valueAt(dim, 2), 4)) } } } padding } }
Example 30
Source File: IRConverter.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.utils.intermediate import com.intel.analytics.bigdl.nn.Graph import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity} import com.intel.analytics.bigdl.nn.mkldnn._ import com.intel.analytics.bigdl.tensor.{FloatType, Tensor} import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.{Module, utils} import com.intel.analytics.bigdl.utils.{Engine, MklBlas, MklDnn, Node} import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag private[bigdl] class IRConverter[T: ClassTag](IRgraph: IRGraph[T])(implicit ev: TensorNumeric[T]) { private val allNodes = new ArrayBuffer[Node[IRElement[T]]] private val irInputs = IRgraph.inputs.toArray private val irOutputs = IRgraph.outputs.toArray init() private def init() : Unit = { getNodes(irInputs, allNodes) // reminder: some output nodes may not be searched from inputs irOutputs.foreach(node => { if (!allNodes.contains(node)) allNodes.append(node) }) } private def getNodes(inputs: Seq[Node[IRElement[T]]], nodesBuffer: ArrayBuffer[Node[IRElement[T]]]): Unit = { if (inputs.length == 0) return inputs.foreach(node => { if (!nodesBuffer.contains(node)) { nodesBuffer.append(node) getNodes(node.nextNodes, nodesBuffer) } }) } def toGraph() : Graph[T] = { if (utils.Engine.getEngineType() == MklBlas) { require(IRToBlas[T].convertingCheck(allNodes.toArray), "IR graph can not be converted to Blas layer") toBlasGraph() } else if (utils.Engine.getEngineType() == MklDnn) { require(ev.getType() == FloatType, "Mkldnn engine only supports float data") require(IRToDnn[Float].convertingCheck( allNodes.toArray.asInstanceOf[Array[Node[IRElement[Float]]]]), "IR graph can not be converted to Dnn layer") toDnnGraph() } else throw new UnsupportedOperationException( s"Only support engineType mkldnn/mklblas, but get ${Engine.getEngineType()}") } private def toDnnGraph(): Graph[T] = { val nodeMap = IRToDnn[Float].convert( allNodes.toArray.asInstanceOf[Array[Node[IRElement[Float]]]]) val inputs = irInputs.map( n => nodeMap.get(n.asInstanceOf[Node[IRElement[Float]]]).get) val outputs = irOutputs.map( n => nodeMap.get(n.asInstanceOf[Node[IRElement[Float]]]).get) // add input node for dnn graph val realInputs = inputs.map(n => { val node = new Node[Module[Float]](new InputWrapper()) n.from(node) node }) // add output node for graph val realOutputs = outputs.zipWithIndex.map { case (model: Node[Module[Float]], index: Int) => val node = if (model.element.isInstanceOf[BlasWrapper]) { model } else { model.add(new Node[Module[Float]](Output(IRgraph.outputFormats(index)))) } node } DnnGraph(realInputs, realOutputs, IRgraph.variables.asInstanceOf[Option[(Array[Tensor[Float]], Array[Tensor[Float]])]], IRgraph.generateBackward).asInstanceOf[Graph[T]] } private def toBlasGraph(): Graph[T] = { val nodeMap = IRToBlas[T].convert(allNodes.toArray) val inputs = irInputs.map(n => nodeMap.get(n).get) val outputs = irOutputs.map(n => nodeMap.get(n).get) Graph.dynamic(inputs, outputs, IRgraph.variables, IRgraph.generateBackward) } }
Example 31
Source File: FileReader.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.visualization.tensorboard import java.io.{BufferedInputStream} import java.nio.ByteBuffer import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.tensorflow.util.Event import scala.collection.mutable.ArrayBuffer import scala.util.matching.Regex private[bigdl] object FileReader { val fileNameRegex = """bigdl.tfevents.*""".r def readScalar(file: Path, tag: String, fs: FileSystem): Array[(Long, Float, Double)] = { require(fs.isFile(file), s"FileReader: ${file} should be a file") val bis = new BufferedInputStream(fs.open(file)) val longBuffer = new Array[Byte](8) val crcBuffer = new Array[Byte](4) val bf = new ArrayBuffer[(Long, Float, Double)] while (bis.read(longBuffer) > 0) { val l = ByteBuffer.wrap(longBuffer.reverse).getLong() bis.read(crcBuffer) // TODO: checksum // val crc1 = ByteBuffer.wrap(crcBuffer.reverse).getInt() val eventBuffer = new Array[Byte](l.toInt) bis.read(eventBuffer) val e = Event.parseFrom(eventBuffer) if (e.getSummary.getValueCount == 1 && tag.equals(e.getSummary.getValue(0).getTag())) { bf.append((e.getStep, e.getSummary.getValue(0).getSimpleValue, e.getWallTime)) } bis.read(crcBuffer) // val crc2 = ByteBuffer.wrap(crcBuffer.reverse).getInt() } bis.close() bf.toArray.sortWith(_._1 < _._1) } }
Example 32
Source File: Permute.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.nn.keras import com.intel.analytics.bigdl.nn.Transpose import com.intel.analytics.bigdl.nn.abstractnn.AbstractModule import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.Shape import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag class Permute[T: ClassTag]( val dims: Array[Int], val inputShape: Shape = null)(implicit ev: TensorNumeric[T]) extends KerasLayer[Tensor[T], Tensor[T], T](KerasLayer.addBatch(inputShape)) { private def permToPair(perm: Array[Int]): Array[(Int, Int)] = { val numToRank = perm.zipWithIndex.toMap val arr = perm.indices.toArray val pairs = ArrayBuffer[(Int, Int)]() def sort(arr: Array[Int], low: Int, high: Int): Unit = { var i = low var j = high val pivot = arr(low + (high - low)/2) while (i <= j) { while (arr(i) < pivot) i += 1 while (arr(j) > pivot) j -= 1 if (i <= j) { exchangeNumbers(arr, i, j) i += 1 j -= 1 } } if (low < j) sort(arr, low, j) if (i < high) sort(arr, i, high) } def exchangeNumbers(arr: Array[Int], i: Int, j: Int): Unit = { val temp = arr(i) arr(i) = arr(j) arr(j) = temp pairs += ((i, j)) } sort(arr.map(numToRank), 0, arr.length-1) pairs.filter(pair => pair._1 != pair._2).toArray } override def computeOutputShape(inputShape: Shape): Shape = { val input = inputShape.toSingle().toArray val outputShape = input.clone() var i = 0 while (i < dims.length) { outputShape(i + 1) = input(dims(i)) i += 1 } Shape(outputShape) } override def doBuild(inputShape: Shape): AbstractModule[Tensor[T], Tensor[T], T] = { val swaps = permToPair(dims.map(x => x - 1)).map(pair => (pair._1 + 2, pair._2 + 2)) val layer = Transpose(swaps) layer.asInstanceOf[AbstractModule[Tensor[T], Tensor[T], T]] } } object Permute { def apply[@specialized(Float, Double) T: ClassTag]( dims: Array[Int], inputShape: Shape = null)(implicit ev: TensorNumeric[T]): Permute[T] = { new Permute[T](dims, inputShape) } }
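permToPair sorts the permutation and records every exchange it performs in an ArrayBuffer[(Int, Int)], so the buffer ends up holding exactly the transpositions that Transpose needs. A stripped-down sketch of that record-the-swaps idea; a selection sort is used here for brevity, whereas the layer uses a quicksort, but the bookkeeping is the same:

import scala.collection.mutable.ArrayBuffer

// Sort an array in place while logging every swap as an (i, j) pair.
def sortRecordingSwaps(arr: Array[Int]): Array[(Int, Int)] = {
  val swaps = ArrayBuffer[(Int, Int)]()
  def swap(i: Int, j: Int): Unit = {
    val tmp = arr(i); arr(i) = arr(j); arr(j) = tmp
    swaps += ((i, j))
  }
  var i = 0
  while (i < arr.length) {
    var minIdx = i
    var j = i + 1
    while (j < arr.length) {
      if (arr(j) < arr(minIdx)) minIdx = j
      j += 1
    }
    if (minIdx != i) swap(i, minIdx)
    i += 1
  }
  swaps.toArray
}

val transpositions = sortRecordingSwaps(Array(2, 0, 1))
// transpositions: Array((0,1), (1,2)); applying these swaps in order sorts the array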
Example 33
Source File: FrameManager.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.nn import java.util.concurrent.atomic.AtomicInteger import com.intel.analytics.bigdl.nn.Graph.ModuleNode import com.intel.analytics.bigdl.nn.tf.{Exit, MergeOps, NextIteration} import scala.collection.mutable import scala.collection.mutable.ArrayBuffer class Frame[T] private[FrameManager] ( val name: String, val parent: Option[Frame[T]] ) { // Sync all next iteration nodes execution private[bigdl] var barrier: AtomicInteger = new AtomicInteger(0) // User can use NextIteration to sync execution. This is a list of those types of nodes private[bigdl] val waitingNodes: ArrayBuffer[ModuleNode[T]] = new ArrayBuffer[ModuleNode[T]]() // Nodes should be refreshed in an iteration of the frame private[bigdl] val nodes: ArrayBuffer[ModuleNode[T]] = new ArrayBuffer[ModuleNode[T]]() }
Example 34
Source File: TimeDistributedCriterion.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.nn import com.intel.analytics.bigdl.nn.abstractnn.TensorCriterion import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.Engine import scala.collection.mutable.ArrayBuffer import scala.concurrent.Future import scala.reflect.ClassTag require(input.size(dimension) == target.size(dimension), s"target should have as many elements as input, " + s"input ${input.size(dimension)}, target ${target.size(dimension)}") gradInput.resizeAs(input).zero() val nstep = input.size(dimension) var i = 0 while (i < nstep) { val _i = i + 1 results(i) = Engine.model.invoke(() => { fInput = input.select(dimension, _i) fTarget = target.select(dimension, _i) _gradInput = gradInput.select(dimension, _i) _gradInput.copy(cells(_i - 1).updateGradInput(fInput, fTarget).toTensor[T]) if (sizeAverage) { _gradInput = _gradInput.div(ev.fromType[Int](nstep)) } }) i += 1 } Engine.model.sync(results) gradInput } override def canEqual(other: Any): Boolean = other.isInstanceOf[TimeDistributedCriterion[T]] } object TimeDistributedCriterion { def apply[@specialized(Float, Double) T: ClassTag]( critrn: TensorCriterion[T] = null, sizeAverage: Boolean = false, dimension: Int = 2) (implicit ev: TensorNumeric[T]) : TimeDistributedCriterion[T] = { new TimeDistributedCriterion[T](critrn, sizeAverage, dimension) } }
Example 35
Source File: ExpandSize.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.nn import com.intel.analytics.bigdl.nn.abstractnn.AbstractModule import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag class ExpandSize[T: ClassTag](targetSizes: Array[Int]) (implicit ev: TensorNumeric[T]) extends AbstractModule[Tensor[T], Tensor[T], T] { override def updateOutput(input: Tensor[T]): Tensor[T] = { require(targetSizes.length == input.dim(), s"the number of dimensions provided must equal ${input.dim()}") val tensorDim = input.dim() val tensorStride = input.stride() val tensorSize = input.size() var i = 0 while (i < tensorDim) { if (targetSizes(i) != -1) { if (tensorSize(i) == 1) { tensorSize(i) = targetSizes(i) tensorStride(i) = 0 } else if (tensorSize(i) != targetSizes(i)) { throw new UnsupportedOperationException( "incorrect size: only supporting singleton expansion (size=1)") } } i += 1 } output.set(input.storage(), input.storageOffset(), tensorSize, tensorStride) output } override def updateGradInput(input: Tensor[T], gradOutput: Tensor[T]): Tensor[T] = { val tensorDim = input.dim() val tensorSize = input.size() gradInput = Tensor[T](tensorSize) val expandDim = new ArrayBuffer[Int]() var i = 0 while (i < tensorDim) { if (targetSizes(i) != -1) { if (tensorSize(i) == 1 && targetSizes(i) != 1) { expandDim.append(i + 1) } } i += 1 } i = expandDim.size - 1 val sizes = gradOutput.size() var _gradOutput = gradOutput while (i >= 0) { var start = 1 sizes(expandDim(i) - 1) = 1 val _gradInput = Tensor[T](sizes) while (start <= gradOutput.size(expandDim(i))) { val x = _gradOutput.narrow(expandDim(i), start, 1) _gradInput.add(x) start += 1 } _gradOutput = _gradInput i -= 1 } gradInput = _gradOutput gradInput } override def toString: String = s"ExpandSize" } object ExpandSize { def apply[@specialized(Float, Double) T: ClassTag](targetSizes: Array[Int]) (implicit ev: TensorNumeric[T]) : ExpandSize[T] = { new ExpandSize[T](targetSizes) } }
Example 36
Source File: Utils.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.nn.quantized import com.intel.analytics.bigdl.Module import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity, TensorModule} import com.intel.analytics.bigdl.nn.tf.WithoutInput import com.intel.analytics.bigdl.nn.{Cell, Container, Graph, Input, TimeDistributed, Linear => NNLinear, SpatialConvolution => NNConv, SpatialDilatedConvolution => NNDilatedConv} import com.intel.analytics.bigdl.tensor.{QuantizedTensor, Tensor} import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.Node import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag object Utils { type ModuleNode[R] = AbstractModule[Activity, Activity, R] type SeqNodes[R] = Seq[Node[ModuleNode[R]]] type ArrayNodes[R] = Array[Node[ModuleNode[R]]] type ANode[R] = Node[ModuleNode[R]] type AbsModule[R] = AbstractModule[Activity, Activity, R] def reorganizeParameters[T: ClassTag](parameters: Array[Tensor[T]])( implicit ev: TensorNumeric[T]): Tensor[T] = { var length = 0 for (i <- parameters.indices) { if (!parameters(i).isInstanceOf[QuantizedTensor[T]]) { length += parameters(i).nElement() } } val result = Tensor[T](length) var offset = 0 for (i <- parameters.indices) { val parameter = parameters(i) if (!parameter.isInstanceOf[QuantizedTensor[T]]) { val length = parameter.nElement() val (src, srcOffset) = (parameter.storage().array(), parameter.storageOffset() - 1) val (dst, dstOffset) = (result.storage().array(), offset) val (size, stride) = (parameter.size(), parameter.stride()) System.arraycopy(src, srcOffset, dst, dstOffset, length) parameter.set(result.storage(), offset + 1, size, stride) offset += length } } result } }
Example 37
package com.intel.analytics.bigdl.nn.ops import com.intel.analytics.bigdl.nn.abstractnn.Activity import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.Table import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag class Any[T: ClassTag](keepDim : Boolean = false, startFromZero : Boolean = false) (implicit ev: TensorNumeric[T]) extends Operation[Table, Tensor[Boolean], T] { output = Tensor[Boolean]() private var buffer = Tensor[Boolean]() override def updateOutput(input: Table): Tensor[Boolean] = { val data = input[Tensor[Boolean]](1) val indices = input[Tensor[Int]](2) require(indices.nDimension() == 1 || indices.isScalar, "indices must be 1D tensor or scala") output.resizeAs(data) buffer.resizeAs(data).copy(data) val reduceDims = new ArrayBuffer[Int]() val size = output.size() if (indices.isScalar) { val dim = if (indices.value() < 0) { data.nDimension() + indices.value() + 1 } else if (startFromZero) { indices.value() + 1 } else { indices.value() } if (size(dim - 1) != 1) { size(dim - 1) = 1 reduceDims += dim output.resize(size) buffer.reduce(dim, output, (a, b) => a || b) buffer.resizeAs(output).copy(output) } } else { var i = 1 while (i <= indices.size(1)) { val dim = if (indices.valueAt(i) < 0) { data.nDimension() + indices.valueAt(i) + 1 } else if (startFromZero) { indices.valueAt(i) + 1 } else { indices.valueAt(i) } if (size(dim - 1) != 1) { size(dim - 1) = 1 reduceDims += dim output.resize(size) buffer.reduce(dim, output, (a, b) => a || b) buffer.resizeAs(output).copy(output) } i += 1 } } if (!keepDim) { val sizeBuffer = new ArrayBuffer[Int]() var i = 1 while (i <= data.nDimension()) { if (!reduceDims.contains(i)) sizeBuffer.append(data.size(i)) i += 1 } output.resize(sizeBuffer.toArray) } output } override def clearState(): this.type = { super.clearState() buffer.set() this } } object Any { def apply[T: ClassTag](keepDim: Boolean = false, startFromZero : Boolean = false) (implicit ev: TensorNumeric[T]): Any[T] = new Any[T](keepDim, startFromZero) }
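When keepDim is false, the operation rebuilds the output shape by walking the input dimensions and appending only the ones that were not reduced to a fresh ArrayBuffer; the All operation in Example 42 uses the identical bookkeeping with && instead of ||. That shape logic in isolation, separated from the tensor code:

import scala.collection.mutable.ArrayBuffer

// Dimensions are 1-based here, matching the convention in the example.
def shapeAfterReduce(inputSize: Array[Int], reduceDims: Seq[Int]): Array[Int] = {
  val sizeBuffer = new ArrayBuffer[Int]()
  var i = 1
  while (i <= inputSize.length) {
    if (!reduceDims.contains(i)) sizeBuffer.append(inputSize(i - 1))
    i += 1
  }
  sizeBuffer.toArray
}

shapeAfterReduce(Array(2, 3, 4), reduceDims = Seq(2)) // Array(2, 4)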
Example 38
Source File: CategoricalColVocaList.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.nn.ops import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.HashFunc import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag class CategoricalColVocaList[T: ClassTag]( val vocaList: Array[String], val strDelimiter: String = ",", val isSetDefault: Boolean = false, val numOovBuckets: Int = 0 ) (implicit ev: TensorNumeric[T]) extends Operation[Tensor[String], Tensor[Int], T]{ private val vocaLen = vocaList.length private val vocaMap = vocaList.zipWithIndex.toMap require(numOovBuckets >= 0, "numOovBuckets is a negative integer") require(!(isSetDefault && numOovBuckets != 0), "defaultValue and numOovBuckets are both specified") require(vocaLen > 0, "the vocabulary list is empty") require(vocaLen == vocaMap.size, "the vocabulary list contains duplicate keys") output = Tensor[Int]() override def updateOutput(input: Tensor[String]): Tensor[Int] = { input.squeeze() val rows = input.size(dim = 1) val cols = if (numOovBuckets==0) { if (isSetDefault) vocaLen + 1 else vocaLen } else { vocaLen + numOovBuckets } val shape = Array(rows, cols) val indices0 = new ArrayBuffer[Int]() val indices1 = new ArrayBuffer[Int]() val values = new ArrayBuffer[Int]() var i = 1 while (i <= rows) { var feaStrArr = input.valueAt(i).split(strDelimiter) if (!isSetDefault && numOovBuckets == 0) { feaStrArr = feaStrArr.filter(x => vocaMap.contains(x)) } var j = 0 while (j < feaStrArr.length) { val mapVal = numOovBuckets==0 match { case true => vocaMap.getOrElse(feaStrArr(j), vocaMap.size) case false => vocaMap.getOrElse(feaStrArr(j), HashFunc.stringHashBucket32(feaStrArr(j), numOovBuckets) + vocaLen) } indices0 += i-1 indices1 += j values += mapVal j += 1 } i += 1 } val indices = Array(indices0.toArray, indices1.toArray) output = Tensor.sparse(indices, values.toArray, shape) output } } object CategoricalColVocaList { def apply[T: ClassTag]( vocaList: Array[String], strDelimiter: String = ",", isSetDefault: Boolean = false, numOovBuckets: Int = 0 ) (implicit ev: TensorNumeric[T]): CategoricalColVocaList[T] = new CategoricalColVocaList[T]( vocaList = vocaList, strDelimiter = strDelimiter, isSetDefault = isSetDefault, numOovBuckets = numOovBuckets ) }
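The operation builds its sparse output by growing three parallel ArrayBuffers (row indices, column indices and values) and only materialising arrays at the end. The same pattern in isolation, with a plain Map standing in for the vocabulary lookup and a small case class standing in for BigDL's Tensor.sparse; both are assumptions of this sketch:

import scala.collection.mutable.ArrayBuffer

// Illustrative container for COO-style sparse data.
case class SparseCoo(indices: Array[Array[Int]], values: Array[Int], shape: Array[Int])

val vocaMap = Map("a" -> 0, "b" -> 1, "c" -> 2)
val rowsOfFeatures = Seq("a,c", "b", "c,a,b") // one comma-separated string per row

val indices0 = new ArrayBuffer[Int]() // row index of each non-zero entry
val indices1 = new ArrayBuffer[Int]() // column position within the row
val values = new ArrayBuffer[Int]()   // looked-up vocabulary id

rowsOfFeatures.zipWithIndex.foreach { case (row, i) =>
  row.split(",").zipWithIndex.foreach { case (feature, j) =>
    indices0 += i
    indices1 += j
    values += vocaMap(feature)
  }
}

val sparse = SparseCoo(
  indices = Array(indices0.toArray, indices1.toArray),
  values = values.toArray,
  shape = Array(rowsOfFeatures.length, vocaMap.size))
// sparse.values: Array(0, 2, 1, 2, 0, 1)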
Example 39
Source File: CategoricalColHashBucket.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.nn.ops import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag import scala.util.hashing.MurmurHash3 class CategoricalColHashBucket[T: ClassTag]( val hashBucketSize: Int, val strDelimiter: String = ",", val isSparse: Boolean = true )(implicit ev: TensorNumeric[T]) extends Operation[Tensor[String], Tensor[Int], T] { output = Tensor[Int]() override def updateOutput(input: Tensor[String]): Tensor[Int] = { val rows = input.size(dim = 1) val indices0 = new ArrayBuffer[Int]() val indices1 = new ArrayBuffer[Int]() val values = new ArrayBuffer[Int]() var i = 1 var max_fea_len = 0 while(i <= rows) { val feaStrArr = input.valueAt(i, 1).split(strDelimiter) max_fea_len = math.max(max_fea_len, feaStrArr.length) var j = 0 while(j < feaStrArr.length) { val hashVal = MurmurHash3.stringHash(feaStrArr(j)) % hashBucketSize match { case v if v < 0 => v + hashBucketSize case v => v } indices0 += i-1 indices1 += j values += hashVal j += 1 } i += 1 } val indices = Array(indices0.toArray, indices1.toArray) val shape = Array(rows, max_fea_len) output = isSparse match { case true => Tensor.sparse(indices, values.toArray, shape) case false => Tensor.dense(Tensor.sparse(indices, values.toArray, shape)) } output } } object CategoricalColHashBucket{ def apply[T: ClassTag]( hashBucketSize: Int, strDelimiter: String = ",", isSparse: Boolean = true) (implicit ev: TensorNumeric[T]) : CategoricalColHashBucket[T] = new CategoricalColHashBucket[T]( hashBucketSize = hashBucketSize, strDelimiter = strDelimiter, isSparse = isSparse ) }
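MurmurHash3.stringHash can return a negative value, so the example normalises the remainder back into [0, hashBucketSize) with a pattern match before appending it to the values buffer. That normalisation on its own:

import scala.util.hashing.MurmurHash3

def hashBucket(feature: String, hashBucketSize: Int): Int =
  MurmurHash3.stringHash(feature) % hashBucketSize match {
    case v if v < 0 => v + hashBucketSize // fold negative remainders back into range
    case v => v
  }

hashBucket("some-category", 100) // always in 0 until 100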
Example 40
package com.intel.analytics.bigdl.nn.ops import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.Table import com.intel.analytics.bigdl.nn.{Sum => SumLayer} import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag class Sum[T: ClassTag, D: ClassTag](val keepDims: Boolean, val startFromZero: Boolean = false) (implicit ev: TensorNumeric[T], ev2: TensorNumeric[D]) extends Operation[Table, Tensor[D], T] { private val sum: SumLayer[D] = SumLayer[D](squeeze = !keepDims) output = Tensor[D]() override def updateOutput(input: Table): Tensor[D] = { val data = input[Tensor[D]](1) val dims = input[Tensor[Int]](2) output.resizeAs(data).copy(data) val sumDims = if (dims.isEmpty) { return output } else if (dims.isScalar) { Array(if (startFromZero) dims.value() + 1 else dims.value()) } else { require(dims.nDimension() == 1, s"Only accept 1D as dims, but now is ${dims.nDimension()}") val buffer = new ArrayBuffer[Int]() dims.apply1(a => { buffer.append(if (startFromZero) a + 1 else a) a }) buffer.toArray.sortWith(_ > _) } var i = 0 while(i < sumDims.length) { sum.changeSumDims(sumDims(i)) val tmp = sum.updateOutput(output) output.resizeAs(tmp).copy(tmp) i += 1 } output } override def getClassTagNumerics() : (Array[ClassTag[_]], Array[TensorNumeric[_]]) = { (Array[ClassTag[_]](scala.reflect.classTag[T], scala.reflect.classTag[D]), Array[TensorNumeric[_]](ev, ev2)) } } object Sum { def apply[T: ClassTag, D: ClassTag](keepDims: Boolean = false, startFromZero: Boolean = false) (implicit ev: TensorNumeric[T], ev2: TensorNumeric[D]): Sum[T, D] = new Sum(keepDims, startFromZero) }
Example 41
Source File: Kv2Tensor.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.nn.ops import com.intel.analytics.bigdl.nn.abstractnn.Activity import com.intel.analytics.bigdl.tensor._ import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.Table import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag class Kv2Tensor[T: ClassTag, D: ClassTag]( val kvDelimiter: String, val itemDelimiter: String, val transType: Int )(implicit ev: TensorNumeric[T], ev2: TensorNumeric[D]) extends Operation[Table, Tensor[D], T]{ output = Activity.allocate[Tensor[D], D]() override def updateOutput(input: Table): Tensor[D] = { val kvTensor = input[Tensor[String]](1) val feaLen = input[Tensor[Int]](2).value() val indices0 = new ArrayBuffer[Int]() val indices1 = new ArrayBuffer[Int]() val values = new ArrayBuffer[D]() val rows = kvTensor.size(dim = 1) val shape = Array(rows, feaLen) var i = 1 while(i<=rows) { val kvFeaString = kvTensor.select(1, i).valueAt(1) kvFeaString.split(kvDelimiter).foreach { kv => indices0 += i-1 indices1 += kv.split(itemDelimiter)(0).toInt ev2.getType() match { case DoubleType => values += kv.split(itemDelimiter)(1).toDouble.asInstanceOf[D] case FloatType => values += kv.split(itemDelimiter)(1).toFloat.asInstanceOf[D] case t => throw new NotImplementedError(s"$t is not supported") } } i += 1 } val indices = Array(indices0.toArray, indices1.toArray) val resTensor = transType match { case 0 => Tensor.dense(Tensor.sparse(indices, values.toArray, shape)) case 1 => Tensor.sparse(indices, values.toArray, shape) } output = resTensor output } override def getClassTagNumerics() : (Array[ClassTag[_]], Array[TensorNumeric[_]]) = { (Array[ClassTag[_]](scala.reflect.classTag[T], scala.reflect.classTag[D]), Array[TensorNumeric[_]](ev, ev2)) } } object Kv2Tensor{ def apply[T: ClassTag, D: ClassTag]( kvDelimiter: String = ",", itemDelimiter: String = ":", transType: Int = 0) (implicit ev: TensorNumeric[T], ev2: TensorNumeric[D]): Kv2Tensor[T, D] = new Kv2Tensor[T, D]( kvDelimiter = kvDelimiter, itemDelimiter = itemDelimiter, transType = transType ) }
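Each row arrives as a single "key:value,key:value" string; the operation splits it twice and pushes one (row, column, value) triple per pair into three ArrayBuffers. A minimal parser for that format, independent of the tensor types; the COO result is returned as plain arrays here:

import scala.collection.mutable.ArrayBuffer

// Parse rows of "col:value" pairs into parallel COO arrays.
def parseKvRows(rows: Seq[String],
                kvDelimiter: String = ",",
                itemDelimiter: String = ":"): (Array[Int], Array[Int], Array[Double]) = {
  val rowIdx = new ArrayBuffer[Int]()
  val colIdx = new ArrayBuffer[Int]()
  val values = new ArrayBuffer[Double]()
  rows.zipWithIndex.foreach { case (row, i) =>
    row.split(kvDelimiter).foreach { kv =>
      val Array(k, v) = kv.split(itemDelimiter)
      rowIdx += i
      colIdx += k.toInt
      values += v.toDouble
    }
  }
  (rowIdx.toArray, colIdx.toArray, values.toArray)
}

val (rowIdx, colIdx, vals) = parseKvRows(Seq("0:0.1,1:0.2", "1:0.3,3:0.5"))
// rowIdx: Array(0, 0, 1, 1), colIdx: Array(0, 1, 1, 3), vals: Array(0.1, 0.2, 0.3, 0.5)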
Example 42
package com.intel.analytics.bigdl.nn.ops import com.intel.analytics.bigdl.nn.abstractnn.Activity import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.Table import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag class All[T: ClassTag](keepDim : Boolean = false, startFromZero : Boolean = false) (implicit ev: TensorNumeric[T]) extends Operation[Table, Tensor[Boolean], T] { output = Tensor[Boolean]() private var buffer = Tensor[Boolean]() override def updateOutput(input: Table): Tensor[Boolean] = { val data = input[Tensor[Boolean]](1) val indices = input[Tensor[Int]](2) require(indices.nDimension() == 1 || indices.isScalar, "indices must be 1D tensor or scala") output.resizeAs(data) buffer.resizeAs(data).copy(data) val reduceDims = new ArrayBuffer[Int]() val size = output.size() if (indices.isScalar) { val dim = if (indices.value() < 0) { data.nDimension() + indices.value() + 1 } else if (startFromZero) { indices.value() + 1 } else { indices.value() } if (size(dim - 1) != 1) { size(dim - 1) = 1 reduceDims += dim output.resize(size) buffer.reduce(dim, output, (a, b) => a && b) buffer.resizeAs(output).copy(output) } } else { var i = 1 while (i <= indices.size(1)) { val dim = if (indices.valueAt(i) < 0) { data.nDimension() + indices.valueAt(i) + 1 } else if (startFromZero) { indices.valueAt(i) + 1 } else { indices.valueAt(i) } if (size(dim - 1) != 1) { size(dim - 1) = 1 reduceDims += dim output.resize(size) buffer.reduce(dim, output, (a, b) => a && b) buffer.resizeAs(output).copy(output) } i += 1 } } if (!keepDim) { val sizeBuffer = new ArrayBuffer[Int]() var i = 1 while (i <= data.nDimension()) { if (!reduceDims.contains(i)) sizeBuffer.append(data.size(i)) i += 1 } output.resize(sizeBuffer.toArray) } output } override def clearState(): this.type = { super.clearState() buffer.set() this } } object All { def apply[T: ClassTag](keepDim: Boolean = false, startFromZero : Boolean = false) (implicit ev: TensorNumeric[T]): All[T] = new All[T](keepDim, startFromZero) }
Example 43
Source File: ParallelTable.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.nn import com.intel.analytics.bigdl.nn.Graph.ModuleNode import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity} import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.Table import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag @SerialVersionUID(- 1197848941394786045L) class ParallelTable[T: ClassTag] (implicit ev: TensorNumeric[T]) extends DynamicContainer[Table, Table, T] { override def updateOutput(input: Table): Table = { var i = 0 while (i < input.length()) { output.update(i + 1, modules(i).forward(input(i + 1))) i += 1 } output } override def updateGradInput(input: Table, gradOutput: Table): Table = { var i = 0 while (i < input.length()) { gradInput.update(i + 1, modules(i).updateGradInput(input(i + 1), gradOutput(i + 1))) i += 1 } gradInput } override def accGradParameters(input: Table, gradOutput: Table): Unit = { var i = 0 while (i < input.length()) { modules(i).accGradParameters(input(i + 1), gradOutput(i + 1)) i += 1 } } override def backward(input: Table, gradOutput: Table): Table = { val before = System.nanoTime() var i = 0 while (i < input.length()) { gradInput.update(i + 1, modules(i).backward(input(i + 1), gradOutput(i + 1))) i += 1 } backwardTime += System.nanoTime() - before gradInput } override def getEndNodes(startNodes: Array[ModuleNode[T]]): Array[ModuleNode[T]] = { val outputs = ArrayBuffer[ModuleNode[T]]() var outputTuple: Array[ModuleNode[T]] = null require(startNodes.length == modules.length, s"ParallelTable: " + s"startNodes length ${startNodes.length} is more than modules length ${modules.length}") for (i <- 0 to modules.size - 1) { outputTuple = modules(i).getEndNodes(Array(startNodes(i))) outputs ++= outputTuple } outputs.toArray } override def toString: String = { val tab = "\t" val line = "\n" val next = " |`-> " val lastNext = " `-> " val ext = " | " val extlast = " " val last = " ... -> " var str = "nn.ParallelTable" str = str + " {" + line + tab + "input" var i = 1 while (i <= modules.length) { if (i == modules.length) { str = str + line + tab + lastNext + "(" + i + "): " + modules(i-1).toString.replace(line, line + tab + extlast) } else { str = str + line + tab + next + "(" + i + "): " + modules(i-1).toString.replace(line, line + tab + ext) } i += 1 } str = str + line + tab + last + "output" str = str + line + "}" str } } object ParallelTable { def apply[@specialized(Float, Double) T: ClassTag]() (implicit ev: TensorNumeric[T]) : ParallelTable[T] = { new ParallelTable[T]() } }
Example 44
Source File: MultiCriterion.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.nn import com.intel.analytics.bigdl.nn.abstractnn.{Activity, AbstractCriterion} import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.utils.T import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag @SerialVersionUID(- 8679064077837483164L) class MultiCriterion[@specialized(Float, Double) T: ClassTag] (implicit ev: TensorNumeric[T]) extends AbstractCriterion[Activity, Activity, T] { private val weights = new ArrayBuffer[Double] private val criterions = T() def add(criterion: AbstractCriterion[Activity, Activity, T], weight: Double = 1): Unit = { criterions.insert(criterions.length() + 1, criterion) weights.append(weight) } override def updateOutput(input: Activity, target: Activity): T = { var i = 1 while (i <= criterions.length) { output = ev.plus(output, ev.times(ev.fromType(weights(i-1)), criterions[AbstractCriterion[Activity, Activity, T]](i).updateOutput(input, target))) i +=1 } output } override def updateGradInput(input: Activity, target: Activity): Activity = { gradInput = Utils.recursiveResizeAs[T](gradInput, input) Utils.recursiveFill[T](gradInput, 0) var i = 1 while (i <= criterions.length) { Utils.recursiveAdd(gradInput, weights(i - 1), criterions[AbstractCriterion[Activity, Activity, T]](i).updateGradInput(input, target)) i += 1 } gradInput } override def canEqual(other: Any): Boolean = other.isInstanceOf[MultiCriterion[T]] override def equals(other: Any): Boolean = other match { case that: MultiCriterion[T] => super.equals(that) && (that canEqual this) && weights == that.weights case _ => false } override def hashCode(): Int = { def getHashCode(a: Any): Int = if (a == null) 0 else a.hashCode() val state = Seq(super.hashCode(), weights) state.map(getHashCode).foldLeft(0)((a, b) => 31 * a + b) } override def toString(): String = { s"nn.MultiCriterion" } } object MultiCriterion { def apply[@specialized(Float, Double) T: ClassTag]() (implicit ev: TensorNumeric[T]) : MultiCriterion[T] = { new MultiCriterion[T]() } }
Example 45
Source File: Metrics.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.optim import com.google.common.util.concurrent.AtomicDouble import org.apache.spark.{Accumulable, Accumulator, SparkContext} import scala.collection.mutable.{ArrayBuffer, Map} class Metrics extends Serializable { private val localMetricsMap: Map[String, LocalMetricsEntry] = Map() private val aggregateDistributeMetricsMap: Map[String, AggregateDistributeMetricsEntry] = Map() private val distributeMetricsMap: Map[String, DistributeMetricsEntry] = Map() def add(name: String, value: Double): this.type = { require(localMetricsMap.contains(name) || aggregateDistributeMetricsMap.contains(name) || distributeMetricsMap.contains(name)) if (localMetricsMap.contains(name)) { localMetricsMap(name).value.addAndGet(value) } if (aggregateDistributeMetricsMap.contains(name)) { aggregateDistributeMetricsMap(name).value += value } if (distributeMetricsMap.contains(name)) { distributeMetricsMap(name).value += value } this } def set(name: String, value: Double, parallel: Int = 1): this.type = { require(!aggregateDistributeMetricsMap.contains(name), "duplicated distribute metric") require(!distributeMetricsMap.contains(name), "duplicated distribute metric2") if (localMetricsMap.contains(name)) { localMetricsMap(name).value.set(value) localMetricsMap(name).parallel = parallel } else { localMetricsMap(name) = LocalMetricsEntry(new AtomicDouble(value), parallel) } this } def set(name: String, value: Double, sc: SparkContext, parallel: Int): this.type = { require(!localMetricsMap.contains(name), "duplicated local metric") if (aggregateDistributeMetricsMap.contains(name)) { aggregateDistributeMetricsMap(name).value.setValue(value) aggregateDistributeMetricsMap(name).parallel = parallel } else { aggregateDistributeMetricsMap(name) = AggregateDistributeMetricsEntry(sc.accumulator(value, name), parallel) } this } def set(name: String, value: ArrayBuffer[Double], sc: SparkContext): this.type = { require(!localMetricsMap.contains(name), "duplicated local metric") require(!aggregateDistributeMetricsMap.contains(name), "duplicated distribute metric") if (distributeMetricsMap.contains(name)) { distributeMetricsMap(name).value.setValue(value) } else { distributeMetricsMap(name) = DistributeMetricsEntry(sc.accumulableCollection(value)) } this } def get(name: String): (Double, Int) = { require(localMetricsMap.contains(name) || aggregateDistributeMetricsMap.contains(name)) if (localMetricsMap.contains(name)) { (localMetricsMap(name).value.get(), localMetricsMap(name).parallel) } else { (aggregateDistributeMetricsMap(name).value.value, aggregateDistributeMetricsMap(name).parallel) } } def get(name: String, number: Int): Array[Double] = { require(distributeMetricsMap.contains(name)) distributeMetricsMap(name).value.value.toArray.dropRight(number) } def summary(unit: String = "s", scale: Double = 1e9): String = { "========== Metrics Summary ==========\n" + localMetricsMap.map( entry => s"${entry._1} : ${entry._2.value.get() / entry._2.parallel / scale} $unit\n") .mkString("") + aggregateDistributeMetricsMap.map( entry => s"${entry._1} : ${entry._2.value.value / entry._2.parallel / scale} $unit\n") .mkString("") + distributeMetricsMap.map { entry => s"${entry._1} : ${entry._2.value.value.map(_ / scale).mkString(" ")} \n" }.mkString("") + "=====================================" } } private case class LocalMetricsEntry(value: AtomicDouble, var parallel: Int) private case class AggregateDistributeMetricsEntry(value: Accumulator[Double], var parallel: Int) private case class DistributeMetricsEntry(value: Accumulable[ArrayBuffer[Double], Double])
Example 46
Source File: BatchSamplerSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.transform.vision.image.label.roi import com.intel.analytics.bigdl.tensor.{Storage, Tensor} import com.intel.analytics.bigdl.transform.vision.image.util.BoundingBox import org.scalatest.{FlatSpec, Matchers} import scala.collection.mutable.ArrayBuffer class BatchSamplerSpec extends FlatSpec with Matchers { "batch sampler with no change" should "work properly" in { val sampler = new BatchSampler(maxTrials = 1) val unitBox = BoundingBox(0, 0, 1, 1) val boxes = Tensor(Storage(Array(0.582296, 0.334719, 0.673582, 0.52183, 0.596127, 0.282744, 0.670816, 0.449064, 0.936376, 0.627859, 0.961272, 0.733888, 0.896266, 0.640333, 0.923928, 0.740125).map(x => x.toFloat))).resize(4, 4) val classes = Tensor[Float](4).randn() val target = RoiLabel(classes, boxes) val sampledBoxes = new ArrayBuffer[BoundingBox]() sampler.sample(unitBox, target, sampledBoxes) sampledBoxes.length should be(1) sampledBoxes(0) should be(unitBox) } "satisfySampleConstraint with minOverlap 0.1" should "work properly" in { val boxes = Tensor(Storage(Array(0.418, 0.396396, 0.55, 0.666667, 0.438, 0.321321, 0.546, 0.561562, 0.93, 0.81982, 0.966, 0.972973, 0.872, 0.837838, 0.912, 0.981982).map(x => x.toFloat))).resize(4, 4) val classes = Tensor[Float](4).randn() val target = RoiLabel(classes, boxes) val sampledBox = BoundingBox(0.114741f, 0.248062f, 0.633665f, 0.763736f) val sampler = new BatchSampler(minScale = 0.3, minAspectRatio = 0.5, maxAspectRatio = 2, minOverlap = Some(0.1)) sampler.satisfySampleConstraint(sampledBox, target) should be(true) } "satisfySampleConstraint with minOverlap 0.3" should "work properly" in { val boxes = Tensor(Storage(Array(0.418, 0.396396, 0.55, 0.666667, 0.438, 0.321321, 0.546, 0.561562, 0.93, 0.81982, 0.966, 0.972973, 0.872, 0.837838, 0.912, 0.981982).map(x => x.toFloat))).resize(4, 4) val classes = Tensor[Float](4).randn() val target = RoiLabel(classes, boxes) val sampledBox = BoundingBox(0.266885f, 0.416113f, 0.678256f, 0.67208f) val sampler = new BatchSampler(minScale = 0.3, minAspectRatio = 0.5, maxAspectRatio = 2, minOverlap = Some(0.3)) sampler.satisfySampleConstraint(sampledBox, target) should be(true) } "batch samplers" should "work properly" in { val boxes = Tensor(Storage(Array(0.418, 0.396396, 0.55, 0.666667, 0.438, 0.321321, 0.546, 0.561562, 0.93, 0.81982, 0.966, 0.972973, 0.872, 0.837838, 0.912, 0.981982).map(x => x.toFloat))).resize(4, 4) val classes = Tensor[Float](4).randn() val target = RoiLabel(classes, boxes) val sampledBoxes = new ArrayBuffer[BoundingBox]() val batchSamplers = Array( new BatchSampler(maxTrials = 1), new BatchSampler(minScale = 0.3, minAspectRatio = 0.5, maxAspectRatio = 2, minOverlap = Some(0.1)), new BatchSampler(minScale = 0.3, minAspectRatio = 0.5, maxAspectRatio = 2, minOverlap = Some(0.3)), new BatchSampler(minScale = 0.3, minAspectRatio = 0.5, maxAspectRatio = 2, minOverlap = Some(0.5)), new BatchSampler(minScale = 0.3, minAspectRatio = 0.5, maxAspectRatio = 2, minOverlap = Some(0.7)), new BatchSampler(minScale = 0.3, minAspectRatio = 0.5, maxAspectRatio = 2, minOverlap = Some(0.9)), new BatchSampler(minScale = 0.3, minAspectRatio = 0.5, maxAspectRatio = 2, maxOverlap = Some(1.0))) BatchSampler.generateBatchSamples(target, batchSamplers, sampledBoxes) sampledBoxes.foreach(box => { println(box) }) } }
Example 47
Source File: BigDLSpecHelper.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.utils import java.io.{File => JFile} import org.apache.log4j.Logger import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers} import scala.collection.mutable.ArrayBuffer abstract class BigDLSpecHelper extends FlatSpec with Matchers with BeforeAndAfter { protected val logger = Logger.getLogger(getClass) private val tmpFiles : ArrayBuffer[JFile] = new ArrayBuffer[JFile]() protected def createTmpFile(): JFile = { val file = java.io.File.createTempFile("UnitTest", "BigDLSpecBase") logger.info(s"created file $file") tmpFiles.append(file) file } protected def getFileFolder(path: String): String = { path.substring(0, path.lastIndexOf(JFile.separator)) } protected def getFileName(path: String): String = { path.substring(path.lastIndexOf(JFile.separator) + 1) } def doAfter(): Unit = {} def doBefore(): Unit = {} before { doBefore() } after { doAfter() tmpFiles.foreach(f => { if (f.exists()) { require(f.isFile, "cannot clean folder") f.delete() logger.info(s"deleted file $f") } }) } }
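The helper keeps every temp file it hands out in an ArrayBuffer[JFile] so the after hook can sweep them up. The same bookkeeping pattern outside of ScalaTest; file creation is plain java.io here, nothing BigDL-specific:

import java.io.{File => JFile}
import scala.collection.mutable.ArrayBuffer

class TmpFileTracker {
  private val tmpFiles: ArrayBuffer[JFile] = new ArrayBuffer[JFile]()

  def createTmpFile(): JFile = {
    val file = JFile.createTempFile("UnitTest", "Example")
    tmpFiles.append(file) // remember it so it can be deleted later
    file
  }

  def cleanup(): Unit = {
    tmpFiles.foreach(f => if (f.exists()) f.delete())
    tmpFiles.clear()
  }
}

val tracker = new TmpFileTracker
val tmpFile = tracker.createTmpFile()
tracker.cleanup() // tmpFile is deleted and the buffer is emptied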
Example 48
Source File: Kv2TensorSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.nn.ops import com.intel.analytics.bigdl.tensor.{DenseType, SparseType, Tensor} import com.intel.analytics.bigdl.utils.serializer.ModuleSerializationTest import com.intel.analytics.bigdl.utils.{T, Table} import org.scalatest.{FlatSpec, Matchers} import scala.collection.mutable.ArrayBuffer import scala.util.Random class Kv2TensorSpec extends FlatSpec with Matchers { protected def randDoubles(length: Int, lp: Double = 0.0, up: Double = 1.0): Array[Double] = { (1 to length).map(_ => lp + (up - lp) * Random.nextDouble()).toArray } protected def randKVMap(size: Int, numActive: Int, lp: Double = 0.0, up: Double = 1.0): Map[Int, Double] = { require(numActive <= size) val keys = Random.shuffle((0 until size).toList).take(numActive) val values = randDoubles(numActive, lp, up) keys.zip(values).toMap } val batchLen = 3 val numActive = Array(2, 3, 5) val feaLen = 8 val originData = new ArrayBuffer[String]() val originArr = new ArrayBuffer[Table]() val indices0 = new ArrayBuffer[Int]() val indices1 = new ArrayBuffer[Int]() val values = new ArrayBuffer[Double]() for (i <- 0 until batchLen) { val kvMap = randKVMap(feaLen, numActive(i)) val kvStr = kvMap.map(data => s"${data._1}:${data._2}").mkString(",") originData += kvStr originArr += T(kvStr) indices0 ++= ArrayBuffer.fill(numActive(i))(i) val kvArr = kvMap.toArray indices1 ++= kvArr.map(kv => kv._1) values ++= kvArr.map(kv => kv._2) } val originTable = T.array(originArr.toArray) val indices = Array(indices0.toArray, indices1.toArray) val shape = Array(batchLen, feaLen) "Kv2Tensor operation kvString to SparseTensor" should "work correctly" in { val input = T( Tensor[String](originTable), Tensor[Int](Array(feaLen), shape = Array[Int]()) ) val expectOutput = Tensor.sparse[Double]( indices = indices, values = values.toArray, shape = shape ) val output = Kv2Tensor[Double, Double](transType = 1) .forward(input) output should be(expectOutput) } "Kv2Tensor operation kvString to DenseTensor" should "work correctly" in { val input = T( Tensor[String](originTable), Tensor[Int](Array(feaLen), shape = Array[Int]()) ) val expectOutput = Tensor.dense(Tensor.sparse[Double]( indices = indices, values = values.toArray, shape = shape )) val output = Kv2Tensor[Double, Double](transType = 0) .forward(input) output should be(expectOutput) } } class Kv2TensorSerialTest extends ModuleSerializationTest { override def test(): Unit = { val kv2tensor = Kv2Tensor[Float, Float]( kvDelimiter = ",", itemDelimiter = ":", transType = 0 ).setName("kv2tensor") val input = T( Tensor[String]( T(T("0:0.1,1:0.2"), T("1:0.3,3:0.5"), T("2:0.15,4:0.25"))), Tensor[Int](Array(5), shape = Array[Int]()) ) runSerializationTest(kv2tensor, input) } }
Example 49
Source File: RMSpropSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.optim import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.utils.{T, TestUtils} import org.scalatest.{FlatSpec, Matchers} import scala.collection.mutable.ArrayBuffer // @com.intel.analytics.bigdl.tags.Parallel @com.intel.analytics.bigdl.tags.Serial class RMSpropSpec extends FlatSpec with Matchers { val start = System.currentTimeMillis() "RMSprop" should "perform well on rosenbrock function" in { val x = Tensor[Double](2).fill(0) val config = T("learningRate" -> 5e-4) val optm = new RMSprop[Double] var fx = new ArrayBuffer[Double] for (i <- 1 to 10001) { val result = optm.optimize(TestUtils.rosenBrock, x, config) if ((i - 1) % 1000 == 0) { fx += result._2(0) } } println(s"x is \n$x") println("fx is") for (i <- 1 to fx.length) { println(s"${(i - 1) * 1000 + 1}, ${fx(i - 1)}") } val spend = System.currentTimeMillis() - start println("Time Cost: " + spend + "ms") (fx.last < 1e-4) should be(true) x(Array(1)) should be(1.0 +- 0.01) x(Array(2)) should be(1.0 +- 0.01) } }
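This spec, and the Adagrad, LBFGS, Adadelta, Adamax and Adam specs that follow, all share the same measurement pattern: run the optimizer in a loop, append the loss to an ArrayBuffer[Double] every 1000 iterations, then assert on fx.last. The pattern in isolation, with a toy quadratic and hand-written gradient descent standing in for TestUtils.rosenBrock and BigDL's optimizers:

import scala.collection.mutable.ArrayBuffer

// Toy objective f(x) = (x - 3)^2, minimised by plain gradient descent.
var x = 0.0
val learningRate = 0.1
val fx = new ArrayBuffer[Double]

for (i <- 1 to 10001) {
  val loss = (x - 3.0) * (x - 3.0)
  val grad = 2.0 * (x - 3.0)
  x -= learningRate * grad
  if ((i - 1) % 1000 == 0) fx += loss // sample the loss curve sparsely
}

assert(fx.head > fx.last) // the recorded curve is decreasing
assert(fx.last < 1e-9)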
Example 50
Source File: AdagradSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.optim import com.intel.analytics.bigdl.utils.{TestUtils, T} import org.scalatest.{FlatSpec, Matchers} import com.intel.analytics.bigdl.tensor.Tensor import scala.collection.mutable.ArrayBuffer @com.intel.analytics.bigdl.tags.Parallel class AdagradSpec extends FlatSpec with Matchers { "adagrad" should "perform well on rosenbrock function" in { val x = Tensor[Double](2).fill(0) val config = T("learningRate" -> 1e-1) val optm = new Adagrad[Double] var fx = new ArrayBuffer[Double] for (i <- 1 to 10001) { val result = optm.optimize(TestUtils.rosenBrock, x, config) if ((i - 1) % 1000 == 0) { fx += (result._2(0)) } } println(s"x is \n$x") println("fx is") for (i <- 1 to fx.length) { println(s"${(i - 1) * 1000 + 1}, ${fx(i - 1)}") } (fx.last < 1e-9) should be(true) x(Array(1)) should be(1.0 +- 0.01) x(Array(2)) should be(1.0 +- 0.01) } }
Example 51
Source File: LBFGSSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.optim import com.intel.analytics.bigdl.utils.{TestUtils, T} import org.scalatest.{FlatSpec, Matchers} import com.intel.analytics.bigdl.tensor.Tensor import scala.collection.mutable.ArrayBuffer @com.intel.analytics.bigdl.tags.Parallel class LBFGSSpec extends FlatSpec with Matchers { "torchLBFGS in regular batch test" should "perform well on rosenbrock function" in { val x = Tensor[Double](2).fill(0) val optm = new LBFGS[Double] val result = optm.optimize(TestUtils.rosenBrock, x, T("maxIter" -> 100, "learningRate" -> 1e-1)) val fx = result._2 println() println("Rosenbrock test") println() println(s"x = $x") println("fx = ") for (i <- 1 to fx.length) { println(s"$i ${fx(i - 1)}") } println() println() fx.last < 1e-6 should be(true) } "torchLBFGS in stochastic test" should "perform well on rosenbrock function" in { val x = Tensor[Double](2).fill(0) val optm = new LBFGS[Double] val fx = new ArrayBuffer[Double]() val config = T("maxIter" -> 1, "learningRate" -> 1e-1) for (i <- 1 to 100) { val result = optm.optimize(TestUtils.rosenBrock, x, config) fx.append(result._2(0)) } println() println("Rosenbrock test") println() println(s"x = $x") println("fx = ") for (i <- 1 to fx.length) { println(s"$i ${fx(i - 1)}") } println() println() fx.last < 1e-6 should be(true) } }
Example 52
Source File: AdadeltaSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.optim import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.utils.{T, TestUtils} import org.scalatest.{FlatSpec, Matchers} import scala.collection.mutable.ArrayBuffer @com.intel.analytics.bigdl.tags.Parallel class AdadeltaSpec extends FlatSpec with Matchers { val start = System.currentTimeMillis() "adadelta" should "perform well on rosenbrock function" in { val x = Tensor[Double](2).fill(0) val config = T("Epsilon" -> 1e-10) val optm = new Adadelta[Double] var fx = new ArrayBuffer[Double] for (i <- 1 to 10001) { val result = optm.optimize(TestUtils.rosenBrock, x, config) if ((i - 1) % 1000 == 0) { fx += result._2(0) } } println(s"x is \n$x") println("fx is") for (i <- 1 to fx.length) { println(s"${(i - 1) * 1000 + 1}, ${fx(i - 1)}") } val spend = System.currentTimeMillis() - start println("Time Cost: " + spend + "ms") (fx.last < 1e-4) should be(true) x(Array(1)) should be(1.0 +- 0.02) x(Array(2)) should be(1.0 +- 0.02) } }
Example 53
Source File: AdamaxSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.optim import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.utils.{T, TestUtils} import org.scalatest.{FlatSpec, Matchers} import scala.collection.mutable.ArrayBuffer @com.intel.analytics.bigdl.tags.Parallel class AdamaxSpec extends FlatSpec with Matchers { val start = System.currentTimeMillis() "adamax" should "perform well on rosenbrock function" in { val x = Tensor[Double](2).fill(0) val config = T() val optm = new Adamax[Double] var fx = new ArrayBuffer[Double] for (i <- 1 to 10001) { val result = optm.optimize(TestUtils.rosenBrock, x, config) if ((i - 1) % 1000 == 0) { fx += result._2(0) } } println(s"x is \n$x") println("fx is") for (i <- 1 to fx.length) { println(s"${(i - 1) * 1000 + 1}, ${fx(i - 1)}") } val spend = System.currentTimeMillis() - start println("Time Cost: " + spend + "ms") (fx.last < 1e-9) should be(true) x(Array(1)) should be(1.0 +- 0.01) x(Array(2)) should be(1.0 +- 0.01) } }
Example 54
Source File: AdamSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.optim import com.intel.analytics.bigdl.nn.{CrossEntropyCriterion, Linear, Sequential} import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.utils.{Engine, RandomGenerator, T, TestUtils} import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers} import scala.collection.mutable.ArrayBuffer import scala.util.Random @com.intel.analytics.bigdl.tags.Parallel class AdamSpec extends FlatSpec with Matchers with BeforeAndAfter { before { System.setProperty("bigdl.localMode", "true") System.setProperty("spark.master", "local[2]") Engine.init } after { System.clearProperty("bigdl.localMode") System.clearProperty("spark.master") } val start = System.currentTimeMillis() "adam" should "perform well on rosenbrock function" in { val x = Tensor[Double](2).fill(0) val config = T("learningRate" -> 0.002) val optm = new Adam[Double] var fx = new ArrayBuffer[Double] for (i <- 1 to 10001) { val result = optm.optimize(TestUtils.rosenBrock, x, config) if ((i - 1) % 1000 == 0) { fx += result._2(0) } } println(s"x is \n$x") println("fx is") for (i <- 1 to fx.length) { println(s"${(i - 1) * 1000 + 1}, ${fx(i - 1)}") } val spend = System.currentTimeMillis() - start println("Time Cost: " + spend + "ms") (fx.last < 1e-9) should be(true) x(Array(1)) should be(1.0 +- 0.01) x(Array(2)) should be(1.0 +- 0.01) } "ParallelAdam" should "perform well on rosenbrock function" in { val x = Tensor[Double](2).fill(0) val optm = new ParallelAdam[Double](learningRate = 0.002, parallelNum = 2) var fx = new ArrayBuffer[Double] for (i <- 1 to 10001) { val result = optm.optimize(TestUtils.rosenBrock, x) if ((i - 1) % 1000 == 0) { fx += result._2(0) } } println(s"x is \n$x") println("fx is") for (i <- 1 to fx.length) { println(s"${(i - 1) * 1000 + 1}, ${fx(i - 1)}") } val spend = System.currentTimeMillis() - start println("Time Cost: " + spend + "ms") (fx.last < 1e-9) should be(true) x(Array(1)) should be(1.0 +- 0.01) x(Array(2)) should be(1.0 +- 0.01) } }
Example 55
Source File: TrimmedIndependentPixelEvaluator.scala From scalismo-faces with Apache License 2.0 | 5 votes |
package scalismo.faces.sampling.face.evaluators import scalismo.color.{RGB, RGBA} import scalismo.faces.image.{ImageBuffer, PixelImage, PixelImageDomain} import scalismo.sampling.DistributionEvaluator import scalismo.sampling.evaluators.PairEvaluator import scala.collection.mutable.ArrayBuffer def visualize(values: IndexedSeq[(Double, Int, Int)], domain: PixelImageDomain, callBack: PixelImage[Option[Double]] => Unit): Unit = { val buffer = ImageBuffer.makeConstantBuffer[Option[Double]](domain.width, domain.height, None) values.foreach { case (lh: Double, x: Int, y: Int) => buffer(x, y) = Some(lh) } callBack(buffer.toImage) } var transparencySum = 0.0 var values = ArrayBuffer[(Double, Int, Int)]() var x: Int = 0 while (x < reference.width) { var y: Int = 0 while (y < reference.height) { val smp = sample(x, y) if (smp.a > 1e-4f) { val ref = reference(x, y).toRGB val fg: Double = pixelEvaluator.logValue(ref, smp.toRGB) val bg: Double = bgEvaluator.logValue(ref) val entry = (fg - bg, x, y) values += entry } transparencySum += smp.a y += 1 } x += 1 } val nCount = math.floor(values.length.toFloat * alphaClamped).toInt if (transparencySum > 0 && nCount > 0) { //was something rendered on the image? val data = values.toIndexedSeq.sortBy { case (d: Double, x: Int, y: Int) => d } var sumTrimmed: Double = 0.0 for (i <- 0 until nCount) { sumTrimmed += data(data.size - 1 - i)._1 } if (visualizationCallback.isDefined) visualize(data.slice(data.size - 1 - nCount, data.size - 1), reference.domain, visualizationCallback.get) sumTrimmed } else { // nothing was rendered on the image! Double.NegativeInfinity } } override def toString: String = { val builder = new StringBuilder(128) builder ++= "TrimmedIndependentPixelEvaluator(" builder ++= pixelEvaluator.toString builder ++= "/" builder ++= bgEvaluator.toString builder ++= s"alpha=$alphaClamped" builder ++= ")" builder.mkString } } object TrimmedIndependentPixelEvaluator { def apply(pixelEvaluator: PairEvaluator[RGB], bgEvaluator: DistributionEvaluator[RGB], alpha: Double) = new TrimmedIndependentPixelEvaluator(pixelEvaluator, bgEvaluator, alpha, None) def apply(pixelEvaluator: PairEvaluator[RGB], bgEvaluator: DistributionEvaluator[RGB], alpha: Double, visualisationCallback: PixelImage[Option[Double]] => Unit) = new TrimmedIndependentPixelEvaluator(pixelEvaluator, bgEvaluator, alpha, Some(visualisationCallback)) }
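The evaluator keeps one (logLikelihoodRatio, x, y) triple per rendered pixel, sorts them, and sums only the best alpha fraction, which makes the likelihood robust to pixels the model cannot explain. The trimming step in isolation; the per-pixel values are fabricated here, whereas the real ones come from the foreground and background evaluators:

import scala.collection.mutable.ArrayBuffer

// Fake per-pixel log-likelihood ratios with their pixel coordinates.
val values = ArrayBuffer[(Double, Int, Int)](
  (-5.0, 0, 0), (1.5, 1, 0), (2.0, 0, 1), (-0.5, 1, 1), (3.0, 2, 1))

val alpha = 0.6                                      // keep the best 60% of pixels
val nCount = math.floor(values.length * alpha).toInt // 3 pixels survive the trim

val data = values.toIndexedSeq.sortBy { case (d, _, _) => d } // ascending by ratio
var sumTrimmed = 0.0
for (i <- 0 until nCount) {
  sumTrimmed += data(data.size - 1 - i)._1 // accumulate from the high end
}
// sumTrimmed = 3.0 + 2.0 + 1.5 = 6.5; the two poorly explained pixels are ignored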
Example 56
Source File: MorphologicalFilter.scala From scalismo-faces with Apache License 2.0 | 5 votes |
package scalismo.faces.image.filter import scalismo.faces.image.AccessMode._ import scalismo.faces.image._ import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag def perPixel(x: Int, y: Int): A = { var kx = 0 var kernelPixels = new ArrayBuffer[A](width * height) while (kx < width) { val ix = x + kx - width / 2 var ky = 0 while (ky < height) { val iy = y + ky - height / 2 if (structuringElement(kx, ky)) kernelPixels += image(ix, iy) ky += 1 } kx += 1 } if (kernelPixels.nonEmpty) windowFilter(kernelPixels) else image(x, y) } if(width <= 0 || height <= 0) image else PixelImage(image.width, image.height, perPixel, Strict()) } } object MorphologicalFilter { def boxElement(size: Int): PixelImage[Boolean] = PixelImage.view(size, size, (x, y) => x >= 0 && x < size && y >= 0 && y < size) }
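For every output pixel the filter gathers the neighbourhood values selected by the structuring element into an ArrayBuffer that is pre-sized with new ArrayBuffer[A](width * height), then hands that window to windowFilter (for example min for erosion, max for dilation). A plain version of the gather step; representing the image and structuring element as functions is an assumption of this sketch:

import scala.collection.mutable.ArrayBuffer

val width = 3
val height = 3
val image: (Int, Int) => Int = (x, y) => x + 10 * y            // stand-in image access
val structuringElement: (Int, Int) => Boolean = (_, _) => true // full 3x3 box

def gatherWindow(x: Int, y: Int): ArrayBuffer[Int] = {
  // Pre-size the buffer to the maximum window size to avoid regrowth.
  val kernelPixels = new ArrayBuffer[Int](width * height)
  var kx = 0
  while (kx < width) {
    var ky = 0
    while (ky < height) {
      if (structuringElement(kx, ky))
        kernelPixels += image(x + kx - width / 2, y + ky - height / 2)
      ky += 1
    }
    kx += 1
  }
  kernelPixels
}

val eroded = gatherWindow(5, 5).min // erosion keeps the minimum over the window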
Example 57
Source File: ImmutableSelection.scala From hacktoberfest-scala-algorithms with GNU General Public License v3.0 | 5 votes |
package io.github.sentenza.hacktoberfest.algos import scala.collection.mutable.ArrayBuffer import scala.math.Ordered def quickSelect(list: List[Int], idx: Int): Option[Int] = { if (idx < 0 || list.size <= idx) return None list match { case Nil => None case pivot :: rest => { val (smaller, larger) = rest partition (_ <= pivot) val pivotIdx = smaller.size idx.compare(pivotIdx) match { case needleInSmaller if needleInSmaller < 0 => quickSelect(smaller, idx) case needleIsPivot if needleIsPivot == 0 => Some(pivot) case needleInLarger if needleInLarger > 0 => quickSelect(larger, idx - pivotIdx - 1) } } } } }
Example 58
Source File: RocksEdgeFetcher.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.storage.rocks import com.typesafe.config.Config import org.apache.s2graph.core._ import org.apache.s2graph.core.schema.Label import org.apache.s2graph.core.storage.{SKeyValue, StorageIO, StorageSerDe} import org.apache.s2graph.core.types.{HBaseType, VertexId} import org.rocksdb.RocksDB import scala.collection.mutable.ArrayBuffer import scala.concurrent.{ExecutionContext, Future} class RocksEdgeFetcher(val graph: S2GraphLike, val config: Config, val db: RocksDB, val vdb: RocksDB, val serDe: StorageSerDe, val io: StorageIO) extends EdgeFetcher { import RocksStorage._ override def fetches(queryRequests: Seq[QueryRequest], prevStepEdges: Map[VertexId, Seq[EdgeWithScore]])(implicit ec: ExecutionContext): Future[Seq[StepResult]] = { val futures = for { queryRequest <- queryRequests } yield { val parentEdges = prevStepEdges.getOrElse(queryRequest.vertex.id, Nil) val edge = graph.elementBuilder.toRequestEdge(queryRequest, parentEdges) val rpc = buildRequest(graph, serDe, queryRequest, edge) fetchKeyValues(vdb, db, rpc).map { kvs => val queryParam = queryRequest.queryParam val stepResult = io.toEdges(kvs, queryRequest, queryRequest.prevStepScore, false, parentEdges) val edgeWithScores = stepResult.edgeWithScores.filter { case edgeWithScore => val edge = edgeWithScore.edge val duration = queryParam.durationOpt.getOrElse((Long.MinValue, Long.MaxValue)) edge.ts >= duration._1 && edge.ts < duration._2 } stepResult.copy(edgeWithScores = edgeWithScores) } } Future.sequence(futures) } override def fetchEdgesAll()(implicit ec: ExecutionContext) = { val edges = new ArrayBuffer[S2EdgeLike]() Label.findAll().groupBy(_.hbaseTableName).toSeq.foreach { case (hTableName, labels) => val distinctLabels = labels.toSet val iter = db.newIterator() try { iter.seekToFirst() while (iter.isValid) { val kv = SKeyValue(table, iter.key(), SKeyValue.EdgeCf, qualifier, iter.value, System.currentTimeMillis()) serDe.indexEdgeDeserializer(schemaVer = HBaseType.DEFAULT_VERSION).fromKeyValues(Seq(kv), None) .filter(e => distinctLabels(e.innerLabel) && e.getDirection() == "out" && !e.isDegree) .foreach { edge => edges += edge } iter.next() } } finally { iter.close() } } Future.successful(edges) } }
Example 59
Source File: RocksVertexFetcher.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.storage.rocks import com.typesafe.config.Config import org.apache.hadoop.hbase.util.Bytes import org.apache.s2graph.core._ import org.apache.s2graph.core.schema.ServiceColumn import org.apache.s2graph.core.storage.rocks.RocksStorage.{qualifier, table} import org.apache.s2graph.core.storage.{SKeyValue, StorageIO, StorageSerDe} import org.apache.s2graph.core.types.HBaseType import org.rocksdb.RocksDB import scala.collection.mutable.ArrayBuffer import scala.concurrent.{ExecutionContext, Future} class RocksVertexFetcher(val graph: S2GraphLike, val config: Config, val db: RocksDB, val vdb: RocksDB, val serDe: StorageSerDe, val io: StorageIO) extends VertexFetcher { private def fetchKeyValues(queryRequest: QueryRequest, vertex: S2VertexLike)(implicit ec: ExecutionContext): Future[Seq[SKeyValue]] = { val rpc = RocksStorage.buildRequest(queryRequest, vertex) RocksStorage.fetchKeyValues(vdb, db, rpc) } override def fetchVertices(vertexQueryParam: VertexQueryParam)(implicit ec: ExecutionContext): Future[Seq[S2VertexLike]] = { def fromResult(kvs: Seq[SKeyValue], version: String): Seq[S2VertexLike] = { if (kvs.isEmpty) Nil else serDe.vertexDeserializer(version).fromKeyValues(kvs, None).toSeq.filter(vertexQueryParam.where.get.filter) } val vertices = vertexQueryParam.vertexIds.map(vId => graph.elementBuilder.newVertex(vId)) val futures = vertices.map { vertex => val queryParam = QueryParam.Empty val q = Query.toQuery(Seq(vertex), Seq(queryParam)) val queryRequest = QueryRequest(q, stepIdx = -1, vertex, queryParam) fetchKeyValues(queryRequest, vertex).map { kvs => fromResult(kvs, vertex.serviceColumn.schemaVersion) } recoverWith { case ex: Throwable => Future.successful(Nil) } } Future.sequence(futures).map(_.flatten) } override def fetchVerticesAll()(implicit ec: ExecutionContext) = { import scala.collection.mutable val vertices = new ArrayBuffer[S2VertexLike]() ServiceColumn.findAll().groupBy(_.service.hTableName).toSeq.foreach { case (hTableName, columns) => val distinctColumns = columns.toSet val iter = vdb.newIterator() val buffer = mutable.ListBuffer.empty[SKeyValue] var oldVertexIdBytes = Array.empty[Byte] var minusPos = 0 try { iter.seekToFirst() while (iter.isValid) { val row = iter.key() if (!Bytes.equals(oldVertexIdBytes, 0, oldVertexIdBytes.length - minusPos, row, 0, row.length - 1)) { if (buffer.nonEmpty) serDe.vertexDeserializer(schemaVer = HBaseType.DEFAULT_VERSION).fromKeyValues(buffer, None) .filter(v => distinctColumns(v.serviceColumn)) .foreach { vertex => vertices += vertex } oldVertexIdBytes = row minusPos = 1 buffer.clear() } val kv = SKeyValue(table, iter.key(), SKeyValue.VertexCf, qualifier, iter.value(), System.currentTimeMillis()) buffer += kv iter.next() } if (buffer.nonEmpty) serDe.vertexDeserializer(schemaVer = HBaseType.DEFAULT_VERSION).fromKeyValues(buffer, None) .filter(v => distinctColumns(v.serviceColumn)) .foreach { vertex => vertices += vertex } } finally { iter.close() } } Future.successful(vertices) } }
Example 60
Source File: BytesUtilV1.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.counter.core.v1 import org.apache.hadoop.hbase.util.Bytes import org.apache.s2graph.counter.core.TimedQualifier.IntervalUnit import org.apache.s2graph.counter.core.{TimedQualifier, ExactQualifier, ExactKeyTrait, BytesUtil} import org.apache.s2graph.counter.models.Counter.ItemType import org.apache.s2graph.counter.util.Hashes import scala.collection.mutable.ArrayBuffer object BytesUtilV1 extends BytesUtil { // ExactKey: [hash(2b)][policy(4b)][item(variable)] val BUCKET_BYTE_SIZE = Bytes.SIZEOF_SHORT val POLICY_ID_SIZE = Bytes.SIZEOF_INT val INTERVAL_SIZE = Bytes.SIZEOF_BYTE val TIMESTAMP_SIZE = Bytes.SIZEOF_LONG val TIMED_QUALIFIER_SIZE = INTERVAL_SIZE + TIMESTAMP_SIZE override def getRowKeyPrefix(id: Int): Array[Byte] = { Bytes.toBytes(id) } override def toBytes(key: ExactKeyTrait): Array[Byte] = { val buff = new ArrayBuffer[Byte] // hash key (2 byte) buff ++= Bytes.toBytes(Hashes.murmur3(key.itemKey)).take(BUCKET_BYTE_SIZE) buff ++= getRowKeyPrefix(key.policyId) buff ++= { key.itemType match { case ItemType.INT => Bytes.toBytes(key.itemKey.toInt) case ItemType.LONG => Bytes.toBytes(key.itemKey.toLong) case ItemType.STRING | ItemType.BLOB => Bytes.toBytes(key.itemKey) } } buff.toArray } override def toBytes(eq: ExactQualifier): Array[Byte] = { toBytes(eq.tq) ++ eq.dimension.getBytes } override def toBytes(tq: TimedQualifier): Array[Byte] = { Bytes.toBytes(tq.q.toString) ++ Bytes.toBytes(tq.ts) } override def toExactQualifier(bytes: Array[Byte]): ExactQualifier = { // qualifier: interval, ts, dimension 순서 val tq = toTimedQualifier(bytes) val dimension = Bytes.toString(bytes, TIMED_QUALIFIER_SIZE, bytes.length - TIMED_QUALIFIER_SIZE) ExactQualifier(tq, dimension) } override def toTimedQualifier(bytes: Array[Byte]): TimedQualifier = { val interval = Bytes.toString(bytes, 0, INTERVAL_SIZE) val ts = Bytes.toLong(bytes, INTERVAL_SIZE) TimedQualifier(IntervalUnit.withName(interval), ts) } }
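toBytes assembles the row key by appending byte slices to an ArrayBuffer[Byte] with ++= and converting to an Array once at the end, which keeps the concatenation readable. The same idea using only the JDK; intToBytes below is a small stand-in written for this sketch in place of HBase's Bytes.toBytes:

import java.nio.ByteBuffer
import scala.collection.mutable.ArrayBuffer

// Stand-in for org.apache.hadoop.hbase.util.Bytes.toBytes(Int).
def intToBytes(i: Int): Array[Byte] = ByteBuffer.allocate(4).putInt(i).array()

def buildRowKey(hash: Int, policyId: Int, itemKey: String): Array[Byte] = {
  val buff = new ArrayBuffer[Byte]
  buff ++= intToBytes(hash).take(2)  // 2-byte hash prefix, as in BytesUtilV1
  buff ++= intToBytes(policyId)      // 4-byte policy id
  buff ++= itemKey.getBytes("UTF-8") // variable-length item key
  buff.toArray
}

val rowKey = buildRowKey(hash = 0x12345678, policyId = 42, itemKey = "item-1")
// rowKey.length == 2 + 4 + "item-1".length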
Example 61
Source File: BytesUtilV2.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.counter.core.v2 import org.apache.hadoop.hbase.util._ import org.apache.s2graph.counter import org.apache.s2graph.counter.core.TimedQualifier.IntervalUnit import org.apache.s2graph.counter.core.{TimedQualifier, ExactQualifier, ExactKeyTrait, BytesUtil} import org.apache.s2graph.counter.models.Counter.ItemType import org.apache.s2graph.counter.util.Hashes import scala.collection.mutable.ArrayBuffer object BytesUtilV2 extends BytesUtil { // ExactKey: [hash(1b)][version(1b)][policy(4b)][item(variable)] val BUCKET_BYTE_SIZE = Bytes.SIZEOF_BYTE val VERSION_BYTE_SIZE = Bytes.SIZEOF_BYTE val POLICY_ID_SIZE = Bytes.SIZEOF_INT val INTERVAL_SIZE = Bytes.SIZEOF_BYTE val TIMESTAMP_SIZE = Bytes.SIZEOF_LONG val TIMED_QUALIFIER_SIZE = INTERVAL_SIZE + TIMESTAMP_SIZE override def getRowKeyPrefix(id: Int): Array[Byte] = { Array(counter.VERSION_2) ++ Bytes.toBytes(id) } override def toBytes(key: ExactKeyTrait): Array[Byte] = { val buff = new ArrayBuffer[Byte] // hash byte buff ++= Bytes.toBytes(Hashes.murmur3(key.itemKey)).take(BUCKET_BYTE_SIZE) // row key prefix // version + policy id buff ++= getRowKeyPrefix(key.policyId) buff ++= { key.itemType match { case ItemType.INT => Bytes.toBytes(key.itemKey.toInt) case ItemType.LONG => Bytes.toBytes(key.itemKey.toLong) case ItemType.STRING | ItemType.BLOB => Bytes.toBytes(key.itemKey) } } buff.toArray } override def toBytes(eq: ExactQualifier): Array[Byte] = { val len = eq.dimKeyValues.map { case (k, v) => k.length + 2 + v.length + 2 }.sum val pbr = new SimplePositionedMutableByteRange(len) for { v <- ExactQualifier.makeSortedDimension(eq.dimKeyValues) } { OrderedBytes.encodeString(pbr, v, Order.ASCENDING) } toBytes(eq.tq) ++ pbr.getBytes } override def toBytes(tq: TimedQualifier): Array[Byte] = { val pbr = new SimplePositionedMutableByteRange(INTERVAL_SIZE + 2 + TIMESTAMP_SIZE + 1) OrderedBytes.encodeString(pbr, tq.q.toString, Order.ASCENDING) OrderedBytes.encodeInt64(pbr, tq.ts, Order.DESCENDING) pbr.getBytes } private def decodeString(pbr: PositionedByteRange): Stream[String] = { if (pbr.getRemaining > 0) { Stream.cons(OrderedBytes.decodeString(pbr), decodeString(pbr)) } else { Stream.empty } } override def toExactQualifier(bytes: Array[Byte]): ExactQualifier = { val pbr = new SimplePositionedByteRange(bytes) ExactQualifier(toTimedQualifier(pbr), { val seqStr = decodeString(pbr).toSeq val (keys, values) = seqStr.splitAt(seqStr.length / 2) keys.zip(values).toMap }) } override def toTimedQualifier(bytes: Array[Byte]): TimedQualifier = { val pbr = new SimplePositionedByteRange(bytes) toTimedQualifier(pbr) } def toTimedQualifier(pbr: PositionedByteRange): TimedQualifier = { TimedQualifier(IntervalUnit.withName(OrderedBytes.decodeString(pbr)), OrderedBytes.decodeInt64(pbr)) } }
Example 62
Source File: AccountStorage.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.db import java.io.{File, FileInputStream, FileOutputStream} import java.nio.file.Files import java.util.Base64 import cats.syntax.either._ import com.google.common.primitives.{Bytes, Ints} import com.wavesplatform.dex.crypto.Enigma import com.wavesplatform.dex.db.AccountStorage.Settings.EncryptedFile import com.wavesplatform.dex.domain.account.KeyPair import com.wavesplatform.dex.domain.bytes.ByteStr import com.wavesplatform.dex.domain.crypto import net.ceedubs.ficus.readers.ValueReader import scala.collection.mutable.ArrayBuffer case class AccountStorage(keyPair: KeyPair) object AccountStorage { sealed trait Settings object Settings { case class InMem(seed: ByteStr) extends Settings case class EncryptedFile(path: File, password: String) extends Settings implicit val valueReader: ValueReader[Settings] = ValueReader.relative[Settings] { config => config.getString("type") match { case "in-mem" => InMem(Base64.getDecoder.decode(config.getString("in-mem.seed-in-base64"))) case "encrypted-file" => EncryptedFile( path = new File(config.getString("encrypted-file.path")), password = config.getString("encrypted-file.password") ) case x => throw new IllegalArgumentException(s"The type of account storage '$x' is unknown. Please update your settings.") } } } def load(settings: Settings): Either[String, AccountStorage] = settings match { case Settings.InMem(seed) => Right(AccountStorage(KeyPair(seed))) case Settings.EncryptedFile(file, password) => if (file.isFile) { val encryptedSeedBytes = readFile(file) val key = Enigma.prepareDefaultKey(password) val decryptedBytes = Enigma.decrypt(key, encryptedSeedBytes) AccountStorage(KeyPair(decryptedBytes)).asRight } else s"A file '${file.getAbsolutePath}' doesn't exist".asLeft } def save(seed: ByteStr, to: EncryptedFile): Unit = { Files.createDirectories(to.path.getParentFile.toPath) val key = Enigma.prepareDefaultKey(to.password) val encryptedSeedBytes = Enigma.encrypt(key, seed.arr) writeFile(to.path, encryptedSeedBytes) } def getAccountSeed(baseSeed: ByteStr, nonce: Int): ByteStr = ByteStr(crypto.secureHash(Bytes.concat(Ints.toByteArray(nonce), baseSeed))) def readFile(file: File): Array[Byte] = { val reader = new FileInputStream(file) try { val buff = new Array[Byte](1024) val r = new ArrayBuffer[Byte] while (reader.available() > 0) { val read = reader.read(buff) if (read > 0) { r.appendAll(buff.iterator.take(read)) } } r.toArray } finally { reader.close() } } def writeFile(file: File, bytes: Array[Byte]): Unit = { val writer = new FileOutputStream(file, false) try writer.write(bytes) finally writer.close() } }
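The readFile helper above grows an ArrayBuffer[Byte] chunk by chunk rather than preallocating a buffer for the whole file. A minimal standalone sketch of the same accumulation pattern, using only the JDK and the Scala standard library (ReadAllBytes and the sample input are illustrative, and the loop reads until end-of-stream instead of polling available()):

import java.io.{ByteArrayInputStream, InputStream}
import scala.collection.mutable.ArrayBuffer

object ReadAllBytes {
  // Accumulate a stream of unknown length into an ArrayBuffer[Byte].
  def readAll(in: InputStream): Array[Byte] = {
    val chunk = new Array[Byte](1024)
    val acc = new ArrayBuffer[Byte]()
    var read = in.read(chunk)
    while (read > 0) {
      acc.appendAll(chunk.iterator.take(read)) // copy only the bytes actually read
      read = in.read(chunk)
    }
    acc.toArray
  }

  def main(args: Array[String]): Unit = {
    val bytes = readAll(new ByteArrayInputStream("hello ArrayBuffer".getBytes("UTF-8")))
    println(new String(bytes, "UTF-8")) // prints: hello ArrayBuffer
  }
}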
Example 63
Source File: WordSpliter.scala From piflow with BSD 2-Clause "Simplified" License | 5 votes |
package cn.piflow.bundle.nlp import cn.piflow._ import cn.piflow.conf._ import cn.piflow.conf.bean.PropertyDescriptor import cn.piflow.conf.util.{ImageUtil, MapUtil} import com.huaban.analysis.jieba.JiebaSegmenter.SegMode import com.huaban.analysis.jieba._ import org.apache.spark.rdd.RDD import org.apache.spark.sql.types.{StringType, StructField, StructType} import org.apache.spark.sql.{DataFrame, Row, SparkSession} import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer class WordSpliter extends ConfigurableStop { val authorEmail: String = "[email protected]" val description: String = "Word segmentation" val inportList: List[String] = List(Port.AnyPort.toString) val outportList: List[String] = List(Port.DefaultPort.toString) var path:String = _ val jiebaSegmenter = new JiebaSegmenter() var tokenARR:ArrayBuffer[String]=ArrayBuffer() def segmenter(str:String): Unit ={ var strVar = str //delete symbol strVar = strVar.replaceAll( "[\\p{P}+~$`^=|<>~`$^+=|<>¥×+\\s]" , ""); val tokens = jiebaSegmenter.process(strVar,SegMode.SEARCH).asScala for (token: SegToken <- tokens){ tokenARR += token.word } } def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = { val session: SparkSession = pec.get[SparkSession]() //read val strDF = session.read.text(path) //segmenter segmenter(strDF.head().getString(0)) //write df val rows: List[Row] = tokenARR.map(each => { var arr:Array[String]=Array(each) val row: Row = Row.fromSeq(arr) row }).toList val rowRDD: RDD[Row] = session.sparkContext.makeRDD(rows) val schema: StructType = StructType(Array( StructField("words",StringType) )) val df: DataFrame = session.createDataFrame(rowRDD,schema) out.write(df) } def initialize(ctx: ProcessContext): Unit = { } def setProperties(map : Map[String, Any]) = { path = MapUtil.get(map,"path").asInstanceOf[String] } override def getPropertyDescriptor(): List[PropertyDescriptor] = { var descriptor : List[PropertyDescriptor] = List() val path = new PropertyDescriptor().name("path").displayName("path").description("The path of text file").defaultValue("").required(true) descriptor = path :: descriptor descriptor } override def getIcon(): Array[Byte] = { ImageUtil.getImage("icon/nlp/NLP.png") } override def getGroup(): List[String] = { List(StopGroup.Alg_NLPGroup.toString) } }
Example 64
Source File: JsonUtil.scala From piflow with BSD 2-Clause "Simplified" License | 5 votes |
package cn.piflow.bundle.util

import org.apache.spark.sql.functions.explode
import org.apache.spark.sql.{Column, DataFrame, SQLContext, SparkSession}

import scala.collection.mutable.ArrayBuffer

object JsonUtil extends Serializable{

  // The tag you want to parse. If you want to open an array field, write it like this: links_name (MasterField_ChildField)
  def ParserJsonDF(df: DataFrame, tag: String): DataFrame = {

    var openArrField: String = ""
    var ArrSchame: String = ""

    var tagARR: Array[String] = tag.split(",")
    var tagNew: String = ""

    for (tt <- tagARR) {
      if (tt.indexOf("_") > -1) {
        // the tag contains a "_" separator
        val openField: Array[String] = tt.split("_")
        openArrField = openField(0)
        ArrSchame += (openField(1) + ",")
      } else {
        tagNew += (tt + ",")
      }
    }
    tagNew += openArrField
    ArrSchame = ArrSchame.substring(0, ArrSchame.length - 1)

    tagARR = tagNew.split(",")
    var FinalDF: DataFrame = df

    // if the user selected fields to return
    var strings: Seq[Column] = tagNew.split(",").toSeq.map(p => new Column(p))

    if (tag.length > 0) {
      val df00 = FinalDF.select(strings: _*)
      FinalDF = df00
    }

    // if the user selected an array field to open and provided its schema
    if (openArrField.length > 0 && ArrSchame.length > 0) {

      val schames: Array[String] = ArrSchame.split(",")

      var selARR: ArrayBuffer[String] = ArrayBuffer() // collects the already-opened fields
      // iterate over the array and wrap each field into a Column object
      var coARR: ArrayBuffer[Column] = ArrayBuffer() // used by select on the opened fields
      val sss = tagNew.split(",") // used by toDF after the field is opened
      var co: Column = null
      for (each <- tagARR) {
        if (each == openArrField) {
          co = explode(FinalDF(openArrField))
          for (x <- schames) {
            selARR += (openArrField + "." + x)
          }
        } else {
          selARR += each
          co = FinalDF(each)
        }
        coARR += co
      }
      println("###################")
      selARR.foreach(println(_))
      var selSEQ: Seq[Column] = selARR.toSeq.map(q => new Column(q))

      var df01: DataFrame = FinalDF.select(coARR: _*).toDF(sss: _*)
      FinalDF = df01.select(selSEQ: _*)
    }
    FinalDF
  }
}
Example 65
Source File: BufferListener.scala From Binding.scala with MIT License | 5 votes |
package com.thoughtworks.binding

import Binding.{PatchedEvent, ChangedEvent, PatchedListener, ChangedListener}
import com.thoughtworks.binding.Binding.{PatchedEvent, ChangedEvent, PatchedListener, ChangedListener}

import scala.collection.mutable.ArrayBuffer

final class BufferListener extends ArrayBuffer[Any] {
  val listener = new ChangedListener[Seq[Any]] with PatchedListener[Any] {
    override def changed(event: ChangedEvent[Seq[Any]]): Unit = {
      BufferListener.this += event
    }

    override def patched(event: PatchedEvent[Any]): Unit = {
      BufferListener.this += event
    }
  }
}
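BufferListener is itself the buffer: each change or patch event is appended to this, so a test can assert on the recorded sequence afterwards. A standalone sketch of that recorder-extends-ArrayBuffer idea, with a hypothetical EventRecorder and plain strings in place of Binding.scala's event types:

import scala.collection.mutable.ArrayBuffer

// A recorder that is itself the buffer it records into.
final class EventRecorder extends ArrayBuffer[String] {
  def onChanged(description: String): Unit = this += s"changed: $description"
  def onPatched(description: String): Unit = this += s"patched: $description"
}

object EventRecorderDemo extends App {
  val recorder = new EventRecorder
  recorder.onChanged("size 0 -> 1")
  recorder.onPatched("index 0 replaced")
  recorder.foreach(println) // prints the two recorded events in order
  assert(recorder.length == 2)
}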
Example 66
Source File: FlatMapRemove.scala From Binding.scala with MIT License | 5 votes |
package com.thoughtworks.binding.regression import com.thoughtworks.binding.Binding._ import com.thoughtworks.binding._ import org.scalatest.freespec.AnyFreeSpec import org.scalatest.matchers.should.Matchers import scala.collection.mutable.ArrayBuffer final class FlatMapRemove extends AnyFreeSpec with Matchers { "removed source of a flatMap" in { val data = Vars.empty[Either[String, String]] val left = for { s <- data if s.isLeft } yield s val events = ArrayBuffer.empty[String] val autoPrint = Binding { if (left.length.bind > 0) { events += "has left" } else { events += "does not has left" } } assert(events.forall(_ == "does not has left")) autoPrint.watch() assert(events.forall(_ == "does not has left")) data.value += Right("1") assert(events.forall(_ == "does not has left")) data.value += Right("2") assert(events.forall(_ == "does not has left")) data.value += Right("3") assert(events.forall(_ == "does not has left")) data.value(1) = Left("left 2") assert(events.last == "has left") data.value --= Seq(Left("left 2")) assert(events.last == "does not has left") } }
Example 67
Source File: InsertThenClear.scala From Binding.scala with MIT License | 5 votes |
package com.thoughtworks.binding.regression import com.thoughtworks.binding.Binding._ import com.thoughtworks.binding._ import org.scalatest.freespec.AnyFreeSpec import org.scalatest.matchers.should.Matchers import scala.collection.mutable.ArrayBuffer final class InsertThenClear extends AnyFreeSpec with Matchers { "insert then clear" in { val items = Vars(1 to 10: _*) val mapped = items.map(-_) mapped.watch() assert(mapped.get sameElements Seq(-1, -2, -3, -4, -5, -6, -7, -8, -9, -10)) items.value.insertAll(3, 100 to 103) assert(mapped.get sameElements Seq(-1, -2, -3, -100, -101, -102, -103, -4, -5, -6, -7, -8, -9, -10)) items.value.clear() assert(mapped.get sameElements Seq.empty) } }
Example 68
Source File: ProxyMessageHandler.scala From spark-riak-connector with Apache License 2.0 | 5 votes |
package com.basho.riak.stub import java.net.InetSocketAddress import java.nio.ByteBuffer import java.nio.channels._ import com.basho.riak.client.core.RiakMessage import com.basho.riak.client.core.util.HostAndPort import shaded.com.basho.riak.protobuf.RiakKvPB import shaded.com.basho.riak.protobuf.RiakMessageCodes._ import shaded.com.google.protobuf.ByteString import scala.collection.JavaConversions._ import scala.collection.mutable.ArrayBuffer class ProxyMessageHandler(hostAndPort: HostAndPort) extends RiakMessageHandler { private final val riakAddress = new InetSocketAddress(hostAndPort.getHost, hostAndPort.getPort) override def handle(context: ClientHandler.Context, input: RiakMessage): Iterable[RiakMessage] = input.getCode match { // coverage plan received from real Riak node must be modified to replace real node's host and port with proxy case MSG_CoverageReq => forwardAndTransform(context, input) { output => val resp = RiakKvPB.RpbCoverageResp.parseFrom(output.getData) val modified = RiakKvPB.RpbCoverageResp.newBuilder(resp) .clearEntries() .addAllEntries(resp.getEntriesList.map { ce => val ceBuilder = RiakKvPB.RpbCoverageEntry.newBuilder(ce) if (ce.getIp.toStringUtf8 == hostAndPort.getHost && ce.getPort == hostAndPort.getPort) { val localAddress = context.channel.asInstanceOf[NetworkChannel] .getLocalAddress.asInstanceOf[InetSocketAddress] ceBuilder.setIp(ByteString.copyFromUtf8(localAddress.getHostString)) ceBuilder.setPort(localAddress.getPort) } ceBuilder.build() }).build() new RiakMessage(output.getCode, modified.toByteArray) } case _ => forwardMessage(context, input) } private def forwardMessage(context: ClientHandler.Context, input: RiakMessage): Iterable[RiakMessage] = { def readRiakResponse(channel: SocketChannel, out: List[RiakMessage] = Nil): Iterable[RiakMessage] = out match { case _ if !isDoneReceived(out, input) => readRiakResponse(channel, out ++ readSocket(channel)) case _ => out } val channel = SocketChannel.open(riakAddress) try { // forward request to real Riak node assert(channel.write(RiakMessageEncoder.encode(input)) > 0) // read response for forwarded request from real Riak node readRiakResponse(channel) } finally { channel.close() } } private def readSocket(channel: SocketChannel): Iterable[RiakMessage] = { var accumulator = ByteBuffer.allocateDirect(0) var out = ArrayBuffer[RiakMessage]() while (out.isEmpty || accumulator.hasRemaining) { // try to parse riak message from bytes in accumulator buffer RiakMessageEncoder.decode(accumulator) match { case Some(x) => accumulator = accumulator.slice() out += x case None => // read next chunk of data from channel and add it into accumulator val in = ByteBuffer.allocateDirect(1024) // scalastyle:ignore channel.read(in) accumulator = ByteBuffer .allocate(accumulator.rewind().limit() + in.flip().limit()) .put(accumulator) .put(in) accumulator.rewind() in.clear() } } out } private def isDoneReceived(out: Iterable[RiakMessage], input: RiakMessage): Boolean = input.getCode match { case MSG_IndexReq => out.foldLeft[Boolean](false)((a, m) => a || RiakKvPB.RpbIndexResp.parseFrom(m.getData).getDone) case _ => out.nonEmpty } private def forwardAndTransform(context: ClientHandler.Context, input: RiakMessage )(transform: RiakMessage => RiakMessage ): Iterable[RiakMessage] = forwardMessage(context, input).map(transform(_)) override def onRespond(input: RiakMessage, output: Iterable[RiakMessage]): Unit = {} }
Example 69
Source File: QueryBucketKeys.scala From spark-riak-connector with Apache License 2.0 | 5 votes |
package com.basho.riak.spark.query

import com.basho.riak.client.core.query.Location
import com.basho.riak.spark.rdd.connector.RiakConnector
import com.basho.riak.spark.rdd.{BucketDef, ReadConf}

import scala.collection.mutable.ArrayBuffer

private case class QueryBucketKeys(bucket: BucketDef,
                                   readConf: ReadConf,
                                   riakConnector: RiakConnector,
                                   keys: Iterable[String]
                                  ) extends QuerySubsetOfKeys[String] {

  override def locationsByKeys(keys: Iterator[String]): (Boolean, Iterable[Location]) = {
    val dataBuffer = new ArrayBuffer[Location](readConf.fetchSize)
    val ns = bucket.asNamespace()

    keys.forall(k => {
      dataBuffer += new Location(ns, k)
      dataBuffer.size < readConf.fetchSize
    })
    false -> dataBuffer
  }
}
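locationsByKeys leans on a small trick: Iterator.forall both consumes keys and stops as soon as the ArrayBuffer reaches readConf.fetchSize, because forall short-circuits on the first false. A standalone sketch of that pattern (takeChunk and the sample data are illustrative):

import scala.collection.mutable.ArrayBuffer

object ChunkedIteration {
  // Drain at most `fetchSize` elements from `keys` into a buffer.
  // `forall` stops pulling from the iterator once the predicate returns false,
  // i.e. once the buffer is full; the remaining keys stay in the iterator.
  def takeChunk[A](keys: Iterator[A], fetchSize: Int): ArrayBuffer[A] = {
    val buffer = new ArrayBuffer[A](fetchSize)
    keys.forall { k =>
      buffer += k
      buffer.size < fetchSize
    }
    buffer
  }

  def main(args: Array[String]): Unit = {
    val keys = (1 to 10).iterator
    println(takeChunk(keys, 4)) // ArrayBuffer(1, 2, 3, 4)
    println(takeChunk(keys, 4)) // ArrayBuffer(5, 6, 7, 8) -- the iterator keeps its position
  }
}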
Example 70
Source File: Query2iKeys.scala From spark-riak-connector with Apache License 2.0 | 5 votes |
package com.basho.riak.spark.query import com.basho.riak.client.core.operations.CoveragePlanOperation.Response.CoverageEntry import com.basho.riak.client.core.query.Location import com.basho.riak.spark.rdd.connector.RiakConnector import com.basho.riak.spark.rdd.{BucketDef, ReadConf} import scala.collection.mutable.ArrayBuffer private case class Query2iKeys[K](bucket: BucketDef, readConf:ReadConf, riakConnector: RiakConnector, index: String, keys: Iterable[K] ) extends QuerySubsetOfKeys[K] { private var query2iKey: Option[Query2iKeySingleOrRange[K]] = None private var tokenNext: Option[Either[String, CoverageEntry]] = None // By default there should be an empty Serializable Iterator private var _iterator: Iterator[Location] = ArrayBuffer.empty[Location].iterator private def chunkIsCollected(chunk: Iterable[Location]) = chunk.size >= readConf.fetchSize // scalastyle:off cyclomatic.complexity override def locationsByKeys(keys: Iterator[K]): (Boolean, Iterable[Location]) = { val dataBuffer = new ArrayBuffer[Location](readConf.fetchSize) while ((keys.hasNext || _iterator.hasNext || tokenNext.isDefined) && !chunkIsCollected(dataBuffer)){ // Previously gathered results should be returned at first, if any _iterator forall ( location => { dataBuffer += location !chunkIsCollected(dataBuffer) }) if(!chunkIsCollected(dataBuffer)) tokenNext match { case Some(next) => // Fetch the next results page from the previously executed 2i query, if any assert(query2iKey.isDefined) val r = query2iKey.get.nextLocationChunk(tokenNext) tokenNext = r._1 _iterator = r._2.iterator case None if keys.hasNext => // query data for the first/next key assert(_iterator.isEmpty && tokenNext.isEmpty) val key = keys.next() query2iKey = Some(new Query2iKeySingleOrRange[K](bucket, readConf, riakConnector, index, key)) val r = query2iKey.get.nextLocationChunk(tokenNext) tokenNext = r._1 _iterator = r._2.iterator case _ => // There is nothing to do } } tokenNext.isDefined -> dataBuffer } // scalastyle:on cyclomatic.complexity }
Example 71
Source File: Partitioner.scala From spark-solr with Apache License 2.0 | 5 votes |
package com.lucidworks.spark import java.net.InetAddress import com.lucidworks.spark.rdd.SolrRDD import com.lucidworks.spark.util.SolrSupport import org.apache.solr.client.solrj.SolrQuery import org.apache.spark.Partition import scala.collection.mutable.ArrayBuffer // Is there a need to override {@code Partitioner.scala} and define our own partition id's object SolrPartitioner { def getShardPartitions(shards: List[SolrShard], query: SolrQuery) : Array[Partition] = { shards.zipWithIndex.map{ case (shard, i) => // Chose any of the replicas as the active shard to query SelectSolrRDDPartition(i, "*", shard, query, SolrRDD.randomReplica(shard))}.toArray } def getSplitPartitions( shards: List[SolrShard], query: SolrQuery, splitFieldName: String, splitsPerShard: Int): Array[Partition] = { var splitPartitions = ArrayBuffer.empty[SelectSolrRDDPartition] var counter = 0 shards.foreach(shard => { val splits = SolrSupport.getShardSplits(query, shard, splitFieldName, splitsPerShard) splits.foreach(split => { splitPartitions += SelectSolrRDDPartition(counter, "*", shard, split.query, split.replica) counter = counter + 1 }) }) splitPartitions.toArray } // Workaround for SOLR-10490. TODO: Remove once fixed def getExportHandlerPartitions( shards: List[SolrShard], query: SolrQuery): Array[Partition] = { shards.zipWithIndex.map{ case (shard, i) => // Chose any of the replicas as the active shard to query ExportHandlerPartition(i, shard, query, SolrRDD.randomReplica(shard), 0, 0)}.toArray } // Workaround for SOLR-10490. TODO: Remove once fixed def getExportHandlerPartitions( shards: List[SolrShard], query: SolrQuery, splitFieldName: String, splitsPerShard: Int): Array[Partition] = { val splitPartitions = ArrayBuffer.empty[ExportHandlerPartition] var counter = 0 shards.foreach(shard => { // Form a continuous iterator list so that we can pick different replicas for different partitions in round-robin mode val splits = SolrSupport.getExportHandlerSplits(query, shard, splitFieldName, splitsPerShard) splits.foreach(split => { splitPartitions += ExportHandlerPartition(counter, shard, split.query, split.replica, split.numWorkers, split.workerId) counter = counter+1 }) }) splitPartitions.toArray } } case class SolrShard(shardName: String, replicas: List[SolrReplica]) case class SolrReplica( replicaNumber: Int, replicaName: String, replicaUrl: String, replicaHostName: String, locations: Array[InetAddress]) { def getHostAndPort(): String = {replicaHostName.substring(0, replicaHostName.indexOf('_'))} override def toString(): String = { return s"SolrReplica(${replicaNumber}) ${replicaName}: url=${replicaUrl}, hostName=${replicaHostName}, locations="+locations.mkString(",") } }
Example 72
Source File: GranularBigVector.scala From glint with MIT License | 5 votes |
package glint.models.client.granular

import scala.collection.mutable.ArrayBuffer
import scala.concurrent.{ExecutionContext, Future}
import scala.reflect.ClassTag

import glint.models.client.BigVector

  override def push(keys: Array[Long], values: Array[V])
                   (implicit ec: ExecutionContext): Future[Boolean] = {
    var i = 0
    val ab = new ArrayBuffer[Future[Boolean]](keys.length / maximumMessageSize)
    while (i < keys.length) {
      val end = Math.min(keys.length, i + maximumMessageSize)
      val future = underlying.push(keys.slice(i, end), values.slice(i, end))
      ab.append(future)
      i += maximumMessageSize
    }
    Future.sequence(ab.toIterator).transform(x => x.forall(y => y), err => err)
  }
}
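The push above splits a large request into slices of at most maximumMessageSize, issues one future per slice, and folds the results back into a single Future[Boolean]. A runnable sketch of the slicing pattern with the underlying BigVector call replaced by a stub (pushSlice and maxMessageSize are illustrative stand-ins):

import scala.collection.mutable.ArrayBuffer
import scala.concurrent.{Await, ExecutionContext, Future}
import scala.concurrent.duration._
import ExecutionContext.Implicits.global

object ChunkedPush {
  val maxMessageSize = 3

  // Stand-in for underlying.push: pretend every slice succeeds.
  def pushSlice(keys: Array[Long], values: Array[Double]): Future[Boolean] =
    Future { keys.length == values.length }

  def push(keys: Array[Long], values: Array[Double]): Future[Boolean] = {
    val parts = new ArrayBuffer[Future[Boolean]](keys.length / maxMessageSize + 1)
    var i = 0
    while (i < keys.length) {
      val end = math.min(keys.length, i + maxMessageSize)
      parts += pushSlice(keys.slice(i, end), values.slice(i, end))
      i += maxMessageSize
    }
    // One future per slice; succeed only if every slice succeeded.
    Future.sequence(parts).map(_.forall(identity))
  }

  def main(args: Array[String]): Unit = {
    val ok = Await.result(push(Array(1L, 2L, 3L, 4L, 5L), Array(0.1, 0.2, 0.3, 0.4, 0.5)), 5.seconds)
    println(s"all slices pushed: $ok") // all slices pushed: true
  }
}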
Example 73
Source File: GranularBigMatrix.scala From glint with MIT License | 5 votes |
package glint.models.client.granular import scala.collection.mutable.ArrayBuffer import scala.concurrent.{ExecutionContext, Future} import scala.reflect.ClassTag import breeze.linalg.Vector import glint.models.client.BigMatrix override def pull(rows: Array[Long], cols: Array[Int])(implicit ec: ExecutionContext): Future[Array[V]] = { if (rows.length <= maximumMessageSize) { underlying.pull(rows, cols) } else { var i = 0 val ab = new ArrayBuffer[Future[Array[V]]](rows.length / maximumMessageSize) while (i < rows.length) { val end = Math.min(rows.length, i + maximumMessageSize) val future = underlying.pull(rows.slice(i, end), cols.slice(i, end)) ab.append(future) i += maximumMessageSize } Future.sequence(ab.toIterator).map { case arrayOfValues => val finalValues = new ArrayBuffer[V](rows.length) arrayOfValues.foreach(x => finalValues.appendAll(x)) finalValues.toArray } } } }
Example 74
Source File: HiveQLProcessBuilder.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.enginemanager.hive.process import java.nio.file.Paths import com.webank.wedatasphere.linkis.common.conf.Configuration import com.webank.wedatasphere.linkis.enginemanager.conf.EnvConfiguration.{DEFAULT_JAVA_OPTS, JAVA_HOME, engineGCLogPath} import com.webank.wedatasphere.linkis.enginemanager.hive.conf.HiveEngineConfiguration import com.webank.wedatasphere.linkis.enginemanager.impl.UserEngineResource import com.webank.wedatasphere.linkis.enginemanager.process.JavaProcessEngineBuilder import com.webank.wedatasphere.linkis.enginemanager.{AbstractEngineCreator, EngineResource} import com.webank.wedatasphere.linkis.protocol.engine.RequestEngine import org.apache.commons.lang.StringUtils import org.slf4j.LoggerFactory import scala.collection.mutable.ArrayBuffer override protected def classpathCheck(jarOrFiles: Array[String]): Unit = { for(jarOrFile <- jarOrFiles){ checkJarOrFile(jarOrFile) } } //todo Check the jar of the classpath(对classpath的jar进行检查) private def checkJarOrFile(jarOrFile:String):Unit = { } override def build(engineRequest: EngineResource, request: RequestEngine): Unit = { this.request = request userEngineResource = engineRequest.asInstanceOf[UserEngineResource] val javaHome = JAVA_HOME.getValue(request.properties) if(StringUtils.isEmpty(javaHome)) { warn("We cannot find the java home, use java to run storage repl web server.") commandLine += "java" } else { commandLine += Paths.get(javaHome, "bin/java").toAbsolutePath.toFile.getAbsolutePath } if (request.properties.containsKey(HiveEngineConfiguration.HIVE_CLIENT_MEMORY.key)){ val settingClientMemory = request.properties.get(HiveEngineConfiguration.HIVE_CLIENT_MEMORY.key) if (!settingClientMemory.toLowerCase().endsWith("g")){ request.properties.put(HiveEngineConfiguration.HIVE_CLIENT_MEMORY.key, settingClientMemory + "g") } //request.properties.put(HiveEngineConfiguration.HIVE_CLIENT_MEMORY.key, request.properties.get(HiveEngineConfiguration.HIVE_CLIENT_MEMORY.key)+"g") } val clientMemory = HiveEngineConfiguration.HIVE_CLIENT_MEMORY.getValue(request.properties).toString if (clientMemory.toLowerCase().endsWith("g")){ commandLine += ("-Xmx" + clientMemory.toLowerCase()) commandLine += ("-Xms" + clientMemory.toLowerCase()) }else{ commandLine += ("-Xmx" + clientMemory + "g") commandLine += ("-Xms" + clientMemory + "g") } val javaOPTS = getExtractJavaOpts val alias = getAlias(request) if(StringUtils.isNotEmpty(DEFAULT_JAVA_OPTS.getValue)) DEFAULT_JAVA_OPTS.getValue.format(engineGCLogPath(port, userEngineResource.getUser, alias)).split("\\s+").foreach(commandLine += _) if(StringUtils.isNotEmpty(javaOPTS)) javaOPTS.split("\\s+").foreach(commandLine += _) //engineLogJavaOpts(port, alias).trim.split(" ").foreach(commandLine += _) if(Configuration.IS_TEST_MODE.getValue) { val port = AbstractEngineCreator.getNewPort info(s"$toString open debug mode with port $port.") commandLine += s"-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=$port" } var classpath = getClasspath(request.properties, getExtractClasspath) classpath = classpath ++ request.properties.get("jars").split(",") classpathCheck(classpath) commandLine += "-Djava.library.path=/appcom/Install/hadoop/lib/native" commandLine += "-cp" commandLine += classpath.mkString(":") commandLine += "com.webank.wedatasphere.linkis.engine.DataWorkCloudEngineApplication" } // override def build(engineRequest: EngineResource, request: RequestEngine): Unit = { // import scala.collection.JavaConversions._ // request.properties foreach {case (k, v) => 
LOG.info(s"request key is $k, value is $v")} // this.request = request // super.build(engineRequest, request) // // } override protected val addApacheConfigPath: Boolean = true }
Example 75
Source File: JDBCSQLCodeParser.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.entrance.executer import com.webank.wedatasphere.linkis.entrance.conf.JDBCConfiguration import org.apache.commons.lang.StringUtils import scala.collection.mutable.ArrayBuffer object JDBCSQLCodeParser { val separator = ";" val defaultLimit: Int = JDBCConfiguration.ENGINE_DEFAULT_LIMIT.getValue def parse(code: String): Array[String] = { val codeBuffer = new ArrayBuffer[String]() def appendStatement(sqlStatement: String): Unit = { codeBuffer.append(sqlStatement) } if (StringUtils.contains(code, separator)) { StringUtils.split(code, ";").foreach { case s if StringUtils.isBlank(s) => case s if isSelectCmdNoLimit(s) => appendStatement(s + " limit " + defaultLimit); case s => appendStatement(s); } } else { code match { case s if StringUtils.isBlank(s) => case s if isSelectCmdNoLimit(s) => appendStatement(s + " limit " + defaultLimit); case s => appendStatement(s); } } codeBuffer.toArray } def isSelectCmdNoLimit(cmd: String): Boolean = { var code = cmd.trim if (!cmd.split("\\s+")(0).equalsIgnoreCase("select")) return false if (code.contains("limit")) code = code.substring(code.lastIndexOf("limit")).trim else if (code.contains("LIMIT")) code = code.substring(code.lastIndexOf("LIMIT")).trim.toLowerCase else return true val hasLimit = code.matches("limit\\s+\\d+\\s*;?") if (hasLimit) { if (code.indexOf(";") > 0) code = code.substring(5, code.length - 1).trim else code = code.substring(5).trim val limitNum = code.toInt if (limitNum > defaultLimit) throw new IllegalArgumentException("We at most allowed to limit " + defaultLimit + ", but your SQL has been over the max rows.") } !hasLimit } }
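JDBCSQLCodeParser splits the script on ';', drops blank statements, and appends a default LIMIT to any bare SELECT before it reaches the engine. A simplified standalone sketch of that split-and-rewrite step (the isBareSelect check here is deliberately cruder than isSelectCmdNoLimit above):

import scala.collection.mutable.ArrayBuffer

object SqlSplitter {
  val defaultLimit = 5000

  // Very rough check: a SELECT with no LIMIT anywhere in it.
  private def isBareSelect(sql: String): Boolean = {
    val lower = sql.trim.toLowerCase
    lower.startsWith("select") && !lower.contains("limit")
  }

  def parse(code: String): Array[String] = {
    val statements = new ArrayBuffer[String]()
    code.split(";").map(_.trim).foreach {
      case s if s.isEmpty       => // drop blank statements
      case s if isBareSelect(s) => statements += s"$s limit $defaultLimit"
      case s                    => statements += s
    }
    statements.toArray
  }

  def main(args: Array[String]): Unit = {
    parse("select * from t1; select * from t2 limit 10;  ").foreach(println)
    // select * from t1 limit 5000
    // select * from t2 limit 10
  }
}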
Example 76
Source File: PythonEngineExecutor.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.executors import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.engine.PythonSession import com.webank.wedatasphere.linkis.engine.exception.EngineException import com.webank.wedatasphere.linkis.engine.execute.{EngineExecutor, EngineExecutorContext} import com.webank.wedatasphere.linkis.engine.rs.RsOutputStream import com.webank.wedatasphere.linkis.protocol.engine.JobProgressInfo import com.webank.wedatasphere.linkis.resourcemanager.{LoadInstanceResource, Resource} import com.webank.wedatasphere.linkis.rpc.Sender import com.webank.wedatasphere.linkis.scheduler.executer._ import org.apache.commons.io.IOUtils import scala.collection.mutable.ArrayBuffer class PythonEngineExecutor(outputPrintLimit: Int) extends EngineExecutor(outputPrintLimit, false) with SingleTaskOperateSupport with SingleTaskInfoSupport with Logging { override def getName: String = Sender.getThisServiceInstance.getInstance private val lineOutputStream = new RsOutputStream private[executors] var engineExecutorContext: EngineExecutorContext = _ override def getActualUsedResources: Resource = { new LoadInstanceResource(Runtime.getRuntime.totalMemory() - Runtime.getRuntime.freeMemory(), 2, 1) } private val pySession = new PythonSession override protected def executeLine(engineExecutorContext: EngineExecutorContext, code: String): ExecuteResponse = { if(engineExecutorContext != this.engineExecutorContext){ this.engineExecutorContext = engineExecutorContext pySession.setEngineExecutorContext(engineExecutorContext) //lineOutputStream.reset(engineExecutorContext) info("Python executor reset new engineExecutorContext!") } engineExecutorContext.appendStdout(s"$getName >> ${code.trim}") pySession.execute(code) //lineOutputStream.flush() SuccessExecuteResponse() } override protected def executeCompletely(engineExecutorContext: EngineExecutorContext, code: String, completedLine: String): ExecuteResponse = { val newcode = completedLine + code info("newcode is " + newcode) executeLine(engineExecutorContext, newcode) } override def kill(): Boolean = true override def pause(): Boolean = true override def resume(): Boolean = true override def progress(): Float = { if (this.engineExecutorContext != null){ this.engineExecutorContext.getCurrentParagraph / this.engineExecutorContext.getTotalParagraph.asInstanceOf[Float] }else 0.0f } override def getProgressInfo: Array[JobProgressInfo] = { val jobProgressInfos = new ArrayBuffer[JobProgressInfo]() jobProgressInfos.toArray Array.empty } override def log(): String = "" override def close(): Unit = { IOUtils.closeQuietly(lineOutputStream) var isKill:Boolean = false try { pySession.close isKill = true; } catch { case e: Throwable => throw new EngineException(60004, "Engine shutdown exception(引擎关闭异常)") } } }
Example 77
Source File: SparkPostExecutionHook.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.extension import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.engine.execute.EngineExecutorContext import com.webank.wedatasphere.linkis.scheduler.executer.ExecuteResponse import scala.collection.mutable.ArrayBuffer trait SparkPostExecutionHook { def hookName:String def callPostExecutionHook(engineExecutorContext: EngineExecutorContext, executeResponse: ExecuteResponse, code: String): Unit } object SparkPostExecutionHook extends Logging{ private val postHooks = ArrayBuffer[SparkPostExecutionHook]() def register(postExecutionHook: SparkPostExecutionHook):Unit = { info(s"Get a postExecutionHook of ${postExecutionHook.hookName} register") postHooks.append(postExecutionHook) } def getSparkPostExecutionHooks():Array[SparkPostExecutionHook] = { postHooks.toArray } }
Example 78
Source File: SparkPreExecutionHook.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.extension import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.engine.execute.EngineExecutorContext import scala.collection.mutable.ArrayBuffer trait SparkPreExecutionHook { def hookName:String def callPreExecutionHook(engineExecutorContext: EngineExecutorContext, code: String): String } object SparkPreExecutionHook extends Logging{ private val preHooks = ArrayBuffer[SparkPreExecutionHook]() def register(preExecutionHook: SparkPreExecutionHook):Unit = { info(s"Get a preExecutionHook of ${preExecutionHook.hookName} register") preHooks.append(preExecutionHook) } def getSparkPreExecutionHooks():Array[SparkPreExecutionHook] = { preHooks.toArray } }
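Both hook companions above use the same registry pattern: a private ArrayBuffer collects implementations at registration time, and readers only ever get an Array snapshot. A standalone sketch of that pattern with a hypothetical HookRegistry and ExecutionHook trait:

import scala.collection.mutable.ArrayBuffer

trait ExecutionHook {
  def hookName: String
  def beforeExecute(code: String): String
}

object HookRegistry {
  private val hooks = ArrayBuffer[ExecutionHook]()

  // Registration appends to the buffer; readers only ever see a snapshot copy.
  def register(hook: ExecutionHook): Unit = hooks.append(hook)

  def all: Array[ExecutionHook] = hooks.toArray
}

object HookRegistryDemo extends App {
  HookRegistry.register(new ExecutionHook {
    val hookName = "trim"
    def beforeExecute(code: String): String = code.trim
  })
  val rewritten = HookRegistry.all.foldLeft("  select 1  ")((c, h) => h.beforeExecute(c))
  println(s"'$rewritten'") // 'select 1'
}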
Example 79
Source File: SparkSqlExtension.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.extension import java.util.concurrent._ import com.webank.wedatasphere.linkis.common.conf.CommonVars import com.webank.wedatasphere.linkis.common.utils.{Logging, Utils} import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.{DataFrame, SQLContext} import scala.collection.mutable.ArrayBuffer import scala.concurrent.duration._ abstract class SparkSqlExtension extends Logging{ private val maxPoolSize = CommonVars("wds.linkis.dws.ujes.spark.extension.max.pool",5).getValue private val executor = new ThreadPoolExecutor(2, maxPoolSize, 2, TimeUnit.SECONDS, new LinkedBlockingQueue[Runnable](), new ThreadFactory { override def newThread(r: Runnable): Thread = { val thread = new Thread(r) thread.setDaemon(true) thread } }) final def afterExecutingSQL(sqlContext: SQLContext,command: String,dataFrame: DataFrame,timeout:Long,sqlStartTime:Long):Unit = { try { val thread = new Runnable { override def run(): Unit = extensionRule(sqlContext,command,dataFrame.queryExecution,sqlStartTime) } val future = executor.submit(thread) Utils.waitUntil(future.isDone,timeout milliseconds) } catch { case e: Throwable => info("Failed to execute SparkSqlExtension: ", e) } } protected def extensionRule(sqlContext: SQLContext,command: String,queryExecution: QueryExecution,sqlStartTime:Long):Unit } object SparkSqlExtension extends Logging { private val extensions = ArrayBuffer[SparkSqlExtension]() def register(sqlExtension: SparkSqlExtension):Unit = { info("Get a sqlExtension register") extensions.append(sqlExtension) } def getSparkSqlExtensions():Array[SparkSqlExtension] = { extensions.toArray } }
Example 80
Source File: CSTableParser.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.cs import java.util.regex.Pattern import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.cs.client.service.CSTableService import com.webank.wedatasphere.linkis.cs.common.entity.metadata.CSTable import com.webank.wedatasphere.linkis.cs.common.utils.CSCommonUtils import com.webank.wedatasphere.linkis.engine.exception.ExecuteError import com.webank.wedatasphere.linkis.engine.execute.EngineExecutorContext import org.apache.commons.lang.StringUtils import org.apache.spark.sql.SparkSession import org.apache.spark.sql.execution.datasources.csv.DolphinToSpark import scala.collection.mutable.ArrayBuffer def getCSTable(csTempTable:String, contextIDValueStr: String, nodeNameStr: String):CSTable = { CSTableService.getInstance().getUpstreamSuitableTable(contextIDValueStr, nodeNameStr, csTempTable) } def registerTempTable(csTable: CSTable):Unit = { val spark = SparkSession.builder().enableHiveSupport().getOrCreate() info(s"Start to create tempView to sparkSession viewName(${csTable.getName}) location(${csTable.getLocation})") DolphinToSpark.createTempView(spark, csTable.getName, csTable.getLocation, true) info(s"Finished to create tempView to sparkSession viewName(${csTable.getName}) location(${csTable.getLocation})") } }
Example 81
Source File: LogContainer.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.spark.common import scala.collection.Iterable import scala.collection.JavaConversions._ import scala.collection.mutable.ArrayBuffer class LogContainer(val logSize: Int) { private final val logs = new Array[String](logSize) private var flag, tail = 0 def putLog(log: String): Unit = { logs.synchronized { val index = (tail + 1) % logSize if(index == flag) { flag = (flag + 1) % logSize } logs(tail) = log tail = index } } def putLogs(logs: Iterable[String]) = synchronized { logs.foreach(putLog) } def reset() = synchronized { flag = 0 tail = 0 } def getLogs: List[String] = { logs.synchronized { if(flag == tail) { return List.empty[String] } val _logs = ArrayBuffer[String]() val _tail = if(flag > tail) tail + logSize else tail for (index <- flag until _tail) { val _index = index % logSize _logs += logs(_index) } flag = tail _logs.toList } } def size = { if(flag == tail) 0 else if(flag > tail) tail + logSize - flag else tail - flag } def getLogList: java.util.List[String] = getLogs }
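LogContainer is a fixed-size ring buffer: putLog advances tail, starts overwriting the oldest entry once the ring wraps, and getLogs drains everything between flag and tail into an ArrayBuffer before resetting the window. A small usage sketch, runnable together with the LogContainer class above (the expected outputs in the comments assume the default toString of Scala collections):

import com.webank.wedatasphere.linkis.engine.spark.common.LogContainer

object LogContainerDemo extends App {
  val container = new LogContainer(4) // a ring of 4 slots keeps the last 3 lines
  container.putLogs(Seq("line 1", "line 2", "line 3", "line 4", "line 5"))
  println(container.size)    // 3 -- the oldest lines were overwritten
  println(container.getLogs) // List(line 3, line 4, line 5)
  println(container.getLogs) // List() -- getLogs drains the window
}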
Example 82
Source File: SparkConfiguration.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.enginemanager.configuration import com.webank.wedatasphere.linkis.common.conf.{CommonVars, Configuration} import com.webank.wedatasphere.linkis.common.utils.{ClassUtils, Logging} import com.webank.wedatasphere.linkis.engine.factory.SparkEngineExecutorFactory import com.webank.wedatasphere.linkis.enginemanager.AbstractEngineCreator import scala.collection.mutable.ArrayBuffer object SparkConfiguration extends Logging { val SPARK_MAX_PARALLELISM_USERS = CommonVars[Int]("wds.linkis.engine.spark.user.parallelism", 100) val SPARK_USER_MAX_WAITING_SIZE = CommonVars[Int]("wds.linkis.engine.spark.user.waiting.max", 100) val SPARK_SESSION_HOOK = CommonVars[String]("wds.linkis.engine.spark.session.hook", "") val SPARK_LANGUAGE_REPL_INIT_TIME = CommonVars[String]("wds.linkis.engine.spark.language-repl.init.time", new String("30s")) val SPARK_ALLOW_REQUEST_ALL_YARN_MEMORY = CommonVars[String]("wds.linkis.engine.spark.allow.all-memory.when.queue", new String("60g")) val SPARK_ALLOW_REQUEST_ALL_YARN_CORES = CommonVars[Int]("wds.linkis.engine.spark.allow.all-cores.when.queue", 30) val SPARK_USER_MAX_ALLOCATE_SESSIONS = CommonVars[Int]("wds.linkis.engine.spark.user.sessions.max", 5) val SPARK_USER_MAX_ALLOCATE_YARN_MEMORY = CommonVars[String]("wds.linkis.engine.spark.user.yarn.memory.max", new String("100g")) val SPARK_USER_MAX_ALLOCATE_YARN_CORES = CommonVars[Int]("wds.linkis.engine.spark.user.cores.max", 50) val SPARK_USER_MAX_ALLOCATE_DRIVER_MEMORY = CommonVars[String]("wds.linkis.engine.spark.user.driver.memory.max", new String("15g")) val SPARK_USER_MAX_ALLOCATE_DRIVER_CORES = SPARK_USER_MAX_ALLOCATE_SESSIONS val SPARK_USER_MAX_RESOURCE_IN_QUEUE = CommonVars[Float]("wds.linkis.engine.spark.user.queue.resources.max", 0.6f) val SPARK_DANGER_QUEUE_USED_CAPACITY = CommonVars[Float]("wds.linkis.engine.spark.danger.queue.used", 0.2f) val SPARK_DANGER_QUEUE_USER_ALLOCATE_SESSION = CommonVars[Int]("wds.linkis.engine.spark.danger.user.sessions.max", 2) val SPARK_WARN_QUEUE_USED_CAPACITY = CommonVars[Float]("wds.linkis.engine.spark.warning.queue.used", 0.5f) val SPARK_WARN_QUEUE_USER_ALLOCATE_SESSION = CommonVars[Int]("wds.linkis.engine.spark.warning.user.sessions.max", 3) val PROXY_USER = CommonVars[String]("spark.proxy.user", "${UM}") val SPARK_CLIENT_MODE = "client" val SPARK_CLUSTER_MODE = "cluster" val SPARK_DEPLOY_MODE = CommonVars[String]("spark.submit.deployMode", SPARK_CLIENT_MODE) val SPARK_APPLICATION_JARS = CommonVars[String]("spark.application.jars", "", "User-defined jars, separated by English, must be uploaded to HDFS first, and must be full path to HDFS.(用户自定义jar包,多个以英文,隔开,必须先上传到HDFS,且需为HDFS全路径。)") val SPARK_EXTRA_JARS = CommonVars[String]("spark.jars", "", "Additional jar package, Driver and Executor take effect(额外的jar包,Driver和Executor生效)") val MAPRED_OUTPUT_COMPRESS = CommonVars[String]("mapred.output.compress", "true", "Whether the map output is compressed(map输出结果是否压缩)") val MAPRED_OUTPUT_COMPRESSION_CODEC = CommonVars[String]("mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec", "Map output compression method(map输出结果压缩方式)") val SPARK_MASTER = CommonVars[String]("spark.master", "yarn", "Default master(默认master)") val SPARK_OUTPUTDIR = CommonVars[String]("spark.outputDir", "/home/georgeqiao", "Default output path(默认输出路径)") val DWC_SPARK_USEHIVECONTEXT = CommonVars[Boolean]("wds.linkis.spark.useHiveContext", true) val ENGINE_JAR = CommonVars[String]("wds.linkis.enginemanager.core.jar", 
ClassUtils.jarOfClass(classOf[SparkEngineExecutorFactory]).head) val SPARK_DRIVER_CLASSPATH = CommonVars[String]("wds.linkis.spark.driver.conf.mainjar", "") val SPARK_DRIVER_EXTRA_JAVA_OPTIONS = CommonVars[String]("spark.driver.extraJavaOptions", "\"-Dwds.linkis.configuration=linkis-engine.properties " + getJavaRemotePort + "\"") val DEFAULT_JAVA_OPTS = CommonVars[String]("wds.linkis.engine.javaOpts.default", "-server -XX:+UseG1GC -XX:MaxPermSize=250m -XX:PermSize=128m " + "-Xloggc:%s -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps -Dwds.linkis.configuration=linkis-engine.properties") val SPARK_ML_BUCKET_FIELDS = CommonVars[String]("wds.linkis.engine.spark.ml.bucketFields", "age[0,18,30,60,100]") val SPARK_SUBMIT_CMD = CommonVars[String]("wds.linkis.engine.spark.submit.cmd", "spark-submit") private var Ports: ArrayBuffer[Int] = _ def getJavaRemotePort = { if (Configuration.IS_TEST_MODE.getValue) { val r = new scala.util.Random() val port = 1024 + r.nextInt((65536 - 1024) + 1) info(s"open debug mode with port $port.") s"-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=$port" } else { "" } } private def getAvailablePort: Int = synchronized { var port = AbstractEngineCreator.getNewPort info("Get new port " + port) if (Ports == null) { info("Get inInitPorts is null ") Ports = ArrayBuffer(0, 1) info("Current ports is " + Ports.toList.toString()) } while (Ports.contains(port)) { if (AbstractEngineCreator != null) { port = AbstractEngineCreator.getNewPort } } Ports += port port } }
Example 83
Source File: CSResourceParser.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.cs import java.util import java.util.regex.Pattern import com.webank.wedatasphere.linkis.cs.client.service.CSResourceService import com.webank.wedatasphere.linkis.engine.PropertiesExecuteRequest import org.apache.commons.lang.StringUtils import scala.collection.JavaConversions._ import scala.collection.mutable.ArrayBuffer class CSResourceParser { private val pb = Pattern.compile("cs://[^\\s\"]+[$\\s]{0,1}", Pattern.CASE_INSENSITIVE) private val PREFIX = "cs://" private def getPreFixResourceNames(code: String): Array[String] = { val bmlResourceNames = new ArrayBuffer[String]() val mb = pb.matcher(code) while (mb.find) bmlResourceNames.append(mb.group.trim) bmlResourceNames.toArray } def parse(executeRequest: PropertiesExecuteRequest, code: String, contextIDValueStr: String, nodeNameStr: String): String = { //TODO getBMLResource peaceWong val bmlResourceList = CSResourceService.getInstance().getUpstreamBMLResource(contextIDValueStr, nodeNameStr) val parsedResources = new util.ArrayList[util.Map[String, Object]]() val preFixResourceNames = getPreFixResourceNames(code) val preFixNames = new ArrayBuffer[String]() val parsedNames = new ArrayBuffer[String]() preFixResourceNames.foreach { preFixResourceName => val resourceName = preFixResourceName.replace(PREFIX, "").trim val bmlResourceOption = bmlResourceList.find(_.getDownloadedFileName.equals(resourceName)) if (bmlResourceOption.isDefined) { val bmlResource = bmlResourceOption.get val map = new util.HashMap[String, Object]() map.put("resourceId", bmlResource.getResourceId) map.put("version", bmlResource.getVersion) map.put("fileName", resourceName) parsedResources.add(map) preFixNames.append(preFixResourceName) parsedNames.append(resourceName) } } executeRequest.properties.put("resources", parsedResources) StringUtils.replaceEach(code, preFixNames.toArray, parsedNames.toArray) } }
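getPreFixResourceNames walks the code with a java.util.regex.Matcher and appends every cs:// reference into an ArrayBuffer before the rewrite step. A standalone sketch of that scan (the regex here is a slightly simplified version of the original pattern, and the sample script is illustrative):

import java.util.regex.Pattern
import scala.collection.mutable.ArrayBuffer

object ResourceRefScanner {
  private val refPattern = Pattern.compile("cs://[^\\s\"]+", Pattern.CASE_INSENSITIVE)

  // Collect every cs:// reference appearing in a script, in order of appearance.
  def scan(code: String): Array[String] = {
    val refs = new ArrayBuffer[String]()
    val matcher = refPattern.matcher(code)
    while (matcher.find()) refs.append(matcher.group.trim)
    refs.toArray
  }

  def main(args: Array[String]): Unit = {
    val script = """df = spark.read.csv("cs://input.csv"); save(df, "cs://output.parquet")"""
    scan(script).foreach(println)
    // cs://input.csv
    // cs://output.parquet
  }
}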
Example 84
Source File: RsOutputStream.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.rs import java.io.OutputStream import com.webank.wedatasphere.linkis.common.io.resultset.ResultSetWriter import com.webank.wedatasphere.linkis.common.io.{MetaData, Record} import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.engine.execute.EngineExecutorContext import com.webank.wedatasphere.linkis.storage.LineRecord import scala.collection.mutable.ArrayBuffer class RsOutputStream extends OutputStream with Logging{ private val line = ArrayBuffer[Byte]() private var isReady = false private var writer: ResultSetWriter[_ <: MetaData, _ <: Record] = _ override def write(b: Int) = if(isReady) synchronized { if(writer != null) { if (b == '\n') { val outStr = new String(line.toArray,"UTF-8") writer.addRecord(new LineRecord(outStr)) //info("output line:" + outStr) line.clear() } else line += b.toByte }else{ warn("writer is null") } } def reset(engineExecutorContext: EngineExecutorContext) = { writer = engineExecutorContext.createDefaultResultSetWriter() writer.addMetaData(null) } def ready() = isReady = true override def flush(): Unit = if(writer != null && line.nonEmpty) { val outStr = new String(line.toArray,"UTF-8") writer.addRecord(new LineRecord(outStr)) //info("flush line:" + outStr) line.clear() } override def toString = if(writer != null) writer.toString() else null override def close() = if(writer != null) { flush() writer.close() writer = null } }
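RsOutputStream buffers single bytes in an ArrayBuffer[Byte] and turns each completed line into a result-set record. A standalone sketch of the same line-buffering OutputStream, with the result-set writer replaced by a plain callback (LineBufferingOutputStream and onLine are illustrative):

import java.io.OutputStream
import scala.collection.mutable.ArrayBuffer

// Buffers bytes until a newline, then hands the completed line to `onLine`.
class LineBufferingOutputStream(onLine: String => Unit) extends OutputStream {
  private val line = ArrayBuffer[Byte]()

  override def write(b: Int): Unit = {
    if (b == '\n') flush()
    else line += b.toByte
  }

  override def flush(): Unit = if (line.nonEmpty) {
    onLine(new String(line.toArray, "UTF-8"))
    line.clear()
  }

  override def close(): Unit = flush()
}

object LineBufferingDemo extends App {
  val out = new LineBufferingOutputStream(l => println(s"record: $l"))
  out.write("first line\nsecond line\n".getBytes("UTF-8"))
  out.close()
  // record: first line
  // record: second line
}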
Example 85
Source File: CodeGeneratorEngineHook.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.execute.hook import java.io.File import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.engine.execute.{EngineExecutor, EngineHook} import com.webank.wedatasphere.linkis.scheduler.executer.{ExecuteRequest, RunTypeExecuteRequest} import com.webank.wedatasphere.linkis.server.JMap import org.apache.commons.io.FileUtils import org.apache.commons.lang.StringUtils import scala.collection.mutable.ArrayBuffer @Deprecated //changed to UdfLoadEngineHook abstract class CodeGeneratorEngineHook extends EngineHook with Logging{ self => val udfPathProp = "udf.paths" protected var creator: String = _ protected var user: String = _ protected var initSpecialCode: String = _ protected val runType: String protected def acceptCodeType(line: String): Boolean protected def generateCode(): Array[String] = { val codeBuffer = new ArrayBuffer[String] val statementBuffer = new ArrayBuffer[String] var accept = true initSpecialCode.split("\n").foreach{ case "" => case l if l.startsWith("%") => if(acceptCodeType(l)){ accept = true codeBuffer.append(statementBuffer.mkString("\n")) statementBuffer.clear() }else{ accept = false } case l if accept => statementBuffer.append(l) case _ => } if(statementBuffer.nonEmpty) codeBuffer.append(statementBuffer.mkString("\n")) codeBuffer.toArray } override def beforeCreateEngine(params: JMap[String, String]): JMap[String, String] = { creator = params.get("creator") user = params.get("user") initSpecialCode = StringUtils.split(params.get(udfPathProp), ",").map(readFile).mkString("\n") params } override def afterCreatedEngine(executor: EngineExecutor): Unit = { generateCode().foreach { case "" => case c: String => info("Submit udf registration to engine, code: " + c) executor.execute(new ExecuteRequest with RunTypeExecuteRequest{ override val code: String = c override val runType: String = self.runType }) info("executed code: " + c) } } protected def readFile(path: String): String = { info("read file: " + path) val file = new File(path) if(file.exists()){ FileUtils.readFileToString(file) } else { info("udf file: [" + path + "] doesn't exist, ignore it.") "" } } } @Deprecated class SqlCodeGeneratorEngineHook extends CodeGeneratorEngineHook{ override val runType = "sql" override protected def acceptCodeType(line: String): Boolean = { line.startsWith("%sql") } } @Deprecated class PythonCodeGeneratorEngineHook extends CodeGeneratorEngineHook{ override val runType = "python" override protected def acceptCodeType(line: String): Boolean = { line.startsWith("%python") } } @Deprecated class ScalaCodeGeneratorEngineHook extends CodeGeneratorEngineHook{ override val runType = "scala" override protected def acceptCodeType(line: String): Boolean = { line.startsWith("%scala") } }
Example 86
Source File: AbstractEngineCreator.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.enginemanager import java.net.ServerSocket import com.webank.wedatasphere.linkis.common.conf.DWCArgumentsParser import com.webank.wedatasphere.linkis.common.utils.Utils import com.webank.wedatasphere.linkis.enginemanager.conf.EngineManagerConfiguration import com.webank.wedatasphere.linkis.enginemanager.exception.EngineManagerErrorException import com.webank.wedatasphere.linkis.enginemanager.impl.UserTimeoutEngineResource import com.webank.wedatasphere.linkis.enginemanager.process.{CommonProcessEngine, ProcessEngine, ProcessEngineBuilder} import com.webank.wedatasphere.linkis.protocol.engine.{EngineCallback, RequestEngine} import com.webank.wedatasphere.linkis.rpc.Sender import com.webank.wedatasphere.linkis.server.{JMap, toScalaMap} import org.apache.commons.io.IOUtils import scala.collection.mutable.ArrayBuffer abstract class AbstractEngineCreator extends EngineCreator { private val inInitPorts = ArrayBuffer[Int]() private def getAvailablePort: Int = synchronized { var port = AbstractEngineCreator.getNewPort while(inInitPorts.contains(port)) port = AbstractEngineCreator.getNewPort inInitPorts += port port } def removePort(port: Int): Unit = inInitPorts -= port protected def createProcessEngineBuilder(): ProcessEngineBuilder protected def getExtractSpringConfigs(requestEngine: RequestEngine): JMap[String, String] = { val springConf = new JMap[String, String] requestEngine.properties.keysIterator.filter(_.startsWith("spring.")).foreach(key => springConf.put(key.substring(7), requestEngine.properties.get(key))) springConf } protected def createEngine(processEngineBuilder:ProcessEngineBuilder,parser:DWCArgumentsParser):ProcessEngine={ processEngineBuilder.getEngineResource match { case timeout: UserTimeoutEngineResource => new CommonProcessEngine(processEngineBuilder, parser, timeout.getTimeout) case _ => new CommonProcessEngine(processEngineBuilder, parser) } } override def create(ticketId: String, engineRequest: EngineResource, request: RequestEngine): Engine = { val port = getAvailablePort val processEngineBuilder = createProcessEngineBuilder() processEngineBuilder.setPort(port) processEngineBuilder.build(engineRequest, request) val parser = new DWCArgumentsParser var springConf = Map("spring.application.name" -> EngineManagerConfiguration.ENGINE_SPRING_APPLICATION_NAME.getValue, "server.port" -> port.toString, "spring.profiles.active" -> "engine", "logging.config" -> "classpath:log4j2-engine.xml", "eureka.client.serviceUrl.defaultZone" -> EngineManagerReceiver.getSpringConf("eureka.client.serviceUrl.defaultZone")) springConf = springConf ++: getExtractSpringConfigs(request).toMap parser.setSpringConf(springConf) var dwcConf = Map("ticketId" -> ticketId, "creator" -> request.creator, "user" -> request.user) ++: EngineCallback.callbackToMap(EngineCallback(Sender.getThisServiceInstance.getApplicationName, Sender.getThisServiceInstance.getInstance)) if(request.properties.exists{case (k, v) => k.contains(" ") || (v != null && v.contains(" "))}) throw new EngineManagerErrorException(30000, "Startup parameters contain spaces!(启动参数中包含空格!)") dwcConf = dwcConf ++: request.properties.toMap parser.setDWCConf(dwcConf) val engine = createEngine(processEngineBuilder,parser) engine.setTicketId(ticketId) engine.setPort(port) engine match { case commonEngine: CommonProcessEngine => commonEngine.setUser(request.user) case _ => } engine } } object AbstractEngineCreator { private[enginemanager] def getNewPort: Int = { val socket = new ServerSocket(0) 
Utils.tryFinally(socket.getLocalPort)(IOUtils.closeQuietly(socket)) } }
Example 87
Source File: ScalaDDLCreator.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.metadata.ddl

import com.webank.wedatasphere.linkis.common.utils.Logging
import com.webank.wedatasphere.linkis.metadata.conf.MdqConfiguration
import com.webank.wedatasphere.linkis.metadata.domain.mdq.bo.{MdqTableBO, MdqTableFieldsInfoBO}
import com.webank.wedatasphere.linkis.metadata.exception.MdqIllegalParamException
import org.apache.commons.lang.StringUtils

import scala.collection.JavaConversions._
import scala.collection.mutable.ArrayBuffer

object ScalaDDLCreator extends DDLCreator with SQLConst with Logging{

  override def createDDL(tableInfo: MdqTableBO, user: String): String = {
    logger.info(s"begin to generate ddl for user $user using ScalaDDLCreator")
    val dbName = tableInfo.getTableBaseInfo.getBase.getDatabase
    val tableName = tableInfo.getTableBaseInfo.getBase.getName
    val fields = tableInfo.getTableFieldsInfo
    val createTableCode = new StringBuilder
    createTableCode.append(SPARK_SQL).append(LEFT_PARENTHESES).append(MARKS).append(CREATE_TABLE)
    createTableCode.append(dbName).append(".").append(tableName)
    createTableCode.append(LEFT_PARENTHESES)
    val partitions = new ArrayBuffer[MdqTableFieldsInfoBO]()
    val fieldsArray = new ArrayBuffer[String]()
    fields foreach { field =>
      if (field.getPartitionField != null && field.getPartitionField == true) partitions += field
      else {
        val name = field.getName
        val _type = field.getType
        val desc = field.getComment
        if (StringUtils.isNotEmpty(desc)) {
          fieldsArray += (name + SPACE + _type + SPACE + COMMENT + SPACE + SINGLE_MARK + desc + SINGLE_MARK)
        } else {
          fieldsArray += (name + SPACE + _type)
        }
      }
    }
    createTableCode.append(fieldsArray.mkString(COMMA)).append(RIGHT_PARENTHESES).append(SPACE)
    if (partitions.nonEmpty) {
      val partitionArr = new ArrayBuffer[String]()
      partitions foreach { p =>
        val name = p.getName
        val _type = p.getType
        if (StringUtils.isEmpty(name) || StringUtils.isEmpty(_type)) throw MdqIllegalParamException("partition name or type is null")
        partitionArr += (name + SPACE + _type)
      }
      createTableCode.append(PARTITIONED_BY).append(LEFT_PARENTHESES).append(partitionArr.mkString(COMMA)).
        append(RIGHT_PARENTHESES).append(SPACE)
    }
    // if this is a partitioned table but no partition field was given, partition by ds by default
    if (partitions.isEmpty && tableInfo.getTableBaseInfo.getBase.getPartitionTable) {
      val partition = MdqConfiguration.DEFAULT_PARTITION_NAME.getValue
      val _type = "string"
      createTableCode.append(PARTITIONED_BY).append(LEFT_PARENTHESES).append(partition).append(SPACE).append(_type).
        append(RIGHT_PARENTHESES).append(SPACE)
    }
    createTableCode.append(STORED_AS).append(SPACE).append(MdqConfiguration.DEFAULT_STORED_TYPE.getValue).append(SPACE)
    createTableCode.append(MARKS)
    createTableCode.append(RIGHT_PARENTHESES)
    val finalCode = createTableCode.toString()
    logger.info(s"End to create ddl code, code is $finalCode")
    finalCode
  }

  def main(args: Array[String]): Unit = {
    val filePath = "E:\\data\\json\\data.json"
    val json = scala.io.Source.fromFile(filePath).mkString
    println(json)
    // val obj = new Gson().fromJson(json, classOf[MdqTableVO])
    //val sql = createDDL(obj, "hadoop")
    //println(System.currentTimeMillis())
    //println(sql)
  }
}
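ScalaDDLCreator collects column definitions and partition columns into separate ArrayBuffers and joins them with mkString when assembling the statement. A much-reduced standalone sketch of that assembly step (the field triples and the STORED AS clause are illustrative, not the Linkis API):

import scala.collection.mutable.ArrayBuffer

object SimpleDDL {
  // (name, type, isPartition) triples in, a CREATE TABLE statement out.
  def createTable(db: String, table: String, fields: Seq[(String, String, Boolean)]): String = {
    val columns = new ArrayBuffer[String]()
    val partitions = new ArrayBuffer[String]()
    fields.foreach { case (name, dataType, isPartition) =>
      if (isPartition) partitions += s"$name $dataType"
      else columns += s"$name $dataType"
    }
    val ddl = new StringBuilder
    ddl.append(s"CREATE TABLE $db.$table (").append(columns.mkString(", ")).append(")")
    if (partitions.nonEmpty) ddl.append(" PARTITIONED BY (").append(partitions.mkString(", ")).append(")")
    ddl.append(" STORED AS ORC")
    ddl.toString
  }

  def main(args: Array[String]): Unit = {
    println(createTable("dwh", "events", Seq(("id", "bigint", false), ("name", "string", false), ("ds", "string", true))))
    // CREATE TABLE dwh.events (id bigint, name string) PARTITIONED BY (ds string) STORED AS ORC
  }
}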
Example 88
Source File: RMEventConsumer.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.resourcemanager.schedule import java.util.concurrent.{ExecutorService, Future} import com.webank.wedatasphere.linkis.common.utils.Utils import com.webank.wedatasphere.linkis.resourcemanager.event.RMEvent import com.webank.wedatasphere.linkis.resourcemanager.event.metric.{MetricRMEvent, MetricRMEventExecutor} import com.webank.wedatasphere.linkis.resourcemanager.event.notify.{NotifyRMEvent, NotifyRMEventExecutor} import com.webank.wedatasphere.linkis.scheduler.SchedulerContext import com.webank.wedatasphere.linkis.scheduler.queue._ import scala.collection.mutable.ArrayBuffer class RMEventConsumer(schedulerContext: SchedulerContext, executeService: ExecutorService) extends Consumer(schedulerContext, executeService) { private var queue: ConsumeQueue = _ private var group: Group = _ private var maxRunningJobsNum = 1000 //Not put(暂未放) private val runningJobs = new Array[SchedulerEvent](maxRunningJobsNum) private val executorManager = schedulerContext.getOrCreateExecutorManager private var rmConsumerListener : RMConsumerListener = _ var future: Future[_] = _ def this(schedulerContext: SchedulerContext, executeService: ExecutorService, group: Group) = { this(schedulerContext, executeService) this.group = group maxRunningJobsNum = group.getMaximumCapacity } def start():Unit = future = executeService.submit(this) def setRmConsumerListener(rmConsumerListener: RMConsumerListener): Unit ={ this.rmConsumerListener = rmConsumerListener } override def setConsumeQueue(consumeQueue: ConsumeQueue) = { queue = consumeQueue } override def getConsumeQueue = queue override def getGroup = group override def setGroup(group: Group) = { this.group = group } override def getRunningEvents = getEvents(_.isRunning) private def getEvents(op: SchedulerEvent => Boolean): Array[SchedulerEvent] = { val result = ArrayBuffer[SchedulerEvent]() runningJobs.filter(_ != null).filter(x => op(x)).foreach(result += _) result.toArray } override def run() = { Thread.currentThread().setName(s"${toString}Thread") info(s"$toString thread started!") while (!terminate) { Utils.tryAndError(loop()) Utils.tryQuietly(Thread.sleep(10)) } info(s"$toString thread stopped!") } def loop(): Unit = { var event = queue.take() while (event.turnToScheduled() != true) { event = queue.take() } if(rmConsumerListener != null){rmConsumerListener.beforeEventExecute(this,event.asInstanceOf[RMEvent])} Utils.tryAndError({ val executor = executorManager.askExecutor(event) if (executor.isDefined) { event match { case x: MetricRMEvent =>{ Utils.tryQuietly(executor.get.asInstanceOf[MetricRMEventExecutor].execute(new EventJob(x))) } case y: NotifyRMEvent =>{ Utils.tryQuietly(executor.get.asInstanceOf[NotifyRMEventExecutor].execute(new EventJob(y))) } } } }) if(rmConsumerListener != null){rmConsumerListener.afterEventExecute(this,event.asInstanceOf[RMEvent])} } override def shutdown() = { future.cancel(true) super.shutdown() } }
Example 89
Source File: StorageScriptFsReader.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.script.reader import java.io._ import com.webank.wedatasphere.linkis.common.io.{FsPath, MetaData, Record} import com.webank.wedatasphere.linkis.storage.script._ import com.webank.wedatasphere.linkis.storage.utils.StorageUtils import org.apache.commons.io.IOUtils import scala.collection.mutable.ArrayBuffer def isMetadata(line: String, prefix: String, prefixConf: String): Boolean = { val regex = ("\\s*" + prefix + "\\s*(.+)\\s*" + "=" + "\\s*(.+)\\s*").r line match { case regex(_, _) => true case _ => { val split: Array[String] = line.split("=") if (split.size != 2) return false if (split(0).split(" ").filter(_ != "").size != 4) return false if (!split(0).split(" ").filter(_ != "")(0).equals(prefixConf)) return false true } } } }
Example 90
Source File: ResultSetWriter.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.resultset import com.webank.wedatasphere.linkis.common.io.resultset.{ResultSet, ResultSetWriter} import com.webank.wedatasphere.linkis.common.io.{FsPath, MetaData, Record} import scala.collection.mutable.ArrayBuffer object ResultSetWriter { def getResultSetWriter[K <: MetaData, V <: Record](resultSet: ResultSet[K,V], maxCacheSize: Long, storePath: FsPath):ResultSetWriter[K, V] = new StorageResultSetWriter[K, V](resultSet, maxCacheSize, storePath) def getResultSetWriter[K <: MetaData, V <: Record](resultSet: ResultSet[K,V], maxCacheSize: Long, storePath: FsPath, proxyUser:String):ResultSetWriter[K, V] ={ val writer = new StorageResultSetWriter[K, V](resultSet, maxCacheSize, storePath) writer.setProxyUser(proxyUser) writer } def getRecordByWriter(writer: ResultSetWriter[_ <:MetaData,_ <:Record],limit:Long): Array[Record] ={ val res = writer.toString getRecordByRes(res,limit) } def getRecordByRes(res: String,limit:Long): Array[Record] ={ val reader = ResultSetReader.getResultSetReader(res) var count = 0 val records = new ArrayBuffer[Record]() reader.getMetaData while (reader.hasNext && count < limit){ records += reader.getRecord count = count + 1 } records.toArray } def getLastRecordByRes(res: String):Record = { val reader = ResultSetReader.getResultSetReader(res) reader.getMetaData while (reader.hasNext ){ reader.getRecord } reader.getRecord } }
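The getRecordByRes method above uses ArrayBuffer as a bounded accumulator: keep appending records while the reader has more and a counter stays below the limit. A minimal sketch of that loop follows, with a plain Iterator standing in for ResultSetReader; the takeUpTo name is an assumption for illustration.

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag

object LimitedReadSketch {
  // Drain at most `limit` elements from an iterator into an ArrayBuffer,
  // mirroring the counting loop in getRecordByRes.
  def takeUpTo[A: ClassTag](it: Iterator[A], limit: Long): Array[A] = {
    val records = ArrayBuffer[A]()
    var count = 0L
    while (it.hasNext && count < limit) {
      records += it.next()
      count += 1
    }
    records.toArray
  }

  def main(args: Array[String]): Unit =
    println(takeUpTo(Iterator("r1", "r2", "r3", "r4"), 2).mkString(", "))  // r1, r2
}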
Example 91
Source File: StorageResultSetReader.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.resultset import java.io.{ByteArrayInputStream, IOException, InputStream} import com.webank.wedatasphere.linkis.common.io.resultset.{ResultSet, ResultSetReader} import com.webank.wedatasphere.linkis.common.io.{MetaData, Record} import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.storage.domain.Dolphin import com.webank.wedatasphere.linkis.storage.exception.StorageWarnException import com.webank.wedatasphere.linkis.storage.utils.StorageUtils import scala.collection.mutable.ArrayBuffer def readLine(): Array[Byte] = { var rowLen = 0 try rowLen = Dolphin.readInt(inputStream) catch { case t:StorageWarnException => info(s"Read finished(读取完毕)") ; return null case t: Throwable => throw t } val rowBuffer = ArrayBuffer[Byte]() var len = 0 //Read the entire line, except for the data of the line length(读取整行,除了行长的数据) while (rowLen > 0 && len >= 0) { if (rowLen > READ_CACHE) len = StorageUtils.readBytes(inputStream,bytes, READ_CACHE) else len = StorageUtils.readBytes(inputStream,bytes, rowLen) if (len > 0) { rowLen -= len rowBuffer ++= bytes.slice(0, len) } } rowCount = rowCount + 1 rowBuffer.toArray } @scala.throws[IOException] override def getRecord: Record = { if (metaData == null) throw new IOException("Must read metadata first(必须先读取metadata)") if (row == null) throw new IOException("Can't get the value of the field, maybe the IO stream has been read or has been closed!(拿不到字段的值,也许IO流已读取完毕或已被关闭!)") row } @scala.throws[IOException] override def getMetaData: MetaData = { if(metaData == null) init() metaData = deserializer.createMetaData(readLine()) metaData } @scala.throws[IOException] override def skip(recordNum: Int): Int = { if(recordNum < 0 ) return -1 if(metaData == null) getMetaData for(i <- recordNum until (0, -1)){ try inputStream.skip(Dolphin.readInt(inputStream)) catch { case t: Throwable => return -1} } recordNum } @scala.throws[IOException] override def getPosition: Long = rowCount @scala.throws[IOException] override def hasNext: Boolean = { if(metaData == null) getMetaData val line = readLine() if(line == null) return false row = deserializer.createRecord(line) if(row == null) return false true } @scala.throws[IOException] override def available: Long = inputStream.available() override def close(): Unit = inputStream.close() }
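readLine above accumulates a row by appending byte-array slices to an ArrayBuffer[Byte] until the declared row length has been consumed. The sketch below reproduces just that accumulation loop over a plain java.io.InputStream; the Dolphin length prefix and the StorageUtils helpers are omitted, and the readFully name and cacheSize default are assumptions made for the example.

import java.io.{ByteArrayInputStream, InputStream}
import scala.collection.mutable.ArrayBuffer

object ChunkedReadSketch {
  // Read exactly `rowLen` bytes from a stream in cache-sized chunks, appending
  // each chunk slice to an ArrayBuffer, as readLine does.
  def readFully(in: InputStream, rowLen: Int, cacheSize: Int = 4): Array[Byte] = {
    val bytes = new Array[Byte](cacheSize)
    val rowBuffer = ArrayBuffer[Byte]()
    var remaining = rowLen
    var len = 0
    while (remaining > 0 && len >= 0) {
      len = in.read(bytes, 0, math.min(cacheSize, remaining))
      if (len > 0) {
        remaining -= len
        rowBuffer ++= bytes.slice(0, len)
      }
    }
    rowBuffer.toArray
  }

  def main(args: Array[String]): Unit = {
    val data = "hello world".getBytes("UTF-8")
    println(new String(readFully(new ByteArrayInputStream(data), 5), "UTF-8"))  // hello
  }
}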
Example 92
Source File: TableResultDeserializer.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.resultset.table import com.webank.wedatasphere.linkis.common.io.resultset.ResultDeserializer import com.webank.wedatasphere.linkis.storage.domain.{Column, DataType, Dolphin} import com.webank.wedatasphere.linkis.storage.exception.StorageErrorException import scala.collection.mutable.ArrayBuffer override def createRecord(bytes: Array[Byte]): TableRecord = { val colByteLen = Dolphin.getString(bytes, 0, Dolphin.INT_LEN).toInt val colString = Dolphin.getString(bytes, Dolphin.INT_LEN, colByteLen) val colArray = if(colString.endsWith(Dolphin.COL_SPLIT)) colString.substring(0, colString.length -1).split(Dolphin.COL_SPLIT) else colString.split(Dolphin.COL_SPLIT) var index = Dolphin.INT_LEN + colByteLen val data = colArray.indices.map { i => val len = colArray(i).toInt val res = Dolphin.getString(bytes, index, len) index += len if(i >= metaData.columns.length) res else toValue(metaData.columns(i).dataType,res) }.toArray new TableRecord(data) } }
Example 93
Source File: RetryHandler.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.common.utils import com.webank.wedatasphere.linkis.common.exception.{DWCRetryException, FatalException} import org.apache.commons.lang.{ClassUtils => CommonClassUtils} import scala.collection.mutable.ArrayBuffer trait RetryHandler extends Logging { private var retryNum = 2 private var period = 100l private var maxPeriod = 1000l private val retryExceptions = ArrayBuffer[Class[_ <: Throwable]]() def setRetryNum(retryNum: Int): Unit = this.retryNum = retryNum def getRetryNum: Int = retryNum def setRetryPeriod(retryPeriod: Long): Unit = this.period = retryPeriod def getRetryPeriod: Long = period def setRetryMaxPeriod(retryMaxPeriod: Long): Unit = this.maxPeriod = retryMaxPeriod def getRetryMaxPeriod: Long = maxPeriod def addRetryException(t: Class[_ <: Throwable]): Unit = retryExceptions += t def getRetryExceptions = retryExceptions.toArray def exceptionCanRetry(t: Throwable): Boolean = !t.isInstanceOf[FatalException] && retryExceptions.exists(c => CommonClassUtils.isAssignable(t.getClass, c)) def nextInterval(attempt: Int): Long = { val interval = (this.period.toDouble * Math.pow(1.5D, (attempt - 1).toDouble)).toLong if (interval > this.maxPeriod) this.maxPeriod else interval } def retry[T](op: => T, retryName: String): T = { if(retryExceptions.isEmpty || retryNum <= 1) return op var retry = 0 var result = null.asInstanceOf[T] while(retry < retryNum && result == null) result = Utils.tryCatch(op) { t => retry += 1 if(retry >= retryNum) throw t else if(exceptionCanRetry(t)) { val retryInterval = nextInterval(retry) info(retryName + s" failed with ${t.getClass.getName}, wait ${ByteTimeUtils.msDurationToString(retryInterval)} for next retry. Retried $retry++ ...") Utils.tryQuietly(Thread.sleep(retryInterval)) null.asInstanceOf[T] } else throw t } result } }
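RetryHandler keeps the registered retryable exception classes in an ArrayBuffer and consults it on every failure before backing off exponentially. A condensed, runnable sketch of the same idea follows; it drops the Logging/Utils plumbing of the original, and the RetrySketch object, the retry signature and its defaults are illustrative rather than the project's API.

import scala.collection.mutable.ArrayBuffer

object RetrySketch {
  // Exception classes registered in an ArrayBuffer decide which failures are retried.
  private val retryExceptions = ArrayBuffer[Class[_ <: Throwable]]()
  def addRetryException(t: Class[_ <: Throwable]): Unit = retryExceptions += t

  private def canRetry(t: Throwable): Boolean =
    retryExceptions.exists(_.isAssignableFrom(t.getClass))

  // Retry `op` up to retryNum times with exponential backoff.
  def retry[T](op: => T, retryNum: Int = 3, periodMs: Long = 100L): T = {
    var attempt = 0
    var result: Option[T] = None
    while (result.isEmpty) {
      try result = Some(op)
      catch {
        case t: Throwable if canRetry(t) && attempt < retryNum - 1 =>
          attempt += 1
          Thread.sleep((periodMs * math.pow(1.5, (attempt - 1).toDouble)).toLong)
      }
    }
    result.get
  }

  def main(args: Array[String]): Unit = {
    addRetryException(classOf[IllegalArgumentException])
    var calls = 0
    val res = retry { calls += 1; if (calls < 3) throw new IllegalArgumentException("flaky") else "ok" }
    println(s"$res after $calls calls")  // ok after 3 calls
  }
}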
Example 94
Source File: ShutdownUtils.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.common.utils import sun.misc.{Signal, SignalHandler} import scala.collection.mutable.ArrayBuffer object ShutdownUtils { private val shutdownRunners = ArrayBuffer[ShutdownRunner]() def addShutdownHook(runnable: Runnable): Unit = addShutdownHook(Int.MaxValue, runnable) def addShutdownHook(order: Int, runnable: Runnable): Unit = shutdownRunners synchronized shutdownRunners += new DefaultShutdownRunner(order, runnable) def addShutdownHook(hook: => Unit): Unit = addShutdownHook(Int.MaxValue, hook) def addShutdownHook(order: Int, hook: => Unit): Unit = shutdownRunners synchronized shutdownRunners += new FunctionShutdownRunner(order, hook) def addShutdownHook(shutdownRunner: ShutdownRunner): Unit = shutdownRunners synchronized shutdownRunners += shutdownRunner private val signals = Array("TERM", "HUP", "INT").map(new Signal(_)) private val signalHandler = new SignalHandler { override def handle(signal: Signal): Unit = { val hooks = shutdownRunners.sortBy(_.order).toArray.map{ case m: DefaultShutdownRunner => Utils.defaultScheduler.execute(m) m case m => val runnable = new DefaultShutdownRunner(m.order, m) Utils.defaultScheduler.execute(runnable) runnable } val startTime = System.currentTimeMillis ShutdownUtils synchronized { while(System.currentTimeMillis - startTime < 30000 && hooks.exists(!_.isCompleted)) ShutdownUtils.wait(3000) } System.exit(0) } } signals.foreach(Signal.handle(_, signalHandler)) } trait ShutdownRunner extends Runnable { val order: Int } class DefaultShutdownRunner(override val order: Int, runnable: Runnable) extends ShutdownRunner { private var completed = false override def run(): Unit = Utils.tryFinally(runnable.run()){ completed = true ShutdownUtils synchronized ShutdownUtils.notify() } def isCompleted = completed } class FunctionShutdownRunner(override val order: Int, hook: => Unit) extends ShutdownRunner { override def run(): Unit = hook }
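ShutdownUtils gathers shutdown hooks into an ArrayBuffer and runs them sorted by their order field when a signal arrives. The sketch below keeps only the buffer-and-sort part, without signal handling or the scheduler; the OrderedHooksSketch and Hook names are hypothetical.

import scala.collection.mutable.ArrayBuffer

object OrderedHooksSketch {
  // Hooks are registered into an ArrayBuffer and executed sorted by order.
  final case class Hook(order: Int, run: () => Unit)
  private val hooks = ArrayBuffer[Hook]()

  def addHook(order: Int, body: => Unit): Unit =
    hooks.synchronized { hooks += Hook(order, () => body) }

  def runAll(): Unit = hooks.sortBy(_.order).foreach(_.run())

  def main(args: Array[String]): Unit = {
    addHook(10, println("close connections"))
    addHook(1, println("flush buffers"))
    runAll()  // flush buffers, then close connections
  }
}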
Example 95
Source File: DWCArgumentsParser.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.common.conf import org.apache.commons.lang.StringUtils import scala.collection.{JavaConversions, mutable} import scala.collection.mutable.ArrayBuffer object DWCArgumentsParser { protected val DWC_CONF = "--dwc-conf" protected val SPRING_CONF = "--spring-conf" private var dwcOptionMap = Map.empty[String, String] private[linkis] def setDWCOptionMap(dwcOptionMap: Map[String, String]) = this.dwcOptionMap = dwcOptionMap def getDWCOptionMap = dwcOptionMap def parse(args: Array[String]): DWCArgumentsParser = { val keyValueRegex = "([^=]+)=(.+)".r var i = 0 val optionParser = new DWCArgumentsParser while(i < args.length) { args(i) match { case DWC_CONF | SPRING_CONF => args(i + 1) match { case keyValueRegex(key, value) => optionParser.setConf(args(i), key, value) i += 1 case _ => throw new IllegalArgumentException("illegal commond line, format: --conf key=value.") } case _ => throw new IllegalArgumentException(s"illegal commond line, ${args(i)} cannot recognize.") } i += 1 } optionParser.validate() optionParser } def formatToArray(optionParser: DWCArgumentsParser): Array[String] = { val options = ArrayBuffer[String]() def write(confMap: Map[String, String], optionType: String): Unit = confMap.foreach { case (key, value) => if (StringUtils.isNotEmpty(key) && StringUtils.isNotEmpty(value)) { options += optionType options += (key + "=" + value) } } write(optionParser.getDWCConfMap, DWC_CONF) write(optionParser.getSpringConfMap, SPRING_CONF) options.toArray } def formatToArray(springOptionMap: Map[String, String], dwcOptionMap: Map[String, String]): Array[String] = formatToArray(new DWCArgumentsParser().setSpringConf(springOptionMap).setDWCConf(dwcOptionMap)) def format(optionParser: DWCArgumentsParser): String = formatToArray(optionParser).mkString(" ") def format(springOptionMap: Map[String, String], dwcOptionMap: Map[String, String]): String = formatToArray(springOptionMap, dwcOptionMap).mkString(" ") def formatSpringOptions(springOptionMap: Map[String, String]): Array[String] = { val options = ArrayBuffer[String]() springOptionMap.foreach { case (key, value) => if (StringUtils.isNotEmpty(key) && StringUtils.isNotEmpty(value)) { options += ("--" + key + "=" + value) } } options.toArray } } class DWCArgumentsParser { import DWCArgumentsParser._ private val dwcOptionMap = new mutable.HashMap[String, String]() private val springOptionMap = new mutable.HashMap[String, String]() def getSpringConfMap = springOptionMap.toMap def getSpringConfs = JavaConversions.mapAsJavaMap(springOptionMap) def getDWCConfMap = dwcOptionMap.toMap def setConf(optionType: String, key: String, value: String) = { optionType match { case DWC_CONF => dwcOptionMap += key -> value case SPRING_CONF => springOptionMap += key -> value } this } def setSpringConf(optionMap: Map[String, String]): DWCArgumentsParser = { if(optionMap != null) this.springOptionMap ++= optionMap this } def setDWCConf(optionMap: Map[String, String]): DWCArgumentsParser = { if(optionMap != null) this.dwcOptionMap ++= optionMap this } def validate() = {} }
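formatToArray above builds a command-line Array[String] by appending option-type/key=value pairs to an ArrayBuffer and converting it at the end. A minimal standalone version of that pattern (without the parser class or commons-lang StringUtils; the ArgsFormatSketch name is illustrative) might look like this:

import scala.collection.mutable.ArrayBuffer

object ArgsFormatSketch {
  // Turn a config map into "--spring-conf key=value" style arguments,
  // accumulating them in an ArrayBuffer before converting to an Array.
  def formatToArray(confMap: Map[String, String], optionType: String): Array[String] = {
    val options = ArrayBuffer[String]()
    confMap.foreach { case (key, value) =>
      if (key.nonEmpty && value.nonEmpty) {
        options += optionType
        options += s"$key=$value"
      }
    }
    options.toArray
  }

  def main(args: Array[String]): Unit =
    println(formatToArray(Map("server.port" -> "9001"), "--spring-conf").mkString(" "))
  // --spring-conf server.port=9001
}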
Example 96
Source File: _03_TraitsAsStackableModifications.scala From LearningScala with Apache License 2.0 | 5 votes |
package _033_traits import scala.collection.mutable.ArrayBuffer class MyQueue extends BasicIntQueue with Doubling def main(args: Array[String]): Unit = { val queue = new BasicIntQueue queue.put(-10) queue.put(20) println(s"queue.get(): ${queue.get()}") println(s"queue.get(): ${queue.get()}") println() val myQueue = new MyQueue myQueue.put(-10) myQueue.put(20) println(s"myQueue.get(): ${myQueue.get()}") println(s"myQueue.get(): ${myQueue.get()}") println() // You could supply "BasicIntQueue with Doubling" directly to new instead of defining a named class. val queueWithDoubling = new BasicIntQueue with Doubling queueWithDoubling.put(-10) queueWithDoubling.put(20) println(s"queueWithDoubling.get(): ${queueWithDoubling.get()}") println(s"queueWithDoubling.get(): ${queueWithDoubling.get()}") println() // ORDER MATTERS examples: // You can now pick and choose which traits you want for a particular queue. val q1 = new BasicIntQueue with Incrementing with Filtering q1.put(-1) q1.put(0) q1.put(1) println(s"q1.get(): ${q1.get()}") println(s"q1.get(): ${q1.get()}") // println(s"q1.get(): ${q1.get()}") // will give an error println() val q2 = new BasicIntQueue with Filtering with Incrementing q2.put(-1) q2.put(0) q2.put(1) println(s"q2.get(): ${q2.get()}") println(s"q2.get(): ${q2.get()}") println(s"q2.get(): ${q2.get()}") println() } }
Example 97
Source File: _10_MutableCollections.scala From LearningScala with Apache License 2.0 | 5 votes |
package _020_collections object _10_MutableCollections { def main(args: Array[String]): Unit = { println("===== List buffers =====") listBufferExample() println() println("===== Array buffers =====") println(arrayBufferExample()) println() println("===== Mutable Sets =====") mutableSetExample() println() println("===== Mutable Maps =====") mutableMapExample() } private def mutableMapExample(): Unit = { import scala.collection.mutable val map = mutable.Map.empty[String, Int] println(map) map("hello") = 1 map("there") = 2 println(map) println(map("hello")) println("======") val nums = mutable.Map("i" -> 1, "ii" -> 2) println(nums) nums += ("vi" -> 6) println(nums) nums -= "ii" println(nums) nums ++= List("iii" -> 3, "v" -> 5) println(nums) nums --= List("i", "ii") println(nums) println("=====") println(s"nums.size: ${nums.size}") print("nums.contains(\"ii\"): ") println(nums.contains("ii")) print("nums(\"iii\"): ") println(nums("iii")) println(s"nums.keys ==> ${nums.keys}") println(s"nums.keySet ==> ${nums.keySet}") println(s"nums.values ==> ${nums.values}") println(s"nums.isEmpty: ${nums.isEmpty}") } def arrayBufferExample(): List[Int] = { import scala.collection.mutable.ArrayBuffer val ab = ArrayBuffer[Int](10, 20) ab += 30 ab += 40 ab.prepend(5) ab.toList //return immutable } private def listBufferExample(): Unit = { import scala.collection.mutable.ListBuffer val listBuffer = new ListBuffer[Int] listBuffer += 1 listBuffer += 2 println(listBuffer) 3 +=: listBuffer println(listBuffer) val list = listBuffer.toList println(list) } private def mutableSetExample(): Unit = { import scala.collection.mutable val emptySet = mutable.Set.empty[Int] println(emptySet) val nums = mutable.Set(1, 2, 3) println(nums) nums += 5 println(nums) nums -= 3 println(nums) nums ++= List(5, 6) println(nums) nums --= List(1, 2) println(nums) println(nums & Set(1, 3, 5, 7)) // intersection of two sets nums.clear() println(nums) } }
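The arrayBufferExample method above touches only append and prepend. For quick reference, here is a compact sketch of the most common ArrayBuffer operations (expected contents are noted in the comments):

import scala.collection.mutable.ArrayBuffer

object ArrayBufferBasicsSketch {
  def main(args: Array[String]): Unit = {
    val ab = ArrayBuffer[Int](10, 20)
    ab += 30            // append a single element
    ab ++= Seq(40, 50)  // append a whole collection
    ab.prepend(5)       // add at the front
    ab.insert(2, 15)    // insert at index 2
    ab.remove(0)        // drop the first element
    println(ab)         // ArrayBuffer(10, 15, 20, 30, 40, 50)
    println(ab.toList)  // immutable snapshot of the buffer
  }
}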
Example 98
Source File: TestableQueueInputDStream.scala From SparkUnitTestingExamples with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming import java.io.{ObjectInputStream, ObjectOutputStream} import org.apache.spark.rdd.{RDD, UnionRDD} import org.apache.spark.streaming.dstream.InputDStream import scala.collection.mutable.{ArrayBuffer, Queue} import scala.reflect.ClassTag class TestableQueueInputDStream[T: ClassTag]( ssc: StreamingContext, val queue: Queue[RDD[T]], oneAtATime: Boolean, defaultRDD: RDD[T] ) extends InputDStream[T](ssc) { override def start() { } override def stop() { } private def readObject(in: ObjectInputStream): Unit = { logWarning("queueStream doesn't support checkpointing") } private def writeObject(oos: ObjectOutputStream): Unit = { logWarning("queueStream doesn't support checkpointing") } override def compute(validTime: Time): Option[RDD[T]] = { val buffer = new ArrayBuffer[RDD[T]]() queue.synchronized { if (oneAtATime && queue.nonEmpty) { buffer += queue.dequeue() } else { buffer ++= queue queue.clear() } } if (buffer.nonEmpty) { if (oneAtATime) { Some(buffer.head) } else { Some(new UnionRDD(context.sc, buffer.toSeq)) } } else if (defaultRDD != null) { Some(defaultRDD) } else { Some(ssc.sparkContext.emptyRDD) } } }
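The compute method above drains a mutable Queue into an ArrayBuffer under synchronization, either one RDD at a time or all at once. The Spark-free sketch below isolates that drain step; the drain name and Seq return type are illustrative only.

import scala.collection.mutable.{ArrayBuffer, Queue}

object QueueDrainSketch {
  // Either take one element or drain the whole queue into a buffer,
  // mirroring the branching in TestableQueueInputDStream.compute.
  def drain[A](queue: Queue[A], oneAtATime: Boolean): Seq[A] = {
    val buffer = new ArrayBuffer[A]()
    queue.synchronized {
      if (oneAtATime && queue.nonEmpty) buffer += queue.dequeue()
      else { buffer ++= queue; queue.clear() }
    }
    buffer.toSeq
  }

  def main(args: Array[String]): Unit = {
    val q = Queue(1, 2, 3)
    println(drain(q, oneAtATime = true))   // just the first element
    println(drain(q, oneAtATime = false))  // everything that is left: 2 and 3
  }
}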
Example 99
Source File: PruneWorker.scala From spatial with MIT License | 5 votes |
package spatial.dse import java.util.concurrent.LinkedBlockingQueue import argon.State import spatial.metadata.params._ import spatial.metadata.bounds._ import scala.collection.mutable.ArrayBuffer case class PruneWorker( start: Int, size: Int, prods: Seq[BigInt], dims: Seq[BigInt], indexedSpace: Seq[(Domain[_],Int)], restricts: Set[Restrict], queue: LinkedBlockingQueue[Seq[Int]] )(implicit state: State) extends Runnable { private def isLegalSpace(): Boolean = restricts.forall(_.evaluate()) def run(): Unit = { println(s"Searching from $start until ${start+size}") val pts = (start until (start+size)).filter{i => indexedSpace.foreach{case (domain,d) => domain.set( ((i / prods(d)) % dims(d)).toInt ) } isLegalSpace() } queue.put(pts) } }
Example 100
Source File: Flows.scala From spatial with MIT License | 5 votes |
package argon import scala.collection.mutable.{ArrayBuffer,HashSet} import utils.Instrument trait FlowRules { val IR: State } class Flows { private var rules = ArrayBuffer[(String,PartialFunction[(Sym[_],Op[_],SrcCtx,State),Unit])]() private[argon] var names = HashSet[String]() lazy val instrument = new Instrument("flows") def prepend(name: String, func: PartialFunction[(Sym[_],Op[_],SrcCtx,State),Unit]): Unit = { rules.prepend((name,func)) names += name } def add(name: String, func: PartialFunction[(Sym[_],Op[_],SrcCtx,State),Unit]): Unit = { rules += ((name,func)) names += name } def remove(name: String): Unit = { val idx = rules.indexWhere(_._1 == name) rules.remove(idx) names.remove(name) } def apply[A](lhs: Sym[A], rhs: Op[A])(implicit ctx: SrcCtx, state: State): Unit = { val tuple = (lhs,rhs,ctx,state) rules.foreach{case (name,rule) => if (rule.isDefinedAt(tuple)) { instrument(name){ rule.apply(tuple) } } } } def save(): Flows = { val flows = new Flows flows.rules ++= rules flows.names ++= names flows } def restore(flow: Flows): Unit = { rules = flow.rules names = flow.names } }
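Flows stores its rules as (name, function) pairs in an ArrayBuffer so they can be prepended, appended, removed by name and applied in order. A stripped-down registry with the same ArrayBuffer mechanics is sketched below, using a plain Int => Unit rule type instead of the compiler's PartialFunction signature; all names here are illustrative.

import scala.collection.mutable.{ArrayBuffer, HashSet}

object RuleRegistrySketch {
  // Rules are kept in registration order in an ArrayBuffer; a HashSet tracks names.
  private val rules = ArrayBuffer[(String, Int => Unit)]()
  private val names = HashSet[String]()

  def add(name: String, rule: Int => Unit): Unit = { rules += ((name, rule)); names += name }
  def prepend(name: String, rule: Int => Unit): Unit = { rules.prepend((name, rule)); names += name }
  def remove(name: String): Unit = {
    val idx = rules.indexWhere(_._1 == name)
    if (idx >= 0) { rules.remove(idx); names.remove(name) }
  }
  def run(x: Int): Unit = rules.foreach { case (_, rule) => rule(x) }

  def main(args: Array[String]): Unit = {
    add("print", x => println(s"saw $x"))
    prepend("double", x => println(s"doubled ${x * 2}"))
    run(21)           // "double" runs first, then "print"
    remove("double")
    run(21)           // only "print" remains
  }
}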
Example 101
Source File: Rewrites.scala From spatial with MIT License | 5 votes |
package argon import utils.implicits.collections._ import scala.collection.mutable import scala.collection.mutable.ArrayBuffer trait RewriteRules { val IR: State } class Rewrites { type RewriteRule = PartialFunction[(Op[_],SrcCtx,State),Option[Sym[_]]] private def keyOf[A<:Op[_]:Manifest] = manifest[A].runtimeClass.asInstanceOf[Class[A]] // Roughly O(G), where G is the total number of global rewrite rules // When possible, use rules instead of globals private var globals: ArrayBuffer[RewriteRule] = ArrayBuffer.empty // Roughly O(R), where R is the number of rules for a specific node class private val rules: mutable.HashMap[Class[_], ArrayBuffer[RewriteRule]] = mutable.HashMap.empty private[argon] val names: mutable.HashSet[String] = mutable.HashSet.empty def rule(op: Op[_]): Seq[RewriteRule] = rules.getOrElse(op.getClass, Nil) def addGlobal(name: String, rule: RewriteRule): Unit = if (!names.contains(name)) { names += name globals += rule } def add[O<:Op[_]:Manifest](name: String, rule: RewriteRule): Unit = if (!names.contains(name)) { names += name val key = keyOf[O] val pfs = rules.getOrElseAdd(key, () => ArrayBuffer.empty[RewriteRule]) pfs += rule } private def applyRule[A:Type](op: Op[A], ctx: SrcCtx, state: State, rule: RewriteRule): Option[A] = { rule.apply((op,ctx,state)) match { case Some(s) if s.tp <:< Type[A] => Some(s.asInstanceOf[A]) case Some(s) => None case _ => None } } def apply[A:Type](op: Op[A])(implicit ctx: SrcCtx, state: State): Option[A] = { Option(op.rewrite) .orElse{ rule(op).mapFind{rule => applyRule[A](op,ctx,state, rule) } } .orElse{ globals.mapFind{rule => applyRule[A](op,ctx,state, rule) } }.map { op2 => if (state.config.enLog) { dbgs(s"Rewrite $op => $op2") } op2 } } }
Example 102
Source File: BitTest.scala From spatial with MIT License | 5 votes |
package spatial.tests.compiler import spatial.dsl._ import scala.collection.mutable.ArrayBuffer @spatial class BitTest extends SpatialTest { override def backends = DISABLED // Returns a random number in [min,max) def rand(max: gen.Int, min: gen.Int): gen.Int = scala.util.Random.nextInt(max-min)+min def opp(x: Bit, y: Bit, op: gen.Int): Bit = op match { case 0 | 1 | 2 => x & y case 3 | 4 | 5 => x | y case 6 | 7 | 8 => x !== y case 9 | 10 | 11 => x === y case 12 => !x case 13 => !y } def main(args: Array[String]): Void = { Foreach(0 until 32){i => val bits: List[Bit] = List.fill(32){ random[Bit] } var layers: ArrayBuffer[List[Bit]] = ArrayBuffer(bits) (0 until 64).meta.foreach{i => val layer = List.fill(200){ val l1 = i //rand(layers.length,0) val l2 = i //rand(layers.length,0) val p1 = rand(layers(l1).length, 0) val p2 = rand(layers(l2).length, 0) val op = rand(14,0) val x = layers(l1).apply(p1) val y = layers(l2).apply(p2) opp(x,y,op) } layers += layer println(r"[$i] 1: ${layer(1)}, 3: ${layer(3)}, 5: ${layer(5)}") } } } }
Example 103
Source File: TemplateRunner.scala From spatial with MIT License | 5 votes |
package fringe.test import java.io.File import scala.collection.mutable.ArrayBuffer import scala.util.Properties.envOrElse object TemplateRunner { def deleteRecursively(file: File): Unit = { if (file.isDirectory) file.listFiles.foreach(deleteRecursively) if (file.exists && !file.delete) throw new Exception(s"Unable to delete ${file.getAbsolutePath}") } def apply(templateMap: Map[String, String => Boolean], args: Array[String]): Unit = { // Choose the default backend based on what is available. lazy val firrtlTerpBackendAvailable: Boolean = { try { val cls = Class.forName("chisel3.iotesters.FirrtlTerpBackend") cls != null } catch { case e: Throwable => false } } lazy val defaultBackend = if (firrtlTerpBackendAvailable) "firrtl" else "" val backendName = envOrElse("TESTER_BACKENDS", defaultBackend).split(" ").head val tempDir = s"""${envOrElse("NEW_TEMPLATES_HOME", "tmp")}/test_run_dir/""" val specificRegex = "(.*[0-9]+)".r val problemsToRun = if (args.isEmpty) { templateMap.keys.toSeq.sorted.toArray // Run all by default } else { args.map { arg => arg match { case "all" => templateMap.keys.toSeq.sorted // Run all case specificRegex(c) => List(c).toSeq // Run specific test case _ => // Figure out tests that match this template and run all val tempRegex = s"(${arg}[0-9]+)".r templateMap.keys.toSeq.sorted.filter(tempRegex.pattern.matcher(_).matches) }}.flatten.toArray } var successful = 0 var passedTests:List[String] = List() val errors = new ArrayBuffer[String] for(testName <- problemsToRun) { // Wipe tempdir for consecutive tests of same module deleteRecursively(new File(tempDir)) templateMap.get(testName) match { case Some(test) => println(s"Starting template $testName") try { if(test(backendName)) { successful += 1 passedTests = passedTests :+ s"$testName" } else { errors += s"Template $testName: test error occurred" } } catch { case exception: Exception => exception.printStackTrace() errors += s"Template $testName: exception ${exception.getMessage}" case t : Throwable => errors += s"Template $testName: throwable ${t.getMessage}" } case _ => errors += s"Bad template name: $testName" } } if(successful > 0) { println(s"""Templates passing: $successful (${passedTests.mkString(", ")})""") } if(errors.nonEmpty) { println("=" * 80) println(s"Errors: ${errors.length}: in the following templates") println(errors.mkString("\n")) println("=" * 80) System.exit(1) } } }
Example 104
Source File: AvroSchemaMerge.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.avro import com.sksamuel.exts.StringOption import org.apache.avro.Schema import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer object AvroSchemaMerge { def apply(name: String, namespace: String, schemas: List[Schema]): Schema = { require(schemas.forall(_.getType == Schema.Type.RECORD), "Can only merge records") // documentations can just be a concat val doc = schemas.map(_.getDoc).filter(_ != null).mkString("; ") // simple impl to start: take all the fields from the first schema, and then add in the missing ones // from second 2 and so on val fields = new ArrayBuffer[Schema.Field]() schemas.foreach { schema => schema.getFields.asScala.filterNot { field => fields.exists(_.name() == field.name) }.foreach { field => // avro is funny about sharing fields, so need to copy it val copy = new Schema.Field(field.name(), field.schema(), StringOption(field.doc).orNull, field.defaultVal) fields.append(copy) } } val schema = Schema.createRecord(name, if (doc.isEmpty()) null else doc, namespace, false) schema.setFields(fields.result().asJava) schema } }
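AvroSchemaMerge appends a field to its ArrayBuffer only if no field with the same name has been collected yet. The sketch below shows that first-wins merge on a simplified Field case class instead of Avro's Schema.Field, so it runs without the Avro dependency; the names are illustrative.

import scala.collection.mutable.ArrayBuffer

object FieldMergeSketch {
  // Simplified stand-in for avro Schema.Field.
  final case class Field(name: String, doc: String)

  // Take all fields from the first schema, then add fields from later schemas
  // whose names have not been seen yet.
  def merge(schemas: List[List[Field]]): List[Field] = {
    val fields = new ArrayBuffer[Field]()
    schemas.foreach { schema =>
      schema.filterNot(f => fields.exists(_.name == f.name)).foreach(fields += _)
    }
    fields.toList
  }

  def main(args: Array[String]): Unit = {
    val a = List(Field("id", "first"), Field("name", "first"))
    val b = List(Field("name", "second"), Field("age", "second"))
    println(merge(List(a, b)).map(_.name))  // List(id, name, age)
  }
}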
Example 105
Source File: JdbcPublisher.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.jdbc import java.sql.{Connection, PreparedStatement} import java.util.concurrent.atomic.AtomicBoolean import com.sksamuel.exts.io.Using import com.sksamuel.exts.metrics.Timed import io.eels.Row import io.eels.component.jdbc.dialect.JdbcDialect import io.eels.datastream.{Publisher, Subscriber, Subscription} import scala.collection.mutable.ArrayBuffer class JdbcPublisher(connFn: () => Connection, query: String, bindFn: (PreparedStatement) => Unit, fetchSize: Int, dialect: JdbcDialect ) extends Publisher[Seq[Row]] with Timed with JdbcPrimitives with Using { override def subscribe(subscriber: Subscriber[Seq[Row]]): Unit = { try { using(connFn()) { conn => logger.debug(s"Preparing query $query") using(conn.prepareStatement(query)) { stmt => stmt.setFetchSize(fetchSize) bindFn(stmt) logger.debug(s"Executing query $query") using(stmt.executeQuery()) { rs => val schema = schemaFor(dialect, rs) val running = new AtomicBoolean(true) subscriber.subscribed(Subscription.fromRunning(running)) val buffer = new ArrayBuffer[Row](fetchSize) while (rs.next && running.get) { val values = schema.fieldNames().map { name => val raw = rs.getObject(name) dialect.sanitize(raw) } buffer append Row(schema, values) if (buffer.size == fetchSize) { subscriber.next(buffer.toVector) buffer.clear() } } if (buffer.nonEmpty) subscriber.next(buffer.toVector) subscriber.completed() } } } } catch { case t: Throwable => subscriber.error(t) } } }
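JdbcPublisher (and the HBase and Orc examples that follow) all use the same batching idiom: append elements to an ArrayBuffer, emit and clear it whenever it reaches the batch size, and emit the remainder at the end. A generic, dependency-free sketch of that idiom; the batched name and signature are assumptions for illustration.

import scala.collection.mutable.ArrayBuffer

object BatchingSketch {
  // Push elements downstream in fixed-size batches, flushing the remainder at the end.
  def batched[A](items: Iterator[A], batchSize: Int)(emit: Vector[A] => Unit): Unit = {
    val buffer = new ArrayBuffer[A](batchSize)
    items.foreach { item =>
      buffer += item
      if (buffer.size == batchSize) {
        emit(buffer.toVector)
        buffer.clear()
      }
    }
    if (buffer.nonEmpty) emit(buffer.toVector)
  }

  def main(args: Array[String]): Unit =
    batched((1 to 7).iterator, 3)(batch => println(batch))
  // Vector(1, 2, 3), Vector(4, 5, 6), Vector(7)
}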
Example 106
Source File: HbasePublisher.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.hbase import java.util import java.util.concurrent.atomic.AtomicBoolean import com.sksamuel.exts.io.Using import com.sksamuel.exts.metrics.Timed import io.eels.Row import io.eels.datastream.{Publisher, Subscriber, Subscription} import io.eels.schema.StructType import org.apache.hadoop.hbase.TableName import org.apache.hadoop.hbase.client.{Connection, Result, Scan} import scala.collection.mutable.ArrayBuffer class HbasePublisher(connection: Connection, schema: StructType, namespace: String, tableName: String, bufferSize: Int, maxRows: Long, scanner: Scan, implicit val serializer: HbaseSerializer) extends Publisher[Seq[Row]] with Timed with Using { private val table = connection.getTable(TableName.valueOf(namespace, tableName)) override def subscribe(subscriber: Subscriber[Seq[Row]]): Unit = { try { using(new CloseableIterator) { rowIter => val running = new AtomicBoolean(true) subscriber.subscribed(Subscription.fromRunning(running)) val buffer = new ArrayBuffer[Row](bufferSize) while (rowIter.hasNext && running.get()) { buffer append rowIter.next() if (buffer.size == bufferSize) { subscriber.next(buffer.toVector) buffer.clear() } } if (buffer.nonEmpty) subscriber.next(buffer.toVector) subscriber.completed() } } catch { case t: Throwable => subscriber.error(t) } } class CloseableIterator extends Iterator[Row] with AutoCloseable { private val resultScanner = table.getScanner(scanner) private val resultScannerIter = resultScanner.iterator() private var rowCount = 0 private var iter: Iterator[Row] = Iterator.empty override def hasNext: Boolean = rowCount < maxRows && iter.hasNext || { if (rowCount < maxRows && resultScannerIter.hasNext) { iter = HBaseResultsIterator(schema, resultScannerIter) iter.hasNext } else false } override def next(): Row = { rowCount += 1 iter.next() } override def close(): Unit = { resultScanner.close() } } case class HBaseResultsIterator(schema: StructType, resultIter: util.Iterator[Result])(implicit serializer: HbaseSerializer) extends Iterator[Row] { override def hasNext: Boolean = resultIter.hasNext override def next(): Row = { val resultRow = resultIter.next() val values = schema.fields.map { field => if (!field.key) { val value = resultRow.getValue(field.columnFamily.getOrElse(sys.error(s"No Column Family defined for field '${field.name}'")).getBytes, field.name.getBytes) if (value != null) serializer.fromBytes(value, field.name, field.dataType) else null } else serializer.fromBytes(resultRow.getRow, field.name, field.dataType) } Row(schema, values) } } }
Example 107
Source File: OrcWriter.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.orc import java.util.concurrent.atomic.AtomicInteger import java.util.function.IntUnaryOperator import com.sksamuel.exts.Logging import com.typesafe.config.ConfigFactory import io.eels.Row import io.eels.schema.StructType import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.hive.ql.exec.vector.ColumnVector import org.apache.orc.{OrcConf, OrcFile, TypeDescription} import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer // performs the actual write out of orc data, to be used by an orc sink class OrcWriter(path: Path, structType: StructType, options: OrcWriteOptions)(implicit conf: Configuration) extends Logging { private val schema: TypeDescription = OrcSchemaFns.toOrcSchema(structType) logger.trace(s"Creating orc writer for schema $schema") private val batchSize = { val size = ConfigFactory.load().getInt("eel.orc.sink.batchSize") Math.max(Math.min(1024, size), 1) } logger.debug(s"Orc writer will use batchsize=$batchSize") private val buffer = new ArrayBuffer[Row](batchSize) private val serializers = schema.getChildren.asScala.map(OrcSerializer.forType).toArray private val batch = schema.createRowBatch(batchSize) OrcConf.COMPRESSION_STRATEGY.setString(conf, options.compressionStrategy.name) OrcConf.COMPRESS.setString(conf, options.compressionKind.name) options.encodingStrategy.map(_.name).foreach(OrcConf.ENCODING_STRATEGY.setString(conf, _)) options.compressionBufferSize.foreach(OrcConf.BUFFER_SIZE.setLong(conf, _)) private val woptions = OrcFile.writerOptions(conf).setSchema(schema) options.rowIndexStride.foreach { size => woptions.rowIndexStride(size) logger.debug(s"Using stride size = $size") } if (options.bloomFilterColumns.nonEmpty) { woptions.bloomFilterColumns(options.bloomFilterColumns.mkString(",")) logger.debug(s"Using bloomFilterColumns = $options.bloomFilterColumns") } private lazy val writer = OrcFile.createWriter(path, woptions) private val counter = new AtomicInteger(0) def write(row: Row): Unit = { buffer.append(row) if (buffer.size == batchSize) flush() } def records: Int = counter.get() def flush(): Unit = { def writecol[T <: ColumnVector](rowIndex: Int, colIndex: Int, row: Row): Unit = { val value = row.values(colIndex) val vector = batch.cols(colIndex).asInstanceOf[T] val serializer = serializers(colIndex).asInstanceOf[OrcSerializer[T]] serializer.writeToVector(rowIndex, vector, value) } // don't use foreach here, using old school for loops for perf for (rowIndex <- buffer.indices) { val row = buffer(rowIndex) for (colIndex <- batch.cols.indices) { writecol(rowIndex, colIndex, row) } } batch.size = buffer.size writer.addRowBatch(batch) counter.updateAndGet(new IntUnaryOperator { override def applyAsInt(operand: Int): Int = operand + batch.size }) buffer.clear() batch.reset() } def close(): Long = { if (buffer.nonEmpty) flush() writer.close() val count = writer.getNumberOfRows logger.info(s"Orc writer wrote $count rows") count } }
Example 108
Source File: SKRSpec.scala From spark-kafka-writer with Apache License 2.0 | 5 votes |
package com.github.benfradet.spark.kafka.writer import java.util.concurrent.atomic.AtomicInteger import org.apache.kafka.common.serialization.{StringDeserializer, StringSerializer} import org.apache.spark.SparkConf import org.apache.spark.sql.SparkSession import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies} import org.apache.spark.streaming.{Seconds, StreamingContext} import org.scalatest.concurrent.Eventually import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach} import scala.collection.mutable.ArrayBuffer import scala.util.Random import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec case class Foo(a: Int, b: String) trait SKRSpec extends AnyWordSpec with Matchers with BeforeAndAfterEach with BeforeAndAfterAll with Eventually { val sparkConf = new SparkConf() .setMaster("local[1]") .setAppName(getClass.getSimpleName) var ktu: KafkaTestUtils = _ override def beforeAll(): Unit = { ktu = new KafkaTestUtils ktu.setup() } override def afterAll(): Unit = { SKRSpec.callbackTriggerCount.set(0) if (ktu != null) { ktu.tearDown() ktu = null } } var topic: String = _ var ssc: StreamingContext = _ var spark: SparkSession = _ override def afterEach(): Unit = { if (ssc != null) { ssc.stop() ssc = null } if (spark != null) { spark.stop() spark = null } } override def beforeEach(): Unit = { ssc = new StreamingContext(sparkConf, Seconds(1)) spark = SparkSession.builder .config(sparkConf) .getOrCreate() topic = s"topic-${Random.nextInt()}" ktu.createTopics(topic) } def collect(ssc: StreamingContext, topic: String): ArrayBuffer[String] = { val kafkaParams = Map( "bootstrap.servers" -> ktu.brokerAddress, "auto.offset.reset" -> "earliest", "key.deserializer" -> classOf[StringDeserializer], "value.deserializer" -> classOf[StringDeserializer], "group.id" -> "test-collect" ) val results = new ArrayBuffer[String] KafkaUtils.createDirectStream[String, String]( ssc, LocationStrategies.PreferConsistent, ConsumerStrategies.Subscribe[String, String](Set(topic), kafkaParams) ).map(_.value()) .foreachRDD { rdd => results ++= rdd.collect() () } results } val producerConfig = Map( "bootstrap.servers" -> "127.0.0.1:9092", "key.serializer" -> classOf[StringSerializer].getName, "value.serializer" -> classOf[StringSerializer].getName ) } object SKRSpec { val callbackTriggerCount = new AtomicInteger() }
Example 109
Source File: SidechainBlockInfo.scala From Sidechains-SDK with MIT License | 5 votes |
package com.horizen.chain import com.horizen.block.SidechainBlock import com.horizen.utils.{WithdrawalEpochInfo, WithdrawalEpochInfoSerializer} import com.horizen.vrf.{VrfOutput, VrfOutputSerializer} import scorex.core.NodeViewModifier import scorex.core.block.Block.Timestamp import scorex.core.consensus.ModifierSemanticValidity import scorex.core.serialization.{BytesSerializable, ScorexSerializer} import scorex.util.serialization.{Reader, Writer} import scorex.util.{ModifierId, bytesToId, idToBytes} import scala.collection.mutable.ArrayBuffer case class SidechainBlockInfo(height: Int, score: Long, parentId: ModifierId, timestamp: Timestamp, semanticValidity: ModifierSemanticValidity, mainchainHeaderHashes: Seq[MainchainHeaderHash], mainchainReferenceDataHeaderHashes: Seq[MainchainHeaderHash], withdrawalEpochInfo: WithdrawalEpochInfo, vrfOutputOpt: Option[VrfOutput], lastBlockInPreviousConsensusEpoch: ModifierId) extends BytesSerializable with LinkedElement[ModifierId] { override def getParentId: ModifierId = parentId override type M = SidechainBlockInfo override lazy val serializer: ScorexSerializer[SidechainBlockInfo] = SidechainBlockInfoSerializer override def bytes: Array[Byte] = SidechainBlockInfoSerializer.toBytes(this) } object SidechainBlockInfo { def mainchainHeaderHashesFromBlock(sidechainBlock: SidechainBlock): Seq[MainchainHeaderHash] = { sidechainBlock.mainchainHeaders.map(header => byteArrayToMainchainHeaderHash(header.hash)) } def mainchainReferenceDataHeaderHashesFromBlock(sidechainBlock: SidechainBlock): Seq[MainchainHeaderHash] = { sidechainBlock.mainchainBlockReferencesData.map(data => byteArrayToMainchainHeaderHash(data.headerHash)) } } object SidechainBlockInfoSerializer extends ScorexSerializer[SidechainBlockInfo] { override def serialize(obj: SidechainBlockInfo, w: Writer): Unit = { w.putInt(obj.height) w.putLong(obj.score) w.putBytes(idToBytes(obj.parentId)) w.putLong(obj.timestamp) w.put(obj.semanticValidity.code) w.putInt(obj.mainchainHeaderHashes.size) obj.mainchainHeaderHashes.foreach(id => w.putBytes(id.data)) w.putInt(obj.mainchainReferenceDataHeaderHashes.size) obj.mainchainReferenceDataHeaderHashes.foreach(id => w.putBytes(id.data)) WithdrawalEpochInfoSerializer.serialize(obj.withdrawalEpochInfo, w) w.putOption(obj.vrfOutputOpt){case (writer: Writer, vrfOutput: VrfOutput) => VrfOutputSerializer.getSerializer.serialize(vrfOutput, writer) } w.putBytes(idToBytes(obj.lastBlockInPreviousConsensusEpoch)) } private def readMainchainHeadersHashes(r: Reader): Seq[MainchainHeaderHash] = { val references: ArrayBuffer[MainchainHeaderHash] = ArrayBuffer() val length = r.getInt() (0 until length).foreach(_ => { val bytes = r.getBytes(mainchainHeaderHashSize) references.append(byteArrayToMainchainHeaderHash(bytes)) }) references } override def parse(r: Reader): SidechainBlockInfo = { val height = r.getInt() val score = r.getLong() val parentId = bytesToId(r.getBytes(NodeViewModifier.ModifierIdSize)) val timestamp = r.getLong() val semanticValidityCode = r.getByte() val mainchainHeaderHashes = readMainchainHeadersHashes(r) val mainchainReferenceDataHeaderHashes = readMainchainHeadersHashes(r) val withdrawalEpochInfo = WithdrawalEpochInfoSerializer.parse(r) val vrfOutputOpt = r.getOption(VrfOutputSerializer.getSerializer.parse(r)) val lastBlockInPreviousConsensusEpoch = bytesToId(r.getBytes(NodeViewModifier.ModifierIdSize)) SidechainBlockInfo(height, score, parentId, timestamp, ModifierSemanticValidity.restoreFromCode(semanticValidityCode), mainchainHeaderHashes, 
mainchainReferenceDataHeaderHashes, withdrawalEpochInfo, vrfOutputOpt, lastBlockInPreviousConsensusEpoch) } }
Example 110
Source File: IODBStoreAdapter.scala From Sidechains-SDK with MIT License | 5 votes |
package com.horizen.storage import java.util.{ArrayList => JArrayList, List => JList} import java.util.Optional import com.horizen.utils.Pair import scala.collection.JavaConverters._ import io.iohk.iodb.Store import com.horizen.utils.ByteArrayWrapper import scala.collection.mutable.ArrayBuffer class IODBStoreAdapter (store : Store) extends Storage { override def get(key: ByteArrayWrapper): Optional[ByteArrayWrapper] = { val value = store.get(key) if (value.isEmpty) Optional.empty() else Optional.of(new ByteArrayWrapper(value.get)) } override def getOrElse(key: ByteArrayWrapper, defaultValue: ByteArrayWrapper): ByteArrayWrapper = { val value = store.get(key) if (value.isEmpty) defaultValue else new ByteArrayWrapper(value.get) } override def get(keys: JList[ByteArrayWrapper]): JList[Pair[ByteArrayWrapper, Optional[ByteArrayWrapper]]] = { val keysList = new ArrayBuffer[ByteArrayWrapper]() val valList = store.get(keys.asScala) val values = new JArrayList[Pair[ByteArrayWrapper,Optional[ByteArrayWrapper]]]() for (v <- valList) if (v._2.isDefined) values.add(new Pair[ByteArrayWrapper,Optional[ByteArrayWrapper]](new ByteArrayWrapper(v._1), Optional.of(new ByteArrayWrapper(v._2.get)))) else values.add(new Pair[ByteArrayWrapper,Optional[ByteArrayWrapper]](new ByteArrayWrapper(v._1), Optional.empty())) values } override def getAll: JList[Pair[ByteArrayWrapper, ByteArrayWrapper]] = { val values = new JArrayList[Pair[ByteArrayWrapper,ByteArrayWrapper]]() for ( i <- store.getAll()) values.add(new Pair[ByteArrayWrapper,ByteArrayWrapper](new ByteArrayWrapper(i._1), new ByteArrayWrapper(i._2))) values } override def lastVersionID(): Optional[ByteArrayWrapper] = { val value = store.lastVersionID if (value.isEmpty) Optional.empty() else Optional.of(new ByteArrayWrapper(value.get)) } override def update(version: ByteArrayWrapper, toUpdate: JList[Pair[ByteArrayWrapper, ByteArrayWrapper]], toRemove: JList[ByteArrayWrapper]): Unit = { val listToUpdate = new ArrayBuffer[Tuple2[ByteArrayWrapper,ByteArrayWrapper]]() for (r <- toUpdate.asScala) { listToUpdate.append(new Tuple2[ByteArrayWrapper, ByteArrayWrapper](r.getKey, r.getValue)) } store.update(version, toRemove.asScala, listToUpdate) } override def rollback(version : ByteArrayWrapper): Unit = { store.rollback(version) } override def rollbackVersions(): JList[ByteArrayWrapper] = { val versions = store.rollbackVersions() val value = new JArrayList[ByteArrayWrapper]() for (v <- versions) value.add(new ByteArrayWrapper(v)) value } override def isEmpty(): Boolean = !lastVersionID().isPresent override def close(): Unit = { store.close() } }
Example 111
Source File: StoreOpsTest.scala From fs2-blobstore with Apache License 2.0 | 5 votes |
package blobstore import java.nio.charset.Charset import java.nio.file.Files import java.util.concurrent.Executors import cats.effect.{Blocker, IO} import cats.effect.laws.util.TestInstances import cats.implicits._ import fs2.Pipe import org.scalatest.Assertion import org.scalatest.flatspec.AnyFlatSpec import implicits._ import org.scalatest.matchers.must.Matchers import scala.collection.mutable.ArrayBuffer import scala.concurrent.ExecutionContext class StoreOpsTest extends AnyFlatSpec with Matchers with TestInstances { implicit val cs = IO.contextShift(ExecutionContext.global) val blocker = Blocker.liftExecutionContext(ExecutionContext.fromExecutor(Executors.newCachedThreadPool)) behavior of "PutOps" it should "buffer contents and compute size before calling Store.put" in { val bytes: Array[Byte] = "AAAAAAAAAA".getBytes(Charset.forName("utf-8")) val store = DummyStore(_.size must be(Some(bytes.length))) fs2.Stream.emits(bytes).covary[IO].through(store.bufferedPut(Path("path/to/file.txt"), blocker)).compile.drain.unsafeRunSync() store.buf.toArray must be(bytes) } it should "upload a file from a nio Path" in { val bytes = "hello".getBytes(Charset.forName("utf-8")) val store = DummyStore(_.size must be(Some(bytes.length))) fs2.Stream.bracket(IO(Files.createTempFile("test-file", ".bin"))) { p => IO(p.toFile.delete).void }.flatMap { p => fs2.Stream.emits(bytes).covary[IO].through(fs2.io.file.writeAll(p, blocker)).drain ++ fs2.Stream.eval(store.put(p, Path("path/to/file.txt"), blocker)) }.compile.drain.unsafeRunSync() store.buf.toArray must be(bytes) } } final case class DummyStore(check: Path => Assertion) extends Store[IO] { val buf = new ArrayBuffer[Byte]() override def put(path: Path): Pipe[IO, Byte, Unit] = { check(path) in => { buf.appendAll(in.compile.toVector.unsafeRunSync()) fs2.Stream.emit(()) } } override def list(path: Path): fs2.Stream[IO, Path] = ??? override def get(path: Path, chunkSize: Int): fs2.Stream[IO, Byte] = ??? override def move(src: Path, dst: Path): IO[Unit] = ??? override def copy(src: Path, dst: Path): IO[Unit] = ??? override def remove(path: Path): IO[Unit] = ??? }
Example 112
Source File: MetadataTransformUtils.scala From automl with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.feature.operator import org.apache.spark.sql.types.{MetadataBuilder, StructField} import scala.collection.mutable.ArrayBuffer def vectorCartesianTransform(fields: Array[StructField], numFeatures: Int): MetadataBuilder = { if (fields.length < 2) { throw new IllegalArgumentException("the number of cols in the input DataFrame should be no less than 2") } var res = Array[String]() if (fields.head.metadata.contains(DERIVATION)) { res = fields.head.metadata.getStringArray(DERIVATION) } else { res = createDerivation(numFeatures) } for (i <- 1 until fields.length) { if (fields(i).metadata.contains(DERIVATION)) { res = cartesianWithArray(res, fields(i).metadata.getStringArray(DERIVATION)) } else { res = cartesianWithArray(res, createDerivation(numFeatures)) } } val metadata = fields.last.metadata new MetadataBuilder().withMetadata(metadata).putStringArray(DERIVATION, res) } }
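vectorCartesianTransform relies on cartesianWithArray and createDerivation helpers that are not included in this excerpt. Purely as an illustration of how such a helper might accumulate derivation names with an ArrayBuffer — the combination format and all names below are assumptions, not the project's actual output:

import scala.collection.mutable.ArrayBuffer

object CartesianDerivationSketch {
  // Hypothetical cartesianWithArray: record every pairing of left and right names.
  def cartesianWithArray(left: Array[String], right: Array[String]): Array[String] = {
    val res = ArrayBuffer[String]()
    for (l <- left; r <- right) res += s"($l x $r)"
    res.toArray
  }

  def main(args: Array[String]): Unit =
    println(cartesianWithArray(Array("f0", "f1"), Array("g0")).mkString(", "))
  // (f0 x g0), (f1 x g0)
}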
Example 113
Source File: Message.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.network.nio import java.net.InetSocketAddress import java.nio.ByteBuffer import scala.collection.mutable.ArrayBuffer import com.google.common.base.Charsets.UTF_8 import org.apache.spark.util.Utils private[nio] abstract class Message(val typ: Long, val id: Int) { var senderAddress: InetSocketAddress = null var started = false var startTime = -1L var finishTime = -1L var isSecurityNeg = false var hasError = false def size: Int def getChunkForSending(maxChunkSize: Int): Option[MessageChunk] def getChunkForReceiving(chunkSize: Int): Option[MessageChunk] def timeTaken(): String = (finishTime - startTime).toString + " ms" override def toString: String = { this.getClass.getSimpleName + "(id = " + id + ", size = " + size + ")" } } private[nio] object Message { val BUFFER_MESSAGE = 1111111111L var lastId = 1 def getNewId(): Int = synchronized { lastId += 1 if (lastId == 0) { lastId += 1 } lastId } def createBufferMessage(dataBuffers: Seq[ByteBuffer], ackId: Int): BufferMessage = { if (dataBuffers == null) { return new BufferMessage(getNewId(), new ArrayBuffer[ByteBuffer], ackId) } if (dataBuffers.exists(_ == null)) { throw new Exception("Attempting to create buffer message with null buffer") } new BufferMessage(getNewId(), new ArrayBuffer[ByteBuffer] ++= dataBuffers, ackId) } def createBufferMessage(dataBuffers: Seq[ByteBuffer]): BufferMessage = createBufferMessage(dataBuffers, 0) def createBufferMessage(dataBuffer: ByteBuffer, ackId: Int): BufferMessage = { if (dataBuffer == null) { //ByteBuffer.allocate在能够读和写之前,必须有一个缓冲区,用静态方法 allocate() 来分配缓冲区 createBufferMessage(Array(ByteBuffer.allocate(0)), ackId) } else { createBufferMessage(Array(dataBuffer), ackId) } } def createBufferMessage(dataBuffer: ByteBuffer): BufferMessage = createBufferMessage(dataBuffer, 0) def createBufferMessage(ackId: Int): BufferMessage = { createBufferMessage(new Array[ByteBuffer](0), ackId) } def createErrorMessage(exception: Exception, ackId: Int): BufferMessage = { val exceptionString = Utils.exceptionString(exception) val serializedExceptionString = ByteBuffer.wrap(exceptionString.getBytes(UTF_8)) val errorMessage = createBufferMessage(serializedExceptionString, ackId) errorMessage.hasError = true errorMessage } def create(header: MessageChunkHeader): Message = { val newMessage: Message = header.typ match { case BUFFER_MESSAGE => new BufferMessage(header.id, //ByteBuffer.allocate在能够读和写之前,必须有一个缓冲区,用静态方法 allocate() 来分配缓冲区 ArrayBuffer(ByteBuffer.allocate(header.totalSize)), header.other) } newMessage.hasError = header.hasError newMessage.senderAddress = header.address newMessage } }
Example 114
Source File: ApplicationInfo.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.master import java.util.Date import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import org.apache.spark.deploy.ApplicationDescription import org.apache.spark.rpc.RpcEndpointRef import org.apache.spark.util.Utils private[spark] class ApplicationInfo( val startTime: Long, val id: String, val desc: ApplicationDescription, val submitDate: Date, val driver: RpcEndpointRef, defaultCores: Int) extends Serializable { //枚举类型赋值 @transient var state: ApplicationState.Value = _ @transient var executors: mutable.HashMap[Int, ExecutorDesc] = _ @transient var removedExecutors: ArrayBuffer[ExecutorDesc] = _ @transient var coresGranted: Int = _ @transient var endTime: Long = _ @transient var appSource: ApplicationSource = _ // A cap on the number of executors this application can have at any given time. //执行者的数量这个应用程序可以在任何给定的时间 // By default, this is infinite. Only after the first allocation request is issued by the // application will this be set to a finite value. This is used for dynamic allocation. //默认情况下,这是无限的,只有在应用程序发出第一个分配请求之后,这将被设置为有限的值,这用于动态分配 @transient private[master] var executorLimit: Int = _ @transient private var nextExecutorId: Int = _ init() //初始化方法 private def readObject(in: java.io.ObjectInputStream): Unit = Utils.tryOrIOException { in.defaultReadObject() init() } private[deploy] def getExecutorLimit: Int = executorLimit def duration: Long = { if (endTime != -1) { endTime - startTime } else { System.currentTimeMillis() - startTime } } }
Example 115
Source File: Schedulable.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.util.concurrent.ConcurrentLinkedQueue import scala.collection.mutable.ArrayBuffer import org.apache.spark.scheduler.SchedulingMode.SchedulingMode private[spark] trait Schedulable { var parent: Pool // child queues def schedulableQueue: ConcurrentLinkedQueue[Schedulable] def schedulingMode: SchedulingMode def weight: Int def minShare: Int def runningTasks: Int def priority: Int def stageId: Int def name: String def addSchedulable(schedulable: Schedulable): Unit def removeSchedulable(schedulable: Schedulable): Unit def getSchedulableByName(name: String): Schedulable def executorLost(executorId: String, host: String): Unit def checkSpeculatableTasks(): Boolean def getSortedTaskSetQueue: ArrayBuffer[TaskSetManager] }
Example 116
Source File: ByteArrayChunkOutputStream.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.util.io import java.io.OutputStream import scala.collection.mutable.ArrayBuffer private var position = chunkSize override def write(b: Int): Unit = { allocateNewChunkIfNeeded() //注意前套数组取值方式 chunks(lastChunkIndex)(position) = b.toByte position += 1 } override def write(bytes: Array[Byte], off: Int, len: Int): Unit = { var written = 0 while (written < len) { allocateNewChunkIfNeeded() val thisBatch = math.min(chunkSize - position, len - written) System.arraycopy(bytes, written + off, chunks(lastChunkIndex), position, thisBatch) written += thisBatch position += thisBatch } } @inline private def allocateNewChunkIfNeeded(): Unit = { if (position == chunkSize) { chunks += new Array[Byte](chunkSize) lastChunkIndex += 1 position = 0 } } def toArrays: Array[Array[Byte]] = { if (lastChunkIndex == -1) { new Array[Array[Byte]](0) } else { // Copy the first n-1 chunks to the output, and then create an array that fits the last chunk. // An alternative would have been returning an array of ByteBuffers, with the last buffer // bounded to only the last chunk's position. However, given our use case in Spark (to put // the chunks in block manager), only limiting the view bound of the buffer would still // require the block manager to store the whole chunk. //将第一个n-1块复制到输出,然后创建一个适合最后一个块的数组。一个替代方法是返回一个ByteBuffers数组,最后一个缓冲区 //仅限于最后一个块的位置。 但是,考虑到我们在Spark中的用例(put块块中的块管理器),只会限制缓冲区的视图边界 //要求块管理器存储整个块。 val ret = new Array[Array[Byte]](chunks.size) for (i <- 0 until chunks.size - 1) { ret(i) = chunks(i) } if (position == chunkSize) { ret(lastChunkIndex) = chunks(lastChunkIndex) } else { ret(lastChunkIndex) = new Array[Byte](position) System.arraycopy(chunks(lastChunkIndex), 0, ret(lastChunkIndex), 0, position) } ret } } }
Example 117
Source File: MapPartitionsWithPreparationRDD.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag import org.apache.spark.{Partition, Partitioner, TaskContext} override def compute(partition: Partition, context: TaskContext): Iterator[U] = { val prepared = if (preparedArguments.isEmpty) { preparePartition() } else { preparedArguments.remove(0) } val parentIterator = firstParent[T].iterator(partition, context) executePartition(context, partition.index, prepared, parentIterator) } }
Example 118
Source File: UnionRDD.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils private[spark] class UnionPartition[T: ClassTag]( idx: Int, @transient rdd: RDD[T], val parentRddIndex: Int, @transient parentRddPartitionIndex: Int) extends Partition { var parentPartition: Partition = rdd.partitions(parentRddPartitionIndex) def preferredLocations(): Seq[String] = rdd.preferredLocations(parentPartition) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization //在任务序列化时更新对父拆分的引用 parentPartition = rdd.partitions(parentRddPartitionIndex) oos.defaultWriteObject() } } @DeveloperApi class UnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]]) extends RDD[T](sc, Nil) { // Nil since we implement getDependencies override def getPartitions: Array[Partition] = { val array = new Array[Partition](rdds.map(_.partitions.length).sum) var pos = 0 for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) { array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index) pos += 1 } array } override def getDependencies: Seq[Dependency[_]] = { val deps = new ArrayBuffer[Dependency[_]] var pos = 0 for (rdd <- rdds) { deps += new RangeDependency(rdd, 0, pos, rdd.partitions.length) pos += rdd.partitions.length } deps } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val part = s.asInstanceOf[UnionPartition[T]] parent[T](part.parentRddIndex).iterator(part.parentPartition, context) } override def getPreferredLocations(s: Partition): Seq[String] = s.asInstanceOf[UnionPartition[T]].preferredLocations() override def clearDependencies() { super.clearDependencies() rdds = null } }
Example 119
Source File: hbaseCommands.scala From Heracles with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hbase.execution import org.apache.spark.annotation.DeveloperApi import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.SubqueryAlias import org.apache.spark.sql.execution.command.RunnableCommand import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.hbase._ import org.apache.spark.sql.hbase.util.DataTypeUtils import org.apache.spark.sql.types._ import scala.collection.mutable.ArrayBuffer @DeveloperApi case class AlterDropColCommand(namespace: String, tableName: String, columnName: String) extends RunnableCommand { def run(sparkSession: SparkSession): Seq[Row] = { sparkSession.sharedState.externalCatalog.asInstanceOf[HBaseCatalog] .alterTableDropNonKey(namespace, tableName, columnName) sparkSession.sharedState.externalCatalog.asInstanceOf[HBaseCatalog].stopAdmin() Seq.empty[Row] } } @DeveloperApi case class AlterAddColCommand(namespace: String, tableName: String, colName: String, colType: String, colFamily: String, colQualifier: String) extends RunnableCommand { def run(sparkSession: SparkSession): Seq[Row] = { val hbaseCatalog = sparkSession.sharedState.externalCatalog.asInstanceOf[HBaseCatalog] hbaseCatalog.alterTableAddNonKey(namespace, tableName, NonKeyColumn(colName, DataTypeUtils.getDataType(colType), colFamily, colQualifier)) hbaseCatalog.stopAdmin() Seq.empty[Row] } } @DeveloperApi case class InsertValueIntoTableCommand(tid: TableIdentifier, valueSeq: Seq[String]) extends RunnableCommand { override def run(sparkSession: SparkSession) = { val relation: HBaseRelation = sparkSession.sessionState.catalog.externalCatalog .asInstanceOf[HBaseCatalog] .getHBaseRelation(tid.database.getOrElse(null), tid.table).getOrElse(null) val bytes = valueSeq.zipWithIndex.map(v => DataTypeUtils.string2TypeData(v._1, relation.schema(v._2).dataType)) val rows = sparkSession.sparkContext.makeRDD(Seq(Row.fromSeq(bytes))) val inputValuesDF = sparkSession.createDataFrame(rows, relation.schema) relation.insert(inputValuesDF, overwrite = false) Seq.empty[Row] } override def output: Seq[Attribute] = Seq.empty }
Example 120
Source File: MeetupReceiver.scala From meetup-stream with Apache License 2.0 | 5 votes |
package receiver import org.apache.spark.streaming.receiver.Receiver import org.apache.spark.storage.StorageLevel import org.apache.spark.Logging import com.ning.http.client.AsyncHttpClientConfig import com.ning.http.client._ import scala.collection.mutable.ArrayBuffer import java.io.OutputStream import java.io.ByteArrayInputStream import java.io.InputStreamReader import java.io.BufferedReader import java.io.InputStream import java.io.PipedInputStream import java.io.PipedOutputStream class MeetupReceiver(url: String) extends Receiver[String](StorageLevel.MEMORY_AND_DISK_2) with Logging { @transient var client: AsyncHttpClient = _ @transient var inputPipe: PipedInputStream = _ @transient var outputPipe: PipedOutputStream = _ def onStart() { val cf = new AsyncHttpClientConfig.Builder() cf.setRequestTimeout(Integer.MAX_VALUE) cf.setReadTimeout(Integer.MAX_VALUE) cf.setPooledConnectionIdleTimeout(Integer.MAX_VALUE) client= new AsyncHttpClient(cf.build()) inputPipe = new PipedInputStream(1024 * 1024) outputPipe = new PipedOutputStream(inputPipe) val producerThread = new Thread(new DataConsumer(inputPipe)) producerThread.start() client.prepareGet(url).execute(new AsyncHandler[Unit]{ def onBodyPartReceived(bodyPart: HttpResponseBodyPart) = { bodyPart.writeTo(outputPipe) AsyncHandler.STATE.CONTINUE } def onStatusReceived(status: HttpResponseStatus) = { AsyncHandler.STATE.CONTINUE } def onHeadersReceived(headers: HttpResponseHeaders) = { AsyncHandler.STATE.CONTINUE } def onCompleted = { println("completed") } def onThrowable(t: Throwable)={ t.printStackTrace() } }) } def onStop() { if (Option(client).isDefined) client.close() if (Option(outputPipe).isDefined) { outputPipe.flush() outputPipe.close() } if (Option(inputPipe).isDefined) { inputPipe.close() } } class DataConsumer(inputStream: InputStream) extends Runnable { override def run() { val bufferedReader = new BufferedReader( new InputStreamReader( inputStream )) var input=bufferedReader.readLine() while(input!=null){ store(input) input=bufferedReader.readLine() } } } }
Example 121
Source File: HashBasedDeduplicator.scala From pravda-ml with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.odkl.texts import odkl.analysis.spark.util.Logging import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml.Transformer import org.apache.spark.ml.param._ import org.apache.spark.ml.util.Identifiable import org.apache.spark.ml.linalg.Vectors.norm import org.apache.spark.ml.linalg.{BLAS, Vector} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.{DataFrame, Dataset, Row} import scala.collection.mutable.ArrayBuffer def setSimilarityTreshold(value: Double): this.type = set(similarityThreshold, value) setDefault(new ParamPair[String](inputColHash,"hash"), new ParamPair[Double](similarityThreshold,0.9)) def this() = this(Identifiable.randomUID("hashBasedDeduplication")) override def transform(dataset: Dataset[_]): DataFrame = { dataset.sqlContext.createDataFrame( dataset.toDF .repartition(dataset.col($(inputColHash))) .sortWithinPartitions($(inputColHash)) .rdd .mapPartitions((f: Iterator[Row]) => { if (f.hasNext) { var curHash: Long = -1L val vectorsBuffer = new ArrayBuffer[Vector](0) // unique vectors buffer for this bucket for (it <- f) yield { val newHash = it.getAs[Long]($(inputColHash)) if (newHash == curHash) { val currentVector = it.getAs[Vector]($(inputColVector)) val isUnique = vectorsBuffer.forall(storedVector => { // is this vector "different" enough from the others already in the buffer? (BLAS.dot(storedVector, currentVector) / (norm(storedVector, 2) * norm(currentVector, 2))) < $(similarityThreshold) // below the threshold means dissimilar }) if (isUnique) { vectorsBuffer.append(currentVector) it } else { Row.empty // dummy Row marking a duplicate } } else { vectorsBuffer.clear() vectorsBuffer.append(it.getAs[Vector]($(inputColVector))) curHash = newHash it } } } else { new Array[Row](0).toIterator // empty partition } }).filter(!_.equals(Row.empty)), // drop the dummy rows transformSchema(dataset.schema)) } @DeveloperApi override def transformSchema(schema: StructType): StructType = { schema } override def copy(extra: ParamMap): Transformer = defaultCopy(extra) }
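The ArrayBuffer idea in this transformer is keeping a small buffer of representative vectors per hash bucket and dropping rows whose cosine similarity to any buffered vector reaches the threshold. Below is a standalone, Spark-free sketch of just that buffering step; the names cosine and dedupBucket and the use of plain Array[Double] are illustrative choices, not the project's API.

import scala.collection.mutable.ArrayBuffer

object DedupSketch {
  // Cosine similarity of two dense vectors (assumes equal length and non-zero norms).
  private def cosine(a: Array[Double], b: Array[Double]): Double = {
    val dot = a.indices.map(i => a(i) * b(i)).sum
    val na = math.sqrt(a.map(x => x * x).sum)
    val nb = math.sqrt(b.map(x => x * x).sum)
    dot / (na * nb)
  }

  // Keeps only vectors that are "different enough" from every vector kept so far,
  // mirroring the per-bucket ArrayBuffer used in the transformer above.
  def dedupBucket(vectors: Seq[Array[Double]], threshold: Double): Seq[Array[Double]] = {
    val kept = new ArrayBuffer[Array[Double]]()
    vectors.foreach { v =>
      val isUnique = kept.forall(stored => cosine(stored, v) < threshold)
      if (isUnique) kept += v
    }
    kept.toSeq
  }
}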
Example 122
Source File: NonSampleCompactor.scala From deequ with Apache License 2.0 | 5 votes |
package com.amazon.deequ.analyzers import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag import scala.util.Random val output = (offset until len by 2).map(sortedBuffer(_)).toArray val tail = findOdd(items) items = items % 2 var newBuffer = ArrayBuffer[T]() if (tail.isDefined) { newBuffer = newBuffer :+ tail.get } buffer = newBuffer numOfCompress = numOfCompress + 1 output } }
Example 123
Source File: ScaleAndConvert.scala From SparkNet with MIT License | 5 votes |
package preprocessing import java.awt.image.DataBufferByte import java.io.ByteArrayInputStream import javax.imageio.ImageIO import scala.collection.mutable.ArrayBuffer import scala.collection.JavaConversions._ import net.coobird.thumbnailator._ import org.apache.spark.rdd.RDD import libs._ object ScaleAndConvert { def BufferedImageToByteArray(image: java.awt.image.BufferedImage) : Array[Byte] = { val height = image.getHeight() val width = image.getWidth() val pixels = image.getRGB(0, 0, width, height, null, 0, width) val result = new Array[Byte](3 * height * width) var row = 0 while (row < height) { var col = 0 while (col < width) { val rgb = pixels(row * width + col) result(0 * height * width + row * width + col) = ((rgb >> 16) & 0xFF).toByte result(1 * height * width + row * width + col) = ((rgb >> 8) & 0xFF).toByte result(2 * height * width + row * width + col) = (rgb & 0xFF).toByte col += 1 } row += 1 } result } def decompressImageAndResize(compressedImage: Array[Byte], height: Int, width: Int) : Option[Array[Byte]] = { // this method takes a JPEG, decompresses it, and resizes it try { val im = ImageIO.read(new ByteArrayInputStream(compressedImage)) val resizedImage = Thumbnails.of(im).forceSize(width, height).asBufferedImage() Some(BufferedImageToByteArray(resizedImage)) } catch { // If images can't be processed properly, just ignore them case e: java.lang.IllegalArgumentException => None case e: javax.imageio.IIOException => None case e: java.lang.NullPointerException => None } } }
Example 124
Source File: ClassRDDPartitioner.scala From spark-orientdb-connector with Apache License 2.0 | 5 votes |
package com.metreta.spark.orientdb.connector.rdd.partitioner import scala.collection.JavaConversions.iterableAsScalaIterable import scala.collection.mutable.ArrayBuffer import org.apache.spark.Logging import org.apache.spark.Partition import com.metreta.spark.orientdb.connector.api.OrientDBConnector import com.orientechnologies.orient.core.metadata.schema.OClass import com.orientechnologies.orient.core.metadata.schema.OSchema import com.orientechnologies.orient.core.storage.OStorage import com.metreta.spark.orientdb.connector.SystemTables def getPartitions(): Array[Partition] = { val db = connector.databaseDocumentTx() var partitions = new ArrayBuffer[OrientPartition] val schema: OSchema = connector.getSchema(db) var klass: OClass = schema.getClass(mClass) val storage: OStorage = connector.getStorage(db) klass.getClusterIds.zipWithIndex foreach { case (clusterId, index) => partitions = partitions.+=(OrientPartition( index, null, // <- Host Address ????? PartitionName(klass.getName, storage.getClusterById(clusterId).getName))) } partitions.toArray } }
Example 125
Source File: SparkContextFunctionsSpec.scala From spark-orientdb-connector with Apache License 2.0 | 5 votes |
package com.metreta.spark.orientdb.connector import scala.collection.mutable.ArrayBuffer import org.scalatest.BeforeAndAfterAll import com.orientechnologies.orient.core.id.ORID import com.metreta.spark.orientdb.connector.utils.BaseOrientDbFlatSpec class SparkContextFunctionsSpec extends BaseOrientDbFlatSpec { var oridList: ArrayBuffer[String] = new ArrayBuffer var MaxCluster = 1000 var MaxRecord = 1000 override def beforeAll(): Unit = { initSparkConf(defaultSparkConf) createOridList() } override def afterAll(): Unit = { sparkContext.stop() } "A VertexId created from RID" should "be unique" in { val vertexIdList = oridList map { rid => sparkContext.getVertexIdFromString(rid) } val duplicatedValues = vertexIdList.groupBy(identity).collect { case (x, ys) if ys.lengthCompare(1) > 0 => x } duplicatedValues shouldBe empty } it should "be a positive number" in { val negativeValues = oridList filter { rid => sparkContext.getVertexIdFromString(rid) < 0 } negativeValues shouldBe empty } def createOridList() { for (clusterId <- 0 to MaxCluster) { for (recordId <- 0 to MaxRecord) { val rid = new StringBuilder rid.append(ORID.PREFIX); rid.append(clusterId); rid.append(ORID.SEPARATOR); rid.append(recordId); oridList += rid.toString } } } }
Example 126
Source File: SpearmanCorrelation.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.stat.correlation import scala.collection.mutable.ArrayBuffer import org.apache.spark.internal.Logging import org.apache.spark.mllib.linalg.{Matrix, Vector, Vectors} import org.apache.spark.rdd.RDD override def computeCorrelationMatrix(X: RDD[Vector]): Matrix = { // ((columnIndex, value), rowUid) val colBased = X.zipWithUniqueId().flatMap { case (vec, uid) => vec.toArray.view.zipWithIndex.map { case (v, j) => ((j, v), uid) } } // global sort by (columnIndex, value) val sorted = colBased.sortByKey() // assign global ranks (using average ranks for tied values) val globalRanks = sorted.zipWithIndex().mapPartitions { iter => var preCol = -1 var preVal = Double.NaN var startRank = -1.0 val cachedUids = ArrayBuffer.empty[Long] val flush: () => Iterable[(Long, (Int, Double))] = () => { val averageRank = startRank + (cachedUids.size - 1) / 2.0 val output = cachedUids.map { uid => (uid, (preCol, averageRank)) } cachedUids.clear() output } iter.flatMap { case (((j, v), uid), rank) => // If we see a new value or cachedUids is too big, we flush ids with their average rank. if (j != preCol || v != preVal || cachedUids.size >= 10000000) { val output = flush() preCol = j preVal = v startRank = rank cachedUids += uid output } else { cachedUids += uid Iterator.empty } } ++ flush() } // Replace values in the input matrix by their ranks compared with values in the same column. // Note that shifting all ranks in a column by a constant value doesn't affect result. val groupedRanks = globalRanks.groupByKey().map { case (uid, iter) => // sort by column index and then convert values to a vector Vectors.dense(iter.toSeq.sortBy(_._1).map(_._2).toArray) } PearsonCorrelation.computeCorrelationMatrix(groupedRanks) } }
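The ranking trick above (cache row ids in an ArrayBuffer and flush them with an average rank whenever the value changes) can be exercised without Spark. The following is only an illustrative sketch of that pattern over an already-sorted local sequence; AverageRankSketch and its signature are made up for this page.

import scala.collection.mutable.ArrayBuffer

object AverageRankSketch {
  // Assigns 0-based average ranks to ids, where ties (equal values) share the
  // average of their positions, mirroring the flush-on-new-value pattern above.
  // The input must already be sorted by value.
  def averageRanks(sortedByValue: Seq[(Double, Long)]): Seq[(Long, Double)] = {
    val out = new ArrayBuffer[(Long, Double)]()
    val cachedIds = new ArrayBuffer[Long]()
    var preVal = Double.NaN
    var startRank = -1.0

    def flush(): Unit = {
      val averageRank = startRank + (cachedIds.size - 1) / 2.0
      cachedIds.foreach(id => out += ((id, averageRank)))
      cachedIds.clear()
    }

    sortedByValue.zipWithIndex.foreach { case ((v, id), rank) =>
      if (cachedIds.nonEmpty && v != preVal) flush()
      if (cachedIds.isEmpty) startRank = rank
      preVal = v
      cachedIds += id
    }
    flush()
    out.toSeq
  }
}

For Seq((1.0, 10L), (1.0, 11L), (2.0, 12L)) this yields (10, 0.5), (11, 0.5), (12, 2.0): the two tied values share the average of ranks 0 and 1.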
Example 127
Source File: ApplicationMasterArguments.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.yarn import scala.collection.mutable.ArrayBuffer class ApplicationMasterArguments(val args: Array[String]) { var userJar: String = null var userClass: String = null var primaryPyFile: String = null var primaryRFile: String = null var userArgs: Seq[String] = Nil var propertiesFile: String = null parseArgs(args.toList) private def parseArgs(inputArgs: List[String]): Unit = { val userArgsBuffer = new ArrayBuffer[String]() var args = inputArgs while (!args.isEmpty) { // --num-workers, --worker-memory, and --worker-cores are deprecated since 1.0, // the properties with executor in their names are preferred. args match { case ("--jar") :: value :: tail => userJar = value args = tail case ("--class") :: value :: tail => userClass = value args = tail case ("--primary-py-file") :: value :: tail => primaryPyFile = value args = tail case ("--primary-r-file") :: value :: tail => primaryRFile = value args = tail case ("--arg") :: value :: tail => userArgsBuffer += value args = tail case ("--properties-file") :: value :: tail => propertiesFile = value args = tail case _ => printUsageAndExit(1, args) } } if (primaryPyFile != null && primaryRFile != null) { // scalastyle:off println System.err.println("Cannot have primary-py-file and primary-r-file at the same time") // scalastyle:on println System.exit(-1) } userArgs = userArgsBuffer.toList } def printUsageAndExit(exitCode: Int, unknownParam: Any = null) { // scalastyle:off println if (unknownParam != null) { System.err.println("Unknown/unsupported param " + unknownParam) } System.err.println(""" |Usage: org.apache.spark.deploy.yarn.ApplicationMaster [options] |Options: | --jar JAR_PATH Path to your application's JAR file | --class CLASS_NAME Name of your application's main class | --primary-py-file A main Python file | --primary-r-file A main R file | --arg ARG Argument to be passed to your application's main class. | Multiple invocations are possible, each will be passed in order. | --properties-file FILE Path to a custom Spark properties file. """.stripMargin) // scalastyle:on println System.exit(exitCode) } } object ApplicationMasterArguments { val DEFAULT_NUMBER_EXECUTORS = 2 }
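The parser above accumulates repeatable --arg values into an ArrayBuffer while pattern matching on the remaining argument list. Here is a minimal standalone sketch of the same loop, with only two made-up options (--jar and --arg) kept for illustration.

import scala.collection.mutable.ArrayBuffer

object ArgParseSketch {
  // Walks the argument list with list patterns and collects repeatable
  // "--arg VALUE" occurrences into an ArrayBuffer, like the parser above.
  def parse(inputArgs: List[String]): (Option[String], Seq[String]) = {
    var jar: Option[String] = None
    val userArgs = new ArrayBuffer[String]()
    var args = inputArgs
    while (args.nonEmpty) {
      args match {
        case "--jar" :: value :: tail =>
          jar = Some(value)
          args = tail
        case "--arg" :: value :: tail =>
          userArgs += value
          args = tail
        case unknown :: _ =>
          sys.error(s"Unknown/unsupported param $unknown")
        case Nil => // unreachable: the loop condition guarantees a non-empty list
      }
    }
    (jar, userArgs.toSeq)
  }
}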
Example 128
Source File: ClientArguments.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.yarn import scala.collection.mutable.ArrayBuffer // TODO: Add code and support for ensuring that yarn resource 'tasks' are location aware ! private[spark] class ClientArguments(args: Array[String]) { var userJar: String = null var userClass: String = null var primaryPyFile: String = null var primaryRFile: String = null var userArgs: ArrayBuffer[String] = new ArrayBuffer[String]() parseArgs(args.toList) private def parseArgs(inputArgs: List[String]): Unit = { var args = inputArgs while (!args.isEmpty) { args match { case ("--jar") :: value :: tail => userJar = value args = tail case ("--class") :: value :: tail => userClass = value args = tail case ("--primary-py-file") :: value :: tail => primaryPyFile = value args = tail case ("--primary-r-file") :: value :: tail => primaryRFile = value args = tail case ("--arg") :: value :: tail => userArgs += value args = tail case Nil => case _ => throw new IllegalArgumentException(getUsageMessage(args)) } } if (primaryPyFile != null && primaryRFile != null) { throw new IllegalArgumentException("Cannot have primary-py-file and primary-r-file" + " at the same time") } } private def getUsageMessage(unknownParam: List[String] = null): String = { val message = if (unknownParam != null) s"Unknown/unsupported param $unknownParam\n" else "" message + s""" |Usage: org.apache.spark.deploy.yarn.Client [options] |Options: | --jar JAR_PATH Path to your application's JAR file (required in yarn-cluster | mode) | --class CLASS_NAME Name of your application's main class (required) | --primary-py-file A main Python file | --primary-r-file A main R file | --arg ARG Argument to be passed to your application's main class. | Multiple invocations are possible, each will be passed in order. """.stripMargin } }
Example 129
Source File: KPLBasedKinesisTestUtils.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.kinesis import java.nio.ByteBuffer import java.nio.charset.StandardCharsets import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import com.amazonaws.services.kinesis.producer.{KinesisProducer => KPLProducer, KinesisProducerConfiguration, UserRecordResult} import com.google.common.util.concurrent.{FutureCallback, Futures} private[kinesis] class KPLBasedKinesisTestUtils(streamShardCount: Int = 2) extends KinesisTestUtils(streamShardCount) { override protected def getProducer(aggregate: Boolean): KinesisDataGenerator = { if (!aggregate) { new SimpleDataGenerator(kinesisClient) } else { new KPLDataGenerator(regionName) } } } private[kinesis] class KPLDataGenerator(regionName: String) extends KinesisDataGenerator { private lazy val producer: KPLProducer = { val conf = new KinesisProducerConfiguration() .setRecordMaxBufferedTime(1000) .setMaxConnections(1) .setRegion(regionName) .setMetricsLevel("none") new KPLProducer(conf) } override def sendData(streamName: String, data: Seq[Int]): Map[String, Seq[(Int, String)]] = { val shardIdToSeqNumbers = new mutable.HashMap[String, ArrayBuffer[(Int, String)]]() data.foreach { num => val str = num.toString val data = ByteBuffer.wrap(str.getBytes(StandardCharsets.UTF_8)) val future = producer.addUserRecord(streamName, str, data) val kinesisCallBack = new FutureCallback[UserRecordResult]() { override def onFailure(t: Throwable): Unit = {} // do nothing override def onSuccess(result: UserRecordResult): Unit = { val shardId = result.getShardId val seqNumber = result.getSequenceNumber() val sentSeqNumbers = shardIdToSeqNumbers.getOrElseUpdate(shardId, new ArrayBuffer[(Int, String)]()) sentSeqNumbers += ((num, seqNumber)) } } Futures.addCallback(future, kinesisCallBack) } producer.flushSync() shardIdToSeqNumbers.toMap } }
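The callback above groups (value, sequence number) pairs per shard by fetching or creating the shard's ArrayBuffer with getOrElseUpdate. A small synchronous sketch of that grouping pattern follows; the names are illustrative and no Kinesis client is involved.

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

object GroupByShardSketch {
  // Groups (shardId, record) results as they arrive, appending each record to
  // the ArrayBuffer for its shard, like the callback in sendData above.
  def group(results: Seq[(String, (Int, String))]): Map[String, Seq[(Int, String)]] = {
    val byShard = new mutable.HashMap[String, ArrayBuffer[(Int, String)]]()
    results.foreach { case (shardId, record) =>
      byShard.getOrElseUpdate(shardId, new ArrayBuffer[(Int, String)]()) += record
    }
    byShard.mapValues(_.toSeq).toMap
  }
}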
Example 130
Source File: Exchange.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.exchange import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import org.apache.spark.broadcast import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, Expression, SortOrder} import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{LeafExecNode, SparkPlan, UnaryExecNode} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType case class ReuseExchange(conf: SQLConf) extends Rule[SparkPlan] { def apply(plan: SparkPlan): SparkPlan = { if (!conf.exchangeReuseEnabled) { return plan } // Build a hash map using schema of exchanges to avoid O(N*N) sameResult calls. val exchanges = mutable.HashMap[StructType, ArrayBuffer[Exchange]]() plan.transformUp { case exchange: Exchange => // the exchanges that have same results usually also have same schemas (same column names). val sameSchema = exchanges.getOrElseUpdate(exchange.schema, ArrayBuffer[Exchange]()) val samePlan = sameSchema.find { e => exchange.sameResult(e) } if (samePlan.isDefined) { // Keep the output of this exchange, the following plans require that to resolve // attributes. ReusedExchangeExec(exchange.output, samePlan.get) } else { sameSchema += exchange exchange } } } }
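ReuseExchange buckets plans by schema in a HashMap[StructType, ArrayBuffer[Exchange]] and reuses the first equivalent plan found in a bucket. The generic sketch below shows only that reuse-by-bucket pattern; reuse and key are invented names, and plain equality stands in for sameResult.

import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer

object ReuseSketch {
  // Replaces duplicate items with a reference to the first equivalent item seen,
  // bucketing candidates by a cheap key first (as ReuseExchange buckets by schema).
  def reuse[A, K](items: Seq[A])(key: A => K): Seq[A] = {
    val buckets = mutable.HashMap[K, ArrayBuffer[A]]()
    items.map { item =>
      val candidates = buckets.getOrElseUpdate(key(item), ArrayBuffer[A]())
      candidates.find(_ == item) match {
        case Some(existing) => existing // reuse the previously seen value
        case None =>
          candidates += item            // first occurrence: remember it
          item
      }
    }
  }
}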
Example 131
Source File: SQLAppStatusStore.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.ui import java.lang.{Long => JLong} import java.util.Date import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import com.fasterxml.jackson.annotation.JsonIgnore import com.fasterxml.jackson.databind.annotation.JsonDeserialize import org.apache.spark.JobExecutionStatus import org.apache.spark.status.KVUtils.KVIndexParam import org.apache.spark.util.kvstore.{KVIndex, KVStore} class SparkPlanGraphNodeWrapper( val node: SparkPlanGraphNode, val cluster: SparkPlanGraphClusterWrapper) { def toSparkPlanGraphNode(): SparkPlanGraphNode = { assert(node == null ^ cluster == null, "One and only one of node or cluster must be set.") if (node != null) node else cluster.toSparkPlanGraphCluster() } } case class SQLPlanMetric( name: String, accumulatorId: Long, metricType: String)
Example 132
Source File: ManifestFileCommitProtocol.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.streaming import java.util.UUID import scala.collection.mutable.ArrayBuffer import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.{JobContext, TaskAttemptContext} import org.apache.spark.internal.Logging import org.apache.spark.internal.io.FileCommitProtocol import org.apache.spark.internal.io.FileCommitProtocol.TaskCommitMessage def setupManifestOptions(fileLog: FileStreamSinkLog, batchId: Long): Unit = { this.fileLog = fileLog this.batchId = batchId } override def setupJob(jobContext: JobContext): Unit = { require(fileLog != null, "setupManifestOptions must be called before this function") // Do nothing } override def commitJob(jobContext: JobContext, taskCommits: Seq[TaskCommitMessage]): Unit = { require(fileLog != null, "setupManifestOptions must be called before this function") val fileStatuses = taskCommits.flatMap(_.obj.asInstanceOf[Seq[SinkFileStatus]]).toArray if (fileLog.add(batchId, fileStatuses)) { logInfo(s"Committed batch $batchId") } else { throw new IllegalStateException(s"Race while writing batch $batchId") } } override def abortJob(jobContext: JobContext): Unit = { require(fileLog != null, "setupManifestOptions must be called before this function") // Do nothing } override def setupTask(taskContext: TaskAttemptContext): Unit = { addedFiles = new ArrayBuffer[String] } override def newTaskTempFile( taskContext: TaskAttemptContext, dir: Option[String], ext: String): String = { // The file name looks like part-r-00000-2dd664f9-d2c4-4ffe-878f-c6c70c1fb0cb_00003.gz.parquet // Note that %05d does not truncate the split number, so if we have more than 100000 tasks, // the file name is fine and won't overflow. val split = taskContext.getTaskAttemptID.getTaskID.getId val uuid = UUID.randomUUID.toString val filename = f"part-$split%05d-$uuid$ext" val file = dir.map { d => new Path(new Path(path, d), filename).toString }.getOrElse { new Path(path, filename).toString } addedFiles += file file } override def newTaskTempFileAbsPath( taskContext: TaskAttemptContext, absoluteDir: String, ext: String): String = { throw new UnsupportedOperationException( s"$this does not support adding files with an absolute path") } override def commitTask(taskContext: TaskAttemptContext): TaskCommitMessage = { if (addedFiles.nonEmpty) { val fs = new Path(addedFiles.head).getFileSystem(taskContext.getConfiguration) val statuses: Seq[SinkFileStatus] = addedFiles.map(f => SinkFileStatus(fs.getFileStatus(new Path(f)))) new TaskCommitMessage(statuses) } else { new TaskCommitMessage(Seq.empty[SinkFileStatus]) } } override def abortTask(taskContext: TaskAttemptContext): Unit = { // Do nothing // TODO: we can also try delete the addedFiles as a best-effort cleanup. } }
Example 133
Source File: BatchEvalPythonExecSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.python import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer import org.apache.spark.api.python.{PythonEvalType, PythonFunction} import org.apache.spark.sql.catalyst.FunctionIdentifier import org.apache.spark.sql.catalyst.expressions.{And, AttributeReference, GreaterThan, In} import org.apache.spark.sql.execution.{FilterExec, InputAdapter, SparkPlanTest, WholeStageCodegenExec} import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types.BooleanType class BatchEvalPythonExecSuite extends SparkPlanTest with SharedSQLContext { import testImplicits.newProductEncoder import testImplicits.localSeqToDatasetHolder override def beforeAll(): Unit = { super.beforeAll() spark.udf.registerPython("dummyPythonUDF", new MyDummyPythonUDF) } override def afterAll(): Unit = { spark.sessionState.functionRegistry.dropFunction(FunctionIdentifier("dummyPythonUDF")) super.afterAll() } test("Python UDF: push down deterministic FilterExec predicates") { val df = Seq(("Hello", 4)).toDF("a", "b") .where("dummyPythonUDF(b) and dummyPythonUDF(a) and a in (3, 4)") val qualifiedPlanNodes = df.queryExecution.executedPlan.collect { case f @ FilterExec( And(_: AttributeReference, _: AttributeReference), InputAdapter(_: BatchEvalPythonExec)) => f case b @ BatchEvalPythonExec(_, _, WholeStageCodegenExec(FilterExec(_: In, _))) => b } assert(qualifiedPlanNodes.size == 2) } test("Nested Python UDF: push down deterministic FilterExec predicates") { val df = Seq(("Hello", 4)).toDF("a", "b") .where("dummyPythonUDF(a, dummyPythonUDF(a, b)) and a in (3, 4)") val qualifiedPlanNodes = df.queryExecution.executedPlan.collect { case f @ FilterExec(_: AttributeReference, InputAdapter(_: BatchEvalPythonExec)) => f case b @ BatchEvalPythonExec(_, _, WholeStageCodegenExec(FilterExec(_: In, _))) => b } assert(qualifiedPlanNodes.size == 2) } test("Python UDF: no push down on non-deterministic") { val df = Seq(("Hello", 4)).toDF("a", "b") .where("b > 4 and dummyPythonUDF(a) and rand() > 0.3") val qualifiedPlanNodes = df.queryExecution.executedPlan.collect { case f @ FilterExec( And(_: AttributeReference, _: GreaterThan), InputAdapter(_: BatchEvalPythonExec)) => f case b @ BatchEvalPythonExec(_, _, WholeStageCodegenExec(_: FilterExec)) => b } assert(qualifiedPlanNodes.size == 2) } test("Python UDF: push down on deterministic predicates after the first non-deterministic") { val df = Seq(("Hello", 4)).toDF("a", "b") .where("dummyPythonUDF(a) and rand() > 0.3 and b > 4") val qualifiedPlanNodes = df.queryExecution.executedPlan.collect { case f @ FilterExec( And(_: AttributeReference, _: GreaterThan), InputAdapter(_: BatchEvalPythonExec)) => f case b @ BatchEvalPythonExec(_, _, WholeStageCodegenExec(_: FilterExec)) => b } assert(qualifiedPlanNodes.size == 2) } test("Python UDF refers to the attributes from more than one child") { val df = Seq(("Hello", 4)).toDF("a", "b") val df2 = Seq(("Hello", 4)).toDF("c", "d") val joinDF = df.crossJoin(df2).where("dummyPythonUDF(a, c) == dummyPythonUDF(d, c)") val qualifiedPlanNodes = joinDF.queryExecution.executedPlan.collect { case b: BatchEvalPythonExec => b } assert(qualifiedPlanNodes.size == 1) } } // This Python UDF is dummy and just for testing. Unable to execute. 
class DummyUDF extends PythonFunction( command = Array[Byte](), envVars = Map("" -> "").asJava, pythonIncludes = ArrayBuffer("").asJava, pythonExec = "", pythonVer = "", broadcastVars = null, accumulator = null) class MyDummyPythonUDF extends UserDefinedPythonFunction( name = "dummyUDF", func = new DummyUDF, dataType = BooleanType, pythonEvalType = PythonEvalType.SQL_BATCHED_UDF, udfDeterministic = true)
Example 134
Source File: UnionDStream.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag import org.apache.spark.SparkException import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} private[streaming] class UnionDStream[T: ClassTag](parents: Array[DStream[T]]) extends DStream[T](parents.head.ssc) { require(parents.length > 0, "List of DStreams to union is empty") require(parents.map(_.ssc).distinct.length == 1, "Some of the DStreams have different contexts") require(parents.map(_.slideDuration).distinct.length == 1, "Some of the DStreams have different slide durations") override def dependencies: List[DStream[_]] = parents.toList override def slideDuration: Duration = parents.head.slideDuration override def compute(validTime: Time): Option[RDD[T]] = { val rdds = new ArrayBuffer[RDD[T]]() parents.map(_.getOrCompute(validTime)).foreach { case Some(rdd) => rdds += rdd case None => throw new SparkException("Could not generate RDD from a parent for unifying at" + s" time $validTime") } if (rdds.nonEmpty) { Some(ssc.sc.union(rdds)) } else { None } } }
Example 135
Source File: QueueInputDStream.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import java.io.{NotSerializableException, ObjectInputStream, ObjectOutputStream} import scala.collection.mutable.{ArrayBuffer, Queue} import scala.reflect.ClassTag import org.apache.spark.rdd.{RDD, UnionRDD} import org.apache.spark.streaming.{StreamingContext, Time} private[streaming] class QueueInputDStream[T: ClassTag]( ssc: StreamingContext, val queue: Queue[RDD[T]], oneAtATime: Boolean, defaultRDD: RDD[T] ) extends InputDStream[T](ssc) { override def start() { } override def stop() { } private def readObject(in: ObjectInputStream): Unit = { throw new NotSerializableException("queueStream doesn't support checkpointing. " + "Please don't use queueStream when checkpointing is enabled.") } private def writeObject(oos: ObjectOutputStream): Unit = { logWarning("queueStream doesn't support checkpointing") } override def compute(validTime: Time): Option[RDD[T]] = { val buffer = new ArrayBuffer[RDD[T]]() queue.synchronized { if (oneAtATime && queue.nonEmpty) { buffer += queue.dequeue() } else { buffer ++= queue queue.clear() } } if (buffer.nonEmpty) { if (oneAtATime) { Some(buffer.head) } else { Some(new UnionRDD(context.sc, buffer.toSeq)) } } else if (defaultRDD != null) { Some(defaultRDD) } else { Some(ssc.sparkContext.emptyRDD) } } }
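compute() above drains either one queued RDD or the whole queue into an ArrayBuffer while holding the queue's monitor. The same draining logic written against plain mutable collections (drain is an invented helper name):

import scala.collection.mutable.{ArrayBuffer, Queue}

object QueueDrainSketch {
  // Collects either the next queued batch or everything currently queued,
  // mirroring the oneAtATime branch in compute() above.
  def drain[T](queue: Queue[T], oneAtATime: Boolean): Seq[T] = {
    val buffer = new ArrayBuffer[T]()
    queue.synchronized {
      if (oneAtATime && queue.nonEmpty) {
        buffer += queue.dequeue()
      } else {
        buffer ++= queue
        queue.clear()
      }
    }
    buffer.toSeq
  }
}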
Example 136
Source File: LocalSparkCluster.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy import scala.collection.mutable.ArrayBuffer import org.apache.spark.SparkConf import org.apache.spark.deploy.master.Master import org.apache.spark.deploy.worker.Worker import org.apache.spark.internal.Logging import org.apache.spark.rpc.RpcEnv import org.apache.spark.util.Utils for (workerNum <- 1 to numWorkers) { val workerEnv = Worker.startRpcEnvAndEndpoint(localHostname, 0, 0, coresPerWorker, memoryPerWorker, masters, null, Some(workerNum), _conf) workerRpcEnvs += workerEnv } masters } def stop() { logInfo("Shutting down local Spark cluster.") // Stop the workers before the master so they don't get upset that it disconnected workerRpcEnvs.foreach(_.shutdown()) masterRpcEnvs.foreach(_.shutdown()) workerRpcEnvs.foreach(_.awaitTermination()) masterRpcEnvs.foreach(_.awaitTermination()) masterRpcEnvs.clear() workerRpcEnvs.clear() } }
Example 137
Source File: TaskResult.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.io._ import java.nio.ByteBuffer import scala.collection.mutable.ArrayBuffer import org.apache.spark.SparkEnv import org.apache.spark.serializer.SerializerInstance import org.apache.spark.storage.BlockId import org.apache.spark.util.{AccumulatorV2, Utils} // Task result. Also contains updates to accumulator variables. private[spark] sealed trait TaskResult[T] def value(resultSer: SerializerInstance = null): T = { if (valueObjectDeserialized) { valueObject } else { // This should not run when holding a lock because it may cost dozens of seconds for a large // value val ser = if (resultSer == null) SparkEnv.get.serializer.newInstance() else resultSer valueObject = ser.deserialize(valueBytes) valueObjectDeserialized = true valueObject } } }
Example 138
Source File: Schedulable.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.util.concurrent.ConcurrentLinkedQueue import scala.collection.mutable.ArrayBuffer import org.apache.spark.scheduler.SchedulingMode.SchedulingMode private[spark] trait Schedulable { var parent: Pool // child queues def schedulableQueue: ConcurrentLinkedQueue[Schedulable] def schedulingMode: SchedulingMode def weight: Int def minShare: Int def runningTasks: Int def priority: Int def stageId: Int def name: String def addSchedulable(schedulable: Schedulable): Unit def removeSchedulable(schedulable: Schedulable): Unit def getSchedulableByName(name: String): Schedulable def executorLost(executorId: String, host: String, reason: ExecutorLossReason): Unit def checkSpeculatableTasks(minTimeToSpeculation: Int): Boolean def getSortedTaskSetQueue: ArrayBuffer[TaskSetManager] }
Example 139
Source File: ChunkedByteBufferOutputStream.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.util.io import java.io.OutputStream import java.nio.ByteBuffer import scala.collection.mutable.ArrayBuffer import org.apache.spark.storage.StorageUtils private[this] var position = chunkSize private[this] var _size = 0 private[this] var closed: Boolean = false def size: Long = _size override def close(): Unit = { if (!closed) { super.close() closed = true } } override def write(b: Int): Unit = { require(!closed, "cannot write to a closed ChunkedByteBufferOutputStream") allocateNewChunkIfNeeded() chunks(lastChunkIndex).put(b.toByte) position += 1 _size += 1 } override def write(bytes: Array[Byte], off: Int, len: Int): Unit = { require(!closed, "cannot write to a closed ChunkedByteBufferOutputStream") var written = 0 while (written < len) { allocateNewChunkIfNeeded() val thisBatch = math.min(chunkSize - position, len - written) chunks(lastChunkIndex).put(bytes, written + off, thisBatch) written += thisBatch position += thisBatch } _size += len } @inline private def allocateNewChunkIfNeeded(): Unit = { if (position == chunkSize) { chunks += allocator(chunkSize) lastChunkIndex += 1 position = 0 } } def toChunkedByteBuffer: ChunkedByteBuffer = { require(closed, "cannot call toChunkedByteBuffer() unless close() has been called") require(!toChunkedByteBufferWasCalled, "toChunkedByteBuffer() can only be called once") toChunkedByteBufferWasCalled = true if (lastChunkIndex == -1) { new ChunkedByteBuffer(Array.empty[ByteBuffer]) } else { // Copy the first n-1 chunks to the output, and then create an array that fits the last chunk. // An alternative would have been returning an array of ByteBuffers, with the last buffer // bounded to only the last chunk's position. However, given our use case in Spark (to put // the chunks in block manager), only limiting the view bound of the buffer would still // require the block manager to store the whole chunk. val ret = new Array[ByteBuffer](chunks.size) for (i <- 0 until chunks.size - 1) { ret(i) = chunks(i) ret(i).flip() } if (position == chunkSize) { ret(lastChunkIndex) = chunks(lastChunkIndex) ret(lastChunkIndex).flip() } else { ret(lastChunkIndex) = allocator(position) chunks(lastChunkIndex).flip() ret(lastChunkIndex).put(chunks(lastChunkIndex)) ret(lastChunkIndex).flip() StorageUtils.dispose(chunks(lastChunkIndex)) } new ChunkedByteBuffer(ret) } } }
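The stream above grows by fixed-size chunks kept in an ArrayBuffer instead of resizing one large array. Below is a simplified sketch of that idea using plain Array[Byte] chunks, with no ByteBuffer allocator or dispose handling; ChunkedBytesSketch is an illustrative name, not the Spark class.

import java.io.OutputStream
import scala.collection.mutable.ArrayBuffer

// Bytes are appended into equally sized Array[Byte] chunks held in an ArrayBuffer,
// so no large contiguous array is ever reallocated while writing.
class ChunkedBytesSketch(chunkSize: Int) extends OutputStream {
  private val chunks = new ArrayBuffer[Array[Byte]]()
  private var position = chunkSize // force allocation of the first chunk on first write

  override def write(b: Int): Unit = {
    if (position == chunkSize) { chunks += new Array[Byte](chunkSize); position = 0 }
    chunks.last(position) = b.toByte
    position += 1
  }

  // Copies everything written so far into one array (the last chunk may be partial).
  def toArray: Array[Byte] = {
    if (chunks.isEmpty) return Array.emptyByteArray
    val total = (chunks.size - 1) * chunkSize + position
    val out = new Array[Byte](total)
    var offset = 0
    chunks.init.foreach { c => System.arraycopy(c, 0, out, offset, chunkSize); offset += chunkSize }
    System.arraycopy(chunks.last, 0, out, offset, position)
    out
  }
}

The design point is the same as in the Spark class: appending a new chunk to the ArrayBuffer is cheap, and only the final copy-out has to know the total size.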
Example 140
Source File: UnionRDD.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer import scala.collection.parallel.ForkJoinTaskSupport import scala.concurrent.forkjoin.ForkJoinPool import scala.reflect.ClassTag import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils private[spark] class UnionPartition[T: ClassTag]( idx: Int, @transient private val rdd: RDD[T], val parentRddIndex: Int, @transient private val parentRddPartitionIndex: Int) extends Partition { var parentPartition: Partition = rdd.partitions(parentRddPartitionIndex) def preferredLocations(): Seq[String] = rdd.preferredLocations(parentPartition) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization parentPartition = rdd.partitions(parentRddPartitionIndex) oos.defaultWriteObject() } } object UnionRDD { private[spark] lazy val partitionEvalTaskSupport = new ForkJoinTaskSupport(new ForkJoinPool(8)) } @DeveloperApi class UnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]]) extends RDD[T](sc, Nil) { // Nil since we implement getDependencies // visible for testing private[spark] val isPartitionListingParallel: Boolean = rdds.length > conf.getInt("spark.rdd.parallelListingThreshold", 10) override def getPartitions: Array[Partition] = { val parRDDs = if (isPartitionListingParallel) { val parArray = rdds.par parArray.tasksupport = UnionRDD.partitionEvalTaskSupport parArray } else { rdds } val array = new Array[Partition](parRDDs.map(_.partitions.length).seq.sum) var pos = 0 for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) { array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index) pos += 1 } array } override def getDependencies: Seq[Dependency[_]] = { val deps = new ArrayBuffer[Dependency[_]] var pos = 0 for (rdd <- rdds) { deps += new RangeDependency(rdd, 0, pos, rdd.partitions.length) pos += rdd.partitions.length } deps } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val part = s.asInstanceOf[UnionPartition[T]] parent[T](part.parentRddIndex).iterator(part.parentPartition, context) } override def getPreferredLocations(s: Partition): Seq[String] = s.asInstanceOf[UnionPartition[T]].preferredLocations() override def clearDependencies() { super.clearDependencies() rdds = null } }
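getDependencies walks the parents once, appending one RangeDependency per parent while carrying a running output offset in pos. The offset bookkeeping in isolation, as a sketch (startOffsets is an invented helper):

import scala.collection.mutable.ArrayBuffer

object RangeOffsetsSketch {
  // Computes the starting output position and length of each parent, the same
  // running "pos" accumulation getDependencies uses to build RangeDependency objects.
  def startOffsets(partitionCounts: Seq[Int]): Seq[(Int, Int)] = {
    val offsets = new ArrayBuffer[(Int, Int)]() // (parent start, parent length)
    var pos = 0
    partitionCounts.foreach { n =>
      offsets += ((pos, n))
      pos += n
    }
    offsets.toSeq
  }
}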
Example 141
Source File: ValueJsonConversionTest.scala From ingraph with Eclipse Public License 1.0 | 5 votes |
package ingraph.compiler.sql.driver import ingraph.compiler.sql.driver.ValueJsonConversion._ import ingraph.compiler.sql.driver.ValueJsonConversionTest._ import org.neo4j.driver.internal.value._ import org.neo4j.driver.internal.{InternalNode, InternalPath, InternalRelationship} import org.neo4j.driver.v1.Value import org.scalactic.source import org.scalactic.source.Position import org.scalatest.FunSuite import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer class ValueJsonConversionTest extends FunSuite { testParameters.foreach { case (value, testName, pos) => test(testName) { println(value) val jsonString = gson.toJson(value, classOf[Value]) println(jsonString) val deserialized = gson.fromJson(jsonString, classOf[Value]) assert(value == deserialized) }(pos) } } object ValueJsonConversionTest { val testValues: ArrayBuffer[Value] = ArrayBuffer.empty val testParameters: ArrayBuffer[(Value, String, Position)] = ArrayBuffer.empty def addTest(value: Value, testName: String = null)(implicit pos: source.Position): Unit = { testValues += value testParameters += ((value, Option(testName).getOrElse(value.getClass.getSimpleName), pos)) } private val stringValue = new StringValue("John") private val integerValue = new IntegerValue(101) private val propertiesMap = Map[String, Value]("name" -> stringValue).asJava addTest(new MapValue(propertiesMap)) addTest(new BytesValue(Array[Byte](0, 42, 127, -128))) addTest(new ListValue(stringValue, integerValue)) addTest(new NodeValue(new InternalNode(5, List("Label1", "Label2").asJavaCollection, propertiesMap))) addTest(new RelationshipValue(new InternalRelationship(42, 10, 20, "Edge_Type_1", propertiesMap))) addTest(new PathValue(new InternalPath( new InternalNode(0), new InternalRelationship(101, 0, 1, "TYPE_A"), new InternalNode(1) ))) addTest(BooleanValue.FALSE) addTest(BooleanValue.TRUE) addTest(NullValue.NULL) addTest(stringValue) addTest(integerValue) addTest(new FloatValue(3.14)) }
Example 142
Source File: TokenStreamUtils.scala From odinson with Apache License 2.0 | 5 votes |
package ai.lum.odinson.lucene.analysis import scala.collection.mutable.ArrayBuffer import org.apache.lucene.analysis.Analyzer import org.apache.lucene.analysis.TokenStream import org.apache.lucene.analysis.tokenattributes.CharTermAttribute import org.apache.lucene.search.IndexSearcher import org.apache.lucene.search.highlight.TokenSources object TokenStreamUtils { def getTokens( docID: Int, fieldName: String, indexSearcher: IndexSearcher, analyzer: Analyzer ): Array[String] = { val doc = indexSearcher.doc(docID) val tvs = indexSearcher.getIndexReader().getTermVectors(docID) val text = doc.getField(fieldName).stringValue val ts = TokenSources.getTokenStream(fieldName, tvs, text, analyzer, -1) val tokens = getTokens(ts) tokens } def getTokens(ts: TokenStream): Array[String] = { ts.reset() val terms = new ArrayBuffer[String] while (ts.incrementToken()) { val charTermAttribute = ts.addAttribute(classOf[CharTermAttribute]) val term = charTermAttribute.toString terms += term } ts.end() ts.close() terms.toArray } }
Example 143
Source File: Driver.scala From OnlineLDA_Spark with Apache License 2.0 | 5 votes |
package com.github.yuhao.yang import java.util.Calendar import org.apache.log4j.{Level, Logger} import org.apache.spark.{SparkContext, SparkConf} import scala.collection.mutable.ArrayBuffer object Driver extends Serializable{ def main(args: Array[String]) { Logger.getLogger("org").setLevel(Level.ERROR) Logger.getLogger("akka").setLevel(Level.ERROR) val inputDir = args(0) val filePaths = extractPaths(inputDir + "texts", true) val stopWordsPath = inputDir + "stop.txt" val vocabPath = inputDir + "wordsEn.txt" println("begin: " + Calendar.getInstance().getTime) println("path size: " + filePaths.size) assert(filePaths.size > 0) val conf = new SparkConf().setAppName("online LDA Spark") val sc = new SparkContext(conf) val vocab = Docs2Vec.extractVocab(sc, Seq(vocabPath), stopWordsPath) val vocabArray = vocab.map(_.swap) val K = args(1).toInt // val lda = OnlineLDA_Spark.runBatchMode(sc, filePaths, vocab, K, 50) val lda = OnlineLDA_Spark.runOnlineMode(sc, filePaths, vocab, K, args(2).toInt) println("_lambda:") for(row <- 0 until lda._lambda.rows){ val v = lda._lambda(row, ::).t val topk = lda._lambda(row, ::).t.argtopk(10) val pairs = topk.map(k => (vocabArray(k), v(k))) val sorted = pairs.sortBy(_._2).reverse println(sorted.map(x => (x._1)).mkString(","), sorted.map(x => ("%2.2f".format(x._2))).mkString(",")) } println("end: " + Calendar.getInstance().getTime()) } def extractPaths(path: String, recursive: Boolean = true): Array[String] ={ val docsets = ArrayBuffer[String]() val fileList = new java.io.File(path).listFiles() if(fileList == null) return docsets.toArray for(f <- fileList){ if(f.isDirectory){ if(recursive) docsets ++= extractPaths(f.getAbsolutePath, true) } else{ docsets += f.getAbsolutePath } } docsets.toArray } }
Example 144
Source File: QuerySuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package edu.ucla.cs.wis.bigdatalog.spark import org.apache.spark.{Logging, SparkConf, SparkContext, SparkException} import org.scalatest.FunSuite import scala.collection.mutable.ArrayBuffer abstract class QuerySuite extends FunSuite with Logging { case class TestCase(program: String, query: String, data: Map[String, Seq[String]], answers: Seq[String], answersSize: Int) { def this(program: String, query: String, data: Map[String, Seq[String]], answersSize: Int) = this(program, query, data, null, answersSize) def this(program: String, query: String, data: Map[String, Seq[String]], answers: Seq[String]) = this(program, query, data, answers, answers.size) } def runTest(testCase: TestCase): Unit = runTests(Seq(testCase)) def runTests(testCases: Seq[TestCase]): Unit = { val sparkCtx = new SparkContext("local[*]", "QuerySuite", new SparkConf() .set("spark.eventLog.enabled", "true") //.set("spark.eventLog.dir", "../logs") .set("spark.ui.enabled", "false") .set("spark.sql.shuffle.partitions", "5") .setAll(Map.empty[String, String]) ) val bigDatalogCtx = new BigDatalogContext(sparkCtx) var count: Int = 1 for (testCase <- testCases) { bigDatalogCtx.loadProgram(testCase.program) for ((relationName, data) <- testCase.data) { val relationInfo = bigDatalogCtx.relationCatalog.getRelationInfo(relationName) if (relationInfo == null) throw new SparkException("You are attempting to load an unknown relation.") bigDatalogCtx.registerAndLoadTable(relationName, data, bigDatalogCtx.conf.numShufflePartitions) } val query = testCase.query val answers = testCase.answers logInfo("========== START BigDatalog Query " + count + " START ==========") val program = bigDatalogCtx.query(query) val results = program.execute().collect() // for some test cases we will only know the size of the answer set, not the actual answers if (answers == null) { assert(results.size == testCase.answersSize) } else { if (results.size != answers.size) { displayDifferences(results.map(_.toString), answers) // yes this will fail assert(results.size == answers.size) } else { for (result <- results) assert(answers.contains(result.toString())) } val resultStrings = results.map(_.toString).toSet for (answer <- answers) assert(resultStrings.contains(answer.toString())) } logInfo("========== END BigDatalog Query " + count + " END ==========\n") count += 1 bigDatalogCtx.reset() } sparkCtx.stop() } private def displayDifferences(results: Seq[String], answers: Seq[String]): Unit = { val missingAnswers = new ArrayBuffer[String] val missingResults = new ArrayBuffer[String] for (result <- results) if (!answers.contains(result)) missingAnswers += result for (answer <- answers) if (!results.contains(answer)) missingResults += answer if (missingAnswers.nonEmpty) logInfo("Results not in Answers: " + missingAnswers.mkString(", ")) if (missingResults.nonEmpty) logInfo("Answers not in Results: " + missingResults.mkString(", ")) } }
Example 145
Source File: SpearmanCorrelation.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.mllib.stat.correlation import scala.collection.mutable.ArrayBuffer import org.apache.spark.Logging import org.apache.spark.SparkContext._ import org.apache.spark.mllib.linalg.{Matrix, Vector, Vectors} import org.apache.spark.rdd.RDD override def computeCorrelationMatrix(X: RDD[Vector]): Matrix = { // ((columnIndex, value), rowUid) val colBased = X.zipWithUniqueId().flatMap { case (vec, uid) => vec.toArray.view.zipWithIndex.map { case (v, j) => ((j, v), uid) } } // global sort by (columnIndex, value) val sorted = colBased.sortByKey() // assign global ranks (using average ranks for tied values) val globalRanks = sorted.zipWithIndex().mapPartitions { iter => var preCol = -1 var preVal = Double.NaN var startRank = -1.0 var cachedUids = ArrayBuffer.empty[Long] val flush: () => Iterable[(Long, (Int, Double))] = () => { val averageRank = startRank + (cachedUids.size - 1) / 2.0 val output = cachedUids.map { uid => (uid, (preCol, averageRank)) } cachedUids.clear() output } iter.flatMap { case (((j, v), uid), rank) => // If we see a new value or cachedUids is too big, we flush ids with their average rank. if (j != preCol || v != preVal || cachedUids.size >= 10000000) { val output = flush() preCol = j preVal = v startRank = rank cachedUids += uid output } else { cachedUids += uid Iterator.empty } } ++ flush() } // Replace values in the input matrix by their ranks compared with values in the same column. // Note that shifting all ranks in a column by a constant value doesn't affect result. val groupedRanks = globalRanks.groupByKey().map { case (uid, iter) => // sort by column index and then convert values to a vector Vectors.dense(iter.toSeq.sortBy(_._1).map(_._2).toArray) } PearsonCorrelation.computeCorrelationMatrix(groupedRanks) } }
Example 146
Source File: TestOutputStream.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming import java.io.{IOException, ObjectInputStream} import org.apache.spark.rdd.RDD import org.apache.spark.streaming.dstream.{DStream, ForEachDStream} import org.apache.spark.util.Utils import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag class TestOutputStream[T: ClassTag](parent: DStream[T], val output: ArrayBuffer[Seq[T]] = ArrayBuffer[Seq[T]]()) extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { val collected = rdd.collect() output += collected }, false) { // This clears the output buffer every time it is read from a checkpoint @throws(classOf[IOException]) private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { ois.defaultReadObject() output.clear() } }
Example 147
Source File: FlumeStreamSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.flume import scala.collection.JavaConverters._ import scala.collection.mutable.{ArrayBuffer, SynchronizedBuffer} import scala.concurrent.duration._ import scala.language.postfixOps import com.google.common.base.Charsets import org.jboss.netty.channel.ChannelPipeline import org.jboss.netty.channel.socket.SocketChannel import org.jboss.netty.channel.socket.nio.NioClientSocketChannelFactory import org.jboss.netty.handler.codec.compression._ import org.scalatest.{BeforeAndAfter, Matchers} import org.scalatest.concurrent.Eventually._ import org.apache.spark.{Logging, SparkConf, SparkFunSuite} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.{Milliseconds, StreamingContext, TestOutputStream} class FlumeStreamSuite extends SparkFunSuite with BeforeAndAfter with Matchers with Logging { val conf = new SparkConf().setMaster("local[4]").setAppName("FlumeStreamSuite") var ssc: StreamingContext = null test("flume input stream") { testFlumeStream(testCompression = false) } test("flume input compressed stream") { testFlumeStream(testCompression = true) } private class CompressionChannelFactory(compressionLevel: Int) extends NioClientSocketChannelFactory { override def newChannel(pipeline: ChannelPipeline): SocketChannel = { val encoder = new ZlibEncoder(compressionLevel) pipeline.addFirst("deflater", encoder) pipeline.addFirst("inflater", new ZlibDecoder()) super.newChannel(pipeline) } } }
Example 148
Source File: JDBCRelation.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.jdbc import java.util.Properties import scala.collection.mutable.ArrayBuffer import org.apache.spark.Partition import org.apache.spark.rdd.RDD import org.apache.spark.sql.sources._ import org.apache.spark.sql.types.StructType import org.apache.spark.sql.{DataFrame, Row, SQLContext, SaveMode} def columnPartition(partitioning: JDBCPartitioningInfo): Array[Partition] = { if (partitioning == null) return Array[Partition](JDBCPartition(null, 0)) val numPartitions = partitioning.numPartitions val column = partitioning.column if (numPartitions == 1) return Array[Partition](JDBCPartition(null, 0)) // Overflow and silliness can happen if you subtract then divide. // Here we get a little roundoff, but that's (hopefully) OK. val stride: Long = (partitioning.upperBound / numPartitions - partitioning.lowerBound / numPartitions) var i: Int = 0 var currentValue: Long = partitioning.lowerBound var ans = new ArrayBuffer[Partition]() while (i < numPartitions) { val lowerBound = if (i != 0) s"$column >= $currentValue" else null currentValue += stride val upperBound = if (i != numPartitions - 1) s"$column < $currentValue" else null val whereClause = if (upperBound == null) { lowerBound } else if (lowerBound == null) { upperBound } else { s"$lowerBound AND $upperBound" } ans += JDBCPartition(whereClause, i) i = i + 1 } ans.toArray } } private[sql] case class JDBCRelation( url: String, table: String, parts: Array[Partition], properties: Properties = new Properties())(@transient val sqlContext: SQLContext) extends BaseRelation with PrunedFilteredScan with InsertableRelation { override val needConversion: Boolean = false override val schema: StructType = JDBCRDD.resolveTable(url, table, properties) override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = { // Rely on a type erasure hack to pass RDD[InternalRow] back as RDD[Row] JDBCRDD.scanTable( sqlContext.sparkContext, schema, url, properties, table, requiredColumns, filters, parts).asInstanceOf[RDD[Row]] } override def insert(data: DataFrame, overwrite: Boolean): Unit = { data.write .mode(if (overwrite) SaveMode.Overwrite else SaveMode.Append) .jdbc(url, table, properties) } }
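columnPartition above builds one JDBCPartition per stride of the partition column, appending the generated WHERE clause to an ArrayBuffer. A self-contained sketch of just the clause generation follows; whereClauses and the column name used in the usage note are illustrative, not part of the Spark API.

import scala.collection.mutable.ArrayBuffer

object RangePartitionSketch {
  // Builds one WHERE clause per partition over the numeric range
  // [lowerBound, upperBound), leaving the first lower bound and the last
  // upper bound open, as columnPartition does.
  def whereClauses(column: String, lowerBound: Long, upperBound: Long, numPartitions: Int): Seq[String] = {
    val stride = upperBound / numPartitions - lowerBound / numPartitions
    val clauses = new ArrayBuffer[String]()
    var currentValue = lowerBound
    var i = 0
    while (i < numPartitions) {
      val lower = if (i != 0) s"$column >= $currentValue" else null
      currentValue += stride
      val upper = if (i != numPartitions - 1) s"$column < $currentValue" else null
      clauses += Seq(Option(lower), Option(upper)).flatten.mkString(" AND ")
      i += 1
    }
    clauses.toSeq
  }
}

For example, whereClauses("id", 0, 100, 4) produces "id < 25", "id >= 25 AND id < 50", "id >= 50 AND id < 75", and "id >= 75".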
Example 149
Source File: KPLBasedKinesisTestUtils.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.kinesis import java.nio.ByteBuffer import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import com.amazonaws.services.kinesis.producer.{KinesisProducer => KPLProducer, KinesisProducerConfiguration, UserRecordResult} import com.google.common.util.concurrent.{FutureCallback, Futures} private[kinesis] class KPLBasedKinesisTestUtils extends KinesisTestUtils { override protected def getProducer(aggregate: Boolean): KinesisDataGenerator = { if (!aggregate) { new SimpleDataGenerator(kinesisClient) } else { new KPLDataGenerator(regionName) } } } private[kinesis] class KPLDataGenerator(regionName: String) extends KinesisDataGenerator { private lazy val producer: KPLProducer = { val conf = new KinesisProducerConfiguration() .setRecordMaxBufferedTime(1000) .setMaxConnections(1) .setRegion(regionName) .setMetricsLevel("none") new KPLProducer(conf) } override def sendData(streamName: String, data: Seq[Int]): Map[String, Seq[(Int, String)]] = { val shardIdToSeqNumbers = new mutable.HashMap[String, ArrayBuffer[(Int, String)]]() data.foreach { num => val str = num.toString val data = ByteBuffer.wrap(str.getBytes()) val future = producer.addUserRecord(streamName, str, data) val kinesisCallBack = new FutureCallback[UserRecordResult]() { override def onFailure(t: Throwable): Unit = {} // do nothing override def onSuccess(result: UserRecordResult): Unit = { val shardId = result.getShardId val seqNumber = result.getSequenceNumber() val sentSeqNumbers = shardIdToSeqNumbers.getOrElseUpdate(shardId, new ArrayBuffer[(Int, String)]()) sentSeqNumbers += ((num, seqNumber)) } } Futures.addCallback(future, kinesisCallBack) } producer.flushSync() shardIdToSeqNumbers.toMap } }
Example 150
Source File: UnionDStream.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag import org.apache.spark.SparkException import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.rdd.RDD import org.apache.spark.rdd.UnionRDD private[streaming] class UnionDStream[T: ClassTag](parents: Array[DStream[T]]) extends DStream[T](parents.head.ssc) { require(parents.length > 0, "List of DStreams to union is empty") require(parents.map(_.ssc).distinct.size == 1, "Some of the DStreams have different contexts") require(parents.map(_.slideDuration).distinct.size == 1, "Some of the DStreams have different slide durations") override def dependencies: List[DStream[_]] = parents.toList override def slideDuration: Duration = parents.head.slideDuration override def compute(validTime: Time): Option[RDD[T]] = { val rdds = new ArrayBuffer[RDD[T]]() parents.map(_.getOrCompute(validTime)).foreach { case Some(rdd) => rdds += rdd case None => throw new SparkException("Could not generate RDD from a parent for unifying at" + s" time $validTime") } if (rdds.size > 0) { Some(new UnionRDD(ssc.sc, rdds)) } else { None } } }
Example 151
Source File: QueueInputDStream.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.dstream import java.io.{NotSerializableException, ObjectInputStream, ObjectOutputStream} import scala.collection.mutable.{ArrayBuffer, Queue} import scala.reflect.ClassTag import org.apache.spark.rdd.{RDD, UnionRDD} import org.apache.spark.streaming.{Time, StreamingContext} private[streaming] class QueueInputDStream[T: ClassTag]( ssc: StreamingContext, val queue: Queue[RDD[T]], oneAtATime: Boolean, defaultRDD: RDD[T] ) extends InputDStream[T](ssc) { override def start() { } override def stop() { } private def readObject(in: ObjectInputStream): Unit = { throw new NotSerializableException("queueStream doesn't support checkpointing. " + "Please don't use queueStream when checkpointing is enabled.") } private def writeObject(oos: ObjectOutputStream): Unit = { logWarning("queueStream doesn't support checkpointing") } override def compute(validTime: Time): Option[RDD[T]] = { val buffer = new ArrayBuffer[RDD[T]]() if (oneAtATime && queue.size > 0) { buffer += queue.dequeue() } else { buffer ++= queue.dequeueAll(_ => true) } if (buffer.size > 0) { if (oneAtATime) { Some(buffer.head) } else { Some(new UnionRDD(context.sc, buffer.toSeq)) } } else if (defaultRDD != null) { Some(defaultRDD) } else { Some(ssc.sparkContext.emptyRDD) } } }
Example 152
Source File: LocalSparkCluster.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy import scala.collection.mutable.ArrayBuffer import org.apache.spark.rpc.RpcEnv import org.apache.spark.{Logging, SparkConf} import org.apache.spark.deploy.worker.Worker import org.apache.spark.deploy.master.Master import org.apache.spark.util.Utils for (workerNum <- 1 to numWorkers) { val workerEnv = Worker.startRpcEnvAndEndpoint(localHostname, 0, 0, coresPerWorker, memoryPerWorker, masters, null, Some(workerNum), _conf) workerRpcEnvs += workerEnv } masters } def stop() { logInfo("Shutting down local Spark cluster.") // Stop the workers before the master so they don't get upset that it disconnected workerRpcEnvs.foreach(_.shutdown()) masterRpcEnvs.foreach(_.shutdown()) workerRpcEnvs.foreach(_.awaitTermination()) masterRpcEnvs.foreach(_.awaitTermination()) masterRpcEnvs.clear() workerRpcEnvs.clear() } }
Example 153
Source File: Schedulable.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.util.concurrent.ConcurrentLinkedQueue import scala.collection.mutable.ArrayBuffer import org.apache.spark.scheduler.SchedulingMode.SchedulingMode private[spark] trait Schedulable { var parent: Pool // child queues def schedulableQueue: ConcurrentLinkedQueue[Schedulable] def schedulingMode: SchedulingMode def weight: Int def minShare: Int def runningTasks: Int def priority: Int def stageId: Int def name: String def addSchedulable(schedulable: Schedulable): Unit def removeSchedulable(schedulable: Schedulable): Unit def getSchedulableByName(name: String): Schedulable def executorLost(executorId: String, host: String, reason: ExecutorLossReason): Unit def checkSpeculatableTasks(): Boolean def getSortedTaskSetQueue: ArrayBuffer[TaskSetManager] }
Example 154
Source File: ByteArrayChunkOutputStream.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.util.io import java.io.OutputStream import scala.collection.mutable.ArrayBuffer private var position = chunkSize override def write(b: Int): Unit = { allocateNewChunkIfNeeded() chunks(lastChunkIndex)(position) = b.toByte position += 1 } override def write(bytes: Array[Byte], off: Int, len: Int): Unit = { var written = 0 while (written < len) { allocateNewChunkIfNeeded() val thisBatch = math.min(chunkSize - position, len - written) System.arraycopy(bytes, written + off, chunks(lastChunkIndex), position, thisBatch) written += thisBatch position += thisBatch } } @inline private def allocateNewChunkIfNeeded(): Unit = { if (position == chunkSize) { chunks += new Array[Byte](chunkSize) lastChunkIndex += 1 position = 0 } } def toArrays: Array[Array[Byte]] = { if (lastChunkIndex == -1) { new Array[Array[Byte]](0) } else { // Copy the first n-1 chunks to the output, and then create an array that fits the last chunk. // An alternative would have been returning an array of ByteBuffers, with the last buffer // bounded to only the last chunk's position. However, given our use case in Spark (to put // the chunks in block manager), only limiting the view bound of the buffer would still // require the block manager to store the whole chunk. val ret = new Array[Array[Byte]](chunks.size) for (i <- 0 until chunks.size - 1) { ret(i) = chunks(i) } if (position == chunkSize) { ret(lastChunkIndex) = chunks(lastChunkIndex) } else { ret(lastChunkIndex) = new Array[Byte](position) System.arraycopy(chunks(lastChunkIndex), 0, ret(lastChunkIndex), 0, position) } ret } } }
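The snippet above (shown without its class header on the source page) grows by appending fixed-size chunks to an ArrayBuffer[Array[Byte]] instead of resizing a single large array. A compact, self-contained sketch of the same idea, simplified and not the Spark class itself:

import java.io.OutputStream
import scala.collection.mutable.ArrayBuffer

// Simplified chunked output stream: bytes land in fixed-size chunks held in an ArrayBuffer.
class ChunkedOutputStream(chunkSize: Int) extends OutputStream {
  private val chunks = ArrayBuffer[Array[Byte]]()
  private var position = chunkSize // forces allocation of the first chunk on the first write

  override def write(b: Int): Unit = {
    if (position == chunkSize) { chunks += new Array[Byte](chunkSize); position = 0 }
    chunks.last(position) = b.toByte
    position += 1
  }

  // Total number of bytes written so far.
  def size: Int = if (chunks.isEmpty) 0 else (chunks.length - 1) * chunkSize + position
}

object ChunkedOutputStreamDemo extends App {
  val out = new ChunkedOutputStream(4)
  "hello".getBytes("UTF-8").foreach(b => out.write(b))
  println(out.size) // 5: two chunks of four bytes, the second only partially filled
}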
Example 155
Source File: UnionRDD.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils private[spark] class UnionPartition[T: ClassTag]( idx: Int, @transient private val rdd: RDD[T], val parentRddIndex: Int, @transient private val parentRddPartitionIndex: Int) extends Partition { var parentPartition: Partition = rdd.partitions(parentRddPartitionIndex) def preferredLocations(): Seq[String] = rdd.preferredLocations(parentPartition) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization parentPartition = rdd.partitions(parentRddPartitionIndex) oos.defaultWriteObject() } } @DeveloperApi class UnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]]) extends RDD[T](sc, Nil) { // Nil since we implement getDependencies override def getPartitions: Array[Partition] = { val array = new Array[Partition](rdds.map(_.partitions.length).sum) var pos = 0 for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) { array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index) pos += 1 } array } override def getDependencies: Seq[Dependency[_]] = { val deps = new ArrayBuffer[Dependency[_]] var pos = 0 for (rdd <- rdds) { deps += new RangeDependency(rdd, 0, pos, rdd.partitions.length) pos += rdd.partitions.length } deps } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val part = s.asInstanceOf[UnionPartition[T]] parent[T](part.parentRddIndex).iterator(part.parentPartition, context) } override def getPreferredLocations(s: Partition): Seq[String] = s.asInstanceOf[UnionPartition[T]].preferredLocations() override def clearDependencies() { super.clearDependencies() rdds = null } }
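In getDependencies, each parent contributes a RangeDependency whose offset is the running partition count accumulated in pos. The offset bookkeeping on its own, in plain Scala (illustrative, no Spark types):

import scala.collection.mutable.ArrayBuffer

object PartitionOffsets {
  // For parents with the given partition counts, compute each parent's starting offset
  // in the union, as UnionRDD.getDependencies does when building RangeDependency values.
  def offsets(partitionCounts: Seq[Int]): Seq[(Int, Int)] = {
    val deps = new ArrayBuffer[(Int, Int)]() // (startOffsetInUnion, numPartitions)
    var pos = 0
    for (count <- partitionCounts) {
      deps += ((pos, count))
      pos += count
    }
    deps.toSeq
  }

  def main(args: Array[String]): Unit =
    println(offsets(Seq(3, 2, 4))) // (0,3), (3,2), (5,4)
}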
Example 156
Source File: TaskContextImpl.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark import scala.collection.mutable.{ArrayBuffer, HashMap} import org.apache.spark.executor.TaskMetrics import org.apache.spark.memory.TaskMemoryManager import org.apache.spark.metrics.MetricsSystem import org.apache.spark.metrics.source.Source import org.apache.spark.util.{TaskCompletionListener, TaskCompletionListenerException} private[spark] class TaskContextImpl( val stageId: Int, val partitionId: Int, override val taskAttemptId: Long, override val attemptNumber: Int, override val taskMemoryManager: TaskMemoryManager, @transient private val metricsSystem: MetricsSystem, internalAccumulators: Seq[Accumulator[Long]], val runningLocally: Boolean = false, val taskMetrics: TaskMetrics = TaskMetrics.empty) extends TaskContext with Logging { // For backwards-compatibility; this method is now deprecated as of 1.3.0. override def attemptId(): Long = taskAttemptId // List of callback functions to execute when the task completes. @transient private val onCompleteCallbacks = new ArrayBuffer[TaskCompletionListener] // Whether the corresponding task has been killed. @volatile private var interrupted: Boolean = false // Whether the task has completed. @volatile private var completed: Boolean = false override def addTaskCompletionListener(listener: TaskCompletionListener): this.type = { onCompleteCallbacks += listener this } override def addTaskCompletionListener(f: TaskContext => Unit): this.type = { onCompleteCallbacks += new TaskCompletionListener { override def onTaskCompletion(context: TaskContext): Unit = f(context) } this } @deprecated("use addTaskCompletionListener", "1.1.0") override def addOnCompleteCallback(f: () => Unit) { onCompleteCallbacks += new TaskCompletionListener { override def onTaskCompletion(context: TaskContext): Unit = f() } } private[spark] def markInterrupted(): Unit = { interrupted = true } override def isCompleted(): Boolean = completed override def isRunningLocally(): Boolean = runningLocally override def isInterrupted(): Boolean = interrupted override def getMetricsSources(sourceName: String): Seq[Source] = metricsSystem.getSourcesByName(sourceName) @transient private val accumulators = new HashMap[Long, Accumulable[_, _]] private[spark] override def registerAccumulator(a: Accumulable[_, _]): Unit = synchronized { accumulators(a.id) = a } private[spark] override def collectInternalAccumulators(): Map[Long, Any] = synchronized { accumulators.filter(_._2.isInternal).mapValues(_.localValue).toMap } private[spark] override def collectAccumulators(): Map[Long, Any] = synchronized { accumulators.mapValues(_.localValue).toMap } //private[spark] override val internalMetricsToAccumulators: Map[String, Accumulator[Long]] = { // Explicitly register internal accumulators here because these are // not captured in the task closure and are already deserialized internalAccumulators.foreach(registerAccumulator) internalAccumulators.map { a => (a.name.get, a) }.toMap } }
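TaskContextImpl stores task-completion listeners in an ArrayBuffer and wraps plain functions into TaskCompletionListener objects before appending them. A stripped-down registry with the same shape (hypothetical names, no Spark types; Spark runs completion callbacks in reverse order of registration, which the sketch imitates):

import scala.collection.mutable.ArrayBuffer

// Minimal completion-listener registry in the style of TaskContextImpl.
class CompletionListeners {
  private val callbacks = new ArrayBuffer[() => Unit]()

  def addListener(f: () => Unit): this.type = { callbacks += f; this }

  // Run listeners in reverse registration order when the task finishes.
  def markCompleted(): Unit = callbacks.reverse.foreach(cb => cb())
}

object CompletionListenersDemo extends App {
  val ctx = new CompletionListeners
  ctx.addListener(() => println("close metrics")).addListener(() => println("release memory"))
  ctx.markCompleted() // prints "release memory" then "close metrics"
}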
Example 157
Source File: LinkerdApi.scala From asura with MIT License | 5 votes |
package asura.app.api import asura.app.api.model.Dtabs import asura.app.api.model.Dtabs.DtabItem import asura.common.exceptions.ErrorMessages.ErrorMessage import asura.common.model.{ApiRes, ApiResError} import asura.core.http.HttpEngine import asura.core.{CoreConfig, ErrorMessages} import asura.namerd.DtabEntry import asura.namerd.api.v1.NamerdV1Api import asura.play.api.BaseApi.OkApiRes import javax.inject.{Inject, Singleton} import org.pac4j.play.scala.SecurityComponents import play.api.Configuration import scala.collection.mutable.ArrayBuffer import scala.concurrent.{ExecutionContext, Future} @Singleton class LinkerdApi @Inject()( implicit val exec: ExecutionContext, val controllerComponents: SecurityComponents, config: Configuration, ) extends BaseApi { val srcPrefix = "/svc/" val dstPrefix = "/$/inet/" def getProxyServers() = Action { implicit req => if (CoreConfig.linkerdConfig.enabled) { OkApiRes(ApiRes(data = CoreConfig.linkerdConfig.servers)) } else { OkApiRes(ApiResError(getI18nMessage(ErrorMessages.error_ProxyDisabled.name))) } } def getHttp(group: String, project: String, server: String) = Action.async { implicit req => if (CoreConfig.linkerdConfig.enabled) { val proxyServer = CoreConfig.linkerdConfig.servers.find(_.tag.equals(server)).get NamerdV1Api.getNamespaceDtabs(proxyServer.namerd, proxyServer.httpNs)(HttpEngine.http).map(dtabs => { val items = ArrayBuffer[DtabItem]() dtabs.foreach(entry => { val pStrs = entry.prefix.split("/") val dStrs = entry.dst.split("/") if (pStrs.length == 5 && dStrs.length == 5) { items += DtabItem( group = pStrs(2), project = pStrs(3), namespace = pStrs(4), host = dStrs(3), port = dStrs(4), owned = group == pStrs(2) && project == pStrs(3) ) } }) toActionResultFromAny(items) }) } else { Future.successful(OkApiRes(ApiResError(getI18nMessage(ErrorMessages.error_ProxyDisabled.name)))) } } def putHttp(group: String, project: String, server: String) = Action(parse.byteString).async { implicit req => if (CoreConfig.linkerdConfig.enabled) { val proxyServer = CoreConfig.linkerdConfig.servers.find(_.tag.equals(server)).get val dtabs = req.bodyAs(classOf[Dtabs]) if (null != dtabs && null != dtabs.dtabs && dtabs.dtabs.nonEmpty) { var error: ErrorMessage = null val entries = ArrayBuffer[DtabEntry]() for (i <- 0 until dtabs.dtabs.length if null == error) { val item = dtabs.dtabs(i) error = item.isValid() entries += DtabEntry( s"${srcPrefix}${item.group}/${item.project}/${item.namespace}", s"${dstPrefix}${item.host}/${item.port}" ) } if (null == error) { NamerdV1Api.updateNamespaceDtabs(proxyServer.namerd, proxyServer.httpNs, entries)(HttpEngine.http).toOkResult } else { error.toFutureFail } } else { Future.successful(OkApiRes(ApiRes())) } } else { Future.successful(OkApiRes(ApiResError(getI18nMessage(ErrorMessages.error_ProxyDisabled.name)))) } } }
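getHttp splits each dtab prefix and dst on "/" and keeps only well-formed five-segment entries, accumulating DtabItems in an ArrayBuffer. The parsing step in isolation, with a simplified item type standing in for the asura model (illustrative):

import scala.collection.mutable.ArrayBuffer

object DtabParsing {
  // Simplified stand-in for asura's DtabItem.
  case class Item(group: String, project: String, namespace: String, host: String, port: String)

  // Keep only entries whose prefix and dst split into the expected 5 segments.
  def parse(entries: Seq[(String, String)]): Seq[Item] = {
    val items = ArrayBuffer[Item]()
    entries.foreach { case (prefix, dst) =>
      val p = prefix.split("/")
      val d = dst.split("/")
      if (p.length == 5 && d.length == 5) items += Item(p(2), p(3), p(4), d(3), d(4))
    }
    items.toSeq
  }

  def main(args: Array[String]): Unit =
    println(parse(Seq(("/svc/g1/p1/ns1", "/$/inet/10.0.0.1/8080")))) // List(Item(g1,p1,ns1,10.0.0.1,8080))
}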
Example 158
Source File: InterfaceMethodParamsActor.scala From asura with MIT License | 5 votes |
package asura.dubbo.actor import akka.actor.{ActorRef, Props, Status} import akka.pattern.pipe import akka.util.ByteString import asura.common.actor.BaseActor import asura.common.util.LogUtils import asura.dubbo.DubboConfig import asura.dubbo.actor.GenericServiceInvokerActor.GetInterfaceMethodParams import asura.dubbo.model.InterfaceMethodParams import asura.dubbo.model.InterfaceMethodParams.MethodSignature import scala.collection.mutable.ArrayBuffer import scala.concurrent.{ExecutionContext, Future} class InterfaceMethodParamsActor(invoker: ActorRef, msg: GetInterfaceMethodParams) extends BaseActor { implicit val ec: ExecutionContext = context.dispatcher private val telnet: ActorRef = context.actorOf(TelnetClientActor.props(msg.address, if (msg.port > 0) msg.port else DubboConfig.DEFAULT_PORT, self)) override def receive: Receive = { case telnetData: ByteString => val utf8String = telnetData.utf8String if (utf8String.contains(TelnetClientActor.MSG_CONNECT_TO)) { log.debug(utf8String) if (utf8String.contains(TelnetClientActor.MSG_SUCCESS)) { telnet ! ByteString(s"ls -l ${msg.ref}\r\n") } else if (utf8String.contains(TelnetClientActor.MSG_FAIL)) { Future.failed(new RuntimeException(s"Remote connection to ${msg.address}:${msg.port} failed")) pipeTo invoker telnet ! TelnetClientActor.CMD_CLOSE context stop self } else { Future.failed(new RuntimeException(s"Unknown response ${utf8String}")) pipeTo invoker telnet ! TelnetClientActor.CMD_CLOSE context stop self } } else if (utf8String.contains("(") && utf8String.contains(")")) { getInterfaceMethodParams(msg.ref, utf8String) pipeTo invoker telnet ! TelnetClientActor.CMD_CLOSE } else { Future.failed(new RuntimeException(s"Unknown response: ${utf8String}")) pipeTo invoker telnet ! TelnetClientActor.CMD_CLOSE context stop self } case Status.Failure(t) => val stackTrace = LogUtils.stackTraceToString(t) log.warning(stackTrace) context stop self } def getInterfaceMethodParams(ref: String, content: String): Future[InterfaceMethodParams] = { Future.successful { val methods = ArrayBuffer[MethodSignature]() content.split("\r\n") .filter(!_.startsWith(DubboConfig.DEFAULT_PROMPT)) .map(signature => { val splits = signature.split(" ") if (splits.length == 2) { val ret = splits(0) val secondPart = splits(1) val idx = secondPart.indexOf("(") val method = secondPart.substring(0, idx) val params = secondPart.substring(idx + 1, secondPart.length - 1).split(",") methods += (MethodSignature(ret, method, params)) } }) InterfaceMethodParams(ref, methods) } } override def postStop(): Unit = log.debug(s"${self.path} stopped") } object InterfaceMethodParamsActor { def props(invoker: ActorRef, msg: GetInterfaceMethodParams) = { Props(new InterfaceMethodParamsActor(invoker, msg)) } }
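getInterfaceMethodParams parses telnet "ls -l" output lines of the form "returnType methodName(paramA,paramB)" into MethodSignature values collected in an ArrayBuffer. The same string handling in isolation (simplified signature type and sample input, both illustrative):

import scala.collection.mutable.ArrayBuffer

object SignatureParsing {
  case class MethodSig(ret: String, method: String, params: Seq[String])

  // Parse "returnType name(p1,p2)" lines, skipping anything that does not match the shape.
  def parse(lines: Seq[String]): Seq[MethodSig] = {
    val methods = ArrayBuffer[MethodSig]()
    lines.foreach { line =>
      val splits = line.split(" ")
      if (splits.length == 2 && splits(1).contains("(")) {
        val ret = splits(0)
        val rest = splits(1)
        val idx = rest.indexOf("(")
        val name = rest.substring(0, idx)
        val params = rest.substring(idx + 1, rest.length - 1).split(",")
        methods += MethodSig(ret, name, params.toSeq)
      }
    }
    methods.toSeq
  }

  def main(args: Array[String]): Unit =
    println(parse(Seq("java.lang.String sayHello(java.lang.String,int)")))
}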
Example 159
Source File: JobReportDataItemSaveActor.scala From asura with MIT License | 5 votes |
package asura.core.job.actor import akka.actor.{Props, Status} import asura.common.actor.BaseActor import asura.common.util.LogUtils import asura.core.actor.messages.Flush import asura.core.es.model.JobReportDataItem import asura.core.es.service.JobReportDataItemService import asura.core.job.actor.JobReportDataItemSaveActor.SaveReportDataHttpItemMessage import scala.collection.mutable.ArrayBuffer import scala.concurrent.duration._ class JobReportDataItemSaveActor(dayIndexSuffix: String) extends BaseActor { val messages = ArrayBuffer[SaveReportDataHttpItemMessage]() override def receive: Receive = { case m: SaveReportDataHttpItemMessage => messages += m if (messages.length >= 10) { insert() } context.system.scheduler.scheduleOnce(2 seconds) { self ! Flush }(context.system.dispatcher) case Flush => insert() case Status.Failure(t) => log.warning(LogUtils.stackTraceToString(t)) } override def preStart(): Unit = { } override def postStop(): Unit = { insert() log.debug(s"${self.path} is stopped") } private def insert(): Unit = { if (messages.length > 0) { log.debug(s"${messages.length} items is saving...") JobReportDataItemService.index(messages, dayIndexSuffix) messages.clear() } } } object JobReportDataItemSaveActor { def props(dayIndexSuffix: String) = Props(new JobReportDataItemSaveActor(dayIndexSuffix)) case class SaveReportDataHttpItemMessage(id: String, dataItem: JobReportDataItem) }
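The actor buffers incoming messages in an ArrayBuffer and flushes either when the buffer reaches a size threshold or on a scheduled Flush tick; the same shape recurs in Examples 164 and 165 below. The batch-and-flush pattern without Akka or Elasticsearch (hypothetical sink and threshold, for illustration only):

import scala.collection.mutable.ArrayBuffer

// Accumulate items and hand them to `sink` in batches, as the save actors above do.
class BatchBuffer[T](threshold: Int)(sink: Seq[T] => Unit) {
  private val items = ArrayBuffer[T]()

  def add(item: T): Unit = {
    items += item
    if (items.length >= threshold) flush()
  }

  // Called on a timer tick or on shutdown in the actor version.
  def flush(): Unit = {
    if (items.nonEmpty) {
      sink(items.toList) // copy out before clearing the buffer
      items.clear()
    }
  }
}

object BatchBufferDemo extends App {
  val buf = new BatchBuffer[String](threshold = 3)(batch => println(s"saving ${batch.size} items"))
  Seq("a", "b", "c", "d").foreach(buf.add)
  buf.flush() // flush the one remaining item
}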
Example 160
Source File: JobStatusActor.scala From asura with MIT License | 5 votes |
package asura.core.job.actor import akka.actor.Status.Failure import akka.actor.{ActorRef, Props} import asura.common.actor._ import asura.common.model.Pagination import asura.core.model.QueryJob import asura.core.es.service.JobService import asura.core.job.actor.JobStatusMonitorActor.JobStatusOperationMessage import asura.core.job.eventbus.JobStatusBus.JobStatusNotificationMessage import asura.core.job.{JobListItem, JobStates} import asura.core.redis.RedisJobState import asura.core.util.JacksonSupport import com.typesafe.scalalogging.Logger import scala.collection.mutable import scala.collection.mutable.ArrayBuffer class JobStatusActor() extends BaseActor { var query: QueryJob = null val watchIds = mutable.HashSet[String]() override def receive: Receive = { case SenderMessage(sender) => context.become(query(sender)) } def query(outSender: ActorRef): Receive = { case query: QueryJob => this.query = query JobService.queryJob(query).map(esResponse => if (esResponse.isSuccess) { val items = ArrayBuffer[JobListItem]() val jobsTable = mutable.HashMap[String, JobListItem]() val hits = esResponse.result.hits watchIds.clear() hits.hits.foreach(hit => { val jobId = hit.id watchIds.add(jobId) jobsTable += (jobId -> { val item = JacksonSupport.parse(hit.sourceAsString, classOf[JobListItem]) item.state = JobStates.UNKNOWN items += item item._id = jobId item }) }) if (watchIds.nonEmpty) { RedisJobState.getJobState(watchIds.toSet).onComplete { case util.Success(statesMap) => statesMap.forEach((jobKey, state) => jobsTable(jobKey).state = state) outSender ! ListActorEvent(Map("total" -> hits.total, "list" -> items)) case util.Failure(_) => outSender ! ListActorEvent(Map("total" -> hits.total, "list" -> items)) }(context.system.dispatcher) } else { outSender ! ListActorEvent(Map("total" -> 0, "list" -> Nil)) } } else { outSender ! ErrorActorEvent(esResponse.error.reason) })(context.system.dispatcher) case JobStatusNotificationMessage(_, operator, scheduler, group, name, data) => if (watchIds.contains(name)) { outSender ! ItemActorEvent(JobStatusOperationMessage(operator, scheduler, group, name, data)) } case eventMessage: ActorEvent => outSender ! eventMessage case Failure(t) => outSender ! ErrorActorEvent(t.getMessage) } override def postStop(): Unit = { import JobStatusActor.logger logger.debug(s"JobStatus for ${query} stopped") } } object JobStatusActor { val logger = Logger(classOf[JobStatusActor]) def props() = Props(new JobStatusActor()) case class JobQueryMessage(scheduler: String = null, group: String = null, text: String = null) extends Pagination }
Example 161
Source File: HeaderUtils.scala From asura with MIT License | 5 votes |
package asura.core.http import akka.http.scaladsl.model.HttpHeader.ParsingResult.{Error, Ok} import akka.http.scaladsl.model.headers.{Cookie, RawHeader} import akka.http.scaladsl.model.{ErrorInfo, HttpHeader} import asura.common.util.StringUtils import asura.core.es.model.{Environment, HttpCaseRequest} import asura.core.runtime.RuntimeContext import asura.core.{CoreConfig, ErrorMessages} import com.typesafe.scalalogging.Logger import scala.collection.immutable import scala.collection.mutable.ArrayBuffer object HeaderUtils { val logger = Logger("HeaderUtils") def toHeaders(cs: HttpCaseRequest, context: RuntimeContext): immutable.Seq[HttpHeader] = { val headers = ArrayBuffer[HttpHeader]() val request = cs.request val env = if (null != context.options) context.options.getUsedEnv() else null if (null != request) { val headerSeq = request.header if (null != headerSeq) { for (h <- headerSeq if (h.enabled && StringUtils.isNotEmpty(h.key))) { HttpHeader.parse(h.key, context.renderSingleMacroAsString(h.value)) match { case Ok(header: HttpHeader, errors: List[ErrorInfo]) => if (errors.nonEmpty) logger.warn(errors.mkString(",")) headers += header case Error(error: ErrorInfo) => logger.warn(error.detail) } } } val cookieSeq = request.cookie if (null != cookieSeq) { val cookies = ArrayBuffer[(String, String)]() for (c <- cookieSeq if (c.enabled && StringUtils.isNotEmpty(c.key))) { cookies += ((c.key, context.renderSingleMacroAsString(c.value))) } if (cookies.nonEmpty) headers += Cookie(cookies: _*) } } if (null != env && null != env.headers && env.headers.nonEmpty) { for (h <- env.headers if (h.enabled && StringUtils.isNotEmpty(h.key))) { HttpHeader.parse(h.key, context.renderSingleMacroAsString(h.value)) match { case Ok(header: HttpHeader, errors: List[ErrorInfo]) => if (errors.nonEmpty) logger.warn(errors.mkString(",")) headers += header case Error(error: ErrorInfo) => logger.warn(error.detail) } } } if (null != env && env.enableProxy) { val headerIdentifier = validateProxyVariables(env) val dst = StringBuilder.newBuilder dst.append("/").append(cs.group).append("/").append(cs.project).append("/").append(env.namespace) headers += RawHeader(headerIdentifier, dst.toString) } headers.toList } def validateProxyVariables(env: Environment): String = { if (!CoreConfig.linkerdConfig.enabled) { throw ErrorMessages.error_ProxyDisabled.toException } if (StringUtils.isEmpty(env.namespace)) { throw ErrorMessages.error_EmptyNamespace.toException } if (StringUtils.isEmpty(env.server)) { throw ErrorMessages.error_EmptyProxyServer.toException } val proxyServerOpt = CoreConfig.linkerdConfig.servers.find(_.tag.equals(env.server)) if (proxyServerOpt.isEmpty && StringUtils.isEmpty(proxyServerOpt.get.headerIdentifier)) { throw ErrorMessages.error_InvalidProxyConfig.toException } else { proxyServerOpt.get.headerIdentifier } } def isApplicationJson(header: HttpHeader): Boolean = { if (header.lowercaseName().equals("content-type")) { header.value().contains(HttpContentTypes.JSON) } else { false } } }
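toHeaders walks the request and environment header definitions, keeps only entries that are enabled and have a non-empty key, and appends each successfully parsed header (plus a combined Cookie header) to an ArrayBuffer. One detail worth noting: in validateProxyVariables the guard proxyServerOpt.isEmpty && StringUtils.isEmpty(proxyServerOpt.get.headerIdentifier) calls .get on an empty Option whenever no matching server exists, so the intended operator is presumably ||. Below is a dependency-free sketch of the filter-render-accumulate loop itself; KeyValue and render are illustrative stand-ins for the project's header model and macro rendering, not asura APIs.

import scala.collection.mutable.ArrayBuffer

object HeaderBuilding {
  case class KeyValue(key: String, value: String, enabled: Boolean)

  // Render enabled, non-empty entries through `render` (a stand-in for macro expansion).
  def toHeaders(defs: Seq[KeyValue])(render: String => String): List[(String, String)] = {
    val headers = ArrayBuffer[(String, String)]()
    for (h <- defs if h.enabled && h.key.nonEmpty) {
      headers += ((h.key, render(h.value)))
    }
    headers.toList
  }

  def main(args: Array[String]): Unit = {
    val defs = Seq(KeyValue("X-Trace", "{{traceId}}", enabled = true), KeyValue("", "skipped", enabled = true))
    println(toHeaders(defs)(v => v.replace("{{traceId}}", "abc-123"))) // List((X-Trace,abc-123))
  }
}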
Example 162
package asura.core.assertion import asura.core.concurrent.ExecutionContextManager.cachedExecutor import asura.core.assertion.engine.{AssertResult, AssertionContext, FailAssertResult, Statistic} import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.concurrent.Future object Or extends Assertion { override val name: String = Assertions.OR override def assert(actual: Any, expect: Any): Future[AssertResult] = { apply(actual, expect) } def apply(actual: Any, except: Any): Future[AssertResult] = { val result = AssertResult( isSuccessful = false, msg = AssertResult.MSG_FAILED ) val subResults = ArrayBuffer[mutable.Map[String, Any]]() result.subResult = subResults except match { case assertions: Seq[_] => if (assertions.nonEmpty) { val assertionResults = assertions.map(assertion => { val subStatis = Statistic() val assertionMap = assertion.asInstanceOf[Map[String, Any]] val contextMap = actual.asInstanceOf[Object] AssertionContext.eval(assertionMap, contextMap, subStatis).map((subStatis, _)) }) Future.sequence(assertionResults).map(subStatisResults => { val subResults = ArrayBuffer[java.util.Map[String, Any]]() result.subResult = subResults subStatisResults.foreach(subStatisResult => { val (subStatis, subResult) = subStatisResult subResults += subResult result.pass(subStatis.passed) result.fail(subStatis.failed) if (subStatis.isSuccessful) { result.isSuccessful = true result.msg = AssertResult.MSG_PASSED } }) result }) } else { Future.successful(null) } case _ => Future.successful(FailAssertResult(1, AssertResult.msgIncomparableTargetType(except))) } } }
Example 163
package asura.core.assertion import asura.core.concurrent.ExecutionContextManager.cachedExecutor import asura.core.assertion.engine.{AssertResult, AssertionContext, FailAssertResult, Statistic} import scala.collection.mutable.ArrayBuffer import scala.concurrent.Future object And extends Assertion { override val name: String = Assertions.AND override def assert(actual: Any, expect: Any): Future[AssertResult] = { apply(actual, expect) } def apply(actual: Any, expect: Any): Future[AssertResult] = { val result = AssertResult( isSuccessful = true, msg = AssertResult.MSG_PASSED ) expect match { case assertions: Seq[_] => if (assertions.nonEmpty) { val assertionResults = assertions.map(assertion => { val subStatis = Statistic() val assertionMap = assertion.asInstanceOf[Map[String, Any]] val contextMap = actual.asInstanceOf[Object] AssertionContext.eval(assertionMap, contextMap, subStatis).map((subStatis, _)) }) Future.sequence(assertionResults).map(subStatisResults => { val subResults = ArrayBuffer[java.util.Map[String, Any]]() result.subResult = subResults subStatisResults.foreach(subStatisResult => { val (subStatis, subResult) = subStatisResult subResults += subResult result.pass(subStatis.passed) result.fail(subStatis.failed) if (!subStatis.isSuccessful) { result.isSuccessful = false result.msg = AssertResult.MSG_FAILED } }) result }) } else { Future.successful(null) } case _ => Future.successful(FailAssertResult(1, AssertResult.msgIncomparableTargetType(expect))) } } }
Example 164
Source File: TriggerEventsSaveActor.scala From asura with MIT License | 5 votes |
package asura.core.es.actor import akka.actor.{Props, Status} import asura.common.actor.BaseActor import asura.common.util.LogUtils import asura.core.actor.messages.Flush import asura.core.es.model.TriggerEventLog import asura.core.es.service.TriggerEventLogService import scala.collection.mutable.ArrayBuffer import scala.concurrent.duration._ class TriggerEventsSaveActor extends BaseActor { val logs = ArrayBuffer[TriggerEventLog]() override def receive: Receive = { case m: TriggerEventLog => logs += m if (logs.length >= 20) { insert() } context.system.scheduler.scheduleOnce(2 seconds) { self ! Flush }(context.system.dispatcher) case Flush => insert() case Status.Failure(t) => log.warning(LogUtils.stackTraceToString(t)) } override def preStart(): Unit = { } override def postStop(): Unit = { insert() log.debug(s"${self.path} is stopped") } private def insert(): Unit = { if (logs.length > 0) { log.debug(s"${logs.length} trigger events is saving...") TriggerEventLogService.index(logs) logs.clear() } } } object TriggerEventsSaveActor { def props() = Props(new TriggerEventsSaveActor()) }
Example 165
Source File: ActivitySaveActor.scala From asura with MIT License | 5 votes |
package asura.core.es.actor import akka.actor.{Props, Status} import asura.common.actor.BaseActor import asura.common.util.LogUtils import asura.core.actor.messages.Flush import asura.core.es.model.Activity import asura.core.es.service.ActivityService import scala.collection.mutable.ArrayBuffer import scala.concurrent.duration._ class ActivitySaveActor extends BaseActor { val activities = ArrayBuffer[Activity]() override def receive: Receive = { case m: Activity => activities += m if (activities.length >= 20) { insert() } context.system.scheduler.scheduleOnce(2 seconds) { self ! Flush }(context.system.dispatcher) case Flush => insert() case Status.Failure(t) => log.warning(LogUtils.stackTraceToString(t)) } override def preStart(): Unit = { } override def postStop(): Unit = { insert() log.debug(s"${self.path} is stopped") } private def insert(): Unit = { if (activities.length > 0) { log.debug(s"${activities.length} activities is saving...") ActivityService.index(activities) activities.clear() } } } object ActivitySaveActor { def props() = Props(new ActivitySaveActor()) }
Example 166
Source File: HttpResponse.scala From asura with MIT License | 5 votes |
package asura.core.es.model import asura.core.http.HttpContentTypes import io.swagger.models.properties.RefProperty import io.swagger.models.{ModelImpl, Response, Swagger} import scala.collection.mutable import scala.collection.mutable.ArrayBuffer case class HttpResponse( description: String, headers: Seq[ParameterSchema], contentType: String, schema: JsonSchema ) { } object HttpResponse { def toResponses(openApi: Swagger, responses: mutable.Map[String, Response]): Map[String, HttpResponse] = { val definitions = openApi.getDefinitions val responseMap = mutable.Map[String, HttpResponse]() for ((code, res) <- responses) { val schema: JsonSchema = res.getSchema match { case p: RefProperty => definitions.get(p.getSimpleRef) match { case model: ModelImpl => JsonSchema.toJsonSchema(model) case _ => null } case _ => null } val headers = ArrayBuffer[ParameterSchema]() if (null != res.getHeaders) { res.getHeaders.forEach((name, property) => { headers += (ParameterSchema( name = name, description = property.getDescription, `type` = SchemaObject.translateOpenApiType(property.getType, property.getFormat) )) }) } responseMap += (code -> HttpResponse( description = res.getDescription, headers = headers.toList, contentType = HttpContentTypes.JSON, schema = schema )) } responseMap.toMap } }
Example 167
Source File: RecommendService.scala From asura with MIT License | 5 votes |
package asura.core.es.service import asura.common.util.StringUtils import asura.core.concurrent.ExecutionContextManager.sysGlobal import asura.core.es.model.{FieldKeys, Project} import asura.core.model.RecommendProject import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.concurrent.Future object RecommendService { def getRecommendProjects(user: String, wd: String, discover: Boolean): Future[RecommendProjects] = { val futureTuple = for { my <- getRecommendProject(user, true, wd, 20, Nil) other <- if (discover) { getRecommendProject(user, false, null, 5, my.map(p => ((p.group, p.project)))) } else { Future.successful(Nil) } } yield (my, other) futureTuple.map(tuple => RecommendProjects(tuple._1, tuple._2)) } def getRecommendProject(user: String, me: Boolean, wd: String, size: Int, excludeGPs: Seq[(String, String)]): Future[Seq[RecommendProject]] = { val items = ArrayBuffer[RecommendProject]() ActivityService.recentProjects(user, me, wd, size).flatMap(aggItems => { if (aggItems.nonEmpty) { val map = mutable.Map[String, RecommendProject]() aggItems.foreach(item => { if (StringUtils.isNotEmpty(item.id)) { val gp = item.id.split("/") if (gp.length == 2) { val project = RecommendProject(gp(0), gp(1), item.count) items += project map += (Project.generateDocId(gp(0), gp(1)) -> project) } } }) ProjectService.getByIds(map.keys.toSeq, Seq(FieldKeys.FIELD_SUMMARY)).map(idMap => { idMap.foreach(tuple => { val id = tuple._1 val project = tuple._2 map(id).summary = project.summary }) items }) } else { Future.successful(items) } }) } case class RecommendProjects( my: Seq[RecommendProject], others: Seq[RecommendProject] ) }
Example 168
Source File: HomeService.scala From asura with MIT License | 5 votes |
package asura.core.es.service import asura.common.util.StringUtils import asura.core.concurrent.ExecutionContextManager.sysGlobal import asura.core.es.EsClient import asura.core.es.model._ import asura.core.model.QueryHome import com.sksamuel.elastic4s.ElasticDsl._ import com.sksamuel.elastic4s.requests.searches.queries.Query import scala.collection.mutable.ArrayBuffer object HomeService extends CommonService { val includeFields = Seq( FieldKeys.FIELD_GROUP, FieldKeys.FIELD_PROJECT, FieldKeys.FIELD_ID, FieldKeys.FIELD_AVATAR, FieldKeys.FIELD_SUMMARY, FieldKeys.FIELD_DESCRIPTION, FieldKeys.FIELD_OBJECT_REQUEST_URLPATH ) def queryDoc(query: QueryHome) = { EsClient.esClient.execute { val esQueries = ArrayBuffer[Query]() if (StringUtils.isNotEmpty(query.text)) esQueries += matchQuery(FieldKeys.FIELD__TEXT, query.text) search(Group.Index, Project.Index, HttpCaseRequest.Index, DubboRequest.Index, SqlRequest.Index, Environment.Index, Scenario.Index, Job.Index) .query(boolQuery().must(esQueries)) .sourceInclude(includeFields) .size(3) } } }
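A pattern that repeats across Examples 168 to 170: build up Elasticsearch query clauses by conditionally appending to an ArrayBuffer, then combine them with a bool/must query. The conditional-accumulation step with plain strings instead of elastic4s queries (purely illustrative):

import scala.collection.mutable.ArrayBuffer

object ConditionalClauses {
  // Append a clause only when its value is present, then AND everything together.
  def buildFilter(group: Option[String], project: Option[String], text: Option[String]): String = {
    val clauses = ArrayBuffer[String]()
    group.foreach(g => clauses += s"group = '$g'")
    project.foreach(p => clauses += s"project = '$p'")
    text.foreach(t => clauses += s"_text MATCHES '$t'")
    if (clauses.isEmpty) "true" else clauses.mkString(" AND ")
  }

  def main(args: Array[String]): Unit =
    println(buildFilter(Some("g1"), None, Some("pileup"))) // group = 'g1' AND _text MATCHES 'pileup'
}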
Example 169
Source File: TriggerEventLogService.scala From asura with MIT License | 5 votes |
package asura.core.es.service import asura.common.model.ApiMsg import asura.common.util.{FutureUtils, StringUtils} import asura.core.concurrent.ExecutionContextManager.sysGlobal import asura.core.es.EsClient import asura.core.es.model._ import asura.core.model.QueryCiEvents import asura.core.util.JacksonSupport.jacksonJsonIndexable import com.sksamuel.elastic4s.ElasticDsl._ import com.sksamuel.elastic4s.requests.searches.queries.Query import scala.collection.mutable.ArrayBuffer import scala.concurrent.Future object TriggerEventLogService extends CommonService with BaseAggregationService { def index(items: Seq[TriggerEventLog]): Future[BulkDocResponse] = { if (null == items && items.isEmpty) { FutureUtils.illegalArgs(ApiMsg.INVALID_REQUEST_BODY) } else { EsClient.esClient.execute { bulk( items.map(item => indexInto(TriggerEventLog.Index).doc(item)) ) }.map(toBulkDocResponse(_)) } } def queryEvents(query: QueryCiEvents) = { val esQueries = ArrayBuffer[Query]() if (StringUtils.isNotEmpty(query.group)) esQueries += termQuery(FieldKeys.FIELD_GROUP, query.group) if (StringUtils.isNotEmpty(query.project)) esQueries += termQuery(FieldKeys.FIELD_PROJECT, query.project) if (StringUtils.isNotEmpty(query.env)) esQueries += termQuery(FieldKeys.FIELD_ENV, query.env) if (StringUtils.isNotEmpty(query.`type`)) esQueries += termQuery(FieldKeys.FIELD_TYPE, query.`type`) if (StringUtils.isNotEmpty(query.service)) esQueries += termQuery(FieldKeys.FIELD_SERVICE, query.service) EsClient.esClient.execute { search(TriggerEventLog.Index).query(boolQuery().must(esQueries)) .from(query.pageFrom) .size(query.pageSize) .sortByFieldDesc(FieldKeys.FIELD_CREATED_AT) } } }
Example 170
Source File: IndexService.scala From asura with MIT License | 5 votes |
package asura.core.es.service import asura.common.util.StringUtils import asura.core.concurrent.ExecutionContextManager.sysGlobal import asura.core.es.EsClient import asura.core.es.model.{FieldKeys, IndexSetting, JobReportDataItem, RestApiOnlineLog} import com.sksamuel.elastic4s.ElasticDsl._ import com.sksamuel.elastic4s.Indexes import com.sksamuel.elastic4s.requests.delete.DeleteByQueryRequest import com.sksamuel.elastic4s.requests.searches.queries.Query import com.typesafe.scalalogging.Logger import scala.collection.mutable.ArrayBuffer import scala.concurrent.Future object IndexService extends CommonService { val logger = Logger("IndexService") def initCheck(idx: IndexSetting): Boolean = { val cli = EsClient.esClient val res = cli.execute(indexExists(idx.Index)).await if (res.isSuccess) { if (res.result.exists) { true } else { val res2 = cli.execute { createIndex(idx.Index) .shards(idx.shards) .replicas(idx.replicas) .mapping(idx.mappings) }.await if (res2.isSuccess) { true } else { logger.error(res2.error.reason) false } } } else { logger.error(res.error.reason) false } } def checkTemplate(): Boolean = { checkIndexTemplate(JobReportDataItem).await && checkIndexTemplate(RestApiOnlineLog).await } def checkIndexTemplate(idxSetting: IndexSetting): Future[Boolean] = { logger.info(s"check es template ${idxSetting.Index}") val cli = EsClient.esClient cli.execute { getIndexTemplate(idxSetting.Index) }.map { res => if (res.status != 404) true else false }.recover { case _ => false }.flatMap(hasTpl => { if (!hasTpl) { cli.execute { createIndexTemplate(idxSetting.Index, s"${idxSetting.Index}-*") .settings(Map( "number_of_replicas" -> idxSetting.replicas, "number_of_shards" -> idxSetting.shards )) .mappings(idxSetting.mappings) }.map(tplIndex => { if (tplIndex.result.acknowledged) true else false }) } else { Future.successful(true) } }) } def delIndex(indices: Seq[String]) = { EsClient.esClient.execute { deleteIndex(indices) }.map(toDeleteIndexResponse(_)) } def deleteByGroupOrProject(indices: Seq[String], group: String, project: String) = { val esQueries = ArrayBuffer[Query]() if (StringUtils.isNotEmpty(group)) esQueries += termQuery(FieldKeys.FIELD_GROUP, group) if (StringUtils.isNotEmpty(project)) esQueries += termQuery(FieldKeys.FIELD_PROJECT, project) EsClient.esClient.execute { DeleteByQueryRequest( Indexes(indices), boolQuery().must(esQueries) ).refreshImmediately }.map(toDeleteByQueryResponse(_)) } }
Example 171
Source File: ScalapropsRunner.scala From scalaprops with MIT License | 5 votes |
package scalaprops import sbt.testing._ import scala.collection.mutable.ArrayBuffer object ScalapropsRunner { def testFieldNames(clazz: Class[_]): Array[String] = Scalaprops.testFieldNames(clazz) private[scalaprops] def getTestObject( fingerprint: Fingerprint, testClassName: String, testClassLoader: ClassLoader ): Scalaprops = { ??? } private[scalaprops] def findTests( fingerprint: Fingerprint, testClassName: String, testClassLoader: ClassLoader, only: List[String], logger: Logger ): Properties[_] = { ??? } } final class ScalapropsRunner( override val args: Array[String], override val remoteArgs: Array[String], testClassLoader: ClassLoader ) extends Runner { private[this] val results = ArrayBuffer.empty[TestResult] private[this] val arguments = Arguments.parse(args.toList) private[this] val taskdef2task: TaskDef => sbt.testing.Task = { taskdef => new ScalapropsTaskImpl(taskdef, testClassLoader, args, arguments, results, TestStatus()) } override def tasks(taskDefs: Array[TaskDef]) = taskDefs.map(taskdef2task) override def done() = { val result = TestResult.formatResults(results, arguments.showDuration) println(result) result } }
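The runner collects TestResult values in ArrayBuffer.empty[TestResult], shared by the tasks it hands to sbt, and summarizes them in done(). A minimal result collector in the same spirit (the result type here is hypothetical). Note that ArrayBuffer itself is not thread-safe, so if tasks record results concurrently the shared buffer needs external synchronization.

import scala.collection.mutable.ArrayBuffer

object ResultCollection {
  case class TestResult(name: String, passed: Boolean)

  class Collector {
    private val results = ArrayBuffer.empty[TestResult]
    def record(r: TestResult): Unit = results += r
    // Equivalent of Runner.done(): summarize everything that was recorded.
    def done(): String = {
      val failed = results.count(!_.passed)
      s"${results.size} tests run, $failed failed"
    }
  }

  def main(args: Array[String]): Unit = {
    val c = new Collector
    c.record(TestResult("reverse", passed = true))
    c.record(TestResult("sort", passed = false))
    println(c.done()) // 2 tests run, 1 failed
  }
}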
Example 172
Source File: SolrTableFactory.scala From solr-sql with BSD 3-Clause "New" or "Revised" License | 5 votes |
package org.apache.calcite.adapter.solr import scala.annotation.migration import scala.collection.JavaConversions import scala.collection.mutable.ArrayBuffer import org.apache.calcite.rel.`type`.RelDataType import org.apache.calcite.schema.SchemaPlus import org.apache.calcite.schema.TableFactory import org.apache.calcite.sql.`type`.SqlTypeName import org.apache.log4j.Logger import org.apache.solr.client.solrj.SolrClient import org.apache.solr.client.solrj.impl.CloudSolrClient import org.apache.solr.client.solrj.impl.HttpSolrClient trait SolrClientFactory { def getClient(): SolrClient; } class SolrTableFactory extends TableFactory[SolrTable] { val logger = Logger.getLogger(this.getClass); override def create(parentSchema: SchemaPlus, name: String, operands: java.util.Map[String, Object], rowTypw: RelDataType): SolrTable = { val args = JavaConversions.mapAsScalaMap(operands).toMap.map(x ⇒ (x._1, x._2.toString())); //columns="title string, url string, content_length int" SolrTableConf.argumentsRequired(args, SolrTableConf.COULMNS); val columns: Map[String, SqlTypeName] = SolrTableConf.parseColumns(args, SolrTableConf.COULMNS); logger.debug(s"defined columns: $columns"); //columnMapping="title->solr_field_title, url->solr_field_url" val definedColumnMapping = SolrTableConf.parseMap(args, SolrTableConf.COLUMN_MAPPING); logger.debug(s"defined column mapping: $definedColumnMapping"); val filledColumnMapping = columns.map(x ⇒ (x._1, definedColumnMapping.getOrElse(x._1, x._1))); //options="pageSize:20,solrZkHosts=10.0.71.14:2181,10.0.71.17:2181,10.0.71.38:2181" val options = args; //a singleton of solr client val solrClientFactory = new SolrClientFactory { val clients = ArrayBuffer[SolrClient](); override def getClient = { if (clients.isEmpty) { if (options.keySet.contains(SolrTableConf.SOLR_ZK_HOSTS)) { val solrZkHosts = options(SolrTableConf.SOLR_ZK_HOSTS); logger.debug(s"connecting to solr cloud via zookeeper servers: $solrZkHosts"); val csc = new CloudSolrClient(solrZkHosts); csc.setDefaultCollection(options("solrCollection")); clients += csc; } else { SolrTableConf.argumentsRequired(args, SolrTableConf.SOLR_ZK_HOSTS, SolrTableConf.SOLR_SERVER_URL); val solrServerURL = options(SolrTableConf.SOLR_SERVER_URL); logger.debug(s"connecting to solr server: $solrServerURL"); clients += new HttpSolrClient(solrServerURL); } } clients(0); } } new SolrTable(solrClientFactory, columns, filledColumnMapping, options); } }
Example 173
Source File: TSQR.scala From SparkAndMPIFactorizations with MIT License | 5 votes |
package edu.berkeley.cs.amplab.mlmatrix import java.util.concurrent.ThreadLocalRandom import scala.collection.mutable.ArrayBuffer import breeze.linalg._ import edu.berkeley.cs.amplab.mlmatrix.util.QRUtils import edu.berkeley.cs.amplab.mlmatrix.util.Utils import org.apache.spark.rdd.RDD import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.spark.Accumulator import org.apache.spark.SparkContext._ import java.util.Calendar import java.text.SimpleDateFormat class modifiedTSQR extends Serializable { def report(message: String, verbose: Boolean = true) = { val now = Calendar.getInstance().getTime() val formatter = new SimpleDateFormat("H:m:s") if (verbose) { println("STATUS REPORT (" + formatter.format(now) + "): " + message) } } private def reduceQR( acc: Accumulator[Double], a: Tuple2[DenseVector[Double], DenseMatrix[Double]], b: Tuple2[DenseVector[Double], DenseMatrix[Double]]): Tuple2[DenseVector[Double], DenseMatrix[Double]] = { val begin = System.nanoTime val outmat = QRUtils.qrR(DenseMatrix.vertcat(a._2, b._2), false) val outcolnorms = a._1 + b._1 acc += ((System.nanoTime - begin) / 1e6) (outcolnorms, outmat) } }
Example 174
Source File: ParallelizedWithLocalityRDD.scala From cloud-integration with Apache License 2.0 | 5 votes |
package org.apache.spark.cloudera import scala.collection.immutable.NumericRange import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.rdd.{ParallelCollectionPartition, RDD} def slice[T: ClassTag](seq: Seq[T], numSlices: Int): Seq[Seq[T]] = { if (numSlices < 1) { throw new IllegalArgumentException( "Positive number of partitions required") } // Sequences need to be sliced at the same set of index positions for operations // like RDD.zip() to behave as expected def positions(length: Long, numSlices: Int): Iterator[(Int, Int)] = { (0 until numSlices).iterator.map { i => val start = ((i * length) / numSlices).toInt val end = (((i + 1) * length) / numSlices).toInt (start, end) } } seq match { case r: Range => positions(r.length, numSlices).zipWithIndex .map { case ((start, end), index) => // If the range is inclusive, use inclusive range for the last slice if (r.isInclusive && index == numSlices - 1) { new Range.Inclusive(r.start + start * r.step, r.end, r.step) } else { new Range(r.start + start * r.step, r.start + end * r.step, r.step) } }.toSeq.asInstanceOf[Seq[Seq[T]]] case nr: NumericRange[T] => // For ranges of Long, Double, BigInteger, etc val slices = new ArrayBuffer[Seq[T]](numSlices) var r = nr for ((start, end) <- positions(nr.length, numSlices)) { val sliceSize = end - start slices += r.take(sliceSize).asInstanceOf[Seq[T]] r = r.drop(sliceSize) } slices case _ => val array = seq.toArray // To prevent O(n^2) operations for List etc positions(array.length, numSlices).map { case (start, end) => array.slice(start, end).toSeq }.toSeq } } }
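The slice helper cuts a sequence into numSlices pieces at the same integer index positions every time, so that operations like RDD.zip() see matching boundaries; the ArrayBuffer branch handles NumericRange inputs. The boundary arithmetic extracted into a runnable snippet (generic case only; the Range special cases are omitted):

object Slicing {
  // Same boundary arithmetic as ParallelizedWithLocalityRDD.slice.
  def positions(length: Long, numSlices: Int): Iterator[(Int, Int)] =
    (0 until numSlices).iterator.map { i =>
      val start = ((i * length) / numSlices).toInt
      val end = (((i + 1) * length) / numSlices).toInt
      (start, end)
    }

  def slice[T](seq: Seq[T], numSlices: Int): Seq[Seq[T]] = {
    require(numSlices >= 1, "Positive number of partitions required")
    val v = seq.toVector
    positions(v.length, numSlices).map { case (start, end) => v.slice(start, end) }.toVector
  }

  def main(args: Array[String]): Unit =
    println(slice(1 to 10, 3)) // three slices: (1,2,3), (4,5,6), (7,8,9,10)
}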
Example 175
Source File: DelTransfer.scala From bdg-sequila with Apache License 2.0 | 5 votes |
package org.biodatageeks.sequila.pileup.converters

import scala.collection.mutable.ArrayBuffer

case class DelTransfer(contig: String, start: Int, len: Int) {
  val endDel: Int = start + len

  def isOverlappingLocus(queryContig: String, queryStart: Int): Boolean = {
    if (queryContig != contig || queryStart <= start) return false
    if (queryStart <= endDel) return true
    false
  }
}

class DelContext extends Serializable {
  private val minDelLen: Int = 0
  val dels: ArrayBuffer[DelTransfer] = new ArrayBuffer[DelTransfer]()

  def add(delTransfer: DelTransfer): Unit = {
    if (delTransfer.len <= minDelLen) return
    dels.append(delTransfer)
  }

  def getDelTransferForLocus(contig: String, position: Int): Int = {
    var counter = 0
    for (del <- dels) {
      if (del.isOverlappingLocus(contig, position)) counter += 1
    }
    counter
  }
}
Example 176
Source File: MDTagParser.scala From bdg-sequila with Apache License 2.0 | 5 votes |
package org.biodatageeks.sequila.pileup import java.io.File import htsjdk.samtools.reference.IndexedFastaSequenceFile import htsjdk.samtools.{Cigar, CigarOperator, SAMRecord} import org.apache.log4j.Logger import org.apache.spark.sql.SparkSession import org.biodatageeks.sequila.datasources.BAM.BDGAlignFileReaderWriter import org.seqdoop.hadoop_bam.BAMBDGInputFormat import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer case class MDOperator(length: Int, base: Char) { //S means to skip n positions, not fix needed def isDeletion:Boolean = base.isLower def isNonDeletion:Boolean = base.isUpper } object MDTagParser{ val logger: Logger = Logger.getLogger(this.getClass.getCanonicalName) val pattern = "([0-9]+)\\^?([A-Za-z]+)?".r def parseMDTag(t : String) = { if (isAllDigits(t)) { Array[MDOperator](MDOperator(t.toInt, 'S')) } else { val ab = new ArrayBuffer[MDOperator]() val matches = pattern .findAllIn(t) while (matches.hasNext) { val m = matches.next() if(m.last.isLetter && !m.contains('^') ){ val skipPos = m.dropRight(1).toInt ab.append(MDOperator(skipPos, 'S') ) ab.append(MDOperator(0, m.last.toUpper)) } else if (m.last.isLetter && m.contains('^') ){ //encoding deletions as lowercase val arr = m.split('^') val skipPos = arr.head.toInt ab.append(MDOperator(skipPos, 'S') ) arr(1).foreach { b => ab.append(MDOperator(0, b.toLower)) } } else ab.append(MDOperator(m.toInt, 'S') ) } ab.toArray } } private def isAllDigits(s: String) : Boolean = { val len = s.length var i = 0 while(i < len){ if(! s(i).isDigit ) return false i += 1 } true } }
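parseMDTag turns an MD tag such as 10A5^AC6 into MDOperator tokens: runs of matching bases become skips ('S'), a mismatched reference base stays uppercase, and deleted bases (after '^') are lowercased. A quick standalone look at what the regex itself produces on a sample tag:

object MDTagRegexDemo {
  // Same token pattern as MDTagParser: a match count, optionally followed by '^' and bases.
  val pattern = "([0-9]+)\\^?([A-Za-z]+)?".r

  def main(args: Array[String]): Unit = {
    // 10 matches, mismatch A, 5 matches, deletion of AC, 6 matches.
    pattern.findAllIn("10A5^AC6").foreach(println) // prints: 10A, 5^AC, 6
  }
}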
Example 177
Source File: NCListBuilder.scala From bdg-sequila with Apache License 2.0 | 5 votes |
package org.biodatageeks.sequila.rangejoins.NCList import scala.collection.mutable import scala.collection.mutable.ArrayBuffer object NCListBuilder { def build[T](array: Array[(Interval[Int], T)]): NCList = { val topNCList = NCList(ArrayBuffer.empty[NCList], 0, ArrayBuffer.empty[Int]) var landingNCList = NCList(ArrayBuffer.empty[NCList], 0, ArrayBuffer.empty[Int]) val arrayWithIndices = array.zipWithIndex.map{case (k,v) => (v,k)} val sortedIndices = arrayWithIndices.sortWith((x, y) => x._2._1.end > y._2._1.end) .sortWith((x, y) => x._2._1.start < y._2._1.start) .map(x => x._1) val stack = mutable.ArrayStack[NCListBuildingStack]() sortedIndices.foreach ( rgid => { val currentEnd = arrayWithIndices(rgid)._2._1.end while(!stack.isEmpty && arrayWithIndices(stack.top.rgid)._2._1.end < currentEnd) stack.pop landingNCList = if (stack.isEmpty) topNCList else stack.top.ncList val stackElt = appendNCListElt(landingNCList, rgid) stack.push(stackElt) }) topNCList } def appendNCListElt(landingNCList: NCList, rgid: Int): NCListBuildingStack = { landingNCList.childrenBuf.append(NCList(ArrayBuffer.empty[NCList], 0, ArrayBuffer.empty[Int])) val childrenNCList = landingNCList.childrenBuf.last val stackElt = NCListBuildingStack(childrenNCList,rgid) landingNCList.rgidBuf.append(rgid) landingNCList.nChildren = landingNCList.nChildren+1 stackElt } }
Example 178
Source File: NCListTree.scala From bdg-sequila with Apache License 2.0 | 5 votes |
package org.biodatageeks.sequila.rangejoins.NCList import scala.collection.mutable.{ArrayBuffer, ArrayStack} import scala.util.control.Breaks._ class NCListTree[T](allRegions: Array[(Interval[Int], T)]) extends Serializable { val ncList = NCListBuilder.build(allRegions) def getAllOverlappings(processedInterval: Interval[Int]) = allOverlappingRegions(processedInterval, ncList, allRegions) private def allOverlappingRegions(processedInterval: Interval[Int], topNcList: NCList, intervalArray: Array[(Interval[Int],T)]): List[(Interval[Int], T)] = { val backpack = Backpack(intervalArray,processedInterval) var resultList = List[(Interval[Int], T)]() val walkingStack = ArrayStack[NCListWalkingStack]() var n = findLandingChild(topNcList, backpack) if (n < 0) return Nil var ncList = moveToChild(topNcList, n, walkingStack) while (ncList != null) { val stackElt = peekNCListWalkingStackElt(walkingStack) val rgid = stackElt.parentNcList.rgidBuf(stackElt.n) breakable { val candidateInterval = intervalArray(rgid) if (candidateInterval._1.start > backpack.processedInterval.end) { var n = (n1 + n2) / 2 while (n != n1) { b = base(subset(n))._1.end if (b == min) return n if (b < min) n1 = n else n2 = n n = (n1 + n2) / 2 } return n2 } private def moveToChild(parentNcList: NCList, n: Int, walkingStack: ArrayStack[NCListWalkingStack]): NCList = { walkingStack.push(NCListWalkingStack(parentNcList, n)) parentNcList.childrenBuf(n) } private def peekNCListWalkingStackElt(walkingStack: ArrayStack[NCListWalkingStack]): NCListWalkingStack = { walkingStack.top } private def moveToRightUncle(walkingStack: ArrayStack[NCListWalkingStack]): NCList = { val parentNcList = walkingStack.pop().parentNcList if (walkingStack.isEmpty) return null moveToRightSiblingOrUncle(parentNcList, walkingStack) } private def moveToRightSiblingOrUncle(ncList: NCList, walkingStack: ArrayStack[NCListWalkingStack]): NCList = { var ncListLocal = ncList do { val stackElt = walkingStack.pop() if ((stackElt.n+1) < stackElt.parentNcList.nChildren) { walkingStack.push(NCListWalkingStack(stackElt.parentNcList,stackElt.n+1)) ncListLocal = stackElt.parentNcList.childrenBuf(stackElt.n+1) return ncListLocal } else { walkingStack.push(NCListWalkingStack(stackElt.parentNcList,stackElt.n+1)) ncListLocal = stackElt.parentNcList walkingStack.pop() } } while (walkingStack.nonEmpty) null } }
Example 179
Source File: CoverageUpdate.scala From bdg-sequila with Apache License 2.0 | 5 votes |
package org.biodatageeks.sequila.coverage import org.apache.spark.util.AccumulatorV2 import scala.collection.mutable.ArrayBuffer case class RightCovEdge(contig: String, minPos: Int, startPoint: Int, cov: Array[Short], cumSum: Short) case class ContigRange(contig: String, minPos: Int, maxPos: Int) class CovUpdate(var right: ArrayBuffer[RightCovEdge], var left: ArrayBuffer[ContigRange]) extends Serializable { def reset(): Unit = { right = new ArrayBuffer[RightCovEdge]() left = new ArrayBuffer[ContigRange]() } def add(p: CovUpdate): CovUpdate = { right = right ++ p.right left = left ++ p.left this } } class CoverageAccumulatorV2(var covAcc: CovUpdate) extends AccumulatorV2[CovUpdate, CovUpdate] { def reset(): Unit = { covAcc = new CovUpdate(new ArrayBuffer[RightCovEdge](), new ArrayBuffer[ContigRange]()) } def add(v: CovUpdate): Unit = { covAcc.add(v) } def value(): CovUpdate = { covAcc } def isZero(): Boolean = { covAcc.right.isEmpty && covAcc.left.isEmpty } def copy(): CoverageAccumulatorV2 = { new CoverageAccumulatorV2(covAcc) } def merge(other: AccumulatorV2[CovUpdate, CovUpdate]): Unit = { covAcc.add(other.value) } }
Example 180
Source File: BufferBenchmark.scala From sigmastate-interpreter with MIT License | 5 votes |
package special.collections import debox.Buffer import spire.syntax.all.cfor import org.scalameter.api.Bench import scala.collection.mutable import scala.collection.mutable.{ArrayBuffer, ListBuffer} trait BufferBenchmarkCases extends BenchmarkGens { suite: Bench[Double] => val obj = new Object() performance of "append[Int]" in { measure method "of debox.Buffer" in { using(arrays) in { case (arr, n) => val buf = Buffer.ofSize[Int](16) val limit = arr.length cfor(0)(_ < limit, _ + 1) { i => buf.append(arr(i)) } val res = buf.toArray() } } measure method "of ArrayBuilder" in { using(arrays) in { case (arr, n) => val buf = mutable.ArrayBuilder.make[Int]() val limit = arr.length cfor(0)(_ < limit, _ + 1) { i => buf += (arr(i)) } val res = buf.result() } } measure method "of ArrayBuffer" in { using(arrays) in { case (arr, n) => val buf = ArrayBuffer.empty[Int] val limit = arr.length cfor(0)(_ < limit, _ + 1) { i => buf.append(arr(i)) } val res = buf.toArray } } measure method "of ListBuffer" in { using(arrays) in { case (arr, n) => val buf = ListBuffer.empty[Int] val limit = arr.length cfor(0)(_ < limit, _ + 1) { i => buf.append(arr(i)) } val res = buf.toList } } } performance of "append[Object]" in { measure method "of debox.Buffer" in { using(arrays) in { case (arr, n) => val buf = Buffer.ofSize[Object](100) val limit = arr.length cfor(0)(_ < limit, _ + 1) { i => buf.append(obj) } } } measure method "of ArrayBuilder" in { using(arrays) in { case (arr, n) => val buf = mutable.ArrayBuilder.make[Object]() val limit = arr.length cfor(0)(_ < limit, _ + 1) { i => buf += (obj) } val res = buf.result() } } measure method "of ArrayBuffer" in { using(arrays) in { case (arr, n) => val buf = ArrayBuffer.empty[Object] val limit = arr.length cfor(0)(_ < limit, _ + 1) { i => buf.append(obj) } } } measure method "of ListBuffer" in { using(arrays) in { case (arr, n) => val buf = ListBuffer.empty[Object] val limit = arr.length cfor(0)(_ < limit, _ + 1) { i => buf.append(obj) } val res = buf.toList } } } } object FastBufferBenchmark extends Bench.LocalTime with BufferBenchmarkCases { }
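The scalameter suite compares debox.Buffer, ArrayBuilder, ArrayBuffer and ListBuffer when appending primitives and objects. A rough, unscientific way to try the ArrayBuffer versus ArrayBuilder comparison without scalameter (no warm-up or statistical treatment, so the numbers are only indicative):

import scala.collection.mutable

object QuickAppendTiming {
  def time[A](label: String)(body: => A): A = {
    val start = System.nanoTime()
    val result = body
    println(f"$label%-13s ${(System.nanoTime() - start) / 1e6}%.1f ms")
    result
  }

  def main(args: Array[String]): Unit = {
    val n = 5000000
    time("ArrayBuffer") {
      val buf = mutable.ArrayBuffer.empty[Int]
      var i = 0; while (i < n) { buf += i; i += 1 }
      buf.toArray
    }
    time("ArrayBuilder") {
      val b = mutable.ArrayBuilder.make[Int]()
      var i = 0; while (i < n) { b += i; i += 1 }
      b.result()
    }
  }
}

Scalameter exists precisely because such ad hoc timings are noisy; treat this only as a way to see the two APIs side by side.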
Example 181
Source File: HBaseLocalClient.scala From gimel with Apache License 2.0 | 5 votes |
package com.paypal.gimel.hbase.utilities import java.io.File import scala.collection.mutable.ArrayBuffer import com.google.common.io.Files import org.apache.hadoop.hbase.{HBaseTestingUtility, TableName} import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.SparkConf import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, SparkSession} import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.datasources.hbase.SparkHBaseConf import org.apache.spark.sql.util._ import org.scalatest.{BeforeAndAfterAll, FunSuite, Matchers} import com.paypal.gimel.common.catalog.Field import com.paypal.gimel.hbase.DataSet class HBaseLocalClient extends FunSuite with Matchers with BeforeAndAfterAll { var sparkSession : SparkSession = _ var dataSet: DataSet = _ val hbaseTestingUtility = new HBaseTestingUtility() val tableName = "test_table" val cfs = Array("personal", "professional") val columns = Array("id", "name", "age", "address", "company", "designation", "salary") val fields = columns.map(col => new Field(col)) val metrics = ArrayBuffer.empty[(String, QueryExecution, Long)] protected override def beforeAll(): Unit = { val tempDir: File = Files.createTempDir tempDir.deleteOnExit hbaseTestingUtility.startMiniCluster() SparkHBaseConf.conf = hbaseTestingUtility.getConfiguration createTable(tableName, cfs) val conf = new SparkConf conf.set(SparkHBaseConf.testConf, "true") sparkSession = SparkSession.builder() .master("local") .appName("HBase Test") .config(conf) .getOrCreate() val listener = new QueryExecutionListener { // Only test successful case here, so no need to implement `onFailure` override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit = {} override def onSuccess(funcName: String, qe: QueryExecution, duration: Long): Unit = { metrics += ((funcName, qe, duration)) } } sparkSession.listenerManager.register(listener) sparkSession.sparkContext.setLogLevel("ERROR") dataSet = new DataSet(sparkSession) } protected override def afterAll(): Unit = { hbaseTestingUtility.shutdownMiniCluster() sparkSession.close() } def createTable(name: String, cfs: Array[String]) { val tName = Bytes.toBytes(name) val bcfs = cfs.map(Bytes.toBytes(_)) try { hbaseTestingUtility.deleteTable(TableName.valueOf(tName)) } catch { case _ : Throwable => println("No table = " + name + " found") } hbaseTestingUtility.createMultiRegionTable(TableName.valueOf(tName), bcfs) } // Mocks data for testing def mockDataInDataFrame(numberOfRows: Int): DataFrame = { def stringed(n: Int) = s"""{"id": "$n","name": "MAC-$n", "address": "MAC-${n + 1}", "age": "${n + 1}", "company": "MAC-$n", "designation": "MAC-$n", "salary": "${n * 10000}" }""" val texts: Seq[String] = (1 to numberOfRows).map { x => stringed(x) } val rdd: RDD[String] = sparkSession.sparkContext.parallelize(texts) val dataFrame: DataFrame = sparkSession.read.json(rdd) dataFrame } }
Example 182
Source File: FriendEntity.scala From lagom-scala-chirper with Apache License 2.0 | 5 votes |
package sample.chirper.friend.impl import akka.Done import com.lightbend.lagom.scaladsl.persistence.PersistentEntity import com.lightbend.lagom.scaladsl.playjson.{JsonSerializer, JsonSerializerRegistry} import sample.chirper.friend.api.User import scala.collection.mutable.ArrayBuffer class FriendEntity extends PersistentEntity { override type Command = FriendCommand[_] override type Event = FriendEvent override type State = FriendState override def initialState = FriendState(None) override def behavior = { case FriendState(None) => notInitialized case FriendState(Some(user)) => initialized } val onGetUser = Actions().onReadOnlyCommand[GetUser, GetUserReply] { case (GetUser(), ctx, state) => ctx.reply(GetUserReply(state.user)) } val onFriendAdded = Actions().onEvent { case (FriendAdded(userId, friendId, timestamp), state) => state.addFriend(friendId) } val notInitialized = { Actions(). onCommand[CreateUser, Done] { case (CreateUser(user), ctx, state) => val events = ArrayBuffer.empty[FriendEvent] events += UserCreated(user.userId, user.name) events ++= user.friends.map(friendId => FriendAdded(user.userId, friendId)) ctx.thenPersistAll(events: _*) { () => ctx.reply(Done) } }. onCommand[AddFriend, Done] { case (AddFriend(friendUserId), ctx, state) => ctx.invalidCommand(s"User $entityId is not created") ctx.done }. onEvent { case (UserCreated(userId, name, timestamp), state) => FriendState(User(userId, name)) } }.orElse(onGetUser).orElse(onFriendAdded) val initialized = { Actions(). onCommand[CreateUser, Done] { case (CreateUser(user), ctx, state) => ctx.invalidCommand(s"User ${user.name} is already created") ctx.done }. onCommand[AddFriend, Done] { case (AddFriend(friendUserId), ctx, state) if state.user.get.friends.contains(friendUserId) => ctx.reply(Done) ctx.done case (AddFriend(friendUserId), ctx, state) => val event = FriendAdded(state.user.get.userId, friendUserId) ctx.thenPersist(event) { _ => ctx.reply(Done) } } }.orElse(onGetUser).orElse(onFriendAdded) } object FriendSerializerRegistry extends JsonSerializerRegistry { override def serializers = List( JsonSerializer[GetUser], JsonSerializer[GetUserReply], JsonSerializer[FriendState], JsonSerializer[CreateUser], JsonSerializer[UserCreated], JsonSerializer[AddFriend], JsonSerializer[FriendAdded] ) }
Example 183
Source File: TestContext.scala From swave with Mozilla Public License 2.0 | 5 votes |
package swave.core.internal.testkit import scala.annotation.tailrec import scala.collection.mutable.ArrayBuffer import org.scalacheck.rng.Seed import swave.core.macros._ import swave.core.impl.util.ResizableRingBuffer import swave.core.util._ private[testkit] final class TestContext(val runNr: Int, val asyncRate: Double, val asyncScheduling: TestGeneration.AsyncScheduling, val genSeed: Seed, tracing: Boolean) { import TestContext._ private[this] val schedulings = ArrayBuffer.empty[ResizableRingBuffer[Task]] val random = XorShiftRandom(genSeed.long._1) def lastId = schedulings.size - 1 def nextId(): Int = { schedulings += new ResizableRingBuffer[Task](16, 4096) schedulings.size - 1 } def trace(msg: ⇒ String)(implicit stage: TestStage): Unit = if (tracing) println(stage.toString + ": " + msg) def run(msg: ⇒ String)(block: ⇒ Unit)(implicit stage: TestStage): Unit = { val scheduled = schedulings(stage.id) if (scheduled.nonEmpty || random.decide(asyncRate)) { trace("(scheduling) " + msg) requireState(scheduled.write(new Task(stage, msg _, block _))) } else { trace("(sync) " + msg) block } } def hasSchedulings: Boolean = schedulings.exists(_.nonEmpty) @tailrec def processSchedulings(): Unit = if (hasSchedulings) { val snapshot: Array[ResizableRingBuffer[Task]] = schedulings.toArray def runSnapshots() = snapshot foreach { buf ⇒ runTasks(buf, buf.count) } @tailrec def runTasks(buf: ResizableRingBuffer[Task], count: Int): Unit = if (count > 0) { val task = buf.read() trace("(running) " + task.msg())(task.stage) task.block() runTasks(buf, count - 1) } asyncScheduling match { case TestGeneration.AsyncScheduling.InOrder ⇒ runSnapshots() case TestGeneration.AsyncScheduling.RandomOrder ⇒ random.shuffle_!(snapshot) runSnapshots() case TestGeneration.AsyncScheduling.ReversedOrder ⇒ snapshot.reverse_!() runSnapshots() case TestGeneration.AsyncScheduling.Mixed ⇒ @tailrec def rec(remaining: Array[ResizableRingBuffer[Task]]): Unit = if (remaining.nonEmpty) { random.shuffle_!(remaining) rec(remaining flatMap { buf ⇒ val jobsSize = buf.count runTasks(buf, random.nextInt(jobsSize + 1)) // at least one, at most all if (buf.nonEmpty) buf :: Nil else Nil }) } rec(snapshot) } processSchedulings() } } private[testkit] object TestContext { private class Task(val stage: TestStage, val msg: () ⇒ String, val block: () ⇒ Unit) }
Example 184
Source File: ercesiMIPSRunner.scala From ercesiMIPS with GNU General Public License v3.0 | 5 votes |
// See LICENSE.txt for license details. package utils import scala.collection.mutable.ArrayBuffer import scala.util.Properties.envOrElse object ercesiMIPSRunner { def apply(ercesiMIPSMap: Map[String, String => Boolean], args: Array[String]): Unit = { // Choose the default backend based on what is available. lazy val firrtlTerpBackendAvailable: Boolean = { try { val cls = Class.forName("chisel3.iotesters.FirrtlTerpBackend") cls != null } catch { case e: Throwable => false } } lazy val defaultBackend = if (firrtlTerpBackendAvailable) { "firrtl" } else { "" } val backendName = envOrElse("TESTER_BACKENDS", defaultBackend).split(" ").head val problemsToRun = if(args.isEmpty || args.head == "all" ) { ercesiMIPSMap.keys.toSeq.sorted.toArray } else { args } var successful = 0 val errors = new ArrayBuffer[String] for(testName <- problemsToRun) { ercesiMIPSMap.get(testName) match { case Some(test) => println(s"Starting ercesiMIPS $testName") try { if(test(backendName)) { successful += 1 } else { errors += s"ercesiMIPS $testName: test error occurred" } } catch { case exception: Exception => exception.printStackTrace() errors += s"ercesiMIPS $testName: exception ${exception.getMessage}" case t : Throwable => errors += s"ercesiMIPS $testName: throwable ${t.getMessage}" } case _ => errors += s"Bad ercesiMIPS name: $testName" } } if(successful > 0) { println(s"ercesiMIPSs passing: $successful") } if(errors.nonEmpty) { println("=" * 80) println(s"Errors: ${errors.length}: in the following commands") println(errors.mkString("\n")) println("=" * 80) System.exit(1) } } }
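For context, a hypothetical launcher that drives the runner above; the test names and bodies below are made up. Each map value is a String => Boolean that receives the chosen backend name and returns true on success; the runner prints a summary and exits with status 1 if any test fails.

// Hypothetical launcher for ercesiMIPSRunner; test names and bodies are illustrative.
object Launcher {
  // Each entry maps a test name to a function that receives the backend name.
  val tests: Map[String, String => Boolean] = Map(
    "alwaysPasses" -> { backend => println(s"running on backend '$backend'"); true },
    "alwaysFails"  -> { _ => false }
  )

  def main(args: Array[String]): Unit =
    // pass "all" (or no argument) to run everything, or individual test names
    utils.ercesiMIPSRunner(tests, args)
}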
Example 185
Source File: GzetPersons.scala From Mastering-Spark-for-Data-Science with MIT License | 5 votes |
package io.gzet.community.util import scala.collection.mutable.ArrayBuffer object GzetPersons { def buildTuples(array: Array[String]): Array[(String, String)] = { val holdingArray = ArrayBuffer[String]() val n = array.length val r = 2 val data = new Array[String](r) combinations(array, holdingArray, data, 0, n - 1, 0, r) val result = ArrayBuffer[(String, String)]() for (s: String <- holdingArray.toArray) { val split: Array[String] = s.split(",") result += ((split(0), split(1))) } result.toArray } def combinations(input: Array[String], result: ArrayBuffer[String], data: Array[String], start: Int, end: Int, index: Int, r: Int): Unit ={ if(index == r) { var s:String = "" for (i <- 0 until r) { if (i != 0) { s += "," } s += data(i) } result += s return } var j = start while(j <= end && (end - j + 1) >= (r - index)){ data(index) = input(j) combinations(input, result, data, j + 1, end, index + 1, r) j += 1 } } }
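A small, assumed driver for the helper above: buildTuples emits every unordered pair (2-combination) of the input names, so three inputs yield three pairs. Note that the intermediate comma-joined encoding means input strings containing commas would be split incorrectly.

// Illustrative only: exercising buildTuples on three names.
object GzetPersonsDemo extends App {
  import io.gzet.community.util.GzetPersons

  val pairs = GzetPersons.buildTuples(Array("alice", "bob", "carol"))
  pairs.foreach(println)
  // expected:
  // (alice,bob)
  // (alice,carol)
  // (bob,carol)
}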
Example 186
Source File: IncrementalSeq.scala From inox with Apache License 2.0 | 5 votes |
package inox.utils import scala.collection.mutable.Builder import scala.collection.mutable.ArrayBuffer import scala.collection.{Iterable, IterableLike} class IncrementalSeq[A] extends IncrementalState with Iterable[A] with IterableLike[A, Seq[A]] with Builder[A, IncrementalSeq[A]] { private[this] var stack: List[ArrayBuffer[A]] = List(new ArrayBuffer()) def clear() : Unit = { stack = List(new ArrayBuffer()) } def reset(): Unit = { clear() } def push(): Unit = { stack ::= stack.head.clone } def pop(): Unit = { stack = stack.tail } def iterator = stack.head.toList.iterator def +=(e: A) = { stack.head += e; this } def -=(e: A) = { stack.head -= e; this } override def newBuilder = new scala.collection.mutable.ArrayBuffer() def result = this }
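A short sketch (assumed, not from the inox sources) of the snapshot semantics above: push clones the current buffer onto the stack, so everything added afterwards is discarded again by pop.

// Illustrative push/pop behaviour of IncrementalSeq.
object IncrementalSeqDemo extends App {
  import inox.utils.IncrementalSeq

  val xs = new IncrementalSeq[Int]
  xs += 1
  xs.push()            // snapshot: [1]
  xs += 2
  println(xs.toList)   // List(1, 2)
  xs.pop()             // discard everything added since push()
  println(xs.toList)   // List(1)
}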
Example 187
Source File: MatchCollector.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.cep.ops

import dbis.piglet.cep.nfa.NFAStructure
import scala.reflect.ClassTag
import scala.collection.mutable.ListBuffer
import scala.collection.mutable.ArrayBuffer
import dbis.piglet.backends.{SchemaClass => Event}

// Collects completed NFA match sequences and offers two views on them:
// the flattened events of all matches, or a single boolean flag.
class MatchCollector[T <: Event: ClassTag] extends Serializable {
  var matchSequences: ListBuffer[NFAStructure[T]] = new ListBuffer()

  def +(that: NFAStructure[T]): Unit = matchSequences += that

  def size: Int = matchSequences.size

  def convertEventsToArray(): ArrayBuffer[T] = {
    val events: ArrayBuffer[T] = new ArrayBuffer()
    matchSequences.foreach(seq => events ++= seq.events)
    events
  }

  def convertEventsToBoolean(): ArrayBuffer[Boolean] =
    ArrayBuffer(matchSequences.nonEmpty)
}
Example 188
Source File: NFAStructure.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.cep.nfa import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag import scala.collection.mutable.HashMap import dbis.piglet.backends.{SchemaClass => Event} import scala.collection.mutable.ListBuffer def addEvent(event: T, currentEdge: ForwardEdge[T]): Unit = { events += event //if (relatedValue != null) { // relatedValue.get(currentEdge.name.get) match { // case Some(x) => x.foreach (r => r.updateValue(event)) //case None => Nil //} //} currenState = currentEdge.destState if (currenState.isInstanceOf[FinalState[T]]) complete = true } override def clone(): NFAStructure[T] = { val copyStr = new NFAStructure[T](this.nfaController) copyStr.complete = this.complete copyStr.currenState = this.currenState copyStr.events = this.events.clone() //copyStr.events = this.events copyStr } }
Example 189
Source File: FlinkStreamingCEPTest.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.cep.test.flink import java.io.File import dbis.piglet.backends.{ Record, SchemaClass } import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment import org.scalatest._ import org.apache.commons.io.FileUtils import org.apache.flink.api.scala._ import dbis.piglet.cep.nfa._ import dbis.piglet.cep.ops.SelectionStrategy._ import dbis.piglet.cep.ops.OutputStrategy._ import dbis.piglet.cep.flink.CustomDataStreamMatcher._ import scala.collection.mutable.ArrayBuffer import org.apache.flink.streaming.api.windowing.windows.GlobalWindow import org.apache.flink.streaming.api.windowing.assigners.GlobalWindows case class StreamingDoubleRecord(col1: Int, col2: Int) extends java.io.Serializable with SchemaClass { override def mkString(delim: String) = s"$col1$delim$col2" } object OurStreamingNFA { def filter1(record: StreamingDoubleRecord, rvalues: NFAStructure[StreamingDoubleRecord]): Boolean = record.col1 == 1 def filter2(record: StreamingDoubleRecord, rvalues: NFAStructure[StreamingDoubleRecord]): Boolean = record.col1 == 2 def filter3(record: StreamingDoubleRecord, rvalues: NFAStructure[StreamingDoubleRecord]): Boolean = record.col1 == 3 def createNFA = { val testNFA: NFAController[StreamingDoubleRecord] = new NFAController() val firstState = testNFA.createAndGetStartState("First") val secondState = testNFA.createAndGetNormalState("Second") val thirdState = testNFA.createAndGetNormalState("Third") val finalState = testNFA.createAndGetFinalState("Final") val firstEdge = testNFA.createAndGetForwardEdge(filter1) val secondEdge = testNFA.createAndGetForwardEdge(filter2) val thirdEdge = testNFA.createAndGetForwardEdge(filter3) testNFA.createForwardTransition(firstState, firstEdge, secondState) testNFA.createForwardTransition(secondState, secondEdge, thirdState) testNFA.createForwardTransition(thirdState, thirdEdge, finalState) testNFA } } class FlinkStreamingCEPTest extends FlatSpec with Matchers with BeforeAndAfterEach { var resultArray = new ArrayBuffer[StreamingDoubleRecord] override def beforeEach() { resultArray.clear() } val sample = Seq( StreamingDoubleRecord(1,1), StreamingDoubleRecord(2,2), StreamingDoubleRecord(1,3), StreamingDoubleRecord(2,4), StreamingDoubleRecord(3,5), StreamingDoubleRecord(1,6), StreamingDoubleRecord(2,7), StreamingDoubleRecord(3,8)) "Flink Streaming CEP" should "detect the pattern SEQ(A, B, C) with first match" in { val env = StreamExecutionEnvironment.getExecutionEnvironment env.getConfig.disableSysoutLogging() val data = env.fromCollection(sample) val res = data.matchNFA(OurStreamingNFA.createNFA, env, FirstMatch) } it should "detect the pattern SEQ(A, B, C) with any match" in { val env = StreamExecutionEnvironment.getExecutionEnvironment env.getConfig.disableSysoutLogging() val data = env.fromCollection(sample) val res = data.matchNFA(OurStreamingNFA.createNFA, env, AllMatches) } it should "detect the pattern SEQ(A, B, C) with next match" in { val env = StreamExecutionEnvironment.getExecutionEnvironment env.getConfig.disableSysoutLogging() val data = env.fromCollection(sample) val res = data.matchNFA(OurStreamingNFA.createNFA, env, NextMatches) } it should "detect the pattern SEQ(A, B, C) with contiguity match" in { val env = StreamExecutionEnvironment.getExecutionEnvironment env.getConfig.disableSysoutLogging() val data = env.fromCollection(sample) val res = data.matchNFA(OurStreamingNFA.createNFA, env, ContiguityMatches) } }
Example 190
Source File: Cross.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.op import dbis.piglet.schema._ import scala.collection.mutable.ArrayBuffer case class Cross( private val out: Pipe, private val in: List[Pipe], timeWindow: (Int, String)= null.asInstanceOf[(Int, String)] ) extends PigOperator(List(out), in) { // require(in.size == 2, "Only two inputs allowed for CROSS, currently!") override def lineageString: String = { s"""CROSS%""" + super.lineageString } override def constructSchema: Option[Schema] = { val newFields = ArrayBuffer[Field]() inputs.foreach(p => p.producer.schema match { case Some(s) => newFields ++= s.fields map { f => Field(f.name, f.fType, p.name :: f.lineage) } case None => ??? }) schema = Some(Schema(BagType(TupleType(newFields.toArray)))) schema } override def toString = s"""CROSS | out = ${outPipeNames.mkString(",")} | in = ${inPipeNames.mkString(",")}""".stripMargin }
Example 191
package dbis.piglet.op import dbis.piglet.schema._ import scala.collection.mutable.ArrayBuffer case class Zip( private val out: Pipe, private val in: List[Pipe], withIndex: Boolean ) extends PigOperator(List(out), in) { require((in.size > 1 && !withIndex) || (in.size == 1 && withIndex), "zip with index works only with one input. Otherwise we must have at least two inputs") override def lineageString: String = { s"""ZIP%$withIndex""" + super.lineageString } override def constructSchema: Option[Schema] = { val newFields = inputs.flatMap(p => p.producer.schema match { case Some(s) => s.fields.map { f => Field(f.name, f.fType, p.name :: f.lineage) } case None => throw new UnsupportedOperationException(s"Cannot zip with unknown Schema! (input pipe $p)") }) schema = Some(Schema( BagType( TupleType( (if(withIndex) newFields :+ Field("index", Types.LongType) else newFields).toArray ) ) )) schema } override def toString = s"""ZIP | out = ${outPipeNames.mkString(",")} | in = ${inPipeNames.mkString(",")} | withIndex = $withIndex""".stripMargin }
Example 192
Source File: SpatialJoin.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.op import dbis.piglet.expr.SpatialJoinPredicate import dbis.piglet.op.IndexMethod.IndexMethod import dbis.piglet.op.PartitionMethod.PartitionMethod import dbis.piglet.schema._ import scala.collection.mutable.ArrayBuffer case class SpatialJoin( private val out: Pipe, private val in: List[Pipe], predicate: SpatialJoinPredicate, index: Option[(IndexMethod, List[String])], leftParti: Option[(PartitionMethod, List[String])], rightParti: Option[(PartitionMethod, List[String])] ) extends PigOperator(List(out), in) { override def lineageString: String = { s"""SPATIALJOIN%${predicate.toString()}%$index%""" + super.lineageString } override def constructSchema: Option[Schema] = { val newFields = ArrayBuffer[Field]() inputs.foreach(p => p.producer.schema match { case Some(s) => if(s.isIndexed) { newFields ++= s.element.valueType.asInstanceOf[IndexType] // a bag of Indexes .valueType.fields // An Index contains tuples with two fields: indexed column and payload .last.fType.asInstanceOf[TupleType] // payload is again a tuple .fields // fields in each tuple .map { f => Field(f.name, f.fType, p.name :: f.lineage) } } else { newFields ++= s.fields map { f => Field(f.name, f.fType, p.name :: f.lineage) } } case None => newFields += Field("", Types.ByteArrayType) }) schema = Some(Schema(BagType(TupleType(newFields.toArray)))) schema } override def toString = s"""SPATIALJOIN | out = $outPipeName | in = ${inPipeNames.mkString(",")} | inSchema = {${inputs.map(_.producer.schema).mkString(",")}} | outSchema = $schema | predicate = $predicate | index = $index""".stripMargin // }
Example 193
Source File: Union.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.op import dbis.piglet.schema._ import scala.collection.mutable.ArrayBuffer case class Union(private val out: Pipe, private val in: List[Pipe]) extends PigOperator(List(out), in) { override def lineageString: String = { s"""UNION%""" + super.lineageString } override def constructSchema: Option[Schema] = { val bagType = (p: Pipe) => p.producer.schema.get.element val generalizedBagType = (b1: BagType, b2: BagType) => { require(b1.valueType.fields.length == b2.valueType.fields.length) val newFields = ArrayBuffer[Field]() val fieldPairs = b1.valueType.fields.zip(b2.valueType.fields) for ((f1, f2) <- fieldPairs) { newFields += Field(f1.name, Types.escalateTypes(f1.fType, f2.fType)) } BagType(TupleType(newFields.toArray)) } // case 1: one of the input schema isn't known -> output schema = None if (inputs.exists(p => p.producer.schema.isEmpty)) { schema = None } else { // case 2: all input schemas have the same number of fields val s1 = inputs.head.producer.schema.get if (! inputs.tail.exists(p => s1.fields.length != p.producer.schema.get.fields.length)) { val typeList = inputs.map(p => bagType(p)) val resultType = typeList.reduceLeft(generalizedBagType) schema = Some(Schema(resultType)) } else { // case 3: the number of fields differ schema = None } } schema } override def toString = s"""UNION | out = $outPipeName | in = { ${inPipeNames.mkString(",")} } | inSchema = $inputSchema | outSchema = $schema""".stripMargin }
Example 194
Source File: JoinEmitter.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.codegen.flink.emitter import dbis.piglet.codegen.{ CodeEmitter, CodeGenContext, CodeGenException } import dbis.piglet.expr.Ref import dbis.piglet.op.Join import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.Set import dbis.piglet.codegen.scala_lang.ScalaEmitter import scala.collection.mutable.ListBuffer import dbis.piglet.codegen.flink.FlinkHelper class JoinEmitter extends dbis.piglet.codegen.scala_lang.JoinEmitter { override def template: String = """ val <out> = <rel1><rels, rel1_keys, rel2_keys:{ r,k1, k2 | .join(<r>).where(<k1>).equalTo(<k2>)}>.map{ | t => | val <pairs> = t | <class>(<fields>) | }""".stripMargin override def code(ctx: CodeGenContext, op: Join): String = { if (!op.schema.isDefined) throw CodeGenException("schema required in JOIN") val res = op.inputs.zip(op.fieldExprs) val keys = res.map { case (i, k) => k.map { x => s"_${FlinkHelper.getOrderIndex(i.producer.schema, x)}" } } var keysGroup: ListBuffer[(List[String], List[String])] = new ListBuffer for (i <- 0 until keys.length - 1) { val v = (keys(i), keys(i + 1)) keysGroup += v } val keysGroup1 = keysGroup.zipWithIndex.map { case (i, k) => if (k > 0) (FlinkHelper.printQuote(i._1.map { x => s"_$k.$x" }), FlinkHelper.printQuote(i._2)) else (FlinkHelper.printQuote(i._1), FlinkHelper.printQuote(i._2)) } val keys1 = keysGroup1.map(x => x._1) val keys2 = keysGroup1.map(x => x._2) val className = op.schema match { case Some(s) => ScalaEmitter.schemaClassName(s.className) case None => ScalaEmitter.schemaClassName(op.outPipeName) } var pairs = "(v1,v2)" for (i <- 3 to op.inputs.length) { pairs = s"($pairs,v$i)" } val fieldList = ArrayBuffer[String]() for (i <- 1 to op.inputs.length) { op.inputs(i - 1).producer.schema match { case Some(s) => fieldList ++= s.fields.zipWithIndex.map { case (f, k) => s"v$i._$k" } case None => fieldList += s"v$i._0" } } render( Map("out" -> op.outPipeName, "rel1" -> op.inputs.head.name, "class" -> className, "rels" -> op.inputs.tail.map(_.name), "pairs" -> pairs, "rel1_keys" -> keys1, "rel2_keys" -> keys2, "fields" -> fieldList.mkString(", "))) } } object JoinEmitter { lazy val instance = new JoinEmitter }
Example 195
Source File: FlinkHelper.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.codegen.flink import dbis.piglet.codegen.CodeGenException import dbis.piglet.expr.NamedField import dbis.piglet.expr.PositionalField import dbis.piglet.schema.Schema import dbis.piglet.expr.Ref import dbis.piglet.op.PigOperator import scala.collection.mutable.ArrayBuffer object FlinkHelper { def getOrderIndex(schema: Option[Schema], ref: Ref): Int = schema match { case Some(s) => ref match { case nf @ NamedField(f, _) => s.indexOfField(nf) case PositionalField(pos) => pos case _ => 0 } case None => throw new CodeGenException(s"the Flink OrderBy/Join operator needs a schema, thus, invalid field ") } def emitJoinFieldList(node: PigOperator): (String, String) = { val rels = node.inputs var fields = "" var pairs = "(v,w)" if (rels.length == 2) { val vsize = rels.head.inputSchema.get.fields.length fields = node.schema.get.fields.zipWithIndex .map { case (f, i) => if (i < vsize) s"v._$i" else s"w._${i - vsize}" }.mkString(", ") } else { pairs = "(v1,v2)" for (i <- 3 to rels.length) { pairs = s"($pairs,v$i)" } val fieldList = ArrayBuffer[String]() for (i <- 1 to node.inputs.length) { node.inputs(i - 1).producer.schema match { case Some(s) => fieldList ++= s.fields.zipWithIndex.map { case (f, k) => s"v$i._$k" } case None => fieldList += s"v$i._0" } } fields = fieldList.mkString(", ") } (pairs, fields) } def printQuote(values: List[String]) = """"""" + values.mkString("""","""") + """"""" }
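As a quick illustration of the helpers above (the schema and values are assumed for the example): printQuote wraps each key expression in double quotes and joins them with commas, which is the form the generated .where(...)/.equalTo(...) calls expect, while getOrderIndex resolves a positional field reference to its index.

// Illustrative only; the schema is built the same way the operators above do it.
object FlinkHelperDemo extends App {
  import dbis.piglet.codegen.flink.FlinkHelper
  import dbis.piglet.expr.PositionalField
  import dbis.piglet.schema._

  val schema = Schema(BagType(TupleType(Array(
    Field("a", Types.ByteArrayType),
    Field("b", Types.ByteArrayType),
    Field("c", Types.ByteArrayType)))))

  println(FlinkHelper.getOrderIndex(Some(schema), PositionalField(2))) // 2
  println(FlinkHelper.printQuote(List("_0", "_1")))                    // "_0","_1"
}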
Example 196
Source File: TSNEStandardExample.scala From dl4scala with MIT License | 5 votes |
package org.dl4scala.examples.nlp.tsne

import java.io.File

import org.datavec.api.util.ClassPathResource
import org.deeplearning4j.models.embeddings.inmemory.InMemoryLookupTable
import org.deeplearning4j.models.embeddings.loader.WordVectorSerializer
import org.deeplearning4j.models.sequencevectors.sequence.SequenceElement
import org.deeplearning4j.models.word2vec.wordstore.VocabCache
import org.nd4j.linalg.api.buffer.DataBuffer
import org.nd4j.linalg.api.buffer.util.DataTypeUtil
import org.nd4j.linalg.primitives
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer

object TSNEStandardExample {
  private val log = LoggerFactory.getLogger(TSNEStandardExample.getClass)

  def main(args: Array[String]): Unit = {
    // STEP 1: Initialization
    val iterations = 100
    // create an n-dimensional array of doubles
    DataTypeUtil.setDTypeForContext(DataBuffer.Type.DOUBLE)
    val cacheList = new ArrayBuffer[String]() // cacheList is a dynamic array of strings used to hold all words

    // STEP 2: Turn text input into a list of words
    log.info("Load & Vectorize data....")
    val wordFile = new ClassPathResource("words.txt").getFile // Open the file
    // Get the data of all unique word vectors
    val vectors: primitives.Pair[InMemoryLookupTable[_ <: SequenceElement], VocabCache[_ <: SequenceElement]] =
      WordVectorSerializer.loadTxt(wordFile)
    val cache = vectors.getSecond
    val weights = vectors.getFirst.getSyn0
    // separate weights of unique words into their own list
    (0 until cache.numWords()).foreach(i => cacheList.append(cache.wordAtIndex(i)))

    import org.deeplearning4j.plot.BarnesHutTsne

    // STEP 3: build a dual-tree tsne to use later
    log.info("Build model....")
    val tsne = new BarnesHutTsne.Builder()
      .setMaxIter(iterations)
      .theta(0.5)
      .normalize(false)
      .learningRate(500)
      .useAdaGrad(false)
      .build

    // STEP 4: establish the tsne values and save them to a file
    log.info("Store TSNE Coordinates for Plotting....")
    val outputFile = "target/archive-tmp/tsne-standard-coords.csv"
    new File(outputFile).getParentFile.mkdirs
    tsne.fit(weights)
    tsne.saveAsFile(cacheList.asJava, outputFile)
  }
}
Example 197
Source File: MNISTVisualizer.scala From dl4scala with MIT License | 5 votes |
package org.dl4scala.examples.feedforward.anomalydetection import java.awt.{GridLayout, Image} import java.awt.image.BufferedImage import javax.swing.{ImageIcon, JFrame, JLabel, JPanel} import org.nd4j.linalg.api.ndarray.INDArray import scala.collection.mutable.ArrayBuffer class MNISTVisualizer(imageScale: Double, digits: ArrayBuffer[INDArray], title: String, gridWidth: Int) { def this(imageScale: Double, digits: ArrayBuffer[INDArray], title: String) = { this(imageScale, digits, title, 5) } def visualize(): Unit = { val frame = new JFrame frame.setTitle(title) frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE) val panel = new JPanel panel.setLayout(new GridLayout(0, gridWidth)) val list = getComponents for (image <- list) { panel.add(image) } frame.add(panel) frame.setVisible(true) frame.pack() } def getComponents: ArrayBuffer[JLabel] = { val images = new ArrayBuffer[JLabel]() for (arr <- digits) { val bi = new BufferedImage(28, 28, BufferedImage.TYPE_BYTE_GRAY) for(i <- 0 until 784) { bi.getRaster.setSample(i % 28, i / 28, 0, (255 * arr.getDouble(i)).asInstanceOf[Int]) } val orig = new ImageIcon(bi) val imageScaled = orig.getImage.getScaledInstance((imageScale * 28).asInstanceOf[Int], (imageScale * 28).asInstanceOf[Int], Image.SCALE_REPLICATE) val scaled = new ImageIcon(imageScaled) images.append(new JLabel(scaled)) } images } }
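A hypothetical driver for the visualizer above: it fills an ArrayBuffer with random 1x784 row vectors (values in [0,1]) that the visualizer renders as 28x28 images, then opens the Swing window. Nd4j.rand and its two-argument (rows, columns) form are assumed to be available from the ND4J dependency already used in this project.

// Hypothetical usage of MNISTVisualizer with random data instead of real MNIST digits.
object MNISTVisualizerDemo extends App {
  import org.dl4scala.examples.feedforward.anomalydetection.MNISTVisualizer
  import org.nd4j.linalg.api.ndarray.INDArray
  import org.nd4j.linalg.factory.Nd4j

  import scala.collection.mutable.ArrayBuffer

  // two random "digits", each a 1x784 row vector
  val digits: ArrayBuffer[INDArray] = ArrayBuffer(Nd4j.rand(1, 784), Nd4j.rand(1, 784))

  new MNISTVisualizer(2.0, digits, "Random digits").visualize()
}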
Example 198
Source File: GeneralNetwork.scala From deepspark with GNU General Public License v2.0 | 5 votes |
package com.github.nearbydelta.deepspark.network import com.esotericsoftware.kryo.Kryo import com.esotericsoftware.kryo.io.{Input, Output} import com.github.nearbydelta.deepspark.data._ import com.github.nearbydelta.deepspark.layer.InputLayer import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import scala.collection.mutable.ArrayBuffer import scala.collection.parallel.ParSeq class GeneralNetwork[In, Out](var inputLayer: InputLayer[In, _]) extends Network[In, Out] { @deprecated(message = "This is for kryo deserialization. Please use this(inputlayer)") def this() = this(null) override def NOut: Int = layerSeq.lastOption match { case Some(x) ⇒ x.NOut case None if inputLayer != null ⇒ inputLayer.NOut case None ⇒ 0 } override def backward(error: ParSeq[DataVec]): ArrayBuffer[() ⇒ Unit] = { val (upper, fseq) = backwardSeq(error) val (x, f) = inputLayer backward upper fseq ++= f.seq fseq } override def broadcast(sc: SparkContext): Unit = { inputLayer.broadcast(sc) super.broadcast(sc) } override def forward(in: In) = { val out = inputLayer.forward(in) forwardSingle(out) } override def forward(in: ParSeq[In]): ParSeq[DataVec] = { val out = inputLayer.forward(in) forwardSeq(out) } override def forward(in: RDD[(Long, In)]): RDD[(Long, DataVec)] = { val out = inputLayer.forward(in) broadcast(in.context) forwardRDD(out) } override def initiateBy(builder: WeightBuilder): this.type = { inputLayer.initiateBy(builder) super.initiateBy(builder) this } override def loss: Double = super.loss + inputLayer.loss override def read(kryo: Kryo, input: Input): Unit = { inputLayer = kryo.readClassAndObject(input).asInstanceOf[InputLayer[In, _]] super.read(kryo, input) } override def setUpdatable(bool: Boolean): Network[In, Out] = { inputLayer.setUpdatable(bool) super.setUpdatable(bool) } override def unbroadcast(): Unit = { inputLayer.unbroadcast() super.unbroadcast() } override def write(kryo: Kryo, output: Output): Unit = { kryo.writeClassAndObject(output, inputLayer) super.write(kryo, output) } }
Example 199
Source File: ExtractStageHelpers.scala From akka-xml-parser with Apache License 2.0 | 5 votes |
package uk.gov.hmrc.akka.xml import com.fasterxml.aalto.{AsyncByteArrayFeeder, AsyncXMLStreamReader} import scala.collection.mutable.ArrayBuffer trait ExtractStageHelpers { def update(xmlElementsLst: scala.collection.mutable.Set[XMLGroupElement], path: ArrayBuffer[String], newValue: Some[String]): Unit = { val elementsWithoutAnyValueForGivenPath = xmlElementsLst.collect { case e: XMLGroupElement if (e.xPath == path.toList) && e.value.isEmpty => e } elementsWithoutAnyValueForGivenPath.map((ele: XMLGroupElement) => { xmlElementsLst.remove(ele) val newElement = ele.copy(value = newValue) xmlElementsLst.add(newElement) }) } def getCompletedXMLElements(xmlElementsLst: scala.collection.mutable.Set[XMLGroupElement]): scala.collection.mutable.Set[XMLGroupElement] = { val completedElements = xmlElementsLst.collect { case e if !(e.xPath.nonEmpty && e.value.isEmpty) => e } completedElements.foreach({ xmlElementsLst -= _ }) completedElements } }
Example 200
Source File: StreamHelper.scala From akka-xml-parser with Apache License 2.0 | 5 votes |
package uk.gov.hmrc.akka.xml import com.fasterxml.aalto.{AsyncByteArrayFeeder, AsyncXMLStreamReader} import scala.collection.mutable.ArrayBuffer trait StreamHelper { def update(xmlElementsLst: scala.collection.mutable.Set[XMLElement], path: ArrayBuffer[String], newValue: Some[String]): Unit = { val elementsWithoutAnyValueForGivenPath = xmlElementsLst.collect { case e: XMLElement if (e.xPath == path.toList) && e.value.isEmpty => e } elementsWithoutAnyValueForGivenPath.map((ele: XMLElement) => { xmlElementsLst.remove(ele) val newElement = ele.copy(value = newValue) xmlElementsLst.add(newElement) }) } def getCompletedXMLElements(xmlElementsLst: scala.collection.mutable.Set[XMLElement]): scala.collection.mutable.Set[XMLElement] = { val completedElements = xmlElementsLst.collect { case e if !(e.xPath.nonEmpty && (e.value.isEmpty && e.attributes.isEmpty)) => e } completedElements.foreach({ xmlElementsLst -= _ }) completedElements } def getUpdatedElement(xPath: Seq[String], attributes: Map[String, String], elemText: String) (implicit reader: AsyncXMLStreamReader[AsyncByteArrayFeeder]): String = { val prefix = getPrefix val startElement = attributes.foldLeft(s"<$prefix${xPath.last}") { case (s, (k, v)) => s"""$s $k="$v"""" } + ">" val value = elemText val endElement = getEndElement(xPath, prefix) s"$startElement$value$endElement" } private def getPrefix(implicit reader: AsyncXMLStreamReader[AsyncByteArrayFeeder]): String = Option(reader.getPrefix) match { case Some(pre) if pre.nonEmpty => s"$pre:" case _ => "" } private def getEndElement(xPath: Seq[String], prefix: String) = s"</$prefix${xPath.last}>" }