The following examples show how to use java.io.File.
Source File: KafkaTopicInfo.scala From matcher with MIT License | 7 votes |
package tools

// NOTE(review): two of the three `import` keywords below have lost their targets. The body
// references `File` and `ActorSystem`, which nothing visible imports — presumably java.io.File
// and akka.actor.ActorSystem; confirm against the upstream file.
import import import com.typesafe.config.ConfigFactory
import com.wavesplatform.dex.queue.KafkaMatcherQueue.eventDeserializer
import com.wavesplatform.dex.queue.{QueueEvent, QueueEventWithMeta}
import com.wavesplatform.dex.settings.toConfigOps
import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.JavaConverters._
import scala.concurrent.duration.DurationInt

/**
 * Command-line tool that prints the partitions and end offsets of a Kafka topic and then prints
 * the events polled from partition 0 of that topic.
 *
 * Positional arguments: `args(0)` config file path, `args(1)` topic, `args(2)` starting offset
 * (`from`), `args(3)` event count (`max`). Arguments are read without validation, so a missing
 * or non-numeric argument fails with an exception.
 */
object KafkaTopicInfo extends App {
  // NOTE(review): this ActorSystem is neither used explicitly nor terminated in the visible code.
  implicit val system: ActorSystem = ActorSystem()

  val configFile = new File(args(0))
  val topic = args(1)
  val from = args(2).toLong
  val max = args(3).toInt

  // Echo the effective arguments.
  println(s"""configFile: ${configFile.getAbsolutePath} |topic: $topic |from: $from |max: $max""".stripMargin)

  // Timeout passed to the partitionsFor / endOffsets requests below.
  val requestTimeout = java.time.Duration.ofNanos(5.seconds.toNanos)

  // Inline settings take precedence; the config file, application config and reference config
  // are fallbacks, in that order.
  // NOTE(review): the path in front of `{`, the first two keys of the inline block, and both
  // `getConfig("")` paths are empty here — they appear to have been stripped; restore them from
  // the upstream file before relying on this code.
  val config = ConfigFactory
    .parseString(""" { | = "kafka-topics-info" | = false | auto.offset.reset = earliest |} | |""".stripMargin)
    .withFallback {
      ConfigFactory
        .parseFile(configFile)
        .withFallback(ConfigFactory.defaultApplication())
        .withFallback(ConfigFactory.defaultReference())
        .resolve()
        .getConfig("")
    }

  val consumer = new KafkaConsumer[String, QueueEvent](
    config.getConfig("").toProperties,
    new StringDeserializer,
    eventDeserializer
  )

  try {
    // Only partition 0 of the topic is assigned and read.
    val topicPartition = new TopicPartition(topic, 0)
    val topicPartitions = java.util.Collections.singletonList(topicPartition)
    consumer.assign(topicPartitions)

    {
      val r = consumer.partitionsFor(topic, requestTimeout)
      println(s"Partitions:\n${r.asScala.mkString("\n")}")
    }

    {
      val r = consumer.endOffsets(topicPartitions, requestTimeout)
      println(s"End offsets for $topicPartition: ${r.asScala.mkString(", ")}")
    }

    // NOTE(review): `, from)` is the tail of a statement whose head is missing — presumably a
    // call that positions the consumer on `topicPartition` at offset `from`; confirm.
    , from)

    val pollDuriation = java.time.Duration.ofNanos(1.seconds.toNanos)
    val lastOffset = from + max
    var continue = true
    while (continue) {
      println(s"Reading from Kafka")
      val xs = consumer.poll(pollDuriation).asScala.toVector
      // Every polled record is printed, including records beyond `lastOffset`.
      xs.foreach { msg =>
        println(QueueEventWithMeta(msg.offset(), msg.timestamp(), msg.value()))
      }

      // The loop ends only when the LAST record of a batch has offset exactly `lastOffset`.
      // NOTE(review): a batch ending past `lastOffset`, or an empty poll, leaves `continue`
      // true, so the loop keeps polling — confirm this is intended.
      xs.lastOption.foreach { x =>
        if (x.offset() == lastOffset) continue = false
      }
    }
  } finally {
    consumer.close()
  }
}
Source File: DataFrameExample.scala From drizzle-spark with Apache License 2.0 | 7 votes |
// scalastyle:off println
// NOTE(review): the package name and two import targets are missing below. The body uses `File`
// and matches on `Vector`, neither of which is imported in the visible code — presumably
// java.io.File and an ML linalg Vector; confirm against the upstream file.
package import import scopt.OptionParser

import org.apache.spark.examples.mllib.AbstractParams
import import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.apache.spark.util.Utils

/**
 * Example application: loads a LIBSVM data set into a DataFrame, prints its schema and record
 * count, summarises the feature vectors, writes the data to a temporary Parquet directory and
 * prints the schema of the data read back.
 */
object DataFrameExample {

  /** Command-line parameters; `input` is the path of the data set to load. */
  case class Params(input: String = "data/mllib/sample_libsvm_data.txt")
    extends AbstractParams[Params]

  /** Parses `args` and runs the example; exits with status 1 when parsing fails. */
  def main(args: Array[String]) {
    val defaultParams = Params()

    val parser = new OptionParser[Params]("DataFrameExample") {
      head("DataFrameExample: an example app using DataFrame for ML.")
      opt[String]("input")
        .text(s"input path to dataframe")
        .action((x, c) => c.copy(input = x))
      // No cross-field validation: every parsed configuration is accepted.
      checkConfig { params =>
        success
      }
    }

    parser.parse(args, defaultParams) match {
      case Some(params) => run(params)
      case _ => sys.exit(1)
    }
  }

  /**
   * Runs the example with the given parameters.
   *
   * @param params parsed command-line parameters
   */
  def run(params: Params): Unit = {
    val spark = SparkSession
      .builder
      .appName(s"DataFrameExample with $params")
      .getOrCreate()

    // Load input data
    println(s"Loading LIBSVM file with UDT from ${params.input}.")
    // NOTE(review): the start of this initialiser is missing — presumably a DataFrame reader
    // configured with the "libsvm" format; confirm.
    val df: DataFrame ="libsvm").load(params.input).cache()
    println("Schema from LIBSVM:")
    df.printSchema()
    println(s"Loaded training data as a DataFrame with ${df.count()} records.")

    // Show statistical summary of labels.
    // NOTE(review): `labelSummary` is computed but never used in the visible code; a statement
    // that displays it may have been lost.
    val labelSummary = df.describe("label")

    // Convert features column to an RDD of vectors.
    // NOTE(review): the start of this initialiser is missing — presumably a selection of the
    // "features" column mapped over rows; confirm.
    val features ="features") { case Row(v: Vector) => v }
    // Fold all feature vectors into one summarizer: `add` within a partition, `merge` across.
    val featureSummary = features.aggregate(new MultivariateOnlineSummarizer())(
      (summary, feat) => summary.add(Vectors.fromML(feat)),
      (sum1, sum2) => sum1.merge(sum2))
    println(s"Selected features column with average values:\n ${featureSummary.mean.toString}")

    // Save the records in a parquet file.
    val tmpDir = Utils.createTempDir()
    val outputDir = new File(tmpDir, "dataframe").toString
    println(s"Saving to $outputDir as Parquet file.")
    df.write.parquet(outputDir)

    // Load the records back.
    println(s"Loading Parquet file with UDT from $outputDir.")
    // NOTE(review): the initialiser of `newDF` is missing; as written it would bind the result
    // of `println`. Presumably the Parquet data in `outputDir` is read back here — confirm.
    val newDF = println(s"Schema from Parquet:")
    newDF.printSchema()

    spark.stop()
  }
}
// scalastyle:on println
Source File: CommandUtils.scala From drizzle-spark with Apache License 2.0 | 7 votes |
package org.apache.spark.deploy.worker

// NOTE(review): the package prefix of this import is missing (`import{...}`); the names are
// presumably from java.io — confirm.
import{File, FileOutputStream, InputStream, IOException}

import scala.collection.JavaConverters._
import scala.collection.Map

import org.apache.spark.SecurityManager
import org.apache.spark.deploy.Command
import org.apache.spark.internal.Logging
import org.apache.spark.launcher.WorkerCommandBuilder
import org.apache.spark.util.Utils

// NOTE(review): the declaration that encloses `redirectStream` is not present in this excerpt;
// only its closing brace (the last line) is. `logInfo` is not defined here either — presumably
// it is provided by the imported Logging trait via that enclosing declaration.

  /**
   * Starts a new thread that copies `in` to `file`. The file is opened in append mode, so
   * existing content is kept. The method returns as soon as the thread has been started.
   *
   * @param in   stream to read from
   * @param file destination file
   */
  def redirectStream(in: InputStream, file: File) {
    val out = new FileOutputStream(file, true)
    // TODO: It would be nice to add a shutdown hook here that explains why the output is
    // terminating. Otherwise if the worker dies the executor logs will silently stop.
    new Thread("redirect output to " + file) {
      override def run() {
        try {
          // NOTE(review): the meaning of the third argument is defined by Utils.copyStream,
          // which is not shown here — presumably "close both streams when done"; confirm.
          Utils.copyStream(in, out, true)
        } catch {
          // An IOException ends the copy; it is logged at info level and not rethrown.
          case e: IOException =>
            logInfo("Redirection to " + file + " closed: " + e.getMessage)
        }
      }
    }.start()
  }
}
Source File: KeyUtils.scala From daml with Apache License 2.0 | 6 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.jwt

// NOTE(review): several imports below have lost all or part of their targets (bare `import`
// keywords and `import{...}` without a package). `RSAPublicKey` is used by the visible code;
// its selector presumably belongs to java.security.interfaces — confirm against upstream.
import{File, FileInputStream}
import java.nio.charset.StandardCharsets
import java.nio.file.Files
import import{ECPublicKey, RSAPrivateKey, RSAPublicKey}
import import import import scalaz.Show
import import scala.util.Try

/** Key-related helpers; the visible part renders RSA public keys as a JWKS JSON document. */
object KeyUtils {

  /** Error value carrying a symbolic origin (`what`) and a human-readable `message`. */
  final case class Error(what: Symbol, message: String)

  object Error {
    /** Renders an [[Error]] as `KeyUtils.Error: <what>, <message>`. */
    implicit val showInstance: Show[Error] =
      Show.shows(e => s"KeyUtils.Error: ${e.what}, ${e.message}")
  }

  // NOTE(review): not referenced by the visible code.
  private val mimeCharSet = StandardCharsets.ISO_8859_1

  /**
   * Builds a JWKS JSON string with one entry per key.
   *
   * @param keys RSA public keys indexed by key id (`kid`)
   * @return JSON text with a top-level `keys` array
   */
  def generateJwks(keys: Map[String, RSAPublicKey]): String = {
    // One JWK entry. `e` and `n` are the URL-safe Base64 encodings of the two's-complement
    // byte arrays of the public exponent and the modulus.
    // NOTE(review): BigInteger.toByteArray may include a leading sign byte and getUrlEncoder
    // emits padding; confirm that consumers of this JWKS accept that form.
    // NOTE(review): `keyId` is interpolated without JSON escaping.
    def generateKeyEntry(keyId: String, key: RSAPublicKey): String =
      s""" { | "kid": "$keyId", | "kty": "RSA", | "alg": "RS256", | "use": "sig", | "e": "${java.util.Base64.getUrlEncoder .encodeToString(key.getPublicExponent.toByteArray)}", | "n": "${java.util.Base64.getUrlEncoder.encodeToString(key.getModulus.toByteArray)}" | }""".stripMargin

    // NOTE(review): the expression in front of `{ case (keyId, key) => ... }` is missing —
    // presumably `keys` is mapped over here; confirm. Entries are joined with ",\n".
    s""" |{ | "keys": [ |${ { case (keyId, key) => generateKeyEntry(keyId, key) }.mkString(",\n")} | ] |} """.stripMargin
  }
}
Source File: RUtils.scala From drizzle-spark with Apache License 2.0 | 6 votes |
package org.apache.spark.api.r

// NOTE(review): the first `import` below has lost its target; nothing in the visible body
// identifies what it was — restore it from the upstream file.
import import java.util.Arrays

import org.apache.spark.{SparkEnv, SparkException}

/** R-related helpers, visible only inside the `spark` package. */
private[spark] object RUtils {
  // Local path where R binary packages built from R source code contained in the spark
  // packages specified with "--packages" or "--jars" command line option reside.
  var rPackages: Option[String] = None

  /**
   * Checks whether R can be run by launching `R --version` and waiting for it to finish.
   *
   * @return `true` when the process exits with status 0; `false` when it exits with any other
   *         status or when starting or waiting for it throws an `Exception`
   */
  def isRInstalled: Boolean = {
    try {
      val builder = new ProcessBuilder(Arrays.asList("R", "--version"))
      builder.start().waitFor() == 0
    } catch {
      // Any failure to launch or wait for the process is reported as "not installed".
      case e: Exception => false
    }
  }
}
Source File: package.scala From mantis with Apache License 2.0 | 6 votes |
package io.iohk.ethereum

import java.io.{File, PrintWriter}
import java.net.{Inet6Address, InetAddress}
import java.security.SecureRandom

import io.iohk.ethereum.crypto._
import org.spongycastle.crypto.AsymmetricCipherKeyPair
import org.spongycastle.crypto.params.ECPublicKeyParameters
import org.spongycastle.math.ec.ECPoint
import org.spongycastle.util.encoders.Hex

import scala.io.Source

/** Node-id conversions, key-pair persistence and host-name formatting for the network layer. */
package object network {

  val ProtocolVersion = 4

  /** Adds `toNodeId` to EC public keys. */
  implicit class ECPublicKeyParametersNodeId(val pubKey: ECPublicKeyParameters) extends AnyVal {

    /** Uncompressed encoding of the public point with the leading type byte removed. */
    def toNodeId: Array[Byte] =
      pubKey.getQ
        .getEncoded(false)
        .drop(1) // drop type info
  }

  /**
   * Decodes a hex-encoded node id into a curve point.
   *
   * @param nodeId hex string of the point encoding without its leading type byte
   */
  def publicKeyFromNodeId(nodeId: String): ECPoint = {
    // Re-attach the type byte that `toNodeId` drops before decoding.
    val bytes = ECDSASignature.uncompressedIndicator +: Hex.decode(nodeId)
    curve.getCurve.decodePoint(bytes)
  }

  /**
   * Loads the key pair whose hex-encoded private key is stored in `filePath`. When the file
   * does not exist, a new key pair is generated and its private key is written to that file.
   *
   * @param filePath     location of the private-key file
   * @param secureRandom randomness source used only when a new key pair has to be generated
   * @throws IllegalArgumentException if the parent directory is missing and cannot be created
   */
  def loadAsymmetricCipherKeyPair(filePath: String, secureRandom: SecureRandom): AsymmetricCipherKeyPair = {
    val file = new File(filePath)
    if (file.exists()) readKeyPair(file) else createKeyPair(file, secureRandom)
  }

  /** Generates a key pair and stores its private key, hex encoded, in `file`. */
  private def createKeyPair(file: File, secureRandom: SecureRandom): AsymmetricCipherKeyPair = {
    val keysValuePair = generateKeyPair(secureRandom)
    val (priv, _) = keyPairToByteArrays(keysValuePair)

    // getParentFile is null for a path without a parent component (e.g. "node.key"); such a
    // file lives in the working directory, so there is nothing to create.
    val parentReady = Option(file.getParentFile).forall(dir => dir.exists() || dir.mkdirs())
    require(parentReady, "Key's file parent directory creation failed")

    val writer = new PrintWriter(file)
    try {
      writer.write(Hex.toHexString(priv))
    } finally {
      writer.close()
    }
    keysValuePair
  }

  /** Reads a hex-encoded private key from `file` and derives the key pair from it. */
  private def readKeyPair(file: File): AsymmetricCipherKeyPair = {
    val reader = Source.fromFile(file)
    try {
      keyPairFromPrvKey(Hex.decode(reader.mkString))
    } finally {
      reader.close()
    }
  }

  /**
   * Textual address of `address`; IPv6 addresses are wrapped in square brackets.
   *
   * @param address address to format
   */
  def getHostName(address: InetAddress): String = {
    val hostName = address.getHostAddress
    address match {
      case _: Inet6Address => s"[$hostName]"
      case _ => hostName
    }
  }
}
import java.io.{File, PrintWriter}

import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.clustering.GaussianMixture
import org.apache.spark.sql.functions._

/**
 * Fits a Gaussian mixture model with `quantity` components to the 2-D points stored in
 * `pathToTextFile` and writes one `x<TAB>y<TAB>cluster` line per point to
 * `<path without extension>-GMM-k<quantity>.tsv`. Cluster ids in the output start at 1.
 *
 * Expects a SparkContext named `sc` in scope (as in spark-shell).
 *
 * @param pathToTextFile text file with two whitespace-separated numbers per line
 * @param quantity       number of mixture components
 */
def computeGaussianMixtureModel(pathToTextFile: String, quantity: Int): Unit = {

  // Runs `func` against a writer on `f` and always closes the writer.
  def save(f: File)(func: PrintWriter => Unit): Unit = {
    val p = new PrintWriter(f)
    try {
      func(p)
    } finally {
      p.close()
    }
  }

  // Strip only the extension of the last path component. Splitting on the first '.' would
  // truncate paths such as "./data/points.txt" or "run.v2/points.txt" in the wrong place.
  val lastSeparator =
    math.max(pathToTextFile.lastIndexOf('/'), pathToTextFile.lastIndexOf(File.separatorChar))
  val lastDot = pathToTextFile.lastIndexOf('.')
  val filename =
    if (lastDot > lastSeparator + 1) pathToTextFile.substring(0, lastDot) else pathToTextFile
  val outputFilename = s"$filename-GMM-k${quantity}.tsv"

  val features = sc
    .textFile(pathToTextFile)
    .map(_.trim)
    .filter(_.nonEmpty) // a blank line would otherwise fail the whole job on row(1)
    .map { line =>
      val row = line.split("\\s+")
      Vectors.dense(row(0).toDouble, row(1).toDouble)
    }

  // Cached because the data is traversed by the fit and again by the prediction pass.
  features.cache()

  val gmm = new GaussianMixture()
    .setK(quantity)
    .run(features)

  val predictions = features
    .map { f => (f(0), f(1), gmm.predict(f) + 1) }
    .collect()

  println(s"OUTPUT TO: ${outputFilename}")
  save(new File(outputFilename)) { f =>
    predictions.foreach { case (x, y, ccid) => f.println(s"${x}\t${y}\t${ccid}") }
  }
}
Source File: PointCloudRelation.scala From geotrellis-pointcloud with Apache License 2.0 | 5 votes |
package geotrellis.pointcloud.spark.datasource

// NOTE(review): several import targets are missing below (bare `import` keywords and an
// `import{...}` without a package). The body uses `File`, `HdfsUtils`, `HadoopPointCloudRDD`
// and `MetadataBuilder` without a visible import for some of them — restore from upstream.
import import{Options => HadoopOptions}
import geotrellis.pointcloud.util.Filesystem
import geotrellis.proj4.CRS
import import geotrellis.vector.Extent

import cats.implicits._
import io.pdal._
import io.circe.syntax._
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.sources.{BaseRelation, TableScan}
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Row, SQLContext}

import import scala.collection.JavaConverters._

// This class has to be serializable since it is shipped over the network.
/**
 * Relation over point-cloud data located at `path`.
 *
 * @param sqlContext SQL context the relation belongs to
 * @param path       location of the data; "s3" and "hdfs" prefixes are treated as remote
 * @param options    read options, including the pipeline JSON
 */
class PointCloudRelation(
  val sqlContext: SQLContext,
  path: String,
  options: HadoopOptions
) extends BaseRelation with TableScan with Serializable {

  @transient implicit lazy val sc: SparkContext = sqlContext.sparkContext

  // TODO: switch between HadoopPointCloudRDD and S3PointcCloudRDD
  lazy val isS3: Boolean = path.startsWith("s3")

  /**
   * Derives the schema by executing the pipeline against a local copy of the data and reading
   * the first point cloud of the first point view; aggregated extent/CRS metadata is attached
   * under the "metadata" key.
   */
  override def schema: StructType = {
    // For remote paths the file is first copied into a fresh local temp directory; `local`
    // records whether that copy was made so it can be deleted afterwards.
    lazy val (local, fixedPath) =
      if(path.startsWith("s3") || path.startsWith("hdfs")) {
        val tmpDir = Filesystem.createDirectory()
        val remotePath = new Path(path)
        // copy remote file into local tmp dir
        val localPath = new File(tmpDir, remotePath.getName)
        HdfsUtils.copyPath(remotePath, new Path(s"file:///${localPath.getAbsolutePath}"), sc.hadoopConfiguration)
        (true, localPath.toString)
      } else (false, path)

    // Point the pipeline's "filename" field at the local path; when that field cannot be
    // reached the original pipeline is used unchanged.
    val localPipeline =
      options.pipeline
        .hcursor
        .downField("pipeline").downArray
        .downField("filename").withFocus(_ => fixedPath.asJson)
        .top.fold(options.pipeline)(identity)

    val pl = Pipeline(localPipeline.noSpaces)
    // NOTE(review): when validation fails the pipeline is not executed, yet its point views
    // are still read below — confirm the intended behaviour.
    if (pl.validate()) pl.execute()
    val pointCloud =
      try {
        pl.getPointViews().next().getPointCloud(0)
      } finally {
        pl.close()
        // Remove the temporary local copy; the boolean result of the delete is printed.
        // NOTE(review): the temp directory itself is not removed here.
        if(local) println(new File(fixedPath).delete)
      }

    val rdd = HadoopPointCloudRDD(new Path(path), options)

    // NOTE(review): the tuple built from `header` has lost its contents (`(, }`); given the
    // declared type it presumably carried the header's extent and CRS — confirm.
    // The reduce combines the extents and keeps the CRS of the left operand.
    val md: (Option[Extent], Option[CRS]) =
      rdd
        .map { case (header, _) => (, }
        .reduce { case ((e1, c), (e2, _)) => ((e1, e2).mapN(_ combine _), c) }

    val metadata = new MetadataBuilder().putString("metadata", md.asJson.noSpaces).build

    pointCloud.deriveSchema(metadata)
  }

  /** Scans the data and produces one `Row` per element of the inner collection. */
  override def buildScan(): RDD[Row] = {
    val rdd = HadoopPointCloudRDD(new Path(path), options)
    // NOTE(review): the expression in front of `{ k => Row(k: _*) }` is missing — presumably
    // a conversion of each point cloud `pc` into per-point value sequences; confirm.
    rdd.flatMap { _._2.flatMap { pc => { k => Row(k: _*) } } }
  }
}
Source File: S3PointCloudInputFormat.scala From geotrellis-pointcloud with Apache License 2.0 | 5 votes |
// NOTE(review): the package name and several import targets are missing below; the body uses
// `URI` and `FileUtils` without a visible import. The declaration that encloses the
// `mode match` expression (class and method headers) is also absent from this excerpt; only
// its closing braces remain. Restore all of this from the upstream file.
package import import import geotrellis.pointcloud.util.Filesystem

import io.pdal._
import io.circe.Json
import io.circe.syntax._
import cats.syntax.either._
import org.apache.hadoop.mapreduce.{InputSplit, TaskAttemptContext}

import import{File, InputStream}
import import scala.collection.JavaConverters._

    // Chooses the record reader by `mode`: "s3" hands the object's URI to the pipeline
    // directly; any other mode first copies the object into a local temporary file.
    mode match {
      case "s3" =>
        new S3URIRecordReader[S3PointCloudHeader, List[PointCloud]](s3Client) {
          def read(key: String, uri: URI): (S3PointCloudHeader, List[PointCloud]) = {
            // Point the pipeline's "filename" field at the URI; when the field cannot be
            // reached the original pipeline is used unchanged.
            val s3Pipeline =
              pipeline
                .hcursor
                .downField("pipeline").downArray
                .downField("filename").withFocus(_ => uri.toString.asJson)
                .top.fold(pipeline)(identity)

            executePipeline(context)(key, s3Pipeline)
          }
        }

      case _ =>
        // Use the configured temp directory when one is set, otherwise a default one.
        val tmpDir = {
          val dir = PointCloudInputFormat.getTmpDir(context)
          if (dir == null) Filesystem.createDirectory()
          else Filesystem.createDirectory(dir)
        }

        new S3StreamRecordReader[S3PointCloudHeader, List[PointCloud]](s3Client) {
          def read(key: String, is: InputStream): (S3PointCloudHeader, List[PointCloud]) = {
            // copy remote file into local tmp dir
            tmpDir.mkdirs() // to be sure that dirs created
            // "/" in the key is replaced so the local copy is a single file in tmpDir.
            val localPath = new File(tmpDir, key.replace("/", "_"))
            // NOTE(review): `is` is closed only if the copy returns normally; whether the
            // copy helper closes it on failure is not visible here.
            FileUtils.copyInputStreamToFile(is, localPath)
            is.close()

            // use local filename path if it's present in json
            val localPipeline =
              pipeline
                .hcursor
                .downField("pipeline").downArray
                .downField("filename").withFocus(_ => localPath.getAbsolutePath.asJson)
                .top.fold(pipeline)(identity)

            // The local copy is deleted whether or not the pipeline succeeds.
            // NOTE(review): `tmpDir` is created once outside `read` but deleted after every
            // read; File.delete only removes an empty directory, and mkdirs() above
            // re-creates it on the next call.
            try executePipeline(context)(key, localPipeline) finally {
              localPath.delete()
              tmpDir.delete()
            }
          }
        }
    }
  }
}
Source File: PointCloudTestEnvironment.scala From geotrellis-pointcloud with Apache License 2.0 | 5 votes |
package geotrellis.pointcloud.spark

import geotrellis.spark.testkit._
import org.apache.hadoop.fs.Path
import org.scalatest.Suite

// NOTE(review): the `import` below has lost its target. The body uses `File`, which nothing
// visible imports — presumably java.io.File; confirm.
import

/** Test mix-in that exposes the LAS test-resource paths and an S3 configuration helper. */
trait PointCloudTestEnvironment extends TestEnvironment { self: Suite =>
  // Resolved relative to the working directory of the test run.
  val testResources = new File("src/test/resources")
  val lasPath = new Path(s"file://${testResources.getAbsolutePath}/las")
  val multipleLasPath = new Path(s"file://${testResources.getAbsolutePath}/las/files")

  /**
   * Configures the Hadoop configuration of the test Spark context for S3 access. Failures are
   * printed and otherwise ignored.
   */
  def setS3Credentials: Unit = {
    try {
      val conf = ssc.sparkContext.hadoopConfiguration

      conf.set("fs.s3.impl", classOf[org.apache.hadoop.fs.s3a.S3AFileSystem].getName)
      // NOTE(review): the key of the next setting and the value of "fs.s3a.endpoint" are empty
      // strings here — they look stripped. The first presumably names the credentials-provider
      // setting; restore both from the upstream file.
      conf.set("", classOf[com.amazonaws.auth.DefaultAWSCredentialsProviderChain].getName)
      conf.set("fs.s3a.endpoint", "")
    } catch {
      // NOTE(review): catches every Throwable, including fatal errors, and only prints the
      // message — confirm this best-effort behaviour is intended.
      case e: Throwable => println(e.getMessage)
    }
  }
}
Source File: CreateSaltedTable.scala From Taxi360 with Apache License 2.0 | 5 votes |
package com.hadooparchitecturebook.taxi360.setup.hbase

import java.io.File

import org.apache.commons.lang.StringUtils
import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.ConnectionFactory
import org.apache.hadoop.hbase.io.compress.Compression
import org.apache.hadoop.hbase.regionserver.{BloomType, ConstantSizeRegionSplitPolicy}
import org.apache.hadoop.hbase.util.Bytes

import scala.collection.mutable

/** Command-line tool that creates a pre-split, Snappy-compressed HBase table for salted keys. */
object CreateSaltedTable {

  private val Usage = "<tableName> <columnFamily> <regionCount> <numOfSalts> <hbaseConfigFolder>"
  private val RequiredArgs = 5

  /**
   * Entry point. Prints the usage line and returns when fewer than five arguments are given;
   * previously the usage was printed only for zero arguments and execution then continued
   * into `args(0)`.
   *
   * @param args tableName, columnFamily, regionCount, numOfSalts, hbaseConfigFolder
   */
  def main(args: Array[String]): Unit = {
    if (args.length < RequiredArgs) {
      println(Usage)
    } else {
      createTable(
        tableName = args(0),
        columnFamilyName = args(1),
        regionCount = args(2).toInt,
        numOfSalts = args(3).toInt,
        hbaseConfigFolder = args(4))
    }
  }

  /** Creates the table with one column family and split keys derived from the salt range. */
  private def createTable(tableName: String,
                          columnFamilyName: String,
                          regionCount: Int,
                          numOfSalts: Int,
                          hbaseConfigFolder: String): Unit = {
    // regionCount is a divisor below; fail with a clear message instead of ArithmeticException.
    require(regionCount > 0, s"regionCount must be positive, got $regionCount")

    val conf = HBaseConfiguration.create()
    // File(parent, child) inserts the separator when the folder has no trailing slash.
    conf.addResource(new File(hbaseConfigFolder, "hbase-site.xml").toURI.toURL)

    val connection = ConnectionFactory.createConnection(conf)
    try {
      val admin = connection.getAdmin
      try {
        val columnDescriptor = new HColumnDescriptor(columnFamilyName)
        columnDescriptor.setCompressionType(Compression.Algorithm.SNAPPY)
        columnDescriptor.setBlocksize(64 * 1024)
        columnDescriptor.setBloomFilterType(BloomType.ROW)

        val tableDescriptor = new HTableDescriptor(TableName.valueOf(tableName))
        tableDescriptor.addFamily(columnDescriptor)
        // Maximum file size plus the constant-size policy: the split layout is fixed by the
        // keys supplied below.
        tableDescriptor.setMaxFileSize(Long.MaxValue)
        tableDescriptor.setRegionSplitPolicyClassName(classOf[ConstantSizeRegionSplitPolicy].getName)

        // Split keys are the salt values 0, step, 2*step, ... zero-padded to four characters.
        // NOTE(review): `0 to regionCount` is inclusive, so regionCount + 1 keys are produced,
        // and integer division makes every key "0000" when numOfSalts < regionCount. Kept as
        // in the original because the resulting layout may be relied upon — confirm.
        val saltsPerRegion = numOfSalts / regionCount
        val splitKeys: Array[Array[Byte]] = (0 to regionCount).map { i =>
          Bytes.toBytes(StringUtils.leftPad((i * saltsPerRegion).toString, 4, "0"))
        }.toArray

        admin.createTable(tableDescriptor, splitKeys)
      } finally {
        admin.close()
      }
    } finally {
      connection.close()
    }
  }
}
Source File: HBaseRestServer.scala From Taxi360 with Apache License 2.0 | 5 votes |
package com.hadooparchitecturebook.taxi360.server.hbase

import java.io.File

import com.sun.jersey.spi.container.servlet.ServletContainer
import org.apache.hadoop.hbase.HBaseConfiguration
import org.mortbay.jetty.Server
import org.mortbay.jetty.servlet.{Context, ServletHolder}

/** Starts a Jetty server that hosts the Jersey resources of this package. */
object HBaseRestServer {

  private val Usage = "<port> <configDir> <numberOfSalts> <customerTableName>"
  private val RequiredArgs = 4

  /**
   * Entry point. Prints the usage line and returns when fewer than four arguments are given;
   * previously the usage was printed only for zero arguments and execution then continued
   * into `args(0)`.
   *
   * @param args port, configDir, numberOfSalts, customerTableName
   */
  def main(args: Array[String]): Unit = {
    if (args.length < RequiredArgs) {
      println(Usage)
    } else {
      startServer(
        port = args(0).toInt,
        hbaseConfigFolder = args(1),
        numberOfSalts = args(2).toInt,
        appEventTableName = args(3))
    }
  }

  /** Initialises the shared HBase values, starts the server and blocks until it stops. */
  private def startServer(port: Int,
                          hbaseConfigFolder: String,
                          numberOfSalts: Int,
                          appEventTableName: String): Unit = {
    val conf = HBaseConfiguration.create()
    // File(parent, child) inserts the separator when the folder has no trailing slash.
    conf.addResource(new File(hbaseConfigFolder, "hbase-site.xml").toURI.toURL)

    HBaseGlobalValues.init(conf, numberOfSalts, appEventTableName)

    val server = new Server(port)

    val sh = new ServletHolder(classOf[ServletContainer])
    // NOTE(review): the names of the next two init parameters are empty strings here — they
    // look stripped. Judging by the values they presumably name the Jersey resource-config
    // class and the resource packages; restore them from the upstream file.
    sh.setInitParameter("", "com.sun.jersey.api.core.PackagesResourceConfig")
    sh.setInitParameter("", "com.hadooparchitecturebook.taxi360.server.hbase")
    sh.setInitParameter("com.sun.jersey.api.json.POJOMappingFeature", "true")

    val context = new Context(server, "/", Context.SESSIONS)
    context.addServlet(sh, "/*")

    println("starting HBase Rest Server")
    server.start()
    println("started HBase Rest Sserver")
    // Blocks the calling thread until the server terminates.
    server.join()
  }
}
Source File: CsvKafkaPublisher.scala From Taxi360 with Apache License 2.0 | 5 votes |
package com.hadooparchitecturebook.taxi360.common

import java.io.File
import java.util.Random

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.io.Source

/** Publishes the lines of one CSV file, or of every file in a folder, to a Kafka topic. */
object CsvKafkaPublisher {

  // NOTE(review): not used by the code in this object (processFile keeps its own count); kept
  // because it is a public member.
  var counter = 0
  // Number of salt buckets; set from the command line before any file is processed.
  var salts = 0

  private val RequiredArgs = 9

  /**
   * Entry point. Prints the usage line and returns when fewer than nine arguments are given
   * (the original only checked for zero arguments and then read `args(8)`).
   *
   * @param args brokerList, topicName, dataFolderOrFile, sleepPerRecord, acks, linger.ms,
   *             producer.type, batch.size, salts
   */
  def main(args: Array[String]): Unit = {
    if (args.length < RequiredArgs) {
      println("<brokerList> " +
        "<topicName> " +
        "<dataFolderOrFile> " +
        "<sleepPerRecord> " +
        "<acks> " +
        "<linger.ms> " +
        "<producer.type> " +
        "<batch.size> " +
        "<salts>")
    } else {
      val kafkaBrokerList = args(0)
      val kafkaTopicName = args(1)
      val nyTaxiDataFolder = args(2)
      val sleepPerRecord = args(3).toInt
      val acks = args(4).toInt
      val lingerMs = args(5).toInt
      val producerType = args(6) //"async"
      val batchSize = args(7).toInt
      salts = args(8).toInt

      val kafkaProducer =
        KafkaProducerUntil.getNewProducer(kafkaBrokerList, acks, lingerMs, producerType, batchSize)

      try {
        println("--Input:" + nyTaxiDataFolder)

        val dataFolder = new File(nyTaxiDataFolder)
        if (dataFolder.isDirectory) {
          // listFiles returns null when the directory cannot be read.
          Option(dataFolder.listFiles()).getOrElse(Array.empty[File]).foreach { f =>
            println("--Input:" + f)
            processFile(f, kafkaTopicName, kafkaProducer, sleepPerRecord)
          }
        } else {
          println("--Input:" + dataFolder)
          processFile(dataFolder, kafkaTopicName, kafkaProducer, sleepPerRecord)
        }
      } finally {
        // send() is asynchronous; closing the producer flushes records that are still
        // buffered, which would otherwise be lost when the JVM exits.
        kafkaProducer.close()
      }
      println("---Done")
    }
  }

  /**
   * Reads `file` line by line and publishes every line after the second, prefixed with a
   * random salt in `[0, salts)`.
   *
   * @param file           CSV file to read
   * @param kafkaTopicName destination topic
   * @param kafkaProducer  producer used for sending
   * @param sleepPerRecord milliseconds to sleep before handling each line
   */
  def processFile(file: File, kafkaTopicName: String,
                  kafkaProducer: KafkaProducer[String, String], sleepPerRecord: Int): Unit = {
    val r = new Random()
    println("-Starting Reading")

    val source = Source.fromFile(file)
    try {
      source.getLines().zipWithIndex.foreach { case (l, index) =>
        val lineNumber = index + 1

        // Progress output: a summary every 10000 lines and a dot every 100 lines.
        if (lineNumber % 10000 == 0) {
          println("{Sent:" + lineNumber + "}")
        }
        if (lineNumber % 100 == 0) {
          print(".")
        }
        Thread.sleep(sleepPerRecord)

        val saltedVender = r.nextInt(salts) + l

        // The first two lines of each file are never published.
        if (lineNumber > 2) {
          publishTaxiRecord(saltedVender, kafkaTopicName, kafkaProducer)
        }
      }
    } finally {
      // Release the file handle even when reading or sending fails.
      source.close()
    }
  }

  /**
   * Sends `line` to `kafkaTopicName`, keyed by the line's hash code. Lines that start with
   * "vendor_name" or are shorter than 10 characters are skipped.
   *
   * NOTE(review): processFile passes the salted line, so the "vendor_name" prefix check cannot
   * match a header there; headers are only excluded by the line-number rule above.
   */
  def publishTaxiRecord(line: String, kafkaTopicName: String,
                        kafkaProducer: KafkaProducer[String, String]): Unit = {
    if (line.startsWith("vendor_name") || line.length < 10) {
      println("skip")
    } else {
      val message = new ProducerRecord[String, String](kafkaTopicName, line.hashCode.toString, line)
      kafkaProducer.send(message)
    }
  }
}
Source File: SparkStreamingTaxiTripToHBase.scala From Taxi360 with Apache License 2.0 | 5 votes |
package com.hadooparchitecturebook.taxi360.streaming.ingestion.hbase

// NOTE(review): two import targets are missing below (bare `import` keywords). The body uses
// `File` and `ZooKeeperException` without a visible import — restore from the upstream file.
import import com.hadooparchitecturebook.taxi360.model.NyTaxiYellowTripBuilder
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.HBaseDStreamFunctions._
import kafka.serializer.StringDecoder
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}

/** Spark Streaming job that reads comma-separated records from Kafka and bulk-puts them into HBase. */
object SparkStreamingTaxiTripToHBase {

  /**
   * Entry point. With no arguments it prints the expected argument list and returns.
   *
   * @param args KafkaBrokerList, kafkaTopicList (comma separated), numberOfSeconds (batch
   *             interval), runLocal ("l" selects a local context), hbaseTable, numOfSalts,
   *             checkpointDir, hbaseConfigFolder
   */
  def main(args: Array[String]): Unit = {
    println("Java Version:" + System.getProperty("java.version"))
    println("Java Home:" + System.getProperties().getProperty("java.home"))

    // NOTE(review): `v` is never used; presumably this only forces a reference to the
    // ZooKeeperException class — confirm whether it is still needed.
    val v:ZooKeeperException = null

    // NOTE(review): only the zero-argument case is caught; with 1 to 7 arguments the reads
    // below fail with ArrayIndexOutOfBoundsException.
    if (args.length == 0) {
      println("Args: <KafkaBrokerList> " +
        "<kafkaTopicList> " +
        "<numberOfSeconds>" +
        "<runLocal>" +
        "<hbaseTable>" +
        "<numOfSalts>" +
        "<checkpointDir>" +
        "<hbaseConfigFolder>")
      return
    }

    val kafkaBrokerList = args(0)
    val kafkaTopicList = args(1)
    val numberOfSeconds = args(2).toInt
    val runLocal = args(3).equals("l")
    val tableName = args(4)
    val numOfSalts = args(5).toInt
    val checkpointFolder = args(6)
    val hbaseConfigFolder = args(7)

    println("kafkaBrokerList:" + kafkaBrokerList)
    println("kafkaTopicList:" + kafkaTopicList)
    println("numberOfSeconds:" + numberOfSeconds)
    println("runLocal:" + runLocal)
    println("tableName:" + tableName)
    println("numOfSalts:" + numOfSalts)

    // Local mode uses a two-thread local master with compression switched off; otherwise the
    // master is taken from the environment the job is submitted to.
    val sc:SparkContext = if (runLocal) {
      val sparkConfig = new SparkConf()
      sparkConfig.set("spark.broadcast.compress", "false")
      sparkConfig.set("spark.shuffle.compress", "false")
      sparkConfig.set("spark.shuffle.spill.compress", "false")
      new SparkContext("local[2]", "TableStatsSinglePathMain", sparkConfig)
    } else {
      val sparkConf = new SparkConf().setAppName("Spark Streaming Ingestion to HBase")
      new SparkContext(sparkConf)
    }

    val ssc = new StreamingContext(sc, Seconds(numberOfSeconds))

    val topicsSet = kafkaTopicList.split(",").toSet
    // NOTE(review): the parameter key is an empty string here — it looks stripped; presumably
    // it is the Kafka setting that carries the broker list. Restore from upstream.
    val kafkaParams = Map[String, String]("" -> kafkaBrokerList)

    val messageStream = KafkaUtils.
      createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topicsSet)

    val conf = HBaseConfiguration.create()
    // NOTE(review): folder and file name are concatenated without a separator, so
    // `hbaseConfigFolder` must end with one.
    conf.addResource(new File(hbaseConfigFolder + "hbase-site.xml").toURI.toURL)
    val hbaseContext = new HBaseContext(sc, conf)

    // NOTE(review): parts of this expression are missing (the receiver in front of `=> {` and
    // the second tuple element of the final map). What remains splits each value on ",",
    // keeps records with more than three fields, and maps them again — presumably into trip
    // objects built from the fields; confirm.
    val tripDStream = => {
      (r._1, r._2.split(","))
    }).filter(r => r._2.size > 3).map(r => {
      (r._1, })

    // Each element's second component is turned into an HBase Put by TaxiTripHBaseHelper.
    tripDStream.hbaseBulkPut(hbaseContext, TableName.valueOf(tableName), taxi => {
      TaxiTripHBaseHelper.generatePut(taxi._2, numOfSalts)
    })

    ssc.checkpoint(checkpointFolder)
    ssc.start()
    // Blocks until the streaming context is stopped.
    ssc.awaitTermination()
  }
}
Source File: SampleRoutes.scala From akka_streams_tutorial with MIT License | 5 votes |
package akkahttp

// NOTE(review): two of the three `import` keywords below have lost their targets. The body
// uses `File` and `ActorSystem`, which nothing visible imports — presumably java.io.File and
// akka.actor.ActorSystem; confirm.
import import import akka.http.scaladsl.Http
import akka.http.scaladsl.server.Directives._
import akka.http.scaladsl.server.Route
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Await
import scala.concurrent.duration._
import scala.sys.process.Process
import scala.util.{Failure, Success}

/**
 * Small HTTP server with two routes: a browsable directory listing under `/entries` and a
 * form-data echo under `/post`. Binds on port 8000 and registers a JVM shutdown hook.
 */
object SampleRoutes extends App {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)
  implicit val system = ActorSystem("SampleRoutes")
  implicit val executionContext = system.dispatcher

  /** Serves the "tmp" directory at the file-system root under the `entries` path prefix. */
  def getFromBrowsableDir: Route = {
    val dirToBrowse = File.separator + "tmp"

    // pathPrefix allows loading dirs and files recursively
    pathPrefix("entries") {
      getFromBrowseableDirectory(dirToBrowse)
    }
  }

  /** Answers a request to `/post` with a sentence built from two form fields. */
  def parseFormData: Route = path("post") {
    // NOTE(review): the second field definition is incomplete (`'[Int]`); judging by the
    // handler it presumably extracts an integer field named after `age` — confirm.
    formFields('color, '[Int]) { (color, age) =>
      complete(s"The color is '$color' and the age is $age")
    }
  }

  /** Both routes combined; the directory route is tried first. */
  def routes: Route = {
    getFromBrowsableDir ~ parseFormData
  }

  // NOTE(review): the bind interface is an empty string here — it looks stripped; restore the
  // intended host from the upstream file.
  val bindingFuture = Http().bindAndHandle(routes, "", 8000)

  bindingFuture.onComplete {
    case Success(b) =>
      println("Server started, listening on: " + b.localAddress)
    case Failure(e) =>
      // A failed bind stops the actor system.
      println(s"Server could not bind to... Exception message: ${e.getMessage}")
      system.terminate()
  }

  /** On macOS, opens the sample HTML page with the `open` command. */
  def browserClient() = {
    // NOTE(review): the property key is an empty string here, which System.getProperty
    // rejects; presumably the OS-name property was meant — confirm.
    val os = System.getProperty("").toLowerCase
    if (os == "mac os x") Process("open ./src/main/resources/SampleRoutes.html").!
  }

  browserClient()

  sys.addShutdownHook {
    println("About to shutdown...")
    // NOTE(review): the receiver of this expression is missing; presumably the binding future
    // is mapped to a terminate call with a 3-second hard deadline — confirm.
    val fut = => serverBinding.terminate(hardDeadline = 3.seconds))
    println("Waiting for connections to terminate...")
    // Blocks the shutdown-hook thread for at most 10 seconds.
    val onceAllConnectionsTerminated = Await.result(fut, 10.seconds)
    println("Connections terminated")
    onceAllConnectionsTerminated.flatMap { _ => system.terminate()
    }
  }
}
Source File: KafkaServer.scala From akka_streams_tutorial with MIT License | 5 votes |
package alpakka.env

// NOTE(review): three import targets are missing below (bare `import` keywords). The body uses
// `File`, `InetSocketAddress` and `FileUtils` without a visible import — presumably
// java.io.File, java.net.InetSocketAddress and a commons-io FileUtils; confirm.
import import import java.nio.file.{Files, Paths}
import java.util.Properties

import kafka.server.{KafkaConfig, KafkaServerStartable}
import import org.apache.zookeeper.server.quorum.QuorumPeerConfig
import org.apache.zookeeper.server.{ServerConfig, ZooKeeperServerMain}

/**
 * Starts a single ZooKeeper server and a single Kafka broker inside this JVM. Kafka logs are
 * kept in `/tmp/kafka-logs`; ZooKeeper data goes to fresh temporary directories.
 */
object KafkaServer extends App {

  val zookeeperPort = 2181

  val kafkaLogs = "/tmp/kafka-logs"
  val kafkaLogsPath = Paths.get(kafkaLogs)

  // See:
  // NOTE(review): the reference after "See:" and the file name passed to `resolve` are
  // missing. With an empty argument `resolve` yields the log directory itself, so as written
  // this would delete the whole directory rather than a single file; restore the intended
  // file name from upstream.
  def fix25Behaviour() = {
    val fileWithConflictingContent = kafkaLogsPath.resolve("").toFile
    if (fileWithConflictingContent.exists()) FileUtils.forceDelete(fileWithConflictingContent)
  }

  /** Deletes the whole Kafka log directory when it exists. */
  def removeKafkaLogs(): Unit = {
    if (kafkaLogsPath.toFile.exists()) FileUtils.forceDelete(kafkaLogsPath.toFile)
  }

  // Keeps the persistent data
  fix25Behaviour()
  // If everything fails
  //removeKafkaLogs()

  val quorumConfiguration = new QuorumPeerConfig {
    // Since we do not run a cluster, we are not interested in zookeeper data
    // NOTE(review): each call to these getters creates a new temporary directory.
    override def getDataDir: File = Files.createTempDirectory("zookeeper").toFile
    override def getDataLogDir: File = Files.createTempDirectory("zookeeper-logs").toFile
    override def getClientPortAddress: InetSocketAddress = new InetSocketAddress(zookeeperPort)
  }

  // Subclass that makes the inherited `shutdown()` callable from the shutdown hook below.
  class StoppableZooKeeperServerMain extends ZooKeeperServerMain {
    def stop(): Unit = shutdown()
  }

  val zooKeeperServer = new StoppableZooKeeperServerMain()

  val zooKeeperConfig = new ServerConfig()
  zooKeeperConfig.readFrom(quorumConfiguration)

  // ZooKeeper runs on its own thread; the main thread joins it on the last line of this object.
  val zooKeeperThread = new Thread {
    override def run(): Unit = zooKeeperServer.runFromConfig(zooKeeperConfig)
  }

  zooKeeperThread.start()

  // NOTE(review): three property keys below are empty strings — they look stripped; as written
  // they all write to the same "" key. Restore the key names from upstream.
  val kafkaProperties = new Properties()
  kafkaProperties.put("zookeeper.connect", s"localhost:$zookeeperPort")
  kafkaProperties.put("", "0")
  kafkaProperties.put("offsets.topic.replication.factor", "1")
  kafkaProperties.put("log.dirs", kafkaLogs)
  kafkaProperties.put("delete.topic.enable", "true")
  kafkaProperties.put("", "0")
  kafkaProperties.put("transaction.state.log.min.isr", "1")
  kafkaProperties.put("transaction.state.log.replication.factor", "1")
  kafkaProperties.put("", "6000")
  kafkaProperties.put("num.partitions", "10")

  val kafkaConfig = KafkaConfig.fromProps(kafkaProperties)

  val kafka = new KafkaServerStartable(kafkaConfig)

  println("About to start...")
  kafka.startup()

  // On JVM shutdown: stop Kafka first, wait for it, then stop ZooKeeper.
  scala.sys.addShutdownHook{
    println("About to shutdown...")
    kafka.shutdown()
    kafka.awaitShutdown()
    zooKeeperServer.stop()
  }

  // Keeps the main thread alive for as long as ZooKeeper is running.
  zooKeeperThread.join()
}
Source File: SparkSessionConfiguration.scala From spark-structured-streaming-examples with Apache License 2.0 | 5 votes |
package com.phylosoft.spark.learning

import java.io.File

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

/**
 * Mix-in that provides a lazily created, Hive-enabled [[SparkSession]].
 *
 * Implementors supply `settings`; these are applied after the defaults below and therefore
 * override any default that uses the same key.
 */
trait SparkSessionConfiguration {

  /** Extra Spark settings as key/value pairs. */
  val settings: Traversable[(String, String)]

  // Warehouse directory "spark-warehouse" under the current working directory.
  private val warehouseLocation = "file:///" + new File("spark-warehouse").getAbsolutePath

  // Lazy so that `settings` has been initialised by the implementing class before it is read.
  private lazy val conf = new SparkConf()
    .set("spark.sql.warehouse.dir", warehouseLocation)
    .set("spark.sql.session.timeZone", "UTC")
    .set("spark.sql.shuffle.partitions", "4") // keep the size of shuffles small
    .set("spark.sql.cbo.enabled", "true")
    .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    .set("spark.kryoserializer.buffer", "24")
    .setAll(settings)

  /** Session built from `conf`; created on first access, or reused when one already exists. */
  implicit lazy val spark: SparkSession = SparkSession.builder
    .config(conf)
    .enableHiveSupport()
    .getOrCreate()
}
Source File: CongestionModel.scala From spatial with MIT License | 5 votes |
package models

// NOTE(review): the targets of the bare `import` keywords below are missing
// from this copy of the file and cannot be determined from the visible code.
import
import
import
import utils.math.{CombinationTree, ReduceTree}
import

// Lattice-based congestion model: holds the lattice dimensions, lazily loads
// calibration keypoints and lattice parameters from ModelData for the current
// `model`, and exposes `evaluate`.
object CongestionModel {

  // A six-field feature vector, generic in the element type.
  abstract class FeatureVec[T] {
    def loads: T
    def stores: T
    def gateds: T
    def outerIters: T
    def innerIters: T
    def bitsPerCycle: T
    // Note the order differs from the declaration order above:
    // stores, outerIters, loads, innerIters, gateds, bitsPerCycle.
    def toSeq: Seq[T] = Seq(stores, outerIters, loads, innerIters, gateds, bitsPerCycle)
  }
  // Feature vector passed to `evaluate`.
  case class RawFeatureVec(loads: Double, stores: Double, gateds: Double, outerIters: Double, innerIters: Double, bitsPerCycle: Double) extends FeatureVec[Double]
  // Same shape as RawFeatureVec; presumably the output of calibration — TODO confirm.
  case class CalibFeatureVec(loads: Double, stores: Double, gateds: Double, outerIters: Double, innerIters: Double, bitsPerCycle: Double) extends FeatureVec[Double]

  // Set up lattice properties
  val feature_dims = 6
  val lattice_rank = 6
  val lattice_size = Seq(3,3,3,3,3,3)
  val num_keypoints = 8
  val num_lattices = 1
  // Name of the model whose data is loaded; overwritten by `evaluate`.
  var model: String = ""

  // Derive lattice properties
  val sizes = scala.Array.tabulate(lattice_rank){i => lattice_size(i)}
  val dimensions = sizes.length
  val params_per_lattice = sizes.product
  val strides: scala.Array[Int] = scala.Array.fill(dimensions){1}
  val nparams = num_lattices * params_per_lattice

  // Grab lattice params
  // NOTE(review): these are lazy vals, so each one reads `model` only on its
  // first access and keeps that result; a later change of `model` does not
  // reload them — confirm this is intended.
  lazy val loads_keypoints_inputs = ModelData.loads_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/loads_keypoints_inputs.csv", ","){x => x.toDouble}
  lazy val loads_keypoints_outputs = ModelData.loads_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/loads_keypoints_outputs.csv", ","){x => x.toDouble}
  lazy val stores_keypoints_inputs = ModelData.stores_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/stores_keypoints_inputs.csv", ","){x => x.toDouble}
  lazy val stores_keypoints_outputs = ModelData.stores_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/stores_keypoints_outputs.csv", ","){x => x.toDouble}
  lazy val gateds_keypoints_inputs = ModelData.gateds_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/gateds_keypoints_inputs.csv", ","){x => x.toDouble}
  lazy val gateds_keypoints_outputs = ModelData.gateds_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/gateds_keypoints_outputs.csv", ","){x => x.toDouble}
  lazy val outerIters_keypoints_inputs = ModelData.outerIters_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/outerIters_keypoints_inputs.csv", ","){x => x.toDouble}
  lazy val outerIters_keypoints_outputs = ModelData.outerIters_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/outerIters_keypoints_outputs.csv", ","){x => x.toDouble}
  lazy val innerIters_keypoints_inputs = ModelData.innerIters_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/innerIters_keypoints_inputs.csv", ","){x => x.toDouble}
  lazy val innerIters_keypoints_outputs = ModelData.innerIters_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/innerIters_keypoints_outputs.csv", ","){x => x.toDouble}
  lazy val bitsPerCycle_keypoints_inputs = ModelData.bitsPerCycle_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/bitsPerCycle_keypoints_inputs.csv", ","){x => x.toDouble}
  lazy val bitsPerCycle_keypoints_outputs = ModelData.bitsPerCycle_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/bitsPerCycle_keypoints_outputs.csv", ","){x => x.toDouble}
  lazy val params = ModelData.params(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/LATTICE_PARAMS.csv", ","){x => x.toDouble}

  // Sets `model` to the schedule's string form, calibrates the raw features,
  // evaluates them, and returns the truncated result clamped to at least 170.
  // NOTE(review): calibrate_features and hypercube_features are not defined in
  // the visible part of this object.
  def evaluate(features: RawFeatureVec, typ: Runtime.CtrlSchedule): Int = {
    model = typ.toString

    val calibrated_features = calibrate_features(features)
    val result = hypercube_features(calibrated_features)
    // TODO: Model is naughty if it returns <170
    // println(s"evaluating $features = ${170 max result.toInt}")
    170 max result.toInt
  }
}
Source File: LatencyAnalyzer.scala From spatial with MIT License | 5 votes |
package spatial.dse

import argon._
import spatial.lang._
import spatial.node._
import spatial.util.spatialConfig
import spatial.util.modeling._
import spatial.traversal._
import spatial.targets._
import java.io.File
import models._
import argon.node._

/**
  * Runs the generated runtime-model jar for batches of parameter assignments
  * and collects the reported cycle counts into `totalCycles`.
  */
case class LatencyAnalyzer(IR: State, latencyModel: LatencyModel) extends AccelTraversal {
  var cycleScope: List[Double] = Nil
  var intervalScope: List[Double] = Nil
  var totalCycles: Seq[Long] = Seq()

  /** Number of parameter sets passed to a single invocation of the model jar. */
  val batchSize = 1000

  /**
    * Lists every regular file below `d`, recursively.
    *
    * @param d directory to walk
    * @return paths of the regular files found, as strings
    */
  def getListOfFiles(d: String): List[String] = {
    import java.nio.file.{FileSystems, Files}
    import scala.collection.JavaConverters._
    val dir = FileSystems.getDefault.getPath(d)
    // Files.walk holds open directory handles until the stream is closed.
    val walked = Files.walk(dir)
    try walked.iterator().asScala.filter(Files.isRegularFile(_)).map(_.toString).toList
    finally walked.close()
  }

  override def silence(): Unit = {
    super.silence()
  }

  /**
    * Invokes `java -jar <RuntimeModel-assembly jar> ni tune … tune …` once per
    * batch of `batchSize` parameter sets and stores the numbers parsed from the
    * "Total Cycles for App" output lines in `totalCycles`.
    *
    * @param rewriteParams one inner sequence per design point; its elements are
    *                      joined with spaces after the word "tune"
    * @throws NoSuchElementException when no RuntimeModel-assembly jar is found
    */
  def test(rewriteParams: Seq[Seq[Any]]): Unit = {
    import scala.language.postfixOps
    import sys.process._

    val gen_dir = if (config.genDir.startsWith("/")) config.genDir + "/" else config.cwd + s"/${config.genDir}/"
    val modelDir = gen_dir + "/model"
    val modelJar = getListOfFiles(modelDir)
      .find(_.contains("RuntimeModel-assembly"))
      .getOrElse(throw new NoSuchElementException(s"No RuntimeModel-assembly jar found under $modelDir"))

    // Strips everything up to and including the last ": " on a result line.
    val prefix = "^.*: ".r

    totalCycles = rewriteParams.grouped(batchSize).flatMap { params =>
      val batchedParams = params.map { rp => "tune " + rp.mkString(" ") }.mkString(" ")
      val cmd = s"""java -jar ${modelJar} ni ${batchedParams}"""
      // println(s"running cmd: $cmd")
      val output = Process(cmd, new File(gen_dir)).!!
      output.split("\n").filter(_.contains("Total Cycles for App")).map { r =>
        prefix.replaceAllIn(r, "").trim.toLong
      }.toSeq
    }.toSeq
    // println(s"DSE Model result: $totalCycles")
  }

  override protected def preprocess[A](b: Block[A]): Block[A] = {
    super.preprocess(b)
  }

  override protected def postprocess[A](b: Block[A]): Block[A] = {
    super.postprocess(b)
  }

  // Intentionally empty: nothing is done per symbol.
  override protected def visit[A](lhs: Sym[A], rhs: Op[A]): Unit = {
  }
}
package emul

import java.io.File
import java.io.PrintStream

/**
  * Wraps memory reads and writes so that an `ArrayIndexOutOfBoundsException`
  * is absorbed, optionally logging every access to `./logs/reads.log` and
  * `./logs/writes.log`.
  */
object OOB {
  private val logDir = "./logs/"

  // Creates the log directory if needed before opening the stream, so the lazy
  // streams also work when open() has not been called first.
  private def openLog(fileName: String): PrintStream = {
    new File(logDir).mkdirs()
    new PrintStream(logDir + fileName)
  }

  lazy val writeStream: PrintStream = openLog("writes.log")
  lazy val readStream: PrintStream = openLog("reads.log")

  /** Creates the log directory and forces both log streams open. */
  def open(): Unit = {
    new File(logDir).mkdirs()
    writeStream
    readStream
  }

  /** Closes both log streams (forcing them open first if they were never used). */
  def close(): Unit = {
    writeStream.close()
    readStream.close()
  }

  /**
    * Evaluates `rd`; on an out-of-bounds access returns `invalid` instead.
    *
    * @param mem     memory name used in the log line
    * @param addr    address text used in the log line
    * @param invalid value returned when `rd` throws ArrayIndexOutOfBoundsException
    * @param en      when true the access is logged to the read log
    * @param rd      the read to perform (by-name)
    */
  def readOrElse[T](mem: String, addr: String, invalid: T, en: Boolean)(rd: => T): T = {
    try {
      val data = rd
      if (en) readStream.println(s"Mem: $mem; Addr: $addr")
      data
    }
    catch {case _: java.lang.ArrayIndexOutOfBoundsException =>
      if (en) readStream.println(s"Mem: $mem; Addr: $addr [OOB]")
      invalid
    }
  }

  /**
    * Evaluates `wr`; an out-of-bounds access is absorbed.
    *
    * @param mem  memory name used in the log line
    * @param addr address text used in the log line
    * @param data value shown in the log line
    * @param en   when true the access is logged to the write log
    * @param wr   the write to perform (by-name)
    */
  def writeOrElse(mem: String, addr: String, data: Any, en: Boolean)(wr: => Unit): Unit = {
    try {
      wr
      if (en) writeStream.println(s"Mem: $mem; Addr: $addr; Data: $data")
    }
    catch {case _: java.lang.ArrayIndexOutOfBoundsException =>
      if (en) writeStream.println(s"Mem: $mem; Addr: $addr; Data: $data [OOB]")
    }
  }
}
Source File: globals.scala From spatial with MIT License | 5 votes |
package fringe

// NOTE(review): the package prefix of the import below is missing from this
// copy, and so is the header of the enclosing definition that the final `}`
// closes. DATA_WIDTH, WORDS_PER_STREAM and StreamParInfo are used below but
// are not defined in the visible part.
import{File, PrintWriter}
import fringe.targets.DeviceTarget
import fringe.templates.axi4.{AXI4BundleParameters, AXI4StreamParameters}

  // Writer for the file "bigIP.tcl"; opened and flushed once by this initialiser.
  private var _tclScript: PrintWriter = {
    val pw = new PrintWriter(new File("bigIP.tcl"))
    pw.flush()
    pw
  }
  // Accessor pair so callers can read and replace the current script writer.
  def tclScript: PrintWriter = _tclScript
  def tclScript_=(value: PrintWriter): Unit = _tclScript = value

  // Value of the RUNNING_REGRESSION environment variable, "0" when unset.
  var regression_testing: String = scala.util.Properties.envOrElse("RUNNING_REGRESSION", "0")

  // Top parameters
  // These are set by the generated Instantiator class
  var numArgIns: Int = 1      // Number of ArgIn registers
  var numArgOuts: Int = 1     // Number of ArgOut registers
  var numArgIOs: Int = 0      // Number of HostIO registers
  var numArgInstrs: Int = 0   // TODO: What is this?
  var argOutLoopbacksMap: Map[Int,Int] = Map.empty // TODO: What is this?

  // Per-stream configuration lists; empty lists are replaced by a single
  // default entry in the accessors further down.
  var loadStreamInfo: List[StreamParInfo] = Nil
  var storeStreamInfo: List[StreamParInfo] = Nil
  var gatherStreamInfo: List[StreamParInfo] = Nil
  var scatterStreamInfo: List[StreamParInfo] = Nil
  var axiStreamInsInfo: List[AXI4StreamParameters] = List(AXI4StreamParameters(64,8,64))
  var axiStreamOutsInfo: List[AXI4StreamParameters] = List(AXI4StreamParameters(64,8,64))
  var numAllocators: Int = 0

  // Each accessor returns the configured list, or one default entry when the
  // configured list is empty.
  def LOAD_STREAMS: List[StreamParInfo] = if (loadStreamInfo.isEmpty) List(StreamParInfo(DATA_WIDTH, WORDS_PER_STREAM, 0)) else loadStreamInfo
  def STORE_STREAMS: List[StreamParInfo] = if (storeStreamInfo.isEmpty) List(StreamParInfo(DATA_WIDTH, WORDS_PER_STREAM, 0)) else storeStreamInfo
  def GATHER_STREAMS: List[StreamParInfo] = if (gatherStreamInfo.isEmpty) List(StreamParInfo(DATA_WIDTH, WORDS_PER_STREAM, 0)) else gatherStreamInfo
  def SCATTER_STREAMS: List[StreamParInfo] = if (scatterStreamInfo.isEmpty) List(StreamParInfo(DATA_WIDTH, WORDS_PER_STREAM, 0)) else scatterStreamInfo
  // NOTE(review): the fallback here (256,8,32) differs from the initial value
  // of the vars above (64,8,64) — confirm which one is intended.
  def AXI_STREAMS_IN: List[AXI4StreamParameters] = if (axiStreamInsInfo.isEmpty) List(AXI4StreamParameters(256,8,32)) else axiStreamInsInfo
  def AXI_STREAMS_OUT: List[AXI4StreamParameters] = if (axiStreamOutsInfo.isEmpty) List(AXI4StreamParameters(256,8,32)) else axiStreamOutsInfo

  // Derived counts.
  def NUM_LOAD_STREAMS: Int = LOAD_STREAMS.size
  def NUM_STORE_STREAMS: Int = STORE_STREAMS.size

  def NUM_ARG_INS: Int = numArgIns
  def NUM_ARG_OUTS: Int = numArgOuts
  def NUM_ARG_IOS: Int = numArgIOs
  // At least 1 even when no loopbacks are registered.
  def NUM_ARG_LOOPS: Int = argOutLoopbacksMap.size max 1
  // Sum of ArgIns and ArgOuts only; numArgIOs is not included.
  def NUM_ARGS: Int = numArgIns + numArgOuts
  def NUM_STREAMS: Int = LOAD_STREAMS.size + STORE_STREAMS.size
}
Source File: TemplateRunner.scala From spatial with MIT License | 5 votes |
package fringe.test

import java.io.File

import scala.collection.mutable.ArrayBuffer
import scala.util.Properties.envOrElse

/** Command-line style runner for a map of named template tests. */
object TemplateRunner {

  /**
    * Deletes `file`; directories are emptied first. A missing file is a no-op.
    *
    * @throws Exception when an existing file or directory cannot be deleted
    */
  def deleteRecursively(file: File): Unit = {
    // listFiles returns null on an I/O error; treat that as "no children".
    if (file.isDirectory) Option(file.listFiles).foreach(_.foreach(deleteRecursively))
    if (file.exists && !file.delete)
      throw new Exception(s"Unable to delete ${file.getAbsolutePath}")
  }

  /**
    * Runs the selected tests, prints a summary, and exits the JVM with status 1
    * when any test failed or an unknown name was requested.
    *
    * @param templateMap test name -> test function taking the backend name and
    *                    returning true on success
    * @param args        selection: empty runs everything; "all" runs everything;
    *                    a name ending in digits runs exactly that test; any other
    *                    value runs every test named `<value><digits>`
    */
  def apply(templateMap: Map[String, String => Boolean], args: Array[String]): Unit = {
    // Choose the default backend based on what is available.
    lazy val firrtlTerpBackendAvailable: Boolean = {
      try {
        val cls = Class.forName("chisel3.iotesters.FirrtlTerpBackend")
        cls != null
      }
      catch {
        // Deliberately broad: any failure to load the class means "not available".
        case _: Throwable => false
      }
    }
    lazy val defaultBackend = if (firrtlTerpBackendAvailable) "firrtl" else ""
    val backendName = envOrElse("TESTER_BACKENDS", defaultBackend).split(" ").head
    val tempDir = s"""${envOrElse("NEW_TEMPLATES_HOME", "tmp")}/test_run_dir/"""
    val specificRegex = "(.*[0-9]+)".r
    val allNames = templateMap.keys.toSeq.sorted

    val problemsToRun: Array[String] =
      if (args.isEmpty) {
        allNames.toArray // Run all by default
      } else {
        args.toSeq.flatMap {
          case "all" => allNames // Run all
          case specificRegex(c) => Seq(c) // Run specific test
          case arg => // Figure out tests that match this template and run all
            val tempRegex = s"(${arg}[0-9]+)".r
            allNames.filter(tempRegex.pattern.matcher(_).matches)
        }.toArray
      }

    var successful = 0
    val passedTests = new ArrayBuffer[String]
    val errors = new ArrayBuffer[String]
    for (testName <- problemsToRun) {
      // Wipe tempdir for consecutive tests of same module
      deleteRecursively(new File(tempDir))
      templateMap.get(testName) match {
        case Some(test) =>
          println(s"Starting template $testName")
          try {
            if (test(backendName)) {
              successful += 1
              passedTests += s"$testName"
            }
            else {
              errors += s"Template $testName: test error occurred"
            }
          }
          catch {
            case exception: Exception =>
              exception.printStackTrace()
              errors += s"Template $testName: exception ${exception.getMessage}"
            // Deliberately broad so one failing template cannot abort the run.
            case t: Throwable =>
              errors += s"Template $testName: throwable ${t.getMessage}"
          }
        case _ =>
          errors += s"Bad template name: $testName"
      }
    }
    if (successful > 0) {
      println(s"""Templates passing: $successful (${passedTests.mkString(", ")})""")
    }
    if (errors.nonEmpty) {
      println("=" * 80)
      println(s"Errors: ${errors.length}: in the following templates")
      println(errors.mkString("\n"))
      println("=" * 80)
      System.exit(1)
    }
  }
}
Source File: AvroSource.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.avro

import java.io.File
import java.util.concurrent.atomic.AtomicBoolean

import com.sksamuel.exts.Logging
// NOTE(review): import restored from the use of the `Using` trait below; the
// package is presumed to sit next to com.sksamuel.exts.Logging — confirm.
import com.sksamuel.exts.io.Using
import io.eels._
import io.eels.datastream.{DataStream, Publisher, Subscriber, Subscription}
import io.eels.schema.StructType
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

/** A [[Source]] backed by a single Avro file at `path`. */
case class AvroSource(path: Path)
                     (implicit conf: Configuration, fs: FileSystem) extends Source with Using {

  /** Schema converted from the Avro schema of the first record in the file. */
  override lazy val schema: StructType = {
    using(AvroReaderFns.createAvroReader(path)) { reader =>
      // NOTE(review): `reader.next()` restored; the recovered text defined
      // `record` in terms of itself — confirm.
      val record = reader.next()
      AvroSchemaFns.fromAvroSchema(record.getSchema)
    }
  }

  /** One publisher for the one file. */
  override def parts(): Seq[Publisher[Seq[Row]]] = Seq(AvroSourcePublisher(path))
}

/** Publishes the rows of the Avro file at `path` in batches. */
case class AvroSourcePublisher(path: Path)
                              (implicit conf: Configuration, fs: FileSystem)
  extends Publisher[Seq[Row]] with Logging with Using {

  /**
    * Reads the file and pushes batches of `DataStream.DefaultBatchSize` rows to
    * `subscriber` until the file ends or the subscription is cancelled, then
    * signals completion. Any failure is forwarded to `subscriber.error`.
    */
  override def subscribe(subscriber: Subscriber[Seq[Row]]): Unit = {
    val deserializer = new AvroDeserializer()
    try {
      using(AvroReaderFns.createAvroReader(path)) { reader =>
        // Flipped by the subscription to stop reading early.
        val running = new AtomicBoolean(true)
        subscriber.subscribed(Subscription.fromRunning(running))
        AvroRecordIterator(reader)
          .takeWhile(_ => running.get)
          .map(deserializer.toRow)
          .grouped(DataStream.DefaultBatchSize)
          // NOTE(review): `subscriber.next` restored; the recovered text had an
          // unterminated `.foreach(` here — confirm.
          .foreach(subscriber.next)
        subscriber.completed()
      }
    } catch {
      // Deliberately broad: every failure is reported through the subscriber.
      case t: Throwable => subscriber.error(t)
    }
  }
}

object AvroSource {

  /** Builds a source from a local file, using its absolute path. */
  def apply(file: File)(implicit conf: Configuration, fs: FileSystem): AvroSource =
    AvroSource(new Path(file.getAbsoluteFile.toString))

  /** Builds a source from a `java.nio.file.Path`. */
  def apply(path: java.nio.file.Path)(implicit conf: Configuration, fs: FileSystem): AvroSource =
    apply(path.toFile)
}
Source File: AvroSink.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.avro

import java.io.File

import io.eels.schema.StructType
import io.eels.{Row, Sink, SinkWriter}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.hadoop.fs.{FileSystem, Path}

/**
  * A [[Sink]] that writes rows to a single Avro file.
  *
  * @param path               destination file
  * @param overwrite          passed to `fs.create` when the file is opened
  * @param permission         when set, applied to the file after the writer is closed
  * @param inheritPermissions when `Some(true)` and `permission` is not set, the
  *                           parent directory's permission is copied to the file
  */
case class AvroSink(path: Path,
                    overwrite: Boolean = false,
                    permission: Option[FsPermission] = None,
                    inheritPermissions: Option[Boolean] = None)
                   (implicit conf: Configuration, fs: FileSystem) extends Sink {

  def withOverwrite(overwrite: Boolean): AvroSink = copy(overwrite = overwrite)
  def withPermission(permission: FsPermission): AvroSink = copy(permission = Option(permission))
  def withInheritPermission(inheritPermissions: Boolean): AvroSink = copy(inheritPermissions = Option(inheritPermissions))

  /** Opens the destination file and returns a writer for rows of `schema`. */
  override def open(schema: StructType): SinkWriter = new SinkWriter {

    private val writer = new AvroWriter(schema, fs.create(path, overwrite))

    override def write(row: Row): Unit = writer.write(row)

    /** Closes the writer, then applies the explicit or inherited permission. */
    override def close(): Unit = {
      writer.close()
      permission match {
        case Some(perm) => fs.setPermission(path, perm)
        case None =>
          if (inheritPermissions.getOrElse(false)) {
            // Named differently from the `permission` field to avoid shadowing it.
            val parentPermission = fs.getFileStatus(path.getParent).getPermission
            fs.setPermission(path, parentPermission)
          }
      }
    }
  }
}

object AvroSink {

  /** Builds a sink for a local file, using its absolute path. */
  def apply(file: File)(implicit conf: Configuration, fs: FileSystem): AvroSink =
    AvroSink(new Path(file.getAbsoluteFile.toString))

  /** Builds a sink from a `java.nio.file.Path`. */
  def apply(path: java.nio.file.Path)(implicit conf: Configuration, fs: FileSystem): AvroSink =
    apply(path.toFile)
}
Source File: ParquetProjectionTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.parquet

import java.io.{File, FilenameFilter}

import io.eels.datastream.DataStream
import io.eels.schema.{Field, StringType, StructType}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.scalatest.{FlatSpec, Matchers}

/** Checks that ParquetSource returns only projected columns, or all columns without a projection. */
class ParquetProjectionTest extends FlatSpec with Matchers {

  cleanUpResidualParquetTestFiles

  private val schema = StructType(
    Field("name", StringType, nullable = false),
    Field("job", StringType, nullable = false),
    Field("location", StringType, nullable = false)
  )
  private val ds = DataStream.fromValues(
    schema,
    Seq(
      Vector("clint eastwood", "actor", "carmel"),
      Vector("elton john", "musician", "pinner")
    )
  )

  private implicit val conf = new Configuration()
  private implicit val fs = FileSystem.get(new Configuration())
  // Time-stamped name so concurrent or aborted runs do not collide.
  private val file = new File(s"test_${System.currentTimeMillis()}.pq")
  file.deleteOnExit()
  private val path = new Path(file.toURI)

  if (fs.exists(path))
    fs.delete(path, false)

  // NOTE(review): this write step is missing from the recovered text; without
  // it `ds` is unused and both tests read a file that was just deleted. The
  // exact sink options of the original are unknown — confirm.
  ds.to(ParquetSink(path))

  "ParquetSource" should "support projections" in {
    val rows = ParquetSource(path).withProjection("name").toDataStream().collect
    // NOTE(review): `rows.map(_.values)` restored so that row values, not Row
    // objects, are compared with the expected vectors — confirm.
    rows.map(_.values) shouldBe Vector(Vector("clint eastwood"), Vector("elton john"))
  }

  it should "return all data when no projection is set" in {
    val rows = ParquetSource(path).toDataStream().collect
    rows.map(_.values) shouldBe Vector(Vector("clint eastwood", "actor", "carmel"), Vector("elton john", "musician", "pinner"))
  }

  // Removes test_*.pq files and their .crc companions left in the working
  // directory by earlier runs.
  private def cleanUpResidualParquetTestFiles = {
    val leftovers = new File(".").listFiles(new FilenameFilter {
      override def accept(dir: File, name: String): Boolean = {
        (name.startsWith("test_") && name.endsWith(".pq")) || (name.startsWith(".test_") && name.endsWith(".pq.crc"))
      }
    })
    // listFiles returns null on an I/O error.
    Option(leftovers).foreach(_.foreach(_.delete()))
  }
}
Source File: ParquetSpeedTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.parquet

import java.io.File

import com.sksamuel.exts.metrics.Timed
import io.eels.Row
import io.eels.component.parquet.avro.{AvroParquetSink, AvroParquetSource}
import io.eels.component.parquet.util.ParquetLogMute
import io.eels.datastream.DataStream
import io.eels.schema.StructType
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

import scala.util.Random

/**
  * Manual benchmark: writes `size` random rows to one Parquet file, then reads
  * it back in an endless loop with ParquetSource and AvroParquetSource, timing
  * each read. It must be stopped by hand.
  */
object ParquetSpeedTest extends App with Timed {
  ParquetLogMute()

  val size = 2000000
  val schema = StructType("a", "b", "c", "d", "e")
  // A def, as in the sibling speed tests, so every row gets fresh random values;
  // as a val a single row would be generated once and repeated `size` times.
  def createRow = Row(schema, Random.nextBoolean(), Random.nextFloat(), Random.nextGaussian(), Random.nextLong(), Random.nextString(4))
  val ds = DataStream.fromIterator(schema, Iterator.continually(createRow).take(size))

  implicit val conf = new Configuration()
  implicit val fs = FileSystem.getLocal(new Configuration())

  val path = new Path("parquet_speed.pq")
  fs.delete(path, false)

  new File(path.toString).deleteOnExit()

  timed("Insertion") {
    // NOTE(review): the body of this block is missing from the recovered text.
    // AvroParquetSink is imported but used nowhere else, so it is presumed to
    // be the sink used here — confirm.
    ds.to(AvroParquetSink(path))
  }

  while (true) {

    timed("Reading with ParquetSource") {
      val actual = ParquetSource(path).toDataStream().size
      assert(actual == size)
    }

    println("")
    println("---------")
    println("")

    Thread.sleep(2000)

    timed("Reading with AvroParquetSource") {
      val actual = AvroParquetSource(path).toDataStream().size
      assert(actual == size)
    }
  }
}
Source File: ParquetMultipleFileSpeedTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.parquet

import java.io.File

import com.sksamuel.exts.metrics.Timed
import io.eels.component.parquet.util.ParquetLogMute
import io.eels.datastream.DataStream
import io.eels.schema.StructType
import io.eels.{FilePattern, Row}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

import scala.util.Random

/**
  * Manual benchmark: writes `size` random rows into the `parquet-speed-test`
  * directory with `count` passed as the second argument of `to`, then reads
  * them back 25 times through a file pattern, timing each read.
  */
object ParquetMultipleFileSpeedTest extends App with Timed {
  ParquetLogMute()

  val size = 5000000
  val count = 20
  val schema = StructType("a", "b", "c", "d", "e")

  def createRow = Row(schema, Random.nextBoolean(), Random.nextFloat(), Random.nextGaussian(), Random.nextLong(), Random.nextString(4))

  implicit val conf = new Configuration()
  implicit val fs = FileSystem.getLocal(new Configuration())

  val dir = new Path("parquet-speed-test")
  new File(dir.toString).mkdirs()

  // Start from an empty directory; listFiles returns null on an I/O error.
  Option(new File(dir.toString).listFiles()).foreach(_.foreach(_.delete))

  timed("Insertion") {
    val ds = DataStream.fromRowIterator(schema, Iterator.continually(createRow).take(size))
    // NOTE(review): `ds.to(ParquetSink(new` restored in front of the surviving
    // `Path(...)), count)` fragment — confirm.
    ds.to(ParquetSink(new Path("parquet-speed-test/parquet_speed.pq")), count)
  }

  for (_ <- 1 to 25) {
    // The write above is expected to have produced exactly `count` files.
    assert(count == FilePattern("parquet-speed-test/*").toPaths().size)

    timed("Reading with ParquetSource") {
      val actual = ParquetSource("parquet-speed-test/*").toDataStream().map { row => row }.filter(_ => true).size
      assert(actual == size, s"Expected $size but was $actual")
    }

    println("")
    println("---------")
    println("")
  }
}
Source File: ParquetVsOrcSpeedTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.hive

import java.io.File
import java.math.MathContext

import com.sksamuel.exts.metrics.Timed
import io.eels.Row
import io.eels.component.orc.{OrcSink, OrcSource}
import io.eels.component.parquet.{ParquetSink, ParquetSource}
import io.eels.datastream.DataStream
import io.eels.schema._
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

import scala.math.BigDecimal.RoundingMode
import scala.util.Random

/**
  * Manual benchmark: writes the same kind of random data once as ORC and once
  * as Parquet, then reads both files back in an endless loop, timing each
  * step. It must be stopped by hand.
  */
object ParquetVsOrcSpeedTest extends App with Timed {

  implicit val conf = new Configuration()
  implicit val fs = FileSystem.getLocal(new Configuration())

  val size = 5000000

  val structType = StructType(
    Field("name", StringType),
    Field("age", IntType.Signed),
    Field("height", DoubleType),
    Field("amazing", BooleanType),
    Field("fans", LongType.Signed),
    Field("rating", DecimalType(4, 2))
  )

  /** Endless iterator of random value vectors, one element per field of `structType`. */
  def iter: Iterator[Vector[Any]] = Iterator.continually(Vector(
    Random.nextString(10),
    Random.nextInt(),
    Random.nextDouble(),
    Random.nextBoolean(),
    Random.nextLong(),
    BigDecimal(Random.nextDouble(), new MathContext(4)).setScale(2, RoundingMode.UP)
  ))

  /** A fresh stream of `size` random rows on every call. */
  def ds: DataStream = DataStream.fromIterator(structType, iter.take(size).map(Row(structType, _)))

  val ppath = new Path("parquet_speed.pq")
  fs.delete(ppath, false)

  val opath = new Path("orc_speed.orc")
  fs.delete(opath, false)

  new File(ppath.toString).deleteOnExit()
  new File(opath.toString).deleteOnExit()

  // NOTE(review): the bodies of the two insertion blocks are missing from the
  // recovered text. OrcSink and ParquetSink are imported but used nowhere
  // else, and `ds` is otherwise unused, so these writes are presumed — confirm.
  timed("Orc Insertion") {
    ds.to(OrcSink(opath))
  }

  timed("Parquet Insertion") {
    ds.to(ParquetSink(ppath))
  }

  while (true) {

    timed("Reading with OrcSource") {
      val actual = OrcSource(opath).toDataStream().size
      assert(actual == size, s"$actual != $size")
    }

    timed("Reading with ParquetSource") {
      val actual = ParquetSource(ppath).toDataStream().size
      assert(actual == size, s"$actual != $size")
    }
  }
}
Source File: HiveDynamicPartitionTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.hive

import java.io.File

import io.eels.component.hive.partition.DynamicPartitionStrategy
import io.eels.datastream.DataStream
import io.eels.schema.{Field, Partition, StructType}
import org.scalatest.{BeforeAndAfterAll, FunSuite, Matchers}

import scala.util.Try

/**
  * Tests for dynamic partition creation. Each test is skipped (via `assume`)
  * when `$basePath/core-site.xml` does not exist.
  */
class HiveDynamicPartitionTest extends FunSuite with Matchers with BeforeAndAfterAll {

  import HiveConfig._

  val dbname = HiveTestUtils.createTestDatabase
  // Time-stamped so repeated runs use distinct tables.
  val table = "dynp_test_" + System.currentTimeMillis()

  val schema = StructType(Field("a"), Field("b"))

  // Best effort: a failure to create the table is ignored here and surfaces in the tests.
  Try {
    HiveTable(dbname, table).create(schema, Seq("a"))
  }

  // Best effort: a failure to drop the table is ignored.
  override def afterAll(): Unit = Try {
    HiveTable(dbname, table).drop()
  }

  test("dynamic partition strategy should create new partitions") {
    assume(new File(s"$basePath/core-site.xml").exists)
    HiveTable(dbname, table).partitionValues("a") shouldBe Set.empty
    DataStream.fromValues(schema, Seq(Seq("1", "2"), Seq("3", "4"))).to(HiveSink(dbname, table))
    HiveTable(dbname, table).partitionValues("a") shouldBe Set("1", "3")
  }

  test("skip partition if partition already exists") {
    assume(new File(s"$basePath/core-site.xml").exists)
    // The second call targets a partition that the first call has already ensured.
    new DynamicPartitionStrategy().ensurePartition(Partition("a" -> "1"), dbname, table, false, client)
    new DynamicPartitionStrategy().ensurePartition(Partition("a" -> "1"), dbname, table, false, client)
  }
}
Source File: HiveTableTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.hive

import java.io.File

import io.eels.Row
import io.eels.datastream.DataStream
import io.eels.schema.{Field, StringType, StructType}
import org.scalatest.{FunSuite, Matchers}

import scala.util.{Random, Try}

/**
  * Tests HiveTable.partitionValues. The test is skipped (via `assume`) when
  * `$basePath/core-site.xml` does not exist.
  */
class HiveTableTest extends FunSuite with Matchers {

  import HiveConfig._

  val dbname = HiveTestUtils.createTestDatabase
  // Time-stamped so repeated runs use distinct tables.
  val table = "test_table_" + System.currentTimeMillis()

  // Best effort: a failure to drop the table is ignored.
  Try {
    HiveTable(dbname, table).drop()
  }

  test("partition values should return values for the matching key") {
    assume(new File(s"$basePath/core-site.xml").exists)

    val schema = StructType(
      Field("a", StringType),
      Field("b", StringType),
      Field("c", StringType)
    )

    // One random value per column, each drawn from a fixed set of three.
    def createRow = Row(schema,
      Seq(
        Random.shuffle(List("a", "b", "c")).head,
        Random.shuffle(List("x", "y", "z")).head,
        Random.shuffle(List("q", "r", "s")).head
      )
    )

    val sink = HiveSink(dbname, table).withCreateTable(true, Seq("a", "b"))
    val size = 1000

    DataStream.fromIterator(schema, Iterator.continually(createRow).take(size)).to(sink, 4)

    HiveTable(dbname, table).partitionValues("b") shouldBe Set("x", "y", "z")
  }
}
Source File: HivePartitionConstraintTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.hive

import java.io.File

import io.eels.datastream.DataStream
import io.eels.schema.{Field, PartitionConstraint, StringType, StructType}
import org.scalatest.{BeforeAndAfterAll, FunSuite, Matchers}

import scala.util.Try

/**
  * Tests partition constraints on HiveSource. Each test is skipped (via
  * `assume`) when `$basePath/core-site.xml` does not exist.
  */
class HivePartitionConstraintTest extends FunSuite with Matchers with BeforeAndAfterAll {

  import HiveConfig._

  val dbname = HiveTestUtils.createTestDatabase
  // Time-stamped so repeated runs use distinct tables.
  private val table = "constraints_test_" + System.currentTimeMillis()

  // Best effort: a failure to drop the table is ignored.
  override def afterAll(): Unit = Try {
    HiveTable(dbname, table).drop()
  }

  val schema = StructType(
    Field("state", StringType),
    Field("city", StringType)
  )

  // Best effort: a failure to write the fixture is ignored here and surfaces in the tests.
  // Two rows for "iowa" and one for "maine", partitioned by state.
  Try {
    DataStream.fromValues(schema, Seq(
      Seq("iowa", "des moines"),
      Seq("iowa", "iow city"),
      Seq("maine", "augusta")
    )).to(HiveSink(dbname, table).withCreateTable(true, Seq("state")))
  }

  test("hive source with partition constraint should return matching data") {
    assume(new File(s"$basePath/core-site.xml").exists)

    HiveSource(dbname, table)
      .addPartitionConstraint(PartitionConstraint.equals("state", "iowa"))
      .toDataStream()
      .collect.size shouldBe 2
  }

  test("hive source with non-existing partitions in constraint should return no data") {
    assume(new File(s"$basePath/core-site.xml").exists)

    HiveSource(dbname, table)
      .addPartitionConstraint(PartitionConstraint.equals("state", "pa"))
      .toDataStream()
      .collect.size shouldBe 0
  }
}
Source File: HiveStatsTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.hive

import java.io.File

import io.eels.Row
import io.eels.datastream.DataStream
import io.eels.schema._
import org.scalatest.{BeforeAndAfterAll, FunSuite, Matchers}

import scala.util.{Random, Try}

/**
  * Tests row counts and min/max statistics on a non-partitioned and a
  * partitioned table. Each test is skipped (via `assume`) when
  * `$basePath/core-site.xml` does not exist.
  */
class HiveStatsTest extends FunSuite with Matchers with BeforeAndAfterAll {

  import HiveConfig._

  private val dbname = HiveTestUtils.createTestDatabase
  // Time-stamped so repeated runs use distinct tables.
  private val table = "stats_test_" + System.currentTimeMillis()
  private val partitioned_table = "stats_test2_" + System.currentTimeMillis()

  val schema = StructType(
    Field("a", StringType),
    Field("b", IntType.Signed)
  )

  // Column a is one of "a"/"b"/"c"; column b is one of 1..5.
  def createRow = Row(schema, Seq(Random.shuffle(List("a", "b", "c")).head, Random.shuffle(List(1, 2, 3, 4, 5)).head))

  val amount = 10000

  // Best effort: failures to drop the tables are ignored.
  override def afterAll(): Unit = Try {
    HiveTable(dbname, table).drop()
    HiveTable(dbname, partitioned_table).drop()
  }

  // Best effort: a failure to write the fixtures is ignored here and surfaces in the tests.
  Try {
    DataStream.fromIterator(schema, Iterator.continually(createRow).take(amount))
      .to(HiveSink(dbname, table).withCreateTable(true), 4)

    DataStream.fromIterator(schema, Iterator.continually(createRow).take(amount))
      .to(HiveSink(dbname, partitioned_table).withCreateTable(true, Seq("a")), 4)
  }

  test("stats should return row counts for a non-partitioned table") {
    assume(new File(s"$basePath/core-site.xml").exists)
    HiveTable(dbname, table).stats().count shouldBe amount
  }

  test("stats should return row counts for a partitioned table") {
    assume(new File(s"$basePath/core-site.xml").exists)
    HiveTable(dbname, partitioned_table).stats().count shouldBe amount
  }

  test("stats should throw exception when constraints specified on a non-partitioned table") {
    assume(new File(s"$basePath/core-site.xml").exists)
    intercept[RuntimeException] {
      val constraints = Seq(PartitionConstraint.equals("a", "b"))
      HiveTable(dbname, table).stats().count(constraints)
    }
  }

  test("stats should support row count constraints for a partitioned table") {
    assume(new File(s"$basePath/core-site.xml").exists)
    val constraints = Seq(PartitionConstraint.equals("a", "b"))
    HiveTable(dbname, partitioned_table).stats().count(constraints) > 0 shouldBe true
    HiveTable(dbname, partitioned_table).stats().count(constraints) should be < amount.toLong
  }

  test("stats should support min and max for a non-partitioned tabled") {
    assume(new File(s"$basePath/core-site.xml").exists)
    HiveTable(dbname, table).stats.max("b") shouldBe 5
    HiveTable(dbname, table).stats.min("b") shouldBe 1
  }

  test("stats should support min and max for a partitioned table") {
    assume(new File(s"$basePath/core-site.xml").exists)
    HiveTable(dbname, partitioned_table).stats.max("b") shouldBe 5
    HiveTable(dbname, partitioned_table).stats.min("b") shouldBe 1
  }
}
Source File: CompactorTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.hive

import java.io.File

import io.eels.datastream.DataStream
import io.eels.schema.{Field, StructType}

/** Currently ignored tests for compacting the files of a Hive table. */
class CompactorTest extends HiveTests {

  import HiveConfig._

  HiveTable("default", "wibble").drop(true)

  "Compactor" should {
    "delete the originals" ignore {

      val schema = StructType(Field("a"), Field("b"))
      val ds = DataStream.fromValues(schema, Seq(
        Array("1", "2"),
        Array("3", "4"),
        Array("5", "6"),
        Array("7", "8")
      ))
      // NOTE(review): `ds.to(HiveSink(` restored in front of the surviving
      // `"default", "wibble").withCreateTable(true))` fragment — confirm.
      ds.to(HiveSink("default", "wibble").withCreateTable(true))

      assume(new File(s"$basePath/core-site.xml").exists)

      HiveTable("default", "wibble").paths(false, false).size should be > 1
      Compactor("default", "wibble").compactTo(1)
      // `should be 1` is not valid syntax; `shouldBe` expresses the same check.
      HiveTable("default", "wibble").paths(false, false).size shouldBe 1
    }
    "merge the contents" ignore {
      assume(new File(s"$basePath/core-site.xml").exists)
      HiveSource("default", "wibble").toDataStream().collectValues shouldBe Seq(
        Array("1", "2"),
        Array("3", "4"),
        Array("5", "6"),
        Array("7", "8")
      )
    }
  }
}
Example 34
Source File: OrcMultipleFileSpeedTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.orc

import java.io.File

import com.sksamuel.exts.metrics.Timed
import io.eels.datastream.DataStream
import io.eels.schema.StructType
import io.eels.{FilePattern, Row}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

import scala.util.Random

/**
  * Manual benchmark: writes `size` random rows into the `orc-speed-test`
  * directory with `count` passed as the second argument of `to`, then reads
  * them back 25 times through a file pattern, timing each read.
  */
object OrcMultipleFileSpeedTest extends App with Timed {

  val size = 5000000
  val count = 20
  val schema = StructType("a", "b", "c", "d", "e")

  def createRow = Row(schema, Random.nextBoolean(), Random.nextFloat(), Random.nextGaussian(), Random.nextLong(), Random.nextString(4))

  implicit val conf = new Configuration()
  implicit val fs = FileSystem.getLocal(new Configuration())

  val dir = new Path("orc-speed-test")
  new File(dir.toString).mkdirs()

  timed("Insertion") {
    val ds = DataStream.fromIterator(schema, Iterator.continually(createRow).take(size))
    // Start from an empty directory; listFiles returns null on an I/O error.
    Option(new File(dir.toString).listFiles()).foreach(_.foreach(_.delete))
    // NOTE(review): `ds.to(OrcSink(new` restored in front of the surviving
    // `Path(...)).withOverwrite(true), count)` fragment — confirm.
    ds.to(OrcSink(new Path("orc-speed-test/orc_speed.pq")).withOverwrite(true), count)
  }

  for (_ <- 1 to 25) {
    // The write above is expected to have produced exactly `count` files.
    assert(count == FilePattern("orc-speed-test/*").toPaths().size)

    timed("Reading with OrcSource") {
      val actual = OrcSource("orc-speed-test/*").toDataStream().map { row => row }.filter(_ => true).size
      assert(actual == size, s"Expected $size but was $actual")
    }

    println("")
    println("---------")
    println("")
  }
}
Example 35
Source File: OrcPredicateTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
// OrcPredicateTest: FlatSpec checking predicate push-down on OrcSource.
// Fixture: a (name, city, age) schema and 2000 rows (1000 x "sam", 1000 x "laura") in `ds`.
// On construction it removes leftover test_*.orc / .test_*.orc.crc files from the working
// directory and deletes "test.orc" if present; afterAll deletes "test.orc" again.
// The first three cases set "eel.orc.predicate.row.filter" to "false" and expect the single
// distinct matching row. The last case sets it to "true" and checks the schema and values of
// the first returned row.
// NOTE(review): this listing was damaged by extraction and does not compile as shown:
//  - `import{File, FilenameFilter}` lost its package prefix (presumably java.io).
//  - `withPredicate("age", 30L))` / `withPredicate("age", 30))` lost the Predicate factory call.
//  - `val rows = ... .collect shouldBe Set(...)` appears to have lost the step that maps rows to
//    values, and probably a line break.
//  - no visible statement writes `ds` to `path` before the tests read it.
// Restore these from the upstream file before relying on the spec.
package io.eels.component.orc import{File, FilenameFilter} import io.eels.Predicate import io.eels.datastream.DataStream import io.eels.schema.{Field, LongType, StringType, StructType} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} class OrcPredicateTest extends FlatSpec with Matchers with BeforeAndAfterAll { cleanUpResidualOrcTestFiles val schema = StructType( Field("name", StringType, nullable = true), Field("city", StringType, nullable = true), Field("age", LongType.Signed, nullable = true) ) val values = Vector.fill(1000) { Vector("sam", "middlesbrough", 37) } ++ Vector.fill(1000) { Vector("laura", "iowa city", 24) } val ds = DataStream.fromValues(schema, values) implicit val conf = new Configuration() implicit val fs = FileSystem.get(new Configuration()) val path = new Path("test.orc") if (fs.exists(path)) fs.delete(path, false) new File(path.toString).deleteOnExit() override protected def afterAll(): Unit = fs.delete(path, false) "OrcSource" should "support string equals predicates" in { conf.set("eel.orc.predicate.row.filter", "false") val rows = OrcSource(path).withPredicate(Predicate.equals("name", "sam")).toDataStream().collect shouldBe Set(Vector("sam", "middlesbrough", 37L)) } it should "support gt predicates" in { conf.set("eel.orc.predicate.row.filter", "false") val rows = OrcSource(path).withPredicate("age", 30L)).toDataStream().collect shouldBe Set(Vector("sam", "middlesbrough", 37L)) } it should "support lt predicates" in { conf.set("eel.orc.predicate.row.filter", "false") val rows = OrcSource(path).withPredicate("age", 30)).toDataStream().collect shouldBe Set(Vector("laura", "iowa city", 24L)) } it should "enable row level filtering with predicates by default" in { conf.set("eel.orc.predicate.row.filter", "true") val rows = OrcSource(path).withPredicate(Predicate.equals("name", "sam")).toDataStream().collect rows.head.schema shouldBe schema 
rows.head.values shouldBe Vector("sam", "middlesbrough", 37L) } private def cleanUpResidualOrcTestFiles = { new File(".").listFiles(new FilenameFilter { override def accept(dir: File, name: String): Boolean = { (name.startsWith("test_") && name.endsWith(".orc")) || (name.startsWith(".test_") && name.endsWith(".orc.crc")) } }).foreach(_.delete()) } }
Example 36
Source File: KafkaTestUtils.scala From spark-kafka-writer with Apache License 2.0 | 5 votes |
// KafkaTestUtils: test helper holding the host/port of a local ZooKeeper (localhost:2181) and
// Kafka broker (localhost:9092).
//  - createTopics: for each name, asks the admin client to create a topic with 1 partition and
//    replication factor 1, sleeps one second, and records the topic in topicCountMap.
//  - brokerProps: builds the broker Properties, including a freshly created random log directory.
//  - EmbeddedZookeeper: creates random snapshot and log directories, starts a ZooKeeperServer
//    (tick time 500) behind an NIOServerCnxnFactory limited to 16 connections, and on shutdown()
//    stops the factory and deletes both directories.
// NOTE(review): this listing was damaged by extraction and is incomplete:
//  - two imports are blank (`import import`; presumably java.io.File and
//    java.net.InetSocketAddress, both used below).
//  - several string literals are empty: three props.put("", ...) keys and every
//    System.getProperty("") argument (presumably a temp-dir property).
//  - `zkAddress` is referenced but not defined in the visible code, and nothing visible assigns
//    zk, kafkaServer or kafkaAdminClient; presumably the setup/teardown methods were elided.
// NOTE(review): createTopics ignores the result returned by AdminClient.createTopics and relies
// on Thread.sleep(1000); consider waiting on the returned result instead.
package com.github.benfradet.spark.kafka.writer import import import java.util.Arrays.asList import java.util.Properties import kafka.server.{KafkaConfig, KafkaServerStartable} import org.apache.kafka.clients.admin.{AdminClient, NewTopic} import org.apache.zookeeper.server.{NIOServerCnxnFactory, ZooKeeperServer} import scala.util.Random class KafkaTestUtils { // zk private val zkHost = "localhost" private val zkPort = 2181 private var zk: EmbeddedZookeeper = _ private var zkReady = false // kafka private val brokerHost = "localhost" private val brokerPort = 9092 private var kafkaServer: KafkaServerStartable = _ private var topicCountMap = Map.empty[String, Int] private var brokerReady = false private var kafkaAdminClient: AdminClient = _ @scala.annotation.varargs def createTopics(topics: String*): Unit = for (topic <- topics) { kafkaAdminClient.createTopics(asList(new NewTopic(topic, 1, 1: Short))) Thread.sleep(1000) topicCountMap = topicCountMap + (topic -> 1) } private def brokerProps: Properties = { val props = new Properties props.put("", "0") props.put("", brokerHost) props.put("log.dir", { val dir = System.getProperty("") + "/logDir-" + new Random().nextInt(Int.MaxValue) val f = new File(dir) f.mkdirs() dir } ) props.put("port", brokerPort.toString) props.put("zookeeper.connect", zkAddress) props.put("", "10000") props.put("offsets.topic.replication.factor", "1") props } private class EmbeddedZookeeper(hostname: String, port: Int) { private val snapshotDir = { val f = new File(System.getProperty(""), "snapshotDir-" + Random.nextInt(Int.MaxValue)) f.mkdirs() f } private val logDir = { val f = new File(System.getProperty(""), "logDir-" + Random.nextInt(Int.MaxValue)) f.mkdirs() f } private val factory = { val zkTickTime = 500 val zk = new ZooKeeperServer(snapshotDir, logDir, zkTickTime) val f = new NIOServerCnxnFactory val maxCnxn = 16 f.configure(new InetSocketAddress(hostname, port), maxCnxn) f.startup(zk) f } def shutdown(): Unit = { factory.shutdown() 
snapshotDir.delete() logDir.delete() () } } }
Example 37
Source File: TotalTweetsScheduler.scala From redrock with Apache License 2.0 | 5 votes |
// TotalTweetsScheduler: keeps a cached total tweet count refreshed from Elasticsearch.
//  - CurrentTotalTweets.totalTweets is a @volatile Long holding the latest total.
//  - ExecuterTotalTweetsES is an actor that, on construction, schedules getTotalTweetsES to run
//    after `delay` and then every `interval`.
//  - receive handles GetTotalTweetsScheduler and ignores every other message.
//  - getTotalTweetsES builds a GetElasticsearchResponse from LoadConf values, parses the JSON
//    response and stores its "hits" \ "total" field into CurrentTotalTweets.totalTweets.
// NOTE(review): this listing was damaged by extraction and does not compile as shown:
//  - `import{File, FileInputStream}` and `import{ActorRef, Actor, ActorSystem, Props}` lost their
//    package prefixes, and three further imports are blank.
//  - the three bare string literals ("Getting Total of Tweets. Begin: ...", "... Current: ...",
//    "Total users updated. New: ...") have lost their call, presumably logger.info(s"...").
// NOTE(review): scheduler.schedule needs an implicit ExecutionContext that is not visible here;
// presumably it came from one of the lost imports.
// NOTE(review): the key "searchParam.defaulStartDatetime" is spelled "defaul"; it must match the
// configuration file, so do not correct it here without changing the config as well.
package com.restapi import{File, FileInputStream} import{ActorRef, Actor, ActorSystem, Props} import import org.slf4j.LoggerFactory import play.api.libs.json.Json import spray.can.Http import akka.pattern.ask import spray.http.DateTime import scala.concurrent.duration._ import akka.util.Timeout import import org.apache.commons.codec.digest.DigestUtils import case object GetTotalTweetsScheduler object CurrentTotalTweets { @volatile var totalTweets: Long = 0 } class ExecuterTotalTweetsES(delay: FiniteDuration, interval: FiniteDuration) extends Actor { context.system.scheduler.schedule(delay, interval) { getTotalTweetsES } val logger = LoggerFactory.getLogger(this.getClass) override def receive: Actor.Receive = { case GetTotalTweetsScheduler => {"Getting Total of Tweets. Begin: ${CurrentTotalTweets.totalTweets}") } case _ => // just ignore any messages } def getTotalTweetsES: Unit = { val elasticsearchRequests = new GetElasticsearchResponse(0, Array[String](), Array[String](), LoadConf.restConf.getString("searchParam.defaulStartDatetime"), LoadConf.restConf.getString("searchParam.defaultEndDatetime"), LoadConf.esConf.getString("decahoseIndexName")) val totalTweetsResponse = Json.parse(elasticsearchRequests.getTotalTweetsESResponse())"Getting Total of Tweets. Current: ${CurrentTotalTweets.totalTweets}") CurrentTotalTweets.totalTweets = (totalTweetsResponse \ "hits" \ "total").as[Long]"Total users updated. New: ${CurrentTotalTweets.totalTweets}") } }
Example 38
Source File: package.scala From sbt-reactive-app with Apache License 2.0 | 5 votes |
package com.lightbend.rp.sbtreactiveapp

import java.io.File
import java.nio.file.Paths
// NOTE(review): this import was blank in the extracted listing. It is reconstructed from the
// use of StringInputStream below — confirm the package against the upstream file.
import org.apache.tools.ant.filters.StringInputStream
import sbt.Logger
import scala.collection.immutable.Seq
import scala.sys.process.{ Process, ProcessLogger }

package object cmd {

  /**
   * Runs an external command and captures its output.
   *
   * @param cwd       working directory for the process; defaults to the real path of "."
   * @param env       extra environment variables passed to the process
   * @param input     text piped to the process' standard input, if any
   * @param logStdErr logger that receives each stderr line at error level, if any
   * @param logStdOut logger that receives each stdout line at info level, if any
   * @param args      the command followed by its arguments
   * @return the exit code, the stdout lines and the stderr lines, both in emission order
   */
  private[cmd] def run(
    cwd: File = Paths.get(".").toRealPath().toFile,
    env: Map[String, String] = Map.empty,
    input: Option[String] = None,
    logStdErr: Option[Logger] = None,
    logStdOut: Option[Logger] = None)(args: String*): (Int, Seq[String], Seq[String]) = {
    // Lines are prepended as they arrive and reversed once at the end.
    var outList = List.empty[String]
    var errList = List.empty[String]

    val stringLogger = ProcessLogger(
      { s =>
        outList = s :: outList
        logStdOut.foreach(_.info(s))
      },
      { s =>
        errList = s :: errList
        logStdErr.foreach(_.error(s))
      })

    // Folding over the Option attaches stdin only when `input` is defined.
    val exitCode =
      input
        .map(new StringInputStream(_))
        .foldLeft(Process(args, cwd = cwd, env.toVector: _*))(_ #< _)
        .run(stringLogger)
        .exitValue()

    (exitCode, outList.reverse, errList.reverse)
  }

  /**
   * Fails with `sys.error` when a [[run]] result has a non-zero exit code.
   *
   * @param failMsg message prefix; the exit code is appended in square brackets
   * @param result  the tuple returned by [[run]]
   */
  private[cmd] def runSuccess(failMsg: String)(result: (Int, Seq[String], Seq[String])): Unit = {
    val (exitCode, _, _) = result
    if (exitCode != 0) {
      sys.error(s"$failMsg [$exitCode]")
    }
  }
}
Example 39
Source File: package.scala From Principles-of-Reactive-Programming with GNU General Public License v3.0 | 5 votes |
import java.io.{File, FileInputStream, InputStream}

package object common {

  /**
   * Opens a file below the project's `src/main/resources` directory.
   *
   * The project directory is taken to be four levels above the directory from which this
   * package object's classes were loaded.
   *
   * @param resourcePath path segments below `src/main/resources`
   * @return a stream over the file, or `None` when the classes directory cannot be resolved to
   *         a local directory, the project directory does not exist, or the target is not a
   *         regular file
   */
  def resourceAsStreamFromSrc(resourcePath: List[String]): Option[InputStream] = {
    // getResource returns null when the location is unavailable, and only "file" URLs can be
    // turned into a java.io.File.
    val classesDir: Option[File] =
      Option(getClass.getResource("."))
        .filter(_.getProtocol == "file")
        .map(url => new File(url.toURI))

    // getParentFile returns null once the filesystem root is reached.
    val projectDir: Option[File] =
      (1 to 4).foldLeft(classesDir)((dir, _) => dir.flatMap(d => Option(d.getParentFile)))

    projectDir
      .map { root =>
        ("src" :: "main" :: "resources" :: resourcePath)
          .foldLeft(root)((dir, child) => new File(dir, child))
      }
      .filter(_.isFile) // a directory would make FileInputStream throw
      .map(file => new FileInputStream(file))
  }
}
Example 40
Source File: package.scala From Principles-of-Reactive-Programming with GNU General Public License v3.0 | 5 votes |
import java.io.{File, FileInputStream, InputStream}

package object common {

  /**
   * Opens a file below the project's `src/main/resources` directory.
   *
   * The project directory is taken to be four levels above the directory from which this
   * package object's classes were loaded.
   *
   * @param resourcePath path segments below `src/main/resources`
   * @return a stream over the file, or `None` when the classes directory cannot be resolved to
   *         a local directory, the project directory does not exist, or the target is not a
   *         regular file
   */
  def resourceAsStreamFromSrc(resourcePath: List[String]): Option[InputStream] = {
    // getResource returns null when the location is unavailable, and only "file" URLs can be
    // turned into a java.io.File.
    val classesDir: Option[File] =
      Option(getClass.getResource("."))
        .filter(_.getProtocol == "file")
        .map(url => new File(url.toURI))

    // getParentFile returns null once the filesystem root is reached.
    val projectDir: Option[File] =
      (1 to 4).foldLeft(classesDir)((dir, _) => dir.flatMap(d => Option(d.getParentFile)))

    projectDir
      .map { root =>
        ("src" :: "main" :: "resources" :: resourcePath)
          .foldLeft(root)((dir, child) => new File(dir, child))
      }
      .filter(_.isFile) // a directory would make FileInputStream throw
      .map(file => new FileInputStream(file))
  }
}
Example 41
Source File: package.scala From Principles-of-Reactive-Programming with GNU General Public License v3.0 | 5 votes |
import java.io.{File, FileInputStream, InputStream}

package object common {

  /**
   * Opens a file below the project's `src/main/resources` directory.
   *
   * The project directory is taken to be four levels above the directory from which this
   * package object's classes were loaded.
   *
   * @param resourcePath path segments below `src/main/resources`
   * @return a stream over the file, or `None` when the classes directory cannot be resolved to
   *         a local directory, the project directory does not exist, or the target is not a
   *         regular file
   */
  def resourceAsStreamFromSrc(resourcePath: List[String]): Option[InputStream] = {
    // getResource returns null when the location is unavailable, and only "file" URLs can be
    // turned into a java.io.File.
    val classesDir: Option[File] =
      Option(getClass.getResource("."))
        .filter(_.getProtocol == "file")
        .map(url => new File(url.toURI))

    // getParentFile returns null once the filesystem root is reached.
    val projectDir: Option[File] =
      (1 to 4).foldLeft(classesDir)((dir, _) => dir.flatMap(d => Option(d.getParentFile)))

    projectDir
      .map { root =>
        ("src" :: "main" :: "resources" :: resourcePath)
          .foldLeft(root)((dir, child) => new File(dir, child))
      }
      .filter(_.isFile) // a directory would make FileInputStream throw
      .map(file => new FileInputStream(file))
  }
}
Example 42
Source File: package.scala From Principles-of-Reactive-Programming with GNU General Public License v3.0 | 5 votes |
import java.io.{File, FileInputStream, InputStream}

package object common {

  /**
   * Opens a file below the project's `src/main/resources` directory.
   *
   * The project directory is taken to be four levels above the directory from which this
   * package object's classes were loaded.
   *
   * @param resourcePath path segments below `src/main/resources`
   * @return a stream over the file, or `None` when the classes directory cannot be resolved to
   *         a local directory, the project directory does not exist, or the target is not a
   *         regular file
   */
  def resourceAsStreamFromSrc(resourcePath: List[String]): Option[InputStream] = {
    // getResource returns null when the location is unavailable, and only "file" URLs can be
    // turned into a java.io.File.
    val classesDir: Option[File] =
      Option(getClass.getResource("."))
        .filter(_.getProtocol == "file")
        .map(url => new File(url.toURI))

    // getParentFile returns null once the filesystem root is reached.
    val projectDir: Option[File] =
      (1 to 4).foldLeft(classesDir)((dir, _) => dir.flatMap(d => Option(d.getParentFile)))

    projectDir
      .map { root =>
        ("src" :: "main" :: "resources" :: resourcePath)
          .foldLeft(root)((dir, child) => new File(dir, child))
      }
      .filter(_.isFile) // a directory would make FileInputStream throw
      .map(file => new FileInputStream(file))
  }
}
Example 43
Source File: package.scala From Principles-of-Reactive-Programming with GNU General Public License v3.0 | 5 votes |
import java.io.{File, FileInputStream, InputStream}

package object common {

  /**
   * Opens a file below the project's `src/main/resources` directory.
   *
   * The project directory is taken to be four levels above the directory from which this
   * package object's classes were loaded.
   *
   * @param resourcePath path segments below `src/main/resources`
   * @return a stream over the file, or `None` when the classes directory cannot be resolved to
   *         a local directory, the project directory does not exist, or the target is not a
   *         regular file
   */
  def resourceAsStreamFromSrc(resourcePath: List[String]): Option[InputStream] = {
    // getResource returns null when the location is unavailable, and only "file" URLs can be
    // turned into a java.io.File.
    val classesDir: Option[File] =
      Option(getClass.getResource("."))
        .filter(_.getProtocol == "file")
        .map(url => new File(url.toURI))

    // getParentFile returns null once the filesystem root is reached.
    val projectDir: Option[File] =
      (1 to 4).foldLeft(classesDir)((dir, _) => dir.flatMap(d => Option(d.getParentFile)))

    projectDir
      .map { root =>
        ("src" :: "main" :: "resources" :: resourcePath)
          .foldLeft(root)((dir, child) => new File(dir, child))
      }
      .filter(_.isFile) // a directory would make FileInputStream throw
      .map(file => new FileInputStream(file))
  }
}
Example 44
Source File: package.scala From Principles-of-Reactive-Programming with GNU General Public License v3.0 | 5 votes |
import java.io.{File, FileInputStream, InputStream}

package object common {

  /**
   * Opens a file below the project's `src/main/resources` directory.
   *
   * The project directory is taken to be four levels above the directory from which this
   * package object's classes were loaded.
   *
   * @param resourcePath path segments below `src/main/resources`
   * @return a stream over the file, or `None` when the classes directory cannot be resolved to
   *         a local directory, the project directory does not exist, or the target is not a
   *         regular file
   */
  def resourceAsStreamFromSrc(resourcePath: List[String]): Option[InputStream] = {
    // getResource returns null when the location is unavailable, and only "file" URLs can be
    // turned into a java.io.File.
    val classesDir: Option[File] =
      Option(getClass.getResource("."))
        .filter(_.getProtocol == "file")
        .map(url => new File(url.toURI))

    // getParentFile returns null once the filesystem root is reached.
    val projectDir: Option[File] =
      (1 to 4).foldLeft(classesDir)((dir, _) => dir.flatMap(d => Option(d.getParentFile)))

    projectDir
      .map { root =>
        ("src" :: "main" :: "resources" :: resourcePath)
          .foldLeft(root)((dir, child) => new File(dir, child))
      }
      .filter(_.isFile) // a directory would make FileInputStream throw
      .map(file => new FileInputStream(file))
  }
}
Example 45
Source File: SidechainSettingsReader.scala From Sidechains-SDK with MIT License | 5 votes |
// SidechainSettingsReader: loads SidechainSettings from layered Typesafe Config sources.
//  - fromConfig: reads five sections (scorex.websocket, scorex, scorex.genesis,
//    scorex.withdrawalEpochCertificate, scorex.wallet) and assembles a SidechainSettings.
//  - readConfigFromPath (Option overload): parses the user file if it exists, locates the
//    optional application config, then layers the sources in this priority order:
//    defaultOverrides, user config, application config, the bundled
//    "sidechain-sdk-settings.conf" resource, defaultReference. The result is resolved.
//  - readConfigFromPath (java.util.Optional overload): converts the Optional and delegates.
//  - read: readConfigFromPath followed by fromConfig.
// NOTE(review): this listing was damaged by extraction and does not compile as shown:
//  - two imports are blank (`import import`; presumably java.io.File and java.net.URL).
//  - each `=[X]("...")` in fromConfig lost its receiver, presumably `config.as`.
//  - `val applicationConfigURL: Option[URL] = => new File(filename))` and
//    `.orElse( => getClass.getClassLoader.getResource(r)))` lost the expressions that introduce
//    `filename` and `r`, presumably mappings over applicationConfigPath.
// Restore these from the upstream file.
package com.horizen import import import java.util.{Optional => JOptional} import com.typesafe.config.{Config, ConfigFactory} import net.ceedubs.ficus.Ficus._ import net.ceedubs.ficus.readers.ArbitraryTypeReader._ import scorex.core.settings.{ScorexSettings, SettingsReaders} import scorex.util.ScorexLogging import scala.compat.java8.OptionConverters.toScala object SidechainSettingsReader extends ScorexLogging with SettingsReaders { protected val sidechainSettingsName = "sidechain-sdk-settings.conf" def fromConfig(config: Config): SidechainSettings = { val webSocketConnectorConfiguration =[WebSocketSettings]("scorex.websocket") val scorexSettings =[ScorexSettings]("scorex") val genesisSetting =[GenesisDataSettings]("scorex.genesis") val backwardTransfer =[withdrawalEpochCertificateSettings]("scorex.withdrawalEpochCertificate") val walletSetting =[WalletSettings]("scorex.wallet") SidechainSettings(scorexSettings, genesisSetting, webSocketConnectorConfiguration, backwardTransfer, walletSetting) } def readConfigFromPath(userConfigPath: String, applicationConfigPath: Option[String]): Config = { val userConfigFile: File = new File(userConfigPath) val userConfig: Option[Config] = if (userConfigFile.exists()) { Some(ConfigFactory.parseFile(userConfigFile)) } else None val applicationConfigURL: Option[URL] = => new File(filename)) .filter(_.exists()).map(_.toURI.toURL) .orElse( => getClass.getClassLoader.getResource(r))) val applicationConfig: Option[Config] = if (applicationConfigURL.isDefined) { Some(ConfigFactory.parseURL(applicationConfigURL.get)) } else None var config: Config = ConfigFactory.defaultOverrides() if (userConfig.isDefined) config = config.withFallback(userConfig.get) if (applicationConfig.isDefined) config = config.withFallback(applicationConfig.get) config = config .withFallback(ConfigFactory.parseResources(sidechainSettingsName)) .withFallback(ConfigFactory.defaultReference()) .resolve() config } def readConfigFromPath(userConfigPath: String, 
applicationConfigPath: JOptional[String]) : Config = readConfigFromPath(userConfigPath, toScala(applicationConfigPath)) def read(userConfigPath: String, applicationConfigPath: Option[String]) : SidechainSettings = fromConfig(readConfigFromPath(userConfigPath, applicationConfigPath)) }
Example 46
Source File: VersionedLevelDbStorageAdapter.scala From Sidechains-SDK with MIT License | 5 votes |
// VersionedLevelDbStorageAdapter: exposes a VersionedLDBKVStore through the Java-facing Storage
// interface, converting between Scala and Java collections and Optionals.
//  - get / getOrElse / getAll: look up entries and wrap the results as ByteArrayWrapper values.
//  - update: converts both lists to Scala and requires that `version` is neither a key being
//    updated nor a key being removed, because the store keeps the version under its own key.
//    It then applies the batch under that version.
//  - rollback: rolls the store back to the given version.
//  - createDb: creates the directory, opens a LevelDB instance with createIfMissing(true) and
//    wraps it in a VersionedLDBKVStore that keeps `keepVersions` versions.
//  - isEmpty: true when the store has no versions.
// NOTE(review): this listing was damaged by extraction and does not compile as shown. The
// package name and several imports are blank. These expressions are truncated: the argument of
// `dataBase.get(` in the list overload and the following `=> byteArrayToWrapper(v)` mapping,
// the bodies of lastVersionID() and rollbackVersions(), `val convertedToUpdate = => (,`,
// `val convertedToRemove =`, and `val db =, options)`.
// `byteArrayToWrapper` is not defined in the visible code; presumably it came from a lost import.
// Restore these from the upstream file.
package import import java.util import java.util.{Optional, List => JList} import import import com.horizen.utils.{Pair => JPair, _} import org.iq80.leveldb.Options import scala.collection.JavaConverters._ import scala.compat.java8.OptionConverters._ class VersionedLevelDbStorageAdapter(pathToDB: String, keepVersions: Int) extends Storage{ private val dataBase: VersionedLDBKVStore = createDb(pathToDB) override def get(key: ByteArrayWrapper): Optional[ByteArrayWrapper] = dataBase.get(key).map(byteArrayToWrapper).asJava override def getOrElse(key: ByteArrayWrapper, defaultValue: ByteArrayWrapper): ByteArrayWrapper = dataBase.getOrElse(key, defaultValue) override def get(keys: JList[ByteArrayWrapper]): JList[JPair[ByteArrayWrapper, Optional[ByteArrayWrapper]]] = { dataBase.get( .map{case (key, value) => new JPair(byteArrayToWrapper(key), => byteArrayToWrapper(v)).asJava)} .asJava } override def getAll: JList[JPair[ByteArrayWrapper, ByteArrayWrapper]] = { dataBase.getAll .map{case (key, value) => new JPair(byteArrayToWrapper(key), byteArrayToWrapper(value))} .asJava } override def lastVersionID(): Optional[ByteArrayWrapper] = override def update(version: ByteArrayWrapper, toUpdate: JList[JPair[ByteArrayWrapper, ByteArrayWrapper]], toRemove: util.List[ByteArrayWrapper]): Unit = { val toUpdateAsScala = toUpdate.asScala.toList val toRemoveAsScala = toRemove.asScala.toList //key for storing version shall not be used as key in any key-value pair in VersionedLDBKVStore require(!toUpdateAsScala.exists(pair => pair.getKey == version) && !toRemoveAsScala.contains(version)) val convertedToUpdate = => (, val convertedToRemove = dataBase.update(convertedToUpdate, convertedToRemove)(version) } override def rollback(versionID: ByteArrayWrapper): Unit = dataBase.rollbackTo(versionID) override def rollbackVersions(): JList[ByteArrayWrapper] = override def close(): Unit = dataBase.close() def createDb(path: String): VersionedLDBKVStore = { val dir = new File(path) dir.mkdirs() val 
options = new Options() options.createIfMissing(true) val db =, options) new VersionedLDBKVStore(db, keepVersions) } override def isEmpty: Boolean = dataBase.versions.isEmpty }
Example 47
Source File: SigProofTest.scala From Sidechains-SDK with MIT License | 5 votes |
// SigProofTest: JUnit test for the threshold-signature circuit; simpleCheck is marked @Ignore.
//  - buildSchnorrPrivateKey(index): reads the first line of the classpath resource
//    "schnorr_sk0<index>_hex", hex-decodes it and deserializes a SchnorrSecretKey. Any exception
//    is turned into a JUnit failure through assertEquals(e.toString(), true, false).
//  - simpleCheck: with 7 key pairs and threshold 5, builds the system constant and a message from
//    one withdrawal request box and two random 32-byte block hashes. It signs with the first
//    `threshold` keys, pads with empty signatures, creates a proof and asserts it verifies.
// NOTE(review): this listing was damaged by extraction and does not compile as shown:
//  - `import{BufferedReader, File, FileReader}` lost its package prefix and two imports are blank.
//  - `val keyPairs = (0 until keyPairsLen) => (secret, secret.getPublicKey))` lost the mapping
//    that produces `secret` (presumably via buildSchnorrPrivateKey).
//  - `val publicKeysBytes: util.List[Array[Byte]] =` lost its right-hand side.
// NOTE(review): the FileReader/BufferedReader in buildSchnorrPrivateKey is never closed, and
// getResource(...) is dereferenced without a null check; a missing resource surfaces as the
// assertion failure in the catch block.
package com.horizen import{BufferedReader, File, FileReader} import java.util.Optional import java.{lang, util} import import import com.horizen.cryptolibprovider.{SchnorrFunctionsImplZendoo, ThresholdSignatureCircuitImplZendoo} import com.horizen.proposition.MCPublicKeyHashProposition import com.horizen.schnorrnative.SchnorrSecretKey import com.horizen.utils.BytesUtils import org.junit.Assert.{assertEquals, assertTrue} import org.junit.{Ignore, Test} import scala.collection.JavaConverters._ import scala.util.Random class SigProofTest { private val classLoader: ClassLoader = getClass.getClassLoader private val sigCircuit: ThresholdSignatureCircuitImplZendoo = new ThresholdSignatureCircuitImplZendoo() private val schnorrFunctions: SchnorrFunctionsImplZendoo = new SchnorrFunctionsImplZendoo() private def buildSchnorrPrivateKey(index: Int): SchnorrSecretKey = { var bytes: Array[Byte] = null try { val resourceName = "schnorr_sk0"+ index + "_hex" val file = new FileReader(classLoader.getResource(resourceName).getFile) bytes = BytesUtils.fromHexString(new BufferedReader(file).readLine()) } catch { case e: Exception => assertEquals(e.toString(), true, false) } SchnorrSecretKey.deserialize(bytes) } //Test will take around 2 minutes, enable for sanity checking of ThresholdSignatureCircuit @Ignore @Test def simpleCheck(): Unit = { val keyPairsLen = 7 val threshold = 5 //hardcoded value val keyPairs = (0 until keyPairsLen) => (secret, secret.getPublicKey)) val publicKeysBytes: util.List[Array[Byte]] = val provingKeyPath = new File(classLoader.getResource("sample_proving_key_7_keys_with_threshold_5").getFile).getAbsolutePath; val verificationKeyPath = new File(classLoader.getResource("sample_vk_7_keys_with_threshold_5").getFile).getAbsolutePath; val sysConstant = sigCircuit.generateSysDataConstant(publicKeysBytes, threshold) val mcBlockHash = Array.fill(32)(Random.nextInt().toByte) val previousMcBlockHash = Array.fill(32)(Random.nextInt().toByte) val wb: 
util.List[WithdrawalRequestBox] = Seq(new WithdrawalRequestBox(new WithdrawalRequestBoxData(new MCPublicKeyHashProposition(Array.fill(20)(Random.nextInt().toByte)), 2345), 42)).asJava val messageToBeSigned = sigCircuit.generateMessageToBeSigned(wb, mcBlockHash, previousMcBlockHash) val emptySigs = List.fill[Optional[Array[Byte]]](keyPairsLen - threshold)(Optional.empty[Array[Byte]]()) val signatures: util.List[Optional[Array[Byte]]] = (keyPairs .map{case (secret, public) => schnorrFunctions.sign(secret.serializeSecretKey(), public.serializePublicKey(), messageToBeSigned)} .map(b => Optional.of(b)) .take(threshold) .toList ++ emptySigs) .asJava val proofAndQuality: utils.Pair[Array[Byte], lang.Long] = sigCircuit.createProof(wb, mcBlockHash, previousMcBlockHash, publicKeysBytes, signatures, threshold, provingKeyPath) val result = sigCircuit.verifyProof(wb, mcBlockHash, previousMcBlockHash, proofAndQuality.getValue, proofAndQuality.getKey, sysConstant, verificationKeyPath) assertTrue("Proof verification expected to be successfully", result) } }
Example 48
Source File: AccStorage.scala From EncryCore with GNU General Public License v3.0 | 5 votes |
// AccStorage: LevelDB-backed store for a salted password hash.
//  - verifyPassword: reads the stored salt and hash, hashes the candidate's bytes followed by the
//    salt with Algos.hash, and compares the result to the stored hash with sameElements.
//  - setPassword: generates a random salt and writes the new hash and salt in one write batch.
//    It returns Right(()) on success or Left(err) for any Throwable; the batch is always closed.
//  - close: closes the underlying DB.
// The companion defines the tagged types, the two storage keys (hashes of "Password_Key" and
// "Salt_Key"), the storage directory and an `init` factory.
// NOTE(review): this listing was damaged by extraction and does not compile as shown:
//  - three imports are blank (presumably java.io.File plus the ones providing StorageKey and
//    the LevelDB factory).
//  - `new File(s"${}/userKeys")` lost the interpolated expression.
//  - `override val storage: DB =, new Options)` lost the call that opens the database.
// NOTE(review): pass.getBytes() uses the platform default charset, so the stored hash depends on
// the JVM's locale settings. Changing it would invalidate existing hashes, so treat that as a
// migration rather than a drive-by fix.
// NOTE(review): verifyPassword does not guard against storage.get returning null when no
// password has been set — confirm how callers handle a fresh store.
package encry.api.http import import cats.syntax.either._ import com.typesafe.scalalogging.StrictLogging import encry.settings.EncryAppSettings import import import org.encryfoundation.common.utils.Algos import org.iq80.leveldb.{DB, Options} import scorex.utils.Random import supertagged.TaggedType trait AccStorage extends StrictLogging with AutoCloseable { val storage: DB val verifyPassword: String => Boolean = pass => { val salt = storage.get(AccStorage.SaltKey) val passHash = storage.get(AccStorage.PasswordHashKey) Algos.hash(pass.getBytes() ++ salt) sameElements passHash } def setPassword(pass: String): Either[Throwable, Unit] = { val batch = storage.createWriteBatch() val salt = Random.randomBytes() try { batch.put(AccStorage.PasswordHashKey, Algos.hash(pass.getBytes() ++ salt)) batch.put(AccStorage.SaltKey, salt) storage.write(batch).asRight[Throwable] } catch { case err: Throwable => err.asLeft[Unit] } finally { batch.close() } } override def close(): Unit = storage.close() } object AccStorage extends StrictLogging { object PasswordHash extends TaggedType[Array[Byte]] object PasswordSalt extends TaggedType[Array[Byte]] type PasswordHash = PasswordHash.Type type PasswordSalt = PasswordSalt.Type val PasswordHashKey: StorageKey = StorageKey @@ Algos.hash("Password_Key") val SaltKey: StorageKey = StorageKey @@ Algos.hash("Salt_Key") def getDirStorage(settings: EncryAppSettings): File = new File(s"${}/userKeys") def init(settings: EncryAppSettings): AccStorage = new AccStorage { override val storage: DB =, new Options) } }
Example 49
Source File: SettingsReaders.scala From EncryCore with GNU General Public License v3.0 | 5 votes |
package encry.settings

import java.io.File
import java.net.InetSocketAddress

import com.typesafe.config.{Config, ConfigException}
// NOTE(review): the two encry.storage imports were blank in the extracted listing. They are
// reconstructed from the names used below (StorageType, VersionalStorage.IODB / LevelDB) —
// confirm the package against the upstream file.
import encry.storage.VersionalStorage
import encry.storage.VersionalStorage.StorageType
import encry.utils.ByteStr
import net.ceedubs.ficus.readers.ValueReader
import org.encryfoundation.common.utils.constants.{Constants, TestNetConstants}

import scala.util.Try

/** Ficus `ValueReader` instances for the custom types that appear in the node configuration. */
trait SettingsReaders {

  /** Reads a Base58 string as a `ByteStr`; `.get` rethrows whatever decoding failure occurs. */
  implicit val byteStrReader: ValueReader[ByteStr] =
    (cfg, path) => ByteStr.decodeBase58(cfg.getString(path)).get

  /**
   * Reads the storage backend name. Only the exact spellings "iodb" and "LevelDb" are accepted;
   * any other value is reported as a bad configuration value rather than a bare MatchError.
   */
  implicit val storageTypeReader: ValueReader[StorageType] = (cfg, path) =>
    cfg.getString(path) match {
      case "iodb"    => VersionalStorage.IODB
      case "LevelDb" => VersionalStorage.LevelDB
      case other =>
        throw new ConfigException.BadValue(
          path,
          s"unknown storage type '$other', expected 'iodb' or 'LevelDb'")
    }

  /** Reads a string as a filesystem path. */
  implicit val fileReader: ValueReader[File] = (cfg, path) => new File(cfg.getString(path))

  /** Reads an integer and narrows it to a `Byte` (out-of-range values wrap, as with `toByte`). */
  implicit val byteValueReader: ValueReader[Byte] = (cfg, path) => cfg.getInt(path).toByte

  /**
   * Reads a "host:port" string. The port is whatever follows the last ':' so that host parts
   * containing colons are kept intact; malformed values are reported as a bad configuration
   * value instead of an index or number-format exception.
   */
  implicit val inetSocketAddressReader: ValueReader[InetSocketAddress] = {
    (config: Config, path: String) =>
      val raw = config.getString(path)
      val separator = raw.lastIndexOf(':')
      val parsed = for {
        idx  <- Some(separator).filter(i => i >= 0 && i < raw.length - 1)
        port <- Try(raw.substring(idx + 1).toInt).toOption.filter(p => p >= 0 && p <= 65535)
      } yield new InetSocketAddress(raw.substring(0, idx), port)
      parsed.getOrElse(
        throw new ConfigException.BadValue(path, s"expected '<host>:<port>' but found '$raw'"))
  }

  /**
   * Selects the constants set by name. A missing path or an unrecognised name falls back to
   * `TestNetConstants`.
   */
  implicit val ConstantsSettingsReader: ValueReader[Constants] = (cfg, path) => {
    val constantsClass = if (cfg.hasPath(path)) cfg.getString(path) else ""
    constantsClass match {
      case "TestConstants"       => TestConstants
      case "SlowMiningConstants" => SlowMiningConstants
      case _                     => TestNetConstants
    }
  }
}
Example 50
Source File: RootNodesStorageTest.scala From EncryCore with GNU General Public License v3.0 | 5 votes |
// RootNodesStorageTest: property spec checking that RootNodesStorage can restore an AVL root.
//  - createAvl: builds an AvlTree over a VLDBWrapper (key size 33) in a random temp directory,
//    paired with an empty root storage. The second `dir` / `levelDb` pair it creates is not used
//    afterwards in the visible code.
//  - "testRollback": creates a RootNodesStorage (second argument 10) over a fresh DB, then folds
//    over a random number of heights. At each height it inserts a random batch of key/value
//    pairs into the tree, deletes the first key of a previous insertion, stores the new root in
//    the root storage and records the batch. Finally it calls rollbackToSafePoint with the
//    batches recorded after the safe-point height and expects the restored root hash to equal
//    the hash of the tree built by the fold.
// NOTE(review): this listing was damaged by extraction and does not compile as shown. The
// package name and several import prefixes are blank, each `=, new Options)` lost the call that
// opens the LevelDB instance, and `val previousInsertions =[(StorageKey, StorageValue)])` lost
// its left-hand expression.
// NOTE(review): rollbackToSafePoint and safePointHeight are invoked on the initial
// `rootNodesStorage`, not on the storage returned by the fold (discarded as `_`). Presumably
// both share the same underlying DB; confirm that this is intended.
package import import encry.view.state.avlTree.utils.implicits.Instances._ import encry.modifiers.InstanceFactory import{StorageKey, StorageValue, StorageVersion} import{LevelDbFactory, VLDBWrapper, VersionalLevelDBCompanion} import encry.utils.{EncryGenerator, FileHelper} import encry.view.state.avlTree.AvlTree import org.encryfoundation.common.utils.Algos import org.encryfoundation.common.utils.TaggedTypes.Height import org.iq80.leveldb.{DB, Options, ReadOptions} import org.scalatest.{FunSuite, Matchers, PropSpec} import scorex.utils.Random import scala.util.{Random => SRandom} class RootNodesStorageTest extends PropSpec with InstanceFactory with EncryGenerator with Matchers { def createAvl: AvlTree[StorageKey, StorageValue] = { val firstDir: File = FileHelper.getRandomTempDir val firstStorage: VLDBWrapper = { val levelDBInit =, new Options) VLDBWrapper(VersionalLevelDBCompanion(levelDBInit, settings.levelDB.copy(keySize = 33), keySize = 33)) } val dir: File = FileHelper.getRandomTempDir val levelDb: DB =, new Options) AvlTree[StorageKey, StorageValue](firstStorage, RootNodesStorage.emptyRootStorage[StorageKey, StorageValue]) } property("testRollback") { val avl: AvlTree[StorageKey, StorageValue] = createAvl val dir: File = FileHelper.getRandomTempDir val levelDb: DB =, new Options) val batch1 = levelDb.createWriteBatch() val readOptions1 = new ReadOptions() val rootNodesStorage = RootNodesStorage[StorageKey, StorageValue](levelDb, 10, dir) val (_, avlAfterInsertions, insertList) = (0 to SRandom.nextInt(1000) + 10).foldLeft(rootNodesStorage, avl, List.empty[(Height, (List[(StorageKey, StorageValue)], List[StorageKey]))]) { case ((rootStorage, previousAvl, insertionList), height) => val version = StorageVersion @@ Random.randomBytes() val toInsert = (0 to SRandom.nextInt(100)).foldLeft(List.empty[(StorageKey, StorageValue)]) { case (list, _) => (StorageKey @@ Random.randomBytes() -> StorageValue @@ Random.randomBytes()) :: list } val previousInsertions 
=[(StorageKey, StorageValue)]) val deletions = previousInsertions.take(1).map(_._1) val newAvl = previousAvl.insertAndDeleteMany( version, toInsert, deletions ) val newRootStorage = rootStorage.insert( version, newAvl.rootNode, Height @@ height ) (newRootStorage, newAvl, insertionList :+ (Height @@ height -> (toInsert -> deletions))) } val (_, rootNodeRestored) = rootNodesStorage.rollbackToSafePoint(insertList.dropWhile(_._1 != rootNodesStorage.safePointHeight).drop(1)) (avlAfterInsertions.rootNode.hash sameElements rootNodeRestored.hash) shouldBe true } }
Example 51
Source File: SnapshotAssemblerBench.scala From EncryCore with GNU General Public License v3.0 | 5 votes |
// SnapshotAssemblerBench: JMH benchmark scaffold for snapshot assembly.
//  - createTree: the only statement inside bh.consume is commented out, so as written the
//    benchmark measures an empty block.
//  - main: configures a JMH run (1 fork, 1 thread, average time in seconds, GC profiler, with
//    warm-up and measurement settings from benchSettings) and starts it.
//  - SnapshotAssemblerBenchState: builds an AvlTree by inserting one generated asset box per
//    index in `from to to` (0 to 500000 for the default state), each under a fresh random
//    version. It also generates a genesis block at height 1.
// NOTE(review): this listing was damaged by extraction and does not compile as shown. Several
// imports are blank or lost their package prefix, each `=, new Options)` lost the call that
// opens the LevelDB instance, and `(StorageKey !@@, StorageValue @@ bx.bytes)` lost the key
// expression (presumably the box id).
// NOTE(review): `tmpDir` is defined but not used in the visible code.
package benches import import java.util.concurrent.TimeUnit import benches.SnapshotAssemblerBench.SnapshotAssemblerBenchState import encry.view.state.avlTree.utils.implicits.Instances._ import benches.StateBenches.{StateBenchState, benchSettings} import benches.Utils.{getRandomTempDir, utxoFromBoxHolder} import encry.settings.Settings import{RootNodesStorage, VersionalStorage} import{StorageKey, StorageValue, StorageVersion} import{LevelDbFactory, VLDBWrapper, VersionalLevelDBCompanion} import encry.utils.FileHelper import import encry.view.state.UtxoState import encry.view.state.avlTree.AvlTree import org.encryfoundation.common.utils.TaggedTypes.Height import org.iq80.leveldb.{DB, Options} import org.openjdk.jmh.annotations.{Benchmark, Mode, Scope, State} import org.openjdk.jmh.infra.Blackhole import org.openjdk.jmh.profile.GCProfiler import org.openjdk.jmh.runner.{Runner, RunnerException} import org.openjdk.jmh.runner.options.{OptionsBuilder, TimeValue, VerboseMode} import scorex.utils.Random class SnapshotAssemblerBench { @Benchmark def createTree(stateBench: SnapshotAssemblerBenchState, bh: Blackhole): Unit = { bh.consume { //stateBench.a.initializeSnapshotData(stateBench.block1) } } } object SnapshotAssemblerBench { @throws[RunnerException] def main(args: Array[String]): Unit = { val opt = new OptionsBuilder() .include(".*" + classOf[SnapshotAssemblerBench].getSimpleName + ".*") .forks(1) .threads(1) .warmupIterations(benchSettings.benchesSettings.warmUpIterations) .measurementIterations(benchSettings.benchesSettings.measurementIterations) .mode(Mode.AverageTime) .timeUnit(TimeUnit.SECONDS) .verbosity(VerboseMode.EXTRA) .addProfiler(classOf[GCProfiler]) .warmupTime(TimeValue.milliseconds(benchSettings.benchesSettings.warmUpTime)) .measurementTime(TimeValue.milliseconds(benchSettings.benchesSettings.measurementTime)) .build new Runner(opt).run } @State(Scope.Benchmark) class SnapshotAssemblerBenchState extends Settings { val a: AvlTree[StorageKey, StorageValue] 
= createAvl("9gKDVmfsA6J4b78jDBx6JmS86Zph98NnjnUqTJBkW7zitQMReia", 0, 500000) val block1 = Utils.generateGenesisBlock(Height @@ 1) def createAvl(address: String, from: Int, to: Int): AvlTree[StorageKey, StorageValue] = { val firstDir: File = FileHelper.getRandomTempDir val firstStorage: VLDBWrapper = { val levelDBInit =, new Options) VLDBWrapper(VersionalLevelDBCompanion(levelDBInit, settings.levelDB, keySize = 32)) } val dir: File = FileHelper.getRandomTempDir val levelDb: DB =, new Options) val rootNodesStorage = RootNodesStorage[StorageKey, StorageValue](levelDb, 10, dir) val firstAvl: AvlTree[StorageKey, StorageValue] = AvlTree[StorageKey, StorageValue](firstStorage, rootNodesStorage) val avlNew = (from to to).foldLeft(firstAvl) { case (avl, i) => val bx = Utils.genAssetBox(address, i, nonce = i) val b = (StorageKey !@@, StorageValue @@ bx.bytes) avl.insertAndDeleteMany(StorageVersion @@ Random.randomBytes(), List(b), List.empty) } avlNew } def tmpDir: File = FileHelper.getRandomTempDir } }
Example 52
Source File: HistoryBenches.scala From EncryCore with GNU General Public License v3.0 | 5 votes |
package benches

import java.io.File
import java.util.concurrent.TimeUnit

import benches.HistoryBenches.HistoryBenchState
import benches.Utils._
import encry.view.history.History
import encryBenchmark.BenchSettings
import org.encryfoundation.common.modifiers.history.Block
import org.openjdk.jmh.annotations._
import org.openjdk.jmh.infra.Blackhole
import org.openjdk.jmh.profile.GCProfiler
import org.openjdk.jmh.runner.{Runner, RunnerException}
import org.openjdk.jmh.runner.options.{OptionsBuilder, TimeValue, VerboseMode}

/** JMH benchmarks for appending blocks to a `History` and for re-opening an existing one. */
class HistoryBenches {

  /**
   * Appends every pre-generated block (header, then payload, then validity report) to a history
   * created in a fresh temporary directory, and closes its storage afterwards.
   */
  @Benchmark
  def appendBlocksToHistoryBench(benchStateHistory: HistoryBenchState, bh: Blackhole): Unit = {
    bh.consume {
      val history: History = generateHistory(benchStateHistory.settings, getRandomTempDir)
      // The folded result is intentionally not bound: closeStorage() is called on the history
      // that was opened above, exactly as before.
      benchStateHistory.blocks.foldLeft(history) { case (historyL, block) =>
        historyL.append(block.header)
        historyL.append(block.payload)
        historyL.reportModifierIsValid(block)
      }
      history.closeStorage()
    }
  }

  /** Opens a history over the directory populated by the benchmark state and closes it again. */
  @Benchmark
  def readHistoryFileBench(benchStateHistory: HistoryBenchState, bh: Blackhole): Unit = {
    bh.consume {
      val history: History = generateHistory(benchStateHistory.settings, benchStateHistory.tmpDir)
      history.closeStorage()
    }
  }
}

object HistoryBenches extends BenchSettings {

  /** Configures and starts the JMH runner for [[HistoryBenches]]. */
  @throws[RunnerException]
  def main(args: Array[String]): Unit = {
    val opt = new OptionsBuilder()
      .include(".*" + classOf[HistoryBenches].getSimpleName + ".*")
      .forks(1)
      .threads(1)
      .warmupIterations(benchSettings.benchesSettings.warmUpIterations)
      .measurementIterations(benchSettings.benchesSettings.measurementIterations)
      .mode(Mode.AverageTime)
      .timeUnit(TimeUnit.SECONDS)
      .verbosity(VerboseMode.EXTRA)
      .addProfiler(classOf[GCProfiler])
      .warmupTime(TimeValue.milliseconds(benchSettings.benchesSettings.warmUpTime))
      .measurementTime(TimeValue.milliseconds(benchSettings.benchesSettings.measurementTime))
      .build
    new Runner(opt).run
  }

  /**
   * Shared benchmark state: generates `blocksNumber` consecutive blocks, appends them to a
   * history stored in `tmpDir`, closes that history and exposes the generated blocks.
   */
  @State(Scope.Benchmark)
  class HistoryBenchState extends encry.settings.Settings {

    val tmpDir: File = getRandomTempDir

    val initialHistory: History = generateHistory(settings, tmpDir)

    val resultedHistory: (History, Option[Block], Vector[Block]) =
      (0 until benchSettings.historyBenchSettings.blocksNumber)
        .foldLeft(initialHistory, Option.empty[Block], Vector.empty[Block]) {
          case ((prevHistory, prevBlock, vector), _) =>
            val block: Block =
              generateNextBlockValidForHistory(prevHistory, 0, prevBlock, Seq(coinbaseTransaction(0)))
            prevHistory.append(block.header)
            prevHistory.append(block.payload)
            (prevHistory.reportModifierIsValid(block), Some(block), vector :+ block)
        }

    resultedHistory._1.closeStorage()

    val blocks: Vector[Block] = resultedHistory._3
  }
}
Example 53
Source File: StateRollbackBench.scala From EncryCore with GNU General Public License v3.0 | 5 votes |
// StateRollbackBench: JMH benchmark over UtxoState.
// `applyBlocksToTheState` builds a state from the prepared box holder, applies every block of
// `stateBench.chain` while collecting the resulting versions, rolls back to the second-to-last
// version, applies the last fork block and closes the state.
// `StateRollbackState` prepares the boxes, the genesis block and, per iteration, one main-chain
// block plus one fork block built on the same parent.
// NOTE(review): this listing was damaged during extraction — several `import` targets, the
// argument of `flatMap(...)` and the right-hand sides of `chain` / `forkBlocks` are missing.
// Restore them from the upstream file before compiling.
package benches import import java.util.concurrent.TimeUnit import benches.StateRollbackBench.StateRollbackState import benches.Utils._ import import encry.utils.CoreTaggedTypes.VersionTag import encry.view.state.{BoxHolder, UtxoState} import encryBenchmark.{BenchSettings, Settings} import org.encryfoundation.common.modifiers.history.Block import import org.encryfoundation.common.utils.TaggedTypes.{ADKey, Difficulty} import org.openjdk.jmh.annotations.{Benchmark, Mode, Scope, State} import org.openjdk.jmh.infra.Blackhole import org.openjdk.jmh.profile.GCProfiler import org.openjdk.jmh.runner.{Runner, RunnerException} import org.openjdk.jmh.runner.options.{OptionsBuilder, TimeValue, VerboseMode} class StateRollbackBench { @Benchmark def applyBlocksToTheState(stateBench: StateRollbackState, bh: Blackhole): Unit = { bh.consume { val innerState: UtxoState = utxoFromBoxHolder(stateBench.boxesHolder, getRandomTempDir, None, stateBench.settings, VersionalStorage.IODB) val newState = stateBench.chain.foldLeft(innerState -> List.empty[VersionTag]) { case ((state, rootHashes), block) => val newState = state.applyModifier(block).right.get newState -> (rootHashes :+ newState.version) } val stateAfterRollback = newState._1.rollbackTo(newState._2.dropRight(1).last, List.empty).get val stateAfterForkBlockApplying = stateAfterRollback.applyModifier(stateBench.forkBlocks.last).right.get stateAfterForkBlockApplying.close() } } } object StateRollbackBench extends BenchSettings { @throws[RunnerException] def main(args: Array[String]): Unit = { val opt = new OptionsBuilder() .include(".*" + classOf[StateRollbackBench].getSimpleName + ".*") .forks(1) .threads(1) .warmupIterations(benchSettings.benchesSettings.warmUpIterations) .measurementIterations(benchSettings.benchesSettings.measurementIterations) .mode(Mode.AverageTime) .timeUnit(TimeUnit.SECONDS) .verbosity(VerboseMode.EXTRA) .addProfiler(classOf[GCProfiler]) 
.warmupTime(TimeValue.milliseconds(benchSettings.benchesSettings.warmUpTime)) .measurementTime(TimeValue.milliseconds(benchSettings.benchesSettings.measurementTime)) .build new Runner(opt).run } @State(Scope.Benchmark) class StateRollbackState extends encry.settings.Settings { val tmpDir: File = getRandomTempDir val initialBoxes: IndexedSeq[AssetBox] = (0 until benchSettings.stateBenchSettings.totalBoxesNumber).map(nonce => genHardcodedBox(privKey.publicImage.address.address, nonce) ) val boxesHolder: BoxHolder = BoxHolder(initialBoxes) var state: UtxoState = utxoFromBoxHolder(boxesHolder, tmpDir, None, settings, VersionalStorage.LevelDB) val genesisBlock: Block = generateGenesisBlockValidForState(state) state = state.applyModifier(genesisBlock).right.get val stateGenerationResults: (List[(Block, Block)], Block, UtxoState, IndexedSeq[AssetBox]) = (0 until benchSettings.stateBenchSettings.blocksNumber).foldLeft(List.empty[(Block, Block)], genesisBlock, state, initialBoxes) { case ((blocks, block, stateL, boxes), _) => val nextBlockMainChain: Block = generateNextBlockForStateWithSpendingAllPreviousBoxes( block, stateL, block.payload.txs.flatMap([AssetBox])).toIndexedSeq) val nextBlockFork: Block = generateNextBlockForStateWithSpendingAllPreviousBoxes( block, stateL, block.payload.txs.flatMap([AssetBox])).toIndexedSeq, addDiff = Difficulty @@ BigInt(100) ) val stateN: UtxoState = stateL.applyModifier(nextBlockMainChain).right.get (blocks :+ (nextBlockMainChain, nextBlockFork), nextBlockMainChain, stateN, boxes.drop( benchSettings.stateBenchSettings.transactionsNumberInEachBlock * benchSettings.stateBenchSettings.numberOfInputsInOneTransaction) ) } val chain: List[Block] = genesisBlock +: val forkBlocks: List[Block] = genesisBlock +: state = stateGenerationResults._3 state.close() } }
Example 54
Source File: SparkConfig.scala From gsoc_relationship with Apache License 2.0 | 5 votes |
// SparkConfig: singleton holding the names used by the relationship job (keyspace, table and
// column-family identifiers as plain strings) plus the SparkConf / SparkContext built from them.
// `config` is parsed from ./config/relationship.conf but none of the values below read from it.
// NOTE(review): the package name, the first import (presumably java.io.File, since `new File`
// is used) and the key of the first `.set("", hosts)` call were lost during extraction —
// restore them from the upstream file before compiling.
package import import org.apache.spark.{SparkConf, SparkContext} import com.typesafe.config.ConfigFactory object SparkConfig { val config = ConfigFactory.parseFile(new File("./config/relationship.conf")) val hosts = "hosts" val username = "username" val password = "password" val keyspace = "keyspace" val analytics_knowledge_base = "analytics_knowledge_base" val analytics_mv_knowledge_base_by_feature = "analytics_mv_knowledge_base_by_feature" val analytics_primary_relationships = "analytics_primary_relationships" val results = "results" val results_meta = "results_meta" val results_data = "results_data" val objects_table = "objects_table" val appName = "relationship" val master = "localhost" val sparkconf = new SparkConf(true) .set("", hosts) .set("spark.cassandra.auth.username", username) .set("spark.cassandra.auth.password", password) val sc = new SparkContext(master, appName, sparkconf) }
Example 55
Source File: GoogleAuthentication.scala From amadou with Apache License 2.0 | 5 votes |
// GoogleAuthentication: builds an HttpRequestInitializer for a given authentication kind.
// For `Dbm` it reads the client-secrets file path from the Spark conf, loads the secrets,
// runs the installed-app authorization-code flow for user "user" (credentials are stored next
// to the secrets file) and wraps the credential so every request gets 10-minute connect and
// read timeouts.
// NOTE(review): most import targets, the scope string of `Dbm` and the Spark conf key were lost
// during extraction; restore them from the upstream file before compiling.
// NOTE(review): the FileReader handed to GoogleClientSecrets.load is never closed in this code.
package com.mediative.amadou.bigquery import{File, FileReader} import scala.collection.JavaConversions._ import import import{ GoogleAuthorizationCodeFlow, GoogleClientSecrets } import{HttpRequest, HttpRequestInitializer} import import import import org.apache.spark.sql.SparkSession sealed abstract class GoogleAuthentication(val scopes: String*) object GoogleAuthentication { lazy val HTTP_TRANSPORT = new NetHttpTransport() lazy val JSON_FACTORY = new JacksonFactory() case object Dbm extends GoogleAuthentication("") def apply(auth: GoogleAuthentication, spark: SparkSession): HttpRequestInitializer = auth match { case Dbm => val clientFilePath = spark.conf.get("") require(clientFilePath != null, "'' not configured") val clientFile = new File(clientFilePath) require(clientFile.exists, s"$clientFilePath does not exists") val clientSecrets = GoogleClientSecrets.load(JSON_FACTORY, new FileReader(clientFile)) val dataStoreFactory = new FileDataStoreFactory(clientFile.getParentFile) val flow = new GoogleAuthorizationCodeFlow.Builder( HTTP_TRANSPORT, JSON_FACTORY, clientSecrets, auth.scopes) .setDataStoreFactory(dataStoreFactory) .build() val cred = new AuthorizationCodeInstalledApp(flow, new LocalServerReceiver()) .authorize("user") new CustomHttpRequestInitializer(cred) } class CustomHttpRequestInitializer(wrapped: HttpRequestInitializer) extends HttpRequestInitializer { override def initialize(httpRequest: HttpRequest) = { wrapped.initialize(httpRequest) httpRequest.setConnectTimeout(10 * 60000) // 10 minutes connect timeout httpRequest.setReadTimeout(10 * 60000) // 10 minutes read timeout () } } }
Example 56
Source File: config.scala From spark-integration with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.k8s.integrationtest

import java.io.File
import java.nio.charset.StandardCharsets
import java.nio.file.Files

/** Accessors for the integration-test settings that are passed in as JVM system properties. */
package object config {

  /**
   * Reads the image tag from the file named by `spark.kubernetes.test.imageTagFile`.
   *
   * @return the file content decoded as UTF-8, with surrounding whitespace trimmed
   * @throws IllegalArgumentException if the property is unset or does not point to a regular file
   */
  def getTestImageTag: String = {
    val imageTagFileProp = System.getProperty("spark.kubernetes.test.imageTagFile")
    require(imageTagFileProp != null, "Image tag file must be provided in system properties.")
    val imageTagFile = new File(imageTagFileProp)
    require(imageTagFile.isFile, s"No file found for image tag at ${imageTagFile.getAbsolutePath}.")
    // The original import targets were lost; the standard library reads the file just as well.
    new String(Files.readAllBytes(imageTagFile.toPath), StandardCharsets.UTF_8).trim
  }

  /**
   * Returns the value of the `spark.kubernetes.test.imageRepo` system property.
   *
   * @throws IllegalArgumentException if the property is unset
   */
  def getTestImageRepo: String = {
    val imageRepo = System.getProperty("spark.kubernetes.test.imageRepo")
    require(imageRepo != null, "Image repo must be provided in system properties.")
    imageRepo
  }
}
Example 57
Source File: Minikube.scala From spark-integration with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.k8s.integrationtest.backend.minikube

import java.io.File
import java.nio.file.Paths

import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient}

import org.apache.spark.deploy.k8s.integrationtest.{Logging, ProcessUtils}

// TODO support windows
/** Thin wrapper around the `minikube` command line, run through `bash -c`. */
private[spark] object Minikube extends Logging {

  private val MINIKUBE_STARTUP_TIMEOUT_SECONDS = 60

  /** Returns the single IPv4-looking line printed by `minikube ip`. */
  def getMinikubeIp: String = {
    val outputs = executeMinikube("ip")
      .filter(_.matches("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$"))
    assert(outputs.size == 1, "Unexpected amount of output from minikube ip")
    outputs.head
  }

  /**
   * Parses the first `minikubeVM:` / `minikube:` line of `minikube status`.
   *
   * @throws IllegalStateException if no such line is printed or the status is unknown
   */
  def getMinikubeStatus: MinikubeStatus.Value = {
    val statusString = executeMinikube("status")
      .find(line => line.contains("minikubeVM: ") || line.contains("minikube:"))
      // An empty match used to surface as a bare NoSuchElementException from `.head`.
      .getOrElse(throw new IllegalStateException("No status line in the output of minikube status"))
      .replaceFirst("minikubeVM: ", "")
      .replaceFirst("minikube: ", "")
    MinikubeStatus.unapply(statusString)
      .getOrElse(throw new IllegalStateException(s"Unknown status $statusString"))
  }

  /** Builds a client for the API server at `https://<minikube ip>:8443` using the certs in `~/.minikube`. */
  def getKubernetesClient: DefaultKubernetesClient = {
    val kubernetesMaster = s"https://${getMinikubeIp}:8443"
    val userHome = System.getProperty("user.home")
    val kubernetesConf = new ConfigBuilder()
      .withApiVersion("v1")
      .withMasterUrl(kubernetesMaster)
      .withCaCertFile(Paths.get(userHome, ".minikube", "ca.crt").toFile.getAbsolutePath)
      .withClientCertFile(Paths.get(userHome, ".minikube", "apiserver.crt").toFile.getAbsolutePath)
      .withClientKeyFile(Paths.get(userHome, ".minikube", "apiserver.key").toFile.getAbsolutePath)
      .build()
    new DefaultKubernetesClient(kubernetesConf)
  }

  /**
   * Runs `minikube <action> <args...>` and returns its output lines.
   *
   * The whole command must be a single string: words placed after the `bash -c` command string
   * become `$0`, `$1`, ... of the shell and would never reach minikube.
   */
  private def executeMinikube(action: String, args: String*): Seq[String] = {
    val command = (s"minikube $action" +: args).mkString(" ")
    ProcessUtils.executeProcess(
      Array("bash", "-c", command),
      MINIKUBE_STARTUP_TIMEOUT_SECONDS)
  }
}

private[spark] object MinikubeStatus extends Enumeration {

  // The values below are matched by name against the status text printed by `minikube status`
  // (the link to the upstream list of states was lost from the original comment).
  val STARTING = status("Starting")
  val RUNNING = status("Running")
  val PAUSED = status("Paused")
  val STOPPING = status("Stopping")
  val STOPPED = status("Stopped")
  val ERROR = status("Error")
  val TIMEOUT = status("Timeout")
  val SAVED = status("Saved")
  val NONE = status("")

  /** Registers a new enumeration value whose name is `value`. */
  def status(value: String): Value = new Val(nextId, value)

  /** Looks up the enumeration value whose name equals `s`. */
  def unapply(s: String): Option[Value] = values.find(s == _.toString)
}
Example 58
Source File: RMCallbackHandler.scala From DataXServer with Apache License 2.0 | 5 votes |
// RMCallbackHandler: AMRMClientAsync callback handler of the application master.
// For every allocated container it builds a ContainerLaunchContext whose command comes from
// `containerCmd(container)`, attaches the executor package (path read from
// Constants.DATAX_EXECUTOR_FILE) as an APPLICATION-visible ARCHIVE local resource, and starts the
// container through `nmClient`. The other callbacks only log; `getProgress` always returns 0.
// NOTE(review): the package name, several import targets and the receivers of the logging calls
// (and parts of their interpolated messages) were lost during extraction — restore them from the
// upstream file before compiling.
package import import java.util.{Collections, List} import{Constants, HamalConf} import //import java.util.Collections import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path, FileContext} import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.client.api.{AMRMClient, NMClient} import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.util.{ConverterUtils, Records} import scala.jdk.CollectionConverters._ //import scala.collection.JavaConverters._ /** * Created by zhuhq on 2016/4/29. */ class RMCallbackHandler(nmClient:NMClient,containerCmd:Container => String,hamalConf: HamalConf,yarnConfiguration: Configuration) extends AMRMClientAsync.CallbackHandler { private val logging = org.slf4j.LoggerFactory.getLogger(classOf[RMCallbackHandler]) override def onContainersCompleted(statuses: List[ContainerStatus]): Unit = { for(containerStatus <- statuses.asScala) {"containerId:${containerStatus} exitStatus:${containerStatus}") } } override def onError(e: Throwable): Unit = { logging.error("on error",e) } override def getProgress: Float = { 0 } override def onShutdownRequest(): Unit = {"on shutdown request") } override def onNodesUpdated(updatedNodes: List[NodeReport]): Unit = {"on nodes updated") for(nodeReport <- updatedNodes.asScala) {"node id:${nodeReport} node labels:${nodeReport}"); } } override def onContainersAllocated(containers: List[Container]): Unit = {"on containers allocated"); for (container:Container <- containers.asScala) { try { // Launch container by create ContainerLaunchContext val ctx = Records.newRecord(classOf[ContainerLaunchContext]); //ctx.setCommands(Collections.singletonList(""" echo "begin";sleep 900;echo "end"; """)) ctx.setCommands(Collections.singletonList(containerCmd(container))) val packagePath = hamalConf.getString(Constants.DATAX_EXECUTOR_FILE,""); val archiveStat = 
FileSystem.get(yarnConfiguration).getFileStatus(new Path(packagePath)) val packageUrl = ConverterUtils.getYarnUrlFromPath( FileContext.getFileContext.makeQualified(new Path(packagePath))); val packageResource = Records.newRecord[LocalResource](classOf[LocalResource]) packageResource.setResource(packageUrl); packageResource.setSize(archiveStat.getLen); packageResource.setTimestamp(archiveStat.getModificationTime); packageResource.setType(LocalResourceType.ARCHIVE); packageResource.setVisibility(LocalResourceVisibility.APPLICATION) ctx.setLocalResources(Collections.singletonMap(Constants.DATAX_EXECUTOR_ARCHIVE_FILE_NAME,packageResource))"[AM] Launching container " + container.getId()); nmClient.startContainer(container, ctx); } catch { case ex:Exception =>"[AM] Error launching container " + container.getId() + " " + ex); } } } }
Example 59
Source File: FileUtil.scala From wookiee with Apache License 2.0 | 5 votes |
// getSymLink(f): returns `f.getCanonicalFile` when the path of `f` is a symbolic link, otherwise
// `f.getAbsoluteFile`; throws NullPointerException("File must not be null") for a null argument.
// NOTE(review): the declaration that encloses this method (closed by the final `}`) and two
// import targets were lost during extraction — restore them from the upstream file.
package com.webtrends.harness.utils import import java.nio.file.{FileSystems, Files, Path} import def getSymLink(f:File) : File = { if (f == null) throw new NullPointerException("File must not be null") val path = FileSystems.getDefault.getPath(f.getPath) if (Files.isSymbolicLink(path)) { f.getCanonicalFile } else { f.getAbsoluteFile } } }
Example 60
Source File: ConfigSpec.scala From wookiee with Apache License 2.0 | 5 votes |
// ConfigSpec: specs2 specification for ConfigWatcherActor.
// A parent actor forwards everything its ConfigWatcherActor child sends to a TestProbe and
// everything else to the child. The spec checks the health reply and then writes
// services/test/conf/test.conf, expecting a ConfigChange message. The final step terminates the
// actor system and deletes the "services" directory.
// NOTE(review): several import targets and the first key of the inline config string were lost
// during extraction — restore them from the upstream file before compiling.
// NOTE(review): `msg.state equals ComponentState.NORMAL` yields a Boolean rather than using a
// specs2 matcher — confirm that this is the intended assertion style.
package com.webtrends.harness import{BufferedWriter, File, FileWriter} import java.util.concurrent.TimeUnit import{Actor, ActorSystem, Props} import akka.testkit.TestProbe import com.typesafe.config.ConfigFactory import import com.webtrends.harness.config.ConfigWatcherActor import{ComponentState, HealthComponent} import com.webtrends.harness.service.messages.CheckHealth import org.specs2.mutable.SpecificationWithJUnit import scala.concurrent.ExecutionContextExecutor import scala.concurrent.duration.FiniteDuration import{Directory, Path} class ConfigSpec extends SpecificationWithJUnit { implicit val dur = FiniteDuration(2, TimeUnit.SECONDS) new File("services/test/conf").mkdirs() implicit val sys = ActorSystem("system", ConfigFactory.parseString( """ = "" services { path = "services" } """).withFallback(ConfigFactory.load)) implicit val ec: ExecutionContextExecutor = sys.dispatcher val probe = TestProbe() val parent = sys.actorOf(Props(new Actor { val child = context.actorOf(ConfigWatcherActor.props, "child") def receive = { case x if sender == child => probe.ref forward x case x => child forward x } })) sequential "config " should { "be in good health" in { probe.send(parent, CheckHealth) val msg = probe.expectMsgClass(classOf[HealthComponent]) msg.state equals ComponentState.NORMAL } "detect changes in config" in { val file = new File("services/test/conf/test.conf") val bw = new BufferedWriter(new FileWriter(file)) bw.write("test = \"value\"") bw.close() val msg = probe.expectMsgClass(classOf[ConfigChange]) msg.isInstanceOf[ConfigChange] } } step { sys.terminate().onComplete { _ => Directory(Path(new File("services"))).deleteRecursively() } } }
Example 61
Source File: SparkFunSuite.scala From tispark with Apache License 2.0 | 5 votes |
package org.apache.spark

import java.io.File

import org.apache.spark.internal.Logging
import org.scalatest._
import org.slf4j.Logger

/** Base suite that brackets every test with log lines and resolves test resources from the classpath. */
abstract class SparkFunSuite extends FunSuite with Logging {

  protected val logger: Logger = log

  /** Logs a banner before and after the test body; the closing banner is logged even on failure. */
  final protected override def withFixture(test: NoArgTest): Outcome = {
    val testName = test.text
    val suiteName = this.getClass.getName
    // Literal replacement: with `replaceAll` the dots were regex wildcards.
    val shortSuiteName = suiteName.replace("org.apache.spark", "o.a.s")
    try {
      logInfo(s"\n\n===== TEST OUTPUT FOR $shortSuiteName: '$testName' =====\n")
      test()
    } finally {
      logInfo(s"\n\n===== FINISHED $shortSuiteName: '$testName' =====\n")
    }
  }

  /** Canonical path of the classpath resource `file`. */
  protected final def getTestResourcePath(file: String): String =
    getTestResourceFile(file).getCanonicalPath

  /**
   * Resolves the classpath resource `file` to a [[java.io.File]].
   *
   * @throws IllegalArgumentException if the resource is not on the classpath
   *                                  (previously an unexplained NullPointerException)
   */
  protected final def getTestResourceFile(file: String): File =
    Option(getClass.getClassLoader.getResource(file))
      .map(url => new File(url.getFile))
      .getOrElse(throw new IllegalArgumentException(s"Test resource not found on classpath: $file"))
}
Example 62
Source File: TPCDSQuerySuite.scala From tispark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.benchmark

import java.io.File

import org.apache.spark.sql.BaseTiSparkTest
import org.apache.spark.sql.catalyst.util.resourceToString

import scala.collection.mutable
import scala.util.control.NonFatal

/** Runs every query found under the `/tpcds-sql` resource directory through TiSpark. */
class TPCDSQuerySuite extends BaseTiSparkTest {

  private val tpcdsDirectory = getClass.getResource("/tpcds-sql").getPath
  private val tpcdsQueries = getListOfFiles(tpcdsDirectory)

  /**
   * Names (without the `.sql` suffix) of the regular files directly inside `dir`.
   * Returns an empty list when `dir` is missing, is not a directory or cannot be listed:
   * `listFiles` yields null in all of those cases.
   */
  private def getListOfFiles(dir: String): List[String] =
    Option(new File(dir).listFiles)
      .map(_.filter(_.isFile).map(_.getName.stripSuffix(".sql")).toList)
      .getOrElse(List.empty[String])

  /**
   * Runs each query: a helper thread sets the Spark job group, is joined for at most `timeout`
   * milliseconds (0 waits without limit), then the query is executed via `queryViaTiSpark`.
   *
   * @param queries query names, resolved to `tpcds-sql/<name>.sql` on the classpath
   * @param numRows currently unused; kept for source compatibility
   * @param timeout join timeout in milliseconds
   */
  private def run(queries: List[String], numRows: Int = 1, timeout: Int = 0): Unit =
    try {
      // set broadcast threshold to -1 so it will not oom
      spark.conf.set("spark.sql.autoBroadcastJoinThreshold", -1)
      setCurrentDatabase(tpcdsDBName)
      val succeeded = mutable.ArrayBuffer.empty[String]
      queries.foreach { q =>
        println(s"Query: $q")
        val start = System.currentTimeMillis()
        // We do not use statistic information here due to conflict of netty versions when physical plan has broadcast nodes.
        val queryString = resourceToString(
          s"tpcds-sql/$q.sql",
          classLoader = Thread.currentThread().getContextClassLoader)
        val df = spark.sql(queryString)
        var failed = false
        val jobGroup = s"benchmark $q"
        val t = new Thread("query runner") {
          override def run(): Unit =
            try {
              sqlContext.sparkContext.setJobGroup(jobGroup, jobGroup, interruptOnCancel = true)
            } catch {
              case e: Exception =>
                println("Failed to run: " + e)
                failed = true
            }
        }
        t.setDaemon(true)
        t.start()
        t.join(timeout)
        if (t.isAlive) {
          // Thread.join takes milliseconds; the message used to claim seconds.
          println(s"Timeout after $timeout ms")
          sqlContext.sparkContext.cancelJobGroup(jobGroup)
          t.interrupt()
        } else {
          if (!failed) {
            succeeded += q
            println(s" Took: ${System.currentTimeMillis() - start} ms")
            println("------------------------------------------------------------------")
          }
        }
        queryViaTiSpark(queryString)
        println(s"TiSpark finished $q")
      }
    } catch {
      // Let fatal JVM errors and interrupts propagate instead of turning them into test failures.
      case NonFatal(e) =>
        println(s"TiSpark failed to run TPCDS")
        fail(e)
    }

  test("TPCDS Test") {
    if (runTPCDS) {
      run(tpcdsQueries)
    }
  }
}
Example 63
Source File: Utils.scala From tispark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.test

import java.io.{File, PrintWriter}
import java.nio.file.{Files, Paths}
import java.util.Properties

import org.slf4j.Logger

import scala.collection.JavaConverters._

/** Small file, property and timing helpers for the test suites. */
object Utils {

  /** Writes `content` to the file at `path`, replacing any previous content. */
  def writeFile(content: String, path: String): Unit =
    TryResource(new PrintWriter(path))(_.close()) {
      _.print(content)
    }

  /** Runs `taskOp` on `res` and always runs `closeOp` afterwards, even if `taskOp` throws. */
  def TryResource[T](res: T)(closeOp: T => Unit)(taskOp: T => Unit): Unit =
    try {
      taskOp(res)
    } finally {
      closeOp(res)
    }

  /** Reads all lines of the file at `path`. */
  def readFile(path: String): List[String] =
    // Explicit conversion instead of the deprecated implicit JavaConversions.
    Files.readAllLines(Paths.get(path)).asScala.toList

  /**
   * Looks `key` up in the JVM system properties first and in `prop` second.
   *
   * @throws IllegalArgumentException if neither defines the key
   */
  def getOrThrow(prop: Properties, key: String): String =
    Option(System.getProperty(key))
      .orElse(Option(prop.getProperty(key)))
      .getOrElse(throw new IllegalArgumentException(key + " is null"))

  /** Boolean flag lookup that defaults to `false`. */
  def getFlagOrFalse(prop: Properties, key: String): Boolean =
    getFlag(prop, key, "false")

  /** True when the resolved value equals "true", ignoring case. */
  private def getFlag(prop: Properties, key: String, defValue: String): Boolean =
    getOrElse(prop, key, defValue).equalsIgnoreCase("true")

  /** Same lookup order as [[getOrThrow]], falling back to `defValue`. */
  def getOrElse(prop: Properties, key: String, defValue: String): String =
    Option(System.getProperty(key))
      .orElse(Option(prop.getProperty(key)))
      .getOrElse(defValue)

  /** Boolean flag lookup that defaults to `true`. */
  def getFlagOrTrue(prop: Properties, key: String): Boolean =
    getFlag(prop, key, "true")

  /** Evaluates `block`, logs the elapsed wall-clock time in seconds at INFO level and returns the result. */
  def time[R](block: => R)(logger: Logger): R = {
    val t0 = System.nanoTime()
    val result = block
    val t1 = System.nanoTime()
    logger.info("Elapsed time: " + (t1 - t0) / 1000.0 / 1000.0 / 1000.0 + "s")
    result
  }

  /** Creates the directory `basePath/paths...` including parents; true only if something was created. */
  def ensurePath(basePath: String, paths: String*): Boolean =
    new File(joinPath(basePath, paths: _*)).mkdirs()

  /** Joins the segments into one absolute path string. */
  def joinPath(basePath: String, paths: String*): String =
    Paths.get(basePath, paths: _*).toAbsolutePath.toString
}
Example 64
Source File: RedisBenchmarks.scala From spark-redis with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.redislabs.provider.redis

import java.io.{File, FileWriter, PrintWriter}
import java.time.{Duration => JDuration}

import com.redislabs.provider.redis.util.Logging

/** Mixin that times a block of code and appends the result to a report file. */
trait RedisBenchmarks extends Logging {

  val benchmarkReportDir = new File("target/reports/benchmarks/")
  benchmarkReportDir.mkdirs()

  /**
   * Evaluates `block` once and appends a line `<tag>, <ISO-8601 duration>` to
   * `results.txt` inside [[benchmarkReportDir]].
   *
   * @param tag   label written in front of the measured duration
   * @param block code to measure
   * @return the value produced by `block`
   */
  def time[R](tag: String)(block: => R): R = {
    val t0 = System.nanoTime()
    val result = block // call-by-name
    val t1 = System.nanoTime()
    val report = new PrintWriter(new FileWriter(s"$benchmarkReportDir/results.txt", true))
    // try/finally so the file handle is released even when the write fails
    try {
      // scalastyle:off
      report.println(s"$tag, ${JDuration.ofNanos(t1 - t0)}")
    } finally {
      report.close()
    }
    result
  }
}
Example 65
Source File: JsonReceiverActor.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
package org.apache.iota.fey

import java.io.File
import java.nio.file.Paths

import akka.actor.{Actor, ActorLogging, ActorRef, Props}
import play.api.libs.json.{JsValue, Json}

/**
 * Actor that owns the JSON watch thread: on start it prepares the dynamic jar directory,
 * processes checkpoint files and starts a daemon thread running a [[WatchServiceReceiver]].
 * Every [[JsonReceiverActor.JSON_RECEIVED]] message is forwarded to the parent as an orchestration.
 */
class JsonReceiverActor extends Actor with ActorLogging {

  import JsonReceiverActor._

  val monitoring_actor = FEY_MONITOR.actorRef
  var watchFileTask: WatchServiceReceiver = _
  var watchThread: Thread = _

  override def preStart(): Unit = {
    prepareDynamicJarRepo()
    processCheckpointFiles()

    watchFileTask = new WatchServiceReceiver(self)
    watchThread = new Thread(watchFileTask, GLOBAL_DEFINITIONS.WATCH_SERVICE_THREAD)

    monitoring_actor ! Monitor.START(Utils.getTimestamp)
    watchThread.setDaemon(true)
    watchThread.start()
  }

  /** Creates the dynamic jar directory, or empties it when a forced pull is configured. */
  private def prepareDynamicJarRepo() = {
    val jarDir = new File(CONFIG.DYNAMIC_JAR_REPO)
    if (!jarDir.exists()) {
      jarDir.mkdir()
    } else if (CONFIG.DYNAMIC_JAR_FORCE_PULL) {
      // listFiles returns null when the path is not a directory or cannot be read
      Option(jarDir.listFiles()).foreach(_.foreach(_.delete()))
    }
  }

  /** Replays checkpointed orchestrations when checkpointing is enabled. */
  private def processCheckpointFiles() = {
    if (CONFIG.CHEKPOINT_ENABLED) {
      val checkpoint = new CheckpointProcessor(self)
      // NOTE(review): this call was missing from the damaged listing and is restored on the
      // assumption that the processor is meant to run here — confirm against upstream.
      checkpoint.run()
    }
  }

  override def postStop(): Unit = {
    monitoring_actor ! Monitor.STOP(Utils.getTimestamp)
    watchThread.interrupt()
    watchThread.join()
  }

  override def postRestart(reason: Throwable): Unit = {
    monitoring_actor ! Monitor.RESTART(reason, Utils.getTimestamp)
    preStart()
  }

  override def receive: Receive = {
    case JSON_RECEIVED(json, file) =>
      log.info(s"JSON RECEIVED => ${Json.stringify(json)}")
      context.parent ! FeyCore.ORCHESTRATION_RECEIVED(json, Some(file))

    case _ =>
  }
}

object JsonReceiverActor {

  /** A parsed orchestration JSON together with the file it was read from. */
  case class JSON_RECEIVED(json: JsValue, file: File)
}
Example 66
Source File: WatchServiceReceiver.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
package org.apache.iota.fey

import java.io.File
import java.nio.file.StandardWatchEventKinds._
import java.nio.file.{FileSystems, Path}

import akka.actor.ActorRef
import org.apache.iota.fey.JsonReceiverActor.JSON_RECEIVED
import play.api.libs.json._

import scala.io.Source

/**
 * Watches registered directories for created or modified JSON files and sends every valid
 * orchestration to `receiverActor`. Files already present in the JSON repository are
 * processed once at construction time.
 */
class WatchServiceReceiver(receiverActor: ActorRef) extends JsonReceiver {

  processInitialFiles()

  private val watchService = FileSystems.getDefault.newWatchService()

  /** Registers `path` for create and modify events. */
  def watch(path: Path): Unit = path.register(watchService, ENTRY_CREATE, ENTRY_MODIFY)

  /** Parses the file at `params`; logs and returns None when reading or parsing fails. */
  def getJsonObject(params: String): Option[JsValue] = {
    try {
      val source = Source.fromFile(params)
      // close the underlying reader; it used to be leaked
      val stringJson = try source.getLines.mkString finally source.close()
      Option(Json.parse(stringJson))
    } catch {
      case e: Exception =>
        log.error("Could not parse JSON", e)
        None
    }
  }

  /** Blocks for the next watch key and processes each event that concerns a JSON file. */
  override def execute(): Unit = {
    val key = watchService.take()
    val eventsIterator = key.pollEvents().iterator()

    while (eventsIterator.hasNext) {
      val event = eventsIterator.next()
      val relativePath = event.context().asInstanceOf[Path]
      val path = key.watchable().asInstanceOf[Path].resolve(relativePath)

      log.debug(s"${event.kind()} --- $path")

      event.kind() match {
        case (ENTRY_CREATE | ENTRY_MODIFY) if path.toString.endsWith(CONFIG.JSON_EXTENSION) =>
          processJson(path.toString, path.toFile)
        case _ =>
      }
    }

    key.reset()
  }

  /**
   * Validates the JSON in `file`; when it is valid, resolves locations for every command
   * except DELETE and sends it to the receiver actor. Invalid JSON is logged at WARN level.
   */
  private[fey] def processJson(path: String, file: File) = {
    try {
      getJsonObject(path) match {
        case Some(orchestrationJSON) =>
          val valid = validJson(orchestrationJSON)
          if (valid && (orchestrationJSON \ JSON_PATH.COMMAND).as[String].toUpperCase != "DELETE") {
            checkForLocation(orchestrationJSON)
          }
          if (valid) {
            receiverActor ! JSON_RECEIVED(orchestrationJSON, file)
          } else {
            log.warn(s"File $path not processed. Incorrect JSON schema")
          }
        case None =>
      }
    } catch {
      case e: Exception =>
        log.error(s"File $path will not be processed", e)
    }
  }

  /** Processes every file of the JSON repository that carries the configured extension. */
  private def processInitialFiles() = {
    Utils.getFilesInDirectory(CONFIG.JSON_REPOSITORY)
      .filter(file => file.getName.endsWith(CONFIG.JSON_EXTENSION))
      .foreach(file => {
        processJson(file.getAbsolutePath, file)
      })
  }

  /** Logs anything but an interrupt, then closes the watch service. */
  override def exceptionOnRun(e: Exception): Unit = {
    e match {
      case e: InterruptedException =>
      case e: Exception => log.error("Watch Service stopped", e)
    }
    watchService.close()
  }
}
Example 67
Source File: FeyGenericActorReceiver.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
// FeyGenericActorReceiver: abstract FeyGenericActor whose `processMessage` turns an incoming
// message into a JSON string via `getJSONString`, forwards it through `processJson` unless it
// is "{}", and then starts the backoff. `processJson` validates the parsed JSON, resolves
// locations for every command except DELETE, and sends valid orchestrations to the Fey core
// actor. `resolveCredentials` reads user and password from the given JSON object and passes each
// through `envOrElse(value, value)`.
// NOTE(review): several import targets were lost during extraction, and `getJSONString`,
// `validJson` and `checkForLocation` are not defined in the visible text — restore the missing
// parts from the upstream file before compiling.
package org.apache.iota.fey import{File, FileOutputStream} import import java.nio.file.{Files, Paths} import com.eclipsesource.schema._ import import com.eclipsesource.schema.SchemaValidator import import play.api.libs.json._ import scala.concurrent.duration._ import scala.util.Properties._ abstract class FeyGenericActorReceiver(override val params: Map[String,String] = Map.empty, override val backoff: FiniteDuration = 1.minutes, override val connectTo: Map[String,ActorRef] = Map.empty, override val schedulerTimeInterval: FiniteDuration = 2.seconds, override val orchestrationName: String = "", override val orchestrationID: String = "", override val autoScale: Boolean = false) extends FeyGenericActor{ private[fey] val feyCore = FEY_CORE_ACTOR.actorRef override final def processMessage[T](message: T, sender: ActorRef): Unit = { try { val jsonString = getJSONString(message) if(jsonString != "{}") { processJson(jsonString) } startBackoff() }catch{ case e: Exception => log.error(e, s"Could not process message $message") } } private[fey] def processJson(jsonString: String) = { var orchID:String = "None" try{ val orchestrationJSON = Json.parse(jsonString) orchID = (orchestrationJSON \ JSON_PATH.GUID).as[String] val valid = validJson(orchestrationJSON) if(valid && (orchestrationJSON \ JSON_PATH.COMMAND).as[String].toUpperCase != "DELETE"){ checkForLocation(orchestrationJSON) } if(valid) { feyCore ! FeyCore.ORCHESTRATION_RECEIVED(orchestrationJSON, None) }else{ log.warning(s"Could not forward Orchestration $orchID. Invalid JSON schema") } } catch { case e: Exception => log.error(e, s"Orchestration $orchID could not be forwarded") } } def resolveCredentials(credentials: Option[JsObject]):Option[(String, String)] = { credentials match { case None => None case Some(cred) => val user = (cred \ JSON_PATH.JAR_CRED_USER).as[String] val password = (cred \ JSON_PATH.JAR_CRED_PASSWORD).as[String] Option(envOrElse(user,user), envOrElse(password,password)) } } }
Example 68
Source File: CheckpointProcessor.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
package org.apache.iota.fey

import java.io.File

import akka.actor.ActorRef
import org.apache.iota.fey.JsonReceiverActor.JSON_RECEIVED
import play.api.libs.json.{JsValue, Json}

import scala.io.Source

/**
 * Replays checkpointed orchestration files: `run()` processes every JSON file in the
 * checkpoint directory once, sends the valid ones to `receiverActor`, and deletes each
 * file that could be parsed.
 */
class CheckpointProcessor(receiverActor: ActorRef) extends JsonReceiver {

  override def run(): Unit = {
    processCheckpointFiles()
  }

  /** Parses the file at `params`; logs and returns None when reading or parsing fails. */
  def getJsonObject(params: String): Option[JsValue] = {
    try {
      val source = Source.fromFile(params)
      // close the underlying reader; it used to be leaked
      val stringJson = try source.getLines.mkString finally source.close()
      Option(Json.parse(stringJson))
    } catch {
      case e: Exception =>
        log.error("Could not parse JSON", e)
        None
    }
  }

  /**
   * Validates the JSON in `file`; when it is valid, resolves locations for every command
   * except DELETE and sends it to the receiver actor. The file is deleted whenever it parsed,
   * whether or not it passed validation.
   */
  private def processJson(path: String, file: File) = {
    try {
      getJsonObject(path) match {
        case Some(orchestrationJSON) =>
          val valid = validJson(orchestrationJSON)
          if (valid && (orchestrationJSON \ JSON_PATH.COMMAND).as[String].toUpperCase != "DELETE") {
            checkForLocation(orchestrationJSON)
          }
          if (valid) {
            receiverActor ! JSON_RECEIVED(orchestrationJSON, file)
          } else {
            log.warn(s"File $path not processed. Incorrect JSON schema")
          }
          file.delete()
        case None =>
      }
    } catch {
      case e: Exception =>
        log.error(s"File $path will not be processed", e)
    }
  }

  /** Processes every file of the checkpoint directory that carries the configured extension. */
  private def processCheckpointFiles() = {
    Utils.getFilesInDirectory(CONFIG.CHECKPOINT_DIR)
      .filter(file => file.getName.endsWith(CONFIG.JSON_EXTENSION))
      .foreach(file => {
        processJson(file.getAbsolutePath, file)
      })
  }

  // Nothing to do per iteration or on failure: all work happens in run().
  override def execute(): Unit = {}

  override def exceptionOnRun(e: Exception): Unit = {}
}
Example 69
Source File: JsonReceiver.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
// JsonReceiver.scala: only the tail of the JsonReceiver declaration (`exceptionOnRun`) and the
// HttpBasicAuth helper survive in this listing.
// HttpBasicAuth.encodeCredentials Base64-encodes "<username>:<password>"; getHeader prefixes the
// result with "Basic ".
// NOTE(review): several import targets and the body of the declaration that ends with
// `def exceptionOnRun(e: Exception): Unit }` were lost during extraction — restore them from the
// upstream file before compiling.
// NOTE(review): `getBytes` and `new String(...)` use the platform default charset here; consider
// an explicit charset if credentials may contain non-ASCII characters.
package org.apache.iota.fey import import import import com.eclipsesource.schema._ import org.slf4j.LoggerFactory import play.api.libs.json._ import JSON_PATH._ import java.nio.file.{Files, Paths} import import org.apache.commons.codec.binary.Base64 import scala.util.Properties._ def exceptionOnRun(e: Exception): Unit } object HttpBasicAuth { val BASIC = "Basic" val AUTHORIZATION = "Authorization" def encodeCredentials(username: String, password: String): String = { new String(Base64.encodeBase64((username + ":" + password).getBytes)) } def getHeader(username: String, password: String): String = BASIC + " " + encodeCredentials(username, password) }
Example 70
Source File: WatchServiceReceiverSpec.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
package org.apache.iota.fey

import java.io.File
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths}

import akka.testkit.{EventFilter, TestProbe}
import ch.qos.logback.classic.Level

import scala.concurrent.duration.{DurationInt, FiniteDuration}

/**
 * Exercises [[WatchServiceReceiver]]: initial file processing, running it in a thread,
 * create/update notifications in the watched directory and the warning for invalid JSON.
 * The tests share `watchFileTask` / `watchThread` and therefore depend on their order.
 */
class WatchServiceReceiverSpec extends BaseAkkaSpec {

  val watcherTB = TestProbe("WATCH-SERVICE")
  var watchFileTask: WatchServiceReceiver = _
  val watchTestDir = s"${CONFIG.JSON_REPOSITORY}/watchtest"

  "Creating WatchServiceReceiver" should {
    "process initial files in the JSON repository" in {
      // Temporarily switch the extension so the pre-copied `.json.not` files are picked up.
      CONFIG.JSON_EXTENSION = "json.not"
      watchFileTask = new WatchServiceReceiver(watcherTB.ref)
      watcherTB.expectMsgAllClassOf(classOf[JsonReceiverActor.JSON_RECEIVED])
      CONFIG.JSON_EXTENSION = "json.test"
    }
  }

  var watchThread: Thread = _
  "Start a Thread with WatchServiceReceiver" should {
    "Start Thread" in {
      watchThread = new Thread(watchFileTask, "TESTING-WATCHER-IN-THREAD")
      watchThread.setDaemon(true)
      watchThread.start()
      TestProbe().isThreadRunning("TESTING-WATCHER-IN-THREAD") should be(true)
    }
  }

  "Start watching directory" should {
    "Starting receiving CREATED event" taggedAs(SlowTest) in {
      Files.write(Paths.get(s"$watchTestDir/watched.json.test"),
        Utils_JSONTest.create_json_test.getBytes(StandardCharsets.UTF_8))
      watcherTB.expectMsgAllClassOf(20.seconds, classOf[JsonReceiverActor.JSON_RECEIVED])
    }
    "Starting receiving UPDATE event" taggedAs(SlowTest) in {
      Files.write(Paths.get(s"$watchTestDir/watched-update.json.test"),
        Utils_JSONTest.delete_json_test.getBytes(StandardCharsets.UTF_8))
      Thread.sleep(200)
      Files.write(Paths.get(s"$watchTestDir/watched-update.json.test"),
        Utils_JSONTest.create_json_test.getBytes(StandardCharsets.UTF_8))
      watcherTB.expectMsgAllClassOf(20.seconds, classOf[JsonReceiverActor.JSON_RECEIVED])
    }
  }

  "processJson" should {
    "log to warn level when json has invalid schema" in {
      Files.write(Paths.get(s"$watchTestDir/watched-invalid.json.test"),
        Utils_JSONTest.test_json_schema_invalid.getBytes(StandardCharsets.UTF_8))
      watchFileTask.processJson(s"$watchTestDir/watched-invalid.json.test",
        new File(s"$watchTestDir/watched-invalid.json.test"))
      s"File $watchTestDir/watched-invalid.json.test not processed. Incorrect JSON schema" should beLoggedAt(Level.WARN)
    }
  }

  "interrupt watchservice" should {
    "interrupt thread" in {
      watchThread.interrupt()
    }
  }
}
Example 71
Source File: TestSetup.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
package org.apache.iota.fey

import java.io.File
import java.nio.file.Paths

import org.apache.commons.io.FileUtils
import org.scalatest.Tag

/**
 * One-time preparation of the temporary directories and resource files used by the tests.
 */
object TestSetup {

  // Guards `setup()` so the file system is only prepared once per JVM.
  private var runSetup = true

  val configTest = getClass.getResource("/test-fey-configuration.conf")

  // Directories created before any test resource is copied.
  private val tmpDirectories = Seq(
    "/tmp/fey/test/checkpoint",
    "/tmp/fey/test/json",
    "/tmp/fey/test/json/watchtest",
    "/tmp/fey/test/jars",
    "/tmp/fey/test/jars/dynamic"
  )

  /** Creates the temp directories and copies the test jar and JSON files; later calls are no-ops. */
  def setup(): Unit = {
    if (runSetup) {
      println("SETTING UP ...")
      createFeyTmpDirectoriesForTest()
      copyTestActorToTmp()
      copyJSONstoTmp()
      runSetup = false
    }
  }

  private def copyTestActorToTmp(): Unit = {
    copyResourceFileToLocal("/fey-test-actor.jar", s"${CONFIG.JAR_REPOSITORY}/fey-test-actor.jar")
  }

  private def copyJSONstoTmp(): Unit = {
    copyResourceFileToLocal("/json/valid-json.json", s"${CONFIG.JSON_REPOSITORY}/valid-json.json.not")
    copyResourceFileToLocal("/json/invalid-json.json", s"${CONFIG.JSON_REPOSITORY}/invalid-json.json.not")
  }

  /**
   * Copies a classpath resource to `destination`.
   *
   * @throws IllegalArgumentException if the resource is not on the classpath
   */
  private def copyResourceFileToLocal(resourcePath: String, destination: String): Unit = {
    // getResource returns null for a missing resource; fail with a readable message.
    val resourceFile = Option(getClass.getResource(resourcePath))
      .getOrElse(throw new IllegalArgumentException(s"Test resource not found: $resourcePath"))
    FileUtils.copyURLToFile(resourceFile, new File(destination))
  }

  private def createFeyTmpDirectoriesForTest(): Unit =
    tmpDirectories.foreach(path => new File(path).mkdirs())
}

/** ScalaTest tag for slow tests. */
object SlowTest extends Tag("org.apache.iota.fey.SlowTest")
Example 72
Source File: MultiNodeSupportCassandra.scala From eventuate with Apache License 2.0 | 5 votes |
package com.rbmhtechnology.eventuate

import java.io.File

import akka.actor.Props
import akka.remote.testconductor.RoleName
import akka.remote.testkit.MultiNodeSpec

import com.rbmhtechnology.eventuate.log.cassandra._

import org.apache.commons.io.FileUtils
import org.scalatest.BeforeAndAfterAll

/**
 * Multi-node test support backed by a Cassandra event log. The node with role
 * `coordinator` starts the embedded Cassandra at startup and cleans up after all tests.
 */
trait MultiNodeSupportCassandra extends BeforeAndAfterAll { this: MultiNodeSpec with MultiNodeWordSpec =>
  val coordinator = RoleName("nodeA")

  def cassandraDir: String =
    MultiNodeEmbeddedCassandra.DefaultCassandraDir

  def logProps(logId: String): Props =
    CassandraEventLog.props(logId)

  override def atStartup(): Unit = {
    if (isNode(coordinator)) {
      MultiNodeEmbeddedCassandra.start(cassandraDir)
      Cassandra(system)
    }

    // All nodes wait here until the coordinator has finished its startup work.
    enterBarrier("startup")
  }

  override def afterAll(): Unit = {
    // get all config data before shutting down node
    val snapshotRootDir = new File(system.settings.config.getString("eventuate.snapshot.filesystem.dir"))

    // shut down node
    super.afterAll()

    // clean database and delete snapshot files
    if (isNode(coordinator)) {
      FileUtils.deleteDirectory(snapshotRootDir)
      MultiNodeEmbeddedCassandra.clean()
    }
  }
}
Example 73
Source File: MultiNodeSupportLeveldb.scala From eventuate with Apache License 2.0 | 5 votes |
package com.rbmhtechnology.eventuate

import java.io.File

import akka.actor.Props
import akka.remote.testconductor.RoleName
import akka.remote.testkit.MultiNodeSpec

import com.rbmhtechnology.eventuate.log.leveldb.LeveldbEventLog

import org.apache.commons.io.FileUtils
import org.scalatest.BeforeAndAfterAll

/**
 * Multi-node test support backed by a LevelDB event log. The node with role
 * `coordinator` deletes the log and snapshot directories after all tests.
 */
trait MultiNodeSupportLeveldb extends BeforeAndAfterAll { this: MultiNodeSpec with MultiNodeWordSpec =>
  val coordinator = RoleName("nodeA")

  def logProps(logId: String): Props =
    LeveldbEventLog.props(logId)

  override def afterAll(): Unit = {
    // get all config data before shutting down node
    val snapshotRootDir = new File(system.settings.config.getString("eventuate.snapshot.filesystem.dir"))
    val logRootDir = new File(system.settings.config.getString("eventuate.log.leveldb.dir"))

    // shut down node
    super.afterAll()

    // delete log and snapshot files
    if (isNode(coordinator)) {
      FileUtils.deleteDirectory(snapshotRootDir)
      FileUtils.deleteDirectory(logRootDir)
    }
  }
}
Example 74
Source File: LocationSpecLeveldb.scala From eventuate with Apache License 2.0 | 5 votes |
package com.rbmhtechnology.eventuate

import java.io.File

import akka.actor.{ ActorRef, Props }

import com.rbmhtechnology.eventuate.log._
import com.rbmhtechnology.eventuate.log.leveldb._
import com.rbmhtechnology.eventuate.utilities.RestarterActor
import com.typesafe.config.ConfigFactory

/** Storage locations (LevelDB log and snapshot directories) read from the configuration. */
trait LocationCleanupLeveldb extends LocationCleanup {
  override def storageLocations: List[File] =
    List("eventuate.log.leveldb.dir", "eventuate.snapshot.filesystem.dir").map(s => new File(config.getString(s)))
}

object SingleLocationSpecLeveldb {
  object TestEventLog {
    /** Props for a [[TestEventLog]], wrapped in a `BatchingLayer` when `batching` is true. */
    def props(logId: String, batching: Boolean, currentSystemTime: Long = 0): Props = {
      val logProps = Props(new TestEventLog(logId, currentSystemTime))
        .withDispatcher("eventuate.log.dispatchers.write-dispatcher")
      if (batching) Props(new BatchingLayer(logProps)) else logProps
    }
  }

  /** LevelDB event log that additionally reacts to the test messages "boom" and "dir". */
  class TestEventLog(id: String, override val currentSystemTime: Long = 0)
    extends LeveldbEventLog(id, "log-test")
    with SingleLocationSpec.TestEventLog[LeveldbEventLogState] {

    override def unhandled(message: Any): Unit = message match {
      case "boom" => throw IntegrationTestException
      case "dir"  => sender() ! logDir
      case _      => super.unhandled(message)
    }
  }
}

/** Single-location spec that creates a fresh log actor before each test. */
trait SingleLocationSpecLeveldb extends SingleLocationSpec with LocationCleanupLeveldb {
  import SingleLocationSpecLeveldb._

  private var _log: ActorRef = _

  override def beforeEach(): Unit = {
    super.beforeEach()
    _log = system.actorOf(logProps(logId))
  }

  def log: ActorRef =
    _log

  def logProps(logId: String): Props =
    RestarterActor.props(TestEventLog.props(logId, batching, currentSystemTime))
}

/** Multi-location spec using plain LevelDB event logs and a test-specific log configuration. */
trait MultiLocationSpecLeveldb extends MultiLocationSpec with LocationCleanupLeveldb {
  override val logFactory: String => Props = id => LeveldbEventLog.props(id)

  override val providerConfig = ConfigFactory.parseString(
    s"""
       |eventuate.log.leveldb.dir = target/test-log
       |eventuate.log.leveldb.index-update-limit = 3
       |eventuate.log.leveldb.deletion-retry-delay = 1 ms
     """.stripMargin)
}
Example 75
Source File: YamlHelpers.scala From barstools with BSD 3-Clause "New" or "Revised" License | 5 votes |
package barstools.tapeout.transforms

import net.jcazevedo.moultingyaml._
import java.io.File

/**
 * Reads YAML documents either from a user-supplied file or, when none is given,
 * from the classpath resource passed to the constructor.
 *
 * @param resource classpath resource used when `parse` is called without a file name
 */
class YamlFileReader(resource: String) {

  /** Reads the whole source into a string (lines joined with "\n") and always closes it. */
  private def readAll(source: scala.io.Source, joinLines: Boolean): String =
    try {
      if (joinLines) source.getLines.mkString("\n") else source.mkString
    } finally {
      source.close()
    }

  /**
   * Parses every YAML document in the chosen input with the implicit `reader`.
   *
   * @param file path of the YAML file; empty string selects the default resource
   * @return one `A` per YAML document
   * @throws Exception if neither the file nor the default resource can be found
   */
  def parse[A](file: String = "")(implicit reader: YamlReader[A]) : Seq[A] = {
    // If the user doesn't provide a Yaml file name, use defaults
    val yamlString = file match {
      case f if f.isEmpty =>
        // Use example config if no file is provided
        val stream = getClass.getResourceAsStream(resource)
        // getResourceAsStream returns null for a missing resource.
        if (stream == null) throw new Exception(s"Default Yaml resource not found: $resource")
        readAll(scala.io.Source.fromInputStream(stream), joinLines = false)
      case f if new File(f).exists =>
        readAll(scala.io.Source.fromFile(f), joinLines = true)
      case _ =>
        throw new Exception("No valid Yaml file found!")
    }
    // NOTE(review): this expression was lost in the extracted text and is reconstructed
    // from the moultingyaml API — confirm against the upstream file.
    yamlString.parseYamls.map(x => reader.read(x))
  }
}
Example 76
Source File: KinesisProducerIntegrationSpec.scala From reactive-kinesis with Apache License 2.0 | 5 votes |
package com.weightwatchers.reactive.kinesis

import java.io.File

// NOTE(review): the package of the aliased AWS producer was lost in the extracted text;
// reconstructed as the Kinesis Producer Library package — confirm.
import com.amazonaws.services.kinesis.producer.{KinesisProducer => AWSKinesisProducer}
import com.typesafe.config.ConfigFactory
import com.weightwatchers.reactive.kinesis.common.{
  KinesisSuite,
  KinesisTestConsumer,
  TestCredentials
}
import com.weightwatchers.reactive.kinesis.consumer.KinesisConsumer.ConsumerConf
import com.weightwatchers.reactive.kinesis.models.ProducerEvent
import com.weightwatchers.reactive.kinesis.producer.{KinesisProducer, ProducerConf}
import org.scalatest.concurrent.Eventually
import org.scalatest.mockito.MockitoSugar
import org.scalatest.time.{Millis, Seconds, Span}
import org.scalatest.{BeforeAndAfterAll, FreeSpec, Matchers}

import scala.concurrent.duration._
import scala.language.postfixOps
import scala.util.Random

//scalastyle:off magic.number
/**
 * Integration test: publishes one record through KinesisProducer and waits until the
 * test consumer sees exactly one additional record carrying the same payload.
 */
class KinesisProducerIntegrationSpec
    extends FreeSpec
    with Matchers
    with MockitoSugar
    with BeforeAndAfterAll
    with Eventually
    with KinesisSuite {

  // NOTE(review): the right-hand side was lost in the extracted text; the global
  // execution context is assumed — confirm.
  implicit val ece: scala.concurrent.ExecutionContextExecutor =
    scala.concurrent.ExecutionContext.global

  val TestStreamNrOfMessagesPerShard: Long = 0

  // `eventually` retries every 100 ms for up to 5 s.
  implicit override val patienceConfig: PatienceConfig =
    PatienceConfig(timeout = Span(5, Seconds), interval = Span(100, Millis))

  "The KinesisProducer" - {

    "Should publish a message to a stream" in new withKinesisConfForApp(
      "int-test-stream-producer-1"
    ) {

      val conf     = producerConf()
      val producer = KinesisProducer(conf)

      // The stream may already contain records, so compare against the current count.
      val existingRecordCount = testConsumer.retrieveRecords(conf.streamName, 10).size

      val event = ProducerEvent("1234", Random.alphanumeric.take(10).mkString)
      producer.addUserRecord(event)

      eventually {
        val records: Seq[String] = testConsumer.retrieveRecords(conf.streamName, 10)
        records.size shouldBe (existingRecordCount + 1)
        records should contain(
          new String(event.payload.array(), java.nio.charset.StandardCharsets.UTF_8)
        )
      }
    }
  }
}

//scalastyle:on
Example 77
Source File: Persister.scala From exodus with MIT License | 5 votes |
package com.wix.bazel.migrator

import java.io.File
import java.nio.file.attribute.BasicFileAttributes
import java.nio.file.{Files, Paths}
import java.time.Instant
import java.time.temporal.TemporalUnit
import java.util

import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.wix.bazel.migrator.model.{CodePurpose, Package, Target, TestType}
import com.wix.bazel.migrator.utils.{IgnoringIsArchiveDefMixin, IgnoringIsProtoArtifactDefMixin, IgnoringIsWarDefMixin, TypeAddingMixin}
// NOTE(review): the package names of the next two imports were lost in the extracted
// text and are reconstructed — confirm against the project.
import com.wix.build.maven.analysis.SourceModules
import com.wixpress.build.maven.{Coordinates, MavenScope, Packaging}

import scala.collection.JavaConverters._

/**
 * JSON persistence of migration intermediate results: the transformed bazel packages
 * ("dag.bazel") and the maven classpath resolution ("classpathModules.cache").
 */
object Persister {

  private val transformedFile = new File("dag.bazel")
  private val mavenCache = Paths.get("classpathModules.cache")

  // NOTE(review): Packaging is registered with two mix-ins below; Jackson presumably
  // keeps a single mix-in per target class, so the second may replace the first — confirm.
  val objectMapper = new ObjectMapper().registerModule(DefaultScalaModule)
    .addMixIn(classOf[Target], classOf[TypeAddingMixin])
    .addMixIn(classOf[CodePurpose], classOf[TypeAddingMixin])
    .addMixIn(classOf[TestType], classOf[TypeAddingMixin])
    .addMixIn(classOf[MavenScope], classOf[TypeAddingMixin])
    .addMixIn(classOf[Packaging], classOf[IgnoringIsArchiveDefMixin])
    .addMixIn(classOf[Packaging], classOf[IgnoringIsWarDefMixin])
    .addMixIn(classOf[Coordinates], classOf[IgnoringIsProtoArtifactDefMixin])
    .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)

  /** Writes the transformed packages to the transformation file. */
  def persistTransformationResults(bazelPackages: Set[Package]): Unit = {
    println("Persisting transformation")
    objectMapper.writeValue(transformedFile, bazelPackages)
  }

  /** Reads the packages previously written by [[persistTransformationResults]]. */
  def readTransformationResults(): Set[Package] = {
    val collectionType = objectMapper.getTypeFactory.constructCollectionType(classOf[util.Collection[Package]], classOf[Package])
    val value: util.Collection[Package] = objectMapper.readValue(transformedFile, collectionType)
    value.asScala.toSet
  }

  /** Writes the maven classpath resolution to the cache file. */
  def persistMavenClasspathResolution(sourceModules: SourceModules): Unit = {
    println("Persisting maven")
    objectMapper.writeValue(mavenCache.toFile, sourceModules)
  }

  /** Reads the maven classpath resolution from the cache file. */
  def readTransMavenClasspathResolution(): SourceModules = {
    objectMapper.readValue[SourceModules](mavenCache.toFile, classOf[SourceModules])
  }

  /**
   * @return true when the cache file is not readable, or its last-modified time is
   *         earlier than now minus `amount` of `unit`
   */
  def mavenClasspathResolutionIsUnavailableOrOlderThan(amount: Int, unit: TemporalUnit): Boolean =
    !Files.isReadable(mavenCache) ||
      lastModifiedMavenCache().toInstant.isBefore(Instant.now().minus(amount, unit))

  private def lastModifiedMavenCache() =
    Files.readAttributes(mavenCache, classOf[BasicFileAttributes]).lastModifiedTime()
}
Example 78
Source File: SqliteTestBase.scala From smui with Apache License 2.0 | 5 votes |
package utils

import java.io.File

import org.scalatest.{BeforeAndAfterAll, Suite}
import play.api.db.evolutions.Evolutions
import play.api.db.{Database, Databases}

/**
 * Mix-in providing a file-backed SQLite database with evolutions applied; the database
 * is shut down and its file deleted after all tests of the suite.
 */
trait SqliteTestBase extends BeforeAndAfterAll { self: Suite =>

  private lazy val dbFile = File.createTempFile("sqlitetest", ".db")

  lazy val db: Database = {
    // Use a temp file for the database - in-memory DB cannot be used
    // since it would be a different DB for each connection in the connection pool
    // (see the SQLite documentation on in-memory databases)
    val d = Databases("org.sqlite.JDBC", s"jdbc:sqlite:${dbFile.getAbsolutePath}")
    Evolutions.applyEvolutions(d)
    d
  }

  override protected def afterAll(): Unit = {
    try {
      super.afterAll()
    } finally {
      // Release the database and its file even if the parent clean-up throws.
      db.shutdown()
      dbFile.delete()
    }
  }
}
Example 79
Source File: Preprocess.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package com.packt.ScalaML.BitCoin

import java.io.{ BufferedWriter, File, FileWriter }

import org.apache.spark.sql.types.{ DoubleType, IntegerType, StructField, StructType }
import org.apache.spark.sql.{ DataFrame, Row, SparkSession }

import scala.collection.mutable.ListBuffer

/**
 * Turns the price CSV into two files: rolling windows of "Delta" values (features)
 * and the "label" of the row following each window.
 */
object Preprocess {
  //how many of first rows are omitted
  val dropFirstCount: Int = 612000

  /**
   * Writes one line per window position: `window` consecutive "Delta" values
   * (comma separated) to `xFilename`, and the "label" of the next row to `yFilename`.
   *
   * @param data      frame with a Double column "Delta" and an Integer column "label"
   * @param window    number of consecutive rows per feature line
   * @param xFilename output file for the feature lines
   * @param yFilename output file for the label lines
   */
  def rollingWindow(data: DataFrame, window: Int, xFilename: String, yFilename: String): Unit = {
    val xWriter = new BufferedWriter(new FileWriter(new File(xFilename)))
    try {
      val yWriter = new BufferedWriter(new FileWriter(new File(yFilename)))
      try {
        // The whole data set is collected to the driver.
        val zippedData = data.rdd.zipWithIndex().collect()
        System.gc()
        val dataStratified = zippedData.drop(dropFirstCount) //todo slice fisrt 614K

        for (i <- 0 until (dataStratified.length - window)) {
          val x = dataStratified
            .slice(i, i + window)
            .map(r => r._1.getAs[Double]("Delta")).toList
          val y = dataStratified.apply(i + window)._1.getAs[Integer]("label")

          xWriter.write(x.mkString(",") + "\n")
          yWriter.write(s"$y\n")

          // Flush after every 10th written window.
          if ((i + 1) % 10 == 0) {
            xWriter.flush()
            yWriter.flush()
          }
        }
      } finally {
        yWriter.close()
      }
    } finally {
      // Closed even when reading or writing fails, so no file handle is leaked.
      xWriter.close()
    }
  }

  def main(args: Array[String]): Unit = {
    //todo modify these variables to match desirable files
    val priceDataFileName: String = "C:/Users/admin-karim/Desktop/bitstampUSD_1-min_data_2012-01-01_to_2017-10-20.csv/bitstampUSD_1-min_data_2012-01-01_to_2017-10-20.csv"
    val outputDataFilePath: String = "output/scala_test_x.csv"
    val outputLabelFilePath: String = "output/scala_test_y.csv"

    val spark = SparkSession
      .builder()
      .master("local[*]")
      .config("spark.sql.warehouse.dir", "E:/Exp/")
      .appName("Bitcoin Preprocessing")
      .getOrCreate()

    val data = spark.read.format("com.databricks.spark.csv").option("header", "true").load(priceDataFileName)
    println((data.count(), data.columns.size))

    val dataWithDelta = data.withColumn("Delta", data("Close") - data("Open"))

    import org.apache.spark.sql.functions._
    import spark.sqlContext.implicits._

    // label = 1 when the close price is above the open price, otherwise 0
    val dataWithLabels = dataWithDelta.withColumn("label", when($"Close" - $"Open" > 0, 1).otherwise(0))
    rollingWindow(dataWithLabels, 22, outputDataFilePath, outputLabelFilePath)
    spark.stop()
  }
}
Example 80
Source File: ResultFileGenerator.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package Yelp.Evaluator

import java.io.File
import Yelp.Trainer.NeuralNetwork._
import Yelp.Preprocessor.CSVImageMetadataReader._
import Yelp.Preprocessor.makeND4jDataSets.makeDataSetTE
import Yelp.Preprocessor.featureAndDataAligner
import Yelp.Preprocessor.imageFeatureExtractor._
import Yelp.Evaluator.ResultFileGenerator._
import Yelp.Preprocessor.makeND4jDataSets._
import Yelp.Evaluator.ModelEvaluation._
import Yelp.Trainer.CNN._
import Yelp.Trainer.CNNEpochs._
import scala.Vector

/** Builds per-business class probabilities from saved models and writes the submission CSV. */
object ResultFileGenerator {

  /**
   * Writes a CSV with header "business_ids,labels" and one line per business listing
   * the (space separated) indices of the classes whose probability is >= `thresh`.
   *
   * @param outcsv  path of the file to write
   * @param phtoObj pairs of business id and per-class probabilities
   * @param thresh  inclusive probability cut-off
   */
  def writeSubmissionFile(outcsv: String, phtoObj: List[(String, Vector[Double])], thresh: Double): Unit = {
    // prints to a csv or other txt file
    def printToFile(f: java.io.File)(op: java.io.PrintWriter => Unit): Unit = {
      val p = new java.io.PrintWriter(f)
      try { op(p) } finally { p.close() }
    }

    // assigning cutoffs for each class
    def findIndicesAboveThresh(x: Vector[Double]): Vector[Int] = {
      x.zipWithIndex.filter(x => x._1 >= thresh).map(_._2)
    }

    // create vector of rows to write to csv
    // (mapping the list directly avoids repeated positional access into a List)
    val ret = phtoObj.map { case (bizId, probabilities) =>
      bizId + "," + findIndicesAboveThresh(probabilities).mkString(" ")
    }.toVector

    // actually write text file
    printToFile(new File(outcsv)) {
      p => (Vector("business_ids,labels") ++ ret).foreach(p.println)
    }
  }

  /**
   * Scores `alignedData` with each of the nine models stored under `modelPath`
   * (files `<modelPath><name>.json` and `<modelPath><name>.bin`).
   *
   * @return for each business id, the vector of scores in model order
   */
  def SubmitObj(alignedData: featureAndDataAligner,
    modelPath: String,
    model0: String = "model0",
    model1: String = "model1",
    model2: String = "model2",
    model3: String = "model3",
    model4: String = "model4",
    model5: String = "model5",
    model6: String = "model6",
    model7: String = "model7",
    model8: String = "model8"): List[(String, Vector[Double])] = {

    // new code which works in REPL
    // creates a List for each model (class) containing a map from the bizID to the probability of belonging in that class
    val big = for (m <- List(model0, model1, model2, model3, model4, model5, model6, model7, model8)) yield {
      val ds = makeDataSetTE(alignedData)
      val model = loadNN(modelPath + m + ".json", modelPath + m + ".bin")
      val scores = scoreModel(model, ds)
      val bizScores = aggImgScores2Business(scores, alignedData)
      bizScores.toMap
    }

    // transforming the data structure above into a List for each bizID containing a Tuple (bizid, List[Double]) where the Vector[Double] is the
    // the vector of probabilities
    // NOTE(review): the start of this expression was lost in the extracted text; it is
    // reconstructed assuming the second tuple element of alignedData.data is the
    // business id — confirm against featureAndDataAligner.
    alignedData.data.map(_._2).distinct map (x =>
      (x, big.map(x2 => x2(x)).toVector))
  }
}
Example 81
Source File: GrayscaleConverter.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package Yelp.Preprocessor

import java.io.File
import javax.imageio.ImageIO
import java.awt.Color

/** Converts one sample image to grayscale by averaging the R, G and B channels. */
object GrayscaleConverter {
  def main(args: Array[String]): Unit = {
    // Gray level as the integer mean of the three channels.
    def pixels2Gray(R: Int, G: Int, B: Int): Int = (R + G + B) / 3

    /** Overwrites every pixel of `testImage` with its gray value and returns the same image. */
    def makeGray(testImage: java.awt.image.BufferedImage): java.awt.image.BufferedImage = {
      val w = testImage.getWidth
      val h = testImage.getHeight
      // Side-effecting loop: no `yield`, so no throw-away collection is built.
      for {
        w1 <- 0 until w
        h1 <- 0 until h
      } {
        val col = testImage.getRGB(w1, h1)
        val R = (col & 0xff0000) / 65536
        val G = (col & 0xff00) / 256
        val B = (col & 0xff)
        val graycol = pixels2Gray(R, G, B)
        testImage.setRGB(w1, h1, new Color(graycol, graycol, graycol).getRGB)
      }
      testImage
    }

    val inputPath = "data/images/preprocessed/147square.jpg"
    // ImageIO.read returns null when no registered reader can decode the file.
    val testImage = Option(ImageIO.read(new File(inputPath)))
      .getOrElse(sys.error(s"Cannot read image: $inputPath"))
    val grayImage = makeGray(testImage)
    ImageIO.write(grayImage, "jpg", new File("data/images/preprocessed/147gray.jpg"))
  }
}
Example 82
Source File: imageFeatureExtractor.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package Yelp.Preprocessor

import java.io.File
import javax.imageio.ImageIO
import scala.util.matching.Regex
import imageUtils._

object imageFeatureExtractor {

  /**
   * Reads each image, squares it, resizes it to `resizeImgDim` x `resizeImgDim` and
   * converts it to gray values.
   *
   * @param imgs         image file paths; the number extracted from each path by
   *                     `patt_get_jpg_name` becomes the map key
   * @param resizeImgDim width and height after resizing
   * @param nPixels      when not -1, only the first `nPixels` values are kept
   * @return map from image number to its gray-value vector
   */
  def processImages(imgs: List[String], resizeImgDim: Int = 128, nPixels: Int = -1): Map[Int, Vector[Int]] = {
    // The original also filtered with `x._2 != ()`; comparing a Vector with Unit is
    // always true, so that no-op filter is dropped.
    imgs.map(x =>
      patt_get_jpg_name.findAllIn(x).mkString.toInt -> {
        val img0 = ImageIO.read(new File(x))
          .makeSquare
          .resizeImg(resizeImgDim, resizeImgDim) // (128, 128)
          .image2gray
        if (nPixels != -1) img0.slice(0, nPixels)
        else img0
      }
    ).toMap
  }
}
Example 83
Source File: ImageResize.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package Yelp.Preprocessor

import org.imgscalr._
import java.io.File
import javax.imageio.ImageIO

/** Resizes one sample image to 32, 64, 128 and 256 pixels and writes each result as JPEG. */
object ImageResize {
  def main(args: Array[String]): Unit = {

    def resizeImg(img: java.awt.image.BufferedImage, width: Int, height: Int) = {
      Scalr.resize(img, Scalr.Method.BALANCED, width, height)
    }

    val inputPath = "data/images/train/147.jpg"
    // ImageIO.read returns null when no registered reader can decode the file.
    val testImage = Option(ImageIO.read(new File(inputPath)))
      .getOrElse(sys.error(s"Cannot read image: $inputPath"))

    // One output file per target size, written in ascending size order.
    Seq(32, 64, 128, 256).foreach { size =>
      val resized = resizeImg(testImage, size, size)
      ImageIO.write(resized, "jpg", new File(s"data/images/preprocessed/147resize$size.jpg"))
    }
  }
}
Example 84
Source File: SquaringImage.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package Yelp.Preprocessor

import org.imgscalr._
import java.io.File
import javax.imageio.ImageIO

/** Crops one sample image to a centered square and writes it as JPEG. */
object SquaringImage {
  def main(args: Array[String]): Unit = {

    /** Returns `img` unchanged when already square, otherwise a centered crop of side min(w, h). */
    def makeSquare(img: java.awt.image.BufferedImage): java.awt.image.BufferedImage = {
      val w = img.getWidth
      val h = img.getHeight
      val dim = List(w, h).min

      if (w == h) img
      else if (w > h) Scalr.crop(img, (w - h) / 2, 0, dim, dim) // trim left and right
      else Scalr.crop(img, 0, (h - w) / 2, dim, dim)            // trim top and bottom
    }

    val inputPath = "data/images/train/147.jpg"
    // ImageIO.read returns null when no registered reader can decode the file.
    val myimg = Option(ImageIO.read(new File(inputPath)))
      .getOrElse(sys.error(s"Cannot read image: $inputPath"))
    val myimgSquare = makeSquare(myimg)
    ImageIO.write(myimgSquare, "jpg", new File("data/images/preprocessed/147square.jpg"))
  }
}
Example 85
Source File: NeuralNetwork.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package Yelp.Trainer

import org.deeplearning4j.nn.conf.MultiLayerConfiguration
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork
import org.nd4j.linalg.factory.Nd4j
import org.apache.commons.io.FileUtils
import java.io.File
import java.io.{DataInputStream, DataOutputStream, FileInputStream}
import java.nio.file.{Files, Paths}

/** Saving and loading of a MultiLayerNetwork as a JSON configuration plus a parameter file. */
object NeuralNetwork {

  /**
   * Rebuilds a network from its saved configuration and parameters.
   *
   * @param NNconfig path of the JSON configuration file
   * @param NNparams path of the binary parameter file
   * @return the initialised network with the stored parameters set
   */
  def loadNN(NNconfig: String, NNparams: String) = {
    // get neural network config
    val confFromJson: MultiLayerConfiguration =
      MultiLayerConfiguration.fromJson(FileUtils.readFileToString(new File(NNconfig)))

    // get neural network parameters
    val dis: DataInputStream = new DataInputStream(new FileInputStream(NNparams))
    val newParams =
      try Nd4j.read(dis)
      finally dis.close() // do not leak the file handle

    // creating network object
    val savedNetwork: MultiLayerNetwork = new MultiLayerNetwork(confFromJson)
    savedNetwork.init()
    savedNetwork.setParameters(newParams)
    savedNetwork
  }

  /**
   * Writes the network configuration as JSON to `NNconfig` and its parameters to `NNparams`.
   */
  def saveNN(model: MultiLayerNetwork, NNconfig: String, NNparams: String) = {
    // save neural network config
    FileUtils.write(new File(NNconfig), model.getLayerWiseConfigurations().toJson())

    // save neural network parms
    val dos: DataOutputStream = new DataOutputStream(Files.newOutputStream(Paths.get(NNparams)))
    try Nd4j.write(model.params(), dos)
    finally dos.close() // closing also flushes the written parameters
  }
}
Example 86
Source File: KerberosLoginProvider.scala From rokku with Apache License 2.0 | 5 votes |
// NOTE(review): the package name, the KerberosSettings import and the logger call
// prefixes were lost in the extracted text and are reconstructed — confirm against the project.
package com.ing.wbaa.rokku.proxy.provider

import java.io.File

import com.ing.wbaa.rokku.proxy.config.KerberosSettings
import com.typesafe.scalalogging.LazyLogging
import org.apache.commons.lang.StringUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.UserGroupInformation

import scala.util.{ Failure, Success, Try }

/**
 * Performs a Kerberos keytab login when the trait is initialised.
 *
 * The login runs in the trait body, so `kerberosSettings` must already be usable at
 * that point in the implementing class.
 */
trait KerberosLoginProvider extends LazyLogging {

  protected[this] def kerberosSettings: KerberosSettings

  loginUserFromKeytab(kerberosSettings.keytab, kerberosSettings.principal)

  /**
   * Logs in with the given keytab and principal. Nothing is attempted when either value
   * is blank or the keytab file does not exist; a failed login is logged, not rethrown.
   */
  private def loginUserFromKeytab(keytab: String, principal: String): Unit = {
    if (StringUtils.isNotBlank(keytab) && StringUtils.isNotBlank(principal)) {
      if (!new File(keytab).exists()) {
        logger.info("keytab file does not exist {}", keytab)
      } else {
        Try {
          UserGroupInformation.setConfiguration(new Configuration())
          UserGroupInformation.loginUserFromKeytab(principal, keytab)
        } match {
          case Success(_) =>
            logger.info("kerberos credentials provided {}", UserGroupInformation.getLoginUser)
          case Failure(exception) =>
            logger.error("kerberos login error {}", exception)
        }
      }
    } else {
      logger.info("kerberos credentials are not provided")
    }
  }
}
Example 87
Source File: S3SdkHelpers.scala From rokku with Apache License 2.0 | 5 votes |
// NOTE(review): the package name, three AWS import packages and the region config key
// were lost in the extracted text and are reconstructed — confirm against the project.
package com.ing.wbaa.testkit.awssdk

import java.io.File

import akka.http.scaladsl.model.Uri.Authority
import com.amazonaws.ClientConfiguration
import com.amazonaws.auth.{AWSCredentials, AWSStaticCredentialsProvider, BasicSessionCredentials}
import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration
import com.amazonaws.services.s3.transfer.TransferManagerBuilder
import com.amazonaws.services.s3.transfer.model.UploadResult
import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder}
import com.typesafe.config.ConfigFactory

import scala.collection.JavaConverters._

/** Helpers for talking to a local S3 endpoint with the AWS SDK in tests. */
trait S3SdkHelpers {
  val awsRegion = ConfigFactory.load().getString("rokku.storage.s3.region")

  /**
   * Builds a path-style S3 client for `http://s3.localhost:<port of authority>`
   * that retries failed requests at most once.
   */
  def getAmazonS3(authority: Authority,
                  credentials: AWSCredentials = new BasicSessionCredentials("accesskey", "secretkey", "token")
                 ): AmazonS3 = {
    val cliConf = new ClientConfiguration()
    cliConf.setMaxErrorRetry(1)

    AmazonS3ClientBuilder
      .standard()
      .withClientConfiguration(cliConf)
      .withCredentials(new AWSStaticCredentialsProvider(credentials))
      .withPathStyleAccessEnabled(true)
      .withEndpointConfiguration(new EndpointConfiguration(s"http://s3.localhost:${authority.port}", awsRegion))
      .build()
  }

  /** Keys returned by a single `listObjectsV2` call on `bucket`. */
  def getKeysInBucket(sdk: AmazonS3, bucket: String): List[String] =
    sdk
      .listObjectsV2(bucket)
      .getObjectSummaries
      .asScala.toList
      .map(_.getKey)

  /** Uploads `file` to `bucket` under `key` through a TransferManager and waits for the result. */
  def doMultiPartUpload(sdk: AmazonS3, bucket: String, file: String, key: String): UploadResult = {
    val transferManager = TransferManagerBuilder
      .standard()
      .withS3Client(sdk)
      .build()

    try {
      transferManager.upload(bucket, key, new File(file)).waitForUploadResult()
    } finally {
      // Release the manager's resources but keep the caller's S3 client usable.
      transferManager.shutdownNow(false)
    }
  }
}
Example 88
Source File: RokkuFixtures.scala From rokku with Apache License 2.0 | 5 votes |
// NOTE(review): the package name, two import packages and the key-listing chain in
// cleanBucket were lost in the extracted text and are reconstructed — confirm.
package com.ing.wbaa.testkit

import java.io.{File, RandomAccessFile}

import com.amazonaws.services.s3.AmazonS3
import com.ing.wbaa.testkit.awssdk.S3SdkHelpers
import org.scalatest.Assertion

import scala.concurrent.{ExecutionContext, Future}
import scala.util.{Random, Try}

/** Test fixtures that provide a pre-filled "home" bucket and empty it afterwards. */
trait RokkuFixtures extends S3SdkHelpers {

  /**
   * Creates the bucket "home" (ignoring a creation failure), puts an empty object for
   * each entry of `objects`, runs `testCode` with the bucket name and removes the
   * bucket's objects once the returned future completes.
   */
  def withHomeBucket(s3Client: AmazonS3, objects: Seq[String])(testCode: String => Future[Assertion])(implicit exCtx: ExecutionContext): Future[Assertion] = {
    val testBucket = "home"
    // Best effort: creation may fail, e.g. when the bucket already exists.
    Try(s3Client.createBucket(testBucket))
    objects.foreach(obj => s3Client.putObject(testBucket, obj, ""))
    testCode(testBucket).andThen {
      case _ =>
        cleanBucket(s3Client, testBucket)
    }
  }

  /** Deletes every object returned by a single `listObjectsV2` call on the bucket. */
  private def cleanBucket(s3Client: AmazonS3, bucketName: String) = {
    import scala.collection.JavaConverters._
    s3Client.listObjectsV2(bucketName).getObjectSummaries.asScala.toList.map(_.getKey).foreach { key =>
      s3Client.deleteObject(bucketName, key)
    }
  }
}
Example 89
Source File: AppConfig.scala From odsc-east-realish-predictions with Apache License 2.0 | 5 votes |
// NOTE(review): the package name was lost in the extracted text and is reconstructed —
// confirm against the project.
package com.twilio.open.odsc.realish.config

import java.io.File

import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory
import com.fasterxml.jackson.module.scala.DefaultScalaModule

object AppConfig {

  // YAML-backed Jackson mapper with Scala support (case classes, Map, ...).
  private val mapper = new ObjectMapper(new YAMLFactory)
  mapper.registerModule(DefaultScalaModule)

  /**
   * Reads the YAML file at `configPath` into an [[AppConfig]].
   *
   * @param configPath path of the YAML configuration file
   */
  def parse(configPath: String): AppConfig = {
    mapper.readValue(new File(configPath), classOf[AppConfig])
  }
}

/** Root of the application configuration. */
@SerialVersionUID(100L)
case class AppConfig(
  sparkAppConfig: SparkAppConfig,
  streamingQueryConfig: StreamingQueryConfig
) extends Serializable

/** Application name plus a key/value map named `core`. */
@SerialVersionUID(100L)
case class SparkAppConfig(
  appName: String,
  core: Map[String, String]
) extends Serializable

/** Settings describing a Kafka subscription. */
trait KafkaConsumerConfig {
  val topic: String
  val subscriptionType: String
  val conf: Map[String, String]
}

@SerialVersionUID(100L)
case class ConsumerConfig(
  topic: String,
  subscriptionType: String,
  conf: Map[String, String]
) extends KafkaConsumerConfig with Serializable

/** Name, trigger, window and watermark settings of a streaming query. */
@SerialVersionUID(100L)
case class StreamingQueryConfig(
  streamName: String,
  triggerInterval: String,
  triggerEnabled: Boolean,
  windowInterval: String,
  watermarkInterval: String
) extends Serializable
Example 90
Source File: SchemaReader.scala From darwin with Apache License 2.0 | 5 votes |
// NOTE(review): the package name was lost in the extracted text and is reconstructed —
// confirm against the project.
package it.agilelab.darwin.app.spark

import java.io.{File, InputStream}

import org.apache.avro.Schema

/** Helpers that parse an Avro schema from a classpath resource, file, string or stream. */
object SchemaReader {

  /**
   * Parses the schema stored in the classpath resource `p`.
   *
   * @throws IllegalArgumentException if the resource does not exist
   */
  def readFromResources(p: String): Schema = {
    // getResourceAsStream returns null for a missing resource.
    val stream = getClass.getClassLoader.getResourceAsStream(p)
    if (stream == null) throw new IllegalArgumentException(s"Schema resource not found: $p")
    // The stream is opened here, so it is closed here as well.
    try read(stream)
    finally stream.close()
  }

  /** Parses the schema contained in file `f`. */
  def read(f: File): Schema = {
    val parser = new Schema.Parser()
    parser.parse(f)
  }

  /** Parses the schema given as JSON text `s`. */
  def read(s: String): Schema = {
    val parser = new Schema.Parser()
    parser.parse(s)
  }

  /** Parses the schema read from `is`; this method does not close the stream itself. */
  def read(is: InputStream): Schema = {
    val parser = new Schema.Parser()
    parser.parse(is)
  }
}
Example 91
Source File: SbtActorApi.scala From sbt-actor-api with MIT License | 5 votes |
// NOTE(review): the package name, two imports, the log call prefixes and the Json2Tree
// invocation were lost in the extracted text and are reconstructed — confirm against the project.
package im.actor

import im.actor.api._
import java.io.File

import sbt._, Keys._

/** sbt plugin that generates Scala sources from an `actor.json` schema. */
object SbtActorApi extends AutoPlugin {
  val ActorApi = config("actorapi").hide

  val path = SettingKey[File]("actor-schema-path", "The path that contains actor.json file")
  val outputPath = SettingKey[File]("actor-schema-output-path", "The paths where to save the generated *.scala files.")

  lazy val actorapi = TaskKey[Seq[File]]("actorapi", "Compile json schema to scala code")
  lazy val actorapiClean = TaskKey[Seq[File]]("actorapi-clean", "Clean generated code")

  lazy val actorapiMain = SettingKey[String]("actorapi-main", "ActorApi main class.")

  lazy val settings: Seq[Setting[_]] = Seq(
    sourceDirectory in ActorApi <<= (sourceDirectory in Compile),
    path <<= sourceDirectory in ActorApi,
    managedClasspath in ActorApi <<= (classpathTypes, update) map { (ct, report) ⇒
      Classpaths.managedJars(ActorApi, ct, report)
    },
    outputPath <<= sourceManaged in ActorApi,

    actorapi <<= (
      sourceDirectory in ActorApi,
      sourceManaged in ActorApi,
      managedClasspath in ActorApi,
      javaHome,
      streams
    ).map(generate),

    actorapiClean <<= (
      sourceManaged in ActorApi,
      streams
    ).map(clean),

    sourceGenerators in Compile <+= actorapi
  )

  // Generated sources live in <targetDir>/main/scala.
  private def compiledFileDir(targetDir: File): File =
    targetDir / "main" / "scala"

  private def compiledFile(targetDir: File, name: String): File =
    compiledFileDir(targetDir) / s"${name}.scala"

  /** Deletes `targetDir` and returns it as the only element. */
  private def clean(targetDir: File, streams: TaskStreams): Seq[File] = {
    val log = streams.log

    log.info("Cleaning actor schema")

    IO.delete(targetDir)

    Seq(targetDir)
  }

  /**
   * Generates one Scala file per source produced from `<srcDir>/actor-api/actor.json`.
   * The work is wrapped in `FileFunction.cached` keyed on the `actor.json` inputs.
   *
   * @return all `*.scala` files in the output directory, or Nil when the
   *         `actor-api` directory does not exist
   */
  private def generate(srcDir: File, targetDir: File, classpath: Classpath, javaHome: Option[File], streams: TaskStreams): Seq[File] = {
    val log = streams.log

    log.info(f"Generating actor schema for $srcDir%s")

    val input = srcDir / "actor-api"

    if (!input.exists()) {
      log.info(f"$input%s does not exists")
      Nil
    } else {
      val output = compiledFileDir(targetDir)

      val cached = FileFunction.cached(streams.cacheDirectory / "actor-api", FilesInfo.lastModified, FilesInfo.exists) {
        (in: Set[File]) ⇒
          {
            if (!output.exists())
              IO.createDirectory(output)

            val src = input / "actor.json"
            if (src.exists()) {
              val sources = (new Json2Tree(IO.read(src))).convert()

              sources foreach {
                case (name, source) ⇒
                  val targetFile = compiledFile(targetDir, name)

                  log.info(f"Generated ActorApi $targetFile%s")

                  IO.write(targetFile, source)
              }
            } else {
              log.info(f"no actor.json file in $input%s")
            }

            (output ** ("*.scala")).get.toSet
          }
      }
      cached((input ** "actor.json").get.toSet).toSeq
    }
  }
}
Example 92
Source File: VLFeatSuite.scala From keystone with Apache License 2.0 | 5 votes |
package keystoneml.utils.external

import java.io.File

import breeze.linalg._
import breeze.numerics.abs
import org.scalatest.FunSuite
import keystoneml.pipelines.Logging
import keystoneml.utils.{ImageUtils, MatrixUtils, TestUtils}

/** Checks the SIFT descriptors computed through VLFeat against stored reference features. */
class VLFeatSuite extends FunSuite with Logging {
  test("Load an Image and compute SIFT Features") {
    val testImage = TestUtils.loadTestImage("images/000012.jpg")
    val singleImage = ImageUtils.mapPixels(testImage, _/255.0)
    val grayImage = ImageUtils.toGrayScale(singleImage)

    val extLib = new VLFeat

    val stepSize = 3
    val binSize = 4
    val scales = 4
    val descriptorLength = 128
    val scaleStep = 0

    val rawDescDataShort = extLib.getSIFTs(grayImage.metadata.xDim, grayImage.metadata.yDim,
      stepSize, binSize, scales, scaleStep, grayImage.getSingleChannelAsFloatArray())

    assert(rawDescDataShort.length % descriptorLength == 0, "Resulting SIFTs must be 128-dimensional.")

    val numCols = rawDescDataShort.length/descriptorLength
    // NOTE(review): the data argument was lost in the extracted text; the raw descriptors
    // converted to Double are assumed — confirm.
    val result = new DenseMatrix(descriptorLength, numCols, rawDescDataShort.map(_.toDouble))

    // Compare with the output of running this image through vl_phow with matlab from the enceval package:
    // featpipem_addpaths;
    // im = im2single(imread('images/000012.jpg'));
    // featextr = featpipem.features.PhowExtractor();
    // featextr.step = 3;
    // [frames feats] = featextr.compute(im);
    // csvwrite('images/feats128.csv', feats)
    val testFeatures = csvread(new File(TestUtils.getTestResourceFileName("images/feats128.csv")))

    val diff = result - testFeatures

    // Because of subtle differences in the way image smoothing works in the VLFeat C library and the VLFeat matlab
    // library (vl_imsmooth_f vs. _vl_imsmooth_f), these two matrices will not be exactly the same.
    // Instead, we check that 99.5% of the matrix entries are off by at most 1.
    val absdiff = abs(diff).toDenseVector

    // Message now states the threshold that is actually checked (0.005 = 0.5%).
    assert(absdiff.findAll(_ > 1.0).length.toDouble < 0.005*absdiff.length,
      "Fewer than 0.5% of entries may be different by more than 1.")
  }
}
Example 93
Source File: EncEvalSuite.scala From keystone with Apache License 2.0 | 5 votes |
package keystoneml.utils.external

import java.io.File

import breeze.linalg._
import breeze.stats.distributions.Gaussian
import keystoneml.nodes.learning.GaussianMixtureModel
import keystoneml.nodes.learning.external.GaussianMixtureModelEstimator
import org.scalatest.FunSuite
import keystoneml.pipelines.Logging
import keystoneml.utils.{Stats, TestUtils}

/**
 * Exercises the native [[EncEval]] wrapper: Fisher Vector computation from stored SIFT
 * descriptors, and fitting a two-component 1-d GMM to synthetic data.
 */
class EncEvalSuite extends FunSuite with Logging {
  test("Load SIFT Descriptors and compute Fisher Vector Features") {
    val siftDescriptor = csvread(new File(TestUtils.getTestResourceFileName("images/feats.csv")))

    val gmmMeans = TestUtils.getTestResourceFileName("images/voc_codebook/means.csv")
    val gmmVars = TestUtils.getTestResourceFileName("images/voc_codebook/variances.csv")
    val gmmWeights = TestUtils.getTestResourceFileName("images/voc_codebook/priors")

    val gmm = GaussianMixtureModel.load(gmmMeans, gmmVars, gmmWeights)

    val nCenters = gmm.means.cols
    val nDim = gmm.means.rows

    val extLib = new EncEval
    // NOTE(review): four of the six arguments and the logging call were missing from this
    // listing. They are reconstructed here as float-array conversions of the descriptors and
    // of the GMM parameters -- confirm against the upstream file and the EncEval signature.
    val fisherVector = extLib.calcAndGetFVs(
      siftDescriptor.toArray.map(_.toFloat),
      nCenters,
      nDim,
      gmm.means.toArray.map(_.toFloat),
      gmm.variances.toArray.map(_.toFloat),
      gmm.weights.toArray.map(_.toFloat))

    logInfo(s"Fisher Vector is ${fisherVector.sum}")
    assert(Stats.aboutEq(fisherVector.sum, 40.109097, 1e-4), "SUM of Fisher Vectors must match expected sum.")
  }

  test("Compute a GMM from scala") {
    val nsamps = 10000

    // Generate two gaussians.
    val x = Gaussian(-1.0, 0.5).samples.take(nsamps).toArray
    val y = Gaussian(5.0, 1.0).samples.take(nsamps).toArray

    val z = shuffle(x ++ y).map(x => DenseVector(x))

    // Compute a 1-d GMM.
    // NOTE(review): `extLib` is never read; it is kept in case constructing EncEval has a
    // needed side effect (e.g. loading the native library) -- confirm before removing.
    val extLib = new EncEval
    val gmm = new GaussianMixtureModelEstimator(2).fit(z)

    logInfo(s"GMM means: ${gmm.means.toArray.mkString(",")}")
    logInfo(s"GMM vars: ${gmm.variances.toArray.mkString(",")}")
    logInfo(s"GMM weights: ${gmm.weights.toArray.mkString(",")}")

    // The results should be close to the distribution we set up.
    // Each failure message states the value the assertion actually checks against.
    assert(Stats.aboutEq(min(gmm.means), -1.0, 1e-1), "Smallest mean should be close to -1.0")
    assert(Stats.aboutEq(max(gmm.means), 5.0, 1e-1), "Largest mean should be close to 5.0")
    assert(Stats.aboutEq(math.sqrt(min(gmm.variances)), 0.5, 1e-1), "Smallest SD should be close to 0.5")
    assert(Stats.aboutEq(math.sqrt(max(gmm.variances)), 1.0, 1e-1), "Largest SD should be close to 1.0")
  }
}
Example 94
Source File: DefaultBodyWritables.scala From play-ws with Apache License 2.0 | 5 votes |
/* Excerpt defining two implicit BodyWritable instances for form bodies: writeableOf_urlEncodedForm produces an InMemoryBody tagged "application/x-www-form-urlencoded" whose values are passed through URLEncoder.encode(_, "UTF-8") and joined with "&"; writeableOf_urlEncodedSimpleForm is for Map[String, String] and wraps each value in a Seq. NOTE(review): this listing lost tokens during extraction (empty `package` and `import` statements, no trait header, `item => =>`, `{[Map[String, String]]( =>`), so it does not compile as shown -- restore from the upstream file. NOTE(review): in the visible expression only the value `c` is URL-encoded; the key `item._1` is interpolated as-is -- confirm that is intended. */ package import import java.nio.ByteBuffer import java.util.function.Supplier import import import import akka.util.ByteString import scala.compat.java8.FunctionConverters.asScalaFromSupplier implicit val writeableOf_urlEncodedForm: BodyWritable[Map[String, Seq[String]]] = { import BodyWritable( formData => InMemoryBody( ByteString.fromString( formData.flatMap(item => => s"${item._1}=${URLEncoder.encode(c, "UTF-8")}")).mkString("&") ) ), "application/x-www-form-urlencoded" ) } implicit val writeableOf_urlEncodedSimpleForm: BodyWritable[Map[String, String]] = {[Map[String, String]]( => kv._1 -> Seq(kv._2))) } } object DefaultBodyWritables extends DefaultBodyWritables
Example 95
Source File: TestZooKeeper.scala From mango with Apache License 2.0 | 5 votes |
/* Test mixin (self-typed to Suite) that manages a standalone ZooKeeper server on port 2181. beforeAll: opens and closes a ServerSocket on the port to verify it is free (throwing RuntimeException otherwise), submits the server start-up to zkServerExecutor, then loops -- sleeping 1000 ms after each IOException -- until a Socket to localhost:2181 connects, at which point `zk` is assigned. afterAll: calls super.afterAll(), then closes `zk`, calls shutdownNow() on the executor and waits in 1-second steps for termination. NOTE(review): the package name, several imports and what appear to be logging call prefixes (lines that now start with a bare string literal) were lost during extraction; restore from the upstream file before compiling. NOTE(review): the connect loop has no upper bound, the IOException caught in the port check is not attached as the cause of the RuntimeException, and afterAll dereferences `zk` even if beforeAll failed before assigning it. */ package import{File, IOException} import{ServerSocket, Socket} import java.util.concurrent.TimeUnit import import{LogLevelOverrider, Logging} import import org.apache.zookeeper.server.persistence.FileTxnSnapLog import org.apache.zookeeper.server.{ServerCnxnFactory, ServerConfig, ZooKeeperServer} import org.scalatest.{BeforeAndAfterAll, Suite} trait TestZooKeeper extends BeforeAndAfterAll with Logging { this: Suite => val zkServerPort = 2181 val zkServerExecutor = NamedExecutors.single("zookeeper-server") var zk: ZooKeeperConnection = _ override protected def beforeAll(): Unit = {"Launching a standalone ZooKeeper server for testing...") try { val socket = new ServerSocket(zkServerPort) socket.close() } catch { case e: IOException => throw new RuntimeException(s"TCP port $zkServerPort is required for tests but not available") } zkServerExecutor.submit { LogLevelOverrider.error("org.apache.zookeeper") val datadir = Files.createTempDir().getAbsolutePath val config = new ServerConfig config.parse(Array(zkServerPort.toString, datadir)) val zkServer = new ZooKeeperServer zkServer.setTxnLogFactory(new FileTxnSnapLog(new File(datadir), new File(datadir))) zkServer.setTickTime(6000) zkServer.setMinSessionTimeout(6000) zkServer.setMaxSessionTimeout(6000) val cnxnFactory = ServerCnxnFactory.createFactory try { cnxnFactory.configure(config.getClientPortAddress, 60) cnxnFactory.startup(zkServer) cnxnFactory.join() } catch { case _: InterruptedException =>"ZooKeeper server interrupted; shutting down...") cnxnFactory.shutdown() cnxnFactory.join() if (zkServer.isRunning) { zkServer.shutdown() }"ZooKeeper server stopped") } } var connected = false while (!connected) {"Waiting for ZooKeeper server to launch...") try { val socket = new Socket("localhost", zkServerPort)"ZooKeeper server is available") socket.close() zk = ZooKeeperConnection(s"localhost:$zkServerPort") connected = true } catch { case _: IOException => Thread.sleep(1000) // retry } } super.beforeAll() } override 
protected def afterAll(): Unit = { try super.afterAll() finally { zk.close()"Interrupting ZooKeeper server...") zkServerExecutor.shutdownNow() while (!zkServerExecutor.awaitTermination(1, TimeUnit.SECONDS)) {"awaiting ZooKeeper server termination...") }"ZooKeeper server terminated") } } }
Example 96
Source File: FileSystems.scala From mango with Apache License 2.0 | 5 votes |
/* Directory-listing helpers. entries(dir: Path, recursive) keeps a stack of (directory, remaining depth) pairs and returns a Stream of the non-directory entries found; a sub-directory is pushed for later traversal only when the remaining depth is greater than 1, so recursive = false (maxDepth 1) lists a single level, and directories themselves are never emitted. The File and String overloads delegate to the Path version, converting results back to File where the input was a File. NOTE(review): the package name, one import and the enclosing object header were lost during extraction (the final closing brace has no visible opener). NOTE(review): the DirectoryStream returned by Files.newDirectoryStream is not closed anywhere in the visible code, and the Java-to-Scala iterator conversion relies on the implicit scala.collection.JavaConversions import. */ package import import java.nio.file._ import scala.collection.JavaConversions._ import scala.collection.mutable def entries(dir: Path, recursive: Boolean = true): Stream[Path] = { val maxDepth = if (recursive) Int.MaxValue else 1 val stack = mutable.Stack[(Path, Int)]((dir, maxDepth)) new Iterator[Iterator[Path]] { override def hasNext: Boolean = stack.nonEmpty override def next(): Iterator[Path] = { val (dir, depth) = stack.pop() Files.newDirectoryStream(dir).iterator().flatMap { case entry if Files.isDirectory(entry) => if (depth > 1) stack.push((entry, depth - 1)) Nil case entry => Some(entry) } } }.toStream.flatten } def entries(dir: File): Stream[File] = entries(dir.toPath, recursive = true).map(_.toFile) def entries(dir: File, recursive: Boolean): Stream[File] = entries(dir.toPath, recursive).map(_.toFile) def entries(dir: String): Stream[Path] = entries(Paths.get(dir), recursive = true) def entries(dir: String, recursive: Boolean): Stream[Path] = entries(Paths.get(dir), recursive) }
Example 97
Source File: QueryPartitionSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive

// NOTE(review): the first and third import lines were blank in this listing; they are restored
// as java.io.File and Guava's Files -- confirm against the upstream file.
import java.io.File
import java.sql.Timestamp

import com.google.common.io.Files
import org.apache.hadoop.fs.FileSystem

import org.apache.spark.internal.config._
import org.apache.spark.sql._
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.util.Utils

/**
 * Tests querying a partitioned Hive table after one partition directory has been removed
 * from disk, plus a regression test for timestamp partition casts.
 */
class QueryPartitionSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
  import spark.implicits._

  /**
   * Creates a table with four partitions (ds = 1..4) of the same ten rows, checks the full
   * result, deletes the first `ds=` directory found and checks that three copies remain.
   */
  private def queryWhenPathNotExist(): Unit = {
    withTempView("testData") {
      withTable("table_with_partition", "createAndInsertTest") {
        withTempDir { tmpDir =>
          val testData = sparkContext.parallelize(
            (1 to 10).map(i => TestData(i, i.toString))).toDF()
          testData.createOrReplaceTempView("testData")

          // create the table for test
          sql(s"CREATE TABLE table_with_partition(key int,value string) " +
            s"PARTITIONED by (ds string) location '${tmpDir.toURI}' ")
          sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='1') " +
            "SELECT key,value FROM testData")
          sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='2') " +
            "SELECT key,value FROM testData")
          sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='3') " +
            "SELECT key,value FROM testData")
          sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='4') " +
            "SELECT key,value FROM testData")

          // test for the exist path
          checkAnswer(sql("select key,value from table_with_partition"),
            testData.union(testData).union(testData).union(testData))

          // delete the path of one partition
          tmpDir.listFiles
            .find { f => f.isDirectory && f.getName().startsWith("ds=") }
            .foreach { f => Utils.deleteRecursively(f) }

          // test for after delete the path
          checkAnswer(sql("select key,value from table_with_partition"),
            testData.union(testData).union(testData))
        }
      }
    }
  }

  test("SPARK-5068: query data when path doesn't exist") {
    withSQLConf(SQLConf.HIVE_VERIFY_PARTITION_PATH.key -> "true") {
      queryWhenPathNotExist()
    }
  }

  test("Replace spark.sql.hive.verifyPartitionPath by spark.files.ignoreMissingFiles") {
    withSQLConf(SQLConf.HIVE_VERIFY_PARTITION_PATH.key -> "false") {
      // The SparkContext conf is shared by every test in this JVM, so remember the previous
      // setting and put it back afterwards instead of leaking "true" into later tests.
      val sharedConf = sparkContext.conf
      val previous = sharedConf.getOption(IGNORE_MISSING_FILES.key)
      sharedConf.set(IGNORE_MISSING_FILES.key, "true")
      try {
        queryWhenPathNotExist()
      } finally {
        previous match {
          case Some(value) => sharedConf.set(IGNORE_MISSING_FILES.key, value)
          case None => sharedConf.remove(IGNORE_MISSING_FILES.key)
        }
      }
    }
  }

  test("SPARK-21739: Cast expression should initialize timezoneId") {
    withTable("table_with_timestamp_partition") {
      sql("CREATE TABLE table_with_timestamp_partition(value int) PARTITIONED BY (ts TIMESTAMP)")
      sql("INSERT OVERWRITE TABLE table_with_timestamp_partition " +
        "PARTITION (ts = '2010-01-01 00:00:00.000') VALUES (1)")

      // test for Cast expression in TableReader
      checkAnswer(sql("SELECT * FROM table_with_timestamp_partition"),
        Seq(Row(1, Timestamp.valueOf("2010-01-01 00:00:00.000"))))

      // test for Cast expression in HiveTableScanExec
      checkAnswer(sql("SELECT value FROM table_with_timestamp_partition " +
        "WHERE ts = '2010-01-01 00:00:00.000'"), Row(1))
    }
  }
}
Example 98
Source File: HiveQueryFileTest.scala From XSQL with Apache License 2.0 | 5 votes |
/* Excerpt that registers one query test per (name, file) pair in testCases, iterating them in sorted order: names matching the first condition are skipped with a "Blacklisted test skipped" debug log, names matching the second condition (or any name when runAll is true) are read with fileToString and passed to createQueryTest, and the rest are registered via ignore(...) only when the whitelist system property is unset and runAll is false. runAll is true when the "spark.hive.alltests" system property is set or when runOnlyDirectories / skipDirectories is non-empty. realWhiteList is the comma-split value of the "spark.hive.whitelist" system property, falling back to whiteList (".*"). NOTE(review): the class header, one import and both filter conditions (now `if (||_))`) were lost during extraction; restore from the upstream file before compiling. */ package org.apache.spark.sql.hive.execution import import org.apache.spark.sql.catalyst.util._ def whiteList: Seq[String] = ".*" :: Nil def testCases: Seq[(String, File)] val runAll: Boolean = !(System.getProperty("spark.hive.alltests") == null) || runOnlyDirectories.nonEmpty || skipDirectories.nonEmpty val whiteListProperty: String = "spark.hive.whitelist" // Allow the whiteList to be overridden by a system property val realWhiteList: Seq[String] = Option(System.getProperty(whiteListProperty)).map(_.split(",").toSeq).getOrElse(whiteList) // Go through all the test cases and add them to scala test. testCases.sorted.foreach { case (testCaseName, testCaseFile) => if (||_)) { logDebug(s"Blacklisted test skipped $testCaseName") } else if (||_) || runAll) { // Build a test case and submit it to scala test framework... val queriesString = fileToString(testCaseFile) createQueryTest(testCaseName, queriesString, reset = true, tryWithoutResettingFirst = true) } else { // Only output warnings for the built in whitelist as this clutters the output when the user // trying to execute a single test from the commandline. if (System.getProperty(whiteListProperty) == null && !runAll) { ignore(testCaseName) {} } } } }
Example 99
Source File: HiveClientBuilder.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.client

import java.io.File

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.util.VersionInfo

import org.apache.spark.SparkConf
import org.apache.spark.util.Utils

/** Test-only factory for [[HiveClient]] instances backed by a throw-away Derby metastore. */
private[client] object HiveClientBuilder {
  // In order to speed up test execution during development or in Jenkins, you can specify the path
  // of an existing Ivy cache:
  private val ivyPath: Option[String] = {
    // The property key was empty in this listing, and sys.props("") throws
    // NoSuchElementException. Restored to the JVM temp directory property.
    // NOTE(review): confirm the key against the upstream file.
    sys.env.get("SPARK_VERSIONS_SUITE_IVY_PATH").orElse(
      Some(new File(sys.props("java.io.tmpdir"), "hive-ivy-cache").getAbsolutePath))
  }

  /**
   * Returns `extraConf` extended with a Derby connection URL and a warehouse directory, each
   * pointing at a freshly created temp directory.
   */
  private def buildConf(extraConf: Map[String, String]) = {
    lazy val warehousePath = Utils.createTempDir()
    lazy val metastorePath = Utils.createTempDir()
    // Only the path is needed: the directory is removed so that the `create=true` URL below
    // points at a location that does not exist yet.
    metastorePath.delete()
    extraConf ++ Map(
      "javax.jdo.option.ConnectionURL" -> s"jdbc:derby:;databaseName=$metastorePath;create=true",
      "hive.metastore.warehouse.dir" -> warehousePath.toString)
  }

  // for testing only
  /**
   * Builds a client for the given Hive metastore version.
   *
   * @param version             Hive metastore version passed to the isolated loader
   * @param hadoopConf          Hadoop configuration handed to the client
   * @param extraConf           extra settings merged into the generated configuration
   * @param sharesHadoopClasses forwarded to the isolated class loader
   */
  def buildClient(
      version: String,
      hadoopConf: Configuration,
      extraConf: Map[String, String] = Map.empty,
      sharesHadoopClasses: Boolean = true): HiveClient = {
    IsolatedClientLoader.forVersion(
      hiveMetastoreVersion = version,
      hadoopVersion = VersionInfo.getVersion,
      sparkConf = new SparkConf(),
      hadoopConf = hadoopConf,
      config = buildConf(extraConf),
      ivyPath = ivyPath,
      sharesHadoopClasses = sharesHadoopClasses).createClient()
  }
}
Example 100
Source File: EvalPythonExec.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.python

import java.io.File

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.{SparkEnv, TaskContext}
import org.apache.spark.api.python.ChainedPythonFunctions
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.types.{DataType, StructField, StructType}
import org.apache.spark.util.Utils

/**
 * Base physical operator for evaluating Python UDFs. Each input row is buffered in a
 * [[HybridRowQueue]], the de-duplicated UDF arguments are projected and handed to `evaluate`,
 * and every output row is joined back onto the buffered input row it came from.
 *
 * NOTE(review): the receivers of several `map` / `unzip` calls in `doExecute` were missing
 * from this listing and have been reconstructed; confirm against the upstream file.
 */
abstract class EvalPythonExec(udfs: Seq[PythonUDF], output: Seq[Attribute], child: SparkPlan)
  extends SparkPlan {

  def children: Seq[SparkPlan] = child :: Nil

  override def producedAttributes: AttributeSet = AttributeSet(output.drop(child.output.length))

  /**
   * Flattens a UDF whose only child is itself a UDF into one chain of functions, returning
   * that chain together with the innermost argument expressions.
   */
  private def collectFunctions(udf: PythonUDF): (ChainedPythonFunctions, Seq[Expression]) = {
    udf.children match {
      case Seq(u: PythonUDF) =>
        val (chained, children) = collectFunctions(u)
        (ChainedPythonFunctions(chained.funcs ++ Seq(udf.func)), children)
      case children =>
        // There should not be any other UDFs, or the children can't be evaluated directly.
        assert(children.forall(_.find(_.isInstanceOf[PythonUDF]).isEmpty))
        (ChainedPythonFunctions(Seq(udf.func)), udf.children)
    }
  }

  /**
   * Runs the chained functions over the projected argument rows.
   *
   * @param funcs      one function chain per UDF
   * @param argOffsets for each UDF, the positions of its arguments within a projected row
   * @param iter       projected argument rows
   * @param schema     schema of the projected argument rows
   * @param context    the current task context
   */
  protected def evaluate(
      funcs: Seq[ChainedPythonFunctions],
      argOffsets: Array[Array[Int]],
      iter: Iterator[InternalRow],
      schema: StructType,
      context: TaskContext): Iterator[InternalRow]

  protected override def doExecute(): RDD[InternalRow] = {
    val inputRDD = child.execute().map(_.copy())

    inputRDD.mapPartitions { iter =>
      val context = TaskContext.get()

      // The queue used to buffer input rows so we can drain it to
      // combine input with output from Python.
      val queue = HybridRowQueue(context.taskMemoryManager(),
        new File(Utils.getLocalDir(SparkEnv.get.conf)), child.output.length)
      context.addTaskCompletionListener[Unit] { ctx =>
        queue.close()
      }

      val (pyFuncs, inputs) = udfs.map(collectFunctions).unzip

      // flatten all the arguments
      val allInputs = new ArrayBuffer[Expression]
      val dataTypes = new ArrayBuffer[DataType]
      val argOffsets = inputs.map { input =>
        input.map { e =>
          // One scan finds an existing, semantically equal argument. The listing scanned
          // twice (`exists` followed by `indexWhere`) for the same answer.
          val existing = allInputs.indexWhere(_.semanticEquals(e))
          if (existing >= 0) {
            existing
          } else {
            allInputs += e
            dataTypes += e.dataType
            allInputs.length - 1
          }
        }.toArray
      }.toArray
      val projection = newMutableProjection(allInputs, child.output)
      val schema = StructType(dataTypes.zipWithIndex.map { case (dt, i) =>
        StructField(s"_$i", dt)
      })

      // Add rows to queue to join later with the result.
      val projectedRowIter = iter.map { inputRow =>
        queue.add(inputRow.asInstanceOf[UnsafeRow])
        projection(inputRow)
      }

      val outputRowIterator = evaluate(
        pyFuncs, argOffsets, projectedRowIter, schema, context)

      val joined = new JoinedRow
      val resultProj = UnsafeProjection.create(output, output)

      outputRowIterator.map { outputRow =>
        resultProj(joined(queue.remove(), outputRow))
      }
    }
  }
}
Source File: resources.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command

// NOTE(review): the first two import lines were blank in this listing; they are restored as
// java.io.File and java.net.URI -- confirm against the upstream file.
import java.io.File
import java.net.URI

import org.apache.hadoop.fs.Path

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

/**
 * Lists the jars registered with the SparkContext, one per row in a single "Results" column.
 *
 * @param jars optional jar paths; when non-empty, only registered jars whose path contains the
 *             file name of one of these paths are returned
 */
case class ListJarsCommand(jars: Seq[String] = Seq.empty[String]) extends RunnableCommand {
  override val output: Seq[Attribute] = {
    AttributeReference("Results", StringType, nullable = false)() :: Nil
  }

  override def run(sparkSession: SparkSession): Seq[Row] = {
    val jarList = sparkSession.sparkContext.listJars()
    if (jars.nonEmpty) {
      // NOTE(review): the generator source (`jars.map(f`) was missing from this listing and
      // is reconstructed from the surviving `=> new Path(f).getName)` fragment.
      for {
        jarName <- jars.map(f => new Path(f).getName)
        jarPath <- jarList if jarPath.contains(jarName)
      } yield Row(jarPath)
    } else {
      // NOTE(review): this branch was empty in the listing, which would not type-check as
      // Seq[Row]; it is reconstructed as "return every registered jar".
      jarList.map(Row(_))
    }
  }
}
Source File: OrcTest.scala From XSQL with Apache License 2.0 | 5 votes |
/* Excerpt of ORC test helpers. withOrcTable registers the DataFrame supplied by withOrcDataFrame as a temp view named tableName and runs `f` inside withTempView; the two makeOrcFile overloads write a Seq of Products or a DataFrame to `path` in ORC format using SaveMode.Overwrite; checkPredicatePushDown repartitions `df` into numRows partitions, writes it as ORC to a temp path and asserts that `actual` is smaller than numRows. NOTE(review): the enclosing class header, one import and the expression assigned to `actual` (everything after `stripSparkFilter(`) were lost during extraction; restore from the upstream file before compiling. */ package org.apache.spark.sql.execution.datasources.orc import import scala.reflect.ClassTag import scala.reflect.runtime.universe.TypeTag import org.scalatest.BeforeAndAfterAll import org.apache.spark.sql._ import org.apache.spark.sql.internal.SQLConf.ORC_IMPLEMENTATION import org.apache.spark.sql.test.SQLTestUtils protected def withOrcTable[T <: Product: ClassTag: TypeTag] (data: Seq[T], tableName: String) (f: => Unit): Unit = { withOrcDataFrame(data) { df => df.createOrReplaceTempView(tableName) withTempView(tableName)(f) } } protected def makeOrcFile[T <: Product: ClassTag: TypeTag]( data: Seq[T], path: File): Unit = { data.toDF().write.mode(SaveMode.Overwrite).orc(path.getCanonicalPath) } protected def makeOrcFile[T <: Product: ClassTag: TypeTag]( df: DataFrame, path: File): Unit = { df.write.mode(SaveMode.Overwrite).orc(path.getCanonicalPath) } protected def checkPredicatePushDown(df: DataFrame, numRows: Int, predicate: String): Unit = { withTempPath { file => // It needs to repartition data so that we can have several ORC files // in order to skip stripes in ORC. df.repartition(numRows).write.orc(file.getCanonicalPath) val actual = stripSparkFilter( assert(actual < numRows) } } }
Source File: HadoopFsRelationSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources

import java.io.{File, FilenameFilter}

import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, SortMergeJoinExec}
import org.apache.spark.sql.test.SharedSQLContext

/**
 * Tests for size statistics of file-based relations.
 *
 * NOTE(review): the right-hand sides of `totalSize`, `df`, `df1FromFile` and `df2FromFile`
 * were missing from this listing and have been reconstructed; confirm against the upstream
 * file.
 */
class HadoopFsRelationSuite extends QueryTest with SharedSQLContext {

  test("sizeInBytes should be the total size of all files") {
    withTempDir{ dir =>
      dir.delete()
      spark.range(1000).write.parquet(dir.toString)
      // ignore hidden files
      val allFiles = dir.listFiles(new FilenameFilter {
        override def accept(dir: File, name: String): Boolean = {
          !name.startsWith(".") && !name.startsWith("_")
        }
      })
      val totalSize = allFiles.map(_.length()).sum
      val df = spark.read.parquet(dir.toString)
      assert(df.queryExecution.logical.stats.sizeInBytes === BigInt(totalSize))
    }
  }

  test("SPARK-22790: spark.sql.sources.compressionFactor takes effect") {
    import testImplicits._
    Seq(1.0, 0.5).foreach { compressionFactor =>
      withSQLConf("spark.sql.sources.fileCompressionFactor" -> compressionFactor.toString,
        "spark.sql.autoBroadcastJoinThreshold" -> "400") {
        withTempPath { workDir =>
          // the file size is 740 bytes
          val workDirPath = workDir.getAbsolutePath
          val data1 = Seq(100, 200, 300, 400).toDF("count")
          data1.write.parquet(workDirPath + "/data1")
          val df1FromFile = spark.read.parquet(workDirPath + "/data1")
          val data2 = Seq(100, 200, 300, 400).toDF("count")
          data2.write.parquet(workDirPath + "/data2")
          val df2FromFile = spark.read.parquet(workDirPath + "/data2")
          val joinedDF = df1FromFile.join(df2FromFile, Seq("count"))
          if (compressionFactor == 0.5) {
            // The assertions below expect a broadcast join and no sort-merge join.
            val bJoinExec = joinedDF.queryExecution.executedPlan.collect {
              case bJoin: BroadcastHashJoinExec => bJoin
            }
            assert(bJoinExec.nonEmpty)
            val smJoinExec = joinedDF.queryExecution.executedPlan.collect {
              case smJoin: SortMergeJoinExec => smJoin
            }
            assert(smJoinExec.isEmpty)
          } else {
            // compressionFactor is 1.0
            val bJoinExec = joinedDF.queryExecution.executedPlan.collect {
              case bJoin: BroadcastHashJoinExec => bJoin
            }
            assert(bJoinExec.isEmpty)
            val smJoinExec = joinedDF.queryExecution.executedPlan.collect {
              case smJoin: SortMergeJoinExec => smJoin
            }
            assert(smJoinExec.nonEmpty)
          }
        }
      }
    }
  }
}
Source File: StreamMetadataSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.streaming

import java.io.File
import java.util.UUID

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

import org.apache.spark.sql.streaming.StreamTest

/**
 * Round-trip and backward-compatibility tests for [[StreamMetadata]].
 *
 * NOTE(review): the `StreamMetadata.read(...)` calls and the id assertion were truncated in
 * this listing and have been reconstructed; confirm against the upstream file.
 */
class StreamMetadataSuite extends StreamTest {

  test("writing and reading") {
    withTempDir { dir =>
      val id = UUID.randomUUID.toString
      val metadata = StreamMetadata(id)
      val file = new Path(new File(dir, "test").toString)
      StreamMetadata.write(metadata, file, hadoopConf)
      val readMetadata = StreamMetadata.read(file, hadoopConf)
      assert(readMetadata.nonEmpty)
      assert(readMetadata.get.id === id)
    }
  }

  test("read Spark 2.1.0 format") {
    // query-metadata-logs-version-2.1.0.txt has the execution metadata generated by Spark 2.1.0
    assert(
      readForResource("query-metadata-logs-version-2.1.0.txt") ===
      StreamMetadata("d366a8bf-db79-42ca-b5a4-d9ca0a11d63e"))
  }

  /** Reads the metadata stored in the classpath resource `/structured-streaming/<fileName>`. */
  private def readForResource(fileName: String): StreamMetadata = {
    val input = getClass.getResource(s"/structured-streaming/$fileName")
    StreamMetadata.read(new Path(input.toString), hadoopConf).get
  }

  private val hadoopConf = new Configuration()
}
Source File: BarChartPainter.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.painter

import java.io.File
import java.util.Scanner

import org.jfree.chart.{ChartFactory, ChartUtils}
import org.jfree.chart.plot.PlotOrientation
import org.jfree.data.category.DefaultCategoryDataset

import org.apache.spark.util.Utils

/**
 * Renders the points collected in `dataPath` as a vertical bar chart saved to `picturePath`.
 * Each record is read as `category,peak,majority`, with the two values given in bytes.
 */
class BarChartPainter(dataPath: String, picturePath: String)
  extends Painter(dataPath, picturePath) {

  /**
   * Flushes and closes the point writer, then loads the data file into a dataset with a
   * "peak" and a "majority" series, both converted to megabytes.
   */
  def createDataset(): DefaultCategoryDataset = {
    fw.flush()
    fw.close()
    val dataset = new DefaultCategoryDataset
    val scaner = new Scanner(new File(dataPath))
    // Close the scanner (and the file it holds open) even if a record fails to parse;
    // the listing never closed it.
    try {
      while (scaner.hasNext()) {
        // NOTE(review): the receiver of `split` was missing from this listing and is
        // reconstructed as the next token, matching the `hasNext()` loop condition.
        val cols = scaner.next().split(",")
        dataset.addValue(Utils.byteStringAsMb(cols(1) + "b"), "peak", cols(0))
        dataset.addValue(Utils.byteStringAsMb(cols(2) + "b"), "majority", cols(0))
      }
    } finally {
      scaner.close()
    }
    dataset
  }

  /**
   * Draws the chart with the value axis fixed to `[yLB, yUB]` and saves it as a JPEG.
   *
   * @param width             image width in pixels
   * @param height            image height in pixels
   * @param chartTitle        chart title
   * @param categoryAxisLabel label of the category axis
   * @param valueAxisLabel    label of the value axis
   * @param yLB               lower bound of the value axis
   * @param yUB               upper bound of the value axis
   */
  def paint(
      width: Int,
      height: Int,
      chartTitle: String,
      categoryAxisLabel: String,
      valueAxisLabel: String,
      yLB: Double,
      yUB: Double): Unit = {
    val barChart = ChartFactory.createBarChart(
      chartTitle,
      categoryAxisLabel,
      valueAxisLabel,
      createDataset(),
      PlotOrientation.VERTICAL,
      true,
      false,
      false)
    barChart.getCategoryPlot.getRangeAxis.setRange(yLB, yUB)
    ChartUtils.saveChartAsJPEG(new File(picturePath), barChart, width, height)
  }

  /** Intentionally a no-op in this class; use the overload that takes axis bounds. */
  override def paint(
      width: Int,
      height: Int,
      chartTitle: String,
      categoryAxisLabel: String,
      valueAxisLabel: String): Unit = {}
}
Source File: Painter.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.painter

import java.awt.Font
import java.io.{File, FileWriter}

import org.jfree.chart.{ChartFactory, StandardChartTheme}
import org.jfree.data.general.Dataset

/**
 * Base class for chart painters. Points are appended as comma-separated lines to `dataPath`;
 * subclasses turn that file into a dataset and render it to `picturePath`.
 */
abstract class Painter(dataPath: String, picturePath: String) {
  // Declared before initialize() so the writer field visibly exists when the constructor
  // assigns it, rather than relying on `= _` emitting no assignment after the call.
  var fw: FileWriter = _

  initialize()

  /**
   * Deletes any existing data file, opens an appending writer on `dataPath` and installs a
   * chart theme that uses Monospaced fonts.
   */
  def initialize(): Unit = {
    val dataFile = new File(dataPath)
    if (dataFile.exists()) {
      dataFile.delete()
    }
    fw = new FileWriter(dataPath, true)
    val standardChartTheme = new StandardChartTheme("CN")
    standardChartTheme.setExtraLargeFont(new Font("Monospaced", Font.BOLD, 20))
    standardChartTheme.setRegularFont(new Font("Monospaced", Font.PLAIN, 15))
    standardChartTheme.setLargeFont(new Font("Monospaced", Font.PLAIN, 15))
    ChartFactory.setChartTheme(standardChartTheme)
  }

  /** Appends one `x,y` record to the data file. */
  def addPoint(xAxis: Any, yAxis: Any): Unit = {
    fw.write(s"${xAxis},${yAxis}\n")
  }

  /** Appends one `x,y,z` record to the data file. */
  def addPoint(xAxis: Any, yAxis: Any, zAxis: Any): Unit = {
    fw.write(s"${xAxis},${yAxis},${zAxis}\n")
  }

  /** Builds the chart dataset from the records written so far. */
  def createDataset(): Dataset

  /**
   * Renders the chart to `picturePath`.
   *
   * @param width             image width in pixels
   * @param height            image height in pixels
   * @param chartTitle        chart title
   * @param categoryAxisLabel label of the category axis
   * @param valueAxisLabel    label of the value axis
   */
  def paint(
      width: Int,
      height: Int,
      chartTitle: String,
      categoryAxisLabel: String,
      valueAxisLabel: String): Unit
}
Source File: TimeSeriesChartPainter.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.painter

import java.io.File
import java.util.Scanner

import org.jfree.chart.{ChartFactory, ChartUtils}
import org.jfree.data.time.{FixedMillisecond, TimeSeries, TimeSeriesCollection}
import org.jfree.data.xy.XYDataset

/**
 * Renders the points collected in `dataPath` as a time-series chart saved to `picturePath`.
 * Each record is read as `value,epochMillis`.
 */
class TimeSeriesChartPainter(dataPath: String, picturePath: String)
  extends Painter(dataPath, picturePath) {

  /**
   * Flushes and closes the point writer, then loads the data file into a single series named
   * "default"; a later record with the same timestamp replaces the earlier one.
   */
  def createDataset(): XYDataset = {
    fw.flush()
    fw.close()
    val dataset = new TimeSeriesCollection
    val timeSeries = new TimeSeries("default")
    val scaner = new Scanner(new File(dataPath))
    // Close the scanner (and the file it holds open) even if a record fails to parse;
    // the listing never closed it.
    try {
      while (scaner.hasNext()) {
        // NOTE(review): the receiver of `split` was missing from this listing and is
        // reconstructed as the next token, matching the `hasNext()` loop condition.
        val cols = scaner.next().split(",")
        timeSeries.addOrUpdate(new FixedMillisecond(cols(1).toLong), cols(0).toLong)
      }
    } finally {
      scaner.close()
    }
    dataset.addSeries(timeSeries)
    dataset
  }

  /**
   * Draws the chart without legend, tooltips or URLs and saves it as a JPEG.
   *
   * @param width             image width in pixels
   * @param height            image height in pixels
   * @param chartTitle        chart title
   * @param categoryAxisLabel label of the time axis
   * @param valueAxisLabel    label of the value axis
   */
  def paint(
      width: Int,
      height: Int,
      chartTitle: String,
      categoryAxisLabel: String,
      valueAxisLabel: String): Unit = {
    val lineChart = ChartFactory.createTimeSeriesChart(
      chartTitle,
      categoryAxisLabel,
      valueAxisLabel,
      createDataset(),
      false,
      false,
      false)
    ChartUtils.saveChartAsJPEG(new File(picturePath), lineChart, width, height)
  }
}
Source File: LineChartPainter.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.painter

import java.io.File
import java.util.Scanner

import org.jfree.chart.{ChartFactory, ChartUtils}
import org.jfree.chart.plot.PlotOrientation
import org.jfree.data.category.DefaultCategoryDataset

/**
 * Renders the points collected in `dataPath` as a vertical line chart saved to `picturePath`.
 * Each record is read as `value,category`.
 */
class LineChartPainter(dataPath: String, picturePath: String)
  extends Painter(dataPath, picturePath) {

  /**
   * Flushes and closes the point writer, then loads the data file into a dataset with a
   * single series named "default".
   */
  def createDataset(): DefaultCategoryDataset = {
    fw.flush()
    fw.close()
    val dataset = new DefaultCategoryDataset
    val scaner = new Scanner(new File(dataPath))
    // Close the scanner (and the file it holds open) even if a record fails to parse;
    // the listing never closed it.
    try {
      while (scaner.hasNext()) {
        // NOTE(review): the receiver of `split` was missing from this listing and is
        // reconstructed as the next token, matching the `hasNext()` loop condition.
        val cols = scaner.next().split(",")
        dataset.addValue(cols(0).toLong, "default", cols(1))
      }
    } finally {
      scaner.close()
    }
    dataset
  }

  /**
   * Draws the chart without legend, tooltips or URLs and saves it as a JPEG.
   *
   * @param width             image width in pixels
   * @param height            image height in pixels
   * @param chartTitle        chart title
   * @param categoryAxisLabel label of the category axis
   * @param valueAxisLabel    label of the value axis
   */
  def paint(
      width: Int,
      height: Int,
      chartTitle: String,
      categoryAxisLabel: String,
      valueAxisLabel: String): Unit = {
    val lineChart = ChartFactory.createLineChart(
      chartTitle,
      categoryAxisLabel,
      valueAxisLabel,
      createDataset(),
      PlotOrientation.VERTICAL,
      false,
      false,
      false)
    ChartUtils.saveChartAsJPEG(new File(picturePath), lineChart, width, height)
  }
}
Source File: ExecutorNumMonitor.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.monitor.executor

import java.io.File
import java.util.Date

import scala.xml._

import org.apache.spark.alarm.{AlertMessage, EmailAlarm, HtmlMessage}
import org.apache.spark.monitor.{Monitor, MonitorItem}
import org.apache.spark.monitor.MonitorItem.MonitorItem
import org.apache.spark.painter.TimeSeriesChartPainter
import org.apache.spark.scheduler._
import org.apache.spark.status.ExecutorSummaryWrapper

/**
 * Records the number of active executors over time and, when the application ends, renders
 * the samples as a time-series chart and (if an e-mail alarm is configured) returns an HTML
 * message that embeds the picture.
 */
class ExecutorNumMonitor extends ExecutorMonitor {
  override val item: MonitorItem = MonitorItem.EXECUTOR_NUM_NOTIFIER
  // NOTE(review): the configuration key was empty in this listing (`conf.get("")`); it is
  // restored as the application id so each application gets its own files -- confirm the key
  // against the upstream file.
  lazy val dataPath = s"/tmp/${item}-${conf.get("spark.app.id")}.csv"
  lazy val picturePath = s"/tmp/${item}-${conf.get("spark.app.id")}.jpg"
  lazy val eventMinInterval =
    conf.getTimeAsMs(s"${Monitor.PREFIX}.${item.toString.toLowerCase}.granularity", "60s")
  var lastPointTime: Long = new Date().getTime
  var recentEventTime: Long = new Date().getTime

  lazy private val painter = new TimeSeriesChartPainter(dataPath, picturePath)

  /** Number of executor summaries in the store whose "active" index is true. */
  def executorNum(): Long = {
    kvStore.count(classOf[ExecutorSummaryWrapper], "active", true)
  }

  /**
   * Appends one sample to the chart data.
   *
   * @param executorNum executor count to record
   * @param time        timestamp (epoch millis) of the sample
   */
  def addPoint(executorNum: Long, time: Long): Unit = {
    // Record the timestamp that was passed in. The listing always wrote `recentEventTime`,
    // so the second sample of every pair below duplicated the first one's timestamp.
    painter.addPoint(executorNum, time)
  }

  // scalastyle:off
  override def watchOut(event: SparkListenerEvent): Option[AlertMessage] = {
    event match {
      case env: SparkListenerExecutorAdded =>
        // try to coarse num change in 60s into one point, so that we can keep graph clean and readable
        if (env.time - lastPointTime > eventMinInterval) {
          addPoint(executorNum, recentEventTime)
          addPoint(executorNum, env.time)
          lastPointTime = env.time
        }
        recentEventTime = env.time
        Option.empty
      case env: SparkListenerExecutorRemoved =>
        if (env.time - lastPointTime > eventMinInterval) {
          addPoint(executorNum, recentEventTime)
          addPoint(executorNum, env.time)
          lastPointTime = env.time
        }
        recentEventTime = env.time
        Option.empty
      case e: SparkListenerApplicationEnd =>
        addPoint(executorNum, recentEventTime)
        addPoint(executorNum, new Date().getTime)
        painter.paint(600, 400, "executor num curve", "datetime", "executor num")
        if (EmailAlarm.get().isDefined) {
          val pic = EmailAlarm.get().get.embed(new File(picturePath))
          val a = <h2>动态调度情况:</h2>
            <img src={"cid:"+pic}></img>
            <br/>
          Option(new HtmlMessage(title = item, content = a.mkString))
        } else {
          Option.empty
        }
    }
  }
  // scalastyle:on
}
Source File: GlobalSapSQLContext.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
/* Test trait that shares one SQLContext, held in the companion object, across suites: setUpSQLContext calls init(sc), which creates the context only when `_sqlc` is still null, and tearDownSQLContext calls reset(), which unregisters all tables from the catalog when a context exists. getDataFrameFromSourceFile reads `path` line by line, splits each line on "," with limit -1 (so trailing empty fields are kept), wraps the fields as UTF8String values in an InternalRow, evaluates the `conversions` expressions (a Cast from StringType to each field's dataType) against it, and builds a DataFrame with sparkSchema from the resulting rows. NOTE(review): several imports and the receivers of the two mapping calls (`val conversions ={ case ...` and `Row.fromSeq({ c => ...`) were lost during extraction; restore from the upstream file before compiling. NOTE(review): the Source opened by Source.fromFile(path) is not closed anywhere in the visible code. */ package org.apache.spark.sql import import import{GlobalSparkContext, WithSQLContext} import org.apache.spark.SparkContext import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{BoundReference, Cast} import org.apache.spark.unsafe.types._ import org.apache.spark.sql.types._ import org.scalatest.Suite import trait GlobalSapSQLContext extends GlobalSparkContext with WithSQLContext { self: Suite => override implicit def sqlContext: SQLContext = GlobalSapSQLContext._sqlc override protected def setUpSQLContext(): Unit = GlobalSapSQLContext.init(sc) override protected def tearDownSQLContext(): Unit = GlobalSapSQLContext.reset() def getDataFrameFromSourceFile(sparkSchema: StructType, path: File): DataFrame = { val conversions ={ case (field, index) => Cast(BoundReference(index, StringType, nullable = true), field.dataType) }) val data = Source.fromFile(path) .getLines() .map({ line => val stringRow = InternalRow.fromSeq(line.split(",", -1).map(UTF8String.fromString)) Row.fromSeq({ c => c.eval(stringRow) })) }) val rdd = sc.parallelize(data.toSeq, numberOfSparkWorkers) sqlContext.createDataFrame(rdd, sparkSchema) } } object GlobalSapSQLContext { private var _sqlc: SQLContext = _ private def init(sc: SparkContext): Unit = if (_sqlc == null) { _sqlc = TestUtils.newSQLContext(sc) } private def reset(): Unit = { if (_sqlc != null) { _sqlc.catalog.unregisterAllTables() } } }
Source File: DefaultFileWatchService.scala From play-file-watch with Apache License 2.0 | 5 votes |
/* FileWatchService implementation built on io.methvin's DirectoryWatcher. watch() keeps only the entries of filesToWatch that are directories (logging two warnings for each regular file and silently dropping anything else), picks MacOSXListeningWatchService when isMac is true and the default file system's WatchService otherwise, registers a listener that calls onChange() for every event, starts a daemon thread named "play-watch-service", and returns a FileWatcher whose stop() closes the directory watcher. The one-argument constructor sets isMac to false. NOTE(review): the package name, one import, the argument of `.paths(` and the body of the thread's `try` block were lost during extraction; restore from the upstream file before compiling. */ package import import java.nio.file.FileSystems import io.methvin.watcher.DirectoryChangeEvent import io.methvin.watcher.DirectoryChangeListener import io.methvin.watcher.DirectoryWatcher import io.methvin.watchservice.MacOSXListeningWatchService import scala.collection.JavaConverters._ import scala.util.control.NonFatal class DefaultFileWatchService(logger: LoggerProxy, isMac: Boolean) extends FileWatchService { def this(logger: LoggerProxy) = this(logger, false) def watch(filesToWatch: Seq[File], onChange: () => Unit) = { val dirsToWatch = filesToWatch.filter { file => if (file.isDirectory) { true } else if (file.isFile) { logger.warn("An attempt has been made to watch the file: " + file.getCanonicalPath) logger.warn("DefaultFileWatchService only supports watching directories. The file will not be watched.") false } else false } val watchService = if (isMac) new MacOSXListeningWatchService() else FileSystems.getDefault.newWatchService() val directoryWatcher = DirectoryWatcher .builder() .paths( .listener(new DirectoryChangeListener { override def onEvent(event: DirectoryChangeEvent): Unit = onChange() }) .watchService(watchService) .build() val thread = new Thread( new Runnable { override def run(): Unit = { try { } catch { case NonFatal(_) => // Do nothing, this means the watch service has been closed, or we've been interrupted. } } }, "play-watch-service" ) thread.setDaemon(true) thread.start() new FileWatcher { override def stop(): Unit = directoryWatcher.close() } } }
Source File: PollingFileWatchService.scala From play-file-watch with Apache License 2.0 | 5 votes |
/* Polling-based change detection. SourceModificationWatch.watch lists the files from sourcesFinder, takes the newest lastModifiedTime (0 when there are no files), and treats the sources as modified when that time is later than state.lastCallbackCallTime or when the set of paths differs from state.previousFiles. When modified it records the current time and does not trigger; otherwise it triggers only if the previous state was awaitingQuietPeriod, i.e. on the first unmodified poll after a modification. On trigger it returns (true, newState) with count incremented; otherwise it sleeps pollDelayMillis and either returns (false, newState) when terminationCondition holds or recurses (tail call). WatchState carries the last callback time, the previous path set, the awaiting-quiet-period flag and the trigger count; WatchState.empty is the initial state. NOTE(review): the package name, one import and the expression assigned to sourceFilesPath were lost during extraction; restore from the upstream file before compiling. */ package import import better.files.{ File => ScalaFile, _ } import annotation.tailrec object SourceModificationWatch { type PathFinder = () => Iterator[ScalaFile] private def listFiles(sourcesFinder: PathFinder): Set[ScalaFile] = sourcesFinder().toSet private def findLastModifiedTime(files: Set[ScalaFile]): Long = { if (files.nonEmpty) files.maxBy(_.lastModifiedTime).lastModifiedTime.toEpochMilli else 0L } @tailrec def watch(sourcesFinder: PathFinder, pollDelayMillis: Int, state: WatchState)( terminationCondition: => Boolean ): (Boolean, WatchState) = { import state._ val filesToWatch = listFiles(sourcesFinder) val sourceFilesPath: Set[String] = val lastModifiedTime = findLastModifiedTime(filesToWatch) val sourcesModified = lastModifiedTime > lastCallbackCallTime || previousFiles != sourceFilesPath val (triggered, newCallbackCallTime) = if (sourcesModified) (false, System.currentTimeMillis) else (awaitingQuietPeriod, lastCallbackCallTime) val newState = new WatchState(newCallbackCallTime, sourceFilesPath, sourcesModified, if (triggered) count + 1 else count) if (triggered) (true, newState) else { Thread.sleep(pollDelayMillis) if (terminationCondition) (false, newState) else watch(sourcesFinder, pollDelayMillis, newState)(terminationCondition) } } } final class WatchState( val lastCallbackCallTime: Long, val previousFiles: Set[String], val awaitingQuietPeriod: Boolean, val count: Int ) { def previousFileCount: Int = previousFiles.size } object WatchState { def empty = new WatchState(0L, Set.empty[String], false, 0) }
Source File: ExampleMahaService.scala From maha with Apache License 2.0 | 5 votes |
// Copyright 2017, Yahoo Holdings Inc.
// Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms.
// NOTE(review): this listing was damaged by text extraction; the package name, the
// com.yahoo.* / java.io / commons-io imports, both `DateTime.now()` expressions,
// `dataSource.map(new ...)`, the date cell of the inserted row and `rs.next()` were
// reconstructed — confirm against the upstream maha sources.
package com.yahoo.maha.api.example

import java.io.File
import java.util.UUID

import com.yahoo.maha.core.ddl.OracleDDLGenerator
import com.yahoo.maha.jdbc.{JdbcConnection, List, Seq}
import com.yahoo.maha.service.{DefaultMahaService, MahaService, MahaServiceConfig}
import com.zaxxer.hikari.{HikariConfig, HikariDataSource}
import grizzled.slf4j.Logging
import org.apache.commons.io.FileUtils
import org.joda.time.DateTime
import org.joda.time.format.DateTimeFormat

/** Boots a [[MahaService]] for the "academic" registry on top of an in-memory H2 database. */
object ExampleMahaService extends Logging {

  val REGISTRY_NAME = "academic"

  private var dataSource: Option[HikariDataSource] = None
  private var jdbcConnection: Option[JdbcConnection] = None

  /** Random suffix that keeps the in-memory database name unique per JVM. */
  val h2dbId = UUID.randomUUID().toString.replace("-", "")

  val today: String = DateTimeFormat.forPattern("yyyy-MM-dd").print(DateTime.now())
  val yesterday: String = DateTimeFormat.forPattern("yyyy-MM-dd").print(DateTime.now().minusDays(1))

  /** Creates the Hikari pool against the in-memory H2 instance and wraps it in a JdbcConnection. */
  def initJdbcToH2(): Unit = {
    val config = new HikariConfig()
    config.setJdbcUrl(s"jdbc:h2:mem:$h2dbId;MODE=Oracle;DB_CLOSE_DELAY=-1")
    config.setUsername("sa")
    config.setPassword("h2.test.database.password")
    config.setMaximumPoolSize(2)
    dataSource = Option(new HikariDataSource(config))
    jdbcConnection = dataSource.map(new JdbcConnection(_))
    assert(jdbcConnection.isDefined, "Failed to connect to h2 local server")
  }

  /**
   * Reads `src/<scope>/resources/maha-service-config.json`, substitutes the H2 database id,
   * initialises the database and returns a service with the sample data staged.
   *
   * @param scope source set whose resources directory holds the config ("main" by default)
   */
  def getMahaService(scope: String = "main"): MahaService = {
    val jsonString = FileUtils
      .readFileToString(new File(s"src/$scope/resources/maha-service-config.json"))
      .replaceAll("h2dbId", s"$h2dbId")

    initJdbcToH2()

    val mahaServiceResult = MahaServiceConfig.fromJson(jsonString.getBytes("utf-8"))
    if (mahaServiceResult.isFailure) {
      mahaServiceResult.leftMap { res =>
        error(s"Failed to launch Example MahaService, MahaService Error list is: ${res.list.toList}")
      }
    }
    // Fails here (after the error above has been logged) when the config could not be parsed.
    val mahaServiceConfig = mahaServiceResult.toOption.get
    val mahaService: MahaService = new DefaultMahaService(mahaServiceConfig)
    stageStudentData(mahaServiceConfig)
    mahaService
  }

  /** Creates the tables for every fact in the registry and inserts one sample row. */
  def stageStudentData(mahaServiceConfig: MahaServiceConfig): Unit = {
    val ddlGenerator = new OracleDDLGenerator
    val erRegistryConfig = mahaServiceConfig.registry.get(ExampleMahaService.REGISTRY_NAME).get
    val erRegistry = erRegistryConfig.registry

    erRegistry.factMap.values.foreach { publicFact =>
      publicFact.factList.foreach { fact =>
        val ddl = ddlGenerator.toDDL(fact)
        assert(jdbcConnection.get.executeUpdate(ddl).isSuccess)
      }
    }

    val insertSql =
      """
        INSERT INTO student_grade_sheet (year, section_id, student_id, class_id, total_marks, date, comment)
        VALUES (?, ?, ?, ?, ?, ?, ?)
      """

    val rows: List[Seq[Any]] = List(
      Seq(1, 100, 213, 200, 125, today, "some comment")
    )

    rows.foreach { row =>
      val result = jdbcConnection.get.executeUpdate(insertSql, row)
      assert(result.isSuccess)
    }

    // Read the table back to confirm every row was stored.
    var count = 0
    jdbcConnection.get.queryForObject("select * from student_grade_sheet") { rs =>
      while (rs.next()) {
        count += 1
      }
    }
    assert(rows.size == count)
  }
}
Source File: WorkerStateReporter.scala From maha with Apache License 2.0 | 5 votes |
// Copyright 2018, Yahoo Inc.
// Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms.
// NOTE(review): this listing was damaged by text extraction; the package name, the java.io /
// akka / com.yahoo imports, the mailbox id (config key and `withMailbox` argument) and the
// `actorConfig.name).path` argument were reconstructed — confirm against the upstream maha sources.
package com.yahoo.maha.worker.state

import java.io.File

import akka.actor.{Actor, ActorPath, ActorSystem, Props}
import com.typesafe.config.{Config, ConfigFactory}
import com.yahoo.maha.core.Engine
import com.yahoo.maha.worker.state.actor._
import grizzled.slf4j.Logging

import scala.util.control.NonFatal

object WorkerStateReporter extends Logging {

  // Use a bounded mailbox to prevent memory leaks in the rare case when jobs get piled up to be processed by the actor
  val defaultConfig: Config = ConfigFactory.parseString(
    """
      |akka.actor.nonblocking_bounded_mailbox {
      |  mailbox-type = akka.dispatch.NonBlockingBoundedMailbox
      |  mailbox-capacity = 10000
      |}
      |akka {
      |  loggers = ["akka.event.slf4j.Slf4jLogger"]
      |  loglevel = "INFO"
      |}
      |""".stripMargin)
}

/**
 * Forwards job start/end notifications to a `WorkerStateActor`.
 *
 * @param akkaConf path of an akka config file; when it does not exist or is not readable
 *                 [[WorkerStateReporter.defaultConfig]] is used instead
 */
case class WorkerStateReporter(akkaConf: String) extends Logging {

  val config: Config = {
    val file = new File(akkaConf)
    if (file.exists() && file.canRead) {
      info(s"Using akka conf file : ${file.getAbsolutePath}")
      ConfigFactory.parseFile(file)
    } else {
      info("Using default akka config")
      WorkerStateReporter.defaultConfig
    }
  }

  val system = ActorSystem("maha-workers", config)

  /** Path of the state actor; the actor is created on first access. */
  lazy val workerStateActorPath: ActorPath = {
    val actorConfig = WorkerStateActorConfig()
    val props: Props = Props(classOf[WorkerStateActor], actorConfig).withMailbox("akka.actor.nonblocking_bounded_mailbox")
    val path = system.actorOf(props, actorConfig.name).path
    info(s"Created WorkerStateActor: $path")
    path
  }

  def jobStarted(executionType: ExecutionType, jobId: Long, engine: Engine, cost: Long, estimatedRows: Long, userId: String): Unit = {
    sendMessage(JobStarted(executionType, jobId, engine, cost, estimatedRows, userId))
  }

  def jobEnded(executionType: ExecutionType, jobId: Long, engine: Engine, cost: Long, estimatedRows: Long, userId: String): Unit = {
    sendMessage(JobEnded(executionType, jobId, engine, cost, estimatedRows, userId))
  }

  /** Best-effort delivery: failures are logged as warnings and never propagated to the caller. */
  def sendMessage(actorMessage: WorkerStateActorMessage): Unit = {
    try {
      system.actorSelection(workerStateActorPath).tell(actorMessage, Actor.noSender)
    } catch {
      // NonFatal rather than Throwable so OutOfMemoryError, InterruptedException etc. still propagate.
      case NonFatal(t) =>
        warn(s"Failed to send $actorMessage message to WorkerStateActor", t)
    }
  }
}
Source File: RocksDBStorage.scala From JustinDB with Apache License 2.0 | 5 votes |
// NOTE(review): this listing was damaged by text extraction; the package name, the java.io /
// kryo.io / PluggableStorageProtocol import prefixes, `RocksDB.open(options`, the
// `JustinDataSerializer.read` / `UUIDSerializer.read` calls, the `Future.successful(...)`
// argument and both `data.id` occurrences were reconstructed — confirm against upstream JustinDB.
package justin.db.storage

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}
import java.util.UUID

import com.esotericsoftware.kryo.io.{Input, Output}
import com.esotericsoftware.kryo.{Kryo, Serializer}
import justin.db.storage.PluggableStorageProtocol.{Ack, StorageGetData}
import org.rocksdb.{FlushOptions, Options, RocksDB}

import scala.concurrent.Future

// TODO:
// Current version store every single data under one file (totally doesn't care about data originality).
// Data should be eventually splitted by ring partitionId.
// This might be an issue during possible data movements between nodes.
/**
 * RocksDB-backed storage. Keys are the 16 raw bytes of the record UUID; values are
 * produced by [[RocksDBStorage.JustinDataSerializer]].
 *
 * @param dir directory of the RocksDB database (created when missing)
 */
final class RocksDBStorage(dir: File) extends PluggableStorageProtocol {
  import RocksDBStorage._

  {
    RocksDB.loadLibrary()
  }

  private[this] val kryo = new Kryo()

  private[this] val db: RocksDB = {
    val options: Options = new Options().setCreateIfMissing(true)
    RocksDB.open(options, dir.getPath)
  }

  /** Looks up `id`; `resolveOriginality` is not consulted by this implementation. */
  override def get(id: UUID)(resolveOriginality: (UUID) => PluggableStorageProtocol.DataOriginality): Future[PluggableStorageProtocol.StorageGetData] = {
    val key: Array[Byte] = uuid2bytes(kryo, id)
    // db.get returns null for a missing key, hence the Option wrapper.
    val justinDataOpt = Option(db.get(key)).map { dataBytes =>
      val input = new Input(new ByteArrayInputStream(dataBytes))
      JustinDataSerializer.read(kryo, input, classOf[JustinData])
    }

    Future.successful(justinDataOpt.map(StorageGetData.Single).getOrElse(StorageGetData.None))
  }

  /** Stores `data` under its id and waits for the flush; `resolveOriginality` is not consulted. */
  override def put(data: JustinData)(resolveOriginality: (UUID) => PluggableStorageProtocol.DataOriginality): Future[PluggableStorageProtocol.Ack] = {
    val key: Array[Byte] = uuid2bytes(kryo, data.id)
    val dataBytes: Array[Byte] = {
      // Output.getBuffer exposes the whole internal buffer: zero-padded for small records and,
      // once a record outgrows the buffer and is flushed to the stream, only its tail.
      // Collect the bytes from the sink instead so exactly what was written is stored.
      val sink = new ByteArrayOutputStream()
      val output = new Output(sink)
      JustinDataSerializer.write(kryo, output, data)
      output.close() // flushes buffered bytes into `sink`
      sink.toByteArray
    }

    db.put(key, dataBytes)
    db.flush(new FlushOptions().setWaitForFlush(true))

    Ack.future
  }
}

object RocksDBStorage {

  /** Encodes `id` as 16 bytes (most significant long first). */
  def uuid2bytes(kryo: Kryo, id: UUID): Array[Byte] = {
    // The buffer is sized to exactly the 16 bytes written, so getBuffer is the full payload here.
    val output = new Output(new ByteArrayOutputStream(), 16)
    UUIDSerializer.write(kryo, output, id)
    output.getBuffer
  }

  object UUIDSerializer extends Serializer[UUID] {
    override def read(kryo: Kryo, input: Input, `type`: Class[UUID]): UUID = {
      new UUID(input.readLong, input.readLong)
    }

    override def write(kryo: Kryo, output: Output, uuid: UUID): Unit = {
      output.writeLong(uuid.getMostSignificantBits)
      output.writeLong(uuid.getLeastSignificantBits)
    }
  }

  /** Field order on the wire: id, value, vclock, timestamp. */
  object JustinDataSerializer extends Serializer[JustinData] {
    override def read(kryo: Kryo, input: Input, `type`: Class[JustinData]): JustinData = {
      JustinData(
        id        = UUIDSerializer.read(kryo, input, classOf[UUID]),
        value     = input.readString(),
        vclock    = input.readString(),
        timestamp = input.readLong()
      )
    }

    override def write(kryo: Kryo, output: Output, data: JustinData): Unit = {
      UUIDSerializer.write(kryo, output, data.id)
      output.writeString(data.value)
      output.writeString(data.vclock)
      output.writeLong(data.timestamp)
    }
  }
}
Source File: VirtualScreeningTest.scala From MaRe with Apache License 2.0 | 5 votes |
// NOTE(review): this listing was damaged by text extraction; the package name, the java.io /
// scala.io imports and both `DockerHelper.run(` call heads were reconstructed — confirm
// against the upstream MaRe sources.
package se.uu.it.mare

import java.io.File
import java.util.UUID

import scala.io.Source
import scala.util.Properties

import org.apache.spark.SharedSparkContext
import org.junit.runner.RunWith
import org.scalatest.FunSuite
import org.scalatest.junit.JUnitRunner

private object SDFUtils {

  /** Splits an SDF document on the `$$$$` record delimiter and returns (first line, last line) per molecule. */
  def parseIDsAndScores(sdf: String): Array[(String, String)] = {
    sdf.split("\\n\\$\\$\\$\\$\\n").map { mol =>
      val lines = mol.split("\\n")
      (lines(0), lines.last)
    }
  }

}

/** Compares a MaRe (parallel) docking + sorting pipeline with the same two containers run serially. */
@RunWith(classOf[JUnitRunner])
class VirtualScreeningTest extends FunSuite with SharedSparkContext {

  private val tmpDir = new File(Properties.envOrElse("TMPDIR", "/tmp"))

  test("Virtual Screening") {

    sc.hadoopConfiguration.set("textinputformat.record.delimiter", "\n$$$$\n")
    val mols = sc.textFile(getClass.getResource("sdf/molecules.sdf").getPath)

    // Parallel execution with MaRe
    val hitsParallel = new MaRe(mols)
      .map(
        inputMountPoint = TextFile("/input.sdf", "\n$$$$\n"),
        outputMountPoint = TextFile("/output.sdf", "\n$$$$\n"),
        imageName = "mcapuccini/oe:latest",
        command = "fred -receptor /var/openeye/hiv1_protease.oeb " +
          "-hitlist_size 0 " +
          "-conftest none " +
          "-dock_resolution Low " +
          "-dbase /input.sdf " +
          "-docked_molecule_file /output.sdf")
      .reduce(
        inputMountPoint = TextFile("/input.sdf", "\n$$$$\n"),
        outputMountPoint = TextFile("/output.sdf", "\n$$$$\n"),
        imageName = "mcapuccini/sdsorter:latest",
        command = "sdsorter -reversesort='FRED Chemgauss4 score' " +
          "-keep-tag='FRED Chemgauss4 score' " +
          "-nbest=30 " +
          "/input.sdf " +
          "/output.sdf")
      .rdd.collect.mkString("\n$$$$\n")

    // Serial execution
    val inputFile = new File(getClass.getResource("sdf/molecules.sdf").getPath)
    val dockedFile = new File(tmpDir, "mare_test_" + UUID.randomUUID.toString)
    dockedFile.createNewFile
    dockedFile.deleteOnExit
    val outputFile = new File(tmpDir, "mare_test_" + UUID.randomUUID.toString)
    outputFile.createNewFile
    outputFile.deleteOnExit
    DockerHelper.run(
      imageName = "mcapuccini/oe:latest",
      command = "fred -receptor /var/openeye/hiv1_protease.oeb " +
        "-hitlist_size 0 " +
        "-conftest none " +
        "-dock_resolution Low " +
        "-dbase /input.sdf " +
        "-docked_molecule_file /docked.sdf",
      bindFiles = Seq(inputFile, dockedFile),
      volumeFiles = Seq(new File("/input.sdf"), new File("/docked.sdf")),
      forcePull = false)
    DockerHelper.run(
      imageName = "mcapuccini/sdsorter:latest",
      command = "sdsorter -reversesort='FRED Chemgauss4 score' " +
        "-keep-tag='FRED Chemgauss4 score' " +
        "-nbest=30 " +
        "/docked.sdf " +
        "/output.sdf",
      bindFiles = Seq(dockedFile, outputFile),
      volumeFiles = Seq(new File("/docked.sdf"), new File("/output.sdf")),
      forcePull = false)

    // Close the source instead of leaking the file handle.
    val hitsSource = Source.fromFile(outputFile)
    val hitsSerial = try hitsSource.mkString finally hitsSource.close()

    // Test
    val parallel = SDFUtils.parseIDsAndScores(hitsParallel)
    val serial = SDFUtils.parseIDsAndScores(hitsSerial)
    // Element-wise comparison; Array == is reference equality and `.deep` is gone in Scala 2.13.
    assert(parallel.toSeq == serial.toSeq)

  }

}
Source File: TestSuiteTests.scala From circe-json-schema with Apache License 2.0 | 5 votes |
// NOTE(review): two imports were lost in text extraction and have been reconstructed
// (cats.data.Validated, java.io.File) — confirm against the upstream circe-json-schema sources.
package io.circe.schema

import cats.data.Validated
import io.circe.{ Decoder, Json }
import java.io.File
import org.scalatest.flatspec.AnyFlatSpec

/** One instance of a test-suite file: `data` is expected to validate iff `valid`. */
case class SchemaTestCase(description: String, data: Json, valid: Boolean)

/** A schema together with the cases to run against it. */
case class SchemaTest(description: String, schema: Json, tests: List[SchemaTestCase])

object SchemaTestCase {
  implicit val decodeSchemaTestCase: Decoder[SchemaTestCase] = io.circe.generic.semiauto.deriveDecoder
}

object SchemaTest {
  implicit val decodeSchemaTest: Decoder[SchemaTest] = io.circe.generic.semiauto.deriveDecoder
}

/**
 * Registers two specs per case found in the JSON file at `path`: one with the schema loaded
 * from `Json`, one with the schema loaded from its string rendering.
 */
class TestSuiteTests(path: String) extends AnyFlatSpec {
  val tests: List[SchemaTest] = io.circe.jawn
    .decodeFile[List[SchemaTest]](new File(path))
    .fold(
      // Keep the decoding/IO error as the cause so the failure is diagnosable.
      error => throw new Exception(s"Unable to load test file: $path", error),
      identity
    )

  tests.foreach {
    case SchemaTest(description, schema, tests) =>
      tests.foreach {
        case SchemaTestCase(caseDescription, data, valid) =>
          val expected = if (valid) "validate successfully" else "fail to validate"
          s"$description: $caseDescription" should expected in {
            val errors = Schema.load(schema).validate(data)

            if (valid) {
              assert(errors == Validated.valid(()))
            } else {
              assert(errors.isInvalid)
            }
          }

          it should s"$expected when schema is loaded from a string" in {
            val errors = Schema.loadFromString(schema.noSpaces).get.validate(data)

            if (valid) {
              assert(errors == Validated.valid(()))
            } else {
              assert(errors.isInvalid)
            }
          }
      }
  }
}

class AdditionalItemsTestSuiteTests extends TestSuiteTests("tests/tests/draft7/additionalItems.json")
class AdditionalPropertiesTestSuiteTests extends TestSuiteTests("tests/tests/draft7/additionalProperties.json")
class AllOfTestSuiteTests extends TestSuiteTests("tests/tests/draft7/allOf.json")
class AnyOfTestSuiteTests extends TestSuiteTests("tests/tests/draft7/anyOf.json")
class BooleanSchemaTestSuiteTests extends TestSuiteTests("tests/tests/draft7/boolean_schema.json")
class ConstTestSuiteTests extends TestSuiteTests("tests/tests/draft7/const.json")
class ContainsTestSuiteTests extends TestSuiteTests("tests/tests/draft7/contains.json")
class DefaultTestSuiteTests extends TestSuiteTests("tests/tests/draft7/default.json")
//class DefinitionsTestSuiteTests extends TestSuiteTests("tests/tests/draft7/definitions.json")
class EnumTestSuiteTests extends TestSuiteTests("tests/tests/draft7/enum.json")
class ExclusiveMaximumTestSuiteTests extends TestSuiteTests("tests/tests/draft7/exclusiveMaximum.json")
class ExclusiveMinimumTestSuiteTests extends TestSuiteTests("tests/tests/draft7/exclusiveMinimum.json")
class FormatTestSuiteTests extends TestSuiteTests("tests/tests/draft7/format.json")
class IfThenElseTestSuiteTests extends TestSuiteTests("tests/tests/draft7/if-then-else.json")
class ItemsTestSuiteTests extends TestSuiteTests("tests/tests/draft7/items.json")
class MaximumTestSuiteTests extends TestSuiteTests("tests/tests/draft7/maximum.json")
class MaxItemsTestSuiteTests extends TestSuiteTests("tests/tests/draft7/maxItems.json")
class MaxLengthTestSuiteTests extends TestSuiteTests("tests/tests/draft7/maxLength.json")
class MaxPropertiesTestSuiteTests extends TestSuiteTests("tests/tests/draft7/maxProperties.json")
class MinimumTestSuiteTests extends TestSuiteTests("tests/tests/draft7/minimum.json")
class MinItemsTestSuiteTests extends TestSuiteTests("tests/tests/draft7/minItems.json")
class MinLengthTestSuiteTests extends TestSuiteTests("tests/tests/draft7/minLength.json")
class MinPropertiesTestSuiteTests extends TestSuiteTests("tests/tests/draft7/minProperties.json")
class MultipleOfTestSuiteTests extends TestSuiteTests("tests/tests/draft7/multipleOf.json")
class NotTestSuiteTests extends TestSuiteTests("tests/tests/draft7/not.json")
class OneOfTestSuiteTests extends TestSuiteTests("tests/tests/draft7/oneOf.json")
class PatternTestSuiteTests extends TestSuiteTests("tests/tests/draft7/pattern.json")
class PatternPropertiesTestSuiteTests extends TestSuiteTests("tests/tests/draft7/patternProperties.json")
class PropertyNamesTestSuiteTests extends TestSuiteTests("tests/tests/draft7/propertyNames.json")
// Not currently running remote tests.
//class RefTestSuiteTests extends TestSuiteTests("tests/tests/draft7/ref.json")
//class RefRemoteTestSuiteTests extends TestSuiteTests("tests/tests/draft7/refRemote.json")
class RequiredTestSuiteTests extends TestSuiteTests("tests/tests/draft7/required.json")
class TypeTestSuiteTests extends TestSuiteTests("tests/tests/draft7/type.json")
class UniqueItemsTestSuiteTests extends TestSuiteTests("tests/tests/draft7/uniqueItems.json")
Source File: KMeans.scala From spark-tda with Apache License 2.0 | 5 votes |
// NOTE(review): the `java.io.` import prefix was lost in text extraction and has been restored.
// Script for spark-shell: `sc` is expected to be provided by the shell.
import java.io.{File, PrintWriter}

import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.sql.functions._

/**
 * Clusters 2-D points with k-means and writes `x<TAB>y<TAB>clusterId` lines (ids start at 1)
 * to `<input name up to the first dot>-KMEANS-k<quantity>-i<iteration>.tsv`.
 *
 * @param pathToTextFile whitespace-separated text file; the first two columns are read as x and y
 * @param quantity       number of clusters
 * @param iteration      maximum number of iterations
 */
def computeKMeans(
  pathToTextFile: String,
  quantity: Int,
  iteration: Int): Unit = {
  case class Point(x: Double, y: Double)

  /** Runs `func` against a writer on `f` and always closes the writer. */
  def save(f: File)(func: PrintWriter => Unit): Unit = {
    val p = new PrintWriter(f)
    try {
      func(p)
    } finally {
      p.close()
    }
  }

  val filename = pathToTextFile.split("\\.")(0)

  val outputFilename = s"$filename-KMEANS-k${quantity}-i${iteration}.tsv"

  val points = sc
    .textFile(pathToTextFile)
    .map { line => line.trim.split("\\s+") }
    .map { row => Point(row(0).toDouble, row(1).toDouble) }

  val features = points
    .map { p => Vectors.dense(p.x, p.y) }

  features.cache()

  // The trained model was previously bound to `kmeans` while `model` was referenced below.
  val model = KMeans.train(features, quantity, iteration)

  val predictions = features
    .map { f => (f(0), f(1), model.predict(f) + 1) }
    .collect

  println(s"OUTPUT TO: ${outputFilename}")
  save(new File(outputFilename)) { f =>
    predictions.foreach { case (x, y, ccid) =>
      f.println(s"${x}\t${y}\t${ccid}")
    }
  }
}
Source File: ReebDiagram.scala From spark-tda with Apache License 2.0 | 5 votes |
// NOTE(review): this listing was damaged by text extraction. The `java.io.` and
// `org.apache.spark.ml.feature.` import prefixes were reconstructed; one further import line
// was lost entirely and could not be recovered (no visible code depends on it) — confirm
// against the upstream spark-tda sources.
// Script for spark-shell: `sc` and the DataFrame implicits are expected to be provided by the shell.
import java.io.{File, PrintWriter}

import org.apache.spark.ml.feature.{ReebDiagram, VectorAssembler}
import org.apache.spark.sql.functions._

/**
 * Clusters 2-D points with a `ReebDiagram` and writes `x<TAB>y<TAB>clusterId` lines
 * (ids renumbered from 1) to a `.tsv` file named after the input and the parameters.
 *
 * @param pathToTextFile     whitespace-separated text file; the first two columns are read as x and y
 * @param quantity           passed to `setK` and `setTopTreeLeafSize`
 * @param linkThresholdRatio passed to `setLinkThresholdRatio`
 * @param coreThresholdRatio passed to `setCoreThresholdRatio`
 * @param topTreeRatio       fraction of the point count passed to `setTopTreeSize`
 */
def computeReebDiagram(
  pathToTextFile: String,
  quantity: Int,
  linkThresholdRatio: Double,
  coreThresholdRatio: Double,
  topTreeRatio: Double): Unit = {

  /** Runs `func` against a writer on `f` and always closes the writer. */
  def save(f: File)(func: PrintWriter => Unit): Unit = {
    val p = new PrintWriter(f)
    try {
      func(p)
    } finally {
      p.close()
    }
  }

  val filename = pathToTextFile.split("\\.")(0)

  val outputFilename = s"$filename-REEB-k${quantity}-l${linkThresholdRatio}-c${coreThresholdRatio}-i${topTreeRatio}.tsv"

  val points = sc.textFile(pathToTextFile)
    .map { line => line.trim.split("\\s+") }
    .zipWithIndex
    .map { case (row, i) => (i, row(0).toDouble, row(1).toDouble, 0) }
    .toDF("id", "x", "y", "cover_id")

  val cardinality = points.count

  val assembler = new VectorAssembler()
    .setInputCols(Array("x", "y"))
    .setOutputCol("feature")

  val features = assembler
    .transform(points)

  val reeb = new ReebDiagram()
    .setK(quantity)
    .setLinkThresholdRatio(linkThresholdRatio)
    .setCoreThresholdRatio(coreThresholdRatio)
    .setTopTreeSize((topTreeRatio * cardinality).toInt)
    .setTopTreeLeafSize(quantity)
    .setIdCol("id")
    .setCoverCol("cover_id")
    .setFeaturesCol("feature")
    .setOutputCol("cluster_id")

  val transformed = reeb
    .fit(features)
    .transform(features)

  // Map each distinct cluster id to a dense zero-based index.
  val clusters = Map(
    transformed
      .select("cluster_id")
      .rdd
      .map(row => row.getLong(0))
      .distinct
      .zipWithIndex
      .collect(): _*)

  val result = transformed
    .select("x", "y", "cluster_id")
    .rdd
    .map(row => (row.getDouble(0), row.getDouble(1), row.getLong(2)))
    .map { case (x, y, clusterId) => (x, y, clusters(clusterId) + 1) }
    .collect()

  println(s"OUTPUT TO: ${outputFilename}")
  save(new File(outputFilename)) { f =>
    result.foreach { case (x, y, ccid) =>
      f.println(s"${x}\t${y}\t${ccid}")
    }
  }
}
Source File: TempDirectory.scala From spark-tda with Apache License 2.0 | 5 votes |
// NOTE(review): this listing was damaged by text extraction. The package clause could not be
// recovered and is omitted; the java.io.File import, the trait header and the `_tempDir`
// field were reconstructed from how the remaining members use them — confirm against the
// upstream spark-tda sources.
import java.io.File

import org.scalatest.{BeforeAndAfterAll, Suite}
import com.holdenkarau.spark.testing.Utils

/** Mix-in that creates a temporary directory before the suite runs and deletes it afterwards. */
trait TempDirectory extends BeforeAndAfterAll { self: Suite =>

  private var _tempDir: File = _

  /** The directory created in `beforeAll`; null before `beforeAll` has run. */
  protected def tempDir: File = _tempDir

  override def beforeAll(): Unit = {
    super.beforeAll()
    _tempDir = Utils.createTempDir()
  }

  override def afterAll(): Unit = {
    try {
      // beforeAll may have failed before the directory was created.
      if (_tempDir != null) Utils.deleteRecursively(_tempDir)
    } finally {
      super.afterAll()
    }
  }
}
Source File: MarkdownPagesEndpoint.scala From udash-core with Apache License 2.0 | 5 votes |
// NOTE(review): the package name and the `java.io.` import prefix were lost in text
// extraction and have been reconstructed — confirm against the upstream udash-core sources.
package io.udash.web.guide.markdown

import java.io.{BufferedReader, File, FileReader}
import java.time.Instant
import java.util.concurrent.ConcurrentHashMap

import com.avsystem.commons._
import com.vladsch.flexmark.ext.toc.TocExtension
import com.vladsch.flexmark.html.HtmlRenderer
import com.vladsch.flexmark.parser.Parser

import scala.concurrent.{ExecutionContext, Future}

/**
 * Renders markdown pages to HTML and caches the result per page.
 *
 * @param guideResourceBase prefix prepended to `page.file` to locate the markdown source
 */
final class MarkdownPagesEndpoint(guideResourceBase: String)(implicit ec: ExecutionContext) extends MarkdownPageRPC {

  private val tocExtension = TocExtension.create
  private val parser = Parser.builder.extensions(JList(tocExtension)).build
  private val renderer = HtmlRenderer.builder.extensions(JList(tocExtension)).build
  /** Rendering future plus the file modification time it corresponds to. */
  private val renderedPages = new ConcurrentHashMap[MarkdownPage, (Future[String], Instant)]

  private def render(file: File): Future[String] = Future {
    val reader = new BufferedReader(new FileReader(file))
    // Close the reader even when parsing or rendering fails; it was previously never closed.
    try renderer.render(parser.parseReader(reader))
    finally reader.close()
  }

  /**
   * Returns the rendered HTML for `page`. A cached rendering is reused unless it has failed
   * or has not completed yet, or the file has been modified since it was produced.
   */
  override def loadContent(page: MarkdownPage): Future[String] = {
    val (result, _) = renderedPages.compute(page, { (_, cached) =>
      val pageFile = new File(guideResourceBase + page.file)
      cached.opt.filter {
        case (currentRender, renderedInstant) =>
          currentRender.value.exists(_.isSuccess) && renderedInstant.toEpochMilli >= pageFile.lastModified()
      }.getOrElse((render(pageFile), Instant.ofEpochMilli(pageFile.lastModified())))
    })
    result
  }
}
Source File: FileDownloadServlet.scala From udash-core with Apache License 2.0 | 5 votes |
// NOTE(review): this listing was damaged by text extraction. The java.io.File import, the
// class header and the `resolveFile` / `presentedFileName` members were lost; they have been
// reconstructed from how `doGet` uses them (the default body of `presentedFileName` is an
// assumption) — confirm against the upstream udash-core sources.
package io.udash.rpc.utils

import java.io.File
import java.nio.file.Files
import javax.servlet.http.{HttpServlet, HttpServletRequest, HttpServletResponse}

/** Servlet template that streams a file chosen by `resolveFile` as an attachment. */
abstract class FileDownloadServlet extends HttpServlet {

  /** Selects the file to serve for `request`. */
  protected def resolveFile(request: HttpServletRequest): File

  /** File name presented to the client in the `Content-Disposition` header. */
  protected def presentedFileName(name: String): String = name

  /** MIME type reported by the servlet context, or `application/octet-stream` when unknown. */
  protected def resolveFileMimeType(file: File): String =
    Option(getServletContext.getMimeType(file.getAbsolutePath)).getOrElse("application/octet-stream")

  override def doGet(request: HttpServletRequest, response: HttpServletResponse): Unit = {
    val file = resolveFile(request)

    if (!file.exists()) response.sendError(404, "File not found!")
    else {
      // MIME type
      response.setContentType(resolveFileMimeType(file))
      // content length
      response.setContentLengthLong(file.length)
      // file name
      response.setHeader("Content-Disposition", s"""attachment; filename="${presentedFileName(file.getName)}"""")

      val outStream = response.getOutputStream
      // Close the stream even when the copy fails part-way.
      try Files.copy(file.toPath, outStream)
      finally outStream.close()
    }
  }
}
Source File: CssFileRenderer.scala From udash-core with Apache License 2.0 | 5 votes |
// NOTE(review): the `java.io.` import prefix and the `mainFile.map(new` call head were lost
// in text extraction and have been reconstructed — confirm against the upstream udash-core sources.
package io.udash.css

import java.io.{File, PrintWriter}

import scalacss.internal.Renderer

/**
 * Writes each style sheet to `<dirPath>/<class name>.css`.
 *
 * @param dirPath    target directory (created when missing)
 * @param styles     style sheets to render
 * @param createMain when true a `main.css` importing every rendered file is written as well
 */
class CssFileRenderer(dirPath: String, styles: Seq[CssBase], createMain: Boolean) {

  /** Renders all styles as UTF-8 files; writers are closed even when rendering fails. */
  def render()(implicit renderer: Renderer[String]): Unit = {
    val dir = new File(dirPath)
    dir.mkdirs()

    val mainFile: Option[File] =
      if (createMain) Some(new File(s"${dir.getAbsolutePath}/main.css"))
      else None

    mainFile.foreach(_.createNewFile())
    val mainWriter = mainFile.map(new PrintWriter(_, "UTF-8"))

    try {
      styles.foreach { style =>
        val name = style.getClass.getName
        val styleFile = new File(s"${dir.getAbsolutePath}/$name.css")
        styleFile.createNewFile()

        val styleWriter = new PrintWriter(styleFile, "UTF-8")
        try styleWriter.write(style.render)
        finally styleWriter.close() // close() also flushes

        mainWriter.foreach(_.append(s"""@import "$name.css";\n"""))
      }
    } finally {
      mainWriter.foreach(_.close())
    }
  }
}
Source File: ValueStoreSerializationExt.scala From random-projections-at-berlinbuzzwords with Apache License 2.0 | 5 votes |
// NOTE(review): this listing was damaged by text extraction; the java.io.File import, the
// ValuesStore import path and the `logger.info(` call head were reconstructed — confirm
// against the upstream random-projections sources.
package com.stefansavev.randomprojections.serialization

import java.io.File

import com.stefansavev.core.serialization.Utils
import com.stefansavev.randomprojections.datarepr.dense.store.ValuesStore
import com.stefansavev.randomprojections.serialization.DataFrameViewSerializers._
import com.typesafe.scalalogging.StrictLogging

/** Extension methods for writing a `ValuesStore` to, and reading it from, files and byte arrays. */
object ValueStoreSerializationExt {
  val ser = valuesStoreSerializer()

  implicit class ValueStoreSerializerExt(input: ValuesStore) {
    def toFile(file: File): Unit = {
      Utils.toFile(ser, file, input)
    }

    def toFile(fileName: String): Unit = {
      toFile(new File(fileName))
    }

    def toBytes(): Array[Byte] = {
      Utils.toBytes(ser, input)
    }
  }

  implicit class ValueStoreDeserializerExt(t: ValuesStore.type) extends StrictLogging {

    /** @throws IllegalStateException when `file` does not exist */
    def fromFile(file: File): ValuesStore = {
      if (!file.exists()) {
        throw new IllegalStateException("file does not exist: " + file.getAbsolutePath)
      }
      logger.info("Loading file: " + file.getAbsolutePath)
      Utils.fromFile(ser, file)
    }

    def fromFile(fileName: String): ValuesStore = {
      fromFile(new File(fileName))
    }

    def fromBytes(input: Array[Byte]): ValuesStore = {
      Utils.fromBytes(ser, input)
    }
  }

}
Source File: DataFrameViewSerializationExt.scala From random-projections-at-berlinbuzzwords with Apache License 2.0 | 5 votes |
// NOTE(review): the java.io.File import was lost in text extraction and has been restored.
package com.stefansavev.randomprojections.serialization

import java.io.File

import com.stefansavev.core.serialization.Utils
import com.stefansavev.randomprojections.datarepr.dense.DataFrameView
import com.stefansavev.randomprojections.serialization.DataFrameViewSerializers._

/** Extension methods for writing a `DataFrameView` to, and reading it from, a file. */
object DataFrameViewSerializationExt {

  implicit class DataFrameSerializerExt(input: DataFrameView) {
    def toFile(file: File): Unit = {
      val ser = dataFrameSerializer()
      Utils.toFile(ser, file, input)
    }

    def toFile(fileName: String): Unit = {
      toFile(new File(fileName))
    }
  }

  implicit class DataFrameDeserializerExt(t: DataFrameView.type) {

    /** @throws IllegalStateException when `file` does not exist */
    def fromFile(file: File): DataFrameView = {
      if (!file.exists()) {
        // Include the path so the failure is actionable.
        throw new IllegalStateException("file does not exist: " + file.getAbsolutePath)
      }
      val ser = dataFrameSerializer()
      Utils.fromFile(ser, file)
    }

    def fromFile(dir: String): DataFrameView = {
      fromFile(new File(dir))
    }
  }

}
Source File: FileWriter.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
// NOTE(review): this listing was damaged by text extraction; the java.io.File import, the
// element type of the returned Set and every `log.info(` call head were reconstructed —
// confirm against the upstream sbt-avrohugger sources.
package sbtavrohugger;

import avrohugger.filesorter.{AvdlFileSorter, AvscFileSorter}
import avrohugger.Generator
import java.io.File

import sbt.Keys._
import sbt.{Logger, globFilter, singleFileFinder}
import sbt.Path._

object FileWriter {

  /**
   * Runs `generator` over every `.avsc`, `.avdl`, `.avro` and `.avpr` file found under
   * `srcDirs` (in that order; `.avsc` and `.avdl` files are first ordered by their file sorters).
   *
   * @param generator code generator to invoke per input file
   * @param srcDirs   directories searched recursively for inputs
   * @param target    output directory handed to the generator
   * @param log       receives one info line per step
   * @return all `.java` and `.scala` files found under `target` afterwards
   */
  private[sbtavrohugger] def generateCaseClasses(
    generator: Generator,
    srcDirs: Seq[File],
    target: File,
    log: Logger): Set[java.io.File] = {
    log.info("Considering source directories %s".format(srcDirs.mkString(",")))

    def getSrcFiles(dirs: Seq[File], fileExtension: String) = for {
      srcDir <- dirs
      srcFile <- (srcDir ** s"*.$fileExtension").get
    } yield srcFile

    for (inFile <- AvscFileSorter.sortSchemaFiles(getSrcFiles(srcDirs, "avsc"))) {
      log.info("Compiling AVSC %s to %s".format(inFile, target.getPath))
      generator.fileToFile(inFile, target.getPath)
    }

    for (idlFile <- AvdlFileSorter.sortSchemaFiles(getSrcFiles(srcDirs, "avdl"))) {
      log.info("Compiling Avro IDL %s".format(idlFile))
      generator.fileToFile(idlFile, target.getPath)
    }

    for (inFile <- getSrcFiles(srcDirs, "avro")) {
      log.info("Compiling Avro datafile %s".format(inFile))
      generator.fileToFile(inFile, target.getPath)
    }

    for (protocol <- getSrcFiles(srcDirs, "avpr")) {
      log.info("Compiling Avro protocol %s".format(protocol))
      generator.fileToFile(protocol, target.getPath)
    }

    (target ** ("*.java" | "*.scala")).get.toSet
  }

}
Source File: StandardTestUtil.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
// NOTE(review): this listing was damaged by text extraction; the java.io.File import and the
// two `next(...)` calls inside the read loop were reconstructed, and the URL that followed
// "Adapted from:" could not be recovered — confirm against the upstream sbt-avrohugger sources.
package test

import java.io.File

import org.apache.avro.file.{ DataFileReader, DataFileWriter }
import org.apache.avro.generic.{ GenericRecord, GenericDatumReader, GenericDatumWriter }

import org.specs2.mutable.Specification

/** Helpers that round-trip generic Avro records through a data file. */
object StandardTestUtil extends Specification {

  /** Writes `records` to `file` using the schema of the first record (`records` must be non-empty). */
  def write(file: File, records: List[GenericRecord]): Unit = {
    val userDatumWriter = new GenericDatumWriter[GenericRecord]
    val dataFileWriter = new DataFileWriter[GenericRecord](userDatumWriter)
    // Close the writer even when create/append fails.
    try {
      dataFileWriter.create(records.head.getSchema, file)
      records.foreach(record => dataFileWriter.append(record))
    } finally {
      dataFileWriter.close()
    }
  }

  /** Reads `file` back and checks that the last record read equals the matching element of `records`. */
  def read(file: File, records: List[GenericRecord]) = {
    // A first reader is opened only to obtain the writer schema; close it instead of leaking it.
    val schemaReader = new DataFileReader[GenericRecord](file, new GenericDatumReader[GenericRecord])
    val schema = try schemaReader.getSchema finally schemaReader.close()

    val userDatumReader = new GenericDatumReader[GenericRecord](schema)
    val dataFileReader = new DataFileReader[GenericRecord](file, userDatumReader)
    // Adapted from an external example (URL lost in extraction).
    // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat
    // (via org.apache.avro.file.DataFileStream) interacts with the StandardDatumReader.
    var record: GenericRecord = null.asInstanceOf[GenericRecord]
    var sameRecord: GenericRecord = null.asInstanceOf[GenericRecord]
    val recordIter = records.iterator
    try {
      while (dataFileReader.hasNext) {
        sameRecord = dataFileReader.next(sameRecord)
        record = recordIter.next()
      }
    } finally {
      dataFileReader.close()
    }
    sameRecord must ===(record)
  }

  /** Writes `records` to a temporary file and verifies them with [[read]]. */
  def verifyWriteAndRead(records: List[GenericRecord]) = {
    val fileName = s"${records.head.getClass.getName}"
    val fileEnding = "avro"
    val file = File.createTempFile(fileName, fileEnding)
    file.deleteOnExit()
    write(file, records)
    read(file, records)
  }

}
Source File: StandardTestUtil.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
// NOTE(review): this listing was damaged by text extraction; the java.io.File import and the
// two `next(...)` calls inside the read loop were reconstructed, and the URL that followed
// "Adapted from:" could not be recovered — confirm against the upstream sbt-avrohugger sources.
package test

import java.io.File

import org.apache.avro.file.{ DataFileReader, DataFileWriter }
import org.apache.avro.generic.{ GenericRecord, GenericDatumReader, GenericDatumWriter }

import org.specs2.mutable.Specification

/** Helpers that round-trip generic Avro records through a data file. */
object StandardTestUtil extends Specification {

  /** Writes `records` to `file` using the schema of the first record (`records` must be non-empty). */
  def write(file: File, records: List[GenericRecord]): Unit = {
    val userDatumWriter = new GenericDatumWriter[GenericRecord]
    val dataFileWriter = new DataFileWriter[GenericRecord](userDatumWriter)
    // Close the writer even when create/append fails.
    try {
      dataFileWriter.create(records.head.getSchema, file)
      records.foreach(record => dataFileWriter.append(record))
    } finally {
      dataFileWriter.close()
    }
  }

  /** Reads `file` back and checks that the last record read equals the matching element of `records`. */
  def read(file: File, records: List[GenericRecord]) = {
    // A first reader is opened only to obtain the writer schema; close it instead of leaking it.
    val schemaReader = new DataFileReader[GenericRecord](file, new GenericDatumReader[GenericRecord])
    val schema = try schemaReader.getSchema finally schemaReader.close()

    val userDatumReader = new GenericDatumReader[GenericRecord](schema)
    val dataFileReader = new DataFileReader[GenericRecord](file, userDatumReader)
    // Adapted from an external example (URL lost in extraction).
    // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat
    // (via org.apache.avro.file.DataFileStream) interacts with the StandardDatumReader.
    var record: GenericRecord = null.asInstanceOf[GenericRecord]
    var sameRecord: GenericRecord = null.asInstanceOf[GenericRecord]
    val recordIter = records.iterator
    try {
      while (dataFileReader.hasNext) {
        sameRecord = dataFileReader.next(sameRecord)
        record = recordIter.next()
      }
    } finally {
      dataFileReader.close()
    }
    sameRecord must ===(record)
  }

  /** Writes `records` to a temporary file and verifies them with [[read]]. */
  def verifyWriteAndRead(records: List[GenericRecord]) = {
    val fileName = s"${records.head.getClass.getName}"
    val fileEnding = "avro"
    val file = File.createTempFile(fileName, fileEnding)
    file.deleteOnExit()
    write(file, records)
    read(file, records)
  }

}
Source File: StandardDefaultValuesSpec.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
// NOTE(review): this listing was damaged by text extraction; the java.io.File import, the
// `record.defaultCoproduct.select` call head and the `dataFileReader.next(sameRecord)` call
// were reconstructed, and the URL that followed "Adapted from:" could not be recovered —
// confirm against the upstream sbt-avrohugger sources.
import test._
import org.specs2.mutable.Specification
import java.io.File
import scala.collection.mutable.Buffer
import scala.collection.JavaConverters._

import org.apache.avro.file._
import org.apache.avro.generic._
import org.apache.avro._

/** Round-trips a `DefaultTest()` instance through an Avro data file and checks every default value. */
class StandardDefaultValuesSpec extends Specification {
  skipAll
  "A case class with default values" should {
    "deserialize correctly" in {

      val record = DefaultTest()

      val enumSchemaString = """{"type":"enum","name":"DefaultEnum","symbols":["SPADES","DIAMONDS","CLUBS","HEARTS"]}"""
      val enumSchema = new Schema.Parser().parse(enumSchemaString)
      val genericEnum = new GenericData.EnumSymbol(enumSchema, record.suit.toString)

      // The literal previously carried a stray `,"default":{"inner":1}}` after the closing brace
      // of the record schema, i.e. trailing content after a complete JSON document.
      val embeddedSchemaString = """{"type":"record","name":"Embedded","fields":[{"name":"inner","type":"int"}]}"""
      val embeddedSchema = new Schema.Parser().parse(embeddedSchemaString)
      val embeddedGenericRecord = new GenericData.Record(embeddedSchema)
      embeddedGenericRecord.put("inner", record.embedded.inner)

      val recordSchemaString = """{"type":"record","name":"DefaultTest","namespace":"test","fields":[{"name":"suit","type":{"type":"enum","name":"DefaultEnum","symbols":["SPADES","DIAMONDS","CLUBS","HEARTS"]},"default":"SPADES"},{"name":"number","type":"int","default":0},{"name":"str","type":"string","default":"str"},{"name":"optionString","type":["null","string"],"default":null},{"name":"optionStringValue","type":["string","null"],"default":"default"},{"name":"embedded","type":{"type":"record","name":"Embedded","fields":[{"name":"inner","type":"int"}]},"default":{"inner":1}},{"name":"defaultArray","type":{"type":"array","items":"int"},"default":[1,3,4,5]},{"name":"optionalEnum","type":["null","DefaultEnum"],"default":null},{"name":"defaultMap","type":{"type":"map","values":"string"},"default":{"Hello":"world","Merry":"Christmas"}},{"name":"byt","type":"bytes","default":"ÿ"}, {"name":"defaultEither","type": ["int", "string"],"default":2}, {"name":"defaultCoproduct","type": ["int", "string", "boolean"],"default":3}]}"""
      val recordSchema = new Schema.Parser().parse(recordSchemaString)

      val genericRecord = new GenericData.Record(recordSchema)

      genericRecord.put("suit", genericEnum)
      genericRecord.put("number", record.number)
      genericRecord.put("str", record.str)
      genericRecord.put("optionString", record.optionString.getOrElse(null))
      genericRecord.put("optionStringValue", record.optionStringValue.getOrElse(null))
      genericRecord.put("embedded", embeddedGenericRecord)
      genericRecord.put("defaultArray", record.defaultArray.asJava)
      genericRecord.put("optionalEnum", record.optionalEnum.getOrElse(null))
      genericRecord.put("defaultMap", record.defaultMap.asJava)
      genericRecord.put("byt", java.nio.ByteBuffer.wrap(record.byt))
      genericRecord.put("defaultEither", record.defaultEither.fold(identity, identity))
      genericRecord.put("defaultCoproduct", record.defaultCoproduct.select[Int].getOrElse(0))

      val records = List(genericRecord)

      val fileName = s"${records.head.getClass.getName}"
      val fileEnding = "avro"
      val file = File.createTempFile(fileName, fileEnding)
      file.deleteOnExit()
      StandardTestUtil.write(file, records)

      // A first reader is opened only to obtain the writer schema; close it instead of leaking it.
      val schemaReader = new DataFileReader[GenericRecord](file, new GenericDatumReader[GenericRecord])
      val schema = try schemaReader.getSchema finally schemaReader.close()
      val userDatumReader = new GenericDatumReader[GenericRecord](schema)
      val dataFileReader = new DataFileReader[GenericRecord](file, userDatumReader)
      // Adapted from an external example (URL lost in extraction).
      // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat
      // (via org.apache.avro.file.DataFileStream) interacts with the StandardDatumReader.
      var sameRecord: GenericRecord = null.asInstanceOf[GenericRecord]
      try {
        while (dataFileReader.hasNext) {
          sameRecord = dataFileReader.next(sameRecord)
        }
      } finally {
        dataFileReader.close()
      }

      sameRecord.get("suit").toString === DefaultEnum.SPADES.toString
      sameRecord.get("number") === 0
      sameRecord.get("str").toString === "str"
      sameRecord.get("optionString") === null
      sameRecord.get("optionStringValue").toString === "default"
      sameRecord.get("embedded").asInstanceOf[GenericRecord].get("inner") === 1
      sameRecord.get("defaultArray") === List(1,3,4,5).asJava
      sameRecord.get("optionalEnum") === null
      sameRecord.get("defaultMap").toString === "{Hello=world, Merry=Christmas}"
      sameRecord.get("byt") === java.nio.ByteBuffer.wrap("ÿ".getBytes)
      sameRecord.get("defaultEither") === 2
      sameRecord.get("defaultCoproduct") === 3
    }
  }

}
Source File: StandardTestUtil.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
package test

import java.io.File

import org.apache.avro.file.{ DataFileReader, DataFileWriter }
import org.apache.avro.generic.{ GenericRecord, GenericDatumReader, GenericDatumWriter }
import org.specs2.mutable.Specification

/** Helpers that round-trip generic Avro records through a container file on disk. */
object StandardTestUtil extends Specification {

  /**
   * Writes `records` to `file` as an Avro container file, using the schema of the first
   * record. `records` must be non-empty (`records.head` is read).
   */
  def write(file: File, records: List[GenericRecord]) = {
    val userDatumWriter = new GenericDatumWriter[GenericRecord]
    val dataFileWriter = new DataFileWriter[GenericRecord](userDatumWriter)
    try {
      dataFileWriter.create(records.head.getSchema, file)
      records.foreach(record => dataFileWriter.append(record))
    } finally {
      // close even when create/append throws, so the file handle is not leaked
      dataFileWriter.close()
    }
  }

  /**
   * Reads every record from `file`, stepping through `records` in lock-step, and returns
   * the match result comparing the LAST record read with the corresponding expected record.
   */
  def read(file: File, records: List[GenericRecord]) = {
    // A first reader is opened only to learn the writer schema; close it once that is known.
    val schemaProbe = new DataFileReader[GenericRecord](file, new GenericDatumReader[GenericRecord])
    val schema = try schemaProbe.getSchema finally schemaProbe.close()

    val userDatumReader = new GenericDatumReader[GenericRecord](schema)
    val dataFileReader = new DataFileReader[GenericRecord](file, userDatumReader)

    // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat
    // (via org.apache.avro.file.DataFileStream) interacts with the StandardDatumReader.
    var record: GenericRecord = null
    var sameRecord: GenericRecord = null
    val recordIter = records.iterator
    try {
      while (dataFileReader.hasNext) {
        // NOTE(review): both right-hand sides were lost in extraction; restored as the
        // reuse-style `next(sameRecord)` call and the iterator step - confirm.
        sameRecord = dataFileReader.next(sameRecord)
        record = recordIter.next()
      }
    } finally {
      dataFileReader.close()
    }

    sameRecord must ===(record)
  }

  /** Writes `records` to a temp file (deleted on JVM exit) and verifies them via [[read]]. */
  def verifyWriteAndRead(records: List[GenericRecord]) = {
    val fileName = s"${records.head.getClass.getName}"
    val fileEnding = "avro"
    val file = File.createTempFile(fileName, fileEnding)
    file.deleteOnExit()
    write(file, records)
    read(file, records)
  }
}
Source File: ExpectedResults.scala From api-first-hand with MIT License | 5 votes |
package de.zalando

import java.io.{ File, FileOutputStream }

import de.zalando.apifirst.util.ScalaPrinter
import de.zalando.model._

import scala.io.Source

/**
 * Shared fixtures for generator tests: the lists of model/example/validation specs and
 * helpers to write and read "expected result" files under [[resourcesPath]].
 */
trait ExpectedResults {

  val model = Seq[WithModel](
    additional_properties_yaml,
    basic_polymorphism_yaml,
    nested_arrays_yaml,
    nested_options_yaml,
    basic_extension_yaml,
    expanded_polymorphism_yaml,
    nested_objects_yaml,
    options_yaml,
    wrong_field_name_yaml,
    all_of_imports_yaml,
    i038_invalid_enum_members_yaml
  )

  val examples = Seq[WithModel](
    basic_auth_api_yaml,
    cross_spec_references_yaml,
    echo_api_yaml,
    error_in_array_yaml,
    form_data_yaml,
    full_petstore_api_yaml,
    hackweek_yaml,
    heroku_petstore_api_yaml,
    instagram_api_yaml,
    minimal_api_yaml,
    nakadi_yaml,
    security_api_yaml,
    simple_petstore_api_yaml,
    split_petstore_api_yaml,
    string_formats_yaml,
    type_deduplication_yaml,
    uber_api_yaml,
    i041_no_json_deserialiser_yaml
  )

  val validations = Seq[WithModel](
    nested_arrays_validation_yaml,
    nested_objects_validation_yaml,
    nested_options_validation_yaml,
    numbers_validation_yaml,
    string_formats_validation_yaml
  )

  val resourcesPath = "play-scala-generator/src/test/resources/"

  def expectationsFolder: String = "/expected_results/"

  /**
   * Writes `result` to the expectation file for `name`/`suffix`, replacing any existing file.
   * Does nothing when `result` is empty.
   */
  def dump(result: String, name: String, suffix: String): Unit = {
    if (result.nonEmpty) {
      val newFile = target(name, suffix)
      newFile.getParentFile.mkdirs()
      newFile.delete()
      newFile.createNewFile()
      val out = new FileOutputStream(newFile)
      // close the stream even if the write fails
      try out.write(result.getBytes) finally out.close()
    }
  }

  /**
   * Returns the content of the expectation file for `name`/`suffix` with lines joined by
   * `\n`, or the empty string when the file cannot be read.
   */
  def asInFile(name: String, suffix: String): String = {
    val expectedFile = target(name, suffix)
    if (expectedFile.canRead) {
      val src = Source.fromFile(expectedFile)
      // close the source even if reading fails
      try src.getLines().mkString("\n") finally src.close()
    } else ""
  }

  /** The expectation file for `name` with extension `suffix`. */
  def target(name: String, suffix: String): File =
    new File(resourcesPath + expectationsFolder + name + "." + suffix)

  /** Trims every line of `str` and drops the lines that end up empty. */
  def clean(str: String): String =
    str.split("\n").map(_.trim).filter(_.nonEmpty).mkString("\n")

  def nameFromModel(ast: WithModel): String = ScalaPrinter.nameFromModel(ast)
}
Source File: Display.scala From almond with BSD 3-Clause "New" or "Revised" License | 5 votes |
package almond.display

import java.io.File
import java.net.URL
import java.nio.file.Path
import java.util.{Map => JMap}

import almond.interpreter.api.{DisplayData, OutputHandler}
import jupyter.{Displayer, Displayers}

import scala.collection.JavaConverters._

/**
 * Something that can be rendered in a notebook as a set of MIME-type-keyed string payloads.
 */
trait Display {
  /** The payloads to display. */
  def data(): Map[String, String]

  /** Optional metadata sent along with [[data]]; empty by default. */
  def metadata(): Map[String, String] = Map()

  def displayData(): DisplayData =
    DisplayData(data(), metadata = metadata())

  /** Sends [[displayData]] to the given output handler. */
  def display()(implicit output: OutputHandler): Unit =
    output.display(displayData())

  // registering things with jvm-repr just in case
  Display.registered
}

object Display {

  // Forced once (lazily) from the Display trait initializer above.
  private lazy val registered: Unit = {
    Displayers.register(
      classOf[Display],
      new Displayer[Display] {
        // NOTE(review): the body was lost in extraction; restored as the Java view of
        // the display payloads - confirm.
        def display(d: Display): JMap[String, String] =
          d.data().asJava
      }
    )
  }

  def markdown(content: String)(implicit output: OutputHandler): Unit =
    Markdown(content).display()
  def html(content: String)(implicit output: OutputHandler): Unit =
    Html(content).display()
  def latex(content: String)(implicit output: OutputHandler): Unit =
    Latex(content).display()
  def text(content: String)(implicit output: OutputHandler): Unit =
    Text(content).display()

  def js(content: String)(implicit output: OutputHandler): Unit =
    Javascript(content).display()

  def svg(content: String)(implicit output: OutputHandler): Unit =
    Svg(content).display()

  /**
   * Factory mix-in for display types built either from inline content (`Right`) or from
   * a URL to fetch the content from (`Left`).
   */
  trait Builder[C, T] {

    protected def build(contentOrUrl: Either[URL, C]): T

    def apply(content: C): T =
      build(Right(content))

    def from(url: String): T =
      build(Left(new URL(url)))
    def from(url: URL): T =
      build(Left(url))

    def fromFile(file: File): T =
      build(Left(file.toURI.toURL))
    def fromFile(path: Path): T =
      build(Left(path.toUri.toURL))
    def fromFile(path: String): T =
      build(Left(new File(path).toURI.toURL))
  }
}
Source File: NotebookSparkSessionBuilder.scala From almond with BSD 3-Clause "New" or "Revised" License | 5 votes |
package org.apache.spark.sql.almondinternals

import java.io.File
import java.lang.{Boolean => JBoolean}

import almond.interpreter.api.{CommHandler, OutputHandler}
import almond.display.Display.html
import ammonite.interp.api.InterpAPI
import ammonite.repl.api.ReplAPI
import org.apache.log4j.{Category, Logger, RollingFileAppender}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.ammonitesparkinternals.AmmoniteSparkSessionBuilder

import scala.collection.JavaConverters._

/**
 * Spark session builder for notebooks: on top of the Ammonite builder it prints a link to
 * the Spark UI, registers a progress listener, and can forward the Spark log file to the
 * browser developer console while the session is being created.
 */
class NotebookSparkSessionBuilder
 (implicit
   interpApi: InterpAPI,
   replApi: ReplAPI,
   publish: OutputHandler,
   commHandler: CommHandler
 ) extends AmmoniteSparkSessionBuilder {

  private var progress0 = true
  private var keep0 = true

  // None = "auto": forward logs if a log file can be found, stay silent otherwise.
  private var logsInDeveloperConsoleOpt = Option.empty[Boolean]

  /** Sets the two flags handed to the `ProgressSparkListener` created in [[getOrCreate]]. */
  def progress(enable: Boolean = true, keep: Boolean = true): this.type = {
    progress0 = enable
    keep0 = keep
    this
  }

  /** Enables/disables log forwarding; `null` restores the automatic behaviour. */
  def logsInDeveloperConsole(enable: JBoolean = null): this.type = {
    logsInDeveloperConsoleOpt = Option[JBoolean](enable).map[Boolean](x => x)
    this
  }

  override def getOrCreate(): SparkSession = {

    val logFileOpt = logsInDeveloperConsoleOpt match {
      case Some(false) =>
        None
      case Some(true) =>
        val fileOpt = NotebookSparkSessionBuilder.logFile(classOf[SparkSession])
        // Only warn when forwarding was explicitly requested.
        if (fileOpt.isEmpty)
          Console.err.println("Warning: cannot determine log file, logs won't be sent to developer console.")
        fileOpt
      case None =>
        NotebookSparkSessionBuilder.logFile(classOf[SparkSession])
    }

    var sendLogOpt = Option.empty[SendLog]

    try {
      // NOTE(review): the receiver of this closure was lost in extraction; restored as
      // `logFileOpt.map` - confirm.
      sendLogOpt = logFileOpt.map { f =>
        println("See your browser developer console for detailed spark logs.")
        SendLog.start(f)
      }

      val session = super.getOrCreate()

      for (url <- session.sparkContext.uiWebUrl)
        html(s"""<a target="_blank" href="$url">Spark UI</a>""")

      session.sparkContext.addSparkListener(
        new ProgressSparkListener(session, keep0, progress0)
      )

      session
    } finally {
      // Log forwarding only covers session creation.
      sendLogOpt.foreach(_.stop())
    }
  }

}

object NotebookSparkSessionBuilder {

  /**
   * Walks the log4j category hierarchy starting at the logger of `clazz` and returns the
   * file of the first `RollingFileAppender` found, if any.
   */
  private def logFile(clazz: Class[_]): Option[File] = {

    def appenders(log: Category): Stream[Any] =
      if (log == null)
        Stream()
      else
        log.getAllAppenders.asScala.toStream #::: appenders(log.getParent)

    appenders(Logger.getLogger(clazz)).collectFirst {
      case rfa: RollingFileAppender => new File(rfa.getFile)
    }
  }

}
Source File: BitMap.scala From Scurses with MIT License | 5 votes |
package net.team2xh.onions.components.widgets

import java.awt.image.BufferedImage
import java.io.File
import javax.imageio.ImageIO

import net.team2xh.onions.Symbols
import net.team2xh.onions.Themes.ColorScheme
import net.team2xh.onions.components.{FramePanel, Widget}
import net.team2xh.scurses.{Colors, Scurses}

object BitMap {

  /**
   * Loads the image at `path` and wraps it in a [[BitMap]] widget.
   *
   * @param relative when true, `path` is appended to the absolute path of the working directory
   * @throws IllegalArgumentException when no image reader can decode the file
   */
  def apply(parent: FramePanel, path: String, relative: Boolean = false)(implicit screen: Scurses): BitMap = {
    val fullPath = if (relative) new File("").getAbsolutePath + path else path
    val image = ImageIO.read(new File(fullPath))
    // ImageIO.read returns null (rather than throwing) when no registered reader
    // understands the file; fail here instead of with a later NullPointerException.
    require(image != null, s"Cannot decode image at $fullPath")
    new BitMap(parent, image)
  }

  def apply(parent: FramePanel, image: BufferedImage)(implicit screen: Scurses): BitMap = {
    new BitMap(parent, image)
  }
}

/**
 * Widget that draws an image using one character cell per two vertically stacked pixels:
 * the upper pixel and the lower pixel are passed as the two colour arguments of
 * `screen.put` together with `Symbols.BLOCK_UPPER`.
 */
class BitMap(parent: FramePanel, image: BufferedImage)
            (implicit screen: Scurses) extends Widget(parent) {

  /** `colors(x)(y)` is the (upper, lower) colour pair of cell column `x`, cell row `y`. */
  val colors = {
    val width = image.getWidth
    val height = image.getHeight
    val cellRows = (height + 1) / 2
    for (x <- 0 until width) yield
      for (y <- 0 until cellRows) yield {
        // Read two rows at a time
        val upper = Colors.fromRGBInt(image.getRGB(x, y * 2))
        val lowerRow = y * 2 + 1
        // Only the last cell row of an odd-height image lacks a lower pixel; it gets -1,
        // the placeholder the original code used. (The original applied -1 to EVERY row of
        // an odd-height image and dropped the final pixel row.)
        val lower =
          if (lowerRow < height) Colors.fromRGBInt(image.getRGB(x, lowerRow))
          else -1
        (upper, lower)
      }
  }

  override def redraw(focus: Boolean, theme: ColorScheme): Unit = {
    val width = image.getWidth min innerWidth
    // Centre the image horizontally inside the widget.
    val x0 = (innerWidth - width) / 2
    for (x <- 0 until width) {
      for (y <- 0 until innerHeight) {
        val c = colors(x)(y)
        screen.put(x0 + x, y, Symbols.BLOCK_UPPER, c._1, c._2)
      }
    }
  }

  override def handleKeypress(keypress: Int): Unit = { }

  override def focusable: Boolean = false

  // Computed from `image` directly (not from a val) so it is valid even if it is called
  // while the superclass constructor is still running. Rounded up to include an odd last row.
  override def innerHeight: Int = (image.getHeight + 1) / 2
}
Source File: SarkPredictorEngineSpec.scala From elasticsearch-prediction-spark with Apache License 2.0 | 5 votes |
package com.sdhu.elasticsearchprediction.spark
package test

import com.mahisoft.elasticsearchprediction._
import utils.DataProperties
import plugin.domain.IndexValue
import plugin.exception.PredictionException
import plugin.engine.PredictorEngine

import org.apache.spark._
import rdd.RDD
import mllib.regression._
import mllib.classification._

import org.scalatest._
import com.holdenkarau.spark.testing._
import java.io.File
import java.util.Collection
import scala.collection.JavaConversions._

/**
 * Tests for the Spark-backed predictor engine, run against the classifier stored in the
 * `/spark-clf-test.model` test resource.
 */
class SparkPredictorEngineSpec extends FlatSpec with MustMatchers {
  val pconf = getClass.getResource("/prop1.conf").getPath
  val dataP = getClass.getResource("/mini.csv").toURI.toString
  val dp = new DataProperties(pconf)
  val modelP = getClass.getResource("/spark-clf-test.model").getPath
  val clf_type = "spark.logistic-regression"

  "Predictor Engine" should "throw empty model exception" in {
    val eng = new SparkPredictorEngine(modelP, SVM_Helper)
    // The Scala List is converted to a java.util.Collection by the JavaConversions implicits.
    evaluating {eng.getPrediction(List[IndexValue]())} must produce [PredictionException]
  }

//  "Spark_PredictorEngine" should "return sparkPredictorEngine of svm type" in {
//    val speng = new Spark_PredictorEngine(modelP, "spark.svm")
//    speng.getSparkPredictorEngine mustBe a [SparkPredictorEngine[_]]
//
//  }

  it should "return a generic PredictorEngine" in {
    val speng = new Spark_PredictorEngine(modelP, "spark.svm")
    speng.getPredictorEngine mustBe a [PredictorEngine]
  }

  it should "load the classifier" in {
    val speng = new Spark_PredictorEngine(modelP, clf_type)
    val eng = speng.getSparkPredictorEngine
    val m = eng.getModel
    val cm = m.categoriesMap.getOrElse(Map[String, Double]())

    m.clf must not be empty
    //m.numClasses must be(Some(2))
    //m.binThreshold must be(Some(0.5))
    cm.keys must contain allOf("Female", "Male", "United-States", "China")
  }

  it should "evaluate values" in {
    val speng = new Spark_PredictorEngine(modelP, clf_type)
    val eng = speng.getSparkPredictorEngine

    val p0 = Array("50", "Self-emp-not-inc", "Male", "0", "0", "United-States")
    val cindv = ReadUtil.arr2CIndVal(p0)

    val check = eng.getPrediction(cindv)

    check must equal(0.0)
    check mustBe a [java.lang.Double]
  }

  it should "evaluate values using generic Predictor engine" in {
    val speng = new Spark_PredictorEngine(modelP, clf_type)
    val eng = speng.getPredictorEngine

    val p0 = Array("50", "Self-emp-not-inc", "Male", "0", "0", "United-States")
    val cindv = ReadUtil.arr2CIndVal(p0)

    val check = eng.getPrediction(cindv)

    check must equal(0.0)
    check mustBe a [java.lang.Double]
  }
}
Source File: ApplicationWithProcess.scala From aloha with Apache License 2.0 | 5 votes |
// NOTE(review): the package name and two imports (java.io.File, Guava's Files) were lost in
// extraction; restored from how the names are used below and from the sibling file that
// imports `Application` - confirm against the project.
package me.jrwang.aloha.app

import java.io.File
import java.nio.charset.StandardCharsets

import scala.collection.JavaConverters._
import scala.concurrent.Promise

import com.google.common.io.Files
import me.jrwang.aloha.common.Logging
import me.jrwang.aloha.common.util.{FileAppender, Utils}

/**
 * Base class for applications that run as an external OS process. Subclasses provide the
 * `ProcessBuilder`; this class starts the process, redirects its stdout/stderr into files
 * under the application directory, and completes `result` when the process exits.
 */
abstract class ApplicationWithProcess extends AbstractApplication with Logging {
  private var process: Process = _
  private var stdoutAppender: FileAppender = _
  private var stderrAppender: FileAppender = _

  // Timeout to wait for when trying to terminate an app.
  private val APP_TERMINATE_TIMEOUT_MS = 10 * 1000

  /** The fully configured builder for the process to launch. */
  def getProcessBuilder(): ProcessBuilder

  private var stateMonitorThread: Thread = _

  /**
   * Launches the process and returns the promise that is completed with SUCCESS when the
   * process exits with code 0 and with FAILED otherwise.
   */
  override def start(): Promise[ExitState] = {
    val processBuilder = getProcessBuilder()
    val command = processBuilder.command()
    val formattedCommand = command.asScala.mkString("\"", "\" \"", "\"")
    logInfo(s"Launch command: $formattedCommand")
    process = processBuilder.start()

    // Redirect its stdout and stderr to files
    val stdout = new File(appDir, "stdout")
    stdoutAppender = FileAppender(process.getInputStream, stdout, alohaConf)

    // The stderr file starts with the launch command followed by a separator line.
    val header = "Aloha Application Command: %s\n%s\n\n".format(
      formattedCommand, "=" * 40)
    val stderr = new File(appDir, "stderr")
    Files.write(header, stderr, StandardCharsets.UTF_8)
    stderrAppender = FileAppender(process.getErrorStream, stderr, alohaConf)

    stateMonitorThread = new Thread("app-state-monitor-thread") {
      override def run(): Unit = {
        val exitCode = process.waitFor()
        if (exitCode == 0) {
          result.success(ExitState(ExitCode.SUCCESS, Some("success")))
        } else {
          result.success(ExitState(ExitCode.FAILED, Some("failed")))
        }
      }
    }
    stateMonitorThread.start()
    result
  }

  /** Stops the output appenders and terminates the process, if one was started. */
  override def shutdown(reason: Option[String]): Unit = {
    if (process != null) {
      logInfo("Killing process!")
      if (stdoutAppender != null) {
        stdoutAppender.stop()
      }
      if (stderrAppender != null) {
        stderrAppender.stop()
      }
      val exitCode = Utils.terminateProcess(process, APP_TERMINATE_TIMEOUT_MS)
      if (exitCode.isEmpty) {
        logWarning("Failed to terminate process: " + process +
          ". This process will likely be orphaned.")
      }
    }
  }
}
Source File: AbstractApplication.scala From aloha with Apache License 2.0 | 5 votes |
// NOTE(review): the package name and the java.io.File import were lost in extraction;
// restored from usage - confirm against the project.
package me.jrwang.aloha.app

import java.io.File

import scala.concurrent.Promise

import me.jrwang.aloha.common.AlohaConf

/**
 * Skeleton [[Application]] that stores the description, working directory and
 * configuration handed over by the fluent `with*` setters, plus the promise that
 * subclasses complete with the exit state.
 */
abstract class AbstractApplication extends Application {
  /** Completed by subclasses when the application finishes. */
  protected val result: Promise[ExitState] = Promise()

  protected var appDesc: ApplicationDescription = _
  protected var appDir: File = _
  protected var alohaConf: AlohaConf = _

  override def withDescription(desc: ApplicationDescription): Application = {
    this.appDesc = desc
    this
  }

  override def withApplicationDir(appDir: File): Application = {
    this.appDir = appDir
    this
  }

  override def withAlohaConf(conf: AlohaConf): Application = {
    this.alohaConf = conf
    this
  }
}
Source File: Application.scala From aloha with Apache License 2.0 | 5 votes |
// NOTE(review): the package name and the java.io.File import were lost in extraction;
// restored from usage - confirm against the project.
package me.jrwang.aloha.app

import java.io.File

import scala.concurrent.Promise

import me.jrwang.aloha.common.{AlohaConf, AlohaException, Logging}
import me.jrwang.aloha.scheduler.AlohaUserCodeClassLoaders

/** Life-cycle contract of a schedulable application. */
trait Application {
  /** Starts the application; the returned promise is completed with its exit state. */
  def start(): Promise[ExitState]

  def shutdown(reason: Option[String]): Unit

  def withDescription(desc: ApplicationDescription): Application

  def withApplicationDir(appDir: File): Application

  def withAlohaConf(conf: AlohaConf): Application

  def clean(): Unit
}

object Application extends Logging {

  /**
   * Instantiates the application class named by `appDesc.entryPoint` through a child-first
   * class loader and attaches the description to it. The new class loader is also installed
   * as the current thread's context class loader.
   *
   * @throws AlohaException           when the class has no zero-argument constructor
   * @throws IllegalArgumentException when the class is not an [[Application]]
   */
  def create(appDesc: ApplicationDescription): Application = {
    //TODO we should download dependencies and resource files
    logInfo(s"Create module for [$appDesc]")
    val fullClassName = appDesc.entryPoint
    try {
      // NOTE(review): the start of this expression was lost in extraction; restored as a map
      // over `appDesc.libs` (assumed to be the library directory paths) - confirm.
      val urls = appDesc.libs.map(new File(_)).filter(_.exists())
        // listFiles() returns null for a path that is not a directory; treat that as "no files"
        .flatMap(dir => Option(dir.listFiles()).getOrElse(Array.empty[File]).filter(_.isFile))
        .map(_.toURI.toURL)
      val classLoader = AlohaUserCodeClassLoaders.childFirst(urls)
      Thread.currentThread().setContextClassLoader(classLoader)
      val klass = classLoader.loadClass(fullClassName)
      require(classOf[Application].isAssignableFrom(klass),
        s"$fullClassName is not a subclass of ${classOf[Application].getName}.")
      klass.getConstructor().newInstance().asInstanceOf[Application].withDescription(appDesc)
    } catch {
      // Every other exception propagates unchanged (the original rethrew it explicitly).
      case _: NoSuchMethodException =>
        throw new AlohaException(
          s"$fullClassName did not have a zero-argument constructor. " +
            s"Note: if the class is defined inside of another Scala class, then its constructors " +
            s"may accept an implicit parameter that references the enclosing class; in this case, " +
            s"you must define the class as a top-level class in order to prevent this extra" +
            " parameter from breaking Atom's ability to find a valid constructor.")
    }
  }
}
Source File: AppRunner.scala From aloha with Apache License 2.0 | 5 votes |
package me.jrwang.aloha.scheduler.worker

import java.io.File

import scala.concurrent.Await
import scala.concurrent.duration.Duration

// NOTE(review): the prefix of this import and the java.io.File import above were lost in
// extraction; restored from usage - confirm against the project.
import me.jrwang.aloha.app.{Application, ApplicationDescription, ApplicationState, ExitCode}
import me.jrwang.aloha.common.{AlohaConf, Logging}
import me.jrwang.aloha.rpc.RpcEndpointRef
import me.jrwang.aloha.scheduler.ApplicationStateChanged

/**
 * Runs one application on a dedicated thread and reports its state transitions
 * (RUNNING, then FINISHED / FAILED / KILLED) to the worker endpoint.
 */
class AppRunner(
    val conf: AlohaConf,
    val appId: String,
    val appDesc: ApplicationDescription,
    val worker: RpcEndpointRef,
    val workerId: String,
    val host: String,
    val appDir: File,
    @volatile var state: ApplicationState.Value)
  extends Logging {

  private var workerThread: Thread = null

  /** Starts the runner thread. */
  private[worker] def start(): Unit = {
    workerThread = new Thread(s"ApplicationRunner for $appId") {
      override def run(): Unit = {
        fetchAndRunApplication()
      }
    }
    workerThread.start()
  }

  // Stop this application runner
  private[worker] def kill(): Unit = {
    if (workerThread != null) {
      // the workerThread will kill the application when interrupted
      workerThread.interrupt()
      workerThread = null
      state = ApplicationState.KILLED
    }
  }

  /**
   * Creates and starts the application, blocks until its exit promise completes, and
   * notifies the worker of the outcome. `clean()` is always called on a created application.
   */
  private def fetchAndRunApplication(): Unit = {
    var app: Application = null
    try {
      app = Application.create(appDesc).withApplicationDir(appDir).withAlohaConf(conf)
      val exitStatePromise = app.start()
      state = ApplicationState.RUNNING
      worker.send(ApplicationStateChanged(appId, ApplicationState.RUNNING, None))
      // Blocks this runner thread until the application reports its exit state.
      val exitState = Await.result(exitStatePromise.future, Duration.Inf)
      if (exitState.code == ExitCode.FAILED) {
        worker.send(ApplicationStateChanged(appId, ApplicationState.FAILED, exitState.msg, None))
      } else {
        worker.send(ApplicationStateChanged(appId, ApplicationState.FINISHED, exitState.msg, None))
      }
    } catch {
      case _: InterruptedException =>
        // Raised when kill() interrupts this thread while it is blocked above.
        logInfo(s"Runner thread for application $appId interrupted")
        state = ApplicationState.KILLED
        killApp(app, Some("User request to kill app."))
        worker.send(ApplicationStateChanged(appId, ApplicationState.KILLED, Some("User request to kill app.")))
      case e: Exception =>
        logError("Error running executor", e)
        state = ApplicationState.FAILED
        killApp(app, Some(e.toString))
        worker.send(ApplicationStateChanged(appId, ApplicationState.FAILED, Some(e.toString), Some(e)))
    } finally {
      if (app != null) {
        app.clean()
      }
    }
  }

  /** Best-effort shutdown: failures are logged, never propagated. */
  private def killApp(app: Application, reason: Option[String]) = {
    if (app != null) {
      try {
        app.shutdown(reason)
      } catch {
        case e: Throwable =>
          logError(s"Error while killing app $appDesc.", e)
      }
    }
  }
}
Source File: ImageReader.scala From scala-deeplearn-examples with Apache License 2.0 | 5 votes |
package io.brunk.examples

import java.io.{File, FileFilter}
import java.lang.Math.toIntExact

import org.datavec.api.io.filters.BalancedPathFilter
import org.datavec.api.io.labels.ParentPathLabelGenerator
import org.datavec.api.split.{FileSplit, InputSplit}
import org.datavec.image.loader.BaseImageLoader
import org.datavec.image.recordreader.ImageRecordReader
import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator
import org.deeplearning4j.datasets.iterator.MultipleEpochsIterator
import org.deeplearning4j.eval.Evaluation
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator
import org.nd4j.linalg.dataset.api.preprocessor.ImagePreProcessingScaler

import scala.collection.JavaConverters._

/** Builds training and validation iterators over a directory of labelled images. */
object ImageReader {

  val channels = 3
  val height = 150
  val width = 150

  val batchSize = 50
  val numClasses = 2
  val epochs = 100
  val splitTrainTest = 0.8

  val random = new java.util.Random()

  /**
   * Creates (training, validation) iterators for the images under `path`, where each
   * image is labelled by its parent directory name.
   *
   * The images are sampled with a `BalancedPathFilter` using weights 70 / 30.
   * Both iterators apply the same `ImagePreProcessingScaler(0, 1)`; the training iterator
   * is wrapped in a `MultipleEpochsIterator` over [[epochs]].
   */
  def createImageIterator(path: String): (MultipleEpochsIterator, DataSetIterator) = {
    val baseDir = new File(path)
    val labelGenerator = new ParentPathLabelGenerator
    val fileSplit = new FileSplit(baseDir, BaseImageLoader.ALLOWED_FORMATS, random)

    val numExamples = toIntExact(fileSplit.length)
    // One label per sub-directory of the root.
    val numLabels = fileSplit.getRootDir.listFiles(new FileFilter {
      override def accept(pathname: File): Boolean = pathname.isDirectory
    }).length

    val pathFilter = new BalancedPathFilter(random, labelGenerator, numExamples, numLabels, batchSize)

    //val inputSplit = fileSplit.sample(pathFilter, splitTrainTest, 1 - splitTrainTest)
    val inputSplit = fileSplit.sample(pathFilter, 70, 30)

    val trainData = inputSplit(0)
    val validationData = inputSplit(1)

    val recordReader = new ImageRecordReader(height, width, channels, labelGenerator)
    val scaler = new ImagePreProcessingScaler(0, 1)

    recordReader.initialize(trainData, null)
    val dataIter = new RecordReaderDataSetIterator(recordReader, batchSize, 1, numClasses)
    dataIter.setPreProcessor(scaler)
    val trainIter = new MultipleEpochsIterator(epochs, dataIter)

    val valRecordReader = new ImageRecordReader(height, width, channels, labelGenerator)
    valRecordReader.initialize(validationData, null)
    val validationIter = new RecordReaderDataSetIterator(valRecordReader, batchSize, 1, numClasses)
    validationIter.setPreProcessor(scaler)

    (trainIter, validationIter)
  }

}
Source File: IrisReader.scala From scala-deeplearn-examples with Apache License 2.0 | 5 votes |
package io.brunk.examples

import java.io.File

import org.datavec.api.records.reader.impl.csv.CSVRecordReader
import org.datavec.api.split.FileSplit
import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator
import org.nd4j.linalg.dataset.SplitTestAndTrain
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize

/** Loads the Iris CSV (`data/iris.csv`) and splits it into train and test sets. */
object IrisReader {
  val numLinesToSkip = 1

  val batchSize = 150
  val labelIndex = 4
  val numLabels = 3

  val seed = 1

  /**
   * Reads one batch of up to [[batchSize]] rows, shuffles it with the fixed [[seed]] and
   * returns the split produced by `splitTestAndTrain(0.67)`.
   */
  def readData(): SplitTestAndTrain = {
    val recordReader = new CSVRecordReader(numLinesToSkip, ',')
    recordReader.initialize(new FileSplit(new File("data/iris.csv")))
    val iterator = new RecordReaderDataSetIterator(recordReader, batchSize, labelIndex, numLabels)
    // NOTE(review): the right-hand side was lost in extraction; restored as a single
    // `iterator.next()` call, matching the trailing comment - confirm.
    val dataSet = iterator.next() // read all data in a single batch
    dataSet.shuffle(seed)
    val testAndTrain = dataSet.splitTestAndTrain(0.67)
    val train = testAndTrain.getTrain
    val test = testAndTrain.getTest

//    val normalizer = new NormalizerStandardize
//
//    normalizer.transform(train) // normalize training data
//    normalizer.transform(test) // normalize test data
    testAndTrain
  }
}
Source File: CorpusReader.scala From ai.vitk.ner with GNU General Public License v3.0 | 5 votes |
package ai.vitk.ner

import java.io.{File, InputStream}

import org.slf4j.LoggerFactory

import scala.collection.mutable.ListBuffer
import scala.io.Source

// NOTE(review): the `java.io` / `scala.io.Source` imports and every `logger.info(...)` call
// prefix in this object were lost in extraction and have been restored from context - confirm.
// `readCoNLL`, called from `main`, is not defined in the visible excerpt; it is presumably
// declared in a part of the original object that is missing here.
object CorpusReader {
  val logger = LoggerFactory.getLogger(CorpusReader.getClass)

  /**
   * Reads a VLSP test resource from the classpath.
   *
   * Lines equal to `<s>` are ignored; a `</s>` line closes the current sentence. Every other
   * line must contain at least three whitespace-separated columns (word, part-of-speech,
   * chunk); shorter lines are logged as errors and skipped. Tokens that follow the last
   * `</s>` are not returned.
   */
  def readVLSPTest1(resourcePath: String): List[Sentence] = {
    // read lines of the file and remove lines which contains "<s>"
    val stream = getClass.getResourceAsStream(resourcePath)
    val source = Source.fromInputStream(stream)
    val lines =
      try source.getLines().toList.filter { line => line.trim != "<s>" }
      finally source.close()

    val sentences = new ListBuffer[Sentence]()
    var tokens = new ListBuffer[Token]()
    // Iterate directly: indexed access on a List is linear per element.
    lines.foreach { rawLine =>
      val line = rawLine.trim
      if (line == "</s>") {
        if (!tokens.isEmpty) sentences.append(Sentence(tokens))
        tokens = new ListBuffer[Token]()
      } else {
        val parts = line.split("\\s+")
        if (parts.length < 3)
          logger.error("Invalid line = " + line)
        else
          tokens.append(Token(parts(0), Map(Label.PartOfSpeech -> parts(1), Label.Chunk -> parts(2))))
      }
    }
    logger.info(resourcePath + ", number of sentences = " + sentences.length)
    sentences.toList
  }

  /**
   * Reads every regular file directly under `dir` with [[readVLSPTest1]]. Each file is
   * addressed as a classpath resource: the part of its absolute path starting at `/ner`.
   */
  def readVLSPTest2(dir: String): List[Sentence] = {
    def getListOfFiles: List[File] = {
      val d = new File(dir)
      if (d.exists && d.isDirectory) {
        d.listFiles.filter(_.isFile).toList
      } else {
        List[File]()
      }
    }

    val files = getListOfFiles
    logger.info("Number of test files = " + files.length)
    files.flatMap { file =>
      val x = file.getAbsolutePath
      val resourcePath = x.substring(x.indexOf("/ner"))
      readVLSPTest1(resourcePath)
    }
  }

  def main(args: Array[String]): Unit = {
    val path = "/ner/vi/train.txt"
    val sentences = readCoNLL(path)
    logger.info("Number of sentences = " + sentences.length)
    // NOTE(review): the bodies of these two lambdas were lost; logging each sentence is assumed.
    sentences.take(10).foreach(s => logger.info(s.toString))
    sentences.takeRight(10).foreach(s => logger.info(s.toString))
  }
}
Source File: Releaser.scala From releaser with Apache License 2.0 | 5 votes |
// NOTE(review): the package name, several import prefixes and every `log.info(s...)` call
// prefix in this file were lost in extraction and have been restored from context - confirm
// against the project.
package uk.gov.hmrc.releaser

import java.io.File
import java.nio.file.{Files, Path}

import org.apache.commons.io.FileUtils
import uk.gov.hmrc.releaser.bintray.{BintrayHttp, BintrayRepoConnector, DefaultBintrayRepoConnector}
import uk.gov.hmrc.releaser.github.{GithubConnector, Repo}
import uk.gov.hmrc.{CredentialsFinder, FileDownloader, Logger}

import scala.util.{Failure, Success, Try}

/** Command-line entry point: exits the JVM with the code returned by [[Releaser]]. */
object ReleaserMain {
  def main(args: Array[String]): Unit = {
    val result = Releaser(args)
    System.exit(result)
  }
}

object Releaser extends Logger {

  import ArgParser._

  /**
   * Parses `args` and runs the release.
   *
   * @return -1 when the arguments cannot be parsed, otherwise the result of [[run]]
   */
  def apply(args: Array[String]): Int = {
    parser.parse(args, Config()) match {
      case Some(config) =>
        val githubName = config.githubNameOverride.getOrElse(config.artefactName)
        run(config.artefactName, ReleaseCandidateVersion(config.rcVersion), config.releaseType, githubName, config.releaseNotes, config.dryRun)
      case None => -1
    }
  }

  /**
   * Releases `artefactName` from the given release candidate.
   *
   * Credentials are read from `~/.github/.credentials` and `~/.bintray/.credentials`.
   *
   * @return 0 on success, 1 when the release fails, -1 when either credentials file
   *         yields no credentials
   */
  def run(artefactName: String, rcVersion: ReleaseCandidateVersion, releaseType: ReleaseType.Value, gitHubName: String, releaseNotes: Option[String], dryRun: Boolean = false): Int = {
    val githubCredsFile = System.getProperty("user.home") + "/.github/.credentials"
    val bintrayCredsFile = System.getProperty("user.home") + "/.bintray/.credentials"

    val githubCredsOpt = CredentialsFinder.findGithubCredsInFile(new File(githubCredsFile).toPath)
    val bintrayCredsOpt = CredentialsFinder.findBintrayCredsInFile(new File(bintrayCredsFile).toPath)

    doReleaseWithCleanup { directories =>
      // Same precedence as the original if/else chain: missing GitHub credentials are
      // reported first.
      (githubCredsOpt, bintrayCredsOpt) match {
        case (None, _) =>
          log.info(s"Didn't find github credentials in $githubCredsFile")
          -1
        case (_, None) =>
          log.info(s"Didn't find Bintray credentials in $bintrayCredsFile")
          -1
        case (Some(githubCreds), Some(bintrayCreds)) =>
          val releaserVersion = getClass.getPackage.getImplementationVersion
          val metaDataProvider = new ArtefactMetaDataProvider()
          val gitHubDetails =
            if (dryRun) GithubConnector.dryRun(githubCreds, releaserVersion)
            else GithubConnector(githubCreds, releaserVersion)
          // NOTE(review): `bintrayDetails` is built (honouring dryRun) but never used; the
          // coordinator below always receives a real DefaultBintrayRepoConnector. Kept as in
          // the original - confirm whether dry runs are meant to use `bintrayDetails`.
          val bintrayDetails =
            if (dryRun) BintrayRepoConnector.dryRun(bintrayCreds, directories.workDir)
            else BintrayRepoConnector(bintrayCreds, directories.workDir)
          val bintrayRepoConnector = new DefaultBintrayRepoConnector(directories.workDir, new BintrayHttp(bintrayCreds), new FileDownloader)

          val coordinator = new Coordinator(directories.stageDir, metaDataProvider, gitHubDetails, bintrayRepoConnector)
          val result = coordinator.start(artefactName, Repo(gitHubName), rcVersion, releaseType, releaseNotes)

          result match {
            case Success(targetVersion) =>
              log.info(s"Releaser successfully released $artefactName $targetVersion")
              0
            case Failure(e) =>
              e.printStackTrace()
              log.info(s"Releaser failed to release $artefactName $rcVersion with error '${e.getMessage}'")
              1
          }
      }
    }
  }

  /**
   * Runs `f` with fresh release directories and deletes them afterwards, whether or not
   * `f` throws. A failed deletion is logged as a warning and otherwise ignored.
   */
  def doReleaseWithCleanup[T](f: ReleaseDirectories => T): T = {
    val directories = ReleaseDirectories()
    try {
      f(directories)
    } finally {
      log.info("cleaning releaser work directory")
      directories.delete().recover { case t => log.warn(s"failed to delete releaser work directory ${t.getMessage}") }
    }
  }
}

/**
 * Temporary directory tree used by one release; `work` and `stage` are created under
 * `tmpDirectory` on first access.
 */
case class ReleaseDirectories(tmpDirectory: Path = Files.createTempDirectory("releaser")) {

  lazy val workDir = Files.createDirectories(tmpDirectory.resolve("work"))
  lazy val stageDir = Files.createDirectories(tmpDirectory.resolve("stage"))

  /** Deletes the whole temporary tree; a failure is captured in the returned `Try`. */
  def delete() = Try {
    FileUtils.forceDelete(tmpDirectory.toFile)
  }
}
Source File: package.scala From theGardener with Apache License 2.0 | 5 votes |
import java.io.File

import play.api.Logging

import scala.concurrent._
import scala.util.control.NonFatal
import scala.util.{Failure, Try}

/** Small syntax helpers: error logging for `Try` / `Future` and path-separator fixing. */
package object utils extends Logging {

  implicit class TryOps[T](t: Try[T]) {
    /** Logs `msg` with the exception when `t` is a failure; the failure itself is kept. */
    def logError(msg: => String): Try[T] = t.recoverWith {
      case e =>
        logger.error(msg, e)
        Failure(e)
    }
  }

  implicit class FutureOps[T](f: Future[T]) {
    /** Logs `msg` with the exception when `f` fails non-fatally; the failure itself is kept. */
    def logError(msg: => String)(implicit ec: ExecutionContext): Future[T] = f.recoverWith {
      case NonFatal(e) =>
        logger.error(msg, e)
        Future.failed(e)
    }
  }

  implicit class PathExt(path: String) {
    /** Replaces every `/` in the path with the platform's file separator. */
    def fixPathSeparator: String = path.replace('/', File.separatorChar)
  }
}
Source File: PageController.scala From theGardener with Apache License 2.0 | 5 votes |
package controllers

import java.io.File

import com.github.ghik.silencer.silent
import controllers.AssetAccessError.{AssetNotAllowed, AssetNotFound}
import controllers.dto._
import io.swagger.annotations._
import javax.inject.Inject
import play.api.Configuration
import play.api.libs.json.Json
import play.api.mvc._
import repositories._
import services._

import scala.concurrent.ExecutionContext

@silent("Interpolated")
@silent("missing interpolator")
@Api(value = "PageController", produces = "application/json")
class PageController @Inject()(pageService: PageService)(implicit ec: ExecutionContext) extends InjectedController {

  @ApiOperation(value = "Get pages from path", response = classOf[PageDTO], responseContainer = "list")
  @ApiResponses(Array(new ApiResponse(code = 404, message = "Page not found")))
  def getPageFromPath(path: String): Action[AnyContent] = Action.async {
    pageService.computePageFromPath(path).map {
      case Some(pageDto) => Ok(Json.toJson(Seq(pageDto)))
      case None => NotFound(s"No Page $path")
    }
  }
}

/** Reasons why a documentation asset cannot be served. */
sealed abstract class AssetAccessError(message: String) extends Throwable(message)

object AssetAccessError {

  case class AssetNotAllowed(message: String) extends AssetAccessError(message)

  case class AssetNotFound(message: String) extends AssetAccessError(message)

}

class PageAssetController @Inject()(config: Configuration, projectRepository: ProjectRepository)(implicit ec: ExecutionContext) extends InjectedController {

  // NOTE(review): the configuration key was lost in extraction (the literal was empty);
  // restored as "projects.root.directory" - confirm against application.conf.
  val projectsRootDirectory = config.get[String]("projects.root.directory")

  /**
   * Serves an asset addressed as `projectId>branchName>relativePath`.
   *
   * Responds 404 when the path has fewer than three parts or the project (or its
   * documentation root) is unknown, 403 when the asset lies outside the documentation root,
   * and 404 when the asset file does not exist.
   */
  def getImageFromPath(path: String): Action[AnyContent] = Action {
    val params = path.split(">")

    (for {
      projectId <- params.lift(0)
      branchName <- params.lift(1)
      relativePath <- params.lift(2)
      documentationRootPath <- projectRepository.findById(projectId).flatMap(_.documentationRootPath)
      assetFileAccess = accessToAsset(s"$projectsRootDirectory/$projectId/$branchName/$documentationRootPath", relativePath)
    } yield (relativePath, assetFileAccess)) match {
      case None => NotFound("Project not found or bad configuration")
      case Some((_, Left(AssetNotAllowed(message)))) => Forbidden(message)
      case Some((_, Left(AssetNotFound(message)))) => NotFound(message)
      case Some((_, Right(assetFile))) => Ok.sendFile(assetFile)
    }
  }

  /**
   * Resolves `assetRelativePath` against `documentationRootPath`.
   *
   * @return `Left(AssetNotAllowed)` when the canonical asset path is not located under the
   *         canonical documentation root, `Left(AssetNotFound)` when the file does not
   *         exist, otherwise `Right(file)`
   */
  def accessToAsset(documentationRootPath: String, assetRelativePath: String): Either[AssetAccessError, File] = {
    val assetFile = new File(s"$documentationRootPath/$assetRelativePath")
    val documentationRoot = new File(documentationRootPath).getCanonicalFile.toPath
    val assetPath = assetFile.getCanonicalFile.toPath

    // Path.startsWith compares whole path components. The original substring test
    // (`contains`) also accepted a sibling such as "<root>-private/..." and any path that
    // merely contained the root string somewhere in the middle.
    if (!assetPath.startsWith(documentationRoot)) {
      Left(AssetNotAllowed(s"Asset $assetRelativePath not allowed"))
    } else if (!assetFile.exists()) {
      Left(AssetNotFound(s"Asset $assetRelativePath not found"))
    } else {
      Right(assetFile)
    }
  }
}
Source File: CustomConfigSystemReader.scala From theGardener with Apache License 2.0 | 5 votes |
package utils import import org.eclipse.jgit.lib.Config import import org.eclipse.jgit.util.{FS, SystemReader}

object CustomConfigSystemReader {

  /** Installs a [[CustomConfigSystemReader]] reading the user git config from `target/data/gitconfig`. */
  def overrideSystemGitConfig(): Unit = {
    val configPath = Seq("target", "data", "gitconfig").mkString(separator)
    val reader = new CustomConfigSystemReader(new File(configPath))
    SystemReader.setInstance(reader)
  }
}

/**
  * JGit `SystemReader` that delegates to the reader that was installed when this instance was
  * created, except for the user configuration (read from `userGitConfig`) and the system
  * configuration (an empty config that is never loaded and never reported as outdated).
  *
  * @param userGitConfig file backing the user-level git configuration
  */
class CustomConfigSystemReader(userGitConfig: File) extends SystemReader {

  // Captured at construction time, i.e. before this instance replaces it.
  val proxy = SystemReader.getInstance()

  override def getHostname: String =
    proxy.getHostname

  override def getenv(variable: String): String =
    proxy.getenv(variable)

  override def getProperty(key: String): String =
    proxy.getProperty(key)

  override def getCurrentTime: Long =
    proxy.getCurrentTime

  override def getTimezone(when: Long): Int =
    proxy.getTimezone(when)

  override def openJGitConfig(parent: Config, fs: FS): FileBasedConfig =
    proxy.openJGitConfig(parent, fs)

  override def openUserConfig(parent: Config, fs: FS): FileBasedConfig =
    new FileBasedConfig(parent, userGitConfig, fs)

  override def openSystemConfig(parent: Config, fs: FS): FileBasedConfig =
    new FileBasedConfig(parent, null, fs) {
      // No backing file: loading is a no-op and the config never goes stale.
      override def load(): Unit = ()

      override def isOutdated: Boolean = false
    }
}
Source File: CaptchaHelper.scala From ez-framework with Apache License 2.0 | 5 votes |
package com.ecfront.ez.framework.service.auth.helper import{File, FileOutputStream} import com.ecfront.ez.framework.core.logger.Logging import com.github.cage.GCage

/** Renders captcha images to temporary files. */
object CaptchaHelper extends Logging {

  /**
    * Draws `text` as a captcha image into a new temporary `ez_captcha_*.jpg` file that is
    * scheduled for deletion on JVM exit.
    *
    * @param text the text to render
    * @return the image file, or `null` when drawing fails (the error is logged)
    */
  def generate(text: String): File = {
    val temp = File.createTempFile("ez_captcha_", ".jpg")
    val os = new FileOutputStream(temp)
    try {
      temp.deleteOnExit()
      new GCage().draw(text, os)
      temp
    } catch {
      // Only recover from non-fatal failures; OutOfMemoryError, InterruptedException and the
      // like must propagate instead of being turned into a null result.
      case scala.util.control.NonFatal(e) =>
        logger.error("Generate captche error.", e)
        null
    } finally {
      os.close()
    }
  }
}
Source File: I18NProcessor.scala From ez-framework with Apache License 2.0 | 5 votes |
package com.ecfront.ez.framework.core.i18n import import java.util.regex.Pattern import com.ecfront.common.Resp import com.ecfront.ez.framework.core.EZ import com.ecfront.ez.framework.core.logger.Logging import /** Stores `_language` in `EZ.Info.language`, the language later used by `i18n`. */ def setLanguage(_language: String): Unit = { EZ.Info.language = _language } private val tabR = "\t" /** Replaces a non-empty response message by its translation, after turning tabs into spaces. */ def process(resp: Resp[_]): Unit = { if (resp.message != null && resp.message.nonEmpty) { resp.message = i18n(resp.message.replaceAll(tabR, " ")) } } /** Returns `str` rewritten with the replacement registered for `EZ.Info.language` on the first `i18nInfo` pattern that matches the whole string, or `str` unchanged when no pattern matches. */ def i18n(str: String): String = { var newStr = str i18nInfo.find(_._1.matcher(str).matches()).foreach { matchedItem => val matcher = matchedItem._1.matcher(str) newStr = matcher.replaceAll(matchedItem._2(EZ.Info.language)) } newStr } /** Adds `.x` to strings as a shorthand for `i18n(str)`. */ implicit class Impl(val str: String) { def x: String = i18n(str) } }
Source File: AttachmentService.scala From BacklogMigration-Redmine with MIT License | 5 votes |
package com.nulabinc.backlog.r2b.exporter.service import{File, FileOutputStream} import{HttpURLConnection, URL} import java.nio.channels.Channels import com.nulabinc.backlog.migration.common.utils.ControlUtil.using import com.nulabinc.backlog.migration.common.utils.Logging

/** Downloads attachments over HTTP, following redirects manually. */
object AttachmentService extends Logging {

  private val MAX_REDIRECT_COUNT = 10

  /**
    * Downloads the content behind `url` (after following redirects) into `file`.
    * Failures while transferring the content are logged as warnings and not rethrown.
    */
  def download(url: URL, file: File): Unit = {
    val redirected = followRedirect(url)
    doDownload(redirected, file)
  }

  /** Copies the content of `url` into `file`, always releasing the channel and the stream. */
  private def doDownload(url: URL, file: File): Unit =
    try {
      val rbc = Channels.newChannel(url.openStream())
      try {
        val fos = new FileOutputStream(file)
        try {
          fos.getChannel.transferFrom(rbc, 0, java.lang.Long.MAX_VALUE)
          ()
        } finally {
          fos.close()
        }
      } finally {
        rbc.close()
      }
    } catch {
      // Best effort: report and continue, but let fatal errors propagate.
      case scala.util.control.NonFatal(e) =>
        logger.warn("Download attachment failed: " + e.getMessage)
    }

  /**
    * Resolves 301/302/303 redirects starting at `url`.
    *
    * @param url   the URL to probe
    * @param count number of redirects already followed
    * @return the last URL reached; once `MAX_REDIRECT_COUNT` redirects have been followed the
    *         next target is returned without being probed. Non-HTTP URLs are returned as is.
    */
  private def followRedirect(url: URL, count: Int = 0): URL =
    url.openConnection match {
      case http: HttpURLConnection =>
        http.setRequestMethod("GET")
        http.connect()
        using(http) { connection =>
          connection.getResponseCode match {
            case 301 | 302 | 303 =>
              Option(connection.getHeaderField("Location")) match {
                case Some(location) =>
                  // Resolving against the current URL also handles relative Location values.
                  val newUrl = new URL(url, location)
                  if (count < MAX_REDIRECT_COUNT) followRedirect(newUrl, count + 1) else newUrl
                case None =>
                  // A redirect without a target cannot be followed; stay on the current URL.
                  url
              }
            case _ => url
          }
        }
      case _ => url
    }
}
Source File: IterateeMain.scala From advanced-scala-code with Apache License 2.0 | 5 votes |
package iteratee import scala.util.{Failure, Success} /** Small demonstrations of iteratee pipelines: `main` runs a few integer enumerator examples and then `fileExample`. */ object IterateeMain { /** Counts the lower-cased, non-blank words of `license.txt` and, on completion, prints the five most frequent ones or the stack trace of the failure. */ def fileExample(): Unit = { import io.iteratee.monix.task._ import val wordsE = readLines(new File("license.txt")).flatMap { line => enumIndexedSeq(line.split("\\W")) } val noEmptyLinesEE = filter[String](str => str.trim.length > 0) val toLowerEE = map[String, String](_.toLowerCase) val countWordsI = fold[String, Map[String, Int]](Map.empty) { (acc, next) => acc.get(next) match { case None => acc + (next -> 1) case Some(num) => acc + (next -> (1 + num)) } } val dataT = wordsE.through(noEmptyLinesEE). through(toLowerEE).into(countWordsI).map { dataMap => dataMap.toList.sortWith( _._2 > _._2).take(5).map(_._1) } import dataT.runOnComplete { case Success(data) => println(data) case Failure(th) => th.printStackTrace() } } def main(args: Array[String]) { import // Just one Int val singleNumE = enumOne(42) val singleNumI = takeI[Int](1) val singleNumResult = singleNumE.into(singleNumI) println(singleNumResult) // Incrementing one Int val incrementNumEE = map[Int, Int](_ + 1) val incrementedNumResult = singleNumE.through(incrementNumEE).into(singleNumI) println(incrementedNumResult) // First 10 even numbers val naturalsE = iterate(1)(_ + 1) val moreThan100EE = filter[Int](_ >= 100) val evenFilterEE = filter[Int](_ % 2 == 0) val first10I = takeI[Int](10) println(naturalsE.through(moreThan100EE).through(evenFilterEE).into(first10I)) { import io.iteratee.modules.eval._ // Summing N first numbers val naturalsE = iterate(1)(_ + 1) val limit1kEE = take[Int](30000) val sumI = fold[Int, Int](0) { (acc, next) => acc + next } println(naturalsE.through(limit1kEE).into(sumI).value) } fileExample() } }
Source File: TransformerBenchmark.scala From mleap with Apache License 2.0 | 5 votes |
package com.truecar.mleap.spark.benchmark import{FileInputStream, File} import ml.bundle.fs.DirectoryBundle import com.truecar.mleap.runtime.LocalLeapFrame import com.truecar.mleap.runtime.transformer.Transformer import import org.scalameter.api._ import org.scalameter.picklers.Implicits._ import spray.json._ import com.truecar.mleap.serialization.mleap.v1.MleapJsonSupport._ /** ScalaMeter benchmark (forked JVM, no warm-up, minimum aggregation) that deserializes a transformer and a `LocalLeapFrame` from `/tmp` and measures calling `regression.transform(frame)` repeatedly, for repetition counts from 1000 to 10000 in steps of 1000. */ object TransformerBenchmark extends Bench.ForkedTime { lazy override val executor = { SeparateJvmsExecutor( Executor.Warmer.Zero, Aggregator.min[Double], new Measurer.Default) } val mlSerializer = MlJsonSerializer val classLoader = getClass.getClassLoader val regressionFile = new File("/tmp/") val frameFile = new File("/tmp/frame.json") val bundleReader = DirectoryBundle(regressionFile) val regression = mlSerializer.deserializeWithClass(bundleReader).asInstanceOf[Transformer] val lines = val frame = lines.parseJson.convertTo[LocalLeapFrame] val ranges = for { size <- Gen.range("size")(1000, 10000, 1000) } yield 0 until size measure method "transform" in { using(ranges) in { size => size.foreach { _ => regression.transform(frame) } } } }
Source File: SparkTransformerBenchmark.scala From mleap with Apache License 2.0 | 5 votes |
package com.truecar.mleap.spark.benchmark import{FileInputStream, File} import import com.truecar.mleap.runtime.LocalLeapFrame import com.truecar.mleap.spark.benchmark.util.SparkSerializer import org.apache.spark.sql.{Row, SQLContext} import org.apache.spark.{SparkContext, SparkConf} import import org.scalameter.Bench import scala.collection.JavaConverters._ import org.scalameter.api._ import org.scalameter.picklers.Implicits._ import org.apache.log4j.Logger import org.apache.log4j.Level import com.truecar.mleap.spark.MleapSparkSupport._ import spray.json._ import com.truecar.mleap.serialization.mleap.v1.MleapJsonSupport._ /** ScalaMeter benchmark (forked JVM, no warm-up, minimum aggregation) that reads a Spark transformer from `/tmp/spark.transformer.kryo`, builds a DataFrame on a `local[1]` Spark context and measures `regression.transform(sparkFrame).head` repeatedly, for repetition counts from 1000 to 10000 in steps of 1000. The Spark context is not stopped (the `sc.stop()` call is commented out). */ object SparkTransformerBenchmark extends Bench.ForkedTime { lazy override val executor = { SeparateJvmsExecutor( Executor.Warmer.Zero, Aggregator.min[Double], new Measurer.Default) } val classLoader = getClass.getClassLoader val regressionFile = new File("/tmp/spark.transformer.kryo") val frameFile = new File("/tmp/frame.json") val inputStream = new FileInputStream(regressionFile) val input = new Input(inputStream) val regression: Transformer = SparkSerializer().read(input) val lines = val frame = lines.parseJson.convertTo[LocalLeapFrame] Logger.getLogger("org").setLevel(Level.OFF) Logger.getLogger("akka").setLevel(Level.OFF) val sparkConf = new SparkConf() .setAppName("Spark Transformer Benchmark") .setMaster("local[1]") val sc = new SparkContext(sparkConf) val sqlContext = new SQLContext(sc) val rdd = => Row(a.toSeq: _*)).toList.asJava val schema = frame.schema.toSpark val sparkFrame = sqlContext.createDataFrame(rdd, schema) val ranges = for { size <- Gen.range("size")(1000, 10000, 1000) } yield 0 until size measure method "transform" in { using(ranges) in { size => size.foreach { _ => regression.transform(sparkFrame).head } } } // sc.stop() }
Source File: AWTSystemProvider.scala From scala-game-library with MIT License | 5 votes |
package sgl package awt import sgl.util._ import import java.awt.Desktop import import scala.concurrent.ExecutionContext /** `SystemProvider` for the AWT backend: `System` is `AWT5System`, which takes its clock from `java.lang.System`, exits through `sys.exit()` and opens web pages with `java.awt.Desktop` when browsing is supported. */ trait AWTSystemProvider extends SystemProvider with PartsResourcePathProvider { object AWT5System extends System { override def exit(): Unit = { sys.exit() } override def currentTimeMillis: Long = java.lang.System.currentTimeMillis override def nanoTime: Long = java.lang.System.nanoTime override def loadText(path: ResourcePath): Loader[Array[String]] = { FutureLoader { val localAsset = if(DynamicResourcesEnabled) findDynamicResource(path) else None val is = => new if(is == null) { throw new ResourceNotFoundException(path) } } } override def loadBinary(path: ResourcePath): Loader[Array[Byte]] = { FutureLoader { val localAsset = if(DynamicResourcesEnabled) findDynamicResource(path) else None val is = => new if(is == null) { throw new ResourceNotFoundException(path) } val bis = new val bytes = new scala.collection.mutable.ListBuffer[Byte] var b: Int = 0 while({ b =; b != -1}) { bytes.append(b.toByte) } bytes.toArray } } override def openWebpage(uri: URI): Unit = { val desktop = if(Desktop.isDesktopSupported()) Desktop.getDesktop() else null if(desktop != null && desktop.isSupported(Desktop.Action.BROWSE)) { try { desktop.browse(uri); } catch { case (e: Exception) => e.printStackTrace() } } } } val System = AWT5System override val ResourcesRoot = PartsResourcePath(Vector()) override val MultiDPIResourcesRoot = PartsResourcePath(Vector()) val DynamicResourcesEnabled: Boolean = false // TODO: provide a command line flag to control this as well, in particular to give // the asset directory. 
/** Looks for `assets/<path>` in the directory that contains this class's code source (the jar's parent directory), then in the `user.dir` working directory; only the working directory is searched when the code source, the jar file or its parent cannot be determined. Returns `None` when the file exists in neither place. */ def findDynamicResource(path: ResourcePath): Option[File] = { def findFromDir(d: File): Option[File] = { val asset = new File(d.getAbsolutePath + "/assets/" + path.path) if(asset.exists) Some(asset) else None } def findFromWorkingDir: Option[File] = findFromDir(new File(java.lang.System.getProperty("user.dir"))) val protectionDomain = this.getClass.getProtectionDomain() val codeSource = protectionDomain.getCodeSource() if(codeSource == null) return findFromWorkingDir val jar = new File(codeSource.getLocation.toURI.getPath) if(!jar.exists) return findFromWorkingDir val parent = jar.getParentFile if(parent == null) return findFromWorkingDir findFromDir(parent).orElse(findFromWorkingDir) } //Centralize the execution context used for asynchronous tasks in the Desktop backend //Could be overriden at wiring time implicit val executionContext: ExecutionContext = }
Source File: SparkFunSuite.scala From spark-gbtlr with Apache License 2.0 | 5 votes |
package org.apache.spark // scalastyle:off import import org.apache.spark.internal.Logging import org.apache.spark.util.AccumulatorContext import org.scalatest.{BeforeAndAfterAll, FunSuite, Outcome} /** Runs the test between two log lines that name the suite (with `org.apache.spark` shortened to `o.a.s`) and the test; the closing line is logged even when the test throws. */ final protected override def withFixture(test: NoArgTest): Outcome = { val testName = test.text val suiteName = this.getClass.getName val shortSuiteName = suiteName.replaceAll("org.apache.spark", "o.a.s") try { logInfo(s"\n\n===== TEST OUTPUT FOR $shortSuiteName: '$testName' =====\n") test() } finally { logInfo(s"\n\n===== FINISHED $shortSuiteName: '$testName' =====\n") } } }
Source File: Config.scala From zipkin-mesos-framework with Apache License 2.0 | 5 votes |
package net.elodina.mesos.zipkin import{File, FileInputStream} import import java.util.Properties import net.elodina.mesos.zipkin.utils.{BindAddress, Period}

/** Mutable, process-wide scheduler configuration, optionally loaded from a properties file. */
object Config {
  val DEFAULT_FILE = new File("")

  var debug: Boolean = false
  var genTraces: Boolean = false
  var storage: String = "file:zipkin-mesos.json"

  var master: Option[String] = None
  var principal: Option[String] = None
  var secret: Option[String] = None
  var user: Option[String] = None

  var frameworkName: String = "zipkin"
  var frameworkRole: String = "*"
  var frameworkTimeout: Period = new Period("30d")

  var log: Option[File] = None
  var api: Option[String] = None
  var bindAddress: Option[BindAddress] = None

  /** Port of the API URL, or 80 when the URL does not specify one. */
  def apiPort: Int = {
    val port = new URI(getApi).getPort
    if (port == -1) 80 else port
  }

  /** Rewrites `api` so that its URL uses `port`, keeping every other URL component. */
  def replaceApiPort(port: Int): Unit = {
    val prev: URI = new URI(getApi)
    api = Some("" + new URI(
      prev.getScheme, prev.getUserInfo,
      prev.getHost, port,
      prev.getPath, prev.getQuery, prev.getFragment
    ))
  }

  /** The API URL; throws `Error` when it has not been set. */
  def getApi: String = {
    api.getOrElse(throw new Error("api not initialized"))
  }

  /** The master address; throws `Error` when it has not been set. */
  def getMaster: String = {
    master.getOrElse(throw new Error("master not initialized"))
  }

  /** Reads the same `master` setting as `getMaster`, but fails with a zookeeper-specific message. */
  def getZk: String = {
    master.getOrElse(throw new Error("zookeeper not initialized"))
  }

  /**
    * Overrides the current settings with every recognised key present in the properties `file`.
    * Keys that are absent from the file leave the corresponding setting untouched.
    */
  private[zipkin] def loadFromFile(file: File): Unit = {
    val props: Properties = new Properties()
    val stream: FileInputStream = new FileInputStream(file)
    // Close the stream even when the file is malformed and load() throws.
    try props.load(stream)
    finally stream.close()

    def prop(key: String): Option[String] = Option(props.getProperty(key))

    prop("debug").foreach(v => debug = java.lang.Boolean.parseBoolean(v))
    prop("genTraces").foreach(v => genTraces = java.lang.Boolean.parseBoolean(v))
    prop("storage").foreach(v => storage = v)

    prop("master").foreach(v => master = Some(v))
    prop("user").foreach(v => user = Some(v))
    prop("principal").foreach(v => principal = Some(v))
    prop("secret").foreach(v => secret = Some(v))

    prop("framework-name").foreach(v => frameworkName = v)
    prop("framework-role").foreach(v => frameworkRole = v)
    prop("framework-timeout").foreach(v => frameworkTimeout = new Period(v))

    prop("log").foreach(v => log = Some(new File(v)))
    prop("api").foreach(v => api = Some(v))
    prop("bind-address").foreach(v => bindAddress = Some(new BindAddress(v)))
  }

  /** Multi-line summary of the current settings; the secret is masked. */
  override def toString: String = {
    s"""
       |debug: $debug, storage: $storage
       |mesos: master=$master, user=${if (user.isEmpty || user.get.isEmpty) "<default>" else user}
       |principal=${principal.getOrElse("<none>")}, secret=${if (secret.isDefined) "*****" else "<none>"}
       |framework: name=$frameworkName, role=$frameworkRole, timeout=$frameworkTimeout
       |api: $api, bind-address: ${bindAddress.getOrElse("<all>")}, genTraces: $genTraces
    """.stripMargin.trim
  }
}
Source File: Storage.scala From zipkin-mesos-framework with Apache License 2.0 | 5 votes |
package import{File, FileWriter} import org.I0Itec.zkclient.ZkClient import org.I0Itec.zkclient.exception.ZkNodeExistsException import org.I0Itec.zkclient.serialize.ZkSerializer import play.api.libs.json.{Json, Reads, Writes} import

/** Persists a single value of type `T` as JSON. */
trait Storage[T] {

  /** Stores `value`, replacing whatever was stored before. */
  def save(value: T)(implicit writes: Writes[T]): Unit

  /** Returns the stored value, or `None` when nothing is stored or it cannot be read as a `T`. */
  def load(implicit reads: Reads[T]): Option[T]
}

/** [[Storage]] backed by the local file named `file`. */
case class FileStorage[T](file: String) extends Storage[T] {

  override def save(value: T)(implicit writes: Writes[T]): Unit = {
    val writer = new FileWriter(file)
    try {
      writer.write(Json.stringify(Json.toJson(value)))
    } finally {
      writer.close()
    }
  }

  override def load(implicit reads: Reads[T]): Option[T] = {
    if (!new File(file).exists()) None
    else {
      val source = Source.fromFile(file)
      // Release the file handle once the content has been read.
      try Json.parse(source.mkString).asOpt[T]
      finally source.close()
    }
  }
}

/**
  * [[Storage]] backed by a ZooKeeper node.
  *
  * @param zk connection string followed by the node path, split at the first `/`
  */
case class ZkStorage[T](zk: String) extends Storage[T] {
  val (zkConnect, path) = zk.span(_ != '/')
  createChrootIfRequired()

  /** Creates the node path (including parents) when the connection string carries one. */
  private def createChrootIfRequired(): Unit = {
    if (path != "") {
      val client = zkClient
      try {
        client.createPersistent(path, true)
      } finally {
        client.close()
      }
    }
  }

  /** Opens a new client; every caller is responsible for closing it. */
  private def zkClient: ZkClient = new ZkClient(zkConnect, 30000, 30000, ZKStringSerializer)

  override def save(value: T)(implicit writes: Writes[T]): Unit = {
    val client = zkClient
    val json = Json.stringify(Json.toJson(value))
    try {
      client.createPersistent(path, json)
    } catch {
      // The node already exists: overwrite its content instead.
      case e: ZkNodeExistsException => client.writeData(path, json)
    } finally {
      client.close()
    }
  }

  override def load(implicit reads: Reads[T]): Option[T] = {
    val client = zkClient
    try {
      Option(client.readData(path, true).asInstanceOf[String]).flatMap(Json.parse(_).asOpt[T])
    } finally {
      client.close()
    }
  }
}

/** Serializes ZooKeeper node content as UTF-8 strings; `null` bytes deserialize to `null`. */
private object ZKStringSerializer extends ZkSerializer {
  def serialize(data: Object): Array[Byte] = data.asInstanceOf[String].getBytes("UTF-8")

  def deserialize(bytes: Array[Byte]): Object = {
    if (bytes == null) null
    else new String(bytes, "UTF-8")
  }
}
Source File: ZipkinComponentServer.scala From zipkin-mesos-framework with Apache License 2.0 | 5 votes |
package net.elodina.mesos.zipkin.components import import net.elodina.mesos.zipkin.http.HttpServer import scala.sys.process.Process import scala.sys.process.ProcessBuilder /** Runs one Zipkin component as a child `java -jar` process: `start` picks the jar in the current directory whose name matches the mask of the component named in the task id (`collector`, `query` or `web`), `await` blocks for its exit value, and `stop` destroys it while recording whether a shutdown was requested. */ class ZipkinComponentServer { var process: Process = null @volatile var shutdownInitiated = false def isStarted = Option(process).isDefined def start(taskConfig: TaskConfig, taskId: String) = { val jarMask = ZipkinComponent.getComponentFromTaskId(taskId) match { case "collector" => HttpServer.collectorMask case "query" => HttpServer.queryMask case "web" => HttpServer.webMask case _ => throw new IllegalArgumentException(s"Illegal component name found in task id: $taskId") } val distToLaunch = initJar(jarMask) process = configureProcess(taskConfig, distToLaunch).run() //TODO: consider logs redirect } def await(): Option[Int] = { if (isStarted) Some(process.exitValue()) else None } def acknowledgeShutdownStatus(): Boolean = { val oldStatus = shutdownInitiated if (shutdownInitiated) shutdownInitiated = false oldStatus } def stop(shutdownInitiated: Boolean) { if (isStarted) { this.shutdownInitiated = shutdownInitiated process.destroy() } } private def initJar(jarMask: String): File = { new File(".").listFiles().find(file => file.getName.matches(jarMask)) match { case None => throw new IllegalStateException("Corresponding jar not found") case Some(componentDist) => componentDist } } private def configureProcess(taskConfig: TaskConfig, distToLaunch: File): ProcessBuilder = { val configFileArg ="-f", _)) var command = Seq("java", "-jar", distToLaunch.getCanonicalPath) configFileArg.foreach(command ++= _) command ++= { case (k: String, v: String) => s"-$k=$v" } Process(command, Some(new File(".")), taskConfig.env.toList: _*) } }
Source File: S3.scala From teamcity-s3-plugin with Apache License 2.0 | 5 votes |
package import{InputStream, File} import com.amazonaws.ClientConfiguration import com.amazonaws.auth.{AWSCredentialsProviderChain, DefaultAWSCredentialsProviderChain} import import{ObjectMetadata, PutObjectRequest, CannedAccessControlList} import import jetbrains.buildServer.serverSide.SBuild import scala.util.{Success, Try}

/**
  * Uploads build output to S3 under `<clean build name>/<build number>/<file name>`, granting the
  * bucket owner full control of every object.
  *
  * @param config plugin configuration, also used as the first credentials provider
  */
class S3(config: S3ConfigManager) {

  // Try the plugin's own credentials first, then the default AWS chain, re-evaluating the
  // chain on every request instead of sticking to the last provider that worked.
  val credentialsProvider = {
    val chain = new AWSCredentialsProviderChain(config, new DefaultAWSCredentialsProviderChain())
    chain.setReuseLastProvider(false)
    chain
  }

  val transferManager = new TransferManager(
    new AmazonS3Client(credentialsProvider, new ClientConfiguration().withMaxErrorRetry(2))
  )

  /**
    * Uploads `fileSize` bytes read from `contents` and waits for the upload to finish.
    * Any failure is captured in the returned `Try`.
    */
  def upload(bucket: String, build: SBuild, fileName: String, contents: InputStream, fileSize: Long): Try[Unit] =
    Try {
      val key = objectKey(build, fileName)
      val metadata = new ObjectMetadata()
      metadata.setContentLength(fileSize)
      transfer(new PutObjectRequest(bucket, key, contents, metadata))
    }

  /**
    * Uploads `file` and waits for the upload to finish.
    * Any failure is captured in the returned `Try`.
    */
  def upload(bucket: String, build: SBuild, fileName: String, file: File): Try[Unit] =
    Try {
      transfer(new PutObjectRequest(bucket, objectKey(build, fileName), file))
    }

  /** Object key of `fileName` inside the directory dedicated to `build`. */
  private def objectKey(build: SBuild, fileName: String): String =
    s"${S3Plugin.cleanFullName(build)}/${build.getBuildNumber}/$fileName"

  /** Applies the canned ACL, submits the request and blocks until the upload result is available. */
  private def transfer(request: PutObjectRequest): Unit = {
    request.withCannedAcl(CannedAccessControlList.BucketOwnerFullControl)
    transferManager.upload(request).waitForUploadResult()
  }
}
Source File: S3ConfigManager.scala From teamcity-s3-plugin with Apache License 2.0 | 5 votes |
package import{File, PrintWriter} import com.amazonaws.auth.{BasicAWSCredentials, AWSCredentialsProvider, AWSCredentials} import jetbrains.buildServer.serverSide.ServerPaths import org.json4s._ import org.json4s.native.JsonMethods._ import org.json4s.native.Serialization import org.json4s.native.Serialization._

/** Plugin settings as stored in `s3.json`; every field is optional. */
case class S3Config(
  artifactBucket: Option[String],
  buildManifestBucket: Option[String],
  tagManifestBucket: Option[String],
  awsAccessKey: Option[String],
  awsSecretKey: Option[String]
)

/**
  * Holds the plugin configuration, persists it to `<config dir>/s3.json` and exposes the stored
  * key pair as an AWS credentials provider.
  */
class S3ConfigManager(paths: ServerPaths) extends AWSCredentialsProvider {
  implicit val formats = Serialization.formats(NoTypeHints)

  val configFile = new File(s"${paths.getConfigDir}/s3.json")

  // Read once at construction; stays None when the file is missing or does not hold an S3Config.
  private[teamcity] var config: Option[S3Config] =
    if (configFile.exists()) parse(configFile).extractOpt[S3Config]
    else None

  def artifactBucket: Option[String] = config.flatMap(_.artifactBucket)

  def buildManifestBucket: Option[String] = config.flatMap(_.buildManifestBucket)

  def tagManifestBucket: Option[String] = config.flatMap(_.tagManifestBucket)

  /**
    * Replaces the in-memory configuration. When the new configuration has no secret key and its
    * access key equals the current one, the current secret key is carried over.
    */
  private[teamcity] def update(config: S3Config): Unit = {
    val current = this.config
    val keepsSecret = config.awsSecretKey.isEmpty && config.awsAccessKey == current.flatMap(_.awsAccessKey)
    val effective =
      if (keepsSecret) config.copy(awsSecretKey = current.flatMap(_.awsSecretKey))
      else config
    this.config = Some(effective)
  }

  /** Applies `newConfig` and rewrites the configuration file, under this instance's lock. */
  def updateAndPersist(newConfig: S3Config): Unit = synchronized {
    update(newConfig)
    val out = new PrintWriter(configFile, "UTF-8")
    try writePretty(config, out)
    finally out.close
  }

  /** Displayable settings; the secret key is deliberately left out. */
  def details: Map[String, Option[String]] = Map(
    "artifactBucket" -> artifactBucket,
    "buildManifestBucket" -> buildManifestBucket,
    "tagManifestBucket" -> tagManifestBucket,
    "accessKey" -> config.flatMap(_.awsAccessKey)
  )

  /** Credentials built from the stored key pair, or `null` when either key is missing. */
  override def getCredentials: AWSCredentials = {
    val stored = for {
      c <- config
      accessKey <- c.awsAccessKey
      secretKey <- c.awsSecretKey
    } yield new BasicAWSCredentials(accessKey, secretKey)
    stored.orNull // Yes, this is sad
  }

  override def refresh(): Unit = ()
}

object S3ConfigManager {
  val bucketElement = "bucket"
  val s3Element = "S3"
}
Source File: ArtifactUploader.scala From teamcity-s3-plugin with Apache License 2.0 | 5 votes |
package import import java.util.Date import jetbrains.buildServer.messages.{BuildMessage1, DefaultMessagesInfo, Status} import jetbrains.buildServer.serverSide.{BuildServerAdapter, SRunningBuild} import scala.util.control.NonFatal

/** Build-server listener that uploads every artifact of a build to S3 just before it finishes. */
class ArtifactUploader(config: S3ConfigManager, s3: S3) extends BuildServerAdapter {

  /**
    * Uploads each artifact of `runningBuild` to the configured artifact bucket, writing progress
    * to the build log. A failed upload is logged as a build failure message and does not stop
    * the remaining uploads.
    */
  override def beforeBuildFinish(runningBuild: SRunningBuild): Unit = {
    def report(msg: String): Unit = {
      runningBuild.getBuildLog().message(msg,Status.NORMAL,new Date,DefaultMessagesInfo.MSG_TEXT,DefaultMessagesInfo.SOURCE_ID,null)
    }

    report("About to upload artifacts to S3")

    getAllFiles(runningBuild).foreach { case (name: String, artifact: File) =>
      config.artifactBucket match {
        case None =>
          report("Target artifactBucket was not set")
        case Some(bucket) =>
          s3.upload(bucket, runningBuild, name, artifact).recover {
            case NonFatal(e) =>
              runningBuild.getBuildLog().message(s"Error uploading artifacts: ${e.getMessage}",
                Status.ERROR,new Date,DefaultMessagesInfo.MSG_BUILD_FAILURE,DefaultMessagesInfo.SOURCE_ID,null)
          }
      }
    }

    report("Artifact S3 upload complete")
  }

  /** All artifacts of the build as (relative path, file) pairs; empty when the build has none. */
  def getAllFiles(runningBuild: SRunningBuild): Seq[(String,File)] = {
    if (!runningBuild.isArtifactsExists) {
      Nil
    } else {
      ArtifactUploader.getChildren(runningBuild.getArtifactsDirectory)
    }
  }

  private def normalMessage(text: String) =
    new BuildMessage1(DefaultMessagesInfo.SOURCE_ID, DefaultMessagesInfo.MSG_TEXT, Status.NORMAL, new Date, text)
}

object ArtifactUploader {

  /**
    * Recursively lists the non-hidden regular files below `file`.
    *
    * @param file    directory to scan
    * @param paths   unused; kept so existing callers keep compiling
    * @param current path prefix prepended to every returned name
    * @return (path relative to the scanned directory, file) pairs; empty when `file` is not a
    *         directory or cannot be listed
    */
  def getChildren(file: File, paths: Seq[String] = Nil, current: String = ""): Seq[(String, File)] = {
    // File.listFiles returns null rather than an empty array when the path is not a directory
    // or an I/O error occurs.
    val children = Option(file.listFiles).map(_.toSeq).getOrElse(Seq.empty)
    children.flatMap { child =>
      if (child.isHidden) {
        Seq()
      } else {
        val newPath = current + child.getName
        if (child.isDirectory) {
          getChildren(child, paths, newPath + File.separator)
        } else {
          Seq((newPath, child))
        }
      }
    }
  }
}
Source File: Codegen.scala From caliban with Apache License 2.0 | 5 votes |
package import{ File, PrintWriter } import caliban.parsing.adt.Document import zio.{ Task, UIO } /** Code-generation entry point: `generate` loads a schema (through introspection when the schema path starts with `http`, from a file otherwise), renders it with `writer`, formats the result with `Formatter.format` and writes it to `arguments.toPath`, closing the writer afterwards. The effect type passed to `writer` defaults to `zio.UIO`. */ object Codegen { def generate( arguments: Options, writer: (Document, String, Option[String], String) => String ): Task[Unit] = { val s = ".*/scala/(.*)/(.*).scala".r.findFirstMatchIn(arguments.toPath) val packageName = arguments.packageName.orElse("/").mkString("."))) val objectName ="Client") val effect = arguments.effect.getOrElse("zio.UIO") val loader = getSchemaLoader(arguments.schemaPath, arguments.headers) for { schema <- loader.load code = writer(schema, objectName, packageName, effect) formatted <- Formatter.format(code, arguments.fmtPath) _ <- Task(new PrintWriter(new File(arguments.toPath))) .bracket(q => UIO(q.close()), pw => Task(pw.println(formatted))) } yield () } private def getSchemaLoader(path: String, schemaPathHeaders: Option[List[Options.Header]]): SchemaLoader = if (path.startsWith("http")) SchemaLoader.fromIntrospection(path, schemaPathHeaders) else SchemaLoader.fromFile(path) }
Source File: CodeExampleImpl.scala From slinky with MIT License | 5 votes |
package import import slinky.core.facade.ReactElement import import scala.reflect.macros.blackbox /** Blackbox macro implementation. `text` requires `exampleLocation` to be a string literal naming a dotted location, reads `docs/src/main/scala/<location as path>.scala` at expansion time, and derives two pieces from its lines: the text to display (the `//display:` markers removed, lines ending in `//nodisplay` dropped, blank lines trimmed from both ends) and the code to run (lines starting with `//run:`, marker removed). */ object CodeExampleImpl { def text(c: blackbox.Context)(exampleLocation: c.Expr[String]): c.Expr[ReactElement] = { import c.universe._ val Literal(Constant(loc: String)) = exampleLocation.tree val inputFile = new File(s"docs/src/main/scala/${loc.split('.').mkString("/")}.scala") val enclosingPackage = loc.split('.').init.mkString(".") val fileContent = Source.fromFile(inputFile).mkString val innerCode = fileContent.split('\n') val textToDisplay = innerCode .map(_.replaceAllLiterally("//display:", "")) .filterNot(_.endsWith("//nodisplay")) .dropWhile(_.trim.isEmpty) .reverse.dropWhile(_.trim.isEmpty).reverse .mkString("\n") val codeToRun = innerCode.filter(_.startsWith("//run:")).map(_.replaceAllLiterally("//run:", "")).mkString("\n") c.Expr[ReactElement]( q"""{ import ${c.parse(enclosingPackage)}._ = ${Literal(Constant(textToDisplay))}, demoElement = {${c.parse(codeToRun)}}) }""") } }
Source File: HttpSlippyTileReader.scala From geotrellis-osm-elevation with Apache License 2.0 | 5 votes |
package geotrellis.osme.core import geotrellis.vector._ import geotrellis.raster._ import import geotrellis.spark._ import import import geotrellis.util.Filesystem import import import import org.apache.spark._ import org.apache.spark.rdd._ import import

/**
  * Slippy-map tile reader that downloads each tile over HTTP.
  *
  * @param pathTemplate URL template containing the `{z}`, `{x}` and `{y}` placeholders
  * @param fromBytes    decodes the downloaded bytes of the tile at the given key
  */
class HttpSlippyTileReader[T](pathTemplate: String)(fromBytes: (SpatialKey, Array[Byte]) => T) extends SlippyTileReader[T] {

  /** Substitutes the zoom, column and row into `template`. */
  def getURL(template: String, z: Int, x: Int, y: Int) = {
    val substitutions = Seq("{z}" -> z, "{x}" -> x, "{y}" -> y)
    substitutions.foldLeft(template) { case (url, (placeholder, value)) =>
      url.replace(placeholder, value.toString)
    }
  }

  /** Downloads the whole content behind `url`, closing the stream afterwards. */
  def getByteArray(url: String) = {
    val inStream = new URL(url).openStream()
    try toByteArray(inStream)
    finally inStream.close()
  }

  /** Not implemented: reading a whole zoom level as an RDD. */
  def read(zoom: Int)(implicit sc: SparkContext): RDD[(SpatialKey, T)] = ???

  /** Downloads and decodes the tile at `key` for the given zoom level. */
  def read(zoom: Int, key: SpatialKey): T = {
    val tileUrl = getURL(pathTemplate, zoom, key.col, key.row)
    fromBytes(key, getByteArray(tileUrl))
  }

  override def read(zoom: Int, x: Int, y: Int): T =
    read(zoom, SpatialKey(x, y))
}
Source File: ElevationOverlay.scala From geotrellis-osm-elevation with Apache License 2.0 | 5 votes |
package geotrellis.osme.core import{BufferedWriter, FileWriter, File} import com.vividsolutions.jts.geom.{LineString, MultiLineString} import import{GeoJson, JsonFeatureCollection} import scala.collection.immutable.Map import spray.json._ import DefaultJsonProtocol._ import import import geotrellis.vector.densify.DensifyMethods import geotrellis.vector.dissolve.DissolveMethods import geotrellis.vector._ val segmentsFeatures = { segment => val center = segment.centroid match { case PointResult(p) => p case NoResult => throw new Exception("No result found in PointOrNoResult") } val (col, row) = rasterExtent.mapToGrid(center) val elevation = geotiff.tile.getDouble(col, row) val meanvMap: Map[String, Double] = Map("MEANV" -> elevation) LineFeature(segment, meanvMap) } return segmentsFeatures.toTraversable } }
Source File: ElevationSpec.scala From geotrellis-osm-elevation with Apache License 2.0 | 5 votes |
package geotrellis.osme.core import{FileWriter, BufferedWriter, File} import import geotrellis.vector.{Feature, Line, LineFeature} import import spray.json.DefaultJsonProtocol._ import{JsonFeatureCollection, GeoJson} import spray.json.JsonReader import import org.scalatest._ /** Spec for `ElevationOverlay` run against the sample GeoTIFF and GeoJSON under `data/`. `sharedData` is a `def`, so the files are read and the overlay recomputed on every access. The second test writes its output to `geocolor_test.json` in the working directory and makes no assertion. */ class ElevationSpec extends FunSpec with Matchers { def sharedData = { val geojson = Source.fromFile("data/imgn36w100vector.geojson").getLines.mkString val gjCol = parse[JsonFeatureCollection](geojson) new { val geotiff = SinglebandGeoTiff("data/imgn36w100_13_3_3.tif") val multiLine = gjCol.getAllLines().toMultiLine val elevationGeoJson = ElevationOverlay(geotiff, multiLine) } } describe("Core spec") { val numInputLines = sharedData.multiLine.lines.size val numOutputLines = sharedData.elevationGeoJson.size val ratio = numOutputLines / numInputLines println(s"Ratio of input lines to output lines: $ratio : 1") it("returned geojson should contain the MEANV property") { val elevationFeatures = sharedData.elevationGeoJson val hasMeanV = elevationFeatures.forall(feat =>"MEANV")) assert(hasMeanV) } it("should produce a geojson file that can be put into") { val elevationFeatures = sharedData.elevationGeoJson val jsonFeatures = JsonFeatureCollection(elevationFeatures) val file = new File("geocolor_test.json") val bw = new BufferedWriter(new FileWriter(file)) bw.write(jsonFeatures.toJson.prettyPrint) bw.close() } it("Every feature should intersect the tile extent") { val elevationFeatures = sharedData.elevationGeoJson val rasterPoly = sharedData.geotiff.rasterExtent.extent.toPolygon() val doesIntersect = elevationFeatures.forall(feat => rasterPoly.intersects(feat.geom)) assert(doesIntersect) } } }
Source File: Logger.scala From shapenet-viewer with MIT License | 5 votes |
package import org.slf4j.LoggerFactory import import org.slf4j.bridge.SLF4JBridgeHandler import additive: Boolean = false) = { import ch.qos.logback.classic.spi.ILoggingEvent import ch.qos.logback.classic.Level import ch.qos.logback.classic.LoggerContext import ch.qos.logback.classic.encoder.PatternLayoutEncoder import ch.qos.logback.core.FileAppender // Make sure log directory is created val file: File = new File(filename) val parent: File = file.getParentFile if (parent != null) parent.mkdirs val loggerContext = LoggerFactory.getILoggerFactory().asInstanceOf[LoggerContext] val logger = loggerContext.getLogger(loggerName) // Setup pattern val patternLayoutEncoder = new PatternLayoutEncoder() patternLayoutEncoder.setPattern(pattern) patternLayoutEncoder.setContext(loggerContext) patternLayoutEncoder.start() // Setup appender val fileAppender = new FileAppender[ILoggingEvent]() fileAppender.setFile(filename) fileAppender.setEncoder(patternLayoutEncoder) fileAppender.setContext(loggerContext) fileAppender.start() // Attach appender to logger logger.addAppender(fileAppender) //logger.setLevel(Level.DEBUG) logger.setAdditive(additive) fileAppender.getName } def detachAppender(appenderName: String, loggerName: String = org.slf4j.Logger.ROOT_LOGGER_NAME): Unit = { import ch.qos.logback.classic.LoggerContext val loggerContext = LoggerFactory.getILoggerFactory().asInstanceOf[LoggerContext] val logger = loggerContext.getLogger(loggerName) logger.detachAppender(appenderName) } def getLogger(clazz: Class[_]): org.slf4j.Logger = { LoggerFactory.getLogger(clazz) } def getLogger(name: String): org.slf4j.Logger = { LoggerFactory.getLogger(name) } } trait Loggable { lazy val logger = Logger.getLogger(this.getClass) def startTrack(name: String): Unit = { logger.debug("Starting " + name) } def endTrack(name: String): Unit = { logger.debug("Finished " + name) } }
Source File: FullId.scala From shapenet-viewer with MIT License | 5 votes |
// NOTE(review): the package declaration of this file was lost in extraction — restore it above
// the imports. `java.io.File` is restored here because `new File(...)` is used below.
import java.io.File
import scala.util.matching.Regex

/**
 * Identifier made of a source namespace and an id within that source.
 *
 * @param source namespace part
 * @param id     identifier within the source
 */
case class FullId(source: String, id: String) {
  /** Dotted form `source.id`. */
  lazy val fullid = source + "." + id
}

object FullId {
  /**
   * Strict `source.id` pattern: letters, digits, `_` and `-` on both sides of a single dot.
   *
   * The classes use `A-Z`. The previous `A-z` range spans every ASCII character between `A`
   * and `z`, so it also accepted brackets, backslash, caret and backtick.
   */
  val fullIdRegex = new Regex("([a-zA-Z0-9_-]+)\\.([a-zA-Z0-9_-]+)")

  /**
   * Parses a textual id into a [[FullId]].
   *
   * Resolution order:
   *  - `http://` / `https://` URLs, paths starting with `/` and absolute file paths become
   *    source "raw" with the text unchanged as id;
   *  - `file://` URLs become source "raw" with the scheme prefix removed;
   *  - text with a dot after the first character is split at the first dot;
   *  - anything else uses `defaultSource`, or "wssScenes" when the text contains "scene",
   *    otherwise "3dw".
   *
   * @param fullid        text to parse
   * @param defaultSource source to use when the text carries none
   */
  def apply(fullid: String, defaultSource: Option[String] = None): FullId = {
    val dotIndex = fullid.indexOf('.')
    val isUrl = fullid.startsWith("http://") || fullid.startsWith("https://")
    val (source, id) =
      if (isUrl) {
        ("raw", fullid)
      } else if (fullid.startsWith("file://")) {
        // 7 == "file://".length
        ("raw", fullid.substring(7))
      } else if (fullid.startsWith("/") || new File(fullid).isAbsolute) {
        ("raw", fullid)
      } else if (dotIndex > 0) {
        (fullid.substring(0, dotIndex), fullid.substring(dotIndex + 1))
      } else {
        val fallback = defaultSource.getOrElse(if (fullid.contains("scene")) "wssScenes" else "3dw")
        (fallback, fullid)
      }
    new FullId(source, id)
  }

  /** True when both texts parse to the same source and id. */
  def matches(id1: String, id2: String): Boolean = FullId(id1) == FullId(id2)

  /** True when the whole string has the strict `source.id` shape of [[fullIdRegex]]. */
  def isFullId(s: String): Boolean = fullIdRegex.pattern.matcher(s).matches()
}
Source File: ColorPalette.scala From shapenet-viewer with MIT License | 5 votes |
// NOTE(review): the package name and two import targets on the next line were lost in extraction;
// `File` and `Constants` are used below and presumably came from them.
package import java.awt.Color import javax.imageio.ImageIO import import

/** Maps integer ids to colors. */
trait ColorPalette {
  /** Color for the given id. */
  def getColor(id: Int): Color

  /** Number of distinct colors; -1 unless overridden. */
  def getColorCount(): Int = -1

  def getColor(id: Int, alpha: Float): Color = {
    // NOTE(review): this expression was truncated in extraction; it presumably builds a copy of
    // getColor(id) carrying the given alpha — restore before compiling.
    val c = getColor(id), alpha)
  }
}

/** Palette backed by a fixed array of colors. */
class ColorBar(rgbColors: Array[Color]) extends ColorPalette {
  val nColors = rgbColors.length

  /** Scales `r` by (nColors - 1), truncates to Int and looks that index up. */
  def getColor(r: Double): Color = getColor((r*(nColors-1)).toInt)

  // Wraps around via modulo. NOTE(review): a negative id yields a negative index here — confirm
  // callers never pass one.
  def getColor(id: Int): Color = rgbColors(id % nColors)

  override def getColorCount() = nColors
}

object ColorBar {
  val texturesDir = Constants.ASSETS_DIR + "Textures" + File.separator
  lazy val coolwarmBar = ColorBar(texturesDir + "Cool2WarmBar.png")
  lazy val warmBar = ColorBar(texturesDir + "heatmap.png")

  /** Builds a bar from the top pixel row (y = 0) of an image, one color per column. */
  def apply(filename: String): ColorBar = {
    // NOTE(review): truncated in extraction; presumably reads the image file through ImageIO.
    val img = File(filename))
    val rgb = Array.ofDim[Color](img.getWidth)
    for (x <- 0 until rgb.length) {
      rgb(x) = new Color(img.getRGB(x, 0))
    }
    new ColorBar(rgb)
  }
}

/** Palette that steps the hue by 1/Constants.phi per id, starting from the hue of 0x4FD067. */
object PhiColorPalette extends ColorPalette {
  def getColor(id: Int): Color = {
    val startColor = new Color(0x4FD067)
    val hsb = Color.RGBtoHSB(startColor.getRed, startColor.getGreen, startColor.getBlue, null)
    val invPhi = 1.0/Constants.phi
    var hue = hsb(0) + id*invPhi
    // Keep only the fractional part of the hue.
    hue = hue - math.floor(hue)
    val c = Color.getHSBColor(hue.toFloat, 0.5f, 0.95f)
    // Switch blue and green for nice pretty colors
    new Color(c.getRed, c.getBlue, c.getGreen)
  }
}

/** Palette deriving hue and saturation from the id with fixed brightness 0.5. */
object DefaultColorPalette extends ColorPalette {
  def getColor(id: Int): Color = {
    // Angle in radians, wrapped into [0, 2*PI) and then normalised to [0, 1).
    var h = (-3.88 * id) % (2*Math.PI)
    if (h<0) h += 2*Math.PI
    h /= 2*Math.PI
    val c = Color.getHSBColor(h.toFloat, (0.4 + 0.2 * Math.sin(0.42 * id)).toFloat, 0.5f)
    c
  }
}
Source File: Summarizer.scala From berkeley-doc-summarizer with GNU General Public License v3.0 | 5 votes |
package edu.berkeley.nlp.summ

// NOTE(review): three import targets were lost in extraction. `java.io.File` is restored because
// `new File` is used below; the other two presumably imported project types used in this file
// (e.g. SummDoc, DiscourseDepExProcessed, CompressiveAnaphoraSummarizer) — confirm and restore.
import java.io.File

import edu.berkeley.nlp.entity.ConllDocReader
import edu.berkeley.nlp.entity.coref.CorefDocAssembler
import edu.berkeley.nlp.entity.coref.MentionPropertyComputer
import edu.berkeley.nlp.entity.coref.NumberGenderComputer
import edu.berkeley.nlp.entity.lang.EnglishCorefLanguagePack
import edu.berkeley.nlp.entity.lang.Language
import edu.berkeley.nlp.futile.LightRunner
import edu.berkeley.nlp.futile.fig.basic.IOUtils
import edu.berkeley.nlp.futile.util.Logger
import edu.berkeley.nlp.summ.preprocess.DiscourseDependencyParser
import edu.berkeley.nlp.summ.preprocess.EDUSegmenter

/**
 * Command-line entry point: summarizes every file of `inputDir` and writes one summary file of
 * the same name into `outputDir`.
 *
 * The fields below are the defaults; `LightRunner.populateScala` is called with the command-line
 * arguments before any of them is read (presumably to override them — confirm).
 */
object Summarizer {

  val numberGenderPath = "data/";
  val segmenterPath = "models/edusegmenter.ser.gz"
  val discourseParserPath = "models/discoursedep.ser.gz"
  val modelPath = "models/summarizer-full.ser.gz"

  val inputDir = ""
  val outputDir = ""

  // Indicates that we shouldn't do any discourse preprocessing; this is only appropriate
  // for the sentence-extractive version of the system
  val noRst = false

  // Summary budget, in words. Set this to whatever you want.
  val budget = 50

  /**
   * Loads the models, then reads, summarizes and writes each input document.
   *
   * @throws IllegalArgumentException if `inputDir` cannot be listed (missing, not a directory,
   *                                  or left at its empty default)
   */
  def main(args: Array[String]): Unit = {
    LightRunner.initializeOutput(Summarizer.getClass())
    LightRunner.populateScala(Summarizer.getClass(), args)

    Logger.logss("Loading model...")
    val model = IOUtils.readObjFile(modelPath).asInstanceOf[CompressiveAnaphoraSummarizer]
    Logger.logss("Model loaded!")

    // Segmenter and discourse parser are only needed (and only loaded) when RST processing is on.
    val discourseTools: Option[(EDUSegmenter, DiscourseDependencyParser)] =
      if (noRst) {
        None
      } else {
        Logger.logss("Loading segmenter...")
        val segmenter = IOUtils.readObjFile(segmenterPath).asInstanceOf[EDUSegmenter]
        Logger.logss("Segmenter loaded!")
        Logger.logss("Loading discourse parser...")
        val parser = IOUtils.readObjFile(discourseParserPath).asInstanceOf[DiscourseDependencyParser]
        Logger.logss("Discourse parser loaded!")
        Some((segmenter, parser))
      }

    val numberGenderComputer = NumberGenderComputer.readBergsmaLinData(numberGenderPath);
    val mpc = new MentionPropertyComputer(Some(numberGenderComputer))
    val reader = new ConllDocReader(Language.ENGLISH)
    val assembler = new CorefDocAssembler(new EnglishCorefLanguagePack, true)

    // File.listFiles returns null instead of throwing when the path cannot be listed;
    // `new File("")` (the default inputDir) is such a path.
    val filesToSummarize = Option(new File(inputDir).listFiles()).getOrElse(
      throw new IllegalArgumentException(s"inputDir '$inputDir' is not a readable directory"))

    for (file <- filesToSummarize) {
      val conllDoc = reader.readConllDocs(file.getAbsolutePath).head
      val corefDoc = assembler.createCorefDoc(conllDoc, mpc)
      val summDoc = SummDoc.makeSummDoc(conllDoc.docID, corefDoc, Seq())
      val ex = discourseTools match {
        case Some((segmenter, parser)) =>
          DiscourseDepExProcessed.makeWithEduAndSyntactic(summDoc, segmenter, parser)
        case None =>
          DiscourseDepExProcessed.makeTrivial(summDoc)
      }
      val summaryLines = model.summarize(ex, budget, true)
      val outWriter = IOUtils.openOutHard(outputDir + "/" + file.getName)
      try {
        for (summLine <- summaryLines) {
          outWriter.println(summLine)
        }
      } finally {
        outWriter.close()
      }
    }
    LightRunner.finalizeOutput()
  }
}
Source File: EDUAligner.scala From berkeley-doc-summarizer with GNU General Public License v3.0 | 5 votes |
// NOTE(review): the package name and one import target on the next line were lost in extraction;
// `File` is used in main and presumably came from the lost import.
package import import scala.collection.mutable.ArrayBuffer import edu.berkeley.nlp.entity.coref.MentionPropertyComputer import edu.berkeley.nlp.entity.coref.NumberGenderComputer import edu.berkeley.nlp.futile.util.Logger

object EDUAligner {

  /**
   * Walks the leaves and the document sentences in parallel and records, for each leaf, the
   * (sentence index, word index) position where it starts and where it ends.
   *
   * @param leafWords one word sequence per leaf
   * @param docSents  the document's sentences
   * @return a buffer of (start, end) pairs, one per leaf; the end position is exclusive and, when
   *         a leaf finishes exactly at a sentence end, is reported as (that sentence, its size)
   * @throws IllegalArgumentException (from `require`) when a leaf does not fit into the current
   *         sentence or a word pair cannot be reconciled
   */
  def align(leafWords: Seq[Seq[String]], docSents: Seq[DepParse]) = {
    // Cursor into the document: current sentence and word within it.
    var currSentIdx = 0
    var currWordIdx = 0
    val leafSpans = new ArrayBuffer[((Int,Int),(Int,Int))]
    for (i <- 0 until leafWords.size) {
      val start = (currSentIdx, currWordIdx)
      val currLen = docSents(currSentIdx).size
      // The leaf must fit into the remainder of the current sentence.
      require(currWordIdx + leafWords(i).size <= currLen,
              currWordIdx + " " + leafWords(i).size + " " + currLen + "\nsent = " + docSents(currSentIdx).getWords.toSeq + ", leaf words = " + leafWords(i).toSeq)
      var leafWordIdx = 0
      while (leafWordIdx < leafWords(i).size) {
        val docWord = docSents(currSentIdx).getWord(currWordIdx)
        val leafWord = leafWords(i)(leafWordIdx)
        val currWordsEqual = docWord == leafWord
        val currWordsEffectivelyEqual = docWord.contains("'") || docWord.contains("`") // Ignore some punc symbols because they're weird
        // Spurious period but last thing ended in period, so it was probably added by the tokenizer (like "Ltd. .")
        if (!currWordsEqual && docWord == "." && currWordIdx > 0 && docSents(currSentIdx).getWord(currWordIdx - 1).endsWith(".")) {
          // Skip the extra document token; wrap to the next sentence when this one is exhausted.
          currWordIdx += 1
          if (currWordIdx == docSents(currSentIdx).size) {
            currSentIdx += 1
            currWordIdx = 0
          }
          // N.B. don't advance leafWordIdx
        } else {
          require(currWordsEqual || currWordsEffectivelyEqual, docWord + " :: " + leafWord + "\nsent = " + docSents(currSentIdx).getWords.toSeq + ", leaf words = " + leafWords(i).toSeq)
          // Consume one word on both sides, wrapping to the next sentence at a sentence end.
          currWordIdx += 1
          if (currWordIdx == docSents(currSentIdx).size) {
            currSentIdx += 1
            currWordIdx = 0
          }
          leafWordIdx += 1
        }
      }
      // A cursor at word 0 means the leaf ended exactly at the end of the previous sentence.
      val end = if (currWordIdx == 0) {
        (currSentIdx - 1, docSents(currSentIdx - 1).size)
      } else {
        (currSentIdx, currWordIdx)
      }
      leafSpans += start -> end
//      if (currWordIdx == docSents(currSentIdx).size) {
//        currSentIdx += 1
//        currWordIdx = 0
//      }
    }
    leafSpans
//    }
  }

  /**
   * Runs `align` over every tree file / processed document pair found under the hard-coded
   * data directories and logs the names of the files for which alignment threw.
   */
  def main(args: Array[String]) {
    val allTreeFiles = new File("data/RSTDiscourse/data/RSTtrees-WSJ-main-1.0/ALL-FILES/").listFiles.sortBy(_.getName).filter(_.getName.endsWith(".out.dis"))
    // NOTE(review): the left part of this expression was truncated in extraction; it presumably
    // maps over allTreeFiles — restore before compiling.
    val allTrees = => DiscourseTreeReader.readDisFile(file.getAbsolutePath))
//    val allSummDocs = new File("data/RSTDiscourse/data/RSTtrees-WSJ-main-1.0/ALL-FILES-PREPROC/").listFiles.sortBy(_.getName))
    val numberGenderComputer = NumberGenderComputer.readBergsmaLinData("data/");
    val mpc = new MentionPropertyComputer(Some(numberGenderComputer))
    val allSummDocFiles = new File("data/RSTDiscourse/data/RSTtrees-WSJ-main-1.0/ALL-FILES-PROC2/").listFiles.sortBy(_.getName)
    // NOTE(review): truncated in extraction as well; presumably maps over allSummDocFiles.
    val allSummDocs = => SummDoc.readSummDocNoAbstract(file.getAbsolutePath, mpc, filterSpuriousDocs = false, filterSpuriousSummSents = false))
    // NOTE(review): as extracted this is a File, yet `.toSet` is called on it below; the listing
    // of file names was presumably lost — confirm.
    val summNames = new File("data/RSTDiscourse/data/RSTtrees-WSJ-main-1.0/SUMM-SUBSET-PROC/")
    require(allTrees.size == allSummDocs.size)
    val badFiles = new ArrayBuffer[String]
    for (i <- 0 until allTrees.size) {
      // Tree file names carry a 4-character suffix beyond the document file name.
      require(allTreeFiles(i).getName.dropRight(4) == allSummDocFiles(i).getName, allTreeFiles(i).getName.dropRight(4) + " " + allSummDocFiles(i).getName)
      Logger.logss(allSummDocFiles(i).getName)
      try {
        align(allTrees(i).leafWords, allSummDocs(i).doc)
      } catch {
        // Any failure is logged and the file is recorded as bad; processing continues.
        case e: Exception => {
          Logger.logss(e)
          badFiles += allSummDocFiles(i).getName
        }
      }
    }
    Logger.logss(badFiles.size + " bad files: " + badFiles)
    val badSummDocs = (badFiles.toSet & summNames.toSet)
    Logger.logss(badSummDocs.size + " bad summarized files: " + badSummDocs.toSeq.sorted)
  }
}
Source File: RougeFileMunger.scala From berkeley-doc-summarizer with GNU General Public License v3.0 | 5 votes |
package edu.berkeley.nlp.summ

// NOTE(review): one import target was lost in extraction; `java.io.File` is restored because
// `new File` is used below.
import java.io.File

import edu.berkeley.nlp.futile.fig.basic.IOUtils
import scala.collection.JavaConverters._

/** Converts plain-text summaries into the HTML layout and settings file consumed by ROUGE. */
object RougeFileMunger {

  val input = "data/RSTDiscourse/sample-outputs/"
  val output = "data/RSTDiscourse/sample-outputs-rouge/"
  val settingsPath = "data/RSTDiscourse/rouge-settings.xml"
  val detokenize = true

  /** Part of a file name before its first underscore, or the whole name when it has none. */
  private def namePrefix(fileName: String): String = {
    val underscore = fileName.indexOf("_")
    if (underscore == -1) fileName else fileName.substring(0, underscore)
  }

  /** Lists a directory, failing with a clear message instead of a later NullPointerException. */
  private def listDir(path: String): Array[File] =
    Option(new File(path).listFiles).getOrElse(
      throw new IllegalArgumentException(s"'$path' is not a readable directory"))

  /**
   * Writes one summary as an HTML page with one numbered anchor per sentence.
   *
   * @param fileName title of the page
   * @param sents    summary sentences, in order
   * @param outPath  path of the file to write
   * @param keepFile when false the file is registered for deletion at JVM exit
   */
  def writeSummary(fileName: String, sents: Seq[String], outPath: String, keepFile: Boolean): Unit = {
    val outFile = new File(outPath)
    if (!keepFile) outFile.deleteOnExit()
    val outWriter = IOUtils.openOutHard(outFile)
    try {
      outWriter.println("<html>")
      outWriter.println("<head><title>" + fileName + "</title></head>")
      // The opening tag previously had a doubled '<', producing malformed HTML.
      outWriter.println("<body bgcolor=\"white\">")
      for ((sent, idx) <- sents.zipWithIndex) {
        val counter = idx + 1 // anchors are numbered from 1
        outWriter.println("<a name=\"" + counter + "\">[" + counter + "]</a> <a href=\"#" + counter + "\" id=" + counter + ">" + sent + "</a>")
      }
      outWriter.println("</body>")
      outWriter.println("</html>")
    } finally {
      outWriter.close()
    }
  }

  /** Re-attaches punctuation, quotes and the dollar sign that tokenization separated by a space. */
  def detokenizeSentence(line: String) = {
    line.replace(" ,", ",").replace(" .", ".").replace(" !", "!").replace(" ?", "?").replace(" :", ":").replace(" ;", ";").
         replace("`` ", "``").replace(" ''", "''").replace(" '", "'").replace(" \"", "\"").replace("$ ", "$")
  }

  /**
   * Converts every file of `rootPath/subDir` to HTML under `output/subDir`, keeping file names.
   * Sentences are detokenized when `detokenize` is set.
   */
  def processFiles(rootPath: String, subDir: String) = {
    for (refFile <- listDir(rootPath + "/" + subDir)) {
      val name = namePrefix(refFile.getName())
      // NOTE(review): the original conversion/map call was truncated in extraction; `.asScala.map`
      // is the reconstruction implied by the JavaConverters import and the surviving lambda body.
      val lines = IOUtils.readLinesHard(refFile.getAbsolutePath()).asScala.map(sent => if (detokenize) detokenizeSentence(sent) else sent)
      writeSummary(name, lines, output + "/" + subDir + "/" + refFile.getName, true)
    }
  }

  /**
   * Writes the ROUGE settings XML with one EVAL task per file found in `dirPaths/reference`.
   * File names without an underscore are used whole, as in `processFiles`; previously they
   * raised a StringIndexOutOfBoundsException.
   */
  def writeSettings(settingsPath: String, dirPaths: String): Unit = {
    val rawDirName = new File(dirPaths).getName()
    val docs = listDir(dirPaths + "/reference")
    val outWriter = IOUtils.openOutHard(settingsPath)
    try {
      outWriter.println("""<ROUGE_EVAL version="1.55">""")
      for ((doc, idx) <- docs.zipWithIndex) {
        val rawName = namePrefix(doc.getName())
        outWriter.println("<EVAL ID=\"TASK_" + idx + "\">")
        outWriter.println("<MODEL-ROOT>" + rawDirName + "/reference</MODEL-ROOT>")
        outWriter.println("<PEER-ROOT>" + rawDirName + "/system</PEER-ROOT>")
        outWriter.println("<INPUT-FORMAT TYPE=\"SEE\">  </INPUT-FORMAT>")
        outWriter.println("<PEERS>")
        outWriter.println("<P ID=\"1\">" + rawName + "_system1.txt</P>")
        outWriter.println("</PEERS>")
        outWriter.println("<MODELS>")
        outWriter.println("<M ID=\"1\">" + rawName + "_reference1.txt</M>")
        outWriter.println("</MODELS>")
        outWriter.println("</EVAL>")
      }
      outWriter.println("</ROUGE_EVAL>")
    } finally {
      outWriter.close()
    }
  }

  /** Converts the reference and system summaries, then writes the settings file. */
  def main(args: Array[String]): Unit = {
    processFiles(input, "reference")
    processFiles(input, "system")
    writeSettings(settingsPath, output)
  }
}
package org.apache.spark.sql

// NOTE(review): two import targets were lost in extraction. `java.io.File` is restored; the
// `Files` used by beforeAll presumably is Guava's com.google.common.io.Files — confirm and restore.
import java.io.File

import scala.util.control.NonFatal

import org.apache.spark.sql.execution.datasources.hbase.Logging
import org.apache.hadoop.hbase.client.Table
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HBaseTestingUtility, TableName}
import org.apache.spark.sql.execution.datasources.hbase.SparkHBaseConf
import org.apache.spark.{SparkContext, SparkConf}
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}

/**
 * Base suite that starts an HBase mini cluster and a local SparkSession before all tests and
 * shuts both down afterwards.
 */
class SHC extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll with Logging {

  /** Enables the `$"name"` column syntax inside the suite. */
  implicit class StringToColumn(val sc: StringContext) {
    def $(args: Any*): ColumnName = {
      new ColumnName(sc.s(args: _*))
    }
  }

  var spark: SparkSession = null
  var sc: SparkContext = null
  var sqlContext: SQLContext = null
  var df: DataFrame = null

  private[spark] var htu = new HBaseTestingUtility
  private[spark] def tableName = "table1"

  /** Column family names cf0 .. cf8. */
  private[spark] def columnFamilies: Array[String] = Array.tabulate(9){ x=> s"cf$x"}
  var table: Table = null
  val conf = new SparkConf
  conf.set(SparkHBaseConf.testConf, "true")
  // private[spark] var columnFamilyStr = Bytes.toString(columnFamily)

  /** Catalog JSON for table `tName`: a string row key plus one typed column per family cf1..cf8. */
  def defineCatalog(tName: String) = s"""{
                   |"table":{"namespace":"default", "name":"$tName"},
                   |"rowkey":"key",
                   |"columns":{
                   |"col0":{"cf":"rowkey", "col":"key", "type":"string"},
                   |"col1":{"cf":"cf1", "col":"col1", "type":"boolean"},
                   |"col2":{"cf":"cf2", "col":"col2", "type":"double"},
                   |"col3":{"cf":"cf3", "col":"col3", "type":"float"},
                   |"col4":{"cf":"cf4", "col":"col4", "type":"int"},
                   |"col5":{"cf":"cf5", "col":"col5", "type":"bigint"},
                   |"col6":{"cf":"cf6", "col":"col6", "type":"smallint"},
                   |"col7":{"cf":"cf7", "col":"col7", "type":"string"},
                   |"col8":{"cf":"cf8", "col":"col8", "type":"tinyint"}
                   |}
                   |}""".stripMargin

  @deprecated(since = "04.12.2017(dd/mm/year)", message = "use `defineCatalog` instead")
  def catalog = defineCatalog(tableName)

  /** Starts the mini cluster, publishes its configuration and creates the local SparkSession. */
  override def beforeAll(): Unit = {
    val tempDir: File = Files.createTempDir
    tempDir.deleteOnExit
    htu.startMiniCluster
    SparkHBaseConf.conf = htu.getConfiguration
    logInfo(" - minicluster started")
    println(" - minicluster started")
    spark = SparkSession.builder()
      .master("local")
      .appName("HBaseTest")
      .config(conf)
      .getOrCreate()

    sqlContext = spark.sqlContext
    sc = spark.sparkContext
  }

  /** Shuts the cluster down; Spark is stopped even if that fails, and only if it was created. */
  override def afterAll(): Unit = {
    try {
      htu.shutdownMiniCluster()
    } finally {
      if (spark != null) spark.stop()
    }
  }

  /**
   * Drops table `name` if present and recreates it as a multi-region table with families `cfs`.
   * Delegates to the byte-array overload after encoding the names.
   */
  def createTable(name: String, cfs: Array[String]): Unit = {
    // NOTE(review): the original family conversion was truncated in extraction; encoding each
    // name with Bytes.toBytes is what the byte-array overload below requires.
    createTable(Bytes.toBytes(name), cfs.map(cf => Bytes.toBytes(cf)))
  }

  /** Drops table `name` if present and recreates it as a multi-region table with families `cfs`. */
  def createTable(name: Array[Byte], cfs: Array[Array[Byte]]): Unit = {
    try {
      htu.deleteTable(TableName.valueOf(name))
    } catch {
      // Best effort: the table usually does not exist yet. Fatal JVM errors still propagate.
      case NonFatal(_) =>
        logInfo(" - no table " + Bytes.toString(name) + " found")
    }
    htu.createMultiRegionTable(TableName.valueOf(name), cfs)
  }
}
Example 173
package org.apache.spark.sql

// NOTE(review): two import targets were lost in extraction. `java.io.File` is restored; the
// `Files` used by beforeAll presumably is Guava's com.google.common.io.Files — confirm and restore.
import java.io.File

import scala.collection.JavaConverters._
import scala.util.control.NonFatal

import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{TableName, HBaseTestingUtility}
import org.apache.spark.sql.execution.datasources.hbase.Logging
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}

/**
 * Base suite that runs a ZooKeeper + HBase mini cluster and fills one multi-region table with
 * 100 rows before the tests.
 */
class HBaseTestSuite extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll with Logging {
  private[spark] var htu = HBaseTestingUtility.createLocalHTU()
  private[spark] var tableName: Array[Byte] = Bytes.toBytes("t1")
  private[spark] var columnFamily: Array[Byte] = Bytes.toBytes("cf0")
  private[spark] var columnFamilies: Array[Array[Byte]] =
    Array(Bytes.toBytes("cf0"), Bytes.toBytes("cf1"), Bytes.toBytes("cf2"), Bytes.toBytes("cf3"), Bytes.toBytes("cf4"))
  var table: Table = null
  // Kept so that afterAll can release the connection opened by setupTable.
  private var connection: Connection = null
  // private[spark] var columnFamilyStr = Bytes.toString(columnFamily)

  /** Starts the mini clusters, drops a left-over table if any and populates a fresh one. */
  override def beforeAll(): Unit = {
    val tempDir: File = Files.createTempDir
    tempDir.deleteOnExit
    htu.cleanupTestDir
    htu.startMiniZKCluster
    htu.startMiniHBaseCluster(1, 4)
    logInfo(" - minicluster started")
    println(" - minicluster started")
    try {
      htu.deleteTable(TableName.valueOf(tableName))

      //htu.createTable(TableName.valueOf(tableName), columnFamily, 2, Bytes.toBytes("abc"), Bytes.toBytes("xyz"), 2)
    } catch {
      // Best effort: the table usually does not exist yet. Fatal JVM errors still propagate.
      case NonFatal(_) =>
        logInfo(" - no table " + Bytes.toString(tableName) + " found")
    }
    setupTable()
  }

  /**
   * Closes the table and connection, drops the table and stops the clusters. Failures are
   * logged rather than rethrown; a table or connection that was never opened is skipped so the
   * cluster shutdown still runs.
   */
  override def afterAll(): Unit = {
    try {
      if (table != null) table.close()
      if (connection != null) connection.close()
      println("shutdown")
      htu.deleteTable(TableName.valueOf(tableName))
      logInfo("shuting down minicluster")
      htu.shutdownMiniHBaseCluster
      htu.shutdownMiniZKCluster
      logInfo(" - minicluster shut down")
      htu.cleanupTestDir
    } catch {
      case NonFatal(_) => logError("teardown error")
    }
  }

  /**
   * Creates the multi-region table named by `tableName` and writes rows row0 .. row99, each
   * with one cell `c<y>` = "value <x> <y>" per column family.
   */
  def setupTable(): Unit = {
    val config = htu.getConfiguration
    // Use the configured name everywhere instead of repeating the "t1" literal.
    val name = TableName.valueOf(tableName)
    htu.createMultiRegionTable(name, columnFamilies)
    println("create htable t1")
    connection = ConnectionFactory.createConnection(config)
    table = connection.getTable(name)

    // The locator is only needed to print the region layout; release it right after.
    val locator = connection.getRegionLocator(name)
    val regionLocations =
      try locator.getAllRegionLocations.asScala.toSeq
      finally locator.close()
    println(s"$regionLocations size: ${regionLocations.size}")

    (0 until 100).foreach { x =>
      val put = new Put(Bytes.toBytes(s"row$x"))
      columnFamilies.indices.foreach { y =>
        put.addColumn(columnFamilies(y), Bytes.toBytes(s"c$y"), Bytes.toBytes(s"value $x $y"))
      }
      table.put(put)
    }
  }
}
Example 174
// NOTE(review): on the next line the prefix of `import{ File, PrintWriter }` (presumably java.io)
// and one whole import target were lost in extraction.
package im.tox.tox4j.impl.jni.codegen import{ File, PrintWriter } import import gnieh.pp.PrettyRenderer import im.tox.tox4j.impl.jni.codegen.cxx.Ast._ import im.tox.tox4j.impl.jni.codegen.cxx.{ Ast, Print }

/** Name conversions between the C++ and Java sides of the generated code. */
object NameConversions {
  // NOTE(review): the bodies of all four methods were truncated in extraction (only `, name)`
  // survives); the conversion each one applied cannot be told from here — restore from the
  // original source.
  def cxxVarName(name: String): String =, name)
  def cxxTypeName(name: String): String =, name)
  def javaVarName(name: String): String =, name)
  def javaTypeName(name: String): String =, name)
}

/** Base class for code generator programs. */
abstract class CodeGenerator extends App {

  /**
   * Writes a translation unit to `cpp/src/<path>`. The writer is closed in `finally`.
   *
   * @param path file path relative to cpp/src
   * @param sep  separator, "\n\n" by default
   * @param code translation unit to write
   */
  def writeCode(path: String, sep: String = "\n\n")(code: Ast.TranslationUnit): Unit = {
    val renderer = new PrettyRenderer(130)
    val writer = new PrintWriter(new File("cpp/src", path))
    try {
      // NOTE(review): the printed expression was truncated in extraction; `renderer`, `sep` and
      // `code` are unused as extracted and were presumably part of it.
      writer.println(
    } finally {
      writer.close()
    }
  }

  /** Wraps `code` as: Include(header), Ifdef(guard), the flattened units, Endif. */
  def ifdef(header: String, guard: String, code: TranslationUnit*): TranslationUnit = {
    Include(header) +:
      Ifdef(guard) +:
      code.flatten :+
      Endif
  }
}
Example 175
// NOTE(review): the package declaration was lost in extraction (the `private[spreadsheets]`
// qualifiers below require an enclosing package named `spreadsheets`) — restore it above the
// imports. `java.io.File` is restored because `new File` is used below.
import java.io.File

import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider, RelationProvider, SchemaRelationProvider}
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}

/** Data source entry point: builds spreadsheet relations for reading and for saving a DataFrame. */
class DefaultSource extends RelationProvider with SchemaRelationProvider with CreatableRelationProvider {
  final val DEFAULT_CREDENTIAL_PATH = "/etc/gdata/credential.p12"

  /** Read without a user schema; forwards to the schema variant with a null schema. */
  override def createRelation(sqlContext: SQLContext, parameters: Map[String, String]) =
    createRelation(sqlContext, parameters, null)

  /**
   * Splits the mandatory "path" option on '/' into (spreadsheet, worksheet).
   * Fails when "path" is absent or has fewer than two segments; segments beyond the second
   * are ignored.
   */
  private[spreadsheets] def pathToSheetNames(parameters: Map[String, String]): (String, String) = {
    val path = parameters.getOrElse("path", sys.error("'path' must be specified for spreadsheets."))
    path.split('/') match {
      case Array(spreadsheet, worksheet, _*) => (spreadsheet, worksheet)
      case _ => throw new Exception("'path' must be formed like '<spreadsheet>/<worksheet>'")
    }
  }

  /** Read with an optional (possibly null) user schema. */
  override def createRelation(sqlContext: SQLContext, parameters: Map[String, String], schema: StructType) = {
    val (spreadsheetName, worksheetName) = pathToSheetNames(parameters)
    val context = createSpreadsheetContext(parameters)
    createRelation(sqlContext, context, spreadsheetName, worksheetName, schema)
  }

  /**
   * Save: adds a worksheet holding the collected rows of `data` to the named spreadsheet and
   * returns a relation over it. Throws a RuntimeException when the spreadsheet is not found.
   */
  override def createRelation(sqlContext: SQLContext, mode: SaveMode, parameters: Map[String, String], data: DataFrame): BaseRelation = {
    val (spreadsheetName, worksheetName) = pathToSheetNames(parameters)
    // Implicit because findSpreadsheet below picks the context up from scope.
    implicit val context = createSpreadsheetContext(parameters)
    val target = SparkSpreadsheetService.findSpreadsheet(spreadsheetName).getOrElse(
      throw new RuntimeException(s"no such a spreadsheet: $spreadsheetName"))

    target.addWorksheet(worksheetName, data.schema, data.collect().toList, Util.toRowData)
    createRelation(sqlContext, context, spreadsheetName, worksheetName, data.schema)
  }

  /** Builds the service context from "serviceAccountId" and "credentialPath" (default path if unset). */
  private[spreadsheets] def createSpreadsheetContext(parameters: Map[String, String]) = {
    val credentialFile = new File(parameters.getOrElse("credentialPath", DEFAULT_CREDENTIAL_PATH))
    SparkSpreadsheetService(parameters.get("serviceAccountId"), credentialFile)
  }

  /** Null-tolerant variant: a null schema is passed on as None. */
  private[spreadsheets] def createRelation(sqlContext: SQLContext,
                                           context: SparkSpreadsheetService.SparkSpreadsheetContext,
                                           spreadsheetName: String,
                                           worksheetName: String,
                                           schema: StructType): SpreadsheetRelation =
    createRelation(sqlContext, context, spreadsheetName, worksheetName, Option(schema))

  /** Creates the relation itself. */
  private[spreadsheets] def createRelation(sqlContext: SQLContext,
                                           context: SparkSpreadsheetService.SparkSpreadsheetContext,
                                           spreadsheetName: String,
                                           worksheetName: String,
                                           schema: Option[StructType]): SpreadsheetRelation =
    SpreadsheetRelation(context, spreadsheetName, worksheetName, schema)(sqlContext)
}
Example 176
// NOTE(review): the package name and one import target on the next line were lost in extraction;
// `File` is used below and presumably came from the lost import.
package import import org.scalatest.{BeforeAndAfter, FlatSpec}

/** Read-side checks against a fixed test spreadsheet, using a service-account credential file. */
class SparkSpreadsheetServiceReadSuite extends FlatSpec with BeforeAndAfter {
  // NOTE(review): empty as extracted — the account id was presumably redacted; confirm.
  private val serviceAccountId = ""
  private val testCredentialPath = "src/test/resources/spark-google-spreadsheets-test-eb7b191d1e1d.p12"
  private val TEST_SPREADSHEET_NAME = "SpreadsheetSuite"
  private val TEST_SPREADSHEET_ID = "1H40ZeqXrMRxgHIi3XxmHwsPs2SgVuLUFbtaGcqCAk6c"

  // Both values are created when the suite is constructed.
  private val context: SparkSpreadsheetService.SparkSpreadsheetContext =
    SparkSpreadsheetService.SparkSpreadsheetContext(Some(serviceAccountId), new File(testCredentialPath))
  private val spreadsheet: SparkSpreadsheetService.SparkSpreadsheet =
    context.findSpreadsheet(TEST_SPREADSHEET_ID)

  behavior of "A Spreadsheet"

  it should "have a name" in {
    // NOTE(review): the left operand was truncated in extraction; presumably the spreadsheet's name.
    assert( == TEST_SPREADSHEET_NAME)
  }

  behavior of "A worksheet"

  it should "be None when a worksheet is missing" in {
    assert(spreadsheet.findWorksheet("foo").isEmpty)
  }

  it should "be retrieved when the worksheet exists" in {
    val worksheet = spreadsheet.findWorksheet("case2")
    assert(worksheet.isDefined)
    // NOTE(review): left operand truncated in extraction; presumably the worksheet's name.
    assert( == "case2")
    assert(worksheet.get.headers == List("id", "firstname", "lastname", "email", "country", "ipaddress"))

    // NOTE(review): the "email" and "ipaddress" values below look altered by the extraction
    // (masked address, empty string) — verify against the real sheet.
    val firstRow = worksheet.get.rows(0)
    assert(firstRow == Map(
      "id" -> "1",
      "firstname" -> "Annie",
      "lastname" -> "Willis",
      "email" -> "[email protected]",
      "country" -> "Burundi",
      "ipaddress" -> ""))
  }
}
Example 177
// NOTE(review): the package name and three import targets on the next line were lost in
// extraction; `File`, `Files`, `MLlibTestSparkContext` and `MetaSummary` are used below and
// presumably came from them.
package import import java.nio.charset.StandardCharsets import import org.apache.spark.SparkFunSuite import import org.apache.spark.util.SparkUtil

/** Checks reading (and writing) of the "libffm" data source on a small generated dataset. */
class LibFFMRelationSuite extends SparkFunSuite with MLlibTestSparkContext {
  // Path for dataset
  var path: String = _

  /** Creates a temp directory holding `_SUCCESS`, `part-00000` and `part-00001`, and stores its path. */
  override def beforeAll(): Unit = {
    super.beforeAll()
    // NOTE(review): both literals were flattened onto one line by the extraction; the original
    // presumably had one record per line with `|` margins — restore the line breaks.
    val lines0 = """ |1 0:1:1.0 1:3:2.0 2:5:3.0 |0 """.stripMargin
    val lines1 = """ |0 0:2:4.0 1:4:5.0 2:6:6.0 """.stripMargin
    val dir = SparkUtil.createTempDir()
    val succ = new File(dir, "_SUCCESS")
    val file0 = new File(dir, "part-00000")
    val file1 = new File(dir, "part-00001")
    Files.write("", succ, StandardCharsets.UTF_8)
    Files.write(lines0, file0, StandardCharsets.UTF_8)
    Files.write(lines1, file1, StandardCharsets.UTF_8)
    path = dir.getPath
  }

  /** Deletes the dataset directory, but only when it lies under the hard-coded prefix below. */
  override def afterAll(): Unit = {
    try {
      // NOTE(review): this prefix is one developer's Windows temp directory, so on any other
      // machine the directory is never deleted — consider deriving it from java.io.tmpdir.
      val prefix = "C:\\Users\\fitzwang\\AppData\\Local\\Temp\\"
      if (path.startsWith(prefix)) {
        SparkUtil.deleteRecursively(new File(path))
      }
    } finally {
      super.afterAll()
    }
  }

  test("ffmIO"){
    // NOTE(review): the reader expression was truncated in extraction; presumably a read of
    // format "libffm" through the suite's SparkSession.
    val df ="libffm").load(path)
    // Metadata of the second schema field carries the field set and key/field map printed below.
    val metadata = df.schema(1).metadata
    val fieldSet = MetaSummary.getFieldSet(metadata)
    println(fieldSet.mkString("[", ",", "]"))
    val keyFieldMap = MetaSummary.getKeyFieldMap(metadata)
    println(keyFieldMap.mkString("[", ",", "]"))
    // Writes to a fixed relative path in the working directory.
    df.write.format("libffm").save("temp.libffm")
  }

  test("read_ffm"){
    // NOTE(review): truncated in extraction, same as in "ffmIO".
    val df ="libffm").load(path)
    val metadata = df.schema(1).metadata
    val fieldSet = MetaSummary.getFieldSet(metadata)
    println(fieldSet.mkString("[", ",", "]"))
    val keyFieldMap = MetaSummary.getKeyFieldMap(metadata)
    println(keyFieldMap.mkString("[", ",", "]"))
  }
}
Example 178
// NOTE(review): the package declaration was lost in extraction — restore it above the imports.
// `java.io.File` is restored because `new File` is used below.
import java.io.File

import org.apache.spark.SparkContext
import org.apache.spark.sql.types.UDTRegistration
import org.apache.spark.sql.{SQLContext, SQLImplicits, SparkSession}
import org.apache.spark.util.{SparkUtil, Utils}
import org.scalatest.Suite

/**
 * Mix-in that provides a local two-thread SparkSession, its SparkContext and a checkpoint
 * directory (inside the suite's temp directory) for the lifetime of the suite.
 */
trait MLlibTestSparkContext extends TempDirectory { self: Suite =>
  @transient var spark: SparkSession = _
  @transient var sc: SparkContext = _
  @transient var checkpointDir: String = _

  // (user class, UDT class) pairs registered before the session is created, in this order.
  private val linalgUDTs: Seq[(String, String)] = Seq(
    "org.apache.spark.linalg.Vector" -> "org.apache.spark.linalg.VectorUDT",
    "org.apache.spark.linalg.DenseVector" -> "org.apache.spark.linalg.VectorUDT",
    "org.apache.spark.linalg.SparseVector" -> "org.apache.spark.linalg.VectorUDT",
    "org.apache.spark.linalg.Matrix" -> "org.apache.spark.linalg.MatrixUDT",
    "org.apache.spark.linalg.DenseMatrix" -> "org.apache.spark.linalg.MatrixUDT",
    "org.apache.spark.linalg.SparseMatrix" -> "org.apache.spark.linalg.MatrixUDT"
  )

  /** Registers the linalg UDTs, creates the session and points checkpointing at a fresh directory. */
  override def beforeAll(): Unit = {
    super.beforeAll()
    linalgUDTs.foreach { case (userClass, udtClass) => SparkUtil.UDTRegister(userClass, udtClass) }
    spark = SparkSession.builder
      .master("local[2]")
      .appName("MLlibUnitTest")
      .getOrCreate()
    sc = spark.sparkContext

    checkpointDir = SparkUtil.createDirectory(tempDir.getCanonicalPath, "checkpoints").toString
    sc.setCheckpointDir(checkpointDir)
  }

  /**
   * Removes the checkpoint directory and stops the session.
   *
   * `checkpointDir` is still null when `beforeAll` failed early; it is skipped in that case so
   * the session is stopped regardless (previously `new File(null)` threw and skipped the stop).
   */
  override def afterAll(): Unit = {
    try {
      Option(checkpointDir).foreach(dir => SparkUtil.deleteRecursively(new File(dir)))
      SparkSession.clearActiveSession()
      if (spark != null) {
        spark.stop()
      }
      spark = null
    } finally {
      super.afterAll()
    }
  }

  /**
   * A helper object for importing SQL implicits.
   *
   * Note that the alternative of importing `spark.implicits._` is not possible here.
   * This is because we create the `SQLContext` immediately before the first test is run,
   * but the implicits import is needed in the constructor.
   */
  protected object testImplicits extends SQLImplicits {
    protected override def _sqlContext: SQLContext = self.spark.sqlContext
  }
}
Example 179
// NOTE(review): the package declaration was lost in extraction — restore it above the imports.
// `java.io.File` is restored because the `File` type is used below.
import java.io.File

import org.scalatest.{BeforeAndAfterAll, Suite}
import org.apache.spark.util.SparkUtil

/**
 * Gives a suite a scratch directory: created before the first test, removed recursively after
 * the last one.
 */
trait TempDirectory extends BeforeAndAfterAll { self: Suite =>

  // Unset until beforeAll has run.
  private var scratchDir: File = _

  /** The scratch directory of this suite. */
  protected def tempDir: File = scratchDir

  /** Creates the directory, using the concrete suite's class name as name prefix. */
  override def beforeAll(): Unit = {
    super.beforeAll()
    val prefix = this.getClass.getName
    scratchDir = SparkUtil.createTempDir(namePrefix = prefix)
  }

  /** Deletes the directory; the parent's teardown runs even if the deletion fails. */
  override def afterAll(): Unit =
    try SparkUtil.deleteRecursively(scratchDir)
    finally super.afterAll()
}
Example 180
// NOTE(review): the package declaration, the `java.io` prefix of the first import and one whole
// import target were lost in extraction. The prefix is restored; `Params`, `GeneralMLWritable`,
// `Identifiable` and `PMMLUtils` presumably came from the lost parts — confirm and restore.
import java.io.{File, IOException}

import org.dmg.pmml.PMML
import org.scalatest.Suite
import org.apache.spark.SparkContext

trait PMMLReadWriteTest extends TempDirectory { self: Suite =>

  /**
   * Exercises PMML export of `instance`; the exported model has to be small enough to be
   * loaded locally.
   *
   * Steps: save to a fresh path, verify that a second plain save fails with IOException, save
   * again with overwrite, reload the text through `sc`, parse it as PMML and check that the
   * header's application name starts with "Apache Spark". Model contents are only checked by
   * the supplied callback.
   *
   * @param sc             context used to read the exported text back
   * @param instance       writable instance to export
   * @param checkModelData caller-supplied checks on the parsed model
   */
  def testPMMLWrite[T <: Params with GeneralMLWritable](sc: SparkContext, instance: T,
    checkModelData: PMML => Unit): Unit = {
    val uid = instance.uid
    val exportDir = new File(tempDir, Identifiable.randomUID("pmml-"))
    val target = new File(exportDir, uid).getPath

    // A fresh writer per call, as each save starts from `instance.write`.
    def pmmlWriter = instance.write.format("pmml")

    pmmlWriter.save(target)
    // The target exists now, so saving without overwrite must fail.
    intercept[IOException] {
      pmmlWriter.save(target)
    }
    pmmlWriter.overwrite().save(target)

    val exportedText = sc.textFile(target).collect.mkString("\n")
    val parsed = PMMLUtils.loadFromString(exportedText)
    assert(parsed.getHeader.getApplication.getName.startsWith("Apache Spark"))
    checkModelData(parsed)
  }
}
Example 181
// NOTE(review): one import target on the next line was lost in extraction; `File` is used by
// fromHoconFile and presumably came from it.
package zio.config.typesafe import import java.lang.{ Boolean => JBoolean } import com.typesafe.config._ import zio.config.PropertyTree.{ Leaf, _ } import zio.config.{ ConfigSource, _ } import zio.{ IO, Task, ZIO } import scala.collection.JavaConverters._ import scala.util.{ Failure, Success, Try }

/** Builds `ConfigSource` values from Typesafe Config (HOCON) inputs. */
object TypesafeConfigSource {

  /** Source from `ConfigFactory.load`, resolved. Left carries the error message. */
  def fromDefaultLoader: Either[String, ConfigSource] =
    fromTypesafeConfig(ConfigFactory.load.resolve)

  /**
   * Source from a HOCON file. Parsing and resolving run inside `IO.effect`; a Left from the
   * conversion is turned into a failed Task carrying a RuntimeException with that message.
   */
  def fromHoconFile[A](
    file: File
  ): Task[ConfigSource] =
    IO.effect(ConfigFactory.parseFile(file).resolve)
      .flatMap(typesafeConfig => {
        ZIO
          .fromEither(fromTypesafeConfig(typesafeConfig))
          .mapError(str => new RuntimeException(str))
      })

  /** Source from a HOCON string, resolved. Left carries the error message. */
  def fromHoconString(
    input: String
  ): Either[String, zio.config.ConfigSource] =
    fromTypesafeConfig(
      ConfigFactory.parseString(input).resolve
    )

  /**
   * Source from an already built config.
   *
   * `input` is by-name and evaluated inside `Try`, so an exception thrown while producing it
   * becomes `Left(message)` instead of propagating.
   */
  def fromTypesafeConfig(
    input: => com.typesafe.config.Config
  ): Either[String, ConfigSource] =
    Try {
      input
    } match {
      case Failure(exception) => Left(exception.getMessage)
      case Success(value) =>
        getPropertyTree(value) match {
          case Left(value)  => Left(value)
          case Right(value) => Right(ConfigSource.fromPropertyTree(value, "hocon", LeafForSequence.Invalid))
        }
    }

  /**
   * Converts the root object of `input` into a `PropertyTree`, dispatching on each value's
   * `ConfigValueType`. Any exception raised during the conversion yields a Left with an
   * explanatory message.
   */
  private[config] def getPropertyTree(
    input: com.typesafe.config.Config
  ): Either[String, PropertyTree[String, String]] = {
    // Scalars become leaves holding their string form; null becomes the empty tree.
    def loopBoolean(value: Boolean) = Leaf(value.toString)
    def loopNumber(value: Number)   = Leaf(value.toString)
    val loopNull                    = PropertyTree.empty
    def loopString(value: String)   = Leaf(value)
    // NOTE(review): the bodies of loopList and loopConfig were truncated in extraction;
    // presumably each element / entry is converted through loopAny — restore before compiling.
    def loopList(values: List[ConfigValue]) = Sequence(
    def loopConfig(config: ConfigObject) =
      Record( { case (key, value) => key -> loopAny(value) }.toMap)

    def loopAny(value: ConfigValue): PropertyTree[String, String] = value.valueType() match {
      case ConfigValueType.OBJECT  => loopConfig(value.asInstanceOf[ConfigObject])
      case ConfigValueType.LIST    => loopList(value.asInstanceOf[ConfigList].asScala.toList)
      case ConfigValueType.BOOLEAN => loopBoolean(value.unwrapped().asInstanceOf[JBoolean])
      case ConfigValueType.NUMBER  => loopNumber(value.unwrapped().asInstanceOf[Number])
      case ConfigValueType.NULL    => loopNull
      case ConfigValueType.STRING  => loopString(value.unwrapped().asInstanceOf[String])
    }

    Try(loopConfig(input.root())) match {
      case Failure(t) =>
        Left(
          "Unable to form the zio.config.PropertyTree from Hocon string." +
            " This may be due to the presence of explicit usage of nulls in hocon string. " +
            t.getMessage
        )
      case Success(value) => Right(value)
    }
  }
}
Example 182
package zio.config.typesafe

import java.io.File

import com.typesafe.config.ConfigFactory
import zio.config.Config
import zio.{ Layer, Tag, ZIO }
import zio.config.ConfigDescriptor

/** Layer constructors that read an `A` from Typesafe Config input using a `ConfigDescriptor[A]`. */
object TypesafeConfig {

  /** Reads from the resolved result of `ConfigFactory.load`. */
  def fromDefaultLoader[A](
    configDescriptor: ConfigDescriptor[A]
  )(implicit tag: Tag[A]): Layer[Throwable, Config[A]] =
    fromTypesafeConfig(ConfigFactory.load.resolve, configDescriptor)

  /** Reads from a HOCON file, parsed and resolved. */
  def fromHoconFile[A](
    file: File,
    configDescriptor: ConfigDescriptor[A]
  )(implicit tag: Tag[A]): Layer[Throwable, Config[A]] =
    fromTypesafeConfig(ConfigFactory.parseFile(file).resolve, configDescriptor)

  /** Reads from a HOCON string, parsed and resolved. */
  def fromHoconString[A](
    str: String,
    configDescriptor: ConfigDescriptor[A]
  )(implicit tag: Tag[A]): Layer[Throwable, Config[A]] =
    fromTypesafeConfig(ConfigFactory.parseString(str).resolve, configDescriptor)

  /**
   * Builds the layer from a by-name Typesafe `Config`.
   * A source-construction error (a `String`) is wrapped in a `RuntimeException`.
   */
  def fromTypesafeConfig[A](
    conf: => com.typesafe.config.Config,
    configDescriptor: ConfigDescriptor[A]
  )(implicit tag: Tag[A]): Layer[Throwable, Config[A]] =
    Config.fromConfigDescriptorM(
      ZIO
        .fromEither(TypesafeConfigSource.fromTypesafeConfig(conf))
        .map(configDescriptor from _)
        .mapError(error => new RuntimeException(error))
    )
}
Example 183
package de.gccc.jib

import java.io.File

// NOTE(review): this import was lost in extraction; the package is assumed from the
// Jib Core API - confirm against the jib-core version used by the build.
import com.google.cloud.tools.jib.api.LayerConfiguration
import sbt._

/** Assembles the Jib layer configurations for an sbt project. */
object SbtLayerConfigurations {

  /**
   * Builds one layer per kind of input and returns the non-empty ones.
   *
   * Image paths used: dependencies under `/app/libs`, resources under `/app/resources`,
   * class files under `/app/classes`; `extraMappings` carry their own target paths.
   */
  def generate(
    targetDirectory: File,
    classes: Seq[File],
    resourceDirectories: Seq[File],
    internalDependencies: Keys.Classpath,
    external: Keys.Classpath,
    extraMappings: Seq[(File, String)],
    specialResourceDirectory: File
  ): List[LayerConfiguration] = {

    val internalDependenciesLayer =
      SbtJibHelper.mappingsConverter("internal", reproducibleDependencies(targetDirectory, internalDependencies))

    val externalDependenciesLayer =
      SbtJibHelper.mappingsConverter("libs", MappingsHelper.fromClasspath(external.seq, "/app/libs"))

    val resourcesLayer =
      SbtJibHelper.mappingsConverter(
        "conf",
        resourceDirectories.flatMap(MappingsHelper.contentOf(_, "/app/resources", _.isFile))
      )

    val specialResourcesLayer =
      SbtJibHelper.mappingsConverter(
        "resources",
        MappingsHelper.contentOf(specialResourceDirectory, "/app/resources", _.isFile)
      )

    val extraLayer =
      if (extraMappings.nonEmpty)
        SbtJibHelper.mappingsConverter("extra", extraMappings.filter(_._1.isFile)) :: Nil
      else
        Nil

    // we only want class-files in our classes layer
    // FIXME: not just extensions checking?
    val allClasses = classes
      .flatMap(MappingsHelper.contentOf(_, "/app/classes", f => f.isFile && f.getName.endsWith(".class")))

    val classesLayer = SbtJibHelper.mappingsConverter("classes", allClasses)

    // the ordering here is really important
    (extraLayer ::: List(
      externalDependenciesLayer,
      resourcesLayer,
      internalDependenciesLayer,
      specialResourcesLayer,
      classesLayer
    )).filterNot(lc => lc.getLayerEntries.isEmpty)
  }

  /**
   * Re-creates `<target>/jib/dependency-stage`, writes a `ZipStripper`-processed copy of every
   * internal dependency file into it, and maps the staged files to `/app/libs`.
   */
  private def reproducibleDependencies(targetDirectory: File, internalDependencies: Keys.Classpath) = {
    val dependencies = internalDependencies.map(_.data)

    val stageDirectory = targetDirectory / "jib" / "dependency-stage"
    IO.delete(stageDirectory)
    IO.createDirectory(stageDirectory)

    val stripper = new ZipStripper()
    dependencies.foreach { in =>
      val out = new File(stageDirectory, in.getName)
      stripper.strip(in, out)
    }

    MappingsHelper.contentOf(stageDirectory, "/app/libs")
  }
}
Example 184
package de.gccc.jib

import java.io.File

// NOTE(review): both Jib imports were lost in extraction; the package is assumed from the
// Jib Core API - confirm against the jib-core version used by the build.
import com.google.cloud.tools.jib.api.AbsoluteUnixPath
import com.google.cloud.tools.jib.api.LayerConfiguration

private[jib] object SbtJibHelper {

  /**
   * Converts sbt-style `(file, pathOnImage)` mappings into a Jib layer.
   *
   * Only regular files are added; entries are added in ascending order of their image path.
   *
   * @param name     label for the layer (not applied to the builder in the visible code)
   * @param mappings source file paired with its absolute Unix path inside the image
   */
  def mappingsConverter(name: String, mappings: Seq[(File, String)]): LayerConfiguration = {
    val layerConfiguration = LayerConfiguration.builder()

    mappings
      .filter(_._1.isFile) // fixme resolve all directory files
      .map { case (file, fullPathOnImage) => (file.toPath, fullPathOnImage) }
      .toList
      .sortBy(_._2)
      .foreach {
        case (sourceFile, pathOnImage) =>
          layerConfiguration.addEntry(sourceFile, AbsoluteUnixPath.get(pathOnImage))
      }

    // The declared return type requires the built configuration, not the Unit result of foreach.
    layerConfiguration.build()
  }
}
Example 185
package de.gccc.jib

import java.io.File

import sbt._
// NOTE(review): the import prefix was lost in extraction; `sbt.io` is assumed - confirm.
import sbt.io.{ IO, PathFinder }

import scala.language.postfixOps

// NOTE(review): the object header and every member preceding this method were lost in
// extraction. Callers in this project also use `MappingsHelper.contentOf(...)` and a two-argument
// `MappingsHelper.fromClasspath(entries, target)`; restore them from the original source.
object MappingsHelper {

  /**
   * Maps classpath entries to `target/<file name>` on the image.
   *
   * @param entries             classpath entries to consider
   * @param target              directory on the image that receives the files
   * @param includeArtifact     decides whether an entry that carries artifact metadata is kept
   * @param includeOnNoArtifact result used for entries without artifact metadata
   */
  def fromClasspath(entries: Seq[Attributed[File]],
                    target: String,
                    includeArtifact: Artifact => Boolean,
                    includeOnNoArtifact: Boolean = false): Seq[(File, String)] =
    entries
      .filter(attr => attr.get(sbt.Keys.artifact.key).map(includeArtifact).getOrElse(includeOnNoArtifact))
      .map { attribute =>
        val file = attribute.data
        file -> s"$target/${file.getName}"
      }
}
Example 186
package zio.nio.core.channels

import java.io.{ File, RandomAccessFile }

import zio.nio.core.{ BaseSpec, Buffer }
import zio.test.Assertion._
import zio.test._
import zio.{ Chunk, IO, ZIO }

import scala.io.Source

/** Exercises scattering reads and gathering writes through `FileChannel`. */
object ScatterGatherChannelSpec extends BaseSpec {

  override def spec = suite("ScatterGatherChannelSpec")(
    testM("scattering read") {
      for {
        raf         <- ZIO.effectTotal(new RandomAccessFile("nio-core/src/test/resources/scattering_read_test.txt", "r"))
        fileChannel = raf.getChannel
        // Decodes a filled buffer up to the first line feed (byte 10).
        readLine = (buffer: Buffer[Byte]) =>
          for {
            _     <- buffer.flip
            array <- buffer.array
            text  = array.takeWhile(_ != 10).map(_.toChar).mkString.trim
          } yield text
        buffs   <- IO.collectAll(Seq(Buffer.byte(5), Buffer.byte(5)))
        channel = new FileChannel(fileChannel)
        _       <- channel.readBuffer(buffs)
        list    <- IO.collectAll(buffs.map(readLine))
        _       <- channel.close
      } yield assert(list)(equalTo("Hello" :: "World" :: Nil))
    },
    testM("gathering write") {
      for {
        file        <- ZIO.effect(new File("nio-core/src/test/resources/gathering_write_test.txt"))
        raf         = new RandomAccessFile(file, "rw")
        fileChannel = raf.getChannel
        buffs <- IO.collectAll(
                  Seq(
                    Buffer.byte(Chunk.fromArray("Hello".getBytes)),
                    Buffer.byte(Chunk.fromArray("World".getBytes))
                  )
                )
        channel = new FileChannel(fileChannel)
        _       <- channel.writeBuffer(buffs)
        _       <- channel.close
        result  = readLines(file)
        _       = file.delete()
      } yield assert(result)(equalTo(Seq("HelloWorld")))
    }
  )

  /** Reads all lines eagerly and closes the underlying `Source` before the file is deleted. */
  private def readLines(file: File): Seq[String] = {
    val source = Source.fromFile(file)
    try source.getLines().toList
    finally source.close()
  }
}
Example 187
package zio.nio.channels

import java.io.{ File, RandomAccessFile }

import zio.nio.core.Buffer
import zio.nio.BaseSpec
import zio.test.Assertion._
import zio.test._
import zio.{ Chunk, IO, ZIO }

import scala.io.Source

/** Exercises scattering reads and gathering writes through the managed `FileChannel`. */
object ScatterGatherChannelSpec extends BaseSpec {

  override def spec = suite("ScatterGatherChannelSpec")(
    testM("scattering read") {
      for {
        raf         <- ZIO.effectTotal(new RandomAccessFile("nio/src/test/resources/scattering_read_test.txt", "r"))
        fileChannel = raf.getChannel
        // Decodes a filled buffer up to the first line feed (byte 10).
        readLine = (buffer: Buffer[Byte]) =>
          for {
            _     <- buffer.flip
            array <- buffer.array
            text  = array.takeWhile(_ != 10).map(_.toChar).mkString.trim
          } yield text
        buffs <- IO.collectAll(Seq(Buffer.byte(5), Buffer.byte(5)))
        list <- FileChannel(fileChannel).use { channel =>
                 for {
                   _    <- channel.readBuffer(buffs)
                   list <- IO.collectAll(buffs.map(readLine))
                 } yield list
               }
      } yield assert(list)(equalTo("Hello" :: "World" :: Nil))
    },
    testM("gathering write") {
      for {
        file        <- ZIO.effect(new File("nio/src/test/resources/gathering_write_test.txt"))
        raf         = new RandomAccessFile(file, "rw")
        fileChannel = raf.getChannel
        buffs <- IO.collectAll(
                  Seq(
                    Buffer.byte(Chunk.fromArray("Hello".getBytes)),
                    Buffer.byte(Chunk.fromArray("World".getBytes))
                  )
                )
        _      <- FileChannel(fileChannel).use(_.writeBuffer(buffs).unit)
        result = readLines(file)
        _      = file.delete()
      } yield assert(result)(equalTo(Seq("HelloWorld")))
    }
  )

  /** Reads all lines eagerly and closes the underlying `Source` before the file is deleted. */
  private def readLines(file: File): Seq[String] = {
    val source = Source.fromFile(file)
    try source.getLines().toList
    finally source.close()
  }
}
Example 188
package apibuilder.sbt

import java.io.File

import sbt.IO

import scala.util.Try

/** Profiles read from a global config file, keyed by profile name. */
final case class GlobalConfig(profiles: Map[String, Profile] = Map.empty) extends AnyVal {
  override def toString: String = profiles.keys.mkString(", ")
}

/** A single profile; only its token is kept. */
final case class Profile(token: String) extends AnyVal

object GlobalConfig {
  // Section header: `[name]` or `[profile name]`; group 2 is the profile name.
  private val ProfileM = "^\\s*\\[\\s*(profile\\s+|)(\\w+)\\s*\\]\\s*$".r
  // `token = value` line; group 1 is the token.
  private val TokenM = "^\\s*token\\s*=\\s*(\\w+)$".r

  private[this] implicit final class Ext(val acc: List[(String, Option[Profile])]) extends AnyVal {
    def hasNotSeen(pn: String): Boolean = !acc.exists { case (pn0, _) => pn0 == pn }
  }

  /**
   * Reads `f` line by line and collects every profile that has a token.
   *
   * The fold keeps the most recent section at the head of the accumulator: a new, not yet seen
   * section header pushes `(name, None)`; a token line fills the head only while it is still
   * `None`; every other line is ignored. Sections left without a token are dropped.
   *
   * @return the parsed config, or the exception raised while reading
   */
  def load(f: File): Either[Throwable, GlobalConfig] =
    Try {
      IO.reader(f) { r =>
        GlobalConfig(
          IO.foldLines(r, List.empty[(String, Option[Profile])]) {
              case (acc, ProfileM(_, pn)) if acc.hasNotSeen(pn) => (pn -> None) :: acc
              case ((cpn, None) :: rest, TokenM(t))             => (cpn -> Some(Profile(t))) :: rest
              case (acc, _)                                     => acc
            }
            .collect { case (profile, Some(config)) => profile -> config }
            .toMap
        )
      }
    }.toEither
}
Example 189
package apibuilder.sbt

import java.io.{File, FileNotFoundException}
import java.nio.file.{Path, PathMatcher}

import io.circe.Decoder
import io.circe.yaml.parser
import sbt.IO

final case class CLIConfig(organizationFor: Map[String, OrganizationConfig])        extends AnyVal
final case class OrganizationConfig(applicationFor: Map[String, ApplicationConfig]) extends AnyVal
final case class ApplicationConfig(version: String, generators: Seq[GeneratorConfig])
final case class GeneratorConfig(generator: String, maybeTargetPath: Option[Path], pathMatchers: Seq[PathMatcher])

object CLIConfig extends BaseDecoders {

  /**
   * Parses `f` as YAML and decodes it into a [[CLIConfig]].
   *
   * @return `Left(MissingParentDirectory)` when the parent directory does not exist,
   *         `Left(MissingFile)` when opening the file raises `FileNotFoundException`,
   *         `Left(InvalidContent)` when parsing or decoding fails, otherwise the config
   */
  final def load(f: File): Either[ConfigException, CLIConfig] = {
    // A bare relative name such as "config.yml" has a null parent; resolve it against the
    // working directory first so the existence check cannot throw a NullPointerException.
    val parentExists = Option(f.getAbsoluteFile.getParentFile).forall(_.exists)

    if (!parentExists) Left(MissingParentDirectory(f))
    else {
      try {
        IO.reader(f) { r =>
          parser
            .parse(r)
            .left
            .map(pf => InvalidContent(pf.message))
            .flatMap(_.as[CLIConfig].left.map(df => InvalidContent(df.message)))
        }
      } catch {
        case _: FileNotFoundException => Left(MissingFile(f))
      }
    }
  }

  // The organizations live under the top-level "code" field.
  implicit final val cliConfigDecoder: Decoder[CLIConfig] = Decoder.instance { c =>
    c.downField("code").as[Map[String, OrganizationConfig]].map(CLIConfig.apply)
  }

  implicit final val organizationConfigDecoder: Decoder[OrganizationConfig] = Decoder.instance { c =>
    c.as[Map[String, ApplicationConfig]].map(OrganizationConfig.apply)
  }

  implicit final val applicationConfig: Decoder[ApplicationConfig] = Decoder.instance { c =>
    for {
      version    <- c.downField("version").as[String]
      generators <- c.downField("generators").as[Seq[GeneratorConfig]]
    } yield ApplicationConfig(version, generators)
  }

  implicit final val generatorConfigDecoder: Decoder[GeneratorConfig] = Decoder.instance { c =>
    for {
      generator       <- c.downField("generator").as[String]
      maybeTargetPath <- c.downField("target").as[Option[Path]]
      pathMatchers    <- c.downField("files").as[Seq[PathMatcher]]
    } yield GeneratorConfig(generator, maybeTargetPath, pathMatchers)
  }
}
Example 190
package fpgatidbits.hlstools

import sys.process._
import java.io.File

// Collection of utilities for Vivado HLS
object TidbitsHLSTools {

  /**
   * Quick-and-dirty single file HLS synthesis.
   *
   * Runs `vivado_hls` with the bundled `hls_syn.tcl` script inside `synDir`, then copies the
   * generated Verilog from `synDir/projName/sol1/impl/verilog` into `outDir`.
   *
   * @return the standard output of the final `cp` command
   * @throws RuntimeException if synthesis exits with a non-zero status or the copy fails
   */
  def hlsToVerilog(
    inFile: String,
    outDir: String,
    synDir: String,
    projName: String,
    topFxnName: String,
    inclDirs: Seq[String] = Seq(),
    fpgaPart: String = "xc7z020clg400-1",
    nsClk: String = "5.0"
  ): String = {
    // get path to hls_syn.tcl
    val synthScriptPath = getClass.getResource("/script/hls_syn.tcl").getPath
    // need to provide include dirs as a single string argument, parsing
    // done in tcl. note: dirs here should have no spaces!
    val inclDirString = inclDirs.mkString(" ")
    // call the actual synthesis script
    val cmdline = Seq(
      "vivado_hls",
      "-f",
      synthScriptPath,
      "-tclargs",
      projName,
      inFile,
      fpgaPart,
      nsClk,
      topFxnName,
      inclDirString
    )
    val logger = ProcessLogger(
      line => stdout.append(line + "\n"),
      line => stderr.append(line + "\n")
    )
    val status = Process(cmdline, new File(synDir)) ! logger
    // Stop here on failure so stale results from an earlier run are not copied as if fresh.
    if (status != 0) sys.error(s"vivado_hls exited with status $status")

    // copy results to outDir; an argument list keeps paths containing spaces intact
    Seq("cp", "-a", s"$synDir/$projName/sol1/impl/verilog/.", s"$outDir/").!!
  }
}
Example 191
// NOTE(review): this excerpt is incomplete. The enclosing object header, the definitions of
// `lineSeparator` and `apache2` (only the tail of a multi-line string literal remains), several
// import targets, and the receiver/method of the two logging calls are missing from the text,
// so the code below is kept exactly as found and is not expected to compile as-is.
//
// What the visible methods do:
//  - findFiles: collects the `*.scala` and `*.java` files under a directory.
//  - checkLicenseHeaders: reports each file whose header check fails via `log.error`, then
//    aborts with `sys.error` when at least one such file exists.
//  - checkLicenseHeader(file): reads the file as UTF-8 and delegates to the list overload.
//  - checkLicenseHeader(lines): joins the lines preceding the first line that starts with
//    "package " using `lineSeparator` and compares the result with `apache2`.
//  - formatLicenseHeaders / formatLicenseHeader: for each file that fails the check, rewrites it
//    as `apache2` followed by the content from the first "package " line onwards.
//  - removeExistingHeader: drops lines until the first "package " line; returns the input
//    unchanged when no such line exists.
//  - writeLines: writes the lines with a PrintStream and closes it in `finally`.
import import import java.time.ZonedDateTime
import java.time.ZoneOffset
import import sbt._
""".stripMargin.trim

  def findFiles(dir: File): Seq[File] = {
    (dir ** "*.scala").get ++ (dir ** "*.java").get
  }

  def checkLicenseHeaders(log: Logger, srcDir: File): Unit = {
    val badFiles = findFiles(srcDir).filterNot(checkLicenseHeader)
    if (badFiles.nonEmpty) {
      badFiles.foreach { f => log.error(s"bad license header: $f") }
      sys.error(s"${badFiles.size} files with incorrect header, run formatLicenseHeaders to fix")
    } else {"all files have correct license header")
    }
  }

  def checkLicenseHeader(file: File): Boolean = {
    val lines = Source.fromFile(file, "UTF-8").getLines().toList
    checkLicenseHeader(lines)
  }

  def checkLicenseHeader(lines: List[String]): Boolean = {
    val header = lines.takeWhile(!_.startsWith("package ")).mkString(lineSeparator)
    header == apache2
  }

  def formatLicenseHeaders(log: Logger, srcDir: File): Unit = {
    findFiles(srcDir).foreach { f => formatLicenseHeader(log, f) }
  }

  def formatLicenseHeader(log: Logger, file: File): Unit = {
    val lines = Source.fromFile(file, "UTF-8").getLines().toList
    if (!checkLicenseHeader(lines)) {"fixing license header: $file")
      writeLines(file, apache2 :: removeExistingHeader(lines))
    }
  }

  def removeExistingHeader(lines: List[String]): List[String] = {
    val res = lines.dropWhile(!_.startsWith("package "))
    if (res.isEmpty) lines else res
  }

  def writeLines(file: File, lines: List[String]): Unit = {
    val out = new PrintStream(file)
    try lines.foreach(out.println) finally out.close()
  }
}
Example 192
// NOTE(review): the package name and the imports without a visible target were lost in
// extraction; they are reconstructed from the identifiers used below - confirm.
package com.netflix.atlas.persistence

import java.io.File
import java.nio.file.Files
import java.nio.file.Paths

import akka.NotUsed
import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import akka.stream.KillSwitch
import akka.stream.KillSwitches
import akka.stream.scaladsl.Keep
import akka.stream.scaladsl.Source
import com.netflix.atlas.core.util.Streams
import com.netflix.iep.service.AbstractService
import com.netflix.spectator.api.Registry
import com.typesafe.config.Config
import com.typesafe.scalalogging.StrictLogging
import javax.inject.Inject
import javax.inject.Singleton

import scala.concurrent.duration._

/**
 * Service that periodically lists the local data directory and feeds the files it finds
 * to an `S3CopySink`.
 */
@Singleton
class S3CopyService @Inject()(
  val config: Config,
  val registry: Registry,
  implicit val system: ActorSystem
) extends AbstractService
    with StrictLogging {

  // NOTE(review): the config key was emptied by extraction; this key is assumed from the sibling
  // `atlas.persistence.local-file.max-duration` setting read below - confirm.
  private val dataDir = config.getString("atlas.persistence.local-file.data-dir")

  private implicit val mat = ActorMaterializer()

  // Assigned in startImpl; still null if the service is stopped before it was started.
  private var killSwitch: KillSwitch = _
  private val s3Config = config.getConfig("atlas.persistence.s3")
  private val cleanupTimeoutMs = s3Config.getDuration("cleanup-timeout").toMillis
  private val maxInactiveMs = s3Config.getDuration("max-inactive-duration").toMillis
  private val maxFileDurationMs =
    config.getDuration("atlas.persistence.local-file.max-duration").toMillis

  require(
    maxInactiveMs > maxFileDurationMs,
    "`max-inactive-duration` MUST be longer than `max-duration`, otherwise file may be renamed before normal write competes"
  )

  /** Starts a stream that, after 1 second and then every 5 seconds, lists `dataDir`. */
  override def startImpl(): Unit = {
    logger.info("Starting service")
    killSwitch = Source
      .tick(1.second, 5.seconds, NotUsed)
      .viaMat(KillSwitches.single)(Keep.right)
      .flatMapMerge(Int.MaxValue, _ => Source(FileUtil.listFiles(new File(dataDir))))
      .toMat(new S3CopySink(s3Config, registry, system))(Keep.left)
      .run()
  }

  /** Waits for the data directory to drain (bounded by the cleanup timeout), then stops the stream. */
  override def stopImpl(): Unit = {
    logger.info("Stopping service")
    waitForCleanup()
    if (killSwitch != null) killSwitch.shutdown()
  }

  /** Polls once per second until no regular file remains or `cleanup-timeout` has elapsed. */
  private def waitForCleanup(): Unit = {
    logger.info("Waiting for cleanup")
    val start = System.currentTimeMillis
    while (hasMoreFiles) {
      if (System.currentTimeMillis() > start + cleanupTimeoutMs) {
        logger.error("Cleanup timeout")
        return
      }
      Thread.sleep(1000)
    }
    logger.info("Cleanup done")
  }

  /** True when `dataDir` still contains a regular file, or when listing it fails. */
  private def hasMoreFiles: Boolean = {
    try {
      Streams.scope(Files.list(Paths.get(dataDir))) { dir =>
        dir.anyMatch(f => Files.isRegularFile(f))
      }
    } catch {
      case e: Exception => {
        logger.error(s"Error checking hasMoreFiles in $dataDir", e)
        true // Assuming there's more files on error to retry
      }
    }
  }
}
Example 193
// NOTE(review): the package name and the `Streams` import were lost in extraction and are
// reconstructed from usage - confirm.
package com.netflix.atlas.persistence

import java.io.File
import java.nio.file.Files

import com.netflix.atlas.core.util.Streams
import com.typesafe.scalalogging.StrictLogging

import scala.jdk.StreamConverters._

/** Best-effort file helpers: failures are logged rather than propagated. */
object FileUtil extends StrictLogging {

  /** Deletes `f`; a failure is logged at error level and otherwise ignored. */
  def delete(f: File): Unit = {
    try {
      Files.delete(f.toPath)
      logger.debug(s"deleted file $f")
    } catch {
      case e: Exception => logger.error(s"failed to delete path $f", e)
    }
  }

  /** Lists the direct children of `f`; returns `Nil` (after logging) when listing fails. */
  def listFiles(f: File): List[File] = {
    try {
      Streams.scope(Files.list(f.toPath)) { dir =>
        dir.toScala(List).map(_.toFile)
      }
    } catch {
      case e: Exception =>
        logger.error(s"failed to list files for: $f", e)
        Nil
    }
  }

  /** True when the file name ends with `RollingFileWriter.TmpFileSuffix`. */
  def isTmpFile(f: File): Boolean =
    f.getName.endsWith(RollingFileWriter.TmpFileSuffix)
}
Example 194
// NOTE(review): the package name was lost in extraction; it is assumed to be the package that
// declares `AvroDatapoint` - confirm.
package com.netflix.atlas.persistence

import java.io.File
import java.nio.file.Files
import java.nio.file.Paths

import org.apache.avro.file.DataFileReader
import org.apache.avro.specific.SpecificDatumReader

// Read metadata for all avro files in given directory
object AvroTest {

  /** Walks the directory given as the first argument and reports on every regular file. */
  def main(args: Array[String]): Unit = {
    val dir = args(0)
    // Files.walk holds open directory handles until the stream is closed.
    val paths = Files.walk(Paths.get(dir))
    try {
      paths
        .filter(path => Files.isRegularFile(path))
        .forEach(p => readFile(p.toFile))
    } finally {
      paths.close()
    }
  }

  /** Counts the records in `file` and prints the reader's block count and block size. */
  private def readFile(file: File): Unit = {
    println(s"##### Reading file: $file")
    var count = 0
    val userDatumReader = new SpecificDatumReader[AvroDatapoint](classOf[AvroDatapoint])
    val dataFileReader = new DataFileReader[AvroDatapoint](file, userDatumReader)
    try {
      while (dataFileReader.hasNext) {
        // The record must be consumed, otherwise hasNext never turns false.
        dataFileReader.next()
        count += 1
      }
      println(s"    blockCount = ${dataFileReader.getBlockCount}")
      println(s"    blockSize  = ${dataFileReader.getBlockSize}")
      println(s"    numRecords = $count")
    } finally {
      dataFileReader.close()
    }
    println()
  }
}
Example 195
// NOTE(review): the package name and the `Datapoint` / `NoopRegistry` imports were lost in
// extraction and are reconstructed from usage - confirm.
package com.netflix.atlas.persistence

import java.io.File
import java.nio.file.Files
import java.nio.file.Paths

import com.netflix.atlas.core.model.Datapoint
import com.netflix.spectator.api.NoopRegistry
import org.apache.avro.file.DataFileReader
import org.apache.avro.specific.SpecificDatumReader
import org.scalatest.BeforeAndAfter
import org.scalatest.BeforeAndAfterAll
import org.scalatest.funsuite.AnyFunSuite

import scala.collection.mutable.ListBuffer

class RollingFileWriterSuite extends AnyFunSuite with BeforeAndAfter with BeforeAndAfterAll {

  private val outputDir = "./target/unitTestAvroOutput"
  private val registry = new NoopRegistry

  before {
    listFilesSorted(outputDir).foreach(_.delete()) // Clean up files if exits
    Files.createDirectories(Paths.get(outputDir))
  }

  after {
    listFilesSorted(outputDir).foreach(_.delete())
    Files.deleteIfExists(Paths.get(outputDir))
  }

  // Write 3 datapoints, first 2 is written in file 1, rollover, and 3rd one is written in file 2
  test("avro writer rollover by max records") {
    val rollingConf = RollingConfig(2, 12000, 12000)
    val hourStart = 3600000
    val hourEnd = 7200000
    val writer =
      new RollingFileWriter(s"$outputDir/prefix", rollingConf, hourStart, hourEnd, registry)
    writer.initialize()
    createData(hourStart, 0, 1, 2).foreach(writer.write)
    writer.write(Datapoint(Map.empty, hourEnd, 3)) // out of range, should be ignored
    writer.close()

    // Check num of files
    val files = listFilesSorted(outputDir)
    assert(files.size == 2)

    // Check file 1 records
    val file1 = files.head
    assert(file1.getName.endsWith(".0000-0001"))
    val dpArray1 = readAvro(file1)
    assert(dpArray1.size == 2)
    assert(dpArray1(0).getValue == 0)
    assert(dpArray1(0).getTags.get("node") == "0")
    assert(dpArray1(1).getValue == 1)
    assert(dpArray1(1).getTags.get("node") == "1")

    // Check file 2 records
    val file2 = files.last
    assert(file2.getName.endsWith(".0002-0002"))
    val dpArray2 = readAvro(file2)
    assert(dpArray2.size == 1)
    assert(dpArray2(0).getValue == 2)
    assert(dpArray2(0).getTags.get("node") == "2")
  }

  /** One datapoint per value: tagged with its index as `node`, timestamps one second apart. */
  private def createData(startTime: Long, values: Double*): List[Datapoint] = {
    values.toList.zipWithIndex.map {
      case (v, i) =>
        val tags = Map(
          "name" -> "cpu",
          "node" -> s"$i"
        )
        Datapoint(tags, startTime + i * 1000, v, 60000)
    }
  }

  /** Regular files directly under `dir`, sorted by name; `Nil` when `dir` does not exist. */
  private def listFilesSorted(dir: String): List[File] = {
    val d = new File(dir)
    if (!d.exists()) {
      Nil
    } else {
      d.listFiles().filter(_.isFile).toList.sortBy(_.getName)
    }
  }

  /** Reads every record of an avro file; the reader is closed even when reading fails. */
  private def readAvro(file: File): Array[AvroDatapoint] = {
    val userDatumReader = new SpecificDatumReader[AvroDatapoint](classOf[AvroDatapoint])
    val dataFileReader = new DataFileReader[AvroDatapoint](file, userDatumReader)
    val dpListBuf = ListBuffer.empty[AvroDatapoint]
    try {
      while (dataFileReader.hasNext) {
        dpListBuf.addOne(dataFileReader.next())
      }
    } finally {
      dataFileReader.close()
    }
    dpListBuf.toArray
  }
}
Example 196
import sbt._
import java.io.File
import java.io.FileNotFoundException

object Resources {

  /**
   * Locates the Java home directory.
   *
   * Uses `$JAVA_HOME/jre` when the `JAVA_HOME` environment variable is set, otherwise the
   * `java.home` system property.
   *
   * @throws FileNotFoundException when neither is defined
   */
  def getJavaHome: File = {
    val javaHome = Option(System.getenv("JAVA_HOME"))
      .map(_ + "/jre")
      .orElse(Option(System.getProperty("java.home")))

    javaHome match {
      case Some(str) =>
        file(str)
      case None =>
        // The two sentences used to be concatenated without a separating space.
        throw new FileNotFoundException(
          "$JAVA_HOME is undefined as well as the system property `java.home`. " +
            "Setup an environment variable JAVA_HOME"
        )
    }
  }

  /**
   * Returns `file` unchanged when it exists.
   *
   * @throws FileNotFoundException when it does not exist
   */
  def checkExists(file: File): File = {
    if (file.exists()) file
    else throw new FileNotFoundException(s"Can't find needed resource: $file")
  }
}
Example 197
package com.eharmony.aloha.models.vw.jni.multilabel

import java.io.{ByteArrayOutputStream, File, FileInputStream}

import com.eharmony.aloha.ModelSerializationTestHelper
// NOTE(review): the prefixes of this import and of the IOUtils import were lost in extraction;
// both are reconstructed from the identifiers used below - confirm.
import com.eharmony.aloha.io.sources.{Base64StringSource, ExternalSource, ModelSource}
import org.apache.commons.codec.binary.Base64
import org.apache.commons.io.IOUtils
import org.junit.Assert._
import org.junit.Test
import org.junit.runner.RunWith
import org.junit.runners.BlockJUnit4ClassRunner
import vowpalWabbit.learner.{VWActionScoresLearner, VWLearners}

@RunWith(classOf[BlockJUnit4ClassRunner])
class VwSparseMultilabelPredictorTest extends ModelSerializationTestHelper {
  import VwSparseMultilabelPredictorTest._

  /** A predictor must survive a serialize/deserialize round trip with its VW params and model. */
  @Test def testSerializability(): Unit = {
    val predictor = getPredictor(getModelSource(), 3)
    val ds = serializeDeserializeRoundTrip(predictor)
    assertEquals(predictor, ds)
    assertEquals(predictor.vwParams(), ds.vwParams())
    assertNotNull(ds.vwModel)
  }

  /** The VW parameter string must name an existing model file and the expected ring size. */
  @Test def testVwParameters(): Unit = {
    val numLabelsInTrainingSet = 3
    val predictor = getPredictor(getModelSource(), numLabelsInTrainingSet)

    predictor.vwParams() match {
      case Data(vwBinFilePath, ringSize) =>
        checkVwBinFile(vwBinFilePath)
        checkVwRingSize(numLabelsInTrainingSet, ringSize.toInt)
      case ps => fail(s"Unexpected VW parameters format.  Found string: $ps")
    }
  }
}

object VwSparseMultilabelPredictorTest {
  // Captures the model file path and the ring size from the VW parameter string.
  private val Data = """\s*-i\s+(\S+)\s+--ring_size\s+(\d+)\s+--testonly\s+--quiet""".r

  /** Has VW write a model to a temp file and wraps its bytes as a base64-backed source. */
  private def getModelSource(): ModelSource = {
    val f = File.createTempFile("i_dont", "care")
    f.deleteOnExit()
    val learner =
      VWLearners.create[VWActionScoresLearner](s"--quiet --csoaa_ldf mc --csoaa_rank -f ${f.getCanonicalPath}")
    learner.close()

    val baos = new ByteArrayOutputStream()
    // Close the input stream once the bytes are copied; it was previously left open.
    val in = new FileInputStream(f)
    try IOUtils.copy(in, baos)
    finally in.close()

    val src = Base64StringSource(Base64.encodeBase64URLSafeString(baos.toByteArray))
    ExternalSource(src.localVfs)
  }

  private def getPredictor(modelSrc: ModelSource, numLabelsInTrainingSet: Int) =
    VwSparseMultilabelPredictor[Any](modelSrc, Nil, Nil, numLabelsInTrainingSet)

  private def checkVwBinFile(vwBinFilePath: String): Unit = {
    val vwBinFile = new File(vwBinFilePath)
    assertTrue("VW binary file should have been written to disk", vwBinFile.exists())
    vwBinFile.deleteOnExit()
  }

  private def checkVwRingSize(numLabelsInTrainingSet: Int, ringSize: Int): Unit = {
    assertEquals(
      "vw --ring_size parameter is incorrect:",
      numLabelsInTrainingSet + VwSparseMultilabelPredictor.AddlVwRingSize,
      ringSize
    )
  }
}
Example 198
package com.eharmony.aloha.factory.avro

import java.io.File

import org.apache.commons.{vfs => vfs1, vfs2}
// NOTE(review): the prefix of this import was lost in extraction; `com.eharmony.aloha.io.vfs`
// is assumed - confirm.
import com.eharmony.aloha.io.vfs.{Vfs1, Vfs2}
import com.eharmony.aloha.audit.impl.avro.Score
import com.eharmony.aloha.factory.ModelFactory
import org.apache.avro.generic.GenericRecord

import scala.util.Try

// NOTE(review): the object header and everything between the imports and `apply` were lost in
// extraction. The name is taken from the deprecation message below, which also refers to a
// `fromConfig(conf: FactoryConfig)` member that is not visible here and must be restored.
object StdAvroModelFactory {

  /**
   * Resolves the domain schema URL through VFS and builds the factory from a `UrlConfig`.
   *
   * @param modelDomainSchemaVfsUrl VFS URL of the model domain schema
   * @param modelCodomainRefInfoStr codomain description, passed through to `UrlConfig`
   * @param imports                 passed through to `UrlConfig`
   * @param classCacheDir           passed through to `UrlConfig`
   * @param dereferenceAsOptional   passed through to `UrlConfig`
   * @param useVfs2                 resolve with Commons VFS2 when true, VFS1 otherwise
   */
  @deprecated(message = "Prefer StdAvroModelFactory.fromConfig(conf: FactoryConfig)", since = "4.0.1")
  def apply(modelDomainSchemaVfsUrl: String,
            modelCodomainRefInfoStr: String,
            imports: Seq[String] = Nil,
            classCacheDir: Option[File] = None,
            dereferenceAsOptional: Boolean = true,
            useVfs2: Boolean = true): Try[ModelFactory[GenericRecord, Score]] = {

    val vfs = url(modelDomainSchemaVfsUrl, useVfs2)

    vfs.flatMap { u =>
      UrlConfig(
        u,
        modelCodomainRefInfoStr,
        imports,
        classCacheDir,
        dereferenceAsOptional
      )()
    }
  }

  /** Resolves the URL with the selected VFS version; failures go through `FactoryConfig.wrapException`. */
  private[this] def url(modelDomainSchemaVfsUrl: String, useVfs2: Boolean) = {
    val u =
      if (useVfs2)
        Try { Vfs2(vfs2.VFS.getManager.resolveFile(modelDomainSchemaVfsUrl)) }
      else
        Try { Vfs1(vfs1.VFS.getManager.resolveFile(modelDomainSchemaVfsUrl)) }

    FactoryConfig.wrapException(u)
  }
}
Example 199
package com.eharmony.aloha.factory

import java.io.File

import com.eharmony.aloha.factory.ex.AlohaFactoryException
// NOTE(review): this import was lost in extraction; the package of `StringReadable` is
// assumed - confirm.
import com.eharmony.aloha.io.StringReadable
import org.apache.commons.{vfs, vfs2}
import spray.json.{JsObject, pimpString}

import scala.util.{Failure, Try}

// NOTE(review): the trait header was lost in extraction. Its name is taken from the `extends`
// clauses below; modifiers and any further members of the original are unknown.
trait ImportedModelPlaceholder {

  /** Resolves the referenced file and parses its content as a JSON object. */
  def resolveFileContents(): Try[JsObject]
}

/** Placeholder whose descriptor is resolved with Commons VFS2. */
private[factory] case class Vfs2ImportedModelPlaceholder(fileDescriptor: String) extends ImportedModelPlaceholder {
  def resolveFileContents() =
    for {
      file <- Try { vfs2.VFS.getManager.resolveFile(fileDescriptor) } recoverWith {
               case f => Failure { new AlohaFactoryException(s"Couldn't resolve VFS2 file: $fileDescriptor", f) }
             }
      json <- Try { StringReadable.fromVfs2(file).parseJson.asJsObject } recoverWith {
               case f => Failure { new AlohaFactoryException(s"Couldn't get JSON for VFS2 file: $file", f) }
             }
    } yield json
}

/** Placeholder whose descriptor is resolved with Commons VFS1. */
private[factory] case class Vfs1ImportedModelPlaceholder(fileDescriptor: String) extends ImportedModelPlaceholder {
  def resolveFileContents() =
    for {
      file <- Try { vfs.VFS.getManager.resolveFile(fileDescriptor) } recoverWith {
               case f => Failure { new AlohaFactoryException(s"Couldn't resolve VFS1 file: $fileDescriptor", f) }
             }
      json <- Try { StringReadable.fromVfs1(file).parseJson.asJsObject } recoverWith {
               case f => Failure { new AlohaFactoryException(s"Couldn't get JSON for VFS1 file: $file", f) }
             }
    } yield json
}

/** Placeholder whose descriptor is treated as a plain `java.io.File` path. */
private[factory] case class FileImportedModelPlaceholder(fileDescriptor: String) extends ImportedModelPlaceholder {
  def resolveFileContents() =
    for {
      file <- Try { new File(fileDescriptor) } recoverWith {
               case f => Failure { new AlohaFactoryException(s"Couldn't resolve file: $fileDescriptor", f) }
             }
      json <- Try { StringReadable.fromFile(file).parseJson.asJsObject } recoverWith {
               case f => Failure { new AlohaFactoryException(s"Couldn't get JSON for file: $file", f) }
             }
    } yield json
}
Example 200
// NOTE(review): the package name was lost in extraction; `com.eharmony.aloha.io` is assumed - confirm.
package com.eharmony.aloha.io

import scala.language.higherKinds

import java.io.{File, InputStream, Reader}
import java.net.URL

import org.apache.commons.{vfs => vfs1, vfs2}

/**
 * Reads a value wrapped in a container `C` from one of several kinds of input.
 *
 * @tparam C container type constructor of the results
 */
trait ContainerReadable[C[_]] {
  def fromString[A](s: String): C[A]
  def fromFile[A](f: File): C[A]
  def fromInputStream[A](is: InputStream): C[A]
  def fromUrl[A](u: URL): C[A]
  def fromReader[A](r: Reader): C[A]
  def fromVfs1[A](foVfs1: vfs1.FileObject): C[A]
  def fromVfs2[A](foVfs2: vfs2.FileObject): C[A]
  def fromResource[A](s: String): C[A]
  def fromClasspathResource[A](s: String): C[A]
}