java.io.File Scala Examples
The following examples show how to use java.io.File.
Each example is taken from an open-source project; the source file name, project, and license are noted above it.
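Before the project examples, here is a minimal, self-contained sketch of common java.io.File operations in Scala: creating a File handle, writing through a PrintWriter, checking existence and size, creating directories, listing entries, and deleting. The object name and file names below are arbitrary and used only for illustration.

import java.io.{File, PrintWriter}

object FileBasics extends App {
  // A hypothetical file name, used only for illustration
  val file = new File("example-output.txt")

  // Write some text, making sure the writer is always closed
  val writer = new PrintWriter(file)
  try writer.write("hello") finally writer.close()

  println(s"exists: ${file.exists()}, size: ${file.length()} bytes")
  println(s"absolute path: ${file.getAbsolutePath}")

  // mkdirs() creates any missing parent directories and returns false on failure
  val dir = new File("example-dir", "nested")
  if (!dir.exists()) dir.mkdirs()

  // listFiles() returns null for non-directories, so guard with Option
  Option(new File(".").listFiles()).getOrElse(Array.empty[File]).take(5).foreach(f => println(f.getName))

  // Clean up the file created above
  file.delete()
}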
Example 1
Source File: KafkaTopicInfo.scala From matcher with MIT License
package tools

import java.io.File

import akka.actor.ActorSystem
import com.typesafe.config.ConfigFactory
import com.wavesplatform.dex.queue.KafkaMatcherQueue.eventDeserializer
import com.wavesplatform.dex.queue.{QueueEvent, QueueEventWithMeta}
import com.wavesplatform.dex.settings.toConfigOps
import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.JavaConverters._
import scala.concurrent.duration.DurationInt

object KafkaTopicInfo extends App {
  implicit val system: ActorSystem = ActorSystem()

  val configFile = new File(args(0))
  val topic = args(1)
  val from = args(2).toLong
  val max = args(3).toInt

  println(s"""configFile: ${configFile.getAbsolutePath}
             |topic: $topic
             |from: $from
             |max: $max""".stripMargin)

  val requestTimeout = java.time.Duration.ofNanos(5.seconds.toNanos)

  val config = ConfigFactory
    .parseString("""waves.dex.events-queue.kafka.consumer.client {
                   |  client.id = "kafka-topics-info"
                   |  enable.auto.commit = false
                   |  auto.offset.reset = earliest
                   |}
                   |""".stripMargin)
    .withFallback {
      ConfigFactory
        .parseFile(configFile)
        .withFallback(ConfigFactory.defaultApplication())
        .withFallback(ConfigFactory.defaultReference())
        .resolve()
        .getConfig("waves.dex.events-queue.kafka")
    }

  val consumer = new KafkaConsumer[String, QueueEvent](
    config.getConfig("waves.dex.events-queue.kafka.consumer.client").toProperties,
    new StringDeserializer,
    eventDeserializer
  )

  try {
    val topicPartition = new TopicPartition(topic, 0)
    val topicPartitions = java.util.Collections.singletonList(topicPartition)

    consumer.assign(topicPartitions)

    {
      val r = consumer.partitionsFor(topic, requestTimeout)
      println(s"Partitions:\n${r.asScala.mkString("\n")}")
    }

    {
      val r = consumer.endOffsets(topicPartitions, requestTimeout)
      println(s"End offsets for $topicPartition: ${r.asScala.mkString(", ")}")
    }

    consumer.seek(topicPartition, from)

    val pollDuration = java.time.Duration.ofNanos(1.seconds.toNanos)
    val lastOffset = from + max
    var continue = true
    while (continue) {
      println("Reading from Kafka")
      val xs = consumer.poll(pollDuration).asScala.toVector
      xs.foreach { msg =>
        println(QueueEventWithMeta(msg.offset(), msg.timestamp(), msg.value()))
      }

      xs.lastOption.foreach { x =>
        if (x.offset() == lastOffset) continue = false
      }
    }
  } finally {
    consumer.close()
  }
}
Example 2
Source File: DataFrameExample.scala From drizzle-spark with Apache License 2.0
// scalastyle:off println package org.apache.spark.examples.ml import java.io.File import scopt.OptionParser import org.apache.spark.examples.mllib.AbstractParams import org.apache.spark.ml.linalg.Vector import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer import org.apache.spark.sql.{DataFrame, Row, SparkSession} import org.apache.spark.util.Utils object DataFrameExample { case class Params(input: String = "data/mllib/sample_libsvm_data.txt") extends AbstractParams[Params] def main(args: Array[String]) { val defaultParams = Params() val parser = new OptionParser[Params]("DataFrameExample") { head("DataFrameExample: an example app using DataFrame for ML.") opt[String]("input") .text(s"input path to dataframe") .action((x, c) => c.copy(input = x)) checkConfig { params => success } } parser.parse(args, defaultParams) match { case Some(params) => run(params) case _ => sys.exit(1) } } def run(params: Params): Unit = { val spark = SparkSession .builder .appName(s"DataFrameExample with $params") .getOrCreate() // Load input data println(s"Loading LIBSVM file with UDT from ${params.input}.") val df: DataFrame = spark.read.format("libsvm").load(params.input).cache() println("Schema from LIBSVM:") df.printSchema() println(s"Loaded training data as a DataFrame with ${df.count()} records.") // Show statistical summary of labels. val labelSummary = df.describe("label") labelSummary.show() // Convert features column to an RDD of vectors. val features = df.select("features").rdd.map { case Row(v: Vector) => v } val featureSummary = features.aggregate(new MultivariateOnlineSummarizer())( (summary, feat) => summary.add(Vectors.fromML(feat)), (sum1, sum2) => sum1.merge(sum2)) println(s"Selected features column with average values:\n ${featureSummary.mean.toString}") // Save the records in a parquet file. val tmpDir = Utils.createTempDir() val outputDir = new File(tmpDir, "dataframe").toString println(s"Saving to $outputDir as Parquet file.") df.write.parquet(outputDir) // Load the records back. println(s"Loading Parquet file with UDT from $outputDir.") val newDF = spark.read.parquet(outputDir) println(s"Schema from Parquet:") newDF.printSchema() spark.stop() } } // scalastyle:on println
Example 3
Source File: CommandUtils.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.deploy.worker

import java.io.{File, FileOutputStream, InputStream, IOException}

import scala.collection.JavaConverters._
import scala.collection.Map

import org.apache.spark.SecurityManager
import org.apache.spark.deploy.Command
import org.apache.spark.internal.Logging
import org.apache.spark.launcher.WorkerCommandBuilder
import org.apache.spark.util.Utils

// The enclosing object declaration was omitted in this excerpt; only redirectStream is shown here.
object CommandUtils extends Logging {

  def redirectStream(in: InputStream, file: File) {
    val out = new FileOutputStream(file, true)
    // TODO: It would be nice to add a shutdown hook here that explains why the output is
    // terminating. Otherwise if the worker dies the executor logs will silently stop.
    new Thread("redirect output to " + file) {
      override def run() {
        try {
          Utils.copyStream(in, out, true)
        } catch {
          case e: IOException =>
            logInfo("Redirection to " + file + " closed: " + e.getMessage)
        }
      }
    }.start()
  }
}
Example 4
Source File: KeyUtils.scala From daml with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.jwt import java.io.{File, FileInputStream} import java.nio.charset.StandardCharsets import java.nio.file.Files import java.security.cert.CertificateFactory import java.security.interfaces.{ECPublicKey, RSAPrivateKey, RSAPublicKey} import java.security.spec.PKCS8EncodedKeySpec import java.security.KeyFactory import com.daml.lf.data.TryOps.Bracket.bracket import scalaz.Show import scalaz.syntax.show._ import scala.util.Try object KeyUtils { final case class Error(what: Symbol, message: String) object Error { implicit val showInstance: Show[Error] = Show.shows(e => s"KeyUtils.Error: ${e.what}, ${e.message}") } private val mimeCharSet = StandardCharsets.ISO_8859_1 def generateJwks(keys: Map[String, RSAPublicKey]): String = { def generateKeyEntry(keyId: String, key: RSAPublicKey): String = s""" { | "kid": "$keyId", | "kty": "RSA", | "alg": "RS256", | "use": "sig", | "e": "${java.util.Base64.getUrlEncoder .encodeToString(key.getPublicExponent.toByteArray)}", | "n": "${java.util.Base64.getUrlEncoder.encodeToString(key.getModulus.toByteArray)}" | }""".stripMargin s""" |{ | "keys": [ |${keys.toList.map { case (keyId, key) => generateKeyEntry(keyId, key) }.mkString(",\n")} | ] |} """.stripMargin } }
Example 5
Source File: RUtils.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.api.r

import java.io.File
import java.util.Arrays

import org.apache.spark.{SparkEnv, SparkException}

private[spark] object RUtils {
  // Local path where R binary packages built from R source code contained in the spark
  // packages specified with "--packages" or "--jars" command line option reside.
  var rPackages: Option[String] = None

  def isRInstalled: Boolean = {
    try {
      val builder = new ProcessBuilder(Arrays.asList("R", "--version"))
      builder.start().waitFor() == 0
    } catch {
      case e: Exception => false
    }
  }
}
Example 6
Source File: package.scala From mantis with Apache License 2.0
package io.iohk.ethereum import java.io.{File, PrintWriter} import java.net.{Inet6Address, InetAddress} import java.security.SecureRandom import io.iohk.ethereum.crypto._ import org.spongycastle.crypto.AsymmetricCipherKeyPair import org.spongycastle.crypto.params.ECPublicKeyParameters import org.spongycastle.math.ec.ECPoint import org.spongycastle.util.encoders.Hex import scala.io.Source package object network { val ProtocolVersion = 4 implicit class ECPublicKeyParametersNodeId(val pubKey: ECPublicKeyParameters) extends AnyVal { def toNodeId: Array[Byte] = pubKey.asInstanceOf[ECPublicKeyParameters].getQ .getEncoded(false) .drop(1) // drop type info } def publicKeyFromNodeId(nodeId: String): ECPoint = { val bytes = ECDSASignature.uncompressedIndicator +: Hex.decode(nodeId) curve.getCurve.decodePoint(bytes) } def loadAsymmetricCipherKeyPair(filePath: String, secureRandom: SecureRandom): AsymmetricCipherKeyPair = { val file = new File(filePath) if(!file.exists()){ val keysValuePair = generateKeyPair(secureRandom) //Write keys to file val (priv, _) = keyPairToByteArrays(keysValuePair) require(file.getParentFile.exists() || file.getParentFile.mkdirs(), "Key's file parent directory creation failed") val writer = new PrintWriter(filePath) try { writer.write(Hex.toHexString(priv)) } finally { writer.close() } keysValuePair } else { val reader = Source.fromFile(filePath) try { val privHex = reader.mkString keyPairFromPrvKey(Hex.decode(privHex)) } finally { reader.close() } } } def getHostName(address: InetAddress): String = { val hostName = address.getHostAddress address match { case _: Inet6Address => s"[$hostName]" case _ => hostName } } }
Example 7
// Spark-shell style snippet: assumes a SparkContext `sc` is already in scope.
import java.io.{File, PrintWriter}

import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.clustering.GaussianMixture
import org.apache.spark.sql.functions._

def computeGaussianMixtureModel(pathToTextFile: String, quantity: Int) {
  case class Point(x: Double, y: Double)

  def save(f: File)(func: PrintWriter => Unit) {
    val p = new PrintWriter(f)
    try {
      func(p)
    } finally {
      p.close()
    }
  }

  val filename = pathToTextFile.split("\\.")(0)
  val outputFilename = s"$filename-GMM-k${quantity}.tsv"

  val points = sc
    .textFile(pathToTextFile)
    .map { line => line.trim.split("\\s+") }
    .map { row => Point(row(0).toDouble, row(1).toDouble) }

  val features = points.map { p => Vectors.dense(p.x, p.y) }
  features.cache()

  val gmm = new GaussianMixture()
    .setK(quantity)
    .run(features)

  val predictions = features
    .map { f => (f(0), f(1), gmm.predict(f) + 1) }
    .collect

  save(new File(outputFilename)) {
    // The println runs when the block is evaluated; the function literal below is what `save` invokes.
    println(s"OUTPUT TO: ${outputFilename}")
    f => predictions.foreach { case (x, y, ccid) =>
      f.println(s"${x}\t${y}\t${ccid}")
    }
  }
}
Example 8
Source File: PointCloudRelation.scala From geotrellis-pointcloud with Apache License 2.0
package geotrellis.pointcloud.spark.datasource import geotrellis.pointcloud.spark.store.hadoop._ import geotrellis.pointcloud.spark.store.hadoop.HadoopPointCloudRDD.{Options => HadoopOptions} import geotrellis.pointcloud.util.Filesystem import geotrellis.proj4.CRS import geotrellis.store.hadoop.util.HdfsUtils import geotrellis.vector.Extent import cats.implicits._ import io.pdal._ import io.circe.syntax._ import org.apache.hadoop.fs.Path import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.apache.spark.sql.sources.{BaseRelation, TableScan} import org.apache.spark.sql.types._ import org.apache.spark.sql.{Row, SQLContext} import java.io.File import scala.collection.JavaConverters._ // This class has to be serializable since it is shipped over the network. class PointCloudRelation( val sqlContext: SQLContext, path: String, options: HadoopOptions ) extends BaseRelation with TableScan with Serializable { @transient implicit lazy val sc: SparkContext = sqlContext.sparkContext // TODO: switch between HadoopPointCloudRDD and S3PointcCloudRDD lazy val isS3: Boolean = path.startsWith("s3") override def schema: StructType = { lazy val (local, fixedPath) = if(path.startsWith("s3") || path.startsWith("hdfs")) { val tmpDir = Filesystem.createDirectory() val remotePath = new Path(path) // copy remote file into local tmp dir val localPath = new File(tmpDir, remotePath.getName) HdfsUtils.copyPath(remotePath, new Path(s"file:///${localPath.getAbsolutePath}"), sc.hadoopConfiguration) (true, localPath.toString) } else (false, path) val localPipeline = options.pipeline .hcursor .downField("pipeline").downArray .downField("filename").withFocus(_ => fixedPath.asJson) .top.fold(options.pipeline)(identity) val pl = Pipeline(localPipeline.noSpaces) if (pl.validate()) pl.execute() val pointCloud = try { pl.getPointViews().next().getPointCloud(0) } finally { pl.close() if(local) println(new File(fixedPath).delete) } val rdd = HadoopPointCloudRDD(new Path(path), options) val md: (Option[Extent], Option[CRS]) = rdd .map { case (header, _) => (header.projectedExtent3D.map(_.extent3d.toExtent), header.crs) } .reduce { case ((e1, c), (e2, _)) => ((e1, e2).mapN(_ combine _), c) } val metadata = new MetadataBuilder().putString("metadata", md.asJson.noSpaces).build pointCloud.deriveSchema(metadata) } override def buildScan(): RDD[Row] = { val rdd = HadoopPointCloudRDD(new Path(path), options) rdd.flatMap { _._2.flatMap { pc => pc.readAll.toList.map { k => Row(k: _*) } } } } }
Example 9
Source File: S3PointCloudInputFormat.scala From geotrellis-pointcloud with Apache License 2.0
package geotrellis.pointcloud.spark.store.s3

import geotrellis.spark.store.s3._
import geotrellis.pointcloud.spark.store.hadoop.formats._
import geotrellis.pointcloud.util.Filesystem

import io.pdal._
import io.circe.Json
import io.circe.syntax._
import cats.syntax.either._
import org.apache.hadoop.mapreduce.{InputSplit, TaskAttemptContext}
import org.apache.commons.io.FileUtils

import java.io.{File, InputStream}
import java.net.URI

import scala.collection.JavaConverters._

// Excerpt only: the enclosing class and record-reader method are elided, so `mode`, `pipeline`,
// `s3Client`, `context` and `executePipeline` come from that surrounding code.
mode match {
  case "s3" =>
    new S3URIRecordReader[S3PointCloudHeader, List[PointCloud]](s3Client) {
      def read(key: String, uri: URI): (S3PointCloudHeader, List[PointCloud]) = {
        val s3Pipeline = pipeline
          .hcursor
          .downField("pipeline").downArray
          .downField("filename").withFocus(_ => uri.toString.asJson)
          .top.fold(pipeline)(identity)

        executePipeline(context)(key, s3Pipeline)
      }
    }

  case _ =>
    val tmpDir = {
      val dir = PointCloudInputFormat.getTmpDir(context)
      if (dir == null) Filesystem.createDirectory() else Filesystem.createDirectory(dir)
    }

    new S3StreamRecordReader[S3PointCloudHeader, List[PointCloud]](s3Client) {
      def read(key: String, is: InputStream): (S3PointCloudHeader, List[PointCloud]) = {
        // copy remote file into local tmp dir
        tmpDir.mkdirs() // to be sure that dirs created
        val localPath = new File(tmpDir, key.replace("/", "_"))
        FileUtils.copyInputStreamToFile(is, localPath)
        is.close()

        // use local filename path if it's present in json
        val localPipeline = pipeline
          .hcursor
          .downField("pipeline").downArray
          .downField("filename").withFocus(_ => localPath.getAbsolutePath.asJson)
          .top.fold(pipeline)(identity)

        try executePipeline(context)(key, localPipeline)
        finally {
          localPath.delete()
          tmpDir.delete()
        }
      }
    }
}
Example 10
Source File: PointCloudTestEnvironment.scala From geotrellis-pointcloud with Apache License 2.0
package geotrellis.pointcloud.spark

import geotrellis.spark.testkit._
import org.apache.hadoop.fs.Path
import org.scalatest.Suite

import java.io.File

trait PointCloudTestEnvironment extends TestEnvironment { self: Suite =>
  val testResources = new File("src/test/resources")
  val lasPath = new Path(s"file://${testResources.getAbsolutePath}/las")
  val multipleLasPath = new Path(s"file://${testResources.getAbsolutePath}/las/files")

  def setS3Credentials: Unit = {
    try {
      val conf = ssc.sparkContext.hadoopConfiguration
      conf.set("fs.s3.impl", classOf[org.apache.hadoop.fs.s3a.S3AFileSystem].getName)
      conf.set("fs.s3a.aws.credentials.provider", classOf[com.amazonaws.auth.DefaultAWSCredentialsProviderChain].getName)
      conf.set("fs.s3a.endpoint", "s3.eu-west-2.amazonaws.com")
    } catch {
      case e: Throwable => println(e.getMessage)
    }
  }
}
Example 11
Source File: CreateSaltedTable.scala From Taxi360 with Apache License 2.0
package com.hadooparchitecturebook.taxi360.setup.hbase import java.io.File import org.apache.commons.lang.StringUtils import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName} import org.apache.hadoop.hbase.client.ConnectionFactory import org.apache.hadoop.hbase.io.compress.Compression import org.apache.hadoop.hbase.regionserver.{BloomType, ConstantSizeRegionSplitPolicy} import org.apache.hadoop.hbase.util.Bytes import scala.collection.mutable object CreateSaltedTable { def main(args:Array[String]): Unit = { if (args.length == 0) { println("<tableName> <columnFamily> <regionCount> <numOfSalts> <hbaseConfigFolder>") } val tableName = args(0) val columnFamilyName = args(1) val regionCount = args(2).toInt val numOfSalts = args(3).toInt val hbaseConfigFolder = args(4) val conf = HBaseConfiguration.create() conf.addResource(new File(hbaseConfigFolder + "hbase-site.xml").toURI.toURL) val connection = ConnectionFactory.createConnection(conf) val admin = connection.getAdmin val tableDescriptor = new HTableDescriptor(TableName.valueOf(tableName)) val columnDescriptor = new HColumnDescriptor(columnFamilyName) columnDescriptor.setCompressionType(Compression.Algorithm.SNAPPY) columnDescriptor.setBlocksize(64 * 1024) columnDescriptor.setBloomFilterType(BloomType.ROW) tableDescriptor.addFamily(columnDescriptor) tableDescriptor.setMaxFileSize(Long.MaxValue) tableDescriptor.setRegionSplitPolicyClassName(classOf[ConstantSizeRegionSplitPolicy].getName) val splitKeys = new mutable.MutableList[Array[Byte]] for (i <- 0 to regionCount) { val regionSplitStr = StringUtils.leftPad((i*(numOfSalts/regionCount)).toString, 4, "0") splitKeys += Bytes.toBytes(regionSplitStr) } admin.createTable(tableDescriptor, splitKeys.toArray) } }
Example 12
Source File: HBaseRestServer.scala From Taxi360 with Apache License 2.0
package com.hadooparchitecturebook.taxi360.server.hbase

import java.io.File

import com.sun.jersey.spi.container.servlet.ServletContainer
import org.apache.hadoop.hbase.HBaseConfiguration
import org.mortbay.jetty.Server
import org.mortbay.jetty.servlet.{Context, ServletHolder}

object HBaseRestServer {
  def main(args: Array[String]): Unit = {
    if (args.length == 0) {
      println("<port> <configDir> <numberOfSalts> <customerTableName>")
    }
    val port = args(0).toInt
    val hbaseConfigFolder = args(1)
    val numberOfSalts = args(2).toInt
    val appEventTableName = args(3)

    val conf = HBaseConfiguration.create()
    conf.addResource(new File(hbaseConfigFolder + "hbase-site.xml").toURI.toURL)

    HBaseGlobalValues.init(conf, numberOfSalts, appEventTableName)

    val server = new Server(port)

    val sh = new ServletHolder(classOf[ServletContainer])
    sh.setInitParameter("com.sun.jersey.config.property.resourceConfigClass",
      "com.sun.jersey.api.core.PackagesResourceConfig")
    sh.setInitParameter("com.sun.jersey.config.property.packages",
      "com.hadooparchitecturebook.taxi360.server.hbase")
    sh.setInitParameter("com.sun.jersey.api.json.POJOMappingFeature", "true")

    val context = new Context(server, "/", Context.SESSIONS)
    context.addServlet(sh, "/*")

    println("starting HBase Rest Server")
    server.start()
    println("started HBase Rest Server")
    server.join()
  }
}
Example 13
Source File: CsvKafkaPublisher.scala From Taxi360 with Apache License 2.0
package com.hadooparchitecturebook.taxi360.common import java.io.File import java.util.Random import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.io.Source object CsvKafkaPublisher { var counter = 0 var salts = 0 def main(args:Array[String]): Unit = { if (args.length == 0) { println("<brokerList> " + "<topicName> " + "<dataFolderOrFile> " + "<sleepPerRecord> " + "<acks> " + "<linger.ms> " + "<producer.type> " + "<batch.size> " + "<salts>") return } val kafkaBrokerList = args(0) val kafkaTopicName = args(1) val nyTaxiDataFolder = args(2) val sleepPerRecord = args(3).toInt val acks = args(4).toInt val lingerMs = args(5).toInt val producerType = args(6) //"async" val batchSize = args(7).toInt salts = args(8).toInt val kafkaProducer = KafkaProducerUntil.getNewProducer(kafkaBrokerList, acks, lingerMs, producerType, batchSize) println("--Input:" + nyTaxiDataFolder) val dataFolder = new File(nyTaxiDataFolder) if (dataFolder.isDirectory) { val files = dataFolder.listFiles().iterator files.foreach(f => { println("--Input:" + f) processFile(f, kafkaTopicName, kafkaProducer, sleepPerRecord) }) } else { println("--Input:" + dataFolder) processFile(dataFolder, kafkaTopicName, kafkaProducer, sleepPerRecord) } println("---Done") } def processFile(file:File, kafkaTopicName:String, kafkaProducer: KafkaProducer[String, String], sleepPerRecord:Int): Unit = { var counter = 0 val r = new Random() println("-Starting Reading") Source.fromFile(file).getLines().foreach(l => { counter += 1 if (counter % 10000 == 0) { println("{Sent:" + counter + "}") } if (counter % 100 == 0) { print(".") } Thread.sleep(sleepPerRecord) val saltedVender = r.nextInt(salts) + l if (counter > 2) { publishTaxiRecord(saltedVender, kafkaTopicName, kafkaProducer) } }) } def publishTaxiRecord(line:String, kafkaTopicName:String, kafkaProducer: KafkaProducer[String, String]): Unit = { if (line.startsWith("vendor_name") || line.length < 10) { println("skip") } else { val message = new ProducerRecord[String, String](kafkaTopicName, line.hashCode.toString, line) kafkaProducer.send(message) } } }
Example 14
Source File: SparkStreamingTaxiTripToHBase.scala From Taxi360 with Apache License 2.0
package com.hadooparchitecturebook.taxi360.streaming.ingestion.hbase import java.io.File import com.hadooparchitecturebook.taxi360.model.NyTaxiYellowTripBuilder import org.apache.hadoop.hbase.spark.HBaseContext import org.apache.hadoop.hbase.spark.HBaseDStreamFunctions._ import kafka.serializer.StringDecoder import org.apache.hadoop.hbase.{HBaseConfiguration, TableName} import org.apache.solr.common.cloud.ZooKeeperException import org.apache.spark.streaming.kafka.KafkaUtils import org.apache.spark.streaming.{Seconds, StreamingContext} import org.apache.spark.{SparkConf, SparkContext} object SparkStreamingTaxiTripToHBase { def main(args: Array[String]): Unit = { println("Java Version:" + System.getProperty("java.version")) println("Java Home:" + System.getProperties().getProperty("java.home")) val v:ZooKeeperException = null if (args.length == 0) { println("Args: <KafkaBrokerList> " + "<kafkaTopicList> " + "<numberOfSeconds>" + "<runLocal>" + "<hbaseTable>" + "<numOfSalts>" + "<checkpointDir>" + "<hbaseConfigFolder>") return } val kafkaBrokerList = args(0) val kafkaTopicList = args(1) val numberOfSeconds = args(2).toInt val runLocal = args(3).equals("l") val tableName = args(4) val numOfSalts = args(5).toInt val checkpointFolder = args(6) val hbaseConfigFolder = args(7) println("kafkaBrokerList:" + kafkaBrokerList) println("kafkaTopicList:" + kafkaTopicList) println("numberOfSeconds:" + numberOfSeconds) println("runLocal:" + runLocal) println("tableName:" + tableName) println("numOfSalts:" + numOfSalts) val sc:SparkContext = if (runLocal) { val sparkConfig = new SparkConf() sparkConfig.set("spark.broadcast.compress", "false") sparkConfig.set("spark.shuffle.compress", "false") sparkConfig.set("spark.shuffle.spill.compress", "false") new SparkContext("local[2]", "TableStatsSinglePathMain", sparkConfig) } else { val sparkConf = new SparkConf().setAppName("Spark Streaming Ingestion to HBase") new SparkContext(sparkConf) } val ssc = new StreamingContext(sc, Seconds(numberOfSeconds)) val topicsSet = kafkaTopicList.split(",").toSet val kafkaParams = Map[String, String]("metadata.broker.list" -> kafkaBrokerList) val messageStream = KafkaUtils. createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topicsSet) val conf = HBaseConfiguration.create() conf.addResource(new File(hbaseConfigFolder + "hbase-site.xml").toURI.toURL) val hbaseContext = new HBaseContext(sc, conf) val tripDStream = messageStream.map(r => { (r._1, r._2.split(",")) }).filter(r => r._2.size > 3).map(r => { (r._1, NyTaxiYellowTripBuilder.build(r._2)) }) tripDStream.hbaseBulkPut(hbaseContext, TableName.valueOf(tableName), taxi => { TaxiTripHBaseHelper.generatePut(taxi._2, numOfSalts) }) ssc.checkpoint(checkpointFolder) ssc.start() ssc.awaitTermination() } }
Example 15
Source File: SampleRoutes.scala From akka_streams_tutorial with MIT License
package akkahttp import java.io.File import akka.actor.ActorSystem import akka.http.scaladsl.Http import akka.http.scaladsl.server.Directives._ import akka.http.scaladsl.server.Route import org.slf4j.{Logger, LoggerFactory} import scala.concurrent.Await import scala.concurrent.duration._ import scala.sys.process.Process import scala.util.{Failure, Success} object SampleRoutes extends App { val logger: Logger = LoggerFactory.getLogger(this.getClass) implicit val system = ActorSystem("SampleRoutes") implicit val executionContext = system.dispatcher def getFromBrowsableDir: Route = { val dirToBrowse = File.separator + "tmp" // pathPrefix allows loading dirs and files recursively pathPrefix("entries") { getFromBrowseableDirectory(dirToBrowse) } } def parseFormData: Route = path("post") { formFields('color, 'age.as[Int]) { (color, age) => complete(s"The color is '$color' and the age is $age") } } def routes: Route = { getFromBrowsableDir ~ parseFormData } val bindingFuture = Http().bindAndHandle(routes, "127.0.0.1", 8000) bindingFuture.onComplete { case Success(b) => println("Server started, listening on: " + b.localAddress) case Failure(e) => println(s"Server could not bind to... Exception message: ${e.getMessage}") system.terminate() } def browserClient() = { val os = System.getProperty("os.name").toLowerCase if (os == "mac os x") Process("open ./src/main/resources/SampleRoutes.html").! } browserClient() sys.addShutdownHook { println("About to shutdown...") val fut = bindingFuture.map(serverBinding => serverBinding.terminate(hardDeadline = 3.seconds)) println("Waiting for connections to terminate...") val onceAllConnectionsTerminated = Await.result(fut, 10.seconds) println("Connections terminated") onceAllConnectionsTerminated.flatMap { _ => system.terminate() } } }
Example 16
Source File: KafkaServer.scala From akka_streams_tutorial with MIT License
package alpakka.env import java.io.File import java.net.InetSocketAddress import java.nio.file.{Files, Paths} import java.util.Properties import kafka.server.{KafkaConfig, KafkaServerStartable} import org.apache.commons.io.FileUtils import org.apache.zookeeper.server.quorum.QuorumPeerConfig import org.apache.zookeeper.server.{ServerConfig, ZooKeeperServerMain} object KafkaServer extends App { val zookeeperPort = 2181 val kafkaLogs = "/tmp/kafka-logs" val kafkaLogsPath = Paths.get(kafkaLogs) // See: https://stackoverflow.com/questions/59592518/kafka-broker-doesnt-find-cluster-id-and-creates-new-one-after-docker-restart/60864763#comment108382967_60864763 def fix25Behaviour() = { val fileWithConflictingContent = kafkaLogsPath.resolve("meta.properties").toFile if (fileWithConflictingContent.exists()) FileUtils.forceDelete(fileWithConflictingContent) } def removeKafkaLogs(): Unit = { if (kafkaLogsPath.toFile.exists()) FileUtils.forceDelete(kafkaLogsPath.toFile) } // Keeps the persistent data fix25Behaviour() // If everything fails //removeKafkaLogs() val quorumConfiguration = new QuorumPeerConfig { // Since we do not run a cluster, we are not interested in zookeeper data override def getDataDir: File = Files.createTempDirectory("zookeeper").toFile override def getDataLogDir: File = Files.createTempDirectory("zookeeper-logs").toFile override def getClientPortAddress: InetSocketAddress = new InetSocketAddress(zookeeperPort) } class StoppableZooKeeperServerMain extends ZooKeeperServerMain { def stop(): Unit = shutdown() } val zooKeeperServer = new StoppableZooKeeperServerMain() val zooKeeperConfig = new ServerConfig() zooKeeperConfig.readFrom(quorumConfiguration) val zooKeeperThread = new Thread { override def run(): Unit = zooKeeperServer.runFromConfig(zooKeeperConfig) } zooKeeperThread.start() val kafkaProperties = new Properties() kafkaProperties.put("zookeeper.connect", s"localhost:$zookeeperPort") kafkaProperties.put("broker.id", "0") kafkaProperties.put("offsets.topic.replication.factor", "1") kafkaProperties.put("log.dirs", kafkaLogs) kafkaProperties.put("delete.topic.enable", "true") kafkaProperties.put("group.initial.rebalance.delay.ms", "0") kafkaProperties.put("transaction.state.log.min.isr", "1") kafkaProperties.put("transaction.state.log.replication.factor", "1") kafkaProperties.put("zookeeper.connection.timeout.ms", "6000") kafkaProperties.put("num.partitions", "10") val kafkaConfig = KafkaConfig.fromProps(kafkaProperties) val kafka = new KafkaServerStartable(kafkaConfig) println("About to start...") kafka.startup() scala.sys.addShutdownHook{ println("About to shutdown...") kafka.shutdown() kafka.awaitShutdown() zooKeeperServer.stop() } zooKeeperThread.join() }
Example 17
Source File: SparkSessionConfiguration.scala From spark-structured-streaming-examples with Apache License 2.0
package com.phylosoft.spark.learning

import java.io.File

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

trait SparkSessionConfiguration {

  val settings: Traversable[(String, String)]

  private val warehouseLocation = "file:///" + new File("spark-warehouse").getAbsolutePath.toString

  private lazy val conf = new SparkConf()
    .set("spark.sql.warehouse.dir", warehouseLocation)
    .set("spark.sql.session.timeZone", "UTC")
    .set("spark.sql.shuffle.partitions", "4") // keep the size of shuffles small
    .set("spark.sql.cbo.enabled", "true")
    .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    .set("spark.kryoserializer.buffer", "24")
    .setAll(settings)

  implicit lazy val spark: SparkSession = SparkSession.builder
    .config(conf)
    .enableHiveSupport()
    .getOrCreate()
}
Example 18
Source File: CongestionModel.scala From spatial with MIT License
package models

import java.io.File
import java.io.PrintWriter

import utils.io.files._
import utils.math.{CombinationTree, ReduceTree}

import scala.io.Source

object CongestionModel {

  abstract class FeatureVec[T] {
    def loads: T
    def stores: T
    def gateds: T
    def outerIters: T
    def innerIters: T
    def bitsPerCycle: T
    def toSeq: Seq[T] = Seq(stores, outerIters, loads, innerIters, gateds, bitsPerCycle)
  }

  case class RawFeatureVec(loads: Double, stores: Double, gateds: Double, outerIters: Double, innerIters: Double, bitsPerCycle: Double) extends FeatureVec[Double]
  case class CalibFeatureVec(loads: Double, stores: Double, gateds: Double, outerIters: Double, innerIters: Double, bitsPerCycle: Double) extends FeatureVec[Double]

  // Set up lattice properties
  val feature_dims = 6
  val lattice_rank = 6
  val lattice_size = Seq(3,3,3,3,3,3)
  val num_keypoints = 8
  val num_lattices = 1
  var model: String = ""

  // Derive lattice properties
  val sizes = scala.Array.tabulate(lattice_rank){i => lattice_size(i)}
  val dimensions = sizes.length
  val params_per_lattice = sizes.product
  val strides: scala.Array[Int] = scala.Array.fill(dimensions){1}
  val nparams = num_lattices * params_per_lattice

  // Grab lattice params
  lazy val loads_keypoints_inputs = ModelData.loads_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/loads_keypoints_inputs.csv", ","){x => x.toDouble}
  lazy val loads_keypoints_outputs = ModelData.loads_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/loads_keypoints_outputs.csv", ","){x => x.toDouble}
  lazy val stores_keypoints_inputs = ModelData.stores_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/stores_keypoints_inputs.csv", ","){x => x.toDouble}
  lazy val stores_keypoints_outputs = ModelData.stores_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/stores_keypoints_outputs.csv", ","){x => x.toDouble}
  lazy val gateds_keypoints_inputs = ModelData.gateds_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/gateds_keypoints_inputs.csv", ","){x => x.toDouble}
  lazy val gateds_keypoints_outputs = ModelData.gateds_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/gateds_keypoints_outputs.csv", ","){x => x.toDouble}
  lazy val outerIters_keypoints_inputs = ModelData.outerIters_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/outerIters_keypoints_inputs.csv", ","){x => x.toDouble}
  lazy val outerIters_keypoints_outputs = ModelData.outerIters_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/outerIters_keypoints_outputs.csv", ","){x => x.toDouble}
  lazy val innerIters_keypoints_inputs = ModelData.innerIters_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/innerIters_keypoints_inputs.csv", ","){x => x.toDouble}
  lazy val innerIters_keypoints_outputs = ModelData.innerIters_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/innerIters_keypoints_outputs.csv", ","){x => x.toDouble}
  lazy val bitsPerCycle_keypoints_inputs = ModelData.bitsPerCycle_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/bitsPerCycle_keypoints_inputs.csv", ","){x => x.toDouble}
  lazy val bitsPerCycle_keypoints_outputs = ModelData.bitsPerCycle_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/bitsPerCycle_keypoints_outputs.csv", ","){x => x.toDouble}
  lazy val params = ModelData.params(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/LATTICE_PARAMS.csv", ","){x => x.toDouble}

  def evaluate(features: RawFeatureVec, typ: Runtime.CtrlSchedule): Int = {
    model = typ.toString
    val calibrated_features = calibrate_features(features)
    val result = hypercube_features(calibrated_features)
    // TODO: Model is naughty if it returns <170
    // println(s"evaluating $features = ${170 max result.toInt}")
    170 max result.toInt
  }
}
Example 19
Source File: LatencyAnalyzer.scala From spatial with MIT License
package spatial.dse import argon._ import spatial.lang._ import spatial.node._ import spatial.util.spatialConfig import spatial.util.modeling._ import spatial.traversal._ import spatial.targets._ import java.io.File import models._ import argon.node._ case class LatencyAnalyzer(IR: State, latencyModel: LatencyModel) extends AccelTraversal { var cycleScope: List[Double] = Nil var intervalScope: List[Double] = Nil var totalCycles: Seq[Long] = Seq() val batchSize = 1000 def getListOfFiles(d: String):List[String] = { import java.nio.file.{FileSystems, Files} import scala.collection.JavaConverters._ val dir = FileSystems.getDefault.getPath(d) Files.walk(dir).iterator().asScala.filter(Files.isRegularFile(_)).map(_.toString).toList//.foreach(println) } override def silence(): Unit = { super.silence() } def test(rewriteParams: Seq[Seq[Any]]): Unit = { import scala.language.postfixOps import java.io.File import sys.process._ val gen_dir = if (config.genDir.startsWith("/")) config.genDir + "/" else config.cwd + s"/${config.genDir}/" val modelJar = getListOfFiles(gen_dir + "/model").filter(_.contains("RuntimeModel-assembly")).head totalCycles = rewriteParams.grouped(batchSize).flatMap{params => val batchedParams = params.map{rp => "tune " + rp.mkString(" ")}.mkString(" ") val cmd = s"""java -jar ${modelJar} ni ${batchedParams}""" // println(s"running cmd: $cmd") val output = Process(cmd, new File(gen_dir)).!! output.split("\n").filter(_.contains("Total Cycles for App")).map{r => "^.*: ".r.replaceAllIn(r,"").trim.toLong }.toSeq }.toSeq // println(s"DSE Model result: $totalCycles") } override protected def preprocess[A](b: Block[A]): Block[A] = { super.preprocess(b) } override protected def postprocess[A](b: Block[A]): Block[A] = { super.postprocess(b) } override protected def visit[A](lhs: Sym[A], rhs: Op[A]): Unit = { } }
Example 20
package emul

import java.io.PrintStream
import java.io.File

object OOB {
  lazy val writeStream = new PrintStream("./logs/writes.log")
  lazy val readStream = new PrintStream("./logs/reads.log")

  def open(): Unit = {
    new File("./logs/").mkdirs()
    writeStream
    readStream
  }

  def close(): Unit = {
    writeStream.close()
    readStream.close()
  }

  def readOrElse[T](mem: String, addr: String, invalid: T, en: Boolean)(rd: => T): T = {
    try {
      val data = rd
      if (en) readStream.println(s"Mem: $mem; Addr: $addr")
      data
    } catch {
      case err: java.lang.ArrayIndexOutOfBoundsException =>
        if (en) readStream.println(s"Mem: $mem; Addr: $addr [OOB]")
        invalid
    }
  }

  def writeOrElse(mem: String, addr: String, data: Any, en: Boolean)(wr: => Unit): Unit = {
    try {
      wr
      if (en) writeStream.println(s"Mem: $mem; Addr: $addr; Data: $data")
    } catch {
      case err: java.lang.ArrayIndexOutOfBoundsException =>
        if (en) writeStream.println(s"Mem: $mem; Addr: $addr; Data: $data [OOB]")
    }
  }
}
Example 21
Source File: globals.scala From spatial with MIT License
package fringe import java.io.{File, PrintWriter} import fringe.targets.DeviceTarget import fringe.templates.axi4.{AXI4BundleParameters, AXI4StreamParameters} private var _tclScript: PrintWriter = { val pw = new PrintWriter(new File("bigIP.tcl")) pw.flush() pw } def tclScript: PrintWriter = _tclScript def tclScript_=(value: PrintWriter): Unit = _tclScript = value var regression_testing: String = scala.util.Properties.envOrElse("RUNNING_REGRESSION", "0") // Top parameters // These are set by the generated Instantiator class var numArgIns: Int = 1 // Number of ArgIn registers var numArgOuts: Int = 1 // Number of ArgOut registers var numArgIOs: Int = 0 // Number of HostIO registers var numArgInstrs: Int = 0 // TODO: What is this? var argOutLoopbacksMap: Map[Int,Int] = Map.empty // TODO: What is this? var loadStreamInfo: List[StreamParInfo] = Nil var storeStreamInfo: List[StreamParInfo] = Nil var gatherStreamInfo: List[StreamParInfo] = Nil var scatterStreamInfo: List[StreamParInfo] = Nil var axiStreamInsInfo: List[AXI4StreamParameters] = List(AXI4StreamParameters(64,8,64)) var axiStreamOutsInfo: List[AXI4StreamParameters] = List(AXI4StreamParameters(64,8,64)) var numAllocators: Int = 0 def LOAD_STREAMS: List[StreamParInfo] = if (loadStreamInfo.isEmpty) List(StreamParInfo(DATA_WIDTH, WORDS_PER_STREAM, 0)) else loadStreamInfo def STORE_STREAMS: List[StreamParInfo] = if (storeStreamInfo.isEmpty) List(StreamParInfo(DATA_WIDTH, WORDS_PER_STREAM, 0)) else storeStreamInfo def GATHER_STREAMS: List[StreamParInfo] = if (gatherStreamInfo.isEmpty) List(StreamParInfo(DATA_WIDTH, WORDS_PER_STREAM, 0)) else gatherStreamInfo def SCATTER_STREAMS: List[StreamParInfo] = if (scatterStreamInfo.isEmpty) List(StreamParInfo(DATA_WIDTH, WORDS_PER_STREAM, 0)) else scatterStreamInfo def AXI_STREAMS_IN: List[AXI4StreamParameters] = if (axiStreamInsInfo.isEmpty) List(AXI4StreamParameters(256,8,32)) else axiStreamInsInfo def AXI_STREAMS_OUT: List[AXI4StreamParameters] = if (axiStreamOutsInfo.isEmpty) List(AXI4StreamParameters(256,8,32)) else axiStreamOutsInfo def NUM_LOAD_STREAMS: Int = LOAD_STREAMS.size def NUM_STORE_STREAMS: Int = STORE_STREAMS.size def NUM_ARG_INS: Int = numArgIns def NUM_ARG_OUTS: Int = numArgOuts def NUM_ARG_IOS: Int = numArgIOs def NUM_ARG_LOOPS: Int = argOutLoopbacksMap.size max 1 def NUM_ARGS: Int = numArgIns + numArgOuts def NUM_STREAMS: Int = LOAD_STREAMS.size + STORE_STREAMS.size }
Example 22
Source File: TemplateRunner.scala From spatial with MIT License
package fringe.test import java.io.File import scala.collection.mutable.ArrayBuffer import scala.util.Properties.envOrElse object TemplateRunner { def deleteRecursively(file: File): Unit = { if (file.isDirectory) file.listFiles.foreach(deleteRecursively) if (file.exists && !file.delete) throw new Exception(s"Unable to delete ${file.getAbsolutePath}") } def apply(templateMap: Map[String, String => Boolean], args: Array[String]): Unit = { // Choose the default backend based on what is available. lazy val firrtlTerpBackendAvailable: Boolean = { try { val cls = Class.forName("chisel3.iotesters.FirrtlTerpBackend") cls != null } catch { case e: Throwable => false } } lazy val defaultBackend = if (firrtlTerpBackendAvailable) "firrtl" else "" val backendName = envOrElse("TESTER_BACKENDS", defaultBackend).split(" ").head val tempDir = s"""${envOrElse("NEW_TEMPLATES_HOME", "tmp")}/test_run_dir/""" val specificRegex = "(.*[0-9]+)".r val problemsToRun = if (args.isEmpty) { templateMap.keys.toSeq.sorted.toArray // Run all by default } else { args.map { arg => arg match { case "all" => templateMap.keys.toSeq.sorted // Run all case specificRegex(c) => List(c).toSeq // Run specific test case _ => // Figure out tests that match this template and run all val tempRegex = s"(${arg}[0-9]+)".r templateMap.keys.toSeq.sorted.filter(tempRegex.pattern.matcher(_).matches) }}.flatten.toArray } var successful = 0 var passedTests:List[String] = List() val errors = new ArrayBuffer[String] for(testName <- problemsToRun) { // Wipe tempdir for consecutive tests of same module deleteRecursively(new File(tempDir)) templateMap.get(testName) match { case Some(test) => println(s"Starting template $testName") try { if(test(backendName)) { successful += 1 passedTests = passedTests :+ s"$testName" } else { errors += s"Template $testName: test error occurred" } } catch { case exception: Exception => exception.printStackTrace() errors += s"Template $testName: exception ${exception.getMessage}" case t : Throwable => errors += s"Template $testName: throwable ${t.getMessage}" } case _ => errors += s"Bad template name: $testName" } } if(successful > 0) { println(s"""Templates passing: $successful (${passedTests.mkString(", ")})""") } if(errors.nonEmpty) { println("=" * 80) println(s"Errors: ${errors.length}: in the following templates") println(errors.mkString("\n")) println("=" * 80) System.exit(1) } } }
Example 23
Source File: AvroSource.scala From eel-sdk with Apache License 2.0
package io.eels.component.avro import java.io.File import java.util.concurrent.atomic.AtomicBoolean import com.sksamuel.exts.Logging import com.sksamuel.exts.io.Using import io.eels._ import io.eels.datastream.{DataStream, Publisher, Subscriber, Subscription} import io.eels.schema.StructType import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} case class AvroSource(path: Path) (implicit conf: Configuration, fs: FileSystem) extends Source with Using { override lazy val schema: StructType = { using(AvroReaderFns.createAvroReader(path)) { reader => val record = reader.next() AvroSchemaFns.fromAvroSchema(record.getSchema) } } override def parts(): Seq[Publisher[Seq[Row]]] = Seq(AvroSourcePublisher(path)) } case class AvroSourcePublisher(path: Path) (implicit conf: Configuration, fs: FileSystem) extends Publisher[Seq[Row]] with Logging with Using { override def subscribe(subscriber: Subscriber[Seq[Row]]): Unit = { val deserializer = new AvroDeserializer() try { using(AvroReaderFns.createAvroReader(path)) { reader => val running = new AtomicBoolean(true) subscriber.subscribed(Subscription.fromRunning(running)) AvroRecordIterator(reader) .takeWhile(_ => running.get) .map(deserializer.toRow) .grouped(DataStream.DefaultBatchSize) .foreach(subscriber.next) subscriber.completed() } } catch { case t: Throwable => subscriber.error(t) } } } object AvroSource { def apply(file: File)(implicit conf: Configuration, fs: FileSystem): AvroSource = AvroSource(new Path(file.getAbsoluteFile.toString)) def apply(path: java.nio.file.Path)(implicit conf: Configuration, fs: FileSystem): AvroSource = apply(path.toFile) }
Example 24
Source File: AvroSink.scala From eel-sdk with Apache License 2.0
package io.eels.component.avro import java.io.File import io.eels.schema.StructType import io.eels.{Row, Sink, SinkWriter} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.permission.FsPermission import org.apache.hadoop.fs.{FileSystem, Path} case class AvroSink(path: Path, overwrite: Boolean = false, permission: Option[FsPermission] = None, inheritPermissions: Option[Boolean] = None) (implicit conf: Configuration, fs: FileSystem) extends Sink { def withOverwrite(overwrite: Boolean): AvroSink = copy(overwrite = overwrite) def withPermission(permission: FsPermission): AvroSink = copy(permission = Option(permission)) def withInheritPermission(inheritPermissions: Boolean): AvroSink = copy(inheritPermissions = Option(inheritPermissions)) override def open(schema: StructType): SinkWriter = new SinkWriter { private val writer = new AvroWriter(schema, fs.create(path, overwrite)) override def write(row: Row): Unit = writer.write(row) override def close(): Unit = { writer.close() permission match { case Some(perm) => fs.setPermission(path, perm) case None => if (inheritPermissions.getOrElse(false)) { val permission = fs.getFileStatus(path.getParent).getPermission fs.setPermission(path, permission) } } } } } object AvroSink { def apply(file: File)(implicit conf: Configuration, fs: FileSystem): AvroSink = AvroSink(new Path(file.getAbsoluteFile.toString)) def apply(path: java.nio.file.Path)(implicit conf: Configuration, fs: FileSystem): AvroSink = apply(path.toFile) }
Example 25
Source File: ParquetProjectionTest.scala From eel-sdk with Apache License 2.0
package io.eels.component.parquet import java.io.{File, FilenameFilter} import io.eels.datastream.DataStream import io.eels.schema.{Field, StringType, StructType} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.scalatest.{FlatSpec, Matchers} class ParquetProjectionTest extends FlatSpec with Matchers { cleanUpResidualParquetTestFiles private val schema = StructType( Field("name", StringType, nullable = false), Field("job", StringType, nullable = false), Field("location", StringType, nullable = false) ) private val ds = DataStream.fromValues( schema, Seq( Vector("clint eastwood", "actor", "carmel"), Vector("elton john", "musician", "pinner") ) ) private implicit val conf = new Configuration() private implicit val fs = FileSystem.get(new Configuration()) private val file = new File(s"test_${System.currentTimeMillis()}.pq") file.deleteOnExit() private val path = new Path(file.toURI) if (fs.exists(path)) fs.delete(path, false) ds.to(ParquetSink(path).withOverwrite(true)) "ParquetSource" should "support projections" in { val rows = ParquetSource(path).withProjection("name").toDataStream().collect rows.map(_.values) shouldBe Vector(Vector("clint eastwood"), Vector("elton john")) } it should "return all data when no projection is set" in { val rows = ParquetSource(path).toDataStream().collect rows.map(_.values) shouldBe Vector(Vector("clint eastwood", "actor", "carmel"), Vector("elton john", "musician", "pinner")) } private def cleanUpResidualParquetTestFiles = { new File(".").listFiles(new FilenameFilter { override def accept(dir: File, name: String): Boolean = { (name.startsWith("test_") && name.endsWith(".pq")) || (name.startsWith(".test_") && name.endsWith(".pq.crc")) } }).foreach(_.delete()) } }
Example 26
Source File: ParquetSpeedTest.scala From eel-sdk with Apache License 2.0
package io.eels.component.parquet import java.io.File import com.sksamuel.exts.metrics.Timed import io.eels.Row import io.eels.component.parquet.avro.{AvroParquetSink, AvroParquetSource} import io.eels.component.parquet.util.ParquetLogMute import io.eels.datastream.DataStream import io.eels.schema.StructType import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import scala.util.Random object ParquetSpeedTest extends App with Timed { ParquetLogMute() val size = 2000000 val schema = StructType("a", "b", "c", "d", "e") val createRow = Row(schema, Random.nextBoolean(), Random.nextFloat(), Random.nextGaussian(), Random.nextLong(), Random.nextString(4)) val ds = DataStream.fromIterator(schema, Iterator.continually(createRow).take(size)) implicit val conf = new Configuration() implicit val fs = FileSystem.getLocal(new Configuration()) val path = new Path("parquet_speed.pq") fs.delete(path, false) new File(path.toString).deleteOnExit() timed("Insertion") { ds.to(AvroParquetSink(path).withOverwrite(true)) } while (true) { timed("Reading with ParquetSource") { val actual = ParquetSource(path).toDataStream().size assert(actual == size) } println("") println("---------") println("") Thread.sleep(2000) timed("Reading with AvroParquetSource") { val actual = AvroParquetSource(path).toDataStream().size assert(actual == size) } } }
Example 27
Source File: ParquetMultipleFileSpeedTest.scala From eel-sdk with Apache License 2.0
package io.eels.component.parquet import java.io.File import com.sksamuel.exts.metrics.Timed import io.eels.component.parquet.util.ParquetLogMute import io.eels.datastream.DataStream import io.eels.schema.StructType import io.eels.{FilePattern, Row} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import scala.util.Random object ParquetMultipleFileSpeedTest extends App with Timed { ParquetLogMute() val size = 5000000 val count = 20 val schema = StructType("a", "b", "c", "d", "e") def createRow = Row(schema, Random.nextBoolean(), Random.nextFloat(), Random.nextGaussian(), Random.nextLong(), Random.nextString(4)) implicit val conf = new Configuration() implicit val fs = FileSystem.getLocal(new Configuration()) val dir = new Path("parquet-speed-test") new File(dir.toString).mkdirs() new File(dir.toString).listFiles().foreach(_.delete) timed("Insertion") { val ds = DataStream.fromRowIterator(schema, Iterator.continually(createRow).take(size)) ds.to(ParquetSink(new Path("parquet-speed-test/parquet_speed.pq")), count) } for (_ <- 1 to 25) { assert(count == FilePattern("parquet-speed-test/*").toPaths().size) timed("Reading with ParquetSource") { val actual = ParquetSource("parquet-speed-test/*").toDataStream().map { row => row }.filter(_ => true).size assert(actual == size, s"Expected $size but was $actual") } println("") println("---------") println("") } }
Example 28
Source File: ParquetVsOrcSpeedTest.scala From eel-sdk with Apache License 2.0
package io.eels.component.hive import java.io.File import java.math.MathContext import com.sksamuel.exts.metrics.Timed import io.eels.Row import io.eels.component.orc.{OrcSink, OrcSource} import io.eels.component.parquet.{ParquetSink, ParquetSource} import io.eels.datastream.DataStream import io.eels.schema._ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import scala.math.BigDecimal.RoundingMode import scala.util.Random object ParquetVsOrcSpeedTest extends App with Timed { implicit val conf = new Configuration() implicit val fs = FileSystem.getLocal(new Configuration()) val size = 5000000 val structType = StructType( Field("name", StringType), Field("age", IntType.Signed), Field("height", DoubleType), Field("amazing", BooleanType), Field("fans", LongType.Signed), Field("rating", DecimalType(4, 2)) ) def iter: Iterator[Vector[Any]] = Iterator.continually(Vector( Random.nextString(10), Random.nextInt(), Random.nextDouble(), Random.nextBoolean(), Random.nextLong(), BigDecimal(Random.nextDouble(), new MathContext(4)).setScale(2, RoundingMode.UP) )) def ds: DataStream = DataStream.fromIterator(structType, iter.take(size).map(Row(structType, _))) val ppath = new Path("parquet_speed.pq") fs.delete(ppath, false) val opath = new Path("orc_speed.orc") fs.delete(opath, false) new File(ppath.toString).deleteOnExit() new File(opath.toString).deleteOnExit() timed("Orc Insertion") { ds.to(OrcSink(opath)) } timed("Parquet Insertion") { ds.to(ParquetSink(ppath)) } while (true) { timed("Reading with OrcSource") { val actual = OrcSource(opath).toDataStream().size assert(actual == size, s"$actual != $size") } timed("Reading with ParquetSource") { val actual = ParquetSource(ppath).toDataStream().size assert(actual == size, s"$actual != $size") } } }
Example 29
Source File: HiveDynamicPartitionTest.scala From eel-sdk with Apache License 2.0
package io.eels.component.hive

import java.io.File

import io.eels.component.hive.partition.DynamicPartitionStrategy
import io.eels.datastream.DataStream
import io.eels.schema.{Field, Partition, StructType}
import org.scalatest.{BeforeAndAfterAll, FunSuite, Matchers}

import scala.util.Try

class HiveDynamicPartitionTest extends FunSuite with Matchers with BeforeAndAfterAll {

  import HiveConfig._

  val dbname = HiveTestUtils.createTestDatabase
  val table = "dynp_test_" + System.currentTimeMillis()
  val schema = StructType(Field("a"), Field("b"))

  Try {
    HiveTable(dbname, table).create(schema, Seq("a"))
  }

  override def afterAll(): Unit = Try {
    HiveTable(dbname, table).drop()
  }

  test("dynamic partition strategy should create new partitions") {
    assume(new File(s"$basePath/core-site.xml").exists)
    HiveTable(dbname, table).partitionValues("a") shouldBe Set.empty
    DataStream.fromValues(schema, Seq(Seq("1", "2"), Seq("3", "4"))).to(HiveSink(dbname, table))
    HiveTable(dbname, table).partitionValues("a") shouldBe Set("1", "3")
  }

  test("skip partition if partition already exists") {
    assume(new File(s"$basePath/core-site.xml").exists)
    new DynamicPartitionStrategy().ensurePartition(Partition("a" -> "1"), dbname, table, false, client)
    new DynamicPartitionStrategy().ensurePartition(Partition("a" -> "1"), dbname, table, false, client)
  }
}
Example 30
Source File: HiveTableTest.scala From eel-sdk with Apache License 2.0
package io.eels.component.hive import java.io.File import io.eels.Row import io.eels.datastream.DataStream import io.eels.schema.{Field, StringType, StructType} import org.scalatest.{FunSuite, Matchers} import scala.util.{Random, Try} class HiveTableTest extends FunSuite with Matchers { import HiveConfig._ val dbname = HiveTestUtils.createTestDatabase val table = "test_table_" + System.currentTimeMillis() Try { HiveTable(dbname, table).drop() } test("partition values should return values for the matching key") { assume(new File(s"$basePath/core-site.xml").exists) val schema = StructType( Field("a", StringType), Field("b", StringType), Field("c", StringType) ) def createRow = Row(schema, Seq( Random.shuffle(List("a", "b", "c")).head, Random.shuffle(List("x", "y", "z")).head, Random.shuffle(List("q", "r", "s")).head ) ) val sink = HiveSink(dbname, table).withCreateTable(true, Seq("a", "b")) val size = 1000 DataStream.fromIterator(schema, Iterator.continually(createRow).take(size)).to(sink, 4) HiveTable(dbname, table).partitionValues("b") shouldBe Set("x", "y", "z") } }
Example 31
Source File: HivePartitionConstraintTest.scala From eel-sdk with Apache License 2.0
package io.eels.component.hive import java.io.File import io.eels.datastream.DataStream import io.eels.schema.{Field, PartitionConstraint, StringType, StructType} import org.scalatest.{BeforeAndAfterAll, FunSuite, Matchers} import scala.util.Try class HivePartitionConstraintTest extends FunSuite with Matchers with BeforeAndAfterAll { import HiveConfig._ val dbname = HiveTestUtils.createTestDatabase private val table = "constraints_test_" + System.currentTimeMillis() override def afterAll(): Unit = Try { HiveTable(dbname, table).drop() } val schema = StructType( Field("state", StringType), Field("city", StringType) ) Try { DataStream.fromValues(schema, Seq( Seq("iowa", "des moines"), Seq("iowa", "iow city"), Seq("maine", "augusta") )).to(HiveSink(dbname, table).withCreateTable(true, Seq("state"))) } test("hive source with partition constraint should return matching data") { assume(new File(s"$basePath/core-site.xml").exists) HiveSource(dbname, table) .addPartitionConstraint(PartitionConstraint.equals("state", "iowa")) .toDataStream() .collect.size shouldBe 2 } test("hive source with non-existing partitions in constraint should return no data") { assume(new File(s"$basePath/core-site.xml").exists) HiveSource(dbname, table) .addPartitionConstraint(PartitionConstraint.equals("state", "pa")) .toDataStream() .collect.size shouldBe 0 } }
Example 32
Source File: HiveStatsTest.scala From eel-sdk with Apache License 2.0
package io.eels.component.hive import java.io.File import io.eels.Row import io.eels.datastream.DataStream import io.eels.schema._ import org.scalatest.{BeforeAndAfterAll, FunSuite, Matchers} import scala.util.{Random, Try} class HiveStatsTest extends FunSuite with Matchers with BeforeAndAfterAll { import HiveConfig._ private val dbname = HiveTestUtils.createTestDatabase private val table = "stats_test_" + System.currentTimeMillis() private val partitioned_table = "stats_test2_" + System.currentTimeMillis() val schema = StructType( Field("a", StringType), Field("b", IntType.Signed) ) def createRow = Row(schema, Seq(Random.shuffle(List("a", "b", "c")).head, Random.shuffle(List(1, 2, 3, 4, 5)).head)) val amount = 10000 override def afterAll(): Unit = Try { HiveTable(dbname, table).drop() HiveTable(dbname, partitioned_table).drop() } Try { DataStream.fromIterator(schema, Iterator.continually(createRow).take(amount)) .to(HiveSink(dbname, table).withCreateTable(true), 4) DataStream.fromIterator(schema, Iterator.continually(createRow).take(amount)) .to(HiveSink(dbname, partitioned_table).withCreateTable(true, Seq("a")), 4) } test("stats should return row counts for a non-partitioned table") { assume(new File(s"$basePath/core-site.xml").exists) HiveTable(dbname, table).stats().count shouldBe amount } test("stats should return row counts for a partitioned table") { assume(new File(s"$basePath/core-site.xml").exists) HiveTable(dbname, partitioned_table).stats().count shouldBe amount } test("stats should throw exception when constraints specified on a non-partitioned table") { assume(new File(s"$basePath/core-site.xml").exists) intercept[RuntimeException] { val constraints = Seq(PartitionConstraint.equals("a", "b")) HiveTable(dbname, table).stats().count(constraints) } } test("stats should support row count constraints for a partitioned table") { assume(new File(s"$basePath/core-site.xml").exists) val constraints = Seq(PartitionConstraint.equals("a", "b")) HiveTable(dbname, partitioned_table).stats().count(constraints) > 0 shouldBe true HiveTable(dbname, partitioned_table).stats().count(constraints) should be < amount.toLong } test("stats should support min and max for a non-partitioned tabled") { assume(new File(s"$basePath/core-site.xml").exists) HiveTable(dbname, table).stats.max("b") shouldBe 5 HiveTable(dbname, table).stats.min("b") shouldBe 1 } test("stats should support min and max for a partitioned table") { assume(new File(s"$basePath/core-site.xml").exists) HiveTable(dbname, partitioned_table).stats.max("b") shouldBe 5 HiveTable(dbname, partitioned_table).stats.min("b") shouldBe 1 } }
Example 33
Source File: CompactorTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.hive import java.io.File import io.eels.datastream.DataStream import io.eels.schema.{Field, StructType} class CompactorTest extends HiveTests { import HiveConfig._ HiveTable("default", "wibble").drop(true) "Compactor" should { "delete the originals" ignore { val schema = StructType(Field("a"), Field("b")) val ds = DataStream.fromValues(schema, Seq( Array("1", "2"), Array("3", "4"), Array("5", "6"), Array("7", "8") )) ds.to(HiveSink("default", "wibble").withCreateTable(true)) assume(new File(s"$basePath/core-site.xml").exists) HiveTable("default", "wibble").paths(false, false).size should be > 1 Compactor("default", "wibble").compactTo(1) HiveTable("default", "wibble").paths(false, false).size shouldBe 1 } "merge the contents" ignore { assume(new File(s"$basePath/core-site.xml").exists) HiveSource("default", "wibble").toDataStream().collectValues shouldBe Seq( Array("1", "2"), Array("3", "4"), Array("5", "6"), Array("7", "8") ) } } }
Example 34
Source File: OrcMultipleFileSpeedTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.orc import java.io.File import com.sksamuel.exts.metrics.Timed import io.eels.datastream.DataStream import io.eels.schema.StructType import io.eels.{FilePattern, Row} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import scala.util.Random object OrcMultipleFileSpeedTest extends App with Timed { val size = 5000000 val count = 20 val schema = StructType("a", "b", "c", "d", "e") def createRow = Row(schema, Random.nextBoolean(), Random.nextFloat(), Random.nextGaussian(), Random.nextLong(), Random.nextString(4)) implicit val conf = new Configuration() implicit val fs = FileSystem.getLocal(new Configuration()) val dir = new Path("orc-speed-test") new File(dir.toString).mkdirs() timed("Insertion") { val ds = DataStream.fromIterator(schema, Iterator.continually(createRow).take(size)) new File(dir.toString).listFiles().foreach(_.delete) ds.to(OrcSink(new Path("orc-speed-test/orc_speed.pq")).withOverwrite(true), count) } for (_ <- 1 to 25) { assert(count == FilePattern("orc-speed-test/*").toPaths().size) timed("Reading with OrcSource") { val actual = OrcSource("orc-speed-test/*").toDataStream().map { row => row }.filter(_ => true).size assert(actual == size, s"Expected $size but was $actual") } println("") println("---------") println("") } }
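The benchmark above prepares its output directory by creating it and deleting files left over from earlier runs. A small self-contained sketch of that reset step (the directory name mirrors the one above; everything else is illustrative):

import java.io.File

object CleanOutputDir {
  // Create the output directory if needed, then delete any files left over from
  // a previous run, mirroring the mkdirs/listFiles/delete steps in the benchmark above.
  def resetDir(path: String): File = {
    val dir = new File(path)
    dir.mkdirs()
    Option(dir.listFiles()).getOrElse(Array.empty[File]).filter(_.isFile).foreach(_.delete())
    dir
  }

  def main(args: Array[String]): Unit =
    println(s"Prepared ${resetDir("orc-speed-test").getAbsolutePath}")
}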
Example 35
Source File: OrcPredicateTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.orc import java.io.{File, FilenameFilter} import io.eels.Predicate import io.eels.datastream.DataStream import io.eels.schema.{Field, LongType, StringType, StructType} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} class OrcPredicateTest extends FlatSpec with Matchers with BeforeAndAfterAll { cleanUpResidualOrcTestFiles val schema = StructType( Field("name", StringType, nullable = true), Field("city", StringType, nullable = true), Field("age", LongType.Signed, nullable = true) ) val values = Vector.fill(1000) { Vector("sam", "middlesbrough", 37) } ++ Vector.fill(1000) { Vector("laura", "iowa city", 24) } val ds = DataStream.fromValues(schema, values) implicit val conf = new Configuration() implicit val fs = FileSystem.get(new Configuration()) val path = new Path("test.orc") if (fs.exists(path)) fs.delete(path, false) new File(path.toString).deleteOnExit() ds.to(OrcSink(path).withRowIndexStride(1000)) override protected def afterAll(): Unit = fs.delete(path, false) "OrcSource" should "support string equals predicates" in { conf.set("eel.orc.predicate.row.filter", "false") val rows = OrcSource(path).withPredicate(Predicate.equals("name", "sam")).toDataStream().collect rows.map(_.values).toSet shouldBe Set(Vector("sam", "middlesbrough", 37L)) } it should "support gt predicates" in { conf.set("eel.orc.predicate.row.filter", "false") val rows = OrcSource(path).withPredicate(Predicate.gt("age", 30L)).toDataStream().collect rows.map(_.values).toSet shouldBe Set(Vector("sam", "middlesbrough", 37L)) } it should "support lt predicates" in { conf.set("eel.orc.predicate.row.filter", "false") val rows = OrcSource(path).withPredicate(Predicate.lt("age", 30)).toDataStream().collect rows.map(_.values).toSet shouldBe Set(Vector("laura", "iowa city", 24L)) } it should "enable row level filtering with predicates by default" in { conf.set("eel.orc.predicate.row.filter", "true") val rows = OrcSource(path).withPredicate(Predicate.equals("name", "sam")).toDataStream().collect rows.head.schema shouldBe schema rows.head.values shouldBe Vector("sam", "middlesbrough", 37L) } private def cleanUpResidualOrcTestFiles = { new File(".").listFiles(new FilenameFilter { override def accept(dir: File, name: String): Boolean = { (name.startsWith("test_") && name.endsWith(".orc")) || (name.startsWith(".test_") && name.endsWith(".orc.crc")) } }).foreach(_.delete()) } }
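cleanUpResidualOrcTestFiles above shows the java.io.FilenameFilter pattern for sweeping up stale test output. A standalone sketch of the same idea:

import java.io.{File, FilenameFilter}

object ResidualFileCleanup {
  // Delete leftover ORC test artifacts in a directory, using the same
  // prefix/suffix convention as cleanUpResidualOrcTestFiles above.
  def clean(dir: File = new File(".")): Unit = {
    val filter = new FilenameFilter {
      override def accept(parent: File, name: String): Boolean =
        (name.startsWith("test_") && name.endsWith(".orc")) ||
          (name.startsWith(".test_") && name.endsWith(".orc.crc"))
    }
    Option(dir.listFiles(filter)).getOrElse(Array.empty[File]).foreach(_.delete())
  }

  def main(args: Array[String]): Unit = clean()
}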
Example 36
Source File: KafkaTestUtils.scala From spark-kafka-writer with Apache License 2.0 | 5 votes |
package com.github.benfradet.spark.kafka.writer import java.io.File import java.net.InetSocketAddress import java.util.Arrays.asList import java.util.Properties import kafka.server.{KafkaConfig, KafkaServerStartable} import org.apache.kafka.clients.admin.{AdminClient, NewTopic} import org.apache.zookeeper.server.{NIOServerCnxnFactory, ZooKeeperServer} import scala.util.Random class KafkaTestUtils { // zk private val zkHost = "localhost" private val zkPort = 2181 private var zk: EmbeddedZookeeper = _ private var zkReady = false // kafka private val brokerHost = "localhost" private val brokerPort = 9092 private var kafkaServer: KafkaServerStartable = _ private var topicCountMap = Map.empty[String, Int] private var brokerReady = false private var kafkaAdminClient: AdminClient = _ @scala.annotation.varargs def createTopics(topics: String*): Unit = for (topic <- topics) { kafkaAdminClient.createTopics(asList(new NewTopic(topic, 1, 1: Short))) Thread.sleep(1000) topicCountMap = topicCountMap + (topic -> 1) } private def brokerProps: Properties = { val props = new Properties props.put("broker.id", "0") props.put("host.name", brokerHost) props.put("log.dir", { val dir = System.getProperty("java.io.tmpdir") + "/logDir-" + new Random().nextInt(Int.MaxValue) val f = new File(dir) f.mkdirs() dir } ) props.put("port", brokerPort.toString) props.put("zookeeper.connect", zkAddress) props.put("zookeeper.connection.timeout.ms", "10000") props.put("offsets.topic.replication.factor", "1") props } private class EmbeddedZookeeper(hostname: String, port: Int) { private val snapshotDir = { val f = new File(System.getProperty("java.io.tmpdir"), "snapshotDir-" + Random.nextInt(Int.MaxValue)) f.mkdirs() f } private val logDir = { val f = new File(System.getProperty("java.io.tmpdir"), "logDir-" + Random.nextInt(Int.MaxValue)) f.mkdirs() f } private val factory = { val zkTickTime = 500 val zk = new ZooKeeperServer(snapshotDir, logDir, zkTickTime) val f = new NIOServerCnxnFactory val maxCnxn = 16 f.configure(new InetSocketAddress(hostname, port), maxCnxn) f.startup(zk) f } def shutdown(): Unit = { factory.shutdown() snapshotDir.delete() logDir.delete() () } } }
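The embedded ZooKeeper and broker above both rely on uniquely named scratch directories under java.io.tmpdir. A minimal sketch of that helper (names are illustrative):

import java.io.File
import scala.util.Random

object TempDirs {
  // Create a uniquely named scratch directory under java.io.tmpdir, the same
  // pattern the embedded ZooKeeper snapshot/log directories above are built with.
  def newTempDir(prefix: String): File = {
    val dir = new File(System.getProperty("java.io.tmpdir"), s"$prefix-${Random.nextInt(Int.MaxValue)}")
    dir.mkdirs()
    dir
  }

  def main(args: Array[String]): Unit = {
    val dir = newTempDir("logDir")
    println(s"Created ${dir.getAbsolutePath}")
    dir.delete() // succeeds only while the directory is still empty
  }
}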
Example 37
Source File: TotalTweetsScheduler.scala From redrock with Apache License 2.0 | 5 votes |
package com.restapi import java.io.{File, FileInputStream} import akka.actor.{ActorRef, Actor, ActorSystem, Props} import akka.io.IO import org.slf4j.LoggerFactory import play.api.libs.json.Json import spray.can.Http import akka.pattern.ask import spray.http.DateTime import scala.concurrent.duration._ import akka.util.Timeout import scala.concurrent.ExecutionContext.Implicits.global import org.apache.commons.codec.digest.DigestUtils import scala.io.Source case object GetTotalTweetsScheduler object CurrentTotalTweets { @volatile var totalTweets: Long = 0 } class ExecuterTotalTweetsES(delay: FiniteDuration, interval: FiniteDuration) extends Actor { context.system.scheduler.schedule(delay, interval) { getTotalTweetsES } val logger = LoggerFactory.getLogger(this.getClass) override def receive: Actor.Receive = { case GetTotalTweetsScheduler => { logger.info(s"Getting Total of Tweets. Begin: ${CurrentTotalTweets.totalTweets}") } case _ => // just ignore any messages } def getTotalTweetsES: Unit = { val elasticsearchRequests = new GetElasticsearchResponse(0, Array[String](), Array[String](), LoadConf.restConf.getString("searchParam.defaulStartDatetime"), LoadConf.restConf.getString("searchParam.defaultEndDatetime"), LoadConf.esConf.getString("decahoseIndexName")) val totalTweetsResponse = Json.parse(elasticsearchRequests.getTotalTweetsESResponse()) logger.info(s"Getting Total of Tweets. Current: ${CurrentTotalTweets.totalTweets}") CurrentTotalTweets.totalTweets = (totalTweetsResponse \ "hits" \ "total").as[Long] logger.info(s"Total users updated. New: ${CurrentTotalTweets.totalTweets}") } }
Example 38
Source File: package.scala From sbt-reactive-app with Apache License 2.0 | 5 votes |
package com.lightbend.rp.sbtreactiveapp import java.io.File import java.nio.file.Paths import org.apache.tools.ant.filters.StringInputStream import sbt.Logger import scala.collection.immutable.Seq import scala.sys.process.{ Process, ProcessLogger } package object cmd { private[cmd] def run( cwd: File = Paths.get(".").toRealPath().toFile, env: Map[String, String] = Map.empty, input: Option[String] = None, logStdErr: Option[Logger] = None, logStdOut: Option[Logger] = None)(args: String*): (Int, Seq[String], Seq[String]) = { var outList = List.empty[String] var errList = List.empty[String] val stringLogger = ProcessLogger( { s => outList = s :: outList logStdOut.foreach(_.info(s)) }, { s => errList = s :: errList logStdErr.foreach(_.error(s)) }) val exitCode = input .map(new StringInputStream(_)) .foldLeft(Process(args, cwd = cwd, env.toVector: _*))(_ #< _) .run(stringLogger) .exitValue() (exitCode, outList.reverse, errList.reverse) } private[cmd] def runSuccess(failMsg: String)(result: (Int, Seq[String], Seq[String])): Unit = { if (result._1 != 0) { sys.error(s"$failMsg [${result._1}]") } } }
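The cmd.run helper above wires a working-directory File into scala.sys.process. A stripped-down sketch of the same call shape, without the env-var and stdin handling (command and names are illustrative):

import java.io.File
import scala.sys.process.{Process, ProcessLogger}

object RunInDir {
  // Run an external command with an explicit working directory and collect its
  // output, a reduced version of the cmd.run helper above (no env or stdin).
  def run(cwd: File, args: String*): (Int, Seq[String], Seq[String]) = {
    var out = List.empty[String]
    var err = List.empty[String]
    val logger = ProcessLogger(line => out = line :: out, line => err = line :: err)
    val exitCode = Process(args, cwd).run(logger).exitValue()
    (exitCode, out.reverse, err.reverse)
  }

  def main(args: Array[String]): Unit = {
    val (code, stdout, _) = run(new File("."), "ls", "-la") // assumes a Unix-like environment
    println(s"exit=$code\n${stdout.mkString("\n")}")
  }
}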
Example 39
Source File: package.scala From Principles-of-Reactive-Programming with GNU General Public License v3.0 | 5 votes |
import java.io.File package object common { def resourceAsStreamFromSrc(resourcePath: List[String]): Option[java.io.InputStream] = { val classesDir = new File(getClass.getResource(".").toURI) val projectDir = classesDir.getParentFile.getParentFile.getParentFile.getParentFile val resourceFile = subFile(projectDir, ("src" :: "main" :: "resources" :: resourcePath): _*) if (resourceFile.exists) Some(new java.io.FileInputStream(resourceFile)) else None } }
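The package object above calls a subFile helper that the excerpt does not show. A hypothetical reconstruction, written on the assumption that it simply resolves each path segment against the previous directory:

import java.io.File

object PathHelpers {
  // Hypothetical reconstruction of the subFile helper used above: resolve each
  // path segment against the previous directory to build a nested File.
  def subFile(base: File, segments: String*): File =
    segments.foldLeft(base)((dir, segment) => new File(dir, segment))

  def main(args: Array[String]): Unit =
    println(subFile(new File("."), "src", "main", "resources").getPath)
}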
Example 45
Source File: SidechainSettingsReader.scala From Sidechains-SDK with MIT License | 5 votes |
package com.horizen import java.io.File import java.net.URL import java.util.{Optional => JOptional} import com.typesafe.config.{Config, ConfigFactory} import net.ceedubs.ficus.Ficus._ import net.ceedubs.ficus.readers.ArbitraryTypeReader._ import scorex.core.settings.{ScorexSettings, SettingsReaders} import scorex.util.ScorexLogging import scala.compat.java8.OptionConverters.toScala object SidechainSettingsReader extends ScorexLogging with SettingsReaders { protected val sidechainSettingsName = "sidechain-sdk-settings.conf" def fromConfig(config: Config): SidechainSettings = { val webSocketConnectorConfiguration = config.as[WebSocketSettings]("scorex.websocket") val scorexSettings = config.as[ScorexSettings]("scorex") val genesisSetting = config.as[GenesisDataSettings]("scorex.genesis") val backwardTransfer = config.as[withdrawalEpochCertificateSettings]("scorex.withdrawalEpochCertificate") val walletSetting = config.as[WalletSettings]("scorex.wallet") SidechainSettings(scorexSettings, genesisSetting, webSocketConnectorConfiguration, backwardTransfer, walletSetting) } def readConfigFromPath(userConfigPath: String, applicationConfigPath: Option[String]): Config = { val userConfigFile: File = new File(userConfigPath) val userConfig: Option[Config] = if (userConfigFile.exists()) { Some(ConfigFactory.parseFile(userConfigFile)) } else None val applicationConfigURL: Option[URL] = applicationConfigPath.map(filename => new File(filename)) .filter(_.exists()).map(_.toURI.toURL) .orElse(applicationConfigPath.map(r => getClass.getClassLoader.getResource(r))) val applicationConfig: Option[Config] = if (applicationConfigURL.isDefined) { Some(ConfigFactory.parseURL(applicationConfigURL.get)) } else None var config: Config = ConfigFactory.defaultOverrides() if (userConfig.isDefined) config = config.withFallback(userConfig.get) if (applicationConfig.isDefined) config = config.withFallback(applicationConfig.get) config = config .withFallback(ConfigFactory.parseResources(sidechainSettingsName)) .withFallback(ConfigFactory.defaultReference()) .resolve() config } def readConfigFromPath(userConfigPath: String, applicationConfigPath: JOptional[String]) : Config = readConfigFromPath(userConfigPath, toScala(applicationConfigPath)) def read(userConfigPath: String, applicationConfigPath: Option[String]) : SidechainSettings = fromConfig(readConfigFromPath(userConfigPath, applicationConfigPath)) }
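readConfigFromPath above layers an optional user config file over packaged defaults with Typesafe Config fallbacks. A minimal sketch of that chain (the file name is illustrative, and the SDK-specific resource and application-config layers are omitted):

import java.io.File
import com.typesafe.config.{Config, ConfigFactory}

object LayeredConfig {
  // Layer an optional user config file over the packaged reference defaults,
  // a reduced version of the fallback chain in readConfigFromPath above.
  def load(userConfigPath: String): Config = {
    val userFile = new File(userConfigPath)
    val userConfig = if (userFile.exists()) ConfigFactory.parseFile(userFile) else ConfigFactory.empty()
    ConfigFactory.defaultOverrides()
      .withFallback(userConfig)
      .withFallback(ConfigFactory.defaultReference())
      .resolve()
  }

  def main(args: Array[String]): Unit =
    println(load("user.conf").root().render())
}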
Example 46
Source File: VersionedLevelDbStorageAdapter.scala From Sidechains-SDK with MIT License | 5 votes |
package com.horizen.storage.leveldb import java.io.File import java.util import java.util.{Optional, List => JList} import com.horizen.storage.Storage import com.horizen.storage.leveldb.LDBFactory.factory import com.horizen.utils.{Pair => JPair, _} import org.iq80.leveldb.Options import scala.collection.JavaConverters._ import scala.compat.java8.OptionConverters._ class VersionedLevelDbStorageAdapter(pathToDB: String, keepVersions: Int) extends Storage{ private val dataBase: VersionedLDBKVStore = createDb(pathToDB) override def get(key: ByteArrayWrapper): Optional[ByteArrayWrapper] = dataBase.get(key).map(byteArrayToWrapper).asJava override def getOrElse(key: ByteArrayWrapper, defaultValue: ByteArrayWrapper): ByteArrayWrapper = dataBase.getOrElse(key, defaultValue) override def get(keys: JList[ByteArrayWrapper]): JList[JPair[ByteArrayWrapper, Optional[ByteArrayWrapper]]] = { dataBase.get(keys.asScala.map(_.data)) .map{case (key, value) => new JPair(byteArrayToWrapper(key), value.map(v => byteArrayToWrapper(v)).asJava)} .asJava } override def getAll: JList[JPair[ByteArrayWrapper, ByteArrayWrapper]] = { dataBase.getAll .map{case (key, value) => new JPair(byteArrayToWrapper(key), byteArrayToWrapper(value))} .asJava } override def lastVersionID(): Optional[ByteArrayWrapper] = dataBase.versions.lastOption.map(byteArrayToWrapper).asJava override def update(version: ByteArrayWrapper, toUpdate: JList[JPair[ByteArrayWrapper, ByteArrayWrapper]], toRemove: util.List[ByteArrayWrapper]): Unit = { val toUpdateAsScala = toUpdate.asScala.toList val toRemoveAsScala = toRemove.asScala.toList //key for storing version shall not be used as key in any key-value pair in VersionedLDBKVStore require(!toUpdateAsScala.exists(pair => pair.getKey == version) && !toRemoveAsScala.contains(version)) val convertedToUpdate = toUpdateAsScala.map(pair => (pair.getKey.data, pair.getValue.data)) val convertedToRemove = toRemoveAsScala.map(_.data) dataBase.update(convertedToUpdate, convertedToRemove)(version) } override def rollback(versionID: ByteArrayWrapper): Unit = dataBase.rollbackTo(versionID) override def rollbackVersions(): JList[ByteArrayWrapper] = dataBase.versions.map(byteArrayToWrapper).asJava override def close(): Unit = dataBase.close() def createDb(path: String): VersionedLDBKVStore = { val dir = new File(path) dir.mkdirs() val options = new Options() options.createIfMissing(true) val db = factory.open(dir, options) new VersionedLDBKVStore(db, keepVersions) } override def isEmpty: Boolean = dataBase.versions.isEmpty }
Example 47
Source File: SigProofTest.scala From Sidechains-SDK with MIT License | 5 votes |
package com.horizen import java.io.{BufferedReader, File, FileReader} import java.util.Optional import java.{lang, util} import com.horizen.box.WithdrawalRequestBox import com.horizen.box.data.WithdrawalRequestBoxData import com.horizen.cryptolibprovider.{SchnorrFunctionsImplZendoo, ThresholdSignatureCircuitImplZendoo} import com.horizen.proposition.MCPublicKeyHashProposition import com.horizen.schnorrnative.SchnorrSecretKey import com.horizen.utils.BytesUtils import org.junit.Assert.{assertEquals, assertTrue} import org.junit.{Ignore, Test} import scala.collection.JavaConverters._ import scala.util.Random class SigProofTest { private val classLoader: ClassLoader = getClass.getClassLoader private val sigCircuit: ThresholdSignatureCircuitImplZendoo = new ThresholdSignatureCircuitImplZendoo() private val schnorrFunctions: SchnorrFunctionsImplZendoo = new SchnorrFunctionsImplZendoo() private def buildSchnorrPrivateKey(index: Int): SchnorrSecretKey = { var bytes: Array[Byte] = null try { val resourceName = "schnorr_sk0"+ index + "_hex" val file = new FileReader(classLoader.getResource(resourceName).getFile) bytes = BytesUtils.fromHexString(new BufferedReader(file).readLine()) } catch { case e: Exception => assertEquals(e.toString(), true, false) } SchnorrSecretKey.deserialize(bytes) } //Test will take around 2 minutes, enable for sanity checking of ThresholdSignatureCircuit @Ignore @Test def simpleCheck(): Unit = { val keyPairsLen = 7 val threshold = 5 //hardcoded value val keyPairs = (0 until keyPairsLen).view.map(buildSchnorrPrivateKey).map(secret => (secret, secret.getPublicKey)) val publicKeysBytes: util.List[Array[Byte]] = keyPairs.map(_._2.serializePublicKey()).toList.asJava val provingKeyPath = new File(classLoader.getResource("sample_proving_key_7_keys_with_threshold_5").getFile).getAbsolutePath; val verificationKeyPath = new File(classLoader.getResource("sample_vk_7_keys_with_threshold_5").getFile).getAbsolutePath; val sysConstant = sigCircuit.generateSysDataConstant(publicKeysBytes, threshold) val mcBlockHash = Array.fill(32)(Random.nextInt().toByte) val previousMcBlockHash = Array.fill(32)(Random.nextInt().toByte) val wb: util.List[WithdrawalRequestBox] = Seq(new WithdrawalRequestBox(new WithdrawalRequestBoxData(new MCPublicKeyHashProposition(Array.fill(20)(Random.nextInt().toByte)), 2345), 42)).asJava val messageToBeSigned = sigCircuit.generateMessageToBeSigned(wb, mcBlockHash, previousMcBlockHash) val emptySigs = List.fill[Optional[Array[Byte]]](keyPairsLen - threshold)(Optional.empty[Array[Byte]]()) val signatures: util.List[Optional[Array[Byte]]] = (keyPairs .map{case (secret, public) => schnorrFunctions.sign(secret.serializeSecretKey(), public.serializePublicKey(), messageToBeSigned)} .map(b => Optional.of(b)) .take(threshold) .toList ++ emptySigs) .asJava val proofAndQuality: utils.Pair[Array[Byte], lang.Long] = sigCircuit.createProof(wb, mcBlockHash, previousMcBlockHash, publicKeysBytes, signatures, threshold, provingKeyPath) val result = sigCircuit.verifyProof(wb, mcBlockHash, previousMcBlockHash, proofAndQuality.getValue, proofAndQuality.getKey, sysConstant, verificationKeyPath) assertTrue("Proof verification expected to be successfully", result) } }
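buildSchnorrPrivateKey above locates its key material by turning a classpath resource into a File and reading its first line. A small standalone sketch of that lookup (the resource name is illustrative):

import java.io.{BufferedReader, File, FileReader}

object ResourceFirstLine {
  // Turn a classpath resource into a File and read its first line, the same way
  // buildSchnorrPrivateKey above loads its hex-encoded key material.
  def firstLine(resourceName: String): Option[String] =
    Option(getClass.getClassLoader.getResource(resourceName)).map { url =>
      val reader = new BufferedReader(new FileReader(new File(url.getFile)))
      try reader.readLine() finally reader.close()
    }

  def main(args: Array[String]): Unit =
    println(firstLine("sample_resource_hex").getOrElse("resource not found"))
}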
Example 48
Source File: AccStorage.scala From EncryCore with GNU General Public License v3.0 | 5 votes |
package encry.api.http import java.io.File import cats.syntax.either._ import com.typesafe.scalalogging.StrictLogging import encry.settings.EncryAppSettings import encry.storage.VersionalStorage.StorageKey import encry.storage.levelDb.versionalLevelDB.LevelDbFactory import org.encryfoundation.common.utils.Algos import org.iq80.leveldb.{DB, Options} import scorex.utils.Random import supertagged.TaggedType trait AccStorage extends StrictLogging with AutoCloseable { val storage: DB val verifyPassword: String => Boolean = pass => { val salt = storage.get(AccStorage.SaltKey) val passHash = storage.get(AccStorage.PasswordHashKey) Algos.hash(pass.getBytes() ++ salt) sameElements passHash } def setPassword(pass: String): Either[Throwable, Unit] = { val batch = storage.createWriteBatch() val salt = Random.randomBytes() try { batch.put(AccStorage.PasswordHashKey, Algos.hash(pass.getBytes() ++ salt)) batch.put(AccStorage.SaltKey, salt) storage.write(batch).asRight[Throwable] } catch { case err: Throwable => err.asLeft[Unit] } finally { batch.close() } } override def close(): Unit = storage.close() } object AccStorage extends StrictLogging { object PasswordHash extends TaggedType[Array[Byte]] object PasswordSalt extends TaggedType[Array[Byte]] type PasswordHash = PasswordHash.Type type PasswordSalt = PasswordSalt.Type val PasswordHashKey: StorageKey = StorageKey @@ Algos.hash("Password_Key") val SaltKey: StorageKey = StorageKey @@ Algos.hash("Salt_Key") def getDirStorage(settings: EncryAppSettings): File = new File(s"${settings.directory}/userKeys") def init(settings: EncryAppSettings): AccStorage = new AccStorage { override val storage: DB = LevelDbFactory.factory.open(getDirStorage(settings), new Options) } }
Example 49
Source File: SettingsReaders.scala From EncryCore with GNU General Public License v3.0 | 5 votes |
package encry.settings import java.io.File import java.net.InetSocketAddress import com.typesafe.config.Config import encry.storage.VersionalStorage import encry.storage.VersionalStorage.StorageType import encry.utils.ByteStr import net.ceedubs.ficus.readers.ValueReader import org.encryfoundation.common.utils.constants.{Constants, TestNetConstants} trait SettingsReaders { implicit val byteStrReader: ValueReader[ByteStr] = (cfg, path) => ByteStr.decodeBase58(cfg.getString(path)).get implicit val storageTypeReader: ValueReader[StorageType] = (cfg, path) => cfg.getString(path) match { case "iodb" => VersionalStorage.IODB case "LevelDb" => VersionalStorage.LevelDB } implicit val fileReader: ValueReader[File] = (cfg, path) => new File(cfg.getString(path)) implicit val byteValueReader: ValueReader[Byte] = (cfg, path) => cfg.getInt(path).toByte implicit val inetSocketAddressReader: ValueReader[InetSocketAddress] = { (config: Config, path: String) => val split = config.getString(path).split(":") new InetSocketAddress(split(0), split(1).toInt) } implicit val ConstantsSettingsReader: ValueReader[Constants] = (cfg, path) => { def getConstants(constantsClass: String): Constants = { constantsClass match { case "TestConstants" => TestConstants case "SlowMiningConstants" => SlowMiningConstants case _ => TestNetConstants } } getConstants( if (cfg.hasPath(path)) cfg.getString(path) else "" ) } }
Example 50
Source File: RootNodesStorageTest.scala From EncryCore with GNU General Public License v3.0 | 5 votes |
package encry.storage import java.io.File import encry.view.state.avlTree.utils.implicits.Instances._ import encry.modifiers.InstanceFactory import encry.storage.VersionalStorage.{StorageKey, StorageValue, StorageVersion} import encry.storage.levelDb.versionalLevelDB.{LevelDbFactory, VLDBWrapper, VersionalLevelDBCompanion} import encry.utils.{EncryGenerator, FileHelper} import encry.view.state.avlTree.AvlTree import org.encryfoundation.common.utils.Algos import org.encryfoundation.common.utils.TaggedTypes.Height import org.iq80.leveldb.{DB, Options, ReadOptions} import org.scalatest.{FunSuite, Matchers, PropSpec} import scorex.utils.Random import scala.util.{Random => SRandom} class RootNodesStorageTest extends PropSpec with InstanceFactory with EncryGenerator with Matchers { def createAvl: AvlTree[StorageKey, StorageValue] = { val firstDir: File = FileHelper.getRandomTempDir val firstStorage: VLDBWrapper = { val levelDBInit = LevelDbFactory.factory.open(firstDir, new Options) VLDBWrapper(VersionalLevelDBCompanion(levelDBInit, settings.levelDB.copy(keySize = 33), keySize = 33)) } val dir: File = FileHelper.getRandomTempDir val levelDb: DB = LevelDbFactory.factory.open(dir, new Options) AvlTree[StorageKey, StorageValue](firstStorage, RootNodesStorage.emptyRootStorage[StorageKey, StorageValue]) } property("testRollback") { val avl: AvlTree[StorageKey, StorageValue] = createAvl val dir: File = FileHelper.getRandomTempDir val levelDb: DB = LevelDbFactory.factory.open(dir, new Options) val batch1 = levelDb.createWriteBatch() val readOptions1 = new ReadOptions() val rootNodesStorage = RootNodesStorage[StorageKey, StorageValue](levelDb, 10, dir) val (_, avlAfterInsertions, insertList) = (0 to SRandom.nextInt(1000) + 10).foldLeft(rootNodesStorage, avl, List.empty[(Height, (List[(StorageKey, StorageValue)], List[StorageKey]))]) { case ((rootStorage, previousAvl, insertionList), height) => val version = StorageVersion @@ Random.randomBytes() val toInsert = (0 to SRandom.nextInt(100)).foldLeft(List.empty[(StorageKey, StorageValue)]) { case (list, _) => (StorageKey @@ Random.randomBytes() -> StorageValue @@ Random.randomBytes()) :: list } val previousInsertions = insertionList.lastOption.map(_._2._1).getOrElse(List.empty[(StorageKey, StorageValue)]) val deletions = previousInsertions.take(1).map(_._1) val newAvl = previousAvl.insertAndDeleteMany( version, toInsert, deletions ) val newRootStorage = rootStorage.insert( version, newAvl.rootNode, Height @@ height ) (newRootStorage, newAvl, insertionList :+ (Height @@ height -> (toInsert -> deletions))) } val (_, rootNodeRestored) = rootNodesStorage.rollbackToSafePoint(insertList.dropWhile(_._1 != rootNodesStorage.safePointHeight).drop(1)) (avlAfterInsertions.rootNode.hash sameElements rootNodeRestored.hash) shouldBe true } }
Example 51
Source File: SnapshotAssemblerBench.scala From EncryCore with GNU General Public License v3.0 | 5 votes |
package benches import java.io.File import java.util.concurrent.TimeUnit import benches.SnapshotAssemblerBench.SnapshotAssemblerBenchState import encry.view.state.avlTree.utils.implicits.Instances._ import benches.StateBenches.{StateBenchState, benchSettings} import benches.Utils.{getRandomTempDir, utxoFromBoxHolder} import encry.settings.Settings import encry.storage.{RootNodesStorage, VersionalStorage} import encry.storage.VersionalStorage.{StorageKey, StorageValue, StorageVersion} import encry.storage.levelDb.versionalLevelDB.{LevelDbFactory, VLDBWrapper, VersionalLevelDBCompanion} import encry.utils.FileHelper import encry.view.fast.sync.SnapshotHolder import encry.view.state.UtxoState import encry.view.state.avlTree.AvlTree import org.encryfoundation.common.utils.TaggedTypes.Height import org.iq80.leveldb.{DB, Options} import org.openjdk.jmh.annotations.{Benchmark, Mode, Scope, State} import org.openjdk.jmh.infra.Blackhole import org.openjdk.jmh.profile.GCProfiler import org.openjdk.jmh.runner.{Runner, RunnerException} import org.openjdk.jmh.runner.options.{OptionsBuilder, TimeValue, VerboseMode} import scorex.utils.Random class SnapshotAssemblerBench { @Benchmark def createTree(stateBench: SnapshotAssemblerBenchState, bh: Blackhole): Unit = { bh.consume { //stateBench.a.initializeSnapshotData(stateBench.block1) } } } object SnapshotAssemblerBench { @throws[RunnerException] def main(args: Array[String]): Unit = { val opt = new OptionsBuilder() .include(".*" + classOf[SnapshotAssemblerBench].getSimpleName + ".*") .forks(1) .threads(1) .warmupIterations(benchSettings.benchesSettings.warmUpIterations) .measurementIterations(benchSettings.benchesSettings.measurementIterations) .mode(Mode.AverageTime) .timeUnit(TimeUnit.SECONDS) .verbosity(VerboseMode.EXTRA) .addProfiler(classOf[GCProfiler]) .warmupTime(TimeValue.milliseconds(benchSettings.benchesSettings.warmUpTime)) .measurementTime(TimeValue.milliseconds(benchSettings.benchesSettings.measurementTime)) .build new Runner(opt).run } @State(Scope.Benchmark) class SnapshotAssemblerBenchState extends Settings { val a: AvlTree[StorageKey, StorageValue] = createAvl("9gKDVmfsA6J4b78jDBx6JmS86Zph98NnjnUqTJBkW7zitQMReia", 0, 500000) val block1 = Utils.generateGenesisBlock(Height @@ 1) def createAvl(address: String, from: Int, to: Int): AvlTree[StorageKey, StorageValue] = { val firstDir: File = FileHelper.getRandomTempDir val firstStorage: VLDBWrapper = { val levelDBInit = LevelDbFactory.factory.open(firstDir, new Options) VLDBWrapper(VersionalLevelDBCompanion(levelDBInit, settings.levelDB, keySize = 32)) } val dir: File = FileHelper.getRandomTempDir val levelDb: DB = LevelDbFactory.factory.open(dir, new Options) val rootNodesStorage = RootNodesStorage[StorageKey, StorageValue](levelDb, 10, dir) val firstAvl: AvlTree[StorageKey, StorageValue] = AvlTree[StorageKey, StorageValue](firstStorage, rootNodesStorage) val avlNew = (from to to).foldLeft(firstAvl) { case (avl, i) => val bx = Utils.genAssetBox(address, i, nonce = i) val b = (StorageKey !@@ bx.id, StorageValue @@ bx.bytes) avl.insertAndDeleteMany(StorageVersion @@ Random.randomBytes(), List(b), List.empty) } avlNew } def tmpDir: File = FileHelper.getRandomTempDir } }
Example 52
Source File: HistoryBenches.scala From EncryCore with GNU General Public License v3.0 | 5 votes |
package benches import java.io.File import java.util.concurrent.TimeUnit import benches.HistoryBenches.HistoryBenchState import benches.Utils._ import encry.view.history.History import encryBenchmark.BenchSettings import org.encryfoundation.common.modifiers.history.Block import org.openjdk.jmh.annotations._ import org.openjdk.jmh.infra.Blackhole import org.openjdk.jmh.profile.GCProfiler import org.openjdk.jmh.runner.{Runner, RunnerException} import org.openjdk.jmh.runner.options.{OptionsBuilder, TimeValue, VerboseMode} class HistoryBenches { @Benchmark def appendBlocksToHistoryBench(benchStateHistory: HistoryBenchState, bh: Blackhole): Unit = { bh.consume { val history: History = generateHistory(benchStateHistory.settings, getRandomTempDir) benchStateHistory.blocks.foldLeft(history) { case (historyL, block) => historyL.append(block.header) historyL.append(block.payload) historyL.reportModifierIsValid(block) } history.closeStorage() } } @Benchmark def readHistoryFileBench(benchStateHistory: HistoryBenchState, bh: Blackhole): Unit = { bh.consume { val history: History = generateHistory(benchStateHistory.settings, benchStateHistory.tmpDir) history.closeStorage() } } } object HistoryBenches extends BenchSettings { @throws[RunnerException] def main(args: Array[String]): Unit = { val opt = new OptionsBuilder() .include(".*" + classOf[HistoryBenches].getSimpleName + ".*") .forks(1) .threads(1) .warmupIterations(benchSettings.benchesSettings.warmUpIterations) .measurementIterations(benchSettings.benchesSettings.measurementIterations) .mode(Mode.AverageTime) .timeUnit(TimeUnit.SECONDS) .verbosity(VerboseMode.EXTRA) .addProfiler(classOf[GCProfiler]) .warmupTime(TimeValue.milliseconds(benchSettings.benchesSettings.warmUpTime)) .measurementTime(TimeValue.milliseconds(benchSettings.benchesSettings.measurementTime)) .build new Runner(opt).run } @State(Scope.Benchmark) class HistoryBenchState extends encry.settings.Settings { val tmpDir: File = getRandomTempDir val initialHistory: History = generateHistory(settings, tmpDir) val resultedHistory: (History, Option[Block], Vector[Block]) = (0 until benchSettings.historyBenchSettings.blocksNumber) .foldLeft(initialHistory, Option.empty[Block], Vector.empty[Block]) { case ((prevHistory, prevBlock, vector), _) => val block: Block = generateNextBlockValidForHistory(prevHistory, 0, prevBlock, Seq(coinbaseTransaction(0))) prevHistory.append(block.header) prevHistory.append(block.payload) (prevHistory.reportModifierIsValid(block), Some(block), vector :+ block) } resultedHistory._1.closeStorage() val blocks: Vector[Block] = resultedHistory._3 } }
Example 53
Source File: StateRollbackBench.scala From EncryCore with GNU General Public License v3.0 | 5 votes |
package benches import java.io.File import java.util.concurrent.TimeUnit import benches.StateRollbackBench.StateRollbackState import benches.Utils._ import encry.storage.VersionalStorage import encry.utils.CoreTaggedTypes.VersionTag import encry.view.state.{BoxHolder, UtxoState} import encryBenchmark.{BenchSettings, Settings} import org.encryfoundation.common.modifiers.history.Block import org.encryfoundation.common.modifiers.state.box.AssetBox import org.encryfoundation.common.utils.TaggedTypes.{ADKey, Difficulty} import org.openjdk.jmh.annotations.{Benchmark, Mode, Scope, State} import org.openjdk.jmh.infra.Blackhole import org.openjdk.jmh.profile.GCProfiler import org.openjdk.jmh.runner.{Runner, RunnerException} import org.openjdk.jmh.runner.options.{OptionsBuilder, TimeValue, VerboseMode} class StateRollbackBench { @Benchmark def applyBlocksToTheState(stateBench: StateRollbackState, bh: Blackhole): Unit = { bh.consume { val innerState: UtxoState = utxoFromBoxHolder(stateBench.boxesHolder, getRandomTempDir, None, stateBench.settings, VersionalStorage.IODB) val newState = stateBench.chain.foldLeft(innerState -> List.empty[VersionTag]) { case ((state, rootHashes), block) => val newState = state.applyModifier(block).right.get newState -> (rootHashes :+ newState.version) } val stateAfterRollback = newState._1.rollbackTo(newState._2.dropRight(1).last, List.empty).get val stateAfterForkBlockApplying = stateAfterRollback.applyModifier(stateBench.forkBlocks.last).right.get stateAfterForkBlockApplying.close() } } } object StateRollbackBench extends BenchSettings { @throws[RunnerException] def main(args: Array[String]): Unit = { val opt = new OptionsBuilder() .include(".*" + classOf[StateRollbackBench].getSimpleName + ".*") .forks(1) .threads(1) .warmupIterations(benchSettings.benchesSettings.warmUpIterations) .measurementIterations(benchSettings.benchesSettings.measurementIterations) .mode(Mode.AverageTime) .timeUnit(TimeUnit.SECONDS) .verbosity(VerboseMode.EXTRA) .addProfiler(classOf[GCProfiler]) .warmupTime(TimeValue.milliseconds(benchSettings.benchesSettings.warmUpTime)) .measurementTime(TimeValue.milliseconds(benchSettings.benchesSettings.measurementTime)) .build new Runner(opt).run } @State(Scope.Benchmark) class StateRollbackState extends encry.settings.Settings { val tmpDir: File = getRandomTempDir val initialBoxes: IndexedSeq[AssetBox] = (0 until benchSettings.stateBenchSettings.totalBoxesNumber).map(nonce => genHardcodedBox(privKey.publicImage.address.address, nonce) ) val boxesHolder: BoxHolder = BoxHolder(initialBoxes) var state: UtxoState = utxoFromBoxHolder(boxesHolder, tmpDir, None, settings, VersionalStorage.LevelDB) val genesisBlock: Block = generateGenesisBlockValidForState(state) state = state.applyModifier(genesisBlock).right.get val stateGenerationResults: (List[(Block, Block)], Block, UtxoState, IndexedSeq[AssetBox]) = (0 until benchSettings.stateBenchSettings.blocksNumber).foldLeft(List.empty[(Block, Block)], genesisBlock, state, initialBoxes) { case ((blocks, block, stateL, boxes), _) => val nextBlockMainChain: Block = generateNextBlockForStateWithSpendingAllPreviousBoxes( block, stateL, block.payload.txs.flatMap(_.newBoxes.map(_.asInstanceOf[AssetBox])).toIndexedSeq) val nextBlockFork: Block = generateNextBlockForStateWithSpendingAllPreviousBoxes( block, stateL, block.payload.txs.flatMap(_.newBoxes.map(_.asInstanceOf[AssetBox])).toIndexedSeq, addDiff = Difficulty @@ BigInt(100) ) val stateN: UtxoState = stateL.applyModifier(nextBlockMainChain).right.get (blocks :+ (nextBlockMainChain, nextBlockFork), nextBlockMainChain, stateN, boxes.drop( benchSettings.stateBenchSettings.transactionsNumberInEachBlock * benchSettings.stateBenchSettings.numberOfInputsInOneTransaction) ) } val chain: List[Block] = genesisBlock +: stateGenerationResults._1.map(_._1) val forkBlocks: List[Block] = genesisBlock +: stateGenerationResults._1.map(_._2) state = stateGenerationResults._3 state.close() } }
Example 54
Source File: SparkConfig.scala From gsoc_relationship with Apache License 2.0 | 5 votes |
package com.holmesprocessing.analytics.relationship import java.io.File import org.apache.spark.{SparkConf, SparkContext} import com.typesafe.config.ConfigFactory object SparkConfig { val config = ConfigFactory.parseFile(new File("./config/relationship.conf")) val hosts = "hosts" val username = "username" val password = "password" val keyspace = "keyspace" val analytics_knowledge_base = "analytics_knowledge_base" val analytics_mv_knowledge_base_by_feature = "analytics_mv_knowledge_base_by_feature" val analytics_primary_relationships = "analytics_primary_relationships" val results = "results" val results_meta = "results_meta" val results_data = "results_data" val objects_table = "objects_table" val appName = "relationship" val master = "localhost" val sparkconf = new SparkConf(true) .set("spark.cassandra.connection.host", hosts) .set("spark.cassandra.auth.username", username) .set("spark.cassandra.auth.password", password) val sc = new SparkContext(master, appName, sparkconf) }
Example 55
Source File: GoogleAuthentication.scala From amadou with Apache License 2.0 | 5 votes |
package com.mediative.amadou.bigquery import java.io.{File, FileReader} import scala.collection.JavaConversions._ import com.google.api.client.extensions.java6.auth.oauth2.AuthorizationCodeInstalledApp import com.google.api.client.extensions.jetty.auth.oauth2.LocalServerReceiver import com.google.api.client.googleapis.auth.oauth2.{ GoogleAuthorizationCodeFlow, GoogleClientSecrets } import com.google.api.client.http.{HttpRequest, HttpRequestInitializer} import com.google.api.client.http.javanet.NetHttpTransport import com.google.api.client.json.jackson2.JacksonFactory import com.google.api.client.util.store.FileDataStoreFactory import org.apache.spark.sql.SparkSession sealed abstract class GoogleAuthentication(val scopes: String*) object GoogleAuthentication { lazy val HTTP_TRANSPORT = new NetHttpTransport() lazy val JSON_FACTORY = new JacksonFactory() case object Dbm extends GoogleAuthentication("https://www.googleapis.com/auth/doubleclickbidmanager") def apply(auth: GoogleAuthentication, spark: SparkSession): HttpRequestInitializer = auth match { case Dbm => val clientFilePath = spark.conf.get("spark.google.cloud.auth.client.file") require(clientFilePath != null, "'google.cloud.auth.client.file' not configured") val clientFile = new File(clientFilePath) require(clientFile.exists, s"$clientFilePath does not exists") val clientSecrets = GoogleClientSecrets.load(JSON_FACTORY, new FileReader(clientFile)) val dataStoreFactory = new FileDataStoreFactory(clientFile.getParentFile) val flow = new GoogleAuthorizationCodeFlow.Builder( HTTP_TRANSPORT, JSON_FACTORY, clientSecrets, auth.scopes) .setDataStoreFactory(dataStoreFactory) .build() val cred = new AuthorizationCodeInstalledApp(flow, new LocalServerReceiver()) .authorize("user") new CustomHttpRequestInitializer(cred) } class CustomHttpRequestInitializer(wrapped: HttpRequestInitializer) extends HttpRequestInitializer { override def initialize(httpRequest: HttpRequest) = { wrapped.initialize(httpRequest) httpRequest.setConnectTimeout(10 * 60000) // 10 minutes connect timeout httpRequest.setReadTimeout(10 * 60000) // 10 minutes read timeout () } } }
Example 56
Source File: config.scala From spark-integration with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.k8s.integrationtest import java.io.File import com.google.common.base.Charsets import com.google.common.io.Files package object config { def getTestImageTag: String = { val imageTagFileProp = System.getProperty("spark.kubernetes.test.imageTagFile") require(imageTagFileProp != null, "Image tag file must be provided in system properties.") val imageTagFile = new File(imageTagFileProp) require(imageTagFile.isFile, s"No file found for image tag at ${imageTagFile.getAbsolutePath}.") Files.toString(imageTagFile, Charsets.UTF_8).trim } def getTestImageRepo: String = { val imageRepo = System.getProperty("spark.kubernetes.test.imageRepo") require(imageRepo != null, "Image repo must be provided in system properties.") imageRepo } }
Example 57
Source File: Minikube.scala From spark-integration with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.k8s.integrationtest.backend.minikube import java.io.File import java.nio.file.Paths import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient} import org.apache.spark.deploy.k8s.integrationtest.{Logging, ProcessUtils} // TODO support windows private[spark] object Minikube extends Logging { private val MINIKUBE_STARTUP_TIMEOUT_SECONDS = 60 def getMinikubeIp: String = { val outputs = executeMinikube("ip") .filter(_.matches("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$")) assert(outputs.size == 1, "Unexpected amount of output from minikube ip") outputs.head } def getMinikubeStatus: MinikubeStatus.Value = { val statusString = executeMinikube("status") .filter(line => line.contains("minikubeVM: ") || line.contains("minikube:")) .head .replaceFirst("minikubeVM: ", "") .replaceFirst("minikube: ", "") MinikubeStatus.unapply(statusString) .getOrElse(throw new IllegalStateException(s"Unknown status $statusString")) } def getKubernetesClient: DefaultKubernetesClient = { val kubernetesMaster = s"https://${getMinikubeIp}:8443" val userHome = System.getProperty("user.home") val kubernetesConf = new ConfigBuilder() .withApiVersion("v1") .withMasterUrl(kubernetesMaster) .withCaCertFile(Paths.get(userHome, ".minikube", "ca.crt").toFile.getAbsolutePath) .withClientCertFile(Paths.get(userHome, ".minikube", "apiserver.crt").toFile.getAbsolutePath) .withClientKeyFile(Paths.get(userHome, ".minikube", "apiserver.key").toFile.getAbsolutePath) .build() new DefaultKubernetesClient(kubernetesConf) } private def executeMinikube(action: String, args: String*): Seq[String] = { ProcessUtils.executeProcess( Array("bash", "-c", s"minikube $action") ++ args, MINIKUBE_STARTUP_TIMEOUT_SECONDS) } } private[spark] object MinikubeStatus extends Enumeration { // The following states are listed according to // https://github.com/docker/machine/blob/master/libmachine/state/state.go. val STARTING = status("Starting") val RUNNING = status("Running") val PAUSED = status("Paused") val STOPPING = status("Stopping") val STOPPED = status("Stopped") val ERROR = status("Error") val TIMEOUT = status("Timeout") val SAVED = status("Saved") val NONE = status("") def status(value: String): Value = new Val(nextId, value) def unapply(s: String): Option[Value] = values.find(s == _.toString) }
Example 58
Source File: RMCallbackHandler.scala From DataXServer with Apache License 2.0 | 5 votes |
package org.tianlangstudio.data.hamal.yarn import java.io.File import java.util.{Collections, List} import org.tianlangstudio.data.hamal.core.{Constants, HamalConf} import org.tianlangstudio.data.hamal.core.HamalConf //import java.util.Collections import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path, FileContext} import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.client.api.{AMRMClient, NMClient} import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.util.{ConverterUtils, Records} import scala.jdk.CollectionConverters._ //import scala.collection.JavaConverters._ /** * Created by zhuhq on 2016/4/29. */ class RMCallbackHandler(nmClient:NMClient,containerCmd:Container => String,hamalConf: HamalConf,yarnConfiguration: Configuration) extends AMRMClientAsync.CallbackHandler { private val logging = org.slf4j.LoggerFactory.getLogger(classOf[RMCallbackHandler]) override def onContainersCompleted(statuses: List[ContainerStatus]): Unit = { for(containerStatus <- statuses.asScala) { logging.info(s"containerId:${containerStatus} exitStatus:${containerStatus}") } } override def onError(e: Throwable): Unit = { logging.error("on error",e) } override def getProgress: Float = { 0 } override def onShutdownRequest(): Unit = { logging.info("on shutdown request") } override def onNodesUpdated(updatedNodes: List[NodeReport]): Unit = { logging.info("on nodes updated") for(nodeReport <- updatedNodes.asScala) { logging.info(s"node id:${nodeReport} node labels:${nodeReport}"); } } override def onContainersAllocated(containers: List[Container]): Unit = { logging.info("on containers allocated"); for (container:Container <- containers.asScala) { try { // Launch container by create ContainerLaunchContext val ctx = Records.newRecord(classOf[ContainerLaunchContext]); //ctx.setCommands(Collections.singletonList(""" echo "begin";sleep 900;echo "end"; """)) ctx.setCommands(Collections.singletonList(containerCmd(container))) val packagePath = hamalConf.getString(Constants.DATAX_EXECUTOR_FILE,"executor.zip"); val archiveStat = FileSystem.get(yarnConfiguration).getFileStatus(new Path(packagePath)) val packageUrl = ConverterUtils.getYarnUrlFromPath( FileContext.getFileContext.makeQualified(new Path(packagePath))); val packageResource = Records.newRecord[LocalResource](classOf[LocalResource]) packageResource.setResource(packageUrl); packageResource.setSize(archiveStat.getLen); packageResource.setTimestamp(archiveStat.getModificationTime); packageResource.setType(LocalResourceType.ARCHIVE); packageResource.setVisibility(LocalResourceVisibility.APPLICATION) ctx.setLocalResources(Collections.singletonMap(Constants.DATAX_EXECUTOR_ARCHIVE_FILE_NAME,packageResource)) logging.info("[AM] Launching container " + container.getId()); nmClient.startContainer(container, ctx); } catch { case ex:Exception => logging.info("[AM] Error launching container " + container.getId() + " " + ex); } } } }
Example 59
Source File: FileUtil.scala From wookiee with Apache License 2.0 | 5 votes |
package com.webtrends.harness.utils import java.io.File import java.nio.file.{FileSystems, Files, Path} import scala.io.Source object FileUtil { def getSymLink(f:File) : File = { if (f == null) throw new NullPointerException("File must not be null") val path = FileSystems.getDefault.getPath(f.getPath) if (Files.isSymbolicLink(path)) { f.getCanonicalFile } else { f.getAbsoluteFile } } }
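A short usage sketch of the symlink resolution above, checking the path with java.nio and falling back to the absolute file (the path passed in main is illustrative):

import java.io.File
import java.nio.file.{FileSystems, Files}

object SymlinkCheck {
  // Resolve symlinks to their canonical file, otherwise return the absolute file,
  // matching the behaviour of FileUtil.getSymLink above.
  def resolve(f: File): File = {
    val path = FileSystems.getDefault.getPath(f.getPath)
    if (Files.isSymbolicLink(path)) f.getCanonicalFile else f.getAbsoluteFile
  }

  def main(args: Array[String]): Unit =
    println(resolve(new File("/tmp")).getPath)
}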
Example 60
Source File: ConfigSpec.scala From wookiee with Apache License 2.0 | 5 votes |
package com.webtrends.harness import java.io.{BufferedWriter, File, FileWriter} import java.util.concurrent.TimeUnit import akka.actor.{Actor, ActorSystem, Props} import akka.testkit.TestProbe import com.typesafe.config.ConfigFactory import com.webtrends.harness.app.HarnessActor.ConfigChange import com.webtrends.harness.config.ConfigWatcherActor import com.webtrends.harness.health.{ComponentState, HealthComponent} import com.webtrends.harness.service.messages.CheckHealth import org.specs2.mutable.SpecificationWithJUnit import scala.concurrent.ExecutionContextExecutor import scala.concurrent.duration.FiniteDuration import scala.reflect.io.{Directory, Path} class ConfigSpec extends SpecificationWithJUnit { implicit val dur = FiniteDuration(2, TimeUnit.SECONDS) new File("services/test/conf").mkdirs() implicit val sys = ActorSystem("system", ConfigFactory.parseString( """ akka.actor.provider = "akka.actor.LocalActorRefProvider" services { path = "services" } """).withFallback(ConfigFactory.load)) implicit val ec: ExecutionContextExecutor = sys.dispatcher val probe = TestProbe() val parent = sys.actorOf(Props(new Actor { val child = context.actorOf(ConfigWatcherActor.props, "child") def receive = { case x if sender == child => probe.ref forward x case x => child forward x } })) sequential "config " should { "be in good health" in { probe.send(parent, CheckHealth) val msg = probe.expectMsgClass(classOf[HealthComponent]) msg.state equals ComponentState.NORMAL } "detect changes in config" in { val file = new File("services/test/conf/test.conf") val bw = new BufferedWriter(new FileWriter(file)) bw.write("test = \"value\"") bw.close() val msg = probe.expectMsgClass(classOf[ConfigChange]) msg.isInstanceOf[ConfigChange] } } step { sys.terminate().onComplete { _ => Directory(Path(new File("services"))).deleteRecursively() } } }
Example 61
Source File: SparkFunSuite.scala From tispark with Apache License 2.0 | 5 votes |
package org.apache.spark import java.io.File import org.apache.spark.internal.Logging import org.scalatest._ import org.slf4j.Logger abstract class SparkFunSuite extends FunSuite with Logging { protected val logger: Logger = log final protected override def withFixture(test: NoArgTest): Outcome = { val testName = test.text val suiteName = this.getClass.getName val shortSuiteName = suiteName.replaceAll("org.apache.spark", "o.a.s") try { logInfo(s"\n\n===== TEST OUTPUT FOR $shortSuiteName: '$testName' =====\n") test() } finally { logInfo(s"\n\n===== FINISHED $shortSuiteName: '$testName' =====\n") } } protected final def getTestResourcePath(file: String): String = getTestResourceFile(file).getCanonicalPath // helper function protected final def getTestResourceFile(file: String): File = new File(getClass.getClassLoader.getResource(file).getFile) }
Example 62
Source File: TPCDSQuerySuite.scala From tispark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.benchmark import java.io.File import org.apache.spark.sql.BaseTiSparkTest import org.apache.spark.sql.catalyst.util.resourceToString import scala.collection.mutable class TPCDSQuerySuite extends BaseTiSparkTest { private val tpcdsDirectory = getClass.getResource("/tpcds-sql").getPath private val tpcdsQueries = getListOfFiles(tpcdsDirectory) private def getListOfFiles(dir: String): List[String] = { val d = new File(dir) if (d.exists && d.isDirectory) { d.listFiles.filter(_.isFile).map(_.getName.stripSuffix(".sql")).toList } else { List[String]() } } private def run(queries: List[String], numRows: Int = 1, timeout: Int = 0): Unit = try { // set broadcast threshold to -1 so it will not oom spark.conf.set("spark.sql.autoBroadcastJoinThreshold", -1) setCurrentDatabase(tpcdsDBName) val succeeded = mutable.ArrayBuffer.empty[String] queries.foreach { q => println(s"Query: $q") val start = System.currentTimeMillis() // We do not use statistic information here due to conflict of netty versions when physical plan has broadcast nodes. val queryString = resourceToString( s"tpcds-sql/$q.sql", classLoader = Thread.currentThread().getContextClassLoader) val df = spark.sql(queryString) var failed = false val jobGroup = s"benchmark $q" val t = new Thread("query runner") { override def run(): Unit = try { sqlContext.sparkContext.setJobGroup(jobGroup, jobGroup, interruptOnCancel = true) df.show(numRows) } catch { case e: Exception => println("Failed to run: " + e) failed = true } } t.setDaemon(true) t.start() t.join(timeout) if (t.isAlive) { println(s"Timeout after $timeout seconds") sqlContext.sparkContext.cancelJobGroup(jobGroup) t.interrupt() } else { if (!failed) { succeeded += q println(s" Took: ${System.currentTimeMillis() - start} ms") println("------------------------------------------------------------------") } } queryViaTiSpark(queryString) println(s"TiSpark finished $q") } } catch { case e: Throwable => println(s"TiSpark failed to run TPCDS") fail(e) } test("TPCDS Test") { if (runTPCDS) { run(tpcdsQueries) } } }
Example 63
Source File: Utils.scala From tispark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.test import java.io.{File, PrintWriter} import java.nio.file.{Files, Paths} import java.util.Properties import org.slf4j.Logger import scala.collection.JavaConversions._ object Utils { def writeFile(content: String, path: String): Unit = TryResource(new PrintWriter(path))(_.close()) { _.print(content) } def TryResource[T](res: T)(closeOp: T => Unit)(taskOp: T => Unit): Unit = try { taskOp(res) } finally { closeOp(res) } def readFile(path: String): List[String] = Files.readAllLines(Paths.get(path)).toList def getOrThrow(prop: Properties, key: String): String = { val jvmProp = System.getProperty(key) if (jvmProp != null) { jvmProp } else { val v = prop.getProperty(key) if (v == null) { throw new IllegalArgumentException(key + " is null") } else { v } } } def getFlagOrFalse(prop: Properties, key: String): Boolean = getFlag(prop, key, "false") private def getFlag(prop: Properties, key: String, defValue: String): Boolean = getOrElse(prop, key, defValue).equalsIgnoreCase("true") def getOrElse(prop: Properties, key: String, defValue: String): String = { val jvmProp = System.getProperty(key) if (jvmProp != null) { jvmProp } else { Option(prop.getProperty(key)).getOrElse(defValue) } } def getFlagOrTrue(prop: Properties, key: String): Boolean = getFlag(prop, key, "true") def time[R](block: => R)(logger: Logger): R = { val t0 = System.nanoTime() val result = block val t1 = System.nanoTime() logger.info("Elapsed time: " + (t1 - t0) / 1000.0 / 1000.0 / 1000.0 + "s") result } def ensurePath(basePath: String, paths: String*): Boolean = new File(joinPath(basePath, paths: _*)).mkdirs() def joinPath(basePath: String, paths: String*): String = Paths.get(basePath, paths: _*).toAbsolutePath.toString }
Example 64
Source File: RedisBenchmarks.scala From spark-redis with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.redislabs.provider.redis import java.io.{File, FileWriter, PrintWriter} import java.time.{Duration => JDuration} import com.redislabs.provider.redis.util.Logging trait RedisBenchmarks extends Logging { val benchmarkReportDir = new File("target/reports/benchmarks/") benchmarkReportDir.mkdirs() def time[R](tag: String)(block: => R): R = { val t0 = System.nanoTime() val result = block // call-by-name val t1 = System.nanoTime() new PrintWriter(new FileWriter(s"$benchmarkReportDir/results.txt", true)) { // scalastyle:off this.println(s"$tag, ${JDuration.ofNanos(t1 - t0)}") close() } result } }
Example 65
Source File: JsonReceiverActor.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
package org.apache.iota.fey import java.nio.file.Paths import java.io.File import akka.actor.{Actor, ActorLogging, ActorRef, Props} import play.api.libs.json.{JsValue, Json} class JsonReceiverActor extends Actor with ActorLogging { import JsonReceiverActor._ val monitoring_actor = FEY_MONITOR.actorRef var watchFileTask: WatchServiceReceiver = _ var watchThread: Thread = _ override def preStart() { prepareDynamicJarRepo() processCheckpointFiles() watchFileTask = new WatchServiceReceiver(self) watchThread = new Thread(watchFileTask, GLOBAL_DEFINITIONS.WATCH_SERVICE_THREAD) monitoring_actor ! Monitor.START(Utils.getTimestamp) watchThread.setDaemon(true) watchThread.start() watchFileTask.watch(Paths.get(CONFIG.JSON_REPOSITORY)) } private def prepareDynamicJarRepo() = { val jarDir = new File(CONFIG.DYNAMIC_JAR_REPO) if (!jarDir.exists()){ jarDir.mkdir() }else if(CONFIG.DYNAMIC_JAR_FORCE_PULL){ jarDir.listFiles().foreach(_.delete()) } } private def processCheckpointFiles() = { if (CONFIG.CHEKPOINT_ENABLED) { val checkpoint = new CheckpointProcessor(self) checkpoint.run() } } override def postStop() { monitoring_actor ! Monitor.STOP(Utils.getTimestamp) watchThread.interrupt() watchThread.join() } override def postRestart(reason: Throwable): Unit = { monitoring_actor ! Monitor.RESTART(reason, Utils.getTimestamp) preStart() } override def receive: Receive = { case JSON_RECEIVED(json, file) => log.info(s"JSON RECEIVED => ${Json.stringify(json)}") context.parent ! FeyCore.ORCHESTRATION_RECEIVED(json, Some(file)) case _ => } } object JsonReceiverActor { case class JSON_RECEIVED(json: JsValue, file: File) }
Example 66
Source File: WatchServiceReceiver.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
package org.apache.iota.fey import java.nio.file.StandardWatchEventKinds._ import java.nio.file.{FileSystems, Path} import java.io.File import akka.actor.ActorRef import org.apache.iota.fey.JsonReceiverActor.JSON_RECEIVED import play.api.libs.json._ import scala.io.Source class WatchServiceReceiver(receiverActor: ActorRef) extends JsonReceiver{ processInitialFiles() private val watchService = FileSystems.getDefault.newWatchService() def watch(path: Path) : Unit = path.register(watchService, ENTRY_CREATE, ENTRY_MODIFY) def getJsonObject(params: String): Option[JsValue] = { try{ val stringJson = Source.fromFile(params).getLines.mkString Option(Json.parse(stringJson)) }catch{ case e: Exception => log.error("Could not parse JSON", e) None } } override def execute(): Unit = { val key = watchService.take() val eventsIterator = key.pollEvents().iterator() while(eventsIterator.hasNext) { val event = eventsIterator.next() val relativePath = event.context().asInstanceOf[Path] val path = key.watchable().asInstanceOf[Path].resolve(relativePath) log.debug(s"${event.kind()} --- $path") event.kind() match { case (ENTRY_CREATE | ENTRY_MODIFY) if path.toString.endsWith(CONFIG.JSON_EXTENSION) => processJson(path.toString, path.toFile) case _ => } } key.reset() } private[fey] def processJson(path: String, file: File) = { try{ getJsonObject(path) match { case Some(orchestrationJSON) => val valid = validJson(orchestrationJSON) if(valid && (orchestrationJSON \ JSON_PATH.COMMAND).as[String].toUpperCase != "DELETE"){ checkForLocation(orchestrationJSON) } if(valid) { receiverActor ! JSON_RECEIVED(orchestrationJSON, file) }else{ log.warn(s"File $path not processed. Incorrect JSON schema") } case None => } } catch { case e: Exception => log.error(s"File $path will not be processed", e) } } private def processInitialFiles() = { Utils.getFilesInDirectory(CONFIG.JSON_REPOSITORY) .filter(file => file.getName.endsWith(CONFIG.JSON_EXTENSION)) .foreach(file => { processJson(file.getAbsolutePath, file) }) } override def exceptionOnRun(e: Exception): Unit = { e match { case e: InterruptedException => case e: Exception => log.error("Watch Service stopped", e) } watchService.close() } }
Example 67
Source File: FeyGenericActorReceiver.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
package org.apache.iota.fey import java.io.{File, FileOutputStream} import java.net.URL import java.nio.file.{Files, Paths} import com.eclipsesource.schema._ import akka.actor.ActorRef import com.eclipsesource.schema.SchemaValidator import org.apache.commons.io.IOUtils import play.api.libs.json._ import scala.concurrent.duration._ import scala.util.Properties._ abstract class FeyGenericActorReceiver(override val params: Map[String,String] = Map.empty, override val backoff: FiniteDuration = 1.minutes, override val connectTo: Map[String,ActorRef] = Map.empty, override val schedulerTimeInterval: FiniteDuration = 2.seconds, override val orchestrationName: String = "", override val orchestrationID: String = "", override val autoScale: Boolean = false) extends FeyGenericActor{ private[fey] val feyCore = FEY_CORE_ACTOR.actorRef override final def processMessage[T](message: T, sender: ActorRef): Unit = { try { val jsonString = getJSONString(message) if(jsonString != "{}") { processJson(jsonString) } startBackoff() }catch{ case e: Exception => log.error(e, s"Could not process message $message") } } private[fey] def processJson(jsonString: String) = { var orchID:String = "None" try{ val orchestrationJSON = Json.parse(jsonString) orchID = (orchestrationJSON \ JSON_PATH.GUID).as[String] val valid = validJson(orchestrationJSON) if(valid && (orchestrationJSON \ JSON_PATH.COMMAND).as[String].toUpperCase != "DELETE"){ checkForLocation(orchestrationJSON) } if(valid) { feyCore ! FeyCore.ORCHESTRATION_RECEIVED(orchestrationJSON, None) }else{ log.warning(s"Could not forward Orchestration $orchID. Invalid JSON schema") } } catch { case e: Exception => log.error(e, s"Orchestration $orchID could not be forwarded") } } def resolveCredentials(credentials: Option[JsObject]):Option[(String, String)] = { credentials match { case None => None case Some(cred) => val user = (cred \ JSON_PATH.JAR_CRED_USER).as[String] val password = (cred \ JSON_PATH.JAR_CRED_PASSWORD).as[String] Option(envOrElse(user,user), envOrElse(password,password)) } } }
Example 68
Source File: CheckpointProcessor.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
package org.apache.iota.fey import java.io.File import akka.actor.ActorRef import org.apache.iota.fey.JsonReceiverActor.JSON_RECEIVED import play.api.libs.json.{JsValue, Json} import scala.io.Source class CheckpointProcessor(receiverActor: ActorRef) extends JsonReceiver{ override def run(): Unit = { processCheckpointFiles() } def getJsonObject(params: String): Option[JsValue] = { try{ val stringJson = Source.fromFile(params).getLines.mkString Option(Json.parse(stringJson)) }catch{ case e: Exception => log.error("Could not parse JSON", e) None } } private def processJson(path: String, file: File) = { try{ getJsonObject(path) match { case Some(orchestrationJSON) => val valid = validJson(orchestrationJSON) if(valid && (orchestrationJSON \ JSON_PATH.COMMAND).as[String].toUpperCase != "DELETE"){ checkForLocation(orchestrationJSON) } if(valid) { receiverActor ! JSON_RECEIVED(orchestrationJSON, file) }else{ log.warn(s"File $path not processed. Incorrect JSON schema") } file.delete() case None => } } catch { case e: Exception => log.error(s"File $path will not be processed", e) } } private def processCheckpointFiles() = { Utils.getFilesInDirectory(CONFIG.CHECKPOINT_DIR) .filter(file => file.getName.endsWith(CONFIG.JSON_EXTENSION)) .foreach(file => { processJson(file.getAbsolutePath, file) }) } override def execute(): Unit = {} override def exceptionOnRun(e: Exception): Unit = {} }
Example 69
Source File: JsonReceiver.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
package org.apache.iota.fey import java.io.FileOutputStream import java.net.URL import java.io.File import com.eclipsesource.schema._ import org.slf4j.LoggerFactory import play.api.libs.json._ import JSON_PATH._ import java.nio.file.{Files, Paths} import org.apache.commons.io.IOUtils import org.apache.commons.codec.binary.Base64 import scala.util.Properties._ trait JsonReceiver extends Runnable { /* declaration restored: the listing dropped the enclosing trait; run(), the JSON validation helpers and the jar-download logic from the full source are omitted here */ def execute(): Unit def exceptionOnRun(e: Exception): Unit } object HttpBasicAuth { val BASIC = "Basic" val AUTHORIZATION = "Authorization" def encodeCredentials(username: String, password: String): String = { new String(Base64.encodeBase64((username + ":" + password).getBytes)) } def getHeader(username: String, password: String): String = BASIC + " " + encodeCredentials(username, password) }
Example 70
Source File: WatchServiceReceiverSpec.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
package org.apache.iota.fey import java.nio.file.{Files, Paths} import java.nio.charset.StandardCharsets import akka.testkit.{EventFilter, TestProbe} import scala.concurrent.duration.{DurationInt, FiniteDuration} import java.io.File import ch.qos.logback.classic.Level class WatchServiceReceiverSpec extends BaseAkkaSpec{ val watcherTB = TestProbe("WATCH-SERVICE") var watchFileTask:WatchServiceReceiver = _ val watchTestDir = s"${CONFIG.JSON_REPOSITORY}/watchtest" "Creating WatchServiceReceiver" should { "process initial files in the JSON repository" in { CONFIG.JSON_EXTENSION = "json.not" watchFileTask = new WatchServiceReceiver(watcherTB.ref) watcherTB.expectMsgAllClassOf(classOf[JsonReceiverActor.JSON_RECEIVED]) CONFIG.JSON_EXTENSION = "json.test" } } var watchThread: Thread = _ "Start a Thread with WatchServiceReceiver" should { "Start Thread" in { watchThread = new Thread(watchFileTask, "TESTING-WATCHER-IN-THREAD") watchThread.setDaemon(true) watchThread.start() TestProbe().isThreadRunning("TESTING-WATCHER-IN-THREAD") should be(true) } } "Start watching directory" should { "Starting receiving CREATED event" taggedAs(SlowTest) in { watchFileTask.watch(Paths.get(watchTestDir)) Files.write(Paths.get(s"$watchTestDir/watched.json.test"), Utils_JSONTest.create_json_test.getBytes(StandardCharsets.UTF_8)) watcherTB.expectMsgAllClassOf(20.seconds, classOf[JsonReceiverActor.JSON_RECEIVED]) } "Starting receiving UPDATE event" taggedAs(SlowTest) in { Files.write(Paths.get(s"$watchTestDir/watched-update.json.test"), Utils_JSONTest.delete_json_test.getBytes(StandardCharsets.UTF_8)) Thread.sleep(200) Files.write(Paths.get(s"$watchTestDir/watched-update.json.test"), Utils_JSONTest.create_json_test.getBytes(StandardCharsets.UTF_8)) watcherTB.expectMsgAllClassOf(20.seconds, classOf[JsonReceiverActor.JSON_RECEIVED]) } } "processJson" should { "log to warn level when json has invalid schema" in { Files.write(Paths.get(s"$watchTestDir/watched-invalid.json.test"), Utils_JSONTest.test_json_schema_invalid.getBytes(StandardCharsets.UTF_8)) watchFileTask.processJson(s"$watchTestDir/watched-invalid.json.test",new File(s"$watchTestDir/watched-invalid.json.test")) s"File $watchTestDir/watched-invalid.json.test not processed. Incorrect JSON schema" should beLoggedAt(Level.WARN) } } "interrupt watchservice" should{ "interrupt thread" in { watchThread.interrupt() } } }
Example 71
Source File: TestSetup.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
package org.apache.iota.fey import java.io.File import java.nio.file.Paths import org.apache.commons.io.FileUtils import org.scalatest.Tag object TestSetup { private var runSetup = true val configTest = getClass.getResource("/test-fey-configuration.conf") def setup(): Unit = { if(runSetup){ println("SETTING UP ...") createFeyTmpDirectoriesForTest() copyTestActorToTmp() copyJSONstoTmp() runSetup = false } } private def copyTestActorToTmp(): Unit = { copyResourceFileToLocal("/fey-test-actor.jar",s"${CONFIG.JAR_REPOSITORY}/fey-test-actor.jar") } private def copyJSONstoTmp(): Unit = { copyResourceFileToLocal("/json/valid-json.json",s"${CONFIG.JSON_REPOSITORY}/valid-json.json.not") copyResourceFileToLocal("/json/invalid-json.json",s"${CONFIG.JSON_REPOSITORY}/invalid-json.json.not") } private def copyResourceFileToLocal(resourcePath: String, destination: String): Unit = { val resourceFile = getClass.getResource(resourcePath) val dest = new File(destination) FileUtils.copyURLToFile(resourceFile, dest) } private def createFeyTmpDirectoriesForTest(): Unit = { var file = new File(s"/tmp/fey/test/checkpoint") file.mkdirs() file = new File(s"/tmp/fey/test/json") file.mkdirs() file = new File(s"/tmp/fey/test/json/watchtest") file.mkdirs() file = new File(s"/tmp/fey/test/jars") file.mkdirs() file = new File(s"/tmp/fey/test/jars/dynamic") file.mkdirs() } } object SlowTest extends Tag("org.apache.iota.fey.SlowTest")
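The copyResourceFileToLocal pattern above reduced to a standalone sketch; the resource name and destination path are illustrative, and FileUtils comes from Apache Commons IO.

import java.io.File
import org.apache.commons.io.FileUtils

object CopyResourceSketch extends App {
  // assumes a resource named /example.json exists on the classpath
  val resource = getClass.getResource("/example.json")
  val dest = new File("/tmp/fey/test/json/example.json.not")
  dest.getParentFile.mkdirs()              // File.mkdirs() creates any missing parent directories
  FileUtils.copyURLToFile(resource, dest)  // copies the URL's content into the destination file
  println(s"copied ${dest.length()} bytes to ${dest.getAbsolutePath}")
}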
Example 72
Source File: MultiNodeSupportCassandra.scala From eventuate with Apache License 2.0 | 5 votes |
package com.rbmhtechnology.eventuate import java.io.File import akka.actor.Props import akka.remote.testconductor.RoleName import akka.remote.testkit.MultiNodeSpec import com.rbmhtechnology.eventuate.log.cassandra._ import org.apache.commons.io.FileUtils import org.scalatest.BeforeAndAfterAll trait MultiNodeSupportCassandra extends BeforeAndAfterAll { this: MultiNodeSpec with MultiNodeWordSpec => val coordinator = RoleName("nodeA") def cassandraDir: String = MultiNodeEmbeddedCassandra.DefaultCassandraDir def logProps(logId: String): Props = CassandraEventLog.props(logId) override def atStartup(): Unit = { if (isNode(coordinator)) { MultiNodeEmbeddedCassandra.start(cassandraDir) Cassandra(system) } enterBarrier("startup") } override def afterAll(): Unit = { // get all config data before shutting down node val snapshotRootDir = new File(system.settings.config.getString("eventuate.snapshot.filesystem.dir")) // shut down node super.afterAll() // clean database and delete snapshot files if (isNode(coordinator)) { FileUtils.deleteDirectory(snapshotRootDir) MultiNodeEmbeddedCassandra.clean() } } }
Example 73
Source File: MultiNodeSupportLeveldb.scala From eventuate with Apache License 2.0 | 5 votes |
package com.rbmhtechnology.eventuate import java.io.File import akka.actor.Props import akka.remote.testconductor.RoleName import akka.remote.testkit.MultiNodeSpec import com.rbmhtechnology.eventuate.log.leveldb.LeveldbEventLog import org.apache.commons.io.FileUtils import org.scalatest.BeforeAndAfterAll trait MultiNodeSupportLeveldb extends BeforeAndAfterAll { this: MultiNodeSpec with MultiNodeWordSpec => val coordinator = RoleName("nodeA") def logProps(logId: String): Props = LeveldbEventLog.props(logId) override def afterAll(): Unit = { // get all config data before shutting down node val snapshotRootDir = new File(system.settings.config.getString("eventuate.snapshot.filesystem.dir")) val logRootDir = new File(system.settings.config.getString("eventuate.log.leveldb.dir")) // shut down node super.afterAll() // delete log and snapshot files if (isNode(coordinator)) { FileUtils.deleteDirectory(snapshotRootDir) FileUtils.deleteDirectory(logRootDir) } } }
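Both multi-node traits above resolve storage directories from the ActorSystem config before shutdown and delete them afterwards with FileUtils.deleteDirectory; here is that pattern in isolation, with an inline config standing in for the real settings.

import java.io.File
import com.typesafe.config.ConfigFactory
import org.apache.commons.io.FileUtils

object StorageCleanupSketch extends App {
  // inline stand-in for system.settings.config
  val config = ConfigFactory.parseString(
    """eventuate.log.leveldb.dir = target/test-log
      |eventuate.snapshot.filesystem.dir = target/test-snapshot
      |""".stripMargin)

  val dirs = List("eventuate.log.leveldb.dir", "eventuate.snapshot.filesystem.dir")
    .map(key => new File(config.getString(key)))

  // deleteDirectory removes the directory tree recursively
  dirs.filter(_.exists()).foreach(FileUtils.deleteDirectory)
}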
Example 74
Source File: LocationSpecLeveldb.scala From eventuate with Apache License 2.0 | 5 votes |
package com.rbmhtechnology.eventuate import java.io.File import akka.actor._ import com.rbmhtechnology.eventuate.log._ import com.rbmhtechnology.eventuate.log.leveldb._ import com.rbmhtechnology.eventuate.utilities.RestarterActor import com.typesafe.config.ConfigFactory trait LocationCleanupLeveldb extends LocationCleanup { override def storageLocations: List[File] = List("eventuate.log.leveldb.dir", "eventuate.snapshot.filesystem.dir").map(s => new File(config.getString(s))) } object SingleLocationSpecLeveldb { object TestEventLog { def props(logId: String, batching: Boolean, currentSystemTime: Long = 0): Props = { val logProps = Props(new TestEventLog(logId, currentSystemTime)) .withDispatcher("eventuate.log.dispatchers.write-dispatcher") if (batching) Props(new BatchingLayer(logProps)) else logProps } } class TestEventLog(id: String, override val currentSystemTime: Long = 0) extends LeveldbEventLog(id, "log-test") with SingleLocationSpec.TestEventLog[LeveldbEventLogState] { override def unhandled(message: Any): Unit = message match { case "boom" => throw IntegrationTestException case "dir" => sender() ! logDir case _ => super.unhandled(message) } } } trait SingleLocationSpecLeveldb extends SingleLocationSpec with LocationCleanupLeveldb { import SingleLocationSpecLeveldb._ private var _log: ActorRef = _ override def beforeEach(): Unit = { super.beforeEach() _log = system.actorOf(logProps(logId)) } def log: ActorRef = _log def logProps(logId: String): Props = RestarterActor.props(TestEventLog.props(logId, batching, currentSystemTime)) } trait MultiLocationSpecLeveldb extends MultiLocationSpec with LocationCleanupLeveldb { override val logFactory: String => Props = id => LeveldbEventLog.props(id) override val providerConfig = ConfigFactory.parseString( s""" |eventuate.log.leveldb.dir = target/test-log |eventuate.log.leveldb.index-update-limit = 3 |eventuate.log.leveldb.deletion-retry-delay = 1 ms """.stripMargin) }
Example 75
Source File: YamlHelpers.scala From barstools with BSD 3-Clause "New" or "Revised" License | 5 votes |
package barstools.tapeout.transforms import net.jcazevedo.moultingyaml._ import java.io.File class YamlFileReader(resource: String) { def parse[A](file: String = "")(implicit reader: YamlReader[A]) : Seq[A] = { // If the user doesn't provide a Yaml file name, use defaults val yamlString = file match { case f if f.isEmpty => // Use example config if no file is provided val stream = getClass.getResourceAsStream(resource) io.Source.fromInputStream(stream).mkString case f if new File(f).exists => scala.io.Source.fromFile(f).getLines.mkString("\n") case _ => throw new Exception("No valid Yaml file found!") } yamlString.parseYamls.map(x => reader.read(x)) } }
Example 76
Source File: KinesisProducerIntegrationSpec.scala From reactive-kinesis with Apache License 2.0 | 5 votes |
package com.weightwatchers.reactive.kinesis import java.io.File import com.amazonaws.services.kinesis.producer.{KinesisProducer => AWSKinesisProducer} import com.typesafe.config.ConfigFactory import com.weightwatchers.reactive.kinesis.common.{ KinesisSuite, KinesisTestConsumer, TestCredentials } import com.weightwatchers.reactive.kinesis.consumer.KinesisConsumer.ConsumerConf import com.weightwatchers.reactive.kinesis.models.ProducerEvent import com.weightwatchers.reactive.kinesis.producer.{KinesisProducer, ProducerConf} import org.scalatest.concurrent.Eventually import org.scalatest.mockito.MockitoSugar import org.scalatest.time.{Millis, Seconds, Span} import org.scalatest.{BeforeAndAfterAll, FreeSpec, Matchers} import scala.concurrent.duration._ import scala.language.postfixOps import scala.util.Random //scalastyle:off magic.number class KinesisProducerIntegrationSpec extends FreeSpec with Matchers with MockitoSugar with BeforeAndAfterAll with Eventually with KinesisSuite { implicit val ece = scala.concurrent.ExecutionContext.global val TestStreamNrOfMessagesPerShard: Long = 0 implicit override val patienceConfig: PatienceConfig = PatienceConfig(timeout = Span(5, Seconds), interval = Span(100, Millis)) "The KinesisProducer" - { "Should publish a message to a stream" in new withKinesisConfForApp( "int-test-stream-producer-1" ) { val conf = producerConf() val producer = KinesisProducer(conf) val existingRecordCount = testConsumer.retrieveRecords(conf.streamName, 10).size val event = ProducerEvent("1234", Random.alphanumeric.take(10).mkString) producer.addUserRecord(event) eventually { val records: Seq[String] = testConsumer.retrieveRecords(conf.streamName, 10) records.size shouldBe (existingRecordCount + 1) records should contain( new String(event.payload.array(), java.nio.charset.StandardCharsets.UTF_8) ) } } } } //scalastyle:on
Example 77
Source File: Persister.scala From exodus with MIT License | 5 votes |
package com.wix.bazel.migrator import java.io.File import java.nio.file.attribute.BasicFileAttributes import java.nio.file.{Files, Paths} import java.time.Instant import java.time.temporal.TemporalUnit import java.util import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.wix.bazel.migrator.model.{CodePurpose, Package, Target, TestType} import com.wix.bazel.migrator.utils.{IgnoringIsArchiveDefMixin, IgnoringIsProtoArtifactDefMixin, IgnoringIsWarDefMixin, TypeAddingMixin} import com.wix.build.maven.analysis.SourceModules import com.wixpress.build.maven.{Coordinates, MavenScope, Packaging} import scala.collection.JavaConverters._ object Persister { private val transformedFile = new File("dag.bazel") private val mavenCache = Paths.get("classpathModules.cache") val objectMapper = new ObjectMapper().registerModule(DefaultScalaModule) .addMixIn(classOf[Target], classOf[TypeAddingMixin]) .addMixIn(classOf[CodePurpose], classOf[TypeAddingMixin]) .addMixIn(classOf[TestType], classOf[TypeAddingMixin]) .addMixIn(classOf[MavenScope], classOf[TypeAddingMixin]) .addMixIn(classOf[Packaging], classOf[IgnoringIsArchiveDefMixin]) .addMixIn(classOf[Packaging], classOf[IgnoringIsWarDefMixin]) .addMixIn(classOf[Coordinates], classOf[IgnoringIsProtoArtifactDefMixin]) .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) def persistTransformationResults(bazelPackages: Set[Package]): Unit = { println("Persisting transformation") objectMapper.writeValue(transformedFile, bazelPackages) } def readTransformationResults(): Set[Package] = { val collectionType = objectMapper.getTypeFactory.constructCollectionType(classOf[util.Collection[Package]], classOf[Package]) val value: util.Collection[Package] = objectMapper.readValue(transformedFile, collectionType) val bazelPackages = value.asScala.toSet bazelPackages } def persistMavenClasspathResolution(sourceModules: SourceModules): Unit = { println("Persisting maven") objectMapper.writeValue(mavenCache.toFile, sourceModules) } def readTransMavenClasspathResolution(): SourceModules = { objectMapper.readValue[SourceModules](mavenCache.toFile, classOf[SourceModules]) } def mavenClasspathResolutionIsUnavailableOrOlderThan(amount: Int, unit: TemporalUnit): Boolean = !Files.isReadable(mavenCache) || lastModifiedMavenCache().toInstant.isBefore(Instant.now().minus(amount, unit)) private def lastModifiedMavenCache() = Files.readAttributes(mavenCache, classOf[BasicFileAttributes]).lastModifiedTime() }
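The Jackson round trip used by Persister, reduced to a self-contained sketch with a hypothetical payload type; the Wix-specific mixins and cache paths are omitted.

import java.io.File
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule

// hypothetical payload, declared at the top level so Jackson can instantiate it
case class Note(title: String, tags: List[String])

object JsonFilePersistSketch extends App {
  val mapper = new ObjectMapper().registerModule(DefaultScalaModule)
  val cache = new File("note.cache.json")

  mapper.writeValue(cache, Note("todo", List("bazel", "migrator")))  // serialize to the file
  val restored = mapper.readValue(cache, classOf[Note])              // deserialize it back
  println(restored)
}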
Example 78
Source File: SqliteTestBase.scala From smui with Apache License 2.0 | 5 votes |
package utils import java.io.File import org.scalatest.{BeforeAndAfterAll, Suite} import play.api.db.evolutions.Evolutions import play.api.db.{Database, Databases} trait SqliteTestBase extends BeforeAndAfterAll { self: Suite => private lazy val dbFile = File.createTempFile("sqlitetest", ".db") lazy val db: Database = { // Use a temp file for the database - in-memory DB cannot be used // since it would be a different DB for each connection in the connection pool // (see https://www.sqlite.org/inmemorydb.html) val d = Databases("org.sqlite.JDBC", s"jdbc:sqlite:${dbFile.getAbsolutePath}") Evolutions.applyEvolutions(d) d } override protected def afterAll(): Unit = { super.afterAll() db.shutdown() dbFile.delete() } }
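The temp-file lifecycle from SqliteTestBase in plain java.io.File terms; the prefix and suffix are arbitrary.

import java.io.File

object TempFileSketch extends App {
  // create an empty file in the system temp directory and hand its path to code that needs a real file
  val dbFile = File.createTempFile("sqlitetest", ".db")
  println(s"jdbc:sqlite:${dbFile.getAbsolutePath}")

  // delete it once the consumer is done, mirroring afterAll() above
  dbFile.delete()
}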
Example 79
Source File: Preprocess.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package com.packt.ScalaML.BitCoin import java.io.{ BufferedWriter, File, FileWriter } import org.apache.spark.sql.types.{ DoubleType, IntegerType, StructField, StructType } import org.apache.spark.sql.{ DataFrame, Row, SparkSession } import scala.collection.mutable.ListBuffer object Preprocess { //how many of first rows are omitted val dropFirstCount: Int = 612000 def rollingWindow(data: DataFrame, window: Int, xFilename: String, yFilename: String): Unit = { var i = 0 val xWriter = new BufferedWriter(new FileWriter(new File(xFilename))) val yWriter = new BufferedWriter(new FileWriter(new File(yFilename))) val zippedData = data.rdd.zipWithIndex().collect() System.gc() val dataStratified = zippedData.drop(dropFirstCount) //todo slice fisrt 614K while (i < (dataStratified.length - window)) { val x = dataStratified .slice(i, i + window) .map(r => r._1.getAs[Double]("Delta")).toList val y = dataStratified.apply(i + window)._1.getAs[Integer]("label") val stringToWrite = x.mkString(",") xWriter.write(stringToWrite + "\n") yWriter.write(y + "\n") i += 1 if (i % 10 == 0) { xWriter.flush() yWriter.flush() } } xWriter.close() yWriter.close() } def main(args: Array[String]): Unit = { //todo modify these variables to match desirable files val priceDataFileName: String = "C:/Users/admin-karim/Desktop/bitstampUSD_1-min_data_2012-01-01_to_2017-10-20.csv/bitstampUSD_1-min_data_2012-01-01_to_2017-10-20.csv" val outputDataFilePath: String = "output/scala_test_x.csv" val outputLabelFilePath: String = "output/scala_test_y.csv" val spark = SparkSession .builder() .master("local[*]") .config("spark.sql.warehouse.dir", "E:/Exp/") .appName("Bitcoin Preprocessing") .getOrCreate() val data = spark.read.format("com.databricks.spark.csv").option("header", "true").load(priceDataFileName) data.show(10) println((data.count(), data.columns.size)) val dataWithDelta = data.withColumn("Delta", data("Close") - data("Open")) import org.apache.spark.sql.functions._ import spark.sqlContext.implicits._ val dataWithLabels = dataWithDelta.withColumn("label", when($"Close" - $"Open" > 0, 1).otherwise(0)) rollingWindow(dataWithLabels, 22, outputDataFilePath, outputLabelFilePath) spark.stop() } }
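The buffered-writer pattern from rollingWindow above (open, write row by row, flush periodically, close), shown on its own; the output file name and row contents are placeholders.

import java.io.{BufferedWriter, File, FileWriter}

object RollingWriterSketch extends App {
  val writer = new BufferedWriter(new FileWriter(new File("scala_test_x.sketch.csv")))
  try {
    (0 until 100).foreach { i =>
      writer.write(s"$i,${i * 2}\n")
      if (i % 10 == 0) writer.flush()   // flush every few rows, as the loop above does
    }
  } finally {
    writer.close()                      // close() also flushes any remaining buffered output
  }
}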
Example 80
Source File: ResultFileGenerator.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package Yelp.Evaluator import java.io.File import Yelp.Trainer.NeuralNetwork._ import Yelp.Preprocessor.CSVImageMetadataReader._ import Yelp.Preprocessor.makeND4jDataSets.makeDataSetTE import Yelp.Preprocessor.featureAndDataAligner import Yelp.Preprocessor.imageFeatureExtractor._ import Yelp.Evaluator.ResultFileGenerator._ import Yelp.Preprocessor.makeND4jDataSets._ import Yelp.Evaluator.ModelEvaluation._ import Yelp.Trainer.CNN._ import Yelp.Trainer.CNNEpochs._ import scala.Vector object ResultFileGenerator { def writeSubmissionFile(outcsv: String, phtoObj: List[(String, Vector[Double])], thresh: Double): Unit = { // prints to a csv or other txt file def printToFile(f: java.io.File)(op: java.io.PrintWriter => Unit) { val p = new java.io.PrintWriter(f) try { op(p) } finally { p.close() } } // assigning cutoffs for each class def findIndicesAboveThresh(x: Vector[Double]): Vector[Int] = { x.zipWithIndex.filter(x => x._1 >= thresh).map(_._2) } // create vector of rows to write to csv val ret = (for (i <- 0 until phtoObj.length) yield { (phtoObj(i)._1 + "," + findIndicesAboveThresh(phtoObj(i)._2).mkString(" ")) }).toVector // actually write text file printToFile(new File(outcsv)) { p => (Vector("business_ids,labels") ++ ret).foreach(p.println) } } def SubmitObj(alignedData: featureAndDataAligner, modelPath: String, model0: String = "model0", model1: String = "model1", model2: String = "model2", model3: String = "model3", model4: String = "model4", model5: String = "model5", model6: String = "model6", model7: String = "model7", model8: String = "model8"): List[(String, Vector[Double])] = { // new code which works in REPL // creates a List for each model (class) containing a map from the bizID to the probability of belonging in that class val big = for (m <- List(model0, model1, model2, model3, model4, model5, model6, model7, model8)) yield { val ds = makeDataSetTE(alignedData) val model = loadNN(modelPath + m + ".json", modelPath + m + ".bin") val scores = scoreModel(model, ds) val bizScores = aggImgScores2Business(scores, alignedData) bizScores.toMap } // transforming the data structure above into a List for each bizID containing a Tuple (bizid, List[Double]) where the Vector[Double] is the // the vector of probabilities alignedData.data.map(_._2).distinct map (x => (x, big.map(x2 => x2(x)).toVector)) } }
Example 81
Source File: GrayscaleConverter.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package Yelp.Preprocessor import java.io.File import javax.imageio.ImageIO import java.awt.Color object GrayscaleConverter { def main(args: Array[String]): Unit = { def pixels2Gray(R: Int, G: Int, B: Int): Int = (R + G + B) / 3 def makeGray(testImage: java.awt.image.BufferedImage): java.awt.image.BufferedImage = { val w = testImage.getWidth val h = testImage.getHeight for { w1 <- (0 until w).toVector h1 <- (0 until h).toVector } yield { val col = testImage.getRGB(w1, h1) val R = (col & 0xff0000) / 65536 val G = (col & 0xff00) / 256 val B = (col & 0xff) val graycol = pixels2Gray(R, G, B) testImage.setRGB(w1, h1, new Color(graycol, graycol, graycol).getRGB) } testImage } val testImage = ImageIO.read(new File("data/images/preprocessed/147square.jpg")) val grayImage = makeGray(testImage) ImageIO.write(grayImage, "jpg", new File("data/images/preprocessed/147gray.jpg")) } }
Example 82
Source File: imageFeatureExtractor.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package Yelp.Preprocessor import java.io.File import javax.imageio.ImageIO import scala.util.matching.Regex import imageUtils._ object imageFeatureExtractor { def processImages(imgs: List[String], resizeImgDim: Int = 128, nPixels: Int = -1): Map[Int, Vector[Int]] = { imgs.map(x => patt_get_jpg_name.findAllIn(x).mkString.toInt -> { val img0 = ImageIO.read(new File(x)) .makeSquare .resizeImg(resizeImgDim, resizeImgDim) // (128, 128) .image2gray if(nPixels != -1) img0.slice(0, nPixels) else img0 } ).filter( x => x._2 != ()) .toMap } }
Example 83
Source File: ImageResize.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package Yelp.Preprocessor import org.imgscalr._ import java.io.File import javax.imageio.ImageIO object ImageResize { def main(args: Array[String]): Unit = { def resizeImg(img: java.awt.image.BufferedImage, width: Int, height: Int) = { Scalr.resize(img, Scalr.Method.BALANCED, width, height) } val testImage = ImageIO.read(new File("data/images/train/147.jpg")) val testImage32 = resizeImg(testImage, 32, 32) val testImage64 = resizeImg(testImage, 64, 64) val testImage128 = resizeImg(testImage, 128, 128) val testImage256 = resizeImg(testImage, 256, 256) ImageIO.write(testImage32, "jpg", new File("data/images/preprocessed/147resize32.jpg")) ImageIO.write(testImage64, "jpg", new File("data/images/preprocessed/147resize64.jpg")) ImageIO.write(testImage128, "jpg", new File("data/images/preprocessed/147resize128.jpg")) ImageIO.write(testImage256, "jpg", new File("data/images/preprocessed/147resize256.jpg")) } }
Example 84
Source File: SquaringImage.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package Yelp.Preprocessor import org.imgscalr._ import java.io.File import javax.imageio.ImageIO object SquaringImage { def main(args: Array[String]): Unit = { def makeSquare(img: java.awt.image.BufferedImage): java.awt.image.BufferedImage = { val w = img.getWidth val h = img.getHeight val dim = List(w, h).min img match { case x if w == h => img case x if w > h => Scalr.crop(img, (w - h) / 2, 0, dim, dim) case x if w < h => Scalr.crop(img, 0, (h - w) / 2, dim, dim) } } val myimg = ImageIO.read(new File("data/images/train/147.jpg")) val myimgSquare = makeSquare(myimg) ImageIO.write(myimgSquare, "jpg", new File("data/images/preprocessed/147square.jpg")) } }
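The image steps above (read with ImageIO, square-crop and resize with imgscalr, write back) combined into one sketch; the input and output paths are placeholders.

import java.io.File
import javax.imageio.ImageIO
import org.imgscalr.Scalr

object ImagePipelineSketch extends App {
  val img = ImageIO.read(new File("data/images/train/147.jpg"))  // placeholder input

  // crop to a centred square, then resize, as SquaringImage and ImageResize do separately
  val dim = math.min(img.getWidth, img.getHeight)
  val square = Scalr.crop(img, (img.getWidth - dim) / 2, (img.getHeight - dim) / 2, dim, dim)
  val resized = Scalr.resize(square, Scalr.Method.BALANCED, 64, 64)

  ImageIO.write(resized, "jpg", new File("data/images/preprocessed/147square64.jpg"))
}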
Example 85
Source File: NeuralNetwork.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package Yelp.Trainer import org.deeplearning4j.nn.conf.MultiLayerConfiguration import org.deeplearning4j.nn.multilayer.MultiLayerNetwork import org.nd4j.linalg.factory.Nd4j import java.io.File import org.apache.commons.io.FileUtils import java.io.{DataInputStream, DataOutputStream, FileInputStream} import java.nio.file.{Files, Paths} object NeuralNetwork { def loadNN(NNconfig: String, NNparams: String) = { // get neural network config val confFromJson: MultiLayerConfiguration = MultiLayerConfiguration.fromJson(FileUtils.readFileToString(new File(NNconfig))) // get neural network parameters val dis: DataInputStream = new DataInputStream(new FileInputStream(NNparams)) val newParams = Nd4j.read(dis) // creating network object val savedNetwork: MultiLayerNetwork = new MultiLayerNetwork(confFromJson) savedNetwork.init() savedNetwork.setParameters(newParams) savedNetwork } def saveNN(model: MultiLayerNetwork, NNconfig: String, NNparams: String) = { // save neural network config FileUtils.write(new File(NNconfig), model.getLayerWiseConfigurations().toJson()) // save neural network parms val dos: DataOutputStream = new DataOutputStream(Files.newOutputStream(Paths.get(NNparams))) Nd4j.write(model.params(), dos) } }
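A round-trip sketch for the saveNN/loadNN helpers above; it assumes a trained MultiLayerNetwork is already in scope, and the file names are arbitrary.

import org.deeplearning4j.nn.multilayer.MultiLayerNetwork
import Yelp.Trainer.NeuralNetwork.{loadNN, saveNN}

object ModelPersistenceSketch {
  // persist the network's JSON configuration and binary parameters, then load them back
  def roundTrip(trained: MultiLayerNetwork): MultiLayerNetwork = {
    saveNN(trained, "model0.json", "model0.bin")
    loadNN("model0.json", "model0.bin")
  }
}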
Example 86
Source File: KerberosLoginProvider.scala From rokku with Apache License 2.0 | 5 votes |
package com.ing.wbaa.rokku.proxy.provider import java.io.File import com.ing.wbaa.rokku.proxy.config.KerberosSettings import com.typesafe.scalalogging.LazyLogging import org.apache.commons.lang.StringUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.security.UserGroupInformation import scala.util.{ Failure, Success, Try } trait KerberosLoginProvider extends LazyLogging { protected[this] def kerberosSettings: KerberosSettings loginUserFromKeytab(kerberosSettings.keytab, kerberosSettings.principal) private def loginUserFromKeytab(keytab: String, principal: String): Unit = { if (StringUtils.isNotBlank(keytab) && StringUtils.isNotBlank(principal)) { if (!new File(keytab).exists()) { logger.info("keytab file does not exist {}", keytab) } else { Try { UserGroupInformation.setConfiguration(new Configuration()) UserGroupInformation.loginUserFromKeytab(principal, keytab) } match { case Success(_) => logger.info("kerberos credentials provided {}", UserGroupInformation.getLoginUser) case Failure(exception) => logger.error("kerberos login error {}", exception) } } } else { logger.info("kerberos credentials are not provided") } } }
Example 87
Source File: S3SdkHelpers.scala From rokku with Apache License 2.0 | 5 votes |
package com.ing.wbaa.testkit.awssdk import java.io.File import akka.http.scaladsl.model.Uri.Authority import com.amazonaws.ClientConfiguration import com.amazonaws.auth.{AWSCredentials, AWSStaticCredentialsProvider, BasicSessionCredentials} import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration import com.amazonaws.services.s3.transfer.TransferManagerBuilder import com.amazonaws.services.s3.transfer.model.UploadResult import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder} import com.typesafe.config.ConfigFactory import scala.collection.JavaConverters._ trait S3SdkHelpers { val awsRegion = ConfigFactory.load().getString("rokku.storage.s3.region") def getAmazonS3(authority: Authority, credentials: AWSCredentials = new BasicSessionCredentials("accesskey", "secretkey", "token") ): AmazonS3 = { val cliConf = new ClientConfiguration() cliConf.setMaxErrorRetry(1) AmazonS3ClientBuilder .standard() .withClientConfiguration(cliConf) .withCredentials(new AWSStaticCredentialsProvider(credentials)) .withPathStyleAccessEnabled(true) .withEndpointConfiguration(new EndpointConfiguration(s"http://s3.localhost:${authority.port}", awsRegion)) .build() } def getKeysInBucket(sdk: AmazonS3, bucket: String): List[String] = sdk .listObjectsV2(bucket) .getObjectSummaries .asScala.toList .map(_.getKey) def doMultiPartUpload(sdk: AmazonS3, bucket: String, file: String, key: String): UploadResult = { val upload = TransferManagerBuilder .standard() .withS3Client(sdk) .build() .upload(bucket, key, new File(file)) upload.waitForUploadResult() } }
Example 88
Source File: RokkuFixtures.scala From rokku with Apache License 2.0 | 5 votes |
package com.ing.wbaa.testkit import java.io.{File, RandomAccessFile} import com.amazonaws.services.s3.AmazonS3 import com.ing.wbaa.testkit.awssdk.S3SdkHelpers import org.scalatest.Assertion import scala.concurrent.{ExecutionContext, Future} import scala.util.{Random, Try} trait RokkuFixtures extends S3SdkHelpers { def withHomeBucket(s3Client: AmazonS3, objects: Seq[String])(testCode: String => Future[Assertion])(implicit exCtx: ExecutionContext): Future[Assertion] = { val testBucket = "home" Try(s3Client.createBucket(testBucket)) objects.foreach(obj => s3Client.putObject(testBucket, obj, "")) testCode(testBucket).andThen { case _ => cleanBucket(s3Client, testBucket) } } private def cleanBucket(s3Client: AmazonS3, bucketName: String) = { import scala.collection.JavaConverters._ s3Client.listObjectsV2(bucketName).getObjectSummaries.asScala.toList.map(_.getKey).foreach { key => s3Client.deleteObject(bucketName, key) } } }
Example 89
Source File: AppConfig.scala From odsc-east-realish-predictions with Apache License 2.0 | 5 votes |
package com.twilio.open.odsc.realish.config import java.io.File import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.dataformat.yaml.YAMLFactory import com.fasterxml.jackson.module.scala.DefaultScalaModule object AppConfig { private val mapper = new ObjectMapper(new YAMLFactory) mapper.registerModule(DefaultScalaModule) def parse(configPath: String): AppConfig = { mapper.readValue(new File(configPath), classOf[AppConfig]) } } @SerialVersionUID(100L) case class AppConfig( sparkAppConfig: SparkAppConfig, streamingQueryConfig: StreamingQueryConfig ) extends Serializable @SerialVersionUID(100L) case class SparkAppConfig( appName: String, core: Map[String, String] ) extends Serializable trait KafkaConsumerConfig { val topic: String val subscriptionType: String val conf: Map[String, String] } @SerialVersionUID(100L) case class ConsumerConfig( topic: String, subscriptionType: String, conf: Map[String, String] ) extends KafkaConsumerConfig with Serializable @SerialVersionUID(100L) case class StreamingQueryConfig( streamName: String, triggerInterval: String, triggerEnabled: Boolean, windowInterval: String, watermarkInterval: String ) extends Serializable
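Loading the YAML config through the companion parser above; the path is a placeholder, and the file must match the case classes defined in AppConfig.scala.

import com.twilio.open.odsc.realish.config.AppConfig

object LoadConfigSketch extends App {
  val config = AppConfig.parse("conf/realish.yaml")   // placeholder path to a YAML file
  println(config.sparkAppConfig.appName)
  println(config.streamingQueryConfig.triggerInterval)
}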
Example 90
Source File: SchemaReader.scala From darwin with Apache License 2.0 | 5 votes |
package it.agilelab.darwin.app.mock import java.io.{File, InputStream} import org.apache.avro.Schema object SchemaReader { def readFromResources(p: String): Schema = { read(getClass.getClassLoader.getResourceAsStream(p)) } def read(f: File): Schema = { val parser = new Schema.Parser() parser.parse(f) } def read(s: String): Schema = { val parser = new Schema.Parser() parser.parse(s) } def read(is: InputStream): Schema = { val parser = new Schema.Parser() parser.parse(is) } }
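A quick sketch of the read overloads above, assuming SchemaReader is on the classpath; the inline schema is a minimal Avro record and the file/resource names are placeholders.

import java.io.File
import it.agilelab.darwin.app.mock.SchemaReader

object SchemaReaderSketch extends App {
  // parse an Avro schema from an inline JSON string
  val schema = SchemaReader.read(
    """{"type":"record","name":"User","fields":[{"name":"id","type":"long"}]}""")
  println(schema.getFullName)   // prints "User"

  // the other overloads take a File or a classpath resource (placeholder names):
  // SchemaReader.read(new File("schemas/user.avsc"))
  // SchemaReader.readFromResources("user.avsc")
}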
Example 91
Source File: SbtActorApi.scala From sbt-actor-api with MIT License | 5 votes |
package im.actor import im.actor.api._ import java.io.File import sbt._, Keys._ object SbtActorApi extends AutoPlugin { val ActorApi = config("actorapi").hide val path = SettingKey[File]("actor-schema-path", "The path that contains actor.json file") val outputPath = SettingKey[File]("actor-schema-output-path", "The paths where to save the generated *.scala files.") lazy val actorapi = TaskKey[Seq[File]]("actorapi", "Compile json schema to scala code") lazy val actorapiClean = TaskKey[Seq[File]]("actorapi-clean", "Clean generated code") lazy val actorapiMain = SettingKey[String]("actorapi-main", "ActorApi main class.") lazy val settings: Seq[Setting[_]] = Seq( sourceDirectory in ActorApi <<= (sourceDirectory in Compile), path <<= sourceDirectory in ActorApi, managedClasspath in ActorApi <<= (classpathTypes, update) map { (ct, report) ⇒ Classpaths.managedJars(ActorApi, ct, report) }, outputPath <<= sourceManaged in ActorApi, actorapi <<= ( sourceDirectory in ActorApi, sourceManaged in ActorApi, managedClasspath in ActorApi, javaHome, streams ).map(generate), actorapiClean <<= ( sourceManaged in ActorApi, streams ).map(clean), sourceGenerators in Compile <+= actorapi ) private def compiledFileDir(targetDir: File): File = targetDir / "main" / "scala" private def compiledFile(targetDir: File, name: String): File = compiledFileDir(targetDir) / s"${name}.scala" private def clean(targetDir: File, streams: TaskStreams): Seq[File] = { val log = streams.log log.info("Cleaning actor schema") IO.delete(targetDir) Seq(targetDir) } private def generate(srcDir: File, targetDir: File, classpath: Classpath, javaHome: Option[File], streams: TaskStreams): Seq[File] = { val log = streams.log log.info(f"Generating actor schema for $srcDir%s") val input = srcDir / "actor-api" if (!input.exists()) { log.info(f"$input%s does not exists") Nil } else { val output = compiledFileDir(targetDir) val cached = FileFunction.cached(streams.cacheDirectory / "actor-api", FilesInfo.lastModified, FilesInfo.exists) { (in: Set[File]) ⇒ { if (!output.exists()) IO.createDirectory(output) val src = input / "actor.json" if (src.exists()) { val sources = (new Json2Tree(IO.read(src))).convert() sources foreach { case (name, source) ⇒ val targetFile = compiledFile(targetDir, name) log.info(f"Generated ActorApi $targetFile%s") IO.write(targetFile, source) } } else { log.info(f"no actor.json file in $input%s") } (output ** ("*.scala")).get.toSet } } cached((input ** "actor.json").get.toSet).toSeq } } }
Example 92
Source File: VLFeatSuite.scala From keystone with Apache License 2.0 | 5 votes |
package keystoneml.utils.external import java.io.File import breeze.linalg._ import breeze.numerics.abs import org.scalatest.FunSuite import keystoneml.pipelines.Logging import keystoneml.utils.{ImageUtils, MatrixUtils, TestUtils} class VLFeatSuite extends FunSuite with Logging { test("Load an Image and compute SIFT Features") { val testImage = TestUtils.loadTestImage("images/000012.jpg") val singleImage = ImageUtils.mapPixels(testImage, _/255.0) val grayImage = ImageUtils.toGrayScale(singleImage) val extLib = new VLFeat val stepSize = 3 val binSize = 4 val scales = 4 val descriptorLength = 128 val scaleStep = 0 val rawDescDataShort = extLib.getSIFTs(grayImage.metadata.xDim, grayImage.metadata.yDim, stepSize, binSize, scales, scaleStep, grayImage.getSingleChannelAsFloatArray()) assert(rawDescDataShort.length % descriptorLength == 0, "Resulting SIFTs must be 128-dimensional.") val numCols = rawDescDataShort.length/descriptorLength val result = new DenseMatrix(descriptorLength, numCols, rawDescDataShort.map(_.toDouble)) // Compare with the output of running this image through vl_phow with matlab from the enceval package: // featpipem_addpaths; // im = im2single(imread('images/000012.jpg')); // featextr = featpipem.features.PhowExtractor(); // featextr.step = 3; // [frames feats] = featextr.compute(im); // csvwrite('images/feats128.csv', feats) val testFeatures = csvread(new File(TestUtils.getTestResourceFileName("images/feats128.csv"))) val diff = result - testFeatures // Because of subtle differences in the way image smoothing works in the VLFeat C library and the VLFeat matlab // library (vl_imsmooth_f vs. _vl_imsmooth_f), these two matrices will not be exactly the same. // Instead, we check that 99.5% of the matrix entries are off by at most 1. val absdiff = abs(diff).toDenseVector assert(absdiff.findAll(_ > 1.0).length.toDouble < 0.005*absdiff.length, "Fewer than 0.05% of entries may be different by more than 1.") } }
Example 93
Source File: EncEvalSuite.scala From keystone with Apache License 2.0 | 5 votes |
package keystoneml.utils.external import java.io.File import breeze.linalg._ import breeze.stats.distributions.Gaussian import keystoneml.nodes.learning.GaussianMixtureModel import keystoneml.nodes.learning.external.GaussianMixtureModelEstimator import org.scalatest.FunSuite import keystoneml.pipelines.Logging import keystoneml.utils.{Stats, TestUtils} class EncEvalSuite extends FunSuite with Logging { test("Load SIFT Descriptors and compute Fisher Vector Features") { val siftDescriptor = csvread(new File(TestUtils.getTestResourceFileName("images/feats.csv"))) val gmmMeans = TestUtils.getTestResourceFileName("images/voc_codebook/means.csv") val gmmVars = TestUtils.getTestResourceFileName("images/voc_codebook/variances.csv") val gmmWeights = TestUtils.getTestResourceFileName("images/voc_codebook/priors") val gmm = GaussianMixtureModel.load(gmmMeans, gmmVars, gmmWeights) val nCenters = gmm.means.cols val nDim = gmm.means.rows val extLib = new EncEval val fisherVector = extLib.calcAndGetFVs( gmm.means.toArray.map(_.toFloat), nCenters, nDim, gmm.variances.toArray.map(_.toFloat), gmm.weights.toArray.map(_.toFloat), siftDescriptor.toArray.map(_.toFloat)) log.info(s"Fisher Vector is ${fisherVector.sum}") assert(Stats.aboutEq(fisherVector.sum, 40.109097, 1e-4), "SUM of Fisher Vectors must match expected sum.") } test("Compute a GMM from scala") { val nsamps = 10000 // Generate two gaussians. val x = Gaussian(-1.0, 0.5).samples.take(nsamps).toArray val y = Gaussian(5.0, 1.0).samples.take(nsamps).toArray val z = shuffle(x ++ y).map(x => DenseVector(x)) // Compute a 1-d GMM. val extLib = new EncEval val gmm = new GaussianMixtureModelEstimator(2).fit(z) logInfo(s"GMM means: ${gmm.means.toArray.mkString(",")}") logInfo(s"GMM vars: ${gmm.variances.toArray.mkString(",")}") logInfo(s"GMM weights: ${gmm.weights.toArray.mkString(",")}") // The results should be close to the distribution we set up. assert(Stats.aboutEq(min(gmm.means), -1.0, 1e-1), "Smallest mean should be close to -1.0") assert(Stats.aboutEq(max(gmm.means), 5.0, 1e-1), "Largest mean should be close to 1.0") assert(Stats.aboutEq(math.sqrt(min(gmm.variances)), 0.5, 1e-1), "Smallest SD should be close to 0.25") assert(Stats.aboutEq(math.sqrt(max(gmm.variances)), 1.0, 1e-1), "Largest SD should be close to 5.0") } }
Example 94
Source File: DefaultBodyWritables.scala From play-ws with Apache License 2.0 | 5 votes |
package play.api.libs.ws import java.io.File import java.nio.ByteBuffer import java.util.function.Supplier import akka.stream.scaladsl.StreamConverters.fromInputStream import akka.stream.scaladsl.FileIO import akka.stream.scaladsl.Source import akka.util.ByteString import scala.compat.java8.FunctionConverters.asScalaFromSupplier trait DefaultBodyWritables { /* declaration restored: the listing dropped the enclosing trait; the remaining writables from the full source (for File, InputStream, ByteBuffer, Source and friends, which is why those imports appear above) are omitted here */ implicit val writeableOf_urlEncodedForm: BodyWritable[Map[String, Seq[String]]] = { import java.net.URLEncoder BodyWritable( formData => InMemoryBody( ByteString.fromString( formData.flatMap(item => item._2.map(c => s"${item._1}=${URLEncoder.encode(c, "UTF-8")}")).mkString("&") ) ), "application/x-www-form-urlencoded" ) } implicit val writeableOf_urlEncodedSimpleForm: BodyWritable[Map[String, String]] = { writeableOf_urlEncodedForm.map[Map[String, String]](_.map(kv => kv._1 -> Seq(kv._2))) } } object DefaultBodyWritables extends DefaultBodyWritables
Example 95
Source File: TestZooKeeper.scala From mango with Apache License 2.0 | 5 votes |
package com.kakao.mango.zk import java.io.{File, IOException} import java.net.{ServerSocket, Socket} import java.util.concurrent.TimeUnit import com.kakao.mango.concurrent.NamedExecutors import com.kakao.mango.logging.{LogLevelOverrider, Logging} import com.kakao.shaded.guava.io.Files import org.apache.zookeeper.server.persistence.FileTxnSnapLog import org.apache.zookeeper.server.{ServerCnxnFactory, ServerConfig, ZooKeeperServer} import org.scalatest.{BeforeAndAfterAll, Suite} trait TestZooKeeper extends BeforeAndAfterAll with Logging { this: Suite => val zkServerPort = 2181 val zkServerExecutor = NamedExecutors.single("zookeeper-server") var zk: ZooKeeperConnection = _ override protected def beforeAll(): Unit = { logger.info("Launching a standalone ZooKeeper server for testing...") try { val socket = new ServerSocket(zkServerPort) socket.close() } catch { case e: IOException => throw new RuntimeException(s"TCP port $zkServerPort is required for tests but not available") } zkServerExecutor.submit { LogLevelOverrider.error("org.apache.zookeeper") val datadir = Files.createTempDir().getAbsolutePath val config = new ServerConfig config.parse(Array(zkServerPort.toString, datadir)) val zkServer = new ZooKeeperServer zkServer.setTxnLogFactory(new FileTxnSnapLog(new File(datadir), new File(datadir))) zkServer.setTickTime(6000) zkServer.setMinSessionTimeout(6000) zkServer.setMaxSessionTimeout(6000) val cnxnFactory = ServerCnxnFactory.createFactory try { cnxnFactory.configure(config.getClientPortAddress, 60) cnxnFactory.startup(zkServer) cnxnFactory.join() } catch { case _: InterruptedException => logger.info("ZooKeeper server interrupted; shutting down...") cnxnFactory.shutdown() cnxnFactory.join() if (zkServer.isRunning) { zkServer.shutdown() } logger.info("ZooKeeper server stopped") } } var connected = false while (!connected) { logger.info("Waiting for ZooKeeper server to launch...") try { val socket = new Socket("localhost", zkServerPort) logger.info("ZooKeeper server is available") socket.close() zk = ZooKeeperConnection(s"localhost:$zkServerPort") connected = true } catch { case _: IOException => Thread.sleep(1000) // retry } } super.beforeAll() } override protected def afterAll(): Unit = { try super.afterAll() finally { zk.close() logger.info("Interrupting ZooKeeper server...") zkServerExecutor.shutdownNow() while (!zkServerExecutor.awaitTermination(1, TimeUnit.SECONDS)) { logger.info("awaiting ZooKeeper server termination...") } logger.info("ZooKeeper server terminated") } } }
Example 96
Source File: FileSystems.scala From mango with Apache License 2.0 | 5 votes |
package com.kakao.mango.io import java.io.File import java.nio.file._ import scala.collection.JavaConversions._ import scala.collection.mutable object FileSystems { /* declaration restored: the listing dropped the enclosing object */ def entries(dir: Path, recursive: Boolean = true): Stream[Path] = { val maxDepth = if (recursive) Int.MaxValue else 1 val stack = mutable.Stack[(Path, Int)]((dir, maxDepth)) new Iterator[Iterator[Path]] { override def hasNext: Boolean = stack.nonEmpty override def next(): Iterator[Path] = { val (dir, depth) = stack.pop() Files.newDirectoryStream(dir).iterator().flatMap { case entry if Files.isDirectory(entry) => if (depth > 1) stack.push((entry, depth - 1)) Nil case entry => Some(entry) } } }.toStream.flatten } def entries(dir: File): Stream[File] = entries(dir.toPath, recursive = true).map(_.toFile) def entries(dir: File, recursive: Boolean): Stream[File] = entries(dir.toPath, recursive).map(_.toFile) def entries(dir: String): Stream[Path] = entries(Paths.get(dir), recursive = true) def entries(dir: String, recursive: Boolean): Stream[Path] = entries(Paths.get(dir), recursive) }
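A usage sketch for the recursive listing above; the directory is a placeholder, and because entries returns a Stream the traversal is only forced as far as it is consumed.

import java.io.File
import com.kakao.mango.io.FileSystems

object ListFilesSketch extends App {
  val sources: Stream[File] = FileSystems.entries(new File("src/main/scala"), recursive = true)

  sources
    .filter(_.getName.endsWith(".scala"))
    .take(10)                      // laziness stops the walk early once ten matches are found
    .foreach(f => println(f.getPath))
}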
Example 97
Source File: QueryPartitionSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive import java.io.File import java.sql.Timestamp import com.google.common.io.Files import org.apache.hadoop.fs.FileSystem import org.apache.spark.internal.config._ import org.apache.spark.sql._ import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.util.Utils class QueryPartitionSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { import spark.implicits._ private def queryWhenPathNotExist(): Unit = { withTempView("testData") { withTable("table_with_partition", "createAndInsertTest") { withTempDir { tmpDir => val testData = sparkContext.parallelize( (1 to 10).map(i => TestData(i, i.toString))).toDF() testData.createOrReplaceTempView("testData") // create the table for test sql(s"CREATE TABLE table_with_partition(key int,value string) " + s"PARTITIONED by (ds string) location '${tmpDir.toURI}' ") sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='1') " + "SELECT key,value FROM testData") sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='2') " + "SELECT key,value FROM testData") sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='3') " + "SELECT key,value FROM testData") sql("INSERT OVERWRITE TABLE table_with_partition partition (ds='4') " + "SELECT key,value FROM testData") // test for the exist path checkAnswer(sql("select key,value from table_with_partition"), testData.union(testData).union(testData).union(testData)) // delete the path of one partition tmpDir.listFiles .find { f => f.isDirectory && f.getName().startsWith("ds=") } .foreach { f => Utils.deleteRecursively(f) } // test for after delete the path checkAnswer(sql("select key,value from table_with_partition"), testData.union(testData).union(testData)) } } } } test("SPARK-5068: query data when path doesn't exist") { withSQLConf(SQLConf.HIVE_VERIFY_PARTITION_PATH.key -> "true") { queryWhenPathNotExist() } } test("Replace spark.sql.hive.verifyPartitionPath by spark.files.ignoreMissingFiles") { withSQLConf(SQLConf.HIVE_VERIFY_PARTITION_PATH.key -> "false") { sparkContext.conf.set(IGNORE_MISSING_FILES.key, "true") queryWhenPathNotExist() } } test("SPARK-21739: Cast expression should initialize timezoneId") { withTable("table_with_timestamp_partition") { sql("CREATE TABLE table_with_timestamp_partition(value int) PARTITIONED BY (ts TIMESTAMP)") sql("INSERT OVERWRITE TABLE table_with_timestamp_partition " + "PARTITION (ts = '2010-01-01 00:00:00.000') VALUES (1)") // test for Cast expression in TableReader checkAnswer(sql("SELECT * FROM table_with_timestamp_partition"), Seq(Row(1, Timestamp.valueOf("2010-01-01 00:00:00.000")))) // test for Cast expression in HiveTableScanExec checkAnswer(sql("SELECT value FROM table_with_timestamp_partition " + "WHERE ts = '2010-01-01 00:00:00.000'"), Row(1)) } } }
Example 98
Source File: HiveQueryFileTest.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution import java.io.File import org.apache.spark.sql.catalyst.util._ abstract class HiveQueryFileTest extends HiveComparisonTest { /* declaration restored: the listing dropped the enclosing class; blackList, runOnlyDirectories and skipDirectories referenced below are defined in the omitted part of the source or in HiveComparisonTest */ def whiteList: Seq[String] = ".*" :: Nil def testCases: Seq[(String, File)] val runAll: Boolean = !(System.getProperty("spark.hive.alltests") == null) || runOnlyDirectories.nonEmpty || skipDirectories.nonEmpty val whiteListProperty: String = "spark.hive.whitelist" // Allow the whiteList to be overridden by a system property val realWhiteList: Seq[String] = Option(System.getProperty(whiteListProperty)).map(_.split(",").toSeq).getOrElse(whiteList) // Go through all the test cases and add them to scala test. testCases.sorted.foreach { case (testCaseName, testCaseFile) => if (blackList.map(_.r.pattern.matcher(testCaseName).matches()).reduceLeft(_||_)) { logDebug(s"Blacklisted test skipped $testCaseName") } else if (realWhiteList.map(_.r.pattern.matcher(testCaseName).matches()).reduceLeft(_||_) || runAll) { // Build a test case and submit it to scala test framework... val queriesString = fileToString(testCaseFile) createQueryTest(testCaseName, queriesString, reset = true, tryWithoutResettingFirst = true) } else { // Only output warnings for the built in whitelist as this clutters the output when the user // trying to execute a single test from the commandline. if (System.getProperty(whiteListProperty) == null && !runAll) { ignore(testCaseName) {} } } } }
Example 99
Source File: HiveClientBuilder.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.client import java.io.File import org.apache.hadoop.conf.Configuration import org.apache.hadoop.util.VersionInfo import org.apache.spark.SparkConf import org.apache.spark.util.Utils private[client] object HiveClientBuilder { // In order to speed up test execution during development or in Jenkins, you can specify the path // of an existing Ivy cache: private val ivyPath: Option[String] = { sys.env.get("SPARK_VERSIONS_SUITE_IVY_PATH").orElse( Some(new File(sys.props("java.io.tmpdir"), "hive-ivy-cache").getAbsolutePath)) } private def buildConf(extraConf: Map[String, String]) = { lazy val warehousePath = Utils.createTempDir() lazy val metastorePath = Utils.createTempDir() metastorePath.delete() extraConf ++ Map( "javax.jdo.option.ConnectionURL" -> s"jdbc:derby:;databaseName=$metastorePath;create=true", "hive.metastore.warehouse.dir" -> warehousePath.toString) } // for testing only def buildClient( version: String, hadoopConf: Configuration, extraConf: Map[String, String] = Map.empty, sharesHadoopClasses: Boolean = true): HiveClient = { IsolatedClientLoader.forVersion( hiveMetastoreVersion = version, hadoopVersion = VersionInfo.getVersion, sparkConf = new SparkConf(), hadoopConf = hadoopConf, config = buildConf(extraConf), ivyPath = ivyPath, sharesHadoopClasses = sharesHadoopClasses).createClient() } }
Example 100
Source File: EvalPythonExec.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.python import java.io.File import scala.collection.mutable.ArrayBuffer import org.apache.spark.{SparkEnv, TaskContext} import org.apache.spark.api.python.ChainedPythonFunctions import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.types.{DataType, StructField, StructType} import org.apache.spark.util.Utils abstract class EvalPythonExec(udfs: Seq[PythonUDF], output: Seq[Attribute], child: SparkPlan) extends SparkPlan { def children: Seq[SparkPlan] = child :: Nil override def producedAttributes: AttributeSet = AttributeSet(output.drop(child.output.length)) private def collectFunctions(udf: PythonUDF): (ChainedPythonFunctions, Seq[Expression]) = { udf.children match { case Seq(u: PythonUDF) => val (chained, children) = collectFunctions(u) (ChainedPythonFunctions(chained.funcs ++ Seq(udf.func)), children) case children => // There should not be any other UDFs, or the children can't be evaluated directly. assert(children.forall(_.find(_.isInstanceOf[PythonUDF]).isEmpty)) (ChainedPythonFunctions(Seq(udf.func)), udf.children) } } protected def evaluate( funcs: Seq[ChainedPythonFunctions], argOffsets: Array[Array[Int]], iter: Iterator[InternalRow], schema: StructType, context: TaskContext): Iterator[InternalRow] protected override def doExecute(): RDD[InternalRow] = { val inputRDD = child.execute().map(_.copy()) inputRDD.mapPartitions { iter => val context = TaskContext.get() // The queue used to buffer input rows so we can drain it to // combine input with output from Python. val queue = HybridRowQueue(context.taskMemoryManager(), new File(Utils.getLocalDir(SparkEnv.get.conf)), child.output.length) context.addTaskCompletionListener[Unit] { ctx => queue.close() } val (pyFuncs, inputs) = udfs.map(collectFunctions).unzip // flatten all the arguments val allInputs = new ArrayBuffer[Expression] val dataTypes = new ArrayBuffer[DataType] val argOffsets = inputs.map { input => input.map { e => if (allInputs.exists(_.semanticEquals(e))) { allInputs.indexWhere(_.semanticEquals(e)) } else { allInputs += e dataTypes += e.dataType allInputs.length - 1 } }.toArray }.toArray val projection = newMutableProjection(allInputs, child.output) val schema = StructType(dataTypes.zipWithIndex.map { case (dt, i) => StructField(s"_$i", dt) }) // Add rows to queue to join later with the result. val projectedRowIter = iter.map { inputRow => queue.add(inputRow.asInstanceOf[UnsafeRow]) projection(inputRow) } val outputRowIterator = evaluate( pyFuncs, argOffsets, projectedRowIter, schema, context) val joined = new JoinedRow val resultProj = UnsafeProjection.create(output, output) outputRowIterator.map { outputRow => resultProj(joined(queue.remove(), outputRow)) } } } }
Example 101
Source File: resources.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.command import java.io.File import java.net.URI import org.apache.hadoop.fs.Path import org.apache.spark.sql.{Row, SparkSession} import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} case class ListJarsCommand(jars: Seq[String] = Seq.empty[String]) extends RunnableCommand { override val output: Seq[Attribute] = { AttributeReference("Results", StringType, nullable = false)() :: Nil } override def run(sparkSession: SparkSession): Seq[Row] = { val jarList = sparkSession.sparkContext.listJars() if (jars.nonEmpty) { for { jarName <- jars.map(f => new Path(f).getName) jarPath <- jarList if jarPath.contains(jarName) } yield Row(jarPath) } else { jarList.map(Row(_)) } } }
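ListJarsCommand is the runnable command behind Spark SQL's LIST JAR(S) statement. A hedged session-level example, assuming a SparkSession named spark and a jar at a made-up path:

spark.sql("ADD JAR /tmp/my-udfs.jar")          // path is hypothetical
spark.sql("LIST JARS").show(truncate = false)  // each row is a jar path, as produced by run() above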
Example 102
Source File: OrcTest.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.orc import java.io.File import scala.reflect.ClassTag import scala.reflect.runtime.universe.TypeTag import org.scalatest.BeforeAndAfterAll import org.apache.spark.sql._ import org.apache.spark.sql.internal.SQLConf.ORC_IMPLEMENTATION import org.apache.spark.sql.test.SQLTestUtils protected def withOrcTable[T <: Product: ClassTag: TypeTag] (data: Seq[T], tableName: String) (f: => Unit): Unit = { withOrcDataFrame(data) { df => df.createOrReplaceTempView(tableName) withTempView(tableName)(f) } } protected def makeOrcFile[T <: Product: ClassTag: TypeTag]( data: Seq[T], path: File): Unit = { data.toDF().write.mode(SaveMode.Overwrite).orc(path.getCanonicalPath) } protected def makeOrcFile[T <: Product: ClassTag: TypeTag]( df: DataFrame, path: File): Unit = { df.write.mode(SaveMode.Overwrite).orc(path.getCanonicalPath) } protected def checkPredicatePushDown(df: DataFrame, numRows: Int, predicate: String): Unit = { withTempPath { file => // It needs to repartition data so that we can have several ORC files // in order to skip stripes in ORC. df.repartition(numRows).write.orc(file.getCanonicalPath) val actual = stripSparkFilter(spark.read.orc(file.getCanonicalPath).where(predicate)).count() assert(actual < numRows) } } }
Example 103
Source File: HadoopFsRelationSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources import java.io.{File, FilenameFilter} import org.apache.spark.sql.QueryTest import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, SortMergeJoinExec} import org.apache.spark.sql.test.SharedSQLContext class HadoopFsRelationSuite extends QueryTest with SharedSQLContext { test("sizeInBytes should be the total size of all files") { withTempDir{ dir => dir.delete() spark.range(1000).write.parquet(dir.toString) // ignore hidden files val allFiles = dir.listFiles(new FilenameFilter { override def accept(dir: File, name: String): Boolean = { !name.startsWith(".") && !name.startsWith("_") } }) val totalSize = allFiles.map(_.length()).sum val df = spark.read.parquet(dir.toString) assert(df.queryExecution.logical.stats.sizeInBytes === BigInt(totalSize)) } } test("SPARK-22790: spark.sql.sources.compressionFactor takes effect") { import testImplicits._ Seq(1.0, 0.5).foreach { compressionFactor => withSQLConf("spark.sql.sources.fileCompressionFactor" -> compressionFactor.toString, "spark.sql.autoBroadcastJoinThreshold" -> "400") { withTempPath { workDir => // the file size is 740 bytes val workDirPath = workDir.getAbsolutePath val data1 = Seq(100, 200, 300, 400).toDF("count") data1.write.parquet(workDirPath + "/data1") val df1FromFile = spark.read.parquet(workDirPath + "/data1") val data2 = Seq(100, 200, 300, 400).toDF("count") data2.write.parquet(workDirPath + "/data2") val df2FromFile = spark.read.parquet(workDirPath + "/data2") val joinedDF = df1FromFile.join(df2FromFile, Seq("count")) if (compressionFactor == 0.5) { val bJoinExec = joinedDF.queryExecution.executedPlan.collect { case bJoin: BroadcastHashJoinExec => bJoin } assert(bJoinExec.nonEmpty) val smJoinExec = joinedDF.queryExecution.executedPlan.collect { case smJoin: SortMergeJoinExec => smJoin } assert(smJoinExec.isEmpty) } else { // compressionFactor is 1.0 val bJoinExec = joinedDF.queryExecution.executedPlan.collect { case bJoin: BroadcastHashJoinExec => bJoin } assert(bJoinExec.isEmpty) val smJoinExec = joinedDF.queryExecution.executedPlan.collect { case smJoin: SortMergeJoinExec => smJoin } assert(smJoinExec.nonEmpty) } } } } } }
Example 104
Source File: StreamMetadataSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.streaming import java.io.File import java.util.UUID import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.spark.sql.streaming.StreamTest class StreamMetadataSuite extends StreamTest { test("writing and reading") { withTempDir { dir => val id = UUID.randomUUID.toString val metadata = StreamMetadata(id) val file = new Path(new File(dir, "test").toString) StreamMetadata.write(metadata, file, hadoopConf) val readMetadata = StreamMetadata.read(file, hadoopConf) assert(readMetadata.nonEmpty) assert(readMetadata.get.id === id) } } test("read Spark 2.1.0 format") { // query-metadata-logs-version-2.1.0.txt has the execution metadata generated by Spark 2.1.0 assert( readForResource("query-metadata-logs-version-2.1.0.txt") === StreamMetadata("d366a8bf-db79-42ca-b5a4-d9ca0a11d63e")) } private def readForResource(fileName: String): StreamMetadata = { val input = getClass.getResource(s"/structured-streaming/$fileName") StreamMetadata.read(new Path(input.toString), hadoopConf).get } private val hadoopConf = new Configuration() }
Example 105
Source File: BarChartPainter.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.painter import java.io.File import java.util.Scanner import org.jfree.chart.{ChartFactory, ChartUtils} import org.jfree.chart.plot.PlotOrientation import org.jfree.data.category.DefaultCategoryDataset import org.apache.spark.util.Utils class BarChartPainter(dataPath: String, picturePath: String) extends Painter(dataPath, picturePath) { def createDataset(): DefaultCategoryDataset = { fw.flush() fw.close() val dataset = new DefaultCategoryDataset val scaner = new Scanner(new File(dataPath)) while (scaner.hasNext()) { val cols = scaner.next().split(",") dataset.addValue(Utils.byteStringAsMb(cols(1) + "b"), "peak", cols(0)) dataset.addValue(Utils.byteStringAsMb(cols(2) + "b"), "majority", cols(0)) } dataset } def paint( width: Int, height: Int, chartTitle: String, categoryAxisLabel: String, valueAxisLabel: String, yLB: Double, yUB: Double): Unit = { val barChart = ChartFactory.createBarChart( chartTitle, categoryAxisLabel, valueAxisLabel, createDataset, PlotOrientation.VERTICAL, true, false, false) barChart.getCategoryPlot.getRangeAxis.setRange(yLB, yUB) ChartUtils.saveChartAsJPEG(new File(picturePath), barChart, width, height) } override def paint( width: Int, height: Int, chartTitle: String, categoryAxisLabel: String, valueAxisLabel: String): Unit = {} }
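A usage sketch for the painter above, with made-up paths and byte counts; addPoint comes from the Painter base class shown in a later example and writes one CSV row per call:

val painter = new BarChartPainter("/tmp/mem-usage.csv", "/tmp/mem-usage.jpg")
painter.addPoint("stage-1", 512L * 1024 * 1024, 256L * 1024 * 1024)   // category, peak bytes, majority bytes
painter.addPoint("stage-2", 768L * 1024 * 1024, 320L * 1024 * 1024)
painter.paint(600, 400, "memory per stage", "stage", "MB", 0d, 1024d) // y-axis range 0..1024 MB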
Example 106
Source File: Painter.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.painter import java.awt.Font import java.io.{File, FileWriter} import org.jfree.chart.{ChartFactory, StandardChartTheme} import org.jfree.data.general.Dataset abstract class Painter(dataPath: String, picturePath: String) { initialize() var fw: FileWriter = _ def initialize(): Unit = { val dataFile = new File(dataPath) if (dataFile.exists()) { dataFile.delete() } fw = new FileWriter(dataPath, true) val standardChartTheme = new StandardChartTheme("CN") standardChartTheme.setExtraLargeFont(new Font("Monospaced", Font.BOLD, 20)) standardChartTheme.setRegularFont(new Font("Monospaced", Font.PLAIN, 15)) standardChartTheme.setLargeFont(new Font("Monospaced", Font.PLAIN, 15)) ChartFactory.setChartTheme(standardChartTheme) } def addPoint(xAxis: Any, yAxis: Any): Unit = { fw.write(s"${xAxis},${yAxis}\n") } def addPoint(xAxis: Any, yAxis: Any, zAxis: Any): Unit = { fw.write(s"${xAxis},${yAxis},${zAxis}\n") } def createDataset(): Dataset def paint( width: Int, height: Int, chartTitle: String, categoryAxisLabel: String, valueAxisLabel: String): Unit }
Example 107
Source File: TimeSeriesChartPainter.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.painter import java.io.File import java.util.Scanner import org.jfree.chart.{ChartFactory, ChartUtils} import org.jfree.data.time.{FixedMillisecond, TimeSeries, TimeSeriesCollection} import org.jfree.data.xy.XYDataset class TimeSeriesChartPainter(dataPath: String, picturePath: String) extends Painter(dataPath, picturePath) { def createDataset(): XYDataset = { fw.flush() fw.close() val dataset = new TimeSeriesCollection val timeSeries = new TimeSeries("default") val scaner = new Scanner(new File(dataPath)) while (scaner.hasNext()) { val cols = scaner.next().split(",") timeSeries.addOrUpdate(new FixedMillisecond(cols(1).toLong), cols(0).toLong) } dataset.addSeries(timeSeries) dataset } def paint( width: Int, height: Int, chartTitle: String, categoryAxisLabel: String, valueAxisLabel: String): Unit = { val lineChart = ChartFactory.createTimeSeriesChart( chartTitle, categoryAxisLabel, valueAxisLabel, createDataset, false, false, false) ChartUtils.saveChartAsJPEG(new File(picturePath), lineChart, width, height) } }
Example 108
Source File: LineChartPainter.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.painter import java.io.File import java.util.Scanner import org.jfree.chart.{ChartFactory, ChartUtils} import org.jfree.chart.plot.PlotOrientation import org.jfree.data.category.DefaultCategoryDataset class LineChartPainter(dataPath: String, picturePath: String) extends Painter(dataPath, picturePath) { def createDataset(): DefaultCategoryDataset = { fw.flush() fw.close() val dataset = new DefaultCategoryDataset val scaner = new Scanner(new File(dataPath)) while (scaner.hasNext()) { val cols = scaner.next().split(",") dataset.addValue(cols(0).toLong, "default", cols(1)) } dataset } def paint( width: Int, height: Int, chartTitle: String, categoryAxisLabel: String, valueAxisLabel: String): Unit = { val lineChart = ChartFactory.createLineChart( chartTitle, categoryAxisLabel, valueAxisLabel, createDataset, PlotOrientation.VERTICAL, false, false, false) ChartUtils.saveChartAsJPEG(new File(picturePath), lineChart, width, height) } }
Example 109
Source File: ExecutorNumMonitor.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.monitor.executor

import java.io.File
import java.util.Date

import scala.xml._

import org.apache.spark.alarm.{AlertMessage, EmailAlarm, HtmlMessage}
import org.apache.spark.monitor.{Monitor, MonitorItem}
import org.apache.spark.monitor.MonitorItem.MonitorItem
import org.apache.spark.painter.TimeSeriesChartPainter
import org.apache.spark.scheduler._
import org.apache.spark.status.ExecutorSummaryWrapper

class ExecutorNumMonitor extends ExecutorMonitor {
  override val item: MonitorItem = MonitorItem.EXECUTOR_NUM_NOTIFIER
  lazy val dataPath = s"/tmp/${item}-${conf.get("spark.app.id")}.csv"
  lazy val picturePath = s"/tmp/${item}-${conf.get("spark.app.id")}.jpg"
  lazy val eventMinInterval =
    conf.getTimeAsMs(s"${Monitor.PREFIX}.${item.toString.toLowerCase}.granularity", "60s")
  var lastPointTime: Long = new Date().getTime
  var recentEventTime: Long = new Date().getTime

  lazy private val painter = new TimeSeriesChartPainter(dataPath, picturePath)

  def executorNum(): Long = {
    kvStore.count(classOf[ExecutorSummaryWrapper], "active", true)
  }

  def addPoint(executorNum: Long, time: Long): Unit = {
    painter.addPoint(executorNum, time)
  }
  // scalastyle:off
  override def watchOut(event: SparkListenerEvent): Option[AlertMessage] = {
    event match {
      case env: SparkListenerExecutorAdded =>
        // try to coalesce executor-number changes within 60s into one point,
        // so that we can keep the graph clean and readable
        if (env.time - lastPointTime > eventMinInterval) {
          addPoint(executorNum, recentEventTime)
          addPoint(executorNum, env.time)
          lastPointTime = env.time
        }
        recentEventTime = env.time
        Option.empty
      case env: SparkListenerExecutorRemoved =>
        if (env.time - lastPointTime > eventMinInterval) {
          addPoint(executorNum, recentEventTime)
          addPoint(executorNum, env.time)
          lastPointTime = env.time
        }
        recentEventTime = env.time
        Option.empty
      case e: SparkListenerApplicationEnd =>
        addPoint(executorNum, recentEventTime)
        addPoint(executorNum, new Date().getTime)
        painter.paint(600, 400, "executor num curve", "datetime", "executor num")
        if (EmailAlarm.get().isDefined) {
          val pic = EmailAlarm.get().get.embed(new File(picturePath))
          val a = <h2>Dynamic allocation status:</h2>
            <img src={"cid:" + pic}></img>
            <br/>
          Option(new HtmlMessage(title = item, content = a.mkString))
        } else {
          Option.empty
        }
    }
  }
  // scalastyle:on
}
Example 110
Source File: GlobalSapSQLContext.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import java.io.File import com.sap.spark.util.TestUtils import com.sap.spark.{GlobalSparkContext, WithSQLContext} import org.apache.spark.SparkContext import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{BoundReference, Cast} import org.apache.spark.unsafe.types._ import org.apache.spark.sql.types._ import org.scalatest.Suite import scala.io.Source trait GlobalSapSQLContext extends GlobalSparkContext with WithSQLContext { self: Suite => override implicit def sqlContext: SQLContext = GlobalSapSQLContext._sqlc override protected def setUpSQLContext(): Unit = GlobalSapSQLContext.init(sc) override protected def tearDownSQLContext(): Unit = GlobalSapSQLContext.reset() def getDataFrameFromSourceFile(sparkSchema: StructType, path: File): DataFrame = { val conversions = sparkSchema.toSeq.zipWithIndex.map({ case (field, index) => Cast(BoundReference(index, StringType, nullable = true), field.dataType) }) val data = Source.fromFile(path) .getLines() .map({ line => val stringRow = InternalRow.fromSeq(line.split(",", -1).map(UTF8String.fromString)) Row.fromSeq(conversions.map({ c => c.eval(stringRow) })) }) val rdd = sc.parallelize(data.toSeq, numberOfSparkWorkers) sqlContext.createDataFrame(rdd, sparkSchema) } } object GlobalSapSQLContext { private var _sqlc: SQLContext = _ private def init(sc: SparkContext): Unit = if (_sqlc == null) { _sqlc = TestUtils.newSQLContext(sc) } private def reset(): Unit = { if (_sqlc != null) { _sqlc.catalog.unregisterAllTables() } } }
Example 111
Source File: DefaultFileWatchService.scala From play-file-watch with Apache License 2.0 | 5 votes |
package play.dev.filewatch import java.io.File import java.nio.file.FileSystems import io.methvin.watcher.DirectoryChangeEvent import io.methvin.watcher.DirectoryChangeListener import io.methvin.watcher.DirectoryWatcher import io.methvin.watchservice.MacOSXListeningWatchService import scala.collection.JavaConverters._ import scala.util.control.NonFatal class DefaultFileWatchService(logger: LoggerProxy, isMac: Boolean) extends FileWatchService { def this(logger: LoggerProxy) = this(logger, false) def watch(filesToWatch: Seq[File], onChange: () => Unit) = { val dirsToWatch = filesToWatch.filter { file => if (file.isDirectory) { true } else if (file.isFile) { logger.warn("An attempt has been made to watch the file: " + file.getCanonicalPath) logger.warn("DefaultFileWatchService only supports watching directories. The file will not be watched.") false } else false } val watchService = if (isMac) new MacOSXListeningWatchService() else FileSystems.getDefault.newWatchService() val directoryWatcher = DirectoryWatcher .builder() .paths(dirsToWatch.map(_.toPath).asJava) .listener(new DirectoryChangeListener { override def onEvent(event: DirectoryChangeEvent): Unit = onChange() }) .watchService(watchService) .build() val thread = new Thread( new Runnable { override def run(): Unit = { try { directoryWatcher.watch() } catch { case NonFatal(_) => // Do nothing, this means the watch service has been closed, or we've been interrupted. } } }, "play-watch-service" ) thread.setDaemon(true) thread.start() new FileWatcher { override def stop(): Unit = directoryWatcher.close() } } }
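A hedged sketch of wiring the watch service into a build tool; `logger` stands for whatever LoggerProxy implementation the caller already has:

import java.io.File

val service = new DefaultFileWatchService(logger, isMac = false)   // logger: LoggerProxy (assumed)
val watcher = service.watch(Seq(new File("app"), new File("conf")), () => println("change detected"))
// ... later, when watching is no longer needed:
watcher.stop()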
Example 112
Source File: PollingFileWatchService.scala From play-file-watch with Apache License 2.0 | 5 votes |
package play.dev.filewatch import java.io.File import better.files.{ File => ScalaFile, _ } import annotation.tailrec object SourceModificationWatch { type PathFinder = () => Iterator[ScalaFile] private def listFiles(sourcesFinder: PathFinder): Set[ScalaFile] = sourcesFinder().toSet private def findLastModifiedTime(files: Set[ScalaFile]): Long = { if (files.nonEmpty) files.maxBy(_.lastModifiedTime).lastModifiedTime.toEpochMilli else 0L } @tailrec def watch(sourcesFinder: PathFinder, pollDelayMillis: Int, state: WatchState)( terminationCondition: => Boolean ): (Boolean, WatchState) = { import state._ val filesToWatch = listFiles(sourcesFinder) val sourceFilesPath: Set[String] = filesToWatch.map(_.toJava.getCanonicalPath) val lastModifiedTime = findLastModifiedTime(filesToWatch) val sourcesModified = lastModifiedTime > lastCallbackCallTime || previousFiles != sourceFilesPath val (triggered, newCallbackCallTime) = if (sourcesModified) (false, System.currentTimeMillis) else (awaitingQuietPeriod, lastCallbackCallTime) val newState = new WatchState(newCallbackCallTime, sourceFilesPath, sourcesModified, if (triggered) count + 1 else count) if (triggered) (true, newState) else { Thread.sleep(pollDelayMillis) if (terminationCondition) (false, newState) else watch(sourcesFinder, pollDelayMillis, newState)(terminationCondition) } } } final class WatchState( val lastCallbackCallTime: Long, val previousFiles: Set[String], val awaitingQuietPeriod: Boolean, val count: Int ) { def previousFileCount: Int = previousFiles.size } object WatchState { def empty = new WatchState(0L, Set.empty[String], false, 0) }
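A sketch of driving the polling watcher; the better-files PathFinder and the termination flag are assumptions for illustration:

import better.files.{File => ScalaFile}

@volatile var stopRequested = false
val sources: SourceModificationWatch.PathFinder = () => ScalaFile("app").listRecursively
val (triggered, newState) =
  SourceModificationWatch.watch(sources, pollDelayMillis = 500, WatchState.empty)(stopRequested)
// `triggered` becomes true once changes settle for a poll interval; `newState` seeds the next call.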
Example 113
Source File: ExampleMahaService.scala From maha with Apache License 2.0 | 5 votes |
// Copyright 2017, Yahoo Holdings Inc. // Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms. package com.yahoo.maha.api.jersey.example import java.io.File import java.util.UUID import com.yahoo.maha.core.ddl.OracleDDLGenerator import com.yahoo.maha.jdbc.{JdbcConnection, List, Seq} import com.yahoo.maha.service.{DefaultMahaService, MahaService, MahaServiceConfig} import com.zaxxer.hikari.{HikariConfig, HikariDataSource} import grizzled.slf4j.Logging import org.apache.commons.io.FileUtils import org.joda.time.DateTime import org.joda.time.format.DateTimeFormat object ExampleMahaService extends Logging { val REGISTRY_NAME = "academic"; private var dataSource: Option[HikariDataSource] = None private var jdbcConnection: Option[JdbcConnection] = None val h2dbId = UUID.randomUUID().toString.replace("-","") val today: String = DateTimeFormat.forPattern("yyyy-MM-dd").print(DateTime.now()) val yesterday: String = DateTimeFormat.forPattern("yyyy-MM-dd").print(DateTime.now().minusDays(1)) def initJdbcToH2(): Unit = { val config = new HikariConfig() config.setJdbcUrl(s"jdbc:h2:mem:$h2dbId;MODE=Oracle;DB_CLOSE_DELAY=-1") config.setUsername("sa") config.setPassword("h2.test.database.password") config.setMaximumPoolSize(2) dataSource = Option(new HikariDataSource(config)) jdbcConnection = dataSource.map(new JdbcConnection(_)) assert(jdbcConnection.isDefined, "Failed to connect to h2 local server") } def getMahaService(scope: String = "main"): MahaService = { val jsonString = FileUtils.readFileToString(new File(s"src/$scope/resources/maha-service-config.json")) .replaceAll("h2dbId", s"$h2dbId") initJdbcToH2() val mahaServiceResult = MahaServiceConfig.fromJson(jsonString.getBytes("utf-8")) if (mahaServiceResult.isFailure) { mahaServiceResult.leftMap { res=> error(s"Failed to launch Example MahaService, MahaService Error list is: ${res.list.toList}") } } val mahaServiceConfig = mahaServiceResult.toOption.get val mahaService: MahaService = new DefaultMahaService(mahaServiceConfig) stageStudentData(mahaServiceConfig) mahaService } def stageStudentData(mahaServiceConfig: MahaServiceConfig) : Unit = { val ddlGenerator = new OracleDDLGenerator val erRegistryConfig = mahaServiceConfig.registry.get(ExampleMahaService.REGISTRY_NAME).get val erRegistry= erRegistryConfig.registry erRegistry.factMap.values.foreach { publicFact => publicFact.factList.foreach { fact=> val ddl = ddlGenerator.toDDL(fact) assert(jdbcConnection.get.executeUpdate(ddl).isSuccess) } } val insertSql = """ INSERT INTO student_grade_sheet (year, section_id, student_id, class_id, total_marks, date, comment) VALUES (?, ?, ?, ?, ?, ?, ?) """ val rows: List[Seq[Any]] = List( Seq(1, 100, 213, 200, 125, ExampleMahaService.today, "some comment") ) rows.foreach { row => val result = jdbcConnection.get.executeUpdate(insertSql, row) assert(result.isSuccess) } var count = 0 jdbcConnection.get.queryForObject("select * from student_grade_sheet") { rs => while (rs.next()) { count += 1 } } assert(rows.size == count) } }
Example 114
Source File: WorkerStateReporter.scala From maha with Apache License 2.0 | 5 votes |
// Copyright 2018, Yahoo Inc. // Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms. package com.yahoo.maha.worker.state import java.io.File import akka.actor.{Actor, ActorPath, ActorSystem, Props} import com.typesafe.config.{Config, ConfigFactory} import com.yahoo.maha.core.Engine import com.yahoo.maha.worker.state.actor._ import grizzled.slf4j.Logging object WorkerStateReporter extends Logging { // Use a bounded mailbox to prevent memory leaks in the rare case when jobs get piled up to be processed by the actor val defaultConfig: Config = ConfigFactory.parseString( """ |akka.actor.nonblocking_bounded_mailbox { | mailbox-type = akka.dispatch.NonBlockingBoundedMailbox | mailbox-capacity = 10000 |} |akka { | loggers = ["akka.event.slf4j.Slf4jLogger"] | loglevel = "INFO" |} |""".stripMargin) } case class WorkerStateReporter(akkaConf: String) extends Logging { val config: Config = { val file = new File(akkaConf) if(file.exists() && file.canRead) { info(s"Using akka conf file : ${file.getAbsolutePath}") ConfigFactory.parseFile(file) } else { info("Using default akka config") WorkerStateReporter.defaultConfig } } val system = ActorSystem("maha-workers", config) lazy val workerStateActorPath: ActorPath = { val actorConfig = WorkerStateActorConfig() val props: Props = Props(classOf[WorkerStateActor], actorConfig).withMailbox("akka.actor.nonblocking_bounded_mailbox") val path = system.actorOf(props, actorConfig.name).path info(s"Created WorkerStateActor: $path") path } def jobStarted(executionType: ExecutionType, jobId: Long, engine: Engine, cost: Long, estimatedRows: Long, userId: String): Unit = { sendMessage(JobStarted(executionType, jobId, engine, cost, estimatedRows, userId)) } def jobEnded(executionType: ExecutionType, jobId: Long, engine: Engine, cost: Long, estimatedRows: Long, userId: String): Unit = { sendMessage(JobEnded(executionType, jobId, engine, cost, estimatedRows, userId)) } def sendMessage(actorMessage:WorkerStateActorMessage) = { try { system.actorSelection(workerStateActorPath).tell(actorMessage, Actor.noSender) } catch { case t: Throwable => warn(s"Failed to send $actorMessage message to WorkerStateActor", t) } } }
Example 115
Source File: RocksDBStorage.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.storage import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File} import java.util.UUID import com.esotericsoftware.kryo.io.{Input, Output} import com.esotericsoftware.kryo.{Kryo, Serializer} import justin.db.storage.PluggableStorageProtocol.{Ack, StorageGetData} import org.rocksdb.{FlushOptions, Options, RocksDB} import scala.concurrent.Future // TODO: // Current version store every single data under one file (totally doesn't care about data originality). // Data should be eventually splitted by ring partitionId. // This might be an issue during possible data movements between nodes. final class RocksDBStorage(dir: File) extends PluggableStorageProtocol { import RocksDBStorage._ { RocksDB.loadLibrary() } private[this] val kryo = new Kryo() private[this] val db: RocksDB = { val options: Options = new Options().setCreateIfMissing(true) RocksDB.open(options, dir.getPath) } override def get(id: UUID)(resolveOriginality: (UUID) => PluggableStorageProtocol.DataOriginality): Future[PluggableStorageProtocol.StorageGetData] = { val key: Array[Byte] = uuid2bytes(kryo, id) val dataBytes: Array[Byte] = db.get(key) val justinDataOpt = Option(dataBytes).map { dataBytes => val input = new Input(new ByteArrayInputStream(dataBytes)) JustinDataSerializer.read(kryo, input, classOf[JustinData]) } Future.successful(justinDataOpt.map(StorageGetData.Single).getOrElse(StorageGetData.None)) } override def put(data: JustinData)(resolveOriginality: (UUID) => PluggableStorageProtocol.DataOriginality): Future[PluggableStorageProtocol.Ack] = { val key: Array[Byte] = uuid2bytes(kryo, data.id) val dataBytes: Array[Byte] = { val output = new Output(new ByteArrayOutputStream()) JustinDataSerializer.write(kryo, output, data) output.getBuffer } db.put(key, dataBytes) db.flush(new FlushOptions().setWaitForFlush(true)) Ack.future } } object RocksDBStorage { def uuid2bytes(kryo: Kryo, id: UUID): Array[Byte] = { val output = new Output(new ByteArrayOutputStream(), 16) UUIDSerializer.write(kryo, output, id) output.getBuffer } object UUIDSerializer extends Serializer[UUID] { override def read(kryo: Kryo, input: Input, `type`: Class[UUID]): UUID = { new UUID(input.readLong, input.readLong) } override def write(kryo: Kryo, output: Output, uuid: UUID): Unit = { output.writeLong(uuid.getMostSignificantBits) output.writeLong(uuid.getLeastSignificantBits) } } object JustinDataSerializer extends Serializer[JustinData] { override def read(kryo: Kryo, input: Input, `type`: Class[JustinData]): JustinData = { JustinData( id = UUIDSerializer.read(kryo, input, classOf[UUID]), value = input.readString(), vclock = input.readString(), timestamp = input.readLong() ) } override def write(kryo: Kryo, output: Output, data: JustinData): Unit = { UUIDSerializer.write(kryo, output, data.id) output.writeString(data.value) output.writeString(data.vclock) output.writeLong(data.timestamp) } } }
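The Kryo serializers above can be checked in isolation with a small round trip; the 16-byte buffer produced by uuid2bytes holds exactly the two longs of a UUID:

import java.util.UUID
import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.Input

val kryo = new Kryo()
val id = UUID.randomUUID()
val bytes = RocksDBStorage.uuid2bytes(kryo, id)
val decoded = RocksDBStorage.UUIDSerializer.read(kryo, new Input(bytes), classOf[UUID])
assert(decoded == id)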
Example 116
Source File: VirtualScreeningTest.scala From MaRe with Apache License 2.0 | 5 votes |
package se.uu.it.mare import java.io.File import java.util.UUID import scala.io.Source import scala.util.Properties import org.apache.spark.SharedSparkContext import org.junit.runner.RunWith import org.scalatest.FunSuite import org.scalatest.junit.JUnitRunner private object SDFUtils { def parseIDsAndScores(sdf: String): Array[(String, String)] = { sdf.split("\\n\\$\\$\\$\\$\\n").map { mol => val lines = mol.split("\\n") (lines(0), lines.last) } } } @RunWith(classOf[JUnitRunner]) class VirtualScreeningTest extends FunSuite with SharedSparkContext { private val tmpDir = new File(Properties.envOrElse("TMPDIR", "/tmp")) test("Virtual Screening") { sc.hadoopConfiguration.set("textinputformat.record.delimiter", "\n$$$$\n") val mols = sc.textFile(getClass.getResource("sdf/molecules.sdf").getPath) // Parallel execution with MaRe val hitsParallel = new MaRe(mols) .map( inputMountPoint = TextFile("/input.sdf", "\n$$$$\n"), outputMountPoint = TextFile("/output.sdf", "\n$$$$\n"), imageName = "mcapuccini/oe:latest", command = "fred -receptor /var/openeye/hiv1_protease.oeb " + "-hitlist_size 0 " + "-conftest none " + "-dock_resolution Low " + "-dbase /input.sdf " + "-docked_molecule_file /output.sdf") .reduce( inputMountPoint = TextFile("/input.sdf", "\n$$$$\n"), outputMountPoint = TextFile("/output.sdf", "\n$$$$\n"), imageName = "mcapuccini/sdsorter:latest", command = "sdsorter -reversesort='FRED Chemgauss4 score' " + "-keep-tag='FRED Chemgauss4 score' " + "-nbest=30 " + "/input.sdf " + "/output.sdf") .rdd.collect.mkString("\n$$$$\n") // Serial execution val inputFile = new File(getClass.getResource("sdf/molecules.sdf").getPath) val dockedFile = new File(tmpDir, "mare_test_" + UUID.randomUUID.toString) dockedFile.createNewFile dockedFile.deleteOnExit val outputFile = new File(tmpDir, "mare_test_" + UUID.randomUUID.toString) outputFile.createNewFile outputFile.deleteOnExit DockerHelper.run( imageName = "mcapuccini/oe:latest", command = "fred -receptor /var/openeye/hiv1_protease.oeb " + "-hitlist_size 0 " + "-conftest none " + "-dock_resolution Low " + "-dbase /input.sdf " + "-docked_molecule_file /docked.sdf", bindFiles = Seq(inputFile, dockedFile), volumeFiles = Seq(new File("/input.sdf"), new File("/docked.sdf")), forcePull = false) DockerHelper.run( imageName = "mcapuccini/sdsorter:latest", command = "sdsorter -reversesort='FRED Chemgauss4 score' " + "-keep-tag='FRED Chemgauss4 score' " + "-nbest=30 " + "/docked.sdf " + "/output.sdf", bindFiles = Seq(dockedFile, outputFile), volumeFiles = Seq(new File("/docked.sdf"), new File("/output.sdf")), forcePull = false) val hitsSerial = Source.fromFile(outputFile).mkString // Test val parallel = SDFUtils.parseIDsAndScores(hitsParallel) val serial = SDFUtils.parseIDsAndScores(hitsSerial) assert(parallel.deep == serial.deep) } }
Example 117
Source File: TestSuiteTests.scala From circe-json-schema with Apache License 2.0 | 5 votes |
package io.circe.schema import cats.data.Validated import io.circe.{ Decoder, Json } import java.io.File import org.scalatest.flatspec.AnyFlatSpec case class SchemaTestCase(description: String, data: Json, valid: Boolean) case class SchemaTest(description: String, schema: Json, tests: List[SchemaTestCase]) object SchemaTestCase { implicit val decodeSchemaTestCase: Decoder[SchemaTestCase] = io.circe.generic.semiauto.deriveDecoder } object SchemaTest { implicit val decodeSchemaTest: Decoder[SchemaTest] = io.circe.generic.semiauto.deriveDecoder } class TestSuiteTests(path: String) extends AnyFlatSpec { val tests: List[SchemaTest] = io.circe.jawn .decodeFile[List[SchemaTest]](new File(path)) .getOrElse( throw new Exception(s"Unable to load test file: $path") ) tests.foreach { case SchemaTest(description, schema, tests) => tests.foreach { case SchemaTestCase(caseDescription, data, valid) => val expected = if (valid) "validate successfully" else "fail to validate" s"$description: $caseDescription" should expected in { val errors = Schema.load(schema).validate(data) if (valid) { assert(errors == Validated.valid(())) } else { assert(errors.isInvalid) } } it should s"$expected when schema is loaded from a string" in { val errors = Schema.loadFromString(schema.noSpaces).get.validate(data) if (valid) { assert(errors == Validated.valid(())) } else { assert(errors.isInvalid) } } } } } class AdditionalItemsTestSuiteTests extends TestSuiteTests("tests/tests/draft7/additionalItems.json") class AdditionalPropertiesTestSuiteTests extends TestSuiteTests("tests/tests/draft7/additionalProperties.json") class AllOfTestSuiteTests extends TestSuiteTests("tests/tests/draft7/allOf.json") class AnyOfTestSuiteTests extends TestSuiteTests("tests/tests/draft7/anyOf.json") class BooleanSchemaTestSuiteTests extends TestSuiteTests("tests/tests/draft7/boolean_schema.json") class ConstTestSuiteTests extends TestSuiteTests("tests/tests/draft7/const.json") class ContainsTestSuiteTests extends TestSuiteTests("tests/tests/draft7/contains.json") class DefaultTestSuiteTests extends TestSuiteTests("tests/tests/draft7/default.json") //class DefinitionsTestSuiteTests extends TestSuiteTests("tests/tests/draft7/definitions.json") class EnumTestSuiteTests extends TestSuiteTests("tests/tests/draft7/enum.json") class ExclusiveMaximumTestSuiteTests extends TestSuiteTests("tests/tests/draft7/exclusiveMaximum.json") class ExclusiveMinimumTestSuiteTests extends TestSuiteTests("tests/tests/draft7/exclusiveMinimum.json") class FormatTestSuiteTests extends TestSuiteTests("tests/tests/draft7/format.json") class IfThenElseTestSuiteTests extends TestSuiteTests("tests/tests/draft7/if-then-else.json") class ItemsTestSuiteTests extends TestSuiteTests("tests/tests/draft7/items.json") class MaximumTestSuiteTests extends TestSuiteTests("tests/tests/draft7/maximum.json") class MaxItemsTestSuiteTests extends TestSuiteTests("tests/tests/draft7/maxItems.json") class MaxLengthTestSuiteTests extends TestSuiteTests("tests/tests/draft7/maxLength.json") class MaxPropertiesTestSuiteTests extends TestSuiteTests("tests/tests/draft7/maxProperties.json") class MinimumTestSuiteTests extends TestSuiteTests("tests/tests/draft7/minimum.json") class MinItemsTestSuiteTests extends TestSuiteTests("tests/tests/draft7/minItems.json") class MinLengthTestSuiteTests extends TestSuiteTests("tests/tests/draft7/minLength.json") class MinPropertiesTestSuiteTests extends TestSuiteTests("tests/tests/draft7/minProperties.json") class MultipleOfTestSuiteTests extends 
TestSuiteTests("tests/tests/draft7/multipleOf.json") class NotTestSuiteTests extends TestSuiteTests("tests/tests/draft7/not.json") class OneOfTestSuiteTests extends TestSuiteTests("tests/tests/draft7/oneOf.json") class PatternTestSuiteTests extends TestSuiteTests("tests/tests/draft7/pattern.json") class PatternPropertiesTestSuiteTests extends TestSuiteTests("tests/tests/draft7/patternProperties.json") class PropertyNamesTestSuiteTests extends TestSuiteTests("tests/tests/draft7/propertyNames.json") // Not currently running remote tests. //class RefTestSuiteTests extends TestSuiteTests("tests/tests/draft7/ref.json") //class RefRemoteTestSuiteTests extends TestSuiteTests("tests/tests/draft7/refRemote.json") class RequiredTestSuiteTests extends TestSuiteTests("tests/tests/draft7/required.json") class TypeTestSuiteTests extends TestSuiteTests("tests/tests/draft7/type.json") class UniqueItemsTestSuiteTests extends TestSuiteTests("tests/tests/draft7/uniqueItems.json")
Example 118
Source File: KMeans.scala From spark-tda with Apache License 2.0 | 5 votes |
import java.io.{File, PrintWriter}

import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.sql.functions._

def computeKMeans(
    pathToTextFile: String,
    quantity: Int,
    iteration: Int) {
  case class Point(x: Double, y: Double)

  def save(f: File)(func: PrintWriter => Unit) {
    val p = new PrintWriter(f)
    try {
      func(p)
    } finally {
      p.close()
    }
  }

  val filename = pathToTextFile.split("\\.")(0)
  val outputFilename = s"$filename-KMEANS-k${quantity}-i${iteration}.tsv"

  val points = sc
    .textFile(pathToTextFile)
    .map { line => line.trim.split("\\s+") }
    .map { row => Point(row(0).toDouble, row(1).toDouble) }

  val features = points
    .map { p => Vectors.dense(p.x, p.y) }
  features.cache()

  val kmeans = KMeans.train(features, quantity, iteration)

  val predictions = features
    .map { f => (f(0), f(1), kmeans.predict(f) + 1) }
    .collect

  save(new File(outputFilename)) { f =>
    println(s"OUTPUT TO: ${outputFilename}")
    predictions.foreach { case (x, y, ccid) =>
      f.println(s"${x}\t${y}\t${ccid}")
    }
  }
}
Example 119
Source File: ReebDiagram.scala From spark-tda with Apache License 2.0 | 5 votes |
import java.io.{File, PrintWriter} import org.apache.spark.ml.linalg.Vectors import org.apache.spark.ml.feature.{ReebDiagram, VectorAssembler} import org.apache.spark.sql.functions._ def computeReebDiagram( pathToTextFile: String, quantity: Int, linkThresholdRatio: Double, coreThresholdRatio: Double, topTreeRatio: Double) { def save(f: File)(func: PrintWriter => Unit) { val p = new PrintWriter(f) try { func(p) } finally { p.close() } } val filename = pathToTextFile.split("\\.")(0) val outputFilename = s"$filename-REEB-k${quantity}-l${linkThresholdRatio}-c${coreThresholdRatio}-i${topTreeRatio}.tsv" val points = sc.textFile(pathToTextFile) .map { line => line.trim.split("\\s+") } .zipWithIndex .map { case (row, i) => (i, row(0).toDouble, row(1).toDouble, 0) } .toDF("id", "x", "y", "cover_id") val cardinality = points.count val assembler = new VectorAssembler() .setInputCols(Array("x", "y")) .setOutputCol("feature") val features = assembler .transform(points) val reeb = new ReebDiagram() .setK(quantity) .setLinkThresholdRatio(linkThresholdRatio) .setCoreThresholdRatio(coreThresholdRatio) .setTopTreeSize((topTreeRatio * cardinality).toInt) .setTopTreeLeafSize(quantity) .setIdCol("id") .setCoverCol("cover_id") .setFeaturesCol("feature") .setOutputCol("cluster_id") val transformed = reeb .fit(features) .transform(features) val clusters = Map( transformed .select("cluster_id") .rdd .map(row => row.getLong(0)) .distinct .zipWithIndex .collect(): _*) val result = transformed .select("x", "y", "cluster_id") .rdd .map(row => (row.getDouble(0), row.getDouble(1), row.getLong(2))) .map { case (x, y, clusterId) => (x, y, clusters(clusterId) + 1)} .collect() save(new File(outputFilename)) { println(s"OUTPUT TO: ${outputFilename}") f => result.foreach{ case (x, y, ccid) => f.println(s"${x}\t${y}\t${ccid}") } } }
Example 120
Source File: TempDirectory.scala From spark-tda with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.feature import java.io.File import org.scalatest.{BeforeAndAfterAll, Suite} import com.holdenkarau.spark.testing.Utils protected def tempDir: File = _tempDir override def beforeAll(): Unit = { super.beforeAll() _tempDir = Utils.createTempDir() } override def afterAll(): Unit = { try { Utils.deleteRecursively(_tempDir) } finally { super.afterAll() } } }
Example 121
Source File: MarkdownPagesEndpoint.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.web.guide.markdown import java.io.{BufferedReader, File, FileReader} import java.time.Instant import java.util.concurrent.ConcurrentHashMap import com.avsystem.commons._ import com.vladsch.flexmark.ext.toc.TocExtension import com.vladsch.flexmark.html.HtmlRenderer import com.vladsch.flexmark.parser.Parser import scala.concurrent.{ExecutionContext, Future} final class MarkdownPagesEndpoint(guideResourceBase: String)(implicit ec: ExecutionContext) extends MarkdownPageRPC { private val tocExtension = TocExtension.create private val parser = Parser.builder.extensions(JList(tocExtension)).build private val renderer = HtmlRenderer.builder.extensions(JList(tocExtension)).build private val renderedPages = new ConcurrentHashMap[MarkdownPage, (Future[String], Instant)] private def render(file: File): Future[String] = Future { val reader = new BufferedReader(new FileReader(file)) val document = parser.parseReader(reader) renderer.render(document) } override def loadContent(page: MarkdownPage): Future[String] = { val (result, _) = renderedPages.compute(page, { (_, cached) => val pageFile = new File(guideResourceBase + page.file) cached.opt.filter { case (currentRender, renderedInstant) => currentRender.value.exists(_.isSuccess) && renderedInstant.toEpochMilli >= pageFile.lastModified() }.getOrElse((render(pageFile), Instant.ofEpochMilli(pageFile.lastModified()))) }) result } }
Example 122
Source File: FileDownloadServlet.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.rpc.utils import java.io.File import java.nio.file.Files import javax.servlet.http.{HttpServlet, HttpServletRequest, HttpServletResponse} protected def resolveFileMimeType(file: File): String = Option(getServletContext.getMimeType(file.getAbsolutePath)).getOrElse("application/octet-stream") override def doGet(request: HttpServletRequest, response: HttpServletResponse): Unit = { val file = resolveFile(request) if (!file.exists()) response.sendError(404, "File not found!") else { // MIME type response.setContentType(resolveFileMimeType(file)) // content length response.setContentLengthLong(file.length) // file name response.setHeader("Content-Disposition", s"""attachment; filename="${presentedFileName(file.getName)}"""") val outStream = response.getOutputStream Files.copy(file.toPath, outStream) outStream.close() } } }
Example 123
Source File: CssFileRenderer.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.css import java.io.{File, PrintWriter} import scalacss.internal.Renderer class CssFileRenderer(dirPath: String, styles: Seq[CssBase], createMain: Boolean) { def render()(implicit renderer: Renderer[String]): Unit = { val dir = new File(dirPath) dir.mkdirs() val mainFile: Option[File] = if (createMain) Some(new File(s"${dir.getAbsolutePath}/main.css")) else None mainFile.foreach(_.createNewFile()) val mainWriter = mainFile.map(new PrintWriter(_, "UTF-8")) styles.foreach { style => val name = style.getClass.getName val f = new File(s"${dir.getAbsolutePath}/$name.css") { createNewFile() } new PrintWriter(f, "UTF-8") { write(style.render) flush() close() } mainWriter.foreach(_.append(s"""@import "$name.css";\n""")) } mainWriter.foreach { w => w.flush() w.close() } } }
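A hedged usage sketch: render every CssBase object of an application into target/ui/styles and emit a main.css that imports them. MyStyles is a hypothetical CssBase object, and the import assumes ScalaCSS's ProdDefaults supplies the implicit Renderer[String]:

import scalacss.ProdDefaults._   // assumption: brings an implicit Renderer[String] into scope

val renderer = new CssFileRenderer("target/ui/styles", Seq(MyStyles), createMain = true)
renderer.render()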
Example 124
Source File: ValueStoreSerializationExt.scala From random-projections-at-berlinbuzzwords with Apache License 2.0 | 5 votes |
package com.stefansavev.randomprojections.serialization import java.io.File import com.stefansavev.core.serialization.Utils import com.stefansavev.randomprojections.datarepr.dense.store.ValuesStore import com.stefansavev.randomprojections.serialization.DataFrameViewSerializers._ import com.typesafe.scalalogging.StrictLogging object ValueStoreSerializationExt { val ser = valuesStoreSerializer() implicit class ValueStoreSerializerExt(input: ValuesStore) { def toFile(file: File): Unit = { Utils.toFile(ser, file, input) } def toFile(fileName: String): Unit = { toFile(new File(fileName)) } def toBytes(): Array[Byte] = { Utils.toBytes(ser, input) } } implicit class ValueStoreDeserializerExt(t: ValuesStore.type) extends StrictLogging { def fromFile(file: File): ValuesStore = { if (!file.exists()) { throw new IllegalStateException("file does not exist: " + file.getAbsolutePath) } logger.info("Loading file: " + file.getAbsolutePath) val output = Utils.fromFile(ser, file) output } def fromFile(fileName: String): ValuesStore = { fromFile(new File(fileName)) } def fromBytes(input: Array[Byte]): ValuesStore = { Utils.fromBytes(ser, input) } } }
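The implicit extension classes above are meant for a simple save-and-load round trip; a sketch with a hypothetical path:

import com.stefansavev.randomprojections.datarepr.dense.store.ValuesStore
import com.stefansavev.randomprojections.serialization.ValueStoreSerializationExt._

def roundTrip(store: ValuesStore): ValuesStore = {
  store.toFile("/tmp/values-store.bin")         // added to ValuesStore instances by ValueStoreSerializerExt
  ValuesStore.fromFile("/tmp/values-store.bin") // resolved on the companion via ValueStoreDeserializerExt
}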
Example 125
Source File: DataFrameViewSerializationExt.scala From random-projections-at-berlinbuzzwords with Apache License 2.0 | 5 votes |
package com.stefansavev.randomprojections.serialization import java.io.File import com.stefansavev.core.serialization.Utils import com.stefansavev.randomprojections.datarepr.dense.DataFrameView import com.stefansavev.randomprojections.serialization.DataFrameViewSerializers._ object DataFrameViewSerializationExt { implicit class DataFrameSerializerExt(input: DataFrameView) { def toFile(file: File): Unit = { val ser = dataFrameSerializer() Utils.toFile(ser, file, input) } def toFile(fileName: String): Unit = { toFile(new File(fileName)) } } implicit class DataFrameDeserializerExt(t: DataFrameView.type) { def fromFile(file: File): DataFrameView = { if (!file.exists()) { throw new IllegalStateException("file does not exist") } val ser = dataFrameSerializer() val output = Utils.fromFile(ser, file) output } def fromFile(dir: String): DataFrameView = { fromFile(new File(dir)) } } }
Example 126
Source File: FileWriter.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
package sbtavrohugger; import avrohugger.filesorter.{AvdlFileSorter, AvscFileSorter} import avrohugger.Generator import java.io.File import sbt.Keys._ import sbt.{Logger, globFilter, singleFileFinder} import sbt.Path._ object FileWriter { private[sbtavrohugger] def generateCaseClasses( generator: Generator, srcDirs: Seq[File], target: File, log: Logger): Set[java.io.File] = { log.info("Considering source directories %s".format(srcDirs.mkString(","))) def getSrcFiles(dirs: Seq[File], fileExtension: String) = for { srcDir <- dirs srcFile <- (srcDir ** s"*.$fileExtension").get } yield srcFile for (inFile <- AvscFileSorter.sortSchemaFiles(getSrcFiles(srcDirs, "avsc"))) { log.info("Compiling AVSC %s to %s".format(inFile, target.getPath)) generator.fileToFile(inFile, target.getPath) } for (idlFile <- AvdlFileSorter.sortSchemaFiles(getSrcFiles(srcDirs, "avdl"))) { log.info("Compiling Avro IDL %s".format(idlFile)) generator.fileToFile(idlFile, target.getPath) } for (inFile <- getSrcFiles(srcDirs, "avro")) { log.info("Compiling Avro datafile %s".format(inFile)) generator.fileToFile(inFile, target.getPath) } for (protocol <- getSrcFiles(srcDirs, "avpr")) { log.info("Compiling Avro protocol %s".format(protocol)) generator.fileToFile(protocol, target.getPath) } (target ** ("*.java"|"*.scala")).get.toSet } }
Example 127
Source File: StandardTestUtil.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
package test import java.io.File import org.apache.avro.file.{ DataFileReader, DataFileWriter } import org.apache.avro.generic.{ GenericRecord, GenericDatumReader, GenericDatumWriter } import org.specs2.mutable.Specification object StandardTestUtil extends Specification { def write(file: File, records: List[GenericRecord]) = { val userDatumWriter = new GenericDatumWriter[GenericRecord] val dataFileWriter = new DataFileWriter[GenericRecord](userDatumWriter) dataFileWriter.create(records.head.getSchema, file); records.foreach(record => dataFileWriter.append(record)) dataFileWriter.close(); } def read(file: File, records: List[GenericRecord]) = { val dummyRecord = new GenericDatumReader[GenericRecord] val schema = new DataFileReader(file, dummyRecord).getSchema val userDatumReader = new GenericDatumReader[GenericRecord](schema) val dataFileReader = new DataFileReader[GenericRecord](file, userDatumReader) // Adapted from: https://github.com/tackley/avrohugger-list-issue/blob/master/src/main/scala/net/tackley/Reader.scala // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat // (via org.apache.avro.file.DataFileStream) interacts with the StandardDatumReader. var record: GenericRecord = null.asInstanceOf[GenericRecord] var sameRecord: GenericRecord = null.asInstanceOf[GenericRecord] val recordIter = records.iterator while (dataFileReader.hasNext) { sameRecord = dataFileReader.next(sameRecord) record = recordIter.next } dataFileReader.close() sameRecord must ===(record) } def verifyWriteAndRead(records: List[GenericRecord]) = { val fileName = s"${records.head.getClass.getName}" val fileEnding = "avro" val file = File.createTempFile(fileName, fileEnding) file.deleteOnExit() write(file, records) read(file, records) } }
Example 129
Source File: StandardDefaultValuesSpec.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
import test._ import org.specs2.mutable.Specification import java.io.File import scala.collection.mutable.Buffer import scala.collection.JavaConverters._ import org.apache.avro.file._ import org.apache.avro.generic._ import org.apache.avro._ class StandardDefaultValuesSpec extends Specification { skipAll "A case class with default values" should { "deserialize correctly" in { val record = DefaultTest() val enumSchemaString = """{"type":"enum","name":"DefaultEnum","symbols":["SPADES","DIAMONDS","CLUBS","HEARTS"]}""" val enumSchema = new Schema.Parser().parse(enumSchemaString) val genericEnum = new GenericData.EnumSymbol(enumSchema, record.suit.toString) val embeddedSchemaString = """{"type":"record","name":"Embedded","fields":[{"name":"inner","type":"int"}]},"default":{"inner":1}}""" val embeddedSchema = new Schema.Parser().parse(embeddedSchemaString) val embeddedGenericRecord = new GenericData.Record(embeddedSchema) embeddedGenericRecord.put("inner", record.embedded.inner) val recordSchemaString = """{"type":"record","name":"DefaultTest","namespace":"test","fields":[{"name":"suit","type":{"type":"enum","name":"DefaultEnum","symbols":["SPADES","DIAMONDS","CLUBS","HEARTS"]},"default":"SPADES"},{"name":"number","type":"int","default":0},{"name":"str","type":"string","default":"str"},{"name":"optionString","type":["null","string"],"default":null},{"name":"optionStringValue","type":["string","null"],"default":"default"},{"name":"embedded","type":{"type":"record","name":"Embedded","fields":[{"name":"inner","type":"int"}]},"default":{"inner":1}},{"name":"defaultArray","type":{"type":"array","items":"int"},"default":[1,3,4,5]},{"name":"optionalEnum","type":["null","DefaultEnum"],"default":null},{"name":"defaultMap","type":{"type":"map","values":"string"},"default":{"Hello":"world","Merry":"Christmas"}},{"name":"byt","type":"bytes","default":"ÿ"}, {"name":"defaultEither","type": ["int", "string"],"default":2}, {"name":"defaultCoproduct","type": ["int", "string", "boolean"],"default":3}]}""" val recordSchema = new Schema.Parser().parse(recordSchemaString) val genericRecord = new GenericData.Record(recordSchema) genericRecord.put("suit", genericEnum) genericRecord.put("number", record.number) genericRecord.put("str", record.str) genericRecord.put("optionString", record.optionString.getOrElse(null)) genericRecord.put("optionStringValue", record.optionStringValue.getOrElse(null)) genericRecord.put("embedded", embeddedGenericRecord) genericRecord.put("defaultArray",record.defaultArray.asJava) genericRecord.put("optionalEnum", record.optionalEnum.getOrElse(null)) genericRecord.put("defaultMap", record.defaultMap.asJava) genericRecord.put("byt", java.nio.ByteBuffer.wrap(record.byt)) genericRecord.put("defaultEither", record.defaultEither.fold(identity, identity)) genericRecord.put("defaultCoproduct", record.defaultCoproduct.select[Int].getOrElse(0)) val records = List(genericRecord) val fileName = s"${records.head.getClass.getName}" val fileEnding = "avro" val file = File.createTempFile(fileName, fileEnding) file.deleteOnExit() StandardTestUtil.write(file, records) var dummyRecord = new GenericDatumReader[GenericRecord] val schema = new DataFileReader(file, dummyRecord).getSchema val userDatumReader = new GenericDatumReader[GenericRecord](schema) val dataFileReader = new DataFileReader[GenericRecord](file, userDatumReader) // Adapted from: https://github.com/tackley/avrohugger-list-issue/blob/master/src/main/scala/net/tackley/Reader.scala // This isn't great scala, but represents how 
org.apache.avro.mapred.AvroInputFormat // (via org.apache.avro.file.DataFileStream) interacts with the StandardDatumReader. var sameRecord: GenericRecord = null.asInstanceOf[GenericRecord] while (dataFileReader.hasNext) { sameRecord = dataFileReader.next(sameRecord) } dataFileReader.close() sameRecord.get("suit").toString === DefaultEnum.SPADES.toString sameRecord.get("number") === 0 sameRecord.get("str").toString === "str" sameRecord.get("optionString") === null sameRecord.get("optionStringValue").toString === "default" sameRecord.get("embedded").asInstanceOf[GenericRecord].get("inner") === 1 sameRecord.get("defaultArray") === List(1,3,4,5).asJava sameRecord.get("optionalEnum") === null sameRecord.get("defaultMap").toString === "{Hello=world, Merry=Christmas}" sameRecord.get("byt") === java.nio.ByteBuffer.wrap("ÿ".getBytes) sameRecord.get("defaultEither") === 2 sameRecord.get("defaultCoproduct") === 3 } } }
Example 131
Source File: ExpectedResults.scala From api-first-hand with MIT License | 5 votes |
package de.zalando import java.io.{ File, FileOutputStream } import de.zalando.apifirst.util.ScalaPrinter import de.zalando.model._ import scala.io.Source trait ExpectedResults { val model = Seq[WithModel]( additional_properties_yaml, basic_polymorphism_yaml, nested_arrays_yaml, nested_options_yaml, basic_extension_yaml, expanded_polymorphism_yaml, nested_objects_yaml, options_yaml, wrong_field_name_yaml, all_of_imports_yaml, i038_invalid_enum_members_yaml ) val examples = Seq[WithModel]( basic_auth_api_yaml, cross_spec_references_yaml, echo_api_yaml, error_in_array_yaml, form_data_yaml, full_petstore_api_yaml, hackweek_yaml, heroku_petstore_api_yaml, instagram_api_yaml, minimal_api_yaml, nakadi_yaml, security_api_yaml, simple_petstore_api_yaml, split_petstore_api_yaml, string_formats_yaml, type_deduplication_yaml, uber_api_yaml, i041_no_json_deserialiser_yaml ) val validations = Seq[WithModel]( nested_arrays_validation_yaml, nested_objects_validation_yaml, nested_options_validation_yaml, numbers_validation_yaml, string_formats_validation_yaml ) val resourcesPath = "play-scala-generator/src/test/resources/" def expectationsFolder: String = "/expected_results/" def dump(result: String, name: String, suffix: String): Unit = { if (result.nonEmpty) { val newFile = target(name, suffix) newFile.getParentFile.mkdirs() newFile.delete() newFile.createNewFile() val out = new FileOutputStream(newFile) out.write(result.getBytes) out.close() } } def asInFile(name: String, suffix: String): String = { val expectedFile = target(name, suffix) if (expectedFile.canRead) { val src = Source.fromFile(expectedFile) val result = src.getLines().mkString("\n") src.close() result } else "" } def target(name: String, suffix: String): File = new File(resourcesPath + expectationsFolder + name + "." + suffix) def clean(str: String): String = str.split("\n").map(_.trim).filter(_.nonEmpty).mkString("\n") def nameFromModel(ast: WithModel): String = ScalaPrinter.nameFromModel(ast) }
Example 132
Source File: Display.scala From almond with BSD 3-Clause "New" or "Revised" License | 5 votes |
package almond.display

import java.io.File
import java.net.URL
import java.nio.file.Path
import java.util.{Map => JMap}

import almond.interpreter.api.{DisplayData, OutputHandler}
import jupyter.{Displayer, Displayers}

import scala.collection.JavaConverters._

trait Display {
  def data(): Map[String, String]
  def metadata(): Map[String, String] = Map()
  def displayData(): DisplayData =
    DisplayData(data(), metadata = metadata())

  def display()(implicit output: OutputHandler): Unit =
    output.display(displayData())

  // registering things with jvm-repr just in case
  Display.registered
}

object Display {

  private lazy val registered: Unit = {
    Displayers.register(
      classOf[Display],
      new Displayer[Display] {
        def display(d: Display): JMap[String, String] =
          d.data().asJava
      }
    )
  }

  def markdown(content: String)(implicit output: OutputHandler): Unit =
    Markdown(content).display()
  def html(content: String)(implicit output: OutputHandler): Unit =
    Html(content).display()
  def latex(content: String)(implicit output: OutputHandler): Unit =
    Latex(content).display()
  def text(content: String)(implicit output: OutputHandler): Unit =
    Text(content).display()
  def js(content: String)(implicit output: OutputHandler): Unit =
    Javascript(content).display()
  def svg(content: String)(implicit output: OutputHandler): Unit =
    Svg(content).display()

  trait Builder[C, T] {
    protected def build(contentOrUrl: Either[URL, C]): T

    def apply(content: C): T = build(Right(content))
    def from(url: String): T = build(Left(new URL(url)))
    def from(url: URL): T = build(Left(url))
    def fromFile(file: File): T = build(Left(file.toURI.toURL))
    def fromFile(path: Path): T = build(Left(path.toUri.toURL))
    def fromFile(path: String): T = build(Left(new File(path).toURI.toURL))
  }

}
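A minimal usage sketch (not part of the original example): the helpers above take an implicit OutputHandler, which a notebook cell normally provides; here it is passed explicitly as an assumed parameter.

import almond.display.Display
import almond.interpreter.api.OutputHandler

object DisplayUsage {
  // Inside an almond notebook cell the implicit OutputHandler is already in scope.
  def demo()(implicit output: OutputHandler): Unit = {
    Display.html("<b>hello</b>")     // render an HTML fragment
    Display.markdown("# A heading")  // render Markdown
  }
}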
Example 133
Source File: NotebookSparkSessionBuilder.scala From almond with BSD 3-Clause "New" or "Revised" License | 5 votes |
package org.apache.spark.sql.almondinternals import java.io.File import java.lang.{Boolean => JBoolean} import almond.interpreter.api.{CommHandler, OutputHandler} import almond.display.Display.html import ammonite.interp.api.InterpAPI import ammonite.repl.api.ReplAPI import org.apache.log4j.{Category, Logger, RollingFileAppender} import org.apache.spark.sql.SparkSession import org.apache.spark.sql.ammonitesparkinternals.AmmoniteSparkSessionBuilder import scala.collection.JavaConverters._ class NotebookSparkSessionBuilder (implicit interpApi: InterpAPI, replApi: ReplAPI, publish: OutputHandler, commHandler: CommHandler ) extends AmmoniteSparkSessionBuilder { private var progress0 = true private var keep0 = true private var logsInDeveloperConsoleOpt = Option.empty[Boolean] def progress(enable: Boolean = true, keep: Boolean = true): this.type = { progress0 = enable keep0 = keep this } def logsInDeveloperConsole(enable: JBoolean = null): this.type = { logsInDeveloperConsoleOpt = Option[JBoolean](enable).map[Boolean](x => x) this } override def getOrCreate(): SparkSession = { val logFileOpt = logsInDeveloperConsoleOpt match { case Some(false) => None case Some(true) => val fileOpt = NotebookSparkSessionBuilder.logFile(classOf[SparkSession]) if (fileOpt.isEmpty) Console.err.println("Warning: cannot determine log file, logs won't be sent to developer console.") fileOpt case None => NotebookSparkSessionBuilder.logFile(classOf[SparkSession]) } var sendLogOpt = Option.empty[SendLog] try { sendLogOpt = logFileOpt.map { f => println("See your browser developer console for detailed spark logs.") SendLog.start(f) } val session = super.getOrCreate() for (url <- session.sparkContext.uiWebUrl) html(s"""<a target="_blank" href="$url">Spark UI</a>""") session.sparkContext.addSparkListener( new ProgressSparkListener(session, keep0, progress0) ) session } finally { sendLogOpt.foreach(_.stop()) } } } object NotebookSparkSessionBuilder { private def logFile(clazz: Class[_]): Option[File] = { def appenders(log: Category): Stream[Any] = if (log == null) Stream() else log.getAllAppenders.asScala.toStream #::: appenders(log.getParent) appenders(Logger.getLogger(clazz)).collectFirst { case rfa: RollingFileAppender => new File(rfa.getFile) } } }
Example 134
Source File: BitMap.scala From Scurses with MIT License | 5 votes |
package net.team2xh.onions.components.widgets

import java.awt.image.BufferedImage
import java.io.File
import javax.imageio.ImageIO

import net.team2xh.onions.Symbols
import net.team2xh.onions.Themes.ColorScheme
import net.team2xh.onions.components.{FramePanel, Widget}
import net.team2xh.scurses.{Colors, Scurses}

object BitMap {

  def apply(parent: FramePanel, path: String, relative: Boolean = false)(implicit screen: Scurses): BitMap = {
    val fullPath = if (relative) new File("").getAbsolutePath + path else path
    val image = ImageIO.read(new File(fullPath))
    new BitMap(parent, image)
  }

  def apply(parent: FramePanel, image: BufferedImage)(implicit screen: Scurses): BitMap = {
    new BitMap(parent, image)
  }
}

class BitMap(parent: FramePanel, image: BufferedImage)
            (implicit screen: Scurses) extends Widget(parent) {

  val colors = {
    val width = image.getWidth
    val height = image.getHeight
    for (x <- 0 until width) yield
      for (y <- 0 until height / 2) yield {
        // Read two rows at a time
        val upper = Colors.fromRGBInt(image.getRGB(x, y * 2))
        val lower = if (height % 2 == 1) -1 else Colors.fromRGBInt(image.getRGB(x, y * 2 + 1))
        (upper, lower)
      }
  }

  override def redraw(focus: Boolean, theme: ColorScheme): Unit = {
    val width = image.getWidth min innerWidth
    val x0 = (innerWidth - width) / 2
    for (x <- 0 until width) {
      for (y <- 0 until innerHeight) {
        // Read two rows at a time
        val c = colors(x)(y)
        screen.put(x0 + x, y, Symbols.BLOCK_UPPER, c._1, c._2)
      }
    }
  }

  override def handleKeypress(keypress: Int): Unit = { }

  override def focusable: Boolean = false

  override def innerHeight: Int = image.getHeight / 2
}
Example 135
Source File: SarkPredictorEngineSpec.scala From elasticsearch-prediction-spark with Apache License 2.0 | 5 votes |
package com.sdhu.elasticsearchprediction.spark
package test

import com.mahisoft.elasticsearchprediction._
import utils.DataProperties
import plugin.domain.IndexValue
import plugin.exception.PredictionException
import plugin.engine.PredictorEngine

import org.apache.spark._
import rdd.RDD
import mllib.regression._
import mllib.classification._

import org.scalatest._
import com.holdenkarau.spark.testing._

import java.io.File
import java.util.Collection
import scala.collection.JavaConversions._

class SparkPredictorEngineSpec extends FlatSpec with MustMatchers {
  val pconf = getClass.getResource("/prop1.conf").getPath
  val dataP = getClass.getResource("/mini.csv").toURI.toString
  val dp = new DataProperties(pconf)
  val modelP = getClass.getResource("/spark-clf-test.model").getPath
  val clf_type = "spark.logistic-regression"

  "Predictor Engine" should "throw empty model exception" in {
    val eng = new SparkPredictorEngine(modelP, SVM_Helper)
    evaluating {eng.getPrediction(List[IndexValue]())} must produce [PredictionException]
  }

  // "Spark_PredictorEngine" should "return sparkPredictorEngine of svm type" in {
  //   val speng = new Spark_PredictorEngine(modelP, "spark.svm")
  //   speng.getSparkPredictorEngine mustBe a [SparkPredictorEngine[_]]
  // }

  it should "return a generic PredictorEngine" in {
    val speng = new Spark_PredictorEngine(modelP, "spark.svm")
    speng.getPredictorEngine mustBe a [PredictorEngine]
  }

  it should "load the classifier" in {
    val speng = new Spark_PredictorEngine(modelP, clf_type)
    val eng = speng.getSparkPredictorEngine
    val m = eng.getModel
    val cm = m.categoriesMap.getOrElse(Map[String, Double]())

    m.clf must not be empty
    //m.numClasses must be(Some(2))
    //m.binThreshold must be(Some(0.5))
    cm.keys must contain allOf("Female", "Male", "United-States", "China")
  }

  it should "evaluate values" in {
    val speng = new Spark_PredictorEngine(modelP, clf_type)
    val eng = speng.getSparkPredictorEngine
    val p0 = Array("50", "Self-emp-not-inc", "Male", "0", "0", "United-States")
    val cindv = ReadUtil.arr2CIndVal(p0)
    val check = eng.getPrediction(cindv)

    check must equal(0.0)
    check mustBe a [java.lang.Double]
  }

  it should "evaluate values using generic Predictor engine" in {
    val speng = new Spark_PredictorEngine(modelP, clf_type)
    val eng = speng.getPredictorEngine
    val p0 = Array("50", "Self-emp-not-inc", "Male", "0", "0", "United-States")
    val cindv = ReadUtil.arr2CIndVal(p0)
    val check = eng.getPrediction(cindv)

    check must equal(0.0)
    check mustBe a [java.lang.Double]
  }
}
Example 136
Source File: ApplicationWithProcess.scala From aloha with Apache License 2.0 | 5 votes |
package me.jrwang.aloha.app

import java.io.File
import java.nio.charset.StandardCharsets

import scala.collection.JavaConverters._
import scala.concurrent.Promise

import com.google.common.io.Files
import me.jrwang.aloha.common.Logging
import me.jrwang.aloha.common.util.{FileAppender, Utils}

abstract class ApplicationWithProcess extends AbstractApplication with Logging {
  private var process: Process = _
  private var stdoutAppender: FileAppender = _
  private var stderrAppender: FileAppender = _

  // Timeout to wait for when trying to terminate an app.
  private val APP_TERMINATE_TIMEOUT_MS = 10 * 1000

  def getProcessBuilder(): ProcessBuilder

  private var stateMonitorThread: Thread = _

  override def start(): Promise[ExitState] = {
    val processBuilder = getProcessBuilder()
    val command = processBuilder.command()
    val formattedCommand = command.asScala.mkString("\"", "\" \"", "\"")
    logInfo(s"Launch command: $formattedCommand")

    processBuilder.directory(appDir)
    process = processBuilder.start()

    // Redirect its stdout and stderr to files
    val stdout = new File(appDir, "stdout")
    stdoutAppender = FileAppender(process.getInputStream, stdout, alohaConf)

    val header = "Aloha Application Command: %s\n%s\n\n".format(
      formattedCommand, "=" * 40)
    val stderr = new File(appDir, "stderr")
    Files.write(header, stderr, StandardCharsets.UTF_8)
    stderrAppender = FileAppender(process.getErrorStream, stderr, alohaConf)

    stateMonitorThread = new Thread("app-state-monitor-thread") {
      override def run(): Unit = {
        val exitCode = process.waitFor()
        if(exitCode == 0) {
          result.success(ExitState(ExitCode.SUCCESS, Some("success")))
        } else {
          result.success(ExitState(ExitCode.FAILED, Some("failed")))
        }
      }
    }
    stateMonitorThread.start()
    result
  }

  override def shutdown(reason: Option[String]): Unit = {
    if (process != null) {
      logInfo("Killing process!")
      if (stdoutAppender != null) {
        stdoutAppender.stop()
      }
      if (stderrAppender != null) {
        stderrAppender.stop()
      }
      val exitCode = Utils.terminateProcess(process, APP_TERMINATE_TIMEOUT_MS)
      if (exitCode.isEmpty) {
        logWarning("Failed to terminate process: " + process +
          ". This process will likely be orphaned.")
      }
    }
  }
}
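A minimal sketch of a concrete subclass (not part of the original example): it only illustrates how getProcessBuilder is meant to be overridden. The ShellScriptApplication name, the command, and the empty clean() implementation are assumptions for illustration.

package me.jrwang.aloha.app

import scala.collection.JavaConverters._

// Hypothetical application that just runs a script placed in the app directory.
class ShellScriptApplication extends ApplicationWithProcess {
  override def getProcessBuilder(): ProcessBuilder =
    new ProcessBuilder(Seq("bash", "run.sh").asJava)

  // Nothing extra to clean up in this sketch.
  override def clean(): Unit = ()
}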
Example 137
Source File: AbstractApplication.scala From aloha with Apache License 2.0 | 5 votes |
package me.jrwang.aloha.app import java.io.File import scala.concurrent.Promise import me.jrwang.aloha.common.AlohaConf abstract class AbstractApplication extends Application { protected val result: Promise[ExitState] = Promise() protected var appDesc: ApplicationDescription = _ protected var appDir: File = _ protected var alohaConf: AlohaConf = _ override def withDescription(desc: ApplicationDescription): Application = { this.appDesc = desc this } override def withApplicationDir(appDir: File): Application = { this.appDir = appDir this } override def withAlohaConf(conf: AlohaConf): Application = { this.alohaConf = conf this } }
Example 138
Source File: Application.scala From aloha with Apache License 2.0 | 5 votes |
package me.jrwang.aloha.app

import java.io.File

import scala.concurrent.Promise

import me.jrwang.aloha.common.{AlohaConf, AlohaException, Logging}
import me.jrwang.aloha.scheduler.AlohaUserCodeClassLoaders

trait Application {
  def start(): Promise[ExitState]

  def shutdown(reason: Option[String]): Unit

  def withDescription(desc: ApplicationDescription): Application

  def withApplicationDir(appDir: File): Application

  def withAlohaConf(conf: AlohaConf): Application

  def clean(): Unit
}

object Application extends Logging {
  def create(appDesc: ApplicationDescription): Application = {
    //TODO we should download dependencies and resource files
    logInfo(s"Create module for [$appDesc]")
    val fullClassName = appDesc.entryPoint
    try {
      val urls = appDesc.libs.map(new File(_)).filter(_.exists())
        .flatMap(_.listFiles().filter(_.isFile)).map(_.toURI.toURL)
      val classLoader = AlohaUserCodeClassLoaders.childFirst(urls)
      Thread.currentThread().setContextClassLoader(classLoader)
      val klass = classLoader.loadClass(fullClassName)
      require(classOf[Application].isAssignableFrom(klass),
        s"$fullClassName is not a subclass of ${classOf[Application].getName}.")
      klass.getConstructor().newInstance().asInstanceOf[Application].withDescription(appDesc)
    } catch {
      case _: NoSuchMethodException =>
        throw new AlohaException(
          s"$fullClassName did not have a zero-argument constructor." +
            s"Note: if the class is defined inside of another Scala class, then its constructors " +
            s"may accept an implicit parameter that references the enclosing class; in this case, " +
            s"you must define the class as a top-level class in order to prevent this extra" +
            " parameter from breaking Atom's ability to find a valid constructor.")
      case e: Throwable =>
        throw e
    }
  }
}
Example 139
Source File: AppRunner.scala From aloha with Apache License 2.0 | 5 votes |
package me.jrwang.aloha.scheduler.worker

import java.io.File

import scala.concurrent.Await
import scala.concurrent.duration.Duration

import me.jrwang.aloha.app.{Application, ApplicationDescription, ApplicationState, ExitCode}
import me.jrwang.aloha.common.{AlohaConf, Logging}
import me.jrwang.aloha.rpc.RpcEndpointRef
import me.jrwang.aloha.scheduler.ApplicationStateChanged

class AppRunner(
    val conf: AlohaConf,
    val appId: String,
    val appDesc: ApplicationDescription,
    val worker: RpcEndpointRef,
    val workerId: String,
    val host: String,
    val appDir: File,
    @volatile var state: ApplicationState.Value)
  extends Logging {

  private var workerThread: Thread = null

  private[worker] def start() {
    workerThread = new Thread(s"ApplicationRunner for $appId") {
      override def run() {
        fetchAndRunApplication()
      }
    }
    workerThread.start()
  }

  // Stop this application runner
  private[worker] def kill() {
    if (workerThread != null) {
      // the workerThread will kill the application when interrupted
      workerThread.interrupt()
      workerThread = null
      state = ApplicationState.KILLED
    }
  }

  private def fetchAndRunApplication() {
    var app: Application = null
    try {
      app = Application.create(appDesc).withApplicationDir(appDir).withAlohaConf(conf)
      val exitStatePromise = app.start()
      state = ApplicationState.RUNNING
      worker.send(ApplicationStateChanged(appId, ApplicationState.RUNNING, None))
      val exitState = Await.result(exitStatePromise.future, Duration.Inf)
      if(exitState.code == ExitCode.FAILED) {
        worker.send(ApplicationStateChanged(appId, ApplicationState.FAILED, exitState.msg, None))
      } else {
        worker.send(ApplicationStateChanged(appId, ApplicationState.FINISHED, exitState.msg, None))
      }
    } catch {
      case _: InterruptedException =>
        logInfo(s"Runner thread for application $appId interrupted")
        state = ApplicationState.KILLED
        killApp(app, Some("User request to kill app."))
        worker.send(ApplicationStateChanged(appId, ApplicationState.KILLED, Some("User request to kill app.")))
      case e: Exception =>
        logError("Error running executor", e)
        state = ApplicationState.FAILED
        killApp(app, Some(e.toString))
        worker.send(ApplicationStateChanged(appId, ApplicationState.FAILED, Some(e.toString), Some(e)))
    } finally {
      if(app != null) {
        app.clean()
      }
    }
  }

  private def killApp(app: Application, reason: Option[String]) = {
    if(app != null) {
      try {
        app.shutdown(reason)
      } catch {
        case e: Throwable =>
          logError(s"Error while killing app $appDesc.", e)
      }
    }
  }
}
Example 140
Source File: ImageReader.scala From scala-deeplearn-examples with Apache License 2.0 | 5 votes |
package io.brunk.examples

import java.io.{File, FileFilter}
import java.lang.Math.toIntExact

import org.datavec.api.io.filters.BalancedPathFilter
import org.datavec.api.io.labels.ParentPathLabelGenerator
import org.datavec.api.split.{FileSplit, InputSplit}
import org.datavec.image.loader.BaseImageLoader
import org.datavec.image.recordreader.ImageRecordReader
import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator
import org.deeplearning4j.datasets.iterator.MultipleEpochsIterator
import org.deeplearning4j.eval.Evaluation
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator
import org.nd4j.linalg.dataset.api.preprocessor.ImagePreProcessingScaler

import scala.collection.JavaConverters._

object ImageReader {

  val channels = 3
  val height = 150
  val width = 150
  val batchSize = 50
  val numClasses = 2
  val epochs = 100
  val splitTrainTest = 0.8
  val random = new java.util.Random()

  def createImageIterator(path: String): (MultipleEpochsIterator, DataSetIterator) = {
    val baseDir = new File(path)
    val labelGenerator = new ParentPathLabelGenerator
    val fileSplit = new FileSplit(baseDir, BaseImageLoader.ALLOWED_FORMATS, random)

    val numExamples = toIntExact(fileSplit.length)
    val numLabels = fileSplit.getRootDir.listFiles(new FileFilter {
      override def accept(pathname: File): Boolean = pathname.isDirectory
    }).length
    val pathFilter = new BalancedPathFilter(random, labelGenerator, numExamples, numLabels, batchSize)

    //val inputSplit = fileSplit.sample(pathFilter, splitTrainTest, 1 - splitTrainTest)
    val inputSplit = fileSplit.sample(pathFilter, 70, 30)
    val trainData = inputSplit(0)
    val validationData = inputSplit(1)

    val recordReader = new ImageRecordReader(height, width, channels, labelGenerator)
    val scaler = new ImagePreProcessingScaler(0, 1)

    recordReader.initialize(trainData, null)
    val dataIter = new RecordReaderDataSetIterator(recordReader, batchSize, 1, numClasses)
    scaler.fit(dataIter)
    dataIter.setPreProcessor(scaler)
    val trainIter = new MultipleEpochsIterator(epochs, dataIter)

    val valRecordReader = new ImageRecordReader(height, width, channels, labelGenerator)
    valRecordReader.initialize(validationData, null)
    val validationIter = new RecordReaderDataSetIterator(valRecordReader, batchSize, 1, numClasses)
    scaler.fit(validationIter)
    validationIter.setPreProcessor(scaler)

    (trainIter, validationIter)
  }
}
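A minimal usage sketch (not part of the original example): it assumes an image directory laid out with one sub-directory per class, as ParentPathLabelGenerator expects; the "data/images" path is hypothetical.

import io.brunk.examples.ImageReader

object ImageReaderUsage {
  def main(args: Array[String]): Unit = {
    // Hypothetical dataset root: e.g. data/images/cats, data/images/dogs.
    val (trainIter, validationIter) = ImageReader.createImageIterator("data/images")
    val firstBatch = trainIter.next() // a DataSet of at most batchSize examples
    println(s"first training batch size: ${firstBatch.numExamples()}")
  }
}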
Example 141
Source File: IrisReader.scala From scala-deeplearn-examples with Apache License 2.0 | 5 votes |
package io.brunk.examples

import java.io.File

import org.datavec.api.records.reader.impl.csv.CSVRecordReader
import org.datavec.api.split.FileSplit
import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator
import org.nd4j.linalg.dataset.SplitTestAndTrain
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize

object IrisReader {

  val numLinesToSkip = 1
  val batchSize = 150
  val labelIndex = 4
  val numLabels = 3
  val seed = 1

  def readData(): SplitTestAndTrain = {
    val recordReader = new CSVRecordReader(numLinesToSkip, ',')
    recordReader.initialize(new FileSplit(new File("data/iris.csv")))
    val iterator = new RecordReaderDataSetIterator(recordReader, batchSize, labelIndex, numLabels)
    val dataSet = iterator.next() // read all data in a single batch
    dataSet.shuffle(seed)
    val testAndTrain = dataSet.splitTestAndTrain(0.67)

    val train = testAndTrain.getTrain
    val test = testAndTrain.getTest

    // val normalizer = new NormalizerStandardize
    // normalizer.fit(train)
    // normalizer.transform(train) // normalize training data
    // normalizer.transform(test)  // normalize test data

    testAndTrain
  }
}
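A minimal usage sketch (not part of the original example): it assumes data/iris.csv exists as the reader above expects, and simply inspects the two partitions of the split.

import io.brunk.examples.IrisReader

object IrisReaderUsage {
  def main(args: Array[String]): Unit = {
    val split = IrisReader.readData()
    println(s"training examples:   ${split.getTrain.numExamples()}")
    println(s"validation examples: ${split.getTest.numExamples()}")
  }
}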
Example 142
Source File: CorpusReader.scala From ai.vitk.ner with GNU General Public License v3.0 | 5 votes |
package ai.vitk.ner

import java.io.{File, InputStream}

import org.slf4j.LoggerFactory

import scala.collection.mutable.ListBuffer
import scala.io.Source

object CorpusReader {
  val logger = LoggerFactory.getLogger(CorpusReader.getClass)

  def readVLSPTest1(resourcePath: String): List[Sentence] = {
    // read lines of the file and remove lines which contains "<s>"
    val stream = getClass.getResourceAsStream(resourcePath)
    val lines = Source.fromInputStream(stream).getLines().toList.filter {
      line => line.trim != "<s>"
    }
    val sentences = new ListBuffer[Sentence]()
    var tokens = new ListBuffer[Token]()
    for (i <- (0 until lines.length)) {
      val line = lines(i).trim
      if (line == "</s>") {
        if (!tokens.isEmpty)
          sentences.append(Sentence(tokens))
        tokens = new ListBuffer[Token]()
      } else {
        val parts = line.split("\\s+")
        if (parts.length < 3)
          logger.error("Invalid line = " + line)
        else
          tokens.append(Token(parts(0), Map(Label.PartOfSpeech -> parts(1), Label.Chunk -> parts(2))))
      }
    }
    logger.info(resourcePath + ", number of sentences = " + sentences.length)
    sentences.toList
  }

  def readVLSPTest2(dir: String): List[Sentence] = {
    def getListOfFiles: List[File] = {
      val d = new File(dir)
      if (d.exists && d.isDirectory) {
        d.listFiles.filter(_.isFile).toList
      } else {
        List[File]()
      }
    }
    val files = getListOfFiles
    logger.info("Number of test files = " + files.length)
    files.flatMap {
      file => {
        val x = file.getAbsolutePath
        val resourcePath = x.substring(x.indexOf("/ner"))
        readVLSPTest1(resourcePath)
      }
    }
  }

  def main(args: Array[String]): Unit = {
    val path = "/ner/vi/train.txt"
    val sentences = readCoNLL(path)
    logger.info("Number of sentences = " + sentences.length)
    sentences.take(10).foreach(s => logger.info(s.toString))
    sentences.takeRight(10).foreach(s => logger.info(s.toString))
  }
}
Example 143
Source File: Releaser.scala From releaser with Apache License 2.0 | 5 votes |
package uk.gov.hmrc.releaser import java.io.File import java.nio.file.{Files, Path} import org.apache.commons.io.FileUtils import uk.gov.hmrc.releaser.bintray.{BintrayHttp, BintrayRepoConnector, DefaultBintrayRepoConnector} import uk.gov.hmrc.releaser.github.{GithubConnector, Repo} import uk.gov.hmrc.{CredentialsFinder, FileDownloader, Logger} import scala.util.{Failure, Success, Try} object ReleaserMain { def main(args: Array[String]): Unit = { val result = Releaser(args) System.exit(result) } } object Releaser extends Logger { import ArgParser._ def apply(args: Array[String]): Int = { parser.parse(args, Config()) match { case Some(config) => val githubName = config.githubNameOverride.getOrElse(config.artefactName) run(config.artefactName, ReleaseCandidateVersion(config.rcVersion), config.releaseType, githubName, config.releaseNotes, config.dryRun) case None => -1 } } def run(artefactName: String, rcVersion: ReleaseCandidateVersion, releaseType: ReleaseType.Value, gitHubName: String, releaseNotes: Option[String], dryRun: Boolean = false): Int = { val githubCredsFile = System.getProperty("user.home") + "/.github/.credentials" val bintrayCredsFile = System.getProperty("user.home") + "/.bintray/.credentials" val githubCredsOpt = CredentialsFinder.findGithubCredsInFile(new File(githubCredsFile).toPath) val bintrayCredsOpt = CredentialsFinder.findBintrayCredsInFile(new File(bintrayCredsFile).toPath) doReleaseWithCleanup { directories => if (githubCredsOpt.isEmpty) { log.info(s"Didn't find github credentials in $githubCredsFile") -1 } else if (bintrayCredsOpt.isEmpty) { log.info(s"Didn't find Bintray credentials in $bintrayCredsFile") -1 } else { val releaserVersion = getClass.getPackage.getImplementationVersion val metaDataProvider = new ArtefactMetaDataProvider() val gitHubDetails = if (dryRun) GithubConnector.dryRun(githubCredsOpt.get, releaserVersion) else GithubConnector(githubCredsOpt.get, releaserVersion) val bintrayDetails = if (dryRun) BintrayRepoConnector.dryRun(bintrayCredsOpt.get, directories.workDir) else BintrayRepoConnector(bintrayCredsOpt.get, directories.workDir) val bintrayRepoConnector = new DefaultBintrayRepoConnector(directories.workDir, new BintrayHttp(bintrayCredsOpt.get), new FileDownloader) val coordinator = new Coordinator(directories.stageDir, metaDataProvider, gitHubDetails, bintrayRepoConnector) val result = coordinator.start(artefactName, Repo(gitHubName), rcVersion, releaseType, releaseNotes) result match { case Success(targetVersion) => log.info(s"Releaser successfully released $artefactName $targetVersion") 0 case Failure(e) => e.printStackTrace() log.info(s"Releaser failed to release $artefactName $rcVersion with error '${e.getMessage}'") 1 } } } } def doReleaseWithCleanup[T](f: ReleaseDirectories => T): T = { val directories = ReleaseDirectories() try { f(directories) } finally { log.info("cleaning releaser work directory") directories.delete().recover{case t => log.warn(s"failed to delete releaser work directory ${t.getMessage}")} } } } case class ReleaseDirectories(tmpDirectory: Path = Files.createTempDirectory("releaser")) { lazy val workDir = Files.createDirectories(tmpDirectory.resolve("work")) lazy val stageDir = Files.createDirectories(tmpDirectory.resolve("stage")) def delete() = Try { FileUtils.forceDelete(tmpDirectory.toFile) } }
Example 144
Source File: package.scala From theGardener with Apache License 2.0 | 5 votes |
import java.io.File import play.api.Logging import scala.concurrent._ import scala.util.control.NonFatal import scala.util.{Failure, Try} package object utils extends Logging { implicit class TryOps[T](t: Try[T]) { def logError(msg: => String): Try[T] = t.recoverWith { case e => logger.error(msg, e) Failure(e) } } implicit class FutureOps[T](f: Future[T]) { def logError(msg: => String)(implicit ec: ExecutionContext): Future[T] = f.recoverWith { case NonFatal(e) => logger.error(msg, e) Future.failed(e) } } implicit class PathExt(path: String) { def fixPathSeparator: String = path.replace('/', File.separatorChar) } }
Example 145
Source File: PageController.scala From theGardener with Apache License 2.0 | 5 votes |
package controllers import java.io.File import com.github.ghik.silencer.silent import controllers.AssetAccessError.{AssetNotAllowed, AssetNotFound} import controllers.dto._ import io.swagger.annotations._ import javax.inject.Inject import play.api.Configuration import play.api.libs.json.Json import play.api.mvc._ import repositories._ import services._ import scala.concurrent.ExecutionContext @silent("Interpolated") @silent("missing interpolator") @Api(value = "PageController", produces = "application/json") class PageController @Inject()(pageService: PageService)(implicit ec: ExecutionContext) extends InjectedController { @ApiOperation(value = "Get pages from path", response = classOf[PageDTO], responseContainer = "list") @ApiResponses(Array(new ApiResponse(code = 404, message = "Page not found"))) def getPageFromPath(path: String): Action[AnyContent] = Action.async { pageService.computePageFromPath(path).map { case Some(pageDto) => Ok(Json.toJson(Seq(pageDto))) case None => NotFound(s"No Page $path") } } } sealed abstract class AssetAccessError(message: String) extends Throwable(message) object AssetAccessError { case class AssetNotAllowed(message: String) extends AssetAccessError(message) case class AssetNotFound(message: String) extends AssetAccessError(message) } class PageAssetController @Inject()(config: Configuration, projectRepository: ProjectRepository)(implicit ec: ExecutionContext) extends InjectedController { val projectsRootDirectory = config.get[String]("projects.root.directory") def getImageFromPath(path: String): Action[AnyContent] = Action { val params = path.split(">") (for { projectId <- params.lift(0) branchName <- params.lift(1) relativePath <- params.lift(2) documentationRootPath <- projectRepository.findById(projectId).flatMap(_.documentationRootPath) assetFileAccess = accessToAsset(s"$projectsRootDirectory/$projectId/$branchName/$documentationRootPath", relativePath) } yield (relativePath, assetFileAccess)) match { case None => NotFound("Project not found or bad configuration") case Some((_, Left(AssetNotAllowed(message)))) => Forbidden(message) case Some((_, Left(AssetNotFound(message)))) => NotFound(message) case Some((_, Right(assetFile))) => Ok.sendFile(assetFile) } } def accessToAsset(documentationRootPath: String, assetRelativePath: String): Either[AssetAccessError, File] = { val assetFile = new File(s"$documentationRootPath/$assetRelativePath") val documentationCanonicalPath = new File(documentationRootPath).getCanonicalPath val assetCanonicalPath = assetFile.getCanonicalPath if (!assetCanonicalPath.contains(documentationCanonicalPath)) { Left(AssetNotAllowed(s"Asset $assetRelativePath not allowed")) } else if (!assetFile.exists()) { Left(AssetNotFound(s"Asset $assetRelativePath not found")) } else { Right(assetFile) } } }
Example 146
Source File: CustomConfigSystemReader.scala From theGardener with Apache License 2.0 | 5 votes |
package utils

import java.io.File
import java.io.File.separator

import org.eclipse.jgit.lib.Config
import org.eclipse.jgit.storage.file.FileBasedConfig
import org.eclipse.jgit.util.{FS, SystemReader}

object CustomConfigSystemReader {

  def overrideSystemGitConfig(): Unit = {
    val userGitConfig = new File(s"target${separator}data${separator}gitconfig")
    SystemReader.setInstance(new CustomConfigSystemReader(userGitConfig))
  }
}

class CustomConfigSystemReader(userGitConfig: File) extends SystemReader {
  val proxy = SystemReader.getInstance()

  override def getHostname: String = proxy.getHostname
  override def getenv(variable: String): String = proxy.getenv(variable)
  override def getProperty(key: String): String = proxy.getProperty(key)
  override def getCurrentTime: Long = proxy.getCurrentTime
  override def getTimezone(when: Long): Int = proxy.getTimezone(when)

  override def openJGitConfig(parent: Config, fs: FS): FileBasedConfig = proxy.openJGitConfig(parent, fs)

  override def openUserConfig(parent: Config, fs: FS): FileBasedConfig = new FileBasedConfig(parent, userGitConfig, fs)

  override def openSystemConfig(parent: Config, fs: FS): FileBasedConfig =
    new FileBasedConfig(parent, null, fs) {
      override def load(): Unit = ()
      override def isOutdated: Boolean = false
    }
}
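A minimal usage sketch (not part of the original example): install the custom reader before touching JGit so that tests read configuration from target/data/gitconfig instead of the user's global git config. The repository path below is hypothetical.

import java.io.File
import org.eclipse.jgit.api.Git
import utils.CustomConfigSystemReader

object GitWithIsolatedConfig {
  def main(args: Array[String]): Unit = {
    CustomConfigSystemReader.overrideSystemGitConfig()
    // Any JGit call after this point uses the overridden user/system config.
    val git = Git.init().setDirectory(new File("target/data/demo-repo")).call()
    println(s"Initialised ${git.getRepository.getDirectory}")
    git.close()
  }
}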
Example 147
Source File: CaptchaHelper.scala From ez-framework with Apache License 2.0 | 5 votes |
package com.ecfront.ez.framework.service.auth.helper

import java.io.{File, FileOutputStream}

import com.ecfront.ez.framework.core.logger.Logging
import com.github.cage.GCage

object CaptchaHelper extends Logging {

  def generate(text: String): File = {
    val temp = File.createTempFile("ez_captcha_", ".jpg")
    val os = new FileOutputStream(temp)
    try {
      temp.deleteOnExit()
      new GCage().draw(text, os)
      temp
    } catch {
      case e: Throwable =>
        logger.error("Generate captcha error.", e)
        null
    } finally {
      os.close()
    }
  }
}
Example 148
Source File: I18NProcessor.scala From ez-framework with Apache License 2.0 | 5 votes |
package com.ecfront.ez.framework.core.i18n

import java.io.File
import java.util.regex.Pattern

import com.ecfront.common.Resp
import com.ecfront.ez.framework.core.EZ
import com.ecfront.ez.framework.core.logger.Logging

import scala.io.Source

  def setLanguage(_language: String): Unit = {
    EZ.Info.language = _language
  }

  private val tabR = "\t"

  def process(resp: Resp[_]): Unit = {
    if (resp.message != null && resp.message.nonEmpty) {
      resp.message = i18n(resp.message.replaceAll(tabR, " "))
    }
  }

  def i18n(str: String): String = {
    var newStr = str
    i18nInfo.find(_._1.matcher(str).matches()).foreach { matchedItem =>
      val matcher = matchedItem._1.matcher(str)
      newStr = matcher.replaceAll(matchedItem._2(EZ.Info.language))
    }
    newStr
  }

  implicit class Impl(val str: String) {
    def x: String = i18n(str)
  }
}
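A minimal usage sketch (not part of the original example): the snippet above elides the enclosing object's declaration, so the I18NProcessor name used here is an assumption based on the file name, and the message text is hypothetical.

import com.ecfront.ez.framework.core.i18n.I18NProcessor
import com.ecfront.ez.framework.core.i18n.I18NProcessor._ // brings the implicit Impl class into scope

object I18nUsage {
  def main(args: Array[String]): Unit = {
    I18NProcessor.setLanguage("en")       // pick the target language
    val translated = "Some message".x     // translate via the .x extension method
    println(translated)
  }
}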
Example 149
Source File: AttachmentService.scala From BacklogMigration-Redmine with MIT License | 5 votes |
package com.nulabinc.backlog.r2b.exporter.service

import java.io.{File, FileOutputStream}
import java.net.{HttpURLConnection, URL}
import java.nio.channels.Channels

import com.nulabinc.backlog.migration.common.utils.ControlUtil.using
import com.nulabinc.backlog.migration.common.utils.Logging

object AttachmentService extends Logging {

  private val MAX_REDIRECT_COUNT = 10

  def download(url: URL, file: File): Unit = {
    val redirected = followRedirect(url)
    doDownload(redirected, file)
  }

  private def doDownload(url: URL, file: File): Unit =
    try {
      val rbc = Channels.newChannel(url.openStream())
      val fos = new FileOutputStream(file)
      fos.getChannel.transferFrom(rbc, 0, java.lang.Long.MAX_VALUE)
      rbc.close()
      fos.close()
    } catch {
      case e: Throwable => logger.warn("Download attachment failed: " + e.getMessage)
    }

  private def followRedirect(url: URL, count: Int = 0): URL =
    url.openConnection match {
      case http: HttpURLConnection =>
        http.setRequestMethod("GET")
        http.connect()
        using(http) { connection =>
          connection.getResponseCode match {
            case 301 | 302 | 303 =>
              val newUrl = new URL(connection.getHeaderField("Location"))
              if (count < MAX_REDIRECT_COUNT) followRedirect(newUrl, count + 1) else newUrl
            case _ =>
              url
          }
        }
      case _ => url
    }
}
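A minimal usage sketch (not part of the original example): the URL and target path are hypothetical.

import java.io.File
import java.net.URL
import com.nulabinc.backlog.r2b.exporter.service.AttachmentService

object AttachmentDownloadUsage {
  def main(args: Array[String]): Unit = {
    val source = new URL("https://example.com/attachments/report.pdf")
    val target = new File("/tmp/report.pdf")
    // Follows up to 10 redirects and logs (rather than throws) on failure.
    AttachmentService.download(source, target)
  }
}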
Example 150
Source File: IterateeMain.scala From advanced-scala-code with Apache License 2.0 | 5 votes |
package iteratee

import scala.util.{Failure, Success}

object IterateeMain {
  def fileExample(): Unit = {
    import io.iteratee.monix.task._
    import java.io.File

    val wordsE = readLines(new File("license.txt")).flatMap { line =>
      enumIndexedSeq(line.split("\\W"))
    }
    val noEmptyLinesEE = filter[String](str => str.trim.length > 0)
    val toLowerEE = map[String, String](_.toLowerCase)
    val countWordsI = fold[String, Map[String, Int]](Map.empty) { (acc, next) =>
      acc.get(next) match {
        case None => acc + (next -> 1)
        case Some(num) => acc + (next -> (1 + num))
      }
    }
    val dataT = wordsE.through(noEmptyLinesEE).
      through(toLowerEE).into(countWordsI).map { dataMap =>
        dataMap.toList.sortWith( _._2 > _._2).take(5).map(_._1)
      }

    import monix.execution.Scheduler.Implicits.global
    dataT.runOnComplete {
      case Success(data) => println(data)
      case Failure(th) => th.printStackTrace()
    }
  }

  def main(args: Array[String]) {
    import io.iteratee.modules.id._

    // Just one Int
    val singleNumE = enumOne(42)
    val singleNumI = takeI[Int](1)
    val singleNumResult = singleNumE.into(singleNumI)
    println(singleNumResult)

    // Incrementing one Int
    val incrementNumEE = map[Int, Int](_ + 1)
    val incrementedNumResult = singleNumE.through(incrementNumEE).into(singleNumI)
    println(incrementedNumResult)

    // First 10 even numbers
    val naturalsE = iterate(1)(_ + 1)
    val moreThan100EE = filter[Int](_ >= 100)
    val evenFilterEE = filter[Int](_ % 2 == 0)
    val first10I = takeI[Int](10)
    println(naturalsE.through(moreThan100EE).through(evenFilterEE).into(first10I))

    {
      import io.iteratee.modules.eval._
      // Summing N first numbers
      val naturalsE = iterate(1)(_ + 1)
      val limit1kEE = take[Int](30000)
      val sumI = fold[Int, Int](0) { (acc, next) => acc + next }
      println(naturalsE.through(limit1kEE).into(sumI).value)
    }

    fileExample()
  }
}
Example 151
Source File: TransformerBenchmark.scala From mleap with Apache License 2.0 | 5 votes |
package com.truecar.mleap.spark.benchmark import java.io.{FileInputStream, File} import ml.bundle.fs.DirectoryBundle import com.truecar.mleap.runtime.LocalLeapFrame import com.truecar.mleap.runtime.transformer.Transformer import com.truecar.mleap.serialization.ml.v1.MlJsonSerializer import org.scalameter.api._ import org.scalameter.picklers.Implicits._ import spray.json._ import com.truecar.mleap.serialization.mleap.v1.MleapJsonSupport._ object TransformerBenchmark extends Bench.ForkedTime { lazy override val executor = { SeparateJvmsExecutor( Executor.Warmer.Zero, Aggregator.min[Double], new Measurer.Default) } val mlSerializer = MlJsonSerializer val classLoader = getClass.getClassLoader val regressionFile = new File("/tmp/transformer.ml") val frameFile = new File("/tmp/frame.json") val bundleReader = DirectoryBundle(regressionFile) val regression = mlSerializer.deserializeWithClass(bundleReader).asInstanceOf[Transformer] val lines = scala.io.Source.fromFile(frameFile).mkString val frame = lines.parseJson.convertTo[LocalLeapFrame] val ranges = for { size <- Gen.range("size")(1000, 10000, 1000) } yield 0 until size measure method "transform" in { using(ranges) in { size => size.foreach { _ => regression.transform(frame) } } } }
Example 152
Source File: SparkTransformerBenchmark.scala From mleap with Apache License 2.0 | 5 votes |
package com.truecar.mleap.spark.benchmark

import java.io.{FileInputStream, File}

import com.esotericsoftware.kryo.io.Input
import com.truecar.mleap.runtime.LocalLeapFrame
import com.truecar.mleap.spark.benchmark.util.SparkSerializer
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.{SparkContext, SparkConf}
import org.apache.spark.ml.Transformer
import org.scalameter.Bench

import scala.collection.JavaConverters._
import org.scalameter.api._
import org.scalameter.picklers.Implicits._
import org.apache.log4j.Logger
import org.apache.log4j.Level
import com.truecar.mleap.spark.MleapSparkSupport._
import spray.json._
import com.truecar.mleap.serialization.mleap.v1.MleapJsonSupport._

object SparkTransformerBenchmark extends Bench.ForkedTime {
  lazy override val executor = {
    SeparateJvmsExecutor(
      Executor.Warmer.Zero,
      Aggregator.min[Double],
      new Measurer.Default)
  }

  val classLoader = getClass.getClassLoader
  val regressionFile = new File("/tmp/spark.transformer.kryo")
  val frameFile = new File("/tmp/frame.json")

  val inputStream = new FileInputStream(regressionFile)
  val input = new Input(inputStream)

  val regression: Transformer = SparkSerializer().read(input)
  val lines = scala.io.Source.fromFile(frameFile).mkString
  val frame = lines.parseJson.convertTo[LocalLeapFrame]

  Logger.getLogger("org").setLevel(Level.OFF)
  Logger.getLogger("akka").setLevel(Level.OFF)

  val sparkConf = new SparkConf()
    .setAppName("Spark Transformer Benchmark")
    .setMaster("local[1]")
  val sc = new SparkContext(sparkConf)
  val sqlContext = new SQLContext(sc)

  val rdd = frame.dataset.data.map(a => Row(a.toSeq: _*)).toList.asJava
  val schema = frame.schema.toSpark
  val sparkFrame = sqlContext.createDataFrame(rdd, schema)

  val ranges = for {
    size <- Gen.range("size")(1000, 10000, 1000)
  } yield 0 until size

  measure method "transform" in {
    using(ranges) in { size =>
      size.foreach { _ =>
        regression.transform(sparkFrame).head
      }
    }
  }

  // sc.stop()
}
Example 153
Source File: AWTSystemProvider.scala From scala-game-library with MIT License | 5 votes |
package sgl
package awt

import sgl.util._

import java.net.URI
import java.awt.Desktop
import java.io.File

import scala.concurrent.ExecutionContext

trait AWTSystemProvider extends SystemProvider with PartsResourcePathProvider {

  object AWT5System extends System {

    override def exit(): Unit = {
      sys.exit()
    }

    override def currentTimeMillis: Long = java.lang.System.currentTimeMillis
    override def nanoTime: Long = java.lang.System.nanoTime

    override def loadText(path: ResourcePath): Loader[Array[String]] = {
      FutureLoader {
        val localAsset = if(DynamicResourcesEnabled) findDynamicResource(path) else None
        val is = localAsset.map(a => new java.io.FileInputStream(a)).getOrElse(getClass.getClassLoader.getResourceAsStream(path.path))
        if(is == null) {
          throw new ResourceNotFoundException(path)
        }
        scala.io.Source.fromInputStream(is).getLines.toArray
      }
    }

    override def loadBinary(path: ResourcePath): Loader[Array[Byte]] = {
      FutureLoader {
        val localAsset = if(DynamicResourcesEnabled) findDynamicResource(path) else None
        val is = localAsset.map(a => new java.io.FileInputStream(a)).getOrElse(getClass.getClassLoader.getResourceAsStream(path.path))
        if(is == null) {
          throw new ResourceNotFoundException(path)
        }
        val bis = new java.io.BufferedInputStream(is)
        val bytes = new scala.collection.mutable.ListBuffer[Byte]
        var b: Int = 0
        while({ b = bis.read; b != -1}) {
          bytes.append(b.toByte)
        }
        bytes.toArray
      }
    }

    override def openWebpage(uri: URI): Unit = {
      val desktop = if(Desktop.isDesktopSupported()) Desktop.getDesktop() else null
      if(desktop != null && desktop.isSupported(Desktop.Action.BROWSE)) {
        try {
          desktop.browse(uri);
        } catch {
          case (e: Exception) => e.printStackTrace()
        }
      }
    }

  }
  val System = AWT5System

  override val ResourcesRoot = PartsResourcePath(Vector())
  override val MultiDPIResourcesRoot = PartsResourcePath(Vector())

  val DynamicResourcesEnabled: Boolean = false

  // TODO: provide a command line flag to control this as well, in particular to give
  // the asset directory.
  def findDynamicResource(path: ResourcePath): Option[File] = {
    def findFromDir(d: File): Option[File] = {
      val asset = new File(d.getAbsolutePath + "/assets/" + path.path)
      if(asset.exists) Some(asset) else None
    }
    def findFromWorkingDir: Option[File] = findFromDir(new File(java.lang.System.getProperty("user.dir")))

    val protectionDomain = this.getClass.getProtectionDomain()
    val codeSource = protectionDomain.getCodeSource()
    if(codeSource == null) return findFromWorkingDir
    val jar = new File(codeSource.getLocation.toURI.getPath)
    if(!jar.exists) return findFromWorkingDir
    val parent = jar.getParentFile
    if(parent == null) return findFromWorkingDir
    findFromDir(parent).orElse(findFromWorkingDir)
  }

  //Centralize the execution context used for asynchronous tasks in the Desktop backend
  //Could be overriden at wiring time
  implicit val executionContext: ExecutionContext = ExecutionContext.Implicits.global
}
Example 154
Source File: SparkFunSuite.scala From spark-gbtlr with Apache License 2.0 | 5 votes |
package org.apache.spark

// scalastyle:off
import java.io.File

import org.apache.spark.internal.Logging
import org.apache.spark.util.AccumulatorContext
import org.scalatest.{BeforeAndAfterAll, FunSuite, Outcome}

  final protected override def withFixture(test: NoArgTest): Outcome = {
    val testName = test.text
    val suiteName = this.getClass.getName
    val shortSuiteName = suiteName.replaceAll("org.apache.spark", "o.a.s")
    try {
      logInfo(s"\n\n===== TEST OUTPUT FOR $shortSuiteName: '$testName' =====\n")
      test()
    } finally {
      logInfo(s"\n\n===== FINISHED $shortSuiteName: '$testName' =====\n")
    }
  }
}
Example 155
Source File: Config.scala From zipkin-mesos-framework with Apache License 2.0 | 5 votes |
package net.elodina.mesos.zipkin import java.io.{File, FileInputStream} import java.net.URI import java.util.Properties import net.elodina.mesos.zipkin.utils.{BindAddress, Period} object Config { val DEFAULT_FILE = new File("zipkin-mesos.properties") var debug: Boolean = false var genTraces: Boolean = false var storage: String = "file:zipkin-mesos.json" var master: Option[String] = None var principal: Option[String] = None var secret: Option[String] = None var user: Option[String] = None var frameworkName: String = "zipkin" var frameworkRole: String = "*" var frameworkTimeout: Period = new Period("30d") var log: Option[File] = None var api: Option[String] = None var bindAddress: Option[BindAddress] = None def apiPort: Int = { val port = new URI(getApi).getPort if (port == -1) 80 else port } def replaceApiPort(port: Int): Unit = { val prev: URI = new URI(getApi) api = Some("" + new URI( prev.getScheme, prev.getUserInfo, prev.getHost, port, prev.getPath, prev.getQuery, prev.getFragment )) } def getApi: String = { api.getOrElse(throw new Error("api not initialized")) } def getMaster: String = { master.getOrElse(throw new Error("master not initialized")) } def getZk: String = { master.getOrElse(throw new Error("zookeeper not initialized")) } private[zipkin] def loadFromFile(file: File): Unit = { val props: Properties = new Properties() val stream: FileInputStream = new FileInputStream(file) props.load(stream) stream.close() if (props.containsKey("debug")) debug = java.lang.Boolean.valueOf(props.getProperty("debug")) if (props.containsKey("genTraces")) genTraces = java.lang.Boolean.valueOf(props.getProperty("genTraces")) if (props.containsKey("storage")) storage = props.getProperty("storage") if (props.containsKey("master")) master = Some(props.getProperty("master")) if (props.containsKey("user")) user = Some(props.getProperty("user")) if (props.containsKey("principal")) principal = Some(props.getProperty("principal")) if (props.containsKey("secret")) secret = Some(props.getProperty("secret")) if (props.containsKey("framework-name")) frameworkName = props.getProperty("framework-name") if (props.containsKey("framework-role")) frameworkRole = props.getProperty("framework-role") if (props.containsKey("framework-timeout")) frameworkTimeout = new Period(props.getProperty("framework-timeout")) if (props.containsKey("log")) log = Some(new File(props.getProperty("log"))) if (props.containsKey("api")) api = Some(props.getProperty("api")) if (props.containsKey("bind-address")) bindAddress = Some(new BindAddress(props.getProperty("bind-address"))) } override def toString: String = { s""" |debug: $debug, storage: $storage |mesos: master=$master, user=${if (user.isEmpty || user.get.isEmpty) "<default>" else user} |principal=${principal.getOrElse("<none>")}, secret=${if (secret.isDefined) "*****" else "<none>"} |framework: name=$frameworkName, role=$frameworkRole, timeout=$frameworkTimeout |api: $api, bind-address: ${bindAddress.getOrElse("<all>")}, genTraces: $genTraces """.stripMargin.trim } }
Example 156
Source File: Storage.scala From zipkin-mesos-framework with Apache License 2.0 | 5 votes |
package net.elodina.mesos.zipkin.storage

import java.io.{File, FileWriter}

import org.I0Itec.zkclient.ZkClient
import org.I0Itec.zkclient.exception.ZkNodeExistsException
import org.I0Itec.zkclient.serialize.ZkSerializer
import play.api.libs.json.{Json, Reads, Writes}

import scala.io.Source

trait Storage[T] {
  def save(value: T)(implicit writes: Writes[T])

  def load(implicit reads: Reads[T]): Option[T]
}

case class FileStorage[T](file: String) extends Storage[T] {
  override def save(value: T)(implicit writes: Writes[T]) {
    val writer = new FileWriter(file)
    try {
      writer.write(Json.stringify(Json.toJson(value)))
    } finally {
      writer.close()
    }
  }

  override def load(implicit reads: Reads[T]): Option[T] = {
    if (!new File(file).exists()) None
    else Json.parse(Source.fromFile(file).mkString).asOpt[T]
  }
}

case class ZkStorage[T](zk: String) extends Storage[T] {
  val (zkConnect, path) = zk.span(_ != '/')
  createChrootIfRequired()

  private def createChrootIfRequired() {
    if (path != "") {
      val client = zkClient
      try {
        client.createPersistent(path, true)
      } finally {
        client.close()
      }
    }
  }

  private def zkClient: ZkClient = new ZkClient(zkConnect, 30000, 30000, ZKStringSerializer)

  override def save(value: T)(implicit writes: Writes[T]) {
    val client = zkClient
    val json = Json.stringify(Json.toJson(value))
    try {
      client.createPersistent(path, json)
    } catch {
      case e: ZkNodeExistsException => client.writeData(path, json)
    } finally {
      client.close()
    }
  }

  override def load(implicit reads: Reads[T]): Option[T] = {
    val client = zkClient
    try {
      Option(client.readData(path, true).asInstanceOf[String]).flatMap(Json.parse(_).asOpt[T])
    } finally {
      client.close()
    }
  }
}

private object ZKStringSerializer extends ZkSerializer {
  def serialize(data: Object): Array[Byte] = data.asInstanceOf[String].getBytes("UTF-8")

  def deserialize(bytes: Array[Byte]): Object = {
    if (bytes == null) null
    else new String(bytes, "UTF-8")
  }
}
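A minimal usage sketch of FileStorage with play-json (not part of the original example): the Cluster case class and the file path below are hypothetical, only to show how the implicit Reads/Writes are supplied.

import net.elodina.mesos.zipkin.storage.FileStorage
import play.api.libs.json.{Json, OFormat}

// Hypothetical payload type; OFormat provides both Reads and Writes.
case class Cluster(name: String, nodes: List[String])
object Cluster {
  implicit val format: OFormat[Cluster] = Json.format[Cluster]
}

object StorageUsage {
  def main(args: Array[String]): Unit = {
    val storage = FileStorage[Cluster]("/tmp/cluster.json")
    storage.save(Cluster("zipkin", List("node-1", "node-2"))) // serialised to JSON on disk
    println(storage.load)                                     // Some(Cluster(zipkin,List(node-1, node-2)))
  }
}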
Example 157
Source File: ZipkinComponentServer.scala From zipkin-mesos-framework with Apache License 2.0 | 5 votes |
package net.elodina.mesos.zipkin.components

import java.io.File

import net.elodina.mesos.zipkin.http.HttpServer

import scala.sys.process.Process
import scala.sys.process.ProcessBuilder

class ZipkinComponentServer {

  var process: Process = null

  @volatile var shutdownInitiated = false

  def isStarted = Option(process).isDefined

  def start(taskConfig: TaskConfig, taskId: String) = {
    val jarMask = ZipkinComponent.getComponentFromTaskId(taskId) match {
      case "collector" => HttpServer.collectorMask
      case "query" => HttpServer.queryMask
      case "web" => HttpServer.webMask
      case _ => throw new IllegalArgumentException(s"Illegal component name found in task id: $taskId")
    }
    val distToLaunch = initJar(jarMask)
    process = configureProcess(taskConfig, distToLaunch).run()
    //TODO: consider logs redirect
  }

  def await(): Option[Int] = {
    if (isStarted) Some(process.exitValue()) else None
  }

  def acknowledgeShutdownStatus(): Boolean = {
    val oldStatus = shutdownInitiated
    if (shutdownInitiated) shutdownInitiated = false
    oldStatus
  }

  def stop(shutdownInitiated: Boolean) {
    if (isStarted) {
      this.shutdownInitiated = shutdownInitiated
      process.destroy()
    }
  }

  private def initJar(jarMask: String): File = {
    new File(".").listFiles().find(file => file.getName.matches(jarMask)) match {
      case None => throw new IllegalStateException("Corresponding jar not found")
      case Some(componentDist) => componentDist
    }
  }

  private def configureProcess(taskConfig: TaskConfig, distToLaunch: File): ProcessBuilder = {
    val configFileArg = taskConfig.configFile.map(Seq("-f", _))
    var command = Seq("java", "-jar", distToLaunch.getCanonicalPath)
    configFileArg.foreach(command ++= _)
    command ++= taskConfig.flags.map { case (k: String, v: String) => s"-$k=$v" }
    Process(command, Some(new File(".")), taskConfig.env.toList: _*)
  }
}
Example 158
Source File: S3.scala From teamcity-s3-plugin with Apache License 2.0 | 5 votes |
package com.gu.teamcity import java.io.{InputStream, File} import com.amazonaws.ClientConfiguration import com.amazonaws.auth.{AWSCredentialsProviderChain, DefaultAWSCredentialsProviderChain} import com.amazonaws.services.s3.AmazonS3Client import com.amazonaws.services.s3.model.{ObjectMetadata, PutObjectRequest, CannedAccessControlList} import com.amazonaws.services.s3.transfer.TransferManager import jetbrains.buildServer.serverSide.SBuild import scala.util.{Success, Try} class S3(config: S3ConfigManager) { val credentialsProvider = { val provider = new AWSCredentialsProviderChain(config, new DefaultAWSCredentialsProviderChain()) provider.setReuseLastProvider(false) provider } val transferManager = new TransferManager( new AmazonS3Client(credentialsProvider, new ClientConfiguration().withMaxErrorRetry(2)) ) def upload(bucket: String, build: SBuild, fileName: String, contents: InputStream, fileSize: Long): Try[Unit] = Try { val uploadDirectory = s"${S3Plugin.cleanFullName(build)}/${build.getBuildNumber}" val metadata = { val md = new ObjectMetadata() md.setContentLength(fileSize) md } val req = new PutObjectRequest(bucket, s"$uploadDirectory/$fileName", contents, metadata) req.withCannedAcl(CannedAccessControlList.BucketOwnerFullControl) val upload = transferManager.upload(req) upload.waitForUploadResult() } def upload(bucket: String, build: SBuild, fileName: String, file: File): Try[Unit] = Try { val uploadDirectory = s"${S3Plugin.cleanFullName(build)}/${build.getBuildNumber}" val req = new PutObjectRequest(bucket, s"$uploadDirectory/$fileName", file) req.withCannedAcl(CannedAccessControlList.BucketOwnerFullControl) val upload = transferManager.upload(req) upload.waitForUploadResult() } }
Example 159
Source File: S3ConfigManager.scala From teamcity-s3-plugin with Apache License 2.0 | 5 votes |
package com.gu.teamcity

import java.io.{File, PrintWriter}

import com.amazonaws.auth.{BasicAWSCredentials, AWSCredentialsProvider, AWSCredentials}
import jetbrains.buildServer.serverSide.ServerPaths
import org.json4s._
import org.json4s.native.JsonMethods._
import org.json4s.native.Serialization
import org.json4s.native.Serialization._

case class S3Config(
  artifactBucket: Option[String],
  buildManifestBucket: Option[String],
  tagManifestBucket: Option[String],
  awsAccessKey: Option[String],
  awsSecretKey: Option[String]
)

class S3ConfigManager(paths: ServerPaths) extends AWSCredentialsProvider {
  implicit val formats = Serialization.formats(NoTypeHints)

  val configFile = new File(s"${paths.getConfigDir}/s3.json")

  private[teamcity] var config: Option[S3Config] = {
    if (configFile.exists()) {
      parse(configFile).extractOpt[S3Config]
    } else None
  }

  def artifactBucket: Option[String] = config.flatMap(_.artifactBucket)
  def buildManifestBucket: Option[String] = config.flatMap(_.buildManifestBucket)
  def tagManifestBucket: Option[String] = config.flatMap(_.tagManifestBucket)

  private[teamcity] def update(config: S3Config): Unit = {
    this.config = Some(if (config.awsSecretKey.isEmpty && config.awsAccessKey == this.config.flatMap(_.awsAccessKey)) {
      config.copy(awsSecretKey = this.config.flatMap(_.awsSecretKey))
    } else config)
  }

  def updateAndPersist(newConfig: S3Config): Unit = {
    synchronized {
      update(newConfig)
      val out = new PrintWriter(configFile, "UTF-8")
      try { writePretty(config, out) }
      finally { out.close }
    }
  }

  def details: Map[String, Option[String]] = Map(
    "artifactBucket" -> artifactBucket,
    "buildManifestBucket" -> buildManifestBucket,
    "tagManifestBucket" -> tagManifestBucket,
    "accessKey" -> config.flatMap(_.awsAccessKey)
  )

  override def getCredentials: AWSCredentials = (for {
    c <- config
    accessKey <- c.awsAccessKey
    secretKey <- c.awsSecretKey
  } yield new BasicAWSCredentials(accessKey, secretKey)).getOrElse(null) // Yes, this is sad

  override def refresh(): Unit = ()
}

object S3ConfigManager {
  val bucketElement = "bucket"
  val s3Element = "S3"
}
Example 160
Source File: ArtifactUploader.scala From teamcity-s3-plugin with Apache License 2.0 | 5 votes |
package com.gu.teamcity import java.io.File import java.util.Date import jetbrains.buildServer.messages.{BuildMessage1, DefaultMessagesInfo, Status} import jetbrains.buildServer.serverSide.{BuildServerAdapter, SRunningBuild} import scala.util.control.NonFatal class ArtifactUploader(config: S3ConfigManager, s3: S3) extends BuildServerAdapter { override def beforeBuildFinish(runningBuild: SRunningBuild) { def report(msg: String): Unit = { runningBuild.getBuildLog().message(msg,Status.NORMAL,new Date,DefaultMessagesInfo.MSG_TEXT,DefaultMessagesInfo.SOURCE_ID,null) } report("About to upload artifacts to S3") getAllFiles(runningBuild).foreach { case (name: String, artifact: File) => config.artifactBucket match { case None => report("Target artifactBucket was not set") case Some(bucket) => s3.upload(bucket, runningBuild, name, artifact).recover { case NonFatal(e) => runningBuild.getBuildLog().message(s"Error uploading artifacts: ${e.getMessage}", Status.ERROR,new Date,DefaultMessagesInfo.MSG_BUILD_FAILURE,DefaultMessagesInfo.SOURCE_ID,null) } } } report("Artifact S3 upload complete") } def getAllFiles(runningBuild: SRunningBuild): Seq[(String,File)] = { if (!runningBuild.isArtifactsExists) { Nil } else { ArtifactUploader.getChildren(runningBuild.getArtifactsDirectory) } } private def normalMessage(text: String) = new BuildMessage1(DefaultMessagesInfo.SOURCE_ID, DefaultMessagesInfo.MSG_TEXT, Status.NORMAL, new Date, text) } object ArtifactUploader { def getChildren(file: File, paths: Seq[String] = Nil, current: String = ""): Seq[(String, File)] = { file.listFiles.toSeq.flatMap { child => if (child.isHidden) { Seq() } else { val newPath = current + child.getName if (child.isDirectory) { getChildren(child, paths, newPath + File.separator) } else { Seq((newPath, child)) } } } } }
Example 161
Source File: Codegen.scala From caliban with Apache License 2.0 | 5 votes |
package caliban.tools import java.io.{ File, PrintWriter } import caliban.parsing.adt.Document import zio.{ Task, UIO } object Codegen { def generate( arguments: Options, writer: (Document, String, Option[String], String) => String ): Task[Unit] = { val s = ".*/scala/(.*)/(.*).scala".r.findFirstMatchIn(arguments.toPath) val packageName = arguments.packageName.orElse(s.map(_.group(1).split("/").mkString("."))) val objectName = s.map(_.group(2)).getOrElse("Client") val effect = arguments.effect.getOrElse("zio.UIO") val loader = getSchemaLoader(arguments.schemaPath, arguments.headers) for { schema <- loader.load code = writer(schema, objectName, packageName, effect) formatted <- Formatter.format(code, arguments.fmtPath) _ <- Task(new PrintWriter(new File(arguments.toPath))) .bracket(q => UIO(q.close()), pw => Task(pw.println(formatted))) } yield () } private def getSchemaLoader(path: String, schemaPathHeaders: Option[List[Options.Header]]): SchemaLoader = if (path.startsWith("http")) SchemaLoader.fromIntrospection(path, schemaPathHeaders) else SchemaLoader.fromFile(path) }
Example 162
Source File: CodeExampleImpl.scala From slinky with MIT License | 5 votes |
package slinky.docs import java.io.File import slinky.core.facade.ReactElement import scala.io.Source import scala.reflect.macros.blackbox object CodeExampleImpl { def text(c: blackbox.Context)(exampleLocation: c.Expr[String]): c.Expr[ReactElement] = { import c.universe._ val Literal(Constant(loc: String)) = exampleLocation.tree val inputFile = new File(s"docs/src/main/scala/${loc.split('.').mkString("/")}.scala") val enclosingPackage = loc.split('.').init.mkString(".") val fileContent = Source.fromFile(inputFile).mkString val innerCode = fileContent.split('\n') val textToDisplay = innerCode .map(_.replaceAllLiterally("//display:", "")) .filterNot(_.endsWith("//nodisplay")) .dropWhile(_.trim.isEmpty) .reverse.dropWhile(_.trim.isEmpty).reverse .mkString("\n") val codeToRun = innerCode.filter(_.startsWith("//run:")).map(_.replaceAllLiterally("//run:", "")).mkString("\n") c.Expr[ReactElement]( q"""{ import ${c.parse(enclosingPackage)}._ _root_.slinky.docs.CodeExampleInternal(codeText = ${Literal(Constant(textToDisplay))}, demoElement = {${c.parse(codeToRun)}}) }""") } }
Example 163
Source File: HttpSlippyTileReader.scala From geotrellis-osm-elevation with Apache License 2.0 | 5 votes |
package geotrellis.osme.core import geotrellis.vector._ import geotrellis.raster._ import geotrellis.raster.io.geotiff._ import geotrellis.spark._ import geotrellis.spark.io.s3._ import geotrellis.spark.io.slippy._ import geotrellis.util.Filesystem import org.apache.commons.io.FileUtils import org.apache.commons.io.filefilter._ import org.apache.commons.io.IOUtils._ import org.apache.spark._ import org.apache.spark.rdd._ import java.net._ import java.io.File class HttpSlippyTileReader[T](pathTemplate: String)(fromBytes: (SpatialKey, Array[Byte]) => T) extends SlippyTileReader[T] { def getURL(template: String, z: Int, x: Int, y: Int) = template.replace("{z}", z.toString).replace("{x}", x.toString).replace("{y}", y.toString) def getByteArray(url: String) = { val inStream = new URL(url).openStream() try { toByteArray(inStream) } finally { inStream.close() } } def read(zoom: Int)(implicit sc: SparkContext): RDD[(SpatialKey, T)] = ??? def read(zoom: Int, key: SpatialKey): T = fromBytes(key, getByteArray(getURL(pathTemplate, zoom, key.col, key.row))) override def read(zoom: Int, x: Int, y: Int): T = read(zoom, SpatialKey(x, y)) }
Example 164
Source File: ElevationOverlay.scala From geotrellis-osm-elevation with Apache License 2.0 | 5 votes |
package geotrellis.osme.core import java.io.{BufferedWriter, FileWriter, File} import com.vividsolutions.jts.geom.{LineString, MultiLineString} import geotrellis.raster.io.geotiff.SinglebandGeoTiff import geotrellis.vector.io.json.{GeoJson, JsonFeatureCollection} import scala.collection.immutable.Map import spray.json._ import DefaultJsonProtocol._ import geotrellis.vector.io.json.FeatureFormats.writeFeatureJson import geotrellis.vector.io.json.GeometryFormats._ import geotrellis.vector.densify.DensifyMethods import geotrellis.vector.dissolve.DissolveMethods import geotrellis.vector._ val segmentsFeatures = segments.map { segment => val center = segment.centroid match { case PointResult(p) => p case NoResult => throw new Exception("No result found in PointOrNoResult") } val (col, row) = rasterExtent.mapToGrid(center) val elevation = geotiff.tile.getDouble(col, row) val meanvMap: Map[String, Double] = Map("MEANV" -> elevation) LineFeature(segment, meanvMap) } return segmentsFeatures.toTraversable } }
Example 165
Source File: ElevationSpec.scala From geotrellis-osm-elevation with Apache License 2.0 | 5 votes |
package geotrellis.osme.core import java.io.{FileWriter, BufferedWriter, File} import geotrellis.raster.io.geotiff.SinglebandGeoTiff import geotrellis.vector.{Feature, Line, LineFeature} import geotrellis.vector.io.json.GeoJson._ import spray.json.DefaultJsonProtocol._ import geotrellis.vector.io.json.{JsonFeatureCollection, GeoJson} import spray.json.JsonReader import scala.io.Source import org.scalatest._ class ElevationSpec extends FunSpec with Matchers { def sharedData = { val geojson = Source.fromFile("data/imgn36w100vector.geojson").getLines.mkString val gjCol = parse[JsonFeatureCollection](geojson) new { val geotiff = SinglebandGeoTiff("data/imgn36w100_13_3_3.tif") val multiLine = gjCol.getAllLines().toMultiLine val elevationGeoJson = ElevationOverlay(geotiff, multiLine) } } describe("Core spec") { val numInputLines = sharedData.multiLine.lines.size val numOutputLines = sharedData.elevationGeoJson.size val ratio = numOutputLines / numInputLines println(s"Ratio of input lines to output lines: $ratio : 1") it("returned geojson should contain the MEANV property") { val elevationFeatures = sharedData.elevationGeoJson val hasMeanV = elevationFeatures.forall(feat => feat.data.contains("MEANV")) assert(hasMeanV) } it("should produce a geojson file that can be put into geocolor.io") { val elevationFeatures = sharedData.elevationGeoJson val jsonFeatures = JsonFeatureCollection(elevationFeatures) val file = new File("geocolor_test.json") val bw = new BufferedWriter(new FileWriter(file)) bw.write(jsonFeatures.toJson.prettyPrint) bw.close() } it("Every feature should intersect the tile extent") { val elevationFeatures = sharedData.elevationGeoJson val rasterPoly = sharedData.geotiff.rasterExtent.extent.toPolygon() val doesIntersect = elevationFeatures.forall(feat => rasterPoly.intersects(feat.geom)) assert(doesIntersect) } } }
Example 166
Source File: Logger.scala From shapenet-viewer with MIT License | 5 votes |
package edu.stanford.graphics.shapenet.util import org.slf4j.LoggerFactory import java.io.File import org.slf4j.bridge.SLF4JBridgeHandler import uk.org.lidalia.sysoutslf4j.context.SysOutOverSLF4J additive: Boolean = false) = { import ch.qos.logback.classic.spi.ILoggingEvent import ch.qos.logback.classic.Level import ch.qos.logback.classic.LoggerContext import ch.qos.logback.classic.encoder.PatternLayoutEncoder import ch.qos.logback.core.FileAppender // Make sure log directory is created val file: File = new File(filename) val parent: File = file.getParentFile if (parent != null) parent.mkdirs val loggerContext = LoggerFactory.getILoggerFactory().asInstanceOf[LoggerContext] val logger = loggerContext.getLogger(loggerName) // Setup pattern val patternLayoutEncoder = new PatternLayoutEncoder() patternLayoutEncoder.setPattern(pattern) patternLayoutEncoder.setContext(loggerContext) patternLayoutEncoder.start() // Setup appender val fileAppender = new FileAppender[ILoggingEvent]() fileAppender.setFile(filename) fileAppender.setEncoder(patternLayoutEncoder) fileAppender.setContext(loggerContext) fileAppender.start() // Attach appender to logger logger.addAppender(fileAppender) //logger.setLevel(Level.DEBUG) logger.setAdditive(additive) fileAppender.getName } def detachAppender(appenderName: String, loggerName: String = org.slf4j.Logger.ROOT_LOGGER_NAME): Unit = { import ch.qos.logback.classic.LoggerContext val loggerContext = LoggerFactory.getILoggerFactory().asInstanceOf[LoggerContext] val logger = loggerContext.getLogger(loggerName) logger.detachAppender(appenderName) } def getLogger(clazz: Class[_]): org.slf4j.Logger = { LoggerFactory.getLogger(clazz) } def getLogger(name: String): org.slf4j.Logger = { LoggerFactory.getLogger(name) } } trait Loggable { lazy val logger = Logger.getLogger(this.getClass) def startTrack(name: String): Unit = { logger.debug("Starting " + name) } def endTrack(name: String): Unit = { logger.debug("Finished " + name) } }
Example 167
Source File: FullId.scala From shapenet-viewer with MIT License | 5 votes |
package edu.stanford.graphics.shapenet.common import java.io.File import scala.util.matching.Regex case class FullId(source: String, id: String) { lazy val fullid = source + "." + id } object FullId { val fullIdRegex = new Regex("([a-zA-z0-9_-]+)\\.([a-zA-z0-9_-]+)") def apply(fullid: String, defaultSource: Option[String] = None): FullId = { val dotIndex = fullid.indexOf('.') val (source, id) = if (fullid.startsWith("http://") || fullid.startsWith("https://")) { ("raw", fullid) } else if (fullid.startsWith("file://")) { ("raw", fullid.substring(7)) } else if (fullid.startsWith("/")) { ("raw", fullid) } else if (new File(fullid).isAbsolute) { ("raw", fullid) } else if (dotIndex > 0) { (fullid.substring(0, dotIndex), fullid.substring(dotIndex + 1)) } else { val s = defaultSource.getOrElse(if (fullid.contains("scene")) "wssScenes" else "3dw") (s, fullid) } new FullId(source,id) } def matches(id1: String, id2: String): Boolean = { val f1 = FullId(id1) val f2 = FullId(id2) f1 == f2 } def isFullId(s: String): Boolean = { fullIdRegex.pattern.matcher(s).matches() } }
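A quick sketch of how the resolution rules above behave for a few representative inputs (assumes the FullId definition shown here is on the classpath; the ids themselves are made up):

object FullIdSketch extends App {
  println(FullId("3dw.abc123"))                   // FullId(3dw,abc123)        - explicit source before the first '.'
  println(FullId("http://example.com/model.obj")) // FullId(raw,http://example.com/model.obj)
  println(FullId("file:///tmp/model.obj"))        // FullId(raw,/tmp/model.obj) - "file://" prefix is stripped
  println(FullId("abc123"))                       // FullId(3dw,abc123)        - default source, id has no "scene"
  println(FullId.isFullId("3dw.abc123"))          // true
}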
Example 168
Source File: ColorPalette.scala From shapenet-viewer with MIT License | 5 votes |
package edu.stanford.graphics.shapenet.colors import java.awt.Color import javax.imageio.ImageIO import java.io.File import edu.stanford.graphics.shapenet.Constants trait ColorPalette { def getColor(id: Int): Color def getColorCount(): Int = -1 def getColor(id: Int, alpha: Float): Color = { val c = getColor(id) edu.stanford.graphics.shapenet.colors.getColor(c, alpha) } } class ColorBar(rgbColors: Array[Color]) extends ColorPalette { val nColors = rgbColors.length def getColor(r: Double): Color = getColor((r*(nColors-1)).toInt) def getColor(id: Int): Color = rgbColors(id % nColors) override def getColorCount() = nColors } object ColorBar { val texturesDir = Constants.ASSETS_DIR + "Textures" + File.separator lazy val coolwarmBar = ColorBar(texturesDir + "Cool2WarmBar.png") lazy val warmBar = ColorBar(texturesDir + "heatmap.png") def apply(filename: String): ColorBar = { val img = ImageIO.read(new File(filename)) val rgb = Array.ofDim[Color](img.getWidth) for (x <- 0 until rgb.length) { rgb(x) = new Color(img.getRGB(x, 0)) } new ColorBar(rgb) } } object PhiColorPalette extends ColorPalette { def getColor(id: Int): Color = { val startColor = new Color(0x4FD067) val hsb = Color.RGBtoHSB(startColor.getRed, startColor.getGreen, startColor.getBlue, null) val invPhi = 1.0/Constants.phi var hue = hsb(0) + id*invPhi hue = hue - math.floor(hue) val c = Color.getHSBColor(hue.toFloat, 0.5f, 0.95f) // Switch blue and green for nice pretty colors new Color(c.getRed, c.getBlue, c.getGreen) } } object DefaultColorPalette extends ColorPalette { def getColor(id: Int): Color = { var h = (-3.88 * id) % (2*Math.PI) if (h<0) h += 2*Math.PI h /= 2*Math.PI val c = Color.getHSBColor(h.toFloat, (0.4 + 0.2 * Math.sin(0.42 * id)).toFloat, 0.5f) c } }
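PhiColorPalette above steps the hue by the inverse golden ratio, so consecutive ids land on well-separated colors. A short sketch of pulling a few colors out of it (assumes the definitions above, including the referenced Constants.phi, are available):

object PaletteSketch extends App {
  // Hues advance by ~0.618 (mod 1) per id, so neighbouring ids never look alike.
  (0 until 5).foreach { id =>
    val c = PhiColorPalette.getColor(id)
    println(f"id=$id rgb=(${c.getRed}%3d, ${c.getGreen}%3d, ${c.getBlue}%3d)")
  }
}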
Example 169
Source File: Summarizer.scala From berkeley-doc-summarizer with GNU General Public License v3.0 | 5 votes |
package edu.berkeley.nlp.summ import java.io.File import edu.berkeley.nlp.entity.ConllDocReader import edu.berkeley.nlp.entity.coref.CorefDocAssembler import edu.berkeley.nlp.entity.coref.MentionPropertyComputer import edu.berkeley.nlp.entity.coref.NumberGenderComputer import edu.berkeley.nlp.entity.lang.EnglishCorefLanguagePack import edu.berkeley.nlp.entity.lang.Language import edu.berkeley.nlp.futile.LightRunner import edu.berkeley.nlp.futile.fig.basic.IOUtils import edu.berkeley.nlp.futile.util.Logger import edu.berkeley.nlp.summ.data.SummDoc import edu.berkeley.nlp.summ.preprocess.DiscourseDependencyParser import edu.berkeley.nlp.summ.preprocess.EDUSegmenter import edu.berkeley.nlp.summ.data.DiscourseDepExProcessed object Summarizer { val numberGenderPath = "data/gender.data"; val segmenterPath = "models/edusegmenter.ser.gz" val discourseParserPath = "models/discoursedep.ser.gz" val modelPath = "models/summarizer-full.ser.gz" val inputDir = "" val outputDir = "" // Indicates that we shouldn't do any discourse preprocessing; this is only appropriate // for the sentence-extractive version of the system val noRst = false // Summary budget, in words. Set this to whatever you want it to. val budget = 50 def main(args: Array[String]) { LightRunner.initializeOutput(Summarizer.getClass()) LightRunner.populateScala(Summarizer.getClass(), args) Logger.logss("Loading model...") val model = IOUtils.readObjFile(modelPath).asInstanceOf[CompressiveAnaphoraSummarizer] Logger.logss("Model loaded!") val (segmenter, discourseParser) = if (noRst) { (None, None) } else { Logger.logss("Loading segmenter...") val tmpSegmenter = IOUtils.readObjFile(segmenterPath).asInstanceOf[EDUSegmenter] Logger.logss("Segmenter loaded!") Logger.logss("Loading discourse parser...") val tmpDiscourseParser = IOUtils.readObjFile(discourseParserPath).asInstanceOf[DiscourseDependencyParser] Logger.logss("Discourse parser loaded!") (Some(tmpSegmenter), Some(tmpDiscourseParser)) } val numberGenderComputer = NumberGenderComputer.readBergsmaLinData(numberGenderPath); val mpc = new MentionPropertyComputer(Some(numberGenderComputer)) val reader = new ConllDocReader(Language.ENGLISH) val assembler = new CorefDocAssembler(new EnglishCorefLanguagePack, true) val filesToSummarize = new File(inputDir).listFiles() for (file <- filesToSummarize) { val conllDoc = reader.readConllDocs(file.getAbsolutePath).head val corefDoc = assembler.createCorefDoc(conllDoc, mpc) val summDoc = SummDoc.makeSummDoc(conllDoc.docID, corefDoc, Seq()) val ex = if (noRst) { DiscourseDepExProcessed.makeTrivial(summDoc) } else { DiscourseDepExProcessed.makeWithEduAndSyntactic(summDoc, segmenter.get, discourseParser.get) } val summaryLines = model.summarize(ex, budget, true) val outWriter = IOUtils.openOutHard(outputDir + "/" + file.getName) for (summLine <- summaryLines) { outWriter.println(summLine) } outWriter.close } LightRunner.finalizeOutput() } }
Example 170
Source File: EDUAligner.scala From berkeley-doc-summarizer with GNU General Public License v3.0 | 5 votes |
package edu.berkeley.nlp.summ.data import java.io.File import scala.collection.mutable.ArrayBuffer import edu.berkeley.nlp.entity.coref.MentionPropertyComputer import edu.berkeley.nlp.entity.coref.NumberGenderComputer import edu.berkeley.nlp.futile.util.Logger object EDUAligner { def align(leafWords: Seq[Seq[String]], docSents: Seq[DepParse]) = { var currSentIdx = 0 var currWordIdx = 0 val leafSpans = new ArrayBuffer[((Int,Int),(Int,Int))] for (i <- 0 until leafWords.size) { val start = (currSentIdx, currWordIdx) val currLen = docSents(currSentIdx).size require(currWordIdx + leafWords(i).size <= currLen, currWordIdx + " " + leafWords(i).size + " " + currLen + "\nsent = " + docSents(currSentIdx).getWords.toSeq + ", leaf words = " + leafWords(i).toSeq) var leafWordIdx = 0 while (leafWordIdx < leafWords(i).size) { val docWord = docSents(currSentIdx).getWord(currWordIdx) val leafWord = leafWords(i)(leafWordIdx) val currWordsEqual = docWord == leafWord val currWordsEffectivelyEqual = docWord.contains("'") || docWord.contains("`") // Ignore some punc symbols because they're weird // Spurious period but last thing ended in period, so it was probably added by the tokenizer (like "Ltd. .") if (!currWordsEqual && docWord == "." && currWordIdx > 0 && docSents(currSentIdx).getWord(currWordIdx - 1).endsWith(".")) { currWordIdx += 1 if (currWordIdx == docSents(currSentIdx).size) { currSentIdx += 1 currWordIdx = 0 } // N.B. don't advance leafWordIdx } else { require(currWordsEqual || currWordsEffectivelyEqual, docWord + " :: " + leafWord + "\nsent = " + docSents(currSentIdx).getWords.toSeq + ", leaf words = " + leafWords(i).toSeq) currWordIdx += 1 if (currWordIdx == docSents(currSentIdx).size) { currSentIdx += 1 currWordIdx = 0 } leafWordIdx += 1 } } val end = if (currWordIdx == 0) { (currSentIdx - 1, docSents(currSentIdx - 1).size) } else { (currSentIdx, currWordIdx) } leafSpans += start -> end // if (currWordIdx == docSents(currSentIdx).size) { // currSentIdx += 1 // currWordIdx = 0 // } } leafSpans // } } def main(args: Array[String]) { val allTreeFiles = new File("data/RSTDiscourse/data/RSTtrees-WSJ-main-1.0/ALL-FILES/").listFiles.sortBy(_.getName).filter(_.getName.endsWith(".out.dis")) val allTrees = allTreeFiles.map(file => DiscourseTreeReader.readDisFile(file.getAbsolutePath)) // val allSummDocs = new File("data/RSTDiscourse/data/RSTtrees-WSJ-main-1.0/ALL-FILES-PREPROC/").listFiles.sortBy(_.getName)) val numberGenderComputer = NumberGenderComputer.readBergsmaLinData("data/gender.data"); val mpc = new MentionPropertyComputer(Some(numberGenderComputer)) val allSummDocFiles = new File("data/RSTDiscourse/data/RSTtrees-WSJ-main-1.0/ALL-FILES-PROC2/").listFiles.sortBy(_.getName) val allSummDocs = allSummDocFiles.map(file => SummDoc.readSummDocNoAbstract(file.getAbsolutePath, mpc, filterSpuriousDocs = false, filterSpuriousSummSents = false)) val summNames = new File("data/RSTDiscourse/data/RSTtrees-WSJ-main-1.0/SUMM-SUBSET-PROC/").listFiles.map(_.getName) require(allTrees.size == allSummDocs.size) val badFiles = new ArrayBuffer[String] for (i <- 0 until allTrees.size) { require(allTreeFiles(i).getName.dropRight(4) == allSummDocFiles(i).getName, allTreeFiles(i).getName.dropRight(4) + " " + allSummDocFiles(i).getName) Logger.logss(allSummDocFiles(i).getName) try { align(allTrees(i).leafWords, allSummDocs(i).doc) } catch { case e: Exception => { Logger.logss(e) badFiles += allSummDocFiles(i).getName } } } Logger.logss(badFiles.size + " bad files: " + badFiles) val badSummDocs = (badFiles.toSet & 
summNames.toSet) Logger.logss(badSummDocs.size + " bad summarized files: " + badSummDocs.toSeq.sorted) } }
Example 171
Source File: RougeFileMunger.scala From berkeley-doc-summarizer with GNU General Public License v3.0 | 5 votes |
package edu.berkeley.nlp.summ import java.io.File import edu.berkeley.nlp.futile.fig.basic.IOUtils import scala.collection.JavaConverters._ object RougeFileMunger { val input = "data/RSTDiscourse/sample-outputs/" val output = "data/RSTDiscourse/sample-outputs-rouge/" val settingsPath = "data/RSTDiscourse/rouge-settings.xml" val detokenize = true def writeSummary(fileName: String, sents: Seq[String], outPath: String, keepFile: Boolean) { val outFile = new File(outPath) if (!keepFile) outFile.deleteOnExit() val outWriter = IOUtils.openOutHard(outFile) outWriter.println("<html>") outWriter.println("<head><title>" + fileName + "</title></head>") outWriter.println("<<body bgcolor=\"white\">") var counter = 1 for (sent <- sents) { outWriter.println("<a name=\"" + counter + "\">[" + counter + "]</a> <a href=\"#" + counter + "\" id=" + counter + ">" + sent + "</a>") counter += 1 } outWriter.println("</body>") outWriter.println("</html>") outWriter.close } def detokenizeSentence(line: String) = { line.replace(" ,", ",").replace(" .", ".").replace(" !", "!").replace(" ?", "?").replace(" :", ":").replace(" ;", ";"). replace("`` ", "``").replace(" ''", "''").replace(" '", "'").replace(" \"", "\"").replace("$ ", "$") } def processFiles(rootPath: String, subDir: String) = { val refFiles = new File(rootPath + "/" + subDir).listFiles for (refFile <- refFiles) { val rawName = refFile.getName() val name = rawName.substring(0, if (rawName.indexOf("_") == -1) rawName.size else rawName.indexOf("_")) val lines = IOUtils.readLinesHard(refFile.getAbsolutePath()).asScala.map(sent => if (detokenize) detokenizeSentence(sent) else sent) writeSummary(name, lines, output + "/" + subDir + "/" + refFile.getName, true) } } def writeSettings(settingsPath: String, dirPaths: String) { val outWriter = IOUtils.openOutHard(settingsPath) outWriter.println("""<ROUGE_EVAL version="1.55">""") val rawDirName = new File(dirPaths).getName() val docs = new File(dirPaths + "/reference").listFiles var idx = 0 for (doc <- docs) { val rawName = doc.getName().substring(0, doc.getName.indexOf("_")) outWriter.println("<EVAL ID=\"TASK_" + idx + "\">") outWriter.println("<MODEL-ROOT>" + rawDirName + "/reference</MODEL-ROOT>") outWriter.println("<PEER-ROOT>" + rawDirName + "/system</PEER-ROOT>") outWriter.println("<INPUT-FORMAT TYPE=\"SEE\"> </INPUT-FORMAT>") outWriter.println("<PEERS>") outWriter.println("<P ID=\"1\">" + rawName + "_system1.txt</P>") outWriter.println("</PEERS>") outWriter.println("<MODELS>") outWriter.println("<M ID=\"1\">" + rawName + "_reference1.txt</M>") outWriter.println("</MODELS>") outWriter.println("</EVAL>") idx += 1 } outWriter.println("</ROUGE_EVAL>") outWriter.close } def main(args: Array[String]) { processFiles(input, "reference") processFiles(input, "system") writeSettings(settingsPath, output) } }
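detokenizeSentence above just undoes the most common tokenizer spacing before the summaries are written out for ROUGE. A one-line illustration with a made-up sentence (assumes the object above is on the classpath):

object DetokenizeSketch extends App {
  val tokenized = "`` Hello , world '' ; it costs $ 5 ."
  println(RougeFileMunger.detokenizeSentence(tokenized)) // ``Hello, world''; it costs $5.
}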
Example 172
Source File: SHC.scala From shc with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import org.apache.spark.sql.execution.datasources.hbase.Logging import java.io.File import com.google.common.io.Files import org.apache.hadoop.hbase.client.Table import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.{HBaseTestingUtility, TableName} import org.apache.spark.sql.execution.datasources.hbase.SparkHBaseConf import org.apache.spark.{SparkContext, SparkConf} import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} class SHC extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll with Logging { implicit class StringToColumn(val sc: StringContext) { def $(args: Any*): ColumnName = { new ColumnName(sc.s(args: _*)) } } var spark: SparkSession = null var sc: SparkContext = null var sqlContext: SQLContext = null var df: DataFrame = null private[spark] var htu = new HBaseTestingUtility private[spark] def tableName = "table1" private[spark] def columnFamilies: Array[String] = Array.tabulate(9){ x=> s"cf$x"} var table: Table = null val conf = new SparkConf conf.set(SparkHBaseConf.testConf, "true") // private[spark] var columnFamilyStr = Bytes.toString(columnFamily) def defineCatalog(tName: String) = s"""{ |"table":{"namespace":"default", "name":"$tName"}, |"rowkey":"key", |"columns":{ |"col0":{"cf":"rowkey", "col":"key", "type":"string"}, |"col1":{"cf":"cf1", "col":"col1", "type":"boolean"}, |"col2":{"cf":"cf2", "col":"col2", "type":"double"}, |"col3":{"cf":"cf3", "col":"col3", "type":"float"}, |"col4":{"cf":"cf4", "col":"col4", "type":"int"}, |"col5":{"cf":"cf5", "col":"col5", "type":"bigint"}, |"col6":{"cf":"cf6", "col":"col6", "type":"smallint"}, |"col7":{"cf":"cf7", "col":"col7", "type":"string"}, |"col8":{"cf":"cf8", "col":"col8", "type":"tinyint"} |} |}""".stripMargin @deprecated(since = "04.12.2017(dd/mm/year)", message = "use `defineCatalog` instead") def catalog = defineCatalog(tableName) override def beforeAll() { val tempDir: File = Files.createTempDir tempDir.deleteOnExit htu.startMiniCluster SparkHBaseConf.conf = htu.getConfiguration logInfo(" - minicluster started") println(" - minicluster started") spark = SparkSession.builder() .master("local") .appName("HBaseTest") .config(conf) .getOrCreate() sqlContext = spark.sqlContext sc = spark.sparkContext } override def afterAll() { htu.shutdownMiniCluster() spark.stop() } def createTable(name: String, cfs: Array[String]) { val tName = Bytes.toBytes(name) val bcfs = cfs.map(Bytes.toBytes(_)) try { htu.deleteTable(TableName.valueOf(tName)) } catch { case _ : Throwable => logInfo(" - no table " + name + " found") } htu.createMultiRegionTable(TableName.valueOf(tName), bcfs) } def createTable(name: Array[Byte], cfs: Array[Array[Byte]]) { try { htu.deleteTable(TableName.valueOf(name)) } catch { case _ : Throwable => logInfo(" - no table " + Bytes.toString(name) + " found") } htu.createMultiRegionTable(TableName.valueOf(name), cfs) } }
Example 173
Source File: HBaseTestSuite.scala From shc with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import java.io.File import scala.collection.JavaConverters._ import com.google.common.io.Files import org.apache.hadoop.hbase.client._ import org.apache.hadoop.hbase.util.Bytes import org.apache.hadoop.hbase.{TableName, HBaseTestingUtility} import org.apache.spark.sql.execution.datasources.hbase.Logging import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite} class HBaseTestSuite extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll with Logging { private[spark] var htu = HBaseTestingUtility.createLocalHTU() private[spark] var tableName: Array[Byte] = Bytes.toBytes("t1") private[spark] var columnFamily: Array[Byte] = Bytes.toBytes("cf0") private[spark] var columnFamilies: Array[Array[Byte]] = Array(Bytes.toBytes("cf0"), Bytes.toBytes("cf1"), Bytes.toBytes("cf2"), Bytes.toBytes("cf3"), Bytes.toBytes("cf4")) var table: Table = null // private[spark] var columnFamilyStr = Bytes.toString(columnFamily) override def beforeAll() { val tempDir: File = Files.createTempDir tempDir.deleteOnExit htu.cleanupTestDir htu.startMiniZKCluster htu.startMiniHBaseCluster(1, 4) logInfo(" - minicluster started") println(" - minicluster started") try { htu.deleteTable(TableName.valueOf(tableName)) //htu.createTable(TableName.valueOf(tableName), columnFamily, 2, Bytes.toBytes("abc"), Bytes.toBytes("xyz"), 2) } catch { case _ : Throwable => logInfo(" - no table " + Bytes.toString(tableName) + " found") } setupTable() } override def afterAll() { try { table.close() println("shutdown") htu.deleteTable(TableName.valueOf(tableName)) logInfo("shuting down minicluster") htu.shutdownMiniHBaseCluster htu.shutdownMiniZKCluster logInfo(" - minicluster shut down") htu.cleanupTestDir } catch { case _ : Throwable => logError("teardown error") } } def setupTable() { val config = htu.getConfiguration htu.createMultiRegionTable(TableName.valueOf(tableName), columnFamilies) println("create htable t1") val connection = ConnectionFactory.createConnection(config) val r = connection.getRegionLocator(TableName.valueOf("t1")) table = connection.getTable(TableName.valueOf("t1")) val regionLocations = r.getAllRegionLocations.asScala.toSeq println(s"$regionLocations size: ${regionLocations.size}") (0 until 100).foreach { x => var put = new Put(Bytes.toBytes(s"row$x")) (0 until 5).foreach { y => put.addColumn(columnFamilies(y), Bytes.toBytes(s"c$y"), Bytes.toBytes(s"value $x $y")) } table.put(put) } } }
Example 174
Source File: CodeGenerator.scala From jvm-toxcore-c with GNU General Public License v3.0 | 5 votes |
package im.tox.tox4j.impl.jni.codegen import java.io.{ File, PrintWriter } import com.google.common.base.CaseFormat import gnieh.pp.PrettyRenderer import im.tox.tox4j.impl.jni.codegen.cxx.Ast._ import im.tox.tox4j.impl.jni.codegen.cxx.{ Ast, Print } object NameConversions { def cxxVarName(name: String): String = CaseFormat.LOWER_CAMEL.to(CaseFormat.LOWER_UNDERSCORE, name) def cxxTypeName(name: String): String = CaseFormat.UPPER_CAMEL.to(CaseFormat.UPPER_UNDERSCORE, name) def javaVarName(name: String): String = CaseFormat.LOWER_UNDERSCORE.to(CaseFormat.LOWER_CAMEL, name) def javaTypeName(name: String): String = CaseFormat.UPPER_UNDERSCORE.to(CaseFormat.UPPER_CAMEL, name) } abstract class CodeGenerator extends App { def writeCode(path: String, sep: String = "\n\n")(code: Ast.TranslationUnit): Unit = { val renderer = new PrettyRenderer(130) val writer = new PrintWriter(new File("cpp/src", path)) try { writer.println(code.map(Print.printDecl).map(renderer).mkString(sep)) } finally { writer.close() } } def ifdef(header: String, guard: String, code: TranslationUnit*): TranslationUnit = { Include(header) +: Ifdef(guard) +: code.flatten :+ Endif } }
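NameConversions above is a thin wrapper over Guava's CaseFormat; the sketch below shows the four round trips on arbitrary sample identifiers (only the methods defined in the snippet are used):

object NameConversionsSketch extends App {
  import im.tox.tox4j.impl.jni.codegen.NameConversions._

  println(cxxVarName("friendNumber"))     // friend_number
  println(cxxTypeName("ToxCoreError"))    // TOX_CORE_ERROR
  println(javaVarName("friend_number"))   // friendNumber
  println(javaTypeName("TOX_CORE_ERROR")) // ToxCoreError
}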
Example 175
Source File: DefaultSource.scala From spark-google-spreadsheets with Apache License 2.0 | 5 votes |
package com.github.potix2.spark.google.spreadsheets import java.io.File import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider, RelationProvider, SchemaRelationProvider} import org.apache.spark.sql.types.StructType import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode} class DefaultSource extends RelationProvider with SchemaRelationProvider with CreatableRelationProvider { final val DEFAULT_CREDENTIAL_PATH = "/etc/gdata/credential.p12" override def createRelation(sqlContext: SQLContext, parameters: Map[String, String]) = { createRelation(sqlContext, parameters, null) } private[spreadsheets] def pathToSheetNames(parameters: Map[String, String]): (String, String) = { val path = parameters.getOrElse("path", sys.error("'path' must be specified for spreadsheets.")) val elems = path.split('/') if (elems.length < 2) throw new Exception("'path' must be formed like '<spreadsheet>/<worksheet>'") (elems(0), elems(1)) } override def createRelation(sqlContext: SQLContext, parameters: Map[String, String], schema: StructType) = { val (spreadsheetName, worksheetName) = pathToSheetNames(parameters) val context = createSpreadsheetContext(parameters) createRelation(sqlContext, context, spreadsheetName, worksheetName, schema) } override def createRelation(sqlContext: SQLContext, mode: SaveMode, parameters: Map[String, String], data: DataFrame): BaseRelation = { val (spreadsheetName, worksheetName) = pathToSheetNames(parameters) implicit val context = createSpreadsheetContext(parameters) val spreadsheet = SparkSpreadsheetService.findSpreadsheet(spreadsheetName) if(!spreadsheet.isDefined) throw new RuntimeException(s"no such a spreadsheet: $spreadsheetName") spreadsheet.get.addWorksheet(worksheetName, data.schema, data.collect().toList, Util.toRowData) createRelation(sqlContext, context, spreadsheetName, worksheetName, data.schema) } private[spreadsheets] def createSpreadsheetContext(parameters: Map[String, String]) = { val serviceAccountIdOption = parameters.get("serviceAccountId") val credentialPath = parameters.getOrElse("credentialPath", DEFAULT_CREDENTIAL_PATH) SparkSpreadsheetService(serviceAccountIdOption, new File(credentialPath)) } private[spreadsheets] def createRelation(sqlContext: SQLContext, context: SparkSpreadsheetService.SparkSpreadsheetContext, spreadsheetName: String, worksheetName: String, schema: StructType): SpreadsheetRelation = if (schema == null) { createRelation(sqlContext, context, spreadsheetName, worksheetName, None) } else { createRelation(sqlContext, context, spreadsheetName, worksheetName, Some(schema)) } private[spreadsheets] def createRelation(sqlContext: SQLContext, context: SparkSpreadsheetService.SparkSpreadsheetContext, spreadsheetName: String, worksheetName: String, schema: Option[StructType]): SpreadsheetRelation = SpreadsheetRelation(context, spreadsheetName, worksheetName, schema)(sqlContext) }
Example 176
Source File: SparkSpreadsheetServiceReadSuite.scala From spark-google-spreadsheets with Apache License 2.0 | 5 votes |
package com.github.potix2.spark.google.spreadsheets import java.io.File import org.scalatest.{BeforeAndAfter, FlatSpec} class SparkSpreadsheetServiceReadSuite extends FlatSpec with BeforeAndAfter { private val serviceAccountId = "53797494708-ds5v22b6cbpchrv2qih1vg8kru098k9i@developer.gserviceaccount.com" private val testCredentialPath = "src/test/resources/spark-google-spreadsheets-test-eb7b191d1e1d.p12" private val TEST_SPREADSHEET_NAME = "SpreadsheetSuite" private val TEST_SPREADSHEET_ID = "1H40ZeqXrMRxgHIi3XxmHwsPs2SgVuLUFbtaGcqCAk6c" private val context: SparkSpreadsheetService.SparkSpreadsheetContext = SparkSpreadsheetService.SparkSpreadsheetContext(Some(serviceAccountId), new File(testCredentialPath)) private val spreadsheet: SparkSpreadsheetService.SparkSpreadsheet = context.findSpreadsheet(TEST_SPREADSHEET_ID) behavior of "A Spreadsheet" it should "have a name" in { assert(spreadsheet.name == TEST_SPREADSHEET_NAME) } behavior of "A worksheet" it should "be None when a worksheet is missing" in { assert(spreadsheet.findWorksheet("foo").isEmpty) } it should "be retrieved when the worksheet exists" in { val worksheet = spreadsheet.findWorksheet("case2") assert(worksheet.isDefined) assert(worksheet.get.name == "case2") assert(worksheet.get.headers == List("id", "firstname", "lastname", "email", "country", "ipaddress")) val firstRow = worksheet.get.rows(0) assert(firstRow == Map( "id" -> "1", "firstname" -> "Annie", "lastname" -> "Willis", "email" -> "[email protected]", "country" -> "Burundi", "ipaddress" -> "241.162.49.104")) } }
Example 177
Source File: LibFFMRelationSuite.scala From sona with Apache License 2.0 | 5 votes |
package com.tencent.angel.sona.ml.source.libffm import java.io.File import java.nio.charset.StandardCharsets import com.google.common.io.Files import org.apache.spark.SparkFunSuite import com.tencent.angel.sona.ml.util.MLlibTestSparkContext import org.apache.spark.util.SparkUtil class LibFFMRelationSuite extends SparkFunSuite with MLlibTestSparkContext { // Path for dataset var path: String = _ override def beforeAll(): Unit = { super.beforeAll() val lines0 = """ |1 0:1:1.0 1:3:2.0 2:5:3.0 |0 """.stripMargin val lines1 = """ |0 0:2:4.0 1:4:5.0 2:6:6.0 """.stripMargin val dir = SparkUtil.createTempDir() val succ = new File(dir, "_SUCCESS") val file0 = new File(dir, "part-00000") val file1 = new File(dir, "part-00001") Files.write("", succ, StandardCharsets.UTF_8) Files.write(lines0, file0, StandardCharsets.UTF_8) Files.write(lines1, file1, StandardCharsets.UTF_8) path = dir.getPath } override def afterAll(): Unit = { try { val prefix = "C:\\Users\\fitzwang\\AppData\\Local\\Temp\\" if (path.startsWith(prefix)) { SparkUtil.deleteRecursively(new File(path)) } } finally { super.afterAll() } } test("ffmIO"){ val df = spark.read.format("libffm").load(path) val metadata = df.schema(1).metadata val fieldSet = MetaSummary.getFieldSet(metadata) println(fieldSet.mkString("[", ",", "]")) val keyFieldMap = MetaSummary.getKeyFieldMap(metadata) println(keyFieldMap.mkString("[", ",", "]")) df.write.format("libffm").save("temp.libffm") } test("read_ffm"){ val df = spark.read.format("libffm").load(path) val metadata = df.schema(1).metadata val fieldSet = MetaSummary.getFieldSet(metadata) println(fieldSet.mkString("[", ",", "]")) val keyFieldMap = MetaSummary.getKeyFieldMap(metadata) println(keyFieldMap.mkString("[", ",", "]")) } }
Example 178
Source File: MLlibTestSparkContext.scala From sona with Apache License 2.0 | 5 votes |
package com.tencent.angel.sona.ml.util import java.io.File import org.apache.spark.SparkContext import org.apache.spark.sql.types.UDTRegistration import org.apache.spark.sql.{SQLContext, SQLImplicits, SparkSession} import org.apache.spark.util.{SparkUtil, Utils} import org.scalatest.Suite trait MLlibTestSparkContext extends TempDirectory { self: Suite => @transient var spark: SparkSession = _ @transient var sc: SparkContext = _ @transient var checkpointDir: String = _ override def beforeAll() { super.beforeAll() SparkUtil.UDTRegister("org.apache.spark.linalg.Vector", "org.apache.spark.linalg.VectorUDT") SparkUtil.UDTRegister("org.apache.spark.linalg.DenseVector", "org.apache.spark.linalg.VectorUDT") SparkUtil.UDTRegister("org.apache.spark.linalg.SparseVector", "org.apache.spark.linalg.VectorUDT") SparkUtil.UDTRegister("org.apache.spark.linalg.Matrix", "org.apache.spark.linalg.MatrixUDT") SparkUtil.UDTRegister("org.apache.spark.linalg.DenseMatrix", "org.apache.spark.linalg.MatrixUDT") SparkUtil.UDTRegister("org.apache.spark.linalg.SparseMatrix", "org.apache.spark.linalg.MatrixUDT") spark = SparkSession.builder .master("local[2]") .appName("MLlibUnitTest") .getOrCreate() sc = spark.sparkContext checkpointDir = SparkUtil.createDirectory(tempDir.getCanonicalPath, "checkpoints").toString sc.setCheckpointDir(checkpointDir) } override def afterAll() { try { SparkUtil.deleteRecursively(new File(checkpointDir)) SparkSession.clearActiveSession() if (spark != null) { spark.stop() } spark = null } finally { super.afterAll() } } /** * A helper object for importing SQL implicits. * * Note that the alternative of importing `spark.implicits._` is not possible here. * This is because we create the `SQLContext` immediately before the first test is run, * but the implicits import is needed in the constructor. */ protected object testImplicits extends SQLImplicits { protected override def _sqlContext: SQLContext = self.spark.sqlContext } }
Example 179
Source File: TempDirectory.scala From sona with Apache License 2.0 | 5 votes |
package com.tencent.angel.sona.ml.util import java.io.File import org.scalatest.{BeforeAndAfterAll, Suite} import org.apache.spark.util.SparkUtil /** * Trait that creates a temporary directory before all tests and deletes it after all. */ trait TempDirectory extends BeforeAndAfterAll { self: Suite => private var _tempDir: File = _ /** * Returns the temporary directory as a `File` instance. */ protected def tempDir: File = _tempDir override def beforeAll(): Unit = { super.beforeAll() _tempDir = SparkUtil.createTempDir(namePrefix = this.getClass.getName) } override def afterAll(): Unit = { try { SparkUtil.deleteRecursively(_tempDir) } finally { super.afterAll() } } }
Example 180
Source File: PMMLReadWriteTest.scala From sona with Apache License 2.0 | 5 votes |
package com.tencent.angel.sona.ml.util import java.io.{File, IOException} import org.dmg.pmml.PMML import org.scalatest.Suite import org.apache.spark.SparkContext import com.tencent.angel.sona.ml.param.Params trait PMMLReadWriteTest extends TempDirectory { self: Suite => /** * Test PMML export. Requires exported model is small enough to be loaded locally. * Checks that the model can be exported and the result is valid PMML, but does not check * the specific contents of the model. */ def testPMMLWrite[T <: Params with GeneralMLWritable](sc: SparkContext, instance: T, checkModelData: PMML => Unit): Unit = { val uid = instance.uid val subdirName = Identifiable.randomUID("pmml-") val subdir = new File(tempDir, subdirName) val path = new File(subdir, uid).getPath instance.write.format("pmml").save(path) intercept[IOException] { instance.write.format("pmml").save(path) } instance.write.format("pmml").overwrite().save(path) val pmmlStr = sc.textFile(path).collect.mkString("\n") val pmmlModel = PMMLUtils.loadFromString(pmmlStr) assert(pmmlModel.getHeader.getApplication.getName.startsWith("Apache Spark")) checkModelData(pmmlModel) } }
Example 181
Source File: TypesafeConfigSource.scala From zio-config with Apache License 2.0 | 5 votes |
package zio.config.typesafe import java.io.File import java.lang.{ Boolean => JBoolean } import com.typesafe.config._ import zio.config.PropertyTree.{ Leaf, _ } import zio.config.{ ConfigSource, _ } import zio.{ IO, Task, ZIO } import scala.collection.JavaConverters._ import scala.util.{ Failure, Success, Try } object TypesafeConfigSource { def fromDefaultLoader: Either[String, ConfigSource] = fromTypesafeConfig(ConfigFactory.load.resolve) def fromHoconFile[A]( file: File ): Task[ConfigSource] = IO.effect(ConfigFactory.parseFile(file).resolve) .flatMap(typesafeConfig => { ZIO .fromEither(fromTypesafeConfig(typesafeConfig)) .mapError(str => new RuntimeException(str)) }) def fromHoconString( input: String ): Either[String, zio.config.ConfigSource] = fromTypesafeConfig( ConfigFactory.parseString(input).resolve ) def fromTypesafeConfig( input: => com.typesafe.config.Config ): Either[String, ConfigSource] = Try { input } match { case Failure(exception) => Left(exception.getMessage) case Success(value) => getPropertyTree(value) match { case Left(value) => Left(value) case Right(value) => Right(ConfigSource.fromPropertyTree(value, "hocon", LeafForSequence.Invalid)) } } private[config] def getPropertyTree( input: com.typesafe.config.Config ): Either[String, PropertyTree[String, String]] = { def loopBoolean(value: Boolean) = Leaf(value.toString) def loopNumber(value: Number) = Leaf(value.toString) val loopNull = PropertyTree.empty def loopString(value: String) = Leaf(value) def loopList(values: List[ConfigValue]) = Sequence(values.map(loopAny)) def loopConfig(config: ConfigObject) = Record(config.asScala.toVector.map { case (key, value) => key -> loopAny(value) }.toMap) def loopAny(value: ConfigValue): PropertyTree[String, String] = value.valueType() match { case ConfigValueType.OBJECT => loopConfig(value.asInstanceOf[ConfigObject]) case ConfigValueType.LIST => loopList(value.asInstanceOf[ConfigList].asScala.toList) case ConfigValueType.BOOLEAN => loopBoolean(value.unwrapped().asInstanceOf[JBoolean]) case ConfigValueType.NUMBER => loopNumber(value.unwrapped().asInstanceOf[Number]) case ConfigValueType.NULL => loopNull case ConfigValueType.STRING => loopString(value.unwrapped().asInstanceOf[String]) } Try(loopConfig(input.root())) match { case Failure(t) => Left( "Unable to form the zio.config.PropertyTree from Hocon string." + " This may be due to the presence of explicit usage of nulls in hocon string. " + t.getMessage ) case Success(value) => Right(value) } } }
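A minimal sketch of feeding an inline HOCON string through the fromHoconString entry point above; the keys are invented for illustration, and the sketch only inspects whether parsing succeeded:

object HoconSourceSketch extends App {
  val hocon =
    """
      |app {
      |  name = "demo"
      |  port = 8080
      |}
      |""".stripMargin

  TypesafeConfigSource.fromHoconString(hocon) match {
    case Right(source) => println(s"parsed into a ConfigSource: $source")
    case Left(error)   => println(s"failed to parse HOCON: $error")
  }
}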
Example 182
Source File: TypesafeConfig.scala From zio-config with Apache License 2.0 | 5 votes |
package zio.config.typesafe import java.io.File import com.typesafe.config.ConfigFactory import zio.config.Config import zio.{ Layer, Tag, ZIO } import zio.config.ConfigDescriptor object TypesafeConfig { def fromDefaultLoader[A]( configDescriptor: ConfigDescriptor[A] )(implicit tag: Tag[A]): Layer[Throwable, Config[A]] = fromTypesafeConfig(ConfigFactory.load.resolve, configDescriptor) def fromHoconFile[A]( file: File, configDescriptor: ConfigDescriptor[A] )(implicit tag: Tag[A]): Layer[Throwable, Config[A]] = fromTypesafeConfig(ConfigFactory.parseFile(file).resolve, configDescriptor) def fromHoconString[A]( str: String, configDescriptor: ConfigDescriptor[A] )(implicit tag: Tag[A]): Layer[Throwable, Config[A]] = fromTypesafeConfig(ConfigFactory.parseString(str).resolve, configDescriptor) def fromTypesafeConfig[A]( conf: => com.typesafe.config.Config, configDescriptor: ConfigDescriptor[A] )(implicit tag: Tag[A]): Layer[Throwable, Config[A]] = Config.fromConfigDescriptorM( ZIO .fromEither(TypesafeConfigSource.fromTypesafeConfig(conf)) .map(configDescriptor from _) .mapError(error => new RuntimeException(error)) ) }
Example 183
Source File: SbtLayerConfigurations.scala From sbt-jib with Apache License 2.0 | 5 votes |
package de.gccc.jib import java.io.File import com.google.cloud.tools.jib.api.LayerConfiguration import sbt._ object SbtLayerConfigurations { def generate( targetDirectory: File, classes: Seq[File], resourceDirectories: Seq[File], internalDependencies: Keys.Classpath, external: Keys.Classpath, extraMappings: Seq[(File, String)], specialResourceDirectory: File ): List[LayerConfiguration] = { val internalDependenciesLayer = { SbtJibHelper.mappingsConverter("internal", reproducibleDependencies(targetDirectory, internalDependencies)) } val externalDependenciesLayer = { SbtJibHelper.mappingsConverter("libs", MappingsHelper.fromClasspath(external.seq, "/app/libs")) } val resourcesLayer = { SbtJibHelper.mappingsConverter( "conf", resourceDirectories.flatMap(MappingsHelper.contentOf(_, "/app/resources", _.isFile)) ) } val specialResourcesLayer = { SbtJibHelper.mappingsConverter("resources", MappingsHelper.contentOf(specialResourceDirectory, "/app/resources", _.isFile)) } val extraLayer = if (extraMappings.nonEmpty) SbtJibHelper.mappingsConverter("extra", extraMappings.filter(_._1.isFile)) :: Nil else Nil val allClasses = classes // we only want class-files in our classes layer // FIXME: not just extensions checking? .flatMap(MappingsHelper.contentOf(_, "/app/classes", f => if (f.isFile) f.getName.endsWith(".class") else false)) val classesLayer = SbtJibHelper.mappingsConverter("classes", allClasses) // the ordering here is really important (extraLayer ::: List( externalDependenciesLayer, resourcesLayer, internalDependenciesLayer, specialResourcesLayer, classesLayer )).filterNot(lc => lc.getLayerEntries.isEmpty) } private def reproducibleDependencies(targetDirectory: File, internalDependencies: Keys.Classpath) = { val dependencies = internalDependencies.seq.map(_.data) val stageDirectory = targetDirectory / "jib" / "dependency-stage" IO.delete(stageDirectory) IO.createDirectory(stageDirectory) val stripper = new ZipStripper() dependencies.foreach { in => val fileName = in.getName val out = new File(stageDirectory, fileName) stripper.strip(in, out) } MappingsHelper.contentOf(stageDirectory, "/app/libs") } }
Example 184
Source File: SbtJibHelper.scala From sbt-jib with Apache License 2.0 | 5 votes |
package de.gccc.jib import java.io.File import com.google.cloud.tools.jib.api.buildplan.AbsoluteUnixPath import com.google.cloud.tools.jib.api.LayerConfiguration private[jib] object SbtJibHelper { def mappingsConverter(name: String, mappings: Seq[(File, String)]): LayerConfiguration = { val layerConfiguration = LayerConfiguration.builder() mappings .filter(_._1.isFile) // fixme resolve all directory files .map { case (file, fullPathOnImage) => (file.toPath, fullPathOnImage) } .toList .sortBy(_._2) .foreach { case (sourceFile, pathOnImage) => layerConfiguration.addEntry(sourceFile, AbsoluteUnixPath.get(pathOnImage)) } layerConfiguration.build() } }
Example 185
Source File: MappingsHelper.scala From sbt-jib with Apache License 2.0 | 5 votes |
package de.gccc.jib

import java.io.File

import sbt._
import sbt.io.{ IO, PathFinder }

import scala.language.postfixOps

// NOTE: this snippet was truncated and did not show the enclosing object declaration; it is
// restored below (assumed) so the braces balance. The original file defines further helpers
// (e.g. contentOf, used elsewhere in this project) that are not shown here.
object MappingsHelper {

  def fromClasspath(entries: Seq[Attributed[File]],
                    target: String,
                    includeArtifact: Artifact => Boolean,
                    includeOnNoArtifact: Boolean = false): Seq[(File, String)] =
    entries.filter(attr => attr.get(sbt.Keys.artifact.key) map includeArtifact getOrElse includeOnNoArtifact).map {
      attribute =>
        val file = attribute.data
        file -> s"$target/${file.getName}"
    }
}
Example 186
Source File: ScatterGatherChannelSpec.scala From zio-nio with Apache License 2.0 | 5 votes |
package zio.nio.core.channels import java.io.{ File, RandomAccessFile } import zio.nio.core.{ BaseSpec, Buffer } import zio.test.Assertion._ import zio.test._ import zio.{ Chunk, IO, ZIO } import scala.io.Source object ScatterGatherChannelSpec extends BaseSpec { override def spec = suite("ScatterGatherChannelSpec")( testM("scattering read") { for { raf <- ZIO.effectTotal(new RandomAccessFile("nio-core/src/test/resources/scattering_read_test.txt", "r")) fileChannel = raf.getChannel readLine = (buffer: Buffer[Byte]) => for { _ <- buffer.flip array <- buffer.array text = array.takeWhile(_ != 10).map(_.toChar).mkString.trim } yield text buffs <- IO.collectAll(Seq(Buffer.byte(5), Buffer.byte(5))) channel = new FileChannel(fileChannel) _ <- channel.readBuffer(buffs) list <- IO.collectAll(buffs.map(readLine)) _ <- channel.close } yield assert(list)(equalTo("Hello" :: "World" :: Nil)) }, testM("gathering write") { for { file <- ZIO.effect(new File("nio-core/src/test/resources/gathering_write_test.txt")) raf = new RandomAccessFile(file, "rw") fileChannel = raf.getChannel buffs <- IO.collectAll( Seq( Buffer.byte(Chunk.fromArray("Hello".getBytes)), Buffer.byte(Chunk.fromArray("World".getBytes)) ) ) channel = new FileChannel(fileChannel) _ <- channel.writeBuffer(buffs) _ <- channel.close result = Source.fromFile(file).getLines().toSeq _ = file.delete() } yield assert(result)(equalTo(Seq("HelloWorld"))) } ) }
Example 187
Source File: ScatterGatherChannelSpec.scala From zio-nio with Apache License 2.0 | 5 votes |
package zio.nio.channels import java.io.{ File, RandomAccessFile } import zio.nio.core.Buffer import zio.nio.BaseSpec import zio.test.Assertion._ import zio.test._ import zio.{ Chunk, IO, ZIO } import scala.io.Source object ScatterGatherChannelSpec extends BaseSpec { override def spec = suite("ScatterGatherChannelSpec")( testM("scattering read") { for { raf <- ZIO.effectTotal(new RandomAccessFile("nio/src/test/resources/scattering_read_test.txt", "r")) fileChannel = raf.getChannel readLine = (buffer: Buffer[Byte]) => for { _ <- buffer.flip array <- buffer.array text = array.takeWhile(_ != 10).map(_.toChar).mkString.trim } yield text buffs <- IO.collectAll(Seq(Buffer.byte(5), Buffer.byte(5))) list <- FileChannel(fileChannel).use { channel => for { _ <- channel.readBuffer(buffs) list <- IO.collectAll(buffs.map(readLine)) } yield list } } yield assert(list)(equalTo("Hello" :: "World" :: Nil)) }, testM("gathering write") { for { file <- ZIO.effect(new File("nio/src/test/resources/gathering_write_test.txt")) raf = new RandomAccessFile(file, "rw") fileChannel = raf.getChannel buffs <- IO.collectAll( Seq( Buffer.byte(Chunk.fromArray("Hello".getBytes)), Buffer.byte(Chunk.fromArray("World".getBytes)) ) ) _ <- FileChannel(fileChannel).use(_.writeBuffer(buffs).unit) result = Source.fromFile(file).getLines().toSeq _ = file.delete() } yield assert(result)(equalTo(Seq("HelloWorld"))) } ) }
Example 188
Source File: GlobalConfig.scala From sbt-api-builder with MIT License | 5 votes |
package apibuilder.sbt import java.io.File import sbt.IO import scala.util.Try final case class GlobalConfig(profiles: Map[String, Profile] = Map.empty) extends AnyVal { override def toString: String = profiles.keys.mkString(", ") } final case class Profile(token: String) extends AnyVal object GlobalConfig { private val ProfileM = "^\\s*\\[\\s*(profile\\s+|)(\\w+)\\s*\\]\\s*$".r private val TokenM = "^\\s*token\\s*=\\s*(\\w+)$".r private[this] implicit final class Ext(val acc: List[(String, Option[Profile])]) extends AnyVal { def hasNotSeen(pn: String): Boolean = !acc.exists { case (pn0, _) => pn0 == pn } } def load(f: File): Either[Throwable, GlobalConfig] = Try { IO.reader(f) { r => GlobalConfig( IO.foldLines(r, List.empty[(String, Option[Profile])]) { case (acc, ProfileM(_, pn)) if acc.hasNotSeen(pn) => (pn -> None) :: acc case ((cpn, None) :: rest, TokenM(t)) => (cpn -> Some(Profile(t))) :: rest case (acc, _) => acc } .collect { case (profile, Some(config)) => profile -> config } .toMap ) } }.toEither }
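GlobalConfig.load above folds over the file line by line with the two regexes (profile headers, then token lines). A small sketch that writes a throwaway config in that format and parses it back (the file contents are made up):

object GlobalConfigSketch extends App {
  import java.io.File
  import sbt.IO

  val f = File.createTempFile("apibuilder", ".config")
  IO.write(f,
    """[default]
      |token = abc123
      |
      |[profile work]
      |token = def456
      |""".stripMargin)

  println(GlobalConfig.load(f)) // Right(GlobalConfig(...)) containing the "default" and "work" profiles
  f.delete()
}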
Example 189
Source File: CLIConfig.scala From sbt-api-builder with MIT License | 5 votes |
package apibuilder.sbt import java.io.{File, FileNotFoundException} import java.nio.file.{Path, PathMatcher} import io.circe.Decoder import io.circe.yaml.parser import sbt.IO final case class CLIConfig(organizationFor: Map[String, OrganizationConfig]) extends AnyVal final case class OrganizationConfig(applicationFor: Map[String, ApplicationConfig]) extends AnyVal final case class ApplicationConfig(version: String, generators: Seq[GeneratorConfig]) final case class GeneratorConfig(generator: String, maybeTargetPath: Option[Path], pathMatchers: Seq[PathMatcher]) object CLIConfig extends BaseDecoders { final def load(f: File): Either[ConfigException, CLIConfig] = if (!f.getParentFile.exists) Left(MissingParentDirectory(f)) else { try { IO.reader(f) { r => parser .parse(r) .left .map(pf => InvalidContent(pf.message)) .flatMap(_.as[CLIConfig].left.map(df => InvalidContent(df.message))) } } catch { case _: FileNotFoundException => Left(MissingFile(f)) } } implicit final val cliConfigDecoder: Decoder[CLIConfig] = Decoder.instance { c => c.downField("code").as[Map[String, OrganizationConfig]].map(CLIConfig.apply) } implicit final val organizationConfigDecoder: Decoder[OrganizationConfig] = Decoder.instance { c => c.value.as[Map[String, ApplicationConfig]].map(OrganizationConfig.apply) } implicit final val applicationConfig: Decoder[ApplicationConfig] = Decoder.instance { c => for { version <- c.downField("version").as[String] generators <- c.downField("generators").as[Seq[GeneratorConfig]] } yield ApplicationConfig(version, generators) } implicit final val generatorConfigDecoder: Decoder[GeneratorConfig] = Decoder.instance { c => for { generator <- c.downField("generator").as[String] maybeTargetPath <- c.downField("target").as[Option[Path]] pathMatchers <- c.downField("files").as[Seq[PathMatcher]] } yield GeneratorConfig(generator, maybeTargetPath, pathMatchers) } }
Example 190
Source File: HLSTools.scala From fpga-tidbits with BSD 2-Clause "Simplified" License | 5 votes |
package fpgatidbits.hlstools import sys.process._ import java.io.File // Collection of utilities for Vivado HLS object TidbitsHLSTools { // quick-and-dirty single file HLS synthesis def hlsToVerilog( inFile: String, outDir: String, synDir: String, projName: String, topFxnName: String, inclDirs: Seq[String] = Seq(), fpgaPart: String = "xc7z020clg400-1", nsClk: String = "5.0" ) = { // get path to hls_syn.tcl val synthScriptPath = getClass.getResource("/script/hls_syn.tcl").getPath // need to provide include dirs as a single string argument, parsing // done in tcl. note: dirs here should have no spaces! val inclDirString = inclDirs.mkString(" ") // call the actual synthesis script val cmdline = Seq( "vivado_hls", "-f", synthScriptPath, "-tclargs", projName, inFile, fpgaPart, nsClk, topFxnName, inclDirString ) val status = Process(cmdline, new File(synDir)) ! ProcessLogger(stdout append _+"\n", stderr append _+"\n") // copy results to outDir s"cp -a $synDir/$projName/sol1/impl/verilog/. $outDir/".!! } }
Example 191
Source File: License.scala From iep-apps with Apache License 2.0 | 5 votes |
import java.io.File import java.io.PrintStream import java.time.ZonedDateTime import java.time.ZoneOffset import scala.io.Source import sbt._ """.stripMargin.trim def findFiles(dir: File): Seq[File] = { (dir ** "*.scala").get ++ (dir ** "*.java").get } def checkLicenseHeaders(log: Logger, srcDir: File): Unit = { val badFiles = findFiles(srcDir).filterNot(checkLicenseHeader) if (badFiles.nonEmpty) { badFiles.foreach { f => log.error(s"bad license header: $f") } sys.error(s"${badFiles.size} files with incorrect header, run formatLicenseHeaders to fix") } else { log.info("all files have correct license header") } } def checkLicenseHeader(file: File): Boolean = { val lines = Source.fromFile(file, "UTF-8").getLines().toList checkLicenseHeader(lines) } def checkLicenseHeader(lines: List[String]): Boolean = { val header = lines.takeWhile(!_.startsWith("package ")).mkString(lineSeparator) header == apache2 } def formatLicenseHeaders(log: Logger, srcDir: File): Unit = { findFiles(srcDir).foreach { f => formatLicenseHeader(log, f) } } def formatLicenseHeader(log: Logger, file: File): Unit = { val lines = Source.fromFile(file, "UTF-8").getLines().toList if (!checkLicenseHeader(lines)) { log.info(s"fixing license header: $file") writeLines(file, apache2 :: removeExistingHeader(lines)) } } def removeExistingHeader(lines: List[String]): List[String] = { val res = lines.dropWhile(!_.startsWith("package ")) if (res.isEmpty) lines else res } def writeLines(file: File, lines: List[String]): Unit = { val out = new PrintStream(file) try lines.foreach(out.println) finally out.close() } }
Example 192
Source File: S3CopyService.scala From iep-apps with Apache License 2.0 | 5 votes |
package com.netflix.atlas.persistence import java.io.File import java.nio.file.Files import java.nio.file.Paths import akka.NotUsed import akka.actor.ActorSystem import akka.stream.ActorMaterializer import akka.stream.KillSwitch import akka.stream.KillSwitches import akka.stream.scaladsl.Keep import akka.stream.scaladsl.Source import com.netflix.atlas.core.util.Streams import com.netflix.iep.service.AbstractService import com.netflix.spectator.api.Registry import com.typesafe.config.Config import com.typesafe.scalalogging.StrictLogging import javax.inject.Inject import javax.inject.Singleton import scala.concurrent.duration._ @Singleton class S3CopyService @Inject()( val config: Config, val registry: Registry, implicit val system: ActorSystem ) extends AbstractService with StrictLogging { private val dataDir = config.getString("atlas.persistence.local-file.data-dir") private implicit val mat = ActorMaterializer() private var killSwitch: KillSwitch = _ private val s3Config = config.getConfig("atlas.persistence.s3") private val cleanupTimeoutMs = s3Config.getDuration("cleanup-timeout").toMillis private val maxInactiveMs = s3Config.getDuration("max-inactive-duration").toMillis private val maxFileDurationMs = config.getDuration("atlas.persistence.local-file.max-duration").toMillis require( maxInactiveMs > maxFileDurationMs, "`max-inactive-duration` MUST be longer than `max-duration`, otherwise file may be renamed before normal write competes" ) override def startImpl(): Unit = { logger.info("Starting service") killSwitch = Source .tick(1.second, 5.seconds, NotUsed) .viaMat(KillSwitches.single)(Keep.right) .flatMapMerge(Int.MaxValue, _ => Source(FileUtil.listFiles(new File(dataDir)))) .toMat(new S3CopySink(s3Config, registry, system))(Keep.left) .run() } override def stopImpl(): Unit = { logger.info("Stopping service") waitForCleanup() if (killSwitch != null) killSwitch.shutdown() } private def waitForCleanup(): Unit = { logger.info("Waiting for cleanup") val start = System.currentTimeMillis while (hasMoreFiles) { if (System.currentTimeMillis() > start + cleanupTimeoutMs) { logger.error("Cleanup timeout") return } Thread.sleep(1000) } logger.info("Cleanup done") } private def hasMoreFiles: Boolean = { try { Streams.scope(Files.list(Paths.get(dataDir))) { dir => dir.anyMatch(f => Files.isRegularFile(f)) } } catch { case e: Exception => { logger.error(s"Error checking hasMoreFiles in $dataDir", e) true // Assuming there's more files on error to retry } } } }
Example 193
Source File: FileUtil.scala From iep-apps with Apache License 2.0 | 5 votes |
package com.netflix.atlas.persistence import java.io.File import java.nio.file.Files import com.netflix.atlas.core.util.Streams import com.typesafe.scalalogging.StrictLogging import scala.jdk.StreamConverters._ object FileUtil extends StrictLogging { def delete(f: File): Unit = { try { Files.delete(f.toPath) logger.debug(s"deleted file $f") } catch { case e: Exception => logger.error(s"failed to delete path $f", e) } } def listFiles(f: File): List[File] = { try { Streams.scope(Files.list(f.toPath)) { dir => dir.toScala(List).map(_.toFile) } } catch { case e: Exception => logger.error(s"failed to list files for: $f", e) Nil } } def isTmpFile(f: File): Boolean = { f.getName.endsWith(RollingFileWriter.TmpFileSuffix) } }
Example 194
Source File: AvroTest.scala From iep-apps with Apache License 2.0 | 5 votes |
package com.netflix.atlas.persistence import java.io.File import java.nio.file.Files import java.nio.file.Paths import org.apache.avro.file.DataFileReader import org.apache.avro.specific.SpecificDatumReader // Read metadata for all avro files in given directory object AvroTest { def main(args: Array[String]): Unit = { val dir = args(0) Files .walk(Paths.get(dir)) .filter(path => Files.isRegularFile(path)) .forEach(p => readFile(p.toFile)) } private def readFile(file: File): Unit = { println(s"##### Reading file: $file") var count = 0 val userDatumReader = new SpecificDatumReader[AvroDatapoint](classOf[AvroDatapoint]) val dataFileReader = new DataFileReader[AvroDatapoint](file, userDatumReader) while (dataFileReader.hasNext) { dataFileReader.next() count += 1 } println(s" blockCount = ${dataFileReader.getBlockCount}") println(s" blockSize = ${dataFileReader.getBlockSize}") println(s" numRecords = $count") dataFileReader.close() println } }
Example 195
Source File: RollingFileWriterSuite.scala From iep-apps with Apache License 2.0 | 5 votes |
package com.netflix.atlas.persistence import java.io.File import java.nio.file.Files import java.nio.file.Paths import com.netflix.atlas.core.model.Datapoint import com.netflix.spectator.api.NoopRegistry import org.apache.avro.file.DataFileReader import org.apache.avro.specific.SpecificDatumReader import org.scalatest.BeforeAndAfter import org.scalatest.BeforeAndAfterAll import org.scalatest.funsuite.AnyFunSuite import scala.collection.mutable.ListBuffer class RollingFileWriterSuite extends AnyFunSuite with BeforeAndAfter with BeforeAndAfterAll { private val outputDir = "./target/unitTestAvroOutput" private val registry = new NoopRegistry before { listFilesSorted(outputDir).foreach(_.delete()) // Clean up files if exits Files.createDirectories(Paths.get(outputDir)) } after { listFilesSorted(outputDir).foreach(_.delete()) Files.deleteIfExists(Paths.get(outputDir)) } // Write 3 datapoints, first 2 is written in file 1, rollover, and 3rd one is written in file 2 test("avro writer rollover by max records") { val rollingConf = RollingConfig(2, 12000, 12000) val hourStart = 3600000 val hourEnd = 7200000 val writer = new RollingFileWriter(s"$outputDir/prefix", rollingConf, hourStart, hourEnd, registry) writer.initialize() createData(hourStart, 0, 1, 2).foreach(writer.write) writer.write(Datapoint(Map.empty, hourEnd, 3)) // out of range, should be ignored writer.close() // Check num of files val files = listFilesSorted(outputDir) assert(files.size == 2) // Check file 1 records val file1 = files.head assert(file1.getName.endsWith(".0000-0001")) val dpArray1 = readAvro(file1) assert(dpArray1.size == 2) assert(dpArray1(0).getValue == 0) assert(dpArray1(0).getTags.get("node") == "0") assert(dpArray1(1).getValue == 1) assert(dpArray1(1).getTags.get("node") == "1") // Check file 2 records val file2 = files.last assert(file2.getName.endsWith(".0002-0002")) val dpArray2 = readAvro(file2) assert(dpArray2.size == 1) assert(dpArray2(0).getValue == 2) assert(dpArray2(0).getTags.get("node") == "2") } private def createData(startTime: Long, values: Double*): List[Datapoint] = { values.toList.zipWithIndex.map { case (v, i) => val tags = Map( "name" -> "cpu", "node" -> s"$i" ) Datapoint(tags, startTime + i * 1000, v, 60000) } } private def listFilesSorted(dir: String): List[File] = { val d = new File(dir) if (!d.exists()) { Nil } else { new File(dir).listFiles().filter(_.isFile).toList.sortBy(_.getName) } } private def readAvro(file: File): Array[AvroDatapoint] = { val userDatumReader = new SpecificDatumReader[AvroDatapoint](classOf[AvroDatapoint]) val dataFileReader = new DataFileReader[AvroDatapoint](file, userDatumReader) val dpListBuf = ListBuffer.empty[AvroDatapoint] try { while (dataFileReader.hasNext) { dpListBuf.addOne(dataFileReader.next) } } finally { dataFileReader.close() } dpListBuf.toArray } }
Example 196
Source File: Resources.scala From MoVE with Mozilla Public License 2.0 | 5 votes |
import sbt._
import java.io.FileNotFoundException
import java.io.File

object Resources {

  def getJavaHome: File = {
    val javaHome = Option(System.getenv("JAVA_HOME")).map(_ + "/jre").
      orElse(Option(System.getProperty("java.home")))

    javaHome match {
      case Some(str) => file(str)
      case None => throw new FileNotFoundException(
        "$JAVA_HOME is undefined, as is the system property `java.home`. " +
        "Set up the environment variable JAVA_HOME.")
    }
  }

  def checkExists(file: File): File = {
    if (file.exists()) file
    else throw new FileNotFoundException(s"Can't find needed resource: $file")
  }
}
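Since this helper lives in the sbt build, it would typically be called from build.sbt. A hypothetical sketch (the setting key name is illustrative, not part of the project):

// build.sbt (sketch): resolve the JRE location once and fail the build early if it is missing.
lazy val jreHome = settingKey[File]("Location of the JRE used by the build")

jreHome := Resources.checkExists(Resources.getJavaHome)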
Example 197
Source File: VwSparseMultilabelPredictorTest.scala From aloha with MIT License | 5 votes |
package com.eharmony.aloha.models.vw.jni.multilabel

import java.io.{ByteArrayOutputStream, File, FileInputStream}

import com.eharmony.aloha.ModelSerializationTestHelper
import com.eharmony.aloha.io.sources.{Base64StringSource, ExternalSource, ModelSource}
import org.apache.commons.codec.binary.Base64
import org.apache.commons.io.IOUtils
import org.junit.Assert._
import org.junit.Test
import org.junit.runner.RunWith
import org.junit.runners.BlockJUnit4ClassRunner
import vowpalWabbit.learner.{VWActionScoresLearner, VWLearners}

@RunWith(classOf[BlockJUnit4ClassRunner])
class VwSparseMultilabelPredictorTest extends ModelSerializationTestHelper {
  import VwSparseMultilabelPredictorTest._

  @Test def testSerializability(): Unit = {
    val predictor = getPredictor(getModelSource(), 3)
    val ds = serializeDeserializeRoundTrip(predictor)
    assertEquals(predictor, ds)
    assertEquals(predictor.vwParams(), ds.vwParams())
    assertNotNull(ds.vwModel)
  }

  @Test def testVwParameters(): Unit = {
    val numLabelsInTrainingSet = 3
    val predictor = getPredictor(getModelSource(), numLabelsInTrainingSet)

    predictor.vwParams() match {
      case Data(vwBinFilePath, ringSize) =>
        checkVwBinFile(vwBinFilePath)
        checkVwRingSize(numLabelsInTrainingSet, ringSize.toInt)
      case ps => fail(s"Unexpected VW parameters format. Found string: $ps")
    }
  }
}

object VwSparseMultilabelPredictorTest {
  private val Data = """\s*-i\s+(\S+)\s+--ring_size\s+(\d+)\s+--testonly\s+--quiet""".r

  private def getModelSource(): ModelSource = {
    val f = File.createTempFile("i_dont", "care")
    f.deleteOnExit()
    val learner =
      VWLearners.create[VWActionScoresLearner](s"--quiet --csoaa_ldf mc --csoaa_rank -f ${f.getCanonicalPath}")
    learner.close()
    val baos = new ByteArrayOutputStream()
    IOUtils.copy(new FileInputStream(f), baos)
    val src = Base64StringSource(Base64.encodeBase64URLSafeString(baos.toByteArray))
    ExternalSource(src.localVfs)
  }

  private def getPredictor(modelSrc: ModelSource, numLabelsInTrainingSet: Int) =
    VwSparseMultilabelPredictor[Any](modelSrc, Nil, Nil, numLabelsInTrainingSet)

  private def checkVwBinFile(vwBinFilePath: String): Unit = {
    val vwBinFile = new File(vwBinFilePath)
    assertTrue("VW binary file should have been written to disk", vwBinFile.exists())
    vwBinFile.deleteOnExit()
  }

  private def checkVwRingSize(numLabelsInTrainingSet: Int, ringSize: Int): Unit = {
    assertEquals(
      "vw --ring_size parameter is incorrect:",
      numLabelsInTrainingSet + VwSparseMultilabelPredictor.AddlVwRingSize,
      ringSize
    )
  }
}
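The Data regex above extracts the -i model path and the --ring_size value from the VW parameter string. A small standalone sketch of the same extractor pattern (the path and ring size values are made up):

object VwParamsRegexDemo extends App {
  // Same pattern as in the test: captures the model file path and the ring size.
  private val Data = """\s*-i\s+(\S+)\s+--ring_size\s+(\d+)\s+--testonly\s+--quiet""".r

  // Hypothetical VW parameter string, shaped like the one the predictor produces.
  "-i /tmp/model.vw --ring_size 5 --testonly --quiet" match {
    case Data(path, ring) => println(s"model file: $path, ring size: ${ring.toInt}")
    case other            => println(s"no match: $other")
  }
}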
Example 198
Source File: StdAvroModelFactory.scala From aloha with MIT License | 5 votes |
package com.eharmony.aloha.factory.avro

import java.io.File

import org.apache.commons.{vfs => vfs1, vfs2}
import com.eharmony.aloha.io.vfs.{Vfs1, Vfs2}
import com.eharmony.aloha.audit.impl.avro.Score
import com.eharmony.aloha.factory.ModelFactory
import org.apache.avro.generic.GenericRecord

import scala.util.Try

object StdAvroModelFactory {

  @deprecated(message = "Prefer StdAvroModelFactory.fromConfig(conf: FactoryConfig)", since = "4.0.1")
  def apply(modelDomainSchemaVfsUrl: String,
            modelCodomainRefInfoStr: String,
            imports: Seq[String] = Nil,
            classCacheDir: Option[File] = None,
            dereferenceAsOptional: Boolean = true,
            useVfs2: Boolean = true): Try[ModelFactory[GenericRecord, Score]] = {
    val vfs = url(modelDomainSchemaVfsUrl, useVfs2)

    vfs.flatMap { u =>
      UrlConfig(
        u,
        modelCodomainRefInfoStr,
        imports,
        classCacheDir,
        dereferenceAsOptional
      )()
    }
  }

  private[this] def url(modelDomainSchemaVfsUrl: String, useVfs2: Boolean) = {
    val u =
      if (useVfs2)
        Try { Vfs2(vfs2.VFS.getManager.resolveFile(modelDomainSchemaVfsUrl)) }
      else
        Try { Vfs1(vfs1.VFS.getManager.resolveFile(modelDomainSchemaVfsUrl)) }

    FactoryConfig.wrapException(u)
  }
}
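A hypothetical call site for the deprecated apply above; the schema URL and codomain type string are placeholders, not resources from the real project:

import com.eharmony.aloha.audit.impl.avro.Score
import com.eharmony.aloha.factory.ModelFactory
import com.eharmony.aloha.factory.avro.StdAvroModelFactory
import org.apache.avro.generic.GenericRecord

import scala.util.Try

object StdAvroModelFactoryUsage {
  // Placeholder schema URL and codomain type; substitute real values for an actual model factory.
  val factory: Try[ModelFactory[GenericRecord, Score]] =
    StdAvroModelFactory(
      modelDomainSchemaVfsUrl = "res:avro/domain.avsc",
      modelCodomainRefInfoStr = "Double"
    )
}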
Example 199
Source File: modelFactoryPlaceholder.scala From aloha with MIT License | 5 votes |
package com.eharmony.aloha.factory

import java.io.File

import com.eharmony.aloha.factory.ex.AlohaFactoryException
import com.eharmony.aloha.io.StringReadable
import org.apache.commons.{vfs, vfs2}
import spray.json.{JsObject, pimpString}

import scala.util.{Failure, Try}

private[factory] sealed trait ImportedModelPlaceholder {
  def resolveFileContents(): Try[JsObject]
}

private[factory] case class Vfs2ImportedModelPlaceholder(fileDescriptor: String) extends ImportedModelPlaceholder {
  def resolveFileContents() =
    for {
      file <- Try { vfs2.VFS.getManager.resolveFile(fileDescriptor) } recoverWith {
        case f => Failure { new AlohaFactoryException(s"Couldn't resolve VFS2 file: $fileDescriptor", f) }
      }
      json <- Try { StringReadable.fromVfs2(file).parseJson.asJsObject } recoverWith {
        case f => Failure { new AlohaFactoryException(s"Couldn't get JSON for VFS2 file: $file", f) }
      }
    } yield json
}

private[factory] case class Vfs1ImportedModelPlaceholder(fileDescriptor: String) extends ImportedModelPlaceholder {
  def resolveFileContents() =
    for {
      file <- Try { vfs.VFS.getManager.resolveFile(fileDescriptor) } recoverWith {
        case f => Failure { new AlohaFactoryException(s"Couldn't resolve VFS1 file: $fileDescriptor", f) }
      }
      json <- Try { StringReadable.fromVfs1(file).parseJson.asJsObject } recoverWith {
        case f => Failure { new AlohaFactoryException(s"Couldn't get JSON for VFS1 file: $file", f) }
      }
    } yield json
}

private[factory] case class FileImportedModelPlaceholder(fileDescriptor: String) extends ImportedModelPlaceholder {
  def resolveFileContents() =
    for {
      file <- Try { new File(fileDescriptor) } recoverWith {
        case f => Failure { new AlohaFactoryException(s"Couldn't resolve file: $fileDescriptor", f) }
      }
      json <- Try { StringReadable.fromFile(file).parseJson.asJsObject } recoverWith {
        case f => Failure { new AlohaFactoryException(s"Couldn't get JSON for file: $file", f) }
      }
    } yield json
}
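A hypothetical use of one of these placeholders to load a model definition; the descriptor string is a placeholder, and the object sits in the same package because the placeholders are private[factory]:

package com.eharmony.aloha.factory

import scala.util.{Failure, Success}

object PlaceholderUsage {

  def main(args: Array[String]): Unit = {
    // Hypothetical descriptor; any VFS2-resolvable URL would work here.
    val placeholder = Vfs2ImportedModelPlaceholder("res:models/example-model.json")

    placeholder.resolveFileContents() match {
      case Success(json) => println(s"model JSON keys: ${json.fields.keySet}")
      case Failure(e)    => println(s"failed to resolve model: ${e.getMessage}")
    }
  }
}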
Example 200
Source File: ContainerReadable.scala From aloha with MIT License | 5 votes |
package com.eharmony.aloha.io

import scala.language.higherKinds
import java.io.{File, InputStream, Reader}
import java.net.URL
import org.apache.commons.{vfs => vfs1, vfs2}

trait ContainerReadable[C[_]] {
  def fromString[A](s: String): C[A]
  def fromFile[A](f: File): C[A]
  def fromInputStream[A](is: InputStream): C[A]
  def fromUrl[A](u: URL): C[A]
  def fromReader[A](r: Reader): C[A]
  def fromVfs1[A](foVfs1: vfs1.FileObject): C[A]
  def fromVfs2[A](foVfs2: vfs2.FileObject): C[A]
  def fromResource[A](s: String): C[A]
  def fromClasspathResource[A](s: String): C[A]
}
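Because the container type C[_] is abstract, calling code can be written once against the trait and reused for any concrete reader. A minimal sketch; readAll is a hypothetical helper, not part of the library:

import java.io.File

import com.eharmony.aloha.io.ContainerReadable

import scala.language.higherKinds

object ContainerReadableUsage {

  // Hypothetical helper: read every file with whatever container-producing reader is supplied.
  def readAll[C[_], A](reader: ContainerReadable[C])(files: Seq[File]): Seq[C[A]] =
    files.map(f => reader.fromFile[A](f))
}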