scala.io.Source Scala Examples
The following examples show how to use scala.io.Source.
Each example is taken from an open-source project; the header above each listing gives the source file, the project it comes from, and its license.
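Before the project examples, here is a minimal sketch of the most common scala.io.Source pattern: open a file, consume its lines, and close the source. The file name "data.txt" is a placeholder.

import scala.io.Source

object SourceBasics {
  def main(args: Array[String]): Unit = {
    val source = Source.fromFile("data.txt") // placeholder path
    try {
      source.getLines().foreach(println) // getLines() is a one-pass iterator over the file's lines
    } finally {
      source.close() // always release the underlying reader
    }
  }
}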
Example 1
Source File: WortschatzParser.scala From dbpedia-spotlight-model with Apache License 2.0 | 7 votes |
package org.dbpedia.spotlight.io

import com.officedepot.cdap2.collection.CompactHashSet
import org.apache.log4j.Logger

import scala.io.Source

object WortschatzParser {

  val LOG = Logger.getLogger(this.getClass)

  def parse(filename: String): CompactHashSet[String] = {
    parse(filename, count => true)
  }

  def parse(filename: String, minimumCount: Int): CompactHashSet[String] = {
    parse(filename, count => (count > minimumCount))
  }

  def parse(filename: String, minimumCount: Int, maximumCount: Int): CompactHashSet[String] = {
    parse(filename, count => (count > minimumCount) && (count < maximumCount))
  }

  def parse(filename: String, condition: Int => Boolean): CompactHashSet[String] = {
    LOG.info(" parsing common words file ")
    // get lines, split in three fields, get the middle one (word)
    val commonWords = new CompactHashSet[String]()
    val log = Source.fromFile(filename, "iso-8859-1").getLines.foreach(line => {
      if (line.trim() != "") {
        val fields = line.split("\\s")
        if (condition(fields(2).toInt)) commonWords.add(fields(1))
      }
    })
    commonWords
  }
}
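The example above opens the file with an explicit ISO-8859-1 charset (the second argument to Source.fromFile is the charset name) and splits each line on whitespace. A stripped-down sketch of that pattern; the file name and field positions are illustrative:

import scala.io.Source

val source = Source.fromFile("wortschatz.txt", "iso-8859-1") // explicit charset
val words = source.getLines()
  .filter(_.trim.nonEmpty)
  .map(_.split("\\s"))
  .collect { case fields if fields.length >= 3 && fields(2).forall(_.isDigit) => fields(1) }
  .toSet
source.close()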
Example 2
Source File: Banner.scala From daml with Apache License 2.0 | 6 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.sandbox.banner

import java.io.PrintStream

import scala.io.Source

object Banner {
  def show(out: PrintStream): Unit = {
    val resourceName = "banner.txt"
    if (getClass.getClassLoader.getResource(resourceName) != null)
      out.println(
        Source
          .fromResource(resourceName)
          .getLines
          .mkString("\n"))
    else
      out.println("Banner resource missing from classpath.")
  }
}
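Source.fromResource (available since Scala 2.12) reads from the classpath rather than the filesystem, as the Banner example does. A minimal sketch, assuming a resource named banner.txt is on the classpath:

import scala.io.Source

val banner = Source.fromResource("banner.txt") // resolved via the classloader, not a file path
try println(banner.getLines().mkString("\n"))
finally banner.close()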
Example 3
Source File: ComponentsFixture.scala From daml with Apache License 2.0 | 6 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.navigator.test import java.util.concurrent.atomic.AtomicReference import com.daml.navigator.test.config.Arguments import com.daml.navigator.test.runner.{HeadNavigator, PackagedDamlc, PackagedSandbox} import com.typesafe.scalalogging.LazyLogging import scala.io.Source import scala.util.{Failure, Success, Try} class ComponentsFixture( val args: Arguments, val navigatorPort: Int, val sandboxPort: Int, val scenario: String ) extends LazyLogging { // A list of commands on how to destroy started processes private val killProcs: AtomicReference[List[Unit => Unit]] = new AtomicReference(List.empty) private val onlineUrl = s"http://localhost:$navigatorPort/api/about" private def get( url: String, connectTimeout: Int = 1000, readTimeout: Int = 1000, requestMethod: String = "GET" ): String = { import java.net.{URL, HttpURLConnection} val connection = (new URL(url)).openConnection.asInstanceOf[HttpURLConnection] connection.setConnectTimeout(connectTimeout) connection.setReadTimeout(readTimeout) connection.setRequestMethod(requestMethod) val inputStream = connection.getInputStream val content = Source.fromInputStream(inputStream).mkString if (inputStream != null) inputStream.close() content } def startup(): Try[Unit] = { if (args.startComponents) { logger.info("Starting the sandbox and the Navigator") for { (darFile, tempFiles) <- Try(PackagedDamlc.run(args.damlPath)) sandbox <- Try(PackagedSandbox.runAsync(sandboxPort, darFile, scenario)) _ = killProcs.updateAndGet(s => sandbox :: s) navigator <- Try( HeadNavigator.runAsync(args.navConfPAth, args.navigatorDir, navigatorPort, sandboxPort)) _ = killProcs.updateAndGet(s => navigator :: s) } yield { () } } else { Success(()) } } private def retry[R](action: => R, maxRetries: Int, delayMillis: Int): Try[R] = { def retry0(count: Int): Try[R] = { Try(action) match { case Success(r) => Success(r) case Failure(e) => if (count > maxRetries) { logger.error( s"Navigator is not available after $maxRetries retries with $delayMillis millis interval.") Failure(e) } else { logger.info(s"Navigator is not available yet, waiting $delayMillis millis ") Thread.sleep(delayMillis.toLong) retry0(count + 1) } } } retry0(0) } def waitForNavigator(): Try[Unit] = { logger.info(s"Waiting for the Navigator to start up (waiting for $onlineUrl)") retry({ get(onlineUrl); () }, 120, 1000) } def shutdown(): Unit = { killProcs.getAndUpdate(procs => { procs.foreach(killAction => Try { killAction(()) }) List.empty }) () } }
Example 4
Source File: FundamentalsParser.scala From YahooFinanceScala with MIT License | 6 votes |
package openquant.yahoofinance.impl

import java.time.format.DateTimeFormatter
import java.time.{LocalDate, ZoneId, ZonedDateTime}

import com.github.tototoshi.csv._
import openquant.yahoofinance.Fundamentals

import scala.io.Source

object FundamentalsParser extends Function1[String, Vector[Fundamentals]] {
  def apply(content: String): Vector[Fundamentals] = {
    val csvReader = CSVReader.open(Source.fromString(content))
    val fundamentals: Vector[Fundamentals] = csvReader.toStream.map { fields ⇒
      parseCSVLine(fields.toVector)
    }.toVector
    fundamentals
  }

  private def parseCSVLine(field: Vector[String]): Fundamentals = {
    require(field.length >= 2, "number of fields")
    val name = field(1)
    if (name == "N/A")
      Fundamentals(
        looksValid = false,
        symbol = field(0),
        name = name
      )
    else
      Fundamentals(
        looksValid = true,
        symbol = field(0),
        name = name
      )
  }
}
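When the content is already in memory, Source.fromString wraps a plain String as a Source, which is what the example above uses to feed the CSV reader. A small hedged sketch; the ticker strings are just illustrative data:

import scala.io.Source

val csv = "AAPL,Apple Inc.\nMSFT,Microsoft Corporation"
val rows = Source.fromString(csv).getLines().map(_.split(",").toVector).toVector
println(rows) // Vector(Vector(AAPL, Apple Inc.), Vector(MSFT, Microsoft Corporation))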
Example 5
Source File: package.scala From mantis with Apache License 2.0 | 6 votes |
package io.iohk.ethereum

import java.io.{File, PrintWriter}
import java.net.{Inet6Address, InetAddress}
import java.security.SecureRandom

import io.iohk.ethereum.crypto._
import org.spongycastle.crypto.AsymmetricCipherKeyPair
import org.spongycastle.crypto.params.ECPublicKeyParameters
import org.spongycastle.math.ec.ECPoint
import org.spongycastle.util.encoders.Hex

import scala.io.Source

package object network {

  val ProtocolVersion = 4

  implicit class ECPublicKeyParametersNodeId(val pubKey: ECPublicKeyParameters) extends AnyVal {
    def toNodeId: Array[Byte] =
      pubKey.asInstanceOf[ECPublicKeyParameters].getQ
        .getEncoded(false)
        .drop(1) // drop type info
  }

  def publicKeyFromNodeId(nodeId: String): ECPoint = {
    val bytes = ECDSASignature.uncompressedIndicator +: Hex.decode(nodeId)
    curve.getCurve.decodePoint(bytes)
  }

  def loadAsymmetricCipherKeyPair(filePath: String, secureRandom: SecureRandom): AsymmetricCipherKeyPair = {
    val file = new File(filePath)
    if (!file.exists()) {
      val keysValuePair = generateKeyPair(secureRandom)

      // Write keys to file
      val (priv, _) = keyPairToByteArrays(keysValuePair)
      require(file.getParentFile.exists() || file.getParentFile.mkdirs(), "Key's file parent directory creation failed")
      val writer = new PrintWriter(filePath)
      try {
        writer.write(Hex.toHexString(priv))
      } finally {
        writer.close()
      }

      keysValuePair
    } else {
      val reader = Source.fromFile(filePath)
      try {
        val privHex = reader.mkString
        keyPairFromPrvKey(Hex.decode(privHex))
      } finally {
        reader.close()
      }
    }
  }

  def getHostName(address: InetAddress): String = {
    val hostName = address.getHostAddress
    address match {
      case _: Inet6Address => s"[$hostName]"
      case _ => hostName
    }
  }
}
Example 6
Source File: TFIDF.scala From AI with Apache License 2.0 | 6 votes |
package com.bigchange.mllib import org.apache.spark.mllib.feature.{HashingTF, IDF} import org.apache.spark.mllib.linalg.{SparseVector => SV} import org.apache.spark.{SparkConf, SparkContext} import scala.io.Source object TFIDF { def main(args: Array[String]) { val conf = new SparkConf().setAppName("TfIdfTest") .setMaster("local") val sc = new SparkContext(conf) // Load documents (one per line).要求每行作为一个document,这里zipWithIndex将每一行的行号作为doc id val documents = sc.parallelize(Source.fromFile("J:\\github\\dataSet\\TFIDF-DOC").getLines() .filter(_.trim.length > 0).toSeq) .map(_.split(" ").toSeq) .zipWithIndex() // feature number val hashingTF = new HashingTF(Math.pow(2, 18).toInt) //line number for doc id,每一行的分词结果生成tf vector val idAndTFVector = documents.map { case (seq, num) => val tf = hashingTF.transform(seq) (num + 1, tf) } idAndTFVector.cache() // build idf model val idf = new IDF().fit(idAndTFVector.values) // transform tf vector to tf-idf vector val idAndTFIDFVector = idAndTFVector.mapValues(v => idf.transform(v)) // broadcast tf-idf vectors val idAndTFIDFVectorBroadCast = sc.broadcast(idAndTFIDFVector.collect()) // cal doc cosineSimilarity val docSims = idAndTFIDFVector.flatMap { case (id1, idf1) => // filter the same doc id val idfs = idAndTFIDFVectorBroadCast.value.filter(_._1 != id1) val sv1 = idf1.asInstanceOf[SV] import breeze.linalg._ val bsv1 = new SparseVector[Double](sv1.indices, sv1.values, sv1.size) idfs.map { case (id2, idf2) => val sv2 = idf2.asInstanceOf[SV] val bsv2 = new SparseVector[Double](sv2.indices, sv2.values, sv2.size) val cosSim = bsv1.dot(bsv2) / (norm(bsv1) * norm(bsv2)) (id1, id2, cosSim) } } docSims.foreach(println) sc.stop() } }
Example 7
Source File: HttpUtil.scala From sparta with Apache License 2.0 | 6 votes |
package com.stratio.benchmark.generator.utils import org.apache.http.HttpStatus import org.apache.http.client.methods.{HttpDelete, HttpGet, HttpPost, HttpPut} import org.apache.http.entity.StringEntity import org.apache.http.impl.client.HttpClientBuilder import org.apache.http.util.EntityUtils import org.apache.log4j.Logger import org.json4s.DefaultFormats import org.json4s.native.JsonMethods._ import scala.io.Source trait HttpUtil { private val logger = Logger.getLogger(this.getClass) def createPolicy(policyContent: String, endpoint: String)(implicit defaultFormats: DefaultFormats): String = { val policyName = (parse(policyContent) \ "name").extract[String] // If the policy exists when it launches the benchmark, it should stop and delete it. getPolicyId(policyName, endpoint) match { case Some(id) => stopPolicy(id, endpoint) deletePolicy(id, endpoint) case None => logger.debug(s"No policy with name $policyName exists in Sparta yet.") } val client = HttpClientBuilder.create().build() val post = new HttpPost(s"$endpoint/policyContext") post.setHeader("Content-type", "application/json") post.setEntity(new StringEntity(policyContent)) val response = client.execute(post) if(response.getStatusLine.getStatusCode != HttpStatus.SC_OK) throw new IllegalStateException(s"Sparta status code is not OK: ${response.getStatusLine.getStatusCode}") else { val entity = response.getEntity val policyId = (parse(EntityUtils.toString(entity)) \ "policyId").extract[String] policyId } } def getPolicyId(name: String, endpoint: String)(implicit defaultFormats: DefaultFormats): Option[String] = { val client = HttpClientBuilder.create().build() val get = new HttpGet(s"$endpoint/policy/findByName/$name") val response = client.execute(get) response.getStatusLine.getStatusCode match { case HttpStatus.SC_OK => Option((parse(EntityUtils.toString(response.getEntity)) \ "id").extract[String]) case _ => None } } def stopPolicy(id: String, endpoint: String): Unit = { val client = HttpClientBuilder.create().build() val put = new HttpPut(s"$endpoint/policyContext") put.setHeader("Content-Type", "application/json") val entity = new StringEntity(s"""{"id":"$id", "status":"Stopping"}""") put.setEntity(entity) val response = client.execute(put) if(response.getStatusLine.getStatusCode != HttpStatus.SC_CREATED) { logger.info(Source.fromInputStream(response.getEntity.getContent).mkString("")) logger.info(s"Sparta status code is not OK: ${response.getStatusLine.getStatusCode}") } } def deletePolicy(id: String, endpoint: String): Unit = { val client = HttpClientBuilder.create().build() val delete = new HttpDelete(s"$endpoint/policy/$id") val response = client.execute(delete) if(response.getStatusLine.getStatusCode != HttpStatus.SC_OK) logger.info(s"Sparta status code is not OK: ${response.getStatusLine.getStatusCode}") } }
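The stopPolicy helper above turns an HTTP response body into a String with Source.fromInputStream(...).mkString. A hedged sketch of that pattern on a plain InputStream; the in-memory stream here stands in for a real response body:

import java.io.ByteArrayInputStream
import scala.io.Source

val stream = new ByteArrayInputStream("""{"status":"ok"}""".getBytes("UTF-8"))
val body = Source.fromInputStream(stream).mkString // reads the stream to exhaustion
stream.close()
println(body)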
Example 8
Source File: GraphQLSchemaSpec.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.navigator.graphql

import org.scalatest.{Matchers, WordSpec}
import sangria.parser.QueryParser
import sangria.schema.SchemaChange.DescriptionChange
import sangria.schema.Schema

import scala.io.Source

class GraphQLSchemaSpec extends WordSpec with Matchers {
  "The rendered schema" should {
    "match the expected schema definition" in {
      val idl =
        Source.fromInputStream(getClass.getResourceAsStream("/schema.graphql"), "UTF-8").mkString
      val schema = Schema.buildFromAst(QueryParser.parse(idl).get)

      // Compare schemata but ignore description changes.
      val changes = schema
        .compare(new GraphQLSchema(Set()).QuerySchema)
        .filter(!_.isInstanceOf[DescriptionChange])

      if (changes.nonEmpty) {
        fail(
          s"Schema definition does not match:\n- ${changes.map(_.description).mkString("\n- ")}\n")
      }
    }
  }
}
Example 9
Source File: EquityData.scala From Scala-Programming-Projects with MIT License | 5 votes |
package retcalc

import scala.io.Source

case class EquityData(monthId: String, value: Double, annualDividend: Double) {
  val monthlyDividend: Double = annualDividend / 12
}

object EquityData {
  def fromResource(resource: String): Vector[EquityData] =
    Source.fromResource(resource).getLines().drop(1).map { line =>
      val fields = line.split("\t")
      EquityData(
        monthId = fields(0),
        value = fields(1).toDouble,
        annualDividend = fields(2).toDouble)
    }.toVector
}
Example 11
Source File: RawTextSender.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util

import java.io.{ByteArrayOutputStream, IOException}
import java.net.ServerSocket
import java.nio.ByteBuffer

import scala.io.Source

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.util.IntParam

private[streaming] object RawTextSender extends Logging {
  def main(args: Array[String]) {
    if (args.length != 4) {
      // scalastyle:off println
      System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>")
      // scalastyle:on println
      System.exit(1)
    }
    // Parse the arguments using a pattern match
    val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args

    // Repeat the input data multiple times to fill in a buffer
    val lines = Source.fromFile(file).getLines().toArray
    val bufferStream = new ByteArrayOutputStream(blockSize + 1000)
    val ser = new KryoSerializer(new SparkConf()).newInstance()
    val serStream = ser.serializeStream(bufferStream)
    var i = 0
    while (bufferStream.size < blockSize) {
      serStream.writeObject(lines(i))
      i = (i + 1) % lines.length
    }
    val array = bufferStream.toByteArray

    val countBuf = ByteBuffer.wrap(new Array[Byte](4))
    countBuf.putInt(array.length)
    countBuf.flip()

    val serverSocket = new ServerSocket(port)
    logInfo("Listening on port " + port)

    while (true) {
      val socket = serverSocket.accept()
      logInfo("Got a new connection")
      val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec)
      try {
        while (true) {
          out.write(countBuf.array)
          out.write(array)
        }
      } catch {
        case e: IOException =>
          logError("Client disconnected")
      } finally {
        socket.close()
      }
    }
  }
}
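RawTextSender calls Source.fromFile(file).getLines().toArray because a Source is a one-pass iterator: once its lines are consumed they cannot be re-read. Materializing the lines first allows repeated indexing, as in this small sketch (the file name is a placeholder):

import scala.io.Source

val src = Source.fromFile("input.txt")
val lines: Array[String] = try src.getLines().toArray finally src.close()
// lines can now be indexed repeatedly, e.g. lines(i % lines.length)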
Example 12
Source File: ReplayListenerBus.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.io.{InputStream, IOException} import scala.io.Source import com.fasterxml.jackson.core.JsonParseException import org.json4s.jackson.JsonMethods._ import org.apache.spark.internal.Logging import org.apache.spark.scheduler.ReplayListenerBus._ import org.apache.spark.util.JsonProtocol def replay( logData: InputStream, sourceName: String, maybeTruncated: Boolean = false, eventsFilter: ReplayEventsFilter = SELECT_ALL_FILTER): Unit = { var currentLine: String = null var lineNumber: Int = 0 try { val lineEntries = Source.fromInputStream(logData) .getLines() .zipWithIndex .filter { case (line, _) => eventsFilter(line) } while (lineEntries.hasNext) { try { val entry = lineEntries.next() currentLine = entry._1 lineNumber = entry._2 + 1 postToAll(JsonProtocol.sparkEventFromJson(parse(currentLine))) } catch { case jpe: JsonParseException => // We can only ignore exception from last line of the file that might be truncated // the last entry may not be the very last line in the event log, but we treat it // as such in a best effort to replay the given input if (!maybeTruncated || lineEntries.hasNext) { throw jpe } else { logWarning(s"Got JsonParseException from log file $sourceName" + s" at line $lineNumber, the file might not have finished writing cleanly.") } } } } catch { case ioe: IOException => throw ioe case e: Exception => logError(s"Exception parsing Spark event log: $sourceName", e) logError(s"Malformed line #$lineNumber: $currentLine\n") } } } private[spark] object ReplayListenerBus { type ReplayEventsFilter = (String) => Boolean // utility filter that selects all event logs during replay val SELECT_ALL_FILTER: ReplayEventsFilter = { (eventString: String) => true } }
Example 13
Source File: PythonBroadcastSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.api.python

import java.io.{File, PrintWriter}

import scala.io.Source

import org.scalatest.Matchers

import org.apache.spark.{SharedSparkContext, SparkConf, SparkFunSuite}
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.util.Utils

// This test suite uses SharedSparkContext because we need a SparkEnv in order to deserialize
// a PythonBroadcast:
class PythonBroadcastSuite extends SparkFunSuite with Matchers with SharedSparkContext {
  test("PythonBroadcast can be serialized with Kryo (SPARK-4882)") {
    val tempDir = Utils.createTempDir()
    val broadcastedString = "Hello, world!"
    def assertBroadcastIsValid(broadcast: PythonBroadcast): Unit = {
      val source = Source.fromFile(broadcast.path)
      val contents = source.mkString
      source.close()
      contents should be (broadcastedString)
    }
    try {
      val broadcastDataFile: File = {
        val file = new File(tempDir, "broadcastData")
        val printWriter = new PrintWriter(file)
        printWriter.write(broadcastedString)
        printWriter.close()
        file
      }
      val broadcast = new PythonBroadcast(broadcastDataFile.getAbsolutePath)
      assertBroadcastIsValid(broadcast)
      val conf = new SparkConf().set("spark.kryo.registrationRequired", "true")
      val deserializedBroadcast =
        Utils.clone[PythonBroadcast](broadcast, new KryoSerializer(conf).newInstance())
      assertBroadcastIsValid(deserializedBroadcast)
    } finally {
      Utils.deleteRecursively(tempDir)
    }
  }
}
Example 14
Source File: LogUrlsStandaloneSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy import java.net.URL import scala.collection.mutable import scala.io.Source import org.apache.spark.{LocalSparkContext, SparkContext, SparkFunSuite} import org.apache.spark.scheduler.{SparkListener, SparkListenerExecutorAdded} import org.apache.spark.scheduler.cluster.ExecutorInfo import org.apache.spark.util.SparkConfWithEnv class LogUrlsStandaloneSuite extends SparkFunSuite with LocalSparkContext { private val WAIT_TIMEOUT_MILLIS = 10000 test("verify that correct log urls get propagated from workers") { sc = new SparkContext("local-cluster[2,1,1024]", "test") val listener = new SaveExecutorInfo sc.addSparkListener(listener) // Trigger a job so that executors get added sc.parallelize(1 to 100, 4).map(_.toString).count() sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) listener.addedExecutorInfos.values.foreach { info => assert(info.logUrlMap.nonEmpty) // Browse to each URL to check that it's valid info.logUrlMap.foreach { case (logType, logUrl) => val html = Source.fromURL(logUrl).mkString assert(html.contains(s"$logType log page")) } } } test("verify that log urls reflect SPARK_PUBLIC_DNS (SPARK-6175)") { val SPARK_PUBLIC_DNS = "public_dns" val conf = new SparkConfWithEnv(Map("SPARK_PUBLIC_DNS" -> SPARK_PUBLIC_DNS)).set( "spark.extraListeners", classOf[SaveExecutorInfo].getName) sc = new SparkContext("local-cluster[2,1,1024]", "test", conf) // Trigger a job so that executors get added sc.parallelize(1 to 100, 4).map(_.toString).count() sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) val listeners = sc.listenerBus.findListenersByClass[SaveExecutorInfo] assert(listeners.size === 1) val listener = listeners(0) listener.addedExecutorInfos.values.foreach { info => assert(info.logUrlMap.nonEmpty) info.logUrlMap.values.foreach { logUrl => assert(new URL(logUrl).getHost === SPARK_PUBLIC_DNS) } } } } private[spark] class SaveExecutorInfo extends SparkListener { val addedExecutorInfos = mutable.Map[String, ExecutorInfo]() override def onExecutorAdded(executor: SparkListenerExecutorAdded) { addedExecutorInfos(executor.executorId) = executor.executorInfo } }
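The first test above fetches each executor log page with Source.fromURL(logUrl).mkString. A minimal sketch of reading text over HTTP this way; the URL is a placeholder and no timeouts or error handling are shown:

import scala.io.Source

val page = Source.fromURL("http://localhost:8080/health") // opens a URLConnection under the hood
try println(page.mkString)
finally page.close()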
Example 15
Source File: Main.scala From ros_hadoop with Apache License 2.0 | 5 votes |
package de.valtech.foss import scala.io.Source import scala.collection.mutable.Map import scala.collection.mutable.ListBuffer import scala.collection.JavaConverters._ import Console.{GREEN, RED, RESET} import scala.language.reflectiveCalls import java.io.File import java.io.FileInputStream import java.io.FileOutputStream import java.nio.channels.FileChannel.MapMode._ import java.nio.ByteOrder._ import java.nio.ByteBuffer import de.valtech.foss.proto.RosbagIdxOuterClass.RosbagIdx object Main extends App { def help() = { Console.err.printf(s""" ${RESET}${GREEN}Usage: --file <ros.bag> file to process --version print Rosbag version and exit --offset <offset> --number <records> Seek at offset < 1073741824 and read the specified number of records ${RESET}By default will just create the protobuf idx file needed for configuration.\n\n""") sys.exit(0) } val pargs = Map[String,AnyRef]() def process_cli(args: List[String]) :Boolean = args match { case Nil => true // parse success case "-v" :: rest => pargs += ("version" -> Some(true)); process_cli(rest) case "--version" :: rest => pargs += ("version" -> Some(true)); process_cli(rest) case "-f" :: x :: rest => pargs += ("file" -> x); process_cli(rest) case "--file" :: x :: rest => pargs += ("file" -> x); process_cli(rest) case "-n" :: x :: rest => pargs += ("number" -> Some(x.toInt)); process_cli(rest) case "--number" :: x :: rest => pargs += ("number" -> Some(x.toInt)); process_cli(rest) case "-o" :: x :: rest => pargs += ("offset" -> Some(x.toInt)); process_cli(rest) case "--offset" :: x :: rest => pargs += ("offset" -> Some(x.toInt)); process_cli(rest) case "-h" :: rest => help(); false case "--help" :: rest => help(); false case _ => Console.err.printf(s"${RESET}${RED}Unknown argument " + args.head); false } process_cli(args.toList) def use[T <: { def close() }] (resource: T) (code: T ⇒ Unit) = try code(resource) finally resource.close() pargs("file") match { case f:String => process() case _ => help() } def process(): Unit = { val fin = new File(pargs("file").asInstanceOf[String]) use(new FileInputStream(fin)) { stream => { //printf("min: %s\n", Math.min(1073741824, fin.length) ) val buffer = stream.getChannel.map(READ_ONLY, 0, Math.min(1073741824, fin.length)).order(LITTLE_ENDIAN) val p:RosbagParser = new RosbagParser(buffer) val version = p.read_version() val h = p.read_record().get if(pargs contains "version") { printf("%s\n%s\n\n", version, h) return } if(pargs contains "number"){ buffer position pargs.getOrElse("offset",None).asInstanceOf[Option[Int]].getOrElse(0) for(i <- List.range(0,pargs("number").asInstanceOf[Option[Int]].getOrElse(0))) println(p.read_record) return } val idxpos = h.header.fields("index_pos").asInstanceOf[Long] //printf("idxpos: %s %s\n", idxpos, Math.min(1073741824, fin.length) ) val b = stream.getChannel.map(READ_ONLY, idxpos, Math.min(1073741824, fin.length - idxpos)).order(LITTLE_ENDIAN) val pp:RosbagParser = new RosbagParser(b) val c = pp.read_connections(h.header, Nil) val chunk_idx = pp.read_chunk_infos(c) Console.err.printf(s"""${RESET}${GREEN}Found: """ + chunk_idx.size +s""" chunks\n${RESET}It should be the same number reported by rosbag tool.\nIf you encounter any issues try reindexing your file and submit an issue. ${RESET}\n""") val fout = new FileOutputStream(pargs("file").asInstanceOf[String] + ".idx.bin") val builder = RosbagIdx.newBuilder for(i <- chunk_idx) builder.addArray(i) builder.build().writeTo(fout) fout.close() //printf("[%s]\n",chunk_idx.toArray.mkString(",")) }} } }
Example 16
Source File: RosbagInputFormat.scala From ros_hadoop with Apache License 2.0 | 5 votes |
package de.valtech.foss

import scala.io.Source
import scala.collection.JavaConverters._

import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{BytesWritable, LongWritable, MapWritable}
import org.apache.hadoop.mapreduce.{InputSplit, JobContext, RecordReader, TaskAttemptContext}
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat

object RosbagInputFormat {
  def getRosChunkIdx(context: JobContext): String = {
    context.getConfiguration.get("RosbagInputFormat.chunkIdx")
  }
  def getBlockSize(context: JobContext): Long = {
    context.getConfiguration.get("dfs.blocksize").toLong
  }
}

class RosbagBytesInputFormat extends FileInputFormat[LongWritable, BytesWritable] {

  private var rosChunkIdx = ""
  private var recordLength = -1L

  override def isSplitable(context: JobContext, filename: Path): Boolean = {
    rosChunkIdx = RosbagInputFormat.getRosChunkIdx(context)
    recordLength = RosbagInputFormat.getBlockSize(context)
    true
  }

  override def computeSplitSize(blockSize: Long, minSize: Long, maxSize: Long): Long = {
    val defaultSize = super.computeSplitSize(blockSize, minSize, maxSize)
    defaultSize
  }

  override def createRecordReader(split: InputSplit, context: TaskAttemptContext)
      : RecordReader[LongWritable, BytesWritable] = {
    new RosbagBytesRecordReader
  }
}

class RosbagMapInputFormat extends FileInputFormat[LongWritable, MapWritable] {

  private var rosChunkIdx = ""
  private var recordLength = -1L

  override def isSplitable(context: JobContext, filename: Path): Boolean = {
    rosChunkIdx = RosbagInputFormat.getRosChunkIdx(context)
    recordLength = RosbagInputFormat.getBlockSize(context)
    true
  }

  override def computeSplitSize(blockSize: Long, minSize: Long, maxSize: Long): Long = {
    val defaultSize = super.computeSplitSize(blockSize, minSize, maxSize)
    defaultSize
  }

  override def createRecordReader(split: InputSplit, context: TaskAttemptContext)
      : RecordReader[LongWritable, MapWritable] = {
    new RosbagMapRecordReader
  }
}
Example 17
Source File: SentenceTokenizerSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.dataset.text import java.io.PrintWriter import com.intel.analytics.bigdl.dataset.DataSet import com.intel.analytics.bigdl.utils.{Engine, SparkContextLifeCycle} import org.apache.spark.{SparkConf, SparkContext} import org.scalatest.{FlatSpec, Matchers} import scala.io.Source class SentenceTokenizerSpec extends SparkContextLifeCycle with Matchers { override def appName: String = "DocumentTokenizer" "SentenceTokenizerSpec" should "tokenizes articles correctly on Spark" in { val tmpFile = java.io.File .createTempFile("UnitTest", "DocumentTokenizerSpec").getPath val sentence1 = "Enter Barnardo and Francisco, two sentinels." val sentence2 = "Who’s there?" val sentence3 = "I think I hear them. Stand ho! Who is there?" val sentence4 = "The Dr. lives in a blue-painted box." val sentences = Array(sentence1, sentence2, sentence3, sentence4) new PrintWriter(tmpFile) { write(sentences.mkString("\n")); close } val sents = DataSet.rdd(sc.textFile(tmpFile) .filter(!_.isEmpty)).transform(SentenceSplitter()) .toDistributed().data(train = false).flatMap(item => item.iterator).collect() .asInstanceOf[Array[String]] val tokens = DataSet.rdd(sc.parallelize(sents)) .transform(SentenceTokenizer()) val output = tokens.toDistributed().data(train = false).collect() var count = 0 println("tokenized sentences:") output.foreach(x => { count += x.length println(x.mkString(" ")) }) val numOfSents = 6 val numOfWords = 33 output.length should be (numOfSents) count should be (numOfWords) } "SentenceTokenizerSpec" should "tokenizes articles correctly on local" in { val tmpFile = java.io.File .createTempFile("UnitTest", "DocumentTokenizerSpec").getPath val sentence1 = "Enter Barnardo and Francisco, two sentinels." val sentence2 = "Who’s there?" val sentence3 = "I think I hear them. Stand ho! Who is there?" val sentence4 = "The Dr. lives in a blue-painted box." val sentences = Array(sentence1, sentence2, sentence3, sentence4) new PrintWriter(tmpFile) { write(sentences.mkString("\n")); close } val sentenceSplitter = SentenceSplitter() val sentenceTokenizer = SentenceTokenizer() val logData = Source.fromFile(tmpFile).getLines().toArray val sents = DataSet.array(logData .filter(!_.isEmpty)).transform(sentenceSplitter) .toLocal().data(train = false).flatMap(item => item.iterator) val tokens = DataSet.array(sents.toArray) .transform(sentenceTokenizer) val output = tokens.toLocal().data(train = false).toArray sentenceSplitter.close() sentenceTokenizer.close() var count_word = 0 println("tokenized sentences:") output.foreach(x => { count_word += x.length println(x.mkString(" ")) }) val numOfSents = 6 val numOfWords = 33 output.length should be (numOfSents) count_word should be (numOfWords) } }
Example 18
Source File: TextToLabeledSentenceSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.dataset.text import java.io.PrintWriter import com.intel.analytics.bigdl.dataset.DataSet import com.intel.analytics.bigdl.utils.{Engine, SparkContextLifeCycle} import org.apache.spark.SparkContext import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers} import scala.io.Source @com.intel.analytics.bigdl.tags.Serial class TextToLabeledSentenceSpec extends SparkContextLifeCycle with Matchers { override def nodeNumber: Int = 1 override def coreNumber: Int = 1 override def appName: String = "TextToLabeledSentence" "TextToLabeledSentenceSpec" should "indexes sentences correctly on Spark" in { val tmpFile = java.io.File .createTempFile("UnitTest", "DocumentTokenizerSpec").getPath val sentence1 = "Enter Barnardo and Francisco, two sentinels." val sentence2 = "Who’s there?" val sentence3 = "I think I hear them. Stand ho! Who is there?" val sentence4 = "The Dr. lives in a blue-painted box." val sentences = Array(sentence1, sentence2, sentence3, sentence4) new PrintWriter(tmpFile) { write(sentences.mkString("\n")); close } val tokens = DataSet.rdd(sc.textFile(tmpFile) .filter(!_.isEmpty)) .transform(SentenceTokenizer()) val output = tokens.toDistributed().data(train = false) val dictionary = Dictionary(output, 100) val textToLabeledSentence = TextToLabeledSentence[Float](dictionary) val labeledSentences = tokens.transform(textToLabeledSentence) .toDistributed().data(false).collect() labeledSentences.foreach(x => { println("input = " + x.data().mkString(",")) println("target = " + x.label().mkString(",")) var i = 1 while (i < x.dataLength()) { x.getData(i) should be (x.getLabel(i - 1)) i += 1 } }) } "TextToLabeledSentenceSpec" should "indexes sentences correctly on Local" in { val tmpFile = java.io.File .createTempFile("UnitTest", "DocumentTokenizerSpec").getPath val sentence1 = "Enter Barnardo and Francisco, two sentinels." val sentence2 = "Who’s there?" val sentence3 = "I think I hear them. Stand ho! Who is there?" val sentence4 = "The Dr. lives in a blue-painted box." val sentences = Array(sentence1, sentence2, sentence3, sentence4) new PrintWriter(tmpFile) { write(sentences.mkString("\n")); close } val logData = Source.fromFile(tmpFile).getLines().toArray val tokens = DataSet.array(logData .filter(!_.isEmpty)) .transform(SentenceTokenizer()) val output = tokens.toLocal().data(train = false) val dictionary = Dictionary(output, 100) val textToLabeledSentence = TextToLabeledSentence[Float](dictionary) val labeledSentences = tokens.transform(textToLabeledSentence) .toLocal().data(false) labeledSentences.foreach(x => { println("input = " + x.data().mkString(",")) println("target = " + x.label().mkString(",")) var i = 1 while (i < x.dataLength()) { x.getData(i) should be (x.getLabel(i - 1)) i += 1 } }) } }
Example 19
Source File: DictionarySpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.dataset.text import java.io.PrintWriter import com.intel.analytics.bigdl.dataset.DataSet import com.intel.analytics.bigdl.utils.Engine import com.intel.analytics.bigdl.utils.SparkContextLifeCycle import org.apache.spark.{SparkConf, SparkContext} import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers} import scala.io.Source class DictionarySpec extends SparkContextLifeCycle with Matchers { override def nodeNumber: Int = 1 override def coreNumber: Int = 1 override def appName: String = "DictionarySpec" "DictionarySpec" should "creates dictionary correctly on Spark" in { val tmpFile = java.io.File .createTempFile("UnitTest", "DictionarySpec").getPath val sentence1 = "Enter Barnardo and Francisco, two sentinels." val sentence2 = "Who’s there?" val sentence3 = "I think I hear them. Stand ho! Who is there?" val sentences = Array(sentence1, sentence2, sentence3) new PrintWriter(tmpFile, "UTF-8") { write(sentences.mkString("\n")); close } val tokens = DataSet.rdd(sc.textFile(tmpFile) .filter(!_.isEmpty)).transform(SentenceTokenizer()) val output = tokens.toDistributed().data(train = false) val numOfWords = 21 val dictionary = Dictionary(output, 100) dictionary.getVocabSize() should be (numOfWords) dictionary.getDiscardSize() should be (0) dictionary.print() dictionary.printDiscard() dictionary.getVocabSize() should be (numOfWords) sc.stop() } "DictionarySpec" should "creates dictionary correctly on local" in { val tmpFile = java.io.File .createTempFile("UnitTest", "DictionarySpec").getPath val sentence1 = "Enter Barnardo and Francisco, two sentinels." val sentence2 = "Who’s there?" val sentence3 = "I think I hear them. Stand ho! Who is there?" val sentences = Array(sentence1, sentence2, sentence3) new PrintWriter(tmpFile, "UTF-8") { write(sentences.mkString("\n")); close } val logData = Source.fromFile(tmpFile, "UTF-8").getLines().toArray val tokens = DataSet.array(logData .filter(!_.isEmpty)).transform(SentenceTokenizer()) val output = tokens.toLocal().data(train = false) val numOfWords = 21 val dictionary = Dictionary(output, 100) dictionary.getVocabSize() should be (numOfWords) dictionary.getDiscardSize() should be (0) dictionary.print() dictionary.printDiscard() dictionary.getVocabSize() should be (numOfWords) } }
Example 20
Source File: SentenceBiPaddingSpec.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.dataset.text import java.io.PrintWriter import com.intel.analytics.bigdl.dataset.DataSet import com.intel.analytics.bigdl.dataset.text.utils.SentenceToken import com.intel.analytics.bigdl.utils.{Engine, SparkContextLifeCycle} import org.apache.spark.SparkContext import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers} import scala.io.Source @com.intel.analytics.bigdl.tags.Serial class SentenceBiPaddingSpec extends SparkContextLifeCycle with Matchers { override def nodeNumber: Int = 1 override def coreNumber: Int = 1 override def appName: String = "DocumentTokenizer" "SentenceBiPaddingSpec" should "pads articles correctly on Spark" in { val tmpFile = java.io.File .createTempFile("UnitTest", "DocumentTokenizerSpec").getPath val sentence1 = "Enter Barnardo and Francisco, two sentinels." val sentence2 = "Who’s there?" val sentence3 = "I think I hear them. Stand ho! Who is there?" val sentence4 = "The Dr. lives in a blue-painted box." val sentences = Array(sentence1, sentence2, sentence3, sentence4) new PrintWriter(tmpFile) { write(sentences.mkString("\n")); close } val sents = DataSet.rdd(sc.textFile(tmpFile) .filter(!_.isEmpty)).transform(SentenceSplitter()) .toDistributed().data(train = false).flatMap(item => item.iterator).collect() .asInstanceOf[Array[String]] val tokens = DataSet.rdd(sc.parallelize(sents)) .transform(SentenceBiPadding()) val output = tokens.toDistributed().data(train = false).collect() var count = 0 println("padding sentences:") output.foreach(x => { count += x.length println(x) val words = x.split(" ") val startToken = words(0) val endToken = words(words.length - 1) startToken should be (SentenceToken.start) endToken should be (SentenceToken.end) }) sc.stop() } "SentenceBiPaddingSpec" should "pads articles correctly on local" in { val tmpFile = java.io.File .createTempFile("UnitTest", "DocumentTokenizerSpec").getPath val sentence1 = "Enter Barnardo and Francisco, two sentinels." val sentence2 = "Who’s there?" val sentence3 = "I think I hear them. Stand ho! Who is there?" val sentence4 = "The Dr. lives in a blue-painted box." val sentences = Array(sentence1, sentence2, sentence3, sentence4) new PrintWriter(tmpFile) { write(sentences.mkString("\n")); close } val logData = Source.fromFile(tmpFile).getLines().toArray val sents = DataSet.array(logData .filter(!_.isEmpty)).transform(SentenceSplitter()) .toLocal().data(train = false).flatMap(item => item.iterator) val tokens = DataSet.array(sents.toArray) .transform(SentenceBiPadding()) val output = tokens.toLocal().data(train = false).toArray var count_word = 0 println("padding sentences:") output.foreach(x => { count_word += x.length println(x) val words = x.split(" ") val startToken = words(0) val endToken = words(words.length - 1) startToken should be (SentenceToken.start) endToken should be (SentenceToken.end) }) } }
Example 21
Source File: ExampleFileTests.scala From circe-yaml with Apache License 2.0 | 5 votes |
package io.circe.yaml

import java.io.{ File, InputStreamReader }

import org.scalatest.freespec.AnyFreeSpec

import scala.io.Source

class ExampleFileTests extends AnyFreeSpec {

  "yaml test files" - {

    val testFiles = new File(getClass.getClassLoader.getResource("test-yamls").getPath).listFiles
      .filter(_.getName.endsWith(".yml"))
      .map { file =>
        file.getName -> file.getName.replaceFirst("yml$", "json")
      }

    testFiles.foreach {
      case (yamlFile, jsonFile) =>
        yamlFile in {
          val jsonStream = getClass.getClassLoader.getResourceAsStream(s"test-yamls/$jsonFile")
          val json = Source.fromInputStream(jsonStream).mkString
          jsonStream.close()
          val parsedJson = io.circe.jawn.parse(json)
          def yamlStream = getClass.getClassLoader.getResourceAsStream(s"test-yamls/$yamlFile")
          def yamlReader = new InputStreamReader(yamlStream)
          val yaml = Source.fromInputStream(yamlStream).mkString
          val parsedYamlString = parser.parse(yaml)
          val parsedStreamString = parser.parseDocuments(yaml)
          val parsedYamlReader = parser.parse(yamlReader)
          val parsedStreamReader = parser.parseDocuments(yamlReader)
          assert(parsedJson == parsedYamlString)
          assert(parsedJson == parsedStreamString.head)
          assert(parsedJson == parsedYamlReader)
          assert(parsedJson == parsedStreamReader.head)
        }
    }
  }
}
Example 22
Source File: QuoteParser.scala From YahooFinanceScala with MIT License | 5 votes |
package openquant.yahoofinance.impl

import java.time.format.DateTimeFormatter
import java.time.{LocalDate, ZoneId, ZonedDateTime}

import com.github.tototoshi.csv._
import openquant.yahoofinance.Quote

import scala.io.Source

class QuoteParser {
  private[this] val df = DateTimeFormatter.ofPattern("yyyy-MM-dd")
  private[this] val zoneId = ZoneId.of("America/New_York")

  def parse(content: String): Vector[Quote] = {
    val csvReader = CSVReader.open(Source.fromString(content))
    val quotes: Vector[Quote] = csvReader.toStream.drop(1).map { fields ⇒
      parseCSVLine(fields.toVector)
    }.toVector
    quotes
  }

  private def parseCSVLine(field: Vector[String]): Quote = {
    require(field.length >= 7)
    Quote(
      parseDate(field(0)),
      BigDecimal(field(1)),
      BigDecimal(field(4)),
      BigDecimal(field(2)),
      BigDecimal(field(3)),
      BigDecimal(field(5)),
      BigDecimal(field(6))
    )
  }

  private def parseDate(date: String): ZonedDateTime = {
    LocalDate.parse(date, df).atStartOfDay().atZone(zoneId)
  }
}

object QuoteParser {
  def apply() = new QuoteParser
}
Example 23
Source File: ScenarioLoader.scala From mantis with Apache License 2.0 | 5 votes |
package io.iohk.ethereum.ets.common

import java.io.File

import io.iohk.ethereum.utils.Logger
import org.apache.commons.io.FileUtils

import scala.collection.JavaConverters._
import scala.io.Source

trait ScenarioLoader[T] extends ScenarioParser[T] with Logger {

  def load(path: String, options: TestOptions, ignoredTestNames: Set[String] = Set.empty): List[ScenarioGroup[T]] = {
    val testDir = new File(getClass.getClassLoader.getResource(path).toURI)
    val files = FileUtils.listFiles(testDir, Array("json"), true).asScala.toList

    files.filterNot(file => ignoredTestNames.contains(file.getName)).flatMap { file =>
      val name = file.getAbsolutePath.drop(testDir.getAbsolutePath.length + 1).dropRight(".json".length)

      if (!options.isGroupIncluded(name))
        None
      else {
        log.info(s"Loading test scenarios from: $file")
        val text = Source.fromFile(file).getLines.mkString
        val scenarios = parse(text)
        Some(ScenarioGroup(name, scenarios))
      }
    }
  }
}
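ScenarioLoader reads each JSON file with Source.fromFile(file).getLines.mkString, which concatenates the lines without separators; that is fine for JSON, where line breaks are insignificant. When line breaks matter, call mkString directly on the Source instead, or pass a separator to mkString. A small sketch (file name is a placeholder):

import scala.io.Source

val src = Source.fromFile("scenario.json")
try {
  val joined = src.getLines().mkString // lines concatenated with no separator; newlines are dropped
  println(joined)
} finally src.close()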
Example 24
Source File: JsonRpcHttpsServer.scala From mantis with Apache License 2.0 | 5 votes |
package io.iohk.ethereum.jsonrpc.server import java.io.{File, FileInputStream} import java.security.{KeyStore, SecureRandom} import javax.net.ssl.{KeyManagerFactory, SSLContext, TrustManagerFactory} import akka.actor.ActorSystem import akka.http.scaladsl.model.headers.HttpOriginRange import akka.http.scaladsl.{ConnectionContext, Http} import akka.stream.ActorMaterializer import io.iohk.ethereum.jsonrpc.JsonRpcController import io.iohk.ethereum.jsonrpc.server.JsonRpcHttpsServer.HttpsSetupResult import io.iohk.ethereum.jsonrpc.server.JsonRpcServer.JsonRpcServerConfig import io.iohk.ethereum.utils.Logger import scala.concurrent.ExecutionContext.Implicits.global import scala.io.Source import scala.util.{Failure, Success, Try} class JsonRpcHttpsServer(val jsonRpcController: JsonRpcController, config: JsonRpcServerConfig, secureRandom: SecureRandom)(implicit val actorSystem: ActorSystem) extends JsonRpcServer with Logger { def run(): Unit = { implicit val materializer = ActorMaterializer() val maybeSslContext = validateCertificateFiles(config.certificateKeyStorePath, config.certificateKeyStoreType, config.certificatePasswordFile).flatMap{ case (keystorePath, keystoreType, passwordFile) => val passwordReader = Source.fromFile(passwordFile) try { val password = passwordReader.getLines().mkString obtainSSLContext(keystorePath, keystoreType, password) } finally { passwordReader.close() } } val maybeHttpsContext = maybeSslContext.map(sslContext => ConnectionContext.https(sslContext)) maybeHttpsContext match { case Right(httpsContext) => Http().setDefaultServerHttpContext(httpsContext) val bindingResultF = Http().bindAndHandle(route, config.interface, config.port, connectionContext = httpsContext) bindingResultF onComplete { case Success(serverBinding) => log.info(s"JSON RPC HTTPS server listening on ${serverBinding.localAddress}") case Failure(ex) => log.error("Cannot start JSON HTTPS RPC server", ex) } case Left(error) => log.error(s"Cannot start JSON HTTPS RPC server due to: $error") } } private def validateCertificateFiles(maybeKeystorePath: Option[String], maybeKeystoreType: Option[String], maybePasswordFile: Option[String]): HttpsSetupResult[(String, String, String)] = (maybeKeystorePath, maybeKeystoreType, maybePasswordFile) match { case (Some(keystorePath), Some(keystoreType), Some(passwordFile)) => val keystoreDirMissing = !new File(keystorePath).isFile val passwordFileMissing = !new File(passwordFile).isFile if(keystoreDirMissing && passwordFileMissing) Left("Certificate keystore path and password file configured but files are missing") else if(keystoreDirMissing) Left("Certificate keystore path configured but file is missing") else if(passwordFileMissing) Left("Certificate password file configured but file is missing") else Right((keystorePath, keystoreType, passwordFile)) case _ => Left("HTTPS requires: certificate-keystore-path, certificate-keystore-type and certificate-password-file to be configured") } override def corsAllowedOrigins: HttpOriginRange = config.corsAllowedOrigins } object JsonRpcHttpsServer { type HttpsSetupResult[T] = Either[String, T] }
Example 25
Source File: Utils.scala From mantis with Apache License 2.0 | 5 votes |
package io.iohk.ethereum.vm.utils

import java.io.File

import akka.util.ByteString
import io.circe.parser.decode
import io.circe.generic.extras.Configuration
import io.circe.generic.extras.auto._
import io.circe.Error

import scala.io.Source

object Utils {

  def loadContractCodeFromFile(file: File): ByteString = {
    val src = Source.fromFile(file)
    val raw = try { src.mkString } finally { src.close() }
    ByteString(raw.trim.grouped(2).map(Integer.parseInt(_, 16).toByte).toArray)
  }

  def loadContractAbiFromFile(file: File): Either[Error, List[ABI]] = {
    val src = Source.fromFile(file)
    val raw = try { src.mkString } finally { src.close() }
    implicit val config = Configuration.default.withDefaults
    decode[List[ABI]](raw)
  }
}
Example 26
Source File: JudgeFile.scala From CarbonDataLearning with GNU General Public License v3.0 | 5 votes |
package org.github.xubo245.carbonDataLearning.etl

import scala.io.Source

object JudgeFile {
  def main(args: Array[String]): Unit = {
    val fileName = "/root/xubo/data/pageviews-20150505key"
    judge(fileName)
    // val fileName = "/root/xubo/data/pageviews-20150505-000000WithTime"
    // for (i <- 0 to 9) {
    //   val path = s"/root/xubo/data/pageviews-20150505-0" + i + "0000WithTime"
    //   println("read file " + i + ":" + path)
    //   judge(path)
    // }
  }

  def judge(fileName: String): Unit = {
    val reader = Source.fromFile(fileName)
    var i = 1
    for (line <- reader.getLines()) {
      val array = line.split('\t')
      if (array.length != 9) {
        println(i + ":\t" + line)
        i = i + 1
      }
      if (array.length > 7 && !array(7).matches("[0-9]*")) {
        println(line)
        println(array(7))
      }
      if (array.length > 8 && !array(8).equalsIgnoreCase("0")) {
        println(line)
      }
      if (line.length > 3200) {
        println(line)
        println(line.length)
      }
      if (array(6).equalsIgnoreCase("\"")) {
        println(line)
      }
    }
  }
}
Example 27
Source File: WikiETL.scala From CarbonDataLearning with GNU General Public License v3.0 | 5 votes |
package org.github.xubo245.carbonDataLearning.etl import java.io.{File, PrintWriter} import java.text.SimpleDateFormat import java.util.Date import scala.io.Source import scala.util.Random object WikiETL { def main(args: Array[String]): Unit = { val directory = "/root/xubo/data" val files = new File(directory) val out = new PrintWriter("/root/xubo/data/pageviews-20150505time") var flag:Int = 10000000; var typeMap= Map (("b","wikibooks") ,("d","wiktionary") ,("m","wikimedia") ,("mw","wikipedia mobile") ,("n","wikinews") ,("q","wikiquote") ,("s","wikisource") ,("v","wikiversity") ,("w","mediawiki")) for (file <- files.listFiles().sorted.filter(_.getCanonicalFile.getName.contains("pageviews-20150505-"))) { val filePath = file.getCanonicalPath println(filePath) // val out = new PrintWriter(filePath + "WithTime") val reader = Source.fromFile(filePath) val fileName = file.getCanonicalFile.getName val delimiter = "\t" for (line <- reader.getLines()) { val stringBuffer = new StringBuffer() val random = new Random() val id = flag+random.nextInt(1000000) stringBuffer .append(id).append(delimiter) .append(fileName.substring(10, 14)).append(delimiter) .append(fileName.substring(14, 16)).append(delimiter) .append(fileName.substring(16, 18)).append(delimiter) .append(fileName.substring(19, 21)).append(delimiter) val array=line.mkString.split("\\s+") if (array.length == 4 && array(2).matches("[0-9]*") && !array(1).contains("\"")) { val domain = array(0).split('.') stringBuffer.append(domain(0)).append(delimiter) if (domain.length > 1) { var value: String = typeMap.getOrElse(domain(1), "wiki") stringBuffer.append(value).append(delimiter) } else { stringBuffer.append("wiki").append(delimiter) } val time = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date()); val tid= id*10+random.nextInt(5) stringBuffer.append(array(1).replace('_',' ')).append(delimiter) .append(tid).append(delimiter) .append(array(2)).append(delimiter) .append(random.nextInt(100000)).append(delimiter) .append(time) // for (i <- 0 until array.length-1){ // stringBuffer.append(array(i)).append(delimiter) // } // stringBuffer.append(array(array.length-1)) // if (array.length == 4 && array(2).matches("[0-9]*")) { // id = id + 1 out.println(stringBuffer.toString) } } } out.close() } }
Example 28
Source File: WaybackSpec.scala From ArchiveSpark with MIT License | 5 votes |
package org.archive.archivespark.specific.warc.specs

import java.net.URLEncoder

import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.archive.archivespark.dataspecs.DataSpec
import org.archive.archivespark.sparkling.Sparkling
import org.archive.archivespark.sparkling.cdx.CdxRecord
import org.archive.archivespark.sparkling.util.{IteratorUtil, RddUtil, StringUtil}
import org.archive.archivespark.specific.warc.WaybackRecord

import scala.io.Source

class WaybackSpec(cdxServerUrl: String, pages: Int, maxPartitions: Int) extends DataSpec[String, WaybackRecord] {

  override def load(sc: SparkContext, minPartitions: Int): RDD[String] = {
    RddUtil.parallelize(pages, if (maxPartitions == 0) minPartitions else maxPartitions.min(minPartitions)).flatMap { page =>
      try {
        val source = Source.fromURL(cdxServerUrl + "&page=" + page)(StringUtil.codec(Sparkling.DefaultCharset))
        IteratorUtil.cleanup(source.getLines, source.close)
      } catch {
        case e: Exception =>
          e.printStackTrace()
          Iterator.empty
      }
    }.cache
  }

  override def parse(data: String): Option[WaybackRecord] = CdxRecord.fromString(data).map(cdx => new WaybackRecord(cdx))
}

object WaybackSpec {
  def apply(url: String, matchPrefix: Boolean = false, from: Long = 0, to: Long = 0, blocksPerPage: Int = 5, pages: Int = 50, maxPartitions: Int = 0): WaybackSpec = {
    var cdxServerUrl = "http://web.archive.org/cdx/search/cdx?url=$url&matchType=$prefix&pageSize=$blocks"
    cdxServerUrl = cdxServerUrl.replace("$url", URLEncoder.encode(url, "UTF-8"))
    cdxServerUrl = cdxServerUrl.replace("$prefix", if (matchPrefix) "prefix" else "exact")
    cdxServerUrl = cdxServerUrl.replace("$blocks", blocksPerPage.toString)
    if (from > 0) cdxServerUrl += "&from=" + from
    if (to > 0) cdxServerUrl += "&to=" + to
    new WaybackSpec(cdxServerUrl, pages, maxPartitions)
  }
}
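WaybackSpec passes an explicit codec to Source.fromURL so the CDX responses are decoded with a known charset; the second parameter list takes a scala.io.Codec. A hedged sketch of that pattern with a trimmed-down URL:

import scala.io.{Codec, Source}

val src = Source.fromURL("http://web.archive.org/cdx/search/cdx?url=example.org")(Codec.UTF8)
try src.getLines().take(5).foreach(println)
finally src.close()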
Example 29
Source File: FileStreamRecord.scala From ArchiveSpark with MIT License | 5 votes |
package org.archive.archivespark.specific.raw

import java.io.InputStream

import org.archive.archivespark.dataspecs.access.DataAccessor
import org.archive.archivespark.model.{DataEnrichRoot, EnrichRootCompanion}
import org.archive.archivespark.sparkling.io.IOUtil
import org.archive.archivespark.sparkling.util.{IteratorUtil, StringUtil}

import scala.io.Source

class FileStreamRecord(path: String, accessor: DataAccessor[InputStream], retryDelayMs: Option[Int] = None) extends DataEnrichRoot[String, InputStream](path) {

  override def access[R >: Null](action: InputStream => R): R = accessor.access(action)

  def accessSource[R >: Null](action: Source => R): R = access { stream =>
    StringUtil.source(stream) { source =>
      action(source)
    }
  }

  def lineIterator: Iterator[String] = accessor.get match {
    case Some(stream) => IteratorUtil.cleanup(IOUtil.lines(stream), () => stream.close())
    case None => Iterator.empty
  }

  override def companion: EnrichRootCompanion[FileStreamRecord] = FileStreamRecord
}

object FileStreamRecord extends EnrichRootCompanion[FileStreamRecord]
Example 30
Source File: PropertiesUtil.scala From versioneye_sbt_plugin with MIT License | 5 votes |
package com.versioneye import java.io.{File, FileOutputStream} import java.util.Properties import scala.io.Source object PropertiesUtil { protected val propertiesFile: String = "versioneye.properties" def writeProperties(response: ProjectJsonResponse, propertiesFile: File, baseUrl: String): Unit = { var properties: Properties = null if (!propertiesFile.exists()) { createPropertiesFile(propertiesFile) properties = new Properties() } else { properties = loadProperties(propertiesFile) } if (response.getId != null) { properties.setProperty("project_id", response.getId) } val fos = new FileOutputStream(propertiesFile) properties.store(fos, s" Properties for $baseUrl") fos.close() } def getProperties(propertiesFile: File): Properties = { return loadProperties(propertiesFile) } private def loadProperties(file: File): Properties = { if (!file.exists) { return null } val properties = new Properties() val reader = Source.fromFile(file).reader() properties.load(reader) reader.close() return properties } private def createPropertiesFile(file: File) { val parent: File = file.getParentFile if (!parent.exists) { parent.mkdirs } file.createNewFile } def getPropertiesFile(properties: String, projectDirectory: File, withHome: Boolean): File = { val candidates = getPropertyFileCandidates(properties, projectDirectory, false) val firstFile = candidates.find(_.exists()) return firstFile.orElse(candidates.find(!_.exists())).get } def containing(key: String, file: File): Boolean = { if (!file.exists()) { return false } return loadProperties(file).containsKey(key) } def getPropertiesFileContainingProperty(key: String, properties: String, projectDirectory: File): Option[File] = { val candidates = getPropertyFileCandidates(properties, projectDirectory, true) val firstFile = candidates.find(containing(key, _)) return firstFile } def getPropertyFileCandidates(properties: String, projectDirectory: File, withHome: Boolean): Seq[File] = { if (!properties.isEmpty) { return Seq(new File(properties)); } var qaResources = new File(projectDirectory, "src/qa/resources/" + propertiesFile) var mainResources = new File(projectDirectory, "src/main/resources/" + propertiesFile) var userHome = new File(System.getProperty("user.home") + "/.m2/" + propertiesFile) if (withHome) return Seq(qaResources, mainResources, userHome) else return Seq(qaResources, mainResources) } }
Example 31
Source File: ResourceManagement.scala From scalismo-faces with Apache License 2.0 | 5 votes |
package scalismo.faces.utils

import java.io.Closeable

import scala.io.Source
import scala.util.control.NonFatal
import scala.util.{Failure, Try}

object ResourceManagement {

  def usingOption[T <: Closeable, R](obj: => Option[T], after: T => Unit = { t: T => t.close() })(block: T => Option[R]): Option[R] = {
    val o: Option[T] = try {
      obj
    } catch {
      case NonFatal(e) => None
    }
    o.flatMap { res =>
      try {
        block(res)
      } finally {
        after(res)
      }
    }
  }
}
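ResourceManagement wraps resource handling in a reusable helper so the Source is always closed, even on failure. A generic loan-pattern sketch in the same spirit; this is an illustration, not the project's actual API, and the file name is a placeholder:

import scala.io.Source

def using[A <: java.io.Closeable, B](resource: A)(body: A => B): B =
  try body(resource) finally resource.close()

// usage: read the first line and close the source no matter what
val firstLine = using(Source.fromFile("notes.txt"))(_.getLines().next())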
Example 32
Source File: LandmarksTest.scala From scalismo-faces with Apache License 2.0 | 5 votes |
package scalismo.faces.landmarks import java.io._ import scalismo.faces.FacesTestSuite import scalismo.faces.io.TLMSLandmarksIO import scalismo.faces.utils.ResourceManagement import scalismo.geometry.{Point, Point2D, Point3D} import scala.io.Source class LandmarksTest extends FacesTestSuite { def createRandom2DLandmarks(n: Int): IndexedSeq[TLMSLandmark2D] = { for (i <- 0 until n) yield TLMSLandmark2D(randomString(rnd.scalaRandom.nextInt(10) + 1), new Point2D(rnd.scalaRandom.nextDouble, rnd.scalaRandom.nextDouble), rnd.scalaRandom.nextBoolean()) } def createRandom3DLandmarks(n: Int): IndexedSeq[TLMSLandmark3D] = { for (i <- 0 until n) yield TLMSLandmark3D(randomString(rnd.scalaRandom.nextInt(10) + 1), new Point3D(rnd.scalaRandom.nextDouble, rnd.scalaRandom.nextDouble, rnd.scalaRandom.nextDouble), rnd.scalaRandom.nextBoolean()) } describe("Landmarks 2D") { val lms = createRandom2DLandmarks(25) it("can write and read from file (enforce TLMS Float format)") { val tmpFile = File.createTempFile("tlms2d", ".tlms") tmpFile.deleteOnExit() TLMSLandmarksIO.write2D(lms, tmpFile).get val readLM = TLMSLandmarksIO.read2D(tmpFile).get // cast landmarks to float val floatLM = lms.map{lm => lm.copy(point = Point(lm.point.x.toFloat, lm.point.y.toFloat))} // should not write/read as double readLM should not be lms // but as float readLM shouldBe floatLM } it("can write to an existing output stream without closing it") { val f = File.createTempFile("tlms2d", ".txt") f.deleteOnExit() val oStream = new FileOutputStream(f) TLMSLandmarksIO.write2DToStream(lms, oStream).get ResourceManagement.using(new PrintWriter(oStream)) { writer => writer.println("stream should still accept more text") } Source.fromFile(f).getLines().length should be (lms.length + 1) } it("can be converted to Landmarks") { val scLMs = lms.map(lm => lm.toLandmark) lms.zip(scLMs).foreach{ case(tlm, scLM) => scLM.id shouldBe tlm.id scLM.point shouldBe tlm.point } } } describe("Landmarks 3D") { val lms = createRandom3DLandmarks(25) it("can write / read from file (and properly convert to Float thereby, TLMS is float)") { val tmpFile = File.createTempFile("tlms3d",".tlms") tmpFile.deleteOnExit() TLMSLandmarksIO.write3D(lms, tmpFile).get val readLM = TLMSLandmarksIO.read3D(tmpFile).get val floatLM = lms.map{lm => lm.copy(point = Point(lm.point.x.toFloat, lm.point.y.toFloat, lm.point.z.toFloat))} // should not write/read as double readLM should not be lms // but as float readLM shouldBe floatLM } it("can write to an existing output stream without closing it") { val f = File.createTempFile("tlms3d", ".txt") f.deleteOnExit() val oStream = new FileOutputStream(f) TLMSLandmarksIO.write3DToStream(lms, oStream).get ResourceManagement.using(new PrintWriter(oStream)) { writer => writer.println("stream should still accept more text") } Source.fromFile(f).getLines().length should be (lms.length + 1) } it("can be converted to Landmarks") { val scLMs = lms.map(lm => lm.toLandmark) lms.zip(scLMs).foreach{ case(tlm, scLM) => scLM.id shouldBe tlm.id scLM.point shouldBe tlm.point } } } }
Example 33
Source File: TSV.scala From low-rank-logic with MIT License | 5 votes |
package uclmr.io import uclmr.{DefaultIx, Cell, CellType, TensorKB} import ml.wolfe.util.{ProgressBar, Conf} import scala.io.Source import scala.util.Random object LoadTSV extends App { def apply(k: Int = 100, subsample: Double = 1.0, db: TensorKB = null, filePath: String = Conf.getString("inputFile")): TensorKB = { val kb = if (db != null) db else new TensorKB(k) val rand = new Random(0l) val lines = Source.fromFile(filePath).getLines() val progressBar = new ProgressBar(Source.fromFile(filePath).getLines().size, 100000) progressBar.start() for { fact <- lines Array(r, e1, e2, typ, target) = fact.split("\t") } { val cellType = typ match { case "Train" => CellType.Train case "Test" => CellType.Test case "Dev" => CellType.Dev case "Observed" => CellType.Observed } if (rand.nextDouble() < subsample) { val cell = Cell(r, (e1, e2), DefaultIx, target.toDouble, cellType) kb += cell } progressBar(r) } kb } }
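LoadTSV above opens the file twice (once just to size the progress bar) and never closes either Source. A hedged single-pass variant that mirrors the names used in the snippet, at the cost of materialising the lines in memory:

import ml.wolfe.util.{Conf, ProgressBar}
import scala.io.Source

val filePath = Conf.getString("inputFile")

// Read the lines once and close the Source eagerly; the Vector then drives both the
// progress bar size and the fact loop.
val source = Source.fromFile(filePath)
val lines =
  try source.getLines().toVector
  finally source.close()

val progressBar = new ProgressBar(lines.size, 100000)
progressBar.start()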
Example 34
Source File: LuceneRDDMoreLikeThisSpec.scala From spark-lucenerdd with Apache License 2.0 | 5 votes |
package org.zouzias.spark.lucenerdd import com.holdenkarau.spark.testing.SharedSparkContext import org.apache.spark.SparkConf import scala.collection.JavaConverters._ import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers} import scala.io.Source class LuceneRDDMoreLikeThisSpec extends FlatSpec with Matchers with BeforeAndAfterEach with SharedSparkContext { var luceneRDD: LuceneRDD[_] = _ override val conf = LuceneRDDKryoRegistrator.registerKryoClasses(new SparkConf(). setMaster("local[*]"). setAppName("test"). set("spark.ui.enabled", "false"). set("spark.app.id", appID)) override def afterEach() { luceneRDD.close() } "LuceneRDD.moreLikeThis" should "return relevant documents" in { val words: Seq[String] = Source.fromFile("src/test/resources/alice.txt") .getLines().map(_.toLowerCase).toSeq val rdd = sc.parallelize(words) luceneRDD = LuceneRDD(rdd) val results = luceneRDD .moreLikeThis("_1", "alice adventures wonderland", 1, 1) .collect() results.length > 0 should equal(true) val firstDoc = results.head val x = firstDoc.getString(firstDoc.fieldIndex("_1")) x.contains("alice") && x.contains("wonderland") && x.contains("adventures") should equal(true) val lastDoc = results.last val y = lastDoc.getString(lastDoc.fieldIndex("_1")) y.contains("alice") && !y.contains("wonderland") && !y.contains("adventures") should equal(true) } }
Example 35
Source File: linkageFuzzyExample.scala From spark-lucenerdd with Apache License 2.0 | 5 votes |
import scala.io.Source

import org.apache.spark.rdd.RDD
import org.zouzias.spark.lucenerdd._
import org.zouzias.spark.lucenerdd.LuceneRDD
import org.zouzias.spark.lucenerdd.models.SparkScoreDoc

// Step 1: Query prefixes of countries
// Shooting for Greece, Germany, Spain and Italy
val leftCountries = Array("gree", "germa", "spa", "ita")
val leftCountriesRDD: RDD[String] = sc.parallelize(leftCountries)

// Step 2: Load all country names
val countries = sc.parallelize(Source.fromFile("src/test/resources/countries.txt").getLines()
  .map(_.toLowerCase()).toSeq)

val luceneRDD = LuceneRDD(countries)
luceneRDD.cache()

// Step 3: Define your linkage function (fuzzy)
def fuzzyLinker(country: String): String = {
  val Fuzziness = 2
  s"_1:${country}~${Fuzziness}"
}

// Step 4: Perform the linkage
val linked: RDD[(String, Array[SparkScoreDoc])] = luceneRDD.link(leftCountriesRDD, fuzzyLinker, 10)

// Step 5: View the results
linked.foreach(x => println((x._1, x._2.mkString(","))))

// (spa,List(SparkScoreDoc(5.1271343,84,0,Text fields:_1:[spain])))
// (gree,List(SparkScoreDoc(5.1271343,86,0,Text fields:_1:[greece])))
// (germa,List(SparkScoreDoc(5.127134,83,0,Text fields:_1:[germany])))
// (ita,List(SparkScoreDoc(2.9601524,106,0,Text fields:_1:[italy]), SparkScoreDoc(2.9601524,102,0,Text fields:_1:[iraq]), SparkScoreDoc(2.9601524,101,0,Text fields:_1:[iran])))
Example 36
Source File: linkagePrefixExample.scala From spark-lucenerdd with Apache License 2.0 | 5 votes |
import scala.io.Source

import org.apache.spark.rdd.RDD
import org.zouzias.spark.lucenerdd._
import org.zouzias.spark.lucenerdd.LuceneRDD
import org.zouzias.spark.lucenerdd.models.SparkScoreDoc

// Step 1: Query prefixes of countries
// Shooting for Greece, Russia, Argentina and Belgium
val leftCountries = Array("gre", "ru", "ar", "be")
val leftCountriesRDD: RDD[String] = sc.parallelize(leftCountries)

// Step 2: Load all country names
val countries = sc.parallelize(Source.fromFile("src/test/resources/countries.txt").getLines()
  .map(_.toLowerCase()).toSeq)

val luceneRDD = LuceneRDD(countries)

// Step 3: Define your linkage function (prefix)
def prefixLinker(country: String): String = {
  s"_1:${country}*"
}

// Step 4: Perform the linkage
val linked: RDD[(String, Array[SparkScoreDoc])] = luceneRDD.link(leftCountriesRDD, prefixLinker, 10)

// Step 5: View the results
linked.foreach(x => println((x._1, x._2.mkString(","))))

// (gre,List(SparkScoreDoc(1.0,88,0,Text fields:_1:[grenada]), SparkScoreDoc(1.0,87,0,Text fields:_1:[greenland]), SparkScoreDoc(1.0,86,0,Text fields:_1:[greece])))
// (ar,List(SparkScoreDoc(1.0,12,0,Text fields:_1:[aruba]), SparkScoreDoc(1.0,11,0,Text fields:_1:[armenia]), SparkScoreDoc(1.0,10,0,Text fields:_1:[argentina])))
// (ru,List(SparkScoreDoc(1.0,55,0,Text fields:_1:[russia])))
// (be,List(SparkScoreDoc(1.0,25,0,Text fields:_1:[bermuda]), SparkScoreDoc(1.0,24,0,Text fields:_1:[benin]), SparkScoreDoc(1.0,23,0,Text fields:_1:[belize]), SparkScoreDoc(1.0,22,0,Text fields:_1:[belgium]), SparkScoreDoc(1.0,21,0,Text fields:_1:[belarus])))
Example 37
Source File: loadCities.scala From spark-lucenerdd with Apache License 2.0 | 5 votes |
sc.setLogLevel("INFO") import scala.io.Source import org.zouzias.spark.lucenerdd.partition.LuceneRDDPartition import org.zouzias.spark.lucenerdd._ import org.zouzias.spark.lucenerdd.LuceneRDD val cities = Source.fromFile("src/test/resources/cities.txt").getLines().toSeq val rdd = sc.parallelize(cities) val luceneRDD = LuceneRDD(rdd) luceneRDD.cache luceneRDD.count println("=" * 20) luceneRDD.termQuery("_1", "toronto").take(10) println("=" * 20) luceneRDD.termQuery("_1", "athens").take(10) println("=" * 20) luceneRDD.termQuery("_1", "bern").take(10) println("=" * 20) luceneRDD.termQuery("_1", "madrid").take(10)
Example 38
Source File: package.scala From scalda with MIT License | 5 votes |
package com.nitro.scalda import java.io.File import scala.io.Source package object examples { val lines: File => Iterator[String] = f => Source.fromFile(f).getLines() val text: File => String = lines andThen { _.mkString(" ") } def log(message: => String, on: Boolean = true): Unit = if (on) System.err.println(message) else () def getOrElse(args: Array[String])(index: Int, alt: => String): String = Option(args(index)).getOrElse(alt) def getOrElse[T]( args: Array[String], index: Int, alt: => T, convert: String => T ): T = Option(args(index)) .map(convert) .getOrElse(alt) }
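A quick usage sketch of the two file helpers defined above; the corpus path is illustrative only:

import java.io.File
import com.nitro.scalda.examples._

val doc = new File("corpus/article-001.txt")

// Stream the lines lazily, or flatten the whole document into one space-joined string.
val firstLines = lines(doc).take(5).toList
val fullText = text(doc)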
Example 39
Source File: JobLauncher.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.s2jobs import org.apache.s2graph.s2jobs.udfs.Udf import org.apache.spark.sql.SparkSession import play.api.libs.json.{JsValue, Json} import scala.io.Source case class JobOption( name:String = "S2BatchJob", confType:String = "db", jobId:Int = -1, confFile:String = "" ) object JobLauncher extends Logger { def parseArguments(args: Array[String]): JobOption = { val parser = new scopt.OptionParser[JobOption]("run") { opt[String]('n', "name").required().action((x, c) => c.copy(name = x)).text("job display name") cmd("file").action((_, c) => c.copy(confType = "file")) .text("get config from file") .children( opt[String]('f', "confFile").required().valueName("<file>").action((x, c) => c.copy(confFile = x)).text("configuration file") ) cmd("db").action((_, c) => c.copy(confType = "db")) .text("get config from db") .children( opt[String]('i', "jobId").required().valueName("<jobId>").action((x, c) => c.copy(jobId = x.toInt)).text("configuration file") ) } parser.parse(args, JobOption()) match { case Some(o) => o case None => parser.showUsage() throw new IllegalArgumentException(s"failed to parse options... (${args.mkString(",")}") } } def getConfig(options: JobOption):JsValue = options.confType match { case "file" => Json.parse(Source.fromFile(options.confFile).mkString) case "db" => throw new IllegalArgumentException(s"'db' option that read config file from database is not supported yet.. ") } def main(args: Array[String]): Unit = { val options = parseArguments(args) logger.info(s"Job Options : ${options}") val jobDescription = JobDescription(getConfig(options)) val ss = SparkSession .builder() .appName(s"${jobDescription.name}") .config("spark.driver.maxResultSize", "20g") .enableHiveSupport() .getOrCreate() // register udfs jobDescription.udfs.foreach{ udfOption => val udf = Class.forName(udfOption.`class`).newInstance().asInstanceOf[Udf] logger.info((s"[udf register] ${udfOption}")) udf.register(ss, udfOption.name, udfOption.params.getOrElse(Map.empty)) } val job = new Job(ss, jobDescription) job.run() } }
Example 40
Source File: Bootstrap.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.rest.play import java.util.concurrent.Executors import org.apache.s2graph.core.rest.{RequestParser, RestHandler} import org.apache.s2graph.core.utils.logger import org.apache.s2graph.core.{ExceptionHandler, S2Graph, Management} import org.apache.s2graph.rest.play.actors.QueueActor import org.apache.s2graph.rest.play.config.Config import org.apache.s2graph.rest.play.controllers.ApplicationController import play.api.Application import play.api.mvc.{WithFilters, _} import play.filters.gzip.GzipFilter import scala.concurrent.{ExecutionContext, Future} import scala.io.Source import scala.util.Try object Global extends WithFilters(new GzipFilter()) { var s2graph: S2Graph = _ var storageManagement: Management = _ var s2parser: RequestParser = _ var s2rest: RestHandler = _ var wallLogHandler: ExceptionHandler = _ def startup() = { val numOfThread = Runtime.getRuntime.availableProcessors() val threadPool = Executors.newFixedThreadPool(numOfThread) val ec = ExecutionContext.fromExecutor(threadPool) val config = Config.conf.underlying // init s2graph with config s2graph = new S2Graph(config)(ec) storageManagement = new Management(s2graph) s2parser = new RequestParser(s2graph) s2rest = new RestHandler(s2graph)(ec) logger.info(s"starts with num of thread: $numOfThread, ${threadPool.getClass.getSimpleName}") config } def shutdown() = { s2graph.shutdown() } // Application entry point override def onStart(app: Application) { ApplicationController.isHealthy = false val config = startup() wallLogHandler = new ExceptionHandler(config) QueueActor.init(s2graph, wallLogHandler) val defaultHealthOn = Config.conf.getBoolean("app.health.on").getOrElse(true) ApplicationController.deployInfo = Try(Source.fromFile("./release_info").mkString("")).recover { case _ => "release info not found\n" }.get ApplicationController.isHealthy = defaultHealthOn } override def onStop(app: Application) { wallLogHandler.shutdown() QueueActor.shutdown() shutdown() } override def onError(request: RequestHeader, ex: Throwable): Future[Result] = { logger.error(s"onError => ip:${request.remoteAddress}, request:${request}", ex) Future.successful(Results.InternalServerError) } override def onHandlerNotFound(request: RequestHeader): Future[Result] = { logger.error(s"onHandlerNotFound => ip:${request.remoteAddress}, request:${request}") Future.successful(Results.NotFound) } override def onBadRequest(request: RequestHeader, error: String): Future[Result] = { logger.error(s"onBadRequest => ip:${request.remoteAddress}, request:$request, error:$error") Future.successful(Results.BadRequest(error)) } }
Example 41
Source File: RequestDeleter.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.load import java.io.{File, PrintWriter} import java.nio.file.Files import scala.io.Source object RequestDeleter { def delRequests(file: File, deletedCount: Int): Unit = { if (Files.exists(file.toPath)) { val source = Source.fromFile(file) val outputFile = s"requests-after-drop-${System.currentTimeMillis}.txt" val output = new PrintWriter(outputFile, "utf-8") var i = 0 var j = 0 var r = 0 try { source .getLines() .map(line => { if (r < deletedCount) i = i + 1 if (line.isEmpty || line.indexOf("{") == 0) { j = j + 1 if (j % 3 == 0) { j = 0 r = r + 1 } } line }) .drop(i) .foreach(line => output.print(s"$line\r\n")) println(s"$deletedCount of $r requests have been dropped from ${file.getAbsolutePath}, and saved to $outputFile") } finally output.close() } } }
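delRequests above closes the PrintWriter but leaves the Source it opened unclosed. On Scala 2.13+ a compact way to release both is scala.util.Using; a minimal sketch of the same read-filter-write shape with illustrative file names:

import java.io.PrintWriter
import scala.io.Source
import scala.util.Using

// Copy all non-empty lines from one file to another; both resources are closed on any outcome.
Using.resources(Source.fromFile("requests.txt"), new PrintWriter("requests-cleaned.txt", "utf-8")) {
  (source, output) =>
    source.getLines().filter(_.nonEmpty).foreach(line => output.print(s"$line\r\n"))
}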
Example 42
Source File: WsAccumulateChanges.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.load

import java.io.File

import akka.actor.ActorSystem
import com.wavesplatform.dex.load.ws.WsCollectChangesClient

import scala.io.Source
import scala.util.Random

object WsAccumulateChanges {

  def createClients(apiUri: String, feederFile: File, accountsNumber: Int)(implicit system: ActorSystem): Seq[WsCollectChangesClient] =
    readRandomAccountLines(feederFile, accountsNumber).map { accountLine =>
      val fields = accountLine.split(';')
      val addr = fields(0)
      val aus = fields(1)
      val obs = fields.drop(2)
      new WsCollectChangesClient(apiUri, addr, aus, obs)
    }

  private def readRandomAccountLines(feederFile: File, accountsNumber: Int): Seq[String] = {
    val source = Source.fromFile(feederFile)
    try {
      val lines = source.getLines()
      val r = lines.take(accountsNumber).toArray
      lines.foreach { line =>
        // 30%
        if (Random.nextDouble() < 0.3) r.update(Random.nextInt(accountsNumber), line)
      }
      r
    } finally source.close()
  }
}
Example 43
Source File: GatlingFeeder.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.load import java.io.{File, PrintWriter} import java.security import java.security.KeyFactory import java.security.spec.PKCS8EncodedKeySpec import java.util.Base64 import com.wavesplatform.dex.api.ws.protocol.WsAddressSubscribe.JwtPayload import com.wavesplatform.dex.auth.JwtUtils import com.wavesplatform.dex.domain.account.{AddressScheme, PrivateKey, PublicKey} import com.wavesplatform.dex.domain.bytes.ByteStr import com.wavesplatform.wavesj.PrivateKeyAccount import play.api.libs.json.Json import scala.concurrent.duration._ import scala.io.Source import scala.util.Random object GatlingFeeder { def authServiceKeyPair(rawPrivateKey: String): security.PrivateKey = { val privateKeyContent = rawPrivateKey .replace("-----BEGIN PRIVATE KEY-----", "") .replace("-----END PRIVATE KEY-----", "") .replaceAll("\\n", "") val kf = KeyFactory.getInstance("RSA") val ksPkcs8 = new PKCS8EncodedKeySpec(Base64.getDecoder.decode(privateKeyContent)) val privateKey = kf.generatePrivate(ksPkcs8) privateKey } private def mkJwtSignedPayload(a: PrivateKeyAccount): JwtPayload = { val exp = System.currentTimeMillis() / 1000 + 24.hour.toSeconds JwtPayload( signature = ByteStr(Array.emptyByteArray), publicKey = PublicKey(a.getPublicKey), networkByte = AddressScheme.current.chainId.toChar.toString, clientId = "test", firstTokenExpirationInSeconds = exp, activeTokenExpirationInSeconds = exp, scope = List("general") ).signed(PrivateKey(a.getPrivateKey)) } private def mkAusString(accountPrivateKey: PrivateKeyAccount, authKp: security.PrivateKey): String = { s"""{"T":"aus","S":"${accountPrivateKey.getAddress}","t":"jwt","j":"${JwtUtils.mkJwt(authKp, Json.toJsObject(mkJwtSignedPayload(accountPrivateKey)))}"}""" } private def mkObsStrings(pairsFile: File, numberPerClient: Int): String = { val source = Source.fromFile(pairsFile) try { val pairs = Random.shuffle(source.getLines.toVector) require(numberPerClient <= pairs.size, "numberPerClient > available asset pairs in file") pairs.take(numberPerClient).map(x => s"""{"T":"obs","S":"$x","d":100}""").mkString(";") } finally source.close() } def mkFile(accountsNumber: Int, seedPrefix: String, authKp: security.PrivateKey, pairsFile: File, orderBookNumberPerAccount: Int, feederFile: File): Unit = { val output = new PrintWriter(feederFile, "utf-8") try { (0 until accountsNumber).foreach { i => val pk = PrivateKeyAccount.fromSeed(s"$seedPrefix$i", 0, AddressScheme.current.chainId) output.println(s"""${pk.getAddress};${mkAusString(pk, authKp)};${mkObsStrings(pairsFile, orderBookNumberPerAccount)}""") } } finally output.close() println(s"Results have been saved to $feederFile") } }
Example 44
Source File: FileUtil.scala From piflow with BSD 2-Clause "Simplified" License | 5 votes |
package cn.piflow.conf.util import scala.io.Source object FileUtil { def fileReader(filePath : String) : String = { var str = "" val file = Source.fromFile(filePath) val iter = file.buffered while (iter.hasNext){ val line = iter.head str += line iter.next() } file.close() str } }
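fileReader above accumulates the file character by character (the val named line actually holds a single Char), and close is only reached if no exception is thrown. A shorter equivalent with the close moved into a finally:

import scala.io.Source

def fileReader(filePath: String): String = {
  val file = Source.fromFile(filePath)
  try file.mkString
  finally file.close()
}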
Example 45
package com.indix.utils.core

import org.apache.commons.lang3.StringUtils
import org.apache.commons.lang3.text.WordUtils

import scala.io.Source

object MPN {

  // Some domain specific keywords known to be invalid
  val BlackListedMpns = Source.fromInputStream(getClass.getResourceAsStream("/BlacklistMPNs.txt")).getLines.toSet

  val StopChars = Set(' ', '-', '_', '.', '/')
  val TerminateChars = Set(',', '"', '*', '%', '{', '}', '#', '&', '\\')
  val MaxLen = 50
  val MinLen = 3

  // Does not consider one word strings as title-case phrase
  def isTitleCase(str: String): Boolean = {
    val words = str.split(' ').filter(_.nonEmpty)
    if (words.length < 2) false
    else words.forall(w => w == WordUtils.capitalizeFully(w))
  }

  def postProcessIdentifier(input: String): String = {
    val trimmedUpper = input.trim.toUpperCase
    trimmedUpper
  }

  // Check if identifier is valid, also return the identifier to process further if any
  def validateIdentifier(text: String): (Boolean, String) = {
    val input = if (text != null) text.trim() else text
    input match {
      case _ if StringUtils.isBlank(input) || input.length > MaxLen || input.length < MinLen => (false, "")
      case _ if input.count(c => TerminateChars.contains(c)) > 1 => (false, input)
      case _ if BlackListedMpns.contains(input.toLowerCase) => (false, "")
      case _ if isTitleCase(input) => (false, "")
      // Unicode strings yet to be handled
      case _ => (true, input)
    }
  }

  def isValidIdentifier(value: String): Boolean = validateIdentifier(value)._1

  def standardizeMPN(input: String): Option[String] = {
    val (isValid, identifier) = validateIdentifier(input)
    if (isValid) {
      Some(postProcessIdentifier(identifier))
    } else if (StringUtils.isBlank(identifier)) {
      None
    } else if (identifier.indexWhere(c => TerminateChars.contains(c)) > 0) {
      Some(postProcessIdentifier(identifier.substring(0, identifier.indexWhere(c => TerminateChars.contains(c)))))
    } else None
  }
}
Example 46
Source File: FilesAndArchivesTest.scala From AI with Apache License 2.0 | 5 votes |
package com.bigchange.basic

import java.io.File

import org.apache.spark.{SparkConf, SparkContext}

import scala.io.Source

object FilesAndArchivesTest {

  def main(args: Array[String]): Unit = {
    // Note: the user submits with "--files localtest.txt#appSees.txt", so the arguments also
    // need to carry "appSees.txt" so that we know the alias to read it by.
    if (args.length < 3) {
      System.err.println("Usage: <files> <archives> <words>" + "\n" +
        "files - (string) such as story.txt#st" + "\n" +
        "archives - (string) such as techtc300.zip#z" + "\n" +
        "words - (string) such as 'young,Englewood' etc")
      System.exit(1)
    }

    val sparkConf = new SparkConf().setAppName("FilesAndArchivesTest")
    val sc = new SparkContext(sparkConf)

    val files = args(0)
    val archives = args(1)
    val words = args(2)

    // Find all sentences in "files" that contain any of the given words
    // Find all sentences in "archives" that contain any of the given words, and print their file names
    println(s"files: $files, archives: $archives, words: $words")

    val r = sc.parallelize(words.split(""",""").toList).persist()
    r.collect().foreach(println)

    // Process "files"
    r.mapPartitions(p => {
      // Load "files"; note that with "--files filename#shortname" the short name can be used directly
      val lines = Source.fromFile(files).getLines()
      val words = p.toList
      // This is important: do not call p.exists(...) directly, because p is an iterator,
      // and iterators have some tricky one-pass semantics
      // Lines containing any of the words in p will be printed
      val flines = lines.filter(x => {
        words.exists(x.indexOf(_) >= 0)
      })
      // Note that flines is an iterator here; never call something like foreach on it
      flines
    }).distinct().collect().foreach(x => println(s"files match -- $x"))

    // Process "archives"; several files can be packed into a single zip, tar.gz, etc.
    r.mapPartitions(p => {
      val dir = new File(archives + File.separator + "conf")
      val fileNameAndLines = for (f <- dir.listFiles()) yield {
        val fileName = f.getName
        val lines = Source.fromFile(f.getCanonicalPath).getLines()
        (fileName, lines)
      }
      val tmp = fileNameAndLines.flatMap({
        case (fileName, lines) => for (line <- lines) yield (fileName, line)
      })
      val words = p.toList
      val flines = tmp.filter({
        case (fileName, line) => words.exists(line.indexOf(_) >= 0)
      })
      flines.toIterator
    }).distinct().collect().foreach(x => println(s"archives match -- ${x._1}/${x._2}"))

    sc.stop()
  }
}
Example 47
Source File: NaiveBayesTest.scala From AI with Apache License 2.0 | 5 votes |
package com.bigchange.test

import com.bigchange.datamining.CustomNaiveBayes

import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.io.Source

object NaiveBayesTest {

  def main(args: Array[String]) {
    // val Array(dataPath) = args
    val data = Source.fromFile("src/main/resources/nbData/i100-i500").getLines().toList
    // This way the corresponding data file can also be read from inside the jar after packaging
    val data2 = Source.fromInputStream(this.getClass.getResourceAsStream("src/main/resources/nbData/i100-i500")).getLines().toList
    // 10-fold cross validation: (index, List(item1, item2))
    val splitData = data.zipWithIndex.map(x => (x._2 % 10, x._1)).groupBy(_._1).mapValues(x => x.map(_._2))
    val modelMap = new mutable.HashMap[Int, String]()
    val model = CustomNaiveBayes.model(0, splitData)
    var list = List((0, model))
    for (id <- 1 until 10) {
      // train
      val model = CustomNaiveBayes.model(id, splitData)
      list = list ::: List((id, model))
    }
    // classify
    val listP = new ListBuffer[(String, Double)]
    list.foreach(x => {
      println("model:" + x)
      val pred = CustomNaiveBayes.predict(Array("health", "moderate", "moderate1", "yes"), x._2)
      listP.+=(pred)
    })
    println("tobe:" + listP)
    println("tobe:" + listP.max)
  }
}
Example 48
Source File: Swagger.scala From daf-semantics with Apache License 2.0 | 5 votes |
package controllers import java.io.File import java.util import de.zalando.play.controllers.PlayBodyParsing._ import org.yaml.snakeyaml.Yaml import play.api.mvc._ import scala.io.Source class Swagger extends Controller { def notSpec = Seq("application.conf", "logback.xml", "routes") def listSpecs() = Action { val path = "conf" val file = new File(path) if (file != null && file.list() != null) { val files = file.listFiles().filterNot(_.isDirectory).map(_.getName).toSeq implicit val arrayMarshaller = anyToWritable[Seq[String]]("application/json") val names = files.filterNot(notSpec.contains).filterNot(_.startsWith(".")) Ok(names) } else { NotFound("Path could not be found: " + file.getAbsolutePath) } } def swaggerSpec(name: String) = Action { implicit val mapMarshaller = anyToWritable[java.util.Map[_, _]]("application/json") getSpec(name).map(s => Ok(s)).getOrElse(NotFound(name)) } private def getSpec(yamlPath: String) = { val yamlFile = Option(getClass.getClassLoader.getResource(yamlPath)) // TODO: close the stream val yamlStr = yamlFile map { yaml => Source.fromURL(yaml).getLines().mkString("\n") } val javaMap = yamlStr map { new Yaml().load(_).asInstanceOf[util.Map[Any, Any]] } javaMap } }
Example 50
Source File: MockIdentityVerificationHttp.scala From nisp-frontend with Apache License 2.0 | 5 votes |
package uk.gov.hmrc.nisp.helpers import org.mockito.ArgumentMatchers import org.mockito.Mockito._ import org.scalatest.mock.MockitoSugar import play.api.http.Status import play.api.libs.json.Json import scala.concurrent.Future import scala.io.Source import uk.gov.hmrc.http.{ HttpGet, HttpResponse } object MockIdentityVerificationHttp extends MockitoSugar { val mockHttp = mock[HttpGet] val possibleJournies = Map( "success-journey-id" -> "test/resources/identity-verification/success.json", "incomplete-journey-id" -> "test/resources/identity-verification/incomplete.json", "failed-matching-journey-id" -> "test/resources/identity-verification/failed-matching.json", "insufficient-evidence-journey-id" -> "test/resources/identity-verification/insufficient-evidence.json", "locked-out-journey-id" -> "test/resources/identity-verification/locked-out.json", "user-aborted-journey-id" -> "test/resources/identity-verification/user-aborted.json", "timeout-journey-id" -> "test/resources/identity-verification/timeout.json", "technical-issue-journey-id" -> "test/resources/identity-verification/technical-issue.json", "precondition-failed-journey-id" -> "test/resources/identity-verification/precondition-failed.json", "invalid-journey-id" -> "test/resources/identity-verification/invalid-result.json", "invalid-fields-journey-id" -> "test/resources/identity-verification/invalid-fields.json", "failed-iv-journey-id" -> "test/resources/identity-verification/failed-iv.json" ) def mockJourneyId(journeyId: String): Unit = { val fileContents = Source.fromFile(possibleJournies(journeyId)).mkString when(mockHttp.GET[HttpResponse](ArgumentMatchers.contains(journeyId))(ArgumentMatchers.any(), ArgumentMatchers.any(),ArgumentMatchers.any())). thenReturn(Future.successful(HttpResponse(Status.OK, responseJson = Some(Json.parse(fileContents))))) } possibleJournies.keys.foreach(mockJourneyId) }
Example 51
Source File: MockSessionCache.scala From nisp-frontend with Apache License 2.0 | 5 votes |
package uk.gov.hmrc.nisp.helpers import play.api.libs.json.{Json, Reads, Writes} import uk.gov.hmrc.http.cache.client.{CacheMap, SessionCache} import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.{ExecutionContext, Future} import scala.io.Source import uk.gov.hmrc.http.{HeaderCarrier, HttpDelete, HttpGet, HttpPut, UserId} object MockSessionCache extends SessionCache{ val cachedNinoAndUsername = TestAccountBuilder.cachedNino val cachedUserId = UserId(s"/auth/oid/$cachedNinoAndUsername") override def defaultSource: String = ??? override def baseUri: String = ??? override def domain: String = ??? override def http: HttpGet with HttpPut with HttpDelete = ??? private def loadObjectFromFile[T](filename: String)(implicit rds: Reads[T]): Option[T] = { val fileContents = Source.fromFile(filename).mkString Json.parse(fileContents).validate[T].fold(invalid => None, valid => Some(valid)) } private def loadObjectBasedOnKey[T](key: String)(implicit rds: Reads[T]): Option[T] = key match { case _ => None } override def fetchAndGetEntry[T](key: String)(implicit hc: HeaderCarrier, rds: Reads[T],ec:ExecutionContext): Future[Option[T]] = Future.successful(hc.userId.filter(_ == cachedUserId).flatMap(p => loadObjectBasedOnKey(key))) override def cache[A](formId: String, body: A)(implicit wts: Writes[A], hc: HeaderCarrier,ec:ExecutionContext): Future[CacheMap] = Future.successful(CacheMap("", Map())) }
Example 52
Source File: Swagger.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package controllers import java.io.File import java.util import de.zalando.play.controllers.PlayBodyParsing._ import org.yaml.snakeyaml.Yaml import play.api.mvc._ import scala.io.Source @SuppressWarnings(Array("org.wartremover.warts.Equals", "org.wartremover.warts.AsInstanceOf", "org.wartremover.warts.ExplicitImplicitTypes")) class Swagger extends Controller { def notSpec = Seq("application.conf", "logback.xml", "routes") def listSpecs() = Action { val path = "conf" val file = new File(path) if (file != null && file.list() != null) { val files = file.listFiles().filterNot(_.isDirectory).map(_.getName).toSeq implicit val arrayMarshaller = anyToWritable[Seq[String]]("application/json") val names = files.filterNot(notSpec.contains).filterNot(_.startsWith(".")) Ok(names) } else { NotFound("Path could not be found: " + file.getAbsolutePath) } } def swaggerSpec(name: String) = Action { implicit val mapMarshaller = anyToWritable[java.util.Map[_, _]]("application/json") getSpec(name).map(s => Ok(s)).getOrElse(NotFound(name)) } private def getSpec(yamlPath: String) = { val yamlFile = Option(getClass.getClassLoader.getResource(yamlPath)) val yamlStr = yamlFile map { yaml => Source.fromURL(yaml).getLines().mkString("\n") } val javaMap = yamlStr map { new Yaml().load(_).asInstanceOf[util.Map[Any, Any]] } javaMap } }
Example 53
Source File: NifiProcessorSpec.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package it.gov.daf.ingestion.nifi import akka.actor.ActorSystem import akka.stream.ActorMaterializer import com.typesafe.config.Config import it.gov.daf.catalogmanager.MetaCatalog import it.gov.daf.catalogmanager.json._ import it.gov.daf.ingestion.metacatalog.MetaCatalogProcessor import org.scalatest.{AsyncFlatSpec, Matchers} import play.api.libs.json._ import play.api.libs.ws.WSResponse import play.api.libs.ws.ahc.AhcWSClient import scala.concurrent.Future import scala.io.Source class NifiProcessorSpec extends AsyncFlatSpec with Matchers { "A Nifi Processor " should "create a nifi pipeline for a correct meta catalog entry" in { val in = this.getClass.getResourceAsStream("/data_test.json") val sMetaCatalog = Source.fromInputStream(in).getLines().mkString(" ") in.close() val parsed = Json.parse(sMetaCatalog) val metaCatalog: JsResult[MetaCatalog] = Json.fromJson[MetaCatalog](parsed) metaCatalog.isSuccess shouldBe true implicit val system: ActorSystem = ActorSystem() implicit val materializer: ActorMaterializer = ActorMaterializer() implicit val wsClient: AhcWSClient = AhcWSClient() implicit val config: Config = com.typesafe.config.ConfigFactory.load() implicit val ec = system.dispatcher def closeAll(): Unit = { system.terminate() materializer.shutdown() wsClient.close() } val fResult = NifiProcessor(metaCatalog.get).createDataFlow() fResult.map { response => println(response) closeAll() true shouldBe true } } }
Example 54
Source File: Swagger.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package controllers import java.io.File import java.util import de.zalando.play.controllers.PlayBodyParsing._ import org.yaml.snakeyaml.Yaml import play.api.mvc._ import scala.io.Source @SuppressWarnings(Array("org.wartremover.warts.Equals", "org.wartremover.warts.AsInstanceOf", "org.wartremover.warts.ExplicitImplicitTypes")) class Swagger extends Controller { def notSpec = Seq("application.conf", "logback.xml", "routes") def listSpecs() = Action { val path = "conf" val file = new File(path) if (file != null && file.list() != null) { val files = file.listFiles().filterNot(_.isDirectory).map(_.getName).toSeq implicit val arrayMarshaller = anyToWritable[Seq[String]]("application/json") val names = files.filterNot(notSpec.contains).filterNot(_.startsWith(".")) Ok(names) } else { NotFound("Path could not be found: " + file.getAbsolutePath) } } def swaggerSpec(name: String) = Action { implicit val mapMarshaller = anyToWritable[java.util.Map[_,_]]("application/json") getSpec(name).map(s => Ok(s)).getOrElse(NotFound(name)) } private def getSpec(yamlPath: String) = { val yamlFile = Option(getClass.getClassLoader.getResource(yamlPath)) val yamlStr = yamlFile map { yaml => Source.fromURL(yaml).getLines().mkString("\n") } val javaMap = yamlStr map { new Yaml().load(_).asInstanceOf[util.Map[Any, Any]] } javaMap } }
Example 57
Source File: Swagger.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package controllers import java.io.File import java.util import de.zalando.play.controllers.PlayBodyParsing._ import org.yaml.snakeyaml.Yaml import play.api.mvc._ import scala.io.Source //@SuppressWarnings(Array("org.wartremover.warts.Equals", "org.wartremover.warts.AsInstanceOf", "org.wartremover.warts.ExplicitImplicitTypes")) class Swagger extends Controller { def notSpec = Seq("application.conf", "logback.xml", "routes") def listSpecs() = Action { val path = "conf" val file = new File(path) if (file != null && file.list() != null) { val files = file.listFiles().filterNot(_.isDirectory).map(_.getName).toSeq implicit val arrayMarshaller = anyToWritable[Seq[String]]("application/json") val names = files.filterNot(notSpec.contains).filterNot(_.startsWith(".")) Ok(names) } else { NotFound("Path could not be found: " + file.getAbsolutePath) } } def swaggerSpec(name: String) = Action { implicit val mapMarshaller = anyToWritable[java.util.Map[_,_]]("application/json") getSpec(name).map(s => Ok(s)).getOrElse(NotFound(name)) } private def getSpec(yamlPath: String) = { val yamlFile = Option(getClass.getClassLoader.getResource(yamlPath)) val yamlStr = yamlFile map { yaml => Source.fromURL(yaml).getLines().mkString("\n") } val javaMap = yamlStr map { new Yaml().load(_).asInstanceOf[util.Map[Any, Any]] } javaMap } }
Example 59
Source File: Swagger.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package controllers import java.io.File import java.util import de.zalando.play.controllers.PlayBodyParsing._ import org.yaml.snakeyaml.Yaml import play.api.mvc._ import scala.io.Source class Swagger extends Controller { def notSpec = Seq("application.conf", "logback.xml", "routes") def listSpecs() = Action { val path = "conf" val file = new File(path) if (file != null && file.list() != null) { val files = file.listFiles().filterNot(_.isDirectory).map(_.getName).toSeq implicit val arrayMarshaller = anyToWritable[Seq[String]]("application/json") val names = files.filterNot(notSpec.contains).filterNot(_.startsWith(".")) Ok(names) } else { NotFound("Path could not be found: " + file.getAbsolutePath) } } def swaggerSpec(name: String) = Action { implicit val mapMarshaller = anyToWritable[java.util.Map[_, _]]("application/json") getSpec(name).map(s => Ok(s)).getOrElse(NotFound(name)) } private def getSpec(yamlPath: String) = { val yamlFile = Option(getClass.getClassLoader.getResource(yamlPath)) val yamlStr = yamlFile map { yaml => Source.fromURL(yaml).getLines().mkString("\n") } val javaMap = yamlStr map { new Yaml().load(_).asInstanceOf[util.Map[Any, Any]] } javaMap } }
Example 60
Source File: ScalaSensor.scala From sonar-scala with GNU Lesser General Public License v3.0 | 5 votes |
package com.mwz.sonar.scala package sensor import scala.io.Source import scala.jdk.CollectionConverters._ import org.sonar.api.batch.sensor.{Sensor, SensorContext, SensorDescriptor} import org.sonar.api.measures.{CoreMetrics => CM} import scalariform.ScalaVersion final class ScalaSensor(globalConfig: GlobalConfig) extends Sensor { override def execute(context: SensorContext): Unit = { val charset = context.fileSystem().encoding.toString val inputFiles = context .fileSystem() .inputFiles(context.fileSystem().predicates().hasLanguage(Scala.LanguageKey)) val scalaVersion: ScalaVersion = Scala.getScalaVersion(context.config()) // Save measures if not in pr decoration mode. if (!globalConfig.prDecoration) inputFiles.asScala.foreach { inputFile => // TODO: This source needs to be closed! val sourceCode = Source.fromFile(inputFile.uri, charset).mkString val tokens = Scala.tokenize(sourceCode, scalaVersion) context .newMeasure() .on(inputFile) .forMetric(CM.COMMENT_LINES) .withValue(Measures.countCommentLines(tokens)) .save() context .newMeasure() .on(inputFile) .forMetric(CM.NCLOC) .withValue(Measures.countNonCommentLines(tokens)) .save() context .newMeasure() .on(inputFile) .forMetric(CM.CLASSES) .withValue(Measures.countClasses(tokens)) .save() context .newMeasure() .on(inputFile) .forMetric(CM.FUNCTIONS) .withValue(Measures.countMethods(tokens)) .save() } } override def describe(descriptor: SensorDescriptor): Unit = { descriptor .onlyOnLanguage(Scala.LanguageKey) .name("Scala Sensor") } }
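The TODO in the sensor above notes that the Source opened for each input file is never closed. A hedged sketch of one way to address it while keeping the rest of the measure loop unchanged:

import scala.io.Source

def readFileContent(uri: java.net.URI, charset: String): String = {
  val source = Source.fromFile(uri, charset)
  try source.mkString
  finally source.close()
}

// inside the foreach over inputFiles:
// val sourceCode = readFileContent(inputFile.uri, charset)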
Example 61
Source File: PatchSpec.scala From sonar-scala with GNU Lesser General Public License v3.0 | 5 votes |
package com.mwz.sonar.scala.pr import scala.io.Source import org.scalatest.EitherValues import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks class PatchSpec extends AnyFlatSpec with Matchers with EitherValues with ScalaCheckDrivenPropertyChecks { def patch(path: String): String = Source .fromResource(path) .getLines() .mkString("\n") it should "fail to parse an invalid patch" in { forAll((s: String) => Patch.parse(s) shouldBe Left(PatchError(s))) } it should "parse successfully a patch with additions only" in { val expected: Map[FileLine, PatchLine] = (69 to 84).zipWithIndex.map { case (fileLine, index) => (FileLine(fileLine), PatchLine(index + 1)) }.toMap Patch.parse(patch("patches/add.patch")).right.value shouldBe expected } it should "parse successfully a patch with deletions only" in { val expected: Map[FileLine, PatchLine] = List( List(26 -> 1, 27 -> 2, 28 -> 3, 29 -> 6, 30 -> 7, 31 -> 8), List(43 -> 10, 44 -> 11, 45 -> 12, 46 -> 15, 47 -> 16, 48 -> 20, 49 -> 21, 50 -> 22) ).flatten.map { case (k, v) => FileLine(k) -> PatchLine(v) }.toMap Patch.parse(patch("patches/del.patch")).right.value shouldBe expected } it should "parse successfully a patch with additions, deletions and modifications" in { val expected: Map[FileLine, PatchLine] = List( (43 to 50).zipWithIndex.map(a => (a._1, a._2 + 1)), List(60 -> 10, 61 -> 11, 62 -> 12, 63 -> 15, 64 -> 16, 65 -> 17), List(77 -> 19, 78 -> 20, 79 -> 21, 80 -> 23, 81 -> 24, 82 -> 25, 83 -> 26) ).flatten.map { case (k, v) => FileLine(k) -> PatchLine(v) }.toMap Patch.parse(patch("patches/add-del-mod.patch")).right.value shouldBe expected } }
Example 62
Source File: ParSeqTraceBaseVisualizer.scala From play-parseq with Apache License 2.0 | 5 votes |
package com.linkedin.playparseq.trace.utils import com.linkedin.parseq.trace.Trace import com.linkedin.parseq.trace.codec.json.JsonTraceCodec import java.io.File import play.api.Environment import play.api.http.HttpConfiguration import scala.collection.immutable.ListMap import scala.io.Source protected[this] def showTrace(trace: Trace, environment: Environment, httpConfiguration: HttpConfiguration): String = { // Get Trace JSON val traceJson = new JsonTraceCodec().encode(trace) // Generate pre-fill script for onload Trace JSON val preFillScript = """ |<base href="%s"> |<script> | var ESC_FLAGS = "gi"; | var EMBED_ESCAPES = __EMBED_ESCAPES__; | var unescapeForEmbedding = function (str) { | for (var key in EMBED_ESCAPES) { | if (EMBED_ESCAPES.hasOwnProperty(key)) { | str = str.replace(new RegExp(EMBED_ESCAPES[key], ESC_FLAGS), key); | } | } | return str; | }; | var getEmbeddedContent = function(id) { | var contentElem = document.getElementById(id); | var innerContent = contentElem.firstChild.nodeValue; | return JSON.parse(unescapeForEmbedding(innerContent)); | }; | window.onload = function() { | var json = getEmbeddedContent('injected-json'); | // The renderTrace method does not yet support normal JS objects, but expects stringified JSON | renderTrace(JSON.stringify(json)); | } |</script> """.stripMargin.format(httpConfiguration.context.stripSuffix("/") + TracevisRoot + "/") // Generate injected JSON placeholder val injectedJson = """<code id="injected-json"><!--__JSON__--></code>""" // Build HTML page environment.resourceAsStream(new File(TracevisRoot, TraceName).getPath).map(stream => { // Escape script and JSON val script = preFillScript.replace("__EMBED_ESCAPES__", """{"&":"&","-":"&dsh;"}""") val json = injectedJson.replace("__JSON__", ListMap("&" -> "&", "-" -> "&dsh;").foldLeft(traceJson)((acc, escape) => acc.replaceAll(escape._1, escape._2))) // Inject script and JSON Source.fromInputStream(stream).mkString.replace("<title>", script + "\n<title>").replace("</style>", "</style>\n" + json) }).orNull } }
Example 63
Source File: MemoryVectorStoreIndexer.scala From dbpedia-spotlight-model with Apache License 2.0 | 5 votes |
package org.dbpedia.spotlight.db import java.io.File import breeze.linalg.DenseMatrix import org.dbpedia.spotlight.db.model.{ResourceStore, TokenTypeStore} import org.dbpedia.spotlight.exceptions.DBpediaResourceNotFoundException import scala.collection.immutable.Iterable import scala.io.Source import java.util import org.dbpedia.spotlight.db.memory.{MemoryStore, MemoryVectorStore} import org.dbpedia.spotlight.model.{TokenType, DBpediaResource, TokenOccurrenceIndexer} class MemoryVectorStoreIndexer(modelPath: File, dictPath: File){ lazy val contextStore = new MemoryVectorStore() var dict: Map[String, Int] = null def loadVectorDict(tokenTypeStore: TokenTypeStore, resourceStore: ResourceStore) = { println("Loading vector dictionary!") dict = Source.fromFile(dictPath, "UTF-8").getLines().map { line => val contents = line.split("\t") (contents(0), contents(1).toInt) }.toMap var resources: collection.mutable.Map[Int, Int] = collection.mutable.HashMap[Int,Int]() var tokens: collection.mutable.Map[Int, Int] = collection.mutable.HashMap[Int,Int]() // TODO: error handling if we can't find the token or resource var failedResources = 0 var succeededResources = 0 var failedTokens = 0 var succeededTokens = 0 dict.foreach { case(key, value) => if(key.startsWith("DBPEDIA_ID/")){ try { val resource = resourceStore.getResourceByName(key.replace("DBPEDIA_ID/", "")) resources += (resource.id -> value) succeededResources += 1 } catch { case e: DBpediaResourceNotFoundException=> { failedResources += 1 if (failedResources % 1000 == 0){ println("Can't find resource: " + key.replace("DBPEDIA_ID/", "")) } } } }else{ val token = tokenTypeStore.getTokenType(key) if (token == TokenType.UNKNOWN){ failedTokens += 1 if (failedTokens % 1000 == 0){ println("Can't find token: " + key) } } else { tokens += (token.id -> value) succeededTokens += 1 } } } println("Failed on " + failedResources + " entities, succeeded on " + succeededResources) println("Failed on " + failedTokens + " tokens, succeeded on " + succeededTokens) contextStore.resourceIdToVectorIndex = resources.toMap contextStore.tokenTypeIdToVectorIndex = tokens.toMap println("Done loading dict.") } def loadVectorsAndWriteToStore(outputFile:File) = { println("Loading vectors..") val matrixSource = Source.fromFile(modelPath) val lines = matrixSource.getLines() val rows = lines.next().substring(2).toInt val cols = lines.next().substring(2).toInt contextStore.vectors = new DenseMatrix[Float](rows, cols) println("Reading CSV and writing to store...") lines.zipWithIndex.foreach { case (row_str, row_idx) => if (row_idx % 10000 == 0) println("At row " + row_idx) val values = row_str.split(",").map(_.trim).map(_.toDouble) values.zipWithIndex.foreach { case (value, col_idx) => contextStore.vectors(row_idx, col_idx) = value.toFloat } } matrixSource.close() println("Done, dumping..") MemoryStore.dump(contextStore, outputFile) } }
Example 64
Source File: TypesLoader.scala From dbpedia-spotlight-model with Apache License 2.0 | 5 votes |
package org.dbpedia.spotlight.util import java.io.{File, InputStream} import java.util.LinkedHashSet import org.dbpedia.spotlight.log.SpotlightLog import org.dbpedia.spotlight.model._ import org.semanticweb.yars.nx.parser.NxParser import scala.collection.JavaConversions._ import scala.io.Source object TypesLoader { def getTypesMap(typeDictFile : File) : Map[String, List[OntologyType]] = { SpotlightLog.info(this.getClass, "Loading types map...") if (!(typeDictFile.getName.toLowerCase endsWith ".tsv")) throw new IllegalArgumentException("types mapping only accepted in tsv format so far! can't parse "+typeDictFile) // CAUTION: this assumes that the most specific type is listed last var typesMap = Map[String,List[OntologyType]]() for (line <- Source.fromFile(typeDictFile, "UTF-8").getLines) { val elements = line.split("\t") val uri = new DBpediaResource(elements(0)).uri val t = Factory.OntologyType.fromURI(elements(1)) val typesList : List[OntologyType] = typesMap.get(uri).getOrElse(List[OntologyType]()) ::: List(t) typesMap = typesMap.updated(uri, typesList) } SpotlightLog.info(this.getClass, "Done.") typesMap } def getTypesMapFromTSV_java(input: InputStream) : java.util.Map[String,java.util.LinkedHashSet[OntologyType]] = { SpotlightLog.info(this.getClass, "Loading types map...") var typesMap = Map[String,java.util.LinkedHashSet[OntologyType]]() var i = 0; for (line <- Source.fromInputStream(input, "UTF-8").getLines) { val elements = line.split("\t") val uri = new DBpediaResource(elements(0)).uri val typeUri = elements(1) if (!typeUri.equalsIgnoreCase("http://www.w3.org/2002/07/owl#Thing")) { val t = Factory.OntologyType.fromURI(typeUri) i = i + 1; val typesList : java.util.LinkedHashSet[OntologyType] = typesMap.getOrElse(uri,new LinkedHashSet[OntologyType]()) typesList.add(t) t match { case ft: FreebaseType => typesList.add(Factory.OntologyType.fromQName("Freebase:/"+ft.domain)) //Add supertype as well to mimic inference case _ => //nothing } typesMap = typesMap.updated(uri, typesList) } } SpotlightLog.info(this.getClass, "Done. Loaded %d types for %d resources.", i,typesMap.size) typesMap } def getTypesMap_java(instanceTypesStream : InputStream) : java.util.Map[String,java.util.LinkedHashSet[OntologyType]] = { SpotlightLog.info(this.getClass, "Loading types map...") var typesMap = Map[String,java.util.LinkedHashSet[OntologyType]]() var i = 0; // CAUTION: this assumes that the most specific type is listed last val parser = new NxParser(instanceTypesStream) while (parser.hasNext) { val triple = parser.next if(!triple(2).toString.endsWith("owl#Thing")) { i = i + 1; val resource = new DBpediaResource(triple(0).toString) val t = Factory.OntologyType.fromURI(triple(2).toString) val typesList : java.util.LinkedHashSet[OntologyType] = typesMap.get(resource.uri).getOrElse(new LinkedHashSet[OntologyType]()) typesList.add(t) typesMap = typesMap.updated(resource.uri, typesList) } } SpotlightLog.info(this.getClass, "Done. Loaded %d types.", i) typesMap } }
Example 65
Source File: NTripleSource.scala From dbpedia-spotlight-model with Apache License 2.0 | 5 votes |
package org.dbpedia.spotlight.io

import java.io._
import java.util.zip.GZIPInputStream

import org.dbpedia.spotlight.log.SpotlightLog

import scala.io.Source

object NTripleSource {
  def fromFile(ntFile: File): NTripleSource = new NTripleSource(ntFile)

  class NTripleSource(ntFile: File) extends Traversable[(String, String, String)] {

    override def foreach[U](f: ((String, String, String)) => U) {
      var input: InputStream = new FileInputStream(ntFile)
      if (ntFile.getName.endsWith(".gz")) {
        input = new GZIPInputStream(input)
      }

      var linesIterator: Iterator[String] = Iterator.empty
      try {
        linesIterator = Source.fromInputStream(input, "UTF-8").getLines
      } catch {
        case e: java.nio.charset.MalformedInputException =>
          linesIterator = Source.fromInputStream(input).getLines
      }

      for (line <- linesIterator) {
        if (!line.startsWith("#")) { // comments
          val elements = line.trim.split(" ")
          if (elements.length >= 4) {
            var subj = elements(0)
            var pred = elements(1)
            var obj = elements(2)

            subj = subj.substring(1, subj.length - 1)
            pred = pred.substring(1, pred.length - 1)
            obj = obj.substring(1, obj.length - 1)

            f((subj, pred, obj))
          } else {
            SpotlightLog.error(this.getClass, "line must have at least 4 whitespaces; got %d in line: %d", elements.length - 1, line)
          }
        }
      }
    }
  }
}
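The parser above falls back to the platform default charset when UTF-8 decoding fails. Depending on the data, an alternative is to stay on UTF-8 and replace malformed bytes instead of failing; a sketch using scala.io.Codec (the replacement policy and file name are assumptions, not part of the original):

import java.io.FileInputStream
import java.nio.charset.CodingErrorAction

import scala.io.{Codec, Source}

implicit val codec: Codec = Codec.UTF8
  .onMalformedInput(CodingErrorAction.REPLACE)
  .onUnmappableCharacter(CodingErrorAction.REPLACE)

// The implicit codec is picked up by fromInputStream, so no MalformedInputException handling is needed.
val linesIterator = Source.fromInputStream(new FileInputStream("dump.nt")).getLines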
Example 66
Source File: DBpediaResourceFactorySQLTest.scala From dbpedia-spotlight-model with Apache License 2.0 | 5 votes |
package org.dbpedia.spotlight.model

import org.junit.Test

import scala.io.Source

// TODO fix hard codes and register into the module pom.
class DBpediaResourceFactorySQLTest {

  val configuration: SpotlightConfiguration = new SpotlightConfiguration("conf/server.properties")
  val factory: SpotlightFactory = new SpotlightFactory(configuration)
  val dbpediaResourceFactory = configuration.getDBpediaResourceFactory

  def dbpediaResourceForAllConcepts() {
    //val configuration: IndexingConfiguration = new IndexingConfiguration("conf/indexing.properties")
    val examples = Source.fromFile("/Users/jodaiber/Desktop/DBpedia/conceptURIs.list", "UTF-8").getLines().take(10000)

    examples.foreach(dbpediaID => {
      try {
        val dBpediaResource: DBpediaResource = dbpediaResourceFactory.from(dbpediaID)
        assert(dBpediaResource.uri.equals(dbpediaID))
        assert(dBpediaResource.getTypes.size() >= 0)
        assert(dBpediaResource.support >= 0)
        assert(!dBpediaResource.getTypes.contains(null))
      } catch {
        case e: NoSuchElementException =>
          // There may be a difference between the index and the concept list when testing...
      }
    })
  }

  @Test
  def createDBpediaResourcesOnce() {
    dbpediaResourceForAllConcepts()
  }

  @Test
  def createDBpediaResourcesTenTimes() {
    (1 to 10 toList).foreach { _ =>
      dbpediaResourceForAllConcepts()
    }
  }
}
Example 67
Source File: SparkSqlUtils.scala From HadoopLearning with MIT License | 5 votes |
package com.c503.utils

import java.io.{BufferedInputStream, BufferedReader, FileInputStream, InputStreamReader}
import java.nio.file.Path

import com.google.common.io.Resources
import org.apache.log4j.{Level, Logger}
import org.apache.mesos.Protos.Resource
import org.apache.spark.sql.SparkSession

import scala.io.Source

object SparkSqlUtils {

  // getPathByName (which resolves the SQL file's location) is defined elsewhere in the original source file
  def readSqlByPath(sqlPath: String) = {
    val buf = new StringBuilder
    val path = this.getPathByName(sqlPath)
    val file = Source.fromFile(path)
    for (line <- file.getLines) {
      buf ++= line + "\n"
    }
    file.close()
    buf.toString()
  }
}
Example 68
Source File: ExportData.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.imexport

import java.io.File

import com.webank.wedatasphere.linkis.common.utils.Logging
import org.apache.spark.sql.SparkSession
import org.json4s._
import org.json4s.jackson.JsonMethods._

import scala.io.Source

object ExportData extends Logging {

  implicit val formats = DefaultFormats

  def exportData(spark: SparkSession, dataInfo: String, destination: String): Unit = {
    exportDataFromFile(spark, parse(dataInfo).extract[Map[String, Any]], parse(destination).extract[Map[String, Any]])
  }

  def exportDataByFile(spark: SparkSession, dataInfoPath: String, destination: String): Unit = {
    val fileSource = Source.fromFile(dataInfoPath)
    val dataInfo = fileSource.mkString
    exportDataFromFile(spark, parse(dataInfo).extract[Map[String, Any]], parse(destination).extract[Map[String, Any]])
    fileSource.close()
    val file = new File(dataInfoPath)
    if (file.exists()) {
      file.delete()
    }
  }

  def exportDataFromFile(spark: SparkSession, dataInfo: Map[String, Any], dest: Map[String, Any]): Unit = {
    // Export dataFrame
    val df = spark.sql(getExportSql(dataInfo))
    // dest
    val pathType = LoadData.getMapValue[String](dest, "pathType", "share")
    val path =
      if ("share".equals(pathType)) "file://" + LoadData.getMapValue[String](dest, "path")
      else "hdfs://" + LoadData.getMapValue[String](dest, "path")
    val hasHeader = LoadData.getMapValue[Boolean](dest, "hasHeader", false)
    val isCsv = LoadData.getMapValue[Boolean](dest, "isCsv", true)
    val isOverwrite = LoadData.getMapValue[Boolean](dest, "isOverwrite", true)
    val sheetName = LoadData.getMapValue[String](dest, "sheetName", "Sheet1")
    val fieldDelimiter = LoadData.getMapValue[String](dest, "fieldDelimiter", ",")
    if (isCsv) {
      CsvRelation.saveDFToCsv(spark, df, path, hasHeader, isOverwrite, option = Map("fieldDelimiter" -> fieldDelimiter))
    } else {
      df.write.format("com.webank.wedatasphere.spark.excel")
        .option("sheetName", sheetName)
        .option("useHeader", hasHeader)
        .mode("overwrite").save(path)
    }
    warn(s"Succeed to export data to path:$path")
  }

  def getExportSql(dataInfo: Map[String, Any]): String = {
    val sql = new StringBuilder
    // dataInfo
    val database = LoadData.getMapValue[String](dataInfo, "database")
    val tableName = LoadData.getMapValue[String](dataInfo, "tableName")
    val isPartition = LoadData.getMapValue[Boolean](dataInfo, "isPartition", false)
    val partition = LoadData.getMapValue[String](dataInfo, "partition", "ds")
    val partitionValue = LoadData.getMapValue[String](dataInfo, "partitionValue", "1993-01-02")
    val columns = LoadData.getMapValue[String](dataInfo, "columns", "*")
    sql.append("select ").append(columns).append(" from ").append(s"$database.$tableName")
    if (isPartition) sql.append(" where ").append(s"$partition=$partitionValue")
    val sqlString = sql.toString()
    warn(s"export sql:$sqlString")
    sqlString
  }
}
Example 69
Source File: Mnemonics.scala From LearningScala with Apache License 2.0 | 5 votes |
package _980_problem_solving

import scala.io.Source
import scala.language.postfixOps

// Enclosing object reconstructed from the file name; the dictionary loading and
// the wordsForNum map this code relies on appear earlier in the original file.
object Mnemonics {

  def encode(number: String): Set[List[String]] =
    if (number.isEmpty) Set(List())
    else {
      for {
        split <- 1 to number.length
        word <- wordsForNum(number take split)
        rest <- encode(number drop split)
      } yield word :: rest
    }.toSet

  def translate(number: String): Set[String] =
    encode(number) map (_ mkString " ")

  // testing
  println(translate("7225247386"))
  println(translate("72252"))
  println(translate("783364"))
}
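The snippet depends on a wordsForNum map that the excerpt does not show. A minimal sketch of the usual prelude for this phone-keypad exercise, assuming a word list loaded with scala.io.Source; the URL and variable names are assumptions.

// Hypothetical prelude: load a dictionary and group words by their digit encoding.
val words: List[String] =
  Source.fromURL("https://example.org/linuxwords.txt").getLines() // assumed word-list location
    .toList
    .filter(word => word.forall(_.isLetter))

val mnem: Map[Char, String] = Map(
  '2' -> "ABC", '3' -> "DEF", '4' -> "GHI", '5' -> "JKL",
  '6' -> "MNO", '7' -> "PQRS", '8' -> "TUV", '9' -> "WXYZ")

// Invert the keypad map (letter -> digit), then encode a word as its digit string.
val charCode: Map[Char, Char] =
  for ((digit, letters) <- mnem; letter <- letters) yield letter -> digit

def wordCode(word: String): String = word.toUpperCase.map(charCode)

val wordsForNum: Map[String, Seq[String]] =
  words.groupBy(wordCode).withDefaultValue(Seq())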
Example 70
Source File: CollectionRollerConfigParser.scala From pulse with Apache License 2.0 | 5 votes |
package io.phdata.pulse.collectionroller

import net.jcazevedo.moultingyaml._

import scala.io.Source

object YamlProtocol extends DefaultYamlProtocol {
  implicit val application = yamlFormat6(Application)
  implicit val config = yamlFormat2(CollectionRollerConfig)
}

case class Application(name: String,
                       numCollections: Option[Int],
                       shards: Option[Int],
                       replicas: Option[Int],
                       rollPeriod: Option[Int],
                       solrConfigSetName: String = "testconf")
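The excerpt shows only the YAML protocol and the Application case class; the parser object itself and the CollectionRollerConfig case class are not included. A minimal sketch of how such a parser might read a config file with scala.io.Source and moulting-yaml; the CollectionRollerConfig fields and the method name are assumptions, not the project's actual definitions.

// Hypothetical: the real CollectionRollerConfig has two fields (see yamlFormat2 above),
// assumed here to be the application list plus a Solr connection string.
case class CollectionRollerConfig(applications: List[Application], solr: String)

object CollectionRollerConfigParser {
  import YamlProtocol._

  def getConfig(path: String): CollectionRollerConfig = {
    val source = Source.fromFile(path)
    try source.mkString.parseYaml.convertTo[CollectionRollerConfig]
    finally source.close()
  }
}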
Example 71
Source File: AlertEngineConfigParser.scala From pulse with Apache License 2.0 | 5 votes |
package io.phdata.pulse.alertengine import net.jcazevedo.moultingyaml._ import scala.io.Source object YamlProtocol extends DefaultYamlProtocol { implicit val alert: YamlFormat[AlertRule] = yamlFormat5(AlertRule) implicit val mailProfile: YamlFormat[MailAlertProfile] = yamlFormat2(MailAlertProfile) implicit val slackProfile: YamlFormat[SlackAlertProfile] = yamlFormat2(SlackAlertProfile) implicit val application: YamlFormat[Application] = yamlFormat4(Application) implicit val config: YamlFormat[AlertEngineConfig] = yamlFormat1(AlertEngineConfig) } object AlertTypes { val SOLR: String = "solr" val SQL: String = "sql" val ALL_TYPES: Set[String] = Set(AlertTypes.SOLR, AlertTypes.SQL) } case class AlertRule(query: String, retryInterval: Int, resultThreshold: Option[Int] = None, alertProfiles: List[String], alertType: Option[String] = None) trait AlertProfile { val name: String } case class SlackAlertProfile(name: String, url: String) extends AlertProfile case class MailAlertProfile(name: String, addresses: List[String]) extends AlertProfile
Example 72
Source File: TemplateLoader.scala From avoin-voitto with MIT License | 5 votes |
package liigavoitto.journalist.utils import liigavoitto.util.Logging import scaledn.parser.parseEDN import scaledn.{EDN, EDNKeyword, EDNSymbol} import scala.io.Source import scala.reflect.ClassTag import scala.util.{Failure, Try} trait TemplateLoader extends Logging { type FileContent = Map[EDNKeyword, Map[EDNKeyword, List[TemplateVector]]] type TemplateVector = Vector[Any] type TemplateSettings = Map[EDNKeyword, Any] val WeightKey = EDNKeyword(EDNSymbol("weight")) def load(filePath: String, templatesName: String, language: String) = { val content = loadResource(filePath) val parsed = parseEDN(content) logErrors(parsed, filePath) val mapped = parsed.get.asInstanceOf[FileContent] getTemplates(mapped, templatesName, language) } private def getTemplates(parsed: FileContent, name: String, language: String) = { val templatesName = EDNKeyword(EDNSymbol(name)) val languageKey = EDNKeyword(EDNSymbol(language)) parsed(templatesName)(languageKey).map(parseTemplate) } private def parseTemplate(vector: TemplateVector) = { val tmpl = vector(0).asInstanceOf[String] val weight = getWeight(vector) if (weight.isDefined) Template(tmpl, weight.get) else Template(tmpl) } private def asInstanceOfOption[T: ClassTag](o: Any): Option[T] = Some(o) collect { case m: T => m } private def getWeight(vector: Vector[Any]) = for { opts <- vector.lift(1) settings <- asInstanceOfOption[TemplateSettings](opts) value <- settings.get(WeightKey) asDouble <- asInstanceOfOption[Double](value) } yield asDouble private def loadResource(path: String) = { val resourcePath = path val res = getClass.getClassLoader.getResource(resourcePath) val source = Source.fromURL(res) source.mkString } private def logErrors(parsed: Try[EDN], filePath: String) = parsed match { case Failure(f : org.parboiled2.ParseError) => { log.error(s"$filePath ParseError at line " + f.position.line + " col " + f.position.column) } case _ => } }
Example 73
Source File: CsvKafkaPublisher.scala From Taxi360 with Apache License 2.0 | 5 votes |
package com.hadooparchitecturebook.taxi360.common import java.io.File import java.util.Random import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord} import scala.io.Source object CsvKafkaPublisher { var counter = 0 var salts = 0 def main(args:Array[String]): Unit = { if (args.length == 0) { println("<brokerList> " + "<topicName> " + "<dataFolderOrFile> " + "<sleepPerRecord> " + "<acks> " + "<linger.ms> " + "<producer.type> " + "<batch.size> " + "<salts>") return } val kafkaBrokerList = args(0) val kafkaTopicName = args(1) val nyTaxiDataFolder = args(2) val sleepPerRecord = args(3).toInt val acks = args(4).toInt val lingerMs = args(5).toInt val producerType = args(6) //"async" val batchSize = args(7).toInt salts = args(8).toInt val kafkaProducer = KafkaProducerUntil.getNewProducer(kafkaBrokerList, acks, lingerMs, producerType, batchSize) println("--Input:" + nyTaxiDataFolder) val dataFolder = new File(nyTaxiDataFolder) if (dataFolder.isDirectory) { val files = dataFolder.listFiles().iterator files.foreach(f => { println("--Input:" + f) processFile(f, kafkaTopicName, kafkaProducer, sleepPerRecord) }) } else { println("--Input:" + dataFolder) processFile(dataFolder, kafkaTopicName, kafkaProducer, sleepPerRecord) } println("---Done") } def processFile(file:File, kafkaTopicName:String, kafkaProducer: KafkaProducer[String, String], sleepPerRecord:Int): Unit = { var counter = 0 val r = new Random() println("-Starting Reading") Source.fromFile(file).getLines().foreach(l => { counter += 1 if (counter % 10000 == 0) { println("{Sent:" + counter + "}") } if (counter % 100 == 0) { print(".") } Thread.sleep(sleepPerRecord) val saltedVender = r.nextInt(salts) + l if (counter > 2) { publishTaxiRecord(saltedVender, kafkaTopicName, kafkaProducer) } }) } def publishTaxiRecord(line:String, kafkaTopicName:String, kafkaProducer: KafkaProducer[String, String]): Unit = { if (line.startsWith("vendor_name") || line.length < 10) { println("skip") } else { val message = new ProducerRecord[String, String](kafkaTopicName, line.hashCode.toString, line) kafkaProducer.send(message) } } }
Example 74
Source File: files.scala From spatial with MIT License | 5 votes |
package utils.io import java.io._ import java.nio.file._ import java.util.function.Consumer import java.nio.file.{Files,Paths} import scala.io.Source object files { def sep: String = java.io.File.separator def cwd: String = new java.io.File("").getAbsolutePath final val BUFFER_SIZE: Int = 1024 * 4 final val EOF = -1 def copyResource(src: String, dest: String): Unit = { val outFile = new File(dest) val outPath = outFile.getParentFile outPath.mkdirs() val url = getClass.getResource(src) val in: InputStream = url.openStream() val out: OutputStream = new FileOutputStream(outFile) val buffer = new Array[Byte](BUFFER_SIZE) var n: Int = 0 while ({n = in.read(buffer); n != EOF}) { out.write(buffer, 0, n) } out.close() in.close() } def listFiles(dir:String, exts:List[String]=Nil):List[java.io.File] = { val d = new java.io.File(dir) if (d.exists && d.isDirectory) { d.listFiles.filter { file => file.isFile && exts.exists { ext => file.getName.endsWith(ext) } }.toList } else { Nil } } def splitPath(path:String) = { val file = new File(path) (file.getParent, file.getName) } def buildPath(parts:String*):String = { parts.mkString(sep) } def dirName(fullPath:String) = fullPath.split(sep).dropRight(1).mkString(sep) def createDirectories(dir:String) = { val path = Paths.get(dir) if (!Files.exists(path)) Files.createDirectories(path) } }
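A short usage sketch for the path utilities above; the directories and extensions are illustrative assumptions, not values from the project.

// Hypothetical usage of the listing and path helpers.
val scalaSources = files.listFiles("src/main/scala", List(".scala")) // assumed directory
scalaSources.foreach(f => println(f.getAbsolutePath))

val (dir, name) = files.splitPath("/tmp/report/output.txt")
val rebuilt = files.buildPath("/tmp", "report", "output.txt")
files.createDirectories("/tmp/report")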
Example 75
Source File: CongestionModel.scala From spatial with MIT License | 5 votes |
package models import java.io.File import java.io.PrintWriter import utils.io.files._ import utils.math.{CombinationTree, ReduceTree} import scala.io.Source object CongestionModel { abstract class FeatureVec[T] { def loads: T def stores: T def gateds: T def outerIters: T def innerIters: T def bitsPerCycle: T def toSeq: Seq[T] = Seq(stores, outerIters, loads, innerIters, gateds, bitsPerCycle) } case class RawFeatureVec(loads: Double, stores: Double, gateds: Double, outerIters: Double, innerIters: Double, bitsPerCycle: Double) extends FeatureVec[Double] case class CalibFeatureVec(loads: Double, stores: Double, gateds: Double, outerIters: Double, innerIters: Double, bitsPerCycle: Double) extends FeatureVec[Double] // Set up lattice properties val feature_dims = 6 val lattice_rank = 6 val lattice_size = Seq(3,3,3,3,3,3) val num_keypoints = 8 val num_lattices = 1 var model: String = "" // Derive lattice properties val sizes = scala.Array.tabulate(lattice_rank){i => lattice_size(i)} val dimensions = sizes.length val params_per_lattice = sizes.product val strides: scala.Array[Int] = scala.Array.fill(dimensions){1} val nparams = num_lattices * params_per_lattice // Grab lattice params lazy val loads_keypoints_inputs = ModelData.loads_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/loads_keypoints_inputs.csv", ","){x => x.toDouble} lazy val loads_keypoints_outputs = ModelData.loads_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/loads_keypoints_outputs.csv", ","){x => x.toDouble} lazy val stores_keypoints_inputs = ModelData.stores_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/stores_keypoints_inputs.csv", ","){x => x.toDouble} lazy val stores_keypoints_outputs = ModelData.stores_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/stores_keypoints_outputs.csv", ","){x => x.toDouble} lazy val gateds_keypoints_inputs = ModelData.gateds_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/gateds_keypoints_inputs.csv", ","){x => x.toDouble} lazy val gateds_keypoints_outputs = ModelData.gateds_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/gateds_keypoints_outputs.csv", ","){x => x.toDouble} lazy val outerIters_keypoints_inputs = ModelData.outerIters_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/outerIters_keypoints_inputs.csv", ","){x => x.toDouble} lazy val outerIters_keypoints_outputs = ModelData.outerIters_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/outerIters_keypoints_outputs.csv", ","){x => x.toDouble} lazy val innerIters_keypoints_inputs = ModelData.innerIters_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/innerIters_keypoints_inputs.csv", ","){x => x.toDouble} lazy val innerIters_keypoints_outputs = ModelData.innerIters_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/innerIters_keypoints_outputs.csv", ","){x => x.toDouble} lazy val bitsPerCycle_keypoints_inputs = ModelData.bitsPerCycle_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/bitsPerCycle_keypoints_inputs.csv", ","){x => x.toDouble} lazy val 
bitsPerCycle_keypoints_outputs = ModelData.bitsPerCycle_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/bitsPerCycle_keypoints_outputs.csv", ","){x => x.toDouble} lazy val params = ModelData.params(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/LATTICE_PARAMS.csv", ","){x => x.toDouble} def evaluate(features: RawFeatureVec, typ: Runtime.CtrlSchedule): Int = { model = typ.toString val calibrated_features = calibrate_features(features) val result = hypercube_features(calibrated_features) // TODO: Model is naughty if it returns <170 // println(s"evaluating $features = ${170 max result.toInt}") 170 max result.toInt } }
Example 76
Source File: Client.scala From roc with BSD 3-Clause "New" or "Revised" License | 5 votes |
package roc package integrations import com.twitter.finagle.{Addr, Address, Name} import com.twitter.util.Var import scala.io.Source trait Client { private val db = "circle_test" //private val db = "postgres" private val user = "ubuntu" //private val user = "postgres" private val passwd = "" private val host = "127.0.0.1" private val port = 5432 private lazy val address = Address(host, port) protected lazy val Postgres = Postgresql.client .withUserAndPasswd(user, passwd) .withDatabase(db) .newRichClient( Name.Bound(Var[Addr](Addr.Bound(address)), "roc"), "roc" ) } trait SqlReader { def readSql(filename: String): String = { val path = s"core/src/it/resources/sql/$filename" Source.fromFile(path) .getLines .foldLeft("")(_ + _) } }
Example 77
Source File: TotalTweetsScheduler.scala From redrock with Apache License 2.0 | 5 votes |
package com.restapi import java.io.{File, FileInputStream} import akka.actor.{ActorRef, Actor, ActorSystem, Props} import akka.io.IO import org.slf4j.LoggerFactory import play.api.libs.json.Json import spray.can.Http import akka.pattern.ask import spray.http.DateTime import scala.concurrent.duration._ import akka.util.Timeout import scala.concurrent.ExecutionContext.Implicits.global import org.apache.commons.codec.digest.DigestUtils import scala.io.Source case object GetTotalTweetsScheduler object CurrentTotalTweets { @volatile var totalTweets: Long = 0 } class ExecuterTotalTweetsES(delay: FiniteDuration, interval: FiniteDuration) extends Actor { context.system.scheduler.schedule(delay, interval) { getTotalTweetsES } val logger = LoggerFactory.getLogger(this.getClass) override def receive: Actor.Receive = { case GetTotalTweetsScheduler => { logger.info(s"Getting Total of Tweets. Begin: ${CurrentTotalTweets.totalTweets}") } case _ => // just ignore any messages } def getTotalTweetsES: Unit = { val elasticsearchRequests = new GetElasticsearchResponse(0, Array[String](), Array[String](), LoadConf.restConf.getString("searchParam.defaulStartDatetime"), LoadConf.restConf.getString("searchParam.defaultEndDatetime"), LoadConf.esConf.getString("decahoseIndexName")) val totalTweetsResponse = Json.parse(elasticsearchRequests.getTotalTweetsESResponse()) logger.info(s"Getting Total of Tweets. Current: ${CurrentTotalTweets.totalTweets}") CurrentTotalTweets.totalTweets = (totalTweetsResponse \ "hits" \ "total").as[Long] logger.info(s"Total users updated. New: ${CurrentTotalTweets.totalTweets}") } }
Example 78
Source File: io.scala From sbt-org-policies with Apache License 2.0 | 5 votes |
package sbtorgpolicies import java.io._ import java.net.URL import java.nio.charset.Charset import java.nio.file.Path import java.nio.file.Paths.get import cats.syntax.either._ import sbtorgpolicies.exceptions.IOException import scala.io.Source import scala.language.implicitConversions package object io { type IOResult[T] = Either[IOException, T] object syntax { implicit def eitherFilterSyntax[T](either: Either[Throwable, T]): FilteredEitherOps[T] = new FilteredEitherOps(either) implicit def fileNameSyntax(fileName: String): FileNameOps = new FileNameOps(fileName) final class FilteredEitherOps[T](either: Either[Throwable, T]) { def withFilter(f: T => Boolean): Either[Throwable, T] = either match { case Right(r) if !f(r) => new IllegalStateException("Filter condition has not been satisfied").asLeft[T] case _ => either } } final class FileNameOps(filename: String) { def toPath: Path = get(filename) def toFile: File = new File(filename.fixPath) def fixPath: String = filename.replaceAll("/", File.separator) def ensureFinalSlash: String = filename + (if (filename.endsWith(File.separator)) "" else File.separator) } } object IO { def file(path: String): File = new File(path) def url(address: String): URL = new URL(address) def readLines(file: File): Iterator[String] = Source.fromFile(file).getLines() def readBytes(file: File): Array[Byte] = { val is: InputStream = new FileInputStream(file) val array: Array[Byte] = Stream.continually(is.read).takeWhile(_ != -1).map(_.toByte).toArray is.close() array } def write(file: File, content: String, charset: Charset = Charset.forName("UTF-8")): Unit = { val writer = new BufferedWriter( new OutputStreamWriter(new FileOutputStream(file, false), charset) ) writer.write(content) writer.close() } def relativize(base: File, file: File): Option[String] = { def ensureEndingSlash: Option[String] = { val path = base.getAbsolutePath path.lastOption.map { case c if c == File.separatorChar => path case _ => path + File.separatorChar } } val baseFileString = if (base.isDirectory) ensureEndingSlash else None val pathString = file.getAbsolutePath baseFileString flatMap { case baseString if pathString.startsWith(baseString) => Some(pathString.substring(baseString.length)) case _ => None } } } }
Example 79
Source File: Mnemonic.scala From EncryCore with GNU General Public License v3.0 | 5 votes |
package encry.utils

import org.encryfoundation.common.utils.Algos
import scodec.bits.BitVector

import scala.io.Source

object Mnemonic {

  def getWords: Array[String] =
    Source.fromInputStream(getClass.getResourceAsStream("/languages/english/words.txt")).getLines.toArray

  def seedFromMnemonic(mnemonicCode: String, passPhrase: String = ""): Array[Byte] =
    Algos.hash(mnemonicCode + "mnemonic=" + passPhrase)

  def entropyToMnemonicCode(entropy: Array[Byte]): String = {
    val words: Array[String] = getWords
    val checkSum: BitVector = BitVector(Algos.hash(entropy))
    val entropyWithCheckSum: BitVector = BitVector(entropy) ++ checkSum.take(4)
    entropyWithCheckSum.grouped(11).map { i =>
      words(i.toInt(signed = false))
    }.mkString(" ")
  }
}
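A short usage sketch for the object above, assuming 16 bytes of entropy drawn from the standard library's random generator; the original project may source its entropy differently.

// Hypothetical usage: 16 random bytes -> mnemonic phrase -> wallet seed.
val entropy: Array[Byte] = Array.fill(16)(scala.util.Random.nextInt(256).toByte)
val phrase: String = Mnemonic.entropyToMnemonicCode(entropy)
val seed: Array[Byte] = Mnemonic.seedFromMnemonic(phrase, passPhrase = "correct horse")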
Example 80
Source File: ProcessUtils.scala From spark-integration with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.k8s.integrationtest import java.util.concurrent.TimeUnit import scala.collection.mutable.ArrayBuffer import scala.io.Source object ProcessUtils extends Logging { def executeProcess(fullCommand: Array[String], timeout: Long): Seq[String] = { val pb = new ProcessBuilder().command(fullCommand: _*) pb.redirectErrorStream(true) val proc = pb.start() val outputLines = new ArrayBuffer[String] Utils.tryWithResource(proc.getInputStream)( Source.fromInputStream(_, "UTF-8").getLines().foreach { line => logInfo(line) outputLines += line }) assert(proc.waitFor(timeout, TimeUnit.SECONDS), s"Timed out while executing ${fullCommand.mkString(" ")}") assert(proc.exitValue == 0, s"Failed to execute ${fullCommand.mkString(" ")}") outputLines } }
Example 81
Source File: FileUtil.scala From wookiee with Apache License 2.0 | 5 votes |
package com.webtrends.harness.utils

import java.io.File
import java.nio.file.{FileSystems, Files, Path}

import scala.io.Source

// Enclosing object reconstructed from the file name; the original file contains
// additional helpers that are not part of this excerpt.
object FileUtil {

  def getSymLink(f: File): File = {
    if (f == null) throw new NullPointerException("File must not be null")
    val path = FileSystems.getDefault.getPath(f.getPath)
    if (Files.isSymbolicLink(path)) {
      f.getCanonicalFile
    } else {
      f.getAbsoluteFile
    }
  }
}
Example 82
Source File: TestBroadcastVariables.scala From spark-dev with GNU General Public License v3.0 | 5 votes |
package examples

import org.apache.spark.{SparkContext, SparkConf}
import org.apache.spark.rdd.RDD
import org.apache.spark.broadcast.Broadcast

import scala.io.Source
import scala.util.{Try, Success, Failure}
import scala.collection.mutable.Map

// Enclosing object reconstructed from the file name; the rest of the original
// example is not shown in this excerpt.
object TestBroadcastVariables {

  def loadCSVFile(filename: String): Option[Map[String, String]] = {
    val countries = Map[String, String]()
    Try {
      val bufferedSource = Source.fromFile(filename)
      for (line <- bufferedSource.getLines) {
        val Array(country, capital) = line.split(",").map(_.trim)
        countries += country -> capital
      }
      bufferedSource.close()
      return Some(countries)
    }.toOption
  }
}
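Since the example is about broadcast variables, here is a brief sketch of how the loaded map might be broadcast and used in a transformation; the CSV path and the lookup values are illustrative assumptions.

// Hypothetical driver snippet: broadcast the capitals map and use it inside an RDD map.
val conf = new SparkConf().setAppName("TestBroadcastVariables").setMaster("local[*]")
val sc = new SparkContext(conf)

TestBroadcastVariables.loadCSVFile("data/countries.csv").foreach { countries =>
  val broadcastCountries = sc.broadcast(countries)
  val capitals = sc.parallelize(Seq("France", "Japan"))
    .map(country => broadcastCountries.value.getOrElse(country, "unknown"))
  capitals.collect().foreach(println)
}
sc.stop()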
Example 83
Source File: AvroSchemaGeneratorSpec.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.avro.util import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.databind.node.ObjectNode import org.scalatest.matchers.should.Matchers import org.scalatest.funspec.AnyFunSpecLike import scala.io.Source class AvroSchemaGeneratorSpec extends Matchers with AnyFunSpecLike { val mapper = new ObjectMapper val converter = new AvroSchemaGenerator() val json = Source .fromFile( Thread.currentThread.getContextClassLoader .getResource("avro-test.json") .getFile ) .getLines() .mkString describe("The json to avro schema converter") { it("Should include a valid namespace and a valid name") { val jsonNode = mapper.readTree(converter.convert(json, "hydra", "name")) jsonNode.at("/namespace").asText shouldBe "hydra" jsonNode.at("/name").asText shouldBe "name" jsonNode.at("/type").asText shouldBe "record" } it("Should have a valid record type") { val jsonNode = mapper.readTree(converter.convert(json, "hydra", "name")) val arrayNode = jsonNode.at("/fields") arrayNode.get(0).at("/type/type").asText shouldBe "record" } it("Should throw an exception with null values") { val jsonNode = mapper.readTree(json) jsonNode.asInstanceOf[ObjectNode].set("dummyString", null) intercept[IllegalArgumentException] { converter.convert(jsonNode.toString(), "hydra", "name") } } it("Should convert booleans") { val clipViewJson = """ |{ | "clipId": "shawn-wildermuth|front-end-web-app-html5-javascript-css-m01|front-end-web-app-html5-javascript-css-m1-02", | "clipModuleIndex": 1, | "clipName": "front-end-web-app-html5-javascript-css-m1-02", | "contentIndexPosition": 99999, | "countsTowardTrialLimits": false, | "courseName": "front-end-web-app-html5-javascript-css", | "courseTitle": "Front-End Web Development Quick Start With HTML5, CSS, and JavaScript", | "ipAddress": "127.0.0.1", | "moduleAuthorHandle": "shawn-wildermuth", | "moduleId": "shawn-wildermuth|front-end-web-app-html5-javascript-css-m01", | "moduleName": "front-end-web-app-html5-javascript-css-m01", | "online": true, | "royaltiesPaid": true, | "started": "2016-11-30T20:30:45.3136582Z", | "userHandle": "44bbf444-ba44-444a-b444-b444bebb4b4b" |} """.stripMargin val jsonNode = mapper.readTree(clipViewJson) val schema = converter.convert(jsonNode.toString(), "hydra", "name") println(schema) } } }
Example 84
Source File: IngestionErrorHandler.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.kafka.ingestors import akka.actor.Actor import com.pluralsight.hydra.avro.JsonToAvroConversionException import hydra.common.config.ConfigSupport._ import hydra.avro.registry.JsonToAvroConversionExceptionWithMetadata import hydra.common.config.ConfigSupport import hydra.core.ingest.RequestParams.HYDRA_KAFKA_TOPIC_PARAM import hydra.core.protocol.GenericIngestionError import hydra.core.transport.Transport.Deliver import hydra.kafka.producer.AvroRecord import org.apache.avro.Schema import spray.json.DefaultJsonProtocol import scala.io.Source class IngestionErrorHandler extends Actor with ConfigSupport with DefaultJsonProtocol { import spray.json._ private implicit val ec = context.dispatcher private implicit val hydraIngestionErrorInfoFormat = jsonFormat6( HydraIngestionErrorInfo ) private val errorTopic = applicationConfig .getStringOpt("ingest.error-topic") .getOrElse("_hydra_ingest_errors") private lazy val kafkaTransport = context .actorSelection( applicationConfig .getStringOpt(s"transports.kafka.path") .getOrElse(s"/user/service/kafka_transport") ) private val errorSchema = new Schema.Parser() .parse(Source.fromResource("schemas/HydraIngestError.avsc").mkString) override def receive: Receive = { case error: GenericIngestionError => kafkaTransport ! Deliver(buildPayload(error)) } private[ingestors] def buildPayload( err: GenericIngestionError ): AvroRecord = { val schema: Option[String] = err.cause match { case e: JsonToAvroConversionException => Some(e.getSchema.toString) case e: JsonToAvroConversionExceptionWithMetadata => Some(e.location) case e: Exception => None } val topic = err.request.metadataValue(HYDRA_KAFKA_TOPIC_PARAM) val errorInfo = HydraIngestionErrorInfo( err.ingestor, topic, err.cause.getMessage, err.request.metadata, schema, err.request.payload ).toJson.compactPrint AvroRecord( errorTopic, errorSchema, topic, errorInfo, err.request.ackStrategy ) } } case class HydraIngestionErrorInfo( ingestor: String, destination: Option[String], errorMessage: String, metadata: Map[String, String], schema: Option[String], payload: String )
Example 85
Source File: WatchServiceReceiver.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
package org.apache.iota.fey import java.nio.file.StandardWatchEventKinds._ import java.nio.file.{FileSystems, Path} import java.io.File import akka.actor.ActorRef import org.apache.iota.fey.JsonReceiverActor.JSON_RECEIVED import play.api.libs.json._ import scala.io.Source class WatchServiceReceiver(receiverActor: ActorRef) extends JsonReceiver{ processInitialFiles() private val watchService = FileSystems.getDefault.newWatchService() def watch(path: Path) : Unit = path.register(watchService, ENTRY_CREATE, ENTRY_MODIFY) def getJsonObject(params: String): Option[JsValue] = { try{ val stringJson = Source.fromFile(params).getLines.mkString Option(Json.parse(stringJson)) }catch{ case e: Exception => log.error("Could not parse JSON", e) None } } override def execute(): Unit = { val key = watchService.take() val eventsIterator = key.pollEvents().iterator() while(eventsIterator.hasNext) { val event = eventsIterator.next() val relativePath = event.context().asInstanceOf[Path] val path = key.watchable().asInstanceOf[Path].resolve(relativePath) log.debug(s"${event.kind()} --- $path") event.kind() match { case (ENTRY_CREATE | ENTRY_MODIFY) if path.toString.endsWith(CONFIG.JSON_EXTENSION) => processJson(path.toString, path.toFile) case _ => } } key.reset() } private[fey] def processJson(path: String, file: File) = { try{ getJsonObject(path) match { case Some(orchestrationJSON) => val valid = validJson(orchestrationJSON) if(valid && (orchestrationJSON \ JSON_PATH.COMMAND).as[String].toUpperCase != "DELETE"){ checkForLocation(orchestrationJSON) } if(valid) { receiverActor ! JSON_RECEIVED(orchestrationJSON, file) }else{ log.warn(s"File $path not processed. Incorrect JSON schema") } case None => } } catch { case e: Exception => log.error(s"File $path will not be processed", e) } } private def processInitialFiles() = { Utils.getFilesInDirectory(CONFIG.JSON_REPOSITORY) .filter(file => file.getName.endsWith(CONFIG.JSON_EXTENSION)) .foreach(file => { processJson(file.getAbsolutePath, file) }) } override def exceptionOnRun(e: Exception): Unit = { e match { case e: InterruptedException => case e: Exception => log.error("Watch Service stopped", e) } watchService.close() } }
Example 86
Source File: CheckpointProcessor.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
package org.apache.iota.fey import java.io.File import akka.actor.ActorRef import org.apache.iota.fey.JsonReceiverActor.JSON_RECEIVED import play.api.libs.json.{JsValue, Json} import scala.io.Source class CheckpointProcessor(receiverActor: ActorRef) extends JsonReceiver{ override def run(): Unit = { processCheckpointFiles() } def getJsonObject(params: String): Option[JsValue] = { try{ val stringJson = Source.fromFile(params).getLines.mkString Option(Json.parse(stringJson)) }catch{ case e: Exception => log.error("Could not parse JSON", e) None } } private def processJson(path: String, file: File) = { try{ getJsonObject(path) match { case Some(orchestrationJSON) => val valid = validJson(orchestrationJSON) if(valid && (orchestrationJSON \ JSON_PATH.COMMAND).as[String].toUpperCase != "DELETE"){ checkForLocation(orchestrationJSON) } if(valid) { receiverActor ! JSON_RECEIVED(orchestrationJSON, file) }else{ log.warn(s"File $path not processed. Incorrect JSON schema") } file.delete() case None => } } catch { case e: Exception => log.error(s"File $path will not be processed", e) } } private def processCheckpointFiles() = { Utils.getFilesInDirectory(CONFIG.CHECKPOINT_DIR) .filter(file => file.getName.endsWith(CONFIG.JSON_EXTENSION)) .foreach(file => { processJson(file.getAbsolutePath, file) }) } override def execute(): Unit = {} override def exceptionOnRun(e: Exception): Unit = {} }
Example 87
Source File: SparkStreamAdapterExample.scala From eventuate with Apache License 2.0 | 5 votes |
package com.rbmhtechnology.example.spark //#spark-stream-adapter import com.rbmhtechnology.eventuate._ import com.rbmhtechnology.eventuate.adapter.spark.SparkStreamAdapter import org.apache.spark._ import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming._ import org.apache.spark.streaming.dstream.DStream //# import akka.actor._ import com.rbmhtechnology.eventuate.log.EventLogWriter import com.rbmhtechnology.eventuate.log.leveldb.LeveldbEventLog import scala.collection.immutable._ import scala.io.Source object SparkStreamAdapterExample extends App { implicit val system: ActorSystem = ActorSystem(ReplicationConnection.DefaultRemoteSystemName) val logName: String = "L" val endpoint: ReplicationEndpoint = new ReplicationEndpoint(id = "1", logNames = Set(logName), logFactory = logId => LeveldbEventLog.props(logId), connections = Set()) val log: ActorRef = endpoint.logs(logName) val writer: EventLogWriter = new EventLogWriter("writer", log) endpoint.activate() //#spark-stream-adapter val sparkConfig = new SparkConf(true) .setAppName("adapter") .setMaster("local[4]") val sparkContext = new SparkContext(sparkConfig) val sparkStreamingContext = new StreamingContext(sparkContext, Seconds(1)) // Create an Eventuate Spark stream adapter val sparkStreamAdapter = new SparkStreamAdapter( sparkStreamingContext, system.settings.config) // Create a DStream from event log L by connecting to its replication endpoint val stream: DStream[DurableEvent] = sparkStreamAdapter.eventStream( id = "s1", host = "127.0.0.1", port = 2552, logName = "L", fromSequenceNr = 1L, storageLevel = StorageLevel.MEMORY_ONLY) // For processing in strict event storage order, use repartition(1) stream.repartition(1).foreachRDD(rdd => rdd.foreach(println)) // Start event stream processing sparkStreamingContext.start() //# // Generate new events from stdin val lines = Source.stdin.getLines() def prompt(): Unit = { if (lines.hasNext) lines.next() match { case "exit" => sparkStreamingContext.stop(stopSparkContext = true) system.terminate() case line => writer.write(Seq(line)) prompt() } } prompt() }
Example 88
Source File: OrderExample.scala From eventuate with Apache License 2.0 | 5 votes |
package com.rbmhtechnology.example.ordermgnt import akka.actor._ import com.rbmhtechnology.eventuate._ import com.rbmhtechnology.eventuate.VersionedAggregate._ import com.rbmhtechnology.eventuate.log.leveldb.LeveldbEventLog import com.typesafe.config.ConfigFactory import scala.concurrent.Future import scala.io.Source import scala.util._ class OrderExample(manager: ActorRef, view: ActorRef) extends Actor { import OrderActor._ import OrderView._ val lines = Source.stdin.getLines def receive = { case GetStateSuccess(state) => state.values.foreach(printOrder) prompt() case GetStateFailure(cause) => println(cause.getMessage) prompt() case SaveSnapshotSuccess(orderId, metadata) => println(s"[${orderId}] saved snapshot at sequence number ${metadata.sequenceNr}") prompt() case SaveSnapshotFailure(orderId, cause) => println(s"[${orderId}] save snapshot failed: ${cause}") cause.printStackTrace() prompt() case GetUpdateCountSuccess(orderId, count) => println(s"[${orderId}] update count = ${count}") prompt() case CommandSuccess(_) => prompt() case CommandFailure(_, cause: ConflictDetectedException[Order]) => println(s"${cause.getMessage}, select one of the following versions to resolve conflict") printOrder(cause.versions) prompt() case CommandFailure(_, cause) => println(cause.getMessage) prompt() case line: String => line.split(' ').toList match { case "state" :: Nil => manager ! GetState case "count" :: id :: Nil => view ! GetUpdateCount(id) case "create" :: id :: Nil => manager ! CreateOrder(id) case "cancel" :: id :: Nil => manager ! CancelOrder(id) case "save" :: id :: Nil => manager ! SaveSnapshot(id) case "add" :: id :: item :: Nil => manager ! AddOrderItem(id, item) case "remove" :: id :: item :: Nil => manager ! RemoveOrderItem(id, item) case "resolve" :: id :: idx :: Nil => manager ! Resolve(id, idx.toInt) case Nil => prompt() case "" :: Nil => prompt() case na :: nas => println(s"unknown command: ${na}"); prompt() } } def prompt(): Unit = { if (lines.hasNext) lines.next() match { case "exit" => context.system.terminate() case line => self ! line } } override def preStart(): Unit = prompt() } object OrderExample extends App { val recover = args(1) == "recover" val system = ActorSystem(ReplicationConnection.DefaultRemoteSystemName, ConfigFactory.load(args(0))) val endpoint = ReplicationEndpoint(id => LeveldbEventLog.props(id, "s"))(system) import system.dispatcher def initialize(): Future[Unit] = if (recover) endpoint.recover() else Future.successful(endpoint.activate()) initialize() onComplete { case Failure(e) => println(s"Recovery failed: ${e.getMessage}") system.terminate() case Success(_) => val manager = system.actorOf(Props(new OrderManager(endpoint.id, endpoint.logs(ReplicationEndpoint.DefaultLogName)))) val view = system.actorOf(Props(new OrderView(endpoint.id, endpoint.logs(ReplicationEndpoint.DefaultLogName)))) val driver = system.actorOf(Props(new OrderExample(manager, view)).withDispatcher("eventuate.cli-dispatcher")) } }
Example 89
Source File: MavenCoordinatesListReader.scala From exodus with MIT License | 5 votes |
package com.wixpress.build.maven

import java.nio.file.{Files, Path}

import scala.io.Source

object MavenCoordinatesListReader {

  def coordinatesIn(filePath: Path): Set[Coordinates] = {
    val lines = Source.fromInputStream(Files.newInputStream(filePath)).getLines().toSet
    coordinatesInText(lines)
  }

  def coordinatesInText(content: Set[String]): Set[Coordinates] = {
    content
      .map(_.trim)
      .filterNot(_.isEmpty)
      .filterNot(_.startsWith("#"))
      .map(l => Coordinates.deserialize(l))
  }
}
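A brief usage sketch, assuming the conventional group:artifact:version serialization for Coordinates; the exact format accepted by Coordinates.deserialize is defined elsewhere in the project.

// Hypothetical input: one Maven coordinate per line, comments and blank lines ignored.
val lines = Set(
  "# third-party deps",
  "com.google.guava:guava:20.0",
  "",
  "org.scalatest:scalatest_2.12:3.0.5")
val coordinates = MavenCoordinatesListReader.coordinatesInText(lines)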
Example 90
Source File: MigratorApplication.scala From exodus with MIT License | 5 votes |
package com.wix.bazel.migrator.app

import scala.io.Source

object MigratorApplication extends MigratorApp {
  migrate()

  def migrate(): Unit = {
    printHeader()
    new PublicMigrator(configuration).migrate()
  }

  private def printHeader(): Unit = {
    println(Source.fromInputStream(MigratorApplication.getClass.getResourceAsStream("/banner.txt")).mkString)
    println(s"starting migration with configuration [$configuration]")
  }
}
Example 91
Source File: gihyo_6_3_TwitterStreamSuite.scala From gihyo-spark-book-example with Apache License 2.0 | 5 votes |
package jp.gihyo.spark.ch06 import java.nio.file.Files import scala.collection.mutable import scala.io.Source import twitter4j.{Status, TwitterObjectFactory} import org.apache.spark.rdd.RDD import org.apache.spark.streaming.StreamingContextWrapper import jp.gihyo.spark.{SparkFunSuite, TestStreamingContext} class gihyo_6_3_TwitterStreamSuite extends SparkFunSuite with TestStreamingContext { test("run") { val lines = mutable.Queue[RDD[Status]]() val ds = ssc.queueStream(lines) val clock = new StreamingContextWrapper(ssc).manualClock gihyo_6_3_TwitterStream.run( sc, ds, Files.createTempDirectory("TwitterTag").toString, Files.createTempDirectory("TwitterWords").toString) val checkpointDir = Files.createTempDirectory("StreamingUnitTest").toString ssc.checkpoint(checkpointDir) ssc.start() (1 to 2).foreach { case i => // test data lines += sc.makeRDD(Seq( MockTweetGenerator.createMockStatusFromJson(), MockTweetGenerator.createMockStatusFromJson(), MockTweetGenerator.createMockStatusFromJson(), MockTweetGenerator.createMockStatusFromJson())) clock.advance(1000) Thread.sleep(1000) } } } object MockTweetGenerator { // Creates a tweet status from a JSON file def createMockStatusFromJson(): Status = { val jsonFile = getClass.getResource("/streaming/test-tweet.json").getPath TwitterObjectFactory.createStatus(Source.fromFile(jsonFile).getLines().mkString) } }
Example 92
Source File: Utils.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package com.packt.ScalaML.HAR import scala.io.Source object Utils { // Useful Constants: those are separate normalised input features for the neural network private val INPUT_SIGNAL_TYPES = Array( "body_acc_x_", "body_acc_y_", "body_acc_z_", "body_gyro_x_", "body_gyro_y_", "body_gyro_z_", "total_acc_x_", "total_acc_y_", "total_acc_z_" ) // Output classes: used to learn how to classify private val LABELS = Array( "WALKING", "WALKING_UPSTAIRS", "WALKING_DOWNSTAIRS", "SITTING", "STANDING", "LAYING" ) def loadData(dataPath: String, name: String): Array[Array[Array[Float]]] = { val dataSignalsPaths = INPUT_SIGNAL_TYPES.map( signal => s"$dataPath/${signal}${name}.txt" ) val signals = dataSignalsPaths.map { path => Source.fromFile(path).mkString.split("\n").map { line => line.replaceAll(" ", " ").trim().split(" ").map(_.toFloat) } } val inputDim = signals.length val numSamples = signals(0).length val timeStep = signals(0)(0).length (0 until numSamples).map { n => (0 until timeStep).map { t => (0 until inputDim).map( i => signals(i)(n)(t) ).toArray }.toArray }.toArray } def loadLabels(labelPath: String): Array[Float] = { Source.fromFile(labelPath).mkString.split("\n").map(_.toFloat - 1) } }
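A short usage sketch for the loaders above; the directory layout shown here follows the UCI HAR dataset naming that the signal and label files suggest, but the paths are assumptions.

// Hypothetical paths following the UCI HAR dataset layout.
val trainX: Array[Array[Array[Float]]] =
  Utils.loadData("UCI_HAR_Dataset/train/Inertial Signals", "train")
val trainY: Array[Float] =
  Utils.loadLabels("UCI_HAR_Dataset/train/y_train.txt")

println(s"samples=${trainX.length}, timesteps=${trainX.head.length}, channels=${trainX.head.head.length}")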
Example 93
Source File: CSVImageMetadataReader.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package Yelp.Preprocessor

import scala.io.Source

object CSVImageMetadataReader {

  def readBusinessToImageLabels(csv: String, rows: List[Int] = List(-1)): Map[Int, String] = {
    val reader = readMetadata(csv)
    reader.drop(1)
      .map(x => x match {
        case x :: Nil => (x(0).toInt, "-1")
        case _ => (x(0).toInt, x(1).split(" ").head)
      }).toMap
  }
}
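The method above delegates to a readMetadata helper that the excerpt omits. A minimal sketch of what such a helper might look like, assuming it reads the CSV with scala.io.Source and splits each line on commas; this is an assumption, not the project's actual implementation.

// Hypothetical helper: one List[String] of comma-separated fields per CSV line.
def readMetadata(csv: String): List[List[String]] = {
  val source = Source.fromFile(csv)
  try source.getLines().map(_.split(",").toList).toList
  finally source.close()
}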
Example 94
Source File: GrpcSyncServerSimulation.scala From grpc-gatling with MIT License | 5 votes |
package ch.tamedia.gatling.tests import ch.tamedia.gatling.GrpcCustomCheck import ch.tamedia.gatling.actions.impl.{GrpcAsyncCallAction, GrpcSyncCallAction} import ch.tamedia.noname.server.grpc.endpoint.log.LogResponse import com.trueaccord.scalapb.GeneratedMessage import io.gatling.core.Predef._ import scala.concurrent.duration._ import scala.io.Source class GrpcSyncServerSimulation extends Simulation { import ch.tamedia.gatling.Predef._ val host = "localhost" val port = 50051 val json: String = Source.fromFile("src/test/resources/sample_request.json").getLines.mkString val grpcConfig = GRPC() val grpcScenario = scenario("Test GRPC server") .exec(grpcCall(GrpcAsyncCallAction("async", host, port, json)).check(new GrpcCustomCheck((s: GeneratedMessage) => { s.asInstanceOf[LogResponse].message.equals("OK") }))) .exec(grpcCall(GrpcSyncCallAction("sync", host, port, json)).check(new GrpcCustomCheck((s: GeneratedMessage) => { s.asInstanceOf[LogResponse].message.equals("OK") }))) setUp( grpcScenario.inject( atOnceUsers(10), rampUsers(10) over(5 seconds), constantUsersPerSec(20) during(15 seconds), heavisideUsers(1000) over(20 seconds)) ).protocols(grpcConfig) }
Example 95
Source File: TestUtils.scala From keystone with Apache License 2.0 | 5 votes |
package keystoneml.utils import java.io.{FileReader, ByteArrayInputStream} import breeze.linalg.DenseMatrix import breeze.stats.distributions.{Gaussian, RandBasis, ThreadLocalRandomGenerator, Rand} import edu.berkeley.cs.amplab.mlmatrix.RowPartitionedMatrix import org.apache.commons.io.IOUtils import org.apache.commons.math3.random.MersenneTwister import org.apache.spark.SparkContext import scala.io.Source import scala.util.Random def genChannelMajorArrayVectorizedImage(x: Int, y: Int, z: Int): ChannelMajorArrayVectorizedImage = { ChannelMajorArrayVectorizedImage(genData(x, y, z), ImageMetadata(x,y,z)) } def genRowColumnMajorByteArrayVectorizedImage(x: Int, y: Int, z: Int): RowColumnMajorByteArrayVectorizedImage = { RowColumnMajorByteArrayVectorizedImage(genData(x,y,z).map(_.toByte), ImageMetadata(x,y,z)) } def createRandomMatrix( sc: SparkContext, numRows: Int, numCols: Int, numParts: Int, seed: Int = 42): RowPartitionedMatrix = { val rowsPerPart = numRows / numParts val matrixParts = sc.parallelize(1 to numParts, numParts).mapPartitionsWithIndex { (index, part) => val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed+index))) Iterator(DenseMatrix.rand(rowsPerPart, numCols, Gaussian(0.0, 1.0)(randBasis))) } RowPartitionedMatrix.fromMatrix(matrixParts.cache()) } def createLocalRandomMatrix(numRows: Int, numCols: Int, seed: Int = 42): DenseMatrix[Double] = { val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed))) DenseMatrix.rand(numRows, numCols, Gaussian(0.0, 1.0)(randBasis)) } }
Example 96
Source File: GlobalSapSQLContext.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package org.apache.spark.sql import java.io.File import com.sap.spark.util.TestUtils import com.sap.spark.{GlobalSparkContext, WithSQLContext} import org.apache.spark.SparkContext import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{BoundReference, Cast} import org.apache.spark.unsafe.types._ import org.apache.spark.sql.types._ import org.scalatest.Suite import scala.io.Source trait GlobalSapSQLContext extends GlobalSparkContext with WithSQLContext { self: Suite => override implicit def sqlContext: SQLContext = GlobalSapSQLContext._sqlc override protected def setUpSQLContext(): Unit = GlobalSapSQLContext.init(sc) override protected def tearDownSQLContext(): Unit = GlobalSapSQLContext.reset() def getDataFrameFromSourceFile(sparkSchema: StructType, path: File): DataFrame = { val conversions = sparkSchema.toSeq.zipWithIndex.map({ case (field, index) => Cast(BoundReference(index, StringType, nullable = true), field.dataType) }) val data = Source.fromFile(path) .getLines() .map({ line => val stringRow = InternalRow.fromSeq(line.split(",", -1).map(UTF8String.fromString)) Row.fromSeq(conversions.map({ c => c.eval(stringRow) })) }) val rdd = sc.parallelize(data.toSeq, numberOfSparkWorkers) sqlContext.createDataFrame(rdd, sparkSchema) } } object GlobalSapSQLContext { private var _sqlc: SQLContext = _ private def init(sc: SparkContext): Unit = if (_sqlc == null) { _sqlc = TestUtils.newSQLContext(sc) } private def reset(): Unit = { if (_sqlc != null) { _sqlc.catalog.unregisterAllTables() } } }
Example 97
Source File: TestUtils.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package com.sap.spark.util import java.util.Locale import scala.io.Source import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.{Row, SQLContext, SapSQLContext} import org.apache.spark.sql.hive.SapHiveContext import org.apache.spark.sql.sources.sql.SqlLikeRelation import org.apache.spark.sql.sources.{BaseRelation, CatalystSource, Table} import org.apache.spark.sql.types.StructType import org.mockito.Matchers._ import org.mockito.Mockito._ import scala.tools.nsc.io.Directory import scala.util.{Failure, Success} def parsePTestFile(fileName: String): List[(String, String, String)] = { val filePath = getFileFromClassPath(fileName) val fileContents = Source.fromFile(filePath).getLines .map(p => p.stripMargin.trim) .filter(p => !p.isEmpty && !p.startsWith("//")) // filter empty rows and comments .mkString("\n") val p = new PTestFileParser // strip semicolons from query and parsed p(fileContents) match { case Success(lines) => lines.map { case (query, parsed, expect) => (stripSemicolon(query).trim, stripSemicolon(parsed).trim, expect.trim) } case Failure(ex) => throw ex } } private def stripSemicolon(sql: String): String = if (sql.endsWith(";")) { sql.substring(0, sql.length-1) } else { sql } def withTempDirectory[A](f: Directory => A): A = { val dir = Directory.makeTemp() try { f(dir) } finally { dir.deleteIfExists() } } }
Example 98
Source File: VirtualScreeningTest.scala From MaRe with Apache License 2.0 | 5 votes |
package se.uu.it.mare import java.io.File import java.util.UUID import scala.io.Source import scala.util.Properties import org.apache.spark.SharedSparkContext import org.junit.runner.RunWith import org.scalatest.FunSuite import org.scalatest.junit.JUnitRunner private object SDFUtils { def parseIDsAndScores(sdf: String): Array[(String, String)] = { sdf.split("\\n\\$\\$\\$\\$\\n").map { mol => val lines = mol.split("\\n") (lines(0), lines.last) } } } @RunWith(classOf[JUnitRunner]) class VirtualScreeningTest extends FunSuite with SharedSparkContext { private val tmpDir = new File(Properties.envOrElse("TMPDIR", "/tmp")) test("Virtual Screening") { sc.hadoopConfiguration.set("textinputformat.record.delimiter", "\n$$$$\n") val mols = sc.textFile(getClass.getResource("sdf/molecules.sdf").getPath) // Parallel execution with MaRe val hitsParallel = new MaRe(mols) .map( inputMountPoint = TextFile("/input.sdf", "\n$$$$\n"), outputMountPoint = TextFile("/output.sdf", "\n$$$$\n"), imageName = "mcapuccini/oe:latest", command = "fred -receptor /var/openeye/hiv1_protease.oeb " + "-hitlist_size 0 " + "-conftest none " + "-dock_resolution Low " + "-dbase /input.sdf " + "-docked_molecule_file /output.sdf") .reduce( inputMountPoint = TextFile("/input.sdf", "\n$$$$\n"), outputMountPoint = TextFile("/output.sdf", "\n$$$$\n"), imageName = "mcapuccini/sdsorter:latest", command = "sdsorter -reversesort='FRED Chemgauss4 score' " + "-keep-tag='FRED Chemgauss4 score' " + "-nbest=30 " + "/input.sdf " + "/output.sdf") .rdd.collect.mkString("\n$$$$\n") // Serial execution val inputFile = new File(getClass.getResource("sdf/molecules.sdf").getPath) val dockedFile = new File(tmpDir, "mare_test_" + UUID.randomUUID.toString) dockedFile.createNewFile dockedFile.deleteOnExit val outputFile = new File(tmpDir, "mare_test_" + UUID.randomUUID.toString) outputFile.createNewFile outputFile.deleteOnExit DockerHelper.run( imageName = "mcapuccini/oe:latest", command = "fred -receptor /var/openeye/hiv1_protease.oeb " + "-hitlist_size 0 " + "-conftest none " + "-dock_resolution Low " + "-dbase /input.sdf " + "-docked_molecule_file /docked.sdf", bindFiles = Seq(inputFile, dockedFile), volumeFiles = Seq(new File("/input.sdf"), new File("/docked.sdf")), forcePull = false) DockerHelper.run( imageName = "mcapuccini/sdsorter:latest", command = "sdsorter -reversesort='FRED Chemgauss4 score' " + "-keep-tag='FRED Chemgauss4 score' " + "-nbest=30 " + "/docked.sdf " + "/output.sdf", bindFiles = Seq(dockedFile, outputFile), volumeFiles = Seq(new File("/docked.sdf"), new File("/output.sdf")), forcePull = false) val hitsSerial = Source.fromFile(outputFile).mkString // Test val parallel = SDFUtils.parseIDsAndScores(hitsParallel) val serial = SDFUtils.parseIDsAndScores(hitsSerial) assert(parallel.deep == serial.deep) } }
Example 99
Source File: Main.scala From time-series-demo with Apache License 2.0 | 5 votes |
package mesosphere.crimedemo import java.io.BufferedInputStream import java.net.URI import java.util.zip.GZIPInputStream import org.tukaani.xz.XZInputStream import scala.io.Source object Main { lazy val log = org.slf4j.LoggerFactory.getLogger(getClass.getName) def main(args: Array[String]): Unit = { val conf = new Conf(args) val publisher = new KafkaPublisher(conf.brokers()) val topic = conf.topic() val sleep = 1000L / conf.eventsPerSecond() val uri = new URI(conf.uri()) val inputStream = new BufferedInputStream(uri.toURL.openStream()) val wrappedStream = if (conf.uri().endsWith(".gz")) { new GZIPInputStream(inputStream) } else if (conf.uri().endsWith(".xz")) { new XZInputStream(inputStream) } else { inputStream } val source = Source.fromInputStream(wrappedStream) var done = 0 log.info(s"Reading crime from ${conf.uri()} and publishing to ${conf.brokers()} every ${sleep}ms") source.getLines().foreach(line => { publisher.publishKafka(topic, line.getBytes) done += 1 if (done % 1000 == 0) { log.info(s"$done lines done") } Thread.sleep(sleep) }) log.info(s"$done lines done") } }
Example 100
Source File: ExtractApplicationProperties.scala From rug with GNU General Public License v3.0 | 5 votes |
package com.atomist.rug.kind.java import com.atomist.tree.content.project.{ConfigValue, Configuration, SimpleConfigValue, SimpleConfiguration} import com.atomist.source.FileArtifact import org.apache.commons.lang3.StringUtils import scala.collection.mutable.ListBuffer import scala.io.Source class ExtractApplicationProperties(source: String) extends Function[FileArtifact, Configuration] { override def apply(f: FileArtifact): Configuration = { val isWhiteSpace: String => Boolean = line => StringUtils.isWhitespace(line) val isComment: String => Boolean = line => !isWhiteSpace(line) && line.dropWhile(c => c.isWhitespace).startsWith("#") val isContent: String => Boolean = line => !(isWhiteSpace(line) || isComment(line)) trait State object InComment extends State object InBlanks extends State var state: State = InComment var comment = "" val configValues = new ListBuffer[ConfigValue]() // Strip # and whitespace from comments (respecting multiline comments) def extractComment(comment: String): String = { def toCommentContentLine(l: String) = { val r = l.dropWhile(c => c.isWhitespace || '#'.equals(c)) r } val r = comment.lines.map(l => toCommentContentLine(l)).mkString("\n") r } // Return None if not a valid property line def parseContentLine(line: String): Option[ConfigValue] = { val stripped = line.dropWhile(c => c.isWhitespace) val idx = stripped.indexOf("=") if (idx == -1) { None } else { val (key, value) = stripped.splitAt(idx) val profile = "" Some(SimpleConfigValue(key, value.substring(1), source, profile, description = extractComment(comment))) } } def appendToComment(l: String): Unit = { if ("".equals(comment)) comment = l else comment = comment + "\n" + l } val lines = Source.fromString(f.content).getLines() for (line <- lines) { if (isContent(line)) { parseContentLine(line).foreach(cv => configValues.append(cv)) comment = "" } else state match { case InBlanks if isComment(line) => state = InComment appendToComment(line) case InComment if isComment(line) || isWhiteSpace(line) => appendToComment(line) case InComment => comment = "" state = InBlanks case _ => } } new SimpleConfiguration(configValues) } }
Example 101
Source File: OpenApiGenerationTest.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash
package rest.openapi

import com.avsystem.commons.serialization.json.JsonStringOutput
import io.udash.rest.RestTestApi

import scala.io.Source
import org.scalatest.funsuite.AnyFunSuite

class OpenApiGenerationTest extends AnyFunSuite {
  test("openapi for RestTestApi") {
    val openapi = RestTestApi.openapiMetadata.openapi(
      Info("Test API", "0.1", description = "Some test REST API"),
      servers = List(Server("http://localhost"))
    )
    val expected = Source.fromInputStream(getClass.getResourceAsStream("/RestTestApi.json")).getLines().mkString("\n")
    assert(JsonStringOutput.writePretty(openapi) == expected)
  }
}
Example 102
Source File: CSVFile.scala From random-projections-at-berlinbuzzwords with Apache License 2.0 | 5 votes |
package com.stefansavev.randomprojections.file

import scala.io.Source
import scala.util.matching.Regex

case class CSVFileOptions(sep: String = ",",
                          hasHeader: Boolean = true,
                          quote: Option[String] = None,
                          onlyTopRecords: Option[Int] = None)

class CSVFile private (_header: Option[Array[String]], source: Source, iter: Iterator[String], opt: CSVFileOptions) {
  def numColumns: Option[Int] = _header.map(_.length)

  def header: Option[Array[String]] = _header

  def processLine(line: String): Array[String] = {
    CSVFile.processLine(opt, line) //TODO: verify num columns
  }

  def getLines(): Iterator[Array[String]] = iter.map(line => processLine(line))

  def close() = source.close()
}

object CSVFile {
  def processLine(opt: CSVFileOptions, line: String): Array[String] = {
    line.split(Regex.quote(opt.sep))
  }

  def read(fileName: String, opt: CSVFileOptions): CSVFile = {
    val source = Source.fromFile(fileName)
    val linesIterator = source.getLines()
    val iterator = opt.onlyTopRecords match {
      case None => linesIterator
      case Some(n) => linesIterator.take(n + 1) //1 is for the header
    }
    val header = if (opt.hasHeader) Some(processLine(opt, iterator.next())) else None
    new CSVFile(header, source, iterator, opt)
  }
}
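A brief usage sketch for the reader above; the file name is illustrative.

// Hypothetical usage: read a comma-separated file with a header row, top 100 records only.
val csv = CSVFile.read("ratings.csv", CSVFileOptions(onlyTopRecords = Some(100)))
println(csv.header.map(_.mkString(" | ")).getOrElse("no header"))
csv.getLines().foreach(fields => println(fields.mkString(", ")))
csv.close()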
Example 103
Source File: PrintUtils.scala From random-projections-at-berlinbuzzwords with Apache License 2.0 | 5 votes |
package com.stefansavev

import java.io.PrintWriter

import scala.io.{Codec, Source}

object PrintUtils {
  def columnVectorToFile(fileName: String, v: Array[Double]): Unit = {
    val writer = new PrintWriter(fileName)
    for (a <- v) {
      writer.println(a.toString)
    }
    writer.close()
  }

  def stringsToFile(fileName: String, v: Array[String]): Unit = {
    val writer = new PrintWriter(fileName)
    for (a <- v) {
      writer.println(a)
    }
    writer.close()
  }

  def withPrintWriter(fileName: String, body: PrintWriter => Unit): Unit = {
    val writer = new PrintWriter(fileName, "UTF-8")
    body(writer)
    writer.close()
  }
}

object FileReadUtils {
  def withLinesIterator[T](fileName: String)(body: Iterator[String] => T): T = {
    val source = Source.fromFile(fileName)(Codec.UTF8)
    val result = body(source.getLines())
    source.close()
    result
  }
}
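A short usage sketch combining the two loan-pattern helpers above; the file name is illustrative.

// Hypothetical round trip: write a few lines, then read them back and count them.
PrintUtils.withPrintWriter("numbers.txt", writer => {
  (1 to 5).foreach(i => writer.println(i.toString))
})

val lineCount: Int = FileReadUtils.withLinesIterator("numbers.txt")(lines => lines.size)
println(s"read $lineCount lines")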
Example 104
Source File: ViewHelpers.scala From dr-cla with BSD 3-Clause "New" or "Revised" License | 5 votes |
package helpers import java.net.URL import javax.inject.Inject import play.api.{Configuration, Environment} import scala.io.Source import scala.util.Try class ViewHelpers @Inject() (configuration: Configuration, environment: Environment) { val organizationName = configuration.get[String]("app.organization.name") val maybeOrganizationLogoUrl = configuration.getOptional[String]("app.organization.logo-url") val maybeOrganizationUrl = configuration.getOptional[String]("app.organization.url") val maybeOrganizationClaUrl = configuration.getOptional[String]("app.organization.cla-url") val claText: String = { maybeOrganizationClaUrl .flatMap(claUrl => Try(new URL(claUrl)).toOption) .orElse(environment.resource("sample-cla.html")) .map { claUrl => val text = Source.fromURL(claUrl) text.mkString } getOrElse { throw new Exception("You must set the ORG_CLA environment variable.") } } }
Example 105
Source File: DatabaseScriptTestUtils.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.core.database.test import scala.concurrent.duration.DurationInt import scala.io.Source import org.scalatest.Matchers import org.scalatest.concurrent.IntegrationPatience import org.scalatest.concurrent.ScalaFutures import akka.actor.ActorSystem import common.WaitFor import common.WhiskProperties import pureconfig._ import pureconfig.generic.auto._ import spray.json._ import spray.json.DefaultJsonProtocol._ import org.apache.openwhisk.common.Logging import org.apache.openwhisk.core.ConfigKeys import org.apache.openwhisk.core.WhiskConfig import org.apache.openwhisk.core.database.CouchDbRestClient import org.apache.openwhisk.core.database.CouchDbConfig trait DatabaseScriptTestUtils extends ScalaFutures with Matchers with WaitFor with IntegrationPatience { case class DatabaseUrl(dbProtocol: String, dbUsername: String, dbPassword: String, dbHost: String, dbPort: String) { def url = s"$dbProtocol://$dbUsername:$dbPassword@$dbHost:$dbPort" def safeUrl = s"$dbProtocol://$dbHost:$dbPort" } val python = WhiskProperties.python val config = loadConfigOrThrow[CouchDbConfig](ConfigKeys.couchdb) val dbProtocol = config.protocol val dbHost = config.host val dbPort = config.port val dbUsername = config.username val dbPassword = config.password val dbPrefix = WhiskProperties.getProperty(WhiskConfig.dbPrefix) val dbUrl = DatabaseUrl(dbProtocol, dbUsername, dbPassword, dbHost, dbPort.toString) def retry[T](task: => T) = org.apache.openwhisk.utils.retry(task, 10, Some(500.milliseconds)) def waitForView(db: CouchDbRestClient, designDoc: String, viewName: String, numDocuments: Int) = { waitfor(() => { val view = db.executeView(designDoc, viewName)().futureValue view shouldBe 'right view.right.get.fields("rows").convertTo[List[JsObject]].length == numDocuments }, totalWait = 2.minutes) } }
Example 106
Source File: DockerClientWithFileAccess.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.core.containerpool.docker import java.io.File import java.nio.file.Paths import akka.actor.ActorSystem import akka.stream.alpakka.file.scaladsl.FileTailSource import akka.stream.scaladsl.{FileIO, Source => AkkaSource} import akka.util.ByteString import scala.concurrent.ExecutionContext import scala.concurrent.Future import scala.concurrent.blocking import spray.json.DefaultJsonProtocol._ import spray.json._ import org.apache.openwhisk.common.Logging import org.apache.openwhisk.common.TransactionId import org.apache.openwhisk.core.containerpool.ContainerId import org.apache.openwhisk.core.containerpool.ContainerAddress import scala.io.Source import scala.concurrent.duration.FiniteDuration class DockerClientWithFileAccess(dockerHost: Option[String] = None, containersDirectory: File = Paths.get("containers").toFile)( executionContext: ExecutionContext)(implicit log: Logging, as: ActorSystem) extends DockerClient(dockerHost)(executionContext) with DockerApiWithFileAccess { implicit private val ec = executionContext def rawContainerLogs(containerId: ContainerId, fromPos: Long, pollInterval: Option[FiniteDuration]): AkkaSource[ByteString, Any] }
Example 107
Source File: EnronEmail.scala From Mastering-Scala-Machine-Learning with MIT License | 5 votes |
package org.akozlov.chapter07

import scala.io.Source
import scala.util.hashing.{MurmurHash3 => Hash}
import scala.util.matching.Regex
import java.util.{Date => javaDateTime}
import java.io.File
import net.liftweb.json._
import Extraction._
import Serialization.{read, write}

object EnronEmail {

  // Matches sender/recipient addresses in the Enron corpus
  val emailRe = """[a-zA-Z0-9_.+\-]+@enron.com""".r.unanchored

  def emails(s: String) = {
    for (email <- emailRe findAllIn s) yield email
  }

  def hash(s: String) = {
    java.lang.Integer.MAX_VALUE.toLong + Hash.stringHash(s)
  }

  val messageRe =
    """(?:Message-ID:\s+)(<[A-Za-z0-9_.+\-@]+>)(?s)(?:.*?)(?m)
      |(?:Date:\s+)(.*?)$(?:.*?)
      |(?:From:\s+)([a-zA-Z0-9_.+\-]+@enron.com)(?:.*?)
      |(?:Subject: )(.*?)$""".stripMargin.r.unanchored

  case class Relation(from: String, fromId: Long, to: String, toId: Long, source: String,
    messageId: String, date: javaDateTime, subject: String)

  implicit val formats = Serialization.formats(NoTypeHints)

  def getFileTree(f: File): Stream[File] =
    f #:: (if (f.isDirectory) f.listFiles().toStream.flatMap(getFileTree) else Stream.empty)

  def main(args: Array[String]) {
    getFileTree(new File(args(0))).par.map { file =>
      {
        "\\.$".r findFirstIn file.getName match {
          case Some(x) =>
            try {
              val src = Source.fromFile(file, "us-ascii")
              val message = try src.mkString finally src.close()
              message match {
                case messageRe(messageId, date, from, subject) =>
                  val fromLower = from.toLowerCase
                  for (to <- emails(message).filter(_ != fromLower).toList.distinct)
                    println(write(Relation(fromLower, hash(fromLower), to, hash(to),
                      file.toString, messageId, new javaDateTime(date), subject)))
                case _ =>
              }
            } catch {
              case e: Exception => System.err.println(e)
            }
          case _ =>
        }
      }
    }
  }
}
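The `try src.mkString finally src.close()` idiom is the important Source detail here: the file handle is released even when the regex match or the JSON serialization throws. A small reusable helper built on the same idiom, with the encoding as a parameter (defaulting to the us-ascii used above):

import java.io.File
import scala.io.Source

object SafeRead {
  // Read a whole file into a String with an explicit encoding, always closing the handle.
  def readAll(file: File, encoding: String = "us-ascii"): String = {
    val src = Source.fromFile(file, encoding)
    try src.mkString finally src.close()
  }
}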
Example 108
Source File: HTMLReportGenerator.scala From regressr with Apache License 2.0 | 5 votes |
package org.ebayopensource.regression.internal.reportGenerator import java.io.{BufferedWriter, File, FileWriter} import org.fusesource.scalate.{TemplateEngine, TemplateSource} import scala.io.Source import scala.util.{Failure, Success, Try} class HTMLReportGenerator extends ReportGenerator { val scalateEngine = new TemplateEngine def getContent(reportEntries: Seq[ReportEntry]) : Try[String] = Try { if (reportEntries.size==0) { throw new IllegalArgumentException("Cannot generate report with 0 reportEntries.") } val templateText = Source.fromInputStream(getClass.getResourceAsStream("/report/index.html")).mkString scalateEngine.escapeMarkup = false val regressionCount :Seq[Int] = reportEntries.flatMap { reportEntry => { reportEntry.requestReportEntries.map { requestReportEntry => { requestReportEntry.reqMessages.size } } } } val renderedContent = scalateEngine.layout(TemplateSource.fromText("/com/ebay/n/regression/text.ssp", templateText), Map("reportEntries" -> reportEntries, "regressionCount" -> regressionCount.sum)) renderedContent } def writeAndGetFile(content: String, reportFilePath: String) : Try[File] = Try { val outputFile = new File(reportFilePath) val bw = new BufferedWriter(new FileWriter(outputFile)) bw.write(content) bw.close() outputFile } override def generate(reportEntries: Seq[ReportEntry], reportFilePath: String): Try[File] = Try { getContent(reportEntries).flatMap { content => writeAndGetFile(content, reportFilePath) } match { case Success(file) => file case Failure(t) => throw t } } }
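Loading the template with Source.fromInputStream(getClass.getResourceAsStream(...)) works, but getResourceAsStream returns null when the resource is missing, which typically surfaces later as a NullPointerException inside Source. A hedged sketch of a guard around that, reusing the report path above as the default:

import scala.io.Source

object TemplateLoader {
  // Fail fast with a clear message if the classpath resource is absent, then read and close it.
  def loadTemplate(path: String = "/report/index.html"): String = {
    val stream = Option(getClass.getResourceAsStream(path))
      .getOrElse(throw new IllegalArgumentException(s"Classpath resource not found: $path"))
    val src = Source.fromInputStream(stream)
    try src.mkString finally src.close()
  }
}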
Example 109
Source File: FileDataStore.scala From regressr with Apache License 2.0 | 5 votes |
package org.ebayopensource.regression.internal.datastore import java.io.{File, PrintWriter} import scala.io.Source import scala.util.Try class FileDataStore(path: String) extends BaseDataStore { new File(path).mkdir() override def put(key: String, value: String): Unit = { val pw = new PrintWriter(new File(buildFilePath(key))) pw.write(value) pw.flush() pw.close() } private def buildFilePath(key: String) = { if (key.endsWith(".strategy")) s"${path}${key}" else s"${path}${key}.json" } override def get(key: String): Option[String] = { val file = new File(buildFilePath(key)) if (!file.exists()) { None } else { Some(Source.fromFile(file).mkString) } } override def close(): Unit = ??? // This is a no op. override def listStrategies(): Seq[String] = { new File(path).listFiles().filter { file => file.getName.endsWith(".strategy") }.map(file => file.getName.replaceFirst(s"${BaseDataStore.strategyPrefix}", "").replaceFirst(".strategy", "")) } override def remove(key: String): Unit = { new File(s"${path}${key}").delete() } override def deleteRecordingFiles(testIdentifier: String): Try[Unit] = Try { new File(path).listFiles().filter { file => file.getName.startsWith(s"${BaseDataStore.strategyPrefix}${testIdentifier}.") }.map(file => file.delete()) } } object FileDataStore { val PATH="./tmp/" }
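The `get` method above returns Option[String] depending on whether the backing file exists; note that Source.fromFile(file).mkString never explicitly closes the source. A sketch of the same lookup with the handle released in all cases:

import java.io.File
import scala.io.Source

object FileStoreRead {
  // Return the file contents if present, closing the source either way.
  def readIfExists(path: String): Option[String] = {
    val file = new File(path)
    if (!file.exists()) None
    else {
      val src = Source.fromFile(file)
      try Some(src.mkString) finally src.close()
    }
  }
}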
Example 110
Source File: YAMLTestStrategyReaderTest.scala From regressr with Apache License 2.0 | 5 votes |
package org.ebayopensource.regression.internal.reader import org.ebayopensource.regression.UnitSpec import scala.io.Source import scala.util.{Failure, Success} class YAMLTestStrategyReaderTest extends UnitSpec { "A reader" should "be able to read a valid strategy file with 1 request" in { val strategyContent = Source.fromInputStream(getClass.getResourceAsStream("/yaml/valid_strategy_simple_one_request.yaml")).mkString.replace("\t","") YAMLTestStrategyReader.read(strategyContent) match { case Success(t) => { assert(t.requests.size == 1) assert(t.headers.size>0) } case Failure(t) => assert(false, s"Strategy file was valid. Should not throw exception ${t.getMessage}") } } }
Example 111
Source File: SwaggerAPI.scala From swagger-check with MIT License | 5 votes |
package de.leanovate.swaggercheck.schema import java.io.InputStream import com.fasterxml.jackson.annotation.{JsonCreator, JsonProperty} import com.fasterxml.jackson.core.JsonFactory import com.fasterxml.jackson.databind.annotation.JsonDeserialize import com.fasterxml.jackson.databind.{DeserializationFeature, JsonNode, MappingJsonFactory, ObjectMapper} import com.fasterxml.jackson.dataformat.yaml.YAMLFactory import com.fasterxml.jackson.module.scala.DefaultScalaModule import de.leanovate.swaggercheck.schema.jackson.JsonSchemaModule import de.leanovate.swaggercheck.schema.model.{Definition, Parameter} import scala.collection.JavaConverters._ import scala.io.Source @JsonDeserialize(builder = classOf[SwaggerAPIBuilder]) case class SwaggerAPI( basePath: Option[String], paths: Map[String, Map[String, Operation]], definitions: Map[String, Definition] ) object SwaggerAPI { val jsonMapper = objectMapper(new MappingJsonFactory()) val yamlMapper = objectMapper(new YAMLFactory()) def parse(jsonOrYaml: String): SwaggerAPI = { val mapper = if (jsonOrYaml.trim().startsWith("{")) jsonMapper else yamlMapper mapper.readValue(jsonOrYaml, classOf[SwaggerAPI]) } def parse(swaggerInput: InputStream): SwaggerAPI = { parse(Source.fromInputStream(swaggerInput).mkString) } def objectMapper(jsonFactory: JsonFactory): ObjectMapper = { val mapper = new ObjectMapper(jsonFactory) mapper.registerModule(DefaultScalaModule) mapper.registerModule(JsonSchemaModule) mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES) mapper } } class SwaggerAPIBuilder @JsonCreator()( @JsonProperty("basePath") basePath: Option[String], @JsonProperty("consumes") consumes: Option[Seq[String]], @JsonProperty("produces") produces: Option[Seq[String]], @JsonProperty("paths") paths: Option[Map[String, JsonNode]], @JsonProperty("definitions") definitions: Option[Map[String, Definition]], @JsonProperty("parameters") globalParameters: Option[Map[String, Parameter]] ) { def build(): SwaggerAPI = { val defaultConsumes = consumes.map(_.toSet).getOrElse(Set.empty) val defaultProduces = produces.map(_.toSet).getOrElse(Set.empty) SwaggerAPI(basePath, paths.getOrElse(Map.empty).map { case (path, pathDefinition) => val defaultParameters = Option(pathDefinition.get("parameters")).map { node => node.iterator().asScala.map { element => SwaggerAPI.jsonMapper.treeToValue(element, classOf[OperationParameter]) }.toSeq }.getOrElse(Seq.empty) basePath.map(_ + path).getOrElse(path) -> pathDefinition.fields().asScala.filter(_.getKey != "parameters").map { entry => val operation = SwaggerAPI.jsonMapper.treeToValue(entry.getValue, classOf[Operation]) entry.getKey.toUpperCase -> operation.withDefaults(defaultParameters, defaultConsumes, defaultProduces).resolveGlobalParameters(globalParameters.getOrElse(Map())) }.toMap }, definitions.getOrElse(Map.empty)) } }
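`parse(swaggerInput: InputStream)` drains the stream through Source and then dispatches on the first character to choose the JSON or YAML mapper. The same detection can be sketched on its own; the two parser functions here are placeholders, not the library's API:

import java.io.InputStream
import scala.io.Source

object SpecReader {
  // Read the stream fully, then decide whether the payload looks like JSON or YAML.
  def readSpec(in: InputStream)(parseJson: String => Unit, parseYaml: String => Unit): Unit = {
    val src = Source.fromInputStream(in)
    val text = try src.mkString finally src.close()
    if (text.trim.startsWith("{")) parseJson(text) else parseYaml(text)
  }
}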
Example 112
Source File: L8-10-11UDF.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark import scala.io.Source import scala.reflect.runtime.universe import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.apache.spark.sql.SQLContext import org.apache.spark.streaming.Seconds import org.apache.spark.streaming.StreamingContext import org.json4s.jackson.JsonMethods.parse import org.json4s.jvalue2extractable import org.json4s.string2JsonInput object CdrUDFApp { case class Cdr(squareId: Int, timeInterval: Long, countryCode: Int, smsInActivity: Float, smsOutActivity: Float, callInActivity: Float, callOutActivity: Float, internetTrafficActivity: Float) def main(args: Array[String]) { if (args.length != 4) { System.err.println( "Usage: CdrUDFApp <appname> <batchInterval> <hostname> <port>") System.exit(1) } val Seq(appName, batchInterval, hostname, port) = args.toSeq val conf = new SparkConf() .setAppName(appName) .setJars(SparkContext.jarOfClass(this.getClass).toSeq) val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt)) val sqlC = new SQLContext(ssc.sparkContext) import sqlC.implicits._ def getCountryCodeMapping() = { implicit val formats = org.json4s.DefaultFormats parse(Source.fromURL("http://country.io/phone.json").mkString).extract[Map[String, String]].map(_.swap) } def getCountryNameMapping() = { implicit val formats = org.json4s.DefaultFormats parse(Source.fromURL("http://country.io/names.json").mkString).extract[Map[String, String]] } def getCountryName(mappingPhone: Map[String, String], mappingName: Map[String, String], code: Int) = { mappingName.getOrElse(mappingPhone.getOrElse(code.toString, "NotFound"), "NotFound") } val getCountryNamePartial = getCountryName(getCountryCodeMapping(), getCountryNameMapping(), _: Int) sqlC.udf.register("getCountryNamePartial", getCountryNamePartial) val cdrStream = ssc.socketTextStream(hostname, port.toInt) .map(_.split("\\t", -1)) .foreachRDD(rdd => { val cdrs = seqToCdr(rdd).toDF() cdrs.registerTempTable("cdrs") sqlC.sql("SELECT getCountryNamePartial(countryCode) AS countryName, COUNT(countryCode) AS cCount FROM cdrs GROUP BY countryCode ORDER BY cCount DESC LIMIT 5").show() }) ssc.start() ssc.awaitTermination() } def seqToCdr(rdd: RDD[Array[String]]): RDD[Cdr] = { rdd.map(c => c.map(f => f match { case x if x.isEmpty() => "0" case x => x })).map(c => Cdr(c(0).toInt, c(1).toLong, c(2).toInt, c(3).toFloat, c(4).toFloat, c(5).toFloat, c(6).toFloat, c(7).toFloat)) } }
Example 113
Source File: L2-1FirstApp.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark import scala.io.Source import org.apache.spark.SparkConf import org.apache.spark.SparkContext object TranslateApp { def main(args: Array[String]) { if (args.length != 4) { System.err.println( "Usage: TranslateApp <appname> <book_path> <output_path> <language>") System.exit(1) } val Seq(appName, bookPath, outputPath, lang) = args.toSeq val dict = getDictionary(lang) val conf = new SparkConf() .setAppName(appName) .setJars(SparkContext.jarOfClass(this.getClass).toSeq) val sc = new SparkContext(conf) val book = sc.textFile(bookPath) val translated = book.map(line => line.split("\\s+").map(word => dict.getOrElse(word, word)).mkString(" ")) translated.saveAsTextFile(outputPath) } def getDictionary(lang: String): Map[String, String] = { if (!Set("German", "French", "Italian", "Spanish").contains(lang)) { System.err.println( "Unsupported language: %s".format(lang)) System.exit(1) } val url = "http://www.june29.com/IDP/files/%s.txt".format(lang) println("Grabbing dictionary from: %s".format(url)) Source.fromURL(url, "ISO-8859-1").mkString .split("\\r?\\n") .filter(line => !line.startsWith("#")) .map(line => line.split("\\t")) .map(tkns => (tkns(0).trim, tkns(1).trim)).toMap } }
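`getDictionary` fetches a tab-separated word list over HTTP with an explicit ISO-8859-1 charset and folds it into a Map. A stripped-down version of just the Source part, with the URL left as a parameter since the original endpoint may no longer be available:

import scala.io.Source

object RemoteDictionary {
  // Download a "word<TAB>translation" file and turn it into a lookup map.
  def fetch(url: String, charset: String = "ISO-8859-1"): Map[String, String] = {
    val src = Source.fromURL(url, charset)
    try {
      src.getLines()
        .filterNot(_.startsWith("#")) // skip comment lines
        .map(_.split("\t"))
        .collect { case Array(word, translation, _*) => word.trim -> translation.trim }
        .toMap
    } finally src.close()
  }
}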
Example 114
Source File: L3-1DStreams.scala From prosparkstreaming with Apache License 2.0 | 5 votes |
package org.apress.prospark import scala.io.Source import org.apache.spark.SparkConf import org.apache.spark.SparkContext import org.apache.spark.streaming.Seconds import org.apache.spark.streaming.StreamingContext import org.apache.hadoop.io.LongWritable import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.lib.input.TextInputFormat import org.apache.hadoop.io.Text object StreamingTranslateApp { def main(args: Array[String]) { if (args.length != 4) { System.err.println( "Usage: StreamingTranslateApp <appname> <book_path> <output_path> <language>") System.exit(1) } val Seq(appName, bookPath, outputPath, lang) = args.toSeq val dict = getDictionary(lang) val conf = new SparkConf() .setAppName(appName) .setJars(SparkContext.jarOfClass(this.getClass).toSeq) val ssc = new StreamingContext(conf, Seconds(1)) val book = ssc.textFileStream(bookPath) val translated = book.map(line => line.split("\\s+").map(word => dict.getOrElse(word, word)).mkString(" ")) translated.saveAsTextFiles(outputPath) ssc.start() ssc.awaitTermination() } def getDictionary(lang: String): Map[String, String] = { if (!Set("German", "French", "Italian", "Spanish").contains(lang)) { System.err.println( "Unsupported language: %s".format(lang)) System.exit(1) } val url = "http://www.june29.com/IDP/files/%s.txt".format(lang) println("Grabbing dictionary from: %s".format(url)) Source.fromURL(url, "ISO-8859-1").mkString .split("\\r?\\n") .filter(line => !line.startsWith("#")) .map(line => line.split("\\t")) .map(tkns => (tkns(0).trim, tkns(1).trim)).toMap } }
Example 115
Source File: OrderedDocFreq.scala From gemini with GNU General Public License v3.0 | 5 votes |
package tech.sourced.gemini import java.io.{File, PrintWriter} import scala.io.Source import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper case class OrderedDocFreq(docs: Int, tokens: IndexedSeq[String], df: collection.Map[String, Int]) { def saveToJson(filename: String): Unit = { val mapper = new ObjectMapper() with ScalaObjectMapper mapper.registerModule(DefaultScalaModule) val out = new PrintWriter(filename) mapper.writeValue(out, Map( "docs" -> docs, "tokens" -> tokens, "df" -> df )) out.close() } } object OrderedDocFreq { def fromJson(file: File): OrderedDocFreq = { val docFreqMap = parseFile[Map[String, Any]](file) val docs = docFreqMap.get("docs") match { case Some(v) => v.asInstanceOf[Int] case None => throw new RuntimeException(s"Can not parse key 'docs' in docFreq:${file.getAbsolutePath}") } val df = docFreqMap.get("df") match { case Some(v) => v.asInstanceOf[Map[String, Int]] case None => throw new RuntimeException(s"Can not parse key 'df' in docFreq:${file.getAbsolutePath}") } val tokens = docFreqMap.get("tokens") match { case Some(v) => v.asInstanceOf[List[String]].toArray case None => throw new RuntimeException(s"Can not parse key 'tokens' in docFreq:${file.getAbsolutePath}") } OrderedDocFreq(docs, tokens, df) } def parseFile[T: Manifest](file: File): T = { val json = Source.fromFile(file) val mapper = new ObjectMapper with ScalaObjectMapper mapper.registerModule(DefaultScalaModule) mapper.readValue[T](json.reader) } }
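`parseFile` opens the file with Source.fromFile and hands `json.reader` to Jackson, but the source itself is never closed. A hedged sketch of a small wrapper that keeps the same shape while releasing the handle once the value is built:

import java.io.File
import scala.io.{BufferedSource, Source}

object ParseWithCleanup {
  // Open a file, hand the BufferedSource to a parsing function, and always close afterwards.
  def withFileSource[T](file: File)(parse: BufferedSource => T): T = {
    val src = Source.fromFile(file)
    try parse(src) finally src.close()
  }
}

// e.g. parseFile could then delegate to: withFileSource(file)(src => mapper.readValue[T](src.reader))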
Example 116
Source File: WeightedMinHashSpec.scala From gemini with GNU General Public License v3.0 | 5 votes |
package tech.sourced.gemini

import org.scalatest.{FlatSpec, Matchers}

import scala.io.Source

import org.scalatest.Tag

// Tag to set which tests depend on python
object PythonDep extends Tag("tech.sourced.tags.PythonDep")

class WeightedMinHashSpec extends FlatSpec with Matchers {
  "WeightedMinHash constructor" should "initialize correctly" taggedAs(PythonDep) in {
    val mg = new WeightedMinHash(2, 4, 1)

    mg.rs.length should be(4)
    mg.lnCs.length should be(4)
    mg.betas.length should be(4)
    mg.sampleSize should be(4)
  }

  def readCSV(filename: String): Array[Array[Float]] = {
    Source
      .fromFile(s"src/test/resources/weighted-minhash/csv/${filename}")
      .getLines()
      .map(_.split(",").map(_.trim.toFloat))
      .toArray
  }

  "WeightedMinHash hash" should "hash tiny data" taggedAs(PythonDep) in {
    val input = readCSV("tiny-data.csv")
    val rs = readCSV("tiny-rs.csv")
    val lnCs = readCSV("tiny-ln_cs.csv")
    val betas = readCSV("tiny-betas.csv")

    input.zipWithIndex.foreach { case (v, i) =>
      val wmh = new WeightedMinHash(v.length, 128, rs, lnCs, betas)
      val hashes = wmh.hash(v)
      val realHashes = readCSV(s"tiny-hashes-${i}.csv").map(_.map(_.toLong))
      hashes should be(realHashes)
    }
  }

  "WeightedMinHash hash" should "hash big data" taggedAs(PythonDep) in {
    val input = readCSV("big-data.csv")
    val rs = readCSV("big-rs.csv")
    val lnCs = readCSV("big-ln_cs.csv")
    val betas = readCSV("big-betas.csv")

    input.zipWithIndex.foreach { case (v, i) =>
      val wmh = new WeightedMinHash(v.length, 128, rs, lnCs, betas)
      val hashes = wmh.hash(v)
      val realHashes = readCSV(s"big-hashes-${i}.csv").map(_.map(_.toLong))
      hashes should be(realHashes)
    }
  }
}
Example 117
Source File: EventHubsSourceOffsetSuite.scala From azure-event-hubs-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.eventhubs import java.io.File import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.streaming.OffsetSuite import org.apache.spark.sql.test.SharedSQLContext class EventHubsSourceOffsetSuite extends OffsetSuite with SharedSQLContext { compare(one = EventHubsSourceOffset(("t", 0, 1L)), two = EventHubsSourceOffset(("t", 0, 2L))) compare(one = EventHubsSourceOffset(("t", 0, 1L), ("t", 1, 0L)), two = EventHubsSourceOffset(("t", 0, 2L), ("t", 1, 1L))) compare(one = EventHubsSourceOffset(("t", 0, 1L), ("T", 0, 0L)), two = EventHubsSourceOffset(("t", 0, 2L), ("T", 0, 1L))) compare(one = EventHubsSourceOffset(("t", 0, 1L)), two = EventHubsSourceOffset(("t", 0, 2L), ("t", 1, 1L))) val ehso1 = EventHubsSourceOffset(("t", 0, 1L)) val ehso2 = EventHubsSourceOffset(("t", 0, 2L), ("t", 1, 3L)) val ehso3 = EventHubsSourceOffset(("t", 0, 2L), ("t", 1, 3L), ("t", 1, 4L)) compare(EventHubsSourceOffset(SerializedOffset(ehso1.json)), EventHubsSourceOffset(SerializedOffset(ehso2.json))) test("basic serialization - deserialization") { assert( EventHubsSourceOffset.getPartitionSeqNos(ehso1) == EventHubsSourceOffset.getPartitionSeqNos(SerializedOffset(ehso1.json))) } test("OffsetSeqLog serialization - deserialization") { withTempDir { temp => // use non-existent directory to test whether log make the dir val dir = new File(temp, "dir") val metadataLog = new OffsetSeqLog(spark, dir.getAbsolutePath) val batch0 = OffsetSeq.fill(ehso1) val batch1 = OffsetSeq.fill(ehso2, ehso3) val batch0Serialized = OffsetSeq.fill(batch0.offsets.flatMap(_.map(o => SerializedOffset(o.json))): _*) val batch1Serialized = OffsetSeq.fill(batch1.offsets.flatMap(_.map(o => SerializedOffset(o.json))): _*) assert(metadataLog.add(0, batch0)) assert(metadataLog.getLatest() === Some(0 -> batch0Serialized)) assert(metadataLog.get(0) === Some(batch0Serialized)) assert(metadataLog.add(1, batch1)) assert(metadataLog.get(0) === Some(batch0Serialized)) assert(metadataLog.get(1) === Some(batch1Serialized)) assert(metadataLog.getLatest() === Some(1 -> batch1Serialized)) assert( metadataLog.get(None, Some(1)) === Array(0 -> batch0Serialized, 1 -> batch1Serialized)) // Adding the same batch does nothing metadataLog.add(1, OffsetSeq.fill(LongOffset(3))) assert(metadataLog.get(0) === Some(batch0Serialized)) assert(metadataLog.get(1) === Some(batch1Serialized)) assert(metadataLog.getLatest() === Some(1 -> batch1Serialized)) assert( metadataLog.get(None, Some(1)) === Array(0 -> batch0Serialized, 1 -> batch1Serialized)) } } test("read Spark 2.1.0 offset format") { val offset = readFromResource("eventhubs-source-offset-version-2.1.0.txt") assert( EventHubsSourceOffset(offset) === EventHubsSourceOffset(("ehName1", 0, 456L), ("ehName1", 1, 789L), ("ehName2", 0, 0L))) } private def readFromResource(file: String): SerializedOffset = { import scala.io.Source val input = getClass.getResource(s"/$file").toURI val str = Source.fromFile(input).mkString SerializedOffset(str) } }
Example 118
Source File: SensorReader.scala From temperature-machine with Apache License 2.0 | 5 votes |
package bad.robot.temperature.ds18b20 import bad.robot.temperature.AutoClosing._ import bad.robot.temperature._ import bad.robot.temperature.ds18b20.SensorFile._ import bad.robot.temperature.ds18b20.SensorReader._ import bad.robot.temperature.rrd.Host import bad.robot.temperature.rrd.Seconds.{apply => _, _} import scala.io.Source import scalaz.Scalaz._ import scalaz.\/ import scalaz.\/._ object SensorReader { def apply(host: Host, files: List[SensorFile]) = new SensorReader(host, files) private val toReading: SensorFile => Error \/ SensorReading = file => { for { source <- fromTryCatchNonFatal(Source.fromFile(file)).leftMap(FileError) data <- closingAfterUse(source)(_.getLines().toList).headOption.toRightDisjunction(UnexpectedError("Problem reading file, is it empty?")) temperature <- Parser.parse(data) } yield SensorReading(file.getParentFile.getName, temperature) } } class SensorReader(host: Host, sensors: List[SensorFile]) extends TemperatureReader { def read: Error \/ Measurement = { for { files <- sensors.toNel.toRightDisjunction(FailedToFindFile(BaseFolder)) temperatures <- files.map(toReading).sequenceU } yield Measurement(host, now(), temperatures.toList) } }
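The reader above wraps Source.fromFile in a scalaz disjunction and a hand-rolled closingAfterUse helper. On Scala 2.13+ the standard scala.util.Using gives the same close-on-exit guarantee, since Source is a Closeable; a minimal sketch assuming that Scala version:

import java.io.File
import scala.io.Source
import scala.util.{Try, Using}

object SensorFileReader {
  // Read the first line of a sensor file; the Source is closed whether or not reading succeeds.
  def firstLine(file: File): Try[Option[String]] =
    Using(Source.fromFile(file)) { src =>
      src.getLines().toList.headOption
    }
}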
Example 119
Source File: DataGenerator.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.unsupervised.functionapprox import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import scala.collection.mutable import scala.io.Source import scala.util.Random def apply(sc: SparkContext): RDD[(Float, Float)] = { // See the random noise val r = new Random(System.currentTimeMillis + Random.nextLong) val src = Source.fromFile(sourceName) val input = src.getLines.map(_.split(DELIM)) ./:(mutable.ArrayBuffer[(Float, Float)]())((buf, xy) => { val x = addNoise(xy(0).trim.toFloat, r) val y = addNoise(xy(1).trim.toFloat, r) buf += ((x, y)) }) datasetSize = input.size val data_rdd = sc.makeRDD(input, nTasks) src.close data_rdd } // Original signal + random noise private def addNoise(value: Float, r: Random): Float = value*(1.0 + RATIO*(r.nextDouble - 0.5)).toFloat } // ------------------------------------- EOF ----------------------------------------------
Example 120
Source File: ResourcesLoader.scala From Scala-for-Machine-Learning-Second-Edition with MIT License | 5 votes |
package org.scalaml.spark

import org.apache.spark.sql.DataFrame

sealed trait Extractor {
  protected[this] val delimiter: String
  def extract(line: String): Array[String]
}

final class CSVExtractor extends Extractor {
  override protected[this] val delimiter: String = ","
  override def extract(line: String): Array[String] = line.split(delimiter)
}

private[spark] object ResourcesLoader {
  import scala.io.Source

  type FieldsSet = Iterator[Array[String]]

  final def loadFromLocal(filename: String, extractor: Extractor): FieldsSet = {
    val src = Source.fromFile(filename)
    // Materialize the lines before closing: getLines is lazy, so closing first
    // would invalidate the iterator handed back to the caller.
    val lines = src.getLines().map(extractor.extract(_)).toList
    src.close()
    lines.iterator
  }

  final def loadFromHDFS(pathname: String)(implicit sessionLifeCycle: SessionLifeCycle): DataFrame = {
    import sessionLifeCycle.sparkSession.implicits._
    sessionLifeCycle.sparkContext.textFile(pathname).toDF
  }

  final def getPath(filename: String): Option[String] = Option(getClass.getResource(filename).getPath)
}
Example 121
Source File: Application.scala From retail_analytics with Apache License 2.0 | 5 votes |
package controllers import scalaz._ import Scalaz._ import scalaz.EitherT._ import scalaz.Validation //import scalaz.Validation.FlatMap._ import scalaz.NonEmptyList._ import play.api.mvc._ import java.io.File import scala.io.Source import org.apache.log4j.Logger import org.apache.log4j.Level import models._ import models.stack._ import play.api.libs.json._ object Application extends Controller { def index() = Action { implicit request => Ok(views.html.index("Megam Analytics.")) } def upload = Action(parse.multipartFormData) { implicit request => request.body.file("picture").map { picture => import java.io.File val filename = picture.filename val contentType = picture.contentType picture.ref.moveTo(new File("/tmp/"+filename)) models.HDFSFileService.saveFile("/tmp/"+filename) match { case Success(succ) => { val fu = List(("success" -> succ)) Redirect("/").flashing(fu: _*) } case Failure(err) => { val fu = List(("error" -> "File doesn't get uploaded")) Redirect("/").flashing(fu: _*) } } }.getOrElse { val fu = List(("error" -> "File doesn't get uploaded..")) Redirect("/").flashing(fu: _*) } } def analysis() = Action { implicit request => val tuple_res = models.Retail.buyingbehaviour(MConfig.recommand_ID.toInt, MConfig.retailfile) println("BACK==========================>>>") println(tuple_res._1) //val finalJson = { // for { // product <- productList // } yield Json.parse(product).as[JsObject] // } Ok(views.html.finalProducts(tuple_res._1, tuple_res._2)) } }
Example 122
Source File: LibLinTest.scala From scala-cp with Apache License 2.0 | 5 votes |
package se.uu.it.cp import scala.io.Source import scala.util.Random import org.junit.runner.RunWith import org.scalatest.FunSuite import org.scalatest.junit.JUnitRunner import de.bwaldvogel.liblinear.Feature import de.bwaldvogel.liblinear.FeatureNode import de.bwaldvogel.liblinear.Linear import de.bwaldvogel.liblinear.Parameter import de.bwaldvogel.liblinear.Problem import de.bwaldvogel.liblinear.SolverType @RunWith(classOf[JUnitRunner]) class LibLinTest extends FunSuite { test("Train an inductive classifier with LIBLINEAR") { // Define a LIBLINEAR data point case class LibLinPoint(features: Array[Feature], label: Double) // Define a LIBLINEAR underlying algorithm class LibLinAlg(val properTrainingSet: Seq[LibLinPoint]) extends UnderlyingAlgorithm[LibLinPoint] { // First describe how to access LIBLINEAR data point structure override def makeDataPoint(features: Seq[Double], label: Double) = { val libLinFeat = features.zipWithIndex.map { case (f, i) => new FeatureNode(i + 1, f).asInstanceOf[Feature] } LibLinPoint(libLinFeat.toArray, label) } override def getDataPointFeatures(p: LibLinPoint) = p.features.map(_.getValue) override def getDataPointLabel(p: LibLinPoint) = p.label // Train a Logistic Regression model val lrModel = { val problem = new Problem() problem.l = properTrainingSet.length problem.n = properTrainingSet(0).features.length problem.x = properTrainingSet.map(_.features).toArray problem.y = properTrainingSet.map(_.label).toArray val solver = SolverType.L2R_LR val parameter = new Parameter(solver, 1.0, 0.01) Linear.train(problem, parameter) } // Define nonconformity measure as probability of wrong prediction override def nonConformityMeasure(p: LibLinPoint) = { val estimates = Array.fill(2)(0.0) Linear.predictProbability(lrModel, p.features, estimates) estimates((p.label - 1).abs.toInt) } } // Load and parse dataset val dataPath = getClass.getResource("breast-cancer.data").getPath val dataset = Source.fromFile(dataPath).getLines .map { line => val split = line.split(" ") val label = split(0).toDouble val features = split.drop(1).map { featureString => val split = featureString.split(":") new FeatureNode(split(0).toInt,split(1).toDouble) .asInstanceOf[Feature] } LibLinPoint(features,label) }.toSeq // Split data Random.setSeed(11L) val (training,test) = Random.shuffle(dataset).splitAt(450) val (properTraining, calibrationSet) = Random.shuffle(training).splitAt(400) // Train an inductive conformal classifier val cp = ICP.trainClassifier( new LibLinAlg(properTraining), nOfClasses = 2, calibrationSet) // Make some predictions, and compute error fraction val significance = 0.05 val nOfCorrect = test.count { p => val pSet = cp.predict(p.features.map(_.getValue), significance) !pSet.contains(p.label) } val errorFract = nOfCorrect.toDouble / test.length // Error fraction should be at most significance assert(errorFract <= significance) } }
Example 123
Source File: RawModel.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.models.benchmark.generator.models import org.joda.time.DateTime import org.joda.time.format.DateTimeFormat import scala.annotation.tailrec import scala.io.Source import scala.util.Random case class RawModel (order_id: String, timestamp: String, client_id: Integer, latitude: Double, longitude: Double, payment_method: String, credit_card: String, shopping_center: String, employee: Integer) {} object RawModel { val Range_client_id = (1, 300) val Range_payment_method = Source.fromInputStream( this.getClass.getClassLoader.getResourceAsStream("payment-methods.txt")).getLines().toSeq val Range_shopping_center = Source.fromInputStream( this.getClass.getClassLoader.getResourceAsStream("shopping-centers.txt")).getLines().toSeq val Range_employee = (1, 300) val Range_quantity = (1, 30) val Range_timestap = (0, 60) val Range_creditCard = (0, 9) val R = Random val DigitsCreditCard = 16 val Range_family_product: Map[String, Map[String,Float]] = Source.fromInputStream( this.getClass.getClassLoader.getResourceAsStream("family-products.csv")).getLines().map(x => { val splitted = x.split(",") (splitted(0), Map(splitted(1) -> splitted(2).toFloat)) }).toMap def generateShoppingCenter(): String = { Range_shopping_center(generateRandomInt(0, Range_shopping_center.length - 1)) } def generatePaymentMethod(): String = { Range_payment_method(generateRandomInt(0, Range_payment_method.length - 1)) } def generateTimestamp(): String = { val datetime = new DateTime().minusDays(generateRandomInt(Range_timestap._1, Range_timestap._2)) DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss").print(datetime) } def generateRandomInt(min: Int, max: Int): Int = { R.nextInt((max -min) + 1) + min } @tailrec def generateCreditCard(current: String): String = { if(current.length != DigitsCreditCard) generateCreditCard(current + generateRandomInt(Range_creditCard._1, Range_creditCard._2)) else current } } trait RawModelCommonData { val geolocations = initGeolocations() val clientIdCreditCard: Map[Int, String] = initClientIdCreditCard((1 to RawModel.Range_client_id._2).toSeq, Map()) val clientIdGeo: Map[Int, (Double, Double)] = initClientIdGeo(clientIdCreditCard, geolocations) def initGeolocations() : Seq[String] = { Source.fromInputStream( this.getClass.getClassLoader.getResourceAsStream("geolocations.csv")).getLines().toSeq } def initClientIdCreditCard(idClients: Seq[Int], clientIdCreditCard: Map[Int, String]): Map[Int, String] = { if(idClients.size == 0) { clientIdCreditCard } else { val newIdClients = idClients.init val newClientIdCreditCard = clientIdCreditCard + (idClients.last -> RawModel.generateCreditCard("")) initClientIdCreditCard(newIdClients, newClientIdCreditCard) } } def initClientIdGeo(clientIdCreditCard: Map[Int, String], geolocations: Seq[String]) :Map[Int, (Double, Double)] = { clientIdCreditCard.map(x => { val index = RawModel.generateRandomInt(0, geolocations.size - 1) x._1 -> ((geolocations(index)).split(":")(0).toDouble, (geolocations(index)).split(":")(1).toDouble) }) } }
Example 124
Source File: InfoHelper.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta.serving.core.helpers import akka.event.slf4j.SLF4JLogging import com.github.nscala_time.time.Imports.{DateTime, DateTimeFormat} import com.stratio.sparta.serving.core.constants.AppConstant.version import com.stratio.sparta.serving.core.models.info.AppInfo import scala.io.Source import scala.util._ object InfoHelper extends SLF4JLogging { val devContact = "[email protected]" val supportContact = "[email protected]" val license = Try { Source.fromInputStream(InfoHelper.getClass.getClassLoader.getResourceAsStream("LICENSE.txt")).mkString }.getOrElse("") def getAppInfo: AppInfo = { Try(Source.fromInputStream(InfoHelper.getClass.getClassLoader.getResourceAsStream("version.txt")).getLines) match { case Success(lines) => val pomVersion = lines.next() val profileId = lines.next() val timestamp = lines.next() val pomParsed = if (pomVersion != "${project.version}") pomVersion else version val profileIdParsed = if (profileId != "${profile.id}") profileId else "" val timestampParsed = { if (timestamp != "${timestamp}") timestamp else { val format = DateTimeFormat.forPattern("yyyy-MM-dd-hh:mm:ss") format.print(DateTime.now) } } AppInfo(pomParsed, profileIdParsed, timestampParsed, devContact, supportContact, license) case Failure(e) => log.error("Cannot get version info", e) throw e } } }
Example 125
Source File: FileSystemInputTest.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta.plugin.input.fileSystem import java.io._ import com.stratio.sparta.plugin.TemporalSparkContext import org.scalatest._ import scala.io.Source class FileSystemInputTest extends TemporalSparkContext with Matchers { val directory = getClass().getResource("/origin.txt") val lines = Source.fromURL(directory).getLines().toList val parentFile = new File(directory.getPath).getParent val properties = Map(("directory", "file://" + parentFile)) val input = new FileSystemInput(properties) "Events counted" should " the same as files created" in { val dstream= input.initStream(ssc, "MEMORY_ONLY") val totalEvents = ssc.sparkContext.accumulator(0L) dstream.print() dstream.foreachRDD(rdd => { val count = rdd.count() println(s"EVENTS COUNT : \t $count") totalEvents.add(count) }) ssc.start() Thread.sleep(3000) val file = new File(parentFile + "/output.txt") val out = new PrintWriter(file) lines.foreach(l => out.write(l)) out.close() val numFile = if (file.exists()) 1 else 0 ssc.awaitTerminationOrTimeout(10000) assert(totalEvents.value === numFile.toLong) file.delete() } }
Example 126
Source File: LongInputTests.scala From boson with Apache License 2.0 | 5 votes |
package io.zink.boson import bsonLib.BsonObject import io.netty.util.ResourceLeakDetector import io.vertx.core.json.JsonObject import io.zink.boson.bson.bsonImpl.BosonImpl import org.junit.runner.RunWith import org.scalatest.FunSuite import org.scalatest.junit.JUnitRunner import org.junit.Assert._ import scala.collection.mutable.ArrayBuffer import scala.concurrent.Await import scala.concurrent.duration.Duration import scala.io.Source @RunWith(classOf[JUnitRunner]) class LongInputTests extends FunSuite { ResourceLeakDetector.setLevel(ResourceLeakDetector.Level.ADVANCED) val bufferedSource: Source = Source.fromURL(getClass.getResource("/jsonOutput.txt")) val finale: String = bufferedSource.getLines.toSeq.head bufferedSource.close val json: JsonObject = new JsonObject(finale) val bson: BsonObject = new BsonObject(json) test("extract top field") { val expression: String = ".Epoch" val boson: Boson = Boson.extractor(expression, (out: Int) => { assertTrue(3 == out) }) val res = boson.go(bson.encode.getBytes) Await.result(res, Duration.Inf) } test("extract bottom field") { val expression: String = "SSLNLastName" val expected: String = "de Huanuco" val boson: Boson = Boson.extractor(expression, (out: String) => { assertTrue(expected.zip(out).forall(e => e._1.equals(e._2))) }) val res = boson.go(bson.encode.getBytes) Await.result(res, Duration.Inf) } test("extract positions of an Array") { val expression: String = "Markets[3 to 5]" val mutableBuffer: ArrayBuffer[Array[Byte]] = ArrayBuffer() val boson: Boson = Boson.extractor(expression, (out: Array[Byte]) => { mutableBuffer += out }) val res = boson.go(bson.encode.getBytes) Await.result(res, Duration.Inf) assertEquals(3, mutableBuffer.size) } test("extract further positions of an Array") { val expression: String = "Markets[50 to 55]" val mutableBuffer: ArrayBuffer[Array[Byte]] = ArrayBuffer() val boson: Boson = Boson.extractor(expression, (out: Array[Byte]) => { mutableBuffer += out }) val res = boson.go(bson.encode.getBytes) Await.result(res, Duration.Inf) assertEquals(6, mutableBuffer.size) } test("size of all occurrences of Key") { val expression: String = "Price" val mutableBuffer: ArrayBuffer[Float] = ArrayBuffer() val boson: Boson = Boson.extractor(expression, (out: Float) => { mutableBuffer += out }) val res = boson.go(bson.encode.getBytes) Await.result(res, Duration.Inf) assertEquals(195, mutableBuffer.size) } }
Example 127
Source File: JsonSource.scala From play-json-schema-validator with Apache License 2.0 | 5 votes |
package com.eclipsesource.schema import java.io.InputStream import java.net.URL import play.api.libs.json._ import scala.io.Source import scala.util.{Failure, Success, Try} def schemaFromUrl(url: URL)(implicit reads: Reads[SchemaType]): JsResult[SchemaType] = { for { schemaJson <- JsonSource.fromUrl(url) match { case Success(json) => JsSuccess(json) case Failure(throwable) => JsError(throwable.getMessage) } schema <- Json.fromJson[SchemaType](schemaJson) } yield schema } }
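`schemaFromUrl` relies on a JsonSource.fromUrl helper whose body is not shown in this excerpt. A plausible shape for such a helper, inferred only from how it is used here (a Try of a parsed JsValue), so treat the name and signature as assumptions:

import java.net.URL
import play.api.libs.json.{JsValue, Json}
import scala.io.Source
import scala.util.Try

object JsonSourceSketch {
  // Hypothetical helper: fetch a URL, parse it as JSON, and surface failures as Failure.
  def fromUrl(url: URL): Try[JsValue] = Try {
    val src = Source.fromURL(url)
    try Json.parse(src.mkString) finally src.close()
  }
}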
Example 128
Source File: EmrConfig.scala From sbt-lighter with Apache License 2.0 | 5 votes |
package sbtlighter import scala.collection.JavaConverters._ import scala.io.Source import com.amazonaws.services.elasticmapreduce.model.Configuration import com.amazonaws.services.s3.AmazonS3 import io.circe.generic.auto._ import io.circe.parser._ case class EmrConfig( Classification: String, Properties: Option[Map[String, String]], Configurations: Option[Seq[EmrConfig]] ) { def withProperties(props: (String, String)*) = { this.copy(Properties = Some(props.toMap)) } def withEmrConfigs(configs: EmrConfig*) = { this.copy(Configurations = Some(configs)) } def toAwsEmrConfig(): Configuration = { Some(new Configuration().withClassification(Classification)) .map { c => Properties.map(props => c.withProperties(props.asJava)).getOrElse(c) } .map { c => Configurations .map { configs => c.withConfigurations(configs.map(_.toAwsEmrConfig): _*) } .getOrElse(c) } .get } } object EmrConfig { def apply(classification: String): EmrConfig = EmrConfig(classification, None, None) def parseJson(jsonString: String) = decode[List[EmrConfig]](jsonString) def parseJsonFromS3(s3Url: String)(implicit s3: AmazonS3) = { val s3JsonUrl = new S3Url(s3Url) val jsonString = s3.getObjectAsString(s3JsonUrl.bucket, s3JsonUrl.key) parseJson(jsonString) } }
Example 129
Source File: TestApp.scala From akka-http-file-server with Apache License 2.0 | 5 votes |
package akkahttp import java.io.File import akka.actor.ActorSystem import com.typesafe.config.{Config, ConfigFactory} import scala.concurrent.Await import scala.io.Source object TestApp extends App { val testConf: Config = ConfigFactory.load() implicit val system = ActorSystem("ServerTest", testConf) implicit val dispatcher = system.dispatcher val host = "127.0.0.1" val port = 9112 val server = new FileServer(system, host, 9112) //start file server val binding = server.start val client = new FileServer.Client(system, host, port) // upload the file val testFile = new File(getClass.getResource("/testfile.txt").toURI()) val fileHandler = client.upload(testFile) //download the file val target = File.createTempFile("testapp_download", "") val future = fileHandler.flatMap{handler => client.download(handler, target) } import scala.concurrent.duration._ Await.result(future, 10 seconds) // check the file content. Source.fromFile(testFile).foreach{ print } println() // now you can try to browser http://127.0.0.1:9112/ println(s"Browser http://${host}:${port} to test download and upload") system.awaitTermination() }
Example 130
Source File: DefaultRouteTest.scala From scala-for-beginners with Apache License 2.0 | 5 votes |
package com.allaboutscala.donutstore.httpserver.routes import akka.http.scaladsl.testkit.ScalatestRouteTest import org.scalatest.{Matchers, WordSpec} import scala.io.Source class DefaultRouteTest extends WordSpec with Matchers with ScalatestRouteTest with TestBase { val defaultRoutes = new DefaultRoute().routes() "DonutStore" can { "have a welcome page at the root end point" in { Get("/") ~> defaultRoutes ~> check { responseAs[String] shouldEqual welcomePage() } } } private def welcomePage(): String = { val path = sys.env.getOrElse("WELCOME_PAGE_PATH", "httpServer/src/main/resources/welcome.html") Source.fromFile(path).mkString } }
Example 131
Source File: WordList.scala From scala-stellar-sdk with Apache License 2.0 | 5 votes |
package stellar.sdk.key import scala.io.Source trait WordList { def indexOf(word: String): Option[Int] def wordAt(i: Int): String def contains(word: String): Boolean = indexOf(word).isDefined def separator: String } class ArrayBackedWordList(source: => Source, val separator: String = " ") extends WordList { lazy val words: Array[String] = source.getLines().toArray // TODO (jem) - WordList spec that ensures index can be found with normalized variants. override def indexOf(word: String): Option[Int] = Some(words.indexOf(word)).filter(_ >= 0) override def wordAt(i: Int): String = { require(i >= 0 && i < words.length, s"Word index $i is out of range.") words(i) } } object ChineseSimplifiedWords extends ArrayBackedWordList(Source.fromResource("wordlists/chinese_simplified.txt")) object ChineseTraditionalWords extends ArrayBackedWordList(Source.fromResource("wordlists/chinese_traditional.txt")) object CzechWords extends ArrayBackedWordList(Source.fromResource("wordlists/czech.txt")) object EnglishWords extends ArrayBackedWordList(Source.fromResource("wordlists/english.txt")) object FrenchWords extends ArrayBackedWordList(Source.fromResource("wordlists/french.txt")) object ItalianWords extends ArrayBackedWordList(Source.fromResource("wordlists/italian.txt")) object JapaneseWords extends ArrayBackedWordList(Source.fromResource("wordlists/japanese.txt"), "\u3000") object KoreanWords extends ArrayBackedWordList(Source.fromResource("wordlists/korean.txt")) object SpanishWords extends ArrayBackedWordList(Source.fromResource("wordlists/spanish.txt"))
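Because ArrayBackedWordList takes its Source by name and only reads it inside a lazy val, the underlying file is opened on first access and cached as an array. A small usage sketch with an in-memory Source instead of a bundled resource (the three words are made up):

import scala.io.Source
import stellar.sdk.key.ArrayBackedWordList

object WordListUsage {
  def main(args: Array[String]): Unit = {
    // Build a word list from an in-memory Source rather than a wordlists/*.txt resource.
    val words = new ArrayBackedWordList(Source.fromString("abandon\nability\nable"))
    println(words.wordAt(1))        // ability
    println(words.indexOf("able"))  // Some(2)
    println(words.contains("nope")) // false
  }
}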
Example 132
Source File: RegexSentenceAnnotator.scala From jigg with Apache License 2.0 | 5 votes |
package jigg.pipeline import java.util.Properties import scala.io.Source import scala.xml.{Node, Elem, Text, Atom} import jigg.util.XMLUtil.RichNode class RegexSentenceAnnotator(override val name: String, override val props: Properties) extends Annotator { @Prop(gloss = "Regular expression to segment lines (if omitted, specified method is used)") var pattern = "" @Prop(gloss = "Use predefined segment pattern newLine|point|pointAndNewLine") var method = "pointAndNewLine" readProps() val splitRegex = pattern match { case "" => method match { case "newLine" => RegexSentenceAnnotator.newLine case "point" => RegexSentenceAnnotator.point case "pointAndNewLine" => RegexSentenceAnnotator.pointAndNewLine case other => argumentError("method") } case pattern => pattern.r } private[this] val sentenceIDGen = jigg.util.IDGenerator("s") override def annotate(annotation: Node): Node = { annotation.replaceAll("document") { e => val line = e.text val sentenceBoundaries = 0 +: splitRegex.findAllMatchIn(line).map(_.end).toVector :+ line.length val sentences: Vector[Node] = sentenceBoundaries.sliding(2).toVector flatMap { case Seq(begin_, end_) => def isSpace(c: Char) = c == ' ' || c == '\t' || c == '\n' val snippet = line.substring(begin_, end_) val begin = snippet.indexWhere(!isSpace(_)) match { case -1 => begin_ // space only case offset => begin_ + offset } val end = snippet.lastIndexWhere(!isSpace(_)) match { case -1 => begin_ case offset => begin_ + offset + 1 } // val sentence: String = line.substring(begin, end).trim() val sentence: String = line.substring(begin, end) if (sentence.isEmpty) None else { Option(<sentence id={ sentenceIDGen.next } characterOffsetBegin={ begin+"" } characterOffsetEnd={ end+"" }>{ sentence }</sentence>) } } // val textRemoved = XMLUtil.removeText(e) // XMLUtil.addChild(textRemoved, <sentences>{ sentences }</sentences>) e addChild <sentences>{ sentences }</sentences> } } override def requires = Set() override def requirementsSatisfied = Set(Requirement.Ssplit) } object RegexSentenceAnnotator extends AnnotatorCompanion[RegexSentenceAnnotator] { val newLine = """\n+""".r val point = """。+""".r val pointAndNewLine = """\n+|。\n*""".r }
Example 133
Source File: MecabReader.scala From jigg with Apache License 2.0 | 5 votes |
package jigg.nlp.ccg.lexicon

import scala.collection.mutable.ArrayBuffer
import scala.io.Source

class MecabReader(dict:Dictionary) {

  def toPoSTaggedSentence(lines:Seq[String]) = {
    val terminalSeq = lines.map { line =>
      val splitted = line.split('\t')
      val word = dict.getWordOrCreate(splitted(0))
      val base = dict.getWordOrCreate(splitted(2))
      val conjStr = if (splitted.size > 6) splitted(5) else "_"
      val posStr = splitted(3) + "/" + conjStr
      val pos = dict.getPoSOrCreate(posStr)
      (word, base, pos)
    }
    new PoSTaggedSentence(
      terminalSeq.map(_._1),
      terminalSeq.map(_._2),
      terminalSeq.map(_._3))
  }

  def readSentences(in:Source, n:Int): Array[PoSTaggedSentence] = {
    val sentences = new ArrayBuffer[PoSTaggedSentence]
    val sentenceLines = new ArrayBuffer[String]
    takeLines(in, n).foreach { _ match {
      case "EOS" =>
        sentences += toPoSTaggedSentence(sentenceLines)
        sentenceLines.clear
      case line => sentenceLines += line
    }}
    sentences.toArray
  }

  def readSentences(path:String, n:Int): Array[PoSTaggedSentence] = readSentences(Source.fromFile(path), n)

  def takeLines(in:Source, n:Int): Iterator[String] =
    for (line <- in.getLines.filter(_!="") match {
      case lines if (n == -1) => lines
      case lines => lines.take(n)
    }) yield line
}
Example 134
Source File: CategoryFeature.scala From jigg with Apache License 2.0 | 5 votes |
package jigg.nlp.ccg.lexicon trait CategoryFeature { def kvs: Seq[(String, String)] def unify(lhs: CategoryFeature): Boolean = false // TODO: implement } @SerialVersionUID(-8236395926230742650L) case class JPCategoryFeature(values: Seq[String]) extends CategoryFeature { import JPCategoryFeature._ override def kvs = keys zip values override def toString = kvs.filter(_._2 != "").map { case (k, v) => k + "=" + v }.mkString(",") } object JPCategoryFeature { // This is a hard-coded mapping of feature structure of Japanese category. private val k2vals = Map( "mod" -> Array("adv", "adn", "nm"), "form" -> Array("attr", "base", "cont", "hyp", "imp", "beg", "stem", "ta", "te", "pre", "r", "neg", "s", "da"), "case" -> Array("ga", "o", "ni", "to", "nc", "caus"), "fin" -> Array("f", "t")) private val keys = k2vals.keys.toSeq private val v2keyIdx = { val key2idx = keys.zipWithIndex.toMap k2vals.flatMap { case (key, vals) => vals.map { v => v -> key2idx(key) } } } val kvpair = """\w+=(\w+)""".r def createFromValues(values: Seq[String]) = values match { case Seq() => emptyFeature case _ => val sortedValues = Array.fill(keys.size)("") values.filter(_!="").foreach { value => val v = value match { case kvpair(v) => v; case v => v } if (v(0) != 'X') v2keyIdx(v) match { case i => sortedValues(i) = v } } JPCategoryFeature(sortedValues) } // We cache this because most categories don't have a feature private val emptyFeature = JPCategoryFeature(Array.fill(keys.size)("")) } case class EnCategoryFeature(values: Seq[String]) extends CategoryFeature { override def kvs = values.zipWithIndex.map { case (v, k) => (k.toString, v) } override def toString = values.mkString(",") } object EnCategoryFeature { def createFromValues(values: Seq[String]) = EnCategoryFeature(values.sortWith(_ < _)) }
Example 135
Source File: CabochaReader.scala From jigg with Apache License 2.0 | 5 votes |
package jigg.nlp.ccg.lexicon import scala.io.Source class CabochaReader[S<:TaggedSentence](ccgSentences: Seq[S]) { def readSentences(path: String): Seq[ParsedBunsetsuSentence] = { val bunsetsuStart = """\* (\d+) (-?\d+)[A-Z].*""".r def addBunsetsuTo(curSent: List[(String, Int)], curBunsetsu: List[String]) = curBunsetsu.reverse match { case Nil => curSent case headIdx :: tail => (tail.mkString(""), headIdx.toInt) :: curSent } val bunsetsuSegedSentences: List[List[(String, Int)]] = scala.io.Source.fromFile(path).getLines.filter(_ != "").foldLeft( (List[List[(String, Int)]](), List[(String, Int)](), List[String]())) { case ((processed, curSent, curBunsetsu), line) => line match { case bunsetsuStart(_, nextHeadIdx) => (processed, addBunsetsuTo(curSent, curBunsetsu), nextHeadIdx :: Nil) // use first elem as the head idx case "EOS" => (addBunsetsuTo(curSent, curBunsetsu).reverse :: processed, Nil, Nil) case word => (processed, curSent, word.split("\t")(0) :: curBunsetsu) } }._1.reverse ccgSentences.zip(bunsetsuSegedSentences).map { case (ccgSentence, bunsetsuSentence) => val bunsetsuSegCharIdxs: List[Int] = bunsetsuSentence.map { _._1.size }.scanLeft(0)(_+_).tail // 5 10 ... val ccgWordSegCharIdxs: List[Int] = ccgSentence.wordSeq.toList.map { _.v.size }.scanLeft(0)(_+_).tail // 2 5 7 10 ... assert(bunsetsuSegCharIdxs.last == ccgWordSegCharIdxs.last) val bunsetsuSegWordIdxs: List[Int] = ccgWordSegCharIdxs.zipWithIndex.foldLeft((List[Int](), 0)) { // 1 3 ... case ((segWordIdxs, curBunsetsuIdx), (wordIdx, i)) => if (wordIdx >= bunsetsuSegCharIdxs(curBunsetsuIdx)) (i :: segWordIdxs, curBunsetsuIdx + 1) else (segWordIdxs, curBunsetsuIdx) // wait until wordIdx exceeds the next bunsetsu segment }._1.reverse val bunsetsuSeq = bunsetsuSegWordIdxs.zip(-1 :: bunsetsuSegWordIdxs).map { case (bunsetsuIdx, prevIdx) => val offset = prevIdx + 1 Bunsetsu(offset, ccgSentence.wordSeq.slice(offset, bunsetsuIdx + 1), ccgSentence.posSeq.slice(offset, bunsetsuIdx + 1)) } ParsedBunsetsuSentence(bunsetsuSeq, bunsetsuSentence.map { _._2 }) } } }
Example 136
Source File: CommandLineMain.scala From Muse-CGH with MIT License | 5 votes |
package ui.command_line import ui.user.UIMain import main.{DoubleFieldInfo, ParamsCore} import scopt.OptionParser import utilities.{ParallelOp, ImageSaver, ProjectParameters} import scala.io.Source object CommandLineMain { def main(args: Array[String]) { if (args.isEmpty) UIMain.main(args) // enter interactive GUI mode else { val core = new ParamsCore() var imgName = "muse_result.png" // default output name val parser = new OptionParser[Unit]("muse") { head("muse", ProjectParameters.versionNumber.toString) arg[String]("<input file>") foreach { ip => try { core.textToRender.set(Source.fromFile(ip).mkString) // read input from file } catch { case e: Throwable => println(s"failed to read input from file.\n$e}") System.exit(-1) } } text "the input file to read." opt[String]('o',"out") foreach { n => imgName = n } validate { n => if (n.isEmpty) failure("Option --out must not be empty") else success } text "the out image name (if no extension specified, use .png)" // Other settable parameters (core.layoutRow ++ core.edgeRow ++ core.fontRow ++ core.wordRow ++ core.randomRow).foreach { case DoubleFieldInfo(settable, name, constraint, description) => val abbr = toAbbreviateString(name) val requirements = s"$name --$abbr" + constraint.requirementString opt[Double](abbr) foreach { settable.set } validate { d => if (constraint.f(d)) success else failure(requirements) } text s"$name: $description, ${constraint.requirementString} (default: ${settable.get})" } } if (parser.parse(args)) { println("arguments parsed") renderToImage(core, imgName) } else { println("bad arguments") System.exit(-1) } } } def toAbbreviateString(fullName: String): String = { val words = fullName.split(' ') words.map(w => w.head.toString.capitalize + w.tail).mkString } def renderToImage(core: ParamsCore,imgName: String): Unit = { val paintable = core.getPaintableResult(println) println("start to paint text...") paintable.drawToBufferInParallel(8) // paintable.drawToBuffer() // (0 until 20).foreach { _ => // paintable.drawToBufferInParallel(8) // } // (0 until 20).foreach { _ => // paintable.drawToBuffer() // } println("painting finished.") println("saving results...") ImageSaver.saveImage(paintable.buffer, imgName).foreach{ actualPath => println(s"results saved to $actualPath") } } }
Example 137
Source File: DistServiceExecutor.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.experiments.distributeservice import java.io.{File, FileWriter} import java.net.InetAddress import scala.collection.JavaConverters._ import scala.io.Source import scala.sys.process._ import scala.util.{Failure, Success, Try} import akka.actor.Actor import org.apache.commons.io.FileUtils import org.apache.commons.lang.text.StrSubstitutor import org.slf4j.Logger import org.apache.gearpump.cluster.{ExecutorContext, UserConfig} import org.apache.gearpump.experiments.distributeservice.DistServiceAppMaster.InstallService import org.apache.gearpump.util.{ActorUtil, LogUtil} class DistServiceExecutor(executorContext: ExecutorContext, userConf: UserConfig) extends Actor { import executorContext._ private val LOG: Logger = LogUtil.getLogger(getClass, executor = executorId, app = appId) override def receive: Receive = { case InstallService(url, zipFileName, targetPath, scriptData, serviceName, serviceSettings) => LOG.info(s"Executor $executorId receive command to install " + s"service $serviceName to $targetPath") unzipFile(url, zipFileName, targetPath) installService(scriptData, serviceName, serviceSettings) } private def unzipFile(url: String, zipFileName: String, targetPath: String) = { val zipFile = File.createTempFile(System.currentTimeMillis().toString, zipFileName) val dir = new File(targetPath) if (dir.exists()) { FileUtils.forceDelete(dir) } val bytes = FileServer.newClient.get(url).get FileUtils.writeByteArrayToFile(zipFile, bytes) val result = Try(s"unzip ${zipFile.getAbsolutePath} -d $targetPath".!!) result match { case Success(msg) => LOG.info(s"Executor $executorId unzip file to $targetPath") case Failure(ex) => throw ex } } private def installService( scriptData: Array[Byte], serviceName: String, serviceSettings: Map[String, Any]) = { val tempFile = File.createTempFile("gearpump", serviceName) FileUtils.writeByteArrayToFile(tempFile, scriptData) val script = new File("/etc/init.d", serviceName) writeFileWithEnvVariables(tempFile, script, serviceSettings ++ getEnvSettings) val result = Try(s"chkconfig --add $serviceName".!!) result match { case Success(msg) => LOG.info(s"Executor install service $serviceName successfully!") case Failure(ex) => throw ex } } private def getEnvSettings: Map[String, Any] = { Map("workerId" -> worker, "localhost" -> ActorUtil.getSystemAddress(context.system).host.get, "hostname" -> InetAddress.getLocalHost.getHostName) } private def writeFileWithEnvVariables(source: File, target: File, envs: Map[String, Any]) = { val writer = new FileWriter(target) val sub = new StrSubstitutor(envs.asJava) sub.setEnableSubstitutionInVariables(true) Source.fromFile(source).getLines().foreach(line => writer.write(sub.replace(line) + "\r\n")) writer.close() } }
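`writeFileWithEnvVariables` streams the template line by line through Source.fromFile(...).getLines(), substitutes variables, and writes the result; the writer is closed but the Source is not. A sketch of the same copy-and-transform loop with both ends closed, where the substitution function is a stand-in for StrSubstitutor:

import java.io.{File, FileWriter}
import scala.io.Source

object TemplateWriter {
  // Copy `source` to `target`, applying a per-line transformation, closing both resources.
  def transformLines(source: File, target: File)(substitute: String => String): Unit = {
    val src = Source.fromFile(source)
    val writer = new FileWriter(target)
    try {
      src.getLines().foreach(line => writer.write(substitute(line) + "\r\n"))
    } finally {
      writer.close()
      src.close()
    }
  }
}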
Example 138
Source File: DownloadSupportSpec.scala From incubator-toree with Apache License 2.0 | 5 votes |
package org.apache.toree.utils import java.io.FileNotFoundException import java.net.URL import org.scalatest.{BeforeAndAfter, Matchers, FunSpec} import scala.io.Source import scala.tools.nsc.io.File class DownloadSupportSpec extends FunSpec with Matchers with BeforeAndAfter { val downloadDestinationUrl = new URL("file:///tmp/testfile2.ext") val testFileContent = "This is a test" val testFileName = "/tmp/testfile.txt" // Create a test file for downloading before { File(testFileName).writeAll(testFileContent) } // Cleanup what we made after { if (File(testFileName).exists) File(testFileName).delete() if (File(downloadDestinationUrl.getPath).exists) File(downloadDestinationUrl.getPath).delete() } describe("DownloadSupport"){ describe("#downloadFile( String, String )"){ it("should download a file to the download directory"){ val testFileUrl = "file:///tmp/testfile.txt" // Create our utility and download the file val downloader = new Object with DownloadSupport downloader.downloadFile( testFileUrl, downloadDestinationUrl.getProtocol + "://" + downloadDestinationUrl.getPath) // Verify the file contents are what was in the original file val downloadedFileContent: String = Source.fromFile(downloadDestinationUrl.getPath).mkString downloadedFileContent should be (testFileContent) } } describe("#downloadFile( URL, URL )"){ it("should download a file to the download directory"){ val testFileUrl = new URL("file:///tmp/testfile.txt") val downloader = new Object with DownloadSupport downloader.downloadFile(testFileUrl, downloadDestinationUrl) // Verify the file contents are what was in the original file val downloadedFileContent: String = Source.fromFile(downloadDestinationUrl.getPath).mkString downloadedFileContent should be (testFileContent) } it("should throw FileNotFoundException if the download URL is bad"){ val badFilename = "file:///tmp/testbadfile.txt" if (File(badFilename).exists) File(badFilename).delete() val badFileUrl = new URL(badFilename) val downloader = new Object with DownloadSupport intercept[FileNotFoundException] { downloader.downloadFile(badFileUrl, downloadDestinationUrl) } } it("should throw FileNotFoundException if the download ") { val testFileUrl = new URL("file:///tmp/testfile.txt") val badDestinationUrl = new URL("file:///tmp/badloc/that/doesnt/exist.txt") val downloader = new Object with DownloadSupport intercept[FileNotFoundException] { downloader.downloadFile(testFileUrl, badDestinationUrl) } } } } }
Example 139
Source File: SparkTestEnv.scala From spark-json-schema with MIT License | 5 votes |
package org.zalando.spark.jsonschema import org.apache.spark.sql.SparkSession import scala.io.Source object SparkTestEnv { lazy val sparkSession: SparkSession = { System.clearProperty("spark.driver.port") System.clearProperty("spark.hostPort") SparkSession.builder() .master("local") .appName("testapp") .config("spark.ui.enabled", value = false) .getOrCreate() } def getTestResourceContent(relativePath: String): String = { Option(getClass.getResource(relativePath)) match { case Some(relPath) => Source.fromURL (relPath).mkString case None => throw new IllegalArgumentException(s"Path can not be reached: $relativePath") } } }
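The Option(getClass.getResource(...)) guard above turns a missing resource into a readable error instead of a null-related failure inside Source.fromURL. On Scala 2.12+ the lookup can also go through Source.fromResource; a minimal sketch, assuming the resource name is classpath-relative:

import scala.io.Source

// Sketch only: reads a classpath resource to a String and always closes the Source.
def readResource(name: String): String = {
  val source = Source.fromResource(name.stripPrefix("/"))
  try source.mkString finally source.close()
}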
Example 140
Source File: BlockFilterTest.scala From bitcoin-s with MIT License | 5 votes |
package org.bitcoins.core.gcs

import org.bitcoins.core.protocol.blockchain.Block
import org.bitcoins.core.protocol.script.ScriptPubKey
import org.bitcoins.crypto.DoubleSha256DigestBE
import org.bitcoins.testkit.util.BitcoinSUnitTest
import play.api.libs.json.{JsArray, Json}

import scala.io.Source

class BlockFilterTest extends BitcoinSUnitTest {
  behavior of "BlockFilter"

  // https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki#appendix-c-test-vectors
  case class Bip158TestCase(
      blockHeight: Int,
      blockHash: DoubleSha256DigestBE,
      block: Block,
      prevOutputScripts: Vector[ScriptPubKey],
      prevHeader: DoubleSha256DigestBE,
      filter: GolombFilter,
      header: DoubleSha256DigestBE,
      notes: String
  ) {
    val clue: String = s"Test Notes: $notes"

    def runTest(): org.scalatest.Assertion = {
      val constructedFilter = BlockFilter(block, prevOutputScripts)

      assert(constructedFilter.encodedData.bytes == filter.encodedData.bytes, clue)

      val matcher = new BinarySearchFilterMatcher(filter)
      val constructedMatcher = new BinarySearchFilterMatcher(constructedFilter)

      assert(constructedMatcher.decodedHashes == matcher.decodedHashes, clue)

      val constructedHeader = constructedFilter.getHeader(prevHeader.flip)

      assert(constructedHeader.hash == header.flip, clue)
    }
  }

  object Bip158TestCase {

    //["Block Height,Block Hash,Block,[Prev Output Scripts for Block],Previous Basic Header,Basic Filter,Basic Header,Notes"]
    def fromJsArray(array: JsArray): Bip158TestCase = {
      val parseResult = for {
        height <- array(0).validate[Int]
        blockHash <- array(1).validate[String].map(DoubleSha256DigestBE.fromHex)
        block <- array(2).validate[String].map(Block.fromHex)
        scriptArray <- array(3).validate[JsArray]
        scripts = parseScripts(scriptArray)
        prevHeader <- array(4)
          .validate[String]
          .map(DoubleSha256DigestBE.fromHex)
        filter <- array(5)
          .validate[String]
          .map(BlockFilter.fromHex(_, blockHash.flip))
        header <- array(6).validate[String].map(DoubleSha256DigestBE.fromHex)
        notes <- array(7).validate[String]
      } yield Bip158TestCase(height, blockHash, block, scripts, prevHeader, filter, header, notes)

      parseResult.get
    }

    private def parseScripts(array: JsArray): Vector[ScriptPubKey] = {
      val hexScripts = array.validate[Vector[String]].get
      hexScripts.map(ScriptPubKey.fromAsmHex)
    }
  }

  it must "pass bip 158 test vectors" in {
    val source = Source.fromURL(getClass.getResource("/testnet-19.json"))

    val vec: Vector[JsArray] =
      Json.parse(source.mkString).validate[Vector[JsArray]].get.tail
    val testCases = vec.map(Bip158TestCase.fromJsArray)

    testCases.foreach(_.runTest())
  }
}
Example 141
Source File: BlockBench.scala From bitcoin-s with MIT License | 5 votes |
package org.bitcoins.bench.core

import org.bitcoins.core.protocol.blockchain.Block
import org.slf4j.LoggerFactory

import scala.io.Source

object BlockBench extends App {
  private def logger = LoggerFactory.getLogger(this.getClass)

  private def timeBlockParsing[R](block: () => R): Long = {
    val t0 = System.currentTimeMillis()
    val _ = block() // call-by-name
    val t1 = System.currentTimeMillis()
    val time = t1 - t0
    logger.info("Elapsed time: " + time + "ms")
    time
  }

  def bench1(): Unit = {
    val fileName =
      "/00000000000000000008513c860373da0484f065983aeb063ebf81c172e81d48.txt"
    val lines = Source.fromURL(getClass.getResource(fileName)).mkString
    val time = timeBlockParsing(() => Block.fromHex(lines))
    require(time <= 15000)
  }

  def bench2(): Unit = {
    val fileName =
      "/000000000000000000050f70113ab1932c195442cb49bcc4ee4d7f426c8a3295.txt"
    val lines = Source.fromURL(getClass.getResource(fileName)).mkString
    val time = timeBlockParsing(() => Block.fromHex(lines))
    require(time <= 15000)
  }

  0.until(10).foreach(_ => bench1())

  //bench2()
}
Example 142
Source File: PullRequestOutTest.scala From scala-steward with Apache License 2.0 | 5 votes |
package org.scalasteward.core.vcs.data import io.circe.parser import org.http4s.syntax.literals._ import org.scalasteward.core.vcs.data.PullRequestState.Open import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers import scala.io.Source class PullRequestOutTest extends AnyFunSuite with Matchers { test("decode") { val expected = List( PullRequestOut( uri"https://github.com/octocat/Hello-World/pull/1347", Open, "new-feature" ) ) val input = Source.fromResource("list-pull-requests.json").mkString parser.decode[List[PullRequestOut]](input) shouldBe Right(expected) } }
Example 143
Source File: BranchOutTest.scala From scala-steward with Apache License 2.0 | 5 votes |
package org.scalasteward.core.vcs.data import io.circe.parser import org.scalasteward.core.git.Sha1.HexString import org.scalasteward.core.git.{Branch, Sha1} import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers import scala.io.Source class BranchOutTest extends AnyFunSuite with Matchers { test("decode") { val input = Source.fromResource("get-branch.json").mkString parser.decode[BranchOut](input) shouldBe Right( BranchOut( Branch("master"), CommitOut(Sha1(HexString("7fd1a60b01f91b314f59955a4e4d4e80d8edf11d"))) ) ) } }
Example 144
Source File: RepoOutTest.scala From scala-steward with Apache License 2.0 | 5 votes |
package org.scalasteward.core.vcs.data import cats.effect.IO import io.circe.parser import org.http4s.syntax.literals._ import org.scalasteward.core.git.Branch import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers import scala.io.Source class RepoOutTest extends AnyFunSuite with Matchers { val parent = RepoOut( "base.g8", UserOut("ChristopherDavenport"), None, uri"https://github.com/ChristopherDavenport/base.g8.git", Branch("master") ) val fork = RepoOut( "base.g8-1", UserOut("scala-steward"), Some(parent), uri"https://github.com/scala-steward/base.g8-1.git", Branch("master") ) test("decode") { val input = Source.fromResource("create-fork.json").mkString parser.decode[RepoOut](input) shouldBe Right(fork) } test("parentOrRaise") { fork.parentOrRaise[IO].unsafeRunSync() shouldBe parent } test("repo") { fork.repo shouldBe Repo("scala-steward", "base.g8-1") } }
Example 145
Source File: TestUtils.scala From scavro with Apache License 2.0 | 5 votes |
package org.oedura.scavro.plugin import java.io.{FileOutputStream, InputStream} import sbt._ import scala.io.Source import scala.util.Random class TestUtils(workingDir: File) { (workingDir / "in").mkdir (workingDir / "out").mkdir def tmpDir = workingDir def tmpPath = workingDir.getAbsolutePath private def extractResource(resourceName: String): File = { val is: InputStream = getClass.getResourceAsStream(s"/$resourceName") val text = Source.fromInputStream(is).mkString val os: FileOutputStream = new FileOutputStream(workingDir / "in" / resourceName) os.write(text.getBytes) os.close() is.close() workingDir / "in" / resourceName } lazy val schemaFile: File = extractResource("Number.avsc") lazy val protocolFile: File = { schemaFile extractResource("NumberSystem.avdl") } def cleanup() = { def getRecursively(f: File): Seq[File] = f.listFiles.filter(_.isDirectory).flatMap(getRecursively) ++ f.listFiles getRecursively(workingDir).foreach { f => if (!f.delete()) throw new RuntimeException("Failed to delete " + f.getAbsolutePath) } tmpDir.delete() } } object TestUtils { private val alphabet = ('a' to 'z') ++ ('A' to 'Z') ++ ('0' to '9') def randomFile(dir: File, prefix: String = "", suffix: String = "", maxTries: Int = 100, nameSize: Int = 10): File = { def randomFileImpl(triesLeft: Int): String = { val testName: String = (1 to nameSize).map(_ => alphabet(Random.nextInt(alphabet.size))).mkString if (!(dir / (prefix + testName + suffix)).exists) prefix + testName + suffix else if (triesLeft < 0) throw new Exception("Unable to find empty random file path.") else randomFileImpl(triesLeft - 1) } dir / randomFileImpl(maxTries) } def randomFileName(prefix: String, suffix: String = "", maxTries: Int = 100, nameSize: Int = 10): String = { def randomFileNameImpl(triesLeft: Int): String = { val testName: String = (1 to nameSize).map(_ => alphabet(Random.nextInt(alphabet.size))).mkString if (!file(prefix + testName + suffix).exists) prefix + testName + suffix else if (triesLeft < 0) throw new Exception("Unable to find empty random file path.") else randomFileNameImpl(triesLeft - 1) } randomFileNameImpl(maxTries) } def apply(workingDir: File) = { if (workingDir.exists && workingDir.isDirectory) new TestUtils(workingDir) else if (!workingDir.exists) { val success = workingDir.mkdirs if (success) new TestUtils(workingDir) else throw new Exception("Cannot initialize working directory") } else throw new Exception("Requested directory is occupied by ordinary file") } }
Example 146
Source File: ProxyFeedback.scala From oni-ml with Apache License 2.0 | 5 votes |
package org.opennetworkinsight.proxy import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import org.apache.spark.sql._ import org.apache.spark.sql.types.{StructType, StructField, StringType} import scala.io.Source import org.opennetworkinsight.proxy.ProxySchema._ object ProxyFeedback { def loadFeedbackDF(sc: SparkContext, sqlContext: SQLContext, feedbackFile: String, duplicationFactor: Int): DataFrame = { val feedbackSchema = StructType( List(StructField(Date, StringType, nullable= true), StructField(Time, StringType, nullable= true), StructField(ClientIP, StringType, nullable= true), StructField(Host, StringType, nullable= true), StructField(ReqMethod, StringType, nullable= true), StructField(UserAgent, StringType, nullable= true), StructField(ResponseContentType, StringType, nullable= true), StructField(RespCode, StringType, nullable= true), StructField(FullURI, StringType, nullable= true))) if (new java.io.File(feedbackFile).exists) { val dateIndex = 0 val timeIndex = 1 val clientIpIndex = 2 val hostIndex = 3 val reqMethodIndex = 4 val userAgentIndex = 5 val resContTypeIndex = 6 val respCodeIndex = 11 val fullURIIndex = 18 val fullURISeverityIndex = 22 val lines = Source.fromFile(feedbackFile).getLines().toArray.drop(1) val feedback: RDD[String] = sc.parallelize(lines) sqlContext.createDataFrame(feedback.map(_.split("\t")) .filter(row => row(fullURISeverityIndex).trim.toInt == 3) .map(row => Row.fromSeq(List(row(dateIndex), row(timeIndex), row(clientIpIndex), row(hostIndex), row(reqMethodIndex), row(userAgentIndex), row(resContTypeIndex), row(respCodeIndex), row(fullURIIndex)))) .flatMap(row => List.fill(duplicationFactor)(row)), feedbackSchema) .select(Date, Time, ClientIP, Host, ReqMethod, UserAgent, ResponseContentType, RespCode, FullURI) } else { sqlContext.createDataFrame(sc.emptyRDD[Row], feedbackSchema) } } }
Example 147
Source File: DomainProcessor.scala From oni-ml with Apache License 2.0 | 5 votes |
package org.opennetworkinsight.utilities import org.apache.spark.broadcast.Broadcast import scala.io.Source object DomainProcessor extends Serializable { val COUNTRY_CODES = Set("ac", "ad", "ae", "af", "ag", "ai", "al", "am", "an", "ao", "aq", "ar", "as", "at", "au", "aw", "ax", "az", "ba", "bb", "bd", "be", "bf", "bg", "bh", "bi", "bj", "bm", "bn", "bo", "bq", "br", "bs", "bt", "bv", "bw", "by", "bz", "ca", "cc", "cd", "cf", "cg", "ch", "ci", "ck", "cl", "cm", "cn", "co", "cr", "cu", "cv", "cw", "cx", "cy", "cz", "de", "dj", "dk", "dm", "do", "dz", "ec", "ee", "eg", "eh", "er", "es", "et", "eu", "fi", "fj", "fk", "fm", "fo", "fr", "ga", "gb", "gd", "ge", "gf", "gg", "gh", "gi", "gl", "gm", "gn", "gp", "gq", "gr", "gs", "gt", "gu", "gw", "gy", "hk", "hm", "hn", "hr", "ht", "hu", "id", "ie", "il", "im", "in", "io", "iq", "ir", "is", "it", "je", "jm", "jo", "jp", "ke", "kg", "kh", "ki", "km", "kn", "kp", "kr", "krd", "kw", "ky", "kz", "la", "lb", "lc", "li", "lk", "lr", "ls", "lt", "lu", "lv", "ly", "ma", "mc", "md", "me", "mg", "mh", "mk", "ml", "mm", "mn", "mo", "mp", "mq", "mr", "ms", "mt", "mu", "mv", "mw", "mx", "my", "mz", "na", "nc", "ne", "nf", "ng", "ni", "nl", "no", "np", "nr", "nu", "nz", "om", "pa", "pe", "pf", "pg", "ph", "pk", "pl", "pm", "pn", "pr", "ps", "pt", "pw", "py", "qa", "re", "ro", "rs", "ru", "rw", "sa", "sb", "sc", "sd", "se", "sg", "sh", "si", "sj", "", "sk", "sl", "sm", "sn", "so", "sr", "ss", "st", "su", "sv", "sx", "sy", "sz", "tc", "td", "tf", "tg", "th", "tj", "tk", "tl", "tm", "tn", "to", "tp", "tr", "tt", "tv", "tw", "tz", "ua", "ug", "uk", "us", "uy", "uz", "va", "vc", "ve", "vg", "vi", "vn", "vu", "wf", "ws", "ye", "yt", "za", "zm", "zw") val TOP_LEVEL_DOMAIN_NAMES = Set("com", "org", "net", "int", "edu", "gov", "mil") val NO_DOMAIN = "None" def extractDomain(url: String): String = { val spliturl = url.split('.') val numParts = spliturl.length // First check if query is an IP address e.g.: 123.103.104.10.in-addr.arpa or a name. // Such URLs receive a domain of NO_DOMAIN if (numParts > 2 && spliturl(numParts - 1) == "arpa" && spliturl(numParts - 2) == "in-addr") { NO_DOMAIN // it's an address } else if (!COUNTRY_CODES.contains(spliturl.last) && !TOP_LEVEL_DOMAIN_NAMES.contains(spliturl.last)) { NO_DOMAIN // it does not have a valid top-level domain name } else { val strippedSplitURL = removeTopLevelDomainName(removeCountryCode(spliturl)) if (strippedSplitURL.length > 0) { strippedSplitURL.last } else { // invalid URL... nothing that is not TLD.countrycode NO_DOMAIN } } } def removeCountryCode(urlComponents: Array[String]): Array[String] = { if (COUNTRY_CODES.contains(urlComponents.last)) { urlComponents.dropRight(1) } else { urlComponents } } def removeTopLevelDomainName(urlComponents: Array[String]): Array[String] = { if (TOP_LEVEL_DOMAIN_NAMES.contains(urlComponents.last)) { urlComponents.dropRight(1) } else { urlComponents } } }
Example 148
Source File: AsynchronousLoggingSpec.scala From scribe with MIT License | 5 votes |
package spec import java.io.File import java.util.concurrent.ConcurrentLinkedQueue import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AsyncWordSpec import perfolation._ import scribe.{LogRecord, Logger} import scribe.format._ import scribe.output.LogOutput import scribe.writer.{FileWriter, Writer} import scala.collection.JavaConverters._ import scala.concurrent.Future import scala.io.Source class AsynchronousLoggingSpec extends AsyncWordSpec with Matchers { private val Regex = """(\d+) - (.+)""".r private val threads = "abcdefghijklmnopqrstuvwxyz" private val iterations = 10 private val total = threads.length * iterations "Asynchronous Logging" should { s"log $total records in the proper order with simple logging" in { val queue = new ConcurrentLinkedQueue[String] val logger = Logger.empty.orphan().withHandler( formatter = AsynchronousLoggingSpec.format, writer = new Writer { override def write[M](record: LogRecord[M], output: LogOutput): Unit = queue.add(output.plainText.trim) } ) Future.sequence(threads.map { char => Future { (0 until iterations).foreach { index => logger.info(p"$char:$index") } } }).map { _ => var previous = 0L queue.iterator().asScala.foreach { case Regex(ts, _) => { val timeStamp = ts.toLong timeStamp should be >= previous previous = timeStamp } } queue.size() should be(total) } } s"log $total records in the proper order with file logging" in { val file = new File("logs/app.log") file.delete() val fileWriter = FileWriter().nio val logger = Logger.empty.orphan().withHandler( formatter = AsynchronousLoggingSpec.format, writer = fileWriter ) Future.sequence(threads.map { char => Future { (0 until iterations).foreach { index => logger.info(p"$char:$index") } } }).map { _ => var previous = 0L fileWriter.flush() fileWriter.dispose() val lines = Source.fromFile(file).getLines().toList lines.foreach { case Regex(ts, message) => { val timeStamp = ts.toLong timeStamp should be >= previous previous = timeStamp } } lines.length should be(threads.length * iterations) } } } } object AsynchronousLoggingSpec { val format = formatter"$timeStamp - $message" }
Example 149
Source File: LineBufferedStream.scala From incubator-livy with Apache License 2.0 | 5 votes |
package org.apache.livy.utils import java.io.InputStream import java.util import java.util.concurrent.locks.ReentrantLock import scala.io.Source import org.apache.livy.Logging class CircularQueue[T](var capacity: Int) extends util.LinkedList[T] { override def add(t: T): Boolean = { if (size >= capacity) removeFirst super.add(t) } } class LineBufferedStream(inputStream: InputStream, logSize: Int) extends Logging { private[this] val _lines: CircularQueue[String] = new CircularQueue[String](logSize) private[this] val _lock = new ReentrantLock() private[this] val _condition = _lock.newCondition() private[this] var _finished = false private val thread = new Thread { override def run() = { val lines = Source.fromInputStream(inputStream).getLines() for (line <- lines) { info(line) _lock.lock() try { _lines.add(line) _condition.signalAll() } finally { _lock.unlock() } } _lock.lock() try { _finished = true _condition.signalAll() } finally { _lock.unlock() } } } thread.setDaemon(true) thread.start() def lines: IndexedSeq[String] = { _lock.lock() val lines = IndexedSeq.empty[String] ++ _lines.toArray(Array.empty[String]) _lock.unlock() lines } def iterator: Iterator[String] = { new LinesIterator } def waitUntilClose(): Unit = thread.join() private class LinesIterator extends Iterator[String] { override def hasNext: Boolean = { if (_lines.size > 0) { true } else { // Otherwise we might still have more data. _lock.lock() try { if (_finished) { false } else { _condition.await() _lines.size > 0 } } finally { _lock.unlock() } } } override def next(): String = { _lock.lock() val line = _lines.poll() _lock.unlock() line } } }
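The core idiom in LineBufferedStream is Source.fromInputStream(in).getLines(), which turns any InputStream into a lazy iterator of lines that is read as it is consumed. A minimal sketch with an in-memory stream:

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets
import scala.io.Source

// Sketch only: lines are pulled from the stream lazily as the iterator is traversed.
val in = new ByteArrayInputStream("first line\nsecond line\n".getBytes(StandardCharsets.UTF_8))
Source.fromInputStream(in).getLines().foreach(println)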
Example 150
Source File: ConfigurationParser.scala From Soteria with MIT License | 5 votes |
package com.leobenkel.soteria.Config import com.leobenkel.soteria.Utils.Json.JsonDecode import com.leobenkel.soteria.Utils.LoggerExtended import scala.io.Source import scala.util._ private[soteria] case class ConfigurationParser( log: LoggerExtended, configPath: String ) { if (!configPath.endsWith(".json")) { log.criticalFailure( s"The input configuration file was defined as '$configPath' " + s"but it should be a '.json' file." ) } @transient lazy private val isWeb: Boolean = configPath.startsWith("http://") || configPath .startsWith("https://") @transient lazy private val fileContent: String = Try(if (isWeb) { Source.fromURL(configPath) } else { Source.fromFile(configPath) }) match { case Success(file) => val content = file.mkString file.close() content case Failure(exception) => log.criticalFailure(exception.toString) throw exception } @transient lazy private val conf: SoteriaConfiguration = { JsonDecode.parse[SoteriaConfiguration](fileContent)(SoteriaConfiguration.parser(log)) match { case Left(err: String) => log.criticalFailure(err) throw new Exception(err) case Right(c) => c } } def getConf: SoteriaConfiguration = conf }
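The Try-wrap, mkString, close sequence in ConfigurationParser above is a hand-rolled resource bracket, and it only closes the Source on success. On Scala 2.13, scala.util.Using expresses the same load-or-fail flow and closes the Source on both paths; a sketch under that assumption, with a hypothetical method name:

import scala.io.Source
import scala.util.{Try, Using}

def readConfig(path: String): Try[String] =
  Using {
    if (path.startsWith("http://") || path.startsWith("https://")) Source.fromURL(path)
    else Source.fromFile(path)
  } { source => source.mkString } // the Source is closed whether mkString succeeds or throws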
Example 151
Source File: SerializedModuleTest.scala From Soteria with MIT License | 5 votes |
package com.leobenkel.soteria.Config

import com.leobenkel.soteria.Modules.NameOfModule
import com.leobenkel.soteria.Utils.Json.JsonDecode
import com.leobenkel.soteria.{LogTest, ParentTest}
import sbt.util.Level

import scala.io.Source

// scalastyle:off magic.number
class SerializedModuleTest extends ParentTest {
  private val test: SerializedModuleTest = this

  private class LogTestWithBuffer extends LogTest(test) {
    private var allMessages: String = ""

    override def log(
      level:   Level.Value,
      message: => String
    ): Unit = {
      test.log.debug(message)
      assertEquals(Level.Error, level)
      allMessages += message + "\n"
    }

    def getMessages: String = allMessages
  }

  test("Test parsing to NameOfModule") {
    val pathToFile = "soteria_succeed_4.json"
    val soteriaLog = new LogTestWithBuffer
    val file = Source.fromResource(pathToFile)
    val content = file.mkString
    file.close()
    log.debug(s"Reading '$pathToFile'")

    val result: Either[String, SoteriaConfiguration] =
      JsonDecode.parse[SoteriaConfiguration](content)(SoteriaConfiguration.parser(soteriaLog))

    assert(result.isRight)
    val serializedModule = result.right.get

    assert(soteriaLog.getMessages.isEmpty)

    val modules = serializedModule.AllModules.sortBy(s => s.key)
    val modulesWithDependanceErrors = serializedModule.ZTestOnly.RawModulesTest
      .filter(_._2.nonEmpty)

    modulesWithDependanceErrors.foreach {
      case (module, errors) =>
        assert(soteriaLog.getMessages.contains(module.toString))
        errors.foreach(e => assert(soteriaLog.getMessages.contains(e)))
    }

    assertEquals(3, modules.length)

    val m1 = modules.head
    assertEquals("com.orgs", m1.organization)
    assertEquals("name-of-library", m1.name)
    assertEquals(Right("3.0"), m1.version)

    val m2 = modules.apply(2)
    assertEquals("com.other.org", m2.organization)
    assertEquals("artifact-name", m2.name)
    assertEquals(Right("2.1.0"), m2.version)

    val m3 = modules.apply(1)
    val m3Obj = m3.nameObj
    assertEquals("com.other.org", m3.organization)
    assertEquals("artif", m3.name)
    assert(m3.version.isLeft)
    assert(m3.version.left.get.contains("0"))
    assertEquals(false, m3Obj.exactName)
    assertEquals(Seq("artifactory", "artifice"), m3Obj.excludeName.sortBy(identity))
    assertEquals(1, m3.dependenciesToRemove.length)
    assertEquals(Seq(NameOfModule.apply("com.orgs", "name-of-library")), m3.dependenciesToRemove)
  }

  test("test serialize/deserialize") {
    val s = SerializedModule.Empty.copy(
      version = "1.0",
      shouldBeProvided = Some(true),
      excludeName = Some(Seq("a", "b"))
    )
    val encodedEi = s.toJsonStructure
    assert(encodedEi.isRight)
    val encoded = encodedEi.right.get
    log.debug(encoded)
    val sParsedEi = SerializedModule.parser("com.org", "arti")(encoded)
    assert(sParsedEi.isRight)
    val sParsed = sParsedEi.right.get
    assertEquals(s, sParsed)
  }
}
// scalastyle:on magic.number
Example 152
Source File: JsonDecodeTest.scala From Soteria with MIT License | 5 votes |
package com.leobenkel.soteria.Utils import com.leobenkel.soteria.Config.SoteriaConfiguration import com.leobenkel.soteria.Utils.Json.JsonDecode import com.leobenkel.soteria.Utils.Json.JsonParserHelper._ import com.leobenkel.soteria.{LogTest, ParentTest} import org.scalatest.Assertion import scala.io.Source class JsonDecodeTest extends ParentTest { private val soteriaLog: LogTest = new LogTest(this) test("Test decode json") { val value: Int = 12 case class MyJson(key: Int) implicit val parser: JsonDecode.Parser[MyJson] = (input: Map[String, Any]) => { for { key <- input.getAsInt("key") } yield { MyJson(key) } } val ei = JsonDecode.parse[MyJson](s""" |{ |"key": $value |} """.stripMargin) assert(ei.isRight) assertEquals(value, ei.right.get.key) } test("Test decode soteria.json") { Map[String, Either[String, SoteriaConfiguration] => Assertion]( "soteria_succeed_1.json" -> { result => assert(result.isRight) val parsed = result.right.get assert(parsed.modules.size == 1) assert(parsed.modules.head._2.size == 1) assert(parsed.modules.head._2.head._2.version == "3.0") assert(parsed.scalaCFlags.length == 10) assert(parsed.scalaVersions.size == 2) }, "soteria_succeed_2.json" -> { result => assert(result.isRight) val parsed = result.right.get assert(parsed.modules.size == 1) assert(parsed.modules.head._2.size == 1) assert(parsed.modules.head._2.head._2.version == "3.0") assert(parsed.scalaCFlags.isEmpty) assert(parsed.scalaVersions.size == 2) }, "soteria_succeed_3.json" -> { result => assert(result.isRight) val parsed = result.right.get assert(parsed.modules.isEmpty) assert(parsed.scalaCFlags.isEmpty) assert(parsed.scalaVersions.size == 2) }, "soteria_fail_no_scalaVersions.json" -> { result => assert(result.isLeft) val error = result.left.get assert(error.contains("scalaVersions")) }, "soteria_fail_no_version.json" -> { result => assert(result.isLeft) val error = result.left.get assert(error.contains("version")) assert(error.contains("com.orgs")) assert(error.contains("name-of-library")) }, "soteria_fail_bad_json.json" -> { result => assert(result.isLeft) val error = result.left.get assert("Did not parse" == error) } ).map { case (filePath, test) => val file = Source.fromResource(filePath) val content = file.mkString file.close() log.debug(s"Reading '$filePath'") val result: Either[String, SoteriaConfiguration] = JsonDecode.parse[SoteriaConfiguration](content)(SoteriaConfiguration.parser(soteriaLog)) test(result) } } }
Example 153
Source File: NotebookSimTest.scala From spark-bench with Apache License 2.0 | 5 votes |
package com.ibm.sparktc.sparkbench import com.ibm.sparktc.sparkbench.cli.CLIKickoff import com.ibm.sparktc.sparkbench.testfixtures.BuildAndTeardownData import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers} import scala.io.Source class NotebookSimTest extends FlatSpec with Matchers with BeforeAndAfterEach with Capturing { val dataMaker = new BuildAndTeardownData("notebook-sim-test") val giantData = s"${dataMaker.sparkBenchTestFolder}/giant-kmeans-data.parquet" val tinyData = s"${dataMaker.sparkBenchTestFolder}/tiny-kmeans-data.parquet" override def beforeEach(): Unit = { super.beforeEach() dataMaker.deleteFolders() dataMaker.createFolders() dataMaker.generateKMeansData(400000, 50, giantData) dataMaker.generateKMeansData(100, 5, tinyData) } override def afterEach(): Unit = { dataMaker.deleteFolders() } "Simulating two notebook users" should "work" in { val relativePath = "/etc/notebook-sim.conf" val resource = getClass.getResource(relativePath) val path = resource.getPath val text = Source.fromFile(path).mkString CLIKickoff.main(Array(text)) } }
Example 154
Source File: ConfigFileTest.scala From spark-bench with Apache License 2.0 | 5 votes |
package com.ibm.sparktc.sparkbench import java.io.File import com.ibm.sparktc.sparkbench.cli.CLIKickoff import com.ibm.sparktc.sparkbench.testfixtures.BuildAndTeardownData import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} import scala.io.Source class ConfigFileTest extends FlatSpec with Matchers with BeforeAndAfterAll with Capturing { val dataShiznit = new BuildAndTeardownData("configfiletest") override def beforeAll(): Unit = { super.beforeAll() dataShiznit.deleteFolders() dataShiznit.createFolders() } override def afterAll(): Unit = { dataShiznit.deleteFolders() } val kmeansData = new File("/tmp/spark-bench-scalatest/configfiletest/kmeans-data.parquet") val output1 = new File("/tmp/spark-bench-scalatest/configfiletest/conf-file-output-1.csv") val output2 = new File("/tmp/spark-bench-scalatest/configfiletest/conf-file-output-2.parquet") "Spark-bench run through a config file serially" should "work" in { kmeansData.exists() shouldBe false val relativePath = "/etc/testConfFile1.conf" val resource = getClass.getResource(relativePath) val path = resource.getPath val text = Source.fromFile(path).mkString CLIKickoff.main(Array(text)) kmeansData.exists() shouldBe true output1.exists() shouldBe true output2.exists() shouldBe true val fileList = output1.listFiles().toList.filter(_.getName.startsWith("part")) val fileContents: List[String] = Source.fromFile(fileList.head) .getLines() .toList val length: Int = fileContents.length (length > 0) shouldBe true } "Spark-bench run through a config file with the suites running in parallel" should "work" in { kmeansData.exists() shouldBe true val relativePath = "/etc/testConfFile2.conf" val resource = getClass.getResource(relativePath) val path = resource.getPath val text = Source.fromFile(path).mkString CLIKickoff.main(Array(text)) } }
Example 155
Source File: OutputTest.scala From spark-bench with Apache License 2.0 | 5 votes |
package com.ibm.sparktc.sparkbench

import com.ibm.sparktc.sparkbench.cli.CLIKickoff
import com.ibm.sparktc.sparkbench.testfixtures.BuildAndTeardownData
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers}

import scala.io.Source

class OutputTest extends FlatSpec with Matchers with BeforeAndAfterAll with Capturing {
  val dataStuff = new BuildAndTeardownData("output-test")

  override def beforeAll(): Unit = {
    super.beforeAll()
    dataStuff.createFolders()
    dataStuff.generateKMeansData(1000, 5, dataStuff.kmeansFile) // scalastyle:ignore
  }

  override def afterAll(): Unit = {
    dataStuff.deleteFolders()
    super.afterAll()
  }

  "Specifying Console output" should "work" in {
    val relativePath = "/etc/testConfFile3.conf"
    val resource = getClass.getResource(relativePath)
    val path = resource.getPath
    val text = Source.fromFile(path).mkString

    val (out, _) = captureOutput(CLIKickoff.main(Array(text)))
    out should not be empty
    out.split("\n").length shouldBe 9
  }

  "Want to see configuration added to results when there's crazy stuff" should "work" in {
    val relativePath = "/etc/testConfFile4.conf"
    val resource = getClass.getResource(relativePath)
    val path = resource.getPath
    val text = Source.fromFile(path).mkString

    val (out, _) = captureOutput(CLIKickoff.main(Array(text)))
    out shouldBe empty
  }
}
Example 156
Source File: TestHelper.scala From spark-summit-2018 with GNU General Public License v3.0 | 5 votes |
package com.twilio.open.streaming.trend.discovery import java.io.{ByteArrayInputStream, InputStream} import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.google.protobuf.Message import com.googlecode.protobuf.format.JsonFormat import com.holdenkarau.spark.testing.{LocalSparkContext, SparkContextProvider} import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.sql.SparkSession import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers, Suite} import org.slf4j.{Logger, LoggerFactory} import scala.collection.Seq import scala.io.Source import scala.reflect.ClassTag import scala.reflect.classTag object TestHelper { val log: Logger = LoggerFactory.getLogger("com.twilio.open.streaming.trend.discovery.TestHelper") val mapper: ObjectMapper = { val m = new ObjectMapper() m.registerModule(DefaultScalaModule) } val jsonFormat: JsonFormat = new JsonFormat def loadScenario[T<: Message : ClassTag](file: String): Seq[T] = { val fileString = Source.fromFile(file).mkString val parsed = mapper.readValue(fileString, classOf[Sceanario]) parsed.input.map { data => val json = mapper.writeValueAsString(data) convert[T](json) } } def convert[T<: Message : ClassTag](json: String): T = { val clazz = classTag[T].runtimeClass val builder = clazz.getMethod("newBuilder").invoke(clazz).asInstanceOf[Message.Builder] try { val input: InputStream = new ByteArrayInputStream(json.getBytes()) jsonFormat.merge(input, builder) builder.build().asInstanceOf[T] } catch { case e: Exception => throw e } } } @SerialVersionUID(1L) case class KafkaDataFrame(key: Array[Byte], topic: Array[Byte], value: Array[Byte]) extends Serializable case class Sceanario(input: Seq[Any], expected: Option[Any] = None) trait SparkSqlTest extends BeforeAndAfterAll with SparkContextProvider { self: Suite => @transient var _sparkSql: SparkSession = _ @transient private var _sc: SparkContext = _ override def sc: SparkContext = _sc def conf: SparkConf def sparkSql: SparkSession = _sparkSql override def beforeAll() { _sparkSql = SparkSession.builder().config(conf).getOrCreate() _sc = _sparkSql.sparkContext setup(_sc) super.beforeAll() } override def afterAll() { try { _sparkSql.close() _sparkSql = null LocalSparkContext.stop(_sc) _sc = null } finally { super.afterAll() } } }
Example 157
Source File: DeduplicateAndSortDictionaries.scala From open-korean-text with Apache License 2.0 | 5 votes |
package org.openkoreantext.processor.tools import java.io.FileOutputStream import scala.io.Source object DeduplicateAndSortDictionaries extends Runnable { private[this] def readWords(filename: String): Set[String] = { Source.fromFile(filename)(io.Codec("UTF-8")) .getLines() .map(_.trim) .filter(_.length > 0) .toSet } private val RESOURCES_TO_CLEANUP = Seq( "noun/nouns.txt", "noun/entities.txt", "noun/spam.txt", "noun/names.txt", "noun/twitter.txt", "noun/lol.txt", "noun/slangs.txt", "noun/company_names.txt", "noun/foreign.txt", "noun/geolocations.txt", "noun/profane.txt", "noun/kpop.txt", "noun/bible.txt", "noun/wikipedia_title_nouns.txt", "noun/pokemon.txt", "noun/congress.txt", "noun/brand.txt", "noun/fashion.txt", "noun/neologism.txt", "substantives/modifier.txt", "substantives/suffix.txt", "substantives/family_names.txt", "substantives/given_names.txt", "adjective/adjective.txt", "adverb/adverb.txt", "auxiliary/determiner.txt", "auxiliary/exclamation.txt", "auxiliary/conjunctions.txt", "josa/josa.txt", "typos/typos.txt", "verb/eomi.txt", "verb/pre_eomi.txt", "verb/verb.txt", "verb/verb_prefix.txt" ) def run { RESOURCES_TO_CLEANUP.foreach { f: String => val outputFolder = "src/main/resources/org/openkoreantext/processor/util/" System.err.println("Processing %s.".format(f)) val words = readWords(outputFolder + f).toList.sorted val out = new FileOutputStream(outputFolder + f) words.foreach { word: String => out.write((word + "\n").getBytes) } out.close() } } }
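readWords above passes io.Codec("UTF-8") explicitly, which keeps the read independent of the platform default charset. The codec can also be supplied implicitly so that every Source.from* call in scope picks it up; a short sketch with a hypothetical file name:

import scala.io.{Codec, Source}

implicit val utf8: Codec = Codec.UTF8 // applies to every Source.fromFile/fromURL call in scope

val words: Set[String] = {
  val source = Source.fromFile("src/main/resources/example-wordlist.txt") // hypothetical path
  try source.getLines().map(_.trim).filter(_.nonEmpty).toSet
  finally source.close()
}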
Example 158
Source File: BatchGetUnknownNouns.scala From open-korean-text with Apache License 2.0 | 5 votes |
package org.openkoreantext.processor.qa import java.util.logging.Logger import org.openkoreantext.processor.OpenKoreanTextProcessor import org.openkoreantext.processor.tokenizer.KoreanChunker._ import org.openkoreantext.processor.tokenizer.KoreanTokenizer._ import org.openkoreantext.processor.util.KoreanPos import scala.io.Source object BatchGetUnknownNouns { private val LOG = Logger.getLogger(getClass.getSimpleName) private val VERBOSE = true case class ChunkWithTweet(chunk: String, tweet: String) def main(args: Array[String]) { if (args.length != 1) { println("The first arg should be an input file path of Korean tweets.") return } val chunksWithUnknowns = Source.fromFile(args(0)).getLines().foldLeft(List[ChunkWithTweet]()) { case (l: List[ChunkWithTweet], line: String) if line.trim.length > 5 => chunk(line).flatMap { case t: KoreanToken if t.pos == KoreanPos.Korean && tokenize(t.text).exists(_.unknown) => Some(ChunkWithTweet(t.text, line.trim)) case t: KoreanToken => None }.toList ::: l case (l: List[ChunkWithTweet], line: String) => l }.toSet chunksWithUnknowns.toSeq.sortBy(_.chunk).foreach { chunkWithTweet: ChunkWithTweet => println(chunkWithTweet.tweet) println(OpenKoreanTextProcessor .tokenize(chunkWithTweet.tweet) .mkString(" ")) println(chunkWithTweet.chunk + ": " + tokenize(chunkWithTweet.chunk).mkString(" ")) println() } } }
Example 159
Source File: BatchTokenizeTweets.scala From open-korean-text with Apache License 2.0 | 5 votes |
package org.openkoreantext.processor.qa import java.util.logging.{Level, Logger} import org.openkoreantext.processor.OpenKoreanTextProcessor import org.openkoreantext.processor.tokenizer.KoreanTokenizer.KoreanToken import org.openkoreantext.processor.util.KoreanPos import scala.io.Source object BatchTokenizeTweets { case class ParseTime(time: Long, chunk: String) private val LOG = Logger.getLogger(getClass.getSimpleName) private val VERBOSE = true private val NON_NOUNS = Set(KoreanPos.Adjective, KoreanPos.Adverb, KoreanPos.Verb) def main(args: Array[String]) { if (args.length != 1) { println("The first arg should be an input file of Korean tweets.") return } val parseTimesAll = Source.fromFile(args(0)).getLines().foldLeft(List[ParseTime]()) { case (l: List[ParseTime], line: String) => val t0 = System.currentTimeMillis() val parsed = OpenKoreanTextProcessor.tokenize(line) val t1 = System.currentTimeMillis() if (VERBOSE) { println(parsed.map(t => t.text + "/" + t.pos).mkString(" ")) } ParseTime(t1 - t0, line.trim) :: l } val loadingTime = parseTimesAll.last LOG.log(Level.INFO, "The first one \"%s\" took %d ms including the loading time.".format(loadingTime.chunk, loadingTime.time)) val parseTimes = parseTimesAll.init val averageTweetLength = parseTimes.map(_.chunk.length).sum.toDouble / parseTimes.size val averageTime = parseTimes.map(_.time).sum.toDouble / parseTimes.size val maxItem = parseTimes.maxBy(_.time) LOG.log(Level.INFO, ("Parsed %d items. \n" + " Total time: %d s \n" + " Average tweet length: %.2f chars \n" + " Average time per tweet: %.2f ms \n" + " Max time: %d ms, %s\n" + " Parsed: %s" ).format( parseTimes.size, parseTimes.map(_.time).sum / 1000, averageTweetLength, averageTime, maxItem.time, maxItem.chunk, OpenKoreanTextProcessor.tokenize(maxItem.chunk).map { case t if t.unknown => t.text.toString + t.pos + "*" case t => t.text + t.pos.toString }.mkString(" ") )) } private def parseToString(parsed: Seq[KoreanToken]): String = { parsed.map { case t if t.unknown => t.text.toString + t.pos + "*" case t => t.text + t.pos.toString }.mkString(" ") } }
Example 160
Source File: KafkaSourceOffsetSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.kafka010 import java.io.File import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.streaming.OffsetSuite import org.apache.spark.sql.test.SharedSQLContext class KafkaSourceOffsetSuite extends OffsetSuite with SharedSQLContext { compare( one = KafkaSourceOffset(("t", 0, 1L)), two = KafkaSourceOffset(("t", 0, 2L))) compare( one = KafkaSourceOffset(("t", 0, 1L), ("t", 1, 0L)), two = KafkaSourceOffset(("t", 0, 2L), ("t", 1, 1L))) compare( one = KafkaSourceOffset(("t", 0, 1L), ("T", 0, 0L)), two = KafkaSourceOffset(("t", 0, 2L), ("T", 0, 1L))) compare( one = KafkaSourceOffset(("t", 0, 1L)), two = KafkaSourceOffset(("t", 0, 2L), ("t", 1, 1L))) val kso1 = KafkaSourceOffset(("t", 0, 1L)) val kso2 = KafkaSourceOffset(("t", 0, 2L), ("t", 1, 3L)) val kso3 = KafkaSourceOffset(("t", 0, 2L), ("t", 1, 3L), ("t", 1, 4L)) compare(KafkaSourceOffset(SerializedOffset(kso1.json)), KafkaSourceOffset(SerializedOffset(kso2.json))) test("basic serialization - deserialization") { assert(KafkaSourceOffset.getPartitionOffsets(kso1) == KafkaSourceOffset.getPartitionOffsets(SerializedOffset(kso1.json))) } testWithUninterruptibleThread("OffsetSeqLog serialization - deserialization") { withTempDir { temp => // use non-existent directory to test whether log make the dir val dir = new File(temp, "dir") val metadataLog = new OffsetSeqLog(spark, dir.getAbsolutePath) val batch0 = OffsetSeq.fill(kso1) val batch1 = OffsetSeq.fill(kso2, kso3) val batch0Serialized = OffsetSeq.fill(batch0.offsets.flatMap(_.map(o => SerializedOffset(o.json))): _*) val batch1Serialized = OffsetSeq.fill(batch1.offsets.flatMap(_.map(o => SerializedOffset(o.json))): _*) assert(metadataLog.add(0, batch0)) assert(metadataLog.getLatest() === Some(0 -> batch0Serialized)) assert(metadataLog.get(0) === Some(batch0Serialized)) assert(metadataLog.add(1, batch1)) assert(metadataLog.get(0) === Some(batch0Serialized)) assert(metadataLog.get(1) === Some(batch1Serialized)) assert(metadataLog.getLatest() === Some(1 -> batch1Serialized)) assert(metadataLog.get(None, Some(1)) === Array(0 -> batch0Serialized, 1 -> batch1Serialized)) // Adding the same batch does nothing metadataLog.add(1, OffsetSeq.fill(LongOffset(3))) assert(metadataLog.get(0) === Some(batch0Serialized)) assert(metadataLog.get(1) === Some(batch1Serialized)) assert(metadataLog.getLatest() === Some(1 -> batch1Serialized)) assert(metadataLog.get(None, Some(1)) === Array(0 -> batch0Serialized, 1 -> batch1Serialized)) } } test("read Spark 2.1.0 offset format") { val offset = readFromResource("kafka-source-offset-version-2.1.0.txt") assert(KafkaSourceOffset(offset) === KafkaSourceOffset(("topic1", 0, 456L), ("topic1", 1, 789L), ("topic2", 0, 0L))) } private def readFromResource(file: String): SerializedOffset = { import scala.io.Source val input = getClass.getResource(s"/$file").toURI val str = Source.fromFile(input).mkString SerializedOffset(str) } }
Example 161
Source File: RawTextSender.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util

import java.io.{ByteArrayOutputStream, IOException}
import java.net.ServerSocket
import java.nio.ByteBuffer

import scala.io.Source

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.util.IntParam

private[streaming] object RawTextSender extends Logging {
  def main(args: Array[String]) {
    if (args.length != 4) {
      // scalastyle:off println
      System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>")
      // scalastyle:on println
      System.exit(1)
    }
    // Parse the arguments using a pattern match
    val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args

    // Repeat the input data multiple times to fill in a buffer
    val lines = Source.fromFile(file).getLines().toArray
    val bufferStream = new ByteArrayOutputStream(blockSize + 1000)
    val ser = new KryoSerializer(new SparkConf()).newInstance()
    val serStream = ser.serializeStream(bufferStream)
    var i = 0
    while (bufferStream.size < blockSize) {
      serStream.writeObject(lines(i))
      i = (i + 1) % lines.length
    }

    val array = bufferStream.toByteArray

    val countBuf = ByteBuffer.wrap(new Array[Byte](4))
    countBuf.putInt(array.length)
    countBuf.flip()

    val serverSocket = new ServerSocket(port)
    logInfo("Listening on port " + port)

    while (true) {
      val socket = serverSocket.accept()
      logInfo("Got a new connection")
      val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec)
      try {
        while (true) {
          out.write(countBuf.array)
          out.write(array)
        }
      } catch {
        case e: IOException =>
          logError("Client disconnected")
      } finally {
        socket.close()
      }
    }
  }
}
Example 162
Source File: HDFSExecutorMetricsReplayListenerBus.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.io.InputStream import scala.collection.immutable import scala.collection.mutable.ListBuffer import scala.io.Source import scala.util.parsing.json._ import org.apache.spark.internal.Logging private[spark] class HDFSExecutorMetricsReplayListenerBus extends SparkListenerBus with Logging { def replay( logDataList: ListBuffer[(InputStream, String)], sourceName: String, maybeTruncated: Boolean = false): Unit = { logDataList.foreach(logData => { try { for (line <- Source.fromInputStream(logData._1).getLines()) { val hashMapParsed = JSON.parseFull(line) val hashMap = { hashMapParsed match { case Some(m: Map[String, Any]) => m case _ => new immutable.HashMap[String, Any] } } val hdfsExecutorMetrics = new HDFSExecutorMetrics( hashMap("values").asInstanceOf[Map[String, Any]], hashMap("host").asInstanceOf[String], hashMap("timestamp").asInstanceOf[Double].toLong) postToAll(hdfsExecutorMetrics) } } catch { case ex: Exception => ex.printStackTrace(); logError(ex.toString) logWarning(s"Got JsonParseException from log file $logData") } }) } }
Example 163
Source File: PythonBroadcastSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.api.python

import java.io.{File, PrintWriter}

import scala.io.Source

import org.scalatest.Matchers

import org.apache.spark.{SharedSparkContext, SparkConf, SparkFunSuite}
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.util.Utils

// This test suite uses SharedSparkContext because we need a SparkEnv in order to deserialize
// a PythonBroadcast:
class PythonBroadcastSuite extends SparkFunSuite with Matchers with SharedSparkContext {
  test("PythonBroadcast can be serialized with Kryo (SPARK-4882)") {
    val tempDir = Utils.createTempDir()
    val broadcastedString = "Hello, world!"
    def assertBroadcastIsValid(broadcast: PythonBroadcast): Unit = {
      val source = Source.fromFile(broadcast.path)
      val contents = source.mkString
      source.close()
      contents should be (broadcastedString)
    }
    try {
      val broadcastDataFile: File = {
        val file = new File(tempDir, "broadcastData")
        val printWriter = new PrintWriter(file)
        printWriter.write(broadcastedString)
        printWriter.close()
        file
      }
      val broadcast = new PythonBroadcast(broadcastDataFile.getAbsolutePath)
      assertBroadcastIsValid(broadcast)
      val conf = new SparkConf().set("spark.kryo.registrationRequired", "true")
      val deserializedBroadcast =
        Utils.clone[PythonBroadcast](broadcast, new KryoSerializer(conf).newInstance())
      assertBroadcastIsValid(deserializedBroadcast)
    } finally {
      Utils.deleteRecursively(tempDir)
    }
  }
}
Example 164
Source File: LogUrlsStandaloneSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy import java.net.URL import scala.collection.mutable import scala.io.Source import org.apache.spark.{LocalSparkContext, SparkContext, SparkFunSuite} import org.apache.spark.scheduler.{SparkListener, SparkListenerExecutorAdded} import org.apache.spark.scheduler.cluster.ExecutorInfo import org.apache.spark.util.SparkConfWithEnv class LogUrlsStandaloneSuite extends SparkFunSuite with LocalSparkContext { private val WAIT_TIMEOUT_MILLIS = 10000 test("verify that correct log urls get propagated from workers") { sc = new SparkContext("local-cluster[2,1,1024]", "test") val listener = new SaveExecutorInfo sc.addSparkListener(listener) // Trigger a job so that executors get added sc.parallelize(1 to 100, 4).map(_.toString).count() sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) listener.addedExecutorInfos.values.foreach { info => assert(info.logUrlMap.nonEmpty) // Browse to each URL to check that it's valid info.logUrlMap.foreach { case (logType, logUrl) => val html = Source.fromURL(logUrl).mkString assert(html.contains(s"$logType log page")) } } } test("verify that log urls reflect SPARK_PUBLIC_DNS (SPARK-6175)") { val SPARK_PUBLIC_DNS = "public_dns" val conf = new SparkConfWithEnv(Map("SPARK_PUBLIC_DNS" -> SPARK_PUBLIC_DNS)).set( "spark.extraListeners", classOf[SaveExecutorInfo].getName) sc = new SparkContext("local-cluster[2,1,1024]", "test", conf) // Trigger a job so that executors get added sc.parallelize(1 to 100, 4).map(_.toString).count() sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) val listeners = sc.listenerBus.findListenersByClass[SaveExecutorInfo] assert(listeners.size === 1) val listener = listeners(0) listener.addedExecutorInfos.values.foreach { info => assert(info.logUrlMap.nonEmpty) info.logUrlMap.values.foreach { logUrl => assert(new URL(logUrl).getHost === SPARK_PUBLIC_DNS) } } } } private[spark] class SaveExecutorInfo extends SparkListener { val addedExecutorInfos = mutable.Map[String, ExecutorInfo]() override def onExecutorAdded(executor: SparkListenerExecutorAdded) { addedExecutorInfos(executor.executorId) = executor.executorInfo } }
Example 165
Source File: TnTestHelper.scala From TopNotch with Apache License 2.0 | 5 votes |
package com.bfm.topnotch import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ import org.scalatest.Matchers import scala.io.Source import org.json4s._ import org.json4s.native.JsonMethods._ /** * This class handles some of the TopNotch reusable test code */ object TnTestHelper extends Matchers { val INDEX_COL_NAME = "__INDEX_COL__" /** * Read a file from the resources/src/test/scala/com/bfm/topnotch folder * @param fileName The path to the file relative to the path resources/src/test/scala/com/bfm/topnotch * @return The contents of the file as one string */ def readResourceFileToJson[T](fileName: String, classType: Class[_]): JValue = { parse(Source.fromFile(classType.getResource(fileName).getFile).getLines().mkString("\n")) } /** * Attach an index to rows into a dataframe so we can track them throughout a series of operations * @param df The dataframe to index * @return A dataframe equal to df but with an index column */ def attachIdx(df: DataFrame): DataFrame = df.withColumn(INDEX_COL_NAME, monotonicallyIncreasingId()).cache /** * Get a number greater than or equal to num that is divisible by denomiator */ def numDivisibleBy(num: Int, denomiator: Int) = num / denomiator * denomiator /** * Grow a data frame to a desired size by duplicating rows. */ def growDataFrame(initDF: DataFrame, newSize: Int): DataFrame = { val initCount = initDF.count if (initCount < 1) throw new IllegalArgumentException("initDF's size must be greater than 0") List.fill((newSize / initCount + 1).toInt)(initDF).reduce(_.unionAll(_)).limit(newSize) } /** * Compares two dataframes and ensures that they have the same schema (ignore nullable) and the same values * @param actualDF The DF we want to check for correctness * @param correctDF The correct DF we use for comparison * @param onlySchema only compare the schemas of the dataframes */ def dfEquals(actualDF: DataFrame, correctDF: DataFrame, onlySchema: Boolean = false): Unit = { actualDF.schema.map(f => (f.name, f.dataType)).toSet shouldBe correctDF.schema.map(f => (f.name, f.dataType)).toSet if (!onlySchema) { actualDF.collect.map(_.toSeq.toSet).toSet shouldBe correctDF.collect.map(_.toSeq.toSet).toSet } } }
Example 166
Source File: exercise02.scala From scala-for-the-Impatient with MIT License | 5 votes |
import scala.collection.mutable import scala.io.Source def countWord() : Unit = { val wordMap = new mutable.HashMap[String,Int] val in = new java.util.Scanner(new java.io.File("exercise02.txt")) while(in.hasNext){ val word = in.next() wordMap(word) = wordMap.getOrElse(word,0) + 1 } println(wordMap.mkString(",")) } def countWord2() : Unit = { val source = Source.fromFile("exercise02.txt").mkString val tokens = source.split("\\s+") val wordMap = new mutable.HashMap[String,Int] for(word <- tokens){ wordMap(word) = wordMap.getOrElse(word,0) + 1 } println(wordMap.mkString(",")) }
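The same count can be written without a mutable map by grouping the tokens; a third variant, sketched against the same exercise02.txt input:

import scala.io.Source

def countWord3(): Unit = {
  val source = Source.fromFile("exercise02.txt")
  val counts: Map[String, Int] =
    try source.mkString.split("\\s+").filter(_.nonEmpty).groupBy(identity).map { case (w, ws) => w -> ws.length }
    finally source.close()
  println(counts.mkString(","))
}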
Example 167
Source File: FileUtils.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.wmexchanger.utils

import java.io.BufferedInputStream
import java.io.BufferedOutputStream
import java.io.File
import java.io.FileInputStream
import java.io.FileOutputStream
import java.io.FilenameFilter
import java.io.ObjectInputStream
import java.io.ObjectOutputStream
import java.io.PrintWriter

import org.clulab.wm.wmexchanger.utils.Closer.AutoCloser

import scala.io.Source

object FileUtils {
  def appendingPrintWriterFromFile(file: File): PrintWriter = Sinker.printWriterFromFile(file, append = true)

  def appendingPrintWriterFromFile(path: String): PrintWriter = Sinker.printWriterFromFile(path, append = true)

  def printWriterFromFile(file: File): PrintWriter = Sinker.printWriterFromFile(file, append = false)

  def printWriterFromFile(path: String): PrintWriter = Sinker.printWriterFromFile(path, append = false)

  // Output
  def newBufferedOutputStream(file: File): BufferedOutputStream =
    new BufferedOutputStream(new FileOutputStream(file))

  def newBufferedOutputStream(filename: String): BufferedOutputStream =
    newBufferedOutputStream(new File(filename))

  def newAppendingBufferedOutputStream(file: File): BufferedOutputStream =
    new BufferedOutputStream(new FileOutputStream(file, true))

  def newAppendingBufferedOutputStream(filename: String): BufferedOutputStream =
    newAppendingBufferedOutputStream(new File(filename))

  def newObjectOutputStream(filename: String): ObjectOutputStream =
    new ObjectOutputStream(newBufferedOutputStream(filename))

  // Input
  def newBufferedInputStream(file: File): BufferedInputStream =
    new BufferedInputStream(new FileInputStream(file))

  def newBufferedInputStream(filename: String): BufferedInputStream =
    newBufferedInputStream(new File(filename))

  def newObjectInputStream(filename: String): ObjectInputStream =
    new ObjectInputStream(newBufferedInputStream(filename))

  def findFiles(collectionDir: String, extension: String): Seq[File] = {
    val dir = new File(collectionDir)
    val filter = new FilenameFilter {
      def accept(dir: File, name: String): Boolean = name.endsWith(extension)
    }
    val result = Option(dir.listFiles(filter))
      .getOrElse(throw Sourcer.newFileNotFoundException(collectionDir))
    result
  }

  protected def getTextFromSource(source: Source): String = source.mkString

  def getTextFromFile(file: File): String =
    Sourcer.sourceFromFile(file).autoClose { source =>
      getTextFromSource(source)
    }
}
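The autoClose call above comes from the project's own Closer utility (a loan pattern), not from the standard library. A minimal stand-in for that pattern, written as an assumption about the shape of such a helper rather than the project's actual implementation:

import scala.io.Source

// Hypothetical loan-pattern helper; the project's Closer.AutoCloser is more general.
def withSource[A](source: Source)(f: Source => A): A =
  try f(source) finally source.close()

val text = withSource(Source.fromFile("some-input.txt"))(_.mkString) // hypothetical file name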
Example 168
Source File: Sourcer.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.wmexchanger.utils import java.io.{File, FileNotFoundException} import java.nio.charset.StandardCharsets import org.slf4j.{Logger, LoggerFactory} import scala.io.BufferedSource import scala.io.Source object Sourcer { protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass) val utf8: String = StandardCharsets.UTF_8.toString def sourceFromResource(path: String): BufferedSource = { val url = Option(Sourcer.getClass.getResource(path)) .getOrElse(throw newFileNotFoundException(path)) logger.info("Sourcing resource " + url.getPath) Source.fromURL(url, utf8) } def sourceFromFile(file: File): BufferedSource = { logger.info("Sourcing file " + file.getPath) Source.fromFile(file, utf8) } def sourceFromFile(path: String): BufferedSource = sourceFromFile(new File(path)) def newFileNotFoundException(path: String): FileNotFoundException = { val message1 = path + " (The system cannot find the path specified" val message2 = message1 + (if (path.startsWith("~")) ". Make sure to not use the tilde (~) character in paths in lieu of the home directory." else "") val message3 = message2 + ")" new FileNotFoundException(message3) } }
Example 169
Source File: Sourcer.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.utils import java.io.{File, FileNotFoundException} import java.nio.charset.StandardCharsets import org.slf4j.{Logger, LoggerFactory} import scala.io.BufferedSource import scala.io.Source object Sourcer { protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass) val utf8: String = StandardCharsets.UTF_8.toString def sourceFromResource(path: String): BufferedSource = { val url = Option(Sourcer.getClass.getResource(path)) .getOrElse(throw newFileNotFoundException(path)) logger.info("Sourcing resource " + url.getPath) Source.fromURL(url, utf8) } def sourceFromFile(file: File): BufferedSource = { logger.info("Sourcing file " + file.getPath) Source.fromFile(file, utf8) } def sourceFromFile(path: String): BufferedSource = sourceFromFile(new File(path)) def newFileNotFoundException(path: String): FileNotFoundException = { val message1 = path + " (The system cannot find the path specified" val message2 = message1 + (if (path.startsWith("~")) ". Make sure to not use the tilde (~) character in paths in lieu of the home directory." else "") val message3 = message2 + ")" new FileNotFoundException(message3) } }
Example 170
Source File: TestResources.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.system

import java.io.File

import org.clulab.wm.eidos.test.TestUtils._
import org.clulab.wm.eidos.utils.Closer.AutoCloser
import org.clulab.wm.eidos.utils.Sourcer

import scala.io.Source

class TestResources extends Test {
  behavior of "resources"

  def test(file: File): Unit = {
    val path = file.getCanonicalPath()

    it should "not have any Unicode characters in " + path in {
      val count = Sourcer.sourceFromFile(file).autoClose { source =>
        source.getLines().zipWithIndex.foldRight(0) { (lineAndLineNo, sum) =>
          val line = lineAndLineNo._1
          val lineNo = lineAndLineNo._2
          val badCharAndIndex = line.zipWithIndex.filter { case (c: Char, index: Int) =>
            (c < 32 || 127 < c) && c != '\r' && c != '\n' && c != '\t'
          }
          val complaints = badCharAndIndex.map { case (c: Char, index: Int) =>
            "'" + c + "' found at index " + index + "."
          }

          complaints.foreach(complaint => println("Line " + (lineNo + 1) + ": " + complaint))
          sum + complaints.size
        }
      }
      count should be (0)
    }
  }

  // https://groups.google.com/forum/#!topic/scala-user/WrmYHHzcJPw
  type Operation = (File) => Unit

  val wantedSuffixes = Seq(".conf", ".yml", ".tsv", ".kb", ".txt")
  val unwantedSuffixes = Seq("300d.txt", "vectors.txt", "_2016.txt",
    "/portuguese/grammars/triggers.yml", "word2idx_file.txt")

  def fileMatches(file: File): Boolean = {
    val canonicalPath = file.getCanonicalPath().replace('\\', '/')

    wantedSuffixes.exists(suffix => canonicalPath.endsWith(suffix)) &&
      !unwantedSuffixes.exists(suffix => canonicalPath.endsWith(suffix))
  }

  def directoryMatches(file: File): Boolean = true

  def doOperation(path: String)(operation: Operation): Unit = {
    for (files <- Option(new File(path).listFiles); file <- files) {
      if (file.isFile() && fileMatches(file) && file.getAbsolutePath.contains("english"))
        operation(file)
      if (file.isDirectory && directoryMatches(file))
        doOperation(file.getAbsolutePath)(operation)
    }
  }

  doOperation(new File("./src/main/resources").getCanonicalPath())(test)
}
Example 171
Source File: Sourcer.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.elasticsearch.utils

import java.io.{File, FileNotFoundException}
import java.nio.charset.StandardCharsets

import org.slf4j.{Logger, LoggerFactory}

import scala.io.BufferedSource
import scala.io.Source

object Sourcer {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)
  val utf8: String = StandardCharsets.UTF_8.toString

  def sourceFromResource(path: String): BufferedSource = {
    val url = Option(Sourcer.getClass.getResource(path))
        .getOrElse(throw newFileNotFoundException(path))

    logger.info("Sourcing resource " + url.getPath)
    Source.fromURL(url, utf8)
  }

  def sourceFromFile(file: File): BufferedSource = {
    logger.info("Sourcing file " + file.getPath)
    Source.fromFile(file, utf8)
  }

  def sourceFromFile(path: String): BufferedSource = sourceFromFile(new File(path))

  def newFileNotFoundException(path: String): FileNotFoundException = {
    val message1 = path + " (The system cannot find the path specified"
    val message2 = message1 + (if (path.startsWith("~"))
      ". Make sure to not use the tilde (~) character in paths in lieu of the home directory."
    else "")
    val message3 = message2 + ")"

    new FileNotFoundException(message3)
  }
}
Example 172
Source File: AssetsHelper.scala From scuruto with MIT License | 5 votes |
package lib

import skinny.SkinnyEnv

import scala.io.Source

object AssetsHelper {
  val fileName = "version.txt"

  val hash = {
    if (SkinnyEnv.isDevelopment() || SkinnyEnv.isTest()) {
      val basePath = "src/main/webapp/assets/dist"
      Source.fromFile(s"${basePath}/${fileName}").mkString
    } else {
      val basePath = "assets/dist"
      Source.fromInputStream(getClass.getClassLoader.getResourceAsStream(s"${basePath}/${fileName}")).mkString
    }
  }
}
Example 173
Source File: ReplayListenerBus.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler

import java.io.{InputStream, IOException}

import scala.io.Source

import org.json4s.jackson.JsonMethods._

import org.apache.spark.Logging
import org.apache.spark.util.JsonProtocol

// The enclosing class declaration is elided in this excerpt; it is assumed here so the braces balance.
private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {

  def replay(logData: InputStream, sourceName: String): Unit = {
    var currentLine: String = null
    var lineNumber: Int = 1
    try {
      val lines = Source.fromInputStream(logData).getLines()
      lines.foreach { line =>
        currentLine = line
        postToAll(JsonProtocol.sparkEventFromJson(parse(line)))
        lineNumber += 1
      }
    } catch {
      case ioe: IOException =>
        throw ioe
      case e: Exception =>
        logError(s"Exception parsing Spark event log: $sourceName", e)
        logError(s"Malformed line #$lineNumber: $currentLine\n")
    }
  }
}
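A hypothetical driver for the replay method above: the event-log path is invented, and the sketch sits in the same package because the bus is package-private; normally listeners are registered on the bus before replaying.

package org.apache.spark.scheduler

import java.io.{BufferedInputStream, FileInputStream}

// Streams a JSON-encoded Spark event log through replay().
object ReplaySketch {
  def main(args: Array[String]): Unit = {
    val bus = new ReplayListenerBus()
    val in  = new BufferedInputStream(new FileInputStream("/tmp/spark-events/app-1234")) // assumed path
    try bus.replay(in, sourceName = "app-1234")
    finally in.close()
  }
}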
Example 174
Source File: PythonBroadcastSuite.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.api.python import scala.io.Source import java.io.{PrintWriter, File} import org.scalatest.{Matchers, FunSuite} import org.apache.spark.{SharedSparkContext, SparkConf} import org.apache.spark.serializer.KryoSerializer import org.apache.spark.util.Utils // This test suite uses SharedSparkContext because we need a SparkEnv in order to deserialize // a PythonBroadcast: class PythonBroadcastSuite extends FunSuite with Matchers with SharedSparkContext { test("PythonBroadcast can be serialized with Kryo (SPARK-4882)") { val tempDir = Utils.createTempDir() val broadcastedString = "Hello, world!" def assertBroadcastIsValid(broadcast: PythonBroadcast): Unit = { val source = Source.fromFile(broadcast.path) val contents = source.mkString source.close() contents should be (broadcastedString) } try { val broadcastDataFile: File = { val file = new File(tempDir, "broadcastData") val printWriter = new PrintWriter(file) printWriter.write(broadcastedString) printWriter.close() file } val broadcast = new PythonBroadcast(broadcastDataFile.getAbsolutePath) assertBroadcastIsValid(broadcast) val conf = new SparkConf().set("spark.kryo.registrationRequired", "true") val deserializedBroadcast = Utils.clone[PythonBroadcast](broadcast, new KryoSerializer(conf).newInstance()) assertBroadcastIsValid(deserializedBroadcast) } finally { Utils.deleteRecursively(tempDir) } } }
Example 175
Source File: LogUrlsStandaloneSuite.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy import java.net.URL import scala.collection.mutable import scala.io.Source import org.scalatest.FunSuite import org.apache.spark.scheduler.cluster.ExecutorInfo import org.apache.spark.scheduler.{SparkListenerExecutorAdded, SparkListener} import org.apache.spark.{SparkConf, SparkContext, LocalSparkContext} class LogUrlsStandaloneSuite extends FunSuite with LocalSparkContext { private val WAIT_TIMEOUT_MILLIS = 10000 test("verify that correct log urls get propagated from workers") { sc = new SparkContext("local-cluster[2,1,512]", "test") val listener = new SaveExecutorInfo sc.addSparkListener(listener) // Trigger a job so that executors get added sc.parallelize(1 to 100, 4).map(_.toString).count() assert(sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)) listener.addedExecutorInfos.values.foreach { info => assert(info.logUrlMap.nonEmpty) // Browse to each URL to check that it's valid info.logUrlMap.foreach { case (logType, logUrl) => val html = Source.fromURL(logUrl).mkString assert(html.contains(s"$logType log page")) } } } test("verify that log urls reflect SPARK_PUBLIC_DNS (SPARK-6175)") { val SPARK_PUBLIC_DNS = "public_dns" class MySparkConf extends SparkConf(false) { override def getenv(name: String) = { if (name == "SPARK_PUBLIC_DNS") SPARK_PUBLIC_DNS else super.getenv(name) } override def clone: SparkConf = { new MySparkConf().setAll(getAll) } } val conf = new MySparkConf() sc = new SparkContext("local-cluster[2,1,512]", "test", conf) val listener = new SaveExecutorInfo sc.addSparkListener(listener) // Trigger a job so that executors get added sc.parallelize(1 to 100, 4).map(_.toString).count() assert(sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)) listener.addedExecutorInfos.values.foreach { info => assert(info.logUrlMap.nonEmpty) info.logUrlMap.values.foreach { logUrl => assert(new URL(logUrl).getHost === SPARK_PUBLIC_DNS) } } } private class SaveExecutorInfo extends SparkListener { val addedExecutorInfos = mutable.Map[String, ExecutorInfo]() override def onExecutorAdded(executor: SparkListenerExecutorAdded) { addedExecutorInfos(executor.executorId) = executor.executorInfo } } }
Example 176
Source File: UISuite.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.ui import java.net.ServerSocket import scala.io.Source import scala.util.{Failure, Success, Try} import org.eclipse.jetty.servlet.ServletContextHandler import org.scalatest.FunSuite import org.scalatest.concurrent.Eventually._ import org.scalatest.time.SpanSugar._ import org.apache.spark.LocalSparkContext._ import org.apache.spark.{SparkConf, SparkContext} class UISuite extends FunSuite { private def newSparkContext(): SparkContext = { val conf = new SparkConf() .setMaster("local") .setAppName("test") .set("spark.ui.enabled", "true") val sc = new SparkContext(conf) assert(sc.ui.isDefined) sc } ignore("basic ui visibility") { withSpark(newSparkContext()) { sc => // test if the ui is visible, and all the expected tabs are visible eventually(timeout(10 seconds), interval(50 milliseconds)) { val html = Source.fromURL(sc.ui.get.appUIAddress).mkString assert(!html.contains("random data that should not be present")) assert(html.toLowerCase.contains("stages")) assert(html.toLowerCase.contains("storage")) assert(html.toLowerCase.contains("environment")) assert(html.toLowerCase.contains("executors")) } } } ignore("visibility at localhost:4040") { withSpark(newSparkContext()) { sc => // test if visible from http://localhost:4040 eventually(timeout(10 seconds), interval(50 milliseconds)) { val html = Source.fromURL("http://localhost:4040").mkString assert(html.toLowerCase.contains("stages")) } } } test("jetty selects different port under contention") { val server = new ServerSocket(0) val startPort = server.getLocalPort val serverInfo1 = JettyUtils.startJettyServer( "0.0.0.0", startPort, Seq[ServletContextHandler](), new SparkConf) val serverInfo2 = JettyUtils.startJettyServer( "0.0.0.0", startPort, Seq[ServletContextHandler](), new SparkConf) // Allow some wiggle room in case ports on the machine are under contention val boundPort1 = serverInfo1.boundPort val boundPort2 = serverInfo2.boundPort assert(boundPort1 != startPort) assert(boundPort2 != startPort) assert(boundPort1 != boundPort2) serverInfo1.server.stop() serverInfo2.server.stop() server.close() } test("jetty binds to port 0 correctly") { val serverInfo = JettyUtils.startJettyServer( "0.0.0.0", 0, Seq[ServletContextHandler](), new SparkConf) val server = serverInfo.server val boundPort = serverInfo.boundPort assert(server.getState === "STARTED") assert(boundPort != 0) Try { new ServerSocket(boundPort) } match { case Success(s) => fail("Port %s doesn't seem used by jetty server".format(boundPort)) case Failure(e) => } } test("verify appUIAddress contains the scheme") { withSpark(newSparkContext()) { sc => val ui = sc.ui.get val uiAddress = ui.appUIAddress val uiHostPort = ui.appUIHostPort assert(uiAddress.equals("http://" + uiHostPort)) } } test("verify appUIAddress contains the port") { withSpark(newSparkContext()) { sc => val ui = sc.ui.get val splitUIAddress = ui.appUIAddress.split(':') val boundPort = ui.boundPort assert(splitUIAddress(2).toInt == boundPort) } } }
Example 177
Source File: FileUtil.scala From sangria with Apache License 2.0 | 5 votes |
package sangria.util import java.io.File import io.github.classgraph.ClassGraph import sangria.parser.QueryParser import sangria.parser.DeliveryScheme.Throw import spray.json._ import scala.io.Source import net.jcazevedo.moultingyaml._ import scala.collection.JavaConverters._ object FileUtil extends StringMatchers { def loadQuery(name: String) = loadResource("queries/" + name) def loadYaml(name: String, root: String = "scenarios") = loadResource(root + "/" + name).parseYaml def loadScenarios(path: String, root: String = "scenarios") = this.synchronized { val yamlResources = new ClassGraph() .whitelistPackages(root + "." + path) .scan() .getResourcesWithExtension("yaml") .asScala .groupBy(_.getPath).mapValues(_.head) // deduplicate (`ClassGraph` gives duplicates for some reason) .values .toVector yamlResources.map { resource => val name = resource.getPath.substring(resource.getPath.lastIndexOf("/") + 1) val relativePath = resource.getPathRelativeToClasspathElement val stream = this.getClass.getResourceAsStream("/" + relativePath) val contents = Source.fromInputStream(stream, "UTF-8").mkString.parseYaml ScenarioFile(name, relativePath, contents) } } def loadSchema(path: String) = QueryParser.parse(loadResource(path)) def loadTestData(path: String): Either[YamlValue, JsValue] = { val text = loadResource(path) if (path endsWith ".yaml") Left(text.parseYaml) else if (path endsWith ".json") Right(text.parseJson) else throw new IllegalArgumentException(s"Unsupported file format for test data '$path'. Only `*.json` and `*.yaml` files are supported.") } def loadResource(path: String) = Option(this.getClass.getResourceAsStream("/" + path)) match { case Some(res) => stripCarriageReturns(Source.fromInputStream(res, "UTF-8").mkString) case None => throw new IllegalArgumentException("Resource not found: /" + path) } case class ScenarioFile(fileName: String, path: String, scenario: YamlValue) { def folder = path.substring(0, path.lastIndexOf("/")) } }
Example 178
Source File: GameResources.scala From wowchat with GNU General Public License v3.0 | 5 votes |
package wowchat.game

import wowchat.common.{WowChatConfig, WowExpansion}

import scala.io.Source

object GameResources {

  lazy val AREA: Map[Int, String] = readIDNameFile(WowChatConfig.getExpansion match {
    case WowExpansion.Vanilla | WowExpansion.TBC | WowExpansion.WotLK => "pre_cata_areas.csv"
    case _ => "post_cata_areas.csv"
  })

  lazy val ACHIEVEMENT: Map[Int, String] = readIDNameFile("achievements.csv")

  private def readIDNameFile(file: String) = {
    Source
      .fromResource(file)
      .getLines
      .map(str => {
        val splt = str.split(",", 2)
        splt(0).toInt -> splt(1)
      })
      .toMap
  }
}
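The same "id,name" CSV pattern in isolation — a sketch under assumptions: the resource name and the looked-up id are made up, and Source.fromResource requires Scala 2.12 or later.

import scala.io.Source

// Standalone sketch: parse an "id,name" classpath CSV into a Map keyed by the id.
object IdNameCsvSketch {
  def loadIdNames(resource: String): Map[Int, String] =
    Source
      .fromResource(resource)                       // resolves against the context classloader
      .getLines()
      .map { line =>
        val Array(id, name) = line.split(",", 2)    // split on the first comma only; assumes every row has one
        id.toInt -> name
      }
      .toMap

  def main(args: Array[String]): Unit =
    println(loadIdNames("areas.csv").get(1519))     // "areas.csv" and id 1519 are assumptions
}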
Example 179
Source File: WoWChat.scala From wowchat with GNU General Public License v3.0 | 5 votes |
package wowchat import java.util.concurrent.{Executors, TimeUnit} import wowchat.common.{CommonConnectionCallback, Global, ReconnectDelay, WowChatConfig} import wowchat.discord.Discord import wowchat.game.GameConnector import wowchat.realm.{RealmConnectionCallback, RealmConnector} import com.typesafe.scalalogging.StrictLogging import io.netty.channel.nio.NioEventLoopGroup import scala.io.Source object WoWChat extends StrictLogging { private val RELEASE = "v1.3.3" def main(args: Array[String]): Unit = { logger.info(s"Running WoWChat - $RELEASE") val confFile = if (args.nonEmpty) { args(0) } else { logger.info("No configuration file supplied. Trying with default wowchat.conf.") "wowchat.conf" } Global.config = WowChatConfig(confFile) checkForNewVersion val gameConnectionController: CommonConnectionCallback = new CommonConnectionCallback { private val reconnectExecutor = Executors.newSingleThreadScheduledExecutor private val reconnectDelay = new ReconnectDelay override def connect: Unit = { Global.group = new NioEventLoopGroup val realmConnector = new RealmConnector(new RealmConnectionCallback { override def success(host: String, port: Int, realmName: String, realmId: Int, sessionKey: Array[Byte]): Unit = { gameConnect(host, port, realmName, realmId, sessionKey) } override def disconnected: Unit = doReconnect override def error: Unit = sys.exit(1) }) realmConnector.connect } private def gameConnect(host: String, port: Int, realmName: String, realmId: Int, sessionKey: Array[Byte]): Unit = { new GameConnector(host, port, realmName, realmId, sessionKey, this).connect } override def connected: Unit = reconnectDelay.reset override def disconnected: Unit = doReconnect def doReconnect: Unit = { Global.group.shutdownGracefully() Global.discord.changeRealmStatus("Connecting...") val delay = reconnectDelay.getNext logger.info(s"Disconnected from server! Reconnecting in $delay seconds...") reconnectExecutor.schedule(new Runnable { override def run(): Unit = connect }, delay, TimeUnit.SECONDS) } } logger.info("Connecting to Discord...") Global.discord = new Discord(new CommonConnectionCallback { override def connected: Unit = gameConnectionController.connect override def error: Unit = sys.exit(1) }) } private def checkForNewVersion = { // This is JSON, but I really just didn't want to import a full blown JSON library for one string. val data = Source.fromURL("https://api.github.com/repos/fjaros/wowchat/releases/latest").mkString val regex = "\"tag_name\":\"(.+?)\",".r val repoTagName = regex .findFirstMatchIn(data) .map(_.group(1)) .getOrElse("NOT FOUND") if (repoTagName != RELEASE) { logger.error( "~~~ !!! YOUR WoWChat VERSION IS OUT OF DATE !!! ~~~") logger.error(s"~~~ !!! Current Version: $RELEASE !!! ~~~") logger.error(s"~~~ !!! Repo Version: $repoTagName !!! ~~~") logger.error( "~~~ !!! RUN git pull OR GO TO https://github.com/fjaros/wowchat TO UPDATE !!! ~~~") logger.error( "~~~ !!! YOUR WoWChat VERSION IS OUT OF DATE !!! ~~~") } } }
Example 180
Source File: Packet.scala From wowchat with GNU General Public License v3.0 | 5 votes |
package wowchat.common import io.netty.buffer.{ByteBuf, ByteBufAllocator, EmptyByteBuf} import scala.collection.mutable.ArrayBuffer import scala.io.Source case class Packet( id: Int, byteBuf: ByteBuf = new EmptyByteBuf(ByteBufAllocator.DEFAULT) ) { def readString: String = { import scala.util.control.Breaks._ val ret = ArrayBuffer.newBuilder[Byte] breakable { while (byteBuf.readableBytes > 0) { val value = byteBuf.readByte if (value == 0) { break } ret += value } } Source.fromBytes(ret.result.toArray, "UTF-8").mkString } def skipString: Packet = { while (byteBuf.readableBytes > 0 && byteBuf.readByte != 0) {} this } // bit manipulation for cata+ private var bitPosition = 7 private var byte: Byte = 0 def resetBitReader: Unit = { bitPosition = 7 byte = 0 } def readBit: Byte = { bitPosition += 1 if (bitPosition > 7) { bitPosition = 0 byte = byteBuf.readByte } (byte >> (7 - bitPosition) & 1).toByte } def readBits(length: Int): Int = { (length - 1 to 0 by -1).foldLeft(0) { case (result, i) => result | (readBit << i) } } def readBitSeq(mask: Array[Byte], indices: Int*): Unit = { indices.foreach(i => { mask(i) = readBit }) } def readXorByte(mask: Byte): Byte = { if (mask != 0) { (mask ^ byteBuf.readByte).toByte } else { mask } } def readXorByteSeq(mask: Array[Byte], indices: Int*): Unit = { indices.foreach(i => { mask(i) = readXorByte(mask(i)) }) } }
Example 181
Source File: JsonFormatSpec.scala From akka-management with Apache License 2.0 | 5 votes |
package akka.discovery.kubernetes import org.scalatest.{ Matchers, WordSpec } import spray.json._ import scala.io.Source import PodList._ class JsonFormatSpec extends WordSpec with Matchers { "JsonFormat" should { val data = resourceAsString("pods.json") "work" in { JsonFormat.podListFormat.read(data.parseJson) shouldBe PodList( List( Pod( Some(PodSpec(List(Container( "akka-cluster-tooling-example", Some(List( ContainerPort(Some("akka-remote"), 10000), ContainerPort(Some("management"), 10001), ContainerPort(Some("http"), 10002))) )))), Some(PodStatus(Some("172.17.0.4"), Some("Running"))), Some(Metadata(deletionTimestamp = None)) ), Pod( Some(PodSpec(List(Container( "akka-cluster-tooling-example", Some(List( ContainerPort(Some("akka-remote"), 10000), ContainerPort(Some("management"), 10001), ContainerPort(Some("http"), 10002))) )))), Some(PodStatus(Some("172.17.0.6"), Some("Running"))), Some(Metadata(deletionTimestamp = None)) ), Pod( Some(PodSpec(List(Container( "akka-cluster-tooling-example", Some(List( ContainerPort(Some("akka-remote"), 10000), ContainerPort(Some("management"), 10001), ContainerPort(Some("http"), 10002))) )))), Some(PodStatus(Some("172.17.0.7"), Some("Running"))), Some(Metadata(deletionTimestamp = Some("2017-12-06T16:30:22Z"))) ), Pod( Some(PodSpec( List(Container("akka-cluster-tooling-example", Some(List(ContainerPort(Some("management"), 10001))))))), Some(PodStatus(Some("172.17.0.47"), Some("Succeeded"))), Some(Metadata(deletionTimestamp = None)) ) )) } } private def resourceAsString(name: String): String = Source.fromInputStream(getClass.getClassLoader.getResourceAsStream(name)).mkString }
Example 182
Source File: MarathonApiServiceDiscoverySpec.scala From akka-management with Apache License 2.0 | 5 votes |
package akka.discovery.marathon import java.net.InetAddress import akka.discovery.ServiceDiscovery.ResolvedTarget import org.scalatest.{ Matchers, WordSpec } import spray.json._ import scala.io.Source class MarathonApiServiceDiscoverySpec extends WordSpec with Matchers { "targets" should { "calculate the correct list of resolved targets" in { val data = resourceAsString("apps.json") val appList = JsonFormat.appListFormat.read(data.parseJson) MarathonApiServiceDiscovery.targets(appList, "management") shouldBe List( ResolvedTarget( host = "192.168.65.60", port = Some(23236), address = Option(InetAddress.getByName("192.168.65.60"))), ResolvedTarget( host = "192.168.65.111", port = Some(6850), address = Option(InetAddress.getByName("192.168.65.111"))) ) } "calculate the correct list of resolved targets for docker" in { val data = resourceAsString("docker-app.json") val appList = JsonFormat.appListFormat.read(data.parseJson) MarathonApiServiceDiscovery.targets(appList, "akkamgmthttp") shouldBe List( ResolvedTarget( host = "10.121.48.204", port = Some(29480), address = Option(InetAddress.getByName("10.121.48.204"))), ResolvedTarget( host = "10.121.48.204", port = Some(10136), address = Option(InetAddress.getByName("10.121.48.204"))) ) } } private def resourceAsString(name: String): String = Source.fromInputStream(getClass.getClassLoader.getResourceAsStream(name)).mkString }
Example 183
Source File: ScriptManagerTest.scala From codepropertygraph with Apache License 2.0 | 5 votes |
package io.shiftleft.console.scripting import better.files.File import cats.effect.IO import org.scalatest.{Inside, Matchers, WordSpec} import io.shiftleft.codepropertygraph.Cpg import io.shiftleft.console.scripting.ScriptManager.{ScriptCollections, ScriptDescription, ScriptDescriptions} import java.nio.file.{FileSystemNotFoundException, NoSuchFileException, Path} import scala.io.Source import scala.util.Try class ScriptManagerTest extends WordSpec with Matchers with Inside { private object TestScriptExecutor extends AmmoniteExecutor { override protected def predef: String = "" override def runScript(scriptPath: Path, parameters: Map[String, String], cpg: Cpg): IO[Any] = IO.fromTry( Try { val source = Source.fromFile(scriptPath.toFile) val result = source.getLines.mkString(System.lineSeparator()) source.close() result } ) } private object TestScriptManager extends ScriptManager(TestScriptExecutor) protected val DEFAULT_CPG_NAME: String = { if (File(".").name == "console") { (File("..") / "resources" / "testcode" / "cpgs" / "method" / "cpg.bin.zip").pathAsString } else { (File("resources") / "testcode" / "cpgs" / "method" / "cpg.bin.zip").pathAsString } } def withScriptManager(f: ScriptManager => Unit): Unit = { f(TestScriptManager) } "listing scripts" should { "be correct" in withScriptManager { scriptManager => val scripts = scriptManager.scripts() val expected = List( ScriptCollections("general", ScriptDescriptions( "A collection of general purpose scripts.", List(ScriptDescription("list-funcs.sc", "Lists all functions.")) )), ScriptCollections("java", ScriptDescriptions( "A collection of java-specific scripts.", List(ScriptDescription("list-sl-ns.sc", "Lists all shiftleft namespaces.")) )), ScriptCollections("general/general_plus", ScriptDescriptions( "Even more general purpose scripts.", List.empty )) ) scripts should contain theSameElementsAs expected } } "running scripts" should { "be correct when explicitly specifying a CPG" in withScriptManager { scriptManager => val expected = """|@main def main() = { | cpg.method.name.l |}""".stripMargin scriptManager.runScript("general/list-funcs.sc", Map.empty, Cpg.emptyCpg) shouldBe expected } "be correct when specifying a CPG filename" in withScriptManager { scriptManager => val expected = """|@main def main() = { | cpg.method.name.l |}""".stripMargin scriptManager.runScript("general/list-funcs.sc", Map.empty, DEFAULT_CPG_NAME) shouldBe expected } "throw an exception if the specified CPG can not be found" in withScriptManager { scriptManager => intercept[FileSystemNotFoundException] { scriptManager.runScript("general/list-funcs.sc", Map.empty, "cake.bin.zip") } } "throw an exception if the specified script can not be found" in withScriptManager { scriptManager => intercept[NoSuchFileException] { scriptManager.runScript("list-funcs.sc", Map.empty, Cpg.emptyCpg) } } } }
Example 184
Source File: SemanticsLoader.scala From codepropertygraph with Apache License 2.0 | 5 votes |
package io.shiftleft.dataflowengineoss.semanticsloader

import org.apache.logging.log4j.LogManager

import scala.io.Source

case class Semantic(methodFullName: String, parameterIndex: Int)
case class Semantics(elements: List[Semantic])

object SemanticsLoader {
  def emptySemantics: Semantics = {
    Semantics(Nil)
  }
}

class SemanticsLoader(filename: String) {
  private val logger = LogManager.getLogger(getClass)

  def load(): Semantics = {
    val bufferedReader = Source.fromFile(filename)
    var lineNumber = 0
    try {
      val semanticElements = bufferedReader
        .getLines()
        .flatMap { line =>
          lineNumber += 1 // advance for every line so warnings report the right line number
          val parts = line.split(",")

          if (parts.size == 2) {
            try {
              val methodFullName = parts(0).trim
              val parameterIndex = parts(1).trim.toInt
              Some(Semantic(methodFullName, parameterIndex))
            } catch {
              case _: NumberFormatException =>
                logFormatError("Argument index is not convertible to Int.", lineNumber)
                None
            }
          } else {
            logFormatError("Invalid number of elements per line. Expected method name followed by argument index.", lineNumber)
            None
          }
        }
        .toList

      Semantics(semanticElements)
    } finally {
      bufferedReader.close()
    }
  }

  private def logFormatError(msg: String, lineNumber: Int): Unit = {
    logger.warn(s"$msg In $filename on line $lineNumber")
  }
}
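A minimal round trip for the loader above; the temporary file and both rows are hypothetical.

import java.nio.charset.StandardCharsets
import java.nio.file.Files

// Hypothetical usage: write a tiny "methodFullName,parameterIndex" file, then load it.
object SemanticsLoaderSketch {
  def main(args: Array[String]): Unit = {
    val path = Files.createTempFile("semantics", ".csv")
    val rows = "java.lang.String.format,1\nstrcpy,2\n"           // made-up entries
    Files.write(path, rows.getBytes(StandardCharsets.UTF_8))

    val semantics = new SemanticsLoader(path.toString).load()
    semantics.elements.foreach(println)                           // Semantic(java.lang.String.format,1) ...
  }
}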
Example 185
Source File: MLLib.scala From spark-sql-perf with Apache License 2.0 | 5 votes |
package com.databricks.spark.sql.perf.mllib

import scala.io.Source
import scala.language.implicitConversions

import org.slf4j.LoggerFactory

import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}

import com.databricks.spark.sql.perf._

class MLLib(sqlContext: SQLContext) extends Benchmark(sqlContext) with Serializable {
  def this() = this(SQLContext.getOrCreate(SparkContext.getOrCreate()))
}

object MLLib {
  // Note: `logger`, `getConf` and `getBenchmarks` are members of this object that are not shown in this excerpt.

  def run(yamlFile: String = null, yamlConfig: String = null): DataFrame = {
    logger.info("Starting run")
    val conf = getConf(yamlFile, yamlConfig)
    val sparkConf = new SparkConf().setAppName("MLlib QA").setMaster("local[2]")
    val sc = SparkContext.getOrCreate(sparkConf)
    sc.setLogLevel("INFO")
    val b = new com.databricks.spark.sql.perf.mllib.MLLib()
    val benchmarks = getBenchmarks(conf)
    println(s"${benchmarks.size} benchmarks identified:")
    val str = benchmarks.map(_.prettyPrint).mkString("\n")
    println(str)
    logger.info("Starting experiments")
    val e = b.runExperiment(
      executionsToRun = benchmarks,
      iterations = 1, // If you want to increase the number of iterations, add more seeds
      resultLocation = conf.output,
      forkThread = false)
    e.waitForFinish(conf.timeout.toSeconds.toInt)
    logger.info("Run finished")
    e.getCurrentResults()
  }
}
Example 186
Source File: InfluxUDPClient.scala From chronicler with Apache License 2.0 | 5 votes |
package com.github.fsanaulla.chronicler.udp import java.io.File import java.net._ import java.nio.charset.{Charset, StandardCharsets} import com.github.fsanaulla.chronicler.core.components.BodyBuilder import com.github.fsanaulla.chronicler.core.model.{InfluxWriter, Point} import scala.io.Source import scala.util.{Failure, Try} final class InfluxUDPClient(host: String, port: Int) extends AutoCloseable { private[this] val socket = new DatagramSocket() private[this] def buildAndSend(msg: Array[Byte]): Try[Unit] = Try( socket.send( new DatagramPacket( msg, msg.length, new InetSocketAddress(host, port) ) ) ) def writeNative(point: String, charset: Charset = StandardCharsets.UTF_8): Try[Unit] = buildAndSend(point.getBytes(charset)) def bulkWriteNative(points: Seq[String], charset: Charset = StandardCharsets.UTF_8): Try[Unit] = buildAndSend(points.mkString("\n").getBytes(charset)) def write[T]( measurement: String, entity: T, charset: Charset = StandardCharsets.UTF_8 )(implicit writer: InfluxWriter[T] ): Try[Unit] = { BodyBuilder.stringBodyBuilder.fromT(measurement, entity) match { case Left(ex) => scala.util.Failure(ex) case Right(r) => buildAndSend(r.getBytes(charset)) } } def bulkWrite[T]( measurement: String, entities: Seq[T], charset: Charset = StandardCharsets.UTF_8 )(implicit writer: InfluxWriter[T] ): Try[Unit] = { BodyBuilder.stringBodyBuilder.fromSeqT(measurement, entities) match { case Left(ex) => Failure(ex) case Right(r) => buildAndSend(r.getBytes(charset)) } } def writeFromFile(file: File, charset: Charset = StandardCharsets.UTF_8): Try[Unit] = { val sendData = Source .fromFile(file) .getLines() .mkString("\n") .getBytes(charset) buildAndSend(sendData) } def writePoint(point: Point, charset: Charset = StandardCharsets.UTF_8): Try[Unit] = buildAndSend(point.serialize.getBytes(charset)) def bulkWritePoints(points: Seq[Point], charset: Charset = StandardCharsets.UTF_8): Try[Unit] = buildAndSend( points .map(_.serialize) .mkString("\n") .getBytes(charset) ) def close(): Unit = socket.close() }
Example 187
Source File: BodyBuilder.scala From chronicler with Apache License 2.0 | 5 votes |
package com.github.fsanaulla.chronicler.core.components import java.nio.file.Path import com.github.fsanaulla.chronicler.core.alias.ErrorOr import com.github.fsanaulla.chronicler.core.either import com.github.fsanaulla.chronicler.core.either.EitherOps import com.github.fsanaulla.chronicler.core.model.{Appender, InfluxWriter, Point} import scala.io.Source trait BodyBuilder[A] { def fromFile(filePath: Path, enc: String): A def fromString(string: String): A def fromStrings(strings: Seq[String]): A def fromPoint(point: Point): A def fromPoints(points: Seq[Point]): A def fromT[T](meas: String, t: T)(implicit wr: InfluxWriter[T]): ErrorOr[A] def fromSeqT[T](meas: String, ts: Seq[T])(implicit wr: InfluxWriter[T]): ErrorOr[A] } object BodyBuilder { implicit val stringBodyBuilder: BodyBuilder[String] = new BodyBuilder[String] with Appender { override def fromFile(filePath: Path, enc: String): String = Source .fromFile(filePath.toUri, enc) .getLines() .mkString("\n") override def fromStrings(strings: Seq[String]): String = strings.mkString("\n") override def fromPoint(point: Point): String = point.serialize override def fromPoints(points: Seq[Point]): String = points.map(_.serialize).mkString("\n") override def fromString(string: String): String = string override def fromT[T](meas: String, t: T)(implicit wr: InfluxWriter[T]): ErrorOr[String] = wr.write(t).mapRight(append(meas, _)) override def fromSeqT[T]( meas: String, ts: Seq[T] )(implicit wr: InfluxWriter[T] ): ErrorOr[String] = { either.seq(ts.map(wr.write)).mapRight(append(meas, _)) } } }
Example 188
Source File: FileIO.scala From ZparkIO with MIT License | 5 votes |
package com.leobenkel.zparkioProjectExample

import zio.{Task, ZIO}

import scala.io.Source

trait FileIO {
  def fileIO: FileIO.Service
}

object FileIO {
  trait Service {
    protected def readFileContent(path: String): Seq[String]

    final def getFileContent(path: String): ZIO[Any, Throwable, Seq[String]] = {
      Task(readFileContent(path))
    }
  }

  private trait LiveService extends FileIO.Service {
    override protected def readFileContent(path: String): Seq[String] = {
      val file = Source.fromFile(path)
      val content = file.getLines().toArray
      file.close()
      content
    }
  }

  trait Live extends FileIO {
    override def fileIO: Service = new LiveService {}
  }

  def apply(path: String): ZIO[FileIO, Throwable, Seq[String]] = {
    ZIO.accessM[FileIO](_.fileIO.getFileContent(path))
  }
}
Example 189
Source File: FileIO.scala From ZparkIO with MIT License | 5 votes |
package com.leobenkel.zparkioProfileExampleMoreComplex.Services

import zio.{Has, Task, ZIO, ZLayer}

import scala.io.Source

object FileIO {
  type FileIO = Has[Service]

  trait Service {
    protected def readFileContent(path: String): Seq[String]

    final def getFileContent(path: String): ZIO[Any, Throwable, Seq[String]] = {
      Task(readFileContent(path))
    }
  }

  trait LiveService extends FileIO.Service {
    override protected def readFileContent(path: String): Seq[String] = {
      val file = Source.fromFile(path)
      val content = file.getLines().toArray
      file.close()
      content
    }
  }

  val Live: ZLayer[Any, Nothing, FileIO] = ZLayer.succeed(new LiveService {})

  def apply(path: String): ZIO[FileIO, Throwable, Seq[String]] = {
    ZIO.accessM[FileIO](_.get.getFileContent(path))
  }
}
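A hedged wiring sketch for the layer-based service above (ZIO 1.x), assuming the FileIO object is in scope; the input path is made up.

import zio.Runtime

// Hypothetical entry point: reads a file through the FileIO accessor and satisfies
// the environment with the Live layer.
object FileIOSketch {
  def main(args: Array[String]): Unit = {
    val program = FileIO("/tmp/input.txt")   // assumed path
      .map(_.mkString("\n"))

    println(Runtime.default.unsafeRun(program.provideLayer(FileIO.Live)))
  }
}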
Example 190
Source File: AvscFileSorter.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger.filesorter import TypeComparator.strContainsType import java.io.File import scala.collection.mutable import scala.io.Source object AvscFileSorter { def sortSchemaFiles(files: Iterable[File]): Seq[File] = { val sortedButReversed = mutable.ArrayBuffer.empty[File] def normalizeInput(files: List[File]) = files.sortBy(file => file.getName) var pending: Iterable[File] = normalizeInput(files.toList) while (pending.nonEmpty) { val (used, unused) = usedUnusedSchemas(pending) sortedButReversed ++= unused pending = used } sortedButReversed.reverse.toSeq } def usedUnusedSchemas(files: Iterable[File]): (Iterable[File], Iterable[File]) = { val usedUnused = files.map { file => val fullName = extractFullName(file) val numUsages = files.count { candidate => val candidateName = extractFullName(candidate) strContainsType(candidateName, fileText(candidate), fullName) } (file, numUsages) }.partition(usedUnused => usedUnused._2 > 0) (usedUnused._1.map(_._1), usedUnused._2.map(_._1)) } def extractFullName(f: File): String = { val txt = fileText(f) val namespace = namespaceRegex.findFirstMatchIn(txt) val name = nameRegex.findFirstMatchIn(txt) val nameGroup = name.get.group(1) if (namespace.isEmpty) { nameGroup } else { s"${namespace.get.group(1)}.$nameGroup" } } def fileText(f: File): String = { val src = Source.fromFile(f) try { src.getLines.mkString } finally { src.close() } } val namespaceRegex = "\\\"namespace\\\"\\s*:\\s*\"([^\\\"]+)\\\"".r val nameRegex = "\\\"name\\\"\\s*:\\s*\"([^\\\"]+)\\\"".r }
Example 191
Source File: AvdlFileSorter.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger.filesorter import java.io.File import scala.annotation.tailrec import scala.io.Source object AvdlFileSorter { def sortSchemaFiles(filesIterable: Iterable[File]): Seq[File] = { val files = filesIterable.toList val importsMap = files.map{ file => (file.getCanonicalFile, getImports(file)) }.toMap.mapValues(f => f.filter(_.exists)) @tailrec def addFiles(processedFiles: Seq[File], remainingFiles: List[File]): Seq[File] = { remainingFiles match { case Nil => processedFiles case h :: t => val processedFilesSet = processedFiles.toSet if (importsMap(h).forall(processedFilesSet.contains)) addFiles(processedFiles :+ h, t) else addFiles(processedFiles, t :+ h) } } val result = addFiles(Seq.empty, files) result } // TODO This should be replaced by letting AVRO compile the IDL files directly, but I'm not sure how to do that now. private[this] val importPattern = """\s*import\s+idl\s+"([^"]+)"\s*;\s*""".r private[this] def getImports(file: File): Vector[File] = { val source = Source.fromFile(file) try { source.getLines().collect{ case importPattern(currentImport) => new File(file.getParentFile, currentImport).getCanonicalFile }.toVector } finally source.close() } }
Example 192
Source File: AvrohuggerSpec.scala From avrohugger with Apache License 2.0 | 5 votes |
package util import avrohugger._ import avrohugger.format._ import avrohugger.format.abstractions.SourceFormat import java.io.File import java.nio.file.{FileSystems, Path} import org.specs2.SpecificationLike import org.specs2.matcher.{Matcher, Matchers, ShouldExpectable} import scala.io.Source class AvrohuggerSpec( inPath: Path, val outputFiles: Seq[Path], sourceFormat: SourceFormat ) extends Matchers { implicit class PathExtensions( path: Path ) { def ++(next: String) = path.resolve(next) def ++(other: Path) = path.resolve(other) } val sourceFormatName = sourceFormat match { case SpecificRecord => "specific" case Standard => "standard" case Scavro => "scavro" } val gen = new Generator(sourceFormat) val inputPath = { val sourceBase = FileSystems.getDefault.getPath("avrohugger-core", "src", "test", "avro") (sourceBase ++ inPath) } val inputFile = inputPath.toFile val outDir = gen.defaultOutputDir + s"/$sourceFormatName/" private def readFile(f: File): String = { val source = Source.fromFile(f) try source.mkString finally source.close() } val expectedBase = FileSystems.getDefault.getPath("avrohugger-core", "src", "test", "expected", sourceFormatName) val generatedBase = FileSystems.getDefault.getPath("target", "generated-sources", sourceFormatName) private def prefixedFileString(prefixPath: Path, p: Path) = { val fullPath = sourceFormat match { case Scavro => { Option(p.getParent) match { case Some(parent) => parent ++ "model" ++ p.getFileName case None => FileSystems.getDefault.getPath("model") ++ p } } case _ => p } readFile((prefixPath ++ fullPath).toFile) } def generatedString(p: Path) = prefixedFileString(generatedBase, p) def expectedString(p: Path) = prefixedFileString(expectedBase, p) def checkFileToFile = { gen.fileToFile(inputFile, outDir) val generated = outputFiles map generatedString val expected = outputFiles map expectedString ShouldExpectable(generated) shouldEqual expected } def checkFileToStrings = { val generated = gen.fileToStrings(inputFile) val expected = outputFiles map expectedString ShouldExpectable(generated) shouldEqual expected } def checkStringToFile = { val inputString = readFile(inputFile) gen.stringToFile(inputString, outDir) val generated = outputFiles map generatedString val expected = outputFiles map expectedString ShouldExpectable(generated) shouldEqual expected } def checkStringToStrings = { val generated = { val inputString = readFile(inputFile) gen.stringToStrings(inputString) } val expected = outputFiles map expectedString ShouldExpectable(generated) shouldEqual expected } }
Example 193
Source File: TypeFlattenerIntegrationTest.scala From play-swagger with MIT License | 5 votes |
package de.zalando.apifirst import java.io.File import de.zalando.apifirst.util.ScalaPrinter import org.scalatest.{FunSpec, MustMatchers} import scala.io.Source class TypeFlattenerIntegrationTest extends FunSpec with MustMatchers { val expectation_path = "play-scala-generator/src/test/scala/model/" val prefix = "resources." import de.zalando.model._ val plainModels = Seq[WithModel]( additional_properties_yaml, basic_auth_api_yaml, basic_extension_yaml, basic_polymorphism_yaml, cross_spec_references_yaml, echo_api_yaml, error_in_array_yaml, expanded_polymorphism_yaml, form_data_yaml, full_petstore_api_yaml, hackweek_yaml, heroku_petstore_api_yaml, instagram_api_yaml, minimal_api_yaml, nakadi_yaml, nested_arrays_yaml, nested_arrays_validation_yaml, nested_objects_yaml, nested_objects_validation_yaml, nested_options_yaml, nested_options_validation_yaml, numbers_validation_yaml, options_yaml, security_api_yaml, simple_petstore_api_yaml, split_petstore_api_yaml, string_formats_yaml, string_formats_validation_yaml, type_deduplication_yaml, uber_api_yaml ) describe("TypeFlattener") { plainModels.foreach { model => testTypeFlattener(model) } } def testTypeFlattener(ast: WithModel): Unit = { val name = ScalaPrinter.nameFromModel(ast) it(s"should flatten API model $name") { val scalaModel = TypeNormaliser.flatten(ast.model) val expected = asInFile(name, ".scala") clean(ScalaPrinter.asScala(name, scalaModel)) mustBe clean(expected) } } def asInFile(name: String, suffix: String): String = { val expectedFile = new File(expectation_path, prefix + name + suffix) if (expectedFile.canRead) { val src = Source.fromFile(expectedFile) val result = src.getLines().mkString("\n") src.close() result } else "" } def clean(str: String): String = str.split("\n").map(_.trim).filter(_.nonEmpty).mkString("\n") }
Example 194
Source File: ExpectedResults.scala From play-swagger with MIT License | 5 votes |
package de.zalando.swagger import java.io.{File, FileOutputStream} import scala.io.Source trait ExpectedResults { val resourcesPath = "swagger-parser/src/test/resources/" def expectationsFolder: String = "/expected_results/" def dump(result: String, file: File, suffix: String): Unit = { if (result.nonEmpty) { val newFile = target(file, suffix) newFile.getParentFile.mkdirs() newFile.delete() newFile.createNewFile() val out = new FileOutputStream(newFile) out.write(result.getBytes) out.close() } } def asInFile(file: File, suffix: String): String = { val expectedFile = target(file, suffix) if (expectedFile.canRead) { val src = Source.fromFile(expectedFile) val result = src.getLines().mkString("\n") src.close() result } else "" } def target(file: File, suffix: String): File = new File(file.getParentFile.getParent + expectationsFolder + file.getName + "." + suffix) def clean(str: String): String = str.split("\n").map(_.trim).mkString("\n") }
Example 195
Source File: ExpectedResults.scala From play-swagger with MIT License | 5 votes |
package de.zalando import java.io.{File, FileOutputStream} import de.zalando.apifirst.util.ScalaPrinter import de.zalando.model._ import scala.io.Source trait ExpectedResults { val model = Seq[WithModel]( additional_properties_yaml, basic_polymorphism_yaml, nested_arrays_yaml, nested_options_yaml, basic_extension_yaml, expanded_polymorphism_yaml, nested_objects_yaml, options_yaml ) val examples = Seq[WithModel]( basic_auth_api_yaml, cross_spec_references_yaml, echo_api_yaml, error_in_array_yaml, form_data_yaml, full_petstore_api_yaml, hackweek_yaml, heroku_petstore_api_yaml, instagram_api_yaml, minimal_api_yaml, nakadi_yaml, security_api_yaml, simple_petstore_api_yaml, split_petstore_api_yaml, string_formats_yaml, type_deduplication_yaml, uber_api_yaml ) val validations = Seq[WithModel]( nested_arrays_validation_yaml, nested_objects_validation_yaml, nested_options_validation_yaml, numbers_validation_yaml, string_formats_validation_yaml ) val resourcesPath = "play-scala-generator/src/test/resources/" def expectationsFolder: String = "/expected_results/" def dump(result: String, name: String, suffix: String): Unit = { if (result.nonEmpty) { val newFile = target(name, suffix) newFile.getParentFile.mkdirs() newFile.delete() newFile.createNewFile() val out = new FileOutputStream(newFile) out.write(result.getBytes) out.close() } } def asInFile(name: String, suffix: String): String = { val expectedFile = target(name, suffix) if (expectedFile.canRead) { val src = Source.fromFile(expectedFile) val result = src.getLines().mkString("\n") src.close() result } else "" } def target(name: String, suffix: String): File = new File(resourcesPath + expectationsFolder + name + "." + suffix) def clean(str: String): String = str.split("\n").map(_.trim).filter(_.nonEmpty).mkString("\n") def nameFromModel(ast: WithModel): String = ScalaPrinter.nameFromModel(ast) }
Example 196
Source File: IndexRenderService.scala From silhouette-vuejs-app with Apache License 2.0 | 5 votes |
package models.services

import javax.inject.Inject

import play.api.mvc.RequestHeader
import play.filters.csrf.CSRF
import play.filters.csrf.CSRF.Token

import scala.io.Source

class IndexRenderService @Inject() () {

  def render(title: Option[String] = None, meta: Seq[(String, String)] = Seq.empty)(implicit request: RequestHeader): String = {
    val metaTags = title.map(t => s"<title>$t</title>").getOrElse("") +
      meta.map { case (n, c) => s"""<meta name="$n" content="$c">""" }.mkString("")

    val html = Source.fromFile("public/ui/index.html").mkString
    setCsrfToken(html).replace("</head>", s"$metaTags</head>")
  }

  def setCsrfToken(html: String)(implicit request: RequestHeader): String = {
    val Token(_, value) = CSRF.getToken.get
    html.replace("csrf-token-value=\"\"", s"csrf-token-value='$value'")
  }
}
Example 197
Source File: JsonTestSuite.scala From borer with Mozilla Public License 2.0 | 5 votes |
package io.bullet.borer import java.io.BufferedInputStream import utest._ import scala.io.Source object JsonTestSuite extends TestSuite { val disabled: Set[String] = Set( "n_multidigit_number_then_00.json", "n_structure_null-byte-outside-string.json", "n_structure_whitespace_formfeed.json" ) val testFiles = Source .fromResource("") .getLines() .map { name => val is = new BufferedInputStream(getClass.getResourceAsStream("/" + name)) try name -> Iterator.continually(is.read).takeWhile(_ != -1).map(_.toByte).toArray finally is.close() } .toMap .view .filter(t => !disabled.contains(t._1)) val config = Json.DecodingConfig.default.copy(maxNumberMantissaDigits = 99, maxNumberAbsExponent = 999) val tests = Tests { "Accept" - { for { (name, bytes) <- testFiles if name startsWith "y" } { Json.decode(bytes).withConfig(config).to[Dom.Element].valueEither match { case Left(e) => throw new RuntimeException(s"Test `$name` did not parse as it should", e) case Right(_) => // ok } } } "Reject" - { for { (name, bytes) <- testFiles if name startsWith "n" } { Json.decode(bytes).withConfig(config).to[Dom.Element].valueEither match { case Left(_) => // ok case Right(x) => throw new RuntimeException(s"Test `$name` parsed even though it should have failed: $x") } } } "Not Crash" - { for { (name, bytes) <- testFiles if name startsWith "i" } { Json.decode(bytes).withConfig(config).to[Dom.Element].valueEither match { case Left(e: Borer.Error.General[_]) => throw new RuntimeException(s"Test `$name` did fail unexpectedly", e) case _ => // everything else is fine } } } } }
Example 198
Source File: FileSpec.scala From borer with Mozilla Public License 2.0 | 5 votes |
package io.bullet.borer import java.io.File import java.nio.charset.StandardCharsets import java.nio.file.Files import utest._ import scala.io.Source object FileSpec extends TestSuite { final case class Foo( string: String = "This is a really long text for testing writing to a file", int: Int = 42, double: Double = 0.0) implicit val fooCodec = Codec(Encoder.from(Foo.unapply _), Decoder.from(Foo.apply _)) val tests = Tests { "small file" - { val tempFile = File.createTempFile("borer", ".json") try { Json.encode(Foo()).to(tempFile).result ==> tempFile new String(Files.readAllBytes(tempFile.toPath), "UTF8") ==> """["This is a really long text for testing writing to a file",42,0.0]""" Json.decode(tempFile).to[Foo].value ==> Foo() } finally tempFile.delete() } "large file" - { val testFileBytes = Source.fromResource("large.json").mkString.getBytes(StandardCharsets.UTF_8) val config = Json.DecodingConfig.default .copy(maxNumberMantissaDigits = 99, maxNumberAbsExponent = 300, initialCharbufferSize = 8) val dom = Json.decode(testFileBytes).withConfig(config).to[Dom.Element].value val tempFile = File.createTempFile("borer", ".json") try { Json.encode(dom).to(tempFile).result ==> tempFile Json .decode(Input.fromFile(tempFile, bufferSize = 256)) .withConfig(config) .to[Dom.Element] .value ==> dom } finally tempFile.delete() } } }
Example 199
Source File: FromInputIteratorFileSpec.scala From borer with Mozilla Public License 2.0 | 5 votes |
package io.bullet.borer import java.nio.charset.StandardCharsets import utest._ import scala.io.Source object FromInputIteratorFileSpec extends TestSuite with TestUtils { val testFileBytes = Source.fromResource("large.json").mkString.getBytes(StandardCharsets.UTF_8) val config = Json.DecodingConfig.default .copy(maxNumberMantissaDigits = 99, maxNumberAbsExponent = 300, initialCharbufferSize = 8) val dom = Json.decode(testFileBytes).withConfig(config).to[Dom.Element].value val tests = Tests { "test file" - { Json .decode(chunkedInput(3, 2, 1, 0, 100, 71)) .withConfig(config) .to[Dom.Element] .value ==> dom } } def chunkedInput(chunkSizes: Int*): Iterator[Array[Byte]] = chunkIterator(testFileBytes, Iterator.continually(0).flatMap(_ => chunkSizes)) def chunkIterator(remainingBytes: Array[Byte], chunkSizes: Iterator[Int]): Iterator[Array[Byte]] = { val len = chunkSizes.next() if (remainingBytes.length <= len) Iterator.single(remainingBytes) else Iterator.single(remainingBytes.take(len)) ++ chunkIterator(remainingBytes.drop(len), chunkSizes) } final class FFPadder(input: Input[Array[Byte]]) extends Input.PaddingProvider[Array[Byte]] { def padByte(): Byte = -1 def padDoubleByte(remaining: Int): Char = if (remaining < 1) '\uffff' else ((input.readByte() << 8) | 0xFF).toChar def padQuadByte(remaining: Int): Int = { import input.{readByte => byte, readDoubleByteBigEndian => doub} // format: OFF remaining match { case 0 => 0xFFFFFFFF case 1 => (byte() << 24) | 0xFFFFFF case 2 => (doub() << 16) | 0xFFFF case 3 => (doub() << 16) | ((byte() & 0xFF) << 8) | 0xFF case _ => throw new IllegalStateException } // format: ON } def padOctaByte(remaining: Int): Long = { import input.{readByte => byte, readDoubleByteBigEndian => doub, readQuadByteBigEndian => quad} // format: OFF remaining match { case 0 => 0XFFFFFFFFFFFFFFFFL case 1 => (byte().toLong << 56) | 0XFFFFFFFFFFFFFFL case 2 => (doub().toLong << 48) | 0XFFFFFFFFFFFFL case 3 => (doub().toLong << 48) | ((byte() & 0XFFL) << 40) | 0XFFFFFFFFFFL case 4 => (quad().toLong << 32) | 0XFFFFFFFFL case 5 => (quad().toLong << 32) | ((byte() & 0XFFL) << 24) | 0XFFFFFFL case 6 => (quad().toLong << 32) | ((doub() & 0XFFFFL) << 16) | 0XFFFFL case 7 => (quad().toLong << 32) | ((doub() & 0XFFFFL) << 16) | ((byte() & 0XFFL) << 8) | 0XFFL case _ => throw new IllegalStateException } // format: ON } def padBytes(rest: Array[Byte], missing: Long) = ByteAccess.ForByteArray.concat(rest, Array.fill[Byte](missing.toInt)(-1)) } }
Example 200
Source File: CueSheetVersion.scala From cuesheet with Apache License 2.0 | 5 votes |
package com.kakao.cuesheet

import java.nio.file.{Files, Paths}

import com.kakao.cuesheet.deps.{DependencyAnalyzer, ManagedDependencyNode}
import com.kakao.mango.logging.Logging

import scala.collection.JavaConversions._
import scala.io.Source
import scala.util.Try

object CueSheetVersion extends Logging {
  private val versionPattern = """[^"]*"([^"]+)".*""".r

  lazy val version: String = {
    // read from MANIFEST.MF
    getClass.getClassLoader.getResources("META-INF/MANIFEST.MF").toSeq.flatMap { url =>
      val src = Source.fromInputStream(url.openStream())
      try {
        val manifest = src.getLines().map(_.split(":", 2)).collect {
          case Array(key, value) => (key.trim(), value.trim())
        }.toMap

        (manifest.get("Implementation-Vendor"), manifest.get("Implementation-Title")) match {
          case (Some("com.kakao.cuesheet"), Some("cuesheet")) => manifest.get("Implementation-Version")
          case (Some("com.kakao.cuesheet"), Some("cuesheet-assembly")) => manifest.get("Implementation-Version")
          case _ => Nil
        }
      } finally {
        src.close()
      }
    }.headOption.orElse {
      val (_, applicationJars) = DependencyAnalyzer().graph.divide()
      applicationJars.collectFirst {
        case jar: ManagedDependencyNode if jar.artifact.startsWith("cuesheet") => jar.version
      }
    }.orElse {
      Try(Files.readAllBytes(Paths.get("version.sbt"))).map { bytes => }.toOption
      Try(Source.fromFile("version.sbt")).map { src =>
        // try to read from version.sbt
        try {
          src.getLines().collectFirst {
            case versionPattern(v) => v
          }.head
        } finally {
          src.close()
        }
      }.toOption
    }.getOrElse("Unknown")
  }
}
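The manifest-scanning step above can be exercised on its own; a small sketch using only the standard library (the printed format is arbitrary).

import scala.collection.JavaConverters._
import scala.io.Source

// Prints the Implementation-Version entry of every MANIFEST.MF visible on the classpath.
object ManifestVersionSketch {
  def main(args: Array[String]): Unit = {
    val urls = getClass.getClassLoader.getResources("META-INF/MANIFEST.MF").asScala
    urls.foreach { url =>
      val src = Source.fromInputStream(url.openStream())
      try {
        val entries = src.getLines()
          .map(_.split(":", 2))
          .collect { case Array(k, v) => k.trim -> v.trim }
          .toMap
        entries.get("Implementation-Version").foreach(v => println(s"$url -> $v"))
      } finally src.close()
    }
  }
}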