scala.io.Source Scala Examples

The following examples show how to use scala.io.Source. Each example comes from an open-source project; the source file, originating project, and license are noted above each listing.
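Before the project examples, here is a minimal sketch of the core pattern most of them follow: open a source, consume it with getLines, and close it when finished. The file name data.txt is only a placeholder.

import scala.io.Source

object SourceBasics {
  def main(args: Array[String]): Unit = {
    // Placeholder path; point this at any text file on your machine.
    val source = Source.fromFile("data.txt", "UTF-8")
    try {
      // getLines returns an Iterator[String]; print each line as it is read.
      source.getLines().foreach(println)
    } finally {
      // Always release the underlying stream when finished.
      source.close()
    }
  }
}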
Example 1
Source File: WortschatzParser.scala    From dbpedia-spotlight-model   with Apache License 2.0
package org.dbpedia.spotlight.io

import com.officedepot.cdap2.collection.CompactHashSet
import org.apache.log4j.Logger

import scala.io.Source



object WortschatzParser {

    val LOG = Logger.getLogger(this.getClass)

    def parse(filename: String) : CompactHashSet[String] = {
        parse(filename, count => true);
    }

    def parse(filename: String, minimumCount: Int) : CompactHashSet[String] = {
        parse(filename, count => (count > minimumCount) )
    }

    def parse(filename: String, minimumCount: Int, maximumCount: Int) : CompactHashSet[String] = {
        parse(filename, count => (count > minimumCount) && (count < maximumCount))
    }

    def parse(filename: String, condition: Int => Boolean) : CompactHashSet[String] = {
        LOG.info(" parsing common words file ")
        // get lines, split in three fields, get the middle one (word)
        val commonWords = new CompactHashSet[String]();

        Source.fromFile(filename, "iso-8859-1").getLines.foreach(line => {
            if (line.trim()!="") {
                val fields = line.split("\\s")
                if (condition(fields(2).toInt)) commonWords.add(fields(1))
            }
        });
        commonWords
    }
} 
Example 2
Source File: Banner.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.sandbox.banner

import java.io.PrintStream

import scala.io.Source

object Banner {
  def show(out: PrintStream): Unit = {
    val resourceName = "banner.txt"
    if (getClass.getClassLoader.getResource(resourceName) != null)
      out.println(
        Source
          .fromResource(resourceName)
          .getLines
          .mkString("\n"))
    else
      out.println("Banner resource missing from classpath.")
  }
} 
Example 3
Source File: ComponentsFixture.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.navigator.test

import java.util.concurrent.atomic.AtomicReference

import com.daml.navigator.test.config.Arguments
import com.daml.navigator.test.runner.{HeadNavigator, PackagedDamlc, PackagedSandbox}
import com.typesafe.scalalogging.LazyLogging

import scala.io.Source
import scala.util.{Failure, Success, Try}

class ComponentsFixture(
    val args: Arguments,
    val navigatorPort: Int,
    val sandboxPort: Int,
    val scenario: String
) extends LazyLogging {

  // A list of commands on how to destroy started processes
  private val killProcs: AtomicReference[List[Unit => Unit]] = new AtomicReference(List.empty)

  private val onlineUrl = s"http://localhost:$navigatorPort/api/about"

  private def get(
      url: String,
      connectTimeout: Int = 1000,
      readTimeout: Int = 1000,
      requestMethod: String = "GET"
  ): String = {
    import java.net.{URL, HttpURLConnection}
    val connection = (new URL(url)).openConnection.asInstanceOf[HttpURLConnection]
    connection.setConnectTimeout(connectTimeout)
    connection.setReadTimeout(readTimeout)
    connection.setRequestMethod(requestMethod)
    val inputStream = connection.getInputStream
    val content = Source.fromInputStream(inputStream).mkString
    if (inputStream != null) inputStream.close()
    content
  }

  def startup(): Try[Unit] = {
    if (args.startComponents) {
      logger.info("Starting the sandbox and the Navigator")
      for {
        (darFile, tempFiles) <- Try(PackagedDamlc.run(args.damlPath))
        sandbox <- Try(PackagedSandbox.runAsync(sandboxPort, darFile, scenario))
        _ = killProcs.updateAndGet(s => sandbox :: s)
        navigator <- Try(
          HeadNavigator.runAsync(args.navConfPAth, args.navigatorDir, navigatorPort, sandboxPort))
        _ = killProcs.updateAndGet(s => navigator :: s)
      } yield { () }
    } else {
      Success(())
    }
  }

  private def retry[R](action: => R, maxRetries: Int, delayMillis: Int): Try[R] = {
    def retry0(count: Int): Try[R] = {
      Try(action) match {
        case Success(r) => Success(r)
        case Failure(e) =>
          if (count > maxRetries) {
            logger.error(
              s"Navigator is not available after $maxRetries retries with $delayMillis millis interval.")
            Failure(e)
          } else {
            logger.info(s"Navigator is not available yet, waiting $delayMillis millis ")
            Thread.sleep(delayMillis.toLong)
            retry0(count + 1)
          }
      }
    }

    retry0(0)
  }

  def waitForNavigator(): Try[Unit] = {
    logger.info(s"Waiting for the Navigator to start up (waiting for $onlineUrl)")
    retry({ get(onlineUrl); () }, 120, 1000)
  }

  def shutdown(): Unit = {
    killProcs.getAndUpdate(procs => {
      procs.foreach(killAction => Try { killAction(()) })
      List.empty
    })
    ()
  }
} 
Example 4
Source File: FundamentalsParser.scala    From YahooFinanceScala   with MIT License
package openquant.yahoofinance.impl

import java.time.format.DateTimeFormatter
import java.time.{LocalDate, ZoneId, ZonedDateTime}

import com.github.tototoshi.csv._
import openquant.yahoofinance.Fundamentals

import scala.io.Source


object FundamentalsParser extends Function1[String, Vector[Fundamentals]] {
  def apply(content: String): Vector[Fundamentals] = {
    val csvReader = CSVReader.open(Source.fromString(content))
    val fundamentals: Vector[Fundamentals] = csvReader.toStream.map { fields ⇒
      parseCSVLine(fields.toVector)
    }.toVector
    fundamentals
  }

  private def parseCSVLine(field: Vector[String]): Fundamentals = {
    require(field.length >= 2, "number of fields")
    val name = field(1)
    if (name == "N/A")
      Fundamentals(
        looksValid = false,
        symbol = field(0),
        name = name
      )
    else
      Fundamentals(
        looksValid = true,
        symbol = field(0),
        name = name
      )
  }
} 
Example 5
Source File: package.scala    From mantis   with Apache License 2.0
package io.iohk.ethereum

import java.io.{File, PrintWriter}
import java.net.{Inet6Address, InetAddress}
import java.security.SecureRandom

import io.iohk.ethereum.crypto._
import org.spongycastle.crypto.AsymmetricCipherKeyPair
import org.spongycastle.crypto.params.ECPublicKeyParameters
import org.spongycastle.math.ec.ECPoint
import org.spongycastle.util.encoders.Hex

import scala.io.Source

package object network {

  val ProtocolVersion = 4

  implicit class ECPublicKeyParametersNodeId(val pubKey: ECPublicKeyParameters) extends AnyVal {
    def toNodeId: Array[Byte] =
      pubKey.asInstanceOf[ECPublicKeyParameters].getQ
      .getEncoded(false)
      .drop(1) // drop type info
  }

  def publicKeyFromNodeId(nodeId: String): ECPoint = {
    val bytes = ECDSASignature.uncompressedIndicator +: Hex.decode(nodeId)
    curve.getCurve.decodePoint(bytes)
  }

  def loadAsymmetricCipherKeyPair(filePath: String, secureRandom: SecureRandom): AsymmetricCipherKeyPair = {
    val file = new File(filePath)
    if(!file.exists()){
      val keysValuePair = generateKeyPair(secureRandom)

      //Write keys to file
      val (priv, _) = keyPairToByteArrays(keysValuePair)
      require(file.getParentFile.exists() || file.getParentFile.mkdirs(), "Key's file parent directory creation failed")
      val writer = new PrintWriter(filePath)
      try {
        writer.write(Hex.toHexString(priv))
      } finally {
        writer.close()
      }

      keysValuePair
    } else {
      val reader = Source.fromFile(filePath)
      try {
        val privHex = reader.mkString
        keyPairFromPrvKey(Hex.decode(privHex))
      } finally {
        reader.close()
      }
    }
  }

  
  def getHostName(address: InetAddress): String = {
    val hostName = address.getHostAddress
    address match {
      case _: Inet6Address => s"[$hostName]"
      case _ => hostName
    }
  }

} 
Example 6
Source File: TFIDF.scala    From AI   with Apache License 2.0
package com.bigchange.mllib

import org.apache.spark.mllib.feature.{HashingTF, IDF}
import org.apache.spark.mllib.linalg.{SparseVector => SV}
import org.apache.spark.{SparkConf, SparkContext}

import scala.io.Source


object TFIDF {
  def main(args: Array[String]) {

    val conf = new SparkConf().setAppName("TfIdfTest")
      .setMaster("local")
    val sc = new SparkContext(conf)

    // Load documents (one per line). Each line is treated as one document; zipWithIndex assigns the line number as the doc id.
    val documents = sc.parallelize(Source.fromFile("J:\\github\\dataSet\\TFIDF-DOC").getLines()
      .filter(_.trim.length > 0).toSeq)
      .map(_.split(" ").toSeq)
      .zipWithIndex()


    // feature number
    val hashingTF = new HashingTF(Math.pow(2, 18).toInt)
    // Use the line number as the doc id; build a TF vector from each line's tokens.
    val idAndTFVector = documents.map {
      case (seq, num) =>
        val tf = hashingTF.transform(seq)
        (num + 1, tf)
    }
    idAndTFVector.cache()
    // build idf model
    val idf = new IDF().fit(idAndTFVector.values)
    // transform tf vector to tf-idf vector
    val idAndTFIDFVector = idAndTFVector.mapValues(v => idf.transform(v))
    // broadcast tf-idf vectors
    val idAndTFIDFVectorBroadCast = sc.broadcast(idAndTFIDFVector.collect())

    // cal doc cosineSimilarity
    val docSims = idAndTFIDFVector.flatMap {
      case (id1, idf1) =>
        // filter the same doc id
        val idfs = idAndTFIDFVectorBroadCast.value.filter(_._1 != id1)
        val sv1 = idf1.asInstanceOf[SV]
        import breeze.linalg._
        val bsv1 = new SparseVector[Double](sv1.indices, sv1.values, sv1.size)
        idfs.map {
          case (id2, idf2) =>
            val sv2 = idf2.asInstanceOf[SV]
            val bsv2 = new SparseVector[Double](sv2.indices, sv2.values, sv2.size)
            val cosSim = bsv1.dot(bsv2) / (norm(bsv1) * norm(bsv2))
            (id1, id2, cosSim)
        }
    }
    docSims.foreach(println)

    sc.stop()

  }
} 
Example 7
Source File: HttpUtil.scala    From sparta   with Apache License 2.0
package com.stratio.benchmark.generator.utils

import org.apache.http.HttpStatus
import org.apache.http.client.methods.{HttpDelete, HttpGet, HttpPost, HttpPut}
import org.apache.http.entity.StringEntity
import org.apache.http.impl.client.HttpClientBuilder
import org.apache.http.util.EntityUtils
import org.apache.log4j.Logger
import org.json4s.DefaultFormats
import org.json4s.native.JsonMethods._

import scala.io.Source

trait HttpUtil   {

  private val logger = Logger.getLogger(this.getClass)

  
  def createPolicy(policyContent: String, endpoint: String)(implicit defaultFormats: DefaultFormats): String = {

    val policyName = (parse(policyContent) \ "name").extract[String]

    // If the policy exists when it launches the benchmark, it should stop and delete it.
    getPolicyId(policyName, endpoint) match {
      case Some(id) =>
        stopPolicy(id, endpoint)
        deletePolicy(id, endpoint)
      case None => logger.debug(s"No policy with name $policyName exists in Sparta yet.")
    }

    val client = HttpClientBuilder.create().build()
    val post = new HttpPost(s"$endpoint/policyContext")
    post.setHeader("Content-type", "application/json")
    post.setEntity(new StringEntity(policyContent))
    val response = client.execute(post)

    if (response.getStatusLine.getStatusCode != HttpStatus.SC_OK)
      throw new IllegalStateException(s"Sparta status code is not OK: ${response.getStatusLine.getStatusCode}")
    else {
      val entity = response.getEntity
      val policyId = (parse(EntityUtils.toString(entity)) \ "policyId").extract[String]
      policyId
    }
  }

  def getPolicyId(name: String, endpoint: String)(implicit defaultFormats: DefaultFormats): Option[String] = {
    val client = HttpClientBuilder.create().build()
    val get = new HttpGet(s"$endpoint/policy/findByName/$name")

    val response = client.execute(get)

    response.getStatusLine.getStatusCode match {
      case HttpStatus.SC_OK =>
        Option((parse(EntityUtils.toString(response.getEntity)) \ "id").extract[String])
      case _ => None
    }
  }

  def stopPolicy(id: String, endpoint: String): Unit = {
    val client = HttpClientBuilder.create().build()
    val put = new HttpPut(s"$endpoint/policyContext")
    put.setHeader("Content-Type", "application/json")
    val entity = new StringEntity(s"""{"id":"$id", "status":"Stopping"}""")
    put.setEntity(entity)
    val response = client.execute(put)

    if(response.getStatusLine.getStatusCode != HttpStatus.SC_CREATED) {
      logger.info(Source.fromInputStream(response.getEntity.getContent).mkString(""))
      logger.info(s"Sparta status code is not OK: ${response.getStatusLine.getStatusCode}")
    }
  }

  def deletePolicy(id: String, endpoint: String): Unit = {
    val client = HttpClientBuilder.create().build()
    val delete = new HttpDelete(s"$endpoint/policy/$id")
    val response = client.execute(delete)

    if(response.getStatusLine.getStatusCode != HttpStatus.SC_OK)
      logger.info(s"Sparta status code is not OK: ${response.getStatusLine.getStatusCode}")
  }
} 
Example 8
Source File: GraphQLSchemaSpec.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.navigator.graphql

import org.scalatest.{Matchers, WordSpec}
import sangria.parser.QueryParser
import sangria.schema.SchemaChange.DescriptionChange
import sangria.schema.Schema

import scala.io.Source

class GraphQLSchemaSpec extends WordSpec with Matchers {
  "The rendered schema" should {
    "match the expected schema definition" in {
      val idl =
        Source.fromInputStream(getClass.getResourceAsStream("/schema.graphql"), "UTF-8").mkString
      val schema = Schema.buildFromAst(QueryParser.parse(idl).get)

      // Compare schemata but ignore description changes.
      val changes = schema
        .compare(new GraphQLSchema(Set()).QuerySchema)
        .filter(!_.isInstanceOf[DescriptionChange])

      if (changes.nonEmpty) {
        fail(
          s"Schema definition does not match:\n- ${changes.map(_.description).mkString("\n- ")}\n")
      }
    }
  }
} 
Example 9
Source File: EquityData.scala    From Scala-Programming-Projects   with MIT License
package retcalc

import scala.io.Source

case class EquityData(monthId: String, value: Double, annualDividend: Double) {
  val monthlyDividend: Double = annualDividend / 12
}

object EquityData {
  def fromResource(resource: String): Vector[EquityData] =
    Source.fromResource(resource).getLines().drop(1).map { line =>
      val fields = line.split("\t")
      EquityData(
        monthId = fields(0),
        value = fields(1).toDouble,
        annualDividend = fields(2).toDouble)
    }.toVector
} 
Example 10
Source File: RawTextSender.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.streaming.util

import java.io.{ByteArrayOutputStream, IOException}
import java.net.ServerSocket
import java.nio.ByteBuffer

import scala.io.Source

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.util.IntParam


private[streaming]
object RawTextSender extends Logging {
  def main(args: Array[String]) {
    if (args.length != 4) {
      // scalastyle:off println
      System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>")
      // scalastyle:on println
      System.exit(1)
    }
    // Parse the arguments using a pattern match
    val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args

    // Repeat the input data multiple times to fill in a buffer
    val lines = Source.fromFile(file).getLines().toArray
    val bufferStream = new ByteArrayOutputStream(blockSize + 1000)
    val ser = new KryoSerializer(new SparkConf()).newInstance()
    val serStream = ser.serializeStream(bufferStream)
    var i = 0
    while (bufferStream.size < blockSize) {
      serStream.writeObject(lines(i))
      i = (i + 1) % lines.length
    }
    val array = bufferStream.toByteArray

    val countBuf = ByteBuffer.wrap(new Array[Byte](4))
    countBuf.putInt(array.length)
    countBuf.flip()

    val serverSocket = new ServerSocket(port)
    logInfo("Listening on port " + port)

    while (true) {
      val socket = serverSocket.accept()
      logInfo("Got a new connection")
      val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec)
      try {
        while (true) {
          out.write(countBuf.array)
          out.write(array)
        }
      } catch {
        case e: IOException =>
          logError("Client disconnected")
      } finally {
        socket.close()
      }
    }
  }
} 
Example 11
Source File: ReplayListenerBus.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.scheduler

import java.io.{InputStream, IOException}

import scala.io.Source

import com.fasterxml.jackson.core.JsonParseException
import org.json4s.jackson.JsonMethods._

import org.apache.spark.internal.Logging
import org.apache.spark.scheduler.ReplayListenerBus._
import org.apache.spark.util.JsonProtocol


private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {

  def replay(
      logData: InputStream,
      sourceName: String,
      maybeTruncated: Boolean = false,
      eventsFilter: ReplayEventsFilter = SELECT_ALL_FILTER): Unit = {

    var currentLine: String = null
    var lineNumber: Int = 0

    try {
      val lineEntries = Source.fromInputStream(logData)
        .getLines()
        .zipWithIndex
        .filter { case (line, _) => eventsFilter(line) }

      while (lineEntries.hasNext) {
        try {
          val entry = lineEntries.next()

          currentLine = entry._1
          lineNumber = entry._2 + 1

          postToAll(JsonProtocol.sparkEventFromJson(parse(currentLine)))
        } catch {
          case jpe: JsonParseException =>
            // We can only ignore exception from last line of the file that might be truncated
            // the last entry may not be the very last line in the event log, but we treat it
            // as such in a best effort to replay the given input
            if (!maybeTruncated || lineEntries.hasNext) {
              throw jpe
            } else {
              logWarning(s"Got JsonParseException from log file $sourceName" +
                s" at line $lineNumber, the file might not have finished writing cleanly.")
            }
        }
      }
    } catch {
      case ioe: IOException =>
        throw ioe
      case e: Exception =>
        logError(s"Exception parsing Spark event log: $sourceName", e)
        logError(s"Malformed line #$lineNumber: $currentLine\n")
    }
  }

}


private[spark] object ReplayListenerBus {

  type ReplayEventsFilter = (String) => Boolean

  // utility filter that selects all event logs during replay
  val SELECT_ALL_FILTER: ReplayEventsFilter = { (eventString: String) => true }
} 
Example 12
Source File: PythonBroadcastSuite.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.api.python

import java.io.{File, PrintWriter}

import scala.io.Source

import org.scalatest.Matchers

import org.apache.spark.{SharedSparkContext, SparkConf, SparkFunSuite}
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.util.Utils

// This test suite uses SharedSparkContext because we need a SparkEnv in order to deserialize
// a PythonBroadcast:
class PythonBroadcastSuite extends SparkFunSuite with Matchers with SharedSparkContext {
  test("PythonBroadcast can be serialized with Kryo (SPARK-4882)") {
    val tempDir = Utils.createTempDir()
    val broadcastedString = "Hello, world!"
    def assertBroadcastIsValid(broadcast: PythonBroadcast): Unit = {
      val source = Source.fromFile(broadcast.path)
      val contents = source.mkString
      source.close()
      contents should be (broadcastedString)
    }
    try {
      val broadcastDataFile: File = {
        val file = new File(tempDir, "broadcastData")
        val printWriter = new PrintWriter(file)
        printWriter.write(broadcastedString)
        printWriter.close()
        file
      }
      val broadcast = new PythonBroadcast(broadcastDataFile.getAbsolutePath)
      assertBroadcastIsValid(broadcast)
      val conf = new SparkConf().set("spark.kryo.registrationRequired", "true")
      val deserializedBroadcast =
        Utils.clone[PythonBroadcast](broadcast, new KryoSerializer(conf).newInstance())
      assertBroadcastIsValid(deserializedBroadcast)
    } finally {
      Utils.deleteRecursively(tempDir)
    }
  }
} 
Example 13
Source File: LogUrlsStandaloneSuite.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.deploy

import java.net.URL

import scala.collection.mutable
import scala.io.Source

import org.apache.spark.{LocalSparkContext, SparkContext, SparkFunSuite}
import org.apache.spark.scheduler.{SparkListener, SparkListenerExecutorAdded}
import org.apache.spark.scheduler.cluster.ExecutorInfo
import org.apache.spark.util.SparkConfWithEnv

class LogUrlsStandaloneSuite extends SparkFunSuite with LocalSparkContext {

  
  private val WAIT_TIMEOUT_MILLIS = 10000

  test("verify that correct log urls get propagated from workers") {
    sc = new SparkContext("local-cluster[2,1,1024]", "test")

    val listener = new SaveExecutorInfo
    sc.addSparkListener(listener)

    // Trigger a job so that executors get added
    sc.parallelize(1 to 100, 4).map(_.toString).count()

    sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
    listener.addedExecutorInfos.values.foreach { info =>
      assert(info.logUrlMap.nonEmpty)
      // Browse to each URL to check that it's valid
      info.logUrlMap.foreach { case (logType, logUrl) =>
        val html = Source.fromURL(logUrl).mkString
        assert(html.contains(s"$logType log page"))
      }
    }
  }

  test("verify that log urls reflect SPARK_PUBLIC_DNS (SPARK-6175)") {
    val SPARK_PUBLIC_DNS = "public_dns"
    val conf = new SparkConfWithEnv(Map("SPARK_PUBLIC_DNS" -> SPARK_PUBLIC_DNS)).set(
      "spark.extraListeners", classOf[SaveExecutorInfo].getName)
    sc = new SparkContext("local-cluster[2,1,1024]", "test", conf)

    // Trigger a job so that executors get added
    sc.parallelize(1 to 100, 4).map(_.toString).count()

    sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
    val listeners = sc.listenerBus.findListenersByClass[SaveExecutorInfo]
    assert(listeners.size === 1)
    val listener = listeners(0)
    listener.addedExecutorInfos.values.foreach { info =>
      assert(info.logUrlMap.nonEmpty)
      info.logUrlMap.values.foreach { logUrl =>
        assert(new URL(logUrl).getHost === SPARK_PUBLIC_DNS)
      }
    }
  }
}

private[spark] class SaveExecutorInfo extends SparkListener {
  val addedExecutorInfos = mutable.Map[String, ExecutorInfo]()

  override def onExecutorAdded(executor: SparkListenerExecutorAdded) {
    addedExecutorInfos(executor.executorId) = executor.executorInfo
  }
} 
Example 14
Source File: Main.scala    From ros_hadoop   with Apache License 2.0
package de.valtech.foss

import scala.io.Source
import scala.collection.mutable.Map
import scala.collection.mutable.ListBuffer
import scala.collection.JavaConverters._
import Console.{GREEN, RED, RESET}
import scala.language.reflectiveCalls

import java.io.File
import java.io.FileInputStream
import java.io.FileOutputStream
import java.nio.channels.FileChannel.MapMode._
import java.nio.ByteOrder._
import java.nio.ByteBuffer

import de.valtech.foss.proto.RosbagIdxOuterClass.RosbagIdx

object Main extends App {
  def help() = {
    Console.err.printf(s"""
${RESET}${GREEN}Usage:
	--file <ros.bag> file to process
	--version print Rosbag version and exit
	--offset <offset> --number <records> Seek at offset < 1073741824 and read the specified number of records
${RESET}By default will just create the protobuf idx file needed for configuration.\n\n""")
    sys.exit(0)
  }

  val pargs = Map[String,AnyRef]()
  def process_cli(args: List[String]) :Boolean = args match {
    case Nil => true // parse success
    case "-v" :: rest => pargs += ("version" -> Some(true)); process_cli(rest)
    case "--version" :: rest => pargs += ("version" -> Some(true)); process_cli(rest)
    case "-f" :: x :: rest => pargs += ("file" -> x); process_cli(rest)
    case "--file" :: x :: rest => pargs += ("file" -> x); process_cli(rest)
    case "-n" :: x :: rest => pargs += ("number" -> Some(x.toInt)); process_cli(rest)
    case "--number" :: x :: rest => pargs += ("number" -> Some(x.toInt)); process_cli(rest)
    case "-o" :: x :: rest => pargs += ("offset" -> Some(x.toInt)); process_cli(rest)
    case "--offset" :: x :: rest => pargs += ("offset" -> Some(x.toInt)); process_cli(rest)
    case "-h" :: rest => help(); false
    case "--help" :: rest => help(); false
    case _ => Console.err.printf(s"${RESET}${RED}Unknown argument " + args.head); false
  }
  process_cli(args.toList)

  def use[T <: { def close() }]
    (resource: T)
    (code: T ⇒ Unit) =
    try
      code(resource)
    finally
      resource.close()

  pargs("file") match {
    case f:String => process()
    case _ => help()
  }

  def process(): Unit = {
    val fin = new File(pargs("file").asInstanceOf[String])
    use(new FileInputStream(fin)) { stream => {
      //printf("min: %s\n", Math.min(1073741824, fin.length) )
      val buffer = stream.getChannel.map(READ_ONLY, 0, Math.min(1073741824, fin.length)).order(LITTLE_ENDIAN)
      val p:RosbagParser = new RosbagParser(buffer)
      val version = p.read_version()
      val h = p.read_record().get
      if(pargs contains "version") {
        printf("%s\n%s\n\n", version, h)
        return
      }
      if(pargs contains "number"){
        buffer position pargs.getOrElse("offset",None).asInstanceOf[Option[Int]].getOrElse(0)
        for(i <- List.range(0,pargs("number").asInstanceOf[Option[Int]].getOrElse(0)))
          println(p.read_record)
        return
      }
      val idxpos = h.header.fields("index_pos").asInstanceOf[Long]
      //printf("idxpos: %s %s\n", idxpos, Math.min(1073741824, fin.length) )
      val b = stream.getChannel.map(READ_ONLY, idxpos, Math.min(1073741824, fin.length - idxpos)).order(LITTLE_ENDIAN)
      val pp:RosbagParser = new RosbagParser(b)
      val c = pp.read_connections(h.header, Nil)
      val chunk_idx = pp.read_chunk_infos(c)
      Console.err.printf(s"""${RESET}${GREEN}Found: """
          + chunk_idx.size
          +s""" chunks\n${RESET}It should be the same number reported by rosbag tool.\nIf you encounter any issues try reindexing your file and submit an issue.
          ${RESET}\n""")
      val fout = new FileOutputStream(pargs("file").asInstanceOf[String] + ".idx.bin")
      val builder = RosbagIdx.newBuilder
      for(i <- chunk_idx) builder.addArray(i)
      builder.build().writeTo(fout)
      fout.close()
      //printf("[%s]\n",chunk_idx.toArray.mkString(","))
    }}
  }
} 
Example 15
Source File: RosbagInputFormat.scala    From ros_hadoop   with Apache License 2.0
package de.valtech.foss

import scala.io.Source
import scala.collection.JavaConverters._

import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{BytesWritable, LongWritable, MapWritable}
import org.apache.hadoop.mapreduce.{InputSplit, JobContext, RecordReader, TaskAttemptContext}
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat

object RosbagInputFormat {
  def getRosChunkIdx(context: JobContext): String = {
    context.getConfiguration.get("RosbagInputFormat.chunkIdx")
  }
  def getBlockSize(context: JobContext): Long = {
    context.getConfiguration.get("dfs.blocksize").toLong
  }
}

class RosbagBytesInputFormat
  extends FileInputFormat[LongWritable, BytesWritable] {

  private var rosChunkIdx = ""
  private var recordLength = -1L

  override def isSplitable(context: JobContext, filename: Path): Boolean = {
    rosChunkIdx = RosbagInputFormat.getRosChunkIdx(context)
    recordLength = RosbagInputFormat.getBlockSize(context)
    true
  }

  override def computeSplitSize(blockSize: Long, minSize: Long, maxSize: Long): Long = {
    val defaultSize = super.computeSplitSize(blockSize, minSize, maxSize)
    defaultSize
  }

  override def createRecordReader(split: InputSplit, context: TaskAttemptContext)
      : RecordReader[LongWritable, BytesWritable] = {
    new RosbagBytesRecordReader
  }
}



class RosbagMapInputFormat
  extends FileInputFormat[LongWritable, MapWritable] {

  private var rosChunkIdx = ""
  private var recordLength = -1L

  override def isSplitable(context: JobContext, filename: Path): Boolean = {
    rosChunkIdx = RosbagInputFormat.getRosChunkIdx(context)
    recordLength = RosbagInputFormat.getBlockSize(context)
    true
  }

  override def computeSplitSize(blockSize: Long, minSize: Long, maxSize: Long): Long = {
    val defaultSize = super.computeSplitSize(blockSize, minSize, maxSize)
    defaultSize
  }

  override def createRecordReader(split: InputSplit, context: TaskAttemptContext)
      : RecordReader[LongWritable, MapWritable] = {
    new RosbagMapRecordReader
  }
} 
Example 16
Source File: SentenceTokenizerSpec.scala    From BigDL   with Apache License 2.0
package com.intel.analytics.bigdl.dataset.text

import java.io.PrintWriter

import com.intel.analytics.bigdl.dataset.DataSet
import com.intel.analytics.bigdl.utils.{Engine, SparkContextLifeCycle}
import org.apache.spark.{SparkConf, SparkContext}
import org.scalatest.{FlatSpec, Matchers}

import scala.io.Source

class SentenceTokenizerSpec extends SparkContextLifeCycle with Matchers {

  override def appName: String = "DocumentTokenizer"

  "SentenceTokenizerSpec" should "tokenizes articles correctly on Spark" in {
    val tmpFile = java.io.File
      .createTempFile("UnitTest", "DocumentTokenizerSpec").getPath

    val sentence1 = "Enter Barnardo and Francisco, two sentinels."
    val sentence2 = "Who’s there?"
    val sentence3 = "I think I hear them. Stand ho! Who is there?"
    val sentence4 = "The Dr. lives in a blue-painted box."

    val sentences = Array(sentence1, sentence2, sentence3, sentence4)
    new PrintWriter(tmpFile) {
      write(sentences.mkString("\n")); close
    }

    val sents = DataSet.rdd(sc.textFile(tmpFile)
      .filter(!_.isEmpty)).transform(SentenceSplitter())
      .toDistributed().data(train = false).flatMap(item => item.iterator).collect()
      .asInstanceOf[Array[String]]
    val tokens = DataSet.rdd(sc.parallelize(sents))
        .transform(SentenceTokenizer())
    val output = tokens.toDistributed().data(train = false).collect()

    var count = 0
    println("tokenized sentences:")
    output.foreach(x => {
      count += x.length
      println(x.mkString(" "))
    })

    val numOfSents = 6
    val numOfWords = 33

    output.length should be (numOfSents)
    count should be (numOfWords)
  }

  "SentenceTokenizerSpec" should "tokenizes articles correctly on local" in {
    val tmpFile = java.io.File
      .createTempFile("UnitTest", "DocumentTokenizerSpec").getPath

    val sentence1 = "Enter Barnardo and Francisco, two sentinels."
    val sentence2 = "Who’s there?"
    val sentence3 = "I think I hear them. Stand ho! Who is there?"
    val sentence4 = "The Dr. lives in a blue-painted box."

    val sentences = Array(sentence1, sentence2, sentence3, sentence4)

    new PrintWriter(tmpFile) {
      write(sentences.mkString("\n")); close
    }

    val sentenceSplitter = SentenceSplitter()
    val sentenceTokenizer = SentenceTokenizer()
    val logData = Source.fromFile(tmpFile).getLines().toArray
    val sents = DataSet.array(logData
      .filter(!_.isEmpty)).transform(sentenceSplitter)
      .toLocal().data(train = false).flatMap(item => item.iterator)
    val tokens = DataSet.array(sents.toArray)
        .transform(sentenceTokenizer)
    val output = tokens.toLocal().data(train = false).toArray

    sentenceSplitter.close()
    sentenceTokenizer.close()

    var count_word = 0
    println("tokenized sentences:")
    output.foreach(x => {
      count_word += x.length
      println(x.mkString(" "))
    })

    val numOfSents = 6
    val numOfWords = 33
    output.length should be (numOfSents)
    count_word should be (numOfWords)
  }
} 
Example 17
Source File: TextToLabeledSentenceSpec.scala    From BigDL   with Apache License 2.0
package com.intel.analytics.bigdl.dataset.text

import java.io.PrintWriter

import com.intel.analytics.bigdl.dataset.DataSet
import com.intel.analytics.bigdl.utils.{Engine, SparkContextLifeCycle}
import org.apache.spark.SparkContext
import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers}

import scala.io.Source

@com.intel.analytics.bigdl.tags.Serial
class TextToLabeledSentenceSpec extends SparkContextLifeCycle with Matchers {
  override def nodeNumber: Int = 1
  override def coreNumber: Int = 1
  override def appName: String = "TextToLabeledSentence"

  "TextToLabeledSentenceSpec" should "indexes sentences correctly on Spark" in {
    val tmpFile = java.io.File
      .createTempFile("UnitTest", "DocumentTokenizerSpec").getPath

    val sentence1 = "Enter Barnardo and Francisco, two sentinels."
    val sentence2 = "Who’s there?"
    val sentence3 = "I think I hear them. Stand ho! Who is there?"
    val sentence4 = "The Dr. lives in a blue-painted box."

    val sentences = Array(sentence1, sentence2, sentence3, sentence4)

    new PrintWriter(tmpFile) {
      write(sentences.mkString("\n")); close
    }

    val tokens = DataSet.rdd(sc.textFile(tmpFile)
      .filter(!_.isEmpty))
      .transform(SentenceTokenizer())
    val output = tokens.toDistributed().data(train = false)
    val dictionary = Dictionary(output, 100)
    val textToLabeledSentence = TextToLabeledSentence[Float](dictionary)
    val labeledSentences = tokens.transform(textToLabeledSentence)
      .toDistributed().data(false).collect()
    labeledSentences.foreach(x => {
      println("input = " + x.data().mkString(","))
      println("target = " + x.label().mkString(","))
      var i = 1
      while (i < x.dataLength()) {
        x.getData(i) should be (x.getLabel(i - 1))
        i += 1
      }
    })
  }

  "TextToLabeledSentenceSpec" should "indexes sentences correctly on Local" in {
    val tmpFile = java.io.File
      .createTempFile("UnitTest", "DocumentTokenizerSpec").getPath

    val sentence1 = "Enter Barnardo and Francisco, two sentinels."
    val sentence2 = "Who’s there?"
    val sentence3 = "I think I hear them. Stand ho! Who is there?"
    val sentence4 = "The Dr. lives in a blue-painted box."

    val sentences = Array(sentence1, sentence2, sentence3, sentence4)

    new PrintWriter(tmpFile) {
      write(sentences.mkString("\n")); close
    }

    val logData = Source.fromFile(tmpFile).getLines().toArray
    val tokens = DataSet.array(logData
      .filter(!_.isEmpty))
      .transform(SentenceTokenizer())
    val output = tokens.toLocal().data(train = false)

    val dictionary = Dictionary(output, 100)
    val textToLabeledSentence = TextToLabeledSentence[Float](dictionary)
    val labeledSentences = tokens.transform(textToLabeledSentence)
      .toLocal().data(false)
    labeledSentences.foreach(x => {
      println("input = " + x.data().mkString(","))
      println("target = " + x.label().mkString(","))
      var i = 1
      while (i < x.dataLength()) {
        x.getData(i) should be (x.getLabel(i - 1))
        i += 1
      }
    })

  }
} 
Example 18
Source File: DictionarySpec.scala    From BigDL   with Apache License 2.0
package com.intel.analytics.bigdl.dataset.text

import java.io.PrintWriter

import com.intel.analytics.bigdl.dataset.DataSet
import com.intel.analytics.bigdl.utils.Engine
import com.intel.analytics.bigdl.utils.SparkContextLifeCycle
import org.apache.spark.{SparkConf, SparkContext}
import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers}

import scala.io.Source

class DictionarySpec extends SparkContextLifeCycle with Matchers {
  override def nodeNumber: Int = 1
  override def coreNumber: Int = 1
  override def appName: String = "DictionarySpec"

  "DictionarySpec" should "creates dictionary correctly on Spark" in {
    val tmpFile = java.io.File
      .createTempFile("UnitTest", "DictionarySpec").getPath

    val sentence1 = "Enter Barnardo and Francisco, two sentinels."
    val sentence2 = "Who’s there?"
    val sentence3 = "I think I hear them. Stand ho! Who is there?"

    val sentences = Array(sentence1, sentence2, sentence3)

    new PrintWriter(tmpFile, "UTF-8") {
      write(sentences.mkString("\n")); close
    }

    val tokens = DataSet.rdd(sc.textFile(tmpFile)
      .filter(!_.isEmpty)).transform(SentenceTokenizer())
    val output = tokens.toDistributed().data(train = false)

    val numOfWords = 21

    val dictionary = Dictionary(output, 100)

    dictionary.getVocabSize() should be (numOfWords)
    dictionary.getDiscardSize() should be (0)
    dictionary.print()
    dictionary.printDiscard()
    dictionary.getVocabSize() should be (numOfWords)
    sc.stop()
  }

  "DictionarySpec" should "creates dictionary correctly on local" in {
    val tmpFile = java.io.File
      .createTempFile("UnitTest", "DictionarySpec").getPath

    val sentence1 = "Enter Barnardo and Francisco, two sentinels."
    val sentence2 = "Who’s there?"
    val sentence3 = "I think I hear them. Stand ho! Who is there?"

    val sentences = Array(sentence1, sentence2, sentence3)

    new PrintWriter(tmpFile, "UTF-8") {
      write(sentences.mkString("\n")); close
    }

    val logData = Source.fromFile(tmpFile, "UTF-8").getLines().toArray
    val tokens = DataSet.array(logData
      .filter(!_.isEmpty)).transform(SentenceTokenizer())
    val output = tokens.toLocal().data(train = false)

    val numOfWords = 21

    val dictionary = Dictionary(output, 100)

    dictionary.getVocabSize() should be (numOfWords)
    dictionary.getDiscardSize() should be (0)
    dictionary.print()
    dictionary.printDiscard()
    dictionary.getVocabSize() should be (numOfWords)
  }
} 
Example 19
Source File: SentenceBiPaddingSpec.scala    From BigDL   with Apache License 2.0
package com.intel.analytics.bigdl.dataset.text

import java.io.PrintWriter

import com.intel.analytics.bigdl.dataset.DataSet
import com.intel.analytics.bigdl.dataset.text.utils.SentenceToken
import com.intel.analytics.bigdl.utils.{Engine, SparkContextLifeCycle}
import org.apache.spark.SparkContext
import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers}

import scala.io.Source

@com.intel.analytics.bigdl.tags.Serial
class SentenceBiPaddingSpec extends SparkContextLifeCycle with Matchers {
  override def nodeNumber: Int = 1
  override def coreNumber: Int = 1
  override def appName: String = "DocumentTokenizer"

  "SentenceBiPaddingSpec" should "pads articles correctly on Spark" in {
    val tmpFile = java.io.File
      .createTempFile("UnitTest", "DocumentTokenizerSpec").getPath

    val sentence1 = "Enter Barnardo and Francisco, two sentinels."
    val sentence2 = "Who’s there?"
    val sentence3 = "I think I hear them. Stand ho! Who is there?"
    val sentence4 = "The Dr. lives in a blue-painted box."

    val sentences = Array(sentence1, sentence2, sentence3, sentence4)
    new PrintWriter(tmpFile) {
      write(sentences.mkString("\n")); close
    }

    val sents = DataSet.rdd(sc.textFile(tmpFile)
      .filter(!_.isEmpty)).transform(SentenceSplitter())
      .toDistributed().data(train = false).flatMap(item => item.iterator).collect()
      .asInstanceOf[Array[String]]
    val tokens = DataSet.rdd(sc.parallelize(sents))
      .transform(SentenceBiPadding())
    val output = tokens.toDistributed().data(train = false).collect()

    var count = 0
    println("padding sentences:")
    output.foreach(x => {
      count += x.length
      println(x)
      val words = x.split(" ")
      val startToken = words(0)
      val endToken = words(words.length - 1)
      startToken should be (SentenceToken.start)
      endToken should be (SentenceToken.end)
    })
    sc.stop()
  }

  "SentenceBiPaddingSpec" should "pads articles correctly on local" in {
    val tmpFile = java.io.File
      .createTempFile("UnitTest", "DocumentTokenizerSpec").getPath

    val sentence1 = "Enter Barnardo and Francisco, two sentinels."
    val sentence2 = "Who’s there?"
    val sentence3 = "I think I hear them. Stand ho! Who is there?"
    val sentence4 = "The Dr. lives in a blue-painted box."

    val sentences = Array(sentence1, sentence2, sentence3, sentence4)

    new PrintWriter(tmpFile) {
      write(sentences.mkString("\n")); close
    }

    val logData = Source.fromFile(tmpFile).getLines().toArray
    val sents = DataSet.array(logData
      .filter(!_.isEmpty)).transform(SentenceSplitter())
      .toLocal().data(train = false).flatMap(item => item.iterator)
    val tokens = DataSet.array(sents.toArray)
      .transform(SentenceBiPadding())
    val output = tokens.toLocal().data(train = false).toArray

    var count_word = 0
    println("padding sentences:")
    output.foreach(x => {
      count_word += x.length
      println(x)
      val words = x.split(" ")
      val startToken = words(0)
      val endToken = words(words.length - 1)
      startToken should be (SentenceToken.start)
      endToken should be (SentenceToken.end)
    })
  }
} 
Example 20
Source File: ExampleFileTests.scala    From circe-yaml   with Apache License 2.0
package io.circe.yaml

import java.io.{ File, InputStreamReader }

import org.scalatest.freespec.AnyFreeSpec
import scala.io.Source

class ExampleFileTests extends AnyFreeSpec {

  "yaml test files" - {

    val testFiles = new File(getClass.getClassLoader.getResource("test-yamls").getPath).listFiles
      .filter(_.getName.endsWith(".yml"))
      .map { file =>
        file.getName -> file.getName.replaceFirst("yml$", "json")
      }

    testFiles.foreach {
      case (yamlFile, jsonFile) =>
        yamlFile in {
          val jsonStream = getClass.getClassLoader.getResourceAsStream(s"test-yamls/$jsonFile")
          val json = Source.fromInputStream(jsonStream).mkString
          jsonStream.close()
          val parsedJson = io.circe.jawn.parse(json)
          def yamlStream = getClass.getClassLoader.getResourceAsStream(s"test-yamls/$yamlFile")
          def yamlReader = new InputStreamReader(yamlStream)
          val yaml = Source.fromInputStream(yamlStream).mkString
          val parsedYamlString = parser.parse(yaml)
          val parsedStreamString = parser.parseDocuments(yaml)
          val parsedYamlReader = parser.parse(yamlReader)
          val parsedStreamReader = parser.parseDocuments(yamlReader)
          assert(parsedJson == parsedYamlString)
          assert(parsedJson == parsedStreamString.head)
          assert(parsedJson == parsedYamlReader)
          assert(parsedJson == parsedStreamReader.head)
        }
    }
  }
} 
Example 21
Source File: QuoteParser.scala    From YahooFinanceScala   with MIT License
package openquant.yahoofinance.impl

import java.time.format.DateTimeFormatter
import java.time.{LocalDate, ZoneId, ZonedDateTime}

import com.github.tototoshi.csv._
import openquant.yahoofinance.Quote

import scala.io.Source


class QuoteParser {
  private[this] val df = DateTimeFormatter.ofPattern("yyyy-MM-dd")
  private[this] val zoneId = ZoneId.of("America/New_York")

  def parse(content: String): Vector[Quote] = {
    val csvReader = CSVReader.open(Source.fromString(content))
    val quotes: Vector[Quote] = csvReader.toStream.drop(1).map { fields ⇒
      parseCSVLine(fields.toVector)
    }.toVector
    quotes
  }

  private def parseCSVLine(field: Vector[String]): Quote = {
    require(field.length >= 7)
    Quote(
      parseDate(field(0)),
      BigDecimal(field(1)),
      BigDecimal(field(4)),
      BigDecimal(field(2)),
      BigDecimal(field(3)),
      BigDecimal(field(5)),
      BigDecimal(field(6))
    )
  }

  private def parseDate(date: String): ZonedDateTime = {
    LocalDate.parse(date, df).atStartOfDay().atZone(zoneId)
  }
}

object QuoteParser {
  def apply() = new QuoteParser
} 
Example 22
Source File: ScenarioLoader.scala    From mantis   with Apache License 2.0
package io.iohk.ethereum.ets.common

import java.io.File

import io.iohk.ethereum.utils.Logger
import org.apache.commons.io.FileUtils

import scala.collection.JavaConverters._
import scala.io.Source


trait ScenarioLoader[T] extends ScenarioParser[T] with Logger {

  def load(path: String, options: TestOptions, ignoredTestNames: Set[String] = Set.empty): List[ScenarioGroup[T]] = {
    val testDir = new File(getClass.getClassLoader.getResource(path).toURI)
    val files = FileUtils.listFiles(testDir, Array("json"), true).asScala.toList

    files.filterNot(file => ignoredTestNames.contains(file.getName)).flatMap { file =>
      val name = file.getAbsolutePath.drop(testDir.getAbsolutePath.length + 1).dropRight(".json".length)

      if (!options.isGroupIncluded(name))
        None
      else {
        log.info(s"Loading test scenarios from: $file")
        val text = Source.fromFile(file).getLines.mkString
        val scenarios = parse(text)
        Some(ScenarioGroup(name, scenarios))
      }
    }
  }
} 
Example 23
Source File: JsonRpcHttpsServer.scala    From mantis   with Apache License 2.0
package io.iohk.ethereum.jsonrpc.server

import java.io.{File, FileInputStream}
import java.security.{KeyStore, SecureRandom}
import javax.net.ssl.{KeyManagerFactory, SSLContext, TrustManagerFactory}

import akka.actor.ActorSystem
import akka.http.scaladsl.model.headers.HttpOriginRange
import akka.http.scaladsl.{ConnectionContext, Http}
import akka.stream.ActorMaterializer
import io.iohk.ethereum.jsonrpc.JsonRpcController
import io.iohk.ethereum.jsonrpc.server.JsonRpcHttpsServer.HttpsSetupResult
import io.iohk.ethereum.jsonrpc.server.JsonRpcServer.JsonRpcServerConfig
import io.iohk.ethereum.utils.Logger

import scala.concurrent.ExecutionContext.Implicits.global
import scala.io.Source
import scala.util.{Failure, Success, Try}

class JsonRpcHttpsServer(val jsonRpcController: JsonRpcController, config: JsonRpcServerConfig,
                         secureRandom: SecureRandom)(implicit val actorSystem: ActorSystem)
  extends JsonRpcServer with Logger {

  def run(): Unit = {
    implicit val materializer = ActorMaterializer()

    val maybeSslContext = validateCertificateFiles(config.certificateKeyStorePath, config.certificateKeyStoreType, config.certificatePasswordFile).flatMap{
      case (keystorePath, keystoreType, passwordFile) =>
        val passwordReader = Source.fromFile(passwordFile)
        try {
          val password = passwordReader.getLines().mkString
          obtainSSLContext(keystorePath, keystoreType, password)
        } finally {
          passwordReader.close()
        }
    }

    val maybeHttpsContext = maybeSslContext.map(sslContext => ConnectionContext.https(sslContext))

    maybeHttpsContext match {
      case Right(httpsContext) =>
        Http().setDefaultServerHttpContext(httpsContext)
        val bindingResultF = Http().bindAndHandle(route, config.interface, config.port, connectionContext = httpsContext)

        bindingResultF onComplete {
          case Success(serverBinding) => log.info(s"JSON RPC HTTPS server listening on ${serverBinding.localAddress}")
          case Failure(ex) => log.error("Cannot start JSON HTTPS RPC server", ex)
        }
      case Left(error) => log.error(s"Cannot start JSON HTTPS RPC server due to: $error")
    }
  }

  
  private def validateCertificateFiles(maybeKeystorePath: Option[String],
                                       maybeKeystoreType: Option[String],
                                       maybePasswordFile: Option[String]): HttpsSetupResult[(String, String, String)] =
    (maybeKeystorePath, maybeKeystoreType, maybePasswordFile) match {
      case (Some(keystorePath), Some(keystoreType), Some(passwordFile)) =>
        val keystoreDirMissing = !new File(keystorePath).isFile
        val passwordFileMissing = !new File(passwordFile).isFile
        if(keystoreDirMissing && passwordFileMissing)
          Left("Certificate keystore path and password file configured but files are missing")
        else if(keystoreDirMissing)
          Left("Certificate keystore path configured but file is missing")
        else if(passwordFileMissing)
          Left("Certificate password file configured but file is missing")
        else
          Right((keystorePath, keystoreType, passwordFile))
      case _ =>
        Left("HTTPS requires: certificate-keystore-path, certificate-keystore-type and certificate-password-file to be configured")
    }

  override def corsAllowedOrigins: HttpOriginRange = config.corsAllowedOrigins
}

object JsonRpcHttpsServer {
  type HttpsSetupResult[T] = Either[String, T]
} 
Example 24
Source File: Utils.scala    From mantis   with Apache License 2.0
package io.iohk.ethereum.vm.utils

import java.io.File

import akka.util.ByteString
import io.circe.parser.decode
import io.circe.generic.extras.Configuration
import io.circe.generic.extras.auto._
import io.circe.Error
import scala.io.Source

object Utils {

  def loadContractCodeFromFile(file: File): ByteString = {
    val src = Source.fromFile(file)
    val raw = try { src.mkString } finally { src.close() }
    ByteString(raw.trim.grouped(2).map(Integer.parseInt(_, 16).toByte).toArray)
  }

  def loadContractAbiFromFile(file: File): Either[Error, List[ABI]] = {
    val src = Source.fromFile(file)
    val raw = try { src.mkString } finally { src.close() }
    implicit val config = Configuration.default.withDefaults
    decode[List[ABI]](raw)
  }

} 
Example 25
Source File: JudgeFile.scala    From CarbonDataLearning   with GNU General Public License v3.0
package org.github.xubo245.carbonDataLearning.etl

import scala.io.Source

object JudgeFile {
  def main(args: Array[String]): Unit = {
    val fileName = "/root/xubo/data/pageviews-20150505key"
    judge(fileName)
    //    val fileName = "/root/xubo/data/pageviews-20150505-000000WithTime"
    //    for (i <- 0 to 9) {
    //      val path = s"/root/xubo/data/pageviews-20150505-0" + i + "0000WithTime"
    //      println("read file " +i + ":" + path)
    //      judge(path)
    //    }
  }
  def judge(fileName:String): Unit ={
    val reader = Source.fromFile(fileName)
    var i=1;
    for (line <- reader.getLines()) {
      val array = line.split('\t')
      if (array.length != 9) {
        println(i + ":\t" + line)
        i = i + 1
      }
      if (array.length>7 && !array(7).matches("[0-9]*")) {
        println(line)
        println(array(7))
      }
      if(array.length>8 && !array(8).equalsIgnoreCase("0")){
        println(line)
      }
      if(line.length>3200){
        println(line)
        println(line.length)
      }
      if(array(6).equalsIgnoreCase("\"")){
        println(line)
      }


    }
  }
} 
Example 26
Source File: WikiETL.scala    From CarbonDataLearning   with GNU General Public License v3.0
package org.github.xubo245.carbonDataLearning.etl

import java.io.{File, PrintWriter}
import java.text.SimpleDateFormat
import java.util.Date

import scala.io.Source
import scala.util.Random

object WikiETL {
  def main(args: Array[String]): Unit = {
    val directory = "/root/xubo/data"
    val files = new File(directory)
    val out = new PrintWriter("/root/xubo/data/pageviews-20150505time")
    var flag:Int = 10000000;
    var typeMap= Map (("b","wikibooks")
      ,("d","wiktionary")
      ,("m","wikimedia")
      ,("mw","wikipedia mobile")
      ,("n","wikinews")
      ,("q","wikiquote")
      ,("s","wikisource")
      ,("v","wikiversity")
      ,("w","mediawiki"))

    for (file <- files.listFiles().sorted.filter(_.getCanonicalFile.getName.contains("pageviews-20150505-"))) {
      val filePath = file.getCanonicalPath
      println(filePath)
      //            val out = new PrintWriter(filePath + "WithTime")
      val reader = Source.fromFile(filePath)
      val fileName = file.getCanonicalFile.getName
      val delimiter = "\t"
      for (line <- reader.getLines()) {
        val stringBuffer = new StringBuffer()
        val random = new Random()
        val id = flag+random.nextInt(1000000)
        stringBuffer
          .append(id).append(delimiter)
          .append(fileName.substring(10, 14)).append(delimiter)
          .append(fileName.substring(14, 16)).append(delimiter)
          .append(fileName.substring(16, 18)).append(delimiter)
          .append(fileName.substring(19, 21)).append(delimiter)
        val array=line.mkString.split("\\s+")

        if (array.length == 4 && array(2).matches("[0-9]*") && !array(1).contains("\"")) {
          val domain = array(0).split('.')
          stringBuffer.append(domain(0)).append(delimiter)
          if (domain.length > 1) {
            var value: String = typeMap.getOrElse(domain(1), "wiki")
            stringBuffer.append(value).append(delimiter)
          } else {
            stringBuffer.append("wiki").append(delimiter)
          }
          val time = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date());
          val tid= id*10+random.nextInt(5)
          stringBuffer.append(array(1).replace('_',' ')).append(delimiter)
            .append(tid).append(delimiter)
            .append(array(2)).append(delimiter)
            .append(random.nextInt(100000)).append(delimiter)
            .append(time)

          //          for (i <- 0 until array.length-1){
          //            stringBuffer.append(array(i)).append(delimiter)
          //          }
          //          stringBuffer.append(array(array.length-1))

          //        if (array.length == 4 && array(2).matches("[0-9]*")) {
          //          id = id + 1
          out.println(stringBuffer.toString)
        }
      }
    }
    out.close()
  }
} 
Example 27
Source File: WaybackSpec.scala    From ArchiveSpark   with MIT License
package org.archive.archivespark.specific.warc.specs

import java.net.URLEncoder

import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.archive.archivespark.dataspecs.DataSpec
import org.archive.archivespark.sparkling.Sparkling
import org.archive.archivespark.sparkling.cdx.CdxRecord
import org.archive.archivespark.sparkling.util.{IteratorUtil, RddUtil, StringUtil}
import org.archive.archivespark.specific.warc.WaybackRecord

import scala.io.Source

class WaybackSpec (cdxServerUrl: String, pages: Int, maxPartitions: Int) extends DataSpec[String, WaybackRecord] {
  override def load(sc: SparkContext, minPartitions: Int): RDD[String] = {
    RddUtil.parallelize(pages, if (maxPartitions == 0) minPartitions else maxPartitions.min(minPartitions)).flatMap{page =>
      try {
        val source = Source.fromURL(cdxServerUrl + "&page=" + page)(StringUtil.codec(Sparkling.DefaultCharset))
        IteratorUtil.cleanup(source.getLines, source.close)
      } catch {
        case e: Exception =>
          e.printStackTrace()
          Iterator.empty
      }
    }.cache
  }

  override def parse(data: String): Option[WaybackRecord] = CdxRecord.fromString(data).map(cdx => new WaybackRecord(cdx))
}

object WaybackSpec {
  def apply(url: String, matchPrefix: Boolean = false, from: Long = 0, to: Long = 0, blocksPerPage: Int = 5, pages: Int = 50, maxPartitions: Int = 0): WaybackSpec = {
    var cdxServerUrl = "http://web.archive.org/cdx/search/cdx?url=$url&matchType=$prefix&pageSize=$blocks"
    cdxServerUrl = cdxServerUrl.replace("$url", URLEncoder.encode(url, "UTF-8"))
    cdxServerUrl = cdxServerUrl.replace("$prefix", if (matchPrefix) "prefix" else "exact")
    cdxServerUrl = cdxServerUrl.replace("$blocks", blocksPerPage.toString)
    if (from > 0) cdxServerUrl += "&from=" + from
    if (to > 0) cdxServerUrl += "&to=" + to
    new WaybackSpec(cdxServerUrl, pages, maxPartitions)
  }
} 
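A minimal usage sketch, assuming a Spark shell with ArchiveSpark on the classpath and a SparkContext named sc in scope; the URL, date range and partition counts are illustrative:

import org.apache.spark.rdd.RDD
import org.archive.archivespark.specific.warc.WaybackRecord
import org.archive.archivespark.specific.warc.specs.WaybackSpec

// Build a spec for all captures under a URL prefix, restricted to a rough date range
val spec = WaybackSpec("http://example.org/", matchPrefix = true, from = 20150101L, to = 20151231L, pages = 10)

// load() issues one CDX API request per page and returns the raw CDX lines
val cdxLines: RDD[String] = spec.load(sc, minPartitions = 10)

// parse() turns each CDX line into a WaybackRecord, silently skipping unparseable lines
val records: RDD[WaybackRecord] = cdxLines.flatMap(line => spec.parse(line))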
Example 29
Source File: FileStreamRecord.scala    From ArchiveSpark   with MIT License 5 votes vote down vote up
package org.archive.archivespark.specific.raw

import java.io.InputStream

import org.archive.archivespark.dataspecs.access.DataAccessor
import org.archive.archivespark.model.{DataEnrichRoot, EnrichRootCompanion}
import org.archive.archivespark.sparkling.io.IOUtil
import org.archive.archivespark.sparkling.util.{IteratorUtil, StringUtil}

import scala.io.Source

class FileStreamRecord(path: String, accessor: DataAccessor[InputStream], retryDelayMs: Option[Int] = None) extends DataEnrichRoot[String, InputStream](path) {
  override def access[R >: Null](action: InputStream => R): R = accessor.access(action)

  def accessSource[R >: Null](action: Source => R): R = access { stream =>
    StringUtil.source(stream) { source =>
      action(source)
    }
  }

  def lineIterator: Iterator[String] = accessor.get match {
    case Some(stream) => IteratorUtil.cleanup(IOUtil.lines(stream), () => stream.close())
    case None => Iterator.empty
  }

  override def companion: EnrichRootCompanion[FileStreamRecord] = FileStreamRecord
}

object FileStreamRecord extends EnrichRootCompanion[FileStreamRecord] 
Example 30
Source File: PropertiesUtil.scala    From versioneye_sbt_plugin   with MIT License 5 votes vote down vote up
package com.versioneye

import java.io.{File, FileOutputStream}
import java.util.Properties

import scala.io.Source


object PropertiesUtil {

  protected val propertiesFile: String = "versioneye.properties"

  def writeProperties(response: ProjectJsonResponse, propertiesFile: File, baseUrl: String): Unit = {
    var properties: Properties = null

    if (!propertiesFile.exists()) {
      createPropertiesFile(propertiesFile)
      properties = new Properties()
    }
    else {
      properties = loadProperties(propertiesFile)
    }

    if (response.getId != null) {
      properties.setProperty("project_id", response.getId)
    }

    val fos = new FileOutputStream(propertiesFile)
    properties.store(fos, s" Properties for $baseUrl")
    fos.close()
  }

  def getProperties(propertiesFile: File): Properties = {
    return loadProperties(propertiesFile)
  }

  private def loadProperties(file: File): Properties = {
    if (!file.exists) {
      return null
    }

    val properties = new Properties()
    val reader = Source.fromFile(file).reader()
    properties.load(reader)
    reader.close()
    return properties
  }

  private def createPropertiesFile(file: File) {
    val parent: File = file.getParentFile
    if (!parent.exists) {
      parent.mkdirs
    }
    file.createNewFile
  }

  def getPropertiesFile(properties: String, projectDirectory: File, withHome: Boolean): File = {
    val candidates = getPropertyFileCandidates(properties, projectDirectory, withHome)
    val firstFile = candidates.find(_.exists())
    return firstFile.orElse(candidates.find(!_.exists())).get
  }

  def containing(key: String, file: File): Boolean = {
    if (!file.exists()) {
      return false
    }

    return loadProperties(file).containsKey(key)
  }

  def getPropertiesFileContainingProperty(key: String, properties: String, projectDirectory: File): Option[File] = {
    val candidates = getPropertyFileCandidates(properties, projectDirectory, true)
    val firstFile = candidates.find(containing(key, _))
    return firstFile
  }

  def getPropertyFileCandidates(properties: String, projectDirectory: File, withHome: Boolean): Seq[File] = {
    if (!properties.isEmpty) {
      return Seq(new File(properties));
    }

    var qaResources = new File(projectDirectory, "src/qa/resources/" + propertiesFile)
    var mainResources = new File(projectDirectory, "src/main/resources/" + propertiesFile)
    var userHome = new File(System.getProperty("user.home") + "/.m2/" + propertiesFile)

    if (withHome)
      return Seq(qaResources, mainResources, userHome)
    else
      return Seq(qaResources, mainResources)
  }

} 
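A short usage sketch, assuming it runs inside the sbt plugin with the project directory at hand; the paths and key are illustrative:

import java.io.File
import com.versioneye.PropertiesUtil

// Resolve versioneye.properties from the project's resource directories
val propsFile: File = PropertiesUtil.getPropertiesFile("", new File("."), withHome = false)

// Check whether a project id has already been stored in that file
val hasProjectId: Boolean = PropertiesUtil.containing("project_id", propsFile)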
Example 31
Source File: ResourceManagement.scala    From scalismo-faces   with Apache License 2.0 5 votes vote down vote up
package scalismo.faces.utils

import java.io.Closeable

import scala.io.Source
import scala.util.control.NonFatal
import scala.util.{Failure, Try}


object ResourceManagement {

  def usingOption[T <: Closeable, R](obj: => Option[T], after: T => Unit = { t: T => t.close() })(block: T => Option[R]): Option[R] = {
    val o: Option[T] = try {
      obj
    } catch {
      case NonFatal(e) => None
    }
    o.flatMap { res =>
      try {
        block(res)
      } finally {
        after(res)
      }
    }
  }

} 
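A small usage sketch for usingOption, assuming Scala 2.12+ where scala.io.Source implements java.io.Closeable (required by the T <: Closeable bound); the file name is illustrative:

import scala.io.Source
import scalismo.faces.utils.ResourceManagement

// Count the lines of a file; the source is closed afterwards even if the block throws
val lineCount: Option[Int] =
  ResourceManagement.usingOption(Some(Source.fromFile("data.txt"))) { src =>
    Some(src.getLines().size)
  }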
Example 32
Source File: LandmarksTest.scala    From scalismo-faces   with Apache License 2.0 5 votes vote down vote up
package scalismo.faces.landmarks

import java.io._

import scalismo.faces.FacesTestSuite
import scalismo.faces.io.TLMSLandmarksIO
import scalismo.faces.utils.ResourceManagement
import scalismo.geometry.{Point, Point2D, Point3D}

import scala.io.Source

class LandmarksTest extends FacesTestSuite {

  def createRandom2DLandmarks(n: Int): IndexedSeq[TLMSLandmark2D] = {
    for (i <- 0 until n) yield TLMSLandmark2D(randomString(rnd.scalaRandom.nextInt(10) + 1), new Point2D(rnd.scalaRandom.nextDouble, rnd.scalaRandom.nextDouble), rnd.scalaRandom.nextBoolean())
  }

  def createRandom3DLandmarks(n: Int): IndexedSeq[TLMSLandmark3D] = {
    for (i <- 0 until n) yield TLMSLandmark3D(randomString(rnd.scalaRandom.nextInt(10) + 1), new Point3D(rnd.scalaRandom.nextDouble, rnd.scalaRandom.nextDouble, rnd.scalaRandom.nextDouble), rnd.scalaRandom.nextBoolean())
  }

  describe("Landmarks 2D") {
    val lms = createRandom2DLandmarks(25)

    it("can write and read from file (enforce TLMS Float format)") {
      val tmpFile = File.createTempFile("tlms2d", ".tlms")
      tmpFile.deleteOnExit()
      TLMSLandmarksIO.write2D(lms, tmpFile).get
      val readLM = TLMSLandmarksIO.read2D(tmpFile).get
      // cast landmarks to float
      val floatLM = lms.map{lm => lm.copy(point = Point(lm.point.x.toFloat, lm.point.y.toFloat))}
      // should not write/read as double
      readLM should not be lms
      // but as float
      readLM shouldBe floatLM
    }

    it("can write to an existing output stream without closing it") {
      val f = File.createTempFile("tlms2d", ".txt")
      f.deleteOnExit()
      val oStream = new FileOutputStream(f)
      TLMSLandmarksIO.write2DToStream(lms, oStream).get
      ResourceManagement.using(new PrintWriter(oStream)) { writer =>
        writer.println("stream should still accept more text")
      }
      Source.fromFile(f).getLines().length should be (lms.length + 1)
    }

    it("can be converted to Landmarks") {
      val scLMs = lms.map(lm => lm.toLandmark)
      lms.zip(scLMs).foreach{ case(tlm, scLM) =>
        scLM.id shouldBe tlm.id
        scLM.point shouldBe tlm.point
      }
    }
  }

  describe("Landmarks 3D") {
    val lms = createRandom3DLandmarks(25)

    it("can write / read from file (and properly convert to Float thereby, TLMS is float)") {
      val tmpFile = File.createTempFile("tlms3d",".tlms")
      tmpFile.deleteOnExit()
      TLMSLandmarksIO.write3D(lms, tmpFile).get
      val readLM = TLMSLandmarksIO.read3D(tmpFile).get
      val floatLM = lms.map{lm => lm.copy(point = Point(lm.point.x.toFloat, lm.point.y.toFloat, lm.point.z.toFloat))}
      // should not write/read as double
      readLM should not be lms
      // but as float
      readLM shouldBe floatLM
    }

    it("can write to an existing output stream without closing it") {
      val f = File.createTempFile("tlms3d", ".txt")
      f.deleteOnExit()
      val oStream = new FileOutputStream(f)
      TLMSLandmarksIO.write3DToStream(lms, oStream).get
      ResourceManagement.using(new PrintWriter(oStream)) { writer =>
        writer.println("stream should still accept more text")
      }
      Source.fromFile(f).getLines().length should be (lms.length + 1)
    }

    it("can be converted to Landmarks") {
      val scLMs = lms.map(lm => lm.toLandmark)
      lms.zip(scLMs).foreach{ case(tlm, scLM) =>
        scLM.id shouldBe tlm.id
        scLM.point shouldBe tlm.point
      }
    }
  }
} 
Example 33
Source File: TSV.scala    From low-rank-logic   with MIT License 5 votes vote down vote up
package uclmr.io

import uclmr.{DefaultIx, Cell, CellType, TensorKB}
import ml.wolfe.util.{ProgressBar, Conf}

import scala.io.Source
import scala.util.Random


object LoadTSV extends App {
  def apply(k: Int = 100, subsample: Double = 1.0, db: TensorKB = null, filePath: String = Conf.getString("inputFile")): TensorKB = {
    val kb = if (db != null) db else new TensorKB(k)
    val rand = new Random(0l)

    val lines = Source.fromFile(filePath).getLines()

    val progressBar = new ProgressBar(Source.fromFile(filePath).getLines().size, 100000)
    progressBar.start()

    for {
      fact <- lines
      Array(r, e1, e2, typ, target) = fact.split("\t")
    } {
      val cellType = typ match {
        case "Train" => CellType.Train
        case "Test" => CellType.Test
        case "Dev" => CellType.Dev
        case "Observed" => CellType.Observed
      }

      if (rand.nextDouble() < subsample) {
        val cell = Cell(r, (e1, e2), DefaultIx, target.toDouble, cellType)
        kb += cell
      }

      progressBar(r)
    }

    kb
  }
} 
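A usage sketch; the file path and parameters are illustrative (by default the path is taken from the inputFile configuration key via Conf):

import uclmr.io.LoadTSV

// Build a TensorKB with 100 latent components from a tab-separated fact file,
// keeping roughly half of the cells (each cell is sampled independently)
val kb = LoadTSV(k = 100, subsample = 0.5, filePath = "data/facts.tsv")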
Example 34
Source File: LuceneRDDMoreLikeThisSpec.scala    From spark-lucenerdd   with Apache License 2.0 5 votes vote down vote up
package org.zouzias.spark.lucenerdd

import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.SparkConf
import scala.collection.JavaConverters._
import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers}

import scala.io.Source

class LuceneRDDMoreLikeThisSpec extends FlatSpec
  with Matchers
  with BeforeAndAfterEach
  with SharedSparkContext {

  var luceneRDD: LuceneRDD[_] = _


  override val conf = LuceneRDDKryoRegistrator.registerKryoClasses(new SparkConf().
    setMaster("local[*]").
    setAppName("test").
    set("spark.ui.enabled", "false").
    set("spark.app.id", appID))

  override def afterEach() {
    luceneRDD.close()
  }

  "LuceneRDD.moreLikeThis" should "return relevant documents" in {
    val words: Seq[String] = Source.fromFile("src/test/resources/alice.txt")
      .getLines().map(_.toLowerCase).toSeq
    val rdd = sc.parallelize(words)
    luceneRDD = LuceneRDD(rdd)
    val results = luceneRDD
      .moreLikeThis("_1", "alice adventures wonderland", 1, 1)
      .collect()

    results.length > 0 should equal(true)
    val firstDoc = results.head
    val x = firstDoc.getString(firstDoc.fieldIndex("_1"))

    x.contains("alice") &&
      x.contains("wonderland") &&
      x.contains("adventures") should equal(true)

    val lastDoc = results.last
    val y = lastDoc.getString(lastDoc.fieldIndex("_1"))


      y.contains("alice") &&
        !y.contains("wonderland") &&
        !y.contains("adventures") should equal(true)

  }
} 
Example 35
Source File: linkageFuzzyExample.scala    From spark-lucenerdd   with Apache License 2.0 5 votes vote down vote up
import scala.io.Source
import org.apache.spark.rdd.RDD
import org.zouzias.spark.lucenerdd._
import org.zouzias.spark.lucenerdd.LuceneRDD
import org.zouzias.spark.lucenerdd.models.SparkScoreDoc

// Step 1: Query prefixes of countries
// Shooting for Greece, Germany, Belgium and Italy
val leftCountries = Array("gree", "germa", "belgi", "ita")
val leftCountriesRDD: RDD[String] = sc.parallelize(leftCountries)

// Step 2: Load all country names
val countries = sc.parallelize(Source.fromFile("src/test/resources/countries.txt").getLines()
  .map(_.toLowerCase()).toSeq)
val luceneRDD = LuceneRDD(countries)
luceneRDD.cache()

// Step 3: Define you linkage function (prefix)
def fuzzyLinker(country: String): String = {
  val Fuzziness = 2
  s"_1:${country}~${Fuzziness}"
}

// Step 4: Perform the linkage
val linked: RDD[(String, Array[SparkScoreDoc])] = luceneRDD.link(leftCountriesRDD, fuzzyLinker, 10)

// Step 5: View the results
linked.foreach(x => println((x._1, x._2.mkString(","))))

// (spa,List(SparkScoreDoc(5.1271343,84,0,Text fields:_1:[spain])))
// (gree,List(SparkScoreDoc(5.1271343,86,0,Text fields:_1:[greece])))
// (germa,List(SparkScoreDoc(5.127134,83,0,Text fields:_1:[germany])))
// (ita,List(SparkScoreDoc(2.9601524,106,0,Text fields:_1:[italy]), SparkScoreDoc(2.9601524,102,0,Text fields:_1:[iraq]), SparkScoreDoc(2.9601524,101,0,Text fields:_1:[iran])) 
Example 36
Source File: linkagePrefixExample.scala    From spark-lucenerdd   with Apache License 2.0 5 votes vote down vote up
import scala.io.Source
import org.apache.spark.rdd.RDD
import org.zouzias.spark.lucenerdd._
import org.zouzias.spark.lucenerdd.LuceneRDD
import org.zouzias.spark.lucenerdd.models.SparkScoreDoc

// Step 1: Query prefixes of countries
// Shooting for Greece, Russia, Argentina and Belgium
val leftCountries = Array("gre", "ru", "ar", "bel")
val leftCountriesRDD: RDD[String] = sc.parallelize(leftCountries)

// Step 2: Load all country names
val countries = sc.parallelize(Source.fromFile("src/test/resources/countries.txt").getLines()
  .map(_.toLowerCase()).toSeq)
val luceneRDD = LuceneRDD(countries)

// Step 3: Define you linkage function (prefix)
def prefixLinker(country: String): String = {
  s"_1:${country}*"
}

// Step 4: Perform the linkage
val linked: RDD[(String, Array[SparkScoreDoc])] = luceneRDD.link(leftCountriesRDD, prefixLinker, 10)

// Step 5: View the results
linked.foreach(x => println((x._1, x._2.mkString(","))))

// (gre,List(SparkScoreDoc(1.0,88,0,Text fields:_1:[grenada]), SparkScoreDoc(1.0,87,0,Text fields:_1:[greenland]), SparkScoreDoc(1.0,86,0,Text fields:_1:[greece])))
// (ar,List(SparkScoreDoc(1.0,12,0,Text fields:_1:[aruba]), SparkScoreDoc(1.0,11,0,Text fields:_1:[armenia]), SparkScoreDoc(1.0,10,0,Text fields:_1:[argentina])))
// (ru,List(SparkScoreDoc(1.0,55,0,Text fields:_1:[russia])))
// (be,List(SparkScoreDoc(1.0,25,0,Text fields:_1:[bermuda]), SparkScoreDoc(1.0,24,0,Text fields:_1:[benin]), SparkScoreDoc(1.0,23,0,Text fields:_1:[belize]), SparkScoreDoc(1.0,22,0,Text fields:_1:[belgium]), SparkScoreDoc(1.0,21,0,Text fields:_1:[belarus]))) 
Example 37
Source File: loadCities.scala    From spark-lucenerdd   with Apache License 2.0 5 votes vote down vote up
sc.setLogLevel("INFO")

import scala.io.Source
import org.zouzias.spark.lucenerdd.partition.LuceneRDDPartition
import org.zouzias.spark.lucenerdd._
import org.zouzias.spark.lucenerdd.LuceneRDD

val cities = Source.fromFile("src/test/resources/cities.txt").getLines().toSeq
val rdd = sc.parallelize(cities)
val luceneRDD = LuceneRDD(rdd)
luceneRDD.cache
luceneRDD.count

println("=" * 20)
luceneRDD.termQuery("_1", "toronto").take(10)

println("=" * 20)
luceneRDD.termQuery("_1", "athens").take(10)

println("=" * 20)
luceneRDD.termQuery("_1", "bern").take(10)

println("=" * 20)
luceneRDD.termQuery("_1", "madrid").take(10) 
Example 38
Source File: package.scala    From scalda   with MIT License 5 votes vote down vote up
package com.nitro.scalda

import java.io.File

import scala.io.Source

package object examples {

  val lines: File => Iterator[String] =
    f => Source.fromFile(f).getLines()

  val text: File => String =
    lines andThen { _.mkString(" ") }

  def log(message: => String, on: Boolean = true): Unit =
    if (on)
      System.err.println(message)
    else
      ()

  def getOrElse(args: Array[String])(index: Int, alt: => String): String =
    Option(args(index)).getOrElse(alt)

  def getOrElse[T](
    args: Array[String],
    index: Int,
    alt: => T,
    convert: String => T
  ): T =
    Option(args(index))
      .map(convert)
      .getOrElse(alt)

} 
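A usage sketch of these helpers, with illustrative argument values:

import java.io.File
import com.nitro.scalda.examples._

val args = Array("corpus/docs.txt", "25")

// First argument with a fallback (curried String variant)
val corpusPath = getOrElse(args)(0, "corpus/default.txt")

// Second argument converted to Int with a fallback (generic variant)
val numTopics = getOrElse(args, 1, 20, (s: String) => s.toInt)

// Read a whole document as a single space-joined string
val document = text(new File(corpusPath))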
Example 39
Source File: JobLauncher.scala    From incubator-s2graph   with Apache License 2.0 5 votes vote down vote up
package org.apache.s2graph.s2jobs

import org.apache.s2graph.s2jobs.udfs.Udf
import org.apache.spark.sql.SparkSession
import play.api.libs.json.{JsValue, Json}

import scala.io.Source

case class JobOption(
                      name:String = "S2BatchJob",
                      confType:String = "db",
                      jobId:Int = -1,
                      confFile:String = ""
                    )

object JobLauncher extends Logger {

  def parseArguments(args: Array[String]): JobOption = {
    val parser = new scopt.OptionParser[JobOption]("run") {
      opt[String]('n', "name").required().action((x, c) =>
        c.copy(name = x)).text("job display name")

      cmd("file").action((_, c) => c.copy(confType = "file"))
        .text("get config from file")
        .children(
          opt[String]('f', "confFile").required().valueName("<file>").action((x, c) =>
            c.copy(confFile = x)).text("configuration file")
        )

      cmd("db").action((_, c) => c.copy(confType = "db"))
        .text("get config from db")
        .children(
          opt[String]('i', "jobId").required().valueName("<jobId>").action((x, c) =>
            c.copy(jobId = x.toInt)).text("job id")
        )
    }

    parser.parse(args, JobOption()) match {
      case Some(o) => o
      case None =>
        parser.showUsage()
        throw new IllegalArgumentException(s"failed to parse options... (${args.mkString(",")})")
    }
  }

  def getConfig(options: JobOption):JsValue = options.confType match {
    case "file" =>
      Json.parse(Source.fromFile(options.confFile).mkString)
    case "db" =>
      throw new IllegalArgumentException(s"'db' option that read config file from database is not supported yet.. ")
  }

  def main(args: Array[String]): Unit = {

    val options = parseArguments(args)
    logger.info(s"Job Options : ${options}")

    val jobDescription = JobDescription(getConfig(options))

    val ss = SparkSession
      .builder()
      .appName(s"${jobDescription.name}")
      .config("spark.driver.maxResultSize", "20g")
      .enableHiveSupport()
      .getOrCreate()

    // register udfs
    jobDescription.udfs.foreach{ udfOption =>
      val udf = Class.forName(udfOption.`class`).newInstance().asInstanceOf[Udf]
      logger.info((s"[udf register] ${udfOption}"))
      udf.register(ss, udfOption.name, udfOption.params.getOrElse(Map.empty))
    }

    val job = new Job(ss, jobDescription)
    job.run()
  }
} 
Example 40
Source File: Bootstrap.scala    From incubator-s2graph   with Apache License 2.0 5 votes vote down vote up
package org.apache.s2graph.rest.play

import java.util.concurrent.Executors

import org.apache.s2graph.core.rest.{RequestParser, RestHandler}
import org.apache.s2graph.core.utils.logger
import org.apache.s2graph.core.{ExceptionHandler, S2Graph, Management}
import org.apache.s2graph.rest.play.actors.QueueActor
import org.apache.s2graph.rest.play.config.Config
import org.apache.s2graph.rest.play.controllers.ApplicationController
import play.api.Application
import play.api.mvc.{WithFilters, _}
import play.filters.gzip.GzipFilter

import scala.concurrent.{ExecutionContext, Future}
import scala.io.Source
import scala.util.Try

object Global extends WithFilters(new GzipFilter()) {
  var s2graph: S2Graph = _
  var storageManagement: Management = _
  var s2parser: RequestParser = _
  var s2rest: RestHandler = _
  var wallLogHandler: ExceptionHandler = _

  def startup() = {
    val numOfThread = Runtime.getRuntime.availableProcessors()
    val threadPool = Executors.newFixedThreadPool(numOfThread)
    val ec = ExecutionContext.fromExecutor(threadPool)

    val config = Config.conf.underlying

    // init s2graph with config
    s2graph = new S2Graph(config)(ec)
    storageManagement = new Management(s2graph)
    s2parser = new RequestParser(s2graph) 
    s2rest = new RestHandler(s2graph)(ec)

    logger.info(s"starts with num of thread: $numOfThread, ${threadPool.getClass.getSimpleName}")

    config
  }

  def shutdown() = {
    s2graph.shutdown()
  }

  // Application entry point
  override def onStart(app: Application) {
    ApplicationController.isHealthy = false

    val config = startup()
    wallLogHandler = new ExceptionHandler(config)

    QueueActor.init(s2graph, wallLogHandler)

    val defaultHealthOn = Config.conf.getBoolean("app.health.on").getOrElse(true)
    ApplicationController.deployInfo = Try(Source.fromFile("./release_info").mkString("")).recover { case _ => "release info not found\n" }.get

    ApplicationController.isHealthy = defaultHealthOn
  }

  override def onStop(app: Application) {
    wallLogHandler.shutdown()
    QueueActor.shutdown()

    
    shutdown()
  }

  override def onError(request: RequestHeader, ex: Throwable): Future[Result] = {
    logger.error(s"onError => ip:${request.remoteAddress}, request:${request}", ex)
    Future.successful(Results.InternalServerError)
  }

  override def onHandlerNotFound(request: RequestHeader): Future[Result] = {
    logger.error(s"onHandlerNotFound => ip:${request.remoteAddress}, request:${request}")
    Future.successful(Results.NotFound)
  }

  override def onBadRequest(request: RequestHeader, error: String): Future[Result] = {
    logger.error(s"onBadRequest => ip:${request.remoteAddress}, request:$request, error:$error")
    Future.successful(Results.BadRequest(error))
  }
} 
Example 41
Source File: RequestDeleter.scala    From matcher   with MIT License 5 votes vote down vote up
package com.wavesplatform.dex.load

import java.io.{File, PrintWriter}
import java.nio.file.Files

import scala.io.Source

object RequestDeleter {

  def delRequests(file: File, deletedCount: Int): Unit = {
    if (Files.exists(file.toPath)) {
      val source = Source.fromFile(file)
      val outputFile = s"requests-after-drop-${System.currentTimeMillis}.txt"
      val output = new PrintWriter(outputFile, "utf-8")

      var i = 0
      var j = 0
      var r = 0

      try {
        source
          .getLines()
          .map(line => {
            if (r < deletedCount)
              i = i + 1
            if (line.isEmpty || line.indexOf("{") == 0) {
              j = j + 1
              if (j % 3 == 0) {
                j = 0
                r = r + 1
              }
            }
            line
          })
          .drop(i)
          .foreach(line => output.print(s"$line\r\n"))
        println(s"$deletedCount of $r requests have been dropped from ${file.getAbsolutePath}, and saved to $outputFile")
      } finally output.close()
    }
  }
} 
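A usage sketch; the file name and count are illustrative:

import java.io.File
import com.wavesplatform.dex.load.RequestDeleter

// Drop roughly the first 500 requests from a generated request file; the remaining
// requests are written to a new requests-after-drop-<timestamp>.txt file
RequestDeleter.delRequests(new File("requests.txt"), 500)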
Example 42
Source File: WsAccumulateChanges.scala    From matcher   with MIT License 5 votes vote down vote up
package com.wavesplatform.dex.load

import java.io.File

import akka.actor.ActorSystem
import com.wavesplatform.dex.load.ws.WsCollectChangesClient

import scala.io.Source
import scala.util.Random

object WsAccumulateChanges {

  def createClients(apiUri: String, feederFile: File, accountsNumber: Int)(implicit system: ActorSystem): Seq[WsCollectChangesClient] =
    readRandomAccountLines(feederFile, accountsNumber).map { accountLine =>
      val fields = accountLine.split(';')

      val addr = fields(0)
      val aus  = fields(1)
      val obs  = fields.drop(2)

      new WsCollectChangesClient(apiUri, addr, aus, obs)
    }

  private def readRandomAccountLines(feederFile: File, accountsNumber: Int): Seq[String] = {
    val source = Source.fromFile(feederFile)
    try {
      val lines = source.getLines()
      val r     = lines.take(accountsNumber).toArray
      lines.foreach { line =>
        // With 30% probability, replace a random slot so accounts beyond the first batch can also be picked
        if (Random.nextDouble() < 0.3) r.update(Random.nextInt(accountsNumber), line)
      }
      r
    } finally source.close()
  }
} 
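A usage sketch, assuming an Akka ActorSystem and a previously generated feeder file; the WebSocket URI is illustrative:

import java.io.File
import akka.actor.ActorSystem
import com.wavesplatform.dex.load.WsAccumulateChanges

implicit val system: ActorSystem = ActorSystem("ws-load")

// Create collector clients for 100 accounts picked (partly at random) from the feeder file
val clients = WsAccumulateChanges.createClients("wss://matcher.example.com/ws/v0", new File("feeder.csv"), accountsNumber = 100)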
Example 43
Source File: GatlingFeeder.scala    From matcher   with MIT License 5 votes vote down vote up
package com.wavesplatform.dex.load

import java.io.{File, PrintWriter}
import java.security
import java.security.KeyFactory
import java.security.spec.PKCS8EncodedKeySpec
import java.util.Base64

import com.wavesplatform.dex.api.ws.protocol.WsAddressSubscribe.JwtPayload
import com.wavesplatform.dex.auth.JwtUtils
import com.wavesplatform.dex.domain.account.{AddressScheme, PrivateKey, PublicKey}
import com.wavesplatform.dex.domain.bytes.ByteStr
import com.wavesplatform.wavesj.PrivateKeyAccount
import play.api.libs.json.Json

import scala.concurrent.duration._
import scala.io.Source
import scala.util.Random

object GatlingFeeder {

  def authServiceKeyPair(rawPrivateKey: String): security.PrivateKey = {
    val privateKeyContent = rawPrivateKey
      .replace("-----BEGIN PRIVATE KEY-----", "")
      .replace("-----END PRIVATE KEY-----", "")
      .replaceAll("\\n", "")

    val kf         = KeyFactory.getInstance("RSA")
    val ksPkcs8    = new PKCS8EncodedKeySpec(Base64.getDecoder.decode(privateKeyContent))
    val privateKey = kf.generatePrivate(ksPkcs8)

    privateKey
  }

  private def mkJwtSignedPayload(a: PrivateKeyAccount): JwtPayload = {
    val exp = System.currentTimeMillis() / 1000 + 24.hour.toSeconds
    JwtPayload(
      signature = ByteStr(Array.emptyByteArray),
      publicKey = PublicKey(a.getPublicKey),
      networkByte = AddressScheme.current.chainId.toChar.toString,
      clientId = "test",
      firstTokenExpirationInSeconds = exp,
      activeTokenExpirationInSeconds = exp,
      scope = List("general")
    ).signed(PrivateKey(a.getPrivateKey))
  }

  private def mkAusString(accountPrivateKey: PrivateKeyAccount, authKp: security.PrivateKey): String = {
    s"""{"T":"aus","S":"${accountPrivateKey.getAddress}","t":"jwt","j":"${JwtUtils.mkJwt(authKp,
                                                                                         Json.toJsObject(mkJwtSignedPayload(accountPrivateKey)))}"}"""
  }

  private def mkObsStrings(pairsFile: File, numberPerClient: Int): String = {
    val source = Source.fromFile(pairsFile)
    try {
      val pairs = Random.shuffle(source.getLines.toVector)
      require(numberPerClient <= pairs.size, "numberPerClient > available asset pairs in file")
      pairs.take(numberPerClient).map(x => s"""{"T":"obs","S":"$x","d":100}""").mkString(";")
    } finally source.close()
  }

  def mkFile(accountsNumber: Int,
             seedPrefix: String,
             authKp: security.PrivateKey,
             pairsFile: File,
             orderBookNumberPerAccount: Int,
             feederFile: File): Unit = {
    val output = new PrintWriter(feederFile, "utf-8")
    try {
      (0 until accountsNumber).foreach { i =>
        val pk = PrivateKeyAccount.fromSeed(s"$seedPrefix$i", 0, AddressScheme.current.chainId)
        output.println(s"""${pk.getAddress};${mkAusString(pk, authKp)};${mkObsStrings(pairsFile, orderBookNumberPerAccount)}""")
      }
    } finally output.close()
    println(s"Results have been saved to $feederFile")
  }
} 
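A wiring sketch; every path, the seed prefix and the account count are illustrative, and AddressScheme.current is assumed to be configured by the surrounding tooling:

import java.io.File
import scala.io.Source
import com.wavesplatform.dex.load.GatlingFeeder

// PKCS#8 PEM key used to sign the JWTs
val authKey = GatlingFeeder.authServiceKeyPair(Source.fromFile("auth-key.pem").mkString)

GatlingFeeder.mkFile(
  accountsNumber = 1000,
  seedPrefix = "load-test-",
  authKp = authKey,
  pairsFile = new File("pairs.txt"),   // one asset pair per line
  orderBookNumberPerAccount = 10,
  feederFile = new File("feeder.csv")  // output consumed by the Gatling scenario
)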
Example 44
Source File: FileUtil.scala    From piflow   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
package cn.piflow.conf.util

import scala.io.Source

object FileUtil {

  def fileReader(filePath : String) : String = {
    var str = ""
    val file = Source.fromFile(filePath)
    val iter = file.buffered
    while (iter.hasNext){
      val ch = iter.head // the buffered Source yields characters, so the file is appended char by char
      str += ch
      iter.next()
    }
    file.close()
    str
  }

} 
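A usage sketch; the path is illustrative:

import cn.piflow.conf.util.FileUtil

// Read an entire flow definition into one string (the reader walks the file character by character)
val flowJson: String = FileUtil.fileReader("conf/flow.json")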
Example 45
Source File: MPN.scala    From utils   with Apache License 2.0 5 votes vote down vote up
package com.indix.utils.core

import org.apache.commons.lang3.StringUtils
import org.apache.commons.lang3.text.WordUtils

import scala.io.Source

object MPN {
  // Some domain specific keywords known to be invalid
  val BlackListedMpns = Source.fromInputStream(getClass.getResourceAsStream("/BlacklistMPNs.txt")).getLines.toSet

  val StopChars = Set(' ', '-', '_', '.', '/')
  val TerminateChars = Set(',', '"', '*', '%', '{', '}', "#", '&', '\\')

  val MaxLen = 50
  val MinLen = 3

  // Does not consider one word strings as title-case phrase
  def isTitleCase(str: String): Boolean = {
    val words = str.split(' ').filter(_.nonEmpty)
    if (words.length < 2) false
    else words.forall(w => w == WordUtils.capitalizeFully(w))
  }

  def postProcessIdentifier(input: String): String = {
    val trimmedUpper = input.trim.toUpperCase
    trimmedUpper
  }

  // Check if identifier is valid, also return the identifier to process further if any
  def validateIdentifier(text: String): (Boolean, String) = {
    val input = if (text != null) text.trim() else text
    input match {
      case _ if StringUtils.isBlank(input) || input.length > MaxLen || input.length < MinLen => (false, "")
      case _ if input.count(c => TerminateChars.contains(c)) > 1 => (false, input)
      case _ if BlackListedMpns.contains(input.toLowerCase) => (false, "")
      case _ if isTitleCase(input) => (false, "")
      // Unicode strings yet to be handled
      case _ => (true, input)
    }
  }

  def isValidIdentifier(value: String): Boolean = validateIdentifier(value)._1

  def standardizeMPN(input: String): Option[String] = {
    val (isValid, identifier) = validateIdentifier(input)
    if (isValid) {
      Some(postProcessIdentifier(identifier))
    } else if (StringUtils.isBlank(identifier)) {
      None
    } else if (identifier.indexWhere(c => TerminateChars.contains(c)) > 0) {
      Some(postProcessIdentifier(identifier.substring(0, identifier.indexWhere(c => TerminateChars.contains(c)))))
    }
    else None
  }
} 
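A few illustrative calls (inputs are invented; the expected results follow from the rules above, assuming none of the inputs appear in the blacklist resource):

import com.indix.utils.core.MPN

MPN.standardizeMPN("  gp-1234x/b ")           // expected Some("GP-1234X/B"): trimmed and upper-cased
MPN.standardizeMPN("Stainless Steel Bracket") // expected None: a title-case phrase looks like a product title
MPN.standardizeMPN("ab")                      // expected None: shorter than MinLen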
Example 46
Source File: FilesAndArchivesTest.scala    From AI   with Apache License 2.0 5 votes vote down vote up
package com.bigchange.basic

import java.io.File

import org.apache.spark.{SparkConf, SparkContext}

import scala.io.Source


object FilesAndArchivesTest {

  def main(args: Array[String]): Unit = {
    // Note: the user submits with --files localtest.txt#appSees.txt, so the arguments also need to include appSees.txt so that we know the alias
    if (args.length < 3) {
      System.err.println("Usage: <files> <archives> <words>" + "\n" +
        "files - (string) such as story.txt#st" + "\n" +
        "archives - (string) such as techtc300.zip#z" + "\n" +
        "words - (string) such as 'young,Englewood' etc")
      System.exit(1)
    }

    val sparkConf = new SparkConf().setAppName("FilesAndArchivesTest")
    val sc = new SparkContext(sparkConf)

    val files = args(0)
    val archives = args(1)
    val words = args(2)

    // Find all lines in files that contain any of the given words
    // Find all lines in archives that contain any of the given words, and also print the file names
    println(s"files: $files, archives: $archives, words: $words")

    val r = sc.parallelize(words.split( """,""").toList).persist()

    r.collect().foreach(println)

    // Process files
    r.mapPartitions(p => {
      // Load files; note that with --files filename#alias the alias can be used directly
      val lines = Source.fromFile(files).getLines()
      val words = p.toList // This is important: do not call p.exists(...) directly, because p is an iterator (mind the iterator pitfalls)

      // Lines that contain any of the words in p will be printed
      val flines = lines.filter(x => {
        words.exists(x.indexOf(_) >= 0)
      })

      // Note: flines is an iterator here; never call methods like foreach on it
      flines
    }).distinct().collect().foreach(x => println(s"files match -- $x"))

    // Process archives; several files can be packed together as a zip, tar.gz, etc.
    r.mapPartitions(p => {
      val dir = new File(archives + File.separator + "conf")

      val fileNameAndLines =
        for (f <- dir.listFiles()) yield {
          val fileName = f.getName
          val lines = Source.fromFile(f.getCanonicalPath).getLines()
          (fileName, lines)
        }

      val tmp = fileNameAndLines.flatMap({ case (fileName, lines) => for (line <- lines) yield (fileName, line) })
      val words = p.toList

      val flines = tmp.filter({
        case (fileName, line) => words.exists(line.indexOf(_) >= 0)
      })

      flines.toIterator
    }).distinct().collect().foreach(x => println(s"archives match -- ${x._1}/${x._2}"))

    sc.stop()
  }

} 
Example 47
Source File: NaiveBayesTest.scala    From AI   with Apache License 2.0 5 votes vote down vote up
package com.bigchange.test

import com.bigchange.datamining.CustomNaiveBayes

import scala.collection.mutable
import scala.collection.mutable.ListBuffer
import scala.io.Source


object NaiveBayesTest {

  def main(args: Array[String]) {

    // val Array(dataPath) = args
    val data = Source.fromFile("src/main/resources/nbData/i100-i500").getLines().toList
    // This variant can read the corresponding file from inside the jar after packaging
    val data2 = Source.fromInputStream(this.getClass.getResourceAsStream("src/main/resources/nbData/i100-i500")).getLines().toList
    // 10-fold cross-validation: (index, List(item1, item2))
    val splitData  = data.zipWithIndex.map(x => (x._2 % 10,x._1)).groupBy(_._1).mapValues(x => x.map(_._2))
    val modelMap = new mutable.HashMap[Int,String]()

    val model = CustomNaiveBayes.model(0, splitData)
    var list = List((0,model))

    for (id <- 1 until 10) {
      // Train
      val model = CustomNaiveBayes.model(id, splitData)
      list = list ::: List((id,model))

    }

    // Classify
    val listP = new ListBuffer[(String, Double)]
    list.foreach(x => {
      println("model:" + x)
      val pred = CustomNaiveBayes.predict(Array("health", "moderate", "moderate1", "yes"), x._2)
     listP.+=(pred)
    })
    println("tobe:"+ listP)
    println("tobe:"+ listP.max)

  }

} 
Example 48
Source File: Swagger.scala    From daf-semantics   with Apache License 2.0 5 votes vote down vote up
package controllers

import java.io.File
import java.util

import de.zalando.play.controllers.PlayBodyParsing._
import org.yaml.snakeyaml.Yaml
import play.api.mvc._

import scala.io.Source

class Swagger extends Controller {

  def notSpec = Seq("application.conf", "logback.xml", "routes")

  def listSpecs() = Action {
    val path = "conf"
    val file = new File(path)
    if (file != null && file.list() != null) {
      val files = file.listFiles().filterNot(_.isDirectory).map(_.getName).toSeq
      implicit val arrayMarshaller = anyToWritable[Seq[String]]("application/json")
      val names = files.filterNot(notSpec.contains).filterNot(_.startsWith("."))
      Ok(names)
    } else {
      NotFound("Path could not be found: " + file.getAbsolutePath)
    }
  }

  def swaggerSpec(name: String) = Action {
    implicit val mapMarshaller = anyToWritable[java.util.Map[_, _]]("application/json")
    getSpec(name).map(s => Ok(s)).getOrElse(NotFound(name))
  }

  private def getSpec(yamlPath: String) = {
    val yamlFile = Option(getClass.getClassLoader.getResource(yamlPath))
    // TODO: close the stream
    val yamlStr = yamlFile map { yaml => Source.fromURL(yaml).getLines().mkString("\n") }
    val javaMap = yamlStr map { new Yaml().load(_).asInstanceOf[util.Map[Any, Any]] }
    javaMap
  }
} 
Example 50
Source File: MockIdentityVerificationHttp.scala    From nisp-frontend   with Apache License 2.0 5 votes vote down vote up
package uk.gov.hmrc.nisp.helpers

import org.mockito.ArgumentMatchers
import org.mockito.Mockito._
import org.scalatest.mock.MockitoSugar
import play.api.http.Status
import play.api.libs.json.Json

import scala.concurrent.Future
import scala.io.Source
import uk.gov.hmrc.http.{ HttpGet, HttpResponse }

object MockIdentityVerificationHttp extends MockitoSugar {
  val mockHttp = mock[HttpGet]

  val possibleJournies = Map(
    "success-journey-id" -> "test/resources/identity-verification/success.json",
    "incomplete-journey-id" -> "test/resources/identity-verification/incomplete.json",
    "failed-matching-journey-id" -> "test/resources/identity-verification/failed-matching.json",
    "insufficient-evidence-journey-id" -> "test/resources/identity-verification/insufficient-evidence.json",
    "locked-out-journey-id" -> "test/resources/identity-verification/locked-out.json",
    "user-aborted-journey-id" -> "test/resources/identity-verification/user-aborted.json",
    "timeout-journey-id" -> "test/resources/identity-verification/timeout.json",
    "technical-issue-journey-id" -> "test/resources/identity-verification/technical-issue.json",
    "precondition-failed-journey-id" -> "test/resources/identity-verification/precondition-failed.json",
    "invalid-journey-id" -> "test/resources/identity-verification/invalid-result.json",
    "invalid-fields-journey-id" -> "test/resources/identity-verification/invalid-fields.json",
    "failed-iv-journey-id" -> "test/resources/identity-verification/failed-iv.json"
  )

  def mockJourneyId(journeyId: String): Unit = {
    val fileContents = Source.fromFile(possibleJournies(journeyId)).mkString
    when(mockHttp.GET[HttpResponse](ArgumentMatchers.contains(journeyId))(ArgumentMatchers.any(), ArgumentMatchers.any(),ArgumentMatchers.any())).
      thenReturn(Future.successful(HttpResponse(Status.OK, responseJson = Some(Json.parse(fileContents)))))
  }

  possibleJournies.keys.foreach(mockJourneyId)
} 
Example 51
Source File: MockSessionCache.scala    From nisp-frontend   with Apache License 2.0 5 votes vote down vote up
package uk.gov.hmrc.nisp.helpers

import play.api.libs.json.{Json, Reads, Writes}
import uk.gov.hmrc.http.cache.client.{CacheMap, SessionCache}

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.{ExecutionContext, Future}
import scala.io.Source
import uk.gov.hmrc.http.{HeaderCarrier, HttpDelete, HttpGet, HttpPut, UserId}

object MockSessionCache extends SessionCache{
  val cachedNinoAndUsername = TestAccountBuilder.cachedNino
  val cachedUserId = UserId(s"/auth/oid/$cachedNinoAndUsername")

  override def defaultSource: String = ???
  override def baseUri: String = ???
  override def domain: String = ???
  override def http: HttpGet with HttpPut with HttpDelete = ???

  private def loadObjectFromFile[T](filename: String)(implicit rds: Reads[T]): Option[T] = {
    val fileContents = Source.fromFile(filename).mkString
    Json.parse(fileContents).validate[T].fold(invalid => None, valid => Some(valid))
  }

  private def loadObjectBasedOnKey[T](key: String)(implicit rds: Reads[T]): Option[T] =
    key match {
      case _ => None
    }

  override def fetchAndGetEntry[T](key: String)(implicit hc: HeaderCarrier, rds: Reads[T],ec:ExecutionContext): Future[Option[T]] =
    Future.successful(hc.userId.filter(_ == cachedUserId).flatMap(p => loadObjectBasedOnKey(key)))

  override def cache[A](formId: String, body: A)(implicit wts: Writes[A], hc: HeaderCarrier,ec:ExecutionContext): Future[CacheMap] = Future.successful(CacheMap("", Map()))
} 
Example 52
Source File: Swagger.scala    From daf   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package controllers

import java.io.File
import java.util

import de.zalando.play.controllers.PlayBodyParsing._
import org.yaml.snakeyaml.Yaml
import play.api.mvc._

import scala.io.Source

@SuppressWarnings(Array("org.wartremover.warts.Equals", "org.wartremover.warts.AsInstanceOf", "org.wartremover.warts.ExplicitImplicitTypes"))
class Swagger extends Controller {

  def notSpec = Seq("application.conf", "logback.xml", "routes")

  def listSpecs() = Action {
    val path = "conf"
    val file = new File(path)
    if (file != null && file.list() != null) {
      val files = file.listFiles().filterNot(_.isDirectory).map(_.getName).toSeq
      implicit val arrayMarshaller = anyToWritable[Seq[String]]("application/json")
      val names = files.filterNot(notSpec.contains).filterNot(_.startsWith("."))
      Ok(names)
    } else {
      NotFound("Path could not be found: " + file.getAbsolutePath)
    }
  }

  def swaggerSpec(name: String) = Action {
    implicit val mapMarshaller = anyToWritable[java.util.Map[_, _]]("application/json")
    getSpec(name).map(s => Ok(s)).getOrElse(NotFound(name))
  }

  private def getSpec(yamlPath: String) = {
    val yamlFile = Option(getClass.getClassLoader.getResource(yamlPath))
    val yamlStr = yamlFile map { yaml => Source.fromURL(yaml).getLines().mkString("\n") }
    val javaMap = yamlStr map { new Yaml().load(_).asInstanceOf[util.Map[Any, Any]] }
    javaMap
  }
} 
Example 53
Source File: NifiProcessorSpec.scala    From daf   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package it.gov.daf.ingestion.nifi


import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import com.typesafe.config.Config
import it.gov.daf.catalogmanager.MetaCatalog
import it.gov.daf.catalogmanager.json._
import it.gov.daf.ingestion.metacatalog.MetaCatalogProcessor
import org.scalatest.{AsyncFlatSpec, Matchers}
import play.api.libs.json._
import play.api.libs.ws.WSResponse
import play.api.libs.ws.ahc.AhcWSClient

import scala.concurrent.Future
import scala.io.Source

class NifiProcessorSpec extends AsyncFlatSpec with Matchers {

  "A Nifi Processor " should "create a nifi pipeline for a correct meta catalog entry" in {

    val in = this.getClass.getResourceAsStream("/data_test.json")
    val sMetaCatalog = Source.fromInputStream(in).getLines().mkString(" ")
    in.close()

    val parsed = Json.parse(sMetaCatalog)
    val metaCatalog: JsResult[MetaCatalog] = Json.fromJson[MetaCatalog](parsed)

    metaCatalog.isSuccess shouldBe true

    implicit val system: ActorSystem = ActorSystem()
    implicit val materializer: ActorMaterializer = ActorMaterializer()
    implicit val wsClient: AhcWSClient = AhcWSClient()

    implicit val config: Config = com.typesafe.config.ConfigFactory.load()
    implicit val ec = system.dispatcher

    def closeAll(): Unit = {
      system.terminate()
      materializer.shutdown()
      wsClient.close()
    }

    val fResult = NifiProcessor(metaCatalog.get).createDataFlow()

    fResult.map { response =>
      println(response)
      closeAll()
      true shouldBe true
    }
  }


} 
Example 54
Source File: Swagger.scala    From daf   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package controllers

import java.io.File
import java.util

import de.zalando.play.controllers.PlayBodyParsing._
import org.yaml.snakeyaml.Yaml
import play.api.mvc._

import scala.io.Source

@SuppressWarnings(Array("org.wartremover.warts.Equals", "org.wartremover.warts.AsInstanceOf", "org.wartremover.warts.ExplicitImplicitTypes"))
class Swagger extends Controller {

  def notSpec = Seq("application.conf", "logback.xml", "routes")

  def listSpecs() = Action {
    val path = "conf"
    val file = new File(path)
    if (file != null && file.list() != null) {
      val files = file.listFiles().filterNot(_.isDirectory).map(_.getName).toSeq
      implicit val arrayMarshaller = anyToWritable[Seq[String]]("application/json")
      val names = files.filterNot(notSpec.contains).filterNot(_.startsWith("."))
      Ok(names)
    } else {
      NotFound("Path could not be found: " + file.getAbsolutePath)
    }
  }

  def swaggerSpec(name: String) = Action {
    implicit val mapMarshaller = anyToWritable[java.util.Map[_,_]]("application/json")
    getSpec(name).map(s => Ok(s)).getOrElse(NotFound(name))
  }

  private def getSpec(yamlPath: String) = {
    val yamlFile  = Option(getClass.getClassLoader.getResource(yamlPath))
    val yamlStr = yamlFile map { yaml => Source.fromURL(yaml).getLines().mkString("\n") }
    val javaMap = yamlStr map { new Yaml().load(_).asInstanceOf[util.Map[Any, Any]] }
    javaMap
  }
} 
Example 57
Source File: Swagger.scala    From daf   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package controllers

import java.io.File
import java.util

import de.zalando.play.controllers.PlayBodyParsing._
import org.yaml.snakeyaml.Yaml
import play.api.mvc._

import scala.io.Source

//@SuppressWarnings(Array("org.wartremover.warts.Equals", "org.wartremover.warts.AsInstanceOf", "org.wartremover.warts.ExplicitImplicitTypes"))
class Swagger extends Controller {

  def notSpec = Seq("application.conf", "logback.xml", "routes")

  def listSpecs() = Action {
    val path = "conf"
    val file = new File(path)
    if (file != null && file.list() != null) {
      val files = file.listFiles().filterNot(_.isDirectory).map(_.getName).toSeq
      implicit val arrayMarshaller = anyToWritable[Seq[String]]("application/json")
      val names = files.filterNot(notSpec.contains).filterNot(_.startsWith("."))
      Ok(names)
    } else {
      NotFound("Path could not be found: " + file.getAbsolutePath)
    }
  }

  def swaggerSpec(name: String) = Action {
    implicit val mapMarshaller = anyToWritable[java.util.Map[_,_]]("application/json")
    getSpec(name).map(s => Ok(s)).getOrElse(NotFound(name))
  }

  private def getSpec(yamlPath: String) = {
    val yamlFile  = Option(getClass.getClassLoader.getResource(yamlPath))
    val yamlStr = yamlFile map { yaml => Source.fromURL(yaml).getLines().mkString("\n") }
    val javaMap = yamlStr map { new Yaml().load(_).asInstanceOf[util.Map[Any, Any]] }
    javaMap
  }
} 
Example 59
Source File: Swagger.scala    From daf   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package controllers

import java.io.File
import java.util

import de.zalando.play.controllers.PlayBodyParsing._
import org.yaml.snakeyaml.Yaml
import play.api.mvc._

import scala.io.Source

class Swagger extends Controller {

  def notSpec = Seq("application.conf", "logback.xml", "routes")

  def listSpecs() = Action {
    val path = "conf"
    val file = new File(path)
    if (file != null && file.list() != null) {
      val files = file.listFiles().filterNot(_.isDirectory).map(_.getName).toSeq
      implicit val arrayMarshaller = anyToWritable[Seq[String]]("application/json")
      val names = files.filterNot(notSpec.contains).filterNot(_.startsWith("."))
      Ok(names)
    } else {
      NotFound("Path could not be found: " + file.getAbsolutePath)
    }
  }

  def swaggerSpec(name: String) = Action {
    implicit val mapMarshaller = anyToWritable[java.util.Map[_, _]]("application/json")
    getSpec(name).map(s => Ok(s)).getOrElse(NotFound(name))
  }

  private def getSpec(yamlPath: String) = {
    val yamlFile = Option(getClass.getClassLoader.getResource(yamlPath))
    val yamlStr = yamlFile map { yaml => Source.fromURL(yaml).getLines().mkString("\n") }
    val javaMap = yamlStr map { new Yaml().load(_).asInstanceOf[util.Map[Any, Any]] }
    javaMap
  }
} 
Example 60
Source File: ScalaSensor.scala    From sonar-scala   with GNU Lesser General Public License v3.0 5 votes vote down vote up
package com.mwz.sonar.scala
package sensor

import scala.io.Source
import scala.jdk.CollectionConverters._

import org.sonar.api.batch.sensor.{Sensor, SensorContext, SensorDescriptor}
import org.sonar.api.measures.{CoreMetrics => CM}
import scalariform.ScalaVersion


final class ScalaSensor(globalConfig: GlobalConfig) extends Sensor {
  override def execute(context: SensorContext): Unit = {
    val charset = context.fileSystem().encoding.toString

    val inputFiles = context
      .fileSystem()
      .inputFiles(context.fileSystem().predicates().hasLanguage(Scala.LanguageKey))

    val scalaVersion: ScalaVersion =
      Scala.getScalaVersion(context.config())

    // Save measures if not in pr decoration mode.
    if (!globalConfig.prDecoration)
      inputFiles.asScala.foreach { inputFile =>
        val source = Source.fromFile(inputFile.uri, charset)
        val sourceCode = try source.mkString finally source.close()
        val tokens = Scala.tokenize(sourceCode, scalaVersion)

        context
          .newMeasure()
          .on(inputFile)
          .forMetric(CM.COMMENT_LINES)
          .withValue(Measures.countCommentLines(tokens))
          .save()

        context
          .newMeasure()
          .on(inputFile)
          .forMetric(CM.NCLOC)
          .withValue(Measures.countNonCommentLines(tokens))
          .save()

        context
          .newMeasure()
          .on(inputFile)
          .forMetric(CM.CLASSES)
          .withValue(Measures.countClasses(tokens))
          .save()

        context
          .newMeasure()
          .on(inputFile)
          .forMetric(CM.FUNCTIONS)
          .withValue(Measures.countMethods(tokens))
          .save()
      }
  }

  override def describe(descriptor: SensorDescriptor): Unit = {
    descriptor
      .onlyOnLanguage(Scala.LanguageKey)
      .name("Scala Sensor")
  }
} 
Example 61
Source File: PatchSpec.scala    From sonar-scala   with GNU Lesser General Public License v3.0 5 votes vote down vote up
package com.mwz.sonar.scala.pr

import scala.io.Source

import org.scalatest.EitherValues
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers
import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks

class PatchSpec extends AnyFlatSpec with Matchers with EitherValues with ScalaCheckDrivenPropertyChecks {

  def patch(path: String): String =
    Source
      .fromResource(path)
      .getLines()
      .mkString("\n")

  it should "fail to parse an invalid patch" in {
    forAll((s: String) => Patch.parse(s) shouldBe Left(PatchError(s)))
  }

  it should "parse successfully a patch with additions only" in {
    val expected: Map[FileLine, PatchLine] =
      (69 to 84).zipWithIndex.map {
        case (fileLine, index) =>
          (FileLine(fileLine), PatchLine(index + 1))
      }.toMap

    Patch.parse(patch("patches/add.patch")).right.value shouldBe expected
  }

  it should "parse successfully a patch with deletions only" in {
    val expected: Map[FileLine, PatchLine] =
      List(
        List(26 -> 1, 27 -> 2, 28 -> 3, 29 -> 6, 30 -> 7, 31 -> 8),
        List(43 -> 10, 44 -> 11, 45 -> 12, 46 -> 15, 47 -> 16, 48 -> 20, 49 -> 21, 50 -> 22)
      ).flatten.map {
        case (k, v) =>
          FileLine(k) -> PatchLine(v)
      }.toMap

    Patch.parse(patch("patches/del.patch")).right.value shouldBe expected
  }

  it should "parse successfully a patch with additions, deletions and modifications" in {
    val expected: Map[FileLine, PatchLine] =
      List(
        (43 to 50).zipWithIndex.map(a => (a._1, a._2 + 1)),
        List(60 -> 10, 61 -> 11, 62 -> 12, 63 -> 15, 64 -> 16, 65 -> 17),
        List(77 -> 19, 78 -> 20, 79 -> 21, 80 -> 23, 81 -> 24, 82 -> 25, 83 -> 26)
      ).flatten.map {
        case (k, v) =>
          FileLine(k) -> PatchLine(v)
      }.toMap

    Patch.parse(patch("patches/add-del-mod.patch")).right.value shouldBe expected
  }
} 
Example 62
Source File: ParSeqTraceBaseVisualizer.scala    From play-parseq   with Apache License 2.0 5 votes vote down vote up
package com.linkedin.playparseq.trace.utils

import com.linkedin.parseq.trace.Trace
import com.linkedin.parseq.trace.codec.json.JsonTraceCodec
import java.io.File
import play.api.Environment
import play.api.http.HttpConfiguration
import scala.collection.immutable.ListMap
import scala.io.Source



  protected[this] def showTrace(trace: Trace, environment: Environment, httpConfiguration: HttpConfiguration): String = {
    // Get Trace JSON
    val traceJson = new JsonTraceCodec().encode(trace)
    // Generate pre-fill script for onload Trace JSON
    val preFillScript =
      """
        |<base href="%s">
        |<script>
        |  var ESC_FLAGS = "gi";
        |  var EMBED_ESCAPES = __EMBED_ESCAPES__;
        |  var unescapeForEmbedding = function (str) {
        |    for (var key in EMBED_ESCAPES) {
        |      if (EMBED_ESCAPES.hasOwnProperty(key)) {
        |        str = str.replace(new RegExp(EMBED_ESCAPES[key], ESC_FLAGS), key);
        |      }
        |    }
        |    return str;
        |  };
        |  var getEmbeddedContent = function(id) {
        |    var contentElem = document.getElementById(id);
        |    var innerContent = contentElem.firstChild.nodeValue;
        |    return JSON.parse(unescapeForEmbedding(innerContent));
        |  };
        |  window.onload = function() {
        |    var json = getEmbeddedContent('injected-json');
        |    // The renderTrace method does not yet support normal JS objects, but expects stringified JSON
        |    renderTrace(JSON.stringify(json));
        |  }
        |</script>
      """.stripMargin.format(httpConfiguration.context.stripSuffix("/") + TracevisRoot + "/")
    // Generate injected JSON placeholder
    val injectedJson = """<code id="injected-json"><!--__JSON__--></code>"""
    // Build HTML page
    environment.resourceAsStream(new File(TracevisRoot, TraceName).getPath).map(stream => {
      // Escape script and JSON
      val script = preFillScript.replace("__EMBED_ESCAPES__", """{"&":"&amp;","-":"&dsh;"}""")
      val json = injectedJson.replace("__JSON__", ListMap("&" -> "&amp;", "-" -> "&dsh;").foldLeft(traceJson)((acc, escape) => acc.replaceAll(escape._1, escape._2)))
      // Inject script and JSON
      Source.fromInputStream(stream).mkString.replace("<title>", script + "\n<title>").replace("</style>", "</style>\n" + json)
    }).orNull
  }

} 
Example 63
Source File: MemoryVectorStoreIndexer.scala    From dbpedia-spotlight-model   with Apache License 2.0 5 votes vote down vote up
package org.dbpedia.spotlight.db

import java.io.File
import breeze.linalg.DenseMatrix
import org.dbpedia.spotlight.db.model.{ResourceStore, TokenTypeStore}
import org.dbpedia.spotlight.exceptions.DBpediaResourceNotFoundException

import scala.collection.immutable.Iterable
import scala.io.Source
import java.util

import org.dbpedia.spotlight.db.memory.{MemoryStore, MemoryVectorStore}
import org.dbpedia.spotlight.model.{TokenType, DBpediaResource, TokenOccurrenceIndexer}


class MemoryVectorStoreIndexer(modelPath: File, dictPath: File){
  lazy val contextStore = new MemoryVectorStore()

  var dict: Map[String, Int] = null

  def loadVectorDict(tokenTypeStore: TokenTypeStore, resourceStore: ResourceStore) = {
    println("Loading vector dictionary!")
    dict = Source.fromFile(dictPath, "UTF-8").getLines().map { line =>
      val contents = line.split("\t")
      (contents(0), contents(1).toInt)
    }.toMap
    var resources: collection.mutable.Map[Int, Int] = collection.mutable.HashMap[Int,Int]()
    var tokens: collection.mutable.Map[Int, Int] = collection.mutable.HashMap[Int,Int]()
    // TODO: error handling if we can't find the token or resource
    var failedResources = 0
    var succeededResources = 0
    var failedTokens = 0
    var succeededTokens = 0
    dict.foreach { case(key, value) =>
      if(key.startsWith("DBPEDIA_ID/")){
        try {
          val resource = resourceStore.getResourceByName(key.replace("DBPEDIA_ID/", ""))
          resources += (resource.id -> value)
          succeededResources += 1
        } catch {
          case e: DBpediaResourceNotFoundException=> {
            failedResources += 1
            if (failedResources % 1000 == 0){
              println("Can't find resource: " + key.replace("DBPEDIA_ID/", ""))
            }
          }
        }
      }else{
        val token = tokenTypeStore.getTokenType(key)
        if (token == TokenType.UNKNOWN){
          failedTokens += 1
          if (failedTokens % 1000 == 0){
            println("Can't find token: " + key)
          }
        } else {
          tokens += (token.id -> value)
          succeededTokens += 1
        }
      }
    }
    println("Failed on " + failedResources + " entities, succeeded on " + succeededResources)
    println("Failed on " + failedTokens + " tokens, succeeded on " + succeededTokens)
    contextStore.resourceIdToVectorIndex = resources.toMap
    contextStore.tokenTypeIdToVectorIndex = tokens.toMap
    println("Done loading dict.")
  }

  def loadVectorsAndWriteToStore(outputFile:File) = {
    println("Loading vectors..")
    val matrixSource = Source.fromFile(modelPath)
    val lines = matrixSource.getLines()
    val rows = lines.next().substring(2).toInt
    val cols = lines.next().substring(2).toInt
    contextStore.vectors = new DenseMatrix[Float](rows, cols)
    println("Reading CSV and writing to store...")
    lines.zipWithIndex.foreach { case (row_str, row_idx) =>
      if (row_idx % 10000 == 0)
        println("At row " + row_idx)
      val values = row_str.split(",").map(_.trim).map(_.toDouble)
      values.zipWithIndex.foreach { case (value, col_idx) =>
        contextStore.vectors(row_idx, col_idx) = value.toFloat
      }
    }
    matrixSource.close()
    println("Done, dumping..")
    MemoryStore.dump(contextStore, outputFile)
  }

} 
Example 64
Source File: TypesLoader.scala    From dbpedia-spotlight-model   with Apache License 2.0 5 votes vote down vote up
package org.dbpedia.spotlight.util

import java.io.{File, InputStream}
import java.util.LinkedHashSet

import org.dbpedia.spotlight.log.SpotlightLog
import org.dbpedia.spotlight.model._
import org.semanticweb.yars.nx.parser.NxParser

import scala.collection.JavaConversions._
import scala.io.Source



object TypesLoader
{
    def getTypesMap(typeDictFile : File) : Map[String, List[OntologyType]] = {
        SpotlightLog.info(this.getClass, "Loading types map...")
        if (!(typeDictFile.getName.toLowerCase endsWith ".tsv"))
            throw new IllegalArgumentException("types mapping only accepted in tsv format so far! can't parse "+typeDictFile)
        // CAUTION: this assumes that the most specific type is listed last
        var typesMap = Map[String,List[OntologyType]]()
        for (line <- Source.fromFile(typeDictFile, "UTF-8").getLines) {
            val elements = line.split("\t")
            val uri = new DBpediaResource(elements(0)).uri
            val t = Factory.OntologyType.fromURI(elements(1))
            val typesList : List[OntologyType] = typesMap.get(uri).getOrElse(List[OntologyType]()) ::: List(t)
            typesMap = typesMap.updated(uri, typesList)
        }
        SpotlightLog.info(this.getClass, "Done.")
        typesMap
    }

    def getTypesMapFromTSV_java(input: InputStream) : java.util.Map[String,java.util.LinkedHashSet[OntologyType]] = {
        SpotlightLog.info(this.getClass, "Loading types map...")
        var typesMap = Map[String,java.util.LinkedHashSet[OntologyType]]()
        var i = 0;
        for (line <- Source.fromInputStream(input, "UTF-8").getLines) {
            val elements = line.split("\t")
            val uri = new DBpediaResource(elements(0)).uri
            val typeUri = elements(1)
            if (!typeUri.equalsIgnoreCase("http://www.w3.org/2002/07/owl#Thing")) {
                val t = Factory.OntologyType.fromURI(typeUri)
                i = i + 1;
                val typesList : java.util.LinkedHashSet[OntologyType] = typesMap.getOrElse(uri,new LinkedHashSet[OntologyType]())
                typesList.add(t)
                t match {
                    case ft: FreebaseType => typesList.add(Factory.OntologyType.fromQName("Freebase:/"+ft.domain)) //Add supertype as well to mimic inference
                    case _ => //nothing
                }
                typesMap = typesMap.updated(uri, typesList)
            }
        }
        SpotlightLog.info(this.getClass, "Done. Loaded %d types for %d resources.", i,typesMap.size)
        typesMap
    }


    def getTypesMap_java(instanceTypesStream : InputStream) : java.util.Map[String,java.util.LinkedHashSet[OntologyType]] = {
        SpotlightLog.info(this.getClass, "Loading types map...")
        var typesMap = Map[String,java.util.LinkedHashSet[OntologyType]]()
        var i = 0;
        // CAUTION: this assumes that the most specific type is listed last
        val parser = new NxParser(instanceTypesStream)
        while (parser.hasNext) {
            val triple = parser.next
            if(!triple(2).toString.endsWith("owl#Thing")) {
                i = i + 1;
                val resource = new DBpediaResource(triple(0).toString)
                val t = Factory.OntologyType.fromURI(triple(2).toString)
                val typesList : java.util.LinkedHashSet[OntologyType] = typesMap.get(resource.uri).getOrElse(new LinkedHashSet[OntologyType]())
                typesList.add(t)
                typesMap = typesMap.updated(resource.uri, typesList)
            }
        }
        SpotlightLog.info(this.getClass, "Done. Loaded %d types.", i)
        typesMap
    }
    
} 
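A minimal usage sketch for the loader above (not part of the original project); the object name, TSV path and resource URI are illustrative:

import java.io.File

import org.dbpedia.spotlight.model.OntologyType
import org.dbpedia.spotlight.util.TypesLoader

object TypesLoaderExample extends App {
  // "instance_types.tsv" is a placeholder: one "<resource URI>\t<type URI>" pair per line.
  val typesMap: Map[String, List[OntologyType]] =
    TypesLoader.getTypesMap(new File("instance_types.tsv"))

  // Look up the types recorded for a single resource URI ("Berlin" is a placeholder key).
  println(typesMap.getOrElse("Berlin", List.empty[OntologyType]))
}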
Example 65
Source File: NTripleSource.scala    From dbpedia-spotlight-model   with Apache License 2.0 5 votes vote down vote up
package org.dbpedia.spotlight.io

import java.io._
import java.util.zip.GZIPInputStream

import org.dbpedia.spotlight.log.SpotlightLog

import scala.io.Source


object NTripleSource {
    def fromFile(ntFile : File) : NTripleSource = new NTripleSource(ntFile)

    class NTripleSource(ntFile: File) extends Traversable[(String,String,String)] {

        override def foreach[U]( f: ((String,String,String)) => U) {
            var input : InputStream = new FileInputStream(ntFile)
            if (ntFile.getName.endsWith(".gz")) {
                input = new GZIPInputStream(input)
            }

            var linesIterator : Iterator[String] = Iterator.empty
            try {
                linesIterator = Source.fromInputStream(input, "UTF-8").getLines
            }
            catch {
                case e: java.nio.charset.MalformedInputException => linesIterator = Source.fromInputStream(input).getLines
            }

            for (line <- linesIterator) {
                if (!line.startsWith("#")) { //comments
                    val elements = line.trim.split(" ")

                    if (elements.length >= 4) {
                        var subj = elements(0)
                        var pred = elements(1)
                        var obj = elements(2)

                        subj = subj.substring(1,subj.length-1)
                        pred = pred.substring(1,pred.length-1)
                        obj = obj.substring(1,obj.length-1)

                        f((subj,pred,obj))
                    }
                    else {
                        SpotlightLog.error(this.getClass, "line must have at least 4 whitespace-separated fields; got %d separators in line: %s", elements.length - 1, line)
                    }
                }
            }
        }
    }
} 
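A short consumption sketch for the Traversable above (not from the original project); the dump path is a placeholder:

import java.io.File

import org.dbpedia.spotlight.io.NTripleSource

object NTripleSourceExample extends App {
  // Count predicate frequencies in a (possibly gzipped) N-Triples dump; the path is a placeholder.
  val counts = scala.collection.mutable.Map.empty[String, Int].withDefaultValue(0)

  NTripleSource.fromFile(new File("instance_types_en.nt.gz")).foreach {
    case (_, pred, _) => counts(pred) += 1
  }

  counts.toSeq.sortBy(-_._2).take(10).foreach(println)
}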
Example 66
Source File: DBpediaResourceFactorySQLTest.scala    From dbpedia-spotlight-model   with Apache License 2.0 5 votes vote down vote up
package org.dbpedia.spotlight.model

import org.junit.Test

import scala.io.Source



//TODO fix hard codes and register into the module pom.

class DBpediaResourceFactorySQLTest {

  val configuration: SpotlightConfiguration = new SpotlightConfiguration("conf/server.properties")
  val factory: SpotlightFactory = new SpotlightFactory(configuration)

  val dbpediaResourceFactory = configuration.getDBpediaResourceFactory

  def dbpediaResourceForAllConcepts() {
    //val configuration: IndexingConfiguration = new IndexingConfiguration("conf/indexing.properties")
    val examples = Source.fromFile("/Users/jodaiber/Desktop/DBpedia/conceptURIs.list", "UTF-8").getLines().take(10000)

    examples.foreach( dbpediaID => {
      try{
        val dBpediaResource: DBpediaResource = dbpediaResourceFactory.from(dbpediaID)
        assert(dBpediaResource.uri.equals(dbpediaID))
        assert(dBpediaResource.getTypes.size() >= 0)
        assert(dBpediaResource.support >= 0)
        assert(!dBpediaResource.getTypes.contains(null))
      }catch{
        case e: NoSuchElementException => //There may be a difference between the index and the concept list when testing...
      }
    })
  }

  @Test
  def createDBpediaResourcesOnce() {
    dbpediaResourceForAllConcepts()
  }

  @Test
  def createDBpediaResourcesTenTimes() {
    (1 to 10).toList.foreach {
      _ => dbpediaResourceForAllConcepts()
    }
  }

} 
Example 67
Source File: SparkSqlUtils.scala    From HadoopLearning   with MIT License 5 votes vote down vote up
package com.c503.utils

import java.io.{BufferedInputStream, BufferedReader, FileInputStream, InputStreamReader}
import java.nio.file.Path

import com.google.common.io.Resources
import org.apache.log4j.{Level, Logger}
import org.apache.mesos.Protos.Resource
import org.apache.spark.sql.SparkSession

import scala.io.Source


object SparkSqlUtils {

  // Assumed helper: the original definition was stripped by the example scraper. It resolves a
  // classpath resource name to a filesystem path via Guava's Resources (imported above).
  def getPathByName(name: String): String = Resources.getResource(name).getPath

  def readSqlByPath(sqlPath: String): String = {
    val buf = new StringBuilder
    val path = this.getPathByName(sqlPath)
    val file = Source.fromFile(path)
    for (line <- file.getLines) {
      buf ++= line + "\n"
    }
    file.close
    buf.toString()
  }


} 
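A hedged usage sketch, assuming the reconstructed readSqlByPath above; the resource name and Spark settings are illustrative:

import org.apache.spark.sql.SparkSession

import com.c503.utils.SparkSqlUtils

object SparkSqlUtilsExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("read-sql-demo").getOrCreate()

    // "sql/daily_report.sql" is a placeholder classpath resource name.
    val sql = SparkSqlUtils.readSqlByPath("sql/daily_report.sql")
    spark.sql(sql).show()

    spark.stop()
  }
}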
Example 68
Source File: ExportData.scala    From Linkis   with Apache License 2.0 5 votes vote down vote up
package com.webank.wedatasphere.linkis.engine.imexport

import java.io.File

import com.webank.wedatasphere.linkis.common.utils.Logging
import org.apache.spark.sql.SparkSession
import org.json4s._
import org.json4s.jackson.JsonMethods._

import scala.io.Source


object ExportData extends Logging {
  implicit val formats = DefaultFormats

  def exportData(spark: SparkSession, dataInfo: String, destination: String): Unit = {
    exportDataFromFile(spark, parse(dataInfo).extract[Map[String, Any]], parse(destination).extract[Map[String, Any]])
  }

  def exportDataByFile(spark: SparkSession, dataInfoPath: String, destination: String): Unit = {
    val fileSource = Source.fromFile(dataInfoPath)
    val dataInfo = fileSource.mkString
    exportDataFromFile(spark, parse(dataInfo).extract[Map[String, Any]], parse(destination).extract[Map[String, Any]])
    fileSource.close()
    val file = new File(dataInfoPath)
    if (file.exists()) {
      file.delete()
    }
  }

  def exportDataFromFile(spark: SparkSession, dataInfo: Map[String, Any], dest: Map[String, Any]): Unit = {

    //Export dataFrame
    val df = spark.sql(getExportSql(dataInfo))
    //dest

    val pathType = LoadData.getMapValue[String](dest, "pathType", "share")
    val path = if ("share".equals(pathType))
      "file://" + LoadData.getMapValue[String](dest, "path")
    else
      "hdfs://" + LoadData.getMapValue[String](dest, "path")

    val hasHeader = LoadData.getMapValue[Boolean](dest, "hasHeader", false)
    val isCsv = LoadData.getMapValue[Boolean](dest, "isCsv", true)
    val isOverwrite = LoadData.getMapValue[Boolean](dest, "isOverwrite", true)
    val sheetName = LoadData.getMapValue[String](dest, "sheetName", "Sheet1")
    val fieldDelimiter = LoadData.getMapValue[String](dest, "fieldDelimiter", ",")

    if (isCsv) {
      CsvRelation.saveDFToCsv(spark, df, path, hasHeader, isOverwrite,option = Map("fieldDelimiter" -> fieldDelimiter))
    } else {
      df.write.format("com.webank.wedatasphere.spark.excel")
        .option("sheetName", sheetName)
        .option("useHeader", hasHeader)
        .mode("overwrite").save(path)
    }
    warn(s"Succeed to export data  to path:$path")
  }

  def getExportSql(dataInfo: Map[String, Any]): String = {
    val sql = new StringBuilder
    //dataInfo
    val database = LoadData.getMapValue[String](dataInfo, "database")
    val tableName = LoadData.getMapValue[String](dataInfo, "tableName")
    val isPartition = LoadData.getMapValue[Boolean](dataInfo, "isPartition", false)
    val partition = LoadData.getMapValue[String](dataInfo, "partition", "ds")
    val partitionValue = LoadData.getMapValue[String](dataInfo, "partitionValue", "1993-01-02")
    val columns = LoadData.getMapValue[String](dataInfo, "columns", "*")
    sql.append("select ").append(columns).append(" from ").append(s"$database.$tableName")
    if (isPartition) sql.append(" where ").append(s"$partition=$partitionValue")
    val sqlString = sql.toString()
    warn(s"export sql:$sqlString")
    sqlString
  }

} 
Example 69
Source File: Mnemonics.scala    From LearningScala   with Apache License 2.0 5 votes vote down vote up
package _980_problem_solving

import scala.io.Source
import scala.language.postfixOps


// The object header and its dictionary helpers were stripped by the example scraper; the
// definitions below restore the standard phone-mnemonics setup ("words.txt" and the keypad
// mapping are assumptions).
object Mnemonics extends App {

  val words: List[String] =
    Source.fromFile("words.txt").getLines.toList.filter(w => w.forall(_.isLetter))

  // Phone keypad mapping: digit -> letters.
  val mnemonics: Map[Char, String] = Map(
    '2' -> "ABC", '3' -> "DEF", '4' -> "GHI", '5' -> "JKL",
    '6' -> "MNO", '7' -> "PQRS", '8' -> "TUV", '9' -> "WXYZ")

  // Inverted mapping: letter -> digit, e.g. 'A' -> '2'.
  private val charCode: Map[Char, Char] =
    for ((digit, letters) <- mnemonics; letter <- letters) yield letter -> digit

  // Digit string for a word, e.g. "Java" -> "5282".
  private def wordCode(word: String): String = word.toUpperCase.map(charCode)

  // All dictionary words that map to a given digit string (empty if none).
  private val wordsForNum: Map[String, Seq[String]] =
    words.groupBy(wordCode).withDefaultValue(Seq())

  def encode(number: String): Set[List[String]] =
    if (number.isEmpty) Set(List())
    else {
      for {
        split <- 1 to number.length
        word <- wordsForNum(number take split)
        rest <- encode(number drop split)
      } yield word :: rest
    }.toSet

  def translate(number: String): Set[String] =
    encode(number) map (_ mkString " ")

  // testing
  println(translate("7225247386"))
  println(translate("72252"))
  println(translate("783364"))
} 
Example 70
Source File: CollectionRollerConfigParser.scala    From pulse   with Apache License 2.0 5 votes vote down vote up
package io.phdata.pulse.collectionroller

import net.jcazevedo.moultingyaml._

import scala.io.Source

object YamlProtocol extends DefaultYamlProtocol {
  implicit val application = yamlFormat6(Application)
  implicit val config      = yamlFormat2(CollectionRollerConfig)
}

// The CollectionRollerConfig case class and the parser object itself were stripped by the example
// scraper; the sketch below restores assumed shapes (the field names are assumptions) so that the
// yamlFormat2 declaration above lines up and scala.io.Source is actually used.
case class CollectionRollerConfig(applications: List[Application],
                                  solrConfigSetDir: Option[String])

object CollectionRollerConfigParser {
  import YamlProtocol._

  def getConfig(yaml: String): CollectionRollerConfig =
    yaml.parseYaml.convertTo[CollectionRollerConfig]

  def readConfig(path: String): CollectionRollerConfig =
    getConfig(Source.fromFile(path).mkString)
}

case class Application(name: String,
                       numCollections: Option[Int],
                       shards: Option[Int],
                       replicas: Option[Int],
                       rollPeriod: Option[Int],
                       solrConfigSetName: String = "testconf") 
Example 71
Source File: AlertEngineConfigParser.scala    From pulse   with Apache License 2.0 5 votes vote down vote up
package io.phdata.pulse.alertengine

import net.jcazevedo.moultingyaml._

import scala.io.Source

object YamlProtocol extends DefaultYamlProtocol {
  implicit val alert: YamlFormat[AlertRule]                = yamlFormat5(AlertRule)
  implicit val mailProfile: YamlFormat[MailAlertProfile]   = yamlFormat2(MailAlertProfile)
  implicit val slackProfile: YamlFormat[SlackAlertProfile] = yamlFormat2(SlackAlertProfile)
  implicit val application: YamlFormat[Application]        = yamlFormat4(Application)
  implicit val config: YamlFormat[AlertEngineConfig]       = yamlFormat1(AlertEngineConfig)
}

object AlertTypes {
  val SOLR: String           = "solr"
  val SQL: String            = "sql"
  val ALL_TYPES: Set[String] = Set(AlertTypes.SOLR, AlertTypes.SQL)
}

// The Application and AlertEngineConfig case classes and the parser object itself were stripped by
// the example scraper; the sketch below restores assumed shapes (field names are assumptions) so
// that the yamlFormat4/yamlFormat1 declarations above line up and scala.io.Source is actually used.
case class Application(name: String,
                       alertRules: List[AlertRule],
                       emailProfiles: Option[List[MailAlertProfile]],
                       slackProfiles: Option[List[SlackAlertProfile]])

case class AlertEngineConfig(applications: List[Application])

object AlertEngineConfigParser {
  import YamlProtocol._

  def getConfig(yaml: String): AlertEngineConfig =
    yaml.parseYaml.convertTo[AlertEngineConfig]

  def readConfig(path: String): AlertEngineConfig =
    getConfig(Source.fromFile(path).mkString)
}

case class AlertRule(query: String,
                     retryInterval: Int,
                     resultThreshold: Option[Int] = None,
                     alertProfiles: List[String],
                     alertType: Option[String] = None)

trait AlertProfile {
  val name: String
}

case class SlackAlertProfile(name: String, url: String) extends AlertProfile

case class MailAlertProfile(name: String, addresses: List[String]) extends AlertProfile 
Example 72
Source File: TemplateLoader.scala    From avoin-voitto   with MIT License 5 votes vote down vote up
package liigavoitto.journalist.utils

import liigavoitto.util.Logging
import scaledn.parser.parseEDN
import scaledn.{EDN, EDNKeyword, EDNSymbol}

import scala.io.Source
import scala.reflect.ClassTag
import scala.util.{Failure, Try}

trait TemplateLoader extends Logging {

  type FileContent = Map[EDNKeyword, Map[EDNKeyword, List[TemplateVector]]]
  type TemplateVector = Vector[Any]
  type TemplateSettings = Map[EDNKeyword, Any]
  val WeightKey = EDNKeyword(EDNSymbol("weight"))

  def load(filePath: String, templatesName: String, language: String) = {
    val content = loadResource(filePath)
    val parsed = parseEDN(content)
    logErrors(parsed, filePath)
    val mapped = parsed.get.asInstanceOf[FileContent]
    getTemplates(mapped, templatesName, language)
  }

  private def getTemplates(parsed: FileContent, name: String, language: String) = {
    val templatesName = EDNKeyword(EDNSymbol(name))
    val languageKey = EDNKeyword(EDNSymbol(language))
    parsed(templatesName)(languageKey).map(parseTemplate)
  }

  private def parseTemplate(vector: TemplateVector) = {
    val tmpl = vector(0).asInstanceOf[String]
    val weight = getWeight(vector)
    if (weight.isDefined)
      Template(tmpl, weight.get)
    else
      Template(tmpl)
  }

  private def asInstanceOfOption[T: ClassTag](o: Any): Option[T] =
    Some(o) collect { case m: T => m }

  private def getWeight(vector: Vector[Any]) = for {
      opts <- vector.lift(1)
      settings <- asInstanceOfOption[TemplateSettings](opts)
      value <- settings.get(WeightKey)
      asDouble <- asInstanceOfOption[Double](value)
    } yield asDouble

  private def loadResource(path: String) = {
    val resourcePath =  path
    val res = getClass.getClassLoader.getResource(resourcePath)
    val source = Source.fromURL(res)
    source.mkString
  }

  private def logErrors(parsed: Try[EDN], filePath: String) = parsed match {
      case Failure(f : org.parboiled2.ParseError) => {
        log.error(s"$filePath ParseError at line " + f.position.line + " col " + f.position.column)
      }
      case _ =>
    }
} 
Example 73
Source File: CsvKafkaPublisher.scala    From Taxi360   with Apache License 2.0 5 votes vote down vote up
package com.hadooparchitecturebook.taxi360.common

import java.io.File
import java.util.Random

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.io.Source

object CsvKafkaPublisher {

  var counter = 0
  var salts = 0

  def main(args:Array[String]): Unit = {
    if (args.length == 0) {
      println("<brokerList> " +
        "<topicName> " +
        "<dataFolderOrFile> " +
        "<sleepPerRecord> " +
        "<acks> " +
        "<linger.ms> " +
        "<producer.type> " +
        "<batch.size> " +
        "<salts>")
      return
    }

    val kafkaBrokerList = args(0)
    val kafkaTopicName = args(1)
    val nyTaxiDataFolder = args(2)
    val sleepPerRecord = args(3).toInt
    val acks = args(4).toInt
    val lingerMs = args(5).toInt
    val producerType = args(6) //"async"
    val batchSize = args(7).toInt
    salts = args(8).toInt

    val kafkaProducer = KafkaProducerUntil.getNewProducer(kafkaBrokerList, acks, lingerMs, producerType, batchSize)

    println("--Input:" + nyTaxiDataFolder)

    val dataFolder = new File(nyTaxiDataFolder)
    if (dataFolder.isDirectory) {
      val files = dataFolder.listFiles().iterator
      files.foreach(f => {
        println("--Input:" + f)
        processFile(f, kafkaTopicName, kafkaProducer, sleepPerRecord)
      })
    } else {
      println("--Input:" + dataFolder)
      processFile(dataFolder, kafkaTopicName, kafkaProducer, sleepPerRecord)
    }
    println("---Done")
  }

  def processFile(file:File, kafkaTopicName:String,
                  kafkaProducer: KafkaProducer[String, String], sleepPerRecord:Int): Unit = {
    var counter = 0
    val r = new Random()

    println("-Starting Reading")
    Source.fromFile(file).getLines().foreach(l => {
      counter += 1
      if (counter % 10000 == 0) {
        println("{Sent:" + counter + "}")
      }
      if (counter % 100 == 0) {
        print(".")
      }
      Thread.sleep(sleepPerRecord)

      val saltedVendor = r.nextInt(salts) + l

      if (counter > 2) {
        publishTaxiRecord(saltedVendor, kafkaTopicName, kafkaProducer)
      }
    })
  }

  def publishTaxiRecord(line:String, kafkaTopicName:String, kafkaProducer: KafkaProducer[String, String]): Unit = {

    if (line.startsWith("vendor_name") || line.length < 10) {
      println("skip")
    } else {
      val message = new ProducerRecord[String, String](kafkaTopicName, line.hashCode.toString, line)
      kafkaProducer.send(message)
    }
  }


} 
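The publisher is driven entirely by command-line arguments; the sketch below (not part of the original project) shows an invocation with placeholder broker, topic and folder values in the order documented by the usage message above:

import com.hadooparchitecturebook.taxi360.common.CsvKafkaPublisher

object CsvKafkaPublisherExample {
  def main(args: Array[String]): Unit = {
    // Argument order follows the usage message printed by CsvKafkaPublisher.main:
    // brokerList, topicName, dataFolderOrFile, sleepPerRecord, acks,
    // linger.ms, producer.type, batch.size, salts
    CsvKafkaPublisher.main(Array(
      "localhost:9092", // Kafka broker list (placeholder)
      "taxi-trips",     // topic name (placeholder)
      "/data/nytaxi",   // CSV folder or file (placeholder)
      "10",             // sleep (ms) between records
      "1",              // acks
      "5",              // linger.ms
      "async",          // producer type
      "16384",          // batch size
      "100"             // number of salts
    ))
  }
}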
Example 74
Source File: files.scala    From spatial   with MIT License 5 votes vote down vote up
package utils.io

import java.io._
import java.nio.file._
import java.util.function.Consumer
import java.nio.file.{Files,Paths}

import scala.io.Source

object files {
  def sep: String = java.io.File.separator
  def cwd: String = new java.io.File("").getAbsolutePath
  final val BUFFER_SIZE: Int = 1024 * 4
  final val EOF = -1

  
  def copyResource(src: String, dest: String): Unit = {
    val outFile = new File(dest)
    val outPath = outFile.getParentFile
    outPath.mkdirs()
    val url = getClass.getResource(src)
    val in: InputStream = url.openStream()
    val out: OutputStream = new FileOutputStream(outFile)
    val buffer = new Array[Byte](BUFFER_SIZE)
    var n: Int = 0
    while ({n = in.read(buffer); n != EOF}) {
      out.write(buffer, 0, n)
    }
    out.close()
    in.close()
  }

  def listFiles(dir:String, exts:List[String]=Nil):List[java.io.File] = {
    val d = new java.io.File(dir)
    if (d.exists && d.isDirectory) {
      d.listFiles.filter { file =>
        file.isFile && exts.exists { ext => file.getName.endsWith(ext) }
      }.toList
    } else {
      Nil
    }
  }

  def splitPath(path:String) = {
    val file = new File(path)
    (file.getParent, file.getName)
  }

  def buildPath(parts:String*):String = {
    parts.mkString(sep)
  }

  def dirName(fullPath:String) = fullPath.split(sep).dropRight(1).mkString(sep)

  def createDirectories(dir:String) = {
    val path = Paths.get(dir)
    if (!Files.exists(path)) Files.createDirectories(path)
  }

} 
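A small sketch exercising the helpers above (not from the original project); paths and extensions are illustrative:

import utils.io.files

object FilesExample extends App {
  // Build an output directory under the current working directory and make sure it exists.
  val outDir = files.buildPath(files.cwd, "generated")
  files.createDirectories(outDir)

  // Copy a bundled classpath resource into it ("/template.txt" is a placeholder).
  files.copyResource("/template.txt", files.buildPath(outDir, "template.txt"))

  // List the Scala sources sitting in that directory.
  files.listFiles(outDir, List(".scala")).foreach(f => println(f.getName))
}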
Example 75
Source File: CongestionModel.scala    From spatial   with MIT License 5 votes vote down vote up
package models

import java.io.File
import java.io.PrintWriter
import utils.io.files._
import utils.math.{CombinationTree, ReduceTree}

import scala.io.Source

object CongestionModel {

	abstract class FeatureVec[T] {
		def loads: T
		def stores: T
		def gateds: T
		def outerIters: T
		def innerIters: T
		def bitsPerCycle: T
		def toSeq: Seq[T] = Seq(stores, outerIters, loads, innerIters, gateds, bitsPerCycle)
	}
	case class RawFeatureVec(loads: Double, stores: Double, gateds: Double, outerIters: Double, innerIters: Double, bitsPerCycle: Double) extends FeatureVec[Double]
	case class CalibFeatureVec(loads: Double, stores: Double, gateds: Double, outerIters: Double, innerIters: Double, bitsPerCycle: Double) extends FeatureVec[Double]

	// Set up lattice properties
	val feature_dims = 6
	val lattice_rank = 6
	val lattice_size = Seq(3,3,3,3,3,3)
	val num_keypoints = 8
	val num_lattices = 1
	var model: String = ""

	// Derive lattice properties
	val sizes = scala.Array.tabulate(lattice_rank){i => lattice_size(i)}
	val dimensions = sizes.length
	val params_per_lattice = sizes.product
	val strides: scala.Array[Int] = scala.Array.fill(dimensions){1}
	val nparams = num_lattices * params_per_lattice

	// Grab lattice params
	lazy val loads_keypoints_inputs = ModelData.loads_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/loads_keypoints_inputs.csv", ","){x => x.toDouble}
	lazy val loads_keypoints_outputs = ModelData.loads_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/loads_keypoints_outputs.csv", ","){x => x.toDouble}
	lazy val stores_keypoints_inputs = ModelData.stores_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/stores_keypoints_inputs.csv", ","){x => x.toDouble}
	lazy val stores_keypoints_outputs = ModelData.stores_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/stores_keypoints_outputs.csv", ","){x => x.toDouble}
	lazy val gateds_keypoints_inputs = ModelData.gateds_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/gateds_keypoints_inputs.csv", ","){x => x.toDouble}
	lazy val gateds_keypoints_outputs = ModelData.gateds_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/gateds_keypoints_outputs.csv", ","){x => x.toDouble}
	lazy val outerIters_keypoints_inputs = ModelData.outerIters_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/outerIters_keypoints_inputs.csv", ","){x => x.toDouble}
	lazy val outerIters_keypoints_outputs = ModelData.outerIters_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/outerIters_keypoints_outputs.csv", ","){x => x.toDouble}
	lazy val innerIters_keypoints_inputs = ModelData.innerIters_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/innerIters_keypoints_inputs.csv", ","){x => x.toDouble}
	lazy val innerIters_keypoints_outputs = ModelData.innerIters_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/innerIters_keypoints_outputs.csv", ","){x => x.toDouble}
	lazy val bitsPerCycle_keypoints_inputs = ModelData.bitsPerCycle_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/bitsPerCycle_keypoints_inputs.csv", ","){x => x.toDouble}
	lazy val bitsPerCycle_keypoints_outputs = ModelData.bitsPerCycle_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/bitsPerCycle_keypoints_outputs.csv", ","){x => x.toDouble}
    lazy val params = ModelData.params(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/LATTICE_PARAMS.csv", ","){x => x.toDouble}

    
	// Note: calibrate_features and hypercube_features are defined in the original object but were
	// stripped by the example scraper; they map the raw features through the calibrator keypoints
	// and evaluate the multilinear lattice interpolation, respectively.
	def evaluate(features: RawFeatureVec, typ: Runtime.CtrlSchedule): Int = {
		model = typ.toString

		val calibrated_features = calibrate_features(features)
		val result = hypercube_features(calibrated_features)
		// TODO: Model is naughty if it returns <170
		// println(s"evaluating $features = ${170 max result.toInt}")
		170 max result.toInt
	}
} 
Example 76
Source File: Client.scala    From roc   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package roc
package integrations

import com.twitter.finagle.{Addr, Address, Name}
import com.twitter.util.Var
import scala.io.Source

trait Client {
  private val db     = "circle_test"
  //private val db     = "postgres"
  private val user   = "ubuntu"
  //private val user   = "postgres"
  private val passwd = ""
  private val host   = "127.0.0.1"
  private val port   = 5432

  private lazy val address = Address(host, port)
  protected lazy val Postgres = Postgresql.client
    .withUserAndPasswd(user, passwd)
    .withDatabase(db)
    .newRichClient(
      Name.Bound(Var[Addr](Addr.Bound(address)), "roc"),
      "roc"
    )
}

trait SqlReader {

  def readSql(filename: String): String = {
    val path = s"core/src/it/resources/sql/$filename"
    Source.fromFile(path)
      .getLines
      .foldLeft("")(_ + _)
  }
} 
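A sketch of mixing in the SqlReader trait (not part of the original project); the SQL file name is a placeholder:

import roc.integrations.SqlReader

object SqlReaderExample extends SqlReader {
  def main(args: Array[String]): Unit = {
    // Reads core/src/it/resources/sql/create_tables.sql into one string ("create_tables.sql" is a placeholder).
    println(readSql("create_tables.sql"))
  }
}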
Example 77
Source File: TotalTweetsScheduler.scala    From redrock   with Apache License 2.0 5 votes vote down vote up
package com.restapi

import java.io.{File, FileInputStream}

import akka.actor.{ActorRef, Actor, ActorSystem, Props}
import akka.io.IO
import org.slf4j.LoggerFactory
import play.api.libs.json.Json
import spray.can.Http
import akka.pattern.ask
import spray.http.DateTime
import scala.concurrent.duration._
import akka.util.Timeout
import scala.concurrent.ExecutionContext.Implicits.global
import org.apache.commons.codec.digest.DigestUtils
import scala.io.Source

case object GetTotalTweetsScheduler

object CurrentTotalTweets {
  @volatile
  var totalTweets: Long = 0
}

class ExecuterTotalTweetsES(delay: FiniteDuration, interval: FiniteDuration) extends Actor {
  context.system.scheduler.schedule(delay, interval) {
    getTotalTweetsES
  }

  val logger = LoggerFactory.getLogger(this.getClass)

  override def receive: Actor.Receive = {
    case GetTotalTweetsScheduler => {
      logger.info(s"Getting Total of Tweets. Begin: ${CurrentTotalTweets.totalTweets}")
    }
    case _ => // just ignore any messages
  }

  def getTotalTweetsES: Unit = {
    val elasticsearchRequests = new GetElasticsearchResponse(0, Array[String](), Array[String](),
      LoadConf.restConf.getString("searchParam.defaulStartDatetime"),
      LoadConf.restConf.getString("searchParam.defaultEndDatetime"),
      LoadConf.esConf.getString("decahoseIndexName"))
    val totalTweetsResponse = Json.parse(elasticsearchRequests.getTotalTweetsESResponse())
    logger.info(s"Getting Total of Tweets. Current: ${CurrentTotalTweets.totalTweets}")
    CurrentTotalTweets.totalTweets = (totalTweetsResponse \ "hits" \ "total").as[Long]
    logger.info(s"Total users updated. New: ${CurrentTotalTweets.totalTweets}")
  }
} 
Example 78
Source File: io.scala    From sbt-org-policies   with Apache License 2.0 5 votes vote down vote up
package sbtorgpolicies

import java.io._
import java.net.URL
import java.nio.charset.Charset
import java.nio.file.Path
import java.nio.file.Paths.get

import cats.syntax.either._
import sbtorgpolicies.exceptions.IOException

import scala.io.Source
import scala.language.implicitConversions

package object io {

  type IOResult[T] = Either[IOException, T]

  object syntax {

    implicit def eitherFilterSyntax[T](either: Either[Throwable, T]): FilteredEitherOps[T] =
      new FilteredEitherOps(either)

    implicit def fileNameSyntax(fileName: String): FileNameOps = new FileNameOps(fileName)

    final class FilteredEitherOps[T](either: Either[Throwable, T]) {

      def withFilter(f: T => Boolean): Either[Throwable, T] = either match {
        case Right(r) if !f(r) =>
          new IllegalStateException("Filter condition has not been satisfied").asLeft[T]
        case _ =>
          either
      }
    }

    final class FileNameOps(filename: String) {

      def toPath: Path = get(filename)

      def toFile: File = new File(filename.fixPath)

      def fixPath: String = filename.replaceAll("/", File.separator)

      def ensureFinalSlash: String =
        filename +
          (if (filename.endsWith(File.separator)) ""
           else File.separator)
    }
  }

  object IO {

    def file(path: String): File = new File(path)

    def url(address: String): URL = new URL(address)

    def readLines(file: File): Iterator[String] =
      Source.fromFile(file).getLines()

    def readBytes(file: File): Array[Byte] = {
      val is: InputStream    = new FileInputStream(file)
      val array: Array[Byte] = Stream.continually(is.read).takeWhile(_ != -1).map(_.toByte).toArray
      is.close()
      array
    }

    def write(file: File, content: String, charset: Charset = Charset.forName("UTF-8")): Unit = {
      val writer = new BufferedWriter(
        new OutputStreamWriter(new FileOutputStream(file, false), charset)
      )
      writer.write(content)
      writer.close()
    }

    def relativize(base: File, file: File): Option[String] = {

      def ensureEndingSlash: Option[String] = {
        val path = base.getAbsolutePath
        path.lastOption.map {
          case c if c == File.separatorChar => path
          case _                            => path + File.separatorChar
        }
      }

      val baseFileString = if (base.isDirectory) ensureEndingSlash else None
      val pathString     = file.getAbsolutePath
      baseFileString flatMap {
        case baseString if pathString.startsWith(baseString) =>
          Some(pathString.substring(baseString.length))
        case _ => None
      }
    }

  }
} 
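A short sketch exercising the IO helpers above (not from the original project); the file name is a placeholder:

import sbtorgpolicies.io.IO

object IOExample extends App {
  // "RELEASE_NOTES.md" is a placeholder file name.
  val notes = IO.file("RELEASE_NOTES.md")

  // Write the file, then read it back line by line.
  IO.write(notes, "## 1.0.0\n- first release\n")
  IO.readLines(notes).foreach(println)

  // Read raw bytes as well (e.g. to hash or copy the file).
  println(IO.readBytes(notes).length)
}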
Example 79
Source File: Mnemonic.scala    From EncryCore   with GNU General Public License v3.0 5 votes vote down vote up
package encry.utils

import org.encryfoundation.common.utils.Algos
import scodec.bits.BitVector
import scala.io.Source

object Mnemonic {

  def getWords: Array[String] =
    Source.fromInputStream(getClass.getResourceAsStream("/languages/english/words.txt")).getLines.toArray

  def seedFromMnemonic(mnemonicCode: String, passPhrase: String = ""): Array[Byte] =
    Algos.hash(mnemonicCode + "mnemonic=" + passPhrase)

  def entropyToMnemonicCode(entropy: Array[Byte]): String = {
    val words: Array[String] = getWords
    val checkSum: BitVector = BitVector(Algos.hash(entropy))
    val entropyWithCheckSum: BitVector = BitVector(entropy) ++ checkSum.take(4)

    entropyWithCheckSum.grouped(11).map { i =>
      words(i.toInt(signed = false))
    }.mkString(" ")
  }
} 
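A sketch (not from the original project) that generates a mnemonic phrase from entropy and derives a seed from it; scala.util.Random stands in for a proper secure RNG here:

import scala.util.Random

import encry.utils.Mnemonic

object MnemonicExample extends App {
  // 16 bytes of entropy; use a cryptographically secure source in real code.
  val entropy = new Array[Byte](16)
  new Random().nextBytes(entropy)

  val phrase = Mnemonic.entropyToMnemonicCode(entropy)
  val seed   = Mnemonic.seedFromMnemonic(phrase, passPhrase = "correct horse")

  println(phrase)      // space-separated words from the bundled English word list
  println(seed.length) // hash output length in bytes
}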
Example 80
Source File: ProcessUtils.scala    From spark-integration   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy.k8s.integrationtest

import java.util.concurrent.TimeUnit

import scala.collection.mutable.ArrayBuffer
import scala.io.Source

object ProcessUtils extends Logging {
  
  def executeProcess(fullCommand: Array[String], timeout: Long): Seq[String] = {
    val pb = new ProcessBuilder().command(fullCommand: _*)
    pb.redirectErrorStream(true)
    val proc = pb.start()
    val outputLines = new ArrayBuffer[String]
    Utils.tryWithResource(proc.getInputStream)(
      Source.fromInputStream(_, "UTF-8").getLines().foreach { line =>
        logInfo(line)
        outputLines += line
      })
    assert(proc.waitFor(timeout, TimeUnit.SECONDS),
      s"Timed out while executing ${fullCommand.mkString(" ")}")
    assert(proc.exitValue == 0, s"Failed to execute ${fullCommand.mkString(" ")}")
    outputLines
  }
} 
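A usage sketch for executeProcess (not part of the original project); the command is illustrative:

import org.apache.spark.deploy.k8s.integrationtest.ProcessUtils

object ProcessUtilsExample {
  def main(args: Array[String]): Unit = {
    // Runs "kubectl version" with a 60 second timeout and prints the captured output lines.
    ProcessUtils.executeProcess(Array("kubectl", "version"), timeout = 60L).foreach(println)
  }
}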
Example 81
Source File: FileUtil.scala    From wookiee   with Apache License 2.0 5 votes vote down vote up
package com.webtrends.harness.utils

import java.io.File
import java.nio.file.{FileSystems, Files, Path}

import scala.io.Source


// The enclosing object declaration (and its other helpers, which used scala.io.Source) were
// stripped by the example scraper; the wrapper below restores a compilable shape.
object FileUtil {

  def getSymLink(f: File): File = {
    if (f == null)
      throw new NullPointerException("File must not be null")
    val path = FileSystems.getDefault.getPath(f.getPath)
    if (Files.isSymbolicLink(path)) {
      f.getCanonicalFile
    } else {
      f.getAbsoluteFile
    }
  }
} 
Example 82
Source File: TestBroadcastVariables.scala    From spark-dev   with GNU General Public License v3.0 5 votes vote down vote up
package examples

import org.apache.spark.{ SparkContext, SparkConf }
import org.apache.spark.rdd.RDD
import org.apache.spark.broadcast.Broadcast

import scala.io.Source
import scala.util.{ Try, Success, Failure }
import scala.collection.mutable.Map


// The enclosing object and the Spark driver code that broadcast the loaded map were stripped
// by the example scraper; the wrapper below restores a compilable shape.
object TestBroadcastVariables {

	def loadCSVFile(filename: String): Option[Map[String, String]] = {
		val countries = Map[String, String]()

		Try {
			val bufferedSource = Source.fromFile(filename)

			for (line <- bufferedSource.getLines) {
				val Array(country, capital) = line.split(",").map(_.trim)
				countries += country -> capital
			}

			bufferedSource.close()
			return Some(countries)

		}.toOption
	}
} 
Example 83
Source File: AvroSchemaGeneratorSpec.scala    From hydra   with Apache License 2.0 5 votes vote down vote up
package hydra.avro.util

import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.databind.node.ObjectNode
import org.scalatest.matchers.should.Matchers
import org.scalatest.funspec.AnyFunSpecLike

import scala.io.Source


class AvroSchemaGeneratorSpec extends Matchers with AnyFunSpecLike {

  val mapper = new ObjectMapper
  val converter = new AvroSchemaGenerator()

  val json = Source
    .fromFile(
      Thread.currentThread.getContextClassLoader
        .getResource("avro-test.json")
        .getFile
    )
    .getLines()
    .mkString

  describe("The json to avro schema converter") {
    it("Should include a valid namespace and a valid name") {
      val jsonNode = mapper.readTree(converter.convert(json, "hydra", "name"))
      jsonNode.at("/namespace").asText shouldBe "hydra"
      jsonNode.at("/name").asText shouldBe "name"
      jsonNode.at("/type").asText shouldBe "record"
    }
    it("Should have a valid record type") {
      val jsonNode = mapper.readTree(converter.convert(json, "hydra", "name"))
      val arrayNode = jsonNode.at("/fields")
      arrayNode.get(0).at("/type/type").asText shouldBe "record"
    }

    it("Should throw an exception with null values") {
      val jsonNode = mapper.readTree(json)
      jsonNode.asInstanceOf[ObjectNode].set("dummyString", null)
      intercept[IllegalArgumentException] {
        converter.convert(jsonNode.toString(), "hydra", "name")
      }
    }
    it("Should convert booleans") {
      val clipViewJson =
        """
          |{
          |	"clipId": "shawn-wildermuth|front-end-web-app-html5-javascript-css-m01|front-end-web-app-html5-javascript-css-m1-02",
          |	"clipModuleIndex": 1,
          |	"clipName": "front-end-web-app-html5-javascript-css-m1-02",
          |	"contentIndexPosition": 99999,
          |	"countsTowardTrialLimits": false,
          |	"courseName": "front-end-web-app-html5-javascript-css",
          |	"courseTitle": "Front-End Web Development Quick Start With HTML5, CSS, and JavaScript",
          |	"ipAddress": "127.0.0.1",
          |	"moduleAuthorHandle": "shawn-wildermuth",
          |	"moduleId": "shawn-wildermuth|front-end-web-app-html5-javascript-css-m01",
          |	"moduleName": "front-end-web-app-html5-javascript-css-m01",
          |	"online": true,
          |	"royaltiesPaid": true,
          |	"started": "2016-11-30T20:30:45.3136582Z",
          |	"userHandle": "44bbf444-ba44-444a-b444-b444bebb4b4b"
          |}
        """.stripMargin

      val jsonNode = mapper.readTree(clipViewJson)

      val schema = converter.convert(jsonNode.toString(), "hydra", "name")

      println(schema)
    }
  }
} 
Example 84
Source File: IngestionErrorHandler.scala    From hydra   with Apache License 2.0 5 votes vote down vote up
package hydra.kafka.ingestors

import akka.actor.Actor
import com.pluralsight.hydra.avro.JsonToAvroConversionException
import hydra.common.config.ConfigSupport._
import hydra.avro.registry.JsonToAvroConversionExceptionWithMetadata
import hydra.common.config.ConfigSupport
import hydra.core.ingest.RequestParams.HYDRA_KAFKA_TOPIC_PARAM
import hydra.core.protocol.GenericIngestionError
import hydra.core.transport.Transport.Deliver
import hydra.kafka.producer.AvroRecord
import org.apache.avro.Schema
import spray.json.DefaultJsonProtocol

import scala.io.Source


class IngestionErrorHandler
    extends Actor
    with ConfigSupport
    with DefaultJsonProtocol {

  import spray.json._

  private implicit val ec = context.dispatcher

  private implicit val hydraIngestionErrorInfoFormat = jsonFormat6(
    HydraIngestionErrorInfo
  )

  private val errorTopic = applicationConfig
    .getStringOpt("ingest.error-topic")
    .getOrElse("_hydra_ingest_errors")

  private lazy val kafkaTransport = context
    .actorSelection(
      applicationConfig
        .getStringOpt(s"transports.kafka.path")
        .getOrElse(s"/user/service/kafka_transport")
    )

  private val errorSchema = new Schema.Parser()
    .parse(Source.fromResource("schemas/HydraIngestError.avsc").mkString)

  override def receive: Receive = {
    case error: GenericIngestionError =>
      kafkaTransport ! Deliver(buildPayload(error))
  }

  private[ingestors] def buildPayload(
      err: GenericIngestionError
  ): AvroRecord = {
    val schema: Option[String] = err.cause match {
      case e: JsonToAvroConversionException             => Some(e.getSchema.toString)
      case e: JsonToAvroConversionExceptionWithMetadata => Some(e.location)
      case e: Exception                                 => None
    }

    val topic = err.request.metadataValue(HYDRA_KAFKA_TOPIC_PARAM)

    val errorInfo = HydraIngestionErrorInfo(
      err.ingestor,
      topic,
      err.cause.getMessage,
      err.request.metadata,
      schema,
      err.request.payload
    ).toJson.compactPrint

    AvroRecord(
      errorTopic,
      errorSchema,
      topic,
      errorInfo,
      err.request.ackStrategy
    )
  }
}

case class HydraIngestionErrorInfo(
    ingestor: String,
    destination: Option[String],
    errorMessage: String,
    metadata: Map[String, String],
    schema: Option[String],
    payload: String
) 
Example 85
Source File: WatchServiceReceiver.scala    From incubator-retired-iota   with Apache License 2.0 5 votes vote down vote up
package org.apache.iota.fey

import java.nio.file.StandardWatchEventKinds._
import java.nio.file.{FileSystems, Path}
import java.io.File
import akka.actor.ActorRef
import org.apache.iota.fey.JsonReceiverActor.JSON_RECEIVED
import play.api.libs.json._

import scala.io.Source

class WatchServiceReceiver(receiverActor: ActorRef) extends JsonReceiver{

  processInitialFiles()

  private val watchService = FileSystems.getDefault.newWatchService()

  def watch(path: Path) : Unit = path.register(watchService, ENTRY_CREATE, ENTRY_MODIFY)

  def getJsonObject(params: String): Option[JsValue] = {
    try{
      val stringJson = Source.fromFile(params).getLines.mkString
      Option(Json.parse(stringJson))
    }catch{
      case e: Exception =>
        log.error("Could not parse JSON", e)
        None
    }
  }

  override def execute(): Unit = {

    val key = watchService.take()
    val eventsIterator = key.pollEvents().iterator()

    while(eventsIterator.hasNext) {
      val event = eventsIterator.next()
      val relativePath = event.context().asInstanceOf[Path]
      val path = key.watchable().asInstanceOf[Path].resolve(relativePath)

      log.debug(s"${event.kind()} --- $path")
      event.kind() match {
        case (ENTRY_CREATE | ENTRY_MODIFY) if path.toString.endsWith(CONFIG.JSON_EXTENSION) =>
          processJson(path.toString, path.toFile)
        case _ =>
      }
    }

    key.reset()
  }

  private[fey] def processJson(path: String, file: File) = {
    try{
      getJsonObject(path) match {
        case Some(orchestrationJSON) =>
          val valid = validJson(orchestrationJSON)
          if(valid && (orchestrationJSON \ JSON_PATH.COMMAND).as[String].toUpperCase != "DELETE"){
            checkForLocation(orchestrationJSON)
          }
          if(valid) {
            receiverActor ! JSON_RECEIVED(orchestrationJSON, file)
          }else{
            log.warn(s"File $path not processed. Incorrect JSON schema")
          }
        case None =>
      }
    } catch {
      case e: Exception =>
        log.error(s"File $path will not be processed", e)
    }
  }

  private def processInitialFiles() = {
    Utils.getFilesInDirectory(CONFIG.JSON_REPOSITORY)
      .filter(file => file.getName.endsWith(CONFIG.JSON_EXTENSION))
      .foreach(file => {
        processJson(file.getAbsolutePath, file)
      })
  }

  override def exceptionOnRun(e: Exception): Unit = {
    e match {
      case e: InterruptedException =>
      case e: Exception => log.error("Watch Service stopped", e)
    }
    watchService.close()
  }

} 
Example 86
Source File: CheckpointProcessor.scala    From incubator-retired-iota   with Apache License 2.0 5 votes vote down vote up
package org.apache.iota.fey

import java.io.File

import akka.actor.ActorRef
import org.apache.iota.fey.JsonReceiverActor.JSON_RECEIVED
import play.api.libs.json.{JsValue, Json}

import scala.io.Source


class CheckpointProcessor(receiverActor: ActorRef) extends JsonReceiver{

  override def run(): Unit = {
    processCheckpointFiles()
  }

  def getJsonObject(params: String): Option[JsValue] = {
    try{
      val stringJson = Source.fromFile(params).getLines.mkString
      Option(Json.parse(stringJson))
    }catch{
      case e: Exception =>
        log.error("Could not parse JSON", e)
        None
    }
  }

  private def processJson(path: String, file: File) = {
    try{
      getJsonObject(path) match {
        case Some(orchestrationJSON) =>
          val valid = validJson(orchestrationJSON)
          if(valid && (orchestrationJSON \ JSON_PATH.COMMAND).as[String].toUpperCase != "DELETE"){
            checkForLocation(orchestrationJSON)
          }
          if(valid) {
            receiverActor ! JSON_RECEIVED(orchestrationJSON, file)
          }else{
            log.warn(s"File $path not processed. Incorrect JSON schema")
          }
          file.delete()
        case None =>
      }
    } catch {
      case e: Exception =>
        log.error(s"File $path will not be processed", e)
    }
  }

  private def processCheckpointFiles() = {
    Utils.getFilesInDirectory(CONFIG.CHECKPOINT_DIR)
      .filter(file => file.getName.endsWith(CONFIG.JSON_EXTENSION))
      .foreach(file => {
        processJson(file.getAbsolutePath, file)
      })
  }

  override def execute(): Unit = {}
  override def exceptionOnRun(e: Exception): Unit = {}
} 
Example 87
Source File: SparkStreamAdapterExample.scala    From eventuate   with Apache License 2.0 5 votes vote down vote up
package com.rbmhtechnology.example.spark

//#spark-stream-adapter
import com.rbmhtechnology.eventuate._
import com.rbmhtechnology.eventuate.adapter.spark.SparkStreamAdapter

import org.apache.spark._
import org.apache.spark.storage.StorageLevel
import org.apache.spark.streaming._
import org.apache.spark.streaming.dstream.DStream

//#

import akka.actor._

import com.rbmhtechnology.eventuate.log.EventLogWriter
import com.rbmhtechnology.eventuate.log.leveldb.LeveldbEventLog

import scala.collection.immutable._
import scala.io.Source

object SparkStreamAdapterExample extends App {
  implicit val system: ActorSystem = ActorSystem(ReplicationConnection.DefaultRemoteSystemName)

  val logName: String = "L"
  val endpoint: ReplicationEndpoint = new ReplicationEndpoint(id = "1", logNames = Set(logName), logFactory = logId => LeveldbEventLog.props(logId), connections = Set())
  val log: ActorRef = endpoint.logs(logName)
  val writer: EventLogWriter = new EventLogWriter("writer", log)

  endpoint.activate()

  //#spark-stream-adapter
  val sparkConfig = new SparkConf(true)
    .setAppName("adapter")
    .setMaster("local[4]")
  val sparkContext = new SparkContext(sparkConfig)
  val sparkStreamingContext = new StreamingContext(sparkContext, Seconds(1))

  // Create an Eventuate Spark stream adapter
  val sparkStreamAdapter = new SparkStreamAdapter(
    sparkStreamingContext, system.settings.config)

  // Create a DStream from event log L by connecting to its replication endpoint
  val stream: DStream[DurableEvent] = sparkStreamAdapter.eventStream(
    id = "s1", host = "127.0.0.1", port = 2552, logName = "L",
    fromSequenceNr = 1L, storageLevel = StorageLevel.MEMORY_ONLY)

  // For processing in strict event storage order, use repartition(1)
  stream.repartition(1).foreachRDD(rdd => rdd.foreach(println))

  // Start event stream processing
  sparkStreamingContext.start()
  //#

  // Generate new events from stdin
  val lines = Source.stdin.getLines()
  def prompt(): Unit = {
    if (lines.hasNext) lines.next() match {
      case "exit" =>
        sparkStreamingContext.stop(stopSparkContext = true)
        system.terminate()
      case line =>
        writer.write(Seq(line))
        prompt()
    }
  }
  prompt()
} 
Example 88
Source File: OrderExample.scala    From eventuate   with Apache License 2.0 5 votes vote down vote up
package com.rbmhtechnology.example.ordermgnt

import akka.actor._

import com.rbmhtechnology.eventuate._
import com.rbmhtechnology.eventuate.VersionedAggregate._
import com.rbmhtechnology.eventuate.log.leveldb.LeveldbEventLog
import com.typesafe.config.ConfigFactory

import scala.concurrent.Future
import scala.io.Source
import scala.util._

class OrderExample(manager: ActorRef, view: ActorRef) extends Actor {
  import OrderActor._
  import OrderView._

  val lines = Source.stdin.getLines

  def receive = {
    case GetStateSuccess(state) =>
      state.values.foreach(printOrder)
      prompt()
    case GetStateFailure(cause) =>
      println(cause.getMessage)
      prompt()
    case SaveSnapshotSuccess(orderId, metadata) =>
      println(s"[${orderId}] saved snapshot at sequence number ${metadata.sequenceNr}")
      prompt()
    case SaveSnapshotFailure(orderId, cause) =>
      println(s"[${orderId}] save snapshot failed: ${cause}")
      cause.printStackTrace()
      prompt()
    case GetUpdateCountSuccess(orderId, count) =>
      println(s"[${orderId}] update count = ${count}")
      prompt()
    case CommandSuccess(_) =>
      prompt()
    case CommandFailure(_, cause: ConflictDetectedException[Order]) =>
      println(s"${cause.getMessage}, select one of the following versions to resolve conflict")
      printOrder(cause.versions)
      prompt()
    case CommandFailure(_, cause) =>
      println(cause.getMessage)
      prompt()
    case line: String => line.split(' ').toList match {
      case "state" :: Nil                => manager ! GetState
      case "count" :: id :: Nil          => view ! GetUpdateCount(id)
      case "create" :: id :: Nil         => manager ! CreateOrder(id)
      case "cancel" :: id :: Nil         => manager ! CancelOrder(id)
      case "save" :: id :: Nil           => manager ! SaveSnapshot(id)
      case "add" :: id :: item :: Nil    => manager ! AddOrderItem(id, item)
      case "remove" :: id :: item :: Nil => manager ! RemoveOrderItem(id, item)
      case "resolve" :: id :: idx :: Nil => manager ! Resolve(id, idx.toInt)
      case Nil                           => prompt()
      case "" :: Nil                     => prompt()
      case na :: nas                     => println(s"unknown command: ${na}"); prompt()
    }
  }

  def prompt(): Unit = {
    if (lines.hasNext) lines.next() match {
      case "exit" => context.system.terminate()
      case line   => self ! line
    }
  }

  override def preStart(): Unit =
    prompt()
}

object OrderExample extends App {
  val recover = args(1) == "recover"
  val system = ActorSystem(ReplicationConnection.DefaultRemoteSystemName, ConfigFactory.load(args(0)))
  val endpoint = ReplicationEndpoint(id => LeveldbEventLog.props(id, "s"))(system)

  import system.dispatcher

  def initialize(): Future[Unit] =
    if (recover) endpoint.recover() else Future.successful(endpoint.activate())

  initialize() onComplete {
    case Failure(e) =>
      println(s"Recovery failed: ${e.getMessage}")
      system.terminate()
    case Success(_) =>
      val manager = system.actorOf(Props(new OrderManager(endpoint.id, endpoint.logs(ReplicationEndpoint.DefaultLogName))))
      val view = system.actorOf(Props(new OrderView(endpoint.id, endpoint.logs(ReplicationEndpoint.DefaultLogName))))
      val driver = system.actorOf(Props(new OrderExample(manager, view)).withDispatcher("eventuate.cli-dispatcher"))
  }
} 
Example 89
Source File: MavenCoordinatesListReader.scala    From exodus   with MIT License 5 votes vote down vote up
package com.wixpress.build.maven

import java.nio.file.{Files, Path}

import scala.io.Source

object MavenCoordinatesListReader {
  def coordinatesIn(filePath:Path):Set[Coordinates] = {
    val lines = Source.fromInputStream(Files.newInputStream(filePath)).getLines().toSet
    coordinatesInText(lines)
  }

  def coordinatesInText(content: Set[String]):Set[Coordinates] = {
    content
      .map(_.trim)
      .filterNot(_.isEmpty)
      .filterNot(_.startsWith("#"))
      .map(l=>Coordinates.deserialize(l))
  }
} 
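A small sketch of the reader above (not from the original project); the coordinate strings are illustrative and assume the group:artifact:version form handled by Coordinates.deserialize:

import com.wixpress.build.maven.MavenCoordinatesListReader

object CoordinatesReaderExample extends App {
  // Comments and blank lines are filtered out before deserialization.
  val lines = Set(
    "# third-party dependencies",
    "com.google.guava:guava:30.1-jre",
    "",
    "org.scala-lang:scala-library:2.12.12"
  )

  MavenCoordinatesListReader.coordinatesInText(lines).foreach(println)
}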
Example 90
Source File: MigratorApplication.scala    From exodus   with MIT License 5 votes vote down vote up
package com.wix.bazel.migrator.app

import scala.io.Source


object MigratorApplication extends MigratorApp {
  migrate()

  def migrate(): Unit = {
    printHeader()
    new PublicMigrator(configuration).migrate()
  }

  private def printHeader(): Unit = {
    println(Source.fromInputStream(MigratorApplication.getClass.getResourceAsStream("/banner.txt")).mkString)
    println(s"starting migration with configuration [$configuration]")
  }
} 
Example 91
Source File: gihyo_6_3_TwitterStreamSuite.scala    From gihyo-spark-book-example   with Apache License 2.0 5 votes vote down vote up
package jp.gihyo.spark.ch06

import java.nio.file.Files

import scala.collection.mutable
import scala.io.Source

import twitter4j.{Status, TwitterObjectFactory}

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.StreamingContextWrapper

import jp.gihyo.spark.{SparkFunSuite, TestStreamingContext}


class gihyo_6_3_TwitterStreamSuite extends SparkFunSuite with TestStreamingContext {

  test("run") {
    val lines = mutable.Queue[RDD[Status]]()
    val ds = ssc.queueStream(lines)
    val clock = new StreamingContextWrapper(ssc).manualClock
    gihyo_6_3_TwitterStream.run(
      sc,
      ds,
      Files.createTempDirectory("TwitterTag").toString,
      Files.createTempDirectory("TwitterWords").toString)
    val checkpointDir = Files.createTempDirectory("StreamingUnitTest").toString
    ssc.checkpoint(checkpointDir)
    ssc.start()

    (1 to 2).foreach { case i =>
      // test data
      lines += sc.makeRDD(Seq(
        MockTweetGenerator.createMockStatusFromJson(),
        MockTweetGenerator.createMockStatusFromJson(),
        MockTweetGenerator.createMockStatusFromJson(),
        MockTweetGenerator.createMockStatusFromJson()))
      clock.advance(1000)
      Thread.sleep(1000)
    }
  }
}

object MockTweetGenerator {
  // Creates a tweet status from a JSON file
  def createMockStatusFromJson(): Status = {
    val jsonFile = getClass.getResource("/streaming/test-tweet.json").getPath
    TwitterObjectFactory.createStatus(Source.fromFile(jsonFile).getLines().mkString)
  }
} 
Example 92
Source File: Utils.scala    From Scala-Machine-Learning-Projects   with MIT License 5 votes vote down vote up
package com.packt.ScalaML.HAR

import scala.io.Source

object Utils {
  // Useful constants: these are the separate, normalised input features for the neural network
  private val INPUT_SIGNAL_TYPES = Array(
    "body_acc_x_",
    "body_acc_y_",
    "body_acc_z_",
    "body_gyro_x_",
    "body_gyro_y_",
    "body_gyro_z_",
    "total_acc_x_",
    "total_acc_y_",
    "total_acc_z_"
  )

  // Output classes: used to learn how to classify
  private val LABELS = Array(
    "WALKING", 
    "WALKING_UPSTAIRS", 
    "WALKING_DOWNSTAIRS", 
    "SITTING", 
    "STANDING", 
    "LAYING"
  )

  def loadData(dataPath: String, name: String): Array[Array[Array[Float]]] = {
    val dataSignalsPaths = INPUT_SIGNAL_TYPES.map( signal => s"$dataPath/${signal}${name}.txt" )
    val signals = dataSignalsPaths.map { path => 
      Source.fromFile(path).mkString.split("\n").map { line => 
        line.replaceAll("  ", " ").trim().split(" ").map(_.toFloat) }
    }

    val inputDim = signals.length
    val numSamples = signals(0).length
    val timeStep = signals(0)(0).length  

    (0 until numSamples).map { n => 
      (0 until timeStep).map { t =>
        (0 until inputDim).map( i => signals(i)(n)(t) ).toArray
      }.toArray
    }.toArray
  }

  def loadLabels(labelPath: String): Array[Float] = {
    Source.fromFile(labelPath).mkString.split("\n").map(_.toFloat - 1)
  }
} 
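loadData and loadLabels above read whole files with mkString and leave their Sources open. A minimal sketch, assuming a plain whitespace-separated numeric file (SignalFileReader is an illustrative name), that parses the same kind of matrix line by line and closes the Source:

import scala.io.Source

object SignalFileReader {
  // Parses a whitespace-separated numeric text file into rows of Float values.
  def readMatrix(path: String): Array[Array[Float]] = {
    val source = Source.fromFile(path)
    try {
      source.getLines()
        .map(_.trim)
        .filter(_.nonEmpty)
        .map(_.split("\\s+").map(_.toFloat))
        .toArray // force evaluation while the source is still open
    } finally {
      source.close()
    }
  }
}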
Example 93
Source File: CSVImageMetadataReader.scala    From Scala-Machine-Learning-Projects   with MIT License 5 votes vote down vote up
package Yelp.Preprocessor

import scala.io.Source

object CSVImageMetadataReader {  
      
  def readBusinessToImageLabels(csv: String, rows: List[Int] = List(-1)): Map[Int, String]  = {
    val reader = readMetadata(csv)
    reader.drop(1)
      .map(x => x match {
        case x :: Nil => (x(0).toInt, "-1")
        case _ => (x(0).toInt, x(1).split(" ").head)
      }).toMap
  }  
} 
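The readMetadata helper used above is not shown in this excerpt. A hypothetical stand-in (name, signature and CSV layout are assumptions, not the project's actual code) built on scala.io.Source might look like this:

import scala.io.Source

object CSVMetadataSupport {
  // Hypothetical readMetadata: loads a CSV file and splits every line on commas.
  def readMetadata(csv: String): List[List[String]] = {
    val source = Source.fromFile(csv)
    try source.getLines().map(_.split(",").toList).toList
    finally source.close()
  }
}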
Example 94
Source File: GrpcSyncServerSimulation.scala    From grpc-gatling   with MIT License 5 votes vote down vote up
package ch.tamedia.gatling.tests

import ch.tamedia.gatling.GrpcCustomCheck
import ch.tamedia.gatling.actions.impl.{GrpcAsyncCallAction, GrpcSyncCallAction}
import ch.tamedia.noname.server.grpc.endpoint.log.LogResponse
import com.trueaccord.scalapb.GeneratedMessage
import io.gatling.core.Predef._
import scala.concurrent.duration._

import scala.io.Source

class GrpcSyncServerSimulation extends Simulation {
  import ch.tamedia.gatling.Predef._

  val host = "localhost"
  val port = 50051


  val json: String = Source.fromFile("src/test/resources/sample_request.json").getLines.mkString

  val grpcConfig = GRPC()

  val grpcScenario = scenario("Test GRPC server")
      .exec(grpcCall(GrpcAsyncCallAction("async", host, port, json)).check(new GrpcCustomCheck((s: GeneratedMessage) => {
        s.asInstanceOf[LogResponse].message.equals("OK")
      })))
      .exec(grpcCall(GrpcSyncCallAction("sync", host, port, json)).check(new GrpcCustomCheck((s: GeneratedMessage) => {
        s.asInstanceOf[LogResponse].message.equals("OK")
      })))

  setUp(
    grpcScenario.inject(
       atOnceUsers(10),
       rampUsers(10) over(5 seconds),
       constantUsersPerSec(20) during(15 seconds),
       heavisideUsers(1000) over(20 seconds))
  ).protocols(grpcConfig)

} 
Example 95
Source File: TestUtils.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.utils

import java.io.{FileReader, ByteArrayInputStream}
import breeze.linalg.DenseMatrix
import breeze.stats.distributions.{Gaussian, RandBasis, ThreadLocalRandomGenerator, Rand}
import edu.berkeley.cs.amplab.mlmatrix.RowPartitionedMatrix
import org.apache.commons.io.IOUtils
import org.apache.commons.math3.random.MersenneTwister
import org.apache.spark.SparkContext

import scala.io.Source
import scala.util.Random


object TestUtils {

  // Assumed helper (the original excerpt omits its definition): fills an x*y*z buffer with random values.
  def genData(x: Int, y: Int, z: Int): Array[Double] =
    Array.fill(x * y * z)(Random.nextDouble)

  def genChannelMajorArrayVectorizedImage(x: Int, y: Int, z: Int): ChannelMajorArrayVectorizedImage = {
    ChannelMajorArrayVectorizedImage(genData(x, y, z), ImageMetadata(x,y,z))
  }

  def genRowColumnMajorByteArrayVectorizedImage(x: Int, y: Int, z: Int): RowColumnMajorByteArrayVectorizedImage = {
    RowColumnMajorByteArrayVectorizedImage(genData(x,y,z).map(_.toByte), ImageMetadata(x,y,z))
  }

  def createRandomMatrix(
      sc: SparkContext,
      numRows: Int,
      numCols: Int,
      numParts: Int,
      seed: Int = 42): RowPartitionedMatrix = {

    val rowsPerPart = numRows / numParts
    val matrixParts = sc.parallelize(1 to numParts, numParts).mapPartitionsWithIndex { (index, part) =>
      val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed+index)))
      Iterator(DenseMatrix.rand(rowsPerPart, numCols, Gaussian(0.0, 1.0)(randBasis)))
    }
    RowPartitionedMatrix.fromMatrix(matrixParts.cache())
  }

  def createLocalRandomMatrix(numRows: Int, numCols: Int, seed: Int = 42): DenseMatrix[Double] = {
    val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed)))
    DenseMatrix.rand(numRows, numCols, Gaussian(0.0, 1.0)(randBasis))
  }
} 
Example 96
Source File: GlobalSapSQLContext.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql

import java.io.File

import com.sap.spark.util.TestUtils
import com.sap.spark.{GlobalSparkContext, WithSQLContext}
import org.apache.spark.SparkContext
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{BoundReference, Cast}
import org.apache.spark.unsafe.types._
import org.apache.spark.sql.types._
import org.scalatest.Suite

import scala.io.Source

trait GlobalSapSQLContext extends GlobalSparkContext with WithSQLContext {
  self: Suite =>

  override implicit def sqlContext: SQLContext = GlobalSapSQLContext._sqlc

  override protected def setUpSQLContext(): Unit =
    GlobalSapSQLContext.init(sc)

  override protected def tearDownSQLContext(): Unit =
    GlobalSapSQLContext.reset()

  def getDataFrameFromSourceFile(sparkSchema: StructType, path: File): DataFrame = {
    val conversions = sparkSchema.toSeq.zipWithIndex.map({
      case (field, index) =>
        Cast(BoundReference(index, StringType, nullable = true), field.dataType)
    })
    val data = Source.fromFile(path)
      .getLines()
      .map({ line =>
      val stringRow = InternalRow.fromSeq(line.split(",", -1).map(UTF8String.fromString))
      Row.fromSeq(conversions.map({ c => c.eval(stringRow) }))
    })
    val rdd = sc.parallelize(data.toSeq, numberOfSparkWorkers)
    sqlContext.createDataFrame(rdd, sparkSchema)
  }
}

object GlobalSapSQLContext {

  private var _sqlc: SQLContext = _

  private def init(sc: SparkContext): Unit =
    if (_sqlc == null) {
      _sqlc = TestUtils.newSQLContext(sc)
    }

  private def reset(): Unit = {
    if (_sqlc != null) {
      _sqlc.catalog.unregisterAllTables()
    }
  }

} 
Example 97
Source File: TestUtils.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package com.sap.spark.util

import java.util.Locale

import scala.io.Source
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.{Row, SQLContext, SapSQLContext}
import org.apache.spark.sql.hive.SapHiveContext
import org.apache.spark.sql.sources.sql.SqlLikeRelation
import org.apache.spark.sql.sources.{BaseRelation, CatalystSource, Table}
import org.apache.spark.sql.types.StructType
import org.mockito.Matchers._
import org.mockito.Mockito._

import scala.tools.nsc.io.Directory
import scala.util.{Failure, Success}


object TestUtils {

  // Assumed helper (omitted from this excerpt): resolves a test resource on the classpath to a file path.
  private def getFileFromClassPath(fileName: String): String =
    getClass.getResource(fileName).getPath

  def parsePTestFile(fileName: String): List[(String, String, String)] = {
    val filePath = getFileFromClassPath(fileName)
    val fileContents = Source.fromFile(filePath).getLines
      .map(p => p.stripMargin.trim)
      .filter(p => !p.isEmpty && !p.startsWith("//")) // filter empty rows and comments
      .mkString("\n")
    val p = new PTestFileParser

    // strip semicolons from query and parsed
    p(fileContents) match {
      case Success(lines) =>
        lines.map {
          case (query, parsed, expect) =>
            (stripSemicolon(query).trim, stripSemicolon(parsed).trim, expect.trim)
        }
      case Failure(ex) => throw ex
    }
  }

  private def stripSemicolon(sql: String): String =
    if (sql.endsWith(";")) {
      sql.substring(0, sql.length-1)
    } else {
      sql
    }

  def withTempDirectory[A](f: Directory => A): A = {
    val dir = Directory.makeTemp()
    try {
      f(dir)
    } finally {
      dir.deleteIfExists()
    }
  }
} 
Example 98
Source File: VirtualScreeningTest.scala    From MaRe   with Apache License 2.0 5 votes vote down vote up
package se.uu.it.mare

import java.io.File
import java.util.UUID

import scala.io.Source
import scala.util.Properties

import org.apache.spark.SharedSparkContext
import org.junit.runner.RunWith
import org.scalatest.FunSuite
import org.scalatest.junit.JUnitRunner

private object SDFUtils {
  def parseIDsAndScores(sdf: String): Array[(String, String)] = {
    sdf.split("\\n\\$\\$\\$\\$\\n").map { mol =>
      val lines = mol.split("\\n")
      (lines(0), lines.last)
    }
  }
}

@RunWith(classOf[JUnitRunner])
class VirtualScreeningTest extends FunSuite with SharedSparkContext {

  private val tmpDir = new File(Properties.envOrElse("TMPDIR", "/tmp"))

  test("Virtual Screening") {

    sc.hadoopConfiguration.set("textinputformat.record.delimiter", "\n$$$$\n")
    val mols = sc.textFile(getClass.getResource("sdf/molecules.sdf").getPath)

    // Parallel execution with MaRe
    val hitsParallel = new MaRe(mols)
      .map(
        inputMountPoint = TextFile("/input.sdf", "\n$$$$\n"),
        outputMountPoint = TextFile("/output.sdf", "\n$$$$\n"),
        imageName = "mcapuccini/oe:latest",
        command = "fred -receptor /var/openeye/hiv1_protease.oeb " +
          "-hitlist_size 0 " +
          "-conftest none " +
          "-dock_resolution Low " +
          "-dbase /input.sdf " +
          "-docked_molecule_file /output.sdf")
      .reduce(
        inputMountPoint = TextFile("/input.sdf", "\n$$$$\n"),
        outputMountPoint = TextFile("/output.sdf", "\n$$$$\n"),
        imageName = "mcapuccini/sdsorter:latest",
        command = "sdsorter -reversesort='FRED Chemgauss4 score' " +
          "-keep-tag='FRED Chemgauss4 score' " +
          "-nbest=30 " +
          "/input.sdf " +
          "/output.sdf")
      .rdd.collect.mkString("\n$$$$\n")

    // Serial execution
    val inputFile = new File(getClass.getResource("sdf/molecules.sdf").getPath)
    val dockedFile = new File(tmpDir, "mare_test_" + UUID.randomUUID.toString)
    dockedFile.createNewFile
    dockedFile.deleteOnExit
    val outputFile = new File(tmpDir, "mare_test_" + UUID.randomUUID.toString)
    outputFile.createNewFile
    outputFile.deleteOnExit
    DockerHelper.run(
      imageName = "mcapuccini/oe:latest",
      command = "fred -receptor /var/openeye/hiv1_protease.oeb " +
        "-hitlist_size 0 " +
        "-conftest none " +
        "-dock_resolution Low " +
        "-dbase /input.sdf " +
        "-docked_molecule_file /docked.sdf",
      bindFiles = Seq(inputFile, dockedFile),
      volumeFiles = Seq(new File("/input.sdf"), new File("/docked.sdf")),
      forcePull = false)
    DockerHelper.run(
      imageName = "mcapuccini/sdsorter:latest",
      command = "sdsorter -reversesort='FRED Chemgauss4 score' " +
        "-keep-tag='FRED Chemgauss4 score' " +
        "-nbest=30 " +
        "/docked.sdf " +
        "/output.sdf",
      bindFiles = Seq(dockedFile, outputFile),
      volumeFiles = Seq(new File("/docked.sdf"), new File("/output.sdf")),
      forcePull = false)
    val hitsSerial = Source.fromFile(outputFile).mkString

    // Test
    val parallel = SDFUtils.parseIDsAndScores(hitsParallel)
    val serial = SDFUtils.parseIDsAndScores(hitsSerial)
    assert(parallel.deep == serial.deep)

  }

} 
Example 99
Source File: Main.scala    From time-series-demo   with Apache License 2.0 5 votes vote down vote up
package mesosphere.crimedemo

import java.io.BufferedInputStream
import java.net.URI
import java.util.zip.GZIPInputStream

import org.tukaani.xz.XZInputStream

import scala.io.Source

object Main {

  lazy val log = org.slf4j.LoggerFactory.getLogger(getClass.getName)

  def main(args: Array[String]): Unit = {
    val conf = new Conf(args)
    val publisher = new KafkaPublisher(conf.brokers())
    val topic = conf.topic()
    val sleep = 1000L / conf.eventsPerSecond()
    val uri = new URI(conf.uri())
    val inputStream = new BufferedInputStream(uri.toURL.openStream())

    val wrappedStream = if (conf.uri().endsWith(".gz")) {
      new GZIPInputStream(inputStream)
    }
    else if (conf.uri().endsWith(".xz")) {
      new XZInputStream(inputStream)
    }
    else {
      inputStream
    }
    val source = Source.fromInputStream(wrappedStream)

    var done = 0

    log.info(s"Reading crime from ${conf.uri()} and publishing to ${conf.brokers()} every ${sleep}ms")

    source.getLines().foreach(line => {
      publisher.publishKafka(topic, line.getBytes)
      done += 1

      if (done % 1000 == 0) {
        log.info(s"$done lines done")
      }

      Thread.sleep(sleep)
    })

    log.info(s"$done lines done")
  }
} 
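The example above wraps the raw stream in a GZIP or XZ decompressor before handing it to Source.fromInputStream. A smaller sketch of the same idea for the gzip case only (the path is a placeholder; it avoids the external XZ dependency):

import java.io.{BufferedInputStream, FileInputStream}
import java.util.zip.GZIPInputStream
import scala.io.Source

object GzipReadDemo {
  def main(args: Array[String]): Unit = {
    // Placeholder path: expects a gzip-compressed text file.
    val in = new GZIPInputStream(new BufferedInputStream(new FileInputStream("events.txt.gz")))
    val source = Source.fromInputStream(in)
    try source.getLines().take(5).foreach(println)
    finally source.close() // closing the Source also closes the wrapped stream
  }
}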
Example 100
Source File: ExtractApplicationProperties.scala    From rug   with GNU General Public License v3.0 5 votes vote down vote up
package com.atomist.rug.kind.java

import com.atomist.tree.content.project.{ConfigValue, Configuration, SimpleConfigValue, SimpleConfiguration}
import com.atomist.source.FileArtifact
import org.apache.commons.lang3.StringUtils

import scala.collection.mutable.ListBuffer
import scala.io.Source


class ExtractApplicationProperties(source: String) extends Function[FileArtifact, Configuration] {

  override def apply(f: FileArtifact): Configuration = {
    val isWhiteSpace: String => Boolean = line => StringUtils.isWhitespace(line)
    val isComment: String => Boolean = line => !isWhiteSpace(line) && line.dropWhile(c => c.isWhitespace).startsWith("#")
    val isContent: String => Boolean = line => !(isWhiteSpace(line) || isComment(line))

    trait State
    object InComment extends State
    object InBlanks extends State

    var state: State = InComment
    var comment = ""
    val configValues = new ListBuffer[ConfigValue]()

    // Strip # and whitespace from comments (respecting multiline comments)
    def extractComment(comment: String): String = {

      def toCommentContentLine(l: String) = {
        val r = l.dropWhile(c => c.isWhitespace || '#'.equals(c))
        r
      }

      val r = comment.lines.map(l => toCommentContentLine(l)).mkString("\n")
      r
    }

    // Return None if not a valid property line
    def parseContentLine(line: String): Option[ConfigValue] = {
      val stripped = line.dropWhile(c => c.isWhitespace)
      val idx = stripped.indexOf("=")
      if (idx == -1) {
        None
      }
      else {
        val (key, value) = stripped.splitAt(idx)
        val profile = ""
        Some(SimpleConfigValue(key, value.substring(1), source, profile, description = extractComment(comment)))
      }
    }

    def appendToComment(l: String): Unit = {
      if ("".equals(comment)) comment = l
      else comment = comment + "\n" + l
    }

    val lines = Source.fromString(f.content).getLines()
    for (line <- lines) {
      if (isContent(line)) {
        parseContentLine(line).foreach(cv => configValues.append(cv))
        comment = ""
      }
      else state match {
        case InBlanks if isComment(line) =>
          state = InComment
          appendToComment(line)
        case InComment if isComment(line) || isWhiteSpace(line) =>
          appendToComment(line)
        case InComment =>
          comment = ""
          state = InBlanks
        case _ =>
      }
    }
    new SimpleConfiguration(configValues)
  }
} 
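The extractor above uses Source.fromString to iterate over in-memory text exactly as it would over a file. A minimal, standalone sketch of that pattern (the sample properties content is made up):

import scala.io.Source

object FromStringDemo {
  def main(args: Array[String]): Unit = {
    val content = "# database settings\ndb.host=localhost\ndb.port=5432\n"
    // Source.fromString exposes the same getLines interface as a file-backed Source.
    val pairs = Source.fromString(content)
      .getLines()
      .map(_.trim)
      .filterNot(l => l.isEmpty || l.startsWith("#"))
      .map { l => val Array(k, v) = l.split("=", 2); k -> v }
      .toMap
    println(pairs) // Map(db.host -> localhost, db.port -> 5432)
  }
}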
Example 101
Source File: OpenApiGenerationTest.scala    From udash-core   with Apache License 2.0 5 votes vote down vote up
package io.udash
package rest.openapi

import com.avsystem.commons.serialization.json.JsonStringOutput
import io.udash.rest.RestTestApi

import scala.io.Source
import org.scalatest.funsuite.AnyFunSuite

class OpenApiGenerationTest extends AnyFunSuite {
  test("openapi for RestTestApi") {
    val openapi = RestTestApi.openapiMetadata.openapi(
      Info("Test API", "0.1", description = "Some test REST API"),
      servers = List(Server("http://localhost"))
    )
    val expected = Source.fromInputStream(getClass.getResourceAsStream("/RestTestApi.json")).getLines().mkString("\n")
    assert(JsonStringOutput.writePretty(openapi) == expected)
  }
} 
Example 102
Source File: CSVFile.scala    From random-projections-at-berlinbuzzwords   with Apache License 2.0 5 votes vote down vote up
package com.stefansavev.randomprojections.file

import scala.io.Source
import scala.util.matching.Regex

case class CSVFileOptions(sep: String = ",",
                          hasHeader: Boolean = true,
                          quote: Option[String] = None,
                          onlyTopRecords: Option[Int] = None)

class CSVFile private (_header: Option[Array[String]], source: Source, iter: Iterator[String],  opt: CSVFileOptions) {
  def numColumns: Option[Int] = _header.map(_.length)
  def header: Option[Array[String]] = _header

  def processLine(line: String): Array[String] = {
    CSVFile.processLine(opt, line) //TODO: verify num columns
  }

  def getLines(): Iterator[Array[String]] = iter.map(line => processLine(line))

  def close() = source.close()
}

object CSVFile {
  def processLine(opt: CSVFileOptions, line: String): Array[String] = {
    line.split(Regex.quote(opt.sep))
  }

  def read(fileName: String, opt: CSVFileOptions): CSVFile = {
    val source = Source.fromFile(fileName)
    val linesIterator = source.getLines()
    val iterator = opt.onlyTopRecords match {
      case None => linesIterator
      case Some(n) => linesIterator.take(n + 1) //1 is for the header
    }
    val header = if (opt.hasHeader) Some(processLine(opt, iterator.next())) else None
    new CSVFile(header, source, iterator, opt)
  }
} 
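A short usage sketch for the CSVFile wrapper above (assuming the CSVFile and CSVFileOptions definitions shown here are in scope; the file name is a placeholder). The caller is responsible for close(), which releases the underlying Source:

object CSVFileDemo {
  def main(args: Array[String]): Unit = {
    val csv = CSVFile.read("data.csv", CSVFileOptions(onlyTopRecords = Some(10)))
    try {
      csv.header.foreach(h => println(h.mkString(" | ")))
      csv.getLines().foreach(fields => println(fields.mkString(", ")))
    } finally {
      csv.close() // closes the scala.io.Source held by CSVFile
    }
  }
}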
Example 103
Source File: PrintUtils.scala    From random-projections-at-berlinbuzzwords   with Apache License 2.0 5 votes vote down vote up
package com.stefansavev

import java.io.PrintWriter

import scala.io.{Codec, Source}

object PrintUtils {
  def columnVectorToFile(fileName: String, v: Array[Double]): Unit = {
    val writer = new PrintWriter(fileName)
    for (a <- v) {
      writer.println(a.toString)
    }
    writer.close()
  }

  def stringsToFile(fileName: String, v: Array[String]): Unit = {
    val writer = new PrintWriter(fileName)
    for (a <- v) {
      writer.println(a)
    }
    writer.close()
  }

  def withPrintWriter(fileName: String, body: PrintWriter => Unit): Unit = {
    val writer = new PrintWriter(fileName, "UTF-8")
    body(writer)
    writer.close()
  }

}

object FileReadUtils {
  def withLinesIterator[T](fileName: String)(body: Iterator[String] => T): T = {
    val source = Source.fromFile(fileName)(Codec.UTF8)
    val result = body(source.getLines())
    source.close()
    result
  }
} 
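withLinesIterator above is a loan pattern: the Source stays open only for the duration of the callback. A usage sketch (assuming the FileReadUtils object above is in scope; the path is a placeholder) that counts non-empty lines:

object LineCountDemo {
  def main(args: Array[String]): Unit = {
    // The iterator must be consumed inside the block, because the Source is closed on return.
    val nonEmptyLines = FileReadUtils.withLinesIterator("measurements.txt") { lines =>
      lines.count(_.trim.nonEmpty)
    }
    println(s"non-empty lines: $nonEmptyLines")
  }
}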
Example 104
Source File: ViewHelpers.scala    From dr-cla   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package helpers

import java.net.URL

import javax.inject.Inject
import play.api.{Configuration, Environment}

import scala.io.Source
import scala.util.Try

class ViewHelpers @Inject()
(configuration: Configuration, environment: Environment) {
  val organizationName = configuration.get[String]("app.organization.name")
  val maybeOrganizationLogoUrl = configuration.getOptional[String]("app.organization.logo-url")
  val maybeOrganizationUrl = configuration.getOptional[String]("app.organization.url")
  val maybeOrganizationClaUrl = configuration.getOptional[String]("app.organization.cla-url")

  val claText: String = {
    maybeOrganizationClaUrl
      .flatMap(claUrl => Try(new URL(claUrl)).toOption)
      .orElse(environment.resource("sample-cla.html"))
      .map { claUrl =>
        val text = Source.fromURL(claUrl)
        text.mkString
      } getOrElse {
        throw new Exception("You must set the ORG_CLA environment variable.")
      }
  }
} 
Example 105
Source File: DatabaseScriptTestUtils.scala    From openwhisk   with Apache License 2.0 5 votes vote down vote up
package org.apache.openwhisk.core.database.test

import scala.concurrent.duration.DurationInt
import scala.io.Source
import org.scalatest.Matchers
import org.scalatest.concurrent.IntegrationPatience
import org.scalatest.concurrent.ScalaFutures
import akka.actor.ActorSystem
import common.WaitFor
import common.WhiskProperties
import pureconfig._
import pureconfig.generic.auto._
import spray.json._
import spray.json.DefaultJsonProtocol._
import org.apache.openwhisk.common.Logging
import org.apache.openwhisk.core.ConfigKeys
import org.apache.openwhisk.core.WhiskConfig
import org.apache.openwhisk.core.database.CouchDbRestClient
import org.apache.openwhisk.core.database.CouchDbConfig

trait DatabaseScriptTestUtils extends ScalaFutures with Matchers with WaitFor with IntegrationPatience {

  case class DatabaseUrl(dbProtocol: String, dbUsername: String, dbPassword: String, dbHost: String, dbPort: String) {
    def url = s"$dbProtocol://$dbUsername:$dbPassword@$dbHost:$dbPort"

    def safeUrl = s"$dbProtocol://$dbHost:$dbPort"
  }

  val python = WhiskProperties.python
  val config = loadConfigOrThrow[CouchDbConfig](ConfigKeys.couchdb)
  val dbProtocol = config.protocol
  val dbHost = config.host
  val dbPort = config.port
  val dbUsername = config.username
  val dbPassword = config.password
  val dbPrefix = WhiskProperties.getProperty(WhiskConfig.dbPrefix)
  val dbUrl = DatabaseUrl(dbProtocol, dbUsername, dbPassword, dbHost, dbPort.toString)

  def retry[T](task: => T) = org.apache.openwhisk.utils.retry(task, 10, Some(500.milliseconds))

  
  def waitForView(db: CouchDbRestClient, designDoc: String, viewName: String, numDocuments: Int) = {
    waitfor(() => {
      val view = db.executeView(designDoc, viewName)().futureValue
      view shouldBe 'right
      view.right.get.fields("rows").convertTo[List[JsObject]].length == numDocuments
    }, totalWait = 2.minutes)
  }
} 
Example 106
Source File: DockerClientWithFileAccess.scala    From openwhisk   with Apache License 2.0 5 votes vote down vote up
package org.apache.openwhisk.core.containerpool.docker

import java.io.File
import java.nio.file.Paths

import akka.actor.ActorSystem
import akka.stream.alpakka.file.scaladsl.FileTailSource
import akka.stream.scaladsl.{FileIO, Source => AkkaSource}
import akka.util.ByteString

import scala.concurrent.ExecutionContext
import scala.concurrent.Future
import scala.concurrent.blocking
import spray.json.DefaultJsonProtocol._
import spray.json._
import org.apache.openwhisk.common.Logging
import org.apache.openwhisk.common.TransactionId
import org.apache.openwhisk.core.containerpool.ContainerId
import org.apache.openwhisk.core.containerpool.ContainerAddress

import scala.io.Source
import scala.concurrent.duration.FiniteDuration

class DockerClientWithFileAccess(dockerHost: Option[String] = None,
                                 containersDirectory: File = Paths.get("containers").toFile)(
  executionContext: ExecutionContext)(implicit log: Logging, as: ActorSystem)
    extends DockerClient(dockerHost)(executionContext)
    with DockerApiWithFileAccess {

  implicit private val ec = executionContext

  
  def rawContainerLogs(containerId: ContainerId,
                       fromPos: Long,
                       pollInterval: Option[FiniteDuration]): AkkaSource[ByteString, Any]
} 
Example 107
Source File: EnronEmail.scala    From Mastering-Scala-Machine-Learning   with MIT License 5 votes vote down vote up
package org.akozlov.chapter07

import scala.io.Source

import scala.util.hashing.{MurmurHash3 => Hash}
import scala.util.matching.Regex

import java.util.{Date => javaDateTime}

import java.io.File
import net.liftweb.json._
import Extraction._
import Serialization.{read, write}


object EnronEmail {

  val emailRe = """[a-zA-Z0-9_.+\-][email protected]""".r.unanchored

  def emails(s: String) = {
    for (email <- emailRe findAllIn s) yield email
  }

  def hash(s: String) = {
    java.lang.Integer.MAX_VALUE.toLong + Hash.stringHash(s)
  }

  val messageRe =
    """(?:Message-ID:\s+)(<[A-Za-z0-9_.+\-@]+>)(?s)(?:.*?)(?m)
      |(?:Date:\s+)(.*?)$(?:.*?)
      |(?:From:\s+)([a-zA-Z0-9_.+\-]+@[a-zA-Z0-9.\-]+)(?:.*?)
      |(?:Subject: )(.*?)$""".stripMargin.r.unanchored

  case class Relation(from: String, fromId: Long, to: String, toId: Long, source: String, messageId: String, date: javaDateTime, subject: String)

  implicit val formats = Serialization.formats(NoTypeHints)

  def getFileTree(f: File): Stream[File] =
    f #:: (if (f.isDirectory) f.listFiles().toStream.flatMap(getFileTree) else Stream.empty)

  def main(args: Array[String]) {
    getFileTree(new File(args(0))).par.map {
      file => {
        "\\.$".r findFirstIn file.getName match {
          case Some(x) =>
            try {
              val src = Source.fromFile(file, "us-ascii")
              val message = try src.mkString finally src.close()
              message match {
                case messageRe(messageId, date, from, subject) =>
                  val fromLower = from.toLowerCase
                  for (to <- emails(message).filter(_ != fromLower).toList.distinct)
                    println(write(Relation(fromLower, hash(fromLower), to, hash(to), file.toString, messageId, new javaDateTime(date), subject)))
                case _ =>
              }
            } catch {
              case e: Exception => System.err.println(e)
            }
          case _ =>
        }
      }
    }
  }
} 
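The parser above opens each mail file with an explicit "us-ascii" charset and closes it in a finally block. A minimal sketch of that encoding-aware read in isolation, using scala.io.Codec (file name and charset are placeholders):

import java.io.File
import scala.io.{Codec, Source}

object CharsetReadDemo {
  // Reads a whole file with an explicit character encoding and always closes the Source.
  def readAll(file: File, charset: String): String = {
    val src = Source.fromFile(file)(Codec(charset))
    try src.mkString finally src.close()
  }

  def main(args: Array[String]): Unit = {
    println(readAll(new File("message.txt"), "US-ASCII").take(200))
  }
}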
Example 108
Source File: HTMLReportGenerator.scala    From regressr   with Apache License 2.0 5 votes vote down vote up
package org.ebayopensource.regression.internal.reportGenerator

import java.io.{BufferedWriter, File, FileWriter}

import org.fusesource.scalate.{TemplateEngine, TemplateSource}

import scala.io.Source
import scala.util.{Failure, Success, Try}


class HTMLReportGenerator extends ReportGenerator {

  val scalateEngine = new TemplateEngine

  def getContent(reportEntries: Seq[ReportEntry]) : Try[String] = Try {

    if (reportEntries.size==0) {
      throw new IllegalArgumentException("Cannot generate report with 0 reportEntries.")
    }

    val templateText = Source.fromInputStream(getClass.getResourceAsStream("/report/index.html")).mkString
    scalateEngine.escapeMarkup = false

    val regressionCount :Seq[Int] = reportEntries.flatMap {
      reportEntry => {
        reportEntry.requestReportEntries.map {
          requestReportEntry => {
            requestReportEntry.reqMessages.size
          }
        }
      }
    }

    val renderedContent = scalateEngine.layout(TemplateSource.fromText("/com/ebay/n/regression/text.ssp", templateText),
      Map("reportEntries" -> reportEntries, "regressionCount" -> regressionCount.sum))
    renderedContent
  }

  def writeAndGetFile(content: String, reportFilePath: String) : Try[File] = Try {
    val outputFile = new File(reportFilePath)
    val bw = new BufferedWriter(new FileWriter(outputFile))
    bw.write(content)
    bw.close()
    outputFile
  }

  override def generate(reportEntries: Seq[ReportEntry], reportFilePath: String): Try[File] = Try {
    getContent(reportEntries).flatMap {
      content => writeAndGetFile(content, reportFilePath)
    } match {
      case Success(file) => file
      case Failure(t) => throw t
    }
  }
} 
Example 109
Source File: FileDataStore.scala    From regressr   with Apache License 2.0 5 votes vote down vote up
package org.ebayopensource.regression.internal.datastore

import java.io.{File, PrintWriter}

import scala.io.Source
import scala.util.Try


class FileDataStore(path: String) extends BaseDataStore {

  new File(path).mkdir()

  override def put(key: String, value: String): Unit = {
    val pw = new PrintWriter(new File(buildFilePath(key)))
    pw.write(value)
    pw.flush()
    pw.close()
  }

  private def buildFilePath(key: String) = {
    if (key.endsWith(".strategy")) s"${path}${key}" else s"${path}${key}.json"
  }

  override def get(key: String): Option[String] = {
    val file = new File(buildFilePath(key))
    if (!file.exists()) {
      None
    }
    else {
      Some(Source.fromFile(file).mkString)
    }
  }

  override def close(): Unit = () // no-op: nothing to release for the file-backed store

  override def listStrategies(): Seq[String] = {
    new File(path).listFiles().filter {
      file => file.getName.endsWith(".strategy")
    }.map(file => file.getName.replaceFirst(s"${BaseDataStore.strategyPrefix}", "").replaceFirst(".strategy", ""))
  }

  override def remove(key: String): Unit = {
    new File(s"${path}${key}").delete()
  }

  override def deleteRecordingFiles(testIdentifier: String): Try[Unit] = Try {
    new File(path).listFiles().filter {
      file => file.getName.startsWith(s"${BaseDataStore.strategyPrefix}${testIdentifier}.")
    }.map(file => file.delete())
  }
}

object FileDataStore {
  val PATH="./tmp/"
} 
Example 110
Source File: YAMLTestStrategyReaderTest.scala    From regressr   with Apache License 2.0 5 votes vote down vote up
package org.ebayopensource.regression.internal.reader

import org.ebayopensource.regression.UnitSpec

import scala.io.Source
import scala.util.{Failure, Success}


class YAMLTestStrategyReaderTest extends UnitSpec {

  "A reader" should "be able to read a valid strategy file with 1 request" in {
    val strategyContent = Source.fromInputStream(getClass.getResourceAsStream("/yaml/valid_strategy_simple_one_request.yaml")).mkString.replace("\t","")
    YAMLTestStrategyReader.read(strategyContent) match {
      case Success(t) => {
        assert(t.requests.size == 1)
        assert(t.headers.size>0)
      }
      case Failure(t) => assert(false, s"Strategy file was valid. Should not throw exception ${t.getMessage}")
    }
  }

} 
Example 111
Source File: SwaggerAPI.scala    From swagger-check   with MIT License 5 votes vote down vote up
package de.leanovate.swaggercheck.schema

import java.io.InputStream

import com.fasterxml.jackson.annotation.{JsonCreator, JsonProperty}
import com.fasterxml.jackson.core.JsonFactory
import com.fasterxml.jackson.databind.annotation.JsonDeserialize
import com.fasterxml.jackson.databind.{DeserializationFeature, JsonNode, MappingJsonFactory, ObjectMapper}
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import de.leanovate.swaggercheck.schema.jackson.JsonSchemaModule
import de.leanovate.swaggercheck.schema.model.{Definition, Parameter}

import scala.collection.JavaConverters._
import scala.io.Source

@JsonDeserialize(builder = classOf[SwaggerAPIBuilder])
case class SwaggerAPI(
                       basePath: Option[String],
                       paths: Map[String, Map[String, Operation]],
                       definitions: Map[String, Definition]
                     )

object SwaggerAPI {
  val jsonMapper = objectMapper(new MappingJsonFactory())
  val yamlMapper = objectMapper(new YAMLFactory())

  def parse(jsonOrYaml: String): SwaggerAPI = {
    val mapper = if (jsonOrYaml.trim().startsWith("{")) jsonMapper else yamlMapper
    mapper.readValue(jsonOrYaml, classOf[SwaggerAPI])
  }

  def parse(swaggerInput: InputStream): SwaggerAPI = {
    parse(Source.fromInputStream(swaggerInput).mkString)
  }

  def objectMapper(jsonFactory: JsonFactory): ObjectMapper = {
    val mapper = new ObjectMapper(jsonFactory)
    mapper.registerModule(DefaultScalaModule)
    mapper.registerModule(JsonSchemaModule)
    mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES)
    mapper
  }
}

class SwaggerAPIBuilder @JsonCreator()(
                                        @JsonProperty("basePath") basePath: Option[String],
                                        @JsonProperty("consumes") consumes: Option[Seq[String]],
                                        @JsonProperty("produces") produces: Option[Seq[String]],
                                        @JsonProperty("paths") paths: Option[Map[String, JsonNode]],
                                        @JsonProperty("definitions") definitions: Option[Map[String, Definition]],
                                        @JsonProperty("parameters") globalParameters: Option[Map[String, Parameter]]
                                      ) {
  def build(): SwaggerAPI = {
    val defaultConsumes = consumes.map(_.toSet).getOrElse(Set.empty)
    val defaultProduces = produces.map(_.toSet).getOrElse(Set.empty)
    SwaggerAPI(basePath,
      paths.getOrElse(Map.empty).map {
        case (path, pathDefinition) =>
          val defaultParameters = Option(pathDefinition.get("parameters")).map {
             node =>
               node.iterator().asScala.map {
                 element => SwaggerAPI.jsonMapper.treeToValue(element, classOf[OperationParameter])
               }.toSeq
          }.getOrElse(Seq.empty)

          basePath.map(_ + path).getOrElse(path) -> pathDefinition.fields().asScala.filter(_.getKey != "parameters").map {
            entry =>
              val operation = SwaggerAPI.jsonMapper.treeToValue(entry.getValue, classOf[Operation])
              entry.getKey.toUpperCase -> operation.withDefaults(defaultParameters, defaultConsumes, defaultProduces).resolveGlobalParameters(globalParameters.getOrElse(Map()))
          }.toMap
      },
      definitions.getOrElse(Map.empty))
  }
} 
Example 112
Source File: L8-10-11UDF.scala    From prosparkstreaming   with Apache License 2.0 5 votes vote down vote up
package org.apress.prospark

import scala.io.Source
import scala.reflect.runtime.universe

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.json4s.jackson.JsonMethods.parse
import org.json4s.jvalue2extractable
import org.json4s.string2JsonInput

object CdrUDFApp {

  case class Cdr(squareId: Int, timeInterval: Long, countryCode: Int,
    smsInActivity: Float, smsOutActivity: Float, callInActivity: Float,
    callOutActivity: Float, internetTrafficActivity: Float)

  def main(args: Array[String]) {
    if (args.length != 4) {
      System.err.println(
        "Usage: CdrUDFApp <appname> <batchInterval> <hostname> <port>")
      System.exit(1)
    }
    val Seq(appName, batchInterval, hostname, port) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(batchInterval.toInt))

    val sqlC = new SQLContext(ssc.sparkContext)
    import sqlC.implicits._

    def getCountryCodeMapping() = {
      implicit val formats = org.json4s.DefaultFormats
      parse(Source.fromURL("http://country.io/phone.json").mkString).extract[Map[String, String]].map(_.swap)
    }

    def getCountryNameMapping() = {
      implicit val formats = org.json4s.DefaultFormats
      parse(Source.fromURL("http://country.io/names.json").mkString).extract[Map[String, String]]
    }

    def getCountryName(mappingPhone: Map[String, String], mappingName: Map[String, String], code: Int) = {
      mappingName.getOrElse(mappingPhone.getOrElse(code.toString, "NotFound"), "NotFound")
    }

    val getCountryNamePartial = getCountryName(getCountryCodeMapping(), getCountryNameMapping(), _: Int)

    sqlC.udf.register("getCountryNamePartial", getCountryNamePartial)

    val cdrStream = ssc.socketTextStream(hostname, port.toInt)
      .map(_.split("\\t", -1))
      .foreachRDD(rdd => {
        val cdrs = seqToCdr(rdd).toDF()
        cdrs.registerTempTable("cdrs")

        sqlC.sql("SELECT getCountryNamePartial(countryCode) AS countryName, COUNT(countryCode) AS cCount FROM cdrs GROUP BY countryCode ORDER BY cCount DESC LIMIT 5").show()

      })

    ssc.start()
    ssc.awaitTermination()
  }

  def seqToCdr(rdd: RDD[Array[String]]): RDD[Cdr] = {
    rdd.map(c => c.map(f => f match {
      case x if x.isEmpty() => "0"
      case x => x
    })).map(c => Cdr(c(0).toInt, c(1).toLong, c(2).toInt, c(3).toFloat,
      c(4).toFloat, c(5).toFloat, c(6).toFloat, c(7).toFloat))
  }

} 
Example 113
Source File: L2-1FirstApp.scala    From prosparkstreaming   with Apache License 2.0 5 votes vote down vote up
package org.apress.prospark

import scala.io.Source

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext

object TranslateApp {
  def main(args: Array[String]) {
    if (args.length != 4) {
      System.err.println(
        "Usage: TranslateApp <appname> <book_path> <output_path> <language>")
      System.exit(1)
    }
    val Seq(appName, bookPath, outputPath, lang) = args.toSeq

    val dict = getDictionary(lang)

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)
    val sc = new SparkContext(conf)
    val book = sc.textFile(bookPath)
    val translated = book.map(line => line.split("\\s+").map(word => dict.getOrElse(word, word)).mkString(" "))
    translated.saveAsTextFile(outputPath)
  }

  def getDictionary(lang: String): Map[String, String] = {
    if (!Set("German", "French", "Italian", "Spanish").contains(lang)) {
      System.err.println(
        "Unsupported language: %s".format(lang))
      System.exit(1)
    }
    val url = "http://www.june29.com/IDP/files/%s.txt".format(lang)
    println("Grabbing dictionary from: %s".format(url))
    Source.fromURL(url, "ISO-8859-1").mkString
      .split("\\r?\\n")
      .filter(line => !line.startsWith("#"))
      .map(line => line.split("\\t"))
      .map(tkns => (tkns(0).trim, tkns(1).trim)).toMap
  }

} 
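getDictionary above fetches and decodes a remote text file with Source.fromURL and an ISO-8859-1 encoding, but never closes the Source. A minimal sketch of the same fetch with the encoding passed as a Codec and the Source closed (the URL is a placeholder):

import scala.io.{Codec, Source}

object UrlReadDemo {
  // Downloads a URL as ISO-8859-1 text and closes the Source when done.
  def fetch(url: String): String = {
    val src = Source.fromURL(url)(Codec.ISO8859)
    try src.mkString finally src.close()
  }

  def main(args: Array[String]): Unit = {
    println(fetch("https://example.com/").take(200))
  }
}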
Example 114
Source File: L3-1DStreams.scala    From prosparkstreaming   with Apache License 2.0 5 votes vote down vote up
package org.apress.prospark

import scala.io.Source
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
import org.apache.hadoop.io.Text

object StreamingTranslateApp {
  def main(args: Array[String]) {
    if (args.length != 4) {
      System.err.println(
        "Usage: StreamingTranslateApp <appname> <book_path> <output_path> <language>")
      System.exit(1)
    }
    val Seq(appName, bookPath, outputPath, lang) = args.toSeq

    val dict = getDictionary(lang)

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)
    val ssc = new StreamingContext(conf, Seconds(1))

    val book = ssc.textFileStream(bookPath)
    val translated = book.map(line => line.split("\\s+").map(word => dict.getOrElse(word, word)).mkString(" "))
    translated.saveAsTextFiles(outputPath)

    ssc.start()
    ssc.awaitTermination()
  }

  def getDictionary(lang: String): Map[String, String] = {
    if (!Set("German", "French", "Italian", "Spanish").contains(lang)) {
      System.err.println(
        "Unsupported language: %s".format(lang))
      System.exit(1)
    }
    val url = "http://www.june29.com/IDP/files/%s.txt".format(lang)
    println("Grabbing dictionary from: %s".format(url))
    Source.fromURL(url, "ISO-8859-1").mkString
      .split("\\r?\\n")
      .filter(line => !line.startsWith("#"))
      .map(line => line.split("\\t"))
      .map(tkns => (tkns(0).trim, tkns(1).trim)).toMap
  }

} 
Example 115
Source File: OrderedDocFreq.scala    From gemini   with GNU General Public License v3.0 5 votes vote down vote up
package tech.sourced.gemini

import java.io.{File, PrintWriter}

import scala.io.Source

import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper



case class OrderedDocFreq(docs: Int, tokens: IndexedSeq[String], df: collection.Map[String, Int]) {
  def saveToJson(filename: String): Unit = {
    val mapper = new ObjectMapper() with ScalaObjectMapper
    mapper.registerModule(DefaultScalaModule)
    val out = new PrintWriter(filename)
    mapper.writeValue(out, Map(
      "docs" -> docs,
      "tokens" -> tokens,
      "df" -> df
    ))
    out.close()
  }
}

object OrderedDocFreq {
  def fromJson(file: File): OrderedDocFreq = {
    val docFreqMap = parseFile[Map[String, Any]](file)
    val docs = docFreqMap.get("docs") match {
      case Some(v) => v.asInstanceOf[Int]
      case None => throw new RuntimeException(s"Can not parse key 'docs' in docFreq:${file.getAbsolutePath}")
    }
    val df = docFreqMap.get("df") match {
      case Some(v) => v.asInstanceOf[Map[String, Int]]
      case None => throw new RuntimeException(s"Can not parse key 'df' in docFreq:${file.getAbsolutePath}")
    }
    val tokens = docFreqMap.get("tokens") match {
      case Some(v) => v.asInstanceOf[List[String]].toArray
      case None => throw new RuntimeException(s"Can not parse key 'tokens' in docFreq:${file.getAbsolutePath}")
    }
    OrderedDocFreq(docs, tokens, df)
  }

  def parseFile[T: Manifest](file: File): T = {
    val json = Source.fromFile(file)
    val mapper = new ObjectMapper with ScalaObjectMapper
    mapper.registerModule(DefaultScalaModule)
    mapper.readValue[T](json.reader)
  }
} 
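parseFile above hands json.reader to Jackson but never closes the Source. A variant sketch (same Jackson Scala module APIs as the example) that closes it once the value has been read:

import java.io.File
import scala.io.Source
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.fasterxml.jackson.module.scala.experimental.ScalaObjectMapper

object DocFreqJsonSketch {
  def parseFile[T: Manifest](file: File): T = {
    val mapper = new ObjectMapper with ScalaObjectMapper
    mapper.registerModule(DefaultScalaModule)
    val json = Source.fromFile(file)
    try mapper.readValue[T](json.reader) // readValue consumes the reader eagerly
    finally json.close()
  }
}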
Example 116
Source File: WeightedMinHashSpec.scala    From gemini   with GNU General Public License v3.0 5 votes vote down vote up
package tech.sourced.gemini

import org.scalatest.{FlatSpec, Matchers}
import scala.io.Source
import org.scalatest.Tag

// Tag to mark tests that depend on Python
object PythonDep extends Tag("tech.sourced.tags.PythonDep")

class WeightedMinHashSpec extends FlatSpec with Matchers {

  "WeightedMinHash constructor" should "initialize correctly" taggedAs(PythonDep) in {
    val mg = new WeightedMinHash(2, 4, 1)

    mg.rs.length should be(4)
    mg.lnCs.length should be(4)
    mg.betas.length should be(4)
    mg.sampleSize should be(4)
  }

  def readCSV(filename: String): Array[Array[Float]] = {
    Source
      .fromFile(s"src/test/resources/weighted-minhash/csv/${filename}")
      .getLines()
      .map(_.split(",").map(_.trim.toFloat))
      .toArray
  }

  "WeightedMinHash hash" should "hash tiny data" taggedAs(PythonDep) in {
    val input = readCSV("tiny-data.csv")

    val rs = readCSV("tiny-rs.csv")
    val lnCs = readCSV("tiny-ln_cs.csv")
    val betas = readCSV("tiny-betas.csv")

    input.zipWithIndex.foreach {
      case (v, i) =>
        val wmh = new WeightedMinHash(v.length, 128, rs, lnCs, betas)
        val hashes = wmh.hash(v)
        val realHashes = readCSV(s"tiny-hashes-${i}.csv").map(_.map(_.toLong))

        hashes should be(realHashes)
    }
  }

  "WeightedMinHash hash" should "hash big data" taggedAs(PythonDep) in {
    val input = readCSV("big-data.csv")

    val rs = readCSV("big-rs.csv")
    val lnCs = readCSV("big-ln_cs.csv")
    val betas = readCSV("big-betas.csv")

    input.zipWithIndex.foreach {
      case (v, i) =>
        val wmh = new WeightedMinHash(v.length, 128, rs, lnCs, betas)
        val hashes = wmh.hash(v)
        val realHashes = readCSV(s"big-hashes-${i}.csv").map(_.map(_.toLong))

        hashes should be(realHashes)
    }
  }
} 
Example 117
Source File: EventHubsSourceOffsetSuite.scala    From azure-event-hubs-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.eventhubs

import java.io.File

import org.apache.spark.sql.execution.streaming._
import org.apache.spark.sql.streaming.OffsetSuite
import org.apache.spark.sql.test.SharedSQLContext

class EventHubsSourceOffsetSuite extends OffsetSuite with SharedSQLContext {

  compare(one = EventHubsSourceOffset(("t", 0, 1L)), two = EventHubsSourceOffset(("t", 0, 2L)))

  compare(one = EventHubsSourceOffset(("t", 0, 1L), ("t", 1, 0L)),
          two = EventHubsSourceOffset(("t", 0, 2L), ("t", 1, 1L)))

  compare(one = EventHubsSourceOffset(("t", 0, 1L), ("T", 0, 0L)),
          two = EventHubsSourceOffset(("t", 0, 2L), ("T", 0, 1L)))

  compare(one = EventHubsSourceOffset(("t", 0, 1L)),
          two = EventHubsSourceOffset(("t", 0, 2L), ("t", 1, 1L)))

  val ehso1 = EventHubsSourceOffset(("t", 0, 1L))
  val ehso2 = EventHubsSourceOffset(("t", 0, 2L), ("t", 1, 3L))
  val ehso3 = EventHubsSourceOffset(("t", 0, 2L), ("t", 1, 3L), ("t", 1, 4L))

  compare(EventHubsSourceOffset(SerializedOffset(ehso1.json)),
          EventHubsSourceOffset(SerializedOffset(ehso2.json)))

  test("basic serialization - deserialization") {
    assert(
      EventHubsSourceOffset.getPartitionSeqNos(ehso1) ==
        EventHubsSourceOffset.getPartitionSeqNos(SerializedOffset(ehso1.json)))
  }

  test("OffsetSeqLog serialization - deserialization") {
    withTempDir { temp =>
      // use non-existent directory to test whether log make the dir
      val dir = new File(temp, "dir")
      val metadataLog = new OffsetSeqLog(spark, dir.getAbsolutePath)
      val batch0 = OffsetSeq.fill(ehso1)
      val batch1 = OffsetSeq.fill(ehso2, ehso3)

      val batch0Serialized =
        OffsetSeq.fill(batch0.offsets.flatMap(_.map(o => SerializedOffset(o.json))): _*)

      val batch1Serialized =
        OffsetSeq.fill(batch1.offsets.flatMap(_.map(o => SerializedOffset(o.json))): _*)

      assert(metadataLog.add(0, batch0))
      assert(metadataLog.getLatest() === Some(0 -> batch0Serialized))
      assert(metadataLog.get(0) === Some(batch0Serialized))

      assert(metadataLog.add(1, batch1))
      assert(metadataLog.get(0) === Some(batch0Serialized))
      assert(metadataLog.get(1) === Some(batch1Serialized))
      assert(metadataLog.getLatest() === Some(1 -> batch1Serialized))
      assert(
        metadataLog.get(None, Some(1)) ===
          Array(0 -> batch0Serialized, 1 -> batch1Serialized))

      // Adding the same batch does nothing
      metadataLog.add(1, OffsetSeq.fill(LongOffset(3)))
      assert(metadataLog.get(0) === Some(batch0Serialized))
      assert(metadataLog.get(1) === Some(batch1Serialized))
      assert(metadataLog.getLatest() === Some(1 -> batch1Serialized))
      assert(
        metadataLog.get(None, Some(1)) ===
          Array(0 -> batch0Serialized, 1 -> batch1Serialized))
    }
  }

  test("read Spark 2.1.0 offset format") {
    val offset = readFromResource("eventhubs-source-offset-version-2.1.0.txt")
    assert(
      EventHubsSourceOffset(offset) ===
        EventHubsSourceOffset(("ehName1", 0, 456L), ("ehName1", 1, 789L), ("ehName2", 0, 0L)))
  }

  private def readFromResource(file: String): SerializedOffset = {
    import scala.io.Source
    val input = getClass.getResource(s"/$file").toURI
    val str = Source.fromFile(input).mkString
    SerializedOffset(str)
  }
} 
Example 118
Source File: SensorReader.scala    From temperature-machine   with Apache License 2.0 5 votes vote down vote up
package bad.robot.temperature.ds18b20

import bad.robot.temperature.AutoClosing._
import bad.robot.temperature._
import bad.robot.temperature.ds18b20.SensorFile._
import bad.robot.temperature.ds18b20.SensorReader._
import bad.robot.temperature.rrd.Host
import bad.robot.temperature.rrd.Seconds.{apply => _, _}

import scala.io.Source
import scalaz.Scalaz._
import scalaz.\/
import scalaz.\/._


object SensorReader {

  def apply(host: Host, files: List[SensorFile]) = new SensorReader(host, files)

  private val toReading: SensorFile => Error \/ SensorReading = file => {
    for {
      source      <- fromTryCatchNonFatal(Source.fromFile(file)).leftMap(FileError)
      data        <- closingAfterUse(source)(_.getLines().toList).headOption.toRightDisjunction(UnexpectedError("Problem reading file, is it empty?"))
      temperature <- Parser.parse(data)
    } yield SensorReading(file.getParentFile.getName, temperature)
  }

}

class SensorReader(host: Host, sensors: List[SensorFile]) extends TemperatureReader {

  def read: Error \/ Measurement = {
    for {
      files         <- sensors.toNel.toRightDisjunction(FailedToFindFile(BaseFolder))
      temperatures  <- files.map(toReading).sequenceU
    } yield Measurement(host, now(), temperatures.toList)
  }

} 
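toReading above leans on a closingAfterUse loan helper from the project. A sketch in the same spirit (closingAfterUse here is a stand-in, not the project's actual implementation; the sensor path is a placeholder):

import scala.io.Source

object AutoClosingSketch {
  // Loan pattern: run the body, then close the Source whether or not the body throws.
  def closingAfterUse[A](source: Source)(body: Source => A): A =
    try body(source) finally source.close()

  def main(args: Array[String]): Unit = {
    val firstLine = closingAfterUse(Source.fromFile("w1_slave"))(_.getLines().toList.headOption)
    println(firstLine)
  }
}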
Example 119
Source File: DataGenerator.scala    From Scala-for-Machine-Learning-Second-Edition   with MIT License 5 votes vote down vote up
package org.scalaml.unsupervised.functionapprox

import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD

import scala.collection.mutable
import scala.io.Source
import scala.util.Random



// Assumed enclosing class (its header and fields were stripped from this excerpt);
// sourceName is the input data file and nTasks the number of RDD partitions.
final class DataGenerator(sourceName: String, nTasks: Int) {
  private[this] val DELIM = ","    // assumed field separator
  private[this] val RATIO = 0.05   // assumed relative noise amplitude
  private[this] var datasetSize = 0

  def apply(sc: SparkContext): RDD[(Float, Float)] = {
      // Seed the random-noise generator
    val r = new Random(System.currentTimeMillis + Random.nextLong)
    val src = Source.fromFile(sourceName)
    val input = src.getLines.map(_.split(DELIM))
      ./:(mutable.ArrayBuffer[(Float, Float)]())((buf, xy) => {
      val x = addNoise(xy(0).trim.toFloat, r)
      val y = addNoise(xy(1).trim.toFloat, r)
      buf += ((x, y))
    })
    datasetSize = input.size
    val data_rdd = sc.makeRDD(input, nTasks)
    src.close
    data_rdd
  }
    // Original signal + random noise
  private def addNoise(value: Float, r: Random): Float = value*(1.0 + RATIO*(r.nextDouble - 0.5)).toFloat
}

// -------------------------------------  EOF ---------------------------------------------- 
Example 120
Source File: ResourcesLoader.scala    From Scala-for-Machine-Learning-Second-Edition   with MIT License 5 votes vote down vote up
package org.scalaml.spark

import org.apache.spark.sql.DataFrame

sealed trait Extractor {
  protected[this] val delimiter: String
  def extract(line: String): Array[String]
}


final class CSVExtractor extends Extractor {
  override protected[this] val delimiter: String = ","
  override def extract(line: String): Array[String] = line.split(delimiter)
}

private[spark] object ResourcesLoader {
  import scala.io.Source

  type FieldsSet = Iterator[Array[String]]
  final def loadFromLocal(filename: String, extractor: Extractor): FieldsSet = {
    val src = Source.fromFile(filename)
    // getLines() is lazy, so materialize the rows before closing the underlying Source
    val lines = src.getLines().map(extractor.extract(_)).toList
    src.close()
    lines.iterator
  }

  final def loadFromHDFS(pathname: String)(implicit sessionLifeCycle: SessionLifeCycle): DataFrame = {
    import sessionLifeCycle.sparkSession.implicits._
    sessionLifeCycle.sparkContext.textFile(pathname).toDF
  }

  final def getPath(filename: String): Option[String] = Option(getClass.getResource(filename).getPath)
} 
Example 121
Source File: Application.scala    From retail_analytics   with Apache License 2.0 5 votes vote down vote up
package controllers

import scalaz._
import Scalaz._
import scalaz.EitherT._
import scalaz.Validation
//import scalaz.Validation.FlatMap._
import scalaz.NonEmptyList._
import play.api.mvc._
import java.io.File
import scala.io.Source
import org.apache.log4j.Logger
import org.apache.log4j.Level
import models._
import models.stack._

import play.api.libs.json._

object Application extends Controller {

  def index() = Action { implicit request =>
    Ok(views.html.index("Megam Analytics."))
  }

  def upload = Action(parse.multipartFormData) { implicit request =>
       request.body.file("picture").map { picture =>
       import java.io.File
      val filename = picture.filename
      val contentType = picture.contentType
      picture.ref.moveTo(new File("/tmp/"+filename))
     
      models.HDFSFileService.saveFile("/tmp/"+filename) match {
        case Success(succ) => {
          val fu = List(("success" -> succ))
          Redirect("/").flashing(fu: _*)
        }
        case Failure(err) => {
          val fu = List(("error" -> "File doesn't get uploaded"))
          Redirect("/").flashing(fu: _*)
        }
      }
    }.getOrElse {
      val fu = List(("error" -> "File doesn't get uploaded.."))
      Redirect("/").flashing(fu: _*)
    }
  }

  def analysis() = Action { implicit request =>
    val tuple_res = models.Retail.buyingbehaviour(MConfig.recommand_ID.toInt, MConfig.retailfile)

    println("BACK==========================>>>")
    println(tuple_res._1)

    //val finalJson = {
    //  for {
    //    product <- productList
    //  } yield Json.parse(product).as[JsObject]
    //}
    Ok(views.html.finalProducts(tuple_res._1, tuple_res._2))
  }

} 
Example 122
Source File: LibLinTest.scala    From scala-cp   with Apache License 2.0 5 votes vote down vote up
package se.uu.it.cp

import scala.io.Source
import scala.util.Random

import org.junit.runner.RunWith
import org.scalatest.FunSuite
import org.scalatest.junit.JUnitRunner

import de.bwaldvogel.liblinear.Feature
import de.bwaldvogel.liblinear.FeatureNode
import de.bwaldvogel.liblinear.Linear
import de.bwaldvogel.liblinear.Parameter
import de.bwaldvogel.liblinear.Problem
import de.bwaldvogel.liblinear.SolverType

@RunWith(classOf[JUnitRunner])
class LibLinTest extends FunSuite {

  test("Train an inductive classifier with LIBLINEAR") {
 
    // Define a LIBLINEAR data point
    case class LibLinPoint(features: Array[Feature], label: Double)

    // Define a LIBLINEAR underlying algorithm
    class LibLinAlg(val properTrainingSet: Seq[LibLinPoint])
        extends UnderlyingAlgorithm[LibLinPoint] {
      
      // First describe how to access LIBLINEAR data point structure 
      override def makeDataPoint(features: Seq[Double], label: Double) = {
        val libLinFeat = features.zipWithIndex.map {
          case (f, i) =>
            new FeatureNode(i + 1, f).asInstanceOf[Feature]
        }
        LibLinPoint(libLinFeat.toArray, label)
      }
      override def getDataPointFeatures(p: LibLinPoint) = p.features.map(_.getValue)
      override def getDataPointLabel(p: LibLinPoint) = p.label

      // Train a Logistic Regression model
      val lrModel = {
        val problem = new Problem()
        problem.l = properTrainingSet.length
        problem.n = properTrainingSet(0).features.length
        problem.x = properTrainingSet.map(_.features).toArray
        problem.y = properTrainingSet.map(_.label).toArray
        val solver = SolverType.L2R_LR
        val parameter = new Parameter(solver, 1.0, 0.01)
        Linear.train(problem, parameter)
      }
      
      // Define nonconformity measure as probability of wrong prediction
      override def nonConformityMeasure(p: LibLinPoint) = {
        val estimates = Array.fill(2)(0.0)
        Linear.predictProbability(lrModel, p.features, estimates)
        estimates((p.label - 1).abs.toInt)
      }
      
    }
    
    // Load and parse dataset
    val dataPath = getClass.getResource("breast-cancer.data").getPath
    val dataset = Source.fromFile(dataPath).getLines
      .map { line =>
        val split = line.split(" ")
        val label = split(0).toDouble
        val features = split.drop(1).map { featureString =>
          val split = featureString.split(":")
          new FeatureNode(split(0).toInt,split(1).toDouble)
            .asInstanceOf[Feature]
        }
        LibLinPoint(features,label)
      }.toSeq
    
    // Split data
    Random.setSeed(11L)
    val (training,test) = Random.shuffle(dataset).splitAt(450)
    val (properTraining, calibrationSet) = Random.shuffle(training).splitAt(400)
    
    // Train an inductive conformal classifier
    val cp = ICP.trainClassifier(
      new LibLinAlg(properTraining), nOfClasses = 2, calibrationSet)

    // Make some predictions, and compute error fraction
    val significance = 0.05
    val nOfErrors = test.count { p =>
      val pSet = cp.predict(p.features.map(_.getValue), significance)
      !pSet.contains(p.label) // the prediction set misses the true label => error
    }
    val errorFract = nOfErrors.toDouble / test.length

    // Error fraction should be at most significance 
    assert(errorFract <= significance)

  }

} 
Example 123
Source File: RawModel.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.models.benchmark.generator.models

import org.joda.time.DateTime
import org.joda.time.format.DateTimeFormat

import scala.annotation.tailrec
import scala.io.Source
import scala.util.Random

case class RawModel (order_id: String,
                     timestamp: String,
                     client_id: Integer,
                     latitude: Double,
                     longitude: Double,
                     payment_method: String,
                     credit_card: String,
                     shopping_center: String,
                     employee: Integer) {}

object RawModel {

  val Range_client_id = (1, 300)
  val Range_payment_method = Source.fromInputStream(
    this.getClass.getClassLoader.getResourceAsStream("payment-methods.txt")).getLines().toSeq
  val Range_shopping_center = Source.fromInputStream(
    this.getClass.getClassLoader.getResourceAsStream("shopping-centers.txt")).getLines().toSeq
  val Range_employee = (1, 300)
  val Range_quantity = (1, 30)
  val Range_timestap = (0, 60)
  val Range_creditCard = (0, 9)
  val R = Random
  val DigitsCreditCard = 16

  val Range_family_product: Map[String, Map[String,Float]] = Source.fromInputStream(
    this.getClass.getClassLoader.getResourceAsStream("family-products.csv")).getLines().map(x => {
      val splitted = x.split(",")
      (splitted(0), Map(splitted(1) -> splitted(2).toFloat))
    }).toMap

  def generateShoppingCenter(): String = {
    Range_shopping_center(generateRandomInt(0, Range_shopping_center.length - 1))
  }

  def generatePaymentMethod(): String = {
    Range_payment_method(generateRandomInt(0, Range_payment_method.length - 1))
  }

  def generateTimestamp(): String = {
    val datetime = new DateTime().minusDays(generateRandomInt(Range_timestap._1, Range_timestap._2))
    DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss").print(datetime)
  }

  def generateRandomInt(min: Int, max: Int): Int = {
    R.nextInt((max - min) + 1) + min
  }

  @tailrec
  def generateCreditCard(current: String): String = {
    if(current.length != DigitsCreditCard)
      generateCreditCard(current + generateRandomInt(Range_creditCard._1, Range_creditCard._2))
    else current
  }
}

trait RawModelCommonData {

  val geolocations = initGeolocations()
  val clientIdCreditCard: Map[Int, String] =
    initClientIdCreditCard((1 to RawModel.Range_client_id._2).toSeq, Map())
  val clientIdGeo: Map[Int, (Double, Double)] = initClientIdGeo(clientIdCreditCard, geolocations)

  def initGeolocations() : Seq[String] = {
    Source.fromInputStream(
      this.getClass.getClassLoader.getResourceAsStream("geolocations.csv")).getLines().toSeq
  }

  def initClientIdCreditCard(idClients: Seq[Int],
                             clientIdCreditCard: Map[Int, String]): Map[Int, String] = {
    if(idClients.size == 0) {
      clientIdCreditCard
    } else {
      val newIdClients = idClients.init
      val newClientIdCreditCard = clientIdCreditCard + (idClients.last -> RawModel.generateCreditCard(""))
      initClientIdCreditCard(newIdClients, newClientIdCreditCard)
    }
  }

  def initClientIdGeo(clientIdCreditCard: Map[Int, String], geolocations: Seq[String])
  :Map[Int, (Double, Double)] = {
    clientIdCreditCard.map(x => {
      val index = RawModel.generateRandomInt(0, geolocations.size - 1)
      x._1 -> ((geolocations(index)).split(":")(0).toDouble, (geolocations(index)).split(":")(1).toDouble)
    })
  }
} 
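A small usage sketch for the generators above, assuming RawModel and its resource files (payment-methods.txt, shopping-centers.txt, family-products.csv) are on the classpath; the printed output is illustrative only:

// Hypothetical driver exercising a few RawModel generators.
object RawModelSketch extends App {
  val card = RawModel.generateCreditCard("")        // 16 random digits
  val when = RawModel.generateTimestamp()           // now minus 0..60 days
  val shop = RawModel.generateShoppingCenter()      // random line from the resource file
  println(s"credit_card=$card timestamp=$when shopping_center=$shop")
}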
Example 124
Source File: InfoHelper.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta.serving.core.helpers

import akka.event.slf4j.SLF4JLogging
import com.github.nscala_time.time.Imports.{DateTime, DateTimeFormat}
import com.stratio.sparta.serving.core.constants.AppConstant.version
import com.stratio.sparta.serving.core.models.info.AppInfo

import scala.io.Source
import scala.util._

object InfoHelper extends SLF4JLogging {

  val devContact = "[email protected]"
  val supportContact = "[email protected]"
  val license = Try {
    Source.fromInputStream(InfoHelper.getClass.getClassLoader.getResourceAsStream("LICENSE.txt")).mkString
  }.getOrElse("")

  def getAppInfo: AppInfo = {
    Try(Source.fromInputStream(InfoHelper.getClass.getClassLoader.getResourceAsStream("version.txt")).getLines) match {
      case Success(lines) =>
        val pomVersion = lines.next()
        val profileId = lines.next()
        val timestamp = lines.next()
        val pomParsed = if (pomVersion != "${project.version}") pomVersion else version
        val profileIdParsed = if (profileId != "${profile.id}") profileId else ""
        val timestampParsed = {
          if (timestamp != "${timestamp}") timestamp
          else {
            val format = DateTimeFormat.forPattern("yyyy-MM-dd-hh:mm:ss")
            format.print(DateTime.now)
          }
        }
        AppInfo(pomParsed, profileIdParsed, timestampParsed, devContact, supportContact, license)
      case Failure(e) =>
        log.error("Cannot get version info", e)
        throw e
    }
  }
} 
Example 125
Source File: FileSystemInputTest.scala    From sparta   with Apache License 2.0 5 votes vote down vote up
package com.stratio.sparta.plugin.input.fileSystem

import java.io._
import com.stratio.sparta.plugin.TemporalSparkContext
import org.scalatest._


import scala.io.Source

class FileSystemInputTest extends TemporalSparkContext with Matchers {

  val directory = getClass().getResource("/origin.txt")
  val lines = Source.fromURL(directory).getLines().toList
  val parentFile = new File(directory.getPath).getParent


  val properties = Map(("directory", "file://" + parentFile))
  val input = new FileSystemInput(properties)

  "Events counted" should " the same as files created" in {
    val dstream= input.initStream(ssc, "MEMORY_ONLY")
    val totalEvents = ssc.sparkContext.accumulator(0L)

    dstream.print()
    dstream.foreachRDD(rdd => {
      val count = rdd.count()
      println(s"EVENTS COUNT : \t $count")
      totalEvents.add(count)
    })

    ssc.start()

    Thread.sleep(3000)
    val file = new File(parentFile + "/output.txt")
    val out = new PrintWriter(file)
    lines.foreach(l => out.write(l))
    out.close()
    val numFile = if (file.exists()) 1 else 0
    ssc.awaitTerminationOrTimeout(10000)

    assert(totalEvents.value === numFile.toLong)
    file.delete()
  }
} 
Example 126
Source File: LongInputTests.scala    From boson   with Apache License 2.0 5 votes vote down vote up
package io.zink.boson

import bsonLib.BsonObject
import io.netty.util.ResourceLeakDetector
import io.vertx.core.json.JsonObject
import io.zink.boson.bson.bsonImpl.BosonImpl
import org.junit.runner.RunWith
import org.scalatest.FunSuite
import org.scalatest.junit.JUnitRunner
import org.junit.Assert._

import scala.collection.mutable.ArrayBuffer
import scala.concurrent.Await
import scala.concurrent.duration.Duration
import scala.io.Source


@RunWith(classOf[JUnitRunner])
class LongInputTests extends FunSuite {
  ResourceLeakDetector.setLevel(ResourceLeakDetector.Level.ADVANCED)

  val bufferedSource: Source = Source.fromURL(getClass.getResource("/jsonOutput.txt"))
  val finale: String = bufferedSource.getLines.toSeq.head
  bufferedSource.close

  val json: JsonObject = new JsonObject(finale)
  val bson: BsonObject = new BsonObject(json)

  test("extract top field") {
    val expression: String = ".Epoch"
    val boson: Boson = Boson.extractor(expression, (out: Int) => {
      assertTrue(3 == out)
    })
    val res = boson.go(bson.encode.getBytes)
    Await.result(res, Duration.Inf)
  }

  test("extract bottom field") {
    val expression: String = "SSLNLastName"
    val expected: String = "de Huanuco"
    val boson: Boson = Boson.extractor(expression, (out: String) => {
      assertTrue(expected.zip(out).forall(e => e._1.equals(e._2)))
    })
    val res = boson.go(bson.encode.getBytes)
    Await.result(res, Duration.Inf)
  }

  test("extract positions of an Array") {
    val expression: String = "Markets[3 to 5]"
    val mutableBuffer: ArrayBuffer[Array[Byte]] = ArrayBuffer()
    val boson: Boson = Boson.extractor(expression, (out: Array[Byte]) => {
      mutableBuffer += out
    })
    val res = boson.go(bson.encode.getBytes)
    Await.result(res, Duration.Inf)
    assertEquals(3, mutableBuffer.size)
  }

  test("extract further positions of an Array") {
    val expression: String = "Markets[50 to 55]"
    val mutableBuffer: ArrayBuffer[Array[Byte]] = ArrayBuffer()
    val boson: Boson = Boson.extractor(expression, (out: Array[Byte]) => {
      mutableBuffer += out
    })
    val res = boson.go(bson.encode.getBytes)
    Await.result(res, Duration.Inf)
    assertEquals(6, mutableBuffer.size)
  }

  test("size of all occurrences of Key") {
    val expression: String = "Price"
    val mutableBuffer: ArrayBuffer[Float] = ArrayBuffer()
    val boson: Boson = Boson.extractor(expression, (out: Float) => {
      mutableBuffer += out
    })
    val res = boson.go(bson.encode.getBytes)
    Await.result(res, Duration.Inf)
    assertEquals(195, mutableBuffer.size)
  }

} 
Example 127
Source File: JsonSource.scala    From play-json-schema-validator   with Apache License 2.0 5 votes vote down vote up
package com.eclipsesource.schema

import java.io.InputStream
import java.net.URL

import play.api.libs.json._

import scala.io.Source
import scala.util.{Failure, Success, Try}


// Note: the enclosing object header and its fromUrl helper are reconstructed here
// (assumed implementation) so that this snippet is self-contained.
object JsonSource {

  def fromUrl(url: URL): Try[JsValue] =
    Try(Json.parse(Source.fromURL(url).mkString))

  def schemaFromUrl(url: URL)(implicit reads: Reads[SchemaType]): JsResult[SchemaType] = {
    for {
      schemaJson <- JsonSource.fromUrl(url) match {
        case Success(json) => JsSuccess(json)
        case Failure(throwable) => JsError(throwable.getMessage)
      }
      schema <- Json.fromJson[SchemaType](schemaJson)
    } yield schema
  }
} 
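A possible caller of schemaFromUrl might look like the sketch below; the resource name is hypothetical, and an implicit Reads[SchemaType] (as provided by the library) is assumed to be in scope along with the imports shown above:

// Hypothetical usage: load and decode a schema from a classpath resource.
object JsonSourceSketch extends App {
  val schemaUrl: URL = getClass.getResource("/my-schema.json")   // assumed resource name
  JsonSource.schemaFromUrl(schemaUrl) match {
    case JsSuccess(schema, _) => println(s"Loaded schema: $schema")
    case JsError(errors)      => println(s"Failed to decode schema: $errors")
  }
}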
Example 128
Source File: EmrConfig.scala    From sbt-lighter   with Apache License 2.0 5 votes vote down vote up
package sbtlighter

import scala.collection.JavaConverters._
import scala.io.Source

import com.amazonaws.services.elasticmapreduce.model.Configuration
import com.amazonaws.services.s3.AmazonS3
import io.circe.generic.auto._
import io.circe.parser._

case class EmrConfig(
    Classification: String,
    Properties: Option[Map[String, String]],
    Configurations: Option[Seq[EmrConfig]]
) {
  def withProperties(props: (String, String)*) = {
    this.copy(Properties = Some(props.toMap))
  }

  def withEmrConfigs(configs: EmrConfig*) = {
    this.copy(Configurations = Some(configs))
  }

  def toAwsEmrConfig(): Configuration = {
    Some(new Configuration().withClassification(Classification))
      .map { c =>
        Properties.map(props => c.withProperties(props.asJava)).getOrElse(c)
      }
      .map { c =>
        Configurations
          .map { configs =>
            c.withConfigurations(configs.map(_.toAwsEmrConfig): _*)
          }
          .getOrElse(c)
      }
      .get
  }
}

object EmrConfig {
  def apply(classification: String): EmrConfig =
    EmrConfig(classification, None, None)

  def parseJson(jsonString: String) = decode[List[EmrConfig]](jsonString)
  def parseJsonFromS3(s3Url: String)(implicit s3: AmazonS3) = {
    val s3JsonUrl = new S3Url(s3Url)
    val jsonString = s3.getObjectAsString(s3JsonUrl.bucket, s3JsonUrl.key)
    parseJson(jsonString)
  }
} 
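Since this document is about scala.io.Source, here is a sketch of feeding parseJson from a local JSON file rather than S3; the file name is hypothetical and the imports of EmrConfig.scala above are assumed:

// Hypothetical usage: read EMR configuration JSON from disk and decode it.
object EmrConfigSketch extends App {
  val jsonString = Source.fromFile("emr-configs.json").mkString   // assumed local file
  EmrConfig.parseJson(jsonString) match {
    case Right(configs) => configs.foreach(c => println(c.toAwsEmrConfig()))
    case Left(error)    => sys.error(s"Could not decode EMR configs: $error")
  }
}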
Example 129
Source File: TestApp.scala    From akka-http-file-server   with Apache License 2.0 5 votes vote down vote up
package akkahttp

import java.io.File

import akka.actor.ActorSystem
import com.typesafe.config.{Config, ConfigFactory}

import scala.concurrent.Await
import scala.io.Source

object TestApp extends App {

  val testConf: Config = ConfigFactory.load()

  implicit val system = ActorSystem("ServerTest", testConf)
  implicit val dispatcher = system.dispatcher
  val host = "127.0.0.1"
  val port = 9112

  val server = new FileServer(system, host, port)

  //start file server
  val binding = server.start
  val client = new FileServer.Client(system, host, port)

  // upload the file
  val testFile = new File(getClass.getResource("/testfile.txt").toURI())
  val fileHandler = client.upload(testFile)

  //download the file
  val target = File.createTempFile("testapp_download", "")
  val future = fileHandler.flatMap{handler =>
    client.download(handler, target)
  }

  import scala.concurrent.duration._
  Await.result(future, 10 seconds)

  // check the file content.
  Source.fromFile(testFile).foreach{
    print
  }

  println()
  // now you can try to browser http://127.0.0.1:9112/
  println(s"Browser http://${host}:${port} to test download and upload")
  system.awaitTermination()
} 
Example 130
Source File: DefaultRouteTest.scala    From scala-for-beginners   with Apache License 2.0 5 votes vote down vote up
package com.allaboutscala.donutstore.httpserver.routes


import akka.http.scaladsl.testkit.ScalatestRouteTest
import org.scalatest.{Matchers, WordSpec}

import scala.io.Source



class DefaultRouteTest
  extends WordSpec
    with Matchers
    with ScalatestRouteTest
    with TestBase {

  val defaultRoutes = new DefaultRoute().routes()

  "DonutStore" can {
    "have a welcome page at the root end point" in {
      Get("/") ~> defaultRoutes ~> check {
        responseAs[String] shouldEqual welcomePage()
      }
    }
  }

  private def welcomePage(): String = {
    val path = sys.env.getOrElse("WELCOME_PAGE_PATH", "httpServer/src/main/resources/welcome.html")
    Source.fromFile(path).mkString
  }
} 
Example 131
Source File: WordList.scala    From scala-stellar-sdk   with Apache License 2.0 5 votes vote down vote up
package stellar.sdk.key

import scala.io.Source

trait WordList {
  def indexOf(word: String): Option[Int]
  def wordAt(i: Int): String
  def contains(word: String): Boolean = indexOf(word).isDefined
  def separator: String
}

class ArrayBackedWordList(source: => Source, val separator: String = " ") extends WordList {
  lazy val words: Array[String] = source.getLines().toArray

  // TODO (jem) - WordList spec that ensures index can be found with normalized variants.
  override def indexOf(word: String): Option[Int] = Some(words.indexOf(word)).filter(_ >= 0)

  override def wordAt(i: Int): String = {
    require(i >= 0 && i < words.length, s"Word index $i is out of range.")
    words(i)
  }
}

object ChineseSimplifiedWords extends ArrayBackedWordList(Source.fromResource("wordlists/chinese_simplified.txt"))
object ChineseTraditionalWords extends ArrayBackedWordList(Source.fromResource("wordlists/chinese_traditional.txt"))
object CzechWords extends ArrayBackedWordList(Source.fromResource("wordlists/czech.txt"))
object EnglishWords extends ArrayBackedWordList(Source.fromResource("wordlists/english.txt"))
object FrenchWords extends ArrayBackedWordList(Source.fromResource("wordlists/french.txt"))
object ItalianWords extends ArrayBackedWordList(Source.fromResource("wordlists/italian.txt"))
object JapaneseWords extends ArrayBackedWordList(Source.fromResource("wordlists/japanese.txt"), "\u3000")
object KoreanWords extends ArrayBackedWordList(Source.fromResource("wordlists/korean.txt"))
object SpanishWords extends ArrayBackedWordList(Source.fromResource("wordlists/spanish.txt")) 
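A brief usage sketch of the word lists above, assuming the wordlists/*.txt resources are on the classpath; the lookups are illustrative:

// Illustrative lookups against the bundled English word list.
object WordListSketch extends App {
  val first = EnglishWords.wordAt(0)             // first word of the list
  val index = EnglishWords.indexOf("abandon")    // Some(0) if the list follows the usual BIP-39 ordering
  val known = EnglishWords.contains("zzzz")      // false for a word not in the list
  println(s"first=$first index=$index known=$known")
}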
Example 132
Source File: RegexSentenceAnnotator.scala    From jigg   with Apache License 2.0 5 votes vote down vote up
package jigg.pipeline



import java.util.Properties

import scala.io.Source
import scala.xml.{Node, Elem, Text, Atom}
import jigg.util.XMLUtil.RichNode

class RegexSentenceAnnotator(override val name: String, override val props: Properties) extends Annotator {

  @Prop(gloss = "Regular expression to segment lines (if omitted, specified method is used)") var pattern = ""
  @Prop(gloss = "Use predefined segment pattern newLine|point|pointAndNewLine") var method = "pointAndNewLine"
  readProps()

  val splitRegex = pattern match {
    case "" =>
      method match {
        case "newLine" => RegexSentenceAnnotator.newLine
        case "point" => RegexSentenceAnnotator.point
        case "pointAndNewLine" => RegexSentenceAnnotator.pointAndNewLine
        case other => argumentError("method")
      }
    case pattern =>
      pattern.r
  }

  private[this] val sentenceIDGen = jigg.util.IDGenerator("s")

  override def annotate(annotation: Node): Node = {

    annotation.replaceAll("document") { e =>
      val line = e.text
      val sentenceBoundaries = 0 +: splitRegex.findAllMatchIn(line).map(_.end).toVector :+ line.length
      val sentences: Vector[Node] =
        sentenceBoundaries.sliding(2).toVector flatMap { case Seq(begin_, end_) =>

          def isSpace(c: Char) = c == ' ' || c == '\t' || c == '\n'

          val snippet = line.substring(begin_, end_)
          val begin = snippet.indexWhere(!isSpace(_)) match {
            case -1 => begin_ // space only
            case offset => begin_ + offset
          }
          val end = snippet.lastIndexWhere(!isSpace(_)) match {
            case -1 => begin_
            case offset => begin_ + offset + 1
          }

          // val sentence: String = line.substring(begin, end).trim()
          val sentence: String = line.substring(begin, end)
          if (sentence.isEmpty)
            None
          else {
            Option(<sentence
              id={ sentenceIDGen.next }
              characterOffsetBegin={ begin+"" }
              characterOffsetEnd={ end+"" }>{ sentence }</sentence>)
          }
        }
      // val textRemoved = XMLUtil.removeText(e)
      // XMLUtil.addChild(textRemoved, <sentences>{ sentences }</sentences>)
      e addChild <sentences>{ sentences }</sentences>
    }
  }

  override def requires = Set()
  override def requirementsSatisfied = Set(Requirement.Ssplit)

}

object RegexSentenceAnnotator extends AnnotatorCompanion[RegexSentenceAnnotator] {
  val newLine = """\n+""".r
  val point = """。+""".r
  val pointAndNewLine = """\n+|。\n*""".r
} 
Example 133
Source File: MecabReader.scala    From jigg   with Apache License 2.0 5 votes vote down vote up
package jigg.nlp.ccg.lexicon


import scala.collection.mutable.ArrayBuffer
import scala.io.Source

class MecabReader(dict:Dictionary) {
  def toPoSTaggedSentence(lines:Seq[String]) = {
    val terminalSeq = lines.map { line =>
      val splitted = line.split('\t')
      val word = dict.getWordOrCreate(splitted(0))
      val base = dict.getWordOrCreate(splitted(2))

      val conjStr = if (splitted.size > 6) splitted(5) else "_"
      val posStr = splitted(3) + "/" + conjStr

      val pos = dict.getPoSOrCreate(posStr)
      (word, base, pos)
    }
    new PoSTaggedSentence(
      terminalSeq.map(_._1),
      terminalSeq.map(_._2),
      terminalSeq.map(_._3))
  }
  def readSentences(in:Source, n:Int): Array[PoSTaggedSentence] = {
    val sentences = new ArrayBuffer[PoSTaggedSentence]

    val sentenceLines = new ArrayBuffer[String]

    takeLines(in, n).foreach { _ match {
      case "EOS" =>
        sentences += toPoSTaggedSentence(sentenceLines)
        sentenceLines.clear
      case line =>
        sentenceLines += line
    }}
    sentences.toArray
  }
  def readSentences(path:String, n:Int): Array[PoSTaggedSentence] =
    readSentences(Source.fromFile(path), n)
  def takeLines(in:Source, n:Int): Iterator[String] =
    for (line <- in.getLines.filter(_!="") match {
      case lines if (n == -1) => lines
      case lines => lines.take(n) }) yield line

} 
Example 134
Source File: CategoryFeature.scala    From jigg   with Apache License 2.0 5 votes vote down vote up
package jigg.nlp.ccg.lexicon


trait CategoryFeature {
  def kvs: Seq[(String, String)]
  def unify(lhs: CategoryFeature): Boolean = false // TODO: implement
}

@SerialVersionUID(-8236395926230742650L)
case class JPCategoryFeature(values: Seq[String]) extends CategoryFeature {
  import JPCategoryFeature._

  override def kvs = keys zip values
  override def toString = kvs.filter(_._2 != "").map { case (k, v) => k + "=" + v }.mkString(",")
}

object JPCategoryFeature {
  // This is a hard-coded mapping of feature structure of Japanese category.
  private val k2vals = Map(
    "mod" -> Array("adv", "adn", "nm"),
    "form" -> Array("attr", "base", "cont", "hyp", "imp",
      "beg", "stem", "ta", "te", "pre", "r", "neg", "s", "da"),
    "case" -> Array("ga", "o", "ni", "to", "nc", "caus"),
    "fin" -> Array("f", "t"))

  private val keys = k2vals.keys.toSeq
  private val v2keyIdx = {
    val key2idx = keys.zipWithIndex.toMap
    k2vals.flatMap { case (key, vals) =>
      vals.map { v => v -> key2idx(key) }
    }
  }
  val kvpair = """\w+=(\w+)""".r

  def createFromValues(values: Seq[String]) = values match {
    case Seq() => emptyFeature
    case _ =>
      val sortedValues = Array.fill(keys.size)("")
      values.filter(_!="").foreach { value =>
        val v = value match { case kvpair(v) => v; case v => v }

        if (v(0) != 'X')
          v2keyIdx(v) match { case i => sortedValues(i) = v }
      }
      JPCategoryFeature(sortedValues)
  }
  // We cache this because most categories don't have a feature
  private val emptyFeature = JPCategoryFeature(Array.fill(keys.size)(""))
}

case class EnCategoryFeature(values: Seq[String]) extends CategoryFeature {
  override def kvs = values.zipWithIndex.map { case (v, k) => (k.toString, v) }
  override def toString = values.mkString(",")
}

object EnCategoryFeature {
  def createFromValues(values: Seq[String]) = EnCategoryFeature(values.sortWith(_ < _))
} 
Example 135
Source File: CabochaReader.scala    From jigg   with Apache License 2.0 5 votes vote down vote up
package jigg.nlp.ccg.lexicon



import scala.io.Source

class CabochaReader[S<:TaggedSentence](ccgSentences: Seq[S]) {
  def readSentences(path: String): Seq[ParsedBunsetsuSentence] = {
    val bunsetsuStart = """\* (\d+) (-?\d+)[A-Z].*""".r
    def addBunsetsuTo(curSent: List[(String, Int)], curBunsetsu: List[String]) = curBunsetsu.reverse match {
      case Nil => curSent
      case headIdx :: tail => (tail.mkString(""), headIdx.toInt) :: curSent
    }

    val bunsetsuSegedSentences: List[List[(String, Int)]] =
      scala.io.Source.fromFile(path).getLines.filter(_ != "").foldLeft(
        (List[List[(String, Int)]](), List[(String, Int)](), List[String]())) {
        case ((processed, curSent, curBunsetsu), line) => line match {
          case bunsetsuStart(_, nextHeadIdx) =>
            (processed, addBunsetsuTo(curSent, curBunsetsu), nextHeadIdx :: Nil) // use first elem as the head idx
          case "EOS" => (addBunsetsuTo(curSent, curBunsetsu).reverse :: processed, Nil, Nil)
          case word => (processed, curSent, word.split("\t")(0) :: curBunsetsu)
        }
      }._1.reverse

    ccgSentences.zip(bunsetsuSegedSentences).map { case (ccgSentence, bunsetsuSentence) =>
      val bunsetsuSegCharIdxs: List[Int] = bunsetsuSentence.map { _._1.size }.scanLeft(0)(_+_).tail // 5 10 ...
      val ccgWordSegCharIdxs: List[Int] = ccgSentence.wordSeq.toList.map { _.v.size }.scanLeft(0)(_+_).tail // 2 5 7 10 ...

      assert(bunsetsuSegCharIdxs.last == ccgWordSegCharIdxs.last)
      val bunsetsuSegWordIdxs: List[Int] = ccgWordSegCharIdxs.zipWithIndex.foldLeft((List[Int](), 0)) { // 1 3 ...
        case ((segWordIdxs, curBunsetsuIdx), (wordIdx, i)) =>
          if (wordIdx >= bunsetsuSegCharIdxs(curBunsetsuIdx)) (i :: segWordIdxs, curBunsetsuIdx + 1)
          else (segWordIdxs, curBunsetsuIdx) // wait until wordIdx exceeds the next bunsetsu segment
      }._1.reverse
      val bunsetsuSeq = bunsetsuSegWordIdxs.zip(-1 :: bunsetsuSegWordIdxs).map { case (bunsetsuIdx, prevIdx) =>
        val offset = prevIdx + 1
        Bunsetsu(offset,
          ccgSentence.wordSeq.slice(offset, bunsetsuIdx + 1),
          ccgSentence.posSeq.slice(offset, bunsetsuIdx + 1))
      }
      ParsedBunsetsuSentence(bunsetsuSeq, bunsetsuSentence.map { _._2 })
    }
  }
} 
Example 136
Source File: CommandLineMain.scala    From Muse-CGH   with MIT License 5 votes vote down vote up
package ui.command_line


import ui.user.UIMain
import main.{DoubleFieldInfo, ParamsCore}
import scopt.OptionParser
import utilities.{ParallelOp, ImageSaver, ProjectParameters}

import scala.io.Source


object CommandLineMain {
  def main(args: Array[String]) {
    if (args.isEmpty)
      UIMain.main(args) // enter interactive GUI mode
    else {
      val core = new ParamsCore()
      var imgName = "muse_result.png" // default output name
      val parser = new OptionParser[Unit]("muse") {
          head("muse", ProjectParameters.versionNumber.toString)
          arg[String]("<input file>") foreach { ip =>
            try {
              core.textToRender.set(Source.fromFile(ip).mkString) // read input from file
            } catch {
              case e: Throwable =>
                println(s"failed to read input from file.\n$e}")
                System.exit(-1)
            }
          } text "the input file to read."

          opt[String]('o',"out") foreach { n => imgName = n } validate {
            n => if (n.isEmpty) failure("Option --out must not be empty") else success
          } text
            "the out image name (if no extension specified, use .png)"

          // Other settable parameters
          (core.layoutRow ++ core.edgeRow ++ core.fontRow ++ core.wordRow ++ core.randomRow).foreach {
            case DoubleFieldInfo(settable, name, constraint, description) =>
              val abbr = toAbbreviateString(name)
              val requirements = s"$name --$abbr" + constraint.requirementString
              opt[Double](abbr) foreach {
                settable.set
              } validate { d =>
                if (constraint.f(d)) success else failure(requirements)
              } text s"$name: $description, ${constraint.requirementString} (default: ${settable.get})"
          }
        }

      if (parser.parse(args)) {
        println("arguments parsed")

        renderToImage(core, imgName)
      } else {
        println("bad arguments")
        System.exit(-1)
      }
    }
  }

  def toAbbreviateString(fullName: String): String = {
    val words = fullName.split(' ')
    words.map(w => w.head.toString.capitalize + w.tail).mkString
  }

  def renderToImage(core: ParamsCore, imgName: String): Unit = {

    val paintable = core.getPaintableResult(println)
    println("start to paint text...")
    paintable.drawToBufferInParallel(8)

//    paintable.drawToBuffer()
//    (0 until 20).foreach { _ =>
//      paintable.drawToBufferInParallel(8)
//    }
//    (0 until 20).foreach { _ =>
//      paintable.drawToBuffer()
//    }
    println("painting finished.")

    println("saving results...")
    ImageSaver.saveImage(paintable.buffer, imgName).foreach{ actualPath =>
      println(s"results saved to $actualPath")
    }
  }
  

} 
Example 137
Source File: DistServiceExecutor.scala    From incubator-retired-gearpump   with Apache License 2.0 5 votes vote down vote up
package org.apache.gearpump.experiments.distributeservice

import java.io.{File, FileWriter}
import java.net.InetAddress
import scala.collection.JavaConverters._
import scala.io.Source
import scala.sys.process._
import scala.util.{Failure, Success, Try}

import akka.actor.Actor
import org.apache.commons.io.FileUtils
import org.apache.commons.lang.text.StrSubstitutor
import org.slf4j.Logger

import org.apache.gearpump.cluster.{ExecutorContext, UserConfig}
import org.apache.gearpump.experiments.distributeservice.DistServiceAppMaster.InstallService
import org.apache.gearpump.util.{ActorUtil, LogUtil}

class DistServiceExecutor(executorContext: ExecutorContext, userConf: UserConfig) extends Actor {
  import executorContext._
  private val LOG: Logger = LogUtil.getLogger(getClass, executor = executorId, app = appId)

  override def receive: Receive = {
    case InstallService(url, zipFileName, targetPath, scriptData, serviceName, serviceSettings) =>
      LOG.info(s"Executor $executorId receive command to install " +
        s"service $serviceName to $targetPath")
      unzipFile(url, zipFileName, targetPath)
      installService(scriptData, serviceName, serviceSettings)
  }

  private def unzipFile(url: String, zipFileName: String, targetPath: String) = {
    val zipFile = File.createTempFile(System.currentTimeMillis().toString, zipFileName)
    val dir = new File(targetPath)
    if (dir.exists()) {
      FileUtils.forceDelete(dir)
    }
    val bytes = FileServer.newClient.get(url).get
    FileUtils.writeByteArrayToFile(zipFile, bytes)
    val result = Try(s"unzip ${zipFile.getAbsolutePath} -d $targetPath".!!)
    result match {
      case Success(msg) => LOG.info(s"Executor $executorId unzip file to $targetPath")
      case Failure(ex) => throw ex
    }
  }

  private def installService(
      scriptData: Array[Byte], serviceName: String, serviceSettings: Map[String, Any]) = {
    val tempFile = File.createTempFile("gearpump", serviceName)
    FileUtils.writeByteArrayToFile(tempFile, scriptData)
    val script = new File("/etc/init.d", serviceName)
    writeFileWithEnvVariables(tempFile, script, serviceSettings ++ getEnvSettings)
    val result = Try(s"chkconfig --add $serviceName".!!)
    result match {
      case Success(msg) => LOG.info(s"Executor install service $serviceName successfully!")
      case Failure(ex) => throw ex
    }
  }

  private def getEnvSettings: Map[String, Any] = {
    Map("workerId" -> worker,
      "localhost" -> ActorUtil.getSystemAddress(context.system).host.get,
      "hostname" -> InetAddress.getLocalHost.getHostName)
  }

  private def writeFileWithEnvVariables(source: File, target: File, envs: Map[String, Any]) = {
    val writer = new FileWriter(target)
    val sub = new StrSubstitutor(envs.asJava)
    sub.setEnableSubstitutionInVariables(true)
    Source.fromFile(source).getLines().foreach(line => writer.write(sub.replace(line) + "\r\n"))
    writer.close()
  }
} 
Example 138
Source File: DownloadSupportSpec.scala    From incubator-toree   with Apache License 2.0 5 votes vote down vote up
package org.apache.toree.utils

import java.io.FileNotFoundException
import java.net.URL

import org.scalatest.{BeforeAndAfter, Matchers, FunSpec}
import scala.io.Source
import scala.tools.nsc.io.File

class DownloadSupportSpec extends FunSpec with Matchers with BeforeAndAfter {
  val downloadDestinationUrl = new URL("file:///tmp/testfile2.ext")

  val testFileContent = "This is a test"
  val testFileName = "/tmp/testfile.txt"

  //  Create a test file for downloading
  before {
    File(testFileName).writeAll(testFileContent)
  }

  //  Cleanup what we made
  after {
    if (File(testFileName).exists) File(testFileName).delete()
    if (File(downloadDestinationUrl.getPath).exists) File(downloadDestinationUrl.getPath).delete()
  }

  describe("DownloadSupport"){
    describe("#downloadFile( String, String )"){
      it("should download a file to the download directory"){
        val testFileUrl = "file:///tmp/testfile.txt"

        //  Create our utility and download the file
        val downloader = new Object with DownloadSupport
        downloader.downloadFile(
          testFileUrl,
          downloadDestinationUrl.getProtocol + "://" +
            downloadDestinationUrl.getPath)

        //  Verify the file contents are what was in the original file
        val downloadedFileContent: String =
          Source.fromFile(downloadDestinationUrl.getPath).mkString

        downloadedFileContent should be (testFileContent)
      }

    }

    describe("#downloadFile( URL, URL )"){
      it("should download a file to the download directory"){
        val testFileUrl = new URL("file:///tmp/testfile.txt")

        val downloader = new Object with DownloadSupport
        downloader.downloadFile(testFileUrl, downloadDestinationUrl)

        //  Verify the file contents are what was in the original file
        val downloadedFileContent: String =
          Source.fromFile(downloadDestinationUrl.getPath).mkString

        downloadedFileContent should be (testFileContent)
      }

      it("should throw FileNotFoundException if the download URL is bad"){
        val badFilename = "file:///tmp/testbadfile.txt"
        if (File(badFilename).exists) File(badFilename).delete()

        val badFileUrl = new URL(badFilename)

        val downloader = new Object with DownloadSupport
        intercept[FileNotFoundException] {
          downloader.downloadFile(badFileUrl, downloadDestinationUrl)
        }
      }

      it("should throw FileNotFoundException if the download ") {
        val testFileUrl = new URL("file:///tmp/testfile.txt")
        val badDestinationUrl =
          new URL("file:///tmp/badloc/that/doesnt/exist.txt")

        val downloader = new Object with DownloadSupport
        intercept[FileNotFoundException] {
          downloader.downloadFile(testFileUrl, badDestinationUrl)
        }
      }
    }
  }

} 
Example 139
Source File: SparkTestEnv.scala    From spark-json-schema   with MIT License 5 votes vote down vote up
package org.zalando.spark.jsonschema

import org.apache.spark.sql.SparkSession

import scala.io.Source

object SparkTestEnv {

  lazy val sparkSession: SparkSession = {
    System.clearProperty("spark.driver.port")
    System.clearProperty("spark.hostPort")

    SparkSession.builder()
      .master("local")
      .appName("testapp")
      .config("spark.ui.enabled", value = false)
      .getOrCreate()
  }

  def getTestResourceContent(relativePath: String): String = {
    Option(getClass.getResource(relativePath)) match {
      case Some(relPath) => Source.fromURL(relPath).mkString
      case None => throw new IllegalArgumentException(s"Path can not be reached: $relativePath")
    }
  }

} 
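A sketch of how these helpers might be used in a test; the resource path is hypothetical:

// Hypothetical test setup using the helpers above.
object SparkTestEnvSketch extends App {
  val spark = SparkTestEnv.sparkSession                                      // shared local SparkSession
  val schemaJson = SparkTestEnv.getTestResourceContent("/test-schema.json")  // assumed resource
  println(s"Read ${schemaJson.length} characters of schema JSON with Spark ${spark.version}")
}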
Example 140
Source File: BlockFilterTest.scala    From bitcoin-s   with MIT License 5 votes vote down vote up
package org.bitcoins.core.gcs

import org.bitcoins.core.protocol.blockchain.Block
import org.bitcoins.core.protocol.script.ScriptPubKey
import org.bitcoins.crypto.DoubleSha256DigestBE
import org.bitcoins.testkit.util.BitcoinSUnitTest
import play.api.libs.json.{JsArray, Json}

import scala.io.Source

class BlockFilterTest extends BitcoinSUnitTest {
  behavior of "BlockFilter"

  // https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki#appendix-c-test-vectors
  case class Bip158TestCase(
      blockHeight: Int,
      blockHash: DoubleSha256DigestBE,
      block: Block,
      prevOutputScripts: Vector[ScriptPubKey],
      prevHeader: DoubleSha256DigestBE,
      filter: GolombFilter,
      header: DoubleSha256DigestBE,
      notes: String
  ) {

    val clue: String = s"Test Notes: $notes"

    def runTest(): org.scalatest.Assertion = {
      val constructedFilter = BlockFilter(block, prevOutputScripts)

      assert(constructedFilter.encodedData.bytes == filter.encodedData.bytes,
             clue)

      val matcher = new BinarySearchFilterMatcher(filter)
      val constructedMatcher = new BinarySearchFilterMatcher(constructedFilter)

      assert(constructedMatcher.decodedHashes == matcher.decodedHashes, clue)

      val constructedHeader = constructedFilter.getHeader(prevHeader.flip)

      assert(constructedHeader.hash == header.flip, clue)
    }
  }

  object Bip158TestCase {

    //["Block Height,Block Hash,Block,[Prev Output Scripts for Block],Previous Basic Header,Basic Filter,Basic Header,Notes"]
    def fromJsArray(array: JsArray): Bip158TestCase = {
      val parseResult = for {
        height <- array(0).validate[Int]
        blockHash <- array(1).validate[String].map(DoubleSha256DigestBE.fromHex)

        block <- array(2).validate[String].map(Block.fromHex)

        scriptArray <- array(3).validate[JsArray]
        scripts = parseScripts(scriptArray)

        prevHeader <-
          array(4)
            .validate[String]
            .map(DoubleSha256DigestBE.fromHex)

        filter <-
          array(5)
            .validate[String]
            .map(BlockFilter.fromHex(_, blockHash.flip))

        header <- array(6).validate[String].map(DoubleSha256DigestBE.fromHex)

        notes <- array(7).validate[String]
      } yield Bip158TestCase(height,
                             blockHash,
                             block,
                             scripts,
                             prevHeader,
                             filter,
                             header,
                             notes)

      parseResult.get
    }

    private def parseScripts(array: JsArray): Vector[ScriptPubKey] = {
      val hexScripts = array.validate[Vector[String]].get

      hexScripts.map(ScriptPubKey.fromAsmHex)
    }
  }

  it must "pass bip 158 test vectors" in {
    val source = Source.fromURL(getClass.getResource("/testnet-19.json"))

    val vec: Vector[JsArray] =
      Json.parse(source.mkString).validate[Vector[JsArray]].get.tail
    val testCases = vec.map(Bip158TestCase.fromJsArray)

    testCases.foreach(_.runTest())
  }
} 
Example 141
Source File: BlockBench.scala    From bitcoin-s   with MIT License 5 votes vote down vote up
package org.bitcoins.bench.core

import org.bitcoins.core.protocol.blockchain.Block
import org.slf4j.LoggerFactory

import scala.io.Source

object BlockBench extends App {
  private def logger = LoggerFactory.getLogger(this.getClass)

  private def timeBlockParsing[R](block: () => R): Long = {
    val t0 = System.currentTimeMillis()
    val _ = block() // invoke the code under measurement
    val t1 = System.currentTimeMillis()
    val time = t1 - t0
    logger.info("Elapsed time: " + time + "ms")
    time
  }

  def bench1(): Unit = {
    val fileName =
      "/00000000000000000008513c860373da0484f065983aeb063ebf81c172e81d48.txt"
    val lines = Source.fromURL(getClass.getResource(fileName)).mkString
    val time = timeBlockParsing(() => Block.fromHex(lines))
    require(time <= 15000)
  }

  def bench2(): Unit = {
    val fileName =
      "/000000000000000000050f70113ab1932c195442cb49bcc4ee4d7f426c8a3295.txt"
    val lines = Source.fromURL(getClass.getResource(fileName)).mkString
    val time = timeBlockParsing(() => Block.fromHex(lines))
    require(time <= 15000)
  }

  0.until(10).foreach(_ => bench1())

  //bench2()
} 
Example 142
Source File: PullRequestOutTest.scala    From scala-steward   with Apache License 2.0 5 votes vote down vote up
package org.scalasteward.core.vcs.data

import io.circe.parser
import org.http4s.syntax.literals._
import org.scalasteward.core.vcs.data.PullRequestState.Open
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers
import scala.io.Source

class PullRequestOutTest extends AnyFunSuite with Matchers {
  test("decode") {
    val expected =
      List(
        PullRequestOut(
          uri"https://github.com/octocat/Hello-World/pull/1347",
          Open,
          "new-feature"
        )
      )

    val input = Source.fromResource("list-pull-requests.json").mkString
    parser.decode[List[PullRequestOut]](input) shouldBe Right(expected)
  }
} 
Example 143
Source File: BranchOutTest.scala    From scala-steward   with Apache License 2.0 5 votes vote down vote up
package org.scalasteward.core.vcs.data

import io.circe.parser
import org.scalasteward.core.git.Sha1.HexString
import org.scalasteward.core.git.{Branch, Sha1}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers
import scala.io.Source

class BranchOutTest extends AnyFunSuite with Matchers {
  test("decode") {
    val input = Source.fromResource("get-branch.json").mkString
    parser.decode[BranchOut](input) shouldBe
      Right(
        BranchOut(
          Branch("master"),
          CommitOut(Sha1(HexString("7fd1a60b01f91b314f59955a4e4d4e80d8edf11d")))
        )
      )
  }
} 
Example 144
Source File: RepoOutTest.scala    From scala-steward   with Apache License 2.0 5 votes vote down vote up
package org.scalasteward.core.vcs.data

import cats.effect.IO
import io.circe.parser
import org.http4s.syntax.literals._
import org.scalasteward.core.git.Branch
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers
import scala.io.Source

class RepoOutTest extends AnyFunSuite with Matchers {
  val parent =
    RepoOut(
      "base.g8",
      UserOut("ChristopherDavenport"),
      None,
      uri"https://github.com/ChristopherDavenport/base.g8.git",
      Branch("master")
    )

  val fork =
    RepoOut(
      "base.g8-1",
      UserOut("scala-steward"),
      Some(parent),
      uri"https://github.com/scala-steward/base.g8-1.git",
      Branch("master")
    )

  test("decode") {
    val input = Source.fromResource("create-fork.json").mkString
    parser.decode[RepoOut](input) shouldBe Right(fork)
  }

  test("parentOrRaise") {
    fork.parentOrRaise[IO].unsafeRunSync() shouldBe parent
  }

  test("repo") {
    fork.repo shouldBe Repo("scala-steward", "base.g8-1")
  }
} 
Example 145
Source File: TestUtils.scala    From scavro   with Apache License 2.0 5 votes vote down vote up
package org.oedura.scavro.plugin

import java.io.{FileOutputStream, InputStream}

import sbt._

import scala.io.Source
import scala.util.Random

class TestUtils(workingDir: File) {
  (workingDir / "in").mkdir
  (workingDir / "out").mkdir

  def tmpDir = workingDir
  def tmpPath = workingDir.getAbsolutePath

  private def extractResource(resourceName: String): File = {
    val is: InputStream = getClass.getResourceAsStream(s"/$resourceName")
    val text = Source.fromInputStream(is).mkString
    val os: FileOutputStream = new FileOutputStream(workingDir / "in" / resourceName)
    os.write(text.getBytes)
    os.close()
    is.close()

    workingDir / "in" / resourceName
  }

  lazy val schemaFile: File = extractResource("Number.avsc")
  lazy val protocolFile: File = {
    schemaFile
    extractResource("NumberSystem.avdl")
  }

  def cleanup() = {
    def getRecursively(f: File): Seq[File] = f.listFiles.filter(_.isDirectory).flatMap(getRecursively) ++ f.listFiles

    getRecursively(workingDir).foreach { f =>
      if (!f.delete()) throw new RuntimeException("Failed to delete " + f.getAbsolutePath)
    }
    tmpDir.delete()
  }
}

object TestUtils {
  private val alphabet = ('a' to 'z') ++ ('A' to 'Z') ++ ('0' to '9')

  def randomFile(dir: File, prefix: String = "", suffix: String = "", maxTries: Int = 100, nameSize: Int = 10): File = {
    def randomFileImpl(triesLeft: Int): String = {
      val testName: String = (1 to nameSize).map(_ => alphabet(Random.nextInt(alphabet.size))).mkString
      if (!(dir / (prefix + testName + suffix)).exists) prefix + testName + suffix
      else if (triesLeft < 0) throw new Exception("Unable to find empty random file path.")
      else randomFileImpl(triesLeft - 1)
    }

    dir / randomFileImpl(maxTries)
  }

  def randomFileName(prefix: String, suffix: String = "", maxTries: Int = 100, nameSize: Int = 10): String = {
    def randomFileNameImpl(triesLeft: Int): String = {
      val testName: String = (1 to nameSize).map(_ => alphabet(Random.nextInt(alphabet.size))).mkString
      if (!file(prefix + testName + suffix).exists) prefix + testName + suffix
      else if (triesLeft < 0) throw new Exception("Unable to find empty random file path.")
      else randomFileNameImpl(triesLeft - 1)
    }

    randomFileNameImpl(maxTries)
  }

  def apply(workingDir: File) = {
    if (workingDir.exists && workingDir.isDirectory) new TestUtils(workingDir)
    else if (!workingDir.exists) {
      val success = workingDir.mkdirs
      if (success) new TestUtils(workingDir)
      else throw new Exception("Cannot initialize working directory")
    } else throw new Exception("Requested directory is occupied by ordinary file")
  }

} 
Example 146
Source File: ProxyFeedback.scala    From oni-ml   with Apache License 2.0 5 votes vote down vote up
package org.opennetworkinsight.proxy

import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
import org.apache.spark.sql.types.{StructType, StructField, StringType}
import scala.io.Source

import org.opennetworkinsight.proxy.ProxySchema._


object ProxyFeedback {

  
  def loadFeedbackDF(sc: SparkContext,
                     sqlContext: SQLContext,
                     feedbackFile: String,
                     duplicationFactor: Int): DataFrame = {


    val feedbackSchema = StructType(
      List(StructField(Date, StringType, nullable= true),
        StructField(Time, StringType, nullable= true),
        StructField(ClientIP, StringType, nullable= true),
        StructField(Host, StringType, nullable= true),
        StructField(ReqMethod, StringType, nullable= true),
        StructField(UserAgent, StringType, nullable= true),
        StructField(ResponseContentType, StringType, nullable= true),
        StructField(RespCode, StringType, nullable= true),
        StructField(FullURI, StringType, nullable= true)))

    if (new java.io.File(feedbackFile).exists) {

      val dateIndex = 0
      val timeIndex = 1
      val clientIpIndex = 2
      val hostIndex = 3
      val reqMethodIndex = 4
      val userAgentIndex = 5
      val resContTypeIndex = 6
      val respCodeIndex = 11
      val fullURIIndex = 18

      val fullURISeverityIndex = 22

      val lines = Source.fromFile(feedbackFile).getLines().toArray.drop(1)
      val feedback: RDD[String] = sc.parallelize(lines)

      sqlContext.createDataFrame(feedback.map(_.split("\t"))
        .filter(row => row(fullURISeverityIndex).trim.toInt == 3)
        .map(row => Row.fromSeq(List(row(dateIndex),
          row(timeIndex),
          row(clientIpIndex),
          row(hostIndex),
          row(reqMethodIndex),
          row(userAgentIndex),
          row(resContTypeIndex),
          row(respCodeIndex),
          row(fullURIIndex))))
        .flatMap(row => List.fill(duplicationFactor)(row)), feedbackSchema)
        .select(Date, Time, ClientIP, Host, ReqMethod, UserAgent, ResponseContentType, RespCode, FullURI)
    } else {
      sqlContext.createDataFrame(sc.emptyRDD[Row], feedbackSchema)
    }
  }
} 
Example 147
Source File: DomainProcessor.scala    From oni-ml   with Apache License 2.0 5 votes vote down vote up
package org.opennetworkinsight.utilities

import org.apache.spark.broadcast.Broadcast

import scala.io.Source


object DomainProcessor extends Serializable {

  val COUNTRY_CODES = Set("ac", "ad", "ae", "af", "ag", "ai", "al", "am", "an", "ao", "aq", "ar", "as", "at", "au",
    "aw", "ax", "az", "ba", "bb", "bd", "be", "bf", "bg", "bh", "bi", "bj", "bm", "bn", "bo", "bq", "br", "bs", "bt",
    "bv", "bw", "by", "bz", "ca", "cc", "cd", "cf", "cg", "ch", "ci", "ck", "cl", "cm", "cn", "co", "cr", "cu", "cv",
    "cw", "cx", "cy", "cz", "de", "dj", "dk", "dm", "do", "dz", "ec", "ee", "eg", "eh", "er", "es", "et", "eu", "fi",
    "fj", "fk", "fm", "fo", "fr", "ga", "gb", "gd", "ge", "gf", "gg", "gh", "gi", "gl", "gm", "gn", "gp", "gq", "gr",
    "gs", "gt", "gu", "gw", "gy", "hk", "hm", "hn", "hr", "ht", "hu", "id", "ie", "il", "im", "in", "io", "iq", "ir",
    "is", "it", "je", "jm", "jo", "jp", "ke", "kg", "kh", "ki", "km", "kn", "kp", "kr", "krd", "kw", "ky", "kz", "la",
    "lb", "lc", "li", "lk", "lr", "ls", "lt", "lu", "lv", "ly", "ma", "mc", "md", "me", "mg", "mh", "mk", "ml", "mm",
    "mn", "mo", "mp", "mq", "mr", "ms", "mt", "mu", "mv", "mw", "mx", "my", "mz", "na", "nc", "ne", "nf", "ng", "ni",
    "nl", "no", "np", "nr", "nu", "nz", "om", "pa", "pe", "pf", "pg", "ph", "pk", "pl", "pm", "pn", "pr", "ps", "pt",
    "pw", "py", "qa", "re", "ro", "rs", "ru", "rw", "sa", "sb", "sc", "sd", "se", "sg", "sh", "si", "sj", "", "sk",
    "sl", "sm", "sn", "so", "sr", "ss", "st", "su", "sv", "sx", "sy", "sz", "tc", "td", "tf", "tg", "th", "tj", "tk",
    "tl", "tm", "tn", "to", "tp", "tr", "tt", "tv", "tw", "tz", "ua", "ug", "uk", "us", "uy", "uz", "va", "vc", "ve",
    "vg", "vi", "vn", "vu", "wf", "ws", "ye", "yt", "za", "zm", "zw")

  val TOP_LEVEL_DOMAIN_NAMES = Set("com", "org", "net", "int", "edu", "gov", "mil")
  val NO_DOMAIN = "None"

  def extractDomain(url: String): String = {

    val spliturl = url.split('.')
    val numParts = spliturl.length

    // First check if query is an IP address e.g.: 123.103.104.10.in-addr.arpa or a name.
    // Such URLs receive a domain of NO_DOMAIN

    if (numParts > 2 && spliturl(numParts - 1) == "arpa" && spliturl(numParts - 2) == "in-addr") {
      NO_DOMAIN  // it's an address
    } else if (!COUNTRY_CODES.contains(spliturl.last) && !TOP_LEVEL_DOMAIN_NAMES.contains(spliturl.last)) {
      NO_DOMAIN  //  it does not have a valid top-level domain name
    } else {
      val strippedSplitURL = removeTopLevelDomainName(removeCountryCode(spliturl))
      if (strippedSplitURL.length > 0) {
        strippedSplitURL.last
      } else {
        // invalid URL... nothing that is not TLD.countrycode
        NO_DOMAIN
      }
    }
  }

  def removeCountryCode(urlComponents: Array[String]): Array[String] = {
    if (COUNTRY_CODES.contains(urlComponents.last)) {
      urlComponents.dropRight(1)
    } else {
      urlComponents
    }
  }

  def removeTopLevelDomainName(urlComponents: Array[String]): Array[String] = {
    if (TOP_LEVEL_DOMAIN_NAMES.contains(urlComponents.last)) {
      urlComponents.dropRight(1)
    } else {
      urlComponents
    }
  }
} 
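To make the branching above concrete, a few illustrative calls and the values they should produce:

// Worked examples for extractDomain (inputs are illustrative).
object DomainProcessorSketch extends App {
  println(DomainProcessor.extractDomain("maps.google.com"))              // "google"
  println(DomainProcessor.extractDomain("example.org.uk"))               // "example" (country code, then TLD, stripped)
  println(DomainProcessor.extractDomain("123.103.104.10.in-addr.arpa"))  // "None"   (reverse-DNS address)
  println(DomainProcessor.extractDomain("localhost"))                    // "None"   (no recognised TLD or country code)
}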
Example 148
Source File: AsynchronousLoggingSpec.scala    From scribe   with MIT License 5 votes vote down vote up
package spec

import java.io.File
import java.util.concurrent.ConcurrentLinkedQueue

import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AsyncWordSpec
import perfolation._
import scribe.{LogRecord, Logger}
import scribe.format._
import scribe.output.LogOutput
import scribe.writer.{FileWriter, Writer}

import scala.collection.JavaConverters._
import scala.concurrent.Future
import scala.io.Source

class AsynchronousLoggingSpec extends AsyncWordSpec with Matchers {
  private val Regex = """(\d+) - (.+)""".r
  private val threads = "abcdefghijklmnopqrstuvwxyz"
  private val iterations = 10
  private val total = threads.length * iterations

  "Asynchronous Logging" should {
    s"log $total records in the proper order with simple logging" in {
      val queue = new ConcurrentLinkedQueue[String]
      val logger = Logger.empty.orphan().withHandler(
        formatter = AsynchronousLoggingSpec.format,
        writer = new Writer {
          override def write[M](record: LogRecord[M], output: LogOutput): Unit = queue.add(output.plainText.trim)
        }
      )

      Future.sequence(threads.map { char =>
        Future {
          (0 until iterations).foreach { index =>
            logger.info(p"$char:$index")
          }
        }
      }).map { _ =>
        var previous = 0L
        queue.iterator().asScala.foreach {
          case Regex(ts, _) => {
            val timeStamp = ts.toLong
            timeStamp should be >= previous
            previous = timeStamp
          }
        }
        queue.size() should be(total)
      }
    }
    s"log $total records in the proper order with file logging" in {
      val file = new File("logs/app.log")
      file.delete()

      val fileWriter = FileWriter().nio
      val logger = Logger.empty.orphan().withHandler(
        formatter = AsynchronousLoggingSpec.format,
        writer = fileWriter
      )

      Future.sequence(threads.map { char =>
        Future {
          (0 until iterations).foreach { index =>
            logger.info(p"$char:$index")
          }
        }
      }).map { _ =>
        var previous = 0L
        fileWriter.flush()
        fileWriter.dispose()
        val lines = Source.fromFile(file).getLines().toList
        lines.foreach {
          case Regex(ts, message) => {
            val timeStamp = ts.toLong
            timeStamp should be >= previous
            previous = timeStamp
          }
        }
        lines.length should be(threads.length * iterations)
      }
    }
  }
}

object AsynchronousLoggingSpec {
  val format = formatter"$timeStamp - $message"
} 
Example 149
Source File: LineBufferedStream.scala    From incubator-livy   with Apache License 2.0 5 votes vote down vote up
package org.apache.livy.utils

import java.io.InputStream
import java.util
import java.util.concurrent.locks.ReentrantLock

import scala.io.Source

import org.apache.livy.Logging

class CircularQueue[T](var capacity: Int) extends util.LinkedList[T] {
  override def add(t: T): Boolean = {
    if (size >= capacity) removeFirst
    super.add(t)
  }
}

class LineBufferedStream(inputStream: InputStream, logSize: Int) extends Logging {

  private[this] val _lines: CircularQueue[String] = new CircularQueue[String](logSize)

  private[this] val _lock = new ReentrantLock()
  private[this] val _condition = _lock.newCondition()
  private[this] var _finished = false

  private val thread = new Thread {
    override def run() = {
      val lines = Source.fromInputStream(inputStream).getLines()
      for (line <- lines) {
        info(line)
        _lock.lock()
        try {
          _lines.add(line)
          _condition.signalAll()
        } finally {
          _lock.unlock()
        }
      }

      _lock.lock()
      try {
        _finished = true
        _condition.signalAll()
      } finally {
        _lock.unlock()
      }
    }
  }
  thread.setDaemon(true)
  thread.start()

  def lines: IndexedSeq[String] = {
    _lock.lock()
    val lines = IndexedSeq.empty[String] ++ _lines.toArray(Array.empty[String])
    _lock.unlock()
    lines
  }

  def iterator: Iterator[String] = {
    new LinesIterator
  }

  def waitUntilClose(): Unit = thread.join()

  private class LinesIterator extends Iterator[String] {

    override def hasNext: Boolean = {
      if (_lines.size > 0) {
        true
      } else {
        // Otherwise we might still have more data.
        _lock.lock()
        try {
          if (_finished) {
            false
          } else {
            _condition.await()
            _lines.size > 0
          }
        } finally {
          _lock.unlock()
        }
      }
    }

    override def next(): String = {
      _lock.lock()
      val line = _lines.poll()
      _lock.unlock()
      line
    }
  }
} 
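A small sketch of wrapping an external process with this buffer; the command and log size are illustrative:

// Hypothetical usage: capture the last 100 lines of a child process's stdout.
object LineBufferedStreamSketch extends App {
  val process = new ProcessBuilder("ping", "-c", "3", "localhost").start()  // illustrative command
  val buffered = new LineBufferedStream(process.getInputStream, logSize = 100)
  buffered.waitUntilClose()          // block until the stream is exhausted
  buffered.lines.foreach(println)    // retained lines, oldest first
}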
Example 150
Source File: ConfigurationParser.scala    From Soteria   with MIT License 5 votes vote down vote up
package com.leobenkel.soteria.Config

import com.leobenkel.soteria.Utils.Json.JsonDecode
import com.leobenkel.soteria.Utils.LoggerExtended

import scala.io.Source
import scala.util._

private[soteria] case class ConfigurationParser(
  log:        LoggerExtended,
  configPath: String
) {
  if (!configPath.endsWith(".json")) {
    log.criticalFailure(
      s"The input configuration file was defined as '$configPath' " +
        s"but it should be a '.json' file."
    )
  }

  @transient lazy private val isWeb: Boolean = configPath.startsWith("http://") || configPath
    .startsWith("https://")

  @transient lazy private val fileContent: String =
    Try(if (isWeb) {
      Source.fromURL(configPath)
    } else {
      Source.fromFile(configPath)
    }) match {
      case Success(file) =>
        val content = file.mkString
        file.close()
        content
      case Failure(exception) =>
        log.criticalFailure(exception.toString)
        throw exception
    }

  @transient lazy private val conf: SoteriaConfiguration = {
    JsonDecode.parse[SoteriaConfiguration](fileContent)(SoteriaConfiguration.parser(log)) match {
      case Left(err: String) =>
        log.criticalFailure(err)
        throw new Exception(err)
      case Right(c) => c
    }
  }

  def getConf: SoteriaConfiguration = conf
} 
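Typical construction might look like the sketch below; myLogger stands in for whatever LoggerExtended implementation the build provides, and the configuration path is hypothetical:

// Hypothetical usage of ConfigurationParser with an assumed logger instance.
object ConfigurationParserSketch {
  def load(myLogger: LoggerExtended): SoteriaConfiguration =
    ConfigurationParser(log = myLogger, configPath = "soteria.json").getConf  // assumed path
}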
Example 151
Source File: SerializedModuleTest.scala    From Soteria   with MIT License 5 votes vote down vote up
package com.leobenkel.soteria.Config

import com.leobenkel.soteria.Modules.NameOfModule
import com.leobenkel.soteria.Utils.Json.JsonDecode
import com.leobenkel.soteria.{LogTest, ParentTest}
import sbt.util.Level

import scala.io.Source

// scalastyle:off magic.number
class SerializedModuleTest extends ParentTest {
  private val test: SerializedModuleTest = this

  private class LogTestWithBuffer extends LogTest(test) {
    private var allMessages: String = ""
    override def log(
      level:   Level.Value,
      message: => String
    ): Unit = {
      test.log.debug(message)
      assertEquals(Level.Error, level)
      allMessages += message + "\n"
    }

    def getMessages: String = allMessages
  }

  test("Test parsing to NameOfModule") {
    val pathToFile = "soteria_succeed_4.json"
    val soteriaLog = new LogTestWithBuffer

    val file = Source.fromResource(pathToFile)
    val content = file.mkString
    file.close()

    log.debug(s"Reading '$pathToFile'")

    val result: Either[String, SoteriaConfiguration] =
      JsonDecode.parse[SoteriaConfiguration](content)(SoteriaConfiguration.parser(soteriaLog))

    assert(result.isRight)

    val serializedModule = result.right.get

    assert(soteriaLog.getMessages.isEmpty)

    val modules = serializedModule.AllModules.sortBy(s => s.key)

    val modulesWithDependanceErrors = serializedModule.ZTestOnly.RawModulesTest
      .filter(_._2.nonEmpty)
    modulesWithDependanceErrors.foreach {
      case (module, errors) =>
        assert(soteriaLog.getMessages.contains(module.toString))
        errors.foreach(e => assert(soteriaLog.getMessages.contains(e)))
    }

    assertEquals(3, modules.length)

    val m1 = modules.head

    assertEquals("com.orgs", m1.organization)
    assertEquals("name-of-library", m1.name)
    assertEquals(Right("3.0"), m1.version)

    val m2 = modules.apply(2)

    assertEquals("com.other.org", m2.organization)
    assertEquals("artifact-name", m2.name)
    assertEquals(Right("2.1.0"), m2.version)

    val m3 = modules.apply(1)
    val m3Obj = m3.nameObj
    assertEquals("com.other.org", m3.organization)
    assertEquals("artif", m3.name)
    assert(m3.version.isLeft)
    assert(m3.version.left.get.contains("0"))
    assertEquals(false, m3Obj.exactName)
    assertEquals(Seq("artifactory", "artifice"), m3Obj.excludeName.sortBy(identity))
    assertEquals(1, m3.dependenciesToRemove.length)
    assertEquals(Seq(NameOfModule.apply("com.orgs", "name-of-library")), m3.dependenciesToRemove)
  }

  test("test serialize/deserialize") {
    val s = SerializedModule.Empty.copy(
      version = "1.0",
      shouldBeProvided = Some(true),
      excludeName = Some(Seq("a", "b"))
    )
    val encodedEi = s.toJsonStructure
    assert(encodedEi.isRight)
    val encoded = encodedEi.right.get
    log.debug(encoded)
    val sParsedEi = SerializedModule.parser("com.org", "arti")(encoded)
    assert(sParsedEi.isRight)
    val sParsed = sParsedEi.right.get
    assertEquals(s, sParsed)
  }
}
// scalastyle:on magic.number 
Example 152
Source File: JsonDecodeTest.scala    From Soteria   with MIT License 5 votes vote down vote up
package com.leobenkel.soteria.Utils

import com.leobenkel.soteria.Config.SoteriaConfiguration
import com.leobenkel.soteria.Utils.Json.JsonDecode
import com.leobenkel.soteria.Utils.Json.JsonParserHelper._
import com.leobenkel.soteria.{LogTest, ParentTest}
import org.scalatest.Assertion

import scala.io.Source

class JsonDecodeTest extends ParentTest {
  private val soteriaLog: LogTest = new LogTest(this)

  test("Test decode json") {
    val value: Int = 12
    case class MyJson(key: Int)

    implicit val parser: JsonDecode.Parser[MyJson] = (input: Map[String, Any]) => {
      for {
        key <- input.getAsInt("key")
      } yield {
        MyJson(key)
      }
    }

    val ei = JsonDecode.parse[MyJson](s"""
        |{
        |"key": $value
        |}
      """.stripMargin)

    assert(ei.isRight)
    assertEquals(value, ei.right.get.key)
  }

  test("Test decode soteria.json") {
    Map[String, Either[String, SoteriaConfiguration] => Assertion](
      "soteria_succeed_1.json" -> { result =>
        assert(result.isRight)
        val parsed = result.right.get
        assert(parsed.modules.size == 1)
        assert(parsed.modules.head._2.size == 1)
        assert(parsed.modules.head._2.head._2.version == "3.0")
        assert(parsed.scalaCFlags.length == 10)
        assert(parsed.scalaVersions.size == 2)
      },
      "soteria_succeed_2.json" -> { result =>
        assert(result.isRight)
        val parsed = result.right.get
        assert(parsed.modules.size == 1)
        assert(parsed.modules.head._2.size == 1)
        assert(parsed.modules.head._2.head._2.version == "3.0")
        assert(parsed.scalaCFlags.isEmpty)
        assert(parsed.scalaVersions.size == 2)
      },
      "soteria_succeed_3.json" -> { result =>
        assert(result.isRight)
        val parsed = result.right.get
        assert(parsed.modules.isEmpty)
        assert(parsed.scalaCFlags.isEmpty)
        assert(parsed.scalaVersions.size == 2)
      },
      "soteria_fail_no_scalaVersions.json" -> { result =>
        assert(result.isLeft)
        val error = result.left.get
        assert(error.contains("scalaVersions"))
      },
      "soteria_fail_no_version.json" -> { result =>
        assert(result.isLeft)
        val error = result.left.get
        assert(error.contains("version"))
        assert(error.contains("com.orgs"))
        assert(error.contains("name-of-library"))
      },
      "soteria_fail_bad_json.json" -> { result =>
        assert(result.isLeft)
        val error = result.left.get
        assert("Did not parse" == error)
      }
    ).map {
      case (filePath, test) =>
        val file = Source.fromResource(filePath)
        val content = file.mkString
        file.close()

        log.debug(s"Reading '$filePath'")

        val result: Either[String, SoteriaConfiguration] =
          JsonDecode.parse[SoteriaConfiguration](content)(SoteriaConfiguration.parser(soteriaLog))

        test(result)
    }
  }
} 
Example 153
Source File: NotebookSimTest.scala    From spark-bench   with Apache License 2.0 5 votes vote down vote up
package com.ibm.sparktc.sparkbench

import com.ibm.sparktc.sparkbench.cli.CLIKickoff
import com.ibm.sparktc.sparkbench.testfixtures.BuildAndTeardownData
import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers}

import scala.io.Source

class NotebookSimTest extends FlatSpec with Matchers with BeforeAndAfterEach with Capturing {
  val dataMaker = new BuildAndTeardownData("notebook-sim-test")

  val giantData = s"${dataMaker.sparkBenchTestFolder}/giant-kmeans-data.parquet"
  val tinyData = s"${dataMaker.sparkBenchTestFolder}/tiny-kmeans-data.parquet"

  override def beforeEach(): Unit = {
    super.beforeEach()
    dataMaker.deleteFolders()
    dataMaker.createFolders()
    dataMaker.generateKMeansData(400000, 50, giantData)
    dataMaker.generateKMeansData(100, 5, tinyData)
  }

  override def afterEach(): Unit = {
    dataMaker.deleteFolders()
  }

  "Simulating two notebook users" should "work" in {
    val relativePath = "/etc/notebook-sim.conf"
    val resource = getClass.getResource(relativePath)
    val path = resource.getPath
    val text = Source.fromFile(path).mkString
    CLIKickoff.main(Array(text))
  }



} 
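Note: the test above resolves the resource to a filesystem path via getResource(...).getPath and then reads it with Source.fromFile, which only works while the resource sits on disk rather than inside a jar. A sketch of the stream-based alternative (resource name is illustrative):

import scala.io.Source

def readResourceText(name: String): String = {
  val stream = getClass.getResourceAsStream(name) // e.g. "/etc/notebook-sim.conf"
  require(stream != null, s"resource not found: $name")
  val source = Source.fromInputStream(stream, "UTF-8")
  try source.mkString finally source.close()
}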
Example 154
Source File: ConfigFileTest.scala    From spark-bench   with Apache License 2.0 5 votes vote down vote up
package com.ibm.sparktc.sparkbench

import java.io.File

import com.ibm.sparktc.sparkbench.cli.CLIKickoff
import com.ibm.sparktc.sparkbench.testfixtures.BuildAndTeardownData
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers}

import scala.io.Source

class ConfigFileTest extends FlatSpec with Matchers with BeforeAndAfterAll with Capturing {

  val dataShiznit = new BuildAndTeardownData("configfiletest")

  override def beforeAll(): Unit = {
    super.beforeAll()
    dataShiznit.deleteFolders()
    dataShiznit.createFolders()
  }

  override def afterAll(): Unit = {
    dataShiznit.deleteFolders()
  }

  val kmeansData = new File("/tmp/spark-bench-scalatest/configfiletest/kmeans-data.parquet")
  val output1 = new File("/tmp/spark-bench-scalatest/configfiletest/conf-file-output-1.csv")
  val output2 = new File("/tmp/spark-bench-scalatest/configfiletest/conf-file-output-2.parquet")

  "Spark-bench run through a config file serially" should "work" in {
    kmeansData.exists() shouldBe false

    val relativePath = "/etc/testConfFile1.conf"
    val resource = getClass.getResource(relativePath)
    val path = resource.getPath
    val text = Source.fromFile(path).mkString
    CLIKickoff.main(Array(text))

    kmeansData.exists() shouldBe true
    output1.exists() shouldBe true
    output2.exists() shouldBe true

    val fileList = output1.listFiles().toList.filter(_.getName.startsWith("part"))

    val fileContents: List[String] =
        Source.fromFile(fileList.head)
          .getLines()
          .toList


    val length: Int = fileContents.length

    (length > 0) shouldBe true
  }

  "Spark-bench run through a config file with the suites running in parallel" should "work" in {
    kmeansData.exists() shouldBe true
    val relativePath = "/etc/testConfFile2.conf"
    val resource = getClass.getResource(relativePath)
    val path = resource.getPath
    val text = Source.fromFile(path).mkString
    CLIKickoff.main(Array(text))
  }
} 
Example 155
Source File: OutputTest.scala    From spark-bench   with Apache License 2.0 5 votes vote down vote up
package com.ibm.sparktc.sparkbench

import com.ibm.sparktc.sparkbench.cli.CLIKickoff
import com.ibm.sparktc.sparkbench.testfixtures.BuildAndTeardownData
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers}

import scala.io.Source

class OutputTest extends FlatSpec with Matchers with BeforeAndAfterAll with Capturing {
  val dataStuff = new BuildAndTeardownData("output-test")

  override def beforeAll(): Unit = {
    super.beforeAll()
    dataStuff.createFolders()
    dataStuff.generateKMeansData(1000, 5, dataStuff.kmeansFile) // scalastyle:ignore
  }

  override def afterAll(): Unit = {
    dataStuff.deleteFolders()
    super.afterAll()
  }

  "Specifying Console output" should "work" in {
    val relativePath = "/etc/testConfFile3.conf"
    val resource = getClass.getResource(relativePath)
    val path = resource.getPath
    val text = Source.fromFile(path).mkString

    val (out, _) = captureOutput(CLIKickoff.main(Array(text)))
    out should not be empty
    out.split("\n").length shouldBe 9
  }

  "Want to see configuration added to results when there's crazy stuff" should "work" in {
    val relativePath = "/etc/testConfFile4.conf"
    val resource = getClass.getResource(relativePath)
    val path = resource.getPath
    val text = Source.fromFile(path).mkString

    val (out, _) = captureOutput(CLIKickoff.main(Array(text)))
    out shouldBe empty
  }
} 
Example 156
Source File: TestHelper.scala    From spark-summit-2018   with GNU General Public License v3.0 5 votes vote down vote up
package com.twilio.open.streaming.trend.discovery

import java.io.{ByteArrayInputStream, InputStream}

import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.google.protobuf.Message
import com.googlecode.protobuf.format.JsonFormat
import com.holdenkarau.spark.testing.{LocalSparkContext, SparkContextProvider}
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers, Suite}
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.Seq
import scala.io.Source
import scala.reflect.ClassTag
import scala.reflect.classTag

object TestHelper {
  val log: Logger = LoggerFactory.getLogger("com.twilio.open.streaming.trend.discovery.TestHelper")
  val mapper: ObjectMapper = {
    val m = new ObjectMapper()
    m.registerModule(DefaultScalaModule)
  }

  val jsonFormat: JsonFormat = new JsonFormat

  def loadScenario[T<: Message : ClassTag](file: String): Seq[T] = {
    val fileString = Source.fromFile(file).mkString
    val parsed = mapper.readValue(fileString, classOf[Sceanario])
    parsed.input.map { data =>
      val json = mapper.writeValueAsString(data)
      convert[T](json)
    }
  }

  def convert[T<: Message : ClassTag](json: String): T = {
    val clazz = classTag[T].runtimeClass
    val builder = clazz.getMethod("newBuilder").invoke(clazz).asInstanceOf[Message.Builder]
    try {
      val input: InputStream = new ByteArrayInputStream(json.getBytes())
      jsonFormat.merge(input, builder)
      builder.build().asInstanceOf[T]
    } catch {
      case e: Exception =>
        throw e
    }
  }

}

@SerialVersionUID(1L)
case class KafkaDataFrame(key: Array[Byte], topic: Array[Byte], value: Array[Byte]) extends Serializable

case class Sceanario(input: Seq[Any], expected: Option[Any] = None)

trait SparkSqlTest extends BeforeAndAfterAll with SparkContextProvider {
  self: Suite =>

  @transient var _sparkSql: SparkSession = _
  @transient private var _sc: SparkContext = _

  override def sc: SparkContext = _sc

  def conf: SparkConf

  def sparkSql: SparkSession = _sparkSql

  override def beforeAll() {
    _sparkSql = SparkSession.builder().config(conf).getOrCreate()

    _sc = _sparkSql.sparkContext
    setup(_sc)
    super.beforeAll()
  }

  override def afterAll() {
    try {
      _sparkSql.close()
      _sparkSql = null
      LocalSparkContext.stop(_sc)
      _sc = null
    } finally {
      super.afterAll()
    }
  }

} 
Example 157
Source File: DeduplicateAndSortDictionaries.scala    From open-korean-text   with Apache License 2.0 5 votes vote down vote up
package org.openkoreantext.processor.tools

import java.io.FileOutputStream

import scala.io.Source


object DeduplicateAndSortDictionaries extends Runnable  {

  private[this] def readWords(filename: String): Set[String] = {
    Source.fromFile(filename)(io.Codec("UTF-8"))
        .getLines()
        .map(_.trim)
        .filter(_.length > 0)
        .toSet
  }

  private val RESOURCES_TO_CLEANUP = Seq(
    "noun/nouns.txt", "noun/entities.txt", "noun/spam.txt",
    "noun/names.txt", "noun/twitter.txt", "noun/lol.txt",
    "noun/slangs.txt", "noun/company_names.txt",
    "noun/foreign.txt", "noun/geolocations.txt", "noun/profane.txt",
    "noun/kpop.txt", "noun/bible.txt",
    "noun/wikipedia_title_nouns.txt", "noun/pokemon.txt", "noun/congress.txt",
    "noun/brand.txt", "noun/fashion.txt", "noun/neologism.txt",

    "substantives/modifier.txt", "substantives/suffix.txt",
    "substantives/family_names.txt", "substantives/given_names.txt",

    "adjective/adjective.txt", "adverb/adverb.txt",

    "auxiliary/determiner.txt", "auxiliary/exclamation.txt", "auxiliary/conjunctions.txt",

    "josa/josa.txt", "typos/typos.txt",

    "verb/eomi.txt", "verb/pre_eomi.txt", "verb/verb.txt", "verb/verb_prefix.txt"
  )

  def run {
    RESOURCES_TO_CLEANUP.foreach {
      f: String =>
        val outputFolder = "src/main/resources/org/openkoreantext/processor/util/"
        System.err.println("Processing %s.".format(f))
        val words = readWords(outputFolder + f).toList.sorted

        val out = new FileOutputStream(outputFolder + f)

        words.foreach {
          word: String => out.write((word + "\n").getBytes)
        }
        out.close()
    }
  }
} 
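Note: readWords above never closes the underlying source. A variant under the same UTF-8 assumption that releases the file handle once the set is built:

import scala.io.{Codec, Source}

def readWordsClosing(filename: String): Set[String] = {
  val source = Source.fromFile(filename)(Codec.UTF8)
  // toSet forces the lazy iterator before the source is closed
  try source.getLines().map(_.trim).filter(_.nonEmpty).toSet
  finally source.close()
}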
Example 158
Source File: BatchGetUnknownNouns.scala    From open-korean-text   with Apache License 2.0 5 votes vote down vote up
package org.openkoreantext.processor.qa

import java.util.logging.Logger

import org.openkoreantext.processor.OpenKoreanTextProcessor
import org.openkoreantext.processor.tokenizer.KoreanChunker._
import org.openkoreantext.processor.tokenizer.KoreanTokenizer._
import org.openkoreantext.processor.util.KoreanPos

import scala.io.Source


object BatchGetUnknownNouns {
  private val LOG = Logger.getLogger(getClass.getSimpleName)
  private val VERBOSE = true

  case class ChunkWithTweet(chunk: String, tweet: String)

  def main(args: Array[String]) {
    if (args.length != 1) {
      println("The first arg should be an input file path of Korean tweets.")
      return
    }
    val chunksWithUnknowns = Source.fromFile(args(0)).getLines().foldLeft(List[ChunkWithTweet]()) {
      case (l: List[ChunkWithTweet], line: String) if line.trim.length > 5 =>
        chunk(line).flatMap {
          case t: KoreanToken if t.pos == KoreanPos.Korean && tokenize(t.text).exists(_.unknown) =>
            Some(ChunkWithTweet(t.text, line.trim))
          case t: KoreanToken => None
        }.toList ::: l
      case (l: List[ChunkWithTweet], line: String) => l
    }.toSet

    chunksWithUnknowns.toSeq.sortBy(_.chunk).foreach {
      chunkWithTweet: ChunkWithTweet =>
        println(chunkWithTweet.tweet)
        println(OpenKoreanTextProcessor
            .tokenize(chunkWithTweet.tweet)
            .mkString(" "))

        println(chunkWithTweet.chunk + ": " +
            tokenize(chunkWithTweet.chunk).mkString(" "))
        println()
    }

  }
} 
Example 159
Source File: BatchTokenizeTweets.scala    From open-korean-text   with Apache License 2.0 5 votes vote down vote up
package org.openkoreantext.processor.qa

import java.util.logging.{Level, Logger}

import org.openkoreantext.processor.OpenKoreanTextProcessor
import org.openkoreantext.processor.tokenizer.KoreanTokenizer.KoreanToken
import org.openkoreantext.processor.util.KoreanPos

import scala.io.Source


object BatchTokenizeTweets {

  case class ParseTime(time: Long, chunk: String)

  private val LOG = Logger.getLogger(getClass.getSimpleName)
  private val VERBOSE = true
  private val NON_NOUNS = Set(KoreanPos.Adjective, KoreanPos.Adverb, KoreanPos.Verb)

  def main(args: Array[String]) {
    if (args.length != 1) {
      println("The first arg should be an input file of Korean tweets.")
      return
    }
    val parseTimesAll = Source.fromFile(args(0)).getLines().foldLeft(List[ParseTime]()) {
      case (l: List[ParseTime], line: String) =>
        val t0 = System.currentTimeMillis()
        val parsed = OpenKoreanTextProcessor.tokenize(line)
        val t1 = System.currentTimeMillis()

        if (VERBOSE) {
          println(parsed.map(t => t.text + "/" + t.pos).mkString(" "))
        }
        ParseTime(t1 - t0, line.trim) :: l
    }

    val loadingTime = parseTimesAll.last

    LOG.log(Level.INFO, "The first one \"%s\" took %d ms including the loading time.".format(loadingTime.chunk, loadingTime.time))

    val parseTimes = parseTimesAll.init

    val averageTweetLength = parseTimes.map(_.chunk.length).sum.toDouble / parseTimes.size

    val averageTime = parseTimes.map(_.time).sum.toDouble / parseTimes.size
    val maxItem = parseTimes.maxBy(_.time)

    LOG.log(Level.INFO, ("Parsed %d items. \n" +
        "       Total time: %d s \n" +
        "       Average tweet length: %.2f chars \n" +
        "       Average time per tweet: %.2f ms \n" +
        "       Max time: %d ms, %s\n" +
        "       Parsed: %s"
        ).format(
          parseTimes.size,
          parseTimes.map(_.time).sum / 1000,
          averageTweetLength,
          averageTime,
          maxItem.time,
          maxItem.chunk,
          OpenKoreanTextProcessor.tokenize(maxItem.chunk).map {
            case t if t.unknown => t.text.toString + t.pos + "*"
            case t => t.text + t.pos.toString
          }.mkString(" ")
        ))
  }

  private def parseToString(parsed: Seq[KoreanToken]): String = {
    parsed.map {
      case t if t.unknown => t.text.toString + t.pos + "*"
      case t => t.text + t.pos.toString
    }.mkString(" ")
  }
} 
Example 160
Source File: KafkaSourceOffsetSuite.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.kafka010

import java.io.File

import org.apache.spark.sql.execution.streaming._
import org.apache.spark.sql.streaming.OffsetSuite
import org.apache.spark.sql.test.SharedSQLContext

class KafkaSourceOffsetSuite extends OffsetSuite with SharedSQLContext {

  compare(
    one = KafkaSourceOffset(("t", 0, 1L)),
    two = KafkaSourceOffset(("t", 0, 2L)))

  compare(
    one = KafkaSourceOffset(("t", 0, 1L), ("t", 1, 0L)),
    two = KafkaSourceOffset(("t", 0, 2L), ("t", 1, 1L)))

  compare(
    one = KafkaSourceOffset(("t", 0, 1L), ("T", 0, 0L)),
    two = KafkaSourceOffset(("t", 0, 2L), ("T", 0, 1L)))

  compare(
    one = KafkaSourceOffset(("t", 0, 1L)),
    two = KafkaSourceOffset(("t", 0, 2L), ("t", 1, 1L)))


  val kso1 = KafkaSourceOffset(("t", 0, 1L))
  val kso2 = KafkaSourceOffset(("t", 0, 2L), ("t", 1, 3L))
  val kso3 = KafkaSourceOffset(("t", 0, 2L), ("t", 1, 3L), ("t", 1, 4L))

  compare(KafkaSourceOffset(SerializedOffset(kso1.json)),
    KafkaSourceOffset(SerializedOffset(kso2.json)))

  test("basic serialization - deserialization") {
    assert(KafkaSourceOffset.getPartitionOffsets(kso1) ==
      KafkaSourceOffset.getPartitionOffsets(SerializedOffset(kso1.json)))
  }


  testWithUninterruptibleThread("OffsetSeqLog serialization - deserialization") {
    withTempDir { temp =>
      // use non-existent directory to test whether log make the dir
      val dir = new File(temp, "dir")
      val metadataLog = new OffsetSeqLog(spark, dir.getAbsolutePath)
      val batch0 = OffsetSeq.fill(kso1)
      val batch1 = OffsetSeq.fill(kso2, kso3)

      val batch0Serialized = OffsetSeq.fill(batch0.offsets.flatMap(_.map(o =>
        SerializedOffset(o.json))): _*)

      val batch1Serialized = OffsetSeq.fill(batch1.offsets.flatMap(_.map(o =>
        SerializedOffset(o.json))): _*)

      assert(metadataLog.add(0, batch0))
      assert(metadataLog.getLatest() === Some(0 -> batch0Serialized))
      assert(metadataLog.get(0) === Some(batch0Serialized))

      assert(metadataLog.add(1, batch1))
      assert(metadataLog.get(0) === Some(batch0Serialized))
      assert(metadataLog.get(1) === Some(batch1Serialized))
      assert(metadataLog.getLatest() === Some(1 -> batch1Serialized))
      assert(metadataLog.get(None, Some(1)) ===
        Array(0 -> batch0Serialized, 1 -> batch1Serialized))

      // Adding the same batch does nothing
      metadataLog.add(1, OffsetSeq.fill(LongOffset(3)))
      assert(metadataLog.get(0) === Some(batch0Serialized))
      assert(metadataLog.get(1) === Some(batch1Serialized))
      assert(metadataLog.getLatest() === Some(1 -> batch1Serialized))
      assert(metadataLog.get(None, Some(1)) ===
        Array(0 -> batch0Serialized, 1 -> batch1Serialized))
    }
  }

  test("read Spark 2.1.0 offset format") {
    val offset = readFromResource("kafka-source-offset-version-2.1.0.txt")
    assert(KafkaSourceOffset(offset) ===
      KafkaSourceOffset(("topic1", 0, 456L), ("topic1", 1, 789L), ("topic2", 0, 0L)))
  }

  private def readFromResource(file: String): SerializedOffset = {
    import scala.io.Source
    val input = getClass.getResource(s"/$file").toURI
    val str = Source.fromFile(input).mkString
    SerializedOffset(str)
  }
} 
Example 161
Source File: RawTextSender.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.streaming.util

import java.io.{ByteArrayOutputStream, IOException}
import java.net.ServerSocket
import java.nio.ByteBuffer

import scala.io.Source

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.util.IntParam


private[streaming]
object RawTextSender extends Logging {
  def main(args: Array[String]) {
    if (args.length != 4) {
      // scalastyle:off println
      System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>")
      // scalastyle:on println
      System.exit(1)
    }
    // Parse the arguments using a pattern match
    val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args

    // Repeat the input data multiple times to fill in a buffer
    val lines = Source.fromFile(file).getLines().toArray
    val bufferStream = new ByteArrayOutputStream(blockSize + 1000)
    val ser = new KryoSerializer(new SparkConf()).newInstance()
    val serStream = ser.serializeStream(bufferStream)
    var i = 0
    while (bufferStream.size < blockSize) {
      serStream.writeObject(lines(i))
      i = (i + 1) % lines.length
    }
    val array = bufferStream.toByteArray

    val countBuf = ByteBuffer.wrap(new Array[Byte](4))
    countBuf.putInt(array.length)
    countBuf.flip()

    val serverSocket = new ServerSocket(port)
    logInfo("Listening on port " + port)

    while (true) {
      val socket = serverSocket.accept()
      logInfo("Got a new connection")
      val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec)
      try {
        while (true) {
          out.write(countBuf.array)
          out.write(array)
        }
      } catch {
        case e: IOException =>
          logError("Client disconnected")
      } finally {
        socket.close()
      }
    }
  }
} 
Example 162
Source File: HDFSExecutorMetricsReplayListenerBus.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.scheduler

import java.io.InputStream

import scala.collection.immutable
import scala.collection.mutable.ListBuffer
import scala.io.Source
import scala.util.parsing.json._

import org.apache.spark.internal.Logging

private[spark] class HDFSExecutorMetricsReplayListenerBus extends SparkListenerBus with Logging {

  
  def replay(
              logDataList: ListBuffer[(InputStream, String)],
              sourceName: String,
              maybeTruncated: Boolean = false): Unit = {

    logDataList.foreach(logData => {
      try {
        for (line <- Source.fromInputStream(logData._1).getLines()) {
          val hashMapParsed = JSON.parseFull(line)
          val hashMap = {
            hashMapParsed match {
              case Some(m: Map[String, Any]) => m
              case _ => new immutable.HashMap[String, Any]
            }
          }
          val hdfsExecutorMetrics = new HDFSExecutorMetrics(
            hashMap("values").asInstanceOf[Map[String, Any]],
            hashMap("host").asInstanceOf[String],
            hashMap("timestamp").asInstanceOf[Double].toLong)
          postToAll(hdfsExecutorMetrics)
        }
      } catch {
        case ex: Exception =>
          ex.printStackTrace();
          logError(ex.toString)
          logWarning(s"Got JsonParseException from log file $logData")
      }
    })
  }
} 
Example 163
Source File: PythonBroadcastSuite.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.api.python

import java.io.{File, PrintWriter}

import scala.io.Source

import org.scalatest.Matchers

import org.apache.spark.{SharedSparkContext, SparkConf, SparkFunSuite}
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.util.Utils

// This test suite uses SharedSparkContext because we need a SparkEnv in order to deserialize
// a PythonBroadcast:
class PythonBroadcastSuite extends SparkFunSuite with Matchers with SharedSparkContext {
  test("PythonBroadcast can be serialized with Kryo (SPARK-4882)") {
    val tempDir = Utils.createTempDir()
    val broadcastedString = "Hello, world!"
    def assertBroadcastIsValid(broadcast: PythonBroadcast): Unit = {
      val source = Source.fromFile(broadcast.path)
      val contents = source.mkString
      source.close()
      contents should be (broadcastedString)
    }
    try {
      val broadcastDataFile: File = {
        val file = new File(tempDir, "broadcastData")
        val printWriter = new PrintWriter(file)
        printWriter.write(broadcastedString)
        printWriter.close()
        file
      }
      val broadcast = new PythonBroadcast(broadcastDataFile.getAbsolutePath)
      assertBroadcastIsValid(broadcast)
      val conf = new SparkConf().set("spark.kryo.registrationRequired", "true")
      val deserializedBroadcast =
        Utils.clone[PythonBroadcast](broadcast, new KryoSerializer(conf).newInstance())
      assertBroadcastIsValid(deserializedBroadcast)
    } finally {
      Utils.deleteRecursively(tempDir)
    }
  }
} 
Example 164
Source File: LogUrlsStandaloneSuite.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy

import java.net.URL

import scala.collection.mutable
import scala.io.Source

import org.apache.spark.{LocalSparkContext, SparkContext, SparkFunSuite}
import org.apache.spark.scheduler.{SparkListener, SparkListenerExecutorAdded}
import org.apache.spark.scheduler.cluster.ExecutorInfo
import org.apache.spark.util.SparkConfWithEnv

class LogUrlsStandaloneSuite extends SparkFunSuite with LocalSparkContext {

  
  private val WAIT_TIMEOUT_MILLIS = 10000

  test("verify that correct log urls get propagated from workers") {
    sc = new SparkContext("local-cluster[2,1,1024]", "test")

    val listener = new SaveExecutorInfo
    sc.addSparkListener(listener)

    // Trigger a job so that executors get added
    sc.parallelize(1 to 100, 4).map(_.toString).count()

    sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
    listener.addedExecutorInfos.values.foreach { info =>
      assert(info.logUrlMap.nonEmpty)
      // Browse to each URL to check that it's valid
      info.logUrlMap.foreach { case (logType, logUrl) =>
        val html = Source.fromURL(logUrl).mkString
        assert(html.contains(s"$logType log page"))
      }
    }
  }

  test("verify that log urls reflect SPARK_PUBLIC_DNS (SPARK-6175)") {
    val SPARK_PUBLIC_DNS = "public_dns"
    val conf = new SparkConfWithEnv(Map("SPARK_PUBLIC_DNS" -> SPARK_PUBLIC_DNS)).set(
      "spark.extraListeners", classOf[SaveExecutorInfo].getName)
    sc = new SparkContext("local-cluster[2,1,1024]", "test", conf)

    // Trigger a job so that executors get added
    sc.parallelize(1 to 100, 4).map(_.toString).count()

    sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS)
    val listeners = sc.listenerBus.findListenersByClass[SaveExecutorInfo]
    assert(listeners.size === 1)
    val listener = listeners(0)
    listener.addedExecutorInfos.values.foreach { info =>
      assert(info.logUrlMap.nonEmpty)
      info.logUrlMap.values.foreach { logUrl =>
        assert(new URL(logUrl).getHost === SPARK_PUBLIC_DNS)
      }
    }
  }
}

private[spark] class SaveExecutorInfo extends SparkListener {
  val addedExecutorInfos = mutable.Map[String, ExecutorInfo]()

  override def onExecutorAdded(executor: SparkListenerExecutorAdded) {
    addedExecutorInfos(executor.executorId) = executor.executorInfo
  }
} 
Example 165
Source File: TnTestHelper.scala    From TopNotch   with Apache License 2.0 5 votes vote down vote up
package com.bfm.topnotch

import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions._
import org.scalatest.Matchers
import scala.io.Source
import org.json4s._
import org.json4s.native.JsonMethods._

/**
 * This class handles some of the TopNotch reusable test code
 */
object TnTestHelper extends Matchers {
  val INDEX_COL_NAME = "__INDEX_COL__"
  /**
   * Read a file from the resources/src/test/scala/com/bfm/topnotch folder
   * @param fileName The path to the file relative to the path resources/src/test/scala/com/bfm/topnotch
   * @return The contents of the file as one string
   */
  def readResourceFileToJson[T](fileName: String, classType: Class[_]): JValue = {
    parse(Source.fromFile(classType.getResource(fileName).getFile).getLines().mkString("\n"))
  }

  /**
   * Attach an index to rows into a dataframe so we can track them throughout a series of operations
   * @param df The dataframe to index
   * @return A dataframe equal to df but with an index column
   */
  def attachIdx(df: DataFrame): DataFrame = df.withColumn(INDEX_COL_NAME, monotonicallyIncreasingId()).cache

  /**
   * Get the largest number less than or equal to num that is divisible by denominator
   */
  def numDivisibleBy(num: Int, denominator: Int) = num / denominator * denominator

  /**
   * Grow a data frame to a desired size by duplicating rows.
   */
  def growDataFrame(initDF: DataFrame, newSize: Int): DataFrame = {
    val initCount = initDF.count
    if (initCount < 1) throw new IllegalArgumentException("initDF's size must be greater than 0")
    List.fill((newSize / initCount + 1).toInt)(initDF).reduce(_.unionAll(_)).limit(newSize)
  }

  /**
   * Compares two dataframes and ensures that they have the same schema (ignore nullable) and the same values
   * @param actualDF The DF we want to check for correctness
   * @param correctDF The correct DF we use for comparison
   * @param onlySchema only compare the schemas of the dataframes
   */
  def dfEquals(actualDF: DataFrame, correctDF: DataFrame, onlySchema: Boolean = false): Unit = {
    actualDF.schema.map(f => (f.name, f.dataType)).toSet shouldBe correctDF.schema.map(f => (f.name, f.dataType)).toSet
    if (!onlySchema) {
      actualDF.collect.map(_.toSeq.toSet).toSet shouldBe correctDF.collect.map(_.toSeq.toSet).toSet
    }
  }
} 
Example 166
Source File: exercise02.scala    From scala-for-the-Impatient   with MIT License 5 votes vote down vote up
import scala.collection.mutable
import scala.io.Source


def countWord() : Unit = {
  val wordMap = new mutable.HashMap[String,Int]
  val in = new java.util.Scanner(new java.io.File("exercise02.txt"))
  while(in.hasNext){
    val word = in.next()
    wordMap(word) = wordMap.getOrElse(word,0) + 1
  }
  println(wordMap.mkString(","))
}

def countWord2() : Unit = {
  val source = Source.fromFile("exercise02.txt").mkString
  val tokens = source.split("\\s+")
  val wordMap = new mutable.HashMap[String,Int]
  for(word <- tokens){
    wordMap(word) = wordMap.getOrElse(word,0) + 1
  }
  println(wordMap.mkString(","))
} 
Example 167
Source File: FileUtils.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.wmexchanger.utils

import java.io.BufferedInputStream
import java.io.BufferedOutputStream
import java.io.File
import java.io.FileInputStream
import java.io.FileOutputStream
import java.io.FilenameFilter
import java.io.ObjectInputStream
import java.io.ObjectOutputStream
import java.io.PrintWriter

import org.clulab.wm.wmexchanger.utils.Closer.AutoCloser

import scala.io.Source

object FileUtils {

  def appendingPrintWriterFromFile(file: File): PrintWriter = Sinker.printWriterFromFile(file, append = true)

  def appendingPrintWriterFromFile(path: String): PrintWriter = Sinker.printWriterFromFile(path, append = true)

  def printWriterFromFile(file: File): PrintWriter = Sinker.printWriterFromFile(file, append = false)

  def printWriterFromFile(path: String): PrintWriter = Sinker.printWriterFromFile(path, append = false)

  // Output
  def newBufferedOutputStream(file: File): BufferedOutputStream =
    new BufferedOutputStream(new FileOutputStream(file))

  def newBufferedOutputStream(filename: String): BufferedOutputStream =
    newBufferedOutputStream(new File(filename))

  def newAppendingBufferedOutputStream(file: File): BufferedOutputStream =
    new BufferedOutputStream(new FileOutputStream(file, true))

  def newAppendingBufferedOutputStream(filename: String): BufferedOutputStream =
    newAppendingBufferedOutputStream(new File(filename))

  def newObjectOutputStream(filename: String): ObjectOutputStream =
    new ObjectOutputStream(newBufferedOutputStream(filename))

  // Input
  def newBufferedInputStream(file: File): BufferedInputStream =
    new BufferedInputStream(new FileInputStream(file))

  def newBufferedInputStream(filename: String): BufferedInputStream =
    newBufferedInputStream(new File(filename))

  def newObjectInputStream(filename: String): ObjectInputStream =
    new ObjectInputStream(newBufferedInputStream(filename))

  def findFiles(collectionDir: String, extension: String): Seq[File] = {
    val dir = new File(collectionDir)
    val filter = new FilenameFilter {
      def accept(dir: File, name: String): Boolean = name.endsWith(extension)
    }

    val result = Option(dir.listFiles(filter))
        .getOrElse(throw Sourcer.newFileNotFoundException(collectionDir))
    result
  }

  protected def getTextFromSource(source: Source): String = source.mkString

  def getTextFromFile(file: File): String =
    Sourcer.sourceFromFile(file).autoClose { source =>
      getTextFromSource(source)
    }
} 
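Note: getTextFromFile relies on the project's AutoCloser helper. On Scala 2.13+ the standard library's scala.util.Using gives the same close-on-exit behaviour without it; a sketch (not part of the project above):

import java.io.File
import scala.io.Source
import scala.util.{Try, Using}

def getTextFromFileStd(file: File): Try[String] =
  Using(Source.fromFile(file, "UTF-8"))(_.mkString) // source is closed when the block exits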
Example 168
Source File: Sourcer.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.wmexchanger.utils

import java.io.{File, FileNotFoundException}
import java.nio.charset.StandardCharsets

import org.slf4j.{Logger, LoggerFactory}

import scala.io.BufferedSource
import scala.io.Source

object Sourcer {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)
  val utf8: String = StandardCharsets.UTF_8.toString
  
  def sourceFromResource(path: String): BufferedSource = {
    val url = Option(Sourcer.getClass.getResource(path))
        .getOrElse(throw newFileNotFoundException(path))

    logger.info("Sourcing resource " + url.getPath)
    Source.fromURL(url, utf8)
  }
  
  def sourceFromFile(file: File): BufferedSource = {
    logger.info("Sourcing file " + file.getPath)
    Source.fromFile(file, utf8)
  }

  def sourceFromFile(path: String): BufferedSource = sourceFromFile(new File(path))

  def newFileNotFoundException(path: String): FileNotFoundException = {
    val message1 = path + " (The system cannot find the path specified"
    val message2 = message1 + (if (path.startsWith("~")) ".  Make sure to not use the tilde (~) character in paths in lieu of the home directory." else "")
    val message3 = message2 + ")"

    new FileNotFoundException(message3)
  }
} 
Example 169
Source File: Sourcer.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.eidos.utils

import java.io.{File, FileNotFoundException}
import java.nio.charset.StandardCharsets

import org.slf4j.{Logger, LoggerFactory}

import scala.io.BufferedSource
import scala.io.Source

object Sourcer {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)
  val utf8: String = StandardCharsets.UTF_8.toString
  
  def sourceFromResource(path: String): BufferedSource = {
    val url = Option(Sourcer.getClass.getResource(path))
        .getOrElse(throw newFileNotFoundException(path))

    logger.info("Sourcing resource " + url.getPath)
    Source.fromURL(url, utf8)
  }
  
  def sourceFromFile(file: File): BufferedSource = {
    logger.info("Sourcing file " + file.getPath)
    Source.fromFile(file, utf8)
  }

  def sourceFromFile(path: String): BufferedSource = sourceFromFile(new File(path))

  def newFileNotFoundException(path: String): FileNotFoundException = {
    val message1 = path + " (The system cannot find the path specified"
    val message2 = message1 + (if (path.startsWith("~")) ".  Make sure to not use the tilde (~) character in paths in lieu of the home directory." else "")
    val message3 = message2 + ")"

    new FileNotFoundException(message3)
  }
} 
Example 170
Source File: TestResources.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.eidos.system

import java.io.File

import org.clulab.wm.eidos.test.TestUtils._
import org.clulab.wm.eidos.utils.Closer.AutoCloser
import org.clulab.wm.eidos.utils.Sourcer

import scala.io.Source

class TestResources extends Test {
  
  behavior of "resources"

  def test(file: File): Unit = {
    val path = file.getCanonicalPath()

    it should "not have any Unicode characters in " + path in {
      val count = Sourcer.sourceFromFile(file).autoClose { source =>
        source.getLines().zipWithIndex.foldRight(0) { (lineAndLineNo, sum) =>
          val line = lineAndLineNo._1
          val lineNo = lineAndLineNo._2
          val badCharAndIndex = line.zipWithIndex.filter { case (c: Char, index: Int) =>
            (c < 32 || 127 < c) && c != '\r' && c != '\n' && c != '\t'
          }
          val complaints = badCharAndIndex.map { case (c: Char, index: Int) =>
            "'" + c + "' found at index " + index + "."
          }

          complaints.foreach(complaint => println("Line " + (lineNo + 1) + ": " + complaint))
          sum + complaints.size
        }
      }
      count should be (0)
    }
  }
  
  // https://groups.google.com/forum/#!topic/scala-user/WrmYHHzcJPw  
  type Operation = (File) => Unit

  val wantedSuffixes = Seq(".conf", ".yml", ".tsv", ".kb", ".txt")
  val unwantedSuffixes = Seq("300d.txt", "vectors.txt", "_2016.txt", "/portuguese/grammars/triggers.yml",
                              "word2idx_file.txt")

  def fileMatches(file: File): Boolean = {
    val canonicalPath = file.getCanonicalPath().replace('\\', '/')

    wantedSuffixes.exists(suffix => canonicalPath.endsWith(suffix)) &&
    !unwantedSuffixes.exists(suffix => canonicalPath.endsWith(suffix))
  }

  def directoryMatches(file: File): Boolean = true
  
  def doOperation(path: String)(operation: Operation): Unit = {
    for (files <- Option(new File(path).listFiles); file <- files) {
        if (file.isFile() && fileMatches(file) && file.getAbsolutePath.contains("english"))
          operation(file)
        if (file.isDirectory && directoryMatches(file))
          doOperation(file.getAbsolutePath)(operation)
    }
  }
  
  doOperation(new File("./src/main/resources").getCanonicalPath())(test)
} 
Example 171
Source File: Sourcer.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.elasticsearch.utils

import java.io.{File, FileNotFoundException}
import java.nio.charset.StandardCharsets

import org.slf4j.{Logger, LoggerFactory}

import scala.io.BufferedSource
import scala.io.Source

object Sourcer {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)
  val utf8: String = StandardCharsets.UTF_8.toString
  
  def sourceFromResource(path: String): BufferedSource = {
    val url = Option(Sourcer.getClass.getResource(path))
        .getOrElse(throw newFileNotFoundException(path))

    logger.info("Sourcing resource " + url.getPath)
    Source.fromURL(url, utf8)
  }
  
  def sourceFromFile(file: File): BufferedSource = {
    logger.info("Sourcing file " + file.getPath)
    Source.fromFile(file, utf8)
  }

  def sourceFromFile(path: String): BufferedSource = sourceFromFile(new File(path))

  def newFileNotFoundException(path: String): FileNotFoundException = {
    val message1 = path + " (The system cannot find the path specified"
    val message2 = message1 + (if (path.startsWith("~")) ".  Make sure to not use the tilde (~) character in paths in lieu of the home directory." else "")
    val message3 = message2 + ")"

    new FileNotFoundException(message3)
  }
} 
Example 172
Source File: AssetsHelper.scala    From scuruto   with MIT License 5 votes vote down vote up
package lib

import skinny.SkinnyEnv

import scala.io.Source

object AssetsHelper {
  val fileName = "version.txt"

  val hash = {
    if (SkinnyEnv.isDevelopment() || SkinnyEnv.isTest()) {
      val basePath = "src/main/webapp/assets/dist"
      Source.fromFile(s"${basePath}/${fileName}").mkString
    } else {
      val basePath = "assets/dist"
      Source.fromInputStream(getClass.getClassLoader.getResourceAsStream(s"${basePath}/${fileName}")).mkString
    }
  }
} 
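Note: the hash above is read from the file system in dev/test and from the classpath otherwise. A generic sketch of that fallback (names and paths are hypothetical):

import scala.io.Source

def readVersion(fsPath: String, resourcePath: String): String = {
  val file = new java.io.File(fsPath)
  val source =
    if (file.exists()) Source.fromFile(file, "UTF-8")
    else Source.fromInputStream(getClass.getClassLoader.getResourceAsStream(resourcePath), "UTF-8")
  try source.mkString.trim finally source.close()
}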
Example 173
Source File: ReplayListenerBus.scala    From SparkCore   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.scheduler

import java.io.{InputStream, IOException}

import scala.io.Source

import org.json4s.jackson.JsonMethods._

import org.apache.spark.Logging
import org.apache.spark.util.JsonProtocol

private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {

  def replay(logData: InputStream, sourceName: String): Unit = {
    var currentLine: String = null
    var lineNumber: Int = 1
    try {
      val lines = Source.fromInputStream(logData).getLines()
      lines.foreach { line =>
        currentLine = line
        postToAll(JsonProtocol.sparkEventFromJson(parse(line)))
        lineNumber += 1
      }
    } catch {
      case ioe: IOException =>
        throw ioe
      case e: Exception =>
        logError(s"Exception parsing Spark event log: $sourceName", e)
        logError(s"Malformed line #$lineNumber: $currentLine\n")
    }
  }

} 
Example 174
Source File: PythonBroadcastSuite.scala    From SparkCore   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.api.python

import scala.io.Source

import java.io.{PrintWriter, File}

import org.scalatest.{Matchers, FunSuite}

import org.apache.spark.{SharedSparkContext, SparkConf}
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.util.Utils

// This test suite uses SharedSparkContext because we need a SparkEnv in order to deserialize
// a PythonBroadcast:
class PythonBroadcastSuite extends FunSuite with Matchers with SharedSparkContext {
  test("PythonBroadcast can be serialized with Kryo (SPARK-4882)") {
    val tempDir = Utils.createTempDir()
    val broadcastedString = "Hello, world!"
    def assertBroadcastIsValid(broadcast: PythonBroadcast): Unit = {
      val source = Source.fromFile(broadcast.path)
      val contents = source.mkString
      source.close()
      contents should be (broadcastedString)
    }
    try {
      val broadcastDataFile: File = {
        val file = new File(tempDir, "broadcastData")
        val printWriter = new PrintWriter(file)
        printWriter.write(broadcastedString)
        printWriter.close()
        file
      }
      val broadcast = new PythonBroadcast(broadcastDataFile.getAbsolutePath)
      assertBroadcastIsValid(broadcast)
      val conf = new SparkConf().set("spark.kryo.registrationRequired", "true")
      val deserializedBroadcast =
        Utils.clone[PythonBroadcast](broadcast, new KryoSerializer(conf).newInstance())
      assertBroadcastIsValid(deserializedBroadcast)
    } finally {
      Utils.deleteRecursively(tempDir)
    }
  }
} 
Example 175
Source File: LogUrlsStandaloneSuite.scala    From SparkCore   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy

import java.net.URL

import scala.collection.mutable
import scala.io.Source

import org.scalatest.FunSuite

import org.apache.spark.scheduler.cluster.ExecutorInfo
import org.apache.spark.scheduler.{SparkListenerExecutorAdded, SparkListener}
import org.apache.spark.{SparkConf, SparkContext, LocalSparkContext}

class LogUrlsStandaloneSuite extends FunSuite with LocalSparkContext {

  
  private val WAIT_TIMEOUT_MILLIS = 10000

  test("verify that correct log urls get propagated from workers") {
    sc = new SparkContext("local-cluster[2,1,512]", "test")

    val listener = new SaveExecutorInfo
    sc.addSparkListener(listener)

    // Trigger a job so that executors get added
    sc.parallelize(1 to 100, 4).map(_.toString).count()

    assert(sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS))
    listener.addedExecutorInfos.values.foreach { info =>
      assert(info.logUrlMap.nonEmpty)
      // Browse to each URL to check that it's valid
      info.logUrlMap.foreach { case (logType, logUrl) =>
        val html = Source.fromURL(logUrl).mkString
        assert(html.contains(s"$logType log page"))
      }
    }
  }

  test("verify that log urls reflect SPARK_PUBLIC_DNS (SPARK-6175)") {
    val SPARK_PUBLIC_DNS = "public_dns"
    class MySparkConf extends SparkConf(false) {
      override def getenv(name: String) = {
        if (name == "SPARK_PUBLIC_DNS") SPARK_PUBLIC_DNS
        else super.getenv(name)
      }

      override def clone: SparkConf = {
        new MySparkConf().setAll(getAll)
      }
    }
    val conf = new MySparkConf()
    sc = new SparkContext("local-cluster[2,1,512]", "test", conf)

    val listener = new SaveExecutorInfo
    sc.addSparkListener(listener)

    // Trigger a job so that executors get added
    sc.parallelize(1 to 100, 4).map(_.toString).count()

    assert(sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS))
    listener.addedExecutorInfos.values.foreach { info =>
      assert(info.logUrlMap.nonEmpty)
      info.logUrlMap.values.foreach { logUrl =>
        assert(new URL(logUrl).getHost === SPARK_PUBLIC_DNS)
      }
    }
  }

  private class SaveExecutorInfo extends SparkListener {
    val addedExecutorInfos = mutable.Map[String, ExecutorInfo]()

    override def onExecutorAdded(executor: SparkListenerExecutorAdded) {
      addedExecutorInfos(executor.executorId) = executor.executorInfo
    }
  }
} 
Example 176
Source File: UISuite.scala    From SparkCore   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.ui

import java.net.ServerSocket

import scala.io.Source
import scala.util.{Failure, Success, Try}

import org.eclipse.jetty.servlet.ServletContextHandler
import org.scalatest.FunSuite
import org.scalatest.concurrent.Eventually._
import org.scalatest.time.SpanSugar._

import org.apache.spark.LocalSparkContext._
import org.apache.spark.{SparkConf, SparkContext}

class UISuite extends FunSuite {

  
  private def newSparkContext(): SparkContext = {
    val conf = new SparkConf()
      .setMaster("local")
      .setAppName("test")
      .set("spark.ui.enabled", "true")
    val sc = new SparkContext(conf)
    assert(sc.ui.isDefined)
    sc
  }

  ignore("basic ui visibility") {
    withSpark(newSparkContext()) { sc =>
      // test if the ui is visible, and all the expected tabs are visible
      eventually(timeout(10 seconds), interval(50 milliseconds)) {
        val html = Source.fromURL(sc.ui.get.appUIAddress).mkString
        assert(!html.contains("random data that should not be present"))
        assert(html.toLowerCase.contains("stages"))
        assert(html.toLowerCase.contains("storage"))
        assert(html.toLowerCase.contains("environment"))
        assert(html.toLowerCase.contains("executors"))
      }
    }
  }

  ignore("visibility at localhost:4040") {
    withSpark(newSparkContext()) { sc =>
      // test if visible from http://localhost:4040
      eventually(timeout(10 seconds), interval(50 milliseconds)) {
        val html = Source.fromURL("http://localhost:4040").mkString
        assert(html.toLowerCase.contains("stages"))
      }
    }
  }

  test("jetty selects different port under contention") {
    val server = new ServerSocket(0)
    val startPort = server.getLocalPort
    val serverInfo1 = JettyUtils.startJettyServer(
      "0.0.0.0", startPort, Seq[ServletContextHandler](), new SparkConf)
    val serverInfo2 = JettyUtils.startJettyServer(
      "0.0.0.0", startPort, Seq[ServletContextHandler](), new SparkConf)
    // Allow some wiggle room in case ports on the machine are under contention
    val boundPort1 = serverInfo1.boundPort
    val boundPort2 = serverInfo2.boundPort
    assert(boundPort1 != startPort)
    assert(boundPort2 != startPort)
    assert(boundPort1 != boundPort2)
    serverInfo1.server.stop()
    serverInfo2.server.stop()
    server.close()
  }

  test("jetty binds to port 0 correctly") {
    val serverInfo = JettyUtils.startJettyServer(
      "0.0.0.0", 0, Seq[ServletContextHandler](), new SparkConf)
    val server = serverInfo.server
    val boundPort = serverInfo.boundPort
    assert(server.getState === "STARTED")
    assert(boundPort != 0)
    Try { new ServerSocket(boundPort) } match {
      case Success(s) => fail("Port %s doesn't seem used by jetty server".format(boundPort))
      case Failure(e) =>
    }
  }

  test("verify appUIAddress contains the scheme") {
    withSpark(newSparkContext()) { sc =>
      val ui = sc.ui.get
      val uiAddress = ui.appUIAddress
      val uiHostPort = ui.appUIHostPort
      assert(uiAddress.equals("http://" + uiHostPort))
    }
  }

  test("verify appUIAddress contains the port") {
    withSpark(newSparkContext()) { sc =>
      val ui = sc.ui.get
      val splitUIAddress = ui.appUIAddress.split(':')
      val boundPort = ui.boundPort
      assert(splitUIAddress(2).toInt == boundPort)
    }
  }
} 
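Note: Source.fromURL offers no timeout control, so a single call in the polling checks above can block well past the eventually budget on an unresponsive endpoint. One workaround, sketched here with illustrative timeouts, is to open the connection manually and wrap its stream:

import java.net.URL
import scala.io.Source

def fetchWithTimeout(url: String, timeoutMs: Int = 2000): String = {
  val conn = new URL(url).openConnection()
  conn.setConnectTimeout(timeoutMs)
  conn.setReadTimeout(timeoutMs)
  val source = Source.fromInputStream(conn.getInputStream, "UTF-8")
  try source.mkString finally source.close()
}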
Example 177
Source File: FileUtil.scala    From sangria   with Apache License 2.0 5 votes vote down vote up
package sangria.util

import java.io.File

import io.github.classgraph.ClassGraph
import sangria.parser.QueryParser
import sangria.parser.DeliveryScheme.Throw
import spray.json._

import scala.io.Source
import net.jcazevedo.moultingyaml._

import scala.collection.JavaConverters._


object FileUtil extends StringMatchers {
  def loadQuery(name: String) =
    loadResource("queries/" + name)

  def loadYaml(name: String, root: String = "scenarios") =
    loadResource(root + "/" + name).parseYaml

  def loadScenarios(path: String, root: String = "scenarios") = this.synchronized {
    val yamlResources = new ClassGraph()
      .whitelistPackages(root + "." + path)
      .scan()
      .getResourcesWithExtension("yaml")
      .asScala
      .groupBy(_.getPath).mapValues(_.head) // deduplicate (`ClassGraph` gives duplicates for some reason)
      .values
      .toVector

    yamlResources.map { resource =>
      val name = resource.getPath.substring(resource.getPath.lastIndexOf("/") + 1)
      val relativePath = resource.getPathRelativeToClasspathElement
      val stream = this.getClass.getResourceAsStream("/" + relativePath)
      val contents = Source.fromInputStream(stream, "UTF-8").mkString.parseYaml

      ScenarioFile(name, relativePath, contents)
    }
  }

  def loadSchema(path: String) =
    QueryParser.parse(loadResource(path))

  def loadTestData(path: String): Either[YamlValue, JsValue] = {
    val text = loadResource(path)

    if (path endsWith ".yaml") Left(text.parseYaml)
    else if (path endsWith ".json") Right(text.parseJson)
    else throw new IllegalArgumentException(s"Unsupported file format for test data '$path'. Only `*.json` and `*.yaml` files are supported.")
  }

  def loadResource(path: String) =
    Option(this.getClass.getResourceAsStream("/" + path)) match {
      case Some(res) => stripCarriageReturns(Source.fromInputStream(res, "UTF-8").mkString)
      case None => throw new IllegalArgumentException("Resource not found: /" + path)
    }

  case class ScenarioFile(fileName: String, path: String, scenario: YamlValue) {
    def folder = path.substring(0, path.lastIndexOf("/"))
  }
} 
Example 178
Source File: GameResources.scala    From wowchat   with GNU General Public License v3.0 5 votes vote down vote up
package wowchat.game

import wowchat.common.{WowChatConfig, WowExpansion}

import scala.io.Source

object GameResources {

  lazy val AREA: Map[Int, String] = readIDNameFile(WowChatConfig.getExpansion match {
    case WowExpansion.Vanilla | WowExpansion.TBC | WowExpansion.WotLK => "pre_cata_areas.csv"
    case _ => "post_cata_areas.csv"
  })

  lazy val ACHIEVEMENT: Map[Int, String] = readIDNameFile("achievements.csv")

  private def readIDNameFile(file: String) = {
    Source
      .fromResource(file)
      .getLines
      .map(str => {
        val splt = str.split(",", 2)
        splt(0).toInt -> splt(1)
      })
      .toMap
  }
} 
Example 179
Source File: WoWChat.scala    From wowchat   with GNU General Public License v3.0 5 votes vote down vote up
package wowchat

import java.util.concurrent.{Executors, TimeUnit}

import wowchat.common.{CommonConnectionCallback, Global, ReconnectDelay, WowChatConfig}
import wowchat.discord.Discord
import wowchat.game.GameConnector
import wowchat.realm.{RealmConnectionCallback, RealmConnector}
import com.typesafe.scalalogging.StrictLogging
import io.netty.channel.nio.NioEventLoopGroup

import scala.io.Source

object WoWChat extends StrictLogging {

  private val RELEASE = "v1.3.3"

  def main(args: Array[String]): Unit = {
    logger.info(s"Running WoWChat - $RELEASE")
    val confFile = if (args.nonEmpty) {
      args(0)
    } else {
      logger.info("No configuration file supplied. Trying with default wowchat.conf.")
      "wowchat.conf"
    }
    Global.config = WowChatConfig(confFile)

    checkForNewVersion

    val gameConnectionController: CommonConnectionCallback = new CommonConnectionCallback {

      private val reconnectExecutor = Executors.newSingleThreadScheduledExecutor
      private val reconnectDelay = new ReconnectDelay

      override def connect: Unit = {
        Global.group = new NioEventLoopGroup

        val realmConnector = new RealmConnector(new RealmConnectionCallback {
          override def success(host: String, port: Int, realmName: String, realmId: Int, sessionKey: Array[Byte]): Unit = {
            gameConnect(host, port, realmName, realmId, sessionKey)
          }

          override def disconnected: Unit = doReconnect

          override def error: Unit = sys.exit(1)
        })

        realmConnector.connect
      }

      private def gameConnect(host: String, port: Int, realmName: String, realmId: Int, sessionKey: Array[Byte]): Unit = {
        new GameConnector(host, port, realmName, realmId, sessionKey, this).connect
      }

      override def connected: Unit = reconnectDelay.reset

      override def disconnected: Unit = doReconnect

      def doReconnect: Unit = {
        Global.group.shutdownGracefully()
        Global.discord.changeRealmStatus("Connecting...")
        val delay = reconnectDelay.getNext
        logger.info(s"Disconnected from server! Reconnecting in $delay seconds...")

        reconnectExecutor.schedule(new Runnable {
          override def run(): Unit = connect
        }, delay, TimeUnit.SECONDS)
      }
    }

    logger.info("Connecting to Discord...")
    Global.discord = new Discord(new CommonConnectionCallback {
      override def connected: Unit = gameConnectionController.connect

      override def error: Unit = sys.exit(1)
    })
  }

  private def checkForNewVersion = {
    // This is JSON, but I really just didn't want to import a full blown JSON library for one string.
    val data = Source.fromURL("https://api.github.com/repos/fjaros/wowchat/releases/latest").mkString
    val regex = "\"tag_name\":\"(.+?)\",".r
    val repoTagName = regex
      .findFirstMatchIn(data)
      .map(_.group(1))
      .getOrElse("NOT FOUND")

    if (repoTagName != RELEASE) {
      logger.error( "~~~ !!!                YOUR WoWChat VERSION IS OUT OF DATE                !!! ~~~")
      logger.error(s"~~~ !!!                     Current Version:  $RELEASE                      !!! ~~~")
      logger.error(s"~~~ !!!                     Repo    Version:  $repoTagName                      !!! ~~~")
      logger.error( "~~~ !!! RUN git pull OR GO TO https://github.com/fjaros/wowchat TO UPDATE !!! ~~~")
      logger.error( "~~~ !!!                YOUR WoWChat VERSION IS OUT OF DATE                !!! ~~~")
    }
  }
} 
Example 180
Source File: Packet.scala    From wowchat   with GNU General Public License v3.0 5 votes vote down vote up
package wowchat.common

import io.netty.buffer.{ByteBuf, ByteBufAllocator, EmptyByteBuf}

import scala.collection.mutable.ArrayBuffer
import scala.io.Source

case class Packet(
  id: Int,
  byteBuf: ByteBuf = new EmptyByteBuf(ByteBufAllocator.DEFAULT)
) {

  def readString: String = {
    import scala.util.control.Breaks._

    val ret = ArrayBuffer.newBuilder[Byte]
    breakable {
      while (byteBuf.readableBytes > 0) {
        val value = byteBuf.readByte
        if (value == 0) {
          break
        }
        ret += value
      }
    }

    Source.fromBytes(ret.result.toArray, "UTF-8").mkString
  }

  def skipString: Packet = {
    while (byteBuf.readableBytes > 0 && byteBuf.readByte != 0) {}
    this
  }

  // bit manipulation for cata+
  private var bitPosition = 7
  private var byte: Byte = 0

  def resetBitReader: Unit = {
    bitPosition = 7
    byte = 0
  }

  def readBit: Byte = {
    bitPosition += 1
    if (bitPosition > 7) {
      bitPosition = 0
      byte = byteBuf.readByte
    }

    (byte >> (7 - bitPosition) & 1).toByte
  }

  def readBits(length: Int): Int = {
    (length - 1 to 0 by -1).foldLeft(0) {
      case (result, i) => result | (readBit << i)
    }
  }

  def readBitSeq(mask: Array[Byte], indices: Int*): Unit = {
    indices.foreach(i => {
      mask(i) = readBit
    })
  }

  def readXorByte(mask: Byte): Byte = {
    if (mask != 0) {
      (mask ^ byteBuf.readByte).toByte
    } else {
      mask
    }
  }

  def readXorByteSeq(mask: Array[Byte], indices: Int*): Unit = {
    indices.foreach(i => {
      mask(i) = readXorByte(mask(i))
    })
  }
} 
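A rough usage sketch for readString, assuming the Packet class above is on the classpath (the packet id is arbitrary); it consumes bytes up to and including the first NUL terminator.

import io.netty.buffer.Unpooled

import wowchat.common.Packet

object PacketReadStringSketch extends App {
  // "Hi" followed by a 0 terminator and one trailing byte that should stay unread.
  val bytes = "Hi".getBytes("UTF-8") ++ Array[Byte](0, 'X'.toByte)
  val buf = Unpooled.wrappedBuffer(bytes)
  val packet = Packet(id = 0x1EE, byteBuf = buf)
  println(packet.readString)   // Hi
  println(buf.readableBytes)   // 1 -- the terminator was consumed, 'X' remains
}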
Example 181
Source File: JsonFormatSpec.scala    From akka-management   with Apache License 2.0 5 votes vote down vote up
package akka.discovery.kubernetes

import org.scalatest.{ Matchers, WordSpec }
import spray.json._
import scala.io.Source

import PodList._

class JsonFormatSpec extends WordSpec with Matchers {
  "JsonFormat" should {
    val data = resourceAsString("pods.json")

    "work" in {
      JsonFormat.podListFormat.read(data.parseJson) shouldBe PodList(
        List(
          Pod(
            Some(PodSpec(List(Container(
              "akka-cluster-tooling-example",
              Some(List(
                ContainerPort(Some("akka-remote"), 10000),
                ContainerPort(Some("management"), 10001),
                ContainerPort(Some("http"), 10002)))
            )))),
            Some(PodStatus(Some("172.17.0.4"), Some("Running"))),
            Some(Metadata(deletionTimestamp = None))
          ),
          Pod(
            Some(PodSpec(List(Container(
              "akka-cluster-tooling-example",
              Some(List(
                ContainerPort(Some("akka-remote"), 10000),
                ContainerPort(Some("management"), 10001),
                ContainerPort(Some("http"), 10002)))
            )))),
            Some(PodStatus(Some("172.17.0.6"), Some("Running"))),
            Some(Metadata(deletionTimestamp = None))
          ),
          Pod(
            Some(PodSpec(List(Container(
              "akka-cluster-tooling-example",
              Some(List(
                ContainerPort(Some("akka-remote"), 10000),
                ContainerPort(Some("management"), 10001),
                ContainerPort(Some("http"), 10002)))
            )))),
            Some(PodStatus(Some("172.17.0.7"), Some("Running"))),
            Some(Metadata(deletionTimestamp = Some("2017-12-06T16:30:22Z")))
          ),
          Pod(
            Some(PodSpec(
              List(Container("akka-cluster-tooling-example", Some(List(ContainerPort(Some("management"), 10001))))))),
            Some(PodStatus(Some("172.17.0.47"), Some("Succeeded"))),
            Some(Metadata(deletionTimestamp = None))
          )
        ))
    }
  }

  private def resourceAsString(name: String): String =
    Source.fromInputStream(getClass.getClassLoader.getResourceAsStream(name)).mkString
} 
Example 182
Source File: MarathonApiServiceDiscoverySpec.scala    From akka-management   with Apache License 2.0 5 votes vote down vote up
package akka.discovery.marathon

import java.net.InetAddress

import akka.discovery.ServiceDiscovery.ResolvedTarget
import org.scalatest.{ Matchers, WordSpec }
import spray.json._
import scala.io.Source

class MarathonApiServiceDiscoverySpec extends WordSpec with Matchers {
  "targets" should {
    "calculate the correct list of resolved targets" in {
      val data = resourceAsString("apps.json")

      val appList = JsonFormat.appListFormat.read(data.parseJson)

      MarathonApiServiceDiscovery.targets(appList, "management") shouldBe List(
        ResolvedTarget(
          host = "192.168.65.60",
          port = Some(23236),
          address = Option(InetAddress.getByName("192.168.65.60"))),
        ResolvedTarget(
          host = "192.168.65.111",
          port = Some(6850),
          address = Option(InetAddress.getByName("192.168.65.111")))
      )
    }
    "calculate the correct list of resolved targets for docker" in {
      val data = resourceAsString("docker-app.json")

      val appList = JsonFormat.appListFormat.read(data.parseJson)

      MarathonApiServiceDiscovery.targets(appList, "akkamgmthttp") shouldBe List(
        ResolvedTarget(
          host = "10.121.48.204",
          port = Some(29480),
          address = Option(InetAddress.getByName("10.121.48.204"))),
        ResolvedTarget(
          host = "10.121.48.204",
          port = Some(10136),
          address = Option(InetAddress.getByName("10.121.48.204")))
      )
    }
  }

  private def resourceAsString(name: String): String =
    Source.fromInputStream(getClass.getClassLoader.getResourceAsStream(name)).mkString
} 
Example 183
Source File: ScriptManagerTest.scala    From codepropertygraph   with Apache License 2.0 5 votes vote down vote up
package io.shiftleft.console.scripting

import better.files.File
import cats.effect.IO
import org.scalatest.{Inside, Matchers, WordSpec}

import io.shiftleft.codepropertygraph.Cpg
import io.shiftleft.console.scripting.ScriptManager.{ScriptCollections, ScriptDescription, ScriptDescriptions}

import java.nio.file.{FileSystemNotFoundException, NoSuchFileException, Path}

import scala.io.Source
import scala.util.Try

class ScriptManagerTest extends WordSpec with Matchers with Inside {

  private object TestScriptExecutor extends AmmoniteExecutor {
    override protected def predef: String = ""
    override def runScript(scriptPath: Path, parameters: Map[String, String], cpg: Cpg): IO[Any] = IO.fromTry(
      Try {
        val source = Source.fromFile(scriptPath.toFile)
        val result = source.getLines.mkString(System.lineSeparator())
        source.close()
        result
      }
    )
  }

  private object TestScriptManager extends ScriptManager(TestScriptExecutor)

  protected val DEFAULT_CPG_NAME: String = {
    if (File(".").name == "console") {
      (File("..") / "resources" / "testcode" / "cpgs" / "method" / "cpg.bin.zip").pathAsString
    } else {
      (File("resources") / "testcode" / "cpgs" / "method" / "cpg.bin.zip").pathAsString
    }
  }

  def withScriptManager(f: ScriptManager => Unit): Unit = {
    f(TestScriptManager)
  }

  "listing scripts" should {
    "be correct" in withScriptManager { scriptManager =>
      val scripts = scriptManager.scripts()
      val expected = List(
        ScriptCollections("general",
                          ScriptDescriptions(
                            "A collection of general purpose scripts.",
                            List(ScriptDescription("list-funcs.sc", "Lists all functions."))
                          )),
        ScriptCollections("java",
                          ScriptDescriptions(
                            "A collection of java-specific scripts.",
                            List(ScriptDescription("list-sl-ns.sc", "Lists all shiftleft namespaces."))
                          )),
        ScriptCollections("general/general_plus",
                          ScriptDescriptions(
                            "Even more general purpose scripts.",
                            List.empty
                          ))
      )

      scripts should contain theSameElementsAs expected
    }
  }

  "running scripts" should {
    "be correct when explicitly specifying a CPG" in withScriptManager { scriptManager =>
      val expected =
        """|@main def main() = {
           |  cpg.method.name.l
           |}""".stripMargin

      scriptManager.runScript("general/list-funcs.sc", Map.empty, Cpg.emptyCpg) shouldBe expected
    }

    "be correct when specifying a CPG filename" in withScriptManager { scriptManager =>
      val expected =
        """|@main def main() = {
           |  cpg.method.name.l
           |}""".stripMargin

      scriptManager.runScript("general/list-funcs.sc", Map.empty, DEFAULT_CPG_NAME) shouldBe expected
    }

    "throw an exception if the specified CPG can not be found" in withScriptManager { scriptManager =>
      intercept[FileSystemNotFoundException] {
        scriptManager.runScript("general/list-funcs.sc", Map.empty, "cake.bin.zip")
      }
    }

    "throw an exception if the specified script can not be found" in withScriptManager { scriptManager =>
      intercept[NoSuchFileException] {
        scriptManager.runScript("list-funcs.sc", Map.empty, Cpg.emptyCpg)
      }
    }
  }

} 
Example 184
Source File: SemanticsLoader.scala    From codepropertygraph   with Apache License 2.0 5 votes vote down vote up
package io.shiftleft.dataflowengineoss.semanticsloader

import org.apache.logging.log4j.LogManager

import scala.io.Source

case class Semantic(methodFullName: String, parameterIndex: Int)
case class Semantics(elements: List[Semantic])

object SemanticsLoader {
  def emptySemantics: Semantics = {
    Semantics(Nil)
  }
}

class SemanticsLoader(filename: String) {
  private val logger = LogManager.getLogger(getClass)

  def load(): Semantics = {
    val bufferedReader = Source.fromFile(filename)
    var lineNumber = 0

    try {
      val semanticElements =
        bufferedReader
          .getLines()
          .flatMap { line =>
            // Count every line up front so that error messages report the correct, 1-based line number.
            lineNumber += 1
            val parts = line.split(",")

            if (parts.size == 2) {
              try {
                val methodFullName = parts(0).trim
                val parameterIndex = parts(1).trim.toInt
                Some(Semantic(methodFullName, parameterIndex))
              } catch {
                case _: NumberFormatException =>
                  logFormatError("Argument index is not convertable to Int.", lineNumber)
                  None
              }

            } else {
              logFormatError("Invalid number of elements per line. Expected method name followed by argument index.",
                             lineNumber)
              None
            }
          }
          .toList

      Semantics(semanticElements)
    } finally {
      bufferedReader.close()
    }

  }

  private def logFormatError(msg: String, lineNumber: Int): Unit = {
    logger.warn(s"$msg In $filename on line $lineNumber")
  }

} 
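A hedged usage sketch for the loader above; the file name, the method name and the index are made up purely for illustration.

import java.nio.file.Files
import java.util.Arrays

import io.shiftleft.dataflowengineoss.semanticsloader.SemanticsLoader

object SemanticsLoaderSketch extends App {
  // Write a tiny semantics file in the "methodFullName,parameterIndex" format parsed above.
  val path = Files.createTempFile("semantics", ".csv")
  Files.write(path, Arrays.asList("java.lang.String.format,1", "broken line"))
  val semantics = new SemanticsLoader(path.toString).load()
  // Only the well-formed line survives; the malformed one is logged and skipped.
  println(semantics.elements)
}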
Example 185
Source File: MLLib.scala    From spark-sql-perf   with Apache License 2.0 5 votes vote down vote up
package com.databricks.spark.sql.perf.mllib


import scala.io.Source
import scala.language.implicitConversions

import org.slf4j.LoggerFactory

import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}

import com.databricks.spark.sql.perf._


class MLLib(sqlContext: SQLContext)
  extends Benchmark(sqlContext) with Serializable {

  def this() = this(SQLContext.getOrCreate(SparkContext.getOrCreate()))
}

object MLLib {

  // logger, getConf and getBenchmarks (YAML config helpers) are defined elsewhere in the original file.

  def run(yamlFile: String = null, yamlConfig: String = null): DataFrame = {
    logger.info("Starting run")
    val conf = getConf(yamlFile, yamlConfig)
    val sparkConf = new SparkConf().setAppName("MLlib QA").setMaster("local[2]")
    val sc = SparkContext.getOrCreate(sparkConf)
    sc.setLogLevel("INFO")
    val b = new com.databricks.spark.sql.perf.mllib.MLLib()
    val benchmarks = getBenchmarks(conf)
    println(s"${benchmarks.size} benchmarks identified:")
    val str = benchmarks.map(_.prettyPrint).mkString("\n")
    println(str)
    logger.info("Starting experiments")
    val e = b.runExperiment(
      executionsToRun = benchmarks,
      iterations = 1, // If you want to increase the number of iterations, add more seeds
      resultLocation = conf.output,
      forkThread = false)
    e.waitForFinish(conf.timeout.toSeconds.toInt)
    logger.info("Run finished")
    e.getCurrentResults()
  }
} 
Example 186
Source File: InfluxUDPClient.scala    From chronicler   with Apache License 2.0 5 votes vote down vote up
package com.github.fsanaulla.chronicler.udp

import java.io.File
import java.net._
import java.nio.charset.{Charset, StandardCharsets}

import com.github.fsanaulla.chronicler.core.components.BodyBuilder
import com.github.fsanaulla.chronicler.core.model.{InfluxWriter, Point}

import scala.io.Source
import scala.util.{Failure, Try}


final class InfluxUDPClient(host: String, port: Int) extends AutoCloseable {
  private[this] val socket = new DatagramSocket()
  private[this] def buildAndSend(msg: Array[Byte]): Try[Unit] =
    Try(
      socket.send(
        new DatagramPacket(
          msg,
          msg.length,
          new InetSocketAddress(host, port)
        )
      )
    )

  def writeNative(point: String, charset: Charset = StandardCharsets.UTF_8): Try[Unit] =
    buildAndSend(point.getBytes(charset))

  def bulkWriteNative(points: Seq[String], charset: Charset = StandardCharsets.UTF_8): Try[Unit] =
    buildAndSend(points.mkString("\n").getBytes(charset))

  def write[T](
      measurement: String,
      entity: T,
      charset: Charset = StandardCharsets.UTF_8
    )(implicit writer: InfluxWriter[T]
    ): Try[Unit] = {
    BodyBuilder.stringBodyBuilder.fromT(measurement, entity) match {
      case Left(ex) => Failure(ex)
      case Right(r) =>
        buildAndSend(r.getBytes(charset))
    }
  }

  def bulkWrite[T](
      measurement: String,
      entities: Seq[T],
      charset: Charset = StandardCharsets.UTF_8
    )(implicit writer: InfluxWriter[T]
    ): Try[Unit] = {
    BodyBuilder.stringBodyBuilder.fromSeqT(measurement, entities) match {
      case Left(ex) => Failure(ex)
      case Right(r) =>
        buildAndSend(r.getBytes(charset))
    }
  }

  def writeFromFile(file: File, charset: Charset = StandardCharsets.UTF_8): Try[Unit] = {
    // Close the source once the file content has been read, to avoid leaking the file handle.
    val source = Source.fromFile(file)
    val sendData =
      try source.getLines().mkString("\n").getBytes(charset)
      finally source.close()

    buildAndSend(sendData)
  }

  def writePoint(point: Point, charset: Charset = StandardCharsets.UTF_8): Try[Unit] =
    buildAndSend(point.serialize.getBytes(charset))

  def bulkWritePoints(points: Seq[Point], charset: Charset = StandardCharsets.UTF_8): Try[Unit] =
    buildAndSend(
      points
        .map(_.serialize)
        .mkString("\n")
        .getBytes(charset)
    )

  def close(): Unit = socket.close()
} 
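A minimal usage sketch; the host, port and line-protocol string below are assumptions for illustration only.

import com.github.fsanaulla.chronicler.udp.InfluxUDPClient

object UdpWriteSketch extends App {
  val client = new InfluxUDPClient("localhost", 8089)
  try {
    // writeNative takes a raw InfluxDB line-protocol string and returns a Try[Unit].
    val result = client.writeNative("cpu,host=example usage=0.64")
    println(result) // Success(()) if the datagram could be handed off to the socket
  } finally client.close()
}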
Example 187
Source File: BodyBuilder.scala    From chronicler   with Apache License 2.0 5 votes vote down vote up
package com.github.fsanaulla.chronicler.core.components

import java.nio.file.Path

import com.github.fsanaulla.chronicler.core.alias.ErrorOr
import com.github.fsanaulla.chronicler.core.either
import com.github.fsanaulla.chronicler.core.either.EitherOps
import com.github.fsanaulla.chronicler.core.model.{Appender, InfluxWriter, Point}

import scala.io.Source

trait BodyBuilder[A] {
  def fromFile(filePath: Path, enc: String): A
  def fromString(string: String): A
  def fromStrings(strings: Seq[String]): A
  def fromPoint(point: Point): A
  def fromPoints(points: Seq[Point]): A
  def fromT[T](meas: String, t: T)(implicit wr: InfluxWriter[T]): ErrorOr[A]
  def fromSeqT[T](meas: String, ts: Seq[T])(implicit wr: InfluxWriter[T]): ErrorOr[A]
}

object BodyBuilder {
  implicit val stringBodyBuilder: BodyBuilder[String] = new BodyBuilder[String] with Appender {
    override def fromFile(filePath: Path, enc: String): String =
      Source
        .fromFile(filePath.toUri, enc)
        .getLines()
        .mkString("\n")

    override def fromStrings(strings: Seq[String]): String =
      strings.mkString("\n")

    override def fromPoint(point: Point): String =
      point.serialize

    override def fromPoints(points: Seq[Point]): String =
      points.map(_.serialize).mkString("\n")

    override def fromString(string: String): String =
      string

    override def fromT[T](meas: String, t: T)(implicit wr: InfluxWriter[T]): ErrorOr[String] =
      wr.write(t).mapRight(append(meas, _))

    override def fromSeqT[T](
        meas: String,
        ts: Seq[T]
      )(implicit wr: InfluxWriter[T]
      ): ErrorOr[String] = {
      either.seq(ts.map(wr.write)).mapRight(append(meas, _))
    }
  }
} 
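A short sketch of the string body builder above; the line-protocol entries are illustrative.

import com.github.fsanaulla.chronicler.core.components.BodyBuilder

object BodyBuilderSketch extends App {
  val builder = BodyBuilder.stringBodyBuilder
  // fromStrings simply joins already-formatted entries with newlines.
  println(builder.fromStrings(Seq("cpu,host=a usage=0.1", "cpu,host=b usage=0.2")))
}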
Example 188
Source File: FileIO.scala    From ZparkIO   with MIT License 5 votes vote down vote up
package com.leobenkel.zparkioProjectExample

import zio.{Task, ZIO}

import scala.io.Source

trait FileIO {
  def fileIO: FileIO.Service
}

object FileIO {
  trait Service {
    protected def readFileContent(path: String): Seq[String]

    final def getFileContent(path: String): ZIO[Any, Throwable, Seq[String]] = {
      Task(readFileContent(path))
    }
  }

  private trait LiveService extends FileIO.Service {
    override protected def readFileContent(path: String): Seq[String] = {
      val file = Source.fromFile(path)
      val content = file.getLines().toArray
      file.close()
      content
    }
  }

  trait Live extends FileIO {
    override def fileIO: Service = new LiveService {}
  }

  def apply(path: String): ZIO[FileIO, Throwable, Seq[String]] = {
    ZIO.accessM[FileIO](_.fileIO.getFileContent(path))
  }
} 
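A rough sketch of wiring the Live service, assuming ZIO 1.x; the file path is made up, and actually running the effect still requires a ZIO runtime (for example from zio.App).

import com.leobenkel.zparkioProjectExample.FileIO

import zio.ZIO

object FileIOSketch {
  // Build the effect against the FileIO environment and satisfy it with the Live service.
  val program: ZIO[Any, Throwable, Seq[String]] =
    FileIO("build.sbt").provide(new FileIO.Live {})
}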
Example 189
Source File: FileIO.scala    From ZparkIO   with MIT License 5 votes vote down vote up
package com.leobenkel.zparkioProfileExampleMoreComplex.Services

import zio.{Has, Task, ZIO, ZLayer}

import scala.io.Source

object FileIO {
  type FileIO = Has[Service]

  trait Service {
    protected def readFileContent(path: String): Seq[String]

    final def getFileContent(path: String): ZIO[Any, Throwable, Seq[String]] = {
      Task(readFileContent(path))
    }
  }

  trait LiveService extends FileIO.Service {
    override protected def readFileContent(path: String): Seq[String] = {
      val file = Source.fromFile(path)
      val content = file.getLines().toArray
      file.close()
      content
    }
  }

  val Live: ZLayer[Any, Nothing, FileIO] = ZLayer.succeed(new LiveService {})

  def apply(path: String): ZIO[FileIO, Throwable, Seq[String]] = {
    ZIO.accessM[FileIO](_.get.getFileContent(path))
  }
} 
Example 190
Source File: AvscFileSorter.scala    From avrohugger   with Apache License 2.0 5 votes vote down vote up
package avrohugger.filesorter

import TypeComparator.strContainsType

import java.io.File

import scala.collection.mutable
import scala.io.Source


object AvscFileSorter {

  def sortSchemaFiles(files: Iterable[File]): Seq[File] = {
    val sortedButReversed = mutable.ArrayBuffer.empty[File]
    def normalizeInput(files: List[File]) = files.sortBy(file => file.getName)
    var pending: Iterable[File] = normalizeInput(files.toList)
    while (pending.nonEmpty) {
      val (used, unused) = usedUnusedSchemas(pending)
      sortedButReversed ++= unused
      pending = used
    }
    sortedButReversed.reverse.toSeq
  }

  def usedUnusedSchemas(files: Iterable[File]): (Iterable[File], Iterable[File]) = {
    val usedUnused = files.map { file =>
      val fullName = extractFullName(file)
      val numUsages = files.count { candidate =>
        val candidateName = extractFullName(candidate)
        strContainsType(candidateName, fileText(candidate), fullName)
      }
      (file, numUsages)
    }.partition(usedUnused => usedUnused._2 > 0)
    (usedUnused._1.map(_._1), usedUnused._2.map(_._1))
  }

  def extractFullName(f: File): String = {
    val txt = fileText(f)
    val namespace = namespaceRegex.findFirstMatchIn(txt)
    val name = nameRegex.findFirstMatchIn(txt)
    val nameGroup = name.get.group(1)
    if (namespace.isEmpty) {
      nameGroup
    } else {
      s"${namespace.get.group(1)}.$nameGroup"
    }
  }

  def fileText(f: File): String = {
    val src = Source.fromFile(f)
    try {
      src.getLines.mkString
    } finally {
      src.close()
    }
  }

  val namespaceRegex = "\\\"namespace\\\"\\s*:\\s*\"([^\\\"]+)\\\"".r
  val nameRegex = "\\\"name\\\"\\s*:\\s*\"([^\\\"]+)\\\"".r
} 
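A usage sketch under the assumption that a directory of .avsc schema files exists; the path here is hypothetical.

import java.io.File

import avrohugger.filesorter.AvscFileSorter

object AvscSortSketch extends App {
  // Hypothetical schema directory; sortSchemaFiles orders the files so that
  // schemas referenced by other schemas come first.
  val schemaDir = new File("src/main/avro")
  val avscFiles = Option(schemaDir.listFiles).getOrElse(Array.empty[File])
    .filter(_.getName.endsWith(".avsc"))
  AvscFileSorter.sortSchemaFiles(avscFiles).foreach(f => println(f.getName))
}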
Example 191
Source File: AvdlFileSorter.scala    From avrohugger   with Apache License 2.0 5 votes vote down vote up
package avrohugger.filesorter

import java.io.File

import scala.annotation.tailrec
import scala.io.Source


object AvdlFileSorter {
  def sortSchemaFiles(filesIterable: Iterable[File]): Seq[File] = {
    val files = filesIterable.toList
    val importsMap = files.map{ file =>
      (file.getCanonicalFile, getImports(file))
    }.toMap.mapValues(f => f.filter(_.exists))

    @tailrec def addFiles(processedFiles: Seq[File], remainingFiles: List[File]): Seq[File] = {
      remainingFiles match {
        case Nil => processedFiles
        case h :: t =>
          val processedFilesSet = processedFiles.toSet
          if (importsMap(h).forall(processedFilesSet.contains))
            addFiles(processedFiles :+ h, t)
          else
            addFiles(processedFiles, t :+ h)
      }
    }
    val result = addFiles(Seq.empty, files)
    result
  }

  // TODO This should be replaced by letting AVRO compile the IDL files directly, but I'm not sure how to do that now.
  private[this] val importPattern = """\s*import\s+idl\s+"([^"]+)"\s*;\s*""".r

  private[this] def getImports(file: File): Vector[File] = {
    val source = Source.fromFile(file)
    try {
      source.getLines().collect{
        case importPattern(currentImport) => new File(file.getParentFile, currentImport).getCanonicalFile
      }.toVector
    }
    finally source.close()
  }
} 
Example 192
Source File: AvrohuggerSpec.scala    From avrohugger   with Apache License 2.0 5 votes vote down vote up
package util

import avrohugger._
import avrohugger.format._
import avrohugger.format.abstractions.SourceFormat

import java.io.File
import java.nio.file.{FileSystems, Path}

import org.specs2.SpecificationLike
import org.specs2.matcher.{Matcher, Matchers, ShouldExpectable}

import scala.io.Source

class AvrohuggerSpec(
  inPath: Path,
  val outputFiles: Seq[Path],
  sourceFormat: SourceFormat
) extends Matchers {
  implicit class PathExtensions(
    path: Path
  ) {
    def ++(next: String) = path.resolve(next)
    def ++(other: Path) = path.resolve(other)
  }

  val sourceFormatName = sourceFormat match {
    case SpecificRecord => "specific"
    case Standard => "standard"
    case Scavro => "scavro"
  }

  val gen = new Generator(sourceFormat)
  val inputPath = {
    val sourceBase = FileSystems.getDefault.getPath("avrohugger-core", "src", "test", "avro")
    (sourceBase ++ inPath)
  }
  val inputFile = inputPath.toFile
  val outDir = gen.defaultOutputDir + s"/$sourceFormatName/"

  private def readFile(f: File): String = {
    val source = Source.fromFile(f)
    try source.mkString finally source.close()
  }

  val expectedBase = FileSystems.getDefault.getPath("avrohugger-core", "src", "test", "expected", sourceFormatName)
  val generatedBase = FileSystems.getDefault.getPath("target", "generated-sources", sourceFormatName)

  private def prefixedFileString(prefixPath: Path, p: Path) = {
    val fullPath = sourceFormat match {
      case Scavro => {
        Option(p.getParent) match {
          case Some(parent) => parent ++ "model" ++ p.getFileName
          case None => FileSystems.getDefault.getPath("model") ++ p
        }
      }
      case _ => p
    }
    readFile((prefixPath ++ fullPath).toFile)
  }

  def generatedString(p: Path) = prefixedFileString(generatedBase, p)
  def expectedString(p: Path) = prefixedFileString(expectedBase, p)

  def checkFileToFile = {
    gen.fileToFile(inputFile, outDir)

    val generated = outputFiles map generatedString
    val expected = outputFiles map expectedString

    ShouldExpectable(generated) shouldEqual expected
  }

  def checkFileToStrings = {
    val generated = gen.fileToStrings(inputFile)
    val expected = outputFiles map expectedString

    ShouldExpectable(generated) shouldEqual expected
  }

  def checkStringToFile = {
    val inputString = readFile(inputFile)
    gen.stringToFile(inputString, outDir)

    val generated = outputFiles map generatedString
    val expected = outputFiles map expectedString

    ShouldExpectable(generated) shouldEqual expected
  }

  def checkStringToStrings = {
    val generated = {
      val inputString = readFile(inputFile)
      gen.stringToStrings(inputString)
    }

    val expected = outputFiles map expectedString

    ShouldExpectable(generated) shouldEqual expected
  }
} 
Example 193
Source File: TypeFlattenerIntegrationTest.scala    From play-swagger   with MIT License 5 votes vote down vote up
package de.zalando.apifirst

import java.io.File

import de.zalando.apifirst.util.ScalaPrinter
import org.scalatest.{FunSpec, MustMatchers}

import scala.io.Source

class TypeFlattenerIntegrationTest extends FunSpec with MustMatchers {

  val expectation_path = "play-scala-generator/src/test/scala/model/"
  val prefix = "resources."

  import de.zalando.model._
  val plainModels = Seq[WithModel](
    additional_properties_yaml,
    basic_auth_api_yaml,
    basic_extension_yaml,
    basic_polymorphism_yaml,
    cross_spec_references_yaml,
    echo_api_yaml,
    error_in_array_yaml,
    expanded_polymorphism_yaml,
    form_data_yaml,
    full_petstore_api_yaml,
    hackweek_yaml,
    heroku_petstore_api_yaml,
    instagram_api_yaml,
    minimal_api_yaml,
    nakadi_yaml,
    nested_arrays_yaml,
    nested_arrays_validation_yaml,
    nested_objects_yaml,
    nested_objects_validation_yaml,
    nested_options_yaml,
    nested_options_validation_yaml,
    numbers_validation_yaml,
    options_yaml,
    security_api_yaml,
    simple_petstore_api_yaml,
    split_petstore_api_yaml,
    string_formats_yaml,
    string_formats_validation_yaml,
    type_deduplication_yaml,
    uber_api_yaml
  )

  describe("TypeFlattener") {
    plainModels.foreach { model =>
      testTypeFlattener(model)
    }
  }

  def testTypeFlattener(ast: WithModel): Unit = {
    val name = ScalaPrinter.nameFromModel(ast)
    it(s"should flatten API model $name") {
      val scalaModel = TypeNormaliser.flatten(ast.model)
      val expected = asInFile(name, ".scala")
      clean(ScalaPrinter.asScala(name, scalaModel)) mustBe clean(expected)
    }
  }

  def asInFile(name: String, suffix: String): String = {
    val expectedFile = new File(expectation_path, prefix + name + suffix)
    if (expectedFile.canRead) {
      val src = Source.fromFile(expectedFile)
      val result = src.getLines().mkString("\n")
      src.close()
      result
    } else
      ""
  }

  def clean(str: String): String =
    str.split("\n").map(_.trim).filter(_.nonEmpty).mkString("\n")

} 
Example 194
Source File: ExpectedResults.scala    From play-swagger   with MIT License 5 votes vote down vote up
package de.zalando.swagger

import java.io.{File, FileOutputStream}

import scala.io.Source


trait ExpectedResults {

  val resourcesPath = "swagger-parser/src/test/resources/"

  def expectationsFolder: String = "/expected_results/"

  def dump(result: String, file: File, suffix: String): Unit = {
    if (result.nonEmpty) {
      val newFile = target(file, suffix)
      newFile.getParentFile.mkdirs()
      newFile.delete()
      newFile.createNewFile()
      val out = new FileOutputStream(newFile)
      out.write(result.getBytes)
      out.close()
    }
  }

  def asInFile(file: File, suffix: String): String = {
    val expectedFile = target(file, suffix)
    if (expectedFile.canRead) {
      val src = Source.fromFile(expectedFile)
      val result = src.getLines().mkString("\n")
      src.close()
      result
    } else
      ""
  }

  def target(file: File, suffix: String): File =
    new File(file.getParentFile.getParent + expectationsFolder + file.getName + "." + suffix)

  def clean(str: String): String = str.split("\n").map(_.trim).mkString("\n")
} 
Example 195
Source File: ExpectedResults.scala    From play-swagger   with MIT License 5 votes vote down vote up
package de.zalando

import java.io.{File, FileOutputStream}

import de.zalando.apifirst.util.ScalaPrinter
import de.zalando.model._

import scala.io.Source


trait ExpectedResults {

  val model = Seq[WithModel](
    additional_properties_yaml,
    basic_polymorphism_yaml,
    nested_arrays_yaml,
    nested_options_yaml,
    basic_extension_yaml,
    expanded_polymorphism_yaml,
    nested_objects_yaml,
    options_yaml
  )
  val examples = Seq[WithModel](
    basic_auth_api_yaml,
    cross_spec_references_yaml,
    echo_api_yaml,
    error_in_array_yaml,
    form_data_yaml,
    full_petstore_api_yaml,
    hackweek_yaml,
    heroku_petstore_api_yaml,
    instagram_api_yaml,
    minimal_api_yaml,
    nakadi_yaml,
    security_api_yaml,
    simple_petstore_api_yaml,
    split_petstore_api_yaml,
    string_formats_yaml,
    type_deduplication_yaml,
    uber_api_yaml
  )
  val validations = Seq[WithModel](
    nested_arrays_validation_yaml,
    nested_objects_validation_yaml,
    nested_options_validation_yaml,
    numbers_validation_yaml,
    string_formats_validation_yaml
  )

  val resourcesPath = "play-scala-generator/src/test/resources/"

  def expectationsFolder: String = "/expected_results/"

  def dump(result: String, name: String, suffix: String): Unit = {
    if (result.nonEmpty) {
      val newFile = target(name, suffix)
      newFile.getParentFile.mkdirs()
      newFile.delete()
      newFile.createNewFile()
      val out = new FileOutputStream(newFile)
      out.write(result.getBytes)
      out.close()
    }
  }

  def asInFile(name: String, suffix: String): String = {
    val expectedFile = target(name, suffix)
    if (expectedFile.canRead) {
      val src = Source.fromFile(expectedFile)
      val result = src.getLines().mkString("\n")
      src.close()
      result
    } else
      ""
  }

  def target(name: String, suffix: String): File =
    new File(resourcesPath + expectationsFolder + name + "." + suffix)

  def clean(str: String): String = str.split("\n").map(_.trim).filter(_.nonEmpty).mkString("\n")

  def nameFromModel(ast: WithModel): String = ScalaPrinter.nameFromModel(ast)

} 
Example 196
Source File: IndexRenderService.scala    From silhouette-vuejs-app   with Apache License 2.0 5 votes vote down vote up
package models.services

import javax.inject.Inject
import play.api.mvc.RequestHeader
import play.filters.csrf.CSRF
import play.filters.csrf.CSRF.Token

import scala.io.Source

class IndexRenderService @Inject() () {

  def render(title: Option[String] = None, meta: Seq[(String, String)] = Seq.empty)(implicit request: RequestHeader): String = {
    val metaTags = title.map(t => s"<title>$t</title>").getOrElse("") +
      meta.map { case (n, c) => s"""<meta name="$n" content="$c">""" }.mkString("")

    val html = Source.fromFile("public/ui/index.html").mkString
    setCsrfToken(html).replace("</head>", s"$metaTags</head>")
  }

  def setCsrfToken(html: String)(implicit request: RequestHeader): String = {
    val Token(_, value) = CSRF.getToken.get

    html.replace("csrf-token-value=\"\"", s"csrf-token-value='$value'")
  }
} 
Example 197
Source File: JsonTestSuite.scala    From borer   with Mozilla Public License 2.0 5 votes vote down vote up
package io.bullet.borer

import java.io.BufferedInputStream

import utest._

import scala.io.Source

object JsonTestSuite extends TestSuite {

  val disabled: Set[String] = Set(
    "n_multidigit_number_then_00.json",
    "n_structure_null-byte-outside-string.json",
    "n_structure_whitespace_formfeed.json"
  )

  val testFiles =
    Source
      .fromResource("")
      .getLines()
      .map { name =>
        val is = new BufferedInputStream(getClass.getResourceAsStream("/" + name))
        try name -> Iterator.continually(is.read).takeWhile(_ != -1).map(_.toByte).toArray
        finally is.close()
      }
      .toMap
      .view
      .filter(t => !disabled.contains(t._1))

  val config = Json.DecodingConfig.default.copy(maxNumberMantissaDigits = 99, maxNumberAbsExponent = 999)

  val tests = Tests {

    "Accept" - {
      for {
        (name, bytes) <- testFiles
        if name startsWith "y"
      } {
        Json.decode(bytes).withConfig(config).to[Dom.Element].valueEither match {
          case Left(e)  => throw new RuntimeException(s"Test `$name` did not parse as it should", e)
          case Right(_) => // ok
        }
      }
    }

    "Reject" - {
      for {
        (name, bytes) <- testFiles
        if name startsWith "n"
      } {
        Json.decode(bytes).withConfig(config).to[Dom.Element].valueEither match {
          case Left(_)  => // ok
          case Right(x) => throw new RuntimeException(s"Test `$name` parsed even though it should have failed: $x")
        }
      }
    }

    "Not Crash" - {
      for {
        (name, bytes) <- testFiles
        if name startsWith "i"
      } {
        Json.decode(bytes).withConfig(config).to[Dom.Element].valueEither match {
          case Left(e: Borer.Error.General[_]) => throw new RuntimeException(s"Test `$name` did fail unexpectedly", e)
          case _                               => // everything else is fine
        }
      }
    }
  }
} 
Example 198
Source File: FileSpec.scala    From borer   with Mozilla Public License 2.0 5 votes vote down vote up
package io.bullet.borer

import java.io.File
import java.nio.charset.StandardCharsets
import java.nio.file.Files

import utest._

import scala.io.Source

object FileSpec extends TestSuite {

  final case class Foo(
      string: String = "This is a really long text for testing writing to a file",
      int: Int = 42,
      double: Double = 0.0)

  implicit val fooCodec = Codec(Encoder.from(Foo.unapply _), Decoder.from(Foo.apply _))

  val tests = Tests {

    "small file" - {
      val tempFile = File.createTempFile("borer", ".json")
      try {
        Json.encode(Foo()).to(tempFile).result ==> tempFile

        new String(Files.readAllBytes(tempFile.toPath), "UTF8") ==>
        """["This is a really long text for testing writing to a file",42,0.0]"""

        Json.decode(tempFile).to[Foo].value ==> Foo()

      } finally tempFile.delete()
    }

    "large file" - {
      val testFileBytes = Source.fromResource("large.json").mkString.getBytes(StandardCharsets.UTF_8)
      val config = Json.DecodingConfig.default
        .copy(maxNumberMantissaDigits = 99, maxNumberAbsExponent = 300, initialCharbufferSize = 8)
      val dom = Json.decode(testFileBytes).withConfig(config).to[Dom.Element].value

      val tempFile = File.createTempFile("borer", ".json")
      try {
        Json.encode(dom).to(tempFile).result ==> tempFile

        Json
          .decode(Input.fromFile(tempFile, bufferSize = 256))
          .withConfig(config)
          .to[Dom.Element]
          .value ==> dom

      } finally tempFile.delete()
    }
  }
} 
Example 199
Source File: FromInputIteratorFileSpec.scala    From borer   with Mozilla Public License 2.0 5 votes vote down vote up
package io.bullet.borer

import java.nio.charset.StandardCharsets

import utest._

import scala.io.Source

object FromInputIteratorFileSpec extends TestSuite with TestUtils {

  val testFileBytes = Source.fromResource("large.json").mkString.getBytes(StandardCharsets.UTF_8)

  val config = Json.DecodingConfig.default
    .copy(maxNumberMantissaDigits = 99, maxNumberAbsExponent = 300, initialCharbufferSize = 8)
  val dom = Json.decode(testFileBytes).withConfig(config).to[Dom.Element].value

  val tests = Tests {

    "test file" - {
      Json
        .decode(chunkedInput(3, 2, 1, 0, 100, 71))
        .withConfig(config)
        .to[Dom.Element]
        .value ==> dom
    }
  }

  def chunkedInput(chunkSizes: Int*): Iterator[Array[Byte]] =
    chunkIterator(testFileBytes, Iterator.continually(0).flatMap(_ => chunkSizes))

  def chunkIterator(remainingBytes: Array[Byte], chunkSizes: Iterator[Int]): Iterator[Array[Byte]] = {
    val len = chunkSizes.next()
    if (remainingBytes.length <= len) Iterator.single(remainingBytes)
    else Iterator.single(remainingBytes.take(len)) ++ chunkIterator(remainingBytes.drop(len), chunkSizes)
  }

  final class FFPadder(input: Input[Array[Byte]]) extends Input.PaddingProvider[Array[Byte]] {

    def padByte(): Byte = -1

    def padDoubleByte(remaining: Int): Char =
      if (remaining < 1) '\uffff' else ((input.readByte() << 8) | 0xFF).toChar

    def padQuadByte(remaining: Int): Int = {
      import input.{readByte => byte, readDoubleByteBigEndian => doub}
      // format: OFF
      remaining match {
        case 0 =>                                            0xFFFFFFFF
        case 1 =>                         (byte()   << 24) | 0xFFFFFF
        case 2 => (doub() << 16)                           | 0xFFFF
        case 3 => (doub() << 16) | ((byte() & 0xFF) <<  8) | 0xFF
        case _ => throw new IllegalStateException
      }
      // format: ON
    }

    def padOctaByte(remaining: Int): Long = {
      import input.{readByte => byte, readDoubleByteBigEndian => doub, readQuadByteBigEndian => quad}
      // format: OFF
      remaining match {
        case 0 =>                                                                                 0XFFFFFFFFFFFFFFFFL
        case 1 =>                                                      (byte().toLong    << 56) | 0XFFFFFFFFFFFFFFL
        case 2 =>                         (doub().toLong      << 48)                            | 0XFFFFFFFFFFFFL
        case 3 =>                         (doub().toLong      << 48) | ((byte() & 0XFFL) << 40) | 0XFFFFFFFFFFL
        case 4 => (quad().toLong << 32) |                                                         0XFFFFFFFFL
        case 5 => (quad().toLong << 32) |                              ((byte() & 0XFFL) << 24) | 0XFFFFFFL
        case 6 => (quad().toLong << 32) | ((doub() & 0XFFFFL) << 16) |                            0XFFFFL
        case 7 => (quad().toLong << 32) | ((doub() & 0XFFFFL) << 16) | ((byte() & 0XFFL) <<  8) | 0XFFL
        case _ => throw new IllegalStateException
      }
      // format: ON
    }

    def padBytes(rest: Array[Byte], missing: Long) =
      ByteAccess.ForByteArray.concat(rest, Array.fill[Byte](missing.toInt)(-1))
  }
} 
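The chunking helpers above are easy to exercise in isolation; this standalone restatement (sample bytes and chunk sizes are arbitrary) shows how the byte array is split.

object ChunkIteratorSketch extends App {
  // Same chunking logic as in the spec above, restated so it runs without the test resource.
  def chunkIterator(remaining: Array[Byte], chunkSizes: Iterator[Int]): Iterator[Array[Byte]] = {
    val len = chunkSizes.next()
    if (remaining.length <= len) Iterator.single(remaining)
    else Iterator.single(remaining.take(len)) ++ chunkIterator(remaining.drop(len), chunkSizes)
  }

  val bytes = "0123456789".getBytes("UTF-8")
  val chunks = chunkIterator(bytes, Iterator.continually(0).flatMap(_ => Seq(3, 2)))
  chunks.foreach(chunk => println(new String(chunk, "UTF-8"))) // 012, 34, 567, 89
}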
Example 200
Source File: CueSheetVersion.scala    From cuesheet   with Apache License 2.0 5 votes vote down vote up
package com.kakao.cuesheet

import com.kakao.cuesheet.deps.{DependencyAnalyzer, ManagedDependencyNode}
import com.kakao.mango.logging.Logging

import scala.collection.JavaConversions._
import scala.io.Source
import scala.util.Try


object CueSheetVersion extends Logging {
  private val versionPattern = """[^"]*"([^"]+)".*""".r

  lazy val version: String = {
    // read from MANIFEST.MF
    getClass.getClassLoader.getResources("META-INF/MANIFEST.MF").toSeq.flatMap { url =>
      val src = Source.fromInputStream(url.openStream())
      try {
        val manifest = src.getLines().map(_.split(":", 2)).collect {
          case Array(key, value) => (key.trim(), value.trim())
        }.toMap
        (manifest.get("Implementation-Vendor"), manifest.get("Implementation-Title")) match {
          case (Some("com.kakao.cuesheet"), Some("cuesheet")) => manifest.get("Implementation-Version")
          case (Some("com.kakao.cuesheet"), Some("cuesheet-assembly")) => manifest.get("Implementation-Version")
          case _ => Nil
        }
      } finally {
        src.close()
      }
    }.headOption.orElse {
      val (_, applicationJars) = DependencyAnalyzer().graph.divide()
      applicationJars.collectFirst {
        case jar: ManagedDependencyNode if jar.artifact.startsWith("cuesheet") => jar.version
      }
    }.orElse {
      Try(Files.readAllBytes(Paths.get("version.sbt"))).map { bytes =>

      }.toOption
      Try(Source.fromFile("version.sbt")).map { src =>
        // try to read from version.sbt
        try {
          src.getLines().collectFirst {
            case versionPattern(v) => v
          }.head
        } finally {
          src.close()
        }
      }.toOption
    }.getOrElse("Unknown")
  }

}
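As a final small illustration, the versionPattern above extracts the first quoted token from a line such as a typical version.sbt entry; the version number here is an example value.

object VersionPatternSketch extends App {
  val versionPattern = """[^"]*"([^"]+)".*""".r
  val line = "version in ThisBuild := \"0.10.0\""
  // Pattern matching against a Regex requires the whole line to match; the quoted token is captured.
  line match {
    case versionPattern(v) => println(v) // 0.10.0
    case _                 => println("no version found")
  }
}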