scala.io.BufferedSource Scala Example

Source File: Sourcer.scala From eidos with Apache License 2.0

5 votes

package org.clulab.wm.wmexchanger.utils

import java.io.{File, FileNotFoundException}
import java.nio.charset.StandardCharsets

import org.slf4j.{Logger, LoggerFactory}

import scala.io.BufferedSource
import scala.io.Source

/** Helpers for opening UTF-8 encoded text, either from the classpath or from the file system. */
object Sourcer {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)

  /** Name of the charset handed to every `Source` opened by this object. */
  val utf8: String = StandardCharsets.UTF_8.toString

  /** Opens the classpath resource at `path`.
    *
    * @throws FileNotFoundException if the resource cannot be located
    */
  def sourceFromResource(path: String): BufferedSource =
    Option(Sourcer.getClass.getResource(path)) match {
      case Some(resource) =>
        logger.info(s"Sourcing resource ${resource.getPath}")
        Source.fromURL(resource, utf8)
      case None =>
        throw newFileNotFoundException(path)
    }

  /** Opens `file` for reading; the caller is responsible for closing the result. */
  def sourceFromFile(file: File): BufferedSource = {
    logger.info(s"Sourcing file ${file.getPath}")
    Source.fromFile(file, utf8)
  }

  /** Convenience overload taking the file's path as a string. */
  def sourceFromFile(path: String): BufferedSource = {
    val file = new File(path)
    sourceFromFile(file)
  }

  /** Builds (but does not throw) the exception reported for a missing `path`,
    * adding a hint when the path starts with a tilde.
    */
  def newFileNotFoundException(path: String): FileNotFoundException = {
    val tildeHint =
      if (path.startsWith("~"))
        ".  Make sure to not use the tilde (~) character in paths in lieu of the home directory."
      else
        ""

    new FileNotFoundException(s"$path (The system cannot find the path specified$tildeHint)")
  }
}

Source File: Sourcer.scala From eidos with Apache License 2.0

5 votes

package org.clulab.wm.eidos.utils

import java.io.{File, FileNotFoundException}
import java.nio.charset.StandardCharsets

import org.slf4j.{Logger, LoggerFactory}

import scala.io.BufferedSource
import scala.io.Source

/** Helpers for opening UTF-8 encoded text, either from the classpath or from the file system. */
object Sourcer {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)

  /** Name of the charset handed to every `Source` opened by this object. */
  val utf8: String = StandardCharsets.UTF_8.toString

  /** Opens the classpath resource at `path`.
    *
    * @throws FileNotFoundException if the resource cannot be located
    */
  def sourceFromResource(path: String): BufferedSource =
    Option(Sourcer.getClass.getResource(path)) match {
      case Some(resource) =>
        logger.info(s"Sourcing resource ${resource.getPath}")
        Source.fromURL(resource, utf8)
      case None =>
        throw newFileNotFoundException(path)
    }

  /** Opens `file` for reading; the caller is responsible for closing the result. */
  def sourceFromFile(file: File): BufferedSource = {
    logger.info(s"Sourcing file ${file.getPath}")
    Source.fromFile(file, utf8)
  }

  /** Convenience overload taking the file's path as a string. */
  def sourceFromFile(path: String): BufferedSource = {
    val file = new File(path)
    sourceFromFile(file)
  }

  /** Builds (but does not throw) the exception reported for a missing `path`,
    * adding a hint when the path starts with a tilde.
    */
  def newFileNotFoundException(path: String): FileNotFoundException = {
    val tildeHint =
      if (path.startsWith("~"))
        ".  Make sure to not use the tilde (~) character in paths in lieu of the home directory."
      else
        ""

    new FileNotFoundException(s"$path (The system cannot find the path specified$tildeHint)")
  }
}

Source File: Sourcer.scala From eidos with Apache License 2.0

5 votes

package org.clulab.wm.elasticsearch.utils

import java.io.{File, FileNotFoundException}
import java.nio.charset.StandardCharsets

import org.slf4j.{Logger, LoggerFactory}

import scala.io.BufferedSource
import scala.io.Source

/** Helpers for opening UTF-8 encoded text, either from the classpath or from the file system. */
object Sourcer {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)

  /** Name of the charset handed to every `Source` opened by this object. */
  val utf8: String = StandardCharsets.UTF_8.toString

  /** Opens the classpath resource at `path`.
    *
    * @throws FileNotFoundException if the resource cannot be located
    */
  def sourceFromResource(path: String): BufferedSource =
    Option(Sourcer.getClass.getResource(path)) match {
      case Some(resource) =>
        logger.info(s"Sourcing resource ${resource.getPath}")
        Source.fromURL(resource, utf8)
      case None =>
        throw newFileNotFoundException(path)
    }

  /** Opens `file` for reading; the caller is responsible for closing the result. */
  def sourceFromFile(file: File): BufferedSource = {
    logger.info(s"Sourcing file ${file.getPath}")
    Source.fromFile(file, utf8)
  }

  /** Convenience overload taking the file's path as a string. */
  def sourceFromFile(path: String): BufferedSource = {
    val file = new File(path)
    sourceFromFile(file)
  }

  /** Builds (but does not throw) the exception reported for a missing `path`,
    * adding a hint when the path starts with a tilde.
    */
  def newFileNotFoundException(path: String): FileNotFoundException = {
    val tildeHint =
      if (path.startsWith("~"))
        ".  Make sure to not use the tilde (~) character in paths in lieu of the home directory."
      else
        ""

    new FileNotFoundException(s"$path (The system cannot find the path specified$tildeHint)")
  }
}

Source File: LoadOp.scala From vm with GNU Affero General Public License v3.0

5 votes

package org.mmadt.language.obj.op.sideeffect
import org.mmadt.language.Tokens
import org.mmadt.language.mmlang.mmlangParser
import org.mmadt.language.obj.Inst.Func
import org.mmadt.language.obj.value.StrValue
import org.mmadt.language.obj.{Inst, Obj}
import org.mmadt.storage.obj.value.VInst

import scala.collection.mutable
import scala.io.{BufferedSource, Source}

/** Mixin that adds a `load` step to an [[Obj]]. */
trait LoadOp {
  this: Obj =>

  /** Builds the load instruction for `file` and executes it against this object.
    *
    * @param file the file argument handed to the load instruction
    * @return the result of executing the instruction, cast back to this object's type
    */
  def load(file: StrValue): this.type = {
    val loadInst = LoadOp(file)
    loadInst.exec(this).asInstanceOf[this.type]
  }
}
/** Instruction factory and implementation for the load operation. */
object LoadOp extends Func[Obj, Obj] {
  /** Creates the load instruction carrying `file` as its single argument. */
  def apply(file: Obj): Inst[Obj, Obj] = new VInst[Obj, Obj](g = (Tokens.load, List(file)), func = this)

  /** Reads the file named by the instruction's first argument, parses its contents
    * with `mmlangParser` and applies the parsed object to `start`.
    *
    * The argument's string form is stripped of its first and last character before
    * being used as a path (presumably the surrounding quotes of a string value —
    * confirm against `StrValue.toString`). Lines are concatenated without separators.
    */
  override def apply(start: Obj, inst: Inst[Obj, Obj]): Obj = {
    val file: String = Inst.oldInst(inst).arg0[Obj].toString
    val source: BufferedSource = Source.fromFile(file.dropRight(1).drop(1))
    // Close the file even when parsing fails; the original leaked the handle on error.
    val obj =
      try mmlangParser.parse[Obj](source.getLines().mkString)
      finally source.close()
    start `=>` obj
  }
}

Source File: mmkvStore.scala From vm with GNU Affero General Public License v3.0

5 votes

package org.mmadt.storage.mmkv

import java.util.concurrent.atomic.AtomicLong

import org.mmadt.language.obj.`type`.RecType
import org.mmadt.language.obj.value.strm.RecStrm
import org.mmadt.language.obj.value.{IntValue, RecValue, StrValue, Value}
import org.mmadt.language.obj.{Obj, Rec, ViaTuple, _}
import org.mmadt.language.{LanguageException, LanguageFactory, LanguageProvider, Tokens}
import org.mmadt.storage.StorageFactory._

import scala.collection.mutable
import scala.io.{BufferedSource, Source}


  }
}

/** Registry of opened stores keyed by file name, kept in insertion order. */
object mmkvStore extends AutoCloseable {
  private val dbs: mutable.Map[String, mmkvStore[Obj, Obj]] = new mutable.LinkedHashMap

  /** Returns the store for `file`, creating it on first use.
    *
    * When `file` equals `Tokens.empty` the last entry of the registry is returned
    * instead. Every other call re-inserts the store so that it becomes the last entry.
    */
  def open[K <: Obj, V <: Obj](file: String): mmkvStore[K, V] = {
    val store: mmkvStore[Obj, Obj] =
      if (file.equals(Tokens.empty)) {
        dbs.last._2
      } else {
        val opened = dbs.getOrElseUpdate(file, new mmkvStore(file))
        // Remove and re-add so this store moves to the end of the insertion order.
        dbs.remove(file)
        dbs.put(file, opened)
        opened
      }
    store.asInstanceOf[mmkvStore[K, V]]
  }

  /** Closes every registered store and empties the registry. */
  override def close(): Unit = {
    dbs.values.foreach(_.close())
    dbs.clear()
  }
}

Source File: InvokeMigrationHandler.scala From flyway-awslambda with MIT License

5 votes

package crossroad0201.aws.flywaylambda

import java.io.{BufferedOutputStream, InputStream, OutputStream, PrintWriter}

import com.amazonaws.regions.{Region, Regions}
import com.amazonaws.services.lambda.runtime.{Context, RequestStreamHandler}
import com.amazonaws.services.s3.{AmazonS3, AmazonS3Client}

import scala.io.{BufferedSource, Codec}
import scala.util.{Failure, Success, Try}

/** Lambda entry point that runs a Flyway migration described by a JSON request body.
  *
  * The request is expected to carry the string fields `bucket_name` and `prefix`, and
  * optionally `flyway_conf` (defaults to `flyway.conf`).
  */
class InvokeMigrationHandler extends RequestStreamHandler with S3MigrationHandlerBase {
  type BucketName = String
  type Prefix = String
  type ConfFileName = String

  /** Parses the request from `input`, runs the migration and writes either the result
    * or the failure's string form to `output`.
    */
  override def handleRequest(input: InputStream, output: OutputStream, context: Context): Unit = {
    // Reads the whole request as UTF-8 JSON and extracts (bucket, prefix, conf file name).
    def parseInput: Try[(BucketName, Prefix, ConfFileName)] = Try {
      import spray.json._
      import DefaultJsonProtocol._

      val json = new BufferedSource(input)(Codec("UTF-8")).mkString
      val jsObj = JsonParser(json).toJson.asJsObject
      jsObj.getFields(
        "bucket_name",
        "prefix"
      ) match {
        case Seq(JsString(b), JsString(p)) => {
          jsObj.getFields(
            "flyway_conf"
          ) match {
            case Seq(JsString(c)) => (b, p, c)
            case _ => (b, p, "flyway.conf")
          }
        }
        case _ => throw new IllegalArgumentException(s"Missing require key [bucketName, prefix]. - $json")
      }
    }

    val logger = context.getLogger

    implicit val s3Client: AmazonS3 = new AmazonS3Client().withRegion(Region.getRegion(Regions.fromName(sys.env("AWS_REGION"))))

    (for {
      i <- parseInput
      _ = { logger.log(s"Flyway migration start. by invoke lambda function(${i._1}, ${i._2}, ${i._3}).") }
      r <- migrate(i._1, i._2, i._3)(context, s3Client)
    } yield r) match {
      case Success(r) =>
        logger.log(r)
        val b = r.getBytes("UTF-8")
        val bout = new BufferedOutputStream(output)
        // Write the payload exactly once. The original wrapped this call in
        // Stream.continually, which only worked because a Stream's head is strict.
        bout.write(b)
        bout.flush()
      case Failure(e) =>
        e.printStackTrace()
        val w = new PrintWriter(output)
        w.write(e.toString)
        w.flush()
    }
  }

}

Source File: ParseCSVwithHTML.scala From CSYE7200_Old with MIT License

5 votes

package edu.neu.coe.csye7200.parse

import java.io.{BufferedWriter, File, FileWriter}

import scala.collection.mutable
import scala.io.{BufferedSource, Source}
import scala.util.{Failure, Success, Try}


/** Command-line entry point: reads a tab-delimited UTF-16 file named by the first
  * argument and writes the rendered HTML table to `output.html`.
  */
object ParseCSVwithHTML extends App {
  val parser = ParseCSVwithHTML(CsvParser(delimiter = '\t' + ""))
  val title = "Report"
  if (args.length > 0) {
    val filename = args.head
    val source: BufferedSource = Source.fromFile(filename, "UTF-16")
    // The line stream is lazy, so the source stays open until the output is written.
    try {
      val w = parser.parseStreamIntoHTMLTable(source.getLines().toStream, title)
      val file = new File("output.html")
      val bw = new BufferedWriter(new FileWriter(file))
      // Close the writer even if the write fails so the file handle is not leaked.
      try bw.write(w) finally bw.close()
      println(s"Successfully written $file")
    } finally source.close()
  }
  else
    System.err.println("syntax: ParseCSVwithHTML filename")

}

Source File: FileUtilities.scala From mmlspark with MIT License

5 votes

// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.core.env

import java.io.File
import java.nio.file.{Files, StandardCopyOption}

import scala.io.{BufferedSource, Source}

/** Small collection of file-system helpers built on `java.io.File` and `java.nio.file.Files`. */
object FileUtilities {

  /** Joins path segments into a single `File`; `folders` must be non-empty. */
  def join(folders: String*): File = {
    folders.tail.foldLeft(new File(folders.head)) { case (f, s) => new File(f, s) }
  }

  /** Appends each of `folders` in turn beneath `base`. */
  def join(base: File, folders: String*): File = {
    folders.foldLeft(base) { case (f, s) => new File(f, s) }
  }

  // Alias and a small companion exposing the open options used with writeFile,
  // so callers can refer to them through FileUtilities.
  type StandardOpenOption = java.nio.file.StandardOpenOption
  object StandardOpenOption {
    import java.nio.file.{StandardOpenOption => S}
    val APPEND = S.APPEND
    val CREATE = S.CREATE
  }

  /** Recursively lists the regular entries under `dir`, files of a directory first
    * (sorted), followed by the contents of its sub-directories.
    *
    * @param pred optional filter applied to files; `null` means "keep everything"
    */
  def allFiles(dir: File, pred: (File => Boolean) = null): Array[File] = {
    def loop(dir: File): Array[File] = {
      val (dirs, files) = dir.listFiles.sorted.partition(_.isDirectory)
      (if (pred == null) files else files.filter(pred)) ++ dirs.flatMap(loop)
    }
    loop(dir)
  }

  /** Opens `file`, applies `read` to the source and always closes the source afterwards. */
  def readFile[T](file: File, read: BufferedSource => T): T = {
    val i = Source.fromFile(file)
    try read(i) finally i.close()
  }

  /** Reads the whole of `file` into a string. */
  def readFile(file: File): String = readFile(file, _.mkString)

  /** Writes `stuff.toString` to `file` using the platform default charset and the given open options. */
  def writeFile(file: File, stuff: Any, flags: StandardOpenOption*): Unit = {
    Files.write(file.toPath, stuff.toString.getBytes(), flags: _*)
    ()
  }

  /** Copies `from` into the directory `toDir`, keeping its name.
    *
    * @param overwrite when true an existing target is replaced
    */
  def copyFile(from: File, toDir: File, overwrite: Boolean = false): Unit = {
    Files.copy(from.toPath, (new File(toDir, from.getName)).toPath,
               (if (overwrite) Seq(StandardCopyOption.REPLACE_EXISTING)
                else Seq()): _*)
    ()
  }

  // Perhaps this should move into a more specific place, not a generic file utils thing
  /** Zips every file under `dir` into `out`; entry names are relative to `dir`'s parent
    * (so they start with `dir`'s own name) and always use `/` as separator.
    */
  def zipFolder(dir: File, out: File): Unit = {
    import java.io.{BufferedInputStream, FileInputStream, FileOutputStream}
    import java.util.zip.{ZipEntry, ZipOutputStream}
    val bufferSize = 2 * 1024
    val data = new Array[Byte](bufferSize)
    // Number of leading characters to strip from each file path. A directory without
    // a parent strips nothing; a parent that already ends with the separator (a
    // filesystem root) must not have an extra character removed.
    val prefixLen = Option(dir.getParentFile).map(_.toString).fold(0) { parent =>
      if (parent.endsWith(File.separator)) parent.length else parent.length + 1
    }
    val zip = new ZipOutputStream(new FileOutputStream(out))
    try {
      allFiles(dir).foreach { file =>
        zip.putNextEntry(new ZipEntry(file.toString.substring(prefixLen).replace(File.separator, "/")))
        val in = new BufferedInputStream(new FileInputStream(file), bufferSize)
        try {
          Iterator
            .continually(in.read(data, 0, bufferSize))
            .takeWhile(_ >= 0)
            .foreach(count => zip.write(data, 0, count))
        } finally in.close()
        zip.closeEntry()
      }
    } finally zip.close() // released even when an entry fails part-way through
  }

}

Source File: WebCrawler.scala From CSYE7200 with MIT License

5 votes

package edu.neu.coe.csye7200.asstwc

import java.net.URL

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent._
import scala.concurrent.duration._
import scala.io.{BufferedSource, Source}
import scala.language.postfixOps
import scala.util._
import scala.util.control.NonFatal
import scala.xml.Node


/** Command-line web crawler skeleton: each argument is a start URL, crawled to depth 2.
  *
  * The two `wget` overloads are assignment stubs and still throw `NotImplementedError`.
  */
object WebCrawler extends App {

  /** Fetches the document at `u` and returns its full text. */
  def getURLContent(u: URL): Future[String] =
    for {
      s <- MonadOps.asFuture(SourceFromURL(u))
      w <- MonadOps.asFuture(sourceToString(s, s"Cannot read from source at $u"))
    } yield w

  /** Intended to return the links found in the document at `u` (not yet implemented). */
  def wget(u: URL): Future[Seq[URL]] = {
    // Hint: write as a for-comprehension, using the method createURL(Option[URL], String) to get the appropriate URL for relative links
    // 16 points.
    def getURLs(ns: Node): Seq[Try[URL]] = ??? // TO BE IMPLEMENTED

    def getLinks(g: String): Try[Seq[URL]] = {
      val ny = HTMLParser.parse(g) recoverWith { case f => Failure(new RuntimeException(s"parse problem with URL $u: $f")) }
      for (n <- ny; z <- MonadOps.sequence(getURLs(n))) yield z
    }
    // Hint: write as a for-comprehension, using getURLContent (above) and getLinks above. You might also need MonadOps.asFuture
    // 9 points.
    ??? // TO BE IMPLEMENTED
  }

  /** Intended to fetch the links of up to ten distinct URLs from `us` (not yet implemented). */
  def wget(us: Seq[URL]): Future[Seq[Either[Throwable, Seq[URL]]]] = {
    val us2 = us.distinct take 10
    // Hint: Use wget(URL) (above). MonadOps.sequence and Future.sequence are also available to you to use.
    // 15 points. Implement the rest of this, based on us2 instead of us.
    // TO BE IMPLEMENTED
    ???
  }

  /** Follows links starting from `us` for `depth` rounds and returns every URL that was
    * used as a starting point of a round.
    */
  def crawler(depth: Int, us: Seq[URL]): Future[Seq[URL]] = {
    def inner(urls: Seq[URL], depth: Int, accum: Seq[URL]): Future[Seq[URL]] =
      if (depth > 0)
        for (us <- MonadOps.flattenRecover(wget(urls), { x => System.err.println(s"""crawler: ignoring exception $x ${if (x.getCause != null) " with cause " + x.getCause else ""}""") }); r <- inner(us, depth - 1, accum ++: urls)) yield r
      else
        Future.successful(accum)

    inner(us, depth, Nil)
  }

  println(s"web reader: ${args.toList}")
  val uys = for (arg <- args.toList) yield getURL(arg)
  val s = MonadOps.sequence(uys)
  s match {
    case Success(z) =>
      println(s"invoking crawler on $z")
      val f = crawler(2, z)
      Await.ready(f, Duration("60 second"))
      // Read the completed value synchronously: a foreach callback on the future runs
      // on another thread and may not get to print before the application exits.
      f.value.flatMap(_.toOption).foreach(x => println(s"Links: $x"))
    case Failure(z) => println(s"failure: $z")
  }

  /** Drains `source` into a string, closing it afterwards; failures are wrapped with `errorMsg`. */
  private def sourceToString(source: BufferedSource, errorMsg: String): Try[String] =
    try Success(source.mkString) catch {
      case NonFatal(e) => Failure(WebCrawlerException(errorMsg, e))
    } finally source.close()

  /** Parses an absolute URL string. */
  // Pass None rather than null: createURL dereferences its Option argument, so a null
  // context made every call throw a NullPointerException.
  private def getURL(resource: String): Try[URL] = createURL(None, resource)

  /** Builds a URL from `resource`, resolved against `context` when one is given. */
  private def createURL(context: Option[URL], resource: String): Try[URL] =
    try Success(new URL(context.orNull, resource)) catch {
      case NonFatal(e) =>
        val message: String = s"""Bad URL: ${if (context.isDefined) "context: " + context else ""} resource=$resource"""
        Failure(WebCrawlerException(message, e))
    }

  /** Opens a source on `resource`, wrapping any non-fatal failure. */
  private def SourceFromURL(resource: URL): Try[BufferedSource] =
    try Success(Source.fromURL(resource)) catch {
      case NonFatal(e) => Failure(WebCrawlerException(s"""Cannot get source from URL: $resource""", e))
    }
}

/** Failure raised by the web crawler.
  *
  * @param url   text appended to the fixed message prefix
  * @param cause the underlying failure
  */
case class WebCrawlerException(url: String, cause: Throwable)
  extends Exception("Web Crawler could not decode URL: " + url, cause)

Source File: ResourceDatasetLoaders.scala From doddle-model with Apache License 2.0

5 votes

package io.picnicml.doddlemodel.data

import java.io.{File, FileOutputStream}

import io.picnicml.doddlemodel.data.CsvLoader.loadCsvDataset

import scala.io.{BufferedSource, Source}

/** Loaders for the CSV datasets bundled under `/datasets` on the classpath. */
object ResourceDatasetLoaders {

  /** Loads the `boston_housing_prices` dataset. */
  def loadBostonDataset: DatasetWithIndex = loadLabeledDataset("boston_housing_prices")

  /** Loads the `breast_cancer` dataset. */
  def loadBreastCancerDataset: DatasetWithIndex = loadLabeledDataset("breast_cancer")

  /** Loads the `iris` dataset. */
  def loadIrisDataset: DatasetWithIndex = loadLabeledDataset("iris")

  /** Loads the `high_school_test` dataset. */
  def loadHighSchoolTestDataset: DatasetWithIndex = loadLabeledDataset("high_school_test")

  private[data] def loadDummyCsvReadingDataset: DatasetWithIndex = loadLabeledDataset("dummy_csv_reading")

  // Shared by every loader: all columns but the last, the last column on its own,
  // and the feature index after `drop(data.cols - 1)`.
  private def loadLabeledDataset(datasetName: String): DatasetWithIndex = {
    val (data, featureIndex) = loadDatasetFromResources(datasetName)
    (data(::, 0 to -2), data(::, -1), featureIndex.drop(data.cols - 1))
  }

  private def loadDatasetFromResources(datasetName: String): FeaturesWithIndex =
    loadCsvDataset(getBufferedSourceFromResource(s"/datasets/$datasetName.csv"), na = "NA")

  /** Opens the classpath resource at `path` as a source backed by a real file.
    *
    * Resources that live directly on the file system are opened in place; anything
    * else (for instance an entry inside a JAR) is first copied to a temporary file.
    *
    * @throws RuntimeException if the resource or the resulting file does not exist
    */
  private def getBufferedSourceFromResource(path: String): BufferedSource = {
    val resourceUrl = Option(getClass.getResource(path))
      .getOrElse(throw new RuntimeException(s"Error: Resource $path not found!"))
    val file =
      if (resourceUrl.getProtocol == "file")
        // toURI decodes percent-escapes (e.g. spaces); fall back to the raw path if it cannot be converted
        scala.util.Try(new File(resourceUrl.toURI)).getOrElse(new File(resourceUrl.getFile))
      else
        readResourceFileWithinJar(path)
    if (!file.exists)
      throw new RuntimeException(s"Error: File $file not found!")
    Source.fromFile(file)
  }

  /** Copies the resource at `path` into a temporary file that is deleted on JVM exit. */
  private def readResourceFileWithinJar(path: String): File = {
    val inputStream = Option(getClass.getResourceAsStream(path))
      .getOrElse(throw new RuntimeException(s"Error: Resource $path not found!"))
    try {
      val tempFile = File.createTempFile("tempfile", ".tmp")
      // Register for deletion up front so a failed copy does not leave the file behind.
      tempFile.deleteOnExit()
      val outputStream = new FileOutputStream(tempFile)
      try {
        val buffer = new Array[Byte](130 * 1024)
        Iterator.continually(inputStream.read(buffer)).takeWhile(_ != -1).foreach { bytesRead =>
          outputStream.write(buffer, 0, bytesRead)
        }
      } finally outputStream.close()
      tempFile
    } finally inputStream.close()
  }
}

Source File: ResourceUtils.scala From sbt-lagom-descriptor-generator with Apache License 2.0

5 votes

package com.lightbend.lagom.spec

import java.io.{ File, InputStream }
import java.nio.file.{ Files, Paths, StandardOpenOption }

import scala.io.{ BufferedSource, Source }


  /** Writes `fileContents` to `relativeFile` resolved beneath `folder`, creating any
    * missing parent directories and replacing an existing file.
    *
    * @return the file that was written
    */
  def writeFile(folder: File, relativeFile: File, fileContents: String): File = {
    val target = Paths.get(folder.getAbsolutePath, relativeFile.getPath)
    // `target` names the file itself, so only its parent chain is created as directories.
    Files.createDirectories(target.getParent)

    val bytes = fileContents.getBytes
    val written = Files.write(
      target,
      bytes,
      StandardOpenOption.CREATE,
      StandardOpenOption.SYNC,
      StandardOpenOption.TRUNCATE_EXISTING
    )
    written.toFile
  }
}

Source File: InputSource.scala From flink-parameter-server with Apache License 2.0

5 votes

package hu.sztaki.ilab.ps.matrix.factorization.utils

import java.util.Calendar

import hu.sztaki.ilab.ps.matrix.factorization.utils.InputTypes.{AnyOrWatermark, EventWithTimestamp}
import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.api.watermark.Watermark

import scala.io.{BufferedSource, Source}

/** Source that replays timestamped events read from a text file, pacing them
  * according to their event times.
  *
  * @param dataFilePath      file with one event per line
  * @param servingSpeed      divisor applied to event-time differences when computing wait times
  * @param fromString        parser turning one line into an event
  * @param baseDataStartTime event time treated as the start of the data; defaults to the first event's time
  * @param simulationEndTime event time from which the speed-up no longer applies; absent or 0 disables this
  */
class InputSource[T <: EventWithTimestamp](dataFilePath: String, servingSpeed: Int,
                                           fromString: String => T, baseDataStartTime: Option[Long], simulationEndTime: Option[Long])
  extends SourceFunction[T] {

  private val simEndTime: Long = simulationEndTime.getOrElse(0L)

  // Cleared by cancel() and checked before each event so that run() can return.
  @volatile private var isRunning: Boolean = true

  /** Asks `run` to stop before emitting the next event. */
  override def cancel(): Unit = isRunning = false

  /** Parses every line of `reader` into an event, preserving file order. */
  def createList(reader: BufferedSource): List[T] = {
    reader
      .getLines()
      .map(fromString)
      .toList
  }

  /** Reads all events up front, then emits them one by one, sleeping until each event's serving time. */
  override def run(ctx: SourceFunction.SourceContext[T]): Unit = {
    val reader = Source.fromFile(dataFilePath)

    // The list is fully materialised, so the file can be closed right away.
    val events: List[T] = try createList(reader) finally reader.close()

    val dataStartTime: Long =
      (baseDataStartTime, events) match {
        case (Some(dst), _) => dst
        case (None, first :: _) => first.getEventTime
        // No events means nothing is emitted below, so the value is irrelevant.
        case (None, Nil) => 0L
      }

    // NOTE(review): despite the name, events are kept in file order — no sorting happens here.
    val sortedEvents = events
      .map(x => AnyOrWatermark(x.getEventTime, x))
      .toArray

    val servingStartTime = Calendar.getInstance.getTimeInMillis

    sortedEvents
      .iterator
      .takeWhile(_ => isRunning)
      .foreach( event => {
        val now = Calendar.getInstance().getTimeInMillis
        val servingTime = toServingTime(servingStartTime, dataStartTime, event.time)
        val waitTime = servingTime - now

        Thread.sleep(math.max(waitTime, 0))

        event.value match {
          case v: T => ctx.collectWithTimestamp(v, event.time)
          case wm: Watermark => ctx.emitWatermark(wm)
        }
      })
  }

  // Maps an event time onto the wall clock: differences before simEndTime are divided
  // by servingSpeed, the part of the difference beyond simEndTime is added undivided.
  private def toServingTime(servingStartTime: Long, dataStartTime: Long, eventTime: Long) = {

    if(simEndTime != 0 && eventTime >= simEndTime){
      val dataDiff = eventTime - simEndTime
      ((servingStartTime + (simEndTime / servingSpeed)) + dataDiff) - (dataStartTime / servingSpeed)
    }
    else{
      val dataDiff = eventTime - dataStartTime
      servingStartTime + (dataDiff / servingSpeed)
    }
  }
}

scala.io.BufferedSource Scala Examples