scala.io.BufferedSource Scala Examples

The following examples show how to use scala.io.BufferedSource. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
Example 1
Source File: Sourcer.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.wmexchanger.utils

import java.io.{File, FileNotFoundException}
import java.nio.charset.StandardCharsets

import org.slf4j.{Logger, LoggerFactory}

import scala.io.BufferedSource
import scala.io.Source

/** Helpers that open UTF-8 encoded [[scala.io.BufferedSource]]s from classpath resources or files. */
object Sourcer {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)
  val utf8: String = StandardCharsets.UTF_8.toString

  /**
   * Opens a classpath resource, looked up through `Sourcer`'s own class, as UTF-8 text.
   *
   * @param path resource path handed to `Class.getResource`
   * @throws java.io.FileNotFoundException if the resource lookup returns null
   */
  def sourceFromResource(path: String): BufferedSource =
    Option(Sourcer.getClass.getResource(path)) match {
      case Some(resource) =>
        logger.info(s"Sourcing resource ${resource.getPath}")
        Source.fromURL(resource, utf8)
      case None =>
        throw newFileNotFoundException(path)
    }

  /** Opens `file` as UTF-8 text, logging its path first. */
  def sourceFromFile(file: File): BufferedSource = {
    logger.info(s"Sourcing file ${file.getPath}")
    Source.fromFile(file, utf8)
  }

  /** Convenience overload that wraps `path` in a [[java.io.File]]. */
  def sourceFromFile(path: String): BufferedSource = sourceFromFile(new File(path))

  /**
   * Builds (but does not throw) the exception reported for a missing path.
   * Paths starting with `~` get an extra hint, since the tilde is not expanded to the home directory.
   */
  def newFileNotFoundException(path: String): FileNotFoundException = {
    val tildeHint =
      if (path.startsWith("~")) ".  Make sure to not use the tilde (~) character in paths in lieu of the home directory."
      else ""

    new FileNotFoundException(s"$path (The system cannot find the path specified$tildeHint)")
  }
}
Example 2
Source File: Sourcer.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.eidos.utils

import java.io.{File, FileNotFoundException}
import java.nio.charset.StandardCharsets

import org.slf4j.{Logger, LoggerFactory}

import scala.io.BufferedSource
import scala.io.Source

/** Helpers that open UTF-8 encoded [[scala.io.BufferedSource]]s from classpath resources or files. */
object Sourcer {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)
  val utf8: String = StandardCharsets.UTF_8.toString

  /**
   * Opens a classpath resource, looked up through `Sourcer`'s own class, as UTF-8 text.
   *
   * @param path resource path handed to `Class.getResource`
   * @throws java.io.FileNotFoundException if the resource lookup returns null
   */
  def sourceFromResource(path: String): BufferedSource =
    Option(Sourcer.getClass.getResource(path)) match {
      case Some(resource) =>
        logger.info(s"Sourcing resource ${resource.getPath}")
        Source.fromURL(resource, utf8)
      case None =>
        throw newFileNotFoundException(path)
    }

  /** Opens `file` as UTF-8 text, logging its path first. */
  def sourceFromFile(file: File): BufferedSource = {
    logger.info(s"Sourcing file ${file.getPath}")
    Source.fromFile(file, utf8)
  }

  /** Convenience overload that wraps `path` in a [[java.io.File]]. */
  def sourceFromFile(path: String): BufferedSource = sourceFromFile(new File(path))

  /**
   * Builds (but does not throw) the exception reported for a missing path.
   * Paths starting with `~` get an extra hint, since the tilde is not expanded to the home directory.
   */
  def newFileNotFoundException(path: String): FileNotFoundException = {
    val tildeHint =
      if (path.startsWith("~")) ".  Make sure to not use the tilde (~) character in paths in lieu of the home directory."
      else ""

    new FileNotFoundException(s"$path (The system cannot find the path specified$tildeHint)")
  }
}
Example 3
Source File: Sourcer.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.elasticsearch.utils

import java.io.{File, FileNotFoundException}
import java.nio.charset.StandardCharsets

import org.slf4j.{Logger, LoggerFactory}

import scala.io.BufferedSource
import scala.io.Source

/** Helpers that open UTF-8 encoded [[scala.io.BufferedSource]]s from classpath resources or files. */
object Sourcer {
  protected lazy val logger: Logger = LoggerFactory.getLogger(this.getClass)
  val utf8: String = StandardCharsets.UTF_8.toString

  /**
   * Opens a classpath resource, looked up through `Sourcer`'s own class, as UTF-8 text.
   *
   * @param path resource path handed to `Class.getResource`
   * @throws java.io.FileNotFoundException if the resource lookup returns null
   */
  def sourceFromResource(path: String): BufferedSource =
    Option(Sourcer.getClass.getResource(path)) match {
      case Some(resource) =>
        logger.info(s"Sourcing resource ${resource.getPath}")
        Source.fromURL(resource, utf8)
      case None =>
        throw newFileNotFoundException(path)
    }

  /** Opens `file` as UTF-8 text, logging its path first. */
  def sourceFromFile(file: File): BufferedSource = {
    logger.info(s"Sourcing file ${file.getPath}")
    Source.fromFile(file, utf8)
  }

  /** Convenience overload that wraps `path` in a [[java.io.File]]. */
  def sourceFromFile(path: String): BufferedSource = sourceFromFile(new File(path))

  /**
   * Builds (but does not throw) the exception reported for a missing path.
   * Paths starting with `~` get an extra hint, since the tilde is not expanded to the home directory.
   */
  def newFileNotFoundException(path: String): FileNotFoundException = {
    val tildeHint =
      if (path.startsWith("~")) ".  Make sure to not use the tilde (~) character in paths in lieu of the home directory."
      else ""

    new FileNotFoundException(s"$path (The system cannot find the path specified$tildeHint)")
  }
}
Example 4
Source File: LoadOp.scala    From vm   with GNU Affero General Public License v3.0 5 votes vote down vote up
package org.mmadt.language.obj.op.sideeffect
import org.mmadt.language.Tokens
import org.mmadt.language.mmlang.mmlangParser
import org.mmadt.language.obj.Inst.Func
import org.mmadt.language.obj.value.StrValue
import org.mmadt.language.obj.{Inst, Obj}
import org.mmadt.storage.obj.value.VInst

import scala.collection.mutable
import scala.io.{BufferedSource, Source}

/** Mixin (self-typed to `Obj`) that adds a `load` step to an object. */
trait LoadOp {
  this: Obj =>

  /**
   * Builds a load instruction for `file` via the `LoadOp` companion, executes it against
   * this object, and casts the result back to this object's singleton type.
   */
  def load(file: StrValue): this.type = {
    val loadInst = LoadOp(file)
    loadInst.exec(this).asInstanceOf[this.type]
  }
}
/** Factory and evaluation function for the `load` instruction. */
object LoadOp extends Func[Obj, Obj] {
  /** Creates the load instruction whose single argument is `file`. */
  def apply(file: Obj): Inst[Obj, Obj] = new VInst[Obj, Obj](g = (Tokens.load, List(file)), func = this)

  /**
   * Reads the file named by the instruction's first argument, parses its content with
   * `mmlangParser`, and applies the parsed object to `start`.
   *
   * The file's lines are concatenated without line separators before parsing.
   * The source is closed even when reading or parsing throws.
   */
  override def apply(start: Obj, inst: Inst[Obj, Obj]): Obj = {
    val file: String = Inst.oldInst(inst).arg0[Obj].toString
    // Drop the first and last character of the argument's string form
    // (presumably the quotes around a string value -- confirm against StrValue.toString).
    val source: BufferedSource = Source.fromFile(file.dropRight(1).drop(1))
    val obj =
      try mmlangParser.parse[Obj](source.getLines().foldLeft(new mutable.StringBuilder())((x, y) => x.append(y)).toString())
      finally source.close()
    start `=>` obj
  }
}
Example 5
Source File: mmkvStore.scala    From vm   with GNU Affero General Public License v3.0 5 votes vote down vote up
package org.mmadt.storage.mmkv

import java.util.concurrent.atomic.AtomicLong

import org.mmadt.language.obj.`type`.RecType
import org.mmadt.language.obj.value.strm.RecStrm
import org.mmadt.language.obj.value.{IntValue, RecValue, StrValue, Value}
import org.mmadt.language.obj.{Obj, Rec, ViaTuple, _}
import org.mmadt.language.{LanguageException, LanguageFactory, LanguageProvider, Tokens}
import org.mmadt.storage.StorageFactory._

import scala.collection.mutable
import scala.io.{BufferedSource, Source}


  }
}

/** Registry of opened stores keyed by file name, kept in insertion order. */
object mmkvStore extends AutoCloseable {
  private val dbs: mutable.Map[String, mmkvStore[Obj, Obj]] = new mutable.LinkedHashMap

  /**
   * Returns the store for `file`, creating and registering it on first use.
   *
   * The returned store is re-inserted so that it becomes the last entry of the registry;
   * passing `Tokens.empty` returns that last entry instead of looking up a name.
   */
  def open[K <: Obj, V <: Obj](file: String): mmkvStore[K, V] = {
    val store =
      if (file.equals(Tokens.empty)) dbs.last._2
      else {
        val existingOrNew = dbs.getOrElseUpdate(file, new mmkvStore(file))
        // remove + put moves the entry to the end of the linked map
        dbs.remove(file)
        dbs.put(file, existingOrNew)
        existingOrNew
      }
    store.asInstanceOf[mmkvStore[K, V]]
  }

  /** Closes every registered store, then empties the registry. */
  override def close(): Unit = {
    dbs.values.foreach(_.close())
    dbs.clear()
  }
}
Example 6
Source File: InvokeMigrationHandler.scala    From flyway-awslambda   with MIT License 5 votes vote down vote up
package crossroad0201.aws.flywaylambda

import java.io.{BufferedOutputStream, InputStream, OutputStream, PrintWriter}

import com.amazonaws.regions.{Region, Regions}
import com.amazonaws.services.lambda.runtime.{Context, RequestStreamHandler}
import com.amazonaws.services.s3.{AmazonS3, AmazonS3Client}

import scala.io.{BufferedSource, Codec}
import scala.util.{Failure, Success, Try}

/**
 * Lambda stream handler that triggers a migration from a JSON request.
 *
 * The request must contain string fields `bucket_name` and `prefix`; an optional string field
 * `flyway_conf` names the configuration file and defaults to `flyway.conf`.
 */
class InvokeMigrationHandler extends RequestStreamHandler with S3MigrationHandlerBase {
  type BucketName = String
  type Prefix = String
  type ConfFileName = String

  /**
   * Parses the request, runs `migrate`, and writes either the result (UTF-8) or the failure's
   * `toString` to `output`.
   */
  override def handleRequest(input: InputStream, output: OutputStream, context: Context): Unit = {
    // Reads the whole request body as UTF-8 and extracts (bucket, prefix, conf file name).
    def parseInput: Try[(BucketName, Prefix, ConfFileName)] = Try {
      import spray.json._
      import DefaultJsonProtocol._

      val json = new BufferedSource(input)(Codec("UTF-8")).mkString
      val jsObj = JsonParser(json).toJson.asJsObject
      jsObj.getFields(
        "bucket_name",
        "prefix"
      ) match {
        case Seq(JsString(b), JsString(p)) =>
          jsObj.getFields("flyway_conf") match {
            case Seq(JsString(c)) => (b, p, c)
            case _ => (b, p, "flyway.conf")
          }
        case _ => throw new IllegalArgumentException(s"Missing require key [bucketName, prefix]. - $json")
      }
    }

    val logger = context.getLogger

    implicit val s3Client: AmazonS3 = new AmazonS3Client().withRegion(Region.getRegion(Regions.fromName(sys.env("AWS_REGION"))))

    (for {
      i <- parseInput
      _ = { logger.log(s"Flyway migration start. by invoke lambda function(${i._1}, ${i._2}, ${i._3}).") }
      r <- migrate(i._1, i._2, i._3)(context, s3Client)
    } yield r) match {
      case Success(r) =>
        logger.log(r)
        val bout = new BufferedOutputStream(output)
        // Write the response exactly once. The previous Stream.continually(...) form only
        // worked because Stream evaluates its head eagerly.
        bout.write(r.getBytes("UTF-8"))
        bout.flush()
      case Failure(e) =>
        e.printStackTrace()
        val w = new PrintWriter(output)
        w.write(e.toString)
        w.flush()
    }
  }

}
Example 7
Source File: ParseCSVwithHTML.scala    From CSYE7200_Old   with MIT License 5 votes vote down vote up
package edu.neu.coe.csye7200.parse

import java.io.{BufferedWriter, File, FileWriter}

import scala.collection.mutable
import scala.io.{BufferedSource, Source}
import scala.util.{Failure, Success, Try}


/**
 * Command-line entry point: reads the tab-delimited, UTF-16 encoded file named by the first
 * argument, renders it through `parseStreamIntoHTMLTable`, and writes the result to
 * `output.html` in the working directory.
 */
object ParseCSVwithHTML extends App {
  val parser = ParseCSVwithHTML(CsvParser(delimiter = "\t"))
  val title = "Report"
  if (args.length > 0) {
    val filename = args.head
    val source: BufferedSource = Source.fromFile(filename, "UTF-16")
    // The line stream is lazy, so keep the source open until the output has been written.
    try {
      val w = parser.parseStreamIntoHTMLTable(source.getLines.toStream, title)
      val file = new File("output.html")
      val bw = new BufferedWriter(new FileWriter(file))
      try bw.write(w) finally bw.close()
      println(s"Successfully written $file")
    } finally source.close()
  }
  else
    System.err.println("syntax: ParseCSVwithHTML filename")

}
Example 8
Source File: FileUtilities.scala    From mmlspark   with MIT License 5 votes vote down vote up
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.core.env

import java.io.File
import java.nio.file.{Files, StandardCopyOption}

import scala.io.{BufferedSource, Source}

/** Small file-system helpers: path joining, recursive listing, read/write/copy, and zipping. */
object FileUtilities {

  /**
   * Joins path segments into one [[java.io.File]], first segment as the root.
   * `folders` must be non-empty; an empty call fails on `folders.tail`/`folders.head`.
   */
  def join(folders: String*): File = {
    folders.tail.foldLeft(new File(folders.head)) { case (f, s) => new File(f, s) }
  }

  /** Appends each segment of `folders` below `base`. */
  def join(base: File, folders: String*): File = {
    folders.foldLeft(base) { case (f, s) => new File(f, s) }
  }

  // Re-export java.nio.file.StandardOpenOption (type and the two constants used with
  // writeFile) so callers only need to import FileUtilities.
  type StandardOpenOption = java.nio.file.StandardOpenOption
  object StandardOpenOption {
    import java.nio.file.{StandardOpenOption => S}
    val APPEND = S.APPEND
    val CREATE = S.CREATE
  }

  /**
   * Recursively lists the regular entries below `dir`.
   *
   * Within each directory, matching files come first (sorted), followed by the contents of
   * its sub-directories (visited in sorted order).
   *
   * @param dir  directory to walk; if it cannot be listed (not a directory, I/O error) it
   *             contributes no files instead of failing with a NullPointerException
   * @param pred optional filter applied to files only; `null` keeps every file
   */
  def allFiles(dir: File, pred: (File => Boolean) = null): Array[File] = {
    def loop(dir: File): Array[File] = {
      // File.listFiles returns null rather than throwing when the listing fails
      val entries = Option(dir.listFiles).getOrElse(Array.empty[File])
      val (dirs, files) = entries.sorted.partition(_.isDirectory)
      (if (pred == null) files else files.filter(pred)) ++ dirs.flatMap(loop)
    }
    loop(dir)
  }

  /**
   * Opens `file`, applies `read` to the source, and closes the source afterwards,
   * whether or not `read` throws.
   */
  def readFile[T](file: File, read: BufferedSource => T): T = {
    val i = Source.fromFile(file)
    try read(i) finally i.close()
  }

  /** Reads the whole file into a string. */
  def readFile(file: File): String = readFile(file, _.mkString)

  /**
   * Writes `stuff.toString` to `file` using the platform default charset.
   *
   * @param flags open options forwarded to `Files.write`
   */
  def writeFile(file: File, stuff: Any, flags: StandardOpenOption*): Unit = {
    Files.write(file.toPath, stuff.toString.getBytes(), flags: _*)
    ()
  }

  /**
   * Copies `from` into directory `toDir`, keeping the file name.
   *
   * @param overwrite when true an existing target is replaced; otherwise `Files.copy` fails
   *                  if the target exists
   */
  def copyFile(from: File, toDir: File, overwrite: Boolean = false): Unit = {
    Files.copy(from.toPath, (new File(toDir, from.getName)).toPath,
               (if (overwrite) Seq(StandardCopyOption.REPLACE_EXISTING)
                else Seq()): _*)
    ()
  }

  // Perhaps this should move into a more specific place, not a generic file utils thing
  /**
   * Zips every file found by [[allFiles]] under `dir` into `out`.
   *
   * Entry names are relative to `dir`'s parent (so they start with `dir`'s own name) and
   * always use `/` as separator. Both the zip stream and each input stream are closed even
   * when an I/O error interrupts the copy.
   */
  def zipFolder(dir: File, out: File): Unit = {
    import java.io.{BufferedInputStream, FileInputStream, FileOutputStream}
    import java.util.zip.{ZipEntry, ZipOutputStream}
    val bufferSize = 2 * 1024
    val data = new Array[Byte](bufferSize)
    // getParentFile is null for a path with no parent component (e.g. new File("foo"));
    // in that case the file paths are already relative and are used as they are.
    val parent = Option(dir.getParentFile).map(_.toPath)
    val zip = new ZipOutputStream(new FileOutputStream(out))
    try {
      allFiles(dir).foreach { file =>
        val relative = parent.fold(file.toPath)(_.relativize(file.toPath))
        zip.putNextEntry(new ZipEntry(relative.toString.replace(File.separator, "/")))
        val in = new BufferedInputStream(new FileInputStream(file), bufferSize)
        try {
          Iterator
            .continually(in.read(data, 0, bufferSize))
            .takeWhile(_ >= 0)
            .foreach(count => zip.write(data, 0, count))
        } finally in.close()
        zip.closeEntry()
      }
    } finally zip.close()
  }

}
Example 9
Source File: WebCrawler.scala    From CSYE7200   with MIT License 5 votes vote down vote up
package edu.neu.coe.csye7200.asstwc

import java.net.URL

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent._
import scala.concurrent.duration._
import scala.io.{BufferedSource, Source}
import scala.language.postfixOps
import scala.util._
import scala.util.control.NonFatal
import scala.xml.Node


/**
 * Command-line web crawler exercise: parses each argument as a URL and crawls two levels deep.
 *
 * Several methods are intentionally left as `???` (marked "TO BE IMPLEMENTED").
 */
object WebCrawler extends App {

  /** Fetches the content at `u` as a string; any failure is carried by the returned Future. */
  def getURLContent(u: URL): Future[String] =
    for {
      s <- MonadOps.asFuture(SourceFromURL(u))
      w <- MonadOps.asFuture(sourceToString(s, s"Cannot read from source at $u"))
    } yield w

  def wget(u: URL): Future[Seq[URL]] = {
    // Hint: write as a for-comprehension, using the method createURL(Option[URL], String) to get the appropriate URL for relative links
    // 16 points.
    def getURLs(ns: Node): Seq[Try[URL]] = ??? // TO BE IMPLEMENTED

    def getLinks(g: String): Try[Seq[URL]] = {
      val ny = HTMLParser.parse(g) recoverWith { case f => Failure(new RuntimeException(s"parse problem with URL $u: $f")) }
      for (n <- ny; z <- MonadOps.sequence(getURLs(n))) yield z
    }
    // Hint: write as a for-comprehension, using getURLContent (above) and getLinks above. You might also need MonadOps.asFuture
    // 9 points.
    ??? // TO BE IMPLEMENTED
  }

  def wget(us: Seq[URL]): Future[Seq[Either[Throwable, Seq[URL]]]] = {
    val us2 = us.distinct take 10
    // Hint: Use wget(URL) (above). MonadOps.sequence and Future.sequence are also available to you to use.
    // 15 points. Implement the rest of this, based on us2 instead of us.
    // TO BE IMPLEMENTED
    ???
  }

  /**
   * Follows links for `depth` rounds starting from `us`.
   * Each round accumulates the URLs it fetched from; the accumulated list is the result.
   */
  def crawler(depth: Int, us: Seq[URL]): Future[Seq[URL]] = {
    def inner(urls: Seq[URL], depth: Int, accum: Seq[URL]): Future[Seq[URL]] =
      if (depth > 0)
        for (us <- MonadOps.flattenRecover(wget(urls), { x => System.err.println(s"""crawler: ignoring exception $x ${if (x.getCause != null) " with cause " + x.getCause else ""}""") }); r <- inner(us, depth - 1, accum ++: urls)) yield r
      else
        Future.successful(accum)

    inner(us, depth, Nil)
  }

  println(s"web reader: ${args.toList}")
  val uys = for (arg <- args.toList) yield getURL(arg)
  val s = MonadOps.sequence(uys)
  s match {
    case Success(z) =>
      println(s"invoking crawler on $z")
      val f = crawler(2, z)
      Await.ready(f, Duration("60 second"))
      // The future is complete here: read its value directly instead of registering an
      // asynchronous callback that may not run before the program exits.
      f.value.flatMap(_.toOption).foreach(x => println(s"Links: $x"))
    case Failure(z) => println(s"failure: $z")
  }

  /** Drains `source` into a string and closes it; failures are wrapped with `errorMsg`. */
  private def sourceToString(source: BufferedSource, errorMsg: String): Try[String] =
    try Success(source.mkString) catch {
      case NonFatal(e) => Failure(WebCrawlerException(errorMsg, e))
    } finally source.close()

  /** Parses an absolute URL (no context). */
  private def getURL(resource: String): Try[URL] = createURL(None, resource)

  /** Builds a URL from `resource`, resolved against `context` when one is given. */
  private def createURL(context: Option[URL], resource: String): Try[URL] =
    try Success(new URL(context.orNull, resource)) catch {
      case NonFatal(e) =>
        val message: String = s"""Bad URL: ${if (context.isDefined) "context: " + context else ""} resource=$resource"""
        Failure(WebCrawlerException(message, e))
    }

  /** Opens a source on `resource`, capturing any non-fatal error as a Failure. */
  private def SourceFromURL(resource: URL): Try[BufferedSource] =
    try Success(Source.fromURL(resource)) catch {
      case NonFatal(e) => Failure(WebCrawlerException(s"""Cannot get source from URL: $resource""", e))
    }
}

/**
 * Failure raised by the web crawler.
 *
 * @param url   text appended to the fixed message prefix "Web Crawler could not decode URL: "
 * @param cause underlying error, passed on as the exception's cause
 */
case class WebCrawlerException(url: String, cause: Throwable)
  extends Exception(s"Web Crawler could not decode URL: $url", cause)
Example 10
Source File: ResourceDatasetLoaders.scala    From doddle-model   with Apache License 2.0 5 votes vote down vote up
package io.picnicml.doddlemodel.data

import java.io.{File, FileOutputStream}

import io.picnicml.doddlemodel.data.CsvLoader.loadCsvDataset

import scala.io.{BufferedSource, Source}

/** Loaders for the CSV datasets bundled under `/datasets` on the classpath. */
object ResourceDatasetLoaders {

  def loadBostonDataset: DatasetWithIndex = loadLabeledDataset("boston_housing_prices")

  def loadBreastCancerDataset: DatasetWithIndex = loadLabeledDataset("breast_cancer")

  def loadIrisDataset: DatasetWithIndex = loadLabeledDataset("iris")

  def loadHighSchoolTestDataset: DatasetWithIndex = loadLabeledDataset("high_school_test")

  private[data] def loadDummyCsvReadingDataset: DatasetWithIndex = loadLabeledDataset("dummy_csv_reading")

  /**
   * Loads `datasetName` and splits it: all columns but the last, the last column on its own,
   * and the feature index with its last `data.cols - 1` ... entries dropped as in `drop`.
   */
  private def loadLabeledDataset(datasetName: String): DatasetWithIndex = {
    val (data, featureIndex) = loadDatasetFromResources(datasetName)
    (data(::, 0 to -2), data(::, -1), featureIndex.drop(data.cols - 1))
  }

  /** Reads `/datasets/<datasetName>.csv` from the classpath, treating "NA" as missing. */
  private def loadDatasetFromResources(datasetName: String): FeaturesWithIndex =
    loadCsvDataset(getBufferedSourceFromResource(s"/datasets/$datasetName.csv"), na = "NA")

  /**
   * Opens the classpath resource at `path` as a source backed by a file.
   *
   * @throws RuntimeException if the resource cannot be found or its file does not exist
   */
  private def getBufferedSourceFromResource(path: String): BufferedSource = {
    // getResource returns null for an unknown resource; fail with a readable message instead
    // of a NullPointerException.
    val resourceUrl = Option(getClass.getResource(path))
      .getOrElse(throw new RuntimeException(s"Error: Resource $path not found!"))
    val file =
      if (resourceUrl.toString.startsWith("jar:"))
        // reads file from JAR
        readResourceFileWithinJar(path)
      else
        // reads file when using IDE
        new File(resourceUrl.getFile)
    if (!file.exists)
      throw new RuntimeException(s"Error: File $file not found!")
    Source.fromFile(file)
  }

  /**
   * Copies the resource at `path` into a temporary file (deleted on JVM exit) and returns it.
   * Both streams are closed even if the copy fails.
   */
  private def readResourceFileWithinJar(path: String): File = {
    val tempFile = File.createTempFile("tempfile", ".tmp")
    tempFile.deleteOnExit()

    val inputStream = getClass.getResourceAsStream(path)
    try {
      val outputStream = new FileOutputStream(tempFile)
      try {
        val buffer = new Array[Byte](130 * 1024)
        Iterator.continually(inputStream.read(buffer)).takeWhile(_ != -1).foreach { bytesRead =>
          outputStream.write(buffer, 0, bytesRead)
        }
      } finally outputStream.close()
    } finally inputStream.close()

    tempFile
  }
}
Example 11
Source File: ResourceUtils.scala    From sbt-lagom-descriptor-generator   with Apache License 2.0 5 votes vote down vote up
package com.lightbend.lagom.spec

import java.io.{ File, InputStream }
import java.nio.file.{ Files, Paths, StandardOpenOption }

import scala.io.{ BufferedSource, Source }


  /**
   * Writes `fileContents` to `relativeFile` resolved below `folder`, creating any missing
   * parent directories and replacing the content of an existing file.
   *
   * @return the written file
   */
  def writeFile(folder: File, relativeFile: File, fileContents: String): File = {
    val target = Paths.get(folder.getAbsolutePath, relativeFile.getPath)
    // `target` is the absolute route to the file, so only its parent must be created as directories
    Files.createDirectories(target.getParent)

    val written = Files.write(
      target,
      fileContents.getBytes,
      StandardOpenOption.CREATE,
      StandardOpenOption.SYNC,
      StandardOpenOption.TRUNCATE_EXISTING
    )
    written.toFile
  }
} 
Example 12
Source File: InputSource.scala    From flink-parameter-server   with Apache License 2.0 5 votes vote down vote up
package hu.sztaki.ilab.ps.matrix.factorization.utils

import java.util.Calendar

import hu.sztaki.ilab.ps.matrix.factorization.utils.InputTypes.{AnyOrWatermark, EventWithTimestamp}
import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.api.watermark.Watermark

import scala.io.{BufferedSource, Source}

/**
 * Source function that replays events read from a text file, pacing them by event time.
 *
 * @param dataFilePath      file with one event per line
 * @param servingSpeed      divisor applied to event-time differences when computing when to emit
 * @param fromString        parser turning one line into an event
 * @param baseDataStartTime event time treated as the start of the data; defaults to the event
 *                          time of the first event in the file
 * @param simulationEndTime optional event time after which the alternative formula in
 *                          `toServingTime` is used; absent (or 0) disables it
 */
class InputSource[T <: EventWithTimestamp](dataFilePath: String, servingSpeed: Int,
                                           fromString: String => T, baseDataStartTime: Option[Long], simulationEndTime: Option[Long])
  extends SourceFunction[T] {

  private val simEndTime: Long = simulationEndTime.getOrElse(0L)

  // Cleared by cancel(); checked before each event is served. Volatile because cancel() is
  // expected to be called from a different thread than run().
  @volatile private var isRunning: Boolean = true

  /** Asks `run` to stop before serving the next event. */
  override def cancel(): Unit = isRunning = false

  /** Parses every line of `reader` into an event, in file order. */
  def createList(reader: BufferedSource): List[T] = {
    reader
      .getLines()
      .map(fromString)
      .toList
  }

  /**
   * Loads all events, then emits them in file order, sleeping until each event's serving time.
   * Stops early once `cancel` has been called.
   */
  override def run(ctx: SourceFunction.SourceContext[T]): Unit = {
    val reader = Source.fromFile(dataFilePath)

    // createList materialises every line, so the file can be closed right away.
    val events: List[T] = try createList(reader) finally reader.close()

    // With no events nothing is emitted, so the fallback value for an empty file is never used.
    val dataStartTime: Long =
      baseDataStartTime match {
        case Some(dst) => dst
        case None => events.headOption.fold(0L)(_.getEventTime)
      }

    val timedEvents = events
      .map(x => AnyOrWatermark(x.getEventTime, x))
      .toArray

    val servingStartTime = Calendar.getInstance.getTimeInMillis

    timedEvents
      .iterator
      .takeWhile(_ => isRunning)
      .foreach( event => {
        val now = Calendar.getInstance().getTimeInMillis
        val servingTime = toServingTime(servingStartTime, dataStartTime, event.time)
        val waitTime = servingTime - now

        Thread.sleep(math.max(waitTime, 0))

        event.value match {
          case v: T => ctx.collectWithTimestamp(v, event.time)
          case wm: Watermark => ctx.emitWatermark(wm)
        }
      })
  }

  /**
   * Maps an event time to the wall-clock time at which the event should be emitted.
   *
   * Before the simulation end time (or when none is set) the distance from `dataStartTime` is
   * divided by `servingSpeed`. From the simulation end time onwards, the distance past
   * `simEndTime` is added undivided.
   */
  private def toServingTime(servingStartTime: Long, dataStartTime: Long, eventTime: Long) = {

    if(simEndTime != 0 && eventTime >= simEndTime){
      val dataDiff = eventTime - simEndTime
      ((servingStartTime + (simEndTime / servingSpeed)) + dataDiff) - (dataStartTime / servingSpeed)
    }
    else{
      val dataDiff = eventTime - dataStartTime
      servingStartTime + (dataDiff / servingSpeed)
    }
  }
}