java.io.BufferedInputStream Scala Examples

The following examples show how to use java.io.BufferedInputStream. Each example is taken from an open-source project; the project name, source file, and license are listed above the code.
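Before the project examples, here is a minimal sketch of the pattern they all share: wrapping an unbuffered stream in a BufferedInputStream so that small reads are served from an in-memory buffer (8 KiB by default) instead of one system call each. The file name is a placeholder.

import java.io.{BufferedInputStream, FileInputStream}

object BufferedReadSketch {
  def main(args: Array[String]): Unit = {
    val in = new BufferedInputStream(new FileInputStream("data.bin")) // placeholder path
    try {
      var count = 0
      var b = in.read()          // served from the buffer, not the OS
      while (b != -1) { count += 1; b = in.read() }
      println(s"read $count bytes")
    } finally in.close()
  }
}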
Example 1
Source File: TFRecordReader.scala    From tensorflow_scala    with Apache License 2.0
package org.platanios.tensorflow.api.io

import org.platanios.tensorflow.api.core.exception.{DataLossException, OutOfRangeException}
import org.platanios.tensorflow.api.utilities.{CRC32C, Coding}
import org.platanios.tensorflow.proto.Example

import com.typesafe.scalalogging.Logger
import org.slf4j.LoggerFactory

import java.io.BufferedInputStream
import java.nio.file.{Files, Path, StandardOpenOption}


      private[this] def readNext(): Example = {
        try {
          // Read the header data.
          val encLength = new Array[Byte](12)
          fileStream.read(encLength)
          val recordLength = Coding.decodeFixedInt64(encLength).toInt
          val encLengthMaskedCrc = CRC32C.mask(CRC32C.value(encLength.take(8)))
          if (Coding.decodeFixedInt32(encLength, offset = 8) != encLengthMaskedCrc) {
            throw DataLossException("Encountered corrupted TensorFlow record.")
          }

          // Read the data.
          val encData = new Array[Byte](recordLength + 4)
          fileStream.read(encData)
          val recordData = encData.take(recordLength)
          val encDataMaskedCrc = CRC32C.mask(CRC32C.value(encData.take(recordLength)))
          if (Coding.decodeFixedInt32(encData, offset = recordLength) != encDataMaskedCrc) {
            throw DataLossException("Encountered corrupted TensorFlow record.")
          }

          Example.parseFrom(recordData)
        } catch {
          case _: OutOfRangeException | _: DataLossException =>
            // We ignore partial read exceptions, because a record may be truncated. The record reader holds the offset
            // prior to the failed read, and so retrying will succeed.
            TFRecordReader.logger.info(s"No more TF records stored at '${filePath.toAbsolutePath}'.")
            null
        }
      }

      override def hasNext: Boolean = {
        if (nextExample == null)
          nextExample = readNext()
        nextExample != null
      }

      override def next(): Example = {
        val example = {
          if (nextExample == null)
            readNext()
          else
            nextExample
        }
        if (example != null)
          nextExample = readNext()
        example
      }
    }
  }
}

private[io] object TFRecordReader {
  private[TFRecordReader] val logger: Logger = Logger(LoggerFactory.getLogger("TF Record Reader"))
} 
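One caveat worth noting: readNext above assumes fileStream.read fills the whole 12-byte header in a single call, which InputStream does not guarantee. A stricter sketch of the same header read, assuming the standard TFRecord layout (8-byte little-endian length, 4-byte masked CRC of the length, then the payload and a 4-byte masked CRC of the payload), uses DataInputStream.readFully, which loops until the buffer is full:

import java.io.{BufferedInputStream, DataInputStream, FileInputStream}

// Hypothetical helper, not part of tensorflow_scala: reads exactly the
// 12-byte TFRecord header (length + masked length CRC) or throws EOFException.
def readRecordHeader(path: String): Array[Byte] = {
  val in = new DataInputStream(new BufferedInputStream(new FileInputStream(path)))
  try {
    val header = new Array[Byte](12)
    in.readFully(header) // loops internally until all 12 bytes arrive
    header
  } finally in.close()
}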
Example 2
Source File: Compression.scala    From databus-maven-plugin    with GNU Affero General Public License v3.0
package org.dbpedia.databus.lib

import better.files._
import java.io.{BufferedInputStream, FileInputStream, InputStream}
import com.codahale.metrics.MetricRegistry
import org.apache.commons.compress.archivers.{ArchiveEntry, ArchiveException, ArchiveInputStream, ArchiveStreamFactory}
import org.apache.commons.compress.compressors.{CompressorException, CompressorInputStream, CompressorStreamFactory}

import scala.util.Try

object Compression {

  def detectCompression(datafile: File): Option[String] = {
    try {
      Some(datafile.inputStream.map(_.buffered).apply(CompressorStreamFactory.detect))
    } catch {
      case ce: CompressorException => None
    }
  }

  def detectArchive(datafile: File): Option[String] = {
    try {
      Some(datafile.inputStream.map(_.buffered).apply(ArchiveStreamFactory.detect))
    } catch {
      case ce: ArchiveException => None
    }
  }
} 
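A hypothetical call site for the helpers above, assuming a bzip2-compressed file on disk (the path is a placeholder); CompressorStreamFactory.detect returns compressor names such as "bzip2" or "gz", and detection failures surface as None:

import better.files.File

val datafile = File("dump.ttl.bz2") // placeholder path
Compression.detectCompression(datafile) match {
  case Some(codec) => println(s"compressed with $codec")
  case None        => println("no compression detected")
}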
Example 3
Source File: Implicits.scala    From incubator-daffodil    with Apache License 2.0
package org.apache.daffodil

import java.io.{ ByteArrayInputStream, BufferedInputStream }

import org.apache.daffodil.xml.NS
import org.apache.daffodil.exceptions.Assert
import scala.language.{ implicitConversions, reflectiveCalls } // silences scala 2.10 warnings

object Implicits {

  object ImplicitsSuppressUnusedImportWarning {
    def apply() = if (scala.math.random.isNaN()) Assert.impossible()
  }

  
  def intercept[T <: AnyRef](body: => Any)(implicit tag: scala.reflect.ClassTag[T]): T = {
    val clazz = tag.runtimeClass.asInstanceOf[Class[T]]
    val caught = try {
      body
      None
    } catch {
      case npe: NullPointerException => throw npe
      case s: scala.util.control.ControlThrowable => throw s
      case u: Throwable => {
        if (!clazz.isAssignableFrom(u.getClass)) {
          throw new InterceptFailedException(
            "Failed to intercept expected exception. Expected '%s' but got '%s'.".format(clazz.getName, u.getClass.getName))
        } else {
          Some(u)
        }
      }
    }
    caught match {
      case None => throw new InterceptFailedException("Failed to intercept any exceptions.")
      case Some(e) => e.asInstanceOf[T]
    }
  }

  class InterceptFailedException(msg: String) extends RuntimeException(msg)

} 
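A short usage sketch of intercept, assuming the method is imported from the object above; it returns the caught exception so tests can assert on it:

import org.apache.daffodil.Implicits.intercept

val e = intercept[IllegalArgumentException] {
  require(false, "bad argument")
}
println(e.getMessage) // requirement failed: bad argument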
Example 4
Source File: IoTest.scala    From fgbio    with MIT License
package com.fulcrumgenomics.util

import java.io.{BufferedInputStream, BufferedReader, FileInputStream, InputStreamReader}
import java.util.zip.GZIPInputStream

import com.fulcrumgenomics.testing.UnitSpec
import htsjdk.samtools.util.BlockCompressedInputStream


class IoTest extends UnitSpec {

  Seq(".bgz", ".bgzip").foreach { ext =>
    it should s"round trip data to a bgzipped file with extension ${ext}" in {
      val text = "This is a stupid little text fragment for compression. Yay compression!"
      val data = Seq.fill(10)(text)
      val f = makeTempFile("test.", ext)
      Io.writeLines(f, data)

      val stream = new BufferedInputStream(new FileInputStream(f.toFile))
      BlockCompressedInputStream.isValidFile(stream) shouldBe true
      val reread = Io.readLines(f).toIndexedSeq

      reread shouldBe data
    }
  }
} 
Example 5
Source File: WordEmbeddingsLoader.scala    From spark-nlp    with Apache License 2.0
package com.johnsnowlabs.nlp.embeddings

import java.io.{BufferedInputStream, ByteArrayOutputStream, DataInputStream, FileInputStream}

import com.johnsnowlabs.storage.RocksDBConnection
import org.slf4j.LoggerFactory

import scala.io.Source

object WordEmbeddingsTextIndexer {

  def index(
             source: Iterator[String],
             writer: WordEmbeddingsWriter
           ): Unit = {
    try {
      for (line <- source) {
        val items = line.split(" ")
        val word = items(0)
        val embeddings = items.drop(1).map(i => i.toFloat)
        writer.add(word, embeddings)
      }
    } finally {
      writer.close()
    }
  }

  def index(
             source: String,
             writer: WordEmbeddingsWriter
           ): Unit = {
    val sourceFile = Source.fromFile(source)("UTF-8")
    val lines = sourceFile.getLines()
    index(lines, writer)
    sourceFile.close()
  }
}


object WordEmbeddingsBinaryIndexer {

  private val logger = LoggerFactory.getLogger("WordEmbeddings")

  def index(
             source: DataInputStream,
             writer: WordEmbeddingsWriter): Unit = {

    try {
      // File Header
      val numWords = Integer.parseInt(readString(source))
      val vecSize = Integer.parseInt(readString(source))

      // File Body
      for (i <- 0 until numWords) {
        val word = readString(source)

        // Unit Vector
        val vector = readFloatVector(source, vecSize, writer)
        writer.add(word, vector)
      }

      logger.info(s"Loaded $numWords words, vector size $vecSize")
    } finally {
      writer.close()
    }
  }

  def index(
             source: String,
             writer: WordEmbeddingsWriter): Unit = {

    val ds = new DataInputStream(new BufferedInputStream(new FileInputStream(source), 1 << 15))

    try {
      index(ds, writer)
    } finally {
      ds.close()
    }
  }

  
  private def readFloatVector(ds: DataInputStream, vectorSize: Int, indexer: WordEmbeddingsWriter): Array[Float] = {
    // Read Bytes
    val vectorBuffer = Array.fill[Byte](4 * vectorSize)(0)
    ds.read(vectorBuffer)

    // Convert Bytes to Floats
    indexer.fromBytes(vectorBuffer)
  }
} 
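The 1 << 15 argument above gives the BufferedInputStream an explicit 32 KiB buffer, four times the 8 KiB default, so the many small reads issued by readString and readFloatVector translate into few underlying file reads. A minimal sketch of the same construction in isolation (the file name is a placeholder):

import java.io.{BufferedInputStream, DataInputStream, FileInputStream}

val ds = new DataInputStream(
  new BufferedInputStream(new FileInputStream("embeddings.bin"), 1 << 15)) // placeholder path
try println(s"first byte: ${ds.read()}") finally ds.close()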
Example 6
Source File: FileUtilities.scala    From mmlspark    with MIT License
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.core.env

import java.io.File
import java.nio.file.{Files, StandardCopyOption}

import scala.io.{BufferedSource, Source}

object FileUtilities {

  def join(folders: String*): File = {
    folders.tail.foldLeft(new File(folders.head)) { case (f, s) => new File(f, s) }
  }

  def join(base: File, folders: String*): File = {
    folders.foldLeft(base) { case (f, s) => new File(f, s) }
  }

  // Same for StandardOpenOption
  type StandardOpenOption = java.nio.file.StandardOpenOption
  object StandardOpenOption {
    import java.nio.file.{StandardOpenOption => S}
    val APPEND = S.APPEND
    val CREATE = S.CREATE
  }

  def allFiles(dir: File, pred: (File => Boolean) = null): Array[File] = {
    def loop(dir: File): Array[File] = {
      val (dirs, files) = dir.listFiles.sorted.partition(_.isDirectory)
      (if (pred == null) files else files.filter(pred)) ++ dirs.flatMap(loop)
    }
    loop(dir)
  }

  // readFile takes a file name or a File, and function to extract a value from
  // BufferedSource which defaults to _.mkString; performs the read, closes the
  // source, and returns the result
  def readFile[T](file: File, read: BufferedSource => T): T = {
    val i = Source.fromFile(file)
    try read(i) finally i.close
  }
  def readFile(file: File): String = readFile(file, _.mkString)

  def writeFile(file: File, stuff: Any, flags: StandardOpenOption*): Unit = {
    Files.write(file.toPath, stuff.toString.getBytes(), flags: _*)
    ()
  }

  def copyFile(from: File, toDir: File, overwrite: Boolean = false): Unit = {
    Files.copy(from.toPath, (new File(toDir, from.getName)).toPath,
               (if (overwrite) Seq(StandardCopyOption.REPLACE_EXISTING)
                else Seq()): _*)
    ()
  }

  // Perhaps this should move into a more specific place, not a generic file utils thing
  def zipFolder(dir: File, out: File): Unit = {
    import java.io.{BufferedInputStream, FileInputStream, FileOutputStream}
    import java.util.zip.{ZipEntry, ZipOutputStream}
    val bufferSize = 2 * 1024
    val data = new Array[Byte](bufferSize)
    val zip = new ZipOutputStream(new FileOutputStream(out))
    val prefixLen = dir.getParentFile.toString.length + 1
    allFiles(dir).foreach { file =>
      zip.putNextEntry(new ZipEntry(file.toString.substring(prefixLen).replace(java.io.File.separator, "/")))
      val in = new BufferedInputStream(new FileInputStream(file), bufferSize)
      var b = 0
      while (b >= 0) { zip.write(data, 0, b); b = in.read(data, 0, bufferSize) }
      in.close()
      zip.closeEntry()
    }
    zip.close()
  }

} 
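A hypothetical usage of the helpers above, zipping a directory and reading a text file back (paths are placeholders):

import java.io.File
import com.microsoft.ml.spark.core.env.FileUtilities._

val reports = join("build", "reports")            // build/reports
zipFolder(reports, new File("reports.zip"))       // recursive zip via allFiles
val notes = readFile(new File("notes.txt"))       // whole file; source is closed for us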
Example 7
Source File: Using.scala    From Argus-SAF    with Apache License 2.0
package org.argus.jawa.core.compiler.compile.io

import java.io.{Closeable, FileInputStream, FileOutputStream, InputStream, OutputStream, File => JavaFile}
import java.io.{BufferedInputStream, BufferedOutputStream, InputStreamReader, OutputStreamWriter}
import java.io.{BufferedReader, BufferedWriter}
import java.util.zip.GZIPInputStream
import java.net.URL
import java.nio.channels.FileChannel
import java.nio.charset.Charset
import java.util.jar.{JarFile, JarInputStream, JarOutputStream}
import java.util.zip.{GZIPOutputStream, ZipEntry, ZipFile, ZipInputStream, ZipOutputStream}

import ErrorHandling.translate

import scala.reflect.{Manifest => SManifest}

abstract class Using[Source, T]
{
  protected def open(src: Source): T
  def apply[R](src: Source)(f: T => R): R =
  {
    val resource = open(src)
    try { f(resource) }
    finally { close(resource) }
  }
  protected def close(out: T): Unit
}
abstract class WrapUsing[Source, T](implicit srcMf: SManifest[Source], targetMf: SManifest[T]) extends Using[Source, T]
{
  protected def label[S](m: SManifest[S]): String = m.runtimeClass.getSimpleName
  protected def openImpl(source: Source): T
  protected final def open(source: Source): T =
    translate("Error wrapping " + label(srcMf) + " in " + label(targetMf) + ": ") { openImpl(source) }
}
trait OpenFile[T] extends Using[JavaFile, T]
{
  protected def openImpl(file: JavaFile): T
  protected final def open(file: JavaFile): T =
  {
    val parent = file.getParentFile
    if(parent != null)
      IO.createDirectory(parent)
    openImpl(file)
  }
}
object Using
{
  def wrap[Source, T<: Closeable](openF: Source => T)(implicit srcMf: SManifest[Source], targetMf: SManifest[T]): Using[Source,T] =
    wrap(openF, closeCloseable)
  def wrap[Source, T](openF: Source => T, closeF: T => Unit)(implicit srcMf: SManifest[Source], targetMf: SManifest[T]): Using[Source,T] =
    new WrapUsing[Source, T]
    {
      def openImpl(source: Source): T = openF(source)
      def close(t: T): Unit = closeF(t)
    }

  def resource[Source, T <: Closeable](openF: Source => T): Using[Source,T] =
    resource(openF, closeCloseable)
  def resource[Source, T](openF: Source => T, closeF: T => Unit): Using[Source,T] =
    new Using[Source,T]
    {
      def open(s: Source): T = openF(s)
      def close(s: T): Unit = closeF(s)
    }
  def file[T <: Closeable](openF: JavaFile => T): OpenFile[T] = file(openF, closeCloseable)
  def file[T](openF: JavaFile => T, closeF: T => Unit): OpenFile[T] =
    new OpenFile[T]
    {
      def openImpl(file: JavaFile): T = openF(file)
      def close(t: T): Unit = closeF(t)
    }
  private def closeCloseable[T <: Closeable]: T => Unit = _.close()

  def bufferedOutputStream: Using[OutputStream, BufferedOutputStream] = wrap((out: OutputStream) => new BufferedOutputStream(out) )
  def bufferedInputStream: Using[InputStream, BufferedInputStream] = wrap((in: InputStream) => new BufferedInputStream(in) )
  def fileOutputStream(append: Boolean = false): OpenFile[BufferedOutputStream] = file(f => new BufferedOutputStream(new FileOutputStream(f, append)))
  def fileInputStream: OpenFile[BufferedInputStream] = file(f => new BufferedInputStream(new FileInputStream(f)))
  def urlInputStream: Using[URL, BufferedInputStream] = resource((u: URL) => translate("Error opening " + u + ": ")(new BufferedInputStream(u.openStream)))
  def fileOutputChannel: OpenFile[FileChannel] = file(f => new FileOutputStream(f).getChannel)
  def fileInputChannel: OpenFile[FileChannel] = file(f => new FileInputStream(f).getChannel)
  def fileWriter(charset: Charset = IO.utf8, append: Boolean = false): OpenFile[BufferedWriter] =
    file(f => new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f, append), charset)) )
  def fileReader(charset: Charset): OpenFile[BufferedReader] = file(f => new BufferedReader(new InputStreamReader(new FileInputStream(f), charset)) )
  def urlReader(charset: Charset): Using[URL, BufferedReader] = resource((u: URL) => new BufferedReader(new InputStreamReader(u.openStream, charset)))
  def jarFile(verify: Boolean): OpenFile[JarFile] = file(f => new JarFile(f, verify), (_: JarFile).close())
  def zipFile: OpenFile[ZipFile] = file(f => new ZipFile(f), (_: ZipFile).close())
  def streamReader: Using[(InputStream, Charset), InputStreamReader] = wrap{ (_: (InputStream, Charset)) match { case (in, charset) => new InputStreamReader(in, charset) } }
  def gzipInputStream: Using[InputStream, GZIPInputStream] = wrap((in: InputStream) => new GZIPInputStream(in, 8192) )
  def zipInputStream: Using[InputStream, ZipInputStream] = wrap((in: InputStream) => new ZipInputStream(in))
  def zipOutputStream: Using[OutputStream, ZipOutputStream] = wrap((out: OutputStream) => new ZipOutputStream(out))
  def gzipOutputStream: Using[OutputStream, GZIPOutputStream] = wrap((out: OutputStream) => new GZIPOutputStream(out, 8192), (_: GZIPOutputStream).finish())
  def jarOutputStream: Using[OutputStream, JarOutputStream] = wrap((out: OutputStream) => new JarOutputStream(out))
  def jarInputStream: Using[InputStream, JarInputStream] = wrap((in: InputStream) => new JarInputStream(in))
  def zipEntry(zip: ZipFile): Using[ZipEntry, InputStream] = resource((entry: ZipEntry) =>
    translate("Error opening " + entry.getName + " in " + zip + ": ") { zip.getInputStream(entry) } )
} 
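A brief usage sketch of the combinators above: each Using value opens the resource, runs the supplied function, and closes the resource in a finally block, so streams cannot leak. The file name is a placeholder, and Using is assumed to be in scope:

import java.io.File

val firstByte: Int = Using.fileInputStream(new File("payload.bin")) { in =>
  in.read() // `in` is a BufferedInputStream; closed automatically afterwards
}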
Example 8
Source File: UtilCommands.scala    From CM-Well    with Apache License 2.0
import java.io.{BufferedInputStream, File}
import java.nio.file.{Files, Paths}
import java.security.MessageDigest
import scala.util.control.Breaks._

import javax.xml.bind.DatatypeConverter
import org.apache.commons.compress.archivers.ArchiveEntry
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import org.apache.commons.io.IOUtils
object UtilCommands {
  val OSX_NAME = "Mac OS X"

  val linuxSshpass = if (Files.exists(Paths.get("bin/utils/sshpass"))) "bin/utils/sshpass" else "sshpass"
  val osxSshpass = "/usr/local/bin/sshpass"

  val sshpass = if (isOSX) osxSshpass else linuxSshpass

  def isOSX = System.getProperty("os.name") == OSX_NAME

  def verifyComponentConfNotChanged(componentName:String, configFilePath:String, expectedHash:String) = {
    val confContent = UtilCommands.unTarGz("./components", componentName, configFilePath)
    UtilCommands.checksum(componentName, configFilePath, confContent, expectedHash)
  }

  def checksum(componentName:String, configFilePath:String, confContent:Array[Byte], expectedHash:String) = {
    val actualHash = MessageDigest.getInstance("MD5").digest(confContent)
    val actualHashStr = DatatypeConverter.printHexBinary(actualHash)
    if (!expectedHash.equalsIgnoreCase(actualHashStr))
      throw new Exception(s"$componentName configuration file $configFilePath has been changed, please change the template accordingly " +
        s"(the new digest is $actualHashStr)")
  }

  def unTarGz(rootFolder:String, componentName: String, configFilePath:String):Array[Byte] = {
    var tarArchiveInputStream: TarArchiveInputStream = null
    var bufferInputStream: BufferedInputStream = null
    var gzipCompressor: GzipCompressorInputStream = null
    var confContent: Array[Byte] = null
    try {
      val libDir = new File(rootFolder)
      val pathInput = libDir.listFiles().filter(file => file.getName.contains(componentName))
      val path = Paths.get(pathInput(0).getAbsolutePath)
      bufferInputStream = new BufferedInputStream(Files.newInputStream(path))
      gzipCompressor = new GzipCompressorInputStream(bufferInputStream)
      tarArchiveInputStream = new TarArchiveInputStream(gzipCompressor)

      var archiveEntry: ArchiveEntry = null
      archiveEntry = tarArchiveInputStream.getNextEntry
      if(archiveEntry.getName == "./")
        archiveEntry = tarArchiveInputStream.getNextEntry

      val extractFolder = archiveEntry.getName.replaceAll("^\\./","").split("/")(0)
      breakable {
        while (archiveEntry != null) {
          if (archiveEntry.getName.replaceAll("^\\./", "") == s"$extractFolder/$configFilePath") {
            confContent = IOUtils.toByteArray(tarArchiveInputStream)
            break
          }
          archiveEntry = tarArchiveInputStream.getNextEntry
        }
      }
    }
    finally {
      if(tarArchiveInputStream != null)
        tarArchiveInputStream.close()
      if(bufferInputStream != null)
        bufferInputStream.close()
      if(gzipCompressor != null)
        gzipCompressor.close()
    }
    confContent
  }
} 
Example 9
Source File: Unpacker.scala    From comet-data-pipeline    with Apache License 2.0
package com.ebiznext.comet.utils

import java.io.{BufferedInputStream, InputStream}
import java.nio.file.{Files, Paths}

import better.files.File
import org.apache.commons.compress.archivers.{
  ArchiveEntry,
  ArchiveInputStream,
  ArchiveStreamFactory
}
import org.apache.commons.compress.compressors.{CompressorInputStream, CompressorStreamFactory}
import org.apache.commons.compress.utils.IOUtils
import org.apache.commons.io.input.CloseShieldInputStream

import scala.util.Try

object Unpacker {

  def unpack(archiveFile: File, directory: File): Try[Unit] = {
    for {
      inputStream <- Try(Files.newInputStream(Paths.get(archiveFile.pathAsString)))
      it          <- open(inputStream)
    } yield {
      while (it.hasNext) {
        val (entry, is) = it.next()
        if (entry.isDirectory) {
          throw new Exception("Compressed archive cannot directories")
        }
        val targetFile = File(directory, entry.getName)
        val o = Files.newOutputStream(targetFile.path)
        try {
          IOUtils.copy(is, o)
        } finally {
          if (o != null) o.close()
        }
      }
    }
  }

  // https://alexwlchan.net/2019/09/unpacking-compressed-archives-in-scala/
  
  def open(inputStream: InputStream): Try[Iterator[(ArchiveEntry, InputStream)]] =
    for {
      uncompressedInputStream <- createUncompressedStream(inputStream)
      archiveInputStream      <- createArchiveStream(uncompressedInputStream)
      iterator = createIterator(archiveInputStream)
    } yield iterator

  private def createUncompressedStream(inputStream: InputStream): Try[CompressorInputStream] =
    Try {
      new CompressorStreamFactory().createCompressorInputStream(
        getMarkableStream(inputStream)
      )
    }

  private def createArchiveStream(
    uncompressedInputStream: CompressorInputStream
  ): Try[ArchiveInputStream] =
    Try {
      new ArchiveStreamFactory()
        .createArchiveInputStream(
          getMarkableStream(uncompressedInputStream)
        )
    }

  private def createIterator(
    archiveInputStream: ArchiveInputStream
  ): Iterator[(ArchiveEntry, InputStream)] =
    new Iterator[(ArchiveEntry, InputStream)] {
      var latestEntry: ArchiveEntry = _

      override def hasNext: Boolean = {
        latestEntry = archiveInputStream.getNextEntry
        latestEntry != null
      }

      override def next(): (ArchiveEntry, InputStream) =
        (latestEntry, new CloseShieldInputStream(archiveInputStream))
    }

  private def getMarkableStream(inputStream: InputStream): InputStream =
    if (inputStream.markSupported())
      inputStream
    else
      new BufferedInputStream(inputStream)

} 
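A hypothetical call to the unpacker, assuming a gzip-compressed tar archive (paths are placeholders). The compressor and archive factories probe magic bytes via mark/reset, which is exactly why getMarkableStream falls back to a BufferedInputStream:

import better.files.File
import scala.util.Try

val result: Try[Unit] = Unpacker.unpack(File("dataset.tar.gz"), File("extracted"))
result.failed.foreach(e => println(s"unpack failed: ${e.getMessage}"))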
Example 10
Source File: EventHistoryReporter.scala    From sparklens    with Apache License 2.0
package com.qubole.sparklens.app

import java.io.{BufferedInputStream, InputStream}
import java.net.URI

import com.ning.compress.lzf.LZFInputStream
import com.qubole.sparklens.QuboleJobListener
import com.qubole.sparklens.common.Json4sWrapper
import com.qubole.sparklens.helper.HDFSConfigHelper
import net.jpountz.lz4.LZ4BlockInputStream
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.SparkConf
import org.json4s.DefaultFormats
import org.xerial.snappy.SnappyInputStream


class EventHistoryReporter(file: String, extraConf: List[(String, String)] = List.empty) {

  // This uses reflection to drive ReplayListenerBus, whose replay method signature differs across Spark versions (2.0.0 vs 2.1+)
  val busKlass = Class.forName("org.apache.spark.scheduler.ReplayListenerBus")
  val bus = busKlass.newInstance()
  val addListenerMethod = busKlass.getMethod("addListener", classOf[Object])
  val conf = new SparkConf()
    .set("spark.sparklens.reporting.disabled", "false")
    .set("spark.sparklens.save.data", "false")

  extraConf.foreach(x => {
    conf.set(x._1, x._2)
  })

  val listener = new QuboleJobListener(conf)
  addListenerMethod.invoke(bus, listener)


  try {
    val replayMethod = busKlass.getMethod("replay", classOf[InputStream], classOf[String],
      classOf[Boolean])
    replayMethod.invoke(bus, getDecodedInputStream(file, conf), file, boolean2Boolean(false))
  } catch {
    case _: NoSuchMethodException => // spark binaries are 2.1* and above
      val replayMethod = busKlass.getMethod("replay", classOf[InputStream], classOf[String],
        classOf[Boolean], classOf[String => Boolean])
      replayMethod.invoke(bus, getDecodedInputStream(file, conf), file, boolean2Boolean(false),
        getFilter _)
    case x: Exception => {
      println(s"Failed replaying events from ${file} [${x.getMessage}]")
    }
  }


  // Borrowed from CompressionCodecs in spark
  private def getDecodedInputStream(file: String, conf: SparkConf): InputStream = {

    val fs = FileSystem.get(new URI(file), HDFSConfigHelper.getHadoopConf(Some(conf)))
    val path = new Path(file)
    val bufStream = new BufferedInputStream(fs.open(path))

    val logName = path.getName.stripSuffix(".inprogress")
    val codecName: Option[String] = logName.split("\\.").tail.lastOption

    codecName.getOrElse("") match {
      case "lz4" => new LZ4BlockInputStream(bufStream)
      case "lzf" => new LZFInputStream(bufStream)
      case "snappy" => new SnappyInputStream(bufStream)
      case _ => bufStream
    }
  }

  private def getFilter(eventString: String): Boolean = {
    implicit val formats = DefaultFormats
    eventFilter.contains(Json4sWrapper.parse(eventString).extract[Map[String, Any]].get("Event")
      .get.asInstanceOf[String])
  }

  private def eventFilter: Set[String] = {
    Set(
      "SparkListenerTaskEnd",
      "SparkListenerApplicationStart",
      "SparkListenerApplicationEnd",
      "SparkListenerExecutorAdded",
      "SparkListenerExecutorRemoved",
      "SparkListenerJobStart",
      "SparkListenerJobEnd",
      "SparkListenerStageSubmitted",
      "SparkListenerStageCompleted"
    )
  }

} 
Example 11
Source File: InputReaderDecorator.scala    From Scala-Design-Patterns-Second-Edition    with MIT License
package com.ivan.nikolov.structural.decorator

import java.io.{InputStreamReader, BufferedInputStream, ByteArrayOutputStream, BufferedReader}
import java.nio.charset.Charset
import java.util.Base64
import java.util.zip.GZIPOutputStream

import com.ivan.nikolov.structural.decorator.common.{AdvancedInputReader, InputReader}
import com.typesafe.scalalogging.LazyLogging

abstract class InputReaderDecorator(inputReader: InputReader) extends InputReader {
  override def readLines(): Stream[String] = inputReader.readLines()
}

class CapitalizedInputReader(inputReader: InputReader) extends InputReaderDecorator(inputReader) {
  override def readLines(): Stream[String] = super.readLines().map(_.toUpperCase)
}

class CompressingInputReader(inputReader: InputReader) extends InputReaderDecorator(inputReader) with LazyLogging {
  override def readLines(): Stream[String] = super.readLines().map {
    case line =>
      val text = line.getBytes(Charset.forName("UTF-8"))
      logger.info("Length before compression: {}", text.length.toString)
      val output = new ByteArrayOutputStream()
      val compressor = new GZIPOutputStream(output)
      try {
        compressor.write(text, 0, text.length)
        val outputByteArray = output.toByteArray
        logger.info("Length after compression: {}", outputByteArray.length.toString)
        new String(outputByteArray, Charset.forName("UTF-8"))
      } finally {
        compressor.close()
        output.close()
      }
  }
}

class Base64EncoderInputReader(inputReader: InputReader) extends InputReaderDecorator(inputReader) {
  override def readLines(): Stream[String] = super.readLines().map {
    case line => Base64.getEncoder.encodeToString(line.getBytes(Charset.forName("UTF-8")))
  }
}

object DecoratorExample {
  def main(args: Array[String]): Unit = {
    val stream = new BufferedReader(
      new InputStreamReader(
        new BufferedInputStream(this.getClass.getResourceAsStream("data.txt"))
      )
    )
    try {
      val reader = new CapitalizedInputReader(new AdvancedInputReader(stream))
      reader.readLines().foreach(println)
    } finally {
      stream.close()
    }
  }
}

object DecoratorExampleBig {
  def main(args: Array[String]): Unit = {
    val stream = new BufferedReader(
      new InputStreamReader(
        new BufferedInputStream(this.getClass.getResourceAsStream("data.txt"))
      )
    )
    try {
      val reader = new CompressingInputReader(
        new Base64EncoderInputReader(
          new CapitalizedInputReader(
            new AdvancedInputReader(stream)
          )
        )
      )
      reader.readLines().foreach(println)
    } finally {
      stream.close()
    }
  }
} 
Example 12
Source File: EventFileReader.scala    From tensorflow_scala    with Apache License 2.0
package org.platanios.tensorflow.api.io.events

import org.platanios.tensorflow.api.core.exception.{DataLossException, OutOfRangeException}
import org.platanios.tensorflow.api.io.{CompressionType, Loader, NoCompression}
import org.platanios.tensorflow.api.utilities.{CRC32C, Coding}
import org.platanios.tensorflow.proto.Event

import com.typesafe.scalalogging.Logger
import org.slf4j.LoggerFactory

import java.io.BufferedInputStream
import java.nio.file.{Files, Path, StandardOpenOption}


      private[this] def readNext(): Event = {
        try {
          // Read the header data.
          val encLength = new Array[Byte](12)
          fileStream.read(encLength)
          val recordLength = Coding.decodeFixedInt64(encLength).toInt
          val encLengthMaskedCrc = CRC32C.mask(CRC32C.value(encLength.take(8)))
          if (Coding.decodeFixedInt32(encLength, offset = 8) != encLengthMaskedCrc) {
            throw DataLossException("Encountered corrupted TensorFlow record.")
          }

          // Read the data.
          val encData = new Array[Byte](recordLength + 4)
          fileStream.read(encData)
          val recordData = encData.take(recordLength)
          val encDataMaskedCrc = CRC32C.mask(CRC32C.value(encData.take(recordLength)))
          if (Coding.decodeFixedInt32(encData, offset = recordLength) != encDataMaskedCrc) {
            throw DataLossException("Encountered corrupted TensorFlow record.")
          }

          Event.parseFrom(recordData)
        } catch {
          case _: OutOfRangeException | _: DataLossException =>
            // We ignore partial read exceptions, because a record may be truncated. The record reader holds the offset
            // prior to the failed read, and so retrying will succeed.
            EventFileReader.logger.info(s"No more TF records stored at '${filePath.toAbsolutePath}'.")
            null
        }
      }

      override def hasNext: Boolean = {
        if (nextEvent == null)
          nextEvent = readNext()
        nextEvent != null
      }

      override def next(): Event = {
        val event = {
          if (nextEvent == null)
            readNext()
          else
            nextEvent
        }
        if (event != null)
          nextEvent = readNext()
        event
      }
    }
  }
}

private[io] object EventFileReader {
  private[EventFileReader] val logger: Logger = Logger(LoggerFactory.getLogger("Event File Reader"))
} 
Example 13
Source File: LogCatReader.scala    From OUTDATED_ledger-wallet-android    with MIT License
package co.ledger.wallet.core.utils.logs

import java.io.{BufferedInputStream, FileOutputStream, File, InputStream}
import java.util.zip.GZIPOutputStream

import android.content.{Intent, Context}
import android.os.Build
import android.support.v4.content.FileProvider
import co.ledger.wallet.app.Config
import co.ledger.wallet.core.content.FileContentProvider
import co.ledger.wallet.core.utils.io.IOUtils

import scala.concurrent.{Promise, Future}
import scala.util.Try
import scala.concurrent.ExecutionContext.Implicits.global

object LogCatReader {

  def getInputStream: InputStream = Runtime.getRuntime.exec(Array("logcat", "-d")).getInputStream

  def createZipFile(file: File): Future[File] = Future {
    val input = new BufferedInputStream(getInputStream)
    val output = new GZIPOutputStream(new FileOutputStream(file))
    Logger.i(s"Create zip file ${file.getName}")
    IOUtils.copy(input, output)
    Logger.i(s"End zip file ${file.getName} creation")
    output.flush()
    input.close()
    output.close()
    file
  }

  def exportLogsToDefaultTempLogFile()(implicit context: Context): Future[File] = {
    val file = getDefaultTempLogFile
    file.map(createZipFile)
    .getOrElse {
      Promise[File]().failure(file.failed.get).future
    }
  }

  def getDefaultTempLogFile(implicit context: Context) = Try {
    val sharedDirectory = new File(context.getCacheDir, "shared/")
    sharedDirectory.mkdirs()
    File.createTempFile("logs_", ".gzip", sharedDirectory)
  }

  def createEmailIntent(context: Context): Future[Intent] = {
    exportLogsToDefaultTempLogFile()(context).map((f) => {
      val intent = new Intent(Intent.ACTION_SEND)
      intent.setType("application/x-gzip")
      f.setReadable(true, false)
      intent.putExtra(Intent.EXTRA_EMAIL, Array[String](Config.SupportEmailAddress))
      val pkg = context.getPackageManager.getPackageInfo(context.getPackageName, 0)
      val emailBody =
        s"""
          |--- Device information ---
          | Model: ${Build.MANUFACTURER} ${Build.MODEL} - ${Build.DEVICE}
          | OS: ${Build.DISPLAY}
          | Android version: ${Build.VERSION.RELEASE}
          | Application: ${pkg.packageName}
          | Application version: ${pkg.versionName} - ${pkg.versionCode}
        """.stripMargin
      val uri =  FileProvider.getUriForFile(context, FileContentProvider.Authority, f)
      intent.setFlags(Intent.FLAG_GRANT_READ_URI_PERMISSION)
      intent.putExtra(Intent.EXTRA_TEXT, emailBody)
      intent.putExtra(Intent.EXTRA_STREAM, uri)
      intent
    })
  }

} 
Example 14
Source File: ResponseHelper.scala    From OUTDATED_ledger-wallet-android    with MIT License
package co.ledger.wallet.core.net

import java.io.{ByteArrayOutputStream, BufferedInputStream}

import co.ledger.wallet.core.utils.io.IOUtils
import co.ledger.wallet.core.utils.logs.Logger
import org.json.{JSONArray, JSONObject}
import co.ledger.wallet.core.net.HttpRequestExecutor.defaultExecutionContext
import scala.concurrent.Future
import scala.io.Source
import scala.util.{Failure, Success}

object ResponseHelper {

  implicit class ResponseFuture(f: Future[HttpClient#Response]) {

    def json: Future[(JSONObject, HttpClient#Response)] = {
      f.string.map { case (body, response) =>
        Logger.d("Converting to json")
        (new JSONObject(body), response)
      }
    }

    def jsonArray: Future[(JSONArray, HttpClient#Response)] = {
      f.string.map { case (body, response) =>
        (new JSONArray(body), response)
      }
    }

    def string: Future[(String, HttpClient#Response)] = {
      f.map { response =>
        Logger.d("Converting to string")
        (Source.fromInputStream(response.body).mkString, response)
      }
    }

    def bytes: Future[(Array[Byte], HttpClient#Response)] = {
      f.map { response =>
        val input = new BufferedInputStream(response.body)
        val output = new ByteArrayOutputStream()
        IOUtils.copy(input, output)
        val result = output.toByteArray
        input.close()
        output.close()
        (result, response)
      }
    }

    def noResponseBody: Future[HttpClient#Response] = {
      f.andThen {
        case Success(response) =>
          response.body.close()
          response
        case Failure(cause) =>
          throw cause
      }
    }

  }

} 
Example 15
Source File: SSLContextUtils.scala    From akka-grpc    with Apache License 2.0
package akka.grpc

import java.io.{ BufferedInputStream, IOException, InputStream }
import java.security.KeyStore
import java.security.cert.{ CertificateFactory, X509Certificate }

import javax.net.ssl.{ TrustManager, TrustManagerFactory }

object SSLContextUtils {
  def trustManagerFromStream(certStream: InputStream): TrustManager = {
    try {
      import scala.collection.JavaConverters._
      val cf = CertificateFactory.getInstance("X.509")
      val bis = new BufferedInputStream(certStream)

      val keystore = KeyStore.getInstance(KeyStore.getDefaultType)
      keystore.load(null)
      cf.generateCertificates(bis).asScala.foreach { cert =>
        val alias = cert.asInstanceOf[X509Certificate].getSubjectX500Principal.getName
        keystore.setCertificateEntry(alias, cert)
      }

      val tmf = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm)
      tmf.init(keystore)
      tmf.getTrustManagers()(0)
    } finally certStream.close()
  }

  def trustManagerFromResource(certificateResourcePath: String): TrustManager = {
    val certStream: InputStream = getClass.getResourceAsStream(certificateResourcePath)
    if (certStream == null) throw new IOException(s"Couldn't find '$certificateResourcePath' on the classpath")
    trustManagerFromStream(certStream)
  }
} 
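A short sketch of plugging the resulting TrustManager into an SSLContext, using the standard JSSE calls (the resource path is a placeholder):

import javax.net.ssl.SSLContext

val trustManager = SSLContextUtils.trustManagerFromResource("/certs/ca.pem") // placeholder
val sslContext = SSLContext.getInstance("TLS")
sslContext.init(null, Array(trustManager), null) // default key managers and RNG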
Example 16
Source File: FileUtils.scala    From mimir    with Apache License 2.0
package mimir.util

import java.lang.reflect.Method
import java.net.URL
import java.io.{File, FileOutputStream, BufferedOutputStream, InputStream}
import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveInputStream}
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import java.io.BufferedInputStream
import java.nio.file.Files

object FileUtils {
  def getListOfFiles(dir: String):List[File] = {
    val d = new File(dir)
    if (d.exists && d.isDirectory) {
        d.listFiles.filter(_.isFile).toList
    } else {
        List[File]()
    }
  }
  def addJarToClasspath(jar: File): Unit = {
// Get the ClassLoader class
    val cl: ClassLoader = ClassLoader.getSystemClassLoader
    val clazz: Class[_] = cl.getClass
// Get the protected addURL method from the parent URLClassLoader class
    val method: Method =
      clazz.getSuperclass.getDeclaredMethod("addURL", Seq(classOf[URL]):_*)
// Run projected addURL method to add JAR to classpath
    method.setAccessible(true)
    method.invoke(cl, Seq(jar.toURI().toURL()):_*)
  }
  
  def untar(in:InputStream, destinationDir: String): File = {

    val dest = new File(destinationDir)
    dest.mkdir()

    var tarIn: TarArchiveInputStream = null

    try {
      tarIn = new TarArchiveInputStream(
        new GzipCompressorInputStream(
          new BufferedInputStream(in)))
      var tarEntry = tarIn.getNextTarEntry
      while (tarEntry != null) {

        // create a file with the same name as the tarEntry
        val destPath = new File(dest, tarEntry.getName)
        if (tarEntry.isDirectory) {
          destPath.mkdirs()
        } else {
          // Create any necessary parent dirs
          val parent = destPath.getParentFile
          if (!Files.exists(parent.toPath)) {
            parent.mkdirs()
          }

          destPath.createNewFile()

          val btoRead = new Array[Byte](1024)
          var bout: BufferedOutputStream = null

          try {
            bout = new BufferedOutputStream(new FileOutputStream(destPath))

            var len = 0
            while (len != -1) {
              len = tarIn.read(btoRead)
              if (len != -1) {
                bout.write(btoRead, 0, len)
              }
            }
          } finally {
            if (bout != null) {
              bout.close()
            }
          }
        }
        tarEntry = tarIn.getNextTarEntry
      }
    } finally {
      if (tarIn != null) {
        tarIn.close()
      }
    }
    dest
  }
  
} 
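A hypothetical call site for untar; note that it takes the raw, still-compressed stream, since it layers the gzip and tar decoding itself:

import java.io.FileInputStream

val dest = FileUtils.untar(new FileInputStream("bundle.tar.gz"), "extracted") // placeholder paths
println(s"unpacked into ${dest.getAbsolutePath}")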
Example 17
Source File: SnapshotGetter.scala    From c4proto    with Apache License 2.0
package ee.cone.c4gate.tests

import java.io.BufferedInputStream
import java.net.{HttpURLConnection, URL}
import java.util.Locale

import ee.cone.c4actor._
import ee.cone.c4gate.HttpResponse

import scala.collection.JavaConverters.{iterableAsScalaIterableConverter, mapAsScalaMapConverter}

//C4STATE_TOPIC_PREFIX=ee.cone.c4actor.tests.SnapshotGetterApp sbt ~'c4actor-extra-examples/runMain ee.cone.c4actor.ServerMain'

class SnapshotGetterTest(execution: Execution) extends Executable {
  private def withConnection[T](url: String): (HttpURLConnection => T) => T =
    FinallyClose[HttpURLConnection, T](_.disconnect())(
      new URL(url).openConnection().asInstanceOf[HttpURLConnection]
    )
  private def setHeaders(connection: HttpURLConnection, headers: List[(String, String)]): Unit = {
    headers.flatMap(normalizeHeader).foreach { case (k, v) =>
      connection.setRequestProperty(k, v)
    }
  }
  private def normalizeHeader[T](kv: (String, T)): List[(String, T)] = {
    val (k, v) = kv
    Option(k).map(k => (k.toLowerCase(Locale.ENGLISH), v)).toList
  }

  def get(url: String): (String, Long, Long, Map[String, List[String]]) = {
    val time = System.currentTimeMillis()
    val res: HttpResponse = withConnection(url) { conn =>
      setHeaders(conn, Nil)
      FinallyClose(new BufferedInputStream(conn.getInputStream)) { is =>
        FinallyClose(new okio.Buffer) { buffer =>
          buffer.readFrom(is)
          val headers = conn.getHeaderFields.asScala.map {
            case (k, l) => k -> l.asScala.toList
          }.flatMap(normalizeHeader).toMap
          HttpResponse(conn.getResponseCode, headers, buffer.readByteString())
        }
      }
    }
    val took = System.currentTimeMillis() - time
    (SnapshotUtilImpl.hashFromData(res.body.toByteArray), res.body.size(), took, res.headers)
  }


  def run(): Unit = {
    val url = "http://10.20.2.216:10580/snapshots/000000000081c0e5-92b87c05-294d-3c1d-b443-fb83bdc71d20-c-lz4"
    for {i <- 1 to 5} yield {
      println(get(url))
    }
    execution.complete()
  }
}

class SnapshotGetterApp
  extends ToStartApp
    with VMExecutionApp
    with ExecutableApp
    with RichDataApp
    with EnvConfigApp {

  override def toStart: List[Executable] = new SnapshotGetterTest(execution) :: super.toStart
  def assembleProfiler: AssembleProfiler = NoAssembleProfiler
} 
Example 18
Source File: BackupWriter.scala    From recogito2    with Apache License 2.0
package controllers.document

import controllers.HasConfig
import java.io.{File, FileInputStream, FileOutputStream, BufferedInputStream, ByteArrayInputStream, InputStream, PrintWriter}
import java.nio.file.Paths
import java.math.BigInteger
import java.security.{MessageDigest, DigestInputStream}
import java.util.UUID
import java.util.zip.{ZipEntry, ZipOutputStream}
import services.HasDate
import services.annotation.{Annotation, AnnotationService}
import services.document.{ExtendedDocumentMetadata, DocumentToJSON}
import services.generated.tables.records.{DocumentRecord, DocumentFilepartRecord}
import play.api.libs.json.Json
import play.api.libs.Files.TemporaryFileCreator
import scala.concurrent.{ExecutionContext, Future}
import storage.TempDir
import storage.uploads.Uploads

trait BackupWriter extends HasBackupValidation { self: HasConfig =>
  
  // Frontend annotation format
  import services.annotation.FrontendAnnotation._
  
  private val BUFFER_SIZE = 2048
  
  private def writeToZip(inputStream: InputStream, filename: String, zip: ZipOutputStream) = {
    zip.putNextEntry(new ZipEntry(filename))
     
    val md = MessageDigest.getInstance(ALGORITHM)    
    val in = new DigestInputStream(new BufferedInputStream(inputStream), md)

    val data = new Array[Byte](BUFFER_SIZE)
    var count: Int = 0

    while ({ count = in.read(data, 0, BUFFER_SIZE); count } > -1) {
      zip.write(data, 0, count)
    }

    in.close()
    zip.closeEntry()
    
    new BigInteger(1, md.digest()).toString(16)
  }
  
  def createBackup(doc: ExtendedDocumentMetadata)(implicit ctx: ExecutionContext, uploads: Uploads, 
      annotations: AnnotationService, tmpFile: TemporaryFileCreator): Future[File] = {
    
    def getFileAsStream(owner: String, documentId: String, filename: String) = {
      val dir = uploads.getDocumentDir(owner, documentId).get // Fail hard if the dir doesn't exist
      new FileInputStream(new File(dir, filename))
    }
    
    def getManifestAsStream() = {
      val manifest = "Recogito-Version: 2.0.1-alpha"
      new ByteArrayInputStream(manifest.getBytes)
    }
    
    def getMetadataAsStream(doc: ExtendedDocumentMetadata) = {
      
      // DocumentRecord JSON serialization
      import services.document.DocumentToJSON._
      
      val json = Json.prettyPrint(Json.toJson((doc.document, doc.fileparts)))
      new ByteArrayInputStream(json.getBytes)
    }
    
    def getAnnotationsAsStream(docId: String, annotations: Seq[Annotation], parts: Seq[DocumentFilepartRecord]): InputStream = {
      val path = Paths.get(TempDir.get()(self.config), s"${docId}_annotations.json")
      val tmp = tmpFile.create(path)
      val writer = new PrintWriter(path.toFile)
      annotations.foreach(a => writer.println(Json.stringify(Json.toJson(a))))
      writer.close()
      new FileInputStream(path.toFile)
    }
    
    Future {
      tmpFile.create(Paths.get(TempDir.get()(self.config), s"${doc.id}.zip"))
    } flatMap { zipFile =>
      val zipStream = new ZipOutputStream(new FileOutputStream(zipFile.path.toFile))

      writeToZip(getManifestAsStream(), "manifest", zipStream)
      val metadataHash = writeToZip(getMetadataAsStream(doc), "metadata.json", zipStream)

      val fileHashes = doc.fileparts.map { part =>
        writeToZip(getFileAsStream(doc.ownerName, doc.id, part.getFile), "parts" + File.separator + part.getFile, zipStream)
      }

      annotations.findByDocId(doc.id).map { annotations =>
        val annotationsHash = writeToZip(getAnnotationsAsStream(doc.id, annotations.map(_._1), doc.fileparts), "annotations.jsonl", zipStream)
        
        val signature = computeSignature(metadataHash, fileHashes, annotationsHash)
        writeToZip(new ByteArrayInputStream(signature.getBytes), "signature", zipStream)
        
        zipStream.close()
        zipFile.path.toFile
      }
    }
  }
  
} 
Example 19
Source File: ReThinkConnection.scala    From stream-reactor    with Apache License 2.0
package com.datamountaineer.streamreactor.connect.rethink

import java.io.{BufferedInputStream, FileInputStream}

import com.datamountaineer.streamreactor.connect.rethink.config.ReThinkConfigConstants
import com.rethinkdb.RethinkDB
import com.rethinkdb.net.Connection
import com.typesafe.scalalogging.StrictLogging
import org.apache.kafka.common.config.AbstractConfig
import org.apache.kafka.connect.errors.ConnectException


object ReThinkConnection extends StrictLogging {
  def apply(r: RethinkDB, config: AbstractConfig): Connection = {

    val host = config.getString(ReThinkConfigConstants.RETHINK_HOST)
    val port = config.getInt(ReThinkConfigConstants.RETHINK_PORT)
    val username = config.getString(ReThinkConfigConstants.USERNAME)
    val password = config.getPassword(ReThinkConfigConstants.PASSWORD).value()
    val certFile = config.getString(ReThinkConfigConstants.CERT_FILE)
    val authKey = config.getPassword(ReThinkConfigConstants.AUTH_KEY)

    // The Java driver also rejects this combination
    if (username.nonEmpty && certFile.nonEmpty) {
      throw new ConnectException("Username and Certificate file can not be used together.")
    }

    if ((certFile.nonEmpty && config.getPassword(ReThinkConfigConstants.AUTH_KEY).value().isEmpty)
      || certFile.isEmpty && config.getPassword(ReThinkConfigConstants.AUTH_KEY).value().nonEmpty
    ) {
      throw new ConnectException("Both the certificate file and authentication key must be set for secure TLS connections.")
    }

    val builder = r.connection()
      .hostname(host)
      .port(port)

    if (!username.isEmpty) {
      logger.info("Adding username/password credentials to connection")
      builder.user(username, password)
    }

    if (!certFile.isEmpty) {
      logger.info(s"Using certificate file ${certFile} for TLS connection, overriding any SSLContext")
      val is = new BufferedInputStream(new FileInputStream(certFile))
      builder.certFile(is)
    }

    if (!authKey.value().isEmpty) {
      logger.info("Set authorization key")
      builder.authKey(authKey.value())
    }

    builder.connect()
  }
} 
Example 20
Source File: JarHelper.scala    From cassandra-util    with Apache License 2.0
package com.protectwise.testing.ccm

import java.io.{FileOutputStream, FileInputStream, BufferedInputStream, File}
import java.util.jar.{JarEntry, JarOutputStream}

object JarHelper {

  
  def createJarForPath(path: File, targetJar: File): File = {
    def add(source: File, target: JarOutputStream): Unit = {
      var in: BufferedInputStream = null
      try {
        var name = source.getPath.replace("\\", "/").drop(path.getPath.length()+1)
        if (source.isDirectory && !name.isEmpty && !name.endsWith("/")) name += "/"
        println(s"      $name")
        if (source.isDirectory) {
          if (!name.isEmpty) {
            val entry = new JarEntry(name)
            entry.setTime(source.lastModified())
            target.putNextEntry(entry)
            target.closeEntry()
          }
          source.listFiles.foreach(add(_, target))
          return
        }

        val entry = new JarEntry(name)
        entry.setTime(source.lastModified())
        target.putNextEntry(entry)
        in = new BufferedInputStream(new FileInputStream(source))

        val buffer = Array.ofDim[Byte](1024)
        var count = 0
        while (count != -1) {
          count = in.read(buffer)
          if (count >= 0) target.write(buffer, 0, count)
        }
        target.closeEntry()
      } finally {
        if (in != null) in.close()
      }
    }

    //    val manifest = new java.util.jar.Manifest()
    val target = new JarOutputStream(new FileOutputStream(targetJar))
    add(path, target)
    target.close()

    targetJar
  }
} 
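A brief hypothetical usage; files are added with paths relative to the parent of the given directory, so the directory name itself becomes the top-level entry:

import java.io.File

val jar = JarHelper.createJarForPath(new File("build/classes"), new File("app.jar")) // placeholders
println(s"wrote ${jar.length()} bytes to ${jar.getName}")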
Example 21
Source File: Main.scala    From time-series-demo    with Apache License 2.0
package mesosphere.crimedemo

import java.io.BufferedInputStream
import java.net.URI
import java.util.zip.GZIPInputStream

import org.tukaani.xz.XZInputStream

import scala.io.Source

object Main {

  lazy val log = org.slf4j.LoggerFactory.getLogger(getClass.getName)

  def main(args: Array[String]): Unit = {
    val conf = new Conf(args)
    val publisher = new KafkaPublisher(conf.brokers())
    val topic = conf.topic()
    val sleep = 1000L / conf.eventsPerSecond()
    val uri = new URI(conf.uri())
    val inputStream = new BufferedInputStream(uri.toURL.openStream())

    val wrappedStream = if (conf.uri().endsWith(".gz")) {
      new GZIPInputStream(inputStream)
    }
    else if (conf.uri().endsWith(".xz")) {
      new XZInputStream(inputStream)
    }
    else {
      inputStream
    }
    val source = Source.fromInputStream(wrappedStream)

    var done = 0

    log.info(s"Reading crime from ${conf.uri()} and publishing to ${conf.brokers()} every ${sleep}ms")

    source.getLines().foreach(line => {
      publisher.publishKafka(topic, line.getBytes)
      done += 1

      if (done % 1000 == 0) {
        log.info(s"$done lines done")
      }

      Thread.sleep(sleep)
    })

    log.info(s"$done lines done")
  }
} 
Example 22
Source File: TFRecordIterator.scala    From BigDL    with Apache License 2.0
package com.intel.analytics.bigdl.utils.tf

import java.io.{BufferedInputStream, File, FileInputStream, InputStream}
import java.nio.{ByteBuffer, ByteOrder}


class TFRecordIterator(inputStream: InputStream) extends Iterator[Array[Byte]] {

  private var dataBuffer: Array[Byte] = null

  private val lengthBuffer: Array[Byte] = new Array[Byte](8)



  override def hasNext: Boolean = {
    if (dataBuffer != null) {
      true
    } else {
      val numOfBytes = inputStream.read(lengthBuffer)
      if (numOfBytes == 8) {
        val lengthWrapper = ByteBuffer.wrap(lengthBuffer)
        lengthWrapper.order(ByteOrder.LITTLE_ENDIAN)
        val length = lengthWrapper.getLong().toInt
        // todo, do crc check, simply skip now
        inputStream.skip(4)

        dataBuffer = new Array[Byte](length)
        inputStream.read(dataBuffer)
        // todo, do crc check, simply skip now
        inputStream.skip(4)
        true
      } else {
        inputStream.close()
        false
      }
    }
  }

  override def next(): Array[Byte] = {
    if (hasNext) {
      val data = this.dataBuffer
      this.dataBuffer = null
      data
    } else {
      throw new NoSuchElementException("next on empty iterator")
    }
  }
}

object TFRecordIterator {
  def apply(file: File): TFRecordIterator = {
    val inputStream = new FileInputStream(file)
    new TFRecordIterator(inputStream)
  }
} 
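A minimal usage sketch, assuming a TFRecord file on disk (the name is a placeholder); each call to next() yields one record's payload bytes:

import java.io.File

val records = TFRecordIterator(new File("train.tfrecord")) // placeholder path
records.zipWithIndex.foreach { case (bytes, i) =>
  println(s"record $i: ${bytes.length} bytes")
}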
Example 23
Source File: FileReader.scala    From BigDL    with Apache License 2.0
package com.intel.analytics.bigdl.visualization.tensorboard

import java.io.{BufferedInputStream}
import java.nio.ByteBuffer

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.tensorflow.util.Event

import scala.collection.mutable.ArrayBuffer
import scala.util.matching.Regex

private[bigdl] object FileReader {
  val fileNameRegex = """bigdl.tfevents.*""".r

  
  def readScalar(file: Path, tag: String, fs: FileSystem): Array[(Long, Float, Double)] = {
    require(fs.isFile(file), s"FileReader: ${file} should be a file")
    val bis = new BufferedInputStream(fs.open(file))
    val longBuffer = new Array[Byte](8)
    val crcBuffer = new Array[Byte](4)
    val bf = new ArrayBuffer[(Long, Float, Double)]
    while (bis.read(longBuffer) > 0) {
      val l = ByteBuffer.wrap(longBuffer.reverse).getLong()
      bis.read(crcBuffer)
      // TODO: checksum
      //      val crc1 = ByteBuffer.wrap(crcBuffer.reverse).getInt()
      val eventBuffer = new Array[Byte](l.toInt)
      bis.read(eventBuffer)
      val e = Event.parseFrom(eventBuffer)
      if (e.getSummary.getValueCount == 1 &&
        tag.equals(e.getSummary.getValue(0).getTag())) {
        bf.append((e.getStep, e.getSummary.getValue(0).getSimpleValue,
          e.getWallTime))
      }
      bis.read(crcBuffer)
      //      val crc2 = ByteBuffer.wrap(crcBuffer.reverse).getInt()
    }
    bis.close()
    bf.toArray.sortWith(_._1 < _._1)
  }
} 
Example 24
Source File: WarcRecord.scala    From ArchiveSpark    with MIT License
package org.archive.archivespark.specific.warc

import java.io.{BufferedInputStream, InputStream}

import org.archive.archivespark.dataspecs.access.DataAccessor
import org.archive.archivespark.functions.StringContent
import org.archive.archivespark.model.dataloads.{ByteLoad, DataLoad, TextLoad}
import org.archive.archivespark.model.pointers.FieldPointer
import org.archive.archivespark.model.{DataEnrichRoot, EnrichRootCompanion}
import org.archive.archivespark.sparkling.cdx.CdxRecord
import org.archive.archivespark.sparkling.warc.{WarcRecord => WARC}
import org.archive.archivespark.specific.warc.functions.WarcPayload

class WarcRecord(cdx: CdxRecord, val data: DataAccessor[InputStream]) extends DataEnrichRoot[CdxRecord, WARC](cdx) with WarcLikeRecord {
  override def access[R >: Null](action: WARC => R): R = data.access { stream =>
    WARC.get(if (stream.markSupported) stream else new BufferedInputStream(stream)) match {
      case Some(record) => action(record)
      case None => null
    }
  }

  override def companion: EnrichRootCompanion[WarcRecord] = WarcRecord
}

object WarcRecord extends EnrichRootCompanion[WarcRecord] {
  override def dataLoad[T](load: DataLoad[T]): Option[FieldPointer[WarcRecord, T]] = (load match {
    case ByteLoad => Some(WarcPayload)
    case TextLoad => Some(StringContent)
    case _ => None
  }).map(_.asInstanceOf[FieldPointer[WarcRecord, T]])
} 
Example 25
Source File: HttpClient.scala    From ArchiveSpark    with MIT License
package org.archive.archivespark.sparkling.http

import java.io.{BufferedInputStream, InputStream}
import java.net.{HttpURLConnection, URL, URLConnection}

import org.archive.archivespark.sparkling.logging.LogContext
import org.archive.archivespark.sparkling.util.Common

import scala.collection.JavaConverters._
import scala.util.Try

object HttpClient {
  val DefaultRetries: Int = 30
  val DefaultSleepMillis: Int = 1000
  val DefaultTimeoutMillis: Int = -1

  implicit val logContext: LogContext = LogContext(this)

  def request[R](url: String, headers: Map[String, String] = Map.empty, retries: Int = DefaultRetries, sleepMillis: Int = DefaultSleepMillis, timeoutMillis: Int = DefaultTimeoutMillis)(action: InputStream => R): R = rangeRequest(url, headers, retries = retries, sleepMillis = sleepMillis, timeoutMillis = timeoutMillis)(action)

  def rangeRequest[R](url: String, headers: Map[String, String] = Map.empty, offset: Long = 0, length: Long = -1, retries: Int = DefaultRetries, sleepMillis: Int = DefaultSleepMillis, timeoutMillis: Int = DefaultTimeoutMillis)(action: InputStream => R): R = {
    rangeRequestConnection(url, headers, offset, length, retries, sleepMillis, timeoutMillis) { case connection: HttpURLConnection =>
      val in = new BufferedInputStream(connection.getInputStream)
      val r = action(in)
      Try(in.close())
      r
    }
  }

  def requestMessage[R](url: String, headers: Map[String, String] = Map.empty, retries: Int = DefaultRetries, sleepMillis: Int = DefaultSleepMillis, timeoutMillis: Int = DefaultTimeoutMillis)(action: HttpMessage => R): R = rangeRequestMessage(url, headers, retries = retries, sleepMillis = sleepMillis, timeoutMillis = timeoutMillis)(action)

  def rangeRequestMessage[R](url: String, headers: Map[String, String] = Map.empty, offset: Long = 0, length: Long = -1, retries: Int = DefaultRetries, sleepMillis: Int = DefaultSleepMillis, timeoutMillis: Int = DefaultTimeoutMillis)(action: HttpMessage => R): R = {
    rangeRequestConnection(url, headers, offset, length, retries, sleepMillis, timeoutMillis) { case connection: HttpURLConnection =>
      val in = new BufferedInputStream(connection.getInputStream)
      val responseHeaders = connection.getHeaderFields.asScala.toMap.flatMap{case (k, v) => v.asScala.headOption.map((if (k == null) "" else k) -> _)}
      val message = new HttpMessage(connection.getResponseMessage, responseHeaders, in)
      val r = action(message)
      Try(in.close())
      r
    }
  }

  def requestConnection[R](url: String, headers: Map[String, String] = Map.empty, retries: Int = DefaultRetries, sleepMillis: Int = DefaultSleepMillis, timeoutMillis: Int = DefaultTimeoutMillis)(action: URLConnection => R): R = rangeRequestConnection(url, headers, retries = retries, sleepMillis = sleepMillis, timeoutMillis = timeoutMillis)(action)

  def rangeRequestConnection[R](url: String, headers: Map[String, String] = Map.empty, offset: Long = 0, length: Long = -1, retries: Int = DefaultRetries, sleepMillis: Int = DefaultSleepMillis, timeoutMillis: Int = DefaultTimeoutMillis)(action: URLConnection => R): R = {
    Common.timeoutWithReporter(timeoutMillis) { reporter =>
      val connection = Common.retry(retries, sleepMillis, (retry, e) => {
        "Request failed (" + retry + "/" + retries + "): " + url + " (" + offset + "-" + (if (length >= 0) length else "") + ") - " + e.getMessage
      }) { _ =>
        reporter.alive()
        val connection = new URL(url).openConnection()
        for ((key, value) <- headers) connection.addRequestProperty(key, value)
        if (offset > 0 || length >= 0) connection.addRequestProperty("Range", "bytes=" + offset + "-" + (if (length >= 0) offset + length - 1 else ""))
        connection.asInstanceOf[HttpURLConnection]
      }
      val r = action(connection)
      Try(connection.disconnect())
      r
    }
  }
} 
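
A minimal usage sketch of the client above; the URL and byte count are illustrative, and retries/timeouts fall back to the defaults defined in the object:

// Hypothetical usage: fetch the first kilobyte of a resource with a ranged request.
val head: Array[Byte] =
  HttpClient.rangeRequest("https://example.org/data.bin", offset = 0, length = 1024) { in =>
    Iterator.continually(in.read).takeWhile(_ != -1).map(_.toByte).toArray
  }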
Example 26
Source File: HttpMessage.scala    From ArchiveSpark   with MIT License 5 votes vote down vote up
package org.archive.archivespark.sparkling.http

import java.io.{BufferedInputStream, InputStream}
import java.util.zip.GZIPInputStream

import org.apache.commons.httpclient.ChunkedInputStream
import org.apache.http.client.entity.DeflateInputStream
import org.archive.archivespark.sparkling.io.IOUtil
import org.archive.archivespark.sparkling.util.StringUtil

import scala.collection.immutable.ListMap
import scala.util.Try

class HttpMessage (val statusLine: String, val headers: Map[String, String], val payload: InputStream) {
  import HttpMessage._

  lazy val lowerCaseHeaders: Map[String, String] = headers.map{case (k,v) => (k.toLowerCase, v)}

  def contentEncoding: Option[String] = lowerCaseHeaders.get("content-encoding").map(_.toLowerCase)
  def mime: Option[String] = lowerCaseHeaders.get("content-type").map(_.split(';').head.trim.toLowerCase)
  def charset: Option[String] = {
    lowerCaseHeaders.get("content-type").flatMap(_.split(';').drop(1).headOption).map(_.trim)
      .filter(_.startsWith("charset="))
      .map(_.drop(8).trim.stripPrefix("\"").stripPrefix("'").stripSuffix("'").stripSuffix("\"").split(",", 2).head.trim)
      .filter(_.nonEmpty).map(_.toUpperCase)
  }
  def redirectLocation: Option[String] = lowerCaseHeaders.get("location").map(_.trim)
  def isChunked: Boolean = lowerCaseHeaders.get("transfer-encoding").map(_.toLowerCase).contains("chunked")

  def status: Int = statusLine.split(" +").drop(1).headOption.flatMap(s => Try{s.toInt}.toOption).getOrElse(-1)

  lazy val body: InputStream = Try {
    var decoded = if (isChunked) new ChunkedInputStream(payload) else payload
    val decoders = contentEncoding.toSeq.flatMap(_.split(',').map(_.trim).flatMap(DecoderRegistry.get))
    for (decoder <- decoders) decoded = decoder(decoded)
    new BufferedInputStream(decoded)
  }.getOrElse(IOUtil.emptyStream)

  lazy val bodyString: String = StringUtil.fromInputStream(body, charset.toSeq ++ BodyCharsets)
}

object HttpMessage {
  val Charset: String = "UTF-8"
  val HttpMessageStart = "HTTP/"
  val BodyCharsets: Seq[String] = Seq("UTF-8", "ISO-8859-1", "WINDOWS-1252")

  // see org.apache.http.client.protocol.ResponseContentEncoding
  val DecoderRegistry: Map[String, InputStream => InputStream] = Map(
    "gzip" -> ((in: InputStream) => new GZIPInputStream(in)),
    "x-gzip" -> ((in: InputStream) => new GZIPInputStream(in)),
    "deflate" -> ((in: InputStream) => new DeflateInputStream(in))
  )

  def get(in: InputStream): Option[HttpMessage] = {
    // Scan forward for a status line starting with "HTTP/", then consume the header
    // block and return the message; the rest of the stream remains as the payload.
    var line = StringUtil.readLine(in, Charset)
    while (line != null && !{
      if (line.startsWith(HttpMessageStart)) {
        val statusLine = line
        val headers = collection.mutable.Buffer.empty[(String, String)]
        line = StringUtil.readLine(in, Charset)
        while (line != null && line.trim.nonEmpty) {
          val split = line.split(":", 2)
          if (split.length == 2) headers += ((split(0).trim, split(1).trim))
          line = StringUtil.readLine(in, Charset)
        }
        return Some(new HttpMessage(statusLine, ListMap(headers: _*), in))
      }
      false
    }) line = StringUtil.readLine(in, Charset)
    None
  }
} 
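
For orientation, a small sketch of HttpMessage.get on an in-memory response; the payload is illustrative and assumes an uncompressed, non-chunked message:

import java.io.ByteArrayInputStream

// Hypothetical usage: parse a raw HTTP response and inspect the derived fields.
val raw = "HTTP/1.1 200 OK\r\nContent-Type: text/plain; charset=utf-8\r\n\r\nhello".getBytes("UTF-8")
HttpMessage.get(new ByteArrayInputStream(raw)) match {
  case Some(msg) => println(s"status=${msg.status} mime=${msg.mime} body=${msg.bodyString}")
  case None      => println("no HTTP message found")
}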
Example 27
Source File: GzipUtil.scala    From ArchiveSpark   with MIT License 5 votes vote down vote up
package org.archive.archivespark.sparkling.io

import java.io.{BufferedInputStream, InputStream}

import com.google.common.io.CountingInputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import org.archive.archivespark.sparkling.Sparkling
import org.archive.archivespark.sparkling.util.IteratorUtil

import scala.util.Try

object GzipUtil {
  import Sparkling._

  val Magic0 = 31
  val Magic1 = 139

  def isCompressed(in: InputStream): Boolean = {
    in.mark(2)
    val (b0, b1) = (in.read, in.read)
    in.reset()
    b0 == Magic0 && b1 == Magic1
  }

  def decompressConcatenated(in: InputStream): Iterator[InputStream] = decompressConcatenatedWithPosition(in).map{case (pos, s) => s}

  def decompressConcatenatedWithPosition(in: InputStream): Iterator[(Long, InputStream)] = {
    val stream = new CountingInputStream(IOUtil.supportMark(new NonClosingInputStream(in)))
    var last: Option[InputStream] = None
    IteratorUtil.whileDefined {
      if (last.isDefined) IOUtil.readToEnd(last.get, close = true)
      if (IOUtil.eof(stream)) {
        stream.close()
        None
      } else Try {
        val pos = stream.getCount
        last = Some(new GzipCompressorInputStream(new NonClosingInputStream(stream), false))
        last.map((pos, _))
      }.getOrElse(None)
    }
  }

  def estimateCompressionFactor(in: InputStream, readUncompressedBytes: Long): Double = {
    val stream = new CountingInputStream(new BufferedInputStream(new NonClosingInputStream(in)))
    val uncompressed = new GzipCompressorInputStream(stream, true)
    var read = IOUtil.skip(uncompressed, readUncompressedBytes)
    val decompressed = stream.getCount
    while (decompressed == stream.getCount && !IOUtil.eof(uncompressed, markReset = false)) read += 1
    val factor = read.toDouble / decompressed
    uncompressed.close()
    factor
  }

  def decompress(in: InputStream, filename: Option[String] = None, checkFile: Boolean = false): InputStream = {
    val buffered = if (in.markSupported()) in else new BufferedInputStream(in)
    if (!IOUtil.eof(buffered) && ((filename.isEmpty && !checkFile) || (filename.isDefined && filename.get.toLowerCase.endsWith(GzipExt)))) {
      new GzipCompressorInputStream(buffered, true)
    } else buffered
  }
} 
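
A hedged sketch of how these helpers compose, assuming a file of concatenated gzip members (the path is illustrative); note that BufferedInputStream supplies the mark support isCompressed requires:

import java.io.{BufferedInputStream, FileInputStream}

// Hypothetical usage: walk the members of a concatenated .gz file.
val in = new BufferedInputStream(new FileInputStream("/data/records.gz"))
if (GzipUtil.isCompressed(in)) {
  for ((pos, member) <- GzipUtil.decompressConcatenatedWithPosition(in)) {
    // The iterator drains any unread remainder of the previous member before advancing.
    println(s"member starts at compressed offset $pos")
  }
}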
Example 28
Source File: RewriteSwaggerConfigPlugin.scala    From matcher   with MIT License 5 votes vote down vote up
import java.io.{BufferedInputStream, ByteArrayOutputStream}
import java.nio.charset.StandardCharsets
import java.nio.file.Files

import Dependencies.Version
import org.apache.commons.compress.archivers.ArchiveStreamFactory
import org.apache.commons.io.IOUtils
import sbt.Keys._
import sbt._

// See https://github.com/swagger-api/swagger-ui/issues/5710
object RewriteSwaggerConfigPlugin extends AutoPlugin {
  override val trigger = PluginTrigger.NoTrigger
  override def projectSettings: Seq[Def.Setting[_]] =
    inConfig(Compile)(
      Seq(
        resourceGenerators += Def.task {
          val jarName       = s"swagger-ui-${Version.swaggerUi}.jar"
          val indexHtmlPath = s"META-INF/resources/webjars/swagger-ui/${Version.swaggerUi}/index.html"
          val outputFile    = resourceManaged.value / indexHtmlPath

          val html = (Compile / dependencyClasspath).value
            .find(_.data.getName == jarName)
            .flatMap(jar => fileContentFromJar(jar.data, indexHtmlPath))
            .map { new String(_, StandardCharsets.UTF_8) }

          val resource = s"$jarName:$indexHtmlPath"
          html match {
            case None => throw new RuntimeException(s"Can't find $resource")
            case Some(html) =>
              val doc = org.jsoup.parser.Parser.parse(html, "127.0.0.1")
              import scala.collection.JavaConverters._
              doc
                .body()
                .children()
                .asScala
                .find { el =>
                  el.tagName() == "script" && el.html().contains("SwaggerUIBundle")
                } match {
                case None => throw new RuntimeException("Can't patch script in index.html")
                case Some(el) =>
                  val update =
                    """
const ui = SwaggerUIBundle({
    url: "/api-docs/swagger.json",
    dom_id: '#swagger-ui',
    deepLinking: true,
    presets: [ SwaggerUIBundle.presets.apis ],
    plugins: [ SwaggerUIBundle.plugins.DownloadUrl ],
    layout: "BaseLayout",
    operationsSorter: "alpha"
});
window.ui = ui;
"""
                  // Careful! ^ will be inserted as one-liner
                  el.text(update)
              }

              Files.createDirectories(outputFile.getParentFile.toPath)
              IO.write(outputFile, doc.outerHtml())
          }

          Seq(outputFile)
        }.taskValue
      ))

  private def fileContentFromJar(jar: File, fileName: String): Option[Array[Byte]] = {
    val fs      = new BufferedInputStream(Files.newInputStream(jar.toPath))
    val factory = new ArchiveStreamFactory()
    val ais     = factory.createArchiveInputStream(fs)

    try Iterator
      .continually(ais.getNextEntry)
      .takeWhile(_ != null)
      .filter(ais.canReadEntryData)
      .find(_.getName == fileName)
      .map { _ =>
        val out = new ByteArrayOutputStream()
        IOUtils.copy(ais, out)
        out.toByteArray
      } finally fs.close()
  }
} 
Example 29
Source File: ImageUtil.scala    From piflow   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
package cn.piflow.conf.util

import java.io.{BufferedInputStream, FileInputStream}

import com.sksamuel.scrimage.Image


object ImageUtil {

  def getImage(imagePath: String): Array[Byte] = {
    try {
      val classLoader = this.getClass.getClassLoader
      val imageInputStream = classLoader.getResourceAsStream(imagePath)
      val input = new BufferedInputStream(imageInputStream)
      Image.fromStream(input).bytes
    } catch {
      case ex: Exception => println(ex); Array[Byte]()
    }
  }

  def saveImage(imageBytes: Array[Byte], savePath: String): Unit = {
    Image(imageBytes).output(savePath)
  }

} 
Example 30
Source File: SparkSqlUtils.scala    From HadoopLearning   with MIT License 5 votes vote down vote up
package com.c503.utils

import java.io.{BufferedInputStream, BufferedReader, FileInputStream, InputStreamReader}
import java.nio.file.Path

import com.google.common.io.Resources
import org.apache.log4j.{Level, Logger}
import org.apache.mesos.Protos.Resource
import org.apache.spark.sql.SparkSession

import scala.io.Source


object SparkSqlUtils {

  // NOTE: the original helper was elided from this listing; this reconstruction is an
  // assumption that resolves a classpath resource to a file path via Guava's Resources.
  def getPathByName(name: String): String =
    Resources.getResource(name).getPath

  def readSqlByPath(sqlPath: String): String = {
    val buf = new StringBuilder
    val path = this.getPathByName(sqlPath)
    val file = Source.fromFile(path)
    for (line <- file.getLines) {
      buf ++= line + "\n"
    }
    file.close()
    buf.toString()
  }
}
Example 31
Source File: InterfaceReaderMain.scala    From daml   with Apache License 2.0 5 votes vote down vote up
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.lf.iface.reader

import java.io.BufferedInputStream
import java.nio.file.Files

import com.daml.daml_lf_dev.DamlLf

object InterfaceReaderMain extends App {

  val lfFile = new java.io.File(args.apply(0))

  val is = Files.newInputStream(lfFile.toPath)
  try {
    val bis = new BufferedInputStream(is)
    val archive = DamlLf.Archive.parser().parseFrom(bis)
    val out = InterfaceReader.readInterface(archive)
    println(s"out: $out")
  } finally {
    is.close()
  }
} 
Example 32
Source File: FluencyTest.scala    From airframe   with Apache License 2.0 5 votes vote down vote up
package wvlet.airframe.fluentd
import java.io.BufferedInputStream
import java.net.ServerSocket
import java.util.concurrent.atomic.AtomicBoolean

import javax.annotation.{PostConstruct, PreDestroy}
import wvlet.airframe.codec.PrimitiveCodec.ValueCodec
import wvlet.airframe._
import wvlet.airspec.AirSpec
import wvlet.log.LogSupport
import wvlet.log.io.IOUtil

case class MockFluentdConfig(port: Int)

trait MockFluentd extends LogSupport {
  lazy val socket = bind { config: MockFluentdConfig => new ServerSocket(config.port) }

  val shutdown = new AtomicBoolean(false)

  val t = new Thread(new Runnable {
    override def run(): Unit = {
      val clientSocket = socket.accept()
      val out          = clientSocket.getOutputStream
      val in           = new BufferedInputStream(clientSocket.getInputStream)

      while (!shutdown.get()) {
        val b            = new Array[Byte](8192)
        var totalReadLen = 0
        var readLen      = in.read(b)
        while (readLen > 0) {
          totalReadLen += readLen
          // Keep appending into the buffer until EOF (-1) or the buffer is full (0).
          readLen = in.read(b, totalReadLen, b.length - totalReadLen)
        }
        if (totalReadLen > 0) {
          val v = ValueCodec.unpackMsgPack(b, 0, totalReadLen)
          logger.debug(s"Received event: ${v}")
        }
      }
    }
  })

  @PostConstruct
  def start: Unit = {
    debug(s"starting MockFluentd")
    t.start()
  }

  @PreDestroy
  def stop: Unit = {
    debug(s"stopping MockFluentd")
    shutdown.set(true)
    socket.close()
    t.interrupt()
  }
}

case class FluencyMetric(id: Int, name: String) extends TaggedMetric {
  def metricTag = "fluency_metric"
}


class FluencyTest extends AirSpec {
  private val fluentdPort = IOUtil.randomPort

  protected override val design: Design = {
    newDesign
      .bind[MockFluentdConfig].toInstance(new MockFluentdConfig(fluentdPort))
      .bind[MockFluentd].toEagerSingleton
      .add(
        fluentd
          .withFluentdLogger(
            port = fluentdPort,
            // Do not send ack for simplicity
            ackResponseMode = false
          )
      )
  }

  def `should send metrics to fluentd through Fluency`(f: MetricLoggerFactory): Unit = {
    // Use a regular emit method
    f.getLogger.emit("mytag", Map("data" -> "hello"))

    // Use object metric logger
    val l = f.getTypedLogger[FluencyMetric]
    l.emit(FluencyMetric(1, "leo"))
    f.getLoggerWithTagPrefix("system").emit("mytag", Map("data" -> "metric value"))
  }

  test(
    "test extended time",
    design = fluentd.withFluentdLogger(port = fluentdPort, ackResponseMode = false, useExtendedEventTime = true)
  ) { f: MetricLoggerFactory =>
    val l = f.getLogger
    l.emit("mytag", Map("data" -> "hello"))
    l.emitMsgPack("tag", Array(0xc6.toByte))
  }
} 
Example 33
Source File: HDFSFileService.scala    From retail_analytics   with Apache License 2.0 5 votes vote down vote up
package models

import scalaz._
import Scalaz._
import scalaz.EitherT._
import scalaz.Validation
import scalaz.NonEmptyList._
import java.io.BufferedInputStream
import java.io.File
import java.io.FileInputStream
import java.io.InputStream
import org.apache.hadoop.conf._
import org.apache.hadoop.fs._

object HDFSFileService {
  private val conf = new Configuration()
  private val hdfsCoreSitePath = new Path("core-site.xml")
  private val hdfsHDFSSitePath = new Path("hdfs-site.xml")

  conf.addResource(hdfsCoreSitePath)
  conf.addResource(hdfsHDFSSitePath)

  private val fileSystem = FileSystem.get(conf)

  def saveFile(filepath: String): ValidationNel[Throwable, String] = {
    (Validation.fromTryCatch[String] {
      val file = new File(filepath)
      val out = fileSystem.create(new Path(file.getName))
      val in = new BufferedInputStream(new FileInputStream(file))

      // Copy the local file into HDFS in 1 KB chunks.
      val b = new Array[Byte](1024)
      var numBytes = in.read(b)
      while (numBytes > 0) {
        out.write(b, 0, numBytes)
        numBytes = in.read(b)
      }

      in.close()
      out.close()
      "File Uploaded"
    } leftMap { t: Throwable => nels(t) })
  }
 

  def removeFile(filename: String): Boolean = {
    val path = new Path(filename)
    fileSystem.delete(path, true)
  }

  def getFile(filename: String): InputStream = {
    val path = new Path(filename)
    fileSystem.open(path)
  }

  def createFolder(folderPath: String): Unit = {
    val path = new Path(folderPath)
    if (!fileSystem.exists(path)) {
      fileSystem.mkdirs(path)
    }
  }
} 
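
A short usage sketch, assuming scalaz's Success/Failure constructors for the ValidationNel result (the local path is illustrative):

import scalaz.{Failure, Success}

// Hypothetical usage: upload a local file and report the outcome.
HDFSFileService.saveFile("/tmp/report.csv") match {
  case Success(msg)  => println(msg)
  case Failure(errs) => println(s"Upload failed: ${errs.head.getMessage}")
}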
Example 34
Source File: Utilities.scala    From project-matt   with MIT License 5 votes vote down vote up
package org.datafy.aws.app.matt.extras

import org.apache.tika.Tika
import org.apache.tika.metadata.Metadata
import java.io.{BufferedInputStream, IOException, InputStream, StringWriter}
import java.util.zip.GZIPInputStream

import org.xml.sax.SAXException
import org.apache.tika.exception.TikaException
import org.apache.tika.metadata.serialization.JsonMetadata
import org.apache.tika.parser.{AutoDetectParser, ParseContext}
import org.apache.tika.parser.pkg.CompressorParser
import org.apache.tika.sax.BodyContentHandler


object Utilities {

  private val MAX_STRING_LENGTH = 2147483647

  private val tika = new Tika()
  tika.setMaxStringLength(MAX_STRING_LENGTH)

  @throws(classOf[IOException])
  @throws(classOf[SAXException])
  @throws(classOf[TikaException])
  def getParsePlainStream(inputStream: InputStream): String = {

    val autoDetectParser = new AutoDetectParser()
    val bodyContentHandler = new BodyContentHandler(MAX_STRING_LENGTH)
    val fileMetadata = new Metadata()

    // Peek via mark/reset so the emptiness check does not consume the first byte.
    val in = if (inputStream.markSupported()) inputStream else new BufferedInputStream(inputStream)
    in.mark(1)
    if (in.read() == -1) {
      return "Could not scan inputStream: stream is empty"
    }
    in.reset()
    autoDetectParser.parse(in, bodyContentHandler, fileMetadata)
    bodyContentHandler.toString
  }

  @throws(classOf[IOException])
  @throws(classOf[SAXException])
  @throws(classOf[TikaException])
  def getParseCompressedStream(myStream: InputStream): Boolean = {
    // Ensure mark/reset support so the magic bytes can be peeked non-destructively.
    var inputStream = myStream
    if (!inputStream.markSupported()) {
      inputStream = new BufferedInputStream(inputStream)
    }
    inputStream.mark(2)
    var magicBytes = 0
    try {
      // Read the first two bytes (little-endian) and rewind the stream.
      magicBytes = inputStream.read() & 0xff | ((inputStream.read() << 8) & 0xff00)
      inputStream.reset()
    } catch {
      case ioe: IOException => ioe.printStackTrace()
    }
    // True when the stream starts with the gzip magic number 0x8b1f.
    magicBytes == GZIPInputStream.GZIP_MAGIC
  }
}

case class And[A]( p1: A=>Boolean, p2: A=>Boolean ) extends (A=>Boolean) {
  def apply( a: A ) = p1(a) && p2(a)
}


case class Or[A]( p1: A=>Boolean, p2: A=>Boolean ) extends (A=>Boolean) {
  def apply( a: A ) = p1(a) || p2(a)
} 
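
To see the gzip check above in action, a minimal sketch that gzips a payload in memory and runs the magic-byte test (names and payload are illustrative):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.zip.GZIPOutputStream

// Hypothetical usage: a ByteArrayInputStream supports mark/reset, so no extra wrapping occurs.
val bos = new ByteArrayOutputStream()
val gz = new GZIPOutputStream(bos)
gz.write("payload".getBytes("UTF-8"))
gz.close()
val isGzip = Utilities.getParseCompressedStream(new ByteArrayInputStream(bos.toByteArray))
println(s"gzip detected: $isGzip") // expected: true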
Example 35
Source File: FileUtils.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.wmexchanger.utils

import java.io.BufferedInputStream
import java.io.BufferedOutputStream
import java.io.File
import java.io.FileInputStream
import java.io.FileOutputStream
import java.io.FilenameFilter
import java.io.ObjectInputStream
import java.io.ObjectOutputStream
import java.io.PrintWriter

import org.clulab.wm.wmexchanger.utils.Closer.AutoCloser

import scala.io.Source

object FileUtils {

  def appendingPrintWriterFromFile(file: File): PrintWriter = Sinker.printWriterFromFile(file, append = true)

  def appendingPrintWriterFromFile(path: String): PrintWriter = Sinker.printWriterFromFile(path, append = true)

  def printWriterFromFile(file: File): PrintWriter = Sinker.printWriterFromFile(file, append = false)

  def printWriterFromFile(path: String): PrintWriter = Sinker.printWriterFromFile(path, append = false)

  // Output
  def newBufferedOutputStream(file: File): BufferedOutputStream =
    new BufferedOutputStream(new FileOutputStream(file))

  def newBufferedOutputStream(filename: String): BufferedOutputStream =
    newBufferedOutputStream(new File(filename))

  def newAppendingBufferedOutputStream(file: File): BufferedOutputStream =
    new BufferedOutputStream(new FileOutputStream(file, true))

  def newAppendingBufferedOutputStream(filename: String): BufferedOutputStream =
    newAppendingBufferedOutputStream(new File(filename))

  def newObjectOutputStream(filename: String): ObjectOutputStream =
    new ObjectOutputStream(newBufferedOutputStream(filename))

  // Input
  def newBufferedInputStream(file: File): BufferedInputStream =
    new BufferedInputStream(new FileInputStream(file))

  def newBufferedInputStream(filename: String): BufferedInputStream =
    newBufferedInputStream(new File(filename))

  def newObjectInputStream(filename: String): ObjectInputStream =
    new ObjectInputStream(newBufferedInputStream(filename))

  def findFiles(collectionDir: String, extension: String): Seq[File] = {
    val dir = new File(collectionDir)
    val filter = new FilenameFilter {
      def accept(dir: File, name: String): Boolean = name.endsWith(extension)
    }

    val result = Option(dir.listFiles(filter))
        .getOrElse(throw Sourcer.newFileNotFoundException(collectionDir))
    result
  }

  protected def getTextFromSource(source: Source): String = source.mkString

  def getTextFromFile(file: File): String =
    Sourcer.sourceFromFile(file).autoClose { source =>
      getTextFromSource(source)
    }
} 
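
A small round-trip sketch with the stream helpers above (the path is illustrative):

// Hypothetical usage: serialize a value, then read it back with the buffered object streams.
val out = FileUtils.newObjectOutputStream("/tmp/example.ser")
out.writeObject("hello")
out.close()

val in = FileUtils.newObjectInputStream("/tmp/example.ser")
val restored = in.readObject().asInstanceOf[String]
in.close()
println(restored)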
Example 36
Source File: PropertiesConfig.scala    From DynaML   with Apache License 2.0 5 votes vote down vote up
package io.github.mandar2812.dynaml.utils.sumac

import collection._
import java.util.Properties
import java.io.{FileOutputStream, File, FileInputStream, BufferedInputStream}

import collection.JavaConverters._


trait PropertiesConfig extends ExternalConfig {
  self: Args =>

  var propertyFile: File = _

  abstract override def readArgs(originalArgs: Map[String,String]): Map[String,String] = {
    parse(originalArgs, false)

    val props = new Properties()
    if (propertyFile != null) {
      val in = new BufferedInputStream(new FileInputStream(propertyFile))
      props.load(in)
      in.close()
    }
    //append args we read from the property file to the args from the command line, and pass to next trait
    super.readArgs(ExternalConfigUtil.mapWithDefaults(originalArgs,props.asScala))
  }

  abstract override def saveConfig() {
    PropertiesConfig.saveConfig(this, propertyFile)
    super.saveConfig()
  }

}

object PropertiesConfig {
  def saveConfig(args: Args, propertyFile: File) {
    val props = new Properties()
    args.getStringValues.foreach{case(k,v) => props.put(k,v)}
    val out = new FileOutputStream(propertyFile)
    props.store(out, "")
    out.close()
  }
} 
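
A hedged usage sketch: mixing the trait into an argument class, assuming a sumac-style FieldArgs base class is available in this package (as in the original sumac library this port derives from):

// Hypothetical args class; FieldArgs and its parse method are assumed from the sumac port.
class AppArgs extends FieldArgs with PropertiesConfig {
  var inputPath: String = _
  var iterations: Int = 10
}

val args = new AppArgs
// Values loaded from app.properties back-fill anything not given on the command line.
args.parse(Array("--propertyFile", "app.properties", "--inputPath", "/data/in"))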
Example 37
Source File: JsonTestSuite.scala    From borer   with Mozilla Public License 2.0 5 votes vote down vote up
package io.bullet.borer

import java.io.BufferedInputStream

import utest._

import scala.io.Source

object JsonTestSuite extends TestSuite {

  val disabled: Set[String] = Set(
    "n_multidigit_number_then_00.json",
    "n_structure_null-byte-outside-string.json",
    "n_structure_whitespace_formfeed.json"
  )

  val testFiles =
    Source
      .fromResource("")
      .getLines()
      .map { name =>
        val is = new BufferedInputStream(getClass.getResourceAsStream("/" + name))
        try name -> Iterator.continually(is.read).takeWhile(_ != -1).map(_.toByte).toArray
        finally is.close()
      }
      .toMap
      .view
      .filter(t => !disabled.contains(t._1))

  val config = Json.DecodingConfig.default.copy(maxNumberMantissaDigits = 99, maxNumberAbsExponent = 999)

  val tests = Tests {

    "Accept" - {
      for {
        (name, bytes) <- testFiles
        if name startsWith "y"
      } {
        Json.decode(bytes).withConfig(config).to[Dom.Element].valueEither match {
          case Left(e)  => throw new RuntimeException(s"Test `$name` did not parse as it should", e)
          case Right(_) => // ok
        }
      }
    }

    "Reject" - {
      for {
        (name, bytes) <- testFiles
        if name startsWith "n"
      } {
        Json.decode(bytes).withConfig(config).to[Dom.Element].valueEither match {
          case Left(_)  => // ok
          case Right(x) => throw new RuntimeException(s"Test `$name` parsed even though it should have failed: $x")
        }
      }
    }

    "Not Crash" - {
      for {
        (name, bytes) <- testFiles
        if name startsWith "i"
      } {
        Json.decode(bytes).withConfig(config).to[Dom.Element].valueEither match {
          case Left(e: Borer.Error.General[_]) => throw new RuntimeException(s"Test `$name` did fail unexpectedly", e)
          case _                               => // everything else is fine
        }
      }
    }
  }
} 
Example 38
package com.ivan.nikolov.structural.decorator

import java.io.{BufferedInputStream, InputStreamReader, BufferedReader, ByteArrayOutputStream}
import java.nio.charset.Charset
import java.util.Base64
import java.util.zip.GZIPOutputStream

import com.ivan.nikolov.structural.decorator.common.{AdvancedInputReader, InputReader}
import com.typesafe.scalalogging.LazyLogging

trait CapitalizedInputReaderTrait extends InputReader {
  abstract override def readLines(): Stream[String] = super.readLines().map(_.toUpperCase)
}

trait CompressingInputReaderTrait extends InputReader with LazyLogging {
  abstract override def readLines(): Stream[String] = super.readLines().map {
    case line =>
      val text = line.getBytes(Charset.forName("UTF-8"))
      logger.info("Length before compression: {}", text.length.toString)
      val output = new ByteArrayOutputStream()
      val compressor = new GZIPOutputStream(output)
      try {
        compressor.write(text, 0, text.length)
        val outputByteArray = output.toByteArray
        logger.info("Length after compression: {}", outputByteArray.length.toString)
        new String(outputByteArray, Charset.forName("UTF-8"))
      } finally {
        compressor.close()
        output.close()
      }
  }
}

trait Base64EncoderInputReaderTrait extends InputReader {
  abstract override def readLines(): Stream[String] = super.readLines().map {
    case line => Base64.getEncoder.encodeToString(line.getBytes(Charset.forName("UTF-8")))
  }
}

object StackableTraitsExample {
  def main(args: Array[String]): Unit = {
    val stream = new BufferedReader(
      new InputStreamReader(
        new BufferedInputStream(this.getClass.getResourceAsStream("data.txt"))
      )
    )
    try {
      val reader = new AdvancedInputReader(stream) with CapitalizedInputReaderTrait
      reader.readLines().foreach(println)
    } finally {
      stream.close()
    }
  }
}

object StackableTraitsBigExample {
  def main(args: Array[String]): Unit = {
    val stream = new BufferedReader(
      new InputStreamReader(
        new BufferedInputStream(this.getClass.getResourceAsStream("data.txt"))
      )
    )
    try {
      val reader = new AdvancedInputReader(stream) with CapitalizedInputReaderTrait with Base64EncoderInputReaderTrait with CompressingInputReaderTrait
      reader.readLines().foreach(println)
    } finally {
      stream.close()
    }
  }
} 
Example 39
Source File: InputReaderDecorator.scala    From Scala-Design-Patterns-Second-Edition   with MIT License 5 votes vote down vote up
package com.ivan.nikolov.structural.decorator

import java.io.{InputStreamReader, BufferedInputStream, ByteArrayOutputStream, BufferedReader}
import java.nio.charset.Charset
import java.util.Base64
import java.util.zip.GZIPOutputStream

import com.ivan.nikolov.structural.decorator.common.{AdvancedInputReader, InputReader}
import com.typesafe.scalalogging.LazyLogging

abstract class InputReaderDecorator(inputReader: InputReader) extends InputReader {
  override def readLines(): Stream[String] = inputReader.readLines()
}

class CapitalizedInputReader(inputReader: InputReader) extends InputReaderDecorator(inputReader) {
  override def readLines(): Stream[String] = super.readLines().map(_.toUpperCase)
}

class CompressingInputReader(inputReader: InputReader) extends InputReaderDecorator(inputReader) with LazyLogging {
  override def readLines(): Stream[String] = super.readLines().map {
    case line =>
      val text = line.getBytes(Charset.forName("UTF-8"))
      logger.info("Length before compression: {}", text.length.toString)
      val output = new ByteArrayOutputStream()
      val compressor = new GZIPOutputStream(output)
      try {
        compressor.write(text, 0, text.length)
        val outputByteArray = output.toByteArray
        logger.info("Length after compression: {}", outputByteArray.length.toString)
        new String(outputByteArray, Charset.forName("UTF-8"))
      } finally {
        compressor.close()
        output.close()
      }
  }
}

class Base64EncoderInputReader(inputReader: InputReader) extends InputReaderDecorator(inputReader) {
  override def readLines(): Stream[String] = super.readLines().map {
    case line => Base64.getEncoder.encodeToString(line.getBytes(Charset.forName("UTF-8")))
  }
}

object DecoratorExample {
  def main(args: Array[String]): Unit = {
    val stream = new BufferedReader(
      new InputStreamReader(
        new BufferedInputStream(this.getClass.getResourceAsStream("data.txt"))
      )
    )
    try {
      val reader = new CapitalizedInputReader(new AdvancedInputReader(stream))
      reader.readLines().foreach(println)
    } finally {
      stream.close()
    }
  }
}

object DecoratorExampleBig {
  def main(args: Array[String]): Unit = {
    val stream = new BufferedReader(
      new InputStreamReader(
        new BufferedInputStream(this.getClass.getResourceAsStream("data.txt"))
      )
    )
    try {
      val reader = new CompressingInputReader(
        new Base64EncoderInputReader(
          new CapitalizedInputReader(
            new AdvancedInputReader(stream)
          )
        )
      )
      reader.readLines().foreach(println)
    } finally {
      stream.close()
    }
  }
} 