java.io.OutputStreamWriter Scala Examples

The following examples show how to use java.io.OutputStreamWriter in Scala. Each example is taken from an open-source project; the source file, project, and license are listed above the code.
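As a quick orientation before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the pattern most of them follow: wrap an OutputStream in an OutputStreamWriter with an explicit charset, optionally buffer it, write text, and close the writer in a finally block so the buffer is flushed.

import java.io.{BufferedWriter, FileOutputStream, OutputStreamWriter}
import java.nio.charset.StandardCharsets

object OutputStreamWriterSketch {
  def main(args: Array[String]): Unit = {
    // Wrap the byte stream in a character writer with an explicit charset,
    // then buffer it so that many small writes are batched.
    val writer = new BufferedWriter(
      new OutputStreamWriter(new FileOutputStream("example.txt"), StandardCharsets.UTF_8))
    try {
      writer.write("hello, OutputStreamWriter")
      writer.newLine()
    } finally {
      writer.close() // close() flushes the buffer and closes the underlying stream
    }
  }
}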
Example 1
Source File: BasicTestPerformance4Ftp.scala    From ohara   with Apache License 2.0
package oharastream.ohara.it.performance

import java.io.{BufferedWriter, OutputStreamWriter}
import java.util.concurrent.atomic.LongAdder

import oharastream.ohara.common.data.Row
import oharastream.ohara.common.util.{CommonUtils, Releasable}
import org.junit.AssumptionViolatedException
import spray.json.{JsNumber, JsString, JsValue}

import scala.jdk.CollectionConverters._
import oharastream.ohara.client.filesystem.FileSystem

import scala.concurrent.duration.Duration

abstract class BasicTestPerformance4Ftp extends BasicTestPerformance {
  private[this] val ftpHostname = value(PerformanceTestingUtils.FTP_HOSTNAME_KEY)
    .getOrElse(throw new AssumptionViolatedException(s"${PerformanceTestingUtils.FTP_HOSTNAME_KEY} is required"))

  private[this] val ftpPort = value(PerformanceTestingUtils.FTP_PORT_KEY)
    .getOrElse(throw new AssumptionViolatedException(s"${PerformanceTestingUtils.FTP_PORT_KEY} is required"))
    .toInt

  private[this] val ftpUser = value(PerformanceTestingUtils.FTP_USER_KEY)
    .getOrElse(throw new AssumptionViolatedException(s"${PerformanceTestingUtils.FTP_USER_KEY} is required"))

  private[this] val ftpPassword = value(PerformanceTestingUtils.FTP_PASSWORD_KEY)
    .getOrElse(throw new AssumptionViolatedException(s"${PerformanceTestingUtils.FTP_PASSWORD_KEY} is required"))

  
  protected val ftpSettings: Map[String, JsValue] = Map(
    // convert the hostname to IP address
    oharastream.ohara.connector.ftp.FTP_HOSTNAME_KEY  -> JsString(ftpHostname),
    oharastream.ohara.connector.ftp.FTP_PORT_KEY      -> JsNumber(ftpPort),
    oharastream.ohara.connector.ftp.FTP_USER_NAME_KEY -> JsString(ftpUser),
    oharastream.ohara.connector.ftp.FTP_PASSWORD_KEY  -> JsString(ftpPassword)
  )

  private[this] val csvInputFolderKey       = PerformanceTestingUtils.CSV_INPUT_KEY
  private[this] val csvOutputFolder: String = value(csvInputFolderKey).getOrElse("/input")

  private[this] val cleanupTestDataKey   = PerformanceTestingUtils.DATA_CLEANUP_KEY
  protected val cleanupTestData: Boolean = value(cleanupTestDataKey).forall(_.toBoolean)

  protected def setupInputData(timeout: Duration): (String, Long, Long) = {
    val client = ftpClient()
    try {
      if (!PerformanceTestingUtils.exists(client, csvOutputFolder))
        PerformanceTestingUtils.createFolder(client, csvOutputFolder)

      val result = generateData(
        numberOfRowsToFlush,
        timeout,
        (rows: Seq[Row]) => {
          val file        = s"$csvOutputFolder/${CommonUtils.randomString()}"
          val writer      = new BufferedWriter(new OutputStreamWriter(client.create(file)))
          val count       = new LongAdder()
          val sizeInBytes = new LongAdder()

          try {
            val cellNames: Set[String] = rows.head.cells().asScala.map(_.name).toSet
            writer
              .append(cellNames.mkString(","))
              .append("\n")
            rows.foreach(row => {
              val content = row.cells().asScala.map(_.value).mkString(",")
              count.increment()
              sizeInBytes.add(content.length)
              writer.append(content).append("\n")
            })
            (count.longValue(), sizeInBytes.longValue())
          } finally Releasable.close(writer)
        }
      )
      (csvOutputFolder, result._1, result._2)
    } finally Releasable.close(client)
  }

  protected[this] def ftpClient() =
    FileSystem.ftpBuilder
      .hostname(ftpHostname)
      .port(ftpPort)
      .user(ftpUser)
      .password(ftpPassword)
      .build
} 
Example 2
Source File: FeatureSelection.scala    From aerosolve   with Apache License 2.0
package com.airbnb.aerosolve.training

import java.io.BufferedWriter
import java.io.OutputStreamWriter
import java.util

import com.airbnb.aerosolve.core.{ModelRecord, ModelHeader, FeatureVector, Example}
import com.airbnb.aerosolve.core.models.LinearModel
import com.airbnb.aerosolve.core.util.Util
import com.typesafe.config.Config
import org.slf4j.{LoggerFactory, Logger}
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.rdd.RDD

import scala.collection.mutable.HashMap
import scala.collection.mutable.HashSet
import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.Buffer
import scala.collection.JavaConversions._
import scala.collection.JavaConverters._
import scala.util.Random
import scala.math.abs
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.fs.Path
import org.apache.hadoop.conf.Configuration

object FeatureSelection {
  private final val log: Logger = LoggerFactory.getLogger("FeatureSelection")
  val allKey : (String, String) = ("$ALL", "$POS")

  // Given a RDD compute the pointwise mutual information between
  // the positive label and the discrete features.
  def pointwiseMutualInformation(examples : RDD[Example],
                                 config : Config,
                                 key : String,
                                 rankKey : String,
                                 posThreshold : Double,
                                 minPosCount : Double,
                                 newCrosses : Boolean) : RDD[((String, String), Double)] = {
    val pointwise = LinearRankerUtils.makePointwise(examples, config, key, rankKey)
    val features = pointwise
      .mapPartitions(part => {
      // The tuple2 is var, var | positive
      val output = scala.collection.mutable.HashMap[(String, String), (Double, Double)]()
      part.foreach(example =>{
        val featureVector = example.example.get(0)
        val isPos = if (featureVector.floatFeatures.get(rankKey).asScala.head._2 > posThreshold) 1.0
        else 0.0
        val all : (Double, Double) = output.getOrElse(allKey, (0.0, 0.0))
        output.put(allKey, (all._1 + 1.0, all._2 + 1.0 * isPos))

        val features : Array[(String, String)] =
          LinearRankerUtils.getFeatures(featureVector)
        if (newCrosses) {
          for (i <- features) {
            for (j <- features) {
              if (i._1 < j._1) {
                val key = ("%s<NEW>%s".format(i._1, j._1),
                           "%s<NEW>%s".format(i._2, j._2))
                val x = output.getOrElse(key, (0.0, 0.0))
                output.put(key, (x._1 + 1.0, x._2 + 1.0 * isPos))
              }
            }
          }
        }
        for (feature <- features) {
          val x = output.getOrElse(feature, (0.0, 0.0))
          output.put(feature, (x._1 + 1.0, x._2 + 1.0 * isPos))
        }
      })
      output.iterator
    })
    .reduceByKey((a, b) => (a._1 + b._1, a._2 + b._2))
    .filter(x => x._2._2 >= minPosCount)

    val allCount = features.filter(x => x._1.equals(allKey)).take(1).head

    features.map(x => {
      val prob = x._2._1 / allCount._2._1
      val probPos = x._2._2 / allCount._2._2
      (x._1, math.log(probPos / prob) / math.log(2.0))
    })
  }

  // Returns the maximum entropy per family
  def maxEntropy(input : RDD[((String, String), Double)]) : RDD[((String, String), Double)] = {
    input
      .map(x => (x._1._1, (x._1._2, x._2)))
      .reduceByKey((a, b) => if (math.abs(a._2) > math.abs(b._2)) a else b)
      .map(x => ((x._1, x._2._1), x._2._2))
  }
} 
Example 3
Source File: InteractiveSignerLogger.scala    From coursier   with Apache License 2.0
package coursier.publish.signing.logger

import java.io.{OutputStream, OutputStreamWriter, Writer}

import coursier.publish.fileset.{FileSet, Path}
import coursier.publish.logging.ProgressLogger

final class InteractiveSignerLogger(out: Writer, verbosity: Int) extends SignerLogger {

  private val underlying = new ProgressLogger[Object](
    "Signed",
    "files",
    out,
    updateOnChange = true,
    doneEmoji = Some("\u270D\uFE0F ")
  )

  override def signing(id: Object, fileSet: FileSet): Unit = {
    underlying.processingSet(id, Some(fileSet.elements.length))
  }
  override def signed(id: Object, fileSet: FileSet): Unit =
    underlying.processedSet(id)

  override def signingElement(id: Object, path: Path): Unit = {
    if (verbosity >= 2)
      out.write(s"Signing ${path.repr}\n")
    underlying.processing(path.repr, id)
  }
  override def signedElement(id: Object, path: Path, excOpt: Option[Throwable]): Unit = {
    if (verbosity >= 2)
      out.write(s"Signed ${path.repr}\n")
    underlying.processed(path.repr, id, excOpt.nonEmpty)
  }

  override def start(): Unit =
    underlying.start()
  override def stop(keep: Boolean): Unit =
    underlying.stop(keep)
}

object InteractiveSignerLogger {
  def create(out: OutputStream, verbosity: Int): SignerLogger =
    new InteractiveSignerLogger(new OutputStreamWriter(out), verbosity)
} 
Example 4
Source File: InteractiveDirLogger.scala    From coursier   with Apache License 2.0
package coursier.publish.dir.logger

import java.io.{OutputStream, OutputStreamWriter}
import java.nio.file.Path

import com.lightbend.emoji.ShortCodes.Defaults.defaultImplicit.emoji
import coursier.publish.logging.ProgressLogger

final class InteractiveDirLogger(out: OutputStreamWriter, dirName: String, verbosity: Int) extends DirLogger {

  private val underlying = new ProgressLogger[String](
    "Read",
    s"files from $dirName",
    out,
    doneEmoji = emoji("mag").map(_.toString())
  )

  override def reading(dir: Path): Unit =
    underlying.processingSet(dirName, None)
  override def element(dir: Path, file: Path): Unit = {
    underlying.processing(file.toString, dirName)
    underlying.processed(file.toString, dirName, false)
  }
  override def read(dir: Path, elements: Int): Unit =
    underlying.processedSet(dirName)

  override def start(): Unit =
    underlying.start()
  override def stop(keep: Boolean): Unit =
    underlying.stop(keep)
}

object InteractiveDirLogger {
  def create(out: OutputStream, dirName: String, verbosity: Int): DirLogger =
    new InteractiveDirLogger(new OutputStreamWriter(out), dirName, verbosity)
} 
Example 5
Source File: Using.scala    From Argus-SAF   with Apache License 2.0
package org.argus.jawa.core.compiler.compile.io

import java.io.{Closeable, FileInputStream, FileOutputStream, InputStream, OutputStream, File => JavaFile}
import java.io.{BufferedInputStream, BufferedOutputStream, InputStreamReader, OutputStreamWriter}
import java.io.{BufferedReader, BufferedWriter}
import java.util.zip.GZIPInputStream
import java.net.URL
import java.nio.channels.FileChannel
import java.nio.charset.Charset
import java.util.jar.{JarFile, JarInputStream, JarOutputStream}
import java.util.zip.{GZIPOutputStream, ZipEntry, ZipFile, ZipInputStream, ZipOutputStream}

import ErrorHandling.translate

import scala.reflect.{Manifest => SManifest}

abstract class Using[Source, T]
{
  protected def open(src: Source): T
  def apply[R](src: Source)(f: T => R): R =
  {
    val resource = open(src)
    try { f(resource) }
    finally { close(resource) }
  }
  protected def close(out: T): Unit
}
abstract class WrapUsing[Source, T](implicit srcMf: SManifest[Source], targetMf: SManifest[T]) extends Using[Source, T]
{
  protected def label[S](m: SManifest[S]): String = m.runtimeClass.getSimpleName
  protected def openImpl(source: Source): T
  protected final def open(source: Source): T =
    translate("Error wrapping " + label(srcMf) + " in " + label(targetMf) + ": ") { openImpl(source) }
}
trait OpenFile[T] extends Using[JavaFile, T]
{
  protected def openImpl(file: JavaFile): T
  protected final def open(file: JavaFile): T =
  {
    val parent = file.getParentFile
    if(parent != null)
      IO.createDirectory(parent)
    openImpl(file)
  }
}
object Using
{
  def wrap[Source, T<: Closeable](openF: Source => T)(implicit srcMf: SManifest[Source], targetMf: SManifest[T]): Using[Source,T] =
    wrap(openF, closeCloseable)
  def wrap[Source, T](openF: Source => T, closeF: T => Unit)(implicit srcMf: SManifest[Source], targetMf: SManifest[T]): Using[Source,T] =
    new WrapUsing[Source, T]
    {
      def openImpl(source: Source): T = openF(source)
      def close(t: T): Unit = closeF(t)
    }

  def resource[Source, T <: Closeable](openF: Source => T): Using[Source,T] =
    resource(openF, closeCloseable)
  def resource[Source, T](openF: Source => T, closeF: T => Unit): Using[Source,T] =
    new Using[Source,T]
    {
      def open(s: Source): T = openF(s)
      def close(s: T): Unit = closeF(s)
    }
  def file[T <: Closeable](openF: JavaFile => T): OpenFile[T] = file(openF, closeCloseable)
  def file[T](openF: JavaFile => T, closeF: T => Unit): OpenFile[T] =
    new OpenFile[T]
    {
      def openImpl(file: JavaFile): T = openF(file)
      def close(t: T): Unit = closeF(t)
    }
  private def closeCloseable[T <: Closeable]: T => Unit = _.close()

  def bufferedOutputStream: Using[OutputStream, BufferedOutputStream] = wrap((out: OutputStream) => new BufferedOutputStream(out) )
  def bufferedInputStream: Using[InputStream, BufferedInputStream] = wrap((in: InputStream) => new BufferedInputStream(in) )
  def fileOutputStream(append: Boolean = false): OpenFile[BufferedOutputStream] = file(f => new BufferedOutputStream(new FileOutputStream(f, append)))
  def fileInputStream: OpenFile[BufferedInputStream] = file(f => new BufferedInputStream(new FileInputStream(f)))
  def urlInputStream: Using[URL, BufferedInputStream] = resource((u: URL) => translate("Error opening " + u + ": ")(new BufferedInputStream(u.openStream)))
  def fileOutputChannel: OpenFile[FileChannel] = file(f => new FileOutputStream(f).getChannel)
  def fileInputChannel: OpenFile[FileChannel] = file(f => new FileInputStream(f).getChannel)
  def fileWriter(charset: Charset = IO.utf8, append: Boolean = false): OpenFile[BufferedWriter] =
    file(f => new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f, append), charset)) )
  def fileReader(charset: Charset): OpenFile[BufferedReader] = file(f => new BufferedReader(new InputStreamReader(new FileInputStream(f), charset)) )
  def urlReader(charset: Charset): Using[URL, BufferedReader] = resource((u: URL) => new BufferedReader(new InputStreamReader(u.openStream, charset)))
  def jarFile(verify: Boolean): OpenFile[JarFile] = file(f => new JarFile(f, verify), (_: JarFile).close())
  def zipFile: OpenFile[ZipFile] = file(f => new ZipFile(f), (_: ZipFile).close())
  def streamReader: Using[(InputStream, Charset), InputStreamReader] = wrap{ (_: (InputStream, Charset)) match { case (in, charset) => new InputStreamReader(in, charset) } }
  def gzipInputStream: Using[InputStream, GZIPInputStream] = wrap((in: InputStream) => new GZIPInputStream(in, 8192) )
  def zipInputStream: Using[InputStream, ZipInputStream] = wrap((in: InputStream) => new ZipInputStream(in))
  def zipOutputStream: Using[OutputStream, ZipOutputStream] = wrap((out: OutputStream) => new ZipOutputStream(out))
  def gzipOutputStream: Using[OutputStream, GZIPOutputStream] = wrap((out: OutputStream) => new GZIPOutputStream(out, 8192), (_: GZIPOutputStream).finish())
  def jarOutputStream: Using[OutputStream, JarOutputStream] = wrap((out: OutputStream) => new JarOutputStream(out))
  def jarInputStream: Using[InputStream, JarInputStream] = wrap((in: InputStream) => new JarInputStream(in))
  def zipEntry(zip: ZipFile): Using[ZipEntry, InputStream] = resource((entry: ZipEntry) =>
    translate("Error opening " + entry.getName + " in " + zip + ": ") { zip.getInputStream(entry) } )
} 
Example 6
Source File: AbstractTableSpec.scala    From hail   with MIT License
package is.hail.expr.ir

import java.io.OutputStreamWriter

import is.hail.utils._
import is.hail.types._
import is.hail.io.fs.FS
import is.hail.rvd._
import org.json4s.jackson.JsonMethods
import org.json4s.{DefaultFormats, Extraction, Formats, JValue, ShortTypeHints}

import scala.language.implicitConversions

object SortOrder {
  def deserialize(b: Byte): SortOrder =
    if (b == 0.toByte) Ascending
    else if (b == 1.toByte) Descending
    else throw new RuntimeException(s"invalid sort order: $b")
}

sealed abstract class SortOrder {
  def serialize: Byte
}

case object Ascending extends SortOrder {
  def serialize: Byte = 0.toByte
}

case object Descending extends SortOrder {
  def serialize: Byte = 1.toByte
}

case class SortField(field: String, sortOrder: SortOrder)

abstract class AbstractTableSpec extends RelationalSpec {
  def references_rel_path: String

  def table_type: TableType

  def rowsComponent: RVDComponentSpec = getComponent[RVDComponentSpec]("rows")

  def rowsSpec: AbstractRVDSpec

  def globalsSpec: AbstractRVDSpec

  def indexed: Boolean = rowsSpec.indexed
}

object TableSpec {
  def apply(fs: FS, path: String, params: TableSpecParameters): TableSpec = {
    val globalsComponent = params.components("globals").asInstanceOf[RVDComponentSpec]
    val globalsSpec = globalsComponent.rvdSpec(fs, path)

    val rowsComponent = params.components("rows").asInstanceOf[RVDComponentSpec]
    val rowsSpec = rowsComponent.rvdSpec(fs, path)

    new TableSpec(params, globalsSpec, rowsSpec)
  }

  def fromJValue(fs: FS, path: String, jv: JValue): TableSpec = {
    implicit val formats: Formats = RelationalSpec.formats
    val params = jv.extract[TableSpecParameters]
    TableSpec(fs, path, params)
  }
}

case class TableSpecParameters(
  file_version: Int,
  hail_version: String,
  references_rel_path: String,
  table_type: TableType,
  components: Map[String, ComponentSpec]) {

  def write(fs: FS, path: String) {
    using(new OutputStreamWriter(fs.create(path + "/metadata.json.gz"))) { out =>
      out.write(JsonMethods.compact(decomposeWithName(this, "TableSpec")(RelationalSpec.formats)))
    }
  }
}

class TableSpec(
  val params: TableSpecParameters,
  val globalsSpec: AbstractRVDSpec,
  val rowsSpec: AbstractRVDSpec) extends AbstractTableSpec {
  def file_version: Int = params.file_version

  def hail_version: String = params.hail_version

  def components: Map[String, ComponentSpec] = params.components

  def references_rel_path: String = params.references_rel_path

  def table_type: TableType = params.table_type

  def toJValue: JValue = {
    decomposeWithName(params, "TableSpec")(RelationalSpec.formats)
  }
} 
Example 7
Source File: package.scala    From hail   with MIT License
package is.hail

import java.io.OutputStreamWriter
import java.nio.charset._

import is.hail.types.virtual.Type
import is.hail.utils._
import is.hail.io.fs.FS

package object io {
  type VCFFieldAttributes = Map[String, String]
  type VCFAttributes = Map[String, VCFFieldAttributes]
  type VCFMetadata = Map[String, VCFAttributes]

  val utfCharset = Charset.forName("UTF-8")

  def exportTypes(filename: String, fs: FS, info: Array[(String, Type)]) {
    val sb = new StringBuilder
    using(new OutputStreamWriter(fs.create(filename))) { out =>
      info.foreachBetween { case (name, t) =>
        sb.append(prettyIdentifier(name))
        sb.append(":")
        t.pretty(sb, 0, compact = true)
      } { sb += ',' }

      out.write(sb.result())
    }
  }
} 
Example 8
Source File: Json4sSerialization.scala    From kafka-serialization   with Apache License 2.0
package com.ovoenergy.kafka.serialization.json4s

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStreamReader, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import com.ovoenergy.kafka.serialization.core._
import org.apache.kafka.common.serialization.{Deserializer => KafkaDeserializer, Serializer => KafkaSerializer}
import org.json4s.Formats
import org.json4s.native.Serialization.{read, write}

import scala.reflect.ClassTag
import scala.reflect.runtime.universe._

trait Json4sSerialization {

  def json4sSerializer[T <: AnyRef](implicit jsonFormats: Formats): KafkaSerializer[T] = serializer { (_, data) =>
    val bout = new ByteArrayOutputStream()
    val writer = new OutputStreamWriter(bout, StandardCharsets.UTF_8)

    // TODO Use scala-arm
    try {
      write(data, writer)
      writer.flush()
    } finally {
      writer.close()
    }
    bout.toByteArray
  }

  def json4sDeserializer[T: TypeTag](implicit jsonFormats: Formats): KafkaDeserializer[T] = deserializer { (_, data) =>
    val tt = implicitly[TypeTag[T]]
    implicit val cl = ClassTag[T](tt.mirror.runtimeClass(tt.tpe))
    read[T](new InputStreamReader(new ByteArrayInputStream(data), StandardCharsets.UTF_8))
  }

} 
Example 9
Source File: SpraySerialization.scala    From kafka-serialization   with Apache License 2.0
package com.ovoenergy.kafka.serialization.spray

import java.io.{ByteArrayOutputStream, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import org.apache.kafka.common.serialization.{Deserializer => KafkaDeserializer, Serializer => KafkaSerializer}
import spray.json._
import com.ovoenergy.kafka.serialization.core._

trait SpraySerialization {

  def spraySerializer[T](implicit format: JsonWriter[T]): KafkaSerializer[T] = serializer { (_, data) =>
    val bout = new ByteArrayOutputStream()
    val osw = new OutputStreamWriter(bout, StandardCharsets.UTF_8)

    // TODO use scala-arm
    try {
      osw.write(data.toJson.compactPrint)
      osw.flush()
    } finally {
      osw.close()
    }
    bout.toByteArray
  }

  def sprayDeserializer[T](implicit format: JsonReader[T]): KafkaDeserializer[T] = deserializer { (_, data) =>
    JsonParser(ParserInput(data)).convertTo[T]
  }

} 
Example 10
Source File: HadoopFSHelpers.scala    From morpheus   with Apache License 2.0
package org.opencypher.morpheus.api.io.fs

import java.io.{BufferedReader, BufferedWriter, InputStreamReader, OutputStreamWriter}

import org.apache.hadoop.fs.{FileSystem, Path}
import org.opencypher.morpheus.api.io.util.FileSystemUtils.using

object HadoopFSHelpers {

  implicit class RichHadoopFileSystem(fileSystem: FileSystem) {

    protected def createDirectoryIfNotExists(path: Path): Unit = {
      if (!fileSystem.exists(path)) {
        fileSystem.mkdirs(path)
      }
    }

    def listDirectories(path: String): List[String] = {
      val p = new Path(path)
      createDirectoryIfNotExists(p)
      fileSystem.listStatus(p)
        .filter(_.isDirectory)
        .map(_.getPath.getName)
        .toList
    }

    def deleteDirectory(path: String): Unit = {
      fileSystem.delete(new Path(path),  true)
    }

    def readFile(path: String): String = {
      using(new BufferedReader(new InputStreamReader(fileSystem.open(new Path(path)), "UTF-8"))) { reader =>
        def readLines = Stream.cons(reader.readLine(), Stream.continually(reader.readLine))
        readLines.takeWhile(_ != null).mkString
      }
    }

    def writeFile(path: String, content: String): Unit = {
      val p = new Path(path)
      val parentDirectory = p.getParent
      createDirectoryIfNotExists(parentDirectory)
      using(fileSystem.create(p)) { outputStream =>
        using(new BufferedWriter(new OutputStreamWriter(outputStream, "UTF-8"))) { bufferedWriter =>
          bufferedWriter.write(content)
        }
      }
    }
  }

} 
Example 11
Source File: GraphLoaderSuite.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.graphx

import java.io.File
import java.io.FileOutputStream
import java.io.OutputStreamWriter
import java.nio.charset.StandardCharsets

import org.apache.spark.SparkFunSuite
import org.apache.spark.util.Utils

class GraphLoaderSuite extends SparkFunSuite with LocalSparkContext {

  test("GraphLoader.edgeListFile") {
    withSpark { sc =>
      val tmpDir = Utils.createTempDir()
      val graphFile = new File(tmpDir.getAbsolutePath, "graph.txt")
      val writer = new OutputStreamWriter(new FileOutputStream(graphFile), StandardCharsets.UTF_8)
      for (i <- (1 until 101)) writer.write(s"$i 0\n")
      writer.close()
      try {
        val graph = GraphLoader.edgeListFile(sc, tmpDir.getAbsolutePath)
        val neighborAttrSums = graph.aggregateMessages[Int](
          ctx => ctx.sendToDst(ctx.srcAttr),
          _ + _)
        assert(neighborAttrSums.collect.toSet === Set((0: VertexId, 100)))
      } finally {
        Utils.deleteRecursively(tmpDir)
      }
    }
  }
} 
Example 12
Source File: StreamMetadata.scala    From multi-tenancy-spark   with Apache License 2.0
package org.apache.spark.sql.execution.streaming

import java.io.{InputStreamReader, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import scala.util.control.NonFatal

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, FSDataOutputStream, Path}
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization

import org.apache.spark.internal.Logging
import org.apache.spark.sql.streaming.StreamingQuery


object StreamMetadata extends Logging {
  // json4s formats picked up implicitly by Serialization.write below
  implicit val format = Serialization.formats(NoTypeHints)

  def write(
      metadata: StreamMetadata,
      metadataFile: Path,
      hadoopConf: Configuration): Unit = {
    var output: FSDataOutputStream = null
    try {
      val fs = FileSystem.get(hadoopConf)
      output = fs.create(metadataFile)
      val writer = new OutputStreamWriter(output)
      Serialization.write(metadata, writer)
      writer.close()
    } catch {
      case NonFatal(e) =>
        logError(s"Error writing stream metadata $metadata to $metadataFile", e)
        throw e
    } finally {
      IOUtils.closeQuietly(output)
    }
  }
} 
Example 13
Source File: Main.scala    From scalajs-highcharts   with MIT License
package com.karasiq.highcharts.generator

import java.io.{BufferedWriter, FileOutputStream, OutputStreamWriter, PrintWriter}
import java.nio.file._
import java.nio.file.attribute.BasicFileAttributes

import scala.util.control.Exception
import scalaj.http.{Http, HttpOptions}

import com.karasiq.highcharts.generator.writers.{ScalaClassWriter, ScalaJsClassBuilder}

case class HighchartsApiDoc(library: String) {
  private val defaultPackage = System.getProperty(s"highcharts-generator.$library.package", s"com.$library")

  private def httpGet(url: String): List[ConfigurationObject] = {
    val page = Http.get(url)
      .header("User-Agent", "Mozilla/5.0 (X11; OpenBSD amd64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.81 Safari/537.36")
      .header("Accept", "application/json")
      .options(HttpOptions.connTimeout(10000), HttpOptions.readTimeout(10000))

    val json = page.asString
    ConfigurationObject.fromJson(json)
  }

  private def writeFiles(pkg: String, configs: List[ConfigurationObject], rootObject: Option[String] = None): Unit = {
    val header =
      s"""
          |package $pkg
          |
          |import scalajs.js, js.`|`
          |import com.highcharts.CleanJsObject
          |import com.highcharts.HighchartsUtils._
          |
          |""".stripMargin

    val outputDir = Paths.get(System.getProperty("highcharts-generator.output", "src/main/scala"), pkg.split("\\."):_*)
    Files.createDirectories(outputDir)

    // Remove all files
    Files.walkFileTree(outputDir, new SimpleFileVisitor[Path] {
      override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = {
        Files.delete(file)
        FileVisitResult.CONTINUE
      }
    })

    val classes = new ScalaJsClassBuilder().parse(configs, rootObject)
    val classWriter = new ScalaClassWriter
    classes.foreach { scalaJsClass ⇒
      val file = outputDir.resolve(scalaJsClass.scalaName + ".scala")
      println(s"Writing $file...")
      val writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file.toFile, true), "UTF-8")))
      Exception.allCatch.andFinally(writer.close()) {
        if (Files.size(file) == 0) {
          writer.print(header)
        }
        classWriter.writeClass(scalaJsClass) { line ⇒
          writer.println(line)
        }
        writer.flush()
      }
    }
  }

  def writeConfigs(): Unit = {
    val configs = httpGet(s"https://api.highcharts.com/$library/dump.json")
    writeFiles(s"$defaultPackage.config", configs, Some(s"${library.capitalize}Config"))
  }

  def writeApis(): Unit = {
    val configs = httpGet(s"https://api.highcharts.com/$library/object/dump.json")
    writeFiles(s"$defaultPackage.api", configs)
  }

  def writeAll(): Unit = {
    // TODO: https://github.com/highcharts/highcharts/issues/7227
    writeConfigs()
    // writeApis() // TODO: 404
  }
}

object Main extends App {
  HighchartsApiDoc("highcharts").writeAll()
  HighchartsApiDoc("highstock").writeAll()
  HighchartsApiDoc("highmaps").writeAll()
} 
Example 14
Source File: InteractiveUploadLogger.scala    From coursier   with Apache License 2.0
package coursier.publish.upload.logger

import java.io.{OutputStream, OutputStreamWriter, Writer}

import com.lightbend.emoji.ShortCodes.Defaults.defaultImplicit.emoji
import coursier.publish.fileset.FileSet
import coursier.publish.logging.ProgressLogger
import coursier.publish.upload.Upload

// FIXME Would have been better if dummy was passed by the Upload instance when calling the methods of UploadLogger
final class InteractiveUploadLogger(out: Writer, dummy: Boolean, isLocal: Boolean) extends UploadLogger {

  private val underlying = new ProgressLogger[Object](
    if (isLocal) {
      if (dummy)
        "Would have written"
      else
        "Wrote"
    } else {
      if (dummy)
        "Would have uploaded"
      else
        "Uploaded"
    },
    "files",
    out,
    doneEmoji = emoji("truck").map(_.toString())
  )

  override def uploadingSet(id: Object, fileSet: FileSet): Unit =
    underlying.processingSet(id, Some(fileSet.elements.length))
  override def uploadedSet(id: Object, fileSet: FileSet): Unit =
    underlying.processedSet(id)

  override def uploading(url: String, idOpt: Option[Object], totalOpt: Option[Long]): Unit =
    for (id <- idOpt)
      underlying.processing(url, id)

  override def progress(url: String, idOpt: Option[Object], uploaded: Long, total: Long): Unit =
    for (id <- idOpt)
      underlying.progress(url, id, uploaded, total)
  override def uploaded(url: String, idOpt: Option[Object], errorOpt: Option[Upload.Error]): Unit =
    for (id <- idOpt)
      underlying.processed(url, id, errorOpt.nonEmpty)

  override def start(): Unit =
    underlying.start()
  override def stop(keep: Boolean): Unit =
    underlying.stop(keep)
}

object InteractiveUploadLogger {
  def create(out: OutputStream, dummy: Boolean, isLocal: Boolean): UploadLogger =
    new InteractiveUploadLogger(new OutputStreamWriter(out), dummy, isLocal)
} 
Example 15
Source File: FileDownloader.scala    From seahorse   with Apache License 2.0
package ai.deepsense.deeplang.doperations.readwritedataframe.filestorage

import java.io.{BufferedWriter, FileOutputStream, IOException, OutputStreamWriter}
import java.nio.file.{Files, Paths}
import java.util.UUID

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

import ai.deepsense.deeplang.ExecutionContext
import ai.deepsense.deeplang.doperations.exceptions.DeepSenseIOException
import ai.deepsense.deeplang.doperations.readwritedataframe.FilePath

private[filestorage] object FileDownloader {

  def downloadFile(url: String)(implicit context: ExecutionContext): FilePath = {
    if (context.tempPath.startsWith("hdfs://")) {
      downloadFileToHdfs(url)
    } else {
      downloadFileToDriver(url)
    }
  }

  private def downloadFileToHdfs(url: String)(implicit context: ExecutionContext) = {
    val content = scala.io.Source.fromURL(url).getLines()
    val hdfsPath = s"${context.tempPath}/${UUID.randomUUID()}"

    val configuration = new Configuration()
    val hdfs = FileSystem.get(configuration)
    val file = new Path(hdfsPath)
    val hdfsStream = hdfs.create(file)
    val writer = new BufferedWriter(new OutputStreamWriter(hdfsStream))
    try {
      content.foreach {s =>
        writer.write(s)
        writer.newLine()
      }
    } finally {
      safeClose(writer)
      hdfs.close()
    }

    FilePath(hdfsPath)
  }

  private def downloadFileToDriver(url: String)
                                  (implicit context: ExecutionContext) = {
    val outputDirPath = Paths.get(context.tempPath)
    // We're checking if the output is a directory following symlinks.
    // The default behaviour of createDirectories is NOT to follow symlinks
    if (!Files.isDirectory(outputDirPath)) {
      Files.createDirectories(outputDirPath)
    }

    val outFilePath = Files.createTempFile(outputDirPath, "download", ".csv")
    // content is a stream. Do not invoke stuff like .toList() on it.
    val content = scala.io.Source.fromURL(url).getLines()
    val writer: BufferedWriter =
      new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFilePath.toFile)))
    try {
      content.foreach {s =>
        writer.write(s)
        writer.newLine()
      }
    } finally {
      safeClose(writer)
    }
    FilePath(s"file:///$outFilePath")
  }

  private def safeClose(bufferedWriter: BufferedWriter): Unit = {
    try {
      bufferedWriter.flush()
      bufferedWriter.close()
    } catch {
      case e: IOException => throw new DeepSenseIOException(e)
    }
  }

} 
Example 16
Source File: JsonFileReporter.scala    From kyuubi   with Apache License 2.0
package yaooqinn.kyuubi.metrics

import java.io.{BufferedWriter, Closeable, IOException, OutputStreamWriter}
import java.util.{Timer, TimerTask}
import java.util.concurrent.TimeUnit

import scala.util.Try
import scala.util.control.NonFatal

import com.codahale.metrics.MetricRegistry
import com.codahale.metrics.json.MetricsModule
import com.fasterxml.jackson.databind.ObjectMapper
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.kyuubi.Logging
import org.apache.spark.{KyuubiSparkUtil, SparkConf}
import org.apache.spark.KyuubiConf._

private[metrics] class JsonFileReporter(conf: SparkConf, registry: MetricRegistry)
  extends Closeable with Logging {

  private val jsonMapper = new ObjectMapper().registerModule(
    new MetricsModule(TimeUnit.MILLISECONDS, TimeUnit.MILLISECONDS, false))
  private val timer = new Timer(true)
  private val interval = KyuubiSparkUtil.timeStringAsMs(conf.get(METRICS_REPORT_INTERVAL))
  private val path = conf.get(METRICS_REPORT_LOCATION)
  private val hadoopConf = KyuubiSparkUtil.newConfiguration(conf)

  def start(): Unit = {
    timer.schedule(new TimerTask {
      var bw: BufferedWriter = _
      override def run(): Unit = try {
        val json = jsonMapper.writerWithDefaultPrettyPrinter().writeValueAsString(registry)
        val tmpPath = new Path(path + ".tmp")
        val tmpPathUri = tmpPath.toUri
        val fs = if (tmpPathUri.getScheme == null && tmpPathUri.getAuthority == null) {
          FileSystem.getLocal(hadoopConf)
        } else {
          FileSystem.get(tmpPathUri, hadoopConf)
        }
        fs.delete(tmpPath, true)
        bw = new BufferedWriter(new OutputStreamWriter(fs.create(tmpPath, true)))
        bw.write(json)
        bw.close()
        fs.setPermission(tmpPath, FsPermission.createImmutable(Integer.parseInt("644", 8).toShort))
        val finalPath = new Path(path)
        fs.rename(tmpPath, finalPath)
        fs.setPermission(finalPath,
          FsPermission.createImmutable(Integer.parseInt("644", 8).toShort))
      } catch {
        case NonFatal(e) => error("Error writing metrics to json file " + path, e)
      } finally {
        if (bw != null) {
          Try(bw.close())
        }
      }
    }, 0, interval)
  }

  override def close(): Unit = {
    timer.cancel()
  }
} 
Example 17
Source File: GraphLoaderSuite.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.graphx

import java.io.File
import java.io.FileOutputStream
import java.io.OutputStreamWriter
import java.nio.charset.StandardCharsets

import org.apache.spark.SparkFunSuite
import org.apache.spark.util.Utils

class GraphLoaderSuite extends SparkFunSuite with LocalSparkContext {

  test("GraphLoader.edgeListFile") {
    withSpark { sc =>
      val tmpDir = Utils.createTempDir()
      val graphFile = new File(tmpDir.getAbsolutePath, "graph.txt")
      val writer = new OutputStreamWriter(new FileOutputStream(graphFile), StandardCharsets.UTF_8)
      for (i <- (1 until 101)) writer.write(s"$i 0\n")
      writer.close()
      try {
        val graph = GraphLoader.edgeListFile(sc, tmpDir.getAbsolutePath)
        val neighborAttrSums = graph.aggregateMessages[Int](
          ctx => ctx.sendToDst(ctx.srcAttr),
          _ + _)
        assert(neighborAttrSums.collect.toSet === Set((0: VertexId, 100)))
      } finally {
        Utils.deleteRecursively(tmpDir)
      }
    }
  }
} 
Example 18
Source File: CodecStreams.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import java.io.{InputStream, OutputStream, OutputStreamWriter}
import java.nio.charset.{Charset, StandardCharsets}

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.compress._
import org.apache.hadoop.mapreduce.JobContext
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
import org.apache.hadoop.util.ReflectionUtils

import org.apache.spark.TaskContext

object CodecStreams {
  private def getDecompressionCodec(config: Configuration, file: Path): Option[CompressionCodec] = {
    val compressionCodecs = new CompressionCodecFactory(config)
    Option(compressionCodecs.getCodec(file))
  }

  def createInputStream(config: Configuration, file: Path): InputStream = {
    val fs = file.getFileSystem(config)
    val inputStream: InputStream = fs.open(file)

    getDecompressionCodec(config, file)
      .map(codec => codec.createInputStream(inputStream))
      .getOrElse(inputStream)
  }

  
  def getCompressionExtension(context: JobContext): String = {
    getCompressionCodec(context)
      .map(_.getDefaultExtension)
      .getOrElse("")
  }
} 
Example 19
Source File: StreamMetadata.scala    From Spark-2.3.1   with Apache License 2.0
package org.apache.spark.sql.execution.streaming

import java.io.{InputStreamReader, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import scala.util.control.NonFatal

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, FSDataOutputStream, Path}
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization

import org.apache.spark.internal.Logging
import org.apache.spark.sql.streaming.StreamingQuery


object StreamMetadata extends Logging {
  // json4s formats picked up implicitly by Serialization.write below
  implicit val format = Serialization.formats(NoTypeHints)

  def write(
      metadata: StreamMetadata,
      metadataFile: Path,
      hadoopConf: Configuration): Unit = {
    var output: FSDataOutputStream = null
    try {
      val fs = metadataFile.getFileSystem(hadoopConf)
      output = fs.create(metadataFile)
      val writer = new OutputStreamWriter(output)
      Serialization.write(metadata, writer)
      writer.close()
    } catch {
      case NonFatal(e) =>
        logError(s"Error writing stream metadata $metadata to $metadataFile", e)
        throw e
    } finally {
      IOUtils.closeQuietly(output)
    }
  }
} 
Example 20
Source File: SparkILoop.scala    From BigDatalog   with Apache License 2.0
package org.apache.spark.repl

import java.io.{BufferedReader, FileReader}

import Predef.{println => _, _}
import scala.util.Properties.{jdkHome, javaVersion, versionString, javaVmName}

import scala.tools.nsc.interpreter.{JPrintWriter, ILoop}
import scala.tools.nsc.Settings
import scala.tools.nsc.util.stringFromStream


// Companion object; the SparkILoop class itself is defined earlier in the original file.
object SparkILoop {

  def run(code: String, sets: Settings = new Settings): String = {
    import java.io.{ BufferedReader, StringReader, OutputStreamWriter }

    stringFromStream { ostream =>
      Console.withOut(ostream) {
        val input = new BufferedReader(new StringReader(code))
        val output = new JPrintWriter(new OutputStreamWriter(ostream), true)
        val repl = new SparkILoop(input, output)

        if (sets.classpath.isDefault)
          sets.classpath.value = sys.props("java.class.path")

        repl process sets
      }
    }
  }
  def run(lines: List[String]): String = run(lines.map(_ + "\n").mkString)
} 
Example 21
Source File: PileupApp.scala    From bdg-sequila   with Apache License 2.0
package org.biodatageeks.sequila.apps

import java.io.{OutputStreamWriter, PrintWriter}

import org.apache.spark.sql.{SequilaSession, SparkSession}
import org.bdgenomics.utils.instrumentation.{Metrics, MetricsListener, RecordedMetrics}
import org.biodatageeks.sequila.utils.{InternalParams, SequilaRegister}

object PileupApp extends App{
  override def main(args: Array[String]): Unit = {

    System.setProperty("spark.kryo.registrator", "org.biodatageeks.sequila.pileup.serializers.CustomKryoRegistrator")
    val spark = SparkSession
      .builder()
      .master("local[1]")
      .config("spark.driver.memory","4g")
      .config( "spark.serializer", "org.apache.spark.serializer.KryoSerializer" )
      .enableHiveSupport()
      .getOrCreate()

    val ss = SequilaSession(spark)
    SequilaRegister.register(ss)
    spark.sparkContext.setLogLevel("INFO")

    val bamPath = "/Users/aga/NA12878.chr20.md.bam"
    val referencePath = "/Users/aga/Homo_sapiens_assembly18_chr20.fasta"

    //    val bamPath = "/Users/marek/data/NA12878.chrom20.ILLUMINA.bwa.CEU.low_coverage.20121211.md.bam"
    //    val referencePath = "/Users/marek/data/hs37d5.fa"

    val tableNameBAM = "reads"

    ss.sql(s"""DROP  TABLE IF  EXISTS $tableNameBAM""")
    ss.sql(s"""
              |CREATE TABLE $tableNameBAM
              |USING org.biodatageeks.sequila.datasources.BAM.BAMDataSource
              |OPTIONS(path "$bamPath")
              |
      """.stripMargin)

    val query =
      s"""
         |SELECT count(*)
         |FROM  pileup('$tableNameBAM', 'NA12878', '${referencePath}')
       """.stripMargin
    ss
      .sqlContext
      .setConf(InternalParams.EnableInstrumentation, "true")
    Metrics.initialize(ss.sparkContext)
    val metricsListener = new MetricsListener(new RecordedMetrics())
    ss
      .sparkContext
      .addSparkListener(metricsListener)
    val results = ss.sql(query)
    ss.time{
      results.show()
    }
    val writer = new PrintWriter(new OutputStreamWriter(System.out, "UTF-8"))
    Metrics.print(writer, Some(metricsListener.metrics.sparkMetrics.stageTimes))
    writer.close()
    ss.stop()
  }
} 
Example 22
Source File: JoinOrderTestSuite.scala    From bdg-sequila   with Apache License 2.0
package org.biodatageeks.sequila.tests.rangejoins

import java.io.{OutputStreamWriter, PrintWriter}

import com.holdenkarau.spark.testing.{DataFrameSuiteBase, SharedSparkContext}
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{
  IntegerType,
  StringType,
  StructField,
  StructType
}
import org.bdgenomics.utils.instrumentation.{
  Metrics,
  MetricsListener,
  RecordedMetrics
}
import org.biodatageeks.sequila.rangejoins.IntervalTree.IntervalTreeJoinStrategyOptim
import org.scalatest.{BeforeAndAfter, FunSuite}

class JoinOrderTestSuite
    extends FunSuite
    with DataFrameSuiteBase
    with BeforeAndAfter
    with SharedSparkContext {

  val schema = StructType(
    Seq(StructField("chr", StringType),
        StructField("start", IntegerType),
        StructField("end", IntegerType)))
  val metricsListener = new MetricsListener(new RecordedMetrics())
  val writer = new PrintWriter(new OutputStreamWriter(System.out))
  before {
    System.setSecurityManager(null)
    spark.experimental.extraStrategies = new IntervalTreeJoinStrategyOptim(
      spark) :: Nil
    Metrics.initialize(sc)
    val rdd1 = sc
      .textFile(getClass.getResource("/refFlat.txt.bz2").getPath)
      .map(r => r.split('\t'))
      .map(
        r =>
          Row(
            r(2).toString,
            r(4).toInt,
            r(5).toInt
        ))
    val ref = spark.createDataFrame(rdd1, schema)
    ref.createOrReplaceTempView("ref")

    val rdd2 = sc
      .textFile(getClass.getResource("/snp150Flagged.txt.bz2").getPath)
      .map(r => r.split('\t'))
      .map(
        r =>
          Row(
            r(1).toString,
            r(2).toInt,
            r(3).toInt
        ))
    val snp = spark
      .createDataFrame(rdd2, schema)
    snp.createOrReplaceTempView("snp")
  }

  test("Join order - broadcasting snp table") {
    spark.sqlContext.setConf("spark.biodatageeks.rangejoin.useJoinOrder",
                             "true")
    val query =
      s"""
         |SELECT snp.*,ref.* FROM ref JOIN snp
         |ON (ref.chr=snp.chr AND snp.end>=ref.start AND snp.start<=ref.end)
       """.stripMargin

    assert(spark.sql(query).count === 616404L)

  }

  test("Join order - broadcasting ref table") {
    spark.sqlContext.setConf("spark.biodatageeks.rangejoin.useJoinOrder",
                             "true")
    val query =
      s"""
         |SELECT snp.*,ref.* FROM snp JOIN ref
         |ON (ref.chr=snp.chr AND snp.end>=ref.start AND snp.start<=ref.end)
       """.stripMargin
    assert(spark.sql(query).count === 616404L)

  }
  after {
    Metrics.print(writer, Some(metricsListener.metrics.sparkMetrics.stageTimes))
    writer.flush()
    Metrics.stopRecording()
  }
} 
Example 23
Source File: ScalafmtSbtReporter.scala    From sbt-scalafmt   with Apache License 2.0
package org.scalafmt.sbt

import java.io.PrintWriter
import java.io.OutputStreamWriter
import java.nio.file.Path

import org.scalafmt.interfaces.ScalafmtReporter
import sbt.internal.util.MessageOnlyException
import sbt.util.Logger

import scala.util.control.NoStackTrace

class ScalafmtSbtReporter(log: Logger, out: OutputStreamWriter)
    extends ScalafmtReporter {
  override def error(file: Path, message: String): Unit = {
    throw new MessageOnlyException(s"$message: $file")
  }

  override def error(file: Path, e: Throwable): Unit = {
    if (e.getMessage != null) {
      error(file, e.getMessage)
    } else {
      throw new FailedToFormat(file.toString, e)
    }
  }

  override def error(file: Path, message: String, e: Throwable): Unit = {
    if (e.getMessage != null) {
      error(file, s"$message: ${e.getMessage()}")
    } else {
      throw new FailedToFormat(file.toString, e)
    }
  }

  override def excluded(file: Path): Unit =
    log.debug(s"file excluded: $file")

  override def parsedConfig(config: Path, scalafmtVersion: String): Unit =
    log.debug(s"parsed config (v$scalafmtVersion): $config")

  override def downloadWriter(): PrintWriter = new PrintWriter(out)
  override def downloadOutputStreamWriter(): OutputStreamWriter = out

  private class FailedToFormat(filename: String, cause: Throwable)
      extends Exception(filename, cause)
      with NoStackTrace
} 
Example 24
Source File: SchrodingerExceptionTest.scala    From aloha   with MIT License
package com.eharmony.aloha.ex

import org.junit.{Before, Test}
import org.junit.Assert._
import java.io.{PrintWriter, OutputStreamWriter, ByteArrayOutputStream, PrintStream}

class SchrodingerExceptionTest {

    
    private[this] var ex: SchrodingerException = _

    @Before def before() {
        ex = new SchrodingerException
    }

    @Test def testFillInStackTrace() {
        assertTrue(new SchrodingerException().fillInStackTrace().isInstanceOf[SchrodingerException])
    }

    @Test(expected = classOf[SchrodingerException]) def testGetMessage() {
        ex.getMessage()
    }

    @Test(expected = classOf[SchrodingerException]) def testGetStackTrace() {
        ex.getStackTrace()
    }

    @Test(expected = classOf[SchrodingerException]) def testGetCause() {
        ex.getCause()
    }

    @Test(expected = classOf[SchrodingerException]) def testSetStackTrace() {
        ex.setStackTrace(Array.empty)
    }

    @Test(expected = classOf[SchrodingerException]) def testGetLocalizedMessage() {
        ex.getLocalizedMessage()
    }

    @Test(expected = classOf[SchrodingerException]) def testPrintStackTraceEmpty() {
        ex.printStackTrace()
    }

    @Test(expected = classOf[SchrodingerException]) def testPrintStackTraceStream() {
        val baos = new ByteArrayOutputStream()
        val ps = new PrintStream(baos)
        ex.printStackTrace(ps)
    }

    @Test(expected = classOf[SchrodingerException]) def testPrintStackTraceWriter() {
        val baos = new ByteArrayOutputStream()
        val osw = new OutputStreamWriter(baos)
        val ps = new PrintWriter(osw)
        ex.printStackTrace(ps)
    }

    @Test(expected = classOf[SchrodingerException]) def testInitCause() {
        ex.initCause(new Throwable)
    }

    @Test(expected = classOf[SchrodingerException]) def testToString() {
        ex.toString()
    }

    @Test def testNoThrowForSchrodingerExceptionWithSchrodingerExceptionCause() {
        new SchrodingerException(new SchrodingerException)
    }

    @Test def testNoThrowForSchrodingerExceptionWithExceptionCause() {
        new SchrodingerException(new Exception)
    }

    @Test(expected = classOf[SchrodingerException]) def testThrowForThrowableWithSchrodingerExceptionCause() {
        new Throwable(ex)
    }

    @Test(expected = classOf[SchrodingerException]) def testThrowForExceptionWithSchrodingerExceptionCause() {
        new Exception(ex)
    }

    @Test(expected = classOf[SchrodingerException]) def testThrowForRuntimeExceptionWithSchrodingerExceptionCause() {
        new RuntimeException(ex)
    }
} 
Example 25
Source File: Devel.scala    From libisabelle   with Apache License 2.0
package info.hupel.isabelle.setup

import java.io.OutputStreamWriter
import java.nio.file.{Files, Path}

import org.eclipse.jgit.api._
import org.eclipse.jgit.lib.TextProgressMonitor
import org.eclipse.jgit.storage.file._

import org.log4s._

trait Devel {
  def init(path: Path): Unit
  def update(path: Path): Unit
}

case class GitDevel(url: String, branch: String) extends Devel {

  private val logger = getLogger

  private val monitor = new TextProgressMonitor(new OutputStreamWriter(Console.err))

  def init(path: Path): Unit = {
    logger.debug(s"Cloning $branch from $url into $path")
    Files.createDirectories(path)
    new CloneCommand()
      .setDirectory(path.toFile)
      .setURI(url)
      .setBranch(branch)
      .setProgressMonitor(monitor)
      .call()
    ()
  }
  def update(path: Path): Unit = {
    logger.debug(s"Fetching $branch from $url into $path")
    val repo = new FileRepositoryBuilder()
      .findGitDir(path.toFile)
      .setup()
      .build()
    new Git(repo).pull()
      .setRemoteBranchName(branch)
      .call()
    ()
  }
}

object Devel {

  val knownDevels: Map[String, Devel] = Map(
    "isabelle-mirror" -> GitDevel("https://github.com/isabelle-prover/mirror-isabelle.git", "master")
  )

} 
Example 26
Source File: FileDownloader.scala    From seahorse-workflow-executor   with Apache License 2.0
package io.deepsense.deeplang.doperations.readwritedataframe.filestorage

import java.io.{BufferedWriter, FileOutputStream, IOException, OutputStreamWriter}
import java.nio.file.{Files, Paths}
import java.util.UUID

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

import io.deepsense.deeplang.ExecutionContext
import io.deepsense.deeplang.doperations.exceptions.DeepSenseIOException
import io.deepsense.deeplang.doperations.readwritedataframe.FilePath

private[filestorage] object FileDownloader {

  def downloadFile(url: String)(implicit context: ExecutionContext): FilePath = {
    if (context.tempPath.startsWith("hdfs://")) {
      downloadFileToHdfs(url)
    } else {
      downloadFileToDriver(url)
    }
  }

  private def downloadFileToHdfs(url: String)(implicit context: ExecutionContext) = {
    val content = scala.io.Source.fromURL(url).getLines()
    val hdfsPath = s"${context.tempPath}/${UUID.randomUUID()}"

    val configuration = new Configuration()
    val hdfs = FileSystem.get(configuration)
    val file = new Path(hdfsPath)
    val hdfsStream = hdfs.create(file)
    val writer = new BufferedWriter(new OutputStreamWriter(hdfsStream))
    try {
      content.foreach {s =>
        writer.write(s)
        writer.newLine()
      }
    } finally {
      safeClose(writer)
      hdfs.close()
    }

    FilePath(hdfsPath)
  }

  private def downloadFileToDriver(url: String)
                                  (implicit context: ExecutionContext) = {
    val outputDirPath = Paths.get(context.tempPath)
    // We're checking if the output is a directory following symlinks.
    // The default behaviour of createDirectories is NOT to follow symlinks
    if (!Files.isDirectory(outputDirPath)) {
      Files.createDirectories(outputDirPath)
    }

    val outFilePath = Files.createTempFile(outputDirPath, "download", ".csv")
    // content is a stream. Do not invoke stuff like .toList() on it.
    val content = scala.io.Source.fromURL(url).getLines()
    val writer: BufferedWriter =
      new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFilePath.toFile)))
    try {
      content.foreach {s =>
        writer.write(s)
        writer.newLine()
      }
    } finally {
      safeClose(writer)
    }
    FilePath(s"file:///$outFilePath")
  }

  private def safeClose(bufferedWriter: BufferedWriter): Unit = {
    try {
      bufferedWriter.flush()
      bufferedWriter.close()
    } catch {
      case e: IOException => throw new DeepSenseIOException(e)
    }
  }

} 
Example 27
Source File: TestCompileApplicationInstance.scala    From milan   with Apache License 2.0
package com.amazon.milan.tools

import java.io.{OutputStream, OutputStreamWriter}
import java.nio.ByteBuffer
import java.nio.charset.StandardCharsets
import java.nio.file.Files

import com.amazon.milan.application.{Application, ApplicationConfiguration, ApplicationInstance}
import com.amazon.milan.lang._
import com.amazon.milan.testing.applications._
import com.amazon.milan.{Id, SemanticVersion}
import org.junit.Assert._
import org.junit.Test


object TestCompileApplicationInstance {

  case class Record(recordId: String, i: Int)

  class Provider extends ApplicationInstanceProvider {
    override def getApplicationInstance(params: List[(String, String)]): ApplicationInstance = {
      val input = Stream.of[Record]
      val graph = new StreamGraph(input)
      val config = new ApplicationConfiguration
      config.setListSource(input, Record("1", 1))

      val instanceId = params.find(_._1 == "instanceId").get._2
      val appId = params.find(_._1 == "appId").get._2

      new ApplicationInstance(
        instanceId,
        new Application(appId, graph, SemanticVersion.ZERO),
        config)
    }
  }

  class Compiler extends ApplicationInstanceCompiler {
    override def compile(applicationInstance: ApplicationInstance,
                         params: List[(String, String)],
                         output: OutputStream): Unit = {
      val writer = new OutputStreamWriter(output)
      val testParam = params.find(_._1 == "test").get._2
      writer.write(testParam)
      writer.write(applicationInstance.toJsonString)
      writer.close()
    }
  }

}

@Test
class TestCompileApplicationInstance {
  @Test
  def test_CompileApplicationInstance_Main_SendsProviderAndCompilerParameters(): Unit = {

    val tempFile = Files.createTempFile("TestCompileApplicationInstance", ".scala")
    Files.deleteIfExists(tempFile)

    val appId = Id.newId()
    val instanceId = Id.newId()
    val testValue = Id.newId()

    try {
      val args = Array(
        "--provider",
        "com.amazon.milan.tools.TestCompileApplicationInstance.Provider",
        "--compiler",
        "com.amazon.milan.tools.TestCompileApplicationInstance.Compiler",
        "--package",
        "generated",
        "--output",
        tempFile.toString,
        s"-PinstanceId=$instanceId",
        s"-PappId=$appId",
        s"-Ctest=$testValue"
      )
      CompileApplicationInstance.main(args)

      val fileContents = StandardCharsets.UTF_8.decode(ByteBuffer.wrap(Files.readAllBytes(tempFile))).toString
      assertTrue(fileContents.contains(appId))
      assertTrue(fileContents.contains(instanceId))
      assertTrue(fileContents.contains(testValue))
    }
    finally {
      Files.deleteIfExists(tempFile)
    }
  }
} 
Example 28
Source File: BasicTestPerformance4Samba.scala    From ohara   with Apache License 2.0 5 votes vote down vote up
package oharastream.ohara.it.performance

import java.io.{BufferedWriter, OutputStreamWriter}
import java.util.concurrent.atomic.LongAdder

import oharastream.ohara.client.filesystem.FileSystem
import oharastream.ohara.common.data.Row
import oharastream.ohara.common.util.{CommonUtils, Releasable}
import org.junit.AssumptionViolatedException
import spray.json.{JsNumber, JsString, JsValue}

import scala.concurrent.duration.Duration
import scala.jdk.CollectionConverters._

abstract class BasicTestPerformance4Samba extends BasicTestPerformance {
  private[this] val sambaHostname: String = sys.env.getOrElse(
    PerformanceTestingUtils.SAMBA_HOSTNAME_KEY,
    throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_HOSTNAME_KEY} does not exist!!!")
  )

  private[this] val sambaUsername: String = sys.env.getOrElse(
    PerformanceTestingUtils.SAMBA_USER_KEY,
    throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_USER_KEY} does not exist!!!")
  )

  private[this] val sambaPassword: String = sys.env.getOrElse(
    PerformanceTestingUtils.SAMBA_PASSWORD_KEY,
    throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_PASSWORD_KEY} does not exist!!!")
  )

  private[this] val sambaPort: Int = sys.env
    .getOrElse(
      PerformanceTestingUtils.SAMBA_PORT_KEY,
      throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_PORT_KEY} does not exist!!!")
    )
    .toInt

  private[this] val sambaShare: String = sys.env.getOrElse(
    PerformanceTestingUtils.SAMBA_SHARE_KEY,
    throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_SHARE_KEY} does not exist!!!")
  )

  private[this] val csvInputFolderKey       = PerformanceTestingUtils.CSV_INPUT_KEY
  private[this] val csvOutputFolder: String = value(csvInputFolderKey).getOrElse("input")

  private[this] val NEED_DELETE_DATA_KEY: String = PerformanceTestingUtils.DATA_CLEANUP_KEY
  protected[this] val needDeleteData: Boolean    = sys.env.getOrElse(NEED_DELETE_DATA_KEY, "true").toBoolean

  protected val sambaSettings: Map[String, JsValue] = Map(
    oharastream.ohara.connector.smb.SMB_HOSTNAME_KEY   -> JsString(sambaHostname),
    oharastream.ohara.connector.smb.SMB_PORT_KEY       -> JsNumber(sambaPort),
    oharastream.ohara.connector.smb.SMB_USER_KEY       -> JsString(sambaUsername),
    oharastream.ohara.connector.smb.SMB_PASSWORD_KEY   -> JsString(sambaPassword),
    oharastream.ohara.connector.smb.SMB_SHARE_NAME_KEY -> JsString(sambaShare)
  )

  protected def setupInputData(timeout: Duration): (String, Long, Long) = {
    val client = sambaClient()
    try {
      if (!client.exists(csvOutputFolder)) PerformanceTestingUtils.createFolder(client, csvOutputFolder)

      val result = generateData(
        numberOfRowsToFlush,
        timeout,
        (rows: Seq[Row]) => {
          val file        = s"$csvOutputFolder/${CommonUtils.randomString()}"
          val writer      = new BufferedWriter(new OutputStreamWriter(client.create(file)))
          val count       = new LongAdder()
          val sizeInBytes = new LongAdder()

          try {
            val cellNames: Set[String] = rows.head.cells().asScala.map(_.name).toSet
            writer
              .append(cellNames.mkString(","))
              .append("\n")
            rows.foreach(row => {
              val content = row.cells().asScala.map(_.value).mkString(",")
              count.increment()
              sizeInBytes.add(content.length)
              writer
                .append(content)
                .append("\n")
            })
            (count.longValue(), sizeInBytes.longValue())
          } finally Releasable.close(writer)
        }
      )
      (csvOutputFolder, result._1, result._2)
    } finally Releasable.close(client)
  }

  protected[this] def sambaClient(): FileSystem =
    FileSystem.smbBuilder
      .hostname(sambaHostname)
      .port(sambaPort)
      .user(sambaUsername)
      .password(sambaPassword)
      .shareName(sambaShare)
      .build()
} 
Example 29
Source File: TestHdfsFileSystem.scala    From ohara   with Apache License 2.0 5 votes vote down vote up
package oharastream.ohara.client.filesystem.hdfs

import java.io.{BufferedWriter, File, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import oharastream.ohara.client.filesystem.{FileFilter, FileSystem, FileSystemTestBase}
import oharastream.ohara.common.exception.FileSystemException
import oharastream.ohara.common.util.CommonUtils
import org.junit.Test
import org.scalatest.matchers.should.Matchers._

class TestHdfsFileSystem extends FileSystemTestBase {
  private[this] val tempFolder: File = CommonUtils.createTempFolder("local_hdfs")

  private[this] val hdfsURL: String = new File(tempFolder.getAbsolutePath).toURI.toString

  override protected val fileSystem: FileSystem = FileSystem.hdfsBuilder.url(hdfsURL).build

  override protected val rootDir: String = tempFolder.toString

  // override this method because the Local HDFS doesn't support append()
  @Test
  override def testAppend(): Unit = {
    val file = randomFile()
    fileSystem.create(file).close()

    intercept[FileSystemException] {
      fileSystem.append(file)
    }.getMessage shouldBe "Not supported"
  }

  // override this method because the Local HDFS doesn't support append()
  @Test
  override def testDeleteFileThatHaveBeenRead(): Unit = {
    val file              = randomFile(rootDir)
    val data: Seq[String] = Seq("123", "456")
    val writer            = new BufferedWriter(new OutputStreamWriter(fileSystem.create(file), StandardCharsets.UTF_8))
    try data.foreach(line => {
      writer.append(line)
      writer.newLine()
    })
    finally writer.close()

    fileSystem.exists(file) shouldBe true
    fileSystem.readLines(file) shouldBe data
    fileSystem.delete(file)
    fileSystem.exists(file) shouldBe false
    fileSystem.listFileNames(rootDir, FileFilter.EMPTY).size shouldBe 0
  }
} 
Example 30
Source File: ProxyServer.scala    From devbox   with Apache License 2.0 5 votes vote down vote up
package cmdproxy

import java.io.BufferedReader
import java.io.InputStreamReader
import java.io.OutputStreamWriter
import java.io.PrintWriter
import java.net.InetAddress
import java.net.ServerSocket
import java.net.Socket

import scala.util.Using

import devbox.logger.FileLogger
import os.RelPath
import ujson.ParseException
import upickle.default.{macroRW, ReadWriter}

case class Request(workingDir: String, cmd: Seq[String])
object Request {
  implicit val rw: ReadWriter[Request] = macroRW
}


  val localDir: Map[os.RelPath, os.Path] = dirMapping.map(_.swap).toMap

  def start(): Unit = {
    logger.info(s"Starting command proxy server, listening at ${socket.getInetAddress}:${socket.getLocalPort}")
    (new Thread("Git Proxy Thread") {
      override def run(): Unit = {
        while (!socket.isClosed) {
          Using(socket.accept()) { handleConnection } recover {
            case e: java.net.SocketException if e.getMessage == "Socket closed" =>
              logger.error("Git proxy socket closed")
            case e: Exception =>
              logger.error(s"Error handling request ${e.getMessage}")
          }
        }
      }
    }).start()

  }

  def handleConnection(conn: Socket): Unit = try {
    logger.info(s"Accepting connection from ${conn.getInetAddress}")
    val in = new BufferedReader(new InputStreamReader(conn.getInputStream, ProxyServer.CHARSET_NAME))
    val out = new PrintWriter(new OutputStreamWriter(conn.getOutputStream, ProxyServer.CHARSET_NAME))

    upickle.default.read[Request](in.readLine()) match {
      case Request(dir, args) =>
        val workingDir = localDir
          .collect{case (remote, local) if RelPath(dir).startsWith(remote) =>
            local / RelPath(dir).relativeTo(remote)
          }
          .head

        // being cautious here and only execute "git" commands
        if (args.headOption.exists(_ == "git")) {
          logger.info(s"Executing `${args.mkString(" ")}` in $workingDir")

          val proc = os.proc(args).call(
            workingDir,
            mergeErrIntoOut = true,
            stdout = os.ProcessOutput.Readlines(str =>
              out.println(upickle.default.write(Left[String, Int](str)))
            ),
            check = false,
            timeout = 10000
          )

          out.println(upickle.default.write(Right[String, Int](proc.exitCode)))
        } else {
          val msg = s"Not executing non-git commend: `${args.mkString(" ")}`."
          logger.info(msg)
          out.println(upickle.default.write(Right[String, Int](1)))
        }

        out.flush()
    }
  } catch {
    case e: ParseException => logger.error(s"Error parsing incoming json request: ${e.getMessage}")
  }
}

object ProxyServer {
  val DEFAULT_PORT = 20280
  val CHARSET_NAME = "UTF-8"
} 
Example 31
Source File: GraphLoaderSuite.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.graphx

import java.io.File
import java.io.FileOutputStream
import java.io.OutputStreamWriter
import java.nio.charset.StandardCharsets

import org.apache.spark.SparkFunSuite
import org.apache.spark.util.Utils

class GraphLoaderSuite extends SparkFunSuite with LocalSparkContext {

  test("GraphLoader.edgeListFile") {
    withSpark { sc =>
      val tmpDir = Utils.createTempDir()
      val graphFile = new File(tmpDir.getAbsolutePath, "graph.txt")
      val writer = new OutputStreamWriter(new FileOutputStream(graphFile), StandardCharsets.UTF_8)
      for (i <- (1 until 101)) writer.write(s"$i 0\n")
      writer.close()
      try {
        val graph = GraphLoader.edgeListFile(sc, tmpDir.getAbsolutePath)
        val neighborAttrSums = graph.aggregateMessages[Int](
          ctx => ctx.sendToDst(ctx.srcAttr),
          _ + _)
        assert(neighborAttrSums.collect.toSet === Set((0: VertexId, 100)))
      } finally {
        Utils.deleteRecursively(tmpDir)
      }
    }
  }
} 
Example 32
Source File: SparkILoop.scala    From drizzle-spark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.repl

import java.io.BufferedReader

import scala.Predef.{println => _, _}
import scala.tools.nsc.Settings
import scala.tools.nsc.interpreter.{ILoop, JPrintWriter}
import scala.tools.nsc.util.stringFromStream
import scala.util.Properties.{javaVersion, javaVmName, versionString}


object SparkILoop {
  def run(code: String, sets: Settings = new Settings): String = {
    import java.io.{ BufferedReader, StringReader, OutputStreamWriter }

    stringFromStream { ostream =>
      Console.withOut(ostream) {
        val input = new BufferedReader(new StringReader(code))
        val output = new JPrintWriter(new OutputStreamWriter(ostream), true)
        val repl = new SparkILoop(input, output)

        if (sets.classpath.isDefault) {
          sets.classpath.value = sys.props("java.class.path")
        }
        repl process sets
      }
    }
  }
  def run(lines: List[String]): String = run(lines.map(_ + "\n").mkString)
} 
Example 33
Source File: PLYReadWriteTests.scala    From scalismo-faces   with Apache License 2.0 5 votes vote down vote up
package scalismo.faces.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, OutputStreamWriter}
import java.nio.ByteOrder
import java.util.Scanner

import scalismo.faces.FacesTestSuite
import scalismo.faces.io.ply._

class PLYReadWriteTests extends FacesTestSuite {

  describe("Write-read cycles to string, big- and little endian") {

    def testRWEndianCycle[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A], bo: ByteOrder): Unit = {
      val N = toWrite.size
      val os = new ByteArrayOutputStream()
      val writer = new SequenceWriter[A]
      writer.write(toWrite, os, bo)

      val ba = os.toByteArray

      val is = new ByteArrayInputStream(ba)
      val reader = new FixedLengthSequenceReader[A]
      val read = reader.read(N, is, bo)

      read.zip(toWrite).foreach { p =>
        p._1 shouldBe p._2
      }
    }

    def testRWStringCycle[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A]): Unit = {
      val N = toWrite.size
      val os = new ByteArrayOutputStream()
      val osw = new OutputStreamWriter(os)
      val writer = new SequenceWriter[A]
      writer.write(toWrite, osw)
      osw.flush()

      val is = new ByteArrayInputStream(os.toByteArray)
      val isr = new Scanner(is)
      val reader = new FixedLengthSequenceReader[A]
      val read = reader.read(N, isr)

      read.zip(toWrite).foreach { p =>
        p._1 shouldBe p._2
      }
    }

    def testAllThreeCycles[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A]): Unit = {
      testRWStringCycle(toWrite)
      testRWEndianCycle(toWrite, ByteOrder.BIG_ENDIAN)
      testRWEndianCycle(toWrite, ByteOrder.LITTLE_ENDIAN)
    }

    it("should result in the same sequence of bytes") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toByte
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of char") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toChar
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of short") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toShort
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of int") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toInt
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of long") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toLong
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of float") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toFloat
      testAllThreeCycles(toWrite)
    }
    it("should result in the same sequence of double") {
      val toWrite = for (i <- 0 until 20) yield (randomDouble * 255)
      testAllThreeCycles(toWrite)
    }

  }

} 
Example 34
Source File: CsvParserFactory.scala    From spark-cdm   with MIT License 5 votes vote down vote up
package com.microsoft.cdm.utils

import java.io.OutputStreamWriter

import com.univocity.parsers.csv.{CsvParser, CsvParserSettings, CsvWriter, CsvWriterSettings}


object CsvParserFactory {

  def build(): CsvParser = {
    val settings = new CsvParserSettings()
    val format = settings.getFormat
    format.setDelimiter(',')
    settings.setMaxCharsPerColumn(500000)
    settings.setMaxColumns(512 * 4)
    new CsvParser(settings)
  }

  def buildWriter(outputWriter: OutputStreamWriter): CsvWriter = {
    val settings = new CsvWriterSettings()
    settings.setQuoteAllFields(true)
    new CsvWriter(outputWriter, settings)
  }

} 
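For context, a short usage sketch of the factory above (the in-memory stream and the column values are illustrative assumptions): because the writer settings quote every field, the OutputStreamWriter receives fully quoted CSV rows.

import java.io.{ByteArrayOutputStream, OutputStreamWriter}
import java.nio.charset.StandardCharsets

val out = new ByteArrayOutputStream()
val csvWriter = CsvParserFactory.buildWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8))
csvWriter.writeHeaders("id", "name")
csvWriter.writeRow("1", "alice")
csvWriter.close()
println(out.toString(StandardCharsets.UTF_8.name()))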
Example 35
Source File: Pathway.scala    From piflow   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
package cn.piflow.bundle.microorganism

import java.io.{BufferedReader, InputStreamReader, OutputStreamWriter}

import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import cn.piflow.conf.{ConfigurableStop, Port, StopGroup}
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FSDataInputStream, FSDataOutputStream, FileSystem, Path}
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.json.JSONObject


class Pathway extends ConfigurableStop{
  override val authorEmail: String = "[email protected]"
  override val description: String = "Parse Pathway data"
  override val inportList: List[String] =List(Port.DefaultPort.toString)
  override val outportList: List[String] = List(Port.DefaultPort.toString)


  var cachePath:String = _
  def setProperties(map: Map[String, Any]): Unit = {
    cachePath=MapUtil.get(map,key="cachePath").asInstanceOf[String]
  }

  override def getPropertyDescriptor(): List[PropertyDescriptor] = {
    var descriptor : List[PropertyDescriptor] = List()
    val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").description("Temporary Cache File Path")
      .defaultValue("/pathway").required(true)
    descriptor = cachePath :: descriptor
    descriptor
  }

  override def getIcon(): Array[Byte] = {
    ImageUtil.getImage("icon/microorganism/Pathway.png")
  }

  override def getGroup(): List[String] = {
    List(StopGroup.MicroorganismGroup)
  }

  override def initialize(ctx: ProcessContext): Unit = {

  }

  override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
    val inDf: DataFrame = in.read()
    var pathStr: String = inDf.take(1)(0).get(0).asInstanceOf[String]

    val configuration: Configuration = new Configuration()
    val pathARR: Array[String] = pathStr.split("\\/")
    var hdfsUrl: String = ""
    for (x <- 0 until 3) {
      hdfsUrl += pathARR(x) + "/"
    }
    configuration.set("fs.defaultFS", hdfsUrl)
    val fs: FileSystem = FileSystem.get(configuration)


    val hdfsPathTemporary = hdfsUrl+cachePath+"/pathwayCache/pathwayCache.json"
    val path: Path = new Path(hdfsPathTemporary)
    if (fs.exists(path)) {
      fs.delete(path, true)
    }
    fs.create(path).close()
    val hdfsWriter: OutputStreamWriter = new OutputStreamWriter(fs.append(path))

    var fdis: FSDataInputStream = null
    var br: BufferedReader = null
    var doc: JSONObject = null
    var hasAnotherSequence:Boolean = true

    inDf.collect().foreach(row => {
      pathStr = row.get(0).asInstanceOf[String]

      fdis = fs.open(new Path(pathStr))
      br = new BufferedReader(new InputStreamReader(fdis))
          var count = 0
          hasAnotherSequence = true // reset for each input file so every file is parsed
          while (hasAnotherSequence) {
          count += 1
          doc = new JSONObject
          hasAnotherSequence = util.KeggPathway.process(br, doc)

          doc.write(hdfsWriter)
          hdfsWriter.write("\n")
        }
      br.close()
      fdis.close()
    })
    hdfsWriter.close()

    val df: DataFrame = pec.get[SparkSession]().read.json(hdfsPathTemporary)
    df.schema.printTreeString()
    println(df.count)

    out.write(df)

  }
} 
Example 36
Source File: MNIST.scala    From spark-tsne   with Apache License 2.0 5 votes vote down vote up
package com.github.saurfang.spark.tsne.examples


import java.io.{BufferedWriter, OutputStreamWriter}

import com.github.saurfang.spark.tsne.impl._
import com.github.saurfang.spark.tsne.tree.SPTree
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import org.apache.spark.{SparkConf, SparkContext}
import org.slf4j.LoggerFactory

object MNIST {
  private def logger = LoggerFactory.getLogger(MNIST.getClass)

  def main (args: Array[String]) {
    val conf = new SparkConf()
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .registerKryoClasses(Array(classOf[SPTree]))
    val sc = new SparkContext(conf)
    val hadoopConf = sc.hadoopConfiguration
    val fs = FileSystem.get(hadoopConf)

    val dataset = sc.textFile("data/MNIST/mnist.csv.gz")
      .zipWithIndex()
      .filter(_._2 < 6000)
      .sortBy(_._2, true, 60)
      .map(_._1)
      .map(_.split(","))
      .map(x => (x.head.toInt, x.tail.map(_.toDouble)))
      .cache()
    //logInfo(dataset.collect.map(_._2.toList).toList.toString)

    //val features = dataset.map(x => Vectors.dense(x._2))
    //val scaler = new StandardScaler(true, true).fit(features)
    //val scaledData = scaler.transform(features)
    //  .map(v => Vectors.dense(v.toArray.map(x => if(x.isNaN || x.isInfinite) 0.0 else x)))
    //  .cache()
    val data = dataset.flatMap(_._2)
    val mean = data.mean()
    val std = data.stdev()
    val scaledData = dataset.map(x => Vectors.dense(x._2.map(v => (v - mean) / std))).cache()

    val labels = dataset.map(_._1).collect()
    val matrix = new RowMatrix(scaledData)
    val pcaMatrix = matrix.multiply(matrix.computePrincipalComponents(50))
    pcaMatrix.rows.cache()

    val costWriter = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(s".tmp/MNIST/cost.txt"), true)))

    //SimpleTSNE.tsne(pcaMatrix, perplexity = 20, maxIterations = 200)
    //LBFGSTSNE.tsne(pcaMatrix, perplexity = 10, maxNumIterations = 500, numCorrections = 10, convergenceTol = 1e-8)
    BHTSNE.tsne(pcaMatrix, maxIterations = 500, callback = {
      case (i, y, loss) =>
        if(loss.isDefined) logger.info(s"$i iteration finished with loss $loss")

        val os = fs.create(new Path(s".tmp/MNIST/result${"%05d".format(i)}.csv"), true)
        val writer = new BufferedWriter(new OutputStreamWriter(os))
        try {
          (0 until y.rows).foreach {
            row =>
              writer.write(labels(row).toString)
              writer.write(y(row, ::).inner.toArray.mkString(",", ",", "\n"))
          }
          if(loss.isDefined) costWriter.write(loss.get + "\n")
        } finally {
          writer.close()
        }
    })
    costWriter.close()

    sc.stop()
  }
} 
Example 37
Source File: CodecStreams.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.datasources

import java.io.{InputStream, OutputStream, OutputStreamWriter}
import java.nio.charset.{Charset, StandardCharsets}

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.compress._
import org.apache.hadoop.mapreduce.JobContext
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
import org.apache.hadoop.util.ReflectionUtils

import org.apache.spark.TaskContext

object CodecStreams {
  private def getDecompressionCodec(config: Configuration, file: Path): Option[CompressionCodec] = {
    val compressionCodecs = new CompressionCodecFactory(config)
    Option(compressionCodecs.getCodec(file))
  }

  def createInputStream(config: Configuration, file: Path): InputStream = {
    val fs = file.getFileSystem(config)
    val inputStream: InputStream = fs.open(file)

    getDecompressionCodec(config, file)
      .map(codec => codec.createInputStream(inputStream))
      .getOrElse(inputStream)
  }

  
  def getCompressionExtension(context: JobContext): String = {
    getCompressionCodec(context)
      .map(_.getDefaultExtension)
      .getOrElse("")
  }
} 
Example 38
Source File: StreamMetadata.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.streaming

import java.io.{InputStreamReader, OutputStreamWriter}
import java.nio.charset.StandardCharsets
import java.util.ConcurrentModificationException

import scala.util.control.NonFatal

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileAlreadyExistsException, FSDataInputStream, Path}
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization

import org.apache.spark.internal.Logging
import org.apache.spark.sql.execution.streaming.CheckpointFileManager.CancellableFSDataOutputStream
import org.apache.spark.sql.streaming.StreamingQuery


object StreamMetadata extends Logging {
  implicit val format = Serialization.formats(NoTypeHints)

  def write(
      metadata: StreamMetadata,
      metadataFile: Path,
      hadoopConf: Configuration): Unit = {
    var output: CancellableFSDataOutputStream = null
    try {
      val fileManager = CheckpointFileManager.create(metadataFile.getParent, hadoopConf)
      output = fileManager.createAtomic(metadataFile, overwriteIfPossible = false)
      val writer = new OutputStreamWriter(output)
      Serialization.write(metadata, writer)
      writer.close()
    } catch {
      case e: FileAlreadyExistsException =>
        if (output != null) {
          output.cancel()
        }
        throw new ConcurrentModificationException(
          s"Multiple streaming queries are concurrently using $metadataFile", e)
      case e: Throwable =>
        if (output != null) {
          output.cancel()
        }
        logError(s"Error writing stream metadata $metadata to $metadataFile", e)
        throw e
    }
  }
} 
Example 39
Source File: InteractiveChecksumLogger.scala    From coursier   with Apache License 2.0 5 votes vote down vote up
package coursier.publish.checksum.logger

import java.io.{OutputStream, OutputStreamWriter, Writer}

import coursier.publish.checksum.ChecksumType
import coursier.publish.fileset.FileSet
import coursier.publish.logging.ProgressLogger

final class InteractiveChecksumLogger(out: Writer, verbosity: Int) extends ChecksumLogger {

  private val underlying = new ProgressLogger[Object](
    "Computed",
    "checksums",
    out
  )

  override def computingSet(id: Object, fs: FileSet): Unit =
    underlying.processingSet(id, Some(fs.elements.length))
  override def computing(id: Object, type0: ChecksumType, path: String): Unit = {
    if (verbosity >= 2)
      out.write(s"Computing ${type0.name} checksum of ${path.repr}\n")
    underlying.processing(path, id)
  }
  override def computed(id: Object, type0: ChecksumType, path: String, errorOpt: Option[Throwable]): Unit = {
    if (verbosity >= 2)
      out.write(s"Computed ${type0.name} checksum of ${path.repr}\n")
    underlying.processed(path, id, errorOpt.nonEmpty)
  }
  override def computedSet(id: Object, fs: FileSet): Unit =
    underlying.processedSet(id)

  override def start(): Unit =
    underlying.start()
  override def stop(keep: Boolean): Unit =
    underlying.stop(keep)
}

object InteractiveChecksumLogger {
  def create(out: OutputStream, verbosity: Int): InteractiveChecksumLogger =
    new InteractiveChecksumLogger(new OutputStreamWriter(out), verbosity)
} 
Example 40
Source File: MessageWriter.scala    From lsp4s   with Apache License 2.0 5 votes vote down vote up
package scala.meta.jsonrpc

import java.io.ByteArrayOutputStream
import java.io.OutputStream
import java.io.OutputStreamWriter
import java.io.PrintWriter
import java.nio.ByteBuffer
import java.nio.charset.StandardCharsets
import scala.concurrent.Future
import io.circe.syntax._
import monix.execution.Ack
import monix.reactive.Observer
import scribe.LoggerSupport


  def write(msg: Message): Future[Ack] = lock.synchronized {
    baos.reset()
    val json = msg.asJson
    val protocol = BaseProtocolMessage.fromJson(json)
    logger.trace(s" --> $json")
    val byteBuffer = MessageWriter.write(protocol, baos, headerOut)
    out.onNext(byteBuffer)
  }
}

object MessageWriter {

  def headerWriter(out: OutputStream): PrintWriter = {
    new PrintWriter(new OutputStreamWriter(out, StandardCharsets.US_ASCII))
  }

  def write(message: BaseProtocolMessage): ByteBuffer = {
    val out = new ByteArrayOutputStream()
    val header = headerWriter(out)
    write(message, out, header)
  }

  def write(
      message: BaseProtocolMessage,
      out: ByteArrayOutputStream,
      headerOut: PrintWriter
  ): ByteBuffer = {
    message.header.foreach {
      case (key, value) =>
        headerOut.write(key)
        headerOut.write(": ")
        headerOut.write(value)
        headerOut.write("\r\n")
    }
    headerOut.write("\r\n")
    out.write(message.content)
    out.flush()
    val buffer = ByteBuffer.wrap(out.toByteArray, 0, out.size())
    buffer
  }
} 
Example 41
Source File: package.scala    From sjson-new   with Apache License 2.0 5 votes vote down vote up
package sjsonnew
package support.scalajson

import java.io.{ByteArrayOutputStream, OutputStreamWriter}

import shaded.scalajson.ast.unsafe._
import org.scalactic._

package object unsafe {
  implicit class AnyOps[A: JsonWriter](val _x: A) {
    def toJson: JValue    = Converter toJsonUnsafe _x
    def toJsonStr: String = _x.toJson.toJsonStr
  }
  implicit class AnyOps2[A: JsonWriter : JsonReader](val _x: A) {
    def jsonRoundTrip: A = _x.toJson.toJsonStr.toJson.fromJson[A]
    def jsonPrettyRoundTrip: A = _x.toJson.toPrettyStr.toJson.fromJson[A]
    def jsonBinaryRoundTrip: A = _x.toJson.toBinary.toJson.fromJson[A]
  }

  implicit class JValueOps(val _j: JValue) extends AnyVal {
    def toJsonStr: String          = CompactPrinter(_j)
    def toPrettyStr: String        = PrettyPrinter(_j)
    def toBinary: Array[Byte]      = {
      val baos = new ByteArrayOutputStream()
      val xpto = new OutputStreamWriter(baos)
      CompactPrinter.print(_j, xpto)
      xpto.close()
      baos.toByteArray
    }
    def fromJson[A: JsonReader]: A = Converter.fromJsonUnsafe[A](_j)

    // scalajson.ast.unsafe doesn't have good toStrings
    def to_s: String = _j match {
      case JNull          => "JNull"
      case JString(value) => s"JString($value)"
      case JNumber(value) => s"JNumber($value)"
      case JTrue          => "JTrue"
      case JFalse         => "JFalse"
      case JObject(value) => value.iterator map (f => s"${f.field}: ${f.value.to_s}") mkString ("JObject(", ", ", ")")
      case JArray(value)  => value.iterator map (_.to_s) mkString ("JArray(", ", ", ")")
    }
  }

  implicit class StringOps(val _s: String) extends AnyVal {
    def toJson: JValue                = Parser parseUnsafe _s
    def fromJsonStr[A: JsonReader]: A = _s.toJson.fromJson[A]
  }

  implicit class ByteArrayOps(val a: Array[Byte]) extends AnyVal {
    def toJson: JValue                = Parser.parseFromByteBuffer(java.nio.ByteBuffer.wrap(a)).get
  }

  // Can't trust unsafe's toString, eg. JObject doesn't nicely toString its fields array, so its toString sucks
  implicit val altPrettifier: Prettifier = Prettifier {
    case j: JValue => j.to_s
    case x         => Prettifier default x
  }
} 
Example 42
Source File: GraphLoaderSuite.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.graphx

import java.io.File
import java.io.FileOutputStream
import java.io.OutputStreamWriter
import java.nio.charset.StandardCharsets

import org.apache.spark.SparkFunSuite
import org.apache.spark.util.Utils

class GraphLoaderSuite extends SparkFunSuite with LocalSparkContext {

  test("GraphLoader.edgeListFile") {
    withSpark { sc =>
      val tmpDir = Utils.createTempDir()
      val graphFile = new File(tmpDir.getAbsolutePath, "graph.txt")
      val writer = new OutputStreamWriter(new FileOutputStream(graphFile), StandardCharsets.UTF_8)
      for (i <- (1 until 101)) writer.write(s"$i 0\n")
      writer.close()
      try {
        val graph = GraphLoader.edgeListFile(sc, tmpDir.getAbsolutePath)
        val neighborAttrSums = graph.aggregateMessages[Int](
          ctx => ctx.sendToDst(ctx.srcAttr),
          _ + _)
        assert(neighborAttrSums.collect.toSet === Set((0: VertexId, 100)))
      } finally {
        Utils.deleteRecursively(tmpDir)
      }
    }
  }
} 
Example 43
Source File: StreamMetadata.scala    From sparkoscope   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.streaming

import java.io.{InputStreamReader, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import scala.util.control.NonFatal

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, FSDataOutputStream, Path}
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization

import org.apache.spark.internal.Logging
import org.apache.spark.sql.streaming.StreamingQuery


object StreamMetadata extends Logging {
  implicit val format = Serialization.formats(NoTypeHints)

  def write(
      metadata: StreamMetadata,
      metadataFile: Path,
      hadoopConf: Configuration): Unit = {
    var output: FSDataOutputStream = null
    try {
      val fs = FileSystem.get(hadoopConf)
      output = fs.create(metadataFile)
      val writer = new OutputStreamWriter(output)
      Serialization.write(metadata, writer)
      writer.close()
    } catch {
      case NonFatal(e) =>
        logError(s"Error writing stream metadata $metadata to $metadataFile", e)
        throw e
    } finally {
      IOUtils.closeQuietly(output)
    }
  }
} 
Example 44
Source File: TestDiskFull.scala    From eidos   with Apache License 2.0 5 votes vote down vote up
package org.clulab.wm.eidos.utils

import java.io.BufferedOutputStream
import java.io.FileOutputStream
import java.io.IOException
import java.io.OutputStreamWriter
import java.io.PrintWriter
import java.io.SyncFailedException
import java.nio.charset.StandardCharsets

import org.clulab.wm.eidos.test.TestUtils._
import org.clulab.wm.eidos.utils.Closer.AutoCloser

class TestDiskFull extends Test {

  def test1 = {
    val file = "/E:/full.dat"
    var i = 0

    try {
      val text1 = "The quick brown fox jumped over the lazy dog."
      val text = text1 + text1

      for (limit <- 1 until 400) {
        val fos = new FileOutputStream(file)
        val osw = new OutputStreamWriter(new BufferedOutputStream(fos), StandardCharsets.UTF_8.toString)
        i = 0

        new PrintWriter(osw).autoClose { pw =>
          while (i < limit) {
            pw.print(text)
            i += 1
            //          pw.flush()
            //          osw.flush()
            //          fos.flush()
            fos.getFD.sync()
          }
        }
      }
    }
    catch {
      case exception: SyncFailedException =>
        println(s"Synchronization failed for file $file at $i")
        println("Exiting with code -2 on assumption that the disk is full")
        System.exit(-2)
      case exception: IOException =>
        println(s"IO failed for file $file at $i")
        println("Exiting with code -2 on assumption that the disk is full")
        System.exit(-2)
      case exception: Exception =>
        println(s"Exception for file $file at $i")
        exception.printStackTrace()
      case throwable: Throwable =>
        println(s"Throwable for file $file at $i")
        throwable.printStackTrace()
    }
  }

//  test1
} 
Example 45
Source File: SessionDataFileHDFSWriter.scala    From spark_training   with Apache License 2.0 5 votes vote down vote up
package com.malaska.spark.training.streaming.dstream.sessionization

import java.io.BufferedWriter
import java.io.FileWriter
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.conf.Configuration
import java.io.OutputStreamWriter
import org.apache.hadoop.fs.Path
import java.util.Random

object SessionDataFileHDFSWriter {
  
  val eol = System.getProperty("line.separator");  
  
  def main(args: Array[String]) {
    if (args.length == 0) {
        println("SessionDataFileWriter {tempDir} {distDir} {numberOfFiles} {numberOfEventsPerFile} {waitBetweenFiles}");
        return;
    }
    val conf = new Configuration
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"))
    conf.addResource(new Path("/etc/hadoop/conf/mapred-site.xml"))
    conf.addResource(new Path("/etc/hadoop/conf/hdfs-site.xml"))
    
    val fs = FileSystem.get(new Configuration)
    val rootTempDir = args(0)
    val rootDistDir = args(1)
    val files = args(2).toInt
    val loops = args(3).toInt
    val waitBetweenFiles = args(4).toInt
    val r = new Random
    for (f <- 1 to files) {
      val rootName = "/weblog." + System.currentTimeMillis()
      val tmpPath = new Path(rootTempDir + rootName + ".tmp")
      val writer = new BufferedWriter(new OutputStreamWriter(fs.create(tmpPath)))
      
      print(f + ": [")
      
      val randomLoops = loops + r.nextInt(loops)
      
      for (i <- 1 to randomLoops) {
        writer.write(SessionDataGenerator.getNextEvent + eol)
        if (i%100 == 0) {
          print(".")
        }
      }
      println("]")
      writer.close
      
      val distPath = new Path(rootDistDir + rootName + ".dat")
      
      fs.rename(tmpPath, distPath)
      Thread.sleep(waitBetweenFiles)
    }
    println("Done")
  }
} 
Example 46
Source File: MustacheTemplates.scala    From fintrospect   with Apache License 2.0 5 votes vote down vote up
package io.fintrospect.templating

import java.io.{ByteArrayOutputStream, File, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import com.github.mustachejava.resolver.{DefaultResolver, FileSystemResolver}
import com.github.mustachejava.{DefaultMustacheFactory, Mustache}
import com.twitter.io.Buf

object MustacheTemplates extends Templates {

  private def render(view: View, mustache: Mustache): Buf = {
    val outputStream = new ByteArrayOutputStream(4096)
    val writer = new OutputStreamWriter(outputStream, StandardCharsets.UTF_8)
    try {
      mustache.execute(writer, view)
    } finally {
      writer.close()
    }

    Buf.ByteArray.Owned(outputStream.toByteArray)
  }

  def CachingClasspath(baseClasspathPackage: String = "."): TemplateRenderer = new TemplateRenderer {

    private val factory = new DefaultMustacheFactory(new DefaultResolver(baseClasspathPackage)) {
      setObjectHandler(new ScalaObjectHandler)
    }

    def toBuf(view: View): Buf = render(view, factory.compile(view.template + ".mustache"))
  }

  def Caching(baseTemplateDir: String): TemplateRenderer = new TemplateRenderer {

    private val factory = new DefaultMustacheFactory(new FileSystemResolver(new File(baseTemplateDir))) {
      setObjectHandler(new ScalaObjectHandler)
    }

    def toBuf(view: View): Buf = render(view, factory.compile(view.template + ".mustache"))
  }

  def HotReload(baseTemplateDir: String = "."): TemplateRenderer = new TemplateRenderer {

    class WipeableMustacheFactory extends DefaultMustacheFactory(new FileSystemResolver(new File(baseTemplateDir))) {
      setObjectHandler(new ScalaObjectHandler)
    }

    def toBuf(view: View): Buf = render(view, new WipeableMustacheFactory().compile(view.template + ".mustache"))
  }
} 
Example 47
Source File: CommonLog.scala    From AppCrawler   with Apache License 2.0 5 votes vote down vote up
package com.testerhome.appcrawler

import java.io.OutputStreamWriter

import com.fasterxml.jackson.annotation.JsonIgnore
import org.apache.log4j._



trait CommonLog {
  BasicConfigurator.configure()
  Logger.getRootLogger.setLevel(Level.INFO)
  @JsonIgnore
  val layout=new PatternLayout("%d{yyyy-MM-dd HH:mm:ss} %p [%c{1}.%M.%L] %m%n")
  @JsonIgnore
  lazy val log = initLog()

  def initLog(): Logger ={
    val log = Logger.getLogger(this.getClass.getName)
    //val log=Logger.getRootLogger
    if(log.getAppender("console")==null){
      val console=new ConsoleAppender()
      console.setName("console")
      console.setWriter(new OutputStreamWriter(System.out))
      console.setLayout(layout)
      log.addAppender(console)
    } else {
      log.info("console appender already exists")
    }
    log.trace(s"set ${this} log level to ${GA.logLevel}")
    log.setLevel(GA.logLevel)
    log.setAdditivity(false)
    log
  }
} 
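A minimal usage sketch of the trait above (the class name is a made-up example, and it assumes CommonLog and its GA dependency are on the classpath): mixing it in gives the class its own log4j logger whose console appender writes through an OutputStreamWriter over System.out.

class MyCrawler extends CommonLog {
  def crawl(): Unit = log.info("starting crawl")
}

new MyCrawler().crawl()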
Example 48
Source File: StarsAnalysisDemo.scala    From CkoocNLP   with Apache License 2.0 5 votes vote down vote up
package applications.analysis

import java.io.{BufferedWriter, FileOutputStream, OutputStreamWriter}

import functions.segment.Segmenter
import org.apache.log4j.{Level, Logger}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Row, SparkSession}


object StarsAnalysisDemo {
  def main(args: Array[String]) {
    Logger.getLogger("org").setLevel(Level.WARN)

    val spark = SparkSession
      .builder
      .master("local[2]")
      .appName("Stars Analysis Demo")
      .getOrCreate()

    val filePath = "E:/data/chinaNews/entertainment.txt"


    // Load the data, keep only the year and content fields, and clean up the content field
    import spark.implicits._
    val data = spark.sparkContext.textFile(filePath).flatMap { line =>
      val tokens: Array[String] = line.split("\u00ef")
      if (tokens.length > 3) {
        var year: String = tokens(2).split("-")(0)
        if (tokens(2).contains("年")) year = tokens(2).split("年")(0)

        var content = tokens(3)
        if (content.length > 22 && content.substring(0, 20).contains("日电")) {
          content = content.substring(content.indexOf("日电") + 2, content.length).trim
        }

        if (content.startsWith("(")) content = content.substring(content.indexOf(")") + 1, content.length)
        if (content.length > 20 && content.substring(content.length - 20, content.length).contains("记者")) {
          content = content.substring(0, content.lastIndexOf("记者")).trim
        }

        Some(year, content)
      } else None
    }.toDF("year", "content")

    // Segment the text, drop single-character terms, and keep each term's part-of-speech tag
    val segmenter = new Segmenter()
      .isAddNature(true)
      .isDelEn(true)
      .isDelNum(true)
      .setMinTermLen(2)
      .setMinTermNum(5)
      .setSegType("StandardSegment")
      .setInputCol("content")
      .setOutputCol("segmented")
    val segDF: DataFrame = segmenter.transform(data)
    segDF.cache()

    val segRDD: RDD[(Int, Seq[String])] = segDF.select("year", "segmented").rdd.map { case Row(year: String, terms: Seq[String]) =>
      (Integer.parseInt(year), terms)
    }

    val result: Array[String] = segRDD.map(line => line._1.toString + "\u00ef" + line._2.mkString(",")).collect()
    val writer: BufferedWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("E:/entertainment_seg.txt")))
    result.foreach(line => writer.write(line + "\n"))
    writer.close()

    // Find the stars (person names) mentioned most often in 2016 news
    val stars2016 = segRDD.filter(_._1 == 2016)
      .flatMap { case (year: Int, termStr: Seq[String]) =>
        val person = termStr
          .map(term => (term.split("/")(0), term.split("/")(1)))
          .filter(_._2.equalsIgnoreCase("nr"))
          .map(term => (term._1, 1L))

        person
      }
      .reduceByKey(_ + _)
      .sortBy(_._2, ascending = false)

    segDF.unpersist()

    stars2016.take(100).foreach(println)

    spark.stop()
  }
} 
Example 49
Source File: AkkaHttpPrometheusExporter.scala    From cloudstate   with Apache License 2.0 5 votes vote down vote up
package io.cloudstate.proxy

import java.io.OutputStreamWriter
import java.util

import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import io.prometheus.client.CollectorRegistry
import akka.http.scaladsl.model._
import akka.http.scaladsl.server.Directives._
import akka.stream.Materializer
import akka.util.ByteString
import io.prometheus.client.exporter.common.TextFormat

import scala.concurrent.Future


class AkkaHttpPrometheusExporter(metricsPort: Int, registry: CollectorRegistry = CollectorRegistry.defaultRegistry)(
    implicit system: ActorSystem,
    mat: Materializer
) {

  private[this] final val PrometheusContentType = ContentType.parse(TextFormat.CONTENT_TYPE_004).right.get

  private def routes = get {
    (path("metrics") | pathSingleSlash) {
      encodeResponse {
        parameter(Symbol("name[]").*) { names =>
          complete {
            val namesSet = new util.HashSet[String]()
            names.foreach(namesSet.add)
            val builder = ByteString.newBuilder
            val writer = new OutputStreamWriter(builder.asOutputStream)
            TextFormat.write004(writer, registry.filteredMetricFamilySamples(namesSet))
            // Very important to flush the writer before we build the byte string!
            writer.flush()
            HttpEntity(PrometheusContentType, builder.result())
          }
        }
      }
    }
  }

  def start(): Future[Http.ServerBinding] =
    Http().bindAndHandle(routes, "0.0.0.0", metricsPort)
} 
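The `writer.flush()` call above is exactly what the inline comment stresses: an OutputStreamWriter buffers encoded bytes internally, so without the flush the ByteString builder would still be empty. A tiny standalone sketch of that behaviour (the stream and text are illustrative):

import java.io.{ByteArrayOutputStream, OutputStreamWriter}

val buf = new ByteArrayOutputStream()
val writer = new OutputStreamWriter(buf)
writer.write("metrics")
println(buf.size()) // 0 -- the bytes are still in the writer's internal buffer
writer.flush()
println(buf.size()) // 7 -- flush pushes them to the underlying stream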
Example 50
Source File: InteractiveSonatypeLogger.scala    From coursier   with Apache License 2.0 5 votes vote down vote up
package coursier.publish.sonatype.logger

import java.io.{OutputStream, OutputStreamWriter}

import coursier.cache.internal.Terminal.Ansi

final class InteractiveSonatypeLogger(out: OutputStreamWriter, verbosity: Int) extends SonatypeLogger {
  override def listingProfiles(attempt: Int, total: Int): Unit =
    if (verbosity >= 0) {
      val extra =
        if (attempt == 0) ""
        else s" (attempt $attempt / $total)"
      out.write("Listing Sonatype profiles..." + extra)
      out.flush()
    }
  override def listedProfiles(errorOpt: Option[Throwable]): Unit = {
    if (verbosity >= 0) {
      out.clearLine(2)
      out.write('\n')
      out.up(1)
      out.flush()
    }

    val msgOpt =
      if (errorOpt.isEmpty) {
        if (verbosity >= 1)
          Some("Listed Sonatype profiles")
        else
          None
      } else
        Some("Fail to list Sonatype profiles")

    for (msg <- msgOpt) {
      out.write(s"$msg\n")
      out.flush()
    }
  }
}

object InteractiveSonatypeLogger {
  def create(out: OutputStream, verbosity: Int): SonatypeLogger =
    new InteractiveSonatypeLogger(new OutputStreamWriter(out), verbosity)
} 
Example 51
Source File: FileSystem.scala    From ohara   with Apache License 2.0 4 votes vote down vote up
package oharastream.ohara.client.filesystem

import java.io.{BufferedReader, BufferedWriter, IOException, InputStreamReader, OutputStreamWriter}
import java.nio.charset.{Charset, StandardCharsets}

import oharastream.ohara.client.filesystem.ftp.FtpFileSystem
import oharastream.ohara.client.filesystem.hdfs.HdfsFileSystem
import oharastream.ohara.client.filesystem.smb.SmbFileSystem
import oharastream.ohara.common.exception.FileSystemException

trait FileSystem extends oharastream.ohara.kafka.connector.storage.FileSystem {
  
  def readLines(path: String, encode: String = "UTF-8"): Array[String] = {
    val reader = new BufferedReader(new InputStreamReader(open(path), Charset.forName(encode)))
    try Iterator.continually(reader.readLine()).takeWhile(_ != null).toArray
    finally reader.close()
  }

  def wrap[T](f: () => T): T =
    try {
      f()
    } catch {
      case e: IOException           => throw new FileSystemException(e.getMessage, e)
      case e: IllegalStateException => throw new FileSystemException(e.getMessage, e)
    }
}

object FileSystem {
  def hdfsBuilder: HdfsFileSystem.Builder = HdfsFileSystem.builder
  def ftpBuilder: FtpFileSystem.Builder   = FtpFileSystem.builder
  def smbBuilder: SmbFileSystem.Builder   = SmbFileSystem.builder
}
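Finally, a short sketch tying the helpers above together, reusing the local-directory trick from the TestHdfsFileSystem example. The temporary folder, the file name, and the assumption that a file:// URL is acceptable to hdfsBuilder here are all illustrative, not a statement about the ohara API:

import java.io.{BufferedWriter, File, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import oharastream.ohara.common.util.Releasable

val dir = new File(System.getProperty("java.io.tmpdir"), "ohara-demo")
dir.mkdirs()
val fs = FileSystem.hdfsBuilder.url(dir.toURI.toString).build
val path = new File(dir, "hello.txt").getPath
val writer = new BufferedWriter(new OutputStreamWriter(fs.create(path), StandardCharsets.UTF_8))
try writer.write("hello OutputStreamWriter") finally writer.close()
fs.readLines(path).foreach(println)
Releasable.close(fs)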