java.io.OutputStreamWriter Scala Examples
The following examples show how to use java.io.OutputStreamWriter.
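Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below; the file name and content are illustrative) of the pattern most of them share: wrap an OutputStream in an OutputStreamWriter with an explicit charset, buffer it, and close it in a finally block.

import java.io.{BufferedWriter, FileOutputStream, OutputStreamWriter}
import java.nio.charset.StandardCharsets

object OutputStreamWriterExample extends App {
  // Illustrative target; any OutputStream (file, socket, HDFS stream, ...) works the same way.
  val out = new FileOutputStream("example.txt")
  // An explicit charset avoids depending on the platform default encoding.
  val writer = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8))
  try {
    writer.write("hello, OutputStreamWriter")
    writer.newLine()
  } finally {
    writer.close() // closing the writer flushes it and also closes the underlying stream
  }
}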
Example 1
Source File: BasicTestPerformance4Ftp.scala From ohara with Apache License 2.0
package oharastream.ohara.it.performance import java.io.{BufferedWriter, OutputStreamWriter} import java.util.concurrent.atomic.LongAdder import oharastream.ohara.common.data.Row import oharastream.ohara.common.util.{CommonUtils, Releasable} import org.junit.AssumptionViolatedException import spray.json.{JsNumber, JsString, JsValue} import scala.jdk.CollectionConverters._ import oharastream.ohara.client.filesystem.FileSystem import scala.concurrent.duration.Duration abstract class BasicTestPerformance4Ftp extends BasicTestPerformance { private[this] val ftpHostname = value(PerformanceTestingUtils.FTP_HOSTNAME_KEY) .getOrElse(throw new AssumptionViolatedException(s"${PerformanceTestingUtils.FTP_HOSTNAME_KEY} is required")) private[this] val ftpPort = value(PerformanceTestingUtils.FTP_PORT_KEY) .getOrElse(throw new AssumptionViolatedException(s"${PerformanceTestingUtils.FTP_PORT_KEY} is required")) .toInt private[this] val ftpUser = value(PerformanceTestingUtils.FTP_USER_KEY) .getOrElse(throw new AssumptionViolatedException(s"${PerformanceTestingUtils.FTP_USER_KEY} is required")) private[this] val ftpPassword = value(PerformanceTestingUtils.FTP_PASSWORD_KEY) .getOrElse(throw new AssumptionViolatedException(s"${PerformanceTestingUtils.FTP_PASSWORD_KEY} is required")) protected val ftpSettings: Map[String, JsValue] = Map( // convert the hostname to IP address oharastream.ohara.connector.ftp.FTP_HOSTNAME_KEY -> JsString(ftpHostname), oharastream.ohara.connector.ftp.FTP_PORT_KEY -> JsNumber(ftpPort), oharastream.ohara.connector.ftp.FTP_USER_NAME_KEY -> JsString(ftpUser), oharastream.ohara.connector.ftp.FTP_PASSWORD_KEY -> JsString(ftpPassword) ) private[this] val csvInputFolderKey = PerformanceTestingUtils.CSV_INPUT_KEY private[this] val csvOutputFolder: String = value(csvInputFolderKey).getOrElse("/input") private[this] val cleanupTestDataKey = PerformanceTestingUtils.DATA_CLEANUP_KEY protected val cleanupTestData: Boolean = value(cleanupTestDataKey).forall(_.toBoolean) protected def setupInputData(timeout: Duration): (String, Long, Long) = { val client = ftpClient() try { if (!PerformanceTestingUtils.exists(client, csvOutputFolder)) PerformanceTestingUtils.createFolder(client, csvOutputFolder) val result = generateData( numberOfRowsToFlush, timeout, (rows: Seq[Row]) => { val file = s"$csvOutputFolder/${CommonUtils.randomString()}" val writer = new BufferedWriter(new OutputStreamWriter(client.create(file))) val count = new LongAdder() val sizeInBytes = new LongAdder() try { val cellNames: Set[String] = rows.head.cells().asScala.map(_.name).toSet writer .append(cellNames.mkString(",")) .append("\n") rows.foreach(row => { val content = row.cells().asScala.map(_.value).mkString(",") count.increment() sizeInBytes.add(content.length) writer.append(content).append("\n") }) (count.longValue(), sizeInBytes.longValue()) } finally Releasable.close(writer) } ) (csvOutputFolder, result._1, result._2) } finally Releasable.close(client) } protected[this] def ftpClient() = FileSystem.ftpBuilder .hostname(ftpHostname) .port(ftpPort) .user(ftpUser) .password(ftpPassword) .build }
Example 2
Source File: FeatureSelection.scala From aerosolve with Apache License 2.0
package com.airbnb.aerosolve.training import java.io.BufferedWriter import java.io.OutputStreamWriter import java.util import com.airbnb.aerosolve.core.{ModelRecord, ModelHeader, FeatureVector, Example} import com.airbnb.aerosolve.core.models.LinearModel import com.airbnb.aerosolve.core.util.Util import com.typesafe.config.Config import org.slf4j.{LoggerFactory, Logger} import org.apache.spark.SparkContext import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD import scala.collection.mutable.HashMap import scala.collection.mutable.HashSet import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.Buffer import scala.collection.JavaConversions._ import scala.collection.JavaConverters._ import scala.util.Random import scala.math.abs import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.fs.Path import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path object FeatureSelection { private final val log: Logger = LoggerFactory.getLogger("FeatureSelection") val allKey : (String, String) = ("$ALL", "$POS") // Given a RDD compute the pointwise mutual information between // the positive label and the discrete features. def pointwiseMutualInformation(examples : RDD[Example], config : Config, key : String, rankKey : String, posThreshold : Double, minPosCount : Double, newCrosses : Boolean) : RDD[((String, String), Double)] = { val pointwise = LinearRankerUtils.makePointwise(examples, config, key, rankKey) val features = pointwise .mapPartitions(part => { // The tuple2 is var, var | positive val output = scala.collection.mutable.HashMap[(String, String), (Double, Double)]() part.foreach(example =>{ val featureVector = example.example.get(0) val isPos = if (featureVector.floatFeatures.get(rankKey).asScala.head._2 > posThreshold) 1.0 else 0.0 val all : (Double, Double) = output.getOrElse(allKey, (0.0, 0.0)) output.put(allKey, (all._1 + 1.0, all._2 + 1.0 * isPos)) val features : Array[(String, String)] = LinearRankerUtils.getFeatures(featureVector) if (newCrosses) { for (i <- features) { for (j <- features) { if (i._1 < j._1) { val key = ("%s<NEW>%s".format(i._1, j._1), "%s<NEW>%s".format(i._2, j._2)) val x = output.getOrElse(key, (0.0, 0.0)) output.put(key, (x._1 + 1.0, x._2 + 1.0 * isPos)) } } } } for (feature <- features) { val x = output.getOrElse(feature, (0.0, 0.0)) output.put(feature, (x._1 + 1.0, x._2 + 1.0 * isPos)) } }) output.iterator }) .reduceByKey((a, b) => (a._1 + b._1, a._2 + b._2)) .filter(x => x._2._2 >= minPosCount) val allCount = features.filter(x => x._1.equals(allKey)).take(1).head features.map(x => { val prob = x._2._1 / allCount._2._1 val probPos = x._2._2 / allCount._2._2 (x._1, math.log(probPos / prob) / math.log(2.0)) }) } // Returns the maximum entropy per family def maxEntropy(input : RDD[((String, String), Double)]) : RDD[((String, String), Double)] = { input .map(x => (x._1._1, (x._1._2, x._2))) .reduceByKey((a, b) => if (math.abs(a._2) > math.abs(b._2)) a else b) .map(x => ((x._1, x._2._1), x._2._2)) } }
Example 3
Source File: InteractiveSignerLogger.scala From coursier with Apache License 2.0
package coursier.publish.signing.logger

import java.io.{OutputStream, OutputStreamWriter, Writer}

import coursier.publish.fileset.{FileSet, Path}
import coursier.publish.logging.ProgressLogger

final class InteractiveSignerLogger(out: Writer, verbosity: Int) extends SignerLogger {
  private val underlying = new ProgressLogger[Object](
    "Signed",
    "files",
    out,
    updateOnChange = true,
    doneEmoji = Some("\u270D\uFE0F ")
  )

  override def signing(id: Object, fileSet: FileSet): Unit = {
    underlying.processingSet(id, Some(fileSet.elements.length))
  }
  override def signed(id: Object, fileSet: FileSet): Unit =
    underlying.processedSet(id)
  override def signingElement(id: Object, path: Path): Unit = {
    if (verbosity >= 2)
      out.write(s"Signing ${path.repr}\n")
    underlying.processing(path.repr, id)
  }
  override def signedElement(id: Object, path: Path, excOpt: Option[Throwable]): Unit = {
    if (verbosity >= 2)
      out.write(s"Signed ${path.repr}\n")
    underlying.processed(path.repr, id, excOpt.nonEmpty)
  }
  override def start(): Unit = underlying.start()
  override def stop(keep: Boolean): Unit = underlying.stop(keep)
}

object InteractiveSignerLogger {
  def create(out: OutputStream, verbosity: Int): SignerLogger =
    new InteractiveSignerLogger(new OutputStreamWriter(out), verbosity)
}
Example 4
Source File: InteractiveDirLogger.scala From coursier with Apache License 2.0
package coursier.publish.dir.logger

import java.io.{OutputStream, OutputStreamWriter}
import java.nio.file.Path

import com.lightbend.emoji.ShortCodes.Defaults.defaultImplicit.emoji
import coursier.publish.logging.ProgressLogger

final class InteractiveDirLogger(out: OutputStreamWriter, dirName: String, verbosity: Int) extends DirLogger {
  private val underlying = new ProgressLogger[String](
    "Read",
    s"files from $dirName",
    out,
    doneEmoji = emoji("mag").map(_.toString())
  )

  override def reading(dir: Path): Unit = underlying.processingSet(dirName, None)
  override def element(dir: Path, file: Path): Unit = {
    underlying.processing(file.toString, dirName)
    underlying.processed(file.toString, dirName, false)
  }
  override def read(dir: Path, elements: Int): Unit = underlying.processedSet(dirName)

  override def start(): Unit = underlying.start()
  override def stop(keep: Boolean): Unit = underlying.stop(keep)
}

object InteractiveDirLogger {
  def create(out: OutputStream, dirName: String, verbosity: Int): DirLogger =
    new InteractiveDirLogger(new OutputStreamWriter(out), dirName, verbosity)
}
Example 5
Source File: Using.scala From Argus-SAF with Apache License 2.0
package org.argus.jawa.core.compiler.compile.io

import java.io.{Closeable, FileInputStream, FileOutputStream, InputStream, OutputStream, File => JavaFile}
import java.io.{BufferedInputStream, BufferedOutputStream, InputStreamReader, OutputStreamWriter}
import java.io.{BufferedReader, BufferedWriter}
import java.util.zip.GZIPInputStream
import java.net.URL
import java.nio.channels.FileChannel
import java.nio.charset.Charset
import java.util.jar.{JarFile, JarInputStream, JarOutputStream}
import java.util.zip.{GZIPOutputStream, ZipEntry, ZipFile, ZipInputStream, ZipOutputStream}

import ErrorHandling.translate

import scala.reflect.{Manifest => SManifest}

abstract class Using[Source, T] {
  protected def open(src: Source): T
  def apply[R](src: Source)(f: T => R): R = {
    val resource = open(src)
    try { f(resource) }
    finally { close(resource) }
  }
  protected def close(out: T): Unit
}

abstract class WrapUsing[Source, T](implicit srcMf: SManifest[Source], targetMf: SManifest[T]) extends Using[Source, T] {
  protected def label[S](m: SManifest[S]): String = m.runtimeClass.getSimpleName
  protected def openImpl(source: Source): T
  protected final def open(source: Source): T =
    translate("Error wrapping " + label(srcMf) + " in " + label(targetMf) + ": ") { openImpl(source) }
}

trait OpenFile[T] extends Using[JavaFile, T] {
  protected def openImpl(file: JavaFile): T
  protected final def open(file: JavaFile): T = {
    val parent = file.getParentFile
    if(parent != null)
      IO.createDirectory(parent)
    openImpl(file)
  }
}

object Using {
  def wrap[Source, T <: Closeable](openF: Source => T)(implicit srcMf: SManifest[Source], targetMf: SManifest[T]): Using[Source, T] =
    wrap(openF, closeCloseable)
  def wrap[Source, T](openF: Source => T, closeF: T => Unit)(implicit srcMf: SManifest[Source], targetMf: SManifest[T]): Using[Source, T] =
    new WrapUsing[Source, T] {
      def openImpl(source: Source): T = openF(source)
      def close(t: T): Unit = closeF(t)
    }

  def resource[Source, T <: Closeable](openF: Source => T): Using[Source, T] =
    resource(openF, closeCloseable)
  def resource[Source, T](openF: Source => T, closeF: T => Unit): Using[Source, T] =
    new Using[Source, T] {
      def open(s: Source): T = openF(s)
      def close(s: T): Unit = closeF(s)
    }

  def file[T <: Closeable](openF: JavaFile => T): OpenFile[T] = file(openF, closeCloseable)
  def file[T](openF: JavaFile => T, closeF: T => Unit): OpenFile[T] =
    new OpenFile[T] {
      def openImpl(file: JavaFile): T = openF(file)
      def close(t: T): Unit = closeF(t)
    }

  private def closeCloseable[T <: Closeable]: T => Unit = _.close()

  def bufferedOutputStream: Using[OutputStream, BufferedOutputStream] = wrap((out: OutputStream) => new BufferedOutputStream(out))
  def bufferedInputStream: Using[InputStream, BufferedInputStream] = wrap((in: InputStream) => new BufferedInputStream(in))
  def fileOutputStream(append: Boolean = false): OpenFile[BufferedOutputStream] = file(f => new BufferedOutputStream(new FileOutputStream(f, append)))
  def fileInputStream: OpenFile[BufferedInputStream] = file(f => new BufferedInputStream(new FileInputStream(f)))
  def urlInputStream: Using[URL, BufferedInputStream] = resource((u: URL) => translate("Error opening " + u + ": ")(new BufferedInputStream(u.openStream)))
  def fileOutputChannel: OpenFile[FileChannel] = file(f => new FileOutputStream(f).getChannel)
  def fileInputChannel: OpenFile[FileChannel] = file(f => new FileInputStream(f).getChannel)
  def fileWriter(charset: Charset = IO.utf8, append: Boolean = false): OpenFile[BufferedWriter] =
    file(f => new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f, append), charset)))
  def fileReader(charset: Charset): OpenFile[BufferedReader] = file(f => new BufferedReader(new InputStreamReader(new FileInputStream(f), charset)))
  def urlReader(charset: Charset): Using[URL, BufferedReader] = resource((u: URL) => new BufferedReader(new InputStreamReader(u.openStream, charset)))
  def jarFile(verify: Boolean): OpenFile[JarFile] = file(f => new JarFile(f, verify), (_: JarFile).close())
  def zipFile: OpenFile[ZipFile] = file(f => new ZipFile(f), (_: ZipFile).close())
  def streamReader: Using[(InputStream, Charset), InputStreamReader] = wrap {
    (_: (InputStream, Charset)) match { case (in, charset) => new InputStreamReader(in, charset) }
  }
  def gzipInputStream: Using[InputStream, GZIPInputStream] = wrap((in: InputStream) => new GZIPInputStream(in, 8192))
  def zipInputStream: Using[InputStream, ZipInputStream] = wrap((in: InputStream) => new ZipInputStream(in))
  def zipOutputStream: Using[OutputStream, ZipOutputStream] = wrap((out: OutputStream) => new ZipOutputStream(out))
  def gzipOutputStream: Using[OutputStream, GZIPOutputStream] = wrap((out: OutputStream) => new GZIPOutputStream(out, 8192), (_: GZIPOutputStream).finish())
  def jarOutputStream: Using[OutputStream, JarOutputStream] = wrap((out: OutputStream) => new JarOutputStream(out))
  def jarInputStream: Using[InputStream, JarInputStream] = wrap((in: InputStream) => new JarInputStream(in))
  def zipEntry(zip: ZipFile): Using[ZipEntry, InputStream] = resource((entry: ZipEntry) =>
    translate("Error opening " + entry.getName + " in " + zip + ": ") { zip.getInputStream(entry) }
  )
}
Example 6
Source File: AbstractTableSpec.scala From hail with MIT License
package is.hail.expr.ir import java.io.OutputStreamWriter import is.hail.utils._ import is.hail.types._ import is.hail.io.fs.FS import is.hail.rvd._ import org.json4s.jackson.JsonMethods import org.json4s.{DefaultFormats, Extraction, Formats, JValue, ShortTypeHints} import scala.language.implicitConversions object SortOrder { def deserialize(b: Byte): SortOrder = if (b == 0.toByte) Ascending else if (b == 1.toByte) Descending else throw new RuntimeException(s"invalid sort order: $b") } sealed abstract class SortOrder { def serialize: Byte } case object Ascending extends SortOrder { def serialize: Byte = 0.toByte } case object Descending extends SortOrder { def serialize: Byte = 1.toByte } case class SortField(field: String, sortOrder: SortOrder) abstract class AbstractTableSpec extends RelationalSpec { def references_rel_path: String def table_type: TableType def rowsComponent: RVDComponentSpec = getComponent[RVDComponentSpec]("rows") def rowsSpec: AbstractRVDSpec def globalsSpec: AbstractRVDSpec def indexed: Boolean = rowsSpec.indexed } object TableSpec { def apply(fs: FS, path: String, params: TableSpecParameters): TableSpec = { val globalsComponent = params.components("globals").asInstanceOf[RVDComponentSpec] val globalsSpec = globalsComponent.rvdSpec(fs, path) val rowsComponent = params.components("rows").asInstanceOf[RVDComponentSpec] val rowsSpec = rowsComponent.rvdSpec(fs, path) new TableSpec(params, globalsSpec, rowsSpec) } def fromJValue(fs: FS, path: String, jv: JValue): TableSpec = { implicit val formats: Formats = RelationalSpec.formats val params = jv.extract[TableSpecParameters] TableSpec(fs, path, params) } } case class TableSpecParameters( file_version: Int, hail_version: String, references_rel_path: String, table_type: TableType, components: Map[String, ComponentSpec]) { def write(fs: FS, path: String) { using(new OutputStreamWriter(fs.create(path + "/metadata.json.gz"))) { out => out.write(JsonMethods.compact(decomposeWithName(this, "TableSpec")(RelationalSpec.formats))) } } } class TableSpec( val params: TableSpecParameters, val globalsSpec: AbstractRVDSpec, val rowsSpec: AbstractRVDSpec) extends AbstractTableSpec { def file_version: Int = params.file_version def hail_version: String = params.hail_version def components: Map[String, ComponentSpec] = params.components def references_rel_path: String = params.references_rel_path def table_type: TableType = params.table_type def toJValue: JValue = { decomposeWithName(params, "TableSpec")(RelationalSpec.formats) } }
Example 7
Source File: package.scala From hail with MIT License
package is.hail

import java.io.OutputStreamWriter
import java.nio.charset._

import is.hail.types.virtual.Type
import is.hail.utils._
import is.hail.io.fs.FS

package object io {
  type VCFFieldAttributes = Map[String, String]
  type VCFAttributes = Map[String, VCFFieldAttributes]
  type VCFMetadata = Map[String, VCFAttributes]

  val utfCharset = Charset.forName("UTF-8")

  def exportTypes(filename: String, fs: FS, info: Array[(String, Type)]) {
    val sb = new StringBuilder
    using(new OutputStreamWriter(fs.create(filename))) { out =>
      info.foreachBetween { case (name, t) =>
        sb.append(prettyIdentifier(name))
        sb.append(":")
        t.pretty(sb, 0, compact = true)
      } { sb += ',' }
      out.write(sb.result())
    }
  }
}
Example 8
Source File: Json4sSerialization.scala From kafka-serialization with Apache License 2.0
package com.ovoenergy.kafka.serialization.json4s

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStreamReader, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import com.ovoenergy.kafka.serialization.core._
import org.apache.kafka.common.serialization.{Deserializer => KafkaDeserializer, Serializer => KafkaSerializer}
import org.json4s.Formats
import org.json4s.native.Serialization.{read, write}

import scala.reflect.ClassTag
import scala.reflect.runtime.universe._

trait Json4sSerialization {

  def json4sSerializer[T <: AnyRef](implicit jsonFormats: Formats): KafkaSerializer[T] = serializer { (_, data) =>
    val bout = new ByteArrayOutputStream()
    val writer = new OutputStreamWriter(bout, StandardCharsets.UTF_8)

    // TODO Use scala-arm
    try {
      write(data, writer)
      writer.flush()
    } finally {
      writer.close()
    }
    bout.toByteArray
  }

  def json4sDeserializer[T: TypeTag](implicit jsonFormats: Formats): KafkaDeserializer[T] = deserializer { (_, data) =>
    val tt = implicitly[TypeTag[T]]
    implicit val cl = ClassTag[T](tt.mirror.runtimeClass(tt.tpe))
    read[T](new InputStreamReader(new ByteArrayInputStream(data), StandardCharsets.UTF_8))
  }

}
Example 9
Source File: SpraySerialization.scala From kafka-serialization with Apache License 2.0
package com.ovoenergy.kafka.serialization.spray

import java.io.{ByteArrayOutputStream, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import org.apache.kafka.common.serialization.{Deserializer => KafkaDeserializer, Serializer => KafkaSerializer}
import spray.json._

import com.ovoenergy.kafka.serialization.core._

trait SpraySerialization {

  def spraySerializer[T](implicit format: JsonWriter[T]): KafkaSerializer[T] = serializer { (_, data) =>
    val bout = new ByteArrayOutputStream()
    val osw = new OutputStreamWriter(bout, StandardCharsets.UTF_8)

    // TODO use scala-arm
    try {
      osw.write(data.toJson.compactPrint)
      osw.flush()
    } finally {
      osw.close()
    }
    bout.toByteArray
  }

  def sprayDeserializer[T](implicit format: JsonReader[T]): KafkaDeserializer[T] = deserializer { (_, data) =>
    JsonParser(ParserInput(data)).convertTo[T]
  }

}
Example 10
Source File: HadoopFSHelpers.scala From morpheus with Apache License 2.0
package org.opencypher.morpheus.api.io.fs import java.io.{BufferedReader, BufferedWriter, InputStreamReader, OutputStreamWriter} import org.apache.hadoop.fs.{FileSystem, Path} import org.opencypher.morpheus.api.io.util.FileSystemUtils.using object HadoopFSHelpers { implicit class RichHadoopFileSystem(fileSystem: FileSystem) { protected def createDirectoryIfNotExists(path: Path): Unit = { if (!fileSystem.exists(path)) { fileSystem.mkdirs(path) } } def listDirectories(path: String): List[String] = { val p = new Path(path) createDirectoryIfNotExists(p) fileSystem.listStatus(p) .filter(_.isDirectory) .map(_.getPath.getName) .toList } def deleteDirectory(path: String): Unit = { fileSystem.delete(new Path(path), true) } def readFile(path: String): String = { using(new BufferedReader(new InputStreamReader(fileSystem.open(new Path(path)), "UTF-8"))) { reader => def readLines = Stream.cons(reader.readLine(), Stream.continually(reader.readLine)) readLines.takeWhile(_ != null).mkString } } def writeFile(path: String, content: String): Unit = { val p = new Path(path) val parentDirectory = p.getParent createDirectoryIfNotExists(parentDirectory) using(fileSystem.create(p)) { outputStream => using(new BufferedWriter(new OutputStreamWriter(outputStream, "UTF-8"))) { bufferedWriter => bufferedWriter.write(content) } } } } }
Example 11
Source File: GraphLoaderSuite.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.graphx

import java.io.File
import java.io.FileOutputStream
import java.io.OutputStreamWriter
import java.nio.charset.StandardCharsets

import org.apache.spark.SparkFunSuite
import org.apache.spark.util.Utils

class GraphLoaderSuite extends SparkFunSuite with LocalSparkContext {

  test("GraphLoader.edgeListFile") {
    withSpark { sc =>
      val tmpDir = Utils.createTempDir()
      val graphFile = new File(tmpDir.getAbsolutePath, "graph.txt")
      val writer = new OutputStreamWriter(new FileOutputStream(graphFile), StandardCharsets.UTF_8)
      for (i <- (1 until 101)) writer.write(s"$i 0\n")
      writer.close()
      try {
        val graph = GraphLoader.edgeListFile(sc, tmpDir.getAbsolutePath)
        val neighborAttrSums = graph.aggregateMessages[Int](
          ctx => ctx.sendToDst(ctx.srcAttr), _ + _)
        assert(neighborAttrSums.collect.toSet === Set((0: VertexId, 100)))
      } finally {
        Utils.deleteRecursively(tmpDir)
      }
    }
  }
}
Example 12
Source File: StreamMetadata.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.execution.streaming import java.io.{InputStreamReader, OutputStreamWriter} import java.nio.charset.StandardCharsets import scala.util.control.NonFatal import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, FSDataOutputStream, Path} import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.apache.spark.internal.Logging import org.apache.spark.sql.streaming.StreamingQuery def write( metadata: StreamMetadata, metadataFile: Path, hadoopConf: Configuration): Unit = { var output: FSDataOutputStream = null try { val fs = FileSystem.get(hadoopConf) output = fs.create(metadataFile) val writer = new OutputStreamWriter(output) Serialization.write(metadata, writer) writer.close() } catch { case NonFatal(e) => logError(s"Error writing stream metadata $metadata to $metadataFile", e) throw e } finally { IOUtils.closeQuietly(output) } } }
Example 13
Source File: Main.scala From scalajs-highcharts with MIT License
package com.karasiq.highcharts.generator import java.io.{BufferedWriter, FileOutputStream, OutputStreamWriter, PrintWriter} import java.nio.file._ import java.nio.file.attribute.BasicFileAttributes import scala.util.control.Exception import scalaj.http.{Http, HttpOptions} import com.karasiq.highcharts.generator.writers.{ScalaClassWriter, ScalaJsClassBuilder} case class HighchartsApiDoc(library: String) { private val defaultPackage = System.getProperty(s"highcharts-generator.$library.package", s"com.$library") private def httpGet(url: String): List[ConfigurationObject] = { val page = Http.get(url) .header("User-Agent", "Mozilla/5.0 (X11; OpenBSD amd64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.81 Safari/537.36") .header("Accept", "application/json") .options(HttpOptions.connTimeout(10000), HttpOptions.readTimeout(10000)) val json = page.asString ConfigurationObject.fromJson(json) } private def writeFiles(pkg: String, configs: List[ConfigurationObject], rootObject: Option[String] = None): Unit = { val header = s""" |package $pkg | |import scalajs.js, js.`|` |import com.highcharts.CleanJsObject |import com.highcharts.HighchartsUtils._ | |""".stripMargin val outputDir = Paths.get(System.getProperty("highcharts-generator.output", "src/main/scala"), pkg.split("\\."):_*) Files.createDirectories(outputDir) // Remove all files Files.walkFileTree(outputDir, new SimpleFileVisitor[Path] { override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = { Files.delete(file) FileVisitResult.CONTINUE } }) val classes = new ScalaJsClassBuilder().parse(configs, rootObject) val classWriter = new ScalaClassWriter classes.foreach { scalaJsClass ⇒ val file = outputDir.resolve(scalaJsClass.scalaName + ".scala") println(s"Writing $file...") val writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file.toFile, true), "UTF-8"))) Exception.allCatch.andFinally(writer.close()) { if (Files.size(file) == 0) { writer.print(header) } classWriter.writeClass(scalaJsClass) { line ⇒ writer.println(line) } writer.flush() } } } def writeConfigs(): Unit = { val configs = httpGet(s"https://api.highcharts.com/$library/dump.json") writeFiles(s"$defaultPackage.config", configs, Some(s"${library.capitalize}Config")) } def writeApis(): Unit = { val configs = httpGet(s"https://api.highcharts.com/$library/object/dump.json") writeFiles(s"$defaultPackage.api", configs) } def writeAll(): Unit = { // TODO: https://github.com/highcharts/highcharts/issues/7227 writeConfigs() // writeApis() // TODO: 404 } } object Main extends App { HighchartsApiDoc("highcharts").writeAll() HighchartsApiDoc("highstock").writeAll() HighchartsApiDoc("highmaps").writeAll() }
Example 14
Source File: InteractiveUploadLogger.scala From coursier with Apache License 2.0
package coursier.publish.upload.logger import java.io.{OutputStream, OutputStreamWriter, Writer} import com.lightbend.emoji.ShortCodes.Defaults.defaultImplicit.emoji import coursier.publish.fileset.FileSet import coursier.publish.logging.ProgressLogger import coursier.publish.upload.Upload // FIXME Would have been better if dummy was passed by the Upload instance when calling the methods of UploadLogger final class InteractiveUploadLogger(out: Writer, dummy: Boolean, isLocal: Boolean) extends UploadLogger { private val underlying = new ProgressLogger[Object]( if (isLocal) { if (dummy) "Would have written" else "Wrote" } else { if (dummy) "Would have uploaded" else "Uploaded" }, "files", out, doneEmoji = emoji("truck").map(_.toString()) ) override def uploadingSet(id: Object, fileSet: FileSet): Unit = underlying.processingSet(id, Some(fileSet.elements.length)) override def uploadedSet(id: Object, fileSet: FileSet): Unit = underlying.processedSet(id) override def uploading(url: String, idOpt: Option[Object], totalOpt: Option[Long]): Unit = for (id <- idOpt) underlying.processing(url, id) override def progress(url: String, idOpt: Option[Object], uploaded: Long, total: Long): Unit = for (id <- idOpt) underlying.progress(url, id, uploaded, total) override def uploaded(url: String, idOpt: Option[Object], errorOpt: Option[Upload.Error]): Unit = for (id <- idOpt) underlying.processed(url, id, errorOpt.nonEmpty) override def start(): Unit = underlying.start() override def stop(keep: Boolean): Unit = underlying.stop(keep) } object InteractiveUploadLogger { def create(out: OutputStream, dummy: Boolean, isLocal: Boolean): UploadLogger = new InteractiveUploadLogger(new OutputStreamWriter(out), dummy, isLocal) }
Example 15
Source File: FileDownloader.scala From seahorse with Apache License 2.0
package ai.deepsense.deeplang.doperations.readwritedataframe.filestorage import java.io.{BufferedWriter, FileOutputStream, IOException, OutputStreamWriter} import java.nio.file.{Files, Paths} import java.util.UUID import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import ai.deepsense.deeplang.ExecutionContext import ai.deepsense.deeplang.doperations.exceptions.DeepSenseIOException import ai.deepsense.deeplang.doperations.readwritedataframe.FilePath private[filestorage] object FileDownloader { def downloadFile(url: String)(implicit context: ExecutionContext): FilePath = { if (context.tempPath.startsWith("hdfs://")) { downloadFileToHdfs(url) } else { downloadFileToDriver(url) } } private def downloadFileToHdfs(url: String)(implicit context: ExecutionContext) = { val content = scala.io.Source.fromURL(url).getLines() val hdfsPath = s"${context.tempPath}/${UUID.randomUUID()}" val configuration = new Configuration() val hdfs = FileSystem.get(configuration) val file = new Path(hdfsPath) val hdfsStream = hdfs.create(file) val writer = new BufferedWriter(new OutputStreamWriter(hdfsStream)) try { content.foreach {s => writer.write(s) writer.newLine() } } finally { safeClose(writer) hdfs.close() } FilePath(hdfsPath) } private def downloadFileToDriver(url: String) (implicit context: ExecutionContext) = { val outputDirPath = Paths.get(context.tempPath) // We're checking if the output is a directory following symlinks. // The default behaviour of createDirectories is NOT to follow symlinks if (!Files.isDirectory(outputDirPath)) { Files.createDirectories(outputDirPath) } val outFilePath = Files.createTempFile(outputDirPath, "download", ".csv") // content is a stream. Do not invoke stuff like .toList() on it. val content = scala.io.Source.fromURL(url).getLines() val writer: BufferedWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFilePath.toFile))) try { content.foreach {s => writer.write(s) writer.newLine() } } finally { safeClose(writer) } FilePath(s"file:///$outFilePath") } private def safeClose(bufferedWriter: BufferedWriter): Unit = { try { bufferedWriter.flush() bufferedWriter.close() } catch { case e: IOException => throw new DeepSenseIOException(e) } } }
Example 16
Source File: JsonFileReporter.scala From kyuubi with Apache License 2.0
package yaooqinn.kyuubi.metrics import java.io.{BufferedWriter, Closeable, IOException, OutputStreamWriter} import java.util.{Timer, TimerTask} import java.util.concurrent.TimeUnit import scala.util.Try import scala.util.control.NonFatal import com.codahale.metrics.MetricRegistry import com.codahale.metrics.json.MetricsModule import com.fasterxml.jackson.databind.ObjectMapper import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.fs.permission.FsPermission import org.apache.kyuubi.Logging import org.apache.spark.{KyuubiSparkUtil, SparkConf} import org.apache.spark.KyuubiConf._ private[metrics] class JsonFileReporter(conf: SparkConf, registry: MetricRegistry) extends Closeable with Logging { private val jsonMapper = new ObjectMapper().registerModule( new MetricsModule(TimeUnit.MILLISECONDS, TimeUnit.MILLISECONDS, false)) private val timer = new Timer(true) private val interval = KyuubiSparkUtil.timeStringAsMs(conf.get(METRICS_REPORT_INTERVAL)) private val path = conf.get(METRICS_REPORT_LOCATION) private val hadoopConf = KyuubiSparkUtil.newConfiguration(conf) def start(): Unit = { timer.schedule(new TimerTask { var bw: BufferedWriter = _ override def run(): Unit = try { val json = jsonMapper.writerWithDefaultPrettyPrinter().writeValueAsString(registry) val tmpPath = new Path(path + ".tmp") val tmpPathUri = tmpPath.toUri val fs = if (tmpPathUri.getScheme == null && tmpPathUri.getAuthority == null) { FileSystem.getLocal(hadoopConf) } else { FileSystem.get(tmpPathUri, hadoopConf) } fs.delete(tmpPath, true) bw = new BufferedWriter(new OutputStreamWriter(fs.create(tmpPath, true))) bw.write(json) bw.close() fs.setPermission(tmpPath, FsPermission.createImmutable(Integer.parseInt("644", 8).toShort)) val finalPath = new Path(path) fs.rename(tmpPath, finalPath) fs.setPermission(finalPath, FsPermission.createImmutable(Integer.parseInt("644", 8).toShort)) } catch { case NonFatal(e) => error("Error writing metrics to json file" + path, e) } finally { if (bw != null) { Try(bw.close()) } } }, 0, interval) } override def close(): Unit = { timer.cancel() } }
Example 17
Source File: GraphLoaderSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.graphx

import java.io.File
import java.io.FileOutputStream
import java.io.OutputStreamWriter
import java.nio.charset.StandardCharsets

import org.apache.spark.SparkFunSuite
import org.apache.spark.util.Utils

class GraphLoaderSuite extends SparkFunSuite with LocalSparkContext {

  test("GraphLoader.edgeListFile") {
    withSpark { sc =>
      val tmpDir = Utils.createTempDir()
      val graphFile = new File(tmpDir.getAbsolutePath, "graph.txt")
      val writer = new OutputStreamWriter(new FileOutputStream(graphFile), StandardCharsets.UTF_8)
      for (i <- (1 until 101)) writer.write(s"$i 0\n")
      writer.close()
      try {
        val graph = GraphLoader.edgeListFile(sc, tmpDir.getAbsolutePath)
        val neighborAttrSums = graph.aggregateMessages[Int](
          ctx => ctx.sendToDst(ctx.srcAttr), _ + _)
        assert(neighborAttrSums.collect.toSet === Set((0: VertexId, 100)))
      } finally {
        Utils.deleteRecursively(tmpDir)
      }
    }
  }
}
Example 18
Source File: CodecStreams.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution.datasources import java.io.{InputStream, OutputStream, OutputStreamWriter} import java.nio.charset.{Charset, StandardCharsets} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.io.compress._ import org.apache.hadoop.mapreduce.JobContext import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat import org.apache.hadoop.util.ReflectionUtils import org.apache.spark.TaskContext object CodecStreams { private def getDecompressionCodec(config: Configuration, file: Path): Option[CompressionCodec] = { val compressionCodecs = new CompressionCodecFactory(config) Option(compressionCodecs.getCodec(file)) } def createInputStream(config: Configuration, file: Path): InputStream = { val fs = file.getFileSystem(config) val inputStream: InputStream = fs.open(file) getDecompressionCodec(config, file) .map(codec => codec.createInputStream(inputStream)) .getOrElse(inputStream) } def getCompressionExtension(context: JobContext): String = { getCompressionCodec(context) .map(_.getDefaultExtension) .getOrElse("") } }
Example 19
Source File: StreamMetadata.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution.streaming import java.io.{InputStreamReader, OutputStreamWriter} import java.nio.charset.StandardCharsets import scala.util.control.NonFatal import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, FSDataOutputStream, Path} import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.apache.spark.internal.Logging import org.apache.spark.sql.streaming.StreamingQuery def write( metadata: StreamMetadata, metadataFile: Path, hadoopConf: Configuration): Unit = { var output: FSDataOutputStream = null try { val fs = metadataFile.getFileSystem(hadoopConf) output = fs.create(metadataFile) val writer = new OutputStreamWriter(output) Serialization.write(metadata, writer) writer.close() } catch { case NonFatal(e) => logError(s"Error writing stream metadata $metadata to $metadataFile", e) throw e } finally { IOUtils.closeQuietly(output) } } }
Example 20
Source File: SparkILoop.scala From BigDatalog with Apache License 2.0
package org.apache.spark.repl import java.io.{BufferedReader, FileReader} import Predef.{println => _, _} import scala.util.Properties.{jdkHome, javaVersion, versionString, javaVmName} import scala.tools.nsc.interpreter.{JPrintWriter, ILoop} import scala.tools.nsc.Settings import scala.tools.nsc.util.stringFromStream def run(code: String, sets: Settings = new Settings): String = { import java.io.{ BufferedReader, StringReader, OutputStreamWriter } stringFromStream { ostream => Console.withOut(ostream) { val input = new BufferedReader(new StringReader(code)) val output = new JPrintWriter(new OutputStreamWriter(ostream), true) val repl = new SparkILoop(input, output) if (sets.classpath.isDefault) sets.classpath.value = sys.props("java.class.path") repl process sets } } } def run(lines: List[String]): String = run(lines.map(_ + "\n").mkString) }
Example 21
Source File: PileupApp.scala From bdg-sequila with Apache License 2.0
package org.biodatageeks.sequila.apps import java.io.{OutputStreamWriter, PrintWriter} import org.apache.spark.sql.{SequilaSession, SparkSession} import org.bdgenomics.utils.instrumentation.{Metrics, MetricsListener, RecordedMetrics} import org.biodatageeks.sequila.utils.{InternalParams, SequilaRegister} object PileupApp extends App{ override def main(args: Array[String]): Unit = { System.setProperty("spark.kryo.registrator", "org.biodatageeks.sequila.pileup.serializers.CustomKryoRegistrator") val spark = SparkSession .builder() .master("local[1]") .config("spark.driver.memory","4g") .config( "spark.serializer", "org.apache.spark.serializer.KryoSerializer" ) .enableHiveSupport() .getOrCreate() val ss = SequilaSession(spark) SequilaRegister.register(ss) spark.sparkContext.setLogLevel("INFO") val bamPath = "/Users/aga/NA12878.chr20.md.bam" val referencePath = "/Users/aga/Homo_sapiens_assembly18_chr20.fasta" // val bamPath = "/Users/marek/data/NA12878.chrom20.ILLUMINA.bwa.CEU.low_coverage.20121211.md.bam" // val referencePath = "/Users/marek/data/hs37d5.fa" val tableNameBAM = "reads" ss.sql(s"""DROP TABLE IF EXISTS $tableNameBAM""") ss.sql(s""" |CREATE TABLE $tableNameBAM |USING org.biodatageeks.sequila.datasources.BAM.BAMDataSource |OPTIONS(path "$bamPath") | """.stripMargin) val query = s""" |SELECT count(*) |FROM pileup('$tableNameBAM', 'NA12878', '${referencePath}') """.stripMargin ss .sqlContext .setConf(InternalParams.EnableInstrumentation, "true") Metrics.initialize(ss.sparkContext) val metricsListener = new MetricsListener(new RecordedMetrics()) ss .sparkContext .addSparkListener(metricsListener) val results = ss.sql(query) ss.time{ results.show() } val writer = new PrintWriter(new OutputStreamWriter(System.out, "UTF-8")) Metrics.print(writer, Some(metricsListener.metrics.sparkMetrics.stageTimes)) writer.close() ss.stop() } }
Example 22
Source File: JoinOrderTestSuite.scala From bdg-sequila with Apache License 2.0
package org.biodatageeks.sequila.tests.rangejoins import java.io.{OutputStreamWriter, PrintWriter} import com.holdenkarau.spark.testing.{DataFrameSuiteBase, SharedSparkContext} import org.apache.spark.sql.Row import org.apache.spark.sql.types.{ IntegerType, StringType, StructField, StructType } import org.bdgenomics.utils.instrumentation.{ Metrics, MetricsListener, RecordedMetrics } import org.biodatageeks.sequila.rangejoins.IntervalTree.IntervalTreeJoinStrategyOptim import org.scalatest.{BeforeAndAfter, FunSuite} class JoinOrderTestSuite extends FunSuite with DataFrameSuiteBase with BeforeAndAfter with SharedSparkContext { val schema = StructType( Seq(StructField("chr", StringType), StructField("start", IntegerType), StructField("end", IntegerType))) val metricsListener = new MetricsListener(new RecordedMetrics()) val writer = new PrintWriter(new OutputStreamWriter(System.out)) before { System.setSecurityManager(null) spark.experimental.extraStrategies = new IntervalTreeJoinStrategyOptim( spark) :: Nil Metrics.initialize(sc) val rdd1 = sc .textFile(getClass.getResource("/refFlat.txt.bz2").getPath) .map(r => r.split('\t')) .map( r => Row( r(2).toString, r(4).toInt, r(5).toInt )) val ref = spark.createDataFrame(rdd1, schema) ref.createOrReplaceTempView("ref") val rdd2 = sc .textFile(getClass.getResource("/snp150Flagged.txt.bz2").getPath) .map(r => r.split('\t')) .map( r => Row( r(1).toString, r(2).toInt, r(3).toInt )) val snp = spark .createDataFrame(rdd2, schema) snp.createOrReplaceTempView("snp") } test("Join order - broadcasting snp table") { spark.sqlContext.setConf("spark.biodatageeks.rangejoin.useJoinOrder", "true") val query = s""" |SELECT snp.*,ref.* FROM ref JOIN snp |ON (ref.chr=snp.chr AND snp.end>=ref.start AND snp.start<=ref.end) """.stripMargin assert(spark.sql(query).count === 616404L) } test("Join order - broadcasting ref table") { spark.sqlContext.setConf("spark.biodatageeks.rangejoin.useJoinOrder", "true") val query = s""" |SELECT snp.*,ref.* FROM snp JOIN ref |ON (ref.chr=snp.chr AND snp.end>=ref.start AND snp.start<=ref.end) """.stripMargin assert(spark.sql(query).count === 616404L) } after { Metrics.print(writer, Some(metricsListener.metrics.sparkMetrics.stageTimes)) writer.flush() Metrics.stopRecording() } }
Example 23
Source File: ScalafmtSbtReporter.scala From sbt-scalafmt with Apache License 2.0
package org.scalafmt.sbt import java.io.PrintWriter import java.io.OutputStreamWriter import java.nio.file.Path import org.scalafmt.interfaces.ScalafmtReporter import sbt.internal.util.MessageOnlyException import sbt.util.Logger import scala.util.control.NoStackTrace class ScalafmtSbtReporter(log: Logger, out: OutputStreamWriter) extends ScalafmtReporter { override def error(file: Path, message: String): Unit = { throw new MessageOnlyException(s"$message: $file") } override def error(file: Path, e: Throwable): Unit = { if (e.getMessage != null) { error(file, e.getMessage) } else { throw new FailedToFormat(file.toString, e) } } override def error(file: Path, message: String, e: Throwable): Unit = { if (e.getMessage != null) { error(file, s"$message: ${e.getMessage()}") } else { throw new FailedToFormat(file.toString, e) } } override def excluded(file: Path): Unit = log.debug(s"file excluded: $file") override def parsedConfig(config: Path, scalafmtVersion: String): Unit = log.debug(s"parsed config (v$scalafmtVersion): $config") override def downloadWriter(): PrintWriter = new PrintWriter(out) override def downloadOutputStreamWriter(): OutputStreamWriter = out private class FailedToFormat(filename: String, cause: Throwable) extends Exception(filename, cause) with NoStackTrace }
Example 24
Source File: SchrodingerExceptionTest.scala From aloha with MIT License
package com.eharmony.aloha.ex import org.junit.{Before, Test} import org.junit.Assert._ import java.io.{PrintWriter, OutputStreamWriter, ByteArrayOutputStream, PrintStream} class SchrodingerExceptionTest { private[this] var ex: SchrodingerException = _ @Before def before() { ex = new SchrodingerException } @Test def testFillInStackTrace() { assertTrue(new SchrodingerException().fillInStackTrace().isInstanceOf[SchrodingerException]) } @Test(expected = classOf[SchrodingerException]) def testGetMessage() { ex.getMessage() } @Test(expected = classOf[SchrodingerException]) def testGetStackTrace() { ex.getStackTrace() } @Test(expected = classOf[SchrodingerException]) def testGetCause() { ex.getCause() } @Test(expected = classOf[SchrodingerException]) def testSetStackTrace() { ex.setStackTrace(Array.empty) } @Test(expected = classOf[SchrodingerException]) def testGetLocalizedMessage() { ex.getLocalizedMessage() } @Test(expected = classOf[SchrodingerException]) def testPrintStackTraceEmpty() { ex.printStackTrace() } @Test(expected = classOf[SchrodingerException]) def testPrintStackTraceStream() { val baos = new ByteArrayOutputStream() val ps = new PrintStream(baos) ex.printStackTrace(ps) } @Test(expected = classOf[SchrodingerException]) def testPrintStackTraceWriter() { val baos = new ByteArrayOutputStream() val osw = new OutputStreamWriter(baos) val ps = new PrintWriter(osw) ex.printStackTrace(ps) } @Test(expected = classOf[SchrodingerException]) def testInitCause() { ex.initCause(new Throwable) } @Test(expected = classOf[SchrodingerException]) def testToString() { ex.toString() } @Test def testNoThrowForSchrodingerExceptionWithSchrodingerExceptionCause() { new SchrodingerException(new SchrodingerException) } @Test def testNoThrowForSchrodingerExceptionWithExceptionCause() { new SchrodingerException(new Exception) } @Test(expected = classOf[SchrodingerException]) def testThrowForThrowableWithSchrodingerExceptionCause() { new Throwable(ex) } @Test(expected = classOf[SchrodingerException]) def testThrowForExceptionWithSchrodingerExceptionCause() { new Exception(ex) } @Test(expected = classOf[SchrodingerException]) def testThrowForRuntimeExceptionWithSchrodingerExceptionCause() { new RuntimeException(ex) } }
Example 25
Source File: Devel.scala From libisabelle with Apache License 2.0
package info.hupel.isabelle.setup import java.io.OutputStreamWriter import java.nio.file.{Files, Path} import org.eclipse.jgit.api._ import org.eclipse.jgit.lib.TextProgressMonitor import org.eclipse.jgit.storage.file._ import org.log4s._ trait Devel { def init(path: Path): Unit def update(path: Path): Unit } case class GitDevel(url: String, branch: String) extends Devel { private val logger = getLogger private val monitor = new TextProgressMonitor(new OutputStreamWriter(Console.err)) def init(path: Path): Unit = { logger.debug(s"Cloning $branch from $url into $path") Files.createDirectories(path) new CloneCommand() .setDirectory(path.toFile) .setURI(url) .setBranch(branch) .setProgressMonitor(monitor) .call() () } def update(path: Path): Unit = { logger.debug(s"Fetching $branch from $url into $path") val repo = new FileRepositoryBuilder() .findGitDir(path.toFile) .setup() .build() new Git(repo).pull() .setRemoteBranchName(branch) .call() () } } object Devel { val knownDevels: Map[String, Devel] = Map( "isabelle-mirror" -> GitDevel("https://github.com/isabelle-prover/mirror-isabelle.git", "master") ) }
Example 26
Source File: FileDownloader.scala From seahorse-workflow-executor with Apache License 2.0
package io.deepsense.deeplang.doperations.readwritedataframe.filestorage import java.io.{BufferedWriter, FileOutputStream, IOException, OutputStreamWriter} import java.nio.file.{Files, Paths} import java.util.UUID import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import io.deepsense.deeplang.ExecutionContext import io.deepsense.deeplang.doperations.exceptions.DeepSenseIOException import io.deepsense.deeplang.doperations.readwritedataframe.FilePath private[filestorage] object FileDownloader { def downloadFile(url: String)(implicit context: ExecutionContext): FilePath = { if (context.tempPath.startsWith("hdfs://")) { downloadFileToHdfs(url) } else { downloadFileToDriver(url) } } private def downloadFileToHdfs(url: String)(implicit context: ExecutionContext) = { val content = scala.io.Source.fromURL(url).getLines() val hdfsPath = s"${context.tempPath}/${UUID.randomUUID()}" val configuration = new Configuration() val hdfs = FileSystem.get(configuration) val file = new Path(hdfsPath) val hdfsStream = hdfs.create(file) val writer = new BufferedWriter(new OutputStreamWriter(hdfsStream)) try { content.foreach {s => writer.write(s) writer.newLine() } } finally { safeClose(writer) hdfs.close() } FilePath(hdfsPath) } private def downloadFileToDriver(url: String) (implicit context: ExecutionContext) = { val outputDirPath = Paths.get(context.tempPath) // We're checking if the output is a directory following symlinks. // The default behaviour of createDirectories is NOT to follow symlinks if (!Files.isDirectory(outputDirPath)) { Files.createDirectories(outputDirPath) } val outFilePath = Files.createTempFile(outputDirPath, "download", ".csv") // content is a stream. Do not invoke stuff like .toList() on it. val content = scala.io.Source.fromURL(url).getLines() val writer: BufferedWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFilePath.toFile))) try { content.foreach {s => writer.write(s) writer.newLine() } } finally { safeClose(writer) } FilePath(s"file:///$outFilePath") } private def safeClose(bufferedWriter: BufferedWriter): Unit = { try { bufferedWriter.flush() bufferedWriter.close() } catch { case e: IOException => throw new DeepSenseIOException(e) } } }
Example 27
Source File: TestCompileApplicationInstance.scala From milan with Apache License 2.0
package com.amazon.milan.tools import java.io.{OutputStream, OutputStreamWriter} import java.nio.ByteBuffer import java.nio.charset.StandardCharsets import java.nio.file.Files import com.amazon.milan.application.{Application, ApplicationConfiguration, ApplicationInstance} import com.amazon.milan.lang._ import com.amazon.milan.testing.applications._ import com.amazon.milan.{Id, SemanticVersion} import org.junit.Assert._ import org.junit.Test object TestCompileApplicationInstance { case class Record(recordId: String, i: Int) class Provider extends ApplicationInstanceProvider { override def getApplicationInstance(params: List[(String, String)]): ApplicationInstance = { val input = Stream.of[Record] val graph = new StreamGraph(input) val config = new ApplicationConfiguration config.setListSource(input, Record("1", 1)) val instanceId = params.find(_._1 == "instanceId").get._2 val appId = params.find(_._1 == "appId").get._2 new ApplicationInstance( instanceId, new Application(appId, graph, SemanticVersion.ZERO), config) } } class Compiler extends ApplicationInstanceCompiler { override def compile(applicationInstance: ApplicationInstance, params: List[(String, String)], output: OutputStream): Unit = { val writer = new OutputStreamWriter(output) val testParam = params.find(_._1 == "test").get._2 writer.write(testParam) writer.write(applicationInstance.toJsonString) writer.close() } } } @Test class TestCompileApplicationInstance { @Test def test_CompileApplicationInstance_Main_SendsProviderAndCompilerParameters(): Unit = { val tempFile = Files.createTempFile("TestCompileApplicationInstance", ".scala") Files.deleteIfExists(tempFile) val appId = Id.newId() val instanceId = Id.newId() val testValue = Id.newId() try { val args = Array( "--provider", "com.amazon.milan.tools.TestCompileApplicationInstance.Provider", "--compiler", "com.amazon.milan.tools.TestCompileApplicationInstance.Compiler", "--package", "generated", "--output", tempFile.toString, s"-PinstanceId=$instanceId", s"-PappId=$appId", s"-Ctest=$testValue" ) CompileApplicationInstance.main(args) val fileContents = StandardCharsets.UTF_8.decode(ByteBuffer.wrap(Files.readAllBytes(tempFile))).toString assertTrue(fileContents.contains(appId)) assertTrue(fileContents.contains(instanceId)) assertTrue(fileContents.contains(testValue)) } finally { Files.deleteIfExists(tempFile) } } }
Example 28
Source File: BasicTestPerformance4Samba.scala From ohara with Apache License 2.0
package oharastream.ohara.it.performance import java.io.{BufferedWriter, OutputStreamWriter} import java.util.concurrent.atomic.LongAdder import oharastream.ohara.client.filesystem.FileSystem import oharastream.ohara.common.data.Row import oharastream.ohara.common.util.{CommonUtils, Releasable} import org.junit.AssumptionViolatedException import spray.json.{JsNumber, JsString, JsValue} import scala.concurrent.duration.Duration import scala.jdk.CollectionConverters._ abstract class BasicTestPerformance4Samba extends BasicTestPerformance { private[this] val sambaHostname: String = sys.env.getOrElse( PerformanceTestingUtils.SAMBA_HOSTNAME_KEY, throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_HOSTNAME_KEY} does not exists!!!") ) private[this] val sambaUsername: String = sys.env.getOrElse( PerformanceTestingUtils.SAMBA_USER_KEY, throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_USER_KEY} does not exists!!!") ) private[this] val sambaPassword: String = sys.env.getOrElse( PerformanceTestingUtils.SAMBA_PASSWORD_KEY, throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_PASSWORD_KEY} does not exists!!!") ) private[this] val sambaPort: Int = sys.env .getOrElse( PerformanceTestingUtils.SAMBA_PORT_KEY, throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_PORT_KEY} does not exists!!!") ) .toInt private[this] val sambaShare: String = sys.env.getOrElse( PerformanceTestingUtils.SAMBA_SHARE_KEY, throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_SHARE_KEY} does not exists!!!") ) private[this] val csvInputFolderKey = PerformanceTestingUtils.CSV_INPUT_KEY private[this] val csvOutputFolder: String = value(csvInputFolderKey).getOrElse("input") private[this] val NEED_DELETE_DATA_KEY: String = PerformanceTestingUtils.DATA_CLEANUP_KEY protected[this] val needDeleteData: Boolean = sys.env.getOrElse(NEED_DELETE_DATA_KEY, "true").toBoolean protected val sambaSettings: Map[String, JsValue] = Map( oharastream.ohara.connector.smb.SMB_HOSTNAME_KEY -> JsString(sambaHostname), oharastream.ohara.connector.smb.SMB_PORT_KEY -> JsNumber(sambaPort), oharastream.ohara.connector.smb.SMB_USER_KEY -> JsString(sambaUsername), oharastream.ohara.connector.smb.SMB_PASSWORD_KEY -> JsString(sambaPassword), oharastream.ohara.connector.smb.SMB_SHARE_NAME_KEY -> JsString(sambaShare) ) protected def setupInputData(timeout: Duration): (String, Long, Long) = { val client = sambaClient() try { if (!client.exists(csvOutputFolder)) PerformanceTestingUtils.createFolder(client, csvOutputFolder) val result = generateData( numberOfRowsToFlush, timeout, (rows: Seq[Row]) => { val file = s"$csvOutputFolder/${CommonUtils.randomString()}" val writer = new BufferedWriter(new OutputStreamWriter(client.create(file))) val count = new LongAdder() val sizeInBytes = new LongAdder() try { val cellNames: Set[String] = rows.head.cells().asScala.map(_.name).toSet writer .append(cellNames.mkString(",")) .append("\n") rows.foreach(row => { val content = row.cells().asScala.map(_.value).mkString(",") count.increment() sizeInBytes.add(content.length) writer .append(content) .append("\n") }) (count.longValue(), sizeInBytes.longValue()) } finally Releasable.close(writer) } ) (csvOutputFolder, result._1, result._2) } finally Releasable.close(client) } protected[this] def sambaClient(): FileSystem = FileSystem.smbBuilder .hostname(sambaHostname) .port(sambaPort) .user(sambaUsername) .password(sambaPassword) .shareName(sambaShare) .build() }
Example 29
Source File: TestHdfsFileSystem.scala From ohara with Apache License 2.0
package oharastream.ohara.client.filesystem.hdfs import java.io.{BufferedWriter, File, OutputStreamWriter} import java.nio.charset.StandardCharsets import oharastream.ohara.client.filesystem.{FileFilter, FileSystem, FileSystemTestBase} import oharastream.ohara.common.exception.FileSystemException import oharastream.ohara.common.util.CommonUtils import org.junit.Test import org.scalatest.matchers.should.Matchers._ class TestHdfsFileSystem extends FileSystemTestBase { private[this] val tempFolder: File = CommonUtils.createTempFolder("local_hdfs") private[this] val hdfsURL: String = new File(tempFolder.getAbsolutePath).toURI.toString override protected val fileSystem: FileSystem = FileSystem.hdfsBuilder.url(hdfsURL).build override protected val rootDir: String = tempFolder.toString // override this method because the Local HDFS doesn't support append() @Test override def testAppend(): Unit = { val file = randomFile() fileSystem.create(file).close() intercept[FileSystemException] { fileSystem.append(file) }.getMessage shouldBe "Not supported" } // override this method because the Local HDFS doesn't support append() @Test override def testDeleteFileThatHaveBeenRead(): Unit = { val file = randomFile(rootDir) val data: Seq[String] = Seq("123", "456") val writer = new BufferedWriter(new OutputStreamWriter(fileSystem.create(file), StandardCharsets.UTF_8)) try data.foreach(line => { writer.append(line) writer.newLine() }) finally writer.close() fileSystem.exists(file) shouldBe true fileSystem.readLines(file) shouldBe data fileSystem.delete(file) fileSystem.exists(file) shouldBe false fileSystem.listFileNames(rootDir, FileFilter.EMPTY).size shouldBe 0 } }
Example 30
Source File: ProxyServer.scala From devbox with Apache License 2.0
package cmdproxy import java.io.BufferedReader import java.io.InputStreamReader import java.io.OutputStreamWriter import java.io.PrintWriter import java.net.InetAddress import java.net.ServerSocket import java.net.Socket import scala.util.Using import devbox.logger.FileLogger import os.RelPath import ujson.ParseException import upickle.default.{macroRW, ReadWriter} case class Request(workingDir: String, cmd: Seq[String]) object Request { implicit val rw: ReadWriter[Request] = macroRW } val localDir: Map[os.RelPath, os.Path] = dirMapping.map(_.swap).toMap def start(): Unit = { logger.info(s"Starting command proxy server, listening at ${socket.getInetAddress}:${socket.getLocalPort}") (new Thread("Git Proxy Thread") { override def run(): Unit = { while (!socket.isClosed) { Using(socket.accept()) { handleConnection } recover { case e: Exception => logger.error(s"Error handling request ${e.getMessage}") case e: java.net.SocketException if e.getMessage == "Socket closed" => logger.error(s"Git proxy socket closed") } } } }).start() } def handleConnection(conn: Socket): Unit = try { logger.info(s"Accepting connection from ${conn.getInetAddress}") val in = new BufferedReader(new InputStreamReader(conn.getInputStream, ProxyServer.CHARSET_NAME)) val out = new PrintWriter(new OutputStreamWriter(conn.getOutputStream, ProxyServer.CHARSET_NAME)) upickle.default.read[Request](in.readLine()) match { case Request(dir, args) => val workingDir = localDir .collect{case (remote, local) if RelPath(dir).startsWith(remote) => local / RelPath(dir).relativeTo(remote) } .head // being cautious here and only execute "git" commands if (args.headOption.exists((_ == "git"))) { logger.info(s"Executing `${args.mkString(" ")}` in $workingDir") val proc = os.proc(args).call( workingDir, mergeErrIntoOut = true, stdout = os.ProcessOutput.Readlines(str => out.println(upickle.default.write(Left[String, Int](str))) ), check = false, timeout = 10000 ) out.println(upickle.default.write(Right[String, Int](proc.exitCode))) } else { val msg = s"Not executing non-git commend: `${args.mkString(" ")}`." logger.info(msg) out.println(upickle.default.write(Right[String, Int](1))) } out.flush() } } catch { case e: ParseException => logger.error(s"Error parsing incoming json request: ${e.getMessage}") } } object ProxyServer { val DEFAULT_PORT = 20280 val CHARSET_NAME = "UTF-8" }
Example 31
Source File: GraphLoaderSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.graphx import java.io.File import java.io.FileOutputStream import java.io.OutputStreamWriter import java.nio.charset.StandardCharsets import org.apache.spark.SparkFunSuite import org.apache.spark.util.Utils class GraphLoaderSuite extends SparkFunSuite with LocalSparkContext { test("GraphLoader.edgeListFile") { withSpark { sc => val tmpDir = Utils.createTempDir() val graphFile = new File(tmpDir.getAbsolutePath, "graph.txt") val writer = new OutputStreamWriter(new FileOutputStream(graphFile), StandardCharsets.UTF_8) for (i <- (1 until 101)) writer.write(s"$i 0\n") writer.close() try { val graph = GraphLoader.edgeListFile(sc, tmpDir.getAbsolutePath) val neighborAttrSums = graph.aggregateMessages[Int]( ctx => ctx.sendToDst(ctx.srcAttr), _ + _) assert(neighborAttrSums.collect.toSet === Set((0: VertexId, 100))) } finally { Utils.deleteRecursively(tmpDir) } } } }
Example 32
Source File: SparkILoop.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.repl import java.io.BufferedReader import scala.Predef.{println => _, _} import scala.tools.nsc.Settings import scala.tools.nsc.interpreter.{ILoop, JPrintWriter} import scala.tools.nsc.util.stringFromStream import scala.util.Properties.{javaVersion, javaVmName, versionString} def run(code: String, sets: Settings = new Settings): String = { import java.io.{ BufferedReader, StringReader, OutputStreamWriter } stringFromStream { ostream => Console.withOut(ostream) { val input = new BufferedReader(new StringReader(code)) val output = new JPrintWriter(new OutputStreamWriter(ostream), true) val repl = new SparkILoop(input, output) if (sets.classpath.isDefault) { sets.classpath.value = sys.props("java.class.path") } repl process sets } } } def run(lines: List[String]): String = run(lines.map(_ + "\n").mkString) }
Example 33
Source File: PLYReadWriteTests.scala From scalismo-faces with Apache License 2.0 | 5 votes |
package scalismo.faces.io import java.io.{ByteArrayInputStream, ByteArrayOutputStream, OutputStreamWriter} import java.nio.ByteOrder import java.util.Scanner import scalismo.faces.FacesTestSuite import scalismo.faces.io.ply._ class PLYReadWriteTests extends FacesTestSuite { describe("Write-read cycles to string, big- and little endian") { def testRWEndianCycle[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A], bo: ByteOrder): Unit = { val N = toWrite.size val os = new ByteArrayOutputStream() val writer = new SequenceWriter[A] writer.write(toWrite, os, bo) val ba = os.toByteArray val is = new ByteArrayInputStream(ba) val reader = new FixedLengthSequenceReader[A] val read = reader.read(N, is, bo) read.zip(toWrite).foreach { p => p._1 shouldBe p._2 } } def testRWStringCycle[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A]): Unit = { val N = toWrite.size val os = new ByteArrayOutputStream() val osw = new OutputStreamWriter(os) val writer = new SequenceWriter[A] writer.write(toWrite, osw) osw.flush() val is = new ByteArrayInputStream(os.toByteArray) val isr = new Scanner(is) val reader = new FixedLengthSequenceReader[A] val read = reader.read(N, isr) read.zip(toWrite).foreach { p => p._1 shouldBe p._2 } } def testAllThreeCycles[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A]): Unit = { testRWStringCycle(toWrite) testRWEndianCycle(toWrite, ByteOrder.BIG_ENDIAN) testRWEndianCycle(toWrite, ByteOrder.LITTLE_ENDIAN) } it("should result in the same sequence of bytes") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toByte testAllThreeCycles(toWrite) } it("should result in the same sequence of char") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toChar testAllThreeCycles(toWrite) } it("should result in the same sequence of short") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toShort testAllThreeCycles(toWrite) } it("should result in the same sequence of int") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toInt testAllThreeCycles(toWrite) } it("should result in the same sequence of long") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toLong testAllThreeCycles(toWrite) } it("should result in the same sequence of float") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toFloat testAllThreeCycles(toWrite) } it("should result in the same sequence of double") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255) testAllThreeCycles(toWrite) } } }
Example 34
Source File: CsvParserFactory.scala From spark-cdm with MIT License | 5 votes |
package com.microsoft.cdm.utils import java.io.OutputStreamWriter import com.univocity.parsers.csv.{CsvParser, CsvParserSettings, CsvWriter, CsvWriterSettings} object CsvParserFactory { def build(): CsvParser = { val settings = new CsvParserSettings() val format = settings.getFormat format.setDelimiter(',') settings.setMaxCharsPerColumn(500000) settings.setMaxColumns(512 * 4) new CsvParser(settings) } def buildWriter(outputWriter: OutputStreamWriter): CsvWriter = { val settings = new CsvWriterSettings() settings.setQuoteAllFields(true); new CsvWriter(outputWriter, settings) } }
Example 35
Source File: Pathway.scala From piflow with BSD 2-Clause "Simplified" License | 5 votes |
package cn.piflow.bundle.microorganism

import java.io.{BufferedReader, InputStreamReader, OutputStreamWriter}

import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import cn.piflow.conf.{ConfigurableStop, Port, StopGroup}
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FSDataInputStream, FSDataOutputStream, FileSystem, Path}
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.json.JSONObject

class Pathway extends ConfigurableStop {
  override val authorEmail: String = "[email protected]"
  override val description: String = "Parse Pathway data"
  override val inportList: List[String] = List(Port.DefaultPort.toString)
  override val outportList: List[String] = List(Port.DefaultPort.toString)

  var cachePath: String = _

  def setProperties(map: Map[String, Any]): Unit = {
    cachePath = MapUtil.get(map, key = "cachePath").asInstanceOf[String]
  }

  override def getPropertyDescriptor(): List[PropertyDescriptor] = {
    var descriptor: List[PropertyDescriptor] = List()
    val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").description("Temporary Cache File Path")
      .defaultValue("/pathway").required(true)
    descriptor = cachePath :: descriptor
    descriptor
  }

  override def getIcon(): Array[Byte] = {
    ImageUtil.getImage("icon/microorganism/Pathway.png")
  }

  override def getGroup(): List[String] = {
    List(StopGroup.MicroorganismGroup)
  }

  override def initialize(ctx: ProcessContext): Unit = {
  }

  override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
    val inDf: DataFrame = in.read()

    var pathStr: String = inDf.take(1)(0).get(0).asInstanceOf[String]

    val configuration: Configuration = new Configuration()
    val pathARR: Array[String] = pathStr.split("\\/")
    var hdfsUrl: String = ""
    for (x <- (0 until 3)) {
      hdfsUrl += (pathARR(x) + "/")
    }
    configuration.set("fs.defaultFS", hdfsUrl)
    var fs: FileSystem = FileSystem.get(configuration)

    val hdfsPathTemporary = hdfsUrl + cachePath + "/pathwayCache/pathwayCache.json"
    val path: Path = new Path(hdfsPathTemporary)

    if (fs.exists(path)) {
      fs.delete(path)
    }

    fs.create(path).close()

    val hdfsWriter: OutputStreamWriter = new OutputStreamWriter(fs.append(path))

    var fdis: FSDataInputStream = null
    var br: BufferedReader = null
    var doc: JSONObject = null

    inDf.collect().foreach(row => {
      pathStr = row.get(0).asInstanceOf[String]

      fdis = fs.open(new Path(pathStr))
      br = new BufferedReader(new InputStreamReader(fdis))
      // reset the flag for every input file; declared outside the loop it would stay
      // false after the first file and the remaining files would never be parsed
      var hasAnotherSequence: Boolean = true
      var count = 0
      while (hasAnotherSequence) {
        count += 1
        doc = new JSONObject
        hasAnotherSequence = util.KeggPathway.process(br, doc)

        doc.write(hdfsWriter)
        hdfsWriter.write("\n")
      }
      br.close()
      fdis.close()
    })
    hdfsWriter.close()

    val df: DataFrame = pec.get[SparkSession]().read.json(hdfsPathTemporary)
    df.schema.printTreeString()
    println(df.count)

    out.write(df)
  }
}
Example 36
Source File: MNIST.scala From spark-tsne with Apache License 2.0 | 5 votes |
package com.github.saurfang.spark.tsne.examples import java.io.{BufferedWriter, OutputStreamWriter} import com.github.saurfang.spark.tsne.impl._ import com.github.saurfang.spark.tsne.tree.SPTree import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.linalg.distributed.RowMatrix import org.apache.spark.{SparkConf, SparkContext} import org.slf4j.LoggerFactory object MNIST { private def logger = LoggerFactory.getLogger(MNIST.getClass) def main (args: Array[String]) { val conf = new SparkConf() .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") .registerKryoClasses(Array(classOf[SPTree])) val sc = new SparkContext(conf) val hadoopConf = sc.hadoopConfiguration val fs = FileSystem.get(hadoopConf) val dataset = sc.textFile("data/MNIST/mnist.csv.gz") .zipWithIndex() .filter(_._2 < 6000) .sortBy(_._2, true, 60) .map(_._1) .map(_.split(",")) .map(x => (x.head.toInt, x.tail.map(_.toDouble))) .cache() //logInfo(dataset.collect.map(_._2.toList).toList.toString) //val features = dataset.map(x => Vectors.dense(x._2)) //val scaler = new StandardScaler(true, true).fit(features) //val scaledData = scaler.transform(features) // .map(v => Vectors.dense(v.toArray.map(x => if(x.isNaN || x.isInfinite) 0.0 else x))) // .cache() val data = dataset.flatMap(_._2) val mean = data.mean() val std = data.stdev() val scaledData = dataset.map(x => Vectors.dense(x._2.map(v => (v - mean) / std))).cache() val labels = dataset.map(_._1).collect() val matrix = new RowMatrix(scaledData) val pcaMatrix = matrix.multiply(matrix.computePrincipalComponents(50)) pcaMatrix.rows.cache() val costWriter = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(s".tmp/MNIST/cost.txt"), true))) //SimpleTSNE.tsne(pcaMatrix, perplexity = 20, maxIterations = 200) BHTSNE.tsne(pcaMatrix, maxIterations = 500, callback = { //LBFGSTSNE.tsne(pcaMatrix, perplexity = 10, maxNumIterations = 500, numCorrections = 10, convergenceTol = 1e-8) case (i, y, loss) => if(loss.isDefined) logger.info(s"$i iteration finished with loss $loss") val os = fs.create(new Path(s".tmp/MNIST/result${"%05d".format(i)}.csv"), true) val writer = new BufferedWriter(new OutputStreamWriter(os)) try { (0 until y.rows).foreach { row => writer.write(labels(row).toString) writer.write(y(row, ::).inner.toArray.mkString(",", ",", "\n")) } if(loss.isDefined) costWriter.write(loss.get + "\n") } finally { writer.close() } }) costWriter.close() sc.stop() } }
Example 37
Source File: CodecStreams.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources import java.io.{InputStream, OutputStream, OutputStreamWriter} import java.nio.charset.{Charset, StandardCharsets} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.io.compress._ import org.apache.hadoop.mapreduce.JobContext import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat import org.apache.hadoop.util.ReflectionUtils import org.apache.spark.TaskContext object CodecStreams { private def getDecompressionCodec(config: Configuration, file: Path): Option[CompressionCodec] = { val compressionCodecs = new CompressionCodecFactory(config) Option(compressionCodecs.getCodec(file)) } def createInputStream(config: Configuration, file: Path): InputStream = { val fs = file.getFileSystem(config) val inputStream: InputStream = fs.open(file) getDecompressionCodec(config, file) .map(codec => codec.createInputStream(inputStream)) .getOrElse(inputStream) } def getCompressionExtension(context: JobContext): String = { getCompressionCodec(context) .map(_.getDefaultExtension) .getOrElse("") } }
Example 38
Source File: StreamMetadata.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.streaming import java.io.{InputStreamReader, OutputStreamWriter} import java.nio.charset.StandardCharsets import java.util.ConcurrentModificationException import scala.util.control.NonFatal import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileAlreadyExistsException, FSDataInputStream, Path} import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.apache.spark.internal.Logging import org.apache.spark.sql.execution.streaming.CheckpointFileManager.CancellableFSDataOutputStream import org.apache.spark.sql.streaming.StreamingQuery def write( metadata: StreamMetadata, metadataFile: Path, hadoopConf: Configuration): Unit = { var output: CancellableFSDataOutputStream = null try { val fileManager = CheckpointFileManager.create(metadataFile.getParent, hadoopConf) output = fileManager.createAtomic(metadataFile, overwriteIfPossible = false) val writer = new OutputStreamWriter(output) Serialization.write(metadata, writer) writer.close() } catch { case e: FileAlreadyExistsException => if (output != null) { output.cancel() } throw new ConcurrentModificationException( s"Multiple streaming queries are concurrently using $metadataFile", e) case e: Throwable => if (output != null) { output.cancel() } logError(s"Error writing stream metadata $metadata to $metadataFile", e) throw e } } }
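Worth noting: new OutputStreamWriter(output) without a charset argument encodes with the JVM's default charset, so the metadata file's encoding depends on where the driver runs. The two-argument constructor pins it down; a small illustration of the difference (the stream and object names are placeholders, not part of the Spark code above):

import java.io.{ByteArrayOutputStream, OutputStreamWriter}
import java.nio.charset.StandardCharsets

object CharsetChoice {
  val out = new ByteArrayOutputStream()

  // platform-dependent: encodes with the JVM default charset
  val platformWriter = new OutputStreamWriter(out)

  // deterministic: always UTF-8, no matter where the code runs
  val utf8Writer = new OutputStreamWriter(out, StandardCharsets.UTF_8)
}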
Example 39
Source File: InteractiveChecksumLogger.scala From coursier with Apache License 2.0 | 5 votes |
package coursier.publish.checksum.logger import java.io.{OutputStream, OutputStreamWriter, Writer} import coursier.publish.checksum.ChecksumType import coursier.publish.fileset.FileSet import coursier.publish.logging.ProgressLogger final class InteractiveChecksumLogger(out: Writer, verbosity: Int) extends ChecksumLogger { private val underlying = new ProgressLogger[Object]( "Computed", "checksums", out ) override def computingSet(id: Object, fs: FileSet): Unit = underlying.processingSet(id, Some(fs.elements.length)) override def computing(id: Object, type0: ChecksumType, path: String): Unit = { if (verbosity >= 2) out.write(s"Computing ${type0.name} checksum of ${path.repr}\n") underlying.processing(path, id) } override def computed(id: Object, type0: ChecksumType, path: String, errorOpt: Option[Throwable]): Unit = { if (verbosity >= 2) out.write(s"Computed ${type0.name} checksum of ${path.repr}\n") underlying.processed(path, id, errorOpt.nonEmpty) } override def computedSet(id: Object, fs: FileSet): Unit = underlying.processedSet(id) override def start(): Unit = underlying.start() override def stop(keep: Boolean): Unit = underlying.stop(keep) } object InteractiveChecksumLogger { def create(out: OutputStream, verbosity: Int): InteractiveChecksumLogger = new InteractiveChecksumLogger(new OutputStreamWriter(out), verbosity) }
Example 40
Source File: MessageWriter.scala From lsp4s with Apache License 2.0 | 5 votes |
package scala.meta.jsonrpc import java.io.ByteArrayOutputStream import java.io.OutputStream import java.io.OutputStreamWriter import java.io.PrintWriter import java.nio.ByteBuffer import java.nio.charset.StandardCharsets import scala.concurrent.Future import io.circe.syntax._ import monix.execution.Ack import monix.reactive.Observer import scribe.LoggerSupport def write(msg: Message): Future[Ack] = lock.synchronized { baos.reset() val json = msg.asJson val protocol = BaseProtocolMessage.fromJson(json) logger.trace(s" --> $json") val byteBuffer = MessageWriter.write(protocol, baos, headerOut) out.onNext(byteBuffer) } } object MessageWriter { def headerWriter(out: OutputStream): PrintWriter = { new PrintWriter(new OutputStreamWriter(out, StandardCharsets.US_ASCII)) } def write(message: BaseProtocolMessage): ByteBuffer = { val out = new ByteArrayOutputStream() val header = headerWriter(out) write(message, out, header) } def write( message: BaseProtocolMessage, out: ByteArrayOutputStream, headerOut: PrintWriter ): ByteBuffer = { message.header.foreach { case (key, value) => headerOut.write(key) headerOut.write(": ") headerOut.write(value) headerOut.write("\r\n") } headerOut.write("\r\n") out.write(message.content) out.flush() val buffer = ByteBuffer.wrap(out.toByteArray, 0, out.size()) buffer } }
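The header writer above uses US_ASCII because LSP base-protocol headers are plain ASCII, while the message content is written to the stream as raw bytes rather than through the writer. A self-contained sketch of that framing, independent of the lsp4s classes (the object name and payload are stand-ins):

import java.io.{ByteArrayOutputStream, OutputStreamWriter, PrintWriter}
import java.nio.charset.StandardCharsets

object LspFraming {
  // frames a payload as `Content-Length: N\r\n\r\n<bytes>`
  def frame(payload: String): Array[Byte] = {
    val body = payload.getBytes(StandardCharsets.UTF_8)
    val out = new ByteArrayOutputStream()
    val headers = new PrintWriter(new OutputStreamWriter(out, StandardCharsets.US_ASCII))
    headers.write(s"Content-Length: ${body.length}\r\n\r\n")
    headers.flush() // push the encoded header bytes into `out` before the body
    out.write(body) // the body bypasses the writer and goes to the stream as raw bytes
    out.toByteArray
  }
}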
Example 41
Source File: package.scala From sjson-new with Apache License 2.0 | 5 votes |
package sjsonnew package support.scalajson import java.io.{ByteArrayOutputStream, OutputStreamWriter} import shaded.scalajson.ast.unsafe._ import org.scalactic._ package object unsafe { implicit class AnyOps[A: JsonWriter](val _x: A) { def toJson: JValue = Converter toJsonUnsafe _x def toJsonStr: String = _x.toJson.toJsonStr } implicit class AnyOps2[A: JsonWriter : JsonReader](val _x: A) { def jsonRoundTrip: A = _x.toJson.toJsonStr.toJson.fromJson[A] def jsonPrettyRoundTrip: A = _x.toJson.toPrettyStr.toJson.fromJson[A] def jsonBinaryRoundTrip: A = _x.toJson.toBinary.toJson.fromJson[A] } implicit class JValueOps(val _j: JValue) extends AnyVal { def toJsonStr: String = CompactPrinter(_j) def toPrettyStr: String = PrettyPrinter(_j) def toBinary: Array[Byte] = { val baos = new ByteArrayOutputStream() val xpto = new OutputStreamWriter(baos) CompactPrinter.print(_j, xpto) xpto.close() baos.toByteArray } def fromJson[A: JsonReader]: A = Converter.fromJsonUnsafe[A](_j) // scalajson.ast.unsafe doesn't have good toStrings def to_s: String = _j match { case JNull => "JNull" case JString(value) => s"JString($value)" case JNumber(value) => s"JNumber($value)" case JTrue => "JTrue" case JFalse => "JFalse" case JObject(value) => value.iterator map (f => s"${f.field}: ${f.value.to_s}") mkString ("JObject(", ", ", ")") case JArray(value) => value.iterator map (_.to_s) mkString ("JArray(", ", ", ")") } } implicit class StringOps(val _s: String) extends AnyVal { def toJson: JValue = Parser parseUnsafe _s def fromJsonStr[A: JsonReader]: A = _s.toJson.fromJson[A] } implicit class ByteArrayOps(val a: Array[Byte]) extends AnyVal { def toJson: JValue = Parser.parseFromByteBuffer(java.nio.ByteBuffer.wrap(a)).get } // Can't trust unsafe's toString, eg. JObject doesn't nicely toString its fields array, so its toString sucks implicit val altPrettifier: Prettifier = Prettifier { case j: JValue => j.to_s case x => Prettifier default x } }
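The toBinary helper works because the OutputStreamWriter is closed, and therefore flushed, before baos.toByteArray is read; an OutputStreamWriter buffers encoded bytes internally, so skipping the flush or close can silently truncate the output. A minimal sketch of the same rule (names are illustrative):

import java.io.{ByteArrayOutputStream, OutputStreamWriter}
import java.nio.charset.StandardCharsets

object FlushBeforeRead {
  def encode(text: String): Array[Byte] = {
    val baos = new ByteArrayOutputStream()
    val writer = new OutputStreamWriter(baos, StandardCharsets.UTF_8)
    writer.write(text)
    writer.close() // flushes the encoder's buffer; without this, baos can be missing bytes
    baos.toByteArray
  }
}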
Example 42
Source File: GraphLoaderSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.graphx import java.io.File import java.io.FileOutputStream import java.io.OutputStreamWriter import java.nio.charset.StandardCharsets import org.apache.spark.SparkFunSuite import org.apache.spark.util.Utils class GraphLoaderSuite extends SparkFunSuite with LocalSparkContext { test("GraphLoader.edgeListFile") { withSpark { sc => val tmpDir = Utils.createTempDir() val graphFile = new File(tmpDir.getAbsolutePath, "graph.txt") val writer = new OutputStreamWriter(new FileOutputStream(graphFile), StandardCharsets.UTF_8) for (i <- (1 until 101)) writer.write(s"$i 0\n") writer.close() try { val graph = GraphLoader.edgeListFile(sc, tmpDir.getAbsolutePath) val neighborAttrSums = graph.aggregateMessages[Int]( ctx => ctx.sendToDst(ctx.srcAttr), _ + _) assert(neighborAttrSums.collect.toSet === Set((0: VertexId, 100))) } finally { Utils.deleteRecursively(tmpDir) } } } }
Example 43
Source File: StreamMetadata.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.streaming import java.io.{InputStreamReader, OutputStreamWriter} import java.nio.charset.StandardCharsets import scala.util.control.NonFatal import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, FSDataOutputStream, Path} import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.apache.spark.internal.Logging import org.apache.spark.sql.streaming.StreamingQuery def write( metadata: StreamMetadata, metadataFile: Path, hadoopConf: Configuration): Unit = { var output: FSDataOutputStream = null try { val fs = FileSystem.get(hadoopConf) output = fs.create(metadataFile) val writer = new OutputStreamWriter(output) Serialization.write(metadata, writer) writer.close() } catch { case NonFatal(e) => logError(s"Error writing stream metadata $metadata to $metadataFile", e) throw e } finally { IOUtils.closeQuietly(output) } } }
Example 44
Source File: TestDiskFull.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.utils import java.io.BufferedOutputStream import java.io.FileOutputStream import java.io.IOException import java.io.OutputStreamWriter import java.io.PrintWriter import java.io.SyncFailedException import java.nio.charset.StandardCharsets import org.clulab.wm.eidos.test.TestUtils._ import org.clulab.wm.eidos.utils.Closer.AutoCloser class TestDiskFull extends Test { def test1 = { val file = "/E:/full.dat" var i = 0 try { val text1 = "The quick brown fox jumped over the lazy dog." val text = text1 + text1 for (limit <- 1 until 400) { val fos = new FileOutputStream(file) val osw = new OutputStreamWriter(new BufferedOutputStream(fos), StandardCharsets.UTF_8.toString) i = 0 new PrintWriter(osw).autoClose { pw => while (i < limit) { pw.print(text) i += 1 // pw.flush() // osw.flush() // fos.flush() fos.getFD.sync() } } } } catch { case exception: SyncFailedException => println(s"Synchronization failed for file $file at $i") println("Exiting with code -2 on assumption that the disk is full") System.exit(-2) case exception: IOException => println(s"IO failed for file $file at $i") println("Exiting with code -2 on assumption that the disk is full") System.exit(-2) case exception: Exception => println(s"Exception for file $file at $i") exception.printStackTrace() case throwable: Throwable => println(s"Throwable for file $file at $i") throwable.printStackTrace() } } // test1 }
Example 45
Source File: SessionDataFileHDFSWriter.scala From spark_training with Apache License 2.0 | 5 votes |
package com.malaska.spark.training.streaming.dstream.sessionization import java.io.BufferedWriter import java.io.FileWriter import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.conf.Configuration import java.io.OutputStreamWriter import org.apache.hadoop.fs.Path import java.util.Random object SessionDataFileHDFSWriter { val eol = System.getProperty("line.separator"); def main(args: Array[String]) { if (args.length == 0) { println("SessionDataFileWriter {tempDir} {distDir} {numberOfFiles} {numberOfEventsPerFile} {waitBetweenFiles}"); return; } val conf = new Configuration conf.addResource(new Path("/etc/hadoop/conf/core-site.xml")) conf.addResource(new Path("/etc/hadoop/conf/mapred-site.xml")) conf.addResource(new Path("/etc/hadoop/conf/hdfs-site.xml")) val fs = FileSystem.get(new Configuration) val rootTempDir = args(0) val rootDistDir = args(1) val files = args(2).toInt val loops = args(3).toInt val waitBetweenFiles = args(4).toInt val r = new Random for (f <- 1 to files) { val rootName = "/weblog." + System.currentTimeMillis() val tmpPath = new Path(rootTempDir + rootName + ".tmp") val writer = new BufferedWriter(new OutputStreamWriter(fs.create(tmpPath))) print(f + ": [") val randomLoops = loops + r.nextInt(loops) for (i <- 1 to randomLoops) { writer.write(SessionDataGenerator.getNextEvent + eol) if (i%100 == 0) { print(".") } } println("]") writer.close val distPath = new Path(rootDistDir + rootName + ".dat") fs.rename(tmpPath, distPath) Thread.sleep(waitBetweenFiles) } println("Done") } }
Example 46
Source File: MustacheTemplates.scala From fintrospect with Apache License 2.0 | 5 votes |
package io.fintrospect.templating import java.io.{ByteArrayOutputStream, File, OutputStreamWriter} import java.nio.charset.StandardCharsets import com.github.mustachejava.resolver.{DefaultResolver, FileSystemResolver} import com.github.mustachejava.{DefaultMustacheFactory, Mustache} import com.twitter.io.Buf object MustacheTemplates extends Templates { private def render(view: View, mustache: Mustache): Buf = { val outputStream = new ByteArrayOutputStream(4096) val writer = new OutputStreamWriter(outputStream, StandardCharsets.UTF_8) try { mustache.execute(writer, view) } finally { writer.close() } Buf.ByteArray.Owned(outputStream.toByteArray) } def CachingClasspath(baseClasspathPackage: String = "."): TemplateRenderer = new TemplateRenderer { private val factory = new DefaultMustacheFactory(new DefaultResolver(baseClasspathPackage)) { setObjectHandler(new ScalaObjectHandler) } def toBuf(view: View): Buf = render(view, factory.compile(view.template + ".mustache")) } def Caching(baseTemplateDir: String): TemplateRenderer = new TemplateRenderer { private val factory = new DefaultMustacheFactory(new FileSystemResolver(new File(baseTemplateDir))) { setObjectHandler(new ScalaObjectHandler) } def toBuf(view: View): Buf = render(view, factory.compile(view.template + ".mustache")) } def HotReload(baseTemplateDir: String = "."): TemplateRenderer = new TemplateRenderer { class WipeableMustacheFactory extends DefaultMustacheFactory(new FileSystemResolver(new File(baseTemplateDir))) { setObjectHandler(new ScalaObjectHandler) } def toBuf(view: View): Buf = render(view, new WipeableMustacheFactory().compile(view.template + ".mustache")) } }
Example 47
Source File: CommonLog.scala From AppCrawler with Apache License 2.0 | 5 votes |
package com.testerhome.appcrawler

import java.io.OutputStreamWriter

import com.fasterxml.jackson.annotation.JsonIgnore
import org.apache.log4j._

trait CommonLog {
  BasicConfigurator.configure()
  Logger.getRootLogger.setLevel(Level.INFO)

  @JsonIgnore
  val layout = new PatternLayout("%d{yyyy-MM-dd HH:mm:ss} %p [%c{1}.%M.%L] %m%n")
  @JsonIgnore
  lazy val log = initLog()

  def initLog(): Logger = {
    val log = Logger.getLogger(this.getClass.getName)
    //val log=Logger.getRootLogger
    if (log.getAppender("console") == null) {
      val console = new ConsoleAppender()
      console.setName("console")
      console.setWriter(new OutputStreamWriter(System.out))
      console.setLayout(layout)
      log.addAppender(console)
    } else {
      log.info("console appender already exists")
    }
    log.trace(s"set ${this} log level to ${GA.logLevel}")
    log.setLevel(GA.logLevel)
    log.setAdditivity(false)
    log
  }
}
Example 48
Source File: StarsAnalysisDemo.scala From CkoocNLP with Apache License 2.0 | 5 votes |
package applications.analysis

import java.io.{BufferedWriter, FileOutputStream, OutputStreamWriter}

import functions.segment.Segmenter
import org.apache.log4j.{Level, Logger}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Row, SparkSession}

object StarsAnalysisDemo {
  def main(args: Array[String]) {
    Logger.getLogger("org").setLevel(Level.WARN)

    val spark = SparkSession
      .builder
      .master("local[2]")
      .appName("Stars Analysis Demo")
      .getOrCreate()

    val filePath = "E:/data/chinaNews/entertainment.txt"

    // Load the data, keep only the year and content fields, and filter the content field
    import spark.implicits._
    val data = spark.sparkContext.textFile(filePath).flatMap { line =>
      val tokens: Array[String] = line.split("\u00ef")
      if (tokens.length > 3) {
        var year: String = tokens(2).split("-")(0)
        if (tokens(2).contains("年")) year = tokens(2).split("年")(0)

        var content = tokens(3)
        if (content.length > 22 && content.substring(0, 20).contains("日电")) {
          content = content.substring(content.indexOf("日电") + 2, content.length).trim
        }
        if (content.startsWith("(")) content = content.substring(content.indexOf(")") + 1, content.length)
        if (content.length > 20 && content.substring(content.length - 20, content.length).contains("记者")) {
          content = content.substring(0, content.lastIndexOf("记者")).trim
        }

        Some(year, content)
      } else None
    }.toDF("year", "content")

    // Segment the text, drop single-character terms, and keep the part-of-speech tag of each term
    val segmenter = new Segmenter()
      .isAddNature(true)
      .isDelEn(true)
      .isDelNum(true)
      .setMinTermLen(2)
      .setMinTermNum(5)
      .setSegType("StandardSegment")
      .setInputCol("content")
      .setOutputCol("segmented")
    val segDF: DataFrame = segmenter.transform(data)
    segDF.cache()

    val segRDD: RDD[(Int, Seq[String])] = segDF.select("year", "segmented").rdd.map { case Row(year: String, terms: Seq[String]) =>
      (Integer.parseInt(year), terms)
    }

    val result: Array[String] = segRDD.map(line => line._1.toString + "\u00ef" + line._2.mkString(",")).collect()
    val writer: BufferedWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("E:/entertainment_seg.txt")))
    result.foreach(line => writer.write(line + "\n"))
    writer.close()

    // Count the stars that appear most often in the 2016 news
    val stars2016 = segRDD.filter(_._1 == 2016)
      .flatMap { case (year: Int, termStr: Seq[String]) =>
        val person = termStr
          .map(term => (term.split("/")(0), term.split("/")(1)))
          .filter(_._2.equalsIgnoreCase("nr"))
          .map(term => (term._1, 1L))
        person
      }
      .reduceByKey(_ + _)
      .sortBy(_._2, ascending = false)

    segDF.unpersist()

    stars2016.take(100).foreach(println)

    spark.stop()
  }
}
Example 49
Source File: AkkaHttpPrometheusExporter.scala From cloudstate with Apache License 2.0 | 5 votes |
package io.cloudstate.proxy import java.io.OutputStreamWriter import java.util import akka.actor.ActorSystem import akka.http.scaladsl.Http import io.prometheus.client.CollectorRegistry import akka.http.scaladsl.model._ import akka.http.scaladsl.server.Directives._ import akka.stream.Materializer import akka.util.ByteString import io.prometheus.client.exporter.common.TextFormat import scala.concurrent.Future class AkkaHttpPrometheusExporter(metricsPort: Int, registry: CollectorRegistry = CollectorRegistry.defaultRegistry)( implicit system: ActorSystem, mat: Materializer ) { private[this] final val PrometheusContentType = ContentType.parse(TextFormat.CONTENT_TYPE_004).right.get private def routes = get { (path("metrics") | pathSingleSlash) { encodeResponse { parameter(Symbol("name[]").*) { names => complete { val namesSet = new util.HashSet[String]() names.foreach(namesSet.add) val builder = ByteString.newBuilder val writer = new OutputStreamWriter(builder.asOutputStream) TextFormat.write004(writer, registry.filteredMetricFamilySamples(namesSet)) // Very important to flush the writer before we build the byte string! writer.flush() HttpEntity(PrometheusContentType, builder.result()) } } } } } def start(): Future[Http.ServerBinding] = Http().bindAndHandle(routes, "0.0.0.0", metricsPort) }
Example 50
Source File: InteractiveSonatypeLogger.scala From coursier with Apache License 2.0 | 5 votes |
package coursier.publish.sonatype.logger

import java.io.{OutputStream, OutputStreamWriter}

import coursier.cache.internal.Terminal.Ansi

final class InteractiveSonatypeLogger(out: OutputStreamWriter, verbosity: Int) extends SonatypeLogger {
  override def listingProfiles(attempt: Int, total: Int): Unit =
    if (verbosity >= 0) {
      val extra = if (attempt == 0) "" else s" (attempt $attempt / $total)"
      out.write("Listing Sonatype profiles..." + extra)
      out.flush()
    }

  override def listedProfiles(errorOpt: Option[Throwable]): Unit = {
    if (verbosity >= 0) {
      out.clearLine(2)
      out.write('\n')
      out.up(1)
      out.flush()
    }

    val msgOpt =
      if (errorOpt.isEmpty) {
        if (verbosity >= 1) Some("Listed Sonatype profiles")
        else None
      } else
        Some("Failed to list Sonatype profiles")

    for (msg <- msgOpt) {
      out.write(s"$msg\n")
      out.flush()
    }
  }
}

object InteractiveSonatypeLogger {
  def create(out: OutputStream, verbosity: Int): SonatypeLogger =
    new InteractiveSonatypeLogger(new OutputStreamWriter(out), verbosity)
}
Example 51
Source File: FileSystem.scala From ohara with Apache License 2.0 | 4 votes |
package oharastream.ohara.client.filesystem import java.io.{BufferedReader, BufferedWriter, IOException, InputStreamReader, OutputStreamWriter} import java.nio.charset.{Charset, StandardCharsets} import oharastream.ohara.client.filesystem.ftp.FtpFileSystem import oharastream.ohara.client.filesystem.hdfs.HdfsFileSystem import oharastream.ohara.client.filesystem.smb.SmbFileSystem import oharastream.ohara.common.exception.FileSystemException trait FileSystem extends oharastream.ohara.kafka.connector.storage.FileSystem { def readLines(path: String, encode: String = "UTF-8"): Array[String] = { val reader = new BufferedReader(new InputStreamReader(open(path), Charset.forName(encode))) try Iterator.continually(reader.readLine()).takeWhile(_ != null).toArray finally reader.close() } def wrap[T](f: () => T): T = try { f() } catch { case e: IOException => throw new FileSystemException(e.getMessage, e) case e: IllegalStateException => throw new FileSystemException(e.getMessage, e) } } object FileSystem { def hdfsBuilder: HdfsFileSystem.Builder = HdfsFileSystem.builder def ftpBuilder: FtpFileSystem.Builder = FtpFileSystem.builder def smbBuilder: SmbFileSystem.Builder = SmbFileSystem.builder }