java.io.OutputStreamWriter Scala Examples
The following examples show how to use java.io.OutputStreamWriter.
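Before the project examples, here is a minimal standalone sketch (not taken from any of the projects below; the file name and content are illustrative) of the pattern most of them share: wrap an OutputStream in an OutputStreamWriter with an explicit charset, buffer it, and close it in a finally block.

import java.io.{BufferedWriter, FileOutputStream, OutputStreamWriter}
import java.nio.charset.StandardCharsets

object OutputStreamWriterExample extends App {
  // Illustrative target; any OutputStream (file, socket, HDFS stream, ...) works the same way.
  val out = new FileOutputStream("example.txt")
  // An explicit charset avoids depending on the platform default encoding.
  val writer = new BufferedWriter(new OutputStreamWriter(out, StandardCharsets.UTF_8))
  try {
    writer.write("hello, OutputStreamWriter")
    writer.newLine()
  } finally {
    writer.close() // closing the writer flushes it and also closes the underlying stream
  }
}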
Example 1
Source File: BasicTestPerformance4Ftp.scala From ohara with Apache License 2.0
package oharastream.ohara.it.performance import java.io.{BufferedWriter, OutputStreamWriter} import java.util.concurrent.atomic.LongAdder import oharastream.ohara.common.data.Row import oharastream.ohara.common.util.{CommonUtils, Releasable} import org.junit.AssumptionViolatedException import spray.json.{JsNumber, JsString, JsValue} import scala.jdk.CollectionConverters._ import oharastream.ohara.client.filesystem.FileSystem import scala.concurrent.duration.Duration abstract class BasicTestPerformance4Ftp extends BasicTestPerformance { private[this] val ftpHostname = value(PerformanceTestingUtils.FTP_HOSTNAME_KEY) .getOrElse(throw new AssumptionViolatedException(s"${PerformanceTestingUtils.FTP_HOSTNAME_KEY} is required")) private[this] val ftpPort = value(PerformanceTestingUtils.FTP_PORT_KEY) .getOrElse(throw new AssumptionViolatedException(s"${PerformanceTestingUtils.FTP_PORT_KEY} is required")) .toInt private[this] val ftpUser = value(PerformanceTestingUtils.FTP_USER_KEY) .getOrElse(throw new AssumptionViolatedException(s"${PerformanceTestingUtils.FTP_USER_KEY} is required")) private[this] val ftpPassword = value(PerformanceTestingUtils.FTP_PASSWORD_KEY) .getOrElse(throw new AssumptionViolatedException(s"${PerformanceTestingUtils.FTP_PASSWORD_KEY} is required")) protected val ftpSettings: Map[String, JsValue] = Map( // convert the hostname to IP address oharastream.ohara.connector.ftp.FTP_HOSTNAME_KEY -> JsString(ftpHostname), oharastream.ohara.connector.ftp.FTP_PORT_KEY -> JsNumber(ftpPort), oharastream.ohara.connector.ftp.FTP_USER_NAME_KEY -> JsString(ftpUser), oharastream.ohara.connector.ftp.FTP_PASSWORD_KEY -> JsString(ftpPassword) ) private[this] val csvInputFolderKey = PerformanceTestingUtils.CSV_INPUT_KEY private[this] val csvOutputFolder: String = value(csvInputFolderKey).getOrElse("/input") private[this] val cleanupTestDataKey = PerformanceTestingUtils.DATA_CLEANUP_KEY protected val cleanupTestData: Boolean = value(cleanupTestDataKey).forall(_.toBoolean) protected def setupInputData(timeout: Duration): (String, Long, Long) = { val client = ftpClient() try { if (!PerformanceTestingUtils.exists(client, csvOutputFolder)) PerformanceTestingUtils.createFolder(client, csvOutputFolder) val result = generateData( numberOfRowsToFlush, timeout, (rows: Seq[Row]) => { val file = s"$csvOutputFolder/${CommonUtils.randomString()}" val writer = new BufferedWriter(new OutputStreamWriter(client.create(file))) val count = new LongAdder() val sizeInBytes = new LongAdder() try { val cellNames: Set[String] = rows.head.cells().asScala.map(_.name).toSet writer .append(cellNames.mkString(",")) .append("\n") rows.foreach(row => { val content = row.cells().asScala.map(_.value).mkString(",") count.increment() sizeInBytes.add(content.length) writer.append(content).append("\n") }) (count.longValue(), sizeInBytes.longValue()) } finally Releasable.close(writer) } ) (csvOutputFolder, result._1, result._2) } finally Releasable.close(client) } protected[this] def ftpClient() = FileSystem.ftpBuilder .hostname(ftpHostname) .port(ftpPort) .user(ftpUser) .password(ftpPassword) .build }
Example 2
Source File: FeatureSelection.scala From aerosolve with Apache License 2.0
package com.airbnb.aerosolve.training import java.io.BufferedWriter import java.io.OutputStreamWriter import java.util import com.airbnb.aerosolve.core.{ModelRecord, ModelHeader, FeatureVector, Example} import com.airbnb.aerosolve.core.models.LinearModel import com.airbnb.aerosolve.core.util.Util import com.typesafe.config.Config import org.slf4j.{LoggerFactory, Logger} import org.apache.spark.SparkContext import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD import scala.collection.mutable.HashMap import scala.collection.mutable.HashSet import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.Buffer import scala.collection.JavaConversions._ import scala.collection.JavaConverters._ import scala.util.Random import scala.math.abs import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.fs.Path import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path object FeatureSelection { private final val log: Logger = LoggerFactory.getLogger("FeatureSelection") val allKey : (String, String) = ("$ALL", "$POS") // Given a RDD compute the pointwise mutual information between // the positive label and the discrete features. def pointwiseMutualInformation(examples : RDD[Example], config : Config, key : String, rankKey : String, posThreshold : Double, minPosCount : Double, newCrosses : Boolean) : RDD[((String, String), Double)] = { val pointwise = LinearRankerUtils.makePointwise(examples, config, key, rankKey) val features = pointwise .mapPartitions(part => { // The tuple2 is var, var | positive val output = scala.collection.mutable.HashMap[(String, String), (Double, Double)]() part.foreach(example =>{ val featureVector = example.example.get(0) val isPos = if (featureVector.floatFeatures.get(rankKey).asScala.head._2 > posThreshold) 1.0 else 0.0 val all : (Double, Double) = output.getOrElse(allKey, (0.0, 0.0)) output.put(allKey, (all._1 + 1.0, all._2 + 1.0 * isPos)) val features : Array[(String, String)] = LinearRankerUtils.getFeatures(featureVector) if (newCrosses) { for (i <- features) { for (j <- features) { if (i._1 < j._1) { val key = ("%s<NEW>%s".format(i._1, j._1), "%s<NEW>%s".format(i._2, j._2)) val x = output.getOrElse(key, (0.0, 0.0)) output.put(key, (x._1 + 1.0, x._2 + 1.0 * isPos)) } } } } for (feature <- features) { val x = output.getOrElse(feature, (0.0, 0.0)) output.put(feature, (x._1 + 1.0, x._2 + 1.0 * isPos)) } }) output.iterator }) .reduceByKey((a, b) => (a._1 + b._1, a._2 + b._2)) .filter(x => x._2._2 >= minPosCount) val allCount = features.filter(x => x._1.equals(allKey)).take(1).head features.map(x => { val prob = x._2._1 / allCount._2._1 val probPos = x._2._2 / allCount._2._2 (x._1, math.log(probPos / prob) / math.log(2.0)) }) } // Returns the maximum entropy per family def maxEntropy(input : RDD[((String, String), Double)]) : RDD[((String, String), Double)] = { input .map(x => (x._1._1, (x._1._2, x._2))) .reduceByKey((a, b) => if (math.abs(a._2) > math.abs(b._2)) a else b) .map(x => ((x._1, x._2._1), x._2._2)) } }
Example 3
Source File: InteractiveSignerLogger.scala From coursier with Apache License 2.0
package coursier.publish.signing.logger

import java.io.{OutputStream, OutputStreamWriter, Writer}

import coursier.publish.fileset.{FileSet, Path}
import coursier.publish.logging.ProgressLogger

final class InteractiveSignerLogger(out: Writer, verbosity: Int) extends SignerLogger {
  private val underlying = new ProgressLogger[Object](
    "Signed",
    "files",
    out,
    updateOnChange = true,
    doneEmoji = Some("\u270D\uFE0F ")
  )

  override def signing(id: Object, fileSet: FileSet): Unit = {
    underlying.processingSet(id, Some(fileSet.elements.length))
  }
  override def signed(id: Object, fileSet: FileSet): Unit =
    underlying.processedSet(id)
  override def signingElement(id: Object, path: Path): Unit = {
    if (verbosity >= 2)
      out.write(s"Signing ${path.repr}\n")
    underlying.processing(path.repr, id)
  }
  override def signedElement(id: Object, path: Path, excOpt: Option[Throwable]): Unit = {
    if (verbosity >= 2)
      out.write(s"Signed ${path.repr}\n")
    underlying.processed(path.repr, id, excOpt.nonEmpty)
  }
  override def start(): Unit = underlying.start()
  override def stop(keep: Boolean): Unit = underlying.stop(keep)
}

object InteractiveSignerLogger {
  def create(out: OutputStream, verbosity: Int): SignerLogger =
    new InteractiveSignerLogger(new OutputStreamWriter(out), verbosity)
}
Example 4
Source File: InteractiveDirLogger.scala From coursier with Apache License 2.0
package coursier.publish.dir.logger

import java.io.{OutputStream, OutputStreamWriter}
import java.nio.file.Path

import com.lightbend.emoji.ShortCodes.Defaults.defaultImplicit.emoji
import coursier.publish.logging.ProgressLogger

final class InteractiveDirLogger(out: OutputStreamWriter, dirName: String, verbosity: Int) extends DirLogger {
  private val underlying = new ProgressLogger[String](
    "Read",
    s"files from $dirName",
    out,
    doneEmoji = emoji("mag").map(_.toString())
  )

  override def reading(dir: Path): Unit = underlying.processingSet(dirName, None)
  override def element(dir: Path, file: Path): Unit = {
    underlying.processing(file.toString, dirName)
    underlying.processed(file.toString, dirName, false)
  }
  override def read(dir: Path, elements: Int): Unit = underlying.processedSet(dirName)

  override def start(): Unit = underlying.start()
  override def stop(keep: Boolean): Unit = underlying.stop(keep)
}

object InteractiveDirLogger {
  def create(out: OutputStream, dirName: String, verbosity: Int): DirLogger =
    new InteractiveDirLogger(new OutputStreamWriter(out), dirName, verbosity)
}
Example 5
Source File: Using.scala From Argus-SAF with Apache License 2.0
package org.argus.jawa.core.compiler.compile.io

import java.io.{Closeable, FileInputStream, FileOutputStream, InputStream, OutputStream, File => JavaFile}
import java.io.{BufferedInputStream, BufferedOutputStream, InputStreamReader, OutputStreamWriter}
import java.io.{BufferedReader, BufferedWriter}
import java.util.zip.GZIPInputStream
import java.net.URL
import java.nio.channels.FileChannel
import java.nio.charset.Charset
import java.util.jar.{JarFile, JarInputStream, JarOutputStream}
import java.util.zip.{GZIPOutputStream, ZipEntry, ZipFile, ZipInputStream, ZipOutputStream}

import ErrorHandling.translate

import scala.reflect.{Manifest => SManifest}

abstract class Using[Source, T] {
  protected def open(src: Source): T
  def apply[R](src: Source)(f: T => R): R = {
    val resource = open(src)
    try { f(resource) }
    finally { close(resource) }
  }
  protected def close(out: T): Unit
}

abstract class WrapUsing[Source, T](implicit srcMf: SManifest[Source], targetMf: SManifest[T]) extends Using[Source, T] {
  protected def label[S](m: SManifest[S]): String = m.runtimeClass.getSimpleName
  protected def openImpl(source: Source): T
  protected final def open(source: Source): T =
    translate("Error wrapping " + label(srcMf) + " in " + label(targetMf) + ": ") { openImpl(source) }
}

trait OpenFile[T] extends Using[JavaFile, T] {
  protected def openImpl(file: JavaFile): T
  protected final def open(file: JavaFile): T = {
    val parent = file.getParentFile
    if(parent != null)
      IO.createDirectory(parent)
    openImpl(file)
  }
}

object Using {
  def wrap[Source, T <: Closeable](openF: Source => T)(implicit srcMf: SManifest[Source], targetMf: SManifest[T]): Using[Source, T] =
    wrap(openF, closeCloseable)
  def wrap[Source, T](openF: Source => T, closeF: T => Unit)(implicit srcMf: SManifest[Source], targetMf: SManifest[T]): Using[Source, T] =
    new WrapUsing[Source, T] {
      def openImpl(source: Source): T = openF(source)
      def close(t: T): Unit = closeF(t)
    }

  def resource[Source, T <: Closeable](openF: Source => T): Using[Source, T] =
    resource(openF, closeCloseable)
  def resource[Source, T](openF: Source => T, closeF: T => Unit): Using[Source, T] =
    new Using[Source, T] {
      def open(s: Source): T = openF(s)
      def close(s: T): Unit = closeF(s)
    }

  def file[T <: Closeable](openF: JavaFile => T): OpenFile[T] = file(openF, closeCloseable)
  def file[T](openF: JavaFile => T, closeF: T => Unit): OpenFile[T] =
    new OpenFile[T] {
      def openImpl(file: JavaFile): T = openF(file)
      def close(t: T): Unit = closeF(t)
    }

  private def closeCloseable[T <: Closeable]: T => Unit = _.close()

  def bufferedOutputStream: Using[OutputStream, BufferedOutputStream] = wrap((out: OutputStream) => new BufferedOutputStream(out))
  def bufferedInputStream: Using[InputStream, BufferedInputStream] = wrap((in: InputStream) => new BufferedInputStream(in))
  def fileOutputStream(append: Boolean = false): OpenFile[BufferedOutputStream] = file(f => new BufferedOutputStream(new FileOutputStream(f, append)))
  def fileInputStream: OpenFile[BufferedInputStream] = file(f => new BufferedInputStream(new FileInputStream(f)))
  def urlInputStream: Using[URL, BufferedInputStream] = resource((u: URL) => translate("Error opening " + u + ": ")(new BufferedInputStream(u.openStream)))
  def fileOutputChannel: OpenFile[FileChannel] = file(f => new FileOutputStream(f).getChannel)
  def fileInputChannel: OpenFile[FileChannel] = file(f => new FileInputStream(f).getChannel)
  def fileWriter(charset: Charset = IO.utf8, append: Boolean = false): OpenFile[BufferedWriter] =
    file(f => new BufferedWriter(new OutputStreamWriter(new FileOutputStream(f, append), charset)))
  def fileReader(charset: Charset): OpenFile[BufferedReader] = file(f => new BufferedReader(new InputStreamReader(new FileInputStream(f), charset)))
  def urlReader(charset: Charset): Using[URL, BufferedReader] = resource((u: URL) => new BufferedReader(new InputStreamReader(u.openStream, charset)))
  def jarFile(verify: Boolean): OpenFile[JarFile] = file(f => new JarFile(f, verify), (_: JarFile).close())
  def zipFile: OpenFile[ZipFile] = file(f => new ZipFile(f), (_: ZipFile).close())
  def streamReader: Using[(InputStream, Charset), InputStreamReader] = wrap {
    (_: (InputStream, Charset)) match { case (in, charset) => new InputStreamReader(in, charset) }
  }
  def gzipInputStream: Using[InputStream, GZIPInputStream] = wrap((in: InputStream) => new GZIPInputStream(in, 8192))
  def zipInputStream: Using[InputStream, ZipInputStream] = wrap((in: InputStream) => new ZipInputStream(in))
  def zipOutputStream: Using[OutputStream, ZipOutputStream] = wrap((out: OutputStream) => new ZipOutputStream(out))
  def gzipOutputStream: Using[OutputStream, GZIPOutputStream] = wrap((out: OutputStream) => new GZIPOutputStream(out, 8192), (_: GZIPOutputStream).finish())
  def jarOutputStream: Using[OutputStream, JarOutputStream] = wrap((out: OutputStream) => new JarOutputStream(out))
  def jarInputStream: Using[InputStream, JarInputStream] = wrap((in: InputStream) => new JarInputStream(in))
  def zipEntry(zip: ZipFile): Using[ZipEntry, InputStream] = resource((entry: ZipEntry) =>
    translate("Error opening " + entry.getName + " in " + zip + ": ") { zip.getInputStream(entry) }
  )
}
Example 6
Source File: AbstractTableSpec.scala From hail with MIT License
package is.hail.expr.ir import java.io.OutputStreamWriter import is.hail.utils._ import is.hail.types._ import is.hail.io.fs.FS import is.hail.rvd._ import org.json4s.jackson.JsonMethods import org.json4s.{DefaultFormats, Extraction, Formats, JValue, ShortTypeHints} import scala.language.implicitConversions object SortOrder { def deserialize(b: Byte): SortOrder = if (b == 0.toByte) Ascending else if (b == 1.toByte) Descending else throw new RuntimeException(s"invalid sort order: $b") } sealed abstract class SortOrder { def serialize: Byte } case object Ascending extends SortOrder { def serialize: Byte = 0.toByte } case object Descending extends SortOrder { def serialize: Byte = 1.toByte } case class SortField(field: String, sortOrder: SortOrder) abstract class AbstractTableSpec extends RelationalSpec { def references_rel_path: String def table_type: TableType def rowsComponent: RVDComponentSpec = getComponent[RVDComponentSpec]("rows") def rowsSpec: AbstractRVDSpec def globalsSpec: AbstractRVDSpec def indexed: Boolean = rowsSpec.indexed } object TableSpec { def apply(fs: FS, path: String, params: TableSpecParameters): TableSpec = { val globalsComponent = params.components("globals").asInstanceOf[RVDComponentSpec] val globalsSpec = globalsComponent.rvdSpec(fs, path) val rowsComponent = params.components("rows").asInstanceOf[RVDComponentSpec] val rowsSpec = rowsComponent.rvdSpec(fs, path) new TableSpec(params, globalsSpec, rowsSpec) } def fromJValue(fs: FS, path: String, jv: JValue): TableSpec = { implicit val formats: Formats = RelationalSpec.formats val params = jv.extract[TableSpecParameters] TableSpec(fs, path, params) } } case class TableSpecParameters( file_version: Int, hail_version: String, references_rel_path: String, table_type: TableType, components: Map[String, ComponentSpec]) { def write(fs: FS, path: String) { using(new OutputStreamWriter(fs.create(path + "/metadata.json.gz"))) { out => out.write(JsonMethods.compact(decomposeWithName(this, "TableSpec")(RelationalSpec.formats))) } } } class TableSpec( val params: TableSpecParameters, val globalsSpec: AbstractRVDSpec, val rowsSpec: AbstractRVDSpec) extends AbstractTableSpec { def file_version: Int = params.file_version def hail_version: String = params.hail_version def components: Map[String, ComponentSpec] = params.components def references_rel_path: String = params.references_rel_path def table_type: TableType = params.table_type def toJValue: JValue = { decomposeWithName(params, "TableSpec")(RelationalSpec.formats) } }
Example 7
Source File: package.scala From hail with MIT License
package is.hail

import java.io.OutputStreamWriter
import java.nio.charset._

import is.hail.types.virtual.Type
import is.hail.utils._
import is.hail.io.fs.FS

package object io {
  type VCFFieldAttributes = Map[String, String]
  type VCFAttributes = Map[String, VCFFieldAttributes]
  type VCFMetadata = Map[String, VCFAttributes]

  val utfCharset = Charset.forName("UTF-8")

  def exportTypes(filename: String, fs: FS, info: Array[(String, Type)]) {
    val sb = new StringBuilder
    using(new OutputStreamWriter(fs.create(filename))) { out =>
      info.foreachBetween { case (name, t) =>
        sb.append(prettyIdentifier(name))
        sb.append(":")
        t.pretty(sb, 0, compact = true)
      } { sb += ',' }
      out.write(sb.result())
    }
  }
}
Example 8
Source File: Json4sSerialization.scala From kafka-serialization with Apache License 2.0
package com.ovoenergy.kafka.serialization.json4s

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStreamReader, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import com.ovoenergy.kafka.serialization.core._
import org.apache.kafka.common.serialization.{Deserializer => KafkaDeserializer, Serializer => KafkaSerializer}
import org.json4s.Formats
import org.json4s.native.Serialization.{read, write}

import scala.reflect.ClassTag
import scala.reflect.runtime.universe._

trait Json4sSerialization {

  def json4sSerializer[T <: AnyRef](implicit jsonFormats: Formats): KafkaSerializer[T] = serializer { (_, data) =>
    val bout = new ByteArrayOutputStream()
    val writer = new OutputStreamWriter(bout, StandardCharsets.UTF_8)

    // TODO Use scala-arm
    try {
      write(data, writer)
      writer.flush()
    } finally {
      writer.close()
    }
    bout.toByteArray
  }

  def json4sDeserializer[T: TypeTag](implicit jsonFormats: Formats): KafkaDeserializer[T] = deserializer { (_, data) =>
    val tt = implicitly[TypeTag[T]]
    implicit val cl = ClassTag[T](tt.mirror.runtimeClass(tt.tpe))
    read[T](new InputStreamReader(new ByteArrayInputStream(data), StandardCharsets.UTF_8))
  }

}
Example 9
Source File: SpraySerialization.scala From kafka-serialization with Apache License 2.0
package com.ovoenergy.kafka.serialization.spray

import java.io.{ByteArrayOutputStream, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import org.apache.kafka.common.serialization.{Deserializer => KafkaDeserializer, Serializer => KafkaSerializer}
import spray.json._

import com.ovoenergy.kafka.serialization.core._

trait SpraySerialization {

  def spraySerializer[T](implicit format: JsonWriter[T]): KafkaSerializer[T] = serializer { (_, data) =>
    val bout = new ByteArrayOutputStream()
    val osw = new OutputStreamWriter(bout, StandardCharsets.UTF_8)

    // TODO use scala-arm
    try {
      osw.write(data.toJson.compactPrint)
      osw.flush()
    } finally {
      osw.close()
    }
    bout.toByteArray
  }

  def sprayDeserializer[T](implicit format: JsonReader[T]): KafkaDeserializer[T] = deserializer { (_, data) =>
    JsonParser(ParserInput(data)).convertTo[T]
  }

}
Example 10
Source File: HadoopFSHelpers.scala From morpheus with Apache License 2.0
package org.opencypher.morpheus.api.io.fs import java.io.{BufferedReader, BufferedWriter, InputStreamReader, OutputStreamWriter} import org.apache.hadoop.fs.{FileSystem, Path} import org.opencypher.morpheus.api.io.util.FileSystemUtils.using object HadoopFSHelpers { implicit class RichHadoopFileSystem(fileSystem: FileSystem) { protected def createDirectoryIfNotExists(path: Path): Unit = { if (!fileSystem.exists(path)) { fileSystem.mkdirs(path) } } def listDirectories(path: String): List[String] = { val p = new Path(path) createDirectoryIfNotExists(p) fileSystem.listStatus(p) .filter(_.isDirectory) .map(_.getPath.getName) .toList } def deleteDirectory(path: String): Unit = { fileSystem.delete(new Path(path), true) } def readFile(path: String): String = { using(new BufferedReader(new InputStreamReader(fileSystem.open(new Path(path)), "UTF-8"))) { reader => def readLines = Stream.cons(reader.readLine(), Stream.continually(reader.readLine)) readLines.takeWhile(_ != null).mkString } } def writeFile(path: String, content: String): Unit = { val p = new Path(path) val parentDirectory = p.getParent createDirectoryIfNotExists(parentDirectory) using(fileSystem.create(p)) { outputStream => using(new BufferedWriter(new OutputStreamWriter(outputStream, "UTF-8"))) { bufferedWriter => bufferedWriter.write(content) } } } } }
Example 11
Source File: GraphLoaderSuite.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.graphx

import java.io.File
import java.io.FileOutputStream
import java.io.OutputStreamWriter
import java.nio.charset.StandardCharsets

import org.apache.spark.SparkFunSuite
import org.apache.spark.util.Utils

class GraphLoaderSuite extends SparkFunSuite with LocalSparkContext {

  test("GraphLoader.edgeListFile") {
    withSpark { sc =>
      val tmpDir = Utils.createTempDir()
      val graphFile = new File(tmpDir.getAbsolutePath, "graph.txt")
      val writer = new OutputStreamWriter(new FileOutputStream(graphFile), StandardCharsets.UTF_8)
      for (i <- (1 until 101)) writer.write(s"$i 0\n")
      writer.close()
      try {
        val graph = GraphLoader.edgeListFile(sc, tmpDir.getAbsolutePath)
        val neighborAttrSums = graph.aggregateMessages[Int](
          ctx => ctx.sendToDst(ctx.srcAttr), _ + _)
        assert(neighborAttrSums.collect.toSet === Set((0: VertexId, 100)))
      } finally {
        Utils.deleteRecursively(tmpDir)
      }
    }
  }
}
Example 12
Source File: StreamMetadata.scala From multi-tenancy-spark with Apache License 2.0
package org.apache.spark.sql.execution.streaming import java.io.{InputStreamReader, OutputStreamWriter} import java.nio.charset.StandardCharsets import scala.util.control.NonFatal import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, FSDataOutputStream, Path} import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.apache.spark.internal.Logging import org.apache.spark.sql.streaming.StreamingQuery def write( metadata: StreamMetadata, metadataFile: Path, hadoopConf: Configuration): Unit = { var output: FSDataOutputStream = null try { val fs = FileSystem.get(hadoopConf) output = fs.create(metadataFile) val writer = new OutputStreamWriter(output) Serialization.write(metadata, writer) writer.close() } catch { case NonFatal(e) => logError(s"Error writing stream metadata $metadata to $metadataFile", e) throw e } finally { IOUtils.closeQuietly(output) } } }
Example 13
Source File: Main.scala From scalajs-highcharts with MIT License
package com.karasiq.highcharts.generator import java.io.{BufferedWriter, FileOutputStream, OutputStreamWriter, PrintWriter} import java.nio.file._ import java.nio.file.attribute.BasicFileAttributes import scala.util.control.Exception import scalaj.http.{Http, HttpOptions} import com.karasiq.highcharts.generator.writers.{ScalaClassWriter, ScalaJsClassBuilder} case class HighchartsApiDoc(library: String) { private val defaultPackage = System.getProperty(s"highcharts-generator.$library.package", s"com.$library") private def httpGet(url: String): List[ConfigurationObject] = { val page = Http.get(url) .header("User-Agent", "Mozilla/5.0 (X11; OpenBSD amd64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.81 Safari/537.36") .header("Accept", "application/json") .options(HttpOptions.connTimeout(10000), HttpOptions.readTimeout(10000)) val json = page.asString ConfigurationObject.fromJson(json) } private def writeFiles(pkg: String, configs: List[ConfigurationObject], rootObject: Option[String] = None): Unit = { val header = s""" |package $pkg | |import scalajs.js, js.`|` |import com.highcharts.CleanJsObject |import com.highcharts.HighchartsUtils._ | |""".stripMargin val outputDir = Paths.get(System.getProperty("highcharts-generator.output", "src/main/scala"), pkg.split("\\."):_*) Files.createDirectories(outputDir) // Remove all files Files.walkFileTree(outputDir, new SimpleFileVisitor[Path] { override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = { Files.delete(file) FileVisitResult.CONTINUE } }) val classes = new ScalaJsClassBuilder().parse(configs, rootObject) val classWriter = new ScalaClassWriter classes.foreach { scalaJsClass ⇒ val file = outputDir.resolve(scalaJsClass.scalaName + ".scala") println(s"Writing $file...") val writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file.toFile, true), "UTF-8"))) Exception.allCatch.andFinally(writer.close()) { if (Files.size(file) == 0) { writer.print(header) } classWriter.writeClass(scalaJsClass) { line ⇒ writer.println(line) } writer.flush() } } } def writeConfigs(): Unit = { val configs = httpGet(s"https://api.highcharts.com/$library/dump.json") writeFiles(s"$defaultPackage.config", configs, Some(s"${library.capitalize}Config")) } def writeApis(): Unit = { val configs = httpGet(s"https://api.highcharts.com/$library/object/dump.json") writeFiles(s"$defaultPackage.api", configs) } def writeAll(): Unit = { // TODO: https://github.com/highcharts/highcharts/issues/7227 writeConfigs() // writeApis() // TODO: 404 } } object Main extends App { HighchartsApiDoc("highcharts").writeAll() HighchartsApiDoc("highstock").writeAll() HighchartsApiDoc("highmaps").writeAll() }
Example 14
Source File: InteractiveUploadLogger.scala From coursier with Apache License 2.0
package coursier.publish.upload.logger import java.io.{OutputStream, OutputStreamWriter, Writer} import com.lightbend.emoji.ShortCodes.Defaults.defaultImplicit.emoji import coursier.publish.fileset.FileSet import coursier.publish.logging.ProgressLogger import coursier.publish.upload.Upload // FIXME Would have been better if dummy was passed by the Upload instance when calling the methods of UploadLogger final class InteractiveUploadLogger(out: Writer, dummy: Boolean, isLocal: Boolean) extends UploadLogger { private val underlying = new ProgressLogger[Object]( if (isLocal) { if (dummy) "Would have written" else "Wrote" } else { if (dummy) "Would have uploaded" else "Uploaded" }, "files", out, doneEmoji = emoji("truck").map(_.toString()) ) override def uploadingSet(id: Object, fileSet: FileSet): Unit = underlying.processingSet(id, Some(fileSet.elements.length)) override def uploadedSet(id: Object, fileSet: FileSet): Unit = underlying.processedSet(id) override def uploading(url: String, idOpt: Option[Object], totalOpt: Option[Long]): Unit = for (id <- idOpt) underlying.processing(url, id) override def progress(url: String, idOpt: Option[Object], uploaded: Long, total: Long): Unit = for (id <- idOpt) underlying.progress(url, id, uploaded, total) override def uploaded(url: String, idOpt: Option[Object], errorOpt: Option[Upload.Error]): Unit = for (id <- idOpt) underlying.processed(url, id, errorOpt.nonEmpty) override def start(): Unit = underlying.start() override def stop(keep: Boolean): Unit = underlying.stop(keep) } object InteractiveUploadLogger { def create(out: OutputStream, dummy: Boolean, isLocal: Boolean): UploadLogger = new InteractiveUploadLogger(new OutputStreamWriter(out), dummy, isLocal) }
Example 15
Source File: FileDownloader.scala From seahorse with Apache License 2.0
package ai.deepsense.deeplang.doperations.readwritedataframe.filestorage import java.io.{BufferedWriter, FileOutputStream, IOException, OutputStreamWriter} import java.nio.file.{Files, Paths} import java.util.UUID import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import ai.deepsense.deeplang.ExecutionContext import ai.deepsense.deeplang.doperations.exceptions.DeepSenseIOException import ai.deepsense.deeplang.doperations.readwritedataframe.FilePath private[filestorage] object FileDownloader { def downloadFile(url: String)(implicit context: ExecutionContext): FilePath = { if (context.tempPath.startsWith("hdfs://")) { downloadFileToHdfs(url) } else { downloadFileToDriver(url) } } private def downloadFileToHdfs(url: String)(implicit context: ExecutionContext) = { val content = scala.io.Source.fromURL(url).getLines() val hdfsPath = s"${context.tempPath}/${UUID.randomUUID()}" val configuration = new Configuration() val hdfs = FileSystem.get(configuration) val file = new Path(hdfsPath) val hdfsStream = hdfs.create(file) val writer = new BufferedWriter(new OutputStreamWriter(hdfsStream)) try { content.foreach {s => writer.write(s) writer.newLine() } } finally { safeClose(writer) hdfs.close() } FilePath(hdfsPath) } private def downloadFileToDriver(url: String) (implicit context: ExecutionContext) = { val outputDirPath = Paths.get(context.tempPath) // We're checking if the output is a directory following symlinks. // The default behaviour of createDirectories is NOT to follow symlinks if (!Files.isDirectory(outputDirPath)) { Files.createDirectories(outputDirPath) } val outFilePath = Files.createTempFile(outputDirPath, "download", ".csv") // content is a stream. Do not invoke stuff like .toList() on it. val content = scala.io.Source.fromURL(url).getLines() val writer: BufferedWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFilePath.toFile))) try { content.foreach {s => writer.write(s) writer.newLine() } } finally { safeClose(writer) } FilePath(s"file:///$outFilePath") } private def safeClose(bufferedWriter: BufferedWriter): Unit = { try { bufferedWriter.flush() bufferedWriter.close() } catch { case e: IOException => throw new DeepSenseIOException(e) } } }
Example 16
Source File: JsonFileReporter.scala From kyuubi with Apache License 2.0
package yaooqinn.kyuubi.metrics import java.io.{BufferedWriter, Closeable, IOException, OutputStreamWriter} import java.util.{Timer, TimerTask} import java.util.concurrent.TimeUnit import scala.util.Try import scala.util.control.NonFatal import com.codahale.metrics.MetricRegistry import com.codahale.metrics.json.MetricsModule import com.fasterxml.jackson.databind.ObjectMapper import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.fs.permission.FsPermission import org.apache.kyuubi.Logging import org.apache.spark.{KyuubiSparkUtil, SparkConf} import org.apache.spark.KyuubiConf._ private[metrics] class JsonFileReporter(conf: SparkConf, registry: MetricRegistry) extends Closeable with Logging { private val jsonMapper = new ObjectMapper().registerModule( new MetricsModule(TimeUnit.MILLISECONDS, TimeUnit.MILLISECONDS, false)) private val timer = new Timer(true) private val interval = KyuubiSparkUtil.timeStringAsMs(conf.get(METRICS_REPORT_INTERVAL)) private val path = conf.get(METRICS_REPORT_LOCATION) private val hadoopConf = KyuubiSparkUtil.newConfiguration(conf) def start(): Unit = { timer.schedule(new TimerTask { var bw: BufferedWriter = _ override def run(): Unit = try { val json = jsonMapper.writerWithDefaultPrettyPrinter().writeValueAsString(registry) val tmpPath = new Path(path + ".tmp") val tmpPathUri = tmpPath.toUri val fs = if (tmpPathUri.getScheme == null && tmpPathUri.getAuthority == null) { FileSystem.getLocal(hadoopConf) } else { FileSystem.get(tmpPathUri, hadoopConf) } fs.delete(tmpPath, true) bw = new BufferedWriter(new OutputStreamWriter(fs.create(tmpPath, true))) bw.write(json) bw.close() fs.setPermission(tmpPath, FsPermission.createImmutable(Integer.parseInt("644", 8).toShort)) val finalPath = new Path(path) fs.rename(tmpPath, finalPath) fs.setPermission(finalPath, FsPermission.createImmutable(Integer.parseInt("644", 8).toShort)) } catch { case NonFatal(e) => error("Error writing metrics to json file" + path, e) } finally { if (bw != null) { Try(bw.close()) } } }, 0, interval) } override def close(): Unit = { timer.cancel() } }
Example 17
Source File: GraphLoaderSuite.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.graphx

import java.io.File
import java.io.FileOutputStream
import java.io.OutputStreamWriter
import java.nio.charset.StandardCharsets

import org.apache.spark.SparkFunSuite
import org.apache.spark.util.Utils

class GraphLoaderSuite extends SparkFunSuite with LocalSparkContext {

  test("GraphLoader.edgeListFile") {
    withSpark { sc =>
      val tmpDir = Utils.createTempDir()
      val graphFile = new File(tmpDir.getAbsolutePath, "graph.txt")
      val writer = new OutputStreamWriter(new FileOutputStream(graphFile), StandardCharsets.UTF_8)
      for (i <- (1 until 101)) writer.write(s"$i 0\n")
      writer.close()
      try {
        val graph = GraphLoader.edgeListFile(sc, tmpDir.getAbsolutePath)
        val neighborAttrSums = graph.aggregateMessages[Int](
          ctx => ctx.sendToDst(ctx.srcAttr), _ + _)
        assert(neighborAttrSums.collect.toSet === Set((0: VertexId, 100)))
      } finally {
        Utils.deleteRecursively(tmpDir)
      }
    }
  }
}
Example 18
Source File: CodecStreams.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution.datasources import java.io.{InputStream, OutputStream, OutputStreamWriter} import java.nio.charset.{Charset, StandardCharsets} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.io.compress._ import org.apache.hadoop.mapreduce.JobContext import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat import org.apache.hadoop.util.ReflectionUtils import org.apache.spark.TaskContext object CodecStreams { private def getDecompressionCodec(config: Configuration, file: Path): Option[CompressionCodec] = { val compressionCodecs = new CompressionCodecFactory(config) Option(compressionCodecs.getCodec(file)) } def createInputStream(config: Configuration, file: Path): InputStream = { val fs = file.getFileSystem(config) val inputStream: InputStream = fs.open(file) getDecompressionCodec(config, file) .map(codec => codec.createInputStream(inputStream)) .getOrElse(inputStream) } def getCompressionExtension(context: JobContext): String = { getCompressionCodec(context) .map(_.getDefaultExtension) .getOrElse("") } }
Example 19
Source File: StreamMetadata.scala From Spark-2.3.1 with Apache License 2.0
package org.apache.spark.sql.execution.streaming import java.io.{InputStreamReader, OutputStreamWriter} import java.nio.charset.StandardCharsets import scala.util.control.NonFatal import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, FSDataOutputStream, Path} import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.apache.spark.internal.Logging import org.apache.spark.sql.streaming.StreamingQuery def write( metadata: StreamMetadata, metadataFile: Path, hadoopConf: Configuration): Unit = { var output: FSDataOutputStream = null try { val fs = metadataFile.getFileSystem(hadoopConf) output = fs.create(metadataFile) val writer = new OutputStreamWriter(output) Serialization.write(metadata, writer) writer.close() } catch { case NonFatal(e) => logError(s"Error writing stream metadata $metadata to $metadataFile", e) throw e } finally { IOUtils.closeQuietly(output) } } }
Example 20
Source File: SparkILoop.scala From BigDatalog with Apache License 2.0
package org.apache.spark.repl import java.io.{BufferedReader, FileReader} import Predef.{println => _, _} import scala.util.Properties.{jdkHome, javaVersion, versionString, javaVmName} import scala.tools.nsc.interpreter.{JPrintWriter, ILoop} import scala.tools.nsc.Settings import scala.tools.nsc.util.stringFromStream def run(code: String, sets: Settings = new Settings): String = { import java.io.{ BufferedReader, StringReader, OutputStreamWriter } stringFromStream { ostream => Console.withOut(ostream) { val input = new BufferedReader(new StringReader(code)) val output = new JPrintWriter(new OutputStreamWriter(ostream), true) val repl = new SparkILoop(input, output) if (sets.classpath.isDefault) sets.classpath.value = sys.props("java.class.path") repl process sets } } } def run(lines: List[String]): String = run(lines.map(_ + "\n").mkString) }
Example 21
Source File: PileupApp.scala From bdg-sequila with Apache License 2.0
package org.biodatageeks.sequila.apps import java.io.{OutputStreamWriter, PrintWriter} import org.apache.spark.sql.{SequilaSession, SparkSession} import org.bdgenomics.utils.instrumentation.{Metrics, MetricsListener, RecordedMetrics} import org.biodatageeks.sequila.utils.{InternalParams, SequilaRegister} object PileupApp extends App{ override def main(args: Array[String]): Unit = { System.setProperty("spark.kryo.registrator", "org.biodatageeks.sequila.pileup.serializers.CustomKryoRegistrator") val spark = SparkSession .builder() .master("local[1]") .config("spark.driver.memory","4g") .config( "spark.serializer", "org.apache.spark.serializer.KryoSerializer" ) .enableHiveSupport() .getOrCreate() val ss = SequilaSession(spark) SequilaRegister.register(ss) spark.sparkContext.setLogLevel("INFO") val bamPath = "/Users/aga/NA12878.chr20.md.bam" val referencePath = "/Users/aga/Homo_sapiens_assembly18_chr20.fasta" // val bamPath = "/Users/marek/data/NA12878.chrom20.ILLUMINA.bwa.CEU.low_coverage.20121211.md.bam" // val referencePath = "/Users/marek/data/hs37d5.fa" val tableNameBAM = "reads" ss.sql(s"""DROP TABLE IF EXISTS $tableNameBAM""") ss.sql(s""" |CREATE TABLE $tableNameBAM |USING org.biodatageeks.sequila.datasources.BAM.BAMDataSource |OPTIONS(path "$bamPath") | """.stripMargin) val query = s""" |SELECT count(*) |FROM pileup('$tableNameBAM', 'NA12878', '${referencePath}') """.stripMargin ss .sqlContext .setConf(InternalParams.EnableInstrumentation, "true") Metrics.initialize(ss.sparkContext) val metricsListener = new MetricsListener(new RecordedMetrics()) ss .sparkContext .addSparkListener(metricsListener) val results = ss.sql(query) ss.time{ results.show() } val writer = new PrintWriter(new OutputStreamWriter(System.out, "UTF-8")) Metrics.print(writer, Some(metricsListener.metrics.sparkMetrics.stageTimes)) writer.close() ss.stop() } }
Example 22
Source File: JoinOrderTestSuite.scala From bdg-sequila with Apache License 2.0
package org.biodatageeks.sequila.tests.rangejoins import java.io.{OutputStreamWriter, PrintWriter} import com.holdenkarau.spark.testing.{DataFrameSuiteBase, SharedSparkContext} import org.apache.spark.sql.Row import org.apache.spark.sql.types.{ IntegerType, StringType, StructField, StructType } import org.bdgenomics.utils.instrumentation.{ Metrics, MetricsListener, RecordedMetrics } import org.biodatageeks.sequila.rangejoins.IntervalTree.IntervalTreeJoinStrategyOptim import org.scalatest.{BeforeAndAfter, FunSuite} class JoinOrderTestSuite extends FunSuite with DataFrameSuiteBase with BeforeAndAfter with SharedSparkContext { val schema = StructType( Seq(StructField("chr", StringType), StructField("start", IntegerType), StructField("end", IntegerType))) val metricsListener = new MetricsListener(new RecordedMetrics()) val writer = new PrintWriter(new OutputStreamWriter(System.out)) before { System.setSecurityManager(null) spark.experimental.extraStrategies = new IntervalTreeJoinStrategyOptim( spark) :: Nil Metrics.initialize(sc) val rdd1 = sc .textFile(getClass.getResource("/refFlat.txt.bz2").getPath) .map(r => r.split('\t')) .map( r => Row( r(2).toString, r(4).toInt, r(5).toInt )) val ref = spark.createDataFrame(rdd1, schema) ref.createOrReplaceTempView("ref") val rdd2 = sc .textFile(getClass.getResource("/snp150Flagged.txt.bz2").getPath) .map(r => r.split('\t')) .map( r => Row( r(1).toString, r(2).toInt, r(3).toInt )) val snp = spark .createDataFrame(rdd2, schema) snp.createOrReplaceTempView("snp") } test("Join order - broadcasting snp table") { spark.sqlContext.setConf("spark.biodatageeks.rangejoin.useJoinOrder", "true") val query = s""" |SELECT snp.*,ref.* FROM ref JOIN snp |ON (ref.chr=snp.chr AND snp.end>=ref.start AND snp.start<=ref.end) """.stripMargin assert(spark.sql(query).count === 616404L) } test("Join order - broadcasting ref table") { spark.sqlContext.setConf("spark.biodatageeks.rangejoin.useJoinOrder", "true") val query = s""" |SELECT snp.*,ref.* FROM snp JOIN ref |ON (ref.chr=snp.chr AND snp.end>=ref.start AND snp.start<=ref.end) """.stripMargin assert(spark.sql(query).count === 616404L) } after { Metrics.print(writer, Some(metricsListener.metrics.sparkMetrics.stageTimes)) writer.flush() Metrics.stopRecording() } }
Example 23
Source File: ScalafmtSbtReporter.scala From sbt-scalafmt with Apache License 2.0
package org.scalafmt.sbt import java.io.PrintWriter import java.io.OutputStreamWriter import java.nio.file.Path import org.scalafmt.interfaces.ScalafmtReporter import sbt.internal.util.MessageOnlyException import sbt.util.Logger import scala.util.control.NoStackTrace class ScalafmtSbtReporter(log: Logger, out: OutputStreamWriter) extends ScalafmtReporter { override def error(file: Path, message: String): Unit = { throw new MessageOnlyException(s"$message: $file") } override def error(file: Path, e: Throwable): Unit = { if (e.getMessage != null) { error(file, e.getMessage) } else { throw new FailedToFormat(file.toString, e) } } override def error(file: Path, message: String, e: Throwable): Unit = { if (e.getMessage != null) { error(file, s"$message: ${e.getMessage()}") } else { throw new FailedToFormat(file.toString, e) } } override def excluded(file: Path): Unit = log.debug(s"file excluded: $file") override def parsedConfig(config: Path, scalafmtVersion: String): Unit = log.debug(s"parsed config (v$scalafmtVersion): $config") override def downloadWriter(): PrintWriter = new PrintWriter(out) override def downloadOutputStreamWriter(): OutputStreamWriter = out private class FailedToFormat(filename: String, cause: Throwable) extends Exception(filename, cause) with NoStackTrace }
Example 24
Source File: SchrodingerExceptionTest.scala From aloha with MIT License
package com.eharmony.aloha.ex import org.junit.{Before, Test} import org.junit.Assert._ import java.io.{PrintWriter, OutputStreamWriter, ByteArrayOutputStream, PrintStream} class SchrodingerExceptionTest { private[this] var ex: SchrodingerException = _ @Before def before() { ex = new SchrodingerException } @Test def testFillInStackTrace() { assertTrue(new SchrodingerException().fillInStackTrace().isInstanceOf[SchrodingerException]) } @Test(expected = classOf[SchrodingerException]) def testGetMessage() { ex.getMessage() } @Test(expected = classOf[SchrodingerException]) def testGetStackTrace() { ex.getStackTrace() } @Test(expected = classOf[SchrodingerException]) def testGetCause() { ex.getCause() } @Test(expected = classOf[SchrodingerException]) def testSetStackTrace() { ex.setStackTrace(Array.empty) } @Test(expected = classOf[SchrodingerException]) def testGetLocalizedMessage() { ex.getLocalizedMessage() } @Test(expected = classOf[SchrodingerException]) def testPrintStackTraceEmpty() { ex.printStackTrace() } @Test(expected = classOf[SchrodingerException]) def testPrintStackTraceStream() { val baos = new ByteArrayOutputStream() val ps = new PrintStream(baos) ex.printStackTrace(ps) } @Test(expected = classOf[SchrodingerException]) def testPrintStackTraceWriter() { val baos = new ByteArrayOutputStream() val osw = new OutputStreamWriter(baos) val ps = new PrintWriter(osw) ex.printStackTrace(ps) } @Test(expected = classOf[SchrodingerException]) def testInitCause() { ex.initCause(new Throwable) } @Test(expected = classOf[SchrodingerException]) def testToString() { ex.toString() } @Test def testNoThrowForSchrodingerExceptionWithSchrodingerExceptionCause() { new SchrodingerException(new SchrodingerException) } @Test def testNoThrowForSchrodingerExceptionWithExceptionCause() { new SchrodingerException(new Exception) } @Test(expected = classOf[SchrodingerException]) def testThrowForThrowableWithSchrodingerExceptionCause() { new Throwable(ex) } @Test(expected = classOf[SchrodingerException]) def testThrowForExceptionWithSchrodingerExceptionCause() { new Exception(ex) } @Test(expected = classOf[SchrodingerException]) def testThrowForRuntimeExceptionWithSchrodingerExceptionCause() { new RuntimeException(ex) } }
Example 25
Source File: Devel.scala From libisabelle with Apache License 2.0
package info.hupel.isabelle.setup import java.io.OutputStreamWriter import java.nio.file.{Files, Path} import org.eclipse.jgit.api._ import org.eclipse.jgit.lib.TextProgressMonitor import org.eclipse.jgit.storage.file._ import org.log4s._ trait Devel { def init(path: Path): Unit def update(path: Path): Unit } case class GitDevel(url: String, branch: String) extends Devel { private val logger = getLogger private val monitor = new TextProgressMonitor(new OutputStreamWriter(Console.err)) def init(path: Path): Unit = { logger.debug(s"Cloning $branch from $url into $path") Files.createDirectories(path) new CloneCommand() .setDirectory(path.toFile) .setURI(url) .setBranch(branch) .setProgressMonitor(monitor) .call() () } def update(path: Path): Unit = { logger.debug(s"Fetching $branch from $url into $path") val repo = new FileRepositoryBuilder() .findGitDir(path.toFile) .setup() .build() new Git(repo).pull() .setRemoteBranchName(branch) .call() () } } object Devel { val knownDevels: Map[String, Devel] = Map( "isabelle-mirror" -> GitDevel("https://github.com/isabelle-prover/mirror-isabelle.git", "master") ) }
Example 26
Source File: FileDownloader.scala From seahorse-workflow-executor with Apache License 2.0
package io.deepsense.deeplang.doperations.readwritedataframe.filestorage import java.io.{BufferedWriter, FileOutputStream, IOException, OutputStreamWriter} import java.nio.file.{Files, Paths} import java.util.UUID import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import io.deepsense.deeplang.ExecutionContext import io.deepsense.deeplang.doperations.exceptions.DeepSenseIOException import io.deepsense.deeplang.doperations.readwritedataframe.FilePath private[filestorage] object FileDownloader { def downloadFile(url: String)(implicit context: ExecutionContext): FilePath = { if (context.tempPath.startsWith("hdfs://")) { downloadFileToHdfs(url) } else { downloadFileToDriver(url) } } private def downloadFileToHdfs(url: String)(implicit context: ExecutionContext) = { val content = scala.io.Source.fromURL(url).getLines() val hdfsPath = s"${context.tempPath}/${UUID.randomUUID()}" val configuration = new Configuration() val hdfs = FileSystem.get(configuration) val file = new Path(hdfsPath) val hdfsStream = hdfs.create(file) val writer = new BufferedWriter(new OutputStreamWriter(hdfsStream)) try { content.foreach {s => writer.write(s) writer.newLine() } } finally { safeClose(writer) hdfs.close() } FilePath(hdfsPath) } private def downloadFileToDriver(url: String) (implicit context: ExecutionContext) = { val outputDirPath = Paths.get(context.tempPath) // We're checking if the output is a directory following symlinks. // The default behaviour of createDirectories is NOT to follow symlinks if (!Files.isDirectory(outputDirPath)) { Files.createDirectories(outputDirPath) } val outFilePath = Files.createTempFile(outputDirPath, "download", ".csv") // content is a stream. Do not invoke stuff like .toList() on it. val content = scala.io.Source.fromURL(url).getLines() val writer: BufferedWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFilePath.toFile))) try { content.foreach {s => writer.write(s) writer.newLine() } } finally { safeClose(writer) } FilePath(s"file:///$outFilePath") } private def safeClose(bufferedWriter: BufferedWriter): Unit = { try { bufferedWriter.flush() bufferedWriter.close() } catch { case e: IOException => throw new DeepSenseIOException(e) } } }
Example 27
Source File: TestCompileApplicationInstance.scala From milan with Apache License 2.0
package com.amazon.milan.tools import java.io.{OutputStream, OutputStreamWriter} import java.nio.ByteBuffer import java.nio.charset.StandardCharsets import java.nio.file.Files import com.amazon.milan.application.{Application, ApplicationConfiguration, ApplicationInstance} import com.amazon.milan.lang._ import com.amazon.milan.testing.applications._ import com.amazon.milan.{Id, SemanticVersion} import org.junit.Assert._ import org.junit.Test object TestCompileApplicationInstance { case class Record(recordId: String, i: Int) class Provider extends ApplicationInstanceProvider { override def getApplicationInstance(params: List[(String, String)]): ApplicationInstance = { val input = Stream.of[Record] val graph = new StreamGraph(input) val config = new ApplicationConfiguration config.setListSource(input, Record("1", 1)) val instanceId = params.find(_._1 == "instanceId").get._2 val appId = params.find(_._1 == "appId").get._2 new ApplicationInstance( instanceId, new Application(appId, graph, SemanticVersion.ZERO), config) } } class Compiler extends ApplicationInstanceCompiler { override def compile(applicationInstance: ApplicationInstance, params: List[(String, String)], output: OutputStream): Unit = { val writer = new OutputStreamWriter(output) val testParam = params.find(_._1 == "test").get._2 writer.write(testParam) writer.write(applicationInstance.toJsonString) writer.close() } } } @Test class TestCompileApplicationInstance { @Test def test_CompileApplicationInstance_Main_SendsProviderAndCompilerParameters(): Unit = { val tempFile = Files.createTempFile("TestCompileApplicationInstance", ".scala") Files.deleteIfExists(tempFile) val appId = Id.newId() val instanceId = Id.newId() val testValue = Id.newId() try { val args = Array( "--provider", "com.amazon.milan.tools.TestCompileApplicationInstance.Provider", "--compiler", "com.amazon.milan.tools.TestCompileApplicationInstance.Compiler", "--package", "generated", "--output", tempFile.toString, s"-PinstanceId=$instanceId", s"-PappId=$appId", s"-Ctest=$testValue" ) CompileApplicationInstance.main(args) val fileContents = StandardCharsets.UTF_8.decode(ByteBuffer.wrap(Files.readAllBytes(tempFile))).toString assertTrue(fileContents.contains(appId)) assertTrue(fileContents.contains(instanceId)) assertTrue(fileContents.contains(testValue)) } finally { Files.deleteIfExists(tempFile) } } }
Example 28
Source File: BasicTestPerformance4Samba.scala From ohara with Apache License 2.0
package oharastream.ohara.it.performance import java.io.{BufferedWriter, OutputStreamWriter} import java.util.concurrent.atomic.LongAdder import oharastream.ohara.client.filesystem.FileSystem import oharastream.ohara.common.data.Row import oharastream.ohara.common.util.{CommonUtils, Releasable} import org.junit.AssumptionViolatedException import spray.json.{JsNumber, JsString, JsValue} import scala.concurrent.duration.Duration import scala.jdk.CollectionConverters._ abstract class BasicTestPerformance4Samba extends BasicTestPerformance { private[this] val sambaHostname: String = sys.env.getOrElse( PerformanceTestingUtils.SAMBA_HOSTNAME_KEY, throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_HOSTNAME_KEY} does not exists!!!") ) private[this] val sambaUsername: String = sys.env.getOrElse( PerformanceTestingUtils.SAMBA_USER_KEY, throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_USER_KEY} does not exists!!!") ) private[this] val sambaPassword: String = sys.env.getOrElse( PerformanceTestingUtils.SAMBA_PASSWORD_KEY, throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_PASSWORD_KEY} does not exists!!!") ) private[this] val sambaPort: Int = sys.env .getOrElse( PerformanceTestingUtils.SAMBA_PORT_KEY, throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_PORT_KEY} does not exists!!!") ) .toInt private[this] val sambaShare: String = sys.env.getOrElse( PerformanceTestingUtils.SAMBA_SHARE_KEY, throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_SHARE_KEY} does not exists!!!") ) private[this] val csvInputFolderKey = PerformanceTestingUtils.CSV_INPUT_KEY private[this] val csvOutputFolder: String = value(csvInputFolderKey).getOrElse("input") private[this] val NEED_DELETE_DATA_KEY: String = PerformanceTestingUtils.DATA_CLEANUP_KEY protected[this] val needDeleteData: Boolean = sys.env.getOrElse(NEED_DELETE_DATA_KEY, "true").toBoolean protected val sambaSettings: Map[String, JsValue] = Map( oharastream.ohara.connector.smb.SMB_HOSTNAME_KEY -> JsString(sambaHostname), oharastream.ohara.connector.smb.SMB_PORT_KEY -> JsNumber(sambaPort), oharastream.ohara.connector.smb.SMB_USER_KEY -> JsString(sambaUsername), oharastream.ohara.connector.smb.SMB_PASSWORD_KEY -> JsString(sambaPassword), oharastream.ohara.connector.smb.SMB_SHARE_NAME_KEY -> JsString(sambaShare) ) protected def setupInputData(timeout: Duration): (String, Long, Long) = { val client = sambaClient() try { if (!client.exists(csvOutputFolder)) PerformanceTestingUtils.createFolder(client, csvOutputFolder) val result = generateData( numberOfRowsToFlush, timeout, (rows: Seq[Row]) => { val file = s"$csvOutputFolder/${CommonUtils.randomString()}" val writer = new BufferedWriter(new OutputStreamWriter(client.create(file))) val count = new LongAdder() val sizeInBytes = new LongAdder() try { val cellNames: Set[String] = rows.head.cells().asScala.map(_.name).toSet writer .append(cellNames.mkString(",")) .append("\n") rows.foreach(row => { val content = row.cells().asScala.map(_.value).mkString(",") count.increment() sizeInBytes.add(content.length) writer .append(content) .append("\n") }) (count.longValue(), sizeInBytes.longValue()) } finally Releasable.close(writer) } ) (csvOutputFolder, result._1, result._2) } finally Releasable.close(client) } protected[this] def sambaClient(): FileSystem = FileSystem.smbBuilder .hostname(sambaHostname) .port(sambaPort) .user(sambaUsername) .password(sambaPassword) .shareName(sambaShare) .build() }
Example 29
Source File: TestHdfsFileSystem.scala From ohara with Apache License 2.0
package oharastream.ohara.client.filesystem.hdfs import java.io.{BufferedWriter, File, OutputStreamWriter} import java.nio.charset.StandardCharsets import oharastream.ohara.client.filesystem.{FileFilter, FileSystem, FileSystemTestBase} import oharastream.ohara.common.exception.FileSystemException import oharastream.ohara.common.util.CommonUtils import org.junit.Test import org.scalatest.matchers.should.Matchers._ class TestHdfsFileSystem extends FileSystemTestBase { private[this] val tempFolder: File = CommonUtils.createTempFolder("local_hdfs") private[this] val hdfsURL: String = new File(tempFolder.getAbsolutePath).toURI.toString override protected val fileSystem: FileSystem = FileSystem.hdfsBuilder.url(hdfsURL).build override protected val rootDir: String = tempFolder.toString // override this method because the Local HDFS doesn't support append() @Test override def testAppend(): Unit = { val file = randomFile() fileSystem.create(file).close() intercept[FileSystemException] { fileSystem.append(file) }.getMessage shouldBe "Not supported" } // override this method because the Local HDFS doesn't support append() @Test override def testDeleteFileThatHaveBeenRead(): Unit = { val file = randomFile(rootDir) val data: Seq[String] = Seq("123", "456") val writer = new BufferedWriter(new OutputStreamWriter(fileSystem.create(file), StandardCharsets.UTF_8)) try data.foreach(line => { writer.append(line) writer.newLine() }) finally writer.close() fileSystem.exists(file) shouldBe true fileSystem.readLines(file) shouldBe data fileSystem.delete(file) fileSystem.exists(file) shouldBe false fileSystem.listFileNames(rootDir, FileFilter.EMPTY).size shouldBe 0 } }
Example 30
Source File: ProxyServer.scala From devbox with Apache License 2.0
package cmdproxy import java.io.BufferedReader import java.io.InputStreamReader import java.io.OutputStreamWriter import java.io.PrintWriter import java.net.InetAddress import java.net.ServerSocket import java.net.Socket import scala.util.Using import devbox.logger.FileLogger import os.RelPath import ujson.ParseException import upickle.default.{macroRW, ReadWriter} case class Request(workingDir: String, cmd: Seq[String]) object Request { implicit val rw: ReadWriter[Request] = macroRW } val localDir: Map[os.RelPath, os.Path] = dirMapping.map(_.swap).toMap def start(): Unit = { logger.info(s"Starting command proxy server, listening at ${socket.getInetAddress}:${socket.getLocalPort}") (new Thread("Git Proxy Thread") { override def run(): Unit = { while (!socket.isClosed) { Using(socket.accept()) { handleConnection } recover { case e: Exception => logger.error(s"Error handling request ${e.getMessage}") case e: java.net.SocketException if e.getMessage == "Socket closed" => logger.error(s"Git proxy socket closed") } } } }).start() } def handleConnection(conn: Socket): Unit = try { logger.info(s"Accepting connection from ${conn.getInetAddress}") val in = new BufferedReader(new InputStreamReader(conn.getInputStream, ProxyServer.CHARSET_NAME)) val out = new PrintWriter(new OutputStreamWriter(conn.getOutputStream, ProxyServer.CHARSET_NAME)) upickle.default.read[Request](in.readLine()) match { case Request(dir, args) => val workingDir = localDir .collect{case (remote, local) if RelPath(dir).startsWith(remote) => local / RelPath(dir).relativeTo(remote) } .head // being cautious here and only execute "git" commands if (args.headOption.exists((_ == "git"))) { logger.info(s"Executing `${args.mkString(" ")}` in $workingDir") val proc = os.proc(args).call( workingDir, mergeErrIntoOut = true, stdout = os.ProcessOutput.Readlines(str => out.println(upickle.default.write(Left[String, Int](str))) ), check = false, timeout = 10000 ) out.println(upickle.default.write(Right[String, Int](proc.exitCode))) } else { val msg = s"Not executing non-git commend: `${args.mkString(" ")}`." logger.info(msg) out.println(upickle.default.write(Right[String, Int](1))) } out.flush() } } catch { case e: ParseException => logger.error(s"Error parsing incoming json request: ${e.getMessage}") } } object ProxyServer { val DEFAULT_PORT = 20280 val CHARSET_NAME = "UTF-8" }
Example 31
Source File: GraphLoaderSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.graphx import java.io.File import java.io.FileOutputStream import java.io.OutputStreamWriter import java.nio.charset.StandardCharsets import org.apache.spark.SparkFunSuite import org.apache.spark.util.Utils class GraphLoaderSuite extends SparkFunSuite with LocalSparkContext { test("GraphLoader.edgeListFile") { withSpark { sc => val tmpDir = Utils.createTempDir() val graphFile = new File(tmpDir.getAbsolutePath, "graph.txt") val writer = new OutputStreamWriter(new FileOutputStream(graphFile), StandardCharsets.UTF_8) for (i <- (1 until 101)) writer.write(s"$i 0\n") writer.close() try { val graph = GraphLoader.edgeListFile(sc, tmpDir.getAbsolutePath) val neighborAttrSums = graph.aggregateMessages[Int]( ctx => ctx.sendToDst(ctx.srcAttr), _ + _) assert(neighborAttrSums.collect.toSet === Set((0: VertexId, 100))) } finally { Utils.deleteRecursively(tmpDir) } } } }
Example 32
Source File: SparkILoop.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.repl import java.io.BufferedReader import scala.Predef.{println => _, _} import scala.tools.nsc.Settings import scala.tools.nsc.interpreter.{ILoop, JPrintWriter} import scala.tools.nsc.util.stringFromStream import scala.util.Properties.{javaVersion, javaVmName, versionString} def run(code: String, sets: Settings = new Settings): String = { import java.io.{ BufferedReader, StringReader, OutputStreamWriter } stringFromStream { ostream => Console.withOut(ostream) { val input = new BufferedReader(new StringReader(code)) val output = new JPrintWriter(new OutputStreamWriter(ostream), true) val repl = new SparkILoop(input, output) if (sets.classpath.isDefault) { sets.classpath.value = sys.props("java.class.path") } repl process sets } } } def run(lines: List[String]): String = run(lines.map(_ + "\n").mkString) }
Example 33
Source File: PLYReadWriteTests.scala From scalismo-faces with Apache License 2.0 | 5 votes |
package scalismo.faces.io import java.io.{ByteArrayInputStream, ByteArrayOutputStream, OutputStreamWriter} import java.nio.ByteOrder import java.util.Scanner import scalismo.faces.FacesTestSuite import scalismo.faces.io.ply._ class PLYReadWriteTests extends FacesTestSuite { describe("Write-read cycles to string, big- and little endian") { def testRWEndianCycle[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A], bo: ByteOrder): Unit = { val N = toWrite.size val os = new ByteArrayOutputStream() val writer = new SequenceWriter[A] writer.write(toWrite, os, bo) val ba = os.toByteArray val is = new ByteArrayInputStream(ba) val reader = new FixedLengthSequenceReader[A] val read = reader.read(N, is, bo) read.zip(toWrite).foreach { p => p._1 shouldBe p._2 } } def testRWStringCycle[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A]): Unit = { val N = toWrite.size val os = new ByteArrayOutputStream() val osw = new OutputStreamWriter(os) val writer = new SequenceWriter[A] writer.write(toWrite, osw) osw.flush() val is = new ByteArrayInputStream(os.toByteArray) val isr = new Scanner(is) val reader = new FixedLengthSequenceReader[A] val read = reader.read(N, isr) read.zip(toWrite).foreach { p => p._1 shouldBe p._2 } } def testAllThreeCycles[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A]): Unit = { testRWStringCycle(toWrite) testRWEndianCycle(toWrite, ByteOrder.BIG_ENDIAN) testRWEndianCycle(toWrite, ByteOrder.LITTLE_ENDIAN) } it("should result in the same sequence of bytes") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toByte testAllThreeCycles(toWrite) } it("should result in the same sequence of char") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toChar testAllThreeCycles(toWrite) } it("should result in the same sequence of short") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toShort testAllThreeCycles(toWrite) } it("should result in the same sequence of int") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toInt testAllThreeCycles(toWrite) } it("should result in the same sequence of long") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toLong testAllThreeCycles(toWrite) } it("should result in the same sequence of float") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toFloat testAllThreeCycles(toWrite) } it("should result in the same sequence of double") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255) testAllThreeCycles(toWrite) } } }
Example 34
Source File: CsvParserFactory.scala From spark-cdm with MIT License | 5 votes |
package com.microsoft.cdm.utils import java.io.OutputStreamWriter import com.univocity.parsers.csv.{CsvParser, CsvParserSettings, CsvWriter, CsvWriterSettings} object CsvParserFactory { def build(): CsvParser = { val settings = new CsvParserSettings() val format = settings.getFormat format.setDelimiter(',') settings.setMaxCharsPerColumn(500000) settings.setMaxColumns(512 * 4) new CsvParser(settings) } def buildWriter(outputWriter: OutputStreamWriter): CsvWriter = { val settings = new CsvWriterSettings() settings.setQuoteAllFields(true); new CsvWriter(outputWriter, settings) } }
Example 35
Source File: Pathway.scala From piflow with BSD 2-Clause "Simplified" License | 5 votes |
package cn.piflow.bundle.microorganism

import java.io.{BufferedReader, InputStreamReader, OutputStreamWriter}

import cn.piflow.{JobContext, JobInputStream, JobOutputStream, ProcessContext}
import cn.piflow.conf.{ConfigurableStop, Port, StopGroup}
import cn.piflow.conf.bean.PropertyDescriptor
import cn.piflow.conf.util.{ImageUtil, MapUtil}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FSDataInputStream, FSDataOutputStream, FileSystem, Path}
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.json.JSONObject

class Pathway extends ConfigurableStop {
  override val authorEmail: String = "[email protected]"
  override val description: String = "Parse Pathway data"
  override val inportList: List[String] = List(Port.DefaultPort.toString)
  override val outportList: List[String] = List(Port.DefaultPort.toString)

  var cachePath: String = _

  def setProperties(map: Map[String, Any]): Unit = {
    cachePath = MapUtil.get(map, key = "cachePath").asInstanceOf[String]
  }

  override def getPropertyDescriptor(): List[PropertyDescriptor] = {
    var descriptor: List[PropertyDescriptor] = List()
    val cachePath = new PropertyDescriptor().name("cachePath").displayName("cachePath").description("Temporary Cache File Path")
      .defaultValue("/pathway").required(true)
    descriptor = cachePath :: descriptor
    descriptor
  }

  override def getIcon(): Array[Byte] = {
    ImageUtil.getImage("icon/microorganism/Pathway.png")
  }

  override def getGroup(): List[String] = {
    List(StopGroup.MicroorganismGroup)
  }

  override def initialize(ctx: ProcessContext): Unit = {
  }

  override def perform(in: JobInputStream, out: JobOutputStream, pec: JobContext): Unit = {
    val inDf: DataFrame = in.read()

    var pathStr: String = inDf.take(1)(0).get(0).asInstanceOf[String]

    val configuration: Configuration = new Configuration()
    val pathARR: Array[String] = pathStr.split("\\/")
    var hdfsUrl: String = ""
    for (x <- (0 until 3)) {
      hdfsUrl += (pathARR(x) + "/")
    }
    configuration.set("fs.defaultFS", hdfsUrl)
    var fs: FileSystem = FileSystem.get(configuration)

    val hdfsPathTemporary = hdfsUrl + cachePath + "/pathwayCache/pathwayCache.json"
    val path: Path = new Path(hdfsPathTemporary)

    if (fs.exists(path)) {
      fs.delete(path)
    }

    fs.create(path).close()

    val hdfsWriter: OutputStreamWriter = new OutputStreamWriter(fs.append(path))

    var fdis: FSDataInputStream = null
    var br: BufferedReader = null
    var doc: JSONObject = null

    inDf.collect().foreach(row => {
      pathStr = row.get(0).asInstanceOf[String]

      fdis = fs.open(new Path(pathStr))
      br = new BufferedReader(new InputStreamReader(fdis))
      // reset the flag for every input file; declared outside the loop it would stay
      // false after the first file and the remaining files would never be parsed
      var hasAnotherSequence: Boolean = true
      var count = 0
      while (hasAnotherSequence) {
        count += 1
        doc = new JSONObject
        hasAnotherSequence = util.KeggPathway.process(br, doc)

        doc.write(hdfsWriter)
        hdfsWriter.write("\n")
      }
      br.close()
      fdis.close()
    })
    hdfsWriter.close()

    val df: DataFrame = pec.get[SparkSession]().read.json(hdfsPathTemporary)
    df.schema.printTreeString()
    println(df.count)

    out.write(df)
  }
}
Example 36
Source File: MNIST.scala From spark-tsne with Apache License 2.0 | 5 votes |
package com.github.saurfang.spark.tsne.examples import java.io.{BufferedWriter, OutputStreamWriter} import com.github.saurfang.spark.tsne.impl._ import com.github.saurfang.spark.tsne.tree.SPTree import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.linalg.distributed.RowMatrix import org.apache.spark.{SparkConf, SparkContext} import org.slf4j.LoggerFactory object MNIST { private def logger = LoggerFactory.getLogger(MNIST.getClass) def main (args: Array[String]) { val conf = new SparkConf() .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") .registerKryoClasses(Array(classOf[SPTree])) val sc = new SparkContext(conf) val hadoopConf = sc.hadoopConfiguration val fs = FileSystem.get(hadoopConf) val dataset = sc.textFile("data/MNIST/mnist.csv.gz") .zipWithIndex() .filter(_._2 < 6000) .sortBy(_._2, true, 60) .map(_._1) .map(_.split(",")) .map(x => (x.head.toInt, x.tail.map(_.toDouble))) .cache() //logInfo(dataset.collect.map(_._2.toList).toList.toString) //val features = dataset.map(x => Vectors.dense(x._2)) //val scaler = new StandardScaler(true, true).fit(features) //val scaledData = scaler.transform(features) // .map(v => Vectors.dense(v.toArray.map(x => if(x.isNaN || x.isInfinite) 0.0 else x))) // .cache() val data = dataset.flatMap(_._2) val mean = data.mean() val std = data.stdev() val scaledData = dataset.map(x => Vectors.dense(x._2.map(v => (v - mean) / std))).cache() val labels = dataset.map(_._1).collect() val matrix = new RowMatrix(scaledData) val pcaMatrix = matrix.multiply(matrix.computePrincipalComponents(50)) pcaMatrix.rows.cache() val costWriter = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(s".tmp/MNIST/cost.txt"), true))) //SimpleTSNE.tsne(pcaMatrix, perplexity = 20, maxIterations = 200) BHTSNE.tsne(pcaMatrix, maxIterations = 500, callback = { //LBFGSTSNE.tsne(pcaMatrix, perplexity = 10, maxNumIterations = 500, numCorrections = 10, convergenceTol = 1e-8) case (i, y, loss) => if(loss.isDefined) logger.info(s"$i iteration finished with loss $loss") val os = fs.create(new Path(s".tmp/MNIST/result${"%05d".format(i)}.csv"), true) val writer = new BufferedWriter(new OutputStreamWriter(os)) try { (0 until y.rows).foreach { row => writer.write(labels(row).toString) writer.write(y(row, ::).inner.toArray.mkString(",", ",", "\n")) } if(loss.isDefined) costWriter.write(loss.get + "\n") } finally { writer.close() } }) costWriter.close() sc.stop() } }
Example 37
Source File: CodecStreams.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources import java.io.{InputStream, OutputStream, OutputStreamWriter} import java.nio.charset.{Charset, StandardCharsets} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.io.compress._ import org.apache.hadoop.mapreduce.JobContext import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat import org.apache.hadoop.util.ReflectionUtils import org.apache.spark.TaskContext object CodecStreams { private def getDecompressionCodec(config: Configuration, file: Path): Option[CompressionCodec] = { val compressionCodecs = new CompressionCodecFactory(config) Option(compressionCodecs.getCodec(file)) } def createInputStream(config: Configuration, file: Path): InputStream = { val fs = file.getFileSystem(config) val inputStream: InputStream = fs.open(file) getDecompressionCodec(config, file) .map(codec => codec.createInputStream(inputStream)) .getOrElse(inputStream) } def getCompressionExtension(context: JobContext): String = { getCompressionCodec(context) .map(_.getDefaultExtension) .getOrElse("") } }
Example 38
Source File: StreamMetadata.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.streaming import java.io.{InputStreamReader, OutputStreamWriter} import java.nio.charset.StandardCharsets import java.util.ConcurrentModificationException import scala.util.control.NonFatal import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileAlreadyExistsException, FSDataInputStream, Path} import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.apache.spark.internal.Logging import org.apache.spark.sql.execution.streaming.CheckpointFileManager.CancellableFSDataOutputStream import org.apache.spark.sql.streaming.StreamingQuery def write( metadata: StreamMetadata, metadataFile: Path, hadoopConf: Configuration): Unit = { var output: CancellableFSDataOutputStream = null try { val fileManager = CheckpointFileManager.create(metadataFile.getParent, hadoopConf) output = fileManager.createAtomic(metadataFile, overwriteIfPossible = false) val writer = new OutputStreamWriter(output) Serialization.write(metadata, writer) writer.close() } catch { case e: FileAlreadyExistsException => if (output != null) { output.cancel() } throw new ConcurrentModificationException( s"Multiple streaming queries are concurrently using $metadataFile", e) case e: Throwable => if (output != null) { output.cancel() } logError(s"Error writing stream metadata $metadata to $metadataFile", e) throw e } } }
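Worth noting: new OutputStreamWriter(output) without a charset argument encodes with the JVM's default charset, so the metadata file's encoding depends on where the driver runs. The two-argument constructor pins it down; a small illustration of the difference (the stream and object names are placeholders, not part of the Spark code above):

import java.io.{ByteArrayOutputStream, OutputStreamWriter}
import java.nio.charset.StandardCharsets

object CharsetChoice {
  val out = new ByteArrayOutputStream()

  // platform-dependent: encodes with the JVM default charset
  val platformWriter = new OutputStreamWriter(out)

  // deterministic: always UTF-8, no matter where the code runs
  val utf8Writer = new OutputStreamWriter(out, StandardCharsets.UTF_8)
}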
Example 39
Source File: InteractiveChecksumLogger.scala From coursier with Apache License 2.0 | 5 votes |
package coursier.publish.checksum.logger import java.io.{OutputStream, OutputStreamWriter, Writer} import coursier.publish.checksum.ChecksumType import coursier.publish.fileset.FileSet import coursier.publish.logging.ProgressLogger final class InteractiveChecksumLogger(out: Writer, verbosity: Int) extends ChecksumLogger { private val underlying = new ProgressLogger[Object]( "Computed", "checksums", out ) override def computingSet(id: Object, fs: FileSet): Unit = underlying.processingSet(id, Some(fs.elements.length)) override def computing(id: Object, type0: ChecksumType, path: String): Unit = { if (verbosity >= 2) out.write(s"Computing ${type0.name} checksum of ${path.repr}\n") underlying.processing(path, id) } override def computed(id: Object, type0: ChecksumType, path: String, errorOpt: Option[Throwable]): Unit = { if (verbosity >= 2) out.write(s"Computed ${type0.name} checksum of ${path.repr}\n") underlying.processed(path, id, errorOpt.nonEmpty) } override def computedSet(id: Object, fs: FileSet): Unit = underlying.processedSet(id) override def start(): Unit = underlying.start() override def stop(keep: Boolean): Unit = underlying.stop(keep) } object InteractiveChecksumLogger { def create(out: OutputStream, verbosity: Int): InteractiveChecksumLogger = new InteractiveChecksumLogger(new OutputStreamWriter(out), verbosity) }
Example 40
Source File: MessageWriter.scala From lsp4s with Apache License 2.0 | 5 votes |
package scala.meta.jsonrpc import java.io.ByteArrayOutputStream import java.io.OutputStream import java.io.OutputStreamWriter import java.io.PrintWriter import java.nio.ByteBuffer import java.nio.charset.StandardCharsets import scala.concurrent.Future import io.circe.syntax._ import monix.execution.Ack import monix.reactive.Observer import scribe.LoggerSupport def write(msg: Message): Future[Ack] = lock.synchronized { baos.reset() val json = msg.asJson val protocol = BaseProtocolMessage.fromJson(json) logger.trace(s" --> $json") val byteBuffer = MessageWriter.write(protocol, baos, headerOut) out.onNext(byteBuffer) } } object MessageWriter { def headerWriter(out: OutputStream): PrintWriter = { new PrintWriter(new OutputStreamWriter(out, StandardCharsets.US_ASCII)) } def write(message: BaseProtocolMessage): ByteBuffer = { val out = new ByteArrayOutputStream() val header = headerWriter(out) write(message, out, header) } def write( message: BaseProtocolMessage, out: ByteArrayOutputStream, headerOut: PrintWriter ): ByteBuffer = { message.header.foreach { case (key, value) => headerOut.write(key) headerOut.write(": ") headerOut.write(value) headerOut.write("\r\n") } headerOut.write("\r\n") out.write(message.content) out.flush() val buffer = ByteBuffer.wrap(out.toByteArray, 0, out.size()) buffer } }
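The header writer above uses US_ASCII because LSP base-protocol headers are plain ASCII, while the message content is written to the stream as raw bytes rather than through the writer. A self-contained sketch of that framing, independent of the lsp4s classes (the object name and payload are stand-ins):

import java.io.{ByteArrayOutputStream, OutputStreamWriter, PrintWriter}
import java.nio.charset.StandardCharsets

object LspFraming {
  // frames a payload as `Content-Length: N\r\n\r\n<bytes>`
  def frame(payload: String): Array[Byte] = {
    val body = payload.getBytes(StandardCharsets.UTF_8)
    val out = new ByteArrayOutputStream()
    val headers = new PrintWriter(new OutputStreamWriter(out, StandardCharsets.US_ASCII))
    headers.write(s"Content-Length: ${body.length}\r\n\r\n")
    headers.flush() // push the encoded header bytes into `out` before the body
    out.write(body) // the body bypasses the writer and goes to the stream as raw bytes
    out.toByteArray
  }
}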
Example 41
Source File: package.scala From sjson-new with Apache License 2.0 | 5 votes |
package sjsonnew package support.scalajson import java.io.{ByteArrayOutputStream, OutputStreamWriter} import shaded.scalajson.ast.unsafe._ import org.scalactic._ package object unsafe { implicit class AnyOps[A: JsonWriter](val _x: A) { def toJson: JValue = Converter toJsonUnsafe _x def toJsonStr: String = _x.toJson.toJsonStr } implicit class AnyOps2[A: JsonWriter : JsonReader](val _x: A) { def jsonRoundTrip: A = _x.toJson.toJsonStr.toJson.fromJson[A] def jsonPrettyRoundTrip: A = _x.toJson.toPrettyStr.toJson.fromJson[A] def jsonBinaryRoundTrip: A = _x.toJson.toBinary.toJson.fromJson[A] } implicit class JValueOps(val _j: JValue) extends AnyVal { def toJsonStr: String = CompactPrinter(_j) def toPrettyStr: String = PrettyPrinter(_j) def toBinary: Array[Byte] = { val baos = new ByteArrayOutputStream() val xpto = new OutputStreamWriter(baos) CompactPrinter.print(_j, xpto) xpto.close() baos.toByteArray } def fromJson[A: JsonReader]: A = Converter.fromJsonUnsafe[A](_j) // scalajson.ast.unsafe doesn't have good toStrings def to_s: String = _j match { case JNull => "JNull" case JString(value) => s"JString($value)" case JNumber(value) => s"JNumber($value)" case JTrue => "JTrue" case JFalse => "JFalse" case JObject(value) => value.iterator map (f => s"${f.field}: ${f.value.to_s}") mkString ("JObject(", ", ", ")") case JArray(value) => value.iterator map (_.to_s) mkString ("JArray(", ", ", ")") } } implicit class StringOps(val _s: String) extends AnyVal { def toJson: JValue = Parser parseUnsafe _s def fromJsonStr[A: JsonReader]: A = _s.toJson.fromJson[A] } implicit class ByteArrayOps(val a: Array[Byte]) extends AnyVal { def toJson: JValue = Parser.parseFromByteBuffer(java.nio.ByteBuffer.wrap(a)).get } // Can't trust unsafe's toString, eg. JObject doesn't nicely toString its fields array, so its toString sucks implicit val altPrettifier: Prettifier = Prettifier { case j: JValue => j.to_s case x => Prettifier default x } }
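The toBinary helper works because the OutputStreamWriter is closed, and therefore flushed, before baos.toByteArray is read; an OutputStreamWriter buffers encoded bytes internally, so skipping the flush or close can silently truncate the output. A minimal sketch of the same rule (names are illustrative):

import java.io.{ByteArrayOutputStream, OutputStreamWriter}
import java.nio.charset.StandardCharsets

object FlushBeforeRead {
  def encode(text: String): Array[Byte] = {
    val baos = new ByteArrayOutputStream()
    val writer = new OutputStreamWriter(baos, StandardCharsets.UTF_8)
    writer.write(text)
    writer.close() // flushes the encoder's buffer; without this, baos can be missing bytes
    baos.toByteArray
  }
}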
Example 42
Source File: GraphLoaderSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.graphx import java.io.File import java.io.FileOutputStream import java.io.OutputStreamWriter import java.nio.charset.StandardCharsets import org.apache.spark.SparkFunSuite import org.apache.spark.util.Utils class GraphLoaderSuite extends SparkFunSuite with LocalSparkContext { test("GraphLoader.edgeListFile") { withSpark { sc => val tmpDir = Utils.createTempDir() val graphFile = new File(tmpDir.getAbsolutePath, "graph.txt") val writer = new OutputStreamWriter(new FileOutputStream(graphFile), StandardCharsets.UTF_8) for (i <- (1 until 101)) writer.write(s"$i 0\n") writer.close() try { val graph = GraphLoader.edgeListFile(sc, tmpDir.getAbsolutePath) val neighborAttrSums = graph.aggregateMessages[Int]( ctx => ctx.sendToDst(ctx.srcAttr), _ + _) assert(neighborAttrSums.collect.toSet === Set((0: VertexId, 100))) } finally { Utils.deleteRecursively(tmpDir) } } } }
Example 43
Source File: StreamMetadata.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.streaming import java.io.{InputStreamReader, OutputStreamWriter} import java.nio.charset.StandardCharsets import scala.util.control.NonFatal import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, FSDataOutputStream, Path} import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.apache.spark.internal.Logging import org.apache.spark.sql.streaming.StreamingQuery def write( metadata: StreamMetadata, metadataFile: Path, hadoopConf: Configuration): Unit = { var output: FSDataOutputStream = null try { val fs = FileSystem.get(hadoopConf) output = fs.create(metadataFile) val writer = new OutputStreamWriter(output) Serialization.write(metadata, writer) writer.close() } catch { case NonFatal(e) => logError(s"Error writing stream metadata $metadata to $metadataFile", e) throw e } finally { IOUtils.closeQuietly(output) } } }
Example 44
Source File: TestDiskFull.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.utils import java.io.BufferedOutputStream import java.io.FileOutputStream import java.io.IOException import java.io.OutputStreamWriter import java.io.PrintWriter import java.io.SyncFailedException import java.nio.charset.StandardCharsets import org.clulab.wm.eidos.test.TestUtils._ import org.clulab.wm.eidos.utils.Closer.AutoCloser class TestDiskFull extends Test { def test1 = { val file = "/E:/full.dat" var i = 0 try { val text1 = "The quick brown fox jumped over the lazy dog." val text = text1 + text1 for (limit <- 1 until 400) { val fos = new FileOutputStream(file) val osw = new OutputStreamWriter(new BufferedOutputStream(fos), StandardCharsets.UTF_8.toString) i = 0 new PrintWriter(osw).autoClose { pw => while (i < limit) { pw.print(text) i += 1 // pw.flush() // osw.flush() // fos.flush() fos.getFD.sync() } } } } catch { case exception: SyncFailedException => println(s"Synchronization failed for file $file at $i") println("Exiting with code -2 on assumption that the disk is full") System.exit(-2) case exception: IOException => println(s"IO failed for file $file at $i") println("Exiting with code -2 on assumption that the disk is full") System.exit(-2) case exception: Exception => println(s"Exception for file $file at $i") exception.printStackTrace() case throwable: Throwable => println(s"Throwable for file $file at $i") throwable.printStackTrace() } } // test1 }
Example 45
Source File: SessionDataFileHDFSWriter.scala From spark_training with Apache License 2.0 | 5 votes |
package com.malaska.spark.training.streaming.dstream.sessionization import java.io.BufferedWriter import java.io.FileWriter import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.conf.Configuration import java.io.OutputStreamWriter import org.apache.hadoop.fs.Path import java.util.Random object SessionDataFileHDFSWriter { val eol = System.getProperty("line.separator"); def main(args: Array[String]) { if (args.length == 0) { println("SessionDataFileWriter {tempDir} {distDir} {numberOfFiles} {numberOfEventsPerFile} {waitBetweenFiles}"); return; } val conf = new Configuration conf.addResource(new Path("/etc/hadoop/conf/core-site.xml")) conf.addResource(new Path("/etc/hadoop/conf/mapred-site.xml")) conf.addResource(new Path("/etc/hadoop/conf/hdfs-site.xml")) val fs = FileSystem.get(new Configuration) val rootTempDir = args(0) val rootDistDir = args(1) val files = args(2).toInt val loops = args(3).toInt val waitBetweenFiles = args(4).toInt val r = new Random for (f <- 1 to files) { val rootName = "/weblog." + System.currentTimeMillis() val tmpPath = new Path(rootTempDir + rootName + ".tmp") val writer = new BufferedWriter(new OutputStreamWriter(fs.create(tmpPath))) print(f + ": [") val randomLoops = loops + r.nextInt(loops) for (i <- 1 to randomLoops) { writer.write(SessionDataGenerator.getNextEvent + eol) if (i%100 == 0) { print(".") } } println("]") writer.close val distPath = new Path(rootDistDir + rootName + ".dat") fs.rename(tmpPath, distPath) Thread.sleep(waitBetweenFiles) } println("Done") } }
Example 46
Source File: MustacheTemplates.scala From fintrospect with Apache License 2.0 | 5 votes |
package io.fintrospect.templating import java.io.{ByteArrayOutputStream, File, OutputStreamWriter} import java.nio.charset.StandardCharsets import com.github.mustachejava.resolver.{DefaultResolver, FileSystemResolver} import com.github.mustachejava.{DefaultMustacheFactory, Mustache} import com.twitter.io.Buf object MustacheTemplates extends Templates { private def render(view: View, mustache: Mustache): Buf = { val outputStream = new ByteArrayOutputStream(4096) val writer = new OutputStreamWriter(outputStream, StandardCharsets.UTF_8) try { mustache.execute(writer, view) } finally { writer.close() } Buf.ByteArray.Owned(outputStream.toByteArray) } def CachingClasspath(baseClasspathPackage: String = "."): TemplateRenderer = new TemplateRenderer { private val factory = new DefaultMustacheFactory(new DefaultResolver(baseClasspathPackage)) { setObjectHandler(new ScalaObjectHandler) } def toBuf(view: View): Buf = render(view, factory.compile(view.template + ".mustache")) } def Caching(baseTemplateDir: String): TemplateRenderer = new TemplateRenderer { private val factory = new DefaultMustacheFactory(new FileSystemResolver(new File(baseTemplateDir))) { setObjectHandler(new ScalaObjectHandler) } def toBuf(view: View): Buf = render(view, factory.compile(view.template + ".mustache")) } def HotReload(baseTemplateDir: String = "."): TemplateRenderer = new TemplateRenderer { class WipeableMustacheFactory extends DefaultMustacheFactory(new FileSystemResolver(new File(baseTemplateDir))) { setObjectHandler(new ScalaObjectHandler) } def toBuf(view: View): Buf = render(view, new WipeableMustacheFactory().compile(view.template + ".mustache")) } }
Example 47
Source File: CommonLog.scala From AppCrawler with Apache License 2.0 | 5 votes |
package com.testerhome.appcrawler

import java.io.OutputStreamWriter

import com.fasterxml.jackson.annotation.JsonIgnore
import org.apache.log4j._

trait CommonLog {
  BasicConfigurator.configure()
  Logger.getRootLogger.setLevel(Level.INFO)

  @JsonIgnore
  val layout = new PatternLayout("%d{yyyy-MM-dd HH:mm:ss} %p [%c{1}.%M.%L] %m%n")
  @JsonIgnore
  lazy val log = initLog()

  def initLog(): Logger = {
    val log = Logger.getLogger(this.getClass.getName)
    //val log=Logger.getRootLogger
    if (log.getAppender("console") == null) {
      val console = new ConsoleAppender()
      console.setName("console")
      console.setWriter(new OutputStreamWriter(System.out))
      console.setLayout(layout)
      log.addAppender(console)
    } else {
      log.info("console appender already exists")
    }
    log.trace(s"set ${this} log level to ${GA.logLevel}")
    log.setLevel(GA.logLevel)
    log.setAdditivity(false)
    log
  }
}
Example 48
Source File: StarsAnalysisDemo.scala From CkoocNLP with Apache License 2.0 | 5 votes |
package applications.analysis

import java.io.{BufferedWriter, FileOutputStream, OutputStreamWriter}

import functions.segment.Segmenter
import org.apache.log4j.{Level, Logger}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Row, SparkSession}

object StarsAnalysisDemo {
  def main(args: Array[String]) {
    Logger.getLogger("org").setLevel(Level.WARN)

    val spark = SparkSession
      .builder
      .master("local[2]")
      .appName("Stars Analysis Demo")
      .getOrCreate()

    val filePath = "E:/data/chinaNews/entertainment.txt"

    // Load the data, keep only the year and content fields, and filter the content field
    import spark.implicits._
    val data = spark.sparkContext.textFile(filePath).flatMap { line =>
      val tokens: Array[String] = line.split("\u00ef")
      if (tokens.length > 3) {
        var year: String = tokens(2).split("-")(0)
        if (tokens(2).contains("年")) year = tokens(2).split("年")(0)

        var content = tokens(3)
        if (content.length > 22 && content.substring(0, 20).contains("日电")) {
          content = content.substring(content.indexOf("日电") + 2, content.length).trim
        }
        if (content.startsWith("(")) content = content.substring(content.indexOf(")") + 1, content.length)
        if (content.length > 20 && content.substring(content.length - 20, content.length).contains("记者")) {
          content = content.substring(0, content.lastIndexOf("记者")).trim
        }

        Some(year, content)
      } else None
    }.toDF("year", "content")

    // Segment the text, drop single-character terms, and keep the part-of-speech tag of each term
    val segmenter = new Segmenter()
      .isAddNature(true)
      .isDelEn(true)
      .isDelNum(true)
      .setMinTermLen(2)
      .setMinTermNum(5)
      .setSegType("StandardSegment")
      .setInputCol("content")
      .setOutputCol("segmented")
    val segDF: DataFrame = segmenter.transform(data)
    segDF.cache()

    val segRDD: RDD[(Int, Seq[String])] = segDF.select("year", "segmented").rdd.map { case Row(year: String, terms: Seq[String]) =>
      (Integer.parseInt(year), terms)
    }

    val result: Array[String] = segRDD.map(line => line._1.toString + "\u00ef" + line._2.mkString(",")).collect()
    val writer: BufferedWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("E:/entertainment_seg.txt")))
    result.foreach(line => writer.write(line + "\n"))
    writer.close()

    // Count the stars that appear most often in the 2016 news
    val stars2016 = segRDD.filter(_._1 == 2016)
      .flatMap { case (year: Int, termStr: Seq[String]) =>
        val person = termStr
          .map(term => (term.split("/")(0), term.split("/")(1)))
          .filter(_._2.equalsIgnoreCase("nr"))
          .map(term => (term._1, 1L))
        person
      }
      .reduceByKey(_ + _)
      .sortBy(_._2, ascending = false)

    segDF.unpersist()

    stars2016.take(100).foreach(println)

    spark.stop()
  }
}
Example 49
Source File: AkkaHttpPrometheusExporter.scala From cloudstate with Apache License 2.0 | 5 votes |
package io.cloudstate.proxy import java.io.OutputStreamWriter import java.util import akka.actor.ActorSystem import akka.http.scaladsl.Http import io.prometheus.client.CollectorRegistry import akka.http.scaladsl.model._ import akka.http.scaladsl.server.Directives._ import akka.stream.Materializer import akka.util.ByteString import io.prometheus.client.exporter.common.TextFormat import scala.concurrent.Future class AkkaHttpPrometheusExporter(metricsPort: Int, registry: CollectorRegistry = CollectorRegistry.defaultRegistry)( implicit system: ActorSystem, mat: Materializer ) { private[this] final val PrometheusContentType = ContentType.parse(TextFormat.CONTENT_TYPE_004).right.get private def routes = get { (path("metrics") | pathSingleSlash) { encodeResponse { parameter(Symbol("name[]").*) { names => complete { val namesSet = new util.HashSet[String]() names.foreach(namesSet.add) val builder = ByteString.newBuilder val writer = new OutputStreamWriter(builder.asOutputStream) TextFormat.write004(writer, registry.filteredMetricFamilySamples(namesSet)) // Very important to flush the writer before we build the byte string! writer.flush() HttpEntity(PrometheusContentType, builder.result()) } } } } } def start(): Future[Http.ServerBinding] = Http().bindAndHandle(routes, "0.0.0.0", metricsPort) }
Example 50
Source File: InteractiveSonatypeLogger.scala From coursier with Apache License 2.0 | 5 votes |
package coursier.publish.sonatype.logger

import java.io.{OutputStream, OutputStreamWriter}

import coursier.cache.internal.Terminal.Ansi

final class InteractiveSonatypeLogger(out: OutputStreamWriter, verbosity: Int) extends SonatypeLogger {
  override def listingProfiles(attempt: Int, total: Int): Unit =
    if (verbosity >= 0) {
      val extra = if (attempt == 0) "" else s" (attempt $attempt / $total)"
      out.write("Listing Sonatype profiles..." + extra)
      out.flush()
    }

  override def listedProfiles(errorOpt: Option[Throwable]): Unit = {
    if (verbosity >= 0) {
      out.clearLine(2)
      out.write('\n')
      out.up(1)
      out.flush()
    }

    val msgOpt =
      if (errorOpt.isEmpty) {
        if (verbosity >= 1) Some("Listed Sonatype profiles")
        else None
      } else
        Some("Failed to list Sonatype profiles")

    for (msg <- msgOpt) {
      out.write(s"$msg\n")
      out.flush()
    }
  }
}

object InteractiveSonatypeLogger {
  def create(out: OutputStream, verbosity: Int): SonatypeLogger =
    new InteractiveSonatypeLogger(new OutputStreamWriter(out), verbosity)
}
Example 51
Source File: FileSystem.scala From ohara with Apache License 2.0 | 4 votes |
package oharastream.ohara.client.filesystem import java.io.{BufferedReader, BufferedWriter, IOException, InputStreamReader, OutputStreamWriter} import java.nio.charset.{Charset, StandardCharsets} import oharastream.ohara.client.filesystem.ftp.FtpFileSystem import oharastream.ohara.client.filesystem.hdfs.HdfsFileSystem import oharastream.ohara.client.filesystem.smb.SmbFileSystem import oharastream.ohara.common.exception.FileSystemException trait FileSystem extends oharastream.ohara.kafka.connector.storage.FileSystem { def readLines(path: String, encode: String = "UTF-8"): Array[String] = { val reader = new BufferedReader(new InputStreamReader(open(path), Charset.forName(encode))) try Iterator.continually(reader.readLine()).takeWhile(_ != null).toArray finally reader.close() } def wrap[T](f: () => T): T = try { f() } catch { case e: IOException => throw new FileSystemException(e.getMessage, e) case e: IllegalStateException => throw new FileSystemException(e.getMessage, e) } } object FileSystem { def hdfsBuilder: HdfsFileSystem.Builder = HdfsFileSystem.builder def ftpBuilder: FtpFileSystem.Builder = FtpFileSystem.builder def smbBuilder: SmbFileSystem.Builder = SmbFileSystem.builder }