java.io.BufferedWriter Scala Examples
The following examples show how to use java.io.BufferedWriter.
The project, source file, and license for each example are listed in the heading above its code.
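Most of the examples below follow the same core pattern: wrap a Writer or OutputStream in a java.io.BufferedWriter, write the output, and flush/close in a finally block (or through a resource-management helper). The following minimal sketch distills that pattern; the file name and contents are illustrative only.

import java.io.{BufferedWriter, File, FileWriter}

object BufferedWriterSketch {
  def main(args: Array[String]): Unit = {
    val file = new File("example-output.txt") // illustrative path
    val writer = new BufferedWriter(new FileWriter(file))
    try {
      writer.write("first line")
      writer.newLine() // platform-specific line separator
      writer.write("second line")
    } finally {
      writer.close() // close() flushes the buffer before releasing the file handle
    }
  }
}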
Example 1
Source File: BasicTestPerformance4Ftp.scala From ohara with Apache License 2.0 | 6 votes |
package oharastream.ohara.it.performance import java.io.{BufferedWriter, OutputStreamWriter} import java.util.concurrent.atomic.LongAdder import oharastream.ohara.common.data.Row import oharastream.ohara.common.util.{CommonUtils, Releasable} import org.junit.AssumptionViolatedException import spray.json.{JsNumber, JsString, JsValue} import scala.jdk.CollectionConverters._ import oharastream.ohara.client.filesystem.FileSystem import scala.concurrent.duration.Duration abstract class BasicTestPerformance4Ftp extends BasicTestPerformance { private[this] val ftpHostname = value(PerformanceTestingUtils.FTP_HOSTNAME_KEY) .getOrElse(throw new AssumptionViolatedException(s"${PerformanceTestingUtils.FTP_HOSTNAME_KEY} is required")) private[this] val ftpPort = value(PerformanceTestingUtils.FTP_PORT_KEY) .getOrElse(throw new AssumptionViolatedException(s"${PerformanceTestingUtils.FTP_PORT_KEY} is required")) .toInt private[this] val ftpUser = value(PerformanceTestingUtils.FTP_USER_KEY) .getOrElse(throw new AssumptionViolatedException(s"${PerformanceTestingUtils.FTP_USER_KEY} is required")) private[this] val ftpPassword = value(PerformanceTestingUtils.FTP_PASSWORD_KEY) .getOrElse(throw new AssumptionViolatedException(s"${PerformanceTestingUtils.FTP_PASSWORD_KEY} is required")) protected val ftpSettings: Map[String, JsValue] = Map( // convert the hostname to IP address oharastream.ohara.connector.ftp.FTP_HOSTNAME_KEY -> JsString(ftpHostname), oharastream.ohara.connector.ftp.FTP_PORT_KEY -> JsNumber(ftpPort), oharastream.ohara.connector.ftp.FTP_USER_NAME_KEY -> JsString(ftpUser), oharastream.ohara.connector.ftp.FTP_PASSWORD_KEY -> JsString(ftpPassword) ) private[this] val csvInputFolderKey = PerformanceTestingUtils.CSV_INPUT_KEY private[this] val csvOutputFolder: String = value(csvInputFolderKey).getOrElse("/input") private[this] val cleanupTestDataKey = PerformanceTestingUtils.DATA_CLEANUP_KEY protected val cleanupTestData: Boolean = value(cleanupTestDataKey).forall(_.toBoolean) protected def setupInputData(timeout: Duration): (String, Long, Long) = { val client = ftpClient() try { if (!PerformanceTestingUtils.exists(client, csvOutputFolder)) PerformanceTestingUtils.createFolder(client, csvOutputFolder) val result = generateData( numberOfRowsToFlush, timeout, (rows: Seq[Row]) => { val file = s"$csvOutputFolder/${CommonUtils.randomString()}" val writer = new BufferedWriter(new OutputStreamWriter(client.create(file))) val count = new LongAdder() val sizeInBytes = new LongAdder() try { val cellNames: Set[String] = rows.head.cells().asScala.map(_.name).toSet writer .append(cellNames.mkString(",")) .append("\n") rows.foreach(row => { val content = row.cells().asScala.map(_.value).mkString(",") count.increment() sizeInBytes.add(content.length) writer.append(content).append("\n") }) (count.longValue(), sizeInBytes.longValue()) } finally Releasable.close(writer) } ) (csvOutputFolder, result._1, result._2) } finally Releasable.close(client) } protected[this] def ftpClient() = FileSystem.ftpBuilder .hostname(ftpHostname) .port(ftpPort) .user(ftpUser) .password(ftpPassword) .build }
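Example 1 streams CSV rows through a BufferedWriter layered on an OutputStreamWriter over an FTP output stream, tracking row and byte counts with LongAdder. The sketch below keeps only the CSV-writing core and substitutes a plain FileOutputStream for the FTP client's client.create(file) call; path, header, and rows are illustrative.

import java.io.{BufferedWriter, FileOutputStream, OutputStreamWriter}

object CsvWriteSketch {
  // Writes a header row plus data rows as CSV and returns the number of rows written.
  def writeCsv(path: String, header: Seq[String], rows: Seq[Seq[Any]]): Long = {
    val writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(path)))
    var count = 0L
    try {
      writer.append(header.mkString(",")).append("\n")
      rows.foreach { row =>
        writer.append(row.mkString(",")).append("\n")
        count += 1
      }
      count
    } finally writer.close()
  }
}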
Example 2
Source File: LoadDistLabelKb.scala From rex with Apache License 2.0 | 5 votes |
package org.rex.dl

import java.io.{BufferedWriter, File, FileWriter}

import scala.io.Source
import scala.util.Try

object LoadDistLabelKb {

  import org.rex.dl.DistLabelStr._

  type Triple = (Query, Answer, Label)

  def parseTripleFromLine(l: String): Triple = {
    val bits = l.split("\t")
    (bits(0), bits(1), bits(2))
  }

  def apply(f: File): Try[KnowledgeBase] = Try {
    Source
      .fromFile(f)
      .getLines()
      .map(parseTripleFromLine)
      .foldLeft(Map.empty[Query, Map[Answer, Labels]]) {
        case (m, (q, a, r)) =>
          if (m contains q) {
            val answersForQ = m(q)
            (m - q) + (q -> (
              if (answersForQ contains a)
                (answersForQ - a) + (a -> (answersForQ(a) + r))
              else
                answersForQ + (a -> Set(r))
            ))
          } else
            m + (q -> Map(a -> Set(r)))
      }
  }

  def apply(kb: KnowledgeBase)(f: File): Try[Unit] = Try {
    val w = new BufferedWriter(new FileWriter(f))
    try {
      kb.foreach {
        case (q, amap) =>
          amap.foreach {
            case (a, labels) =>
              labels.foreach { l =>
                w.write(s"${writeTripleToLine(q, a, l)}\n")
              }
          }
      }
    } finally {
      w.close()
    }
  }

  @inline
  def writeTripleToLine(t: Triple): String =
    writeTripleToLine(t._1, t._2, t._3)

  // tab-separated query, answer, label; round-trips with parseTripleFromLine
  @inline
  def writeTripleToLine(q: Query, a: Answer, l: Label): String =
    s"$q\t$a\t$l"
}
Example 3
Source File: ParseCSVwithHTML.scala From CSYE7200_Old with MIT License | 5 votes |
package edu.neu.coe.csye7200.parse import java.io.{BufferedWriter, File, FileWriter} import scala.collection.mutable import scala.io.{BufferedSource, Source} import scala.util.{Failure, Success, Try} object ParseCSVwithHTML extends App { val parser = ParseCSVwithHTML(CsvParser(delimiter = '\t' + "")) val title = "Report" if (args.length > 0) { val filename = args.head val source: BufferedSource = Source.fromFile(filename, "UTF-16") val w = parser.parseStreamIntoHTMLTable(source.getLines.toStream, title) val file = new File("output.html") val bw = new BufferedWriter(new FileWriter(file)) bw.write(w) bw.close() println(s"Successfully written $file") } else System.err.println("syntax: ParseCSVwithHTML filename") }
Example 4
Source File: Using.scala From Argus-SAF with Apache License 2.0 | 5 votes |
package org.argus.jawa.core.compiler.compile.io import java.io.{Closeable, FileInputStream, FileOutputStream, InputStream, OutputStream, File => JavaFile} import java.io.{BufferedInputStream, BufferedOutputStream, InputStreamReader, OutputStreamWriter} import java.io.{BufferedReader, BufferedWriter} import java.util.zip.GZIPInputStream import java.net.URL import java.nio.channels.FileChannel import java.nio.charset.Charset import java.util.jar.{JarFile, JarInputStream, JarOutputStream} import java.util.zip.{GZIPOutputStream, ZipEntry, ZipFile, ZipInputStream, ZipOutputStream} import ErrorHandling.translate import scala.reflect.{Manifest => SManifest} abstract class Using[Source, T] { protected def open(src: Source): T def apply[R](src: Source)(f: T => R): R = { val resource = open(src) try { f(resource) } finally { close(resource) } } protected def close(out: T): Unit } abstract class WrapUsing[Source, T](implicit srcMf: SManifest[Source], targetMf: SManifest[T]) extends Using[Source, T] { protected def label[S](m: SManifest[S]): String = m.runtimeClass.getSimpleName protected def openImpl(source: Source): T protected final def open(source: Source): T = translate("Error wrapping " + label(srcMf) + " in " + label(targetMf) + ": ") { openImpl(source) } } trait OpenFile[T] extends Using[JavaFile, T] { protected def openImpl(file: JavaFile): T protected final def open(file: JavaFile): T = { val parent = file.getParentFile if(parent != null) IO.createDirectory(parent) openImpl(file) } } object Using { def wrap[Source, T<: Closeable](openF: Source => T)(implicit srcMf: SManifest[Source], targetMf: SManifest[T]): Using[Source,T] = wrap(openF, closeCloseable) def wrap[Source, T](openF: Source => T, closeF: T => Unit)(implicit srcMf: SManifest[Source], targetMf: SManifest[T]): Using[Source,T] = new WrapUsing[Source, T] { def openImpl(source: Source): T = openF(source) def close(t: T): Unit = closeF(t) } def resource[Source, T <: Closeable](openF: Source => T): Using[Source,T] = resource(openF, closeCloseable) def resource[Source, T](openF: Source => T, closeF: T => Unit): Using[Source,T] = new Using[Source,T] { def open(s: Source): T = openF(s) def close(s: T): Unit = closeF(s) } def file[T <: Closeable](openF: JavaFile => T): OpenFile[T] = file(openF, closeCloseable) def file[T](openF: JavaFile => T, closeF: T => Unit): OpenFile[T] = new OpenFile[T] { def openImpl(file: JavaFile): T = openF(file) def close(t: T): Unit = closeF(t) } private def closeCloseable[T <: Closeable]: T => Unit = _.close() def bufferedOutputStream: Using[OutputStream, BufferedOutputStream] = wrap((out: OutputStream) => new BufferedOutputStream(out) ) def bufferedInputStream: Using[InputStream, BufferedInputStream] = wrap((in: InputStream) => new BufferedInputStream(in) ) def fileOutputStream(append: Boolean = false): OpenFile[BufferedOutputStream] = file(f => new BufferedOutputStream(new FileOutputStream(f, append))) def fileInputStream: OpenFile[BufferedInputStream] = file(f => new BufferedInputStream(new FileInputStream(f))) def urlInputStream: Using[URL, BufferedInputStream] = resource((u: URL) => translate("Error opening " + u + ": ")(new BufferedInputStream(u.openStream))) def fileOutputChannel: OpenFile[FileChannel] = file(f => new FileOutputStream(f).getChannel) def fileInputChannel: OpenFile[FileChannel] = file(f => new FileInputStream(f).getChannel) def fileWriter(charset: Charset = IO.utf8, append: Boolean = false): OpenFile[BufferedWriter] = file(f => new BufferedWriter(new OutputStreamWriter(new 
FileOutputStream(f, append), charset)) ) def fileReader(charset: Charset): OpenFile[BufferedReader] = file(f => new BufferedReader(new InputStreamReader(new FileInputStream(f), charset)) ) def urlReader(charset: Charset): Using[URL, BufferedReader] = resource((u: URL) => new BufferedReader(new InputStreamReader(u.openStream, charset))) def jarFile(verify: Boolean): OpenFile[JarFile] = file(f => new JarFile(f, verify), (_: JarFile).close()) def zipFile: OpenFile[ZipFile] = file(f => new ZipFile(f), (_: ZipFile).close()) def streamReader: Using[(InputStream, Charset), InputStreamReader] = wrap{ (_: (InputStream, Charset)) match { case (in, charset) => new InputStreamReader(in, charset) } } def gzipInputStream: Using[InputStream, GZIPInputStream] = wrap((in: InputStream) => new GZIPInputStream(in, 8192) ) def zipInputStream: Using[InputStream, ZipInputStream] = wrap((in: InputStream) => new ZipInputStream(in)) def zipOutputStream: Using[OutputStream, ZipOutputStream] = wrap((out: OutputStream) => new ZipOutputStream(out)) def gzipOutputStream: Using[OutputStream, GZIPOutputStream] = wrap((out: OutputStream) => new GZIPOutputStream(out, 8192), (_: GZIPOutputStream).finish()) def jarOutputStream: Using[OutputStream, JarOutputStream] = wrap((out: OutputStream) => new JarOutputStream(out)) def jarInputStream: Using[InputStream, JarInputStream] = wrap((in: InputStream) => new JarInputStream(in)) def zipEntry(zip: ZipFile): Using[ZipEntry, InputStream] = resource((entry: ZipEntry) => translate("Error opening " + entry.getName + " in " + zip + ": ") { zip.getInputStream(entry) } ) }
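Example 4 defines a loan-pattern Using abstraction whose fileWriter helper opens a BufferedWriter and guarantees it is closed even if the body throws. A possible call site is sketched below; it assumes the Argus-SAF module containing Using (and its IO helper, which supplies the default UTF-8 charset) is on the classpath, and the file name is illustrative.

import java.io.File
import org.argus.jawa.core.compiler.compile.io.Using

object UsingFileWriterSketch {
  def main(args: Array[String]): Unit = {
    // fileWriter() yields an OpenFile[BufferedWriter]; apply loans the writer to the block.
    Using.fileWriter()(new File("report.txt")) { writer =>
      writer.write("one line of output")
      writer.newLine()
    }
  }
}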
Example 5
Source File: FileLogger.scala From odin with Apache License 2.0 | 5 votes |
package io.odin.loggers import java.io.BufferedWriter import java.nio.file.{Files, Paths} import cats.effect.syntax.all._ import cats.effect.{Resource, Sync, Timer} import cats.instances.list._ import cats.syntax.all._ import io.odin.formatter.Formatter import io.odin.{Level, Logger, LoggerMessage} case class FileLogger[F[_]: Timer](buffer: BufferedWriter, formatter: Formatter, override val minLevel: Level)( implicit F: Sync[F] ) extends DefaultLogger[F](minLevel) { def log(msg: LoggerMessage): F[Unit] = write(msg, formatter).guarantee(flush) override def log(msgs: List[LoggerMessage]): F[Unit] = msgs.traverse(write(_, formatter)).void.guarantee(flush) private def write(msg: LoggerMessage, formatter: Formatter): F[Unit] = F.delay { buffer.write(formatter.format(msg) + System.lineSeparator()) } private def flush: F[Unit] = F.delay(buffer.flush()).handleErrorWith(_ => F.unit) } object FileLogger { def apply[F[_]: Timer](fileName: String, formatter: Formatter, minLevel: Level)( implicit F: Sync[F] ): Resource[F, Logger[F]] = { def mkBuffer: F[BufferedWriter] = F.delay(Files.newBufferedWriter(Paths.get(fileName))) def closeBuffer(buffer: BufferedWriter): F[Unit] = F.delay(buffer.close()).handleErrorWith(_ => F.unit) Resource.make(mkBuffer)(closeBuffer).map { buffer => FileLogger(buffer, formatter, minLevel) } } }
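Example 5 acquires and releases the BufferedWriter through a cats-effect Resource (mkBuffer/closeBuffer). The sketch below shows the same acquire/release idea in isolation, using cats-effect 2.x to match the Timer/Sync constraints above; the file name and log line are illustrative.

import java.io.BufferedWriter
import java.nio.file.{Files, Paths}
import cats.effect.{IO, Resource}

object WriterResourceSketch {
  // Acquire a BufferedWriter and guarantee close(), mirroring mkBuffer/closeBuffer above.
  def writerResource(fileName: String): Resource[IO, BufferedWriter] =
    Resource.make(IO(Files.newBufferedWriter(Paths.get(fileName)))) { buffer =>
      IO(buffer.close()).handleErrorWith(_ => IO.unit)
    }

  def main(args: Array[String]): Unit =
    writerResource("app.log").use { buffer =>
      IO {
        buffer.write("one log line" + System.lineSeparator())
        buffer.flush()
      }
    }.unsafeRunSync()
}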
Example 6
Source File: TsvRetrieverFromFile.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.dc.stream import java.io.{BufferedWriter, File, FileWriter} import akka.actor.ActorSystem import akka.stream.{KillSwitch, KillSwitches, Materializer} import akka.stream.Supervision.Decider import akka.stream.contrib.SourceGen import akka.stream.scaladsl.{Flow, Keep, Sink, Source} import akka.util.ByteString import cmwell.dc.LazyLogging import cmwell.dc.stream.MessagesTypesAndExceptions.{DcInfo, InfotonData} import cmwell.dc.stream.TsvRetriever.{logger, TsvFlowOutput} import cmwell.util.resource._ import scala.concurrent.Future import scala.util.{Failure, Success} import scala.concurrent.ExecutionContext.Implicits.global object TsvRetrieverFromFile extends LazyLogging { def apply(dcInfo: DcInfo)(implicit mat: Materializer, system: ActorSystem): Source[InfotonData, (KillSwitch, Future[Seq[Option[String]]])] = { val persistFile = dcInfo.tsvFile.get + ".persist" def appendToPersistFile(str: String): Unit = { val bw = new BufferedWriter(new FileWriter(persistFile, true)) bw.write(str) bw.close() } val linesToDrop = dcInfo.positionKey.fold { if (!new File(persistFile).exists) 0L else using(scala.io.Source.fromFile(persistFile))(_.getLines.toList.last.toLong) }(pos => pos.toLong) val positionKeySink = Flow[InfotonData] .recover { case e: Throwable => InfotonData(null, null, -1) } .scan(linesToDrop) { case (count, InfotonData(null, null, -1)) => { appendToPersistFile("crash at: " + count + "\n" + count.toString + "\n") count } case (count, _) => { val newCount = count + 1 if (newCount % 10000 == 0) appendToPersistFile(newCount.toString + "\n") newCount } } .toMat(Sink.last)( (_, right) => right.map { count => appendToPersistFile(count.toString + "\n") Seq.fill(2)(Option(count.toString)) } ) Source .fromIterator(() => scala.io.Source.fromFile(dcInfo.tsvFile.get).getLines()) .drop { logger.info(s"Dropping $linesToDrop initial lines from file ${dcInfo.tsvFile.get} for sync ${dcInfo.key}") linesToDrop } .viaMat(KillSwitches.single)(Keep.right) .map(line => TsvRetriever.parseTSVAndCreateInfotonDataFromIt(ByteString(line))) .alsoToMat(positionKeySink)(Keep.both) } }
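Example 6 persists progress markers by reopening the file in append mode for every write (new FileWriter(persistFile, true)). A minimal sketch of that append pattern, with an illustrative path:

import java.io.{BufferedWriter, FileWriter}

object AppendSketch {
  // The second FileWriter argument enables append mode, so each call adds to the end of the file.
  def appendLine(path: String, line: String): Unit = {
    val writer = new BufferedWriter(new FileWriter(path, true))
    try {
      writer.write(line)
      writer.newLine()
    } finally writer.close()
  }
}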
Example 7
Source File: HadoopFSHelpers.scala From morpheus with Apache License 2.0 | 5 votes |
package org.opencypher.morpheus.api.io.fs import java.io.{BufferedReader, BufferedWriter, InputStreamReader, OutputStreamWriter} import org.apache.hadoop.fs.{FileSystem, Path} import org.opencypher.morpheus.api.io.util.FileSystemUtils.using object HadoopFSHelpers { implicit class RichHadoopFileSystem(fileSystem: FileSystem) { protected def createDirectoryIfNotExists(path: Path): Unit = { if (!fileSystem.exists(path)) { fileSystem.mkdirs(path) } } def listDirectories(path: String): List[String] = { val p = new Path(path) createDirectoryIfNotExists(p) fileSystem.listStatus(p) .filter(_.isDirectory) .map(_.getPath.getName) .toList } def deleteDirectory(path: String): Unit = { fileSystem.delete(new Path(path), true) } def readFile(path: String): String = { using(new BufferedReader(new InputStreamReader(fileSystem.open(new Path(path)), "UTF-8"))) { reader => def readLines = Stream.cons(reader.readLine(), Stream.continually(reader.readLine)) readLines.takeWhile(_ != null).mkString } } def writeFile(path: String, content: String): Unit = { val p = new Path(path) val parentDirectory = p.getParent createDirectoryIfNotExists(parentDirectory) using(fileSystem.create(p)) { outputStream => using(new BufferedWriter(new OutputStreamWriter(outputStream, "UTF-8"))) { bufferedWriter => bufferedWriter.write(content) } } } } }
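Example 7 writes text to a Hadoop FileSystem by wrapping the output stream from fs.create in an OutputStreamWriter with an explicit UTF-8 charset. The sketch below shows that layering on its own; it assumes a hadoop-client dependency, and with a default Configuration the path resolves to the local filesystem unless a cluster configuration is present.

import java.io.{BufferedWriter, OutputStreamWriter}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object HadoopWriteSketch {
  def writeFile(fs: FileSystem, path: String, content: String): Unit = {
    val out = fs.create(new Path(path)) // FSDataOutputStream
    val writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"))
    try writer.write(content)
    finally writer.close() // closing the writer also closes the underlying stream
  }

  def main(args: Array[String]): Unit =
    writeFile(FileSystem.get(new Configuration()), "/tmp/hadoop-write-sketch.txt", "hello")
}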
Example 8
Source File: Main.scala From scalajs-highcharts with MIT License | 5 votes |
package com.karasiq.highcharts.generator import java.io.{BufferedWriter, FileOutputStream, OutputStreamWriter, PrintWriter} import java.nio.file._ import java.nio.file.attribute.BasicFileAttributes import scala.util.control.Exception import scalaj.http.{Http, HttpOptions} import com.karasiq.highcharts.generator.writers.{ScalaClassWriter, ScalaJsClassBuilder} case class HighchartsApiDoc(library: String) { private val defaultPackage = System.getProperty(s"highcharts-generator.$library.package", s"com.$library") private def httpGet(url: String): List[ConfigurationObject] = { val page = Http.get(url) .header("User-Agent", "Mozilla/5.0 (X11; OpenBSD amd64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.81 Safari/537.36") .header("Accept", "application/json") .options(HttpOptions.connTimeout(10000), HttpOptions.readTimeout(10000)) val json = page.asString ConfigurationObject.fromJson(json) } private def writeFiles(pkg: String, configs: List[ConfigurationObject], rootObject: Option[String] = None): Unit = { val header = s""" |package $pkg | |import scalajs.js, js.`|` |import com.highcharts.CleanJsObject |import com.highcharts.HighchartsUtils._ | |""".stripMargin val outputDir = Paths.get(System.getProperty("highcharts-generator.output", "src/main/scala"), pkg.split("\\."):_*) Files.createDirectories(outputDir) // Remove all files Files.walkFileTree(outputDir, new SimpleFileVisitor[Path] { override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = { Files.delete(file) FileVisitResult.CONTINUE } }) val classes = new ScalaJsClassBuilder().parse(configs, rootObject) val classWriter = new ScalaClassWriter classes.foreach { scalaJsClass ⇒ val file = outputDir.resolve(scalaJsClass.scalaName + ".scala") println(s"Writing $file...") val writer = new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file.toFile, true), "UTF-8"))) Exception.allCatch.andFinally(writer.close()) { if (Files.size(file) == 0) { writer.print(header) } classWriter.writeClass(scalaJsClass) { line ⇒ writer.println(line) } writer.flush() } } } def writeConfigs(): Unit = { val configs = httpGet(s"https://api.highcharts.com/$library/dump.json") writeFiles(s"$defaultPackage.config", configs, Some(s"${library.capitalize}Config")) } def writeApis(): Unit = { val configs = httpGet(s"https://api.highcharts.com/$library/object/dump.json") writeFiles(s"$defaultPackage.api", configs) } def writeAll(): Unit = { // TODO: https://github.com/highcharts/highcharts/issues/7227 writeConfigs() // writeApis() // TODO: 404 } } object Main extends App { HighchartsApiDoc("highcharts").writeAll() HighchartsApiDoc("highstock").writeAll() HighchartsApiDoc("highmaps").writeAll() }
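Example 8 layers a PrintWriter on top of the BufferedWriter so it can use println, with the FileOutputStream opened in append mode and an explicit UTF-8 encoding. A reduced sketch of that stack, with an illustrative file name:

import java.io.{BufferedWriter, FileOutputStream, OutputStreamWriter, PrintWriter}

object PrintWriterSketch {
  def main(args: Array[String]): Unit = {
    // append = true keeps existing content; PrintWriter adds println/print convenience methods.
    val writer = new PrintWriter(new BufferedWriter(
      new OutputStreamWriter(new FileOutputStream("generated.scala", true), "UTF-8")))
    try {
      writer.println("// generated file")
      writer.flush()
    } finally writer.close()
  }
}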
Example 9
Source File: FeatureSelection.scala From aerosolve with Apache License 2.0 | 5 votes |
package com.airbnb.aerosolve.training import java.io.BufferedWriter import java.io.OutputStreamWriter import java.util import com.airbnb.aerosolve.core.{ModelRecord, ModelHeader, FeatureVector, Example} import com.airbnb.aerosolve.core.models.LinearModel import com.airbnb.aerosolve.core.util.Util import com.typesafe.config.Config import org.slf4j.{LoggerFactory, Logger} import org.apache.spark.SparkContext import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD import scala.collection.mutable.HashMap import scala.collection.mutable.HashSet import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.Buffer import scala.collection.JavaConversions._ import scala.collection.JavaConverters._ import scala.util.Random import scala.math.abs import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.fs.Path import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path object FeatureSelection { private final val log: Logger = LoggerFactory.getLogger("FeatureSelection") val allKey : (String, String) = ("$ALL", "$POS") // Given a RDD compute the pointwise mutual information between // the positive label and the discrete features. def pointwiseMutualInformation(examples : RDD[Example], config : Config, key : String, rankKey : String, posThreshold : Double, minPosCount : Double, newCrosses : Boolean) : RDD[((String, String), Double)] = { val pointwise = LinearRankerUtils.makePointwise(examples, config, key, rankKey) val features = pointwise .mapPartitions(part => { // The tuple2 is var, var | positive val output = scala.collection.mutable.HashMap[(String, String), (Double, Double)]() part.foreach(example =>{ val featureVector = example.example.get(0) val isPos = if (featureVector.floatFeatures.get(rankKey).asScala.head._2 > posThreshold) 1.0 else 0.0 val all : (Double, Double) = output.getOrElse(allKey, (0.0, 0.0)) output.put(allKey, (all._1 + 1.0, all._2 + 1.0 * isPos)) val features : Array[(String, String)] = LinearRankerUtils.getFeatures(featureVector) if (newCrosses) { for (i <- features) { for (j <- features) { if (i._1 < j._1) { val key = ("%s<NEW>%s".format(i._1, j._1), "%s<NEW>%s".format(i._2, j._2)) val x = output.getOrElse(key, (0.0, 0.0)) output.put(key, (x._1 + 1.0, x._2 + 1.0 * isPos)) } } } } for (feature <- features) { val x = output.getOrElse(feature, (0.0, 0.0)) output.put(feature, (x._1 + 1.0, x._2 + 1.0 * isPos)) } }) output.iterator }) .reduceByKey((a, b) => (a._1 + b._1, a._2 + b._2)) .filter(x => x._2._2 >= minPosCount) val allCount = features.filter(x => x._1.equals(allKey)).take(1).head features.map(x => { val prob = x._2._1 / allCount._2._1 val probPos = x._2._2 / allCount._2._2 (x._1, math.log(probPos / prob) / math.log(2.0)) }) } // Returns the maximum entropy per family def maxEntropy(input : RDD[((String, String), Double)]) : RDD[((String, String), Double)] = { input .map(x => (x._1._1, (x._1._2, x._2))) .reduceByKey((a, b) => if (math.abs(a._2) > math.abs(b._2)) a else b) .map(x => ((x._1, x._2._1), x._2._2)) } }
Example 10
Source File: FileDownloader.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperations.readwritedataframe.filestorage import java.io.{BufferedWriter, FileOutputStream, IOException, OutputStreamWriter} import java.nio.file.{Files, Paths} import java.util.UUID import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import ai.deepsense.deeplang.ExecutionContext import ai.deepsense.deeplang.doperations.exceptions.DeepSenseIOException import ai.deepsense.deeplang.doperations.readwritedataframe.FilePath private[filestorage] object FileDownloader { def downloadFile(url: String)(implicit context: ExecutionContext): FilePath = { if (context.tempPath.startsWith("hdfs://")) { downloadFileToHdfs(url) } else { downloadFileToDriver(url) } } private def downloadFileToHdfs(url: String)(implicit context: ExecutionContext) = { val content = scala.io.Source.fromURL(url).getLines() val hdfsPath = s"${context.tempPath}/${UUID.randomUUID()}" val configuration = new Configuration() val hdfs = FileSystem.get(configuration) val file = new Path(hdfsPath) val hdfsStream = hdfs.create(file) val writer = new BufferedWriter(new OutputStreamWriter(hdfsStream)) try { content.foreach {s => writer.write(s) writer.newLine() } } finally { safeClose(writer) hdfs.close() } FilePath(hdfsPath) } private def downloadFileToDriver(url: String) (implicit context: ExecutionContext) = { val outputDirPath = Paths.get(context.tempPath) // We're checking if the output is a directory following symlinks. // The default behaviour of createDirectories is NOT to follow symlinks if (!Files.isDirectory(outputDirPath)) { Files.createDirectories(outputDirPath) } val outFilePath = Files.createTempFile(outputDirPath, "download", ".csv") // content is a stream. Do not invoke stuff like .toList() on it. val content = scala.io.Source.fromURL(url).getLines() val writer: BufferedWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFilePath.toFile))) try { content.foreach {s => writer.write(s) writer.newLine() } } finally { safeClose(writer) } FilePath(s"file:///$outFilePath") } private def safeClose(bufferedWriter: BufferedWriter): Unit = { try { bufferedWriter.flush() bufferedWriter.close() } catch { case e: IOException => throw new DeepSenseIOException(e) } } }
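Example 10 streams the lines of an HTTP response into a BufferedWriter and funnels cleanup through a safeClose helper that flushes, closes, and converts IOException into a project-specific exception. The sketch below keeps the download-and-write core with plain exceptions; the DeepSenseIOException wrapper is project-specific and omitted here, and the URL and target path are illustrative.

import java.io.{BufferedWriter, FileOutputStream, OutputStreamWriter}

object UrlDownloadSketch {
  def downloadToFile(url: String, target: String): Unit = {
    val lines = scala.io.Source.fromURL(url).getLines() // lazy iterator over the response body
    val writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(target)))
    try lines.foreach { line =>
      writer.write(line)
      writer.newLine()
    } finally {
      writer.flush()
      writer.close()
    }
  }
}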
Example 11
Source File: JsonFileReporter.scala From kyuubi with Apache License 2.0 | 5 votes |
package yaooqinn.kyuubi.metrics import java.io.{BufferedWriter, Closeable, IOException, OutputStreamWriter} import java.util.{Timer, TimerTask} import java.util.concurrent.TimeUnit import scala.util.Try import scala.util.control.NonFatal import com.codahale.metrics.MetricRegistry import com.codahale.metrics.json.MetricsModule import com.fasterxml.jackson.databind.ObjectMapper import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.fs.permission.FsPermission import org.apache.kyuubi.Logging import org.apache.spark.{KyuubiSparkUtil, SparkConf} import org.apache.spark.KyuubiConf._ private[metrics] class JsonFileReporter(conf: SparkConf, registry: MetricRegistry) extends Closeable with Logging { private val jsonMapper = new ObjectMapper().registerModule( new MetricsModule(TimeUnit.MILLISECONDS, TimeUnit.MILLISECONDS, false)) private val timer = new Timer(true) private val interval = KyuubiSparkUtil.timeStringAsMs(conf.get(METRICS_REPORT_INTERVAL)) private val path = conf.get(METRICS_REPORT_LOCATION) private val hadoopConf = KyuubiSparkUtil.newConfiguration(conf) def start(): Unit = { timer.schedule(new TimerTask { var bw: BufferedWriter = _ override def run(): Unit = try { val json = jsonMapper.writerWithDefaultPrettyPrinter().writeValueAsString(registry) val tmpPath = new Path(path + ".tmp") val tmpPathUri = tmpPath.toUri val fs = if (tmpPathUri.getScheme == null && tmpPathUri.getAuthority == null) { FileSystem.getLocal(hadoopConf) } else { FileSystem.get(tmpPathUri, hadoopConf) } fs.delete(tmpPath, true) bw = new BufferedWriter(new OutputStreamWriter(fs.create(tmpPath, true))) bw.write(json) bw.close() fs.setPermission(tmpPath, FsPermission.createImmutable(Integer.parseInt("644", 8).toShort)) val finalPath = new Path(path) fs.rename(tmpPath, finalPath) fs.setPermission(finalPath, FsPermission.createImmutable(Integer.parseInt("644", 8).toShort)) } catch { case NonFatal(e) => error("Error writing metrics to json file" + path, e) } finally { if (bw != null) { Try(bw.close()) } } }, 0, interval) } override def close(): Unit = { timer.cancel() } }
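Example 11 reduces the window in which readers can observe a partially written report by writing the JSON to a ".tmp" path first and then renaming it over the final path. The sketch below shows the same write-then-rename idea using java.nio on the local filesystem instead of the Hadoop FileSystem API used above; the path is illustrative.

import java.io.BufferedWriter
import java.nio.file.{Files, Paths, StandardCopyOption}

object WriteThenRenameSketch {
  def report(path: String, json: String): Unit = {
    val tmp = Paths.get(path + ".tmp")
    val writer: BufferedWriter = Files.newBufferedWriter(tmp)
    try writer.write(json)
    finally writer.close()
    // Readers of the final path only ever see a fully written file.
    Files.move(tmp, Paths.get(path), StandardCopyOption.REPLACE_EXISTING)
  }
}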
Example 12
Source File: NerHelper.scala From spark-nlp with Apache License 2.0 | 5 votes |
package com.johnsnowlabs.benchmarks.spark import java.io.{BufferedWriter, File, FileWriter} import com.johnsnowlabs.nlp.annotators.common.NerTagged import com.johnsnowlabs.nlp.training.CoNLL import com.johnsnowlabs.nlp.{Annotation, SparkAccessor} import com.johnsnowlabs.nlp.util.io.ExternalResource import org.apache.spark.ml.PipelineModel import scala.collection.mutable object NerHelper { def saveNerSpanTags(annotations: Array[Array[Annotation]], file: String): Unit = { val bw = new BufferedWriter(new FileWriter(new File(file))) bw.write(s"start\tend\ttag\ttext\n") for (i <- 0 until annotations.length) { for (a <- annotations(i)) bw.write(s"${a.begin}\t${a.end}\t${a.result}\t${a.metadata("entity").replace("\n", " ")}\n") } bw.close() } def calcStat(correct: Int, predicted: Int, predictedCorrect: Int): (Float, Float, Float) = { // prec = (predicted & correct) / predicted // rec = (predicted & correct) / correct val prec = predictedCorrect.toFloat / predicted val rec = predictedCorrect.toFloat / correct val f1 = 2 * prec * rec / (prec + rec) (prec, rec, f1) } def measureExact(nerReader: CoNLL, model: PipelineModel, file: ExternalResource, printErrors: Int = 0): Unit = { val df = nerReader.readDataset(SparkAccessor.benchmarkSpark, file.path).toDF() val transformed = model.transform(df) val rows = transformed.select("ner_span", "label_span").collect() val correctPredicted = mutable.Map[String, Int]() val predicted = mutable.Map[String, Int]() val correct = mutable.Map[String, Int]() var toPrintErrors = printErrors for (row <- rows) { val predictions = NerTagged.getAnnotations(row, 0).filter(a => a.result != "O") val labels = NerTagged.getAnnotations(row, 1).filter(a => a.result != "O") for (p <- predictions) { val tag = p.metadata("entity") predicted(tag) = predicted.getOrElse(tag, 0) + 1 } for (l <- labels) { val tag = l.metadata("entity") correct(tag) = correct.getOrElse(tag, 0) + 1 } val correctPredictions = labels.toSet.intersect(predictions.toSet) for (a <- correctPredictions) { val tag = a.metadata("entity") correctPredicted(tag) = correctPredicted.getOrElse(tag, 0) + 1 } if (toPrintErrors > 0) { for (p <- predictions) { if (toPrintErrors > 0 && !correctPredictions.contains(p)) { System.out.println(s"Predicted\t${p.result}\t${p.begin}\t${p.end}\t${p.metadata("text")}") toPrintErrors -= 1 } } for (p <- labels) { if (toPrintErrors > 0 && !correctPredictions.contains(p)) { System.out.println(s"Correct\t${p.result}\t${p.begin}\t${p.end}\t${p.metadata("text")}") toPrintErrors -= 1 } } } } val (prec, rec, f1) = calcStat(correct.values.sum, predicted.values.sum, correctPredicted.values.sum) System.out.println(s"$prec\t$rec\t$f1") val tags = (correct.keys ++ predicted.keys ++ correctPredicted.keys).toList.distinct for (tag <- tags) { val (prec, rec, f1) = calcStat(correct.getOrElse(tag, 0), predicted.getOrElse(tag, 0), correctPredicted.getOrElse(tag, 0)) System.out.println(s"$tag\t$prec\t$rec\t$f1") } } }
Example 13
Source File: TrajSampling.scala From traj-sim-spark with Apache License 2.0 | 5 votes |
package edu.utah.cs.trajectory import java.io.{BufferedWriter, File, FileWriter} import edu.utah.cs.spatial.{LineSegment, Point} import org.apache.spark.{SparkConf, SparkContext} object TrajSampling { def main(args: Array[String]): Unit = { val sparkConf = new SparkConf().setAppName("TrajSampling") val sc = new SparkContext(sparkConf) Thread.sleep(3000) if (args.length < 2) { println("usage: TrajSampling <input_file_path> <output_file_path> <sample_count>") System.exit(1) } val input_file_path = args(0) val output_file_path = args(1) val cnt = args(2).toInt val sampled_trajs = sc.textFile(input_file_path).mapPartitions(iter => { iter.map(x => { val splitted = x.split("\t") (splitted(0).toInt, LineSegment(Point(Array(splitted(1).toDouble, splitted(2).toDouble)), Point(Array(splitted(3).toDouble, splitted(4).toDouble)))) }).toArray.groupBy(_._1).map(now => (now._1, now._2.sortBy(_._1).map(_._2))).iterator }).takeSample(withReplacement = false, cnt, System.currentTimeMillis()) val file = new File(output_file_path) val bw = new BufferedWriter(new FileWriter(file)) for (i <- sampled_trajs.indices) { val cur_traj = sampled_trajs(i)._2 cur_traj.foreach(x => bw.write(i + "\t" + x.toTSV + "\n")) } bw.close() sc.stop() } }
Example 14
Source File: LineSegmentClustering.scala From traj-sim-spark with Apache License 2.0 | 5 votes |
package edu.utah.cs.trajectory import java.io.{BufferedWriter, File, FileWriter} import com.vividsolutions.jts.geom.{GeometryCollection, GeometryFactory} import edu.utah.cs.partitioner.STRSegPartition import edu.utah.cs.spatial.{LineSegment, MBR, Point, Polygon} import edu.utah.cs.util.{BloomFilter, BloomFilterMeta} import org.apache.spark.{SparkConf, SparkContext} import org.geotools.geojson.geom.GeometryJSON object LineSegmentClustering { final val max_entries_per_node = 25 final val k = 10 final val N = 34085 def main(args: Array[String]): Unit = { val sc = new SparkContext(new SparkConf().setAppName("LineSegmentClustering")) if (args.length < 2) { println("usage: SpatialSpanClustering <input_file_path> <output_file_path>") System.exit(1) } val input_file_path = args(0) val output_file_path = args(1) val dataRDD = sc.textFile(input_file_path) .map(x => x.split('\t')) .map(x => (LineSegment(Point(Array(x(2).toDouble, x(1).toDouble)), Point(Array(x(4).toDouble, x(3).toDouble))), TrajMeta(x(0).toInt, x(5).toInt))) val bf_meta = BloomFilterMeta(N, 1) val bc_bf_meta = sc.broadcast(bf_meta) BloomFilter.meta = bf_meta val num_partitions = dataRDD.getNumPartitions val (partitioned_rdd, part_mbrs) = STRSegPartition(dataRDD, num_partitions, 0.01, max_entries_per_node) val part_bounds = partitioned_rdd.mapPartitions(iter => { if (iter.nonEmpty) { var maxx = Double.MinValue var maxy = Double.MinValue var minx = Double.MaxValue var miny = Double.MaxValue iter.map(_._1).foreach(x => { maxx = Math.max(Math.max(x.start.coord(0), x.end.coord(0)), maxx) maxy = Math.max(Math.max(x.start.coord(1), x.end.coord(1)), maxy) minx = Math.min(Math.min(x.start.coord(0), x.end.coord(0)), minx) miny = Math.min(Math.min(x.start.coord(1), x.end.coord(1)), miny) }) Array(MBR(Point(Array(minx, miny)), Point(Array(maxx, maxy)))).iterator } else Array().iterator }).collect() val file = new File(output_file_path) val bw = new BufferedWriter(new FileWriter(file)) val collection = new GeometryCollection(part_bounds.map(x => Polygon(Array(x.low, Point(Array(x.low.coord(0), x.high.coord(1))), x.high, Point(Array(x.high.coord(0), x.low.coord(1))), x.low)).content), new GeometryFactory) new GeometryJSON().writeGeometryCollection(collection, bw) bw.close() sc.stop() } }
Example 15
Source File: SpatialSpanClustering.scala From traj-sim-spark with Apache License 2.0 | 5 votes |
package edu.utah.cs.trajectory import java.io.{BufferedWriter, File, FileWriter} import com.vividsolutions.jts.geom.{GeometryCollection, GeometryFactory} import edu.utah.cs.partitioner.STRMBRPartition import edu.utah.cs.spatial.{LineSegment, MBR, Point, Polygon} import edu.utah.cs.util._ import org.apache.spark.{SparkConf, SparkContext} import org.geotools.geojson.geom.GeometryJSON object SpatialSpanClustering { final val max_entries_per_node = 25 def getMBR(x: (Int, Array[(Int, LineSegment)])): (MBR, Int) = { val pts = x._2.flatMap(p => Array(p._2.start, p._2.end)) var maxx = Double.MinValue var maxy = Double.MinValue var minx = Double.MaxValue var miny = Double.MaxValue pts.foreach(x => { maxx = Math.max(x.coord(0), maxx) maxy = Math.max(x.coord(1), maxy) minx = Math.min(x.coord(0), minx) miny = Math.min(x.coord(1), miny) }) (MBR(Point(Array(minx, miny)), Point(Array(maxx, maxy))), x._1) } def main(args: Array[String]): Unit = { val sc = new SparkContext(new SparkConf().setAppName("SpatialSpanClustering")) if (args.length < 2) { println("usage: SpatialSpanClustering <input_file_path> <output_file_path>") System.exit(1) } val input_file_path = args(0) val output_file_path = args(1) val bf_meta = BloomFilterMeta(10000, 1) val bc_bf_meta = sc.broadcast(bf_meta) BloomFilter.meta = bf_meta val mbrs = sc.textFile(input_file_path).mapPartitions(iter => { iter.map(x => { val splitted = x.split("\t") (splitted(0).toInt, LineSegment(Point(Array(splitted(2).toDouble, splitted(1).toDouble)), Point(Array(splitted(4).toDouble, splitted(3).toDouble)))) }).toArray.groupBy(_._1).map(now => getMBR(now)).iterator }) val num_partitions = mbrs.getNumPartitions * 4 val partitioned_rdd = STRMBRPartition(mbrs, num_partitions, 0.01, max_entries_per_node) val part_bounds = partitioned_rdd.mapPartitions(iter => { if (iter.nonEmpty) { var maxx = Double.MinValue var maxy = Double.MinValue var minx = Double.MaxValue var miny = Double.MaxValue iter.map(_._1).foreach(x => { maxx = Math.max(x.high.coord(0), maxx) maxy = Math.max(x.high.coord(1), maxy) minx = Math.min(x.low.coord(0), minx) miny = Math.min(x.low.coord(1), miny) }) Array(MBR(Point(Array(minx, miny)), Point(Array(maxx, maxy)))).iterator } else Array().iterator }).collect() val file = new File(output_file_path) val bw = new BufferedWriter(new FileWriter(file)) val collection = new GeometryCollection(part_bounds.map(x => Polygon(Array(x.low, Point(Array(x.low.coord(0), x.high.coord(1))), x.high, Point(Array(x.high.coord(0), x.low.coord(1))), x.low)).content), new GeometryFactory) new GeometryJSON().writeGeometryCollection(collection, bw) bw.close() sc.stop() } }
Example 16
Source File: NERServiceSpec.scala From recogito2 with Apache License 2.0 | 5 votes |
package transform.ner import java.io.File import org.specs2.mutable._ import org.specs2.runner._ import org.junit.runner._ import org.joox.JOOX._ import org.pelagios.recogito.sdk.ner.EntityType import play.api.test._ import play.api.test.Helpers._ import scala.concurrent.duration._ import scala.io.Source import org.codehaus.plexus.util.StringInputStream import java.io.StringWriter import java.io.BufferedWriter @RunWith(classOf[JUnitRunner]) class NERServiceSpec extends Specification { def parsePlaintext() = { val TEST_TEXT = Source.fromFile("test/resources/transform/ner/text-for-ner-01.txt").getLines().mkString("\n") NERService.parseText(TEST_TEXT, None) } def enrichTEI() = { val TEST_TEI = new File("test/resources/transform/ner/tei-for-ner.tei.xml") val writer = new StringWriter() NERService.enrichTEI(TEST_TEI, None, Some(new BufferedWriter(writer))) $(writer.toString) } "The NER text parse function" should { "detect 8 Named Entites in the test text" in { val entities = parsePlaintext() entities.size must equalTo (8) } "detect 3 Locations - Pylos, Sparta and Ithaca" in { val entities = parsePlaintext() val locations = entities.filter(_.entityType == EntityType.LOCATION).map(_.chars) locations.size must equalTo(3) locations must contain("Pylos") locations must contain("Sparta") locations must contain("Ithaca") } "detect 1 date" in { val entities = parsePlaintext() entities.filter(_.entityType.equals(EntityType.DATE)).size must equalTo(1) } "detect 4 persons - Ulysses (2x), Penelope and Telemachus" in { val entities = parsePlaintext() val persons = entities.filter(_.entityType == EntityType.PERSON).map(_.chars) persons.size must equalTo(4) persons must contain("Penelope") persons must contain("Telemachus") persons.filter(_.equals("Ulysses")).size must equalTo(2) } "retain correct char offsets for each entity" in { val TEST_TEXT = Source.fromFile("test/resources/transform/ner/text-for-ner-01.txt").getLines().mkString("\n") val entities = parsePlaintext() entities.map(e => { val snippetFromSourceFile = TEST_TEXT.substring(e.charOffset, e.charOffset + e.chars.size) snippetFromSourceFile must equalTo(e.chars) }) } } "The NER TEI enrichment function" should { "insert 11 placeName tags" in { val enriched = enrichTEI() enriched.find("placeName").size must equalTo(11) } "insert 24 persName tags" in { val enriched = enrichTEI() enriched.find("persName").size must equalTo(24) } } }
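Example 16 passes a BufferedWriter that wraps a StringWriter into enrichTEI, so the enriched XML ends up in memory rather than on disk. The in-memory capture trick on its own looks like this (the written content is illustrative):

import java.io.{BufferedWriter, StringWriter}

object InMemoryWriterSketch {
  def main(args: Array[String]): Unit = {
    val target = new StringWriter()
    val writer = new BufferedWriter(target)
    writer.write("<div>captured in memory</div>")
    writer.flush() // flush before reading the buffered content back
    println(target.toString)
  }
}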
Example 17
Source File: MnistManager.scala From dl4scala with MIT License | 5 votes |
package org.dl4scala.datasets.mnist import java.io.{BufferedWriter, FileWriter, IOException} import org.dl4scala.datasets.fetchers.MnistDataFetcher def close(): Unit = { if (images != null) { try images.close() catch { case e: IOException => } images = null } if (labels != null) { try labels.close() catch { case e: IOException => } labels = null } } } object MnistManager { private val HEADER_SIZE = 8 @throws(classOf[IOException]) def writeImageToPpm(image: Array[Array[Int]], ppmFileName: String): Unit = { try { val ppmOut = new BufferedWriter(new FileWriter(ppmFileName)) val rows = image.length val cols = image(0).length ppmOut.write("P3\n") ppmOut.write("" + rows + " " + cols + " 255\n") (0 until rows).foreach{ i => val s = new StringBuilder (0 until cols).foreach(j => s.append(image(i)(j) + " " + image(i)(j) + " " + image(i)(j) + " ")) ppmOut.write(s.toString) } ppmOut.close() } catch { case e: Exception => println("BufferedWriter error" + e.printStackTrace()) } } }
Example 18
Source File: VerifierLoggerBenchmark.scala From Waves with MIT License | 5 votes |
package com.wavesplatform.transaction.smart import java.io.BufferedWriter import java.nio.file.{Files, Path, Paths} import java.util.concurrent.TimeUnit import cats.Id import com.wavesplatform.account.KeyPair import com.wavesplatform.common.state.ByteStr import com.wavesplatform.common.utils._ import com.wavesplatform.lang.v1.compiler.Terms import com.wavesplatform.lang.v1.compiler.Terms.{CONST_BOOLEAN, EVALUATED} import com.wavesplatform.lang.v1.evaluator.Log import com.wavesplatform.lang.v1.evaluator.ctx.impl.waves.Bindings import com.wavesplatform.state.BinaryDataEntry import com.wavesplatform.transaction.DataTransaction import com.wavesplatform.transaction.smart.VerifierLoggerBenchmark.BigLog import org.openjdk.jmh.annotations._ import org.openjdk.jmh.infra.Blackhole @OutputTimeUnit(TimeUnit.MILLISECONDS) @BenchmarkMode(Array(Mode.AverageTime)) @Threads(1) @Fork(1) @Warmup(iterations = 10) @Measurement(iterations = 10) class VerifierLoggerBenchmark { @Benchmark def verifierLogged(bh: Blackhole, log: BigLog): Unit = { val logs = Verifier.buildLogs("id", log.value) bh.consume(log.writer.write(logs)) } } object VerifierLoggerBenchmark { @State(Scope.Benchmark) class BigLog { val resultFile: Path = Paths.get("log.txt") val writer: BufferedWriter = Files.newBufferedWriter(resultFile) private val dataTx: DataTransaction = DataTransaction .selfSigned(1.toByte, KeyPair(Array[Byte]()), (1 to 4).map(i => BinaryDataEntry(s"data$i", ByteStr(Array.fill(1024 * 30)(1)))).toList, 100000000, 0) .explicitGet() private val dataTxObj: Terms.CaseObj = Bindings.transactionObject( RealTransactionWrapper(dataTx, ???, ???, ???).explicitGet(), proofsEnabled = true ) val value: (Log[Id], Either[String, EVALUATED]) = ( List.fill(500)("txVal" -> Right(dataTxObj)), Right(CONST_BOOLEAN(true)) ) @TearDown def deleteFile(): Unit = { Files.delete(resultFile) writer.close() } } }
Example 19
Source File: ElevationOverlay.scala From geotrellis-osm-elevation with Apache License 2.0 | 5 votes |
package geotrellis.osme.core import java.io.{BufferedWriter, FileWriter, File} import com.vividsolutions.jts.geom.{LineString, MultiLineString} import geotrellis.raster.io.geotiff.SinglebandGeoTiff import geotrellis.vector.io.json.{GeoJson, JsonFeatureCollection} import scala.collection.immutable.Map import spray.json._ import DefaultJsonProtocol._ import geotrellis.vector.io.json.FeatureFormats.writeFeatureJson import geotrellis.vector.io.json.GeometryFormats._ import geotrellis.vector.densify.DensifyMethods import geotrellis.vector.dissolve.DissolveMethods import geotrellis.vector._ val segmentsFeatures = segments.map { segment => val center = segment.centroid match { case PointResult(p) => p case NoResult => throw new Exception("No result found in PointOrNoResult") } val (col, row) = rasterExtent.mapToGrid(center) val elevation = geotiff.tile.getDouble(col, row) val meanvMap: Map[String, Double] = Map("MEANV" -> elevation) LineFeature(segment, meanvMap) } return segmentsFeatures.toTraversable } }
Example 20
Source File: ElevationSpec.scala From geotrellis-osm-elevation with Apache License 2.0 | 5 votes |
package geotrellis.osme.core import java.io.{FileWriter, BufferedWriter, File} import geotrellis.raster.io.geotiff.SinglebandGeoTiff import geotrellis.vector.{Feature, Line, LineFeature} import geotrellis.vector.io.json.GeoJson._ import spray.json.DefaultJsonProtocol._ import geotrellis.vector.io.json.{JsonFeatureCollection, GeoJson} import spray.json.JsonReader import scala.io.Source import org.scalatest._ class ElevationSpec extends FunSpec with Matchers { def sharedData = { val geojson = Source.fromFile("data/imgn36w100vector.geojson").getLines.mkString val gjCol = parse[JsonFeatureCollection](geojson) new { val geotiff = SinglebandGeoTiff("data/imgn36w100_13_3_3.tif") val multiLine = gjCol.getAllLines().toMultiLine val elevationGeoJson = ElevationOverlay(geotiff, multiLine) } } describe("Core spec") { val numInputLines = sharedData.multiLine.lines.size val numOutputLines = sharedData.elevationGeoJson.size val ratio = numOutputLines / numInputLines println(s"Ratio of input lines to output lines: $ratio : 1") it("returned geojson should contain the MEANV property") { val elevationFeatures = sharedData.elevationGeoJson val hasMeanV = elevationFeatures.forall(feat => feat.data.contains("MEANV")) assert(hasMeanV) } it("should produce a geojson file that can be put into geocolor.io") { val elevationFeatures = sharedData.elevationGeoJson val jsonFeatures = JsonFeatureCollection(elevationFeatures) val file = new File("geocolor_test.json") val bw = new BufferedWriter(new FileWriter(file)) bw.write(jsonFeatures.toJson.prettyPrint) bw.close() } it("Every feature should intersect the tile extent") { val elevationFeatures = sharedData.elevationGeoJson val rasterPoly = sharedData.geotiff.rasterExtent.extent.toPolygon() val doesIntersect = elevationFeatures.forall(feat => rasterPoly.intersects(feat.geom)) assert(doesIntersect) } } }
Example 21
Source File: DevelopmentEmailNotifications.scala From sundial with MIT License | 5 votes |
package service.notifications import java.io.{BufferedWriter, File, FileWriter} import dao.SundialDaoFactory import dto.DisplayModels import model.{EmailNotification, ProcessStatus} import software.amazon.awssdk.services.ses.SesClient import scala.sys.process._ class DevelopmentEmailNotifications(daoFactory: SundialDaoFactory, displayModels: DisplayModels, sesClient: SesClient) extends EmailNotifications(daoFactory, "[email protected]", displayModels, sesClient) { override def sendEmail(processStatus: ProcessStatus, previousProcessStatus: Option[ProcessStatus], teams: Seq[EmailNotification], subject: String, body: String): Unit = { val outfile = File.createTempFile("sundial", ".html") val bw = new BufferedWriter(new FileWriter(outfile)) bw.write(body) bw.close() Seq("open", outfile.getAbsolutePath()).! } }
Example 22
Source File: FileDownloader.scala From seahorse-workflow-executor with Apache License 2.0 | 5 votes |
package io.deepsense.deeplang.doperations.readwritedataframe.filestorage import java.io.{BufferedWriter, FileOutputStream, IOException, OutputStreamWriter} import java.nio.file.{Files, Paths} import java.util.UUID import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import io.deepsense.deeplang.ExecutionContext import io.deepsense.deeplang.doperations.exceptions.DeepSenseIOException import io.deepsense.deeplang.doperations.readwritedataframe.FilePath private[filestorage] object FileDownloader { def downloadFile(url: String)(implicit context: ExecutionContext): FilePath = { if (context.tempPath.startsWith("hdfs://")) { downloadFileToHdfs(url) } else { downloadFileToDriver(url) } } private def downloadFileToHdfs(url: String)(implicit context: ExecutionContext) = { val content = scala.io.Source.fromURL(url).getLines() val hdfsPath = s"${context.tempPath}/${UUID.randomUUID()}" val configuration = new Configuration() val hdfs = FileSystem.get(configuration) val file = new Path(hdfsPath) val hdfsStream = hdfs.create(file) val writer = new BufferedWriter(new OutputStreamWriter(hdfsStream)) try { content.foreach {s => writer.write(s) writer.newLine() } } finally { safeClose(writer) hdfs.close() } FilePath(hdfsPath) } private def downloadFileToDriver(url: String) (implicit context: ExecutionContext) = { val outputDirPath = Paths.get(context.tempPath) // We're checking if the output is a directory following symlinks. // The default behaviour of createDirectories is NOT to follow symlinks if (!Files.isDirectory(outputDirPath)) { Files.createDirectories(outputDirPath) } val outFilePath = Files.createTempFile(outputDirPath, "download", ".csv") // content is a stream. Do not invoke stuff like .toList() on it. val content = scala.io.Source.fromURL(url).getLines() val writer: BufferedWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFilePath.toFile))) try { content.foreach {s => writer.write(s) writer.newLine() } } finally { safeClose(writer) } FilePath(s"file:///$outFilePath") } private def safeClose(bufferedWriter: BufferedWriter): Unit = { try { bufferedWriter.flush() bufferedWriter.close() } catch { case e: IOException => throw new DeepSenseIOException(e) } } }
Example 23
Source File: WhiskConfigTests.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.core import java.io.BufferedWriter import java.io.File import java.io.FileWriter import org.junit.runner.RunWith import org.scalatest.FlatSpec import org.scalatest.Matchers import org.scalatest.junit.JUnitRunner import common.StreamLogging @RunWith(classOf[JUnitRunner]) class WhiskConfigTests extends FlatSpec with Matchers with StreamLogging { behavior of "WhiskConfig" it should "get required property" in { val config = new WhiskConfig(WhiskConfig.edgeHost) assert(config.isValid) assert(config.edgeHost.nonEmpty) } it should "be valid when a prop file is provided defining required props" in { val file = File.createTempFile("cxt", ".txt") file.deleteOnExit() val bw = new BufferedWriter(new FileWriter(file)) bw.write("a=A\n") bw.close() val config = new WhiskConfig(Map("a" -> null), Set.empty, file) assert(config.isValid && config("a") == "A") } it should "not be valid when a prop file is provided but does not define required props" in { val file = File.createTempFile("cxt", ".txt") file.deleteOnExit() val bw = new BufferedWriter(new FileWriter(file)) bw.write("a=A\n") bw.close() val config = new WhiskConfig(Map("a" -> null, "b" -> null), Set.empty, file) assert(!config.isValid && config("b") == null) } it should "be valid when a prop file is provided defining required props and optional properties" in { val file = File.createTempFile("cxt", ".txt") file.deleteOnExit() val bw = new BufferedWriter(new FileWriter(file)) bw.write("a=A\n") bw.write("b=B\n") bw.write("c=C\n") bw.close() val config = new WhiskConfig(Map("a" -> null, "b" -> "???"), Set("c", "d"), file, env = Map.empty) assert(config.isValid && config("a") == "A" && config("b") == "B") assert(config("c") == "C") assert(config("d") == "") assert(config("a", "c") == "C") assert(config("a", "d") == "A") assert(config("d", "a") == "A") assert(config("c", "a") == "A") } }
Example 24
Source File: BasicTestPerformance4Samba.scala From ohara with Apache License 2.0 | 5 votes |
package oharastream.ohara.it.performance import java.io.{BufferedWriter, OutputStreamWriter} import java.util.concurrent.atomic.LongAdder import oharastream.ohara.client.filesystem.FileSystem import oharastream.ohara.common.data.Row import oharastream.ohara.common.util.{CommonUtils, Releasable} import org.junit.AssumptionViolatedException import spray.json.{JsNumber, JsString, JsValue} import scala.concurrent.duration.Duration import scala.jdk.CollectionConverters._ abstract class BasicTestPerformance4Samba extends BasicTestPerformance { private[this] val sambaHostname: String = sys.env.getOrElse( PerformanceTestingUtils.SAMBA_HOSTNAME_KEY, throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_HOSTNAME_KEY} does not exists!!!") ) private[this] val sambaUsername: String = sys.env.getOrElse( PerformanceTestingUtils.SAMBA_USER_KEY, throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_USER_KEY} does not exists!!!") ) private[this] val sambaPassword: String = sys.env.getOrElse( PerformanceTestingUtils.SAMBA_PASSWORD_KEY, throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_PASSWORD_KEY} does not exists!!!") ) private[this] val sambaPort: Int = sys.env .getOrElse( PerformanceTestingUtils.SAMBA_PORT_KEY, throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_PORT_KEY} does not exists!!!") ) .toInt private[this] val sambaShare: String = sys.env.getOrElse( PerformanceTestingUtils.SAMBA_SHARE_KEY, throw new AssumptionViolatedException(s"${PerformanceTestingUtils.SAMBA_SHARE_KEY} does not exists!!!") ) private[this] val csvInputFolderKey = PerformanceTestingUtils.CSV_INPUT_KEY private[this] val csvOutputFolder: String = value(csvInputFolderKey).getOrElse("input") private[this] val NEED_DELETE_DATA_KEY: String = PerformanceTestingUtils.DATA_CLEANUP_KEY protected[this] val needDeleteData: Boolean = sys.env.getOrElse(NEED_DELETE_DATA_KEY, "true").toBoolean protected val sambaSettings: Map[String, JsValue] = Map( oharastream.ohara.connector.smb.SMB_HOSTNAME_KEY -> JsString(sambaHostname), oharastream.ohara.connector.smb.SMB_PORT_KEY -> JsNumber(sambaPort), oharastream.ohara.connector.smb.SMB_USER_KEY -> JsString(sambaUsername), oharastream.ohara.connector.smb.SMB_PASSWORD_KEY -> JsString(sambaPassword), oharastream.ohara.connector.smb.SMB_SHARE_NAME_KEY -> JsString(sambaShare) ) protected def setupInputData(timeout: Duration): (String, Long, Long) = { val client = sambaClient() try { if (!client.exists(csvOutputFolder)) PerformanceTestingUtils.createFolder(client, csvOutputFolder) val result = generateData( numberOfRowsToFlush, timeout, (rows: Seq[Row]) => { val file = s"$csvOutputFolder/${CommonUtils.randomString()}" val writer = new BufferedWriter(new OutputStreamWriter(client.create(file))) val count = new LongAdder() val sizeInBytes = new LongAdder() try { val cellNames: Set[String] = rows.head.cells().asScala.map(_.name).toSet writer .append(cellNames.mkString(",")) .append("\n") rows.foreach(row => { val content = row.cells().asScala.map(_.value).mkString(",") count.increment() sizeInBytes.add(content.length) writer .append(content) .append("\n") }) (count.longValue(), sizeInBytes.longValue()) } finally Releasable.close(writer) } ) (csvOutputFolder, result._1, result._2) } finally Releasable.close(client) } protected[this] def sambaClient(): FileSystem = FileSystem.smbBuilder .hostname(sambaHostname) .port(sambaPort) .user(sambaUsername) .password(sambaPassword) .shareName(sambaShare) .build() }
Example 25
Source File: TestHdfsFileSystem.scala From ohara with Apache License 2.0 | 5 votes |
package oharastream.ohara.client.filesystem.hdfs import java.io.{BufferedWriter, File, OutputStreamWriter} import java.nio.charset.StandardCharsets import oharastream.ohara.client.filesystem.{FileFilter, FileSystem, FileSystemTestBase} import oharastream.ohara.common.exception.FileSystemException import oharastream.ohara.common.util.CommonUtils import org.junit.Test import org.scalatest.matchers.should.Matchers._ class TestHdfsFileSystem extends FileSystemTestBase { private[this] val tempFolder: File = CommonUtils.createTempFolder("local_hdfs") private[this] val hdfsURL: String = new File(tempFolder.getAbsolutePath).toURI.toString override protected val fileSystem: FileSystem = FileSystem.hdfsBuilder.url(hdfsURL).build override protected val rootDir: String = tempFolder.toString // override this method because the Local HDFS doesn't support append() @Test override def testAppend(): Unit = { val file = randomFile() fileSystem.create(file).close() intercept[FileSystemException] { fileSystem.append(file) }.getMessage shouldBe "Not supported" } // override this method because the Local HDFS doesn't support append() @Test override def testDeleteFileThatHaveBeenRead(): Unit = { val file = randomFile(rootDir) val data: Seq[String] = Seq("123", "456") val writer = new BufferedWriter(new OutputStreamWriter(fileSystem.create(file), StandardCharsets.UTF_8)) try data.foreach(line => { writer.append(line) writer.newLine() }) finally writer.close() fileSystem.exists(file) shouldBe true fileSystem.readLines(file) shouldBe data fileSystem.delete(file) fileSystem.exists(file) shouldBe false fileSystem.listFileNames(rootDir, FileFilter.EMPTY).size shouldBe 0 } }
Example 26
Source File: ForgerBoxMerklePathInfoTest.scala From Sidechains-SDK with MIT License | 5 votes |
package com.horizen.validation

import java.io.{BufferedReader, BufferedWriter, FileReader, FileWriter}
import java.lang.{Byte => JByte}
import java.util
import java.util.{ArrayList => JArrayList}

import com.horizen.box.ForgerBox
import com.horizen.fixtures.BoxFixture
import com.horizen.utils.{BytesUtils, ForgerBoxMerklePathInfo, ForgerBoxMerklePathInfoSerializer, MerklePath, Pair}
import com.horizen.vrf.VrfGeneratedDataProvider
import org.junit.Assert.{assertEquals, assertNotEquals, assertTrue}
import org.junit.Test
import org.scalatest.junit.JUnitSuite

class ForgerBoxMerklePathInfoTest extends JUnitSuite with BoxFixture {
  val vrfGenerationSeed = 907
  val vrfGenerationPrefix = "ForgerBoxMerklePathInfoTest"

  // set to true and run if you want to update the VRF-related data
  if (false) {
    VrfGeneratedDataProvider.updateVrfPublicKey(vrfGenerationPrefix, vrfGenerationSeed)
  }

  val forgerBox: ForgerBox = getForgerBox(
    getPrivateKey25519("123".getBytes()).publicImage(),
    1000L,
    100L,
    getPrivateKey25519("456".getBytes()).publicImage(),
    VrfGeneratedDataProvider.getVrfPublicKey(vrfGenerationPrefix, vrfGenerationSeed)
  )
  val emptyMerklePath: MerklePath = new MerklePath(new JArrayList())
  val nonEmptyMerklePath: MerklePath = new MerklePath(util.Arrays.asList(
    new Pair[JByte, Array[Byte]](0.toByte, BytesUtils.fromHexString("29d000eee85f08b6482026be2d92d081d6f9418346e6b2e9fe2e9b985f24ed1e")),
    new Pair[JByte, Array[Byte]](1.toByte, BytesUtils.fromHexString("61bfbdf7038dc7f21e2bcf193faef8e6caa8222af016a6ed86b9e9d860f046df"))
  ))

  @Test
  def comparison(): Unit = {
    assertNotEquals("Box merkle path info expected to be different.", emptyMerklePath, nonEmptyMerklePath)
  }

  @Test
  def serialization(): Unit = {
    // Test 1: empty merkle path (single element in merkle tree)
    val boxWithEmptyPath = ForgerBoxMerklePathInfo(forgerBox, emptyMerklePath)
    var boxBytes = boxWithEmptyPath.bytes
    var deserializedBox = ForgerBoxMerklePathInfoSerializer.parseBytes(boxBytes)
    assertEquals("Deserialized box merkle path info hashCode expected to be equal to the original one.",
      boxWithEmptyPath.hashCode(), deserializedBox.hashCode())
    assertEquals("Deserialized box merkle path info expected to be equal to the original one.",
      boxWithEmptyPath, deserializedBox)

    // Test 2: non-empty merkle path
    val boxWithNonEmptyPath = ForgerBoxMerklePathInfo(forgerBox, nonEmptyMerklePath)
    boxBytes = boxWithNonEmptyPath.bytes
    deserializedBox = ForgerBoxMerklePathInfoSerializer.parseBytes(boxBytes)
    assertEquals("Deserialized box merkle path info hashCode expected to be equal to the original one.",
      boxWithNonEmptyPath.hashCode(), deserializedBox.hashCode())
    assertEquals("Deserialized box merkle path info expected to be equal to the original one.",
      boxWithNonEmptyPath, deserializedBox)

    // Set to true and run if you want to update the regression data.
    if (false) {
      val out = new BufferedWriter(new FileWriter("src/test/resources/boxmerklepathinfo_hex"))
      out.write(BytesUtils.toHexString(boxBytes))
      out.close()
    }

    // Test 3: try to deserialize broken bytes.
    assertTrue("ForgerBoxMerklePathInfo expected not to be parsed because of broken data.",
      ForgerBoxMerklePathInfoSerializer.parseBytesTry("broken bytes".getBytes).isFailure)
  }

  @Test
  def serializationRegression(): Unit = {
    var bytes: Array[Byte] = null
    try {
      val classLoader = getClass.getClassLoader
      val file = new FileReader(classLoader.getResource("boxmerklepathinfo_hex").getFile)
      bytes = BytesUtils.fromHexString(new BufferedReader(file).readLine())
    } catch {
      case e: Exception => fail(e.toString)
    }

    val boxMerklePathInfoTry = ForgerBoxMerklePathInfoSerializer.parseBytesTry(bytes)
    assertTrue("ForgerBoxMerklePathInfo expected to be parsed.", boxMerklePathInfoTry.isSuccess)

    val boxWithNonEmptyPath = ForgerBoxMerklePathInfo(forgerBox, nonEmptyMerklePath)
    assertEquals("Parsed info is different from the original.", boxWithNonEmptyPath, boxMerklePathInfoTry.get)
  }
}
Example 27
Source File: MNIST.scala From spark-tsne with Apache License 2.0 | 5 votes |
package com.github.saurfang.spark.tsne.examples

import java.io.{BufferedWriter, OutputStreamWriter}

import com.github.saurfang.spark.tsne.impl._
import com.github.saurfang.spark.tsne.tree.SPTree
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.RowMatrix
import org.apache.spark.{SparkConf, SparkContext}
import org.slf4j.LoggerFactory

object MNIST {
  private def logger = LoggerFactory.getLogger(MNIST.getClass)

  def main(args: Array[String]) {
    val conf = new SparkConf()
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .registerKryoClasses(Array(classOf[SPTree]))
    val sc = new SparkContext(conf)
    val hadoopConf = sc.hadoopConfiguration
    val fs = FileSystem.get(hadoopConf)

    val dataset = sc.textFile("data/MNIST/mnist.csv.gz")
      .zipWithIndex()
      .filter(_._2 < 6000)
      .sortBy(_._2, true, 60)
      .map(_._1)
      .map(_.split(","))
      .map(x => (x.head.toInt, x.tail.map(_.toDouble)))
      .cache()
    //logInfo(dataset.collect.map(_._2.toList).toList.toString)

    //val features = dataset.map(x => Vectors.dense(x._2))
    //val scaler = new StandardScaler(true, true).fit(features)
    //val scaledData = scaler.transform(features)
    //  .map(v => Vectors.dense(v.toArray.map(x => if(x.isNaN || x.isInfinite) 0.0 else x)))
    //  .cache()
    val data = dataset.flatMap(_._2)
    val mean = data.mean()
    val std = data.stdev()
    val scaledData = dataset.map(x => Vectors.dense(x._2.map(v => (v - mean) / std))).cache()

    val labels = dataset.map(_._1).collect()
    val matrix = new RowMatrix(scaledData)
    val pcaMatrix = matrix.multiply(matrix.computePrincipalComponents(50))
    pcaMatrix.rows.cache()

    val costWriter = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(s".tmp/MNIST/cost.txt"), true)))

    //SimpleTSNE.tsne(pcaMatrix, perplexity = 20, maxIterations = 200)
    BHTSNE.tsne(pcaMatrix, maxIterations = 500, callback = {
      //LBFGSTSNE.tsne(pcaMatrix, perplexity = 10, maxNumIterations = 500, numCorrections = 10, convergenceTol = 1e-8)
      case (i, y, loss) =>
        if (loss.isDefined) logger.info(s"$i iteration finished with loss $loss")

        val os = fs.create(new Path(s".tmp/MNIST/result${"%05d".format(i)}.csv"), true)
        val writer = new BufferedWriter(new OutputStreamWriter(os))
        try {
          (0 until y.rows).foreach { row =>
            writer.write(labels(row).toString)
            writer.write(y(row, ::).inner.toArray.mkString(",", ",", "\n"))
          }
          if (loss.isDefined) costWriter.write(loss.get + "\n")
        } finally {
          writer.close()
        }
    })
    costWriter.close()

    sc.stop()
  }
}
Example 28
Source File: ConfigSpec.scala From wookiee with Apache License 2.0 | 5 votes |
package com.webtrends.harness

import java.io.{BufferedWriter, File, FileWriter}
import java.util.concurrent.TimeUnit

import akka.actor.{Actor, ActorSystem, Props}
import akka.testkit.TestProbe
import com.typesafe.config.ConfigFactory
import com.webtrends.harness.app.HarnessActor.ConfigChange
import com.webtrends.harness.config.ConfigWatcherActor
import com.webtrends.harness.health.{ComponentState, HealthComponent}
import com.webtrends.harness.service.messages.CheckHealth
import org.specs2.mutable.SpecificationWithJUnit

import scala.concurrent.ExecutionContextExecutor
import scala.concurrent.duration.FiniteDuration
import scala.reflect.io.{Directory, Path}

class ConfigSpec extends SpecificationWithJUnit {
  implicit val dur = FiniteDuration(2, TimeUnit.SECONDS)
  new File("services/test/conf").mkdirs()

  implicit val sys = ActorSystem("system", ConfigFactory.parseString(
    """
      akka.actor.provider = "akka.actor.LocalActorRefProvider"
      services { path = "services" }
    """).withFallback(ConfigFactory.load))
  implicit val ec: ExecutionContextExecutor = sys.dispatcher

  val probe = TestProbe()
  val parent = sys.actorOf(Props(new Actor {
    val child = context.actorOf(ConfigWatcherActor.props, "child")
    def receive = {
      case x if sender == child => probe.ref forward x
      case x => child forward x
    }
  }))

  sequential

  "config " should {
    "be in good health" in {
      probe.send(parent, CheckHealth)
      val msg = probe.expectMsgClass(classOf[HealthComponent])
      msg.state equals ComponentState.NORMAL
    }

    "detect changes in config" in {
      val file = new File("services/test/conf/test.conf")
      val bw = new BufferedWriter(new FileWriter(file))
      bw.write("test = \"value\"")
      bw.close()
      val msg = probe.expectMsgClass(classOf[ConfigChange])
      msg.isInstanceOf[ConfigChange]
    }
  }

  step {
    sys.terminate().onComplete { _ =>
      Directory(Path(new File("services"))).deleteRecursively()
    }
  }
}
Example 29
Source File: Preprocess.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package com.packt.ScalaML.BitCoin

import java.io.{BufferedWriter, File, FileWriter}

import org.apache.spark.sql.types.{DoubleType, IntegerType, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row, SparkSession}

import scala.collection.mutable.ListBuffer

object Preprocess {
  // how many of the first rows are omitted
  val dropFirstCount: Int = 612000

  def rollingWindow(data: DataFrame, window: Int, xFilename: String, yFilename: String): Unit = {
    var i = 0
    val xWriter = new BufferedWriter(new FileWriter(new File(xFilename)))
    val yWriter = new BufferedWriter(new FileWriter(new File(yFilename)))

    val zippedData = data.rdd.zipWithIndex().collect()
    System.gc()
    val dataStratified = zippedData.drop(dropFirstCount) // TODO: slice the first 614K rows
    while (i < (dataStratified.length - window)) {
      val x = dataStratified
        .slice(i, i + window)
        .map(r => r._1.getAs[Double]("Delta")).toList
      val y = dataStratified.apply(i + window)._1.getAs[Integer]("label")
      val stringToWrite = x.mkString(",")
      xWriter.write(stringToWrite + "\n")
      yWriter.write(y + "\n")

      i += 1
      if (i % 10 == 0) {
        xWriter.flush()
        yWriter.flush()
      }
    }

    xWriter.close()
    yWriter.close()
  }

  def main(args: Array[String]): Unit = {
    // TODO: modify these variables to match the desired files
    val priceDataFileName: String = "C:/Users/admin-karim/Desktop/bitstampUSD_1-min_data_2012-01-01_to_2017-10-20.csv/bitstampUSD_1-min_data_2012-01-01_to_2017-10-20.csv"
    val outputDataFilePath: String = "output/scala_test_x.csv"
    val outputLabelFilePath: String = "output/scala_test_y.csv"

    val spark = SparkSession
      .builder()
      .master("local[*]")
      .config("spark.sql.warehouse.dir", "E:/Exp/")
      .appName("Bitcoin Preprocessing")
      .getOrCreate()

    val data = spark.read.format("com.databricks.spark.csv").option("header", "true").load(priceDataFileName)
    data.show(10)
    println((data.count(), data.columns.size))

    val dataWithDelta = data.withColumn("Delta", data("Close") - data("Open"))

    import org.apache.spark.sql.functions._
    import spark.sqlContext.implicits._

    val dataWithLabels = dataWithDelta.withColumn("label", when($"Close" - $"Open" > 0, 1).otherwise(0))
    rollingWindow(dataWithLabels, 22, outputDataFilePath, outputLabelFilePath)

    spark.stop()
  }
}
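When streaming many rows through a BufferedWriter, the example above flushes every few iterations so partial results reach disk without paying the cost of a flush per line. A minimal sketch of that pattern, with a hypothetical local output path:

import java.io.{BufferedWriter, FileWriter}

object PeriodicFlushSketch {
  def main(args: Array[String]): Unit = {
    val writer = new BufferedWriter(new FileWriter("/tmp/rows.csv"))
    try {
      (1 to 100000).foreach { i =>
        writer.write(s"row-$i\n")
        // flush periodically rather than on every write
        if (i % 1000 == 0) writer.flush()
      }
    } finally writer.close()
  }
}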
Example 30
Source File: WriteSupport.scala From CodeAnalyzerTutorial with Apache License 2.0 | 5 votes |
package tutor.utils

import java.io.{BufferedWriter, File, FileWriter, Writer}

trait WriteSupport {
  def withWriter(path: String)(f: Writer => Unit): Unit = {
    var writer: Writer = null
    try {
      val file = new File(path)
      if (!file.exists()) file.createNewFile()
      writer = new BufferedWriter(new FileWriter(file))
      f(writer)
      writer.flush()
    } finally {
      if (writer != null) writer.close()
    }
  }
}
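WriteSupport is a loan pattern: callers supply only a Writer => Unit function and never close the writer themselves. A usage sketch (the mixing-in object and the output path are hypothetical); on Scala 2.13+ the same shape is also available out of the box via scala.util.Using:

import tutor.utils.WriteSupport

object ReportWriter extends WriteSupport {
  def main(args: Array[String]): Unit =
    withWriter("/tmp/report.txt") { writer =>
      writer.write("line 1\n")
      writer.write("line 2\n")
    } // the writer is flushed and closed by withWriter
}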
Example 31
Source File: ApiGwRestEndToEndTests.scala From openwhisk with Apache License 2.0 | 5 votes |
package apigw.healthtests

import java.io.BufferedWriter
import java.io.File
import java.io.FileWriter

import akka.http.scaladsl.model.StatusCodes.OK
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner

import common.TestUtils._
import common.rest.WskRestOperations
import common.rest.RestResult
import common.WskActorSystem

@RunWith(classOf[JUnitRunner])
class ApiGwRestEndToEndTests extends ApiGwEndToEndTests with WskActorSystem {
  override lazy val wsk = new WskRestOperations
  override val createCode = OK.intValue

  override def verifyAPICreated(rr: RunResult): Unit = {
    val apiResultRest = rr.asInstanceOf[RestResult]
    apiResultRest.statusCode shouldBe OK
    val apiurl = apiResultRest.getField("gwApiUrl") + "/path"
    println(s"apiurl: '$apiurl'")
  }

  override def verifyAPIList(rr: RunResult,
                             actionName: String,
                             testurlop: String,
                             testapiname: String,
                             testbasepath: String,
                             testrelpath: String): Unit = {
    val apiResultRest = rr.asInstanceOf[RestResult]
    val apiValue = RestResult.getFieldJsObject(apiResultRest.getFieldListJsObject("apis")(0), "value")
    val apidoc = RestResult.getFieldJsObject(apiValue, "apidoc")
    val basepath = RestResult.getField(apidoc, "basePath")
    basepath shouldBe testbasepath

    val paths = RestResult.getFieldJsObject(apidoc, "paths")
    paths.fields.contains(testrelpath) shouldBe true

    val info = RestResult.getFieldJsObject(apidoc, "info")
    val title = RestResult.getField(info, "title")
    title shouldBe testapiname

    val relpath = RestResult.getFieldJsObject(paths, testrelpath)
    val urlop = RestResult.getFieldJsObject(relpath, testurlop)
    val openwhisk = RestResult.getFieldJsObject(urlop, "x-openwhisk")
    val actionN = RestResult.getField(openwhisk, "action")
    actionN shouldBe actionName
  }

  override def verifyAPISwaggerCreated(rr: RunResult): Unit = {
    val apiResultRest = rr.asInstanceOf[RestResult]
    apiResultRest.statusCode shouldBe OK
  }

  override def writeSwaggerFile(rr: RunResult): File = {
    val swaggerfile = File.createTempFile("api", ".json")
    swaggerfile.deleteOnExit()
    val bw = new BufferedWriter(new FileWriter(swaggerfile))
    val apiResultRest = rr.asInstanceOf[RestResult]
    val apiValue = RestResult.getFieldJsObject(apiResultRest.getFieldListJsObject("apis")(0), "value")
    val apidoc = RestResult.getFieldJsObject(apiValue, "apidoc")
    bw.write(apidoc.toString())
    bw.close()
    swaggerfile
  }

  override def getSwaggerApiUrl(rr: RunResult): String = {
    val apiResultRest = rr.asInstanceOf[RestResult]
    apiResultRest.getField("gwApiUrl") + "/path"
  }
}
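writeSwaggerFile above pairs File.createTempFile with deleteOnExit so the test artifact disappears when the JVM shuts down. A minimal, standalone sketch of that pairing (the JSON payload is hypothetical):

import java.io.{BufferedWriter, File, FileWriter}

object TempJsonSketch {
  def main(args: Array[String]): Unit = {
    val tmp = File.createTempFile("api", ".json")
    tmp.deleteOnExit() // removed automatically when the JVM exits

    val bw = new BufferedWriter(new FileWriter(tmp))
    try bw.write("""{"basePath": "/hello"}""") finally bw.close()

    println(s"wrote ${tmp.getAbsolutePath}")
  }
}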
Example 32
Source File: RuleStatLocator.scala From apalache with Apache License 2.0 | 5 votes |
package at.forsyte.apalache.tla.bmcmt.profiler

import java.io.{BufferedWriter, FileWriter, PrintWriter}

import scala.collection.immutable.SortedMap

class RuleStatLocator {
  private var ruleStats: Map[String, RuleStat] = Map()

  def getRuleStat(ruleName: String): RuleStat = {
    ruleStats.get(ruleName) match {
      case Some(r) => r
      case None =>
        val newRule = new RuleStat(ruleName)
        ruleStats += ruleName -> newRule
        newRule
    }
  }

  def getStats = SortedMap(ruleStats.toSeq: _*)

  def writeStats(filename: String): Unit = {
    val writer = new PrintWriter(new FileWriter(filename, false))
    writer.println("Rule profiling statistics")
    val hrule = List.fill(80)('-').mkString
    writer.println(hrule)
    writer.println("%20s %9s %9s %9s %9s %9s"
      .format("name", "calls", "cells", "smt-consts", "smt-asserts", "smt-avg-size"))
    writer.println(hrule)
    val stats = ruleStats.values.toSeq.sortWith(_.nCalls > _.nCalls)
    for (rs <- stats) {
      writer.println("%-20s %9d %9d %9d %9d %9d"
        .format(rs.ruleName, rs.nCalls, rs.nCellsSelf, rs.nSmtConstsSelf, rs.nSmtAssertsSelf, rs.smtAssertsSizeAvg))
    }
    writer.close()
  }
}
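This example writes a fixed-width table through a PrintWriter layered on a FileWriter; printf-style format strings do the column alignment. A reduced sketch with made-up stats and a hypothetical file name:

import java.io.{FileWriter, PrintWriter}

object StatsTableSketch {
  def main(args: Array[String]): Unit = {
    val stats = Seq(("AssignmentRule", 120, 42), ("SetInRule", 87, 15))
    val writer = new PrintWriter(new FileWriter("/tmp/rule-stats.txt", false)) // overwrite, don't append
    try {
      writer.println("%-20s %9s %9s".format("name", "calls", "cells"))
      stats.foreach { case (name, calls, cells) =>
        writer.println("%-20s %9d %9d".format(name, calls, cells))
      }
    } finally writer.close()
  }
}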
Example 33
Source File: HTMLReportGenerator.scala From regressr with Apache License 2.0 | 5 votes |
package org.ebayopensource.regression.internal.reportGenerator

import java.io.{BufferedWriter, File, FileWriter}

import org.fusesource.scalate.{TemplateEngine, TemplateSource}

import scala.io.Source
import scala.util.{Failure, Success, Try}

class HTMLReportGenerator extends ReportGenerator {
  val scalateEngine = new TemplateEngine

  def getContent(reportEntries: Seq[ReportEntry]): Try[String] = Try {
    if (reportEntries.size == 0) {
      throw new IllegalArgumentException("Cannot generate report with 0 reportEntries.")
    }
    val templateText = Source.fromInputStream(getClass.getResourceAsStream("/report/index.html")).mkString
    scalateEngine.escapeMarkup = false

    val regressionCount: Seq[Int] = reportEntries.flatMap { reportEntry =>
      reportEntry.requestReportEntries.map { requestReportEntry =>
        requestReportEntry.reqMessages.size
      }
    }

    val renderedContent = scalateEngine.layout(
      TemplateSource.fromText("/com/ebay/n/regression/text.ssp", templateText),
      Map("reportEntries" -> reportEntries, "regressionCount" -> regressionCount.sum))
    renderedContent
  }

  def writeAndGetFile(content: String, reportFilePath: String): Try[File] = Try {
    val outputFile = new File(reportFilePath)
    val bw = new BufferedWriter(new FileWriter(outputFile))
    bw.write(content)
    bw.close()
    outputFile
  }

  override def generate(reportEntries: Seq[ReportEntry], reportFilePath: String): Try[File] = Try {
    getContent(reportEntries).flatMap { content =>
      writeAndGetFile(content, reportFilePath)
    } match {
      case Success(file) => file
      case Failure(t) => throw t
    }
  }
}
Example 34
Source File: Xml.scala From temperature-machine with Apache License 2.0 | 5 votes |
package bad.robot.temperature.rrd

import java.io.{BufferedWriter, FileWriter}

import bad.robot.temperature.{FileOps, Files, JsonOps, encode}
import bad.robot.temperature.rrd.ChartJson._
import org.rrd4j.ConsolFun._
import org.rrd4j.core.RrdDb
import bad.robot.temperature.Files._

import scala.collection.JavaConverters._
import scala.xml.{Elem, XML}

case class Xml(xml: Elem) {
  def exportXml(filename: String) = {
    XML.save(Files.path / filename, xml)
  }

  def exportJson(filename: String) = {
    val writer = new BufferedWriter(new FileWriter(Files.path / filename))
    writer.write(toJson())
    writer.close()
  }

  def toJson(): String = {
    val series = parse(xml)
    encode(series).spaces2ps
  }
}

object Xml {
  def apply(start: Seconds, end: Seconds, hosts: List[Host]): Xml = {
    val database = new RrdDb(RrdFile.file)
    val request = database.createFetchRequest(AVERAGE, start, end)
    val sensors = for {
      host <- hosts
      sensor <- 1 to RrdFile.MaxSensors
    } yield {
      s"${host.name}-sensor-$sensor"
    }

    request.setFilter(nonEmpty(sensors, database).asJava)

    val data = request.fetchData()
    val xml = data.exportXml()
    new Xml(XML.loadString(xml))
  }

  def nonEmpty(sensors: List[String], database: RrdDb) = sensors.filter(database.hasValuesFor).toSet
}
Example 35
Source File: JsonFileTest.scala From temperature-machine with Apache License 2.0 | 5 votes |
package bad.robot.temperature.server

import java.io.{BufferedWriter, FileWriter}

import org.specs2.matcher.DisjunctionMatchers.be_\/-
import org.specs2.mutable.Specification

class JsonFileTest extends Specification {

  val exampleJson =
    """
      |[
      |  {
      |    "label": "bedroom1-sensor-1",
      |    "data": [
      |      {
      |        "x": 1507709610000,
      |        "y": "NaN"
      |      },
      |      {
      |        "x": 1507709640000,
      |        "y": "+2.2062500000E01"
      |      },
      |      {
      |        "x": 1507709680000,
      |        "y": "+2.2262500000E01"
      |      }
      |    ]
      |  }
      |]
    """.stripMargin

  "Load a file" >> {
    createFile()
    JsonFile.load must be_\/-(exampleJson)
  }

  private def createFile() = {
    val writer = new BufferedWriter(new FileWriter(JsonFile.file))
    writer.write(exampleJson)
    writer.close()
  }
}
Example 36
Source File: TestLoadDataWithJunkChars.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.integration.spark.testsuite.dataload

import java.io.{BufferedWriter, File, FileWriter}
import java.util.Random

import org.apache.spark.sql.Row
import org.apache.spark.sql.test.util.QueryTest
import org.scalatest.BeforeAndAfterAll

class TestLoadDataWithJunkChars extends QueryTest with BeforeAndAfterAll {
  var filePath = ""
  val junkchars = "ǍǎǏǐǑǒǓǔǕǖǗǘǙǚǛǜǝǞǟǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰ"

  def buildTestData() = {
    filePath = s"$integrationPath/spark/target/junkcharsdata.csv"
    val file = new File(filePath)
    val writer = new BufferedWriter(new FileWriter(file))
    writer.write("c1,c2\n")
    val random = new Random
    for (i <- 1 until 1000) {
      writer.write("a" + i + "," + junkchars + "\n")
      if (i % 100 == 0) {
        writer.flush()
      }
    }
    writer.write("a1000000," + junkchars)
    writer.close
  }

  test("[bug]fix bug of duplicate rows in UnivocityCsvParser #877") {
    buildTestData()
    sql("drop table if exists junkcharsdata")
    sql("""create table if not exists junkcharsdata (c1 string, c2 string) STORED AS carbondata""")
    sql(s"LOAD DATA LOCAL INPATH '$filePath' into table junkcharsdata")
    checkAnswer(sql("select count(*) from junkcharsdata"), Seq(Row(1000)))
    sql("drop table if exists junkcharsdata")
    new File(filePath).delete()
  }
}
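Because this test writes non-ASCII "junk" characters, note that FileWriter (as used above) encodes with the platform default charset, so whether the data round-trips depends on the JVM's file.encoding. A sketch of the more explicit alternative, pinning UTF-8 via OutputStreamWriter (the local path is hypothetical):

import java.io.{BufferedWriter, FileOutputStream, OutputStreamWriter}
import java.nio.charset.StandardCharsets

object JunkCharsSketch {
  def main(args: Array[String]): Unit = {
    val junkchars = "ǍǎǏǐǑǒ"
    // pin the charset instead of relying on the platform default used by FileWriter
    val writer = new BufferedWriter(
      new OutputStreamWriter(new FileOutputStream("/tmp/junkchars.csv"), StandardCharsets.UTF_8))
    try {
      writer.write("c1,c2\n")
      (1 until 10).foreach(i => writer.write(s"a$i,$junkchars\n"))
    } finally writer.close()
  }
}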
Example 37
Source File: SessionDataFileHDFSWriter.scala From spark_training with Apache License 2.0 | 5 votes |
package com.malaska.spark.training.streaming.dstream.sessionization

import java.io.BufferedWriter
import java.io.FileWriter
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.conf.Configuration
import java.io.OutputStreamWriter
import org.apache.hadoop.fs.Path
import java.util.Random

object SessionDataFileHDFSWriter {

  val eol = System.getProperty("line.separator");

  def main(args: Array[String]) {
    if (args.length == 0) {
      println("SessionDataFileWriter {tempDir} {distDir} {numberOfFiles} {numberOfEventsPerFile} {waitBetweenFiles}");
      return;
    }
    val conf = new Configuration
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"))
    conf.addResource(new Path("/etc/hadoop/conf/mapred-site.xml"))
    conf.addResource(new Path("/etc/hadoop/conf/hdfs-site.xml"))

    val fs = FileSystem.get(new Configuration)

    val rootTempDir = args(0)
    val rootDistDir = args(1)
    val files = args(2).toInt
    val loops = args(3).toInt
    val waitBetweenFiles = args(4).toInt
    val r = new Random

    for (f <- 1 to files) {
      val rootName = "/weblog." + System.currentTimeMillis()
      val tmpPath = new Path(rootTempDir + rootName + ".tmp")
      val writer = new BufferedWriter(new OutputStreamWriter(fs.create(tmpPath)))

      print(f + ": [")

      val randomLoops = loops + r.nextInt(loops)

      for (i <- 1 to randomLoops) {
        writer.write(SessionDataGenerator.getNextEvent + eol)
        if (i % 100 == 0) {
          print(".")
        }
      }
      println("]")
      writer.close

      val distPath = new Path(rootDistDir + rootName + ".dat")

      fs.rename(tmpPath, distPath)
      Thread.sleep(waitBetweenFiles)
    }
    println("Done")
  }
}
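The write-to-a-.tmp-path-then-rename step above keeps half-written files from being visible to downstream readers (such as a streaming job watching the directory). A local-filesystem sketch of the same idea using java.nio, with hypothetical paths:

import java.io.{BufferedWriter, FileWriter}
import java.nio.file.{Files, Paths, StandardCopyOption}

object WriteThenRenameSketch {
  def main(args: Array[String]): Unit = {
    val tmp = Paths.get("/tmp/weblog.tmp")
    val dst = Paths.get("/tmp/weblog.dat")

    val writer = new BufferedWriter(new FileWriter(tmp.toFile))
    try (1 to 100).foreach(i => writer.write(s"event-$i\n")) finally writer.close()

    // publish the file only after it is completely written
    Files.move(tmp, dst, StandardCopyOption.ATOMIC_MOVE)
  }
}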
Example 38
Source File: SessionDataFileWriter.scala From spark_training with Apache License 2.0 | 5 votes |
package com.malaska.spark.training.streaming.dstream.sessionization

import java.io.BufferedWriter
import java.io.FileWriter

object SessionDataFileWriter {

  val eol = System.getProperty("line.separator");

  def main(args: Array[String]) {
    if (args.length == 0) {
      println("SessionDataFileWriter {numberOfRecords} {outputFile} ");
      return;
    }

    val writer = new BufferedWriter(new FileWriter(args(1)))
    val loops = args(0).toInt

    for (i <- 1 to loops) {
      writer.write(SessionDataGenerator.getNextEvent + eol)
    }

    writer.close
  }
}
Example 39
Source File: TreeNode.scala From AppCrawler with Apache License 2.0 | 5 votes |
package com.testerhome.appcrawler

import java.io.{BufferedWriter, FileWriter}

import scala.collection.mutable.ListBuffer

case class TreeNode[T](
  value: T,
  children: ListBuffer[TreeNode[T]] = ListBuffer[TreeNode[T]]()
) {
  def equals(node: TreeNode[T]): Boolean = {
    node.value == this.value
  }

  def find(tree: TreeNode[T], node: TreeNode[T]): Option[TreeNode[T]] = {
    if (tree.equals(node)) {
      return Some(tree)
    }
    tree.children.foreach(t => {
      find(t, node) match {
        case Some(v) => return Some(v)
        case None => {}
      }
    })
    None
  }

  def appendNode(currenTree: TreeNode[T], node: TreeNode[T]): TreeNode[T] = {
    find(currenTree, node) match {
      case Some(v) => {
        v
      }
      case None => {
        this.children.append(node)
        node
      }
    }
  }

  def toXml(tree: TreeNode[T]): String = {
    val s = new StringBuffer()
    val before = (tree: TreeNode[T]) => {
      s.append(s"""<node TEXT="${xml.Utility.escape(tree.value.toString)}">""")
      // TODO: add an image link, e.g. LINK="file:///Users/seveniruby/projects/LBSRefresh/Android_20160216105737/946_StockDetail-Back--.png"
    }
    val after = (tree: TreeNode[T]) => {
      s.append("</node>")
      s.append("\n")
    }
    s.append("""<map version="1.0.1">""")
    s.append("\n")
    traversal[T](tree, before, after)
    s.append("</map>")
    s.toString
  }

  def traversal[T](tree: TreeNode[T],
                   before: (TreeNode[T]) => Any = (x: TreeNode[T]) => Unit,
                   after: (TreeNode[T]) => Any = (x: TreeNode[T]) => Unit): Unit = {
    before(tree)
    tree.children.foreach(t => {
      traversal(t, before, after)
    })
    after(tree)
  }

  def generateFreeMind(list: ListBuffer[T], path: String = null): String = {
    if (list.isEmpty) {
      return ""
    }
    val root = TreeNode(list.head)
    var currentNode = root
    list.slice(1, list.size).foreach(e => {
      currentNode = currentNode.appendNode(root, TreeNode(e))
    })
    val xml = toXml(root)
    if (path != null) {
      val file = new java.io.File(path)
      val bw = new BufferedWriter(new FileWriter(file))
      bw.write(xml)
      bw.close()
    }
    xml
  }
}
Example 40
Source File: SortedStringWriter.scala From ScalaClean with Apache License 2.0 | 5 votes |
package org.scalaclean.analysis

import java.io.BufferedWriter
import java.nio.file.{Files, Path, StandardOpenOption}

import scala.collection.mutable

// TODO This class should not be needed - use StringWriter instead
// however ElementWriter appears to be very sensitive to write order right now
class SortedStringWriter(targetPath: Path) {
  val target: BufferedWriter = Files.newBufferedWriter(
    targetPath,
    StandardOpenOption.TRUNCATE_EXISTING, StandardOpenOption.WRITE, StandardOpenOption.CREATE)

  private val strings: mutable.Set[String] = mutable.SortedSet[String]()

  def writeLine(s: String): Boolean = {
    strings.add(s)
  }

  def flush(): Unit = {
    strings.toVector.foreach { line =>
      target.write(line)
      target.newLine()
    }
    strings.clear
    target.flush()
  }

  def close(): Unit = {
    flush()
    target.close()
  }
}
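Unlike the other examples, this one obtains its BufferedWriter from java.nio.file.Files.newBufferedWriter, which accepts OpenOptions and, optionally, a charset instead of hand-wrapping streams. A minimal sketch with a hypothetical path:

import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths, StandardOpenOption}

object NioWriterSketch {
  def main(args: Array[String]): Unit = {
    val writer = Files.newBufferedWriter(
      Paths.get("/tmp/sorted.txt"),
      StandardCharsets.UTF_8, // the charset is explicit here
      StandardOpenOption.CREATE,
      StandardOpenOption.TRUNCATE_EXISTING,
      StandardOpenOption.WRITE)
    try {
      Seq("b", "a", "c").sorted.foreach { line =>
        writer.write(line)
        writer.newLine()
      }
    } finally writer.close()
  }
}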
Example 41
Source File: StarsAnalysisDemo.scala From CkoocNLP with Apache License 2.0 | 5 votes |
package applications.analysis

import java.io.{BufferedWriter, FileOutputStream, OutputStreamWriter}

import functions.segment.Segmenter
import org.apache.log4j.{Level, Logger}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Row, SparkSession}

object StarsAnalysisDemo {
  def main(args: Array[String]) {
    Logger.getLogger("org").setLevel(Level.WARN)

    val spark = SparkSession
      .builder
      .master("local[2]")
      .appName("Stars Analysis Demo")
      .getOrCreate()

    val filePath = "E:/data/chinaNews/entertainment.txt"

    // load the data, keep only the year and content fields, and filter the content field
    import spark.implicits._
    val data = spark.sparkContext.textFile(filePath).flatMap { line =>
      val tokens: Array[String] = line.split("\u00ef")
      if (tokens.length > 3) {
        var year: String = tokens(2).split("-")(0)
        if (tokens(2).contains("年")) year = tokens(2).split("年")(0)

        var content = tokens(3)
        if (content.length > 22 && content.substring(0, 20).contains("日电")) {
          content = content.substring(content.indexOf("日电") + 2, content.length).trim
        }
        if (content.startsWith("(")) content = content.substring(content.indexOf(")") + 1, content.length)
        if (content.length > 20 && content.substring(content.length - 20, content.length).contains("记者")) {
          content = content.substring(0, content.lastIndexOf("记者")).trim
        }

        Some(year, content)
      } else None
    }.toDF("year", "content")

    // segment the text, drop terms of length 1, and keep the part of speech of each term
    val segmenter = new Segmenter()
      .isAddNature(true)
      .isDelEn(true)
      .isDelNum(true)
      .setMinTermLen(2)
      .setMinTermNum(5)
      .setSegType("StandardSegment")
      .setInputCol("content")
      .setOutputCol("segmented")
    val segDF: DataFrame = segmenter.transform(data)
    segDF.cache()

    val segRDD: RDD[(Int, Seq[String])] = segDF.select("year", "segmented").rdd.map {
      case Row(year: String, terms: Seq[String]) =>
        (Integer.parseInt(year), terms)
    }

    val result: Array[String] = segRDD.map(line => line._1.toString + "\u00ef" + line._2.mkString(",")).collect()
    val writer: BufferedWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("E:/entertainment_seg.txt")))
    result.foreach(line => writer.write(line + "\n"))
    writer.close()

    // count the stars (person names) that appeared most often in news from 2016
    val stars2016 = segRDD.filter(_._1 == 2016)
      .flatMap { case (year: Int, termStr: Seq[String]) =>
        val person = termStr
          .map(term => (term.split("/")(0), term.split("/")(1)))
          .filter(_._2.equalsIgnoreCase("nr"))
          .map(term => (term._1, 1L))
        person
      }
      .reduceByKey(_ + _)
      .sortBy(_._2, ascending = false)

    segDF.unpersist()

    stars2016.take(100).foreach(println)

    spark.stop()
  }
}
Example 42
Source File: FileSystem.scala From ohara with Apache License 2.0 | 4 votes |
package oharastream.ohara.client.filesystem

import java.io.{BufferedReader, BufferedWriter, IOException, InputStreamReader, OutputStreamWriter}
import java.nio.charset.{Charset, StandardCharsets}

import oharastream.ohara.client.filesystem.ftp.FtpFileSystem
import oharastream.ohara.client.filesystem.hdfs.HdfsFileSystem
import oharastream.ohara.client.filesystem.smb.SmbFileSystem
import oharastream.ohara.common.exception.FileSystemException

trait FileSystem extends oharastream.ohara.kafka.connector.storage.FileSystem {

  def readLines(path: String, encode: String = "UTF-8"): Array[String] = {
    val reader = new BufferedReader(new InputStreamReader(open(path), Charset.forName(encode)))
    try Iterator.continually(reader.readLine()).takeWhile(_ != null).toArray
    finally reader.close()
  }

  def wrap[T](f: () => T): T =
    try {
      f()
    } catch {
      case e: IOException           => throw new FileSystemException(e.getMessage, e)
      case e: IllegalStateException => throw new FileSystemException(e.getMessage, e)
    }
}

object FileSystem {
  def hdfsBuilder: HdfsFileSystem.Builder = HdfsFileSystem.builder

  def ftpBuilder: FtpFileSystem.Builder = FtpFileSystem.builder

  def smbBuilder: SmbFileSystem.Builder = SmbFileSystem.builder
}
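readLines above is the reading counterpart of the BufferedWriter examples: a BufferedReader drained with Iterator.continually(...).takeWhile(_ != null). A standalone sketch of the same idiom against a local file, using scala.util.Using (Scala 2.13+) to handle the close; the path is hypothetical:

import java.io.{BufferedReader, FileInputStream, InputStreamReader}
import java.nio.charset.StandardCharsets
import scala.util.Using

object ReadLinesSketch {
  def main(args: Array[String]): Unit = {
    val lines: Array[String] =
      Using.resource(new BufferedReader(
        new InputStreamReader(new FileInputStream("/tmp/lines.txt"), StandardCharsets.UTF_8))) { reader =>
        // readLine() returns null at end of stream, so takeWhile stops there
        Iterator.continually(reader.readLine()).takeWhile(_ != null).toArray
      }
    lines.foreach(println)
  }
}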