java.nio.file.FileSystems Scala Examples
The following examples show how to use java.nio.file.FileSystems.
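Before diving into the project examples below, here is a minimal, self-contained sketch, not taken from any of the projects on this page (the file and archive names are made up), of the two FileSystems entry points that recur throughout: getDefault for paths and glob matchers on the local file system, and newFileSystem for treating a zip/jar archive as a file system.

import java.net.URI
import java.nio.file.{FileSystems, Files, Paths}
import scala.collection.JavaConverters._

object FileSystemsQuickTour extends App {
  // Default file system: build Paths and glob PathMatchers.
  val path = FileSystems.getDefault.getPath("/tmp", "example.txt")
  val matcher = FileSystems.getDefault.getPathMatcher("glob:**/*.txt")
  println(matcher.matches(path))   // true

  // Zip/jar archive as a file system; "create" -> "true" makes the archive if it is missing.
  val uri = URI.create(s"jar:${Paths.get("/tmp/archive.zip").toUri}")
  val zipFs = FileSystems.newFileSystem(uri, Map("create" -> "true").asJava)
  try Files.write(zipFs.getPath("/hello.txt"), "hello".getBytes("UTF-8"))
  finally zipFs.close()
}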
Example 1
Source File: FilePathMatcher.scala from RTran with Apache License 2.0
package com.ebay.rtran.generic.util

import java.io.File
import java.nio.file.{FileSystems, PathMatcher}

import com.typesafe.scalalogging.LazyLogging
import org.apache.commons.io.FileUtils
import org.mozilla.universalchardet.CharsetListener

import scala.util.Try

object FilePathMatcher {

  def apply(rootDir: File, pathPattern: String): Try[PathMatcher] = Try {
    val trimmedPattern = new String(pathPattern.trim.toCharArray.dropWhile(_ == '/')).trim
    val path = rootDir.getAbsolutePath.replaceAll("\\\\", "/")
    FileSystems.getDefault.getPathMatcher(s"glob:${path}/$trimmedPattern")
    //FileSystems.getDefault.getPathMatcher(s"glob:${rootDir.getAbsolutePath}/$trimmedPattern")
  }
}

object EncodingDetector extends LazyLogging {

  val DEFAULT_ENCODING = "UTF-8"

  def guessEncoding(file: File) = {
    val bytes = FileUtils.readFileToByteArray(file)
    val dummyListener = new CharsetListener {
      override def report(charset: String): Unit = {}
    }
    val detector = new org.mozilla.universalchardet.UniversalDetector(dummyListener)
    detector.handleData(bytes, 0, bytes.length)
    detector.dataEnd()
    val encoding = Option(detector.getDetectedCharset) getOrElse DEFAULT_ENCODING
    logger.debug("Detected encoding {} for {}", detector.getDetectedCharset, file)
    detector.reset()
    (encoding, bytes)
  }
}
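For illustration only, the FilePathMatcher above could be exercised like this; the root directory and pattern are made-up values, and since apply returns a scala.util.Try the result is consumed with foreach.

import java.io.File
import java.nio.file.Paths

FilePathMatcher(new File("/work/project"), "src/**/*.scala").foreach { matcher =>
  // The matcher glob is anchored at the absolute root path, so test absolute paths.
  println(matcher.matches(Paths.get("/work/project/src/main/Main.scala")))   // true
}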
Example 2
Source File: extract_ldcc.scala from attic-nlp4l with Apache License 2.0
import java.io.File
import java.nio.file.FileSystems

import org.apache.lucene.index._
import org.apache.lucene.search.TermQuery
import org.nlp4l.core.analysis.Analyzer
import org.nlp4l.core._

import scalax.file.Path
import scalax.file.PathSet

val srcIndex = "/tmp/index-ldcc"
val index = "/tmp/index-ldcc-part"

val searcher = ISearcher(srcIndex)

// write documents into an index
val schema = SchemaLoader.loadFile("examples/schema/ldcc.conf")
val writer = IWriter(index, schema)

def writeCategoryDocs(cat: String): Unit = {
  val results = searcher.search(query = new TermQuery(new Term("cat", cat)), rows = 1000)
  results.foreach(doc => {
    writer.write(doc)
  })
}

writeCategoryDocs("dokujo-tsushin")
writeCategoryDocs("sports-watch")

writer.close
Example 3
Source File: index_ceeaus.scala from attic-nlp4l with Apache License 2.0
import java.io.File
import java.io.FileInputStream
import java.io.InputStreamReader
import java.io.BufferedReader
import java.nio.file.FileSystems

import org.apache.lucene.index._
import org.apache.lucene.search.TermQuery
import org.nlp4l.core.analysis.Analyzer
import org.nlp4l.core._

import scalax.file.Path
import scalax.file.PathSet

val index = "/tmp/index-ceeaus"

def lines(fl: Path, encoding: String): List[String] = {
  val is = new FileInputStream(fl.path)
  val r = new InputStreamReader(is, encoding)
  val br = new BufferedReader(r)
  var result: List[String] = Nil
  try {
    var line = br.readLine()
    while (line != null) {
      result = result :+ line
      line = br.readLine()
    }
    result
  } finally {
    br.close
    r.close
    is.close
  }
}

def document(fl: Path, ja: Boolean): Document = {
  val ps: Array[String] = fl.path.split(File.separator)
  // for Windows
  // val ps: Array[String] = file.path.split("\\\\")
  val file = ps(3)
  val typ = ps(2)
  val cat = if (file.indexOf("smk") >= 0) "smk" else "ptj"   // smoking or part time job
  val encoding = if (ja) "sjis" else "UTF-8"
  val body = lines(fl, encoding)
  Document(Set(
    Field("file", file), Field("type", typ), Field("cat", cat),
    Field(if (ja) "body_ja" else "body_en", body)
  ))
}

// delete existing Lucene index
val p = Path(new File(index))
p.deleteRecursively()

// write documents into an index
val schema = SchemaLoader.loadFile("examples/schema/ceeaus.conf")
val writer = IWriter(index, schema)

val c: PathSet[Path] = Path("corpora", "CEEAUS").children()

// write English docs
c.toList.sorted.filter(e => e.name.indexOf("CJEJUS") < 0 && e.name.indexOf("PLAIN") < 0).foreach(
  f => f.children().toList.sorted.filter(g => g.name.indexOf("(1)") < 0 && g.name.endsWith(".txt")).foreach(h => writer.write(document(h, false)))
)

// write Japanese docs
c.toList.sorted.filter(e => e.name.indexOf("CJEJUS") >= 0).foreach(
  f => f.children().toList.sorted.filter(g => g.name.indexOf("(1)") < 0 && g.name.endsWith(".txt")).foreach(h => writer.write(document(h, true)))
)

writer.close

// search
val searcher = ISearcher(index)
val results = searcher.search(query = new TermQuery(new Term("body_ja", "喫煙")), rows = 10)

results.foreach(doc => {
  printf("[DocID] %d: %s\n", doc.docId, doc.get("file"))
})
Example 4
Source File: hmm_postagger.scala from attic-nlp4l with Apache License 2.0
import java.io.File
import java.nio.file.FileSystems

import org.apache.lucene.index._
import org.apache.lucene.search.TermQuery
import org.nlp4l.core.analysis.Analyzer
import org.nlp4l.core._
import org.nlp4l.lm.{HmmTagger, HmmModel, HmmModelIndexer}

import scala.io._
import scala.util.matching.Regex
import scalax.file.Path
import scalax.file.PathSet

val index = "/tmp/index-brown-hmm"

// delete existing Lucene index
val p = Path(new File(index))
p.deleteRecursively()

// create HMM model index
val c: PathSet[Path] = Path("corpora", "brown", "brown").children()
val indexer = HmmModelIndexer(index)

c.filter { e =>
  val s = e.name
  val c = s.charAt(s.length - 1)
  c >= '0' && c <= '9'
}.toList.sorted.foreach { f =>
  val source = Source.fromFile(f.path, "UTF-8")
  source.getLines().map(_.trim).filter(_.length > 0).foreach { g =>
    val pairs = g.split("\\s+")
    val doc = pairs.map { h => h.split("/") }.filter { _.length == 2 }.map { i => (i(0).toLowerCase(), i(1)) }
    indexer.addDocument(doc)
  }
}

indexer.close()

// read the model index
val model = HmmModel(index)

println("\n=== tagger test ===")
val tagger = HmmTagger(model)

tagger.tokens("i like to go to france .")
tagger.tokens("you executed lucene program .")
tagger.tokens("nlp4l development members may be able to present better keywords .")
Example 5
Source File: index_ldcc_sports-watch.scala from attic-nlp4l with Apache License 2.0
import java.io.File
import java.nio.file.FileSystems

import org.apache.lucene.analysis.ja.{JapaneseAnalyzer, CompoundNounCnAnalyzer, CompoundNounRn2Analyzer, CompoundNounLn2Analyzer}
import org.apache.lucene.index._
import org.apache.lucene.search.TermQuery
import org.nlp4l.core.analysis.Analyzer
import org.nlp4l.core._

import scalax.file.Path
import scalax.file.PathSet

val index = "/tmp/index-ldcc-sports-watch"

def document(file: Path): Document = {
  val ps: Array[String] = file.path.split(File.separator)
  // for Windows
  // val ps: Array[String] = file.path.split("\\\\")
  val cat = ps(3)
  val lines = file.lines().toArray
  val url = lines(0)
  val date = lines(1)
  val title = lines(2)
  val body = file.lines().drop(3).toList
  Document(Set(
    Field("url", url), Field("date", date), Field("cat", cat),
    Field("body2", body),
    Field("title", title), Field("body", body), Field("body_rn2", body), Field("body_ln2", body)
  ))
}

def teschema(): Schema = {
  val analyzerJa = Analyzer(new JapaneseAnalyzer)
  val analyzerCn = Analyzer(new CompoundNounCnAnalyzer)
  val analyzerRn2 = Analyzer(new CompoundNounRn2Analyzer)
  val analyzerLn2 = Analyzer(new CompoundNounLn2Analyzer)
  val fieldTypes = Map(
    "url" -> FieldType(null, true, true),
    "date" -> FieldType(null, true, true),
    "cat" -> FieldType(null, true, true),
    "title" -> FieldType(analyzerJa, true, true),
    "body2" -> FieldType(analyzerJa, true, true),
    "body" -> FieldType(analyzerCn, true, true, true, true),
    "body_rn2" -> FieldType(analyzerRn2, true, true, true, true),
    "body_ln2" -> FieldType(analyzerLn2, true, true, true, true)
  )
  Schema(analyzerJa, fieldTypes)
}

// delete existing Lucene index
val p = Path(new File(index))
p.deleteRecursively()

// write documents into an index
val schema = teschema()
val writer = IWriter(index, schema)

val c: PathSet[Path] = Path("corpora", "ldcc", "text", "sports-watch").children()
c.filterNot(g => g.name.equals("LICENSE.txt")).toList.sorted.foreach(h => writer.write(document(h)))

writer.close

// search
val searcher = ISearcher(index)
val results = searcher.search(query = new TermQuery(new Term("title", "iphone")), rows = 10)

results.foreach(doc => {
  printf("[DocID] %d: %s\n", doc.docId, doc.get("title"))
})
Example 6
Source File: SimpleFST.scala from attic-nlp4l with Apache License 2.0
package org.nlp4l.core

import java.nio.file.FileSystems

import org.apache.lucene.store.{Directory, FSDirectory, IOContext, IndexOutput, IndexInput}
import org.apache.lucene.util.IntsRefBuilder
import org.apache.lucene.util.fst._

object SimpleFST {
  def apply(generateUnknownWords: Boolean = false) = new SimpleFST(generateUnknownWords)
}

class SimpleFST(generateUnknownWords: Boolean) {
  val outputs = PositiveIntOutputs.getSingleton.asInstanceOf[Outputs[Long]]
  val builder: Builder[Long] = new Builder[Long](FST.INPUT_TYPE.BYTE4, outputs)
  val scratchInts = new IntsRefBuilder
  val scratchArc = new FST.Arc[Long]
  var fst: FST[Long] = null
  var fstReader: FST.BytesReader = null
  val MAX_LEN_UNKNOWN_WORD = 4
  val UNKNOWN_WORD: Long = -1

  // add entries in alphabetical order
  def addEntry(text: String, value: Long): Unit = {
    builder.add(Util.toUTF32(text, scratchInts), value)
  }

  def finish(): Unit = {
    fst = builder.finish
    fstReader = fst.getBytesReader
  }

  def leftMostSubstring(str: String, pos: Int): Seq[(Int, Long)] = {
    val pendingOutput = outputs.getNoOutput
    fst.getFirstArc(scratchArc)
    leftMostSubstring(str, pos, 0, pendingOutput, List.empty[(Int, Long)])
  }

  private def leftMostSubstring(str: String, pos: Int, index: Int, pendingOutput: Long, result: List[(Int, Long)]): Seq[(Int, Long)] = {
    if (str.length <= pos + index) {
      if (result.size > 0 || !generateUnknownWords) result
      else {
        unknownWords(str, pos, 1, result)   // result is empty
      }
    }
    else {
      val codePoint = str.codePointAt(pos + index)
      if (fst.findTargetArc(codePoint, scratchArc, scratchArc, fstReader) == null) {
        if (result.size > 0 || !generateUnknownWords) result
        else {
          unknownWords(str, pos, 1, result)   // result is empty
        }
      }
      else {
        val nextIndex = index + Character.charCount(codePoint)
        val pendingOutput2 = fst.outputs.add(pendingOutput, scratchArc.output)
        if (scratchArc.isFinal()) {
          val matchOutputs = fst.outputs.add(pendingOutput2, scratchArc.nextFinalOutput)
          leftMostSubstring(str, pos, nextIndex, matchOutputs, result :+ (pos + nextIndex, matchOutputs))
        }
        else {
          leftMostSubstring(str, pos, nextIndex, pendingOutput2, result)
        }
      }
    }
  }

  private def unknownWords(str: String, pos: Int, index: Int, result: List[(Int, Long)]): Seq[(Int, Long)] = {
    if (str.length < pos + index || index > MAX_LEN_UNKNOWN_WORD) result
    else {
      unknownWords(str, pos, index + 1, result :+ (pos + index, UNKNOWN_WORD))
    }
  }

  def exactMatch(str: String): Long = {
    // TODO avoid using leftMostSubstring() in exactMatch() as it is somewhat inefficient
    val result = leftMostSubstring(str, 0)
    if (result.length > 0) {
      val last = result.last
      if (last._1 == str.length) last._2 else UNKNOWN_WORD
    }
    else UNKNOWN_WORD
  }

  def save(dirStr: String, file: String = "fst.dic"): Unit = {
    val dir: Directory = FSDirectory.open(FileSystems.getDefault.getPath(dirStr))
    val out: IndexOutput = dir.createOutput(file, IOContext.DEFAULT)
    fst.save(out)
    out.close()
    dir.close()
  }

  def load(dirStr: String, file: String = "fst.dic"): Unit = {
    val dir: Directory = FSDirectory.open(FileSystems.getDefault.getPath(dirStr))
    val in: IndexInput = dir.openInput(file, IOContext.DEFAULT)
    fst = new FST[Long](in, outputs)
    fstReader = fst.getBytesReader
  }
}
Example 7
Source File: DirectoryTemplateSource.scala from cluster-broccoli with Apache License 2.0
package de.frosner.broccoli.templates

import java.nio.file.{FileSystems, Files}

import com.typesafe.config.{ConfigFactory, ConfigValue}
import pureconfig._
import pureconfig.module.enumeratum._
import de.frosner.broccoli.models.{ParameterInfo, Template}
import play.api.libs.json.Json

import scala.collection.JavaConverters._
import scala.io.Source
import scala.util.Try

// The class header and several members (log, loadTemplate) are truncated in
// this excerpt; the signature below is inferred from how the class is
// instantiated in DirectoryTemplateSourceSpec (Example 8).
class DirectoryTemplateSource(directory: String, templateRenderer: TemplateRenderer) {

  def loadTemplates(): Seq[Template] = {
    val rootTemplatesDirectory = FileSystems.getDefault.getPath(directory).toAbsolutePath
    if (!Files.isDirectory(rootTemplatesDirectory)) {
      throw new IllegalStateException(s"Templates directory ${rootTemplatesDirectory} is not a directory")
    }
    log.info(s"Looking for templates in $rootTemplatesDirectory")
    val templateDirectories = Files.list(rootTemplatesDirectory).iterator().asScala.filter(Files.isDirectory(_)).toSeq
    log.info(s"Found ${templateDirectories.length} template directories: ${templateDirectories.mkString(", ")}")
    val templates = templateDirectories.flatMap(templateDirectory => {
      val tryTemplate = Try {
        val templateFileContent = Source.fromFile(templateDirectory.resolve("template.json").toString).mkString
        val templateId = templateDirectory.getFileName.toString
        val templateInfo = loadConfigOrThrow[TemplateConfig.TemplateInfo](
          ConfigFactory.parseFile(templateDirectory.resolve("template.conf").toFile))
        loadTemplate(templateId, templateFileContent, templateInfo).get
      }
      tryTemplate.failed.map(throwable => log.error(s"Parsing template '$templateDirectory' failed: $throwable"))
      tryTemplate.toOption
    })
    log.info(s"Successfully parsed ${templates.length} templates: ${templates.map(_.id).mkString(", ")}")
    templates.sortBy(_.id)
  }
}
Example 8
Source File: DirectoryTemplateSourceSpec.scala from cluster-broccoli with Apache License 2.0
package de.frosner.broccoli.templates

import java.nio.file.{FileSystems, Files, Path}

import de.frosner.broccoli.models._
import org.specs2.mock.Mockito
import org.specs2.mutable.Specification
import org.mockito.Mockito._

import scala.io.Source

class DirectoryTemplateSourceSpec extends Specification with TemporaryTemplatesContext with Mockito {
  "Loading templates from a directory" should {

    def templateRenderer: TemplateRenderer = {
      val templateRenderer = mock[TemplateRenderer]
      when(templateRenderer.validateParameterName(anyString)).thenReturn(true)
      templateRenderer
    }

    "fail if the passed directory is not directory" in {
      val directory = FileSystems.getDefault.getPath("not-a-directory")
      Files.exists(directory) must beFalse
      new DirectoryTemplateSource(directory.toString, mock[TemplateRenderer]).loadTemplates must throwA(
        new IllegalStateException(s"Templates directory ${directory.toAbsolutePath} is not a directory"))
    }

    "parse fully specified templates correctly" in { templatesDirectory: Path =>
      val templates =
        new DirectoryTemplateSource(templatesDirectory.toString, templateRenderer).loadTemplates()
      templates must contain(
        beEqualTo(Template(
          "curl",
          Source.fromFile(templatesDirectory.resolve("curl/template.json").toFile).mkString,
          "A periodic job that sends an HTTP GET request to a specified address every minute.",
          Map(
            "id" -> ParameterInfo("id", None, None, None, ParameterType.Raw, Some(0)),
            "URL" -> ParameterInfo("URL", None, Some(RawParameterValue("localhost:8000")), None, ParameterType.Raw, Some(1)),
            "enabled" -> ParameterInfo("enabled", None, Some(RawParameterValue("true")), None, ParameterType.Raw, Some(2))
          )
        ))).exactly(1)
    }

    "use a default template description if not provided" in { templatesDirectory: Path =>
      val templates =
        new DirectoryTemplateSource(templatesDirectory.toString, templateRenderer).loadTemplates()
      templates.map(_.description) must contain(beEqualTo("curl-without-decription template")).exactly(1)
    }

    "not contain templates that failed to parse" in { templatesDirectory: Path =>
      val templates =
        new DirectoryTemplateSource(templatesDirectory.toString, templateRenderer).loadTemplates()
      templates.map(_.id) must not contain beEqualTo("broken-template")
    }
  }
}
Example 9
Source File: RangerAdminClientImpl.scala from spark-ranger with Apache License 2.0
package org.apache.ranger.services.spark

import java.nio.file.{Files, FileSystems}
import java.util

import com.google.gson.GsonBuilder
import org.apache.commons.logging.{Log, LogFactory}
import org.apache.ranger.admin.client.RangerAdminRESTClient
import org.apache.ranger.plugin.util.{GrantRevokeRequest, ServicePolicies, ServiceTags}

class RangerAdminClientImpl extends RangerAdminRESTClient {
  private val LOG: Log = LogFactory.getLog(classOf[RangerAdminClientImpl])
  private val cacheFilename = "sparkSql_hive_jenkins.json"
  private val gson =
    new GsonBuilder().setDateFormat("yyyyMMdd-HH:mm:ss.SSS-Z").setPrettyPrinting().create

  override def init(serviceName: String, appId: String, configPropertyPrefix: String): Unit = {}

  override def getServicePoliciesIfUpdated(
      lastKnownVersion: Long,
      lastActivationTimeInMillis: Long): ServicePolicies = {
    val basedir = this.getClass.getProtectionDomain.getCodeSource.getLocation.getPath
    val cachePath = FileSystems.getDefault.getPath(basedir, cacheFilename)
    LOG.info("Reading policies from " + cachePath)
    val bytes = Files.readAllBytes(cachePath)
    gson.fromJson(new String(bytes), classOf[ServicePolicies])
  }

  override def grantAccess(request: GrantRevokeRequest): Unit = {}

  override def revokeAccess(request: GrantRevokeRequest): Unit = {}

  override def getServiceTagsIfUpdated(
      lastKnownVersion: Long,
      lastActivationTimeInMillis: Long): ServiceTags = null

  override def getTagTypes(tagTypePattern: String): util.List[String] = null
}
Example 10
Source File: index_ldcc.scala from attic-nlp4l with Apache License 2.0
import java.io.File
import java.nio.file.FileSystems

import org.apache.lucene.index._
import org.apache.lucene.search.TermQuery
import org.nlp4l.core.analysis.Analyzer
import org.nlp4l.core._

import scalax.file.Path
import scalax.file.PathSet

val index = "/tmp/index-ldcc"

def document(file: Path): Document = {
  val ps: Array[String] = file.path.split(File.separator)
  // for Windows
  // val ps: Array[String] = file.path.split("\\\\")
  val cat = ps(3)
  val lines = file.lines().toArray
  val url = lines(0)
  val date = lines(1)
  val title = lines(2)
  val body = file.lines().drop(3).toList
  Document(Set(
    Field("url", url), Field("date", date), Field("cat", cat),
    Field("title", title), Field("body", body)
  ))
}

// delete existing Lucene index
val p = Path(new File(index))
p.deleteRecursively()

// write documents into an index
val schema = SchemaLoader.loadFile("examples/schema/ldcc.conf")
val writer = IWriter(index, schema)

val c: PathSet[Path] = Path("corpora", "ldcc", "text").children()
c.filterNot(e => e.name.endsWith(".txt")).foreach { f =>
  f.children().filterNot(g => g.name.equals("LICENSE.txt")).toList.sorted.foreach(h => writer.write(document(h)))
}

writer.close

// search
val searcher = ISearcher(index)
val results = searcher.search(query = new TermQuery(new Term("title", "iphone")), rows = 10)

results.foreach(doc => {
  printf("[DocID] %d: %s\n", doc.docId, doc.get("title"))
})
Example 11
Source File: FileIO.scala from swave with Mozilla Public License 2.0
package swave.core.io.files

import java.io.File
import java.nio.channels.FileChannel
import java.nio.file.{FileSystems, Files, Path, StandardOpenOption}

import scala.util.control.NonFatal
import com.typesafe.config.Config
import swave.core.impl.util.SettingsCompanion
import swave.core.io.Bytes
import swave.core.macros._

object FileIO extends SpoutFromFiles with DrainToFiles {

  lazy val userHomePath: Path = FileSystems.getDefault.getPath(System getProperty "user.home")

  def resolveFileSystemPath(pathName: String): Path =
    if (pathName.length >= 2 && pathName.charAt(0) == '~' && pathName.charAt(1) == File.separatorChar) {
      userHomePath.resolve(pathName substring 2)
    } else FileSystems.getDefault.getPath(pathName)

  val WriteCreateOptions: Set[StandardOpenOption] = {
    import StandardOpenOption._
    Set(CREATE, TRUNCATE_EXISTING, WRITE)
  }

  final case class Settings(defaultFileReadingChunkSize: Int, defaultFileWritingChunkSize: Int) {
    requireArg(defaultFileReadingChunkSize > 0, "`defaultFileChunkSize` must be > 0")
    requireArg(defaultFileWritingChunkSize >= 0, "`defaultFileWritingChunkSize` must be >= 0")

    def withDefaultFileReadingChunkSize(defaultFileReadingChunkSize: Int) =
      copy(defaultFileReadingChunkSize = defaultFileReadingChunkSize)
    def withDefaultFileWritingChunkSize(defaultFileWritingChunkSize: Int) =
      copy(defaultFileWritingChunkSize = defaultFileWritingChunkSize)
  }

  object Settings extends SettingsCompanion[Settings]("swave.core.file-io") {
    def fromSubConfig(c: Config): Settings =
      Settings(
        defaultFileReadingChunkSize = c getInt "default-file-reading-chunk-size",
        defaultFileWritingChunkSize = c getInt "default-file-writing-chunk-size")
  }

  def writeFile[T: Bytes](fileName: String, data: T): Unit = writeFile(resolveFileSystemPath(fileName), data)
  def writeFile[T: Bytes](file: File, data: T): Unit = writeFile(file.toPath, data)
  def writeFile[T: Bytes](path: Path, data: T, options: StandardOpenOption*): Unit = {
    implicit def decorator(value: T): Bytes.Decorator[T] = Bytes.decorator(value)
    Files.write(path, data.toArray, options: _*)
    ()
  }

  def readFile[T: Bytes](fileName: String): T = readFile(resolveFileSystemPath(fileName))
  def readFile[T: Bytes](file: File): T = readFile(file.toPath)
  def readFile[T: Bytes](path: Path): T = implicitly[Bytes[T]].apply(Files.readAllBytes(path))

  private[io] def quietClose(channel: FileChannel): Unit =
    try channel.close()
    catch { case NonFatal(_) ⇒ }
}
Example 12
Source File: FtpFileLister.scala from stream-reactor with Apache License 2.0
package com.datamountaineer.streamreactor.connect.ftp.source

import java.nio.file.{FileSystems, Paths}
import java.time.{Duration, Instant}

import com.typesafe.scalalogging.StrictLogging
import org.apache.commons.net.ftp.{FTPClient, FTPFile}

// org.apache.commons.net.ftp.FTPFile only contains the relative path
case class AbsoluteFtpFile(ftpFile: FTPFile, parentDir: String) {
  def name() = ftpFile.getName
  def size() = ftpFile.getSize
  def timestamp() = ftpFile.getTimestamp.toInstant
  def path() = Paths.get(parentDir, name).toString
  def age(): Duration = Duration.between(timestamp, Instant.now)
}

case class FtpFileLister(ftp: FTPClient) extends StrictLogging {

  def pathMatch(pattern: String, path: String): Boolean = {
    val g = s"glob:$pattern"
    FileSystems.getDefault.getPathMatcher(g).matches(Paths.get(path))
  }

  def isGlobPattern(pattern: String): Boolean = List("*", "?", "[", "{").exists(pattern.contains(_))

  def listFiles(path: String): Seq[AbsoluteFtpFile] = {
    val pathParts: Seq[String] = path.split("/")

    val (basePath, patterns) = pathParts.zipWithIndex.view.find { case (part, _) => isGlobPattern(part) } match {
      case Some((_, index)) => pathParts.splitAt(index)
      case _ => (pathParts.init, Seq[String](pathParts.last))
    }

    def iter(basePath: String, patterns: List[String]): Seq[AbsoluteFtpFile] = {
      Option(ftp.listFiles(basePath + "/")) match {
        case Some(files) => patterns match {
          case pattern :: Nil => {
            files.filter(f => f.isFile && pathMatch(pattern, f.getName))
              .map(AbsoluteFtpFile(_, basePath + "/"))
          }
          case pattern :: rest => {
            files.filter(f => f.getName() != "." && f.getName() != ".." && pathMatch(pattern, f.getName))
              .flatMap(f => iter(Paths.get(basePath, f.getName).toString, rest))
          }
          case _ => Seq()
        }
        case _ => Seq()
      }
    }

    iter(Paths.get("/", basePath: _*).toString, patterns.toList)
  }
}
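As a side note, the pathMatch helper above is ordinary java.nio glob matching against a file name, which can be checked standalone; the pattern and file name here are illustrative values.

import java.nio.file.{FileSystems, Paths}

val matched = FileSystems.getDefault.getPathMatcher("glob:report-*.csv").matches(Paths.get("report-2020.csv"))
println(matched)   // true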
Example 13
Source File: file_watcher.scala from libisabelle with Apache License 2.0
package isabelle

import java.io.{File => JFile}
import java.nio.file.FileSystems
import java.nio.file.{WatchKey, WatchEvent, Path => JPath}
import java.nio.file.StandardWatchEventKinds.{ENTRY_CREATE, ENTRY_DELETE, ENTRY_MODIFY}

import scala.collection.JavaConversions

class File_Watcher private[File_Watcher]   // dummy template
{
  def register(dir: JFile) { }
  def register_parent(file: JFile) { }
  def deregister(dir: JFile) { }
  def purge(retain: Set[JFile]) { }
  def shutdown() { }
}

object File_Watcher
{
  val none: File_Watcher = new File_Watcher {
    override def toString: String = "File_Watcher.none"
  }

  def apply(handle: Set[JFile] => Unit, delay: => Time = Time.seconds(0.5)): File_Watcher =
    if (Platform.is_windows) none else new Impl(handle, delay)

  // The body of the private implementation class is truncated in this excerpt;
  // only its shutdown override survives. The class header below is inferred
  // from the `new Impl(handle, delay)` call above.
  private class Impl(handle: Set[JFile] => Unit, delay: => Time) extends File_Watcher
  {
    // ... watch-service registration and event-handling members elided ...

    override def shutdown()
    {
      watcher_thread.interrupt
      watcher_thread.join
      delay_changed.revoke
    }
  }
}
Example 14
Source File: PluginXmlDetector.scala from sbt-idea-plugin with Apache License 2.0
package org.jetbrains.sbtidea.download

import java.net.URI
import java.nio.file.{FileSystems, Files, Path}
import java.util.Collections
import java.util.function.Predicate

private class PluginXmlDetector extends Predicate[Path] {

  import org.jetbrains.sbtidea.packaging.artifact._

  private val MAP = Collections.emptyMap[String, Any]()
  var result: String = _

  override def test(t: Path): Boolean = {
    if (!t.toString.endsWith(".jar"))
      return false

    val uri = URI.create(s"jar:${t.toUri}")

    try {
      using(FileSystems.newFileSystem(uri, MAP)) { fs =>
        val maybePluginXml = fs.getPath("META-INF", "plugin.xml")
        if (Files.exists(maybePluginXml)) {
          result = new String(Files.readAllBytes(maybePluginXml))
          true
        } else {
          false
        }
      }
    } catch {
      case e: java.util.zip.ZipError => throw new RuntimeException(s"Corrupt zip file: $t", e)
    }
  }
}
Example 15
Source File: PluginMock.scala from sbt-idea-plugin with Apache License 2.0
package org.jetbrains.sbtidea.download.plugin

import java.net.URI
import java.nio.file.{FileSystems, Files, Path}

import org.jetbrains.sbtidea.TmpDirUtils
import org.jetbrains.sbtidea.packaging.artifact
import org.jetbrains.sbtidea.Keys._
import org.jetbrains.sbtidea.download.plugin.PluginDescriptor.Dependency

import scala.collection.JavaConverters._

trait PluginMock extends TmpDirUtils {

  implicit class PluginMetaDataExt(metadata: PluginDescriptor) {
    def toPluginId: IntellijPlugin.Id = IntellijPlugin.Id(metadata.id, Some(metadata.version), None)
  }

  protected def createPluginJarMock(metaData: PluginDescriptor): Path = {
    val tmpDir = newTmpDir
    val targetPath = tmpDir.resolve(s"${metaData.name}.jar")
    val targetUri = URI.create("jar:" + targetPath.toUri)
    val opts = Map("create" -> "true").asJava
    artifact.using(FileSystems.newFileSystem(targetUri, opts)) { fs =>
      Files.createDirectory(fs.getPath("/", "META-INF"))
      Files.write(
        fs.getPath("/", "META-INF", "plugin.xml"),
        createPluginXmlContent(metaData).getBytes
      )
    }
    targetPath
  }

  protected def createPluginZipMock(metaData: PluginDescriptor): Path = {
    val tmpDir = newTmpDir
    val targetPath = tmpDir.resolve(s"${metaData.name}.zip")
    val targetUri = URI.create("jar:" + targetPath.toUri)
    val opts = Map("create" -> "true").asJava

    val mainPluginJar = createPluginJarMock(metaData)

    artifact.using(FileSystems.newFileSystem(targetUri, opts)) { fs =>
      val libRoot = fs.getPath("/", metaData.name, "lib")
      Files.createDirectories(libRoot)
      Files.copy(
        mainPluginJar,
        libRoot.resolve(mainPluginJar.getFileName.toString)
      )
    }
    targetPath
  }

  protected def createPluginXmlContent(metaData: PluginDescriptor): String = {
    val depStr = metaData.dependsOn.map {
      case Dependency(id, true)  => s"""<depends optional="true">$id</depends>"""
      case Dependency(id, false) => s"<depends>$id</depends>"
    }
    s"""
       |<idea-plugin>
       |  <name>${metaData.name}</name>
       |  <id>${metaData.id}</id>
       |  <version>${metaData.version}</version>
       |  <idea-version since-build="${metaData.sinceBuild}" until-build="${metaData.untilBuild}"/>
       |  ${depStr.mkString("\n")}
       |</idea-plugin>
       |""".stripMargin
  }
}
Example 16
Source File: package.scala from sbt-idea-plugin with Apache License 2.0
package org.jetbrains.sbtidea.packaging

import java.nio.file.FileSystems

import sbt.Keys.TaskStreams

import scala.util.control.NonFatal

package object artifact {

  def timed[T](msg: String, f: => T)(implicit streams: TaskStreams): T = {
    val start = System.currentTimeMillis()
    val res = f
    streams.log.info(s"(${System.currentTimeMillis() - start}ms) $msg")
    res
  }

  def using[T <: AutoCloseable, V](r: => T)(f: T => V): V = {
    val resource: T = r
    require(resource != null, "resource is null")
    var exception: Throwable = null
    try {
      f(resource)
    } catch {
      case NonFatal(e) =>
        exception = e
        throw e
    } finally {
      if (resource != FileSystems.getDefault)
        closeAndAddSuppressed(exception, resource)
    }
  }

  private def closeAndAddSuppressed(e: Throwable, resource: AutoCloseable): Unit = {
    if (e != null) {
      try {
        resource.close()
      } catch {
        case NonFatal(suppressed) =>
          e.addSuppressed(suppressed)
      }
    } else {
      resource.close()
    }
  }
}
Example 17
Source File: ContentNegLogsApp.scala from 006877 with MIT License
package aia.stream

import java.nio.file.{Files, FileSystems, Path}

import scala.concurrent.Future
import scala.concurrent.duration._

import akka.NotUsed
import akka.actor.{ActorSystem, Actor, Props}
import akka.event.Logging
import akka.stream.{ActorMaterializer, ActorMaterializerSettings, Supervision}
import akka.http.scaladsl.Http
import akka.http.scaladsl.Http.ServerBinding
import akka.http.scaladsl.server.Directives._
import com.typesafe.config.{Config, ConfigFactory}

object ContentNegLogsApp extends App {

  val config = ConfigFactory.load()
  val host = config.getString("http.host")
  val port = config.getInt("http.port")

  val logsDir = {
    val dir = config.getString("log-stream-processor.logs-dir")
    Files.createDirectories(FileSystems.getDefault.getPath(dir))
  }
  val maxLine = config.getInt("log-stream-processor.max-line")
  val maxJsObject = config.getInt("log-stream-processor.max-json-object")

  implicit val system = ActorSystem()
  implicit val ec = system.dispatcher

  val decider: Supervision.Decider = {
    case _: LogStreamProcessor.LogParseException => Supervision.Stop
    case _ => Supervision.Stop
  }

  implicit val materializer = ActorMaterializer(
    ActorMaterializerSettings(system)
      .withSupervisionStrategy(decider)
  )

  val api = new ContentNegLogsApi(logsDir, maxLine, maxJsObject).routes

  val bindingFuture: Future[ServerBinding] =
    Http().bindAndHandle(api, host, port)

  val log = Logging(system.eventStream, "content-neg-logs")
  bindingFuture.map { serverBinding =>
    log.info(s"Bound to ${serverBinding.localAddress} ")
  }.onFailure {
    case ex: Exception =>
      log.error(ex, "Failed to bind to {}:{}!", host, port)
      system.terminate()
  }
}
Example 18
Source File: LatencyAnalyzer.scala from spatial with MIT License
package spatial.dse

import argon._
import spatial.lang._
import spatial.node._
import spatial.util.spatialConfig
import spatial.util.modeling._
import spatial.traversal._
import spatial.targets._
import java.io.File
import models._
import argon.node._

case class LatencyAnalyzer(IR: State, latencyModel: LatencyModel) extends AccelTraversal {
  var cycleScope: List[Double] = Nil
  var intervalScope: List[Double] = Nil
  var totalCycles: Seq[Long] = Seq()
  val batchSize = 1000

  def getListOfFiles(d: String): List[String] = {
    import java.nio.file.{FileSystems, Files}
    import scala.collection.JavaConverters._
    val dir = FileSystems.getDefault.getPath(d)
    Files.walk(dir).iterator().asScala.filter(Files.isRegularFile(_)).map(_.toString).toList //.foreach(println)
  }

  override def silence(): Unit = {
    super.silence()
  }

  def test(rewriteParams: Seq[Seq[Any]]): Unit = {
    import scala.language.postfixOps
    import java.io.File
    import sys.process._

    val gen_dir = if (config.genDir.startsWith("/")) config.genDir + "/" else config.cwd + s"/${config.genDir}/"
    val modelJar = getListOfFiles(gen_dir + "/model").filter(_.contains("RuntimeModel-assembly")).head
    totalCycles = rewriteParams.grouped(batchSize).flatMap { params =>
      val batchedParams = params.map { rp => "tune " + rp.mkString(" ") }.mkString(" ")
      val cmd = s"""java -jar ${modelJar} ni ${batchedParams}"""
      // println(s"running cmd: $cmd")
      val output = Process(cmd, new File(gen_dir)).!!
      output.split("\n").filter(_.contains("Total Cycles for App")).map { r =>
        "^.*: ".r.replaceAllIn(r, "").trim.toLong
      }.toSeq
    }.toSeq
    // println(s"DSE Model result: $totalCycles")
  }

  override protected def preprocess[A](b: Block[A]): Block[A] = {
    super.preprocess(b)
  }

  override protected def postprocess[A](b: Block[A]): Block[A] = {
    super.postprocess(b)
  }

  override protected def visit[A](lhs: Sym[A], rhs: Op[A]): Unit = { }
}
Example 19
Source File: FileUtil.scala from wookiee with Apache License 2.0
package com.webtrends.harness.utils

import java.io.File
import java.nio.file.{FileSystems, Files, Path}

import scala.io.Source

// The enclosing declaration is truncated in this excerpt; `object FileUtil`
// is inferred from the source file name, and other members are elided.
object FileUtil {

  def getSymLink(f: File): File = {
    if (f == null)
      throw new NullPointerException("File must not be null")
    val path = FileSystems.getDefault.getPath(f.getPath)
    if (Files.isSymbolicLink(path)) {
      f.getCanonicalFile
    } else {
      f.getAbsoluteFile
    }
  }
}
Example 20
Source File: WatchServiceReceiver.scala from incubator-retired-iota with Apache License 2.0
package org.apache.iota.fey

import java.nio.file.StandardWatchEventKinds._
import java.nio.file.{FileSystems, Path}
import java.io.File

import akka.actor.ActorRef
import org.apache.iota.fey.JsonReceiverActor.JSON_RECEIVED
import play.api.libs.json._

import scala.io.Source

class WatchServiceReceiver(receiverActor: ActorRef) extends JsonReceiver {

  processInitialFiles()

  private val watchService = FileSystems.getDefault.newWatchService()

  def watch(path: Path): Unit =
    path.register(watchService, ENTRY_CREATE, ENTRY_MODIFY)

  def getJsonObject(params: String): Option[JsValue] = {
    try {
      val stringJson = Source.fromFile(params).getLines.mkString
      Option(Json.parse(stringJson))
    } catch {
      case e: Exception =>
        log.error("Could not parse JSON", e)
        None
    }
  }

  override def execute(): Unit = {
    val key = watchService.take()
    val eventsIterator = key.pollEvents().iterator()

    while (eventsIterator.hasNext) {
      val event = eventsIterator.next()
      val relativePath = event.context().asInstanceOf[Path]
      val path = key.watchable().asInstanceOf[Path].resolve(relativePath)
      log.debug(s"${event.kind()} --- $path")
      event.kind() match {
        case (ENTRY_CREATE | ENTRY_MODIFY) if path.toString.endsWith(CONFIG.JSON_EXTENSION) =>
          processJson(path.toString, path.toFile)
        case _ =>
      }
    }

    key.reset()
  }

  private[fey] def processJson(path: String, file: File) = {
    try {
      getJsonObject(path) match {
        case Some(orchestrationJSON) =>
          val valid = validJson(orchestrationJSON)
          if (valid && (orchestrationJSON \ JSON_PATH.COMMAND).as[String].toUpperCase != "DELETE") {
            checkForLocation(orchestrationJSON)
          }
          if (valid) {
            receiverActor ! JSON_RECEIVED(orchestrationJSON, file)
          } else {
            log.warn(s"File $path not processed. Incorrect JSON schema")
          }
        case None =>
      }
    } catch {
      case e: Exception =>
        log.error(s"File $path will not be processed", e)
    }
  }

  private def processInitialFiles() = {
    Utils.getFilesInDirectory(CONFIG.JSON_REPOSITORY)
      .filter(file => file.getName.endsWith(CONFIG.JSON_EXTENSION))
      .foreach(file => {
        processJson(file.getAbsolutePath, file)
      })
  }

  override def exceptionOnRun(e: Exception): Unit = {
    e match {
      case e: InterruptedException =>
      case e: Exception => log.error("Watch Service stopped", e)
    }
    watchService.close()
  }
}
Example 21
Source File: SourceFileTracer.scala from exodus with MIT License
package com.wix.bazel.migrator.analyze.jdk

import java.nio.file.{FileSystem, FileSystems, Files, Path}

import com.wix.bazel.migrator.model.SourceModule
import MavenRelativeSourceDirPathFromModuleRoot.PossibleLocation
import com.wix.bazel.migrator.analyze.CodePath

trait SourceFileTracer {
  def traceSourceFile(module: SourceModule, fqn: String, pathToJar: String, testClass: Boolean): CodePath
}

class JavaPSourceFileTracer(repoRoot: Path,
                            processRunner: ProcessRunner = new JavaProcessRunner,
                            fileSystem: FileSystem = FileSystems.getDefault) extends SourceFileTracer {
  private val Command = "javap"

  private def parseFileName(stdOut: String) = {
    val firstLine = stdOut.split("\n")(0)
    firstLine.split('"') match {
      case Array(_, fileName) => fileName
      case _ => throw new RuntimeException(s"Unknown stdout format $stdOut")
    }
  }

  private def findLocationIn(relativePathFromMonoRepoRoot: String, possibleLocations: Set[PossibleLocation], filePath: String): Option[String] =
    possibleLocations.find { location =>
      val possiblePath = repoRoot.resolve(relativePathFromMonoRepoRoot).resolve(location).resolve(filePath)
      Files.exists(possiblePath)
    }

  override def traceSourceFile(module: SourceModule, fqn: String, pathToClasses: String, testClass: Boolean): CodePath = {
    val packagePart = fqn.splitAt(fqn.lastIndexOf('.'))._1.replace('.', '/')
    val cmdArgs = List("-cp", pathToClasses, fqn)
    val runResult = processRunner.run(repoRoot, "javap", cmdArgs)
    if (runResult.exitCode != 0) {
      throw new RuntimeException(s"Problem locating the source file of class $fqn in $pathToClasses")
    }
    val filePath = packagePart + "/" + parseFileName(runResult.stdOut)
    val locations = MavenRelativeSourceDirPathFromModuleRoot.getPossibleLocationFor(testClass)
    findLocationIn(module.relativePathFromMonoRepoRoot, locations, filePath) match {
      case Some(location) => CodePath(module, location, filePath)
      case None =>
        throw new RuntimeException(s"Could not find location of $filePath in ${module.relativePathFromMonoRepoRoot}")
    }
  }
}

object MavenRelativeSourceDirPathFromModuleRoot {
  type PossibleLocation = String

  private val mainCodePrefixes = Set("src/main")
  private val testCodePrefixes = Set("src/test", "src/it", "src/e2e")
  private val languages = Set("java", "scala")

  private val ProdCodeLocations: Set[PossibleLocation] =
    mainCodePrefixes.flatMap(prefix => languages.map(language => s"$prefix/$language"))

  private val TestCodeLocations: Set[PossibleLocation] =
    testCodePrefixes.flatMap(prefix => languages.map(language => s"$prefix/$language"))

  def getPossibleLocationFor(testCode: Boolean): Set[PossibleLocation] =
    if (testCode) TestCodeLocations else ProdCodeLocations
}
Example 22
Source File: DefaultFileWatchService.scala from play-file-watch with Apache License 2.0
package play.dev.filewatch

import java.io.File
import java.nio.file.FileSystems

import io.methvin.watcher.DirectoryChangeEvent
import io.methvin.watcher.DirectoryChangeListener
import io.methvin.watcher.DirectoryWatcher
import io.methvin.watchservice.MacOSXListeningWatchService

import scala.collection.JavaConverters._
import scala.util.control.NonFatal

class DefaultFileWatchService(logger: LoggerProxy, isMac: Boolean) extends FileWatchService {

  def this(logger: LoggerProxy) = this(logger, false)

  def watch(filesToWatch: Seq[File], onChange: () => Unit) = {
    val dirsToWatch = filesToWatch.filter { file =>
      if (file.isDirectory) {
        true
      } else if (file.isFile) {
        logger.warn("An attempt has been made to watch the file: " + file.getCanonicalPath)
        logger.warn("DefaultFileWatchService only supports watching directories. The file will not be watched.")
        false
      } else false
    }

    val watchService = if (isMac) new MacOSXListeningWatchService() else FileSystems.getDefault.newWatchService()
    val directoryWatcher =
      DirectoryWatcher
        .builder()
        .paths(dirsToWatch.map(_.toPath).asJava)
        .listener(new DirectoryChangeListener {
          override def onEvent(event: DirectoryChangeEvent): Unit = onChange()
        })
        .watchService(watchService)
        .build()

    val thread = new Thread(
      new Runnable {
        override def run(): Unit = {
          try {
            directoryWatcher.watch()
          } catch {
            case NonFatal(_) => // Do nothing, this means the watch service has been closed, or we've been interrupted.
          }
        }
      },
      "play-watch-service"
    )
    thread.setDaemon(true)
    thread.start()

    new FileWatcher {
      override def stop(): Unit = directoryWatcher.close()
    }
  }
}
Example 23
Source File: FanLogsApp.scala from 006877 with MIT License
package aia.stream

import java.nio.file.{Files, FileSystems, Path}

import scala.concurrent.Future
import scala.concurrent.duration._

import akka.NotUsed
import akka.actor.{ActorSystem, Actor, Props}
import akka.event.Logging
import akka.stream.{ActorMaterializer, ActorMaterializerSettings, Supervision}
import akka.http.scaladsl.Http
import akka.http.scaladsl.Http.ServerBinding
import akka.http.scaladsl.server.Directives._
import com.typesafe.config.{Config, ConfigFactory}

object FanLogsApp extends App {

  val config = ConfigFactory.load()
  val host = config.getString("http.host")
  val port = config.getInt("http.port")

  val logsDir = {
    val dir = config.getString("log-stream-processor.logs-dir")
    Files.createDirectories(FileSystems.getDefault.getPath(dir))
  }
  val maxLine = config.getInt("log-stream-processor.max-line")
  val maxJsObject = config.getInt("log-stream-processor.max-json-object")

  implicit val system = ActorSystem()
  implicit val ec = system.dispatcher

  val decider: Supervision.Decider = {
    case _: LogStreamProcessor.LogParseException => Supervision.Resume
    case _ => Supervision.Stop
  }

  implicit val materializer = ActorMaterializer(
    ActorMaterializerSettings(system)
      .withSupervisionStrategy(decider)
  )

  val api = new FanLogsApi(logsDir, maxLine, maxJsObject).routes

  val bindingFuture: Future[ServerBinding] =
    Http().bindAndHandle(api, host, port)

  val log = Logging(system.eventStream, "fan-logs")
  bindingFuture.map { serverBinding =>
    log.info(s"Bound to ${serverBinding.localAddress} ")
  }.onFailure {
    case ex: Exception =>
      log.error(ex, "Failed to bind to {}:{}!", host, port)
      system.terminate()
  }
}
Example 24
Source File: LogsApp.scala from 006877 with MIT License
package aia.stream

import java.nio.file.{Files, FileSystems, Path}

import scala.concurrent.Future
import scala.concurrent.duration._

import akka.NotUsed
import akka.actor.{ActorSystem, Actor, Props}
import akka.event.Logging
import akka.stream.{ActorMaterializer, ActorMaterializerSettings, Supervision}
import akka.http.scaladsl.Http
import akka.http.scaladsl.Http.ServerBinding
import akka.http.scaladsl.server.Directives._
import com.typesafe.config.{Config, ConfigFactory}

object LogsApp extends App {

  val config = ConfigFactory.load()
  val host = config.getString("http.host")
  val port = config.getInt("http.port")

  val logsDir = {
    val dir = config.getString("log-stream-processor.logs-dir")
    Files.createDirectories(FileSystems.getDefault.getPath(dir))
  }
  val maxLine = config.getInt("log-stream-processor.max-line")

  implicit val system = ActorSystem()
  implicit val ec = system.dispatcher

  val decider: Supervision.Decider = {
    case _: LogStreamProcessor.LogParseException => Supervision.Stop
    case _ => Supervision.Stop
  }

  implicit val materializer = ActorMaterializer(
    ActorMaterializerSettings(system)
      .withSupervisionStrategy(decider)
  )

  val api = new LogsApi(logsDir, maxLine).routes

  val bindingFuture: Future[ServerBinding] =
    Http().bindAndHandle(api, host, port)

  val log = Logging(system.eventStream, "logs")
  bindingFuture.map { serverBinding =>
    log.info(s"Bound to ${serverBinding.localAddress} ")
  }.onFailure {
    case ex: Exception =>
      log.error(ex, "Failed to bind to {}:{}!", host, port)
      system.terminate()
  }
}
Example 25
Source File: LogStreamProcessorApp.scala from 006877 with MIT License
package aia.stream

import java.nio.file.{Files, FileSystems, Path}

import scala.concurrent.Future
import scala.concurrent.duration._

import akka.NotUsed
import akka.actor.{ActorSystem, Actor, Props}
import akka.event.Logging
import akka.stream.{ActorMaterializer, ActorMaterializerSettings, Supervision}
import akka.http.scaladsl.Http
import akka.http.scaladsl.Http.ServerBinding
import akka.http.scaladsl.server.Directives._
import com.typesafe.config.{Config, ConfigFactory}

object LogStreamProcessorApp extends App {

  val config = ConfigFactory.load()
  val host = config.getString("http.host")
  val port = config.getInt("http.port")

  val logsDir = {
    val dir = config.getString("log-stream-processor.logs-dir")
    Files.createDirectories(FileSystems.getDefault.getPath(dir))
  }
  val notificationsDir = {
    val dir = config.getString("log-stream-processor.notifications-dir")
    Files.createDirectories(FileSystems.getDefault.getPath(dir))
  }
  val metricsDir = {
    val dir = config.getString("log-stream-processor.metrics-dir")
    Files.createDirectories(FileSystems.getDefault.getPath(dir))
  }
  val maxLine = config.getInt("log-stream-processor.max-line")
  val maxJsObject = config.getInt("log-stream-processor.max-json-object")

  implicit val system = ActorSystem()
  implicit val ec = system.dispatcher

  val decider: Supervision.Decider = {
    case _: LogStreamProcessor.LogParseException => Supervision.Resume
    case _ => Supervision.Stop
  }

  implicit val materializer = ActorMaterializer(
    ActorMaterializerSettings(system)
      .withSupervisionStrategy(decider)
  )

  val api = new LogStreamProcessorApi(logsDir, notificationsDir, metricsDir, maxLine, maxJsObject).routes

  val bindingFuture: Future[ServerBinding] =
    Http().bindAndHandle(api, host, port)

  val log = Logging(system.eventStream, "processor")
  bindingFuture.map { serverBinding =>
    log.info(s"Bound to ${serverBinding.localAddress} ")
  }.onFailure {
    case ex: Exception =>
      log.error(ex, "Failed to bind to {}:{}!", host, port)
      system.terminate()
  }
}
Example 26
Source File: withZipFs.scala from Converter with GNU General Public License v3.0
package org.scalablytyped.converter.internal.importer

import java.net.URI
import java.nio.file.Path

object withZipFs {
  import java.nio.file.FileSystems

  def maybe[T](path: os.Path, enable: Boolean)(f: Option[Path] => T): T =
    if (enable) apply(path)(path => f(Some(path)))
    else f(None)

  def apply[T](path: os.Path)(f: Path => T): T = {
    val uri = URI.create(s"jar:file:${path}")
    val env = new java.util.HashMap[String, String]()
    env.put("create", "true")
    val zipfs = FileSystems.newFileSystem(uri, env)
    val root = zipfs.getPath("/")
    try f(root)
    finally zipfs.close()
  }
}
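A brief usage sketch of the withZipFs helper above; this is illustrative only, archive.zip is a placeholder, and because the helper passes create=true the archive is created if it does not exist.

import java.nio.file.Files

withZipFs(os.pwd / "archive.zip") { root =>
  // List the entries at the archive root.
  Files.list(root).forEach(p => println(p))
}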
Example 27
Source File: TestDataset.scala from uberdata with Apache License 2.0
package eleflow.uberdata.data

import java.nio.file.{FileSystems, Files}

import eleflow.uberdata.core.IUberdataContext
import eleflow.uberdata.core.conf.SparkNotebookConfig
import eleflow.uberdata.core.data.{DataTransformer, Dataset, FileDataset}
import eleflow.uberdata.core.enums.{DataSetType, DateSplitType}
import eleflow.uberdata.core.util.ClusterSettings
import eleflow.uberdata.util.DateUtil
import org.apache.spark.rpc.netty.BeforeAndAfterWithContext
import org.apache.spark.sql.Row
import org.apache.spark.sql.types._
import org.scalatest.{FlatSpec, Matchers}
import Dataset._
import org.apache.spark.ml.tuning.TrainValidationSplitModel

class TestDataset extends FlatSpec with Matchers with BeforeAndAfterWithContext {

  val model: TrainValidationSplitModel   // initializer truncated in this excerpt

  it should "Correct handle date dayofaweek values" in {
    val dataSet = Dataset(context, s"${defaultFilePath}HandleDataTransformer.csv")
    val ndataSet = dataSet.formatDateValues(4, DateSplitType.DayOfAWeek)
    val results = DataTransformer.createLabeledPointFromRDD(ndataSet, Seq("int"), Seq("id"), DataSetType.Train).collect()
    assert(results(0)._1._2 == 1)
    assert(results(1)._1._2 == 2)
    assert(results(2)._1._2 == 3)
    assert(results(0)._2.features.toArray.deep == Array(0.0, 1.0, 10.5, 4.0).deep)
    assert(results(1)._2.features.toArray.deep == Array(1.0, 0.0, 0.1, 6.0).deep)
    assert(results(2)._2.features.toArray.deep == Array(1.0, 0.0, 10.0, 6.0).deep)
  }

  it should "Correct handle date dayofaweek and period values" in {
    //context.sparkContext.clearJars()
    DateUtil.applyDateFormat("YYMMddHH")
    val fileDataset = Dataset(context, s"${defaultFilePath}DayOfAWeekDataTransformer.csv")
    fileDataset.applyColumnTypes(Seq(LongType, LongType, StringType,
      DecimalType(ClusterSettings.defaultDecimalPrecision, ClusterSettings.defaultDecimalScale), TimestampType))
    val dataset = FileDatasetToDataset(fileDataset)
    val datasetWithDate: Array[Row] = dataset.formatDateValues(4, DateSplitType.DayOfAWeek | DateSplitType.Period).collect()
    assert(datasetWithDate(0)(4) == 4)
    assert(datasetWithDate(0)(5) == 3)
    assert(datasetWithDate(1)(4) == 5)
    assert(datasetWithDate(1)(5) == 3)
    assert(datasetWithDate(2)(4) == 6)
    assert(datasetWithDate(2)(5) == 3)
    val filePath = FileSystems.getDefault.getPath(SparkNotebookConfig.propertyFolder, SparkNotebookConfig.dateFormatFileName)
    Files.deleteIfExists(filePath)
  }
}
Example 28
Source File: SerializedCpg.scala from codepropertygraph with Apache License 2.0
package io.shiftleft

import java.io.{File, IOException}
import java.net.{URI, URISyntaxException}
import java.nio.file.{FileSystem, FileSystems, Files}
import java.util

import com.google.protobuf.GeneratedMessageV3

class SerializedCpg extends AutoCloseable {

  // The constructors and the fields they initialize (isEmpty, zipFileSystem,
  // counter) are truncated in this excerpt.

  @throws[IOException]
  def addOverlay(overlay: GeneratedMessageV3, name: String): Unit = {
    if (!isEmpty) {
      val pathInZip = zipFileSystem.getPath(s"${counter}_${name}")
      counter += 1
      val outputStream = Files.newOutputStream(pathInZip)
      overlay.writeTo(outputStream)
      outputStream.close()
    }
  }

  @throws[IOException]
  def addOverlay(overlays: Iterator[GeneratedMessageV3], name: String): Unit = {
    overlays.zipWithIndex.foreach {
      case (overlay, i) => addOverlay(overlay, name + "_" + i)
    }
  }

  @throws[IOException]
  override def close(): Unit = {
    if (!isEmpty) {
      zipFileSystem.close()
    }
  }
}
Example 29
Source File: ZipArchive.scala from codepropertygraph with Apache License 2.0
package io.shiftleft.codepropertygraph.cpgloading

import java.io.Closeable
import java.nio.file.attribute.BasicFileAttributes
import java.nio.file.{FileSystem, FileSystems, FileVisitResult, Files, Path, Paths, SimpleFileVisitor}
import java.util.{Collection => JCollection}

import scala.collection.mutable.ArrayBuffer
import scala.jdk.CollectionConverters._

class ZipArchive(inputFile: String) extends Closeable {
  private val zipFileSystem: FileSystem = FileSystems.newFileSystem(Paths.get(inputFile), null)

  private def root: Path = zipFileSystem.getRootDirectories.iterator.next

  private def walk(rootPath: Path): Seq[Path] = {
    val entries = ArrayBuffer[Path]()
    Files.walkFileTree(
      rootPath,
      new SimpleFileVisitor[Path]() {
        override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = {
          if (attrs.isRegularFile) entries += file
          FileVisitResult.CONTINUE
        }
      }
    )
    entries.toSeq
  }

  def entries: Seq[Path] = walk(root)

  def getFileEntries: JCollection[Path] = entries.asJava

  override def close(): Unit = zipFileSystem.close()
}
Example 30
Source File: AvrohuggerSpec.scala from avrohugger with Apache License 2.0
package util

import avrohugger._
import avrohugger.format._
import avrohugger.format.abstractions.SourceFormat

import java.io.File
import java.nio.file.{FileSystems, Path}

import org.specs2.SpecificationLike
import org.specs2.matcher.{Matcher, Matchers, ShouldExpectable}

import scala.io.Source

class AvrohuggerSpec(
  inPath: Path,
  val outputFiles: Seq[Path],
  sourceFormat: SourceFormat
) extends Matchers {

  implicit class PathExtensions(path: Path) {
    def ++(next: String) = path.resolve(next)
    def ++(other: Path) = path.resolve(other)
  }

  val sourceFormatName = sourceFormat match {
    case SpecificRecord => "specific"
    case Standard => "standard"
    case Scavro => "scavro"
  }

  val gen = new Generator(sourceFormat)

  val inputPath = {
    val sourceBase = FileSystems.getDefault.getPath("avrohugger-core", "src", "test", "avro")
    (sourceBase ++ inPath)
  }
  val inputFile = inputPath.toFile

  val outDir = gen.defaultOutputDir + s"/$sourceFormatName/"

  private def readFile(f: File): String = {
    val source = Source.fromFile(f)
    try source.mkString finally source.close()
  }

  val expectedBase = FileSystems.getDefault.getPath("avrohugger-core", "src", "test", "expected", sourceFormatName)
  val generatedBase = FileSystems.getDefault.getPath("target", "generated-sources", sourceFormatName)

  private def prefixedFileString(prefixPath: Path, p: Path) = {
    val fullPath = sourceFormat match {
      case Scavro => {
        Option(p.getParent) match {
          case Some(parent) => parent ++ "model" ++ p.getFileName
          case None => FileSystems.getDefault.getPath("model") ++ p
        }
      }
      case _ => p
    }
    readFile((prefixPath ++ fullPath).toFile)
  }

  def generatedString(p: Path) = prefixedFileString(generatedBase, p)
  def expectedString(p: Path) = prefixedFileString(expectedBase, p)

  def checkFileToFile = {
    gen.fileToFile(inputFile, outDir)

    val generated = outputFiles map generatedString
    val expected = outputFiles map expectedString

    ShouldExpectable(generated) shouldEqual expected
  }

  def checkFileToStrings = {
    val generated = gen.fileToStrings(inputFile)
    val expected = outputFiles map expectedString

    ShouldExpectable(generated) shouldEqual expected
  }

  def checkStringToFile = {
    val inputString = readFile(inputFile)
    gen.stringToFile(inputString, outDir)

    val generated = outputFiles map generatedString
    val expected = outputFiles map expectedString

    ShouldExpectable(generated) shouldEqual expected
  }

  def checkStringToStrings = {
    val generated = {
      val inputString = readFile(inputFile)
      gen.stringToStrings(inputString)
    }

    val expected = outputFiles map expectedString

    ShouldExpectable(generated) shouldEqual expected
  }
}
Example 31
Source File: GlobalTests.scala from avrohugger with Apache License 2.0
package util

import avrohugger.format.abstractions.SourceFormat

import java.nio.file.{FileSystems, Path}

final case class AvrohuggerTest(
  inPath: Path,
  outputFiles: Seq[Path],
  description: String
) {
  def toSpec(sf: SourceFormat) = new AvrohuggerSpec(inPath, outputFiles, sf)
}

object GlobalTests {
  import scala.language.implicitConversions

  private implicit def stringToPath(s: String): Path = FileSystems.getDefault.getPath(s)
  private implicit def stringSeqToPath(comps: Seq[String]): Path =
    FileSystems.getDefault.getPath(comps mkString "/")

  val tests = Seq(
    AvrohuggerTest(
      "importedcomplex.avsc",
      Seq(
        Seq("model", "UnionRecord.scala"),
        Seq("model", "v2", "NestedRecord.scala"),
        Seq("test", "ComplexExternalDependency.scala")
      ),
      "correctly generate imports from complex types"
    )
  )
}
Example 32
Source File: index_brown.scala from attic-nlp4l with Apache License 2.0
import java.io.File
import java.nio.file.FileSystems

import org.apache.lucene.index._
import org.apache.lucene.search.TermQuery
import org.nlp4l.core.analysis.Analyzer
import org.nlp4l.core._

import scala.io._
import scalax.file.Path
import scalax.file.PathSet

val index = "/tmp/index-brown"

def removePos(line: String): String = {
  val words = line.split(" ")
  words.map { word =>
    val idx = word.lastIndexOf('/')
    if (idx >= 0) word.substring(0, idx) else word
  }.mkString(" ")
}

def document(file: Path, catsmap: Map[String, String]): Document = {
  val ps: Array[String] = file.path.split(File.separator)
  // for Windows
  // val ps: Array[String] = file.path.split("\\\\")
  val fl = ps(3)
  val cat = catsmap.getOrElse(fl, "")
  val bodyPos = file.lines().filterNot(_.length() == 0).toList
  val body = bodyPos.map(removePos(_))
  Document(Set(
    Field("file", fl), Field("cat", cat),
    Field("body_pos", bodyPos), Field("body_pos_nn", bodyPos),
    Field("body", body))
  )
}

// delete existing Lucene index
val p = Path(new File(index))
p.deleteRecursively()

// define a schema for the index
// load schema from file
val schema = SchemaLoader.loadFile("examples/schema/brown.conf")

// write documents into an index
val writer = IWriter(index, schema)

// read category list
val cats = Source.fromFile("corpora/brown/brown/cats.txt")
val catsmap = cats.getLines().map { line =>
  val ps = line.split(" ")
  (ps(0), ps(1))
}.toMap

val c: PathSet[Path] = Path("corpora", "brown", "brown").children()

// write articles
c.filter(e => e.name.startsWith("c") && e.name.length() == 4).toList.sorted.foreach(f => writer.write(document(f, catsmap)))

writer.close

// search test
val searcher = ISearcher(index)
val results = searcher.search(query = new TermQuery(new Term("body", "smoothly")), rows = 10)

results.foreach(doc => {
  printf("[DocID] %d: %s\n", doc.docId, doc.get("file"))
})
Example 33
Source File: index_ceeaus_all.scala from attic-nlp4l with Apache License 2.0
import java.io.File
import java.io.FileInputStream
import java.io.InputStreamReader
import java.io.BufferedReader
import java.nio.file.FileSystems

import org.apache.lucene.index._
import org.apache.lucene.search.TermQuery
import org.nlp4l.core.analysis.Analyzer
import org.nlp4l.core.analysis.AnalyzerBuilder
import org.nlp4l.core._

import scalax.file.Path
import scalax.file.PathSet

val index = "/tmp/index-ceeaus-all"

def lines(fl: Path, encoding: String): List[String] = {
  val is = new FileInputStream(fl.path)
  val r = new InputStreamReader(is, encoding)
  val br = new BufferedReader(r)
  var result: List[String] = Nil
  try {
    var line = br.readLine()
    while (line != null) {
      result = result :+ line
      line = br.readLine()
    }
    result
  } finally {
    br.close
    r.close
    is.close
  }
}

def document(fl: Path, ja: Boolean): Document = {
  val ps: Array[String] = fl.path.split(File.separator)
  // for Windows
  // val ps: Array[String] = file.path.split("\\\\")
  val file = ps(3)
  val typ = ps(2)
  val cat = "all"
  val encoding = if (ja) "sjis" else "UTF-8"
  val body = lines(fl, encoding)
  val body_set =
    if (ja) Set(Field("body_ja", body))
    else Set(Field("body_en", body), Field("body_ws", body))
  Document(Set(
    Field("file", file), Field("type", typ), Field("cat", cat)) ++ body_set
  )
}

// delete existing Lucene index
val p = Path(new File(index))
p.deleteRecursively()

// write documents into an index
val schema = SchemaLoader.loadFile("examples/schema/ceeaus.conf")
val writer = IWriter(index, schema)

val c: PathSet[Path] = Path("corpora", "CEEAUS", "PLAIN").children()

// write English docs
c.filter(e => e.name.indexOf("cjejus") < 0 && e.name.endsWith(".txt")).toList.sorted.foreach(g => writer.write(document(g, false)))

// write Japanese docs
c.filter(e => e.name.indexOf("cjejus") >= 0 && e.name.endsWith(".txt")).toList.sorted.foreach(g => writer.write(document(g, true)))

writer.close

// search test
val searcher = ISearcher(index)
val results = searcher.search(query = new TermQuery(new Term("body_ja", "喫煙")), rows = 10)

results.foreach(doc => {
  printf("[DocID] %d: %s\n", doc.docId, doc.get("file"))
})

// search test for ch4
val results2 = searcher.search(query = new TermQuery(new Term("body_ws", "still,")), rows = 10)

results2.foreach(doc => {
  printf("[DocID] %d: %s\n", doc.docId, doc.get("file"))
})