org.apache.commons.io.FileUtils Scala Examples
The following examples show how to use org.apache.commons.io.FileUtils.
Each example is taken from an open-source project; the source file, project name, and license are noted above each listing.
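Before the project examples, here is a minimal, self-contained sketch of the FileUtils calls that recur throughout them (forceMkdir, writeStringToFile, readFileToString, copyFileToDirectory, deleteDirectory). The object name and file names are placeholders chosen for illustration and are not part of any project shown below.

import java.io.File
import java.nio.charset.StandardCharsets
import org.apache.commons.io.FileUtils

object FileUtilsQuickStart extends App {
  // work in a throwaway directory under the system temp dir
  val workDir = new File(System.getProperty("java.io.tmpdir"), "fileutils-demo")
  FileUtils.forceMkdir(workDir) // create the directory (and any missing parents)

  // write a small text file, creating it if it does not exist
  val config = new File(workDir, "app.conf")
  FileUtils.writeStringToFile(config, "maxColumn = 120\n", StandardCharsets.UTF_8)

  // read it back as a String
  println(FileUtils.readFileToString(config, StandardCharsets.UTF_8))

  // copy the file into a sibling directory, keeping its name
  FileUtils.copyFileToDirectory(config, new File(workDir, "backup"))

  // recursively delete the whole working directory when done
  FileUtils.deleteDirectory(workDir)
}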
Example 1
Source File: Releaser.scala From releaser with Apache License 2.0
package uk.gov.hmrc.releaser

import java.io.File
import java.nio.file.{Files, Path}

import org.apache.commons.io.FileUtils
import uk.gov.hmrc.releaser.bintray.{BintrayHttp, BintrayRepoConnector, DefaultBintrayRepoConnector}
import uk.gov.hmrc.releaser.github.{GithubConnector, Repo}
import uk.gov.hmrc.{CredentialsFinder, FileDownloader, Logger}

import scala.util.{Failure, Success, Try}

object ReleaserMain {
  def main(args: Array[String]): Unit = {
    val result = Releaser(args)
    System.exit(result)
  }
}

object Releaser extends Logger {

  import ArgParser._

  def apply(args: Array[String]): Int = {
    parser.parse(args, Config()) match {
      case Some(config) =>
        val githubName = config.githubNameOverride.getOrElse(config.artefactName)
        run(config.artefactName, ReleaseCandidateVersion(config.rcVersion), config.releaseType, githubName, config.releaseNotes, config.dryRun)
      case None => -1
    }
  }

  def run(artefactName: String,
          rcVersion: ReleaseCandidateVersion,
          releaseType: ReleaseType.Value,
          gitHubName: String,
          releaseNotes: Option[String],
          dryRun: Boolean = false): Int = {
    val githubCredsFile = System.getProperty("user.home") + "/.github/.credentials"
    val bintrayCredsFile = System.getProperty("user.home") + "/.bintray/.credentials"

    val githubCredsOpt = CredentialsFinder.findGithubCredsInFile(new File(githubCredsFile).toPath)
    val bintrayCredsOpt = CredentialsFinder.findBintrayCredsInFile(new File(bintrayCredsFile).toPath)

    doReleaseWithCleanup { directories =>
      if (githubCredsOpt.isEmpty) {
        log.info(s"Didn't find github credentials in $githubCredsFile")
        -1
      } else if (bintrayCredsOpt.isEmpty) {
        log.info(s"Didn't find Bintray credentials in $bintrayCredsFile")
        -1
      } else {
        val releaserVersion = getClass.getPackage.getImplementationVersion
        val metaDataProvider = new ArtefactMetaDataProvider()
        val gitHubDetails = if (dryRun) GithubConnector.dryRun(githubCredsOpt.get, releaserVersion) else GithubConnector(githubCredsOpt.get, releaserVersion)
        val bintrayDetails = if (dryRun) BintrayRepoConnector.dryRun(bintrayCredsOpt.get, directories.workDir) else BintrayRepoConnector(bintrayCredsOpt.get, directories.workDir)
        val bintrayRepoConnector = new DefaultBintrayRepoConnector(directories.workDir, new BintrayHttp(bintrayCredsOpt.get), new FileDownloader)

        val coordinator = new Coordinator(directories.stageDir, metaDataProvider, gitHubDetails, bintrayRepoConnector)
        val result = coordinator.start(artefactName, Repo(gitHubName), rcVersion, releaseType, releaseNotes)

        result match {
          case Success(targetVersion) =>
            log.info(s"Releaser successfully released $artefactName $targetVersion")
            0
          case Failure(e) =>
            e.printStackTrace()
            log.info(s"Releaser failed to release $artefactName $rcVersion with error '${e.getMessage}'")
            1
        }
      }
    }
  }

  def doReleaseWithCleanup[T](f: ReleaseDirectories => T): T = {
    val directories = ReleaseDirectories()
    try {
      f(directories)
    } finally {
      log.info("cleaning releaser work directory")
      directories.delete().recover { case t => log.warn(s"failed to delete releaser work directory ${t.getMessage}") }
    }
  }
}

case class ReleaseDirectories(tmpDirectory: Path = Files.createTempDirectory("releaser")) {

  lazy val workDir = Files.createDirectories(tmpDirectory.resolve("work"))
  lazy val stageDir = Files.createDirectories(tmpDirectory.resolve("stage"))

  def delete() = Try {
    FileUtils.forceDelete(tmpDirectory.toFile)
  }
}
Example 2
Source File: RemoteConfigWriter.scala From mvn_scalafmt with Apache License 2.0
package org.antipathy.mvn_scalafmt.io

import org.antipathy.mvn_scalafmt.model.RemoteConfig
import java.io.File
import java.nio.charset.StandardCharsets
import org.apache.commons.io.FileUtils
import org.apache.maven.plugin.logging.Log
import java.nio.file.{Files, Path}

// Note: the enclosing class declaration is elided in this excerpt; only the write method is shown.
  override def write(input: RemoteConfig): Path = {
    log.info(s"Writing remote config to ${input.location.toAbsolutePath}")

    if (Files.exists(input.location)) {
      Files.delete(input.location)
    }

    val newConfig = new File(input.location.toAbsolutePath.toString)
    FileUtils.writeStringToFile(
      newConfig,
      input.contents,
      StandardCharsets.UTF_8
    )
    newConfig.toPath
  }
}
Example 3
Source File: RemoteConfigWriterSpec.scala From mvn_scalafmt with Apache License 2.0
package org.antipathy.mvn_scalafmt.io

import java.io.File
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths}

import org.antipathy.mvn_scalafmt.model.RemoteConfig
import org.apache.commons.io.FileUtils
import org.apache.maven.plugin.logging.SystemStreamLog
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.GivenWhenThen
import org.scalatest.matchers.should.Matchers

class RemoteConfigWriterSpec extends AnyFlatSpec with GivenWhenThen with Matchers {

  behavior of "RemoteConfigWriter"

  it should "Write a config to a local path" in {
    val localPath = s"${System.getProperty("java.io.tmpdir")}${File.separator}.scalafmt.conf"
    val contents =
      """version = "1.5.1"
        |maxColumn = 120
        |align = false
        |rewrite.rules = [SortImports]
        |danglingParentheses = true
        |importSelectors = singleLine
        |binPack.parentConstructors = true
        |includeCurlyBraceInSelectChains = false""".stripMargin
    val writer = new RemoteConfigWriter(new SystemStreamLog)
    val input = RemoteConfig(contents, Paths.get(localPath))

    writer.write(input)

    new String(Files.readAllBytes(new File(localPath).toPath))
    Files.delete(input.location)
  }

  it should "Overwrite a config in a local path" in {
    val localPath = s"${System.getProperty("java.io.tmpdir")}${File.separator}.scalafmt2.conf"
    val contents =
      """version = "1.5.1"
        |maxColumn = 120
        |align = false
        |rewrite.rules = [SortImports]
        |danglingParentheses = true
        |importSelectors = singleLine
        |binPack.parentConstructors = true
        |includeCurlyBraceInSelectChains = false""".stripMargin
    val oldContents = "SomeOldConfig"

    val writer = new RemoteConfigWriter(new SystemStreamLog)
    val input = RemoteConfig(contents, Paths.get(localPath))

    FileUtils.writeStringToFile(new File(localPath), oldContents, StandardCharsets.UTF_8)
    new String(Files.readAllBytes(new File(localPath).toPath)) should be(oldContents)

    writer.write(input)

    new String(Files.readAllBytes(new File(localPath).toPath)) should be(contents)
    Files.delete(input.location)
  }
}
Example 4
Source File: TaglessFinal.scala From Mastering-Functional-Programming with MIT License
package jvm

import scala.concurrent.{Future, Await}
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.Duration

import cats._, cats.implicits._

trait Capabilities[F[_]] {
  def resource(name: String): F[String]
  def notify(target: String, text: String): F[Unit]
}

object TaglessFinalExample extends App {

  implicit val capabilities: Capabilities[Future] = new Capabilities[Future] {
    import java.io.File
    import org.apache.commons.io.FileUtils

    def resource(name: String): Future[String] =
      Future { FileUtils.readFileToString(new File(name), "utf8") }

    def notify(target: String, text: String): Future[Unit] =
      Future { println(s"Notifying $target: $text") }
  }

  implicit val anotherEnvironmentCapabilities: Capabilities[Future] = new Capabilities[Future] {
    def resource(name: String): Future[String] = ???
    def notify(target: String, text: String): Future[Unit] = ???
  }

  implicit val logMonad: Monad[Future] = new Monad[Future] {
    def flatMap[A, B](fa: Future[A])(f: (A) ⇒ Future[B]): Future[B] =
      fa.flatMap { x =>
        println(s"Trace of the Future's result: $x")
        f(x)
      }

    def pure[A](x: A): Future[A] = Future(x)

    def tailRecM[A, B](a: A)(f: (A) ⇒ Future[Either[A, B]]): Future[B] = ???
  }

  def income[F[_]](implicit M: Monad[F], C: Capabilities[F]): F[Unit] =
    for {
      contents <- C.resource("sales.csv")
      total = contents
        .split("\n").toList.tail  // Collection of lines, drop the CSV header
        .map { _.split(",").toList match  // List[Double] - prices of each of the entries
          { case name :: price :: Nil => price.toDouble }
        }
        .sum
      _ <- C.notify("[email protected]", s"Total income made today: $total")
    } yield ()

  Await.result(income[Future](logMonad, capabilities), Duration.Inf)  // Block so that the application does not exit prematurely
}

object FacadeExample {
  trait Capabilities {
    def resource(name: String): String
    def notify(target: String, text: String): Unit
  }

  def income(c: Capabilities): Unit = {
    val contents = c.resource("sales.csv")
    val total = contents
      .split("\n").toList.tail  // Collection of lines, drop the CSV header
      .map { _.split(",").toList match  // List[Double] - prices of each of the entries
        { case name :: price :: Nil => price.toDouble }
      }
      .sum
    c.notify("[email protected]", s"Total income made today: $total")
  }
}
Example 5
Source File: TilingServiceSpec.scala From recogito2 with Apache License 2.0
package transform.tiling

import java.io.File

import org.apache.commons.io.FileUtils
import org.specs2.mutable._
import org.specs2.runner._
import org.junit.runner._

import play.api.test._
import play.api.test.Helpers._

@RunWith(classOf[JUnitRunner])
class TilingServiceSpec extends Specification {

  val TEST_IMAGE = new File("test/resources/transform/tiling/Ptolemy_map_15th_century.jpg")

  val TMP_DIR = {
    val dir = new File("test/resources/transform/tiling/tmp")
    if (dir.exists) FileUtils.deleteDirectory(dir)
    dir
  }

  "The Tiling function" should {

    "create proper Zoomify tiles from the test image" in {
      TilingService.createZoomify(TEST_IMAGE, TMP_DIR)

      TMP_DIR.exists must equalTo(true)
      TMP_DIR.list.size must equalTo(2)

      new File(TMP_DIR, "ImageProperties.xml").exists must equalTo(true)

      val tileGroup0 = new File(TMP_DIR, "TileGroup0")
      tileGroup0.exists must equalTo(true)
      tileGroup0.list.size must equalTo(65)
      tileGroup0.list.filter(_.endsWith(".jpg")).size must equalTo(65)

      FileUtils.deleteDirectory(TMP_DIR)
      success
    }
  }
}
Example 6
Source File: TestNewApiWithCaseClass.scala From Mastering-Spark-for-Data-Science with MIT License
package io.gzet.newapi

import java.io.File

import io.gzet.newapi.CreateAvroWithCase.{V21EnhancedDate, GkgRecordCase}
import io.gzet.test.SparkFunSuite
import com.databricks.spark.avro._
import org.apache.commons.io.FileUtils

class TestNewApiWithCaseClass extends SparkFunSuite {

  val inputFilePath = getClass.getResource("/20160101020000.gkg.csv")
  val avroStructPath = "target/20160101020000.gkg.case.avro"

  localTest("Create and write Avro using spark-avro lib and case") { spark =>
    val gdeltRDD = spark.sparkContext.textFile(inputFilePath.toString)
    val gdeltRowRDD = gdeltRDD.map(_.split("\t", -1))
    val gkgRecordRDD = gdeltRowRDD.map(attributes =>
      GkgRecordCase(CreateAvroWithCase.createGkgRecordId(attributes(0)),
        attributes(1).toLong,
        attributes(2),
        attributes(3),
        attributes(4),
        CreateAvroWithCase.createV1Counts(attributes(5)),
        CreateAvroWithCase.createV21Counts(attributes(6)),
        CreateAvroWithCase.createV1Themes(attributes(7)),
        CreateAvroWithCase.createV2EnhancedThemes(attributes(8)),
        CreateAvroWithCase.createV1Locations(attributes(9)),
        CreateAvroWithCase.createV2Locations(attributes(10)),
        CreateAvroWithCase.createV1Persons(attributes(11)),
        CreateAvroWithCase.createV2Persons(attributes(12)),
        CreateAvroWithCase.createV1Orgs(attributes(13)),
        CreateAvroWithCase.createV2Orgs(attributes(14)),
        CreateAvroWithCase.createV1Stone(attributes(15)),
        CreateAvroWithCase.createEnhancedDate(attributes(16)),
        CreateAvroWithCase.createV2GCAM(attributes(17)),
        attributes(18),
        CreateAvroWithCase.createV21RelImgAndVid(attributes(19)),
        CreateAvroWithCase.createV21RelImgAndVid(attributes(20)),
        CreateAvroWithCase.createV21RelImgAndVid(attributes(21)),
        CreateAvroWithCase.createV21Quotations(attributes(22)),
        CreateAvroWithCase.createV21AllNames(attributes(23)),
        CreateAvroWithCase.createV21Amounts(attributes(24)),
        CreateAvroWithCase.createV21TransInfo(attributes(25)),
        attributes(26))
    )

    FileUtils.deleteDirectory(new File(avroStructPath))
    val gdeltDF = spark.createDataFrame(gkgRecordRDD)
    gdeltDF.write.avro(avroStructPath)
    assertResult(4)(new File(avroStructPath).listFiles.length)
  }

  localTest("Read Avro into Dataframe using spark-avro") { spark =>
    val gdeltAvroDF = spark.read.format("com.databricks.spark.avro").load(avroStructPath)
    assertResult(10)(gdeltAvroDF.count)
    gdeltAvroDF.show
  }
}
Example 7
Source File: TestNewApiWithStructs.scala From Mastering-Spark-for-Data-Science with MIT License
package io.gzet.newapi

import java.io.File

import io.gzet.test.SparkFunSuite
import com.databricks.spark.avro._
import org.apache.commons.io.FileUtils
import org.apache.spark.sql.Row

class TestNewApiWithStructs extends SparkFunSuite {

  val inputFilePath = getClass.getResource("/20160101020000.gkg.csv")
  val avroStructPath = "target/20160101020000.gkg.struct.avro"

  localTest("Create and write Avro using spark-avro lib and Structs") { spark =>
    val gdeltRDD = spark.sparkContext.textFile(inputFilePath.toString)
    val gdeltRowRDD = gdeltRDD.map(_.split("\t", -1))
      .map(attributes => Row(
        CreateAvroWithStructs.createGkgRecordID(attributes(0)),
        attributes(1).toLong,
        attributes(2),
        attributes(3),
        attributes(4),
        CreateAvroWithStructs.createV1Counts(attributes(5)),
        CreateAvroWithStructs.createV21Counts(attributes(6)),
        CreateAvroWithStructs.createV1Themes(attributes(7)),
        CreateAvroWithStructs.createV2EnhancedThemes(attributes(8)),
        CreateAvroWithStructs.createV1Locations(attributes(9)),
        CreateAvroWithStructs.createV2Locations(attributes(10)),
        CreateAvroWithStructs.createV1Persons(attributes(11)),
        CreateAvroWithStructs.createV2Persons(attributes(12)),
        CreateAvroWithStructs.createV1Orgs(attributes(13)),
        CreateAvroWithStructs.createV2Orgs(attributes(14)),
        CreateAvroWithStructs.createV1Stone(attributes(15)),
        CreateAvroWithStructs.createV21Dates(attributes(16)),
        CreateAvroWithStructs.createV2GCAM(attributes(17)),
        attributes(18),
        CreateAvroWithStructs.createV21RelImgAndVid(attributes(19)),
        CreateAvroWithStructs.createV21RelImgAndVid(attributes(20)),
        CreateAvroWithStructs.createV21RelImgAndVid(attributes(21)),
        CreateAvroWithStructs.createV21Quotations(attributes(22)),
        CreateAvroWithStructs.createV21AllNames(attributes(23)),
        CreateAvroWithStructs.createV21Amounts(attributes(24)),
        CreateAvroWithStructs.createV21TransInfo(attributes(25)),
        attributes(26)
      ))

    FileUtils.deleteDirectory(new File(avroStructPath))
    val gdeltDF = spark.createDataFrame(gdeltRowRDD, CreateAvroWithStructs.GkgSchema)
    gdeltDF.write.avro(avroStructPath)
    assertResult(4)(new File(avroStructPath).listFiles.length)
  }

  localTest("Read Avro into Dataframe using spark-avro") { spark =>
    val gdeltAvroDF = spark.read.format("com.databricks.spark.avro").load(avroStructPath)
    assertResult(10)(gdeltAvroDF.count)
    gdeltAvroDF.show
  }
}
Example 8
Source File: CryptoTest.scala From Mastering-Spark-for-Data-Science with MIT License
package io.gzet

import java.io.File

import org.apache.commons.io.FileUtils
import org.apache.hadoop.io.compress.CryptoCodec
import org.apache.spark.{SparkConf, SparkContext}
import org.scalatest.{Matchers, FunSuite}

class CryptoTest extends FunSuite with Matchers {

  val cryptoDir = System.getProperty("java.io.tmpdir") + "cryptTestDir"

  test("Crypto encrypt then decrypt file") {
    val conf = new SparkConf()
      .setAppName("Test Crypto")
      .setMaster("local")
      .set("spark.default.parallelism", "1")
      .set("spark.hadoop.io.compression.codecs", "org.apache.hadoop.io.compress.CryptoCodec")
    val sc = new SparkContext(conf)

    val testFile = getClass.getResource("/gdeltTestFile.csv")
    val rdd = sc.textFile(testFile.getPath)
    rdd.saveAsTextFile(cryptoDir, classOf[CryptoCodec])

    val read = sc.textFile(cryptoDir)
    val allLines = read.collect

    allLines.size should be(20)
    allLines(0).startsWith("331150686") should be (true)
    allLines(allLines.length - 1).endsWith("polytrack/") should be (true)

    FileUtils.deleteDirectory(new File(cryptoDir))
    sc.stop
  }
}
Example 9
Source File: StorageSpec.scala From piglet with Apache License 2.0
package dbis.piglet.backends.spark

import java.io.File

import dbis.piglet.backends.{Record, SchemaClass}
import org.apache.commons.io.FileUtils
import org.apache.spark.{SparkConf, SparkContext}
import org.scalatest._

case class DataRecord(col1: Int, col2: String) extends java.io.Serializable with SchemaClass {
  override def mkString(delim: String) = s"$col1$delim$col2"
}

case class DoubleRecord(col1: Double, col2: Double) extends java.io.Serializable with SchemaClass {
  override def mkString(delim: String) = s"$col1$delim$col2"
}

class StorageSpec extends FlatSpec with Matchers with BeforeAndAfter {

  var sc: SparkContext = _
  var conf: SparkConf = _

  before {
    // to avoid Akka rebinding to the same port, since it doesn't unbind
    // immediately after shutdown
    System.clearProperty("spark.driver.port")
    System.clearProperty("spark.hostPort")
    conf = new SparkConf().setMaster("local").setAppName(getClass.getSimpleName)
    sc = new SparkContext(conf)
  }

  after {
    // cleanup SparkContext data
    sc.stop()
    sc = null
    conf = null
    System.clearProperty("spark.driver.port")
    System.clearProperty("spark.hostPort")
  }

  "PigStorage" should "load objects using an extractor" in {
    val res = PigStorage[Person]().load(sc, "sparklib/src/test/resources/person.csv",
      (data: Array[String]) => Person(data(0), data(1).toInt), ",")
    res.collect() should be (Array(Person("Anna", 21), Person("John", 53), Person("Mike", 32)))
  }

  it should "save and load records" in {
    val res = PigStorage[Person]().load(sc, "sparklib/src/test/resources/person.csv",
      (data: Array[String]) => Person(data(0), data(1).toInt), ",")
    PigStorage[Person]().write("person.data", res, "|")
    val otherRes = PigStorage[Person]().load(sc, "person.data",
      (data: Array[String]) => Person(data(0), data(1).toInt), "[|]")
    res.collect() should be (otherRes.collect())
    FileUtils.deleteDirectory(new File("person.data"))
  }
}
Example 10
Source File: FlinkStreamingCEPTest.scala From piglet with Apache License 2.0
package dbis.cep.test.flink

import java.io.File

import dbis.piglet.backends.{Record, SchemaClass}
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.scalatest._
import org.apache.commons.io.FileUtils
import org.apache.flink.api.scala._
import dbis.piglet.cep.nfa._
import dbis.piglet.cep.ops.SelectionStrategy._
import dbis.piglet.cep.ops.OutputStrategy._
import dbis.piglet.cep.flink.CustomDataStreamMatcher._

import scala.collection.mutable.ArrayBuffer
import org.apache.flink.streaming.api.windowing.windows.GlobalWindow
import org.apache.flink.streaming.api.windowing.assigners.GlobalWindows

case class StreamingDoubleRecord(col1: Int, col2: Int) extends java.io.Serializable with SchemaClass {
  override def mkString(delim: String) = s"$col1$delim$col2"
}

object OurStreamingNFA {

  def filter1(record: StreamingDoubleRecord, rvalues: NFAStructure[StreamingDoubleRecord]): Boolean = record.col1 == 1
  def filter2(record: StreamingDoubleRecord, rvalues: NFAStructure[StreamingDoubleRecord]): Boolean = record.col1 == 2
  def filter3(record: StreamingDoubleRecord, rvalues: NFAStructure[StreamingDoubleRecord]): Boolean = record.col1 == 3

  def createNFA = {
    val testNFA: NFAController[StreamingDoubleRecord] = new NFAController()
    val firstState = testNFA.createAndGetStartState("First")
    val secondState = testNFA.createAndGetNormalState("Second")
    val thirdState = testNFA.createAndGetNormalState("Third")
    val finalState = testNFA.createAndGetFinalState("Final")

    val firstEdge = testNFA.createAndGetForwardEdge(filter1)
    val secondEdge = testNFA.createAndGetForwardEdge(filter2)
    val thirdEdge = testNFA.createAndGetForwardEdge(filter3)

    testNFA.createForwardTransition(firstState, firstEdge, secondState)
    testNFA.createForwardTransition(secondState, secondEdge, thirdState)
    testNFA.createForwardTransition(thirdState, thirdEdge, finalState)
    testNFA
  }
}

class FlinkStreamingCEPTest extends FlatSpec with Matchers with BeforeAndAfterEach {

  var resultArray = new ArrayBuffer[StreamingDoubleRecord]

  override def beforeEach() {
    resultArray.clear()
  }

  val sample = Seq(
    StreamingDoubleRecord(1,1),
    StreamingDoubleRecord(2,2),
    StreamingDoubleRecord(1,3),
    StreamingDoubleRecord(2,4),
    StreamingDoubleRecord(3,5),
    StreamingDoubleRecord(1,6),
    StreamingDoubleRecord(2,7),
    StreamingDoubleRecord(3,8))

  "Flink Streaming CEP" should "detect the pattern SEQ(A, B, C) with first match" in {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.disableSysoutLogging()
    val data = env.fromCollection(sample)
    val res = data.matchNFA(OurStreamingNFA.createNFA, env, FirstMatch)
  }

  it should "detect the pattern SEQ(A, B, C) with any match" in {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.disableSysoutLogging()
    val data = env.fromCollection(sample)
    val res = data.matchNFA(OurStreamingNFA.createNFA, env, AllMatches)
  }

  it should "detect the pattern SEQ(A, B, C) with next match" in {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.disableSysoutLogging()
    val data = env.fromCollection(sample)
    val res = data.matchNFA(OurStreamingNFA.createNFA, env, NextMatches)
  }

  it should "detect the pattern SEQ(A, B, C) with contiguity match" in {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.getConfig.disableSysoutLogging()
    val data = env.fromCollection(sample)
    val res = data.matchNFA(OurStreamingNFA.createNFA, env, ContiguityMatches)
  }
}
Example 11
Source File: ArchiveUtils.scala From dl4scala with MIT License
package org.dl4scala.util

import org.slf4j.LoggerFactory
import org.apache.commons.compress.archivers.tar.TarArchiveEntry
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import org.apache.commons.io.FileUtils
import org.apache.commons.io.IOUtils
import java.io._
import java.util.zip.GZIPInputStream
import java.util.zip.ZipInputStream

// Note: this excerpt is truncated in the original; the object declaration, the method signature,
// and the preceding tar-extraction branch are not shown.
      tarIn.close()
    } else if (file.endsWith(".gz")) {
      val is2 = new GZIPInputStream(fin)
      val extracted = new File(target.getParent, target.getName.replace(".gz", ""))
      if (extracted.exists) extracted.delete
      extracted.createNewFile
      val fos = FileUtils.openOutputStream(extracted)
      IOUtils.copyLarge(is2, fos)
      is2.close()
      fos.flush()
      fos.close()
    }
    target.delete
  }
}
Example 12
Source File: FlowerDataSetIterator.scala From dl4scala with MIT License
package org.dl4scala.examples.transferlearning.vgg16.dataHelpers

import java.io.{File, IOException}
import java.net.URL

import org.datavec.api.io.filters.BalancedPathFilter
import org.datavec.api.io.labels.ParentPathLabelGenerator
import org.datavec.api.split.{FileSplit, InputSplit}
import org.datavec.image.loader.BaseImageLoader
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator
import java.util
import java.util.Random

import org.apache.commons.io.FileUtils
import org.datavec.api.util.ArchiveUtils
import org.datavec.image.recordreader.ImageRecordReader
import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator
import org.deeplearning4j.nn.modelimport.keras.trainedmodels.TrainedModels

object FlowerDataSetIterator {

  private val log = org.slf4j.LoggerFactory.getLogger(FlowerDataSetIterator.getClass)

  private val DATA_DIR = new File(System.getProperty("user.home")) + "/dl4jDataDir"
  private val DATA_URL = "http://download.tensorflow.org/example_images/flower_photos.tgz"
  private val FLOWER_DIR = DATA_DIR + "/flower_photos"

  private val allowedExtensions = BaseImageLoader.ALLOWED_FORMATS
  private val rng = new Random(13)

  private val height = 224
  private val width = 224
  private val channels = 3
  private val numClasses = 5

  private val labelMaker = new ParentPathLabelGenerator
  private var trainData: InputSplit = _
  private var testData: InputSplit = _
  private var batchSize = 0

  @throws(classOf[IOException])
  def trainIterator: DataSetIterator = makeIterator(trainData)

  @throws(classOf[IOException])
  def testIterator: DataSetIterator = makeIterator(testData)

  @throws(classOf[IOException])
  def setup(batchSizeArg: Int, trainPerc: Int): Unit = {
    try downloadAndUntar()
    catch {
      case e: IOException =>
        e.printStackTrace()
        log.error("IOException : ", e)
    }

    batchSize = batchSizeArg

    val parentDir = new File(FLOWER_DIR)
    val filesInDir = new FileSplit(parentDir, allowedExtensions, rng)
    val pathFilter = new BalancedPathFilter(rng, allowedExtensions, labelMaker)
    if (trainPerc >= 100)
      throw new IllegalArgumentException("Percentage of data set aside for training has to be less than 100%." +
        " Test percentage = 100 - training percentage, has to be greater than 0")

    val filesInDirSplit = filesInDir.sample(pathFilter, trainPerc, 100 - trainPerc)
    trainData = filesInDirSplit(0)
    testData = filesInDirSplit(1)
  }

  @throws(classOf[IOException])
  private def makeIterator(split: InputSplit) = {
    val recordReader = new ImageRecordReader(height, width, channels, labelMaker)
    recordReader.initialize(split)
    val iter = new RecordReaderDataSetIterator(recordReader, batchSize, 1, numClasses)
    iter.setPreProcessor(TrainedModels.VGG16.getPreProcessor)
    iter
  }

  @throws(classOf[IOException])
  def downloadAndUntar(): Unit = {
    val rootFile = new File(DATA_DIR)
    if (!rootFile.exists) rootFile.mkdir
    val tarFile = new File(DATA_DIR, "flower_photos.tgz")
    if (!tarFile.isFile) {
      log.info("Downloading the flower dataset from " + DATA_URL + "...")
      FileUtils.copyURLToFile(new URL(DATA_URL), tarFile)
    }
    ArchiveUtils.unzipFileTo(tarFile.getAbsolutePath, rootFile.getAbsolutePath)
  }
}
Example 13
Source File: PerTestSparkSession.scala From Spark-RSVD with Apache License 2.0
package com.criteo.rsvd

import java.io.File
import java.nio.file.{Files, Path}
import java.util.concurrent.locks.ReentrantLock

import org.apache.commons.io.FileUtils
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{SQLContext, SparkSession}
import org.scalatest.{BeforeAndAfterEach, Suite}

import scala.reflect.ClassTag
import scala.util.control.NonFatal

object LocalSparkSession {
  private[this] val lock = new ReentrantLock()

  def acquire(): Unit = lock.lock()

  def release(): Unit = lock.unlock()

  def builder: SparkSession.Builder = {
    SparkSession
      .builder()
      .master("local[*]")
      .appName("test")
      .config("spark.ui.enabled", false)
  }
}

// Note: the declaration of the enclosing trait (PerTestSparkSession, per the file name) and its
// currentSession, checkpointDir and sc members are elided in this excerpt; only selected members are shown.

  def sparkConf: Map[String, Any] = Map()

  def toRDD[T: ClassTag](input: Seq[T]): RDD[T] = sc.parallelize(input)

  def toArray[T](input: RDD[T]): Array[T] = input.collect()

  protected def closeSession() = {
    currentSession.foreach(_.stop())
    currentSession = None

    try {
      checkpointDir.foreach(path => FileUtils.deleteDirectory(new File(path.toString)))
    } catch {
      case NonFatal(_) =>
    }
    checkpointDir = None

    LocalSparkSession.release()
  }

  private def getOrCreateSession = synchronized {
    if (currentSession.isEmpty) {
      val builder = LocalSparkSession.builder
      for ((key, value) <- sparkConf) {
        builder.config(key, value.toString)
      }
      currentSession = Some(builder.getOrCreate())
      checkpointDir = Some(Files.createTempDirectory("spark-unit-test-checkpoint-"))
      currentSession.get.sparkContext
        .setCheckpointDir(checkpointDir.get.toString)
      currentSession.get.sparkContext.setLogLevel("WARN")
    }
    currentSession.get
  }

  override def beforeEach(): Unit = {
    LocalSparkSession.acquire()
    super.beforeEach()
  }

  override def afterEach(): Unit = {
    try {
      super.afterEach()
    } finally {
      closeSession()
    }
  }
}
Example 14
Source File: DockerCopyBuildAction.scala From berilia with Apache License 2.0
package com.criteo.dev.cluster.docker

import java.io.File

import com.criteo.dev.cluster.{GeneralConstants, GeneralUtilities}
import org.apache.commons.io.FileUtils

class DockerCopyBuildAction(dockerFile: String, dockerImage: String, resourcePath: String)
  extends DockerBuildAction(dockerFile, dockerImage) {

  val tempDir = "tmpResources"

  override def run(): Unit = {
    val tmpResourcePath = s"${GeneralUtilities.getHomeDir}/${DockerConstants.dockerBaseDir}/$tempDir"
    val tmpResource = new File(tmpResourcePath)
    GeneralUtilities.prepareDir(tmpResourcePath)

    val resource = new File(s"${GeneralUtilities.getHomeDir}/$resourcePath")
    require(resource.exists(), s"Internal error, resource to copy does not exist: $resourcePath")

    if (resource.isFile()) {
      FileUtils.copyFileToDirectory(resource, tmpResource)
    } else if (resource.isDirectory()) {
      FileUtils.copyDirectory(resource, tmpResource)
    }

    super.addArg(DockerConstants.resource, s"$tempDir")
    super.run()

    FileUtils.deleteDirectory(tmpResource)
  }
}

object DockerCopyBuildAction {
  def apply(dockerFile: String, dockerImage: String, resourcePath: String) = {
    val obj = new DockerCopyBuildAction(dockerFile, dockerImage, resourcePath)
    obj.run
  }
}
Example 15
Source File: SourceFileSequenceBuilder.scala From mvn_scalafmt with Apache License 2.0
package org.antipathy.mvn_scalafmt.builder

import java.io.File
import java.nio.file.{Files, Paths}

import org.apache.commons.io.FileUtils
import org.apache.maven.plugin.logging.Log

import scala.jdk.CollectionConverters._

// Note: the enclosing class declaration is elided in this excerpt; only the build method is shown.
  override def build(paths: Seq[File]): Seq[File] =
    if (paths == null) {
      log.warn("Could not locate any scala sources to format")
      Seq.empty[File]
    } else {
      val files = paths.map(_.getCanonicalPath).flatMap { p =>
        if (Files.exists(Paths.get(p))) {
          Some(new File(p))
        } else {
          log.warn(s"Could not locate Scala source at $p")
          None
        }
      }
      files.flatMap(file => FileUtils.listFiles(file, Array("scala", "sc", "sbt"), true).asScala)
    }
}
Example 16
Source File: TgzTransformerSpec.scala From releaser with Apache License 2.0
package uk.gov.hmrc.releaser

import java.io._
import java.nio.file.{Files, Path}

import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveInputStream}
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import org.apache.commons.io.FileUtils
import org.scalatest._

import scala.collection.mutable.ListBuffer
import scala.util.{Failure, Success}

class TgzTransformerSpec extends WordSpec with Matchers with BeforeAndAfterEach with OptionValues with TryValues {

  val tgzPath = new File(this.getClass.getResource("/help-frontend/uk/gov/hmrc/help-frontend_2.11/1.26.0-3-gd7ed03c/help-frontend_2.11-1.26.0-3-gd7ed03c.tgz").toURI).toPath

  var transformer: TgzTransformer = _
  val candidate_1_26_0_3_gd7ed03c = ReleaseCandidateVersion("1.26.0-3-gd7ed03c")
  val release_1_4_0 = ReleaseVersion("1.4.0")

  var tmpDir: Path = _

  override def beforeEach() {
    tmpDir = Files.createTempDirectory("tmp")
    transformer = new TgzTransformer()
    FileUtils.copyFileToDirectory(tgzPath.toFile, tmpDir.toFile)
  }

  override def afterEach() {
    FileUtils.deleteDirectory(tmpDir.toFile)
  }

  "the transformer" should {

    "decompress the tgz, rename the main folder and compress it back" in {
      val inFile = new File(tmpDir.toFile, tgzPath.getFileName.toString).toPath
      val targetFilePath = tmpDir.resolve("help-frontend-1.4.0.tgz")

      val originalTarEntries = listTgzEntries(inFile)
      assertTarEntry(originalTarEntries, "./help-frontend-1.26.0-3-gd7ed03c/")
      assertTarEntry(originalTarEntries, "./help-frontend-1.4.0/", exists = false)
      assertTarEntry(originalTarEntries, "./start-docker.sh", mode = Some(493))

      val outFileTry = transformer(inFile, "help-frontend", candidate_1_26_0_3_gd7ed03c, release_1_4_0, targetFilePath)
      outFileTry match {
        case Success(outFile) =>
          val tarEntries = listTgzEntries(targetFilePath)
          assertTarEntry(tarEntries, "./help-frontend-1.26.0-3-gd7ed03c/", exists = false)
          assertTarEntry(tarEntries, "./help-frontend-1.4.0/")
          assertTarEntry(tarEntries, "./start-docker.sh", mode = Some(493))
        case Failure(e) => fail("Caught exception: " + e.getMessage, e)
      }
    }
  }

  private def listTgzEntries(localTgzFile: Path): List[TarArchiveEntry] = {
    val bytes = new Array[Byte](2048)
    val fin = new BufferedInputStream(new FileInputStream(localTgzFile.toFile))
    val gzIn = new GzipCompressorInputStream(fin)
    val tarIn = new TarArchiveInputStream(gzIn)

    val entries = ListBuffer[TarArchiveEntry]()

    Iterator continually tarIn.getNextTarEntry takeWhile (null !=) foreach { tarEntry =>
      entries += tarEntry
    }

    tarIn.close()

    entries.toList
  }

  private def assertTarEntry(tarEntries: List[TarArchiveEntry], entryName: String, exists: Boolean = true, mode: Option[Int] = None) = {
    val entryOption = tarEntries.find(_.getName == entryName)
    entryOption match {
      case Some(entry) =>
        exists shouldBe true
        mode.foreach { m => m shouldBe entry.getMode }
      case None => exists shouldBe false
    }
  }
}
Example 17
Source File: HttpSlippyTileReader.scala From geotrellis-osm-elevation with Apache License 2.0
package geotrellis.osme.core

import geotrellis.vector._
import geotrellis.raster._
import geotrellis.raster.io.geotiff._
import geotrellis.spark._
import geotrellis.spark.io.s3._
import geotrellis.spark.io.slippy._
import geotrellis.util.Filesystem

import org.apache.commons.io.FileUtils
import org.apache.commons.io.filefilter._
import org.apache.commons.io.IOUtils._

import org.apache.spark._
import org.apache.spark.rdd._

import java.net._
import java.io.File

class HttpSlippyTileReader[T](pathTemplate: String)(fromBytes: (SpatialKey, Array[Byte]) => T) extends SlippyTileReader[T] {

  def getURL(template: String, z: Int, x: Int, y: Int) =
    template.replace("{z}", z.toString).replace("{x}", x.toString).replace("{y}", y.toString)

  def getByteArray(url: String) = {
    val inStream = new URL(url).openStream()
    try {
      toByteArray(inStream)
    } finally {
      inStream.close()
    }
  }

  def read(zoom: Int)(implicit sc: SparkContext): RDD[(SpatialKey, T)] = ???

  def read(zoom: Int, key: SpatialKey): T =
    fromBytes(key, getByteArray(getURL(pathTemplate, zoom, key.col, key.row)))

  override def read(zoom: Int, x: Int, y: Int): T = read(zoom, SpatialKey(x, y))
}
Example 18
Source File: QueryCsvTest.scala From apache-spark-test with Apache License 2.0
package com.github.dnvriend.spark.sstreaming

import com.github.dnvriend.TestSpec
import org.apache.commons.io.FileUtils
import org.apache.spark.sql.streaming.{OutputMode, ProcessingTime}
import org.apache.spark.sql.types._
import org.scalatest.Ignore

import scala.concurrent.duration._
import scala.language.implicitConversions

@Ignore
class QueryCsvTest extends TestSpec {

  def copyFiles(nrTimes: Int = 10): Unit = {
    FileUtils.deleteDirectory("/tmp/csv")
    FileUtils.forceMkdir("/tmp/csv")
    (1 to nrTimes).foreach { x =>
      FileUtils.copyFile(TestSpec.PeopleCsv, s"/tmp/csv/people-$x")
    }
  }

  val schema: StructType = StructType(Array(
    StructField("id", LongType, nullable = false),
    StructField("name", StringType, nullable = true),
    StructField("age", IntegerType, nullable = true)
  ))

  it should "query csv file" in withSparkSession { spark =>
    copyFiles()

    val csv = spark.readStream
      .schema(schema)
      .format("csv")
      .option("maxFilesPerTrigger", 1)
      .option("header", "false") // Use first line of all files as header
      .option("inferSchema", "false") // Automatically infer data types
      .option("delimiter", ";")
      .load("/tmp/csv")

    csv.printSchema()

    println("Is the query streaming: " + csv.isStreaming)
    println("Are there any streaming queries? " + spark.streams.active.isEmpty)

    val query = csv
      .writeStream
      .format("console")
      .trigger(ProcessingTime(5.seconds))
      .queryName("consoleStream")
      .outputMode(OutputMode.Append())
      .start()

    // waiting for data
    sleep(3.seconds)

    spark.streams
      .active
      .foreach(println)

    spark.streams
      .active
      .foreach(_.explain(extended = true))

    query.awaitTermination(20.seconds)
  }
}
Example 19
Source File: SharedSparkSessionSuite.scala From ecosystem with Apache License 2.0
package org.tensorflow.spark.datasources.tfrecords

import java.io.File

import org.apache.commons.io.FileUtils
import org.apache.spark.SharedSparkSession
import org.junit.{After, Before}
import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpecLike}

trait BaseSuite extends WordSpecLike with Matchers with BeforeAndAfterAll

class SharedSparkSessionSuite extends SharedSparkSession with BaseSuite {

  val TF_SANDBOX_DIR = "tf-sandbox"
  val file = new File(TF_SANDBOX_DIR)

  @Before
  override def beforeAll() = {
    super.setUp()
    FileUtils.deleteQuietly(file)
    file.mkdirs()
  }

  @After
  override def afterAll() = {
    FileUtils.deleteQuietly(file)
    super.tearDown()
  }
}
Example 20
Source File: LocalWriteSuite.scala From ecosystem with Apache License 2.0
package org.tensorflow.spark.datasources.tfrecords

import java.nio.file.Files
import java.nio.file.Paths

import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.expressions.GenericRow
import org.apache.spark.sql.types._
import org.apache.commons.io.FileUtils

class LocalWriteSuite extends SharedSparkSessionSuite {

  val testRows: Array[Row] = Array(
    new GenericRow(Array[Any](11, 1, 23L, 10.0F, 14.0, List(1.0, 3.0), "r1")),
    new GenericRow(Array[Any](21, 2, 24L, 12.0F, 15.0, List(2.0, 3.0), "r2")),
    new GenericRow(Array[Any](31, 3, 25L, 14.0F, 16.0, List(3.0, 3.0), "r3")))

  val schema = StructType(List(StructField("id", IntegerType),
    StructField("IntegerTypeLabel", IntegerType),
    StructField("LongTypeLabel", LongType),
    StructField("FloatTypeLabel", FloatType),
    StructField("DoubleTypeLabel", DoubleType),
    StructField("VectorLabel", ArrayType(DoubleType, true)),
    StructField("name", StringType)))

  "Propagate" should {
    "write data locally" in {
      // Create a dataframe with 2 partitions
      val rdd = spark.sparkContext.parallelize(testRows, numSlices = 2)
      val df = spark.createDataFrame(rdd, schema)

      // Write the partitions onto the local hard drive. Since it is going to be the
      // local file system, the partitions will be written in the same directory of the
      // same machine.
      // In a distributed setting though, two different machines would each hold a single
      // partition.
      val localPath = Files.createTempDirectory("spark-connector-propagate").toAbsolutePath.toString
      val savePath = localPath + "/testResult"
      df.write.format("tfrecords")
        .option("recordType", "Example")
        .option("writeLocality", "local")
        .save(savePath)

      // Read again this directory, this time using the Hadoop file readers, it should
      // return the same data.
      // This only works in this test and does not hold in general, because the partitions
      // will be written on the workers. Everything runs locally for tests.
      val df2 = spark.read.format("tfrecords").option("recordType", "Example")
        .load(savePath).sort("id").select("id", "IntegerTypeLabel", "LongTypeLabel",
          "FloatTypeLabel", "DoubleTypeLabel", "VectorLabel", "name") // Correct column order.

      assert(df2.collect().toSeq === testRows.toSeq)
    }
  }
}
Example 21
Source File: JsonIOTest.scala From scio with Apache License 2.0
package com.spotify.scio.extra.json

import java.nio.file.Files

import io.circe.Printer
import com.spotify.scio._
import com.spotify.scio.io.TapSpec
import com.spotify.scio.testing._
import com.spotify.scio.util.ScioUtil
import org.apache.beam.sdk.Pipeline.PipelineExecutionException
import org.apache.commons.io.FileUtils

import scala.jdk.CollectionConverters._
import scala.io.Source

object JsonIOTest {
  case class Record(i: Int, s: String, o: Option[Int])
}

class JsonIOTest extends ScioIOSpec with TapSpec {
  import JsonIOTest._

  private val xs = (1 to 100).map(x => Record(x, x.toString, if (x % 2 == 0) Some(x) else None))

  "JsonIO" should "work" in {
    testTap(xs)(_.saveAsJsonFile(_))(".json")
    testJobTest(xs)(JsonIO(_))(_.jsonFile(_))(_.saveAsJsonFile(_))
  }

  it should "support custom printer" in {
    val dir = tmpDir
    val t = runWithFileFuture {
      _.parallelize(xs)
        .saveAsJsonFile(dir.getPath, printer = Printer.noSpaces.copy(dropNullValues = true))
    }
    verifyTap(t, xs.toSet)
    val result = Files
      .list(dir.toPath)
      .iterator()
      .asScala
      .flatMap(p => Source.fromFile(p.toFile).getLines())
      .toSeq
    val expected = (1 to 100).map { x =>
      s"""{"i":$x,"s":"$x"${if (x % 2 == 0) s""","o":$x""" else ""}}"""
    }
    result should contain theSameElementsAs expected
    FileUtils.deleteDirectory(dir)
  }

  it should "handle invalid JSON" in {
    val badData = Seq(
      """{"i":1, "s":hello}""",
      """{"i":1}""",
      """{"s":"hello"}""",
      """{"i":1, "s":1}""",
      """{"i":"hello", "s":1}"""
    )
    val dir = tmpDir
    runWithFileFuture {
      _.parallelize(badData).saveAsTextFile(dir.getPath)
    }

    val sc = ScioContext()
    sc.jsonFile[Record](ScioUtil.addPartSuffix(dir.getPath))

    a[PipelineExecutionException] should be thrownBy { sc.run() }

    FileUtils.deleteDirectory(dir)
  }
}
Example 22
Source File: ConverterProviderTest.scala From scio with Apache License 2.0
package com.spotify.scio.avro.types

import java.nio.file.Files

import com.spotify.scio._
import com.spotify.scio.avro._
import org.apache.commons.io.FileUtils
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class ConverterProviderTest extends AnyFlatSpec with Matchers {
  import ConverterProviderTest._

  "ConverterProvider" should "#1831: handle Avro map" in {
    val dir = Files.createTempDirectory("avro-")
    val data = Seq(Record(Map("a" -> 1), Some(Map("b" -> 2)), List(Map("c" -> 3))))

    val sc1 = ScioContext()
    sc1.parallelize(data).saveAsTypedAvroFile(dir.toString)
    sc1.run()

    val sc2 = ScioContext()
    val t = sc2.typedAvroFile[Record](s"$dir/*.avro").materialize
    sc2.run()

    t.underlying.value.toSeq should contain theSameElementsAs data

    FileUtils.deleteDirectory(dir.toFile)
  }
}

object ConverterProviderTest {
  @AvroType.toSchema
  case class Record(a: Map[String, Int], b: Option[Map[String, Int]], c: List[Map[String, Int]])
}
Example 23
Source File: TFTapTest.scala From scio with Apache License 2.0
package com.spotify.scio.tensorflow

import java.util.UUID

import com.spotify.scio.io.TapSpec
import org.apache.commons.io.FileUtils

class TFTapTest extends TapSpec {

  "SCollection" should "support saveAsTFRecordFile" in {
    val data = Seq.fill(100)(UUID.randomUUID().toString)
    import org.apache.beam.sdk.io.{Compression => CType}
    for (compressionType <- Seq(CType.UNCOMPRESSED, CType.DEFLATE, CType.GZIP)) {
      val dir = tmpDir
      val t = runWithFileFuture {
        _.parallelize(data)
          .map(_.getBytes)
          .saveAsTfRecordFile(dir.getPath, compression = compressionType)
      }
      verifyTap(t.map(new String(_)), data.toSet)
      FileUtils.deleteDirectory(dir)
    }
  }
}
Example 24
Source File: UDFBuilder.scala From sope with Apache License 2.0
package com.sope.etl.register

import java.io.File
import java.net.URLClassLoader

import com.sope.etl.getObjectInstance
import com.sope.etl.transform.exception.YamlDataTransformException
import com.sope.etl.utils.JarUtils
import com.sope.utils.Logging
import org.apache.commons.io.FileUtils
import org.apache.spark.sql.expressions.UserDefinedFunction

import scala.tools.nsc.Settings
import scala.tools.nsc.interpreter.IMain

object UDFBuilder extends Logging {

  val DefaultClassLocation = "/tmp/sope/dynamic/"
  val DefaultJarLocation = "/tmp/sope/sope-dynamic-udf.jar"

  // Note: the evalUDF helper referenced below is elided in this excerpt.
  def buildDynamicUDFs(udfCodeMap: Map[String, String]): Map[String, UserDefinedFunction] = {
    val file = new java.io.File(UDFBuilder.DefaultClassLocation)
    FileUtils.deleteDirectory(file)
    file.mkdirs()
    val udfMap = evalUDF(udfCodeMap)
    JarUtils.buildJar(DefaultClassLocation, DefaultJarLocation)
    udfMap
  }
}
Example 25
Source File: BruteForceSequenceMatcher.scala From sonar-scala with GNU Lesser General Public License v3.0
package com.buransky.plugins.scoverage.pathcleaner

import java.io.File

import org.apache.commons.io.FileUtils
import BruteForceSequenceMatcher._
import com.buransky.plugins.scoverage.util.PathUtil

import scala.collection.JavaConversions._
import org.sonar.api.utils.log.Loggers

object BruteForceSequenceMatcher {
  val extensions = Array[String]("java", "scala")
  type PathSeq = Seq[String]
}

class BruteForceSequenceMatcher(baseDir: File, sourcePath: String) extends PathSanitizer {

  private val sourceDir = initSourceDir()
  require(sourceDir.isAbsolute)
  require(sourceDir.isDirectory)

  private val log = Loggers.get(classOf[BruteForceSequenceMatcher])
  private val sourcePathLength = PathUtil.splitPath(sourceDir.getAbsolutePath).size
  private val filesMap = initFilesMap()

  def getSourceRelativePath(reportPath: PathSeq): Option[PathSeq] = {
    // match with file system map of files
    val relPathOption = for {
      absPathCandidates <- filesMap.get(reportPath.last)
      path <- absPathCandidates.find(absPath => absPath.endsWith(reportPath))
    } yield path.drop(sourcePathLength)

    relPathOption
  }

  // mock able helpers that allow us to remove the dependency to the real file system during tests
  private[pathcleaner] def initSourceDir(): File = {
    sourcePath.split(",").headOption.map { first =>
      val firstFile = new File(first)
      if (firstFile.isAbsolute) {
        firstFile
      } else {
        val sourceDir = new File(baseDir, first)
        sourceDir
      }
    }.orNull
  }

  private[pathcleaner] def initFilesMap(): Map[String, Seq[PathSeq]] = {
    val srcFiles = FileUtils.iterateFiles(sourceDir, extensions, true)
    val paths = srcFiles.map(file => PathUtil.splitPath(file.getAbsolutePath)).toSeq

    // group them by filename, in case multiple files have the same name
    paths.groupBy(path => path.last)
  }
}
Example 26
Source File: GeneratorTest.scala From courier with Apache License 2.0
package org.coursera.courier.generator

import java.io.File
import java.io.IOException

import com.linkedin.data.DataList
import com.linkedin.data.DataMap
import com.linkedin.data.codec.JacksonDataCodec
import com.linkedin.data.template.DataTemplate
import com.linkedin.data.template.JacksonDataTemplateCodec
import com.linkedin.data.template.PrettyPrinterJacksonDataTemplateCodec
import org.apache.commons.io.FileUtils
import org.scalatest.junit.AssertionsForJUnit
import org.scalatest.junit.JUnitSuite

abstract class GeneratorTest extends JUnitSuite with AssertionsForJUnit {

  def printJson(dataTemplate: DataTemplate[DataMap]): Unit = printJson(dataTemplate.data)

  def printJson(dataMap: DataMap): Unit = println(mapToJson(dataMap))

  def assertJson(left: DataTemplate[DataMap], right: String): Unit = {
    val leftMap = readJsonToMap(mapToJson(left.data()))
    val rightMap = readJsonToMap(right)
    assert(leftMap === rightMap)
  }

  def roundTrip(complex: DataMap): DataMap = {
    readJsonToMap(mapToJson(complex))
  }

  def roundTrip(complex: DataList): DataList = {
    readJsonToList(listToJson(complex))
  }

  private val jsonPath = new File(
    System.getProperty("referencesuite.srcdir") +
      File.separator + "main" + File.separator + "json")

  protected def load(filename: String): String = {
    FileUtils.readFileToString(new File(jsonPath, filename))
  }

  private val prettyPrinter = new PrettyPrinterJacksonDataTemplateCodec
  private val codec = new JacksonDataTemplateCodec
  private val dataCodec = new JacksonDataCodec

  private def mapToJson(dataTemplate: DataTemplate[DataMap]): String = mapToJson(dataTemplate.data)

  private def listToJson(dataTemplate: DataTemplate[DataList]): String = {
    listToJson(dataTemplate.data)
  }

  private def mapToJson(dataMap: DataMap): String = prettyPrinter.mapToString(dataMap)

  private def listToJson(dataList: DataList): String = prettyPrinter.listToString(dataList)

  private def readJsonToMap(string: String): DataMap = dataCodec.stringToMap(string)

  private def readJsonToList(string: String): DataList = dataCodec.stringToList(string)
}
Example 27
Source File: BillerCache.scala From apple-of-my-iap with MIT License
package com.meetup.iap

import com.meetup.iap.receipt.Subscription
import org.slf4j.LoggerFactory

import java.io.File
import scala.io.Source

import org.json4s.DefaultFormats
import org.json4s.native.Serialization.{read, writePretty}
import org.apache.commons.io.FileUtils

object BillerCache {

  val log = LoggerFactory.getLogger(BillerCache.getClass)

  implicit val formats = DefaultFormats

  private val ProjectName = "iap-service"
  private val inProject = new File(".").getCanonicalPath.endsWith(ProjectName)

  private val Folder = {
    val base = if (inProject) "" else "iap-service/"
    new File(s"${base}tmp/")
  }
  if (!Folder.exists) {
    Folder.mkdirs
  }

  private val TempFile = new File(Folder, "subscriptions.json")
  if (!TempFile.exists) {
    TempFile.createNewFile
  }

  private val PlansFile = new File(Folder, "plans.json")
  if (!PlansFile.exists) {
    PlansFile.createNewFile
  }

  def readFromCache(): Map[String, Subscription] = {
    log.info("Reading from file: " + TempFile.getAbsolutePath)
    val raw = Source.fromFile(TempFile).mkString.trim

    if (raw.nonEmpty) {
      Map(read[Map[String, Subscription]](raw).toSeq: _*)
    } else Map.empty
  }

  def writeToCache(subs: Map[String, Subscription]) {
    val json = writePretty(subs)
    FileUtils.writeStringToFile(TempFile, json, "UTF-8")
  }

  def readPlansFromFile(): List[Plan] = {
    log.info(s"Reading from plans file: ${PlansFile.getAbsolutePath}")
    val raw = Source.fromFile(PlansFile).mkString.trim

    if (raw.nonEmpty) {
      log.info("Found some plans")
      List(read[List[Plan]](raw).toSeq: _*)
    } else List.empty
  }
}
Example 28
Source File: TransformerSerialization.scala From seahorse-workflow-executor with Apache License 2.0
package io.deepsense.deeplang.doperables.spark.wrappers.transformers

import java.nio.file.{Files, Path}

import org.apache.commons.io.FileUtils
import org.scalatest.{BeforeAndAfter, Suite}

import io.deepsense.deeplang.doperables.Transformer
import io.deepsense.deeplang.doperables.dataframe.DataFrame
import io.deepsense.deeplang.{DeeplangIntegTestSupport, ExecutionContext}

trait TransformerSerialization extends Suite with BeforeAndAfter {

  var tempDir: Path = _

  before {
    tempDir = Files.createTempDirectory("writeReadTransformer")
  }

  after {
    FileUtils.deleteDirectory(tempDir.toFile)
  }
}

object TransformerSerialization {

  implicit class TransformerSerializationOps(private val transformer: Transformer) {

    def applyTransformationAndSerialization(path: Path, df: DataFrame)(
        implicit executionContext: ExecutionContext): DataFrame = {
      val result = transformer._transform(executionContext, df)
      val deserialized = loadSerializedTransformer(path)
      val resultFromSerializedTransformer = deserialized._transform(executionContext, df)
      DeeplangIntegTestSupport.assertDataFramesEqual(result, resultFromSerializedTransformer)
      result
    }

    def loadSerializedTransformer(path: Path)(
        implicit executionContext: ExecutionContext): Transformer = {
      val outputPath: Path = path.resolve(this.getClass.getName)
      transformer.save(executionContext, outputPath.toString)
      Transformer.load(executionContext, outputPath.toString)
    }
  }
}
Example 29
Source File: ParquetIOTest.scala From ratatool with Apache License 2.0
package com.spotify.ratatool.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}
import java.nio.file.Files

import com.spotify.ratatool.Schemas
import com.spotify.ratatool.avro.specific.TestRecord
import com.spotify.ratatool.scalacheck._
import org.apache.commons.io.FileUtils
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class ParquetIOTest extends AnyFlatSpec with Matchers {

  private val genericSchema = Schemas.avroSchema
  private val genericGen = genericRecordOf(genericSchema)
  private val genericData = (1 to 100).flatMap(_ => genericGen.sample)

  private val specificSchema = TestRecord.getClassSchema
  private val specificGen = specificRecordOf[TestRecord]
  private val specificData = (1 to 100).flatMap(_ => specificGen.sample)

  "ParquetIO" should "work with generic record and stream" in {
    val out = new ByteArrayOutputStream()
    ParquetIO.writeToOutputStream(genericData, genericSchema, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = ParquetIO.readFromInputStream(in).toList
    result should equal (genericData)
  }

  it should "work with generic record and file" in {
    val dir = Files.createTempDirectory("ratatool-")
    val file = new File(dir.toString, "temp.parquet")
    ParquetIO.writeToFile(genericData, genericSchema, file)
    val result = ParquetIO.readFromFile(file).toList
    result should equal (genericData)
    FileUtils.deleteDirectory(dir.toFile)
  }

  it should "work with specific record and stream" in {
    val out = new ByteArrayOutputStream()
    ParquetIO.writeToOutputStream(specificData, specificSchema, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = ParquetIO.readFromInputStream[TestRecord](in).toList
    result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_)))
  }

  it should "work with specific record and file" in {
    val dir = Files.createTempDirectory("ratatool-")
    val file = new File(dir.toString, "temp.parquet")
    ParquetIO.writeToFile(specificData, specificSchema, file)
    val result = ParquetIO.readFromFile[TestRecord](file).toList
    result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_)))
    FileUtils.deleteDirectory(dir.toFile)
  }
}
Example 30
Source File: ModifyFilesRuleTest.scala From RTran with Apache License 2.0
package com.ebay.rtran.generic

import java.io.File

import org.apache.commons.io.FileUtils
import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers}

import scala.io.Source

class ModifyFilesRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach {

  val projectRoot = new File(getClass.getClassLoader.getResource("someproject").getFile)
  val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak")

  override def beforeEach = {
    FileUtils.deleteQuietly(destProjectRoot)
    FileUtils.copyDirectory(projectRoot, destProjectRoot)
  }

  "ModifyFilesRule" should "modify the file correctly" in {
    val ruleConfig = ModifyFilesRuleConfig(
      "**/fileA.txt",
      None,
      List(
        ContentMapping("hello\\s(.+)\\n", "hallo $1\n"),
        ContentMapping("(.+)\\sBob", "$1 Alice")
      )
    )
    val projectCtx = new GenericProjectCtx(destProjectRoot)
    val provider = new AllFilesModelProvider
    val model = provider create projectCtx
    val rule = new ModifyFilesRule(ruleConfig)
    val result = rule transform model
    val file = result.files.find(_.getName == "fileA.txt")
    file.nonEmpty should be (true)
    Source.fromFile(file.get).getLines.toList should be (List("hallo world", "hi Alice"))
  }
}
Example 31
Source File: MoveFilesRuleTest.scala From RTran with Apache License 2.0
package com.ebay.rtran.generic

import java.io.File

import org.apache.commons.io.FileUtils
import org.json4s.jackson.JsonMethods._
import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers}

class MoveFilesRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach {

  val projectRoot = new File(getClass.getClassLoader.getResource("someproject").getFile)
  val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak")

  override def beforeEach = {
    FileUtils.deleteQuietly(destProjectRoot)
    FileUtils.copyDirectory(projectRoot, destProjectRoot)
  }

  "MoveFilesRule" should "move file to the dest directory" in {
    // Note: this test is garbled in the original excerpt - the JSON literal passed to
    // asJsonNode(parse(...)) is truncated, and only the tail of the ruleConfig construction
    // (involving Move("**.txt", "otherdirectory/dest") and Move("*.txt", "otherdirectory")) survives.
    val ruleConfigJson = asJsonNode(parse(
      """
        |{
        |  "moves":[
        |    {
        |      "pathPattern":"**.txt",
        "otherdirectory/dest"),
        Move("*.txt", "otherdirectory")
      )
    )
    val projectCtx = new GenericProjectCtx(destProjectRoot)
    val provider = new AllFilesModelProvider
    val model = provider create projectCtx
    val rule = new MoveFilesRule(ruleConfig)
    val result = rule transform model
    result.files forall (_.exists) should be (true)
  }
}
Example 32
Source File: ModifyXMLFilesRuleTest.scala From RTran with Apache License 2.0
package com.ebay.rtran.generic

import java.io.File

import org.apache.commons.io.FileUtils
import com.ebay.rtran.xml._
import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers}

import scala.io.Source
import scala.language.postfixOps

class ModifyXMLFilesRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach {

  val projectRoot = new File(getClass.getClassLoader.getResource("someproject").getFile)
  val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak")

  override def beforeEach = {
    FileUtils.deleteQuietly(destProjectRoot)
    FileUtils.copyDirectory(projectRoot, destProjectRoot)
  }

  "ModifyXMLFilesRuleTest" should "able to delete nodes" in {
    // Note: the original excerpt declares `val provider = new XMLFilesModelProvider` twice in this
    // test (likely residue of a collapsed section); the duplicate declaration has been dropped here.
    val ruleConfig = ModifyXMLFilesRuleConfig(
      Some("***.xml"),
      List(
        ModifyXMLOperation(
          "//person[@name=\'Bob\']/job",
          OperationType.Replace,
          Some("<job>Software Engineer</job>")
        )
      )
    )
    val provider = new XMLFilesModelProvider
    val rule = new ModifyXMLFilesRule(ruleConfig)
    val transformedModel = rule.transform(provider.create(new GenericProjectCtx(destProjectRoot)))
    provider save transformedModel
    val transformedContent = Source.fromFile(new File(destProjectRoot, "somedirectory/someXML.xml")).getLines.mkString("\n")
    transformedContent should include ("Bob")
    transformedContent should include ("Software Engineer")
    transformedContent should not include "Salesman"
  }
}
Example 33
Source File: RuleEngineTest.scala From RTran with Apache License 2.0
package com.ebay.rtran.core

import java.io.File

import org.apache.commons.io.FileUtils
import org.json4s.jackson.JsonMethods._
import com.ebay.rtran.core.mock.{MyModifyFileRule, MyProject, MyRenameFileRule, MyRenameFileRuleConfig}
import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers}

import scala.io.Source
import scala.collection.JavaConversions._

class RuleEngineTest extends FlatSpecLike with Matchers with BeforeAndAfterEach {

  val projectDir = new File(getClass.getClassLoader.getResource("myproject").getFile)
  val backupDir = new File(projectDir.getParentFile, projectDir.getName + "-bak")

  override def beforeEach = {
    FileUtils.copyDirectory(projectDir, backupDir)
  }

  override def afterEach = {
    FileUtils.deleteQuietly(backupDir)
  }

  "RuleEngine" should "execute rules from UpgradeConfiguration" in {
    val engine = new RuleEngine
    val projectRoot = backupDir
    val configuration = JsonUpgradeConfiguration(
      List(
        JsonRuleConfiguration("ModifyFileRule", None),
        JsonRuleConfiguration("RenameFileRule", Some(parse("""{"newName":"anotherfile"}""")))
      ))
    engine.execute(new MyProject(projectRoot), configuration)
    new File(projectRoot, "somefile").exists should be (false)
    new File(projectRoot, "anotherfile").exists should be (true)
    Source.fromFile(new File(projectRoot, "anotherfile")).getLines.toList should be (List("hi world", "hi Bob"))
  }

  "RuleEngine" should "execute rules from code" in {
    val engine = new RuleEngine
    val projectRoot = backupDir
    engine.execute(
      new MyProject(projectRoot),
      List(
        new MyModifyFileRule(),
        new MyRenameFileRule(MyRenameFileRuleConfig("anotherfile"))
      )
    )
    new File(projectRoot, "somefile").exists should be (false)
    new File(projectRoot, "anotherfile").exists should be (true)
    Source.fromFile(new File(projectRoot, "anotherfile")).getLines.toList should be (List("hi world", "hi Bob"))
  }
}
Example 34
Source File: CliExec.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.salesforce.op.cli // scalastyle:off // TODO(vlad): make sure that a simple intellij run fills in the resources // @see https://github.com/apache/spark/blob/master/mllib/src/main/scala/org/apache/spark/ml/util/MetadataUtils.scala#L54 // scalastyle:on import java.io.File import com.salesforce.op.cli.gen.Ops import org.apache.commons.io.FileUtils class CliExec { protected val DEBUG = false private[cli] def delete(dir: File): Unit = { FileUtils.deleteDirectory(dir) if (dir.exists()) { throw new IllegalStateException(s"Directory '${dir.getAbsolutePath}' still exists") } } def main(args: Array[String]): Unit = try { val ops = for { arguments <- CommandParser.parse(args, CliParameters()) if arguments.command == "gen" settings <- arguments.values } yield Ops(settings) ops getOrElse { CommandParser.showUsage() quit("wrong arguments", 1) } val outcome = ops.map (_.run()) outcome getOrElse quit("Generation failed; see error messages", 1) } catch { case x: Exception => if (DEBUG) x.printStackTrace() val msg = Option(x.getMessage).getOrElse(x.getStackTrace.mkString("", "\n", "\n")) quit(msg) } def quit(errorMsg: String, code: Int = -1): Nothing = { System.err.println(errorMsg) sys.exit(code) } } object CLI { def main(args: Array[String]): Unit = (new CliExec).main(args) }
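The delete helper in CliExec pairs FileUtils.deleteDirectory with an explicit existence check, because the directory can still be present afterwards if another process recreates it or holds a lock on it. A small sketch of that delete-and-verify idiom; the object and method names here are ours, not part of TransmogrifAI:

import java.io.File
import org.apache.commons.io.FileUtils

object SafeDelete {
  // Deletes a directory recursively and fails loudly if it is still present afterwards.
  def deleteOrFail(dir: File): Unit = {
    FileUtils.deleteDirectory(dir) // throws IOException if deletion fails outright
    if (dir.exists()) {
      throw new IllegalStateException(s"Directory '${dir.getAbsolutePath}' still exists")
    }
  }
}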
Example 35
Source File: LogFile.scala From kyuubi with Apache License 2.0 | 5 votes |
package yaooqinn.kyuubi.operation import java.io.{BufferedReader, File, FileInputStream, FileNotFoundException, FileOutputStream, InputStreamReader, IOException, PrintStream} import java.util.ArrayList import scala.collection.JavaConverters._ import org.apache.commons.io.FileUtils import org.apache.hadoop.io.IOUtils import org.apache.kyuubi.Logging import org.apache.spark.sql.Row import yaooqinn.kyuubi.KyuubiSQLException class LogFile private ( file: File, private var reader: Option[BufferedReader], writer: PrintStream, @volatile private var isRemoved: Boolean = false) extends Logging { def this(file: File) = { this(file, LogFile.createReader(file, isRemoved = false), new PrintStream(new FileOutputStream(file))) } private def resetReader(): Unit = { reader.foreach(IOUtils.closeStream) reader = None } private def readResults(nLines: Long): Seq[Row] = { reader = reader.orElse(LogFile.createReader(file, isRemoved)) val logs = new ArrayList[Row]() reader.foreach { r => var i = 1 try { var line: String = r.readLine() while ((i < nLines || nLines <= 0) && line != null) { logs.add(Row(line)) line = r.readLine() i += 1 } } catch { case e: FileNotFoundException => val operationHandle = file.getName val path = file.getAbsolutePath val msg = if (isRemoved) { s"Operation[$operationHandle] has been closed and the log file $path has been removed" } else { s"Operation[$operationHandle] Log file $path is not found" } throw new KyuubiSQLException(msg, e) } } logs.asScala } def write(msg: String): Unit = { writer.print(msg) } def close(): Unit = synchronized { try { reader.foreach(_.close()) writer.close() if (!isRemoved) { FileUtils.forceDelete(file) isRemoved = true } } catch { case e: IOException => error(s"Failed to remove corresponding log file of operation: ${file.getName}", e) } } } object LogFile { def createReader(file: File, isRemoved: Boolean): Option[BufferedReader] = try { Option(new BufferedReader(new InputStreamReader(new FileInputStream(file)))) } catch { case e: FileNotFoundException => val operationHandle = file.getName val path = file.getAbsolutePath val msg = if (isRemoved) { s"Operation[$operationHandle] has been closed and the log file $path has been removed" } else { s"Operation[$operationHandle] Log file $path is not found" } throw new KyuubiSQLException(msg, e) } }
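close() above removes the log file with FileUtils.forceDelete, which signals failure by throwing an IOException (a FileNotFoundException if the file is already gone) rather than returning a boolean, so the caller can log the problem. A quick sketch contrasting it with deleteQuietly; the file path is only illustrative:

import java.io.{File, IOException}
import org.apache.commons.io.FileUtils

object DeleteStyles {
  def main(args: Array[String]): Unit = {
    val f = new File("/tmp/operation-123.log") // hypothetical operation log file
    // Quiet variant: never throws, reports success as a boolean.
    val removed: Boolean = FileUtils.deleteQuietly(f)
    println(s"deleteQuietly removed the file: $removed")
    // Forceful variant: throws if the file cannot be deleted or does not exist.
    try FileUtils.forceDelete(f)
    catch { case e: IOException => println(s"forceDelete failed: ${e.getMessage}") }
  }
}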
Example 36
Source File: DefaultSourceSpec.scala From jgit-spark-connector with Apache License 2.0 | 5 votes |
package tech.sourced.engine import java.nio.file.{Path, Paths} import java.util.UUID import org.apache.commons.io.FileUtils import org.eclipse.jgit.api.Git class DefaultSourceSpec extends BaseSourceSpec("DefaultSource") { var tmpPath: Path = Paths.get(System.getProperty("java.io.tmpdir"), UUID.randomUUID.toString) override protected def beforeAll(): Unit = { super.beforeAll() tmpPath.toFile.mkdir() } "DefaultSource" should "not optimize if the conditions on the " + "join are not the expected ones" in { val repos = engine.getRepositories val references = ss.read.format("tech.sourced.engine").option("table", "references").load() val out = repos.join(references, (references("repository_id") === repos("id")) .and(references("name").startsWith("refs/pull")) ).count() val df = references.limit(1).getCommits df.count() should be(1) } it should "return the remote branches renamed to refs/heads" in { val repoDir = tmpPath.resolve("repo") Git.cloneRepository() .setURI("https://github.com/src-d/jgit-spark-connector.git") .setDirectory(repoDir.toFile) .call() val engine = Engine(ss, tmpPath.toString, "standard") val masters = engine.getRepositories .getMaster .collect() .sortBy(_.getAs[String]("repository_id")) masters.length should be(2) masters(0).getAs[String]("repository_id") should startWith("file") masters(0).getAs[Boolean]("is_remote") should be(false) masters(1).getAs[String]("repository_id") should startWith("github") masters(1).getAs[Boolean]("is_remote") should be(true) engine.getRepositories.getRemoteReferences.getMaster.count() should be(1) } it should "match HEAD and not just refs/heads/HEAD" in { val repoDir = tmpPath.resolve("repo") import tech.sourced.engine.util.RepoUtils._ val repo = createRepo(repoDir) commitFile(repo, "foo", "bar", "baz") Engine(ss, tmpPath.toString, "standard").getRepositories.getHEAD.count() should be(1) } it should "traverse all commits if it's not chained" in { val row = engine.session.sql("SELECT COUNT(*) FROM commits").first() row(0) should be(4444) val row2 = engine.session.sql("SELECT COUNT(*) FROM commits WHERE index > 0").first() row2(0) should be(4390) } override protected def afterAll(): Unit = { super.afterAll() FileUtils.deleteQuietly(tmpPath.toFile) } }
Example 37
Source File: RepositoryRDDProviderSpec.scala From jgit-spark-connector with Apache License 2.0 | 5 votes |
package tech.sourced.engine.provider import java.nio.file.{Path, Paths} import java.util.UUID import org.apache.commons.io.FileUtils import org.scalatest.{BeforeAndAfterEach, FlatSpec, Matchers} import tech.sourced.engine.util.RepoUtils import tech.sourced.engine.{BaseSivaSpec, BaseSparkSpec} class RepositoryRDDProviderSpec extends FlatSpec with Matchers with BeforeAndAfterEach with BaseSparkSpec with BaseSivaSpec { private var provider: RepositoryRDDProvider = _ private var tmpPath: Path = _ override def beforeEach(): Unit = { super.beforeEach() provider = RepositoryRDDProvider(ss.sparkContext) tmpPath = Paths.get( System.getProperty("java.io.tmpdir"), UUID.randomUUID().toString ) } override def afterEach(): Unit = { super.afterEach() FileUtils.deleteQuietly(tmpPath.toFile) } "RepositoryRDDProvider" should "retrieve bucketized raw repositories" in { tmpPath.resolve("a").toFile.mkdir() createRepo(tmpPath.resolve("a").resolve("repo")) tmpPath.resolve("b").toFile.mkdir() createRepo(tmpPath.resolve("b").resolve("repo")) createRepo(tmpPath.resolve("repo")) val repos = provider.get(tmpPath.toString, "standard").collect() repos.length should be(3) } it should "retrieve non-bucketized raw repositories" in { tmpPath.resolve("a").toFile.mkdir() createRepo(tmpPath.resolve("repo")) tmpPath.resolve("b").toFile.mkdir() createRepo(tmpPath.resolve("repo2")) val repos = provider.get(tmpPath.toString, "standard").collect() repos.length should be(2) } it should "retrieve bucketized siva repositories" in { val repos = provider.get(resourcePath, "siva").collect() repos.length should be(3) } it should "retrieve non-bucketized siva repositories" in { val repos = provider.get(Paths.get(resourcePath, "ff").toString, "siva").collect() repos.length should be(1) } private def createRepo(path: Path) = { val repo = RepoUtils.createRepo(path) RepoUtils.commitFile(repo, "file.txt", "something something", "some commit") } }
Example 38
Source File: RepoUtils.scala From jgit-spark-connector with Apache License 2.0 | 5 votes |
package tech.sourced.engine.util import java.nio.file.{Path, Paths} import org.apache.commons.io.FileUtils import org.eclipse.jgit.api.CreateBranchCommand.SetupUpstreamMode import org.eclipse.jgit.api.Git import org.eclipse.jgit.revwalk.RevCommit import org.eclipse.jgit.transport.URIish object RepoUtils { def createBareRepo(path: Path): Git = { Git.init().setBare(true).setDirectory(path.toFile).call() } def createRepo(path: Path): Git = { Git.init().setDirectory(path.toFile).call() } def addRemote(repo: Git, name: String, url: String): Unit = { val cmd = repo.remoteAdd() cmd.setName(name) cmd.setUri(new URIish(url)) cmd.call() } def commitFile(repo: Git, name: String, content: String, msg: String): RevCommit = { val file = Paths.get(repo.getRepository.getDirectory.getParent, name) FileUtils.write(file.toFile, content) repo.add().addFilepattern(name).call() repo.commit().setMessage(msg).call() } }
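commitFile writes the file with FileUtils.write(file, content), the charset-less overload that is deprecated in recent commons-io releases because it falls back to the platform default encoding. A sketch of the same call with the charset made explicit; the target path is illustrative:

import java.io.File
import java.nio.charset.StandardCharsets
import org.apache.commons.io.FileUtils

object WriteWithCharset {
  def main(args: Array[String]): Unit = {
    val file = new File("/tmp/repo-utils-demo.txt") // hypothetical target file
    // Creates missing parent directories and overwrites any existing content.
    FileUtils.write(file, "some content", StandardCharsets.UTF_8)
    // Appending instead of overwriting is a boolean flag on the same overload.
    FileUtils.write(file, "\nmore content", StandardCharsets.UTF_8, true)
  }
}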
Example 39
Source File: MetadataIteratorSpec.scala From jgit-spark-connector with Apache License 2.0 | 5 votes |
package tech.sourced.engine.iterator import java.nio.file.Paths import java.util.{Properties, UUID} import org.apache.commons.io.FileUtils import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} import org.apache.spark.sql.types.{Metadata, StringType, StructType} import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} import tech.sourced.engine.{BaseSparkSpec, Schema} class JDBCQueryIteratorSpec extends FlatSpec with Matchers with BeforeAndAfterAll with BaseSparkSpec { private val tmpPath = Paths.get( System.getProperty("java.io.tmpdir"), UUID.randomUUID.toString ) private val dbPath = tmpPath.resolve("test.db") override def beforeAll(): Unit = { super.beforeAll() tmpPath.toFile.mkdir() val rdd = ss.sparkContext.parallelize(Seq( Row("id1"), Row("id2"), Row("id3") )) val properties = new Properties() properties.put("driver", "org.sqlite.JDBC") val df = ss.createDataFrame(rdd, StructType(Seq(Schema.repositories.head))) df.write.jdbc(s"jdbc:sqlite:${dbPath.toString}", "repositories", properties) } override def afterAll(): Unit = { super.afterAll() FileUtils.deleteQuietly(tmpPath.toFile) } "JDBCQueryIterator" should "return all rows for the query" in { val iter = new JDBCQueryIterator( Seq(attr("id")), dbPath.toString, "SELECT id FROM repositories ORDER BY id" ) // calling hasNext more than one time does not cause rows to be lost iter.hasNext iter.hasNext val rows = (for (row <- iter) yield row).toArray rows.length should be(3) rows(0).length should be(1) rows(0)(0).toString should be("id1") rows(1)(0).toString should be("id2") rows(2)(0).toString should be("id3") } private def attr(name: String): Attribute = AttributeReference( name, StringType, nullable = false, Metadata.empty )() }
Example 40
Source File: BloopSpec.scala From seed with Apache License 2.0 | 5 votes |
package seed.generation import java.nio.file.{Files, Path} import bloop.config.ConfigEncoderDecoders import minitest.SimpleTestSuite import org.apache.commons.io.FileUtils import seed.generation.util.BuildUtil.tempPath object BloopSpec extends SimpleTestSuite { def parseBloopFile(path: Path): bloop.config.Config.File = { val json = FileUtils.readFileToString(path.toFile, "UTF-8") io.circe.parser.decode(json)(ConfigEncoderDecoders.allDecoder).right.get } test("Inherit javaDeps in child modules") { val projectPath = tempPath.resolve("inherit-javadeps") Files.createDirectory(projectPath) val bloopPath = projectPath.resolve(".bloop") val build = util.ProjectGeneration.generateJavaDepBloopProject(projectPath) assertEquals(build("example").module.jvm.get.moduleDeps, List("base")) val base = parseBloopFile(bloopPath.resolve("base.json")) assert( base.project.classpath .exists(_.toString.contains("/org/postgresql/postgresql/")) ) val example = parseBloopFile(bloopPath.resolve("example.json")) assert( example.project.classpath .exists(_.toString.contains("/org/postgresql/postgresql/")) ) val exampleTest = parseBloopFile(bloopPath.resolve("example-test.json")) assert( exampleTest.project.classpath .exists(_.toString.contains("/org/postgresql/postgresql/")) ) } }
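parseBloopFile reads the whole JSON file into memory with FileUtils.readFileToString before handing it to circe. A minimal reading sketch; the path, and the assumption that the file is small enough to hold in a single String, are ours:

import java.io.File
import java.nio.charset.StandardCharsets
import org.apache.commons.io.FileUtils

object ReadWholeFile {
  def main(args: Array[String]): Unit = {
    val file = new File("/tmp/example.json") // hypothetical small JSON file
    // Loads the entire file into one String; fine for config-sized files.
    val text: String = FileUtils.readFileToString(file, StandardCharsets.UTF_8)
    println(text.take(200))
    // The line-oriented variant returns a java.util.List[String].
    val lines = FileUtils.readLines(file, StandardCharsets.UTF_8)
    println(s"${lines.size()} lines")
  }
}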
Example 41
Source File: PublishSpec.scala From seed with Apache License 2.0 | 5 votes |
package seed.cli import java.io.File import java.nio.file.{Files, Path} import minitest.SimpleTestSuite import org.apache.commons.io.FileUtils import seed.Log import seed.generation.util.BuildUtil import sys.process._ object PublishSpec extends SimpleTestSuite { def testVersionDetection(path: File): Unit = { Process("git init", path).!! FileUtils.write(new File(path, "test.txt"), "test", "UTF-8") Process("git add test.txt", path).!! Process("git commit . -m import", path).!! Process("git tag 0.1.0", path).!! // no 'v' prefix assertEquals( Publish.getVersion(path.toPath, None, Log.silent), Some("0.1.0") ) FileUtils.write(new File(path, "test2.txt"), "test", "UTF-8") Process("git add test2.txt", path).!! Process("git commit . -m import", path).!! Process("git tag v0.1.1", path).!! // 'v' prefix assertEquals( Publish.getVersion(path.toPath, None, Log.silent), Some("0.1.1") ) } test("Determine version number (relative path)") { val relativePath = new File("temp-git-version") if (Files.exists(relativePath.toPath)) FileUtils.deleteDirectory(relativePath) Files.createDirectories(relativePath.toPath) testVersionDetection(relativePath) FileUtils.deleteDirectory(relativePath) } test("Determine version number (absolute path)") { val relativePath = BuildUtil.tempPath.resolve("git-version") if (Files.exists(relativePath)) FileUtils.deleteDirectory(relativePath.toFile) Files.createDirectories(relativePath) testVersionDetection(relativePath.toFile) } }
Example 42
Source File: WatcherSpec.scala From seed with Apache License 2.0 | 5 votes |
package seed.cli.util import java.nio.file.Files import minitest.SimpleTestSuite import org.apache.commons.io.FileUtils import seed.generation.util.BuildUtil import zio.IO import scala.collection.mutable import scala.concurrent.ExecutionContext.Implicits.global object WatcherSpec extends SimpleTestSuite { testAsync("Detect new file in root path") { val rootPath = BuildUtil.tempPath.resolve("watcher") Files.createDirectories(rootPath) val collected = mutable.ListBuffer[Unit]() var stop = false val watcher = Watcher .watchPaths( List(rootPath), () => { // Only consider Scala/Java source files FileUtils.write(rootPath.resolve("test.html").toFile, "test", "UTF-8") stop = true FileUtils .write(rootPath.resolve("test.scala").toFile, "test", "UTF-8") } ) .foreachWhile { v => IO.effectTotal { collected += v !stop } } RTS.unsafeRunToFuture(watcher).map(_ => assertEquals(collected, List(()))) } testAsync("Detect new file in sub-directory") { val rootPath = BuildUtil.tempPath.resolve("watcher2") val subDirectoryPath = rootPath.resolve("sub") Files.createDirectories(subDirectoryPath) val collected = mutable.ListBuffer[Unit]() var stop = false val watcher = Watcher .watchPaths(List(rootPath), { () => stop = true FileUtils.write(rootPath.resolve("test.scala").toFile, "test", "UTF-8") }) .foreachWhile { v => IO.effectTotal { collected += v !stop } } RTS.unsafeRunToFuture(watcher).map(_ => assertEquals(collected, List(()))) } testAsync("Watch file path") { val rootPath = BuildUtil.tempPath.resolve("watcher3") Files.createDirectories(rootPath) val filePath = rootPath.resolve("test.scala") FileUtils.write(filePath.toFile, "test", "UTF-8") val collected = mutable.ListBuffer[Unit]() var stop = false val watcher = Watcher .watchPaths(List(filePath), { () => stop = true FileUtils.write(filePath.toFile, "test2", "UTF-8") }) .foreachWhile { v => IO.effectTotal { collected += v !stop } } RTS.unsafeRunToFuture(watcher).map(_ => assertEquals(collected, List(()))) } }
Example 43
Source File: XMLFilesModelProviderTest.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.generic import java.io.File import org.apache.commons.io.FileUtils import com.ebay.rtran.xml.XMLFilesModelProvider import org.scalatest.{FlatSpecLike, Matchers} import scala.io.Source class XMLFilesModelProviderTest extends FlatSpecLike with Matchers { val projectRoot = new File(getClass.getClassLoader.getResource("someproject").getFile) "XMLFilesModeProvider" should "get all xml files in the project" in { val provider = new XMLFilesModelProvider val model = provider.create(new GenericProjectCtx(projectRoot)) model.xmlRoots.size should be (1) } "XMLFilesModeProvider" should "be able to save the files that are marked modified" in { val provider = new XMLFilesModelProvider val model = provider.create(new GenericProjectCtx(projectRoot)) val (file, root) = model.xmlRoots.head val newFile = new File(file.getParentFile, file.getName + ".new") provider.save(model.copy(modified = Map(newFile -> Some(root)))) val content = Source.fromFile(newFile).getLines.filterNot(_.matches("\\s+")).map(_.trim).mkString content should not be "" FileUtils.deleteQuietly(newFile) } }
Example 44
Source File: LogWriterSpec.scala From mist with Apache License 2.0 | 5 votes |
package io.hydrosphere.mist.master.logging import java.nio.file.{Files, Paths} import akka.actor.ActorSystem import akka.pattern.ask import akka.testkit.{TestActorRef, TestKit} import akka.util.Timeout import com.typesafe.config.ConfigFactory import io.hydrosphere.mist.core.logging.LogEvent import io.hydrosphere.mist.master.LogStoragePaths import org.apache.commons.io.FileUtils import org.scalatest.{BeforeAndAfterAll, FunSpecLike, Matchers} import scala.concurrent.Await import scala.concurrent.duration._ class LogWriterSpec extends TestKit(ActorSystem("log-writer-test", ConfigFactory.load("master"))) with FunSpecLike with Matchers with BeforeAndAfterAll { val dirName = "log_writer_test" val dir = Paths.get(".", "target", dirName) override def beforeAll(): Unit = { Files.createDirectories(dir) } override def afterAll(): Unit = { FileUtils.deleteDirectory(dir.toFile) TestKit.shutdownActorSystem(system) } implicit val timeout = Timeout(5 second) describe("writer actor") { it("should write to file") { val path = dir.resolve("test") val f = path.toFile if (f.exists()) f.delete() Files.createFile(path) val actor = TestActorRef(WriterActor.props(path)) val event = LogEvent.mkDebug("id", "message") val future = actor ? WriteRequest("id", Seq(event)) val update = Await.result(future.mapTo[LogUpdate], Duration.Inf) update.jobId shouldBe "id" update.events shouldBe Seq(event) update.bytesOffset shouldBe (event.mkString + "\n").getBytes.length } } describe("writers group") { it("should proxy to writer") { val mappings = new LogStoragePaths(dir) val expectedPath = mappings.pathFor("id") if (Files.exists(expectedPath)) Files.delete(expectedPath) val actor = TestActorRef(WritersGroupActor.props(mappings)) val event = LogEvent.mkDebug("id", "message") val future = actor ? WriteRequest("id", Seq(event)) val update = Await.result(future.mapTo[LogUpdate], Duration.Inf) val expectedSize = (event.mkString + "\n").getBytes.length update.jobId shouldBe "id" update.events shouldBe Seq(event) update.bytesOffset shouldBe expectedSize Files.readAllBytes(expectedPath).length shouldBe expectedSize } } }
Example 45
Source File: FStorageSpec.scala From mist with Apache License 2.0 | 5 votes |
package io.hydrosphere.mist.master.data import java.nio.file.Paths import com.typesafe.config.{Config, ConfigValueFactory} import io.hydrosphere.mist.master.models.NamedConfig import org.apache.commons.io.FileUtils import org.scalatest._ class FStorageSpec extends FunSpec with Matchers with BeforeAndAfter { case class TestEntry( name: String, value: Int ) extends NamedConfig val testEntryConfigRepr = new ConfigRepr[TestEntry] { import scala.collection.JavaConverters._ override def fromConfig(config: Config): TestEntry = { TestEntry(config.getString("name"), config.getInt("value")) } override def toConfig(a: TestEntry): Config = { val map = Map("value" -> ConfigValueFactory.fromAnyRef(a.value)) ConfigValueFactory.fromMap(map.asJava).toConfig } } val path = "./target/file_store_test" before { val f = Paths.get(path).toFile if (f.exists()) FileUtils.deleteDirectory(f) } it("should store files") { val storage = FsStorage.create(path, testEntryConfigRepr) storage.write("one", TestEntry("one", 1)) storage.write("two", TestEntry("two", 2)) storage.entries should contain allOf( TestEntry("one", 1), TestEntry("two", 2) ) storage.delete("one") storage.entries should contain allElementsOf(Seq(TestEntry("two", 2))) } }
Example 46
Source File: FunctionConfigStorageSpec.scala From mist with Apache License 2.0 | 5 votes |
package io.hydrosphere.mist.master.data import java.nio.file.Paths import io.hydrosphere.mist.master.models.FunctionConfig import org.apache.commons.io.FileUtils import org.scalatest.{BeforeAndAfter, Matchers, FunSpec} class FunctionConfigStorageSpec extends FunSpec with Matchers with BeforeAndAfter { val path = "./target/data/func_store_test" before { val f = Paths.get(path).toFile if (f.exists()) FileUtils.deleteDirectory(f) } import scala.concurrent.ExecutionContext.Implicits.global import io.hydrosphere.mist.master.TestUtils._ it("should update") { val functions = testStorage() functions.all.await.size shouldBe 1 functions.update(FunctionConfig("second", "path", "className", "foo")).await functions.all.await.size shouldBe 2 } it("should get") { val functions = testStorage() functions.get("first").await.isDefined shouldBe true functions.get("second").await.isDefined shouldBe false functions.update(FunctionConfig("second", "path", "className", "foo")).await functions.get("second").await.isDefined shouldBe true } it("should override defaults") { val functions = testStorage() functions.get("first").await.get.className shouldBe "className" functions.update(FunctionConfig("first", "path", "anotherClassName", "foo")).await functions.get("first").await.get.className shouldBe "anotherClassName" } def testStorage( defaults: Seq[FunctionConfig] = Seq(FunctionConfig("first", "path", "className", "foo"))): FunctionConfigStorage = { new FunctionConfigStorage( FsStorage.create(path, ConfigRepr.EndpointsRepr), defaults ) } }
Example 47
Source File: RunnerSelectorSpec.scala From mist with Apache License 2.0 | 5 votes |
package io.hydrosphere.mist.worker.runners import java.io.File import java.nio.file.Paths import io.hydrosphere.mist.worker.SparkArtifact import io.hydrosphere.mist.worker.runners.python.PythonRunner import org.apache.commons.io.FileUtils import org.scalatest.{BeforeAndAfter, FunSpecLike, Matchers} class RunnerSelectorSpec extends FunSpecLike with Matchers with BeforeAndAfter { val basePath = "./target/runner" val pyFile = SparkArtifact(Paths.get(basePath, "test.py").toFile, "url") val jarFile = SparkArtifact(Paths.get(basePath, "test.jar").toFile, "url") val unknown = SparkArtifact(Paths.get(basePath, "test.unknown").toFile, "url") before { val f = new File(basePath) if (f.exists()) FileUtils.deleteDirectory(f) FileUtils.forceMkdir(f) FileUtils.touch(pyFile.local) FileUtils.touch(jarFile.local) } after { FileUtils.deleteQuietly(pyFile.local) FileUtils.deleteQuietly(jarFile.local) } it("should select runner by extension") { val selector = new SimpleRunnerSelector selector.selectRunner(pyFile) shouldBe a[PythonRunner] selector.selectRunner(jarFile) shouldBe a[ScalaRunner] } it("should throw exception when unknown file type is passed") { val selector = new SimpleRunnerSelector intercept[IllegalArgumentException] { selector.selectRunner(unknown) } } }
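The before block above prepares an empty working directory with FileUtils.forceMkdir and then creates zero-byte marker files with FileUtils.touch. A compact sketch of that scaffolding, with an illustrative scratch path of our own:

import java.io.File
import org.apache.commons.io.FileUtils

object Scaffold {
  def main(args: Array[String]): Unit = {
    val base = new File("./target/runner-demo") // illustrative scratch directory
    if (base.exists()) FileUtils.deleteDirectory(base)
    // Creates the directory and any missing parents; throws if a regular file is in the way.
    FileUtils.forceMkdir(base)
    // touch creates an empty file, or updates the timestamp if it already exists.
    FileUtils.touch(new File(base, "job.py"))
    FileUtils.touch(new File(base, "job.jar"))
    println(base.list().mkString(", "))
  }
}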
Example 48
Source File: ForkedSparkContextSpec.scala From pravda-ml with Apache License 2.0 | 5 votes |
package org.apache.spark.ml.odkl import java.io.File import breeze.linalg import odkl.analysis.spark.TestEnv import odkl.analysis.spark.util.SQLOperations import org.apache.commons.io.FileUtils import org.scalatest.FlatSpec class ForkedSparkContextSpec extends FlatSpec with TestEnv with org.scalatest.Matchers with WithTestData { "Fork " should " support one layer" in { val directory = new File(FileUtils.getTempDirectory, "forkedSpark") try { val estimator = new ForkedSparkEstimator[LinearRegressionModel,LinearRegressionSGD](new LinearRegressionSGD()) .setTempPath(directory.getAbsolutePath) .setMaster("local[1]") val model = estimator.fit(noInterceptData) val dev: linalg.Vector[Double] = hiddenModel.asBreeze - model.getCoefficients.asBreeze val deviation: Double = dev dot dev deviation should be <= delta model.getIntercept should be(0.0) } finally { FileUtils.deleteDirectory(directory) } } "Fork " should " support two layers" in { val directory = new File(FileUtils.getTempDirectory, "forkedSpark") try { val estimator = new ForkedSparkEstimator[LinearRegressionModel,ForkedSparkEstimator[LinearRegressionModel,LinearRegressionSGD]]( new ForkedSparkEstimator[LinearRegressionModel,LinearRegressionSGD](new LinearRegressionSGD()) .setTempPath(directory.getAbsolutePath) .setMaster("local[1]")) .setTempPath(directory.getAbsolutePath) .setMaster("local[1]") val model = estimator.fit(noInterceptData) val dev: linalg.Vector[Double] = hiddenModel.asBreeze - model.getCoefficients.asBreeze val deviation: Double = dev dot dev deviation should be <= delta model.getIntercept should be(0.0) } finally { FileUtils.deleteDirectory(directory) } } }
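new File(FileUtils.getTempDirectory, "forkedSpark") builds a scratch location under the JVM's java.io.tmpdir without hard-coding the path. A brief sketch of that pattern together with its string-returning sibling; the sub-directory name is only an example:

import java.io.File
import org.apache.commons.io.FileUtils

object TempScratch {
  def main(args: Array[String]): Unit = {
    // Same value as System.getProperty("java.io.tmpdir"), already wrapped in a File.
    val tmp: File = FileUtils.getTempDirectory
    val tmpPath: String = FileUtils.getTempDirectoryPath
    val scratch = new File(tmp, "forked-demo") // hypothetical sub-directory
    FileUtils.forceMkdir(scratch)
    try println(s"working under $tmpPath in ${scratch.getName}")
    finally FileUtils.deleteDirectory(scratch)
  }
}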
Example 49
Source File: BetweennessEdmonds$Test.scala From sparkling-graph with BSD 2-Clause "Simplified" License | 5 votes |
package ml.sparkling.graph.operators.measures.vertex.betweenness.edmonds import java.nio.file.Files import ml.sparkling.graph.operators.MeasureTest import org.apache.commons.io.FileUtils import org.apache.spark.SparkContext import org.apache.spark.graphx.{Graph, VertexRDD} class BetweennessEdmonds$Test(implicit sc: SparkContext) extends MeasureTest { val tempDir = Files.createTempDirectory("spark-checkpoint") override def beforeAll() = { sc.setCheckpointDir(tempDir.toAbsolutePath.toString) } override def afterAll() = { FileUtils.deleteDirectory(tempDir.toFile) } "Edmonds betweenness centrality for random graph" should "be correctly calculated" in { Given("graph") val filePath = getClass.getResource("/graphs/graph_ER_15") val graph: Graph[Int, Int] = loadGraph(filePath.toString) When("Computes betweenness") val result = EdmondsBC.computeBC(graph) Then("Should calculate betweenness correctly") val bcFile = getClass.getResource("/graphs/graph_ER_15_bc") val bcCorrectValues = sc.textFile(bcFile.getPath) .filter(_.nonEmpty) .map(l => { val t = l.split("\t", 2); (t(0).toInt, t(1).toDouble) }) .sortBy({ case (vId, data) => vId }) .map({ case (vId, data) => data}).collect() val bcValues = result.sortBy({ case (vId, data) => vId }) .map({ case (vId, data) => data }).collect() bcCorrectValues.zip(bcValues).foreach({ case (a, b) => a should be(b +- 1e-5) }) result.unpersist(false) } }
Example 50
Source File: BetweennessHua$Test.scala From sparkling-graph with BSD 2-Clause "Simplified" License | 5 votes |
package ml.sparkling.graph.operators.measures.vertex.betweenness.hua import java.nio.file.Files import ml.sparkling.graph.operators.MeasureTest import ml.sparkling.graph.operators.measures.vertex.betweenness.edmonds.EdmondsBC import org.apache.commons.io.FileUtils import org.apache.spark.SparkContext import org.apache.spark.graphx.Graph import org.scalatest.tagobjects.Slow class BetweennessHua$Test (implicit sc: SparkContext) extends MeasureTest { val tempDir = Files.createTempDirectory("spark-checkpoint") override def beforeAll() = { sc.setCheckpointDir(tempDir.toAbsolutePath.toString) } override def afterAll() = { FileUtils.deleteDirectory(tempDir.toFile) } "Hua betweenness centrality for random graph" should "be correctly calculated" in { Given("graph") val filePath = getClass.getResource("/graphs/graph_ER_15") val graph: Graph[Int, Int] = loadGraph(filePath.toString) When("Computes betweenness") val result = HuaBC.computeBC(graph) Then("Should calculate betweenness correctly") val bcFile = getClass.getResource("/graphs/graph_ER_15_bc") val bcCorrectValues = sc.textFile(bcFile.getPath) .filter(_.nonEmpty) .map(l => { val t = l.split("\t", 2); (t(0).toInt, t(1).toDouble) }) .sortBy({ case (vId, data) => vId }) .map({ case (vId, data) => data}).collect() val bcValues = result.sortBy({ case (vId, data) => vId }) .map({ case (vId, data) => data }).collect() bcCorrectValues.zip(bcValues).foreach({ case (a, b) => a should be(b +- 1e-5) }) result.unpersist(false) } "Hua betweenness centrality for random graph" should "take no longer then Edmonds" taggedAs(Slow) in { Given("graph") val filePath = getClass.getResource("/graphs/graph_ER_15") val graph: Graph[Int, Int] = loadGraph(filePath.toString) When("computes betwenness centrality") val (_, edmondsTime) = time("Edmonds algorithm for betweenness centrality")(EdmondsBC.computeBC(graph)) val (_, huaTime) = time("Hua algorithm for betweenness centrality")(HuaBC.computeBC(graph)) Then("Hua algorithm should be faster") huaTime should be <= edmondsTime } }
Example 51
Source File: SparkTest.scala From sparkling-graph with BSD 2-Clause "Simplified" License | 5 votes |
package ml.sparkling.graph.operators import java.nio.file.{Files, Path} import ml.sparkling.graph.operators.algorithms.aproximation.ApproximatedShortestPathsAlgorithm$Test import ml.sparkling.graph.operators.algorithms.coarsening.labelpropagation.LPCoarsening$Test import ml.sparkling.graph.operators.algorithms.community.pscan.PSCAN$Test import ml.sparkling.graph.operators.algorithms.link.BasicLinkPredictor$Test import ml.sparkling.graph.operators.algorithms.shortestpaths.ShortestPathsAlgorithm$Test import ml.sparkling.graph.operators.measures.edge.AdamicAdar$Test import ml.sparkling.graph.operators.measures.graph.{FreemanCentrality$Test, Modularity$Test} import ml.sparkling.graph.operators.measures.vertex.betweenness.edmonds.BetweennessEdmonds$Test import ml.sparkling.graph.operators.measures.vertex.betweenness.hua.BetweennessHua$Test import ml.sparkling.graph.operators.measures.vertex.closenes.Closeness$Test import ml.sparkling.graph.operators.measures.vertex.clustering.LocalClustering$Test import ml.sparkling.graph.operators.measures.vertex.eigenvector.EigenvectorCentrality$Test import ml.sparkling.graph.operators.measures.vertex.hits.Hits$Test import ml.sparkling.graph.operators.measures.{NeighborhoodConnectivity$Test, VertexEmbeddedness$Test} import ml.sparkling.graph.operators.partitioning.{CommunityBasedPartitioning$Test, PSCANBasedPartitioning$Test, PropagationBasedPartitioning$Test} import org.apache.commons.io.FileUtils import org.apache.spark.{SparkConf, SparkContext} import org.scalatest._ class SparkTest extends Spec with BeforeAndAfterAll { val file: Path = Files.createTempDirectory("tmpCheckpoint") override val invokeBeforeAllAndAfterAllEvenIfNoTestsAreExpected=true val master = "local[8]" def appName: String = "operators-tests" implicit val sc: SparkContext = { val conf = new SparkConf() .setMaster(master) .setAppName(appName) val out=new SparkContext(conf) out.setCheckpointDir(file.toString) out } override def afterAll() = { if(!sc.isStopped){ sc.stop() } FileUtils.deleteDirectory(file.toFile) } override def nestedSuites = { Vector( new PSCANBasedPartitioning$Test, new PropagationBasedPartitioning$Test, new ApproximatedShortestPathsAlgorithm$Test, new ShortestPathsAlgorithm$Test, new EigenvectorCentrality$Test, new VertexEmbeddedness$Test, new PSCAN$Test, new Modularity$Test, new CommunityBasedPartitioning$Test, new NeighborhoodConnectivity$Test, new Hits$Test, new LocalClustering$Test, new FreemanCentrality$Test, new AdamicAdar$Test, new BasicLinkPredictor$Test, new Closeness$Test, new BetweennessEdmonds$Test, new BetweennessHua$Test ) } }
Example 52
Source File: SortShuffleSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark import java.io.File import scala.collection.JavaConverters._ import org.apache.commons.io.FileUtils import org.apache.commons.io.filefilter.TrueFileFilter import org.scalatest.BeforeAndAfterAll import org.apache.spark.rdd.ShuffledRDD import org.apache.spark.serializer.{JavaSerializer, KryoSerializer} import org.apache.spark.shuffle.sort.SortShuffleManager import org.apache.spark.util.Utils class SortShuffleSuite extends ShuffleSuite with BeforeAndAfterAll { // This test suite should run all tests in ShuffleSuite with sort-based shuffle. private var tempDir: File = _ override def beforeAll() { super.beforeAll() // Once 'spark.local.dir' is set, it is cached. Unless this is manually cleared // before/after a test, it could return the same directory even if this property // is configured. Utils.clearLocalRootDirs() conf.set("spark.shuffle.manager", "sort") } override def beforeEach(): Unit = { super.beforeEach() tempDir = Utils.createTempDir() conf.set("spark.local.dir", tempDir.getAbsolutePath) } override def afterEach(): Unit = { try { Utils.deleteRecursively(tempDir) Utils.clearLocalRootDirs() } finally { super.afterEach() } } test("SortShuffleManager properly cleans up files for shuffles that use the serialized path") { sc = new SparkContext("local", "test", conf) // Create a shuffled RDD and verify that it actually uses the new serialized map output path val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x)) val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4)) .setSerializer(new KryoSerializer(conf)) val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]] assert(SortShuffleManager.canUseSerializedShuffle(shuffleDep)) ensureFilesAreCleanedUp(shuffledRdd) } test("SortShuffleManager properly cleans up files for shuffles that use the deserialized path") { sc = new SparkContext("local", "test", conf) // Create a shuffled RDD and verify that it actually uses the old deserialized map output path val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x)) val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4)) .setSerializer(new JavaSerializer(conf)) val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]] assert(!SortShuffleManager.canUseSerializedShuffle(shuffleDep)) ensureFilesAreCleanedUp(shuffledRdd) } private def ensureFilesAreCleanedUp(shuffledRdd: ShuffledRDD[_, _, _]): Unit = { def getAllFiles: Set[File] = FileUtils.listFiles(tempDir, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).asScala.toSet val filesBeforeShuffle = getAllFiles // Force the shuffle to be performed shuffledRdd.count() // Ensure that the shuffle actually created files that will need to be cleaned up val filesCreatedByShuffle = getAllFiles -- filesBeforeShuffle filesCreatedByShuffle.map(_.getName) should be Set("shuffle_0_0_0.data", "shuffle_0_0_0.index") // Check that the cleanup actually removes the files sc.env.blockManager.master.removeShuffle(0, blocking = true) for (file <- filesCreatedByShuffle) { assert (!file.exists(), s"Shuffle file $file was not cleaned up") } } }
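getAllFiles collects every regular file under the temp directory by passing TrueFileFilter.INSTANCE as both the file filter and the directory (recursion) filter. A small sketch of that recursive listing; the root directory and the suffix filter are illustrative:

import java.io.File
import scala.collection.JavaConverters._
import org.apache.commons.io.FileUtils
import org.apache.commons.io.filefilter.{SuffixFileFilter, TrueFileFilter}

object ListRecursively {
  def main(args: Array[String]): Unit = {
    val root = new File("./target") // illustrative root directory
    // Second argument filters files, third controls which sub-directories are descended into.
    val everything = FileUtils.listFiles(root, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).asScala
    println(s"${everything.size} files in total")
    // Narrowing the file filter restricts the result without limiting recursion.
    val onlyIndexFiles = FileUtils.listFiles(root, new SuffixFileFilter(".index"), TrueFileFilter.INSTANCE).asScala
    onlyIndexFiles.foreach(f => println(f.getName))
  }
}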
Example 53
Source File: NsdbMiniCluster.scala From NSDb with Apache License 2.0 | 5 votes |
package io.radicalbit.nsdb.minicluster import java.io.File import java.time.Duration import java.util.UUID import com.typesafe.scalalogging.LazyLogging import org.apache.commons.io.FileUtils trait NsdbMiniCluster extends LazyLogging { protected[this] val instanceId = { UUID.randomUUID } protected[this] val startingHostname = "127.0.0." protected[this] def rootFolder: String protected[this] def nodesNumber: Int protected[this] def passivateAfter: Duration protected[this] def replicationFactor: Int lazy val nodes: Set[NSDbMiniClusterNode] = (for { i <- 0 until nodesNumber } yield new NSDbMiniClusterNode( hostname = s"$startingHostname${i + 1}", storageDir = s"$rootFolder/data$i", passivateAfter = passivateAfter, replicationFactor = replicationFactor )).toSet def start(cleanup: Boolean = false): Unit = { if (cleanup) FileUtils.deleteDirectory(new File(rootFolder)) nodes.foreach(_.start()) } def stop(): Unit = nodes.foreach(n => n.stop()) }
Example 54
Source File: GzipUtils.scala From odinson with Apache License 2.0 | 5 votes |
package ai.lum.odinson.extra import org.apache.commons.io.FileUtils import java.io._ import java.util.zip._ import java.nio.charset.StandardCharsets object GzipUtils { def compress(data: String): Array[Byte] = { val baos = new ByteArrayOutputStream(data.length) val gzip = new GZIPOutputStream(baos) val bytes = data.getBytes(StandardCharsets.UTF_8) gzip.write(bytes) gzip.close() val compressed = baos.toByteArray baos.close() compressed } def uncompress(file: File): String = { val inputStream = FileUtils.openInputStream(file) val res = uncompress(inputStream) inputStream.close() res } def uncompress(compressed: Array[Byte]): String = { uncompress(new ByteArrayInputStream(compressed)) } def uncompress(input: InputStream): String = { val gzip = new GZIPInputStream(input) val br = new BufferedReader(new InputStreamReader(gzip, StandardCharsets.UTF_8)) val sb = new StringBuilder() var line: String = br.readLine() while (line != null) { sb.append(line) line = br.readLine() } br.close() gzip.close() sb.toString() } }
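uncompress(file: File) opens the stream through FileUtils.openInputStream, which checks that the path exists, is a regular file and is readable, and so fails with a clearer IOException than a bare FileInputStream would. A short sketch of that opening step on a hypothetical gzip file:

import java.io.File
import java.nio.charset.StandardCharsets
import java.util.zip.GZIPInputStream
import org.apache.commons.io.{FileUtils, IOUtils}

object OpenGzip {
  def main(args: Array[String]): Unit = {
    val file = new File("/tmp/payload.gz") // hypothetical gzip archive
    // openInputStream validates the path before returning a FileInputStream.
    val in = new GZIPInputStream(FileUtils.openInputStream(file))
    try {
      val text = IOUtils.toString(in, StandardCharsets.UTF_8)
      println(text.take(200))
    } finally in.close()
  }
}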
Example 55
Source File: YarnShuffleIntegrationSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.yarn import java.io.File import com.google.common.base.Charsets.UTF_8 import com.google.common.io.Files import org.apache.commons.io.FileUtils import org.apache.hadoop.yarn.conf.YarnConfiguration import org.scalatest.Matchers import org.apache.spark._ import org.apache.spark.network.shuffle.ShuffleTestAccessor import org.apache.spark.network.yarn.{YarnShuffleService, YarnTestAccessor} import org.apache.spark.tags.ExtendedYarnTest @ExtendedYarnTest class YarnShuffleIntegrationSuite extends BaseYarnClusterSuite { override def newYarnConfig(): YarnConfiguration = { val yarnConfig = new YarnConfiguration() yarnConfig.set(YarnConfiguration.NM_AUX_SERVICES, "spark_shuffle") yarnConfig.set(YarnConfiguration.NM_AUX_SERVICE_FMT.format("spark_shuffle"), classOf[YarnShuffleService].getCanonicalName) yarnConfig.set("spark.shuffle.service.port", "0") yarnConfig } test("external shuffle service") { val shuffleServicePort = YarnTestAccessor.getShuffleServicePort val shuffleService = YarnTestAccessor.getShuffleServiceInstance val registeredExecFile = YarnTestAccessor.getRegisteredExecutorFile(shuffleService) logInfo("Shuffle service port = " + shuffleServicePort) val result = File.createTempFile("result", null, tempDir) val finalState = runSpark( false, mainClassName(YarnExternalShuffleDriver.getClass), appArgs = Seq(result.getAbsolutePath(), registeredExecFile.getAbsolutePath), extraConf = Map( "spark.shuffle.service.enabled" -> "true", "spark.shuffle.service.port" -> shuffleServicePort.toString ) ) checkResult(finalState, result) assert(YarnTestAccessor.getRegisteredExecutorFile(shuffleService).exists()) } } private object YarnExternalShuffleDriver extends Logging with Matchers { val WAIT_TIMEOUT_MILLIS = 10000 def main(args: Array[String]): Unit = { if (args.length != 2) { // scalastyle:off println System.err.println( s""" |Invalid command line: ${args.mkString(" ")} | |Usage: ExternalShuffleDriver [result file] [registed exec file] """.stripMargin) // scalastyle:on println System.exit(1) } val sc = new SparkContext(new SparkConf() .setAppName("External Shuffle Test")) val conf = sc.getConf val status = new File(args(0)) val registeredExecFile = new File(args(1)) logInfo("shuffle service executor file = " + registeredExecFile) var result = "failure" val execStateCopy = new File(registeredExecFile.getAbsolutePath + "_dup") try { val data = sc.parallelize(0 until 100, 10).map { x => (x % 10) -> x }.reduceByKey{ _ + _ }. collect().toSet sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) data should be ((0 until 10).map{x => x -> (x * 10 + 450)}.toSet) result = "success" // only one process can open a leveldb file at a time, so we copy the files FileUtils.copyDirectory(registeredExecFile, execStateCopy) assert(!ShuffleTestAccessor.reloadRegisteredExecutors(execStateCopy).isEmpty) } finally { sc.stop() FileUtils.deleteDirectory(execStateCopy) Files.write(result, status, UTF_8) } } }
Example 56
Source File: SortShuffleSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark import java.io.File import scala.collection.JavaConverters._ import org.apache.commons.io.FileUtils import org.apache.commons.io.filefilter.TrueFileFilter import org.scalatest.BeforeAndAfterAll import org.apache.spark.rdd.ShuffledRDD import org.apache.spark.shuffle.sort.SortShuffleManager import org.apache.spark.serializer.{JavaSerializer, KryoSerializer} import org.apache.spark.util.Utils class SortShuffleSuite extends ShuffleSuite with BeforeAndAfterAll { // This test suite should run all tests in ShuffleSuite with sort-based shuffle. private var tempDir: File = _ override def beforeAll() { conf.set("spark.shuffle.manager", "sort") } override def beforeEach(): Unit = { tempDir = Utils.createTempDir() conf.set("spark.local.dir", tempDir.getAbsolutePath) } override def afterEach(): Unit = { try { Utils.deleteRecursively(tempDir) } finally { super.afterEach() } } test("SortShuffleManager properly cleans up files for shuffles that use the serialized path") { sc = new SparkContext("local", "test", conf) // Create a shuffled RDD and verify that it actually uses the new serialized map output path val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x)) val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4)) .setSerializer(new KryoSerializer(conf)) val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]] assert(SortShuffleManager.canUseSerializedShuffle(shuffleDep)) ensureFilesAreCleanedUp(shuffledRdd) } test("SortShuffleManager properly cleans up files for shuffles that use the deserialized path") { sc = new SparkContext("local", "test", conf) // Create a shuffled RDD and verify that it actually uses the old deserialized map output path val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x)) val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4)) .setSerializer(new JavaSerializer(conf)) val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]] assert(!SortShuffleManager.canUseSerializedShuffle(shuffleDep)) ensureFilesAreCleanedUp(shuffledRdd) } private def ensureFilesAreCleanedUp(shuffledRdd: ShuffledRDD[_, _, _]): Unit = { def getAllFiles: Set[File] = FileUtils.listFiles(tempDir, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).asScala.toSet val filesBeforeShuffle = getAllFiles // Force the shuffle to be performed shuffledRdd.count() // Ensure that the shuffle actually created files that will need to be cleaned up val filesCreatedByShuffle = getAllFiles -- filesBeforeShuffle filesCreatedByShuffle.map(_.getName) should be Set("shuffle_0_0_0.data", "shuffle_0_0_0.index") // Check that the cleanup actually removes the files sc.env.blockManager.master.removeShuffle(0, blocking = true) for (file <- filesCreatedByShuffle) { assert (!file.exists(), s"Shuffle file $file was not cleaned up") } } }
Example 57
Source File: YarnShuffleIntegrationSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.yarn import java.io.File import java.nio.charset.StandardCharsets import com.google.common.io.Files import org.apache.commons.io.FileUtils import org.apache.hadoop.yarn.conf.YarnConfiguration import org.scalatest.Matchers import org.apache.spark._ import org.apache.spark.internal.Logging import org.apache.spark.network.shuffle.ShuffleTestAccessor import org.apache.spark.network.yarn.{YarnShuffleService, YarnTestAccessor} import org.apache.spark.tags.ExtendedYarnTest @ExtendedYarnTest class YarnShuffleIntegrationSuite extends BaseYarnClusterSuite { override def newYarnConfig(): YarnConfiguration = { val yarnConfig = new YarnConfiguration() yarnConfig.set(YarnConfiguration.NM_AUX_SERVICES, "spark_shuffle") yarnConfig.set(YarnConfiguration.NM_AUX_SERVICE_FMT.format("spark_shuffle"), classOf[YarnShuffleService].getCanonicalName) yarnConfig.set("spark.shuffle.service.port", "0") yarnConfig } test("external shuffle service") { val shuffleServicePort = YarnTestAccessor.getShuffleServicePort val shuffleService = YarnTestAccessor.getShuffleServiceInstance val registeredExecFile = YarnTestAccessor.getRegisteredExecutorFile(shuffleService) logInfo("Shuffle service port = " + shuffleServicePort) val result = File.createTempFile("result", null, tempDir) val finalState = runSpark( false, mainClassName(YarnExternalShuffleDriver.getClass), appArgs = Seq(result.getAbsolutePath(), registeredExecFile.getAbsolutePath), extraConf = Map( "spark.shuffle.service.enabled" -> "true", "spark.shuffle.service.port" -> shuffleServicePort.toString ) ) checkResult(finalState, result) assert(YarnTestAccessor.getRegisteredExecutorFile(shuffleService).exists()) } } private object YarnExternalShuffleDriver extends Logging with Matchers { val WAIT_TIMEOUT_MILLIS = 10000 def main(args: Array[String]): Unit = { if (args.length != 2) { // scalastyle:off println System.err.println( s""" |Invalid command line: ${args.mkString(" ")} | |Usage: ExternalShuffleDriver [result file] [registered exec file] """.stripMargin) // scalastyle:on println System.exit(1) } val sc = new SparkContext(new SparkConf() .setAppName("External Shuffle Test")) val conf = sc.getConf val status = new File(args(0)) val registeredExecFile = new File(args(1)) logInfo("shuffle service executor file = " + registeredExecFile) var result = "failure" val execStateCopy = new File(registeredExecFile.getAbsolutePath + "_dup") try { val data = sc.parallelize(0 until 100, 10).map { x => (x % 10) -> x }.reduceByKey{ _ + _ }. collect().toSet sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) data should be ((0 until 10).map{x => x -> (x * 10 + 450)}.toSet) result = "success" // only one process can open a leveldb file at a time, so we copy the files FileUtils.copyDirectory(registeredExecFile, execStateCopy) assert(!ShuffleTestAccessor.reloadRegisteredExecutors(execStateCopy).isEmpty) } finally { sc.stop() FileUtils.deleteDirectory(execStateCopy) Files.write(result, status, StandardCharsets.UTF_8) } } }
Example 58
Source File: SortShuffleSuite.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark import java.io.File import scala.collection.JavaConverters._ import org.apache.commons.io.FileUtils import org.apache.commons.io.filefilter.TrueFileFilter import org.scalatest.BeforeAndAfterAll import org.apache.spark.rdd.ShuffledRDD import org.apache.spark.serializer.{JavaSerializer, KryoSerializer} import org.apache.spark.shuffle.sort.SortShuffleManager import org.apache.spark.util.Utils class SortShuffleSuite extends ShuffleSuite with BeforeAndAfterAll { // This test suite should run all tests in ShuffleSuite with sort-based shuffle. private var tempDir: File = _ override def beforeAll() { super.beforeAll() conf.set("spark.shuffle.manager", "sort") } override def beforeEach(): Unit = { super.beforeEach() tempDir = Utils.createTempDir() conf.set("spark.local.dir", tempDir.getAbsolutePath) } override def afterEach(): Unit = { try { Utils.deleteRecursively(tempDir) } finally { super.afterEach() } } test("SortShuffleManager properly cleans up files for shuffles that use the serialized path") { sc = new SparkContext("local", "test", conf) // Create a shuffled RDD and verify that it actually uses the new serialized map output path val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x)) val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4)) .setSerializer(new KryoSerializer(conf)) val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]] assert(SortShuffleManager.canUseSerializedShuffle(shuffleDep)) ensureFilesAreCleanedUp(shuffledRdd) } test("SortShuffleManager properly cleans up files for shuffles that use the deserialized path") { sc = new SparkContext("local", "test", conf) // Create a shuffled RDD and verify that it actually uses the old deserialized map output path val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x)) val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4)) .setSerializer(new JavaSerializer(conf)) val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]] assert(!SortShuffleManager.canUseSerializedShuffle(shuffleDep)) ensureFilesAreCleanedUp(shuffledRdd) } private def ensureFilesAreCleanedUp(shuffledRdd: ShuffledRDD[_, _, _]): Unit = { def getAllFiles: Set[File] = FileUtils.listFiles(tempDir, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).asScala.toSet val filesBeforeShuffle = getAllFiles // Force the shuffle to be performed shuffledRdd.count() // Ensure that the shuffle actually created files that will need to be cleaned up val filesCreatedByShuffle = getAllFiles -- filesBeforeShuffle filesCreatedByShuffle.map(_.getName) should be Set("shuffle_0_0_0.data", "shuffle_0_0_0.index") // Check that the cleanup actually removes the files sc.env.blockManager.master.removeShuffle(0, blocking = true) for (file <- filesCreatedByShuffle) { assert (!file.exists(), s"Shuffle file $file was not cleaned up") } } }
Example 59
Source File: ScenarioLoader.scala From mantis with Apache License 2.0 | 5 votes |
package io.iohk.ethereum.ets.common import java.io.File import io.iohk.ethereum.utils.Logger import org.apache.commons.io.FileUtils import scala.collection.JavaConverters._ import scala.io.Source trait ScenarioLoader[T] extends ScenarioParser[T] with Logger { def load(path: String, options: TestOptions, ignoredTestNames: Set[String] = Set.empty): List[ScenarioGroup[T]] = { val testDir = new File(getClass.getClassLoader.getResource(path).toURI) val files = FileUtils.listFiles(testDir, Array("json"), true).asScala.toList files.filterNot(file => ignoredTestNames.contains(file.getName)).flatMap { file => val name = file.getAbsolutePath.drop(testDir.getAbsolutePath.length + 1).dropRight(".json".length) if (!options.isGroupIncluded(name)) None else { log.info(s"Loading test scenarios from: $file") val text = Source.fromFile(file).getLines.mkString val scenarios = parse(text) Some(ScenarioGroup(name, scenarios)) } } } }
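load discovers test fixtures with the extension-based overload FileUtils.listFiles(dir, Array("json"), true), where the extensions are given without the leading dot and the boolean switches recursion on. A minimal sketch of that discovery step, with an illustrative resource directory and extension list:

import java.io.File
import scala.collection.JavaConverters._
import org.apache.commons.io.FileUtils

object FindJsonFixtures {
  def main(args: Array[String]): Unit = {
    val dir = new File("src/test/resources/scenarios") // illustrative fixture directory
    // `true` means sub-directories are searched as well.
    val jsonFiles = FileUtils.listFiles(dir, Array("json", "jsonl"), true).asScala.toList
    jsonFiles.foreach(f => println(f.getAbsolutePath))
  }
}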
Example 60
Source File: InceptionFetcherTest.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.fetcher.tensorflow import java.io.File import org.apache.commons.io.FileUtils import org.apache.s2graph.core.fetcher.BaseFetcherTest import play.api.libs.json.Json class InceptionFetcherTest extends BaseFetcherTest { val runDownloadModel: Boolean = true val runCleanup: Boolean = true def cleanup(downloadPath: String, dir: String) = { synchronized { FileUtils.deleteQuietly(new File(downloadPath)) FileUtils.deleteDirectory(new File(dir)) } } def downloadModel(dir: String) = { import sys.process._ synchronized { FileUtils.forceMkdir(new File(dir)) val url = "https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip" val wget = s"wget $url" wget ! val unzip = s"unzip inception5h.zip -d $dir" unzip ! } } //TODO: make this test case to run smoothly ignore("test get bytes for image url") { val downloadPath = "inception5h.zip" val modelPath = "inception" try { if (runDownloadModel) downloadModel(modelPath) val serviceName = "s2graph" val columnName = "user" val labelName = "image_net" val options = s""" |{ | "fetcher": { | "className": "org.apache.s2graph.core.fetcher.tensorflow.InceptionFetcher", | "modelPath": "$modelPath" | } |} """.stripMargin val (service, column, label) = initEdgeFetcher(serviceName, columnName, labelName, Option(options)) val srcVertices = Seq( "http://www.gstatic.com/webp/gallery/1.jpg", "http://www.gstatic.com/webp/gallery/2.jpg", "http://www.gstatic.com/webp/gallery/3.jpg" ) val stepResult = queryEdgeFetcher(service, column, label, srcVertices) stepResult.edgeWithScores.groupBy(_.edge.srcVertex).foreach { case (srcVertex, ls) => val url = srcVertex.innerIdVal.toString val scores = ls.map { es => val edge = es.edge val label = edge.tgtVertex.innerIdVal.toString val score = edge.property[Double]("score").value() Json.obj("label" -> label, "score" -> score) } val jsArr = Json.toJson(scores) val json = Json.obj("url" -> url, "scores" -> jsArr) println(Json.prettyPrint(json)) } } finally { if (runCleanup) cleanup(downloadPath, modelPath) } } }
Example 61
Source File: PailDataSourceSpec.scala From utils with Apache License 2.0 | 5 votes |
package com.indix.utils.spark.pail import java.util import com.backtype.hadoop.pail.{PailFormatFactory, PailSpec, PailStructure} import com.backtype.support.{Utils => PailUtils} import com.google.common.io.Files import org.apache.commons.io.FileUtils import org.apache.spark.sql.SparkSession import org.scalatest.{BeforeAndAfterAll, FlatSpec} import org.scalatest.Matchers._ import scala.collection.JavaConverters._ import scala.util.Random case class User(name: String, age: Int) class UserPailStructure extends PailStructure[User] { override def isValidTarget(dirs: String*): Boolean = true override def getType: Class[_] = classOf[User] override def serialize(user: User): Array[Byte] = PailUtils.serialize(user) override def getTarget(user: User): util.List[String] = List(user.age % 10).map(_.toString).asJava override def deserialize(serialized: Array[Byte]): User = PailUtils.deserialize(serialized).asInstanceOf[User] } class PailDataSourceSpec extends FlatSpec with BeforeAndAfterAll with PailDataSource { private var spark: SparkSession = _ override protected def beforeAll(): Unit = { super.beforeAll() spark = SparkSession.builder().master("local[2]").appName("PailDataSource").getOrCreate() } val userPailSpec = new PailSpec(PailFormatFactory.SEQUENCE_FILE, new UserPailStructure) "PailBasedReaderWriter" should "read/write user records from/into pail" in { val output = Files.createTempDir() val users = (1 to 100).map { index => User(s"foo$index", Random.nextInt(40))} spark.sparkContext.parallelize(users) .saveAsPail(output.getAbsolutePath, userPailSpec) val input = output.getAbsolutePath val total = spark.sparkContext.pailFile[User](input) .map(u => u.name) .count() total should be(100) FileUtils.deleteDirectory(output) } }
Example 62
Source File: ParquetAvroDataSourceSpec.scala From utils with Apache License 2.0 | 5 votes |
package com.indix.utils.spark.parquet import java.io.File import com.google.common.io.Files import com.indix.utils.spark.parquet.avro.ParquetAvroDataSource import org.apache.commons.io.FileUtils import org.apache.parquet.hadoop.metadata.CompressionCodecName import org.apache.spark.sql.SparkSession import org.scalactic.Equality import org.scalatest.Matchers.{be, convertToAnyShouldWrapper, equal} import org.scalatest.{BeforeAndAfterAll, FlatSpec} import java.util.{Arrays => JArrays} case class SampleAvroRecord(a: Int, b: String, c: Seq[String], d: Boolean, e: Double, f: collection.Map[String, String], g: Array[Byte]) class ParquetAvroDataSourceSpec extends FlatSpec with BeforeAndAfterAll with ParquetAvroDataSource { private var spark: SparkSession = _ implicit val sampleAvroRecordEq = new Equality[SampleAvroRecord] { override def areEqual(left: SampleAvroRecord, b: Any): Boolean = b match { case right: SampleAvroRecord => left.a == right.a && left.b == right.b && Equality.default[Seq[String]].areEqual(left.c, right.c) && left.d == right.d && left.e == right.e && Equality.default[collection.Map[String, String]].areEqual(left.f, right.f) && JArrays.equals(left.g, right.g) case _ => false } } override protected def beforeAll(): Unit = { super.beforeAll() spark = SparkSession.builder().master("local[2]").appName("ParquetAvroDataSource").getOrCreate() } override protected def afterAll(): Unit = { try { spark.sparkContext.stop() } finally { super.afterAll() } } "AvroBasedParquetDataSource" should "read/write avro records as ParquetData" in { val outputLocation = Files.createTempDir().getAbsolutePath + "/output" val sampleRecords: Seq[SampleAvroRecord] = Seq( SampleAvroRecord(1, "1", List("a1"), true, 1.0d, Map("a1" -> "b1"), "1".getBytes), SampleAvroRecord(2, "2", List("a2"), false, 2.0d, Map("a2" -> "b2"), "2".getBytes), SampleAvroRecord(3, "3", List("a3"), true, 3.0d, Map("a3" -> "b3"), "3".getBytes), SampleAvroRecord(4, "4", List("a4"), true, 4.0d, Map("a4" -> "b4"), "4".getBytes), SampleAvroRecord(5, "5", List("a5"), false, 5.0d, Map("a5" -> "b5"), "5".getBytes) ) val sampleDf = spark.createDataFrame(sampleRecords) sampleDf.rdd.saveAvroInParquet(outputLocation, sampleDf.schema, CompressionCodecName.GZIP) val sparkVal = spark import sparkVal.implicits._ val records: Array[SampleAvroRecord] = spark.read.parquet(outputLocation).as[SampleAvroRecord].collect() records.length should be(5) // We use === to use the custom Equality defined above for comparing Array[Byte] // Ref - https://github.com/scalatest/scalatest/issues/491 records.sortBy(_.a) === sampleRecords.sortBy(_.a) FileUtils.deleteDirectory(new File(outputLocation)) } }
Example 63
Source File: RocksMapTest.scala From utils with Apache License 2.0 | 5 votes |
package com.indix.utils.store import java.io.Serializable import java.nio.file.{Paths, Files} import org.apache.commons.io.FileUtils import org.scalatest.{Matchers, FlatSpec} case class TestObject(a: Int, b: String, c: Array[Int], d: Array[String]) extends Serializable { def equals(other: TestObject): Boolean = { this.a.equals(other.a) && this.b.equals(other.b) && this.c.sameElements(other.c) && this.d.sameElements(other.d) } } case class ComplexTestObject(a: Int, b: TestObject) extends Serializable { def equals(other: ComplexTestObject): Boolean = { this.a.equals(other.a) && this.b.equals(other.b) } } class RocksMapTest extends FlatSpec with Matchers { "RocksMap" should "serialize and deserialize the keys and values" in { val db = new RocksMap("test") val a: Int = 1 val b: String = "hello" val c: Array[Int] = Array(1, 2, 3) val d: Array[String] = Array("a", "b", "c") val serialized_a = db.serialize(a) val serialized_b = db.serialize(b) val serialized_c = db.serialize(c) val serialized_d = db.serialize(d) val serialized_TestObject = db.serialize(TestObject(a, b, c, d)) val serialized_ComplexObject = db.serialize(ComplexTestObject(a, TestObject(a, b, c, d))) db.deserialize[Int](serialized_a) should be(a) db.deserialize[String](serialized_b) should be(b) db.deserialize[Array[Int]](serialized_c) should be(c) db.deserialize[Array[String]](serialized_d) should be(d) db.deserialize[TestObject](serialized_TestObject).equals(TestObject(a, b, c, d)) should be(true) db.deserialize[ComplexTestObject](serialized_ComplexObject).equals(ComplexTestObject(a, TestObject(a, b, c, d))) should be(true) db.drop() db.close() } it should "put and get values" in { val db = new RocksMap("test") db.put(1, 1.0) db.get[Int, Double](1).getOrElse(0) should be(1.0) db.clear() db.drop() db.close() } it should "remove values" in { val db = new RocksMap("test") db.put(1, 1L) db.get[Int, Long](1).getOrElse(0) should be(1L) db.remove(1) db.get[Int, Long](1) should be(None) db.drop() db.close() } it should "clear all the values" in { val db = new RocksMap(name = "test") db.put(1, "hello") db.put(2, "yello") db.get(1) should not be (None) db.get(2) should not be (None) db.clear() db.get(1) should be(None) db.get(2) should be(None) db.drop() db.close() } it should "clear the data files when drop is called" in { val db = new RocksMap(name = "test") Files.exists(Paths.get(db.pathString)) should be (true) db.drop() Files.exists(Paths.get(db.pathString)) should be (false) db.close() } }
Example 64
Source File: TestSolrStreamWriter.scala From spark-solr with Apache License 2.0 | 5 votes |
package com.lucidworks.spark import java.io.File import java.util.UUID import com.lucidworks.spark.util.{ConfigurationConstants, SolrCloudUtil, SolrQuerySupport, SolrSupport} import org.apache.commons.io.FileUtils import org.apache.spark.solr.SparkInternalObjects class TestSolrStreamWriter extends TestSuiteBuilder { test("Stream data into Solr") { val collectionName = "testStreaming-" + UUID.randomUUID().toString SolrCloudUtil.buildCollection(zkHost, collectionName, null, 1, cloudClient, sc) sparkSession.conf.set("spark.sql.streaming.schemaInference", "true") sparkSession.sparkContext.setLogLevel("DEBUG") val offsetsDir = FileUtils.getTempDirectory + "/spark-stream-offsets-" + UUID.randomUUID().toString try { val datasetPath = "src/test/resources/test-data/oneusagov" val streamingJsonDF = sparkSession.readStream.json(datasetPath) val accName = "acc-" + UUID.randomUUID().toString assert(streamingJsonDF.isStreaming) val writeOptions = Map( "collection" -> collectionName, "zkhost" -> zkHost, "checkpointLocation" -> offsetsDir, ConfigurationConstants.GENERATE_UNIQUE_KEY -> "true", ConfigurationConstants.ACCUMULATOR_NAME -> accName) val streamingQuery = streamingJsonDF .drop("_id") .writeStream .outputMode("append") .format("solr") .options(writeOptions) .start() try { logger.info(s"Explain ${streamingQuery.explain()}") streamingQuery.processAllAvailable() logger.info(s"Status ${streamingQuery.status}") SolrSupport.getCachedCloudClient(zkHost).commit(collectionName) assert(SolrQuerySupport.getNumDocsFromSolr(collectionName, zkHost, None) === 13) val acc = SparkInternalObjects.getAccumulatorById(SparkSolrAccumulatorContext.getId(accName).get) assert(acc.isDefined) assert(acc.get.value == 13) } finally { streamingQuery.stop() } } finally { SolrCloudUtil.deleteCollection(collectionName, cluster) FileUtils.deleteDirectory(new File(offsetsDir)) } } }
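The offsets directory in this test is built from FileUtils.getTempDirectory, which returns java.io.tmpdir as a File, and is removed in the finally block with FileUtils.deleteDirectory. A small sketch of that checkpoint-directory lifecycle, with the directory suffix invented for illustration:

import java.io.File
import java.util.UUID

import org.apache.commons.io.FileUtils

object TempCheckpointDirSketch {
  def main(args: Array[String]): Unit = {
    // a unique scratch location under the system temp directory
    val offsetsDir = new File(FileUtils.getTempDirectory, "spark-stream-offsets-" + UUID.randomUUID())
    try {
      FileUtils.forceMkdir(offsetsDir)      // create it (and any parents) up front
      // ... run the streaming query with checkpointLocation = offsetsDir.getAbsolutePath ...
    } finally {
      FileUtils.deleteDirectory(offsetsDir) // always clean up, even on failure
    }
  }
}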
Example 65
Source File: ZookeeperLocalServer.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package it.teamdigitale.miniclusters import java.io.File import java.net.InetSocketAddress import org.apache.commons.io.FileUtils import org.apache.zookeeper.server.{ServerCnxnFactory, ZooKeeperServer} class ZookeeperLocalServer(port: Int) { var zkServer: Option[ServerCnxnFactory] = None def start(): Unit = { if (zkServer.isEmpty) { val dataDirectory = System.getProperty("java.io.tmpdir") val dir = new File(dataDirectory, "zookeeper") println(dir.toString) if (dir.exists()) FileUtils.deleteDirectory(dir) try { val tickTime = 5000 val server = new ZooKeeperServer(dir.getAbsoluteFile, dir.getAbsoluteFile, tickTime) val factory = ServerCnxnFactory.createFactory factory.configure(new InetSocketAddress("0.0.0.0", port), 1024) factory.startup(server) println("ZOOKEEPER server up!!") zkServer = Some(factory) } catch { case ex: Exception => System.err.println(s"Error in zookeeper server: ${ex.printStackTrace()}") } finally { dir.deleteOnExit() } } else println("ZOOKEEPER is already up") } def stop() = { if (zkServer.isDefined) { zkServer.get.shutdown() } println("ZOOKEEPER server stopped") } }
Example 66
Source File: CodeGeneratorEngineHook.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.execute.hook import java.io.File import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.engine.execute.{EngineExecutor, EngineHook} import com.webank.wedatasphere.linkis.scheduler.executer.{ExecuteRequest, RunTypeExecuteRequest} import com.webank.wedatasphere.linkis.server.JMap import org.apache.commons.io.FileUtils import org.apache.commons.lang.StringUtils import scala.collection.mutable.ArrayBuffer @Deprecated //changed to UdfLoadEngineHook abstract class CodeGeneratorEngineHook extends EngineHook with Logging{ self => val udfPathProp = "udf.paths" protected var creator: String = _ protected var user: String = _ protected var initSpecialCode: String = _ protected val runType: String protected def acceptCodeType(line: String): Boolean protected def generateCode(): Array[String] = { val codeBuffer = new ArrayBuffer[String] val statementBuffer = new ArrayBuffer[String] var accept = true initSpecialCode.split("\n").foreach{ case "" => case l if l.startsWith("%") => if(acceptCodeType(l)){ accept = true codeBuffer.append(statementBuffer.mkString("\n")) statementBuffer.clear() }else{ accept = false } case l if accept => statementBuffer.append(l) case _ => } if(statementBuffer.nonEmpty) codeBuffer.append(statementBuffer.mkString("\n")) codeBuffer.toArray } override def beforeCreateEngine(params: JMap[String, String]): JMap[String, String] = { creator = params.get("creator") user = params.get("user") initSpecialCode = StringUtils.split(params.get(udfPathProp), ",").map(readFile).mkString("\n") params } override def afterCreatedEngine(executor: EngineExecutor): Unit = { generateCode().foreach { case "" => case c: String => info("Submit udf registration to engine, code: " + c) executor.execute(new ExecuteRequest with RunTypeExecuteRequest{ override val code: String = c override val runType: String = self.runType }) info("executed code: " + c) } } protected def readFile(path: String): String = { info("read file: " + path) val file = new File(path) if(file.exists()){ FileUtils.readFileToString(file) } else { info("udf file: [" + path + "] doesn't exist, ignore it.") "" } } } @Deprecated class SqlCodeGeneratorEngineHook extends CodeGeneratorEngineHook{ override val runType = "sql" override protected def acceptCodeType(line: String): Boolean = { line.startsWith("%sql") } } @Deprecated class PythonCodeGeneratorEngineHook extends CodeGeneratorEngineHook{ override val runType = "python" override protected def acceptCodeType(line: String): Boolean = { line.startsWith("%python") } } @Deprecated class ScalaCodeGeneratorEngineHook extends CodeGeneratorEngineHook{ override val runType = "scala" override protected def acceptCodeType(line: String): Boolean = { line.startsWith("%scala") } }
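The readFile helper above goes through FileUtils.readFileToString(file); the single-argument overload uses the platform default charset, so where encoding matters the (File, Charset) overload is the safer choice. A hedged sketch of that defensive read (the path is illustrative):

import java.io.File
import java.nio.charset.StandardCharsets

import org.apache.commons.io.FileUtils

object ReadUdfFileSketch {
  // returns "" for a missing file, mirroring the hook's behaviour above
  def readFile(path: String): String = {
    val file = new File(path)
    if (file.exists()) FileUtils.readFileToString(file, StandardCharsets.UTF_8)
    else ""
  }

  def main(args: Array[String]): Unit =
    println(readFile("/tmp/udf/example.sql"))
}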
Example 67
Source File: PythonCodeParserTest.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.execute

import java.io.File

import com.google.common.io.Resources
import org.apache.commons.io.FileUtils

object PythonCodeParserTest {

  def main(args: Array[String]): Unit = {
    val parser = new PythonCodeParser
    var code = FileUtils.readFileToString(new File(Resources.getResource("stack.py").getPath))
    parser.parse(code, null).foreach { statement =>
      println("---------------------------statement begin-----------------")
      println(statement)
      println("---------------------------statement end-----------------")
    }
  }
}
Example 68
Source File: SQLCodeParserTest.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.execute

import java.io.File

import com.google.common.io.Resources
import org.apache.commons.io.FileUtils

object SQLCodeParserTest {

  def main(args: Array[String]): Unit = {
    val parser = new SQLCodeParser
    var code = FileUtils.readFileToString(new File(Resources.getResource("very_complex.sql").getPath))
    parser.parse(code, null).foreach { statement =>
      println("---------------------------statement begin-----------------")
      println(statement)
      println("---------------------------statement end-----------------")
    }
  }
}
Example 69
Source File: CodeGeneratorEngineHookTest.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.execute.hook import java.io.File import java.util import com.google.common.io.Resources import com.webank.wedatasphere.linkis.engine.execute.{EngineExecutor, EngineExecutorContext} import com.webank.wedatasphere.linkis.protocol.engine.RequestEngine import com.webank.wedatasphere.linkis.resourcemanager.Resource import com.webank.wedatasphere.linkis.scheduler.executer.{ExecuteRequest, ExecuteResponse} import org.apache.commons.io.FileUtils object CodeGeneratorEngineHookTest { def main(args: Array[String]): Unit = { val requestEngine = new TestRequestEngine requestEngine.properties.put(RequestEngine.ENGINE_INIT_SPECIAL_CODE, FileUtils.readFileToString(new File(Resources.getResource("engine_special_code").getPath))) val engineExecutor = new TestEngineExecutor(1, true) var engineHook: CodeGeneratorEngineHook = new SqlCodeGeneratorEngineHook engineHook.beforeCreateEngine(new util.HashMap(requestEngine.properties)) engineHook.afterCreatedEngine(engineExecutor) engineHook = new PythonCodeGeneratorEngineHook engineHook.beforeCreateEngine(new util.HashMap(requestEngine.properties)) engineHook.afterCreatedEngine(engineExecutor) engineHook = new ScalaCodeGeneratorEngineHook engineHook.beforeCreateEngine(new util.HashMap(requestEngine.properties)) engineHook.afterCreatedEngine(engineExecutor) } } class TestRequestEngine extends RequestEngine { override val user: String = "" override val properties: util.Map[String, String] = new util.HashMap[String, String](){ } override val creator: String = "" } class TestEngineExecutor(outputPrintLimit: Int, isSupportParallelism: Boolean) extends EngineExecutor(outputPrintLimit, isSupportParallelism){ override def execute(executeRequest: ExecuteRequest): ExecuteResponse = { null } override def getName: String = "" override def getActualUsedResources: Resource = null override protected def executeLine(engineExecutorContext: EngineExecutorContext, code: String): ExecuteResponse = null override protected def executeCompletely(engineExecutorContext: EngineExecutorContext, code: String, completedLine: String): ExecuteResponse = null override def close(): Unit = null }
Example 70
Source File: ScalaCodeParserTest.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.execute

import java.io.File

import com.google.common.io.Resources
import org.apache.commons.io.FileUtils

object ScalaCodeParserTest {

  def main(args: Array[String]): Unit = {
    val parser = new ScalaCodeParser
    var code = FileUtils.readFileToString(new File(Resources.getResource("test.scala.txt").getPath))
    parser.parse(code, null).foreach { statement =>
      println("---------------------------statement begin-----------------")
      println(statement)
      println("---------------------------statement end-----------------")
    }
  }
}
Example 71
Source File: JarLoaderEngineHook.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.enginemanager.hook import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.enginemanager.{Engine, EngineHook} import com.webank.wedatasphere.linkis.enginemanager.conf.EngineManagerConfiguration.ENGINE_UDF_APP_NAME import com.webank.wedatasphere.linkis.protocol.engine.RequestEngine import com.webank.wedatasphere.linkis.rpc.Sender import com.webank.wedatasphere.linkis.udf.api.rpc.{RequestUdfTree, ResponseUdfTree} import com.webank.wedatasphere.linkis.udf.entity.{UDFInfo, UDFTree} import org.apache.commons.collections.CollectionUtils import org.apache.commons.io.FileUtils import org.apache.commons.lang.StringUtils import org.codehaus.jackson.map.ObjectMapper import scala.collection.JavaConversions._ import scala.collection.mutable class JarLoaderEngineHook extends EngineHook with Logging{ override def beforeCreateSession(requestEngine: RequestEngine): RequestEngine = { info("start loading UDFs") val udfInfos = extractUdfInfos(requestEngine).filter{info => info.getUdfType == 0 && info.getExpire == false && StringUtils.isNotBlank(info.getPath) && isJarExists(info) && info.getLoad == true } // add to class path val jars = new mutable.HashSet[String]() udfInfos.foreach{udfInfo => jars.add("file://" + udfInfo.getPath)} val jarPaths = jars.mkString(",") if(StringUtils.isBlank(requestEngine.properties.get("jars"))){ requestEngine.properties.put("jars", jarPaths) } else { requestEngine.properties.put("jars", requestEngine.properties.get("jars") + "," + jarPaths) } info("added jars: " + jarPaths) //jars.foreach(fetchRemoteFile) //info("copied jars.") info("end loading UDFs") requestEngine } override def afterCreatedSession(engine: Engine, requestEngine: RequestEngine): Unit = { } protected def isJarExists(udfInfo: UDFInfo) : Boolean = { true // if(FileUtils.getFile(udfInfo.getPath).exists()){ // true // } else { // info(s"The jar file [${udfInfo.getPath}] of UDF [${udfInfo.getUdfName}] doesn't exist, ignore it.") // false // } } protected def extractUdfInfos(requestEngine: RequestEngine): mutable.ArrayBuffer[UDFInfo] = { val udfInfoBuilder = new mutable.ArrayBuffer[UDFInfo] val userName = requestEngine.user val udfTree = queryUdfRpc(userName) extractUdfInfos(udfInfoBuilder, udfTree, userName) udfInfoBuilder } protected def extractUdfInfos(udfInfoBuilder: mutable.ArrayBuffer[UDFInfo], udfTree: UDFTree, userName: String) : Unit = { if(CollectionUtils.isNotEmpty(udfTree.getUdfInfos)){ for(udfInfo <- udfTree.getUdfInfos){ udfInfoBuilder.append(udfInfo) } } if(CollectionUtils.isNotEmpty(udfTree.getChildrens)){ for(child <- udfTree.getChildrens){ var childInfo = child if(TreeType.specialTypes.contains(child.getUserName)){ childInfo = queryUdfRpc(userName, child.getId, child.getUserName) } else { childInfo = queryUdfRpc(userName, child.getId, TreeType.SELF) } extractUdfInfos(udfInfoBuilder, childInfo, userName) } } } private def queryUdfRpc(userName: String, treeId: Long = -1, treeType: String = "self"): UDFTree = { val udfTree = Sender.getSender(ENGINE_UDF_APP_NAME.getValue) .ask(RequestUdfTree(userName, treeType, treeId, "udf")) .asInstanceOf[ResponseUdfTree] .udfTree //info("got udf tree:" + new ObjectMapper().writer().withDefaultPrettyPrinter().writeValueAsString(udfTree)) udfTree } }
Example 72
Source File: TokenAuthentication.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.gateway.security.token import java.io.File import java.util.Properties import java.util.concurrent.TimeUnit import com.webank.wedatasphere.linkis.common.utils.{Logging, Utils} import com.webank.wedatasphere.linkis.gateway.config.GatewayConfiguration._ import com.webank.wedatasphere.linkis.gateway.http.GatewayContext import com.webank.wedatasphere.linkis.gateway.security.{GatewaySSOUtils, SecurityFilter} import com.webank.wedatasphere.linkis.server.Message import org.apache.commons.io.{FileUtils, IOUtils} import org.apache.commons.lang.StringUtils object TokenAuthentication extends Logging { private val (props, file) = if(ENABLE_TOKEN_AUTHENTICATION.getValue) (new Properties, new File(this.getClass.getClassLoader.getResource(TOKEN_AUTHENTICATION_CONFIG.getValue).toURI.getPath)) else (null, null) private var lastModified = 0l if(ENABLE_TOKEN_AUTHENTICATION.getValue) { Utils.defaultScheduler.scheduleAtFixedRate(new Runnable { override def run(): Unit = Utils.tryAndError(init()) }, TOKEN_AUTHENTICATION_SCAN_INTERVAL.getValue, TOKEN_AUTHENTICATION_SCAN_INTERVAL.getValue, TimeUnit.MILLISECONDS) init() } private def init(): Unit = if(file.lastModified() > lastModified) { lastModified = file.lastModified() info(s"loading token authentication file $file.") val newProps = new Properties val input = FileUtils.openInputStream(file) Utils.tryFinally(newProps.load(input))(IOUtils.closeQuietly(input)) props.putAll(newProps) } private def validateTokenUser(token: String, tokenUser: String): Boolean = { val tokenUsers = props.getProperty(token) if(tokenUsers == "*" || (StringUtils.isNotBlank(tokenUsers) && tokenUsers.contains(tokenUser))) true else false } def isTokenRequest(gatewayContext: GatewayContext) : Boolean = { (gatewayContext.getRequest.getHeaders.containsKey(TOKEN_KEY) && gatewayContext.getRequest.getHeaders.containsKey(TOKEN_USER_KEY)) || ( gatewayContext.getRequest.getCookies.containsKey(TOKEN_KEY) && gatewayContext.getRequest.getCookies.containsKey(TOKEN_USER_KEY)) } def tokenAuth(gatewayContext: GatewayContext): Boolean = { if(!ENABLE_TOKEN_AUTHENTICATION.getValue) { val message = Message.noLogin(s"Gateway未启用token认证,请采用其他认证方式!") << gatewayContext.getRequest.getRequestURI SecurityFilter.filterResponse(gatewayContext, message) return false } var token = gatewayContext.getRequest.getHeaders.get(TOKEN_KEY)(0) var tokenUser = gatewayContext.getRequest.getHeaders.get(TOKEN_USER_KEY)(0) if(StringUtils.isBlank(token) || StringUtils.isBlank(tokenUser)) { token = gatewayContext.getRequest.getCookies.get(TOKEN_KEY)(0).getValue tokenUser = gatewayContext.getRequest.getCookies.get(TOKEN_USER_KEY)(0).getValue if(StringUtils.isBlank(token) || StringUtils.isBlank(tokenUser)) { val message = Message.noLogin(s"请在Header或Cookie中同时指定$TOKEN_KEY 和 $TOKEN_USER_KEY,以便完成token认证!") << gatewayContext.getRequest.getRequestURI SecurityFilter.filterResponse(gatewayContext, message) return false } } if(validateTokenUser(token, tokenUser)){ info(s"Token authentication succeed, uri: ${gatewayContext.getRequest.getRequestURI}, token: $token, tokenUser: $tokenUser.") GatewaySSOUtils.setLoginUser(gatewayContext.getRequest, tokenUser) true } else { val message = Message.noLogin(s"未授权的token$token,无法将请求绑定给tokenUser$tokenUser!") << gatewayContext.getRequest.getRequestURI SecurityFilter.filterResponse(gatewayContext, message) false } } }
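The reload logic above pairs FileUtils.openInputStream with IOUtils.closeQuietly to re-read the token file whenever its lastModified timestamp advances. A minimal sketch of that Properties-reload pattern, with the file name invented for illustration:

import java.io.File
import java.util.Properties

import org.apache.commons.io.{FileUtils, IOUtils}

object TokenFileReloadSketch {
  private val props = new Properties
  private var lastModified = 0L

  // re-read the file only when its modification time has advanced
  def reloadIfChanged(file: File): Unit = if (file.lastModified() > lastModified) {
    lastModified = file.lastModified()
    val in = FileUtils.openInputStream(file) // fails fast if the file is missing or a directory
    try props.load(in) finally IOUtils.closeQuietly(in)
  }

  def main(args: Array[String]): Unit = {
    reloadIfChanged(new File("/tmp/token.properties"))
    println(props)
  }
}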
Example 73
Source File: RefreshUtils.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.common.utils

import java.io.File
import java.util.concurrent.TimeUnit

import com.webank.wedatasphere.linkis.common.conf.Configuration
import org.apache.commons.io.FileUtils

object RefreshUtils {

  def registerFileRefresh(period: Long, file: String, deal: java.util.List[String] => Unit): Unit = {
    Utils.defaultScheduler.scheduleAtFixedRate(new Runnable {
      val f = new File(file)
      var fileModifiedTime = if(f.exists()) f.lastModified() else 0

      override def run(): Unit = {
        if(!f.exists()) return
        if(f.lastModified() > fileModifiedTime) {
          deal(FileUtils.readLines(f, Configuration.BDP_ENCODING.getValue))
          fileModifiedTime = f.lastModified()
        }
      }
    }, period, period, TimeUnit.MILLISECONDS)
  }

}

abstract class Deal {
  def deal(line: String): Unit
}
Example 74
Source File: TestUtil.scala From pulse with Apache License 2.0 | 5 votes |
package io.phdata.pulse.solr

import java.io.File
import java.nio.file.Paths
import java.util.UUID

import org.apache.commons.io.FileUtils
import org.apache.solr.client.solrj.embedded.JettyConfig
import org.apache.solr.cloud.MiniSolrCloudCluster

object TestUtil {

  def miniSolrCloudCluster(): MiniSolrCloudCluster = {
    // clean up the solr files so we don't try to read collections from old runs
    FileUtils.deleteDirectory(new File("target/solr7"))

    // Set up a MiniSolrCloudCluster
    val clusterHome = s"${System.getProperty("user.dir")}/target/solr7/solrHome/${UUID.randomUUID()}"

    val jettyConfig = JettyConfig.builder().setContext("/solr").setPort(8983).stopAtShutdown(true).build()

    new MiniSolrCloudCluster(1, null, Paths.get(clusterHome), MiniSolrCloudCluster.DEFAULT_CLOUD_SOLR_XML, null, null)
  }

  def randomIdentifier() = UUID.randomUUID().toString.substring(0, 5)
}
Example 75
Source File: TestUtil.scala From pulse with Apache License 2.0 | 5 votes |
package io.phdata.pulse.solr import java.io.File import java.nio.file.Paths import java.util.UUID import org.apache.commons.io.FileUtils import org.apache.solr.client.solrj.embedded.JettyConfig import org.apache.solr.cloud.MiniSolrCloudCluster object TestUtil { def miniSolrCloudCluster(): MiniSolrCloudCluster = { val DEFAULT_SOLR_CLOUD_XML = """<solr> | | <str name="shareSchema">${shareSchema:false}</str> | <str name="configSetBaseDir">${configSetBaseDir:configsets}</str> | <str name="coreRootDirectory">${coreRootDirectory:target/solr4/cores}</str> | | <shardHandlerFactory name="shardHandlerFactory" class="HttpShardHandlerFactory"> | <str name="urlScheme">${urlScheme:}</str> | <int name="socketTimeout">${socketTimeout:90000}</int> | <int name="connTimeout">${connTimeout:15000}</int> | </shardHandlerFactory> | | <solrcloud> | <str name="host">127.0.0.1</str> | <int name="hostPort">${hostPort:8983}</int> | <str name="hostContext">${hostContext:solr}</str> | <int name="zkClientTimeout">${solr.zkclienttimeout:30000}</int> | <bool name="genericCoreNodeNames">${genericCoreNodeNames:true}</bool> | <int name="leaderVoteWait">10000</int> | <int name="distribUpdateConnTimeout">${distribUpdateConnTimeout:45000}</int> | <int name="distribUpdateSoTimeout">${distribUpdateSoTimeout:340000}</int> | </solrcloud> | |</solr>""".stripMargin System.setProperty("solr.directoryFactory", "solr.RAMDirectoryFactory") // clean up the solr files so we don't try to read collections from old runs FileUtils.deleteDirectory(new File("target/solr4")) // Set up a MiniSolrCloudCluster val clusterHome = s"${System.getProperty("user.dir")}/target/solr4/solrHome/${UUID.randomUUID()}" val jettyConfig = JettyConfig.builder().setContext("/solr").setPort(8983).stopAtShutdown(true).build() new MiniSolrCloudCluster(1, Paths.get(clusterHome), DEFAULT_SOLR_CLOUD_XML, jettyConfig) } def randomIdentifier() = UUID.randomUUID().toString.substring(0, 5) }
Example 76
Source File: S3PointCloudInputFormat.scala From geotrellis-pointcloud with Apache License 2.0 | 5 votes |
package geotrellis.pointcloud.spark.store.s3 import geotrellis.spark.store.s3._ import geotrellis.pointcloud.spark.store.hadoop.formats._ import geotrellis.pointcloud.util.Filesystem import io.pdal._ import io.circe.Json import io.circe.syntax._ import cats.syntax.either._ import org.apache.hadoop.mapreduce.{InputSplit, TaskAttemptContext} import org.apache.commons.io.FileUtils import java.io.{File, InputStream} import java.net.URI import scala.collection.JavaConverters._ mode match { case "s3" => new S3URIRecordReader[S3PointCloudHeader, List[PointCloud]](s3Client) { def read(key: String, uri: URI): (S3PointCloudHeader, List[PointCloud]) = { val s3Pipeline = pipeline .hcursor .downField("pipeline").downArray .downField("filename").withFocus(_ => uri.toString.asJson) .top.fold(pipeline)(identity) executePipeline(context)(key, s3Pipeline) } } case _ => val tmpDir = { val dir = PointCloudInputFormat.getTmpDir(context) if (dir == null) Filesystem.createDirectory() else Filesystem.createDirectory(dir) } new S3StreamRecordReader[S3PointCloudHeader, List[PointCloud]](s3Client) { def read(key: String, is: InputStream): (S3PointCloudHeader, List[PointCloud]) = { // copy remote file into local tmp dir tmpDir.mkdirs() // to be sure that dirs created val localPath = new File(tmpDir, key.replace("/", "_")) FileUtils.copyInputStreamToFile(is, localPath) is.close() // use local filename path if it's present in json val localPipeline = pipeline .hcursor .downField("pipeline").downArray .downField("filename").withFocus(_ => localPath.getAbsolutePath.asJson) .top.fold(pipeline)(identity) try executePipeline(context)(key, localPipeline) finally { localPath.delete() tmpDir.delete() } } } } } }
Example 77
Source File: KafkaServer.scala From akka_streams_tutorial with MIT License | 5 votes |
package alpakka.env import java.io.File import java.net.InetSocketAddress import java.nio.file.{Files, Paths} import java.util.Properties import kafka.server.{KafkaConfig, KafkaServerStartable} import org.apache.commons.io.FileUtils import org.apache.zookeeper.server.quorum.QuorumPeerConfig import org.apache.zookeeper.server.{ServerConfig, ZooKeeperServerMain} object KafkaServer extends App { val zookeeperPort = 2181 val kafkaLogs = "/tmp/kafka-logs" val kafkaLogsPath = Paths.get(kafkaLogs) // See: https://stackoverflow.com/questions/59592518/kafka-broker-doesnt-find-cluster-id-and-creates-new-one-after-docker-restart/60864763#comment108382967_60864763 def fix25Behaviour() = { val fileWithConflictingContent = kafkaLogsPath.resolve("meta.properties").toFile if (fileWithConflictingContent.exists()) FileUtils.forceDelete(fileWithConflictingContent) } def removeKafkaLogs(): Unit = { if (kafkaLogsPath.toFile.exists()) FileUtils.forceDelete(kafkaLogsPath.toFile) } // Keeps the persistent data fix25Behaviour() // If everything fails //removeKafkaLogs() val quorumConfiguration = new QuorumPeerConfig { // Since we do not run a cluster, we are not interested in zookeeper data override def getDataDir: File = Files.createTempDirectory("zookeeper").toFile override def getDataLogDir: File = Files.createTempDirectory("zookeeper-logs").toFile override def getClientPortAddress: InetSocketAddress = new InetSocketAddress(zookeeperPort) } class StoppableZooKeeperServerMain extends ZooKeeperServerMain { def stop(): Unit = shutdown() } val zooKeeperServer = new StoppableZooKeeperServerMain() val zooKeeperConfig = new ServerConfig() zooKeeperConfig.readFrom(quorumConfiguration) val zooKeeperThread = new Thread { override def run(): Unit = zooKeeperServer.runFromConfig(zooKeeperConfig) } zooKeeperThread.start() val kafkaProperties = new Properties() kafkaProperties.put("zookeeper.connect", s"localhost:$zookeeperPort") kafkaProperties.put("broker.id", "0") kafkaProperties.put("offsets.topic.replication.factor", "1") kafkaProperties.put("log.dirs", kafkaLogs) kafkaProperties.put("delete.topic.enable", "true") kafkaProperties.put("group.initial.rebalance.delay.ms", "0") kafkaProperties.put("transaction.state.log.min.isr", "1") kafkaProperties.put("transaction.state.log.replication.factor", "1") kafkaProperties.put("zookeeper.connection.timeout.ms", "6000") kafkaProperties.put("num.partitions", "10") val kafkaConfig = KafkaConfig.fromProps(kafkaProperties) val kafka = new KafkaServerStartable(kafkaConfig) println("About to start...") kafka.startup() scala.sys.addShutdownHook{ println("About to shutdown...") kafka.shutdown() kafka.awaitShutdown() zooKeeperServer.stop() } zooKeeperThread.join() }
Example 78
Source File: TestSetup.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
package org.apache.iota.fey import java.io.File import java.nio.file.Paths import org.apache.commons.io.FileUtils import org.scalatest.Tag object TestSetup { private var runSetup = true val configTest = getClass.getResource("/test-fey-configuration.conf") def setup(): Unit = { if(runSetup){ println("SETTING UP ...") createFeyTmpDirectoriesForTest() copyTestActorToTmp() copyJSONstoTmp() runSetup = false } } private def copyTestActorToTmp(): Unit = { copyResourceFileToLocal("/fey-test-actor.jar",s"${CONFIG.JAR_REPOSITORY}/fey-test-actor.jar") } private def copyJSONstoTmp(): Unit = { copyResourceFileToLocal("/json/valid-json.json",s"${CONFIG.JSON_REPOSITORY}/valid-json.json.not") copyResourceFileToLocal("/json/invalid-json.json",s"${CONFIG.JSON_REPOSITORY}/invalid-json.json.not") } private def copyResourceFileToLocal(resourcePath: String, destination: String): Unit = { val resourceFile = getClass.getResource(resourcePath) val dest = new File(destination) FileUtils.copyURLToFile(resourceFile, dest) } private def createFeyTmpDirectoriesForTest(): Unit = { var file = new File(s"/tmp/fey/test/checkpoint") file.mkdirs() file = new File(s"/tmp/fey/test/json") file.mkdirs() file = new File(s"/tmp/fey/test/json/watchtest") file.mkdirs() file = new File(s"/tmp/fey/test/jars") file.mkdirs() file = new File(s"/tmp/fey/test/jars/dynamic") file.mkdirs() } } object SlowTest extends Tag("org.apache.iota.fey.SlowTest")
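copyResourceFileToLocal above leans on FileUtils.copyURLToFile, which streams whatever a URL points at (here a classpath resource) into a destination file, creating parent directories as needed. A small sketch of that copy, reusing the resource name from the example; the target path is a placeholder:

import java.io.File

import org.apache.commons.io.FileUtils

object CopyResourceSketch {
  def main(args: Array[String]): Unit = {
    // any resource on the test classpath; the name here is only an example
    val resource = getClass.getResource("/test-fey-configuration.conf")
    if (resource != null) {
      val dest = new File("/tmp/fey/test/test-fey-configuration.conf")
      FileUtils.copyURLToFile(resource, dest) // parent directories are created automatically
      println(s"copied ${FileUtils.sizeOf(dest)} bytes")
    }
  }
}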
Example 79
Source File: Tryout.scala From spark-es with Apache License 2.0 | 5 votes |
import java.nio.file.Files import org.apache.commons.io.FileUtils import org.apache.spark.SparkContext import org.elasticsearch.common.settings.Settings import org.elasticsearch.node.NodeBuilder import org.apache.spark.elasticsearch._ object Tryout { def main(args: Array[String]): Unit = { val sparkContext = new SparkContext("local[2]", "SparkES") val dataDir = Files.createTempDirectory("elasticsearch").toFile dataDir.deleteOnExit() val settings = Settings.settingsBuilder() .put("path.home", dataDir.getAbsolutePath) .put("path.logs", s"${dataDir.getAbsolutePath}/logs") .put("path.data", s"${dataDir.getAbsolutePath}/data") .put("index.store.fs.memory.enabled", true) .put("index.number_of_shards", 1) .put("index.number_of_replicas", 0) .put("cluster.name", "SparkES") .build() val node = NodeBuilder.nodeBuilder().settings(settings).node() val client = node.client() sparkContext .parallelize(Seq( ESDocument(ESMetadata("2", "type1", "index1"), """{"name": "John Smith"}"""), ESDocument(ESMetadata("1", "type1", "index1"), """{"name": "Sergey Shumov"}""") ), 2) .saveToES(Seq("localhost"), "SparkES") client.admin().cluster().prepareHealth("index1").setWaitForGreenStatus().get() val documents = sparkContext.esRDD( Seq("localhost"), "SparkES", Seq("index1"), Seq("type1"), "name:sergey") println(documents.count()) documents.foreach(println) sparkContext.stop() client.close() node.close() FileUtils.deleteQuietly(dataDir) } }
Example 80
Source File: LocalElasticSearch.scala From spark-es with Apache License 2.0 | 5 votes |
package org.apache.spark.elasticsearch import java.nio.file.Files import java.util.UUID import org.apache.commons.io.FileUtils import org.elasticsearch.common.settings.Settings import org.elasticsearch.node.{NodeBuilder, Node} class LocalElasticSearch(val clusterName: String = UUID.randomUUID().toString) { lazy val node = buildNode() lazy val client = node.client() val dataDir = Files.createTempDirectory("elasticsearch").toFile private var started = false def buildNode(): Node = { val settings = Settings.settingsBuilder() .put("path.home", dataDir.getAbsolutePath) .put("path.logs", s"${dataDir.getAbsolutePath}/logs") .put("path.data", s"${dataDir.getAbsolutePath}/data") .put("index.store.fs.memory.enabled", true) .put("index.number_of_shards", 1) .put("index.number_of_replicas", 0) .put("cluster.name", clusterName) .build() val instance = NodeBuilder.nodeBuilder().settings(settings).node() started = true instance } def close(): Unit = { if (started) { client.close() node.close() } try { FileUtils.forceDelete(dataDir) } catch { case e: Exception => } } }
Example 81
Source File: MultiNodeSupportCassandra.scala From eventuate with Apache License 2.0 | 5 votes |
package com.rbmhtechnology.eventuate import java.io.File import akka.actor.Props import akka.remote.testconductor.RoleName import akka.remote.testkit.MultiNodeSpec import com.rbmhtechnology.eventuate.log.cassandra._ import org.apache.commons.io.FileUtils import org.scalatest.BeforeAndAfterAll trait MultiNodeSupportCassandra extends BeforeAndAfterAll { this: MultiNodeSpec with MultiNodeWordSpec => val coordinator = RoleName("nodeA") def cassandraDir: String = MultiNodeEmbeddedCassandra.DefaultCassandraDir def logProps(logId: String): Props = CassandraEventLog.props(logId) override def atStartup(): Unit = { if (isNode(coordinator)) { MultiNodeEmbeddedCassandra.start(cassandraDir) Cassandra(system) } enterBarrier("startup") } override def afterAll(): Unit = { // get all config data before shutting down node val snapshotRootDir = new File(system.settings.config.getString("eventuate.snapshot.filesystem.dir")) // shut down node super.afterAll() // clean database and delete snapshot files if (isNode(coordinator)) { FileUtils.deleteDirectory(snapshotRootDir) MultiNodeEmbeddedCassandra.clean() } } }
Example 82
Source File: MultiNodeSupportLeveldb.scala From eventuate with Apache License 2.0 | 5 votes |
package com.rbmhtechnology.eventuate import java.io.File import akka.actor.Props import akka.remote.testconductor.RoleName import akka.remote.testkit.MultiNodeSpec import com.rbmhtechnology.eventuate.log.leveldb.LeveldbEventLog import org.apache.commons.io.FileUtils import org.scalatest.BeforeAndAfterAll trait MultiNodeSupportLeveldb extends BeforeAndAfterAll { this: MultiNodeSpec with MultiNodeWordSpec => val coordinator = RoleName("nodeA") def logProps(logId: String): Props = LeveldbEventLog.props(logId) override def afterAll(): Unit = { // get all config data before shutting down node val snapshotRootDir = new File(system.settings.config.getString("eventuate.snapshot.filesystem.dir")) val logRootDir = new File(system.settings.config.getString("eventuate.log.leveldb.dir")) // shut down node super.afterAll() // delete log and snapshot files if (isNode(coordinator)) { FileUtils.deleteDirectory(snapshotRootDir) FileUtils.deleteDirectory(logRootDir) } } }
Example 83
Source File: PersistOnEventWithRecoverySpecLeveldb.scala From eventuate with Apache License 2.0 | 5 votes |
package com.rbmhtechnology.eventuate import java.util.UUID import akka.actor.Actor import akka.actor.ActorRef import akka.actor.Props import akka.testkit.TestProbe import com.rbmhtechnology.eventuate.ReplicationIntegrationSpec.replicationConnection import com.rbmhtechnology.eventuate.utilities._ import org.apache.commons.io.FileUtils import org.scalatest.Matchers import org.scalatest.WordSpec import scala.concurrent.duration.DurationInt object PersistOnEventWithRecoverySpecLeveldb { class OnBEmitRandomActor(val eventLog: ActorRef, probe: TestProbe) extends EventsourcedActor with PersistOnEvent { override def id = getClass.getName override def onCommand = Actor.emptyBehavior override def onEvent = { case "A" => case "B" => persistOnEvent(UUID.randomUUID().toString) case uuid: String => probe.ref ! uuid } } def persistOnEventProbe(locationA1: Location, log: ActorRef) = { val probe = locationA1.probe locationA1.system.actorOf(Props(new OnBEmitRandomActor(log, probe))) probe } val noMsgTimeout = 100.millis } class PersistOnEventWithRecoverySpecLeveldb extends WordSpec with Matchers with MultiLocationSpecLeveldb { import RecoverySpecLeveldb._ import PersistOnEventWithRecoverySpecLeveldb._ override val logFactory: String => Props = id => SingleLocationSpecLeveldb.TestEventLog.props(id, batching = true) "An EventsourcedActor with PersistOnEvent" must { "not re-attempt persistence on successful write after reordering of events through disaster recovery" in { val locationB = location("B", customConfig = RecoverySpecLeveldb.config) def newLocationA = location("A", customConfig = RecoverySpecLeveldb.config) val locationA1 = newLocationA val endpointB = locationB.endpoint(Set("L1"), Set(replicationConnection(locationA1.port))) def newEndpointA(l: Location, activate: Boolean) = l.endpoint(Set("L1"), Set(replicationConnection(locationB.port)), activate = activate) val endpointA1 = newEndpointA(locationA1, activate = true) val targetA = endpointA1.target("L1") val logDirA = logDirectory(targetA) val targetB = endpointB.target("L1") val a1Probe = persistOnEventProbe(locationA1, targetA.log) write(targetA, List("A")) write(targetB, List("B")) val event = a1Probe.expectMsgClass(classOf[String]) assertConvergence(Set("A", "B", event), endpointA1, endpointB) locationA1.terminate().await FileUtils.deleteDirectory(logDirA) val locationA2 = newLocationA val endpointA2 = newEndpointA(locationA2, activate = false) endpointA2.recover().await val a2Probe = persistOnEventProbe(locationA2, endpointA2.logs("L1")) a2Probe.expectMsg(event) a2Probe.expectNoMsg(noMsgTimeout) assertConvergence(Set("A", "B", event), endpointA2, endpointB) } } }
Example 84
Source File: NeuralNetwork.scala From Scala-Machine-Learning-Projects with MIT License | 5 votes |
package Yelp.Trainer

import org.deeplearning4j.nn.conf.MultiLayerConfiguration
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork
import org.nd4j.linalg.factory.Nd4j
import java.io.File
import org.apache.commons.io.FileUtils
import java.io.{DataInputStream, DataOutputStream, FileInputStream}
import java.nio.file.{Files, Paths}

object NeuralNetwork {

  def loadNN(NNconfig: String, NNparams: String) = {
    // get neural network config
    val confFromJson: MultiLayerConfiguration = MultiLayerConfiguration.fromJson(FileUtils.readFileToString(new File(NNconfig)))
    // get neural network parameters
    val dis: DataInputStream = new DataInputStream(new FileInputStream(NNparams))
    val newParams = Nd4j.read(dis)
    // creating network object
    val savedNetwork: MultiLayerNetwork = new MultiLayerNetwork(confFromJson)
    savedNetwork.init()
    savedNetwork.setParameters(newParams)
    savedNetwork
  }

  def saveNN(model: MultiLayerNetwork, NNconfig: String, NNparams: String) = {
    // save neural network config
    FileUtils.write(new File(NNconfig), model.getLayerWiseConfigurations().toJson())
    // save neural network parms
    val dos: DataOutputStream = new DataOutputStream(Files.newOutputStream(Paths.get(NNparams)))
    Nd4j.write(model.params(), dos)
  }
}
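saveNN and loadNN above round-trip the network configuration as JSON text through FileUtils.write and FileUtils.readFileToString. A stripped-down sketch of that text round-trip without the DL4J types (the file name, charset and JSON payload are assumptions):

import java.io.File
import java.nio.charset.StandardCharsets

import org.apache.commons.io.FileUtils

object ConfigRoundTripSketch {
  def main(args: Array[String]): Unit = {
    val confFile = new File("/tmp/nn-conf.json")
    // stand-in for model.getLayerWiseConfigurations().toJson()
    val json = """{"layers":[{"type":"dense","units":128}]}"""

    FileUtils.write(confFile, json, StandardCharsets.UTF_8)                      // save
    val reloaded = FileUtils.readFileToString(confFile, StandardCharsets.UTF_8)  // load

    println(reloaded == json) // true
  }
}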
Example 85
Source File: ExampleMahaService.scala From maha with Apache License 2.0 | 5 votes |
// Copyright 2017, Yahoo Holdings Inc. // Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms. package com.yahoo.maha.api.jersey.example import java.io.File import java.util.UUID import com.yahoo.maha.core.ddl.OracleDDLGenerator import com.yahoo.maha.jdbc.{JdbcConnection, List, Seq} import com.yahoo.maha.service.{DefaultMahaService, MahaService, MahaServiceConfig} import com.zaxxer.hikari.{HikariConfig, HikariDataSource} import grizzled.slf4j.Logging import org.apache.commons.io.FileUtils import org.joda.time.DateTime import org.joda.time.format.DateTimeFormat object ExampleMahaService extends Logging { val REGISTRY_NAME = "academic"; private var dataSource: Option[HikariDataSource] = None private var jdbcConnection: Option[JdbcConnection] = None val h2dbId = UUID.randomUUID().toString.replace("-","") val today: String = DateTimeFormat.forPattern("yyyy-MM-dd").print(DateTime.now()) val yesterday: String = DateTimeFormat.forPattern("yyyy-MM-dd").print(DateTime.now().minusDays(1)) def initJdbcToH2(): Unit = { val config = new HikariConfig() config.setJdbcUrl(s"jdbc:h2:mem:$h2dbId;MODE=Oracle;DB_CLOSE_DELAY=-1") config.setUsername("sa") config.setPassword("h2.test.database.password") config.setMaximumPoolSize(2) dataSource = Option(new HikariDataSource(config)) jdbcConnection = dataSource.map(new JdbcConnection(_)) assert(jdbcConnection.isDefined, "Failed to connect to h2 local server") } def getMahaService(scope: String = "main"): MahaService = { val jsonString = FileUtils.readFileToString(new File(s"src/$scope/resources/maha-service-config.json")) .replaceAll("h2dbId", s"$h2dbId") initJdbcToH2() val mahaServiceResult = MahaServiceConfig.fromJson(jsonString.getBytes("utf-8")) if (mahaServiceResult.isFailure) { mahaServiceResult.leftMap { res=> error(s"Failed to launch Example MahaService, MahaService Error list is: ${res.list.toList}") } } val mahaServiceConfig = mahaServiceResult.toOption.get val mahaService: MahaService = new DefaultMahaService(mahaServiceConfig) stageStudentData(mahaServiceConfig) mahaService } def stageStudentData(mahaServiceConfig: MahaServiceConfig) : Unit = { val ddlGenerator = new OracleDDLGenerator val erRegistryConfig = mahaServiceConfig.registry.get(ExampleMahaService.REGISTRY_NAME).get val erRegistry= erRegistryConfig.registry erRegistry.factMap.values.foreach { publicFact => publicFact.factList.foreach { fact=> val ddl = ddlGenerator.toDDL(fact) assert(jdbcConnection.get.executeUpdate(ddl).isSuccess) } } val insertSql = """ INSERT INTO student_grade_sheet (year, section_id, student_id, class_id, total_marks, date, comment) VALUES (?, ?, ?, ?, ?, ?, ?) """ val rows: List[Seq[Any]] = List( Seq(1, 100, 213, 200, 125, ExampleMahaService.today, "some comment") ) rows.foreach { row => val result = jdbcConnection.get.executeUpdate(insertSql, row) assert(result.isSuccess) } var count = 0 jdbcConnection.get.queryForObject("select * from student_grade_sheet") { rs => while (rs.next()) { count += 1 } } assert(rows.size == count) } }
Example 86
Source File: StandaloneKCFTests.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.standalone import java.nio.charset.StandardCharsets.UTF_8 import java.nio.file.Files import common.WskProps import org.apache.commons.io.FileUtils import org.apache.openwhisk.core.containerpool.kubernetes.test.KubeClientSupport import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import system.basic.WskRestBasicTests @RunWith(classOf[JUnitRunner]) class StandaloneKCFTests extends WskRestBasicTests with StandaloneServerFixture with StandaloneSanityTestSupport with KubeClientSupport { override implicit val wskprops = WskProps().copy(apihost = serverUrl) //Turn on to debug locally easily override protected val dumpLogsAlways = false override protected val dumpStartupLogs = false override protected def useMockServer = false override protected def supportedTests = Set("Wsk Action REST should invoke a blocking action and get only the result") override protected def extraArgs: Seq[String] = Seq("--dev-mode", "--dev-kcf") private val podTemplate = """--- |apiVersion: "v1" |kind: "Pod" |metadata: | annotations: | allow-outbound : "true" | labels: | launcher: standalone""".stripMargin private val podTemplateFile = Files.createTempFile("whisk", null).toFile override val customConfig = { FileUtils.write(podTemplateFile, podTemplate, UTF_8) Some(s"""include classpath("standalone-kcf.conf") | |whisk { | kubernetes { | pod-template = "${podTemplateFile.toURI}" | } |}""".stripMargin) } override def afterAll(): Unit = { checkPodState() super.afterAll() podTemplateFile.delete() } def checkPodState(): Unit = { val podList = kubeClient.pods().withLabel("launcher").list() podList.getItems.isEmpty shouldBe false } }
Example 87
Source File: ConfigMapValueTests.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.common import java.nio.charset.StandardCharsets.UTF_8 import java.nio.file.Files import com.typesafe.config.ConfigFactory import org.apache.commons.io.FileUtils import org.junit.runner.RunWith import org.scalatest.junit.JUnitRunner import org.scalatest.{FlatSpec, Matchers} import pureconfig._ import pureconfig.generic.auto._ @RunWith(classOf[JUnitRunner]) class ConfigMapValueTests extends FlatSpec with Matchers { behavior of "ConfigMapValue" case class ValueTest(template: ConfigMapValue, count: Int) it should "read from string" in { val config = ConfigFactory.parseString(""" |whisk { | value-test { | template = "test string" | count = 42 | } |}""".stripMargin) val valueTest = readValueTest(config) valueTest.template.value shouldBe "test string" } it should "read from file reference" in { val file = Files.createTempFile("whisk", null).toFile FileUtils.write(file, "test string", UTF_8) val config = ConfigFactory.parseString(s""" |whisk { | value-test { | template = "${file.toURI}" | count = 42 | } |}""".stripMargin) val valueTest = readValueTest(config) valueTest.template.value shouldBe "test string" file.delete() } private def readValueTest(config: com.typesafe.config.Config) = { loadConfigOrThrow[ValueTest](config.getConfig("whisk.value-test")) } }
Example 88
Source File: ConfigMapValue.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.common

import java.io.File
import java.net.URI
import java.nio.charset.StandardCharsets.UTF_8

import org.apache.commons.io.FileUtils
import pureconfig.ConfigReader
import pureconfig.ConvertHelpers.catchReadError

class ConfigMapValue private (val value: String)

object ConfigMapValue {

  def apply(config: String): ConfigMapValue = {
    val value = if (config.startsWith("file:")) {
      val uri = new URI(config)
      val file = new File(uri)
      FileUtils.readFileToString(file, UTF_8)
    } else config
    new ConfigMapValue(value)
  }

  implicit val reader: ConfigReader[ConfigMapValue] =
    ConfigReader.fromString[ConfigMapValue](catchReadError(apply))
}
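The apply method above treats any value starting with file: as a URI, converts it to a File and reads it with FileUtils.readFileToString(file, UTF_8); everything else is used verbatim. A usage-style sketch of both branches, where the temp file merely stands in for a mounted config file:

import java.io.File
import java.nio.charset.StandardCharsets.UTF_8
import java.nio.file.Files

import org.apache.commons.io.FileUtils

object FileOrLiteralSketch {
  // same idea as ConfigMapValue.apply: literal value, or contents of a file: URI
  def resolve(config: String): String =
    if (config.startsWith("file:")) FileUtils.readFileToString(new File(new java.net.URI(config)), UTF_8)
    else config

  def main(args: Array[String]): Unit = {
    val f = Files.createTempFile("whisk", ".tpl").toFile
    FileUtils.write(f, "template from file", UTF_8)

    println(resolve("inline template")) // literal branch
    println(resolve(f.toURI.toString))  // file: branch
    f.delete()
  }
}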
Example 89
Source File: CollectionResourceUsage.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.core.database.cosmosdb import org.apache.commons.io.FileUtils import org.apache.openwhisk.core.entity.ByteSize import org.apache.openwhisk.core.entity.SizeUnits.KB case class CollectionResourceUsage(documentsSize: Option[ByteSize], collectionSize: Option[ByteSize], documentsCount: Option[Long], indexingProgress: Option[Int], documentsSizeQuota: Option[ByteSize]) { def indexSize: Option[ByteSize] = { for { ds <- documentsSize cs <- collectionSize } yield cs - ds } def asString: String = { List( documentsSize.map(ds => s"documentSize: ${displaySize(ds)}"), indexSize.map(is => s"indexSize: ${displaySize(is)}"), documentsCount.map(dc => s"documentsCount: $dc"), documentsSizeQuota.map(dq => s"collectionSizeQuota: ${displaySize(dq)}")).flatten.mkString(",") } private def displaySize(b: ByteSize) = FileUtils.byteCountToDisplaySize(b.toBytes) } object CollectionResourceUsage { val quotaHeader = "x-ms-resource-quota" val usageHeader = "x-ms-resource-usage" val indexHeader = "x-ms-documentdb-collection-index-transformation-progress" def apply(responseHeaders: Map[String, String]): Option[CollectionResourceUsage] = { for { quota <- responseHeaders.get(quotaHeader).map(headerValueToMap) usage <- responseHeaders.get(usageHeader).map(headerValueToMap) } yield { CollectionResourceUsage( usage.get("documentsSize").map(_.toLong).map(ByteSize(_, KB)), usage.get("collectionSize").map(_.toLong).map(ByteSize(_, KB)), usage.get("documentsCount").map(_.toLong), responseHeaders.get(indexHeader).map(_.toInt), quota.get("collectionSize").map(_.toLong).map(ByteSize(_, KB))) } } private def headerValueToMap(value: String): Map[String, String] = { //storedProcedures=100;triggers=25;functions=25;documentsCount=-1;documentsSize=xxx;collectionSize=xxx val pairs = value.split("=|;").grouped(2) pairs.map { case Array(k, v) => k -> v }.toMap } }
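displaySize above delegates to FileUtils.byteCountToDisplaySize, which rounds a byte count down to the nearest whole unit (bytes, KB, MB, GB, ...). A tiny sketch of what the reporting string ends up looking like, with made-up byte counts:

import org.apache.commons.io.FileUtils

object DisplaySizeSketch {
  def main(args: Array[String]): Unit = {
    // byteCountToDisplaySize truncates to the largest whole unit
    println(FileUtils.byteCountToDisplaySize(512L))                    // "512 bytes"
    println(FileUtils.byteCountToDisplaySize(10 * 1024L))              // "10 KB"
    println(FileUtils.byteCountToDisplaySize(3L * 1024 * 1024 * 1024)) // "3 GB"
  }
}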
Example 90
Source File: InstallRouteMgmt.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.standalone import java.io.File import akka.http.scaladsl.model.Uri import org.apache.commons.io.{FileUtils, IOUtils} import org.apache.openwhisk.common.TransactionId.systemPrefix import org.apache.openwhisk.common.{Logging, TransactionId} import scala.sys.process.ProcessLogger import scala.util.Try import scala.sys.process._ case class InstallRouteMgmt(workDir: File, authKey: String, apiHost: Uri, namespace: String, gatewayUrl: Uri, wsk: String)(implicit log: Logging) { case class Action(name: String, desc: String) private val noopLogger = ProcessLogger(_ => ()) private implicit val tid: TransactionId = TransactionId(systemPrefix + "apiMgmt") val actionNames = Array( Action("createApi", "Create an API"), Action("deleteApi", "Delete the API"), Action("getApi", "Retrieve the specified API configuration (in JSON format)")) def run(): Unit = { require(wskExists, s"wsk command not found at $wsk. Route management actions cannot be installed") log.info(this, packageUpdateCmd.!!.trim) //TODO Optimize to ignore this if package already installed actionNames.foreach { action => val name = action.name val actionZip = new File(workDir, s"$name.zip") FileUtils.copyURLToFile(IOUtils.resourceToURL(s"/$name.zip"), actionZip) val cmd = createActionUpdateCmd(action, name, actionZip) val result = cmd.!!.trim log.info(this, s"Installed $name - $result") FileUtils.deleteQuietly(actionZip) } //This log message is used by tests to confirm that actions are installed log.info(this, "Installed Route Management Actions") } private def createActionUpdateCmd(action: Action, name: String, actionZip: File) = { Seq( wsk, "--apihost", apiHost.toString(), "--auth", authKey, "action", "update", s"$namespace/apimgmt/$name", actionZip.getAbsolutePath, "-a", "description", action.desc, "--kind", "nodejs:default", "-a", "web-export", "true", "-a", "final", "true") } private def packageUpdateCmd = { Seq( wsk, "--apihost", apiHost.toString(), "--auth", authKey, "package", "update", s"$namespace/apimgmt", "--shared", "no", "-a", "description", "This package manages the gateway API configuration.", "-p", "gwUrlV2", gatewayUrl.toString()) } def wskExists: Boolean = Try(s"$wsk property get --cliversion".!(noopLogger)).getOrElse(-1) == 0 }
Example 91
Source File: TestSpec.scala From spark-distcp with Apache License 2.0 | 5 votes |
package com.coxautodata import java.io.ByteArrayInputStream import java.nio.file.Files import com.coxautodata.objects.SerializableFileStatus import com.coxautodata.utils.FileListing import org.apache.commons.io.{FileUtils, IOUtils} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, LocalFileSystem, Path} import org.scalatest.{BeforeAndAfterEach, FunSpec, Matchers} trait TestSpec extends FunSpec with Matchers with BeforeAndAfterEach { var testingBaseDir: java.nio.file.Path = _ var testingBaseDirName: String = _ var testingBaseDirPath: Path = _ var localFileSystem: LocalFileSystem = _ override def beforeEach(): Unit = { super.beforeEach() testingBaseDir = Files.createTempDirectory("test_output") testingBaseDirName = testingBaseDir.toString localFileSystem = FileSystem.getLocal(new Configuration()) testingBaseDirPath = localFileSystem.makeQualified(new Path(testingBaseDirName)) } override def afterEach(): Unit = { super.afterEach() FileUtils.deleteDirectory(testingBaseDir.toFile) } def createFile(relativePath: Path, content: Array[Byte]): SerializableFileStatus = { val path = new Path(testingBaseDirPath, relativePath) localFileSystem.mkdirs(path.getParent) val in = new ByteArrayInputStream(content) val out = localFileSystem.create(path) IOUtils.copy(in, out) in.close() out.close() SerializableFileStatus(localFileSystem.getFileStatus(path)) } def fileStatusToResult(f: SerializableFileStatus): FileListing = { FileListing(f.getPath.toString, if (f.isFile) Some(f.getLen) else None) } }
Example 92
Source File: TestFolder.scala From schedoscope with Apache License 2.0 | 5 votes |
package org.schedoscope.scheduler.driver import java.io.File import org.apache.commons.io.FileUtils import org.scalatest.Suite trait TestFolder extends Suite { self: Suite => var testFolder: File = _ var inputFolder: File = _ var outputFolder: File = _ def in = inputFolder.getAbsolutePath() def out = outputFolder.getAbsolutePath() private def deleteFile(file: File) { if (!file.exists) return if (file.isFile) { file.delete() } else { file.listFiles().foreach(deleteFile) file.delete() } } def /() = File.separator def createInputFile(path: String) { FileUtils.touch(new File(s"${inputFolder}${File.separator}${path}")) } def outputFile(path: String) = new File(outputPath(path)) def inputFile(path: String) = new File(inputPath(path)) def inputPath(path: String) = s"${in}${File.separator}${path}" def outputPath(path: String) = s"${out}${File.separator}${path}" abstract override def withFixture(test: NoArgTest) = { val tempFolder = System.getProperty("java.io.tmpdir") var folder: File = null do { folder = new File(tempFolder, "scalatest-" + System.nanoTime) } while (!folder.mkdir()) testFolder = folder inputFolder = new File(testFolder, "in"); inputFolder.mkdir() outputFolder = new File(testFolder, "out") outputFolder.mkdir() try { super.withFixture(test) } finally { deleteFile(testFolder) } } }
Example 93
Source File: JavaMetricsScreen.scala From Pi-Akka-Cluster with Apache License 2.0 | 5 votes |
package akka_oled

import java.lang.management.ManagementFactory
import java.text.DecimalFormat

import com.sun.management.OperatingSystemMXBean
import org.apache.commons.io.FileUtils

import scala.collection.mutable

trait JavaMetricsScreen {

  def getJavaMetrics(): Array[Array[String]] = {
    val bean = ManagementFactory.getPlatformMXBean(classOf[OperatingSystemMXBean])
    val formatter = new DecimalFormat("#0.00")
    val map = mutable.LinkedHashMap[String, String](
      "Max mem:" -> FileUtils.byteCountToDisplaySize(
        ManagementFactory.getMemoryMXBean.getHeapMemoryUsage.getMax),
      "Curr mem:" -> FileUtils.byteCountToDisplaySize(
        ManagementFactory.getMemoryMXBean.getHeapMemoryUsage.getUsed),
      "CPU:" -> (formatter.format(bean.getSystemCpuLoad) + "%"),
      "Threads:" -> ManagementFactory.getThreadMXBean.getThreadCount.toString,
      "Classes:" -> ManagementFactory.getClassLoadingMXBean.getLoadedClassCount.toString)
    map.toArray.map(x => Array(x._1, x._2))
  }
}
Example 94
Source File: GraphFrameTestSparkContext.scala From graphframes with Apache License 2.0 | 5 votes |
package org.graphframes import java.io.File import java.nio.file.Files import org.apache.commons.io.FileUtils import org.scalatest.{BeforeAndAfterAll, Suite} import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.sql.{SparkSession, SQLContext, SQLImplicits} trait GraphFrameTestSparkContext extends BeforeAndAfterAll { self: Suite => @transient var spark: SparkSession = _ @transient var sc: SparkContext = _ @transient var sqlContext: SQLContext = _ @transient var sparkMajorVersion: Int = _ @transient var sparkMinorVersion: Int = _ def isLaterVersion(minVersion: String): Boolean = { val (minMajorVersion, minMinorVersion) = TestUtils.majorMinorVersion(minVersion) if (sparkMajorVersion != minMajorVersion) { return sparkMajorVersion > minMajorVersion } else { return sparkMinorVersion >= minMinorVersion } } override def beforeAll() { super.beforeAll() spark = SparkSession.builder() .master("local[2]") .appName("GraphFramesUnitTest") .config("spark.sql.shuffle.partitions", 4) .getOrCreate() val checkpointDir = Files.createTempDirectory(this.getClass.getName).toString spark.sparkContext.setCheckpointDir(checkpointDir) sc = spark.sparkContext sqlContext = spark.sqlContext val (verMajor, verMinor) = TestUtils.majorMinorVersion(sc.version) sparkMajorVersion = verMajor sparkMinorVersion = verMinor } override def afterAll() { val checkpointDir = sc.getCheckpointDir if (spark != null) { spark.stop() } spark = null sqlContext = null sc = null checkpointDir.foreach { dir => FileUtils.deleteQuietly(new File(dir)) } super.afterAll() } }
Example 95
Source File: PluginsFilesUtils.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta.serving.core.utils import java.io.File import java.net.URL import java.util.{Calendar, UUID} import akka.event.slf4j.SLF4JLogging import com.stratio.sparta.serving.core.helpers.JarsHelper import org.apache.commons.io.FileUtils trait PluginsFilesUtils extends SLF4JLogging { def addPluginsToClassPath(pluginsFiles: Array[String]): Unit = { log.info(pluginsFiles.mkString(",")) pluginsFiles.foreach(filePath => { log.info(s"Adding to classpath plugin file: $filePath") if (filePath.startsWith("/") || filePath.startsWith("file://")) addFromLocal(filePath) if (filePath.startsWith("hdfs")) addFromHdfs(filePath) if (filePath.startsWith("http")) addFromHttp(filePath) }) } private def addFromLocal(filePath: String): Unit = { log.info(s"Getting file from local: $filePath") val file = new File(filePath.replace("file://", "")) JarsHelper.addToClasspath(file) } private def addFromHdfs(fileHdfsPath: String): Unit = { log.info(s"Getting file from HDFS: $fileHdfsPath") val inputStream = HdfsUtils().getFile(fileHdfsPath) val fileName = fileHdfsPath.split("/").last log.info(s"HDFS file name is $fileName") val file = new File(s"/tmp/sparta/userjars/${UUID.randomUUID().toString}/$fileName") log.info(s"Downloading HDFS file to local file system: ${file.getAbsoluteFile}") FileUtils.copyInputStreamToFile(inputStream, file) JarsHelper.addToClasspath(file) } private def addFromHttp(fileURI: String): Unit = { log.info(s"Getting file from HTTP: $fileURI") val tempFile = File.createTempFile(s"sparta-plugin-${Calendar.getInstance().getTimeInMillis}", ".jar") val url = new URL(fileURI) FileUtils.copyURLToFile(url, tempFile) JarsHelper.addToClasspath(tempFile) } }
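addFromHdfs and addFromHttp above both end in a Commons IO copy: copyInputStreamToFile drains an already-open stream into a local file (closing the stream when done), while copyURLToFile opens the connection itself. A compact sketch of the stream branch; the byte array stands in for the HDFS input stream, and the target path is illustrative:

import java.io.{ByteArrayInputStream, File}
import java.util.UUID

import org.apache.commons.io.FileUtils

object CopyStreamToFileSketch {
  def main(args: Array[String]): Unit = {
    // stands in for the stream returned by HdfsUtils().getFile(fileHdfsPath)
    val in = new ByteArrayInputStream("fake jar bytes".getBytes("UTF-8"))

    // copyInputStreamToFile creates missing parent directories, writes the
    // stream out and closes it when it is done
    val target = new File(s"/tmp/sparta/userjars/${UUID.randomUUID()}/plugin.jar")
    FileUtils.copyInputStreamToFile(in, target)

    println(s"wrote ${FileUtils.sizeOf(target)} bytes to $target")
  }
}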
Example 96
Source File: FileSystemOutputIT.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta.plugin.output.filesystem import java.io.File import com.stratio.sparta.plugin.TemporalSparkContext import com.stratio.sparta.plugin.output.fileSystem.FileSystemOutput import com.stratio.sparta.sdk.pipeline.output.{Output, OutputFormatEnum, SaveModeEnum} import org.apache.commons.io.FileUtils import org.apache.spark.sql._ import org.apache.spark.sql.types._ import org.junit.runner.RunWith import org.scalatest.Matchers import org.scalatest.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class FileSystemOutputIT extends TemporalSparkContext with Matchers { val directory = getClass().getResource("/origin.txt") val parentFile = new File(directory.getPath).getParent val properties = Map(("path", parentFile + "/testRow"), ("outputFormat", "row")) val fields = StructType(StructField("name", StringType, false) :: StructField("age", IntegerType, false) :: StructField("year", IntegerType, true) :: Nil) val fsm = new FileSystemOutput("key", properties) "An object of type FileSystemOutput " should "have the same values as the properties Map" in { fsm.outputFormat should be(OutputFormatEnum.ROW) } private def dfGen(): DataFrame = { val sqlCtx = SparkSession.builder().config(sc.getConf).getOrCreate() val dataRDD = sc.parallelize(List(("user1", 23, 1993), ("user2", 26, 1990), ("user3", 21, 1995))) .map { case (name, age, year) => Row(name, age, year) } sqlCtx.createDataFrame(dataRDD, fields) } def fileExists(path: String): Boolean = new File(path).exists() "Given a DataFrame, a directory" should "be created with the data written inside" in { fsm.save(dfGen(), SaveModeEnum.Append, Map(Output.TableNameKey -> "test")) fileExists(fsm.path.get) should equal(true) } it should "exist with the given path and be deleted" in { if (fileExists(fsm.path.get)) FileUtils.deleteDirectory(new File(fsm.path.get)) fileExists(fsm.path.get) should equal(false) } val fsm2 = new FileSystemOutput("key", properties.updated("outputFormat", "json") .updated("path", parentFile + "/testJson")) "Given another DataFrame, a directory" should "be created with the data inside in JSON format" in { fsm2.outputFormat should be(OutputFormatEnum.JSON) fsm2.save(dfGen(), SaveModeEnum.Append, Map(Output.TableNameKey -> "test")) fileExists(fsm2.path.get) should equal(true) } it should "exist with the given path and be deleted" in { if (fileExists(s"${fsm2.path.get}/test")) FileUtils.deleteDirectory(new File(s"${fsm2.path.get}/test")) fileExists(s"${fsm2.path.get}/test") should equal(false) } }
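The directory cleanup asserted above is a single FileUtils.deleteDirectory call; a minimal REPL-style sketch with a placeholder path:

import java.io.File
import org.apache.commons.io.FileUtils

val out = new File("/tmp/testRow") // placeholder output directory
if (out.exists()) {
  // Recursively deletes the directory and everything under it; throws IOException on failure.
  FileUtils.deleteDirectory(out)
}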
Example 97
Source File: MLAtlasEntityUtilsSuite.scala From spark-atlas-connector with Apache License 2.0 | 5 votes |
package com.hortonworks.spark.atlas.types import java.io.File import org.apache.atlas.{AtlasClient, AtlasConstants} import org.apache.atlas.model.instance.AtlasEntity import org.apache.commons.io.FileUtils import org.apache.spark.ml.Pipeline import org.apache.spark.ml.feature.MinMaxScaler import org.apache.spark.ml.linalg.Vectors import org.apache.spark.sql.types.{IntegerType, StringType, StructType} import org.scalatest.{FunSuite, Matchers} import com.hortonworks.spark.atlas.TestUtils._ import com.hortonworks.spark.atlas.{AtlasUtils, WithHiveSupport} class MLAtlasEntityUtilsSuite extends FunSuite with Matchers with WithHiveSupport { def getTableEntity(tableName: String): AtlasEntity = { val dbDefinition = createDB("db1", "hdfs:///test/db/db1") val sd = createStorageFormat() val schema = new StructType() .add("user", StringType, false) .add("age", IntegerType, true) val tableDefinition = createTable("db1", s"$tableName", schema, sd) val tableEntities = internal.sparkTableToEntity( tableDefinition, AtlasConstants.DEFAULT_CLUSTER_NAME, Some(dbDefinition)) val tableEntity = tableEntities.entity tableEntity } test("pipeline, pipeline model, fit and transform") { val uri = "/" val pipelineDir = "tmp/pipeline" val modelDir = "tmp/model" val pipelineDirEntity = internal.mlDirectoryToEntity(uri, pipelineDir) pipelineDirEntity.entity.getAttribute("uri") should be (uri) pipelineDirEntity.entity.getAttribute("directory") should be (pipelineDir) pipelineDirEntity.dependencies.length should be (0) val modelDirEntity = internal.mlDirectoryToEntity(uri, modelDir) modelDirEntity.entity.getAttribute("uri") should be (uri) modelDirEntity.entity.getAttribute("directory") should be (modelDir) modelDirEntity.dependencies.length should be (0) val df = sparkSession.createDataFrame(Seq( (1, Vectors.dense(0.0, 1.0, 4.0), 1.0), (2, Vectors.dense(1.0, 0.0, 4.0), 2.0), (3, Vectors.dense(1.0, 0.0, 5.0), 3.0), (4, Vectors.dense(0.0, 0.0, 5.0), 4.0) )).toDF("id", "features", "label") val scaler = new MinMaxScaler() .setInputCol("features") .setOutputCol("features_scaled") .setMin(0.0) .setMax(3.0) val pipeline = new Pipeline().setStages(Array(scaler)) val model = pipeline.fit(df) pipeline.write.overwrite().save(pipelineDir) val pipelineEntity = internal.mlPipelineToEntity(pipeline.uid, pipelineDirEntity) pipelineEntity.entity.getTypeName should be (metadata.ML_PIPELINE_TYPE_STRING) pipelineEntity.entity.getAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME) should be ( pipeline.uid) pipelineEntity.entity.getAttribute("name") should be (pipeline.uid) pipelineEntity.entity.getRelationshipAttribute("directory") should be ( AtlasUtils.entityToReference(pipelineDirEntity.entity, useGuid = false)) pipelineEntity.dependencies should be (Seq(pipelineDirEntity)) val modelEntity = internal.mlModelToEntity(model.uid, modelDirEntity) val modelUid = model.uid.replaceAll("pipeline", "model") modelEntity.entity.getTypeName should be (metadata.ML_MODEL_TYPE_STRING) modelEntity.entity.getAttribute(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME) should be (modelUid) modelEntity.entity.getAttribute("name") should be (modelUid) modelEntity.entity.getRelationshipAttribute("directory") should be ( AtlasUtils.entityToReference(modelDirEntity.entity, useGuid = false)) modelEntity.dependencies should be (Seq(modelDirEntity)) FileUtils.deleteDirectory(new File("tmp")) } }
Example 98
Source File: WithRemoteHiveMetastoreServiceSupport.scala From spark-atlas-connector with Apache License 2.0 | 5 votes |
package com.hortonworks.spark.atlas import java.io.File import java.nio.file.Files import com.hortonworks.spark.atlas.utils.SparkUtils import com.hotels.beeju.ThriftHiveMetaStoreTestUtil import org.apache.commons.io.FileUtils import org.apache.spark.sql.SparkSession import org.scalatest.{BeforeAndAfterAll, Suite} trait WithRemoteHiveMetastoreServiceSupport extends BeforeAndAfterAll { self: Suite => protected val dbName = "sac_hive_metastore" protected var sparkSession: SparkSession = _ private var warehouseDir: String = _ private val hive = new ThriftHiveMetaStoreTestUtil(dbName) private def cleanupAnyExistingSession(): Unit = { val session = SparkSession.getActiveSession.orElse(SparkSession.getDefaultSession) if (session.isDefined) { session.get.sessionState.catalog.reset() session.get.stop() SparkSession.clearActiveSession() SparkSession.clearDefaultSession() } } override protected def beforeAll(): Unit = { super.beforeAll() cleanupAnyExistingSession() hive.before() warehouseDir = Files.createTempDirectory("sac-warehouse-").toString sparkSession = SparkSession.builder() .master("local") .appName(this.getClass.getCanonicalName) .enableHiveSupport() .config("spark.ui.enabled", "false") .config("spark.sql.warehouse.dir", warehouseDir) .config("spark.hadoop.hive.metastore.uris", hive.getThriftConnectionUri) .getOrCreate() // reset hiveConf to make sure the configuration change takes effect SparkUtils.resetHiveConf } override protected def afterAll(): Unit = { try { hive.after() sparkSession.sessionState.catalog.reset() sparkSession.stop() SparkSession.clearActiveSession() SparkSession.clearDefaultSession() } finally { // reset hiveConf again to prevent affecting other tests SparkUtils.resetHiveConf sparkSession = null FileUtils.deleteDirectory(new File(warehouseDir)) } System.clearProperty("spark.driver.port") super.afterAll() } }
Example 99
Source File: WithHiveSupport.scala From spark-atlas-connector with Apache License 2.0 | 5 votes |
package com.hortonworks.spark.atlas import java.io.File import java.nio.file.Files import org.apache.commons.io.FileUtils import org.apache.spark.sql.SparkSession import org.scalatest.{BeforeAndAfterAll, Suite} trait WithHiveSupport extends BeforeAndAfterAll { self: Suite => protected var sparkSession: SparkSession = _ private var metastoreDir: String = _ private var warehouseDir: String = _ private def cleanupAnyExistingSession(): Unit = { val session = SparkSession.getActiveSession.orElse(SparkSession.getDefaultSession) if (session.isDefined) { session.get.sessionState.catalog.reset() session.get.stop() SparkSession.clearActiveSession() SparkSession.clearDefaultSession() } } override protected def beforeAll(): Unit = { super.beforeAll() cleanupAnyExistingSession() metastoreDir = Files.createTempDirectory("sac-metastore-").toString warehouseDir = Files.createTempDirectory("sac-warehouse-").toString System.setProperty("derby.system.home", metastoreDir) sparkSession = SparkSession.builder() .master("local") .appName(this.getClass.getCanonicalName) .enableHiveSupport() .config("spark.ui.enabled", "false") .config("spark.sql.warehouse.dir", warehouseDir) .getOrCreate() } override protected def afterAll(): Unit = { try { sparkSession.sessionState.catalog.reset() sparkSession.stop() SparkSession.clearActiveSession() SparkSession.clearDefaultSession() } finally { sparkSession = null FileUtils.deleteDirectory(new File(warehouseDir)) } System.clearProperty("spark.driver.port") super.afterAll() } }
Example 100
Source File: DistServiceExecutor.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.experiments.distributeservice import java.io.{File, FileWriter} import java.net.InetAddress import scala.collection.JavaConverters._ import scala.io.Source import scala.sys.process._ import scala.util.{Failure, Success, Try} import akka.actor.Actor import org.apache.commons.io.FileUtils import org.apache.commons.lang.text.StrSubstitutor import org.slf4j.Logger import org.apache.gearpump.cluster.{ExecutorContext, UserConfig} import org.apache.gearpump.experiments.distributeservice.DistServiceAppMaster.InstallService import org.apache.gearpump.util.{ActorUtil, LogUtil} class DistServiceExecutor(executorContext: ExecutorContext, userConf: UserConfig) extends Actor { import executorContext._ private val LOG: Logger = LogUtil.getLogger(getClass, executor = executorId, app = appId) override def receive: Receive = { case InstallService(url, zipFileName, targetPath, scriptData, serviceName, serviceSettings) => LOG.info(s"Executor $executorId receive command to install " + s"service $serviceName to $targetPath") unzipFile(url, zipFileName, targetPath) installService(scriptData, serviceName, serviceSettings) } private def unzipFile(url: String, zipFileName: String, targetPath: String) = { val zipFile = File.createTempFile(System.currentTimeMillis().toString, zipFileName) val dir = new File(targetPath) if (dir.exists()) { FileUtils.forceDelete(dir) } val bytes = FileServer.newClient.get(url).get FileUtils.writeByteArrayToFile(zipFile, bytes) val result = Try(s"unzip ${zipFile.getAbsolutePath} -d $targetPath".!!) result match { case Success(msg) => LOG.info(s"Executor $executorId unzip file to $targetPath") case Failure(ex) => throw ex } } private def installService( scriptData: Array[Byte], serviceName: String, serviceSettings: Map[String, Any]) = { val tempFile = File.createTempFile("gearpump", serviceName) FileUtils.writeByteArrayToFile(tempFile, scriptData) val script = new File("/etc/init.d", serviceName) writeFileWithEnvVariables(tempFile, script, serviceSettings ++ getEnvSettings) val result = Try(s"chkconfig --add $serviceName".!!) result match { case Success(msg) => LOG.info(s"Executor install service $serviceName successfully!") case Failure(ex) => throw ex } } private def getEnvSettings: Map[String, Any] = { Map("workerId" -> worker, "localhost" -> ActorUtil.getSystemAddress(context.system).host.get, "hostname" -> InetAddress.getLocalHost.getHostName) } private def writeFileWithEnvVariables(source: File, target: File, envs: Map[String, Any]) = { val writer = new FileWriter(target) val sub = new StrSubstitutor(envs.asJava) sub.setEnableSubstitutionInVariables(true) Source.fromFile(source).getLines().foreach(line => writer.write(sub.replace(line) + "\r\n")) writer.close() } }
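The executor above leans on two FileUtils calls: forceDelete to clear an existing target directory and writeByteArrayToFile to persist downloaded bytes. A minimal REPL-style sketch, with placeholder paths and payload:

import java.io.File
import org.apache.commons.io.FileUtils

val target = new File("/tmp/service/previous-install") // placeholder
if (target.exists()) {
  // forceDelete handles both files and directories and throws IOException if deletion fails.
  FileUtils.forceDelete(target)
}

// Writes the byte array to the file, creating any missing parent directories.
FileUtils.writeByteArrayToFile(new File("/tmp/service/service.zip"), "payload".getBytes("UTF-8"))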
Example 101
Source File: DistributeServiceClient.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.experiments.distributeservice import java.io.File import scala.concurrent.Future import scala.util.{Failure, Success} import akka.pattern.ask import org.apache.commons.io.FileUtils import org.apache.gearpump.cluster.client.ClientContext import org.apache.gearpump.cluster.main.{ArgumentsParser, CLIOption} import org.apache.gearpump.experiments.distributeservice.DistServiceAppMaster.{FileContainer, GetFileContainer, InstallService} import org.apache.gearpump.util.{AkkaApp, Constants} object DistributeServiceClient extends AkkaApp with ArgumentsParser { implicit val timeout = Constants.FUTURE_TIMEOUT override val options: Array[(String, CLIOption[Any])] = Array( "appid" -> CLIOption[Int]("<the distributed shell appid>", required = true), "file" -> CLIOption[String]("<service zip file path>", required = true), "script" -> CLIOption[String]( "<file path of service script that will be installed to /etc/init.d>", required = true), "serviceName" -> CLIOption[String]("<service name>", required = true), "target" -> CLIOption[String]("<target path on each machine>", required = true) ) override def help(): Unit = { super.help() // scalastyle:off println Console.err.println(s"-D<name>=<value> set a property to the service") // scalastyle:on println } override def main(akkaConf: Config, args: Array[String]): Unit = { val config = parse(filterCustomOptions(args)) val context = ClientContext(akkaConf) implicit val system = context.system implicit val dispatcher = system.dispatcher val appid = config.getInt("appid") val zipFile = new File(config.getString("file")) val script = new File(config.getString("script")) val serviceName = config.getString("serviceName") val appMaster = context.resolveAppID(appid) (appMaster ? GetFileContainer).asInstanceOf[Future[FileContainer]].map { container => val bytes = FileUtils.readFileToByteArray(zipFile) val result = FileServer.newClient.save(container.url, bytes) result match { case Success(_) => appMaster ! InstallService(container.url, zipFile.getName, config.getString("target"), FileUtils.readFileToByteArray(script), serviceName, parseServiceConfig(args)) context.close() case Failure(ex) => throw ex } } } private def filterCustomOptions(args: Array[String]): Array[String] = { args.filter(!_.startsWith("-D")) } private def parseServiceConfig(args: Array[String]): Map[String, Any] = { val result = Map.empty[String, Any] args.foldLeft(result) { (result, argument) => if (argument.startsWith("-D") && argument.contains("=")) { val fixedKV = argument.substring(2).split("=") result + (fixedKV(0) -> fixedKV(1)) } else { result } } } }
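The upload above starts with FileUtils.readFileToByteArray; a minimal REPL-style sketch with a placeholder file:

import java.io.File
import org.apache.commons.io.FileUtils

// Reads the entire file into memory, so it is only appropriate for files that fit comfortably in the heap.
val bytes: Array[Byte] = FileUtils.readFileToByteArray(new File("/tmp/service.zip")) // placeholder path
println(s"read ${bytes.length} bytes")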
Example 102
Source File: PersistenceSpec.scala From 006877 with MIT License | 5 votes |
package akka.testkit

import java.io.File

import com.typesafe.config._

import scala.util._

import akka.actor._
import org.scalatest._

import org.apache.commons.io.FileUtils

abstract class PersistenceSpec(system: ActorSystem) extends TestKit(system)
    with ImplicitSender
    with WordSpecLike
    with Matchers
    with BeforeAndAfterAll
    with PersistenceCleanup {

  def this(name: String, config: Config) = this(ActorSystem(name, config))

  override protected def beforeAll() = deleteStorageLocations()

  override protected def afterAll() = {
    deleteStorageLocations()
    TestKit.shutdownActorSystem(system)
  }

  def killActors(actors: ActorRef*) = {
    actors.foreach { actor =>
      watch(actor)
      system.stop(actor)
      expectTerminated(actor)
      Thread.sleep(1000) // the actor name is not unique intermittently on travis when creating it again after killActors, this is ducktape.
    }
  }
}

trait PersistenceCleanup {
  def system: ActorSystem

  val storageLocations = List(
    "akka.persistence.journal.leveldb.dir",
    "akka.persistence.journal.leveldb-shared.store.dir",
    "akka.persistence.snapshot-store.local.dir").map { s =>
    new File(system.settings.config.getString(s))
  }

  def deleteStorageLocations(): Unit = {
    storageLocations.foreach(dir => Try(FileUtils.deleteDirectory(dir)))
  }
}
Example 104
Source File: ExternalCluster.scala From incubator-livy with Apache License 2.0 | 5 votes |
package org.apache.livy.test.framework import java.io._ import org.apache.commons.io.FileUtils import org.apache.hadoop.fs.Path import org.apache.livy.{LivyConf, Logging} import org.apache.livy.client.common.TestUtils import org.apache.livy.server.LivyServer /** * Used to run tests on an real external cluster * In order to utilize test against an external cluster, you need to create * a configuration called cluster.spec and run the test suite with the option * -Dcluster.spec=<yourcluster.spec> * * Also, make sure to disable the following line InteractiveIT.scala * s.run("""sc.getConf.get("spark.executor.instances")""").verifyResult("res1: String = 1\n") * * This is because your external cluster may not have the same configuration as the MiniCluster * * See the cluster.spec.template file for an example cluster.spec */ class ExternalCluster(config: Map[String, String]) extends Cluster with Logging { private var _configDir: File = _ private var _livyEndpoint: String = _ private var _livyThriftJdbcUrl: Option[String] = _ private var _hdfsScrathDir: Path = _ private var _authScheme: String = _ private var _user: String = _ private var _password: String = _ private var _sslCertPath: String = _ private var _principal: String = _ private var _keytabPath: String = _ // Livy rest url endpoint override def livyEndpoint: String = _livyEndpoint // Livy jdbc url endpoint override def jdbcEndpoint: Option[String] = _livyThriftJdbcUrl // Temp directory in hdfs override def hdfsScratchDir(): Path = _hdfsScrathDir // Working directory that store core-site.xml, yarn-site.xml override def configDir(): File = _configDir // Security details override def authScheme: String = _authScheme override def user: String = _user override def password: String = _password override def sslCertPath: String = _sslCertPath override def principal: String = _principal override def keytabPath: String = _keytabPath override def doAsClusterUser[T](task: => T): T = task override def deploy(): Unit = { _configDir = new File(config.getOrElse("configDir", "hadoop-conf")) _livyEndpoint = config.getOrElse("livyEndpoint", "") _authScheme = config.getOrElse("authScheme", "") _user = config.getOrElse("user", "") _password = config.getOrElse("password", "") _sslCertPath = config.getOrElse("sslCertPath", "") _principal = config.getOrElse("principal", "") _keytabPath = config.getOrElse("keytabPath", "") // Needs to be set after all the other fields are filled in properly _hdfsScrathDir = fs.makeQualified(new Path(config.getOrElse("hdfsScratchDir", "/"))) } override def cleanUp(): Unit = { } def runLivy(): Unit = { } def stopLivy(): Unit = { } }
Example 105
Source File: BaseInteractiveServletSpec.scala From incubator-livy with Apache License 2.0 | 5 votes |
package org.apache.livy.server.interactive import java.io.File import java.nio.file.Files import org.apache.commons.io.FileUtils import org.apache.spark.launcher.SparkLauncher import org.apache.livy.LivyConf import org.apache.livy.rsc.RSCConf import org.apache.livy.server.BaseSessionServletSpec import org.apache.livy.sessions.{Kind, SessionKindModule, Spark} abstract class BaseInteractiveServletSpec extends BaseSessionServletSpec[InteractiveSession, InteractiveRecoveryMetadata] { mapper.registerModule(new SessionKindModule()) protected var tempDir: File = _ override def afterAll(): Unit = { super.afterAll() if (tempDir != null) { scala.util.Try(FileUtils.deleteDirectory(tempDir)) tempDir = null } } override protected def createConf(): LivyConf = synchronized { if (tempDir == null) { tempDir = Files.createTempDirectory("client-test").toFile() } super.createConf() .set(LivyConf.SESSION_STAGING_DIR, tempDir.toURI().toString()) .set(LivyConf.REPL_JARS, "dummy.jar") .set(LivyConf.LIVY_SPARK_VERSION, sys.env("LIVY_SPARK_VERSION")) .set(LivyConf.LIVY_SPARK_SCALA_VERSION, sys.env("LIVY_SCALA_VERSION")) } protected def createRequest( inProcess: Boolean = true, extraConf: Map[String, String] = Map(), kind: Kind = Spark): CreateInteractiveRequest = { val classpath = sys.props("java.class.path") val request = new CreateInteractiveRequest() request.kind = kind request.name = None request.conf = extraConf ++ Map( RSCConf.Entry.LIVY_JARS.key() -> "", RSCConf.Entry.CLIENT_IN_PROCESS.key() -> inProcess.toString, SparkLauncher.SPARK_MASTER -> "local", SparkLauncher.DRIVER_EXTRA_CLASSPATH -> classpath, SparkLauncher.EXECUTOR_EXTRA_CLASSPATH -> classpath ) request } }
Example 106
Source File: YarnShuffleIntegrationSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.yarn import java.io.File import java.nio.charset.StandardCharsets import com.google.common.io.Files import org.apache.commons.io.FileUtils import org.apache.hadoop.yarn.conf.YarnConfiguration import org.scalatest.Matchers import org.apache.spark._ import org.apache.spark.internal.Logging import org.apache.spark.network.shuffle.ShuffleTestAccessor import org.apache.spark.network.yarn.{YarnShuffleService, YarnTestAccessor} import org.apache.spark.tags.ExtendedYarnTest @ExtendedYarnTest class YarnShuffleIntegrationSuite extends BaseYarnClusterSuite { override def newYarnConfig(): YarnConfiguration = { val yarnConfig = new YarnConfiguration() yarnConfig.set(YarnConfiguration.NM_AUX_SERVICES, "spark_shuffle") yarnConfig.set(YarnConfiguration.NM_AUX_SERVICE_FMT.format("spark_shuffle"), classOf[YarnShuffleService].getCanonicalName) yarnConfig.set("spark.shuffle.service.port", "0") yarnConfig } test("external shuffle service") { val shuffleServicePort = YarnTestAccessor.getShuffleServicePort val shuffleService = YarnTestAccessor.getShuffleServiceInstance val registeredExecFile = YarnTestAccessor.getRegisteredExecutorFile(shuffleService) logInfo("Shuffle service port = " + shuffleServicePort) val result = File.createTempFile("result", null, tempDir) val finalState = runSpark( false, mainClassName(YarnExternalShuffleDriver.getClass), appArgs = Seq(result.getAbsolutePath(), registeredExecFile.getAbsolutePath), extraConf = Map( "spark.shuffle.service.enabled" -> "true", "spark.shuffle.service.port" -> shuffleServicePort.toString ) ) checkResult(finalState, result) assert(YarnTestAccessor.getRegisteredExecutorFile(shuffleService).exists()) } } private object YarnExternalShuffleDriver extends Logging with Matchers { val WAIT_TIMEOUT_MILLIS = 10000 def main(args: Array[String]): Unit = { if (args.length != 2) { // scalastyle:off println System.err.println( s""" |Invalid command line: ${args.mkString(" ")} | |Usage: ExternalShuffleDriver [result file] [registered exec file] """.stripMargin) // scalastyle:on println System.exit(1) } val sc = new SparkContext(new SparkConf() .setAppName("External Shuffle Test")) val conf = sc.getConf val status = new File(args(0)) val registeredExecFile = new File(args(1)) logInfo("shuffle service executor file = " + registeredExecFile) var result = "failure" val execStateCopy = new File(registeredExecFile.getAbsolutePath + "_dup") try { val data = sc.parallelize(0 until 100, 10).map { x => (x % 10) -> x }.reduceByKey{ _ + _ }. collect().toSet sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) data should be ((0 until 10).map{x => x -> (x * 10 + 450)}.toSet) result = "success" // only one process can open a leveldb file at a time, so we copy the files FileUtils.copyDirectory(registeredExecFile, execStateCopy) assert(!ShuffleTestAccessor.reloadRegisteredExecutors(execStateCopy).isEmpty) } finally { sc.stop() FileUtils.deleteDirectory(execStateCopy) Files.write(result, status, StandardCharsets.UTF_8) } } }
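The copy-then-inspect-then-delete pattern around the registered-executor LevelDB directory boils down to FileUtils.copyDirectory and deleteDirectory; a stripped-down REPL-style sketch with placeholder directory names:

import java.io.File
import org.apache.commons.io.FileUtils

val original = new File("/tmp/registeredExecutors.ldb") // placeholder
val snapshot = new File(original.getAbsolutePath + "_dup")

// copyDirectory replicates the whole tree (preserving file dates by default),
// which lets another process open the copy while the original stays in use.
FileUtils.copyDirectory(original, snapshot)
// ... inspect the snapshot ...
FileUtils.deleteDirectory(snapshot)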
Example 107
Source File: SortShuffleSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark import java.io.File import scala.collection.JavaConverters._ import org.apache.commons.io.FileUtils import org.apache.commons.io.filefilter.TrueFileFilter import org.scalatest.BeforeAndAfterAll import org.apache.spark.rdd.ShuffledRDD import org.apache.spark.serializer.{JavaSerializer, KryoSerializer} import org.apache.spark.shuffle.sort.SortShuffleManager import org.apache.spark.util.Utils class SortShuffleSuite extends ShuffleSuite with BeforeAndAfterAll { // This test suite should run all tests in ShuffleSuite with sort-based shuffle. private var tempDir: File = _ override def beforeAll() { super.beforeAll() conf.set("spark.shuffle.manager", "sort") } override def beforeEach(): Unit = { super.beforeEach() tempDir = Utils.createTempDir() conf.set("spark.local.dir", tempDir.getAbsolutePath) } override def afterEach(): Unit = { try { Utils.deleteRecursively(tempDir) } finally { super.afterEach() } } test("SortShuffleManager properly cleans up files for shuffles that use the serialized path") { sc = new SparkContext("local", "test", conf) // Create a shuffled RDD and verify that it actually uses the new serialized map output path val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x)) val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4)) .setSerializer(new KryoSerializer(conf)) val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]] assert(SortShuffleManager.canUseSerializedShuffle(shuffleDep)) ensureFilesAreCleanedUp(shuffledRdd) } test("SortShuffleManager properly cleans up files for shuffles that use the deserialized path") { sc = new SparkContext("local", "test", conf) // Create a shuffled RDD and verify that it actually uses the old deserialized map output path val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x)) val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4)) .setSerializer(new JavaSerializer(conf)) val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]] assert(!SortShuffleManager.canUseSerializedShuffle(shuffleDep)) ensureFilesAreCleanedUp(shuffledRdd) } private def ensureFilesAreCleanedUp(shuffledRdd: ShuffledRDD[_, _, _]): Unit = { def getAllFiles: Set[File] = FileUtils.listFiles(tempDir, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).asScala.toSet val filesBeforeShuffle = getAllFiles // Force the shuffle to be performed shuffledRdd.count() // Ensure that the shuffle actually created files that will need to be cleaned up val filesCreatedByShuffle = getAllFiles -- filesBeforeShuffle filesCreatedByShuffle.map(_.getName) should be Set("shuffle_0_0_0.data", "shuffle_0_0_0.index") // Check that the cleanup actually removes the files sc.env.blockManager.master.removeShuffle(0, blocking = true) for (file <- filesCreatedByShuffle) { assert (!file.exists(), s"Shuffle file $file was not cleaned up") } } }
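The getAllFiles helper above uses the filter-based overload of FileUtils.listFiles; both common overloads are sketched here against a placeholder directory:

import java.io.File
import scala.collection.JavaConverters._
import org.apache.commons.io.FileUtils
import org.apache.commons.io.filefilter.TrueFileFilter

val dir = new File("/tmp/spark-local") // placeholder

// Filter-based overload: TrueFileFilter.INSTANCE accepts every file and recurses into every subdirectory.
val allFiles: Set[File] =
  FileUtils.listFiles(dir, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).asScala.toSet

// Extension-based overload: only *.data and *.index files, searched recursively.
val shuffleFiles = FileUtils.listFiles(dir, Array("data", "index"), true).asScala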
Example 108
Source File: EmbeddedIO.scala From embedded-kafka with Apache License 2.0 | 5 votes |
package com.tuplejump.embedded.kafka

import java.io.{ File => JFile }

import scala.util.Try
import org.apache.commons.io.FileUtils

object EmbeddedIO extends Logging {

  private val shutdownDeletePaths = new scala.collection.mutable.HashSet[String]()

  val logsDir = new JFile(".", "logs")

  dirSetup(new JFile(logsDir.getAbsolutePath))

  def createTempDir(tmpName: String): JFile =
    dirSetup(new JFile(logsDir, tmpName))

  private def dirSetup(dir: JFile): JFile = {
    if (logsDir.exists()) deleteRecursively(logsDir)
    dir.mkdir
    logger.info(s"Created dir ${dir.getAbsolutePath.replace("./", "")}")
    registerShutdownDeleteDir(dir)

    sys.runtime.addShutdownHook(new Thread("delete temp dir " + dir) {
      override def run(): Unit = {
        if (!hasRootAsShutdownDeleteDir(dir)) deleteRecursively(dir)
      }
    })
    dir
  }

  protected def registerShutdownDeleteDir(file: JFile) {
    shutdownDeletePaths.synchronized {
      shutdownDeletePaths += file.getAbsolutePath
    }
  }

  private def hasRootAsShutdownDeleteDir(file: JFile): Boolean = {
    val absolutePath = file.getAbsolutePath
    shutdownDeletePaths.synchronized {
      shutdownDeletePaths.exists { path =>
        !absolutePath.equals(path) && absolutePath.startsWith(path)
      }
    }
  }

  protected def deleteRecursively(delete: JFile): Unit =
    for { file <- Option(delete) } Try(FileUtils.deleteDirectory(file))
}
Example 109
Source File: SharedSparkSessionSuite.scala From spark-tensorflow-connector with Apache License 2.0 | 5 votes |
package org.trustedanalytics.spark.datasources.tensorflow

import java.io.File

import org.apache.commons.io.FileUtils
import org.apache.spark.SharedSparkSession
import org.junit.{After, Before}
import org.scalatest.{BeforeAndAfterAll, Matchers, WordSpecLike}

trait BaseSuite extends WordSpecLike with Matchers with BeforeAndAfterAll

class SharedSparkSessionSuite extends SharedSparkSession with BaseSuite {
  val TF_SANDBOX_DIR = "tf-sandbox"
  val file = new File(TF_SANDBOX_DIR)

  @Before
  override def beforeAll() = {
    super.setUp()
    FileUtils.deleteQuietly(file)
    file.mkdirs()
  }

  @After
  override def afterAll() = {
    FileUtils.deleteQuietly(file)
    super.tearDown()
  }
}
Example 110
Source File: TestCreateTableWithBlockletSize.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.spark.testsuite.createTable import scala.util.Random import org.apache.commons.io.FileUtils import org.apache.spark.sql.CarbonEnv import org.apache.spark.sql.test.util.QueryTest import org.scalatest.BeforeAndAfterAll import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException import org.apache.carbondata.core.datastore.impl.FileFactory import org.apache.carbondata.core.reader.CarbonFooterReaderV3 import org.apache.carbondata.core.util.path.CarbonTablePath class TestCreateTableWithBlockletSize extends QueryTest with BeforeAndAfterAll { override def beforeAll { sql("use default") sql("drop table if exists source") } test("test create table with blocklet size") { val rdd = sqlContext.sparkContext.parallelize(1 to 1000000) .map(x => (Random.nextInt(), Random.nextInt().toString)) sqlContext.createDataFrame(rdd) .write .format("carbondata") .option("table_blocksize", "8") .option("table_blocklet_size", "3") .option("tableName", "source") .save() // read footer and verify number of blocklets val table = CarbonEnv.getCarbonTable(None, "source")(sqlContext.sparkSession) val folder = FileFactory.getCarbonFile(table.getTablePath) val files = folder.listFiles(true) import scala.collection.JavaConverters._ val dataFiles = files.asScala.filter(_.getName.endsWith(CarbonTablePath.CARBON_DATA_EXT)) dataFiles.foreach { dataFile => val fileReader = FileFactory .getFileHolder(FileFactory.getFileType(dataFile.getPath)) val buffer = fileReader .readByteBuffer(FileFactory.getUpdatedFilePath(dataFile.getPath), dataFile.getSize - 8, 8) val footerReader = new CarbonFooterReaderV3(dataFile.getAbsolutePath, buffer.getLong) val footer = footerReader.readFooterVersion3 assertResult(2)(footer.blocklet_index_list.size) assertResult(2)(footer.blocklet_info_list3.size) } sql("drop table source") } test("test create table with invalid blocklet size") { val ex = intercept[MalformedCarbonCommandException] { sql("CREATE TABLE T1(name String) STORED AS CARBONDATA TBLPROPERTIES('TABLE_BLOCKLET_SIZE'='3X')") } assert(ex.getMessage.toLowerCase.contains("invalid table_blocklet_size")) } override def afterAll { sql("use default") sql("drop table if exists source") } }
Example 111
Source File: DirectSQLExample.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.examples import java.io.File import org.apache.commons.io.FileUtils import org.apache.spark.sql.SparkSession import org.apache.carbondata.core.metadata.datatype.{DataTypes, Field} import org.apache.carbondata.examples.util.ExampleUtils import org.apache.carbondata.sdk.file.{CarbonWriter, Schema} // scalastyle:off println object DirectSQLExample { def main(args: Array[String]) { val carbonSession = ExampleUtils.createSparkSession("DirectSQLExample") exampleBody(carbonSession) carbonSession.close() } def exampleBody(carbonSession : SparkSession): Unit = { val rootPath = new File(this.getClass.getResource("/").getPath + "../../../..").getCanonicalPath val path = s"$rootPath/examples/spark/target/carbonFile/" import carbonSession._ // 1. generate data file cleanTestData(path) val rows = 20 buildTestData(path, rows) val readPath = path println("Running SQL on carbon files directly") try { // 2. run queries directly, no need to create table first sql(s"""select * FROM carbon.`$readPath` limit 10""".stripMargin).show() // 3. check rows count val counts = sql(s"""select * FROM carbon.`$readPath`""".stripMargin).count() assert(rows == counts) } catch { case e: Exception => throw e } finally { // 3.delete data files cleanTestData(path) } } // prepare SDK writer output def buildTestData( path: String, num: Int = 3): Unit = { // getCanonicalPath gives path with \, but the code expects /. val writerPath = path.replace("\\", "/") val fields = new Array[Field](3) fields(0) = new Field("name", DataTypes.STRING) fields(1) = new Field("age", DataTypes.INT) fields(2) = new Field("height", DataTypes.DOUBLE) try { val builder = CarbonWriter .builder() .outputPath(writerPath) .uniqueIdentifier(System.currentTimeMillis) .withBlockSize(2) .withCsvInput(new Schema(fields)) .writtenBy("DirectSQLExample") val writer = builder.build() var i = 0 while (i < num) { writer.write(Array[String]("robot" + i, String.valueOf(i), String.valueOf(i.toDouble / 2))) i += 1 } writer.close() } catch { case e: Exception => throw e } } def cleanTestData(path: String): Unit = { FileUtils.deleteDirectory(new File(path)) } } // scalastyle:on println
Example 112
Source File: TestRegisterIndexCarbonTable.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.spark.testsuite.secondaryindex import java.io.{File, IOException} import org.apache.commons.io.FileUtils import org.apache.spark.sql.Row import org.apache.spark.sql.test.TestQueryExecutor import org.apache.spark.sql.test.util.QueryTest import org.scalatest.BeforeAndAfterAll import org.apache.carbondata.core.constants.CarbonCommonConstants class TestRegisterIndexCarbonTable extends QueryTest with BeforeAndAfterAll { override def beforeAll { sql("drop database if exists carbon cascade") } def restoreData(dblocation: String, tableName: String) = { val destination = dblocation + CarbonCommonConstants.FILE_SEPARATOR + tableName val source = dblocation+ "_back" + CarbonCommonConstants.FILE_SEPARATOR + tableName try { FileUtils.copyDirectory(new File(source), new File(destination)) FileUtils.deleteDirectory(new File(source)) } catch { case e : Exception => throw new IOException("carbon table data restore failed.") } finally { } } def backUpData(dblocation: String, tableName: String) = { val source = dblocation + CarbonCommonConstants.FILE_SEPARATOR + tableName val destination = dblocation+ "_back" + CarbonCommonConstants.FILE_SEPARATOR + tableName try { FileUtils.copyDirectory(new File(source), new File(destination)) } catch { case e : Exception => throw new IOException("carbon table data backup failed.") } } test("register tables test") { val location = TestQueryExecutor.warehouse + CarbonCommonConstants.FILE_SEPARATOR + "dbName" sql("drop database if exists carbon cascade") sql(s"create database carbon location '${location}'") sql("use carbon") sql("""create table carbon.carbontable (c1 string,c2 int,c3 string,c5 string) STORED AS carbondata""") sql("insert into carbontable select 'a',1,'aa','aaa'") sql("create index index_on_c3 on table carbontable (c3, c5) AS 'carbondata'") backUpData(location, "carbontable") backUpData(location, "index_on_c3") sql("drop table carbontable") restoreData(location, "carbontable") restoreData(location, "index_on_c3") sql("refresh table carbontable") sql("refresh table index_on_c3") checkAnswer(sql("select count(*) from carbontable"), Row(1)) checkAnswer(sql("select c1 from carbontable"), Seq(Row("a"))) sql("REGISTER INDEX TABLE index_on_c3 ON carbontable") assert(sql("show indexes on carbontable").collect().nonEmpty) } override def afterAll { sql("drop database if exists carbon cascade") sql("use default") } }
Example 113
Source File: services.scala From InteractiveGraph-neo4j with BSD 2-Clause "Simplified" License | 5 votes |
package org.grapheco.server.pidb

import java.io.{File, FileInputStream}

import org.apache.commons.io.{FileUtils, IOUtils}
import org.grapheco.server.util.{JsonUtils, Logging, ServletContextUtils}
import org.neo4j.driver.v1._
import org.neo4j.graphdb.factory.{GraphDatabaseFactory, GraphDatabaseSettings}
import org.neo4j.graphdb.{GraphDatabaseService, Label, RelationshipType}
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.beans.factory.{DisposableBean, InitializingBean}
import cn.pidb.engine.{BoltService, CypherService, PidbConnector}

import scala.collection.JavaConversions._
import scala.collection.mutable
import scala.reflect.ClassTag

class PidbService(boltUrl: String, boltUser: String, boltPassword: String)
  extends BoltService(boltUrl, boltUser, boltPassword) {

  def getRelativeOrAbsoluteFile(path: String) = {
    Some(new File(path)).map { file =>
      if (file.isAbsolute) {
        file
      } else {
        new File(ServletContextUtils.getServletContext.getRealPath(s"/${path}"))
      }
    }.get
  }
}
Example 114
Source File: IntegrationTests.scala From scala-typed-holes with Apache License 2.0 | 5 votes |
package holes import java.nio.charset.StandardCharsets import java.nio.file.{Files, Path, Paths} import org.apache.commons.io.FileUtils import org.scalatest.{BeforeAndAfterAll, FunSpec} import scala.sys.process._ class IntegrationTests extends FunSpec with BeforeAndAfterAll { private val pluginJar = sys.props("plugin.jar") private val scalacClasspath = sys.props("scalac.classpath") private val targetDir = Paths.get("target/integration-tests") private def runScalac(args: String*): String = { val buf = new StringBuffer val logger = new ProcessLogger { override def out(s: => String): Unit = { buf.append(s); buf.append('\n') } override def err(s: => String): Unit = { buf.append(s); buf.append('\n') } override def buffer[T](f: => T): T = f } Process( "java" :: "-Dscala.usejavacp=true" :: "-cp" :: scalacClasspath :: "scala.tools.nsc.Main" :: args.toList ).!(logger) buf.toString } private def compileFile(path: Path): String = runScalac( s"-Xplugin:$pluginJar", "-P:typed-holes:log-level:info", "-d", targetDir.toString, path.toString ) override def beforeAll(): Unit = { println(runScalac("-version")) FileUtils.deleteQuietly(targetDir.toFile) Files.createDirectories(targetDir) } describe("produces the expected output") { for (scenario <- Paths.get("src/test/resources").toFile.listFiles().toList.map(_.toPath)) { it(scenario.getFileName.toString) { val expected = new String(Files.readAllBytes(scenario.resolve("expected.txt")), StandardCharsets.UTF_8).trim val actual = compileFile(scenario.resolve("input.scala")).trim if (actual != expected) { println("Compiler output:") println("=====") println(actual) println("=====") } assert(actual === expected) } } } }
Example 115
Source File: Template.scala From AppCrawler with Apache License 2.0 | 5 votes |
package com.testerhome.appcrawler import java.io.File import org.apache.commons.io.FileUtils import org.fusesource.scalate.TemplateEngine import scala.collection.mutable import scala.collection.mutable.ListBuffer import scala.io.Source class Template { val elements = mutable.HashMap[String, ListBuffer[Map[String, Any]]]() def getPageSource(url:String): Unit ={ val page=Source.fromURL(s"${url}/source/xml").mkString val xml=DataObject.fromJson[Map[String, String]](page).getOrElse("value", "") .asInstanceOf[Map[String, String]].getOrElse("tree", "") val doc=XPathUtil.toDocument(xml) elements("Demo")=ListBuffer[Map[String, Any]]() elements("Demo")++=XPathUtil.getListFromXPath("//*[]", doc) } def read(path:String): Unit = { //val path = "/Users/seveniruby/projects/AppCrawlerSuite/AppCrawler/android_20170109145102/elements.yml" val store = (DataObject.fromYaml[URIElementStore](Source.fromFile(path).mkString)).elementStore store.foreach(s => { val reqDom = s._2.reqDom val url = s._2.element.url if (reqDom.size != 0) { val doc = XPathUtil.toDocument(reqDom) if (elements.contains(url) == false) { elements.put(url, ListBuffer[Map[String, Any]]()) } elements(url) ++= XPathUtil.getListFromXPath("//*", doc) val tagsLimit=List("Image", "Button", "Text") elements(url) = elements(url) .filter(_.getOrElse("visible", "true")=="true") .filter(_.getOrElse("tag", "").toString.contains("StatusBar")==false) .filter(e=>tagsLimit.exists(t=>e.getOrElse("tag", "").toString.contains(t))) .distinct } }) } def write(template:String, dir:String) { val engine = new TemplateEngine elements.foreach(e => { val file:String = e._1 println(s"file=${file}") e._2.foreach(m => { val name = m("name") val value = m("value") val label = m("label") val xpath = m("xpath") println(s"name=${name} label=${label} value=${value} xpath=${xpath}") }) val output = engine.layout(template, Map( "file" -> s"Template_${file.split('-').takeRight(1).head.toString}", "elements" -> elements(file)) ) println(output) val directory=new File(dir) if(directory.exists()==false){ FileUtils.forceMkdir(directory) } println(s"template source directory = ${dir}") val appdex=template.split('.').takeRight(2).head scala.reflect.io.File(s"${dir}/${file}.${appdex}").writeAll(output) }) } }
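The directory creation above is FileUtils.forceMkdir; a minimal REPL-style sketch with a placeholder path:

import java.io.File
import org.apache.commons.io.FileUtils

// Creates the directory together with any missing parents; throws if a regular
// file with that name already exists or the directory cannot be created.
FileUtils.forceMkdir(new File("/tmp/appcrawler/templates")) // placeholder path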
Example 116
Source File: Report.scala From AppCrawler with Apache License 2.0 | 5 votes |
package com.testerhome.appcrawler import org.apache.commons.io.FileUtils import org.scalatest.tools.Runner import scala.collection.mutable import scala.collection.mutable.ListBuffer import scala.io.{Source, Codec} import scala.reflect.io.File import collection.JavaConversions._ log.info(s"run ${cmdArgs.mkString(" ")}") Runner.run(cmdArgs) changeTitle() } def changeTitle(title:String=Report.title): Unit ={ val originTitle="ScalaTest Results" val indexFile=reportPath+"/index.html" val newContent=Source.fromFile(indexFile).mkString.replace(originTitle, title) scala.reflect.io.File(indexFile).writeAll(newContent) } } object Report extends Report{ var showCancel=false var title="AppCrawler" var master="" var candidate="" var reportDir="" var store=new URIElementStore def loadResult(elementsFile: String): URIElementStore ={ DataObject.fromYaml[URIElementStore](Source.fromFile(elementsFile).mkString) } }
Example 117
Source File: TestGetClassFile.scala From AppCrawler with Apache License 2.0 | 5 votes |
package com.testerhome.appcrawler.ut

import com.testerhome.appcrawler.plugin.FlowDiff
import com.testerhome.appcrawler.{DiffSuite, Report}
import org.apache.commons.io.FileUtils
import org.scalatest.Checkpoints.Checkpoint
import org.scalatest.{FunSuite, Matchers}

class TestGetClassFile extends FunSuite with Matchers {

  test("test checkpoints") {
    markup {
      """
        |dddddddd
      """.stripMargin
    }
    markup("xxxx")
    val cp = new Checkpoint()
    val (x, y) = (1, 2)
    cp { x should be < 0 }
    cp { y should be > 9 }
    cp.reportAll()
  }

  test("test markup") {
    markup {
      """
        |dddddddd
      """.stripMargin
    }
    markup("xxxx")
  }

  test("get class file") {
    val location = classOf[DiffSuite].getProtectionDomain.getCodeSource.getLocation
    println(location)

    val f = getClass.getResource("/com/xueqiu/qa/appcrawler/ut/TestDiffReport.class").getFile
    println(f)
    FileUtils.copyFile(new java.io.File(f), new java.io.File("/tmp/1.class"))
    println(getClass.getClassLoader.getResources("com/xueqiu/qa/appcrawler/ut/TestDiffReport.class"))
  }
}
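The last test above exercises FileUtils.copyFile; a minimal REPL-style sketch with placeholder source and destination paths:

import java.io.File
import org.apache.commons.io.FileUtils

// Copies the contents to the destination, creating parent directories as needed
// and preserving the source's last-modified date by default.
FileUtils.copyFile(new File("/tmp/TestDiffReport.class"), new File("/tmp/backup/TestDiffReport.class")) // placeholder paths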
Example 118
Source File: Generator.scala From play-soap with Apache License 2.0 | 5 votes |
package play.soap.docs import java.io.File import java.util.Collections import org.apache.commons.io.FileUtils import org.pegdown.ast.WikiLinkNode import org.pegdown.VerbatimSerializer import org.pegdown.LinkRenderer import org.pegdown.Extensions import org.pegdown.PegDownProcessor import play.doc.PrettifyVerbatimSerializer import play.twirl.api.Html object Generator extends App { val outDir = new File(args(0)) val inDir = new File(args(1)) val inPages = args.drop(2) val parser = new PegDownProcessor(Extensions.ALL) val linkRenderer = new LinkRenderer { import LinkRenderer.Rendering override def render(node: WikiLinkNode) = { node.getText.split("\\|", 2) match { case Array(name) => new Rendering(name + ".html", name) case Array(title, name) => new Rendering(name + ".html", title) case _ => new Rendering(node.getText + ".html", node.getText) } } } val verbatimSerializer = Collections.singletonMap[String, VerbatimSerializer](VerbatimSerializer.DEFAULT, PrettifyVerbatimSerializer) val nav = Seq( "Home" -> "Home", "Using sbt WSDL" -> "SbtWsdl", "Using the Play SOAP client" -> "PlaySoapClient", "Using JAX WS Handlers" -> "Handlers", "Security" -> "Security" ) val titleMap = nav.map(t => t._2 -> t._1).toMap // Ensure target directory exists outDir.mkdirs() inPages.foreach { name => val inFile = new File(inDir, name + ".md") val markdown = FileUtils.readFileToString(inFile) val htmlSnippet = parser.markdownToHtml(markdown, linkRenderer, verbatimSerializer) val title = titleMap.get(name) val htmlPage = html.template(title, nav)(Html(htmlSnippet)) FileUtils.writeStringToFile(new File(outDir, name + ".html"), htmlPage.body) } }
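The generator above is a read-transform-write loop over FileUtils string helpers; a REPL-style sketch using the charset-aware overloads (file names and the transformation are placeholders):

import java.io.File
import java.nio.charset.StandardCharsets
import org.apache.commons.io.FileUtils

val markdown = FileUtils.readFileToString(new File("docs/Home.md"), StandardCharsets.UTF_8)
val html = s"<html><body><pre>${markdown.take(100)}</pre></body></html>" // stand-in for real rendering
FileUtils.writeStringToFile(new File("target/docs/Home.html"), html, StandardCharsets.UTF_8)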
Example 119
Source File: UsesMasterSlaveServers.scala From scala-commons with MIT License | 5 votes |
package com.avsystem.commons package redis import java.io.File import org.apache.commons.io.FileUtils import org.scalatest.{BeforeAndAfterAll, Suite} import scala.concurrent.Await import scala.concurrent.duration._ trait UsesMasterSlaveServers extends BeforeAndAfterAll with RedisProcessUtils { this: Suite => val masterSlavePath: String = "masterSlave/" + System.currentTimeMillis() val masterSlaveDir: File = new File(masterSlavePath.replaceAllLiterally("/", File.separator)) def masterName: String def ports: Seq[Int] def sentinelPorts: Seq[Int] lazy val addresses: Seq[NodeAddress] = ports.map(port => NodeAddress(port = port)) lazy val sentinelAddresses: Seq[NodeAddress] = sentinelPorts.map(port => NodeAddress(port = port)) var redisProcesses: Seq[RedisProcess] = _ var sentinelProcesses: Seq[RedisProcess] = _ protected def prepareDirectory(): Unit override protected def beforeAll(): Unit = { super.beforeAll() prepareDirectory() val processesFut = Future.traverse(ports)(port => launchRedis( "--port", port.toString, "--daemonize", "no", "--pidfile", "redis.pid", "--dbfilename", "dump.rdb", "--dir", s"$masterSlavePath/$port" )) val sentinelsFut = Future.traverse(sentinelPorts)(port => launchSentinel( s"$masterSlavePath/$port/sentinel.conf", "--port", port.toString, "--daemonize", "no", "--pidfile", "redis.pid", "--dir", s"$masterSlavePath/$port" )) redisProcesses = Await.result(processesFut, 10.seconds) sentinelProcesses = Await.result(sentinelsFut, 10.seconds) } override protected def afterAll(): Unit = { Await.result(Future.traverse(redisProcesses ++ sentinelProcesses)(shutdownRedis), 10.seconds) FileUtils.deleteDirectory(masterSlaveDir) super.afterAll() } }
Example 120
Source File: UsesClusterServers.scala From scala-commons with MIT License | 5 votes |
package com.avsystem.commons
package redis

import java.io.File

import org.apache.commons.io.FileUtils
import org.scalatest.{BeforeAndAfterAll, Suite}

import scala.concurrent.Await
import scala.concurrent.duration._

trait UsesClusterServers extends BeforeAndAfterAll with RedisProcessUtils { this: Suite =>
  val clusterPath: String = "cluster/" + System.currentTimeMillis()
  val clusterDir: File = new File(clusterPath.replaceAllLiterally("/", File.separator))

  def ports: Seq[Int]

  lazy val addresses: Seq[NodeAddress] = ports.map(port => NodeAddress(port = port))

  var redisProcesses: Seq[RedisProcess] = _

  protected def prepareDirectory(): Unit

  protected def slotKey(slot: Int): String = ClusterUtils.SlotKeys(slot)

  override protected def beforeAll(): Unit = {
    super.beforeAll()
    prepareDirectory()
    redisProcesses = Await.result(Future.traverse(ports)(port => launchRedis(
      "--port", port.toString,
      "--daemonize", "no",
      "--pidfile", "redis.pid",
      "--dbfilename", "dump.rdb",
      "--dir", s"$clusterPath/$port",
      "--appendonly", "yes",
      "--appendfilename", "appendonly.aof",
      "--cluster-enabled", "yes",
      "--cluster-config-file", "nodes.conf"
    )), 10.seconds)
  }

  override protected def afterAll(): Unit = {
    Await.result(Future.traverse(redisProcesses)(shutdownRedis), 10.seconds)
    FileUtils.deleteDirectory(clusterDir)
    super.afterAll()
  }
}
Example 121
Source File: MQTTSinkWordCount.scala From bahir with Apache License 2.0 | 5 votes |
package org.apache.bahir.examples.sql.streaming.mqtt

import java.io.File

import org.apache.commons.io.FileUtils
import org.apache.spark.sql.SparkSession

object MQTTSinkWordCount {
  def main(args: Array[String]) {
    // Three arguments are required: the socket port, the MQTT broker URL and the topic.
    if (args.length < 3) {
      // scalastyle:off
      System.err.println("Usage: MQTTSinkWordCount <port> <brokerUrl> <topic>")
      // scalastyle:on
      System.exit(1)
    }

    val checkpointDir = System.getProperty("java.io.tmpdir") + "/mqtt-example/"
    // Remove checkpoint directory.
    FileUtils.deleteDirectory(new File(checkpointDir))

    val port = args(0)
    val brokerUrl = args(1)
    val topic = args(2)

    val spark = SparkSession.builder
      .appName("MQTTSinkWordCount").master("local[4]")
      .getOrCreate()

    import spark.implicits._

    // Create DataFrame representing the stream of input lines from local network socket.
    val lines = spark.readStream
      .format("socket")
      .option("host", "localhost").option("port", port)
      .load().select("value").as[String]

    // Split the lines into words.
    val words = lines.flatMap(_.split(" "))

    // Generate running word count.
    val wordCounts = words.groupBy("value").count()

    // Start publishing the counts to MQTT server.
    val query = wordCounts.writeStream
      .format("org.apache.bahir.sql.streaming.mqtt.MQTTStreamSinkProvider")
      .option("checkpointLocation", checkpointDir)
      .outputMode("complete")
      .option("topic", topic)
      .option("localStorage", checkpointDir)
      .start(brokerUrl)

    query.awaitTermination()
  }
}
Example 122
Source File: ElasticServer.scala From nexus with Apache License 2.0 | 5 votes |
package ch.epfl.bluebrain.nexus.commons.es.server.embed import java.nio.file.Files import java.util.Arrays._ import akka.http.scaladsl.model.Uri import ch.epfl.bluebrain.nexus.commons.es.server.embed.ElasticServer.MyNode import ch.epfl.bluebrain.nexus.util.{ActorSystemFixture, Randomness} import org.apache.commons.io.FileUtils import org.elasticsearch.common.settings.Settings import org.elasticsearch.index.reindex.ReindexPlugin import org.elasticsearch.node.Node import org.elasticsearch.painless.PainlessPlugin import org.elasticsearch.plugins.Plugin import org.elasticsearch.transport.Netty4Plugin import org.scalatest.wordspec.AnyWordSpecLike import org.scalatest.BeforeAndAfterAll import scala.jdk.CollectionConverters._ import scala.util.Try // $COVERAGE-OFF$ abstract class ElasticServer extends ActorSystemFixture("ElasticServer") with AnyWordSpecLike with BeforeAndAfterAll with Randomness { override protected def beforeAll(): Unit = { super.beforeAll() startElastic() } override protected def afterAll(): Unit = { stopElastic() super.afterAll() } val startPort = freePort() val endPort = startPort + 100 val esUri = Uri(s"http://localhost:$startPort") implicit val ec = system.dispatcher private val clusterName = "elasticsearch" private val dataDir = Files.createTempDirectory("elasticsearch_data_").toFile private val settings = Settings .builder() .put("path.home", dataDir.toString) .put("http.port", s"$startPort-$endPort") .put("http.cors.enabled", true) .put("cluster.name", clusterName) .put("http.type", "netty4") .build private lazy val node = new MyNode(settings, asList(classOf[Netty4Plugin], classOf[PainlessPlugin], classOf[ReindexPlugin])) def startElastic(): Unit = { node.start() () } def stopElastic(): Unit = { node.close() Try(FileUtils.forceDelete(dataDir)) () } } object ElasticServer extends Randomness { import java.util import org.elasticsearch.node.InternalSettingsPreparer private class MyNode(preparedSettings: Settings, classpathPlugins: util.Collection[Class[_ <: Plugin]]) extends Node( InternalSettingsPreparer .prepareEnvironment(preparedSettings, Map.empty[String, String].asJava, null, () => "elasticsearch"), classpathPlugins, true ) {} } // $COVERAGE-ON$
Example 123
Source File: TarFlowSpec.scala From nexus with Apache License 2.0 | 5 votes |
package ch.epfl.bluebrain.nexus.storage import java.io.ByteArrayInputStream import java.nio.file.{Files, Path, Paths} import akka.actor.ActorSystem import akka.stream.alpakka.file.scaladsl.Directory import akka.stream.scaladsl.{FileIO, Source} import akka.testkit.TestKit import akka.util.ByteString import ch.epfl.bluebrain.nexus.storage.utils.{EitherValues, IOEitherValues, Randomness} import org.apache.commons.compress.archivers.tar.TarArchiveInputStream import org.apache.commons.io.FileUtils import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpecLike import org.scalatest.{BeforeAndAfterAll, Inspectors, OptionValues} import scala.annotation.tailrec class TarFlowSpec extends TestKit(ActorSystem("TarFlowSpec")) with AnyWordSpecLike with Matchers with IOEitherValues with Randomness with EitherValues with OptionValues with Inspectors with BeforeAndAfterAll { val basePath = Files.createTempDirectory("tarflow") val dir1 = basePath.resolve("one") val dir2 = basePath.resolve("two") override def afterAll(): Unit = { super.afterAll() FileUtils.cleanDirectory(basePath.toFile) () } type PathAndContent = (Path, String) "A TarFlow" should { Files.createDirectories(dir1) Files.createDirectories(dir2) def relativize(path: Path): String = basePath.getParent().relativize(path).toString "generate the byteString for a tar file correctly" in { val file1 = dir1.resolve("file1.txt") val file1Content = genString() val file2 = dir1.resolve("file3.txt") val file2Content = genString() val file3 = dir2.resolve("file3.txt") val file3Content = genString() val files = List(file1 -> file1Content, file2 -> file2Content, file3 -> file3Content) forAll(files) { case (file, content) => Source.single(ByteString(content)).runWith(FileIO.toPath(file)).futureValue } val byteString = Directory.walk(basePath).via(TarFlow.writer(basePath)).runReduce(_ ++ _).futureValue val bytes = new ByteArrayInputStream(byteString.toArray) val tar = new TarArchiveInputStream(bytes) @tailrec def readEntries( tar: TarArchiveInputStream, entries: List[PathAndContent] = Nil ): List[PathAndContent] = { val entry = tar.getNextTarEntry if (entry == null) entries else { val data = Array.ofDim[Byte](entry.getSize.toInt) tar.read(data) readEntries(tar, (Paths.get(entry.getName) -> ByteString(data).utf8String) :: entries) } } val directories = List(relativize(basePath) -> "", relativize(dir1) -> "", relativize(dir2) -> "") val untarred = readEntries(tar).map { case (path, content) => path.toString -> content } val expected = files.map { case (path, content) => relativize(path) -> content } ++ directories untarred should contain theSameElementsAs expected } } }
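The afterAll above calls FileUtils.cleanDirectory, which, unlike deleteDirectory, keeps the directory itself; a minimal REPL-style sketch with a placeholder path:

import java.io.File
import org.apache.commons.io.FileUtils

val scratch = new File("/tmp/tarflow-scratch") // placeholder
// Removes everything inside the directory but leaves the (now empty) directory in place;
// throws if the directory does not exist or one of its children cannot be deleted.
FileUtils.cleanDirectory(scratch)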
Example 124
Source File: format_flow.scala From scalabpe with Apache License 2.0 | 5 votes |
package scalabpe import java.io._ import scala.collection.mutable.HashMap import scala.collection.mutable.ArrayBuffer import scala.io.Source import org.apache.commons.io.FileUtils import scala.xml._ import scala.collection.mutable._ import scalabpe.core._ import org.apache.commons.lang.StringUtils import Tools._ object FormatFlowTool { def help() { println( """ usage: scalabpe.FormatFlowTool [options] dirname options: -h|--help 帮助信息 """) } def parseArgs(args:Array[String]):HashMapStringAny = { val map = HashMapStringAny() var i = 0 val files = ArrayBufferString() while(i < args.size) { args(i) match { case "-h" | "--help" => return null case s if s.startsWith("-") => println("invalid option "+s) return null case _ => files += args(i) i += 1 } } map.put("files",files) map } def main(args:Array[String]) { var params = parseArgs(args) if( params == null ) { help() return } var files = params.nls("files") if( files.size == 0 ) { help() return } var dir = files(0) if( !new File(dir).exists() ) { val p1 = "compose_conf"+File.separator+dir if( new File(p1).exists ) { dir = p1 } else { println("not a valid dir, dir="+dir) return } } processDir(dir,params) } def processDir(dir:String,params:HashMapStringAny) { val files = new File(dir).listFiles.filter(_.getName.endsWith(".flow")) for(f <- files ) { processFile(dir,f.getName,params) } } def processFile(dir:String,f:String,params:HashMapStringAny) { val lines = readAllLines(dir+File.separator+f) // TODO } }
Example 125
Source File: NodeActor.scala From ForestFlow with Apache License 2.0 | 5 votes |
package ai.forestflow.serving.cluster import java.io.File import akka.actor.{Actor, ActorLogging, ActorRef, Props, Timers} import akka.cluster.Cluster import akka.cluster.pubsub.DistributedPubSub import akka.cluster.pubsub.DistributedPubSubMediator.Subscribe import ai.forestflow.domain.CleanupLocalStorage import org.apache.commons.io.FileUtils import com.typesafe.scalalogging.LazyLogging import ai.forestflow.utils.ThrowableImplicits._ import scala.util.{Failure, Success, Try} /*** * This actor is responsible for node-level (host-level) stuff that should be done on a per-node basis. * A good example of this is file system cleanup tasks. */ object NodeActor extends LazyLogging { def props(): Props = Props(new NodeActor) .withDispatcher("blocking-io-dispatcher") def cleanupLocalStorage(path: String): Unit = { val localDir = new File(path) val localDirExists = localDir.exists() logger.info(s"Cleaning up local storage: Local Directory: $localDir , exists? $localDirExists") if (localDirExists) Try(FileUtils.deleteDirectory(localDir)) match { case Success(_) => logger.info(s"Local Directory $localDir cleaned up successfully") case Failure(ex) => logger.error(s"Local Directory $localDir cleanup failed! Reason: ${ex.printableStackTrace}") } } } class NodeActor extends Actor with ActorLogging with Timers { implicit val cluster: Cluster = Cluster(context.system) val mediator: ActorRef = DistributedPubSub(context.system).mediator mediator ! Subscribe(classOf[CleanupLocalStorage].getSimpleName, self) override def receive: Receive = { case CleanupLocalStorage(path) => NodeActor.cleanupLocalStorage(path) } }
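NodeActor wraps FileUtils.deleteDirectory in a Try so a failed cleanup is logged instead of crashing the actor. A stripped-down sketch of that pattern, with a hypothetical path and println standing in for the actor's logger:

import java.io.File
import scala.util.{Failure, Success, Try}
import org.apache.commons.io.FileUtils

def cleanupLocalStorage(path: String): Unit =
  Try(FileUtils.deleteDirectory(new File(path))) match {
    case Success(_)  => println(s"$path cleaned up successfully")
    case Failure(ex) => println(s"$path cleanup failed: ${ex.getMessage}")
  }

cleanupLocalStorage("/tmp/forestflow-local-storage")  // hypothetical directory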
Example 126
Source File: SentencePieceWrapper.scala From spark-nlp with Apache License 2.0 | 5 votes |
package com.johnsnowlabs.ml.tensorflow.sentencepiece import java.io.File import java.nio.file.{Files, Paths} import java.util.UUID import org.apache.commons.io.FileUtils import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.sql.SparkSession class SentencePieceWrapper( var sppModel: Array[Byte] ) extends Serializable { @transient private var mspp: SentencePieceProcessor = _ def getSppModel: SentencePieceProcessor = { if (mspp == null){ val spp = new SentencePieceProcessor() spp.loadFromSerializedProto(sppModel) mspp = spp } mspp } } object SentencePieceWrapper { def read( path: String ): SentencePieceWrapper = { val byteArray = Files.readAllBytes(Paths.get(path)) val sppWrapper = new SentencePieceWrapper(byteArray) val spp = new SentencePieceProcessor() spp.loadFromSerializedProto(byteArray) sppWrapper.mspp = spp sppWrapper } } trait WriteSentencePieceModel { def writeSentencePieceModel( path: String, spark: SparkSession, spp: SentencePieceWrapper, suffix: String, filename:String ): Unit = { val uri = new java.net.URI(path.replaceAllLiterally("\\", "/")) val fs = FileSystem.get(uri, spark.sparkContext.hadoopConfiguration) // 1. Create tmp folder val tmpFolder = Files.createTempDirectory(UUID.randomUUID().toString.takeRight(12) + suffix) .toAbsolutePath.toString val sppFile = Paths.get(tmpFolder, filename).toString // 2. Save Tensorflow state FileUtils.writeByteArrayToFile(new File(sppFile), spp.sppModel) // 3. Copy to dest folder fs.copyFromLocalFile(new Path(sppFile), new Path(path)) // 4. Remove tmp folder FileUtils.deleteDirectory(new File(tmpFolder)) } } trait ReadSentencePieceModel { val sppFile: String def readSentencePieceModel( path: String, spark: SparkSession, suffix: String ): SentencePieceWrapper = { val uri = new java.net.URI(path.replaceAllLiterally("\\", "/")) val fs = FileSystem.get(uri, spark.sparkContext.hadoopConfiguration) // 1. Create tmp directory val tmpFolder = Files.createTempDirectory(UUID.randomUUID().toString.takeRight(12)+ suffix) .toAbsolutePath.toString // 2. Copy to local dir fs.copyToLocalFile(new Path(path, sppFile), new Path(tmpFolder)) val sppModelFilePath = new Path(tmpFolder, sppFile) val byteArray = Files.readAllBytes(Paths.get(sppModelFilePath.toString)) val sppWrapper = new SentencePieceWrapper(byteArray) sppWrapper } }
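writeSentencePieceModel stages the serialized model bytes in a temp folder with FileUtils.writeByteArrayToFile and removes the folder afterwards with FileUtils.deleteDirectory. The same round trip in isolation (the payload is a stand-in for a real model):

import java.io.File
import java.nio.file.Files
import org.apache.commons.io.FileUtils

val tmpFolder = Files.createTempDirectory("spp_stage").toFile
val payload   = Array[Byte](1, 2, 3)                       // stand-in for a serialized model
val target    = new File(tmpFolder, "model.spp")

FileUtils.writeByteArrayToFile(target, payload)
assert(FileUtils.readFileToByteArray(target).sameElements(payload))

FileUtils.deleteDirectory(tmpFolder)                       // drop the staging area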
Example 127
Source File: TrainingHelper.scala From spark-nlp with Apache License 2.0 | 5 votes |
package com.johnsnowlabs.util

import java.io.File
import java.nio.file.{Files, Paths, StandardCopyOption}
import java.sql.Timestamp
import java.util.Date

import com.johnsnowlabs.nlp.pretrained.ResourceType.ResourceType
import com.johnsnowlabs.nlp.pretrained.{ResourceMetadata, ResourceType}
import org.apache.commons.io.FileUtils
import org.apache.spark.ml.util.MLWriter

object TrainingHelper {

  def saveModel(name: String,
                language: Option[String],
                libVersion: Option[Version],
                sparkVersion: Option[Version],
                modelWriter: MLWriter,
                folder: String,
                category: Option[ResourceType] = Some(ResourceType.NOT_DEFINED)
               ): Unit = {

    // 1. Get current timestamp
    val timestamp = new Timestamp(new Date().getTime)

    // 2. Save model to file
    val file = Paths.get(folder, timestamp.toString).toString.replaceAllLiterally("\\", "/")
    modelWriter.save(file)

    // 3. Zip file
    val tempzipFile = Paths.get(folder, timestamp + ".zip")
    ZipArchiveUtil.zip(file, tempzipFile.toString)

    // 4. Set checksum
    val checksum = FileHelper.generateChecksum(tempzipFile.toString)

    // 5. Create resource metadata
    val meta = new ResourceMetadata(name, language, libVersion, sparkVersion, true, timestamp, true, category = category, checksum)

    val zipfile = Paths.get(meta.fileName)

    // 6. Move the zip
    Files.move(tempzipFile, zipfile, StandardCopyOption.REPLACE_EXISTING)

    // 7. Remove original file
    try {
      FileUtils.deleteDirectory(new File(file))
    } catch {
      case _: java.io.IOException => // file lock may prevent deletion, ignore and continue
    }

    // 8. Add to metadata.json info about resource
    val metadataFile = Paths.get(folder, "metadata.json").toString
    ResourceMetadata.addMetadataToFile(metadataFile, meta)
  }
}
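The try/catch around deleteDirectory in step 7 swallows an IOException from a possible file lock. FileUtils.deleteQuietly expresses the same swallow-and-continue intent in one call; it returns false instead of throwing when deletion fails. A sketch with a hypothetical path:

import java.io.File
import org.apache.commons.io.FileUtils

val deleted = FileUtils.deleteQuietly(new File("/tmp/model-staging"))  // hypothetical path
if (!deleted) println("staging directory could not be removed; continuing anyway")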
Example 128
Source File: FileHelper.scala From spark-nlp with Apache License 2.0 | 5 votes |
package com.johnsnowlabs.util import java.io.{File, IOException} import java.nio.charset.Charset import java.nio.file.{Files, Paths} import java.security.MessageDigest import java.text.DecimalFormat import org.apache.commons.io.FileUtils object FileHelper { def writeLines(file: String, lines: Seq[String], encoding: String = "UTF-8"): Unit = { val writer = Files.newBufferedWriter(Paths.get(file), Charset.forName(encoding)) try { var cnt = 0 for (line <- lines) { writer.write(line) if (cnt > 0) writer.write(System.lineSeparator()) cnt += 1 } } catch { case ex: IOException => ex.printStackTrace() } finally if (writer != null) writer.close() } def delete(file: String, throwOnError: Boolean = false): Unit = { val f = new File(file) if (f.exists()) { try { if (f.isDirectory) FileUtils.deleteDirectory(f) else FileUtils.deleteQuietly(f) } catch { case e: Exception => if (throwOnError) throw e else FileUtils.forceDeleteOnExit(f) } } } def generateChecksum(path: String): String = { val arr = Files readAllBytes (Paths get path) val checksum = MessageDigest.getInstance("MD5") digest arr checksum.map("%02X" format _).mkString } def getHumanReadableFileSize(size: Long): String = { if (size <= 0) return "0" val units = Array[String]("B", "KB", "MB", "GB", "TB", "PB", "EB") val digitGroups = (Math.log10(size) / Math.log10(1024)).toInt new DecimalFormat("#,##0.#").format(size / Math.pow(1024, digitGroups)) + " " + units(digitGroups) } }
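FileHelper rolls its own getHumanReadableFileSize; commons-io ships comparable helpers, FileUtils.sizeOfDirectory for a recursive byte count and FileUtils.byteCountToDisplaySize for display (the latter rounds down to whole units rather than formatting with a DecimalFormat). A small sketch against the system temp directory:

import java.io.File
import org.apache.commons.io.FileUtils

val dir   = new File(System.getProperty("java.io.tmpdir"))
val bytes = FileUtils.sizeOfDirectory(dir)          // recursive size in bytes
println(FileUtils.byteCountToDisplaySize(bytes))    // e.g. "117 MB"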
Example 129
Source File: Quickstart.scala From delta with Apache License 2.0 | 5 votes |
package example import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.sql.{SparkSession, SQLContext} import io.delta.tables._ import org.apache.spark.sql.functions._ import org.apache.commons.io.FileUtils import java.io.File object Quickstart { def main(args: Array[String]): Unit = { val spark = SparkSession .builder() .appName("Quickstart") .master("local[*]") .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") .getOrCreate() val file = new File("/tmp/delta-table") if (file.exists()) FileUtils.deleteDirectory(file) // Create a table println("Creating a table") val path = file.getCanonicalPath var data = spark.range(0, 5) data.write.format("delta").save(path) // Read table println("Reading the table") val df = spark.read.format("delta").load(path) df.show() // Upsert (merge) new data println("Upsert new data") val newData = spark.range(0, 20).toDF val deltaTable = DeltaTable.forPath(path) deltaTable.as("oldData") .merge( newData.as("newData"), "oldData.id = newData.id") .whenMatched .update(Map("id" -> col("newData.id"))) .whenNotMatched .insert(Map("id" -> col("newData.id"))) .execute() deltaTable.toDF.show() // Update table data println("Overwrite the table") data = spark.range(5, 10) data.write.format("delta").mode("overwrite").save(path) deltaTable.toDF.show() // Update every even value by adding 100 to it println("Update to the table (add 100 to every even value)") deltaTable.update( condition = expr("id % 2 == 0"), set = Map("id" -> expr("id + 100"))) deltaTable.toDF.show() // Delete every even value deltaTable.delete(condition = expr("id % 2 == 0")) deltaTable.toDF.show() // Read old version of the data using time travel print("Read old data using time travel") val df2 = spark.read.format("delta").option("versionAsOf", 0).load(path) df2.show() // Cleanup FileUtils.deleteDirectory(file) spark.stop() } }
Example 130
Source File: QuickstartSQL.scala From delta with Apache License 2.0 | 5 votes |
package example import org.apache.spark.sql.SparkSession import io.delta.tables._ import org.apache.spark.sql.functions._ import org.apache.commons.io.FileUtils import java.io.File object QuickstartSQL { def main(args: Array[String]): Unit = { // Create Spark Conf val spark = SparkSession .builder() .appName("QuickstartSQL") .master("local[*]") .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") .getOrCreate() val tableName = "tblname" // Clear up old session spark.sql(s"DROP TABLE IF EXISTS $tableName") spark.sql(s"DROP TABLE IF EXISTS newData") try { // Create a table println("Creating a table") spark.sql(s"CREATE TABLE $tableName(id LONG) USING delta") spark.sql(s"INSERT INTO $tableName VALUES 0, 1, 2, 3, 4") // Read table println("Reading the table") spark.sql(s"SELECT * FROM $tableName").show() // Upsert (merge) new data println("Upsert new data") spark.sql("CREATE TABLE newData(id LONG) USING parquet") spark.sql("INSERT INTO newData VALUES 3, 4, 5, 6") spark.sql(s"""MERGE INTO $tableName USING newData ON ${tableName}.id = newData.id WHEN MATCHED THEN UPDATE SET ${tableName}.id = newData.id WHEN NOT MATCHED THEN INSERT * """) spark.sql(s"SELECT * FROM $tableName").show() // Update table data println("Overwrite the table") spark.sql(s"INSERT OVERWRITE $tableName VALUES 5, 6, 7, 8, 9") spark.sql(s"SELECT * FROM $tableName").show() // Update every even value by adding 100 to it println("Update to the table (add 100 to every even value)") spark.sql(s"UPDATE $tableName SET id = (id + 100) WHERE (id % 2 == 0)") spark.sql(s"SELECT * FROM $tableName").show() // Delete every even value spark.sql(s"DELETE FROM $tableName WHERE (id % 2 == 0)") spark.sql(s"SELECT * FROM $tableName").show() // Read old version of the data using time travel print("Read old data using time travel") val df2 = spark.read.format("delta").option("versionAsOf", 0).table(tableName) df2.show() } finally { // Cleanup spark.sql(s"DROP TABLE IF EXISTS $tableName") spark.sql(s"DROP TABLE IF EXISTS newData") spark.stop() } } }
Example 131
Source File: QuickstartSQLOnPaths.scala From delta with Apache License 2.0 | 5 votes |
package example import org.apache.spark.sql.SparkSession import io.delta.tables._ import org.apache.spark.sql.functions._ import org.apache.commons.io.FileUtils import java.io.File object QuickstartSQLOnPaths { def main(args: Array[String]): Unit = { // Create Spark Conf val spark = SparkSession .builder() .appName("QuickstartSQLOnPaths") .master("local[*]") .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") .getOrCreate() val tablePath = new File("/tmp/delta-table") if (tablePath.exists()) FileUtils.deleteDirectory(tablePath) // Clear up old session spark.sql(s"DROP TABLE IF EXISTS newData") try { // Create a table println("Creating a table") spark.sql(s"CREATE TABLE delta.`$tablePath`(id LONG) USING delta") spark.sql(s"INSERT INTO delta.`$tablePath` VALUES 0, 1, 2, 3, 4") // Read table println("Reading the table") spark.sql(s"SELECT * FROM delta.`$tablePath`").show() // Upsert (merge) new data println("Upsert new data") spark.sql("CREATE TABLE newData(id LONG) USING parquet") spark.sql("INSERT INTO newData VALUES 3, 4, 5, 6") spark.sql(s"""MERGE INTO delta.`$tablePath` data USING newData ON data.id = newData.id WHEN MATCHED THEN UPDATE SET data.id = newData.id WHEN NOT MATCHED THEN INSERT * """) spark.sql(s"SELECT * FROM delta.`$tablePath`").show() // Update table data println("Overwrite the table") spark.sql(s"INSERT OVERWRITE delta.`$tablePath` VALUES 5, 6, 7, 8, 9") spark.sql(s"SELECT * FROM delta.`$tablePath`").show() // Update every even value by adding 100 to it println("Update to the table (add 100 to every even value)") spark.sql(s"UPDATE delta.`$tablePath` SET id = (id + 100) WHERE (id % 2 == 0)") spark.sql(s"SELECT * FROM delta.`$tablePath`").show() // Delete every even value spark.sql(s"DELETE FROM delta.`$tablePath` WHERE (id % 2 == 0)") spark.sql(s"SELECT * FROM delta.`$tablePath`").show() } finally { // Cleanup spark.sql(s"DROP TABLE IF EXISTS newData") spark.stop() } } }
Example 132
Source File: Utilities.scala From delta with Apache License 2.0 | 5 votes |
package example import java.io.File import io.delta.tables.DeltaTable import org.apache.commons.io.FileUtils import org.apache.spark.sql.SparkSession object Utilities { def main(args: Array[String]): Unit = { // Create a Spark Session with SQL enabled val spark = SparkSession .builder() .appName("Utilities") .master("local[*]") .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") // control the parallelism for vacuum .config("spark.sql.sources.parallelPartitionDiscovery.parallelism", "4") .getOrCreate() // Create a table println("Create a parquet table") val data = spark.range(0, 5) val file = new File("/tmp/parquet-table") val path = file.getAbsolutePath data.write.format("parquet").save(path) // Convert to delta println("Convert to Delta") DeltaTable.convertToDelta(spark, s"parquet.`$path`") // Read table as delta var df = spark.read.format("delta").load(path) // Read old version of data using time travel df = spark.read.format("delta").option("versionAsOf", 0).load(path) df.show() val deltaTable = DeltaTable.forPath(path) // Utility commands println("Vacuum the table") deltaTable.vacuum() println("Describe History for the table") deltaTable.history().show() // Generate manifest println("Generate Manifest files") deltaTable.generate("SYMLINK_FORMAT_MANIFEST") // SQL utility commands println("SQL Vacuum") spark.sql(s"VACUUM '$path' RETAIN 169 HOURS") println("SQL Describe History") println(spark.sql(s"DESCRIBE HISTORY '$path'").collect()) // Cleanup FileUtils.deleteDirectory(new File(path)) spark.stop() } }
Example 133
Source File: CodeGen.scala From mmlspark with MIT License | 5 votes |
// Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. package com.microsoft.ml.spark.codegen import java.io.File import com.microsoft.ml.spark.codegen.Config._ import com.microsoft.ml.spark.core.env.FileUtilities._ import org.apache.commons.io.FileUtils import org.apache.commons.io.FilenameUtils._ object CodeGen { def generateArtifacts(): Unit = { println( s"""|Running code generation with config: | topDir: $TopDir | packageDir: $PackageDir | pySrcDir: $PySrcDir | pyTestDir: $PyTestDir | rsrcDir: $RSrcDir""".stripMargin) println("Creating temp folders") if (GeneratedDir.exists()) FileUtils.forceDelete(GeneratedDir) println("Generating python APIs") PySparkWrapperGenerator() println("Generating R APIs") SparklyRWrapperGenerator(Version) def toDir(f: File): File = new File(f, File.separator) //writeFile(new File(pySrcDir, "__init__.py"), packageHelp("")) FileUtils.copyDirectoryToDirectory(toDir(PySrcOverrideDir), toDir(PySrcDir)) FileUtils.copyDirectoryToDirectory(toDir(PyTestOverrideDir), toDir(PyTestDir)) makeInitFiles() // build init file // package python+r zip files // zipFolder(pyDir, pyZipFile) RPackageDir.mkdirs() zipFolder(RSrcDir, new File(RPackageDir, s"mmlspark-$Version.zip")) //FileUtils.forceDelete(rDir) // leave the python source files, so they will be included in the super-jar // FileUtils.forceDelete(pyDir) } private def makeInitFiles(packageFolder: String = ""): Unit = { val dir = new File(new File(PySrcDir,"mmlspark"), packageFolder) val packageString = if (packageFolder != "") packageFolder.replace("/",".") else "" val importStrings = dir.listFiles.filter(_.isFile).sorted .map(_.getName) .filter(name => name.endsWith(".py") && !name.startsWith("_") && !name.startsWith("test")) .map(name => s"from mmlspark$packageString.${getBaseName(name)} import *\n").mkString("") writeFile(new File(dir, "__init__.py"), packageHelp(importStrings)) dir.listFiles().filter(_.isDirectory).foreach(f => makeInitFiles(packageFolder +"/" + f.getName) ) } def main(args: Array[String]): Unit = { generateArtifacts() } }
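CodeGen relies on FileUtils.forceDelete to clear the generated output and FileUtils.copyDirectoryToDirectory to nest the override sources inside the target. The difference from plain copyDirectory is where the source directory ends up; a REPL-style sketch with throwaway temp directories:

import java.io.File
import java.nio.file.Files
import org.apache.commons.io.FileUtils

val src  = Files.createTempDirectory("py-src").toFile
Files.createFile(src.toPath.resolve("module.py"))
val dest = Files.createTempDirectory("py-package").toFile

FileUtils.copyDirectory(src, dest)              // contents of src land directly under dest
FileUtils.copyDirectoryToDirectory(src, dest)   // src itself lands under dest as dest/<srcName>
println(dest.listFiles().map(_.getName).toList) // module.py plus the nested py-src* directory

FileUtils.forceDelete(src)
FileUtils.forceDelete(dest)                     // works on files and directories, throws if missing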
Example 134
Source File: DownloaderSuite.scala From mmlspark with MIT License | 5 votes |
// Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. package com.microsoft.ml.spark.downloader import java.io.File import java.nio.file.Files import com.microsoft.ml.spark.core.test.base.TestBase import org.apache.commons.io.FileUtils import scala.collection.JavaConversions._ import scala.concurrent.duration.Duration import scala.util.Random class DownloaderSuite extends TestBase { lazy val saveDir = Files.createTempDirectory("Models-").toFile lazy val d = new ModelDownloader(session, saveDir.toURI) test("retry utility should catch flakiness"){ (1 to 20).foreach { i => val result = FaultToleranceUtils.retryWithTimeout(20, Duration.apply(2, "seconds")) { val r = Random.nextDouble() if (r > .5) { println(s"$r failed") throw new IllegalArgumentException("Flakiness") } else if (r < .1){ //Getting stuck val m = 3* 1e3.toLong println(s"$r Stuck for $m") Thread.sleep(m) } println(s"$r Success") 5 } assert(result === 5) } } test("A downloader should be able to download a model", TestBase.Extended) { val m = d.remoteModels.filter(_.name == "CNN").next() val schema = d.downloadModel(m) println(schema) assert(m.size == new File(schema.uri).length()) assert(d.localModels.toList.length == 1) } ignore("A downloader should be able to get all Models " + "and maybeDownload should be fast if models are downloaded", TestBase.Extended) { val (modTimes, modTimes2) = FaultToleranceUtils.retryWithTimeout(10, Duration.apply(500, "seconds")) { d.downloadModels() val modTimes = d.localModels.map(s => new File(s.uri).lastModified()) d.downloadModels() val modTimes2 = d.localModels.map(s => new File(s.uri).lastModified()) (modTimes, modTimes2) } // No modification on second call because models are cached assert(modTimes.toList === modTimes2.toList) // the downloader's local models will reflect the change assert(d.localModels.toList.length == d.remoteModels.toList.length) // there will be a metadata file for every model assert(saveDir.list().count(_.endsWith(".meta")) == d.localModels.toList.length) } override def afterAll(): Unit = { if (saveDir.exists()) { FileUtils.forceDelete(saveDir) } super.afterAll() } }
Example 135
Source File: S3ObjectUploader.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.tools.neptune.export import java.io._ import java.util import java.util.concurrent.{Executors, TimeoutException} import java.util.stream.Collectors import java.util.{Collections, Vector} import com.amazonaws.auth.profile.ProfileCredentialsProvider import com.amazonaws.services.s3.AmazonS3ClientBuilder import com.amazonaws.services.s3.model.{ObjectMetadata, PutObjectRequest} import com.amazonaws.{AmazonServiceException, ClientConfiguration, Protocol, SdkClientException} import org.apache.commons.io.{FileUtils, IOUtils} import org.slf4j.LoggerFactory import scala.concurrent.{Await, ExecutionContext, Future} import scala.concurrent.duration.{FiniteDuration, _} object S3ObjectUploader{ val executor = Executors.newFixedThreadPool(1) implicit val ec: ExecutionContext = scala.concurrent.ExecutionContext.fromExecutor(executor) protected lazy val logger = LoggerFactory.getLogger("s3_uploader") def init(proxyHost:Option[String], proxyPort:Option[Int]) = { val clientRegion = "us-east-1" val config = new ClientConfiguration config.setProtocol(Protocol.HTTPS) proxyHost.foreach(host => config.setProxyHost(host)) proxyPort.foreach(port => config.setProxyPort(port)) val s3Client = AmazonS3ClientBuilder.standard() .withRegion(clientRegion) .withClientConfiguration(config) .withCredentials(new ProfileCredentialsProvider()) .build() s3Client } def persistChunkToS3Bucket(chunkData:String, fileName:String, proxyHost:Option[String], proxyPort:Option[Int], s3Directory:String) = { try{ init(proxyHost, proxyPort).putObject(s3Directory, fileName, chunkData) } catch { case e: AmazonServiceException => e.printStackTrace() throw e case e: SdkClientException => e.printStackTrace() throw e } } def persistChunkToS3Bucket(tmpFile:File, proxyHost:Option[String], proxyPort:Option[Int], s3Directory:String, retryCount:Int = 3):Unit = { try{ val s3UploadTask = Future{init(proxyHost, proxyPort).putObject(s3Directory, tmpFile.getName, tmpFile)}(ec) Await.result(s3UploadTask, 5.minutes) tmpFile.delete() } catch { case e:TimeoutException => if(retryCount > 0) { logger.error("S3 upload task run more than 5 minutes..Going to retry") persistChunkToS3Bucket(tmpFile, proxyHost, proxyPort, s3Directory, retryCount-1) } else{ throw new Exception( "S3 upload task duration was more than 5 minutes") } case e: AmazonServiceException => e.printStackTrace() throw e case e: SdkClientException => e.printStackTrace() throw e } } }
Example 136
Source File: AnalyzeInconsistenciesResult.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.analytics.main import java.io.File import java.nio.charset.StandardCharsets.UTF_8 import cmwell.analytics.data.InfotonAndIndexWithSystemFields import cmwell.analytics.util.Connector import org.apache.commons.io.FileUtils import org.apache.log4j.LogManager import org.apache.spark.sql.{Column, DataFrame, Row} import org.rogach.scallop.{ScallopConf, ScallopOption} import scala.collection.breakOut object AnalyzeInconsistenciesResult { def main(args: Array[String]): Unit = { val logger = LogManager.getLogger(AnalyzeInconsistenciesResult.getClass) try { object Opts extends ScallopConf(args) { val in: ScallopOption[String] = opt[String]("in", short = 'i', descr = "The path to read the (parquet) inconsistencies dataset from", required = true) val out: ScallopOption[String] = opt[String]("out", short = 'o', descr = "The path to save the (csv) output to", required = true) val shell: ScallopOption[Boolean] = opt[Boolean]("spark-shell", short = 's', descr = "Run a Spark shell", required = false, default = Some(false)) verify() } Connector( appName = "Analyze InfotonAndIndexWithSystemFields Output", sparkShell = Opts.shell() ).withSparkSessionDo { spark => val ds: DataFrame = spark.read.parquet(Opts.in()) import org.apache.spark.sql.functions._ // A column expression that counts the number of failures for each constraint. // This will also include null counts, needed to interpret the results. val constraints: Seq[(String, Column)] = InfotonAndIndexWithSystemFields.constraints(ds).map { case (name, predicate) => name -> sum(when(predicate, 0L).otherwise(1L)).as(name) }(breakOut) // Compute the failure counts val failureCounts: Row = ds.agg(constraints.head._2, constraints.tail.map(_._2): _*).head val results = for { i <- constraints.indices constraintName = constraints(i)._1 failureCount = if (failureCounts.isNullAt(i)) 0 else failureCounts.getAs[Long](i) } yield s"$constraintName,$failureCount" FileUtils.write(new File(Opts.out()), "constraint,failures\n" + results.mkString("\n"), UTF_8) } } catch { case ex: Throwable => logger.error(ex.getMessage, ex) System.exit(1) } } }
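The report above is written with FileUtils.write(File, CharSequence, Charset), which creates any missing parent directories before writing. A small standalone version of the CSV dump (the file name and rows are made up):

import java.io.File
import java.nio.charset.StandardCharsets.UTF_8
import org.apache.commons.io.FileUtils

val results = Seq("uuid-non-null,0", "path-non-null,3")              // made-up failure counts
val report  = new File("target/reports/inconsistencies.csv")         // parent dirs created on demand

FileUtils.write(report, "constraint,failures\n" + results.mkString("\n"), UTF_8)
println(FileUtils.readFileToString(report, UTF_8))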
Example 137
Source File: ExtractFromParquet.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.analytics.data import java.io.File import java.nio.charset.StandardCharsets.UTF_8 import cmwell.analytics.util.Connector import cmwell.analytics.util.StringUtil._ import org.apache.commons.io.FileUtils import org.apache.spark.sql.{DataFrame, Row, SparkSession} import org.rogach.scallop.{ScallopConf, ScallopOption} object ExtractFromParquet { def main(args: Array[String]): Unit = { object Opts extends ScallopConf(args) { val pathsToFind: ScallopOption[String] = opt[String]("paths-to-find", short = 'f', descr = "A file containing the list of paths to look for", required = true) val parquetData: ScallopOption[String] = opt[String]("parquet-file", short = 'p', descr = "A Parquet file containing the data; single string column rdfStatement", required = true) val extractedData: ScallopOption[String] = opt[String]("extracted-data", short = 'd', descr = "The file that extracted data will be written to (in nquads format)", required = true) val pathsNotFound: ScallopOption[String] = opt[String]("paths-not-found", short = 'n', descr = "The output file that any paths that were not found are written to", required = true) val pathsFound: ScallopOption[String] = opt[String]("paths-found", short = 'a', descr = "The output file containing the paths that we found are written to", required = true) verify() } Connector(sparkShell = true, appName = "Extract from parquet").withSparkSessionDo { spark: SparkSession => val pathsToFind = Set(splitLines(FileUtils.readFileToString(new File(Opts.pathsToFind()), UTF_8)): _*) val ds: DataFrame = spark.read.parquet(Opts.parquetData()) // Cheesy parsing of path from an RDF nquad, but sufficient for this purpose def extractPath(rdfStatement: String): String = rdfStatement.substring(7, rdfStatement.indexOf(">")) val statementsFound = ds.rdd.filter { row: Row => val statement = row.getAs[String]("rdfStatement") val path = extractPath(statement) pathsToFind.contains(path) }.collect() // expect the result to be small, so collect is OK // Save all the paths that were not found to file - look for them in other files. val pathsFound: Set[String] = Set(statementsFound.map(row => extractPath(row.getString(0))): _*) println(s"There were ${pathsFound.size} paths found (out of ${pathsToFind.size}).") FileUtils.writeStringToFile(new File(Opts.pathsFound()), pathsFound.mkString("\n"), UTF_8, false) val pathsNotFound = pathsToFind.diff(pathsFound) println(s"There were ${pathsNotFound.size} paths not found.") FileUtils.writeStringToFile(new File(Opts.pathsNotFound()), pathsNotFound.mkString("\n"), UTF_8, false) // Save the RDF statements for the paths that were found val x = statementsFound.map(row => row.getString(0)).mkString("\n") FileUtils.writeStringToFile(new File(Opts.extractedData()), x, UTF_8, false) } } }
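ExtractFromParquet reads whole files with FileUtils.readFileToString and writes results with FileUtils.writeStringToFile, whose last argument is the append flag (false overwrites). A self-contained round trip using a temp file:

import java.io.File
import java.nio.charset.StandardCharsets.UTF_8
import org.apache.commons.io.FileUtils

val f = File.createTempFile("paths", ".txt")
FileUtils.writeStringToFile(f, "/a/b/c\n", UTF_8, false)   // false = overwrite
FileUtils.writeStringToFile(f, "/d/e/f\n", UTF_8, true)    // true  = append
assert(FileUtils.readFileToString(f, UTF_8) == "/a/b/c\n/d/e/f\n")
f.delete()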
Example 138
Source File: DumpCompleteDocumentFromEs.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.analytics.main import java.nio.file.Paths import akka.actor.ActorSystem import akka.stream.ActorMaterializer import cmwell.analytics.data.{DataWriterFactory, IndexWithCompleteDocument} import cmwell.analytics.downloader.PartitionedDownloader import cmwell.analytics.util.TimestampConversion.timestampConverter import cmwell.analytics.util.{DiscoverEsTopology, FindContactPoints} import org.apache.commons.io.FileUtils import org.apache.log4j.LogManager import org.rogach.scallop.{ScallopConf, ScallopOption} import scala.concurrent.ExecutionContextExecutor object DumpCompleteDocumentFromEs { def main(args: Array[String]): Unit = { val logger = LogManager.getLogger(DumpCompleteDocumentFromEs.getClass) // Since we expect this to be run on a CM-Well node, the default parallelism is to use half the processors // so as to avoid starving the CM-Well node from processor resources. A higher level of parallelism might // be possible (without interfering with CM-Well) since most of the work will actually be on the ES side. val defaultParallelism = 1 max (Runtime.getRuntime.availableProcessors / 2) implicit val system: ActorSystem = ActorSystem("dump-complete-document-from-es") implicit val executionContext: ExecutionContextExecutor = system.dispatcher implicit val actorMaterializer: ActorMaterializer = ActorMaterializer() try { object Opts extends ScallopConf(args) { val readIndex: ScallopOption[String] = opt[String]("read-index", short = 'i', descr = "The name of the index to read from (default: cm_well_all)", required = false) val parallelism: ScallopOption[Int] = opt[Int]("parallelism", short = 'p', descr = "The parallelism level", default = Some(defaultParallelism)) val currentOnly: ScallopOption[Boolean] = opt[Boolean]("current-filter", short = 'c', descr = "Filter on current status", default = None) val lastModifiedGteFilter: ScallopOption[java.sql.Timestamp] = opt[java.sql.Timestamp]("lastmodified-gte-filter", descr = "Filter on lastModified >= <value>, where value is an ISO8601 timestamp", default = None)(timestampConverter) val pathPrefixFilter: ScallopOption[String] = opt[String]("path-prefix-filter", descr = "Filter on the path prefix matching <value>", default = None) val out: ScallopOption[String] = opt[String]("out", short = 'o', descr = "The path to save the output to", required = true) val format: ScallopOption[String] = opt[String]("format", short = 'f', descr = "The data format: either 'parquet' or 'csv'", default = Some("parquet")) val url: ScallopOption[String] = trailArg[String]("url", descr = "A CM-Well URL", required = true) verify() } val esContactPoint = FindContactPoints.es(Opts.url()) val indexesOrAliasesToRead = Opts.readIndex.toOption.fold(Seq("cm_well_all"))(Seq(_)) val esTopology = DiscoverEsTopology(esContactPoint = esContactPoint, aliases = indexesOrAliasesToRead) // Calling script should clear output directory as necessary. val objectExtractor = IndexWithCompleteDocument val dataWriterFactory = DataWriterFactory.file(format = Opts.format(), objectExtractor, outDirectory = Opts.out()) PartitionedDownloader.runDownload( esTopology = esTopology, parallelism = Opts.parallelism(), currentOnly = Opts.currentOnly(), lastModifiedGteFilter = Opts.lastModifiedGteFilter.toOption, pathPrefixFilter = Opts.pathPrefixFilter.toOption, objectExtractor = objectExtractor, dataWriterFactory = dataWriterFactory, sourceFilter = false) // The Hadoop convention is to touch the (empty) _SUCCESS file to signal successful completion. 
FileUtils.touch(Paths.get(Opts.out(), "_SUCCESS").toFile) } catch { case ex: Throwable => logger.error(ex.getMessage, ex) System.exit(1) } finally { system.terminate() } } }
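FileUtils.touch backs the _SUCCESS marker used here and in the following dump tools: it creates an empty file (including missing parent directories) or simply updates the timestamp if the file already exists. In isolation, with a temp directory standing in for Opts.out():

import java.nio.file.{Files, Paths}
import org.apache.commons.io.FileUtils

val outDir = Files.createTempDirectory("es-dump").toString   // stands in for Opts.out()
val marker = Paths.get(outDir, "_SUCCESS").toFile

FileUtils.touch(marker)                       // creates the empty marker file
assert(marker.exists() && marker.length() == 0)
FileUtils.touch(marker)                       // idempotent: only bumps last-modified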
Example 139
Source File: DumpKeyFieldsFromEs.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.analytics.main import java.nio.file.Paths import akka.actor.ActorSystem import akka.stream.ActorMaterializer import cmwell.analytics.data.{DataWriterFactory, IndexWithKeyFields} import cmwell.analytics.downloader.PartitionedDownloader import cmwell.analytics.util.TimestampConversion.timestampConverter import cmwell.analytics.util.{DiscoverEsTopology, FindContactPoints} import org.apache.commons.io.FileUtils import org.apache.log4j.LogManager import org.rogach.scallop.{ScallopConf, ScallopOption} import scala.concurrent.ExecutionContextExecutor object DumpKeyFieldsFromEs { def main(args: Array[String]): Unit = { val logger = LogManager.getLogger(DumpKeyFieldsFromEs.getClass) implicit val system: ActorSystem = ActorSystem("dump-key-fields-from-es") implicit val executionContext: ExecutionContextExecutor = system.dispatcher implicit val actorMaterializer: ActorMaterializer = ActorMaterializer() try { // Since we expect this to be run on a CM-Well node, the default parallelism is to use half the processors // so as to avoid starving the CM-Well node from processor resources. A higher level of parallelism might // be possible (without interfering with CM-Well) since most of the work will actually be on the ES side. val defaultParallelism = 1 max (Runtime.getRuntime.availableProcessors / 2) object Opts extends ScallopConf(args) { val readIndex: ScallopOption[String] = opt[String]("read-index", short = 'i', descr = "The name of the index to read from (default: cm_well_all)", required = false) val parallelism: ScallopOption[Int] = opt[Int]("parallelism", short = 'p', descr = "The parallelism level", default = Some(defaultParallelism)) val currentOnly: ScallopOption[Boolean] = opt[Boolean]("current-only", short = 'c', descr = "Only download current uuids") val lastModifiedGteFilter: ScallopOption[java.sql.Timestamp] = opt[java.sql.Timestamp]("lastmodified-gte-filter", descr = "Filter on lastModified >= <value>, where value is an ISO8601 timestamp", default = None)(timestampConverter) val pathPrefixFilter: ScallopOption[String] = opt[String]("path-prefix-filter", descr = "Filter on the path prefix matching <value>", default = None) val format: ScallopOption[String] = opt[String]("format", short = 'f', descr = "The data format: either 'parquet' or 'csv'", default = Some("parquet")) val out: ScallopOption[String] = opt[String]("out", short = 'o', descr = "The path to save the output to", required = true) val url: ScallopOption[String] = trailArg[String]("url", descr = "A CM-Well URL", required = true) val sourceFilter: ScallopOption[Boolean] = toggle("source-filter", noshort = true, default = Some(true), prefix = "no-", descrNo = "Do not filter _source fields (workaround for bad index)", descrYes = "Use source filtering to reduce network traffic") verify() } val esContactPoint = FindContactPoints.es(Opts.url()) val indexesOrAliasesToRead = Opts.readIndex.toOption.fold(Seq("cm_well_all"))(Seq(_)) val esTopology = DiscoverEsTopology(esContactPoint = esContactPoint, aliases = indexesOrAliasesToRead) // Calling script should clear output directory as necessary. 
val objectExtractor = IndexWithKeyFields val dataWriterFactory = DataWriterFactory.file(format = Opts.format(), objectExtractor, outDirectory = Opts.out()) PartitionedDownloader.runDownload( esTopology = esTopology, parallelism = Opts.parallelism(), currentOnly = Opts.currentOnly(), lastModifiedGteFilter = Opts.lastModifiedGteFilter.toOption, pathPrefixFilter = Opts.pathPrefixFilter.toOption, objectExtractor = objectExtractor, dataWriterFactory = dataWriterFactory, sourceFilter = Opts.sourceFilter()) // The Hadoop convention is to touch the (empty) _SUCCESS file to signal successful completion. FileUtils.touch(Paths.get(Opts.out(), "_SUCCESS").toFile) } catch { case ex: Throwable => logger.error(ex.getMessage, ex) System.exit(1) } finally { system.terminate() } } }
Example 140
Source File: DumpUuidOnlyFromEs.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.analytics.main import java.nio.file.Paths import akka.actor.ActorSystem import akka.stream.ActorMaterializer import cmwell.analytics.data.{DataWriterFactory, IndexWithUuidOnly} import cmwell.analytics.downloader.PartitionedDownloader import cmwell.analytics.util.TimestampConversion.timestampConverter import cmwell.analytics.util.{DiscoverEsTopology, FindContactPoints} import org.apache.commons.io.FileUtils import org.apache.log4j.LogManager import org.rogach.scallop.{ScallopConf, ScallopOption} import scala.concurrent.ExecutionContextExecutor object DumpUuidOnlyFromEs { def main(args: Array[String]): Unit = { val logger = LogManager.getLogger(DumpUuidOnlyFromEs.getClass) // Since we expect this to be run on a CM-Well node, the default parallelism is to use half the processors // so as to avoid starving the CM-Well node from processor resources. A higher level of parallelism might // be possible (without interfering with CM-Well) since most of the work will actually be on the ES side. val defaultParallelism = 1 max (Runtime.getRuntime.availableProcessors / 2) implicit val system: ActorSystem = ActorSystem("dump-uuid-only-from-es") implicit val executionContext: ExecutionContextExecutor = system.dispatcher implicit val actorMaterializer: ActorMaterializer = ActorMaterializer() try { object Opts extends ScallopConf(args) { val readIndex: ScallopOption[String] = opt[String]("read-index", short = 'i', descr = "The name of the index to read from (default: cm_well_all)", required = false) val parallelism: ScallopOption[Int] = opt[Int]("parallelism", short = 'p', descr = "The parallelism level", default = Some(defaultParallelism)) val currentOnly: ScallopOption[Boolean] = opt[Boolean]("current-only", short = 'c', descr = "Only download current uuids") val lastModifiedGteFilter: ScallopOption[java.sql.Timestamp] = opt[java.sql.Timestamp]("lastmodified-gte-filter", descr = "Filter on lastModified >= <value>, where value is an ISO8601 timestamp", default = None)(timestampConverter) val pathPrefixFilter: ScallopOption[String] = opt[String]("path-prefix-filter", descr = "Filter on the path prefix matching <value>", default = None) val out: ScallopOption[String] = opt[String]("out", short = 'o', descr = "The path to save the output to", required = true) val format: ScallopOption[String] = opt[String]("format", short = 'f', descr = "The data format: either 'parquet' or 'csv'", default = Some("parquet")) val url: ScallopOption[String] = trailArg[String]("url", descr = "A CM-Well URL", required = true) val sourceFilter: ScallopOption[Boolean] = toggle("source-filter", noshort = true, default=Some(true), prefix = "no-", descrNo = "Do not filter _source fields (workaround for bad index)", descrYes = "Use source filtering to reduce network traffic") verify() } val esContactPoint = FindContactPoints.es(Opts.url()) val indexesOrAliasesToRead = Opts.readIndex.toOption.fold(Seq("cm_well_all"))(Seq(_)) val esTopology = DiscoverEsTopology(esContactPoint = esContactPoint, aliases = indexesOrAliasesToRead) // Calling script should clear output directory as necessary. 
val objectExtractor = IndexWithUuidOnly val dataWriterFactory = DataWriterFactory.file(format = Opts.format(), objectExtractor, outDirectory = Opts.out()) PartitionedDownloader.runDownload( esTopology = esTopology, parallelism = Opts.parallelism(), currentOnly = Opts.currentOnly(), lastModifiedGteFilter = Opts.lastModifiedGteFilter.toOption, pathPrefixFilter = Opts.pathPrefixFilter.toOption, objectExtractor = objectExtractor, dataWriterFactory = dataWriterFactory, sourceFilter = Opts.sourceFilter()) // The Hadoop convention is to touch the (empty) _SUCCESS file to signal successful completion. FileUtils.touch(Paths.get(Opts.out(), "_SUCCESS").toFile) } catch { case ex: Throwable => logger.error(ex.getMessage, ex) System.exit(1) } finally { system.terminate() } } }
Example 141
Source File: DumpSystemFieldsFromEs.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.analytics.main import java.nio.file.Paths import akka.actor.ActorSystem import akka.stream.ActorMaterializer import cmwell.analytics.data.{DataWriterFactory, IndexWithSystemFields} import cmwell.analytics.downloader.PartitionedDownloader import cmwell.analytics.util.TimestampConversion.timestampConverter import cmwell.analytics.util.{DiscoverEsTopology, FindContactPoints} import org.apache.commons.io.FileUtils import org.apache.log4j.LogManager import org.rogach.scallop.{ScallopConf, ScallopOption} import scala.concurrent.ExecutionContextExecutor object DumpSystemFieldsFromEs { def main(args: Array[String]): Unit = { val logger = LogManager.getLogger(DumpSystemFieldsFromEs.getClass) implicit val system: ActorSystem = ActorSystem("dump-system-fields-from-es") implicit val executionContext: ExecutionContextExecutor = system.dispatcher implicit val actorMaterializer: ActorMaterializer = ActorMaterializer() try { // Since we expect this to be run on a CM-Well node, the default parallelism is to use half the processors // so as to avoid starving the CM-Well node from processor resources. A higher level of parallelism might // be possible (without interfering with CM-Well) since most of the work will actually be on the ES side. val defaultParallelism = 1 max (Runtime.getRuntime.availableProcessors / 2) object Opts extends ScallopConf(args) { val readIndex: ScallopOption[String] = opt[String]("read-index", short = 'i', descr = "The name of the index to read from (default: cm_well_all)", required = false) val parallelism: ScallopOption[Int] = opt[Int]("parallelism", short = 'p', descr = "The parallelism level", default = Some(defaultParallelism)) val currentOnly: ScallopOption[Boolean] = opt[Boolean]("current-only", short = 'c', descr = "Only download current uuids") val lastModifiedGteFilter: ScallopOption[java.sql.Timestamp] = opt[java.sql.Timestamp]("lastmodified-gte-filter", descr = "Filter on lastModified >= <value>, where value is an ISO8601 timestamp", default = None)(timestampConverter) val pathPrefixFilter: ScallopOption[String] = opt[String]("path-prefix-filter", descr = "Filter on the path prefix matching <value>", default = None) val format: ScallopOption[String] = opt[String]("format", short = 'f', descr = "The data format: either 'parquet' or 'csv'", default = Some("parquet")) val out: ScallopOption[String] = opt[String]("out", short = 'o', descr = "The path to save the output to", required = true) val url: ScallopOption[String] = trailArg[String]("url", descr = "A CM-Well URL", required = true) val sourceFilter: ScallopOption[Boolean] = toggle("source-filter", noshort = true, default=Some(true), prefix = "no-", descrNo = "Do not filter _source fields (workaround for bad index)", descrYes = "Use source filtering to reduce network traffic") verify() } val esContactPoint = FindContactPoints.es(Opts.url()) val indexesOrAliasesToRead = Opts.readIndex.toOption.fold(Seq("cm_well_all"))(Seq(_)) val esTopology = DiscoverEsTopology(esContactPoint = esContactPoint, aliases = indexesOrAliasesToRead) // Calling script should clear output directory as necessary. 
val objectExtractor = IndexWithSystemFields val dataWriterFactory = DataWriterFactory.file(format = Opts.format(), objectExtractor, outDirectory = Opts.out()) PartitionedDownloader.runDownload( esTopology = esTopology, parallelism = Opts.parallelism(), currentOnly = Opts.currentOnly(), lastModifiedGteFilter = Opts.lastModifiedGteFilter.toOption, pathPrefixFilter = Opts.pathPrefixFilter.toOption, objectExtractor = objectExtractor, dataWriterFactory = dataWriterFactory, sourceFilter = Opts.sourceFilter()) // The Hadoop convention is to touch the (empty) _SUCCESS file to signal successful completion. FileUtils.touch(Paths.get(Opts.out(), "_SUCCESS").toFile) } catch { case ex: Throwable => logger.error(ex.getMessage, ex) System.exit(1) } finally { system.terminate() } } }
Example 142
Source File: DataWriterFactory.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.analytics.data import java.io.File import java.nio.file.Paths import akka.actor.ActorSystem import akka.stream.ActorMaterializer import cmwell.analytics.util.Shard import org.apache.avro.generic.GenericRecord import org.apache.commons.io.FileUtils import org.apache.parquet.hadoop.metadata.CompressionCodecName import scala.concurrent.ExecutionContextExecutor trait DataWriterFactory[T <: GenericRecord] { def apply(shard: Shard): DataWriter[T] } object DataWriterFactory { private val compressionCodec = CompressionCodecName.SNAPPY def file[T <: GenericRecord with CsvGenerator](format: String, objectExtractor: ObjectExtractor[T], outDirectory: String): Shard => DataWriter[T] = { val extension = s".$format" + (if (format == "parquet") s"${compressionCodec.getExtension}" else "") // Generate a meaningful file name for the target file name based on the source shard index name and shard number. (sourceShard: Shard) => { val outFile: File = Paths.get(outDirectory, s"part-r-${sourceShard.indexName}.${sourceShard.shard}$extension").toFile if (outFile.exists) FileUtils.forceDelete(outFile) new File(outFile.getParent).mkdirs() FileDataWriter[T](format, objectExtractor.schema, outFile.toString, compressionCodec) } } def index[T <: GenericRecord](indexMap: Map[String, String], // source-index -> target-index esEndpoint: String) (implicit system: ActorSystem, executionContext: ExecutionContextExecutor, actorMaterializer: ActorMaterializer ): Shard => DataWriter[T] = { (sourceShard: Shard) => { val targetIndex = indexMap(sourceShard.indexName) new IndexDataWriter[T](indexName = targetIndex, esEndpoint = esEndpoint) } } }
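DataWriterFactory prepares each output file by force-deleting a stale copy and then creating the parent directory. FileUtils.forceMkdir is an alternative to File.mkdirs that throws a descriptive IOException when the directory cannot be created; the target path below is hypothetical:

import java.nio.file.Paths
import org.apache.commons.io.FileUtils

val outFile = Paths.get("target/es-dump", "part-r-cm_well_p0.0.parquet").toFile  // hypothetical target
if (outFile.exists()) FileUtils.forceDelete(outFile)   // drop output from a previous run
FileUtils.forceMkdir(outFile.getParentFile)            // ensure target/es-dump exists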
Example 143
Source File: MetadataTest.scala From spark-pagerank with MIT License | 5 votes |
package com.soundcloud.spark.pagerank import java.io.File import org.apache.commons.io.FileUtils import org.scalatest.{ BeforeAndAfter, FunSuite, Matchers } class MetadataTest extends FunSuite with BeforeAndAfter with Matchers with SparkTesting { val path = "target/test/MetadataTest" val metadata = Metadata(numVertices=1) before { FileUtils.deleteDirectory(new File(path)) } test("save and load") { Metadata.save(spark, metadata, path) Metadata.load(spark, path) shouldBe (metadata) } }
Example 144
Source File: GraphBuilderAppTest.scala From spark-pagerank with MIT License | 5 votes |
package com.soundcloud.spark.pagerank import java.io.File import org.apache.commons.io.FileUtils import org.scalatest.{ BeforeAndAfter, FunSuite, Matchers } class GraphBuilderAppTest extends FunSuite with BeforeAndAfter with Matchers with GraphTesting with SparkTesting { val path = "target/test/GraphBuilderAppTest" before { FileUtils.deleteDirectory(new File(path)) } // TODO(jd): design a better integration test as this just runs the app without assertions test("integration test") { val options = new GraphBuilderApp.Options() options.output = path options.numPartitions = 1 val input = spark.sparkContext.parallelize(Seq( (1, 5, 1.0), (2, 1, 1.0), (3, 1, 1.0), (4, 2, 1.0), (4, 3, 1.0), (5, 3, 1.0), (5, 4, 1.0) ).map(_.productIterator.toSeq.mkString("\t"))) GraphBuilderApp.runFromInputs(options, spark, input) } }
Example 145
Source File: PageRankAppTest.scala From spark-pagerank with MIT License | 5 votes |
package com.soundcloud.spark.pagerank import java.io.File import org.apache.commons.io.FileUtils import org.apache.spark.storage.StorageLevel import org.scalatest.{ BeforeAndAfter, Matchers, FunSuite } class PageRankAppTest extends FunSuite with BeforeAndAfter with Matchers with GraphTesting with SparkTesting { val path = "target/test/PageRankAppTest" before { FileUtils.deleteDirectory(new File(path)) } // TODO(jd): design a better integration test as this just runs the app without assertions test("integration test") { val options = new PageRankApp.Options() options.output = path val numVertices = 5 val prior = 1.0 / numVertices val stats = Seq(s"numVertices,$numVertices") val edges = spark.sparkContext.parallelize(Seq[OutEdgePair]( // node 1 is dangling (2, OutEdge(1, 1.0)), (3, OutEdge(1, 1.0)), (4, OutEdge(2, 0.5)), (4, OutEdge(3, 0.5)), (5, OutEdge(3, 0.5)), (5, OutEdge(4, 0.5)) )) val vertices = spark.sparkContext.parallelize(Seq[RichVertexPair]( (1, VertexMetadata(prior, true)), (2, VertexMetadata(prior, false)), (3, VertexMetadata(prior, false)), (4, VertexMetadata(prior, false)), (5, VertexMetadata(prior, false)) )) val graph = PageRankGraph( numVertices, edges.persist(StorageLevel.MEMORY_ONLY), vertices.persist(StorageLevel.MEMORY_ONLY) ) PageRankApp.runFromInputs( spark, options, graph, priorsOpt = None ) } }
Example 146
Source File: YarnShuffleIntegrationSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.yarn import java.io.File import java.nio.charset.StandardCharsets import com.google.common.io.Files import org.apache.commons.io.FileUtils import org.apache.hadoop.yarn.conf.YarnConfiguration import org.scalatest.Matchers import org.apache.spark._ import org.apache.spark.internal.Logging import org.apache.spark.network.shuffle.ShuffleTestAccessor import org.apache.spark.network.yarn.{YarnShuffleService, YarnTestAccessor} import org.apache.spark.tags.ExtendedYarnTest @ExtendedYarnTest class YarnShuffleIntegrationSuite extends BaseYarnClusterSuite { override def newYarnConfig(): YarnConfiguration = { val yarnConfig = new YarnConfiguration() yarnConfig.set(YarnConfiguration.NM_AUX_SERVICES, "spark_shuffle") yarnConfig.set(YarnConfiguration.NM_AUX_SERVICE_FMT.format("spark_shuffle"), classOf[YarnShuffleService].getCanonicalName) yarnConfig.set("spark.shuffle.service.port", "0") yarnConfig } test("external shuffle service") { val shuffleServicePort = YarnTestAccessor.getShuffleServicePort val shuffleService = YarnTestAccessor.getShuffleServiceInstance val registeredExecFile = YarnTestAccessor.getRegisteredExecutorFile(shuffleService) logInfo("Shuffle service port = " + shuffleServicePort) val result = File.createTempFile("result", null, tempDir) val finalState = runSpark( false, mainClassName(YarnExternalShuffleDriver.getClass), appArgs = Seq(result.getAbsolutePath(), registeredExecFile.getAbsolutePath), extraConf = Map( "spark.shuffle.service.enabled" -> "true", "spark.shuffle.service.port" -> shuffleServicePort.toString ) ) checkResult(finalState, result) assert(YarnTestAccessor.getRegisteredExecutorFile(shuffleService).exists()) } } private object YarnExternalShuffleDriver extends Logging with Matchers { val WAIT_TIMEOUT_MILLIS = 10000 def main(args: Array[String]): Unit = { if (args.length != 2) { // scalastyle:off println System.err.println( s""" |Invalid command line: ${args.mkString(" ")} | |Usage: ExternalShuffleDriver [result file] [registered exec file] """.stripMargin) // scalastyle:on println System.exit(1) } val sc = new SparkContext(new SparkConf() .setAppName("External Shuffle Test")) val conf = sc.getConf val status = new File(args(0)) val registeredExecFile = new File(args(1)) logInfo("shuffle service executor file = " + registeredExecFile) var result = "failure" val execStateCopy = new File(registeredExecFile.getAbsolutePath + "_dup") try { val data = sc.parallelize(0 until 100, 10).map { x => (x % 10) -> x }.reduceByKey{ _ + _ }. collect().toSet sc.listenerBus.waitUntilEmpty(WAIT_TIMEOUT_MILLIS) data should be ((0 until 10).map{x => x -> (x * 10 + 450)}.toSet) result = "success" // only one process can open a leveldb file at a time, so we copy the files FileUtils.copyDirectory(registeredExecFile, execStateCopy) assert(!ShuffleTestAccessor.reloadRegisteredExecutors(execStateCopy).isEmpty) } finally { sc.stop() FileUtils.deleteDirectory(execStateCopy) Files.write(result, status, StandardCharsets.UTF_8) } } }
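The shuffle test copies the registered-executors directory with FileUtils.copyDirectory because only one process can open the LevelDB files at a time, then deletes the duplicate. The copy/delete pair on its own, using a throwaway directory:

import java.io.File
import java.nio.file.Files
import org.apache.commons.io.FileUtils

val original = Files.createTempDirectory("registeredExecutors.ldb").toFile
Files.createFile(original.toPath.resolve("CURRENT"))       // stand-in for LevelDB contents

val copy = new File(original.getAbsolutePath + "_dup")
FileUtils.copyDirectory(original, copy)                    // snapshot the whole tree
assert(new File(copy, "CURRENT").exists())

FileUtils.deleteDirectory(copy)
FileUtils.deleteDirectory(original)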
Example 147
Source File: SortShuffleSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark import java.io.File import scala.collection.JavaConverters._ import org.apache.commons.io.FileUtils import org.apache.commons.io.filefilter.TrueFileFilter import org.scalatest.BeforeAndAfterAll import org.apache.spark.rdd.ShuffledRDD import org.apache.spark.serializer.{JavaSerializer, KryoSerializer} import org.apache.spark.shuffle.sort.SortShuffleManager import org.apache.spark.util.Utils class SortShuffleSuite extends ShuffleSuite with BeforeAndAfterAll { // This test suite should run all tests in ShuffleSuite with sort-based shuffle. private var tempDir: File = _ override def beforeAll() { super.beforeAll() conf.set("spark.shuffle.manager", "sort") } override def beforeEach(): Unit = { super.beforeEach() tempDir = Utils.createTempDir() conf.set("spark.local.dir", tempDir.getAbsolutePath) } override def afterEach(): Unit = { try { Utils.deleteRecursively(tempDir) } finally { super.afterEach() } } test("SortShuffleManager properly cleans up files for shuffles that use the serialized path") { sc = new SparkContext("local", "test", conf) // Create a shuffled RDD and verify that it actually uses the new serialized map output path val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x)) val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4)) .setSerializer(new KryoSerializer(conf)) val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]] assert(SortShuffleManager.canUseSerializedShuffle(shuffleDep)) ensureFilesAreCleanedUp(shuffledRdd) } test("SortShuffleManager properly cleans up files for shuffles that use the deserialized path") { sc = new SparkContext("local", "test", conf) // Create a shuffled RDD and verify that it actually uses the old deserialized map output path val rdd = sc.parallelize(1 to 10, 1).map(x => (x, x)) val shuffledRdd = new ShuffledRDD[Int, Int, Int](rdd, new HashPartitioner(4)) .setSerializer(new JavaSerializer(conf)) val shuffleDep = shuffledRdd.dependencies.head.asInstanceOf[ShuffleDependency[_, _, _]] assert(!SortShuffleManager.canUseSerializedShuffle(shuffleDep)) ensureFilesAreCleanedUp(shuffledRdd) } private def ensureFilesAreCleanedUp(shuffledRdd: ShuffledRDD[_, _, _]): Unit = { def getAllFiles: Set[File] = FileUtils.listFiles(tempDir, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).asScala.toSet val filesBeforeShuffle = getAllFiles // Force the shuffle to be performed shuffledRdd.count() // Ensure that the shuffle actually created files that will need to be cleaned up val filesCreatedByShuffle = getAllFiles -- filesBeforeShuffle filesCreatedByShuffle.map(_.getName) should be Set("shuffle_0_0_0.data", "shuffle_0_0_0.index") // Check that the cleanup actually removes the files sc.env.blockManager.master.removeShuffle(0, blocking = true) for (file <- filesCreatedByShuffle) { assert (!file.exists(), s"Shuffle file $file was not cleaned up") } } }
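getAllFiles above works because FileUtils.listFiles recurses into subdirectories whenever the second filter (the directory filter) is TrueFileFilter.INSTANCE; passing null there would list only the top level. A compact demonstration:

import java.io.File
import java.nio.file.Files
import scala.collection.JavaConverters._
import org.apache.commons.io.FileUtils
import org.apache.commons.io.filefilter.TrueFileFilter

val root = Files.createTempDirectory("shuffle-files").toFile
val sub  = new File(root, "0"); sub.mkdirs()
FileUtils.touch(new File(sub, "shuffle_0_0_0.data"))

val all = FileUtils.listFiles(root, TrueFileFilter.INSTANCE, TrueFileFilter.INSTANCE).asScala
assert(all.map(_.getName).toSet == Set("shuffle_0_0_0.data"))
FileUtils.deleteDirectory(root)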
Example 148
Source File: TransformerSerialization.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperables.spark.wrappers.transformers import java.nio.file.{Files, Path} import org.apache.commons.io.FileUtils import org.scalatest.{BeforeAndAfter, Suite} import ai.deepsense.deeplang.doperables.Transformer import ai.deepsense.deeplang.doperables.dataframe.DataFrame import ai.deepsense.deeplang.{DeeplangIntegTestSupport, ExecutionContext} trait TransformerSerialization extends Suite with BeforeAndAfter { var tempDir: Path = _ before { tempDir = Files.createTempDirectory("writeReadTransformer") } after { FileUtils.deleteDirectory(tempDir.toFile) } } object TransformerSerialization { implicit class TransformerSerializationOps(private val transformer: Transformer) { def applyTransformationAndSerialization( path: Path, df: DataFrame)(implicit executionContext: ExecutionContext): DataFrame = { val result = transformer._transform(executionContext, df) val deserialized = loadSerializedTransformer(path) val resultFromSerializedTransformer = deserialized._transform(executionContext, df) DeeplangIntegTestSupport.assertDataFramesEqual(result, resultFromSerializedTransformer) result } def loadSerializedTransformer( path: Path)( implicit executionContext: ExecutionContext): Transformer = { val outputPath: Path = path.resolve(this.getClass.getName) transformer.save(executionContext, outputPath.toString) Transformer.load(executionContext, outputPath.toString) } } }
Example 149
Source File: MavenAddManagedDependenciesRuleTest.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.maven import java.io.File import org.apache.commons.io.FileUtils import com.ebay.rtran.maven.util.MavenModelUtil import com.ebay.rtran.maven.util.MavenModelUtil.SimpleDependency import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers} class MavenAddManagedDependenciesRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach { val projectRoot = new File(getClass.getClassLoader.getResource("mvnproject").getFile) val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak") override def beforeEach = { FileUtils.deleteQuietly(destProjectRoot) FileUtils.copyDirectory(projectRoot, destProjectRoot) } "MavenAddManagedDependenciesRule" should "be able to add dependencies to dependency management" in { val ruleConfig = MavenAddManagedDependenciesRuleConfig( Set( SimpleDependency("org.slf4j", "slf4j-api", Some("1.7.12")), SimpleDependency("com.typesafe.akka", "akka-actor_2.11", Some("2.3.9")) ) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenAddManagedDependenciesRule(ruleConfig) provider save rule.transform(model) val transformed = provider create projectCtx val parent = transformed.parents.head val dm1 = parent.managedDependencies.values.find(_.getArtifactId == "slf4j-api") dm1 should not be None dm1.get.getVersion should be ("1.7.12") val dm2 = parent.managedDependencies.values.find(_.getArtifactId == "akka-actor_2.11") dm2 should not be None dm2.get.getVersion should be ("2.4.17") } }
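The beforeEach here is a common fixture pattern: FileUtils.deleteQuietly removes the previous scratch copy without complaining if it is missing, and FileUtils.copyDirectory recreates it from the pristine resource. The same reset in miniature (the pristine directory is fabricated for the sketch):

import java.io.File
import java.nio.file.Files
import org.apache.commons.io.FileUtils

val pristine = Files.createTempDirectory("mvnproject").toFile      // stands in for the test resource
Files.createFile(pristine.toPath.resolve("pom.xml"))
val scratch  = new File(pristine.getParentFile, pristine.getName + "-bak")

FileUtils.deleteQuietly(scratch)             // no exception if the previous copy is missing
FileUtils.copyDirectory(pristine, scratch)   // fresh copy for the test to mutate
assert(new File(scratch, "pom.xml").exists())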
Example 150
Source File: MultiModuleMavenModelProviderTest.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.maven import java.io.{File, FileReader} import org.apache.commons.io.FileUtils import org.apache.maven.model.io.xpp3.MavenXpp3Reader import org.codehaus.plexus.util.xml.Xpp3Dom import org.scalatest.{FlatSpecLike, Matchers} import collection.JavaConversions._ class MultiModuleMavenModelProviderTest extends FlatSpecLike with Matchers { val projectRoot = new File(getClass.getClassLoader.getResource("mvnproject").getFile) "MavenModelProvider" should "resolve all the pom files in the project" in { val projectCtx = new MavenProjectCtx(projectRoot) val provider = new MultiModuleMavenModelProvider val model = provider.create(projectCtx) model.modules foreach { m => m.resolvedDependencies foreach {dep => Option(dep.getVersion) should not be None } } } "MavenModelProvider" should "resolve all the pom files recursively in the project" in { val dir = new File(getClass.getClassLoader.getResource("recursive").getFile) val projectCtx = new MavenProjectCtx(dir) val provider = new MultiModuleMavenModelProvider val model = provider.create(projectCtx) model.modules.size should be (5) } "MavenModelProvider" should "not remove empty property nodes" in { val dir = new File(projectRoot.getParent, projectRoot.getName + "-bak") FileUtils.deleteQuietly(dir) FileUtils.copyDirectory(projectRoot, dir) val projectCtx = new MavenProjectCtx(dir) val provider = new MultiModuleMavenModelProvider val model = provider.create(projectCtx) provider save model val pom = new MavenXpp3Reader().read(new FileReader(new File(dir, "pom.xml"))) pom.getProperties.getProperty("empty.property1") should be ("") pom.getProperties.getProperty("empty.property2") should be ("") pom.getProperties.getProperty("empty.property3") should be ("") } "MavenModelProvider" should "not break on xlint element" in { val dir = new File(projectRoot.getParent, projectRoot.getName + "-bak") FileUtils.deleteQuietly(dir) FileUtils.copyDirectory(projectRoot, dir) val projectCtx = new MavenProjectCtx(dir) val provider = new MultiModuleMavenModelProvider val model = provider.create(projectCtx) for { root <- model.parents.headOption build <- Option(root.pomModel.getBuild) sourcePlugin <- build.getPlugins.find(_.getArtifactId == "some-maven-plugin") } { build.removePlugin(sourcePlugin) } provider save model val pom = new MavenXpp3Reader().read(new FileReader(new File(dir, "pom.xml"))) pom.getBuild.getPlugins.size() should be(1) val plugin = pom.getBuild.getPlugins.find(_.getArtifactId == "maven-source-plugin") plugin shouldNot be(None) plugin.map(_.getConfiguration.asInstanceOf[Xpp3Dom].getChild("compilerArguments").getChildCount) should be(Some(3)) } }
Example 151
Source File: MavenDependenciesMappingRuleTest.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.maven import java.io.File import org.apache.commons.io.FileUtils import com.ebay.rtran.maven.util.MavenModelUtil import com.ebay.rtran.maven.util.MavenModelUtil.SimpleDependency import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers} import scala.collection.JavaConversions._ class MavenDependenciesMappingRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach { val projectRoot = new File(getClass.getClassLoader.getResource("mvnproject").getFile) val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak") override def beforeEach = { FileUtils.deleteQuietly(destProjectRoot) FileUtils.copyDirectory(projectRoot, destProjectRoot) } "MavenDependenciesMappingRule" should "be able to alter dependencies according to mapping" in { val ruleConfig = MavenDependenciesMappingRuleConfig( Set(SimpleDependency("junit", "junit")), Set(SimpleDependency("org.slf4j", "slf4j-api"), SimpleDependency("org.slf4j", "slf4j-log4j12")) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenDependenciesMappingRule(ruleConfig) provider save rule.transform(model) val transformed = provider create projectCtx transformed.modules foreach { module => module.pomModel.getDependencies.exists(_.getArtifactId == "junit") should be (false) module.pomModel.getDependencies.exists(_.getArtifactId == "slf4j-api") should be (true) module.pomModel.getDependencies.exists(_.getArtifactId == "slf4j-log4j12") should be (true) } } "MavenDependenciesMappingRule" should "not alter dependencies that don't exist" in { val ruleConfig = MavenDependenciesMappingRuleConfig( Set(SimpleDependency("org.slf4j", "slf4j-api")), Set(SimpleDependency("org.slf4j", "slf4j-log4j12")) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenDependenciesMappingRule(ruleConfig) provider save rule.transform(model) val transformed = provider create projectCtx transformed.modules foreach { module => module.pomModel.getDependencies.exists(_.getArtifactId == "slf4j-api") should be (false) module.pomModel.getDependencies.exists(_.getArtifactId == "slf4j-log4j12") should be (false) } } "MavenDependenciesMappingRule" should "alter dependencies matches that match other condition" in { val ruleConfig = MavenDependenciesMappingRuleConfig( Set(SimpleDependency("junit", "junit", Some("4.9"))), Set(SimpleDependency("org.slf4j", "slf4j-api"), SimpleDependency("org.slf4j", "slf4j-log4j12")) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenDependenciesMappingRule(ruleConfig) provider save rule.transform(model) val transformed = provider create projectCtx transformed.modules foreach { module => if (module.pomModel.getPackaging == "pom") { module.pomModel.getDependencies.exists(_.getArtifactId == "junit") should be (true) } else { module.pomModel.getDependencies.exists(_.getArtifactId == "junit") should be (false) module.pomModel.getDependencies.exists(_.getArtifactId == "slf4j-api") should be (true) module.pomModel.getDependencies.exists(_.getArtifactId == "slf4j-log4j12") should be (true) } } } "MavenDependenciesMappingRule" should "not alter dependencies if other condition doesn't match" in { val ruleConfig = MavenDependenciesMappingRuleConfig( Set(SimpleDependency("junit", "junit", 
scope = Some("compile"))), Set(SimpleDependency("org.slf4j", "slf4j-api"), SimpleDependency("org.slf4j", "slf4j-log4j12")) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenDependenciesMappingRule(ruleConfig) provider save rule.transform(model) val transformed = provider create projectCtx transformed.modules foreach { module => module.pomModel.getDependencies.exists(_.getArtifactId == "junit") should be (true) module.pomModel.getDependencies.exists(_.getArtifactId == "slf4j-api") should be (false) module.pomModel.getDependencies.exists(_.getArtifactId == "slf4j-log4j12") should be (false) } } }
Example 152
Source File: MavenRemoveDependenciesRuleTest.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.maven import java.io.File import org.apache.commons.io.FileUtils import com.ebay.rtran.maven.util.MavenModelUtil import com.ebay.rtran.maven.util.MavenModelUtil.SimpleDependency import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers} import scala.collection.JavaConversions._ class MavenRemoveDependenciesRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach { val projectRoot = new File(getClass.getClassLoader.getResource("mvnproject").getFile) val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak") override def beforeEach = { FileUtils.deleteQuietly(destProjectRoot) FileUtils.copyDirectory(projectRoot, destProjectRoot) } "MavenRemoveDependencies" should "be able to remove dependencies" in { val ruleConfig = MavenRemoveDependenciesRuleConfig( Set(SimpleDependency("junit", "junit")) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenRemoveDependenciesRule(ruleConfig) provider save rule.transform(model) val transformed = provider create projectCtx transformed.modules foreach { module => module.pomModel.getDependencies.exists(_.getArtifactId == "junit") should be (false) } } "MavenAddDependenciesRule" should "not remove dependencies that don't exist" in { val ruleConfig = MavenRemoveDependenciesRuleConfig( Set(SimpleDependency("org.slf4j", "slf4j-api")) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenRemoveDependenciesRule(ruleConfig) val originalSizes = model.modules map (_.pomModel.getDependencies.size) provider save rule.transform(model) val transformed = provider create projectCtx transformed.modules map (_.pomModel.getDependencies.size) should be (originalSizes) } "MavenRemoveDependencies" should "remove dependencies matches that match other condition" in { val ruleConfig = MavenRemoveDependenciesRuleConfig( Set(SimpleDependency("junit", "junit", version = Some("4.9"))) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenRemoveDependenciesRule(ruleConfig) provider save rule.transform(model) val transformed = provider create projectCtx transformed.modules foreach { module => if (module.pomModel.getPackaging == "pom") { module.pomModel.getDependencies.exists(_.getArtifactId == "junit") should be (true) } else { module.pomModel.getDependencies.exists(_.getArtifactId == "junit") should be (false) } } } "MavenRemoveDependencies" should "not remove dependencies if other condition doesn't match" in { val ruleConfig = MavenRemoveDependenciesRuleConfig( Set(SimpleDependency("junit", "junit", scope = Some("compile"))) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenRemoveDependenciesRule(ruleConfig) provider save rule.transform(model) val transformed = provider create projectCtx transformed.modules foreach { module => module.pomModel.getDependencies.exists(_.getArtifactId == "junit") should be (true) } } }
Example 153
Source File: MavenPluginsMappingRuleTest.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.maven import java.io.File import org.apache.commons.io.FileUtils import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers} import scala.collection.JavaConversions._ class MavenPluginsMappingRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach { val projectRoot = new File(getClass.getClassLoader.getResource("mvnproject").getFile) val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak") override def beforeEach = { FileUtils.deleteQuietly(destProjectRoot) FileUtils.copyDirectory(projectRoot, destProjectRoot) } "MavenPluginsMappingRule" should "be able to alter both plugins and managed plugins" in { val ruleConfig = MavenPluginsMappingRuleConfig( List( PluginMapping( SimplePlugin(Some("com.ebay.rtran.old"), "some-maven-plugin"), SimplePlugin(Some("com.ebay.rtran.new"), "some-maven-plugin") ) ) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenPluginsMappingRule(ruleConfig) provider save rule.transform(model) val transformed = provider create projectCtx transformed.parents.head .pomModel.getBuild.getPluginManagement.getPlugins .exists(_.getGroupId == "com.ebay.rtran.old") should be (false) transformed.parents.head .pomModel.getBuild.getPluginManagement.getPlugins .exists(_.getGroupId == "com.ebay.rtran.new") should be (true) transformed.parents.head .pomModel.getBuild.getPlugins .exists(_.getGroupId == "com.ebay.rtran.old") should be (false) transformed.parents.head .pomModel.getBuild.getPlugins .exists(_.getGroupId == "com.ebay.rtran.new") should be (true) } "MavenPluginsMappingRule" should "not alter plugins or managed plugins that don't exist" in { val ruleConfig = MavenPluginsMappingRuleConfig( List( PluginMapping( SimplePlugin(Some("com.ebay.rtran.old"), "non-exist"), SimplePlugin(Some("com.ebay.rtran.new"), "non-exist") ) ) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenPluginsMappingRule(ruleConfig) val mpSize = model.parents.head .pomModel.getBuild.getPluginManagement.getPlugins.size val pluginSize = model.parents.head .pomModel.getBuild.getPlugins.size provider save rule.transform(model) val transformed = provider create projectCtx transformed.parents.head .pomModel.getBuild.getPluginManagement.getPlugins.size should be (mpSize) transformed.parents.head .pomModel.getBuild.getPluginManagement.getPlugins .exists(_.getGroupId == "com.ebay.rtran.old") should be (true) transformed.parents.head .pomModel.getBuild.getPlugins.size should be (pluginSize) transformed.parents.head .pomModel.getBuild.getPlugins .exists(_.getGroupId == "com.ebay.rtran.old") should be (true) } }
Example 154
Source File: MavenRemoveManagedDependenciesRuleTest.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.maven import java.io.File import com.ebay.rtran.maven.util.MavenModelUtil import MavenModelUtil.SimpleDependency import org.apache.commons.io.FileUtils import org.scalatest.{BeforeAndAfterEach, Matchers, FlatSpecLike} import scala.collection.JavaConversions._ class MavenRemoveManagedDependenciesRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach { val projectRoot = new File(getClass.getClassLoader.getResource("mvnproject").getFile) val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak") override def beforeEach = { FileUtils.deleteQuietly(destProjectRoot) FileUtils.copyDirectory(projectRoot, destProjectRoot) } "MavenRemoveManagedDependenciesRule" should "be able to remove managed dependencies" in { val ruleConfig = MavenRemoveManagedDependenciesRuleConfig( Set(SimpleDependency("org.eclipse.aether", "aether-spi")) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenRemoveManagedDependenciesRule(ruleConfig) provider save rule.transform(model) val transformed = provider create projectCtx transformed.parents.head .pomModel.getDependencyManagement.getDependencies.exists(_.getArtifactId == "aether-spi") should be (false) } "MavenRemoveManagedDependenciesRule" should "not remove managed dependencies that don't exist" in { val ruleConfig = MavenRemoveManagedDependenciesRuleConfig( Set(SimpleDependency("org.slf4j", "slf4j-api")) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenRemoveManagedDependenciesRule(ruleConfig) val originalSize = model.parents.head .pomModel.getDependencyManagement.getDependencies.size provider save rule.transform(model) val transformed = provider create projectCtx transformed.parents.head .pomModel.getDependencyManagement.getDependencies.size should be (originalSize) } "MavenRemoveManagedDependenciesRule" should "remove managed dependencies matches that match other condition" in { val ruleConfig = MavenRemoveManagedDependenciesRuleConfig( Set(SimpleDependency("org.eclipse.aether", "aether-spi", version = Some("1.0.2.v20150114"))) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenRemoveManagedDependenciesRule(ruleConfig) provider save rule.transform(model) val transformed = provider create projectCtx transformed.parents.head .pomModel.getDependencyManagement.getDependencies.exists(_.getArtifactId == "aether-spi") should be (false) } "MavenRemoveManagedDependenciesRule" should "not remove managed dependencies if other condition doesn't match" in { val ruleConfig = MavenRemoveManagedDependenciesRuleConfig( Set(SimpleDependency("org.eclipse.aether", "aether-spi", version = Some("1.0.3.v20150114"))) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenRemoveManagedDependenciesRule(ruleConfig) provider save rule.transform(model) val transformed = provider create projectCtx transformed.parents.head .pomModel.getDependencyManagement.getDependencies.exists(_.getArtifactId == "aether-spi") should be (true) } }
Example 155
Source File: MavenRemoveRepositoriesRuleTest.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.maven import java.io.File import org.apache.commons.io.FileUtils import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers} class MavenRemoveRepositoriesRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach { val projectRoot = new File(getClass.getClassLoader.getResource("mvnproject").getFile) val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak") override def beforeEach = { FileUtils.deleteQuietly(destProjectRoot) FileUtils.copyDirectory(projectRoot, destProjectRoot) } "MavenRemoveRepositoriesRule" should "remove repository that matches given patterns" in { val ruleConfig = MavenRemoveRepositoriesRuleConfig( Set( ".*/content/repositories/releases[/]?", ".*/content/repositories/snapshots[/]?" ) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val rule = new MavenRemoveRepositoriesRule(ruleConfig) val model = provider create projectCtx provider save (rule transform model) val transformed = provider create projectCtx transformed.modules foreach { module => module.pomModel.getRepositories.size should be (0) } } }
Example 156
Source File: MavenAddDependenciesRuleTest.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.maven import java.io.File import org.apache.commons.io.FileUtils import com.ebay.rtran.maven.util.MavenModelUtil import com.ebay.rtran.maven.util.MavenModelUtil.SimpleDependency import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers} import scala.collection.JavaConversions._ class MavenAddDependenciesRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach { val projectRoot = new File(getClass.getClassLoader.getResource("mvnproject").getFile) val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak") override def beforeEach = { FileUtils.deleteQuietly(destProjectRoot) FileUtils.copyDirectory(projectRoot, destProjectRoot) } "MavenAddDependenciesRule" should "be able to add dependencies" in { val ruleConfig = MavenAddDependenciesRuleConfig( Set( SimpleDependency("org.slf4j", "slf4j-api"), SimpleDependency("org.slf4j", "slf4j-log4j12") ) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenAddDependenciesRule(ruleConfig) provider save rule.transform(model) val transformed = provider create projectCtx transformed.modules foreach { module => module.pomModel.getDependencies.exists(_.getArtifactId == "slf4j-api") should be (true) module.pomModel.getDependencies.exists(_.getArtifactId == "slf4j-log4j12") should be (true) } } "MavenAddDependenciesRule" should "not add dependencies that already exist" in { val ruleConfig = MavenAddDependenciesRuleConfig( Set( SimpleDependency("junit", "junit") ) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenAddDependenciesRule(ruleConfig) val originalSize = model.modules .find(_.pomModel.getPackaging == "pom") .map(_.pomModel.getDependencies.size) provider save rule.transform(model) val transformed = provider create projectCtx transformed.modules .find(_.pomModel.getPackaging == "pom") .map(_.pomModel.getDependencies.size) should be (originalSize) transformed.modules foreach { module => module.pomModel.getDependencies.exists(_.getArtifactId == "junit") should be (true) } } }
Example 157
Source File: MavenRemovePluginsRuleTest.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.maven import java.io.File import org.apache.commons.io.FileUtils import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers} import scala.collection.JavaConversions._ class MavenRemovePluginsRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach { val projectRoot = new File(getClass.getClassLoader.getResource("mvnproject").getFile) val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak") override def beforeEach = { FileUtils.deleteQuietly(destProjectRoot) FileUtils.copyDirectory(projectRoot, destProjectRoot) } "MavenRemovePluginsRule" should "be able to remove both plugins and managed plugins" in { val ruleConfig = MavenRemoveManagedPluginsRuleConfig( Set(SimplePlugin(artifactId = "maven-source-plugin")) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenRemovePluginsRule(ruleConfig) provider save rule.transform(model) val transformed = provider create projectCtx transformed.parents.head .pomModel.getBuild.getPluginManagement.getPlugins .exists(_.getArtifactId == "maven-source-plugin") should be (false) transformed.parents.head .pomModel.getBuild.getPlugins .exists(_.getArtifactId == "maven-source-plugin") should be (false) } "MavenRemovePluginsRule" should "not remove plugins or managed plugins that don't exist" in { val ruleConfig = MavenRemoveManagedPluginsRuleConfig( Set(SimplePlugin(artifactId = "maven-surefire-plugin")) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenRemovePluginsRule(ruleConfig) val mpSize = model.parents.head.pomModel.getBuild.getPluginManagement.getPlugins.size val pluginSize = model.parents.head.pomModel.getBuild.getPlugins.size provider save rule.transform(model) val transformed = provider create projectCtx transformed.parents.head .pomModel.getBuild.getPluginManagement.getPlugins.size should be (mpSize) transformed.parents.head .pomModel.getBuild.getPlugins.size should be (pluginSize) } "MavenRemovePluginsRule" should "remove both plugins and managed plugins matches that match other condition" in { val ruleConfig = MavenRemoveManagedPluginsRuleConfig( Set(SimplePlugin(artifactId = "maven-source-plugin", version = Some("2.2.1"))) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenRemovePluginsRule(ruleConfig) provider save rule.transform(model) val transformed = provider create projectCtx transformed.parents.head .pomModel.getBuild.getPluginManagement.getPlugins .exists(_.getArtifactId == "maven-source-plugin") should be (false) transformed.parents.head .pomModel.getBuild.getPlugins .exists(_.getArtifactId == "maven-source-plugin") should be (false) } "MavenRemoveManagedPluginsRule" should "not remove plugins or managed plugins if other condition doesn't match" in { val ruleConfig = MavenRemoveManagedPluginsRuleConfig( Set(SimplePlugin(artifactId = "maven-source-plugin", version = Some("2.2.0"))) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenRemovePluginsRule(ruleConfig) provider save rule.transform(model) val transformed = provider create projectCtx transformed.parents.head .pomModel.getBuild.getPluginManagement.getPlugins 
.exists(_.getArtifactId == "maven-source-plugin") should be (true) transformed.parents.head .pomModel.getBuild.getPlugins .exists(_.getArtifactId == "maven-source-plugin") should be (true) } }
Example 158
Source File: MavenExcludeDependenciesRuleTest.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.maven import java.io.File import org.apache.commons.io.FileUtils import org.scalatest.{BeforeAndAfterEach, FlatSpecLike, Matchers} import scala.collection.JavaConversions._ class MavenExcludeDependenciesRuleTest extends FlatSpecLike with Matchers with BeforeAndAfterEach { val projectRoot = new File(getClass.getClassLoader.getResource("mvnproject").getFile) val destProjectRoot = new File(projectRoot.getParentFile, projectRoot.getName + "-bak") override def beforeEach = { FileUtils.deleteQuietly(destProjectRoot) FileUtils.copyDirectory(projectRoot, destProjectRoot) } "MavenExcludeDependenciesRule" should "exclude the dependencies if they are used transitively" in { val ruleConfig = MavenExcludeDependenciesRuleConfig( Set(SimpleExclusion("org.springframework", "spring-aop")) ) val projectCtx = new MavenProjectCtx(destProjectRoot) val provider = new MultiModuleMavenModelProvider val model = provider create projectCtx val rule = new MavenExcludeDependenciesRule(ruleConfig) provider save rule.transform(model) val transformed = provider create projectCtx transformed.modules foreach { module => if (module.pomModel.getPackaging != "war") { module.pomModel.getDependencies.forall(_.getExclusions.size == 0) should be (true) }else { module.pomModel.getDependencies.exists(_.getExclusions.size > 0) should be (true) } } } }
Example 159
Source File: ModifyFilesRule.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.generic import org.apache.commons.io.FileUtils import com.ebay.rtran._ import com.ebay.rtran.api.{IRule, IRuleConfig} import com.ebay.rtran.generic.util.{EncodingDetector, FilePathMatcher} class ModifyFilesRule(ruleConfig: ModifyFilesRuleConfig) extends IRule[AllFilesModel] { override def transform(model: AllFilesModel): AllFilesModel = { val modified = model.files filter {file => FilePathMatcher(model.projectRoot, ruleConfig.pathPattern).map(_ matches file) getOrElse false } map {file => val content = ruleConfig.encoding map (encoding => FileUtils.readFileToString(file, encoding)) getOrElse { val (encoding, bytes) = EncodingDetector.guessEncoding(file) new String(bytes, encoding) } val newContent = ruleConfig.contentMappings.foldLeft(content) {(c, contentMapping) => contentMapping match { case ContentMapping(regex, replacement, false) => c.replaceAll(regex, replacement) case ContentMapping(regex, replacement, true) => c.replaceFirst(regex, replacement) } } if (content != newContent) { FileUtils.write(file, newContent, false) Some(file) } else None } collect { case Some(f) => f } model.copy(modified = modified) } } case class ModifyFilesRuleConfig(pathPattern: String, encoding: Option[String], contentMappings: List[ContentMapping]) extends IRuleConfig case class ContentMapping(regex: String, replacement: String, firstOnly: Boolean = false)
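The rule above reads each matched file into memory with FileUtils.readFileToString, applies its regex mappings, and writes the result back with FileUtils.write(file, content, false), where false means overwrite rather than append. A reduced sketch of that read-transform-write round trip, assuming an illustrative file and mapping:

import java.io.File
import org.apache.commons.io.FileUtils

object RewriteInPlace {
  def replaceAllInFile(file: File, regex: String, replacement: String, encoding: String = "UTF-8"): Boolean = {
    val content = FileUtils.readFileToString(file, encoding)
    val updated = content.replaceAll(regex, replacement)
    if (updated != content) {
      // append = false truncates and overwrites, mirroring the rule's FileUtils.write call
      FileUtils.write(file, updated, false)
      true
    } else false
  }

  def main(args: Array[String]): Unit = {
    val f = new File("target/demo.properties") // illustrative file
    FileUtils.writeStringToFile(f, "endpoint=http://old.example.com", "UTF-8")
    val changed = replaceAllInFile(f, "old\\.example\\.com", "new.example.com")
    println(s"changed=$changed content=${FileUtils.readFileToString(f, "UTF-8")}")
  }
}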
Example 160
Source File: AllFilesModel.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.generic import java.io.File import org.apache.commons.io.FileUtils import org.apache.commons.io.filefilter.TrueFileFilter import com.ebay.rtran.api.{IModel, IModelProvider} import scala.collection.JavaConversions._ case class AllFilesModel(projectRoot: File, files: List[File], modified: List[File] = List.empty) extends IModel class AllFilesModelProvider extends IModelProvider[AllFilesModel, GenericProjectCtx] { override def id(): String = getClass.getName override def save(model: AllFilesModel): Unit = { // all files operations are taken in place // simply validate the model if (!model.files.forall(_.exists)) { throw new IllegalStateException(s"${model.files.filterNot(_.exists)} does not exist") } } override def create(project: GenericProjectCtx): AllFilesModel = AllFilesModel( project.rootDir, FileUtils.listFiles(project.rootDir, TrueFileFilter.TRUE, TrueFileFilter.TRUE).toList ) }
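Here listFiles receives TrueFileFilter.TRUE for both the file filter and the directory filter, so the entire project tree is collected (TRUE and INSTANCE name the same accept-everything filter). Passing null as the directory filter instead keeps the listing to the top level only, which the short sketch below contrasts; the root directory is illustrative:

import java.io.File
import scala.collection.JavaConverters._
import org.apache.commons.io.FileUtils
import org.apache.commons.io.filefilter.TrueFileFilter

object ListingDepth {
  def main(args: Array[String]): Unit = {
    val root = new File(".") // illustrative root
    // Recursive: accept every file and descend into every subdirectory.
    val everything = FileUtils.listFiles(root, TrueFileFilter.TRUE, TrueFileFilter.TRUE).asScala
    // Top level only: a null directory filter stops the walk from descending.
    val topLevel = FileUtils.listFiles(root, TrueFileFilter.TRUE, null).asScala
    println(s"recursive=${everything.size} topLevel=${topLevel.size}")
  }
}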
Example 161
Source File: FilePathMatcher.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.generic.util import java.io.File import java.nio.file.{FileSystems, PathMatcher} import com.typesafe.scalalogging.LazyLogging import org.apache.commons.io.FileUtils import org.mozilla.universalchardet.CharsetListener import scala.util.Try object FilePathMatcher { def apply(rootDir: File, pathPattern: String): Try[PathMatcher] = Try { val trimmedPattern = new String(pathPattern.trim.toCharArray.dropWhile(_ == '/')).trim val path=rootDir.getAbsolutePath.replaceAll("\\\\","/") FileSystems.getDefault.getPathMatcher(s"glob:${path}/$trimmedPattern") //FileSystems.getDefault.getPathMatcher(s"glob:${rootDir.getAbsolutePath}/$trimmedPattern") } } object EncodingDetector extends LazyLogging { val DEFAULT_ENCODING = "UTF-8" def guessEncoding(file: File) = { val bytes = FileUtils.readFileToByteArray(file) val dummyListener = new CharsetListener { override def report(charset: String): Unit = {} } val detector = new org.mozilla.universalchardet.UniversalDetector(dummyListener) detector.handleData(bytes, 0, bytes.length) detector.dataEnd() val encoding = Option(detector.getDetectedCharset) getOrElse DEFAULT_ENCODING logger.debug("Detected encoding {} for {}", detector.getDetectedCharset, file) detector.reset() (encoding, bytes) } }
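EncodingDetector loads the whole file with FileUtils.readFileToByteArray and hands the bytes to juniversalchardet, falling back to UTF-8 when no charset is detected. A trimmed sketch of the same read-bytes-then-decode step, with the detector replaced by an explicit Option so the snippet depends only on commons-io; the sample path is illustrative:

import java.io.File
import org.apache.commons.io.FileUtils

object ReadBytesThenDecode {
  val DefaultEncoding = "UTF-8"

  def readWithFallback(file: File, detected: Option[String]): String = {
    val bytes = FileUtils.readFileToByteArray(file) // whole file into memory as a byte array
    new String(bytes, detected.getOrElse(DefaultEncoding))
  }

  def main(args: Array[String]): Unit = {
    val f = new File("target/sample.txt") // illustrative path
    FileUtils.writeStringToFile(f, "héllo", DefaultEncoding)
    println(readWithFallback(f, detected = None))
  }
}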
Example 162
Source File: MoveFilesRule.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.generic import java.io.File import com.ebay.rtran._ import org.apache.commons.io.FileUtils import com.ebay.rtran.api.{IRule, IRuleConfig} import com.ebay.rtran.generic.util.FilePathMatcher class MoveFilesRule(ruleConfig: MoveFilesRuleConfig) extends IRule[AllFilesModel] { override def transform(model: AllFilesModel): AllFilesModel = { val result = ruleConfig.moves.foldLeft(model.files) {(files, move) => val removes = files filter { file => FilePathMatcher(model.projectRoot, move.pathPattern).map(_ matches file).getOrElse(false) } val dest = new File(model.projectRoot, move.destDir) val creates = removes map {f => FileUtils.moveFileToDirectory(f, dest, true) new File(dest, f.getName) } files diff removes ++ creates } model.copy(files = result) } } case class MoveFilesRuleConfig(moves: List[Move]) extends IRuleConfig case class Move(pathPattern: String, destDir: String)
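The rule relies on FileUtils.moveFileToDirectory(f, dest, true), whose last argument creates the destination directory if it is missing. One detail worth noting in the expression files diff removes ++ creates: in Scala, ++ binds tighter than the alphanumeric diff, so it parses as files diff (removes ++ creates); the sketch below writes the grouping explicitly to show the intended "drop the moved originals, add the new locations" order. All paths and names here are illustrative:

import java.io.File
import org.apache.commons.io.FileUtils

object MoveIntoDirectory {
  def main(args: Array[String]): Unit = {
    val projectRoot = new File("target/move-demo") // illustrative root
    val source = new File(projectRoot, "notes/todo.txt")
    FileUtils.writeStringToFile(source, "remember the milk", "UTF-8")

    val dest = new File(projectRoot, "archive")
    // createDestDir = true creates target/move-demo/archive if it does not exist yet
    FileUtils.moveFileToDirectory(source, dest, true)

    val files = List(source)
    val removes = List(source)
    val creates = List(new File(dest, source.getName))
    val updated = (files diff removes) ++ creates // explicit grouping
    println(updated.map(_.getPath))
  }
}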
Example 163
Source File: XMLFilesModel.scala From RTran with Apache License 2.0 | 5 votes |
package com.ebay.rtran.xml import java.io.{File, FileInputStream} import org.apache.axiom.om.{OMElement, OMXMLBuilderFactory} import org.apache.commons.io.FileUtils import com.ebay.rtran.api.{IModel, IModelProvider} import com.ebay.rtran.generic.GenericProjectCtx import com.ebay.rtran.xml.util.XmlUtil import scala.collection.JavaConversions._ import scala.language.postfixOps import scala.util.{Success, Try} case class XMLFilesModel(projectRoot: File, xmlRoots: Map[File, OMElement], modified: Map[File, Option[OMElement]] = Map.empty) extends IModel class XMLFilesModelProvider extends IModelProvider[XMLFilesModel, GenericProjectCtx] { override def id(): String = getClass.getName override def save(model: XMLFilesModel): Unit = { model.modified foreach { case (file, root) => root.map(r => XmlUtil.writeOMElement2File(file, r)) } } override def create(projectCtx: GenericProjectCtx): XMLFilesModel = XMLFilesModel( projectCtx.rootDir, FileUtils.listFiles(projectCtx.rootDir, Array("xml"), true) map {file => file -> Try(OMXMLBuilderFactory.createOMBuilder(new FileInputStream(file)).getDocumentElement) } collect { case (f, Success(r)) => f -> r } toMap ) }
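This provider uses the extension-based overload FileUtils.listFiles(dir, Array("xml"), true): the array holds extensions without the leading dot, and the boolean turns on recursion into subdirectories. A short sketch of that overload with made-up file names:

import java.io.File
import scala.collection.JavaConverters._
import org.apache.commons.io.FileUtils

object FindXmlFiles {
  def main(args: Array[String]): Unit = {
    val root = new File("target/xml-demo") // illustrative directory
    FileUtils.writeStringToFile(new File(root, "pom.xml"), "<project/>", "UTF-8")
    FileUtils.writeStringToFile(new File(root, "sub/web.xml"), "<web-app/>", "UTF-8")
    FileUtils.writeStringToFile(new File(root, "README.md"), "not xml", "UTF-8")

    // Extensions are given without the dot; true means search subdirectories too.
    val xmlFiles = FileUtils.listFiles(root, Array("xml"), true).asScala
    println(xmlFiles.map(_.getName).toList.sorted) // List(pom.xml, web.xml)
  }
}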
Example 164
Source File: BigQueryClientSpecs.scala From spark-bigquery with Apache License 2.0 | 4 votes |
package com.samelamin.spark.bigquery import java.io.File import com.google.api.services.bigquery.Bigquery import com.google.api.services.bigquery.model._ import com.google.cloud.hadoop.io.bigquery._ import com.holdenkarau.spark.testing.DataFrameSuiteBase import com.samelamin.spark.bigquery.converters.{BigQueryAdapter, SchemaConverters} import org.apache.commons.io.FileUtils import org.apache.spark.sql._ import org.mockito.Matchers.{any, eq => mockitoEq} import org.mockito.Mockito._ import org.scalatest.FeatureSpec import org.scalatest.mock.MockitoSugar class BigQueryClientSpecs extends FeatureSpec with DataFrameSuiteBase with MockitoSugar { val BQProjectId = "google.com:foo-project" def setupBigQueryClient(sqlCtx: SQLContext, bigQueryMock: Bigquery): BigQueryClient = { val fakeJobReference = new JobReference() fakeJobReference.setProjectId(BQProjectId) fakeJobReference.setJobId("bigquery-job-1234") val dataProjectId = "publicdata" // Create the job result. val jobStatus = new JobStatus() jobStatus.setState("DONE") jobStatus.setErrorResult(null) val jobHandle = new Job() jobHandle.setStatus(jobStatus) jobHandle.setJobReference(fakeJobReference) // Create table reference. val tableRef = new TableReference() tableRef.setProjectId(dataProjectId) tableRef.setDatasetId("test_dataset") tableRef.setTableId("test_table") // Mock getting Bigquery jobs when(bigQueryMock.jobs().get(any[String], any[String]).execute()) .thenReturn(jobHandle) when(bigQueryMock.jobs().insert(any[String], any[Job]).execute()) .thenReturn(jobHandle) val bigQueryClient = new BigQueryClient(sqlCtx, bigQueryMock) bigQueryClient } scenario("When writing to BQ") { val sqlCtx = sqlContext import sqlCtx.implicits._ val gcsPath = "/tmp/testfile2.json" FileUtils.deleteQuietly(new File(gcsPath)) val adaptedDf = BigQueryAdapter(sc.parallelize(List(1, 2, 3)).toDF) val bigQueryMock = mock[Bigquery](RETURNS_DEEP_STUBS) val fullyQualifiedOutputTableId = "testProjectID:test_dataset.test" val targetTable = BigQueryStrings.parseTableReference(fullyQualifiedOutputTableId) val bigQueryClient = setupBigQueryClient(sqlCtx, bigQueryMock) val bigQuerySchema = SchemaConverters.SqlToBQSchema(adaptedDf) bigQueryClient.load(targetTable,bigQuerySchema,gcsPath) verify(bigQueryMock.jobs().insert(mockitoEq(BQProjectId),any[Job]), times(1)).execute() } scenario("When reading from BQ") { val sqlCtx = sqlContext val fullyQualifiedOutputTableId = "testProjectID:test_dataset.test" val sqlQuery = s"select * from $fullyQualifiedOutputTableId" val bqQueryContext = new BigQuerySQLContext(sqlCtx) bqQueryContext.setBigQueryProjectId(BQProjectId) val bigQueryMock = mock[Bigquery](RETURNS_DEEP_STUBS) val bigQueryClient = setupBigQueryClient(sqlCtx, bigQueryMock) bigQueryClient.selectQuery(sqlQuery) verify(bigQueryMock.jobs().insert(mockitoEq(BQProjectId),any[Job]), times(1)).execute() } scenario("When running a DML Queries") { val sqlCtx = sqlContext val fullyQualifiedOutputTableId = "testProjectID:test_dataset.test" val dmlQuery = s"UPDATE $fullyQualifiedOutputTableId SET test_col = new_value WHERE test_col = old_value" val bqQueryContext = new BigQuerySQLContext(sqlCtx) bqQueryContext.setBigQueryProjectId(BQProjectId) val bigQueryMock = mock[Bigquery](RETURNS_DEEP_STUBS) val bigQueryClient = setupBigQueryClient(sqlCtx, bigQueryMock) bigQueryClient.runDMLQuery(dmlQuery) verify(bigQueryMock.jobs().insert(mockitoEq(BQProjectId),any[Job]), times(1)).execute() } }
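The spec clears its staging file with FileUtils.deleteQuietly, which accepts a file or directory, returns false instead of throwing when the path is missing or cannot be removed, and tolerates null. A tiny sketch; the path is illustrative:

import java.io.File
import org.apache.commons.io.FileUtils

object QuietCleanup {
  def main(args: Array[String]): Unit = {
    val staging = new File("/tmp/testfile2.json")  // illustrative staging path
    val removed = FileUtils.deleteQuietly(staging) // false if nothing was there; never throws
    println(s"removed existing staging file: $removed")
  }
}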