java.io.File Scala Examples

The following examples show how to use java.io.File in Scala. Each example is excerpted from an open-source project; the source file, project, and license are noted above each listing.
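As a warm-up, here is a minimal, self-contained sketch (file and directory names are purely illustrative) of the java.io.File operations that recur throughout the examples below: creating directories, writing a file, listing a directory, and cleaning up.

import java.io.{File, PrintWriter}

object FileBasics extends App {
  val dir = new File("tmp-demo")                      // illustrative path
  if (!dir.exists()) dir.mkdirs()                     // create the directory tree if missing

  val file = new File(dir, "notes.txt")
  val writer = new PrintWriter(file)                  // write a line, always closing the handle
  try writer.println("hello") finally writer.close()

  dir.listFiles().foreach(f => println(s"${f.getName} (${f.length()} bytes)"))

  file.delete()                                       // clean up: file first, then directory
  dir.delete()
}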
Example 1
Source File: KafkaTopicInfo.scala    From matcher   with MIT License
package tools

import java.io.File

import akka.actor.ActorSystem
import com.typesafe.config.ConfigFactory
import com.wavesplatform.dex.queue.KafkaMatcherQueue.eventDeserializer
import com.wavesplatform.dex.queue.{QueueEvent, QueueEventWithMeta}
import com.wavesplatform.dex.settings.toConfigOps
import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.serialization.StringDeserializer

import scala.collection.JavaConverters._
import scala.concurrent.duration.DurationInt

object KafkaTopicInfo extends App {
  implicit val system: ActorSystem = ActorSystem()

  val configFile = new File(args(0))
  val topic      = args(1)
  val from       = args(2).toLong
  val max        = args(3).toInt

  println(s"""configFile: ${configFile.getAbsolutePath}
             |topic: $topic
             |from: $from
             |max: $max""".stripMargin)

  val requestTimeout = java.time.Duration.ofNanos(5.seconds.toNanos)

  val config = ConfigFactory
    .parseString("""waves.dex.events-queue.kafka.consumer.client {
                   |  client.id = "kafka-topics-info"
                   |  enable.auto.commit = false
                   |  auto.offset.reset = earliest
                   |}
                   |
                   |""".stripMargin)
    .withFallback {
      ConfigFactory
        .parseFile(configFile)
        .withFallback(ConfigFactory.defaultApplication())
        .withFallback(ConfigFactory.defaultReference())
        .resolve()
        .getConfig("waves.dex.events-queue.kafka")
    }

  val consumer = new KafkaConsumer[String, QueueEvent](
    config.getConfig("waves.dex.events-queue.kafka.consumer.client").toProperties,
    new StringDeserializer,
    eventDeserializer
  )

  try {
    val topicPartition  = new TopicPartition(topic, 0)
    val topicPartitions = java.util.Collections.singletonList(topicPartition)
    consumer.assign(topicPartitions)

    {
      val r = consumer.partitionsFor(topic, requestTimeout)
      println(s"Partitions:\n${r.asScala.mkString("\n")}")
    }

    {
      val r = consumer.endOffsets(topicPartitions, requestTimeout)
      println(s"End offsets for $topicPartition: ${r.asScala.mkString(", ")}")
    }

    consumer.seek(topicPartition, from)

    val pollDuration = java.time.Duration.ofNanos(1.seconds.toNanos)
    val lastOffset    = from + max
    var continue      = true
    while (continue) {
      println(s"Reading from Kafka")

      val xs = consumer.poll(pollDuration).asScala.toVector
      xs.foreach { msg =>
        println(QueueEventWithMeta(msg.offset(), msg.timestamp(), msg.value()))
      }

      xs.lastOption.foreach { x =>
        if (x.offset() == lastOffset) continue = false
      }
    }
  } finally {
    consumer.close()
  }
} 
Example 2
Source File: DataFrameExample.scala    From drizzle-spark   with Apache License 2.0
// scalastyle:off println
package org.apache.spark.examples.ml

import java.io.File

import scopt.OptionParser

import org.apache.spark.examples.mllib.AbstractParams
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.stat.MultivariateOnlineSummarizer
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.apache.spark.util.Utils


object DataFrameExample {

  case class Params(input: String = "data/mllib/sample_libsvm_data.txt")
    extends AbstractParams[Params]

  def main(args: Array[String]) {
    val defaultParams = Params()

    val parser = new OptionParser[Params]("DataFrameExample") {
      head("DataFrameExample: an example app using DataFrame for ML.")
      opt[String]("input")
        .text(s"input path to dataframe")
        .action((x, c) => c.copy(input = x))
      checkConfig { params =>
        success
      }
    }

    parser.parse(args, defaultParams) match {
      case Some(params) => run(params)
      case _ => sys.exit(1)
    }
  }

  def run(params: Params): Unit = {
    val spark = SparkSession
      .builder
      .appName(s"DataFrameExample with $params")
      .getOrCreate()

    // Load input data
    println(s"Loading LIBSVM file with UDT from ${params.input}.")
    val df: DataFrame = spark.read.format("libsvm").load(params.input).cache()
    println("Schema from LIBSVM:")
    df.printSchema()
    println(s"Loaded training data as a DataFrame with ${df.count()} records.")

    // Show statistical summary of labels.
    val labelSummary = df.describe("label")
    labelSummary.show()

    // Convert features column to an RDD of vectors.
    val features = df.select("features").rdd.map { case Row(v: Vector) => v }
    val featureSummary = features.aggregate(new MultivariateOnlineSummarizer())(
      (summary, feat) => summary.add(Vectors.fromML(feat)),
      (sum1, sum2) => sum1.merge(sum2))
    println(s"Selected features column with average values:\n ${featureSummary.mean.toString}")

    // Save the records in a parquet file.
    val tmpDir = Utils.createTempDir()
    val outputDir = new File(tmpDir, "dataframe").toString
    println(s"Saving to $outputDir as Parquet file.")
    df.write.parquet(outputDir)

    // Load the records back.
    println(s"Loading Parquet file with UDT from $outputDir.")
    val newDF = spark.read.parquet(outputDir)
    println(s"Schema from Parquet:")
    newDF.printSchema()

    spark.stop()
  }
}
// scalastyle:on println 
Example 3
Source File: CommandUtils.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.deploy.worker

import java.io.{File, FileOutputStream, InputStream, IOException}

import scala.collection.JavaConverters._
import scala.collection.Map

import org.apache.spark.SecurityManager
import org.apache.spark.deploy.Command
import org.apache.spark.internal.Logging
import org.apache.spark.launcher.WorkerCommandBuilder
import org.apache.spark.util.Utils

private[deploy] object CommandUtils extends Logging {
  def redirectStream(in: InputStream, file: File) {
    val out = new FileOutputStream(file, true)
    // TODO: It would be nice to add a shutdown hook here that explains why the output is
    //       terminating. Otherwise if the worker dies the executor logs will silently stop.
    new Thread("redirect output to " + file) {
      override def run() {
        try {
          Utils.copyStream(in, out, true)
        } catch {
          case e: IOException =>
            logInfo("Redirection to " + file + " closed: " + e.getMessage)
        }
      }
    }.start()
  }
} 
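Example 3 shows only the redirectStream helper from Spark's CommandUtils. The following is a rough, self-contained sketch of the same redirection pattern; it is not Spark's code and uses the JDK's InputStream.transferTo (Java 9+) in place of Spark's Utils.copyStream.

import java.io.{File, FileOutputStream, InputStream}

object RedirectDemo extends App {
  // Append everything read from `in` to `file` on a background thread.
  def redirect(in: InputStream, file: File): Unit = {
    val out = new FileOutputStream(file, true)
    new Thread("redirect output to " + file) {
      override def run(): Unit =
        try in.transferTo(out)
        finally { out.close(); in.close() }
    }.start()
  }

  // Launch a child process and capture its stdout in a log file.
  val proc = new ProcessBuilder("echo", "hello").start()
  redirect(proc.getInputStream, new File("stdout.log"))
  proc.waitFor()
}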
Example 4
Source File: KeyUtils.scala    From daml   with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.jwt

import java.io.{File, FileInputStream}
import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.security.cert.CertificateFactory
import java.security.interfaces.{ECPublicKey, RSAPrivateKey, RSAPublicKey}
import java.security.spec.PKCS8EncodedKeySpec
import java.security.KeyFactory

import com.daml.lf.data.TryOps.Bracket.bracket
import scalaz.Show
import scalaz.syntax.show._

import scala.util.Try

object KeyUtils {
  final case class Error(what: Symbol, message: String)

  object Error {
    implicit val showInstance: Show[Error] =
      Show.shows(e => s"KeyUtils.Error: ${e.what}, ${e.message}")
  }

  private val mimeCharSet = StandardCharsets.ISO_8859_1

  // Builds a JWKS (JSON Web Key Set) document from a map of key ID -> RSA public key;
  // the key-loading helpers from the original file are not shown in this excerpt.
  def generateJwks(keys: Map[String, RSAPublicKey]): String = {
    def generateKeyEntry(keyId: String, key: RSAPublicKey): String =
      s"""    {
         |      "kid": "$keyId",
         |      "kty": "RSA",
         |      "alg": "RS256",
         |      "use": "sig",
         |      "e": "${java.util.Base64.getUrlEncoder
           .encodeToString(key.getPublicExponent.toByteArray)}",
         |      "n": "${java.util.Base64.getUrlEncoder.encodeToString(key.getModulus.toByteArray)}"
         |    }""".stripMargin

    s"""
       |{
       |  "keys": [
       |${keys.toList.map { case (keyId, key) => generateKeyEntry(keyId, key) }.mkString(",\n")}
       |  ]
       |}
    """.stripMargin
  }
} 
Example 5
Source File: RUtils.scala    From drizzle-spark   with Apache License 2.0
package org.apache.spark.api.r

import java.io.File
import java.util.Arrays

import org.apache.spark.{SparkEnv, SparkException}

private[spark] object RUtils {
  // Local path where R binary packages built from R source code contained in the spark
  // packages specified with "--packages" or "--jars" command line option reside.
  var rPackages: Option[String] = None

  // Returns true if an R interpreter can be launched (checked by running `R --version`);
  // other helpers from the original RUtils are omitted in this excerpt.
  def isRInstalled: Boolean = {
    try {
      val builder = new ProcessBuilder(Arrays.asList("R", "--version"))
      builder.start().waitFor() == 0
    } catch {
      case e: Exception => false
    }
  }
} 
Example 6
Source File: package.scala    From mantis   with Apache License 2.0
package io.iohk.ethereum

import java.io.{File, PrintWriter}
import java.net.{Inet6Address, InetAddress}
import java.security.SecureRandom

import io.iohk.ethereum.crypto._
import org.spongycastle.crypto.AsymmetricCipherKeyPair
import org.spongycastle.crypto.params.ECPublicKeyParameters
import org.spongycastle.math.ec.ECPoint
import org.spongycastle.util.encoders.Hex

import scala.io.Source

package object network {

  val ProtocolVersion = 4

  implicit class ECPublicKeyParametersNodeId(val pubKey: ECPublicKeyParameters) extends AnyVal {
    def toNodeId: Array[Byte] =
      pubKey.asInstanceOf[ECPublicKeyParameters].getQ
        .getEncoded(false)
        .drop(1) // drop type info
  }

  def publicKeyFromNodeId(nodeId: String): ECPoint = {
    val bytes = ECDSASignature.uncompressedIndicator +: Hex.decode(nodeId)
    curve.getCurve.decodePoint(bytes)
  }

  def loadAsymmetricCipherKeyPair(filePath: String, secureRandom: SecureRandom): AsymmetricCipherKeyPair = {
    val file = new File(filePath)
    if(!file.exists()){
      val keysValuePair = generateKeyPair(secureRandom)

      //Write keys to file
      val (priv, _) = keyPairToByteArrays(keysValuePair)
      require(file.getParentFile.exists() || file.getParentFile.mkdirs(), "Key's file parent directory creation failed")
      val writer = new PrintWriter(filePath)
      try {
        writer.write(Hex.toHexString(priv))
      } finally {
        writer.close()
      }

      keysValuePair
    } else {
      val reader = Source.fromFile(filePath)
      try {
        val privHex = reader.mkString
        keyPairFromPrvKey(Hex.decode(privHex))
      } finally {
        reader.close()
      }
    }
  }

  // Formats an address for use in URIs: IPv6 addresses are wrapped in square brackets.
  def getHostName(address: InetAddress): String = {
    val hostName = address.getHostAddress
    address match {
      case _: Inet6Address => s"[$hostName]"
      case _ => hostName
    }
  }

} 
Example 7
Source File: GMM.scala    From spark-tda   with Apache License 2.0
import java.io.{File, PrintWriter}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.clustering.GaussianMixture
import org.apache.spark.sql.functions._

def computeGaussianMixtureModel(
  pathToTextFile: String,
  quantity: Int) {
  case class Point(x: Double, y: Double)

  def save(f: File)(func: PrintWriter => Unit) {
    val p = new PrintWriter(f)
    try {
      func(p)
    } finally {
      p.close()
    }
  }

  val filename = pathToTextFile.split("\\.")(0)

  val outputFilename = s"$filename-GMM-k${quantity}.tsv"

  val points = sc
    .textFile(pathToTextFile)
    .map {
      line => line.trim.split("\\s+")
    }
    .map {
      row => Point(row(0).toDouble, row(1).toDouble)
    }

  val features = points
    .map {
      p => Vectors.dense(p.x, p.y)
    }

  features.cache()

  val gmm = new GaussianMixture()
    .setK(quantity)
    .run(features)

  val predictions = features
    .map {
      f => (f(0), f(1), gmm.predict(f) + 1)
    }
    .collect

  save(new File(outputFilename)) {
    println(s"OUTPUT TO: ${outputFilename}")
    f => predictions.foreach{
      case (x, y, ccid) => f.println(s"${x}\t${y}\t${ccid}")
    }
  }
} 
Example 8
Source File: PointCloudRelation.scala    From geotrellis-pointcloud   with Apache License 2.0
package geotrellis.pointcloud.spark.datasource

import geotrellis.pointcloud.spark.store.hadoop._
import geotrellis.pointcloud.spark.store.hadoop.HadoopPointCloudRDD.{Options => HadoopOptions}
import geotrellis.pointcloud.util.Filesystem
import geotrellis.proj4.CRS
import geotrellis.store.hadoop.util.HdfsUtils
import geotrellis.vector.Extent

import cats.implicits._
import io.pdal._
import io.circe.syntax._
import org.apache.hadoop.fs.Path
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.sources.{BaseRelation, TableScan}
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Row, SQLContext}

import java.io.File

import scala.collection.JavaConverters._

// This class has to be serializable since it is shipped over the network.
class PointCloudRelation(
  val sqlContext: SQLContext,
  path: String,
  options: HadoopOptions
) extends BaseRelation with TableScan with Serializable {

  @transient implicit lazy val sc: SparkContext = sqlContext.sparkContext

  // TODO: switch between HadoopPointCloudRDD and S3PointcCloudRDD
  lazy val isS3: Boolean = path.startsWith("s3")

  override def schema: StructType = {
    lazy val (local, fixedPath) =
      if(path.startsWith("s3") || path.startsWith("hdfs")) {
        val tmpDir = Filesystem.createDirectory()
        val remotePath = new Path(path)
        // copy remote file into local tmp dir
        val localPath = new File(tmpDir, remotePath.getName)
        HdfsUtils.copyPath(remotePath, new Path(s"file:///${localPath.getAbsolutePath}"), sc.hadoopConfiguration)
        (true, localPath.toString)
      } else (false, path)

    val localPipeline =
      options.pipeline
        .hcursor
        .downField("pipeline").downArray
        .downField("filename").withFocus(_ => fixedPath.asJson)
        .top.fold(options.pipeline)(identity)

    val pl = Pipeline(localPipeline.noSpaces)
    if (pl.validate()) pl.execute()
    val pointCloud = try {
      pl.getPointViews().next().getPointCloud(0)
    } finally {
      pl.close()
      if(local) println(new File(fixedPath).delete)
    }

    val rdd = HadoopPointCloudRDD(new Path(path), options)

    val md: (Option[Extent], Option[CRS]) =
      rdd
        .map { case (header, _) => (header.projectedExtent3D.map(_.extent3d.toExtent), header.crs) }
        .reduce { case ((e1, c), (e2, _)) => ((e1, e2).mapN(_ combine _), c) }

    val metadata = new MetadataBuilder().putString("metadata", md.asJson.noSpaces).build

    pointCloud.deriveSchema(metadata)
  }

  override def buildScan(): RDD[Row] = {
    val rdd = HadoopPointCloudRDD(new Path(path), options)
    rdd.flatMap { _._2.flatMap { pc => pc.readAll.toList.map { k => Row(k: _*) } } }
  }
} 
Example 9
Source File: S3PointCloudInputFormat.scala    From geotrellis-pointcloud   with Apache License 2.0
package geotrellis.pointcloud.spark.store.s3

import geotrellis.spark.store.s3._
import geotrellis.pointcloud.spark.store.hadoop.formats._
import geotrellis.pointcloud.util.Filesystem

import io.pdal._
import io.circe.Json
import io.circe.syntax._
import cats.syntax.either._
import org.apache.hadoop.mapreduce.{InputSplit, TaskAttemptContext}
import org.apache.commons.io.FileUtils

import java.io.{File, InputStream}
import java.net.URI

import scala.collection.JavaConverters._

    // Excerpt from inside S3PointCloudInputFormat's record-reader creation logic; the enclosing
    // class and the definitions of mode, pipeline, s3Client and executePipeline are not shown here.
    mode match {
      case "s3" =>
        new S3URIRecordReader[S3PointCloudHeader, List[PointCloud]](s3Client) {
          def read(key: String, uri: URI): (S3PointCloudHeader, List[PointCloud]) = {
            val s3Pipeline =
              pipeline
                .hcursor
                .downField("pipeline").downArray
                .downField("filename").withFocus(_ => uri.toString.asJson)
                .top.fold(pipeline)(identity)

            executePipeline(context)(key, s3Pipeline)
          }
        }

      case _ =>
        val tmpDir = {
          val dir = PointCloudInputFormat.getTmpDir(context)
          if (dir == null) Filesystem.createDirectory()
          else Filesystem.createDirectory(dir)
        }

        new S3StreamRecordReader[S3PointCloudHeader, List[PointCloud]](s3Client) {
          def read(key: String, is: InputStream): (S3PointCloudHeader, List[PointCloud]) = {
            // copy remote file into local tmp dir
            tmpDir.mkdirs() // to be sure that dirs created
            val localPath = new File(tmpDir, key.replace("/", "_"))
            FileUtils.copyInputStreamToFile(is, localPath)
            is.close()

            // use local filename path if it's present in json
            val localPipeline =
              pipeline
                .hcursor
                .downField("pipeline").downArray
                .downField("filename").withFocus(_ => localPath.getAbsolutePath.asJson)
                .top.fold(pipeline)(identity)

            try executePipeline(context)(key, localPipeline) finally {
              localPath.delete()
              tmpDir.delete()
            }
          }
        }
    }
  }
} 
Example 10
Source File: PointCloudTestEnvironment.scala    From geotrellis-pointcloud   with Apache License 2.0
package geotrellis.pointcloud.spark

import geotrellis.spark.testkit._

import org.apache.hadoop.fs.Path
import org.scalatest.Suite

import java.io.File

trait PointCloudTestEnvironment extends TestEnvironment { self: Suite =>
  val testResources = new File("src/test/resources")
  val lasPath = new Path(s"file://${testResources.getAbsolutePath}/las")
  val multipleLasPath = new Path(s"file://${testResources.getAbsolutePath}/las/files")

  def setS3Credentials: Unit = {
    try {
      val conf = ssc.sparkContext.hadoopConfiguration

      conf.set("fs.s3.impl", classOf[org.apache.hadoop.fs.s3a.S3AFileSystem].getName)
      conf.set("fs.s3a.aws.credentials.provider", classOf[com.amazonaws.auth.DefaultAWSCredentialsProviderChain].getName)
      conf.set("fs.s3a.endpoint", "s3.eu-west-2.amazonaws.com")
    } catch {
      case e: Throwable => println(e.getMessage)
    }
  }
} 
Example 11
Source File: CreateSaltedTable.scala    From Taxi360   with Apache License 2.0
package com.hadooparchitecturebook.taxi360.setup.hbase

import java.io.File

import org.apache.commons.lang.StringUtils
import org.apache.hadoop.hbase.{HBaseConfiguration, HColumnDescriptor, HTableDescriptor, TableName}
import org.apache.hadoop.hbase.client.ConnectionFactory
import org.apache.hadoop.hbase.io.compress.Compression
import org.apache.hadoop.hbase.regionserver.{BloomType, ConstantSizeRegionSplitPolicy}
import org.apache.hadoop.hbase.util.Bytes

import scala.collection.mutable


object CreateSaltedTable {
  def main(args:Array[String]): Unit = {

    if (args.length == 0) {
      println("<tableName> <columnFamily> <regionCount> <numOfSalts> <hbaseConfigFolder>")
      return
    }
    val tableName = args(0)
    val columnFamilyName = args(1)
    val regionCount = args(2).toInt
    val numOfSalts = args(3).toInt
    val hbaseConfigFolder = args(4)

    val conf = HBaseConfiguration.create()

    conf.addResource(new File(hbaseConfigFolder + "hbase-site.xml").toURI.toURL)

    val connection = ConnectionFactory.createConnection(conf)

    val admin = connection.getAdmin

    val tableDescriptor = new HTableDescriptor(TableName.valueOf(tableName))

    val columnDescriptor = new HColumnDescriptor(columnFamilyName)

    columnDescriptor.setCompressionType(Compression.Algorithm.SNAPPY)
    columnDescriptor.setBlocksize(64 * 1024)
    columnDescriptor.setBloomFilterType(BloomType.ROW)

    tableDescriptor.addFamily(columnDescriptor)

    tableDescriptor.setMaxFileSize(Long.MaxValue)
    tableDescriptor.setRegionSplitPolicyClassName(classOf[ConstantSizeRegionSplitPolicy].getName)

    val splitKeys = new mutable.MutableList[Array[Byte]]
    for (i <- 0 to regionCount) {
      val regionSplitStr = StringUtils.leftPad((i*(numOfSalts/regionCount)).toString, 4, "0")
      splitKeys += Bytes.toBytes(regionSplitStr)
    }
    admin.createTable(tableDescriptor, splitKeys.toArray)
  }
} 
Example 12
Source File: HBaseRestServer.scala    From Taxi360   with Apache License 2.0
package com.hadooparchitecturebook.taxi360.server.hbase

import java.io.File

import com.sun.jersey.spi.container.servlet.ServletContainer
import org.apache.hadoop.hbase.HBaseConfiguration
import org.mortbay.jetty.Server
import org.mortbay.jetty.servlet.{Context, ServletHolder}

object HBaseRestServer {
  def main(args:Array[String]): Unit = {

    if (args.length == 0) {
      println("<port> <configDir> <numberOfSalts> <customerTableName>")
      return
    }
    val port = args(0).toInt
    val hbaseConfigFolder = args(1)
    val numberOfSalts = args(2).toInt
    val appEventTableName = args(3)

    val conf = HBaseConfiguration.create()
    conf.addResource(new File(hbaseConfigFolder + "hbase-site.xml").toURI.toURL)

    HBaseGlobalValues.init(conf, numberOfSalts,
      appEventTableName)

    val server = new Server(port)

    val sh = new ServletHolder(classOf[ServletContainer])
    sh.setInitParameter("com.sun.jersey.config.property.resourceConfigClass", "com.sun.jersey.api.core.PackagesResourceConfig")
    sh.setInitParameter("com.sun.jersey.config.property.packages", "com.hadooparchitecturebook.taxi360.server.hbase")
    sh.setInitParameter("com.sun.jersey.api.json.POJOMappingFeature", "true")

    val context = new Context(server, "/", Context.SESSIONS)
    context.addServlet(sh, "/*")

    println("starting HBase Rest Server")
    server.start()
    println("started HBase Rest Sserver")
    server.join()
  }
} 
Example 13
Source File: CsvKafkaPublisher.scala    From Taxi360   with Apache License 2.0
package com.hadooparchitecturebook.taxi360.common

import java.io.File
import java.util.Random

import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

import scala.io.Source

object CsvKafkaPublisher {

  var counter = 0
  var salts = 0

  def main(args:Array[String]): Unit = {
    if (args.length == 0) {
      println("<brokerList> " +
        "<topicName> " +
        "<dataFolderOrFile> " +
        "<sleepPerRecord> " +
        "<acks> " +
        "<linger.ms> " +
        "<producer.type> " +
        "<batch.size> " +
        "<salts>")
      return
    }

    val kafkaBrokerList = args(0)
    val kafkaTopicName = args(1)
    val nyTaxiDataFolder = args(2)
    val sleepPerRecord = args(3).toInt
    val acks = args(4).toInt
    val lingerMs = args(5).toInt
    val producerType = args(6) //"async"
    val batchSize = args(7).toInt
    salts = args(8).toInt

    val kafkaProducer = KafkaProducerUntil.getNewProducer(kafkaBrokerList, acks, lingerMs, producerType, batchSize)

    println("--Input:" + nyTaxiDataFolder)

    val dataFolder = new File(nyTaxiDataFolder)
    if (dataFolder.isDirectory) {
      val files = dataFolder.listFiles().iterator
      files.foreach(f => {
        println("--Input:" + f)
        processFile(f, kafkaTopicName, kafkaProducer, sleepPerRecord)
      })
    } else {
      println("--Input:" + dataFolder)
      processFile(dataFolder, kafkaTopicName, kafkaProducer, sleepPerRecord)
    }
    println("---Done")
  }

  def processFile(file:File, kafkaTopicName:String,
                  kafkaProducer: KafkaProducer[String, String], sleepPerRecord:Int): Unit = {
    var counter = 0
    val r = new Random()

    println("-Starting Reading")
    Source.fromFile(file).getLines().foreach(l => {
      counter += 1
      if (counter % 10000 == 0) {
        println("{Sent:" + counter + "}")
      }
      if (counter % 100 == 0) {
        print(".")
      }
      Thread.sleep(sleepPerRecord)

      val saltedVender = r.nextInt(salts) + l

      if (counter > 2) {
        publishTaxiRecord(saltedVender, kafkaTopicName, kafkaProducer)
      }
    })
  }

  def publishTaxiRecord(line:String, kafkaTopicName:String, kafkaProducer: KafkaProducer[String, String]): Unit = {

    if (line.startsWith("vendor_name") || line.length < 10) {
      println("skip")
    } else {
      val message = new ProducerRecord[String, String](kafkaTopicName, line.hashCode.toString, line)
      kafkaProducer.send(message)
    }
  }


} 
Example 14
Source File: SparkStreamingTaxiTripToHBase.scala    From Taxi360   with Apache License 2.0
package com.hadooparchitecturebook.taxi360.streaming.ingestion.hbase

import java.io.File

import com.hadooparchitecturebook.taxi360.model.NyTaxiYellowTripBuilder
import org.apache.hadoop.hbase.spark.HBaseContext
import org.apache.hadoop.hbase.spark.HBaseDStreamFunctions._
import kafka.serializer.StringDecoder
import org.apache.hadoop.hbase.{HBaseConfiguration, TableName}
import org.apache.solr.common.cloud.ZooKeeperException
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.{SparkConf, SparkContext}

object SparkStreamingTaxiTripToHBase {
  def main(args: Array[String]): Unit = {
    println("Java Version:" + System.getProperty("java.version"))
    println("Java Home:" + System.getProperties().getProperty("java.home"))

    val v:ZooKeeperException = null

    if (args.length == 0) {
      println("Args: <KafkaBrokerList> " +
        "<kafkaTopicList> " +
        "<numberOfSeconds>" +
        "<runLocal>" +
        "<hbaseTable>" +
        "<numOfSalts>" +
        "<checkpointDir>" +
        "<hbaseConfigFolder>")
      return
    }

    val kafkaBrokerList = args(0)
    val kafkaTopicList = args(1)
    val numberOfSeconds = args(2).toInt
    val runLocal = args(3).equals("l")
    val tableName = args(4)
    val numOfSalts = args(5).toInt
    val checkpointFolder = args(6)
    val hbaseConfigFolder = args(7)

    println("kafkaBrokerList:" + kafkaBrokerList)
    println("kafkaTopicList:" + kafkaTopicList)
    println("numberOfSeconds:" + numberOfSeconds)
    println("runLocal:" + runLocal)
    println("tableName:" + tableName)
    println("numOfSalts:" + numOfSalts)

    val sc:SparkContext = if (runLocal) {
      val sparkConfig = new SparkConf()
      sparkConfig.set("spark.broadcast.compress", "false")
      sparkConfig.set("spark.shuffle.compress", "false")
      sparkConfig.set("spark.shuffle.spill.compress", "false")
      new SparkContext("local[2]", "TableStatsSinglePathMain", sparkConfig)
    } else {
      val sparkConf = new SparkConf().setAppName("Spark Streaming Ingestion to HBase")
      new SparkContext(sparkConf)
    }
    val ssc = new StreamingContext(sc, Seconds(numberOfSeconds))

    val topicsSet = kafkaTopicList.split(",").toSet
    val kafkaParams = Map[String, String]("metadata.broker.list" -> kafkaBrokerList)

    val messageStream = KafkaUtils.
      createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topicsSet)

    val conf = HBaseConfiguration.create()

    conf.addResource(new File(hbaseConfigFolder + "hbase-site.xml").toURI.toURL)

    val hbaseContext = new HBaseContext(sc, conf)

    val tripDStream = messageStream.map(r => {
      (r._1, r._2.split(","))
    }).filter(r => r._2.size > 3).map(r => {
      (r._1, NyTaxiYellowTripBuilder.build(r._2))
    })

    tripDStream.hbaseBulkPut(hbaseContext, TableName.valueOf(tableName), taxi => {
      TaxiTripHBaseHelper.generatePut(taxi._2, numOfSalts)
    })

    ssc.checkpoint(checkpointFolder)
    ssc.start()
    ssc.awaitTermination()
  }
} 
Example 15
Source File: SampleRoutes.scala    From akka_streams_tutorial   with MIT License
package akkahttp

import java.io.File

import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.http.scaladsl.server.Directives._
import akka.http.scaladsl.server.Route
import org.slf4j.{Logger, LoggerFactory}

import scala.concurrent.Await
import scala.concurrent.duration._
import scala.sys.process.Process
import scala.util.{Failure, Success}


object SampleRoutes extends App {
  val logger: Logger = LoggerFactory.getLogger(this.getClass)
  implicit val system = ActorSystem("SampleRoutes")
  implicit val executionContext = system.dispatcher


  def getFromBrowsableDir: Route = {
    val dirToBrowse = File.separator + "tmp"

    // pathPrefix allows loading dirs and files recursively
    pathPrefix("entries") {
      getFromBrowseableDirectory(dirToBrowse)
    }
  }

  def parseFormData: Route = path("post") {
    formFields('color, 'age.as[Int]) { (color, age) =>
      complete(s"The color is '$color' and the age is $age")
    }
  }

  def routes: Route = {
    getFromBrowsableDir ~ parseFormData
  }

  val bindingFuture = Http().bindAndHandle(routes, "127.0.0.1", 8000)

  bindingFuture.onComplete {
    case Success(b) =>
      println("Server started, listening on: " + b.localAddress)
    case Failure(e) =>
      println(s"Server could not bind to... Exception message: ${e.getMessage}")
      system.terminate()
  }

  def browserClient() = {
    val os = System.getProperty("os.name").toLowerCase
    if (os == "mac os x") Process("open ./src/main/resources/SampleRoutes.html").!
  }

  browserClient()

  sys.addShutdownHook {
    println("About to shutdown...")
    val fut = bindingFuture.map(serverBinding => serverBinding.terminate(hardDeadline = 3.seconds))
    println("Waiting for connections to terminate...")
    val onceAllConnectionsTerminated = Await.result(fut, 10.seconds)
    println("Connections terminated")
    onceAllConnectionsTerminated.flatMap { _ => system.terminate()
    }
  }
} 
Example 16
Source File: KafkaServer.scala    From akka_streams_tutorial   with MIT License
package alpakka.env

import java.io.File
import java.net.InetSocketAddress
import java.nio.file.{Files, Paths}
import java.util.Properties

import kafka.server.{KafkaConfig, KafkaServerStartable}
import org.apache.commons.io.FileUtils
import org.apache.zookeeper.server.quorum.QuorumPeerConfig
import org.apache.zookeeper.server.{ServerConfig, ZooKeeperServerMain}


object KafkaServer extends App {

  val zookeeperPort = 2181

  val kafkaLogs = "/tmp/kafka-logs"
  val kafkaLogsPath = Paths.get(kafkaLogs)

  // See: https://stackoverflow.com/questions/59592518/kafka-broker-doesnt-find-cluster-id-and-creates-new-one-after-docker-restart/60864763#comment108382967_60864763
  def fix25Behaviour() = {
    val fileWithConflictingContent = kafkaLogsPath.resolve("meta.properties").toFile
    if (fileWithConflictingContent.exists())  FileUtils.forceDelete(fileWithConflictingContent)
  }

  def removeKafkaLogs(): Unit = {
    if (kafkaLogsPath.toFile.exists()) FileUtils.forceDelete(kafkaLogsPath.toFile)
  }

  // Keeps the persistent data
  fix25Behaviour()
  // If everything fails
  //removeKafkaLogs()

  val quorumConfiguration = new QuorumPeerConfig {
    // Since we do not run a cluster, we are not interested in zookeeper data
    override def getDataDir: File = Files.createTempDirectory("zookeeper").toFile
    override def getDataLogDir: File = Files.createTempDirectory("zookeeper-logs").toFile
    override def getClientPortAddress: InetSocketAddress = new InetSocketAddress(zookeeperPort)
  }

  class StoppableZooKeeperServerMain extends ZooKeeperServerMain {
    def stop(): Unit = shutdown()
  }

  val zooKeeperServer = new StoppableZooKeeperServerMain()

  val zooKeeperConfig = new ServerConfig()
  zooKeeperConfig.readFrom(quorumConfiguration)

  val zooKeeperThread = new Thread {
    override def run(): Unit = zooKeeperServer.runFromConfig(zooKeeperConfig)
  }

  zooKeeperThread.start()

  val kafkaProperties = new Properties()
  kafkaProperties.put("zookeeper.connect", s"localhost:$zookeeperPort")
  kafkaProperties.put("broker.id", "0")
  kafkaProperties.put("offsets.topic.replication.factor", "1")
  kafkaProperties.put("log.dirs", kafkaLogs)
  kafkaProperties.put("delete.topic.enable", "true")
  kafkaProperties.put("group.initial.rebalance.delay.ms", "0")
  kafkaProperties.put("transaction.state.log.min.isr", "1")
  kafkaProperties.put("transaction.state.log.replication.factor", "1")
  kafkaProperties.put("zookeeper.connection.timeout.ms", "6000")
  kafkaProperties.put("num.partitions", "10")

  val kafkaConfig = KafkaConfig.fromProps(kafkaProperties)

  val kafka = new KafkaServerStartable(kafkaConfig)

  println("About to start...")
  kafka.startup()

  scala.sys.addShutdownHook{
    println("About to shutdown...")
    kafka.shutdown()
    kafka.awaitShutdown()
    zooKeeperServer.stop()
  }

  zooKeeperThread.join()
} 
Example 17
Source File: SparkSessionConfiguration.scala    From spark-structured-streaming-examples   with Apache License 2.0
package com.phylosoft.spark.learning

import java.io.File

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

trait SparkSessionConfiguration {

  val settings: Traversable[(String, String)]

  private val warehouseLocation = "file:///" + new File("spark-warehouse").getAbsolutePath.toString

  private lazy val conf = new SparkConf()
    .set("spark.sql.warehouse.dir", warehouseLocation)
    .set("spark.sql.session.timeZone", "UTC")
    .set("spark.sql.shuffle.partitions", "4") // keep the size of shuffles small
    .set("spark.sql.cbo.enabled", "true")
    .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    .set("spark.kryoserializer.buffer", "24")
    .setAll(settings)

  implicit lazy val spark: SparkSession = SparkSession.builder
    .config(conf)
    .enableHiveSupport()
    .getOrCreate()

} 
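Example 17 defines only the trait. A hypothetical application mixing it in might look like the sketch below; the object name, settings, and input path are invented for illustration, and enableHiveSupport() requires Hive classes on the classpath.

import com.phylosoft.spark.learning.SparkSessionConfiguration

object WordCountApp extends SparkSessionConfiguration {
  // Concrete apps supply any extra Spark settings; these values are illustrative.
  val settings = Seq("spark.app.name" -> "word-count", "spark.master" -> "local[*]")

  def main(args: Array[String]): Unit = {
    import spark.implicits._
    spark.read.textFile("input.txt")     // the implicit `spark` session comes from the trait
      .flatMap(_.split("\\s+"))
      .groupBy("value").count()
      .show()
    spark.stop()
  }
}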
Example 18
Source File: CongestionModel.scala    From spatial   with MIT License
package models

import java.io.File
import java.io.PrintWriter
import utils.io.files._
import utils.math.{CombinationTree, ReduceTree}

import scala.io.Source

object CongestionModel {

	abstract class FeatureVec[T] {
		def loads: T
		def stores: T
		def gateds: T
		def outerIters: T
		def innerIters: T
		def bitsPerCycle: T
		def toSeq: Seq[T] = Seq(stores, outerIters, loads, innerIters, gateds, bitsPerCycle)
	}
	case class RawFeatureVec(loads: Double, stores: Double, gateds: Double, outerIters: Double, innerIters: Double, bitsPerCycle: Double) extends FeatureVec[Double]
	case class CalibFeatureVec(loads: Double, stores: Double, gateds: Double, outerIters: Double, innerIters: Double, bitsPerCycle: Double) extends FeatureVec[Double]

	// Set up lattice properties
	val feature_dims = 6
	val lattice_rank = 6
	val lattice_size = Seq(3,3,3,3,3,3)
	val num_keypoints = 8
	val num_lattices = 1
	var model: String = ""

	// Derive lattice properties
	val sizes = scala.Array.tabulate(lattice_rank){i => lattice_size(i)}
	val dimensions = sizes.length
	val params_per_lattice = sizes.product
	val strides: scala.Array[Int] = scala.Array.fill(dimensions){1}
	val nparams = num_lattices * params_per_lattice

	// Grab lattice params
	lazy val loads_keypoints_inputs = ModelData.loads_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/loads_keypoints_inputs.csv", ","){x => x.toDouble}
	lazy val loads_keypoints_outputs = ModelData.loads_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/loads_keypoints_outputs.csv", ","){x => x.toDouble}
	lazy val stores_keypoints_inputs = ModelData.stores_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/stores_keypoints_inputs.csv", ","){x => x.toDouble}
	lazy val stores_keypoints_outputs = ModelData.stores_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/stores_keypoints_outputs.csv", ","){x => x.toDouble}
	lazy val gateds_keypoints_inputs = ModelData.gateds_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/gateds_keypoints_inputs.csv", ","){x => x.toDouble}
	lazy val gateds_keypoints_outputs = ModelData.gateds_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/gateds_keypoints_outputs.csv", ","){x => x.toDouble}
	lazy val outerIters_keypoints_inputs = ModelData.outerIters_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/outerIters_keypoints_inputs.csv", ","){x => x.toDouble}
	lazy val outerIters_keypoints_outputs = ModelData.outerIters_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/outerIters_keypoints_outputs.csv", ","){x => x.toDouble}
	lazy val innerIters_keypoints_inputs = ModelData.innerIters_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/innerIters_keypoints_inputs.csv", ","){x => x.toDouble}
	lazy val innerIters_keypoints_outputs = ModelData.innerIters_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/innerIters_keypoints_outputs.csv", ","){x => x.toDouble}
	lazy val bitsPerCycle_keypoints_inputs = ModelData.bitsPerCycle_keypoints_inputs(model).map(_.toDouble) //loadCSVNow[Int](s"../data/${model}/CALIBRATOR_INPUT_PARAMS/bitsPerCycle_keypoints_inputs.csv", ","){x => x.toDouble}
	lazy val bitsPerCycle_keypoints_outputs = ModelData.bitsPerCycle_keypoints_outputs(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/CALIBRATOR_OUTPUT_PARAMS/bitsPerCycle_keypoints_outputs.csv", ","){x => x.toDouble}
    lazy val params = ModelData.params(model).map(_.toDouble) //loadCSVNow[Double](s"../data/${model}/LATTICE_PARAMS.csv", ","){x => x.toDouble}

	// Estimates a congestion figure (in cycles) for the given feature vector and controller schedule;
	// calibrate_features and hypercube_features are defined elsewhere in the original file.
	def evaluate(features: RawFeatureVec, typ: Runtime.CtrlSchedule): Int = {
		model = typ.toString

		val calibrated_features = calibrate_features(features)
		val result = hypercube_features(calibrated_features)
		// TODO: Model is naughty if it returns <170
		// println(s"evaluating $features = ${170 max result.toInt}")
		170 max result.toInt
	}
} 
Example 19
Source File: LatencyAnalyzer.scala    From spatial   with MIT License
package spatial.dse

import argon._
import spatial.lang._
import spatial.node._
import spatial.util.spatialConfig
import spatial.util.modeling._
import spatial.traversal._
import spatial.targets._
import java.io.File
import models._
import argon.node._


case class LatencyAnalyzer(IR: State, latencyModel: LatencyModel) extends AccelTraversal {
  var cycleScope: List[Double] = Nil
  var intervalScope: List[Double] = Nil
  var totalCycles: Seq[Long] = Seq()
  val batchSize = 1000

  def getListOfFiles(d: String):List[String] = {
    import java.nio.file.{FileSystems, Files}
    import scala.collection.JavaConverters._
    val dir = FileSystems.getDefault.getPath(d) 
    Files.walk(dir).iterator().asScala.filter(Files.isRegularFile(_)).map(_.toString).toList//.foreach(println)
  }
  
  override def silence(): Unit = {
    super.silence()
  }


  def test(rewriteParams: Seq[Seq[Any]]): Unit = {
    import scala.language.postfixOps
    import java.io.File
    import sys.process._

    val gen_dir = if (config.genDir.startsWith("/")) config.genDir + "/" else config.cwd + s"/${config.genDir}/"
    val modelJar = getListOfFiles(gen_dir + "/model").filter(_.contains("RuntimeModel-assembly")).head
    totalCycles = rewriteParams.grouped(batchSize).flatMap{params => 
      val batchedParams = params.map{rp => "tune " + rp.mkString(" ")}.mkString(" ")
      val cmd = s"""java -jar ${modelJar} ni ${batchedParams}"""
      // println(s"running cmd: $cmd")
      val output = Process(cmd, new File(gen_dir)).!!
      output.split("\n").filter(_.contains("Total Cycles for App")).map{r => 
        "^.*: ".r.replaceAllIn(r,"").trim.toLong
      }.toSeq
    }.toSeq
    // println(s"DSE Model result: $totalCycles")

  }

  override protected def preprocess[A](b: Block[A]): Block[A] = {

    super.preprocess(b)
  }

  override protected def postprocess[A](b: Block[A]): Block[A] = {
    super.postprocess(b)
  }

  override protected def visit[A](lhs: Sym[A], rhs: Op[A]): Unit = {  }



} 
Example 20
Source File: OOB.scala    From spatial   with MIT License
package emul

import java.io.PrintStream
import java.io.File

object OOB {
  lazy val writeStream = new PrintStream("./logs/writes.log")
  lazy val readStream = new PrintStream("./logs/reads.log")
  def open(): Unit = {
    new File("./logs/").mkdirs()
    writeStream
    readStream
  }
  def close(): Unit = {
    writeStream.close()
    readStream.close()
  }

  def readOrElse[T](mem: String, addr: String, invalid: T, en: Boolean)(rd: => T): T = {
    try {
      val data = rd
      if (en) readStream.println(s"Mem: $mem; Addr: $addr")
      data
    }
    catch {case err: java.lang.ArrayIndexOutOfBoundsException =>
      if (en) readStream.println(s"Mem: $mem; Addr: $addr [OOB]")
      invalid
    }
  }
  def writeOrElse(mem: String, addr: String, data: Any, en: Boolean)(wr: => Unit): Unit = {
    try {
      wr
      if (en) writeStream.println(s"Mem: $mem; Addr: $addr; Data: $data")
    }
    catch {case err: java.lang.ArrayIndexOutOfBoundsException =>
      if (en) writeStream.println(s"Mem: $mem; Addr: $addr; Data: $data [OOB]")
    }
  }

} 
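A hypothetical snippet exercising the OOB helpers above (it assumes ./logs/ is writable and that OOB is on the classpath under the emul package):

import emul.OOB

object OOBDemo extends App {
  val mem = Array.fill(4)(0)
  OOB.open()
  // An in-bounds write is performed and logged to ./logs/writes.log.
  OOB.writeOrElse("mem0", "2", data = 7, en = true) { mem(2) = 7 }
  // An out-of-bounds read is caught, logged with an [OOB] marker, and replaced by `invalid`.
  val v = OOB.readOrElse("mem0", "10", invalid = -1, en = true) { mem(10) }
  OOB.close()
  println(v) // -1
}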
Example 21
Source File: globals.scala    From spatial   with MIT License
package fringe
import java.io.{File, PrintWriter}

import fringe.targets.DeviceTarget
import fringe.templates.axi4.{AXI4BundleParameters, AXI4StreamParameters}

object globals {
  private var _tclScript: PrintWriter = {
    val pw = new PrintWriter(new File("bigIP.tcl"))
    pw.flush()
    pw
  }
  def tclScript: PrintWriter = _tclScript
  def tclScript_=(value: PrintWriter): Unit = _tclScript = value


  var regression_testing: String = scala.util.Properties.envOrElse("RUNNING_REGRESSION", "0")

  // Top parameters
  // These are set by the generated Instantiator class
  var numArgIns: Int = 1      // Number of ArgIn registers
  var numArgOuts: Int = 1     // Number of ArgOut registers
  var numArgIOs: Int = 0      // Number of HostIO registers
  var numArgInstrs: Int = 0   // TODO: What is this?
  var argOutLoopbacksMap: Map[Int,Int] = Map.empty // TODO: What is this?

  var loadStreamInfo: List[StreamParInfo] = Nil
  var storeStreamInfo: List[StreamParInfo] = Nil
  var gatherStreamInfo: List[StreamParInfo] = Nil
  var scatterStreamInfo: List[StreamParInfo] = Nil
  var axiStreamInsInfo: List[AXI4StreamParameters] = List(AXI4StreamParameters(64,8,64))
  var axiStreamOutsInfo: List[AXI4StreamParameters] = List(AXI4StreamParameters(64,8,64))

  var numAllocators: Int = 0

  def LOAD_STREAMS: List[StreamParInfo] = if (loadStreamInfo.isEmpty) List(StreamParInfo(DATA_WIDTH, WORDS_PER_STREAM, 0)) else loadStreamInfo
  def STORE_STREAMS: List[StreamParInfo] = if (storeStreamInfo.isEmpty) List(StreamParInfo(DATA_WIDTH, WORDS_PER_STREAM, 0)) else storeStreamInfo
  def GATHER_STREAMS: List[StreamParInfo] = if (gatherStreamInfo.isEmpty) List(StreamParInfo(DATA_WIDTH, WORDS_PER_STREAM, 0)) else gatherStreamInfo
  def SCATTER_STREAMS: List[StreamParInfo] = if (scatterStreamInfo.isEmpty) List(StreamParInfo(DATA_WIDTH, WORDS_PER_STREAM, 0)) else scatterStreamInfo

  def AXI_STREAMS_IN: List[AXI4StreamParameters] = if (axiStreamInsInfo.isEmpty) List(AXI4StreamParameters(256,8,32)) else axiStreamInsInfo
  def AXI_STREAMS_OUT: List[AXI4StreamParameters] = if (axiStreamOutsInfo.isEmpty) List(AXI4StreamParameters(256,8,32)) else axiStreamOutsInfo

  def NUM_LOAD_STREAMS: Int = LOAD_STREAMS.size
  def NUM_STORE_STREAMS: Int = STORE_STREAMS.size

  def NUM_ARG_INS: Int = numArgIns
  def NUM_ARG_OUTS: Int = numArgOuts
  def NUM_ARG_IOS: Int = numArgIOs
  def NUM_ARG_LOOPS: Int = argOutLoopbacksMap.size max 1
  def NUM_ARGS: Int = numArgIns + numArgOuts
  def NUM_STREAMS: Int = LOAD_STREAMS.size + STORE_STREAMS.size
} 
Example 22
Source File: TemplateRunner.scala    From spatial   with MIT License
package fringe.test

import java.io.File

import scala.collection.mutable.ArrayBuffer
import scala.util.Properties.envOrElse

object TemplateRunner {
  def deleteRecursively(file: File): Unit = {
    if (file.isDirectory)
      file.listFiles.foreach(deleteRecursively)
    if (file.exists && !file.delete)
      throw new Exception(s"Unable to delete ${file.getAbsolutePath}")
  }
  def apply(templateMap: Map[String, String => Boolean], args: Array[String]): Unit = {
    // Choose the default backend based on what is available.
    lazy val firrtlTerpBackendAvailable: Boolean = {
      try {
        val cls = Class.forName("chisel3.iotesters.FirrtlTerpBackend")
        cls != null
      } catch {
        case e: Throwable => false
      }
    }
    lazy val defaultBackend = if (firrtlTerpBackendAvailable) "firrtl" else ""

    val backendName = envOrElse("TESTER_BACKENDS", defaultBackend).split(" ").head
    val tempDir = s"""${envOrElse("NEW_TEMPLATES_HOME", "tmp")}/test_run_dir/"""
    val specificRegex = "(.*[0-9]+)".r
    val problemsToRun = if (args.isEmpty) {
      templateMap.keys.toSeq.sorted.toArray // Run all by default
    } else {
      args.map { arg => arg match {
        case "all" => templateMap.keys.toSeq.sorted // Run all
        case specificRegex(c) => List(c).toSeq // Run specific test
        case _ => // Figure out tests that match this template and run all
          val tempRegex = s"(${arg}[0-9]+)".r
          templateMap.keys.toSeq.sorted.filter(tempRegex.pattern.matcher(_).matches)
      }}.flatten.toArray
    }

    var successful = 0
    var passedTests:List[String] = List()
    val errors = new ArrayBuffer[String]
    for(testName <- problemsToRun) {
      // Wipe tempdir for consecutive tests of same module
      deleteRecursively(new File(tempDir))
      templateMap.get(testName) match {
        case Some(test) =>
          println(s"Starting template $testName")
          try {
            if(test(backendName)) {
              successful += 1
              passedTests = passedTests :+ s"$testName"
            }
            else {
              errors += s"Template $testName: test error occurred"
            }
          }
          catch {
            case exception: Exception =>
              exception.printStackTrace()
              errors += s"Template $testName: exception ${exception.getMessage}"
            case t : Throwable =>
              errors += s"Template $testName: throwable ${t.getMessage}"
          }
        case _ =>
          errors += s"Bad template name: $testName"
      }
    }
    if(successful > 0) {
      println(s"""Templates passing: $successful (${passedTests.mkString(", ")})""")
    }
    if(errors.nonEmpty) {
      println("=" * 80)
      println(s"Errors: ${errors.length}: in the following templates")
      println(errors.mkString("\n"))
      println("=" * 80)
      System.exit(1)
    }
  }
} 
Example 23
Source File: AvroSource.scala    From eel-sdk   with Apache License 2.0
package io.eels.component.avro

import java.io.File
import java.util.concurrent.atomic.AtomicBoolean

import com.sksamuel.exts.Logging
import com.sksamuel.exts.io.Using
import io.eels._
import io.eels.datastream.{DataStream, Publisher, Subscriber, Subscription}
import io.eels.schema.StructType
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

case class AvroSource(path: Path)
                     (implicit conf: Configuration, fs: FileSystem) extends Source with Using {

  override lazy val schema: StructType = {
    using(AvroReaderFns.createAvroReader(path)) { reader =>
      val record = reader.next()
      AvroSchemaFns.fromAvroSchema(record.getSchema)
    }
  }

  override def parts(): Seq[Publisher[Seq[Row]]] = Seq(AvroSourcePublisher(path))
}

case class AvroSourcePublisher(path: Path)
                              (implicit conf: Configuration, fs: FileSystem)
  extends Publisher[Seq[Row]] with Logging with Using {
  override def subscribe(subscriber: Subscriber[Seq[Row]]): Unit = {
    val deserializer = new AvroDeserializer()
    try {
      using(AvroReaderFns.createAvroReader(path)) { reader =>
        val running = new AtomicBoolean(true)
        subscriber.subscribed(Subscription.fromRunning(running))
        AvroRecordIterator(reader)
          .takeWhile(_ => running.get)
          .map(deserializer.toRow)
          .grouped(DataStream.DefaultBatchSize)
          .foreach(subscriber.next)
        subscriber.completed()
      }
    } catch {
      case t: Throwable => subscriber.error(t)
    }
  }
}

object AvroSource {
  def apply(file: File)(implicit conf: Configuration, fs: FileSystem): AvroSource = AvroSource(new Path(file.getAbsoluteFile.toString))
  def apply(path: java.nio.file.Path)(implicit conf: Configuration, fs: FileSystem): AvroSource = apply(path.toFile)
} 
Example 24
Source File: AvroSink.scala    From eel-sdk   with Apache License 2.0
package io.eels.component.avro

import java.io.File

import io.eels.schema.StructType
import io.eels.{Row, Sink, SinkWriter}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.hadoop.fs.{FileSystem, Path}

case class AvroSink(path: Path,
                    overwrite: Boolean = false,
                    permission: Option[FsPermission] = None,
                    inheritPermissions: Option[Boolean] = None)
                   (implicit conf: Configuration, fs: FileSystem) extends Sink {

  def withOverwrite(overwrite: Boolean): AvroSink = copy(overwrite = overwrite)
  def withPermission(permission: FsPermission): AvroSink = copy(permission = Option(permission))
  def withInheritPermission(inheritPermissions: Boolean): AvroSink = copy(inheritPermissions = Option(inheritPermissions))

  override def open(schema: StructType): SinkWriter = new SinkWriter {

    private val writer = new AvroWriter(schema, fs.create(path, overwrite))

    override def write(row: Row): Unit = writer.write(row)

    override def close(): Unit = {
      writer.close()
      permission match {
        case Some(perm) => fs.setPermission(path, perm)
        case None =>
          if (inheritPermissions.getOrElse(false)) {
            val permission = fs.getFileStatus(path.getParent).getPermission
            fs.setPermission(path, permission)
          }
      }
    }
  }
}

object AvroSink {
  def apply(file: File)(implicit conf: Configuration, fs: FileSystem): AvroSink = AvroSink(new Path(file.getAbsoluteFile.toString))
  def apply(path: java.nio.file.Path)(implicit conf: Configuration, fs: FileSystem): AvroSink = apply(path.toFile)
} 
Example 25
Source File: ParquetProjectionTest.scala    From eel-sdk   with Apache License 2.0
package io.eels.component.parquet

import java.io.{File, FilenameFilter}

import io.eels.datastream.DataStream
import io.eels.schema.{Field, StringType, StructType}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.scalatest.{FlatSpec, Matchers}

class ParquetProjectionTest extends FlatSpec with Matchers {

  cleanUpResidualParquetTestFiles

  private val schema = StructType(
    Field("name", StringType, nullable = false),
    Field("job", StringType, nullable = false),
    Field("location", StringType, nullable = false)
  )
  private val ds = DataStream.fromValues(
    schema,
    Seq(
      Vector("clint eastwood", "actor", "carmel"),
      Vector("elton john", "musician", "pinner")
    )
  )

  private implicit val conf = new Configuration()
  private implicit val fs = FileSystem.get(new Configuration())
  private val file = new File(s"test_${System.currentTimeMillis()}.pq")
  file.deleteOnExit()
  private val path = new Path(file.toURI)

  if (fs.exists(path))
    fs.delete(path, false)

  ds.to(ParquetSink(path).withOverwrite(true))

  "ParquetSource" should "support projections" in {
    val rows = ParquetSource(path).withProjection("name").toDataStream().collect
    rows.map(_.values) shouldBe Vector(Vector("clint eastwood"), Vector("elton john"))
  }

  it should "return all data when no projection is set" in {
    val rows = ParquetSource(path).toDataStream().collect
    rows.map(_.values) shouldBe Vector(Vector("clint eastwood", "actor", "carmel"), Vector("elton john", "musician", "pinner"))
  }

  private def cleanUpResidualParquetTestFiles = {
    new File(".").listFiles(new FilenameFilter {
      override def accept(dir: File, name: String): Boolean = {
        (name.startsWith("test_") && name.endsWith(".pq")) || (name.startsWith(".test_") && name.endsWith(".pq.crc"))
      }
    }).foreach(_.delete())
  }

} 
Example 26
Source File: ParquetSpeedTest.scala    From eel-sdk   with Apache License 2.0
package io.eels.component.parquet

import java.io.File

import com.sksamuel.exts.metrics.Timed
import io.eels.Row
import io.eels.component.parquet.avro.{AvroParquetSink, AvroParquetSource}
import io.eels.component.parquet.util.ParquetLogMute
import io.eels.datastream.DataStream
import io.eels.schema.StructType
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

import scala.util.Random


object ParquetSpeedTest extends App with Timed {
  ParquetLogMute()

  val size = 2000000
  val schema = StructType("a", "b", "c", "d", "e")
  val createRow = Row(schema, Random.nextBoolean(), Random.nextFloat(), Random.nextGaussian(), Random.nextLong(), Random.nextString(4))
  val ds = DataStream.fromIterator(schema, Iterator.continually(createRow).take(size))

  implicit val conf = new Configuration()
  implicit val fs = FileSystem.getLocal(new Configuration())

  val path = new Path("parquet_speed.pq")
  fs.delete(path, false)

  new File(path.toString).deleteOnExit()

  timed("Insertion") {
    ds.to(AvroParquetSink(path).withOverwrite(true))
  }

  while (true) {

    timed("Reading with ParquetSource") {
      val actual = ParquetSource(path).toDataStream().size
      assert(actual == size)
    }

    println("")
    println("---------")
    println("")

    Thread.sleep(2000)

    timed("Reading with AvroParquetSource") {
      val actual = AvroParquetSource(path).toDataStream().size
      assert(actual == size)
    }
  }
} 
Example 27
Source File: ParquetMultipleFileSpeedTest.scala    From eel-sdk   with Apache License 2.0
package io.eels.component.parquet

import java.io.File

import com.sksamuel.exts.metrics.Timed
import io.eels.component.parquet.util.ParquetLogMute
import io.eels.datastream.DataStream
import io.eels.schema.StructType
import io.eels.{FilePattern, Row}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

import scala.util.Random


object ParquetMultipleFileSpeedTest extends App with Timed {
  ParquetLogMute()

  val size = 5000000
  val count = 20
  val schema = StructType("a", "b", "c", "d", "e")

  def createRow = Row(schema, Random.nextBoolean(), Random.nextFloat(), Random.nextGaussian(), Random.nextLong(), Random.nextString(4))

  implicit val conf = new Configuration()
  implicit val fs = FileSystem.getLocal(new Configuration())

  val dir = new Path("parquet-speed-test")
  new File(dir.toString).mkdirs()

  new File(dir.toString).listFiles().foreach(_.delete)
  timed("Insertion") {
    val ds = DataStream.fromRowIterator(schema, Iterator.continually(createRow).take(size))
    ds.to(ParquetSink(new Path("parquet-speed-test/parquet_speed.pq")), count)
  }

  for (_ <- 1 to 25) {
    assert(count == FilePattern("parquet-speed-test/*").toPaths().size)

    timed("Reading with ParquetSource") {
      val actual = ParquetSource("parquet-speed-test/*").toDataStream().map { row => row }.filter(_ => true).size
      assert(actual == size, s"Expected $size but was $actual")
    }

    println("")
    println("---------")
    println("")
  }
} 
Example 28
Source File: ParquetVsOrcSpeedTest.scala    From eel-sdk   with Apache License 2.0
package io.eels.component.hive

import java.io.File
import java.math.MathContext

import com.sksamuel.exts.metrics.Timed
import io.eels.Row
import io.eels.component.orc.{OrcSink, OrcSource}
import io.eels.component.parquet.{ParquetSink, ParquetSource}
import io.eels.datastream.DataStream
import io.eels.schema._
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

import scala.math.BigDecimal.RoundingMode
import scala.util.Random

object ParquetVsOrcSpeedTest extends App with Timed {

  implicit val conf = new Configuration()
  implicit val fs = FileSystem.getLocal(new Configuration())

  val size = 5000000

  val structType = StructType(
    Field("name", StringType),
    Field("age", IntType.Signed),
    Field("height", DoubleType),
    Field("amazing", BooleanType),
    Field("fans", LongType.Signed),
    Field("rating", DecimalType(4, 2))
  )

  def iter: Iterator[Vector[Any]] = Iterator.continually(Vector(
    Random.nextString(10),
    Random.nextInt(),
    Random.nextDouble(),
    Random.nextBoolean(),
    Random.nextLong(),
    BigDecimal(Random.nextDouble(), new MathContext(4)).setScale(2, RoundingMode.UP)
  ))

  def ds: DataStream = DataStream.fromIterator(structType, iter.take(size).map(Row(structType, _)))

  val ppath = new Path("parquet_speed.pq")
  fs.delete(ppath, false)

  val opath = new Path("orc_speed.orc")
  fs.delete(opath, false)

  new File(ppath.toString).deleteOnExit()
  new File(opath.toString).deleteOnExit()

  timed("Orc Insertion") {
    ds.to(OrcSink(opath))
  }

  timed("Parquet Insertion") {
    ds.to(ParquetSink(ppath))
  }

  while (true) {

    timed("Reading with OrcSource") {
      val actual = OrcSource(opath).toDataStream().size
      assert(actual == size, s"$actual != $size")
    }

    timed("Reading with ParquetSource") {
      val actual = ParquetSource(ppath).toDataStream().size
      assert(actual == size, s"$actual != $size")
    }
  }
} 
Example 29
Source File: HiveDynamicPartitionTest.scala    From eel-sdk   with Apache License 2.0 5 votes vote down vote up
package io.eels.component.hive

import java.io.File

import io.eels.component.hive.partition.DynamicPartitionStrategy
import io.eels.datastream.DataStream
import io.eels.schema.{Field, Partition, StructType}
import org.scalatest.{BeforeAndAfterAll, FunSuite, Matchers}

import scala.util.Try

class HiveDynamicPartitionTest extends FunSuite with Matchers with BeforeAndAfterAll {

  import HiveConfig._

  val dbname = HiveTestUtils.createTestDatabase
  val table = "dynp_test_" + System.currentTimeMillis()

  val schema = StructType(Field("a"), Field("b"))

  Try {
    HiveTable(dbname, table).create(schema, Seq("a"))
  }

  override def afterAll(): Unit = Try {
    HiveTable(dbname, table).drop()
  }

  test("dynamic partition strategy should create new partitions") {
    assume(new File(s"$basePath/core-site.xml").exists)
    HiveTable(dbname, table).partitionValues("a") shouldBe Set.empty
    DataStream.fromValues(schema, Seq(Seq("1", "2"), Seq("3", "4"))).to(HiveSink(dbname, table))
    HiveTable(dbname, table).partitionValues("a") shouldBe Set("1", "3")
  }

  test("skip partition if partition already exists") {
    assume(new File(s"$basePath/core-site.xml").exists)
    new DynamicPartitionStrategy().ensurePartition(Partition("a" -> "1"), dbname, table, false, client)
    new DynamicPartitionStrategy().ensurePartition(Partition("a" -> "1"), dbname, table, false, client)
  }
} 
Example 30
Source File: HiveTableTest.scala    From eel-sdk   with Apache License 2.0 5 votes vote down vote up
package io.eels.component.hive

import java.io.File

import io.eels.Row
import io.eels.datastream.DataStream
import io.eels.schema.{Field, StringType, StructType}
import org.scalatest.{FunSuite, Matchers}

import scala.util.{Random, Try}

class HiveTableTest extends FunSuite with Matchers {

  import HiveConfig._

  val dbname = HiveTestUtils.createTestDatabase
  val table = "test_table_" + System.currentTimeMillis()

  Try {
    HiveTable(dbname, table).drop()
  }

  test("partition values should return values for the matching key") {
    assume(new File(s"$basePath/core-site.xml").exists)

    val schema = StructType(
      Field("a", StringType),
      Field("b", StringType),
      Field("c", StringType)
    )
    def createRow = Row(schema,
      Seq(
        Random.shuffle(List("a", "b", "c")).head,
        Random.shuffle(List("x", "y", "z")).head,
        Random.shuffle(List("q", "r", "s")).head
      )
    )

    val sink = HiveSink(dbname, table).withCreateTable(true, Seq("a", "b"))
    val size = 1000

    DataStream.fromIterator(schema, Iterator.continually(createRow).take(size)).to(sink, 4)

    HiveTable(dbname, table).partitionValues("b") shouldBe Set("x", "y", "z")
  }
} 
Example 31
Source File: HivePartitionConstraintTest.scala    From eel-sdk   with Apache License 2.0 5 votes vote down vote up
package io.eels.component.hive

import java.io.File

import io.eels.datastream.DataStream
import io.eels.schema.{Field, PartitionConstraint, StringType, StructType}
import org.scalatest.{BeforeAndAfterAll, FunSuite, Matchers}

import scala.util.Try

class HivePartitionConstraintTest extends FunSuite with Matchers with BeforeAndAfterAll {

  import HiveConfig._

  val dbname = HiveTestUtils.createTestDatabase
  private val table = "constraints_test_" + System.currentTimeMillis()

  override def afterAll(): Unit = Try {
    HiveTable(dbname, table).drop()
  }

  val schema = StructType(
    Field("state", StringType),
    Field("city", StringType)
  )

  Try {
    DataStream.fromValues(schema, Seq(
      Seq("iowa", "des moines"),
      Seq("iowa", "iow city"),
      Seq("maine", "augusta")
    )).to(HiveSink(dbname, table).withCreateTable(true, Seq("state")))
  }

  test("hive source with partition constraint should return matching data") {
    assume(new File(s"$basePath/core-site.xml").exists)

    HiveSource(dbname, table)
      .addPartitionConstraint(PartitionConstraint.equals("state", "iowa"))
      .toDataStream()
      .collect.size shouldBe 2
  }

  test("hive source with non-existing partitions in constraint should return no data") {
    assume(new File(s"$basePath/core-site.xml").exists)

    HiveSource(dbname, table)
      .addPartitionConstraint(PartitionConstraint.equals("state", "pa"))
      .toDataStream()
      .collect.size shouldBe 0
  }
} 
Example 32
Source File: HiveStatsTest.scala    From eel-sdk   with Apache License 2.0 5 votes vote down vote up
package io.eels.component.hive

import java.io.File

import io.eels.Row
import io.eels.datastream.DataStream
import io.eels.schema._
import org.scalatest.{BeforeAndAfterAll, FunSuite, Matchers}

import scala.util.{Random, Try}

class HiveStatsTest extends FunSuite with Matchers with BeforeAndAfterAll {

  import HiveConfig._

  private val dbname = HiveTestUtils.createTestDatabase
  private val table = "stats_test_" + System.currentTimeMillis()
  private val partitioned_table = "stats_test2_" + System.currentTimeMillis()

  val schema = StructType(
    Field("a", StringType),
    Field("b", IntType.Signed)
  )

  def createRow = Row(schema, Seq(Random.shuffle(List("a", "b", "c")).head, Random.shuffle(List(1, 2, 3, 4, 5)).head))

  val amount = 10000

  override def afterAll(): Unit = Try {
    HiveTable(dbname, table).drop()
    HiveTable(dbname, partitioned_table).drop()
  }

  Try {
    DataStream.fromIterator(schema, Iterator.continually(createRow).take(amount))
      .to(HiveSink(dbname, table).withCreateTable(true), 4)

    DataStream.fromIterator(schema, Iterator.continually(createRow).take(amount))
      .to(HiveSink(dbname, partitioned_table).withCreateTable(true, Seq("a")), 4)
  }

  test("stats should return row counts for a non-partitioned table") {
    assume(new File(s"$basePath/core-site.xml").exists)
    HiveTable(dbname, table).stats().count shouldBe amount
  }

  test("stats should return row counts for a partitioned table") {
    assume(new File(s"$basePath/core-site.xml").exists)
    HiveTable(dbname, partitioned_table).stats().count shouldBe amount
  }

  test("stats should throw exception when constraints specified on a non-partitioned table") {
    assume(new File(s"$basePath/core-site.xml").exists)
    intercept[RuntimeException] {
      val constraints = Seq(PartitionConstraint.equals("a", "b"))
      HiveTable(dbname, table).stats().count(constraints)
    }
  }

  test("stats should support row count constraints for a partitioned table") {
    assume(new File(s"$basePath/core-site.xml").exists)
    val constraints = Seq(PartitionConstraint.equals("a", "b"))
    HiveTable(dbname, partitioned_table).stats().count(constraints) > 0 shouldBe true
    HiveTable(dbname, partitioned_table).stats().count(constraints) should be < amount.toLong
  }

  test("stats should support min and max for a non-partitioned tabled") {
    assume(new File(s"$basePath/core-site.xml").exists)
    HiveTable(dbname, table).stats.max("b") shouldBe 5
    HiveTable(dbname, table).stats.min("b") shouldBe 1
  }

  test("stats should support min and max for a partitioned table") {
    assume(new File(s"$basePath/core-site.xml").exists)
    HiveTable(dbname, partitioned_table).stats.max("b") shouldBe 5
    HiveTable(dbname, partitioned_table).stats.min("b") shouldBe 1
  }
} 
Example 33
Source File: CompactorTest.scala    From eel-sdk   with Apache License 2.0 5 votes vote down vote up
package io.eels.component.hive

import java.io.File

import io.eels.datastream.DataStream
import io.eels.schema.{Field, StructType}

class CompactorTest extends HiveTests {

  import HiveConfig._

  HiveTable("default", "wibble").drop(true)

  "Compactor" should {
    "delete the originals" ignore {

      val schema = StructType(Field("a"), Field("b"))
      val ds = DataStream.fromValues(schema, Seq(
        Array("1", "2"),
        Array("3", "4"),
        Array("5", "6"),
        Array("7", "8")
      ))
      ds.to(HiveSink("default", "wibble").withCreateTable(true))

      assume(new File(s"$basePath/core-site.xml").exists)

      HiveTable("default", "wibble").paths(false, false).size should be > 1
      Compactor("default", "wibble").compactTo(1)
      HiveTable("default", "wibble").paths(false, false).size should be
      1
    }
    "merge the contents" ignore {
      assume(new File(s"$basePath/core-site.xml").exists)

      HiveSource("default", "wibble").toDataStream().collectValues shouldBe Seq(
        Array("1", "2"),
        Array("3", "4"),
        Array("5", "6"),
        Array("7", "8")
      )
    }
  }
} 
Example 34
Source File: OrcMultipleFileSpeedTest.scala    From eel-sdk   with Apache License 2.0 5 votes vote down vote up
package io.eels.component.orc

import java.io.File

import com.sksamuel.exts.metrics.Timed
import io.eels.datastream.DataStream
import io.eels.schema.StructType
import io.eels.{FilePattern, Row}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

import scala.util.Random


object OrcMultipleFileSpeedTest extends App with Timed {

  val size = 5000000
  val count = 20
  val schema = StructType("a", "b", "c", "d", "e")

  def createRow = Row(schema, Random.nextBoolean(), Random.nextFloat(), Random.nextGaussian(), Random.nextLong(), Random.nextString(4))

  implicit val conf = new Configuration()
  implicit val fs = FileSystem.getLocal(new Configuration())

  val dir = new Path("orc-speed-test")
  new File(dir.toString).mkdirs()

  timed("Insertion") {
    val ds = DataStream.fromIterator(schema, Iterator.continually(createRow).take(size))
    new File(dir.toString).listFiles().foreach(_.delete)
    ds.to(OrcSink(new Path("orc-speed-test/orc_speed.pq")).withOverwrite(true), count)
  }

  for (_ <- 1 to 25) {
    assert(count == FilePattern("orc-speed-test/*").toPaths().size)

    timed("Reading with OrcSource") {
      val actual = OrcSource("orc-speed-test/*").toDataStream().map { row => row }.filter(_ => true).size
      assert(actual == size, s"Expected $size but was $actual")
    }

    println("")
    println("---------")
    println("")
  }
} 
Example 35
Source File: OrcPredicateTest.scala    From eel-sdk   with Apache License 2.0 5 votes vote down vote up
package io.eels.component.orc

import java.io.{File, FilenameFilter}

import io.eels.Predicate
import io.eels.datastream.DataStream
import io.eels.schema.{Field, LongType, StringType, StructType}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers}

class OrcPredicateTest extends FlatSpec with Matchers with BeforeAndAfterAll {
  cleanUpResidualOrcTestFiles

  val schema = StructType(
    Field("name", StringType, nullable = true),
    Field("city", StringType, nullable = true),
    Field("age", LongType.Signed, nullable = true)
  )

  val values = Vector.fill(1000) {
    Vector("sam", "middlesbrough", 37)
  } ++ Vector.fill(1000) {
    Vector("laura", "iowa city", 24)
  }

  val ds = DataStream.fromValues(schema, values)

  implicit val conf = new Configuration()
  implicit val fs = FileSystem.get(new Configuration())
  val path = new Path("test.orc")

  if (fs.exists(path))
    fs.delete(path, false)

  new File(path.toString).deleteOnExit()

  ds.to(OrcSink(path).withRowIndexStride(1000))

  override protected def afterAll(): Unit = fs.delete(path, false)

  "OrcSource" should "support string equals predicates" in {
    conf.set("eel.orc.predicate.row.filter", "false")
    val rows = OrcSource(path).withPredicate(Predicate.equals("name", "sam")).toDataStream().collect
    rows.map(_.values).toSet shouldBe Set(Vector("sam", "middlesbrough", 37L))
  }

  it should "support gt predicates" in {
    conf.set("eel.orc.predicate.row.filter", "false")
    val rows = OrcSource(path).withPredicate(Predicate.gt("age", 30L)).toDataStream().collect
    rows.map(_.values).toSet shouldBe Set(Vector("sam", "middlesbrough", 37L))
  }

  it should "support lt predicates" in {
    conf.set("eel.orc.predicate.row.filter", "false")
    val rows = OrcSource(path).withPredicate(Predicate.lt("age", 30)).toDataStream().collect
    rows.map(_.values).toSet shouldBe Set(Vector("laura", "iowa city", 24L))
  }

  it should "enable row level filtering with predicates by default" in {
    conf.set("eel.orc.predicate.row.filter", "true")
    val rows = OrcSource(path).withPredicate(Predicate.equals("name", "sam")).toDataStream().collect
    rows.head.schema shouldBe schema
    rows.head.values shouldBe Vector("sam", "middlesbrough", 37L)
  }

  private def cleanUpResidualOrcTestFiles = {
    new File(".").listFiles(new FilenameFilter {
      override def accept(dir: File, name: String): Boolean = {
        (name.startsWith("test_") && name.endsWith(".orc")) || (name.startsWith(".test_") && name.endsWith(".orc.crc"))
      }
    }).foreach(_.delete())
  }
} 
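Since Scala 2.12, the anonymous FilenameFilter in cleanUpResidualOrcTestFiles can also be written as a lambda through SAM conversion; an equivalent sketch:

import java.io.File

object OrcResidueCleanupSketch extends App {
  // listFiles may return null when "." is not a directory, hence the Option wrapper.
  val residue = Option(new File(".").listFiles { (_: File, name: String) =>
    (name.startsWith("test_") && name.endsWith(".orc")) ||
      (name.startsWith(".test_") && name.endsWith(".orc.crc"))
  }).getOrElse(Array.empty[File])
  residue.foreach(_.delete())
}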
Example 36
Source File: KafkaTestUtils.scala    From spark-kafka-writer   with Apache License 2.0 5 votes vote down vote up
package com.github.benfradet.spark.kafka.writer

import java.io.File
import java.net.InetSocketAddress
import java.util.Arrays.asList
import java.util.Properties

import kafka.server.{KafkaConfig, KafkaServerStartable}
import org.apache.kafka.clients.admin.{AdminClient, NewTopic}
import org.apache.zookeeper.server.{NIOServerCnxnFactory, ZooKeeperServer}

import scala.util.Random

class KafkaTestUtils {
  // zk
  private val zkHost = "localhost"
  private val zkPort = 2181
  private var zk: EmbeddedZookeeper = _
  private var zkReady = false

  // kafka
  private val brokerHost = "localhost"
  private val brokerPort = 9092
  private var kafkaServer: KafkaServerStartable = _
  private var topicCountMap = Map.empty[String, Int]
  private var brokerReady = false
  private var kafkaAdminClient: AdminClient = _

  
  // Creates each topic with a single partition and replication factor 1, then
  // waits briefly so the broker has time to register it.
  @scala.annotation.varargs
  def createTopics(topics: String*): Unit =
    for (topic <- topics) {
      kafkaAdminClient.createTopics(asList(new NewTopic(topic, 1, 1: Short)))
      Thread.sleep(1000)
      topicCountMap = topicCountMap + (topic -> 1)
    }

  private def brokerProps: Properties = {
    val props = new Properties
    props.put("broker.id", "0")
    props.put("host.name", brokerHost)
    props.put("log.dir",
      {
        val dir = System.getProperty("java.io.tmpdir") +
          "/logDir-" + new Random().nextInt(Int.MaxValue)
        val f = new File(dir)
        f.mkdirs()
        dir
      }
    )
    props.put("port", brokerPort.toString)
    props.put("zookeeper.connect", zkAddress)
    props.put("zookeeper.connection.timeout.ms", "10000")
    props.put("offsets.topic.replication.factor", "1")
    props
  }

  private class EmbeddedZookeeper(hostname: String, port: Int) {
    private val snapshotDir = {
      val f = new File(System.getProperty("java.io.tmpdir"),
        "snapshotDir-" + Random.nextInt(Int.MaxValue))
      f.mkdirs()
      f
    }
    private val logDir = {
      val f = new File(System.getProperty("java.io.tmpdir"),
        "logDir-" + Random.nextInt(Int.MaxValue))
      f.mkdirs()
      f
    }

    private val factory = {
      val zkTickTime = 500
      val zk = new ZooKeeperServer(snapshotDir, logDir, zkTickTime)
      val f = new NIOServerCnxnFactory
      val maxCnxn = 16
      f.configure(new InetSocketAddress(hostname, port), maxCnxn)
      f.startup(zk)
      f
    }

    def shutdown(): Unit = {
      factory.shutdown()
      snapshotDir.delete()
      logDir.delete()
      ()
    }
  }
} 
Example 37
Source File: TotalTweetsScheduler.scala    From redrock   with Apache License 2.0 5 votes vote down vote up
package com.restapi

import java.io.{File, FileInputStream}

import akka.actor.{ActorRef, Actor, ActorSystem, Props}
import akka.io.IO
import org.slf4j.LoggerFactory
import play.api.libs.json.Json
import spray.can.Http
import akka.pattern.ask
import spray.http.DateTime
import scala.concurrent.duration._
import akka.util.Timeout
import scala.concurrent.ExecutionContext.Implicits.global
import org.apache.commons.codec.digest.DigestUtils
import scala.io.Source

case object GetTotalTweetsScheduler

object CurrentTotalTweets {
  @volatile
  var totalTweets: Long = 0
}

class ExecuterTotalTweetsES(delay: FiniteDuration, interval: FiniteDuration) extends Actor {
  context.system.scheduler.schedule(delay, interval) {
    getTotalTweetsES
  }

  val logger = LoggerFactory.getLogger(this.getClass)

  override def receive: Actor.Receive = {
    case GetTotalTweetsScheduler => {
      logger.info(s"Getting Total of Tweets. Begin: ${CurrentTotalTweets.totalTweets}")
    }
    case _ => // just ignore any messages
  }

  def getTotalTweetsES: Unit = {
    val elasticsearchRequests = new GetElasticsearchResponse(0, Array[String](), Array[String](),
      LoadConf.restConf.getString("searchParam.defaulStartDatetime"),
      LoadConf.restConf.getString("searchParam.defaultEndDatetime"),
      LoadConf.esConf.getString("decahoseIndexName"))
    val totalTweetsResponse = Json.parse(elasticsearchRequests.getTotalTweetsESResponse())
    logger.info(s"Getting Total of Tweets. Current: ${CurrentTotalTweets.totalTweets}")
    CurrentTotalTweets.totalTweets = (totalTweetsResponse \ "hits" \ "total").as[Long]
    logger.info(s"Total users updated. New: ${CurrentTotalTweets.totalTweets}")
  }
} 
Example 38
Source File: package.scala    From sbt-reactive-app   with Apache License 2.0 5 votes vote down vote up
package com.lightbend.rp.sbtreactiveapp

import java.io.File
import java.nio.file.Paths
import org.apache.tools.ant.filters.StringInputStream
import sbt.Logger
import scala.collection.immutable.Seq
import scala.sys.process.{ Process, ProcessLogger }

package object cmd {
  
  // Runs an external command in `cwd`, optionally feeding `input` to stdin, and
  // returns (exit code, stdout lines, stderr lines).
  private[cmd] def run(
    cwd: File = Paths.get(".").toRealPath().toFile,
    env: Map[String, String] = Map.empty,
    input: Option[String] = None,
    logStdErr: Option[Logger] = None,
    logStdOut: Option[Logger] = None)(args: String*): (Int, Seq[String], Seq[String]) = {
    var outList = List.empty[String]
    var errList = List.empty[String]

    val stringLogger = ProcessLogger(
      { s =>
        outList = s :: outList

        logStdOut.foreach(_.info(s))
      },
      { s =>
        errList = s :: errList

        logStdErr.foreach(_.error(s))
      })

    val exitCode =
      input
        .map(new StringInputStream(_))
        .foldLeft(Process(args, cwd = cwd, env.toVector: _*))(_ #< _)
        .run(stringLogger)
        .exitValue()

    (exitCode, outList.reverse, errList.reverse)
  }

  private[cmd] def runSuccess(failMsg: String)(result: (Int, Seq[String], Seq[String])): Unit = {
    if (result._1 != 0) {
      sys.error(s"$failMsg [${result._1}]")
    }
  }
} 
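Because run and runSuccess are private[cmd], they are only callable from inside the cmd package; a minimal sketch of the curried call style (the command itself is illustrative):

package com.lightbend.rp.sbtreactiveapp.cmd

object RunSketch {
  // Runs `ls -la` in the default working directory and fails loudly on a non-zero exit.
  def listCurrentDirectory(): Seq[String] = {
    val result = run()("ls", "-la")
    runSuccess("ls failed")(result)
    result._2 // stdout lines
  }
}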
Example 39
Source File: package.scala    From Principles-of-Reactive-Programming   with GNU General Public License v3.0 5 votes vote down vote up
import java.io.File

package object common {

  
  // Helper assumed from the original course sources (it is referenced below but was
  // not included in this snippet): resolves a child path by folding over the segments.
  def subFile(file: File, children: String*): File =
    children.foldLeft(file)((f, child) => new File(f, child))

  // Returns an input stream for a resource under src/main/resources, if it exists.
  def resourceAsStreamFromSrc(resourcePath: List[String]): Option[java.io.InputStream] = {
    val classesDir = new File(getClass.getResource(".").toURI)
    val projectDir = classesDir.getParentFile.getParentFile.getParentFile.getParentFile
    val resourceFile = subFile(projectDir, ("src" :: "main" :: "resources" :: resourcePath): _*)
    if (resourceFile.exists)
      Some(new java.io.FileInputStream(resourceFile))
    else
      None
  }
} 
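A brief usage sketch for the helper above (the resource name is hypothetical):

object ResourceSketch extends App {
  import common._

  // Looks for src/main/resources/words.txt relative to the compiled classes directory.
  resourceAsStreamFromSrc(List("words.txt")) match {
    case Some(in) => try println("resource found") finally in.close()
    case None     => println("resource not found")
  }
}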
Example 45
Source File: SidechainSettingsReader.scala    From Sidechains-SDK   with MIT License 5 votes vote down vote up
package com.horizen

import java.io.File
import java.net.URL
import java.util.{Optional => JOptional}

import com.typesafe.config.{Config, ConfigFactory}
import net.ceedubs.ficus.Ficus._
import net.ceedubs.ficus.readers.ArbitraryTypeReader._
import scorex.core.settings.{ScorexSettings, SettingsReaders}
import scorex.util.ScorexLogging

import scala.compat.java8.OptionConverters.toScala


object SidechainSettingsReader
  extends ScorexLogging
    with SettingsReaders
{
  protected val sidechainSettingsName = "sidechain-sdk-settings.conf"

  def fromConfig(config: Config): SidechainSettings = {
    val webSocketConnectorConfiguration = config.as[WebSocketSettings]("scorex.websocket")
    val scorexSettings = config.as[ScorexSettings]("scorex")
    val genesisSetting = config.as[GenesisDataSettings]("scorex.genesis")
    val backwardTransfer = config.as[withdrawalEpochCertificateSettings]("scorex.withdrawalEpochCertificate")
    val walletSetting = config.as[WalletSettings]("scorex.wallet")
    SidechainSettings(scorexSettings, genesisSetting, webSocketConnectorConfiguration, backwardTransfer, walletSetting)
  }

  def readConfigFromPath(userConfigPath: String, applicationConfigPath: Option[String]): Config = {

    val userConfigFile: File = new File(userConfigPath)

    val userConfig: Option[Config] = if (userConfigFile.exists()) {
      Some(ConfigFactory.parseFile(userConfigFile))
    } else None

    val applicationConfigURL: Option[URL] = applicationConfigPath.map(filename => new File(filename))
      .filter(_.exists()).map(_.toURI.toURL)
      .orElse(applicationConfigPath.map(r => getClass.getClassLoader.getResource(r)))

    val applicationConfig: Option[Config] = if (applicationConfigURL.isDefined) {
      Some(ConfigFactory.parseURL(applicationConfigURL.get))
    } else None

    var config: Config = ConfigFactory.defaultOverrides()

    if (userConfig.isDefined)
      config = config.withFallback(userConfig.get)

    if (applicationConfig.isDefined)
      config = config.withFallback(applicationConfig.get)

    config = config
      .withFallback(ConfigFactory.parseResources(sidechainSettingsName))
      .withFallback(ConfigFactory.defaultReference())
      .resolve()

    config
  }

  def readConfigFromPath(userConfigPath: String, applicationConfigPath: JOptional[String]) : Config =
    readConfigFromPath(userConfigPath, toScala(applicationConfigPath))

  def read(userConfigPath: String, applicationConfigPath: Option[String]) : SidechainSettings =
    fromConfig(readConfigFromPath(userConfigPath, applicationConfigPath))
} 
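A minimal sketch of loading settings through the reader above (the user config path is a placeholder):

import com.horizen.{SidechainSettings, SidechainSettingsReader}

object SettingsSketch extends App {
  // Merges the user config with the bundled sidechain-sdk-settings.conf and reference.conf.
  val settings: SidechainSettings =
    SidechainSettingsReader.read("/path/to/user-settings.conf", None)
  println(settings)
}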
Example 46
Source File: VersionedLevelDbStorageAdapter.scala    From Sidechains-SDK   with MIT License 5 votes vote down vote up
package com.horizen.storage.leveldb

import java.io.File
import java.util
import java.util.{Optional, List => JList}

import com.horizen.storage.Storage
import com.horizen.storage.leveldb.LDBFactory.factory
import com.horizen.utils.{Pair => JPair, _}
import org.iq80.leveldb.Options

import scala.collection.JavaConverters._
import scala.compat.java8.OptionConverters._



// Exposes a versioned LevelDB key-value store through the Java-friendly Storage interface.
class VersionedLevelDbStorageAdapter(pathToDB: String, keepVersions: Int) extends Storage {
  private val dataBase: VersionedLDBKVStore = createDb(pathToDB)

  override def get(key: ByteArrayWrapper): Optional[ByteArrayWrapper] = dataBase.get(key).map(byteArrayToWrapper).asJava

  override def getOrElse(key: ByteArrayWrapper, defaultValue: ByteArrayWrapper): ByteArrayWrapper = dataBase.getOrElse(key, defaultValue)

  override def get(keys: JList[ByteArrayWrapper]): JList[JPair[ByteArrayWrapper, Optional[ByteArrayWrapper]]] = {
    dataBase.get(keys.asScala.map(_.data))
      .map{case (key, value) =>
        new JPair(byteArrayToWrapper(key), value.map(v => byteArrayToWrapper(v)).asJava)}
      .asJava
  }

  override def getAll: JList[JPair[ByteArrayWrapper, ByteArrayWrapper]] = {
    dataBase.getAll
      .map{case (key, value) => new JPair(byteArrayToWrapper(key), byteArrayToWrapper(value))}
      .asJava
  }

  override def lastVersionID(): Optional[ByteArrayWrapper] = dataBase.versions.lastOption.map(byteArrayToWrapper).asJava

  override def update(version: ByteArrayWrapper, toUpdate: JList[JPair[ByteArrayWrapper, ByteArrayWrapper]], toRemove: util.List[ByteArrayWrapper]): Unit = {

    val toUpdateAsScala = toUpdate.asScala.toList
    val toRemoveAsScala = toRemove.asScala.toList

    //key for storing version shall not be used as key in any key-value pair in VersionedLDBKVStore
    require(!toUpdateAsScala.exists(pair => pair.getKey == version) && !toRemoveAsScala.contains(version))

    val convertedToUpdate = toUpdateAsScala.map(pair => (pair.getKey.data, pair.getValue.data))
    val convertedToRemove = toRemoveAsScala.map(_.data)
    dataBase.update(convertedToUpdate, convertedToRemove)(version)
  }

  override def rollback(versionID: ByteArrayWrapper): Unit = dataBase.rollbackTo(versionID)

  override def rollbackVersions(): JList[ByteArrayWrapper] = dataBase.versions.map(byteArrayToWrapper).asJava

  override def close(): Unit = dataBase.close()

  def createDb(path: String): VersionedLDBKVStore = {
    val dir = new File(path)
    dir.mkdirs()
    val options = new Options()
    options.createIfMissing(true)
    val db = factory.open(dir, options)
    new VersionedLDBKVStore(db, keepVersions)
  }

  override def isEmpty: Boolean = dataBase.versions.isEmpty
} 
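A minimal usage sketch for the adapter above (the database path and keys are illustrative, and ByteArrayWrapper's byte-array constructor is assumed):

import java.util.Collections

import com.horizen.storage.leveldb.VersionedLevelDbStorageAdapter
import com.horizen.utils.{ByteArrayWrapper, Pair => JPair}

object StorageSketch extends App {
  val storage = new VersionedLevelDbStorageAdapter("/tmp/demo-db", keepVersions = 10)

  val version = new ByteArrayWrapper(Array.fill[Byte](32)(1))
  val key     = new ByteArrayWrapper("key".getBytes)
  val value   = new ByteArrayWrapper("value".getBytes)

  // Store one pair under a fresh version, read it back, then close the underlying LevelDB.
  storage.update(version, Collections.singletonList(new JPair(key, value)), Collections.emptyList[ByteArrayWrapper]())
  println(storage.get(key))        // Optional containing the stored value
  println(storage.lastVersionID()) // Optional containing the version
  storage.close()
}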
Example 47
Source File: SigProofTest.scala    From Sidechains-SDK   with MIT License 5 votes vote down vote up
package com.horizen

import java.io.{BufferedReader, File, FileReader}
import java.util.Optional
import java.{lang, util}

import com.horizen.box.WithdrawalRequestBox
import com.horizen.box.data.WithdrawalRequestBoxData
import com.horizen.cryptolibprovider.{SchnorrFunctionsImplZendoo, ThresholdSignatureCircuitImplZendoo}
import com.horizen.proposition.MCPublicKeyHashProposition
import com.horizen.schnorrnative.SchnorrSecretKey
import com.horizen.utils.BytesUtils
import org.junit.Assert.{assertEquals, assertTrue}
import org.junit.{Ignore, Test}

import scala.collection.JavaConverters._
import scala.util.Random

class SigProofTest {
  private val classLoader: ClassLoader = getClass.getClassLoader
  private val sigCircuit: ThresholdSignatureCircuitImplZendoo = new ThresholdSignatureCircuitImplZendoo()
  private val schnorrFunctions: SchnorrFunctionsImplZendoo = new SchnorrFunctionsImplZendoo()

  private def buildSchnorrPrivateKey(index: Int): SchnorrSecretKey = {
    var bytes: Array[Byte] = null
    try {
      val resourceName = "schnorr_sk0"+ index + "_hex"
      val file = new FileReader(classLoader.getResource(resourceName).getFile)
      bytes = BytesUtils.fromHexString(new BufferedReader(file).readLine())
    }
    catch {
      case e: Exception =>
        assertEquals(e.toString(), true, false)
    }

    SchnorrSecretKey.deserialize(bytes)
  }

  //Test will take around 2 minutes, enable for sanity checking of ThresholdSignatureCircuit
  @Ignore
  @Test
  def simpleCheck(): Unit = {
    val keyPairsLen = 7
    val threshold = 5 //hardcoded value

    val keyPairs = (0 until keyPairsLen).view.map(buildSchnorrPrivateKey).map(secret => (secret, secret.getPublicKey))
    val publicKeysBytes: util.List[Array[Byte]] = keyPairs.map(_._2.serializePublicKey()).toList.asJava
    val provingKeyPath = new File(classLoader.getResource("sample_proving_key_7_keys_with_threshold_5").getFile).getAbsolutePath;
    val verificationKeyPath = new File(classLoader.getResource("sample_vk_7_keys_with_threshold_5").getFile).getAbsolutePath;

    val sysConstant = sigCircuit.generateSysDataConstant(publicKeysBytes, threshold)

    val mcBlockHash = Array.fill(32)(Random.nextInt().toByte)
    val previousMcBlockHash = Array.fill(32)(Random.nextInt().toByte)

    val wb: util.List[WithdrawalRequestBox] = Seq(new WithdrawalRequestBox(new WithdrawalRequestBoxData(new MCPublicKeyHashProposition(Array.fill(20)(Random.nextInt().toByte)), 2345), 42)).asJava

    val messageToBeSigned = sigCircuit.generateMessageToBeSigned(wb, mcBlockHash, previousMcBlockHash)

    val emptySigs = List.fill[Optional[Array[Byte]]](keyPairsLen - threshold)(Optional.empty[Array[Byte]]())
    val signatures: util.List[Optional[Array[Byte]]] = (keyPairs
      .map{case (secret, public) => schnorrFunctions.sign(secret.serializeSecretKey(), public.serializePublicKey(), messageToBeSigned)}
      .map(b => Optional.of(b))
      .take(threshold)
      .toList ++ emptySigs)
      .asJava

    val proofAndQuality: utils.Pair[Array[Byte], lang.Long] = sigCircuit.createProof(wb, mcBlockHash, previousMcBlockHash, publicKeysBytes, signatures, threshold, provingKeyPath)

    val result = sigCircuit.verifyProof(wb, mcBlockHash, previousMcBlockHash, proofAndQuality.getValue, proofAndQuality.getKey, sysConstant, verificationKeyPath)

    assertTrue("Proof verification expected to be successfully", result)
  }

} 
Example 48
Source File: AccStorage.scala    From EncryCore   with GNU General Public License v3.0 5 votes vote down vote up
package encry.api.http

import java.io.File
import cats.syntax.either._
import com.typesafe.scalalogging.StrictLogging
import encry.settings.EncryAppSettings
import encry.storage.VersionalStorage.StorageKey
import encry.storage.levelDb.versionalLevelDB.LevelDbFactory
import org.encryfoundation.common.utils.Algos
import org.iq80.leveldb.{DB, Options}
import scorex.utils.Random
import supertagged.TaggedType

trait AccStorage extends StrictLogging with AutoCloseable {

  val storage: DB

  val verifyPassword: String => Boolean = pass => {
    val salt = storage.get(AccStorage.SaltKey)
    val passHash = storage.get(AccStorage.PasswordHashKey)
    Algos.hash(pass.getBytes() ++ salt) sameElements passHash
  }

  def setPassword(pass: String): Either[Throwable, Unit] = {
    val batch = storage.createWriteBatch()
    val salt = Random.randomBytes()
    try {
      batch.put(AccStorage.PasswordHashKey, Algos.hash(pass.getBytes() ++ salt))
      batch.put(AccStorage.SaltKey, salt)
      storage.write(batch).asRight[Throwable]
    } catch {
      case err: Throwable => err.asLeft[Unit]
    }
    finally {
      batch.close()
    }
  }

  override def close(): Unit = storage.close()

}

object AccStorage extends StrictLogging {

  object PasswordHash extends TaggedType[Array[Byte]]
  object PasswordSalt extends TaggedType[Array[Byte]]

  type PasswordHash = PasswordHash.Type
  type PasswordSalt = PasswordSalt.Type

  val PasswordHashKey: StorageKey = StorageKey @@ Algos.hash("Password_Key")
  val SaltKey: StorageKey = StorageKey @@ Algos.hash("Salt_Key")

  def getDirStorage(settings: EncryAppSettings): File = new File(s"${settings.directory}/userKeys")

  def init(settings: EncryAppSettings): AccStorage = new AccStorage {
    override val storage: DB = LevelDbFactory.factory.open(getDirStorage(settings), new Options)
  }

} 
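A small sketch of the password flow above, assuming an EncryAppSettings value is already loaded elsewhere:

import encry.api.http.AccStorage
import encry.settings.EncryAppSettings

object AccStorageSketch {
  def passwordRoundTrip(settings: EncryAppSettings): Boolean = {
    val accounts = AccStorage.init(settings)
    accounts.setPassword("correct horse battery staple") match {
      case Right(_)  => () // hash and salt written to LevelDB
      case Left(err) => throw err
    }
    val ok = accounts.verifyPassword("correct horse battery staple") // true
    accounts.close()
    ok
  }
}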
Example 49
Source File: SettingsReaders.scala    From EncryCore   with GNU General Public License v3.0 5 votes vote down vote up
package encry.settings

import java.io.File
import java.net.InetSocketAddress

import com.typesafe.config.Config
import encry.storage.VersionalStorage
import encry.storage.VersionalStorage.StorageType
import encry.utils.ByteStr
import net.ceedubs.ficus.readers.ValueReader
import org.encryfoundation.common.utils.constants.{Constants, TestNetConstants}

trait SettingsReaders {
  implicit val byteStrReader: ValueReader[ByteStr] = (cfg, path) => ByteStr.decodeBase58(cfg.getString(path)).get
  implicit val storageTypeReader: ValueReader[StorageType] = (cfg, path) => cfg.getString(path) match {
    case "iodb"    => VersionalStorage.IODB
    case "LevelDb" => VersionalStorage.LevelDB
  }
  implicit val fileReader: ValueReader[File] = (cfg, path) => new File(cfg.getString(path))
  implicit val byteValueReader: ValueReader[Byte] = (cfg, path) => cfg.getInt(path).toByte
  implicit val inetSocketAddressReader: ValueReader[InetSocketAddress] = { (config: Config, path: String) =>
    val split = config.getString(path).split(":")
    new InetSocketAddress(split(0), split(1).toInt)
  }

  implicit val ConstantsSettingsReader: ValueReader[Constants] = (cfg, path) => {
    def getConstants(constantsClass: String): Constants = {
      constantsClass match {
        case "TestConstants" => TestConstants
        case "SlowMiningConstants" => SlowMiningConstants
        case _ => TestNetConstants
      }
    }
    getConstants(
      if (cfg.hasPath(path)) cfg.getString(path) else ""
    )
  }

} 
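A short sketch of reading a java.io.File through the ficus ValueReader above (the config key is illustrative):

import java.io.File

import com.typesafe.config.ConfigFactory
import encry.settings.SettingsReaders
import net.ceedubs.ficus.Ficus._

object FileReaderSketch extends App with SettingsReaders {
  val cfg = ConfigFactory.parseString("encry.directory = \"/tmp/encry-data\"")
  // fileReader converts the string at the path into a java.io.File.
  val dir: File = cfg.as[File]("encry.directory")
  println(dir.getAbsolutePath)
}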
Example 50
Source File: RootNodesStorageTest.scala    From EncryCore   with GNU General Public License v3.0 5 votes vote down vote up
package encry.storage

import java.io.File

import encry.view.state.avlTree.utils.implicits.Instances._
import encry.modifiers.InstanceFactory
import encry.storage.VersionalStorage.{StorageKey, StorageValue, StorageVersion}
import encry.storage.levelDb.versionalLevelDB.{LevelDbFactory, VLDBWrapper, VersionalLevelDBCompanion}
import encry.utils.{EncryGenerator, FileHelper}
import encry.view.state.avlTree.AvlTree
import org.encryfoundation.common.utils.Algos
import org.encryfoundation.common.utils.TaggedTypes.Height
import org.iq80.leveldb.{DB, Options, ReadOptions}
import org.scalatest.{FunSuite, Matchers, PropSpec}
import scorex.utils.Random

import scala.util.{Random => SRandom}

class RootNodesStorageTest extends PropSpec with InstanceFactory with EncryGenerator with Matchers {

  def createAvl: AvlTree[StorageKey, StorageValue] = {
    val firstDir: File = FileHelper.getRandomTempDir
    val firstStorage: VLDBWrapper = {
      val levelDBInit = LevelDbFactory.factory.open(firstDir, new Options)
      VLDBWrapper(VersionalLevelDBCompanion(levelDBInit, settings.levelDB.copy(keySize = 33), keySize = 33))
    }
    val dir: File = FileHelper.getRandomTempDir
    val levelDb: DB = LevelDbFactory.factory.open(dir, new Options)
    AvlTree[StorageKey, StorageValue](firstStorage, RootNodesStorage.emptyRootStorage[StorageKey, StorageValue])
  }

  property("testRollback") {
    val avl: AvlTree[StorageKey, StorageValue] = createAvl
    val dir: File = FileHelper.getRandomTempDir
    val levelDb: DB = LevelDbFactory.factory.open(dir, new Options)
    val batch1 = levelDb.createWriteBatch()
    val readOptions1 = new ReadOptions()
    val rootNodesStorage = RootNodesStorage[StorageKey, StorageValue](levelDb, 10, dir)
    val (_, avlAfterInsertions, insertList) =
      (0 to SRandom.nextInt(1000) + 10).foldLeft(rootNodesStorage, avl, List.empty[(Height, (List[(StorageKey, StorageValue)], List[StorageKey]))]) {
      case ((rootStorage, previousAvl, insertionList), height) =>
        val version = StorageVersion @@ Random.randomBytes()
        val toInsert = (0 to SRandom.nextInt(100)).foldLeft(List.empty[(StorageKey, StorageValue)]) {
          case (list, _) => (StorageKey @@ Random.randomBytes() -> StorageValue @@ Random.randomBytes()) :: list
        }
        val previousInsertions = insertionList.lastOption.map(_._2._1).getOrElse(List.empty[(StorageKey, StorageValue)])
        val deletions = previousInsertions.take(1).map(_._1)
        val newAvl = previousAvl.insertAndDeleteMany(
          version,
          toInsert,
          deletions
        )
        val newRootStorage = rootStorage.insert(
          version,
          newAvl.rootNode,
          Height @@ height
        )
        (newRootStorage, newAvl, insertionList :+ (Height @@ height -> (toInsert -> deletions)))
    }
    val (_, rootNodeRestored) = rootNodesStorage.rollbackToSafePoint(insertList.dropWhile(_._1 != rootNodesStorage.safePointHeight).drop(1))
    (avlAfterInsertions.rootNode.hash sameElements rootNodeRestored.hash) shouldBe true
  }
} 
Example 51
Source File: SnapshotAssemblerBench.scala    From EncryCore   with GNU General Public License v3.0 5 votes vote down vote up
package benches

import java.io.File
import java.util.concurrent.TimeUnit

import benches.SnapshotAssemblerBench.SnapshotAssemblerBenchState
import encry.view.state.avlTree.utils.implicits.Instances._
import benches.StateBenches.{StateBenchState, benchSettings}
import benches.Utils.{getRandomTempDir, utxoFromBoxHolder}
import encry.settings.Settings
import encry.storage.{RootNodesStorage, VersionalStorage}
import encry.storage.VersionalStorage.{StorageKey, StorageValue, StorageVersion}
import encry.storage.levelDb.versionalLevelDB.{LevelDbFactory, VLDBWrapper, VersionalLevelDBCompanion}
import encry.utils.FileHelper
import encry.view.fast.sync.SnapshotHolder
import encry.view.state.UtxoState
import encry.view.state.avlTree.AvlTree
import org.encryfoundation.common.utils.TaggedTypes.Height
import org.iq80.leveldb.{DB, Options}
import org.openjdk.jmh.annotations.{Benchmark, Mode, Scope, State}
import org.openjdk.jmh.infra.Blackhole
import org.openjdk.jmh.profile.GCProfiler
import org.openjdk.jmh.runner.{Runner, RunnerException}
import org.openjdk.jmh.runner.options.{OptionsBuilder, TimeValue, VerboseMode}
import scorex.utils.Random

class SnapshotAssemblerBench {

  
  @Benchmark
  def createTree(stateBench: SnapshotAssemblerBenchState, bh: Blackhole): Unit = {
    bh.consume {
      //stateBench.a.initializeSnapshotData(stateBench.block1)
    }
  }
}
object SnapshotAssemblerBench {

  @throws[RunnerException]
  def main(args: Array[String]): Unit = {
    val opt = new OptionsBuilder()
      .include(".*" + classOf[SnapshotAssemblerBench].getSimpleName + ".*")
      .forks(1)
      .threads(1)
      .warmupIterations(benchSettings.benchesSettings.warmUpIterations)
      .measurementIterations(benchSettings.benchesSettings.measurementIterations)
      .mode(Mode.AverageTime)
      .timeUnit(TimeUnit.SECONDS)
      .verbosity(VerboseMode.EXTRA)
      .addProfiler(classOf[GCProfiler])
      .warmupTime(TimeValue.milliseconds(benchSettings.benchesSettings.warmUpTime))
      .measurementTime(TimeValue.milliseconds(benchSettings.benchesSettings.measurementTime))
      .build
    new Runner(opt).run
  }

  @State(Scope.Benchmark)
  class SnapshotAssemblerBenchState extends Settings {

    val a: AvlTree[StorageKey, StorageValue] =
      createAvl("9gKDVmfsA6J4b78jDBx6JmS86Zph98NnjnUqTJBkW7zitQMReia", 0, 500000)
    val block1                              = Utils.generateGenesisBlock(Height @@ 1)


    def createAvl(address: String, from: Int, to: Int): AvlTree[StorageKey, StorageValue] = {
      val firstDir: File = FileHelper.getRandomTempDir
      val firstStorage: VLDBWrapper = {
        val levelDBInit = LevelDbFactory.factory.open(firstDir, new Options)
        VLDBWrapper(VersionalLevelDBCompanion(levelDBInit, settings.levelDB, keySize = 32))
      }
      val dir: File = FileHelper.getRandomTempDir
      val levelDb: DB = LevelDbFactory.factory.open(dir, new Options)
      val rootNodesStorage = RootNodesStorage[StorageKey, StorageValue](levelDb, 10, dir)

      val firstAvl: AvlTree[StorageKey, StorageValue] = AvlTree[StorageKey, StorageValue](firstStorage, rootNodesStorage)
      val avlNew = (from to to).foldLeft(firstAvl) { case (avl, i) =>
        val bx = Utils.genAssetBox(address, i, nonce = i)
        val b = (StorageKey !@@ bx.id, StorageValue @@ bx.bytes)
        avl.insertAndDeleteMany(StorageVersion @@ Random.randomBytes(), List(b), List.empty)
      }
      avlNew
    }

    def tmpDir: File = FileHelper.getRandomTempDir
  }

} 
Example 52
Source File: HistoryBenches.scala    From EncryCore   with GNU General Public License v3.0 5 votes vote down vote up
package benches

import java.io.File
import java.util.concurrent.TimeUnit

import benches.HistoryBenches.HistoryBenchState
import benches.Utils._
import encry.view.history.History
import encryBenchmark.BenchSettings
import org.encryfoundation.common.modifiers.history.Block
import org.openjdk.jmh.annotations._
import org.openjdk.jmh.infra.Blackhole
import org.openjdk.jmh.profile.GCProfiler
import org.openjdk.jmh.runner.{Runner, RunnerException}
import org.openjdk.jmh.runner.options.{OptionsBuilder, TimeValue, VerboseMode}

class HistoryBenches {

  @Benchmark
  def appendBlocksToHistoryBench(benchStateHistory: HistoryBenchState, bh: Blackhole): Unit = {
    bh.consume {
      val history: History = generateHistory(benchStateHistory.settings, getRandomTempDir)
      benchStateHistory.blocks.foldLeft(history) { case (historyL, block) =>
        historyL.append(block.header)
        historyL.append(block.payload)
        historyL.reportModifierIsValid(block)
      }
      history.closeStorage()
    }
  }

  @Benchmark
  def readHistoryFileBench(benchStateHistory: HistoryBenchState, bh: Blackhole): Unit = {
    bh.consume {
      val history: History = generateHistory(benchStateHistory.settings, benchStateHistory.tmpDir)
      history.closeStorage()
    }
  }
}

object HistoryBenches extends BenchSettings {

  @throws[RunnerException]
  def main(args: Array[String]): Unit = {
    val opt = new OptionsBuilder()
      .include(".*" + classOf[HistoryBenches].getSimpleName + ".*")
      .forks(1)
      .threads(1)
      .warmupIterations(benchSettings.benchesSettings.warmUpIterations)
      .measurementIterations(benchSettings.benchesSettings.measurementIterations)
      .mode(Mode.AverageTime)
      .timeUnit(TimeUnit.SECONDS)
      .verbosity(VerboseMode.EXTRA)
      .addProfiler(classOf[GCProfiler])
      .warmupTime(TimeValue.milliseconds(benchSettings.benchesSettings.warmUpTime))
      .measurementTime(TimeValue.milliseconds(benchSettings.benchesSettings.measurementTime))
      .build
    new Runner(opt).run
  }

  @State(Scope.Benchmark)
  class HistoryBenchState extends encry.settings.Settings {

    val tmpDir: File = getRandomTempDir
    val initialHistory: History = generateHistory(settings, tmpDir)

    val resultedHistory: (History, Option[Block], Vector[Block]) =
      (0 until benchSettings.historyBenchSettings.blocksNumber)
        .foldLeft(initialHistory, Option.empty[Block], Vector.empty[Block]) {
          case ((prevHistory, prevBlock, vector), _) =>
            val block: Block =
              generateNextBlockValidForHistory(prevHistory, 0, prevBlock,  Seq(coinbaseTransaction(0)))
            prevHistory.append(block.header)
            prevHistory.append(block.payload)
            (prevHistory.reportModifierIsValid(block), Some(block), vector :+ block)
        }
    resultedHistory._1.closeStorage()

    val blocks: Vector[Block] = resultedHistory._3
  }
} 
Example 53
Source File: StateRollbackBench.scala    From EncryCore   with GNU General Public License v3.0 5 votes vote down vote up
package benches

import java.io.File
import java.util.concurrent.TimeUnit

import benches.StateRollbackBench.StateRollbackState
import benches.Utils._
import encry.storage.VersionalStorage
import encry.utils.CoreTaggedTypes.VersionTag
import encry.view.state.{BoxHolder, UtxoState}
import encryBenchmark.{BenchSettings, Settings}
import org.encryfoundation.common.modifiers.history.Block
import org.encryfoundation.common.modifiers.state.box.AssetBox
import org.encryfoundation.common.utils.TaggedTypes.{ADKey, Difficulty}
import org.openjdk.jmh.annotations.{Benchmark, Mode, Scope, State}
import org.openjdk.jmh.infra.Blackhole
import org.openjdk.jmh.profile.GCProfiler
import org.openjdk.jmh.runner.{Runner, RunnerException}
import org.openjdk.jmh.runner.options.{OptionsBuilder, TimeValue, VerboseMode}

class StateRollbackBench {

  @Benchmark
  def applyBlocksToTheState(stateBench: StateRollbackState, bh: Blackhole): Unit = {
    bh.consume {
      val innerState: UtxoState =
        utxoFromBoxHolder(stateBench.boxesHolder, getRandomTempDir, None, stateBench.settings, VersionalStorage.IODB)
      val newState = stateBench.chain.foldLeft(innerState -> List.empty[VersionTag]) { case ((state, rootHashes), block) =>
        val newState = state.applyModifier(block).right.get
        newState -> (rootHashes :+ newState.version)
      }
      val stateAfterRollback = newState._1.rollbackTo(newState._2.dropRight(1).last, List.empty).get
      val stateAfterForkBlockApplying = stateAfterRollback.applyModifier(stateBench.forkBlocks.last).right.get
      stateAfterForkBlockApplying.close()
    }
  }
}

object StateRollbackBench extends BenchSettings {

  @throws[RunnerException]
  def main(args: Array[String]): Unit = {
    val opt = new OptionsBuilder()
      .include(".*" + classOf[StateRollbackBench].getSimpleName + ".*")
      .forks(1)
      .threads(1)
      .warmupIterations(benchSettings.benchesSettings.warmUpIterations)
      .measurementIterations(benchSettings.benchesSettings.measurementIterations)
      .mode(Mode.AverageTime)
      .timeUnit(TimeUnit.SECONDS)
      .verbosity(VerboseMode.EXTRA)
      .addProfiler(classOf[GCProfiler])
      .warmupTime(TimeValue.milliseconds(benchSettings.benchesSettings.warmUpTime))
      .measurementTime(TimeValue.milliseconds(benchSettings.benchesSettings.measurementTime))
      .build
    new Runner(opt).run
  }

  @State(Scope.Benchmark)
  class StateRollbackState extends encry.settings.Settings {

    val tmpDir: File = getRandomTempDir

    val initialBoxes: IndexedSeq[AssetBox] = (0 until benchSettings.stateBenchSettings.totalBoxesNumber).map(nonce =>
      genHardcodedBox(privKey.publicImage.address.address, nonce)
    )
    val boxesHolder: BoxHolder = BoxHolder(initialBoxes)
    var state: UtxoState = utxoFromBoxHolder(boxesHolder, tmpDir, None, settings, VersionalStorage.LevelDB)
    val genesisBlock: Block = generateGenesisBlockValidForState(state)

    state = state.applyModifier(genesisBlock).right.get

    val stateGenerationResults: (List[(Block, Block)], Block, UtxoState, IndexedSeq[AssetBox]) =
      (0 until benchSettings.stateBenchSettings.blocksNumber).foldLeft(List.empty[(Block, Block)], genesisBlock, state, initialBoxes) {
        case ((blocks, block, stateL, boxes), _) =>
          val nextBlockMainChain: Block = generateNextBlockForStateWithSpendingAllPreviousBoxes(
            block,
            stateL,
            block.payload.txs.flatMap(_.newBoxes.map(_.asInstanceOf[AssetBox])).toIndexedSeq)
          val nextBlockFork: Block = generateNextBlockForStateWithSpendingAllPreviousBoxes(
            block,
            stateL,
            block.payload.txs.flatMap(_.newBoxes.map(_.asInstanceOf[AssetBox])).toIndexedSeq,
            addDiff = Difficulty @@ BigInt(100)
          )
          val stateN: UtxoState = stateL.applyModifier(nextBlockMainChain).right.get
          (blocks :+ (nextBlockMainChain, nextBlockFork),
            nextBlockMainChain,
            stateN,
            boxes.drop(
              benchSettings.stateBenchSettings.transactionsNumberInEachBlock *
                benchSettings.stateBenchSettings.numberOfInputsInOneTransaction)
          )
      }
    val chain: List[Block] = genesisBlock +: stateGenerationResults._1.map(_._1)
    val forkBlocks: List[Block] = genesisBlock +: stateGenerationResults._1.map(_._2)
    state = stateGenerationResults._3
    state.close()
  }
} 
Example 54
Source File: SparkConfig.scala    From gsoc_relationship   with Apache License 2.0 5 votes vote down vote up
package com.holmesprocessing.analytics.relationship

import java.io.File

import org.apache.spark.{SparkConf, SparkContext}
import com.typesafe.config.ConfigFactory

object SparkConfig {

  val config = ConfigFactory.parseFile(new File("./config/relationship.conf"))

  val hosts = "hosts"
  val username = "username"
  val password = "password"
  val keyspace = "keyspace"
  val analytics_knowledge_base = "analytics_knowledge_base"
  val analytics_mv_knowledge_base_by_feature = "analytics_mv_knowledge_base_by_feature"
  val analytics_primary_relationships = "analytics_primary_relationships"
  val results = "results"
  val results_meta = "results_meta"
  val results_data = "results_data"
  val objects_table = "objects_table"

  val appName = "relationship"
  val master = "localhost"

  val sparkconf = new SparkConf(true)
    .set("spark.cassandra.connection.host", hosts)
    .set("spark.cassandra.auth.username", username)
    .set("spark.cassandra.auth.password", password)

  val sc = new SparkContext(master, appName, sparkconf)
} 
Example 55
Source File: GoogleAuthentication.scala    From amadou   with Apache License 2.0 5 votes vote down vote up
package com.mediative.amadou.bigquery

import java.io.{File, FileReader}
import scala.collection.JavaConversions._
import com.google.api.client.extensions.java6.auth.oauth2.AuthorizationCodeInstalledApp
import com.google.api.client.extensions.jetty.auth.oauth2.LocalServerReceiver
import com.google.api.client.googleapis.auth.oauth2.{
  GoogleAuthorizationCodeFlow,
  GoogleClientSecrets
}
import com.google.api.client.http.{HttpRequest, HttpRequestInitializer}
import com.google.api.client.http.javanet.NetHttpTransport
import com.google.api.client.json.jackson2.JacksonFactory
import com.google.api.client.util.store.FileDataStoreFactory
import org.apache.spark.sql.SparkSession

sealed abstract class GoogleAuthentication(val scopes: String*)

object GoogleAuthentication {
  lazy val HTTP_TRANSPORT = new NetHttpTransport()
  lazy val JSON_FACTORY   = new JacksonFactory()

  case object Dbm
      extends GoogleAuthentication("https://www.googleapis.com/auth/doubleclickbidmanager")

  def apply(auth: GoogleAuthentication, spark: SparkSession): HttpRequestInitializer = auth match {
    case Dbm =>
      val clientFilePath = spark.conf.get("spark.google.cloud.auth.client.file")
      require(clientFilePath != null, "'google.cloud.auth.client.file' not configured")

      val clientFile = new File(clientFilePath)
      require(clientFile.exists, s"$clientFilePath does not exists")

      val clientSecrets    = GoogleClientSecrets.load(JSON_FACTORY, new FileReader(clientFile))
      val dataStoreFactory = new FileDataStoreFactory(clientFile.getParentFile)

      val flow = new GoogleAuthorizationCodeFlow.Builder(
        HTTP_TRANSPORT,
        JSON_FACTORY,
        clientSecrets,
        auth.scopes)
        .setDataStoreFactory(dataStoreFactory)
        .build()

      val cred = new AuthorizationCodeInstalledApp(flow, new LocalServerReceiver())
        .authorize("user")
      new CustomHttpRequestInitializer(cred)
  }

  class CustomHttpRequestInitializer(wrapped: HttpRequestInitializer)
      extends HttpRequestInitializer {
    override def initialize(httpRequest: HttpRequest) = {
      wrapped.initialize(httpRequest)
      httpRequest.setConnectTimeout(10 * 60000) // 10 minutes connect timeout
      httpRequest.setReadTimeout(10 * 60000)    // 10 minutes read timeout
      ()
    }
  }
} 
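A sketch of wiring the authentication above into a SparkSession (the client secrets path is a placeholder; running it triggers the interactive OAuth flow):

import com.mediative.amadou.bigquery.GoogleAuthentication
import org.apache.spark.sql.SparkSession

object DbmAuthSketch extends App {
  val spark = SparkSession.builder()
    .master("local[*]")
    .appName("dbm-auth-sketch")
    .config("spark.google.cloud.auth.client.file", "/path/to/client_secret.json")
    .getOrCreate()

  // Runs the installed-app OAuth flow and yields an initializer for Google API requests.
  val initializer = GoogleAuthentication(GoogleAuthentication.Dbm, spark)
  println(initializer)
  spark.stop()
}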
Example 56
Source File: config.scala    From spark-integration   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy.k8s.integrationtest

import java.io.File

import com.google.common.base.Charsets
import com.google.common.io.Files

package object config {
  def getTestImageTag: String = {
    val imageTagFileProp = System.getProperty("spark.kubernetes.test.imageTagFile")
    require(imageTagFileProp != null, "Image tag file must be provided in system properties.")
    val imageTagFile = new File(imageTagFileProp)
    require(imageTagFile.isFile, s"No file found for image tag at ${imageTagFile.getAbsolutePath}.")
    Files.toString(imageTagFile, Charsets.UTF_8).trim
  }

  def getTestImageRepo: String = {
    val imageRepo = System.getProperty("spark.kubernetes.test.imageRepo")
    require(imageRepo != null, "Image repo must be provided in system properties.")
    imageRepo
  }
} 
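A quick sketch of exercising the helpers above (property values and the temporary tag file are illustrative):

import java.nio.file.{Files, Paths}

import org.apache.spark.deploy.k8s.integrationtest.config._

object ImageConfigSketch extends App {
  // Write a tag file and point the expected system properties at it.
  val tagFile = Files.write(Paths.get("/tmp/image-tag.txt"), "latest".getBytes)
  System.setProperty("spark.kubernetes.test.imageTagFile", tagFile.toString)
  System.setProperty("spark.kubernetes.test.imageRepo", "docker.io/myrepo")

  println(s"$getTestImageRepo/spark:$getTestImageTag") // docker.io/myrepo/spark:latest
}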
Example 57
Source File: Minikube.scala    From spark-integration   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.deploy.k8s.integrationtest.backend.minikube

import java.io.File
import java.nio.file.Paths

import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient}

import org.apache.spark.deploy.k8s.integrationtest.{Logging, ProcessUtils}

// TODO support windows
private[spark] object Minikube extends Logging {

  private val MINIKUBE_STARTUP_TIMEOUT_SECONDS = 60

  def getMinikubeIp: String = {
    val outputs = executeMinikube("ip")
      .filter(_.matches("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$"))
    assert(outputs.size == 1, "Unexpected amount of output from minikube ip")
    outputs.head
  }

  def getMinikubeStatus: MinikubeStatus.Value = {
    val statusString = executeMinikube("status")
      .filter(line => line.contains("minikubeVM: ") || line.contains("minikube:"))
      .head
      .replaceFirst("minikubeVM: ", "")
      .replaceFirst("minikube: ", "")
    MinikubeStatus.unapply(statusString)
        .getOrElse(throw new IllegalStateException(s"Unknown status $statusString"))
  }

  def getKubernetesClient: DefaultKubernetesClient = {
    val kubernetesMaster = s"https://${getMinikubeIp}:8443"
    val userHome = System.getProperty("user.home")
    val kubernetesConf = new ConfigBuilder()
      .withApiVersion("v1")
      .withMasterUrl(kubernetesMaster)
      .withCaCertFile(Paths.get(userHome, ".minikube", "ca.crt").toFile.getAbsolutePath)
      .withClientCertFile(Paths.get(userHome, ".minikube", "apiserver.crt").toFile.getAbsolutePath)
      .withClientKeyFile(Paths.get(userHome, ".minikube", "apiserver.key").toFile.getAbsolutePath)
      .build()
    new DefaultKubernetesClient(kubernetesConf)
  }

  private def executeMinikube(action: String, args: String*): Seq[String] = {
    ProcessUtils.executeProcess(
      Array("bash", "-c", s"minikube $action") ++ args, MINIKUBE_STARTUP_TIMEOUT_SECONDS)
  }
}

private[spark] object MinikubeStatus extends Enumeration {

  // The following states are listed according to
  // https://github.com/docker/machine/blob/master/libmachine/state/state.go.
  val STARTING = status("Starting")
  val RUNNING = status("Running")
  val PAUSED = status("Paused")
  val STOPPING = status("Stopping")
  val STOPPED = status("Stopped")
  val ERROR = status("Error")
  val TIMEOUT = status("Timeout")
  val SAVED = status("Saved")
  val NONE = status("")

  def status(value: String): Value = new Val(nextId, value)
  def unapply(s: String): Option[Value] = values.find(s == _.toString)
} 
Example 58
Source File: RMCallbackHandler.scala    From DataXServer   with Apache License 2.0 5 votes vote down vote up
package org.tianlangstudio.data.hamal.yarn

import java.io.File
import java.util.{Collections, List}

import org.tianlangstudio.data.hamal.core.{Constants, HamalConf}

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path, FileContext}
import org.apache.hadoop.yarn.api.records._
import org.apache.hadoop.yarn.client.api.{AMRMClient, NMClient}
import org.apache.hadoop.yarn.client.api.async.AMRMClientAsync
import org.apache.hadoop.yarn.conf.YarnConfiguration
import org.apache.hadoop.yarn.util.{ConverterUtils, Records}
import scala.jdk.CollectionConverters._
//import scala.collection.JavaConverters._
/**
 * Created by zhuhq on 2016/4/29.
 */
class RMCallbackHandler(nmClient: NMClient, containerCmd: Container => String, hamalConf: HamalConf, yarnConfiguration: Configuration) extends AMRMClientAsync.CallbackHandler {
  private val logging = org.slf4j.LoggerFactory.getLogger(classOf[RMCallbackHandler])
  override def onContainersCompleted(statuses: List[ContainerStatus]): Unit = {
    for(containerStatus <- statuses.asScala) {
      logging.info(s"containerId:${containerStatus.getContainerId} exitStatus:${containerStatus.getExitStatus}")
    }
  }

  override def onError(e: Throwable): Unit = {
    logging.error("on error",e)

  }

  override def getProgress: Float = {

    0
  }

  override def onShutdownRequest(): Unit = {
    logging.info("on shutdown request")

  }

  override def onNodesUpdated(updatedNodes: List[NodeReport]): Unit = {
    logging.info("on nodes updated")
    for(nodeReport <- updatedNodes.asScala) {
      logging.info(s"node id:${nodeReport.getNodeId} node labels:${nodeReport.getNodeLabels}")
    }
  }

  override def onContainersAllocated(containers: List[Container]): Unit = {
    logging.info("on containers allocated");
    for (container:Container <- containers.asScala) {
      try {
        // Launch container by create ContainerLaunchContext
        val  ctx = Records.newRecord(classOf[ContainerLaunchContext]);

        //ctx.setCommands(Collections.singletonList(""" echo "begin";sleep 900;echo "end"; """))
        ctx.setCommands(Collections.singletonList(containerCmd(container)))
        val packagePath = hamalConf.getString(Constants.DATAX_EXECUTOR_FILE,"executor.zip");
        val archiveStat = FileSystem.get(yarnConfiguration).getFileStatus(new Path(packagePath))
        val  packageUrl = ConverterUtils.getYarnUrlFromPath(
          FileContext.getFileContext.makeQualified(new Path(packagePath)));
        val packageResource = Records.newRecord[LocalResource](classOf[LocalResource])

        packageResource.setResource(packageUrl);
        packageResource.setSize(archiveStat.getLen);
        packageResource.setTimestamp(archiveStat.getModificationTime);
        packageResource.setType(LocalResourceType.ARCHIVE);
        packageResource.setVisibility(LocalResourceVisibility.APPLICATION)
        ctx.setLocalResources(Collections.singletonMap(Constants.DATAX_EXECUTOR_ARCHIVE_FILE_NAME,packageResource))
        logging.info("[AM] Launching container " + container.getId());
        nmClient.startContainer(container, ctx);
      } catch {
        case ex:Exception =>
          logging.info("[AM] Error launching container " + container.getId() + " " + ex);
      }
    }
  }

} 
Example 59
Source File: FileUtil.scala    From wookiee   with Apache License 2.0 5 votes vote down vote up
package com.webtrends.harness.utils

import java.io.File
import java.nio.file.{FileSystems, Files, Path}

import scala.io.Source


object FileUtil {
  // other helpers from the original file are elided in this excerpt

  def getSymLink(f: File): File = {
    if (f == null)
      throw new NullPointerException("File must not be null")
    val path = FileSystems.getDefault.getPath(f.getPath)
    if (Files.isSymbolicLink(path)) {
      f.getCanonicalFile
    } else {
      f.getAbsoluteFile
    }
  }
} 
Example 60
Source File: ConfigSpec.scala    From wookiee   with Apache License 2.0 5 votes vote down vote up
package com.webtrends.harness

import java.io.{BufferedWriter, File, FileWriter}
import java.util.concurrent.TimeUnit

import akka.actor.{Actor, ActorSystem, Props}
import akka.testkit.TestProbe
import com.typesafe.config.ConfigFactory
import com.webtrends.harness.app.HarnessActor.ConfigChange
import com.webtrends.harness.config.ConfigWatcherActor
import com.webtrends.harness.health.{ComponentState, HealthComponent}
import com.webtrends.harness.service.messages.CheckHealth
import org.specs2.mutable.SpecificationWithJUnit

import scala.concurrent.ExecutionContextExecutor
import scala.concurrent.duration.FiniteDuration
import scala.reflect.io.{Directory, Path}

class ConfigSpec extends SpecificationWithJUnit {
  implicit val dur = FiniteDuration(2, TimeUnit.SECONDS)
  new File("services/test/conf").mkdirs()
  implicit val sys = ActorSystem("system", ConfigFactory.parseString( """
    akka.actor.provider = "akka.actor.LocalActorRefProvider"
    services { path = "services" }
    """).withFallback(ConfigFactory.load))

  implicit val ec: ExecutionContextExecutor =  sys.dispatcher

  val probe = TestProbe()
  val parent = sys.actorOf(Props(new Actor {
    val child = context.actorOf(ConfigWatcherActor.props, "child")
    def receive = {
      case x if sender == child => probe.ref forward x
      case x => child forward x
    }
  }))

  sequential

  "config " should {
    "be in good health" in {
      probe.send(parent, CheckHealth)
      val msg = probe.expectMsgClass(classOf[HealthComponent])
      msg.state equals ComponentState.NORMAL
    }

    "detect changes in config" in {
      val file = new File("services/test/conf/test.conf")
      val bw = new BufferedWriter(new FileWriter(file))
      bw.write("test = \"value\"")
      bw.close()
      val msg = probe.expectMsgClass(classOf[ConfigChange])
      msg.isInstanceOf[ConfigChange]
    }
  }

  step {
    sys.terminate().onComplete { _ =>
        Directory(Path(new File("services"))).deleteRecursively()
    }
  }
} 
Example 61
Source File: SparkFunSuite.scala    From tispark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark

import java.io.File

import org.apache.spark.internal.Logging
import org.scalatest._
import org.slf4j.Logger

abstract class SparkFunSuite extends FunSuite with Logging {
  protected val logger: Logger = log

  
  final protected override def withFixture(test: NoArgTest): Outcome = {
    val testName = test.text
    val suiteName = this.getClass.getName
    val shortSuiteName = suiteName.replaceAll("org.apache.spark", "o.a.s")
    try {
      logInfo(s"\n\n===== TEST OUTPUT FOR $shortSuiteName: '$testName' =====\n")
      test()
    } finally {
      logInfo(s"\n\n===== FINISHED $shortSuiteName: '$testName' =====\n")
    }
  }

  protected final def getTestResourcePath(file: String): String =
    getTestResourceFile(file).getCanonicalPath

  // helper function
  protected final def getTestResourceFile(file: String): File =
    new File(getClass.getClassLoader.getResource(file).getFile)

} 
Example 62
Source File: TPCDSQuerySuite.scala    From tispark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.benchmark

import java.io.File

import org.apache.spark.sql.BaseTiSparkTest
import org.apache.spark.sql.catalyst.util.resourceToString

import scala.collection.mutable

class TPCDSQuerySuite extends BaseTiSparkTest {
  private val tpcdsDirectory = getClass.getResource("/tpcds-sql").getPath
  private val tpcdsQueries = getListOfFiles(tpcdsDirectory)

  private def getListOfFiles(dir: String): List[String] = {
    val d = new File(dir)
    if (d.exists && d.isDirectory) {
      d.listFiles.filter(_.isFile).map(_.getName.stripSuffix(".sql")).toList
    } else {
      List[String]()
    }
  }

  private def run(queries: List[String], numRows: Int = 1, timeout: Int = 0): Unit =
    try {
      // set broadcast threshold to -1 so it will not oom
      spark.conf.set("spark.sql.autoBroadcastJoinThreshold", -1)
      setCurrentDatabase(tpcdsDBName)
      val succeeded = mutable.ArrayBuffer.empty[String]
      queries.foreach { q =>
        println(s"Query: $q")
        val start = System.currentTimeMillis()
        // We do not use statistic information here due to conflict of netty versions when physical plan has broadcast nodes.
        val queryString = resourceToString(
          s"tpcds-sql/$q.sql",
          classLoader = Thread.currentThread().getContextClassLoader)
        val df = spark.sql(queryString)
        var failed = false
        val jobGroup = s"benchmark $q"
        val t = new Thread("query runner") {
          override def run(): Unit =
            try {
              sqlContext.sparkContext.setJobGroup(jobGroup, jobGroup, interruptOnCancel = true)
              df.show(numRows)
            } catch {
              case e: Exception =>
                println("Failed to run: " + e)
                failed = true
            }
        }
        t.setDaemon(true)
        t.start()
        t.join(timeout)
        if (t.isAlive) {
          println(s"Timeout after $timeout seconds")
          sqlContext.sparkContext.cancelJobGroup(jobGroup)
          t.interrupt()
        } else {
          if (!failed) {
            succeeded += q
            println(s"   Took: ${System.currentTimeMillis() - start} ms")
            println("------------------------------------------------------------------")
          }
        }

        queryViaTiSpark(queryString)
        println(s"TiSpark finished $q")
      }
    } catch {
      case e: Throwable =>
        println(s"TiSpark failed to run TPCDS")
        fail(e)
    }

  test("TPCDS Test") {
    if (runTPCDS) {
      run(tpcdsQueries)
    }
  }
} 
Example 63
Source File: Utils.scala    From tispark   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.test

import java.io.{File, PrintWriter}
import java.nio.file.{Files, Paths}
import java.util.Properties

import org.slf4j.Logger

import scala.collection.JavaConversions._

object Utils {

  def writeFile(content: String, path: String): Unit =
    TryResource(new PrintWriter(path))(_.close()) {
      _.print(content)
    }

  def TryResource[T](res: T)(closeOp: T => Unit)(taskOp: T => Unit): Unit =
    try {
      taskOp(res)
    } finally {
      closeOp(res)
    }

  def readFile(path: String): List[String] =
    Files.readAllLines(Paths.get(path)).toList

  def getOrThrow(prop: Properties, key: String): String = {
    val jvmProp = System.getProperty(key)
    if (jvmProp != null) {
      jvmProp
    } else {
      val v = prop.getProperty(key)
      if (v == null) {
        throw new IllegalArgumentException(key + " is null")
      } else {
        v
      }
    }
  }

  def getFlagOrFalse(prop: Properties, key: String): Boolean =
    getFlag(prop, key, "false")

  private def getFlag(prop: Properties, key: String, defValue: String): Boolean =
    getOrElse(prop, key, defValue).equalsIgnoreCase("true")

  def getOrElse(prop: Properties, key: String, defValue: String): String = {
    val jvmProp = System.getProperty(key)
    if (jvmProp != null) {
      jvmProp
    } else {
      Option(prop.getProperty(key)).getOrElse(defValue)
    }
  }

  def getFlagOrTrue(prop: Properties, key: String): Boolean =
    getFlag(prop, key, "true")

  def time[R](block: => R)(logger: Logger): R = {
    val t0 = System.nanoTime()
    val result = block
    val t1 = System.nanoTime()
    logger.info("Elapsed time: " + (t1 - t0) / 1000.0 / 1000.0 / 1000.0 + "s")
    result
  }

  def ensurePath(basePath: String, paths: String*): Boolean =
    new File(joinPath(basePath, paths: _*)).mkdirs()

  def joinPath(basePath: String, paths: String*): String =
    Paths.get(basePath, paths: _*).toAbsolutePath.toString
} 
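A hedged usage sketch of the loan-pattern and timing helpers above; the file path and logger name are made up.

import org.apache.spark.sql.test.Utils
import org.slf4j.LoggerFactory

object UtilsUsageExample {
  private val logger = LoggerFactory.getLogger("UtilsUsageExample")

  def main(args: Array[String]): Unit = {
    Utils.ensurePath("/tmp", "utils-example")                              // mkdirs /tmp/utils-example
    val path = Utils.joinPath("/tmp", "utils-example", "hello.txt")        // example location

    // writeFile delegates to TryResource, so the PrintWriter is closed even if the write fails
    Utils.writeFile("hello from Utils\n", path)

    val lines = Utils.time { Utils.readFile(path) }(logger)                // logs the elapsed time
    lines.foreach(println)
  }
}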
Example 64
Source File: RedisBenchmarks.scala    From spark-redis   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package com.redislabs.provider.redis

import java.io.{File, FileWriter, PrintWriter}
import java.time.{Duration => JDuration}

import com.redislabs.provider.redis.util.Logging


trait RedisBenchmarks extends Logging {

  val benchmarkReportDir = new File("target/reports/benchmarks/")
  benchmarkReportDir.mkdirs()

  def time[R](tag: String)(block: => R): R = {
    val t0 = System.nanoTime()
    val result = block // call-by-name
    val t1 = System.nanoTime()
    new PrintWriter(new FileWriter(s"$benchmarkReportDir/results.txt", true)) {
      // scalastyle:off
      this.println(s"$tag, ${JDuration.ofNanos(t1 - t0)}")
      close()
    }
    result
  }
} 
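A hedged sketch of using the time helper; the tag and the workload are made up. Each call appends one "tag, duration" line to target/reports/benchmarks/results.txt.

object RedisBenchmarksExample extends RedisBenchmarks {
  def main(args: Array[String]): Unit = {
    val total = time("sum-1-to-1M") {
      (1L to 1000000L).sum // placeholder workload to be timed
    }
    println(s"total = $total")
  }
}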
Example 65
Source File: JsonReceiverActor.scala    From incubator-retired-iota   with Apache License 2.0 5 votes vote down vote up
package org.apache.iota.fey

import java.nio.file.Paths
import java.io.File

import akka.actor.{Actor, ActorLogging, ActorRef, Props}
import play.api.libs.json.{JsValue, Json}

class JsonReceiverActor extends Actor with ActorLogging {

  import JsonReceiverActor._

  val monitoring_actor = FEY_MONITOR.actorRef
  var watchFileTask: WatchServiceReceiver = _
  var watchThread: Thread = _

  override def preStart() {
    prepareDynamicJarRepo()
    processCheckpointFiles()

    watchFileTask = new WatchServiceReceiver(self)
    watchThread = new Thread(watchFileTask, GLOBAL_DEFINITIONS.WATCH_SERVICE_THREAD)

    monitoring_actor  ! Monitor.START(Utils.getTimestamp)
    watchThread.setDaemon(true)
    watchThread.start()

    watchFileTask.watch(Paths.get(CONFIG.JSON_REPOSITORY))
  }

  private def prepareDynamicJarRepo() = {
    val jarDir = new File(CONFIG.DYNAMIC_JAR_REPO)
    if (!jarDir.exists()){
      jarDir.mkdir()
    }else if(CONFIG.DYNAMIC_JAR_FORCE_PULL){
      jarDir.listFiles().foreach(_.delete())
    }
  }


  private def processCheckpointFiles() = {
    if (CONFIG.CHEKPOINT_ENABLED) {
      val checkpoint = new CheckpointProcessor(self)
      checkpoint.run()
    }
  }

  override def postStop() {
    monitoring_actor  ! Monitor.STOP(Utils.getTimestamp)
    watchThread.interrupt()
    watchThread.join()
  }

  override def postRestart(reason: Throwable): Unit = {
    monitoring_actor  ! Monitor.RESTART(reason, Utils.getTimestamp)
    preStart()
  }

  override def receive: Receive = {
    case JSON_RECEIVED(json, file) =>
      log.info(s"JSON RECEIVED => ${Json.stringify(json)}")
      context.parent ! FeyCore.ORCHESTRATION_RECEIVED(json, Some(file))

    case _ =>
  }

}

object JsonReceiverActor {

  case class JSON_RECEIVED(json: JsValue, file: File)

} 
Example 66
Source File: WatchServiceReceiver.scala    From incubator-retired-iota   with Apache License 2.0 5 votes vote down vote up
package org.apache.iota.fey

import java.nio.file.StandardWatchEventKinds._
import java.nio.file.{FileSystems, Path}
import java.io.File
import akka.actor.ActorRef
import org.apache.iota.fey.JsonReceiverActor.JSON_RECEIVED
import play.api.libs.json._

import scala.io.Source

class WatchServiceReceiver(receiverActor: ActorRef) extends JsonReceiver{

  processInitialFiles()

  private val watchService = FileSystems.getDefault.newWatchService()

  def watch(path: Path) : Unit = path.register(watchService, ENTRY_CREATE, ENTRY_MODIFY)

  def getJsonObject(params: String): Option[JsValue] = {
    try{
      val stringJson = Source.fromFile(params).getLines.mkString
      Option(Json.parse(stringJson))
    }catch{
      case e: Exception =>
        log.error("Could not parse JSON", e)
        None
    }
  }

  override def execute(): Unit = {

    val key = watchService.take()
    val eventsIterator = key.pollEvents().iterator()

    while(eventsIterator.hasNext) {
      val event = eventsIterator.next()
      val relativePath = event.context().asInstanceOf[Path]
      val path = key.watchable().asInstanceOf[Path].resolve(relativePath)

      log.debug(s"${event.kind()} --- $path")
      event.kind() match {
        case (ENTRY_CREATE | ENTRY_MODIFY) if path.toString.endsWith(CONFIG.JSON_EXTENSION) =>
          processJson(path.toString, path.toFile)
        case _ =>
      }
    }

    key.reset()
  }

  private[fey] def processJson(path: String, file: File) = {
    try{
      getJsonObject(path) match {
        case Some(orchestrationJSON) =>
          val valid = validJson(orchestrationJSON)
          if(valid && (orchestrationJSON \ JSON_PATH.COMMAND).as[String].toUpperCase != "DELETE"){
            checkForLocation(orchestrationJSON)
          }
          if(valid) {
            receiverActor ! JSON_RECEIVED(orchestrationJSON, file)
          }else{
            log.warn(s"File $path not processed. Incorrect JSON schema")
          }
        case None =>
      }
    } catch {
      case e: Exception =>
        log.error(s"File $path will not be processed", e)
    }
  }

  private def processInitialFiles() = {
    Utils.getFilesInDirectory(CONFIG.JSON_REPOSITORY)
      .filter(file => file.getName.endsWith(CONFIG.JSON_EXTENSION))
      .foreach(file => {
        processJson(file.getAbsolutePath, file)
      })
  }

  override def exceptionOnRun(e: Exception): Unit = {
    e match {
      case e: InterruptedException =>
      case e: Exception => log.error("Watch Service stopped", e)
    }
    watchService.close()
  }

} 
Example 67
Source File: FeyGenericActorReceiver.scala    From incubator-retired-iota   with Apache License 2.0 5 votes vote down vote up
package org.apache.iota.fey

import java.io.{File, FileOutputStream}
import java.net.URL
import java.nio.file.{Files, Paths}
import com.eclipsesource.schema._
import akka.actor.ActorRef
import com.eclipsesource.schema.SchemaValidator
import org.apache.commons.io.IOUtils
import play.api.libs.json._
import scala.concurrent.duration._
import scala.util.Properties._

abstract class FeyGenericActorReceiver(override val params: Map[String,String] = Map.empty,
                                       override val backoff: FiniteDuration = 1.minutes,
                                       override val connectTo: Map[String,ActorRef] = Map.empty,
                                       override val schedulerTimeInterval: FiniteDuration = 2.seconds,
                                       override val orchestrationName: String = "",
                                       override val orchestrationID: String = "",
                                       override val autoScale: Boolean = false) extends FeyGenericActor{

  private[fey] val feyCore = FEY_CORE_ACTOR.actorRef

  override final def processMessage[T](message: T, sender: ActorRef): Unit = {
    try {
      val jsonString = getJSONString(message)
      if(jsonString != "{}") {
        processJson(jsonString)
      }
      startBackoff()
    }catch{
      case e: Exception => log.error(e, s"Could not process message $message")
    }
  }

  private[fey] def processJson(jsonString: String) = {
    var orchID:String = "None"
    try{
      val orchestrationJSON = Json.parse(jsonString)
      orchID = (orchestrationJSON \ JSON_PATH.GUID).as[String]
      val valid = validJson(orchestrationJSON)
      if(valid && (orchestrationJSON \ JSON_PATH.COMMAND).as[String].toUpperCase != "DELETE"){
        checkForLocation(orchestrationJSON)
      }
      if(valid) {
        feyCore ! FeyCore.ORCHESTRATION_RECEIVED(orchestrationJSON, None)
      }else{
        log.warning(s"Could not forward Orchestration $orchID. Invalid JSON schema")
      }
    } catch {
      case e: Exception =>
        log.error(e, s"Orchestration $orchID could not be forwarded")
    }
  }

  
  def resolveCredentials(credentials: Option[JsObject]):Option[(String, String)] = {
    credentials match {
      case None => None
      case Some(cred) =>
        val user = (cred \ JSON_PATH.JAR_CRED_USER).as[String]
        val password = (cred \ JSON_PATH.JAR_CRED_PASSWORD).as[String]
        Option(envOrElse(user,user), envOrElse(password,password))
    }
  }

} 
Example 68
Source File: CheckpointProcessor.scala    From incubator-retired-iota   with Apache License 2.0 5 votes vote down vote up
package org.apache.iota.fey

import java.io.File

import akka.actor.ActorRef
import org.apache.iota.fey.JsonReceiverActor.JSON_RECEIVED
import play.api.libs.json.{JsValue, Json}

import scala.io.Source


class CheckpointProcessor(receiverActor: ActorRef) extends JsonReceiver{

  override def run(): Unit = {
    processCheckpointFiles()
  }

  def getJsonObject(params: String): Option[JsValue] = {
    try{
      val stringJson = Source.fromFile(params).getLines.mkString
      Option(Json.parse(stringJson))
    }catch{
      case e: Exception =>
        log.error("Could not parse JSON", e)
        None
    }
  }

  private def processJson(path: String, file: File) = {
    try{
      getJsonObject(path) match {
        case Some(orchestrationJSON) =>
          val valid = validJson(orchestrationJSON)
          if(valid && (orchestrationJSON \ JSON_PATH.COMMAND).as[String].toUpperCase != "DELETE"){
            checkForLocation(orchestrationJSON)
          }
          if(valid) {
            receiverActor ! JSON_RECEIVED(orchestrationJSON, file)
          }else{
            log.warn(s"File $path not processed. Incorrect JSON schema")
          }
          file.delete()
        case None =>
      }
    } catch {
      case e: Exception =>
        log.error(s"File $path will not be processed", e)
    }
  }

  private def processCheckpointFiles() = {
    Utils.getFilesInDirectory(CONFIG.CHECKPOINT_DIR)
      .filter(file => file.getName.endsWith(CONFIG.JSON_EXTENSION))
      .foreach(file => {
        processJson(file.getAbsolutePath, file)
      })
  }

  override def execute(): Unit = {}
  override def exceptionOnRun(e: Exception): Unit = {}
} 
Example 69
Source File: JsonReceiver.scala    From incubator-retired-iota   with Apache License 2.0 5 votes vote down vote up
package org.apache.iota.fey

import java.io.FileOutputStream
import java.net.URL
import java.io.File

import com.eclipsesource.schema._
import org.slf4j.LoggerFactory
import play.api.libs.json._
import JSON_PATH._
import java.nio.file.{Files, Paths}

import org.apache.commons.io.IOUtils
import org.apache.commons.codec.binary.Base64
import scala.util.Properties._


trait JsonReceiver extends Runnable {
  // other members of the original trait (run, execute, validJson, checkForLocation, ...)
  // are elided in this excerpt
  def exceptionOnRun(e: Exception): Unit
}

object HttpBasicAuth {
  val BASIC = "Basic"
  val AUTHORIZATION = "Authorization"

  def encodeCredentials(username: String, password: String): String = {
    new String(Base64.encodeBase64((username + ":" + password).getBytes))
  }

  def getHeader(username: String, password: String): String =
    BASIC + " " + encodeCredentials(username, password)
} 
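A small usage sketch of HttpBasicAuth; the credentials are placeholders.

object HttpBasicAuthExample {
  def main(args: Array[String]): Unit = {
    // Produces "Basic " followed by the Base64 encoding of "fey-user:fey-secret",
    // suitable as the value of the "Authorization" request header.
    val header = HttpBasicAuth.getHeader("fey-user", "fey-secret")
    println(s"${HttpBasicAuth.AUTHORIZATION}: $header")
  }
}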
Example 70
Source File: WatchServiceReceiverSpec.scala    From incubator-retired-iota   with Apache License 2.0 5 votes vote down vote up
package org.apache.iota.fey

import java.nio.file.{Files, Paths}
import java.nio.charset.StandardCharsets

import akka.testkit.{EventFilter, TestProbe}

import scala.concurrent.duration.{DurationInt, FiniteDuration}
import java.io.File

import ch.qos.logback.classic.Level

class WatchServiceReceiverSpec extends BaseAkkaSpec{

  val watcherTB = TestProbe("WATCH-SERVICE")
  var watchFileTask:WatchServiceReceiver = _
  val watchTestDir = s"${CONFIG.JSON_REPOSITORY}/watchtest"

  "Creating WatchServiceReceiver" should {
    "process initial files in the JSON repository" in {
      CONFIG.JSON_EXTENSION = "json.not"
      watchFileTask = new WatchServiceReceiver(watcherTB.ref)
      watcherTB.expectMsgAllClassOf(classOf[JsonReceiverActor.JSON_RECEIVED])
      CONFIG.JSON_EXTENSION = "json.test"
    }
  }

  var watchThread: Thread = _
  "Start a Thread with WatchServiceReceiver" should {
    "Start Thread" in {
      watchThread = new Thread(watchFileTask, "TESTING-WATCHER-IN-THREAD")
      watchThread.setDaemon(true)
      watchThread.start()
      TestProbe().isThreadRunning("TESTING-WATCHER-IN-THREAD") should be(true)
    }
  }

  "Start watching directory" should {
    "Starting receiving CREATED event" taggedAs(SlowTest) in {
      watchFileTask.watch(Paths.get(watchTestDir))
      Files.write(Paths.get(s"$watchTestDir/watched.json.test"), Utils_JSONTest.create_json_test.getBytes(StandardCharsets.UTF_8))
      watcherTB.expectMsgAllClassOf(20.seconds, classOf[JsonReceiverActor.JSON_RECEIVED])
    }
    "Starting receiving UPDATE event" taggedAs(SlowTest) in {
      Files.write(Paths.get(s"$watchTestDir/watched-update.json.test"), Utils_JSONTest.delete_json_test.getBytes(StandardCharsets.UTF_8))
      Thread.sleep(200)
      Files.write(Paths.get(s"$watchTestDir/watched-update.json.test"), Utils_JSONTest.create_json_test.getBytes(StandardCharsets.UTF_8))
      watcherTB.expectMsgAllClassOf(20.seconds, classOf[JsonReceiverActor.JSON_RECEIVED])
    }
  }

  "processJson" should {
    "log to warn level when json has invalid schema" in {
      Files.write(Paths.get(s"$watchTestDir/watched-invalid.json.test"), Utils_JSONTest.test_json_schema_invalid.getBytes(StandardCharsets.UTF_8))
      watchFileTask.processJson(s"$watchTestDir/watched-invalid.json.test",new File(s"$watchTestDir/watched-invalid.json.test"))
      s"File $watchTestDir/watched-invalid.json.test not processed. Incorrect JSON schema" should beLoggedAt(Level.WARN)
    }
  }

  "interrupt watchservice" should{
    "interrupt thread" in {
      watchThread.interrupt()
    }
  }

} 
Example 71
Source File: TestSetup.scala    From incubator-retired-iota   with Apache License 2.0 5 votes vote down vote up
package org.apache.iota.fey

import java.io.File
import java.nio.file.Paths

import org.apache.commons.io.FileUtils
import org.scalatest.Tag

object TestSetup {

  private var runSetup = true

  val configTest = getClass.getResource("/test-fey-configuration.conf")

  def setup(): Unit = {
    if(runSetup){
      println("SETTING UP ...")
      createFeyTmpDirectoriesForTest()
      copyTestActorToTmp()
      copyJSONstoTmp()
      runSetup = false
    }
  }

  private def copyTestActorToTmp(): Unit = {
    copyResourceFileToLocal("/fey-test-actor.jar",s"${CONFIG.JAR_REPOSITORY}/fey-test-actor.jar")
  }

  private def copyJSONstoTmp(): Unit = {
    copyResourceFileToLocal("/json/valid-json.json",s"${CONFIG.JSON_REPOSITORY}/valid-json.json.not")
    copyResourceFileToLocal("/json/invalid-json.json",s"${CONFIG.JSON_REPOSITORY}/invalid-json.json.not")
  }

  private def copyResourceFileToLocal(resourcePath: String, destination: String): Unit = {
    val resourceFile = getClass.getResource(resourcePath)
    val dest = new File(destination)
    FileUtils.copyURLToFile(resourceFile, dest)
  }

  private def createFeyTmpDirectoriesForTest(): Unit = {
    var file = new File(s"/tmp/fey/test/checkpoint")
    file.mkdirs()
    file = new File(s"/tmp/fey/test/json")
    file.mkdirs()
    file = new File(s"/tmp/fey/test/json/watchtest")
    file.mkdirs()
    file = new File(s"/tmp/fey/test/jars")
    file.mkdirs()
    file = new File(s"/tmp/fey/test/jars/dynamic")
    file.mkdirs()
  }

}

object SlowTest extends Tag("org.apache.iota.fey.SlowTest") 
Example 72
Source File: MultiNodeSupportCassandra.scala    From eventuate   with Apache License 2.0 5 votes vote down vote up
package com.rbmhtechnology.eventuate

import java.io.File

import akka.actor.Props
import akka.remote.testconductor.RoleName
import akka.remote.testkit.MultiNodeSpec

import com.rbmhtechnology.eventuate.log.cassandra._

import org.apache.commons.io.FileUtils
import org.scalatest.BeforeAndAfterAll

trait MultiNodeSupportCassandra extends BeforeAndAfterAll { this: MultiNodeSpec with MultiNodeWordSpec =>
  val coordinator = RoleName("nodeA")

  def cassandraDir: String =
    MultiNodeEmbeddedCassandra.DefaultCassandraDir

  def logProps(logId: String): Props =
    CassandraEventLog.props(logId)

  override def atStartup(): Unit = {
    if (isNode(coordinator)) {
      MultiNodeEmbeddedCassandra.start(cassandraDir)
      Cassandra(system)
    }
    enterBarrier("startup")
  }

  override def afterAll(): Unit = {
    // get all config data before shutting down node
    val snapshotRootDir = new File(system.settings.config.getString("eventuate.snapshot.filesystem.dir"))

    // shut down node
    super.afterAll()

    // clean database and delete snapshot files
    if (isNode(coordinator)) {
      FileUtils.deleteDirectory(snapshotRootDir)
      MultiNodeEmbeddedCassandra.clean()
    }
  }
} 
Example 73
Source File: MultiNodeSupportLeveldb.scala    From eventuate   with Apache License 2.0 5 votes vote down vote up
package com.rbmhtechnology.eventuate

import java.io.File

import akka.actor.Props
import akka.remote.testconductor.RoleName
import akka.remote.testkit.MultiNodeSpec

import com.rbmhtechnology.eventuate.log.leveldb.LeveldbEventLog

import org.apache.commons.io.FileUtils
import org.scalatest.BeforeAndAfterAll

trait MultiNodeSupportLeveldb extends BeforeAndAfterAll { this: MultiNodeSpec with MultiNodeWordSpec =>
  val coordinator = RoleName("nodeA")

  def logProps(logId: String): Props =
    LeveldbEventLog.props(logId)

  override def afterAll(): Unit = {
    // get all config data before shutting down node
    val snapshotRootDir = new File(system.settings.config.getString("eventuate.snapshot.filesystem.dir"))
    val logRootDir = new File(system.settings.config.getString("eventuate.log.leveldb.dir"))

    // shut down node
    super.afterAll()

    // delete log and snapshot files
    if (isNode(coordinator)) {
      FileUtils.deleteDirectory(snapshotRootDir)
      FileUtils.deleteDirectory(logRootDir)
    }
  }
} 
Example 74
Source File: LocationSpecLeveldb.scala    From eventuate   with Apache License 2.0 5 votes vote down vote up
package com.rbmhtechnology.eventuate

import java.io.File

import akka.actor._

import com.rbmhtechnology.eventuate.log._
import com.rbmhtechnology.eventuate.log.leveldb._
import com.rbmhtechnology.eventuate.utilities.RestarterActor
import com.typesafe.config.ConfigFactory

trait LocationCleanupLeveldb extends LocationCleanup {
  override def storageLocations: List[File] =
    List("eventuate.log.leveldb.dir", "eventuate.snapshot.filesystem.dir").map(s => new File(config.getString(s)))
}

object SingleLocationSpecLeveldb {
  object TestEventLog {
    def props(logId: String, batching: Boolean, currentSystemTime: Long = 0): Props = {
      val logProps = Props(new TestEventLog(logId, currentSystemTime))
        .withDispatcher("eventuate.log.dispatchers.write-dispatcher")
      if (batching) Props(new BatchingLayer(logProps)) else logProps
    }
  }

  class TestEventLog(id: String, override val currentSystemTime: Long = 0) extends LeveldbEventLog(id, "log-test") with SingleLocationSpec.TestEventLog[LeveldbEventLogState] {
    override def unhandled(message: Any): Unit = message match {
      case "boom" => throw IntegrationTestException
      case "dir"  => sender() ! logDir
      case _      => super.unhandled(message)
    }
  }
}

trait SingleLocationSpecLeveldb extends SingleLocationSpec with LocationCleanupLeveldb {
  import SingleLocationSpecLeveldb._

  private var _log: ActorRef = _

  override def beforeEach(): Unit = {
    super.beforeEach()
    _log = system.actorOf(logProps(logId))
  }

  def log: ActorRef =
    _log

  def logProps(logId: String): Props =
    RestarterActor.props(TestEventLog.props(logId, batching, currentSystemTime))
}

trait MultiLocationSpecLeveldb extends MultiLocationSpec with LocationCleanupLeveldb {
  override val logFactory: String => Props = id => LeveldbEventLog.props(id)

  override val providerConfig = ConfigFactory.parseString(
    s"""
       |eventuate.log.leveldb.dir = target/test-log
       |eventuate.log.leveldb.index-update-limit = 3
       |eventuate.log.leveldb.deletion-retry-delay = 1 ms
     """.stripMargin)
} 
Example 75
Source File: YamlHelpers.scala    From barstools   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package barstools.tapeout.transforms

import net.jcazevedo.moultingyaml._
import java.io.File

class YamlFileReader(resource: String) {
  def parse[A](file: String = "")(implicit reader: YamlReader[A]) : Seq[A] = {
    // If the user doesn't provide a Yaml file name, use defaults
    val yamlString = file match {
      case f if f.isEmpty => 
        // Use example config if no file is provided
        val stream = getClass.getResourceAsStream(resource)
        io.Source.fromInputStream(stream).mkString
      case f if new File(f).exists => 
        scala.io.Source.fromFile(f).getLines.mkString("\n")
      case _ => 
        throw new Exception("No valid Yaml file found!")
    }
    yamlString.parseYamls.map(x => reader.read(x))
  }
} 
Example 76
Source File: KinesisProducerIntegrationSpec.scala    From reactive-kinesis   with Apache License 2.0 5 votes vote down vote up
package com.weightwatchers.reactive.kinesis

import java.io.File

import com.amazonaws.services.kinesis.producer.{KinesisProducer => AWSKinesisProducer}
import com.typesafe.config.ConfigFactory
import com.weightwatchers.reactive.kinesis.common.{
  KinesisSuite,
  KinesisTestConsumer,
  TestCredentials
}
import com.weightwatchers.reactive.kinesis.consumer.KinesisConsumer.ConsumerConf
import com.weightwatchers.reactive.kinesis.models.ProducerEvent
import com.weightwatchers.reactive.kinesis.producer.{KinesisProducer, ProducerConf}
import org.scalatest.concurrent.Eventually
import org.scalatest.mockito.MockitoSugar
import org.scalatest.time.{Millis, Seconds, Span}
import org.scalatest.{BeforeAndAfterAll, FreeSpec, Matchers}

import scala.concurrent.duration._
import scala.language.postfixOps
import scala.util.Random

//scalastyle:off magic.number
class KinesisProducerIntegrationSpec
    extends FreeSpec
    with Matchers
    with MockitoSugar
    with BeforeAndAfterAll
    with Eventually
    with KinesisSuite {

  implicit val ece = scala.concurrent.ExecutionContext.global

  val TestStreamNrOfMessagesPerShard: Long = 0

  implicit override val patienceConfig: PatienceConfig =
    PatienceConfig(timeout = Span(5, Seconds), interval = Span(100, Millis))

  "The KinesisProducer" - {

    "Should publish a message to a stream" in new withKinesisConfForApp(
      "int-test-stream-producer-1"
    ) {

      val conf     = producerConf()
      val producer = KinesisProducer(conf)

      val existingRecordCount = testConsumer.retrieveRecords(conf.streamName, 10).size

      val event = ProducerEvent("1234", Random.alphanumeric.take(10).mkString)
      producer.addUserRecord(event)

      eventually {
        val records: Seq[String] = testConsumer.retrieveRecords(conf.streamName, 10)
        records.size shouldBe (existingRecordCount + 1)
        records should contain(
          new String(event.payload.array(), java.nio.charset.StandardCharsets.UTF_8)
        )
      }
    }
  }
}

//scalastyle:on 
Example 77
Source File: Persister.scala    From exodus   with MIT License 5 votes vote down vote up
package com.wix.bazel.migrator

import java.io.File
import java.nio.file.attribute.BasicFileAttributes
import java.nio.file.{Files, Paths}
import java.time.Instant
import java.time.temporal.TemporalUnit
import java.util

import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper}
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import com.wix.bazel.migrator.model.{CodePurpose, Package, Target, TestType}
import com.wix.bazel.migrator.utils.{IgnoringIsArchiveDefMixin, IgnoringIsProtoArtifactDefMixin, IgnoringIsWarDefMixin, TypeAddingMixin}
import com.wix.build.maven.analysis.SourceModules
import com.wixpress.build.maven.{Coordinates, MavenScope, Packaging}

import scala.collection.JavaConverters._

object Persister {

  private val transformedFile = new File("dag.bazel")
  private val mavenCache = Paths.get("classpathModules.cache")
  val objectMapper = new ObjectMapper().registerModule(DefaultScalaModule)
    .addMixIn(classOf[Target], classOf[TypeAddingMixin])
    .addMixIn(classOf[CodePurpose], classOf[TypeAddingMixin])
    .addMixIn(classOf[TestType], classOf[TypeAddingMixin])
    .addMixIn(classOf[MavenScope], classOf[TypeAddingMixin])
    .addMixIn(classOf[Packaging], classOf[IgnoringIsArchiveDefMixin])
    .addMixIn(classOf[Packaging], classOf[IgnoringIsWarDefMixin])
    .addMixIn(classOf[Coordinates], classOf[IgnoringIsProtoArtifactDefMixin])
    .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)

  def persistTransformationResults(bazelPackages: Set[Package]): Unit = {
    println("Persisting transformation")
    objectMapper.writeValue(transformedFile, bazelPackages)
  }

  def readTransformationResults(): Set[Package] = {
    val collectionType = objectMapper.getTypeFactory.constructCollectionType(classOf[util.Collection[Package]], classOf[Package])
    val value: util.Collection[Package] = objectMapper.readValue(transformedFile, collectionType)
    val bazelPackages = value.asScala.toSet
    bazelPackages
  }

  def persistMavenClasspathResolution(sourceModules: SourceModules): Unit = {
    println("Persisting maven")
    objectMapper.writeValue(mavenCache.toFile, sourceModules)
  }

  def readTransMavenClasspathResolution(): SourceModules = {
    objectMapper.readValue[SourceModules](mavenCache.toFile, classOf[SourceModules])
  }

  def mavenClasspathResolutionIsUnavailableOrOlderThan(amount: Int, unit: TemporalUnit): Boolean =
    !Files.isReadable(mavenCache) ||
      lastModifiedMavenCache().toInstant.isBefore(Instant.now().minus(amount, unit))

  private def lastModifiedMavenCache() =
    Files.readAttributes(mavenCache, classOf[BasicFileAttributes]).lastModifiedTime()

} 
Example 78
Source File: SqliteTestBase.scala    From smui   with Apache License 2.0 5 votes vote down vote up
package utils

import java.io.File

import org.scalatest.{BeforeAndAfterAll, Suite}
import play.api.db.evolutions.Evolutions
import play.api.db.{Database, Databases}

trait SqliteTestBase extends BeforeAndAfterAll { self: Suite =>

  private lazy val dbFile = File.createTempFile("sqlitetest", ".db")

  lazy val db: Database = {
    // Use a temp file for the database - in-memory DB cannot be used
    // since it would be a different DB for each connection in the connection pool
    // (see https://www.sqlite.org/inmemorydb.html)
    val d = Databases("org.sqlite.JDBC", s"jdbc:sqlite:${dbFile.getAbsolutePath}")
    Evolutions.applyEvolutions(d)
    d
  }

  override protected def afterAll(): Unit = {
    super.afterAll()
    db.shutdown()
    dbFile.delete()
  }

} 
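A minimal sketch of a suite mixing in SqliteTestBase; the suite name and assertion are assumptions.

import org.scalatest.{FlatSpec, Matchers}
import utils.SqliteTestBase

class ExampleDbSpec extends FlatSpec with Matchers with SqliteTestBase {
  "the temporary SQLite database" should "hand out usable connections" in {
    // db points at the temp-file SQLite database with evolutions already applied
    db.withConnection { connection =>
      connection.isClosed shouldBe false
    }
  }
}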
Example 79
Source File: Preprocess.scala    From Scala-Machine-Learning-Projects   with MIT License 5 votes vote down vote up
package com.packt.ScalaML.BitCoin

import java.io.{ BufferedWriter, File, FileWriter }
import org.apache.spark.sql.types.{ DoubleType, IntegerType, StructField, StructType }
import org.apache.spark.sql.{ DataFrame, Row, SparkSession }
import scala.collection.mutable.ListBuffer

object Preprocess {
  // how many of the first rows are omitted
    val dropFirstCount: Int = 612000

    def rollingWindow(data: DataFrame, window: Int, xFilename: String, yFilename: String): Unit = {
      var i = 0
      val xWriter = new BufferedWriter(new FileWriter(new File(xFilename)))
      val yWriter = new BufferedWriter(new FileWriter(new File(yFilename)))

      val zippedData = data.rdd.zipWithIndex().collect()
      System.gc()
      val dataStratified = zippedData.drop(dropFirstCount) //todo slice first 614K
      while (i < (dataStratified.length - window)) {
        val x = dataStratified
          .slice(i, i + window)
          .map(r => r._1.getAs[Double]("Delta")).toList
        val y = dataStratified.apply(i + window)._1.getAs[Integer]("label")
        val stringToWrite = x.mkString(",")
        xWriter.write(stringToWrite + "\n")
        yWriter.write(y + "\n")

        i += 1
        if (i % 10 == 0) {
          xWriter.flush()
          yWriter.flush()
        }
      }

      xWriter.close()
      yWriter.close()
    }
    
  def main(args: Array[String]): Unit = {
    //todo modify these variables to match desirable files
    val priceDataFileName: String = "C:/Users/admin-karim/Desktop/bitstampUSD_1-min_data_2012-01-01_to_2017-10-20.csv/bitstampUSD_1-min_data_2012-01-01_to_2017-10-20.csv"
    val outputDataFilePath: String = "output/scala_test_x.csv"
    val outputLabelFilePath: String = "output/scala_test_y.csv"

    val spark = SparkSession
      .builder()
      .master("local[*]")
      .config("spark.sql.warehouse.dir", "E:/Exp/")
      .appName("Bitcoin Preprocessing")
      .getOrCreate()

    val data = spark.read.format("com.databricks.spark.csv").option("header", "true").load(priceDataFileName)
    data.show(10)
    println((data.count(), data.columns.size))

    val dataWithDelta = data.withColumn("Delta", data("Close") - data("Open"))

    import org.apache.spark.sql.functions._
    import spark.sqlContext.implicits._

    val dataWithLabels = dataWithDelta.withColumn("label", when($"Close" - $"Open" > 0, 1).otherwise(0))
    rollingWindow(dataWithLabels, 22, outputDataFilePath, outputLabelFilePath)    
    spark.stop()
  }
} 
Example 80
Source File: ResultFileGenerator.scala    From Scala-Machine-Learning-Projects   with MIT License 5 votes vote down vote up
package Yelp.Evaluator

import java.io.File
import Yelp.Trainer.NeuralNetwork._
import Yelp.Preprocessor.CSVImageMetadataReader._
import Yelp.Preprocessor.makeND4jDataSets.makeDataSetTE
import Yelp.Preprocessor.featureAndDataAligner
import Yelp.Preprocessor.imageFeatureExtractor._
import Yelp.Evaluator.ResultFileGenerator._
import Yelp.Preprocessor.makeND4jDataSets._
import Yelp.Evaluator.ModelEvaluation._
import Yelp.Trainer.CNN._
import Yelp.Trainer.CNNEpochs._
import scala.Vector

object ResultFileGenerator {
  def writeSubmissionFile(outcsv: String, phtoObj: List[(String, Vector[Double])], thresh: Double): Unit = {
    // prints to a csv or other txt file
    def printToFile(f: java.io.File)(op: java.io.PrintWriter => Unit) {
      val p = new java.io.PrintWriter(f)
      try { op(p) } finally { p.close() }
    }
    // assigning cutoffs for each class
    def findIndicesAboveThresh(x: Vector[Double]): Vector[Int] = {
      x.zipWithIndex.filter(x => x._1 >= thresh).map(_._2)
    }
    // create vector of rows to write to csv
    val ret = (for (i <- 0 until phtoObj.length) yield {
      (phtoObj(i)._1 + "," + findIndicesAboveThresh(phtoObj(i)._2).mkString(" "))
    }).toVector
    // actually write text file
    printToFile(new File(outcsv)) {
      p => (Vector("business_ids,labels") ++ ret).foreach(p.println)
    }
  }
  
  def SubmitObj(alignedData: featureAndDataAligner,
    modelPath: String,
    model0: String = "model0",
    model1: String = "model1",
    model2: String = "model2",
    model3: String = "model3",
    model4: String = "model4",
    model5: String = "model5",
    model6: String = "model6",
    model7: String = "model7",
    model8: String = "model8"): List[(String, Vector[Double])] = {

    // new code which works in REPL    
    // creates a List for each model (class) containing a map from the bizID to the probability of belonging in that class 
    val big = for (m <- List(model0, model1, model2, model3, model4, model5, model6, model7, model8)) yield {
      val ds = makeDataSetTE(alignedData)
      val model = loadNN(modelPath + m + ".json", modelPath + m + ".bin")
      val scores = scoreModel(model, ds)
      val bizScores = aggImgScores2Business(scores, alignedData)
      bizScores.toMap
    }

    // transform the data structure above into a List of tuples (bizID, Vector[Double]),
    // where the Vector[Double] holds the per-class probabilities for that business
    alignedData.data.map(_._2).distinct map (x =>
      (x, big.map(x2 => x2(x)).toVector))
  }
} 
Example 81
Source File: GrayscaleConverter.scala    From Scala-Machine-Learning-Projects   with MIT License 5 votes vote down vote up
package Yelp.Preprocessor

import java.io.File
import javax.imageio.ImageIO
import java.awt.Color

object GrayscaleConverter {
  def main(args: Array[String]): Unit = {
    def pixels2Gray(R: Int, G: Int, B: Int): Int = (R + G + B) / 3

    def makeGray(testImage: java.awt.image.BufferedImage): java.awt.image.BufferedImage = {
      val w = testImage.getWidth
      val h = testImage.getHeight
      for {
        w1 <- (0 until w).toVector
        h1 <- (0 until h).toVector
      } yield {
        val col = testImage.getRGB(w1, h1)
        val R = (col & 0xff0000) / 65536
        val G = (col & 0xff00) / 256
        val B = (col & 0xff)
        val graycol = pixels2Gray(R, G, B)
        testImage.setRGB(w1, h1, new Color(graycol, graycol, graycol).getRGB)
      }
      testImage
    }

    val testImage = ImageIO.read(new File("data/images/preprocessed/147square.jpg"))
    val grayImage = makeGray(testImage)
    ImageIO.write(grayImage, "jpg", new File("data/images/preprocessed/147gray.jpg"))
  }
} 
Example 82
Source File: imageFeatureExtractor.scala    From Scala-Machine-Learning-Projects   with MIT License 5 votes vote down vote up
package Yelp.Preprocessor

import java.io.File
import javax.imageio.ImageIO
import scala.util.matching.Regex
import imageUtils._

object imageFeatureExtractor {  
  
  
  def processImages(imgs: List[String], resizeImgDim: Int = 128, nPixels: Int = -1): Map[Int, Vector[Int]] = {       
    imgs.map(x => 
      patt_get_jpg_name.findAllIn(x).mkString.toInt -> { 
        val img0 = ImageIO.read(new File(x))
         .makeSquare
         .resizeImg(resizeImgDim, resizeImgDim) // (128, 128)
         .image2gray
       if(nPixels != -1) img0.slice(0, nPixels)
       else img0
     }   
   ).filter( x => x._2 != ())
    .toMap    
  }  
} 
Example 83
Source File: ImageResize.scala    From Scala-Machine-Learning-Projects   with MIT License 5 votes vote down vote up
package Yelp.Preprocessor

import org.imgscalr._
import java.io.File
import javax.imageio.ImageIO

object ImageResize {
  def main(args: Array[String]): Unit = {

    def resizeImg(img: java.awt.image.BufferedImage, width: Int, height: Int) = {
      Scalr.resize(img, Scalr.Method.BALANCED, width, height)
    }

    val testImage = ImageIO.read(new File("data/images/train/147.jpg"))

    val testImage32 = resizeImg(testImage, 32, 32)
    val testImage64 = resizeImg(testImage, 64, 64)
    val testImage128 = resizeImg(testImage, 128, 128)
    val testImage256 = resizeImg(testImage, 256, 256)

    ImageIO.write(testImage32, "jpg", new File("data/images/preprocessed/147resize32.jpg"))
    ImageIO.write(testImage64, "jpg", new File("data/images/preprocessed/147resize64.jpg"))
    ImageIO.write(testImage128, "jpg", new File("data/images/preprocessed/147resize128.jpg"))
    ImageIO.write(testImage256, "jpg", new File("data/images/preprocessed/147resize256.jpg"))
  }
} 
Example 84
Source File: SquaringImage.scala    From Scala-Machine-Learning-Projects   with MIT License 5 votes vote down vote up
package Yelp.Preprocessor

import org.imgscalr._
import java.io.File
import javax.imageio.ImageIO

object SquaringImage {
  def main(args: Array[String]): Unit = {
    def makeSquare(img: java.awt.image.BufferedImage): java.awt.image.BufferedImage = {
      val w = img.getWidth
      val h = img.getHeight
      val dim = List(w, h).min

      img match {
        case x if w == h => img
        case x if w > h => Scalr.crop(img, (w - h) / 2, 0, dim, dim)
        case x if w < h => Scalr.crop(img, 0, (h - w) / 2, dim, dim)
      }
    }

    val myimg = ImageIO.read(new File("data/images/train/147.jpg"))
    val myimgSquare = makeSquare(myimg)
    ImageIO.write(myimgSquare, "jpg", new File("data/images/preprocessed/147square.jpg"))
  }
} 
Example 85
Source File: NeuralNetwork.scala    From Scala-Machine-Learning-Projects   with MIT License 5 votes vote down vote up
package Yelp.Trainer

import org.deeplearning4j.nn.conf.MultiLayerConfiguration
import org.deeplearning4j.nn.multilayer.MultiLayerNetwork
import org.nd4j.linalg.factory.Nd4j
import java.io.File
import org.apache.commons.io.FileUtils
import java.io.{DataInputStream, DataOutputStream, FileInputStream}
import java.nio.file.{Files, Paths}

object NeuralNetwork {  
  def loadNN(NNconfig: String, NNparams: String) = {
    // get neural network config
    val confFromJson: MultiLayerConfiguration = MultiLayerConfiguration.fromJson(FileUtils.readFileToString(new File(NNconfig)))    
     // get neural network parameters 
    val dis: DataInputStream = new DataInputStream(new FileInputStream(NNparams))
    val newParams = Nd4j.read(dis)    
     // creating network object
    val savedNetwork: MultiLayerNetwork = new MultiLayerNetwork(confFromJson)
    savedNetwork.init()
    savedNetwork.setParameters(newParams)    
    savedNetwork
  }
  
  def saveNN(model: MultiLayerNetwork, NNconfig: String, NNparams: String) = {
    // save neural network config
    FileUtils.write(new File(NNconfig), model.getLayerWiseConfigurations().toJson())     
    // save neural network params
    val dos: DataOutputStream = new DataOutputStream(Files.newOutputStream(Paths.get(NNparams)))
    Nd4j.write(model.params(), dos)
  }  
} 
Example 86
Source File: KerberosLoginProvider.scala    From rokku   with Apache License 2.0 5 votes vote down vote up
package com.ing.wbaa.rokku.proxy.provider

import java.io.File

import com.ing.wbaa.rokku.proxy.config.KerberosSettings
import com.typesafe.scalalogging.LazyLogging
import org.apache.commons.lang.StringUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.security.UserGroupInformation

import scala.util.{ Failure, Success, Try }

trait KerberosLoginProvider extends LazyLogging {

  protected[this] def kerberosSettings: KerberosSettings

  loginUserFromKeytab(kerberosSettings.keytab, kerberosSettings.principal)

  private def loginUserFromKeytab(keytab: String, principal: String): Unit = {

    if (StringUtils.isNotBlank(keytab) && StringUtils.isNotBlank(principal)) {
      if (!new File(keytab).exists()) {
        logger.info("keytab file does not exist {}", keytab)
      } else {
        Try {
          UserGroupInformation.setConfiguration(new Configuration())
          UserGroupInformation.loginUserFromKeytab(principal, keytab)
        } match {
          case Success(_)         => logger.info("kerberos credentials provided {}", UserGroupInformation.getLoginUser)
          case Failure(exception) => logger.error("kerberos login error {}", exception)
        }
      }
    } else {
      logger.info("kerberos credentials are not provided")
    }
  }

} 
Example 87
Source File: S3SdkHelpers.scala    From rokku   with Apache License 2.0 5 votes vote down vote up
package com.ing.wbaa.testkit.awssdk

import java.io.File

import akka.http.scaladsl.model.Uri.Authority
import com.amazonaws.ClientConfiguration
import com.amazonaws.auth.{AWSCredentials, AWSStaticCredentialsProvider, BasicSessionCredentials}
import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration
import com.amazonaws.services.s3.transfer.TransferManagerBuilder
import com.amazonaws.services.s3.transfer.model.UploadResult
import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder}
import com.typesafe.config.ConfigFactory

import scala.collection.JavaConverters._


trait S3SdkHelpers {
  val awsRegion = ConfigFactory.load().getString("rokku.storage.s3.region")

  def getAmazonS3(authority: Authority,
                  credentials: AWSCredentials = new BasicSessionCredentials("accesskey", "secretkey", "token")
                 ): AmazonS3 = {
    val cliConf = new ClientConfiguration()
    cliConf.setMaxErrorRetry(1)

    AmazonS3ClientBuilder
      .standard()
      .withClientConfiguration(cliConf)
      .withCredentials(new AWSStaticCredentialsProvider(credentials))
      .withPathStyleAccessEnabled(true)
      .withEndpointConfiguration(new EndpointConfiguration(s"http://s3.localhost:${authority.port}", awsRegion))
      .build()
  }

  def getKeysInBucket(sdk: AmazonS3, bucket: String): List[String] =
    sdk
      .listObjectsV2(bucket)
      .getObjectSummaries
      .asScala.toList
      .map(_.getKey)

  def doMultiPartUpload(sdk: AmazonS3, bucket: String, file: String, key: String): UploadResult = {
    val upload = TransferManagerBuilder
      .standard()
      .withS3Client(sdk)
      .build()
      .upload(bucket, key, new File(file))

    upload.waitForUploadResult()
  }
} 
Example 88
Source File: RokkuFixtures.scala    From rokku   with Apache License 2.0 5 votes vote down vote up
package com.ing.wbaa.testkit

import java.io.{File, RandomAccessFile}

import com.amazonaws.services.s3.AmazonS3
import com.ing.wbaa.testkit.awssdk.S3SdkHelpers
import org.scalatest.Assertion

import scala.concurrent.{ExecutionContext, Future}
import scala.util.{Random, Try}

trait RokkuFixtures extends S3SdkHelpers {

  
  def withHomeBucket(s3Client: AmazonS3, objects: Seq[String])(testCode: String => Future[Assertion])(implicit exCtx: ExecutionContext): Future[Assertion] = {
    val testBucket = "home"
    Try(s3Client.createBucket(testBucket))
    objects.foreach(obj => s3Client.putObject(testBucket, obj, ""))
    testCode(testBucket).andThen {
      case _ =>
        cleanBucket(s3Client, testBucket)
    }
  }

  private def cleanBucket(s3Client: AmazonS3, bucketName: String) = {
    import scala.collection.JavaConverters._
    s3Client.listObjectsV2(bucketName).getObjectSummaries.asScala.toList.map(_.getKey).foreach { key =>
      s3Client.deleteObject(bucketName, key)
    }
  }
} 
Example 89
Source File: AppConfig.scala    From odsc-east-realish-predictions   with Apache License 2.0 5 votes vote down vote up
package com.twilio.open.odsc.realish.config

import java.io.File

import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory
import com.fasterxml.jackson.module.scala.DefaultScalaModule

object AppConfig {
  private val mapper = new ObjectMapper(new YAMLFactory)
  mapper.registerModule(DefaultScalaModule)

  def parse(configPath: String): AppConfig = {
    mapper.readValue(new File(configPath), classOf[AppConfig])
  }

}

@SerialVersionUID(100L)
case class AppConfig(
  sparkAppConfig: SparkAppConfig,
  streamingQueryConfig: StreamingQueryConfig
) extends Serializable

@SerialVersionUID(100L)
case class SparkAppConfig(
  appName: String,
  core: Map[String, String]
) extends Serializable

trait KafkaConsumerConfig {
  val topic: String
  val subscriptionType: String
  val conf: Map[String, String]
}

@SerialVersionUID(100L)
case class ConsumerConfig(
  topic: String,
  subscriptionType: String,
  conf: Map[String, String]
) extends KafkaConsumerConfig with Serializable

@SerialVersionUID(100L)
case class StreamingQueryConfig(
  streamName: String,
  triggerInterval: String,
  triggerEnabled: Boolean,
  windowInterval: String,
  watermarkInterval: String
) extends Serializable 
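A hedged usage sketch; the YAML path is an example and must point at a file whose structure matches the case classes above.

object AppConfigExample {
  def main(args: Array[String]): Unit = {
    val appConfig = AppConfig.parse("conf/streaming-app.yaml") // example path
    println(appConfig.sparkAppConfig.appName)
    println(appConfig.streamingQueryConfig.streamName)
  }
}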
Example 90
Source File: SchemaReader.scala    From darwin   with Apache License 2.0 5 votes vote down vote up
package it.agilelab.darwin.app.mock

import java.io.{File, InputStream}

import org.apache.avro.Schema

object SchemaReader {

  def readFromResources(p: String): Schema = {
    read(getClass.getClassLoader.getResourceAsStream(p))
  }

  def read(f: File): Schema = {
    val parser = new Schema.Parser()
    parser.parse(f)
  }

  def read(s: String): Schema = {
    val parser = new Schema.Parser()
    parser.parse(s)
  }

  def read(is: InputStream): Schema = {
    val parser = new Schema.Parser()
    parser.parse(is)
  }
} 
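A short sketch of calling the parser helpers above; the inline schema JSON and the commented-out paths are illustrative assumptions.

import org.apache.avro.Schema

import it.agilelab.darwin.app.mock.SchemaReader

object SchemaReaderExample extends App {
  // Parse a schema from an inline JSON string.
  val schema: Schema = SchemaReader.read(
    """{"type":"record","name":"User","fields":[{"name":"id","type":"long"},{"name":"name","type":"string"}]}"""
  )
  println(s"Parsed record schema: ${schema.getFullName}")

  // The File and InputStream overloads behave the same way, e.g.:
  // SchemaReader.read(new java.io.File("schemas/user.avsc"))
  // SchemaReader.readFromResources("user.avsc")
}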
Example 91
Source File: SbtActorApi.scala    From sbt-actor-api   with MIT License 5 votes vote down vote up
package im.actor

import im.actor.api._
import java.io.File
import sbt._, Keys._

object SbtActorApi extends AutoPlugin {
  val ActorApi = config("actorapi").hide

  val path = SettingKey[File]("actor-schema-path", "The path that contains actor.json file")
  val outputPath = SettingKey[File]("actor-schema-output-path", "The paths where to save the generated *.scala files.")

  lazy val actorapi = TaskKey[Seq[File]]("actorapi", "Compile json schema to scala code")
  lazy val actorapiClean = TaskKey[Seq[File]]("actorapi-clean", "Clean generated code")

  lazy val actorapiMain = SettingKey[String]("actorapi-main", "ActorApi main class.")

  lazy val settings: Seq[Setting[_]] = Seq(
    sourceDirectory in ActorApi <<= (sourceDirectory in Compile),
    path <<= sourceDirectory in ActorApi,
    managedClasspath in ActorApi <<= (classpathTypes, update) map { (ct, report) ⇒
      Classpaths.managedJars(ActorApi, ct, report)
    },
    outputPath <<= sourceManaged in ActorApi,

    actorapi <<= (
      sourceDirectory in ActorApi,
      sourceManaged in ActorApi,
      managedClasspath in ActorApi,
      javaHome,
      streams
    ).map(generate),

    actorapiClean <<= (
      sourceManaged in ActorApi,
      streams
    ).map(clean),

    sourceGenerators in Compile <+= actorapi
  )

  private def compiledFileDir(targetDir: File): File =
    targetDir / "main" / "scala"

  private def compiledFile(targetDir: File, name: String): File =
    compiledFileDir(targetDir) / s"${name}.scala"

  private def clean(targetDir: File, streams: TaskStreams): Seq[File] = {
    val log = streams.log

    log.info("Cleaning actor schema")

    IO.delete(targetDir)

    Seq(targetDir)
  }

  private def generate(srcDir: File, targetDir: File, classpath: Classpath, javaHome: Option[File], streams: TaskStreams): Seq[File] = {
    val log = streams.log

    log.info(f"Generating actor schema for $srcDir%s")

    val input = srcDir / "actor-api"

    if (!input.exists()) {
      log.info(f"$input%s does not exist")
      Nil
    } else {
      val output = compiledFileDir(targetDir)

      val cached = FileFunction.cached(streams.cacheDirectory / "actor-api", FilesInfo.lastModified, FilesInfo.exists) {
        (in: Set[File]) ⇒
          {
            if (!output.exists())
              IO.createDirectory(output)

            val src = input / "actor.json"
            if (src.exists()) {
              val sources = (new Json2Tree(IO.read(src))).convert()

              sources foreach {
                case (name, source) ⇒
                  val targetFile = compiledFile(targetDir, name)

                  log.info(f"Generated ActorApi $targetFile%s")

                  IO.write(targetFile, source)
              }
            } else {
              log.info(f"no actor.json file in $input%s")
            }

            (output ** ("*.scala")).get.toSet
          }
      }
      cached((input ** "actor.json").get.toSet).toSeq
    }
  }
} 
Example 92
Source File: VLFeatSuite.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.utils.external

import java.io.File

import breeze.linalg._
import breeze.numerics.abs
import org.scalatest.FunSuite
import keystoneml.pipelines.Logging
import keystoneml.utils.{ImageUtils, MatrixUtils, TestUtils}

class VLFeatSuite extends FunSuite with Logging {
  test("Load an Image and compute SIFT Features") {
    val testImage = TestUtils.loadTestImage("images/000012.jpg")
    val singleImage = ImageUtils.mapPixels(testImage, _/255.0)
    val grayImage = ImageUtils.toGrayScale(singleImage)

    val extLib = new VLFeat

    val stepSize = 3
    val binSize = 4
    val scales = 4
    val descriptorLength = 128
    val scaleStep = 0

    val rawDescDataShort = extLib.getSIFTs(grayImage.metadata.xDim, grayImage.metadata.yDim,
      stepSize, binSize, scales, scaleStep, grayImage.getSingleChannelAsFloatArray())

    assert(rawDescDataShort.length % descriptorLength == 0, "Resulting SIFTs must be 128-dimensional.")

    val numCols = rawDescDataShort.length/descriptorLength
    val result = new DenseMatrix(descriptorLength, numCols, rawDescDataShort.map(_.toDouble))

    // Compare with the output of running this image through vl_phow with matlab from the enceval package:
    // featpipem_addpaths;
    // im = im2single(imread('images/000012.jpg'));
    // featextr = featpipem.features.PhowExtractor();
    // featextr.step = 3;
    // [frames feats] = featextr.compute(im);
    // csvwrite('images/feats128.csv', feats)

    val testFeatures = csvread(new File(TestUtils.getTestResourceFileName("images/feats128.csv")))

    val diff = result - testFeatures

    // Because of subtle differences in the way image smoothing works in the VLFeat C library and the VLFeat matlab
    // library (vl_imsmooth_f vs. _vl_imsmooth_f), these two matrices will not be exactly the same.
    // Instead, we check that 99.5% of the matrix entries are off by at most 1.
    val absdiff = abs(diff).toDenseVector

    assert(absdiff.findAll(_ > 1.0).length.toDouble < 0.005*absdiff.length,
      "Fewer than 0.5% of entries may be different by more than 1.")
  }
} 
Example 93
Source File: EncEvalSuite.scala    From keystone   with Apache License 2.0 5 votes vote down vote up
package keystoneml.utils.external

import java.io.File

import breeze.linalg._
import breeze.stats.distributions.Gaussian
import keystoneml.nodes.learning.GaussianMixtureModel
import keystoneml.nodes.learning.external.GaussianMixtureModelEstimator
import org.scalatest.FunSuite
import keystoneml.pipelines.Logging
import keystoneml.utils.{Stats, TestUtils}

class EncEvalSuite extends FunSuite with Logging {

  test("Load SIFT Descriptors and compute Fisher Vector Features") {

    val siftDescriptor = csvread(new File(TestUtils.getTestResourceFileName("images/feats.csv")))

    val gmmMeans = TestUtils.getTestResourceFileName("images/voc_codebook/means.csv")
    val gmmVars = TestUtils.getTestResourceFileName("images/voc_codebook/variances.csv")
    val gmmWeights = TestUtils.getTestResourceFileName("images/voc_codebook/priors")

    val gmm = GaussianMixtureModel.load(gmmMeans, gmmVars, gmmWeights)

    val nCenters = gmm.means.cols
    val nDim = gmm.means.rows

    val extLib = new EncEval

    val fisherVector = extLib.calcAndGetFVs(
      gmm.means.toArray.map(_.toFloat),
      nCenters,
      nDim,
      gmm.variances.toArray.map(_.toFloat),
      gmm.weights.toArray.map(_.toFloat),
      siftDescriptor.toArray.map(_.toFloat))

    log.info(s"Fisher Vector is ${fisherVector.sum}")
    assert(Stats.aboutEq(fisherVector.sum, 40.109097, 1e-4), "SUM of Fisher Vectors must match expected sum.")

  }

  test("Compute a GMM from scala") {
    val nsamps = 10000

    // Generate two gaussians.
    val x = Gaussian(-1.0, 0.5).samples.take(nsamps).toArray
    val y = Gaussian(5.0, 1.0).samples.take(nsamps).toArray

    val z = shuffle(x ++ y).map(x => DenseVector(x))

    // Compute a 1-d GMM.
    val extLib = new EncEval
    val gmm = new GaussianMixtureModelEstimator(2).fit(z)

    logInfo(s"GMM means: ${gmm.means.toArray.mkString(",")}")
    logInfo(s"GMM vars: ${gmm.variances.toArray.mkString(",")}")
    logInfo(s"GMM weights: ${gmm.weights.toArray.mkString(",")}")

    // The results should be close to the distribution we set up.
    assert(Stats.aboutEq(min(gmm.means), -1.0, 1e-1), "Smallest mean should be close to -1.0")
    assert(Stats.aboutEq(max(gmm.means), 5.0, 1e-1), "Largest mean should be close to 5.0")
    assert(Stats.aboutEq(math.sqrt(min(gmm.variances)), 0.5, 1e-1), "Smallest SD should be close to 0.5")
    assert(Stats.aboutEq(math.sqrt(max(gmm.variances)), 1.0, 1e-1), "Largest SD should be close to 1.0")
  }
} 
Example 94
Source File: DefaultBodyWritables.scala    From play-ws   with Apache License 2.0 5 votes vote down vote up
package play.api.libs.ws

import java.io.File
import java.nio.ByteBuffer
import java.util.function.Supplier

import akka.stream.scaladsl.StreamConverters.fromInputStream
import akka.stream.scaladsl.FileIO
import akka.stream.scaladsl.Source
import akka.util.ByteString

import scala.compat.java8.FunctionConverters.asScalaFromSupplier

trait DefaultBodyWritables {

  implicit val writeableOf_urlEncodedForm: BodyWritable[Map[String, Seq[String]]] = {
    import java.net.URLEncoder
    BodyWritable(
      formData =>
        InMemoryBody(
          ByteString.fromString(
            formData.flatMap(item => item._2.map(c => s"${item._1}=${URLEncoder.encode(c, "UTF-8")}")).mkString("&")
          )
        ),
      "application/x-www-form-urlencoded"
    )
  }

  implicit val writeableOf_urlEncodedSimpleForm: BodyWritable[Map[String, String]] = {
    writeableOf_urlEncodedForm.map[Map[String, String]](_.map(kv => kv._1 -> Seq(kv._2)))
  }

}

object DefaultBodyWritables extends DefaultBodyWritables 
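The URL-encoding logic above is easy to exercise in isolation; here is a minimal sketch of the same flatten-encode-join transformation using only the standard library (the form data is made up).

import java.net.URLEncoder

object FormEncodingExample extends App {
  val formData: Map[String, Seq[String]] = Map(
    "q"    -> Seq("play framework"),
    "tags" -> Seq("scala", "http")
  )

  // Same transformation as writeableOf_urlEncodedForm, without the ByteString wrapping.
  val encoded = formData
    .flatMap { case (key, values) => values.map(v => s"$key=${URLEncoder.encode(v, "UTF-8")}") }
    .mkString("&")

  println(encoded) // e.g. q=play+framework&tags=scala&tags=http
}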
Example 95
Source File: TestZooKeeper.scala    From mango   with Apache License 2.0 5 votes vote down vote up
package com.kakao.mango.zk

import java.io.{File, IOException}
import java.net.{ServerSocket, Socket}
import java.util.concurrent.TimeUnit

import com.kakao.mango.concurrent.NamedExecutors
import com.kakao.mango.logging.{LogLevelOverrider, Logging}
import com.kakao.shaded.guava.io.Files
import org.apache.zookeeper.server.persistence.FileTxnSnapLog
import org.apache.zookeeper.server.{ServerCnxnFactory, ServerConfig, ZooKeeperServer}
import org.scalatest.{BeforeAndAfterAll, Suite}

trait TestZooKeeper extends BeforeAndAfterAll with Logging { this: Suite =>

  
  val zkServerPort = 2181
  val zkServerExecutor = NamedExecutors.single("zookeeper-server")
  var zk: ZooKeeperConnection = _

  override protected def beforeAll(): Unit = {
    logger.info("Launching a standalone ZooKeeper server for testing...")

    try {
      val socket = new ServerSocket(zkServerPort)
      socket.close()
    } catch {
      case e: IOException =>
        throw new RuntimeException(s"TCP port $zkServerPort is required for tests but not available")
    }

    zkServerExecutor.submit {
      LogLevelOverrider.error("org.apache.zookeeper")

      val datadir = Files.createTempDir().getAbsolutePath
      val config = new ServerConfig
      config.parse(Array(zkServerPort.toString, datadir))

      val zkServer = new ZooKeeperServer
      zkServer.setTxnLogFactory(new FileTxnSnapLog(new File(datadir), new File(datadir)))
      zkServer.setTickTime(6000)
      zkServer.setMinSessionTimeout(6000)
      zkServer.setMaxSessionTimeout(6000)

      val cnxnFactory = ServerCnxnFactory.createFactory

      try {
        cnxnFactory.configure(config.getClientPortAddress, 60)
        cnxnFactory.startup(zkServer)
        cnxnFactory.join()
      } catch {
        case _: InterruptedException =>
          logger.info("ZooKeeper server interrupted; shutting down...")
          cnxnFactory.shutdown()
          cnxnFactory.join()
          if (zkServer.isRunning) {
            zkServer.shutdown()
          }
          logger.info("ZooKeeper server stopped")
      }
    }

    var connected = false
    while (!connected) {
      logger.info("Waiting for ZooKeeper server to launch...")
      try {
        val socket = new Socket("localhost", zkServerPort)
        logger.info("ZooKeeper server is available")
        socket.close()

        zk = ZooKeeperConnection(s"localhost:$zkServerPort")
        connected = true
      } catch {
        case _: IOException => Thread.sleep(1000) // retry
      }
    }

    super.beforeAll()
  }

  override protected def afterAll(): Unit = {
    try super.afterAll()
    finally {
      zk.close()
      logger.info("Interrupting ZooKeeper server...")
      zkServerExecutor.shutdownNow()
      while (!zkServerExecutor.awaitTermination(1, TimeUnit.SECONDS)) {
        logger.info("awaiting ZooKeeper server termination...")
      }
      logger.info("ZooKeeper server terminated")
    }
  }
} 
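The port-availability check in beforeAll is a small reusable pattern on its own; a sketch follows, with the port number chosen arbitrarily.

import java.io.IOException
import java.net.ServerSocket

object PortCheckExample extends App {
  def isPortFree(port: Int): Boolean =
    try {
      // Binding and immediately closing succeeds only if nothing else holds the port.
      new ServerSocket(port).close()
      true
    } catch {
      case _: IOException => false
    }

  val port = 2181
  println(s"Port $port is ${if (isPortFree(port)) "free" else "already in use"}")
}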
Example 96
Source File: FileSystems.scala    From mango   with Apache License 2.0 5 votes vote down vote up
package com.kakao.mango.io

import java.io.File
import java.nio.file._

import scala.collection.JavaConversions._
import scala.collection.mutable

object FileSystems {

  def entries(dir: Path, recursive: Boolean = true): Stream[Path] = {
    val maxDepth = if (recursive) Int.MaxValue else 1
    val stack = mutable.Stack[(Path, Int)]((dir, maxDepth))

    new Iterator[Iterator[Path]] {
      override def hasNext: Boolean = stack.nonEmpty
      override def next(): Iterator[Path] = {
        val (dir, depth) = stack.pop()
        Files.newDirectoryStream(dir).iterator().flatMap {
          case entry if Files.isDirectory(entry) =>
            if (depth > 1) stack.push((entry, depth - 1))
            Nil
          case entry => Some(entry)
        }
      }
    }.toStream.flatten
  }

  def entries(dir: File): Stream[File] = entries(dir.toPath, recursive = true).map(_.toFile)
  def entries(dir: File, recursive: Boolean): Stream[File] = entries(dir.toPath, recursive).map(_.toFile)
  def entries(dir: String): Stream[Path] = entries(Paths.get(dir), recursive = true)
  def entries(dir: String, recursive: Boolean): Stream[Path] = entries(Paths.get(dir), recursive)

} 
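A usage sketch for the entries helpers above, listing Scala sources under a hypothetical src directory; the path and the extension filter are assumptions.

import com.kakao.mango.io.FileSystems

object FileSystemsExample extends App {
  // Recursively stream every path under ./src and keep only *.scala files.
  val scalaSources = FileSystems.entries("src")
    .filter(_.getFileName.toString.endsWith(".scala"))

  scalaSources.foreach(p => println(p.toAbsolutePath))
  println(s"Found ${scalaSources.size} Scala files")
}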
Example 97
Source File: QueryPartitionSuite.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive

import java.io.File
import java.sql.Timestamp

import com.google.common.io.Files
import org.apache.hadoop.fs.FileSystem

import org.apache.spark.internal.config._
import org.apache.spark.sql._
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.test.SQLTestUtils
import org.apache.spark.util.Utils

class QueryPartitionSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
  import spark.implicits._

  private def queryWhenPathNotExist(): Unit = {
    withTempView("testData") {
      withTable("table_with_partition", "createAndInsertTest") {
        withTempDir { tmpDir =>
          val testData = sparkContext.parallelize(
            (1 to 10).map(i => TestData(i, i.toString))).toDF()
          testData.createOrReplaceTempView("testData")

          // create the table for test
          sql(s"CREATE TABLE table_with_partition(key int,value string) " +
              s"PARTITIONED by (ds string) location '${tmpDir.toURI}' ")
          sql("INSERT OVERWRITE TABLE table_with_partition  partition (ds='1') " +
              "SELECT key,value FROM testData")
          sql("INSERT OVERWRITE TABLE table_with_partition  partition (ds='2') " +
              "SELECT key,value FROM testData")
          sql("INSERT OVERWRITE TABLE table_with_partition  partition (ds='3') " +
              "SELECT key,value FROM testData")
          sql("INSERT OVERWRITE TABLE table_with_partition  partition (ds='4') " +
              "SELECT key,value FROM testData")

          // test for the exist path
          checkAnswer(sql("select key,value from table_with_partition"),
            testData.union(testData).union(testData).union(testData))

          // delete the path of one partition
          tmpDir.listFiles
              .find { f => f.isDirectory && f.getName().startsWith("ds=") }
              .foreach { f => Utils.deleteRecursively(f) }

          // test for after delete the path
          checkAnswer(sql("select key,value from table_with_partition"),
            testData.union(testData).union(testData))
        }
      }
    }
  }

  test("SPARK-5068: query data when path doesn't exist") {
    withSQLConf(SQLConf.HIVE_VERIFY_PARTITION_PATH.key -> "true") {
      queryWhenPathNotExist()
    }
  }

  test("Replace spark.sql.hive.verifyPartitionPath by spark.files.ignoreMissingFiles") {
    withSQLConf(SQLConf.HIVE_VERIFY_PARTITION_PATH.key -> "false") {
      sparkContext.conf.set(IGNORE_MISSING_FILES.key, "true")
      queryWhenPathNotExist()
    }
  }

  test("SPARK-21739: Cast expression should initialize timezoneId") {
    withTable("table_with_timestamp_partition") {
      sql("CREATE TABLE table_with_timestamp_partition(value int) PARTITIONED BY (ts TIMESTAMP)")
      sql("INSERT OVERWRITE TABLE table_with_timestamp_partition " +
        "PARTITION (ts = '2010-01-01 00:00:00.000') VALUES (1)")

      // test for Cast expression in TableReader
      checkAnswer(sql("SELECT * FROM table_with_timestamp_partition"),
        Seq(Row(1, Timestamp.valueOf("2010-01-01 00:00:00.000"))))

      // test for Cast expression in HiveTableScanExec
      checkAnswer(sql("SELECT value FROM table_with_timestamp_partition " +
        "WHERE ts = '2010-01-01 00:00:00.000'"), Row(1))
    }
  }
} 
Example 98
Source File: HiveQueryFileTest.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive.execution

import java.io.File

import org.apache.spark.sql.catalyst.util._

abstract class HiveQueryFileTest extends HiveComparisonTest {

  // Minimal declarations for members referenced below; their full definitions are omitted in this excerpt.
  def blackList: Seq[String] = Nil
  def runOnlyDirectories: Seq[String] = Nil
  def skipDirectories: Seq[String] = Nil

  def whiteList: Seq[String] = ".*" :: Nil

  def testCases: Seq[(String, File)]

  val runAll: Boolean =
    !(System.getProperty("spark.hive.alltests") == null) ||
    runOnlyDirectories.nonEmpty ||
    skipDirectories.nonEmpty

  val whiteListProperty: String = "spark.hive.whitelist"
  // Allow the whiteList to be overridden by a system property
  val realWhiteList: Seq[String] =
    Option(System.getProperty(whiteListProperty)).map(_.split(",").toSeq).getOrElse(whiteList)

  // Go through all the test cases and add them to scala test.
  testCases.sorted.foreach {
    case (testCaseName, testCaseFile) =>
      if (blackList.map(_.r.pattern.matcher(testCaseName).matches()).reduceLeft(_||_)) {
        logDebug(s"Blacklisted test skipped $testCaseName")
      } else if (realWhiteList.map(_.r.pattern.matcher(testCaseName).matches()).reduceLeft(_||_) ||
        runAll) {
        // Build a test case and submit it to scala test framework...
        val queriesString = fileToString(testCaseFile)
        createQueryTest(testCaseName, queriesString, reset = true, tryWithoutResettingFirst = true)
      } else {
        // Only output warnings for the built-in whitelist, since warning about everything clutters
        // the output when the user is trying to execute a single test from the command line.
        if (System.getProperty(whiteListProperty) == null && !runAll) {
          ignore(testCaseName) {}
        }
      }
  }
} 
Example 99
Source File: HiveClientBuilder.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.hive.client

import java.io.File

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.util.VersionInfo

import org.apache.spark.SparkConf
import org.apache.spark.util.Utils

private[client] object HiveClientBuilder {
  // In order to speed up test execution during development or in Jenkins, you can specify the path
  // of an existing Ivy cache:
  private val ivyPath: Option[String] = {
    sys.env.get("SPARK_VERSIONS_SUITE_IVY_PATH").orElse(
      Some(new File(sys.props("java.io.tmpdir"), "hive-ivy-cache").getAbsolutePath))
  }

  private def buildConf(extraConf: Map[String, String]) = {
    lazy val warehousePath = Utils.createTempDir()
    lazy val metastorePath = Utils.createTempDir()
    metastorePath.delete()
    extraConf ++ Map(
      "javax.jdo.option.ConnectionURL" -> s"jdbc:derby:;databaseName=$metastorePath;create=true",
      "hive.metastore.warehouse.dir" -> warehousePath.toString)
  }

  // for testing only
  def buildClient(
      version: String,
      hadoopConf: Configuration,
      extraConf: Map[String, String] = Map.empty,
      sharesHadoopClasses: Boolean = true): HiveClient = {
    IsolatedClientLoader.forVersion(
      hiveMetastoreVersion = version,
      hadoopVersion = VersionInfo.getVersion,
      sparkConf = new SparkConf(),
      hadoopConf = hadoopConf,
      config = buildConf(extraConf),
      ivyPath = ivyPath,
      sharesHadoopClasses = sharesHadoopClasses).createClient()
  }
} 
Example 100
Source File: EvalPythonExec.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.python

import java.io.File

import scala.collection.mutable.ArrayBuffer

import org.apache.spark.{SparkEnv, TaskContext}
import org.apache.spark.api.python.ChainedPythonFunctions
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.types.{DataType, StructField, StructType}
import org.apache.spark.util.Utils



abstract class EvalPythonExec(udfs: Seq[PythonUDF], output: Seq[Attribute], child: SparkPlan)
  extends SparkPlan {

  def children: Seq[SparkPlan] = child :: Nil

  override def producedAttributes: AttributeSet = AttributeSet(output.drop(child.output.length))

  private def collectFunctions(udf: PythonUDF): (ChainedPythonFunctions, Seq[Expression]) = {
    udf.children match {
      case Seq(u: PythonUDF) =>
        val (chained, children) = collectFunctions(u)
        (ChainedPythonFunctions(chained.funcs ++ Seq(udf.func)), children)
      case children =>
        // There should not be any other UDFs, or the children can't be evaluated directly.
        assert(children.forall(_.find(_.isInstanceOf[PythonUDF]).isEmpty))
        (ChainedPythonFunctions(Seq(udf.func)), udf.children)
    }
  }

  protected def evaluate(
      funcs: Seq[ChainedPythonFunctions],
      argOffsets: Array[Array[Int]],
      iter: Iterator[InternalRow],
      schema: StructType,
      context: TaskContext): Iterator[InternalRow]

  protected override def doExecute(): RDD[InternalRow] = {
    val inputRDD = child.execute().map(_.copy())

    inputRDD.mapPartitions { iter =>
      val context = TaskContext.get()

      // The queue used to buffer input rows so we can drain it to
      // combine input with output from Python.
      val queue = HybridRowQueue(context.taskMemoryManager(),
        new File(Utils.getLocalDir(SparkEnv.get.conf)), child.output.length)
      context.addTaskCompletionListener[Unit] { ctx =>
        queue.close()
      }

      val (pyFuncs, inputs) = udfs.map(collectFunctions).unzip

      // flatten all the arguments
      val allInputs = new ArrayBuffer[Expression]
      val dataTypes = new ArrayBuffer[DataType]
      val argOffsets = inputs.map { input =>
        input.map { e =>
          if (allInputs.exists(_.semanticEquals(e))) {
            allInputs.indexWhere(_.semanticEquals(e))
          } else {
            allInputs += e
            dataTypes += e.dataType
            allInputs.length - 1
          }
        }.toArray
      }.toArray
      val projection = newMutableProjection(allInputs, child.output)
      val schema = StructType(dataTypes.zipWithIndex.map { case (dt, i) =>
        StructField(s"_$i", dt)
      })

      // Add rows to queue to join later with the result.
      val projectedRowIter = iter.map { inputRow =>
        queue.add(inputRow.asInstanceOf[UnsafeRow])
        projection(inputRow)
      }

      val outputRowIterator = evaluate(
        pyFuncs, argOffsets, projectedRowIter, schema, context)

      val joined = new JoinedRow
      val resultProj = UnsafeProjection.create(output, output)

      outputRowIterator.map { outputRow =>
        resultProj(joined(queue.remove(), outputRow))
      }
    }
  }
} 
Example 101
Source File: resources.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.command

import java.io.File
import java.net.URI

import org.apache.hadoop.fs.Path

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}


case class ListJarsCommand(jars: Seq[String] = Seq.empty[String]) extends RunnableCommand {
  override val output: Seq[Attribute] = {
    AttributeReference("Results", StringType, nullable = false)() :: Nil
  }
  override def run(sparkSession: SparkSession): Seq[Row] = {
    val jarList = sparkSession.sparkContext.listJars()
    if (jars.nonEmpty) {
      for {
        jarName <- jars.map(f => new Path(f).getName)
        jarPath <- jarList if jarPath.contains(jarName)
      } yield Row(jarPath)
    } else {
      jarList.map(Row(_))
    }
  }
} 
Example 102
Source File: OrcTest.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.datasources.orc

import java.io.File

import scala.reflect.ClassTag
import scala.reflect.runtime.universe.TypeTag

import org.scalatest.BeforeAndAfterAll

import org.apache.spark.sql._
import org.apache.spark.sql.internal.SQLConf.ORC_IMPLEMENTATION
import org.apache.spark.sql.test.SQLTestUtils

abstract class OrcTest extends QueryTest with SQLTestUtils with BeforeAndAfterAll {
  import testImplicits._

  // Helper used below; its full definition is omitted in this excerpt.
  protected def withOrcDataFrame[T <: Product: ClassTag: TypeTag]
      (data: Seq[T])(f: DataFrame => Unit): Unit

  protected def withOrcTable[T <: Product: ClassTag: TypeTag]
      (data: Seq[T], tableName: String)
      (f: => Unit): Unit = {
    withOrcDataFrame(data) { df =>
      df.createOrReplaceTempView(tableName)
      withTempView(tableName)(f)
    }
  }

  protected def makeOrcFile[T <: Product: ClassTag: TypeTag](
      data: Seq[T], path: File): Unit = {
    data.toDF().write.mode(SaveMode.Overwrite).orc(path.getCanonicalPath)
  }

  protected def makeOrcFile[T <: Product: ClassTag: TypeTag](
      df: DataFrame, path: File): Unit = {
    df.write.mode(SaveMode.Overwrite).orc(path.getCanonicalPath)
  }

  protected def checkPredicatePushDown(df: DataFrame, numRows: Int, predicate: String): Unit = {
    withTempPath { file =>
      // Repartition the data so that several ORC files are written, which lets ORC
      // skip whole stripes when the predicate is pushed down.
      df.repartition(numRows).write.orc(file.getCanonicalPath)
      val actual = stripSparkFilter(spark.read.orc(file.getCanonicalPath).where(predicate)).count()
      assert(actual < numRows)
    }
  }
} 
Example 103
Source File: HadoopFsRelationSuite.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.datasources

import java.io.{File, FilenameFilter}

import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, SortMergeJoinExec}
import org.apache.spark.sql.test.SharedSQLContext

class HadoopFsRelationSuite extends QueryTest with SharedSQLContext {

  test("sizeInBytes should be the total size of all files") {
    withTempDir{ dir =>
      dir.delete()
      spark.range(1000).write.parquet(dir.toString)
      // ignore hidden files
      val allFiles = dir.listFiles(new FilenameFilter {
        override def accept(dir: File, name: String): Boolean = {
          !name.startsWith(".") && !name.startsWith("_")
        }
      })
      val totalSize = allFiles.map(_.length()).sum
      val df = spark.read.parquet(dir.toString)
      assert(df.queryExecution.logical.stats.sizeInBytes === BigInt(totalSize))
    }
  }

  test("SPARK-22790: spark.sql.sources.compressionFactor takes effect") {
    import testImplicits._
    Seq(1.0, 0.5).foreach { compressionFactor =>
      withSQLConf("spark.sql.sources.fileCompressionFactor" -> compressionFactor.toString,
        "spark.sql.autoBroadcastJoinThreshold" -> "400") {
        withTempPath { workDir =>
          // the file size is 740 bytes
          val workDirPath = workDir.getAbsolutePath
          val data1 = Seq(100, 200, 300, 400).toDF("count")
          data1.write.parquet(workDirPath + "/data1")
          val df1FromFile = spark.read.parquet(workDirPath + "/data1")
          val data2 = Seq(100, 200, 300, 400).toDF("count")
          data2.write.parquet(workDirPath + "/data2")
          val df2FromFile = spark.read.parquet(workDirPath + "/data2")
          val joinedDF = df1FromFile.join(df2FromFile, Seq("count"))
          if (compressionFactor == 0.5) {
            val bJoinExec = joinedDF.queryExecution.executedPlan.collect {
              case bJoin: BroadcastHashJoinExec => bJoin
            }
            assert(bJoinExec.nonEmpty)
            val smJoinExec = joinedDF.queryExecution.executedPlan.collect {
              case smJoin: SortMergeJoinExec => smJoin
            }
            assert(smJoinExec.isEmpty)
          } else {
            // compressionFactor is 1.0
            val bJoinExec = joinedDF.queryExecution.executedPlan.collect {
              case bJoin: BroadcastHashJoinExec => bJoin
            }
            assert(bJoinExec.isEmpty)
            val smJoinExec = joinedDF.queryExecution.executedPlan.collect {
              case smJoin: SortMergeJoinExec => smJoin
            }
            assert(smJoinExec.nonEmpty)
          }
        }
      }
    }
  }
} 
Example 104
Source File: StreamMetadataSuite.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql.execution.streaming

import java.io.File
import java.util.UUID

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

import org.apache.spark.sql.streaming.StreamTest

class StreamMetadataSuite extends StreamTest {

  test("writing and reading") {
    withTempDir { dir =>
      val id = UUID.randomUUID.toString
      val metadata = StreamMetadata(id)
      val file = new Path(new File(dir, "test").toString)
      StreamMetadata.write(metadata, file, hadoopConf)
      val readMetadata = StreamMetadata.read(file, hadoopConf)
      assert(readMetadata.nonEmpty)
      assert(readMetadata.get.id === id)
    }
  }

  test("read Spark 2.1.0 format") {
    // query-metadata-logs-version-2.1.0.txt has the execution metadata generated by Spark 2.1.0
    assert(
      readForResource("query-metadata-logs-version-2.1.0.txt") ===
      StreamMetadata("d366a8bf-db79-42ca-b5a4-d9ca0a11d63e"))
  }

  private def readForResource(fileName: String): StreamMetadata = {
    val input = getClass.getResource(s"/structured-streaming/$fileName")
    StreamMetadata.read(new Path(input.toString), hadoopConf).get
  }

  private val hadoopConf = new Configuration()
} 
Example 105
Source File: BarChartPainter.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.painter

import java.io.File
import java.util.Scanner

import org.jfree.chart.{ChartFactory, ChartUtils}
import org.jfree.chart.plot.PlotOrientation
import org.jfree.data.category.DefaultCategoryDataset

import org.apache.spark.util.Utils

class BarChartPainter(dataPath: String, picturePath: String)
  extends Painter(dataPath, picturePath) {

  def createDataset(): DefaultCategoryDataset = {
    fw.flush()
    fw.close()
    val dataset = new DefaultCategoryDataset
    val scanner = new Scanner(new File(dataPath))
    while (scanner.hasNext()) {
      val cols = scanner.next().split(",")
      dataset.addValue(Utils.byteStringAsMb(cols(1) + "b"), "peak", cols(0))
      dataset.addValue(Utils.byteStringAsMb(cols(2) + "b"), "majority", cols(0))
    }
    dataset
  }

  def paint(
      width: Int,
      height: Int,
      chartTitle: String,
      categoryAxisLabel: String,
      valueAxisLabel: String,
      yLB: Double,
      yUB: Double): Unit = {
    val barChart = ChartFactory.createBarChart(
      chartTitle,
      categoryAxisLabel,
      valueAxisLabel,
      createDataset,
      PlotOrientation.VERTICAL,
      true,
      false,
      false)
    barChart.getCategoryPlot.getRangeAxis.setRange(yLB, yUB)
    ChartUtils.saveChartAsJPEG(new File(picturePath), barChart, width, height)
  }

  override def paint(
      width: Int,
      height: Int,
      chartTitle: String,
      categoryAxisLabel: String,
      valueAxisLabel: String): Unit = {}
} 
Example 106
Source File: Painter.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.painter

import java.awt.Font
import java.io.{File, FileWriter}

import org.jfree.chart.{ChartFactory, StandardChartTheme}
import org.jfree.data.general.Dataset

abstract class Painter(dataPath: String, picturePath: String) {
  initialize()
  var fw: FileWriter = _

  def initialize(): Unit = {
    val dataFile = new File(dataPath)
    if (dataFile.exists()) {
      dataFile.delete()
    }
    fw = new FileWriter(dataPath, true)
    val standardChartTheme = new StandardChartTheme("CN")
    standardChartTheme.setExtraLargeFont(new Font("Monospaced", Font.BOLD, 20))
    standardChartTheme.setRegularFont(new Font("Monospaced", Font.PLAIN, 15))
    standardChartTheme.setLargeFont(new Font("Monospaced", Font.PLAIN, 15))
    ChartFactory.setChartTheme(standardChartTheme)
  }

  def addPoint(xAxis: Any, yAxis: Any): Unit = {
    fw.write(s"${xAxis},${yAxis}\n")
  }

  def addPoint(xAxis: Any, yAxis: Any, zAxis: Any): Unit = {
    fw.write(s"${xAxis},${yAxis},${zAxis}\n")
  }

  def createDataset(): Dataset

  def paint(
      width: Int,
      height: Int,
      chartTitle: String,
      categoryAxisLabel: String,
      valueAxisLabel: String): Unit
} 
Example 107
Source File: TimeSeriesChartPainter.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.painter

import java.io.File
import java.util.Scanner

import org.jfree.chart.{ChartFactory, ChartUtils}
import org.jfree.data.time.{FixedMillisecond, TimeSeries, TimeSeriesCollection}
import org.jfree.data.xy.XYDataset

class TimeSeriesChartPainter(dataPath: String, picturePath: String)
  extends Painter(dataPath, picturePath) {

  def createDataset(): XYDataset = {
    fw.flush()
    fw.close()
    val dataset = new TimeSeriesCollection
    val timeSeries = new TimeSeries("default")
    val scanner = new Scanner(new File(dataPath))
    while (scanner.hasNext()) {
      val cols = scanner.next().split(",")
      timeSeries.addOrUpdate(new FixedMillisecond(cols(1).toLong), cols(0).toLong)
    }
    dataset.addSeries(timeSeries)
    dataset
  }

  def paint(
      width: Int,
      height: Int,
      chartTitle: String,
      categoryAxisLabel: String,
      valueAxisLabel: String): Unit = {
    val lineChart = ChartFactory.createTimeSeriesChart(
      chartTitle,
      categoryAxisLabel,
      valueAxisLabel,
      createDataset,
      false,
      false,
      false)
    ChartUtils.saveChartAsJPEG(new File(picturePath), lineChart, width, height)
  }
} 
Example 108
Source File: LineChartPainter.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.painter

import java.io.File
import java.util.Scanner

import org.jfree.chart.{ChartFactory, ChartUtils}
import org.jfree.chart.plot.PlotOrientation
import org.jfree.data.category.DefaultCategoryDataset

class LineChartPainter(dataPath: String, picturePath: String)
  extends Painter(dataPath, picturePath) {

  def createDataset(): DefaultCategoryDataset = {
    fw.flush()
    fw.close()
    val dataset = new DefaultCategoryDataset
    val scanner = new Scanner(new File(dataPath))
    while (scanner.hasNext()) {
      val cols = scanner.next().split(",")
      dataset.addValue(cols(0).toLong, "default", cols(1))
    }
    dataset
  }

  def paint(
      width: Int,
      height: Int,
      chartTitle: String,
      categoryAxisLabel: String,
      valueAxisLabel: String): Unit = {
    val lineChart = ChartFactory.createLineChart(
      chartTitle,
      categoryAxisLabel,
      valueAxisLabel,
      createDataset,
      PlotOrientation.VERTICAL,
      false,
      false,
      false)
    ChartUtils.saveChartAsJPEG(new File(picturePath), lineChart, width, height)
  }
} 
Example 109
Source File: ExecutorNumMonitor.scala    From XSQL   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.monitor.executor

import java.io.File
import java.util.Date

import scala.xml._

import org.apache.spark.alarm.{AlertMessage, EmailAlarm, HtmlMessage}
import org.apache.spark.monitor.{Monitor, MonitorItem}
import org.apache.spark.monitor.MonitorItem.MonitorItem
import org.apache.spark.painter.TimeSeriesChartPainter
import org.apache.spark.scheduler._
import org.apache.spark.status.ExecutorSummaryWrapper

class ExecutorNumMonitor extends ExecutorMonitor {
  override val item: MonitorItem = MonitorItem.EXECUTOR_NUM_NOTIFIER
  lazy val dataPath = s"/tmp/${item}-${conf.get("spark.app.id")}.csv"
  lazy val picturePath = s"/tmp/${item}-${conf.get("spark.app.id")}.jpg"
  lazy val eventMinInterval =
    conf.getTimeAsMs(s"${Monitor.PREFIX}.${item.toString.toLowerCase}.granularity", "60s")
  var lastPointTime: Long = new Date().getTime
  var recentEventTime: Long = new Date().getTime

  lazy private val painter = new TimeSeriesChartPainter(dataPath, picturePath)

  def executorNum(): Long = {
    kvStore.count(classOf[ExecutorSummaryWrapper], "active", true)
  }

  def addPoint(executorNum: Long, time: Long): Unit = {
    painter.addPoint(executorNum, time)
  }
  // scalastyle:off
  override def watchOut(event: SparkListenerEvent): Option[AlertMessage] = {
    event match {
      case env: SparkListenerExecutorAdded =>
        // try to coalesce executor-count changes within 60s into one point, so that the graph stays clean and readable
        if (env.time - lastPointTime > eventMinInterval) {
          addPoint(executorNum, recentEventTime)
          addPoint(executorNum, env.time)
          lastPointTime = env.time
        }
        recentEventTime = env.time
        Option.empty
      case env: SparkListenerExecutorRemoved =>
        if (env.time - lastPointTime > eventMinInterval) {
          addPoint(executorNum, recentEventTime)
          addPoint(executorNum, env.time)
          lastPointTime = env.time
        }
        recentEventTime = env.time
        Option.empty
      case e: SparkListenerApplicationEnd =>
        addPoint(executorNum, recentEventTime)
        addPoint(executorNum, new Date().getTime)
        painter.paint(600, 400, "executor num curve", "datetime", "executor num")
        if (EmailAlarm.get().isDefined) {
          val pic = EmailAlarm.get().get.embed(new File(picturePath))
          val a = <h2>Dynamic allocation status:</h2>
            <img src={"cid:"+pic}></img>
            <br/>
          Option(new HtmlMessage(title = item, content = a.mkString))
        } else {
          Option.empty
        }
    }
  }
  // scalastyle:on
} 
Example 110
Source File: GlobalSapSQLContext.scala    From HANAVora-Extensions   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql

import java.io.File

import com.sap.spark.util.TestUtils
import com.sap.spark.{GlobalSparkContext, WithSQLContext}
import org.apache.spark.SparkContext
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{BoundReference, Cast}
import org.apache.spark.unsafe.types._
import org.apache.spark.sql.types._
import org.scalatest.Suite

import scala.io.Source

trait GlobalSapSQLContext extends GlobalSparkContext with WithSQLContext {
  self: Suite =>

  override implicit def sqlContext: SQLContext = GlobalSapSQLContext._sqlc

  override protected def setUpSQLContext(): Unit =
    GlobalSapSQLContext.init(sc)

  override protected def tearDownSQLContext(): Unit =
    GlobalSapSQLContext.reset()

  def getDataFrameFromSourceFile(sparkSchema: StructType, path: File): DataFrame = {
    val conversions = sparkSchema.toSeq.zipWithIndex.map({
      case (field, index) =>
        Cast(BoundReference(index, StringType, nullable = true), field.dataType)
    })
    val data = Source.fromFile(path)
      .getLines()
      .map({ line =>
      val stringRow = InternalRow.fromSeq(line.split(",", -1).map(UTF8String.fromString))
      Row.fromSeq(conversions.map({ c => c.eval(stringRow) }))
    })
    val rdd = sc.parallelize(data.toSeq, numberOfSparkWorkers)
    sqlContext.createDataFrame(rdd, sparkSchema)
  }
}

object GlobalSapSQLContext {

  private var _sqlc: SQLContext = _

  private def init(sc: SparkContext): Unit =
    if (_sqlc == null) {
      _sqlc = TestUtils.newSQLContext(sc)
    }

  private def reset(): Unit = {
    if (_sqlc != null) {
      _sqlc.catalog.unregisterAllTables()
    }
  }

} 
Example 111
Source File: DefaultFileWatchService.scala    From play-file-watch   with Apache License 2.0 5 votes vote down vote up
package play.dev.filewatch

import java.io.File
import java.nio.file.FileSystems

import io.methvin.watcher.DirectoryChangeEvent
import io.methvin.watcher.DirectoryChangeListener
import io.methvin.watcher.DirectoryWatcher
import io.methvin.watchservice.MacOSXListeningWatchService

import scala.collection.JavaConverters._
import scala.util.control.NonFatal


class DefaultFileWatchService(logger: LoggerProxy, isMac: Boolean) extends FileWatchService {

  def this(logger: LoggerProxy) = this(logger, false)

  def watch(filesToWatch: Seq[File], onChange: () => Unit) = {
    val dirsToWatch = filesToWatch.filter { file =>
      if (file.isDirectory) {
        true
      } else if (file.isFile) {
        logger.warn("An attempt has been made to watch the file: " + file.getCanonicalPath)
        logger.warn("DefaultFileWatchService only supports watching directories. The file will not be watched.")
        false
      } else false
    }

    val watchService = if (isMac) new MacOSXListeningWatchService() else FileSystems.getDefault.newWatchService()
    val directoryWatcher =
      DirectoryWatcher
        .builder()
        .paths(dirsToWatch.map(_.toPath).asJava)
        .listener(new DirectoryChangeListener {
          override def onEvent(event: DirectoryChangeEvent): Unit = onChange()
        })
        .watchService(watchService)
        .build()

    val thread = new Thread(
      new Runnable {
        override def run(): Unit = {
          try {
            directoryWatcher.watch()
          } catch {
            case NonFatal(_) => // Do nothing, this means the watch service has been closed, or we've been interrupted.
          }
        }
      },
      "play-watch-service"
    )
    thread.setDaemon(true)
    thread.start()

    new FileWatcher {
      override def stop(): Unit = directoryWatcher.close()
    }
  }
} 
Example 112
Source File: PollingFileWatchService.scala    From play-file-watch   with Apache License 2.0 5 votes vote down vote up
package play.dev.filewatch

import java.io.File

import better.files.{ File => ScalaFile, _ }

import annotation.tailrec


object SourceModificationWatch {
  type PathFinder = () => Iterator[ScalaFile]

  private def listFiles(sourcesFinder: PathFinder): Set[ScalaFile] = sourcesFinder().toSet

  private def findLastModifiedTime(files: Set[ScalaFile]): Long = {
    if (files.nonEmpty) files.maxBy(_.lastModifiedTime).lastModifiedTime.toEpochMilli
    else 0L
  }

  @tailrec def watch(sourcesFinder: PathFinder, pollDelayMillis: Int, state: WatchState)(
      terminationCondition: => Boolean
  ): (Boolean, WatchState) = {
    import state._

    val filesToWatch = listFiles(sourcesFinder)

    val sourceFilesPath: Set[String] = filesToWatch.map(_.toJava.getCanonicalPath)
    val lastModifiedTime             = findLastModifiedTime(filesToWatch)

    val sourcesModified =
      lastModifiedTime > lastCallbackCallTime ||
        previousFiles != sourceFilesPath

    val (triggered, newCallbackCallTime) =
      if (sourcesModified)
        (false, System.currentTimeMillis)
      else
        (awaitingQuietPeriod, lastCallbackCallTime)

    val newState =
      new WatchState(newCallbackCallTime, sourceFilesPath, sourcesModified, if (triggered) count + 1 else count)
    if (triggered)
      (true, newState)
    else {
      Thread.sleep(pollDelayMillis)
      if (terminationCondition)
        (false, newState)
      else
        watch(sourcesFinder, pollDelayMillis, newState)(terminationCondition)
    }
  }
}

final class WatchState(
    val lastCallbackCallTime: Long,
    val previousFiles: Set[String],
    val awaitingQuietPeriod: Boolean,
    val count: Int
) {
  def previousFileCount: Int = previousFiles.size
}

object WatchState {
  def empty = new WatchState(0L, Set.empty[String], false, 0)
} 
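A sketch of driving the polling watcher above with better-files; the watched directory is an assumption, and the call blocks until a change has settled.

import better.files.{ File => ScalaFile }
import play.dev.filewatch.{ SourceModificationWatch, WatchState }

object PollingWatchExample extends App {
  // Poll every regular file under ./src every 500 ms.
  val sources: SourceModificationWatch.PathFinder =
    () => ScalaFile("src").listRecursively.filter(_.isRegularFile)

  val (changed, endState) =
    SourceModificationWatch.watch(sources, pollDelayMillis = 500, WatchState.empty)(terminationCondition = false)

  println(s"change detected = $changed, files seen = ${endState.previousFileCount}")
}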
Example 113
Source File: ExampleMahaService.scala    From maha   with Apache License 2.0 5 votes vote down vote up
// Copyright 2017, Yahoo Holdings Inc.
// Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms.
package com.yahoo.maha.api.jersey.example

import java.io.File
import java.util.UUID

import com.yahoo.maha.core.ddl.OracleDDLGenerator
import com.yahoo.maha.jdbc.{JdbcConnection, List, Seq}
import com.yahoo.maha.service.{DefaultMahaService, MahaService, MahaServiceConfig}
import com.zaxxer.hikari.{HikariConfig, HikariDataSource}
import grizzled.slf4j.Logging
import org.apache.commons.io.FileUtils
import org.joda.time.DateTime
import org.joda.time.format.DateTimeFormat

object ExampleMahaService extends Logging {

  val REGISTRY_NAME = "academic";

  private var dataSource: Option[HikariDataSource] = None
  private var jdbcConnection: Option[JdbcConnection] = None
  val h2dbId = UUID.randomUUID().toString.replace("-","")
  val today: String = DateTimeFormat.forPattern("yyyy-MM-dd").print(DateTime.now())
  val yesterday: String = DateTimeFormat.forPattern("yyyy-MM-dd").print(DateTime.now().minusDays(1))

  def initJdbcToH2(): Unit = {
    val config = new HikariConfig()
    config.setJdbcUrl(s"jdbc:h2:mem:$h2dbId;MODE=Oracle;DB_CLOSE_DELAY=-1")
    config.setUsername("sa")
    config.setPassword("h2.test.database.password")
    config.setMaximumPoolSize(2)
    dataSource = Option(new HikariDataSource(config))
    jdbcConnection = dataSource.map(new JdbcConnection(_))
    assert(jdbcConnection.isDefined, "Failed to connect to h2 local server")
  }

  def getMahaService(scope: String = "main"): MahaService = {
    val jsonString = FileUtils.readFileToString(new File(s"src/$scope/resources/maha-service-config.json"))
      .replaceAll("h2dbId", s"$h2dbId")

    initJdbcToH2()

    val mahaServiceResult = MahaServiceConfig.fromJson(jsonString.getBytes("utf-8"))
    if (mahaServiceResult.isFailure) {
      mahaServiceResult.leftMap {
        res=>
          error(s"Failed to launch Example MahaService, MahaService Error list is: ${res.list.toList}")
      }
    }
    val mahaServiceConfig = mahaServiceResult.toOption.get
    val mahaService: MahaService = new DefaultMahaService(mahaServiceConfig)
    stageStudentData(mahaServiceConfig)
    mahaService
  }

  def stageStudentData(mahaServiceConfig: MahaServiceConfig) : Unit = {

    val ddlGenerator = new OracleDDLGenerator
    val erRegistryConfig = mahaServiceConfig.registry.get(ExampleMahaService.REGISTRY_NAME).get
    val erRegistry= erRegistryConfig.registry
    erRegistry.factMap.values.foreach {
      publicFact =>
        publicFact.factList.foreach {
          fact=>
            val ddl = ddlGenerator.toDDL(fact)
            assert(jdbcConnection.get.executeUpdate(ddl).isSuccess)
        }
    }

    val insertSql =
      """
        INSERT INTO student_grade_sheet (year, section_id, student_id, class_id, total_marks, date, comment)
        VALUES (?, ?, ?, ?, ?, ?, ?)
      """

    val rows: List[Seq[Any]] = List(
      Seq(1, 100, 213, 200, 125, ExampleMahaService.today, "some comment")
    )

    rows.foreach {
      row =>
        val result = jdbcConnection.get.executeUpdate(insertSql, row)
        assert(result.isSuccess)
    }
    var count = 0
    jdbcConnection.get.queryForObject("select * from student_grade_sheet") {
      rs =>
        while (rs.next()) {
          count += 1
        }
    }
    assert(rows.size == count)
  }
} 
Example 114
Source File: WorkerStateReporter.scala    From maha   with Apache License 2.0 5 votes vote down vote up
// Copyright 2018, Yahoo Inc.
// Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms.

package com.yahoo.maha.worker.state

import java.io.File

import akka.actor.{Actor, ActorPath, ActorSystem, Props}
import com.typesafe.config.{Config, ConfigFactory}
import com.yahoo.maha.core.Engine
import com.yahoo.maha.worker.state.actor._
import grizzled.slf4j.Logging


object WorkerStateReporter extends Logging {

  // Use a bounded mailbox to prevent memory leaks in the rare case when jobs get piled up to be processed by the actor
  val defaultConfig: Config = ConfigFactory.parseString(
    """
      |akka.actor.nonblocking_bounded_mailbox {
      |  mailbox-type = akka.dispatch.NonBlockingBoundedMailbox
      |  mailbox-capacity = 10000
      |}
      |akka {
      |  loggers = ["akka.event.slf4j.Slf4jLogger"]
      |  loglevel = "INFO"
      |}
      |""".stripMargin)

}



case class WorkerStateReporter(akkaConf: String) extends Logging {

  val config: Config = {
    val file = new File(akkaConf)
    if(file.exists() && file.canRead) {
      info(s"Using akka conf file : ${file.getAbsolutePath}")
      ConfigFactory.parseFile(file)
    } else {
      info("Using default akka config")
      WorkerStateReporter.defaultConfig
    }
  }
  val system = ActorSystem("maha-workers", config)
  lazy val workerStateActorPath: ActorPath = {
    val actorConfig = WorkerStateActorConfig()
    val props: Props = Props(classOf[WorkerStateActor], actorConfig).withMailbox("akka.actor.nonblocking_bounded_mailbox")
    val path = system.actorOf(props, actorConfig.name).path
    info(s"Created WorkerStateActor: $path")
    path
  }

  def jobStarted(executionType: ExecutionType, jobId: Long, engine: Engine, cost: Long, estimatedRows: Long, userId: String): Unit = {
    sendMessage(JobStarted(executionType, jobId, engine, cost, estimatedRows, userId))
  }

  def jobEnded(executionType: ExecutionType, jobId: Long, engine: Engine, cost: Long, estimatedRows: Long, userId: String): Unit = {
    sendMessage(JobEnded(executionType, jobId, engine, cost, estimatedRows, userId))
  }

  def sendMessage(actorMessage:WorkerStateActorMessage) = {
    try {
      system.actorSelection(workerStateActorPath).tell(actorMessage, Actor.noSender)
    } catch {
      case t: Throwable =>
        warn(s"Failed to send $actorMessage message to WorkerStateActor", t)
    }
  }
} 
Example 115
Source File: RocksDBStorage.scala    From JustinDB   with Apache License 2.0 5 votes vote down vote up
package justin.db.storage

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}
import java.util.UUID

import com.esotericsoftware.kryo.io.{Input, Output}
import com.esotericsoftware.kryo.{Kryo, Serializer}
import justin.db.storage.PluggableStorageProtocol.{Ack, StorageGetData}
import org.rocksdb.{FlushOptions, Options, RocksDB}

import scala.concurrent.Future

// TODO:
// The current version stores all data in a single file (it ignores data originality).
// Data should eventually be split by ring partitionId.
// This might be an issue during data movements between nodes.
final class RocksDBStorage(dir: File) extends PluggableStorageProtocol {
  import RocksDBStorage._

  {
    RocksDB.loadLibrary()
  }

  private[this] val kryo = new Kryo()

  private[this] val db: RocksDB = {
    val options: Options = new Options().setCreateIfMissing(true)
    RocksDB.open(options, dir.getPath)
  }

  override def get(id: UUID)(resolveOriginality: (UUID) => PluggableStorageProtocol.DataOriginality): Future[PluggableStorageProtocol.StorageGetData] = {
    val key: Array[Byte] = uuid2bytes(kryo, id)
    val dataBytes: Array[Byte] = db.get(key)

    val justinDataOpt = Option(dataBytes).map { dataBytes =>
      val input = new Input(new ByteArrayInputStream(dataBytes))
      JustinDataSerializer.read(kryo, input, classOf[JustinData])
    }

    Future.successful(justinDataOpt.map(StorageGetData.Single).getOrElse(StorageGetData.None))
  }

  override def put(data: JustinData)(resolveOriginality: (UUID) => PluggableStorageProtocol.DataOriginality): Future[PluggableStorageProtocol.Ack] = {
    val key: Array[Byte] = uuid2bytes(kryo, data.id)
    val dataBytes: Array[Byte] = {
      val output = new Output(new ByteArrayOutputStream())
      JustinDataSerializer.write(kryo, output, data)
      output.getBuffer
    }

    db.put(key, dataBytes)
    db.flush(new FlushOptions().setWaitForFlush(true))

    Ack.future
  }
}

object RocksDBStorage {

  def uuid2bytes(kryo: Kryo, id: UUID): Array[Byte] = {
    val output = new Output(new ByteArrayOutputStream(), 16)
    UUIDSerializer.write(kryo, output, id)
    output.getBuffer
  }

  object UUIDSerializer extends Serializer[UUID] {
    override def read(kryo: Kryo, input: Input, `type`: Class[UUID]): UUID = {
      new UUID(input.readLong, input.readLong)
    }

    override def write(kryo: Kryo, output: Output, uuid: UUID): Unit = {
      output.writeLong(uuid.getMostSignificantBits)
      output.writeLong(uuid.getLeastSignificantBits)
    }
  }

  object JustinDataSerializer extends Serializer[JustinData] {
    override def read(kryo: Kryo, input: Input, `type`: Class[JustinData]): JustinData = {
      JustinData(
        id        = UUIDSerializer.read(kryo, input, classOf[UUID]),
        value     = input.readString(),
        vclock    = input.readString(),
        timestamp = input.readLong()
      )
    }

    override def write(kryo: Kryo, output: Output, data: JustinData): Unit = {
      UUIDSerializer.write(kryo, output, data.id)
      output.writeString(data.value)
      output.writeString(data.vclock)
      output.writeLong(data.timestamp)
    }
  }
} 
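A round-trip sketch for the Kryo serializers above that runs without RocksDB; it only uses the helpers defined in this example plus Kryo itself, and the sample field values are made up.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.UUID

import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}
import justin.db.storage.JustinData
import justin.db.storage.RocksDBStorage.JustinDataSerializer

object JustinDataRoundTrip extends App {
  val kryo = new Kryo()
  val original = JustinData(UUID.randomUUID(), value = "some-value", vclock = "A:1", timestamp = 123L)

  // Serialize with the same writer that RocksDBStorage.put uses.
  val output = new Output(new ByteArrayOutputStream())
  JustinDataSerializer.write(kryo, output, original)
  val bytes = output.getBuffer

  // Deserialize and verify the round trip.
  val restored = JustinDataSerializer.read(kryo, new Input(new ByteArrayInputStream(bytes)), classOf[JustinData])
  assert(restored == original, "Kryo round trip should preserve JustinData")
  println(restored)
}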
Example 116
Source File: VirtualScreeningTest.scala    From MaRe   with Apache License 2.0 5 votes vote down vote up
package se.uu.it.mare

import java.io.File
import java.util.UUID

import scala.io.Source
import scala.util.Properties

import org.apache.spark.SharedSparkContext
import org.junit.runner.RunWith
import org.scalatest.FunSuite
import org.scalatest.junit.JUnitRunner

private object SDFUtils {
  def parseIDsAndScores(sdf: String): Array[(String, String)] = {
    sdf.split("\\n\\$\\$\\$\\$\\n").map { mol =>
      val lines = mol.split("\\n")
      (lines(0), lines.last)
    }
  }
}

@RunWith(classOf[JUnitRunner])
class VirtualScreeningTest extends FunSuite with SharedSparkContext {

  private val tmpDir = new File(Properties.envOrElse("TMPDIR", "/tmp"))

  test("Virtual Screening") {

    sc.hadoopConfiguration.set("textinputformat.record.delimiter", "\n$$$$\n")
    val mols = sc.textFile(getClass.getResource("sdf/molecules.sdf").getPath)

    // Parallel execution with MaRe
    val hitsParallel = new MaRe(mols)
      .map(
        inputMountPoint = TextFile("/input.sdf", "\n$$$$\n"),
        outputMountPoint = TextFile("/output.sdf", "\n$$$$\n"),
        imageName = "mcapuccini/oe:latest",
        command = "fred -receptor /var/openeye/hiv1_protease.oeb " +
          "-hitlist_size 0 " +
          "-conftest none " +
          "-dock_resolution Low " +
          "-dbase /input.sdf " +
          "-docked_molecule_file /output.sdf")
      .reduce(
        inputMountPoint = TextFile("/input.sdf", "\n$$$$\n"),
        outputMountPoint = TextFile("/output.sdf", "\n$$$$\n"),
        imageName = "mcapuccini/sdsorter:latest",
        command = "sdsorter -reversesort='FRED Chemgauss4 score' " +
          "-keep-tag='FRED Chemgauss4 score' " +
          "-nbest=30 " +
          "/input.sdf " +
          "/output.sdf")
      .rdd.collect.mkString("\n$$$$\n")

    // Serial execution
    val inputFile = new File(getClass.getResource("sdf/molecules.sdf").getPath)
    val dockedFile = new File(tmpDir, "mare_test_" + UUID.randomUUID.toString)
    dockedFile.createNewFile
    dockedFile.deleteOnExit
    val outputFile = new File(tmpDir, "mare_test_" + UUID.randomUUID.toString)
    outputFile.createNewFile
    outputFile.deleteOnExit
    DockerHelper.run(
      imageName = "mcapuccini/oe:latest",
      command = "fred -receptor /var/openeye/hiv1_protease.oeb " +
        "-hitlist_size 0 " +
        "-conftest none " +
        "-dock_resolution Low " +
        "-dbase /input.sdf " +
        "-docked_molecule_file /docked.sdf",
      bindFiles = Seq(inputFile, dockedFile),
      volumeFiles = Seq(new File("/input.sdf"), new File("/docked.sdf")),
      forcePull = false)
    DockerHelper.run(
      imageName = "mcapuccini/sdsorter:latest",
      command = "sdsorter -reversesort='FRED Chemgauss4 score' " +
        "-keep-tag='FRED Chemgauss4 score' " +
        "-nbest=30 " +
        "/docked.sdf " +
        "/output.sdf",
      bindFiles = Seq(dockedFile, outputFile),
      volumeFiles = Seq(new File("/docked.sdf"), new File("/output.sdf")),
      forcePull = false)
    val hitsSerial = Source.fromFile(outputFile).mkString

    // Test
    val parallel = SDFUtils.parseIDsAndScores(hitsParallel)
    val serial = SDFUtils.parseIDsAndScores(hitsSerial)
    assert(parallel.deep == serial.deep)

  }

} 
Example 117
Source File: TestSuiteTests.scala    From circe-json-schema   with Apache License 2.0 5 votes vote down vote up
package io.circe.schema

import cats.data.Validated
import io.circe.{ Decoder, Json }
import java.io.File
import org.scalatest.flatspec.AnyFlatSpec

case class SchemaTestCase(description: String, data: Json, valid: Boolean)
case class SchemaTest(description: String, schema: Json, tests: List[SchemaTestCase])

object SchemaTestCase {
  implicit val decodeSchemaTestCase: Decoder[SchemaTestCase] = io.circe.generic.semiauto.deriveDecoder
}

object SchemaTest {
  implicit val decodeSchemaTest: Decoder[SchemaTest] = io.circe.generic.semiauto.deriveDecoder
}

class TestSuiteTests(path: String) extends AnyFlatSpec {
  val tests: List[SchemaTest] = io.circe.jawn
    .decodeFile[List[SchemaTest]](new File(path))
    .getOrElse(
      throw new Exception(s"Unable to load test file: $path")
    )

  tests.foreach {
    case SchemaTest(description, schema, tests) =>
      tests.foreach {
        case SchemaTestCase(caseDescription, data, valid) =>
          val expected = if (valid) "validate successfully" else "fail to validate"
          s"$description: $caseDescription" should expected in {
            val errors = Schema.load(schema).validate(data)

            if (valid) {
              assert(errors == Validated.valid(()))
            } else {
              assert(errors.isInvalid)
            }
          }

          it should s"$expected when schema is loaded from a string" in {
            val errors = Schema.loadFromString(schema.noSpaces).get.validate(data)

            if (valid) {
              assert(errors == Validated.valid(()))
            } else {
              assert(errors.isInvalid)
            }
          }
      }
  }
}

class AdditionalItemsTestSuiteTests extends TestSuiteTests("tests/tests/draft7/additionalItems.json")
class AdditionalPropertiesTestSuiteTests extends TestSuiteTests("tests/tests/draft7/additionalProperties.json")
class AllOfTestSuiteTests extends TestSuiteTests("tests/tests/draft7/allOf.json")
class AnyOfTestSuiteTests extends TestSuiteTests("tests/tests/draft7/anyOf.json")
class BooleanSchemaTestSuiteTests extends TestSuiteTests("tests/tests/draft7/boolean_schema.json")
class ConstTestSuiteTests extends TestSuiteTests("tests/tests/draft7/const.json")
class ContainsTestSuiteTests extends TestSuiteTests("tests/tests/draft7/contains.json")
class DefaultTestSuiteTests extends TestSuiteTests("tests/tests/draft7/default.json")
//class DefinitionsTestSuiteTests extends TestSuiteTests("tests/tests/draft7/definitions.json")
class EnumTestSuiteTests extends TestSuiteTests("tests/tests/draft7/enum.json")
class ExclusiveMaximumTestSuiteTests extends TestSuiteTests("tests/tests/draft7/exclusiveMaximum.json")
class ExclusiveMinimumTestSuiteTests extends TestSuiteTests("tests/tests/draft7/exclusiveMinimum.json")
class FormatTestSuiteTests extends TestSuiteTests("tests/tests/draft7/format.json")
class IfThenElseTestSuiteTests extends TestSuiteTests("tests/tests/draft7/if-then-else.json")
class ItemsTestSuiteTests extends TestSuiteTests("tests/tests/draft7/items.json")
class MaximumTestSuiteTests extends TestSuiteTests("tests/tests/draft7/maximum.json")
class MaxItemsTestSuiteTests extends TestSuiteTests("tests/tests/draft7/maxItems.json")
class MaxLengthTestSuiteTests extends TestSuiteTests("tests/tests/draft7/maxLength.json")
class MaxPropertiesTestSuiteTests extends TestSuiteTests("tests/tests/draft7/maxProperties.json")
class MinimumTestSuiteTests extends TestSuiteTests("tests/tests/draft7/minimum.json")
class MinItemsTestSuiteTests extends TestSuiteTests("tests/tests/draft7/minItems.json")
class MinLengthTestSuiteTests extends TestSuiteTests("tests/tests/draft7/minLength.json")
class MinPropertiesTestSuiteTests extends TestSuiteTests("tests/tests/draft7/minProperties.json")
class MultipleOfTestSuiteTests extends TestSuiteTests("tests/tests/draft7/multipleOf.json")
class NotTestSuiteTests extends TestSuiteTests("tests/tests/draft7/not.json")
class OneOfTestSuiteTests extends TestSuiteTests("tests/tests/draft7/oneOf.json")
class PatternTestSuiteTests extends TestSuiteTests("tests/tests/draft7/pattern.json")
class PatternPropertiesTestSuiteTests extends TestSuiteTests("tests/tests/draft7/patternProperties.json")
class PropertyNamesTestSuiteTests extends TestSuiteTests("tests/tests/draft7/propertyNames.json")
// Not currently running remote tests.
//class RefTestSuiteTests extends TestSuiteTests("tests/tests/draft7/ref.json")
//class RefRemoteTestSuiteTests extends TestSuiteTests("tests/tests/draft7/refRemote.json")
class RequiredTestSuiteTests extends TestSuiteTests("tests/tests/draft7/required.json")
class TypeTestSuiteTests extends TestSuiteTests("tests/tests/draft7/type.json")
class UniqueItemsTestSuiteTests extends TestSuiteTests("tests/tests/draft7/uniqueItems.json") 
Example 118
Source File: KMeans.scala    From spark-tda   with Apache License 2.0 5 votes vote down vote up
import java.io.{File, PrintWriter}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.sql.functions._

def computeKMeans(
  pathToTextFile: String,
  quantity: Int,
  iteration: Int) {
  case class Point(x: Double, y: Double)

  def save(f: File)(func: PrintWriter => Unit) {
    val p = new PrintWriter(f)
    try {
      func(p)
    } finally {
      p.close()
    }
  }

  val filename = pathToTextFile.split("\\.")(0)

  val outputFilename = s"$filename-KMEANS-k${quantity}-i${iteration}.tsv"

  val points = sc
    .textFile(pathToTextFile)
    .map {
      line => line.trim.split("\\s+")
    }
    .map {
      row => Point(row(0).toDouble, row(1).toDouble)
    }

  val features = points
    .map {
      p => Vectors.dense(p.x, p.y)
    }

  features.cache()

  val kmeans = KMeans.train(features, quantity, iteration)

  val predictions = features
    .map {
      f => (f(0), f(1), kmeans.predict(f) + 1)
    }
    .collect

  save(new File(outputFilename)) {
    println(s"OUTPUT TO: ${outputFilename}")
    f => predictions.foreach{
      case (x, y, ccid) => f.println(s"${x}\t${y}\t${ccid}")
    }
  }
} 
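The script above is written for a spark-shell session where sc is already bound; a hedged usage sketch, assuming an input file of whitespace-separated x/y pairs (the file name and parameters are illustrative):

// After :load-ing the script in spark-shell.
// Writes points-KMEANS-k5-i20.tsv with columns x, y and a 1-based cluster id.
computeKMeans(
  pathToTextFile = "points.tsv",  // illustrative file, one "x y" pair per line
  quantity = 5,                   // number of clusters passed to KMeans.train
  iteration = 20)                 // maximum number of iterations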
Example 119
Source File: ReebDiagram.scala    From spark-tda   with Apache License 2.0 5 votes vote down vote up
import java.io.{File, PrintWriter}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.feature.{ReebDiagram, VectorAssembler}
import org.apache.spark.sql.functions._

def computeReebDiagram(
  pathToTextFile: String,
  quantity: Int,
  linkThresholdRatio: Double,
  coreThresholdRatio: Double,
  topTreeRatio: Double) {

  def save(f: File)(func: PrintWriter => Unit) {
    val p = new PrintWriter(f)
    try {
      func(p)
    } finally {
      p.close()
    }
  }

  val filename = pathToTextFile.split("\\.")(0)

  val outputFilename = s"$filename-REEB-k${quantity}-l${linkThresholdRatio}-c${coreThresholdRatio}-i${topTreeRatio}.tsv"

  val points = sc.textFile(pathToTextFile)
    .map {
      line => line.trim.split("\\s+")
    }
    .zipWithIndex
    .map { case (row, i) =>
      (i, row(0).toDouble, row(1).toDouble, 0)
    }
    .toDF("id", "x", "y", "cover_id")

  val cardinality = points.count

  val assembler = new VectorAssembler()
    .setInputCols(Array("x", "y"))
    .setOutputCol("feature")

  val features = assembler
    .transform(points)

  val reeb = new ReebDiagram()
    .setK(quantity)
    .setLinkThresholdRatio(linkThresholdRatio)
    .setCoreThresholdRatio(coreThresholdRatio)
    .setTopTreeSize((topTreeRatio * cardinality).toInt)
    .setTopTreeLeafSize(quantity)
    .setIdCol("id")
    .setCoverCol("cover_id")
    .setFeaturesCol("feature")
    .setOutputCol("cluster_id")

  val transformed = reeb
    .fit(features)
    .transform(features)

  val clusters = Map(
    transformed
      .select("cluster_id")
      .rdd
      .map(row => row.getLong(0))
      .distinct
      .zipWithIndex
      .collect(): _*)

  val result = transformed
    .select("x", "y", "cluster_id")
    .rdd
    .map(row => (row.getDouble(0), row.getDouble(1), row.getLong(2)))
    .map { case (x, y, clusterId) => (x, y, clusters(clusterId) + 1)}
    .collect()

  save(new File(outputFilename)) {
    println(s"OUTPUT TO: ${outputFilename}")
    f => result.foreach{
      case (x, y, ccid) => f.println(s"${x}\t${y}\t${ccid}")
    }
  }
} 
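As with the k-means script, this one targets spark-shell with the spark-tda classes on the classpath; a hedged call with illustrative parameters:

// Writes points-REEB-k15-l0.5-c0.5-i0.01.tsv with x, y and a 1-based cluster id.
computeReebDiagram(
  pathToTextFile = "points.tsv",  // illustrative file, one "x y" pair per line
  quantity = 15,                  // k passed to setK and setTopTreeLeafSize
  linkThresholdRatio = 0.5,
  coreThresholdRatio = 0.5,
  topTreeRatio = 0.01)            // fraction of the points used for the top tree size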
Example 120
Source File: TempDirectory.scala    From spark-tda   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.ml.feature

import java.io.File
import org.scalatest.{BeforeAndAfterAll, Suite}
import com.holdenkarau.spark.testing.Utils

// Provides a fresh temporary directory, created before the suite runs and deleted afterwards.
trait TempDirectory extends BeforeAndAfterAll { self: Suite =>

  private var _tempDir: File = _

  protected def tempDir: File = _tempDir

  override def beforeAll(): Unit = {
    super.beforeAll()
    _tempDir = Utils.createTempDir()
  }

  override def afterAll(): Unit = {
    try {
      Utils.deleteRecursively(_tempDir)
    } finally {
      super.afterAll()
    }
  }
} 
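A minimal sketch of mixing the trait into a suite; the spec below is hypothetical:

import java.io.File
import org.apache.spark.ml.feature.TempDirectory
import org.scalatest.FunSuite

class ScratchDirSuite extends FunSuite with TempDirectory {
  test("writes into the per-suite scratch directory") {
    val f = new File(tempDir, "scratch.txt")   // tempDir comes from TempDirectory
    assert(f.getParentFile.exists())
  }
}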
Example 121
Source File: MarkdownPagesEndpoint.scala    From udash-core   with Apache License 2.0 5 votes vote down vote up
package io.udash.web.guide.markdown

import java.io.{BufferedReader, File, FileReader}
import java.time.Instant
import java.util.concurrent.ConcurrentHashMap

import com.avsystem.commons._
import com.vladsch.flexmark.ext.toc.TocExtension
import com.vladsch.flexmark.html.HtmlRenderer
import com.vladsch.flexmark.parser.Parser

import scala.concurrent.{ExecutionContext, Future}

final class MarkdownPagesEndpoint(guideResourceBase: String)(implicit ec: ExecutionContext) extends MarkdownPageRPC {

  private val tocExtension = TocExtension.create
  private val parser = Parser.builder.extensions(JList(tocExtension)).build
  private val renderer = HtmlRenderer.builder.extensions(JList(tocExtension)).build
  private val renderedPages = new ConcurrentHashMap[MarkdownPage, (Future[String], Instant)]

  private def render(file: File): Future[String] = Future {
    val reader = new BufferedReader(new FileReader(file))
    val document = parser.parseReader(reader)
    renderer.render(document)
  }

  override def loadContent(page: MarkdownPage): Future[String] = {
    val (result, _) = renderedPages.compute(page, { (_, cached) =>
      val pageFile = new File(guideResourceBase + page.file)
      cached.opt.filter {
        case (currentRender, renderedInstant) =>
          currentRender.value.exists(_.isSuccess) && renderedInstant.toEpochMilli >= pageFile.lastModified()
      }.getOrElse((render(pageFile), Instant.ofEpochMilli(pageFile.lastModified())))
    })
    result
  }
} 
Example 122
Source File: FileDownloadServlet.scala    From udash-core   with Apache License 2.0 5 votes vote down vote up
package io.udash.rpc.utils

import java.io.File
import java.nio.file.Files
import javax.servlet.http.{HttpServlet, HttpServletRequest, HttpServletResponse}


// Serves the resolved file back to the client as an attachment.
abstract class FileDownloadServlet extends HttpServlet {

  // Maps the incoming request onto a file on disk.
  protected def resolveFile(request: HttpServletRequest): File

  // File name advertised to the client in the Content-Disposition header.
  protected def presentedFileName(name: String): String

  protected def resolveFileMimeType(file: File): String =
    Option(getServletContext.getMimeType(file.getAbsolutePath)).getOrElse("application/octet-stream")

  override def doGet(request: HttpServletRequest, response: HttpServletResponse): Unit = {
    val file = resolveFile(request)

    if (!file.exists()) response.sendError(404, "File not found!")
    else {
      // MIME type
      response.setContentType(resolveFileMimeType(file))
      // content length
      response.setContentLengthLong(file.length)
      // file name
      response.setHeader("Content-Disposition", s"""attachment; filename="${presentedFileName(file.getName)}"""")

      val outStream = response.getOutputStream
      Files.copy(file.toPath, outStream)
      outStream.close()
    }
  }
} 
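A hedged sketch of a concrete subclass, assuming the abstract members shown above; the class name and base directory are made up:

import java.io.File
import javax.servlet.http.HttpServletRequest

class ReportDownloadServlet extends FileDownloadServlet {
  private val baseDir = new File("/var/reports")  // illustrative location

  // Keep only the last path segment so clients cannot escape baseDir.
  override protected def resolveFile(request: HttpServletRequest): File = {
    val name = new File(Option(request.getPathInfo).getOrElse("")).getName
    new File(baseDir, name)
  }

  // Serve the file under its original name.
  override protected def presentedFileName(name: String): String = name
}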
Example 123
Source File: CssFileRenderer.scala    From udash-core   with Apache License 2.0 5 votes vote down vote up
package io.udash.css

import java.io.{File, PrintWriter}

import scalacss.internal.Renderer


class CssFileRenderer(dirPath: String, styles: Seq[CssBase], createMain: Boolean) {
  def render()(implicit renderer: Renderer[String]): Unit = {
    val dir = new File(dirPath)
    dir.mkdirs()

    val mainFile: Option[File] = if (createMain) Some(new File(s"${dir.getAbsolutePath}/main.css")) else None
    mainFile.foreach(_.createNewFile())

    val mainWriter = mainFile.map(new PrintWriter(_, "UTF-8"))

    styles.foreach { style =>
      val name = style.getClass.getName
      val f = new File(s"${dir.getAbsolutePath}/$name.css") {
        createNewFile()
      }
      new PrintWriter(f, "UTF-8") {
        write(style.render)
        flush()
        close()
      }

      mainWriter.foreach(_.append(s"""@import "$name.css";\n"""))
    }

    mainWriter.foreach { w =>
      w.flush()
      w.close()
    }
  }
} 
Example 124
Source File: ValueStoreSerializationExt.scala    From random-projections-at-berlinbuzzwords   with Apache License 2.0 5 votes vote down vote up
package com.stefansavev.randomprojections.serialization

import java.io.File
import com.stefansavev.core.serialization.Utils
import com.stefansavev.randomprojections.datarepr.dense.store.ValuesStore
import com.stefansavev.randomprojections.serialization.DataFrameViewSerializers._
import com.typesafe.scalalogging.StrictLogging

object ValueStoreSerializationExt {
  val ser = valuesStoreSerializer()

  implicit class ValueStoreSerializerExt(input: ValuesStore) {
    def toFile(file: File): Unit = {
      Utils.toFile(ser, file, input)
    }

    def toFile(fileName: String): Unit = {
      toFile(new File(fileName))
    }

    def toBytes(): Array[Byte] = {
      Utils.toBytes(ser, input)
    }
  }

  implicit class ValueStoreDeserializerExt(t: ValuesStore.type) extends StrictLogging {
    def fromFile(file: File): ValuesStore = {
      if (!file.exists()) {
        throw new IllegalStateException("file does not exist: " + file.getAbsolutePath)
      }
      logger.info("Loading file: " + file.getAbsolutePath)
      val output = Utils.fromFile(ser, file)
      output
    }

    def fromFile(fileName: String): ValuesStore = {
      fromFile(new File(fileName))
    }

    def fromBytes(input: Array[Byte]): ValuesStore = {
      Utils.fromBytes(ser, input)
    }
  }

} 
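Since both directions are exposed as implicit extension methods, persisting and reloading a store is a two-liner; a sketch with an illustrative file name:

import java.io.File
import com.stefansavev.randomprojections.datarepr.dense.store.ValuesStore
import com.stefansavev.randomprojections.serialization.ValueStoreSerializationExt._

def roundTrip(store: ValuesStore): ValuesStore = {
  val file = new File("values-store.bin")  // illustrative path
  store.toFile(file)                       // via ValueStoreSerializerExt
  ValuesStore.fromFile(file)               // via ValueStoreDeserializerExt
}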
Example 125
Source File: DataFrameViewSerializationExt.scala    From random-projections-at-berlinbuzzwords   with Apache License 2.0 5 votes vote down vote up
package com.stefansavev.randomprojections.serialization

import java.io.File
import com.stefansavev.core.serialization.Utils
import com.stefansavev.randomprojections.datarepr.dense.DataFrameView
import com.stefansavev.randomprojections.serialization.DataFrameViewSerializers._

object DataFrameViewSerializationExt {

  implicit class DataFrameSerializerExt(input: DataFrameView) {
    def toFile(file: File): Unit = {
      val ser = dataFrameSerializer()
      Utils.toFile(ser, file, input)
    }

    def toFile(fileName: String): Unit = {
      toFile(new File(fileName))
    }
  }

  implicit class DataFrameDeserializerExt(t: DataFrameView.type) {

    def fromFile(file: File): DataFrameView = {
      if (!file.exists()) {
        throw new IllegalStateException("file does not exist")
      }

      val ser = dataFrameSerializer()
      val output = Utils.fromFile(ser, file)
      output
    }

    def fromFile(dir: String): DataFrameView = {
      fromFile(new File(dir))
    }
  }

} 
Example 126
Source File: FileWriter.scala    From sbt-avrohugger   with Apache License 2.0 5 votes vote down vote up
package sbtavrohugger

import avrohugger.filesorter.{AvdlFileSorter, AvscFileSorter}
import avrohugger.Generator
import java.io.File

import sbt.Keys._
import sbt.{Logger, globFilter, singleFileFinder}
import sbt.Path._

object FileWriter {

  private[sbtavrohugger] def generateCaseClasses(
    generator: Generator,
    srcDirs: Seq[File],
    target: File,
    log: Logger): Set[java.io.File] = {
    log.info("Considering source directories %s".format(srcDirs.mkString(",")))
    def getSrcFiles(dirs: Seq[File], fileExtension: String) = for {
      srcDir <- dirs
      srcFile <- (srcDir ** s"*.$fileExtension").get
    } yield srcFile
    
    for (inFile <- AvscFileSorter.sortSchemaFiles(getSrcFiles(srcDirs, "avsc"))) {
      log.info("Compiling AVSC %s to %s".format(inFile, target.getPath))
      generator.fileToFile(inFile, target.getPath)
    }

    for (idlFile <- AvdlFileSorter.sortSchemaFiles(getSrcFiles(srcDirs, "avdl"))) {
      log.info("Compiling Avro IDL %s".format(idlFile))
      generator.fileToFile(idlFile, target.getPath)
    }

    for (inFile <- getSrcFiles(srcDirs, "avro")) {
      log.info("Compiling Avro datafile %s".format(inFile))
      generator.fileToFile(inFile, target.getPath)
    }

    for (protocol <- getSrcFiles(srcDirs, "avpr")) {
      log.info("Compiling Avro protocol %s".format(protocol))
      generator.fileToFile(protocol, target.getPath)
    }

    (target ** ("*.java"|"*.scala")).get.toSet
  }

} 
Example 127
Source File: StandardTestUtil.scala    From sbt-avrohugger   with Apache License 2.0 5 votes vote down vote up
package test

import java.io.File

import org.apache.avro.file.{ DataFileReader, DataFileWriter }
import org.apache.avro.generic.{
  GenericRecord,
  GenericDatumReader,
  GenericDatumWriter }


import org.specs2.mutable.Specification

object StandardTestUtil extends Specification {

  def write(file: File, records: List[GenericRecord]) = {
  
    val userDatumWriter = new GenericDatumWriter[GenericRecord]
    val dataFileWriter = new DataFileWriter[GenericRecord](userDatumWriter)
    dataFileWriter.create(records.head.getSchema, file);
    records.foreach(record => dataFileWriter.append(record))
    dataFileWriter.close();
   

  }

  def read(file: File, records: List[GenericRecord]) = {
    val dummyRecord = new GenericDatumReader[GenericRecord]
    val schema = new DataFileReader(file, dummyRecord).getSchema
    val userDatumReader = new GenericDatumReader[GenericRecord](schema)
    val dataFileReader = new DataFileReader[GenericRecord](file, userDatumReader)
    // Adapted from: https://github.com/tackley/avrohugger-list-issue/blob/master/src/main/scala/net/tackley/Reader.scala
    // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat
    // (via org.apache.avro.file.DataFileStream) interacts with the StandardDatumReader.
    var record: GenericRecord = null.asInstanceOf[GenericRecord]
    var sameRecord: GenericRecord = null.asInstanceOf[GenericRecord]
    val recordIter = records.iterator
    while (dataFileReader.hasNext) {
      sameRecord = dataFileReader.next(sameRecord)
      record = recordIter.next
    }
    dataFileReader.close()
    sameRecord must ===(record)
  }

  def verifyWriteAndRead(records: List[GenericRecord]) = {
    val fileName = s"${records.head.getClass.getName}"
    val fileEnding = "avro"
    val file = File.createTempFile(fileName, fileEnding)
    file.deleteOnExit()
    write(file, records)
    read(file, records)
  }

} 
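A usage sketch with a hand-built generic record; the schema and object name are made up for illustration, and only int fields are used to keep generic-record equality simple:

import org.apache.avro.Schema
import org.apache.avro.generic.{GenericData, GenericRecord}
import test.StandardTestUtil

object RoundTripSketch extends App {
  val schema = new Schema.Parser().parse(
    """{"type":"record","name":"Point","fields":[
      |  {"name":"x","type":"int"},
      |  {"name":"y","type":"int"}
      |]}""".stripMargin)

  val p: GenericRecord = new GenericData.Record(schema)
  p.put("x", 1)
  p.put("y", 2)

  // Writes the record to a temp Avro file, reads it back and checks equality.
  StandardTestUtil.verifyWriteAndRead(List(p))
}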
Example 128
Source File: StandardTestUtil.scala    From sbt-avrohugger   with Apache License 2.0 5 votes vote down vote up
package test

import java.io.File

import org.apache.avro.file.{ DataFileReader, DataFileWriter }
import org.apache.avro.generic.{
  GenericRecord,
  GenericDatumReader,
  GenericDatumWriter }


import org.specs2.mutable.Specification

object StandardTestUtil extends Specification {

  def write(file: File, records: List[GenericRecord]) = {
  
    val userDatumWriter = new GenericDatumWriter[GenericRecord]
    val dataFileWriter = new DataFileWriter[GenericRecord](userDatumWriter)
    dataFileWriter.create(records.head.getSchema, file);
    records.foreach(record => dataFileWriter.append(record))
    dataFileWriter.close();
   

  }

  def read(file: File, records: List[GenericRecord]) = {
    
    val dummyRecord = new GenericDatumReader[GenericRecord]
    val schema = new DataFileReader(file, dummyRecord).getSchema
    val userDatumReader = new GenericDatumReader[GenericRecord](schema)
    val dataFileReader = new DataFileReader[GenericRecord](file, userDatumReader)
    // Adapted from: https://github.com/tackley/avrohugger-list-issue/blob/master/src/main/scala/net/tackley/Reader.scala
    // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat
    // (via org.apache.avro.file.DataFileStream) interacts with the StandardDatumReader.
    var record: GenericRecord = null.asInstanceOf[GenericRecord]
    var sameRecord: GenericRecord = null.asInstanceOf[GenericRecord]
    val recordIter = records.iterator
    while (dataFileReader.hasNext) {
      sameRecord = dataFileReader.next(sameRecord)
      record = recordIter.next
    }
    dataFileReader.close()
    sameRecord must ===(record)
  
  }

  def verifyWriteAndRead(records: List[GenericRecord]) = {
    val fileName = s"${records.head.getClass.getName}"
    val fileEnding = "avro"
    val file = File.createTempFile(fileName, fileEnding)
    file.deleteOnExit()
    write(file, records)
    read(file, records)
  }

} 
Example 129
Source File: StandardDefaultValuesSpec.scala    From sbt-avrohugger   with Apache License 2.0 5 votes vote down vote up
import test._
import org.specs2.mutable.Specification
import java.io.File
import scala.collection.mutable.Buffer
import scala.collection.JavaConverters._

import org.apache.avro.file._
import org.apache.avro.generic._
import org.apache.avro._
class StandardDefaultValuesSpec extends Specification {
  skipAll
  "A case class with default values" should {
    "deserialize correctly" in {
      val record = DefaultTest()

      val enumSchemaString = """{"type":"enum","name":"DefaultEnum","symbols":["SPADES","DIAMONDS","CLUBS","HEARTS"]}"""
      val enumSchema = new Schema.Parser().parse(enumSchemaString)
      val genericEnum = new GenericData.EnumSymbol(enumSchema, record.suit.toString)
      
      val embeddedSchemaString = """{"type":"record","name":"Embedded","fields":[{"name":"inner","type":"int"}]}"""
      val embeddedSchema = new Schema.Parser().parse(embeddedSchemaString)
      val embeddedGenericRecord = new GenericData.Record(embeddedSchema)
      embeddedGenericRecord.put("inner", record.embedded.inner)

      val recordSchemaString = """{"type":"record","name":"DefaultTest","namespace":"test","fields":[{"name":"suit","type":{"type":"enum","name":"DefaultEnum","symbols":["SPADES","DIAMONDS","CLUBS","HEARTS"]},"default":"SPADES"},{"name":"number","type":"int","default":0},{"name":"str","type":"string","default":"str"},{"name":"optionString","type":["null","string"],"default":null},{"name":"optionStringValue","type":["string","null"],"default":"default"},{"name":"embedded","type":{"type":"record","name":"Embedded","fields":[{"name":"inner","type":"int"}]},"default":{"inner":1}},{"name":"defaultArray","type":{"type":"array","items":"int"},"default":[1,3,4,5]},{"name":"optionalEnum","type":["null","DefaultEnum"],"default":null},{"name":"defaultMap","type":{"type":"map","values":"string"},"default":{"Hello":"world","Merry":"Christmas"}},{"name":"byt","type":"bytes","default":"ÿ"}, {"name":"defaultEither","type": ["int", "string"],"default":2}, {"name":"defaultCoproduct","type": ["int", "string", "boolean"],"default":3}]}"""
      val recordSchema = new Schema.Parser().parse(recordSchemaString)
      
      val genericRecord = new GenericData.Record(recordSchema)
      genericRecord.put("suit", genericEnum)
    	genericRecord.put("number", record.number)
    	genericRecord.put("str", record.str)
    	genericRecord.put("optionString", record.optionString.getOrElse(null))
      genericRecord.put("optionStringValue", record.optionStringValue.getOrElse(null))
      genericRecord.put("embedded", embeddedGenericRecord)
      genericRecord.put("defaultArray",record.defaultArray.asJava)
      genericRecord.put("optionalEnum", record.optionalEnum.getOrElse(null))
      genericRecord.put("defaultMap", record.defaultMap.asJava)
      genericRecord.put("byt", java.nio.ByteBuffer.wrap(record.byt))
      genericRecord.put("defaultEither", record.defaultEither.fold(identity, identity))
      genericRecord.put("defaultCoproduct", record.defaultCoproduct.select[Int].getOrElse(0))
      val records = List(genericRecord)
      
      val fileName = s"${records.head.getClass.getName}"
      val fileEnding = "avro"
      val file = File.createTempFile(fileName, fileEnding)
      file.deleteOnExit()
      StandardTestUtil.write(file, records)

      var dummyRecord = new GenericDatumReader[GenericRecord]
      val schema = new DataFileReader(file, dummyRecord).getSchema
      val userDatumReader = new GenericDatumReader[GenericRecord](schema)
      val dataFileReader = new DataFileReader[GenericRecord](file, userDatumReader)
      // Adapted from: https://github.com/tackley/avrohugger-list-issue/blob/master/src/main/scala/net/tackley/Reader.scala
      // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat
      // (via org.apache.avro.file.DataFileStream) interacts with the StandardDatumReader.
      var sameRecord: GenericRecord = null.asInstanceOf[GenericRecord]
      while (dataFileReader.hasNext) {
        sameRecord = dataFileReader.next(sameRecord)
      }
      dataFileReader.close()

      sameRecord.get("suit").toString === DefaultEnum.SPADES.toString
      sameRecord.get("number") === 0
      sameRecord.get("str").toString === "str"
      sameRecord.get("optionString") === null
      sameRecord.get("optionStringValue").toString === "default"
      sameRecord.get("embedded").asInstanceOf[GenericRecord].get("inner") === 1
      sameRecord.get("defaultArray") === List(1,3,4,5).asJava
      sameRecord.get("optionalEnum") === null
      sameRecord.get("defaultMap").toString === "{Hello=world, Merry=Christmas}"
      sameRecord.get("byt") === java.nio.ByteBuffer.wrap("ÿ".getBytes)
      sameRecord.get("defaultEither") === 2
      sameRecord.get("defaultCoproduct") === 3
    }
  }
} 
Example 130
Source File: StandardTestUtil.scala    From sbt-avrohugger   with Apache License 2.0 5 votes vote down vote up
package test

import java.io.File

import org.apache.avro.file.{ DataFileReader, DataFileWriter }
import org.apache.avro.generic.{
  GenericRecord,
  GenericDatumReader,
  GenericDatumWriter }


import org.specs2.mutable.Specification

object StandardTestUtil extends Specification {

  def write(file: File, records: List[GenericRecord]) = {
  
    val userDatumWriter = new GenericDatumWriter[GenericRecord]
    val dataFileWriter = new DataFileWriter[GenericRecord](userDatumWriter)
    dataFileWriter.create(records.head.getSchema, file);
    records.foreach(record => dataFileWriter.append(record))
    dataFileWriter.close();
   

  }

  def read(file: File, records: List[GenericRecord]) = {
    val dummyRecord = new GenericDatumReader[GenericRecord]
    val schema = new DataFileReader(file, dummyRecord).getSchema
    val userDatumReader = new GenericDatumReader[GenericRecord](schema)
    val dataFileReader = new DataFileReader[GenericRecord](file, userDatumReader)
    // Adapted from: https://github.com/tackley/avrohugger-list-issue/blob/master/src/main/scala/net/tackley/Reader.scala
    // This isn't great scala, but represents how org.apache.avro.mapred.AvroInputFormat
    // (via org.apache.avro.file.DataFileStream) interacts with the StandardDatumReader.
    var record: GenericRecord = null.asInstanceOf[GenericRecord]
    var sameRecord: GenericRecord = null.asInstanceOf[GenericRecord]
    val recordIter = records.iterator
    while (dataFileReader.hasNext) {
      sameRecord = dataFileReader.next(sameRecord)
      record = recordIter.next
    }
    dataFileReader.close()
    sameRecord must ===(record)
  }

  def verifyWriteAndRead(records: List[GenericRecord]) = {
    val fileName = s"${records.head.getClass.getName}"
    val fileEnding = "avro"
    val file = File.createTempFile(fileName, fileEnding)
    file.deleteOnExit()
    write(file, records)
    read(file, records)
  }

} 
Example 131
Source File: ExpectedResults.scala    From api-first-hand   with MIT License 5 votes vote down vote up
package de.zalando

import java.io.{ File, FileOutputStream }

import de.zalando.apifirst.util.ScalaPrinter
import de.zalando.model._

import scala.io.Source


trait ExpectedResults {

  val model = Seq[WithModel](
    additional_properties_yaml,
    basic_polymorphism_yaml,
    nested_arrays_yaml,
    nested_options_yaml,
    basic_extension_yaml,
    expanded_polymorphism_yaml,
    nested_objects_yaml,
    options_yaml,
    wrong_field_name_yaml,
    all_of_imports_yaml,
    i038_invalid_enum_members_yaml
  )
  val examples = Seq[WithModel](
    basic_auth_api_yaml,
    cross_spec_references_yaml,
    echo_api_yaml,
    error_in_array_yaml,
    form_data_yaml,
    full_petstore_api_yaml,
    hackweek_yaml,
    heroku_petstore_api_yaml,
    instagram_api_yaml,
    minimal_api_yaml,
    nakadi_yaml,
    security_api_yaml,
    simple_petstore_api_yaml,
    split_petstore_api_yaml,
    string_formats_yaml,
    type_deduplication_yaml,
    uber_api_yaml,
    i041_no_json_deserialiser_yaml
  )
  val validations = Seq[WithModel](
    nested_arrays_validation_yaml,
    nested_objects_validation_yaml,
    nested_options_validation_yaml,
    numbers_validation_yaml,
    string_formats_validation_yaml
  )

  val resourcesPath = "play-scala-generator/src/test/resources/"

  def expectationsFolder: String = "/expected_results/"

  def dump(result: String, name: String, suffix: String): Unit = {
    if (result.nonEmpty) {
      val newFile = target(name, suffix)
      newFile.getParentFile.mkdirs()
      newFile.delete()
      newFile.createNewFile()
      val out = new FileOutputStream(newFile)
      out.write(result.getBytes)
      out.close()
    }
  }

  def asInFile(name: String, suffix: String): String = {
    val expectedFile = target(name, suffix)
    if (expectedFile.canRead) {
      val src = Source.fromFile(expectedFile)
      val result = src.getLines().mkString("\n")
      src.close()
      result
    } else
      ""
  }

  def target(name: String, suffix: String): File =
    new File(resourcesPath + expectationsFolder + name + "." + suffix)

  def clean(str: String): String = str.split("\n").map(_.trim).filter(_.nonEmpty).mkString("\n")

  def nameFromModel(ast: WithModel): String = ScalaPrinter.nameFromModel(ast)

} 
Example 132
Source File: Display.scala    From almond   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package almond.display

import java.io.File
import java.net.URL
import java.nio.file.Path
import java.util.{Map => JMap}

import almond.interpreter.api.{DisplayData, OutputHandler}
import jupyter.{Displayer, Displayers}

import scala.collection.JavaConverters._

trait Display {
  def data(): Map[String, String]
  def metadata(): Map[String, String] = Map()
  def displayData(): DisplayData =
    DisplayData(data(), metadata = metadata())

  def display()(implicit output: OutputHandler): Unit =
    output.display(displayData())

  // registering things with jvm-repr just in case
  Display.registered
}

object Display {

  private lazy val registered: Unit = {
    Displayers.register(
      classOf[Display],
      new Displayer[Display] {
        def display(d: Display): JMap[String, String] =
          d.data().asJava
      }
    )
  }


  def markdown(content: String)(implicit output: OutputHandler): Unit =
    Markdown(content).display()
  def html(content: String)(implicit output: OutputHandler): Unit =
    Html(content).display()
  def latex(content: String)(implicit output: OutputHandler): Unit =
    Latex(content).display()
  def text(content: String)(implicit output: OutputHandler): Unit =
    Text(content).display()

  def js(content: String)(implicit output: OutputHandler): Unit =
    Javascript(content).display()

  def svg(content: String)(implicit output: OutputHandler): Unit =
    Svg(content).display()


  trait Builder[C, T] {

    protected def build(contentOrUrl: Either[URL, C]): T

    def apply(content: C): T =
      build(Right(content))

    def from(url: String): T =
      build(Left(new URL(url)))
    def from(url: URL): T =
      build(Left(url))

    def fromFile(file: File): T =
      build(Left(file.toURI.toURL))
    def fromFile(path: Path): T =
      build(Left(path.toUri.toURL))
    def fromFile(path: String): T =
      build(Left(new File(path).toURI.toURL))
  }


} 
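In an almond notebook cell, where the kernel already provides the implicit OutputHandler, the convenience methods above can be called directly; a minimal sketch:

// Inside a notebook cell (implicit almond.interpreter.api.OutputHandler in scope).
Display.markdown("## Results\n* accuracy: 0.93")
Display.html("""<b style="color:green">Training finished</b>""")
Display.text("plain-text fallback output")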
Example 133
Source File: NotebookSparkSessionBuilder.scala    From almond   with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
package org.apache.spark.sql.almondinternals

import java.io.File
import java.lang.{Boolean => JBoolean}

import almond.interpreter.api.{CommHandler, OutputHandler}
import almond.display.Display.html
import ammonite.interp.api.InterpAPI
import ammonite.repl.api.ReplAPI
import org.apache.log4j.{Category, Logger, RollingFileAppender}
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.ammonitesparkinternals.AmmoniteSparkSessionBuilder

import scala.collection.JavaConverters._

class NotebookSparkSessionBuilder
 (implicit
   interpApi: InterpAPI,
   replApi: ReplAPI,
   publish: OutputHandler,
   commHandler: CommHandler
 ) extends AmmoniteSparkSessionBuilder {

  private var progress0 = true
  private var keep0 = true

  private var logsInDeveloperConsoleOpt = Option.empty[Boolean]

  def progress(enable: Boolean = true, keep: Boolean = true): this.type = {
    progress0 = enable
    keep0 = keep
    this
  }

  def logsInDeveloperConsole(enable: JBoolean = null): this.type = {
    logsInDeveloperConsoleOpt = Option[JBoolean](enable).map[Boolean](x => x)
    this
  }

  override def getOrCreate(): SparkSession = {

    val logFileOpt = logsInDeveloperConsoleOpt match {
      case Some(false) =>
        None
      case Some(true) =>
        val fileOpt = NotebookSparkSessionBuilder.logFile(classOf[SparkSession])
        if (fileOpt.isEmpty)
          Console.err.println("Warning: cannot determine log file, logs won't be sent to developer console.")
        fileOpt
      case None =>
        NotebookSparkSessionBuilder.logFile(classOf[SparkSession])
    }

    var sendLogOpt = Option.empty[SendLog]

    try {
      sendLogOpt = logFileOpt.map { f =>
        println("See your browser developer console for detailed spark logs.")
        SendLog.start(f)
      }

      val session = super.getOrCreate()

      for (url <- session.sparkContext.uiWebUrl)
        html(s"""<a target="_blank" href="$url">Spark UI</a>""")

      session.sparkContext.addSparkListener(
        new ProgressSparkListener(session, keep0, progress0)
      )

      session
    } finally {
      sendLogOpt.foreach(_.stop())
    }
  }

}

object NotebookSparkSessionBuilder {

  private def logFile(clazz: Class[_]): Option[File] = {

    def appenders(log: Category): Stream[Any] =
      if (log == null)
        Stream()
      else
        log.getAllAppenders.asScala.toStream #::: appenders(log.getParent)

    appenders(Logger.getLogger(clazz)).collectFirst {
      case rfa: RollingFileAppender => new File(rfa.getFile)
    }
  }

} 
Example 134
Source File: BitMap.scala    From Scurses   with MIT License 5 votes vote down vote up
package net.team2xh.onions.components.widgets

import java.awt.image.BufferedImage
import java.io.File
import javax.imageio.ImageIO

import net.team2xh.onions.Symbols
import net.team2xh.onions.Themes.ColorScheme
import net.team2xh.onions.components.{FramePanel, Widget}
import net.team2xh.scurses.{Colors, Scurses}

object BitMap {

  def apply(parent: FramePanel, path: String, relative: Boolean = false)(implicit screen: Scurses): BitMap = {
    val fullPath = if (relative) new File("").getAbsolutePath + path else path
    val image = ImageIO.read(new File(fullPath))
    new BitMap(parent, image)
  }

  def apply(parent: FramePanel, image: BufferedImage)(implicit screen: Scurses): BitMap = {
    new BitMap(parent, image)
  }

}

class BitMap(parent: FramePanel, image: BufferedImage)
                 (implicit screen: Scurses) extends Widget(parent) {

  val colors = {
    val width = image.getWidth
    val height = image.getHeight
    for (x <- 0 until width)
      yield for (y <- 0 until height / 2) yield {
        // Read two rows at a time
        val upper = Colors.fromRGBInt(image.getRGB(x, y * 2))
        val lower = if (height % 2 == 1) -1 else Colors.fromRGBInt(image.getRGB(x, y * 2 + 1))
        (upper, lower)
      }
  }

  override def redraw(focus: Boolean, theme: ColorScheme): Unit = {
    val width = image.getWidth min innerWidth
    val x0 = (innerWidth - width) / 2
    for (x <- 0 until width) {
      for (y <- 0 until innerHeight) {
        // Read two rows at a time
        val c = colors(x)(y)
        screen.put(x0 + x, y, Symbols.BLOCK_UPPER, c._1, c._2)
      }
    }
  }

  override def handleKeypress(keypress: Int): Unit = { }

  override def focusable: Boolean = false
  override def innerHeight: Int = image.getHeight / 2
} 
Example 135
Source File: SarkPredictorEngineSpec.scala    From elasticsearch-prediction-spark   with Apache License 2.0 5 votes vote down vote up
package com.sdhu.elasticsearchprediction.spark
package test

import com.mahisoft.elasticsearchprediction._
import utils.DataProperties
import plugin.domain.IndexValue
import plugin.exception.PredictionException
import plugin.engine.PredictorEngine

import org.apache.spark._
import rdd.RDD
import mllib.regression._
import mllib.classification._

import org.scalatest._
import com.holdenkarau.spark.testing._
import java.io.File
import java.util.Collection

import scala.collection.JavaConversions._


class SparkPredictorEngineSpec extends FlatSpec with MustMatchers {
  val pconf = getClass.getResource("/prop1.conf").getPath
  val dataP = getClass.getResource("/mini.csv").toURI.toString
  val dp = new DataProperties(pconf)
  val modelP = getClass.getResource("/spark-clf-test.model").getPath 
  val clf_type = "spark.logistic-regression"

  "Predictor Engine" should "throw empty model exception" in {
    val eng = new SparkPredictorEngine(modelP, SVM_Helper)
    evaluating {eng.getPrediction(List[IndexValue]())} must produce [PredictionException]
  }

//  "Spark_PredictorEngine" should "return sparkPredictorEngine of svm type" in {
//    val speng = new Spark_PredictorEngine(modelP, "spark.svm")
//    speng.getSparkPredictorEngine mustBe a [SparkPredictorEngine[_]]
//    
//  }

  it should "return a generic PredictorEngine" in {
    val speng = new Spark_PredictorEngine(modelP, "spark.svm")
    speng.getPredictorEngine mustBe a [PredictorEngine]
  }

  it should "load the classifier" in {
    val speng = new Spark_PredictorEngine(modelP, clf_type)
    val eng = speng.getSparkPredictorEngine
    val m = eng.getModel
    val cm = m.categoriesMap.getOrElse(Map[String, Double]())

    m.clf must not be empty
    //m.numClasses must be(Some(2))
    //m.binThreshold must be(Some(0.5))
    cm.keys must contain allOf("Female", "Male", "United-States", "China")
  }

  it should "evaluate values" in { 
    val speng = new Spark_PredictorEngine(modelP, clf_type)
    val eng = speng.getSparkPredictorEngine

    val p0 = Array("50", "Self-emp-not-inc", "Male", "0", "0", "United-States")
    val cindv = ReadUtil.arr2CIndVal(p0)
    
    val check = eng.getPrediction(cindv) 
    
    check must equal(0.0)
    check mustBe a [java.lang.Double]
  }
  
  it should "evaluate values using generic Predictor engine" in { 
    val speng = new Spark_PredictorEngine(modelP, clf_type)
    val eng = speng.getPredictorEngine

    val p0 = Array("50", "Self-emp-not-inc", "Male", "0", "0", "United-States")
    val cindv = ReadUtil.arr2CIndVal(p0)
    
    val check = eng.getPrediction(cindv) 
    
    check must equal(0.0)
    check mustBe a [java.lang.Double]
  }
} 
Example 136
Source File: ApplicationWithProcess.scala    From aloha   with Apache License 2.0 5 votes vote down vote up
package me.jrwang.aloha.app

import java.io.File
import java.nio.charset.StandardCharsets

import scala.collection.JavaConverters._
import scala.concurrent.Promise

import com.google.common.io.Files
import me.jrwang.aloha.common.Logging
import me.jrwang.aloha.common.util.{FileAppender, Utils}

abstract class ApplicationWithProcess extends AbstractApplication with Logging {
  private var process: Process = _
  private var stdoutAppender: FileAppender = _
  private var stderrAppender: FileAppender = _

  // Timeout to wait for when trying to terminate an app.
  private val APP_TERMINATE_TIMEOUT_MS = 10 * 1000

  def getProcessBuilder(): ProcessBuilder

  private var stateMonitorThread: Thread = _

  override def start(): Promise[ExitState] = {
    val processBuilder = getProcessBuilder()
    val command = processBuilder.command()
    val formattedCommand = command.asScala.mkString("\"", "\" \"", "\"")
    logInfo(s"Launch command: $formattedCommand")
    processBuilder.directory(appDir)

    process = processBuilder.start()
    // Redirect its stdout and stderr to files
    val stdout = new File(appDir, "stdout")
    stdoutAppender = FileAppender(process.getInputStream, stdout, alohaConf)

    val header = "Aloha Application Command: %s\n%s\n\n".format(
      formattedCommand, "=" * 40)
    val stderr = new File(appDir, "stderr")
    Files.write(header, stderr, StandardCharsets.UTF_8)
    stderrAppender = FileAppender(process.getErrorStream, stderr, alohaConf)

    stateMonitorThread = new Thread("app-state-monitor-thread") {
      override def run(): Unit = {
        val exitCode = process.waitFor()
        if(exitCode == 0) {
          result.success(ExitState(ExitCode.SUCCESS, Some("success")))
        } else {
          result.success(ExitState(ExitCode.FAILED, Some("failed")))
        }
      }
    }
    stateMonitorThread.start()
    result
  }

  override def shutdown(reason: Option[String]): Unit = {
    if (process != null) {
      logInfo("Killing process!")
      if (stdoutAppender != null) {
        stdoutAppender.stop()
      }
      if (stderrAppender != null) {
        stderrAppender.stop()
      }
      val exitCode = Utils.terminateProcess(process, APP_TERMINATE_TIMEOUT_MS)
      if (exitCode.isEmpty) {
        logWarning("Failed to terminate process: " + process +
          ". This process will likely be orphaned.")
      }
    }
  }
} 
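A hedged sketch of a concrete subclass; getProcessBuilder and clean are the only members left abstract by the classes above, and the command is illustrative:

import me.jrwang.aloha.app.ApplicationWithProcess

class SleepApp extends ApplicationWithProcess {
  // The launched process is monitored, and its stdout/stderr are appended
  // to files in the application directory by ApplicationWithProcess.
  override def getProcessBuilder(): ProcessBuilder =
    new ProcessBuilder("sleep", "10")  // illustrative command

  // Nothing extra to release for this toy application.
  override def clean(): Unit = ()
}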
Example 137
Source File: AbstractApplication.scala    From aloha   with Apache License 2.0 5 votes vote down vote up
package me.jrwang.aloha.app

import java.io.File

import scala.concurrent.Promise

import me.jrwang.aloha.common.AlohaConf

abstract class AbstractApplication extends Application {
  protected val result: Promise[ExitState] = Promise()

  protected var appDesc: ApplicationDescription = _
  protected var appDir: File = _
  protected var alohaConf: AlohaConf = _

  override def withDescription(desc: ApplicationDescription): Application = {
    this.appDesc = desc
    this
  }

  override def withApplicationDir(appDir: File): Application = {
    this.appDir = appDir
    this
  }

  override def withAlohaConf(conf: AlohaConf): Application = {
    this.alohaConf = conf
    this
  }

} 
Example 138
Source File: Application.scala    From aloha   with Apache License 2.0 5 votes vote down vote up
package me.jrwang.aloha.app

import java.io.File

import scala.concurrent.Promise

import me.jrwang.aloha.common.{AlohaConf, AlohaException, Logging}
import me.jrwang.aloha.scheduler.AlohaUserCodeClassLoaders


trait Application {

  def start(): Promise[ExitState]

  def shutdown(reason: Option[String]): Unit

  def withDescription(desc: ApplicationDescription): Application

  def withApplicationDir(appDir: File): Application

  def withAlohaConf(conf: AlohaConf): Application

  def clean(): Unit
}

object Application extends Logging {
  def create(appDesc: ApplicationDescription): Application = {
    //TODO we should download dependencies and resource files
    logInfo(s"Create module for [$appDesc]")
    val fullClassName = appDesc.entryPoint
    try {
      val urls = appDesc.libs.map(new File(_)).filter(_.exists())
        .flatMap(_.listFiles().filter(_.isFile)).map(_.toURI.toURL)
      val classLoader = AlohaUserCodeClassLoaders.childFirst(urls)
      Thread.currentThread().setContextClassLoader(classLoader)
      val klass = classLoader.loadClass(fullClassName)
      require(classOf[Application].isAssignableFrom(klass),
        s"$fullClassName is not a subclass of ${classOf[Application].getName}.")
      klass.getConstructor().newInstance().asInstanceOf[Application].withDescription(appDesc)
    } catch {
      case _: NoSuchMethodException =>
        throw new AlohaException(
          s"$fullClassName did not have a zero-argument constructor." +
            s"Note: if the class is defined inside of another Scala class, then its constructors " +
            s"may accept an implicit parameter that references the enclosing class; in this case, " +
            s"you must define the class as a top-level class in order to prevent this extra" +
            " parameter from breaking Atom's ability to find a valid constructor.")
      case e: Throwable =>
        throw e
    }
  }
} 
Example 139
Source File: AppRunner.scala    From aloha   with Apache License 2.0 5 votes vote down vote up
package me.jrwang.aloha.scheduler.worker

import java.io.File

import scala.concurrent.Await
import scala.concurrent.duration.Duration

import me.jrwang.aloha.app.{Application, ApplicationDescription, ApplicationState, ExitCode}
import me.jrwang.aloha.common.{AlohaConf, Logging}
import me.jrwang.aloha.rpc.RpcEndpointRef
import me.jrwang.aloha.scheduler.ApplicationStateChanged

class AppRunner(
    val conf: AlohaConf,
    val appId: String,
    val appDesc: ApplicationDescription,
    val worker: RpcEndpointRef,
    val workerId: String,
    val host: String,
    val appDir: File,
    @volatile var state: ApplicationState.Value)
  extends Logging {
  private var workerThread: Thread = null

  private[worker] def start() {
    workerThread = new Thread(s"ApplicationRunner for $appId") {
      override def run() {
        fetchAndRunApplication()
      }
    }
    workerThread.start()
  }

  // Stop this application runner
  private[worker] def kill() {
    if (workerThread != null) {
      // the workerThread will kill the application when interrupted
      workerThread.interrupt()
      workerThread = null
      state = ApplicationState.KILLED
    }
  }

  
  private def fetchAndRunApplication() {
    var app: Application = null
    try {
      app = Application.create(appDesc).withApplicationDir(appDir).withAlohaConf(conf)
      val exitStatePromise = app.start()
      state = ApplicationState.RUNNING
      worker.send(ApplicationStateChanged(appId, ApplicationState.RUNNING, None))
      val exitState = Await.result(exitStatePromise.future, Duration.Inf)
      if(exitState.code == ExitCode.FAILED) {
        worker.send(ApplicationStateChanged(appId, ApplicationState.FAILED, exitState.msg, None))
      } else {
        worker.send(ApplicationStateChanged(appId, ApplicationState.FINISHED, exitState.msg, None))
      }
    } catch {
      case _: InterruptedException =>
        logInfo(s"Runner thread for application $appId interrupted")
        state = ApplicationState.KILLED
        killApp(app, Some("User request to kill app."))
        worker.send(ApplicationStateChanged(appId, ApplicationState.KILLED, Some("User request to kill app.")))
      case e: Exception =>
        logError("Error running executor", e)
        state = ApplicationState.FAILED
        killApp(app, Some(e.toString))
        worker.send(ApplicationStateChanged(appId, ApplicationState.FAILED, Some(e.toString), Some(e)))
    } finally {
      if(app != null) {
        app.clean()
      }
    }
  }

  private def killApp(app: Application, reason: Option[String]) = {
    if(app != null) {
      try {
        app.shutdown(reason)
      } catch {
        case e: Throwable =>
          logError(s"Error while killing app $appDesc.", e)
      }
    }
  }

} 
Example 140
Source File: ImageReader.scala    From scala-deeplearn-examples   with Apache License 2.0 5 votes vote down vote up
package io.brunk.examples

import java.io.{File, FileFilter}
import java.lang.Math.toIntExact

import org.datavec.api.io.filters.BalancedPathFilter
import org.datavec.api.io.labels.ParentPathLabelGenerator
import org.datavec.api.split.{FileSplit, InputSplit}
import org.datavec.image.loader.BaseImageLoader
import org.datavec.image.recordreader.ImageRecordReader
import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator
import org.deeplearning4j.datasets.iterator.MultipleEpochsIterator
import org.deeplearning4j.eval.Evaluation
import org.nd4j.linalg.dataset.api.iterator.DataSetIterator
import org.nd4j.linalg.dataset.api.preprocessor.ImagePreProcessingScaler

import scala.collection.JavaConverters._


object ImageReader {

  val channels = 3
  val height = 150
  val width = 150

  val batchSize = 50
  val numClasses = 2
  val epochs = 100
  val splitTrainTest = 0.8

  val random = new java.util.Random()

  def createImageIterator(path: String): (MultipleEpochsIterator, DataSetIterator) = {
    val baseDir = new File(path)
    val labelGenerator = new ParentPathLabelGenerator
    val fileSplit = new FileSplit(baseDir, BaseImageLoader.ALLOWED_FORMATS, random)

    val numExamples = toIntExact(fileSplit.length)
    val numLabels = fileSplit.getRootDir.listFiles(new FileFilter {
      override def accept(pathname: File): Boolean = pathname.isDirectory
    }).length

    val pathFilter = new BalancedPathFilter(random, labelGenerator, numExamples, numLabels, batchSize)

    //val inputSplit = fileSplit.sample(pathFilter, splitTrainTest, 1 - splitTrainTest)
    val inputSplit = fileSplit.sample(pathFilter, 70, 30)

    val trainData = inputSplit(0)
    val validationData = inputSplit(1)

    val recordReader = new ImageRecordReader(height, width, channels, labelGenerator)
    val scaler = new ImagePreProcessingScaler(0, 1)

    recordReader.initialize(trainData, null)
    val dataIter = new RecordReaderDataSetIterator(recordReader, batchSize, 1, numClasses)
    scaler.fit(dataIter)
    dataIter.setPreProcessor(scaler)
    val trainIter = new MultipleEpochsIterator(epochs, dataIter)

    val valRecordReader = new ImageRecordReader(height, width, channels, labelGenerator)
    valRecordReader.initialize(validationData, null)
    val validationIter = new RecordReaderDataSetIterator(valRecordReader, batchSize, 1, numClasses)
    scaler.fit(validationIter)
    validationIter.setPreProcessor(scaler)

    (trainIter, validationIter)
  }

} 
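A hedged usage sketch for the reader above, assuming a directory with one subdirectory per class label (the path is made up):

val (trainIter, validationIter) = ImageReader.createImageIterator("data/cats-vs-dogs")
// trainIter already loops over the configured 100 epochs; validationIter is a single pass.
// Both are scaled to [0, 1] by the ImagePreProcessingScaler.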
Example 141
Source File: IrisReader.scala    From scala-deeplearn-examples   with Apache License 2.0 5 votes vote down vote up
package io.brunk.examples

import java.io.File

import org.datavec.api.records.reader.impl.csv.CSVRecordReader
import org.datavec.api.split.FileSplit
import org.deeplearning4j.datasets.datavec.RecordReaderDataSetIterator
import org.nd4j.linalg.dataset.SplitTestAndTrain
import org.nd4j.linalg.dataset.api.preprocessor.NormalizerStandardize

object IrisReader {
  val numLinesToSkip = 1

  val batchSize  = 150
  val labelIndex = 4
  val numLabels  = 3

  val seed = 1

  def readData(): SplitTestAndTrain = {
    val recordReader = new CSVRecordReader(numLinesToSkip, ',')
    recordReader.initialize(new FileSplit(new File("data/iris.csv")))
    val iterator = new RecordReaderDataSetIterator(recordReader, batchSize, labelIndex, numLabels)
    val dataSet  = iterator.next() // read all data in a single batch
    dataSet.shuffle(seed)
    val testAndTrain = dataSet.splitTestAndTrain(0.67)
    val train        = testAndTrain.getTrain
    val test         = testAndTrain.getTest

//    val normalizer = new NormalizerStandardize
//    normalizer.fit(train)
//    normalizer.transform(train) // normalize training data
//    normalizer.transform(test)  // normalize test data
    testAndTrain
  }
} 
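A usage sketch; it assumes data/iris.csv exists with one header row, four feature columns and the label in the fifth column, as the constants above imply:

val split = IrisReader.readData()
val train = split.getTrain   // 67% of the shuffled rows
val test  = split.getTest    // the remaining 33%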
Example 142
Source File: CorpusReader.scala    From ai.vitk.ner   with GNU General Public License v3.0 5 votes vote down vote up
package ai.vitk.ner

import java.io.{File, InputStream}

import org.slf4j.LoggerFactory

import scala.collection.mutable.ListBuffer
import scala.io.Source

object CorpusReader {
  val logger = LoggerFactory.getLogger(CorpusReader.getClass)

  
  def readVLSPTest1(resourcePath: String): List[Sentence] = {
    // read lines of the file and remove lines which contains "<s>"
    val stream = getClass.getResourceAsStream(resourcePath)
    val lines = Source.fromInputStream(stream).getLines().toList.filter {
      line => line.trim != "<s>"
    }
    val sentences = new ListBuffer[Sentence]()
    var tokens = new ListBuffer[Token]()
    for (i <- (0 until lines.length)) {
      val line = lines(i).trim
      if (line == "</s>") {
        if (!tokens.isEmpty) sentences.append(Sentence(tokens))
        tokens = new ListBuffer[Token]()
      } else {
        val parts = line.split("\\s+")
        if (parts.length < 3) 
          logger.error("Invalid line = " + line) 
        else 
          tokens.append(Token(parts(0), Map(Label.PartOfSpeech -> parts(1), Label.Chunk -> parts(2))))
      }
    }
    logger.info(resourcePath + ", number of sentences = " + sentences.length)
    sentences.toList
  }
  
  def readVLSPTest2(dir: String): List[Sentence] = {
    def getListOfFiles: List[File] = {
      val d = new File(dir)
      if (d.exists && d.isDirectory) {
        d.listFiles.filter(_.isFile).toList
      } else {
        List[File]()
      }
    }
    val files = getListOfFiles
    logger.info("Number of test files = " + files.length)
    files.flatMap {
      file  => {
        val x = file.getAbsolutePath
        val resourcePath = x.substring(x.indexOf("/ner"))
        readVLSPTest1(resourcePath)
      } 
    }
  }
  
  
  def main(args: Array[String]): Unit = {
    val path = "/ner/vi/train.txt"
    val sentences = readCoNLL(path)
    logger.info("Number of sentences = " + sentences.length)
    sentences.take(10).foreach(s => logger.info(s.toString))
    sentences.takeRight(10).foreach(s => logger.info(s.toString))
  }
} 
Example 143
Source File: Releaser.scala    From releaser   with Apache License 2.0 5 votes vote down vote up
package uk.gov.hmrc.releaser

import java.io.File
import java.nio.file.{Files, Path}

import org.apache.commons.io.FileUtils
import uk.gov.hmrc.releaser.bintray.{BintrayHttp, BintrayRepoConnector, DefaultBintrayRepoConnector}
import uk.gov.hmrc.releaser.github.{GithubConnector, Repo}
import uk.gov.hmrc.{CredentialsFinder, FileDownloader, Logger}

import scala.util.{Failure, Success, Try}

object ReleaserMain {
  def main(args: Array[String]): Unit = {
    val result = Releaser(args)
    System.exit(result)
  }
}

object Releaser extends Logger {

  import ArgParser._

  def apply(args: Array[String]): Int = {
    parser.parse(args, Config()) match {
      case Some(config) =>
        val githubName = config.githubNameOverride.getOrElse(config.artefactName)
        run(config.artefactName, ReleaseCandidateVersion(config.rcVersion), config.releaseType, githubName, config.releaseNotes, config.dryRun)
      case None => -1
    }
  }

  def run(artefactName: String, rcVersion: ReleaseCandidateVersion, releaseType: ReleaseType.Value, gitHubName: String, releaseNotes: Option[String], dryRun: Boolean = false): Int = {
    val githubCredsFile = System.getProperty("user.home") + "/.github/.credentials"
    val bintrayCredsFile = System.getProperty("user.home") + "/.bintray/.credentials"

    val githubCredsOpt = CredentialsFinder.findGithubCredsInFile(new File(githubCredsFile).toPath)
    val bintrayCredsOpt = CredentialsFinder.findBintrayCredsInFile(new File(bintrayCredsFile).toPath)

    doReleaseWithCleanup { directories =>
      if (githubCredsOpt.isEmpty) {
        log.info(s"Didn't find github credentials in $githubCredsFile")
        -1
      } else if (bintrayCredsOpt.isEmpty) {
        log.info(s"Didn't find Bintray credentials in $bintrayCredsFile")
        -1
      } else {

        val releaserVersion = getClass.getPackage.getImplementationVersion
        val metaDataProvider = new ArtefactMetaDataProvider()
        val gitHubDetails = if (dryRun) GithubConnector.dryRun(githubCredsOpt.get, releaserVersion) else GithubConnector(githubCredsOpt.get, releaserVersion)
        val bintrayDetails = if (dryRun) BintrayRepoConnector.dryRun(bintrayCredsOpt.get, directories.workDir) else BintrayRepoConnector(bintrayCredsOpt.get, directories.workDir)
        val bintrayRepoConnector = new DefaultBintrayRepoConnector(directories.workDir, new BintrayHttp(bintrayCredsOpt.get), new FileDownloader)

        val coordinator = new Coordinator(directories.stageDir, metaDataProvider, gitHubDetails, bintrayRepoConnector)
        val result = coordinator.start(artefactName, Repo(gitHubName), rcVersion, releaseType, releaseNotes)

        result match {
          case Success(targetVersion) =>
            log.info(s"Releaser successfully released $artefactName $targetVersion")
            0
          case Failure(e) =>
            e.printStackTrace()
            log.info(s"Releaser failed to release $artefactName $rcVersion with error '${e.getMessage}'")
            1
        }
      }
    }
  }

  def doReleaseWithCleanup[T](f: ReleaseDirectories => T): T = {
    val directories = ReleaseDirectories()
    try {
      f(directories)
    } finally {
      log.info("cleaning releaser work directory")
      directories.delete().recover { case t => log.warn(s"failed to delete releaser work directory ${t.getMessage}") }
    }

  }
}

case class ReleaseDirectories(tmpDirectory: Path = Files.createTempDirectory("releaser")) {

  lazy val workDir = Files.createDirectories(tmpDirectory.resolve("work"))
  lazy val stageDir = Files.createDirectories(tmpDirectory.resolve("stage"))

  def delete() = Try {
    FileUtils.forceDelete(tmpDirectory.toFile)
  }
} 
Example 144
Source File: package.scala    From theGardener   with Apache License 2.0 5 votes vote down vote up
import java.io.File

import play.api.Logging

import scala.concurrent._
import scala.util.control.NonFatal
import scala.util.{Failure, Try}

package object utils extends Logging {

  implicit class TryOps[T](t: Try[T]) {
    def logError(msg: => String): Try[T] = t.recoverWith {
      case e =>
        logger.error(msg, e)
        Failure(e)
    }
  }

  implicit class FutureOps[T](f: Future[T]) {
    def logError(msg: => String)(implicit ec: ExecutionContext): Future[T] = f.recoverWith {
      case NonFatal(e) =>
        logger.error(msg, e)
        Future.failed(e)
    }
  }

  implicit class PathExt(path: String) {
    def fixPathSeparator: String = path.replace('/', File.separatorChar)
  }

} 
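A minimal usage sketch (not part of theGardener itself) showing how the helpers above are brought into scope with import utils._; the demo object name and messages are invented for illustration.

import scala.concurrent.{ExecutionContext, Future}
import scala.util.Try

object UtilsDemo extends App {
  import utils._
  implicit val ec: ExecutionContext = ExecutionContext.global

  // Failures are logged with the supplied message and then re-propagated unchanged.
  val parsed = Try("42x".toInt).logError("could not parse the value")
  println(parsed.isFailure) // true

  Future(1 / 0).logError("background computation failed")

  // fixPathSeparator rewrites '/' to the platform's separator (a no-op on Unix).
  println("docs/pages/index.md".fixPathSeparator)
}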
Example 145
Source File: PageController.scala    From theGardener   with Apache License 2.0 5 votes vote down vote up
package controllers

import java.io.File

import com.github.ghik.silencer.silent
import controllers.AssetAccessError.{AssetNotAllowed, AssetNotFound}
import controllers.dto._
import io.swagger.annotations._
import javax.inject.Inject
import play.api.Configuration
import play.api.libs.json.Json
import play.api.mvc._
import repositories._
import services._

import scala.concurrent.ExecutionContext

@silent("Interpolated")
@silent("missing interpolator")
@Api(value = "PageController", produces = "application/json")
class PageController @Inject()(pageService: PageService)(implicit ec: ExecutionContext) extends InjectedController {

  @ApiOperation(value = "Get pages from path", response = classOf[PageDTO], responseContainer = "list")
  @ApiResponses(Array(new ApiResponse(code = 404, message = "Page not found")))
  def getPageFromPath(path: String): Action[AnyContent] = Action.async {
    pageService.computePageFromPath(path).map {
      case Some(pageDto) => Ok(Json.toJson(Seq(pageDto)))
      case None => NotFound(s"No Page $path")
    }
  }

}

sealed abstract class AssetAccessError(message: String) extends Throwable(message)


object AssetAccessError {

  case class AssetNotAllowed(message: String) extends AssetAccessError(message)

  case class AssetNotFound(message: String) extends AssetAccessError(message)

}

class PageAssetController @Inject()(config: Configuration, projectRepository: ProjectRepository)(implicit ec: ExecutionContext) extends InjectedController {

  val projectsRootDirectory = config.get[String]("projects.root.directory")

  def getImageFromPath(path: String): Action[AnyContent] = Action {
    val params = path.split(">")

    (for {
      projectId <- params.lift(0)
      branchName <- params.lift(1)
      relativePath <- params.lift(2)

      documentationRootPath <- projectRepository.findById(projectId).flatMap(_.documentationRootPath)
      assetFileAccess = accessToAsset(s"$projectsRootDirectory/$projectId/$branchName/$documentationRootPath", relativePath)
    } yield (relativePath, assetFileAccess)) match {

      case None => NotFound("Project not found or bad configuration")
      case Some((_, Left(AssetNotAllowed(message)))) => Forbidden(message)
      case Some((_, Left(AssetNotFound(message)))) => NotFound(message)
      case Some((_, Right(assetFile))) => Ok.sendFile(assetFile)
    }
  }

  def accessToAsset(documentationRootPath: String, assetRelativePath: String): Either[AssetAccessError, File] = {
    val assetFile = new File(s"$documentationRootPath/$assetRelativePath")
    val documentationCanonicalPath = new File(documentationRootPath).getCanonicalPath
    val assetCanonicalPath = assetFile.getCanonicalPath

    if (!assetCanonicalPath.contains(documentationCanonicalPath)) {
      Left(AssetNotAllowed(s"Asset $assetRelativePath not allowed"))
    } else if (!assetFile.exists()) {
      Left(AssetNotFound(s"Asset $assetRelativePath not found"))
    } else {
      Right(assetFile)
    }
  }

} 
Example 146
Source File: CustomConfigSystemReader.scala    From theGardener   with Apache License 2.0 5 votes vote down vote up
package utils

import java.io.File
import java.io.File.separator

import org.eclipse.jgit.lib.Config
import org.eclipse.jgit.storage.file.FileBasedConfig
import org.eclipse.jgit.util.{FS, SystemReader}

object CustomConfigSystemReader {

  def overrideSystemGitConfig(): Unit = {
    val userGitConfig = new File(s"target${separator}data${separator}gitconfig")
    SystemReader.setInstance(new CustomConfigSystemReader(userGitConfig))
  }
}

class CustomConfigSystemReader(userGitConfig: File) extends SystemReader {
  val proxy = SystemReader.getInstance()

  override def getHostname: String = proxy.getHostname
  override def getenv(variable: String): String = proxy.getenv(variable)
  override def getProperty(key: String): String = proxy.getProperty(key)

  override def getCurrentTime: Long = proxy.getCurrentTime
  override def getTimezone(when: Long): Int = proxy.getTimezone(when)

  override def openJGitConfig(parent: Config, fs: FS): FileBasedConfig = proxy.openJGitConfig(parent, fs)
  override def openUserConfig(parent: Config, fs: FS): FileBasedConfig = new FileBasedConfig(parent, userGitConfig, fs)
  override def openSystemConfig(parent: Config, fs: FS): FileBasedConfig = new FileBasedConfig(parent, null, fs) {
    override def load(): Unit = ()
    override def isOutdated: Boolean = false
  }

} 
Example 147
Source File: CaptchaHelper.scala    From ez-framework   with Apache License 2.0 5 votes vote down vote up
package com.ecfront.ez.framework.service.auth.helper

import java.io.{File, FileOutputStream}

import com.ecfront.ez.framework.core.logger.Logging
import com.github.cage.GCage


object CaptchaHelper extends Logging {

  def generate(text: String): File = {
    val temp = File.createTempFile("ez_captcha_", ".jpg")
    val os = new FileOutputStream(temp)
    try {
      temp.deleteOnExit()
      new GCage().draw(text, os)
      temp
    } catch {
      case e: Throwable =>
        logger.error("Generate captche error.", e)
        null
    } finally {
      os.close()
    }
  }
} 
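A short usage sketch (not from the ez-framework sources); the object name and captcha text are placeholders.

import com.ecfront.ez.framework.service.auth.helper.CaptchaHelper

object CaptchaDemo extends App {
  // generate returns the temporary JPEG on success, or null if drawing failed.
  val image = CaptchaHelper.generate("4x9z")
  if (image != null) println(s"Captcha written to ${image.getAbsolutePath}")
}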
Example 148
Source File: I18NProcessor.scala    From ez-framework   with Apache License 2.0 5 votes vote down vote up
package com.ecfront.ez.framework.core.i18n

import java.io.File
import java.util.regex.Pattern

import com.ecfront.common.Resp
import com.ecfront.ez.framework.core.EZ
import com.ecfront.ez.framework.core.logger.Logging

import scala.io.Source


// Note: this listing omits the enclosing object declaration and the construction of i18nInfo
// (the pattern-to-translation table); a minimal reconstruction of the declaration is shown here.
object I18NProcessor extends Logging {

  def setLanguage(_language: String): Unit = {
    EZ.Info.language = _language
  }

  private val tabR = "\t"

  def process(resp: Resp[_]): Unit = {
    if (resp.message != null && resp.message.nonEmpty) {
      resp.message = i18n(resp.message.replaceAll(tabR, " "))
    }
  }

  def i18n(str: String): String = {
    var newStr = str
    i18nInfo.find(_._1.matcher(str).matches()).foreach {
      matchedItem =>
        val matcher = matchedItem._1.matcher(str)
        newStr = matcher.replaceAll(matchedItem._2(EZ.Info.language))
    }
    newStr
  }

  implicit class Impl(val str: String) {
    def x: String = i18n(str)
  }

} 
Example 149
Source File: AttachmentService.scala    From BacklogMigration-Redmine   with MIT License 5 votes vote down vote up
package com.nulabinc.backlog.r2b.exporter.service

import java.io.{File, FileOutputStream}
import java.net.{HttpURLConnection, URL}
import java.nio.channels.Channels

import com.nulabinc.backlog.migration.common.utils.ControlUtil.using
import com.nulabinc.backlog.migration.common.utils.Logging

object AttachmentService extends Logging {
  private val MAX_REDIRECT_COUNT = 10

  def download(url: URL, file: File): Unit = {
    val redirected = followRedirect(url)

    doDownload(redirected, file)
  }

  private def doDownload(url: URL, file: File): Unit =
    try {
      val rbc = Channels.newChannel(url.openStream())
      val fos = new FileOutputStream(file)
      fos.getChannel.transferFrom(rbc, 0, java.lang.Long.MAX_VALUE)

      rbc.close()
      fos.close()
    } catch {
      case e: Throwable => logger.warn("Download attachment failed: " + e.getMessage)
    }

  private def followRedirect(url: URL, count: Int = 0): URL =
    url.openConnection match {
      case http: HttpURLConnection =>
        http.setRequestMethod("GET")
        http.connect()
        using(http) { connection =>
          connection.getResponseCode match {
            case 301 | 302 | 303 =>
              val newUrl = new URL(connection.getHeaderField("Location"))
              if (count < MAX_REDIRECT_COUNT) followRedirect(newUrl, count + 1) else newUrl
            case _ =>
              url
          }
        }
      case _ =>
        url
    }
} 
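A hedged usage sketch (not part of BacklogMigration-Redmine); the URL and target path are placeholders.

import java.io.File
import java.net.URL
import com.nulabinc.backlog.r2b.exporter.service.AttachmentService

object AttachmentDownloadDemo extends App {
  // Follows up to 10 redirects (301/302/303), then streams the response body into the target file.
  AttachmentService.download(
    new URL("https://redmine.example.com/attachments/download/42/report.pdf"),
    new File("/tmp/report.pdf"))
}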
Example 150
Source File: IterateeMain.scala    From advanced-scala-code   with Apache License 2.0 5 votes vote down vote up
package iteratee

import scala.util.{Failure, Success}

object IterateeMain {
  def fileExample(): Unit = {
    import io.iteratee.monix.task._
    import java.io.File

    val wordsE = readLines(new File("license.txt")).flatMap { line =>
      enumIndexedSeq(line.split("\\W"))
    }
    val noEmptyLinesEE = filter[String](str => str.trim.length > 0)
    val toLowerEE = map[String, String](_.toLowerCase)
    val countWordsI = fold[String, Map[String, Int]](Map.empty) { (acc, next) =>
      acc.get(next) match {
        case None => acc + (next -> 1)
        case Some(num) => acc + (next -> (1 + num))
      }
    }
    val dataT = wordsE.through(noEmptyLinesEE).
      through(toLowerEE).into(countWordsI).map { dataMap =>
      dataMap.toList.sortWith( _._2 > _._2).take(5).map(_._1)
    }
    import monix.execution.Scheduler.Implicits.global
    dataT.runOnComplete {
      case Success(data) => println(data)
      case Failure(th) => th.printStackTrace()
    }

    
  }


  def main(args: Array[String]) {
    import io.iteratee.modules.id._

    // Just one Int
    val singleNumE = enumOne(42)
    val singleNumI = takeI[Int](1)
    val singleNumResult = singleNumE.into(singleNumI)
    println(singleNumResult)

    // Incrementing one Int
    val incrementNumEE = map[Int, Int](_ + 1)
    val incrementedNumResult = singleNumE.through(incrementNumEE).into(singleNumI)
    println(incrementedNumResult)

    // First 10 even numbers
    val naturalsE = iterate(1)(_ + 1)
    val moreThan100EE = filter[Int](_ >= 100)
    val evenFilterEE = filter[Int](_ % 2 == 0)
    val first10I = takeI[Int](10)
    println(naturalsE.through(moreThan100EE).through(evenFilterEE).into(first10I))

    {
      import io.iteratee.modules.eval._
      // Summing N first numbers
      val naturalsE = iterate(1)(_ + 1)
      val limit1kEE = take[Int](30000)
      val sumI = fold[Int, Int](0) { (acc, next) => acc + next }
      println(naturalsE.through(limit1kEE).into(sumI).value)
    }

    fileExample()

  }
} 
Example 151
Source File: TransformerBenchmark.scala    From mleap   with Apache License 2.0 5 votes vote down vote up
package com.truecar.mleap.spark.benchmark

import java.io.{FileInputStream, File}

import ml.bundle.fs.DirectoryBundle
import com.truecar.mleap.runtime.LocalLeapFrame
import com.truecar.mleap.runtime.transformer.Transformer
import com.truecar.mleap.serialization.ml.v1.MlJsonSerializer
import org.scalameter.api._
import org.scalameter.picklers.Implicits._
import spray.json._
import com.truecar.mleap.serialization.mleap.v1.MleapJsonSupport._


object TransformerBenchmark extends Bench.ForkedTime {
  lazy override val executor = {
    SeparateJvmsExecutor(
      Executor.Warmer.Zero,
      Aggregator.min[Double],
      new Measurer.Default)
  }

  val mlSerializer = MlJsonSerializer
  val classLoader = getClass.getClassLoader
  val regressionFile = new File("/tmp/transformer.ml")
  val frameFile = new File("/tmp/frame.json")

  val bundleReader = DirectoryBundle(regressionFile)
  val regression = mlSerializer.deserializeWithClass(bundleReader).asInstanceOf[Transformer]

  val lines = scala.io.Source.fromFile(frameFile).mkString
  val frame = lines.parseJson.convertTo[LocalLeapFrame]

  val ranges = for {
    size <- Gen.range("size")(1000, 10000, 1000)
  } yield 0 until size

  measure method "transform" in {
    using(ranges) in {
      size =>
        size.foreach {
          _ => regression.transform(frame)
        }
    }
  }
} 
Example 152
Source File: SparkTransformerBenchmark.scala    From mleap   with Apache License 2.0 5 votes vote down vote up
package com.truecar.mleap.spark.benchmark

import java.io.{FileInputStream, File}

import com.esotericsoftware.kryo.io.Input
import com.truecar.mleap.runtime.LocalLeapFrame
import com.truecar.mleap.spark.benchmark.util.SparkSerializer
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.{SparkContext, SparkConf}
import org.apache.spark.ml.Transformer
import org.scalameter.Bench
import scala.collection.JavaConverters._
import org.scalameter.api._
import org.scalameter.picklers.Implicits._
import org.apache.log4j.Logger
import org.apache.log4j.Level
import com.truecar.mleap.spark.MleapSparkSupport._
import spray.json._
import com.truecar.mleap.serialization.mleap.v1.MleapJsonSupport._


object SparkTransformerBenchmark extends Bench.ForkedTime {
  lazy override val executor = {
    SeparateJvmsExecutor(
      Executor.Warmer.Zero,
      Aggregator.min[Double],
      new Measurer.Default)
  }

  val classLoader = getClass.getClassLoader
  val regressionFile = new File("/tmp/spark.transformer.kryo")
  val frameFile = new File("/tmp/frame.json")

  val inputStream = new FileInputStream(regressionFile)
  val input = new Input(inputStream)

  val regression: Transformer = SparkSerializer().read(input)
  val lines = scala.io.Source.fromFile(frameFile).mkString
  val frame = lines.parseJson.convertTo[LocalLeapFrame]

  Logger.getLogger("org").setLevel(Level.OFF)
  Logger.getLogger("akka").setLevel(Level.OFF)

  val sparkConf = new SparkConf()
    .setAppName("Spark Transformer Benchmark")
    .setMaster("local[1]")
  val sc = new SparkContext(sparkConf)
  val sqlContext = new SQLContext(sc)

  val rdd = frame.dataset.data.map(a => Row(a.toSeq: _*)).toList.asJava
  val schema = frame.schema.toSpark
  val sparkFrame = sqlContext.createDataFrame(rdd, schema)

  val ranges = for {
    size <- Gen.range("size")(1000, 10000, 1000)
  } yield 0 until size

  measure method "transform" in {
    using(ranges) in {
      size =>
        size.foreach {
          _ => regression.transform(sparkFrame).head
        }
    }
  }

//  sc.stop()
} 
Example 153
Source File: AWTSystemProvider.scala    From scala-game-library   with MIT License 5 votes vote down vote up
package sgl
package awt

import sgl.util._

import java.net.URI
import java.awt.Desktop
import java.io.File

import scala.concurrent.ExecutionContext

trait AWTSystemProvider extends SystemProvider with PartsResourcePathProvider {

  object AWT5System extends System {

    override def exit(): Unit = {
      sys.exit()
    }

    override def currentTimeMillis: Long = java.lang.System.currentTimeMillis
    override def nanoTime: Long = java.lang.System.nanoTime

    override def loadText(path: ResourcePath): Loader[Array[String]] = {
      FutureLoader {
        val localAsset = if(DynamicResourcesEnabled) findDynamicResource(path) else None
        val is = localAsset.map(a => new java.io.FileInputStream(a)).getOrElse(getClass.getClassLoader.getResourceAsStream(path.path))
        if(is == null) {
          throw new ResourceNotFoundException(path)
        }
        scala.io.Source.fromInputStream(is).getLines.toArray
      }
    }

    override def loadBinary(path: ResourcePath): Loader[Array[Byte]] = {
      FutureLoader {
        val localAsset = if(DynamicResourcesEnabled) findDynamicResource(path) else None
        val is = localAsset.map(a => new java.io.FileInputStream(a)).getOrElse(getClass.getClassLoader.getResourceAsStream(path.path))
        if(is == null) {
          throw new ResourceNotFoundException(path)
        }
        val bis = new java.io.BufferedInputStream(is)
        val bytes = new scala.collection.mutable.ListBuffer[Byte]
        var b: Int = 0
        while({ b = bis.read; b != -1}) {
          bytes.append(b.toByte)
        }
        bytes.toArray
      }
    }

    override def openWebpage(uri: URI): Unit = {
      val desktop = if(Desktop.isDesktopSupported()) Desktop.getDesktop() else null
      if(desktop != null && desktop.isSupported(Desktop.Action.BROWSE)) {
        try {
          desktop.browse(uri);
        } catch {
          case (e: Exception) =>
            e.printStackTrace()
        }
      }
    }

  }
  val System = AWT5System

  override val ResourcesRoot = PartsResourcePath(Vector())
  override val MultiDPIResourcesRoot = PartsResourcePath(Vector())

  
  val DynamicResourcesEnabled: Boolean = false
  // TODO: provide a command line flag to control this as well, in particular to give
  //       the asset directory.
 
  def findDynamicResource(path: ResourcePath): Option[File] = {
    def findFromDir(d: File): Option[File] = {
      val asset = new File(d.getAbsolutePath + "/assets/" + path.path)
      if(asset.exists) Some(asset) else None
    }
    def findFromWorkingDir: Option[File] = 
      findFromDir(new File(java.lang.System.getProperty("user.dir")))

    val protectionDomain = this.getClass.getProtectionDomain()
    val codeSource = protectionDomain.getCodeSource()
    if(codeSource == null)
      return findFromWorkingDir
    val jar = new File(codeSource.getLocation.toURI.getPath)
    if(!jar.exists)
      return findFromWorkingDir
    val parent = jar.getParentFile
    if(parent == null)
      return findFromWorkingDir

    findFromDir(parent).orElse(findFromWorkingDir)
  }

  // Centralize the execution context used for asynchronous tasks in the Desktop backend.
  // Could be overridden at wiring time.
  implicit val executionContext: ExecutionContext = ExecutionContext.Implicits.global
} 
Example 154
Source File: SparkFunSuite.scala    From spark-gbtlr   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark

// scalastyle:off
import java.io.File

import org.apache.spark.internal.Logging
import org.apache.spark.util.AccumulatorContext
import org.scalatest.{BeforeAndAfterAll, FunSuite, Outcome}


// The class declaration is missing from this listing; the standard Spark test-suite base class
// signature is reconstructed here so the excerpt is well formed.
abstract class SparkFunSuite extends FunSuite with BeforeAndAfterAll with Logging {

  final protected override def withFixture(test: NoArgTest): Outcome = {
    val testName = test.text
    val suiteName = this.getClass.getName
    val shortSuiteName = suiteName.replaceAll("org.apache.spark", "o.a.s")
    try {
      logInfo(s"\n\n===== TEST OUTPUT FOR $shortSuiteName: '$testName' =====\n")
      test()
    } finally {
      logInfo(s"\n\n===== FINISHED $shortSuiteName: '$testName' =====\n")
    }
  }

} 
Example 155
Source File: Config.scala    From zipkin-mesos-framework   with Apache License 2.0 5 votes vote down vote up
package net.elodina.mesos.zipkin

import java.io.{File, FileInputStream}
import java.net.URI
import java.util.Properties

import net.elodina.mesos.zipkin.utils.{BindAddress, Period}

object Config {
  val DEFAULT_FILE = new File("zipkin-mesos.properties")

  var debug: Boolean = false
  var genTraces: Boolean = false
  var storage: String = "file:zipkin-mesos.json"

  var master: Option[String] = None
  var principal: Option[String] = None
  var secret: Option[String] = None
  var user: Option[String] = None

  var frameworkName: String = "zipkin"
  var frameworkRole: String = "*"
  var frameworkTimeout: Period = new Period("30d")

  var log: Option[File] = None
  var api: Option[String] = None
  var bindAddress: Option[BindAddress] = None

  def apiPort: Int = {
    val port = new URI(getApi).getPort
    if (port == -1) 80 else port
  }

  def replaceApiPort(port: Int): Unit = {
    val prev: URI = new URI(getApi)
    api = Some("" + new URI(
      prev.getScheme, prev.getUserInfo,
      prev.getHost, port,
      prev.getPath, prev.getQuery, prev.getFragment
    ))
  }

  def getApi: String = {
    api.getOrElse(throw new Error("api not initialized"))
  }

  def getMaster: String = {
    master.getOrElse(throw new Error("master not initialized"))
  }

  def getZk: String = {
    master.getOrElse(throw new Error("zookeeper not initialized"))
  }

  private[zipkin] def loadFromFile(file: File): Unit = {
    val props: Properties = new Properties()
    val stream: FileInputStream = new FileInputStream(file)

    props.load(stream)
    stream.close()

    if (props.containsKey("debug")) debug = java.lang.Boolean.valueOf(props.getProperty("debug"))
    if (props.containsKey("genTraces")) genTraces = java.lang.Boolean.valueOf(props.getProperty("genTraces"))
    if (props.containsKey("storage")) storage = props.getProperty("storage")

    if (props.containsKey("master")) master = Some(props.getProperty("master"))
    if (props.containsKey("user")) user = Some(props.getProperty("user"))
    if (props.containsKey("principal")) principal = Some(props.getProperty("principal"))
    if (props.containsKey("secret")) secret = Some(props.getProperty("secret"))

    if (props.containsKey("framework-name")) frameworkName = props.getProperty("framework-name")
    if (props.containsKey("framework-role")) frameworkRole = props.getProperty("framework-role")
    if (props.containsKey("framework-timeout")) frameworkTimeout = new Period(props.getProperty("framework-timeout"))

    if (props.containsKey("log")) log = Some(new File(props.getProperty("log")))
    if (props.containsKey("api")) api = Some(props.getProperty("api"))
    if (props.containsKey("bind-address")) bindAddress = Some(new BindAddress(props.getProperty("bind-address")))
  }

  override def toString: String = {
    s"""
       |debug: $debug, storage: $storage
        |mesos: master=$master, user=${if (user.isEmpty || user.get.isEmpty) "<default>" else user}
        |principal=${principal.getOrElse("<none>")}, secret=${if (secret.isDefined) "*****" else "<none>"}
        |framework: name=$frameworkName, role=$frameworkRole, timeout=$frameworkTimeout
        |api: $api, bind-address: ${bindAddress.getOrElse("<all>")}, genTraces: $genTraces
    """.stripMargin.trim
  }
} 
Example 156
Source File: Storage.scala    From zipkin-mesos-framework   with Apache License 2.0 5 votes vote down vote up
package net.elodina.mesos.zipkin.storage

import java.io.{File, FileWriter}

import org.I0Itec.zkclient.ZkClient
import org.I0Itec.zkclient.exception.ZkNodeExistsException
import org.I0Itec.zkclient.serialize.ZkSerializer
import play.api.libs.json.{Json, Reads, Writes}

import scala.io.Source

trait Storage[T] {
  def save(value: T)(implicit writes: Writes[T])

  def load(implicit reads: Reads[T]): Option[T]
}

case class FileStorage[T](file: String) extends Storage[T] {
  override def save(value: T)(implicit writes: Writes[T]) {
    val writer = new FileWriter(file)
    try {
      writer.write(Json.stringify(Json.toJson(value)))
    } finally {
      writer.close()
    }
  }

  override def load(implicit reads: Reads[T]): Option[T] = {
    if (!new File(file).exists()) None
    else Json.parse(Source.fromFile(file).mkString).asOpt[T]
  }
}

case class ZkStorage[T](zk: String) extends Storage[T] {
  val (zkConnect, path) = zk.span(_ != '/')
  createChrootIfRequired()

  private def createChrootIfRequired() {
    if (path != "") {
      val client = zkClient
      try {
        client.createPersistent(path, true)
      }
      finally {
        client.close()
      }
    }
  }

  private def zkClient: ZkClient = new ZkClient(zkConnect, 30000, 30000, ZKStringSerializer)

  override def save(value: T)(implicit writes: Writes[T]) {
    val client = zkClient
    val json = Json.stringify(Json.toJson(value))
    try {
      client.createPersistent(path, json)
    }
    catch {
      case e: ZkNodeExistsException => client.writeData(path, json)
    }
    finally {
      client.close()
    }
  }

  override def load(implicit reads: Reads[T]): Option[T] = {
    val client = zkClient
    try {
      Option(client.readData(path, true).asInstanceOf[String]).flatMap(Json.parse(_).asOpt[T])
    }
    finally {
      client.close()
    }
  }
}

private object ZKStringSerializer extends ZkSerializer {
  def serialize(data: Object): Array[Byte] = data.asInstanceOf[String].getBytes("UTF-8")

  def deserialize(bytes: Array[Byte]): Object = {
    if (bytes == null) null
    else new String(bytes, "UTF-8")
  }
} 
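A small round-trip sketch for FileStorage (not from the zipkin-mesos-framework sources), assuming play-json is on the classpath; the ClusterState payload type is invented for the demo.

import play.api.libs.json.{Format, Json}
import net.elodina.mesos.zipkin.storage.FileStorage

object FileStorageDemo extends App {
  // Hypothetical payload; any type with a play-json Reads/Writes pair works.
  case class ClusterState(frameworkId: Option[String], taskIds: List[String])
  implicit val format: Format[ClusterState] = Json.format[ClusterState]

  val storage = FileStorage[ClusterState]("/tmp/zipkin-mesos.json")
  storage.save(ClusterState(Some("fw-1"), List("collector-0", "web-0")))
  println(storage.load) // prints the saved state wrapped in Some(...)
}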
Example 157
Source File: ZipkinComponentServer.scala    From zipkin-mesos-framework   with Apache License 2.0 5 votes vote down vote up
package net.elodina.mesos.zipkin.components

import java.io.File

import net.elodina.mesos.zipkin.http.HttpServer

import scala.sys.process.Process
import scala.sys.process.ProcessBuilder

class ZipkinComponentServer {

  var process: Process = null

  @volatile var shutdownInitiated = false

  def isStarted = Option(process).isDefined

  def start(taskConfig: TaskConfig, taskId: String) = {
    val jarMask = ZipkinComponent.getComponentFromTaskId(taskId) match {
      case "collector" => HttpServer.collectorMask
      case "query" => HttpServer.queryMask
      case "web" => HttpServer.webMask
      case _ => throw new IllegalArgumentException(s"Illegal component name found in task id: $taskId")
    }
    val distToLaunch = initJar(jarMask)
    process = configureProcess(taskConfig, distToLaunch).run()
    //TODO: consider logs redirect
  }

  def await(): Option[Int] = {
    if (isStarted) Some(process.exitValue()) else None
  }

  def acknowledgeShutdownStatus(): Boolean = {
    val oldStatus = shutdownInitiated
    if (shutdownInitiated) shutdownInitiated = false
    oldStatus
  }

  def stop(shutdownInitiated: Boolean) {
    if (isStarted) {
      this.shutdownInitiated = shutdownInitiated
      process.destroy()
    }
  }

  private def initJar(jarMask: String): File = {
    new File(".").listFiles().find(file => file.getName.matches(jarMask)) match {
      case None => throw new IllegalStateException("Corresponding jar not found")
      case Some(componentDist) => componentDist
    }
  }

  private def configureProcess(taskConfig: TaskConfig, distToLaunch: File): ProcessBuilder = {
    val configFileArg = taskConfig.configFile.map(Seq("-f", _))
    var command = Seq("java", "-jar", distToLaunch.getCanonicalPath)
    configFileArg.foreach(command ++= _)
    command ++= taskConfig.flags.map { case (k: String, v: String) => s"-$k=$v" }
    Process(command, Some(new File(".")), taskConfig.env.toList: _*)
  }
} 
Example 158
Source File: S3.scala    From teamcity-s3-plugin   with Apache License 2.0 5 votes vote down vote up
package com.gu.teamcity

import java.io.{InputStream, File}

import com.amazonaws.ClientConfiguration
import com.amazonaws.auth.{AWSCredentialsProviderChain, DefaultAWSCredentialsProviderChain}
import com.amazonaws.services.s3.AmazonS3Client
import com.amazonaws.services.s3.model.{ObjectMetadata, PutObjectRequest, CannedAccessControlList}
import com.amazonaws.services.s3.transfer.TransferManager
import jetbrains.buildServer.serverSide.SBuild

import scala.util.{Success, Try}

class S3(config: S3ConfigManager) {
  val credentialsProvider = {
    val provider = new AWSCredentialsProviderChain(config, new DefaultAWSCredentialsProviderChain())
    provider.setReuseLastProvider(false)
    provider
  }

  val transferManager = new TransferManager(
    new AmazonS3Client(credentialsProvider, new ClientConfiguration().withMaxErrorRetry(2))
  )

  def upload(bucket: String, build: SBuild, fileName: String, contents: InputStream, fileSize: Long): Try[Unit] =
    Try {
      val uploadDirectory = s"${S3Plugin.cleanFullName(build)}/${build.getBuildNumber}"
      val metadata = {
        val md = new ObjectMetadata()
        md.setContentLength(fileSize)
        md
      }
      val req = new PutObjectRequest(bucket, s"$uploadDirectory/$fileName", contents, metadata)
      req.withCannedAcl(CannedAccessControlList.BucketOwnerFullControl)
      val upload = transferManager.upload(req)
      upload.waitForUploadResult()
    }

  def upload(bucket: String, build: SBuild, fileName: String, file: File): Try[Unit] =
    Try {
      val uploadDirectory = s"${S3Plugin.cleanFullName(build)}/${build.getBuildNumber}"
      val req = new PutObjectRequest(bucket, s"$uploadDirectory/$fileName", file)
      req.withCannedAcl(CannedAccessControlList.BucketOwnerFullControl)
      val upload = transferManager.upload(req)
      upload.waitForUploadResult()
    }

} 
Example 159
Source File: S3ConfigManager.scala    From teamcity-s3-plugin   with Apache License 2.0 5 votes vote down vote up
package com.gu.teamcity

import java.io.{File, PrintWriter}

import com.amazonaws.auth.{BasicAWSCredentials, AWSCredentialsProvider, AWSCredentials}
import jetbrains.buildServer.serverSide.ServerPaths
import org.json4s._
import org.json4s.native.JsonMethods._
import org.json4s.native.Serialization
import org.json4s.native.Serialization._

case class S3Config(
  artifactBucket: Option[String], buildManifestBucket: Option[String], tagManifestBucket: Option[String],
  awsAccessKey: Option[String], awsSecretKey: Option[String]
)

class S3ConfigManager(paths: ServerPaths) extends AWSCredentialsProvider {
  implicit val formats = Serialization.formats(NoTypeHints)

  val configFile = new File(s"${paths.getConfigDir}/s3.json")

  private[teamcity] var config: Option[S3Config] = {
    if (configFile.exists()) {
      parse(configFile).extractOpt[S3Config]
    } else None
  }

  def artifactBucket: Option[String] = config.flatMap(_.artifactBucket)
  def buildManifestBucket: Option[String] = config.flatMap(_.buildManifestBucket)
  def tagManifestBucket: Option[String] = config.flatMap(_.tagManifestBucket)

  private[teamcity] def update(config: S3Config): Unit = {
    this.config = Some(if (config.awsSecretKey.isEmpty && config.awsAccessKey == this.config.flatMap(_.awsAccessKey)) {
      config.copy(awsSecretKey = this.config.flatMap(_.awsSecretKey))
    } else config)
  }

  def updateAndPersist(newConfig: S3Config): Unit = {
    synchronized {
      update(newConfig)
      val out = new PrintWriter(configFile, "UTF-8")
      try { writePretty(config, out) }
      finally { out.close }
    }
  }

  def details: Map[String, Option[String]] = Map(
    "artifactBucket" -> artifactBucket,
    "buildManifestBucket" -> buildManifestBucket,
    "tagManifestBucket" -> tagManifestBucket,
    "accessKey" -> config.flatMap(_.awsAccessKey)
  )

  override def getCredentials: AWSCredentials = (for {
    c <- config
    accessKey <- c.awsAccessKey
    secretKey <- c.awsSecretKey
  } yield new BasicAWSCredentials(accessKey, secretKey)).getOrElse(null) // Yes, this is sad

  override def refresh(): Unit = ()
}

object S3ConfigManager {
  val bucketElement = "bucket"
  val s3Element = "S3"
} 
Example 160
Source File: ArtifactUploader.scala    From teamcity-s3-plugin   with Apache License 2.0 5 votes vote down vote up
package com.gu.teamcity

import java.io.File
import java.util.Date

import jetbrains.buildServer.messages.{BuildMessage1, DefaultMessagesInfo, Status}
import jetbrains.buildServer.serverSide.{BuildServerAdapter, SRunningBuild}

import scala.util.control.NonFatal

class ArtifactUploader(config: S3ConfigManager, s3: S3) extends BuildServerAdapter {

  override def beforeBuildFinish(runningBuild: SRunningBuild) {
    def report(msg: String): Unit = {
      runningBuild.getBuildLog().message(msg,Status.NORMAL,new Date,DefaultMessagesInfo.MSG_TEXT,DefaultMessagesInfo.SOURCE_ID,null)
    }

    report("About to upload artifacts to S3")


    getAllFiles(runningBuild).foreach { case (name: String, artifact: File) =>
      config.artifactBucket match {
        case None => report("Target artifactBucket was not set")
        case Some(bucket) =>
          s3.upload(bucket, runningBuild, name, artifact).recover {
            case NonFatal(e) =>
              runningBuild.getBuildLog().message(s"Error uploading artifacts: ${e.getMessage}",
                  Status.ERROR,new Date,DefaultMessagesInfo.MSG_BUILD_FAILURE,DefaultMessagesInfo.SOURCE_ID,null)
          }
      }
    }

    report("Artifact S3 upload complete")
  }

  def getAllFiles(runningBuild: SRunningBuild): Seq[(String,File)] = {
    if (!runningBuild.isArtifactsExists) {
      Nil
    } else {
      ArtifactUploader.getChildren(runningBuild.getArtifactsDirectory)
    }
  }

  private def normalMessage(text: String) =
    new BuildMessage1(DefaultMessagesInfo.SOURCE_ID, DefaultMessagesInfo.MSG_TEXT, Status.NORMAL, new Date, text)
}

object ArtifactUploader {

  def getChildren(file: File, paths: Seq[String] = Nil, current: String = ""): Seq[(String, File)] = {
    file.listFiles.toSeq.flatMap {
      child =>
        if (child.isHidden) {
          Seq()
        } else {
          val newPath = current + child.getName
          if (child.isDirectory) {
            getChildren(child, paths, newPath + File.separator)
          } else {
            Seq((newPath, child))
          }
        }
    }
  }

} 
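A minimal sketch (not part of the teamcity-s3-plugin) showing the directory walk on its own; the demo object is hypothetical and simply prints each artifact with the relative path used as its S3 key suffix.

import java.io.File
import com.gu.teamcity.ArtifactUploader

object ListArtifactsDemo extends App {
  // Walks a directory, skipping hidden files, exactly as the uploader does before pushing to S3.
  val root = new File(args.headOption.getOrElse("."))
  ArtifactUploader.getChildren(root).foreach { case (relativePath, file) =>
    println(s"$relativePath (${file.length()} bytes)")
  }
}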
Example 161
Source File: Codegen.scala    From caliban   with Apache License 2.0 5 votes vote down vote up
package caliban.tools

import java.io.{ File, PrintWriter }

import caliban.parsing.adt.Document
import zio.{ Task, UIO }

object Codegen {
  def generate(
    arguments: Options,
    writer: (Document, String, Option[String], String) => String
  ): Task[Unit] = {
    val s           = ".*/scala/(.*)/(.*).scala".r.findFirstMatchIn(arguments.toPath)
    val packageName = arguments.packageName.orElse(s.map(_.group(1).split("/").mkString(".")))
    val objectName  = s.map(_.group(2)).getOrElse("Client")
    val effect      = arguments.effect.getOrElse("zio.UIO")
    val loader      = getSchemaLoader(arguments.schemaPath, arguments.headers)
    for {
      schema    <- loader.load
      code      = writer(schema, objectName, packageName, effect)
      formatted <- Formatter.format(code, arguments.fmtPath)
      _ <- Task(new PrintWriter(new File(arguments.toPath)))
            .bracket(q => UIO(q.close()), pw => Task(pw.println(formatted)))
    } yield ()
  }

  private def getSchemaLoader(path: String, schemaPathHeaders: Option[List[Options.Header]]): SchemaLoader =
    if (path.startsWith("http")) SchemaLoader.fromIntrospection(path, schemaPathHeaders)
    else SchemaLoader.fromFile(path)

} 
Example 162
Source File: CodeExampleImpl.scala    From slinky   with MIT License 5 votes vote down vote up
package slinky.docs

import java.io.File

import slinky.core.facade.ReactElement

import scala.io.Source
import scala.reflect.macros.blackbox

object CodeExampleImpl {
  def text(c: blackbox.Context)(exampleLocation: c.Expr[String]): c.Expr[ReactElement] = {
    import c.universe._
    val Literal(Constant(loc: String)) = exampleLocation.tree
    val inputFile = new File(s"docs/src/main/scala/${loc.split('.').mkString("/")}.scala")
    val enclosingPackage = loc.split('.').init.mkString(".")

    val fileContent = Source.fromFile(inputFile).mkString

    val innerCode = fileContent.split('\n')

    val textToDisplay = innerCode
      .map(_.replaceAllLiterally("//display:", ""))
      .filterNot(_.endsWith("//nodisplay"))
      .dropWhile(_.trim.isEmpty)
      .reverse.dropWhile(_.trim.isEmpty).reverse
      .mkString("\n")

    val codeToRun = innerCode.filter(_.startsWith("//run:")).map(_.replaceAllLiterally("//run:", "")).mkString("\n")

    c.Expr[ReactElement](
      q"""{
         import ${c.parse(enclosingPackage)}._

         _root_.slinky.docs.CodeExampleInternal(codeText = ${Literal(Constant(textToDisplay))}, demoElement = {${c.parse(codeToRun)}})
       }""")
  }
} 
Example 163
Source File: HttpSlippyTileReader.scala    From geotrellis-osm-elevation   with Apache License 2.0 5 votes vote down vote up
package geotrellis.osme.core

import geotrellis.vector._
import geotrellis.raster._
import geotrellis.raster.io.geotiff._
import geotrellis.spark._
import geotrellis.spark.io.s3._

import geotrellis.spark.io.slippy._
import geotrellis.util.Filesystem

import org.apache.commons.io.FileUtils
import org.apache.commons.io.filefilter._
import org.apache.commons.io.IOUtils._
import org.apache.spark._
import org.apache.spark.rdd._
import java.net._
import java.io.File

class HttpSlippyTileReader[T](pathTemplate: String)(fromBytes: (SpatialKey, Array[Byte]) => T) extends SlippyTileReader[T] {
    def getURL(template: String, z: Int, x: Int, y: Int) = 
        template.replace("{z}", z.toString).replace("{x}", x.toString).replace("{y}", y.toString)
    def getByteArray(url: String) = {
      val inStream = new URL(url).openStream()
      try {
        toByteArray(inStream)
      } finally {
        inStream.close()
      }
    }

    def read(zoom: Int)(implicit sc: SparkContext): RDD[(SpatialKey, T)] = ???
    def read(zoom: Int, key: SpatialKey): T = fromBytes(key, getByteArray(getURL(pathTemplate, zoom, key.col, key.row)))
    override def read(zoom: Int, x: Int, y: Int): T =
        read(zoom, SpatialKey(x, y))
} 
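A hedged usage sketch (not from geotrellis-osm-elevation); the tile-server template and z/x/y values are placeholders, and the decoder simply keeps the raw bytes instead of building a Tile.

import geotrellis.spark.SpatialKey
import geotrellis.osme.core.HttpSlippyTileReader

object SingleTileDemo extends App {
  val reader = new HttpSlippyTileReader[Array[Byte]]("https://tiles.example.com/{z}/{x}/{y}.png")(
    (_, bytes) => bytes)
  val bytes = reader.read(12, SpatialKey(654, 1583))
  println(s"Fetched ${bytes.length} bytes")
}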
Example 164
Source File: ElevationOverlay.scala    From geotrellis-osm-elevation   with Apache License 2.0 5 votes vote down vote up
package geotrellis.osme.core

import java.io.{BufferedWriter, FileWriter, File}

import com.vividsolutions.jts.geom.{LineString, MultiLineString}
import geotrellis.raster.io.geotiff.SinglebandGeoTiff
import geotrellis.vector.io.json.{GeoJson, JsonFeatureCollection}
import scala.collection.immutable.Map
import spray.json._
import DefaultJsonProtocol._
import geotrellis.vector.io.json.FeatureFormats.writeFeatureJson
import geotrellis.vector.io.json.GeometryFormats._
import geotrellis.vector.densify.DensifyMethods
import geotrellis.vector.dissolve.DissolveMethods
import geotrellis.vector._



    // Note: this excerpt is truncated. The enclosing ElevationOverlay object, its
    // apply(geotiff, multiLine) entry point (used in ElevationSpec below), and the derivation
    // of `segments` and `rasterExtent` from those inputs are not included in the listing.
    val segmentsFeatures = segments.map { segment =>
       val center = segment.centroid match {
         case PointResult(p) => p
         case NoResult => throw new Exception("No result found in PointOrNoResult")
       }
       val (col, row) = rasterExtent.mapToGrid(center)
       val elevation = geotiff.tile.getDouble(col, row)
       val meanvMap: Map[String, Double] = Map("MEANV" -> elevation)
       LineFeature(segment, meanvMap)
     }

    return segmentsFeatures.toTraversable


  }
} 
Example 165
Source File: ElevationSpec.scala    From geotrellis-osm-elevation   with Apache License 2.0 5 votes vote down vote up
package geotrellis.osme.core

import java.io.{FileWriter, BufferedWriter, File}

import geotrellis.raster.io.geotiff.SinglebandGeoTiff
import geotrellis.vector.{Feature, Line, LineFeature}
import geotrellis.vector.io.json.GeoJson._
import spray.json.DefaultJsonProtocol._
import geotrellis.vector.io.json.{JsonFeatureCollection, GeoJson}
import spray.json.JsonReader
import scala.io.Source
import org.scalatest._


class ElevationSpec extends FunSpec with Matchers {

  def sharedData = {

    val geojson = Source.fromFile("data/imgn36w100vector.geojson").getLines.mkString
    val gjCol = parse[JsonFeatureCollection](geojson)

    new {
      val geotiff = SinglebandGeoTiff("data/imgn36w100_13_3_3.tif")
      val multiLine = gjCol.getAllLines().toMultiLine
      val elevationGeoJson = ElevationOverlay(geotiff, multiLine)
    }

  }

  describe("Core spec") {

    val numInputLines = sharedData.multiLine.lines.size
    val numOutputLines = sharedData.elevationGeoJson.size
    val ratio = numOutputLines / numInputLines
    println(s"Ratio of input lines to output lines: $ratio : 1")

    it("returned geojson should contain the MEANV property") {
      val elevationFeatures =  sharedData.elevationGeoJson
      val hasMeanV = elevationFeatures.forall(feat => feat.data.contains("MEANV"))
      assert(hasMeanV)
    }

    it("should produce a geojson file that can be put into geocolor.io") {
      val elevationFeatures =  sharedData.elevationGeoJson
      val jsonFeatures = JsonFeatureCollection(elevationFeatures)

      val file = new File("geocolor_test.json")
      val bw = new BufferedWriter(new FileWriter(file))
      bw.write(jsonFeatures.toJson.prettyPrint)
      bw.close()
    }

    it("Every feature should intersect the tile extent") {
      val elevationFeatures =  sharedData.elevationGeoJson
      val rasterPoly =  sharedData.geotiff.rasterExtent.extent.toPolygon()
      val doesIntersect = elevationFeatures.forall(feat => rasterPoly.intersects(feat.geom))
      assert(doesIntersect)
    }


  }
} 
Example 166
Source File: Logger.scala    From shapenet-viewer   with MIT License 5 votes vote down vote up
package edu.stanford.graphics.shapenet.util

import org.slf4j.LoggerFactory
import java.io.File

import org.slf4j.bridge.SLF4JBridgeHandler
import uk.org.lidalia.sysoutslf4j.context.SysOutOverSLF4J


// The start of this listing is truncated: the enclosing `object Logger` and the first parameters
// of this file-appender helper are missing. The signature below is a minimal reconstruction
// (parameter names taken from the method body); the original name and defaults may differ.
object Logger {

  def attachFileAppender(filename: String,
                         pattern: String,
                         loggerName: String = org.slf4j.Logger.ROOT_LOGGER_NAME,
                         additive: Boolean = false) = {
    import ch.qos.logback.classic.spi.ILoggingEvent
    import ch.qos.logback.classic.Level
    import ch.qos.logback.classic.LoggerContext
    import ch.qos.logback.classic.encoder.PatternLayoutEncoder
    import ch.qos.logback.core.FileAppender

    // Make sure log directory is created
    val file: File = new File(filename)
    val parent: File = file.getParentFile
    if (parent != null) parent.mkdirs

    val loggerContext = LoggerFactory.getILoggerFactory().asInstanceOf[LoggerContext]
    val logger = loggerContext.getLogger(loggerName)

    // Setup pattern
    val patternLayoutEncoder = new PatternLayoutEncoder()
    patternLayoutEncoder.setPattern(pattern)
    patternLayoutEncoder.setContext(loggerContext)
    patternLayoutEncoder.start()

    // Setup appender
    val fileAppender = new FileAppender[ILoggingEvent]()
    fileAppender.setFile(filename)
    fileAppender.setEncoder(patternLayoutEncoder)
    fileAppender.setContext(loggerContext)
    fileAppender.start()

    // Attach appender to logger
    logger.addAppender(fileAppender)
    //logger.setLevel(Level.DEBUG)
    logger.setAdditive(additive)

    fileAppender.getName
  }

  def detachAppender(appenderName: String, loggerName: String = org.slf4j.Logger.ROOT_LOGGER_NAME): Unit = {
    import ch.qos.logback.classic.LoggerContext

    val loggerContext = LoggerFactory.getILoggerFactory().asInstanceOf[LoggerContext]
    val logger = loggerContext.getLogger(loggerName)
    logger.detachAppender(appenderName)
  }

  def getLogger(clazz: Class[_]): org.slf4j.Logger = {
    LoggerFactory.getLogger(clazz)
  }

  def getLogger(name: String): org.slf4j.Logger = {
    LoggerFactory.getLogger(name)
  }
}

trait Loggable {
  lazy val logger = Logger.getLogger(this.getClass)

  def startTrack(name: String): Unit = {
    logger.debug("Starting " + name)
  }

  def endTrack(name: String): Unit = {
    logger.debug("Finished " + name)
  }
} 
Example 167
Source File: FullId.scala    From shapenet-viewer   with MIT License 5 votes vote down vote up
package edu.stanford.graphics.shapenet.common

import java.io.File

import scala.util.matching.Regex


case class FullId(source: String, id: String) {
  lazy val fullid = source + "." + id
}

object FullId {
  val fullIdRegex = new Regex("([a-zA-z0-9_-]+)\\.([a-zA-z0-9_-]+)")
  def apply(fullid: String, defaultSource: Option[String] = None): FullId = {
    val dotIndex = fullid.indexOf('.')
    val (source, id) = if (fullid.startsWith("http://") || fullid.startsWith("https://")) {
      ("raw", fullid)
    } else if (fullid.startsWith("file://")) {
      ("raw", fullid.substring(7))
    } else if (fullid.startsWith("/")) {
      ("raw", fullid)
    } else if (new File(fullid).isAbsolute) {
      ("raw", fullid)
    } else if (dotIndex > 0) {
      (fullid.substring(0, dotIndex), fullid.substring(dotIndex + 1))
    } else {
      val s = defaultSource.getOrElse(if (fullid.contains("scene")) "wssScenes" else "3dw")
      (s, fullid)
    }
    new FullId(source,id)
  }
  def matches(id1: String, id2: String): Boolean = {
    val f1 = FullId(id1)
    val f2 = FullId(id2)
    f1 == f2
  }
  def isFullId(s: String): Boolean = {
    fullIdRegex.pattern.matcher(s).matches()
  }
} 
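A few illustrative calls (not from the shapenet-viewer sources) showing how FullId.apply splits ids; the demo object is hypothetical.

import edu.stanford.graphics.shapenet.common.FullId

object FullIdDemo extends App {
  // source.id strings split on the first dot; absolute paths and URLs fall back to the "raw" source.
  println(FullId("3dw.abc123"))                          // FullId(3dw,abc123)
  println(FullId("/data/models/chair.obj"))              // FullId(raw,/data/models/chair.obj)
  println(FullId("abc123", defaultSource = Some("wss"))) // FullId(wss,abc123)
  println(FullId.isFullId("3dw.abc123"))                 // true
}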
Example 168
Source File: ColorPalette.scala    From shapenet-viewer   with MIT License 5 votes vote down vote up
package edu.stanford.graphics.shapenet.colors

import java.awt.Color
import javax.imageio.ImageIO
import java.io.File

import edu.stanford.graphics.shapenet.Constants


trait ColorPalette {
  def getColor(id: Int): Color
  def getColorCount(): Int = -1

  def getColor(id: Int, alpha: Float): Color = {
    val c = getColor(id)
    edu.stanford.graphics.shapenet.colors.getColor(c, alpha)
  }
}

class ColorBar(rgbColors: Array[Color]) extends ColorPalette {
  val nColors = rgbColors.length
  def getColor(r: Double): Color = getColor((r*(nColors-1)).toInt)
  def getColor(id: Int): Color = rgbColors(id % nColors)
  override def getColorCount() = nColors
}

object ColorBar {
  val texturesDir = Constants.ASSETS_DIR + "Textures" + File.separator
  lazy val coolwarmBar = ColorBar(texturesDir + "Cool2WarmBar.png")
  lazy val warmBar = ColorBar(texturesDir + "heatmap.png")
  def apply(filename: String): ColorBar = {
    val img = ImageIO.read(new File(filename))
    val rgb = Array.ofDim[Color](img.getWidth)
    for (x <- 0 until rgb.length) {
      rgb(x) = new Color(img.getRGB(x, 0))
    }
    new ColorBar(rgb)
  }
}

object PhiColorPalette extends ColorPalette {
  def getColor(id: Int): Color = {
    val startColor = new Color(0x4FD067)
    val hsb = Color.RGBtoHSB(startColor.getRed, startColor.getGreen, startColor.getBlue, null)
    val invPhi = 1.0/Constants.phi
    var hue = hsb(0) + id*invPhi
    hue = hue - math.floor(hue)
    val c = Color.getHSBColor(hue.toFloat, 0.5f, 0.95f)
    // Switch blue and green for nice pretty colors
    new Color(c.getRed, c.getBlue, c.getGreen)
  }
}

object DefaultColorPalette extends ColorPalette {
  def getColor(id: Int): Color = {
    var h = (-3.88 * id) % (2*Math.PI)
    if (h<0) h += 2*Math.PI
    h /= 2*Math.PI
    val c = Color.getHSBColor(h.toFloat, (0.4 + 0.2 * Math.sin(0.42 * id)).toFloat, 0.5f)
    c
  }
} 
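A small sketch (not from the shapenet-viewer sources) printing a handful of palette colors as hex; the object name is made up.

import edu.stanford.graphics.shapenet.colors.PhiColorPalette

object PaletteDemo extends App {
  // Consecutive ids advance the hue by 1/phi of the color wheel, keeping neighbours visually distinct.
  (0 until 5).foreach { id =>
    val c = PhiColorPalette.getColor(id)
    println(f"id=$id -> #${c.getRGB & 0xFFFFFF}%06X")
  }
}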
Example 169
Source File: Summarizer.scala    From berkeley-doc-summarizer   with GNU General Public License v3.0 5 votes vote down vote up
package edu.berkeley.nlp.summ

import java.io.File
import edu.berkeley.nlp.entity.ConllDocReader
import edu.berkeley.nlp.entity.coref.CorefDocAssembler
import edu.berkeley.nlp.entity.coref.MentionPropertyComputer
import edu.berkeley.nlp.entity.coref.NumberGenderComputer
import edu.berkeley.nlp.entity.lang.EnglishCorefLanguagePack
import edu.berkeley.nlp.entity.lang.Language
import edu.berkeley.nlp.futile.LightRunner
import edu.berkeley.nlp.futile.fig.basic.IOUtils
import edu.berkeley.nlp.futile.util.Logger
import edu.berkeley.nlp.summ.data.SummDoc
import edu.berkeley.nlp.summ.preprocess.DiscourseDependencyParser
import edu.berkeley.nlp.summ.preprocess.EDUSegmenter
import edu.berkeley.nlp.summ.data.DiscourseDepExProcessed


object Summarizer {
  
  val numberGenderPath = "data/gender.data";
  val segmenterPath = "models/edusegmenter.ser.gz"
  val discourseParserPath = "models/discoursedep.ser.gz"
  val modelPath = "models/summarizer-full.ser.gz"
  
  val inputDir = ""
  val outputDir = ""
  
  // Indicates that we shouldn't do any discourse preprocessing; this is only appropriate
  // for the sentence-extractive version of the system
  val noRst = false
  
  // Summary budget, in words. Set this to whatever you want it to.
  val budget = 50
  
  def main(args: Array[String]) {
    LightRunner.initializeOutput(Summarizer.getClass())
    LightRunner.populateScala(Summarizer.getClass(), args)
    
    Logger.logss("Loading model...")
    val model = IOUtils.readObjFile(modelPath).asInstanceOf[CompressiveAnaphoraSummarizer]
    Logger.logss("Model loaded!")
    val (segmenter, discourseParser) = if (noRst) {
      (None, None)
    } else {
      Logger.logss("Loading segmenter...")
      val tmpSegmenter = IOUtils.readObjFile(segmenterPath).asInstanceOf[EDUSegmenter]
      Logger.logss("Segmenter loaded!")
      Logger.logss("Loading discourse parser...")
      val tmpDiscourseParser = IOUtils.readObjFile(discourseParserPath).asInstanceOf[DiscourseDependencyParser]
      Logger.logss("Discourse parser loaded!")
      (Some(tmpSegmenter), Some(tmpDiscourseParser))
    }
    
    val numberGenderComputer = NumberGenderComputer.readBergsmaLinData(numberGenderPath);
    val mpc = new MentionPropertyComputer(Some(numberGenderComputer))
    
    val reader = new ConllDocReader(Language.ENGLISH)
    val assembler = new CorefDocAssembler(new EnglishCorefLanguagePack, true)
    val filesToSummarize = new File(inputDir).listFiles()
    for (file <- filesToSummarize) {
      val conllDoc = reader.readConllDocs(file.getAbsolutePath).head
      val corefDoc = assembler.createCorefDoc(conllDoc, mpc)
      val summDoc = SummDoc.makeSummDoc(conllDoc.docID, corefDoc, Seq())
      val ex = if (noRst) {
        DiscourseDepExProcessed.makeTrivial(summDoc)
      } else {
        DiscourseDepExProcessed.makeWithEduAndSyntactic(summDoc, segmenter.get, discourseParser.get)
      }
      val summaryLines = model.summarize(ex, budget, true)
      val outWriter = IOUtils.openOutHard(outputDir + "/" + file.getName)
      for (summLine <- summaryLines) {
        outWriter.println(summLine)
      }
      outWriter.close
    }
    LightRunner.finalizeOutput()
  }
} 
Example 170
Source File: EDUAligner.scala    From berkeley-doc-summarizer   with GNU General Public License v3.0 5 votes vote down vote up
package edu.berkeley.nlp.summ.data

import java.io.File

import scala.collection.mutable.ArrayBuffer

import edu.berkeley.nlp.entity.coref.MentionPropertyComputer
import edu.berkeley.nlp.entity.coref.NumberGenderComputer
import edu.berkeley.nlp.futile.util.Logger

object EDUAligner {
  
  def align(leafWords: Seq[Seq[String]], docSents: Seq[DepParse]) = {
    var currSentIdx = 0
    var currWordIdx = 0
    val leafSpans = new ArrayBuffer[((Int,Int),(Int,Int))]
    for (i <- 0 until leafWords.size) {
      val start = (currSentIdx, currWordIdx)
      val currLen = docSents(currSentIdx).size
      require(currWordIdx + leafWords(i).size <= currLen,
              currWordIdx + " " + leafWords(i).size + " " + currLen + "\nsent = " + docSents(currSentIdx).getWords.toSeq + ", leaf words = " + leafWords(i).toSeq)
      var leafWordIdx = 0
      while (leafWordIdx < leafWords(i).size) {
        val docWord = docSents(currSentIdx).getWord(currWordIdx)
        val leafWord = leafWords(i)(leafWordIdx)
        val currWordsEqual = docWord == leafWord
        val currWordsEffectivelyEqual = docWord.contains("'") || docWord.contains("`") // Ignore some punc symbols because they're weird
        // Spurious period but last thing ended in period, so it was probably added by the tokenizer (like "Ltd. .")
        if (!currWordsEqual && docWord == "." && currWordIdx > 0 && docSents(currSentIdx).getWord(currWordIdx - 1).endsWith(".")) {
          currWordIdx += 1
          if (currWordIdx == docSents(currSentIdx).size) {
            currSentIdx += 1
            currWordIdx = 0
          }
          // N.B. don't advance leafWordIdx
        } else {
          require(currWordsEqual || currWordsEffectivelyEqual, docWord + " :: " + leafWord + "\nsent = " + docSents(currSentIdx).getWords.toSeq + ", leaf words = " + leafWords(i).toSeq)
          currWordIdx += 1
          if (currWordIdx == docSents(currSentIdx).size) {
            currSentIdx += 1
            currWordIdx = 0
          }
          leafWordIdx += 1
        }
      }
      val end = if (currWordIdx == 0) {
        (currSentIdx - 1, docSents(currSentIdx - 1).size)
      } else {
        (currSentIdx, currWordIdx)
      }
      leafSpans += start -> end
//        if (currWordIdx == docSents(currSentIdx).size) {
//          currSentIdx += 1
//          currWordIdx = 0
//        }
    }
    leafSpans
//    }
  }
  
  def main(args: Array[String]) {
    val allTreeFiles = new File("data/RSTDiscourse/data/RSTtrees-WSJ-main-1.0/ALL-FILES/").listFiles.sortBy(_.getName).filter(_.getName.endsWith(".out.dis"))
    val allTrees = allTreeFiles.map(file => DiscourseTreeReader.readDisFile(file.getAbsolutePath))
//    val allSummDocs = new File("data/RSTDiscourse/data/RSTtrees-WSJ-main-1.0/ALL-FILES-PREPROC/").listFiles.sortBy(_.getName))
    val numberGenderComputer = NumberGenderComputer.readBergsmaLinData("data/gender.data");
    val mpc = new MentionPropertyComputer(Some(numberGenderComputer))
    val allSummDocFiles = new File("data/RSTDiscourse/data/RSTtrees-WSJ-main-1.0/ALL-FILES-PROC2/").listFiles.sortBy(_.getName)
    val allSummDocs = allSummDocFiles.map(file => SummDoc.readSummDocNoAbstract(file.getAbsolutePath, mpc, filterSpuriousDocs = false, filterSpuriousSummSents = false))
    val summNames = new File("data/RSTDiscourse/data/RSTtrees-WSJ-main-1.0/SUMM-SUBSET-PROC/").listFiles.map(_.getName)
    require(allTrees.size == allSummDocs.size)
    val badFiles = new ArrayBuffer[String]
    for (i <- 0 until allTrees.size) {
      require(allTreeFiles(i).getName.dropRight(4) == allSummDocFiles(i).getName, allTreeFiles(i).getName.dropRight(4) + " " + allSummDocFiles(i).getName)
      Logger.logss(allSummDocFiles(i).getName)
      try {
        align(allTrees(i).leafWords, allSummDocs(i).doc)
      } catch {
        case e: Exception => {
          Logger.logss(e)
          badFiles += allSummDocFiles(i).getName
        }
      }
    }
    Logger.logss(badFiles.size + " bad files: " + badFiles)
    val badSummDocs = (badFiles.toSet & summNames.toSet)
    Logger.logss(badSummDocs.size + " bad summarized files: " + badSummDocs.toSeq.sorted) 
  }
} 
Example 171
Source File: RougeFileMunger.scala    From berkeley-doc-summarizer   with GNU General Public License v3.0 5 votes vote down vote up
package edu.berkeley.nlp.summ

import java.io.File
import edu.berkeley.nlp.futile.fig.basic.IOUtils
import scala.collection.JavaConverters._


object RougeFileMunger {

  val input = "data/RSTDiscourse/sample-outputs/"
  val output = "data/RSTDiscourse/sample-outputs-rouge/"
  val settingsPath = "data/RSTDiscourse/rouge-settings.xml"
  val detokenize = true
  
  def writeSummary(fileName: String, sents: Seq[String], outPath: String, keepFile: Boolean) {
    val outFile = new File(outPath)
    if (!keepFile) outFile.deleteOnExit()
    val outWriter = IOUtils.openOutHard(outFile)
    outWriter.println("<html>")
    outWriter.println("<head><title>" + fileName + "</title></head>")
    outWriter.println("<<body bgcolor=\"white\">")
    var counter = 1
    for (sent <- sents) {
      outWriter.println("<a name=\"" + counter + "\">[" + counter + "]</a> <a href=\"#" + counter + "\" id=" + counter + ">" + sent + "</a>")
      counter += 1
    }
    outWriter.println("</body>")
    outWriter.println("</html>")
    outWriter.close
  }
  
  def detokenizeSentence(line: String) = {
    line.replace(" ,", ",").replace(" .", ".").replace(" !", "!").replace(" ?", "?").replace(" :", ":").replace(" ;", ";").
         replace("`` ", "``").replace(" ''", "''").replace(" '", "'").replace(" \"", "\"").replace("$ ", "$")
  }
  
  def processFiles(rootPath: String, subDir: String) = {
    val refFiles = new File(rootPath + "/" + subDir).listFiles
    for (refFile <- refFiles) {
      val rawName = refFile.getName()
      val name = rawName.substring(0, if (rawName.indexOf("_") == -1) rawName.size else rawName.indexOf("_"))
      val lines = IOUtils.readLinesHard(refFile.getAbsolutePath()).asScala.map(sent => if (detokenize) detokenizeSentence(sent) else sent)
      writeSummary(name, lines, output + "/" + subDir + "/" + refFile.getName, true)
    }
  }
  
  def writeSettings(settingsPath: String, dirPaths: String) {
    val outWriter = IOUtils.openOutHard(settingsPath)
    outWriter.println("""<ROUGE_EVAL version="1.55">""")
    val rawDirName = new File(dirPaths).getName()
    val docs = new File(dirPaths + "/reference").listFiles
    var idx = 0
    for (doc <- docs) {
      val rawName = doc.getName().substring(0, doc.getName.indexOf("_"))
      outWriter.println("<EVAL ID=\"TASK_" + idx + "\">")
      outWriter.println("<MODEL-ROOT>" + rawDirName + "/reference</MODEL-ROOT>")
      outWriter.println("<PEER-ROOT>" + rawDirName + "/system</PEER-ROOT>")
      outWriter.println("<INPUT-FORMAT TYPE=\"SEE\">  </INPUT-FORMAT>")
      outWriter.println("<PEERS>")
      outWriter.println("<P ID=\"1\">" + rawName + "_system1.txt</P>")
      outWriter.println("</PEERS>")
      outWriter.println("<MODELS>")
      outWriter.println("<M ID=\"1\">" + rawName + "_reference1.txt</M>")
      outWriter.println("</MODELS>")
      outWriter.println("</EVAL>")
      idx += 1
    }
    outWriter.println("</ROUGE_EVAL>")
    outWriter.close
  }
  
  def main(args: Array[String]) {
    processFiles(input, "reference")
    processFiles(input, "system")
    writeSettings(settingsPath, output) 
  }
} 
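For reference, a minimal sketch of what detokenizeSentence does to a tokenized ROUGE sentence; the object above is assumed to be on the classpath and the sample sentence is illustrative only.

import edu.berkeley.nlp.summ.RougeFileMunger

object DetokenizeSketch extends App {
  // Tokenized input with spaced-out punctuation, as produced by typical tokenizers.
  val tokenized = "`` Hello , world ! '' said the analyst ."
  println(RougeFileMunger.detokenizeSentence(tokenized))
  // prints: ``Hello, world!'' said the analyst.
}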
Example 172
Source File: SHC.scala    From shc   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql

import org.apache.spark.sql.execution.datasources.hbase.Logging

import java.io.File

import com.google.common.io.Files
import org.apache.hadoop.hbase.client.Table
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{HBaseTestingUtility, TableName}
import org.apache.spark.sql.execution.datasources.hbase.SparkHBaseConf
import org.apache.spark.{SparkContext, SparkConf}
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}

class SHC  extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll  with Logging {
  implicit class StringToColumn(val sc: StringContext) {
    def $(args: Any*): ColumnName = {
      new ColumnName(sc.s(args: _*))
    }
  }

  var spark: SparkSession = null
  var sc: SparkContext = null
  var sqlContext: SQLContext = null
  var df: DataFrame = null

  private[spark] var htu = new HBaseTestingUtility
  private[spark] def tableName = "table1"

  private[spark] def columnFamilies: Array[String] = Array.tabulate(9){ x=> s"cf$x"}
  var table: Table = null
  val conf = new SparkConf
  conf.set(SparkHBaseConf.testConf, "true")
  // private[spark] var columnFamilyStr = Bytes.toString(columnFamily)

  def defineCatalog(tName: String) = s"""{
                                         |"table":{"namespace":"default", "name":"$tName"},
                                         |"rowkey":"key",
                                         |"columns":{
                                              |"col0":{"cf":"rowkey", "col":"key", "type":"string"},
                                              |"col1":{"cf":"cf1", "col":"col1", "type":"boolean"},
                                              |"col2":{"cf":"cf2", "col":"col2", "type":"double"},
                                              |"col3":{"cf":"cf3", "col":"col3", "type":"float"},
                                              |"col4":{"cf":"cf4", "col":"col4", "type":"int"},
                                              |"col5":{"cf":"cf5", "col":"col5", "type":"bigint"},
                                              |"col6":{"cf":"cf6", "col":"col6", "type":"smallint"},
                                              |"col7":{"cf":"cf7", "col":"col7", "type":"string"},
                                              |"col8":{"cf":"cf8", "col":"col8", "type":"tinyint"}
                                            |}
                                         |}""".stripMargin

  @deprecated(since = "04.12.2017(dd/mm/year)", message = "use `defineCatalog` instead")
  def catalog = defineCatalog(tableName)

  override def beforeAll() {
    val tempDir: File = Files.createTempDir
    tempDir.deleteOnExit
    htu.startMiniCluster
    SparkHBaseConf.conf = htu.getConfiguration
    logInfo(" - minicluster started")
    println(" - minicluster started")

    spark = SparkSession.builder()
      .master("local")
      .appName("HBaseTest")
      .config(conf)
      .getOrCreate()

    sqlContext = spark.sqlContext
    sc = spark.sparkContext
  }

  override def afterAll() {
    htu.shutdownMiniCluster()
    spark.stop()
  }

  def createTable(name: String, cfs: Array[String]) {
    val tName = Bytes.toBytes(name)
    val bcfs = cfs.map(Bytes.toBytes(_))
    try {
      htu.deleteTable(TableName.valueOf(tName))
    } catch {
      case _ : Throwable =>
        logInfo(" - no table " + name + " found")
    }
    htu.createMultiRegionTable(TableName.valueOf(tName), bcfs)
  }


  def createTable(name: Array[Byte], cfs: Array[Array[Byte]]) {
    try {
      htu.deleteTable(TableName.valueOf(name))
    } catch {
      case _ : Throwable =>
        logInfo(" - no table " + Bytes.toString(name) + " found")
    }
    htu.createMultiRegionTable(TableName.valueOf(name), cfs)
  }
} 
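A sketch of how a concrete suite might build on this harness, using the createTable and defineCatalog helpers defined above; the test body is illustrative and does not exercise the shc connector itself.

package org.apache.spark.sql

class SHCUsageExample extends SHC {
  test("create the test table and build its catalog") {
    // "table1" and its column families come from the harness above.
    createTable(tableName, columnFamilies)
    val cat = defineCatalog(tableName)
    assert(cat.contains(tableName))
  }
}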
Example 173
Source File: HBaseTestSuite.scala    From shc   with Apache License 2.0 5 votes vote down vote up
package org.apache.spark.sql

import java.io.File

import scala.collection.JavaConverters._

import com.google.common.io.Files
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{TableName, HBaseTestingUtility}
import org.apache.spark.sql.execution.datasources.hbase.Logging
import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite}

class HBaseTestSuite extends FunSuite with BeforeAndAfterEach with BeforeAndAfterAll  with Logging {
  private[spark] var htu = HBaseTestingUtility.createLocalHTU()
  private[spark] var tableName: Array[Byte] = Bytes.toBytes("t1")
  private[spark] var columnFamily: Array[Byte] = Bytes.toBytes("cf0")
  private[spark] var columnFamilies: Array[Array[Byte]] =
    Array(Bytes.toBytes("cf0"), Bytes.toBytes("cf1"), Bytes.toBytes("cf2"), Bytes.toBytes("cf3"), Bytes.toBytes("cf4"))
  var table: Table = null
  // private[spark] var columnFamilyStr = Bytes.toString(columnFamily)

  override def beforeAll() {
    val tempDir: File = Files.createTempDir
    tempDir.deleteOnExit
    htu.cleanupTestDir
    htu.startMiniZKCluster
    htu.startMiniHBaseCluster(1, 4)
    logInfo(" - minicluster started")
    println(" - minicluster started")
    try {
      htu.deleteTable(TableName.valueOf(tableName))

      //htu.createTable(TableName.valueOf(tableName), columnFamily, 2, Bytes.toBytes("abc"), Bytes.toBytes("xyz"), 2)
    } catch {
      case _ : Throwable =>
        logInfo(" - no table " + Bytes.toString(tableName) + " found")
    }
    setupTable()
  }



  override def afterAll() {
    try {
      table.close()
      println("shutdown")
      htu.deleteTable(TableName.valueOf(tableName))
      logInfo("shuting down minicluster")
      htu.shutdownMiniHBaseCluster
      htu.shutdownMiniZKCluster
      logInfo(" - minicluster shut down")
      htu.cleanupTestDir
    } catch {
      case _ : Throwable => logError("teardown error")
    }
  }

  def setupTable() {
    val config = htu.getConfiguration
    htu.createMultiRegionTable(TableName.valueOf(tableName), columnFamilies)
    println("create htable t1")
    val connection = ConnectionFactory.createConnection(config)
    val r = connection.getRegionLocator(TableName.valueOf("t1"))
    table = connection.getTable(TableName.valueOf("t1"))

    val regionLocations = r.getAllRegionLocations.asScala.toSeq
    println(s"$regionLocations size: ${regionLocations.size}")
    (0 until 100).foreach { x =>
      var put = new Put(Bytes.toBytes(s"row$x"))
      (0 until 5).foreach { y =>
        put.addColumn(columnFamilies(y), Bytes.toBytes(s"c$y"), Bytes.toBytes(s"value $x $y"))
      }
      table.put(put)
    }
  }
} 
Example 174
Source File: CodeGenerator.scala    From jvm-toxcore-c   with GNU General Public License v3.0 5 votes vote down vote up
package im.tox.tox4j.impl.jni.codegen

import java.io.{ File, PrintWriter }

import com.google.common.base.CaseFormat
import gnieh.pp.PrettyRenderer
import im.tox.tox4j.impl.jni.codegen.cxx.Ast._
import im.tox.tox4j.impl.jni.codegen.cxx.{ Ast, Print }

object NameConversions {

  def cxxVarName(name: String): String = CaseFormat.LOWER_CAMEL.to(CaseFormat.LOWER_UNDERSCORE, name)
  def cxxTypeName(name: String): String = CaseFormat.UPPER_CAMEL.to(CaseFormat.UPPER_UNDERSCORE, name)
  def javaVarName(name: String): String = CaseFormat.LOWER_UNDERSCORE.to(CaseFormat.LOWER_CAMEL, name)
  def javaTypeName(name: String): String = CaseFormat.UPPER_UNDERSCORE.to(CaseFormat.UPPER_CAMEL, name)

}

abstract class CodeGenerator extends App {

  def writeCode(path: String, sep: String = "\n\n")(code: Ast.TranslationUnit): Unit = {
    val renderer = new PrettyRenderer(130)

    val writer = new PrintWriter(new File("cpp/src", path))
    try {
      writer.println(code.map(Print.printDecl).map(renderer).mkString(sep))
    } finally {
      writer.close()
    }
  }

  def ifdef(header: String, guard: String, code: TranslationUnit*): TranslationUnit = {
    Include(header) +:
      Ifdef(guard) +:
      code.flatten :+
      Endif
  }

} 
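A sketch of a concrete generator built on the abstract class above; the output path and guard macro are placeholders, and the file is written relative to cpp/src as writeCode does.

package im.tox.tox4j.impl.jni.codegen

object ExampleHeaderGenerator extends CodeGenerator {
  // Emit a small guarded translation unit using the ifdef helper defined above.
  writeCode("generated/example.h") {
    ifdef("tox/tox.h", "TOX_EXAMPLE_GUARD")
  }
}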
Example 175
Source File: DefaultSource.scala    From spark-google-spreadsheets   with Apache License 2.0 5 votes vote down vote up
package com.github.potix2.spark.google.spreadsheets

import java.io.File

import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider, RelationProvider, SchemaRelationProvider}
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}

class DefaultSource extends RelationProvider with SchemaRelationProvider with CreatableRelationProvider {
  final val DEFAULT_CREDENTIAL_PATH = "/etc/gdata/credential.p12"

  override def createRelation(sqlContext: SQLContext, parameters: Map[String, String]) = {
    createRelation(sqlContext, parameters, null)
  }

  private[spreadsheets] def pathToSheetNames(parameters: Map[String, String]): (String, String) = {
    val path = parameters.getOrElse("path", sys.error("'path' must be specified for spreadsheets."))
    val elems = path.split('/')
    if (elems.length < 2)
      throw new Exception("'path' must be formed like '<spreadsheet>/<worksheet>'")

    (elems(0), elems(1))
  }

  override def createRelation(sqlContext: SQLContext, parameters: Map[String, String], schema: StructType) = {
    val (spreadsheetName, worksheetName) = pathToSheetNames(parameters)
    val context = createSpreadsheetContext(parameters)
    createRelation(sqlContext, context, spreadsheetName, worksheetName, schema)
  }


  override def createRelation(sqlContext: SQLContext, mode: SaveMode, parameters: Map[String, String], data: DataFrame): BaseRelation = {
    val (spreadsheetName, worksheetName) = pathToSheetNames(parameters)
    implicit val context = createSpreadsheetContext(parameters)
    val spreadsheet = SparkSpreadsheetService.findSpreadsheet(spreadsheetName)
    if(!spreadsheet.isDefined)
      throw new RuntimeException(s"no such a spreadsheet: $spreadsheetName")

    spreadsheet.get.addWorksheet(worksheetName, data.schema, data.collect().toList, Util.toRowData)
    createRelation(sqlContext, context, spreadsheetName, worksheetName, data.schema)
  }

  private[spreadsheets] def createSpreadsheetContext(parameters: Map[String, String]) = {
    val serviceAccountIdOption = parameters.get("serviceAccountId")
    val credentialPath = parameters.getOrElse("credentialPath", DEFAULT_CREDENTIAL_PATH)
    SparkSpreadsheetService(serviceAccountIdOption, new File(credentialPath))
  }

  private[spreadsheets] def createRelation(sqlContext: SQLContext,
                                           context: SparkSpreadsheetService.SparkSpreadsheetContext,
                                           spreadsheetName: String,
                                           worksheetName: String,
                                           schema: StructType): SpreadsheetRelation =
    if (schema == null) {
      createRelation(sqlContext, context, spreadsheetName, worksheetName, None)
    }
    else {
      createRelation(sqlContext, context, spreadsheetName, worksheetName, Some(schema))
    }

  private[spreadsheets] def createRelation(sqlContext: SQLContext,
                                           context: SparkSpreadsheetService.SparkSpreadsheetContext,
                                           spreadsheetName: String,
                                           worksheetName: String,
                                           schema: Option[StructType]): SpreadsheetRelation =
    SpreadsheetRelation(context, spreadsheetName, worksheetName, schema)(sqlContext)
} 
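A sketch of reading a worksheet through this relation provider; the option keys mirror the parameter names consumed above, and the account, credential path, and sheet names are placeholders.

import org.apache.spark.sql.{DataFrame, SQLContext}

object SpreadsheetReadSketch {
  def read(sqlContext: SQLContext): DataFrame =
    sqlContext.read
      .format("com.github.potix2.spark.google.spreadsheets")
      .option("serviceAccountId", "some-account@developer.gserviceaccount.com") // placeholder
      .option("credentialPath", "/path/to/credential.p12")                      // placeholder
      .load("MySpreadsheet/Sheet1") // parsed by pathToSheetNames as "<spreadsheet>/<worksheet>"
}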
Example 176
Source File: SparkSpreadsheetServiceReadSuite.scala    From spark-google-spreadsheets   with Apache License 2.0 5 votes vote down vote up
package com.github.potix2.spark.google.spreadsheets

import java.io.File

import org.scalatest.{BeforeAndAfter, FlatSpec}

class SparkSpreadsheetServiceReadSuite extends FlatSpec with BeforeAndAfter {
  private val serviceAccountId = "53797494708-ds5v22b6cbpchrv2qih1vg8kru098k9i@developer.gserviceaccount.com"
  private val testCredentialPath = "src/test/resources/spark-google-spreadsheets-test-eb7b191d1e1d.p12"
  private val TEST_SPREADSHEET_NAME = "SpreadsheetSuite"
  private val TEST_SPREADSHEET_ID = "1H40ZeqXrMRxgHIi3XxmHwsPs2SgVuLUFbtaGcqCAk6c"

  private val context: SparkSpreadsheetService.SparkSpreadsheetContext =
    SparkSpreadsheetService.SparkSpreadsheetContext(Some(serviceAccountId), new File(testCredentialPath))
  private val spreadsheet: SparkSpreadsheetService.SparkSpreadsheet =
    context.findSpreadsheet(TEST_SPREADSHEET_ID)

  behavior of "A Spreadsheet"


  it should "have a name" in {
    assert(spreadsheet.name == TEST_SPREADSHEET_NAME)
  }

  behavior of "A worksheet"
  it should "be None when a worksheet is missing" in {
    assert(spreadsheet.findWorksheet("foo").isEmpty)
  }

  it should "be retrieved when the worksheet exists" in {
    val worksheet = spreadsheet.findWorksheet("case2")
    assert(worksheet.isDefined)
    assert(worksheet.get.name == "case2")
    assert(worksheet.get.headers == List("id", "firstname", "lastname", "email", "country", "ipaddress"))

    val firstRow = worksheet.get.rows(0)
    assert(firstRow == Map(
      "id" -> "1",
      "firstname" -> "Annie",
      "lastname" -> "Willis",
      "email" -> "[email protected]",
      "country" -> "Burundi",
      "ipaddress" -> "241.162.49.104"))
  }
} 
Example 177
Source File: LibFFMRelationSuite.scala    From sona   with Apache License 2.0 5 votes vote down vote up
package com.tencent.angel.sona.ml.source.libffm

import java.io.File
import java.nio.charset.StandardCharsets

import com.google.common.io.Files
import org.apache.spark.SparkFunSuite
import com.tencent.angel.sona.ml.util.MLlibTestSparkContext
import org.apache.spark.util.SparkUtil

class LibFFMRelationSuite extends SparkFunSuite with MLlibTestSparkContext {
  // Path for dataset
  var path: String = _

  override def beforeAll(): Unit = {
    super.beforeAll()
    val lines0 =
      """
        |1 0:1:1.0 1:3:2.0 2:5:3.0
        |0
      """.stripMargin
    val lines1 =
      """
        |0 0:2:4.0 1:4:5.0 2:6:6.0
      """.stripMargin
    val dir = SparkUtil.createTempDir()
    val succ = new File(dir, "_SUCCESS")
    val file0 = new File(dir, "part-00000")
    val file1 = new File(dir, "part-00001")
    Files.write("", succ, StandardCharsets.UTF_8)
    Files.write(lines0, file0, StandardCharsets.UTF_8)
    Files.write(lines1, file1, StandardCharsets.UTF_8)
    path = dir.getPath
  }

  override def afterAll(): Unit = {
    try {
      val prefix = "C:\\Users\\fitzwang\\AppData\\Local\\Temp\\"
      if (path.startsWith(prefix)) {
        SparkUtil.deleteRecursively(new File(path))
      }
    } finally {
      super.afterAll()
    }
  }

  test("ffmIO"){
    val df = spark.read.format("libffm").load(path)
    val metadata = df.schema(1).metadata

    val fieldSet = MetaSummary.getFieldSet(metadata)
    println(fieldSet.mkString("[", ",", "]"))

    val keyFieldMap = MetaSummary.getKeyFieldMap(metadata)
    println(keyFieldMap.mkString("[", ",", "]"))

    df.write.format("libffm").save("temp.libffm")
  }

  test("read_ffm"){
    val df = spark.read.format("libffm").load(path)
    val metadata = df.schema(1).metadata

    val fieldSet = MetaSummary.getFieldSet(metadata)
    println(fieldSet.mkString("[", ",", "]"))

    val keyFieldMap = MetaSummary.getKeyFieldMap(metadata)
    println(keyFieldMap.mkString("[", ",", "]"))
  }

} 
Example 178
Source File: MLlibTestSparkContext.scala    From sona   with Apache License 2.0 5 votes vote down vote up
package com.tencent.angel.sona.ml.util

import java.io.File

import org.apache.spark.SparkContext
import org.apache.spark.sql.types.UDTRegistration
import org.apache.spark.sql.{SQLContext, SQLImplicits, SparkSession}
import org.apache.spark.util.{SparkUtil, Utils}
import org.scalatest.Suite

trait MLlibTestSparkContext extends TempDirectory { self: Suite =>
  @transient var spark: SparkSession = _
  @transient var sc: SparkContext = _
  @transient var checkpointDir: String = _

  override def beforeAll() {
    super.beforeAll()

    SparkUtil.UDTRegister("org.apache.spark.linalg.Vector", "org.apache.spark.linalg.VectorUDT")
    SparkUtil.UDTRegister("org.apache.spark.linalg.DenseVector", "org.apache.spark.linalg.VectorUDT")
    SparkUtil.UDTRegister("org.apache.spark.linalg.SparseVector", "org.apache.spark.linalg.VectorUDT")
    SparkUtil.UDTRegister("org.apache.spark.linalg.Matrix", "org.apache.spark.linalg.MatrixUDT")
    SparkUtil.UDTRegister("org.apache.spark.linalg.DenseMatrix", "org.apache.spark.linalg.MatrixUDT")
    SparkUtil.UDTRegister("org.apache.spark.linalg.SparseMatrix", "org.apache.spark.linalg.MatrixUDT")

    spark = SparkSession.builder
      .master("local[2]")
      .appName("MLlibUnitTest")
      .getOrCreate()
    sc = spark.sparkContext

    checkpointDir = SparkUtil.createDirectory(tempDir.getCanonicalPath, "checkpoints").toString
    sc.setCheckpointDir(checkpointDir)
  }

  override def afterAll() {
    try {
      SparkUtil.deleteRecursively(new File(checkpointDir))
      SparkSession.clearActiveSession()
      if (spark != null) {
        spark.stop()
      }
      spark = null
    } finally {
      super.afterAll()
    }
  }

  /**
   * A helper object for importing SQL implicits.
   *
   * Note that the alternative of importing `spark.implicits._` is not possible here.
   * This is because we create the `SQLContext` immediately before the first test is run,
   * but the implicits import is needed in the constructor.
   */
  protected object testImplicits extends SQLImplicits {
    protected override def _sqlContext: SQLContext = self.spark.sqlContext
  }
} 
Example 179
Source File: TempDirectory.scala    From sona   with Apache License 2.0 5 votes vote down vote up
package com.tencent.angel.sona.ml.util

import java.io.File

import org.scalatest.{BeforeAndAfterAll, Suite}
import org.apache.spark.util.SparkUtil

/**
 * Trait that creates a temporary directory before all tests and deletes it after all.
 */
trait TempDirectory extends BeforeAndAfterAll { self: Suite =>

  private var _tempDir: File = _

  /**
   * Returns the temporary directory as a `File` instance.
   */
  protected def tempDir: File = _tempDir

  override def beforeAll(): Unit = {
    super.beforeAll()
    _tempDir = SparkUtil.createTempDir(namePrefix = this.getClass.getName)
  }

  override def afterAll(): Unit = {
    try {
      SparkUtil.deleteRecursively(_tempDir)
    } finally {
      super.afterAll()
    }
  }
} 
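A sketch of a suite that mixes in the trait above to get a managed temporary directory; it assumes the scalatest version in use still provides FunSuite, and the test body is illustrative.

package com.tencent.angel.sona.ml.util

import java.io.File
import org.scalatest.FunSuite

class TempDirectoryUsageSuite extends FunSuite with TempDirectory {
  test("tempDir is available while tests run") {
    // The directory is created in beforeAll and removed in afterAll.
    assert(tempDir.isDirectory)
    val scratch = new File(tempDir, "scratch.txt")
    assert(!scratch.exists())
  }
}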
Example 180
Source File: PMMLReadWriteTest.scala    From sona   with Apache License 2.0 5 votes vote down vote up
package com.tencent.angel.sona.ml.util

import java.io.{File, IOException}

import org.dmg.pmml.PMML
import org.scalatest.Suite
import org.apache.spark.SparkContext
import com.tencent.angel.sona.ml.param.Params


trait PMMLReadWriteTest extends TempDirectory { self: Suite =>
  /**
   * Test PMML export. Requires exported model is small enough to be loaded locally.
   * Checks that the model can be exported and the result is valid PMML, but does not check
   * the specific contents of the model.
   */
  def testPMMLWrite[T <: Params with GeneralMLWritable](sc: SparkContext, instance: T,
    checkModelData: PMML => Unit): Unit = {
    val uid = instance.uid
    val subdirName = Identifiable.randomUID("pmml-")

    val subdir = new File(tempDir, subdirName)
    val path = new File(subdir, uid).getPath

    instance.write.format("pmml").save(path)
    intercept[IOException] {
      instance.write.format("pmml").save(path)
    }
    instance.write.format("pmml").overwrite().save(path)
    val pmmlStr = sc.textFile(path).collect.mkString("\n")
    val pmmlModel = PMMLUtils.loadFromString(pmmlStr)
    assert(pmmlModel.getHeader.getApplication.getName.startsWith("Apache Spark"))
    checkModelData(pmmlModel)
  }
} 
Example 181
Source File: TypesafeConfigSource.scala    From zio-config   with Apache License 2.0 5 votes vote down vote up
package zio.config.typesafe

import java.io.File
import java.lang.{ Boolean => JBoolean }

import com.typesafe.config._
import zio.config.PropertyTree.{ Leaf, _ }
import zio.config.{ ConfigSource, _ }
import zio.{ IO, Task, ZIO }

import scala.collection.JavaConverters._
import scala.util.{ Failure, Success, Try }

object TypesafeConfigSource {
  def fromDefaultLoader: Either[String, ConfigSource] =
    fromTypesafeConfig(ConfigFactory.load.resolve)

  def fromHoconFile[A](
    file: File
  ): Task[ConfigSource] =
    IO.effect(ConfigFactory.parseFile(file).resolve)
      .flatMap(typesafeConfig => {
        ZIO
          .fromEither(fromTypesafeConfig(typesafeConfig))
          .mapError(str => new RuntimeException(str))
      })

  def fromHoconString(
    input: String
  ): Either[String, zio.config.ConfigSource] =
    fromTypesafeConfig(
      ConfigFactory.parseString(input).resolve
    )

  def fromTypesafeConfig(
    input: => com.typesafe.config.Config
  ): Either[String, ConfigSource] =
    Try {
      input
    } match {
      case Failure(exception) => Left(exception.getMessage)
      case Success(value) =>
        getPropertyTree(value) match {
          case Left(value)  => Left(value)
          case Right(value) => Right(ConfigSource.fromPropertyTree(value, "hocon", LeafForSequence.Invalid))
        }
    }

  private[config] def getPropertyTree(
    input: com.typesafe.config.Config
  ): Either[String, PropertyTree[String, String]] = {
    def loopBoolean(value: Boolean)         = Leaf(value.toString)
    def loopNumber(value: Number)           = Leaf(value.toString)
    val loopNull                            = PropertyTree.empty
    def loopString(value: String)           = Leaf(value)
    def loopList(values: List[ConfigValue]) = Sequence(values.map(loopAny))

    def loopConfig(config: ConfigObject) =
      Record(config.asScala.toVector.map { case (key, value) => key -> loopAny(value) }.toMap)

    def loopAny(value: ConfigValue): PropertyTree[String, String] = value.valueType() match {
      case ConfigValueType.OBJECT  => loopConfig(value.asInstanceOf[ConfigObject])
      case ConfigValueType.LIST    => loopList(value.asInstanceOf[ConfigList].asScala.toList)
      case ConfigValueType.BOOLEAN => loopBoolean(value.unwrapped().asInstanceOf[JBoolean])
      case ConfigValueType.NUMBER  => loopNumber(value.unwrapped().asInstanceOf[Number])
      case ConfigValueType.NULL    => loopNull
      case ConfigValueType.STRING  => loopString(value.unwrapped().asInstanceOf[String])
    }

    Try(loopConfig(input.root())) match {
      case Failure(t) =>
        Left(
          "Unable to form the zio.config.PropertyTree from Hocon string." +
            " This may be due to the presence of explicit usage of nulls in hocon string. " +
            t.getMessage
        )
      case Success(value) => Right(value)
    }
  }
} 
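A small sketch of turning a HOCON string into a ConfigSource with the fromHoconString method defined above; the HOCON content is illustrative.

import zio.config.typesafe.TypesafeConfigSource

object TypesafeConfigSourceSketch extends App {
  val hocon =
    """
      |app {
      |  name = "demo"
      |  port = 8080
      |}
      |""".stripMargin

  // Right(source) on success, Left(error message) if the HOCON cannot be parsed.
  TypesafeConfigSource.fromHoconString(hocon) match {
    case Right(source) => println(s"loaded ConfigSource: $source")
    case Left(error)   => println(s"failed to parse HOCON: $error")
  }
}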
Example 182
Source File: TypesafeConfig.scala    From zio-config   with Apache License 2.0 5 votes vote down vote up
package zio.config.typesafe

import java.io.File

import com.typesafe.config.ConfigFactory
import zio.config.Config
import zio.{ Layer, Tag, ZIO }
import zio.config.ConfigDescriptor

object TypesafeConfig {
  def fromDefaultLoader[A](
    configDescriptor: ConfigDescriptor[A]
  )(implicit tag: Tag[A]): Layer[Throwable, Config[A]] =
    fromTypesafeConfig(ConfigFactory.load.resolve, configDescriptor)

  def fromHoconFile[A](
    file: File,
    configDescriptor: ConfigDescriptor[A]
  )(implicit tag: Tag[A]): Layer[Throwable, Config[A]] =
    fromTypesafeConfig(ConfigFactory.parseFile(file).resolve, configDescriptor)

  def fromHoconString[A](
    str: String,
    configDescriptor: ConfigDescriptor[A]
  )(implicit tag: Tag[A]): Layer[Throwable, Config[A]] =
    fromTypesafeConfig(ConfigFactory.parseString(str).resolve, configDescriptor)

  def fromTypesafeConfig[A](
    conf: => com.typesafe.config.Config,
    configDescriptor: ConfigDescriptor[A]
  )(implicit tag: Tag[A]): Layer[Throwable, Config[A]] =
    Config.fromConfigDescriptorM(
      ZIO
        .fromEither(TypesafeConfigSource.fromTypesafeConfig(conf))
        .map(configDescriptor from _)
        .mapError(error => new RuntimeException(error))
    )
} 
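A sketch of building a Layer from a descriptor via the fromHoconString overload above; it assumes ConfigDescriptor.int is available in this zio-config version, and the key and HOCON content are illustrative.

import zio.Layer
import zio.config.{Config, ConfigDescriptor}
import zio.config.typesafe.TypesafeConfig

object PortConfigSketch {
  // Assumption: the primitive descriptor `int(path)` is exposed on the ConfigDescriptor companion.
  val portDescriptor: ConfigDescriptor[Int] = ConfigDescriptor.int("port")

  // A Layer that fails with Throwable if parsing or reading fails.
  val live: Layer[Throwable, Config[Int]] =
    TypesafeConfig.fromHoconString("port = 8080", portDescriptor)
}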
Example 183
Source File: SbtLayerConfigurations.scala    From sbt-jib   with Apache License 2.0 5 votes vote down vote up
package de.gccc.jib

import java.io.File

import com.google.cloud.tools.jib.api.LayerConfiguration
import sbt._

object SbtLayerConfigurations {

  def generate(
      targetDirectory: File,
      classes: Seq[File],
      resourceDirectories: Seq[File],
      internalDependencies: Keys.Classpath,
      external: Keys.Classpath,
      extraMappings: Seq[(File, String)],
      specialResourceDirectory: File
  ): List[LayerConfiguration] = {

    val internalDependenciesLayer = {
      SbtJibHelper.mappingsConverter("internal", reproducibleDependencies(targetDirectory, internalDependencies))
    }
    val externalDependenciesLayer = {
      SbtJibHelper.mappingsConverter("libs", MappingsHelper.fromClasspath(external.seq, "/app/libs"))
    }

    val resourcesLayer = {
      SbtJibHelper.mappingsConverter(
        "conf",
        resourceDirectories.flatMap(MappingsHelper.contentOf(_, "/app/resources", _.isFile))
      )
    }

    val specialResourcesLayer = {
      SbtJibHelper.mappingsConverter("resources",
                                     MappingsHelper.contentOf(specialResourceDirectory, "/app/resources", _.isFile))
    }

    val extraLayer =
      if (extraMappings.nonEmpty) SbtJibHelper.mappingsConverter("extra", extraMappings.filter(_._1.isFile)) :: Nil
      else Nil

    val allClasses = classes
    // we only want class-files in our classes layer
    // FIXME: not just extensions checking?
      .flatMap(MappingsHelper.contentOf(_, "/app/classes", f => if (f.isFile) f.getName.endsWith(".class") else false))

    val classesLayer = SbtJibHelper.mappingsConverter("classes", allClasses)

    // the ordering here is really important
    (extraLayer ::: List(
      externalDependenciesLayer,
      resourcesLayer,
      internalDependenciesLayer,
      specialResourcesLayer,
      classesLayer
    )).filterNot(lc => lc.getLayerEntries.isEmpty)
  }

  private def reproducibleDependencies(targetDirectory: File, internalDependencies: Keys.Classpath) = {
    val dependencies = internalDependencies.seq.map(_.data)

    val stageDirectory = targetDirectory / "jib" / "dependency-stage"
    IO.delete(stageDirectory)
    IO.createDirectory(stageDirectory)

    val stripper = new ZipStripper()

    dependencies.foreach { in =>
      val fileName = in.getName
      val out      = new File(stageDirectory, fileName)
      stripper.strip(in, out)
    }

    MappingsHelper.contentOf(stageDirectory, "/app/libs")
  }

} 
Example 184
Source File: SbtJibHelper.scala    From sbt-jib   with Apache License 2.0 5 votes vote down vote up
package de.gccc.jib

import java.io.File

import com.google.cloud.tools.jib.api.buildplan.AbsoluteUnixPath
import com.google.cloud.tools.jib.api.LayerConfiguration

private[jib] object SbtJibHelper {

  def mappingsConverter(name: String, mappings: Seq[(File, String)]): LayerConfiguration = {
    val layerConfiguration = LayerConfiguration.builder()

    mappings
      .filter(_._1.isFile) // fixme resolve all directory files
      .map { case (file, fullPathOnImage) => (file.toPath, fullPathOnImage) }
      .toList
      .sortBy(_._2)
      .foreach {
        case (sourceFile, pathOnImage) =>
          layerConfiguration.addEntry(sourceFile, AbsoluteUnixPath.get(pathOnImage))
      }

    layerConfiguration.build()
  }

} 
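A sketch of calling mappingsConverter; it lives in the de.gccc.jib package because the helper is private[jib], the file path is a placeholder, and anything that is not a regular file would be filtered out as shown above.

package de.gccc.jib

import java.io.File

object SbtJibHelperSketch {
  val classesLayer = SbtJibHelper.mappingsConverter(
    "classes",
    Seq(new File("target/scala-2.12/classes/Main.class") -> "/app/classes/Main.class")
  )
}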
Example 185
Source File: MappingsHelper.scala    From sbt-jib   with Apache License 2.0 5 votes vote down vote up
package de.gccc.jib

import java.io.File

import sbt._
import sbt.io.{ IO, PathFinder }

import scala.language.postfixOps

// The object declaration was lost in extraction; the original object also
// defines contentOf and a two-argument fromClasspath overload, both used by
// SbtLayerConfigurations above.
object MappingsHelper {

  def fromClasspath(entries: Seq[Attributed[File]],
                    target: String,
                    includeArtifact: Artifact => Boolean,
                    includeOnNoArtifact: Boolean = false): Seq[(File, String)] =
    entries.filter(attr => attr.get(sbt.Keys.artifact.key) map includeArtifact getOrElse includeOnNoArtifact).map {
      attribute =>
        val file = attribute.data
        file -> s"$target/${file.getName}"
    }

} 
Example 186
Source File: ScatterGatherChannelSpec.scala    From zio-nio   with Apache License 2.0 5 votes vote down vote up
package zio.nio.core.channels

import java.io.{ File, RandomAccessFile }

import zio.nio.core.{ BaseSpec, Buffer }
import zio.test.Assertion._
import zio.test._
import zio.{ Chunk, IO, ZIO }

import scala.io.Source

object ScatterGatherChannelSpec extends BaseSpec {

  override def spec = suite("ScatterGatherChannelSpec")(
    testM("scattering read") {
      for {
        raf         <- ZIO.effectTotal(new RandomAccessFile("nio-core/src/test/resources/scattering_read_test.txt", "r"))
        fileChannel = raf.getChannel
        readLine = (buffer: Buffer[Byte]) =>
          for {
            _     <- buffer.flip
            array <- buffer.array
            text  = array.takeWhile(_ != 10).map(_.toChar).mkString.trim
          } yield text
        buffs   <- IO.collectAll(Seq(Buffer.byte(5), Buffer.byte(5)))
        channel = new FileChannel(fileChannel)
        _       <- channel.readBuffer(buffs)
        list    <- IO.collectAll(buffs.map(readLine))
        _       <- channel.close
      } yield assert(list)(equalTo("Hello" :: "World" :: Nil))
    },
    testM("gathering write") {
      for {
        file        <- ZIO.effect(new File("nio-core/src/test/resources/gathering_write_test.txt"))
        raf         = new RandomAccessFile(file, "rw")
        fileChannel = raf.getChannel

        buffs <- IO.collectAll(
                  Seq(
                    Buffer.byte(Chunk.fromArray("Hello".getBytes)),
                    Buffer.byte(Chunk.fromArray("World".getBytes))
                  )
                )
        channel = new FileChannel(fileChannel)
        _       <- channel.writeBuffer(buffs)
        _       <- channel.close
        result  = Source.fromFile(file).getLines().toSeq
        _       = file.delete()
      } yield assert(result)(equalTo(Seq("HelloWorld")))
    }
  )
} 
Example 187
Source File: ScatterGatherChannelSpec.scala    From zio-nio   with Apache License 2.0 5 votes vote down vote up
package zio.nio.channels

import java.io.{ File, RandomAccessFile }

import zio.nio.core.Buffer
import zio.nio.BaseSpec
import zio.test.Assertion._
import zio.test._
import zio.{ Chunk, IO, ZIO }

import scala.io.Source

object ScatterGatherChannelSpec extends BaseSpec {

  override def spec = suite("ScatterGatherChannelSpec")(
    testM("scattering read") {
      for {
        raf         <- ZIO.effectTotal(new RandomAccessFile("nio/src/test/resources/scattering_read_test.txt", "r"))
        fileChannel = raf.getChannel
        readLine = (buffer: Buffer[Byte]) =>
          for {
            _     <- buffer.flip
            array <- buffer.array
            text  = array.takeWhile(_ != 10).map(_.toChar).mkString.trim
          } yield text
        buffs <- IO.collectAll(Seq(Buffer.byte(5), Buffer.byte(5)))
        list <- FileChannel(fileChannel).use { channel =>
                 for {
                   _    <- channel.readBuffer(buffs)
                   list <- IO.collectAll(buffs.map(readLine))
                 } yield list
               }
      } yield assert(list)(equalTo("Hello" :: "World" :: Nil))
    },
    testM("gathering write") {
      for {
        file        <- ZIO.effect(new File("nio/src/test/resources/gathering_write_test.txt"))
        raf         = new RandomAccessFile(file, "rw")
        fileChannel = raf.getChannel

        buffs <- IO.collectAll(
                  Seq(
                    Buffer.byte(Chunk.fromArray("Hello".getBytes)),
                    Buffer.byte(Chunk.fromArray("World".getBytes))
                  )
                )
        _      <- FileChannel(fileChannel).use(_.writeBuffer(buffs).unit)
        result = Source.fromFile(file).getLines().toSeq
        _      = file.delete()
      } yield assert(result)(equalTo(Seq("HelloWorld")))
    }
  )
} 
Example 188
Source File: GlobalConfig.scala    From sbt-api-builder   with MIT License 5 votes vote down vote up
package apibuilder.sbt

import java.io.File

import sbt.IO

import scala.util.Try

final case class GlobalConfig(profiles: Map[String, Profile] = Map.empty) extends AnyVal {
  override def toString: String = profiles.keys.mkString(", ")
}
final case class Profile(token: String) extends AnyVal

object GlobalConfig {
  private val ProfileM = "^\\s*\\[\\s*(profile\\s+|)(\\w+)\\s*\\]\\s*$".r
  private val TokenM   = "^\\s*token\\s*=\\s*(\\w+)$".r

  private[this] implicit final class Ext(val acc: List[(String, Option[Profile])]) extends AnyVal {
    def hasNotSeen(pn: String): Boolean = !acc.exists { case (pn0, _) => pn0 == pn }
  }

  def load(f: File): Either[Throwable, GlobalConfig] =
    Try {
      IO.reader(f) { r =>
        GlobalConfig(
          IO.foldLines(r, List.empty[(String, Option[Profile])]) {
              case (acc, ProfileM(_, pn)) if acc.hasNotSeen(pn) => (pn -> None) :: acc
              case ((cpn, None) :: rest, TokenM(t))             => (cpn -> Some(Profile(t))) :: rest
              case (acc, _)                                     => acc
            }
            .collect { case (profile, Some(config)) => profile -> config }
            .toMap
        )
      }
    }.toEither
} 
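A sketch of loading the CLI profiles with the load method defined above; the config path is an assumption about where the apibuilder CLI keeps its global configuration.

import java.io.File
import apibuilder.sbt.GlobalConfig

object GlobalConfigSketch extends App {
  // ~/.apibuilder/config is an assumed location, not taken from the source above.
  val configFile = new File(sys.props("user.home"), ".apibuilder/config")
  GlobalConfig.load(configFile) match {
    case Right(config) => println(s"profiles: $config") // toString lists the profile names
    case Left(error)   => println(s"could not load config: ${error.getMessage}")
  }
}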
Example 189
Source File: CLIConfig.scala    From sbt-api-builder   with MIT License 5 votes vote down vote up
package apibuilder.sbt

import java.io.{File, FileNotFoundException}
import java.nio.file.{Path, PathMatcher}

import io.circe.Decoder
import io.circe.yaml.parser
import sbt.IO

final case class CLIConfig(organizationFor: Map[String, OrganizationConfig])        extends AnyVal
final case class OrganizationConfig(applicationFor: Map[String, ApplicationConfig]) extends AnyVal
final case class ApplicationConfig(version: String, generators: Seq[GeneratorConfig])
final case class GeneratorConfig(generator: String, maybeTargetPath: Option[Path], pathMatchers: Seq[PathMatcher])

object CLIConfig extends BaseDecoders {
  final def load(f: File): Either[ConfigException, CLIConfig] =
    if (!f.getParentFile.exists) Left(MissingParentDirectory(f))
    else {
      try {
        IO.reader(f) { r =>
          parser
            .parse(r)
            .left
            .map(pf => InvalidContent(pf.message))
            .flatMap(_.as[CLIConfig].left.map(df => InvalidContent(df.message)))
        }
      } catch {
        case _: FileNotFoundException => Left(MissingFile(f))
      }
    }

  implicit final val cliConfigDecoder: Decoder[CLIConfig] = Decoder.instance { c =>
    c.downField("code").as[Map[String, OrganizationConfig]].map(CLIConfig.apply)
  }
  implicit final val organizationConfigDecoder: Decoder[OrganizationConfig] = Decoder.instance { c =>
    c.value.as[Map[String, ApplicationConfig]].map(OrganizationConfig.apply)
  }
  implicit final val applicationConfig: Decoder[ApplicationConfig] = Decoder.instance { c =>
    for {
      version    <- c.downField("version").as[String]
      generators <- c.downField("generators").as[Seq[GeneratorConfig]]
    } yield ApplicationConfig(version, generators)
  }
  implicit final val generatorConfigDecoder: Decoder[GeneratorConfig] = Decoder.instance { c =>
    for {
      generator       <- c.downField("generator").as[String]
      maybeTargetPath <- c.downField("target").as[Option[Path]]
      pathMatchers    <- c.downField("files").as[Seq[PathMatcher]]
    } yield GeneratorConfig(generator, maybeTargetPath, pathMatchers)
  }
} 
Example 190
Source File: HLSTools.scala    From fpga-tidbits   with BSD 2-Clause "Simplified" License 5 votes vote down vote up
package fpgatidbits.hlstools
import sys.process._
import java.io.File

// Collection of utilities for Vivado HLS

object TidbitsHLSTools {
  // quick-and-dirty single file HLS synthesis
  def hlsToVerilog(
    inFile: String,
    outDir: String,
    synDir: String,
    projName: String,
    topFxnName: String,
    inclDirs: Seq[String] = Seq(),
    fpgaPart: String = "xc7z020clg400-1",
    nsClk: String = "5.0"
  ) = {
    // get path to hls_syn.tcl
    val synthScriptPath = getClass.getResource("/script/hls_syn.tcl").getPath
    // need to provide include dirs as a single string argument, parsing
    // done in tcl. note: dirs here should have no spaces!
    val inclDirString = inclDirs.mkString(" ")
    // call the actual synthesis script
    val cmdline = Seq(
      "vivado_hls",
      "-f", synthScriptPath,
      "-tclargs", projName, inFile, fpgaPart, nsClk, topFxnName, inclDirString
    )
    val status = Process(cmdline, new File(synDir)) ! ProcessLogger(stdout append _+"\n", stderr append _+"\n")
    // copy results to outDir
    s"cp -a $synDir/$projName/sol1/impl/verilog/. $outDir/".!!
  }
} 
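A sketch of a single-file synthesis run with the helper above; every path and name is a placeholder, and vivado_hls is assumed to be on the PATH.

import fpgatidbits.hlstools.TidbitsHLSTools

object HLSSynthesisSketch extends App {
  TidbitsHLSTools.hlsToVerilog(
    inFile     = "/abs/path/to/accel.cpp",
    outDir     = "/abs/path/to/verilog-out",
    synDir     = "/abs/path/to/hls-syn",
    projName   = "accel_proj",
    topFxnName = "AccelTop"
  )
}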
Example 191
Source File: License.scala    From iep-apps   with Apache License 2.0 5 votes vote down vote up
import java.io.File
import java.io.PrintStream
import java.time.ZonedDateTime
import java.time.ZoneOffset
import scala.io.Source
import sbt._

object License {

  // The object declaration and the Apache 2.0 header text were lost in
  // extraction and are reconstructed here; the copyright line is illustrative,
  // not taken from the original file.
  private val lineSeparator = System.lineSeparator()

  private val apache2 = """
    |/*
    | * Copyright 2014-2020 Netflix, Inc.
    | *
    | * Licensed under the Apache License, Version 2.0 (the "License");
    | * you may not use this file except in compliance with the License.
    | * You may obtain a copy of the License at
    | *
    | *     http://www.apache.org/licenses/LICENSE-2.0
    | *
    | * Unless required by applicable law or agreed to in writing, software
    | * distributed under the License is distributed on an "AS IS" BASIS,
    | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    | * See the License for the specific language governing permissions and
    | * limitations under the License.
    | */
  """.stripMargin.trim

  def findFiles(dir: File): Seq[File] = {
    (dir ** "*.scala").get ++ (dir ** "*.java").get
  }

  def checkLicenseHeaders(log: Logger, srcDir: File): Unit = {
    val badFiles = findFiles(srcDir).filterNot(checkLicenseHeader)
    if (badFiles.nonEmpty) {
      badFiles.foreach { f => log.error(s"bad license header: $f") }
      sys.error(s"${badFiles.size} files with incorrect header, run formatLicenseHeaders to fix")
    } else {
      log.info("all files have correct license header")
    }
  }

  def checkLicenseHeader(file: File): Boolean = {
    val lines = Source.fromFile(file, "UTF-8").getLines().toList
    checkLicenseHeader(lines)
  }

  def checkLicenseHeader(lines: List[String]): Boolean = {
    val header = lines.takeWhile(!_.startsWith("package ")).mkString(lineSeparator)
    header == apache2
  }

  def formatLicenseHeaders(log: Logger, srcDir: File): Unit = {
    findFiles(srcDir).foreach { f => formatLicenseHeader(log, f) }
  }

  def formatLicenseHeader(log: Logger, file: File): Unit = {
    val lines = Source.fromFile(file, "UTF-8").getLines().toList
    if (!checkLicenseHeader(lines)) {
      log.info(s"fixing license header: $file")
      writeLines(file, apache2 :: removeExistingHeader(lines))
    }
  }

  def removeExistingHeader(lines: List[String]): List[String] = {
    val res = lines.dropWhile(!_.startsWith("package "))
    if (res.isEmpty) lines else res
  }

  def writeLines(file: File, lines: List[String]): Unit = {
    val out = new PrintStream(file)
    try lines.foreach(out.println) finally out.close()
  }
} 
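A sketch of wiring the helpers above into custom sbt tasks; the task keys are illustrative and not part of the original build definition.

import sbt._
import sbt.Keys._

object LicenseCheckSettings {
  val checkLicenseHeaders  = taskKey[Unit]("Check the license headers of all sources")
  val formatLicenseHeaders = taskKey[Unit]("Rewrite incorrect license headers")

  val settings: Seq[Setting[_]] = Seq(
    checkLicenseHeaders  := License.checkLicenseHeaders(streams.value.log, sourceDirectory.value),
    formatLicenseHeaders := License.formatLicenseHeaders(streams.value.log, sourceDirectory.value)
  )
}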
Example 192
Source File: S3CopyService.scala    From iep-apps   with Apache License 2.0 5 votes vote down vote up
package com.netflix.atlas.persistence

import java.io.File
import java.nio.file.Files
import java.nio.file.Paths

import akka.NotUsed
import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import akka.stream.KillSwitch
import akka.stream.KillSwitches
import akka.stream.scaladsl.Keep
import akka.stream.scaladsl.Source
import com.netflix.atlas.core.util.Streams
import com.netflix.iep.service.AbstractService
import com.netflix.spectator.api.Registry
import com.typesafe.config.Config
import com.typesafe.scalalogging.StrictLogging
import javax.inject.Inject
import javax.inject.Singleton

import scala.concurrent.duration._

@Singleton
class S3CopyService @Inject()(
  val config: Config,
  val registry: Registry,
  implicit val system: ActorSystem
) extends AbstractService
    with StrictLogging {

  private val dataDir = config.getString("atlas.persistence.local-file.data-dir")

  private implicit val mat = ActorMaterializer()

  private var killSwitch: KillSwitch = _
  private val s3Config = config.getConfig("atlas.persistence.s3")

  private val cleanupTimeoutMs = s3Config.getDuration("cleanup-timeout").toMillis
  private val maxInactiveMs = s3Config.getDuration("max-inactive-duration").toMillis
  private val maxFileDurationMs =
    config.getDuration("atlas.persistence.local-file.max-duration").toMillis

  require(
    maxInactiveMs > maxFileDurationMs,
    "`max-inactive-duration` MUST be longer than `max-duration`, otherwise file may be renamed before normal write competes"
  )

  override def startImpl(): Unit = {
    logger.info("Starting service")
    killSwitch = Source
      .tick(1.second, 5.seconds, NotUsed)
      .viaMat(KillSwitches.single)(Keep.right)
      .flatMapMerge(Int.MaxValue, _ => Source(FileUtil.listFiles(new File(dataDir))))
      .toMat(new S3CopySink(s3Config, registry, system))(Keep.left)
      .run()
  }

  override def stopImpl(): Unit = {
    logger.info("Stopping service")
    waitForCleanup()
    if (killSwitch != null) killSwitch.shutdown()
  }

  private def waitForCleanup(): Unit = {
    logger.info("Waiting for cleanup")
    val start = System.currentTimeMillis
    while (hasMoreFiles) {
      if (System.currentTimeMillis() > start + cleanupTimeoutMs) {
        logger.error("Cleanup timeout")
        return
      }
      Thread.sleep(1000)
    }
    logger.info("Cleanup done")
  }

  private def hasMoreFiles: Boolean = {
    try {
      Streams.scope(Files.list(Paths.get(dataDir))) { dir =>
        dir.anyMatch(f => Files.isRegularFile(f))
      }
    } catch {
      case e: Exception => {
        logger.error(s"Error checking hasMoreFiles in $dataDir", e)
        true // Assume there are more files on error, so the caller will retry
      }
    }
  }
} 
Example 193
Source File: FileUtil.scala    From iep-apps   with Apache License 2.0 5 votes vote down vote up
package com.netflix.atlas.persistence

import java.io.File
import java.nio.file.Files

import com.netflix.atlas.core.util.Streams
import com.typesafe.scalalogging.StrictLogging

import scala.jdk.StreamConverters._

object FileUtil extends StrictLogging {

  def delete(f: File): Unit = {
    try {
      Files.delete(f.toPath)
      logger.debug(s"deleted file $f")
    } catch {
      case e: Exception => logger.error(s"failed to delete path $f", e)
    }
  }

  def listFiles(f: File): List[File] = {
    try {
      Streams.scope(Files.list(f.toPath)) { dir =>
        dir.toScala(List).map(_.toFile)
      }
    } catch {
      case e: Exception =>
        logger.error(s"failed to list files for: $f", e)
        Nil
    }
  }

  def isTmpFile(f: File): Boolean = {
    f.getName.endsWith(RollingFileWriter.TmpFileSuffix)
  }

} 
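A sketch that removes leftover temporary files from a data directory using the helpers above; the directory path is a placeholder.

import java.io.File
import com.netflix.atlas.persistence.FileUtil

object TmpFileCleanupSketch extends App {
  val dataDir = new File("/tmp/atlas-data")
  // List files, keep only *.tmp writer output, and delete each one.
  FileUtil.listFiles(dataDir).filter(FileUtil.isTmpFile).foreach(FileUtil.delete)
}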
Example 194
Source File: AvroTest.scala    From iep-apps   with Apache License 2.0 5 votes vote down vote up
package com.netflix.atlas.persistence

import java.io.File
import java.nio.file.Files
import java.nio.file.Paths

import org.apache.avro.file.DataFileReader
import org.apache.avro.specific.SpecificDatumReader

// Read metadata for all avro files in given directory
object AvroTest {

  def main(args: Array[String]): Unit = {
    val dir = args(0)
    Files
      .walk(Paths.get(dir))
      .filter(path => Files.isRegularFile(path))
      .forEach(p => readFile(p.toFile))
  }

  private def readFile(file: File): Unit = {
    println(s"##### Reading file: $file")
    var count = 0
    val userDatumReader = new SpecificDatumReader[AvroDatapoint](classOf[AvroDatapoint])
    val dataFileReader = new DataFileReader[AvroDatapoint](file, userDatumReader)
    while (dataFileReader.hasNext) {
      dataFileReader.next()
      count += 1
    }

    println(s"    blockCount = ${dataFileReader.getBlockCount}")
    println(s"    blockSize  = ${dataFileReader.getBlockSize}")
    println(s"    numRecords = $count")

    dataFileReader.close()
    println
  }
} 
Example 195
Source File: RollingFileWriterSuite.scala    From iep-apps   with Apache License 2.0 5 votes vote down vote up
package com.netflix.atlas.persistence

import java.io.File
import java.nio.file.Files
import java.nio.file.Paths

import com.netflix.atlas.core.model.Datapoint
import com.netflix.spectator.api.NoopRegistry
import org.apache.avro.file.DataFileReader
import org.apache.avro.specific.SpecificDatumReader
import org.scalatest.BeforeAndAfter
import org.scalatest.BeforeAndAfterAll
import org.scalatest.funsuite.AnyFunSuite

import scala.collection.mutable.ListBuffer

class RollingFileWriterSuite extends AnyFunSuite with BeforeAndAfter with BeforeAndAfterAll {

  private val outputDir = "./target/unitTestAvroOutput"
  private val registry = new NoopRegistry

  before {
    listFilesSorted(outputDir).foreach(_.delete()) // Clean up files if they exist
    Files.createDirectories(Paths.get(outputDir))
  }

  after {
    listFilesSorted(outputDir).foreach(_.delete())
    Files.deleteIfExists(Paths.get(outputDir))
  }

  // Write 3 datapoints: the first 2 are written to file 1, then rollover, and the 3rd is written to file 2
  test("avro writer rollover by max records") {
    val rollingConf = RollingConfig(2, 12000, 12000)
    val hourStart = 3600000
    val hourEnd = 7200000
    val writer =
      new RollingFileWriter(s"$outputDir/prefix", rollingConf, hourStart, hourEnd, registry)
    writer.initialize()
    createData(hourStart, 0, 1, 2).foreach(writer.write)
    writer.write(Datapoint(Map.empty, hourEnd, 3)) // out of range, should be ignored
    writer.close()

    // Check num of files
    val files = listFilesSorted(outputDir)
    assert(files.size == 2)

    // Check file 1 records
    val file1 = files.head
    assert(file1.getName.endsWith(".0000-0001"))
    val dpArray1 = readAvro(file1)
    assert(dpArray1.size == 2)
    assert(dpArray1(0).getValue == 0)
    assert(dpArray1(0).getTags.get("node") == "0")
    assert(dpArray1(1).getValue == 1)
    assert(dpArray1(1).getTags.get("node") == "1")

    // Check file 2 records
    val file2 = files.last
    assert(file2.getName.endsWith(".0002-0002"))
    val dpArray2 = readAvro(file2)
    assert(dpArray2.size == 1)
    assert(dpArray2(0).getValue == 2)
    assert(dpArray2(0).getTags.get("node") == "2")
  }

  private def createData(startTime: Long, values: Double*): List[Datapoint] = {
    values.toList.zipWithIndex.map {
      case (v, i) =>
        val tags = Map(
          "name" -> "cpu",
          "node" -> s"$i"
        )
        Datapoint(tags, startTime + i * 1000, v, 60000)
    }
  }

  private def listFilesSorted(dir: String): List[File] = {
    val d = new File(dir)
    if (!d.exists()) {
      Nil
    } else {
      new File(dir).listFiles().filter(_.isFile).toList.sortBy(_.getName)
    }
  }

  private def readAvro(file: File): Array[AvroDatapoint] = {
    val userDatumReader = new SpecificDatumReader[AvroDatapoint](classOf[AvroDatapoint])
    val dataFileReader = new DataFileReader[AvroDatapoint](file, userDatumReader)
    val dpListBuf = ListBuffer.empty[AvroDatapoint]
    try {
      while (dataFileReader.hasNext) {
        dpListBuf.addOne(dataFileReader.next)
      }
    } finally {
      dataFileReader.close()
    }
    dpListBuf.toArray
  }
} 
Example 196
Source File: Resources.scala    From MoVE   with Mozilla Public License 2.0 5 votes vote down vote up
import sbt._
import java.io.FileNotFoundException
import java.io.File

object Resources {

	
	def getJavaHome: File = {
		val javaHome =
	    Option(System.getenv("JAVA_HOME")).map(_+"/jre").
	      orElse(Option(System.getProperty("java.home")))
		javaHome match {
			case Some(str) =>
				file(str)
			case None =>
				throw new FileNotFoundException("$JAVA_HOME is undefined as well as the system property `java.home`." +
																			"Setup a environment variable JAVA_HOME")
		}
	}

	def checkExists(file:File): File = {
		if(file.exists()) file
		else throw new FileNotFoundException(s"Can't find needed resource: $file")
	}
} 
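A sketch of using the helpers above from a build definition; jfxrt.jar is only an example of a JRE resource a build might require, and the `/` path syntax comes from sbt.

import sbt._
import java.io.File

object ResourcesSketch {
  // Resolve a file under the detected JAVA_HOME and fail fast if it is missing.
  val javaFxJar: File = Resources.checkExists(Resources.getJavaHome / "lib" / "jfxrt.jar")
}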
Example 197
Source File: VwSparseMultilabelPredictorTest.scala    From aloha   with MIT License 5 votes vote down vote up
package com.eharmony.aloha.models.vw.jni.multilabel

import java.io.{ByteArrayOutputStream, File, FileInputStream}

import com.eharmony.aloha.ModelSerializationTestHelper
import com.eharmony.aloha.io.sources.{Base64StringSource, ExternalSource, ModelSource}
import org.apache.commons.codec.binary.Base64
import org.apache.commons.io.IOUtils
import org.junit.Assert._
import org.junit.Test
import org.junit.runner.RunWith
import org.junit.runners.BlockJUnit4ClassRunner
import vowpalWabbit.learner.{VWActionScoresLearner, VWLearners}


@RunWith(classOf[BlockJUnit4ClassRunner])
class VwSparseMultilabelPredictorTest extends ModelSerializationTestHelper {
  import VwSparseMultilabelPredictorTest._

  @Test def testSerializability(): Unit = {
    val predictor = getPredictor(getModelSource(), 3)
    val ds = serializeDeserializeRoundTrip(predictor)
    assertEquals(predictor, ds)
    assertEquals(predictor.vwParams(), ds.vwParams())
    assertNotNull(ds.vwModel)
  }

  @Test def testVwParameters(): Unit = {
    val numLabelsInTrainingSet = 3
    val predictor = getPredictor(getModelSource(), numLabelsInTrainingSet)

    predictor.vwParams() match {
      case Data(vwBinFilePath, ringSize) =>
        checkVwBinFile(vwBinFilePath)
        checkVwRingSize(numLabelsInTrainingSet, ringSize.toInt)
      case ps => fail(s"Unexpected VW parameters format.  Found string: $ps")
    }
  }
}

object VwSparseMultilabelPredictorTest {
  private val Data = """\s*-i\s+(\S+)\s+--ring_size\s+(\d+)\s+--testonly\s+--quiet""".r

  private def getModelSource(): ModelSource = {
    val f = File.createTempFile("i_dont", "care")
    f.deleteOnExit()
    val learner = VWLearners.create[VWActionScoresLearner](s"--quiet --csoaa_ldf mc --csoaa_rank -f ${f.getCanonicalPath}")
    learner.close()
    val baos = new ByteArrayOutputStream()
    IOUtils.copy(new FileInputStream(f), baos)
    val src = Base64StringSource(Base64.encodeBase64URLSafeString(baos.toByteArray))
    ExternalSource(src.localVfs)
  }

  private def getPredictor(modelSrc: ModelSource, numLabelsInTrainingSet: Int) =
    VwSparseMultilabelPredictor[Any](modelSrc, Nil, Nil, numLabelsInTrainingSet)

  private def checkVwBinFile(vwBinFilePath: String): Unit = {
    val vwBinFile = new File(vwBinFilePath)
    assertTrue("VW binary file should have been written to disk", vwBinFile.exists())
    vwBinFile.deleteOnExit()
  }

  private def checkVwRingSize(numLabelsInTrainingSet: Int, ringSize: Int): Unit = {
    assertEquals(
      "vw --ring_size parameter is incorrect:",
      numLabelsInTrainingSet + VwSparseMultilabelPredictor.AddlVwRingSize,
      ringSize.toInt
    )
  }
} 
Example 198
Source File: StdAvroModelFactory.scala    From aloha   with MIT License 5 votes vote down vote up
package com.eharmony.aloha.factory.avro

import java.io.File

import org.apache.commons.{vfs => vfs1, vfs2}
import com.eharmony.aloha.io.vfs.{Vfs1, Vfs2}
import com.eharmony.aloha.audit.impl.avro.Score
import com.eharmony.aloha.factory.ModelFactory
import org.apache.avro.generic.GenericRecord

import scala.util.Try




  @deprecated(message = "Prefer StdAvroModelFactory.fromConfig(conf: FactoryConfig)", since = "4.0.1")
  def apply(modelDomainSchemaVfsUrl: String,
            modelCodomainRefInfoStr: String,
            imports: Seq[String] = Nil,
            classCacheDir: Option[File] = None,
            dereferenceAsOptional: Boolean = true,
            useVfs2: Boolean = true): Try[ModelFactory[GenericRecord, Score]] = {

    val vfs = url(modelDomainSchemaVfsUrl, useVfs2)

    vfs.flatMap { u =>
      UrlConfig(
        u,
        modelCodomainRefInfoStr,
        imports,
        classCacheDir,
        dereferenceAsOptional
      )()
    }
  }

  private[this] def url(modelDomainSchemaVfsUrl: String, useVfs2: Boolean) = {
    val u =
      if (useVfs2)
        Try { Vfs2(vfs2.VFS.getManager.resolveFile(modelDomainSchemaVfsUrl)) }
      else Try { Vfs1(vfs1.VFS.getManager.resolveFile(modelDomainSchemaVfsUrl)) }
    FactoryConfig.wrapException(u)
  }
} 
Example 199
Source File: modelFactoryPlaceholder.scala    From aloha   with MIT License 5 votes vote down vote up
package com.eharmony.aloha.factory

import java.io.File

import com.eharmony.aloha.factory.ex.AlohaFactoryException
import com.eharmony.aloha.io.StringReadable
import org.apache.commons.{vfs, vfs2}
import spray.json.{JsObject, pimpString}

import scala.util.{Failure, Try}

// The trait declaration was lost in extraction and is reconstructed minimally here.
sealed trait ImportedModelPlaceholder {
    def resolveFileContents(): Try[JsObject]
}

private[factory] case class Vfs2ImportedModelPlaceholder(fileDescriptor: String) extends ImportedModelPlaceholder {
    def resolveFileContents() = for {
        file <- Try {
            vfs2.VFS.getManager.resolveFile(fileDescriptor)
        } recoverWith {
            case f => Failure { new AlohaFactoryException(s"Couldn't resolve VFS2 file: $fileDescriptor", f) }
        }
        json <- Try {
            StringReadable.fromVfs2(file).parseJson.asJsObject
        } recoverWith {
            case f => Failure { new AlohaFactoryException(s"Couldn't get JSON for VFS2 file: $file", f) }
        }
    } yield json
}

private[factory] case class Vfs1ImportedModelPlaceholder(fileDescriptor: String) extends ImportedModelPlaceholder {
    def resolveFileContents() = for {
        file <- Try {
            vfs.VFS.getManager.resolveFile(fileDescriptor)
        } recoverWith {
            case f => Failure { new AlohaFactoryException(s"Couldn't resolve VFS1 file: $fileDescriptor", f) }
        }
        json <- Try {
            StringReadable.fromVfs1(file).parseJson.asJsObject
        } recoverWith {
            case f => Failure { new AlohaFactoryException(s"Couldn't get JSON for VFS1 file: $file", f) }
        }
    } yield json
}

private[factory] case class FileImportedModelPlaceholder(fileDescriptor: String) extends ImportedModelPlaceholder {
    def resolveFileContents() = for {
        file <- Try {
            new File(fileDescriptor)
        } recoverWith {
            case f => Failure { new AlohaFactoryException(s"Couldn't resolve file: $fileDescriptor", f) }
        }
        json <- Try {
            StringReadable.fromFile(file).parseJson.asJsObject
        } recoverWith {
            case f => Failure { new AlohaFactoryException(s"Couldn't get JSON for file: $file", f) }
        }
    } yield json
} 
Example 200
Source File: ContainerReadable.scala    From aloha   with MIT License 5 votes vote down vote up
package com.eharmony.aloha.io

import scala.language.higherKinds

import java.io.{File, InputStream, Reader}
import java.net.URL
import org.apache.commons.{vfs => vfs1, vfs2}

trait ContainerReadable[C[_]] {
    def fromString[A](s: String): C[A]
    def fromFile[A](f: File): C[A]
    def fromInputStream[A](is: InputStream): C[A]
    def fromUrl[A](u: URL): C[A]
    def fromReader[A](r: Reader): C[A]
    def fromVfs1[A](foVfs1: vfs1.FileObject): C[A]
    def fromVfs2[A](foVfs2: vfs2.FileObject): C[A]
    def fromResource[A](s: String): C[A]
    def fromClasspathResource[A](s: String): C[A]
}