org.apache.commons.io.IOUtils Scala Examples
The following examples show how to use org.apache.commons.io.IOUtils in Scala. Each example is taken from an open-source project; the project and its license are noted above the listing.
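Before the project-specific examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the IOUtils calls that recur throughout them: toString, copy, and closeQuietly. The object name and input data are only illustrative.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.charset.StandardCharsets

import org.apache.commons.io.IOUtils

object IOUtilsBasics extends App {
  // Read an entire stream into a String.
  val in = new ByteArrayInputStream("hello, world".getBytes(StandardCharsets.UTF_8))
  val text = IOUtils.toString(in, StandardCharsets.UTF_8)

  // Copy one stream into another (many examples below use this to buffer compressed data).
  val source = new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8))
  val sink = new ByteArrayOutputStream()
  IOUtils.copy(source, sink)
  val bytes: Array[Byte] = sink.toByteArray

  // Close streams without letting close() exceptions mask the real error.
  // (Deprecated in newer commons-io releases in favour of try-with-resources.)
  IOUtils.closeQuietly(in)
  IOUtils.closeQuietly(source)
  IOUtils.closeQuietly(sink)

  println(s"read ${bytes.length} bytes: $text")
}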
Example 1
Source File: BinaryFileReader.scala From mmlspark with MIT License

// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark

import com.microsoft.ml.spark.core.env.StreamUtilities
import com.microsoft.ml.spark.core.schema.BinaryFileSchema
import com.microsoft.ml.spark.core.utils.AsyncUtils
import org.apache.commons.io.IOUtils
import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}
import org.apache.spark.binary.BinaryFileFormat
import org.apache.spark.sql.catalyst.encoders.RowEncoder
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.apache.spark.binary.ConfUtils
import org.apache.spark.sql.types.BinaryType

import scala.concurrent.{ExecutionContext, Future}
import scala.concurrent.duration.Duration

object BinaryFileReader {

  private def recursePath(fileSystem: FileSystem,
                          path: Path,
                          pathFilter: FileStatus => Boolean,
                          visitedSymlinks: Set[Path]): Array[Path] = {
    val filteredPaths = fileSystem.listStatus(path).filter(pathFilter)
    val filteredDirs = filteredPaths.filter(fs => fs.isDirectory & !visitedSymlinks(fs.getPath))
    val symlinksFound = visitedSymlinks ++ filteredDirs.filter(_.isSymlink).map(_.getPath)
    filteredPaths.map(_.getPath) ++ filteredDirs.map(_.getPath)
      .flatMap(p => recursePath(fileSystem, p, pathFilter, symlinksFound))
  }

  def recursePath(fileSystem: FileSystem, path: Path, pathFilter: FileStatus => Boolean): Array[Path] = {
    recursePath(fileSystem, path, pathFilter, Set())
  }

  def readFromPaths(df: DataFrame,
                    pathCol: String,
                    bytesCol: String,
                    concurrency: Int,
                    timeout: Int): DataFrame = {
    val outputSchema = df.schema.add(bytesCol, BinaryType, nullable = true)
    val encoder = RowEncoder(outputSchema)
    val hconf = ConfUtils.getHConf(df)

    df.mapPartitions { rows =>
      val futures = rows.map { row: Row =>
        Future {
          val path = new Path(row.getAs[String](pathCol))
          val fs = path.getFileSystem(hconf.value)
          val bytes = StreamUtilities.using(fs.open(path)) { is => IOUtils.toByteArray(is) }.get
          val ret = Row.merge(Seq(row, Row(bytes)): _*)
          ret
        }(ExecutionContext.global)
      }
      AsyncUtils.bufferedAwait(
        futures, concurrency,
        // Note: ^ is bitwise XOR in Scala, so 20 ^ 6 evaluates to 18; a millisecond-to-nanosecond
        // factor of 1e6 was presumably intended here.
        Duration.fromNanos(timeout * (20 ^ 6).toLong))(ExecutionContext.global)
    }(encoder)
  }
}
Example 2
Source File: StreamMetadata.scala From Spark-2.3.1 with Apache License 2.0

package org.apache.spark.sql.execution.streaming

import java.io.{InputStreamReader, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import scala.util.control.NonFatal

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, FSDataOutputStream, Path}
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization

import org.apache.spark.internal.Logging
import org.apache.spark.sql.streaming.StreamingQuery

// Excerpt: the enclosing object declaration is omitted in this listing.
  def write(
      metadata: StreamMetadata,
      metadataFile: Path,
      hadoopConf: Configuration): Unit = {
    var output: FSDataOutputStream = null
    try {
      val fs = metadataFile.getFileSystem(hadoopConf)
      output = fs.create(metadataFile)
      val writer = new OutputStreamWriter(output)
      Serialization.write(metadata, writer)
      writer.close()
    } catch {
      case NonFatal(e) =>
        logError(s"Error writing stream metadata $metadata to $metadataFile", e)
        throw e
    } finally {
      IOUtils.closeQuietly(output)
    }
  }
}
Example 3
Source File: Packer.scala From haystack-traces with Apache License 2.0

package com.expedia.www.haystack.trace.commons.packer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, OutputStream}
import java.util.zip.GZIPOutputStream

import com.expedia.www.haystack.trace.commons.packer.PackerType.PackerType
import com.github.luben.zstd.ZstdOutputStream
import com.google.protobuf.GeneratedMessageV3
import org.apache.commons.io.IOUtils
import org.xerial.snappy.SnappyOutputStream

object PackerType extends Enumeration {
  type PackerType = Value
  val GZIP, SNAPPY, NONE, ZSTD = Value
}

case class PackedMetadata(t: PackerType)

abstract class Packer[T <: GeneratedMessageV3] {
  val packerType: PackerType

  protected def compressStream(stream: OutputStream): OutputStream

  private def pack(protoObj: T): Array[Byte] = {
    val outStream = new ByteArrayOutputStream
    val compressedStream = compressStream(outStream)
    if (compressedStream != null) {
      IOUtils.copy(new ByteArrayInputStream(protoObj.toByteArray), compressedStream)
      compressedStream.close() // this flushes the data to final outStream
      outStream.toByteArray
    } else {
      protoObj.toByteArray
    }
  }

  def apply(protoObj: T): PackedMessage[T] = {
    PackedMessage(protoObj, pack, PackedMetadata(packerType))
  }
}

class NoopPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.NONE
  override protected def compressStream(stream: OutputStream): OutputStream = null
}

class SnappyPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.SNAPPY
  override protected def compressStream(stream: OutputStream): OutputStream = new SnappyOutputStream(stream)
}

class ZstdPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.ZSTD
  override protected def compressStream(stream: OutputStream): OutputStream = new ZstdOutputStream(stream)
}

class GzipPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.GZIP
  override protected def compressStream(stream: OutputStream): OutputStream = new GZIPOutputStream(stream)
}
Example 4
Source File: Unpacker.scala From haystack-traces with Apache License 2.0

package com.expedia.www.haystack.trace.commons.packer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream}
import java.nio.ByteBuffer
import java.util.zip.GZIPInputStream

import com.expedia.open.tracing.buffer.SpanBuffer
import com.github.luben.zstd.ZstdInputStream
import org.apache.commons.io.IOUtils
import org.json4s.jackson.Serialization
import org.xerial.snappy.SnappyInputStream

object Unpacker {
  import PackedMessage._

  private def readMetadata(packedDataBytes: Array[Byte]): Array[Byte] = {
    val byteBuffer = ByteBuffer.wrap(packedDataBytes)
    val magicBytesExist = MAGIC_BYTES.indices forall { idx => byteBuffer.get() == MAGIC_BYTES.apply(idx) }
    if (magicBytesExist) {
      val headerLength = byteBuffer.getInt
      val metadataBytes = new Array[Byte](headerLength)
      byteBuffer.get(metadataBytes, 0, headerLength)
      metadataBytes
    } else {
      null
    }
  }

  private def unpack(compressedStream: InputStream) = {
    val outputStream = new ByteArrayOutputStream()
    IOUtils.copy(compressedStream, outputStream)
    outputStream.toByteArray
  }

  def readSpanBuffer(packedDataBytes: Array[Byte]): SpanBuffer = {
    var parsedDataBytes: Array[Byte] = null
    val metadataBytes = readMetadata(packedDataBytes)
    if (metadataBytes != null) {
      val packedMetadata = Serialization.read[PackedMetadata](new String(metadataBytes))
      val compressedDataOffset = MAGIC_BYTES.length + 4 + metadataBytes.length
      packedMetadata.t match {
        case PackerType.SNAPPY =>
          parsedDataBytes = unpack(
            new SnappyInputStream(
              new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset)))
        case PackerType.GZIP =>
          parsedDataBytes = unpack(
            new GZIPInputStream(
              new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset)))
        case PackerType.ZSTD =>
          parsedDataBytes = unpack(
            new ZstdInputStream(
              new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset)))
        case _ =>
          return SpanBuffer.parseFrom(
            new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset))
      }
    } else {
      parsedDataBytes = packedDataBytes
    }
    SpanBuffer.parseFrom(parsedDataBytes)
  }
}
Example 5
Source File: Package.scala From seed with Apache License 2.0

package seed.generation

import java.io.{File, FileInputStream, OutputStream}
import java.util.jar.{Attributes, JarEntry, JarOutputStream, Manifest}

import org.apache.commons.io.IOUtils
import java.nio.file.Path

import seed.Log
import seed.cli.util.Ansi

import scala.collection.mutable

// Adapted from https://stackoverflow.com/a/1281295
object Package {
  def create(
    source: List[(Path, String)],
    target: OutputStream,
    mainClass: Option[String],
    classPath: List[String],
    log: Log
  ): Unit = {
    val manifest = new Manifest()
    val mainAttributes = manifest.getMainAttributes
    mainAttributes.put(Attributes.Name.MANIFEST_VERSION, "1.0")
    // TODO Set additional package fields: https://docs.oracle.com/javase/tutorial/deployment/jar/packageman.html
    mainClass.foreach(cls => mainAttributes.put(Attributes.Name.MAIN_CLASS, cls))
    if (classPath.nonEmpty)
      mainAttributes.put(Attributes.Name.CLASS_PATH, classPath.mkString(" "))

    val targetFile = new JarOutputStream(target, manifest)
    val entryCache = mutable.Set[String]()
    source.foreach {
      case (path, jarPath) =>
        log.debug(s"Packaging ${Ansi.italic(path.toString)}...")
        add(path.toFile, jarPath, targetFile, entryCache, log)
    }
    targetFile.close()
  }

  def add(
    source: File,
    jarPath: String,
    target: JarOutputStream,
    entryCache: mutable.Set[String],
    log: Log
  ): Unit = {
    val path =
      if (source.isFile) jarPath
      else {
        require(!jarPath.endsWith("/"))
        jarPath + "/"
      }

    val addedEntry =
      if (entryCache.contains(path)) {
        if (source.isFile)
          log.warn(
            s"Skipping file ${Ansi.italic(source.toString)} as another module already added it"
          )
        false
      } else {
        val entry = new JarEntry(path)
        entry.setTime(source.lastModified)
        target.putNextEntry(entry)
        entryCache += path
        if (source.isFile) IOUtils.copy(new FileInputStream(source), target)
        true
      }

    if (!source.isFile)
      for (nestedFile <- source.listFiles)
        add(nestedFile, path + nestedFile.getName, target, entryCache, log)

    if (addedEntry) target.closeEntry()
  }
}
Example 6
Source File: Http.scala From seed with Apache License 2.0

package seed.publish.util

import java.net.URI

import org.apache.commons.io.IOUtils
import org.apache.http.{HttpHost, HttpRequest, HttpRequestInterceptor}
import org.apache.http.entity.ContentType
import seed.util.ZioHelpers._
import zio.Task
import org.apache.http.auth.AuthScope
import org.apache.http.auth.UsernamePasswordCredentials
import org.apache.http.client.protocol.HttpClientContext
import org.apache.http.impl.auth.BasicScheme
import org.apache.http.impl.client.{BasicAuthCache, BasicCredentialsProvider}
import org.apache.http.impl.nio.client.CloseableHttpAsyncClient
import org.apache.http.impl.nio.client.HttpAsyncClients
import org.apache.http.nio.client.methods.HttpAsyncMethods
import org.apache.http.nio.protocol.HttpAsyncRequestProducer
import org.apache.http.protocol.HttpContext

class Http(httpClient: CloseableHttpAsyncClient) {
  def put(url: String, bytes: Array[Byte]): Task[String] = {
    val producer = HttpAsyncMethods.createPut(url, bytes, ContentType.DEFAULT_BINARY)
    send(url, producer)
  }

  def post(url: String, bytes: Array[Byte]): Task[String] = {
    val producer = HttpAsyncMethods.createPost(url, bytes, ContentType.DEFAULT_BINARY)
    send(url, producer)
  }

  def destroy(): Unit = httpClient.close()

  private def send(url: String, producer: HttpAsyncRequestProducer) = {
    val client = new CompletableHttpAsyncClient(httpClient)

    val uri = URI.create(url)
    val targetHost = new HttpHost(uri.getHost, uri.getPort, uri.getScheme)

    val authCache = new BasicAuthCache()
    authCache.put(targetHost, new BasicScheme())

    val clientContext = HttpClientContext.create()
    clientContext.setAuthCache(authCache)

    val future = client.execute(producer, HttpAsyncMethods.createConsumer(), clientContext)
    fromCompletableFuture(future)
      .map(r => IOUtils.toString(r.getEntity.getContent, "UTF-8"))
  }
}

class CustomRequestInterceptor(log: seed.Log) extends HttpRequestInterceptor {
  override def process(request: HttpRequest, context: HttpContext): Unit =
    log.debug("Sending HTTP request " + request + "...")
}

object Http {
  def create(log: seed.Log, authHost: String, auth: (String, String)): Http = {
    val credsProvider = new BasicCredentialsProvider()
    credsProvider.setCredentials(
      new AuthScope(authHost, 443),
      new UsernamePasswordCredentials(auth._1, auth._2)
    )

    val c = HttpAsyncClients
      .custom()
      .setDefaultCredentialsProvider(credsProvider)
      .addInterceptorFirst(new CustomRequestInterceptor(log))
      .build()
    c.start()

    new Http(c)
  }
}
Example 7
Source File: MimeTypeDetectorTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License

package com.salesforce.op.stages.impl.feature

import java.io.FileInputStream

import com.salesforce.op._
import com.salesforce.op.features.types._
import com.salesforce.op.stages.base.unary.UnaryTransformer
import com.salesforce.op.test.{OpTransformerSpec, TestFeatureBuilder, TestSparkContext}
import com.salesforce.op.testkit.RandomText
import com.salesforce.op.utils.spark.RichDataset._
import org.apache.commons.io.IOUtils
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class MimeTypeDetectorTest extends OpTransformerSpec[Text, MimeTypeDetector] with Base64TestData {
  val inputData = randomData
  val transformer = new MimeTypeDetector().setInput(randomBase64)
  val expectedResult = expectedRandom

  it should "validate the type hint" in {
    assertThrows[IllegalArgumentException](new MimeTypeDetector().setTypeHint("blarg"))
  }

  it should "validate the max bytes to parse" in {
    assertThrows[IllegalArgumentException](new MimeTypeDetector().setMaxBytesToParse(-1L))
  }

  it should "detect octet stream data" in {
    val mime = randomBase64.detectMimeTypes()
    mime.originStage shouldBe a[UnaryTransformer[_, _]]
    val result = mime.originStage.asInstanceOf[UnaryTransformer[Base64, Text]].transform(randomData)
    result.collect(mime) should contain theSameElementsInOrderAs expectedRandom
  }

  it should "detect other mime types" in {
    val mime = realBase64.detectMimeTypes()
    val result = mime.originStage.asInstanceOf[UnaryTransformer[Base64, Text]].transform(realData)
    result.collect(mime) should contain theSameElementsInOrderAs expectedMime
  }

  it should "detect other mime types with a json type hint" in {
    val mime = realBase64.detectMimeTypes(typeHint = Some("application/json"))
    val result = mime.originStage.asInstanceOf[UnaryTransformer[Base64, Text]].transform(realData)
    result.collect(mime) should contain theSameElementsInOrderAs expectedMimeJson
  }
}

trait Base64TestData {
  self: TestSparkContext =>

  val seed = 42L

  lazy val (randomData, randomBase64) = {
    val rnd = RandomText.base64(0, 10000)
    rnd.reset(seed)
    TestFeatureBuilder(Base64.empty +: Base64("") +: rnd.take(10).toSeq)
  }

  lazy val (realData, realBase64) = TestFeatureBuilder(
    Seq(
      "811harmo24to36.mp3", "820orig36to48.wav", "face.png", "log4j.properties",
      "note.xml", "RunnerParams.json", "dummy.csv", "Canon_40D.jpg", "sample.pdf"
    ).map(loadResourceAsBase64)
  )

  val expectedRandom = Text.empty +: Seq.fill(11)(Text("application/octet-stream"))

  val expectedMime = Seq(
    "audio/mpeg", "audio/vnd.wave", "image/png", "text/plain", "application/xml",
    "text/plain", "text/plain", "image/jpeg", "application/pdf"
  ).map(_.toText)

  val expectedMimeJson = Seq(
    "audio/mpeg", "audio/vnd.wave", "image/png", "application/json", "application/xml",
    "application/json", "application/json", "image/jpeg", "application/pdf"
  ).map(_.toText)

  def loadResourceAsBase64(name: String): Base64 = Base64 {
    val bytes = IOUtils.toByteArray(new FileInputStream(resourceFile(name = name)))
    new String(java.util.Base64.getEncoder.encode(bytes))
  }
}
Example 8
Source File: Base64Test.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License

package com.salesforce.op.features.types

import java.nio.charset.Charset

import com.salesforce.op.test.TestCommon
import org.apache.commons.io.IOUtils
import org.junit.runner.RunWith
import org.scalatest.PropSpec
import org.scalatest.junit.JUnitRunner
import org.scalatest.prop.PropertyChecks

@RunWith(classOf[JUnitRunner])
class Base64Test extends PropSpec with PropertyChecks with TestCommon {

  property("handle empty") {
    forAll(None) { (v: Option[String]) =>
      Base64(v).asBytes shouldBe None
      Base64(v).asString shouldBe None
      Base64(v).asInputStream shouldBe None
    }
  }

  property("can show byte contents") {
    forAll { (b: Array[Byte]) =>
      val b64 = toBase64(b)
      (Base64(b64).asBytes map (_.toList)) shouldBe Some(b.toList)
    }
  }

  property("can show string contents") {
    forAll { (s: String) =>
      val b64 = toBase64(s.getBytes)
      Base64(b64).asString shouldBe Some(s)
    }
  }

  property("produce a stream") {
    forAll { (s: String) =>
      val b64 = toBase64(s.getBytes)
      Base64(b64).asInputStream.map(IOUtils.toString(_, Charset.defaultCharset())) shouldBe Some(s)
    }
  }

  property("produce a stream and map over it") {
    forAll { (s: String) =>
      val b64 = toBase64(s.getBytes)
      Base64(b64).mapInputStream(IOUtils.toString(_, Charset.defaultCharset())) shouldBe Some(s)
    }
  }

  def toBase64(b: Array[Byte]): String = new String(java.util.Base64.getEncoder.encode(b))
}
Example 9
Source File: StreamMetadata.scala From multi-tenancy-spark with Apache License 2.0

package org.apache.spark.sql.execution.streaming

import java.io.{InputStreamReader, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import scala.util.control.NonFatal

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, FSDataOutputStream, Path}
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization

import org.apache.spark.internal.Logging
import org.apache.spark.sql.streaming.StreamingQuery

// Excerpt: the enclosing object declaration is omitted in this listing.
  def write(
      metadata: StreamMetadata,
      metadataFile: Path,
      hadoopConf: Configuration): Unit = {
    var output: FSDataOutputStream = null
    try {
      val fs = FileSystem.get(hadoopConf)
      output = fs.create(metadataFile)
      val writer = new OutputStreamWriter(output)
      Serialization.write(metadata, writer)
      writer.close()
    } catch {
      case NonFatal(e) =>
        logError(s"Error writing stream metadata $metadata to $metadataFile", e)
        throw e
    } finally {
      IOUtils.closeQuietly(output)
    }
  }
}
Example 10
Source File: S3ObjectUploader.scala From CM-Well with Apache License 2.0

package cmwell.tools.neptune.export

import java.io._
import java.util
import java.util.concurrent.{Executors, TimeoutException}
import java.util.stream.Collectors
import java.util.{Collections, Vector}

import com.amazonaws.auth.profile.ProfileCredentialsProvider
import com.amazonaws.services.s3.AmazonS3ClientBuilder
import com.amazonaws.services.s3.model.{ObjectMetadata, PutObjectRequest}
import com.amazonaws.{AmazonServiceException, ClientConfiguration, Protocol, SdkClientException}
import org.apache.commons.io.{FileUtils, IOUtils}
import org.slf4j.LoggerFactory

import scala.concurrent.{Await, ExecutionContext, Future}
import scala.concurrent.duration.{FiniteDuration, _}

object S3ObjectUploader {

  val executor = Executors.newFixedThreadPool(1)
  implicit val ec: ExecutionContext = scala.concurrent.ExecutionContext.fromExecutor(executor)
  protected lazy val logger = LoggerFactory.getLogger("s3_uploader")

  def init(proxyHost: Option[String], proxyPort: Option[Int]) = {
    val clientRegion = "us-east-1"
    val config = new ClientConfiguration
    config.setProtocol(Protocol.HTTPS)
    proxyHost.foreach(host => config.setProxyHost(host))
    proxyPort.foreach(port => config.setProxyPort(port))
    val s3Client = AmazonS3ClientBuilder.standard()
      .withRegion(clientRegion)
      .withClientConfiguration(config)
      .withCredentials(new ProfileCredentialsProvider())
      .build()
    s3Client
  }

  def persistChunkToS3Bucket(chunkData: String, fileName: String, proxyHost: Option[String], proxyPort: Option[Int], s3Directory: String) = {
    try {
      init(proxyHost, proxyPort).putObject(s3Directory, fileName, chunkData)
    }
    catch {
      case e: AmazonServiceException =>
        e.printStackTrace()
        throw e
      case e: SdkClientException =>
        e.printStackTrace()
        throw e
    }
  }

  def persistChunkToS3Bucket(tmpFile: File, proxyHost: Option[String], proxyPort: Option[Int], s3Directory: String, retryCount: Int = 3): Unit = {
    try {
      val s3UploadTask = Future { init(proxyHost, proxyPort).putObject(s3Directory, tmpFile.getName, tmpFile) }(ec)
      Await.result(s3UploadTask, 5.minutes)
      tmpFile.delete()
    }
    catch {
      case e: TimeoutException =>
        if (retryCount > 0) {
          logger.error("S3 upload task run more than 5 minutes..Going to retry")
          persistChunkToS3Bucket(tmpFile, proxyHost, proxyPort, s3Directory, retryCount - 1)
        }
        else {
          throw new Exception("S3 upload task duration was more than 5 minutes")
        }
      case e: AmazonServiceException =>
        e.printStackTrace()
        throw e
      case e: SdkClientException =>
        e.printStackTrace()
        throw e
    }
  }
}
Example 11
Source File: UtilCommands.scala From CM-Well with Apache License 2.0

import java.io.{BufferedInputStream, File}
import java.nio.file.{Files, Paths}
import java.security.MessageDigest

import scala.util.control.Breaks._
import javax.xml.bind.DatatypeConverter
import org.apache.commons.compress.archivers.ArchiveEntry
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import org.apache.commons.io.IOUtils

object UtilCommands {
  val OSX_NAME = "Mac OS X"

  val linuxSshpass = if (Files.exists(Paths.get("bin/utils/sshpass"))) "bin/utils/sshpass" else "sshpass"
  val osxSshpass = "/usr/local/bin/sshpass"

  val sshpass = if (isOSX) osxSshpass else linuxSshpass

  def isOSX = System.getProperty("os.name") == OSX_NAME

  def verifyComponentConfNotChanged(componentName: String, configFilePath: String, expectedHash: String) = {
    val confContent = UtilCommands.unTarGz("./components", componentName, configFilePath)
    UtilCommands.checksum(componentName, configFilePath, confContent, expectedHash)
  }

  def checksum(componentName: String, configFilePath: String, confContent: Array[Byte], expectedHash: String) = {
    val actualHash = MessageDigest.getInstance("MD5").digest(confContent)
    val actualHashStr = DatatypeConverter.printHexBinary(actualHash)
    if (!expectedHash.equalsIgnoreCase(actualHashStr))
      throw new Exception(s"$componentName configuration file $configFilePath has been changed, please change the template accordingly " +
        s"(the new digest is $actualHashStr)")
  }

  def unTarGz(rootFolder: String, componentName: String, configFilePath: String): Array[Byte] = {
    var tarArchiveInputStream: TarArchiveInputStream = null
    var bufferInputstream: BufferedInputStream = null
    val gzipCompressor: GzipCompressorInputStream = null
    var confContent: Array[Byte] = null
    try {
      val libDir = new File(rootFolder)
      val pathInput = libDir.listFiles().filter(file => file.getName.contains(componentName))
      val path = Paths.get(pathInput(0).getAbsolutePath)
      val bufferInputStream = new BufferedInputStream(Files.newInputStream(path))
      val gzipCompressor = new GzipCompressorInputStream(bufferInputStream)
      tarArchiveInputStream = new TarArchiveInputStream(gzipCompressor)

      var archiveEntry: ArchiveEntry = null
      archiveEntry = tarArchiveInputStream.getNextEntry
      if (archiveEntry.getName == "./")
        archiveEntry = tarArchiveInputStream.getNextEntry

      val extractFolder = archiveEntry.getName.replaceAll("^\\./", "").split("/")(0)
      while (archiveEntry != null) {
        breakable {
          if (archiveEntry.getName.replaceAll("^\\./", "") == s"$extractFolder/$configFilePath") {
            confContent = IOUtils.toByteArray(tarArchiveInputStream)
            break
          }
        }
        archiveEntry = tarArchiveInputStream.getNextEntry
      }
    } finally {
      if (tarArchiveInputStream != null) tarArchiveInputStream.close()
      if (bufferInputstream != null) bufferInputstream.close()
      if (gzipCompressor != null) gzipCompressor.close()
    }
    confContent
  }
}
Example 12
Source File: VelocityRenderer.scala From shellbase with Apache License 2.0

package com.sumologic.shellbase

import java.io._
import java.util.Properties

import org.apache.commons.io.IOUtils
import org.apache.velocity.VelocityContext
import org.apache.velocity.app.{Velocity, VelocityEngine}

import scala.collection.JavaConversions._

object VelocityRenderer {

  val props = new Properties()
  props.setProperty("runtime.references.strict", "true")
  props.setProperty("velocimacro.arguments.strict", "true")
  props.setProperty("velocimacro.permissions.allow.inline.local.scope", "true")
  props.setProperty("directive.foreach.skip.invalid", "false")
  props.setProperty("runtime.log.logsystem.log4j.logger", "org.apache.velocity")
  props.setProperty("resource.loader", "class,file")
  props.setProperty("class.resource.loader.description", "Velocity Classpath Resource Loader")
  props.setProperty("class.resource.loader.class", "org.apache.velocity.runtime.resource.loader.ClasspathResourceLoader")
  Velocity.init(props)

  def render(map: Iterable[(String, AnyRef)], reader: Reader, writer: Writer): Unit = {
    val context = new VelocityContext()
    map.foreach { pair => context.put(pair._1, pair._2) }
    Velocity.evaluate(context, writer, "ops util velocity renderer", reader)
  }

  def render(templateVars: Iterable[(String, AnyRef)], templatePath: String, outputPath: String): Unit = {
    val templateReader = new InputStreamReader(getClass.getClassLoader.getResourceAsStream(templatePath))
    val outputWriter = new OutputStreamWriter(new FileOutputStream(outputPath))
    try {
      render(templateVars, templateReader, outputWriter)
    } finally {
      IOUtils.closeQuietly(templateReader)
      IOUtils.closeQuietly(outputWriter)
    }
  }

  def createScriptFromTemplate(scriptResource: String,
                               variables: Map[AnyRef, AnyRef] = Map[AnyRef, AnyRef]()): File = {
    val engine = new VelocityEngine()
    engine.setProperty("resource.loader", "class")
    engine.setProperty("class.resource.loader.description", "Velocity Classpath Resource Loader")
    engine.setProperty("class.resource.loader.class", "org.apache.velocity.runtime.resource.loader.ClasspathResourceLoader")
    engine.init()

    val template = engine.getTemplate(scriptResource)
    val tempFile = File.createTempFile(".tmp", ".sh")

    val modifiableVariables = new java.util.HashMap[AnyRef, AnyRef]()
    modifiableVariables ++= variables

    val writer = new FileWriter(tempFile)
    try {
      template.merge(new VelocityContext(modifiableVariables), writer)
    } finally {
      IOUtils.closeQuietly(writer)
    }

    tempFile
  }
}
Example 13
Source File: PortForwarding.scala From mmlspark with MIT License

// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.io.http

import java.io.File
import java.net.URI

import com.jcraft.jsch.{JSch, Session}
import org.apache.commons.io.IOUtils

object PortForwarding {

  lazy val Jsch = new JSch()

  def forwardPortToRemote(username: String,
                          sshHost: String,
                          sshPort: Int,
                          bindAddress: String,
                          remotePortStart: Int,
                          localHost: String,
                          localPort: Int,
                          keyDir: Option[String],
                          keySas: Option[String],
                          maxRetries: Int,
                          timeout: Int
                         ): (Session, Int) = {
    keyDir.foreach(kd =>
      new File(kd).listFiles().foreach(f =>
        try {
          Jsch.addIdentity(f.getAbsolutePath)
        } catch {
          case _: com.jcraft.jsch.JSchException =>
          case e: Exception => throw e
        }
      )
    )

    keySas.foreach { ks =>
      val privateKeyBytes = IOUtils.toByteArray(new URI(ks))
      Jsch.addIdentity("forwardingKey", privateKeyBytes, null, null) //scalastyle:ignore null
    }

    val session = Jsch.getSession(username, sshHost, sshPort)
    session.setConfig("StrictHostKeyChecking", "no")
    session.setTimeout(timeout)
    session.connect()

    var attempt = 0
    var foundPort: Option[Int] = None
    while (foundPort.isEmpty && attempt <= maxRetries) {
      try {
        session.setPortForwardingR(bindAddress, remotePortStart + attempt, localHost, localPort)
        foundPort = Some(remotePortStart + attempt)
      } catch {
        case _: Exception =>
          println(s"failed to forward port. Attempt: $attempt")
          attempt += 1
      }
    }
    if (foundPort.isEmpty) {
      throw new RuntimeException(s"Could not find open port between " +
        s"$remotePortStart and ${remotePortStart + maxRetries}")
    }
    println(s"forwarding to ${foundPort.get}")
    (session, foundPort.get)
  }

  def forwardPortToRemote(options: Map[String, String]): (Session, Int) = {
    forwardPortToRemote(
      options("forwarding.username"),
      options("forwarding.sshhost"),
      options.getOrElse("forwarding.sshport", "22").toInt,
      options.getOrElse("forwarding.bindaddress", "*"),
      options.get("forwarding.remoteportstart")
        .orElse(options.get("forwarding.localport")).get.toInt,
      options.getOrElse("forwarding.localhost", "0.0.0.0"),
      options("forwarding.localport").toInt,
      options.get("forwarding.keydir"),
      options.get("forwarding.keysas"),
      options.getOrElse("forwarding.maxretires", "50").toInt,
      options.getOrElse("forwarding.timeout", "20000").toInt
    )
  }
}
Example 14
Source File: StreamUtilities.scala From mmlspark with MIT License

// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.core.env

import java.io.{ByteArrayOutputStream, InputStream}
import java.util.zip.ZipInputStream

import org.apache.commons.io.IOUtils

import scala.io.Source
import scala.util.Random

object StreamUtilities {

  import scala.util.{Failure, Success, Try}

  def usingMany[T <: AutoCloseable, U](disposable: Seq[T])(task: Seq[T] => U): Try[U] = {
    try {
      Success(task(disposable))
    } catch {
      case e: Exception => Failure(e)
    } finally {
      disposable.foreach(d => d.close())
    }
  }

  def using[T <: AutoCloseable, U](disposable: T)(task: T => U): Try[U] = {
    try {
      Success(task(disposable))
    } catch {
      case e: Exception => Failure(e)
    } finally {
      disposable.close()
    }
  }

  def usingSource[T <: Source, U](disposable: T)(task: T => U): Try[U] = {
    try {
      Success(task(disposable))
    } catch {
      case e: Exception => Failure(e)
    } finally {
      disposable.close()
    }
  }

  class ZipIterator(stream: InputStream, zipfile: String, random: Random, sampleRatio: Double = 1)
    extends Iterator[(String, Array[Byte])] {

    private val zipStream = new ZipInputStream(stream)

    private def getNext: Option[(String, Array[Byte])] = {
      var entry = zipStream.getNextEntry
      while (entry != null) {
        if (!entry.isDirectory && random.nextDouble < sampleRatio) {
          val filename = zipfile + java.io.File.separator + entry.getName

          // extracting all bytes of a given entry
          val byteStream = new ByteArrayOutputStream
          IOUtils.copy(zipStream, byteStream)
          val bytes = byteStream.toByteArray

          assert(bytes.length == entry.getSize,
            "incorrect number of bytes is read from zipstream: " + bytes.length +
              " instead of " + entry.getSize)

          return Some((filename, bytes))
        }
        entry = zipStream.getNextEntry
      }
      stream.close()
      None
    }

    private var nextValue = getNext

    def hasNext: Boolean = nextValue.isDefined

    def next: (String, Array[Byte]) = {
      val result = nextValue.get
      nextValue = getNext
      result
    }
  }
}
Example 15
Source File: RESTHelpers.scala From mmlspark with MIT License

// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.cognitive

import org.apache.commons.io.IOUtils
import org.apache.http.client.config.RequestConfig
import org.apache.http.client.methods._
import org.apache.http.impl.client.{CloseableHttpClient, HttpClientBuilder}
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager

import scala.concurrent.blocking
import scala.util.Try

object RESTHelpers {
  lazy val RequestTimeout = 60000

  lazy val RequestConfigVal: RequestConfig = RequestConfig.custom()
    .setConnectTimeout(RequestTimeout)
    .setConnectionRequestTimeout(RequestTimeout)
    .setSocketTimeout(RequestTimeout)
    .build()

  lazy val ConnectionManager = {
    val cm = new PoolingHttpClientConnectionManager()
    cm.setDefaultMaxPerRoute(Int.MaxValue)
    cm.setMaxTotal(Int.MaxValue)
    cm
  }

  lazy val Client: CloseableHttpClient = HttpClientBuilder
    .create().setConnectionManager(ConnectionManager)
    .setDefaultRequestConfig(RequestConfigVal).build()

  def retry[T](backoffs: List[Int], f: () => T): T = {
    try {
      f()
    } catch {
      case t: Throwable =>
        val waitTime = backoffs.headOption.getOrElse(throw t)
        println(s"Caught error: $t with message ${t.getMessage}, waiting for $waitTime")
        blocking { Thread.sleep(waitTime.toLong) }
        retry(backoffs.tail, f)
    }
  }

  //TODO use this elsewhere
  def safeSend(request: HttpRequestBase,
               backoffs: List[Int] = List(100, 500, 1000),
               expectedCodes: Set[Int] = Set(),
               close: Boolean = true): CloseableHttpResponse = {

    retry(List(100, 500, 1000), { () =>
      val response = Client.execute(request)
      try {
        if (response.getStatusLine.getStatusCode.toString.startsWith("2") ||
          expectedCodes(response.getStatusLine.getStatusCode)
        ) {
          response
        } else {
          val requestBodyOpt = Try(request match {
            case er: HttpEntityEnclosingRequestBase => IOUtils.toString(er.getEntity.getContent)
            case _ => ""
          }).get

          val responseBodyOpt = Try(IOUtils.toString(response.getEntity.getContent)).getOrElse("")

          throw new RuntimeException(
            s"Failed: " +
              s"\n\t response: $response " +
              s"\n\t requestUrl: ${request.getURI}" +
              s"\n\t requestBody: $requestBodyOpt" +
              s"\n\t responseBody: $responseBodyOpt")
        }
      } catch {
        case e: Exception =>
          response.close()
          throw e
      } finally {
        if (close) {
          response.close()
        }
      }
    })
  }
}
Example 16
Source File: Secrets.scala From mmlspark with MIT License

import java.io.IOException
import java.util.Base64

import sys.process._
import spray.json._
import DefaultJsonProtocol._
import org.apache.commons.io.IOUtils
import sbt.{SettingKey, TaskKey}

object Secrets {

  private val kvName = "mmlspark-keys"
  private val subscriptionID = "ce1dee05-8cf6-4ad6-990a-9c80868800ba"

  protected def exec(command: String): String = {
    val os = sys.props("os.name").toLowerCase
    os match {
      case x if x contains "windows" => Seq("cmd", "/C") ++ Seq(command) !!
      case _ => command !!
    }
  }

  private def getSecret(secretName: String): String = {
    println(s"fetching secret: $secretName")
    try {
      exec(s"az account set -s $subscriptionID")
      val secretJson = exec(s"az keyvault secret show --vault-name $kvName --name $secretName")
      secretJson.parseJson.asJsObject().fields("value").convertTo[String]
    } catch {
      case _: IOException =>
        println("WARNING: Could not load secret from keyvault, defaulting to the empty string." +
          " Please install az command line to perform authorized build steps like publishing")
        ""
      case _: java.lang.RuntimeException =>
        println("WARNING: Could not load secret from keyvault, defaulting to the empty string." +
          " Please install az command line to perform authorized build steps like publishing")
        ""
    }
  }

  lazy val nexusUsername: String = sys.env.getOrElse("NEXUS-UN", getSecret("nexus-un"))
  lazy val nexusPassword: String = sys.env.getOrElse("NEXUS-PW", getSecret("nexus-pw"))
  lazy val pgpPublic: String = new String(Base64.getDecoder.decode(
    sys.env.getOrElse("PGP-PUBLIC", getSecret("pgp-public")).getBytes("UTF-8")))
  lazy val pgpPrivate: String = new String(Base64.getDecoder.decode(
    sys.env.getOrElse("PGP-PRIVATE", getSecret("pgp-private")).getBytes("UTF-8")))
  lazy val pgpPassword: String = sys.env.getOrElse("PGP-PW", getSecret("pgp-pw"))
  lazy val storageKey: String = sys.env.getOrElse("STORAGE_KEY", getSecret("storage-key"))
}
Example 17
Source File: SAXBench.scala From osmesa with Apache License 2.0

package osmesa

import java.util.concurrent.TimeUnit

import org.apache.commons.io.IOUtils
import org.openjdk.jmh.annotations._
import vectorpipe.model.{Actions, Change}

import java.util.zip.GZIPInputStream
import javax.xml.parsers.{SAXParser, SAXParserFactory}
import scala.xml.XML

// --- //

@BenchmarkMode(Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@State(Scope.Thread)
class SAXBench {

  val sequence = 0

  @Setup
  def setup: Unit = { }

  def gzipInputStream(): GZIPInputStream = {
    // requires the addition of a gzipped OSC file in bench/src/main/resources
    val stream = getClass.getResourceAsStream("/942.osc.gz")
    new GZIPInputStream(stream)
  }

  def withScalaXML(): Int = {
    // requires Change.fromXML (see commit 1b04a1e81f1a88f374a086c98d58677ec537b1bf)
    val data = XML.loadString(IOUtils.toString(gzipInputStream))
    val changes = (data \ "_").flatMap { node =>
      (node \ "_").map(Change.fromXML(_, Actions.fromString(node.label), sequence))
    }
    changes.length
  }

  def withSAXParser(): Int = {
    val factory = SAXParserFactory.newInstance
    val parser = factory.newSAXParser
    val handler = new Change.ChangeHandler(sequence)
    parser.parse(gzipInputStream(), handler)
    handler.changeSeq.length
  }

  @Benchmark
  def useScala: Double = withScalaXML()

  @Benchmark
  def getSAXyGirl: Double = withSAXParser()
}
Example 18
Source File: VectorGrid.scala From osmesa with Apache License 2.0

package osmesa.analytics

import java.io.ByteArrayInputStream
import java.net.URI
import java.util.zip.GZIPInputStream

import geotrellis.proj4.WebMercator
import geotrellis.spark.tiling.ZoomedLayoutScheme
import geotrellis.vector.{Extent, PointFeature}
import geotrellis.vectortile.{Layer, VInt64, VectorTile}
import org.apache.commons.io.IOUtils
import org.apache.spark.internal.Logging
import osmesa.analytics.updater.Implicits._
import osmesa.analytics.updater._

import scala.collection.GenMap
import scala.collection.parallel.TaskSupport

trait VectorGrid extends Logging {
  // Default base zoom (highest resolution tiles produced)
  val DefaultBaseZoom: Int = 10

  // Number of cells per side in a gridded tile
  implicit val Cells: Int = 128

  // Number of cells in a gridded tile at the base of the pyramid (may be used for over-zooming)
  val BaseCells: Int = Cells

  // Default upload concurrency
  val DefaultUploadConcurrency: Int = 8

  implicit val LayoutScheme: ZoomedLayoutScheme = ZoomedLayoutScheme(WebMercator)
  val SequenceLayerName: String = "__sequences__"

  def getCommittedSequences(tile: VectorTile): Set[Int] =
    // NOTE when working with hashtags, this should be the changeset sequence, since changes from a
    // single sequence may appear in different batches depending on when changeset metadata arrives
    tile.layers
      .get(SequenceLayerName)
      .map(_.features.flatMap(f => f.data.values.map(valueToLong).map(_.intValue)))
      .map(_.toSet)
      .getOrElse(Set.empty)

  def makeSequenceLayer(sequences: Set[Int], extent: Extent, tileWidth: Int = 4096): (String, Layer) = {
    // create a second layer w/ a feature corresponding to committed sequences (in the absence of
    // available tile / layer metadata)
    val updatedSequences =
      sequences.toSeq.sorted
        .takeRight(1000)
        .zipWithIndex
        .map {
          case (seq, idx) =>
            idx.toString -> VInt64(seq)
        }
        .toMap

    val sequenceFeature = PointFeature(extent.center, updatedSequences)

    makeLayer(SequenceLayerName, extent, Seq(sequenceFeature), tileWidth)
  }

  def loadMVTs(urls: Map[URI, Extent])(
      implicit taskSupport: TaskSupport): GenMap[URI, VectorTile] = {
    // convert to a parallel collection to load more tiles concurrently
    val parUrls = urls.par
    parUrls.tasksupport = taskSupport

    parUrls.map {
      case (uri, extent) =>
        (uri,
         read(uri).map(
           bytes =>
             VectorTile.fromBytes(
               IOUtils.toByteArray(new GZIPInputStream(new ByteArrayInputStream(bytes))),
               extent)))
    } filter {
      case (_, mvt) => mvt.isDefined
    } map {
      case (uri, mvt) => uri -> mvt.get
    }
  }
}
Example 19
Source File: DruidClientTest.scala From spark-druid-olap with Apache License 2.0

package org.sparklinedata.druid.client.test

import org.apache.commons.io.IOUtils
import org.apache.spark.sql.sources.druid.DruidQueryResultIterator
import org.json4s.Extraction
import org.json4s.jackson.JsonMethods._
import org.scalatest.{BeforeAndAfterAll, FunSuite}
import org.sparklinedata.druid.Utils
import org.sparklinedata.druid.client.{DruidCoordinatorClient, DruidQueryServerClient}
import org.sparklinedata.druid.testenv.DruidCluster

class DruidClientTest extends FunSuite with BeforeAndAfterAll with TestUtils {

  import org.sparklinedata.druid.client.test.TPCHQueries._

  var brokerClient: DruidQueryServerClient = _
  var coordClient: DruidCoordinatorClient = _

  import Utils._

  override def beforeAll() = {
    brokerClient = new DruidQueryServerClient("localhost", DruidCluster.instance.brokerPort)
    coordClient = new DruidCoordinatorClient("localhost", DruidCluster.instance.coordinatorPort)
  }

  test("timeBoundary") {
    println(brokerClient.timeBoundary("tpch"))
  }

  test("coordTimeBoundary") {
    println(coordClient.timeBoundary("tpch"))
  }

  test("metaData") {
    println(brokerClient.metadata("tpch", false))
  }

  test("tpchQ1") {
    println(pretty(render(Extraction.decompose(q1))))
    val r = brokerClient.executeQuery(q1)
    r.foreach(println)
  }

  test("tpchQ3") {
    println(pretty(render(Extraction.decompose(q3))))
    val r = brokerClient.executeQuery(q3)
    r.foreach(println)
  }

  test("tpchQ1MonthGrain") {
    println(pretty(render(Extraction.decompose(q1))))
    val r = brokerClient.executeQuery(q1)
    r.foreach(println)
  }

  test("streamQueryResult") {
    def qRis = getClass.getClassLoader.getResourceAsStream("sampleQueryResult.json")

    for (i <- 0 to 5) {
      recordTime("streamed read") {
        val is = IOUtils.toBufferedInputStream(qRis)
        val it = DruidQueryResultIterator(false, is)
        while (it.hasNext) {
          it.next
        }
      }
      recordTime("list read") {
        val is = IOUtils.toBufferedInputStream(qRis)
        val it = DruidQueryResultIterator(false, is, (), true)
        while (it.hasNext) {
          it.next
        }
      }
    }
  }

  test("serversInfo") {
    val sInfo = coordClient.serversInfo
    println(pretty(render(Extraction.decompose(sInfo))))
  }

  test("dataSourceInfo") {
    val dInfo = coordClient.dataSourceInfo("tpch")
    println(pretty(render(Extraction.decompose(dInfo))))
  }
}
Example 20
Source File: HadoopFileSystemLogStore.scala From delta with Apache License 2.0

package org.apache.spark.sql.delta.storage

import java.io.{BufferedReader, FileNotFoundException, InputStreamReader}
import java.nio.charset.StandardCharsets.UTF_8
import java.nio.file.FileAlreadyExistsException
import java.util.UUID

import scala.collection.JavaConverters._

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession

// Excerpt: the enclosing class declaration (which supplies getHadoopConfiguration) and other members are omitted in this listing.
  protected def writeWithRename(
      path: Path,
      actions: Iterator[String],
      overwrite: Boolean = false): Unit = {
    val fs = path.getFileSystem(getHadoopConfiguration)

    if (!fs.exists(path.getParent)) {
      throw new FileNotFoundException(s"No such file or directory: ${path.getParent}")
    }
    if (overwrite) {
      val stream = fs.create(path, true)
      try {
        actions.map(_ + "\n").map(_.getBytes(UTF_8)).foreach(stream.write)
      } finally {
        stream.close()
      }
    } else {
      if (fs.exists(path)) {
        throw new FileAlreadyExistsException(path.toString)
      }
      val tempPath = createTempPath(path)
      var streamClosed = false // This flag is to avoid double close
      var renameDone = false // This flag is to save the delete operation in most of cases.
      val stream = fs.create(tempPath)
      try {
        actions.map(_ + "\n").map(_.getBytes(UTF_8)).foreach(stream.write)
        stream.close()
        streamClosed = true
        try {
          if (fs.rename(tempPath, path)) {
            renameDone = true
          } else {
            if (fs.exists(path)) {
              throw new FileAlreadyExistsException(path.toString)
            } else {
              throw new IllegalStateException(s"Cannot rename $tempPath to $path")
            }
          }
        } catch {
          case _: org.apache.hadoop.fs.FileAlreadyExistsException =>
            throw new FileAlreadyExistsException(path.toString)
        }
      } finally {
        if (!streamClosed) {
          stream.close()
        }
        if (!renameDone) {
          fs.delete(tempPath, false)
        }
      }
    }
  }

  protected def createTempPath(path: Path): Path = {
    new Path(path.getParent, s".${path.getName}.${UUID.randomUUID}.tmp")
  }

  override def invalidateCache(): Unit = {}
}
Example 21
Source File: TestAISStreams.scala From incubator-daffodil with Apache License 2.0

package org.apache.daffodil.layers

import org.junit.Assert._
import java.io._
import org.junit.Test
import org.apache.daffodil.io.RegexLimitingStream
import java.nio.charset.StandardCharsets
import org.apache.daffodil.util.Misc
import org.apache.commons.io.IOUtils
import org.apache.daffodil.io.LayerBoundaryMarkInsertingJavaOutputStream

// Excerpt: the enclosing test class declaration (where iso8859 is defined) is omitted in this listing.
  @Test def testAISPayloadArmoringDecode() = {
    val dataString = "14eGL:@000o8oQ'LMjOchmG@08HK,"
    val bba = new ByteArrayInputStream(dataString.getBytes(iso8859))
    //
    // regex is ",0*"
    //
    val rls = new RegexLimitingStream(bba, ",", ",", iso8859)
    val aas = new AISPayloadArmoringInputStream(rls)

    val baos = new ByteArrayOutputStream()
    var c: Int = -1
    while ({
      c = aas.read()
      c != -1
    }) {
      baos.write(c)
    }
    baos.close()
    val result = baos.toByteArray()
    val expected = Misc.bits2Bytes("000001 000100 101101 010111 011100 001010 010000 000000 000000 000000 110111 001000 110111 100001 101000 011100 011101 110010 011111 101011 110000 110101 010111 010000 000000 001000 011000 011011 ")
    assertEquals(expected.length, result.length)
    val pairs = expected zip result
    pairs.foreach { case (exp, act) => assertEquals(exp, act) }
  }

  @Test def testAISPayloadArmoringEncode() = {
    val dataBytes = Misc.bits2Bytes("000001 000100 101101 010111 011100 001010 010000 000000 000000 000000 110111 001000 110111 100001 101000 011100 011101 110010 011111 101011 110000 110101 010111 010000 000000 001000 011000 011011 ")
    val bais = new ByteArrayInputStream(dataBytes)
    val baos = new ByteArrayOutputStream()
    val lbmijos = new LayerBoundaryMarkInsertingJavaOutputStream(baos, ",", iso8859)
    val aas = new AISPayloadArmoringOutputStream(lbmijos)
    IOUtils.copy(bais, aas)
    aas.close()
    val result = baos.toByteArray()
    val expected = "14eGL:@000o8oQ'LMjOchmG@08HK,".getBytes(iso8859)
    assertEquals(expected.length, result.length)
    (expected zip result).foreach { case (exp, act) => assertEquals(exp, act) }
  }
}
Example 22
Source File: services.scala From InteractiveGraph-neo4j with BSD 2-Clause "Simplified" License

package org.grapheco.server.pidb

import java.io.{File, FileInputStream}

import org.apache.commons.io.{FileUtils, IOUtils}
import org.grapheco.server.util.{JsonUtils, Logging, ServletContextUtils}
import org.neo4j.driver.v1._
import org.neo4j.graphdb.factory.{GraphDatabaseFactory, GraphDatabaseSettings}
import org.neo4j.graphdb.{GraphDatabaseService, Label, RelationshipType}
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.beans.factory.{DisposableBean, InitializingBean}
import cn.pidb.engine.{BoltService, CypherService, PidbConnector}

import scala.collection.JavaConversions._
import scala.collection.mutable
import scala.reflect.ClassTag

class PidbService(boltUrl: String, boltUser: String, boltPassword: String)
  extends BoltService(boltUrl, boltUser, boltPassword) {

  def getRelativeOrAbsoluteFile(path: String) = {
    Some(new File(path)).map { file =>
      if (file.isAbsolute) {
        file
      }
      else {
        new File(ServletContextUtils.getServletContext.getRealPath(s"/${path}"))
      }
    }.get
  }
}
Example 23
Source File: CSVOutputFormatter.scala From glow with Apache License 2.0

package io.projectglow.transformers.pipe

import java.io.InputStream

import scala.collection.JavaConverters._

import com.univocity.parsers.csv.CsvParser
import org.apache.commons.io.IOUtils
import org.apache.spark.sql.execution.datasources.csv.{CSVDataSourceUtils, CSVUtils, UnivocityParserUtils}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{StringType, StructField, StructType}

import io.projectglow.SparkShim.{CSVOptions, UnivocityParser}

class CSVOutputFormatter(parsedOptions: CSVOptions) extends OutputFormatter {

  private def getSchema(record: Array[String]): StructType = {
    val header = CSVDataSourceUtils.makeSafeHeader(
      record,
      SQLConf.get.caseSensitiveAnalysis,
      parsedOptions
    )
    val fields = header.map { fieldName =>
      StructField(fieldName, StringType, nullable = true)
    }
    StructType(fields)
  }

  override def makeIterator(stream: InputStream): Iterator[Any] = {
    val lines = IOUtils.lineIterator(stream, "UTF-8").asScala
    val filteredLines = CSVUtils.filterCommentAndEmpty(lines, parsedOptions)

    if (filteredLines.isEmpty) {
      return Iterator.empty
    }

    val firstLine = filteredLines.next
    val csvParser = new CsvParser(parsedOptions.asParserSettings)
    val firstRecord = csvParser.parseLine(firstLine)
    val schema = getSchema(firstRecord)
    val univocityParser = new UnivocityParser(schema, schema, parsedOptions)

    val parsedIter = UnivocityParserUtils.parseIterator(
      Iterator(firstLine) ++ filteredLines,
      univocityParser,
      schema
    )

    val parsedIterWithoutHeader = if (parsedOptions.headerFlag) {
      parsedIter.drop(1)
    } else {
      parsedIter
    }

    Iterator(schema) ++ parsedIterWithoutHeader.map(_.copy)
  }
}

class CSVOutputFormatterFactory extends OutputFormatterFactory {
  override def name: String = "csv"

  override def makeOutputFormatter(
      options: Map[String, String]
  ): OutputFormatter = {
    val parsedOptions = new CSVOptions(
      options,
      SQLConf.get.csvColumnPruning,
      SQLConf.get.sessionLocalTimeZone
    )
    new CSVOutputFormatter(parsedOptions)
  }
}
Example 24
Source File: DynamicResource.scala From sumobot with Apache License 2.0

package com.sumologic.sumobot.http_frontend

import akka.http.scaladsl.model.{ContentType, ContentTypes}
import org.apache.commons.io.IOUtils
import org.fusesource.scalate.TemplateEngine
import org.fusesource.scalate.util.{FileResourceLoader, Resource}

object DynamicResource {
  private val Engine = new TemplateEngine
  Engine.allowReload = false
  Engine.allowCaching = true

  Engine.resourceLoader = new FileResourceLoader {
    override def resource(filename: String): Option[Resource] = {
      val uri = getClass.getResource(filename).toURI.toString

      val stream = getClass.getResourceAsStream(filename)
      val templateText = IOUtils.toString(stream)
      stream.close()

      Some(Resource.fromText(uri, templateText))
    }
  }
}

case class DynamicResource(templateFile: String) {
  DynamicResource.Engine.load(templateFile)

  def contents(templateVariables: Map[String, Any]): String = {
    DynamicResource.Engine.layout(templateFile, templateVariables)
  }

  val contentType: ContentType.NonBinary = ContentTypes.`text/html(UTF-8)`
}
Example 25
Source File: StaticResource.scala From sumobot with Apache License 2.0

package com.sumologic.sumobot.http_frontend

import akka.http.scaladsl.model.{ContentType, ContentTypes, HttpCharsets, MediaTypes}
import org.apache.commons.io.IOUtils

object StaticResource {
  private[http_frontend] val DefaultContentType = ContentTypes.`text/html(UTF-8)`

  case class ContentTypePair(extension: String, contentType: ContentType)

  private[http_frontend] val KnownContentTypes = Array(
    ContentTypePair(".html", ContentTypes.`text/html(UTF-8)`),
    ContentTypePair(".css", ContentType(MediaTypes.`text/css`, HttpCharsets.`UTF-8`)),
    ContentTypePair(".js", ContentType(MediaTypes.`application/javascript`, HttpCharsets.`UTF-8`))
  )
}

case class StaticResource(filename: String) {
  def contents: Array[Byte] = {
    val stream = getClass.getResourceAsStream(filename)
    val result = IOUtils.toByteArray(stream)
    stream.close()
    result
  }

  def contentType: ContentType = {
    val contentType = StaticResource.KnownContentTypes.find(contentType => filename.endsWith(contentType.extension))
      .map(contentTypePair => contentTypePair.contentType)
    contentType.getOrElse(StaticResource.DefaultContentType)
  }
}
Example 26
Source File: FileUtilsSpec.scala From warp-core with MIT License

package com.workday.warp.common.utils

import java.io.InputStream
import java.nio.charset.Charset
import java.util.zip.ZipException

import com.workday.warp.common.spec.WarpJUnitSpec
import com.workday.warp.junit.UnitTest
import org.apache.commons.io.IOUtils

// Excerpt: the enclosing spec class declaration and remaining tests are omitted in this listing.
  @UnitTest
  def testNonExistentZipEntryStreamCannotBeRetrievedFromZipFile(): Unit = {
    val zipEntryName: String = "DOES_NOT_EXIST"
    val filePath: String = getClass.getResource("/simpleZip.zip").getPath

    val thrown: Throwable = intercept[NullPointerException] {
      FileUtils.getStreamToLogFileInZip(filePath, "DOES_NOT_EXIST")
    }
    thrown.getMessage should be (s"Zip entry $zipEntryName not found in $filePath")
  }
}
Example 27
Source File: MultipartUploadSpec.scala From peregrine with Apache License 2.0

package io.peregrine

import io.peregrine.test.FlatSpecHelper
import com.twitter.finagle.http.{Request => FinagleRequest}
import org.apache.commons.io.IOUtils

class MultipartUploadSpec extends FlatSpecHelper {

  class ExampleApp extends Controller {
    post("/groups_file") { request =>
      val groupsParam = request.multiParams.get("groups")
      val typeParam = request.multiParams.get("type")

      render
        .header("X-Content-Type", groupsParam.get.contentType.toString)
        .header("X-Filename", groupsParam.get.filename.toString)
        .header("X-Type-Text", typeParam.get.value)
        .plain("ok").toFuture
    }
  }

  val server = new PeregrineServer
  server.register(new ExampleApp)

  "Multi part uploads with text and file fields" should "work" in {
    val s = getClass.getResourceAsStream("/upload.bytes")
    val b = IOUtils.toByteArray(s)
    val r = FinagleRequest.decodeBytes(b)
    send(r)
    response.code should equal (200)
    response.getHeader("X-Content-Type") should equal("Some(image/gif)")
    response.getHeader("X-Filename") should equal("Some(dealwithit.gif)")
    response.getHeader("X-Type-Text") should equal("text")
  }
}
Example 28
Source File: ContainerReadableByString.scala From aloha with MIT License

package com.eharmony.aloha.io

import scala.language.higherKinds

import org.apache.commons.io.IOUtils
import java.lang.String
import java.io.{ByteArrayOutputStream, Reader, InputStreamReader, InputStream}

// Excerpt: the enclosing trait declaration, which supplies C[_], inputCharset and fromString, is omitted in this listing.
  def fromReader[A](r: Reader): C[A] = {
    try {
      val baos = new ByteArrayOutputStream // Don't need to close.
      IOUtils.copy(r, baos, inputCharset)
      fromString[A](new String(baos.toByteArray))
    }
    finally {
      IOUtils.closeQuietly(r)
    }
  }
}
Example 29
Source File: ReadableByString.scala From aloha with MIT License

package com.eharmony.aloha.io

import java.io.{InputStreamReader, ByteArrayOutputStream, Reader, InputStream}
import org.apache.commons.io.IOUtils

// Excerpt: the enclosing trait declaration, which supplies the result type A, inputCharset and fromString, is omitted in this listing.
  final def fromReader(r: Reader): A = {
    try {
      val baos = new ByteArrayOutputStream // Don't need to close.
      IOUtils.copy(r, baos, inputCharset)
      fromString(new String(baos.toByteArray))
    }
    finally {
      IOUtils.closeQuietly(r)
    }
  }
}
Example 30
Source File: package.scala From aloha with MIT License

package com.eharmony

import java.util.Properties

import org.apache.commons.io.IOUtils
import org.apache.commons.vfs2.VFS

package object aloha {

  def pkgName = getClass.getPackage.getName
  def version: String = _version

  private[this] lazy val _version: String = {
    val is = VFS.getManager.resolveFile("res:" + pkgName.replaceAll("\\.", "/") + "/version.properties").getContent.getInputStream
    try {
      val p = new Properties()
      p.load(is)
      p.getProperty("aloha.version")
    } finally {
      IOUtils closeQuietly is
    }
  }
}
Example 31
Source File: StdAvroModelFactoryTest.scala From aloha with MIT License

package com.eharmony.aloha.factory.avro

import com.eharmony.aloha.audit.impl.avro.Score
import com.eharmony.aloha.factory.ModelFactory
import com.eharmony.aloha.io.vfs.Vfs1
import com.eharmony.aloha.models.Model
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericData, GenericRecord}
import org.apache.commons.io.IOUtils
import org.junit.Assert.assertEquals
import org.junit.Test
import org.junit.runner.RunWith
import org.junit.runners.BlockJUnit4ClassRunner

import scala.util.Try

// Excerpt: the enclosing test class declaration is omitted in this listing.
  private[this] def record = {
    val r = new GenericData.Record(TheSchema)
    r.put("req_str_1", "smart handsome stubborn")
    r
  }
}

object StdAvroModelFactoryTest {
  private lazy val TheSchema = {
    val is = getClass.getClassLoader.getResourceAsStream(SchemaUrlResource)
    try new Schema.Parser().parse(is) finally IOUtils.closeQuietly(is)
  }

  private val ExpectedResult = 7d

  private val SchemaUrlResource = "avro/class7.avpr"
  private val SchemaUrl = s"res:$SchemaUrlResource"
  private val SchemaFile = new java.io.File(getClass.getClassLoader.getResource(SchemaUrlResource).getFile)
  private val SchemaVfs1FileObject = org.apache.commons.vfs.VFS.getManager.resolveFile(SchemaUrl)
  private val SchemaVfs2FileObject = org.apache.commons.vfs2.VFS.getManager.resolveFile(SchemaUrl)

  private val Imports = Seq("com.eharmony.aloha.feature.BasicFunctions._", "scala.math._")

  private val ReturnType = "Double"

  private val ModelJson =
    """
      |{
      |  "modelType": "Regression",
      |  "modelId": { "id": 0, "name": "" },
      |  "features" : {
      |    "my_attributes": "${req_str_1}.split(\"\\\\W+\").map(v => (s\"=$v\", 1.0))"
      |  },
      |  "weights": {
      |    "my_attributes=handsome": 1,
      |    "my_attributes=smart": 2,
      |    "my_attributes=stubborn": 4
      |  }
      |}
    """.stripMargin
}
Example 32
Source File: VwSparseMultilabelPredictorTest.scala From aloha with MIT License

package com.eharmony.aloha.models.vw.jni.multilabel

import java.io.{ByteArrayOutputStream, File, FileInputStream}

import com.eharmony.aloha.ModelSerializationTestHelper
import com.eharmony.aloha.io.sources.{Base64StringSource, ExternalSource, ModelSource}
import org.apache.commons.codec.binary.Base64
import org.apache.commons.io.IOUtils
import org.junit.Assert._
import org.junit.Test
import org.junit.runner.RunWith
import org.junit.runners.BlockJUnit4ClassRunner
import vowpalWabbit.learner.{VWActionScoresLearner, VWLearners}

@RunWith(classOf[BlockJUnit4ClassRunner])
class VwSparseMultilabelPredictorTest extends ModelSerializationTestHelper {
  import VwSparseMultilabelPredictorTest._

  @Test def testSerializability(): Unit = {
    val predictor = getPredictor(getModelSource(), 3)
    val ds = serializeDeserializeRoundTrip(predictor)
    assertEquals(predictor, ds)
    assertEquals(predictor.vwParams(), ds.vwParams())
    assertNotNull(ds.vwModel)
  }

  @Test def testVwParameters(): Unit = {
    val numLabelsInTrainingSet = 3
    val predictor = getPredictor(getModelSource(), numLabelsInTrainingSet)

    predictor.vwParams() match {
      case Data(vwBinFilePath, ringSize) =>
        checkVwBinFile(vwBinFilePath)
        checkVwRingSize(numLabelsInTrainingSet, ringSize.toInt)
      case ps => fail(s"Unexpected VW parameters format. Found string: $ps")
    }
  }
}

object VwSparseMultilabelPredictorTest {
  private val Data = """\s*-i\s+(\S+)\s+--ring_size\s+(\d+)\s+--testonly\s+--quiet""".r

  private def getModelSource(): ModelSource = {
    val f = File.createTempFile("i_dont", "care")
    f.deleteOnExit()
    val learner = VWLearners.create[VWActionScoresLearner](s"--quiet --csoaa_ldf mc --csoaa_rank -f ${f.getCanonicalPath}")
    learner.close()
    val baos = new ByteArrayOutputStream()
    IOUtils.copy(new FileInputStream(f), baos)
    val src = Base64StringSource(Base64.encodeBase64URLSafeString(baos.toByteArray))
    ExternalSource(src.localVfs)
  }

  private def getPredictor(modelSrc: ModelSource, numLabelsInTrainingSet: Int) =
    VwSparseMultilabelPredictor[Any](modelSrc, Nil, Nil, numLabelsInTrainingSet)

  private def checkVwBinFile(vwBinFilePath: String): Unit = {
    val vwBinFile = new File(vwBinFilePath)
    assertTrue("VW binary file should have been written to disk", vwBinFile.exists())
    vwBinFile.deleteOnExit()
  }

  private def checkVwRingSize(numLabelsInTrainingSet: Int, ringSize: Int): Unit = {
    assertEquals(
      "vw --ring_size parameter is incorrect:",
      numLabelsInTrainingSet + VwSparseMultilabelPredictor.AddlVwRingSize,
      ringSize.toInt
    )
  }
}
Example 33
Source File: UTF8TextOutputFormatter.scala From glow with Apache License 2.0 | 5 votes |
package io.projectglow.transformers.pipe import java.io.InputStream import scala.collection.JavaConverters._ import org.apache.commons.io.IOUtils import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.types.{StringType, StructField, StructType} import org.apache.spark.unsafe.types.UTF8String class UTF8TextOutputFormatter() extends OutputFormatter { override def makeIterator(stream: InputStream): Iterator[Any] = { val schema = StructType(Seq(StructField("text", StringType))) val iter = IOUtils.lineIterator(stream, "UTF-8").asScala.map { s => new GenericInternalRow(Array(UTF8String.fromString(s)): Array[Any]) } Iterator(schema) ++ iter } } class UTF8TextOutputFormatterFactory extends OutputFormatterFactory { override def name: String = "text" override def makeOutputFormatter(options: Map[String, String]): OutputFormatter = { new UTF8TextOutputFormatter } }
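A minimal sketch of the IOUtils.lineIterator pattern used above, outside of Spark; the input data below is made up for illustration:

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets
import scala.collection.JavaConverters._
import org.apache.commons.io.IOUtils

object LineIteratorDemo {
  def main(args: Array[String]): Unit = {
    val stream = new ByteArrayInputStream("a\nb\nc".getBytes(StandardCharsets.UTF_8))
    // lineIterator reads lazily; asScala exposes it to normal Scala collection operations.
    val lines = IOUtils.lineIterator(stream, "UTF-8").asScala.map(_.toUpperCase).toList
    println(lines) // List(A, B, C)
    IOUtils.closeQuietly(stream)
  }
}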
Example 34
Source File: TarGz$Test.scala From mystem-scala with MIT License | 5 votes |
package ru.stachek66.tools import java.io.{File, FileInputStream} import org.apache.commons.io.IOUtils import org.junit.runner.RunWith import org.scalatest.FunSuite import org.scalatest.junit.JUnitRunner class TarGz$Test extends FunSuite { test("tgz-test") { val src = new File("src/test/resources/test.txt") TarGz.unpack( new File("src/test/resources/test.tar.gz"), new File("src/test/resources/res.txt")) match { case f => val content0 = IOUtils.toString(new FileInputStream(f)) val content1 = IOUtils.toString(new FileInputStream(src)) print(content0.trim + " vs " + content1.trim) assert(content0 === content1) } } }
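The assertion above compares two files by reading both into strings. Commons IO also offers IOUtils.contentEquals for a streaming comparison; a minimal sketch under the assumption that both files exist locally (the paths below are placeholders):

import java.io.{File, FileInputStream}
import org.apache.commons.io.IOUtils

object CompareFiles {
  // Stream-compare two files without loading either fully into memory.
  def sameContent(a: File, b: File): Boolean = {
    val (inA, inB) = (new FileInputStream(a), new FileInputStream(b))
    try IOUtils.contentEquals(inA, inB)
    finally { IOUtils.closeQuietly(inA); IOUtils.closeQuietly(inB) }
  }

  def main(args: Array[String]): Unit =
    // Placeholder paths; point these at real files before running.
    println(sameContent(new File("left.txt"), new File("right.txt")))
}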
Example 35
Source File: HDFSStore.scala From speedo with Apache License 2.0 | 5 votes |
package com.htc.speedo.caffe import com.twitter.bijection.Codec import com.twitter.storehaus.Store import com.twitter.util.{ Future, Time } import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path object HDFSStore { val fs = rootDir.getFileSystem(conf) // make sure the root directory exists fs.mkdirs(rootDir) override def get(key: String) = Future { Some(new Path(rootDir, key)).filter(fs.exists).flatMap { path => val stream = fs.open(path) val bytes = IOUtils.toByteArray(stream) stream.close codec.invert(bytes).toOption } } override def put(kv: (String, Option[V])) = Future { val path = new Path(rootDir, kv._1) kv._2 match { case None => fs.delete(path, false) case Some(v) => val bytes = codec(v) val stream = fs.create(path, true) stream.write(bytes) stream.close } } override def close(time: Time) = Future { fs.close } }
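The get method above reads a stored value by pulling the whole HDFS file into a byte array with IOUtils.toByteArray. A standalone sketch of that read step, assuming a default Configuration (which resolves to the local filesystem) and a placeholder path:

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

object HdfsReadBytes {
  // Read the whole file at `path` into memory; appropriate for small blobs such as serialized models.
  def readBytes(path: Path, conf: Configuration): Array[Byte] = {
    val fs = path.getFileSystem(conf)
    val in = fs.open(path)
    try IOUtils.toByteArray(in)
    finally IOUtils.closeQuietly(in)
  }

  def main(args: Array[String]): Unit = {
    val bytes = readBytes(new Path("/tmp/example.bin"), new Configuration()) // placeholder path
    println(s"read ${bytes.length} bytes")
  }
}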
Example 36
Source File: ArchiveUtils.scala From dl4scala with MIT License | 5 votes |
package org.dl4scala.util import org.slf4j.LoggerFactory import org.apache.commons.compress.archivers.tar.TarArchiveEntry import org.apache.commons.compress.archivers.tar.TarArchiveInputStream import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream import org.apache.commons.io.FileUtils import org.apache.commons.io.IOUtils import java.io._ import java.util.zip.GZIPInputStream import java.util.zip.ZipInputStream tarIn.close() } else if (file.endsWith(".gz")) { val is2 = new GZIPInputStream(fin) val extracted = new File(target.getParent, target.getName.replace(".gz", "")) if (extracted.exists) extracted.delete extracted.createNewFile val fos = FileUtils.openOutputStream(extracted) IOUtils.copyLarge(is2, fos) is2.close() fos.flush() fos.close() } target.delete } }
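The .gz branch above streams a GZIPInputStream into a FileOutputStream with IOUtils.copyLarge. A minimal sketch of that decompression step on its own; the file names are placeholders:

import java.io.{File, FileInputStream}
import java.util.zip.GZIPInputStream
import org.apache.commons.io.{FileUtils, IOUtils}

object Gunzip {
  // Decompress `source` into `target`, streaming so large files are never held fully in memory.
  def gunzip(source: File, target: File): Unit = {
    val in = new GZIPInputStream(new FileInputStream(source))
    val out = FileUtils.openOutputStream(target) // also creates missing parent directories
    try IOUtils.copyLarge(in, out)
    finally { IOUtils.closeQuietly(in); IOUtils.closeQuietly(out) }
  }

  def main(args: Array[String]): Unit =
    gunzip(new File("data.txt.gz"), new File("data.txt")) // placeholder paths
}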
Example 37
Source File: Secret.scala From skuber with Apache License 2.0 | 5 votes |
package skuber import java.io._ import org.apache.commons.io.IOUtils case class Secret( val kind: String ="Secret", override val apiVersion: String = v1, val metadata: ObjectMeta, data: Map[String, Array[Byte]] = Map(), val `type`: String = "") extends ObjectResource { def add(key: String, is: InputStream) : Secret = { val bytes = IOUtils.toByteArray(is) add(key, bytes) } def add(key: String, bytes: Array[Byte]): Secret = this.copy(data = data + (key -> bytes)) } object Secret { val specification=CoreResourceSpecification( scope = ResourceSpecification.Scope.Namespaced, names = ResourceSpecification.Names( plural="secrets", singular="secret", kind="Secret", shortNames=Nil ) ) implicit val secDef = new ResourceDefinition[Secret] { def spec=specification } implicit val secListDef = new ResourceDefinition[SecretList] { def spec=specification } }
Example 38
Source File: SparkSvc.scala From Mastering-Spark-for-Data-Science with MIT License | 5 votes |
package svc import java.io.StringWriter import java.net.{HttpURLConnection, URL} import com.typesafe.config.ConfigFactory import io.gzet.recommender.Node import org.apache.commons.io.IOUtils import play.api.Logger import play.api.libs.json._ class SparkSvc() { val config = ConfigFactory.load() val host = config.getString("spark.job.server.host") val port = config.getInt("spark.job.server.port") val timeout = config.getInt("spark.job.server.timeout") val appName = config.getString("spark.job.server.app") val context = config.getString("spark.job.server.context") val indexJob = config.getString("spark.job.index") val playlistJob = config.getString("spark.job.playlist") val playlistRecommendJob = config.getString("spark.job.personalized.playlist") private def getConnection(endpoint: String, params: Option[String]) = { try { val url = new URL(endpoint) val connection = url.openConnection().asInstanceOf[HttpURLConnection] connection.setDoOutput(true) connection.setRequestMethod("POST") connection.setRequestProperty("Accept", "application/json") if(params.isDefined){ val os = connection.getOutputStream os.write(params.get.getBytes()) os.flush() os.close() } val inputStream = connection.getInputStream val writer = new StringWriter() IOUtils.copy(inputStream, writer, "UTF-8") val ret = writer.toString Json.parse(ret) } catch { case e: Exception => throw new Exception("Job Failed: " + e.getMessage) } } private def parseResponse(json: JsValue) : String = { val jobId = (json \ "result" \ "jobId").asOpt[String] if(jobId.isDefined){ s"Job submitted [${jobId.get}]" } else { val message = (json \ "result" \ "message").asOpt[String] if(message.isDefined){ throw new Exception(s"Job failed: ${message.get}") } throw new Exception("Could not find Spark job id") } } def index(path: String): String = { Logger.info("Submitting INDEX job") val url = s"http://$host:$port/jobs?appName=$appName&classPath=$indexJob&context=$context" val params = "input.dir=\"" + path + "\"" val json = getConnection(url, Some(params)) parseResponse(json) } def playlist() = { Logger.info("Submitting PLAYLIST job") val url = s"http://$host:$port/jobs?appName=$appName&classPath=$playlistJob&context=$context" val json = getConnection(url, None) parseResponse(json) } def playlist(id: Long) = { Logger.info("Submitting RECOMMEND job") val url = s"http://$host:$port/jobs?appName=$appName&classPath=$playlistRecommendJob&context=$context&sync=true&timeout=$timeout" val params = s"song.id=$id" val json: JsValue = getConnection(url, Some(params)) val array = (json \ "result").as[Array[String]] array.map({line => val Array(id, pr, song) = line.split(",").take(3) Node(id.toLong, song, pr.toDouble) }).toList } }
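getConnection above drains the HTTP response into a StringWriter with IOUtils.copy(inputStream, writer, "UTF-8"). A minimal sketch of just that response-reading step, with a placeholder URL; IOUtils.toString(inputStream, "UTF-8") would be an equivalent one-liner:

import java.io.StringWriter
import java.net.{HttpURLConnection, URL}
import org.apache.commons.io.IOUtils

object HttpGetAsString {
  def get(endpoint: String): String = {
    val connection = new URL(endpoint).openConnection().asInstanceOf[HttpURLConnection]
    connection.setRequestMethod("GET")
    val in = connection.getInputStream
    try {
      val writer = new StringWriter()
      IOUtils.copy(in, writer, "UTF-8") // same call as in SparkSvc.getConnection
      writer.toString
    } finally IOUtils.closeQuietly(in)
  }

  def main(args: Array[String]): Unit =
    println(get("http://localhost:8090/healthz")) // placeholder URL
}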
Example 39
Source File: PerformanceTester.scala From Mastering-Spark-for-Data-Science with MIT License | 5 votes |
package com.gravity.goose import org.apache.commons.lang.time.StopWatch import java.io.InputStream import org.apache.commons.io.IOUtils import utils.FileHelper def main(args: Array[String]) { System.out.println("testing performance of general goose extraction algos") implicit val config = TestUtils.NO_IMAGE_CONFIG val goose = new Goose(config) val html = FileHelper.loadResourceFile(TestUtils.staticHtmlDir + "scribd1.txt", Goose.getClass) val url = "http://www.scribd.com/doc/52584146/Microfinance-and-Poverty-Reduction?in_collection=2987942" val clock: StopWatch = new StopWatch System.out.println("How long does it take to extract an article?") clock.start() for (i <- 0 to 100) { goose.extractContent(url, html) } clock.stop() System.out.println("It takes " + clock.getTime + " milliseconds") } }
Example 40
Source File: FileHelper.scala From Mastering-Spark-for-Data-Science with MIT License | 5 votes |
package com.gravity.goose.utils import org.apache.commons.io.IOUtils import java.io.{IOException, InputStream} object FileHelper extends Logging { def loadResourceFile[A](filename: String, cls: Class[A]): String = { var filedata: String = "" val is: InputStream = cls.getResourceAsStream(filename) try { filedata = IOUtils.toString(is, "UTF-8") } catch { case e: IOException => warn(e, e.toString) } filedata } }
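FileHelper above reads a classpath resource with IOUtils.toString but never closes the stream. A slightly tightened sketch of the same idea that closes the stream in a finally block and guards against a missing resource; the resource name is a placeholder:

import java.io.InputStream
import org.apache.commons.io.IOUtils

object ResourceText {
  // Load a UTF-8 text resource from the classpath, returning "" if it is missing.
  def load(resource: String): String = {
    val is: InputStream = getClass.getClassLoader.getResourceAsStream(resource)
    if (is == null) ""
    else {
      try IOUtils.toString(is, "UTF-8")
      finally IOUtils.closeQuietly(is)
    }
  }

  def main(args: Array[String]): Unit =
    println(load("data/notes.txt")) // placeholder resource name
}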
Example 41
Source File: ProcessSpawner.scala From akka-http-health with MIT License | 5 votes |
package io.github.lhotari.akka.http.health import java.io.ByteArrayOutputStream import java.lang.System.getProperty import java.net.{URL, URLClassLoader} import org.apache.commons.io.IOUtils import scala.collection.JavaConverters._ import scala.reflect.runtime.universe._ case class ProcessResult(retval: Integer, output: String) trait ProcessSpawner { lazy val classpath = resolveClassPath() val sep = getProperty("file.separator") val javaExecutablePath = getProperty("java.home") + sep + "bin" + sep + "java" private def resolveClassPath() = { getClass.getClassLoader match { case urlClassLoader: URLClassLoader => urlClassLoader.getURLs.collect { case url: URL => url.getFile }.mkString(getProperty("path.separator")) case _ => getProperty("java.class.path") } } def executeInSeparateProcess[T](mainClassType: T, maxMemoryMB: Integer = 100, extraJvmOpts: Seq[String] = Nil, args: Seq[String] = Nil)(implicit tag: WeakTypeTag[T]): ProcessResult = { val className = tag.tpe.termSymbol.fullName val processBuilder = new ProcessBuilder(javaExecutablePath).redirectErrorStream(true) val commands = processBuilder.command() commands.add(s"-Xmx${maxMemoryMB}m") commands.addAll(extraJvmOpts.asJava) commands.add("-cp") commands.add(classpath) commands.add(className) commands.addAll(args.asJava) println(String.join(" ", commands)) val process = processBuilder.start() val output = new ByteArrayOutputStream() IOUtils.copy(process.getInputStream, output) ProcessResult(process.waitFor(), output.toString()) } }
Example 42
Source File: ShellBanner.scala From shellbase with Apache License 2.0 | 5 votes |
package com.sumologic.shellbase import org.apache.commons.io.IOUtils class ShellBanner(resource: String) { def load(): String = { val in = getClass.getClassLoader.getResourceAsStream(resource) val banner = IOUtils.toString(in) in.close() banner } } object ShellBanner { lazy val Warning = new ShellBanner("banners/warning.txt").load() }
Example 43
Source File: ScriptRenderer.scala From shellbase with Apache License 2.0 | 5 votes |
package com.sumologic.shellbase

import java.io._

import org.apache.commons.io.IOUtils

import scala.io.Source

class ScriptRenderer(file: File, args: Array[String]) {

  def getLines: Seq[String] = {
    val prefix = file.getName + ("_" * Math.max(0, 3 - file.getName.length))
    val output = File.createTempFile(prefix, "script")
    output.deleteOnExit()
    val reader = new FileReader(file)
    val writer = new FileWriter(output)
    try {
      VelocityRenderer.render(argsToMap, reader, writer)
    } finally {
      IOUtils.closeQuietly(reader)
      IOUtils.closeQuietly(writer)
    }
    Source.fromFile(output).getLines().toSeq
  }

  private[shellbase] def argsToMap: Map[String, String] = {
    args.map(s => {
      require(s.contains("="), s"Argument $s does not contain = sign!")
      val splitted = s.split("=")
      require(splitted.size == 2, s"Argument $s contains more than one =!")
      splitted(0) -> splitted(1)
    }).toMap
  }
}
Example 44
Source File: UJESClientImplTest.scala From Linkis with Apache License 2.0 | 5 votes |
object UJESClientImplTest extends App { val clientConfig = DWSClientConfigBuilder.newBuilder().addUJESServerUrl("http://localhost:port") .connectionTimeout(30000).discoveryEnabled(true) .discoveryFrequency(1, TimeUnit.MINUTES) .loadbalancerEnabled(true).maxConnectionSize(5) .retryEnabled(false).readTimeout(30000) .setAuthenticationStrategy(new StaticAuthenticationStrategy()).setAuthTokenKey("") .setAuthTokenValue("").setDWSVersion("v1").build() val client = UJESClient(clientConfig) val jobExecuteResult = client.execute(JobExecuteAction.builder().setCreator("UJESClient-Test") .addExecuteCode("show tables") .setEngineType(EngineType.SPARK).setUser("").build()) println("execId: " + jobExecuteResult.getExecID + ", taskId: " + jobExecuteResult.taskID) var status = client.status(jobExecuteResult) while(!status.isCompleted) { val progress = client.progress(jobExecuteResult) val progressInfo = if(progress.getProgressInfo != null) progress.getProgressInfo.toList else List.empty println("progress: " + progress.getProgress + ", progressInfo: " + progressInfo) Utils.sleepQuietly(500) status = client.status(jobExecuteResult) } val jobInfo = client.getJobInfo(jobExecuteResult) val resultSet = jobInfo.getResultSetList(client).head val fileContents = client.resultSet(ResultSetAction.builder().setPath(resultSet).setUser(jobExecuteResult.getUser).build()).getFileContent println("fileContents: " + fileContents) IOUtils.closeQuietly(client) }
Example 45
Source File: JsonReceiver.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
package org.apache.iota.fey import java.io.FileOutputStream import java.net.URL import java.io.File import com.eclipsesource.schema._ import org.slf4j.LoggerFactory import play.api.libs.json._ import JSON_PATH._ import java.nio.file.{Files, Paths} import org.apache.commons.io.IOUtils import org.apache.commons.codec.binary.Base64 import scala.util.Properties._ def exceptionOnRun(e: Exception): Unit } object HttpBasicAuth { val BASIC = "Basic" val AUTHORIZATION = "Authorization" def encodeCredentials(username: String, password: String): String = { new String(Base64.encodeBase64((username + ":" + password).getBytes)) } def getHeader(username: String, password: String): String = BASIC + " " + encodeCredentials(username, password) }
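A short usage sketch for the HttpBasicAuth helper above. The standalone reimplementation and credentials below are illustrative only; it performs the same "Basic " + base64("user:password") encoding:

import org.apache.commons.codec.binary.Base64

object BasicAuthDemo {
  // Same encoding as HttpBasicAuth above: "Basic " followed by base64 of "username:password".
  def header(username: String, password: String): String =
    "Basic " + new String(Base64.encodeBase64((username + ":" + password).getBytes("UTF-8")), "UTF-8")

  def main(args: Array[String]): Unit =
    println(header("user", "secret")) // hypothetical credentials; prints "Basic dXNlcjpzZWNyZXQ="
}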
Example 46
Source File: FeyGenericActorReceiver.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
package org.apache.iota.fey import java.io.{File, FileOutputStream} import java.net.URL import java.nio.file.{Files, Paths} import com.eclipsesource.schema._ import akka.actor.ActorRef import com.eclipsesource.schema.SchemaValidator import org.apache.commons.io.IOUtils import play.api.libs.json._ import scala.concurrent.duration._ import scala.util.Properties._ abstract class FeyGenericActorReceiver(override val params: Map[String,String] = Map.empty, override val backoff: FiniteDuration = 1.minutes, override val connectTo: Map[String,ActorRef] = Map.empty, override val schedulerTimeInterval: FiniteDuration = 2.seconds, override val orchestrationName: String = "", override val orchestrationID: String = "", override val autoScale: Boolean = false) extends FeyGenericActor{ private[fey] val feyCore = FEY_CORE_ACTOR.actorRef override final def processMessage[T](message: T, sender: ActorRef): Unit = { try { val jsonString = getJSONString(message) if(jsonString != "{}") { processJson(jsonString) } startBackoff() }catch{ case e: Exception => log.error(e, s"Could not process message $message") } } private[fey] def processJson(jsonString: String) = { var orchID:String = "None" try{ val orchestrationJSON = Json.parse(jsonString) orchID = (orchestrationJSON \ JSON_PATH.GUID).as[String] val valid = validJson(orchestrationJSON) if(valid && (orchestrationJSON \ JSON_PATH.COMMAND).as[String].toUpperCase != "DELETE"){ checkForLocation(orchestrationJSON) } if(valid) { feyCore ! FeyCore.ORCHESTRATION_RECEIVED(orchestrationJSON, None) }else{ log.warning(s"Could not forward Orchestration $orchID. Invalid JSON schema") } } catch { case e: Exception => log.error(e, s"Orchestration $orchID could not be forwarded") } } def resolveCredentials(credentials: Option[JsObject]):Option[(String, String)] = { credentials match { case None => None case Some(cred) => val user = (cred \ JSON_PATH.JAR_CRED_USER).as[String] val password = (cred \ JSON_PATH.JAR_CRED_PASSWORD).as[String] Option(envOrElse(user,user), envOrElse(password,password)) } } }
Example 47
Source File: JsonSinkTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.json import io.eels.datastream.DataStream import io.eels.schema.{Field, StructType} import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.scalatest.{Matchers, WordSpec} class JsonSinkTest extends WordSpec with Matchers { val path = new Path("test.json") implicit val fs: FileSystem = FileSystem.get(new Configuration()) "JsonSink" should { "write multiple json docs to a file" in { if (fs.exists(path)) fs.delete(path, false) val schema = StructType(Field("name"), Field("location")) val ds = DataStream.fromValues( schema, Seq( Vector("sam", "aylesbury"), Vector("jam", "aylesbury"), Vector("ham", "buckingham") ) ) ds.to(JsonSink(path)) val input = IOUtils.toString(fs.open(path)) input should include("""{"name":"sam","location":"aylesbury"}""") input should include("""{"name":"jam","location":"aylesbury"}""") input should include("""{"name":"ham","location":"buckingham"}""") fs.delete(path, false) } "support arrays" in { if (fs.exists(path)) fs.delete(path, false) val schema = StructType(Field("name"), Field("skills")) val frame = DataStream.fromValues( schema, Seq(Vector("sam", Array("karate", "kung fu"))) ) frame.to(JsonSink(path)) val input = IOUtils.toString(fs.open(path)) input.trim shouldBe """{"name":"sam","skills":["karate","kung fu"]}""" fs.delete(path, false) } "support maps" in { if (fs.exists(path)) fs.delete(path, false) val schema = StructType(Field("name"), Field("locations")) val frame = DataStream.fromValues( schema, Seq(Vector("sam", Map("home" -> "boro", "work" -> "london"))) ) frame.to(JsonSink(path)) val input = IOUtils.toString(fs.open(path)) input.trim shouldBe """{"name":"sam","locations":{"home":"boro","work":"london"}}""" fs.delete(path, false) } "support structs" in { case class Foo(home: String, work: String) if (fs.exists(path)) fs.delete(path, false) val schema = StructType(Field("name"), Field("locations")) val frame = DataStream.fromValues( schema, Seq(Vector("sam", Foo("boro", "london"))) ) frame.to(JsonSink(path)) val input = IOUtils.toString(fs.open(path)) input.trim shouldBe """{"name":"sam","locations":{"home":"boro","work":"london"}}""" fs.delete(path, false) } } }
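The assertions above read the written file back with IOUtils.toString(fs.open(path)). A minimal sketch of that read-back step against a local Hadoop FileSystem; the path is a placeholder, and passing an explicit charset avoids the deprecated default-charset overload:

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object ReadBack {
  def main(args: Array[String]): Unit = {
    val fs: FileSystem = FileSystem.get(new Configuration()) // local filesystem by default
    val path = new Path("test.json") // placeholder: assumes the file was written beforehand
    val in = fs.open(path)
    try println(IOUtils.toString(in, "UTF-8"))
    finally IOUtils.closeQuietly(in)
  }
}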
Example 48
Source File: StorageCSVWriter.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.csv

import java.io._

import com.webank.wedatasphere.linkis.common.io.{MetaData, Record}
import com.webank.wedatasphere.linkis.common.utils.Logging
import com.webank.wedatasphere.linkis.storage.resultset.table.{TableMetaData, TableRecord}
import org.apache.commons.io.IOUtils

class StorageCSVWriter(val charset: String, val separator: String, val outputStream: OutputStream) extends CSVFsWriter with Logging {

  private val delimiter = separator match {
    case "," => ','
    case _ => '\t'
  }

  private val buffer: StringBuilder = new StringBuilder(50000)

  @scala.throws[IOException]
  override def addMetaData(metaData: MetaData): Unit = {
    val head = metaData.asInstanceOf[TableMetaData].columns.map(_.columnName)
    write(head) //IOUtils.write(compact(head).getBytes(charset),outputStream)
  }

  private def compact(row: Array[String]): String = {
    val tmp = row.foldLeft("")((l, r) => l + delimiter + r)
    tmp.substring(1, tmp.length) + "\n"
  }

  private def write(row: Array[String]) = {
    val content: String = compact(row)
    if (buffer.length + content.length > 49500) {
      IOUtils.write(buffer.toString().getBytes(charset), outputStream)
      buffer.clear()
    }
    buffer.append(content)
  }

  @scala.throws[IOException]
  override def addRecord(record: Record): Unit = {
    val body = record.asInstanceOf[TableRecord].row.map(_.toString) // null replacement etc. is handled at read time
    write(body) //IOUtils.write(compact(body).getBytes(charset),outputStream)
  }

  override def flush(): Unit = {
    IOUtils.write(buffer.toString().getBytes(charset), outputStream)
    buffer.clear()
  }

  override def close(): Unit = {
    flush()
    IOUtils.closeQuietly(outputStream)
  }
}
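A stripped-down sketch of the buffer-and-flush pattern used by the writer above, against a plain in-memory OutputStream; the threshold and rows below are illustrative, not taken from Linkis:

import java.io.ByteArrayOutputStream
import org.apache.commons.io.IOUtils

object BufferedCsvSketch {
  def main(args: Array[String]): Unit = {
    val out = new ByteArrayOutputStream()
    val buffer = new StringBuilder
    val threshold = 64 // illustrative; the writer above uses roughly 49500

    def writeRow(row: Array[String]): Unit = {
      val line = row.mkString("\t") + "\n"
      if (buffer.length + line.length > threshold) { // spill the buffer before it grows too large
        IOUtils.write(buffer.toString.getBytes("UTF-8"), out)
        buffer.clear()
      }
      buffer.append(line)
    }

    Seq(Array("a", "1"), Array("b", "2"), Array("c", "3")).foreach(writeRow)
    IOUtils.write(buffer.toString.getBytes("UTF-8"), out) // final flush
    print(out.toString("UTF-8"))
  }
}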
Example 49
Source File: StorageScriptFsReader.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.script.reader import java.io._ import com.webank.wedatasphere.linkis.common.io.{FsPath, MetaData, Record} import com.webank.wedatasphere.linkis.storage.script._ import com.webank.wedatasphere.linkis.storage.utils.StorageUtils import org.apache.commons.io.IOUtils import scala.collection.mutable.ArrayBuffer def isMetadata(line: String, prefix: String, prefixConf: String): Boolean = { val regex = ("\\s*" + prefix + "\\s*(.+)\\s*" + "=" + "\\s*(.+)\\s*").r line match { case regex(_, _) => true case _ => { val split: Array[String] = line.split("=") if (split.size != 2) return false if (split(0).split(" ").filter(_ != "").size != 4) return false if (!split(0).split(" ").filter(_ != "")(0).equals(prefixConf)) return false true } } } }
Example 50
Source File: StorageScriptFsWriter.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.script.writer import java.io.{ByteArrayInputStream, IOException, InputStream, OutputStream} import java.util import com.webank.wedatasphere.linkis.common.io.{FsPath, MetaData, Record} import com.webank.wedatasphere.linkis.storage.LineRecord import com.webank.wedatasphere.linkis.storage.script.{Compaction, ScriptFsWriter, ScriptMetaData} import com.webank.wedatasphere.linkis.storage.utils.{StorageConfiguration, StorageUtils} import org.apache.commons.io.IOUtils class StorageScriptFsWriter(val path: FsPath, val charset: String, outputStream: OutputStream = null) extends ScriptFsWriter { private val stringBuilder = new StringBuilder @scala.throws[IOException] override def addMetaData(metaData: MetaData): Unit = { val compactions = Compaction.listCompactions().filter(p => p.belongTo(StorageUtils.pathToSuffix(path.getPath))) val metadataLine = new util.ArrayList[String]() if (compactions.length > 0) { metaData.asInstanceOf[ScriptMetaData].getMetaData.map(compactions(0).compact).foreach(metadataLine.add) if (outputStream != null) { IOUtils.writeLines(metadataLine, "\n", outputStream, charset) } else { import scala.collection.JavaConversions._ metadataLine.foreach(m => stringBuilder.append(s"$m\n")) } } } @scala.throws[IOException] override def addRecord(record: Record): Unit = { //转成LineRecord而不是TableRecord是为了兼容非Table类型的结果集写到本类中 val scriptRecord = record.asInstanceOf[LineRecord] if (outputStream != null) { IOUtils.write(scriptRecord.getLine, outputStream, charset) } else { stringBuilder.append(scriptRecord.getLine) } } override def close(): Unit = { IOUtils.closeQuietly(outputStream) } override def flush(): Unit = if (outputStream != null) outputStream.flush() def getInputStream(): InputStream = { new ByteArrayInputStream(stringBuilder.toString().getBytes(StorageConfiguration.STORAGE_RS_FILE_TYPE.getValue)) } }
Example 51
Source File: IOHelp.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.io.utils

import com.webank.wedatasphere.linkis.common.io.{Fs, FsPath}
import com.webank.wedatasphere.linkis.common.utils.Utils
import com.webank.wedatasphere.linkis.storage.domain.{MethodEntity, MethodEntitySerializer}
import com.webank.wedatasphere.linkis.storage.exception.StorageErrorException
import com.webank.wedatasphere.linkis.storage.resultset.io.{IOMetaData, IORecord}
import com.webank.wedatasphere.linkis.storage.resultset.{ResultSetFactory, ResultSetReader, ResultSetWriter}
import com.webank.wedatasphere.linkis.storage.utils.{StorageConfiguration, StorageUtils}
import org.apache.commons.io.IOUtils

  def write(fs: Fs, method: MethodEntity): Unit = {
    if (method.params == null || method.params.isEmpty) throw new StorageErrorException(53003, "Unsupported parameter call")
    val dest = MethodEntitySerializer.deserializerToJavaObject(method.params(0).asInstanceOf[String], classOf[FsPath])
    val overwrite = method.params(1).asInstanceOf[Boolean]
    val outputStream = fs.write(dest, overwrite)
    val content = method.params(2).asInstanceOf[String]
    Utils.tryFinally {
      val resultSet = ResultSetFactory.getInstance.getResultSetByType(ResultSetFactory.IO_TYPE)
      val reader = ResultSetReader.getResultSetReader(resultSet, content)
      while (reader.hasNext) {
        IOUtils.write(reader.getRecord.asInstanceOf[IORecord].value, outputStream)
      }
    }(IOUtils.closeQuietly(outputStream))
  }
}
Example 52
Source File: TokenAuthentication.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.gateway.security.token import java.io.File import java.util.Properties import java.util.concurrent.TimeUnit import com.webank.wedatasphere.linkis.common.utils.{Logging, Utils} import com.webank.wedatasphere.linkis.gateway.config.GatewayConfiguration._ import com.webank.wedatasphere.linkis.gateway.http.GatewayContext import com.webank.wedatasphere.linkis.gateway.security.{GatewaySSOUtils, SecurityFilter} import com.webank.wedatasphere.linkis.server.Message import org.apache.commons.io.{FileUtils, IOUtils} import org.apache.commons.lang.StringUtils object TokenAuthentication extends Logging { private val (props, file) = if(ENABLE_TOKEN_AUTHENTICATION.getValue) (new Properties, new File(this.getClass.getClassLoader.getResource(TOKEN_AUTHENTICATION_CONFIG.getValue).toURI.getPath)) else (null, null) private var lastModified = 0l if(ENABLE_TOKEN_AUTHENTICATION.getValue) { Utils.defaultScheduler.scheduleAtFixedRate(new Runnable { override def run(): Unit = Utils.tryAndError(init()) }, TOKEN_AUTHENTICATION_SCAN_INTERVAL.getValue, TOKEN_AUTHENTICATION_SCAN_INTERVAL.getValue, TimeUnit.MILLISECONDS) init() } private def init(): Unit = if(file.lastModified() > lastModified) { lastModified = file.lastModified() info(s"loading token authentication file $file.") val newProps = new Properties val input = FileUtils.openInputStream(file) Utils.tryFinally(newProps.load(input))(IOUtils.closeQuietly(input)) props.putAll(newProps) } private def validateTokenUser(token: String, tokenUser: String): Boolean = { val tokenUsers = props.getProperty(token) if(tokenUsers == "*" || (StringUtils.isNotBlank(tokenUsers) && tokenUsers.contains(tokenUser))) true else false } def isTokenRequest(gatewayContext: GatewayContext) : Boolean = { (gatewayContext.getRequest.getHeaders.containsKey(TOKEN_KEY) && gatewayContext.getRequest.getHeaders.containsKey(TOKEN_USER_KEY)) || ( gatewayContext.getRequest.getCookies.containsKey(TOKEN_KEY) && gatewayContext.getRequest.getCookies.containsKey(TOKEN_USER_KEY)) } def tokenAuth(gatewayContext: GatewayContext): Boolean = { if(!ENABLE_TOKEN_AUTHENTICATION.getValue) { val message = Message.noLogin(s"Gateway未启用token认证,请采用其他认证方式!") << gatewayContext.getRequest.getRequestURI SecurityFilter.filterResponse(gatewayContext, message) return false } var token = gatewayContext.getRequest.getHeaders.get(TOKEN_KEY)(0) var tokenUser = gatewayContext.getRequest.getHeaders.get(TOKEN_USER_KEY)(0) if(StringUtils.isBlank(token) || StringUtils.isBlank(tokenUser)) { token = gatewayContext.getRequest.getCookies.get(TOKEN_KEY)(0).getValue tokenUser = gatewayContext.getRequest.getCookies.get(TOKEN_USER_KEY)(0).getValue if(StringUtils.isBlank(token) || StringUtils.isBlank(tokenUser)) { val message = Message.noLogin(s"请在Header或Cookie中同时指定$TOKEN_KEY 和 $TOKEN_USER_KEY,以便完成token认证!") << gatewayContext.getRequest.getRequestURI SecurityFilter.filterResponse(gatewayContext, message) return false } } if(validateTokenUser(token, tokenUser)){ info(s"Token authentication succeed, uri: ${gatewayContext.getRequest.getRequestURI}, token: $token, tokenUser: $tokenUser.") GatewaySSOUtils.setLoginUser(gatewayContext.getRequest, tokenUser) true } else { val message = Message.noLogin(s"未授权的token$token,无法将请求绑定给tokenUser$tokenUser!") << gatewayContext.getRequest.getRequestURI SecurityFilter.filterResponse(gatewayContext, message) false } } }
Example 53
Source File: QueryUtils.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.jobhistory.util import java.io.{InputStream, OutputStream} import java.util.Date import com.webank.wedatasphere.linkis.common.conf.CommonVars import com.webank.wedatasphere.linkis.common.io.FsPath import com.webank.wedatasphere.linkis.common.utils.{Logging, Utils} import com.webank.wedatasphere.linkis.jobhistory.entity.QueryTask import com.webank.wedatasphere.linkis.protocol.query.RequestInsertTask import com.webank.wedatasphere.linkis.storage.FSFactory import com.webank.wedatasphere.linkis.storage.fs.FileSystem import com.webank.wedatasphere.linkis.storage.utils.{FileSystemUtils, StorageUtils} import org.apache.commons.io.IOUtils import org.apache.commons.lang.time.DateFormatUtils object QueryUtils extends Logging { private val CODE_STORE_PREFIX = CommonVars("bdp.dataworkcloud.query.store.prefix", "hdfs:///tmp/bdp-ide/") private val CODE_STORE_SUFFIX = CommonVars("bdp.dataworkcloud.query.store.suffix", "") private val CHARSET = "utf-8" private val CODE_SPLIT = ";" private val LENGTH_SPLIT = "#" def storeExecutionCode(requestInsertTask: RequestInsertTask): Unit = { if (requestInsertTask.getExecutionCode.length < 60000) return val user: String = requestInsertTask.getUmUser val path: String = getCodeStorePath(user) val fsPath: FsPath = new FsPath(path) val fileSystem = FSFactory.getFsByProxyUser(fsPath, user).asInstanceOf[FileSystem] fileSystem.init(null) var os: OutputStream = null var position = 0L val codeBytes = requestInsertTask.getExecutionCode.getBytes(CHARSET) path.intern() synchronized { Utils.tryFinally { if (!fileSystem.exists(fsPath)) FileSystemUtils.createNewFile(fsPath, user, true) os = fileSystem.write(fsPath, false) position = fileSystem.get(path).getLength IOUtils.write(codeBytes, os) } { IOUtils.closeQuietly(os) if (fileSystem != null) fileSystem.close() } } val length = codeBytes.length requestInsertTask.setExecutionCode(path + CODE_SPLIT + position + LENGTH_SPLIT + length) } def exchangeExecutionCode(queryTask: QueryTask): Unit = { import scala.util.control.Breaks._ if (queryTask.getExecutionCode == null || !queryTask.getExecutionCode.startsWith(StorageUtils.HDFS_SCHEMA)) return val codePath = queryTask.getExecutionCode val path = codePath.substring(0, codePath.lastIndexOf(CODE_SPLIT)) val codeInfo = codePath.substring(codePath.lastIndexOf(CODE_SPLIT) + 1) val infos: Array[String] = codeInfo.split(LENGTH_SPLIT) val position = infos(0).toInt var lengthLeft = infos(1).toInt val tub = new Array[Byte](1024) val executionCode: StringBuilder = new StringBuilder val fsPath: FsPath = new FsPath(path) val fileSystem = FSFactory.getFsByProxyUser(fsPath, queryTask.getUmUser).asInstanceOf[FileSystem] fileSystem.init(null) var is: InputStream = null if (!fileSystem.exists(fsPath)) return Utils.tryFinally { is = fileSystem.read(fsPath) if (position > 0) is.skip(position) breakable { while (lengthLeft > 0) { val readed = is.read(tub) val useful = Math.min(readed, lengthLeft) if (useful < 0) break() lengthLeft -= useful executionCode.append(new String(tub, 0, useful, CHARSET)) } } } { if (fileSystem != null) fileSystem.close() IOUtils.closeQuietly(is) } queryTask.setExecutionCode(executionCode.toString()) } private def getCodeStorePath(user: String): String = { val date: String = DateFormatUtils.format(new Date, "yyyyMMdd") s"${CODE_STORE_PREFIX.getValue}${user}${CODE_STORE_SUFFIX.getValue}/executionCode/${date}/_scripts" } }
Example 54
Source File: AbstractEngineCreator.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.enginemanager import java.net.ServerSocket import com.webank.wedatasphere.linkis.common.conf.DWCArgumentsParser import com.webank.wedatasphere.linkis.common.utils.Utils import com.webank.wedatasphere.linkis.enginemanager.conf.EngineManagerConfiguration import com.webank.wedatasphere.linkis.enginemanager.exception.EngineManagerErrorException import com.webank.wedatasphere.linkis.enginemanager.impl.UserTimeoutEngineResource import com.webank.wedatasphere.linkis.enginemanager.process.{CommonProcessEngine, ProcessEngine, ProcessEngineBuilder} import com.webank.wedatasphere.linkis.protocol.engine.{EngineCallback, RequestEngine} import com.webank.wedatasphere.linkis.rpc.Sender import com.webank.wedatasphere.linkis.server.{JMap, toScalaMap} import org.apache.commons.io.IOUtils import scala.collection.mutable.ArrayBuffer abstract class AbstractEngineCreator extends EngineCreator { private val inInitPorts = ArrayBuffer[Int]() private def getAvailablePort: Int = synchronized { var port = AbstractEngineCreator.getNewPort while(inInitPorts.contains(port)) port = AbstractEngineCreator.getNewPort inInitPorts += port port } def removePort(port: Int): Unit = inInitPorts -= port protected def createProcessEngineBuilder(): ProcessEngineBuilder protected def getExtractSpringConfigs(requestEngine: RequestEngine): JMap[String, String] = { val springConf = new JMap[String, String] requestEngine.properties.keysIterator.filter(_.startsWith("spring.")).foreach(key => springConf.put(key.substring(7), requestEngine.properties.get(key))) springConf } protected def createEngine(processEngineBuilder:ProcessEngineBuilder,parser:DWCArgumentsParser):ProcessEngine={ processEngineBuilder.getEngineResource match { case timeout: UserTimeoutEngineResource => new CommonProcessEngine(processEngineBuilder, parser, timeout.getTimeout) case _ => new CommonProcessEngine(processEngineBuilder, parser) } } override def create(ticketId: String, engineRequest: EngineResource, request: RequestEngine): Engine = { val port = getAvailablePort val processEngineBuilder = createProcessEngineBuilder() processEngineBuilder.setPort(port) processEngineBuilder.build(engineRequest, request) val parser = new DWCArgumentsParser var springConf = Map("spring.application.name" -> EngineManagerConfiguration.ENGINE_SPRING_APPLICATION_NAME.getValue, "server.port" -> port.toString, "spring.profiles.active" -> "engine", "logging.config" -> "classpath:log4j2-engine.xml", "eureka.client.serviceUrl.defaultZone" -> EngineManagerReceiver.getSpringConf("eureka.client.serviceUrl.defaultZone")) springConf = springConf ++: getExtractSpringConfigs(request).toMap parser.setSpringConf(springConf) var dwcConf = Map("ticketId" -> ticketId, "creator" -> request.creator, "user" -> request.user) ++: EngineCallback.callbackToMap(EngineCallback(Sender.getThisServiceInstance.getApplicationName, Sender.getThisServiceInstance.getInstance)) if(request.properties.exists{case (k, v) => k.contains(" ") || (v != null && v.contains(" "))}) throw new EngineManagerErrorException(30000, "Startup parameters contain spaces!(启动参数中包含空格!)") dwcConf = dwcConf ++: request.properties.toMap parser.setDWCConf(dwcConf) val engine = createEngine(processEngineBuilder,parser) engine.setTicketId(ticketId) engine.setPort(port) engine match { case commonEngine: CommonProcessEngine => commonEngine.setUser(request.user) case _ => } engine } } object AbstractEngineCreator { private[enginemanager] def getNewPort: Int = { val socket = new ServerSocket(0) 
Utils.tryFinally(socket.getLocalPort)(IOUtils.closeQuietly(socket)) } }
Example 55
Source File: FilesystemSnapshotStore.scala From eventuate with Apache License 2.0 | 5 votes |
package com.rbmhtechnology.eventuate.snapshot.filesystem import java.io._ import java.net.URLEncoder import akka.event.{ LogSource, Logging } import com.rbmhtechnology.eventuate._ import com.rbmhtechnology.eventuate.snapshot.SnapshotStore import org.apache.commons.io.IOUtils import scala.concurrent.Future import scala.collection.immutable.Seq import scala.util._ object FilesystemSnapshotStore { implicit val logSource = LogSource.fromAnyClass[FilesystemSnapshotStore] } class FilesystemSnapshotStore(settings: FilesystemSnapshotStoreSettings, logId: String) extends SnapshotStore { private val log = Logging(settings.system, classOf[FilesystemSnapshotStore]) private val rootDir = new File(settings.rootDir, URLEncoder.encode(logId, "UTF-8")) rootDir.mkdirs() override def deleteAsync(lowerSequenceNr: Long): Future[Unit] = { import settings.writeDispatcher Future(delete(lowerSequenceNr)) } override def saveAsync(snapshot: Snapshot): Future[Unit] = { import settings.writeDispatcher Future(withOutputStream(dstDir(snapshot.metadata.emitterId), snapshot.metadata.sequenceNr)(serialize(_, snapshot))) } override def loadAsync(emitterId: String): Future[Option[Snapshot]] = { import settings.readDispatcher Future(load(dstDir(emitterId))) } def delete(lowerSequenceNr: Long): Unit = for { emitterId <- rootDir.listFiles emitterDir = dstDir(emitterId.getName) sequenceNr <- decreasingSequenceNrs(emitterDir) if sequenceNr >= lowerSequenceNr } dstFile(emitterDir, sequenceNr).delete() def load(dir: File): Option[Snapshot] = { @annotation.tailrec def go(snrs: Seq[Long]): Option[Snapshot] = snrs.headOption match { case None => None case Some(snr) => Try(withInputStream(dir, snr)(deserialize)) match { case Success(s) => Some(s) case Failure(e) => log.error(e, s"error loading snapshot ${dstFile(dir, snr)}") go(snrs.tail) } } go(decreasingSequenceNrs(dir)) } private def serialize(outputStream: OutputStream, snapshot: Snapshot): Unit = outputStream.write(settings.serialization.serialize(snapshot).get) private def deserialize(inputStream: InputStream): Snapshot = settings.serialization.deserialize(IOUtils.toByteArray(inputStream), classOf[Snapshot]).get private def withOutputStream(dir: File, snr: Long)(body: OutputStream => Unit): Unit = { val dst = dstFile(dir, snr) val tmp = tmpFile(dir, snr) dir.mkdirs() withStream(new BufferedOutputStream(new FileOutputStream(tmp)), body) tmp.renameTo(dst) // do not keep more than the configured maximum number of snapshot files decreasingSequenceNrs(dir).drop(settings.snapshotsPerEmitterMax).foreach { snr => dstFile(dir, snr).delete() } } private def withInputStream[A](dir: File, snr: Long)(body: InputStream => A): A = withStream(new BufferedInputStream(new FileInputStream(dstFile(dir, snr))), body) private def withStream[A <: Closeable, B](stream: A, p: A => B): B = try { p(stream) } finally { stream.close() } private val DstFilenamePattern = """^snr-(\d+)""".r private[eventuate] def dstDir(emitterId: String): File = new File(rootDir, URLEncoder.encode(emitterId, "UTF-8")) private[eventuate] def dstFile(dstDir: File, sequenceNr: Long): File = new File(dstDir, s"snr-${sequenceNr}") private[eventuate] def tmpFile(dstDir: File, sequenceNr: Long): File = new File(dstDir, s"tmp-${sequenceNr}") private[eventuate] def decreasingSequenceNrs(dir: File): Seq[Long] = if (!dir.exists) Nil else dir.listFiles.map(_.getName).collect { case DstFilenamePattern(snr) => snr.toLong }.toList.sorted.reverse }
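The store above funnels all stream handling through a small withStream loan helper. A minimal standalone sketch of that loan pattern combined with IOUtils.toByteArray; the temporary file and payload are illustrative:

import java.io._
import org.apache.commons.io.IOUtils

object LoanPatternSketch {
  // Run `body` against the stream and always close it afterwards, mirroring withStream above.
  def withStream[A <: Closeable, B](stream: A)(body: A => B): B =
    try body(stream) finally stream.close()

  def main(args: Array[String]): Unit = {
    val f = File.createTempFile("snapshot", ".bin")
    f.deleteOnExit()
    withStream(new BufferedOutputStream(new FileOutputStream(f)))(_.write("hello".getBytes("UTF-8")))
    val bytes = withStream(new BufferedInputStream(new FileInputStream(f)))(in => IOUtils.toByteArray(in))
    println(new String(bytes, "UTF-8")) // hello
  }
}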
Example 56
Source File: EngineExecutorManager.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.execute import com.webank.wedatasphere.linkis.common.conf.DWCArgumentsParser import com.webank.wedatasphere.linkis.common.utils.{Logging, Utils} import com.webank.wedatasphere.linkis.engine.exception.EngineErrorException import com.webank.wedatasphere.linkis.scheduler.executer.ExecutorState.{Busy, ExecutorState, Idle} import com.webank.wedatasphere.linkis.scheduler.executer._ import com.webank.wedatasphere.linkis.scheduler.listener.ExecutorListener import com.webank.wedatasphere.linkis.scheduler.queue.SchedulerEvent import com.webank.wedatasphere.linkis.server.{JMap, toJavaMap} import org.apache.commons.io.IOUtils import scala.concurrent.duration.Duration abstract class EngineExecutorManager extends ExecutorManager with Logging { private var executorListener: Option[ExecutorListener] = None private var resultSetListener: ResultSetListener = _ private var jobLogListener: JobLogListener = _ protected var executor: EngineExecutor = _ override def setExecutorListener(executorListener: ExecutorListener): Unit = this.executorListener = Some(executorListener) def setResultSetListener(resultSetListener: ResultSetListener) = this.resultSetListener = resultSetListener def setJobLogListener(jobLogListener: JobLogListener) = this.jobLogListener = jobLogListener protected def isSupportParallelism: Boolean protected def getOrCreateCodeParser(): CodeParser protected def getOrCreateEngineExecutorFactory(): EngineExecutorFactory protected def getEngineHooks: Array[EngineHook] def getEngineExecutor = executor override protected def createExecutor(event: SchedulerEvent): Executor = null override def askExecutor(schedulerEvent: SchedulerEvent): Option[Executor] = { if(executor == null) synchronized { if(executor == null) { var options: JMap[String, String] = DWCArgumentsParser.getDWCOptionMap //TODO getUDF peaceWong getEngineHooks.foreach(hook => options = hook.beforeCreateEngine(options)) executor = getOrCreateEngineExecutorFactory().createExecutor(options) executor.setCodeParser(getOrCreateCodeParser()) executor.init() executor.setLogListener(jobLogListener) executor.setResultSetListener(resultSetListener) //TODO Consider adding timeout(考虑加上超时时间) getEngineHooks.foreach(_.afterCreatedEngine(executor)) executorListener.foreach(executor.setExecutorListener) executorListener.foreach(_.onExecutorCreated(executor)) executor.ready() } } executor match { case engine: EngineExecutor => if(!ExecutorState.isAvailable(engine.state)) throw new EngineErrorException(40000, s"engine不可用,状态为${engine.state}(engine不可用,状态为${engine.state}).") else if(isSupportParallelism) Some(engine) else if(engine.state == Busy) None else synchronized { if(engine.state == Idle) Some(engine) else None } } } override def askExecutor(schedulerEvent: SchedulerEvent, wait: Duration): Option[Executor] = { val startTime = System.currentTimeMillis() askExecutor(schedulerEvent).orElse { var executor: Option[Executor] = None while(System.currentTimeMillis - startTime < wait.toMillis && executor.isEmpty) { this.executor synchronized this.executor.wait(wait.toMillis) executor = askExecutor(schedulerEvent) } executor } } def onExecutorStateChanged(fromState: ExecutorState, toState: ExecutorState): Unit = toState match { case Idle => this.executor synchronized this.executor.notify() case _ => } override def getById(id: Long): Option[Executor] = if(executor == null) None else if(executor.getId == id) Some(executor) else None override def getByGroup(groupName: String): Array[Executor] = Array(executor) override 
protected def delete(executor: Executor): Unit = throw new EngineErrorException(40001, s"Unsupported method delete(不支持的方法delete).") override def shutdown(): Unit = { executor match { case s: SingleTaskOperateSupport => Utils.tryAndWarn(s.kill()) case c: ConcurrentTaskOperateSupport => Utils.tryAndWarn(c.killAll()) } IOUtils.closeQuietly(executor) } }
Example 57
Source File: LogWriter.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.entrance.log import java.io.{Closeable, Flushable, OutputStream} import java.util import com.webank.wedatasphere.linkis.common.io.FsPath import com.webank.wedatasphere.linkis.common.utils.{Logging, Utils} import com.webank.wedatasphere.linkis.entrance.exception.EntranceErrorException import com.webank.wedatasphere.linkis.storage.FSFactory import com.webank.wedatasphere.linkis.storage.utils.FileSystemUtils import org.apache.commons.io.IOUtils import org.apache.commons.lang.StringUtils abstract class LogWriter(charset: String) extends Closeable with Flushable with Logging { private var firstWrite = true protected val outputStream: OutputStream def write(msg: String): Unit = synchronized { val log = if (!firstWrite) "\n" + msg else { firstWrite = false msg } Utils.tryQuietly({ outputStream.write(log.getBytes(charset)) outputStream.flush() }, t => { warn("error when write query log to outputStream.", t) info(msg) }) } def flush(): Unit = Utils.tryQuietly(outputStream.flush(), t => { warn("Error encounters when flush log,", t) }) def close(): Unit = { flush() if (outputStream != null) IOUtils.closeQuietly(outputStream) } } abstract class AbstractLogWriter(logPath: String, user: String, charset: String) extends LogWriter(charset) { if(StringUtils.isBlank(logPath)) throw new EntranceErrorException(20301, "logPath cannot be empty.") protected val fileSystem = FSFactory.getFs(new FsPath(logPath)) fileSystem.init(new util.HashMap[String, String]()) protected val outputStream: OutputStream = { FileSystemUtils.createNewFile(new FsPath(logPath), true) fileSystem.write(new FsPath(logPath), true) } override def close(): Unit = { super.close() if (fileSystem != null) Utils.tryQuietly(fileSystem.close(), t => { warn("Error encounters when closing fileSystem", t) }) } }
Example 58
Source File: ProcessInterpreter.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.Interpreter import java.io.{BufferedReader, InputStreamReader, PrintWriter} import java.util.concurrent.TimeUnit import com.webank.wedatasphere.linkis.common.utils.{Logging, Utils} import com.webank.wedatasphere.linkis.engine.spark.common.{LineBufferedStream, Starting, State, _} import com.webank.wedatasphere.linkis.scheduler.executer.{ErrorExecuteResponse, ExecuteResponse, SuccessExecuteResponse} import org.apache.commons.io.IOUtils import org.json4s._ import scala.concurrent.duration.Duration import scala.concurrent.{Await, ExecutionContext, Future} abstract class ProcessInterpreter(process: Process) extends Interpreter with Logging { implicit val executor: ExecutionContext = ExecutionContext.global protected[this] var _state: State = Starting() protected[this] val stdin = new PrintWriter(process.getOutputStream) protected[this] val stdout = new BufferedReader(new InputStreamReader(process.getInputStream()), 1) protected[this] val errOut = new LineBufferedStream(process.getErrorStream()) override def state: State = _state override def execute(code: String): ExecuteResponse = { if(code == "sc.cancelAllJobs" || code == "sc.cancelAllJobs()") { sendExecuteRequest(code) } _state match { case (Dead() | ShuttingDown() | Error() | Success()) => throw new IllegalStateException("interpreter is not running") case Idle() => require(state == Idle()) code match { case "SHUTDOWN" => sendShutdownRequest() close() ErrorExecuteResponse("shutdown",new Exception("shutdown")) case _ => _state = Busy() sendExecuteRequest(code) match { case Some(rep) => _state = Idle() // ExecuteComplete(rep) SuccessExecuteResponse() case None => _state = Error() val errorMsg = errOut.lines.mkString(", ") throw new Exception(errorMsg) } } case _ => throw new IllegalStateException(s"interpreter is in ${_state} state, cannot do query.") } } Future { val exitCode = process.waitFor() if (exitCode != 0) { errOut.lines.foreach(println) println(getClass.getSimpleName+" has stopped with exit code " + process.exitValue) _state = Error() } else { println(getClass.getSimpleName+" has finished.") _state = Success() } } protected def waitUntilReady(): Unit protected def sendExecuteRequest(request: String): Option[JValue] protected def sendShutdownRequest(): Unit = {} override def close(): Unit = { val future = Future { _state match { case (Dead() | ShuttingDown() | Success()) => Future.successful() case _ => sendShutdownRequest() } } _state = Dead() IOUtils.closeQuietly(stdin) IOUtils.closeQuietly(stdout) errOut.close // Give ourselves 10 seconds to tear down the process. Utils.tryFinally(Await.result(future, Duration(10, TimeUnit.SECONDS))){ process.destroy()} } }
Example 59
Source File: PythonEngineExecutor.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.executors import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.engine.PythonSession import com.webank.wedatasphere.linkis.engine.exception.EngineException import com.webank.wedatasphere.linkis.engine.execute.{EngineExecutor, EngineExecutorContext} import com.webank.wedatasphere.linkis.engine.rs.RsOutputStream import com.webank.wedatasphere.linkis.protocol.engine.JobProgressInfo import com.webank.wedatasphere.linkis.resourcemanager.{LoadInstanceResource, Resource} import com.webank.wedatasphere.linkis.rpc.Sender import com.webank.wedatasphere.linkis.scheduler.executer._ import org.apache.commons.io.IOUtils import scala.collection.mutable.ArrayBuffer class PythonEngineExecutor(outputPrintLimit: Int) extends EngineExecutor(outputPrintLimit, false) with SingleTaskOperateSupport with SingleTaskInfoSupport with Logging { override def getName: String = Sender.getThisServiceInstance.getInstance private val lineOutputStream = new RsOutputStream private[executors] var engineExecutorContext: EngineExecutorContext = _ override def getActualUsedResources: Resource = { new LoadInstanceResource(Runtime.getRuntime.totalMemory() - Runtime.getRuntime.freeMemory(), 2, 1) } private val pySession = new PythonSession override protected def executeLine(engineExecutorContext: EngineExecutorContext, code: String): ExecuteResponse = { if(engineExecutorContext != this.engineExecutorContext){ this.engineExecutorContext = engineExecutorContext pySession.setEngineExecutorContext(engineExecutorContext) //lineOutputStream.reset(engineExecutorContext) info("Python executor reset new engineExecutorContext!") } engineExecutorContext.appendStdout(s"$getName >> ${code.trim}") pySession.execute(code) //lineOutputStream.flush() SuccessExecuteResponse() } override protected def executeCompletely(engineExecutorContext: EngineExecutorContext, code: String, completedLine: String): ExecuteResponse = { val newcode = completedLine + code info("newcode is " + newcode) executeLine(engineExecutorContext, newcode) } override def kill(): Boolean = true override def pause(): Boolean = true override def resume(): Boolean = true override def progress(): Float = { if (this.engineExecutorContext != null){ this.engineExecutorContext.getCurrentParagraph / this.engineExecutorContext.getTotalParagraph.asInstanceOf[Float] }else 0.0f } override def getProgressInfo: Array[JobProgressInfo] = { val jobProgressInfos = new ArrayBuffer[JobProgressInfo]() jobProgressInfos.toArray Array.empty } override def log(): String = "" override def close(): Unit = { IOUtils.closeQuietly(lineOutputStream) var isKill:Boolean = false try { pySession.close isKill = true; } catch { case e: Throwable => throw new EngineException(60004, "Engine shutdown exception(引擎关闭异常)") } } }
Example 60
Source File: CSVExecutor.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.pipeline.executor

import java.io.OutputStream

import com.webank.wedatasphere.linkis.common.io.FsPath
import com.webank.wedatasphere.linkis.engine.execute.EngineExecutorContext
import com.webank.wedatasphere.linkis.engine.pipeline.OutputStreamCache
import com.webank.wedatasphere.linkis.engine.pipeline.constant.PipeLineConstant._
import com.webank.wedatasphere.linkis.engine.pipeline.conversions.FsConvertions._
import com.webank.wedatasphere.linkis.engine.pipeline.exception.PipeLineErrorException
import com.webank.wedatasphere.linkis.scheduler.executer.ExecuteResponse
import com.webank.wedatasphere.linkis.server._
import com.webank.wedatasphere.linkis.storage.FSFactory
import com.webank.wedatasphere.linkis.storage.csv.CSVFsWriter
import com.webank.wedatasphere.linkis.storage.source.FileSource
import org.apache.commons.io.IOUtils

class CSVExecutor extends PipeLineExecutor {
  override def execute(sourcePath: String, destPath: String, engineExecutorContext: EngineExecutorContext): ExecuteResponse = {
    if (!FileSource.isResultSet(sourcePath)) {
      throw new PipeLineErrorException(70005, "Not a result set file")
    }
    val sourceFsPath = new FsPath(sourcePath)
    val destFsPath = new FsPath(s"$destPath.$Kind")
    val sourceFs = FSFactory.getFs(sourceFsPath)
    sourceFs.init(null)
    val destFs = FSFactory.getFs(destFsPath)
    destFs.init(null)
    val fileSource = FileSource.create(sourceFsPath, sourceFs)
    if (!FileSource.isTableResultSet(fileSource)) {
      throw new PipeLineErrorException(70005, "Only table-type result sets can be converted to CSV")
    }
    var nullValue = options.getOrDefault(PIPELINE_OUTPUT_SHUFFLE_NULL_TYPE, "NULL")
    if (BLANK.equalsIgnoreCase(nullValue)) nullValue = ""
    val outputStream: OutputStream = destFs.write(destFsPath, options.get(PIPELINE_OUTPUT_ISOVERWRITE).toBoolean)
    OutputStreamCache.osCache += engineExecutorContext.getJobId.get -> outputStream
    val cSVFsWriter = CSVFsWriter.getCSVFSWriter(options.get(PIPELINE_OUTPUT_CHARSET), options.get(PIPELINE_FIELD_SPLIT), outputStream)
    fileSource.addParams("nullValue", nullValue).write(cSVFsWriter)
    IOUtils.closeQuietly(cSVFsWriter)
    IOUtils.closeQuietly(fileSource)
    IOUtils.closeQuietly(sourceFs)
    IOUtils.closeQuietly(destFs)
    super.execute(sourcePath, destPath, engineExecutorContext)
  }

  override def Kind: String = "csv"
}

object CSVExecutor {
  val csvExecutor = new CSVExecutor

  def getInstance: PipeLineExecutor = csvExecutor
}
Example 61
Source File: ExcelExecutor.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.pipeline.executor

import java.io.OutputStream

import com.webank.wedatasphere.linkis.common.io.FsPath
import com.webank.wedatasphere.linkis.engine.execute.EngineExecutorContext
import com.webank.wedatasphere.linkis.engine.pipeline.OutputStreamCache
import com.webank.wedatasphere.linkis.engine.pipeline.constant.PipeLineConstant._
import com.webank.wedatasphere.linkis.engine.pipeline.conversions.FsConvertions._
import com.webank.wedatasphere.linkis.engine.pipeline.exception.PipeLineErrorException
import com.webank.wedatasphere.linkis.scheduler.executer.ExecuteResponse
import com.webank.wedatasphere.linkis.storage.FSFactory
import com.webank.wedatasphere.linkis.storage.excel.ExcelFsWriter
import com.webank.wedatasphere.linkis.storage.source.FileSource
import org.apache.commons.io.IOUtils

class ExcelExecutor extends PipeLineExecutor {
  override def execute(sourcePath: String, destPath: String, engineExecutorContext: EngineExecutorContext): ExecuteResponse = {
    if (!FileSource.isResultSet(sourcePath)) {
      throw new PipeLineErrorException(70005, "Not a result set file")
    }
    val sourceFsPath = new FsPath(sourcePath)
    val destFsPath = new FsPath(s"$destPath.xlsx")
    val sourceFs = FSFactory.getFs(sourceFsPath)
    sourceFs.init(null)
    val destFs = FSFactory.getFs(destFsPath)
    destFs.init(null)
    val fileSource = FileSource.create(sourceFsPath, sourceFs)
    if (!FileSource.isTableResultSet(fileSource)) {
      throw new PipeLineErrorException(70005, "Only table-type result sets can be converted to Excel")
    }
    var nullValue = options.getOrDefault(PIPELINE_OUTPUT_SHUFFLE_NULL_TYPE, "NULL")
    if (BLANK.equalsIgnoreCase(nullValue)) nullValue = ""
    val outputStream: OutputStream = destFs.write(destFsPath, options.get(PIPELINE_OUTPUT_ISOVERWRITE).toBoolean)
    val excelFsWriter = ExcelFsWriter.getExcelFsWriter(DEFAULTC_HARSET, DEFAULT_SHEETNAME, DEFAULT_DATEFORMATE, outputStream)
    import scala.collection.JavaConversions._
    OutputStreamCache.osCache += engineExecutorContext.getJobId.get -> outputStream
    fileSource.addParams("nullValue", nullValue).write(excelFsWriter)
    IOUtils.closeQuietly(excelFsWriter)
    IOUtils.closeQuietly(fileSource)
    IOUtils.closeQuietly(sourceFs)
    IOUtils.closeQuietly(destFs)
    super.execute(sourcePath, destPath, engineExecutorContext)
  }

  override def Kind: String = "excel"
}

object ExcelExecutor {
  val excelExecutor = new ExcelExecutor

  def getInstance: PipeLineExecutor = excelExecutor
}
Example 62
Source File: CopyExecutor.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.pipeline.executor

import com.webank.wedatasphere.linkis.common.io.FsPath
import com.webank.wedatasphere.linkis.engine.execute.EngineExecutorContext
import com.webank.wedatasphere.linkis.engine.pipeline.OutputStreamCache
import com.webank.wedatasphere.linkis.engine.pipeline.constant.PipeLineConstant._
import com.webank.wedatasphere.linkis.engine.pipeline.conversions.FsConvertions._
import com.webank.wedatasphere.linkis.scheduler.executer.ExecuteResponse
import com.webank.wedatasphere.linkis.server._
import com.webank.wedatasphere.linkis.storage.FSFactory
import org.apache.commons.io.IOUtils

class CopyExecutor extends PipeLineExecutor {
  override def execute(sourcePath: String, destPath: String, engineExecutorContext: EngineExecutorContext): ExecuteResponse = {
    val sourceFsPath = new FsPath(sourcePath)
    val destFsPath = new FsPath(destPath)
    val sourceFs = FSFactory.getFs(sourceFsPath)
    sourceFs.init(null)
    val destFs = FSFactory.getFs(destFsPath)
    destFs.init(null)
    val inputStream = sourceFs.read(sourceFsPath)
    var isOverWrite = options.get(PIPELINE_OUTPUT_ISOVERWRITE).toBoolean
    // Table export currently only writes to the workspace, so other destinations are left unchanged for now
    if (!isOverWrite && !destFs.exists(destFsPath)) {
      isOverWrite = true
    }
    val outputStream = destFs.write(destFsPath, isOverWrite)
    OutputStreamCache.osCache += engineExecutorContext.getJobId.get -> outputStream
    IOUtils.copy(inputStream, outputStream)
    IOUtils.closeQuietly(outputStream)
    IOUtils.closeQuietly(inputStream)
    IOUtils.closeQuietly(sourceFs)
    IOUtils.closeQuietly(destFs)
    super.execute(sourcePath, destPath, engineExecutorContext)
  }

  override def Kind: String = "cp"
}

object CopyExecutor {
  val copyExecutor = new CopyExecutor

  def getInstance: PipeLineExecutor = copyExecutor
}
Example 63
Source File: RewriteSwaggerConfigPlugin.scala From matcher with MIT License | 5 votes |
import java.io.{BufferedInputStream, ByteArrayOutputStream} import java.nio.charset.StandardCharsets import java.nio.file.Files import Dependencies.Version import org.apache.commons.compress.archivers.ArchiveStreamFactory import org.apache.commons.io.IOUtils import sbt.Keys._ import sbt._ // See https://github.com/swagger-api/swagger-ui/issues/5710 object RewriteSwaggerConfigPlugin extends AutoPlugin { override val trigger = PluginTrigger.NoTrigger override def projectSettings: Seq[Def.Setting[_]] = inConfig(Compile)( Seq( resourceGenerators += Def.task { val jarName = s"swagger-ui-${Version.swaggerUi}.jar" val indexHtmlPath = s"META-INF/resources/webjars/swagger-ui/${Version.swaggerUi}/index.html" val outputFile = resourceManaged.value / indexHtmlPath val html = (Compile / dependencyClasspath).value .find(_.data.getName == jarName) .flatMap(jar => fileContentFromJar(jar.data, indexHtmlPath)) .map { new String(_, StandardCharsets.UTF_8) } val resource = s"$jarName:$indexHtmlPath" html match { case None => throw new RuntimeException(s"Can't find $resource") case Some(html) => val doc = org.jsoup.parser.Parser.parse(html, "127.0.0.1") import scala.collection.JavaConverters._ doc .body() .children() .asScala .find { el => el.tagName() == "script" && el.html().contains("SwaggerUIBundle") } match { case None => throw new RuntimeException("Can't patch script in index.html") case Some(el) => val update = """ const ui = SwaggerUIBundle({ url: "/api-docs/swagger.json", dom_id: '#swagger-ui', deepLinking: true, presets: [ SwaggerUIBundle.presets.apis ], plugins: [ SwaggerUIBundle.plugins.DownloadUrl ], layout: "BaseLayout", operationsSorter: "alpha" }); window.ui = ui; """ // Careful! ^ will be inserted as one-liner el.text(update) } Files.createDirectories(outputFile.getParentFile.toPath) IO.write(outputFile, doc.outerHtml()) } Seq(outputFile) }.taskValue )) private def fileContentFromJar(jar: File, fileName: String): Option[Array[Byte]] = { val fs = new BufferedInputStream(Files.newInputStream(jar.toPath)) val factory = new ArchiveStreamFactory() val ais = factory.createArchiveInputStream(fs) try Iterator .continually(ais.getNextEntry) .takeWhile(_ != null) .filter(ais.canReadEntryData) .find(_.getName == fileName) .map { _ => val out = new ByteArrayOutputStream() IOUtils.copy(ais, out) out.toByteArray } finally fs.close() } }
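fileContentFromJar above shows a common IOUtils trick: once the archive stream is positioned on the wanted entry, IOUtils.copy into a ByteArrayOutputStream turns the rest of that entry into a byte array. A reduced sketch of the same idea against a plain java.util.zip stream (the jar path and entry name are placeholders, not values from this plugin):

import java.io.ByteArrayOutputStream
import java.nio.file.{Files, Paths}
import java.util.zip.ZipInputStream
import org.apache.commons.io.IOUtils

val zin = new ZipInputStream(Files.newInputStream(Paths.get("swagger-ui.jar"))) // placeholder jar
try {
  val bytes: Option[Array[Byte]] =
    Iterator.continually(zin.getNextEntry)
      .takeWhile(_ != null)
      .find(_.getName == "META-INF/resources/index.html") // placeholder entry name
      .map { _ =>
        val out = new ByteArrayOutputStream()
        IOUtils.copy(zin, out) // copies only the current entry; the stream reports end-of-stream at the entry boundary
        out.toByteArray
      }
  println(bytes.map(_.length))
} finally zin.close()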
Example 64
Source File: AsynchbasePatcher.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.storage.hbase import java.lang.Integer.valueOf import java.nio.charset.StandardCharsets import java.util.concurrent.Callable import net.bytebuddy.ByteBuddy import net.bytebuddy.description.modifier.Visibility.PUBLIC import net.bytebuddy.dynamic.loading.ClassLoadingStrategy import net.bytebuddy.implementation.FieldAccessor import net.bytebuddy.implementation.MethodDelegation.to import net.bytebuddy.implementation.bind.annotation.{SuperCall, This} import net.bytebuddy.matcher.ElementMatchers._ import org.apache.commons.io.IOUtils import org.hbase.async._ import org.objectweb.asm.Opcodes.{ACC_FINAL, ACC_PRIVATE, ACC_PROTECTED, ACC_PUBLIC} import org.objectweb.asm._ import scala.collection.JavaConversions._ private def loadClass(name: String): Class[_] = { classLoader.getResources(s"org/hbase/async/$name.class").toSeq.headOption match { case Some(url) => val stream = url.openStream() val bytes = try { IOUtils.toByteArray(stream) } finally { stream.close() } // patch the bytecode so that the class is no longer final and the methods are all accessible val cw = new ClassWriter(ClassWriter.COMPUTE_FRAMES) new ClassReader(bytes).accept(new ClassAdapter(cw) { override def visit(version: Int, access: Int, name: String, signature: String, superName: String, interfaces: Array[String]): Unit = { super.visit(version, access & ~ACC_FINAL, name, signature, superName, interfaces) } override def visitMethod(access: Int, name: String, desc: String, signature: String, exceptions: Array[String]): MethodVisitor = { super.visitMethod(access & ~ACC_PRIVATE & ~ACC_PROTECTED & ~ACC_FINAL | ACC_PUBLIC, name, desc, signature, exceptions) } }, 0) val patched = cw.toByteArray defineClass.setAccessible(true) defineClass.invoke(classLoader, s"org.hbase.async.$name", patched, valueOf(0), valueOf(patched.length)).asInstanceOf[Class[_]] case None => throw new ClassNotFoundException(s"Could not find Asynchbase class: $name") } } }
Example 65
Source File: InceptionFetcher.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.fetcher.tensorflow import java.net.URL import java.nio.file.Paths import com.typesafe.config.Config import org.apache.commons.io.IOUtils import org.apache.s2graph.core._ import org.apache.s2graph.core.types.VertexId import scala.concurrent.{ExecutionContext, Future} object InceptionFetcher { val ModelPath = "modelPath" def getImageBytes(urlText: String): Array[Byte] = { val url = new URL(urlText) IOUtils.toByteArray(url) } def predict(graphDef: Array[Byte], labels: Seq[String])(imageBytes: Array[Byte], topK: Int = 10): Seq[(String, Float)] = { try { val image = LabelImage.constructAndExecuteGraphToNormalizeImage(imageBytes) try { val labelProbabilities = LabelImage.executeInceptionGraph(graphDef, image) val topKIndices = labelProbabilities.zipWithIndex.sortBy(_._1).reverse .take(Math.min(labelProbabilities.length, topK)).map(_._2) val ls = topKIndices.map { idx => (labels(idx), labelProbabilities(idx)) } ls } catch { case e: Throwable => Nil } finally if (image != null) image.close() } } } class InceptionFetcher(graph: S2GraphLike) extends EdgeFetcher { import InceptionFetcher._ import scala.collection.JavaConverters._ import org.apache.s2graph.core.TraversalHelper._ val builder = graph.elementBuilder var graphDef: Array[Byte] = _ var labels: Seq[String] = _ override def init(config: Config)(implicit ec: ExecutionContext): Unit = { val modelPath = config.getString(ModelPath) graphDef = LabelImage.readAllBytesOrExit(Paths.get(modelPath, "tensorflow_inception_graph.pb")) labels = LabelImage.readAllLinesOrExit(Paths.get(modelPath, "imagenet_comp_graph_label_strings.txt")).asScala } override def close(): Unit = {} override def fetches(queryRequests: Seq[QueryRequest], prevStepEdges: Map[VertexId, Seq[EdgeWithScore]])(implicit ec: ExecutionContext): Future[Seq[StepResult]] = { val stepResultLs = queryRequests.map { queryRequest => val vertex = queryRequest.vertex val queryParam = queryRequest.queryParam val shouldBuildParents = queryRequest.query.queryOption.returnTree || queryParam.whereHasParent val parentEdges = if (shouldBuildParents) prevStepEdges.getOrElse(queryRequest.vertex.id, Nil) else Nil val urlText = vertex.innerId.toIdString() val edgeWithScores = predict(graphDef, labels)(getImageBytes(urlText), queryParam.limit).flatMap { case (label, score) => val tgtVertexId = builder.newVertexId(queryParam.label.service, queryParam.label.tgtColumnWithDir(queryParam.labelWithDir.dir), label) val props: Map[String, Any] = if (queryParam.label.metaPropsInvMap.contains("score")) Map("score" -> score) else Map.empty val edge = graph.toEdge(vertex.innerId.value, tgtVertexId.innerId.value, queryParam.labelName, queryParam.direction, props = props) edgeToEdgeWithScore(queryRequest, edge, parentEdges) } StepResult(edgeWithScores, Nil, Nil) } Future.successful(stepResultLs) } override def fetchEdgesAll()(implicit ec: ExecutionContext): Future[Seq[S2EdgeLike]] = Future.successful(Nil) }
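getImageBytes above uses the IOUtils.toByteArray(java.net.URL) overload, which handles opening and closing the underlying stream and buffers the whole response body in memory. The same call in isolation, with a placeholder URL:

import java.net.URL
import org.apache.commons.io.IOUtils

// Placeholder URL; the entire body is held on the heap, so this is only
// appropriate for payloads that comfortably fit in memory.
val bytes: Array[Byte] = IOUtils.toByteArray(new URL("http://example.com/image.jpg"))
println(s"downloaded ${bytes.length} bytes")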
Example 66
Source File: TestSpec.scala From spark-distcp with Apache License 2.0 | 5 votes |
package com.coxautodata import java.io.ByteArrayInputStream import java.nio.file.Files import com.coxautodata.objects.SerializableFileStatus import com.coxautodata.utils.FileListing import org.apache.commons.io.{FileUtils, IOUtils} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, LocalFileSystem, Path} import org.scalatest.{BeforeAndAfterEach, FunSpec, Matchers} trait TestSpec extends FunSpec with Matchers with BeforeAndAfterEach { var testingBaseDir: java.nio.file.Path = _ var testingBaseDirName: String = _ var testingBaseDirPath: Path = _ var localFileSystem: LocalFileSystem = _ override def beforeEach(): Unit = { super.beforeEach() testingBaseDir = Files.createTempDirectory("test_output") testingBaseDirName = testingBaseDir.toString localFileSystem = FileSystem.getLocal(new Configuration()) testingBaseDirPath = localFileSystem.makeQualified(new Path(testingBaseDirName)) } override def afterEach(): Unit = { super.afterEach() FileUtils.deleteDirectory(testingBaseDir.toFile) } def createFile(relativePath: Path, content: Array[Byte]): SerializableFileStatus = { val path = new Path(testingBaseDirPath, relativePath) localFileSystem.mkdirs(path.getParent) val in = new ByteArrayInputStream(content) val out = localFileSystem.create(path) IOUtils.copy(in, out) in.close() out.close() SerializableFileStatus(localFileSystem.getFileStatus(path)) } def fileStatusToResult(f: SerializableFileStatus): FileListing = { FileListing(f.getPath.toString, if (f.isFile) Some(f.getLen) else None) } }
Example 67
Source File: Zip$Test.scala From mystem-scala with MIT License | 5 votes |
package ru.stachek66.tools import java.io.{File, FileInputStream} import org.apache.commons.io.IOUtils import org.scalatest.FunSuite import org.scalatest.junit.JUnitRunner import org.junit.runner.RunWith class Zip$Test extends FunSuite { test("zip-test") { val src = new File("src/test/resources/test.txt") Zip.unpack( new File("src/test/resources/test.zip"), new File("src/test/resources/res.txt")) match { case f => val content0 = IOUtils.toString(new FileInputStream(f)) val content1 = IOUtils.toString(new FileInputStream(src)) print(content0.trim + " vs " + content1.trim) assert(content0 === content1) } } }
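Note that IOUtils.toString(InputStream) without a charset, as used in this test, falls back to the platform default encoding and is deprecated in recent Commons IO releases; the charset-taking overload is the safer form. A small sketch, assuming a UTF-8 text file at a placeholder path:

import java.io.FileInputStream
import java.nio.charset.StandardCharsets
import org.apache.commons.io.IOUtils

val in = new FileInputStream("src/test/resources/test.txt") // placeholder path
try {
  // An explicit charset avoids surprises from the platform default encoding
  val content = IOUtils.toString(in, StandardCharsets.UTF_8)
  println(content.trim)
} finally in.close()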
Example 68
Source File: Decompressor.scala From mystem-scala with MIT License | 5 votes |
package ru.stachek66.tools import java.io.{IOException, File, FileOutputStream} import org.apache.commons.compress.archivers.ArchiveInputStream import org.apache.commons.io.IOUtils import ru.stachek66.nlp.mystem.Properties trait Decompressor { def traditionalExtension: String def unpack(src: File, dst: File): File @throws(classOf[IOException]) private[tools] def copyUncompressedAndClose(stream: ArchiveInputStream, dest: File): File = { // must be read val entry = stream.getNextEntry if (entry.isDirectory) throw new IOException("Decompressed entry is a directory (unexpectedly)") val os = new FileOutputStream(dest) try { IOUtils.copy(stream, os) } finally { os.close() stream.close() } dest } } object Decompressor { def select: Decompressor = if (Properties.CurrentOs.contains("win")) Zip else TarGz }
Example 69
Source File: TPCDS_2_4_Queries.scala From spark-sql-perf with Apache License 2.0 | 5 votes |
package com.databricks.spark.sql.perf.tpcds import org.apache.commons.io.IOUtils import com.databricks.spark.sql.perf.{Benchmark, ExecutionMode, Query} trait Tpcds_2_4_Queries extends Benchmark { import ExecutionMode._ val queryNames = Seq( "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14a", "q14b", "q15", "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23a", "q23b", "q24a", "q24b", "q25", "q26", "q27", "q28", "q29", "q30", "q31", "q32", "q33", "q34", "q35", "q36", "q37", "q38", "q39a", "q39b", "q40", "q41", "q42", "q43", "q44", "q45", "q46", "q47", "q48", "q49", "q50", "q51", "q52", "q53", "q54", "q55", "q56", "q57", "q58", "q59", "q60", "q61", "q62", "q63", "q64", "q65", "q66", "q67", "q68", "q69", "q70", "q71", "q72", "q73", "q74", "q75", "q76", "q77", "q78", "q79", "q80", "q81", "q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90", "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99", "ss_max" ) val tpcds2_4Queries = queryNames.map { queryName => val queryContent: String = IOUtils.toString( getClass().getClassLoader().getResourceAsStream(s"tpcds_2_4/$queryName.sql")) Query(queryName + "-v2.4", queryContent, description = "TPCDS 2.4 Query", executionMode = CollectResults) } val tpcds2_4QueriesMap = tpcds2_4Queries.map(q => q.name.split("-").get(0) -> q).toMap }
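The query loader above is the classic "classpath resource to String" pattern. Assuming Commons IO 2.6 or later is on the classpath, the same lookup can be written more compactly with IOUtils.resourceToString, which resolves the resource and applies the charset in one call (the resource name below is hypothetical):

import java.nio.charset.StandardCharsets
import org.apache.commons.io.IOUtils

// Hypothetical resource; the leading slash addresses it from the classpath root
val sql: String = IOUtils.resourceToString("/tpcds_2_4/q1.sql", StandardCharsets.UTF_8)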
Example 70
Source File: NettyBlockTransferSecuritySuite.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.network.netty import java.nio._ import java.util.concurrent.TimeUnit import scala.concurrent.duration._ import scala.concurrent.{Await, Promise} import scala.util.{Failure, Success, Try} import org.apache.commons.io.IOUtils import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.shuffle.BlockFetchingListener import org.apache.spark.network.{BlockDataManager, BlockTransferService} import org.apache.spark.storage.{BlockId, ShuffleBlockId} import org.apache.spark.{SecurityManager, SparkConf} import org.mockito.Mockito._ import org.scalatest.mock.MockitoSugar import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite, ShouldMatchers} class NettyBlockTransferSecuritySuite extends FunSuite with MockitoSugar with ShouldMatchers { test("security default off") { val conf = new SparkConf() .set("spark.app.id", "app-id") testConnection(conf, conf) match { case Success(_) => // expected case Failure(t) => fail(t) } } test("security on same password") { val conf = new SparkConf() .set("spark.authenticate", "true") .set("spark.authenticate.secret", "good") .set("spark.app.id", "app-id") testConnection(conf, conf) match { case Success(_) => // expected case Failure(t) => fail(t) } } test("security on mismatch password") { val conf0 = new SparkConf() .set("spark.authenticate", "true") .set("spark.authenticate.secret", "good") .set("spark.app.id", "app-id") val conf1 = conf0.clone.set("spark.authenticate.secret", "bad") testConnection(conf0, conf1) match { case Success(_) => fail("Should have failed") case Failure(t) => t.getMessage should include ("Mismatched response") } } test("security mismatch auth off on server") { val conf0 = new SparkConf() .set("spark.authenticate", "true") .set("spark.authenticate.secret", "good") .set("spark.app.id", "app-id") val conf1 = conf0.clone.set("spark.authenticate", "false") testConnection(conf0, conf1) match { case Success(_) => fail("Should have failed") case Failure(t) => // any funny error may occur, sever will interpret SASL token as RPC } } test("security mismatch auth off on client") { val conf0 = new SparkConf() .set("spark.authenticate", "false") .set("spark.authenticate.secret", "good") .set("spark.app.id", "app-id") val conf1 = conf0.clone.set("spark.authenticate", "true") testConnection(conf0, conf1) match { case Success(_) => fail("Should have failed") case Failure(t) => t.getMessage should include ("Expected SaslMessage") } } private def fetchBlock( self: BlockTransferService, from: BlockTransferService, execId: String, blockId: BlockId): Try[ManagedBuffer] = { val promise = Promise[ManagedBuffer]() self.fetchBlocks(from.hostName, from.port, execId, Array(blockId.toString), new BlockFetchingListener { override def onBlockFetchFailure(blockId: String, exception: Throwable): Unit = { promise.failure(exception) } override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit = { promise.success(data.retain()) } }) Await.ready(promise.future, FiniteDuration(1000, TimeUnit.MILLISECONDS)) promise.future.value.get } }
Example 71
Source File: StreamMetadata.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.streaming import java.io.{InputStreamReader, OutputStreamWriter} import java.nio.charset.StandardCharsets import scala.util.control.NonFatal import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, FSDataOutputStream, Path} import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.apache.spark.internal.Logging import org.apache.spark.sql.streaming.StreamingQuery def write( metadata: StreamMetadata, metadataFile: Path, hadoopConf: Configuration): Unit = { var output: FSDataOutputStream = null try { val fs = FileSystem.get(hadoopConf) output = fs.create(metadataFile) val writer = new OutputStreamWriter(output) Serialization.write(metadata, writer) writer.close() } catch { case NonFatal(e) => logError(s"Error writing stream metadata $metadata to $metadataFile", e) throw e } finally { IOUtils.closeQuietly(output) } } }
Example 72
Source File: AppMasterResolver.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.experiments.yarn.client import java.io.IOException import java.net.{HttpURLConnection, URL} import java.nio.charset.StandardCharsets import akka.actor.{ActorRef, ActorSystem} import org.apache.commons.io.IOUtils import org.apache.gearpump.experiments.yarn.glue.Records.{ApplicationId, ApplicationReport} import org.apache.gearpump.experiments.yarn.glue.YarnClient import org.apache.gearpump.util.{AkkaHelper, LogUtil} import org.apache.hadoop.hdfs.web.URLConnectionFactory import org.apache.hadoop.yarn.conf.YarnConfiguration import scala.util.Try class AppMasterResolver(yarnClient: YarnClient, system: ActorSystem) { val LOG = LogUtil.getLogger(getClass) val RETRY_INTERVAL_MS = 3000 // ms def resolve(appId: ApplicationId, timeoutSeconds: Int = 30): ActorRef = { val appMaster = retry(connect(appId), 1 + timeoutSeconds * 1000 / RETRY_INTERVAL_MS) appMaster } private def connect(appId: ApplicationId): ActorRef = { val report = yarnClient.getApplicationReport(appId) AppMasterResolver.resolveAppMasterAddress(report, system) } private def retry(fun: => ActorRef, times: Int): ActorRef = { var index = 0 var result: ActorRef = null while (index < times && result == null) { Thread.sleep(RETRY_INTERVAL_MS) index += 1 val tryConnect = Try(fun) if (tryConnect.isFailure) { LOG.error(s"Failed to connect YarnAppMaster(tried $index)... " + tryConnect.failed.get.getMessage) } else { result = tryConnect.get } } result } } object AppMasterResolver { val LOG = LogUtil.getLogger(getClass) def resolveAppMasterAddress(report: ApplicationReport, system: ActorSystem): ActorRef = { val appMasterPath = s"${report.getTrackingURL}/supervisor-actor-path" LOG.info(s"appMasterPath=$appMasterPath") val connectionFactory: URLConnectionFactory = URLConnectionFactory .newDefaultURLConnectionFactory(new YarnConfiguration()) val url: URL = new URL(appMasterPath) val connection: HttpURLConnection = connectionFactory.openConnection(url) .asInstanceOf[HttpURLConnection] connection.setInstanceFollowRedirects(true) try { connection.connect() } catch { case e: IOException => LOG.error(s"Failed to connect to AppMaster" + e.getMessage) } val status = connection.getResponseCode if (status == 200) { val stream: java.io.InputStream = connection.getInputStream val response = IOUtils.toString(stream, StandardCharsets.UTF_8) LOG.info("Successfully resolved AppMaster address: " + response) connection.disconnect() AkkaHelper.actorFor(system, response) } else { connection.disconnect() throw new IOException("Fail to resolve AppMaster address, please make sure " + s"${report.getTrackingURL} is accessible...") } } }
Example 73
Source File: FileMerger.scala From hyperion with Apache License 2.0 | 5 votes |
package com.krux.hyperion.contrib.activity.file import java.io._ import java.util.zip.{ GZIPInputStream, GZIPOutputStream } import org.apache.commons.compress.compressors.bzip2.{ BZip2CompressorInputStream, BZip2CompressorOutputStream } import org.apache.commons.io.IOUtils case class FileMerger(destination: File, skipFirstLine: Boolean = false, headers: Option[String] = None) { def merge(sources: File*): File = { val output: OutputStream = new BufferedOutputStream({ val s = new FileOutputStream(destination, true) if (destination.getName.endsWith(".gz")) new GZIPOutputStream(s) else if(destination.getName.endsWith(".bz2")) new BZip2CompressorOutputStream(s) else s }) try { sources.foldLeft(headers -> output)(appendFile) destination } finally { try { output.close() } catch { case e: Exception => e.printStackTrace() } } } private def doSkipFirstLine(input: InputStream): InputStream = { while (skipFirstLine && (input.read() match { case -1 | '\n' => false case _ => true })) {} input } private def appendFile(state: (Option[String], OutputStream), source: File): (Option[String], OutputStream) = { val (headers, output) = state if (source.getName == "-") { print("Merging stdin...") headers.map(_.getBytes).foreach(output.write) IOUtils.copy(doSkipFirstLine(System.in), output) println("done") None -> output } else if (source.length() > 0) { print(s"Merging ${source.getAbsolutePath}...") val input = new BufferedInputStream({ val s = new FileInputStream(source) if (source.getName.endsWith(".gz")) new GZIPInputStream(s) else if(source.getName.endsWith(".bz2")) new BZip2CompressorInputStream(s) else s }) try { input.mark(2) if (input.read() != -1) { input.reset() headers.map(_.getBytes).foreach(output.write) IOUtils.copy(doSkipFirstLine(input), output) } } finally { try { input.close() } catch { case e: Exception => e.printStackTrace() } finally { println("done") } } None -> output } else { headers -> output } } }
Example 74
Source File: TikaHadoopOrcParser.scala From project-matt with MIT License | 5 votes |
package org.datafy.aws.app.matt.extras import java.io.{File, FileOutputStream, IOException, InputStream} import java.util import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import scala.collection.JavaConverters._ import org.apache.hadoop.fs.Path import org.apache.hadoop.hive.serde2.objectinspector.StructField import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector import org.apache.orc.OrcFile import org.apache.orc.OrcFile.ReaderOptions import org.apache.orc.Reader import org.apache.orc.RecordReader import org.apache.tika.exception.TikaException import org.apache.tika.metadata.Metadata import org.apache.tika.mime.MediaType import org.apache.tika.parser.{AbstractParser, ParseContext} import org.xml.sax.{ContentHandler, SAXException} import scala.util.Random class TikaHadoopOrcParser extends AbstractParser { final val ORC_RAW = MediaType.application("x-orc") private val SUPPORTED_TYPES: Set[MediaType] = Set(ORC_RAW) def getSupportedTypes(context: ParseContext): util.Set[MediaType] = { SUPPORTED_TYPES.asJava } @throws(classOf[IOException]) @throws(classOf[SAXException]) @throws(classOf[TikaException]) def parse(stream: InputStream, handler: ContentHandler, metadata: Metadata, context: ParseContext): Unit = { // create temp file from stream try { val fileNamePrefix = Random.alphanumeric.take(5).mkString val tempFile = File.createTempFile(s"orc-${fileNamePrefix}", ".orc") IOUtils.copy(stream, new FileOutputStream(tempFile)) val path = new Path(tempFile.getAbsolutePath) val conf = new Configuration() val orcReader = OrcFile.createReader(path, new ReaderOptions(conf)) val records: RecordReader = orcReader.rows() val storeRecord = null val firstBlockKey = null } catch { case e: Throwable => e.printStackTrace() } // val fields = } }
Example 75
Source File: TikaParquetParser.scala From project-matt with MIT License | 5 votes |
package org.datafy.aws.app.matt.extras import java.io.{File, FileOutputStream, IOException, InputStream} import java.util import scala.collection.JavaConverters._ import org.xml.sax.{ContentHandler, SAXException} import org.apache.tika.metadata.Metadata import org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE import org.apache.tika.mime.MediaType import org.apache.tika.parser.{AbstractParser, ParseContext} import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.parquet.hadoop.ParquetFileReader import org.apache.parquet.hadoop.ParquetReader import org.apache.parquet.format.converter.ParquetMetadataConverter import org.apache.parquet.hadoop.util.HadoopInputFile import org.apache.parquet.tools.json.JsonRecordFormatter import org.apache.parquet.tools.read.{SimpleReadSupport, SimpleRecord} import org.apache.tika.exception.TikaException import org.apache.tika.sax.XHTMLContentHandler import scala.util.Random class TikaParquetParser extends AbstractParser { // make some stuff here final val PARQUET_RAW = MediaType.application("x-parquet") private val SUPPORTED_TYPES: Set[MediaType] = Set(PARQUET_RAW) def getSupportedTypes(context: ParseContext): util.Set[MediaType] = { SUPPORTED_TYPES.asJava } @throws(classOf[IOException]) @throws(classOf[SAXException]) @throws(classOf[TikaException]) def parse(stream: InputStream, handler: ContentHandler, metadata: Metadata, context: ParseContext): Unit = { // create temp file from stream val fileNamePrefix = Random.alphanumeric.take(5).mkString val tempFile = File.createTempFile(s"parquet-${fileNamePrefix}", ".parquet") IOUtils.copy(stream, new FileOutputStream(tempFile)) val conf = new Configuration() val path = new Path(tempFile.getAbsolutePath) val parquetMetadata = ParquetFileReader.readFooter(conf, path, ParquetMetadataConverter.NO_FILTER) var defaultReader: ParquetReader[SimpleRecord] = null val columns = parquetMetadata.getFileMetaData.getSchema.getFields metadata.set(CONTENT_TYPE, PARQUET_RAW.toString) metadata.set("Total Number of Columns", columns.size.toString) metadata.set("Parquet Column Names", columns.toString) val xhtml = new XHTMLContentHandler(handler, metadata) xhtml.startDocument() xhtml.startElement("p") // ::TODO:: ensure parquet reader reads all files not only file row try { defaultReader = ParquetReader.builder(new SimpleReadSupport(), new Path(tempFile.getAbsolutePath)).build() if(defaultReader.read() != null) { val values: SimpleRecord = defaultReader.read() val jsonFormatter = JsonRecordFormatter.fromSchema(parquetMetadata.getFileMetaData.getSchema) val textContent: String = jsonFormatter.formatRecord(values) xhtml.characters(textContent) xhtml.endElement("p") xhtml.endDocument() } } catch { case e: Throwable => e.printStackTrace() if (defaultReader != null) { try { defaultReader.close() } catch{ case _: Throwable => } } } finally { if (tempFile != null) tempFile.delete() } } }
Example 76
Source File: WholeFileReader.scala From magellan with Apache License 2.0 | 5 votes |
package magellan.mapreduce import java.io.InputStream import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FSDataInputStream, FileSystem, Path} import org.apache.hadoop.io.compress.{CodecPool, CompressionCodecFactory, Decompressor} import org.apache.hadoop.io.{NullWritable, Text} import org.apache.hadoop.mapreduce.lib.input.FileSplit import org.apache.hadoop.mapreduce.{InputSplit, RecordReader, TaskAttemptContext} class WholeFileReader extends RecordReader[NullWritable, Text] { private val key = NullWritable.get() private val value = new Text() private var split: FileSplit = _ private var conf: Configuration = _ private var path: Path = _ private var done: Boolean = false override def getProgress: Float = ??? override def nextKeyValue(): Boolean = { if (done){ false } else { val fs = path.getFileSystem(conf) var is: FSDataInputStream = null var in: InputStream = null var decompressor: Decompressor = null try { is = fs.open(split.getPath) val codec = new CompressionCodecFactory(conf).getCodec(path) if (codec != null) { decompressor = CodecPool.getDecompressor(codec) in = codec.createInputStream(is, decompressor) } else { in = is } val result = IOUtils.toByteArray(in) value.clear() value.set(result) done = true true } finally { if (in != null) { IOUtils.closeQuietly(in) } if (decompressor != null) { CodecPool.returnDecompressor(decompressor) } } } } override def getCurrentValue: Text = value override def initialize(inputSplit: InputSplit, taskAttemptContext: TaskAttemptContext): Unit = { this.split = inputSplit.asInstanceOf[FileSplit] this.conf = MapReduceUtils.getConfigurationFromContext(taskAttemptContext) this.path = this.split.getPath } override def getCurrentKey: NullWritable = key override def close() {} }
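The record reader above is essentially IOUtils.toByteArray applied to a whole (optionally codec-wrapped) file, with closeQuietly guarding the cleanup path. Stripped of the Hadoop plumbing, the core call reduces to this sketch (placeholder path; the content is assumed to fit in memory):

import java.nio.file.{Files, Paths}
import org.apache.commons.io.IOUtils

val in = Files.newInputStream(Paths.get("data/part-00000")) // placeholder path
val bytes: Array[Byte] =
  try IOUtils.toByteArray(in) // buffers the entire stream; unsuitable for files larger than the heap
  finally IOUtils.closeQuietly(in)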
Example 77
Source File: FlywayMigrationsSpec.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.platform.store import java.math.BigInteger import java.nio.charset.Charset import java.security.MessageDigest import com.daml.platform.store.FlywayMigrationsSpec._ import org.apache.commons.io.IOUtils import org.flywaydb.core.api.configuration.FluentConfiguration import org.flywaydb.core.api.migration.JavaMigration import org.flywaydb.core.internal.resource.LoadableResource import org.flywaydb.core.internal.scanner.{LocationScannerCache, ResourceNameCache, Scanner} import org.scalatest.Matchers._ import org.scalatest.WordSpec import scala.collection.JavaConverters._ // SQL MIGRATION AND THEIR DIGEST FILES SHOULD BE CREATED ONLY ONCE AND NEVER CHANGED AGAIN, // OTHERWISE MIGRATIONS BREAK ON EXISTING DEPLOYMENTS! class FlywayMigrationsSpec extends WordSpec { "Postgres flyway migration files" should { "always have a valid SHA-256 digest file accompanied" in { assertFlywayMigrationFileHashes(DbType.Postgres) } } "H2 database flyway migration files" should { "always have a valid SHA-256 digest file accompanied" in { assertFlywayMigrationFileHashes(DbType.H2Database) } } } object FlywayMigrationsSpec { private val digester = MessageDigest.getInstance("SHA-256") private def assertFlywayMigrationFileHashes(dbType: DbType): Unit = { val config = FlywayMigrations.configurationBase(dbType) val resourceScanner = scanner(config) val resources = resourceScanner.getResources("", ".sql").asScala.toSeq resources.size should be > 10 resources.foreach { res => val fileName = res.getFilename val expectedDigest = getExpectedDigest(fileName, fileName.dropRight(4) + ".sha256", resourceScanner) val currentDigest = getCurrentDigest(res, config.getEncoding) assert( currentDigest == expectedDigest, s"Digest of migration file $fileName has changed! It is NOT allowed to change neither existing sql migrations files nor their digests!" ) } } private def scanner(config: FluentConfiguration) = new Scanner( classOf[JavaMigration], config.getLocations.toList.asJava, getClass.getClassLoader, config.getEncoding, new ResourceNameCache, new LocationScannerCache, ) private def getExpectedDigest( sourceFile: String, digestFile: String, resourceScanner: Scanner[_], ) = IOUtils.toString( Option(resourceScanner.getResource(digestFile)) .getOrElse(sys.error(s"""Missing sha-256 file $digestFile! |Are you introducing a new Flyway migration step? |You need to create a sha-256 digest file by either running: | - shasum -a 256 $sourceFile | awk '{print $$1}' > $digestFile (under the db/migration folder) | - or ledger/sandbox/src/main/resources/db/migration/recompute-sha256sums.sh |""".stripMargin)) .read()) private def getCurrentDigest(res: LoadableResource, encoding: Charset) = { val digest = digester.digest(IOUtils.toByteArray(res.read(), encoding)) String.format(s"%0${digest.length * 2}x\n", new BigInteger(1, digest)) } }
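Both helpers above use the Reader-based IOUtils overloads: toString(Reader) to load the expected digest and toByteArray(Reader, Charset) to turn the migration script into bytes in a known encoding before hashing. A small sketch of that hashing step, with a local file standing in for the Flyway resource (file name is a placeholder):

import java.io.FileReader
import java.nio.charset.StandardCharsets
import java.security.MessageDigest
import org.apache.commons.io.IOUtils

val reader = new FileReader("V1__init.sql") // placeholder migration file
val bytes =
  try IOUtils.toByteArray(reader, StandardCharsets.UTF_8) // decodes the Reader, then re-encodes with the given charset
  finally reader.close()
val digest = MessageDigest.getInstance("SHA-256").digest(bytes)
println(digest.map("%02x".format(_)).mkString)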
Example 78
Source File: FrameReader.scala From mleap with Apache License 2.0 | 5 votes |
package ml.combust.mleap.runtime.serialization import java.io._ import java.nio.charset.Charset import ml.combust.mleap.ClassLoaderUtil import ml.combust.mleap.runtime.frame.DefaultLeapFrame import org.apache.commons.io.IOUtils import resource._ import scala.util.Try object FrameReader { def apply(format: String = BuiltinFormats.json, clOption: Option[ClassLoader] = None): FrameReader = { val cl = clOption.getOrElse(ClassLoaderUtil.findClassLoader(classOf[FrameReader].getCanonicalName)) cl.loadClass(s"$format.DefaultFrameReader"). newInstance(). asInstanceOf[FrameReader] } } trait FrameReader { def fromBytes(bytes: Array[Byte], charset: Charset = BuiltinFormats.charset): Try[DefaultLeapFrame] def read(file: File): Try[DefaultLeapFrame] = read(file, BuiltinFormats.charset) def read(file: File, charset: Charset): Try[DefaultLeapFrame] = { (for(in <- managed(new FileInputStream(file))) yield { read(in, charset) }).tried.flatMap(identity) } def read(in: InputStream): Try[DefaultLeapFrame] = read(in, BuiltinFormats.charset) def read(in: InputStream, charset: Charset): Try[DefaultLeapFrame] = { Try(IOUtils.toByteArray(in)).flatMap(bytes => fromBytes(bytes, charset)) } }
Example 79
Source File: InstallRouteMgmt.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.standalone import java.io.File import akka.http.scaladsl.model.Uri import org.apache.commons.io.{FileUtils, IOUtils} import org.apache.openwhisk.common.TransactionId.systemPrefix import org.apache.openwhisk.common.{Logging, TransactionId} import scala.sys.process.ProcessLogger import scala.util.Try import scala.sys.process._ case class InstallRouteMgmt(workDir: File, authKey: String, apiHost: Uri, namespace: String, gatewayUrl: Uri, wsk: String)(implicit log: Logging) { case class Action(name: String, desc: String) private val noopLogger = ProcessLogger(_ => ()) private implicit val tid: TransactionId = TransactionId(systemPrefix + "apiMgmt") val actionNames = Array( Action("createApi", "Create an API"), Action("deleteApi", "Delete the API"), Action("getApi", "Retrieve the specified API configuration (in JSON format)")) def run(): Unit = { require(wskExists, s"wsk command not found at $wsk. Route management actions cannot be installed") log.info(this, packageUpdateCmd.!!.trim) //TODO Optimize to ignore this if package already installed actionNames.foreach { action => val name = action.name val actionZip = new File(workDir, s"$name.zip") FileUtils.copyURLToFile(IOUtils.resourceToURL(s"/$name.zip"), actionZip) val cmd = createActionUpdateCmd(action, name, actionZip) val result = cmd.!!.trim log.info(this, s"Installed $name - $result") FileUtils.deleteQuietly(actionZip) } //This log message is used by tests to confirm that actions are installed log.info(this, "Installed Route Management Actions") } private def createActionUpdateCmd(action: Action, name: String, actionZip: File) = { Seq( wsk, "--apihost", apiHost.toString(), "--auth", authKey, "action", "update", s"$namespace/apimgmt/$name", actionZip.getAbsolutePath, "-a", "description", action.desc, "--kind", "nodejs:default", "-a", "web-export", "true", "-a", "final", "true") } private def packageUpdateCmd = { Seq( wsk, "--apihost", apiHost.toString(), "--auth", authKey, "package", "update", s"$namespace/apimgmt", "--shared", "no", "-a", "description", "This package manages the gateway API configuration.", "-p", "gwUrlV2", gatewayUrl.toString()) } def wskExists: Boolean = Try(s"$wsk property get --cliversion".!(noopLogger)).getOrElse(-1) == 0 }
Example 80
Source File: UtilsTest.scala From spark-http-stream with BSD 2-Clause "Simplified" License | 5 votes |
import java.sql.Date import org.apache.spark.SparkConf import org.apache.spark.serializer.KryoSerializer import org.apache.spark.sql.SparkSession import org.junit.Assert import org.junit.Test import java.io.ByteArrayOutputStream import java.io.InputStream import org.apache.commons.io.IOUtils import com.esotericsoftware.kryo.io.Input import org.apache.spark.sql.execution.streaming.http.KryoSerializerUtils class UtilsTest { @Test def testKryoSerDe() { val d1 = new Date(30000); val bytes = KryoSerializerUtils.serialize(d1); val d2 = KryoSerializerUtils.deserialize(bytes); Assert.assertEquals(d1, d2); val d3 = Map('x' -> Array("aaa", "bbb"), 'y' -> Array("ccc", "ddd")); println(d3); val bytes2 = KryoSerializerUtils.serialize(d3); val d4 = KryoSerializerUtils.deserialize(bytes2).asInstanceOf[Map[String, Any]]; println(d4); } @Test def testEncoderSchema() { val spark = SparkSession.builder.master("local[4]") .getOrCreate(); val sqlContext = spark.sqlContext; import sqlContext.implicits._ import org.apache.spark.sql.catalyst.encoders.encoderFor val schema1 = encoderFor[String].schema; val schema2 = encoderFor[(String)].schema; val schema3 = encoderFor[((String))].schema; Assert.assertEquals(schema1, schema2); Assert.assertEquals(schema1, schema3); } @Test def testDateInTuple() { val spark = SparkSession.builder.master("local[4]") .getOrCreate(); val sqlContext = spark.sqlContext; import sqlContext.implicits._ val d1 = new Date(30000); val ds = sqlContext.createDataset(Seq[(Int, Date)]((1, d1))); val d2 = ds.collect()(0)._2; //NOTE: d1!=d2, maybe a bug println(d1.equals(d2)); } }
Example 81
Source File: utils.scala From spark-http-stream with BSD 2-Clause "Simplified" License | 5 votes |
package org.apache.spark.sql.execution.streaming.http import org.apache.spark.sql.types.StructField import org.apache.spark.sql.types.StructType import org.apache.spark.sql.types.TimestampType import org.apache.spark.SparkConf import org.apache.commons.io.IOUtils import org.apache.spark.serializer.KryoSerializer import java.io.InputStream import com.esotericsoftware.kryo.io.Input import java.io.ByteArrayOutputStream class WrongArgumentException(name: String, value: Any) extends RuntimeException(s"wrong argument: $name=$value") { } class MissingRequiredArgumentException(map: Map[String, String], paramName: String) extends RuntimeException(s"missing required argument: $paramName, all parameters=$map") { } class InvalidSerializerNameException(serializerName: String) extends RuntimeException(s"invalid serializer name: $serializerName") { } object SchemaUtils { def buildSchema(schema: StructType, includesTimestamp: Boolean, timestampColumnName: String = "_TIMESTAMP_"): StructType = { if (!includesTimestamp) schema; else StructType(schema.fields.toSeq :+ StructField(timestampColumnName, TimestampType, false)); } } object Params { def deserialize(bytes: Array[Byte]): Any = { val kryo = kryoSerializer.newKryo(); val input = new Input(); input.setBuffer(bytes); kryo.readClassAndObject(input); } }
Example 82
Source File: CortexTypeGeneratorTest.scala From rug with GNU General Public License v3.0 | 5 votes |
package com.atomist.rug.ts import com.atomist.rug.spi.EnumParameterOrReturnType import com.atomist.source.ArtifactSource import com.atomist.util.Utils import org.apache.commons.io.IOUtils import org.scalatest.{FlatSpec, Matchers} class CortexTypeGeneratorTest extends FlatSpec with Matchers { import CortexTypeGenerator._ import DefaultTypeGeneratorConfig.CortexJson private val typeGen = new CortexTypeGenerator(DefaultCortexDir, DefaultCortexStubDir) "Type generation" should "find some types" in { val types = typeGen.extract(CortexJson) assert(types.nonEmpty) } it should "handle enums" in { val enumJson = Utils.withCloseable(getClass.getResourceAsStream("/com/atomist/rug/ts/enum_test.json"))(IOUtils.toString(_, "UTF-8")) val types = typeGen.extract(enumJson) assert(types.size === 1) val t = types.head t.allOperations.last.returnType shouldBe a[EnumParameterOrReturnType] } it should "return types with operations" in { val types = typeGen.extract(CortexJson) types.foreach(t => { assert(t.operations.nonEmpty, s"Type ${t.name} should have operations") }) } import com.atomist.rug.TestUtils._ it should "generate compiling node module" in { val extendedModel = typeGen.toNodeModule(CortexJson) .withPathAbove(".atomist/rug") //println(ArtifactSourceUtils.prettyListFiles(as)) // extendedModel.allFiles.filter(_.name.endsWith(".ts")).foreach(f => // println(s"${f.path}\n${f.content}\n\n")) //println(ArtifactSourceUtils.prettyListFiles(cas)) val buildFile = extendedModel.allFiles.find(f => f.name.endsWith("Build.ts")).get assert(buildFile.content.contains("pullRequestNumber"), s"Unexpected Build file content\n${buildFile.content}") failOnFindingPattern(extendedModel, "Badly formatted exports", """export \{ [A-Za-z0-9]+ \};\n\nexport""".r, _.path.endsWith(".ts")) assert(extendedModel.allFiles.exists(_.name.endsWith("ChatChannel.ts"))) assert(extendedModel.allFiles.exists(_.content.contains("Repo[]"))) assert(extendedModel.allFiles.exists(_.content.contains("Issue[]")), "Must have back relationship from Repo to Issue") val buildStubFile = extendedModel.allFiles.find(f => f.content.contains("class Build ")).get assert(buildStubFile.content.contains("""[ "Build", "-dynamic""""), "We should have correct node tags") val cas = TypeScriptBuilder.compiler.compile(extendedModel + TypeScriptBuilder.compileUserModel(Seq( TypeScriptBuilder.coreSource, extendedModel ))) assert(cas.allFiles.exists(_.name.endsWith("ChatChannel.js")), "Should have compiled") } } object CortexTypeGeneratorTest { private val typeGen = new CortexTypeGenerator(CortexTypeGenerator.DefaultCortexDir, CortexTypeGenerator.DefaultCortexStubDir) val fullModel: ArtifactSource = { val as = typeGen.toNodeModule(DefaultTypeGeneratorConfig.CortexJson) .withPathAbove(".atomist/rug") TypeScriptBuilder.compiler.compile(as + TypeScriptBuilder.compileUserModel(Seq( TypeScriptBuilder.coreSource, as ))) } }
Example 83
Source File: AntlrRawFileType.scala From rug with GNU General Public License v3.0 | 5 votes |
package com.atomist.rug.kind.grammar import java.nio.charset.StandardCharsets import com.atomist.source.FileArtifact import com.atomist.tree.content.text.PositionedTreeNode import com.atomist.tree.content.text.grammar.antlr.{AntlrGrammar, AstNodeCreationStrategy} import com.atomist.util.Utils.withCloseable import org.apache.commons.io.IOUtils import org.springframework.core.io.DefaultResourceLoader abstract class AntlrRawFileType( topLevelProduction: String, nodeCreationStrategy: AstNodeCreationStrategy, grammars: String* ) extends TypeUnderFile { private val g4s: Seq[String] = { val cp = new DefaultResourceLoader() val resources = grammars.map(grammar => cp.getResource(grammar)) resources.map(r => withCloseable(r.getInputStream)(is => IOUtils.toString(is, StandardCharsets.UTF_8))) } private[kind] def parser = antlrGrammar private lazy val antlrGrammar = new AntlrGrammar(topLevelProduction, nodeCreationStrategy, g4s: _*) override def fileToRawNode(f: FileArtifact): Option[PositionedTreeNode] = { antlrGrammar.parse(f.content) } }
Example 84
Source File: StreamMetadata.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.streaming import java.io.{InputStreamReader, OutputStreamWriter} import java.nio.charset.StandardCharsets import java.util.ConcurrentModificationException import scala.util.control.NonFatal import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileAlreadyExistsException, FSDataInputStream, Path} import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.apache.spark.internal.Logging import org.apache.spark.sql.execution.streaming.CheckpointFileManager.CancellableFSDataOutputStream import org.apache.spark.sql.streaming.StreamingQuery def write( metadata: StreamMetadata, metadataFile: Path, hadoopConf: Configuration): Unit = { var output: CancellableFSDataOutputStream = null try { val fileManager = CheckpointFileManager.create(metadataFile.getParent, hadoopConf) output = fileManager.createAtomic(metadataFile, overwriteIfPossible = false) val writer = new OutputStreamWriter(output) Serialization.write(metadata, writer) writer.close() } catch { case e: FileAlreadyExistsException => if (output != null) { output.cancel() } throw new ConcurrentModificationException( s"Multiple streaming queries are concurrently using $metadataFile", e) case e: Throwable => if (output != null) { output.cancel() } logError(s"Error writing stream metadata $metadata to $metadataFile", e) throw e } } }
Example 85
Source File: TestUtils.scala From keystone with Apache License 2.0 | 5 votes |
package keystoneml.utils import java.io.{FileReader, ByteArrayInputStream} import breeze.linalg.DenseMatrix import breeze.stats.distributions.{Gaussian, RandBasis, ThreadLocalRandomGenerator, Rand} import edu.berkeley.cs.amplab.mlmatrix.RowPartitionedMatrix import org.apache.commons.io.IOUtils import org.apache.commons.math3.random.MersenneTwister import org.apache.spark.SparkContext import scala.io.Source import scala.util.Random def genChannelMajorArrayVectorizedImage(x: Int, y: Int, z: Int): ChannelMajorArrayVectorizedImage = { ChannelMajorArrayVectorizedImage(genData(x, y, z), ImageMetadata(x,y,z)) } def genRowColumnMajorByteArrayVectorizedImage(x: Int, y: Int, z: Int): RowColumnMajorByteArrayVectorizedImage = { RowColumnMajorByteArrayVectorizedImage(genData(x,y,z).map(_.toByte), ImageMetadata(x,y,z)) } def createRandomMatrix( sc: SparkContext, numRows: Int, numCols: Int, numParts: Int, seed: Int = 42): RowPartitionedMatrix = { val rowsPerPart = numRows / numParts val matrixParts = sc.parallelize(1 to numParts, numParts).mapPartitionsWithIndex { (index, part) => val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed+index))) Iterator(DenseMatrix.rand(rowsPerPart, numCols, Gaussian(0.0, 1.0)(randBasis))) } RowPartitionedMatrix.fromMatrix(matrixParts.cache()) } def createLocalRandomMatrix(numRows: Int, numCols: Int, seed: Int = 42): DenseMatrix[Double] = { val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed))) DenseMatrix.rand(numRows, numCols, Gaussian(0.0, 1.0)(randBasis)) } }
Example 86
Source File: RulesTxtDeploymentServiceSpec.scala From smui with Apache License 2.0 | 5 votes |
package models import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.util.zip.ZipInputStream import org.apache.commons.io.IOUtils import org.scalatest.{FlatSpec, Matchers} class RulesTxtDeploymentServiceSpec extends FlatSpec with Matchers with ApplicationTestBase { private lazy val service = injector.instanceOf[RulesTxtDeploymentService] private var inputIds: Seq[SearchInputId] = Seq.empty override protected def beforeAll(): Unit = { super.beforeAll() createTestCores() inputIds = createTestRule() } private def rulesFileContent(ruleIds: Seq[SearchInputId]): String = s"""aerosmith => | SYNONYM: mercury | DOWN(10): battery | UP(10): notebook | FILTER: zz top | @{ | "_log" : "${ruleIds.head}" | }@ | |mercury => | SYNONYM: aerosmith | DOWN(10): battery | UP(10): notebook | FILTER: zz top | @{ | "_log" : "${ruleIds.head}" | }@ | |shipping => | DECORATE: REDIRECT http://xyz.com/shipping | @{ | "_log" : "${ruleIds.last}" | }@""".stripMargin "RulesTxtDeploymentService" should "generate rules files with correct file names" in { val rulesTxt = service.generateRulesTxtContentWithFilenames(core1Id, "LIVE", logDebug = false) rulesTxt.solrIndexId shouldBe core1Id rulesTxt.decompoundRules shouldBe empty rulesTxt.regularRules.content.trim shouldBe rulesFileContent(inputIds) rulesTxt.regularRules.sourceFileName shouldBe "/tmp/search-management-ui_rules-txt.tmp" rulesTxt.regularRules.destinationFileName shouldBe "/usr/bin/solr/liveCore/conf/rules.txt" } it should "validate the rules files correctly" in { val rulesTxt = service.generateRulesTxtContentWithFilenames(core1Id, "LIVE", logDebug = false) service.validateCompleteRulesTxts(rulesTxt, logDebug = false) shouldBe empty val badRulesTxt = rulesTxt.copy(regularRules = rulesTxt.regularRules.copy(content = "a very bad rules file")) service.validateCompleteRulesTxts(badRulesTxt, logDebug = false) shouldBe List("Line 1: Missing input for instruction") } it should "provide a zip file with all rules files" in { val out = new ByteArrayOutputStream() service.writeAllRulesTxtFilesAsZipFileToStream(out) val bytes = out.toByteArray val zipStream = new ZipInputStream(new ByteArrayInputStream(bytes)) val firstEntry = zipStream.getNextEntry firstEntry.getName shouldBe "rules_core1.txt" IOUtils.toString(zipStream, "UTF-8").trim shouldBe rulesFileContent(inputIds) val secondEntry = zipStream.getNextEntry secondEntry.getName shouldBe "rules_core2.txt" IOUtils.toString(zipStream, "UTF-8").trim shouldBe "" } }
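The zip assertions above rely on a detail worth calling out: when IOUtils.toString is handed a ZipInputStream, it reads only up to the end of the current entry, so the stream can keep being walked entry by entry afterwards. A reduced sketch of that loop, taking placeholder archive bytes as input:

import java.io.ByteArrayInputStream
import java.util.zip.ZipInputStream
import org.apache.commons.io.IOUtils

def entriesAsText(zipBytes: Array[Byte]): Map[String, String] = {
  val zip = new ZipInputStream(new ByteArrayInputStream(zipBytes))
  try {
    Iterator.continually(zip.getNextEntry)
      .takeWhile(_ != null)
      .map(entry => entry.getName -> IOUtils.toString(zip, "UTF-8")) // reads only the current entry
      .toMap
  } finally zip.close()
}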