java.io.IOException Scala Examples
The following examples show how to use java.io.IOException in Scala. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
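Before the project examples, here is a minimal, self-contained sketch of the pattern most of them share: wrap blocking I/O in try/catch/finally, handle IOException explicitly, and release the resource. The object name and file path below are illustrative only and are not taken from any project in this list.

import java.io.{BufferedReader, FileReader, IOException}

object ReadFirstLine {
  // Attempt to read the first line of a file, returning None on I/O failure.
  def firstLine(path: String): Option[String] = {
    var reader: BufferedReader = null
    try {
      reader = new BufferedReader(new FileReader(path))
      Option(reader.readLine())
    } catch {
      case e: IOException =>
        // Blocking I/O failed (missing file, permissions, disk error, ...).
        System.err.println(s"Could not read $path: ${e.getMessage}")
        None
    } finally {
      if (reader != null) reader.close()
    }
  }

  def main(args: Array[String]): Unit =
    println(firstLine("example.txt")) // "example.txt" is a hypothetical path
}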
Example 1
Source File: CommandUtils.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.deploy.worker

import java.io.{File, FileOutputStream, InputStream, IOException}

import scala.collection.JavaConverters._
import scala.collection.Map

import org.apache.spark.SecurityManager
import org.apache.spark.deploy.Command
import org.apache.spark.internal.Logging
import org.apache.spark.launcher.WorkerCommandBuilder
import org.apache.spark.util.Utils

  def redirectStream(in: InputStream, file: File) {
    val out = new FileOutputStream(file, true)

    // TODO: It would be nice to add a shutdown hook here that explains why the output is
    // terminating. Otherwise if the worker dies the executor logs will silently stop.
    new Thread("redirect output to " + file) {
      override def run() {
        try {
          Utils.copyStream(in, out, true)
        } catch {
          case e: IOException =>
            logInfo("Redirection to " + file + " closed: " + e.getMessage)
        }
      }
    }.start()
  }
}
Example 2
Source File: GrpcServerOwner.scala From daml with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.platform.apiserver import java.io.IOException import java.net.{BindException, InetAddress, InetSocketAddress} import java.util.concurrent.TimeUnit.SECONDS import com.daml.metrics.Metrics import com.daml.platform.apiserver.GrpcServerOwner._ import com.daml.ports.Port import com.daml.resources.{Resource, ResourceOwner} import com.google.protobuf.Message import io.grpc.netty.NettyServerBuilder import io.grpc._ import io.netty.channel.socket.nio.NioServerSocketChannel import io.netty.handler.ssl.SslContext import scala.concurrent.{ExecutionContext, Future} import scala.util.control.NoStackTrace final class GrpcServerOwner( address: Option[String], desiredPort: Port, maxInboundMessageSize: Int, sslContext: Option[SslContext] = None, interceptors: List[ServerInterceptor] = List.empty, metrics: Metrics, eventLoopGroups: ServerEventLoopGroups, services: Iterable[BindableService], ) extends ResourceOwner[Server] { override def acquire()(implicit executionContext: ExecutionContext): Resource[Server] = { val host = address.map(InetAddress.getByName).getOrElse(InetAddress.getLoopbackAddress) Resource(Future { val builder = NettyServerBuilder.forAddress(new InetSocketAddress(host, desiredPort.value)) builder.sslContext(sslContext.orNull) builder.channelType(classOf[NioServerSocketChannel]) builder.permitKeepAliveTime(10, SECONDS) builder.permitKeepAliveWithoutCalls(true) builder.directExecutor() builder.maxInboundMessageSize(maxInboundMessageSize) interceptors.foreach(builder.intercept) builder.intercept(new MetricsInterceptor(metrics)) eventLoopGroups.populate(builder) services.foreach { service => builder.addService(service) toLegacyService(service).foreach(builder.addService) } val server = builder.build() try { server.start() } catch { case e: IOException if e.getCause != null && e.getCause.isInstanceOf[BindException] => throw new UnableToBind(desiredPort, e.getCause) } server })(server => Future(server.shutdown().awaitTermination())) } // This exposes the existing services under com.daml also under com.digitalasset. // This is necessary to allow applications built with an earlier version of the SDK // to still work. // The "proxy" services will not show up on the reflection service, because of the way it // processes service definitions via protobuf file descriptors. private def toLegacyService(service: BindableService): Option[ServerServiceDefinition] = { val `com.daml` = "com.daml" val `com.digitalasset` = "com.digitalasset" val damlDef = service.bindService() val damlDesc = damlDef.getServiceDescriptor // Only add "proxy" services if it actually contains com.daml in the service name. // There are other services registered like the reflection service, that doesn't need the special treatment. 
if (damlDesc.getName.contains(`com.daml`)) { val digitalassetName = damlDesc.getName.replace(`com.daml`, `com.digitalasset`) val digitalassetDef = ServerServiceDefinition.builder(digitalassetName) damlDef.getMethods.forEach { methodDef => val damlMethodDesc = methodDef.getMethodDescriptor val digitalassetMethodName = damlMethodDesc.getFullMethodName.replace(`com.daml`, `com.digitalasset`) val digitalassetMethodDesc = damlMethodDesc.toBuilder.setFullMethodName(digitalassetMethodName).build() val _ = digitalassetDef.addMethod( digitalassetMethodDesc.asInstanceOf[MethodDescriptor[Message, Message]], methodDef.getServerCallHandler.asInstanceOf[ServerCallHandler[Message, Message]] ) } Option(digitalassetDef.build()) } else None } } object GrpcServerOwner { final class UnableToBind(port: Port, cause: Throwable) extends RuntimeException( s"The API server was unable to bind to port $port. Terminate the process occupying the port, or choose a different one.", cause) with NoStackTrace }
Example 3
Source File: TrafficMonitorThread.scala From shadowsocksr-android with GNU General Public License v3.0
package com.github.shadowsocks.utils

import java.io.{File, IOException}
import java.nio.{ByteBuffer, ByteOrder}
import java.util.concurrent.Executors

import android.content.Context
import android.net.{LocalServerSocket, LocalSocket, LocalSocketAddress}
import android.util.Log

class TrafficMonitorThread(context: Context) extends Thread {

  val TAG = "TrafficMonitorThread"
  lazy val PATH = context.getApplicationInfo.dataDir + "/stat_path"

  @volatile var serverSocket: LocalServerSocket = null
  @volatile var isRunning: Boolean = true

  def closeServerSocket() {
    if (serverSocket != null) {
      try {
        serverSocket.close()
      } catch {
        case _: Exception => // ignore
      }
      serverSocket = null
    }
  }

  def stopThread() {
    isRunning = false
    closeServerSocket()
  }

  override def run() {
    try {
      new File(PATH).delete()
    } catch {
      case _: Exception => // ignore
    }

    try {
      val localSocket = new LocalSocket
      localSocket.bind(new LocalSocketAddress(PATH, LocalSocketAddress.Namespace.FILESYSTEM))
      serverSocket = new LocalServerSocket(localSocket.getFileDescriptor)
    } catch {
      case e: IOException =>
        Log.e(TAG, "unable to bind", e)
        return
    }

    val pool = Executors.newFixedThreadPool(1)

    while (isRunning) {
      try {
        val socket = serverSocket.accept()
        pool.execute(() => {
          try {
            val input = socket.getInputStream
            val output = socket.getOutputStream
            val buffer = new Array[Byte](16)
            if (input.read(buffer) != 16) throw new IOException("Unexpected traffic stat length")
            val stat = ByteBuffer.wrap(buffer).order(ByteOrder.LITTLE_ENDIAN)
            TrafficMonitor.update(stat.getLong(0), stat.getLong(8))
            output.write(0)
            input.close()
            output.close()
          } catch {
            case e: Exception => Log.e(TAG, "Error when recv traffic stat", e)
          }
          // close socket
          try {
            socket.close()
          } catch {
            case _: Exception => // ignore
          }
        })
      } catch {
        case e: IOException =>
          Log.e(TAG, "Error when accept socket", e)
          return
      }
    }
  }
}
Example 4
Source File: GuardedProcess.scala From shadowsocksr-android with GNU General Public License v3.0
package com.github.shadowsocks import java.io.{IOException, InputStream, OutputStream} import java.lang.System.currentTimeMillis import java.util.concurrent.Semaphore import android.util.Log import scala.collection.JavaConversions._ class GuardedProcess(cmd: Seq[String]) extends Process { private val TAG = classOf[GuardedProcess].getSimpleName @volatile private var guardThread: Thread = _ @volatile private var isDestroyed: Boolean = _ @volatile private var process: Process = _ @volatile private var isRestart = false def start(onRestartCallback: () => Unit = null): GuardedProcess = { val semaphore = new Semaphore(1) semaphore.acquire @volatile var ioException: IOException = null guardThread = new Thread(() => { try { var callback: () => Unit = null while (!isDestroyed) { Log.i(TAG, "start process: " + cmd) val startTime = currentTimeMillis process = new ProcessBuilder(cmd).redirectErrorStream(true).start if (callback == null) callback = onRestartCallback else callback() semaphore.release process.waitFor if (isRestart) { isRestart = false } else { if (currentTimeMillis - startTime < 1000) { Log.w(TAG, "process exit too fast, stop guard: " + cmd) isDestroyed = true } } } } catch { case ignored: InterruptedException => Log.i(TAG, "thread interrupt, destroy process: " + cmd) process.destroy() case e: IOException => ioException = e } finally semaphore.release }, "GuardThread-" + cmd) guardThread.start() semaphore.acquire if (ioException != null) { throw ioException } this } def destroy() { isDestroyed = true guardThread.interrupt() process.destroy() try guardThread.join() catch { case ignored: InterruptedException => } } def restart() { isRestart = true process.destroy() } def exitValue: Int = throw new UnsupportedOperationException def getErrorStream: InputStream = throw new UnsupportedOperationException def getInputStream: InputStream = throw new UnsupportedOperationException def getOutputStream: OutputStream = throw new UnsupportedOperationException @throws(classOf[InterruptedException]) def waitFor = { guardThread.join() 0 } }
Example 5
Source File: UtilTest.scala From daml with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.codegen import com.daml.lf.data.Ref.{QualifiedName, PackageId} import java.io.IOException import java.nio.file.attribute.BasicFileAttributes import java.nio.file.{FileVisitResult, Files, Path, SimpleFileVisitor} import com.daml.lf.{iface => I} import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} import org.scalatest.prop.GeneratorDrivenPropertyChecks class UtilTest extends UtilTestHelpers with GeneratorDrivenPropertyChecks { val packageInterface = I.Interface(packageId = PackageId.assertFromString("abcdef"), typeDecls = Map.empty) val scalaPackageParts = Array("com", "digitalasset") val scalaPackage: String = scalaPackageParts.mkString(".") val util = lf.LFUtil( scalaPackage, I.EnvironmentInterface fromReaderInterfaces packageInterface, outputDir.toFile) def damlScalaName(damlNameSpace: Array[String], name: String): util.DamlScalaName = util.DamlScalaName(damlNameSpace, name) behavior of "Util" it should "mkDamlScalaName for a Contract named Test" in { val result = util.mkDamlScalaNameFromDirsAndName(Array(), "Test") result shouldEqual damlScalaName(Array.empty, "Test") result.packageName shouldEqual scalaPackage result.qualifiedName shouldEqual (scalaPackage + ".Test") } it should "mkDamlScalaName for a Template names foo.bar.Test" in { val result = util.mkDamlScalaName(Util.Template, QualifiedName assertFromString "foo.bar:Test") result shouldEqual damlScalaName(Array("foo", "bar"), "Test") result.packageName shouldEqual (scalaPackage + ".foo.bar") result.qualifiedName shouldEqual (scalaPackage + ".foo.bar.Test") } "partitionEithers" should "equal scalaz separate in simple cases" in forAll { iis: List[Either[Int, Int]] => import scalaz.syntax.monadPlus._, scalaz.std.list._, scalaz.std.either._ Util.partitionEithers(iis) shouldBe iis.separate } } abstract class UtilTestHelpers extends FlatSpec with Matchers with BeforeAndAfterAll { val outputDir = Files.createTempDirectory("codegenUtilTest") override protected def afterAll(): Unit = { super.afterAll() deleteRecursively(outputDir) } def deleteRecursively(dir: Path): Unit = { Files.walkFileTree( dir, new SimpleFileVisitor[Path] { override def postVisitDirectory(dir: Path, exc: IOException) = { Files.delete(dir) FileVisitResult.CONTINUE } override def visitFile(file: Path, attrs: BasicFileAttributes) = { Files.delete(file) FileVisitResult.CONTINUE } } ) () } }
Example 6
Source File: TestOutputStream.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.streaming

import java.io.{IOException, ObjectInputStream}
import java.util.concurrent.ConcurrentLinkedQueue

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.dstream.{DStream, ForEachDStream}
import org.apache.spark.util.Utils

class TestOutputStream[T: ClassTag](parent: DStream[T],
    val output: ConcurrentLinkedQueue[Seq[T]] = new ConcurrentLinkedQueue[Seq[T]]())
  extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => {
    val collected = rdd.collect()
    output.add(collected)
  }, false) {

  // This is to clear the output buffer every time it is read from a checkpoint
  @throws(classOf[IOException])
  private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException {
    ois.defaultReadObject()
    output.clear()
  }
}
Example 7
Source File: SparkSQLCLIService.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver import java.io.IOException import java.util.{List => JList} import javax.security.auth.login.LoginException import scala.collection.JavaConverters._ import org.apache.commons.logging.Log import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.shims.Utils import org.apache.hadoop.security.UserGroupInformation import org.apache.hive.service.{AbstractService, Service, ServiceException} import org.apache.hive.service.Service.STATE import org.apache.hive.service.auth.HiveAuthFactory import org.apache.hive.service.cli._ import org.apache.hive.service.server.HiveServer2 import org.apache.spark.sql.SQLContext import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._ private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLContext) extends CLIService(hiveServer) with ReflectedCompositeService { override def init(hiveConf: HiveConf) { setSuperField(this, "hiveConf", hiveConf) val sparkSqlSessionManager = new SparkSQLSessionManager(hiveServer, sqlContext) setSuperField(this, "sessionManager", sparkSqlSessionManager) addService(sparkSqlSessionManager) var sparkServiceUGI: UserGroupInformation = null if (UserGroupInformation.isSecurityEnabled) { try { HiveAuthFactory.loginFromKeytab(hiveConf) sparkServiceUGI = Utils.getUGI() setSuperField(this, "serviceUGI", sparkServiceUGI) } catch { case e @ (_: IOException | _: LoginException) => throw new ServiceException("Unable to login to kerberos with given principal/keytab", e) } } initCompositeService(hiveConf) } override def getInfo(sessionHandle: SessionHandle, getInfoType: GetInfoType): GetInfoValue = { getInfoType match { case GetInfoType.CLI_SERVER_NAME => new GetInfoValue("Spark SQL") case GetInfoType.CLI_DBMS_NAME => new GetInfoValue("Spark SQL") case GetInfoType.CLI_DBMS_VER => new GetInfoValue(sqlContext.sparkContext.version) case _ => super.getInfo(sessionHandle, getInfoType) } } } private[thriftserver] trait ReflectedCompositeService { this: AbstractService => def initCompositeService(hiveConf: HiveConf) { // Emulating `CompositeService.init(hiveConf)` val serviceList = getAncestorField[JList[Service]](this, 2, "serviceList") serviceList.asScala.foreach(_.init(hiveConf)) // Emulating `AbstractService.init(hiveConf)` invoke(classOf[AbstractService], this, "ensureCurrentState", classOf[STATE] -> STATE.NOTINITED) setAncestorField(this, 3, "hiveConf", hiveConf) invoke(classOf[AbstractService], this, "changeState", classOf[STATE] -> STATE.INITED) getAncestorField[Log](this, 3, "LOG").info(s"Service: $getName is inited.") } }
Example 8
Source File: ReusableStringReaderSuite.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions.xml import java.io.IOException import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.expressions.xml.UDFXPathUtil.ReusableStringReader class ReusableStringReaderSuite extends SparkFunSuite { private val fox = "Quick brown fox jumps over the lazy dog." test("empty reader") { val reader = new ReusableStringReader intercept[IOException] { reader.read() } intercept[IOException] { reader.ready() } reader.close() } test("mark reset") { val reader = new ReusableStringReader if (reader.markSupported()) { reader.asInstanceOf[ReusableStringReader].set(fox) assert(reader.ready()) val cc = new Array[Char](6) var read = reader.read(cc) assert(read == 6) assert("Quick " == new String(cc)) reader.mark(100) read = reader.read(cc) assert(read == 6) assert("brown " == new String(cc)) reader.reset() read = reader.read(cc) assert(read == 6) assert("brown " == new String(cc)) } reader.close() } test("skip") { val reader = new ReusableStringReader reader.asInstanceOf[ReusableStringReader].set(fox) // skip entire the data: var skipped = reader.skip(fox.length() + 1) assert(fox.length() == skipped) assert(-1 == reader.read()) reader.asInstanceOf[ReusableStringReader].set(fox) // reset the data val cc = new Array[Char](6) var read = reader.read(cc) assert(read == 6) assert("Quick " == new String(cc)) // skip some piece of data: skipped = reader.skip(30) assert(skipped == 30) read = reader.read(cc) assert(read == 4) assert("dog." == new String(cc, 0, read)) // skip when already at EOF: skipped = reader.skip(300) assert(skipped == 0, skipped) assert(reader.read() == -1) reader.close() } }
Example 9
Source File: InsertIntoHadoopFsRelationCommand.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import java.io.IOException

import org.apache.hadoop.fs.Path

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.command.RunnableCommand

        )) {
          throw new IOException(s"Unable to clear output " +
            s"directory $qualifiedOutputPath prior to writing to it")
        }
        true
      case (SaveMode.Append, _) | (SaveMode.Overwrite, _) | (SaveMode.ErrorIfExists, false) =>
        true
      case (SaveMode.Ignore, exists) =>
        !exists
      case (s, exists) =>
        throw new IllegalStateException(s"unsupported save mode $s ($exists)")
    }
    // If we are appending data to an existing dir.
    val isAppend = pathExists && (mode == SaveMode.Append)

    if (doInsertion) {
      WriteOutput.write(
        sparkSession,
        query,
        fileFormat,
        qualifiedOutputPath,
        hadoopConf,
        partitionColumns,
        bucketSpec,
        refreshFunction,
        options,
        isAppend)
    } else {
      logInfo("Skipping insertion into a relation that already exists.")
    }

    Seq.empty[Row]
  }
}
Example 10
Source File: ProcessTestUtils.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.test

import java.io.{InputStream, IOException}

import scala.sys.process.BasicIO

object ProcessTestUtils {
  class ProcessOutputCapturer(stream: InputStream, capture: String => Unit) extends Thread {
    this.setDaemon(true)

    override def run(): Unit = {
      try {
        BasicIO.processFully(capture)(stream)
      } catch { case _: IOException =>
        // Ignores the IOException thrown on process termination, which closes the input
        // stream abruptly.
      }
    }
  }
}
Example 11
Source File: DStreamCheckpointData.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.streaming.dstream import java.io.{IOException, ObjectInputStream, ObjectOutputStream} import scala.collection.mutable.HashMap import scala.reflect.ClassTag import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.internal.Logging import org.apache.spark.streaming.Time import org.apache.spark.util.Utils private[streaming] class DStreamCheckpointData[T: ClassTag](dstream: DStream[T]) extends Serializable with Logging { protected val data = new HashMap[Time, AnyRef]() // Mapping of the batch time to the checkpointed RDD file of that time @transient private var timeToCheckpointFile = new HashMap[Time, String] // Mapping of the batch time to the time of the oldest checkpointed RDD // in that batch's checkpoint data @transient private var timeToOldestCheckpointFileTime = new HashMap[Time, Time] @transient private var fileSystem: FileSystem = null protected[streaming] def currentCheckpointFiles = data.asInstanceOf[HashMap[Time, String]] def restore() { // Create RDDs from the checkpoint data currentCheckpointFiles.foreach { case(time, file) => logInfo("Restoring checkpointed RDD for time " + time + " from file '" + file + "'") dstream.generatedRDDs += ((time, dstream.context.sparkContext.checkpointFile[T](file))) } } override def toString: String = { "[\n" + currentCheckpointFiles.size + " checkpoint files \n" + currentCheckpointFiles.mkString("\n") + "\n]" } @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { logDebug(this.getClass().getSimpleName + ".writeObject used") if (dstream.context.graph != null) { dstream.context.graph.synchronized { if (dstream.context.graph.checkpointInProgress) { oos.defaultWriteObject() } else { val msg = "Object of " + this.getClass.getName + " is being serialized " + " possibly as a part of closure of an RDD operation. This is because " + " the DStream object is being referred to from within the closure. " + " Please rewrite the RDD operation inside this DStream to avoid this. " + " This has been enforced to avoid bloating of Spark tasks " + " with unnecessary objects." throw new java.io.NotSerializableException(msg) } } } else { throw new java.io.NotSerializableException( "Graph is unexpectedly null when DStream is being serialized.") } } @throws(classOf[IOException]) private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { logDebug(this.getClass().getSimpleName + ".readObject used") ois.defaultReadObject() timeToOldestCheckpointFileTime = new HashMap[Time, Time] timeToCheckpointFile = new HashMap[Time, String] } }
Example 12
Source File: RawTextSender.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.streaming.util

import java.io.{ByteArrayOutputStream, IOException}
import java.net.ServerSocket
import java.nio.ByteBuffer

import scala.io.Source

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.util.IntParam

private[streaming] object RawTextSender extends Logging {
  def main(args: Array[String]) {
    if (args.length != 4) {
      // scalastyle:off println
      System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>")
      // scalastyle:on println
      System.exit(1)
    }
    // Parse the arguments using a pattern match
    val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args

    // Repeat the input data multiple times to fill in a buffer
    val lines = Source.fromFile(file).getLines().toArray

    val bufferStream = new ByteArrayOutputStream(blockSize + 1000)
    val ser = new KryoSerializer(new SparkConf()).newInstance()
    val serStream = ser.serializeStream(bufferStream)
    var i = 0
    while (bufferStream.size < blockSize) {
      serStream.writeObject(lines(i))
      i = (i + 1) % lines.length
    }
    val array = bufferStream.toByteArray

    val countBuf = ByteBuffer.wrap(new Array[Byte](4))
    countBuf.putInt(array.length)
    countBuf.flip()

    val serverSocket = new ServerSocket(port)
    logInfo("Listening on port " + port)

    while (true) {
      val socket = serverSocket.accept()
      logInfo("Got a new connection")
      val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec)
      try {
        while (true) {
          out.write(countBuf.array)
          out.write(array)
        }
      } catch {
        case e: IOException =>
          logError("Client disconnected")
      } finally {
        socket.close()
      }
    }
  }
}
Example 13
Source File: HdfsUtils.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.streaming.util

import java.io.{FileNotFoundException, IOException}

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs._

private[streaming] object HdfsUtils {

  def getOutputStream(path: String, conf: Configuration): FSDataOutputStream = {
    val dfsPath = new Path(path)
    val dfs = getFileSystemForPath(dfsPath, conf)
    // If the file exists and we have append support, append instead of creating a new file
    val stream: FSDataOutputStream = {
      if (dfs.isFile(dfsPath)) {
        if (conf.getBoolean("hdfs.append.support", false) || dfs.isInstanceOf[RawLocalFileSystem]) {
          dfs.append(dfsPath)
        } else {
          throw new IllegalStateException("File exists and there is no append support!")
        }
      } else {
        dfs.create(dfsPath)
      }
    }
    stream
  }

  def getInputStream(path: String, conf: Configuration): FSDataInputStream = {
    val dfsPath = new Path(path)
    val dfs = getFileSystemForPath(dfsPath, conf)
    try {
      dfs.open(dfsPath)
    } catch {
      case _: FileNotFoundException =>
        null
      case e: IOException =>
        // If we are really unlucky, the file may be deleted as we're opening the stream.
        // This can happen as clean up is performed by daemon threads that may be left over from
        // previous runs.
        if (!dfs.isFile(dfsPath)) null else throw e
    }
  }

  def checkState(state: Boolean, errorMsg: => String) {
    if (!state) {
      throw new IllegalStateException(errorMsg)
    }
  }

  def checkFileExists(path: String, conf: Configuration): Boolean = {
    val hdpPath = new Path(path)
    val fs = getFileSystemForPath(hdpPath, conf)
    fs.isFile(hdpPath)
  }
}
Example 14
Source File: FileBasedWriteAheadLogReader.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.streaming.util import java.io.{Closeable, EOFException, IOException} import java.nio.ByteBuffer import org.apache.hadoop.conf.Configuration import org.apache.spark.internal.Logging private[streaming] class FileBasedWriteAheadLogReader(path: String, conf: Configuration) extends Iterator[ByteBuffer] with Closeable with Logging { private val instream = HdfsUtils.getInputStream(path, conf) private var closed = (instream == null) // the file may be deleted as we're opening the stream private var nextItem: Option[ByteBuffer] = None override def hasNext: Boolean = synchronized { if (closed) { return false } if (nextItem.isDefined) { // handle the case where hasNext is called without calling next true } else { try { val length = instream.readInt() val buffer = new Array[Byte](length) instream.readFully(buffer) nextItem = Some(ByteBuffer.wrap(buffer)) logTrace("Read next item " + nextItem.get) true } catch { case e: EOFException => logDebug("Error reading next item, EOF reached", e) close() false case e: IOException => logWarning("Error while trying to read data. If the file was deleted, " + "this should be okay.", e) close() if (HdfsUtils.checkFileExists(path, conf)) { // If file exists, this could be a legitimate error throw e } else { // File was deleted. This can occur when the daemon cleanup thread takes time to // delete the file during recovery. false } case e: Exception => logWarning("Error while trying to read data from HDFS.", e) close() throw e } } } override def next(): ByteBuffer = synchronized { val data = nextItem.getOrElse { close() throw new IllegalStateException( "next called without calling hasNext or after hasNext returned false") } nextItem = None // Ensure the next hasNext call loads new data. data } override def close(): Unit = synchronized { if (!closed) { instream.close() } closed = true } }
Example 15
Source File: SparkHadoopMapRedUtil.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.mapred import java.io.IOException import org.apache.hadoop.mapreduce.{TaskAttemptContext => MapReduceTaskAttemptContext} import org.apache.hadoop.mapreduce.{OutputCommitter => MapReduceOutputCommitter} import org.apache.spark.{SparkEnv, TaskContext} import org.apache.spark.executor.CommitDeniedException import org.apache.spark.internal.Logging object SparkHadoopMapRedUtil extends Logging { def commitTask( committer: MapReduceOutputCommitter, mrTaskContext: MapReduceTaskAttemptContext, jobId: Int, splitId: Int): Unit = { val mrTaskAttemptID = mrTaskContext.getTaskAttemptID // Called after we have decided to commit def performCommit(): Unit = { try { committer.commitTask(mrTaskContext) logInfo(s"$mrTaskAttemptID: Committed") } catch { case cause: IOException => logError(s"Error committing the output of task: $mrTaskAttemptID", cause) committer.abortTask(mrTaskContext) throw cause } } // First, check whether the task's output has already been committed by some other attempt if (committer.needsTaskCommit(mrTaskContext)) { val shouldCoordinateWithDriver: Boolean = { val sparkConf = SparkEnv.get.conf // We only need to coordinate with the driver if there are concurrent task attempts. // Note that this could happen even when speculation is not enabled (e.g. see SPARK-8029). // This (undocumented) setting is an escape-hatch in case the commit code introduces bugs. sparkConf.getBoolean("spark.hadoop.outputCommitCoordination.enabled", defaultValue = true) } if (shouldCoordinateWithDriver) { val outputCommitCoordinator = SparkEnv.get.outputCommitCoordinator val taskAttemptNumber = TaskContext.get().attemptNumber() val canCommit = outputCommitCoordinator.canCommit(jobId, splitId, taskAttemptNumber) if (canCommit) { performCommit() } else { val message = s"$mrTaskAttemptID: Not committed because the driver did not authorize commit" logInfo(message) // We need to abort the task so that the driver can reschedule new attempts, if necessary committer.abortTask(mrTaskContext) throw new CommitDeniedException(message, jobId, splitId, taskAttemptNumber) } } else { // Speculation is disabled or a user has chosen to manually bypass the commit coordination performCommit() } } else { // Some other attempt committed the output, so we do nothing and signal success logInfo(s"No need to commit output of task because needsTaskCommit=false: $mrTaskAttemptID") } } }
Example 16
Source File: ReplayListenerBus.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.scheduler import java.io.{InputStream, IOException} import scala.io.Source import com.fasterxml.jackson.core.JsonParseException import org.json4s.jackson.JsonMethods._ import org.apache.spark.internal.Logging import org.apache.spark.scheduler.ReplayListenerBus._ import org.apache.spark.util.JsonProtocol def replay( logData: InputStream, sourceName: String, maybeTruncated: Boolean = false, eventsFilter: ReplayEventsFilter = SELECT_ALL_FILTER): Unit = { var currentLine: String = null var lineNumber: Int = 0 try { val lineEntries = Source.fromInputStream(logData) .getLines() .zipWithIndex .filter { case (line, _) => eventsFilter(line) } while (lineEntries.hasNext) { try { val entry = lineEntries.next() currentLine = entry._1 lineNumber = entry._2 + 1 postToAll(JsonProtocol.sparkEventFromJson(parse(currentLine))) } catch { case jpe: JsonParseException => // We can only ignore exception from last line of the file that might be truncated // the last entry may not be the very last line in the event log, but we treat it // as such in a best effort to replay the given input if (!maybeTruncated || lineEntries.hasNext) { throw jpe } else { logWarning(s"Got JsonParseException from log file $sourceName" + s" at line $lineNumber, the file might not have finished writing cleanly.") } } } } catch { case ioe: IOException => throw ioe case e: Exception => logError(s"Exception parsing Spark event log: $sourceName", e) logError(s"Malformed line #$lineNumber: $currentLine\n") } } } private[spark] object ReplayListenerBus { type ReplayEventsFilter = (String) => Boolean // utility filter that selects all event logs during replay val SELECT_ALL_FILTER: ReplayEventsFilter = { (eventString: String) => true } }
Example 17
Source File: SerializableBuffer.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.util

import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels

private[spark] class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable {
  def value: ByteBuffer = buffer

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    val length = in.readInt()
    buffer = ByteBuffer.allocate(length)
    var amountRead = 0
    val channel = Channels.newChannel(in)
    while (amountRead < length) {
      val ret = channel.read(buffer)
      if (ret == -1) {
        throw new EOFException("End of file before fully reading buffer")
      }
      amountRead += ret
    }
    buffer.rewind() // Allow us to read it later
  }

  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.writeInt(buffer.limit())
    if (Channels.newChannel(out).write(buffer) != buffer.limit()) {
      throw new IOException("Could not fully write buffer to output stream")
    }
    buffer.rewind() // Allow us to write it again later
  }
}
Example 18
Source File: BlockManagerId.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.storage

import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput}
import java.util.concurrent.ConcurrentHashMap

import org.apache.spark.SparkContext
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.util.Utils

  def apply(
      execId: String,
      host: String,
      port: Int,
      topologyInfo: Option[String] = None): BlockManagerId =
    getCachedBlockManagerId(new BlockManagerId(execId, host, port, topologyInfo))

  def apply(in: ObjectInput): BlockManagerId = {
    val obj = new BlockManagerId()
    obj.readExternal(in)
    getCachedBlockManagerId(obj)
  }

  val blockManagerIdCache = new ConcurrentHashMap[BlockManagerId, BlockManagerId]()

  def getCachedBlockManagerId(id: BlockManagerId): BlockManagerId = {
    blockManagerIdCache.putIfAbsent(id, id)
    blockManagerIdCache.get(id)
  }
}
Example 19
Source File: DiskStore.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.storage import java.io.{FileOutputStream, IOException, RandomAccessFile} import java.nio.ByteBuffer import java.nio.channels.FileChannel.MapMode import com.google.common.io.Closeables import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.util.Utils import org.apache.spark.util.io.ChunkedByteBuffer def put(blockId: BlockId)(writeFunc: FileOutputStream => Unit): Unit = { if (contains(blockId)) { throw new IllegalStateException(s"Block $blockId is already present in the disk store") } logDebug(s"Attempting to put block $blockId") val startTime = System.currentTimeMillis val file = diskManager.getFile(blockId) val fileOutputStream = new FileOutputStream(file) var threwException: Boolean = true try { writeFunc(fileOutputStream) threwException = false } finally { try { Closeables.close(fileOutputStream, threwException) } finally { if (threwException) { remove(blockId) } } } val finishTime = System.currentTimeMillis logDebug("Block %s stored as %s file on disk in %d ms".format( file.getName, Utils.bytesToString(file.length()), finishTime - startTime)) } def putBytes(blockId: BlockId, bytes: ChunkedByteBuffer): Unit = { put(blockId) { fileOutputStream => val channel = fileOutputStream.getChannel Utils.tryWithSafeFinally { bytes.writeFully(channel) } { channel.close() } } } def getBytes(blockId: BlockId): ChunkedByteBuffer = { val file = diskManager.getFile(blockId.name) val channel = new RandomAccessFile(file, "r").getChannel Utils.tryWithSafeFinally { // For small files, directly read rather than memory map if (file.length < minMemoryMapBytes) { val buf = ByteBuffer.allocate(file.length.toInt) channel.position(0) while (buf.remaining() != 0) { if (channel.read(buf) == -1) { throw new IOException("Reached EOF before filling buffer\n" + s"offset=0\nfile=${file.getAbsolutePath}\nbuf.remaining=${buf.remaining}") } } buf.flip() new ChunkedByteBuffer(buf) } else { new ChunkedByteBuffer(channel.map(MapMode.READ_ONLY, 0, file.length)) } } { channel.close() } } def remove(blockId: BlockId): Boolean = { val file = diskManager.getFile(blockId.name) if (file.exists()) { val ret = file.delete() if (!ret) { logWarning(s"Error deleting ${file.getPath()}") } ret } else { false } } def contains(blockId: BlockId): Boolean = { val file = diskManager.getFile(blockId.name) file.exists() } }
Example 20
Source File: CartesianRDD.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.util.Utils private[spark] class CartesianPartition( idx: Int, @transient private val rdd1: RDD[_], @transient private val rdd2: RDD[_], s1Index: Int, s2Index: Int ) extends Partition { var s1 = rdd1.partitions(s1Index) var s2 = rdd2.partitions(s2Index) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization s1 = rdd1.partitions(s1Index) s2 = rdd2.partitions(s2Index) oos.defaultWriteObject() } } private[spark] class CartesianRDD[T: ClassTag, U: ClassTag]( sc: SparkContext, var rdd1 : RDD[T], var rdd2 : RDD[U]) extends RDD[(T, U)](sc, Nil) with Serializable { val numPartitionsInRdd2 = rdd2.partitions.length override def getPartitions: Array[Partition] = { // create the cross product split val array = new Array[Partition](rdd1.partitions.length * rdd2.partitions.length) for (s1 <- rdd1.partitions; s2 <- rdd2.partitions) { val idx = s1.index * numPartitionsInRdd2 + s2.index array(idx) = new CartesianPartition(idx, rdd1, rdd2, s1.index, s2.index) } array } override def getPreferredLocations(split: Partition): Seq[String] = { val currSplit = split.asInstanceOf[CartesianPartition] (rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)).distinct } override def compute(split: Partition, context: TaskContext): Iterator[(T, U)] = { val currSplit = split.asInstanceOf[CartesianPartition] for (x <- rdd1.iterator(currSplit.s1, context); y <- rdd2.iterator(currSplit.s2, context)) yield (x, y) } override def getDependencies: Seq[Dependency[_]] = List( new NarrowDependency(rdd1) { def getParents(id: Int): Seq[Int] = List(id / numPartitionsInRdd2) }, new NarrowDependency(rdd2) { def getParents(id: Int): Seq[Int] = List(id % numPartitionsInRdd2) } ) override def clearDependencies() { super.clearDependencies() rdd1 = null rdd2 = null } }
Example 21
Source File: UnionRDD.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer import scala.collection.parallel.{ForkJoinTaskSupport, ThreadPoolTaskSupport} import scala.concurrent.forkjoin.ForkJoinPool import scala.reflect.ClassTag import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils private[spark] class UnionPartition[T: ClassTag]( idx: Int, @transient private val rdd: RDD[T], val parentRddIndex: Int, @transient private val parentRddPartitionIndex: Int) extends Partition { var parentPartition: Partition = rdd.partitions(parentRddPartitionIndex) def preferredLocations(): Seq[String] = rdd.preferredLocations(parentPartition) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization parentPartition = rdd.partitions(parentRddPartitionIndex) oos.defaultWriteObject() } } object UnionRDD { private[spark] lazy val partitionEvalTaskSupport = new ForkJoinTaskSupport(new ForkJoinPool(8)) } @DeveloperApi class UnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]]) extends RDD[T](sc, Nil) { // Nil since we implement getDependencies // visible for testing private[spark] val isPartitionListingParallel: Boolean = rdds.length > conf.getInt("spark.rdd.parallelListingThreshold", 10) override def getPartitions: Array[Partition] = { val parRDDs = if (isPartitionListingParallel) { val parArray = rdds.par parArray.tasksupport = UnionRDD.partitionEvalTaskSupport parArray } else { rdds } val array = new Array[Partition](parRDDs.map(_.partitions.length).seq.sum) var pos = 0 for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) { array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index) pos += 1 } array } override def getDependencies: Seq[Dependency[_]] = { val deps = new ArrayBuffer[Dependency[_]] var pos = 0 for (rdd <- rdds) { deps += new RangeDependency(rdd, 0, pos, rdd.partitions.length) pos += rdd.partitions.length } deps } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val part = s.asInstanceOf[UnionPartition[T]] parent[T](part.parentRddIndex).iterator(part.parentPartition, context) } override def getPreferredLocations(s: Partition): Seq[String] = s.asInstanceOf[UnionPartition[T]].preferredLocations() override def clearDependencies() { super.clearDependencies() rdds = null } }
Example 22
Source File: PartitionerAwareUnionRDD.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark.{OneToOneDependency, Partition, SparkContext, TaskContext} import org.apache.spark.util.Utils private[spark] class PartitionerAwareUnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]] ) extends RDD[T](sc, rdds.map(x => new OneToOneDependency(x))) { require(rdds.nonEmpty) require(rdds.forall(_.partitioner.isDefined)) require(rdds.flatMap(_.partitioner).toSet.size == 1, "Parent RDDs have different partitioners: " + rdds.flatMap(_.partitioner)) override val partitioner = rdds.head.partitioner override def getPartitions: Array[Partition] = { val numPartitions = partitioner.get.numPartitions (0 until numPartitions).map { index => new PartitionerAwareUnionRDDPartition(rdds, index) }.toArray } // Get the location where most of the partitions of parent RDDs are located override def getPreferredLocations(s: Partition): Seq[String] = { logDebug("Finding preferred location for " + this + ", partition " + s.index) val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents val locations = rdds.zip(parentPartitions).flatMap { case (rdd, part) => val parentLocations = currPrefLocs(rdd, part) logDebug("Location of " + rdd + " partition " + part.index + " = " + parentLocations) parentLocations } val location = if (locations.isEmpty) { None } else { // Find the location that maximum number of parent partitions prefer Some(locations.groupBy(x => x).maxBy(_._2.length)._1) } logDebug("Selected location for " + this + ", partition " + s.index + " = " + location) location.toSeq } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents rdds.zip(parentPartitions).iterator.flatMap { case (rdd, p) => rdd.iterator(p, context) } } override def clearDependencies() { super.clearDependencies() rdds = null } // Get the *current* preferred locations from the DAGScheduler (as opposed to the static ones) private def currPrefLocs(rdd: RDD[_], part: Partition): Seq[String] = { rdd.context.getPreferredLocs(rdd, part.index).map(tl => tl.host) } }
Example 23
Source File: DnnStorage.scala From BigDL with Apache License 2.0
package com.intel.analytics.bigdl.tensor

import java.io.{IOException, ObjectInputStream, ObjectOutputStream}

import com.intel.analytics.bigdl.mkl.Memory
import com.intel.analytics.bigdl.nn.mkldnn.MemoryOwner

import scala.reflect._

private[bigdl] class Pointer(val address: Long)

object DnnStorage {
  private[tensor] val CACHE_LINE_SIZE = System.getProperty("bigdl.cache.line", "64").toInt
  private[bigdl] val FLOAT_BYTES: Int = 4
  private[bigdl] val INT8_BYTES: Int = 1
  private[bigdl] val INT_BYTES: Int = 4

  import java.util.concurrent.ConcurrentHashMap
  private val nativeStorages: ConcurrentHashMap[Long, Boolean] = new ConcurrentHashMap()

  def checkAndSet(pointer: Long): Boolean = {
    nativeStorages.replace(pointer, false, true)
  }

  def add(key: Long): Unit = {
    nativeStorages.put(key, false)
  }

  def get(): Map[Long, Boolean] = {
    import scala.collection.JavaConverters._
    nativeStorages.asScala.toMap
  }
}
Example 24
Source File: ECIESCoder.scala From mantis with Apache License 2.0
package io.iohk.ethereum.crypto import java.io.{ByteArrayInputStream, IOException} import java.math.BigInteger import java.security.SecureRandom import org.spongycastle.crypto.digests.{SHA1Digest, SHA256Digest} import org.spongycastle.crypto.engines.AESEngine import org.spongycastle.crypto.generators.ECKeyPairGenerator import org.spongycastle.crypto.macs.HMac import org.spongycastle.crypto.modes.SICBlockCipher import org.spongycastle.crypto.params._ import org.spongycastle.crypto.parsers.ECIESPublicKeyParser import org.spongycastle.crypto.{BufferedBlockCipher, InvalidCipherTextException} import org.spongycastle.math.ec.ECPoint object ECIESCoder { val KeySize = 128 val PublicKeyOverheadSize = 65 val MacOverheadSize = 32 val OverheadSize = PublicKeyOverheadSize + KeySize / 8 + MacOverheadSize @throws[IOException] @throws[InvalidCipherTextException] def decrypt(privKey: BigInteger, cipher: Array[Byte], macData: Option[Array[Byte]] = None): Array[Byte] = { val is = new ByteArrayInputStream(cipher) val ephemBytes = new Array[Byte](2 * ((curve.getCurve.getFieldSize + 7) / 8) + 1) is.read(ephemBytes) val ephem = curve.getCurve.decodePoint(ephemBytes) val IV = new Array[Byte](KeySize / 8) is.read(IV) val cipherBody = new Array[Byte](is.available) is.read(cipherBody) decrypt(ephem, privKey, Some(IV), cipherBody, macData) } @throws[InvalidCipherTextException] def decrypt(ephem: ECPoint, prv: BigInteger, IV: Option[Array[Byte]], cipher: Array[Byte], macData: Option[Array[Byte]]): Array[Byte] = { val aesEngine = new AESEngine val iesEngine = new EthereumIESEngine( kdf = Left(new ConcatKDFBytesGenerator(new SHA256Digest)), mac = new HMac(new SHA256Digest), hash = new SHA256Digest, cipher = Some(new BufferedBlockCipher(new SICBlockCipher(aesEngine))), IV = IV, prvSrc = Left(new ECPrivateKeyParameters(prv, curve)), pubSrc = Left(new ECPublicKeyParameters(ephem, curve))) iesEngine.processBlock(cipher, 0, cipher.length, forEncryption = false, macData) } @throws[IOException] @throws[InvalidCipherTextException] def encryptSimple(pub: ECPoint, secureRandom: SecureRandom, plaintext: Array[Byte]): Array[Byte] = { val eGen = new ECKeyPairGenerator val gParam = new ECKeyGenerationParameters(curve, secureRandom) eGen.init(gParam) val iesEngine = new EthereumIESEngine( kdf = Right(new MGF1BytesGeneratorExt(new SHA1Digest)), mac = new HMac(new SHA1Digest), hash = new SHA1Digest, cipher = None, IV = Some(new Array[Byte](0)), prvSrc = Right(eGen), pubSrc = Left(new ECPublicKeyParameters(pub, curve)), hashMacKey = false) iesEngine.processBlock(plaintext, 0, plaintext.length, forEncryption = true) } private def makeIESEngine(pub: ECPoint, prv: BigInteger, IV: Option[Array[Byte]]) = { val aesEngine = new AESEngine val iesEngine = new EthereumIESEngine( kdf = Left(new ConcatKDFBytesGenerator(new SHA256Digest)), mac = new HMac(new SHA256Digest), hash = new SHA256Digest, cipher = Some(new BufferedBlockCipher(new SICBlockCipher(aesEngine))), IV = IV, prvSrc = Left(new ECPrivateKeyParameters(prv, curve)), pubSrc = Left(new ECPublicKeyParameters(pub, curve))) iesEngine } }
Example 25
Source File: Main.scala From scala-json-rpc with MIT License
package io.github.shogowada.scala.jsonrpc.example.e2e.websocket import java.io.IOException import io.github.shogowada.scala.jsonrpc.JSONRPCServerAndClient import io.github.shogowada.scala.jsonrpc.Types.JSONSender import io.github.shogowada.scala.jsonrpc.client.JSONRPCClient import io.github.shogowada.scala.jsonrpc.serializers.UpickleJSONSerializer import io.github.shogowada.scala.jsonrpc.server.JSONRPCServer import io.github.shogowada.scalajs.reactjs.ReactDOM import io.github.shogowada.scalajs.reactjs.VirtualDOM._ import org.scalajs.dom import org.scalajs.dom.WebSocket import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.{Future, Promise} import scala.scalajs.js.JSApp import scala.util.{Failure, Try} object Main extends JSApp { override def main(): Unit = { val futureWebSocket = createFutureWebSocket() val serverAndClient = createServerAndClient(futureWebSocket) val mountNode = dom.document.getElementById("mount-node") ReactDOM.render( <((new TodoListView(serverAndClient.createAPI[TodoRepositoryAPI])) ()).empty, mountNode ) } private def createFutureWebSocket(): Future[WebSocket] = { val promisedWebSocket: Promise[WebSocket] = Promise() val webSocket = new dom.WebSocket(webSocketUrl) webSocket.onopen = (_: dom.Event) => { promisedWebSocket.success(webSocket) } webSocket.onerror = (event: dom.Event) => { promisedWebSocket.failure(new IOException(event.toString)) } promisedWebSocket.future } private def webSocketUrl: String = { val location = dom.window.location val protocol = location.protocol match { case "http:" => "ws:" case "https:" => "wss:" } s"$protocol//${location.host}/jsonrpc" } private def createServerAndClient(futureWebSocket: Future[WebSocket]): JSONRPCServerAndClient[UpickleJSONSerializer] = { val jsonSerializer = UpickleJSONSerializer() val server = JSONRPCServer(jsonSerializer) val jsonSender: JSONSender = (json: String) => { futureWebSocket .map(webSocket => Try(webSocket.send(json))) .flatMap(tried => tried.fold( throwable => Future.failed(throwable), _ => Future(None) )) } val client = JSONRPCClient(jsonSerializer, jsonSender) val serverAndClient = JSONRPCServerAndClient(server, client) futureWebSocket.foreach(webSocket => { webSocket.onmessage = (event: dom.MessageEvent) => { val message = event.data.toString serverAndClient.receiveAndSend(message).onComplete { case Failure(throwable) => { println("Failed to send response", throwable) } case _ => } } }) serverAndClient } }
Example 26
Source File: NetworkDeviceManager.scala From slide-desktop with GNU General Public License v2.0
package connections.network

import java.io.IOException
import java.net.SocketException

import connections.{BaseDeviceManager, ConnectionManager}
import slide.SystemInfo
import enums.ConnectionMode

class NetworkDeviceManager extends BaseDeviceManager {

  private var ndc: NetworkDeviceConnection = null
  private var backgroundScannerRunning: Boolean = true

  @throws(classOf[IOException])
  override def connect(ip: String): Unit = {
    ndc = new NetworkDeviceConnection(ip) {
      override def onClientOutOfDate(): Unit = {
        throwError("The client is out of date. Please upgrade it.")
      }
    }
    ndc.connect()
  }

  override def throwError(message: String): Unit = {}

  override def startBackgroundScanner(): Unit = {
    val t: Thread = new Thread(new Runnable {
      override def run(): Unit = {
        var dcCount: Int = 0
        var udpDiscovery: BroadcastManager = null
        try {
          udpDiscovery = new BroadcastManager
        } catch {
          case e: SocketException =>
            throwError("Another instance of Slide is already running.")
            System.exit(1)
        }
        while (backgroundScannerRunning) {
          if (!SystemInfo.isNetworkIsAvailable) {
            stopBackgroundScanner()
          }
          device = udpDiscovery.search
          if (device != null) {
            dcCount = 0
            if (!ConnectionManager.hasConnection(ConnectionMode.WIFI)) {
              onWifiConnectionAdded()
            }
          } else {
            dcCount += 1
            if (dcCount >= 4) {
              if (ConnectionManager.hasConnection(ConnectionMode.WIFI)) {
                onWifiConnectionRemoved()
              }
            }
          }
        }
      }
    })
    t.start()
  }

  override def stopBackgroundScanner(): Unit = backgroundScannerRunning = false

  def ip: String = device.ip
}
Example 27
Source File: NetworkDeviceConnection.scala From slide-desktop with GNU General Public License v2.0
package connections.network

import java.io.{IOException, ObjectInputStream}
import java.net.InetSocketAddress

import connections.BaseDeviceConnection
import slide.Const

class NetworkDeviceConnection(val ip: String) extends BaseDeviceConnection {

  private val inetAddress: InetSocketAddress = new InetSocketAddress(ip, Const.NET_PORT)
  super.socket.connect(inetAddress, 4000)

  private val input = new ObjectInputStream(socket.getInputStream)

  @throws[IOException]
  override def connect(): Boolean = {
    this.start()
  }

  @throws[IOException]
  @throws[ClassNotFoundException]
  override def nextMessage(): Array[Short] = {
    try {
      input.readObject.asInstanceOf[Array[Short]]
    } catch {
      case e: ClassNotFoundException => new Array[Short](1)
    }
  }

  override def close(): Unit = {
    stopRunning()
    input.close()
    socket.close()
  }
}
Example 28
Source File: Adb.scala From slide-desktop with GNU General Public License v2.0
package connections.usb

import java.io.{File, IOException}

import com.android.ddmlib.AndroidDebugBridge.IDeviceChangeListener
import com.android.ddmlib.{AndroidDebugBridge, IDevice}
import slide.{Const, FileManager, SystemInfo}
import enums.OperatingSystem

object Adb {

  var usbAvailable: Boolean = false
  var isAdbInstalled: Boolean = false
  var adbFilePath: String = ""

  def startAdb(): Process = {
    executeAdbProcess(new ProcessBuilder(adbFilePath, "forward", "tcp:" + Const.USB_PORT, "tcp:" + Const.USB_PORT))
  }

  def adbDevices(): Process = {
    executeAdbProcess(new ProcessBuilder(adbFilePath, "devices"))
  }

  def restartAdb(): Process = {
    executeAdbProcess(new ProcessBuilder(adbFilePath, "kill-server"))
    executeAdbProcess(new ProcessBuilder(adbFilePath, "start-server"))
  }
}
Example 29
Source File: UsbDeviceManager.scala From slide-desktop with GNU General Public License v2.0
package connections.usb

import java.io.IOException

import connections.{BaseDeviceManager, ConnectionManager}
import slide.Device
import enums.ConnectionMode
import gui.ImageIcons

class UsbDeviceManager extends BaseDeviceManager {

  private var udc: UsbDeviceConnection = null
  private var backgroundScannerRunning: Boolean = true

  @throws(classOf[IOException])
  def connect(ip: String): Unit = {
    udc = new UsbDeviceConnection(ip) {
      override def onClientOutOfDate(): Unit = {
        throwError("The client is out of date. Please upgrade it.")
      }
    }
    udc.connect()
  }

  def startBackgroundScanner(): Unit = {
    val t: Thread = new Thread(new Runnable {
      def run() {
        device = new Device(ImageIcons.usbIcon, Array[String]("USB", "USB", "USB"))
        var dcCount: Int = 0
        while (backgroundScannerRunning) {
          Thread.sleep(1000)
          if (Adb.usbAvailable) {
            dcCount = 0
            if (!ConnectionManager.hasConnection(ConnectionMode.USB)) {
              onUsbConnectionAdded()
            }
          } else {
            dcCount += 1
            if (dcCount >= 2 && device != null && ConnectionManager.hasConnection(ConnectionMode.USB)) {
              onUsbConnectionRemoved()
            }
          }
        }
      }
    })
    if (!Adb.isAdbAvailable) {
      throwError("Adb not found")
    } else {
      t.start()
    }
  }

  override def throwError(message: String): Unit = {}

  def stopBackgroundScanner(): Unit = backgroundScannerRunning = false
}
Example 30
Source File: CustomJson.scala From matcher with MIT License
package com.wavesplatform.dex.api.http.json import java.io.IOException import akka.http.scaladsl.model.MediaType import akka.http.scaladsl.model.MediaTypes.`application/json` import com.fasterxml.jackson.core.io.SegmentedStringWriter import com.fasterxml.jackson.core.util.BufferRecyclers import com.fasterxml.jackson.core.{JsonGenerator, JsonProcessingException} import com.fasterxml.jackson.databind.module.SimpleModule import com.fasterxml.jackson.databind.{JsonMappingException, JsonSerializer, ObjectMapper, SerializerProvider} import play.api.libs.json._ object NumberAsStringSerializer extends JsonSerializer[JsValue] { private val fieldNamesToTranslate = Set( "amount", "available", "balance", "buyMatcherFee", "currentReward", "desiredReward", "effective", "fee", "feeAmount", "generating", "in", "matcherFee", "minIncrement", "minSponsoredAssetFee", "out", "price", "quantity", "regular", "reward", "sellMatcherFee", "sponsorBalance", "totalAmount", "totalFee", "totalWavesAmount", "value" ) override def serialize(value: JsValue, json: JsonGenerator, provider: SerializerProvider): Unit = { value match { case JsNumber(v) => json.writeNumber(v.bigDecimal) case JsString(v) => json.writeString(v) case JsBoolean(v) => json.writeBoolean(v) case JsArray(elements) => json.writeStartArray() elements.foreach { t => serialize(t, json, provider) } json.writeEndArray() case JsObject(values) => json.writeStartObject() values.foreach { case (name, JsNumber(v)) if fieldNamesToTranslate(name) => json.writeStringField(name, v.bigDecimal.toPlainString) case (name, jsv) => json.writeFieldName(name) serialize(jsv, json, provider) } json.writeEndObject() case JsNull => json.writeNull() } } } object CustomJson { val jsonWithNumbersAsStrings: MediaType.WithFixedCharset = `application/json`.withParams(Map("large-significand-format" -> "string")) private lazy val mapper = (new ObjectMapper) .registerModule(new SimpleModule("WavesJson").addSerializer(classOf[JsValue], NumberAsStringSerializer)) .configure(JsonGenerator.Feature.WRITE_BIGDECIMAL_AS_PLAIN, true) def writeValueAsString(value: JsValue): String = { val sw = new SegmentedStringWriter(BufferRecyclers.getBufferRecycler) try mapper.writeValue(sw, value) catch { case e: JsonProcessingException => throw e case e: IOException => // shouldn't really happen, but is declared as possibility so: throw JsonMappingException.fromUnexpectedIOE(e) } sw.getAndClear } }
Example 31
Source File: TestHelpers.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.util import java.io.IOException import java.nio.file.attribute.BasicFileAttributes import java.nio.file.{FileVisitResult, Files, Path, SimpleFileVisitor} object TestHelpers { def deleteRecursively(path: Path): Unit = Files.walkFileTree( path, new SimpleFileVisitor[Path] { override def postVisitDirectory(dir: Path, exc: IOException): FileVisitResult = { Option(exc).fold { Files.delete(dir) FileVisitResult.CONTINUE }(throw _) } override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = { Files.delete(file) FileVisitResult.CONTINUE } } ) }
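A short usage sketch (hypothetical file names, assuming the object above is importable):

import java.nio.file.Files
import com.wavesplatform.dex.util.TestHelpers

val dir = Files.createTempDirectory("cleanup-example")
Files.createFile(dir.resolve("data.bin"))
TestHelpers.deleteRecursively(dir) // deletes data.bin first, then the directory itself
assert(!Files.exists(dir))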
Example 32
Source File: IOUtil.scala From jardiff with Apache License 2.0 | 5 votes |
package scala.tools.jardiff import java.io.IOException import java.net.URI import java.nio.file._ import java.nio.file.attribute.BasicFileAttributes import java.util object IOUtil { def rootPath(fileOrZip: Path): Path = { if (fileOrZip.getFileName.toString.endsWith(".jar")) { val uri = URI.create(s"jar:${fileOrZip.toUri}") newFileSystem(uri, new util.HashMap[String, Any]()).getPath("/") } else { val extSlash = ".jar/" val index = fileOrZip.toString.indexOf(extSlash) if (index == -1) { fileOrZip } else { val uri = URI.create("jar:" + Paths.get(fileOrZip.toString.substring(0, index + extSlash.length - 1)).toUri.toString) val jarEntry = fileOrZip.toString.substring(index + extSlash.length - 1) val system = newFileSystem(uri, new util.HashMap[String, Any]()) system.getPath(jarEntry) } } } private def newFileSystem(uri: URI, map: java.util.Map[String, Any]) = try FileSystems.newFileSystem(uri, map) catch { case _: FileSystemAlreadyExistsException => FileSystems.getFileSystem(uri) } def mapRecursive(source: java.nio.file.Path, target: java.nio.file.Path)(f: (Path, Path) => Unit) = { Files.walkFileTree(source, util.EnumSet.of(FileVisitOption.FOLLOW_LINKS), Integer.MAX_VALUE, new FileVisitor[Path] { def preVisitDirectory(dir: Path, sourceBasic: BasicFileAttributes): FileVisitResult = { val relative = source.relativize(dir).toString if (!Files.exists(target.resolve(relative))) Files.createDirectory(target.resolve(relative)) FileVisitResult.CONTINUE } def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = { val relative = source.relativize(file).toString f(file, target.resolve(relative)) FileVisitResult.CONTINUE } def visitFileFailed(file: Path, e: IOException) = throw e def postVisitDirectory(dir: Path, e: IOException): FileVisitResult = { if (e != null) throw e FileVisitResult.CONTINUE } }) } def deleteRecursive(p: Path): Unit = { import java.io.IOException import java.nio.file.attribute.BasicFileAttributes import java.nio.file.{FileVisitResult, Files, SimpleFileVisitor} Files.walkFileTree(p, new SimpleFileVisitor[Path]() { override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = { Files.delete(file) FileVisitResult.CONTINUE } override def preVisitDirectory(dir: Path, attrs: BasicFileAttributes): FileVisitResult = { if (dir.getFileName.toString == ".git") FileVisitResult.SKIP_SUBTREE else super.preVisitDirectory(dir, attrs) } override def postVisitDirectory(dir: Path, exc: IOException): FileVisitResult = { val listing = Files.list(dir) try { if (!listing.iterator().hasNext) Files.delete(dir) } finally { listing.close() } FileVisitResult.CONTINUE } }) } }
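A hedged usage sketch; "example.jar" is a hypothetical archive on disk:

import java.nio.file.{Files, Paths}
import scala.tools.jardiff.IOUtil

val root = IOUtil.rootPath(Paths.get("example.jar"))   // "/" of a jar: filesystem
val out  = Files.createTempDirectory("extracted")
IOUtil.mapRecursive(root, out) { (src, dst) =>
  Files.copy(src, dst)                                 // copy each jar entry to disk
}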
Example 33
Source File: ServiceSpec.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
import java.io.{File, FileNotFoundException, IOException} import java.net.ServerSocket import java.util.Base64 import it.gov.daf.entitymanager.Entity import it.gov.daf.entitymanager.client.Entity_managerClient import org.specs2.mutable.Specification import org.specs2.specification.BeforeAfterAll import play.api.Application import play.api.inject.guice.GuiceApplicationBuilder import play.api.libs.ws.ahc.AhcWSClient import play.api.test.WithServer import scala.concurrent.Await import scala.concurrent.duration.Duration import scala.util.{Failure, Random, Try} @SuppressWarnings( Array( "org.wartremover.warts.NonUnitStatements", "org.wartremover.warts.Throw", "org.wartremover.warts.Var" ) ) class ServiceSpec extends Specification with BeforeAfterAll { def getAvailablePort: Int = { try { val socket = new ServerSocket(0) try { socket.getLocalPort } finally { socket.close() } } catch { case e: IOException => throw new IllegalStateException(s"Cannot find available port: ${e.getMessage}", e) } } private def constructTempDir(dirPrefix: String): Try[File] = Try { val rndrange = 10000000 val file = new File(System.getProperty("java.io.tmpdir"), s"$dirPrefix${Random.nextInt(rndrange)}") if (!file.mkdirs()) throw new RuntimeException("could not create temp directory: " + file.getAbsolutePath) file.deleteOnExit() file } private def deleteDirectory(path: File): Boolean = { if (!path.exists()) { throw new FileNotFoundException(path.getAbsolutePath) } var ret = true if (path.isDirectory) path.listFiles().foreach(f => ret = ret && deleteDirectory(f)) ret && path.delete() } var tmpDir: Try[File] = Failure[File](new Exception("")) def application: Application = GuiceApplicationBuilder(). configure("pac4j.authenticator" -> "test"). configure("janusgraph.storage.directory" -> s"${tmpDir.map(_.getCanonicalPath).getOrElse("db")}/berkeleyje"). configure("janusgraph.index.search.directory" -> s"${tmpDir.map(_.getCanonicalPath).getOrElse("db")}/lucene"). build() "The entity_manager" should { "create an entity and retrieve it correctly" in new WithServer(app = application, port = getAvailablePort) { val ws: AhcWSClient = AhcWSClient() val plainCreds = "david:david" val plainCredsBytes = plainCreds.getBytes val base64CredsBytes = Base64.getEncoder.encode(plainCredsBytes) val base64Creds = new String(base64CredsBytes) val client = new Entity_managerClient(ws)(s"http://localhost:$port") val result = Await.result(client.createEntity(s"Basic $base64Creds", Entity("DAVID")), Duration.Inf) val entity = Await.result(client.getEntity(s"Basic $base64Creds", "DAVID"), Duration.Inf) entity must beEqualTo(Entity("DAVID")) } } override def beforeAll(): Unit = tmpDir = constructTempDir("test") override def afterAll(): Unit = tmpDir.foreach(deleteDirectory(_)) }
Example 34
Source File: CatalogControllersSpec.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
import java.io.IOException import java.net.ServerSocket import akka.actor.ActorSystem import akka.stream.ActorMaterializer import catalog_manager.yaml.MetaCatalog import org.specs2.mutable.Specification import play.api.Application import play.api.http.Status import play.api.routing.Router import play.api.inject.guice.GuiceApplicationBuilder import play.api.libs.json.{JsArray, JsValue, Json} import play.api.libs.ws.WSResponse import play.api.libs.ws.ahc.AhcWSClient import play.api.test._ import it.gov.daf.catalogmanager import it.gov.daf.catalogmanager.client.Catalog_managerClient import scala.concurrent.duration.Duration import scala.concurrent.{Await, Future} class CatalogControllersSpec extends Specification { def application: Application = GuiceApplicationBuilder().build() import catalog_manager.yaml.BodyReads.MetaCatalogReads "The catalog-manager" should { "Call catalog-manager/v1/dataset-catalogs return ok status" in new WithServer(app = application, port = 9000) { WsTestClient.withClient { implicit client => val response: WSResponse = Await.result[WSResponse](client. url(s"http://localhost:9001/catalog-manager/v1/dataset-catalogs"). execute, Duration.Inf) println(response.status) response.status must be equalTo Status.OK } } "Call catalog-manager/v1/dataset-catalogs return a non empty list if" + "you have error maybe is necessaty to add data to db" in new WithServer(app = application, port = 9000) { WsTestClient.withClient { implicit client => val response: WSResponse = Await.result[WSResponse](client. url(s"http://localhost:9001/catalog-manager/v1/dataset-catalogs"). execute, Duration.Inf) println(response.status) println("ALE") println(response.body) val json: JsValue = Json.parse(response.body) json.as[JsArray].value.size must be greaterThan (0) } } "The catalog-manager" should { "Call catalog-manager/v1/dataset-catalogs/{logical_uri} return ok status" in new WithServer(app = application, port = 9000) { val logicalUri = "daf://dataset/std/standard/standard/uri_cultura/standard" val url = s"http://localhost:9001/catalog-manager/v1/dataset-catalogs/$logicalUri" println(url) WsTestClient.withClient { implicit client => val response: WSResponse = Await.result[WSResponse](client. url(url). execute, Duration.Inf) println(response.status) response.status must be equalTo Status.OK } } } "The catalog-manager" should { "Call catalog-manager/v1/dataset-catalogs/{anything} return 401" in new WithServer(app = application, port = 9000) { val logicalUri = "anything" val url = s"http://localhost:9001/catalog-manager/v1/dataset-catalogs/$logicalUri" println(url) WsTestClient.withClient { implicit client => val response: WSResponse = Await.result[WSResponse](client. url(url). execute, Duration.Inf) println(response.status) response.status must be equalTo 401 } } } } }
Example 35
Source File: TokenizerWrapper.scala From dbpedia-spotlight-model with Apache License 2.0 | 5 votes |
package org.dbpedia.spotlight.db.concurrent import java.io.IOException import java.util.concurrent.TimeUnit import akka.actor.SupervisorStrategy.Restart import akka.actor.{Actor, ActorSystem, OneForOneStrategy, Props} import akka.pattern.ask import akka.routing.SmallestMailboxRouter import akka.util import org.apache.commons.lang.NotImplementedException import org.dbpedia.spotlight.db.model.{StringTokenizer, TextTokenizer} import org.dbpedia.spotlight.model.{Text, Token} import scala.concurrent.Await class TokenizerWrapper(val tokenizers: Seq[TextTokenizer]) extends TextTokenizer { var requestTimeout = 60 val system = ActorSystem() val workers = tokenizers.map { case tokenizer: TextTokenizer => system.actorOf(Props(new TokenizerActor(tokenizer))) }.seq def size: Int = tokenizers.size val router = system.actorOf(Props[TokenizerActor].withRouter( // This might be a hack SmallestMailboxRouter(scala.collection.immutable.Iterable(workers:_*)).withSupervisorStrategy( OneForOneStrategy(maxNrOfRetries = 10) { case _: IOException => Restart }) ) ) implicit val timeout = util.Timeout(requestTimeout, TimeUnit.SECONDS) override def tokenizeMaybe(text: Text) { val futureResult = router ? TokenizerRequest(text) Await.result(futureResult, timeout.duration) } override def tokenize(text: Text): List[Token] = { tokenizeMaybe(text) text.featureValue[List[Token]]("tokens").get } def tokenizeRaw(text: String): Seq[String] = { throw new NotImplementedException() } def close() { system.shutdown() } def getStringTokenizer: StringTokenizer = tokenizers.head.getStringTokenizer } class TokenizerActor(val tokenizer: TextTokenizer) extends Actor { def receive = { case TokenizerRequest(text) => { try { sender ! tokenizer.tokenizeMaybe(text) } catch { case e: NullPointerException => throw new IOException("Could not tokenize.") } } } } case class TokenizerRequest(text: Text)
Example 36
Source File: Clause_6_exception.scala From HadoopLearning with MIT License | 5 votes |
package com.c503.scala import java.io.{FileNotFoundException, FileReader, IOException} object Clause_6_exception { def main(args: Array[String]): Unit = { try { val f = new FileReader("input.txt") println(f.getClass.getName) } catch { case ex: FileNotFoundException => { println("Missing file exception") } case bx: IOException => { println("IO Exception") } } finally { println("Exiting finally...") } } }
Example 37
Source File: StorageScriptFsWriter.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.script.writer import java.io.{ByteArrayInputStream, IOException, InputStream, OutputStream} import java.util import com.webank.wedatasphere.linkis.common.io.{FsPath, MetaData, Record} import com.webank.wedatasphere.linkis.storage.LineRecord import com.webank.wedatasphere.linkis.storage.script.{Compaction, ScriptFsWriter, ScriptMetaData} import com.webank.wedatasphere.linkis.storage.utils.{StorageConfiguration, StorageUtils} import org.apache.commons.io.IOUtils class StorageScriptFsWriter(val path: FsPath, val charset: String, outputStream: OutputStream = null) extends ScriptFsWriter { private val stringBuilder = new StringBuilder @scala.throws[IOException] override def addMetaData(metaData: MetaData): Unit = { val compactions = Compaction.listCompactions().filter(p => p.belongTo(StorageUtils.pathToSuffix(path.getPath))) val metadataLine = new util.ArrayList[String]() if (compactions.length > 0) { metaData.asInstanceOf[ScriptMetaData].getMetaData.map(compactions(0).compact).foreach(metadataLine.add) if (outputStream != null) { IOUtils.writeLines(metadataLine, "\n", outputStream, charset) } else { import scala.collection.JavaConversions._ metadataLine.foreach(m => stringBuilder.append(s"$m\n")) } } } @scala.throws[IOException] override def addRecord(record: Record): Unit = { // Converted to LineRecord instead of TableRecord so that non-Table result sets can also be written to this class val scriptRecord = record.asInstanceOf[LineRecord] if (outputStream != null) { IOUtils.write(scriptRecord.getLine, outputStream, charset) } else { stringBuilder.append(scriptRecord.getLine) } } override def close(): Unit = { IOUtils.closeQuietly(outputStream) } override def flush(): Unit = if (outputStream != null) outputStream.flush() def getInputStream(): InputStream = { new ByteArrayInputStream(stringBuilder.toString().getBytes(StorageConfiguration.STORAGE_RS_FILE_TYPE.getValue)) } }
Example 38
Source File: StorageResultSetReader.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.resultset import java.io.{ByteArrayInputStream, IOException, InputStream} import com.webank.wedatasphere.linkis.common.io.resultset.{ResultSet, ResultSetReader} import com.webank.wedatasphere.linkis.common.io.{MetaData, Record} import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.storage.domain.Dolphin import com.webank.wedatasphere.linkis.storage.exception.StorageWarnException import com.webank.wedatasphere.linkis.storage.utils.StorageUtils import scala.collection.mutable.ArrayBuffer def readLine(): Array[Byte] = { var rowLen = 0 try rowLen = Dolphin.readInt(inputStream) catch { case t:StorageWarnException => info(s"Read finished(读取完毕)") ; return null case t: Throwable => throw t } val rowBuffer = ArrayBuffer[Byte]() var len = 0 //Read the entire line, except for the data of the line length(读取整行,除了行长的数据) while (rowLen > 0 && len >= 0) { if (rowLen > READ_CACHE) len = StorageUtils.readBytes(inputStream,bytes, READ_CACHE) else len = StorageUtils.readBytes(inputStream,bytes, rowLen) if (len > 0) { rowLen -= len rowBuffer ++= bytes.slice(0, len) } } rowCount = rowCount + 1 rowBuffer.toArray } @scala.throws[IOException] override def getRecord: Record = { if (metaData == null) throw new IOException("Must read metadata first(必须先读取metadata)") if (row == null) throw new IOException("Can't get the value of the field, maybe the IO stream has been read or has been closed!(拿不到字段的值,也许IO流已读取完毕或已被关闭!)") row } @scala.throws[IOException] override def getMetaData: MetaData = { if(metaData == null) init() metaData = deserializer.createMetaData(readLine()) metaData } @scala.throws[IOException] override def skip(recordNum: Int): Int = { if(recordNum < 0 ) return -1 if(metaData == null) getMetaData for(i <- recordNum until (0, -1)){ try inputStream.skip(Dolphin.readInt(inputStream)) catch { case t: Throwable => return -1} } recordNum } @scala.throws[IOException] override def getPosition: Long = rowCount @scala.throws[IOException] override def hasNext: Boolean = { if(metaData == null) getMetaData val line = readLine() if(line == null) return false row = deserializer.createRecord(line) if(row == null) return false true } @scala.throws[IOException] override def available: Long = inputStream.available() override def close(): Unit = inputStream.close() }
Example 39
Source File: FileSystemUtils.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.utils import java.io.IOException import java.util import com.webank.wedatasphere.linkis.common.io.FsPath import com.webank.wedatasphere.linkis.common.utils.{Logging, Utils} import com.webank.wedatasphere.linkis.storage.FSFactory import com.webank.wedatasphere.linkis.storage.fs.FileSystem import com.webank.wedatasphere.linkis.storage.fs.impl.LocalFileSystem @throws[IOException] def mkdirs(fileSystem: FileSystem,dest: FsPath, user: String): Boolean = { var parentPath = dest.getParent val dirsToMake = new util.Stack[FsPath]() dirsToMake.push(dest) while (!fileSystem.exists(parentPath)){ dirsToMake.push(parentPath) parentPath = parentPath.getParent } if(! fileSystem.canExecute(parentPath)){ throw new IOException("You have not permission to access path " + dest.getPath) } while (!dirsToMake.empty()){ val path = dirsToMake.pop() fileSystem.mkdir(path) fileSystem match { case l:LocalFileSystem => fileSystem.setOwner(path,user) case _ => info(s"doesn't need to call setOwner") } //fileSystem.setOwner(path,user,StorageConfiguration.STORAGE_HDFS_GROUP.getValue) } true } }
Example 40
Source File: Dolphin.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.domain import java.io.{IOException, InputStream} import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.storage.exception.StorageWarnException import com.webank.wedatasphere.linkis.storage.utils.{StorageConfiguration, StorageUtils} def getIntBytes(value: Int): Array[Byte] = { val str = value.toString val res = "0" * (INT_LEN - str.length) + str Dolphin.getBytes(res) } def getType(inputStream:InputStream):String = { val bytes = new Array[Byte](100) val len = StorageUtils.readBytes(inputStream,bytes, Dolphin.MAGIC_LEN + INT_LEN) if(len == -1) return null getType(Dolphin.getString(bytes, 0, len)) } def getType(content: String): String = { if(content.length < MAGIC.length || content.substring(0, MAGIC.length) != MAGIC) throw new IOException(s"File header type must be dolphin,content:$content is not") content.substring(MAGIC.length, MAGIC.length + INT_LEN ).toInt.toString } }
Example 41
Source File: HBasePartitioner.scala From Backup-Repo with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hbase import java.io.{IOException, ObjectInputStream, ObjectOutputStream} import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.serializer.JavaSerializer import org.apache.spark.util.{CollectionsUtils, Utils} import org.apache.spark.{Partitioner, SparkEnv} object HBasePartitioner { implicit object HBaseRawOrdering extends Ordering[HBaseRawType] { def compare(a: HBaseRawType, b: HBaseRawType) = Bytes.compareTo(a, b) } } class HBasePartitioner (var splitKeys: Array[HBaseRawType]) extends Partitioner { import HBasePartitioner.HBaseRawOrdering type t = HBaseRawType lazy private val len = splitKeys.length // For pre-split table splitKeys(0) = bytes[0], to remove it, // otherwise partition 0 always be empty and // we will miss the last region's date when bulk load lazy private val realSplitKeys = if (splitKeys.isEmpty) splitKeys else splitKeys.tail def numPartitions = if (len == 0) 1 else len @transient private val binarySearch: ((Array[t], t) => Int) = CollectionsUtils.makeBinarySearch[t] def getPartition(key: Any): Int = { val k = key.asInstanceOf[t] var partition = 0 if (len <= 128 && len > 0) { // If we have less than 128 partitions naive search val ordering = implicitly[Ordering[t]] while (partition < realSplitKeys.length && ordering.gt(k, realSplitKeys(partition))) { partition += 1 } } else { // Determine which binary search method to use only once. partition = binarySearch(realSplitKeys, k) // binarySearch either returns the match location or -[insertion point]-1 if (partition < 0) { partition = -partition - 1 } if (partition > realSplitKeys.length) { partition = realSplitKeys.length } } partition } override def equals(other: Any): Boolean = other match { case r: HBasePartitioner => r.splitKeys.sameElements(splitKeys) case _ => false } override def hashCode(): Int = { val prime = 31 var result = 1 var i = 0 while (i < splitKeys.length) { result = prime * result + splitKeys(i).hashCode i += 1 } result = prime * result result } }
Example 42
Source File: WriSer.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.hadoop import java.io.{ DataInputStream, DataOutputStream, ObjectInputStream, ObjectOutputStream } import java.io.IOException import scala.reflect.{ classTag, ClassTag } import org.apache.hadoop.io.Writable // Note: we could make this implement InputSplit, but we do not because many input splits do a // cast to their specific InputSplit, so we do not want to risk it. Further, this currently works // for any Writable. case class WriSer[T <: Writable: ClassTag](@transient var get: T) extends Serializable { def this() = this(null.asInstanceOf[T]) @throws(classOf[IOException]) private def writeObject(out: ObjectOutputStream) { out.writeObject(classTag[T]) get.write(new DataOutputStream(out)) } @throws(classOf[IOException]) @throws(classOf[ClassNotFoundException]) private def readObject(in: ObjectInputStream) { get = in.readObject.asInstanceOf[ClassTag[T]].runtimeClass.newInstance.asInstanceOf[T] get.readFields(new DataInputStream(in)) } }
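A round-trip sketch showing what the custom writeObject/readObject buy; Hadoop's Text is used here as an arbitrary Writable:

import java.io._
import org.apache.hadoop.io.Text
import com.twosigma.flint.hadoop.WriSer

val bytes = new ByteArrayOutputStream()
val oos   = new ObjectOutputStream(bytes)
oos.writeObject(WriSer(new Text("hello")))
oos.flush()

val ois  = new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray))
val back = ois.readObject().asInstanceOf[WriSer[Text]]
// back.get is a fresh Text("hello"), rebuilt via readFields rather than Java field serialization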
Example 43
Source File: IngestionFlow.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.ingest.services import java.io.IOException import cats.MonadError import cats.implicits._ import com.pluralsight.hydra.avro.JsonToAvroConversionException import hydra.avro.registry.SchemaRegistry import hydra.avro.resource.SchemaResourceLoader.SchemaNotFoundException import hydra.avro.util.SchemaWrapper import hydra.core.ingest.HydraRequest import hydra.core.ingest.RequestParams.{HYDRA_KAFKA_TOPIC_PARAM, HYDRA_RECORD_KEY_PARAM} import hydra.core.transport.{AckStrategy, ValidationStrategy} import hydra.kafka.algebras.KafkaClientAlgebra import hydra.kafka.producer.AvroRecord import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import scalacache._ import scalacache.guava._ import scalacache.memoization._ import scala.concurrent.duration._ import scala.util.{Failure, Success, Try} final class IngestionFlow[F[_]: MonadError[*[_], Throwable]: Mode]( schemaRegistry: SchemaRegistry[F], kafkaClient: KafkaClientAlgebra[F], schemaRegistryBaseUrl: String ) { import IngestionFlow._ implicit val guavaCache: Cache[SchemaWrapper] = GuavaCache[SchemaWrapper] private def getValueSchema(topicName: String): F[Schema] = { schemaRegistry.getLatestSchemaBySubject(topicName + "-value") .flatMap { maybeSchema => val schemaNotFound = SchemaNotFoundException(topicName) MonadError[F, Throwable].fromOption(maybeSchema, SchemaNotFoundAugmentedException(schemaNotFound, topicName)) } } private def getValueSchemaWrapper(topicName: String): F[SchemaWrapper] = memoizeF[F, SchemaWrapper](Some(2.minutes)) { getValueSchema(topicName).map { valueSchema => SchemaWrapper.from(valueSchema) } } def ingest(request: HydraRequest): F[Unit] = { request.metadataValue(HYDRA_KAFKA_TOPIC_PARAM) match { case Some(topic) => getValueSchemaWrapper(topic).flatMap { schemaWrapper => val useStrictValidation = request.validationStrategy == ValidationStrategy.Strict val payloadTryMaybe: Try[Option[GenericRecord]] = Option(request.payload) match { case Some(p) => convertToAvro(topic, schemaWrapper, useStrictValidation, p).map(avroRecord => Some(avroRecord.payload)) case None => Success(None) } val v1Key = getV1RecordKey(schemaWrapper, payloadTryMaybe, request) MonadError[F, Throwable].fromTry(payloadTryMaybe).flatMap { payloadMaybe => kafkaClient.publishStringKeyMessage((v1Key, payloadMaybe), topic).void } } case None => MonadError[F, Throwable].raiseError(MissingTopicNameException(request)) } } private def getV1RecordKey(schemaWrapper: SchemaWrapper, payloadTryMaybe: Try[Option[GenericRecord]], request: HydraRequest): Option[String] = { val headerV1Key = request.metadata.get(HYDRA_RECORD_KEY_PARAM) val optionString = schemaWrapper.primaryKeys.toList match { case Nil => None case l => l.flatMap(pkName => payloadTryMaybe match { case Success(payloadMaybe) => payloadMaybe.flatMap(p => Try(p.get(pkName)).toOption) case Failure(_) => None }).mkString("|").some } headerV1Key.orElse(optionString) } private def convertToAvro(topic: String, schemaWrapper: SchemaWrapper, useStrictValidation: Boolean, payloadString: String): Try[AvroRecord] = { Try(AvroRecord(topic, schemaWrapper.schema, None, payloadString, AckStrategy.Replicated, useStrictValidation)).recoverWith { case e: JsonToAvroConversionException => val location = s"$schemaRegistryBaseUrl/subjects/$topic-value/versions/latest/schema" Failure(new AvroConversionAugmentedException(s"${e.getClass.getName}: ${e.getMessage} [$location]")) case e: IOException => val location = s"$schemaRegistryBaseUrl/subjects/$topic-value/versions/latest/schema" Failure(new 
AvroConversionAugmentedException(s"${e.getMessage} [$location]")) case e => Failure(e) } } } object IngestionFlow { final case class MissingTopicNameException(request: HydraRequest) extends Exception(s"Missing the topic name in request with correlationId ${request.correlationId}") final case class AvroConversionAugmentedException(message: String) extends RuntimeException(message) final case class SchemaNotFoundAugmentedException(schemaNotFoundException: SchemaNotFoundException, topic: String) extends RuntimeException(s"Schema '$topic' cannot be loaded. Cause: ${schemaNotFoundException.getClass.getName}: Schema not found for $topic") }
Example 44
Source File: IngestionFlowV2.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.ingest.services import java.io.IOException import cats.MonadError import cats.implicits._ import hydra.avro.registry.SchemaRegistry import hydra.avro.resource.SchemaResourceLoader.SchemaNotFoundException import hydra.avro.util.SchemaWrapper import hydra.core.transport.ValidationStrategy import hydra.kafka.algebras.KafkaClientAlgebra import hydra.kafka.algebras.KafkaClientAlgebra.PublishResponse import hydra.kafka.model.TopicMetadataV2Request.Subject import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import scalacache._ import scalacache.guava._ import scalacache.memoization._ import scala.concurrent.duration._ import scala.util.{Failure, Try} final class IngestionFlowV2[F[_]: MonadError[*[_], Throwable]: Mode]( schemaRegistry: SchemaRegistry[F], kafkaClient: KafkaClientAlgebra[F], schemaRegistryBaseUrl: String) { import IngestionFlowV2._ import hydra.avro.convert.StringToGenericRecord._ implicit val guavaCache: Cache[SchemaWrapper] = GuavaCache[SchemaWrapper] private def getSchema(subject: String): F[Schema] = { schemaRegistry.getLatestSchemaBySubject(subject) .flatMap { maybeSchema => val schemaNotFound = SchemaNotFoundException(subject) MonadError[F, Throwable].fromOption(maybeSchema, SchemaNotFoundAugmentedException(schemaNotFound, subject)) } } private def getSchemaWrapper(subject: Subject, isKey: Boolean): F[SchemaWrapper] = memoizeF[F, SchemaWrapper](Some(2.minutes)) { val suffix = if (isKey) "-key" else "-value" getSchema(subject.value + suffix).map { sch => SchemaWrapper.from(sch) } } private def recover[A](subject: Subject, isKey: Boolean): PartialFunction[Throwable, Try[A]] = { val suffix = if (isKey) "-key" else "-value" val location = s"$schemaRegistryBaseUrl/subjects/${subject.value}$suffix/versions/latest/schema" val pf: PartialFunction[Throwable, Try[A]] = { case e: ValidationExtraFieldsError => Failure(AvroConversionAugmentedException(s"${e.getClass.getName}: ${e.getMessage} [$location]")) case e: InvalidLogicalTypeError => Failure(AvroConversionAugmentedException(s"${e.getClass.getName}: ${e.getMessage} [$location]")) case e: IOException => Failure(AvroConversionAugmentedException(s"${e.getClass.getName}: ${e.getMessage} [$location]")) case e => Failure(e) } pf } private def getSchemas(request: V2IngestRequest, topic: Subject): F[(GenericRecord, Option[GenericRecord])] = { val useStrictValidation = request.validationStrategy.getOrElse(ValidationStrategy.Strict) == ValidationStrategy.Strict def getRecord(payload: String, schema: Schema): Try[GenericRecord] = payload.toGenericRecord(schema, useStrictValidation) for { kSchema <- getSchemaWrapper(topic, isKey = true) vSchema <- getSchemaWrapper(topic, isKey = false) k <- MonadError[F, Throwable].fromTry( getRecord(request.keyPayload, kSchema.schema).recoverWith(recover(topic, isKey = true))) v <- MonadError[F, Throwable].fromTry( request.valPayload.traverse(getRecord(_, vSchema.schema)).recoverWith(recover(topic, isKey = false))) } yield (k, v) } def ingest(request: V2IngestRequest, topic: Subject): F[PublishResponse] = { getSchemas(request, topic).flatMap { case (key, value) => kafkaClient.publishMessage((key, value), topic.value).rethrow } } } object IngestionFlowV2 { final case class V2IngestRequest(keyPayload: String, valPayload: Option[String], validationStrategy: Option[ValidationStrategy]) final case class AvroConversionAugmentedException(message: String) extends RuntimeException(message) final case class SchemaNotFoundAugmentedException(schemaNotFoundException: 
SchemaNotFoundException, topic: String) extends RuntimeException(s"Schema '$topic' cannot be loaded. Cause: ${schemaNotFoundException.getClass.getName}: Schema not found for $topic") }
Example 45
Source File: RelativePathSupport.scala From exodus with MIT License | 5 votes |
package com.wix.bazel.migrator.analyze import java.io.IOException import java.nio.file.{Path, Paths} import com.fasterxml.jackson.core.{JsonGenerator, JsonParser, JsonToken} import com.fasterxml.jackson.databind._ import com.fasterxml.jackson.databind.module.SimpleModule class RelativePathSupportingModule extends SimpleModule { addDeserializer(classOf[Path], new RelativePathSupportingDeserializer) addSerializer(classOf[Path], new RelativePathSupportingSerializer) } class RelativePathSupportingSerializer extends JsonSerializer[Path] { @throws[IOException] def serialize(value: Path, gen: JsonGenerator, serializers: SerializerProvider): Unit = value match { case null => gen.writeNull() case _ => gen.writeString(value.toString) } } class RelativePathSupportingDeserializer extends JsonDeserializer[Path] { @throws[IOException] def deserialize(p: JsonParser, ctxt: DeserializationContext): Path = p.getCurrentToken match { case JsonToken.VALUE_NULL => null case JsonToken.VALUE_STRING => Paths.get(p.readValueAs(classOf[String])) case _ => throw ctxt.wrongTokenException(p, JsonToken.VALUE_STRING, "The value of a java.nio.file.Path must be a string") } }
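A usage sketch for the Jackson module above (the path literal is hypothetical):

import java.nio.file.{Path, Paths}
import com.fasterxml.jackson.databind.ObjectMapper
import com.wix.bazel.migrator.analyze.RelativePathSupportingModule

val mapper = new ObjectMapper().registerModule(new RelativePathSupportingModule)
val json   = mapper.writeValueAsString(Paths.get("foo/Bar.scala")) // the JSON string "foo/Bar.scala"
val path   = mapper.readValue(json, classOf[Path])                 // back to Paths.get("foo/Bar.scala")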
Example 46
Source File: SourceModuleSupport.scala From exodus with MIT License | 5 votes |
package com.wix.bazel.migrator.analyze import java.io.IOException import com.fasterxml.jackson.core.{JsonGenerator, JsonParser, JsonToken} import com.fasterxml.jackson.databind._ import com.fasterxml.jackson.databind.module.SimpleModule import com.wix.bazel.migrator.model.SourceModule class SourceModuleSupportingModule(modules: Set[SourceModule]) extends SimpleModule { addDeserializer(classOf[SourceModule], new SourceModuleSupportingDeserializer(modules)) addSerializer(classOf[SourceModule], new SourceModuleSupportingSerializer) } class SourceModuleSupportingSerializer extends JsonSerializer[SourceModule] { @throws[IOException] def serialize(value: SourceModule, gen: JsonGenerator, serializers: SerializerProvider): Unit = value match { case null => gen.writeNull() case _ => gen.writeString(value.relativePathFromMonoRepoRoot) } } class SourceModuleSupportingDeserializer(modules: Set[SourceModule]) extends JsonDeserializer[SourceModule] { @throws[IOException] def deserialize(p: JsonParser, ctxt: DeserializationContext): SourceModule = p.getCurrentToken match { case JsonToken.VALUE_NULL => null case JsonToken.VALUE_STRING => { val relativePath = p.readValueAs(classOf[String]) modules.find(_.relativePathFromMonoRepoRoot == relativePath) .getOrElse(throw ctxt.weirdStringException(relativePath, classOf[SourceModule], s"could not find module with relative path for $relativePath")) } case token => throw ctxt.wrongTokenException(p, JsonToken.VALUE_STRING, s"The value of a module must be a string and currently is $token") } }
Example 47
Source File: CustomHttpAuthenticationFaultHandler.scala From meteorite-core with Apache License 2.0 | 5 votes |
package bi.meteorite.core.security.authentication import java.io.IOException import javax.servlet.http.HttpServletResponse import org.apache.cxf.interceptor.security.AuthenticationException import org.apache.cxf.transport.http.{AbstractHTTPDestination, HttpAuthenticationFaultHandler} import org.apache.cxf.message.Message class CustomHttpAuthenticationFaultHandler extends HttpAuthenticationFaultHandler{ val authenticationType1 = "Basic" val realm1 = "CXF service" override def handleFault(message: Message) { val ex: Exception = message.getContent(classOf[Exception]) if (ex.isInstanceOf[AuthenticationException]) { val resp: HttpServletResponse = message.getExchange.getInMessage.get(AbstractHTTPDestination.HTTP_RESPONSE).asInstanceOf[HttpServletResponse] resp.setStatus(HttpServletResponse.SC_UNAUTHORIZED) resp.setHeader("WWW-Authenticate", authenticationType1 + " realm=\"" + realm1 + "\"") resp.setHeader("Access-Control-Allow-Origin", "*") resp.setContentType("text/plain") try { resp.getWriter.write(ex.getMessage) resp.getWriter.flush message.getInterceptorChain.abort } catch { case e: IOException => { } } } } }
Example 48
Source File: XmlRelation.scala From spark-xml with Apache License 2.0 | 5 votes |
package com.databricks.spark.xml import java.io.IOException import org.apache.hadoop.fs.Path import org.apache.spark.rdd.RDD import org.apache.spark.sql._ import org.apache.spark.sql.sources.{PrunedScan, InsertableRelation, BaseRelation, TableScan} import org.apache.spark.sql.types._ import com.databricks.spark.xml.util.{InferSchema, XmlFile} import com.databricks.spark.xml.parsers.StaxXmlParser case class XmlRelation protected[spark] ( baseRDD: () => RDD[String], location: Option[String], parameters: Map[String, String], userSchema: StructType = null)(@transient val sqlContext: SQLContext) extends BaseRelation with InsertableRelation with PrunedScan { private val options = XmlOptions(parameters) override val schema: StructType = { Option(userSchema).getOrElse { InferSchema.infer( baseRDD(), options) } } override def buildScan(requiredColumns: Array[String]): RDD[Row] = { val requiredFields = requiredColumns.map(schema(_)) val requestedSchema = StructType(requiredFields) StaxXmlParser.parse( baseRDD(), requestedSchema, options) } // The function below was borrowed from JSONRelation override def insert(data: DataFrame, overwrite: Boolean): Unit = { val filesystemPath = location match { case Some(p) => new Path(p) case None => throw new IOException(s"Cannot INSERT into table with no path defined") } val fs = filesystemPath.getFileSystem(sqlContext.sparkContext.hadoopConfiguration) if (overwrite) { try { fs.delete(filesystemPath, true) } catch { case e: IOException => throw new IOException( s"Unable to clear output directory ${filesystemPath.toString} prior" + s" to INSERT OVERWRITE a XML table:\n${e.toString}") } // Write the data. We assume that schema isn't changed, and we won't update it. XmlFile.saveAsXmlFile(data, filesystemPath.toString, parameters) } else { throw new IllegalArgumentException("XML tables only support INSERT OVERWRITE for now.") } } }
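This relation is normally reached through the data source API rather than constructed directly; a hedged read-side sketch, assuming an active SQLContext named sqlContext and hypothetical rowTag/path values:

val df = sqlContext.read
  .format("com.databricks.spark.xml")
  .option("rowTag", "book")
  .load("books.xml")
df.printSchema() // schema inferred by InferSchema unless a user schema is supplied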
Example 49
Source File: TestZooKeeper.scala From mango with Apache License 2.0 | 5 votes |
package com.kakao.mango.zk import java.io.{File, IOException} import java.net.{ServerSocket, Socket} import java.util.concurrent.TimeUnit import com.kakao.mango.concurrent.NamedExecutors import com.kakao.mango.logging.{LogLevelOverrider, Logging} import com.kakao.shaded.guava.io.Files import org.apache.zookeeper.server.persistence.FileTxnSnapLog import org.apache.zookeeper.server.{ServerCnxnFactory, ServerConfig, ZooKeeperServer} import org.scalatest.{BeforeAndAfterAll, Suite} trait TestZooKeeper extends BeforeAndAfterAll with Logging { this: Suite => val zkServerPort = 2181 val zkServerExecutor = NamedExecutors.single("zookeeper-server") var zk: ZooKeeperConnection = _ override protected def beforeAll(): Unit = { logger.info("Launching a standalone ZooKeeper server for testing...") try { val socket = new ServerSocket(zkServerPort) socket.close() } catch { case e: IOException => throw new RuntimeException(s"TCP port $zkServerPort is required for tests but not available") } zkServerExecutor.submit { LogLevelOverrider.error("org.apache.zookeeper") val datadir = Files.createTempDir().getAbsolutePath val config = new ServerConfig config.parse(Array(zkServerPort.toString, datadir)) val zkServer = new ZooKeeperServer zkServer.setTxnLogFactory(new FileTxnSnapLog(new File(datadir), new File(datadir))) zkServer.setTickTime(6000) zkServer.setMinSessionTimeout(6000) zkServer.setMaxSessionTimeout(6000) val cnxnFactory = ServerCnxnFactory.createFactory try { cnxnFactory.configure(config.getClientPortAddress, 60) cnxnFactory.startup(zkServer) cnxnFactory.join() } catch { case _: InterruptedException => logger.info("ZooKeeper server interrupted; shutting down...") cnxnFactory.shutdown() cnxnFactory.join() if (zkServer.isRunning) { zkServer.shutdown() } logger.info("ZooKeeper server stopped") } } var connected = false while (!connected) { logger.info("Waiting for ZooKeeper server to launch...") try { val socket = new Socket("localhost", zkServerPort) logger.info("ZooKeeper server is available") socket.close() zk = ZooKeeperConnection(s"localhost:$zkServerPort") connected = true } catch { case _: IOException => Thread.sleep(1000) // retry } } super.beforeAll() } override protected def afterAll(): Unit = { try super.afterAll() finally { zk.close() logger.info("Interrupting ZooKeeper server...") zkServerExecutor.shutdownNow() while (!zkServerExecutor.awaitTermination(1, TimeUnit.SECONDS)) { logger.info("awaiting ZooKeeper server termination...") } logger.info("ZooKeeper server terminated") } } }
Example 50
Source File: MapJoinPartitionsRDD.scala From spark-vlbfgs with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.util.Utils class MapJoinPartitionsPartition( idx: Int, @transient private val rdd1: RDD[_], @transient private val rdd2: RDD[_], s2IdxArr: Array[Int]) extends Partition { var s1 = rdd1.partitions(idx) var s2Arr = s2IdxArr.map(s2Idx => rdd2.partitions(s2Idx)) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { s1 = rdd1.partitions(idx) s2Arr = s2IdxArr.map(s2Idx => rdd2.partitions(s2Idx)) oos.defaultWriteObject() } } class MapJoinPartitionsRDD[A: ClassTag, B: ClassTag, V: ClassTag]( sc: SparkContext, var idxF: (Int) => Array[Int], var f: (Int, Iterator[A], Array[(Int, Iterator[B])]) => Iterator[V], var rdd1: RDD[A], var rdd2: RDD[B]) extends RDD[V](sc, Nil) { override def getPartitions: Array[Partition] = { val array = new Array[Partition](rdd1.partitions.length) for (s1 <- rdd1.partitions) { val idx = s1.index array(idx) = new MapJoinPartitionsPartition(idx, rdd1, rdd2, idxF(idx)) } array } override def getDependencies: Seq[Dependency[_]] = List( new OneToOneDependency(rdd1), new NarrowDependency(rdd2) { override def getParents(partitionId: Int): Seq[Int] = { idxF(partitionId) } } ) override def getPreferredLocations(s: Partition): Seq[String] = { val fp = firstParent[A] // println(s"pref loc: ${fp.preferredLocations(fp.partitions(s.index))}") fp.preferredLocations(fp.partitions(s.index)) } override def compute(split: Partition, context: TaskContext): Iterator[V] = { val currSplit = split.asInstanceOf[MapJoinPartitionsPartition] f(currSplit.s1.index, rdd1.iterator(currSplit.s1, context), currSplit.s2Arr.map(s2 => (s2.index, rdd2.iterator(s2, context))) ) } override def clearDependencies() { super.clearDependencies() rdd1 = null rdd2 = null idxF = null f = null } }
Example 51
Source File: MapJoinPartitionsRDDV2.scala From spark-vlbfgs with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import org.apache.spark.serializer.Serializer import org.apache.spark.{TaskContext, _} import org.apache.spark.util.Utils import scala.reflect.ClassTag class MapJoinPartitionsPartitionV2( idx: Int, @transient private val rdd1: RDD[_], @transient private val rdd2: RDD[_], s2IdxArr: Array[Int]) extends Partition { var s1 = rdd1.partitions(idx) var s2Arr = s2IdxArr.map(s2Idx => rdd2.partitions(s2Idx)) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { s1 = rdd1.partitions(idx) s2Arr = s2IdxArr.map(s2Idx => rdd2.partitions(s2Idx)) oos.defaultWriteObject() } } class MapJoinPartitionsRDDV2[A: ClassTag, B: ClassTag, V: ClassTag]( sc: SparkContext, var idxF: (Int) => Array[Int], var f: (Int, Iterator[A], Array[(Int, Iterator[B])]) => Iterator[V], var rdd1: RDD[A], var rdd2: RDD[B], preservesPartitioning: Boolean = false) extends RDD[V](sc, Nil) { var rdd2WithPid = rdd2.mapPartitionsWithIndex((pid, iter) => iter.map(x => (pid, x))) private val serializer: Serializer = SparkEnv.get.serializer override def getPartitions: Array[Partition] = { val array = new Array[Partition](rdd1.partitions.length) for (s1 <- rdd1.partitions) { val idx = s1.index array(idx) = new MapJoinPartitionsPartitionV2(idx, rdd1, rdd2, idxF(idx)) } array } override def getDependencies: Seq[Dependency[_]] = List( new OneToOneDependency(rdd1), new ShuffleDependency[Int, B, B]( rdd2WithPid.asInstanceOf[RDD[_ <: Product2[Int, B]]], new IdentityPartitioner(rdd2WithPid.getNumPartitions), serializer) ) override def getPreferredLocations(s: Partition): Seq[String] = { val fp = firstParent[A] // println(s"pref loc: ${fp.preferredLocations(fp.partitions(s.index))}") fp.preferredLocations(fp.partitions(s.index)) } override def compute(split: Partition, context: TaskContext): Iterator[V] = { val currSplit = split.asInstanceOf[MapJoinPartitionsPartitionV2] val rdd2Dep = dependencies(1).asInstanceOf[ShuffleDependency[Int, Any, Any]] val rdd2PartIter = currSplit.s2Arr.map(s2 => (s2.index, SparkEnv.get.shuffleManager .getReader[Int, B](rdd2Dep.shuffleHandle, s2.index, s2.index + 1, context) .read().map(x => x._2) )) val rdd1Iter = rdd1.iterator(currSplit.s1, context) f(currSplit.s1.index, rdd1Iter, rdd2PartIter) } override def clearDependencies() { super.clearDependencies() rdd1 = null rdd2 = null rdd2WithPid = null idxF = null f = null } } private[spark] class IdentityPartitioner(val numParts: Int) extends Partitioner { require(numPartitions > 0) override def getPartition(key: Any): Int = key.asInstanceOf[Int] override def numPartitions: Int = numParts }
Example 52
Source File: OrcFileOperator.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.orc import java.io.IOException import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.hive.ql.io.orc.{OrcFile, Reader} import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector import org.apache.spark.SparkException import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.types.StructType private[hive] object OrcFileOperator extends Logging { def getFileReader(basePath: String, config: Option[Configuration] = None, ignoreCorruptFiles: Boolean = false) : Option[Reader] = { def isWithNonEmptySchema(path: Path, reader: Reader): Boolean = { reader.getObjectInspector match { case oi: StructObjectInspector if oi.getAllStructFieldRefs.size() == 0 => logInfo( s"ORC file $path has empty schema, it probably contains no rows. " + "Trying to read another ORC file to figure out the schema.") false case _ => true } } val conf = config.getOrElse(new Configuration) val fs = { val hdfsPath = new Path(basePath) hdfsPath.getFileSystem(conf) } listOrcFiles(basePath, conf).iterator.map { path => val reader = try { Some(OrcFile.createReader(fs, path)) } catch { case e: IOException => if (ignoreCorruptFiles) { logWarning(s"Skipped the footer in the corrupted file: $path", e) None } else { throw new SparkException(s"Could not read footer for file: $path", e) } } path -> reader }.collectFirst { case (path, Some(reader)) if isWithNonEmptySchema(path, reader) => reader } } def readSchema(paths: Seq[String], conf: Option[Configuration], ignoreCorruptFiles: Boolean) : Option[StructType] = { // Take the first file where we can open a valid reader if we can find one. Otherwise just // return None to indicate we can't infer the schema. paths.toIterator.map(getFileReader(_, conf, ignoreCorruptFiles)).collectFirst { case Some(reader) => val readerInspector = reader.getObjectInspector.asInstanceOf[StructObjectInspector] val schema = readerInspector.getTypeName logDebug(s"Reading schema from file $paths, got Hive schema string: $schema") CatalystSqlParser.parseDataType(schema).asInstanceOf[StructType] } } def getObjectInspector( path: String, conf: Option[Configuration]): Option[StructObjectInspector] = { getFileReader(path, conf).map(_.getObjectInspector.asInstanceOf[StructObjectInspector]) } def listOrcFiles(pathStr: String, conf: Configuration): Seq[Path] = { // TODO: Check if the paths coming in are already qualified and simplify. val origPath = new Path(pathStr) val fs = origPath.getFileSystem(conf) val paths = SparkHadoopUtil.get.listLeafStatuses(fs, origPath) .filterNot(_.isDirectory) .map(_.getPath) .filterNot(_.getName.startsWith("_")) .filterNot(_.getName.startsWith(".")) paths } }
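A hedged sketch of the schema probe; the path is hypothetical and, because the object is private[hive], the call has to sit inside the org.apache.spark.sql.hive package:

import org.apache.hadoop.conf.Configuration
import org.apache.spark.sql.hive.orc.OrcFileOperator

val maybeSchema = OrcFileOperator.readSchema(
  paths = Seq("/warehouse/events/part-00000"),
  conf = Some(new Configuration()),
  ignoreCorruptFiles = true) // skip files with unreadable footers instead of failing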
Example 53
Source File: SparkSQLCLIService.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.thriftserver import java.io.IOException import java.util.{List => JList} import javax.security.auth.login.LoginException import scala.collection.JavaConverters._ import org.apache.commons.logging.Log import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hadoop.hive.shims.Utils import org.apache.hadoop.security.{SecurityUtil, UserGroupInformation} import org.apache.hive.service.{AbstractService, Service, ServiceException} import org.apache.hive.service.Service.STATE import org.apache.hive.service.auth.HiveAuthFactory import org.apache.hive.service.cli._ import org.apache.hive.service.server.HiveServer2 import org.apache.spark.sql.SQLContext import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._ private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLContext) extends CLIService(hiveServer) with ReflectedCompositeService { override def init(hiveConf: HiveConf) { setSuperField(this, "hiveConf", hiveConf) val sparkSqlSessionManager = new SparkSQLSessionManager(hiveServer, sqlContext) setSuperField(this, "sessionManager", sparkSqlSessionManager) addService(sparkSqlSessionManager) var sparkServiceUGI: UserGroupInformation = null var httpUGI: UserGroupInformation = null if (UserGroupInformation.isSecurityEnabled) { try { val principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL) val keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB) if (principal.isEmpty || keyTabFile.isEmpty) { throw new IOException( "HiveServer2 Kerberos principal or keytab is not correctly configured") } val originalUgi = UserGroupInformation.getCurrentUser sparkServiceUGI = if (HiveAuthFactory.needUgiLogin(originalUgi, SecurityUtil.getServerPrincipal(principal, "0.0.0.0"), keyTabFile)) { HiveAuthFactory.loginFromKeytab(hiveConf) Utils.getUGI() } else { originalUgi } setSuperField(this, "serviceUGI", sparkServiceUGI) } catch { case e @ (_: IOException | _: LoginException) => throw new ServiceException("Unable to login to kerberos with given principal/keytab", e) } // Try creating spnego UGI if it is configured. 
val principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_PRINCIPAL).trim val keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_KEYTAB).trim if (principal.nonEmpty && keyTabFile.nonEmpty) { try { httpUGI = HiveAuthFactory.loginFromSpnegoKeytabAndReturnUGI(hiveConf) setSuperField(this, "httpUGI", httpUGI) } catch { case e: IOException => throw new ServiceException("Unable to login to spnego with given principal " + s"$principal and keytab $keyTabFile: $e", e) } } } initCompositeService(hiveConf) } override def getInfo(sessionHandle: SessionHandle, getInfoType: GetInfoType): GetInfoValue = { getInfoType match { case GetInfoType.CLI_SERVER_NAME => new GetInfoValue("Spark SQL") case GetInfoType.CLI_DBMS_NAME => new GetInfoValue("Spark SQL") case GetInfoType.CLI_DBMS_VER => new GetInfoValue(sqlContext.sparkContext.version) case _ => super.getInfo(sessionHandle, getInfoType) } } } private[thriftserver] trait ReflectedCompositeService { this: AbstractService => def initCompositeService(hiveConf: HiveConf) { // Emulating `CompositeService.init(hiveConf)` val serviceList = getAncestorField[JList[Service]](this, 2, "serviceList") serviceList.asScala.foreach(_.init(hiveConf)) // Emulating `AbstractService.init(hiveConf)` invoke(classOf[AbstractService], this, "ensureCurrentState", classOf[STATE] -> STATE.NOTINITED) setAncestorField(this, 3, "hiveConf", hiveConf) invoke(classOf[AbstractService], this, "changeState", classOf[STATE] -> STATE.INITED) getAncestorField[Log](this, 3, "LOG").info(s"Service: $getName is inited.") } }
Example 54
Source File: ReusableStringReaderSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions.xml import java.io.IOException import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.expressions.xml.UDFXPathUtil.ReusableStringReader class ReusableStringReaderSuite extends SparkFunSuite { private val fox = "Quick brown fox jumps over the lazy dog." test("empty reader") { val reader = new ReusableStringReader intercept[IOException] { reader.read() } intercept[IOException] { reader.ready() } reader.close() } test("mark reset") { val reader = new ReusableStringReader if (reader.markSupported()) { reader.asInstanceOf[ReusableStringReader].set(fox) assert(reader.ready()) val cc = new Array[Char](6) var read = reader.read(cc) assert(read == 6) assert("Quick " == new String(cc)) reader.mark(100) read = reader.read(cc) assert(read == 6) assert("brown " == new String(cc)) reader.reset() read = reader.read(cc) assert(read == 6) assert("brown " == new String(cc)) } reader.close() } test("skip") { val reader = new ReusableStringReader reader.asInstanceOf[ReusableStringReader].set(fox) // skip entire the data: var skipped = reader.skip(fox.length() + 1) assert(fox.length() == skipped) assert(-1 == reader.read()) reader.asInstanceOf[ReusableStringReader].set(fox) // reset the data val cc = new Array[Char](6) var read = reader.read(cc) assert(read == 6) assert("Quick " == new String(cc)) // skip some piece of data: skipped = reader.skip(30) assert(skipped == 30) read = reader.read(cc) assert(read == 4) assert("dog." == new String(cc, 0, read)) // skip when already at EOF: skipped = reader.skip(300) assert(skipped == 0, skipped) assert(reader.read() == -1) reader.close() } }
Example 55
Source File: ProcessTestUtils.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.test import java.io.{InputStream, IOException} import scala.sys.process.BasicIO object ProcessTestUtils { class ProcessOutputCapturer(stream: InputStream, capture: String => Unit) extends Thread { this.setDaemon(true) override def run(): Unit = { try { BasicIO.processFully(capture)(stream) } catch { case _: IOException => // Ignores the IOException thrown when the process termination, which closes the input // stream abruptly. } } } }
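A small usage sketch (assumes a Unix-like echo on the PATH):

import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer

val process  = new ProcessBuilder("echo", "hello").start()
val capturer = new ProcessOutputCapturer(process.getInputStream, line => println(s"captured: $line"))
capturer.start() // daemon thread; pumps the child's stdout into the callback line by line
process.waitFor()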
Example 56
Source File: SafeCloseableTest.scala From maha with Apache License 2.0 | 5 votes |
// Copyright 2017, Yahoo Holdings Inc. // Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms. package com.yahoo.maha.report import java.io.{Closeable, IOException} import org.mockito.Mockito._ import org.scalatest.{FunSuite, Matchers} class SuccessCloseable extends Closeable { override def close(): Unit = { //success } } class FailCloseable extends Closeable { override def close(): Unit = { throw new IOException("fail") } } class SafeCloseableTest extends FunSuite with Matchers { def successWork(closeable: Closeable): Unit = { //success } def failWork(closeable: Closeable): Unit = { require(false, "fail") } test("successfully doWork") { safeCloseable(new SuccessCloseable)(successWork) } test("successfully close on failed doWork") { val closeable = spy(new SuccessCloseable) safeCloseable(closeable)(failWork) verify(closeable).close() } test("fail to close on failed closeable after failed doWork") { val closeable = spy(new FailCloseable) safeCloseable(closeable)(failWork) verify(closeable).close() } }
Example 57
Source File: ManagedPath.scala From zio-rocksdb with Apache License 2.0 | 5 votes |
package zio.rocksdb.internal package internal import java.io.IOException import java.nio.file.{ Files, Path } import zio.{ Task, UIO, ZIO, ZManaged } import scala.reflect.io.Directory object ManagedPath { private def createTempDirectory: Task[Path] = Task { Files.createTempDirectory("zio-rocksdb") } private def deleteDirectory(path: Path): UIO[Boolean] = UIO { new Directory(path.toFile).deleteRecursively() } private def deleteDirectoryE(path: Path): UIO[Unit] = deleteDirectory(path) >>= { case true => ZIO.unit case false => ZIO.die(new IOException("Could not delete path recursively")) } def apply(): ZManaged[Any, Throwable, Path] = createTempDirectory.toManaged(deleteDirectoryE) }
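A hedged usage sketch against the ZIO 1.x API; ManagedPath refers to the object defined above:

// acquire a temp directory, use it, and delete it recursively on release
val program: zio.ZIO[Any, Throwable, Unit] =
  ManagedPath().use { dir =>
    zio.ZIO.effectTotal(println(s"working under $dir"))
  }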
Example 58
Source File: FlakyHttpClient.scala From cats-retry with Apache License 2.0 | 5 votes |
package util import java.io.IOException case class FlakyHttpClient() { private var i = 0 def getCatGif(): String = { if (i > 3) { "cute cat gets sleepy and falls asleep" } else { i = i + 1 throw new IOException("Failed to download") } } }
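A hand-rolled retry loop around the flaky client (in the cats-retry project this would normally use combinators such as retryingOnAllErrors; the loop below is only a sketch):

import scala.util.{Failure, Success, Try}

val client = FlakyHttpClient() // as defined above
def attempt(retriesLeft: Int): String =
  Try(client.getCatGif()) match {
    case Success(gif)                                       => gif
    case Failure(_: java.io.IOException) if retriesLeft > 0 => attempt(retriesLeft - 1)
    case Failure(e)                                         => throw e
  }
attempt(10) // the first four calls throw IOException; the fifth succeeds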
Example 59
Source File: SpecificRPCTest.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
package test import org.specs2.mutable.Specification import java.io.IOException import java.net.InetSocketAddress import java.lang.reflect.Proxy import org.apache.avro.specific.SpecificData import org.apache.avro.ipc.netty.NettyServer import org.apache.avro.ipc.netty.NettyTransceiver import org.apache.avro.ipc.Server import org.apache.avro.ipc.specific.SpecificRequestor import org.apache.avro.ipc.specific.SpecificResponder import example.proto.Mail import example.proto.Message class SpecificRPCTest extends Specification { skipAll // RPC tests fail on Linux (Ubuntu 16.04), solution unknown // adapted from https://github.com/phunt/avro-rpc-quickstart "A case class " should { "serialize and deserialize correctly via rpc" in { class MailImpl extends Mail { // in this simple example just return details of the message def send(message: Message): String = { System.out.println("Sending message") val response: String = message.body response.toString } } System.out.println("Starting server") // usually this would be another app, but for simplicity val protocol = Mail.PROTOCOL val responder = new SpecificResponder(protocol, new MailImpl()) val server = new NettyServer(responder, new InetSocketAddress(65111)) System.out.println("Server started") val client = new NettyTransceiver(new InetSocketAddress(65111)) // client code - attach to the server and send a message val requestor = new SpecificRequestor(protocol, client, SpecificData.get) val mailProxy: Mail = Proxy.newProxyInstance( SpecificData.get.getClassLoader, Array(classOf[Mail]), requestor).asInstanceOf[Mail] val message = new Message("avro_user", "pat", "hello_world") System.out.println("Calling proxy.send with message: " + message.toString) System.out.println("Result: " + mailProxy.send(message).toString) val received: String = mailProxy.send(message).toString // cleanup client.close server.close System.out.println("Server stopped") received === message.body } } }
Example 63
Source File: UnsplittableSequenceFileInputFormat.scala From spark-util with Apache License 2.0 | 5 votes |
package org.hammerlab.hadoop.splits

import java.io.IOException
import java.util

import org.apache.hadoop.fs.{ FileStatus, FileSystem, Path ⇒ HPath }
import org.apache.hadoop.mapred.{ JobConf, SequenceFileInputFormat }
import org.apache.hadoop.mapreduce.JobContext
import org.apache.hadoop.mapreduce.lib.input

import scala.collection.JavaConverters._

  override def listStatus(job: JobContext): util.List[FileStatus] =
    super
      .listStatus(job)
      .asScala
      .sortBy {
        _.getPath.getName match {
          case PartFileBasename(idx) ⇒
            idx
          case basename ⇒
            throw new IllegalArgumentException(s"Bad partition file: $basename")
        }
      }
      .asJava
}
Example 64
Source File: ScannerSpec.scala From GettingWorkDoneWithExtensibleEffects with Apache License 2.0 | 5 votes |
package scan

import java.io.FileNotFoundException
import java.io.IOException
import java.nio.file._

import cats._
import cats.data._
import cats.implicits._
import org.atnos.eff._
import org.atnos.eff.all._
import org.atnos.eff.syntax.all._
import org.atnos.eff.addon.monix._
import org.atnos.eff.addon.monix.task._
import org.atnos.eff.syntax.addon.monix.task._
import org.specs2._

import scala.collection.immutable.SortedSet
import scala.concurrent.duration._
import monix.eval._
import monix.execution.Scheduler.Implicits.global

class ScannerSpec extends mutable.Specification {

  case class MockFilesystem(directories: Map[Directory, List[FilePath]], fileSizes: Map[File, Long]) extends Filesystem {

    def length(file: File) = fileSizes.getOrElse(file, throw new IOException())

    def listFiles(directory: Directory) = directories.getOrElse(directory, throw new IOException())

    def filePath(path: String): FilePath =
      if (directories.keySet.contains(Directory(path)))
        Directory(path)
      else if (fileSizes.keySet.contains(File(path)))
        File(path)
      else
        throw new FileNotFoundException(path)
  }

  val base = Directory("base")
  val base1 = File(s"${base.path}/1.txt")
  val base2 = File(s"${base.path}/2.txt")
  val subdir = Directory(s"${base.path}/subdir")
  val sub1 = File(s"${subdir.path}/1.txt")
  val sub3 = File(s"${subdir.path}/3.txt")
  val directories = Map(
    base -> List(subdir, base1, base2),
    subdir -> List(sub1, sub3)
  )
  val fileSizes = Map(base1 -> 1L, base2 -> 2L, sub1 -> 1L, sub3 -> 3L)
  val fs = MockFilesystem(directories, fileSizes)

  type R = Fx.fx4[Task, FilesystemCmd, Reader[ScanConfig, ?], Writer[Log, ?]]

  def run[T](program: Eff[R, T]) =
    program.runReader(ScanConfig(2)).runFilesystemCmds(fs).taskAttempt.runWriter.runAsync.runSyncUnsafe(3.seconds)

  val expected = Right(new PathScan(SortedSet(FileSize(sub3, 3), FileSize(base2, 2)), 7, 4))
  val expectedLogs = Set(
    Log.info("Scan started on Directory(base)"),
    Log.debug("Scanning directory 'Directory(base)': 1 subdirectories and 2 files"),
    Log.debug("File base/1.txt Size 1 B"),
    Log.debug("File base/2.txt Size 2 B"),
    Log.debug("Scanning directory 'Directory(base/subdir)': 0 subdirectories and 2 files"),
    Log.debug("File base/subdir/1.txt Size 1 B"),
    Log.debug("File base/subdir/3.txt Size 3 B")
  )

  val (actual, logs) = run(Scanner.pathScan(base))

  "Report Format" ! {actual.mustEqual(expected)}

  "Logs messages are emitted (ignores order due to non-determinstic concurrent execution)" ! {
    logs.forall(expectedLogs.contains)
  }
}
Example 66
Source File: ScannerSpec.scala From GettingWorkDoneWithExtensibleEffects with Apache License 2.0 | 5 votes |
package scan import java.io.FileNotFoundException import java.io.IOException import java.nio.file._ import cats._ import cats.data._ import cats.implicits._ import org.atnos.eff._ import org.atnos.eff.all._ import org.atnos.eff.syntax.all._ import org.atnos.eff.addon.monix._ import org.atnos.eff.addon.monix.task._ import org.atnos.eff.syntax.addon.monix.task._ import org.specs2._ import scala.collection.immutable.SortedSet import scala.concurrent.duration._ import monix.eval._ import monix.execution.Scheduler.Implicits.global class ScannerSpec extends mutable.Specification { case class MockFilesystem(directories: Map[Directory, List[FilePath]], fileSizes: Map[File, Long]) extends Filesystem { def length(file: File) = fileSizes.getOrElse(file, throw new IOException()) def listFiles(directory: Directory) = directories.getOrElse(directory, throw new IOException()) def filePath(path: String): FilePath = if (directories.keySet.contains(Directory(path))) Directory(path) else if (fileSizes.keySet.contains(File(path))) File(path) else throw new FileNotFoundException(path) } val base = Directory("base") val base1 = File(s"${base.path}/1.txt") val base2 = File(s"${base.path}/2.txt") val subdir = Directory(s"${base.path}/subdir") val sub1 = File(s"${subdir.path}/1.txt") val sub3 = File(s"${subdir.path}/3.txt") val directories = Map( base -> List(subdir, base1, base2), subdir -> List(sub1, sub3) ) val fileSizes = Map(base1 -> 1L, base2 -> 2L, sub1 -> 1L, sub3 -> 3L) val fs = MockFilesystem(directories, fileSizes) type R = Fx.fx4[Task, Reader[Filesystem, ?], Reader[ScanConfig, ?], Writer[Log, ?]] def run[T](program: Eff[R, T], fs: Filesystem) = program.runReader(ScanConfig(2)).runReader(fs).taskAttempt.runWriter.runAsync.runSyncUnsafe(3.seconds) val expected = Right(new PathScan(SortedSet(FileSize(sub3, 3), FileSize(base2, 2)), 7, 4)) val expectedLogs = Set( Log.info("Scan started on Directory(base)"), Log.debug("Scanning directory 'Directory(base)': 1 subdirectories and 2 files"), Log.debug("File base/1.txt Size 1 B"), Log.debug("File base/2.txt Size 2 B"), Log.debug("Scanning directory 'Directory(base/subdir)': 0 subdirectories and 2 files"), Log.debug("File base/subdir/1.txt Size 1 B"), Log.debug("File base/subdir/3.txt Size 3 B") ) val (actual, logs) = run(Scanner.pathScan(base), fs) "Report Format" ! {actual.mustEqual(expected)} "Logs messages are emitted (ignores order due to non-determinstic concurrent execution)" ! { expectedLogs.forall(logs.contains) } }
Example 67
Source File: ScannerSpec.scala From GettingWorkDoneWithExtensibleEffects with Apache License 2.0 | 5 votes |
package scan import java.io.FileNotFoundException import java.io.IOException import java.nio.file._ import cats._ import cats.data._ import cats.implicits._ import org.atnos.eff._ import org.atnos.eff.all._ import org.atnos.eff.syntax.all._ import org.atnos.eff.addon.monix._ import org.atnos.eff.addon.monix.task._ import org.atnos.eff.syntax.addon.monix.task._ import org.specs2._ import scala.collection.immutable.SortedSet import scala.concurrent.duration._ import monix.eval._ import monix.execution.Scheduler.Implicits.global class ScannerSpec extends mutable.Specification { case class MockFilesystem(directories: Map[Directory, List[FilePath]], fileSizes: Map[File, Long]) extends Filesystem { def length(file: File) = fileSizes.getOrElse(file, throw new IOException()) def listFiles(directory: Directory) = directories.getOrElse(directory, throw new IOException()) def filePath(path: String): FilePath = if (directories.keySet.contains(Directory(path))) Directory(path) else if (fileSizes.keySet.contains(File(path))) File(path) else throw new FileNotFoundException(path) } val base = Directory("base") val linkTarget = File(s"/somewhere/else/7.txt") val base1 = File(s"${base.path}/1.txt") val baseLink = Symlink(s"${base.path}/7.txt", linkTarget) val subdir = Directory(s"${base.path}/subdir") val sub2 = File(s"${subdir.path}/2.txt") val subLink = Symlink(s"${subdir.path}/7.txt", linkTarget) val directories = Map( base -> List(subdir, base1, baseLink), subdir -> List(sub2, subLink) ) val fileSizes = Map(base1 -> 1L, sub2 -> 2L, linkTarget -> 7L) val fs = MockFilesystem(directories, fileSizes) type R = Fx.fx5[Task, Reader[Filesystem, ?], Reader[ScanConfig, ?], Writer[Log, ?], State[Set[FilePath], ?]] def run[T](program: Eff[R, T], fs: Filesystem) = program.runReader(ScanConfig(2)).runReader(fs).evalStateZero[Set[FilePath]].taskAttempt.runWriter[Log].runAsync.runSyncUnsafe(3.seconds) val expected = Right(new PathScan(SortedSet(FileSize(linkTarget, 7), FileSize(sub2, 2)), 10, 3)) val (actual, logs) = run(Scanner.pathScan[R](base), fs) "Report Format" ! {actual.mustEqual(expected)} }
Example 68
Source File: ScannerSpec.scala From GettingWorkDoneWithExtensibleEffects with Apache License 2.0 | 5 votes |
package scan import java.io.FileNotFoundException import java.io.IOException import java.nio.file._ import cats._ import cats.data._ import cats.implicits._ import org.atnos.eff._ import org.atnos.eff.all._ import org.atnos.eff.syntax.all._ import org.atnos.eff.addon.monix._ import org.atnos.eff.addon.monix.task._ import org.atnos.eff.syntax.addon.monix.task._ import org.specs2._ import scala.collection.immutable.SortedSet import scala.concurrent.duration._ import monix.eval._ import monix.execution.Scheduler.Implicits.global class ScannerSpec extends mutable.Specification { case class MockFilesystem(directories: Map[Directory, List[FilePath]], fileSizes: Map[File, Long]) extends Filesystem { def length(file: File) = fileSizes.getOrElse(file, throw new IOException()) def listFiles(directory: Directory) = directories.getOrElse(directory, throw new IOException()) def filePath(path: String): FilePath = if (directories.keySet.contains(Directory(path))) Directory(path) else if (fileSizes.keySet.contains(File(path))) File(path) else throw new FileNotFoundException(path) } val base = Directory("base") val base1 = File(s"${base.path}/1.txt") val base2 = File(s"${base.path}/2.txt") val subdir = Directory(s"${base.path}/subdir") val sub1 = File(s"${subdir.path}/1.txt") val sub3 = File(s"${subdir.path}/3.txt") val directories = Map( base -> List(subdir, base1, base2), subdir -> List(sub1, sub3) ) val fileSizes = Map(base1 -> 1L, base2 -> 2L, sub1 -> 1L, sub3 -> 3L) val fs = MockFilesystem(directories, fileSizes) type R = Fx.fx4[Task, Reader[Filesystem, ?], Reader[ScanConfig, ?], Writer[Log, ?]] def run[T](program: Eff[R, T], fs: Filesystem) = program.runReader(ScanConfig(2)).runReader(fs).taskAttempt.runWriter.runAsync.runSyncUnsafe(3.seconds) val expected = Right(new PathScan(SortedSet(FileSize(sub3, 3), FileSize(base2, 2)), 7, 4)) val expectedLogs = Set( Log.info("Scan started on Directory(base)"), Log.debug("Scanning directory 'Directory(base)': 1 subdirectories and 2 files"), Log.debug("File base/1.txt Size 1 B"), Log.debug("File base/2.txt Size 2 B"), Log.debug("Scanning directory 'Directory(base/subdir)': 0 subdirectories and 2 files"), Log.debug("File base/subdir/1.txt Size 1 B"), Log.debug("File base/subdir/3.txt Size 3 B") ) val (actual, logs) = run(Scanner.pathScan(base), fs) "Report Format" ! {actual.mustEqual(expected)} "Logs messages are emitted (ignores order due to non-determinstic concurrent execution)" ! { logs.forall(expectedLogs.contains) } }
Example 69
Source File: ScannerSpec.scala From GettingWorkDoneWithExtensibleEffects with Apache License 2.0 | 5 votes |
package scan

import java.io.FileNotFoundException
import java.io.IOException
import java.nio.file._

import cats._
import cats.data._
import cats.implicits._
import org.atnos.eff._
import org.atnos.eff.all._
import org.atnos.eff.syntax.all._
import org.atnos.eff.addon.monix._
import org.atnos.eff.addon.monix.task._
import org.atnos.eff.syntax.addon.monix.task._
import org.specs2._

import scala.collection.immutable.SortedSet
import scala.concurrent.duration._
import monix.eval._
import monix.execution.Scheduler.Implicits.global

class ScannerSpec extends mutable.Specification {

  case class MockFilesystem(directories: Map[Directory, List[FilePath]], fileSizes: Map[File, Long]) extends Filesystem {

    def length(file: File) = fileSizes.getOrElse(file, throw new IOException())

    def listFiles(directory: Directory) = directories.getOrElse(directory, throw new IOException())

    def filePath(path: String): FilePath =
      if (directories.keySet.contains(Directory(path)))
        Directory(path)
      else if (fileSizes.keySet.contains(File(path)))
        File(path)
      else
        throw new FileNotFoundException(path)
  }

  val base = Directory("base")
  val base1 = File(s"${base.path}/1.txt")
  val base2 = File(s"${base.path}/2.txt")
  val subdir = Directory(s"${base.path}/subdir")
  val sub1 = File(s"${subdir.path}/1.txt")
  val sub3 = File(s"${subdir.path}/3.txt")
  val directories = Map(
    base -> List(subdir, base1, base2),
    subdir -> List(sub1, sub3)
  )
  val fileSizes = Map(base1 -> 1L, base2 -> 2L, sub1 -> 1L, sub3 -> 3L)
  val fs = MockFilesystem(directories, fileSizes)

  type R = Fx.fx3[Task, Reader[Filesystem, ?], Reader[ScanConfig, ?]]

  def run[T](program: Eff[R, T], fs: Filesystem) =
    program.runReader(ScanConfig(2)).runReader(fs).runAsync.attempt.runSyncUnsafe(3.seconds)

  "file scan" ! {
    val actual = run(Scanner.pathScan(base), fs)
    val expected = Right(new PathScan(SortedSet(FileSize(sub3, 3), FileSize(base2, 2)), 7, 4))
    actual.mustEqual(expected)
  }

  "Error from Filesystem" ! {
    val emptyFs: Filesystem = MockFilesystem(directories, Map.empty)
    val actual = runE(Scanner.scanReport(Array("base", "10")), emptyFs)
    val expected = Left(new IOException().toString)
    actual.mustEqual(expected)
  }

  type E = Fx.fx3[Task, Reader[Filesystem, ?], Either[String, ?]]

  def runE[T](program: Eff[E, T], fs: Filesystem) =
    //there are two nested Either in the stack, one from Exceptions and one from errors raised by the program
    //we convert to a common error type String then flatten
    program.runReader(fs).runEither.runAsync.attempt.runSyncUnsafe(3.seconds).leftMap(_.toString).flatten

  "Error - Report with non-numeric input" ! {
    val actual = runE(Scanner.scanReport(Array("base", "not a number")), fs)
    val expected = Left("Number of files must be numeric: not a number")
    actual.mustEqual(expected)
  }

  "Error - Report with non-positive input" ! {
    val actual = runE(Scanner.scanReport(Array("base", "-1")), fs)
    val expected = Left("Invalid number of files -1")
    actual.mustEqual(expected)
  }
}
Example 73
Source File: ScannerSpec.scala From GettingWorkDoneWithExtensibleEffects with Apache License 2.0 | 5 votes |
package scan import java.io.FileNotFoundException import java.io.IOException import java.nio.file._ import cats._ import cats.data._ import cats.implicits._ import org.atnos.eff._ import org.atnos.eff.all._ import org.atnos.eff.syntax.all._ import org.atnos.eff.addon.monix._ import org.atnos.eff.addon.monix.task._ import org.atnos.eff.syntax.addon.monix.task._ import org.specs2._ import scala.collection.immutable.SortedSet import scala.concurrent.duration._ import monix.eval._ import monix.execution.Scheduler.Implicits.global class ScannerSpec extends mutable.Specification { case class MockFilesystem(directories: Map[Directory, List[FilePath]], fileSizes: Map[File, Long]) extends Filesystem { def length(file: File) = fileSizes.getOrElse(file, throw new IOException()) def listFiles(directory: Directory) = directories.getOrElse(directory, throw new IOException()) def filePath(path: String): FilePath = if (directories.keySet.contains(Directory(path))) Directory(path) else if (fileSizes.keySet.contains(File(path))) File(path) else throw new FileNotFoundException(path) } val base = Directory("base") val base1 = File(s"${base.path}/1.txt") val base2 = File(s"${base.path}/2.txt") val subdir = Directory(s"${base.path}/subdir") val sub1 = File(s"${subdir.path}/1.txt") val sub3 = File(s"${subdir.path}/3.txt") val directories = Map( base -> List(subdir, base1, base2), subdir -> List(sub1, sub3) ) val fileSizes = Map(base1 -> 1L, base2 -> 2L, sub1 -> 1L, sub3 -> 3L) val fs = MockFilesystem(directories, fileSizes) type R = Fx.fx3[Task, Reader[Filesystem, ?], Reader[ScanConfig, ?]] def run[T](program: Eff[R, T], fs: Filesystem) = program.runReader(ScanConfig(2)).runReader(fs).runAsync.attempt.runSyncUnsafe(3.seconds) "file scan" ! { val actual = run(Scanner.pathScan(base), fs) val expected = Right(new PathScan(SortedSet(FileSize(sub3, 3), FileSize(base2, 2)), 7, 4)) actual.mustEqual(expected) } "Error from Filesystem" ! { val emptyFs: Filesystem = MockFilesystem(directories, Map.empty) val actual = runE(Scanner.scanReport(Array("base", "10")), emptyFs) val expected = ??? actual.mustEqual(expected) } type E = Fx.fx3[Task, Reader[Filesystem, ?], Either[String, ?]] def runE[T](program: Eff[E, T], fs: Filesystem) = //there are two nested Either in the stack, one from Exceptions and one from errors raised by the program //we convert to a common error type String then flatten program.runReader(fs).runEither.runAsync.attempt.runSyncUnsafe(3.seconds).leftMap(_.toString).flatten "Error - Report with non-numeric input" ! { val actual = runE(Scanner.scanReport(Array("base", "not a number")), fs) val expected = Left("Number of files must be numeric: not a number") actual.mustEqual(expected) } "Error - Report with non-positive input" ! { val actual = runE(Scanner.scanReport(Array("base", "-1")), fs) val expected = Left("Invalid number of files -1") actual.mustEqual(expected) } }
Example 74
Source File: FileUtil.scala From mleap with Apache License 2.0 | 5 votes |
package ml.combust.bundle.util

import java.io.IOException
import java.nio.file.attribute.BasicFileAttributes
import java.nio.file.{FileVisitResult, Files, Path, SimpleFileVisitor}

object FileUtil {
  def rmRf(path: Path): Unit = {
    Files.walkFileTree(path, new SimpleFileVisitor[Path]() {
      override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = {
        Files.delete(file)
        FileVisitResult.CONTINUE
      }

      override def postVisitDirectory(dir: Path, exc: IOException): FileVisitResult = {
        Files.delete(dir)
        FileVisitResult.CONTINUE
      }
    })
  }
}
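A minimal usage sketch (not part of the mleap sources; the temporary directory and file names are illustrative) showing how FileUtil.rmRf might be exercised to remove a whole directory tree:

import java.nio.file.Files
import ml.combust.bundle.util.FileUtil

object RmRfDemo extends App {
  // build a throwaway tree, then delete it recursively
  val root = Files.createTempDirectory("rmrf-demo")
  Files.createDirectories(root.resolve("nested/dir"))
  Files.write(root.resolve("nested/dir/data.txt"), "hello".getBytes("UTF-8"))

  FileUtil.rmRf(root)
  assert(!Files.exists(root)) // the whole tree is gone
}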
Example 75
Source File: JGitSystemReader.scala From sbt-dynver with Apache License 2.0 | 5 votes |
package sbtdynver

import java.io.{ File, IOException }
import java.net.{ InetAddress, UnknownHostException }
import java.nio.file.{ Files, InvalidPathException, Path, Paths }

import org.eclipse.jgit.internal.JGitText
import org.eclipse.jgit.lib.{ Config, Constants }
import org.eclipse.jgit.storage.file.FileBasedConfig
import org.eclipse.jgit.util.{ FS, StringUtils, SystemReader }
import org.slf4j.LoggerFactory

// Copy of org.eclipse.jgit.util.SystemReader.Default with:
// * calls to Files.createDirectories guarded by if !Files.isDirectory
//   necessary because my ~/.config is a symlink to a directory
//   which Files.createDirectories isn't happy with
object JGitSystemReader extends SystemReader {
  private val LOG = LoggerFactory.getLogger(getClass)

  lazy val init: Unit = SystemReader.setInstance(this)

  override lazy val getHostname = {
    try InetAddress.getLocalHost.getCanonicalHostName
    catch { case _: UnknownHostException => "localhost" }
  }.ensuring(_ != null)

  override def getenv(variable: String): String = System.getenv(variable)
  override def getProperty(key: String): String = System.getProperty(key)
  override def getCurrentTime: Long             = System.currentTimeMillis
  override def getTimezone(when: Long): Int     = getTimeZone.getOffset(when) / (60 * 1000)

  override def openUserConfig(parent: Config, fs: FS) =
    new FileBasedConfig(parent, new File(fs.userHome, ".gitconfig"), fs)

  override def openSystemConfig(parent: Config, fs: FS): FileBasedConfig = {
    if (StringUtils.isEmptyOrNull(getenv(Constants.GIT_CONFIG_NOSYSTEM_KEY))) {
      val configFile = fs.getGitSystemConfig
      if (configFile != null) return new FileBasedConfig(parent, configFile, fs)
    }
    new FileBasedConfig(parent, null, fs) {
      override def load(): Unit = () // do not load
      override def isOutdated   = false // regular class would bomb here
    }
  }

  override def openJGitConfig(parent: Config, fs: FS): FileBasedConfig = {
    val xdgPath = getXDGConfigHome(fs)
    if (xdgPath != null) {
      var configPath: Path = null
      try {
        configPath = xdgPath.resolve("jgit")
        if (!Files.isDirectory(configPath)) Files.createDirectories(configPath)
        configPath = configPath.resolve(Constants.CONFIG)
        return new FileBasedConfig(parent, configPath.toFile, fs)
      } catch {
        case e: IOException =>
          LOG.error(JGitText.get.createJGitConfigFailed, configPath: Any, e)
      }
    }
    new FileBasedConfig(parent, new File(fs.userHome, ".jgitconfig"), fs)
  }

  private def getXDGConfigHome(fs: FS): Path = {
    var configHomePath = getenv(Constants.XDG_CONFIG_HOME)
    if (StringUtils.isEmptyOrNull(configHomePath))
      configHomePath = new File(fs.userHome, ".config").getAbsolutePath
    try {
      val xdgHomePath = Paths.get(configHomePath)
      if (!Files.isDirectory(xdgHomePath)) Files.createDirectories(xdgHomePath)
      xdgHomePath
    } catch {
      case e @ (_: IOException | _: InvalidPathException) =>
        LOG.error(JGitText.get.createXDGConfigHomeFailed, configHomePath: Any, e)
        null
    }
  }
}
Example 76
Source File: MockConsole.scala From zio with Apache License 2.0 | 5 votes |
package zio.test.mock

import java.io.IOException

import zio.console.Console
import zio.{ Has, IO, UIO, URLayer, ZLayer }

object MockConsole extends Mock[Console] {

  object PutStr   extends Effect[String, Nothing, Unit]
  object PutStrLn extends Effect[String, Nothing, Unit]
  object GetStrLn extends Effect[Unit, IOException, String]

  val compose: URLayer[Has[Proxy], Console] =
    ZLayer.fromService(proxy =>
      new Console.Service {
        def putStr(line: String): UIO[Unit]   = proxy(PutStr, line)
        def putStrLn(line: String): UIO[Unit] = proxy(PutStrLn, line)
        val getStrLn: IO[IOException, String] = proxy(GetStrLn)
      }
    )
}
Example 77
Source File: FinagleDesignTest.scala From airframe with Apache License 2.0 | 5 votes |
package wvlet.airframe.http.finagle

import java.io.IOException
import java.net.URL

import wvlet.airframe.control.Control
import wvlet.airframe.http.{Endpoint, Router}
import wvlet.airspec.AirSpec
import wvlet.log.io.IOUtil

class FinagleDesignTest extends AirSpec {

  trait MyTestServer {
    @Endpoint(path = "/hello")
    def hello: String = {
      "hello"
    }
  }

  def newConfig = FinagleServerConfig(router = Router.of[MyTestServer])

  def `start server`: Unit = {
    finagleDefaultDesign
      .bind[FinagleServerConfig].toInstance(newConfig)
      .bind[FinagleSyncClient].toProvider { server: FinagleServer =>
        Finagle.newSyncClient(server.localAddress)
      }
      .noLifeCycleLogging
      .build[FinagleSyncClient] { client =>
        // The server will start here
        val msg = client.get[String]("/hello")
        msg shouldBe "hello"
      }
  }

  def `no-server design` = {
    val config = newConfig
    finagleBaseDesign
      .bind[FinagleServerConfig].toInstance(config)
      .noLifeCycleLogging
      .build[FinagleServerFactory] { factory =>
        // No server should start here
        intercept[IOException] {
          Control.withResource(new URL(s"http://localhost:${config.port}").openStream()) { in =>
            IOUtil.readAsString(in)
          }
        }
      }
  }

  def `build a server from factory` = {
    finagleBaseDesign.noLifeCycleLogging.build[FinagleServerFactory] { factory =>
      val s1 = factory.newFinagleServer(newConfig)
      Control.withResource(FinagleClient.newSyncClient(s1.localAddress)) { client =>
        client.get[String]("/hello") shouldBe "hello"
      }
    }
  }
}
Example 78
Source File: CourierQueryParsers.scala From naptime with Apache License 2.0 | 5 votes |
package org.coursera.naptime.router2

import java.io.IOException

import com.linkedin.data.DataMap
import com.linkedin.data.schema.DataSchema
import com.linkedin.data.schema.validation.CoercionMode
import com.linkedin.data.schema.validation.RequiredMode
import com.linkedin.data.schema.validation.ValidateDataAgainstSchema
import com.linkedin.data.schema.validation.ValidationOptions
import com.typesafe.scalalogging.StrictLogging
import org.coursera.courier.codecs.InlineStringCodec
import org.coursera.naptime.courier.StringKeyCodec
import play.api.mvc.RequestHeader

object CourierQueryParsers extends StrictLogging {

  import CollectionResourceRouter.errorRoute

  private[this] val validationOptions =
    new ValidationOptions(RequiredMode.FIXUP_ABSENT_WITH_DEFAULT, CoercionMode.STRING_TO_PRIMITIVE)

  private[this] def parseStringToDataMap(
      paramName: String,
      schema: DataSchema,
      resourceClass: Class[_])(value: String): Either[RouteAction, DataMap] = {
    try {
      val parsed = if (value.startsWith("(") && value.endsWith(")")) {
        InlineStringCodec.instance.bytesToMap(value.getBytes("UTF-8"))
      } else {
        val codec = new StringKeyCodec(schema)
        codec.bytesToMap(value.getBytes("UTF-8"))
      }
      val validated = ValidateDataAgainstSchema.validate(parsed, schema, validationOptions)
      if (validated.isValid) {
        Right(validated.getFixed.asInstanceOf[DataMap])
      } else {
        logger.warn(
          s"${resourceClass.getName}: Bad query parameter for parameter " +
            s"'$paramName': $value. Errors: ${validated.getMessages}")
        Left(errorRoute(s"Improperly formatted value for parameter '$paramName'", resourceClass))
      }
    } catch {
      case ioException: IOException =>
        logger.warn(
          s"${resourceClass.getName}: Bad query parameter for parameter " +
            s"'$paramName': $value. Errors: ${ioException.getMessage}")
        Left(errorRoute(s"Improperly formatted value for parameter '$paramName'", resourceClass))
    }
  }

  def strictParse(
      paramName: String,
      schema: DataSchema,
      resourceClass: Class[_],
      rh: RequestHeader): Either[RouteAction, DataMap] = {
    val queryStringResults = rh.queryString.get(paramName)
    if (queryStringResults.isEmpty || queryStringResults.get.isEmpty) {
      Left(errorRoute(s"Missing required parameter '$paramName'", resourceClass))
    } else if (queryStringResults.get.tail.isEmpty) {
      val stringValue = queryStringResults.get.head
      parseStringToDataMap(paramName, schema, resourceClass)(stringValue)
    } else {
      Left(errorRoute(s"Too many query parameters for '$paramName", resourceClass))
    }
  }

  def optParse(
      paramName: String,
      schema: DataSchema,
      resourceClass: Class[_],
      rh: RequestHeader): Either[RouteAction, Option[DataMap]] = {
    val queryStringResults = rh.queryString.get(paramName)
    if (queryStringResults.isEmpty || queryStringResults.get.isEmpty) {
      Right(None)
    } else if (queryStringResults.get.tail.isEmpty) {
      val stringValue = queryStringResults.get.head
      parseStringToDataMap(paramName, schema, resourceClass)(stringValue).right.map(Some(_))
    } else {
      Left(errorRoute(s"Too many query parameters for '$paramName", resourceClass))
    }
  }

  // TODO: Add a 'QTry' query parameter type that will attempt to parse the query parameter but
  // instead of failing, will provide the valiation errors to the resource handler to do with what
  // they want.
}
Example 79
Source File: RetryUtilsSuite.scala From azure-event-hubs-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.eventhubs.utils

import java.io.IOException
import java.util.concurrent.CompletableFuture

import com.microsoft.azure.eventhubs.EventHubException
import org.scalatest.FunSuite
import org.scalatest.concurrent.ScalaFutures

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future

class RetryUtilsSuite extends FunSuite with ScalaFutures {
  import RetryUtilsSuite._

  test("don't retry successful Future") {
    val tries = incrementFutureIterator(1)
    val result = RetryUtils.retryScala(tries.next, "test", maxRetry = 3, delay = 1).futureValue
    assert(1 === result)
  }

  test("don't retry failed Future with normal exception") {
    val fails = Iterator(Future.failed(new IOException("not retry")))
    val tries = fails ++ incrementFutureIterator(1)
    val exception = RetryUtils.retryScala(tries.next, "test", maxRetry = 3, delay = 1).failed.futureValue
    assert("not retry" === exception.getMessage)
  }

  test("don't retry failed Future with non-transient EventHubException") {
    val tries = Iterator(nonTransientEHE()) ++ incrementFutureIterator(1)
    val exception = RetryUtils.retryScala(tries.next, "test", maxRetry = 3, delay = 1).failed.futureValue
    assert("nonTransient" === exception.getMessage)
  }

  test("retry maxRetry times until success") {
    val fails = Iterator(failedWithEHE(), causedByEHE(), failedWithEHE())
    val tries = fails ++ incrementFutureIterator(4)
    val result = RetryUtils.retryScala(tries.next, "test", maxRetry = 3, delay = 1).futureValue
    assert(4 === result)
  }

  test("retry maxRetry times until failure") {
    val fails = Iterator(failedWithEHE(), causedByEHE(), failedWithEHE(), causedByEHE())
    val tries = fails ++ incrementFutureIterator(4)
    val exception = RetryUtils.retryScala(tries.next, "test", maxRetry = 3, delay = 1).failed.futureValue
    assert("causedBy" === exception.getMessage)
  }

  test("retryNotNull") {
    val nullFuture: CompletableFuture[AnyRef] =
      CompletableFuture.completedFuture(null.asInstanceOf[AnyRef])
    val normalFuture: CompletableFuture[Int] = CompletableFuture.completedFuture(10)
    val tries = Iterator.continually(nullFuture).take(9) ++ Iterator(normalFuture)
    val result = RetryUtils.retryNotNull(tries.next, "test").futureValue
    assert(10 === result)
  }
}

object RetryUtilsSuite {
  def failedWithEHE(): Future[Int] = Future.failed(new EventHubException(true, "failedWith"))

  def causedByEHE(): Future[Int] = {
    val causedBy = new EventHubException(true, "causedBy")
    Future.failed(new IOException(causedBy))
  }

  def nonTransientEHE(): Future[Int] = Future.failed(new EventHubException(false, "nonTransient"))

  def incrementFutureIterator(value: Int = 0): Iterator[Future[Int]] =
    Iterator.from(value).map(Future(_))
}
Example 80
Source File: ChangeStream.scala From changestream with MIT License | 5 votes |
package changestream

import java.io.IOException
import java.util.concurrent.TimeoutException

import com.github.shyiko.mysql.binlog.BinaryLogClient
import com.typesafe.config.ConfigFactory
import org.slf4j.LoggerFactory

import scala.concurrent.Await
import scala.concurrent.duration._
import scala.language.postfixOps
import scala.concurrent.ExecutionContext.Implicits.global

object ChangeStream extends App {
  protected val log = LoggerFactory.getLogger(getClass)
  protected val config = ConfigFactory.load().getConfig("changestream")
  protected val mysqlHost = config.getString("mysql.host")
  protected val mysqlPort = config.getInt("mysql.port")
  protected val overridePosition = System.getenv("OVERRIDE_POSITION") match {
    case position: String if (position != null && position.length > 0) => Some(position) //scalastyle:ignore
    case _ => None
  }
  protected val client = new BinaryLogClient(
    mysqlHost,
    mysqlPort,
    config.getString("mysql.user"),
    config.getString("mysql.password")
  )

  client.setKeepAliveInterval(config.getLong("mysql.keepalive"))

  ChangeStreamEventListener.setConfig(config)
  ChangestreamEventDeserializerConfig.setConfig(config)
  ChangeStreamEventListener.startControlServer(config)

  client.registerEventListener(ChangeStreamEventListener)
  client.setEventDeserializer(ChangeStreamEventDeserializer)
  client.registerLifecycleListener(ChangeStreamLifecycleListener)

  getConnected(overridePosition)

  def serverName = s"${mysqlHost}:${mysqlPort}"
  def clientId = client.getServerId
  def isConnected = client.isConnected

  def getConnectedAndWait(startingPosition: Option[String]) =
    Await.result(getConnected(startingPosition), 60.seconds)

  def disconnectClient = client.disconnect()

  def getConnected(startingPosition: Option[String]) = {
    log.info("Starting changestream...")

    val getPositionFuture = startingPosition match {
      case Some(_) =>
        log.info("Overriding starting binlog position with OVERRIDE_POSITION={}", overridePosition)
        ChangeStreamEventListener.setPosition(startingPosition)
      case _ =>
        ChangeStreamEventListener.getStoredPosition
    }

    getPositionFuture.map { position =>
      setBinlogClientPosition(position)
      getInternalClientConnected
    }
  }

  protected def setBinlogClientPosition(position: Option[String]) = position match {
    case Some(position) =>
      log.info("Setting starting binlog position at {}.", position)
      val Array(fileName, posLong) = position.split(":")
      client.setBinlogFilename(fileName)
      client.setBinlogPosition(java.lang.Long.valueOf(posLong))
    case None =>
      log.info("Starting binlog position in real time")
      client.setBinlogFilename(null) //scalastyle:ignore
      client.setBinlogPosition(4L)
  }

  protected def getInternalClientConnected = {
    while (!client.isConnected) {
      try {
        client.connect(5000)
      } catch {
        case e: IOException =>
          log.error("Failed to connect to MySQL to stream the binlog, retrying in 5 seconds...", e)
          Thread.sleep(5000)
        case e: TimeoutException =>
          log.error("Timed out connecting to MySQL to stream the binlog, retrying in 5 seconds...", e)
          Thread.sleep(5000)
        case e: Exception =>
          log.error("Failed to connect, exiting...", e)
          Await.result(ChangeStreamEventListener.shutdownAndExit(1), 60.seconds)
      }
    }
  }
}
Example 81
Source File: IO.scala From perf_tester with Apache License 2.0 | 5 votes |
package org.perftester.process

import java.nio.file.{Files, Path, SimpleFileVisitor, FileVisitResult}
import java.nio.file.attribute.BasicFileAttributes
import java.io.IOException

object IO {
  def deleteDir(root: Path, deleteRoot: Boolean): Unit = {
    object deleter extends SimpleFileVisitor[Path] {
      override def visitFile(path: Path, attr: BasicFileAttributes): FileVisitResult = {
        Files.delete(path)
        FileVisitResult.CONTINUE
      }
      override def postVisitDirectory(path: Path, e: IOException): FileVisitResult = {
        if (e eq null) {
          if (deleteRoot || path != root)
            Files.delete(path)
          FileVisitResult.CONTINUE
        } else throw e // directory iteration failed
      }
    }
    Files.walkFileTree(root, deleter)
  }

  def jarsIn(path: Path): Seq[Path] =
    Files
      .walk(path)
      .toArray()
      .map(_.asInstanceOf[Path].toAbsolutePath)
      .toList
      .filter(_.getFileName.toString.endsWith(".jar"))

  def listSourcesIn(path: Path): List[Path] = {
    def isSource(p: Path) = {
      val name = p.getFileName.toString
      name.endsWith(".scala") || name.endsWith(".java")
    }
    val maxDepth = 557
    Files
      .walk(path, maxDepth)
      .toArray
      .map(_.asInstanceOf[Path].toAbsolutePath)
      .filter(isSource)
      .toList
  }
}
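A small usage sketch (not from perf_tester itself; the scratch paths are illustrative) for the helpers above. Note that deleteDir only removes a directory in postVisitDirectory when the walk reported no IOException:

import java.nio.file.Files
import org.perftester.process.IO

object IODemo extends App {
  // build a scratch tree, list its Scala/Java sources, then remove it
  val scratch = Files.createTempDirectory("io-demo")
  Files.write(scratch.resolve("Example.scala"), "object Example".getBytes("UTF-8"))

  println(IO.listSourcesIn(scratch)) // List(.../Example.scala)
  IO.deleteDir(scratch, deleteRoot = true)
}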
Example 82
Source File: Utils.scala From perf_tester with Apache License 2.0 | 5 votes |
package org.perftester

import java.io.IOException
import java.nio.file._
import java.nio.file.attribute.{BasicFileAttributes, FileTime}
import java.time.Instant
import java.util

import ammonite.ops.{Path => aPath}

object Utils {

  def lastChangedDate(path: aPath): (Instant, String) = lastChangedDate(path.toNIO)

  def lastChangedDate(path: Path): (Instant, String) = {
    var latest = Files.getLastModifiedTime(path)
    var at = path.toString

    object walker extends SimpleFileVisitor[Path] {
      override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = {
        val thisTime = attrs.lastModifiedTime()
        if (thisTime.compareTo(latest) > 0) {
          at = file.toString
          latest = thisTime
        }
        FileVisitResult.CONTINUE
      }

      override def preVisitDirectory(dir: Path, attrs: BasicFileAttributes): FileVisitResult = {
        if (dir.getFileName.toString == "intellij")
          FileVisitResult.SKIP_SUBTREE
        else FileVisitResult.CONTINUE
      }
    }
    Files.walkFileTree(path, util.EnumSet.noneOf(classOf[FileVisitOption]), Int.MaxValue, walker)
    (latest.toInstant, at)
  }

  def deleteDir(scalaPackDir: Path) = {
    if (Files.exists(scalaPackDir)) {
      println(s"delete pack dir $scalaPackDir")
      Files.walkFileTree(scalaPackDir, fileDeleter)
    } else {
      println(s"pack dir $scalaPackDir doesnt exist")
    }
  }

  private object fileDeleter extends SimpleFileVisitor[Path] {
    override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = {
      println(s"delete file $file")
      Files.delete(file)
      FileVisitResult.CONTINUE
    }
    override def postVisitDirectory(dir: Path, exc: IOException): FileVisitResult = {
      println(s"delete dir $dir")
      Files.delete(dir)
      FileVisitResult.CONTINUE
    }
  }

  def copy(source: Path, target: Path): Unit = {
    class Copier(source: Path, target: Path) extends SimpleFileVisitor[Path] {
      override def preVisitDirectory(dir: Path, attrs: BasicFileAttributes): FileVisitResult = {
        val targetDir = target.resolve(source.relativize(dir))
        println(s"copy dir $dir -> $targetDir")
        Files.copy(dir, targetDir)
        FileVisitResult.CONTINUE
      }

      override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = {
        val targetFile = target.resolve(source.relativize(file))
        println(s"copy file $file -> $targetFile")
        Files.copy(file, targetFile)
        FileVisitResult.CONTINUE
      }
    }
    Files.walkFileTree(source, new Copier(source, target))
  }

  def touch(path: Path): Unit = {
    if (Files.exists(path))
      Files.setLastModifiedTime(path, FileTime.from(Instant.now))
    else Files.createFile(path)
  }
}
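For orientation, a brief hedged sketch (the "src" directory is an assumption, not part of the original project) showing how the plain NIO overload of Utils.lastChangedDate might be called:

import java.nio.file.Paths
import org.perftester.Utils

// returns the newest modification instant under ./src and the file it belongs to
val (newest, file) = Utils.lastChangedDate(Paths.get("src"))
println(s"$file was last changed at $newest")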
Example 83
Source File: TikaParquetParser.scala From project-matt with MIT License | 5 votes |
package org.datafy.aws.app.matt.extras

import java.io.{File, FileOutputStream, IOException, InputStream}
import java.util

import scala.collection.JavaConverters._

import org.xml.sax.{ContentHandler, SAXException}
import org.apache.tika.metadata.Metadata
import org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE
import org.apache.tika.mime.MediaType
import org.apache.tika.parser.{AbstractParser, ParseContext}
import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.parquet.hadoop.ParquetFileReader
import org.apache.parquet.hadoop.ParquetReader
import org.apache.parquet.format.converter.ParquetMetadataConverter
import org.apache.parquet.hadoop.util.HadoopInputFile
import org.apache.parquet.tools.json.JsonRecordFormatter
import org.apache.parquet.tools.read.{SimpleReadSupport, SimpleRecord}
import org.apache.tika.exception.TikaException
import org.apache.tika.sax.XHTMLContentHandler

import scala.util.Random

class TikaParquetParser extends AbstractParser {
  // make some stuff here
  final val PARQUET_RAW = MediaType.application("x-parquet")

  private val SUPPORTED_TYPES: Set[MediaType] = Set(PARQUET_RAW)

  def getSupportedTypes(context: ParseContext): util.Set[MediaType] = {
    SUPPORTED_TYPES.asJava
  }

  @throws(classOf[IOException])
  @throws(classOf[SAXException])
  @throws(classOf[TikaException])
  def parse(stream: InputStream, handler: ContentHandler,
            metadata: Metadata, context: ParseContext): Unit = {
    // create temp file from stream
    val fileNamePrefix = Random.alphanumeric.take(5).mkString
    val tempFile = File.createTempFile(s"parquet-${fileNamePrefix}", ".parquet")
    IOUtils.copy(stream, new FileOutputStream(tempFile))

    val conf = new Configuration()
    val path = new Path(tempFile.getAbsolutePath)
    val parquetMetadata = ParquetFileReader.readFooter(conf, path, ParquetMetadataConverter.NO_FILTER)
    var defaultReader: ParquetReader[SimpleRecord] = null

    val columns = parquetMetadata.getFileMetaData.getSchema.getFields
    metadata.set(CONTENT_TYPE, PARQUET_RAW.toString)
    metadata.set("Total Number of Columns", columns.size.toString)
    metadata.set("Parquet Column Names", columns.toString)

    val xhtml = new XHTMLContentHandler(handler, metadata)
    xhtml.startDocument()
    xhtml.startElement("p")

    // ::TODO:: ensure parquet reader reads all files not only file row
    try {
      defaultReader = ParquetReader.builder(new SimpleReadSupport(), new Path(tempFile.getAbsolutePath)).build()
      if (defaultReader.read() != null) {
        val values: SimpleRecord = defaultReader.read()
        val jsonFormatter = JsonRecordFormatter.fromSchema(parquetMetadata.getFileMetaData.getSchema)
        val textContent: String = jsonFormatter.formatRecord(values)
        xhtml.characters(textContent)
        xhtml.endElement("p")
        xhtml.endDocument()
      }
    } catch {
      case e: Throwable =>
        e.printStackTrace()
        if (defaultReader != null) {
          try {
            defaultReader.close()
          } catch {
            case _: Throwable =>
          }
        }
    } finally {
      if (tempFile != null) tempFile.delete()
    }
  }
}
Example 84
Source File: TikaHadoopOrcParser.scala From project-matt with MIT License | 5 votes |
package org.datafy.aws.app.matt.extras

import java.io.{File, FileOutputStream, IOException, InputStream}
import java.util

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration

import scala.collection.JavaConverters._

import org.apache.hadoop.fs.Path
import org.apache.hadoop.hive.serde2.objectinspector.StructField
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector
import org.apache.orc.OrcFile
import org.apache.orc.OrcFile.ReaderOptions
import org.apache.orc.Reader
import org.apache.orc.RecordReader
import org.apache.tika.exception.TikaException
import org.apache.tika.metadata.Metadata
import org.apache.tika.mime.MediaType
import org.apache.tika.parser.{AbstractParser, ParseContext}
import org.xml.sax.{ContentHandler, SAXException}

import scala.util.Random

class TikaHadoopOrcParser extends AbstractParser {
  final val ORC_RAW = MediaType.application("x-orc")

  private val SUPPORTED_TYPES: Set[MediaType] = Set(ORC_RAW)

  def getSupportedTypes(context: ParseContext): util.Set[MediaType] = {
    SUPPORTED_TYPES.asJava
  }

  @throws(classOf[IOException])
  @throws(classOf[SAXException])
  @throws(classOf[TikaException])
  def parse(stream: InputStream, handler: ContentHandler,
            metadata: Metadata, context: ParseContext): Unit = {
    // create temp file from stream
    try {
      val fileNamePrefix = Random.alphanumeric.take(5).mkString
      val tempFile = File.createTempFile(s"orc-${fileNamePrefix}", ".orc")
      IOUtils.copy(stream, new FileOutputStream(tempFile))

      val path = new Path(tempFile.getAbsolutePath)
      val conf = new Configuration()
      val orcReader = OrcFile.createReader(path, new ReaderOptions(conf))
      val records: RecordReader = orcReader.rows()

      val storeRecord = null
      val firstBlockKey = null
    } catch {
      case e: Throwable => e.printStackTrace()
    }
    // val fields =
  }
}
Example 85
Source File: Utilities.scala From project-matt with MIT License | 5 votes |
package org.datafy.aws.app.matt.extras

import org.apache.tika.Tika
import org.apache.tika.metadata.Metadata

import java.io.{BufferedInputStream, IOException, InputStream, StringWriter}
import java.util.zip.GZIPInputStream

import org.xml.sax.SAXException
import org.apache.tika.exception.TikaException
import org.apache.tika.metadata.serialization.JsonMetadata
import org.apache.tika.parser.{AutoDetectParser, ParseContext}
import org.apache.tika.parser.pkg.CompressorParser
import org.apache.tika.sax.BodyContentHandler

object Utilities {

  private val MAX_STRING_LENGTH = 2147483647

  private val tika = new Tika()
  tika.setMaxStringLength(MAX_STRING_LENGTH)

  @throws(classOf[IOException])
  @throws(classOf[SAXException])
  @throws(classOf[TikaException])
  def getParsePlainStream(inputStream: InputStream): String = {
    val autoDetectParser = new AutoDetectParser()
    val bodyContentHandler = new BodyContentHandler(MAX_STRING_LENGTH)
    val fileMetadata = new Metadata()

    if (inputStream.read() == -1) {
      return "Could not scan inputStream less than 0 bytes"
    }
    autoDetectParser.parse(inputStream, bodyContentHandler, fileMetadata)
    bodyContentHandler.toString
  }

  @throws(classOf[IOException])
  @throws(classOf[SAXException])
  @throws(classOf[TikaException])
  def getParseCompressedStream(myStream: InputStream) = {
    // peek at the first two bytes and report whether the stream carries the GZIP magic number
    var inputStream = myStream
    if (!inputStream.markSupported()) {
      inputStream = new BufferedInputStream(inputStream)
    }
    inputStream.mark(2)
    var magicBytes = 0
    try {
      magicBytes = inputStream.read() & 0xff | ((inputStream.read() << 8) & 0xff00)
      inputStream.reset()
    } catch {
      case ioe: IOException => ioe.printStackTrace()
    }
    magicBytes == GZIPInputStream.GZIP_MAGIC
  }
}

case class And[A](p1: A => Boolean, p2: A => Boolean) extends (A => Boolean) {
  def apply(a: A) = p1(a) && p2(a)
}

case class Or[A](p1: A => Boolean, p2: A => Boolean) extends (A => Boolean) {
  def apply(a: A) = p1(a) || p2(a)
}
Example 86
Source File: KeyVaultUtils.scala From azure-kusto-spark with Apache License 2.0 | 5 votes |
package com.microsoft.kusto.spark.utils

import java.io.IOException

import com.microsoft.azure.CloudException
import com.microsoft.azure.keyvault.KeyVaultClient
import com.microsoft.kusto.spark.authentication._
import com.microsoft.kusto.spark.datasource._

object KeyVaultUtils {
  val AppId = "kustoAppId"
  val AppKey = "kustoAppKey"
  val AppAuthority = "kustoAppAuthority"
  val SasUrl = "blobStorageSasUrl"
  val StorageAccountId = "blobStorageAccountName"
  val StorageAccountKey = "blobStorageAccountKey"
  val Container = "blobContainer"
  var cachedClient: KeyVaultClient = _

  private def getClient(clientID: String, clientPassword: String): KeyVaultClient = {
    if (cachedClient == null) {
      cachedClient = new KeyVaultADALAuthenticator(clientID, clientPassword).getAuthenticatedClient
    }
    cachedClient
  }

  @throws[CloudException]
  @throws[IOException]
  def getStorageParamsFromKeyVault(keyVaultAuthentication: KeyVaultAuthentication): KustoStorageParameters = {
    keyVaultAuthentication match {
      case app: KeyVaultAppAuthentication =>
        val client = getClient(app.keyVaultAppID, app.keyVaultAppKey)
        getStorageParamsFromKeyVaultImpl(client, app.uri)
      case certificate: KeyVaultCertificateAuthentication =>
        throw new UnsupportedOperationException("certificates are not yet supported")
    }
  }

  @throws[CloudException]
  @throws[IOException]
  def getAadAppParametersFromKeyVault(keyVaultAuthentication: KeyVaultAuthentication): AadApplicationAuthentication = {
    keyVaultAuthentication match {
      case app: KeyVaultAppAuthentication =>
        val client = getClient(app.keyVaultAppID, app.keyVaultAppKey)
        getAadAppParamsFromKeyVaultImpl(client, app.uri)
      case certificate: KeyVaultCertificateAuthentication =>
        throw new UnsupportedOperationException("certificates are not yet supported")
    }
  }

  private def getAadAppParamsFromKeyVaultImpl(client: KeyVaultClient, uri: String): AadApplicationAuthentication = {
    val id = client.getSecret(uri, AppId)
    val key = client.getSecret(uri, AppKey)

    var authority = client.getSecret(uri, AppAuthority).value()
    if (authority.isEmpty) {
      authority = "microsoft.com"
    }

    AadApplicationAuthentication(
      ID = if (id == null) null else id.value(),
      password = if (key == null) null else key.value(),
      authority = authority)
  }

  private def getStorageParamsFromKeyVaultImpl(client: KeyVaultClient, uri: String): KustoStorageParameters = {
    val sasUrl = Option(client.getSecret(uri, SasUrl))

    val accountId = Option(client.getSecret(uri, StorageAccountId))
    val accountKey = Option(client.getSecret(uri, StorageAccountKey))
    val container = Option(client.getSecret(uri, Container))

    if (sasUrl.isEmpty) {
      KustoStorageParameters(
        account = if (accountId.isDefined) accountId.get.value else "",
        secret = if (accountKey.isDefined) accountKey.get.value else "",
        container = if (container.isDefined) container.get.value else "",
        secretIsAccountKey = true)
    } else {
      KustoDataSourceUtils.parseSas(sasUrl.get.value)
    }
  }
}
Example 87
Source File: MonixEnrichments.scala From lsp4s with Apache License 2.0 | 5 votes |
package scala.meta.jsonrpc import java.io.IOException import java.io.OutputStream import java.nio.ByteBuffer import monix.execution.Ack import monix.execution.Cancelable import monix.execution.Scheduler import monix.reactive.Observable import monix.reactive.Observer import scribe.LoggerSupport object MonixEnrichments { class ObservableCurrentValue[+A](obs: Observable[A])(implicit s: Scheduler) extends (() => A) with Cancelable { private var value: Any = _ private val cancelable = obs.foreach(newValue => value = newValue) override def apply(): A = { if (value == null) { throw new NoSuchElementException( "Reading from empty Observable, consider using MulticastStrategy.behavior(initialValue)" ) } else { value.asInstanceOf[A] } } override def cancel(): Unit = cancelable.cancel() } implicit class XtensionObservable[A](val obs: Observable[A]) extends AnyVal { def focus[B: cats.Eq](f: A => B): Observable[B] = obs.distinctUntilChangedByKey(f).map(f) def toFunction0()(implicit s: Scheduler): () => A = toObservableCurrentValue() def toObservableCurrentValue()( implicit s: Scheduler ): ObservableCurrentValue[A] = new ObservableCurrentValue[A](obs) } implicit class XtensionObserverCompanion[A](val `_`: Observer.type) extends AnyVal { def fromOutputStream( out: OutputStream, logger: LoggerSupport ): Observer.Sync[ByteBuffer] = { new Observer.Sync[ByteBuffer] { private[this] var isClosed: Boolean = false override def onNext(elem: ByteBuffer): Ack = { if (isClosed) Ack.Stop else { try { while (elem.hasRemaining) out.write(elem.get()) out.flush() Ack.Continue } catch { case _: IOException => logger.error("OutputStream closed!") isClosed = true Ack.Stop } } } override def onError(ex: Throwable): Unit = () override def onComplete(): Unit = out.close() } } } }
Example 88
Source File: ParquetFiberDataLoader.scala From OAP with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.oap.io import java.io.IOException import java.time.ZoneId import org.apache.hadoop.conf.Configuration import org.apache.parquet.hadoop.ParquetFiberDataReader import org.apache.parquet.hadoop.api.InitContext import org.apache.parquet.hadoop.utils.Collections3 import org.apache.spark.sql.execution.datasources.oap.filecache.FiberCache import org.apache.spark.sql.execution.datasources.parquet.{ParquetReadSupportWrapper, VectorizedColumnReader} import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector import org.apache.spark.sql.oap.OapRuntime import org.apache.spark.sql.types._ private[oap] case class ParquetFiberDataLoader( configuration: Configuration, reader: ParquetFiberDataReader, blockId: Int) { @throws[IOException] def loadSingleColumn: FiberCache = { val footer = reader.getFooter val fileSchema = footer.getFileMetaData.getSchema val fileMetadata = footer.getFileMetaData.getKeyValueMetaData val readContext = new ParquetReadSupportWrapper() .init(new InitContext(configuration, Collections3.toSetMultiMap(fileMetadata), fileSchema)) val requestedSchema = readContext.getRequestedSchema val sparkRequestedSchemaString = configuration.get(ParquetReadSupportWrapper.SPARK_ROW_REQUESTED_SCHEMA) val sparkSchema = StructType.fromString(sparkRequestedSchemaString) assert(sparkSchema.length == 1, s"Only can get single column every time " + s"by loadSingleColumn, the columns = ${sparkSchema.mkString}") val dataType = sparkSchema.fields(0).dataType // Notes: rowIds is IntegerType in oap index. val rowCount = reader.getFooter.getBlocks.get(blockId).getRowCount.toInt val columnDescriptor = requestedSchema.getColumns.get(0) val originalType = requestedSchema.asGroupType.getFields.get(0).getOriginalType val blockMetaData = footer.getBlocks.get(blockId) val fiberData = reader.readFiberData(blockMetaData, columnDescriptor) val columnReader = new VectorizedColumnReader(columnDescriptor, originalType, fiberData.getPageReader(columnDescriptor), ZoneId.systemDefault, true) if (OapRuntime.getOrCreate.fiberCacheManager.dataCacheCompressEnable) { ParquetDataFiberCompressedWriter.dumpToCache( columnReader, rowCount, dataType) } else { val column = new OnHeapColumnVector(rowCount, dataType) columnReader.readBatch(rowCount, column) ParquetDataFiberWriter.dumpToCache( column.asInstanceOf[OnHeapColumnVector], rowCount) } } }
Example 89
Source File: AppMasterResolver.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.experiments.yarn.client import java.io.IOException import java.net.{HttpURLConnection, URL} import java.nio.charset.StandardCharsets import akka.actor.{ActorRef, ActorSystem} import org.apache.commons.io.IOUtils import org.apache.gearpump.experiments.yarn.glue.Records.{ApplicationId, ApplicationReport} import org.apache.gearpump.experiments.yarn.glue.YarnClient import org.apache.gearpump.util.{AkkaHelper, LogUtil} import org.apache.hadoop.hdfs.web.URLConnectionFactory import org.apache.hadoop.yarn.conf.YarnConfiguration import scala.util.Try class AppMasterResolver(yarnClient: YarnClient, system: ActorSystem) { val LOG = LogUtil.getLogger(getClass) val RETRY_INTERVAL_MS = 3000 // ms def resolve(appId: ApplicationId, timeoutSeconds: Int = 30): ActorRef = { val appMaster = retry(connect(appId), 1 + timeoutSeconds * 1000 / RETRY_INTERVAL_MS) appMaster } private def connect(appId: ApplicationId): ActorRef = { val report = yarnClient.getApplicationReport(appId) AppMasterResolver.resolveAppMasterAddress(report, system) } private def retry(fun: => ActorRef, times: Int): ActorRef = { var index = 0 var result: ActorRef = null while (index < times && result == null) { Thread.sleep(RETRY_INTERVAL_MS) index += 1 val tryConnect = Try(fun) if (tryConnect.isFailure) { LOG.error(s"Failed to connect YarnAppMaster(tried $index)... " + tryConnect.failed.get.getMessage) } else { result = tryConnect.get } } result } } object AppMasterResolver { val LOG = LogUtil.getLogger(getClass) def resolveAppMasterAddress(report: ApplicationReport, system: ActorSystem): ActorRef = { val appMasterPath = s"${report.getTrackingURL}/supervisor-actor-path" LOG.info(s"appMasterPath=$appMasterPath") val connectionFactory: URLConnectionFactory = URLConnectionFactory .newDefaultURLConnectionFactory(new YarnConfiguration()) val url: URL = new URL(appMasterPath) val connection: HttpURLConnection = connectionFactory.openConnection(url) .asInstanceOf[HttpURLConnection] connection.setInstanceFollowRedirects(true) try { connection.connect() } catch { case e: IOException => LOG.error(s"Failed to connect to AppMaster" + e.getMessage) } val status = connection.getResponseCode if (status == 200) { val stream: java.io.InputStream = connection.getInputStream val response = IOUtils.toString(stream, StandardCharsets.UTF_8) LOG.info("Successfully resolved AppMaster address: " + response) connection.disconnect() AkkaHelper.actorFor(system, response) } else { connection.disconnect() throw new IOException("Fail to resolve AppMaster address, please make sure " + s"${report.getTrackingURL} is accessible...") } } }
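The resolver above wraps each connection attempt in a Try and sleeps between attempts. A self-contained sketch of that retry pattern, with no YARN or Akka dependencies (the helper name and interval are illustrative):

import scala.util.{Failure, Success, Try}

object Retry {
  // Retries `op` up to `times` attempts, sleeping `intervalMs` before each attempt,
  // and returns the first successful result, mirroring AppMasterResolver.retry.
  def retry[T](op: => T, times: Int, intervalMs: Long = 3000): Option[T] = {
    var index = 0
    var result: Option[T] = None
    while (index < times && result.isEmpty) {
      Thread.sleep(intervalMs)
      index += 1
      Try(op) match {
        case Success(value) => result = Some(value)
        case Failure(error) => println(s"Attempt $index failed: ${error.getMessage}")
      }
    }
    result
  }
}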
Example 90
Source File: FileUtils.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.util import java.io.{File, IOException} import java.nio.charset.Charset import com.google.common.io.Files object FileUtils { private val UTF8 = Charset.forName("UTF-8") def write(file: File, str: String): Unit = { Files.write(str, file, UTF8) } def read(file: File): String = { Files.asCharSource(file, UTF8).read() } def writeByteArrayToFile(file: File, bytes: Array[Byte]): Unit = { Files.write(bytes, file) } def readFileToByteArray(file: File): Array[Byte] = { Files.toByteArray(file) } def forceMkdir(directory: File): Unit = { if (directory.exists() && directory.isFile) { throw new IOException(s"Failed to create directory ${directory.toString}, it already exists") } Files.createParentDirs(directory) directory.mkdir() } }
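A small usage sketch for the helpers above, assuming org.apache.gearpump.util.FileUtils is on the classpath; the temporary file and directory names are arbitrary.

import java.io.File
import org.apache.gearpump.util.FileUtils

object FileUtilsExample {
  def main(args: Array[String]): Unit = {
    val file = File.createTempFile("gearpump-", ".txt")
    FileUtils.write(file, "hello gearpump")
    println(FileUtils.read(file))        // prints: hello gearpump

    val dir = new File(file.getParentFile, "gearpump-example-dir")
    FileUtils.forceMkdir(dir)            // throws IOException if a file with that name exists
    println(dir.isDirectory)             // true

    file.delete()
    dir.delete()
  }
}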
Example 91
Source File: process.scala From scala-steward with Apache License 2.0 | 5 votes |
package org.scalasteward.core.io import cats.effect._ import cats.implicits._ import fs2.Stream import java.io.{File, IOException, InputStream} import org.scalasteward.core.util._ import scala.collection.mutable.ListBuffer import scala.concurrent.TimeoutException import scala.concurrent.duration.FiniteDuration object process { def slurp[F[_]]( cmd: Nel[String], cwd: Option[File], extraEnv: Map[String, String], timeout: FiniteDuration, log: String => F[Unit], blocker: Blocker )(implicit contextShift: ContextShift[F], timer: Timer[F], F: Concurrent[F]): F[List[String]] = createProcess(cmd, cwd, extraEnv).flatMap { process => F.delay(new ListBuffer[String]).flatMap { buffer => val readOut = { val out = readInputStream[F](process.getInputStream, blocker) out.evalMap(line => F.delay(appendBounded(buffer, line, 4096)) >> log(line)).compile.drain } val showCmd = (extraEnv.map { case (k, v) => s"$k=$v" }.toList ++ cmd.toList).mkString_(" ") val result = readOut >> F.delay(process.waitFor()) >>= { exitValue => if (exitValue === 0) F.pure(buffer.toList) else { val msg = s"'$showCmd' exited with code $exitValue" F.raiseError[List[String]](new IOException(makeMessage(msg, buffer.toList))) } } val fallback = F.delay(process.destroyForcibly()) >> { val msg = s"'$showCmd' timed out after ${timeout.toString}" F.raiseError[List[String]](new TimeoutException(makeMessage(msg, buffer.toList))) } Concurrent.timeoutTo(result, timeout, fallback) } } private def createProcess[F[_]]( cmd: Nel[String], cwd: Option[File], extraEnv: Map[String, String] )(implicit F: Sync[F]): F[Process] = F.delay { val pb = new ProcessBuilder(cmd.toList: _*) val env = pb.environment() cwd.foreach(pb.directory) extraEnv.foreach { case (key, value) => env.put(key, value) } pb.redirectErrorStream(true) pb.start() } private def readInputStream[F[_]](is: InputStream, blocker: Blocker)(implicit F: Sync[F], cs: ContextShift[F] ): Stream[F, String] = fs2.io .readInputStream(F.pure(is), chunkSize = 4096, blocker) .through(fs2.text.utf8Decode) .through(fs2.text.lines) private def makeMessage(prefix: String, output: List[String]): String = (prefix :: output).mkString("\n") }
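slurp above streams a process's output into a bounded buffer and raises an IOException on a non-zero exit code. A much simpler sketch of the same idea using plain ProcessBuilder instead of cats-effect and fs2, without the timeout or bounded-buffer handling (names are illustrative):

import java.io.IOException
import scala.io.Source

object SlurpSketch {
  // Runs a command, merges stderr into stdout, collects the output lines,
  // and raises an IOException on a non-zero exit code (no timeout handling here).
  def slurp(cmd: Seq[String]): List[String] = {
    val process = new ProcessBuilder(cmd: _*).redirectErrorStream(true).start()
    val lines = Source.fromInputStream(process.getInputStream).getLines().toList
    val exitValue = process.waitFor()
    if (exitValue != 0)
      throw new IOException(s"'${cmd.mkString(" ")}' exited with code $exitValue:\n${lines.mkString("\n")}")
    lines
  }
}

// e.g. SlurpSketch.slurp(Seq("git", "status"))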
Example 92
Source File: LivyConnectionSpec.scala From incubator-livy with Apache License 2.0 | 5 votes |
package org.apache.livy.client.http import java.io.IOException import java.net.URLEncoder import java.nio.charset.StandardCharsets.UTF_8 import org.apache.http.client.utils.URIBuilder import org.eclipse.jetty.security._ import org.eclipse.jetty.security.authentication.BasicAuthenticator import org.eclipse.jetty.util.security._ import org.scalatest.{BeforeAndAfterAll, FunSpecLike} import org.scalatest.Matchers._ import org.scalatra.servlet.ScalatraListener import org.apache.livy.{LivyBaseUnitTestSuite, LivyConf} import org.apache.livy.server.WebServer class LivyConnectionSpec extends FunSpecLike with BeforeAndAfterAll with LivyBaseUnitTestSuite { describe("LivyConnection") { def basicAuth(username: String, password: String, realm: String): SecurityHandler = { val roles = Array("user") val l = new HashLoginService() l.putUser(username, Credential.getCredential(password), roles) l.setName(realm) val constraint = new Constraint() constraint.setName(Constraint.__BASIC_AUTH) constraint.setRoles(roles) constraint.setAuthenticate(true) val cm = new ConstraintMapping() cm.setConstraint(constraint) cm.setPathSpec("/*") val csh = new ConstraintSecurityHandler() csh.setAuthenticator(new BasicAuthenticator()) csh.setRealmName(realm) csh.addConstraintMapping(cm) csh.setLoginService(l) csh } def test(password: String, livyConf: LivyConf = new LivyConf()): Unit = { val username = "user name" val server = new WebServer(livyConf, "0.0.0.0", 0) server.context.setSecurityHandler(basicAuth(username, password, "realm")) server.context.setResourceBase("src/main/org/apache/livy/server") server.context.setInitParameter(ScalatraListener.LifeCycleKey, classOf[HttpClientTestBootstrap].getCanonicalName) server.context.addEventListener(new ScalatraListener) server.start() val utf8Name = UTF_8.name() val uri = new URIBuilder() .setScheme(server.protocol) .setHost(server.host) .setPort(server.port) .setUserInfo(URLEncoder.encode(username, utf8Name), URLEncoder.encode(password, utf8Name)) .build() info(uri.toString) val conn = new LivyConnection(uri, new HttpConf(null)) try { conn.get(classOf[Object], "/") should not be (null) } finally { conn.close() } server.stop() server.join() } it("should support HTTP auth with password") { test("pass:word") } it("should support HTTP auth with empty password") { test("") } it("should be failed with large header size") { val livyConf = new LivyConf() .set(LivyConf.REQUEST_HEADER_SIZE, 1024) .set(LivyConf.RESPONSE_HEADER_SIZE, 1024) val pwd = "test-password" * 100 val exception = intercept[IOException](test(pwd, livyConf)) exception.getMessage.contains("Request Header Fields Too Large") should be(true) } it("should be succeeded with configured header size") { val livyConf = new LivyConf() .set(LivyConf.REQUEST_HEADER_SIZE, 2048) .set(LivyConf.RESPONSE_HEADER_SIZE, 2048) val pwd = "test-password" * 100 test(pwd, livyConf) } } }
Example 93
Source File: TestOutputStream.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming import java.io.{IOException, ObjectInputStream} import java.util.concurrent.ConcurrentLinkedQueue import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.dstream.{DStream, ForEachDStream} import org.apache.spark.util.Utils class TestOutputStream[T: ClassTag](parent: DStream[T], val output: ConcurrentLinkedQueue[Seq[T]] = new ConcurrentLinkedQueue[Seq[T]]()) extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { val collected = rdd.collect() output.add(collected) }, false) { // This is to clear the output buffer every time it is read from a checkpoint @throws(classOf[IOException]) private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { ois.defaultReadObject() output.clear() } }
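TestOutputStream overrides readObject so its collected output is cleared whenever the stream is restored from a checkpoint. A generic sketch of that custom-deserialization pattern with plain java.io, outside of Spark (the class name is hypothetical):

import java.io.{IOException, ObjectInputStream}
import scala.collection.mutable.ArrayBuffer

class ResettableCollector extends Serializable {
  @transient private var buffer = new ArrayBuffer[String]()

  def add(s: String): Unit = buffer += s
  def values: Seq[String] = buffer.toSeq

  // Re-initialize transient state after default deserialization,
  // mirroring what TestOutputStream does with its output queue.
  @throws(classOf[IOException])
  private def readObject(ois: ObjectInputStream): Unit = {
    ois.defaultReadObject()
    buffer = new ArrayBuffer[String]()
  }
}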
Example 94
Source File: SparkSQLCLIService.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.thriftserver import java.io.IOException import java.util.{List => JList} import javax.security.auth.login.LoginException import scala.collection.JavaConverters._ import org.apache.commons.logging.Log import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.shims.Utils import org.apache.hadoop.security.UserGroupInformation import org.apache.hive.service.{AbstractService, Service, ServiceException} import org.apache.hive.service.Service.STATE import org.apache.hive.service.auth.HiveAuthFactory import org.apache.hive.service.cli._ import org.apache.hive.service.server.HiveServer2 import org.apache.spark.sql.SQLContext import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._ private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLContext) extends CLIService(hiveServer) with ReflectedCompositeService { override def init(hiveConf: HiveConf) { setSuperField(this, "hiveConf", hiveConf) val sparkSqlSessionManager = new SparkSQLSessionManager(hiveServer, sqlContext) setSuperField(this, "sessionManager", sparkSqlSessionManager) addService(sparkSqlSessionManager) var sparkServiceUGI: UserGroupInformation = null if (UserGroupInformation.isSecurityEnabled) { try { HiveAuthFactory.loginFromKeytab(hiveConf) sparkServiceUGI = Utils.getUGI() setSuperField(this, "serviceUGI", sparkServiceUGI) } catch { case e @ (_: IOException | _: LoginException) => throw new ServiceException("Unable to login to kerberos with given principal/keytab", e) } } initCompositeService(hiveConf) } override def getInfo(sessionHandle: SessionHandle, getInfoType: GetInfoType): GetInfoValue = { getInfoType match { case GetInfoType.CLI_SERVER_NAME => new GetInfoValue("Spark SQL") case GetInfoType.CLI_DBMS_NAME => new GetInfoValue("Spark SQL") case GetInfoType.CLI_DBMS_VER => new GetInfoValue(sqlContext.sparkContext.version) case _ => super.getInfo(sessionHandle, getInfoType) } } } private[thriftserver] trait ReflectedCompositeService { this: AbstractService => def initCompositeService(hiveConf: HiveConf) { // Emulating `CompositeService.init(hiveConf)` val serviceList = getAncestorField[JList[Service]](this, 2, "serviceList") serviceList.asScala.foreach(_.init(hiveConf)) // Emulating `AbstractService.init(hiveConf)` invoke(classOf[AbstractService], this, "ensureCurrentState", classOf[STATE] -> STATE.NOTINITED) setAncestorField(this, 3, "hiveConf", hiveConf) invoke(classOf[AbstractService], this, "changeState", classOf[STATE] -> STATE.INITED) getAncestorField[Log](this, 3, "LOG").info(s"Service: $getName is inited.") } }
Example 95
Source File: ReusableStringReaderSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions.xml import java.io.IOException import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.expressions.xml.UDFXPathUtil.ReusableStringReader class ReusableStringReaderSuite extends SparkFunSuite { private val fox = "Quick brown fox jumps over the lazy dog." test("empty reader") { val reader = new ReusableStringReader intercept[IOException] { reader.read() } intercept[IOException] { reader.ready() } reader.close() } test("mark reset") { val reader = new ReusableStringReader if (reader.markSupported()) { reader.asInstanceOf[ReusableStringReader].set(fox) assert(reader.ready()) val cc = new Array[Char](6) var read = reader.read(cc) assert(read == 6) assert("Quick " == new String(cc)) reader.mark(100) read = reader.read(cc) assert(read == 6) assert("brown " == new String(cc)) reader.reset() read = reader.read(cc) assert(read == 6) assert("brown " == new String(cc)) } reader.close() } test("skip") { val reader = new ReusableStringReader reader.asInstanceOf[ReusableStringReader].set(fox) // skip entire the data: var skipped = reader.skip(fox.length() + 1) assert(fox.length() == skipped) assert(-1 == reader.read()) reader.asInstanceOf[ReusableStringReader].set(fox) // reset the data val cc = new Array[Char](6) var read = reader.read(cc) assert(read == 6) assert("Quick " == new String(cc)) // skip some piece of data: skipped = reader.skip(30) assert(skipped == 30) read = reader.read(cc) assert(read == 4) assert("dog." == new String(cc, 0, read)) // skip when already at EOF: skipped = reader.skip(300) assert(skipped == 0, skipped) assert(reader.read() == -1) reader.close() } }
Example 96
Source File: ProcessTestUtils.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.test import java.io.{InputStream, IOException} import scala.sys.process.BasicIO object ProcessTestUtils { class ProcessOutputCapturer(stream: InputStream, capture: String => Unit) extends Thread { this.setDaemon(true) override def run(): Unit = { try { BasicIO.processFully(capture)(stream) } catch { case _: IOException => // Ignores the IOException thrown when the process terminates, which closes the input // stream abruptly. } } } }
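A possible way to drive ProcessOutputCapturer, assuming the object above is importable from org.apache.spark.sql.test; the command is illustrative:

import org.apache.spark.sql.test.ProcessTestUtils

object CapturerExample {
  def main(args: Array[String]): Unit = {
    val process = new ProcessBuilder("echo", "hello").start()
    val capturer =
      new ProcessTestUtils.ProcessOutputCapturer(process.getInputStream, line => println(s"out: $line"))
    capturer.start()    // daemon thread that copies the stream until EOF or an IOException
    process.waitFor()
  }
}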
Example 97
Source File: RawTextSender.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.{ByteArrayOutputStream, IOException} import java.net.ServerSocket import java.nio.ByteBuffer import scala.io.Source import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.serializer.KryoSerializer import org.apache.spark.util.IntParam private[streaming] object RawTextSender extends Logging { def main(args: Array[String]) { if (args.length != 4) { // scalastyle:off println System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>") // scalastyle:on println System.exit(1) } // Parse the arguments using a pattern match val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args // Repeat the input data multiple times to fill in a buffer val lines = Source.fromFile(file).getLines().toArray val bufferStream = new ByteArrayOutputStream(blockSize + 1000) val ser = new KryoSerializer(new SparkConf()).newInstance() val serStream = ser.serializeStream(bufferStream) var i = 0 while (bufferStream.size < blockSize) { serStream.writeObject(lines(i)) i = (i + 1) % lines.length } val array = bufferStream.toByteArray val countBuf = ByteBuffer.wrap(new Array[Byte](4)) countBuf.putInt(array.length) countBuf.flip() val serverSocket = new ServerSocket(port) logInfo("Listening on port " + port) while (true) { val socket = serverSocket.accept() logInfo("Got a new connection") val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec) try { while (true) { out.write(countBuf.array) out.write(array) } } catch { case e: IOException => logError("Client disconnected") } finally { socket.close() } } } }
Example 98
Source File: HdfsUtils.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.{FileNotFoundException, IOException} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ private[streaming] object HdfsUtils { def getOutputStream(path: String, conf: Configuration): FSDataOutputStream = { val dfsPath = new Path(path) val dfs = getFileSystemForPath(dfsPath, conf) // If the file exists and we have append support, append instead of creating a new file val stream: FSDataOutputStream = { if (dfs.isFile(dfsPath)) { if (conf.getBoolean("hdfs.append.support", false) || dfs.isInstanceOf[RawLocalFileSystem]) { dfs.append(dfsPath) } else { throw new IllegalStateException("File exists and there is no append support!") } } else { dfs.create(dfsPath) } } stream } def getInputStream(path: String, conf: Configuration): FSDataInputStream = { val dfsPath = new Path(path) val dfs = getFileSystemForPath(dfsPath, conf) try { dfs.open(dfsPath) } catch { case _: FileNotFoundException => null case e: IOException => // If we are really unlucky, the file may be deleted as we're opening the stream. // This can happen as clean up is performed by daemon threads that may be left over from // previous runs. if (!dfs.isFile(dfsPath)) null else throw e } } def checkState(state: Boolean, errorMsg: => String) { if (!state) { throw new IllegalStateException(errorMsg) } } def checkFileExists(path: String, conf: Configuration): Boolean = { val hdpPath = new Path(path) val fs = getFileSystemForPath(hdpPath, conf) fs.isFile(hdpPath) } }
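A sketch of exercising HdfsUtils against the local file system through a default Hadoop Configuration; the path is a placeholder, and because the object is private[streaming] the caller is placed in the same package:

package org.apache.spark.streaming.util  // HdfsUtils is private[streaming], so the caller must live under this package

import java.nio.charset.StandardCharsets
import org.apache.hadoop.conf.Configuration

object HdfsUtilsExample {
  def main(args: Array[String]): Unit = {
    val conf = new Configuration()                    // with no HDFS settings this resolves to the local file system
    val path = "file:///tmp/hdfs-utils-example.bin"   // placeholder path

    val payload = "write-ahead record".getBytes(StandardCharsets.UTF_8)
    val out = HdfsUtils.getOutputStream(path, conf)
    out.write(payload)
    out.close()

    val in = HdfsUtils.getInputStream(path, conf)
    val buffer = new Array[Byte](payload.length)
    in.readFully(buffer)                              // FSDataInputStream extends DataInputStream
    in.close()
    println(new String(buffer, StandardCharsets.UTF_8))
  }
}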
Example 99
Source File: FileBasedWriteAheadLogReader.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.{Closeable, EOFException, IOException} import java.nio.ByteBuffer import org.apache.hadoop.conf.Configuration import org.apache.spark.internal.Logging private[streaming] class FileBasedWriteAheadLogReader(path: String, conf: Configuration) extends Iterator[ByteBuffer] with Closeable with Logging { private val instream = HdfsUtils.getInputStream(path, conf) private var closed = (instream == null) // the file may be deleted as we're opening the stream private var nextItem: Option[ByteBuffer] = None override def hasNext: Boolean = synchronized { if (closed) { return false } if (nextItem.isDefined) { // handle the case where hasNext is called without calling next true } else { try { val length = instream.readInt() val buffer = new Array[Byte](length) instream.readFully(buffer) nextItem = Some(ByteBuffer.wrap(buffer)) logTrace("Read next item " + nextItem.get) true } catch { case e: EOFException => logDebug("Error reading next item, EOF reached", e) close() false case e: IOException => logWarning("Error while trying to read data. If the file was deleted, " + "this should be okay.", e) close() if (HdfsUtils.checkFileExists(path, conf)) { // If file exists, this could be a legitimate error throw e } else { // File was deleted. This can occur when the daemon cleanup thread takes time to // delete the file during recovery. false } case e: Exception => logWarning("Error while trying to read data from HDFS.", e) close() throw e } } } override def next(): ByteBuffer = synchronized { val data = nextItem.getOrElse { close() throw new IllegalStateException( "next called without calling hasNext or after hasNext returned false") } nextItem = None // Ensure the next hasNext call loads new data. data } override def close(): Unit = synchronized { if (!closed) { instream.close() } closed = true } }
Example 100
Source File: CommandUtils.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.worker import java.io.{File, FileOutputStream, InputStream, IOException} import scala.collection.JavaConverters._ import scala.collection.Map import org.apache.spark.SecurityManager import org.apache.spark.deploy.Command import org.apache.spark.internal.Logging import org.apache.spark.launcher.WorkerCommandBuilder import org.apache.spark.util.Utils def redirectStream(in: InputStream, file: File) { val out = new FileOutputStream(file, true) // TODO: It would be nice to add a shutdown hook here that explains why the output is // terminating. Otherwise if the worker dies the executor logs will silently stop. new Thread("redirect output to " + file) { override def run() { try { Utils.copyStream(in, out, true) } catch { case e: IOException => logInfo("Redirection to " + file + " closed: " + e.getMessage) } } }.start() } }
Example 101
Source File: SparkHadoopMapRedUtil.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.mapred import java.io.IOException import org.apache.hadoop.mapreduce.{TaskAttemptContext => MapReduceTaskAttemptContext} import org.apache.hadoop.mapreduce.{OutputCommitter => MapReduceOutputCommitter} import org.apache.spark.{SparkEnv, TaskContext} import org.apache.spark.executor.CommitDeniedException import org.apache.spark.internal.Logging object SparkHadoopMapRedUtil extends Logging { def commitTask( committer: MapReduceOutputCommitter, mrTaskContext: MapReduceTaskAttemptContext, jobId: Int, splitId: Int): Unit = { val mrTaskAttemptID = mrTaskContext.getTaskAttemptID // Called after we have decided to commit def performCommit(): Unit = { try { committer.commitTask(mrTaskContext) logInfo(s"$mrTaskAttemptID: Committed") } catch { case cause: IOException => logError(s"Error committing the output of task: $mrTaskAttemptID", cause) committer.abortTask(mrTaskContext) throw cause } } // First, check whether the task's output has already been committed by some other attempt if (committer.needsTaskCommit(mrTaskContext)) { val shouldCoordinateWithDriver: Boolean = { val sparkConf = SparkEnv.get.conf // We only need to coordinate with the driver if there are concurrent task attempts. // Note that this could happen even when speculation is not enabled (e.g. see SPARK-8029). // This (undocumented) setting is an escape-hatch in case the commit code introduces bugs. sparkConf.getBoolean("spark.hadoop.outputCommitCoordination.enabled", defaultValue = true) } if (shouldCoordinateWithDriver) { val outputCommitCoordinator = SparkEnv.get.outputCommitCoordinator val taskAttemptNumber = TaskContext.get().attemptNumber() val canCommit = outputCommitCoordinator.canCommit(jobId, splitId, taskAttemptNumber) if (canCommit) { performCommit() } else { val message = s"$mrTaskAttemptID: Not committed because the driver did not authorize commit" logInfo(message) // We need to abort the task so that the driver can reschedule new attempts, if necessary committer.abortTask(mrTaskContext) throw new CommitDeniedException(message, jobId, splitId, taskAttemptNumber) } } else { // Speculation is disabled or a user has chosen to manually bypass the commit coordination performCommit() } } else { // Some other attempt committed the output, so we do nothing and signal success logInfo(s"No need to commit output of task because needsTaskCommit=false: $mrTaskAttemptID") } } }
Example 102
Source File: SerializableBuffer.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream} import java.nio.ByteBuffer import java.nio.channels.Channels private[spark] class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable { def value: ByteBuffer = buffer private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { val length = in.readInt() buffer = ByteBuffer.allocate(length) var amountRead = 0 val channel = Channels.newChannel(in) while (amountRead < length) { val ret = channel.read(buffer) if (ret == -1) { throw new EOFException("End of file before fully reading buffer") } amountRead += ret } buffer.rewind() // Allow us to read it later } private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.writeInt(buffer.limit()) if (Channels.newChannel(out).write(buffer) != buffer.limit()) { throw new IOException("Could not fully write buffer to output stream") } buffer.rewind() // Allow us to write it again later } }
Example 103
Source File: DiskStore.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.storage import java.io.{FileOutputStream, IOException, RandomAccessFile} import java.nio.ByteBuffer import java.nio.channels.FileChannel.MapMode import com.google.common.io.Closeables import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.util.Utils import org.apache.spark.util.io.ChunkedByteBuffer def put(blockId: BlockId)(writeFunc: FileOutputStream => Unit): Unit = { if (contains(blockId)) { throw new IllegalStateException(s"Block $blockId is already present in the disk store") } logDebug(s"Attempting to put block $blockId") val startTime = System.currentTimeMillis val file = diskManager.getFile(blockId) val fileOutputStream = new FileOutputStream(file) var threwException: Boolean = true try { writeFunc(fileOutputStream) threwException = false } finally { try { Closeables.close(fileOutputStream, threwException) } finally { if (threwException) { remove(blockId) } } } val finishTime = System.currentTimeMillis logDebug("Block %s stored as %s file on disk in %d ms".format( file.getName, Utils.bytesToString(file.length()), finishTime - startTime)) } def putBytes(blockId: BlockId, bytes: ChunkedByteBuffer): Unit = { put(blockId) { fileOutputStream => val channel = fileOutputStream.getChannel Utils.tryWithSafeFinally { bytes.writeFully(channel) } { channel.close() } } } def getBytes(blockId: BlockId): ChunkedByteBuffer = { val file = diskManager.getFile(blockId.name) val channel = new RandomAccessFile(file, "r").getChannel Utils.tryWithSafeFinally { // For small files, directly read rather than memory map if (file.length < minMemoryMapBytes) { val buf = ByteBuffer.allocate(file.length.toInt) channel.position(0) while (buf.remaining() != 0) { if (channel.read(buf) == -1) { throw new IOException("Reached EOF before filling buffer\n" + s"offset=0\nfile=${file.getAbsolutePath}\nbuf.remaining=${buf.remaining}") } } buf.flip() new ChunkedByteBuffer(buf) } else { new ChunkedByteBuffer(channel.map(MapMode.READ_ONLY, 0, file.length)) } } { channel.close() } } def remove(blockId: BlockId): Boolean = { val file = diskManager.getFile(blockId.name) if (file.exists()) { val ret = file.delete() if (!ret) { logWarning(s"Error deleting ${file.getPath()}") } ret } else { false } } def contains(blockId: BlockId): Boolean = { val file = diskManager.getFile(blockId.name) file.exists() } }
Example 104
Source File: CartesianRDD.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.util.Utils private[spark] class CartesianPartition( idx: Int, @transient private val rdd1: RDD[_], @transient private val rdd2: RDD[_], s1Index: Int, s2Index: Int ) extends Partition { var s1 = rdd1.partitions(s1Index) var s2 = rdd2.partitions(s2Index) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization s1 = rdd1.partitions(s1Index) s2 = rdd2.partitions(s2Index) oos.defaultWriteObject() } } private[spark] class CartesianRDD[T: ClassTag, U: ClassTag]( sc: SparkContext, var rdd1 : RDD[T], var rdd2 : RDD[U]) extends RDD[(T, U)](sc, Nil) with Serializable { val numPartitionsInRdd2 = rdd2.partitions.length override def getPartitions: Array[Partition] = { // create the cross product split val array = new Array[Partition](rdd1.partitions.length * rdd2.partitions.length) for (s1 <- rdd1.partitions; s2 <- rdd2.partitions) { val idx = s1.index * numPartitionsInRdd2 + s2.index array(idx) = new CartesianPartition(idx, rdd1, rdd2, s1.index, s2.index) } array } override def getPreferredLocations(split: Partition): Seq[String] = { val currSplit = split.asInstanceOf[CartesianPartition] (rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)).distinct } override def compute(split: Partition, context: TaskContext): Iterator[(T, U)] = { val currSplit = split.asInstanceOf[CartesianPartition] for (x <- rdd1.iterator(currSplit.s1, context); y <- rdd2.iterator(currSplit.s2, context)) yield (x, y) } override def getDependencies: Seq[Dependency[_]] = List( new NarrowDependency(rdd1) { def getParents(id: Int): Seq[Int] = List(id / numPartitionsInRdd2) }, new NarrowDependency(rdd2) { def getParents(id: Int): Seq[Int] = List(id % numPartitionsInRdd2) } ) override def clearDependencies() { super.clearDependencies() rdd1 = null rdd2 = null } }
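CartesianRDD is the RDD behind the user-facing cartesian transformation. A brief sketch of that API with a local SparkContext (application name and master are arbitrary):

import org.apache.spark.{SparkConf, SparkContext}

object CartesianExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("cartesian-example").setMaster("local[2]"))
    val letters = sc.parallelize(Seq("a", "b"))
    val numbers = sc.parallelize(Seq(1, 2, 3))
    val pairs = letters.cartesian(numbers)          // materializes a CartesianRDD under the hood
    println(pairs.collect().sorted.mkString(", "))  // (a,1), (a,2), (a,3), (b,1), (b,2), (b,3)
    sc.stop()
  }
}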
Example 105
Source File: UnionRDD.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer import scala.collection.parallel.{ForkJoinTaskSupport, ThreadPoolTaskSupport} import scala.concurrent.forkjoin.ForkJoinPool import scala.reflect.ClassTag import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils private[spark] class UnionPartition[T: ClassTag]( idx: Int, @transient private val rdd: RDD[T], val parentRddIndex: Int, @transient private val parentRddPartitionIndex: Int) extends Partition { var parentPartition: Partition = rdd.partitions(parentRddPartitionIndex) def preferredLocations(): Seq[String] = rdd.preferredLocations(parentPartition) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization parentPartition = rdd.partitions(parentRddPartitionIndex) oos.defaultWriteObject() } } object UnionRDD { private[spark] lazy val partitionEvalTaskSupport = new ForkJoinTaskSupport(new ForkJoinPool(8)) } @DeveloperApi class UnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]]) extends RDD[T](sc, Nil) { // Nil since we implement getDependencies // visible for testing private[spark] val isPartitionListingParallel: Boolean = rdds.length > conf.getInt("spark.rdd.parallelListingThreshold", 10) override def getPartitions: Array[Partition] = { val parRDDs = if (isPartitionListingParallel) { val parArray = rdds.par parArray.tasksupport = UnionRDD.partitionEvalTaskSupport parArray } else { rdds } val array = new Array[Partition](parRDDs.map(_.partitions.length).seq.sum) var pos = 0 for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) { array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index) pos += 1 } array } override def getDependencies: Seq[Dependency[_]] = { val deps = new ArrayBuffer[Dependency[_]] var pos = 0 for (rdd <- rdds) { deps += new RangeDependency(rdd, 0, pos, rdd.partitions.length) pos += rdd.partitions.length } deps } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val part = s.asInstanceOf[UnionPartition[T]] parent[T](part.parentRddIndex).iterator(part.parentPartition, context) } override def getPreferredLocations(s: Partition): Seq[String] = s.asInstanceOf[UnionPartition[T]].preferredLocations() override def clearDependencies() { super.clearDependencies() rdds = null } }
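UnionRDD is what SparkContext.union constructs. A brief sketch with a local SparkContext (application name and master are arbitrary):

import org.apache.spark.{SparkConf, SparkContext}

object UnionExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("union-example").setMaster("local[2]"))
    val first = sc.parallelize(1 to 3)
    val second = sc.parallelize(4 to 6)
    val combined = sc.union(Seq(first, second))   // builds a UnionRDD over both parents
    println(combined.count())                     // 6
    sc.stop()
  }
}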
Example 106
Source File: PartitionerAwareUnionRDD.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark.{OneToOneDependency, Partition, SparkContext, TaskContext} import org.apache.spark.util.Utils private[spark] class PartitionerAwareUnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]] ) extends RDD[T](sc, rdds.map(x => new OneToOneDependency(x))) { require(rdds.nonEmpty) require(rdds.forall(_.partitioner.isDefined)) require(rdds.flatMap(_.partitioner).toSet.size == 1, "Parent RDDs have different partitioners: " + rdds.flatMap(_.partitioner)) override val partitioner = rdds.head.partitioner override def getPartitions: Array[Partition] = { val numPartitions = partitioner.get.numPartitions (0 until numPartitions).map { index => new PartitionerAwareUnionRDDPartition(rdds, index) }.toArray } // Get the location where most of the partitions of parent RDDs are located override def getPreferredLocations(s: Partition): Seq[String] = { logDebug("Finding preferred location for " + this + ", partition " + s.index) val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents val locations = rdds.zip(parentPartitions).flatMap { case (rdd, part) => val parentLocations = currPrefLocs(rdd, part) logDebug("Location of " + rdd + " partition " + part.index + " = " + parentLocations) parentLocations } val location = if (locations.isEmpty) { None } else { // Find the location that maximum number of parent partitions prefer Some(locations.groupBy(x => x).maxBy(_._2.length)._1) } logDebug("Selected location for " + this + ", partition " + s.index + " = " + location) location.toSeq } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents rdds.zip(parentPartitions).iterator.flatMap { case (rdd, p) => rdd.iterator(p, context) } } override def clearDependencies() { super.clearDependencies() rdds = null } // Get the *current* preferred locations from the DAGScheduler (as opposed to the static ones) private def currPrefLocs(rdd: RDD[_], part: Partition): Seq[String] = { rdd.context.getPreferredLocs(rdd, part.index).map(tl => tl.host) } }
Example 107
Source File: TestDiskFull.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.utils import java.io.BufferedOutputStream import java.io.FileOutputStream import java.io.IOException import java.io.OutputStreamWriter import java.io.PrintWriter import java.io.SyncFailedException import java.nio.charset.StandardCharsets import org.clulab.wm.eidos.test.TestUtils._ import org.clulab.wm.eidos.utils.Closer.AutoCloser class TestDiskFull extends Test { def test1 = { val file = "/E:/full.dat" var i = 0 try { val text1 = "The quick brown fox jumped over the lazy dog." val text = text1 + text1 for (limit <- 1 until 400) { val fos = new FileOutputStream(file) val osw = new OutputStreamWriter(new BufferedOutputStream(fos), StandardCharsets.UTF_8.toString) i = 0 new PrintWriter(osw).autoClose { pw => while (i < limit) { pw.print(text) i += 1 // pw.flush() // osw.flush() // fos.flush() fos.getFD.sync() } } } } catch { case exception: SyncFailedException => println(s"Synchronization failed for file $file at $i") println("Exiting with code -2 on assumption that the disk is full") System.exit(-2) case exception: IOException => println(s"IO failed for file $file at $i") println("Exiting with code -2 on assumption that the disk is full") System.exit(-2) case exception: Exception => println(s"Exception for file $file at $i") exception.printStackTrace() case throwable: Throwable => println(s"Throwable for file $file at $i") throwable.printStackTrace() } } // test1 }
Example 108
Source File: ReplayListenerBus.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.io.{InputStream, IOException} import scala.io.Source import org.json4s.jackson.JsonMethods._ import org.apache.spark.Logging import org.apache.spark.util.JsonProtocol def replay(logData: InputStream, sourceName: String): Unit = { var currentLine: String = null var lineNumber: Int = 1 try { val lines = Source.fromInputStream(logData).getLines() lines.foreach { line => currentLine = line postToAll(JsonProtocol.sparkEventFromJson(parse(line))) lineNumber += 1 } } catch { case ioe: IOException => throw ioe case e: Exception => logError(s"Exception parsing Spark event log: $sourceName", e) logError(s"Malformed line #$lineNumber: $currentLine\n") } } }
Example 109
Source File: SerializableBuffer.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream} import java.nio.ByteBuffer import java.nio.channels.Channels private[spark] class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable { def value = buffer private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { val length = in.readInt() buffer = ByteBuffer.allocate(length) var amountRead = 0 val channel = Channels.newChannel(in) while (amountRead < length) { val ret = channel.read(buffer) if (ret == -1) { throw new EOFException("End of file before fully reading buffer") } amountRead += ret } buffer.rewind() // Allow us to read it later } private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.writeInt(buffer.limit()) if (Channels.newChannel(out).write(buffer) != buffer.limit()) { throw new IOException("Could not fully write buffer to output stream") } buffer.rewind() // Allow us to write it again later } }
Example 110
Source File: BlockManagerId.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.storage import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput} import java.util.concurrent.ConcurrentHashMap import org.apache.spark.SparkContext import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils def apply(execId: String, host: String, port: Int) = getCachedBlockManagerId(new BlockManagerId(execId, host, port)) def apply(in: ObjectInput) = { val obj = new BlockManagerId() obj.readExternal(in) getCachedBlockManagerId(obj) } val blockManagerIdCache = new ConcurrentHashMap[BlockManagerId, BlockManagerId]() def getCachedBlockManagerId(id: BlockManagerId): BlockManagerId = { blockManagerIdCache.putIfAbsent(id, id) blockManagerIdCache.get(id) } }
Example 111
Source File: CartesianRDD.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.util.Utils private[spark] class CartesianPartition( idx: Int, @transient rdd1: RDD[_], @transient rdd2: RDD[_], s1Index: Int, s2Index: Int ) extends Partition { var s1 = rdd1.partitions(s1Index) var s2 = rdd2.partitions(s2Index) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization s1 = rdd1.partitions(s1Index) s2 = rdd2.partitions(s2Index) oos.defaultWriteObject() } } private[spark] class CartesianRDD[T: ClassTag, U: ClassTag]( sc: SparkContext, var rdd1 : RDD[T], var rdd2 : RDD[U]) extends RDD[Pair[T, U]](sc, Nil) with Serializable { val numPartitionsInRdd2 = rdd2.partitions.size override def getPartitions: Array[Partition] = { // create the cross product split val array = new Array[Partition](rdd1.partitions.size * rdd2.partitions.size) for (s1 <- rdd1.partitions; s2 <- rdd2.partitions) { val idx = s1.index * numPartitionsInRdd2 + s2.index array(idx) = new CartesianPartition(idx, rdd1, rdd2, s1.index, s2.index) } array } override def getPreferredLocations(split: Partition): Seq[String] = { val currSplit = split.asInstanceOf[CartesianPartition] (rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)).distinct } override def compute(split: Partition, context: TaskContext) = { val currSplit = split.asInstanceOf[CartesianPartition] for (x <- rdd1.iterator(currSplit.s1, context); y <- rdd2.iterator(currSplit.s2, context)) yield (x, y) } override def getDependencies: Seq[Dependency[_]] = List( new NarrowDependency(rdd1) { def getParents(id: Int): Seq[Int] = List(id / numPartitionsInRdd2) }, new NarrowDependency(rdd2) { def getParents(id: Int): Seq[Int] = List(id % numPartitionsInRdd2) } ) override def clearDependencies() { super.clearDependencies() rdd1 = null rdd2 = null } }
Example 112
Source File: UnionRDD.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils private[spark] class UnionPartition[T: ClassTag]( idx: Int, @transient rdd: RDD[T], val parentRddIndex: Int, @transient parentRddPartitionIndex: Int) extends Partition { var parentPartition: Partition = rdd.partitions(parentRddPartitionIndex) def preferredLocations() = rdd.preferredLocations(parentPartition) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization parentPartition = rdd.partitions(parentRddPartitionIndex) oos.defaultWriteObject() } } @DeveloperApi class UnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]]) extends RDD[T](sc, Nil) { // Nil since we implement getDependencies override def getPartitions: Array[Partition] = { val array = new Array[Partition](rdds.map(_.partitions.size).sum) var pos = 0 for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) { array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index) pos += 1 } array } override def getDependencies: Seq[Dependency[_]] = { val deps = new ArrayBuffer[Dependency[_]] var pos = 0 for (rdd <- rdds) { deps += new RangeDependency(rdd, 0, pos, rdd.partitions.size) pos += rdd.partitions.size } deps } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val part = s.asInstanceOf[UnionPartition[T]] parent[T](part.parentRddIndex).iterator(part.parentPartition, context) } override def getPreferredLocations(s: Partition): Seq[String] = s.asInstanceOf[UnionPartition[T]].preferredLocations() override def clearDependencies() { super.clearDependencies() rdds = null } }
Example 113
Source File: PartitionerAwareUnionRDD.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark.{OneToOneDependency, Partition, SparkContext, TaskContext} import org.apache.spark.util.Utils private[spark] class PartitionerAwareUnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]] ) extends RDD[T](sc, rdds.map(x => new OneToOneDependency(x))) { require(rdds.length > 0) require(rdds.flatMap(_.partitioner).toSet.size == 1, "Parent RDDs have different partitioners: " + rdds.flatMap(_.partitioner)) override val partitioner = rdds.head.partitioner override def getPartitions: Array[Partition] = { val numPartitions = partitioner.get.numPartitions (0 until numPartitions).map(index => { new PartitionerAwareUnionRDDPartition(rdds, index) }).toArray } // Get the location where most of the partitions of parent RDDs are located override def getPreferredLocations(s: Partition): Seq[String] = { logDebug("Finding preferred location for " + this + ", partition " + s.index) val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents val locations = rdds.zip(parentPartitions).flatMap { case (rdd, part) => { val parentLocations = currPrefLocs(rdd, part) logDebug("Location of " + rdd + " partition " + part.index + " = " + parentLocations) parentLocations } } val location = if (locations.isEmpty) { None } else { // Find the location that maximum number of parent partitions prefer Some(locations.groupBy(x => x).maxBy(_._2.length)._1) } logDebug("Selected location for " + this + ", partition " + s.index + " = " + location) location.toSeq } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents rdds.zip(parentPartitions).iterator.flatMap { case (rdd, p) => rdd.iterator(p, context) } } override def clearDependencies() { super.clearDependencies() rdds = null } // Get the *current* preferred locations from the DAGScheduler (as opposed to the static ones) private def currPrefLocs(rdd: RDD[_], part: Partition): Seq[String] = { rdd.context.getPreferredLocs(rdd, part.index).map(tl => tl.host) } }
Example 114
Source File: NotSerializableFakeTask.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.io.{ObjectInputStream, ObjectOutputStream, IOException} import org.apache.spark.TaskContext private[spark] class NotSerializableFakeTask(myId: Int, stageId: Int) extends Task[Array[Byte]](stageId, 0) { override def runTask(context: TaskContext): Array[Byte] = Array.empty[Byte] override def preferredLocations: Seq[TaskLocation] = Seq[TaskLocation]() @throws(classOf[IOException]) private def writeObject(out: ObjectOutputStream): Unit = { if (stageId == 0) { throw new IllegalStateException("Cannot serialize") } } @throws(classOf[IOException]) private def readObject(in: ObjectInputStream): Unit = {} }
Example 115
Source File: ScalismoViewer.scala From scalismo-ui with GNU General Public License v3.0 | 5 votes |
package scalismo.ui.app import java.io.{File, IOException} import scalismo.geometry._3D import scalismo.io.{ImageIO, LandmarkIO, MeshIO, StatismoIO} import scalismo.ui.api.ScalismoUI import scalismo.ui.util.FileUtil import scala.util.{Failure, Success} object ScalismoViewer { def showErrorMessage(file: File, exception: Throwable): Unit = { val message = s"Unable to load file ${file.getName}" System.err.println(message) System.err.println(exception.getMessage) } def main(args: Array[String]): Unit = { scalismo.initialize() val ui = ScalismoUI("Scalismo Viewer") val defaultGroup = ui.createGroup("Default Group") for (filename <- args) { val file = new File(filename) if (!file.isFile) { showErrorMessage(file, new IOException(s"name does not identify a valid file.")) } else { val basename = FileUtil.basename(file) val extension = FileUtil.extension(file).toLowerCase extension match { case "h5" => StatismoIO.readStatismoMeshModel(new File(filename)) match { case Success(model) => // we create for every model a new group val modelGroup = ui.createGroup(basename) ui.show(modelGroup, model, basename) case Failure(t) => showErrorMessage(file, t) } case "stl" => MeshIO.readMesh(file) match { case Success(mesh) => ui.show(defaultGroup, mesh, basename) case Failure(t) => showErrorMessage(file, t) } case "vtk" => MeshIO.readMesh(file) match { case Success(mesh) => ui.show(defaultGroup, mesh, basename) case Failure(_) => ImageIO.read3DScalarImageAsType[Float](file, resampleOblique = true) match { case Success(image) => ui.show(defaultGroup, image, basename) case Failure(t) => showErrorMessage(file, t) } } case "nii" => ImageIO.read3DScalarImageAsType[Float](file) match { case Success(image) => ui.show(defaultGroup, image, basename) case Failure(t) => showErrorMessage(file, t) } case "json" => LandmarkIO.readLandmarksJson[_3D](file) match { case Success(lms) => ui.show(defaultGroup, lms, basename) case Failure(t) => showErrorMessage(file, t) } case "csv" => LandmarkIO.readLandmarksCsv[_3D](file) match { case Success(lms) => ui.show(defaultGroup, lms, basename) case Failure(t) => showErrorMessage(file, t) } case _ => showErrorMessage(file, new IOException("Unknown file extension: " + extension)) } } } } }
Example 116
Source File: SerializedCpg.scala From codepropertygraph with Apache License 2.0 | 5 votes |
package io.shiftleft import java.io.{File, IOException} import java.net.{URI, URISyntaxException} import java.nio.file.{FileSystem, FileSystems, Files} import java.util import com.google.protobuf.GeneratedMessageV3 class SerializedCpg extends AutoCloseable { @throws[IOException] def addOverlay(overlay: GeneratedMessageV3, name: String): Unit = { if (!isEmpty) { val pathInZip = zipFileSystem.getPath(s"${counter}_${name}") counter += 1 val outputStream = Files.newOutputStream(pathInZip) overlay.writeTo(outputStream) outputStream.close() } } @throws[IOException] def addOverlay(overlays: Iterator[GeneratedMessageV3], name: String): Unit = { overlays.zipWithIndex.foreach { case (overlay, i) => addOverlay(overlay, name + "_" + i) } } @throws[IOException] override def close(): Unit = { if (!isEmpty) { zipFileSystem.close() } } }
Example 117
Source File: FileUtils.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{File, IOException} import org.apache.hadoop.conf.Configuration import org.apache.spark.SparkContext import org.apache.carbondata.common.logging.LogServiceFactory import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.datastore.filesystem.CarbonFile import org.apache.carbondata.core.datastore.impl.FileFactory import org.apache.carbondata.core.metadata.DatabaseLocationProvider import org.apache.carbondata.core.util.CarbonUtil import org.apache.carbondata.events.{CreateDatabasePostExecutionEvent, OperationContext, OperationListenerBus} import org.apache.carbondata.processing.exception.DataLoadingException object FileUtils { def getPaths(inputPath: String): String = { getPaths(inputPath, FileFactory.getConfiguration) } def getPaths(inputPath: String, hadoopConf: Configuration): String = { if (inputPath == null || inputPath.isEmpty) { throw new DataLoadingException("Input file path cannot be empty.") } else { val stringBuild = new StringBuilder() val filePaths = inputPath.split(",").map(_.trim) for (i <- 0 until filePaths.size) { val filePath = CarbonUtil.checkAndAppendHDFSUrl(filePaths(i)) val carbonFile = FileFactory.getCarbonFile(filePath, hadoopConf) if (!carbonFile.exists()) { throw new DataLoadingException( s"The input file does not exist: ${CarbonUtil.removeAKSK(filePaths(i))}" ) } getPathsFromCarbonFile(carbonFile, stringBuild, hadoopConf) } if (stringBuild.nonEmpty) { stringBuild.substring(0, stringBuild.size - 1) } else { throw new DataLoadingException("Please check your input path and make sure " + "that files end with '.csv' and content is not empty.") } } } def getSpaceOccupied(inputPath: String, hadoopConfiguration: Configuration): Long = { var size : Long = 0 if (inputPath == null || inputPath.isEmpty) { size } else { val filePaths = inputPath.split(",") for (i <- 0 until filePaths.size) { val carbonFile = FileFactory.getCarbonFile(filePaths(i), hadoopConfiguration) size = size + carbonFile.getSize } size } } def createDatabaseDirectory(dbName: String, storePath: String, sparkContext: SparkContext) { val databasePath: String = storePath + File.separator + DatabaseLocationProvider.get().provide(dbName.toLowerCase) FileFactory.mkdirs(databasePath) val operationContext = new OperationContext val createDatabasePostExecutionEvent = new CreateDatabasePostExecutionEvent(dbName, databasePath, sparkContext) OperationListenerBus.getInstance.fireEvent(createDatabasePostExecutionEvent, operationContext) } }
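A hedged sketch of calling getPaths above; the CSV paths are placeholders and must exist on the configured file system, otherwise a DataLoadingException is thrown:

import org.apache.spark.util.FileUtils   // the CarbonData helper shown above

object GetPathsExample {
  def main(args: Array[String]): Unit = {
    // placeholder paths: the files must exist, otherwise a DataLoadingException is thrown
    val resolved = FileUtils.getPaths("/data/sales_2020.csv,/data/sales_2021.csv")
    println(resolved)   // comma-separated, validated file paths
  }
}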
Example 118
Source File: SparkCarbonStore.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.store import java.io.IOException import scala.collection.JavaConverters._ import org.apache.spark.{CarbonInputMetrics, SparkConf} import org.apache.spark.sql.{CarbonEnv, SparkSession} import org.apache.carbondata.common.annotations.InterfaceAudience import org.apache.carbondata.core.datastore.row.CarbonRow import org.apache.carbondata.core.index.IndexFilter import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier import org.apache.carbondata.core.scan.expression.Expression import org.apache.carbondata.hadoop.CarbonProjection import org.apache.carbondata.spark.rdd.CarbonScanRDD def this(storeName: String, storeLocation: String) = { this() val sparkConf = new SparkConf(loadDefaults = true) session = SparkSession.builder .config(sparkConf) .appName("SparkCarbonStore-" + storeName) .config("spark.sql.warehouse.dir", storeLocation) .config("spark.sql.extensions", "org.apache.spark.sql.CarbonExtensions") .getOrCreate() } def this(sparkSession: SparkSession) = { this() session = sparkSession } @throws[IOException] override def scan( tableIdentifier: AbsoluteTableIdentifier, projectColumns: Array[String]): java.util.Iterator[CarbonRow] = { require(tableIdentifier != null) require(projectColumns != null) scan(tableIdentifier, projectColumns, null) } @throws[IOException] override def scan( tableIdentifier: AbsoluteTableIdentifier, projectColumns: Array[String], filter: Expression): java.util.Iterator[CarbonRow] = { require(tableIdentifier != null) require(projectColumns != null) val table = CarbonEnv .getCarbonTable(Some(tableIdentifier.getDatabaseName), tableIdentifier.getTableName)(session) val indexFilter = if (filter == null) null else new IndexFilter(table, filter) val rdd = new CarbonScanRDD[CarbonRow]( spark = session, columnProjection = new CarbonProjection(projectColumns), indexFilter = indexFilter, identifier = table.getAbsoluteTableIdentifier, serializedTableInfo = table.getTableInfo.serialize, tableInfo = table.getTableInfo, inputMetricsStats = new CarbonInputMetrics, partitionNames = null, dataTypeConverterClz = null, readSupportClz = classOf[CarbonRowReadSupport]) rdd.collect .iterator .asJava } @throws[IOException] override def sql(sqlString: String): java.util.Iterator[CarbonRow] = { val df = session.sql(sqlString) df.rdd .map(row => new CarbonRow(row.toSeq.toArray.asInstanceOf[Array[Object]])) .collect() .iterator .asJava } }
Example 119
Source File: TableStatusBackupTest.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.carbondata import java.io.IOException import mockit.{Mock, MockUp} import org.apache.spark.sql.CarbonEnv import org.apache.spark.sql.test.util.QueryTest import org.scalatest.BeforeAndAfterAll import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.datastore.impl.FileFactory import org.apache.carbondata.core.statusmanager.SegmentStatusManager import org.apache.carbondata.core.util.CarbonProperties import org.apache.carbondata.core.util.path.CarbonTablePath class TableStatusBackupTest extends QueryTest with BeforeAndAfterAll { override protected def beforeAll(): Unit = { CarbonProperties.getInstance().addProperty( CarbonCommonConstants.ENABLE_TABLE_STATUS_BACKUP, "true") sql("drop table if exists source") sql("create table source(a string) stored as carbondata") } override protected def afterAll(): Unit = { sql("drop table if exists source") CarbonProperties.getInstance().addProperty( CarbonCommonConstants.ENABLE_TABLE_STATUS_BACKUP, "false") } test("backup table status file") { sql("insert into source values ('A'), ('B')") val tablePath = CarbonEnv.getCarbonTable(None, "source")(sqlContext.sparkSession).getTablePath val tableStatusFilePath = CarbonTablePath.getTableStatusFilePath(tablePath) val oldTableStatus = SegmentStatusManager.readTableStatusFile(tableStatusFilePath) var mock = new MockUp[SegmentStatusManager]() { @Mock @throws[IOException] def mockForTest(): Unit = { throw new IOException("thrown in mock") } } val exception = intercept[IOException] { sql("insert into source values ('A'), ('B')") } assert(exception.getMessage.contains("thrown in mock")) val backupPath = tableStatusFilePath + ".backup" assert(FileFactory.isFileExist(backupPath)) val backupTableStatus = SegmentStatusManager.readTableStatusFile(backupPath) assertResult(oldTableStatus)(backupTableStatus) mock = new MockUp[SegmentStatusManager]() { @Mock def mockForTest(): Unit = { } } } }
Example 120
Source File: TestRegisterIndexCarbonTable.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.spark.testsuite.secondaryindex import java.io.{File, IOException} import org.apache.commons.io.FileUtils import org.apache.spark.sql.Row import org.apache.spark.sql.test.TestQueryExecutor import org.apache.spark.sql.test.util.QueryTest import org.scalatest.BeforeAndAfterAll import org.apache.carbondata.core.constants.CarbonCommonConstants class TestRegisterIndexCarbonTable extends QueryTest with BeforeAndAfterAll { override def beforeAll { sql("drop database if exists carbon cascade") } def restoreData(dblocation: String, tableName: String) = { val destination = dblocation + CarbonCommonConstants.FILE_SEPARATOR + tableName val source = dblocation+ "_back" + CarbonCommonConstants.FILE_SEPARATOR + tableName try { FileUtils.copyDirectory(new File(source), new File(destination)) FileUtils.deleteDirectory(new File(source)) } catch { case e : Exception => throw new IOException("carbon table data restore failed.") } finally { } } def backUpData(dblocation: String, tableName: String) = { val source = dblocation + CarbonCommonConstants.FILE_SEPARATOR + tableName val destination = dblocation+ "_back" + CarbonCommonConstants.FILE_SEPARATOR + tableName try { FileUtils.copyDirectory(new File(source), new File(destination)) } catch { case e : Exception => throw new IOException("carbon table data backup failed.") } } test("register tables test") { val location = TestQueryExecutor.warehouse + CarbonCommonConstants.FILE_SEPARATOR + "dbName" sql("drop database if exists carbon cascade") sql(s"create database carbon location '${location}'") sql("use carbon") sql("""create table carbon.carbontable (c1 string,c2 int,c3 string,c5 string) STORED AS carbondata""") sql("insert into carbontable select 'a',1,'aa','aaa'") sql("create index index_on_c3 on table carbontable (c3, c5) AS 'carbondata'") backUpData(location, "carbontable") backUpData(location, "index_on_c3") sql("drop table carbontable") restoreData(location, "carbontable") restoreData(location, "index_on_c3") sql("refresh table carbontable") sql("refresh table index_on_c3") checkAnswer(sql("select count(*) from carbontable"), Row(1)) checkAnswer(sql("select c1 from carbontable"), Seq(Row("a"))) sql("REGISTER INDEX TABLE index_on_c3 ON carbontable") assert(sql("show indexes on carbontable").collect().nonEmpty) } override def afterAll { sql("drop database if exists carbon cascade") sql("use default") } }
Example 121
Source File: PlainOioServer.scala From netty-in-action-scala with Apache License 2.0 | 5 votes |
package nia.chapter4

import java.io.IOException
import java.net.ServerSocket
import java.nio.charset.Charset

class PlainOioServer {
  @throws[IOException]
  def serve(port: Int): Unit = {
    // Bind the server to the specified port
    val socket = new ServerSocket(port)
    try {
      while (true) {
        val clientSocket = socket.accept
        System.out.println("Accepted connection from " + clientSocket)
        // Create a new thread to handle the connection
        new Thread(() ⇒ {
          try {
            // Write the message to the connected client
            val out = clientSocket.getOutputStream
            out.write("Hi!\r\n".getBytes(Charset.forName("UTF-8")))
            out.flush()
            // Close the connection
            clientSocket.close()
          } catch {
            case e: IOException ⇒ e.printStackTrace()
          } finally {
            try {
              clientSocket.close()
            } catch {
              case ex: IOException ⇒ // ignore on close
            }
          }
        }).start() // Start the thread
      }
    } catch {
      case e: IOException ⇒ e.printStackTrace()
    }
  }
}
Example 122
Source File: BlockingIoExample.scala From netty-in-action-scala with Apache License 2.0 | 5 votes |
package nia.chapter1.scaladsl

import java.io.{ BufferedReader, IOException, InputStreamReader, PrintWriter }
import java.net.ServerSocket

  // #snip
  @throws[IOException]
  def serve(portNumber: Int): Unit = {
    // Create a new ServerSocket to listen for connection requests on the specified port
    val serverSocket = new ServerSocket(portNumber)
    // The call to accept() blocks until a connection has been established
    val clientSocket = serverSocket.accept
    // These stream objects are derived from the socket's stream objects
    val in = new BufferedReader(new InputStreamReader(clientSocket.getInputStream))
    val out = new PrintWriter(clientSocket.getOutputStream, true)
    var request: String = in.readLine
    var response: String = null
    // Start the processing loop
    while (request ne null) {
      if ("Done" != request) {
        // The request is passed to the server's processing method
        response = processRequest(request)
        // The server's response is sent to the client
        out.println(response)
        // Continue with the processing loop
      }
      request = in.readLine
    }
    // #snip
  }

  private def processRequest(request: String): String = "Processed"
}
Example 123
Source File: FileGenerator.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package generators import avrohugger.format.abstractions.SourceFormat import avrohugger.input.DependencyInspector import avrohugger.input.NestedSchemaExtractor import avrohugger.input.reflectivecompilation.schemagen._ import avrohugger.input.parsers.{ FileInputParser, StringInputParser} import avrohugger.matchers.TypeMatcher import avrohugger.stores.{ ClassStore, SchemaStore } import java.io.{File, FileNotFoundException, IOException} import org.apache.avro.{ Protocol, Schema } import org.apache.avro.Schema.Type.ENUM // Unable to overload this class' methods because outDir uses a default value private[avrohugger] object FileGenerator { def schemaToFile( schema: Schema, outDir: String, format: SourceFormat, classStore: ClassStore, schemaStore: SchemaStore, typeMatcher: TypeMatcher, restrictedFields: Boolean): Unit = { val topNS: Option[String] = DependencyInspector.getReferredNamespace(schema) val topLevelSchemas: List[Schema] = NestedSchemaExtractor.getNestedSchemas(schema, schemaStore, typeMatcher) // most-nested classes processed first topLevelSchemas.reverse.distinct.foreach(schema => { // pass in the top-level schema's namespace if the nested schema has none val ns = DependencyInspector.getReferredNamespace(schema) orElse topNS format.compile(classStore, ns, Left(schema), outDir, schemaStore, typeMatcher, restrictedFields) }) } def protocolToFile( protocol: Protocol, outDir: String, format: SourceFormat, classStore: ClassStore, schemaStore: SchemaStore, typeMatcher: TypeMatcher, restrictedFields: Boolean): Unit = { val ns = Option(protocol.getNamespace) format.compile(classStore, ns, Right(protocol), outDir, schemaStore, typeMatcher, restrictedFields) } def stringToFile( str: String, outDir: String, format: SourceFormat, classStore: ClassStore, schemaStore: SchemaStore, stringParser: StringInputParser, typeMatcher: TypeMatcher, restrictedFields: Boolean): Unit = { val schemaOrProtocols = stringParser.getSchemaOrProtocols(str, schemaStore) schemaOrProtocols.foreach(schemaOrProtocol => { schemaOrProtocol match { case Left(schema) => { schemaToFile(schema, outDir, format, classStore, schemaStore, typeMatcher, restrictedFields) } case Right(protocol) => { protocolToFile(protocol, outDir, format, classStore, schemaStore, typeMatcher, restrictedFields) } } }) } def fileToFile( inFile: File, outDir: String, format: SourceFormat, classStore: ClassStore, schemaStore: SchemaStore, fileParser: FileInputParser, typeMatcher: TypeMatcher, classLoader: ClassLoader, restrictedFields: Boolean): Unit = { val schemaOrProtocols: List[Either[Schema, Protocol]] = fileParser.getSchemaOrProtocols(inFile, format, classStore, classLoader) schemaOrProtocols.foreach(schemaOrProtocol => schemaOrProtocol match { case Left(schema) => { schemaToFile(schema, outDir, format, classStore, schemaStore, typeMatcher, restrictedFields) } case Right(protocol) => { protocolToFile(protocol, outDir, format, classStore, schemaStore, typeMatcher, restrictedFields) } }) } }
Example 124
Source File: Util.scala From avrohugger with Apache License 2.0 | 5 votes |
import java.io.BufferedReader
import java.io.File
import java.io.FileReader
import java.io.IOException

object Util {

  def readFile(fileName: String, maxTries: Int = 3): String = {
    def readFile0(count: Int): String = {
      try {
        // if file is empty, try again, it should be there
        val contents: String = scala.io.Source.fromFile(fileName).mkString
        if (contents.isEmpty && (count < maxTries)) readFile0(count + 1) else contents
      } catch {
        // if file is not found, try again, it should be there
        case e: Throwable =>
          if (count < maxTries) readFile0(count + 1)
          else sys.error("File not found: " + fileName)
      }
    }
    readFile0(0)
  }
}
Example 125
Source File: Factory.scala From mystem-scala with MIT License | 5 votes |
package ru.stachek66.nlp.mystem.holding import java.io.{File, IOException} import java.nio.file.Files import java.nio.file.attribute.PosixFilePermissions import org.slf4j.LoggerFactory import ru.stachek66.tools.external.FailSafeExternalProcessServer import ru.stachek66.tools.{Decompressor, Downloader, Tools} import scala.concurrent.duration._ import scala.sys.process._ import scala.util.Try def newMyStem(version: String, customExecutable: Option[File] = None): Try[MyStem] = Try { val ex = customExecutable match { case Some(exe) => exe case None => getExecutable(version) } version match { case "3.0" | "3.1" => new MyStem3( new FailSafeExternalProcessServer( ex.getAbsolutePath + (if (parsingOptions.nonEmpty) " " + parsingOptions else ""))) case _ => throw new NotImplementedError() } } @throws(classOf[Exception]) private[holding] def getExecutable(version: String): File = { val destFile = new File(BinDestination + BIN_FILE_NAME) val tempFile = new File(s"${BinDestination}tmp_${System.currentTimeMillis}.${Decompressor.select.traditionalExtension}") if (destFile.exists) { log.info("Old executable file found") try { val suggestedVersion = (destFile.getAbsolutePath + " -v") !! log.info("Version | " + suggestedVersion) // not scala-way stuff if (suggestedVersion.contains(version)) destFile else throw new Exception("Wrong version!") } catch { case e: Exception => log.warn("Removing old binary files...", e) destFile.delete getExecutable(version) } } else Tools.withAttempt(10, 1.second) { try { Decompressor.select.unpack( Downloader.downloadBinaryFile(getUrl(version), tempFile), destFile) } finally { tempFile.delete() try { Files.setPosixFilePermissions(destFile.toPath, PosixFilePermissions.fromString("r-xr-xr-x")).toFile } catch { case ioe: IOException => log.warn("Can't set POSIX permissions to file " + destFile.toPath) destFile } } } } }
Example 126
Source File: Decompressor.scala From mystem-scala with MIT License | 5 votes |
package ru.stachek66.tools

import java.io.{IOException, File, FileOutputStream}

import org.apache.commons.compress.archivers.ArchiveInputStream
import org.apache.commons.io.IOUtils
import ru.stachek66.nlp.mystem.Properties

trait Decompressor {

  def traditionalExtension: String

  def unpack(src: File, dst: File): File

  @throws(classOf[IOException])
  private[tools] def copyUncompressedAndClose(stream: ArchiveInputStream, dest: File): File = {
    // must be read
    val entry = stream.getNextEntry
    if (entry.isDirectory)
      throw new IOException("Decompressed entry is a directory (unexpectedly)")
    val os = new FileOutputStream(dest)
    try {
      IOUtils.copy(stream, os)
    } finally {
      os.close()
      stream.close()
    }
    dest
  }
}

object Decompressor {
  def select: Decompressor =
    if (Properties.CurrentOs.contains("win")) Zip else TarGz
}
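A short usage sketch for the Decompressor above; the archive and destination paths are hypothetical, and the Zip/TarGz implementations referenced by select are assumed to come from the same project:

import java.io.File
import ru.stachek66.tools.Decompressor

object DecompressorUsageSketch {
  def main(args: Array[String]): Unit = {
    // Picks Zip on Windows and TarGz elsewhere, then unpacks the archive
    // to the destination file, propagating IOException on failure.
    val unpacked: File = Decompressor.select.unpack(
      new File("/tmp/mystem.tar.gz"),
      new File("/tmp/mystem"))
    println(s"Unpacked to ${unpacked.getAbsolutePath}")
  }
}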
Example 127
Source File: SerializedProfilesLoader.scala From sparker with GNU General Public License v3.0 | 5 votes |
package SparkER.Wrappers import java.io.{IOException, _} import SparkER.DataStructures.Profile import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD def loadSerializedObject(fileName: String): Any = { var `object`: Any = null try { val file: InputStream = new FileInputStream(fileName) val buffer: InputStream = new BufferedInputStream(file) val input: ObjectInput = new ObjectInputStream(buffer) try { `object` = input.readObject } finally { input.close } } catch { case cnfEx: ClassNotFoundException => { System.err.println(fileName) cnfEx.printStackTrace } case ioex: IOException => { System.err.println(fileName) ioex.printStackTrace } } return `object` } }
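A hedged usage sketch for the loader above, assuming the method lives in an object named SerializedProfilesLoader (the enclosing declaration is elided in the listing) and that the file holds a serialized list of Profile objects:

import SparkER.DataStructures.Profile
import SparkER.Wrappers.SerializedProfilesLoader

object LoaderUsageSketch {
  def main(args: Array[String]): Unit = {
    // Returns null if the file cannot be read or the class is not found,
    // so the result must be checked before casting.
    val loaded = SerializedProfilesLoader.loadSerializedObject("/tmp/profiles.ser")
    if (loaded != null) {
      val profiles = loaded.asInstanceOf[List[Profile]]
      println(s"Loaded ${profiles.size} profiles")
    }
  }
}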
Example 128
Source File: SerializedProfilesLoader.scala From sparker with GNU General Public License v3.0 | 5 votes |
package Wrappers import java.io.{IOException, _} import DataStructures.Profile import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD def loadSerializedObject(fileName: String): Any = { var `object`: Any = null try { val file: InputStream = new FileInputStream(fileName) val buffer: InputStream = new BufferedInputStream(file) val input: ObjectInput = new ObjectInputStream(buffer) try { `object` = input.readObject } finally { input.close } } catch { case cnfEx: ClassNotFoundException => { System.err.println(fileName) cnfEx.printStackTrace } case ioex: IOException => { System.err.println(fileName) ioex.printStackTrace } } return `object` } }
Example 129
Source File: SerializedProfilesLoader.scala From sparker with GNU General Public License v3.0 | 5 votes |
package Wrappers import java.io.{IOException, _} import DataStructures.Profile import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD def loadSerializedObject(fileName: String): Any = { var `object`: Any = null try { val file: InputStream = new FileInputStream(fileName) val buffer: InputStream = new BufferedInputStream(file) val input: ObjectInput = new ObjectInputStream(buffer) try { `object` = input.readObject } finally { input.close } } catch { case cnfEx: ClassNotFoundException => { System.err.println(fileName) cnfEx.printStackTrace } case ioex: IOException => { System.err.println(fileName) ioex.printStackTrace } } return `object` } }
Example 130
Source File: VelocityUtils.scala From InteractiveGraph-neo4j with BSD 2-Clause "Simplified" License | 5 votes |
package org.grapheco.server.util

import java.io.{File, FileOutputStream, IOException, StringWriter}
import java.util.Properties

import cn.pidb.blob.Blob
import cn.pidb.engine.blob.{BlobIO, InlineBlob, RemoteBlob}
import org.apache.velocity.app.VelocityEngine
import org.apache.velocity.tools.ToolManager
import org.apache.velocity.tools.config.DefaultKey
import org.neo4j.values.storable.{BlobValue, ValueWriter}
import org.springframework.util.ClassUtils

import scala.collection.JavaConversions

object VelocityUtils {
  val pro = new Properties();
  val toolManager = new ToolManager();
  toolManager.configure("tools.xml");

  pro.setProperty("input.encoding", "UTF-8");
  pro.setProperty("output.encoding", "UTF-8");

  val ve = new VelocityEngine(pro);
  val props = new Properties()
  props.put("runtime.log.logsystem.class", "org.apache.velocity.runtime.log.SimpleLog4JLogSystem")
  props.put("runtime.log.logsystem.log4j.category", "velocity")
  props.put("runtime.log.logsystem.log4j.logger", "velocity")
  ve.init(props)

  def parse(expr: String, context: Map[String, Any]): Any = {
    val vc = toolManager.createContext();
    val writer = new StringWriter();

    context.foreach(kv => vc.put(kv._1,
      // is it a scala Map?
      if (kv._2.isInstanceOf[Map[_, _]]) {
        JavaConversions.mapAsJavaMap(kv._2.asInstanceOf[Map[_, _]])
      }
      else {
        kv._2
      }));

    try {
      if (expr.startsWith("=")) {
        val expr1 = expr.substring(1);
        ve.evaluate(vc, writer, "", s"#set($$__VAR=$expr1)");
        var value = vc.get("__VAR");
        // if it is a blob
        if (value.isInstanceOf[Blob]) {
          // get the blob
          var result: String = ""
          try {
            val data = value.asInstanceOf[Blob].toBytes()
            val path = ClassUtils.getDefaultClassLoader.getResource("").getPath.replace("/WEB-INF/classes", "") + "static/"
            val tool = new FileSystemTool()
            result = tool.filesave(data, path, System.currentTimeMillis.toString + ".jpg")
          } catch {
            case e: Throwable => print(e.toString)
          }
          // TODO: url
          return "http://localhost:9999/graphserver/static/" + result
        }
        return value
      }
      else {
        ve.evaluate(vc, writer, "", expr);
        writer.getBuffer.toString.trim
      }
    }
    catch {
      case e: Throwable =>
        throw new WrongExpressionException(expr, e);
    }
  }
}

class WrongExpressionException(msg: String, e: Throwable) extends RuntimeException(msg, e) {
}

@DefaultKey("fileTool")
class FileSystemTool {
  def exists(path: String) = new File(path).exists();

  @throws[IOException]
  def filesave(file: Array[Byte], filePath: String, fileName: String): String = {
    // target directory
    val targetfile = new File(filePath)
    if (!targetfile.exists) targetfile.mkdirs
    // write the binary stream
    val out = new FileOutputStream(filePath + fileName)
    out.write(file)
    out.flush()
    out.close()
    return fileName
  }
}
Example 131
Source File: DependencyNode.scala From cuesheet with Apache License 2.0 | 5 votes |
package com.kakao.cuesheet.deps import java.io.{BufferedOutputStream, File, FileOutputStream, IOException} import java.net.{URL, URLDecoder} import java.nio.file.{Files, Paths} import java.util.zip.{ZipEntry, ZipOutputStream} import com.kakao.mango.io.FileSystems import com.kakao.mango.logging.Logging import com.kakao.shaded.guava.io.Files.createTempDir sealed trait DependencyNode { def path: String } case class ManagedDependency(group: String, artifact: String, classifier: String = "jar") case class ManagedDependencyNode( path: String, group: String, artifact: String, classifier: String, version: String, children: Seq[ManagedDependency] ) extends DependencyNode { def key = ManagedDependency(group, artifact, classifier) } case class DirectoryDependencyNode(path: String) extends DependencyNode with Logging { lazy val compressed: UnmanagedDependencyNode = { val tmpdir = createTempDir() val jar = new File(s"${tmpdir.getAbsolutePath}/local-${tmpdir.getName}.jar") val root = Paths.get(path) val output = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(jar))) var count = 0 FileSystems.entries(root).foreach { path => if (resourceExtensions.exists(path.toString.endsWith)) { val entry = new ZipEntry(root.relativize(path).toString) output.putNextEntry(entry) try { Files.copy(path, output) count += 1 } catch { case e: IOException => logger.warn(s"skipping $path due to an IOException: ${e.getMessage}") } output.closeEntry() } } output.close() logger.debug(s"Successfully zipped $count files in $path into $jar") UnmanagedDependencyNode(jar.getAbsolutePath) } } case class JavaRuntimeDependencyNode(path: String) extends DependencyNode case class UnmanagedDependencyNode(path: String) extends DependencyNode object DependencyNode { val resolver = new ChainedArtifactResolver( new IvyPathArtifactResolver, new IvyOriginalPathArtifactResolver, new MavenPathArtifactResolver, new GradlePathArtifactResolver, new JavaRuntimeResolver, new MavenMetadataArtifactResolver, new UnmanagedJarResolver ) def resolve(url: URL): DependencyNode = { if (url.getProtocol != "file") { throw new IllegalArgumentException("non-file dependency is not supported") } val path = URLDecoder.decode(url.getFile, "UTF-8") val file = new File(path) if (file.isDirectory) { return DirectoryDependencyNode(file.getAbsolutePath) } if (!file.isFile || !file.canRead) { throw new IllegalArgumentException(s"$path is not a file or readable") } DependencyNode.resolver.resolve(file.getAbsolutePath) match { case Some(node) => node case None => throw new IllegalArgumentException(s"Could not determine the dependency of $path") } } }
Example 132
Source File: File.scala From docspell with GNU General Public License v3.0 | 5 votes |
package docspell.common import java.io.IOException import java.nio.file._ import java.nio.file.attribute.BasicFileAttributes import java.util.concurrent.atomic.AtomicInteger import scala.jdk.CollectionConverters._ import cats.effect._ import cats.implicits._ import fs2.Stream object File { def mkDir[F[_]: Sync](dir: Path): F[Path] = Sync[F].delay(Files.createDirectories(dir)) def mkTempDir[F[_]: Sync](parent: Path, prefix: String): F[Path] = mkDir(parent).map(p => Files.createTempDirectory(p, prefix)) def mkTempFile[F[_]: Sync]( parent: Path, prefix: String, suffix: Option[String] = None ): F[Path] = mkDir(parent).map(p => Files.createTempFile(p, prefix, suffix.orNull)) def deleteDirectory[F[_]: Sync](dir: Path): F[Int] = Sync[F].delay { val count = new AtomicInteger(0) Files.walkFileTree( dir, new SimpleFileVisitor[Path]() { override def visitFile( file: Path, attrs: BasicFileAttributes ): FileVisitResult = { Files.deleteIfExists(file) count.incrementAndGet() FileVisitResult.CONTINUE } override def postVisitDirectory(dir: Path, e: IOException): FileVisitResult = Option(e) match { case Some(ex) => throw ex case None => Files.deleteIfExists(dir) FileVisitResult.CONTINUE } } ) count.get } def exists[F[_]: Sync](file: Path): F[Boolean] = Sync[F].delay(Files.exists(file)) def existsNonEmpty[F[_]: Sync](file: Path, minSize: Long = 0): F[Boolean] = Sync[F].delay(Files.exists(file) && Files.size(file) > minSize) def deleteFile[F[_]: Sync](file: Path): F[Unit] = Sync[F].delay(Files.deleteIfExists(file)).map(_ => ()) def delete[F[_]: Sync](path: Path): F[Int] = if (Files.isDirectory(path)) deleteDirectory(path) else deleteFile(path).map(_ => 1) def withTempDir[F[_]: Sync](parent: Path, prefix: String): Resource[F, Path] = Resource.make(mkTempDir(parent, prefix))(p => delete(p).map(_ => ())) def listFiles[F[_]: Sync](pred: Path => Boolean, dir: Path): F[List[Path]] = Sync[F].delay { val javaList = Files.list(dir).filter(p => pred(p)).collect(java.util.stream.Collectors.toList()) javaList.asScala.toList.sortBy(_.getFileName.toString) } def readAll[F[_]: Sync: ContextShift]( file: Path, blocker: Blocker, chunkSize: Int ): Stream[F, Byte] = fs2.io.file.readAll(file, blocker, chunkSize) def readText[F[_]: Sync: ContextShift](file: Path, blocker: Blocker): F[String] = readAll[F](file, blocker, 8192).through(fs2.text.utf8Decode).compile.foldMonoid }
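A small sketch of how the helpers above might be composed with cats-effect IO (the cats-effect 2 API used by the example is assumed); the parent directory and prefix are placeholders:

import java.nio.file.Paths
import cats.effect.IO
import docspell.common.File

object FileUsageSketch {
  def main(args: Array[String]): Unit = {
    val program = for {
      dir    <- File.mkTempDir[IO](Paths.get("/tmp"), "docspell-test")
      exists <- File.exists[IO](dir)
      _      <- File.delete[IO](dir)
    } yield exists
    // Runs the effect; withTempDir would instead manage deletion as a Resource.
    println(program.unsafeRunSync())
  }
}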
Example 133
Source File: ProcessStreamConnectionProvider.scala From intellij-lsp with Apache License 2.0 | 5 votes |
package com.github.gtache.lsp.client.connection import java.io.{File, IOException, InputStream, OutputStream} import java.util.Objects import com.intellij.openapi.diagnostic.Logger import org.jetbrains.annotations.Nullable class ProcessStreamConnectionProvider(private var commands: Seq[String], private var workingDir: String) extends StreamConnectionProvider { private val LOG: Logger = Logger.getInstance(classOf[ProcessStreamConnectionProvider]) @Nullable private var process: Process = _ @throws[IOException] override def start(): Unit = { if (this.workingDir == null || this.commands == null || this.commands.isEmpty || this.commands.contains(null)) throw new IOException("Unable to start language server: " + this.toString) //$NON-NLS-1$ val builder = createProcessBuilder LOG.info("Starting server process with commands " + commands + " and workingDir " + workingDir) this.process = builder.start if (!process.isAlive) throw new IOException("Unable to start language server: " + this.toString) else LOG.info("Server process started " + process) } protected def createProcessBuilder: ProcessBuilder = { import scala.collection.JavaConverters._ val builder = new ProcessBuilder(getCommands.map(s => s.replace("\'", "")).asJava) builder.directory(new File(getWorkingDirectory)) builder } protected def getCommands: Seq[String] = commands def setCommands(commands: Seq[String]): Unit = { this.commands = commands } protected def getWorkingDirectory: String = workingDir def setWorkingDirectory(workingDir: String): Unit = { this.workingDir = workingDir } @Nullable override def getInputStream: InputStream = { if (process == null) null else process.getInputStream } @Nullable override def getOutputStream: OutputStream = { if (process == null) null else process.getOutputStream } @Nullable override def getErrorStream: InputStream = { if (process == null) null else process.getErrorStream } override def stop(): Unit = { if (process != null) process.destroy() } override def equals(obj: Any): Boolean = { obj match { case other: ProcessStreamConnectionProvider => getCommands.size == other.getCommands.size && this.getCommands.toSet == other.getCommands.toSet && this.getWorkingDirectory == other.getWorkingDirectory case _ => false } } override def hashCode: Int = { Objects.hashCode(this.getCommands) ^ Objects.hashCode(this.getWorkingDirectory) } }
Example 134
Source File: ProcessOverSocketStreamConnectionProvider.scala From intellij-lsp with Apache License 2.0 | 5 votes |
package com.github.gtache.lsp.client.connection import java.io.{IOException, InputStream, OutputStream} import java.net.{ServerSocket, Socket} import java.util.Objects import com.intellij.openapi.diagnostic.Logger class ProcessOverSocketStreamConnectionProvider(commands: Seq[String], workingDir: String, port: Int = 0) extends ProcessStreamConnectionProvider(commands, workingDir) { import ProcessOverSocketStreamConnectionProvider._ private var socket: Socket = _ private var inputStream: InputStream = _ private var outputStream: OutputStream = _ @throws[IOException] override def start(): Unit = { val serverSocket = new ServerSocket(port) val socketThread = new Thread(() => { try socket = serverSocket.accept catch { case e: IOException => LOG.error(e) } finally try serverSocket.close() catch { case e: IOException => LOG.error(e) } }) socketThread.start() super.start() try { socketThread.join(5000) } catch { case e: InterruptedException => LOG.error(e) } if (socket == null) throw new IOException("Unable to make socket connection: " + toString) //$NON-NLS-1$ inputStream = socket.getInputStream outputStream = socket.getOutputStream } override def getInputStream: InputStream = inputStream override def getOutputStream: OutputStream = outputStream override def getErrorStream: InputStream = inputStream override def stop(): Unit = { super.stop() if (socket != null) try socket.close() catch { case e: IOException => LOG.error(e) } } override def hashCode: Int = { val result = super.hashCode result ^ Objects.hashCode(this.port) } } object ProcessOverSocketStreamConnectionProvider { private val LOG = Logger.getInstance(classOf[ProcessOverSocketStreamConnectionProvider]) }
Example 135
Source File: RconConnector.scala From chatoverflow with Eclipse Public License 2.0 | 5 votes |
package org.codeoverflow.chatoverflow.requirement.service.rcon import java.io.{DataInputStream, IOException, InputStream, OutputStream} import java.net.{Socket, SocketException} import java.nio.{ByteBuffer, ByteOrder} import java.util.Random import org.codeoverflow.chatoverflow.WithLogger import org.codeoverflow.chatoverflow.connector.Connector class RconConnector(override val sourceIdentifier: String) extends Connector(sourceIdentifier) with WithLogger { override protected var requiredCredentialKeys: List[String] = List("password", "address") override protected var optionalCredentialKeys: List[String] = List("port") private var socket: Socket = _ private var outputStream: OutputStream = _ private var inputStream: InputStream = _ private var requestId: Int = 0 def sendCommand(command: String): String = { logger debug s"Sending $command to RCON" requestId += 1 if (write(2, command.getBytes("ASCII"))) { return read() } null } override def stop(): Boolean = { logger info s"Stopped RCON connector to ${credentials.get.getValue("address").get}!" socket.close() true } }
Example 136
Source File: WriteTSToFiles.scala From scala-tsi with MIT License | 5 votes |
package com.scalatsi.output import java.io.{FileWriter, IOException} import com.scalatsi.TypescriptType.TypescriptNamedType import com.scalatsi.TypescriptTypeSerializer import scala.util.Try object WriteTSToFiles { def write(options: OutputOptions)(types: Set[TypescriptNamedType]): Unit = { try { val targetFile = options.targetFile val output = TypescriptTypeSerializer.emits(options.styleOptions, types) Try { Option(targetFile.getParentFile).foreach(_.mkdirs()) targetFile.createNewFile() } // createNewFile will continue if file exists .recover { case e: SecurityException => reportFailure(s"Could not create file '$targetFile' due to JVM security stopping it", code = 2, e = e) case e: IOException => reportFailure(s"Could not create file '$targetFile' due to I/O problem", code = 2, e = e) }.get // TODO: For some reason scala.util.Using isn't working in 2.12, even though we have the compat library // Using(new FileWriter(targetFile)) { writer => // writer.write(output) // }.recover { // case e: IOException => reportFailure(s"Could not write typescript to file '$targetFile' due to I/O problem", code = 2, e = e) // }.get (for { writer <- Try(new FileWriter(targetFile)) _ <- Try { try { writer.write(output) } finally { writer.close() } } } yield ()).recover { case e: IOException => reportFailure(s"Could not write typescript to file '$targetFile' due to I/O problem", code = 2, e = e) }.get () } catch { case e: Throwable => reportFailure( """Uncaught exception in scala-tsi output writer. |Please file a bug report at https://github.com/scala-tsi/scala-tsi/issues""".stripMargin, e = e ) } } def reportFailure(msg: String, code: Int = 1, e: Throwable = null): Nothing = { require(code > 0, "Should exist with a non-zero exit code on failure") System.err.println(msg) Option(e).foreach(_.printStackTrace()) // This will not stop SBT, and the non-zero exit will mark the task as unsuccessful sys.exit(code) } }
Example 137
Source File: PartitionProcessor.scala From etl-light with MIT License | 5 votes |
package yamrcraft.etlite.processors import java.io.IOException import org.slf4j.LoggerFactory import yamrcraft.etlite.transformers.InboundMessage import yamrcraft.etlite.writers.{ErrorInfo, ErrorEventWriter} import yamrcraft.etlite.{ErrorType, EtlException, EtlSettings} import scala.util.Try class PartitionProcessor(jobId: Long, partitionId: Int, settings: EtlSettings) { val logger = LoggerFactory.getLogger(this.getClass) val pipeline = settings.pipeline.createFactory.createPipeline(settings.pipeline, jobId, partitionId) val errorsWriter: ErrorEventWriter = new ErrorEventWriter(settings.errorsFolder, jobId, partitionId) def processPartition(partition: Iterator[InboundMessage]): Unit = { logger.info(s"partition processing started [jobId=$jobId, partitionId=$partitionId]") partition foreach { inbound => try { pipeline.processMessage(inbound) } catch { case e@(_: Exception) => logger.error("event processing error", e) val errorType = e match { case ex: EtlException => ex.errorType.toString case _ : IOException => ErrorType.WriteError.toString case _ => ErrorType.SystemError.toString } val cause = Try(e.getCause.getMessage).getOrElse("") val errorInfo = ErrorInfo(errorType, Some(cause)) errorsWriter.write((inbound.msg, errorInfo)) } } pipeline.writer.commit() errorsWriter.commit() logger.info(s"partition processing ended [jobId=$jobId, partitionId=$partitionId]") } }
Example 138
Source File: TimePartitioningWriter.scala From etl-light with MIT License | 5 votes |
package yamrcraft.etlite.writers

import java.io.IOException

import com.typesafe.config.Config
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.joda.time.format.DateTimeFormat
import org.slf4j.LoggerFactory
import yamrcraft.etlite.EtlException
import yamrcraft.etlite.transformers.Message
import yamrcraft.etlite.utils.ConfigConversions._

import scala.collection.mutable

class TimePartitioningWriter[T](config: Config, jobId: Long, partitionId: Int,
                                writerFactory: (String, String) => Writer[T]) extends Writer[Message[T]] {

  val logger = LoggerFactory.getLogger(this.getClass)

  // config settings
  val workingFolder: String = config.getString("working-folder")
  val outputFolder: String = config.getString("output-folder")
  val partitionPattern: String = config.getString("partition.pattern")
  val folderMapping: Map[String, String] = config.getConfig("record-name-to-folder-mapping").asMap

  val fs = FileSystem.get(new Configuration())

  val partitionFormat = DateTimeFormat.forPattern(partitionPattern)
  val partitionsWriters = mutable.Map[String, Writer[T]]()

  @throws(classOf[EtlException])
  @throws(classOf[IOException])
  override def write(event: Message[T]): Unit = {
    val timestamp = event.msgTimestamp
    val baseFolder = folderMapping.getOrElse(event.msgType, event.msgType)

    val writer = writerFor(baseFolder, timestamp)
    writer.write(event.msg)
  }

  override def commit() = {
    // close all writers
    partitionsWriters foreach { case (file, writer) =>
      writer.commit()
    }
  }

  @throws(classOf[EtlException])
  private def writerFor(baseFolder: String, timestamp: Long): Writer[T] = {
    val relativeFileName = new Path(
      s"$baseFolder/${partitionFormat.print(timestamp)}/events_${baseFolder}_job${jobId}_part$partitionId")
    val tempFile = new Path(workingFolder, relativeFileName)
    val outputFile = new Path(outputFolder, relativeFileName)
    partitionsWriters.getOrElseUpdate(tempFile.toString, writerFactory(tempFile.toString, outputFile.toString))
  }
}
Example 139
Source File: SelectorProvider.scala From scalaz-nio with Apache License 2.0 | 5 votes |
package zio.nio.channels.spi

import java.io.IOException
import java.net.ProtocolFamily
import java.nio.channels.{ Channel => JChannel, DatagramChannel => JDatagramChannel }
import java.nio.channels.spi.{ SelectorProvider => JSelectorProvider }

import zio.nio.channels.{ Pipe, Selector, ServerSocketChannel, SocketChannel }
import zio.IO

class SelectorProvider(private val selectorProvider: JSelectorProvider) {

  final val openDatagramChannel: IO[IOException, JDatagramChannel] = // TODO: wrapper for DatagramChannel
    IO.effect(selectorProvider.openDatagramChannel()).refineToOrDie[IOException]

  // this can throw UnsupportedOperationException - doesn't seem like a recoverable exception
  final def openDatagramChannel(
    family: ProtocolFamily
  ): IO[IOException, JDatagramChannel] = // TODO: wrapper for DatagramChannel
    IO.effect(selectorProvider.openDatagramChannel(family)).refineToOrDie[IOException]

  final val openPipe: IO[IOException, Pipe] =
    IO.effect(new Pipe(selectorProvider.openPipe())).refineToOrDie[IOException]

  final val openSelector: IO[IOException, Selector] =
    IO.effect(new Selector(selectorProvider.openSelector())).refineToOrDie[IOException]

  final val openServerSocketChannel: IO[IOException, ServerSocketChannel] =
    IO.effect(new ServerSocketChannel(selectorProvider.openServerSocketChannel()))
      .refineToOrDie[IOException]

  final val openSocketChannel: IO[IOException, SocketChannel] =
    IO.effect(new SocketChannel(selectorProvider.openSocketChannel())).refineToOrDie[IOException]

  final val inheritedChannel: IO[IOException, Option[JChannel]] = // TODO: wrapper for Channel
    IO.effect(Option(selectorProvider.inheritedChannel())).refineToOrDie[IOException]
}

object SelectorProvider {

  final val make: IO[Nothing, SelectorProvider] =
    IO.effectTotal(JSelectorProvider.provider()).map(new SelectorProvider(_))
}
Example 140
Source File: AsynchronousChannelGroup.scala From scalaz-nio with Apache License 2.0 | 5 votes |
package zio.nio.channels import java.io.IOException import java.nio.channels.{ AsynchronousChannelGroup => JAsynchronousChannelGroup } import java.nio.channels.spi.{ AsynchronousChannelProvider => JAsynchronousChannelProvider } import java.util.concurrent.{ ExecutorService => JExecutorService, ThreadFactory => JThreadFactory } import java.util.concurrent.TimeUnit import zio.{ IO, UIO } import zio.duration.Duration object AsynchronousChannelGroup { def apply(executor: JExecutorService, initialSize: Int): IO[Exception, AsynchronousChannelGroup] = IO.effect( new AsynchronousChannelGroup( JAsynchronousChannelGroup.withCachedThreadPool(executor, initialSize) ) ) .refineToOrDie[Exception] def apply( threadsNo: Int, threadsFactory: JThreadFactory ): IO[Exception, AsynchronousChannelGroup] = IO.effect( new AsynchronousChannelGroup( JAsynchronousChannelGroup.withFixedThreadPool(threadsNo, threadsFactory) ) ) .refineToOrDie[Exception] def apply(executor: JExecutorService): IO[Exception, AsynchronousChannelGroup] = IO.effect( new AsynchronousChannelGroup(JAsynchronousChannelGroup.withThreadPool(executor)) ) .refineToOrDie[Exception] } class AsynchronousChannelGroup(private[channels] val channelGroup: JAsynchronousChannelGroup) { def awaitTermination(timeout: Duration): IO[Exception, Boolean] = IO.effect(channelGroup.awaitTermination(timeout.asJava.toMillis, TimeUnit.MILLISECONDS)) .refineToOrDie[Exception] val isShutdown: UIO[Boolean] = IO.effectTotal(channelGroup.isShutdown) val isTerminated: UIO[Boolean] = IO.effectTotal(channelGroup.isTerminated) val provider: UIO[JAsynchronousChannelProvider] = IO.effectTotal(channelGroup.provider()) val shutdown: UIO[Unit] = IO.effectTotal(channelGroup.shutdown()) val shutdownNow: IO[IOException, Unit] = IO.effect(channelGroup.shutdownNow()).refineToOrDie[IOException] }
Example 141
Source File: PlantUMLUtils.scala From gitbucket-plantuml-plugin with Apache License 2.0 | 5 votes |
package com.yotaichino.gitbucket.plugins.plantuml import java.io.ByteArrayOutputStream import java.io.IOException import java.lang.NullPointerException import net.sourceforge.plantuml.FileFormat import net.sourceforge.plantuml.FileFormatOption import net.sourceforge.plantuml.SourceStringReader object PlantUMLUtils { def generateSVGImage(source: String): Array[Byte] = generateImage(source, FileFormat.SVG) private def generateImage(source: String, format: FileFormat): Array[Byte] = { val reader = new SourceStringReader(source) val os = new ByteArrayOutputStream() try { reader.outputImage(os, new FileFormatOption(format)).getDescription() } catch { case _: IOException => return null case _: NullPointerException => return null } finally { os.close() } os.toByteArray() } }
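A brief usage sketch for the utility above; the diagram source and output path are made up for illustration:

import java.nio.file.{Files, Paths}
import com.yotaichino.gitbucket.plugins.plantuml.PlantUMLUtils

object PlantUMLUsageSketch {
  def main(args: Array[String]): Unit = {
    val source = "@startuml\nAlice -> Bob: hello\n@enduml"
    // generateSVGImage returns null when rendering fails, so guard before writing.
    val svg = PlantUMLUtils.generateSVGImage(source)
    if (svg != null) {
      Files.write(Paths.get("/tmp/diagram.svg"), svg)
    }
  }
}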
Example 142
Source File: TemporaryDirectoryContext.scala From cluster-broccoli with Apache License 2.0 | 5 votes |
package de.frosner.broccoli.util import java.io.IOException import java.nio.file.attribute.BasicFileAttributes import java.nio.file.{FileVisitResult, FileVisitor, Files, Path} import org.specs2.execute.{AsResult, Result} import org.specs2.specification.ForEach trait TemporaryDirectoryContext extends ForEach[Path] { override protected def foreach[R: AsResult](f: (Path) => R): Result = { val tempDirectory = Files.createTempDirectory(getClass.getName) try { AsResult(f(tempDirectory)) } finally { Files.walkFileTree( tempDirectory, new FileVisitor[Path] { override def postVisitDirectory(dir: Path, exc: IOException): FileVisitResult = { Files.delete(dir) FileVisitResult.CONTINUE } override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = { Files.delete(file) FileVisitResult.CONTINUE } override def visitFileFailed(file: Path, exc: IOException): FileVisitResult = throw exc override def preVisitDirectory(dir: Path, attrs: BasicFileAttributes): FileVisitResult = FileVisitResult.CONTINUE } ) } } }
Example 143
Source File: FileHelper.scala From bahir with Apache License 2.0 | 5 votes |
package org.apache.bahir.utils import java.io.{File, IOException} import java.nio.file.{Files, FileVisitResult, Path, SimpleFileVisitor} import java.nio.file.attribute.BasicFileAttributes object FileHelper extends Logging { def deleteFileQuietly(file: File): Path = { Files.walkFileTree(file.toPath, new SimpleFileVisitor[Path]() { override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = { try { Files.delete(file) } catch { case t: Throwable => log.warn("Failed to delete", t) } FileVisitResult.CONTINUE } override def postVisitDirectory(dir: Path, exc: IOException): FileVisitResult = { try { Files.delete(dir) } catch { case t: Throwable => log.warn("Failed to delete", t) } FileVisitResult.CONTINUE } }) } }
Example 144
Source File: ImageInputFormat.scala From flink-tensorflow with Apache License 2.0 | 5 votes |
package org.apache.flink.contrib.tensorflow.examples.inception import java.io.IOException import java.util.Collections import com.twitter.bijection.Conversion._ import org.apache.flink.api.common.io.GlobFilePathFilter import org.apache.flink.configuration.Configuration import org.apache.flink.contrib.tensorflow._ import org.apache.flink.contrib.tensorflow.common.functions.util.ModelUtils import org.apache.flink.contrib.tensorflow.io.WholeFileInputFormat import org.apache.flink.contrib.tensorflow.io.WholeFileInputFormat._ import org.apache.flink.core.fs.{FSDataInputStream, Path} import org.slf4j.{Logger, LoggerFactory} import org.tensorflow.contrib.scala.ByteStrings._ import resource._ import scala.collection.JavaConverters._ override def readRecord( reuse: (String,ImageTensorValue), filePath: Path, fileStream: FSDataInputStream, fileLength: Long): (String,ImageTensorValue) = { if(fileLength > Int.MaxValue) { throw new IllegalArgumentException("the file is too large to be fully read") } val imageData = readFully(fileStream, new Array[Byte](fileLength.toInt), 0, fileLength.toInt).asByteString[ImageFile] val imageTensor: ImageTensorValue = managed(imageData.as[ImageFileTensor]) .flatMap(x => model.normalize(x)) .acquireAndGet(_.toValue) (filePath.getName, imageTensor) } } object ImageInputFormat { def apply(): ImageInputFormat = new ImageInputFormat }
Example 145
Source File: WholeFileInputFormat.scala From flink-tensorflow with Apache License 2.0 | 5 votes |
package org.apache.flink.contrib.tensorflow.io

import java.io.{EOFException, IOException, InputStream}

import org.apache.flink.api.common.io.FileInputFormat
import org.apache.flink.configuration.Configuration
import org.apache.flink.core.fs._
import org.apache.flink.util.Preconditions.checkState

  @throws[IOException]
  def readRecord(reuse: T, filePath: Path, fileStream: FSDataInputStream, fileLength: Long): T

  // --------------------------------------------------------------------------------------------
  //  Lifecycle
  // --------------------------------------------------------------------------------------------

  override def nextRecord(reuse: T): T = {
    checkState(!reachedEnd())
    checkState(currentSplit != null && currentSplit.getStart == 0)
    checkState(stream != null)
    readRecord(reuse, currentSplit.getPath, stream, currentSplit.getLength)
  }

  override def reachedEnd(): Boolean = {
    stream.getPos != 0
  }
}

@SerialVersionUID(1L)
object WholeFileInputFormat {

  @throws[IOException]
  def readFully(fileStream: FSDataInputStream, fileLength: Long): Array[Byte] = {
    if (fileLength > Int.MaxValue) {
      throw new IllegalArgumentException("the file is too large to be fully read")
    }
    val buf = new Array[Byte](fileLength.toInt)
    readFully(fileStream, buf, 0, fileLength.toInt)
    buf
  }

  @throws[IOException]
  def readFully(inputStream: InputStream, buf: Array[Byte], off: Int, len: Int): Array[Byte] = {
    var bytesRead = 0
    while (bytesRead < len) {
      val read = inputStream.read(buf, off + bytesRead, len - bytesRead)
      if (read < 0) throw new EOFException("Premature end of stream")
      bytesRead += read
    }
    buf
  }
}
Example 146
Source File: RangerSparkPlugin.scala From spark-ranger with Apache License 2.0 | 5 votes |
package org.apache.ranger.authorization.spark.authorizer import java.io.{File, IOException} import org.apache.commons.logging.LogFactory import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext.CLIENT_TYPE import org.apache.ranger.authorization.hadoop.config.RangerConfiguration import org.apache.ranger.plugin.service.RangerBasePlugin class RangerSparkPlugin private extends RangerBasePlugin("spark", "sparkSql") { import RangerSparkPlugin._ private val LOG = LogFactory.getLog(classOf[RangerSparkPlugin]) lazy val fsScheme: Array[String] = RangerConfiguration.getInstance() .get("ranger.plugin.spark.urlauth.filesystem.schemes", "hdfs:,file:") .split(",") .map(_.trim) override def init(): Unit = { super.init() val cacheDir = new File(rangerConf.get("ranger.plugin.spark.policy.cache.dir")) if (cacheDir.exists() && (!cacheDir.isDirectory || !cacheDir.canRead || !cacheDir.canWrite)) { throw new IOException("Policy cache directory already exists at" + cacheDir.getAbsolutePath + ", but it is unavailable") } if (!cacheDir.exists() && !cacheDir.mkdirs()) { throw new IOException("Unable to create ranger policy cache directory at" + cacheDir.getAbsolutePath) } LOG.info("Policy cache directory successfully set to " + cacheDir.getAbsolutePath) } } object RangerSparkPlugin { private val rangerConf: RangerConfiguration = RangerConfiguration.getInstance val showColumnsOption: String = rangerConf.get( "xasecure.spark.describetable.showcolumns.authorization.option", "NONE") def build(): Builder = new Builder class Builder { @volatile private var sparkPlugin: RangerSparkPlugin = _ def getOrCreate(): RangerSparkPlugin = RangerSparkPlugin.synchronized { if (sparkPlugin == null) { sparkPlugin = new RangerSparkPlugin sparkPlugin.init() sparkPlugin } else { sparkPlugin } } } }
Example 147
Source File: Version.scala From apalache with Apache License 2.0 | 5 votes |
package at.forsyte.apalache.tla.tooling

import java.io.IOException
import java.util.Properties

object Version {
  private val pomProps: Properties = loadProperties("META-INF/maven/at.forsyte.apalache/tool/pom.properties")
  private val gitProps: Properties = loadProperties("at/forsyte/apalache/tla/tooling/git.properties")

  def version: String = {
    pomProps.getProperty("version", "version-dev")
  }

  def build: String = {
    gitProps.getProperty("git.commit.id.describe", "unknown-build")
  }

  private def loadProperties(name: String): Properties = {
    val resourceStream = ClassLoader.getSystemClassLoader.getResourceAsStream(name)
    var props = new Properties()
    try {
      if (resourceStream != null) {
        props.load(resourceStream)
      }
    } catch {
      case _: IOException => () // ignore and set defaults, this is not a critical function
      case e: Throwable => throw e
    }

    props
  }
}
Example 148
Source File: ProtocolModule.scala From zio-web with Apache License 2.0 | 5 votes |
package zio.web import java.io.IOException import zio._ trait ProtocolModule extends EndpointModule { type ServerConfig type ClientConfig type ServerService type ProtocolDocs type Middleware[-R, +E] def makeServer[R <: Has[ServerConfig], E, A]( middleware: Middleware[R, E], service: Service[A], handlers: Handlers[R, A] ): ZLayer[R, IOException, Has[ServerService]] def makeDocs(service: Service[_]): ProtocolDocs def makeClient[A](service: Service[A]): ZLayer[Has[ClientConfig], IOException, Has[ClientService[A]]] }
Example 149
Source File: FileHelper.scala From spark-nlp with Apache License 2.0 | 5 votes |
package com.johnsnowlabs.util import java.io.{File, IOException} import java.nio.charset.Charset import java.nio.file.{Files, Paths} import java.security.MessageDigest import java.text.DecimalFormat import org.apache.commons.io.FileUtils object FileHelper { def writeLines(file: String, lines: Seq[String], encoding: String = "UTF-8"): Unit = { val writer = Files.newBufferedWriter(Paths.get(file), Charset.forName(encoding)) try { var cnt = 0 for (line <- lines) { writer.write(line) if (cnt > 0) writer.write(System.lineSeparator()) cnt += 1 } } catch { case ex: IOException => ex.printStackTrace() } finally if (writer != null) writer.close() } def delete(file: String, throwOnError: Boolean = false): Unit = { val f = new File(file) if (f.exists()) { try { if (f.isDirectory) FileUtils.deleteDirectory(f) else FileUtils.deleteQuietly(f) } catch { case e: Exception => if (throwOnError) throw e else FileUtils.forceDeleteOnExit(f) } } } def generateChecksum(path: String): String = { val arr = Files readAllBytes (Paths get path) val checksum = MessageDigest.getInstance("MD5") digest arr checksum.map("%02X" format _).mkString } def getHumanReadableFileSize(size: Long): String = { if (size <= 0) return "0" val units = Array[String]("B", "KB", "MB", "GB", "TB", "PB", "EB") val digitGroups = (Math.log10(size) / Math.log10(1024)).toInt new DecimalFormat("#,##0.#").format(size / Math.pow(1024, digitGroups)) + " " + units(digitGroups) } }
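A short usage sketch for the helper above; the file path and contents are placeholders:

import com.johnsnowlabs.util.FileHelper

object FileHelperUsageSketch {
  def main(args: Array[String]): Unit = {
    val path = "/tmp/example.txt"
    // Writes the lines with the default UTF-8 encoding, then prints an MD5
    // checksum and a human-readable size for the same file.
    FileHelper.writeLines(path, Seq("first line", "second line"))
    println(FileHelper.generateChecksum(path))
    println(FileHelper.getHumanReadableFileSize(new java.io.File(path).length()))
  }
}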
Example 150
Source File: RemoraApp.scala From remora with MIT License | 5 votes |
import java.io.IOException import java.net.ConnectException import java.util.concurrent.{TimeUnit, TimeoutException} import akka.actor.ActorSystem import akka.stream.{ActorMaterializer, ActorMaterializerSettings, Supervision} import com.amazonaws.services.cloudwatch.{AmazonCloudWatchAsync, AmazonCloudWatchAsyncClientBuilder} import com.blacklocus.metrics.CloudWatchReporterBuilder import com.codahale.metrics.jvm.{GarbageCollectorMetricSet, MemoryUsageGaugeSet, ThreadStatesGaugeSet} import com.typesafe.scalalogging.LazyLogging import config.{KafkaSettings, MetricsSettings} import kafka.admin.RemoraKafkaConsumerGroupService import reporter.RemoraDatadogReporter import scala.concurrent.duration._ import scala.util.control.NonFatal object RemoraApp extends App with nl.grons.metrics.scala.DefaultInstrumented with LazyLogging { private val actorSystemName: String = "remora" implicit val actorSystem = ActorSystem(actorSystemName) metricRegistry.registerAll(new GarbageCollectorMetricSet) metricRegistry.registerAll(new MemoryUsageGaugeSet) metricRegistry.registerAll(new ThreadStatesGaugeSet) lazy val decider: Supervision.Decider = { case _: IOException | _: ConnectException | _: TimeoutException => Supervision.Restart case NonFatal(err: Throwable) => actorSystem.log.error(err, "Unhandled Exception in Stream: {}", err.getMessage) Supervision.Stop } implicit val materializer = ActorMaterializer( ActorMaterializerSettings(actorSystem).withSupervisionStrategy(decider))(actorSystem) implicit val executionContext = actorSystem.dispatchers.lookup("kafka-consumer-dispatcher") val kafkaSettings = KafkaSettings(actorSystem.settings.config) val consumer = new RemoraKafkaConsumerGroupService(kafkaSettings) val kafkaClientActor = actorSystem.actorOf(KafkaClientActor.props(consumer), name = "kafka-client-actor") Api(kafkaClientActor).start() val metricsSettings = MetricsSettings(actorSystem.settings.config) if (metricsSettings.registryOptions.enabled) { val exportConsumerMetricsToRegistryActor = actorSystem.actorOf(ExportConsumerMetricsToRegistryActor.props(kafkaClientActor), name = "export-consumer-metrics-actor") actorSystem.scheduler.schedule(0 second, metricsSettings.registryOptions.intervalSeconds second, exportConsumerMetricsToRegistryActor, "export") } if (metricsSettings.cloudWatch.enabled) { logger.info("Reporting metricsRegistry to Cloudwatch") val amazonCloudWatchAsync: AmazonCloudWatchAsync = AmazonCloudWatchAsyncClientBuilder.defaultClient new CloudWatchReporterBuilder() .withNamespace(metricsSettings.cloudWatch.name) .withRegistry(metricRegistry) .withClient(amazonCloudWatchAsync) .build() .start(metricsSettings.cloudWatch.intervalMinutes, TimeUnit.MINUTES) } if (metricsSettings.dataDog.enabled) { logger.info(s"Reporting metricsRegistry to Datadog at ${metricsSettings.dataDog.agentHost}:${metricsSettings.dataDog.agentPort}") val datadogReporter = new RemoraDatadogReporter(metricRegistry, metricsSettings.dataDog) datadogReporter.startReporter() } }
Example 151
Source File: StreamReadingThread.scala From ncdbg with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.programmaticallyspeaking.ncd.nashorn

import java.io.{BufferedReader, IOException, InputStream, InputStreamReader}

import scala.util.control.NonFatal

class StreamReadingThread(in: InputStream, appender: (String) => Unit) extends Thread {
  override def run(): Unit = {
    try {
      val reader = new BufferedReader(new InputStreamReader(in))
      var str = ""
      while (str != null) {
        str = reader.readLine()
        Option(str).foreach(appender)
      }
    } catch {
      case _: InterruptedException => // ok
      case ex: IOException if isStreamClosed(ex) => // ok
      case NonFatal(t) => t.printStackTrace(System.err)
    }
  }

  private def isStreamClosed(ex: IOException) = ex.getMessage.toLowerCase == "stream closed"
}
Example 152
Source File: MemoryAppender.scala From ncdbg with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.programmaticallyspeaking.ncd.testing import ch.qos.logback.classic.spi.ILoggingEvent import ch.qos.logback.core.UnsynchronizedAppenderBase import ch.qos.logback.core.encoder.Encoder import ch.qos.logback.core.status.ErrorStatus import java.io.{ByteArrayOutputStream, IOException, OutputStream} import java.nio.charset.StandardCharsets import com.programmaticallyspeaking.ncd.messaging.{Observable, SerializedSubject} object MemoryAppender { private[MemoryAppender] val logEventSubject = new SerializedSubject[String] def logEvents: Observable[String] = logEventSubject } class MemoryAppender extends UnsynchronizedAppenderBase[ILoggingEvent] { import MemoryAppender._ private var encoder: Encoder[ILoggingEvent] = _ private var outputStream = new OutputStream { override def write(b: Int): Unit = ??? override def write(b: Array[Byte]): Unit = { val str = new String(b, StandardCharsets.UTF_8) logEventSubject.onNext(str) } } override def start(): Unit = { try { Option(encoder).foreach(_.init(outputStream)) super.start() } catch { case e: IOException => started = false addStatus(new ErrorStatus("Failed to initialize encoder for appender named [" + name + "].", this, e)) } } override protected def append(event: ILoggingEvent): Unit = { if (!isStarted) return try { event.prepareForDeferredProcessing() Option(encoder).foreach(_.doEncode(event)) } catch { case ioe: IOException => started = false addStatus(new ErrorStatus("IO failure in appender", this, ioe)) } } def setEncoder(e: Encoder[ILoggingEvent]): Unit = { encoder = e } }
Example 153
Source File: Secrets.scala From mmlspark with MIT License | 5 votes |
import java.io.IOException import java.util.Base64 import sys.process._ import spray.json._ import DefaultJsonProtocol._ import org.apache.commons.io.IOUtils import sbt.{SettingKey, TaskKey} object Secrets { private val kvName = "mmlspark-keys" private val subscriptionID = "ce1dee05-8cf6-4ad6-990a-9c80868800ba" protected def exec(command: String): String = { val os = sys.props("os.name").toLowerCase os match { case x if x contains "windows" => Seq("cmd", "/C") ++ Seq(command) !! case _ => command !! } } private def getSecret(secretName: String): String = { println(s"fetching secret: $secretName") try { exec(s"az account set -s $subscriptionID") val secretJson = exec(s"az keyvault secret show --vault-name $kvName --name $secretName") secretJson.parseJson.asJsObject().fields("value").convertTo[String] } catch { case _: IOException => println("WARNING: Could not load secret from keyvault, defaulting to the empty string." + " Please install az command line to perform authorized build steps like publishing") "" case _: java.lang.RuntimeException => println("WARNING: Could not load secret from keyvault, defaulting to the empty string." + " Please install az command line to perform authorized build steps like publishing") "" } } lazy val nexusUsername: String = sys.env.getOrElse("NEXUS-UN", getSecret("nexus-un")) lazy val nexusPassword: String = sys.env.getOrElse("NEXUS-PW", getSecret("nexus-pw")) lazy val pgpPublic: String = new String(Base64.getDecoder.decode( sys.env.getOrElse("PGP-PUBLIC", getSecret("pgp-public")).getBytes("UTF-8"))) lazy val pgpPrivate: String = new String(Base64.getDecoder.decode( sys.env.getOrElse("PGP-PRIVATE", getSecret("pgp-private")).getBytes("UTF-8"))) lazy val pgpPassword: String = sys.env.getOrElse("PGP-PW", getSecret("pgp-pw")) lazy val storageKey: String = sys.env.getOrElse("STORAGE_KEY", getSecret("storage-key")) }
Example 154
Source File: Gateway.scala From reactive-microservices with MIT License | 5 votes |
import akka.actor.ActorSystem import akka.http.scaladsl.Http import akka.http.scaladsl.client.RequestBuilding import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ import akka.http.scaladsl.model.StatusCodes._ import akka.http.scaladsl.model.{HttpRequest, HttpResponse} import akka.http.scaladsl.unmarshalling.Unmarshal import akka.stream.FlowMaterializer import akka.stream.scaladsl.{Sink, Source} import java.io.IOException import scala.concurrent.{ExecutionContext, Future} case class InternalLoginRequest(identityId: Long, authMethod: String = "codecard") case class InternalReloginRequest(tokenValue: String, authMethod: String = "codecard") class Gateway(implicit actorSystem: ActorSystem, materializer: FlowMaterializer, ec: ExecutionContext) extends JsonProtocols with Config { private val identityManagerConnectionFlow = Http().outgoingConnection(identityManagerHost, identityManagerPort) private val tokenManagerConnectionFlow = Http().outgoingConnection(tokenManagerHost, tokenManagerPort) private def requestIdentityManager(request: HttpRequest): Future[HttpResponse] = { Source.single(request).via(identityManagerConnectionFlow).runWith(Sink.head) } private def requestTokenManager(request: HttpRequest): Future[HttpResponse] = { Source.single(request).via(tokenManagerConnectionFlow).runWith(Sink.head) } def requestToken(tokenValue: String): Future[Either[String, Token]] = { requestTokenManager(RequestBuilding.Get(s"/tokens/$tokenValue")).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Token].map(Right(_)) case NotFound => Future.successful(Left("Token expired or not found")) case _ => Future.failed(new IOException(s"Token request failed with status ${response.status} and error ${response.entity}")) } } } def requestNewIdentity(): Future[Identity] = { requestIdentityManager(RequestBuilding.Post("/identities")).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Identity] case _ => Future.failed(new IOException(s"Identity request failed with status ${response.status} and error ${response.entity}")) } } } def requestLogin(identityId: Long): Future[Token] = { val loginRequest = InternalLoginRequest(identityId) requestTokenManager(RequestBuilding.Post("/tokens", loginRequest)).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Token] case _ => Future.failed(new IOException(s"Login request failed with status ${response.status} and error ${response.entity}")) } } } def requestRelogin(tokenValue: String): Future[Option[Token]] = { requestTokenManager(RequestBuilding.Patch("/tokens", InternalReloginRequest(tokenValue))).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Token].map(Option(_)) case NotFound => Future.successful(None) case _ => Future.failed(new IOException(s"Relogin request failed with status ${response.status} and error ${response.entity}")) } } } }
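Each request method above follows the same shape: a 2xx status is unmarshalled, anything else becomes a failed Future wrapping an IOException. The core of that mapping, reduced to plain types (names are illustrative):

import java.io.IOException
import scala.concurrent.Future

object StatusMapping {
  // 2xx: parse the body; otherwise fail the Future with a descriptive
  // IOException, mirroring the error handling in the Gateway methods above.
  def parseOrFail[A](status: Int, body: String)(parse: String => A): Future[A] =
    if (status >= 200 && status < 300) Future.successful(parse(body))
    else Future.failed(new IOException(s"Request failed with status $status and error $body"))
}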
Example 155
Source File: Gateway.scala From reactive-microservices with MIT License | 5 votes |
import akka.actor.ActorSystem import akka.http.scaladsl.Http import akka.http.scaladsl.client.RequestBuilding import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ import akka.http.scaladsl.model.StatusCodes._ import akka.http.scaladsl.model.{HttpRequest, HttpResponse} import akka.http.scaladsl.unmarshalling.Unmarshal import akka.stream.FlowMaterializer import akka.stream.scaladsl.{Sink, Source} import java.io.IOException import scala.concurrent.{ExecutionContext, Future} case class InternalLoginRequest(identityId: Long, authMethod: String = "password") case class InternalReloginRequest(tokenValue: String, authMethod: String = "password") class Gateway(implicit actorSystem: ActorSystem, materializer: FlowMaterializer, ec: ExecutionContext) extends JsonProtocols with Config { private val identityManagerConnectionFlow = Http().outgoingConnection(identityManagerHost, identityManagerPort) private val tokenManagerConnectionFlow = Http().outgoingConnection(tokenManagerHost, tokenManagerPort) private def requestIdentityManager(request: HttpRequest): Future[HttpResponse] = { Source.single(request).via(identityManagerConnectionFlow).runWith(Sink.head) } private def requestTokenManager(request: HttpRequest): Future[HttpResponse] = { Source.single(request).via(tokenManagerConnectionFlow).runWith(Sink.head) } def requestToken(tokenValue: String): Future[Either[String, Token]] = { requestTokenManager(RequestBuilding.Get(s"/tokens/$tokenValue")).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Token].map(Right(_)) case NotFound => Future.successful(Left("Token expired or not found")) case _ => Future.failed(new IOException(s"Token request failed with status ${response.status} and error ${response.entity}")) } } } def requestNewIdentity(): Future[Identity] = { requestIdentityManager(RequestBuilding.Post("/identities")).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Identity] case _ => Future.failed(new IOException(s"Identity request failed with status ${response.status} and error ${response.entity}")) } } } def requestLogin(identityId: Long): Future[Token] = { val loginRequest = InternalLoginRequest(identityId) requestTokenManager(RequestBuilding.Post("/tokens", loginRequest)).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Token] case _ => Future.failed(new IOException(s"Login request failed with status ${response.status} and error ${response.entity}")) } } } def requestRelogin(tokenValue: String): Future[Option[Token]] = { requestTokenManager(RequestBuilding.Patch("/tokens", InternalReloginRequest(tokenValue))).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Token].map(Option(_)) case NotFound => Future.successful(None) case _ => Future.failed(new IOException(s"Relogin request failed with status ${response.status} and error ${response.entity}")) } } } }
Example 156
Source File: Gateway.scala From reactive-microservices with MIT License | 5 votes |
import akka.actor.ActorSystem import akka.http.scaladsl.Http import akka.http.scaladsl.client.RequestBuilding import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ import akka.http.scaladsl.model.StatusCodes._ import akka.http.scaladsl.model.{HttpRequest, HttpResponse} import akka.http.scaladsl.unmarshalling.Unmarshal import akka.stream.FlowMaterializer import akka.stream.scaladsl.{Sink, Source} import com.restfb.DefaultFacebookClient import com.restfb.types.User import java.io.IOException import scala.concurrent.{blocking, ExecutionContext, Future} import scala.util.Try case class InternalLoginRequest(identityId: Long, authMethod: String = "fb") case class InternalReloginRequest(tokenValue: String, authMethod: String = "fb") class Gateway(implicit actorSystem: ActorSystem, materializer: FlowMaterializer, ec: ExecutionContext) extends JsonProtocols with Config { private val identityManagerConnectionFlow = Http().outgoingConnection(identityManagerHost, identityManagerPort) private val tokenManagerConnectionFlow = Http().outgoingConnection(tokenManagerHost, tokenManagerPort) private def requestIdentityManager(request: HttpRequest): Future[HttpResponse] = { Source.single(request).via(identityManagerConnectionFlow).runWith(Sink.head) } private def requestTokenManager(request: HttpRequest): Future[HttpResponse] = { Source.single(request).via(tokenManagerConnectionFlow).runWith(Sink.head) } def requestToken(tokenValue: String): Future[Either[String, Token]] = { requestTokenManager(RequestBuilding.Get(s"/tokens/$tokenValue")).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Token].map(Right(_)) case NotFound => Future.successful(Left("Token expired or not found")) case _ => Future.failed(new IOException(s"Token request failed with status ${response.status} and error ${response.entity}")) } } } def requestNewIdentity(): Future[Identity] = { requestIdentityManager(RequestBuilding.Post("/identities")).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Identity] case _ => Future.failed(new IOException(s"Identity request failed with status ${response.status} and error ${response.entity}")) } } } def requestLogin(identityId: Long): Future[Token] = { val loginRequest = InternalLoginRequest(identityId) requestTokenManager(RequestBuilding.Post("/tokens", loginRequest)).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Token] case _ => Future.failed(new IOException(s"Login request failed with status ${response.status} and error ${response.entity}")) } } } def requestRelogin(tokenValue: String): Future[Option[Token]] = { requestTokenManager(RequestBuilding.Patch("/tokens", InternalReloginRequest(tokenValue))).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Token].map(Option(_)) case NotFound => Future.successful(None) case _ => Future.failed(new IOException(s"Relogin request failed with status ${response.status} and error ${response.entity}")) } } } def getFbUserDetails(accessToken: String): Try[User] = { Try { blocking { val client = new DefaultFacebookClient(accessToken) client.fetchObject("me", classOf[User]) } } } }
Example 157
Source File: HttpHandler.scala From play-auditing with Apache License 2.0 | 5 votes |
package uk.gov.hmrc.audit.handler import java.io.IOException import java.net.URL import java.util.concurrent.TimeoutException import akka.stream.Materializer import org.slf4j.{Logger, LoggerFactory} import play.api.inject.ApplicationLifecycle import play.api.libs.json.JsValue import scala.concurrent.{ExecutionContext, Future} import scala.concurrent.duration.Duration sealed trait HttpResult object HttpResult { case class Response(statusCode: Int) extends HttpResult case object Malformed extends HttpResult case class Failure(msg: String, nested: Option[Throwable] = None) extends Exception(msg, nested.orNull) with HttpResult } abstract class HttpHandler( endpointUrl : URL, userAgent : String, connectTimeout : Duration, requestTimeout : Duration, materializer : Materializer, lifecycle : ApplicationLifecycle ) { private val logger: Logger = LoggerFactory.getLogger(getClass) val HTTP_STATUS_CONTINUE = 100 val wsClient: WSClient = { implicit val m = materializer val wsClient = WSClient(connectTimeout, requestTimeout, userAgent) lifecycle.addStopHook { () => logger.info("Closing play-auditing http connections...") wsClient.close() Future.successful(()) } wsClient } def sendHttpRequest(event: JsValue)(implicit ec: ExecutionContext): Future[HttpResult] = try { logger.debug(s"Sending audit request to URL ${endpointUrl.toString}") wsClient.url(endpointUrl.toString) .post(event) .map { response => val httpStatusCode = response.status logger.debug(s"Got status code : $httpStatusCode") response.body logger.debug("Response processed and closed") if (httpStatusCode >= HTTP_STATUS_CONTINUE) { logger.info(s"Got status code $httpStatusCode from HTTP server.") HttpResult.Response(httpStatusCode) } else { logger.warn(s"Malformed response (status $httpStatusCode) returned from server") HttpResult.Malformed } }.recover { case e: TimeoutException => HttpResult.Failure("Error opening connection, or request timed out", Some(e)) case e: IOException => HttpResult.Failure("Error opening connection, or request timed out", Some(e)) } } catch { case t: Throwable => Future.successful(HttpResult.Failure("Error sending HTTP request", Some(t))) } }
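The recover block above is what turns transport-level failures into an HttpResult value instead of a failed Future. The same step in isolation (object and method names are illustrative), assuming the HttpResult ADT defined in this example:

import java.io.IOException
import java.util.concurrent.TimeoutException
import scala.concurrent.{ExecutionContext, Future}
import uk.gov.hmrc.audit.handler.HttpResult

object TransportErrors {
  // Convert timeouts and connection failures into HttpResult.Failure so the
  // caller always receives a value rather than a failed Future.
  def absorb(send: => Future[HttpResult])(implicit ec: ExecutionContext): Future[HttpResult] =
    send.recover {
      case e @ (_: TimeoutException | _: IOException) =>
        HttpResult.Failure("Error opening connection, or request timed out", Some(e))
    }
}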
Example 158
Source File: WireMockUtils.scala From play-auditing with Apache License 2.0 | 5 votes |
package uk.gov.hmrc.audit import java.io.IOException import java.net.ServerSocket object WireMockUtils { def availablePort: Int = { var port = 9876 var socket: ServerSocket = null try { socket = new ServerSocket(0) port = socket.getLocalPort } catch { case ex: IOException => } finally { if (socket != null) { try { socket.close() } catch { case ex: IOException => } } } port } }
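A slightly tighter variant of the port helper (illustrative, not from the library): ask the OS for an ephemeral port, always close the socket, and only fall back to a fixed port if binding itself fails.

import java.io.IOException
import java.net.ServerSocket

object Ports {
  def freePort(fallback: Int = 9876): Int =
    try {
      val socket = new ServerSocket(0)   // port 0 = let the OS choose
      try socket.getLocalPort
      finally socket.close()
    } catch {
      case _: IOException => fallback    // e.g. no sockets available at all
    }
}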
Example 159
Source File: ErrorHandling.scala From Argus-SAF with Apache License 2.0 | 5 votes |
package org.argus.jawa.core.compiler.compile.io import java.io.IOException object ErrorHandling { def translate[T](msg: => String)(f: => T): T = try { f } catch { case e: IOException => throw new TranslatedIOException(msg + e.toString, e) case e: Exception => throw new TranslatedException(msg + e.toString, e) } def wideConvert[T](f: => T): Either[Throwable, T] = try { Right(f) } catch { case ex @ (_: Exception | _: StackOverflowError) => Left(ex) case err @ (_: ThreadDeath | _: VirtualMachineError) => throw err case x: Throwable => Left(x) } def convert[T](f: => T): Either[Exception, T] = try { Right(f) } catch { case e: Exception => Left(e) } def reducedToString(e: Throwable): String = if(e.getClass == classOf[RuntimeException]) { val msg = e.getMessage if(msg == null || msg.isEmpty) e.toString else msg } else e.toString } sealed class TranslatedException private[io](msg: String, cause: Throwable) extends RuntimeException(msg, cause) { override def toString: String = msg } final class TranslatedIOException private[io](msg: String, cause: IOException) extends TranslatedException(msg, cause)
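A usage sketch for translate (the object name and file path are hypothetical): any IOException escaping the block is rethrown as TranslatedIOException with the message prepended, keeping the original exception attached as the cause.

import java.io.FileInputStream
import org.argus.jawa.core.compiler.compile.io.ErrorHandling

object TranslateUsage {
  def readAllBytes(path: String): Array[Byte] =
    ErrorHandling.translate(s"Could not read $path: ") {
      val in = new FileInputStream(path)
      try Iterator.continually(in.read()).takeWhile(_ != -1).map(_.toByte).toArray
      finally in.close()
    }
}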
Example 160
Source File: RestartableByteArrayInputStream.scala From hail with MIT License | 5 votes |
package is.hail.utils import java.io.{ IOException, InputStream } // not thread safe class RestartableByteArrayInputStream extends InputStream { private[this] var off: Int = 0 private[this] var end: Int = 0 private[this] var buf: Array[Byte] = null def this(buf: Array[Byte]) { this() restart(buf) } override def read(): Int = { if (off == end) { return -1 } val b = buf(off) & 0xff off += 1 b } override def read(dest: Array[Byte]): Int = read(dest, 0, dest.length) override def read(dest: Array[Byte], destOff: Int, requestedLength: Int): Int = { val length = math.min(requestedLength, end - off) System.arraycopy(buf, off, dest, destOff, length) off += length length } override def skip(n: Long): Long = { if (n <= 0) { return 0 } val skipped = math.min( math.min(n, Integer.MAX_VALUE).toInt, end - off) off += skipped skipped } override def available(): Int = end - off override def markSupported(): Boolean = false override def mark(readAheadLimit: Int): Unit = throw new IOException("unsupported operation") override def reset(): Unit = throw new IOException("unsupported operation") override def close(): Unit = buf = null def restart(buf: Array[Byte]): Unit = restart(buf, 0, buf.length) def restart(buf: Array[Byte], start: Int, end: Int): Unit = { require(start >= 0) require(start <= end) require(end <= buf.length) this.buf = buf this.off = start this.end = end } }
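Usage sketch (the demo object is illustrative): a single instance is pointed at successive byte arrays via restart, avoiding per-record allocation; mark and reset deliberately throw IOException because they are unsupported.

import java.nio.charset.StandardCharsets
import is.hail.utils.RestartableByteArrayInputStream

object RestartableStreamDemo extends App {
  val in = new RestartableByteArrayInputStream("first".getBytes(StandardCharsets.UTF_8))
  val a = new Array[Byte](5)
  in.read(a)
  println(new String(a, StandardCharsets.UTF_8))   // first

  in.restart("second".getBytes(StandardCharsets.UTF_8))
  val b = new Array[Byte](6)
  in.read(b)
  println(new String(b, StandardCharsets.UTF_8))   // second
}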
Example 161
Source File: CustomHttpClientRetryHandler.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.tools.neptune.export import java.io.IOException import org.apache.http.impl.client.DefaultHttpRequestRetryHandler import org.apache.http.protocol.HttpContext import org.slf4j.LoggerFactory class CustomHttpClientRetryHandler extends DefaultHttpRequestRetryHandler{ protected lazy val logger = LoggerFactory.getLogger("httP-retry_handler") @Override override def retryRequest(exception: IOException, executionCount: Int, context: HttpContext) :Boolean = { logger.info("Going to retry http request...") if (executionCount >= 5) { // Do not retry if over max retry count false } else { logger.error("Exception=" + exception) Thread.sleep(10000) true } } }
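A sketch of how a handler like this is typically registered with Apache HttpClient 4.x (the demo object and URL are placeholders): transient IOExceptions then trigger up to five retries, each preceded by the ten-second sleep above.

import org.apache.http.client.methods.HttpGet
import org.apache.http.impl.client.HttpClients
import cmwell.tools.neptune.export.CustomHttpClientRetryHandler

object RetryingClientDemo extends App {
  val client = HttpClients.custom()
    .setRetryHandler(new CustomHttpClientRetryHandler)
    .build()

  val response = client.execute(new HttpGet("http://example.org/health"))
  try println(response.getStatusLine.getStatusCode)
  finally {
    response.close()
    client.close()
  }
}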
Example 162
Source File: ClusterSpec.scala From akka-cqrs with Apache License 2.0 | 5 votes |
package test.support import java.io.{File, IOException} import java.nio.file._ import java.nio.file.attribute.BasicFileAttributes import akka.actor.{ActorIdentity, Identify, Props} import akka.cluster.Cluster import akka.persistence.Persistence import akka.persistence.journal.leveldb.{SharedLeveldbJournal, SharedLeveldbStore} import akka.remote.testconductor.RoleName import akka.remote.testkit.MultiNodeSpec import akka.testkit.ImplicitSender import scala.util.control.NonFatal abstract class ClusterSpec extends MultiNodeSpec(ClusterConfig) with SbtMultiNodeSpec with ImplicitSender { import ClusterConfig._ implicit val logger = system.log def initialParticipants = roles.size def deleteDirectory(path: Path): Unit = if (Files.exists(path)) { Files.walkFileTree(path, new SimpleFileVisitor[Path] { def deleteAndContinue(file: Path): FileVisitResult = { Files.delete(file) FileVisitResult.CONTINUE } override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = deleteAndContinue(file) override def visitFileFailed(file: Path, exc: IOException): FileVisitResult = deleteAndContinue(file) override def postVisitDirectory(dir: Path, exc: IOException): FileVisitResult = { Option(exc).fold(deleteAndContinue(dir)) { case NonFatal(e) => throw e } } }) } val storageLocations = List( "akka.persistence.journal.leveldb.dir", "akka.persistence.journal.leveldb-shared.store.dir", "akka.persistence.snapshot-store.local.dir").map(s => new File(system.settings.config.getString(s))) override protected def atStartup() { on(node1) { storageLocations.foreach(dir => deleteDirectory(dir.toPath)) } } override protected def afterTermination() { on(node1) { storageLocations.foreach(dir => deleteDirectory(dir.toPath)) } } def join(startOn: RoleName, joinTo: RoleName) { on(startOn) { Cluster(system) join node(joinTo).address } enterBarrier(startOn.name + "-joined") } def setupSharedJournal() { Persistence(system) on(node1) { system.actorOf(Props[SharedLeveldbStore], "store") } enterBarrier("persistence-started") system.actorSelection(node(node1) / "user" / "store") ! Identify(None) val sharedStore = expectMsgType[ActorIdentity].ref.get SharedLeveldbJournal.setStore(sharedStore, system) enterBarrier("after-1") } def joinCluster() { join(startOn = node1, joinTo = node1) join(startOn = node2, joinTo = node1) enterBarrier("after-2") } def on(nodes: RoleName*)(thunk: => Unit): Unit = { runOn(nodes: _*)(thunk) } }
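deleteDirectory above walks the tree with a FileVisitor so the journal directories are removed bottom-up. An equivalent sketch using Files.walk (Java 8+, names illustrative), sorted deepest-first, in case the visitor version feels heavyweight:

import java.nio.file.{Files, Path}
import scala.collection.JavaConverters._

object RecursiveDelete {
  def deleteRecursively(path: Path): Unit =
    if (Files.exists(path)) {
      val walk = Files.walk(path)
      try {
        // Reverse order deletes children before their parent directories;
        // any IOException from Files.delete aborts the cleanup.
        walk.sorted(java.util.Comparator.reverseOrder[Path]())
          .iterator().asScala
          .foreach(p => Files.delete(p))
      } finally walk.close()
    }
}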
Example 163
Source File: Main.scala From frdomain-extras with Apache License 2.0 | 5 votes |
package frdomain.ch6.domain import java.io.IOException import zio._ import zio.blocking.Blocking import config._ import common._ import service._ import model.{Account, Balance} import repository.{AccountRepository, InMemoryAccountRepository, DoobieAccountRepository} object Main { // uses AccountService val opens = for { _ <- open("a1234", "a1name", None, None, Checking) _ <- open("a2345", "a2name", None, None, Checking) _ <- open("a3456", "a3name", Some(BigDecimal(5.8)), None, Savings) _ <- open("a4567", "a4name", None, None, Checking) _ <- open("a5678", "a5name", Some(BigDecimal(2.3)), None, Savings) } yield (()) // uses AccountService val credits = for { _ <- credit("a1234", 1000) _ <- credit("a2345", 2000) _ <- credit("a3456", 3000) _ <- credit("a4567", 4000) } yield (()) // uses AccountService and ReportingService val program = for { _ <- opens _ <- credits a <- balanceByAccount } yield a def main(args: Array[String]): Unit = { val prog = for { _ <- opens _ <- credits a <- balanceByAccount } yield a val banking: ZIO[Blocking, Object, Seq[(String, common.Amount)]] = prog.provideLayer(Application.prod.appLayer) println(Runtime.default.unsafeRun(banking)) } }
Example 164
Source File: inmemory.scala From frdomain-extras with Apache License 2.0 | 5 votes |
package frdomain.ch6.domain import java.io.IOException import zio._ import common._ import repository.{InMemoryAccountRepository, DoobieAccountRepository} import service._ import model.{Account, Balance} object InMemory { def main(args: Array[String]): Unit = { // uses AccountService val opens = for { _ <- open("a1234", "a1name", None, None, Checking) _ <- open("a2345", "a2name", None, None, Checking) _ <- open("a3456", "a3name", Some(BigDecimal(5.8)), None, Savings) _ <- open("a4567", "a4name", None, None, Checking) _ <- open("a5678", "a5name", Some(BigDecimal(2.3)), None, Savings) } yield (()) // uses AccountService val credits = for { _ <- credit("a1234", 1000) _ <- credit("a2345", 2000) _ <- credit("a3456", 3000) _ <- credit("a4567", 4000) } yield (()) // uses AccountService and ReportingService val program = for { _ <- opens _ <- credits a <- balanceByAccount } yield a // layers val appLayer = InMemoryAccountRepository.layer >+> AccountService.live >+> ReportingService.live val banking = program.provideLayer(appLayer) println(Runtime.default.unsafeRun(banking)) // List((a5678,0), (a3456,3000), (a1234,1000), (a2345,2000), (a4567,4000)) } }
Example 165
Source File: TestOutputStream.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming import java.io.{IOException, ObjectInputStream} import java.util.concurrent.ConcurrentLinkedQueue import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.dstream.{DStream, ForEachDStream} import org.apache.spark.util.Utils class TestOutputStream[T: ClassTag](parent: DStream[T], val output: ConcurrentLinkedQueue[Seq[T]] = new ConcurrentLinkedQueue[Seq[T]]()) extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { val collected = rdd.collect() output.add(collected) }, false) { // This is to clear the output buffer every time it is read from a checkpoint @throws(classOf[IOException]) private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { ois.defaultReadObject() output.clear() } }
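The readObject hook above is the standard Java-serialization way to reset transient state after a checkpoint restore. The same pattern on a plain class (names are illustrative):

import java.io.{IOException, ObjectInputStream}
import scala.collection.mutable.ArrayBuffer

class CollectedOutput extends Serializable {
  @transient private var buffer = ArrayBuffer.empty[String]

  def add(s: String): Unit = buffer += s
  def snapshot: Seq[String] = buffer.toList

  // Invoked by Java serialization on deserialize; declared to throw IOException.
  // Recreates the transient buffer so the restored copy starts empty.
  @throws(classOf[IOException])
  private def readObject(ois: ObjectInputStream): Unit = {
    ois.defaultReadObject()
    buffer = ArrayBuffer.empty[String]
  }
}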
Example 166
Source File: ReusableStringReaderSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions.xml import java.io.IOException import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.expressions.xml.UDFXPathUtil.ReusableStringReader class ReusableStringReaderSuite extends SparkFunSuite { private val fox = "Quick brown fox jumps over the lazy dog." test("empty reader") { val reader = new ReusableStringReader intercept[IOException] { reader.read() } intercept[IOException] { reader.ready() } reader.close() } test("mark reset") { val reader = new ReusableStringReader if (reader.markSupported()) { reader.asInstanceOf[ReusableStringReader].set(fox) assert(reader.ready()) val cc = new Array[Char](6) var read = reader.read(cc) assert(read == 6) assert("Quick " == new String(cc)) reader.mark(100) read = reader.read(cc) assert(read == 6) assert("brown " == new String(cc)) reader.reset() read = reader.read(cc) assert(read == 6) assert("brown " == new String(cc)) } reader.close() } test("skip") { val reader = new ReusableStringReader reader.asInstanceOf[ReusableStringReader].set(fox) // skip entire the data: var skipped = reader.skip(fox.length() + 1) assert(fox.length() == skipped) assert(-1 == reader.read()) reader.asInstanceOf[ReusableStringReader].set(fox) // reset the data val cc = new Array[Char](6) var read = reader.read(cc) assert(read == 6) assert("Quick " == new String(cc)) // skip some piece of data: skipped = reader.skip(30) assert(skipped == 30) read = reader.read(cc) assert(read == 4) assert("dog." == new String(cc, 0, read)) // skip when already at EOF: skipped = reader.skip(300) assert(skipped == 0, skipped) assert(reader.read() == -1) reader.close() } }
Example 167
Source File: ProcessTestUtils.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.test import java.io.{InputStream, IOException} import scala.sys.process.BasicIO object ProcessTestUtils { class ProcessOutputCapturer(stream: InputStream, capture: String => Unit) extends Thread { this.setDaemon(true) override def run(): Unit = { try { BasicIO.processFully(capture)(stream) } catch { case _: IOException => // Ignores the IOException thrown when the process termination, which closes the input // stream abruptly. } } } }
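Usage sketch (the demo object is illustrative): the capturer drains a child process's stdout on a daemon thread, and the IOException raised when the process exits and the stream closes is deliberately swallowed.

import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer

object CaptureDemo extends App {
  val process = new ProcessBuilder("echo", "hello").start()
  new ProcessOutputCapturer(process.getInputStream, line => println(s"captured: $line")).start()
  process.waitFor()
}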
Example 168
Source File: RawTextSender.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.{ByteArrayOutputStream, IOException} import java.net.ServerSocket import java.nio.ByteBuffer import scala.io.Source import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.serializer.KryoSerializer import org.apache.spark.util.IntParam private[streaming] object RawTextSender extends Logging { def main(args: Array[String]) { if (args.length != 4) { // scalastyle:off println System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>") // scalastyle:on println System.exit(1) } // Parse the arguments using a pattern match val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args // Repeat the input data multiple times to fill in a buffer val lines = Source.fromFile(file).getLines().toArray val bufferStream = new ByteArrayOutputStream(blockSize + 1000) val ser = new KryoSerializer(new SparkConf()).newInstance() val serStream = ser.serializeStream(bufferStream) var i = 0 while (bufferStream.size < blockSize) { serStream.writeObject(lines(i)) i = (i + 1) % lines.length } val array = bufferStream.toByteArray val countBuf = ByteBuffer.wrap(new Array[Byte](4)) countBuf.putInt(array.length) countBuf.flip() val serverSocket = new ServerSocket(port) logInfo("Listening on port " + port) while (true) { val socket = serverSocket.accept() logInfo("Got a new connection") val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec) try { while (true) { out.write(countBuf.array) out.write(array) } } catch { case e: IOException => logError("Client disconnected") } finally { socket.close() } } } }
Example 169
Source File: HdfsUtils.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.{FileNotFoundException, IOException} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ private[streaming] object HdfsUtils { def getOutputStream(path: String, conf: Configuration): FSDataOutputStream = { val dfsPath = new Path(path) val dfs = getFileSystemForPath(dfsPath, conf) // If the file exists and we have append support, append instead of creating a new file val stream: FSDataOutputStream = { if (dfs.isFile(dfsPath)) { if (conf.getBoolean("hdfs.append.support", false) || dfs.isInstanceOf[RawLocalFileSystem]) { dfs.append(dfsPath) } else { throw new IllegalStateException("File exists and there is no append support!") } } else { dfs.create(dfsPath) } } stream } def getInputStream(path: String, conf: Configuration): FSDataInputStream = { val dfsPath = new Path(path) val dfs = getFileSystemForPath(dfsPath, conf) try { dfs.open(dfsPath) } catch { case _: FileNotFoundException => null case e: IOException => // If we are really unlucky, the file may be deleted as we're opening the stream. // This can happen as clean up is performed by daemon threads that may be left over from // previous runs. if (!dfs.isFile(dfsPath)) null else throw e } } def checkState(state: Boolean, errorMsg: => String) { if (!state) { throw new IllegalStateException(errorMsg) } } def checkFileExists(path: String, conf: Configuration): Boolean = { val hdpPath = new Path(path) val fs = getFileSystemForPath(hdpPath, conf) fs.isFile(hdpPath) } }
Example 170
Source File: FileBasedWriteAheadLogReader.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.{Closeable, EOFException, IOException} import java.nio.ByteBuffer import org.apache.hadoop.conf.Configuration import org.apache.spark.internal.Logging private[streaming] class FileBasedWriteAheadLogReader(path: String, conf: Configuration) extends Iterator[ByteBuffer] with Closeable with Logging { private val instream = HdfsUtils.getInputStream(path, conf) private var closed = (instream == null) // the file may be deleted as we're opening the stream private var nextItem: Option[ByteBuffer] = None override def hasNext: Boolean = synchronized { if (closed) { return false } if (nextItem.isDefined) { // handle the case where hasNext is called without calling next true } else { try { val length = instream.readInt() val buffer = new Array[Byte](length) instream.readFully(buffer) nextItem = Some(ByteBuffer.wrap(buffer)) logTrace("Read next item " + nextItem.get) true } catch { case e: EOFException => logDebug("Error reading next item, EOF reached", e) close() false case e: IOException => logWarning("Error while trying to read data. If the file was deleted, " + "this should be okay.", e) close() if (HdfsUtils.checkFileExists(path, conf)) { // If file exists, this could be a legitimate error throw e } else { // File was deleted. This can occur when the daemon cleanup thread takes time to // delete the file during recovery. false } case e: Exception => logWarning("Error while trying to read data from HDFS.", e) close() throw e } } } override def next(): ByteBuffer = synchronized { val data = nextItem.getOrElse { close() throw new IllegalStateException( "next called without calling hasNext or after hasNext returned false") } nextItem = None // Ensure the next hasNext call loads new data. data } override def close(): Unit = synchronized { if (!closed) { instream.close() } closed = true } }
Example 171
Source File: CommandUtils.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.worker import java.io.{File, FileOutputStream, InputStream, IOException} import scala.collection.JavaConverters._ import scala.collection.Map import org.apache.spark.SecurityManager import org.apache.spark.deploy.Command import org.apache.spark.internal.Logging import org.apache.spark.launcher.WorkerCommandBuilder import org.apache.spark.util.Utils def redirectStream(in: InputStream, file: File) { val out = new FileOutputStream(file, true) // TODO: It would be nice to add a shutdown hook here that explains why the output is // terminating. Otherwise if the worker dies the executor logs will silently stop. new Thread("redirect output to " + file) { override def run() { try { Utils.copyStream(in, out, true) } catch { case e: IOException => logInfo("Redirection to " + file + " closed: " + e.getMessage) } } }.start() } }
Example 172
Source File: SparkHadoopMapRedUtil.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.mapred import java.io.IOException import org.apache.hadoop.mapreduce.{TaskAttemptContext => MapReduceTaskAttemptContext} import org.apache.hadoop.mapreduce.{OutputCommitter => MapReduceOutputCommitter} import org.apache.hadoop.security.UserGroupInformation import org.apache.spark.{SparkEnv, TaskContext} import org.apache.spark.executor.CommitDeniedException import org.apache.spark.internal.Logging object SparkHadoopMapRedUtil extends Logging { private val user = UserGroupInformation.getCurrentUser.getShortUserName def commitTask( committer: MapReduceOutputCommitter, mrTaskContext: MapReduceTaskAttemptContext, jobId: Int, splitId: Int): Unit = { val mrTaskAttemptID = mrTaskContext.getTaskAttemptID // Called after we have decided to commit def performCommit(): Unit = { try { committer.commitTask(mrTaskContext) logInfo(s"$mrTaskAttemptID: Committed") } catch { case cause: IOException => logError(s"Error committing the output of task: $mrTaskAttemptID", cause) committer.abortTask(mrTaskContext) throw cause } } // First, check whether the task's output has already been committed by some other attempt if (committer.needsTaskCommit(mrTaskContext)) { val shouldCoordinateWithDriver: Boolean = { val sparkConf = SparkEnv.get(user).conf // We only need to coordinate with the driver if there are concurrent task attempts. // Note that this could happen even when speculation is not enabled (e.g. see SPARK-8029). // This (undocumented) setting is an escape-hatch in case the commit code introduces bugs. sparkConf.getBoolean("spark.hadoop.outputCommitCoordination.enabled", defaultValue = true) } if (shouldCoordinateWithDriver) { val outputCommitCoordinator = SparkEnv.get(user).outputCommitCoordinator val taskAttemptNumber = TaskContext.get().attemptNumber() val canCommit = outputCommitCoordinator.canCommit(jobId, splitId, taskAttemptNumber) if (canCommit) { performCommit() } else { val message = s"$mrTaskAttemptID: Not committed because the driver did not authorize commit" logInfo(message) // We need to abort the task so that the driver can reschedule new attempts, if necessary committer.abortTask(mrTaskContext) throw new CommitDeniedException(message, jobId, splitId, taskAttemptNumber) } } else { // Speculation is disabled or a user has chosen to manually bypass the commit coordination performCommit() } } else { // Some other attempt committed the output, so we do nothing and signal success logInfo(s"No need to commit output of task because needsTaskCommit=false: $mrTaskAttemptID") } } }
Example 173
Source File: SerializableBuffer.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream} import java.nio.ByteBuffer import java.nio.channels.Channels private[spark] class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable { def value: ByteBuffer = buffer private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { val length = in.readInt() buffer = ByteBuffer.allocate(length) var amountRead = 0 val channel = Channels.newChannel(in) while (amountRead < length) { val ret = channel.read(buffer) if (ret == -1) { throw new EOFException("End of file before fully reading buffer") } amountRead += ret } buffer.rewind() // Allow us to read it later } private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.writeInt(buffer.limit()) if (Channels.newChannel(out).write(buffer) != buffer.limit()) { throw new IOException("Could not fully write buffer to output stream") } buffer.rewind() // Allow us to write it again later } }
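A small round-trip helper (illustrative) shows how writeObject/readObject hooks like the ones above are exercised: serializing a value and reading it back drives both hooks, surfacing an IOException if the buffer could not be fully written.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

object SerializationRoundTrip {
  def roundTrip[T <: Serializable](value: T): T = {
    val bytes = new ByteArrayOutputStream()
    val out = new ObjectOutputStream(bytes)
    try out.writeObject(value) finally out.close()

    val in = new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray))
    try in.readObject().asInstanceOf[T] finally in.close()
  }
}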
Example 174
Source File: DiskStore.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.storage import java.io.{FileOutputStream, IOException, RandomAccessFile} import java.nio.ByteBuffer import java.nio.channels.FileChannel.MapMode import com.google.common.io.Closeables import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.util.Utils import org.apache.spark.util.io.ChunkedByteBuffer def put(blockId: BlockId)(writeFunc: FileOutputStream => Unit): Unit = { if (contains(blockId)) { throw new IllegalStateException(s"Block $blockId is already present in the disk store") } logDebug(s"Attempting to put block $blockId") val startTime = System.currentTimeMillis val file = diskManager.getFile(blockId) val fileOutputStream = new FileOutputStream(file) var threwException: Boolean = true try { writeFunc(fileOutputStream) threwException = false } finally { try { Closeables.close(fileOutputStream, threwException) } finally { if (threwException) { remove(blockId) } } } val finishTime = System.currentTimeMillis logDebug("Block %s stored as %s file on disk in %d ms".format( file.getName, Utils.bytesToString(file.length()), finishTime - startTime)) } def putBytes(blockId: BlockId, bytes: ChunkedByteBuffer): Unit = { put(blockId) { fileOutputStream => val channel = fileOutputStream.getChannel Utils.tryWithSafeFinally { bytes.writeFully(channel) } { channel.close() } } } def getBytes(blockId: BlockId): ChunkedByteBuffer = { val file = diskManager.getFile(blockId.name) val channel = new RandomAccessFile(file, "r").getChannel Utils.tryWithSafeFinally { // For small files, directly read rather than memory map if (file.length < minMemoryMapBytes) { val buf = ByteBuffer.allocate(file.length.toInt) channel.position(0) while (buf.remaining() != 0) { if (channel.read(buf) == -1) { throw new IOException("Reached EOF before filling buffer\n" + s"offset=0\nfile=${file.getAbsolutePath}\nbuf.remaining=${buf.remaining}") } } buf.flip() new ChunkedByteBuffer(buf) } else { new ChunkedByteBuffer(channel.map(MapMode.READ_ONLY, 0, file.length)) } } { channel.close() } } def remove(blockId: BlockId): Boolean = { val file = diskManager.getFile(blockId.name) if (file.exists()) { val ret = file.delete() if (!ret) { logWarning(s"Error deleting ${file.getPath()}") } ret } else { false } } def contains(blockId: BlockId): Boolean = { val file = diskManager.getFile(blockId.name) file.exists() } }
Example 175
Source File: CartesianRDD.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.util.Utils private[spark] class CartesianPartition( idx: Int, @transient private val rdd1: RDD[_], @transient private val rdd2: RDD[_], s1Index: Int, s2Index: Int ) extends Partition { var s1 = rdd1.partitions(s1Index) var s2 = rdd2.partitions(s2Index) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization s1 = rdd1.partitions(s1Index) s2 = rdd2.partitions(s2Index) oos.defaultWriteObject() } } private[spark] class CartesianRDD[T: ClassTag, U: ClassTag]( sc: SparkContext, var rdd1 : RDD[T], var rdd2 : RDD[U]) extends RDD[(T, U)](sc, Nil) with Serializable { val numPartitionsInRdd2 = rdd2.partitions.length override def getPartitions: Array[Partition] = { // create the cross product split val array = new Array[Partition](rdd1.partitions.length * rdd2.partitions.length) for (s1 <- rdd1.partitions; s2 <- rdd2.partitions) { val idx = s1.index * numPartitionsInRdd2 + s2.index array(idx) = new CartesianPartition(idx, rdd1, rdd2, s1.index, s2.index) } array } override def getPreferredLocations(split: Partition): Seq[String] = { val currSplit = split.asInstanceOf[CartesianPartition] (rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)).distinct } override def compute(split: Partition, context: TaskContext): Iterator[(T, U)] = { val currSplit = split.asInstanceOf[CartesianPartition] for (x <- rdd1.iterator(currSplit.s1, context); y <- rdd2.iterator(currSplit.s2, context)) yield (x, y) } override def getDependencies: Seq[Dependency[_]] = List( new NarrowDependency(rdd1) { def getParents(id: Int): Seq[Int] = List(id / numPartitionsInRdd2) }, new NarrowDependency(rdd2) { def getParents(id: Int): Seq[Int] = List(id % numPartitionsInRdd2) } ) override def clearDependencies() { super.clearDependencies() rdd1 = null rdd2 = null } }
Example 176
Source File: UnionRDD.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer import scala.collection.parallel.{ForkJoinTaskSupport, ThreadPoolTaskSupport} import scala.concurrent.forkjoin.ForkJoinPool import scala.reflect.ClassTag import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils private[spark] class UnionPartition[T: ClassTag]( idx: Int, @transient private val rdd: RDD[T], val parentRddIndex: Int, @transient private val parentRddPartitionIndex: Int) extends Partition { var parentPartition: Partition = rdd.partitions(parentRddPartitionIndex) def preferredLocations(): Seq[String] = rdd.preferredLocations(parentPartition) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization parentPartition = rdd.partitions(parentRddPartitionIndex) oos.defaultWriteObject() } } object UnionRDD { private[spark] lazy val partitionEvalTaskSupport = new ForkJoinTaskSupport(new ForkJoinPool(8)) } @DeveloperApi class UnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]]) extends RDD[T](sc, Nil) { // Nil since we implement getDependencies // visible for testing private[spark] val isPartitionListingParallel: Boolean = rdds.length > conf.getInt("spark.rdd.parallelListingThreshold", 10) override def getPartitions: Array[Partition] = { val parRDDs = if (isPartitionListingParallel) { val parArray = rdds.par parArray.tasksupport = UnionRDD.partitionEvalTaskSupport parArray } else { rdds } val array = new Array[Partition](parRDDs.map(_.partitions.length).seq.sum) var pos = 0 for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) { array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index) pos += 1 } array } override def getDependencies: Seq[Dependency[_]] = { val deps = new ArrayBuffer[Dependency[_]] var pos = 0 for (rdd <- rdds) { deps += new RangeDependency(rdd, 0, pos, rdd.partitions.length) pos += rdd.partitions.length } deps } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val part = s.asInstanceOf[UnionPartition[T]] parent[T](part.parentRddIndex).iterator(part.parentPartition, context) } override def getPreferredLocations(s: Partition): Seq[String] = s.asInstanceOf[UnionPartition[T]].preferredLocations() override def clearDependencies() { super.clearDependencies() rdds = null } }
Example 177
Source File: PartitionerAwareUnionRDD.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark.{OneToOneDependency, Partition, SparkContext, TaskContext} import org.apache.spark.util.Utils private[spark] class PartitionerAwareUnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]] ) extends RDD[T](sc, rdds.map(x => new OneToOneDependency(x))) { require(rdds.nonEmpty) require(rdds.forall(_.partitioner.isDefined)) require(rdds.flatMap(_.partitioner).toSet.size == 1, "Parent RDDs have different partitioners: " + rdds.flatMap(_.partitioner)) override val partitioner = rdds.head.partitioner override def getPartitions: Array[Partition] = { val numPartitions = partitioner.get.numPartitions (0 until numPartitions).map { index => new PartitionerAwareUnionRDDPartition(rdds, index) }.toArray } // Get the location where most of the partitions of parent RDDs are located override def getPreferredLocations(s: Partition): Seq[String] = { logDebug("Finding preferred location for " + this + ", partition " + s.index) val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents val locations = rdds.zip(parentPartitions).flatMap { case (rdd, part) => val parentLocations = currPrefLocs(rdd, part) logDebug("Location of " + rdd + " partition " + part.index + " = " + parentLocations) parentLocations } val location = if (locations.isEmpty) { None } else { // Find the location that maximum number of parent partitions prefer Some(locations.groupBy(x => x).maxBy(_._2.length)._1) } logDebug("Selected location for " + this + ", partition " + s.index + " = " + location) location.toSeq } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents rdds.zip(parentPartitions).iterator.flatMap { case (rdd, p) => rdd.iterator(p, context) } } override def clearDependencies() { super.clearDependencies() rdds = null } // Get the *current* preferred locations from the DAGScheduler (as opposed to the static ones) private def currPrefLocs(rdd: RDD[_], part: Partition): Seq[String] = { rdd.context.getPreferredLocs(rdd, part.index).map(tl => tl.host) } }
Example 178
Source File: EmbeddedKsqlEngine.scala From ksql-jdbc-driver with Apache License 2.0 | 5 votes |
package com.github.mmolimar.ksql.jdbc.embedded import java.io.IOException import com.github.mmolimar.ksql.jdbc.utils.TestUtils import io.confluent.ksql.rest.server.{KsqlRestApplication, KsqlRestConfig} import io.confluent.ksql.version.metrics.VersionCheckerAgent import io.confluent.rest.RestConfig import kafka.utils.Logging import org.apache.kafka.clients.producer.ProducerConfig import org.scalamock.scalatest.MockFactory import io.confluent.ksql.util.KsqlConfig import scala.collection.JavaConverters._ class EmbeddedKsqlEngine(port: Int = TestUtils.getAvailablePort, brokerList: String, connectUrl: String) extends Logging with MockFactory { private val config = new KsqlRestConfig(Map( RestConfig.LISTENERS_CONFIG -> s"http://localhost:$port", ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokerList, KsqlConfig.CONNECT_URL_PROPERTY -> connectUrl, "ksql.service.id" -> "ksql-jdbc", "ksql.streams.auto.offset.reset" -> "latest", "ksql.command.topic.suffix" -> "commands" ).asJava) lazy val ksqlEngine: KsqlRestApplication = { import io.confluent.ksql.rest.server.mock.ksqlRestApplication val versionCheckerAgent = mock[VersionCheckerAgent] (versionCheckerAgent.start _).expects(*, *).returns((): Unit).anyNumberOfTimes (versionCheckerAgent.updateLastRequestTime _).expects().returns((): Unit).anyNumberOfTimes ksqlRestApplication(config, versionCheckerAgent) } @throws[IOException] def startup(): Unit = { info("Starting up embedded KSQL engine") ksqlEngine.start() info("Started embedded KSQL engine: " + getConnection) } def shutdown(): Unit = { info("Shutting down embedded KSQL engine") TestUtils.swallow(ksqlEngine.stop()) info("Stopped embedded KSQL engine") } def getPort: Int = port def getConnection: String = "localhost:" + getPort override def toString: String = { val sb: StringBuilder = new StringBuilder("KSQL{") sb.append("connection=").append(getConnection) sb.append('}') sb.toString } }
Example 179
Source File: TestOutputStream.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming import java.io.{IOException, ObjectInputStream} import org.apache.spark.rdd.RDD import org.apache.spark.streaming.dstream.{DStream, ForEachDStream} import org.apache.spark.util.Utils import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag class TestOutputStream[T: ClassTag](parent: DStream[T], val output: ArrayBuffer[Seq[T]] = ArrayBuffer[Seq[T]]()) extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { val collected = rdd.collect() output += collected }) { // This is to clear the output buffer every time it is read from a checkpoint @throws(classOf[IOException]) private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { ois.defaultReadObject() output.clear() } }
Example 180
Source File: SparkSQLCLIService.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.thriftserver import java.io.IOException import java.util.{List => JList} import javax.security.auth.login.LoginException import scala.collection.JavaConversions._ import org.apache.commons.logging.Log import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.shims.ShimLoader import org.apache.hadoop.security.UserGroupInformation import org.apache.hive.service.Service.STATE import org.apache.hive.service.auth.HiveAuthFactory import org.apache.hive.service.cli._ import org.apache.hive.service.{AbstractService, Service, ServiceException} import org.apache.spark.sql.hive.HiveContext import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._ import org.apache.spark.util.Utils private[hive] class SparkSQLCLIService(hiveContext: HiveContext) extends CLIService with ReflectedCompositeService { override def init(hiveConf: HiveConf) { setSuperField(this, "hiveConf", hiveConf) val sparkSqlSessionManager = new SparkSQLSessionManager(hiveContext) setSuperField(this, "sessionManager", sparkSqlSessionManager) addService(sparkSqlSessionManager) var sparkServiceUGI: UserGroupInformation = null if (ShimLoader.getHadoopShims.isSecurityEnabled) { try { HiveAuthFactory.loginFromKeytab(hiveConf) sparkServiceUGI = ShimLoader.getHadoopShims.getUGIForConf(hiveConf) HiveThriftServerShim.setServerUserName(sparkServiceUGI, this) } catch { case e @ (_: IOException | _: LoginException) => throw new ServiceException("Unable to login to kerberos with given principal/keytab", e) } } initCompositeService(hiveConf) } override def getInfo(sessionHandle: SessionHandle, getInfoType: GetInfoType): GetInfoValue = { getInfoType match { case GetInfoType.CLI_SERVER_NAME => new GetInfoValue("Spark SQL") case GetInfoType.CLI_DBMS_NAME => new GetInfoValue("Spark SQL") case GetInfoType.CLI_DBMS_VER => new GetInfoValue(hiveContext.sparkContext.version) case _ => super.getInfo(sessionHandle, getInfoType) } } } private[thriftserver] trait ReflectedCompositeService { this: AbstractService => def initCompositeService(hiveConf: HiveConf) { // Emulating `CompositeService.init(hiveConf)` val serviceList = getAncestorField[JList[Service]](this, 2, "serviceList") serviceList.foreach(_.init(hiveConf)) // Emulating `AbstractService.init(hiveConf)` invoke(classOf[AbstractService], this, "ensureCurrentState", classOf[STATE] -> STATE.NOTINITED) setAncestorField(this, 3, "hiveConf", hiveConf) invoke(classOf[AbstractService], this, "changeState", classOf[STATE] -> STATE.INITED) getAncestorField[Log](this, 3, "LOG").info(s"Service: $getName is inited.") } }
Example 181
Source File: RawTextSender.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.{ByteArrayOutputStream, IOException} import java.net.ServerSocket import java.nio.ByteBuffer import scala.io.Source import org.apache.spark.{SparkConf, Logging} import org.apache.spark.serializer.KryoSerializer import org.apache.spark.util.IntParam private[streaming] object RawTextSender extends Logging { def main(args: Array[String]) { if (args.length != 4) { System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>") System.exit(1) } // Parse the arguments using a pattern match val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args // Repeat the input data multiple times to fill in a buffer val lines = Source.fromFile(file).getLines().toArray val bufferStream = new ByteArrayOutputStream(blockSize + 1000) val ser = new KryoSerializer(new SparkConf()).newInstance() val serStream = ser.serializeStream(bufferStream) var i = 0 while (bufferStream.size < blockSize) { serStream.writeObject(lines(i)) i = (i + 1) % lines.length } val array = bufferStream.toByteArray val countBuf = ByteBuffer.wrap(new Array[Byte](4)) countBuf.putInt(array.length) countBuf.flip() val serverSocket = new ServerSocket(port) logInfo("Listening on port " + port) while (true) { val socket = serverSocket.accept() logInfo("Got a new connection") val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec) try { while (true) { out.write(countBuf.array) out.write(array) } } catch { case e: IOException => logError("Client disconnected") } finally { socket.close() } } } }
Example 182
Source File: CommandUtils.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.worker import java.io.{File, FileOutputStream, InputStream, IOException} import java.lang.System._ import scala.collection.JavaConversions._ import scala.collection.Map import org.apache.spark.Logging import org.apache.spark.deploy.Command import org.apache.spark.launcher.WorkerCommandBuilder import org.apache.spark.util.Utils def redirectStream(in: InputStream, file: File) { val out = new FileOutputStream(file, true) // TODO: It would be nice to add a shutdown hook here that explains why the output is // terminating. Otherwise if the worker dies the executor logs will silently stop. new Thread("redirect output to " + file) { override def run() { try { Utils.copyStream(in, out, true) } catch { case e: IOException => logInfo("Redirection to " + file + " closed: " + e.getMessage) } } }.start() } }
Example 183
Source File: ReplayListenerBus.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.io.{InputStream, IOException} import scala.io.Source import com.fasterxml.jackson.core.JsonParseException import org.json4s.jackson.JsonMethods._ import org.apache.spark.Logging import org.apache.spark.util.JsonProtocol def replay( logData: InputStream, sourceName: String, maybeTruncated: Boolean = false): Unit = { var currentLine: String = null var lineNumber: Int = 1 try { val lines = Source.fromInputStream(logData).getLines() while (lines.hasNext) { currentLine = lines.next() try { postToAll(JsonProtocol.sparkEventFromJson(parse(currentLine))) } catch { case jpe: JsonParseException => // We can only ignore exception from last line of the file that might be truncated if (!maybeTruncated || lines.hasNext) { throw jpe } else { logWarning(s"Got JsonParseException from log file $sourceName" + s" at line $lineNumber, the file might not have finished writing cleanly.") } } lineNumber += 1 } } catch { case ioe: IOException => throw ioe case e: Exception => logError(s"Exception parsing Spark event log: $sourceName", e) logError(s"Malformed line #$lineNumber: $currentLine\n") } } }
Example 184
Source File: SerializableBuffer.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream} import java.nio.ByteBuffer import java.nio.channels.Channels private[spark] class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable { def value: ByteBuffer = buffer private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { val length = in.readInt() buffer = ByteBuffer.allocate(length) var amountRead = 0 val channel = Channels.newChannel(in) while (amountRead < length) { val ret = channel.read(buffer) if (ret == -1) { throw new EOFException("End of file before fully reading buffer") } amountRead += ret } buffer.rewind() // Allow us to read it later } private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.writeInt(buffer.limit()) if (Channels.newChannel(out).write(buffer) != buffer.limit()) { throw new IOException("Could not fully write buffer to output stream") } buffer.rewind() // Allow us to write it again later } }
Example 185
Source File: BlockManagerId.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.storage import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput} import java.util.concurrent.ConcurrentHashMap import org.apache.spark.SparkContext import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils def apply(execId: String, host: String, port: Int): BlockManagerId = getCachedBlockManagerId(new BlockManagerId(execId, host, port)) def apply(in: ObjectInput): BlockManagerId = { val obj = new BlockManagerId() obj.readExternal(in) getCachedBlockManagerId(obj) } val blockManagerIdCache = new ConcurrentHashMap[BlockManagerId, BlockManagerId]() def getCachedBlockManagerId(id: BlockManagerId): BlockManagerId = { blockManagerIdCache.putIfAbsent(id, id) blockManagerIdCache.get(id) } }
Example 186
Source File: CartesianRDD.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.util.Utils private[spark] class CartesianPartition( idx: Int, @transient rdd1: RDD[_], @transient rdd2: RDD[_], s1Index: Int, s2Index: Int ) extends Partition { var s1 = rdd1.partitions(s1Index) var s2 = rdd2.partitions(s2Index) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization s1 = rdd1.partitions(s1Index) s2 = rdd2.partitions(s2Index) oos.defaultWriteObject() } } private[spark] class CartesianRDD[T: ClassTag, U: ClassTag]( sc: SparkContext, var rdd1 : RDD[T], var rdd2 : RDD[U]) extends RDD[Pair[T, U]](sc, Nil) with Serializable { val numPartitionsInRdd2 = rdd2.partitions.length override def getPartitions: Array[Partition] = { // create the cross product split val array = new Array[Partition](rdd1.partitions.length * rdd2.partitions.length) for (s1 <- rdd1.partitions; s2 <- rdd2.partitions) { val idx = s1.index * numPartitionsInRdd2 + s2.index array(idx) = new CartesianPartition(idx, rdd1, rdd2, s1.index, s2.index) } array } override def getPreferredLocations(split: Partition): Seq[String] = { val currSplit = split.asInstanceOf[CartesianPartition] (rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)).distinct } override def compute(split: Partition, context: TaskContext): Iterator[(T, U)] = { val currSplit = split.asInstanceOf[CartesianPartition] for (x <- rdd1.iterator(currSplit.s1, context); y <- rdd2.iterator(currSplit.s2, context)) yield (x, y) } override def getDependencies: Seq[Dependency[_]] = List( new NarrowDependency(rdd1) { def getParents(id: Int): Seq[Int] = List(id / numPartitionsInRdd2) }, new NarrowDependency(rdd2) { def getParents(id: Int): Seq[Int] = List(id % numPartitionsInRdd2) } ) override def clearDependencies() { super.clearDependencies() rdd1 = null rdd2 = null } }
Example 187
Source File: UnionRDD.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd

import java.io.{IOException, ObjectOutputStream}

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag

import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext}
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.util.Utils

private[spark] class UnionPartition[T: ClassTag](
    idx: Int,
    @transient rdd: RDD[T],
    val parentRddIndex: Int,
    @transient parentRddPartitionIndex: Int)
  extends Partition {

  var parentPartition: Partition = rdd.partitions(parentRddPartitionIndex)

  def preferredLocations(): Seq[String] = rdd.preferredLocations(parentPartition)

  override val index: Int = idx

  @throws(classOf[IOException])
  private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException {
    // Update the reference to parent split at the time of task serialization
    parentPartition = rdd.partitions(parentRddPartitionIndex)
    oos.defaultWriteObject()
  }
}

@DeveloperApi
class UnionRDD[T: ClassTag](
    sc: SparkContext,
    var rdds: Seq[RDD[T]])
  extends RDD[T](sc, Nil) {  // Nil since we implement getDependencies

  override def getPartitions: Array[Partition] = {
    val array = new Array[Partition](rdds.map(_.partitions.length).sum)
    var pos = 0
    for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) {
      array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index)
      pos += 1
    }
    array
  }

  override def getDependencies: Seq[Dependency[_]] = {
    val deps = new ArrayBuffer[Dependency[_]]
    var pos = 0
    for (rdd <- rdds) {
      deps += new RangeDependency(rdd, 0, pos, rdd.partitions.length)
      pos += rdd.partitions.length
    }
    deps
  }

  override def compute(s: Partition, context: TaskContext): Iterator[T] = {
    val part = s.asInstanceOf[UnionPartition[T]]
    parent[T](part.parentRddIndex).iterator(part.parentPartition, context)
  }

  override def getPreferredLocations(s: Partition): Seq[String] =
    s.asInstanceOf[UnionPartition[T]].preferredLocations()

  override def clearDependencies() {
    super.clearDependencies()
    rdds = null
  }
}
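UnionRDD is what SparkContext.union builds when the parent RDDs do not share a partitioner: the result simply concatenates the parents' partitions. A usage sketch, assuming Spark is on the classpath:

import org.apache.spark.{SparkConf, SparkContext}

object UnionDemo extends App {
  val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("union-demo"))
  val first = sc.parallelize(1 to 3)
  val second = sc.parallelize(4 to 6)
  val combined = sc.union(Seq(first, second))
  println(combined.partitions.length)   // sum of the parents' partition counts
  println(combined.collect().toSeq)     // Seq(1, 2, 3, 4, 5, 6)
  sc.stop()
}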
Example 188
Source File: PartitionerAwareUnionRDD.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd

import java.io.{IOException, ObjectOutputStream}

import scala.reflect.ClassTag

import org.apache.spark.{OneToOneDependency, Partition, SparkContext, TaskContext}
import org.apache.spark.util.Utils

private[spark] class PartitionerAwareUnionRDD[T: ClassTag](
    sc: SparkContext,
    var rdds: Seq[RDD[T]]
  ) extends RDD[T](sc, rdds.map(x => new OneToOneDependency(x))) {
  require(rdds.length > 0)
  require(rdds.forall(_.partitioner.isDefined))
  require(rdds.flatMap(_.partitioner).toSet.size == 1,
    "Parent RDDs have different partitioners: " + rdds.flatMap(_.partitioner))

  override val partitioner = rdds.head.partitioner

  override def getPartitions: Array[Partition] = {
    val numPartitions = partitioner.get.numPartitions
    (0 until numPartitions).map(index => {
      new PartitionerAwareUnionRDDPartition(rdds, index)
    }).toArray
  }

  // Get the location where most of the partitions of parent RDDs are located
  override def getPreferredLocations(s: Partition): Seq[String] = {
    logDebug("Finding preferred location for " + this + ", partition " + s.index)
    val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents
    val locations = rdds.zip(parentPartitions).flatMap {
      case (rdd, part) => {
        val parentLocations = currPrefLocs(rdd, part)
        logDebug("Location of " + rdd + " partition " + part.index + " = " + parentLocations)
        parentLocations
      }
    }
    val location = if (locations.isEmpty) {
      None
    } else {
      // Find the location that maximum number of parent partitions prefer
      Some(locations.groupBy(x => x).maxBy(_._2.length)._1)
    }
    logDebug("Selected location for " + this + ", partition " + s.index + " = " + location)
    location.toSeq
  }

  override def compute(s: Partition, context: TaskContext): Iterator[T] = {
    val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents
    rdds.zip(parentPartitions).iterator.flatMap {
      case (rdd, p) => rdd.iterator(p, context)
    }
  }

  override def clearDependencies() {
    super.clearDependencies()
    rdds = null
  }

  // Get the *current* preferred locations from the DAGScheduler (as opposed to the static ones)
  private def currPrefLocs(rdd: RDD[_], part: Partition): Seq[String] = {
    rdd.context.getPreferredLocs(rdd, part.index).map(tl => tl.host)
  }
}
Example 189
Source File: HDFSUtil.scala From aerosolve with Apache License 2.0 | 5 votes |
package com.airbnb.common.ml.util

import java.io.{BufferedReader, IOException, InputStreamReader}
import java.net.URI

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object HDFSUtil extends ScalaLogging {

  private lazy val hadoopConfiguration = new Configuration()

  def lastTaskSucceed(path: String): Boolean = {
    if (dirExists(path)) {
      if (dirExists(path + "/_temporary")) {
        logger.info(s"Deleting partial data for $path.")
        deleteDirWithoutThrow(path)
        false
      } else {
        logger.info(s"$path exists")
        true
      }
    } else {
      logger.info(s"$path does not exist")
      false
    }
  }

  def dirExists(dir: String): Boolean = {
    val path = new Path(dir)
    val hdfs = FileSystem.get(
      new java.net.URI(dir), hadoopConfiguration)
    hdfs.exists(path)
  }

  def deleteDirWithoutThrow(dir: String): Unit = {
    val path = new Path(dir)
    val hdfs = FileSystem.get(
      new java.net.URI(dir), hadoopConfiguration)
    if (hdfs.exists(path)) {
      logger.warn(s"$dir exists, DELETING")
      try {
        hdfs.delete(path, true)
      } catch {
        case e: IOException => logger.error(s" exception $e")
      }
    }
  }

  def createPath(path: String): Unit = {
    val remotePath = new Path(path)
    val remoteFS = remotePath.getFileSystem(hadoopConfiguration)
    remoteFS.mkdirs(new Path(path))
  }

  def readStringFromFile(inputFile: String): String = {
    val fs = FileSystem.get(new URI(inputFile), hadoopConfiguration)
    val path = new Path(inputFile)
    val stream = fs.open(path)
    val reader = new BufferedReader(new InputStreamReader(stream))
    val str = Stream.continually(reader.readLine()).takeWhile(_ != null).mkString("\n")
    str
  }
}
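A usage sketch of the helper above; the paths and the HdfsUtilDemo object are hypothetical, and a Hadoop configuration that can resolve the hdfs:// authority is assumed to be on the classpath:

import com.airbnb.common.ml.util.HDFSUtil

object HdfsUtilDemo {
  def main(args: Array[String]): Unit = {
    val output = "hdfs://namenode:8020/tmp/example-output"
    // lastTaskSucceed deletes a leftover _temporary directory and reports false,
    // signalling that the previous run should be redone.
    if (!HDFSUtil.lastTaskSucceed(output)) {
      HDFSUtil.createPath(output)
      // ... run the job that writes to `output` ...
    }
    // Read a small side file back as a single string
    val config = HDFSUtil.readStringFromFile("hdfs://namenode:8020/tmp/example-config.json")
    println(config.length)
  }
}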
Example 190
Source File: DirectoryListFileFinder.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.commons.utils

import java.io.{File, FileNotFoundException, IOException}

import scala.util.{Failure, Success, Try}

// Excerpt: members of the DirectoryListFileFinder abstract class; the enclosing class
// declaration (which defines `dirsToSearch` and `listFilesInDirectory`) is elided here.
  def filePredicate(f: File, desc: Option[String]): Boolean

  def findFile(): Try[File] = {
    findFile(None)
  }

  def findFile(desc: String): Try[File] = {
    findFile(Some(desc))
  }

  def findFile(desc: Option[String]): Try[File] = {
    findPotentialFiles(
      dirsToSearch, listFilesInDirectory
      // convert to Try - give a nice message in the exception concerning the dirs, otherwise just Success it
    ).fold(dirs => Failure(
      new IOException(s"Unable to list files in dirs: ${dirs.mkString(", ")}")
    ),
      Success[Seq[File]]
    ).flatMap(_.find(filePredicate(_, desc))
      .map(Success[File])
      .getOrElse(Failure(
        new FileNotFoundException(
          s"Unable to find file ${desc.map(_ + " ").getOrElse("")}" +
            s"in dirs: ${dirsToSearch.mkString(", ")}")
      ))
    )
  }
}

object DirectoryListFileFinder {
  type EitherBadDirsOrFiles = Either[Seq[File], Seq[File]]

  def findPotentialFiles(
      dirs: Traversable[File],
      listFilesInDirectory: File => Option[Seq[File]]): EitherBadDirsOrFiles = {
    dirs.map { dir =>
      val files = listFilesInDirectory(dir)
      // if we're unable to list files inside the dir then
      // let's not lose this information by keeping the dir in Left
      files.toRight(dir)
    }.foldLeft(Right(Seq[File]()): EitherBadDirsOrFiles) {
      case (Left(badDirs), Left(badDir)) => Left(badDir +: badDirs)
      case (Left(badDirs), Right(_)) => Left(badDirs)
      case (Right(_), Left(badDir)) => Left(Seq(badDir))
      case (Right(files), Right(files2)) => Right(files ++ files2)
      case _ => ??? // to silence buggy 2.10 non-exhaustive match warning
    }
  }
}
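A usage sketch of the companion's findPotentialFiles, whose signature is shown above. The demo object and directories are hypothetical, and the Seahorse commons classes are assumed to be on the classpath; File.listFiles() returning null is mapped to None, so unreadable directories end up in Left.

import java.io.File
import ai.deepsense.commons.utils.DirectoryListFileFinder

object FindPotentialFilesDemo extends App {
  val dirs: Traversable[File] = Seq(new File("/tmp"), new File("/does-not-exist"))
  val result = DirectoryListFileFinder.findPotentialFiles(
    dirs,
    dir => Option(dir.listFiles()).map(_.toSeq)
  )
  result match {
    case Right(files)  => println(s"Found ${files.size} files")
    case Left(badDirs) => println(s"Could not list: ${badDirs.mkString(", ")}")
  }
}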
Example 191
Source File: WriteTransformer.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperations

import java.io.{File, IOException}

import scala.reflect.runtime.{universe => ru}

import ai.deepsense.commons.utils.Version
import ai.deepsense.commons.utils.FileOperations.deleteRecursivelyIfExists
import ai.deepsense.deeplang.DOperation.Id
import ai.deepsense.deeplang.documentation.OperationDocumentation
import ai.deepsense.deeplang.doperables.Transformer
import ai.deepsense.deeplang.doperations.exceptions.DeepSenseIOException
import ai.deepsense.deeplang.params.{BooleanParam, Params, StringParam}
import ai.deepsense.deeplang.{DOperation1To0, ExecutionContext}
import java.net.URI
import org.apache.hadoop.fs.{FileSystem, Path}

case class WriteTransformer()
  extends DOperation1To0[Transformer]
  with Params
  with OperationDocumentation {

  override val id: Id = "58368deb-68d0-4657-ae3f-145160cb1e2b"
  override val name: String = "Write Transformer"
  override val description: String = "Writes a Transformer to a directory"

  override val since: Version = Version(1, 1, 0)

  val shouldOverwrite = BooleanParam(
    name = "overwrite",
    description = Some("Should an existing transformer with the same name be overwritten?")
  )
  setDefault(shouldOverwrite, true)

  def getShouldOverwrite: Boolean = $(shouldOverwrite)
  def setShouldOverwrite(value: Boolean): this.type = set(shouldOverwrite, value)

  val outputPath = StringParam(
    name = "output path",
    description = Some("The output path for writing the Transformer."))

  def getOutputPath: String = $(outputPath)
  def setOutputPath(value: String): this.type = set(outputPath, value)

  val specificParams: Array[ai.deepsense.deeplang.params.Param[_]] = Array(outputPath, shouldOverwrite)

  override protected def execute(transformer: Transformer)(context: ExecutionContext): Unit = {
    val outputDictPath = getOutputPath
    try {
      if (getShouldOverwrite) {
        removeDirectory(context, outputDictPath)
      }
      transformer.save(context, outputDictPath)
    } catch {
      case e: IOException =>
        logger.error(s"WriteTransformer error. Could not write transformer to the directory", e)
        throw DeepSenseIOException(e)
    }
  }

  private def removeDirectory(context: ExecutionContext, path: String): Unit = {
    if (path.startsWith("hdfs://")) {
      val configuration = context.sparkContext.hadoopConfiguration
      val hdfs = FileSystem.get(new URI(extractHdfsAddress(path)), configuration)
      hdfs.delete(new Path(path), true)
    } else {
      deleteRecursivelyIfExists(new File(path))
    }
  }

  private def extractHdfsAddress(path: String): String = {
    // first group: "hdfs://ip.addr.of.hdfs", second group: "/some/path/on/hdfs"
    val regex = "(hdfs:\\/\\/[^\\/]*)(.*)".r
    val regex(hdfsAddress, _) = path
    hdfsAddress
  }

  @transient
  override lazy val tTagTI_0: ru.TypeTag[Transformer] = ru.typeTag[Transformer]
}

object WriteTransformer {
  def apply(outputPath: String): WriteTransformer = {
    new WriteTransformer().setOutputPath(outputPath)
  }
}
Example 192
Source File: DriverFiles.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperations.readwritedataframe.filestorage

import java.io.{File, IOException, PrintWriter}

import scala.io.Source

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.execution.datasources.csv.{DataframeToDriverCsvFileWriter, RawCsvRDDToDataframe}
import org.apache.spark.sql.{Dataset, Encoders, Row, SaveMode, DataFrame => SparkDataFrame}

import ai.deepsense.commons.resources.ManagedResource
import ai.deepsense.deeplang.ExecutionContext
import ai.deepsense.deeplang.doperables.dataframe.DataFrame
import ai.deepsense.deeplang.doperations.inout.{InputFileFormatChoice, OutputFileFormatChoice}
import ai.deepsense.deeplang.doperations.readwritedataframe.filestorage.csv.CsvOptions
import ai.deepsense.deeplang.doperations.readwritedataframe.{FilePath, FileScheme}
import ai.deepsense.deeplang.readjsondataset.JsonReader
import ai.deepsense.sparkutils.SQL

object DriverFiles extends JsonReader {

  def read(driverPath: String, fileFormat: InputFileFormatChoice)
          (implicit context: ExecutionContext): SparkDataFrame = fileFormat match {
    case csv: InputFileFormatChoice.Csv => readCsv(driverPath, csv)
    case json: InputFileFormatChoice.Json => readJson(driverPath)
    case parquet: InputFileFormatChoice.Parquet => throw ParquetNotSupported
  }

  def write(dataFrame: DataFrame, path: FilePath, fileFormat: OutputFileFormatChoice, saveMode: SaveMode)
           (implicit context: ExecutionContext): Unit = {
    path.verifyScheme(FileScheme.File)
    if (saveMode == SaveMode.ErrorIfExists && new File(path.pathWithoutScheme).exists()) {
      throw new IOException(s"Output file ${path.fullPath} already exists")
    }
    fileFormat match {
      case csv: OutputFileFormatChoice.Csv => writeCsv(path, csv, dataFrame)
      case json: OutputFileFormatChoice.Json => writeJson(path, dataFrame)
      case parquet: OutputFileFormatChoice.Parquet => throw ParquetNotSupported
    }
  }

  private def readCsv
      (driverPath: String, csvChoice: InputFileFormatChoice.Csv)
      (implicit context: ExecutionContext): SparkDataFrame = {
    val params = CsvOptions.map(csvChoice.getNamesIncluded, csvChoice.getCsvColumnSeparator())
    val lines = Source.fromFile(driverPath).getLines().toStream
    val fileLinesRdd = context.sparkContext.parallelize(lines)

    RawCsvRDDToDataframe.parse(fileLinesRdd, context.sparkSQLSession.sparkSession, params)
  }

  private def readJson(driverPath: String)(implicit context: ExecutionContext) = {
    val lines = Source.fromFile(driverPath).getLines().toStream
    val fileLinesRdd = context.sparkContext.parallelize(lines)
    val sparkSession = context.sparkSQLSession.sparkSession
    readJsonFromRdd(fileLinesRdd, sparkSession)
  }

  private def writeCsv
      (path: FilePath, csvChoice: OutputFileFormatChoice.Csv, dataFrame: DataFrame)
      (implicit context: ExecutionContext): Unit = {
    val params = CsvOptions.map(csvChoice.getNamesIncluded, csvChoice.getCsvColumnSeparator())

    DataframeToDriverCsvFileWriter.write(
      dataFrame.sparkDataFrame,
      params,
      dataFrame.schema.get,
      path.pathWithoutScheme,
      context.sparkSQLSession.sparkSession
    )
  }

  private def writeJson(path: FilePath, dataFrame: DataFrame)
                       (implicit context: ExecutionContext): Unit = {
    val rawJsonLines: RDD[String] = SQL.dataFrameToJsonRDD(dataFrame.sparkDataFrame)
    writeRddToDriverFile(path.pathWithoutScheme, rawJsonLines)
  }

  private def writeRddToDriverFile(driverPath: String, lines: RDD[String]): Unit = {
    val recordSeparator = System.getProperty("line.separator", "\n")
    ManagedResource(new PrintWriter(driverPath)) { writer =>
      lines.collect().foreach(line => writer.write(line + recordSeparator))
    }
  }
}
Example 193
Source File: FileDownloader.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperations.readwritedataframe.filestorage

import java.io.{BufferedWriter, FileOutputStream, IOException, OutputStreamWriter}
import java.nio.file.{Files, Paths}
import java.util.UUID

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

import ai.deepsense.deeplang.ExecutionContext
import ai.deepsense.deeplang.doperations.exceptions.DeepSenseIOException
import ai.deepsense.deeplang.doperations.readwritedataframe.FilePath

private[filestorage] object FileDownloader {

  def downloadFile(url: String)(implicit context: ExecutionContext): FilePath = {
    if (context.tempPath.startsWith("hdfs://")) {
      downloadFileToHdfs(url)
    } else {
      downloadFileToDriver(url)
    }
  }

  private def downloadFileToHdfs(url: String)(implicit context: ExecutionContext) = {
    val content = scala.io.Source.fromURL(url).getLines()
    val hdfsPath = s"${context.tempPath}/${UUID.randomUUID()}"

    val configuration = new Configuration()
    val hdfs = FileSystem.get(configuration)
    val file = new Path(hdfsPath)
    val hdfsStream = hdfs.create(file)
    val writer = new BufferedWriter(new OutputStreamWriter(hdfsStream))
    try {
      content.foreach { s =>
        writer.write(s)
        writer.newLine()
      }
    } finally {
      safeClose(writer)
      hdfs.close()
    }

    FilePath(hdfsPath)
  }

  private def downloadFileToDriver(url: String)
                                  (implicit context: ExecutionContext) = {
    val outputDirPath = Paths.get(context.tempPath)
    // We're checking if the output is a directory following symlinks.
    // The default behaviour of createDirectories is NOT to follow symlinks
    if (!Files.isDirectory(outputDirPath)) {
      Files.createDirectories(outputDirPath)
    }

    val outFilePath = Files.createTempFile(outputDirPath, "download", ".csv")
    // content is a stream. Do not invoke stuff like .toList() on it.
    val content = scala.io.Source.fromURL(url).getLines()
    val writer: BufferedWriter =
      new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFilePath.toFile)))
    try {
      content.foreach { s =>
        writer.write(s)
        writer.newLine()
      }
    } finally {
      safeClose(writer)
    }
    FilePath(s"file:///$outFilePath")
  }

  private def safeClose(bufferedWriter: BufferedWriter): Unit = {
    try {
      bufferedWriter.flush()
      bufferedWriter.close()
    } catch {
      case e: IOException => throw new DeepSenseIOException(e)
    }
  }
}
Example 194
Source File: WriteDataFrame.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperations

import java.io.IOException
import java.util.Properties

import scala.reflect.runtime.{universe => ru}

import ai.deepsense.commons.utils.Version
import ai.deepsense.deeplang.DOperation.Id
import ai.deepsense.deeplang._
import ai.deepsense.deeplang.documentation.OperationDocumentation
import ai.deepsense.deeplang.doperables.dataframe.DataFrame
import ai.deepsense.deeplang.doperations.exceptions.DeepSenseIOException
import ai.deepsense.deeplang.doperations.inout._
import ai.deepsense.deeplang.doperations.readwritedataframe.filestorage.DataFrameToFileWriter
import ai.deepsense.deeplang.doperations.readwritedataframe.googlestorage.DataFrameToGoogleSheetWriter
import ai.deepsense.deeplang.doperations.readwritedataframe.validators.{FilePathHasValidFileScheme, ParquetSupportedOnClusterOnly}
import ai.deepsense.deeplang.inference.{InferContext, InferenceWarnings}
import ai.deepsense.deeplang.params.choice.ChoiceParam
import ai.deepsense.deeplang.params.{Param, Params}
import org.apache.spark.sql.SaveMode

class WriteDataFrame()
  extends DOperation1To0[DataFrame]
  with Params
  with OperationDocumentation {

  override val id: Id = "9e460036-95cc-42c5-ba64-5bc767a40e4e"
  override val name: String = "Write DataFrame"
  override val description: String = "Writes a DataFrame to a file or database"

  override val since: Version = Version(0, 4, 0)

  @transient
  override lazy val tTagTI_0: ru.TypeTag[DataFrame] = ru.typeTag[DataFrame]

  val storageType = ChoiceParam[OutputStorageTypeChoice](
    name = "data storage type",
    description = Some("Storage type."))

  def getStorageType(): OutputStorageTypeChoice = $(storageType)
  def setStorageType(value: OutputStorageTypeChoice): this.type = set(storageType, value)

  val specificParams: Array[Param[_]] = Array(storageType)
  setDefault(storageType, new OutputStorageTypeChoice.File())

  override def execute(dataFrame: DataFrame)(context: ExecutionContext): Unit = {
    import OutputStorageTypeChoice._
    try {
      getStorageType() match {
        case jdbcChoice: Jdbc => writeToJdbc(jdbcChoice, context, dataFrame)
        case googleSheetChoice: GoogleSheet => DataFrameToGoogleSheetWriter.writeToGoogleSheet(
          googleSheetChoice, context, dataFrame
        )
        case fileChoice: File => DataFrameToFileWriter.writeToFile(fileChoice, context, dataFrame)
      }
    } catch {
      case e: IOException =>
        logger.error(s"WriteDataFrame error. Could not write file to designated storage", e)
        throw DeepSenseIOException(e)
    }
  }

  private def writeToJdbc(
      jdbcChoice: OutputStorageTypeChoice.Jdbc,
      context: ExecutionContext,
      dataFrame: DataFrame): Unit = {
    val properties = new Properties()
    properties.setProperty("driver", jdbcChoice.getJdbcDriverClassName)

    val jdbcUrl = jdbcChoice.getJdbcUrl
    val jdbcTableName = jdbcChoice.getJdbcTableName
    val saveMode = if (jdbcChoice.getShouldOverwrite) SaveMode.Overwrite else SaveMode.ErrorIfExists

    dataFrame.sparkDataFrame.write.mode(saveMode).jdbc(jdbcUrl, jdbcTableName, properties)
  }

  override def inferKnowledge(k0: DKnowledge[DataFrame])(context: InferContext): (Unit, InferenceWarnings) = {
    FilePathHasValidFileScheme.validate(this)
    ParquetSupportedOnClusterOnly.validate(this)
    super.inferKnowledge(k0)(context)
  }
}
Example 195
Source File: Loader.scala From tensorflow_scala with Apache License 2.0 | 5 votes |
package org.platanios.tensorflow.data

import com.typesafe.scalalogging.Logger

import java.io.IOException
import java.net.URL
import java.nio.file.{Files, Path}

import scala.collection.compat.immutable.LazyList
import scala.io.Source
import scala.util.matching.Regex

trait Loader {
  protected val logger: Logger

  protected val googleDriveConfirmTokenRegex: Regex = {
    """<a id="uc-download-link".*href="/uc\?export=download&(confirm=.*)&id=.*">Download anyway</a>""".r
  }

  def maybeDownload(path: Path, url: String, bufferSize: Int = 8192): Boolean = {
    if (Files.exists(path)) {
      false
    } else {
      try {
        logger.info(s"Downloading file '$url'.")
        Files.createDirectories(path.getParent)
        download(path, url, bufferSize)

        // Small hack to deal with downloading large Google Drive files.
        if (Files.size(path) < 1024 * 1024 && url.contains("drive.google.com")) {
          val content = Source.fromFile(path.toFile).getLines().mkString("\n")
          googleDriveConfirmTokenRegex.findFirstMatchIn(content) match {
            case Some(confirmToken) => download(path, s"$url&${confirmToken.group(1)}", bufferSize)
            case None => ()
          }
        }

        logger.info(s"Downloaded file '$url'.")
        true
      } catch {
        case e: IOException =>
          logger.error(s"Could not download file '$url'", e)
          throw e
      }
    }
  }

  protected def download(path: Path, url: String, bufferSize: Int = 8192): Unit = {
    val connection = new URL(url).openConnection()
    val contentLength = connection.getContentLengthLong
    val inputStream = connection.getInputStream
    val outputStream = Files.newOutputStream(path)
    val buffer = new Array[Byte](bufferSize)
    var progress = 0L
    var progressLogTime = System.currentTimeMillis
    LazyList.continually(inputStream.read(buffer)).takeWhile(_ != -1).foreach(numBytes => {
      outputStream.write(buffer, 0, numBytes)
      progress += numBytes
      val time = System.currentTimeMillis
      if (time - progressLogTime >= 1e4) {
        if (contentLength > 0) {
          val numBars = Math.floorDiv(10 * progress, contentLength).toInt
          logger.info(s"[${"=" * numBars}${" " * (10 - numBars)}] $progress / $contentLength bytes downloaded.")
          progressLogTime = time
        } else {
          logger.info(s"$progress bytes downloaded.")
          progressLogTime = time
        }
      }
    })
    outputStream.close()
  }
}
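A usage sketch of the trait: a concrete loader only has to provide a logger, maybeDownload is a no-op if the target file already exists, and the IOException is rethrown after logging otherwise. The ExampleDataLoader object, the URL, and the target path below are hypothetical; the logger is built with scala-logging's Logger.apply over an SLF4J logger.

import java.nio.file.Paths
import com.typesafe.scalalogging.Logger
import org.slf4j.LoggerFactory
import org.platanios.tensorflow.data.Loader

object ExampleDataLoader extends Loader {
  override protected val logger: Logger = Logger(LoggerFactory.getLogger("ExampleDataLoader"))
}

object LoaderDemo extends App {
  val downloaded = ExampleDataLoader.maybeDownload(
    Paths.get("/tmp/data/iris.csv"),
    "https://example.org/iris.csv")
  println(s"Downloaded this run: $downloaded")
}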
Example 196
Source File: ProcessDebuggerIntegrationSpec.scala From scala-debugger with Apache License 2.0 | 5 votes |
package org.scaladebugger.api.debuggers

import java.io.IOException
import java.util.concurrent.atomic.AtomicBoolean

import org.scaladebugger.api.utils.JDITools
import org.scaladebugger.test.helpers.ParallelMockFunSpec
import test.{ApiTestUtilities, VirtualMachineFixtures}

import scala.util.Try

class ProcessDebuggerIntegrationSpec extends ParallelMockFunSpec
  with VirtualMachineFixtures
  with ApiTestUtilities {

  describe("ProcessDebugger") {
    it("should be able to attach to a running JVM process") {
      withProcess((pid, process) => {
        val processDebugger = ProcessDebugger(pid)

        val attachedToVirtualMachine = new AtomicBoolean(false)

        // Need to keep retrying until process is ready to be attached to
        // NOTE: If unable to connect, ensure that hostname is "localhost"
        eventually {
          processDebugger.start(_ => attachedToVirtualMachine.set(true))
        }

        // Keep checking back until we have successfully attached
        eventually {
          attachedToVirtualMachine.get() should be (true)
        }
      })
    }
  }

  private def withProcess[T](testCode: (Int, Process) => T): T = {
    val jvmProcess = createProcess()

    val result = Try(testCode(jvmProcess._1, jvmProcess._2))

    destroyProcess(jvmProcess._2)

    result.get
  }

  private def createProcess(): (Int, Process) = {
    val (pid, process) = JDITools.spawnAndGetPid(
      className = "org.scaladebugger.test.misc.AttachingMain",
      server = true,
      port = 0 // Assign ephemeral port
    )

    // If unable to retrieve the process PID, exit now
    if (pid <= 0) {
      process.destroy()
      throw new IOException("Unable to retrieve process PID!")
    }

    (pid, process)
  }

  private def destroyProcess(process: Process): Unit = process.destroy()
}
Example 197
Source File: ServiceDiscoverySuite.scala From kyuubi with Apache License 2.0 | 5 votes |
package org.apache.kyuubi.ha.client

import java.io.{File, IOException}
import javax.security.auth.login.Configuration

import scala.collection.JavaConverters._

import org.apache.kyuubi.{KerberizedTestHelper, KyuubiFunSuite}
import org.apache.kyuubi.KYUUBI_VERSION
import org.apache.kyuubi.config.KyuubiConf
import org.apache.kyuubi.ha.HighAvailabilityConf._
import org.apache.kyuubi.ha.server.EmbeddedZkServer
import org.apache.kyuubi.service.ServiceState

class ServiceDiscoverySuite extends KyuubiFunSuite with KerberizedTestHelper {
  val zkServer = new EmbeddedZkServer()
  val conf: KyuubiConf = KyuubiConf()

  override def beforeAll(): Unit = {
    conf.set(KyuubiConf.EMBEDDED_ZK_PORT, 0)
    zkServer.initialize(conf)
    zkServer.start()
    super.beforeAll()
  }

  override def afterAll(): Unit = {
    conf.unset(KyuubiConf.SERVER_KEYTAB)
    conf.unset(KyuubiConf.SERVER_PRINCIPAL)
    conf.unset(HA_ZK_QUORUM)
    zkServer.stop()
    super.afterAll()
  }

  test("set up zookeeper auth") {
    tryWithSecurityEnabled {
      val keytab = File.createTempFile("kentyao", ".keytab")
      val principal = "kentyao/[email protected]"

      conf.set(KyuubiConf.SERVER_KEYTAB, keytab.getCanonicalPath)
      conf.set(KyuubiConf.SERVER_PRINCIPAL, principal)

      ServiceDiscovery.setUpZooKeeperAuth(conf)
      val configuration = Configuration.getConfiguration
      val entries = configuration.getAppConfigurationEntry("KyuubiZooKeeperClient")

      assert(entries.head.getLoginModuleName === "com.sun.security.auth.module.Krb5LoginModule")
      val options = entries.head.getOptions.asScala.toMap
      assert(options("principal") === "kentyao/[email protected]")
      assert(options("useKeyTab").toString.toBoolean)

      conf.set(KyuubiConf.SERVER_KEYTAB, keytab.getName)
      val e = intercept[IOException](ServiceDiscovery.setUpZooKeeperAuth(conf))
      assert(e.getMessage === s"${KyuubiConf.SERVER_KEYTAB.key} does not exists")
    }
  }

  test("publish instance to embedded zookeeper server") {
    conf
      .unset(KyuubiConf.SERVER_KEYTAB)
      .unset(KyuubiConf.SERVER_PRINCIPAL)
      .set(HA_ZK_QUORUM, zkServer.getConnectString)

    val namespace = "kyuubiserver"
    val znodeRoot = s"/$namespace"
    val instance = "kentyao.apache.org:10009"
    var deleted = false
    val postHook = new Thread {
      override def run(): Unit = deleted = true
    }
    val serviceDiscovery = new ServiceDiscovery(instance, namespace, postHook)
    val framework = ServiceDiscovery.newZookeeperClient(conf)
    try {
      serviceDiscovery.initialize(conf)
      serviceDiscovery.start()

      assert(framework.checkExists().forPath("/abc") === null)
      assert(framework.checkExists().forPath(znodeRoot) !== null)
      val children = framework.getChildren.forPath(znodeRoot).asScala
      assert(children.head ===
        s"serviceUri=$instance;version=$KYUUBI_VERSION;sequence=0000000000")

      children.foreach { child =>
        framework.delete().forPath(s"""$znodeRoot/$child""")
      }
      Thread.sleep(5000)
      assert(deleted, "Post hook called")
      assert(serviceDiscovery.getServiceState === ServiceState.STOPPED)
    } finally {
      serviceDiscovery.stop()
      framework.close()
    }
  }
}
Example 198
Source File: LogFile.scala From kyuubi with Apache License 2.0 | 5 votes |
package yaooqinn.kyuubi.operation

import java.io.{BufferedReader, File, FileInputStream, FileNotFoundException, FileOutputStream, InputStreamReader, IOException, PrintStream}
import java.util.ArrayList

import scala.collection.JavaConverters._

import org.apache.commons.io.FileUtils
import org.apache.hadoop.io.IOUtils
import org.apache.kyuubi.Logging
import org.apache.spark.sql.Row

import yaooqinn.kyuubi.KyuubiSQLException

class LogFile private (
    file: File,
    private var reader: Option[BufferedReader],
    writer: PrintStream,
    @volatile private var isRemoved: Boolean = false) extends Logging {

  def this(file: File) = {
    this(file,
      LogFile.createReader(file, isRemoved = false),
      new PrintStream(new FileOutputStream(file)))
  }

  private def resetReader(): Unit = {
    reader.foreach(IOUtils.closeStream)
    reader = None
  }

  private def readResults(nLines: Long): Seq[Row] = {
    reader = reader.orElse(LogFile.createReader(file, isRemoved))

    val logs = new ArrayList[Row]()
    reader.foreach { r =>
      var i = 1
      try {
        var line: String = r.readLine()
        while ((i < nLines || nLines <= 0) && line != null) {
          logs.add(Row(line))
          line = r.readLine()
          i += 1
        }
      } catch {
        case e: FileNotFoundException =>
          val operationHandle = file.getName
          val path = file.getAbsolutePath
          val msg = if (isRemoved) {
            s"Operation[$operationHandle] has been closed and the log file $path has been removed"
          } else {
            s"Operation[$operationHandle] Log file $path is not found"
          }
          throw new KyuubiSQLException(msg, e)
      }
    }
    logs.asScala
  }

  def write(msg: String): Unit = {
    writer.print(msg)
  }

  def close(): Unit = synchronized {
    try {
      reader.foreach(_.close())
      writer.close()
      if (!isRemoved) {
        FileUtils.forceDelete(file)
        isRemoved = true
      }
    } catch {
      case e: IOException =>
        error(s"Failed to remove corresponding log file of operation: ${file.getName}", e)
    }
  }
}

object LogFile {

  def createReader(file: File, isRemoved: Boolean): Option[BufferedReader] = try {
    Option(new BufferedReader(new InputStreamReader(new FileInputStream(file))))
  } catch {
    case e: FileNotFoundException =>
      val operationHandle = file.getName
      val path = file.getAbsolutePath
      val msg = if (isRemoved) {
        s"Operation[$operationHandle] has been closed and the log file $path has been removed"
      } else {
        s"Operation[$operationHandle] Log file $path is not found"
      }
      throw new KyuubiSQLException(msg, e)
  }
}
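A usage sketch of the public auxiliary constructor together with write/close, assuming the Kyuubi classes are on the classpath. The path is hypothetical; note that close() force-deletes the backing file once the operation is done.

import java.io.File
import yaooqinn.kyuubi.operation.LogFile

object LogFileDemo extends App {
  val file = new File("/tmp/operation-42.log")
  val log = new LogFile(file)      // opens the PrintStream eagerly
  log.write("query started\n")
  log.write("query finished\n")
  log.close()                      // also deletes /tmp/operation-42.log
}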
Example 199
Source File: JsonFileReporter.scala From kyuubi with Apache License 2.0 | 5 votes |
package yaooqinn.kyuubi.metrics

import java.io.{BufferedWriter, Closeable, IOException, OutputStreamWriter}
import java.util.{Timer, TimerTask}
import java.util.concurrent.TimeUnit

import scala.util.Try
import scala.util.control.NonFatal

import com.codahale.metrics.MetricRegistry
import com.codahale.metrics.json.MetricsModule
import com.fasterxml.jackson.databind.ObjectMapper
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.kyuubi.Logging
import org.apache.spark.{KyuubiSparkUtil, SparkConf}
import org.apache.spark.KyuubiConf._

private[metrics] class JsonFileReporter(conf: SparkConf, registry: MetricRegistry)
  extends Closeable with Logging {

  private val jsonMapper = new ObjectMapper().registerModule(
    new MetricsModule(TimeUnit.MILLISECONDS, TimeUnit.MILLISECONDS, false))
  private val timer = new Timer(true)
  private val interval = KyuubiSparkUtil.timeStringAsMs(conf.get(METRICS_REPORT_INTERVAL))
  private val path = conf.get(METRICS_REPORT_LOCATION)
  private val hadoopConf = KyuubiSparkUtil.newConfiguration(conf)

  def start(): Unit = {
    timer.schedule(new TimerTask {
      var bw: BufferedWriter = _
      override def run(): Unit = try {
        val json = jsonMapper.writerWithDefaultPrettyPrinter().writeValueAsString(registry)
        val tmpPath = new Path(path + ".tmp")
        val tmpPathUri = tmpPath.toUri
        val fs = if (tmpPathUri.getScheme == null && tmpPathUri.getAuthority == null) {
          FileSystem.getLocal(hadoopConf)
        } else {
          FileSystem.get(tmpPathUri, hadoopConf)
        }
        fs.delete(tmpPath, true)
        bw = new BufferedWriter(new OutputStreamWriter(fs.create(tmpPath, true)))
        bw.write(json)
        bw.close()
        fs.setPermission(tmpPath, FsPermission.createImmutable(Integer.parseInt("644", 8).toShort))
        val finalPath = new Path(path)
        fs.rename(tmpPath, finalPath)
        fs.setPermission(finalPath,
          FsPermission.createImmutable(Integer.parseInt("644", 8).toShort))
      } catch {
        case NonFatal(e) => error("Error writing metrics to json file" + path, e)
      } finally {
        if (bw != null) {
          Try(bw.close())
        }
      }
    }, 0, interval)
  }

  override def close(): Unit = {
    timer.cancel()
  }
}
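The reporter publishes each snapshot by writing to a ".tmp" sibling first and then renaming it over the final path, so readers never observe a half-written report. A standalone sketch of that write-then-rename pattern using java.nio (the paths and payload are hypothetical):

import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths, StandardCopyOption}

object AtomicPublishDemo extends App {
  val finalPath = Paths.get("/tmp/metrics.json")
  val tmpPath = Paths.get("/tmp/metrics.json.tmp")
  // Write the full payload to the temporary sibling first
  Files.write(tmpPath, """{"gauges":{}}""".getBytes(StandardCharsets.UTF_8))
  // Then replace the published file in a single rename
  Files.move(tmpPath, finalPath, StandardCopyOption.REPLACE_EXISTING)
  println(s"Published ${Files.size(finalPath)} bytes")
}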
Example 200
Source File: PlainSaslHelper.scala From kyuubi with Apache License 2.0 | 5 votes |
package yaooqinn.kyuubi.auth

import java.io.IOException
import java.security.Security
import javax.security.auth.callback._
import javax.security.auth.login.LoginException
import javax.security.sasl.{AuthenticationException, AuthorizeCallback}

import scala.collection.JavaConverters._

import org.apache.hive.service.cli.thrift.TCLIService.Iface
import org.apache.spark.SparkConf
import org.apache.thrift.{TProcessor, TProcessorFactory}
import org.apache.thrift.transport.{TSaslServerTransport, TTransport, TTransportFactory}

import yaooqinn.kyuubi.auth.AuthMethods.AuthMethods
import yaooqinn.kyuubi.auth.PlainSaslServer.SaslPlainProvider

object PlainSaslHelper {

  // Register Plain SASL server provider
  Security.addProvider(new SaslPlainProvider())

  def getProcessFactory(service: Iface): TProcessorFactory = {
    SQLPlainProcessorFactory(service)
  }

  @throws[LoginException]
  def getTransportFactory(authTypeStr: String, conf: SparkConf): TTransportFactory = {
    val saslFactory = new TSaslServerTransport.Factory()
    try {
      val handler = new PlainServerCallbackHandler(authTypeStr, conf)
      val props = Map.empty[String, String]
      saslFactory.addServerDefinition("PLAIN", authTypeStr, null, props.asJava, handler)
    } catch {
      case e: AuthenticationException =>
        throw new LoginException("Error setting callback handler" + e);
    }
    saslFactory
  }

  private class PlainServerCallbackHandler private(authMethod: AuthMethods, conf: SparkConf)
    extends CallbackHandler {

    @throws[AuthenticationException]
    def this(authMethodStr: String, conf: SparkConf) =
      this(AuthMethods.getValidAuthMethod(authMethodStr), conf)

    @throws[IOException]
    @throws[UnsupportedCallbackException]
    override def handle(callbacks: Array[Callback]): Unit = {
      var username: String = null
      var password: String = null
      var ac: AuthorizeCallback = null

      for (callback <- callbacks) {
        callback match {
          case nc: NameCallback => username = nc.getName
          case pc: PasswordCallback => password = new String(pc.getPassword)
          case a: AuthorizeCallback => ac = a
          case _ => throw new UnsupportedCallbackException(callback)
        }
      }
      val provider = AuthenticationProviderFactory.getAuthenticationProvider(authMethod, conf)
      provider.authenticate(username, password)
      if (ac != null) ac.setAuthorized(true)
    }
  }

  private case class SQLPlainProcessorFactory(service: Iface) extends TProcessorFactory(null) {
    override def getProcessor(trans: TTransport): TProcessor =
      new TSetIpAddressProcessor[Iface](service)
  }
}