java.io.IOException Scala Examples
The following examples show how to use java.io.IOException in Scala. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
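Before the project examples, here is a minimal, self-contained sketch of the pattern most of them share: wrap blocking I/O in try/catch/finally, handle IOException explicitly, and release the resource. The object name and file path below are illustrative only and are not taken from any project in this list.

import java.io.{BufferedReader, FileReader, IOException}

object ReadFirstLine {
  // Attempt to read the first line of a file, returning None on I/O failure.
  def firstLine(path: String): Option[String] = {
    var reader: BufferedReader = null
    try {
      reader = new BufferedReader(new FileReader(path))
      Option(reader.readLine())
    } catch {
      case e: IOException =>
        // Blocking I/O failed (missing file, permissions, disk error, ...).
        System.err.println(s"Could not read $path: ${e.getMessage}")
        None
    } finally {
      if (reader != null) reader.close()
    }
  }

  def main(args: Array[String]): Unit =
    println(firstLine("example.txt")) // "example.txt" is a hypothetical path
}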
Example 1
Source File: CommandUtils.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.deploy.worker

import java.io.{File, FileOutputStream, InputStream, IOException}

import scala.collection.JavaConverters._
import scala.collection.Map

import org.apache.spark.SecurityManager
import org.apache.spark.deploy.Command
import org.apache.spark.internal.Logging
import org.apache.spark.launcher.WorkerCommandBuilder
import org.apache.spark.util.Utils

  def redirectStream(in: InputStream, file: File) {
    val out = new FileOutputStream(file, true)

    // TODO: It would be nice to add a shutdown hook here that explains why the output is
    // terminating. Otherwise if the worker dies the executor logs will silently stop.
    new Thread("redirect output to " + file) {
      override def run() {
        try {
          Utils.copyStream(in, out, true)
        } catch {
          case e: IOException =>
            logInfo("Redirection to " + file + " closed: " + e.getMessage)
        }
      }
    }.start()
  }
}
Example 2
Source File: GrpcServerOwner.scala From daml with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.platform.apiserver import java.io.IOException import java.net.{BindException, InetAddress, InetSocketAddress} import java.util.concurrent.TimeUnit.SECONDS import com.daml.metrics.Metrics import com.daml.platform.apiserver.GrpcServerOwner._ import com.daml.ports.Port import com.daml.resources.{Resource, ResourceOwner} import com.google.protobuf.Message import io.grpc.netty.NettyServerBuilder import io.grpc._ import io.netty.channel.socket.nio.NioServerSocketChannel import io.netty.handler.ssl.SslContext import scala.concurrent.{ExecutionContext, Future} import scala.util.control.NoStackTrace final class GrpcServerOwner( address: Option[String], desiredPort: Port, maxInboundMessageSize: Int, sslContext: Option[SslContext] = None, interceptors: List[ServerInterceptor] = List.empty, metrics: Metrics, eventLoopGroups: ServerEventLoopGroups, services: Iterable[BindableService], ) extends ResourceOwner[Server] { override def acquire()(implicit executionContext: ExecutionContext): Resource[Server] = { val host = address.map(InetAddress.getByName).getOrElse(InetAddress.getLoopbackAddress) Resource(Future { val builder = NettyServerBuilder.forAddress(new InetSocketAddress(host, desiredPort.value)) builder.sslContext(sslContext.orNull) builder.channelType(classOf[NioServerSocketChannel]) builder.permitKeepAliveTime(10, SECONDS) builder.permitKeepAliveWithoutCalls(true) builder.directExecutor() builder.maxInboundMessageSize(maxInboundMessageSize) interceptors.foreach(builder.intercept) builder.intercept(new MetricsInterceptor(metrics)) eventLoopGroups.populate(builder) services.foreach { service => builder.addService(service) toLegacyService(service).foreach(builder.addService) } val server = builder.build() try { server.start() } catch { case e: IOException if e.getCause != null && e.getCause.isInstanceOf[BindException] => throw new UnableToBind(desiredPort, e.getCause) } server })(server => Future(server.shutdown().awaitTermination())) } // This exposes the existing services under com.daml also under com.digitalasset. // This is necessary to allow applications built with an earlier version of the SDK // to still work. // The "proxy" services will not show up on the reflection service, because of the way it // processes service definitions via protobuf file descriptors. private def toLegacyService(service: BindableService): Option[ServerServiceDefinition] = { val `com.daml` = "com.daml" val `com.digitalasset` = "com.digitalasset" val damlDef = service.bindService() val damlDesc = damlDef.getServiceDescriptor // Only add "proxy" services if it actually contains com.daml in the service name. // There are other services registered like the reflection service, that doesn't need the special treatment. 
if (damlDesc.getName.contains(`com.daml`)) { val digitalassetName = damlDesc.getName.replace(`com.daml`, `com.digitalasset`) val digitalassetDef = ServerServiceDefinition.builder(digitalassetName) damlDef.getMethods.forEach { methodDef => val damlMethodDesc = methodDef.getMethodDescriptor val digitalassetMethodName = damlMethodDesc.getFullMethodName.replace(`com.daml`, `com.digitalasset`) val digitalassetMethodDesc = damlMethodDesc.toBuilder.setFullMethodName(digitalassetMethodName).build() val _ = digitalassetDef.addMethod( digitalassetMethodDesc.asInstanceOf[MethodDescriptor[Message, Message]], methodDef.getServerCallHandler.asInstanceOf[ServerCallHandler[Message, Message]] ) } Option(digitalassetDef.build()) } else None } } object GrpcServerOwner { final class UnableToBind(port: Port, cause: Throwable) extends RuntimeException( s"The API server was unable to bind to port $port. Terminate the process occupying the port, or choose a different one.", cause) with NoStackTrace }
Example 3
Source File: TrafficMonitorThread.scala From shadowsocksr-android with GNU General Public License v3.0
package com.github.shadowsocks.utils

import java.io.{File, IOException}
import java.nio.{ByteBuffer, ByteOrder}
import java.util.concurrent.Executors

import android.content.Context
import android.net.{LocalServerSocket, LocalSocket, LocalSocketAddress}
import android.util.Log

class TrafficMonitorThread(context: Context) extends Thread {

  val TAG = "TrafficMonitorThread"
  lazy val PATH = context.getApplicationInfo.dataDir + "/stat_path"

  @volatile var serverSocket: LocalServerSocket = null
  @volatile var isRunning: Boolean = true

  def closeServerSocket() {
    if (serverSocket != null) {
      try {
        serverSocket.close()
      } catch {
        case _: Exception => // ignore
      }
      serverSocket = null
    }
  }

  def stopThread() {
    isRunning = false
    closeServerSocket()
  }

  override def run() {
    try {
      new File(PATH).delete()
    } catch {
      case _: Exception => // ignore
    }

    try {
      val localSocket = new LocalSocket
      localSocket.bind(new LocalSocketAddress(PATH, LocalSocketAddress.Namespace.FILESYSTEM))
      serverSocket = new LocalServerSocket(localSocket.getFileDescriptor)
    } catch {
      case e: IOException =>
        Log.e(TAG, "unable to bind", e)
        return
    }

    val pool = Executors.newFixedThreadPool(1)

    while (isRunning) {
      try {
        val socket = serverSocket.accept()
        pool.execute(() => {
          try {
            val input = socket.getInputStream
            val output = socket.getOutputStream
            val buffer = new Array[Byte](16)
            if (input.read(buffer) != 16) throw new IOException("Unexpected traffic stat length")
            val stat = ByteBuffer.wrap(buffer).order(ByteOrder.LITTLE_ENDIAN)
            TrafficMonitor.update(stat.getLong(0), stat.getLong(8))
            output.write(0)
            input.close()
            output.close()
          } catch {
            case e: Exception => Log.e(TAG, "Error when recv traffic stat", e)
          }
          // close socket
          try {
            socket.close()
          } catch {
            case _: Exception => // ignore
          }
        })
      } catch {
        case e: IOException =>
          Log.e(TAG, "Error when accept socket", e)
          return
      }
    }
  }
}
Example 4
Source File: GuardedProcess.scala From shadowsocksr-android with GNU General Public License v3.0
package com.github.shadowsocks import java.io.{IOException, InputStream, OutputStream} import java.lang.System.currentTimeMillis import java.util.concurrent.Semaphore import android.util.Log import scala.collection.JavaConversions._ class GuardedProcess(cmd: Seq[String]) extends Process { private val TAG = classOf[GuardedProcess].getSimpleName @volatile private var guardThread: Thread = _ @volatile private var isDestroyed: Boolean = _ @volatile private var process: Process = _ @volatile private var isRestart = false def start(onRestartCallback: () => Unit = null): GuardedProcess = { val semaphore = new Semaphore(1) semaphore.acquire @volatile var ioException: IOException = null guardThread = new Thread(() => { try { var callback: () => Unit = null while (!isDestroyed) { Log.i(TAG, "start process: " + cmd) val startTime = currentTimeMillis process = new ProcessBuilder(cmd).redirectErrorStream(true).start if (callback == null) callback = onRestartCallback else callback() semaphore.release process.waitFor if (isRestart) { isRestart = false } else { if (currentTimeMillis - startTime < 1000) { Log.w(TAG, "process exit too fast, stop guard: " + cmd) isDestroyed = true } } } } catch { case ignored: InterruptedException => Log.i(TAG, "thread interrupt, destroy process: " + cmd) process.destroy() case e: IOException => ioException = e } finally semaphore.release }, "GuardThread-" + cmd) guardThread.start() semaphore.acquire if (ioException != null) { throw ioException } this } def destroy() { isDestroyed = true guardThread.interrupt() process.destroy() try guardThread.join() catch { case ignored: InterruptedException => } } def restart() { isRestart = true process.destroy() } def exitValue: Int = throw new UnsupportedOperationException def getErrorStream: InputStream = throw new UnsupportedOperationException def getInputStream: InputStream = throw new UnsupportedOperationException def getOutputStream: OutputStream = throw new UnsupportedOperationException @throws(classOf[InterruptedException]) def waitFor = { guardThread.join() 0 } }
Example 5
Source File: UtilTest.scala From daml with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.codegen import com.daml.lf.data.Ref.{QualifiedName, PackageId} import java.io.IOException import java.nio.file.attribute.BasicFileAttributes import java.nio.file.{FileVisitResult, Files, Path, SimpleFileVisitor} import com.daml.lf.{iface => I} import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers} import org.scalatest.prop.GeneratorDrivenPropertyChecks class UtilTest extends UtilTestHelpers with GeneratorDrivenPropertyChecks { val packageInterface = I.Interface(packageId = PackageId.assertFromString("abcdef"), typeDecls = Map.empty) val scalaPackageParts = Array("com", "digitalasset") val scalaPackage: String = scalaPackageParts.mkString(".") val util = lf.LFUtil( scalaPackage, I.EnvironmentInterface fromReaderInterfaces packageInterface, outputDir.toFile) def damlScalaName(damlNameSpace: Array[String], name: String): util.DamlScalaName = util.DamlScalaName(damlNameSpace, name) behavior of "Util" it should "mkDamlScalaName for a Contract named Test" in { val result = util.mkDamlScalaNameFromDirsAndName(Array(), "Test") result shouldEqual damlScalaName(Array.empty, "Test") result.packageName shouldEqual scalaPackage result.qualifiedName shouldEqual (scalaPackage + ".Test") } it should "mkDamlScalaName for a Template names foo.bar.Test" in { val result = util.mkDamlScalaName(Util.Template, QualifiedName assertFromString "foo.bar:Test") result shouldEqual damlScalaName(Array("foo", "bar"), "Test") result.packageName shouldEqual (scalaPackage + ".foo.bar") result.qualifiedName shouldEqual (scalaPackage + ".foo.bar.Test") } "partitionEithers" should "equal scalaz separate in simple cases" in forAll { iis: List[Either[Int, Int]] => import scalaz.syntax.monadPlus._, scalaz.std.list._, scalaz.std.either._ Util.partitionEithers(iis) shouldBe iis.separate } } abstract class UtilTestHelpers extends FlatSpec with Matchers with BeforeAndAfterAll { val outputDir = Files.createTempDirectory("codegenUtilTest") override protected def afterAll(): Unit = { super.afterAll() deleteRecursively(outputDir) } def deleteRecursively(dir: Path): Unit = { Files.walkFileTree( dir, new SimpleFileVisitor[Path] { override def postVisitDirectory(dir: Path, exc: IOException) = { Files.delete(dir) FileVisitResult.CONTINUE } override def visitFile(file: Path, attrs: BasicFileAttributes) = { Files.delete(file) FileVisitResult.CONTINUE } } ) () } }
Example 6
Source File: TestOutputStream.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.streaming

import java.io.{IOException, ObjectInputStream}
import java.util.concurrent.ConcurrentLinkedQueue

import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.dstream.{DStream, ForEachDStream}
import org.apache.spark.util.Utils

class TestOutputStream[T: ClassTag](parent: DStream[T],
    val output: ConcurrentLinkedQueue[Seq[T]] = new ConcurrentLinkedQueue[Seq[T]]())
  extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => {
    val collected = rdd.collect()
    output.add(collected)
  }, false) {

  // This is to clear the output buffer every time it is read from a checkpoint
  @throws(classOf[IOException])
  private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException {
    ois.defaultReadObject()
    output.clear()
  }
}
Example 7
Source File: SparkSQLCLIService.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.hive.thriftserver import java.io.IOException import java.util.{List => JList} import javax.security.auth.login.LoginException import scala.collection.JavaConverters._ import org.apache.commons.logging.Log import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.shims.Utils import org.apache.hadoop.security.UserGroupInformation import org.apache.hive.service.{AbstractService, Service, ServiceException} import org.apache.hive.service.Service.STATE import org.apache.hive.service.auth.HiveAuthFactory import org.apache.hive.service.cli._ import org.apache.hive.service.server.HiveServer2 import org.apache.spark.sql.SQLContext import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._ private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLContext) extends CLIService(hiveServer) with ReflectedCompositeService { override def init(hiveConf: HiveConf) { setSuperField(this, "hiveConf", hiveConf) val sparkSqlSessionManager = new SparkSQLSessionManager(hiveServer, sqlContext) setSuperField(this, "sessionManager", sparkSqlSessionManager) addService(sparkSqlSessionManager) var sparkServiceUGI: UserGroupInformation = null if (UserGroupInformation.isSecurityEnabled) { try { HiveAuthFactory.loginFromKeytab(hiveConf) sparkServiceUGI = Utils.getUGI() setSuperField(this, "serviceUGI", sparkServiceUGI) } catch { case e @ (_: IOException | _: LoginException) => throw new ServiceException("Unable to login to kerberos with given principal/keytab", e) } } initCompositeService(hiveConf) } override def getInfo(sessionHandle: SessionHandle, getInfoType: GetInfoType): GetInfoValue = { getInfoType match { case GetInfoType.CLI_SERVER_NAME => new GetInfoValue("Spark SQL") case GetInfoType.CLI_DBMS_NAME => new GetInfoValue("Spark SQL") case GetInfoType.CLI_DBMS_VER => new GetInfoValue(sqlContext.sparkContext.version) case _ => super.getInfo(sessionHandle, getInfoType) } } } private[thriftserver] trait ReflectedCompositeService { this: AbstractService => def initCompositeService(hiveConf: HiveConf) { // Emulating `CompositeService.init(hiveConf)` val serviceList = getAncestorField[JList[Service]](this, 2, "serviceList") serviceList.asScala.foreach(_.init(hiveConf)) // Emulating `AbstractService.init(hiveConf)` invoke(classOf[AbstractService], this, "ensureCurrentState", classOf[STATE] -> STATE.NOTINITED) setAncestorField(this, 3, "hiveConf", hiveConf) invoke(classOf[AbstractService], this, "changeState", classOf[STATE] -> STATE.INITED) getAncestorField[Log](this, 3, "LOG").info(s"Service: $getName is inited.") } }
Example 8
Source File: ReusableStringReaderSuite.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.catalyst.expressions.xml import java.io.IOException import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.expressions.xml.UDFXPathUtil.ReusableStringReader class ReusableStringReaderSuite extends SparkFunSuite { private val fox = "Quick brown fox jumps over the lazy dog." test("empty reader") { val reader = new ReusableStringReader intercept[IOException] { reader.read() } intercept[IOException] { reader.ready() } reader.close() } test("mark reset") { val reader = new ReusableStringReader if (reader.markSupported()) { reader.asInstanceOf[ReusableStringReader].set(fox) assert(reader.ready()) val cc = new Array[Char](6) var read = reader.read(cc) assert(read == 6) assert("Quick " == new String(cc)) reader.mark(100) read = reader.read(cc) assert(read == 6) assert("brown " == new String(cc)) reader.reset() read = reader.read(cc) assert(read == 6) assert("brown " == new String(cc)) } reader.close() } test("skip") { val reader = new ReusableStringReader reader.asInstanceOf[ReusableStringReader].set(fox) // skip entire the data: var skipped = reader.skip(fox.length() + 1) assert(fox.length() == skipped) assert(-1 == reader.read()) reader.asInstanceOf[ReusableStringReader].set(fox) // reset the data val cc = new Array[Char](6) var read = reader.read(cc) assert(read == 6) assert("Quick " == new String(cc)) // skip some piece of data: skipped = reader.skip(30) assert(skipped == 30) read = reader.read(cc) assert(read == 4) assert("dog." == new String(cc, 0, read)) // skip when already at EOF: skipped = reader.skip(300) assert(skipped == 0, skipped) assert(reader.read() == -1) reader.close() } }
Example 9
Source File: InsertIntoHadoopFsRelationCommand.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.execution.datasources

import java.io.IOException

import org.apache.hadoop.fs.Path

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.execution.command.RunnableCommand

        )) {
          throw new IOException(s"Unable to clear output " +
            s"directory $qualifiedOutputPath prior to writing to it")
        }
        true
      case (SaveMode.Append, _) | (SaveMode.Overwrite, _) | (SaveMode.ErrorIfExists, false) =>
        true
      case (SaveMode.Ignore, exists) =>
        !exists
      case (s, exists) =>
        throw new IllegalStateException(s"unsupported save mode $s ($exists)")
    }
    // If we are appending data to an existing dir.
    val isAppend = pathExists && (mode == SaveMode.Append)

    if (doInsertion) {
      WriteOutput.write(
        sparkSession,
        query,
        fileFormat,
        qualifiedOutputPath,
        hadoopConf,
        partitionColumns,
        bucketSpec,
        refreshFunction,
        options,
        isAppend)
    } else {
      logInfo("Skipping insertion into a relation that already exists.")
    }

    Seq.empty[Row]
  }
}
Example 10
Source File: ProcessTestUtils.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.sql.test

import java.io.{InputStream, IOException}

import scala.sys.process.BasicIO

object ProcessTestUtils {
  class ProcessOutputCapturer(stream: InputStream, capture: String => Unit) extends Thread {
    this.setDaemon(true)

    override def run(): Unit = {
      try {
        BasicIO.processFully(capture)(stream)
      } catch { case _: IOException =>
        // Ignores the IOException thrown on process termination, which closes the input
        // stream abruptly.
      }
    }
  }
}
Example 11
Source File: DStreamCheckpointData.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.streaming.dstream import java.io.{IOException, ObjectInputStream, ObjectOutputStream} import scala.collection.mutable.HashMap import scala.reflect.ClassTag import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.internal.Logging import org.apache.spark.streaming.Time import org.apache.spark.util.Utils private[streaming] class DStreamCheckpointData[T: ClassTag](dstream: DStream[T]) extends Serializable with Logging { protected val data = new HashMap[Time, AnyRef]() // Mapping of the batch time to the checkpointed RDD file of that time @transient private var timeToCheckpointFile = new HashMap[Time, String] // Mapping of the batch time to the time of the oldest checkpointed RDD // in that batch's checkpoint data @transient private var timeToOldestCheckpointFileTime = new HashMap[Time, Time] @transient private var fileSystem: FileSystem = null protected[streaming] def currentCheckpointFiles = data.asInstanceOf[HashMap[Time, String]] def restore() { // Create RDDs from the checkpoint data currentCheckpointFiles.foreach { case(time, file) => logInfo("Restoring checkpointed RDD for time " + time + " from file '" + file + "'") dstream.generatedRDDs += ((time, dstream.context.sparkContext.checkpointFile[T](file))) } } override def toString: String = { "[\n" + currentCheckpointFiles.size + " checkpoint files \n" + currentCheckpointFiles.mkString("\n") + "\n]" } @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { logDebug(this.getClass().getSimpleName + ".writeObject used") if (dstream.context.graph != null) { dstream.context.graph.synchronized { if (dstream.context.graph.checkpointInProgress) { oos.defaultWriteObject() } else { val msg = "Object of " + this.getClass.getName + " is being serialized " + " possibly as a part of closure of an RDD operation. This is because " + " the DStream object is being referred to from within the closure. " + " Please rewrite the RDD operation inside this DStream to avoid this. " + " This has been enforced to avoid bloating of Spark tasks " + " with unnecessary objects." throw new java.io.NotSerializableException(msg) } } } else { throw new java.io.NotSerializableException( "Graph is unexpectedly null when DStream is being serialized.") } } @throws(classOf[IOException]) private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { logDebug(this.getClass().getSimpleName + ".readObject used") ois.defaultReadObject() timeToOldestCheckpointFileTime = new HashMap[Time, Time] timeToCheckpointFile = new HashMap[Time, String] } }
Example 12
Source File: RawTextSender.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.streaming.util

import java.io.{ByteArrayOutputStream, IOException}
import java.net.ServerSocket
import java.nio.ByteBuffer

import scala.io.Source

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.serializer.KryoSerializer
import org.apache.spark.util.IntParam

private[streaming] object RawTextSender extends Logging {
  def main(args: Array[String]) {
    if (args.length != 4) {
      // scalastyle:off println
      System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>")
      // scalastyle:on println
      System.exit(1)
    }
    // Parse the arguments using a pattern match
    val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args

    // Repeat the input data multiple times to fill in a buffer
    val lines = Source.fromFile(file).getLines().toArray

    val bufferStream = new ByteArrayOutputStream(blockSize + 1000)
    val ser = new KryoSerializer(new SparkConf()).newInstance()
    val serStream = ser.serializeStream(bufferStream)
    var i = 0
    while (bufferStream.size < blockSize) {
      serStream.writeObject(lines(i))
      i = (i + 1) % lines.length
    }
    val array = bufferStream.toByteArray

    val countBuf = ByteBuffer.wrap(new Array[Byte](4))
    countBuf.putInt(array.length)
    countBuf.flip()

    val serverSocket = new ServerSocket(port)
    logInfo("Listening on port " + port)

    while (true) {
      val socket = serverSocket.accept()
      logInfo("Got a new connection")
      val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec)
      try {
        while (true) {
          out.write(countBuf.array)
          out.write(array)
        }
      } catch {
        case e: IOException =>
          logError("Client disconnected")
      } finally {
        socket.close()
      }
    }
  }
}
Example 13
Source File: HdfsUtils.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.streaming.util

import java.io.{FileNotFoundException, IOException}

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs._

private[streaming] object HdfsUtils {

  def getOutputStream(path: String, conf: Configuration): FSDataOutputStream = {
    val dfsPath = new Path(path)
    val dfs = getFileSystemForPath(dfsPath, conf)
    // If the file exists and we have append support, append instead of creating a new file
    val stream: FSDataOutputStream = {
      if (dfs.isFile(dfsPath)) {
        if (conf.getBoolean("hdfs.append.support", false) || dfs.isInstanceOf[RawLocalFileSystem]) {
          dfs.append(dfsPath)
        } else {
          throw new IllegalStateException("File exists and there is no append support!")
        }
      } else {
        dfs.create(dfsPath)
      }
    }
    stream
  }

  def getInputStream(path: String, conf: Configuration): FSDataInputStream = {
    val dfsPath = new Path(path)
    val dfs = getFileSystemForPath(dfsPath, conf)
    try {
      dfs.open(dfsPath)
    } catch {
      case _: FileNotFoundException =>
        null
      case e: IOException =>
        // If we are really unlucky, the file may be deleted as we're opening the stream.
        // This can happen as clean up is performed by daemon threads that may be left over from
        // previous runs.
        if (!dfs.isFile(dfsPath)) null else throw e
    }
  }

  def checkState(state: Boolean, errorMsg: => String) {
    if (!state) {
      throw new IllegalStateException(errorMsg)
    }
  }

  def checkFileExists(path: String, conf: Configuration): Boolean = {
    val hdpPath = new Path(path)
    val fs = getFileSystemForPath(hdpPath, conf)
    fs.isFile(hdpPath)
  }
}
Example 14
Source File: FileBasedWriteAheadLogReader.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.streaming.util import java.io.{Closeable, EOFException, IOException} import java.nio.ByteBuffer import org.apache.hadoop.conf.Configuration import org.apache.spark.internal.Logging private[streaming] class FileBasedWriteAheadLogReader(path: String, conf: Configuration) extends Iterator[ByteBuffer] with Closeable with Logging { private val instream = HdfsUtils.getInputStream(path, conf) private var closed = (instream == null) // the file may be deleted as we're opening the stream private var nextItem: Option[ByteBuffer] = None override def hasNext: Boolean = synchronized { if (closed) { return false } if (nextItem.isDefined) { // handle the case where hasNext is called without calling next true } else { try { val length = instream.readInt() val buffer = new Array[Byte](length) instream.readFully(buffer) nextItem = Some(ByteBuffer.wrap(buffer)) logTrace("Read next item " + nextItem.get) true } catch { case e: EOFException => logDebug("Error reading next item, EOF reached", e) close() false case e: IOException => logWarning("Error while trying to read data. If the file was deleted, " + "this should be okay.", e) close() if (HdfsUtils.checkFileExists(path, conf)) { // If file exists, this could be a legitimate error throw e } else { // File was deleted. This can occur when the daemon cleanup thread takes time to // delete the file during recovery. false } case e: Exception => logWarning("Error while trying to read data from HDFS.", e) close() throw e } } } override def next(): ByteBuffer = synchronized { val data = nextItem.getOrElse { close() throw new IllegalStateException( "next called without calling hasNext or after hasNext returned false") } nextItem = None // Ensure the next hasNext call loads new data. data } override def close(): Unit = synchronized { if (!closed) { instream.close() } closed = true } }
Example 15
Source File: SparkHadoopMapRedUtil.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.mapred import java.io.IOException import org.apache.hadoop.mapreduce.{TaskAttemptContext => MapReduceTaskAttemptContext} import org.apache.hadoop.mapreduce.{OutputCommitter => MapReduceOutputCommitter} import org.apache.spark.{SparkEnv, TaskContext} import org.apache.spark.executor.CommitDeniedException import org.apache.spark.internal.Logging object SparkHadoopMapRedUtil extends Logging { def commitTask( committer: MapReduceOutputCommitter, mrTaskContext: MapReduceTaskAttemptContext, jobId: Int, splitId: Int): Unit = { val mrTaskAttemptID = mrTaskContext.getTaskAttemptID // Called after we have decided to commit def performCommit(): Unit = { try { committer.commitTask(mrTaskContext) logInfo(s"$mrTaskAttemptID: Committed") } catch { case cause: IOException => logError(s"Error committing the output of task: $mrTaskAttemptID", cause) committer.abortTask(mrTaskContext) throw cause } } // First, check whether the task's output has already been committed by some other attempt if (committer.needsTaskCommit(mrTaskContext)) { val shouldCoordinateWithDriver: Boolean = { val sparkConf = SparkEnv.get.conf // We only need to coordinate with the driver if there are concurrent task attempts. // Note that this could happen even when speculation is not enabled (e.g. see SPARK-8029). // This (undocumented) setting is an escape-hatch in case the commit code introduces bugs. sparkConf.getBoolean("spark.hadoop.outputCommitCoordination.enabled", defaultValue = true) } if (shouldCoordinateWithDriver) { val outputCommitCoordinator = SparkEnv.get.outputCommitCoordinator val taskAttemptNumber = TaskContext.get().attemptNumber() val canCommit = outputCommitCoordinator.canCommit(jobId, splitId, taskAttemptNumber) if (canCommit) { performCommit() } else { val message = s"$mrTaskAttemptID: Not committed because the driver did not authorize commit" logInfo(message) // We need to abort the task so that the driver can reschedule new attempts, if necessary committer.abortTask(mrTaskContext) throw new CommitDeniedException(message, jobId, splitId, taskAttemptNumber) } } else { // Speculation is disabled or a user has chosen to manually bypass the commit coordination performCommit() } } else { // Some other attempt committed the output, so we do nothing and signal success logInfo(s"No need to commit output of task because needsTaskCommit=false: $mrTaskAttemptID") } } }
Example 16
Source File: ReplayListenerBus.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.scheduler import java.io.{InputStream, IOException} import scala.io.Source import com.fasterxml.jackson.core.JsonParseException import org.json4s.jackson.JsonMethods._ import org.apache.spark.internal.Logging import org.apache.spark.scheduler.ReplayListenerBus._ import org.apache.spark.util.JsonProtocol def replay( logData: InputStream, sourceName: String, maybeTruncated: Boolean = false, eventsFilter: ReplayEventsFilter = SELECT_ALL_FILTER): Unit = { var currentLine: String = null var lineNumber: Int = 0 try { val lineEntries = Source.fromInputStream(logData) .getLines() .zipWithIndex .filter { case (line, _) => eventsFilter(line) } while (lineEntries.hasNext) { try { val entry = lineEntries.next() currentLine = entry._1 lineNumber = entry._2 + 1 postToAll(JsonProtocol.sparkEventFromJson(parse(currentLine))) } catch { case jpe: JsonParseException => // We can only ignore exception from last line of the file that might be truncated // the last entry may not be the very last line in the event log, but we treat it // as such in a best effort to replay the given input if (!maybeTruncated || lineEntries.hasNext) { throw jpe } else { logWarning(s"Got JsonParseException from log file $sourceName" + s" at line $lineNumber, the file might not have finished writing cleanly.") } } } } catch { case ioe: IOException => throw ioe case e: Exception => logError(s"Exception parsing Spark event log: $sourceName", e) logError(s"Malformed line #$lineNumber: $currentLine\n") } } } private[spark] object ReplayListenerBus { type ReplayEventsFilter = (String) => Boolean // utility filter that selects all event logs during replay val SELECT_ALL_FILTER: ReplayEventsFilter = { (eventString: String) => true } }
Example 17
Source File: SerializableBuffer.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.util

import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream}
import java.nio.ByteBuffer
import java.nio.channels.Channels

private[spark] class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable {
  def value: ByteBuffer = buffer

  private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException {
    val length = in.readInt()
    buffer = ByteBuffer.allocate(length)
    var amountRead = 0
    val channel = Channels.newChannel(in)
    while (amountRead < length) {
      val ret = channel.read(buffer)
      if (ret == -1) {
        throw new EOFException("End of file before fully reading buffer")
      }
      amountRead += ret
    }
    buffer.rewind() // Allow us to read it later
  }

  private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException {
    out.writeInt(buffer.limit())
    if (Channels.newChannel(out).write(buffer) != buffer.limit()) {
      throw new IOException("Could not fully write buffer to output stream")
    }
    buffer.rewind() // Allow us to write it again later
  }
}
Example 18
Source File: BlockManagerId.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.storage

import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput}
import java.util.concurrent.ConcurrentHashMap

import org.apache.spark.SparkContext
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.util.Utils

  def apply(
      execId: String,
      host: String,
      port: Int,
      topologyInfo: Option[String] = None): BlockManagerId =
    getCachedBlockManagerId(new BlockManagerId(execId, host, port, topologyInfo))

  def apply(in: ObjectInput): BlockManagerId = {
    val obj = new BlockManagerId()
    obj.readExternal(in)
    getCachedBlockManagerId(obj)
  }

  val blockManagerIdCache = new ConcurrentHashMap[BlockManagerId, BlockManagerId]()

  def getCachedBlockManagerId(id: BlockManagerId): BlockManagerId = {
    blockManagerIdCache.putIfAbsent(id, id)
    blockManagerIdCache.get(id)
  }
}
Example 19
Source File: DiskStore.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.storage import java.io.{FileOutputStream, IOException, RandomAccessFile} import java.nio.ByteBuffer import java.nio.channels.FileChannel.MapMode import com.google.common.io.Closeables import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.util.Utils import org.apache.spark.util.io.ChunkedByteBuffer def put(blockId: BlockId)(writeFunc: FileOutputStream => Unit): Unit = { if (contains(blockId)) { throw new IllegalStateException(s"Block $blockId is already present in the disk store") } logDebug(s"Attempting to put block $blockId") val startTime = System.currentTimeMillis val file = diskManager.getFile(blockId) val fileOutputStream = new FileOutputStream(file) var threwException: Boolean = true try { writeFunc(fileOutputStream) threwException = false } finally { try { Closeables.close(fileOutputStream, threwException) } finally { if (threwException) { remove(blockId) } } } val finishTime = System.currentTimeMillis logDebug("Block %s stored as %s file on disk in %d ms".format( file.getName, Utils.bytesToString(file.length()), finishTime - startTime)) } def putBytes(blockId: BlockId, bytes: ChunkedByteBuffer): Unit = { put(blockId) { fileOutputStream => val channel = fileOutputStream.getChannel Utils.tryWithSafeFinally { bytes.writeFully(channel) } { channel.close() } } } def getBytes(blockId: BlockId): ChunkedByteBuffer = { val file = diskManager.getFile(blockId.name) val channel = new RandomAccessFile(file, "r").getChannel Utils.tryWithSafeFinally { // For small files, directly read rather than memory map if (file.length < minMemoryMapBytes) { val buf = ByteBuffer.allocate(file.length.toInt) channel.position(0) while (buf.remaining() != 0) { if (channel.read(buf) == -1) { throw new IOException("Reached EOF before filling buffer\n" + s"offset=0\nfile=${file.getAbsolutePath}\nbuf.remaining=${buf.remaining}") } } buf.flip() new ChunkedByteBuffer(buf) } else { new ChunkedByteBuffer(channel.map(MapMode.READ_ONLY, 0, file.length)) } } { channel.close() } } def remove(blockId: BlockId): Boolean = { val file = diskManager.getFile(blockId.name) if (file.exists()) { val ret = file.delete() if (!ret) { logWarning(s"Error deleting ${file.getPath()}") } ret } else { false } } def contains(blockId: BlockId): Boolean = { val file = diskManager.getFile(blockId.name) file.exists() } }
Example 20
Source File: CartesianRDD.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.util.Utils private[spark] class CartesianPartition( idx: Int, @transient private val rdd1: RDD[_], @transient private val rdd2: RDD[_], s1Index: Int, s2Index: Int ) extends Partition { var s1 = rdd1.partitions(s1Index) var s2 = rdd2.partitions(s2Index) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization s1 = rdd1.partitions(s1Index) s2 = rdd2.partitions(s2Index) oos.defaultWriteObject() } } private[spark] class CartesianRDD[T: ClassTag, U: ClassTag]( sc: SparkContext, var rdd1 : RDD[T], var rdd2 : RDD[U]) extends RDD[(T, U)](sc, Nil) with Serializable { val numPartitionsInRdd2 = rdd2.partitions.length override def getPartitions: Array[Partition] = { // create the cross product split val array = new Array[Partition](rdd1.partitions.length * rdd2.partitions.length) for (s1 <- rdd1.partitions; s2 <- rdd2.partitions) { val idx = s1.index * numPartitionsInRdd2 + s2.index array(idx) = new CartesianPartition(idx, rdd1, rdd2, s1.index, s2.index) } array } override def getPreferredLocations(split: Partition): Seq[String] = { val currSplit = split.asInstanceOf[CartesianPartition] (rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)).distinct } override def compute(split: Partition, context: TaskContext): Iterator[(T, U)] = { val currSplit = split.asInstanceOf[CartesianPartition] for (x <- rdd1.iterator(currSplit.s1, context); y <- rdd2.iterator(currSplit.s2, context)) yield (x, y) } override def getDependencies: Seq[Dependency[_]] = List( new NarrowDependency(rdd1) { def getParents(id: Int): Seq[Int] = List(id / numPartitionsInRdd2) }, new NarrowDependency(rdd2) { def getParents(id: Int): Seq[Int] = List(id % numPartitionsInRdd2) } ) override def clearDependencies() { super.clearDependencies() rdd1 = null rdd2 = null } }
Example 21
Source File: UnionRDD.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer import scala.collection.parallel.{ForkJoinTaskSupport, ThreadPoolTaskSupport} import scala.concurrent.forkjoin.ForkJoinPool import scala.reflect.ClassTag import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils private[spark] class UnionPartition[T: ClassTag]( idx: Int, @transient private val rdd: RDD[T], val parentRddIndex: Int, @transient private val parentRddPartitionIndex: Int) extends Partition { var parentPartition: Partition = rdd.partitions(parentRddPartitionIndex) def preferredLocations(): Seq[String] = rdd.preferredLocations(parentPartition) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization parentPartition = rdd.partitions(parentRddPartitionIndex) oos.defaultWriteObject() } } object UnionRDD { private[spark] lazy val partitionEvalTaskSupport = new ForkJoinTaskSupport(new ForkJoinPool(8)) } @DeveloperApi class UnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]]) extends RDD[T](sc, Nil) { // Nil since we implement getDependencies // visible for testing private[spark] val isPartitionListingParallel: Boolean = rdds.length > conf.getInt("spark.rdd.parallelListingThreshold", 10) override def getPartitions: Array[Partition] = { val parRDDs = if (isPartitionListingParallel) { val parArray = rdds.par parArray.tasksupport = UnionRDD.partitionEvalTaskSupport parArray } else { rdds } val array = new Array[Partition](parRDDs.map(_.partitions.length).seq.sum) var pos = 0 for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) { array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index) pos += 1 } array } override def getDependencies: Seq[Dependency[_]] = { val deps = new ArrayBuffer[Dependency[_]] var pos = 0 for (rdd <- rdds) { deps += new RangeDependency(rdd, 0, pos, rdd.partitions.length) pos += rdd.partitions.length } deps } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val part = s.asInstanceOf[UnionPartition[T]] parent[T](part.parentRddIndex).iterator(part.parentPartition, context) } override def getPreferredLocations(s: Partition): Seq[String] = s.asInstanceOf[UnionPartition[T]].preferredLocations() override def clearDependencies() { super.clearDependencies() rdds = null } }
Example 22
Source File: PartitionerAwareUnionRDD.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark.{OneToOneDependency, Partition, SparkContext, TaskContext} import org.apache.spark.util.Utils private[spark] class PartitionerAwareUnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]] ) extends RDD[T](sc, rdds.map(x => new OneToOneDependency(x))) { require(rdds.nonEmpty) require(rdds.forall(_.partitioner.isDefined)) require(rdds.flatMap(_.partitioner).toSet.size == 1, "Parent RDDs have different partitioners: " + rdds.flatMap(_.partitioner)) override val partitioner = rdds.head.partitioner override def getPartitions: Array[Partition] = { val numPartitions = partitioner.get.numPartitions (0 until numPartitions).map { index => new PartitionerAwareUnionRDDPartition(rdds, index) }.toArray } // Get the location where most of the partitions of parent RDDs are located override def getPreferredLocations(s: Partition): Seq[String] = { logDebug("Finding preferred location for " + this + ", partition " + s.index) val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents val locations = rdds.zip(parentPartitions).flatMap { case (rdd, part) => val parentLocations = currPrefLocs(rdd, part) logDebug("Location of " + rdd + " partition " + part.index + " = " + parentLocations) parentLocations } val location = if (locations.isEmpty) { None } else { // Find the location that maximum number of parent partitions prefer Some(locations.groupBy(x => x).maxBy(_._2.length)._1) } logDebug("Selected location for " + this + ", partition " + s.index + " = " + location) location.toSeq } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents rdds.zip(parentPartitions).iterator.flatMap { case (rdd, p) => rdd.iterator(p, context) } } override def clearDependencies() { super.clearDependencies() rdds = null } // Get the *current* preferred locations from the DAGScheduler (as opposed to the static ones) private def currPrefLocs(rdd: RDD[_], part: Partition): Seq[String] = { rdd.context.getPreferredLocs(rdd, part.index).map(tl => tl.host) } }
Example 23
Source File: DnnStorage.scala From BigDL with Apache License 2.0
package com.intel.analytics.bigdl.tensor

import java.io.{IOException, ObjectInputStream, ObjectOutputStream}

import com.intel.analytics.bigdl.mkl.Memory
import com.intel.analytics.bigdl.nn.mkldnn.MemoryOwner

import scala.reflect._

private[bigdl] class Pointer(val address: Long)

object DnnStorage {
  private[tensor] val CACHE_LINE_SIZE = System.getProperty("bigdl.cache.line", "64").toInt
  private[bigdl] val FLOAT_BYTES: Int = 4
  private[bigdl] val INT8_BYTES: Int = 1
  private[bigdl] val INT_BYTES: Int = 4

  import java.util.concurrent.ConcurrentHashMap
  private val nativeStorages: ConcurrentHashMap[Long, Boolean] = new ConcurrentHashMap()

  def checkAndSet(pointer: Long): Boolean = {
    nativeStorages.replace(pointer, false, true)
  }

  def add(key: Long): Unit = {
    nativeStorages.put(key, false)
  }

  def get(): Map[Long, Boolean] = {
    import scala.collection.JavaConverters._
    nativeStorages.asScala.toMap
  }
}
Example 24
Source File: ECIESCoder.scala From mantis with Apache License 2.0
package io.iohk.ethereum.crypto import java.io.{ByteArrayInputStream, IOException} import java.math.BigInteger import java.security.SecureRandom import org.spongycastle.crypto.digests.{SHA1Digest, SHA256Digest} import org.spongycastle.crypto.engines.AESEngine import org.spongycastle.crypto.generators.ECKeyPairGenerator import org.spongycastle.crypto.macs.HMac import org.spongycastle.crypto.modes.SICBlockCipher import org.spongycastle.crypto.params._ import org.spongycastle.crypto.parsers.ECIESPublicKeyParser import org.spongycastle.crypto.{BufferedBlockCipher, InvalidCipherTextException} import org.spongycastle.math.ec.ECPoint object ECIESCoder { val KeySize = 128 val PublicKeyOverheadSize = 65 val MacOverheadSize = 32 val OverheadSize = PublicKeyOverheadSize + KeySize / 8 + MacOverheadSize @throws[IOException] @throws[InvalidCipherTextException] def decrypt(privKey: BigInteger, cipher: Array[Byte], macData: Option[Array[Byte]] = None): Array[Byte] = { val is = new ByteArrayInputStream(cipher) val ephemBytes = new Array[Byte](2 * ((curve.getCurve.getFieldSize + 7) / 8) + 1) is.read(ephemBytes) val ephem = curve.getCurve.decodePoint(ephemBytes) val IV = new Array[Byte](KeySize / 8) is.read(IV) val cipherBody = new Array[Byte](is.available) is.read(cipherBody) decrypt(ephem, privKey, Some(IV), cipherBody, macData) } @throws[InvalidCipherTextException] def decrypt(ephem: ECPoint, prv: BigInteger, IV: Option[Array[Byte]], cipher: Array[Byte], macData: Option[Array[Byte]]): Array[Byte] = { val aesEngine = new AESEngine val iesEngine = new EthereumIESEngine( kdf = Left(new ConcatKDFBytesGenerator(new SHA256Digest)), mac = new HMac(new SHA256Digest), hash = new SHA256Digest, cipher = Some(new BufferedBlockCipher(new SICBlockCipher(aesEngine))), IV = IV, prvSrc = Left(new ECPrivateKeyParameters(prv, curve)), pubSrc = Left(new ECPublicKeyParameters(ephem, curve))) iesEngine.processBlock(cipher, 0, cipher.length, forEncryption = false, macData) } @throws[IOException] @throws[InvalidCipherTextException] def encryptSimple(pub: ECPoint, secureRandom: SecureRandom, plaintext: Array[Byte]): Array[Byte] = { val eGen = new ECKeyPairGenerator val gParam = new ECKeyGenerationParameters(curve, secureRandom) eGen.init(gParam) val iesEngine = new EthereumIESEngine( kdf = Right(new MGF1BytesGeneratorExt(new SHA1Digest)), mac = new HMac(new SHA1Digest), hash = new SHA1Digest, cipher = None, IV = Some(new Array[Byte](0)), prvSrc = Right(eGen), pubSrc = Left(new ECPublicKeyParameters(pub, curve)), hashMacKey = false) iesEngine.processBlock(plaintext, 0, plaintext.length, forEncryption = true) } private def makeIESEngine(pub: ECPoint, prv: BigInteger, IV: Option[Array[Byte]]) = { val aesEngine = new AESEngine val iesEngine = new EthereumIESEngine( kdf = Left(new ConcatKDFBytesGenerator(new SHA256Digest)), mac = new HMac(new SHA256Digest), hash = new SHA256Digest, cipher = Some(new BufferedBlockCipher(new SICBlockCipher(aesEngine))), IV = IV, prvSrc = Left(new ECPrivateKeyParameters(prv, curve)), pubSrc = Left(new ECPublicKeyParameters(pub, curve))) iesEngine } }
Example 25
Source File: Main.scala From scala-json-rpc with MIT License
package io.github.shogowada.scala.jsonrpc.example.e2e.websocket import java.io.IOException import io.github.shogowada.scala.jsonrpc.JSONRPCServerAndClient import io.github.shogowada.scala.jsonrpc.Types.JSONSender import io.github.shogowada.scala.jsonrpc.client.JSONRPCClient import io.github.shogowada.scala.jsonrpc.serializers.UpickleJSONSerializer import io.github.shogowada.scala.jsonrpc.server.JSONRPCServer import io.github.shogowada.scalajs.reactjs.ReactDOM import io.github.shogowada.scalajs.reactjs.VirtualDOM._ import org.scalajs.dom import org.scalajs.dom.WebSocket import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.{Future, Promise} import scala.scalajs.js.JSApp import scala.util.{Failure, Try} object Main extends JSApp { override def main(): Unit = { val futureWebSocket = createFutureWebSocket() val serverAndClient = createServerAndClient(futureWebSocket) val mountNode = dom.document.getElementById("mount-node") ReactDOM.render( <((new TodoListView(serverAndClient.createAPI[TodoRepositoryAPI])) ()).empty, mountNode ) } private def createFutureWebSocket(): Future[WebSocket] = { val promisedWebSocket: Promise[WebSocket] = Promise() val webSocket = new dom.WebSocket(webSocketUrl) webSocket.onopen = (_: dom.Event) => { promisedWebSocket.success(webSocket) } webSocket.onerror = (event: dom.Event) => { promisedWebSocket.failure(new IOException(event.toString)) } promisedWebSocket.future } private def webSocketUrl: String = { val location = dom.window.location val protocol = location.protocol match { case "http:" => "ws:" case "https:" => "wss:" } s"$protocol//${location.host}/jsonrpc" } private def createServerAndClient(futureWebSocket: Future[WebSocket]): JSONRPCServerAndClient[UpickleJSONSerializer] = { val jsonSerializer = UpickleJSONSerializer() val server = JSONRPCServer(jsonSerializer) val jsonSender: JSONSender = (json: String) => { futureWebSocket .map(webSocket => Try(webSocket.send(json))) .flatMap(tried => tried.fold( throwable => Future.failed(throwable), _ => Future(None) )) } val client = JSONRPCClient(jsonSerializer, jsonSender) val serverAndClient = JSONRPCServerAndClient(server, client) futureWebSocket.foreach(webSocket => { webSocket.onmessage = (event: dom.MessageEvent) => { val message = event.data.toString serverAndClient.receiveAndSend(message).onComplete { case Failure(throwable) => { println("Failed to send response", throwable) } case _ => } } }) serverAndClient } }
Example 26
Source File: NetworkDeviceManager.scala From slide-desktop with GNU General Public License v2.0
package connections.network

import java.io.IOException
import java.net.SocketException

import connections.{BaseDeviceManager, ConnectionManager}
import slide.SystemInfo
import enums.ConnectionMode

class NetworkDeviceManager extends BaseDeviceManager {

  private var ndc: NetworkDeviceConnection = null
  private var backgroundScannerRunning: Boolean = true

  @throws(classOf[IOException])
  override def connect(ip: String): Unit = {
    ndc = new NetworkDeviceConnection(ip) {
      override def onClientOutOfDate(): Unit = {
        throwError("The client is out of date. Please upgrade it.")
      }
    }
    ndc.connect()
  }

  override def throwError(message: String): Unit = {}

  override def startBackgroundScanner(): Unit = {
    val t: Thread = new Thread(new Runnable {
      override def run(): Unit = {
        var dcCount: Int = 0
        var udpDiscovery: BroadcastManager = null
        try {
          udpDiscovery = new BroadcastManager
        } catch {
          case e: SocketException =>
            throwError("Another instance of Slide is already running.")
            System.exit(1)
        }
        while (backgroundScannerRunning) {
          if (!SystemInfo.isNetworkIsAvailable) {
            stopBackgroundScanner()
          }
          device = udpDiscovery.search
          if (device != null) {
            dcCount = 0
            if (!ConnectionManager.hasConnection(ConnectionMode.WIFI)) {
              onWifiConnectionAdded()
            }
          } else {
            dcCount += 1
            if (dcCount >= 4) {
              if (ConnectionManager.hasConnection(ConnectionMode.WIFI)) {
                onWifiConnectionRemoved()
              }
            }
          }
        }
      }
    })
    t.start()
  }

  override def stopBackgroundScanner(): Unit = backgroundScannerRunning = false

  def ip: String = device.ip
}
Example 27
Source File: NetworkDeviceConnection.scala From slide-desktop with GNU General Public License v2.0
package connections.network

import java.io.{IOException, ObjectInputStream}
import java.net.InetSocketAddress

import connections.BaseDeviceConnection
import slide.Const

class NetworkDeviceConnection(val ip: String) extends BaseDeviceConnection {

  private val inetAddress: InetSocketAddress = new InetSocketAddress(ip, Const.NET_PORT)
  super.socket.connect(inetAddress, 4000)

  private val input = new ObjectInputStream(socket.getInputStream)

  @throws[IOException]
  override def connect(): Boolean = {
    this.start()
  }

  @throws[IOException]
  @throws[ClassNotFoundException]
  override def nextMessage(): Array[Short] = {
    try {
      input.readObject.asInstanceOf[Array[Short]]
    } catch {
      case e: ClassNotFoundException => new Array[Short](1)
    }
  }

  override def close(): Unit = {
    stopRunning()
    input.close()
    socket.close()
  }
}
Example 28
Source File: Adb.scala From slide-desktop with GNU General Public License v2.0
package connections.usb

import java.io.{File, IOException}

import com.android.ddmlib.AndroidDebugBridge.IDeviceChangeListener
import com.android.ddmlib.{AndroidDebugBridge, IDevice}
import slide.{Const, FileManager, SystemInfo}
import enums.OperatingSystem

object Adb {

  var usbAvailable: Boolean = false
  var isAdbInstalled: Boolean = false
  var adbFilePath: String = ""

  def startAdb(): Process = {
    executeAdbProcess(new ProcessBuilder(adbFilePath, "forward", "tcp:" + Const.USB_PORT, "tcp:" + Const.USB_PORT))
  }

  def adbDevices(): Process = {
    executeAdbProcess(new ProcessBuilder(adbFilePath, "devices"))
  }

  def restartAdb(): Process = {
    executeAdbProcess(new ProcessBuilder(adbFilePath, "kill-server"))
    executeAdbProcess(new ProcessBuilder(adbFilePath, "start-server"))
  }
}
Example 29
Source File: UsbDeviceManager.scala From slide-desktop with GNU General Public License v2.0
package connections.usb

import java.io.IOException

import connections.{BaseDeviceManager, ConnectionManager}
import slide.Device
import enums.ConnectionMode
import gui.ImageIcons

class UsbDeviceManager extends BaseDeviceManager {

  private var udc: UsbDeviceConnection = null
  private var backgroundScannerRunning: Boolean = true

  @throws(classOf[IOException])
  def connect(ip: String): Unit = {
    udc = new UsbDeviceConnection(ip) {
      override def onClientOutOfDate(): Unit = {
        throwError("The client is out of date. Please upgrade it.")
      }
    }
    udc.connect()
  }

  def startBackgroundScanner(): Unit = {
    val t: Thread = new Thread(new Runnable {
      def run() {
        device = new Device(ImageIcons.usbIcon, Array[String]("USB", "USB", "USB"))
        var dcCount: Int = 0
        while (backgroundScannerRunning) {
          Thread.sleep(1000)
          if (Adb.usbAvailable) {
            dcCount = 0
            if (!ConnectionManager.hasConnection(ConnectionMode.USB)) {
              onUsbConnectionAdded()
            }
          } else {
            dcCount += 1
            if (dcCount >= 2 && device != null && ConnectionManager.hasConnection(ConnectionMode.USB)) {
              onUsbConnectionRemoved()
            }
          }
        }
      }
    })
    if (!Adb.isAdbAvailable) {
      throwError("Adb not found")
    } else {
      t.start()
    }
  }

  override def throwError(message: String): Unit = {}

  def stopBackgroundScanner(): Unit = backgroundScannerRunning = false
}
Example 30
Source File: CustomJson.scala From matcher with MIT License
package com.wavesplatform.dex.api.http.json import java.io.IOException import akka.http.scaladsl.model.MediaType import akka.http.scaladsl.model.MediaTypes.`application/json` import com.fasterxml.jackson.core.io.SegmentedStringWriter import com.fasterxml.jackson.core.util.BufferRecyclers import com.fasterxml.jackson.core.{JsonGenerator, JsonProcessingException} import com.fasterxml.jackson.databind.module.SimpleModule import com.fasterxml.jackson.databind.{JsonMappingException, JsonSerializer, ObjectMapper, SerializerProvider} import play.api.libs.json._ object NumberAsStringSerializer extends JsonSerializer[JsValue] { private val fieldNamesToTranslate = Set( "amount", "available", "balance", "buyMatcherFee", "currentReward", "desiredReward", "effective", "fee", "feeAmount", "generating", "in", "matcherFee", "minIncrement", "minSponsoredAssetFee", "out", "price", "quantity", "regular", "reward", "sellMatcherFee", "sponsorBalance", "totalAmount", "totalFee", "totalWavesAmount", "value" ) override def serialize(value: JsValue, json: JsonGenerator, provider: SerializerProvider): Unit = { value match { case JsNumber(v) => json.writeNumber(v.bigDecimal) case JsString(v) => json.writeString(v) case JsBoolean(v) => json.writeBoolean(v) case JsArray(elements) => json.writeStartArray() elements.foreach { t => serialize(t, json, provider) } json.writeEndArray() case JsObject(values) => json.writeStartObject() values.foreach { case (name, JsNumber(v)) if fieldNamesToTranslate(name) => json.writeStringField(name, v.bigDecimal.toPlainString) case (name, jsv) => json.writeFieldName(name) serialize(jsv, json, provider) } json.writeEndObject() case JsNull => json.writeNull() } } } object CustomJson { val jsonWithNumbersAsStrings: MediaType.WithFixedCharset = `application/json`.withParams(Map("large-significand-format" -> "string")) private lazy val mapper = (new ObjectMapper) .registerModule(new SimpleModule("WavesJson").addSerializer(classOf[JsValue], NumberAsStringSerializer)) .configure(JsonGenerator.Feature.WRITE_BIGDECIMAL_AS_PLAIN, true) def writeValueAsString(value: JsValue): String = { val sw = new SegmentedStringWriter(BufferRecyclers.getBufferRecycler) try mapper.writeValue(sw, value) catch { case e: JsonProcessingException => throw e case e: IOException => // shouldn't really happen, but is declared as possibility so: throw JsonMappingException.fromUnexpectedIOE(e) } sw.getAndClear } }
Example 31
Source File: TestHelpers.scala From matcher with MIT License | 5 votes |
package com.wavesplatform.dex.util import java.io.IOException import java.nio.file.attribute.BasicFileAttributes import java.nio.file.{FileVisitResult, Files, Path, SimpleFileVisitor} object TestHelpers { def deleteRecursively(path: Path): Unit = Files.walkFileTree( path, new SimpleFileVisitor[Path] { override def postVisitDirectory(dir: Path, exc: IOException): FileVisitResult = { Option(exc).fold { Files.delete(dir) FileVisitResult.CONTINUE }(throw _) } override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = { Files.delete(file) FileVisitResult.CONTINUE } } ) }
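A short usage sketch (hypothetical file names, assuming the object above is importable):

import java.nio.file.Files
import com.wavesplatform.dex.util.TestHelpers

val dir = Files.createTempDirectory("cleanup-example")
Files.createFile(dir.resolve("data.bin"))
TestHelpers.deleteRecursively(dir) // deletes data.bin first, then the directory itself
assert(!Files.exists(dir))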
Example 32
Source File: IOUtil.scala From jardiff with Apache License 2.0 | 5 votes |
package scala.tools.jardiff import java.io.IOException import java.net.URI import java.nio.file._ import java.nio.file.attribute.BasicFileAttributes import java.util object IOUtil { def rootPath(fileOrZip: Path): Path = { if (fileOrZip.getFileName.toString.endsWith(".jar")) { val uri = URI.create(s"jar:${fileOrZip.toUri}") newFileSystem(uri, new util.HashMap[String, Any]()).getPath("/") } else { val extSlash = ".jar/" val index = fileOrZip.toString.indexOf(extSlash) if (index == -1) { fileOrZip } else { val uri = URI.create("jar:" + Paths.get(fileOrZip.toString.substring(0, index + extSlash.length - 1)).toUri.toString) val jarEntry = fileOrZip.toString.substring(index + extSlash.length - 1) val system = newFileSystem(uri, new util.HashMap[String, Any]()) system.getPath(jarEntry) } } } private def newFileSystem(uri: URI, map: java.util.Map[String, Any]) = try FileSystems.newFileSystem(uri, map) catch { case _: FileSystemAlreadyExistsException => FileSystems.getFileSystem(uri) } def mapRecursive(source: java.nio.file.Path, target: java.nio.file.Path)(f: (Path, Path) => Unit) = { Files.walkFileTree(source, util.EnumSet.of(FileVisitOption.FOLLOW_LINKS), Integer.MAX_VALUE, new FileVisitor[Path] { def preVisitDirectory(dir: Path, sourceBasic: BasicFileAttributes): FileVisitResult = { val relative = source.relativize(dir).toString if (!Files.exists(target.resolve(relative))) Files.createDirectory(target.resolve(relative)) FileVisitResult.CONTINUE } def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = { val relative = source.relativize(file).toString f(file, target.resolve(relative)) FileVisitResult.CONTINUE } def visitFileFailed(file: Path, e: IOException) = throw e def postVisitDirectory(dir: Path, e: IOException): FileVisitResult = { if (e != null) throw e FileVisitResult.CONTINUE } }) } def deleteRecursive(p: Path): Unit = { import java.io.IOException import java.nio.file.attribute.BasicFileAttributes import java.nio.file.{FileVisitResult, Files, SimpleFileVisitor} Files.walkFileTree(p, new SimpleFileVisitor[Path]() { override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = { Files.delete(file) FileVisitResult.CONTINUE } override def preVisitDirectory(dir: Path, attrs: BasicFileAttributes): FileVisitResult = { if (dir.getFileName.toString == ".git") FileVisitResult.SKIP_SUBTREE else super.preVisitDirectory(dir, attrs) } override def postVisitDirectory(dir: Path, exc: IOException): FileVisitResult = { val listing = Files.list(dir) try { if (!listing.iterator().hasNext) Files.delete(dir) } finally { listing.close() } FileVisitResult.CONTINUE } }) } }
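A hedged usage sketch; "example.jar" is a hypothetical archive on disk:

import java.nio.file.{Files, Paths}
import scala.tools.jardiff.IOUtil

val root = IOUtil.rootPath(Paths.get("example.jar"))   // "/" of a jar: filesystem
val out  = Files.createTempDirectory("extracted")
IOUtil.mapRecursive(root, out) { (src, dst) =>
  Files.copy(src, dst)                                 // copy each jar entry to disk
}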
Example 33
Source File: ServiceSpec.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
import java.io.{File, FileNotFoundException, IOException} import java.net.ServerSocket import java.util.Base64 import it.gov.daf.entitymanager.Entity import it.gov.daf.entitymanager.client.Entity_managerClient import org.specs2.mutable.Specification import org.specs2.specification.BeforeAfterAll import play.api.Application import play.api.inject.guice.GuiceApplicationBuilder import play.api.libs.ws.ahc.AhcWSClient import play.api.test.WithServer import scala.concurrent.Await import scala.concurrent.duration.Duration import scala.util.{Failure, Random, Try} @SuppressWarnings( Array( "org.wartremover.warts.NonUnitStatements", "org.wartremover.warts.Throw", "org.wartremover.warts.Var" ) ) class ServiceSpec extends Specification with BeforeAfterAll { def getAvailablePort: Int = { try { val socket = new ServerSocket(0) try { socket.getLocalPort } finally { socket.close() } } catch { case e: IOException => throw new IllegalStateException(s"Cannot find available port: ${e.getMessage}", e) } } private def constructTempDir(dirPrefix: String): Try[File] = Try { val rndrange = 10000000 val file = new File(System.getProperty("java.io.tmpdir"), s"$dirPrefix${Random.nextInt(rndrange)}") if (!file.mkdirs()) throw new RuntimeException("could not create temp directory: " + file.getAbsolutePath) file.deleteOnExit() file } private def deleteDirectory(path: File): Boolean = { if (!path.exists()) { throw new FileNotFoundException(path.getAbsolutePath) } var ret = true if (path.isDirectory) path.listFiles().foreach(f => ret = ret && deleteDirectory(f)) ret && path.delete() } var tmpDir: Try[File] = Failure[File](new Exception("")) def application: Application = GuiceApplicationBuilder(). configure("pac4j.authenticator" -> "test"). configure("janusgraph.storage.directory" -> s"${tmpDir.map(_.getCanonicalPath).getOrElse("db")}/berkeleyje"). configure("janusgraph.index.search.directory" -> s"${tmpDir.map(_.getCanonicalPath).getOrElse("db")}/lucene"). build() "The entity_manager" should { "create an entity and retrieve it correctly" in new WithServer(app = application, port = getAvailablePort) { val ws: AhcWSClient = AhcWSClient() val plainCreds = "david:david" val plainCredsBytes = plainCreds.getBytes val base64CredsBytes = Base64.getEncoder.encode(plainCredsBytes) val base64Creds = new String(base64CredsBytes) val client = new Entity_managerClient(ws)(s"http://localhost:$port") val result = Await.result(client.createEntity(s"Basic $base64Creds", Entity("DAVID")), Duration.Inf) val entity = Await.result(client.getEntity(s"Basic $base64Creds", "DAVID"), Duration.Inf) entity must beEqualTo(Entity("DAVID")) } } override def beforeAll(): Unit = tmpDir = constructTempDir("test") override def afterAll(): Unit = tmpDir.foreach(deleteDirectory(_)) }
Example 34
Source File: CatalogControllersSpec.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
import java.io.IOException import java.net.ServerSocket import akka.actor.ActorSystem import akka.stream.ActorMaterializer import catalog_manager.yaml.MetaCatalog import org.specs2.mutable.Specification import play.api.Application import play.api.http.Status import play.api.routing.Router import play.api.inject.guice.GuiceApplicationBuilder import play.api.libs.json.{JsArray, JsValue, Json} import play.api.libs.ws.WSResponse import play.api.libs.ws.ahc.AhcWSClient import play.api.test._ import it.gov.daf.catalogmanager import it.gov.daf.catalogmanager.client.Catalog_managerClient import scala.concurrent.duration.Duration import scala.concurrent.{Await, Future} class CatalogControllersSpec extends Specification { def application: Application = GuiceApplicationBuilder().build() import catalog_manager.yaml.BodyReads.MetaCatalogReads "The catalog-manager" should { "Call catalog-manager/v1/dataset-catalogs return ok status" in new WithServer(app = application, port = 9000) { WsTestClient.withClient { implicit client => val response: WSResponse = Await.result[WSResponse](client. url(s"http://localhost:9001/catalog-manager/v1/dataset-catalogs"). execute, Duration.Inf) println(response.status) response.status must be equalTo Status.OK } } "Call catalog-manager/v1/dataset-catalogs return a non empty list if" + "you have error maybe is necessaty to add data to db" in new WithServer(app = application, port = 9000) { WsTestClient.withClient { implicit client => val response: WSResponse = Await.result[WSResponse](client. url(s"http://localhost:9001/catalog-manager/v1/dataset-catalogs"). execute, Duration.Inf) println(response.status) println("ALE") println(response.body) val json: JsValue = Json.parse(response.body) json.as[JsArray].value.size must be greaterThan (0) } } "The catalog-manager" should { "Call catalog-manager/v1/dataset-catalogs/{logical_uri} return ok status" in new WithServer(app = application, port = 9000) { val logicalUri = "daf://dataset/std/standard/standard/uri_cultura/standard" val url = s"http://localhost:9001/catalog-manager/v1/dataset-catalogs/$logicalUri" println(url) WsTestClient.withClient { implicit client => val response: WSResponse = Await.result[WSResponse](client. url(url). execute, Duration.Inf) println(response.status) response.status must be equalTo Status.OK } } } "The catalog-manager" should { "Call catalog-manager/v1/dataset-catalogs/{anything} return 401" in new WithServer(app = application, port = 9000) { val logicalUri = "anything" val url = s"http://localhost:9001/catalog-manager/v1/dataset-catalogs/$logicalUri" println(url) WsTestClient.withClient { implicit client => val response: WSResponse = Await.result[WSResponse](client. url(url). execute, Duration.Inf) println(response.status) response.status must be equalTo 401 } } } } }
Example 35
Source File: TokenizerWrapper.scala From dbpedia-spotlight-model with Apache License 2.0 | 5 votes |
package org.dbpedia.spotlight.db.concurrent import java.io.IOException import java.util.concurrent.TimeUnit import akka.actor.SupervisorStrategy.Restart import akka.actor.{Actor, ActorSystem, OneForOneStrategy, Props} import akka.pattern.ask import akka.routing.SmallestMailboxRouter import akka.util import org.apache.commons.lang.NotImplementedException import org.dbpedia.spotlight.db.model.{StringTokenizer, TextTokenizer} import org.dbpedia.spotlight.model.{Text, Token} import scala.concurrent.Await class TokenizerWrapper(val tokenizers: Seq[TextTokenizer]) extends TextTokenizer { var requestTimeout = 60 val system = ActorSystem() val workers = tokenizers.map { case tokenizer: TextTokenizer => system.actorOf(Props(new TokenizerActor(tokenizer))) }.seq def size: Int = tokenizers.size val router = system.actorOf(Props[TokenizerActor].withRouter( // This might be a hack SmallestMailboxRouter(scala.collection.immutable.Iterable(workers:_*)).withSupervisorStrategy( OneForOneStrategy(maxNrOfRetries = 10) { case _: IOException => Restart }) ) ) implicit val timeout = util.Timeout(requestTimeout, TimeUnit.SECONDS) override def tokenizeMaybe(text: Text) { val futureResult = router ? TokenizerRequest(text) Await.result(futureResult, timeout.duration) } override def tokenize(text: Text): List[Token] = { tokenizeMaybe(text) text.featureValue[List[Token]]("tokens").get } def tokenizeRaw(text: String): Seq[String] = { throw new NotImplementedException() } def close() { system.shutdown() } def getStringTokenizer: StringTokenizer = tokenizers.head.getStringTokenizer } class TokenizerActor(val tokenizer: TextTokenizer) extends Actor { def receive = { case TokenizerRequest(text) => { try { sender ! tokenizer.tokenizeMaybe(text) } catch { case e: NullPointerException => throw new IOException("Could not tokenize.") } } } } case class TokenizerRequest(text: Text)
Example 36
Source File: Clause_6_exception.scala From HadoopLearning with MIT License | 5 votes |
package com.c503.scala import java.io.{FileNotFoundException, FileReader, IOException} object Clause_6_exception { def main(args: Array[String]): Unit = { try { val f = new FileReader("input.txt") println(f.getClass.getName) } catch { case ex: FileNotFoundException => { println("Missing file exception") } case bx: IOException => { println("IO Exception") } } finally { println("Exiting finally...") } } }
Example 37
Source File: StorageScriptFsWriter.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.script.writer import java.io.{ByteArrayInputStream, IOException, InputStream, OutputStream} import java.util import com.webank.wedatasphere.linkis.common.io.{FsPath, MetaData, Record} import com.webank.wedatasphere.linkis.storage.LineRecord import com.webank.wedatasphere.linkis.storage.script.{Compaction, ScriptFsWriter, ScriptMetaData} import com.webank.wedatasphere.linkis.storage.utils.{StorageConfiguration, StorageUtils} import org.apache.commons.io.IOUtils class StorageScriptFsWriter(val path: FsPath, val charset: String, outputStream: OutputStream = null) extends ScriptFsWriter { private val stringBuilder = new StringBuilder @scala.throws[IOException] override def addMetaData(metaData: MetaData): Unit = { val compactions = Compaction.listCompactions().filter(p => p.belongTo(StorageUtils.pathToSuffix(path.getPath))) val metadataLine = new util.ArrayList[String]() if (compactions.length > 0) { metaData.asInstanceOf[ScriptMetaData].getMetaData.map(compactions(0).compact).foreach(metadataLine.add) if (outputStream != null) { IOUtils.writeLines(metadataLine, "\n", outputStream, charset) } else { import scala.collection.JavaConversions._ metadataLine.foreach(m => stringBuilder.append(s"$m\n")) } } } @scala.throws[IOException] override def addRecord(record: Record): Unit = { // Converted to LineRecord instead of TableRecord so that non-Table result sets can also be written to this class val scriptRecord = record.asInstanceOf[LineRecord] if (outputStream != null) { IOUtils.write(scriptRecord.getLine, outputStream, charset) } else { stringBuilder.append(scriptRecord.getLine) } } override def close(): Unit = { IOUtils.closeQuietly(outputStream) } override def flush(): Unit = if (outputStream != null) outputStream.flush() def getInputStream(): InputStream = { new ByteArrayInputStream(stringBuilder.toString().getBytes(StorageConfiguration.STORAGE_RS_FILE_TYPE.getValue)) } }
Example 38
Source File: StorageResultSetReader.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.resultset import java.io.{ByteArrayInputStream, IOException, InputStream} import com.webank.wedatasphere.linkis.common.io.resultset.{ResultSet, ResultSetReader} import com.webank.wedatasphere.linkis.common.io.{MetaData, Record} import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.storage.domain.Dolphin import com.webank.wedatasphere.linkis.storage.exception.StorageWarnException import com.webank.wedatasphere.linkis.storage.utils.StorageUtils import scala.collection.mutable.ArrayBuffer def readLine(): Array[Byte] = { var rowLen = 0 try rowLen = Dolphin.readInt(inputStream) catch { case t:StorageWarnException => info(s"Read finished(读取完毕)") ; return null case t: Throwable => throw t } val rowBuffer = ArrayBuffer[Byte]() var len = 0 //Read the entire line, except for the data of the line length(读取整行,除了行长的数据) while (rowLen > 0 && len >= 0) { if (rowLen > READ_CACHE) len = StorageUtils.readBytes(inputStream,bytes, READ_CACHE) else len = StorageUtils.readBytes(inputStream,bytes, rowLen) if (len > 0) { rowLen -= len rowBuffer ++= bytes.slice(0, len) } } rowCount = rowCount + 1 rowBuffer.toArray } @scala.throws[IOException] override def getRecord: Record = { if (metaData == null) throw new IOException("Must read metadata first(必须先读取metadata)") if (row == null) throw new IOException("Can't get the value of the field, maybe the IO stream has been read or has been closed!(拿不到字段的值,也许IO流已读取完毕或已被关闭!)") row } @scala.throws[IOException] override def getMetaData: MetaData = { if(metaData == null) init() metaData = deserializer.createMetaData(readLine()) metaData } @scala.throws[IOException] override def skip(recordNum: Int): Int = { if(recordNum < 0 ) return -1 if(metaData == null) getMetaData for(i <- recordNum until (0, -1)){ try inputStream.skip(Dolphin.readInt(inputStream)) catch { case t: Throwable => return -1} } recordNum } @scala.throws[IOException] override def getPosition: Long = rowCount @scala.throws[IOException] override def hasNext: Boolean = { if(metaData == null) getMetaData val line = readLine() if(line == null) return false row = deserializer.createRecord(line) if(row == null) return false true } @scala.throws[IOException] override def available: Long = inputStream.available() override def close(): Unit = inputStream.close() }
Example 39
Source File: FileSystemUtils.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.utils import java.io.IOException import java.util import com.webank.wedatasphere.linkis.common.io.FsPath import com.webank.wedatasphere.linkis.common.utils.{Logging, Utils} import com.webank.wedatasphere.linkis.storage.FSFactory import com.webank.wedatasphere.linkis.storage.fs.FileSystem import com.webank.wedatasphere.linkis.storage.fs.impl.LocalFileSystem @throws[IOException] def mkdirs(fileSystem: FileSystem,dest: FsPath, user: String): Boolean = { var parentPath = dest.getParent val dirsToMake = new util.Stack[FsPath]() dirsToMake.push(dest) while (!fileSystem.exists(parentPath)){ dirsToMake.push(parentPath) parentPath = parentPath.getParent } if(! fileSystem.canExecute(parentPath)){ throw new IOException("You have not permission to access path " + dest.getPath) } while (!dirsToMake.empty()){ val path = dirsToMake.pop() fileSystem.mkdir(path) fileSystem match { case l:LocalFileSystem => fileSystem.setOwner(path,user) case _ => info(s"doesn't need to call setOwner") } //fileSystem.setOwner(path,user,StorageConfiguration.STORAGE_HDFS_GROUP.getValue) } true } }
Example 40
Source File: Dolphin.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.domain import java.io.{IOException, InputStream} import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.storage.exception.StorageWarnException import com.webank.wedatasphere.linkis.storage.utils.{StorageConfiguration, StorageUtils} def getIntBytes(value: Int): Array[Byte] = { val str = value.toString val res = "0" * (INT_LEN - str.length) + str Dolphin.getBytes(res) } def getType(inputStream:InputStream):String = { val bytes = new Array[Byte](100) val len = StorageUtils.readBytes(inputStream,bytes, Dolphin.MAGIC_LEN + INT_LEN) if(len == -1) return null getType(Dolphin.getString(bytes, 0, len)) } def getType(content: String): String = { if(content.length < MAGIC.length || content.substring(0, MAGIC.length) != MAGIC) throw new IOException(s"File header type must be dolphin,content:$content is not") content.substring(MAGIC.length, MAGIC.length + INT_LEN ).toInt.toString } }
Example 41
Source File: HBasePartitioner.scala From Backup-Repo with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hbase import java.io.{IOException, ObjectInputStream, ObjectOutputStream} import org.apache.hadoop.hbase.util.Bytes import org.apache.spark.serializer.JavaSerializer import org.apache.spark.util.{CollectionsUtils, Utils} import org.apache.spark.{Partitioner, SparkEnv} object HBasePartitioner { implicit object HBaseRawOrdering extends Ordering[HBaseRawType] { def compare(a: HBaseRawType, b: HBaseRawType) = Bytes.compareTo(a, b) } } class HBasePartitioner (var splitKeys: Array[HBaseRawType]) extends Partitioner { import HBasePartitioner.HBaseRawOrdering type t = HBaseRawType lazy private val len = splitKeys.length // For pre-split table splitKeys(0) = bytes[0], to remove it, // otherwise partition 0 always be empty and // we will miss the last region's date when bulk load lazy private val realSplitKeys = if (splitKeys.isEmpty) splitKeys else splitKeys.tail def numPartitions = if (len == 0) 1 else len @transient private val binarySearch: ((Array[t], t) => Int) = CollectionsUtils.makeBinarySearch[t] def getPartition(key: Any): Int = { val k = key.asInstanceOf[t] var partition = 0 if (len <= 128 && len > 0) { // If we have less than 128 partitions naive search val ordering = implicitly[Ordering[t]] while (partition < realSplitKeys.length && ordering.gt(k, realSplitKeys(partition))) { partition += 1 } } else { // Determine which binary search method to use only once. partition = binarySearch(realSplitKeys, k) // binarySearch either returns the match location or -[insertion point]-1 if (partition < 0) { partition = -partition - 1 } if (partition > realSplitKeys.length) { partition = realSplitKeys.length } } partition } override def equals(other: Any): Boolean = other match { case r: HBasePartitioner => r.splitKeys.sameElements(splitKeys) case _ => false } override def hashCode(): Int = { val prime = 31 var result = 1 var i = 0 while (i < splitKeys.length) { result = prime * result + splitKeys(i).hashCode i += 1 } result = prime * result result } }
Example 42
Source File: WriSer.scala From flint with Apache License 2.0 | 5 votes |
package com.twosigma.flint.hadoop import java.io.{ DataInputStream, DataOutputStream, ObjectInputStream, ObjectOutputStream } import java.io.IOException import scala.reflect.{ classTag, ClassTag } import org.apache.hadoop.io.Writable // Note: we could make this implement InputSplit, but we do not because many input splits do a // cast to their specific InputSplit, so we do not want to risk it. Further, this currently works // for any Writable. case class WriSer[T <: Writable: ClassTag](@transient var get: T) extends Serializable { def this() = this(null.asInstanceOf[T]) @throws(classOf[IOException]) private def writeObject(out: ObjectOutputStream) { out.writeObject(classTag[T]) get.write(new DataOutputStream(out)) } @throws(classOf[IOException]) @throws(classOf[ClassNotFoundException]) private def readObject(in: ObjectInputStream) { get = in.readObject.asInstanceOf[ClassTag[T]].runtimeClass.newInstance.asInstanceOf[T] get.readFields(new DataInputStream(in)) } }
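A round-trip sketch showing what the custom writeObject/readObject buy; Hadoop's Text is used here as an arbitrary Writable:

import java.io._
import org.apache.hadoop.io.Text
import com.twosigma.flint.hadoop.WriSer

val bytes = new ByteArrayOutputStream()
val oos   = new ObjectOutputStream(bytes)
oos.writeObject(WriSer(new Text("hello")))
oos.flush()

val ois  = new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray))
val back = ois.readObject().asInstanceOf[WriSer[Text]]
// back.get is a fresh Text("hello"), rebuilt via readFields rather than Java field serialization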
Example 43
Source File: IngestionFlow.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.ingest.services import java.io.IOException import cats.MonadError import cats.implicits._ import com.pluralsight.hydra.avro.JsonToAvroConversionException import hydra.avro.registry.SchemaRegistry import hydra.avro.resource.SchemaResourceLoader.SchemaNotFoundException import hydra.avro.util.SchemaWrapper import hydra.core.ingest.HydraRequest import hydra.core.ingest.RequestParams.{HYDRA_KAFKA_TOPIC_PARAM, HYDRA_RECORD_KEY_PARAM} import hydra.core.transport.{AckStrategy, ValidationStrategy} import hydra.kafka.algebras.KafkaClientAlgebra import hydra.kafka.producer.AvroRecord import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import scalacache._ import scalacache.guava._ import scalacache.memoization._ import scala.concurrent.duration._ import scala.util.{Failure, Success, Try} final class IngestionFlow[F[_]: MonadError[*[_], Throwable]: Mode]( schemaRegistry: SchemaRegistry[F], kafkaClient: KafkaClientAlgebra[F], schemaRegistryBaseUrl: String ) { import IngestionFlow._ implicit val guavaCache: Cache[SchemaWrapper] = GuavaCache[SchemaWrapper] private def getValueSchema(topicName: String): F[Schema] = { schemaRegistry.getLatestSchemaBySubject(topicName + "-value") .flatMap { maybeSchema => val schemaNotFound = SchemaNotFoundException(topicName) MonadError[F, Throwable].fromOption(maybeSchema, SchemaNotFoundAugmentedException(schemaNotFound, topicName)) } } private def getValueSchemaWrapper(topicName: String): F[SchemaWrapper] = memoizeF[F, SchemaWrapper](Some(2.minutes)) { getValueSchema(topicName).map { valueSchema => SchemaWrapper.from(valueSchema) } } def ingest(request: HydraRequest): F[Unit] = { request.metadataValue(HYDRA_KAFKA_TOPIC_PARAM) match { case Some(topic) => getValueSchemaWrapper(topic).flatMap { schemaWrapper => val useStrictValidation = request.validationStrategy == ValidationStrategy.Strict val payloadTryMaybe: Try[Option[GenericRecord]] = Option(request.payload) match { case Some(p) => convertToAvro(topic, schemaWrapper, useStrictValidation, p).map(avroRecord => Some(avroRecord.payload)) case None => Success(None) } val v1Key = getV1RecordKey(schemaWrapper, payloadTryMaybe, request) MonadError[F, Throwable].fromTry(payloadTryMaybe).flatMap { payloadMaybe => kafkaClient.publishStringKeyMessage((v1Key, payloadMaybe), topic).void } } case None => MonadError[F, Throwable].raiseError(MissingTopicNameException(request)) } } private def getV1RecordKey(schemaWrapper: SchemaWrapper, payloadTryMaybe: Try[Option[GenericRecord]], request: HydraRequest): Option[String] = { val headerV1Key = request.metadata.get(HYDRA_RECORD_KEY_PARAM) val optionString = schemaWrapper.primaryKeys.toList match { case Nil => None case l => l.flatMap(pkName => payloadTryMaybe match { case Success(payloadMaybe) => payloadMaybe.flatMap(p => Try(p.get(pkName)).toOption) case Failure(_) => None }).mkString("|").some } headerV1Key.orElse(optionString) } private def convertToAvro(topic: String, schemaWrapper: SchemaWrapper, useStrictValidation: Boolean, payloadString: String): Try[AvroRecord] = { Try(AvroRecord(topic, schemaWrapper.schema, None, payloadString, AckStrategy.Replicated, useStrictValidation)).recoverWith { case e: JsonToAvroConversionException => val location = s"$schemaRegistryBaseUrl/subjects/$topic-value/versions/latest/schema" Failure(new AvroConversionAugmentedException(s"${e.getClass.getName}: ${e.getMessage} [$location]")) case e: IOException => val location = s"$schemaRegistryBaseUrl/subjects/$topic-value/versions/latest/schema" Failure(new 
AvroConversionAugmentedException(s"${e.getMessage} [$location]")) case e => Failure(e) } } } object IngestionFlow { final case class MissingTopicNameException(request: HydraRequest) extends Exception(s"Missing the topic name in request with correlationId ${request.correlationId}") final case class AvroConversionAugmentedException(message: String) extends RuntimeException(message) final case class SchemaNotFoundAugmentedException(schemaNotFoundException: SchemaNotFoundException, topic: String) extends RuntimeException(s"Schema '$topic' cannot be loaded. Cause: ${schemaNotFoundException.getClass.getName}: Schema not found for $topic") }
Example 44
Source File: IngestionFlowV2.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.ingest.services import java.io.IOException import cats.MonadError import cats.implicits._ import hydra.avro.registry.SchemaRegistry import hydra.avro.resource.SchemaResourceLoader.SchemaNotFoundException import hydra.avro.util.SchemaWrapper import hydra.core.transport.ValidationStrategy import hydra.kafka.algebras.KafkaClientAlgebra import hydra.kafka.algebras.KafkaClientAlgebra.PublishResponse import hydra.kafka.model.TopicMetadataV2Request.Subject import org.apache.avro.Schema import org.apache.avro.generic.GenericRecord import scalacache._ import scalacache.guava._ import scalacache.memoization._ import scala.concurrent.duration._ import scala.util.{Failure, Try} final class IngestionFlowV2[F[_]: MonadError[*[_], Throwable]: Mode]( schemaRegistry: SchemaRegistry[F], kafkaClient: KafkaClientAlgebra[F], schemaRegistryBaseUrl: String) { import IngestionFlowV2._ import hydra.avro.convert.StringToGenericRecord._ implicit val guavaCache: Cache[SchemaWrapper] = GuavaCache[SchemaWrapper] private def getSchema(subject: String): F[Schema] = { schemaRegistry.getLatestSchemaBySubject(subject) .flatMap { maybeSchema => val schemaNotFound = SchemaNotFoundException(subject) MonadError[F, Throwable].fromOption(maybeSchema, SchemaNotFoundAugmentedException(schemaNotFound, subject)) } } private def getSchemaWrapper(subject: Subject, isKey: Boolean): F[SchemaWrapper] = memoizeF[F, SchemaWrapper](Some(2.minutes)) { val suffix = if (isKey) "-key" else "-value" getSchema(subject.value + suffix).map { sch => SchemaWrapper.from(sch) } } private def recover[A](subject: Subject, isKey: Boolean): PartialFunction[Throwable, Try[A]] = { val suffix = if (isKey) "-key" else "-value" val location = s"$schemaRegistryBaseUrl/subjects/${subject.value}$suffix/versions/latest/schema" val pf: PartialFunction[Throwable, Try[A]] = { case e: ValidationExtraFieldsError => Failure(AvroConversionAugmentedException(s"${e.getClass.getName}: ${e.getMessage} [$location]")) case e: InvalidLogicalTypeError => Failure(AvroConversionAugmentedException(s"${e.getClass.getName}: ${e.getMessage} [$location]")) case e: IOException => Failure(AvroConversionAugmentedException(s"${e.getClass.getName}: ${e.getMessage} [$location]")) case e => Failure(e) } pf } private def getSchemas(request: V2IngestRequest, topic: Subject): F[(GenericRecord, Option[GenericRecord])] = { val useStrictValidation = request.validationStrategy.getOrElse(ValidationStrategy.Strict) == ValidationStrategy.Strict def getRecord(payload: String, schema: Schema): Try[GenericRecord] = payload.toGenericRecord(schema, useStrictValidation) for { kSchema <- getSchemaWrapper(topic, isKey = true) vSchema <- getSchemaWrapper(topic, isKey = false) k <- MonadError[F, Throwable].fromTry( getRecord(request.keyPayload, kSchema.schema).recoverWith(recover(topic, isKey = true))) v <- MonadError[F, Throwable].fromTry( request.valPayload.traverse(getRecord(_, vSchema.schema)).recoverWith(recover(topic, isKey = false))) } yield (k, v) } def ingest(request: V2IngestRequest, topic: Subject): F[PublishResponse] = { getSchemas(request, topic).flatMap { case (key, value) => kafkaClient.publishMessage((key, value), topic.value).rethrow } } } object IngestionFlowV2 { final case class V2IngestRequest(keyPayload: String, valPayload: Option[String], validationStrategy: Option[ValidationStrategy]) final case class AvroConversionAugmentedException(message: String) extends RuntimeException(message) final case class SchemaNotFoundAugmentedException(schemaNotFoundException: 
SchemaNotFoundException, topic: String) extends RuntimeException(s"Schema '$topic' cannot be loaded. Cause: ${schemaNotFoundException.getClass.getName}: Schema not found for $topic") }
Example 45
Source File: RelativePathSupport.scala From exodus with MIT License | 5 votes |
package com.wix.bazel.migrator.analyze import java.io.IOException import java.nio.file.{Path, Paths} import com.fasterxml.jackson.core.{JsonGenerator, JsonParser, JsonToken} import com.fasterxml.jackson.databind._ import com.fasterxml.jackson.databind.module.SimpleModule class RelativePathSupportingModule extends SimpleModule { addDeserializer(classOf[Path], new RelativePathSupportingDeserializer) addSerializer(classOf[Path], new RelativePathSupportingSerializer) } class RelativePathSupportingSerializer extends JsonSerializer[Path] { @throws[IOException] def serialize(value: Path, gen: JsonGenerator, serializers: SerializerProvider): Unit = value match { case null => gen.writeNull() case _ => gen.writeString(value.toString) } } class RelativePathSupportingDeserializer extends JsonDeserializer[Path] { @throws[IOException] def deserialize(p: JsonParser, ctxt: DeserializationContext): Path = p.getCurrentToken match { case JsonToken.VALUE_NULL => null case JsonToken.VALUE_STRING => Paths.get(p.readValueAs(classOf[String])) case _ => throw ctxt.wrongTokenException(p, JsonToken.VALUE_STRING, "The value of a java.nio.file.Path must be a string") } }
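A usage sketch for the Jackson module above (the path literal is hypothetical):

import java.nio.file.{Path, Paths}
import com.fasterxml.jackson.databind.ObjectMapper
import com.wix.bazel.migrator.analyze.RelativePathSupportingModule

val mapper = new ObjectMapper().registerModule(new RelativePathSupportingModule)
val json   = mapper.writeValueAsString(Paths.get("foo/Bar.scala")) // the JSON string "foo/Bar.scala"
val path   = mapper.readValue(json, classOf[Path])                 // back to Paths.get("foo/Bar.scala")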
Example 46
Source File: SourceModuleSupport.scala From exodus with MIT License | 5 votes |
package com.wix.bazel.migrator.analyze import java.io.IOException import com.fasterxml.jackson.core.{JsonGenerator, JsonParser, JsonToken} import com.fasterxml.jackson.databind._ import com.fasterxml.jackson.databind.module.SimpleModule import com.wix.bazel.migrator.model.SourceModule class SourceModuleSupportingModule(modules: Set[SourceModule]) extends SimpleModule { addDeserializer(classOf[SourceModule], new SourceModuleSupportingDeserializer(modules)) addSerializer(classOf[SourceModule], new SourceModuleSupportingSerializer) } class SourceModuleSupportingSerializer extends JsonSerializer[SourceModule] { @throws[IOException] def serialize(value: SourceModule, gen: JsonGenerator, serializers: SerializerProvider): Unit = value match { case null => gen.writeNull() case _ => gen.writeString(value.relativePathFromMonoRepoRoot) } } class SourceModuleSupportingDeserializer(modules: Set[SourceModule]) extends JsonDeserializer[SourceModule] { @throws[IOException] def deserialize(p: JsonParser, ctxt: DeserializationContext): SourceModule = p.getCurrentToken match { case JsonToken.VALUE_NULL => null case JsonToken.VALUE_STRING => { val relativePath = p.readValueAs(classOf[String]) modules.find(_.relativePathFromMonoRepoRoot == relativePath) .getOrElse(throw ctxt.weirdStringException(relativePath, classOf[SourceModule], s"could not find module with relative path for $relativePath")) } case token => throw ctxt.wrongTokenException(p, JsonToken.VALUE_STRING, s"The value of a module must be a string and currently is $token") } }
Example 47
Source File: CustomHttpAuthenticationFaultHandler.scala From meteorite-core with Apache License 2.0 | 5 votes |
package bi.meteorite.core.security.authentication import java.io.IOException import javax.servlet.http.HttpServletResponse import org.apache.cxf.interceptor.security.AuthenticationException import org.apache.cxf.transport.http.{AbstractHTTPDestination, HttpAuthenticationFaultHandler} import org.apache.cxf.message.Message class CustomHttpAuthenticationFaultHandler extends HttpAuthenticationFaultHandler{ val authenticationType1 = "Basic" val realm1 = "CXF service" override def handleFault(message: Message) { val ex: Exception = message.getContent(classOf[Exception]) if (ex.isInstanceOf[AuthenticationException]) { val resp: HttpServletResponse = message.getExchange.getInMessage.get(AbstractHTTPDestination.HTTP_RESPONSE).asInstanceOf[HttpServletResponse] resp.setStatus(HttpServletResponse.SC_UNAUTHORIZED) resp.setHeader("WWW-Authenticate", authenticationType1 + " realm=\"" + realm1 + "\"") resp.setHeader("Access-Control-Allow-Origin", "*") resp.setContentType("text/plain") try { resp.getWriter.write(ex.getMessage) resp.getWriter.flush message.getInterceptorChain.abort } catch { case e: IOException => { } } } } }
Example 48
Source File: XmlRelation.scala From spark-xml with Apache License 2.0 | 5 votes |
package com.databricks.spark.xml import java.io.IOException import org.apache.hadoop.fs.Path import org.apache.spark.rdd.RDD import org.apache.spark.sql._ import org.apache.spark.sql.sources.{PrunedScan, InsertableRelation, BaseRelation, TableScan} import org.apache.spark.sql.types._ import com.databricks.spark.xml.util.{InferSchema, XmlFile} import com.databricks.spark.xml.parsers.StaxXmlParser case class XmlRelation protected[spark] ( baseRDD: () => RDD[String], location: Option[String], parameters: Map[String, String], userSchema: StructType = null)(@transient val sqlContext: SQLContext) extends BaseRelation with InsertableRelation with PrunedScan { private val options = XmlOptions(parameters) override val schema: StructType = { Option(userSchema).getOrElse { InferSchema.infer( baseRDD(), options) } } override def buildScan(requiredColumns: Array[String]): RDD[Row] = { val requiredFields = requiredColumns.map(schema(_)) val requestedSchema = StructType(requiredFields) StaxXmlParser.parse( baseRDD(), requestedSchema, options) } // The function below was borrowed from JSONRelation override def insert(data: DataFrame, overwrite: Boolean): Unit = { val filesystemPath = location match { case Some(p) => new Path(p) case None => throw new IOException(s"Cannot INSERT into table with no path defined") } val fs = filesystemPath.getFileSystem(sqlContext.sparkContext.hadoopConfiguration) if (overwrite) { try { fs.delete(filesystemPath, true) } catch { case e: IOException => throw new IOException( s"Unable to clear output directory ${filesystemPath.toString} prior" + s" to INSERT OVERWRITE a XML table:\n${e.toString}") } // Write the data. We assume that schema isn't changed, and we won't update it. XmlFile.saveAsXmlFile(data, filesystemPath.toString, parameters) } else { throw new IllegalArgumentException("XML tables only support INSERT OVERWRITE for now.") } } }
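This relation is normally reached through the data source API rather than constructed directly; a hedged read-side sketch, assuming an active SQLContext named sqlContext and hypothetical rowTag/path values:

val df = sqlContext.read
  .format("com.databricks.spark.xml")
  .option("rowTag", "book")
  .load("books.xml")
df.printSchema() // schema inferred by InferSchema unless a user schema is supplied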
Example 49
Source File: TestZooKeeper.scala From mango with Apache License 2.0 | 5 votes |
package com.kakao.mango.zk import java.io.{File, IOException} import java.net.{ServerSocket, Socket} import java.util.concurrent.TimeUnit import com.kakao.mango.concurrent.NamedExecutors import com.kakao.mango.logging.{LogLevelOverrider, Logging} import com.kakao.shaded.guava.io.Files import org.apache.zookeeper.server.persistence.FileTxnSnapLog import org.apache.zookeeper.server.{ServerCnxnFactory, ServerConfig, ZooKeeperServer} import org.scalatest.{BeforeAndAfterAll, Suite} trait TestZooKeeper extends BeforeAndAfterAll with Logging { this: Suite => val zkServerPort = 2181 val zkServerExecutor = NamedExecutors.single("zookeeper-server") var zk: ZooKeeperConnection = _ override protected def beforeAll(): Unit = { logger.info("Launching a standalone ZooKeeper server for testing...") try { val socket = new ServerSocket(zkServerPort) socket.close() } catch { case e: IOException => throw new RuntimeException(s"TCP port $zkServerPort is required for tests but not available") } zkServerExecutor.submit { LogLevelOverrider.error("org.apache.zookeeper") val datadir = Files.createTempDir().getAbsolutePath val config = new ServerConfig config.parse(Array(zkServerPort.toString, datadir)) val zkServer = new ZooKeeperServer zkServer.setTxnLogFactory(new FileTxnSnapLog(new File(datadir), new File(datadir))) zkServer.setTickTime(6000) zkServer.setMinSessionTimeout(6000) zkServer.setMaxSessionTimeout(6000) val cnxnFactory = ServerCnxnFactory.createFactory try { cnxnFactory.configure(config.getClientPortAddress, 60) cnxnFactory.startup(zkServer) cnxnFactory.join() } catch { case _: InterruptedException => logger.info("ZooKeeper server interrupted; shutting down...") cnxnFactory.shutdown() cnxnFactory.join() if (zkServer.isRunning) { zkServer.shutdown() } logger.info("ZooKeeper server stopped") } } var connected = false while (!connected) { logger.info("Waiting for ZooKeeper server to launch...") try { val socket = new Socket("localhost", zkServerPort) logger.info("ZooKeeper server is available") socket.close() zk = ZooKeeperConnection(s"localhost:$zkServerPort") connected = true } catch { case _: IOException => Thread.sleep(1000) // retry } } super.beforeAll() } override protected def afterAll(): Unit = { try super.afterAll() finally { zk.close() logger.info("Interrupting ZooKeeper server...") zkServerExecutor.shutdownNow() while (!zkServerExecutor.awaitTermination(1, TimeUnit.SECONDS)) { logger.info("awaiting ZooKeeper server termination...") } logger.info("ZooKeeper server terminated") } } }
Example 50
Source File: MapJoinPartitionsRDD.scala From spark-vlbfgs with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.util.Utils class MapJoinPartitionsPartition( idx: Int, @transient private val rdd1: RDD[_], @transient private val rdd2: RDD[_], s2IdxArr: Array[Int]) extends Partition { var s1 = rdd1.partitions(idx) var s2Arr = s2IdxArr.map(s2Idx => rdd2.partitions(s2Idx)) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { s1 = rdd1.partitions(idx) s2Arr = s2IdxArr.map(s2Idx => rdd2.partitions(s2Idx)) oos.defaultWriteObject() } } class MapJoinPartitionsRDD[A: ClassTag, B: ClassTag, V: ClassTag]( sc: SparkContext, var idxF: (Int) => Array[Int], var f: (Int, Iterator[A], Array[(Int, Iterator[B])]) => Iterator[V], var rdd1: RDD[A], var rdd2: RDD[B]) extends RDD[V](sc, Nil) { override def getPartitions: Array[Partition] = { val array = new Array[Partition](rdd1.partitions.length) for (s1 <- rdd1.partitions) { val idx = s1.index array(idx) = new MapJoinPartitionsPartition(idx, rdd1, rdd2, idxF(idx)) } array } override def getDependencies: Seq[Dependency[_]] = List( new OneToOneDependency(rdd1), new NarrowDependency(rdd2) { override def getParents(partitionId: Int): Seq[Int] = { idxF(partitionId) } } ) override def getPreferredLocations(s: Partition): Seq[String] = { val fp = firstParent[A] // println(s"pref loc: ${fp.preferredLocations(fp.partitions(s.index))}") fp.preferredLocations(fp.partitions(s.index)) } override def compute(split: Partition, context: TaskContext): Iterator[V] = { val currSplit = split.asInstanceOf[MapJoinPartitionsPartition] f(currSplit.s1.index, rdd1.iterator(currSplit.s1, context), currSplit.s2Arr.map(s2 => (s2.index, rdd2.iterator(s2, context))) ) } override def clearDependencies() { super.clearDependencies() rdd1 = null rdd2 = null idxF = null f = null } }
Example 51
Source File: MapJoinPartitionsRDDV2.scala From spark-vlbfgs with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import org.apache.spark.serializer.Serializer import org.apache.spark.{TaskContext, _} import org.apache.spark.util.Utils import scala.reflect.ClassTag class MapJoinPartitionsPartitionV2( idx: Int, @transient private val rdd1: RDD[_], @transient private val rdd2: RDD[_], s2IdxArr: Array[Int]) extends Partition { var s1 = rdd1.partitions(idx) var s2Arr = s2IdxArr.map(s2Idx => rdd2.partitions(s2Idx)) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { s1 = rdd1.partitions(idx) s2Arr = s2IdxArr.map(s2Idx => rdd2.partitions(s2Idx)) oos.defaultWriteObject() } } class MapJoinPartitionsRDDV2[A: ClassTag, B: ClassTag, V: ClassTag]( sc: SparkContext, var idxF: (Int) => Array[Int], var f: (Int, Iterator[A], Array[(Int, Iterator[B])]) => Iterator[V], var rdd1: RDD[A], var rdd2: RDD[B], preservesPartitioning: Boolean = false) extends RDD[V](sc, Nil) { var rdd2WithPid = rdd2.mapPartitionsWithIndex((pid, iter) => iter.map(x => (pid, x))) private val serializer: Serializer = SparkEnv.get.serializer override def getPartitions: Array[Partition] = { val array = new Array[Partition](rdd1.partitions.length) for (s1 <- rdd1.partitions) { val idx = s1.index array(idx) = new MapJoinPartitionsPartitionV2(idx, rdd1, rdd2, idxF(idx)) } array } override def getDependencies: Seq[Dependency[_]] = List( new OneToOneDependency(rdd1), new ShuffleDependency[Int, B, B]( rdd2WithPid.asInstanceOf[RDD[_ <: Product2[Int, B]]], new IdentityPartitioner(rdd2WithPid.getNumPartitions), serializer) ) override def getPreferredLocations(s: Partition): Seq[String] = { val fp = firstParent[A] // println(s"pref loc: ${fp.preferredLocations(fp.partitions(s.index))}") fp.preferredLocations(fp.partitions(s.index)) } override def compute(split: Partition, context: TaskContext): Iterator[V] = { val currSplit = split.asInstanceOf[MapJoinPartitionsPartitionV2] val rdd2Dep = dependencies(1).asInstanceOf[ShuffleDependency[Int, Any, Any]] val rdd2PartIter = currSplit.s2Arr.map(s2 => (s2.index, SparkEnv.get.shuffleManager .getReader[Int, B](rdd2Dep.shuffleHandle, s2.index, s2.index + 1, context) .read().map(x => x._2) )) val rdd1Iter = rdd1.iterator(currSplit.s1, context) f(currSplit.s1.index, rdd1Iter, rdd2PartIter) } override def clearDependencies() { super.clearDependencies() rdd1 = null rdd2 = null rdd2WithPid = null idxF = null f = null } } private[spark] class IdentityPartitioner(val numParts: Int) extends Partitioner { require(numPartitions > 0) override def getPartition(key: Any): Int = key.asInstanceOf[Int] override def numPartitions: Int = numParts }
Example 52
Source File: OrcFileOperator.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.orc import java.io.IOException import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.hive.ql.io.orc.{OrcFile, Reader} import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector import org.apache.spark.SparkException import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.types.StructType private[hive] object OrcFileOperator extends Logging { def getFileReader(basePath: String, config: Option[Configuration] = None, ignoreCorruptFiles: Boolean = false) : Option[Reader] = { def isWithNonEmptySchema(path: Path, reader: Reader): Boolean = { reader.getObjectInspector match { case oi: StructObjectInspector if oi.getAllStructFieldRefs.size() == 0 => logInfo( s"ORC file $path has empty schema, it probably contains no rows. " + "Trying to read another ORC file to figure out the schema.") false case _ => true } } val conf = config.getOrElse(new Configuration) val fs = { val hdfsPath = new Path(basePath) hdfsPath.getFileSystem(conf) } listOrcFiles(basePath, conf).iterator.map { path => val reader = try { Some(OrcFile.createReader(fs, path)) } catch { case e: IOException => if (ignoreCorruptFiles) { logWarning(s"Skipped the footer in the corrupted file: $path", e) None } else { throw new SparkException(s"Could not read footer for file: $path", e) } } path -> reader }.collectFirst { case (path, Some(reader)) if isWithNonEmptySchema(path, reader) => reader } } def readSchema(paths: Seq[String], conf: Option[Configuration], ignoreCorruptFiles: Boolean) : Option[StructType] = { // Take the first file where we can open a valid reader if we can find one. Otherwise just // return None to indicate we can't infer the schema. paths.toIterator.map(getFileReader(_, conf, ignoreCorruptFiles)).collectFirst { case Some(reader) => val readerInspector = reader.getObjectInspector.asInstanceOf[StructObjectInspector] val schema = readerInspector.getTypeName logDebug(s"Reading schema from file $paths, got Hive schema string: $schema") CatalystSqlParser.parseDataType(schema).asInstanceOf[StructType] } } def getObjectInspector( path: String, conf: Option[Configuration]): Option[StructObjectInspector] = { getFileReader(path, conf).map(_.getObjectInspector.asInstanceOf[StructObjectInspector]) } def listOrcFiles(pathStr: String, conf: Configuration): Seq[Path] = { // TODO: Check if the paths coming in are already qualified and simplify. val origPath = new Path(pathStr) val fs = origPath.getFileSystem(conf) val paths = SparkHadoopUtil.get.listLeafStatuses(fs, origPath) .filterNot(_.isDirectory) .map(_.getPath) .filterNot(_.getName.startsWith("_")) .filterNot(_.getName.startsWith(".")) paths } }
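A hedged sketch of the schema probe; the path is hypothetical and, because the object is private[hive], the call has to sit inside the org.apache.spark.sql.hive package:

import org.apache.hadoop.conf.Configuration
import org.apache.spark.sql.hive.orc.OrcFileOperator

val maybeSchema = OrcFileOperator.readSchema(
  paths = Seq("/warehouse/events/part-00000"),
  conf = Some(new Configuration()),
  ignoreCorruptFiles = true) // skip files with unreadable footers instead of failing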
Example 53
Source File: SparkSQLCLIService.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.thriftserver import java.io.IOException import java.util.{List => JList} import javax.security.auth.login.LoginException import scala.collection.JavaConverters._ import org.apache.commons.logging.Log import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hadoop.hive.shims.Utils import org.apache.hadoop.security.{SecurityUtil, UserGroupInformation} import org.apache.hive.service.{AbstractService, Service, ServiceException} import org.apache.hive.service.Service.STATE import org.apache.hive.service.auth.HiveAuthFactory import org.apache.hive.service.cli._ import org.apache.hive.service.server.HiveServer2 import org.apache.spark.sql.SQLContext import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._ private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLContext) extends CLIService(hiveServer) with ReflectedCompositeService { override def init(hiveConf: HiveConf) { setSuperField(this, "hiveConf", hiveConf) val sparkSqlSessionManager = new SparkSQLSessionManager(hiveServer, sqlContext) setSuperField(this, "sessionManager", sparkSqlSessionManager) addService(sparkSqlSessionManager) var sparkServiceUGI: UserGroupInformation = null var httpUGI: UserGroupInformation = null if (UserGroupInformation.isSecurityEnabled) { try { val principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_PRINCIPAL) val keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_KERBEROS_KEYTAB) if (principal.isEmpty || keyTabFile.isEmpty) { throw new IOException( "HiveServer2 Kerberos principal or keytab is not correctly configured") } val originalUgi = UserGroupInformation.getCurrentUser sparkServiceUGI = if (HiveAuthFactory.needUgiLogin(originalUgi, SecurityUtil.getServerPrincipal(principal, "0.0.0.0"), keyTabFile)) { HiveAuthFactory.loginFromKeytab(hiveConf) Utils.getUGI() } else { originalUgi } setSuperField(this, "serviceUGI", sparkServiceUGI) } catch { case e @ (_: IOException | _: LoginException) => throw new ServiceException("Unable to login to kerberos with given principal/keytab", e) } // Try creating spnego UGI if it is configured. 
val principal = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_PRINCIPAL).trim val keyTabFile = hiveConf.getVar(ConfVars.HIVE_SERVER2_SPNEGO_KEYTAB).trim if (principal.nonEmpty && keyTabFile.nonEmpty) { try { httpUGI = HiveAuthFactory.loginFromSpnegoKeytabAndReturnUGI(hiveConf) setSuperField(this, "httpUGI", httpUGI) } catch { case e: IOException => throw new ServiceException("Unable to login to spnego with given principal " + s"$principal and keytab $keyTabFile: $e", e) } } } initCompositeService(hiveConf) } override def getInfo(sessionHandle: SessionHandle, getInfoType: GetInfoType): GetInfoValue = { getInfoType match { case GetInfoType.CLI_SERVER_NAME => new GetInfoValue("Spark SQL") case GetInfoType.CLI_DBMS_NAME => new GetInfoValue("Spark SQL") case GetInfoType.CLI_DBMS_VER => new GetInfoValue(sqlContext.sparkContext.version) case _ => super.getInfo(sessionHandle, getInfoType) } } } private[thriftserver] trait ReflectedCompositeService { this: AbstractService => def initCompositeService(hiveConf: HiveConf) { // Emulating `CompositeService.init(hiveConf)` val serviceList = getAncestorField[JList[Service]](this, 2, "serviceList") serviceList.asScala.foreach(_.init(hiveConf)) // Emulating `AbstractService.init(hiveConf)` invoke(classOf[AbstractService], this, "ensureCurrentState", classOf[STATE] -> STATE.NOTINITED) setAncestorField(this, 3, "hiveConf", hiveConf) invoke(classOf[AbstractService], this, "changeState", classOf[STATE] -> STATE.INITED) getAncestorField[Log](this, 3, "LOG").info(s"Service: $getName is inited.") } }
Example 54
Source File: ReusableStringReaderSuite.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions.xml import java.io.IOException import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.expressions.xml.UDFXPathUtil.ReusableStringReader class ReusableStringReaderSuite extends SparkFunSuite { private val fox = "Quick brown fox jumps over the lazy dog." test("empty reader") { val reader = new ReusableStringReader intercept[IOException] { reader.read() } intercept[IOException] { reader.ready() } reader.close() } test("mark reset") { val reader = new ReusableStringReader if (reader.markSupported()) { reader.asInstanceOf[ReusableStringReader].set(fox) assert(reader.ready()) val cc = new Array[Char](6) var read = reader.read(cc) assert(read == 6) assert("Quick " == new String(cc)) reader.mark(100) read = reader.read(cc) assert(read == 6) assert("brown " == new String(cc)) reader.reset() read = reader.read(cc) assert(read == 6) assert("brown " == new String(cc)) } reader.close() } test("skip") { val reader = new ReusableStringReader reader.asInstanceOf[ReusableStringReader].set(fox) // skip entire the data: var skipped = reader.skip(fox.length() + 1) assert(fox.length() == skipped) assert(-1 == reader.read()) reader.asInstanceOf[ReusableStringReader].set(fox) // reset the data val cc = new Array[Char](6) var read = reader.read(cc) assert(read == 6) assert("Quick " == new String(cc)) // skip some piece of data: skipped = reader.skip(30) assert(skipped == 30) read = reader.read(cc) assert(read == 4) assert("dog." == new String(cc, 0, read)) // skip when already at EOF: skipped = reader.skip(300) assert(skipped == 0, skipped) assert(reader.read() == -1) reader.close() } }
Example 55
Source File: ProcessTestUtils.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.test import java.io.{InputStream, IOException} import scala.sys.process.BasicIO object ProcessTestUtils { class ProcessOutputCapturer(stream: InputStream, capture: String => Unit) extends Thread { this.setDaemon(true) override def run(): Unit = { try { BasicIO.processFully(capture)(stream) } catch { case _: IOException => // Ignores the IOException thrown when the process termination, which closes the input // stream abruptly. } } } }
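A small usage sketch (assumes a Unix-like echo on the PATH):

import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer

val process  = new ProcessBuilder("echo", "hello").start()
val capturer = new ProcessOutputCapturer(process.getInputStream, line => println(s"captured: $line"))
capturer.start() // daemon thread; pumps the child's stdout into the callback line by line
process.waitFor()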
Example 56
Source File: SafeCloseableTest.scala From maha with Apache License 2.0 | 5 votes |
// Copyright 2017, Yahoo Holdings Inc. // Licensed under the terms of the Apache License 2.0. Please see LICENSE file in project root for terms. package com.yahoo.maha.report import java.io.{Closeable, IOException} import org.mockito.Mockito._ import org.scalatest.{FunSuite, Matchers} class SuccessCloseable extends Closeable { override def close(): Unit = { //success } } class FailCloseable extends Closeable { override def close(): Unit = { throw new IOException("fail") } } class SafeCloseableTest extends FunSuite with Matchers { def successWork(closeable: Closeable): Unit = { //success } def failWork(closeable: Closeable): Unit = { require(false, "fail") } test("successfully doWork") { safeCloseable(new SuccessCloseable)(successWork) } test("successfully close on failed doWork") { val closeable = spy(new SuccessCloseable) safeCloseable(closeable)(failWork) verify(closeable).close() } test("fail to close on failed closeable after failed doWork") { val closeable = spy(new FailCloseable) safeCloseable(closeable)(failWork) verify(closeable).close() } }
Example 57
Source File: ManagedPath.scala From zio-rocksdb with Apache License 2.0 | 5 votes |
package zio.rocksdb.internal package internal import java.io.IOException import java.nio.file.{ Files, Path } import zio.{ Task, UIO, ZIO, ZManaged } import scala.reflect.io.Directory object ManagedPath { private def createTempDirectory: Task[Path] = Task { Files.createTempDirectory("zio-rocksdb") } private def deleteDirectory(path: Path): UIO[Boolean] = UIO { new Directory(path.toFile).deleteRecursively() } private def deleteDirectoryE(path: Path): UIO[Unit] = deleteDirectory(path) >>= { case true => ZIO.unit case false => ZIO.die(new IOException("Could not delete path recursively")) } def apply(): ZManaged[Any, Throwable, Path] = createTempDirectory.toManaged(deleteDirectoryE) }
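A hedged usage sketch against the ZIO 1.x API; ManagedPath refers to the object defined above:

// acquire a temp directory, use it, and delete it recursively on release
val program: zio.ZIO[Any, Throwable, Unit] =
  ManagedPath().use { dir =>
    zio.ZIO.effectTotal(println(s"working under $dir"))
  }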
Example 58
Source File: FlakyHttpClient.scala From cats-retry with Apache License 2.0 | 5 votes |
package util import java.io.IOException case class FlakyHttpClient() { private var i = 0 def getCatGif(): String = { if (i > 3) { "cute cat gets sleepy and falls asleep" } else { i = i + 1 throw new IOException("Failed to download") } } }
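A hand-rolled retry loop around the flaky client (in the cats-retry project this would normally use combinators such as retryingOnAllErrors; the loop below is only a sketch):

import scala.util.{Failure, Success, Try}

val client = FlakyHttpClient() // as defined above
def attempt(retriesLeft: Int): String =
  Try(client.getCatGif()) match {
    case Success(gif)                                       => gif
    case Failure(_: java.io.IOException) if retriesLeft > 0 => attempt(retriesLeft - 1)
    case Failure(e)                                         => throw e
  }
attempt(10) // the first four calls throw IOException; the fifth succeeds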
Example 59
Source File: SpecificRPCTest.scala From sbt-avrohugger with Apache License 2.0 | 5 votes |
package test import org.specs2.mutable.Specification import java.io.IOException import java.net.InetSocketAddress import java.lang.reflect.Proxy import org.apache.avro.specific.SpecificData import org.apache.avro.ipc.netty.NettyServer import org.apache.avro.ipc.netty.NettyTransceiver import org.apache.avro.ipc.Server import org.apache.avro.ipc.specific.SpecificRequestor import org.apache.avro.ipc.specific.SpecificResponder import example.proto.Mail import example.proto.Message class SpecificRPCTest extends Specification { skipAll // RPC tests fail on Linux (Ubuntu 16.04), solution unknown // adapted from https://github.com/phunt/avro-rpc-quickstart "A case class " should { "serialize and deserialize correctly via rpc" in { class MailImpl extends Mail { // in this simple example just return details of the message def send(message: Message): String = { System.out.println("Sending message") val response: String = message.body response.toString } } System.out.println("Starting server") // usually this would be another app, but for simplicity val protocol = Mail.PROTOCOL val responder = new SpecificResponder(protocol, new MailImpl()) val server = new NettyServer(responder, new InetSocketAddress(65111)) System.out.println("Server started") val client = new NettyTransceiver(new InetSocketAddress(65111)) // client code - attach to the server and send a message val requestor = new SpecificRequestor(protocol, client, SpecificData.get) val mailProxy: Mail = Proxy.newProxyInstance( SpecificData.get.getClassLoader, Array(classOf[Mail]), requestor).asInstanceOf[Mail] val message = new Message("avro_user", "pat", "hello_world") System.out.println("Calling proxy.send with message: " + message.toString) System.out.println("Result: " + mailProxy.send(message).toString) val received: String = mailProxy.send(message).toString // cleanup client.close server.close System.out.println("Server stopped") received === message.body } } }
Example 63
Source File: UnsplittableSequenceFileInputFormat.scala From spark-util with Apache License 2.0 | 5 votes |
package org.hammerlab.hadoop.splits

import java.io.IOException
import java.util

import org.apache.hadoop.fs.{ FileStatus, FileSystem, Path ⇒ HPath }
import org.apache.hadoop.mapred.{ JobConf, SequenceFileInputFormat }
import org.apache.hadoop.mapreduce.JobContext
import org.apache.hadoop.mapreduce.lib.input

import scala.collection.JavaConverters._

  override def listStatus(job: JobContext): util.List[FileStatus] =
    super
      .listStatus(job)
      .asScala
      .sortBy {
        _.getPath.getName match {
          case PartFileBasename(idx) ⇒
            idx
          case basename ⇒
            throw new IllegalArgumentException(s"Bad partition file: $basename")
        }
      }
      .asJava
}
Example 64
Source File: ScannerSpec.scala From GettingWorkDoneWithExtensibleEffects with Apache License 2.0 | 5 votes |
package scan

import java.io.FileNotFoundException
import java.io.IOException
import java.nio.file._

import cats._
import cats.data._
import cats.implicits._
import org.atnos.eff._
import org.atnos.eff.all._
import org.atnos.eff.syntax.all._
import org.atnos.eff.addon.monix._
import org.atnos.eff.addon.monix.task._
import org.atnos.eff.syntax.addon.monix.task._
import org.specs2._

import scala.collection.immutable.SortedSet
import scala.concurrent.duration._
import monix.eval._
import monix.execution.Scheduler.Implicits.global

class ScannerSpec extends mutable.Specification {

  case class MockFilesystem(directories: Map[Directory, List[FilePath]], fileSizes: Map[File, Long]) extends Filesystem {

    def length(file: File) = fileSizes.getOrElse(file, throw new IOException())

    def listFiles(directory: Directory) = directories.getOrElse(directory, throw new IOException())

    def filePath(path: String): FilePath =
      if (directories.keySet.contains(Directory(path)))
        Directory(path)
      else if (fileSizes.keySet.contains(File(path)))
        File(path)
      else
        throw new FileNotFoundException(path)
  }

  val base = Directory("base")
  val base1 = File(s"${base.path}/1.txt")
  val base2 = File(s"${base.path}/2.txt")
  val subdir = Directory(s"${base.path}/subdir")
  val sub1 = File(s"${subdir.path}/1.txt")
  val sub3 = File(s"${subdir.path}/3.txt")
  val directories = Map(
    base -> List(subdir, base1, base2),
    subdir -> List(sub1, sub3)
  )
  val fileSizes = Map(base1 -> 1L, base2 -> 2L, sub1 -> 1L, sub3 -> 3L)
  val fs = MockFilesystem(directories, fileSizes)

  type R = Fx.fx4[Task, FilesystemCmd, Reader[ScanConfig, ?], Writer[Log, ?]]

  def run[T](program: Eff[R, T]) =
    program.runReader(ScanConfig(2)).runFilesystemCmds(fs).taskAttempt.runWriter.runAsync.runSyncUnsafe(3.seconds)

  val expected = Right(new PathScan(SortedSet(FileSize(sub3, 3), FileSize(base2, 2)), 7, 4))
  val expectedLogs = Set(
    Log.info("Scan started on Directory(base)"),
    Log.debug("Scanning directory 'Directory(base)': 1 subdirectories and 2 files"),
    Log.debug("File base/1.txt Size 1 B"),
    Log.debug("File base/2.txt Size 2 B"),
    Log.debug("Scanning directory 'Directory(base/subdir)': 0 subdirectories and 2 files"),
    Log.debug("File base/subdir/1.txt Size 1 B"),
    Log.debug("File base/subdir/3.txt Size 3 B")
  )

  val (actual, logs) = run(Scanner.pathScan(base))

  "Report Format" ! {actual.mustEqual(expected)}

  "Logs messages are emitted (ignores order due to non-determinstic concurrent execution)" ! {
    logs.forall(expectedLogs.contains)
  }
}
Example 66
Source File: ScannerSpec.scala From GettingWorkDoneWithExtensibleEffects with Apache License 2.0 | 5 votes |
package scan import java.io.FileNotFoundException import java.io.IOException import java.nio.file._ import cats._ import cats.data._ import cats.implicits._ import org.atnos.eff._ import org.atnos.eff.all._ import org.atnos.eff.syntax.all._ import org.atnos.eff.addon.monix._ import org.atnos.eff.addon.monix.task._ import org.atnos.eff.syntax.addon.monix.task._ import org.specs2._ import scala.collection.immutable.SortedSet import scala.concurrent.duration._ import monix.eval._ import monix.execution.Scheduler.Implicits.global class ScannerSpec extends mutable.Specification { case class MockFilesystem(directories: Map[Directory, List[FilePath]], fileSizes: Map[File, Long]) extends Filesystem { def length(file: File) = fileSizes.getOrElse(file, throw new IOException()) def listFiles(directory: Directory) = directories.getOrElse(directory, throw new IOException()) def filePath(path: String): FilePath = if (directories.keySet.contains(Directory(path))) Directory(path) else if (fileSizes.keySet.contains(File(path))) File(path) else throw new FileNotFoundException(path) } val base = Directory("base") val base1 = File(s"${base.path}/1.txt") val base2 = File(s"${base.path}/2.txt") val subdir = Directory(s"${base.path}/subdir") val sub1 = File(s"${subdir.path}/1.txt") val sub3 = File(s"${subdir.path}/3.txt") val directories = Map( base -> List(subdir, base1, base2), subdir -> List(sub1, sub3) ) val fileSizes = Map(base1 -> 1L, base2 -> 2L, sub1 -> 1L, sub3 -> 3L) val fs = MockFilesystem(directories, fileSizes) type R = Fx.fx4[Task, Reader[Filesystem, ?], Reader[ScanConfig, ?], Writer[Log, ?]] def run[T](program: Eff[R, T], fs: Filesystem) = program.runReader(ScanConfig(2)).runReader(fs).taskAttempt.runWriter.runAsync.runSyncUnsafe(3.seconds) val expected = Right(new PathScan(SortedSet(FileSize(sub3, 3), FileSize(base2, 2)), 7, 4)) val expectedLogs = Set( Log.info("Scan started on Directory(base)"), Log.debug("Scanning directory 'Directory(base)': 1 subdirectories and 2 files"), Log.debug("File base/1.txt Size 1 B"), Log.debug("File base/2.txt Size 2 B"), Log.debug("Scanning directory 'Directory(base/subdir)': 0 subdirectories and 2 files"), Log.debug("File base/subdir/1.txt Size 1 B"), Log.debug("File base/subdir/3.txt Size 3 B") ) val (actual, logs) = run(Scanner.pathScan(base), fs) "Report Format" ! {actual.mustEqual(expected)} "Logs messages are emitted (ignores order due to non-determinstic concurrent execution)" ! { expectedLogs.forall(logs.contains) } }
Example 67
Source File: ScannerSpec.scala From GettingWorkDoneWithExtensibleEffects with Apache License 2.0 | 5 votes |
package scan import java.io.FileNotFoundException import java.io.IOException import java.nio.file._ import cats._ import cats.data._ import cats.implicits._ import org.atnos.eff._ import org.atnos.eff.all._ import org.atnos.eff.syntax.all._ import org.atnos.eff.addon.monix._ import org.atnos.eff.addon.monix.task._ import org.atnos.eff.syntax.addon.monix.task._ import org.specs2._ import scala.collection.immutable.SortedSet import scala.concurrent.duration._ import monix.eval._ import monix.execution.Scheduler.Implicits.global class ScannerSpec extends mutable.Specification { case class MockFilesystem(directories: Map[Directory, List[FilePath]], fileSizes: Map[File, Long]) extends Filesystem { def length(file: File) = fileSizes.getOrElse(file, throw new IOException()) def listFiles(directory: Directory) = directories.getOrElse(directory, throw new IOException()) def filePath(path: String): FilePath = if (directories.keySet.contains(Directory(path))) Directory(path) else if (fileSizes.keySet.contains(File(path))) File(path) else throw new FileNotFoundException(path) } val base = Directory("base") val linkTarget = File(s"/somewhere/else/7.txt") val base1 = File(s"${base.path}/1.txt") val baseLink = Symlink(s"${base.path}/7.txt", linkTarget) val subdir = Directory(s"${base.path}/subdir") val sub2 = File(s"${subdir.path}/2.txt") val subLink = Symlink(s"${subdir.path}/7.txt", linkTarget) val directories = Map( base -> List(subdir, base1, baseLink), subdir -> List(sub2, subLink) ) val fileSizes = Map(base1 -> 1L, sub2 -> 2L, linkTarget -> 7L) val fs = MockFilesystem(directories, fileSizes) type R = Fx.fx5[Task, Reader[Filesystem, ?], Reader[ScanConfig, ?], Writer[Log, ?], State[Set[FilePath], ?]] def run[T](program: Eff[R, T], fs: Filesystem) = program.runReader(ScanConfig(2)).runReader(fs).evalStateZero[Set[FilePath]].taskAttempt.runWriter[Log].runAsync.runSyncUnsafe(3.seconds) val expected = Right(new PathScan(SortedSet(FileSize(linkTarget, 7), FileSize(sub2, 2)), 10, 3)) val (actual, logs) = run(Scanner.pathScan[R](base), fs) "Report Format" ! {actual.mustEqual(expected)} }
Example 68
Source File: ScannerSpec.scala From GettingWorkDoneWithExtensibleEffects with Apache License 2.0 | 5 votes |
package scan import java.io.FileNotFoundException import java.io.IOException import java.nio.file._ import cats._ import cats.data._ import cats.implicits._ import org.atnos.eff._ import org.atnos.eff.all._ import org.atnos.eff.syntax.all._ import org.atnos.eff.addon.monix._ import org.atnos.eff.addon.monix.task._ import org.atnos.eff.syntax.addon.monix.task._ import org.specs2._ import scala.collection.immutable.SortedSet import scala.concurrent.duration._ import monix.eval._ import monix.execution.Scheduler.Implicits.global class ScannerSpec extends mutable.Specification { case class MockFilesystem(directories: Map[Directory, List[FilePath]], fileSizes: Map[File, Long]) extends Filesystem { def length(file: File) = fileSizes.getOrElse(file, throw new IOException()) def listFiles(directory: Directory) = directories.getOrElse(directory, throw new IOException()) def filePath(path: String): FilePath = if (directories.keySet.contains(Directory(path))) Directory(path) else if (fileSizes.keySet.contains(File(path))) File(path) else throw new FileNotFoundException(path) } val base = Directory("base") val base1 = File(s"${base.path}/1.txt") val base2 = File(s"${base.path}/2.txt") val subdir = Directory(s"${base.path}/subdir") val sub1 = File(s"${subdir.path}/1.txt") val sub3 = File(s"${subdir.path}/3.txt") val directories = Map( base -> List(subdir, base1, base2), subdir -> List(sub1, sub3) ) val fileSizes = Map(base1 -> 1L, base2 -> 2L, sub1 -> 1L, sub3 -> 3L) val fs = MockFilesystem(directories, fileSizes) type R = Fx.fx4[Task, Reader[Filesystem, ?], Reader[ScanConfig, ?], Writer[Log, ?]] def run[T](program: Eff[R, T], fs: Filesystem) = program.runReader(ScanConfig(2)).runReader(fs).taskAttempt.runWriter.runAsync.runSyncUnsafe(3.seconds) val expected = Right(new PathScan(SortedSet(FileSize(sub3, 3), FileSize(base2, 2)), 7, 4)) val expectedLogs = Set( Log.info("Scan started on Directory(base)"), Log.debug("Scanning directory 'Directory(base)': 1 subdirectories and 2 files"), Log.debug("File base/1.txt Size 1 B"), Log.debug("File base/2.txt Size 2 B"), Log.debug("Scanning directory 'Directory(base/subdir)': 0 subdirectories and 2 files"), Log.debug("File base/subdir/1.txt Size 1 B"), Log.debug("File base/subdir/3.txt Size 3 B") ) val (actual, logs) = run(Scanner.pathScan(base), fs) "Report Format" ! {actual.mustEqual(expected)} "Logs messages are emitted (ignores order due to non-determinstic concurrent execution)" ! { logs.forall(expectedLogs.contains) } }
Example 69
Source File: ScannerSpec.scala From GettingWorkDoneWithExtensibleEffects with Apache License 2.0 | 5 votes |
package scan

import java.io.FileNotFoundException
import java.io.IOException
import java.nio.file._

import cats._
import cats.data._
import cats.implicits._
import org.atnos.eff._
import org.atnos.eff.all._
import org.atnos.eff.syntax.all._
import org.atnos.eff.addon.monix._
import org.atnos.eff.addon.monix.task._
import org.atnos.eff.syntax.addon.monix.task._
import org.specs2._

import scala.collection.immutable.SortedSet
import scala.concurrent.duration._
import monix.eval._
import monix.execution.Scheduler.Implicits.global

class ScannerSpec extends mutable.Specification {

  case class MockFilesystem(directories: Map[Directory, List[FilePath]], fileSizes: Map[File, Long]) extends Filesystem {

    def length(file: File) = fileSizes.getOrElse(file, throw new IOException())

    def listFiles(directory: Directory) = directories.getOrElse(directory, throw new IOException())

    def filePath(path: String): FilePath =
      if (directories.keySet.contains(Directory(path)))
        Directory(path)
      else if (fileSizes.keySet.contains(File(path)))
        File(path)
      else
        throw new FileNotFoundException(path)
  }

  val base = Directory("base")
  val base1 = File(s"${base.path}/1.txt")
  val base2 = File(s"${base.path}/2.txt")
  val subdir = Directory(s"${base.path}/subdir")
  val sub1 = File(s"${subdir.path}/1.txt")
  val sub3 = File(s"${subdir.path}/3.txt")
  val directories = Map(
    base -> List(subdir, base1, base2),
    subdir -> List(sub1, sub3)
  )
  val fileSizes = Map(base1 -> 1L, base2 -> 2L, sub1 -> 1L, sub3 -> 3L)
  val fs = MockFilesystem(directories, fileSizes)

  type R = Fx.fx3[Task, Reader[Filesystem, ?], Reader[ScanConfig, ?]]

  def run[T](program: Eff[R, T], fs: Filesystem) =
    program.runReader(ScanConfig(2)).runReader(fs).runAsync.attempt.runSyncUnsafe(3.seconds)

  "file scan" ! {
    val actual = run(Scanner.pathScan(base), fs)
    val expected = Right(new PathScan(SortedSet(FileSize(sub3, 3), FileSize(base2, 2)), 7, 4))
    actual.mustEqual(expected)
  }

  "Error from Filesystem" ! {
    val emptyFs: Filesystem = MockFilesystem(directories, Map.empty)
    val actual = runE(Scanner.scanReport(Array("base", "10")), emptyFs)
    val expected = Left(new IOException().toString)
    actual.mustEqual(expected)
  }

  type E = Fx.fx3[Task, Reader[Filesystem, ?], Either[String, ?]]

  def runE[T](program: Eff[E, T], fs: Filesystem) =
    //there are two nested Either in the stack, one from Exceptions and one from errors raised by the program
    //we convert to a common error type String then flatten
    program.runReader(fs).runEither.runAsync.attempt.runSyncUnsafe(3.seconds).leftMap(_.toString).flatten

  "Error - Report with non-numeric input" ! {
    val actual = runE(Scanner.scanReport(Array("base", "not a number")), fs)
    val expected = Left("Number of files must be numeric: not a number")
    actual.mustEqual(expected)
  }

  "Error - Report with non-positive input" ! {
    val actual = runE(Scanner.scanReport(Array("base", "-1")), fs)
    val expected = Left("Invalid number of files -1")
    actual.mustEqual(expected)
  }
}
Example 73
Source File: ScannerSpec.scala From GettingWorkDoneWithExtensibleEffects with Apache License 2.0 | 5 votes |
package scan import java.io.FileNotFoundException import java.io.IOException import java.nio.file._ import cats._ import cats.data._ import cats.implicits._ import org.atnos.eff._ import org.atnos.eff.all._ import org.atnos.eff.syntax.all._ import org.atnos.eff.addon.monix._ import org.atnos.eff.addon.monix.task._ import org.atnos.eff.syntax.addon.monix.task._ import org.specs2._ import scala.collection.immutable.SortedSet import scala.concurrent.duration._ import monix.eval._ import monix.execution.Scheduler.Implicits.global class ScannerSpec extends mutable.Specification { case class MockFilesystem(directories: Map[Directory, List[FilePath]], fileSizes: Map[File, Long]) extends Filesystem { def length(file: File) = fileSizes.getOrElse(file, throw new IOException()) def listFiles(directory: Directory) = directories.getOrElse(directory, throw new IOException()) def filePath(path: String): FilePath = if (directories.keySet.contains(Directory(path))) Directory(path) else if (fileSizes.keySet.contains(File(path))) File(path) else throw new FileNotFoundException(path) } val base = Directory("base") val base1 = File(s"${base.path}/1.txt") val base2 = File(s"${base.path}/2.txt") val subdir = Directory(s"${base.path}/subdir") val sub1 = File(s"${subdir.path}/1.txt") val sub3 = File(s"${subdir.path}/3.txt") val directories = Map( base -> List(subdir, base1, base2), subdir -> List(sub1, sub3) ) val fileSizes = Map(base1 -> 1L, base2 -> 2L, sub1 -> 1L, sub3 -> 3L) val fs = MockFilesystem(directories, fileSizes) type R = Fx.fx3[Task, Reader[Filesystem, ?], Reader[ScanConfig, ?]] def run[T](program: Eff[R, T], fs: Filesystem) = program.runReader(ScanConfig(2)).runReader(fs).runAsync.attempt.runSyncUnsafe(3.seconds) "file scan" ! { val actual = run(Scanner.pathScan(base), fs) val expected = Right(new PathScan(SortedSet(FileSize(sub3, 3), FileSize(base2, 2)), 7, 4)) actual.mustEqual(expected) } "Error from Filesystem" ! { val emptyFs: Filesystem = MockFilesystem(directories, Map.empty) val actual = runE(Scanner.scanReport(Array("base", "10")), emptyFs) val expected = ??? actual.mustEqual(expected) } type E = Fx.fx3[Task, Reader[Filesystem, ?], Either[String, ?]] def runE[T](program: Eff[E, T], fs: Filesystem) = //there are two nested Either in the stack, one from Exceptions and one from errors raised by the program //we convert to a common error type String then flatten program.runReader(fs).runEither.runAsync.attempt.runSyncUnsafe(3.seconds).leftMap(_.toString).flatten "Error - Report with non-numeric input" ! { val actual = runE(Scanner.scanReport(Array("base", "not a number")), fs) val expected = Left("Number of files must be numeric: not a number") actual.mustEqual(expected) } "Error - Report with non-positive input" ! { val actual = runE(Scanner.scanReport(Array("base", "-1")), fs) val expected = Left("Invalid number of files -1") actual.mustEqual(expected) } }
Example 74
Source File: FileUtil.scala From mleap with Apache License 2.0 | 5 votes |
package ml.combust.bundle.util

import java.io.IOException
import java.nio.file.attribute.BasicFileAttributes
import java.nio.file.{FileVisitResult, Files, Path, SimpleFileVisitor}

object FileUtil {
  def rmRf(path: Path): Unit = {
    Files.walkFileTree(path, new SimpleFileVisitor[Path]() {
      override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = {
        Files.delete(file)
        FileVisitResult.CONTINUE
      }

      override def postVisitDirectory(dir: Path, exc: IOException): FileVisitResult = {
        Files.delete(dir)
        FileVisitResult.CONTINUE
      }
    })
  }
}
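A minimal usage sketch (not part of the mleap sources; the temporary directory and file names are illustrative) showing how FileUtil.rmRf might be exercised to remove a whole directory tree:

import java.nio.file.Files
import ml.combust.bundle.util.FileUtil

object RmRfDemo extends App {
  // build a throwaway tree, then delete it recursively
  val root = Files.createTempDirectory("rmrf-demo")
  Files.createDirectories(root.resolve("nested/dir"))
  Files.write(root.resolve("nested/dir/data.txt"), "hello".getBytes("UTF-8"))

  FileUtil.rmRf(root)
  assert(!Files.exists(root)) // the whole tree is gone
}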
Example 75
Source File: JGitSystemReader.scala From sbt-dynver with Apache License 2.0 | 5 votes |
package sbtdynver

import java.io.{ File, IOException }
import java.net.{ InetAddress, UnknownHostException }
import java.nio.file.{ Files, InvalidPathException, Path, Paths }

import org.eclipse.jgit.internal.JGitText
import org.eclipse.jgit.lib.{ Config, Constants }
import org.eclipse.jgit.storage.file.FileBasedConfig
import org.eclipse.jgit.util.{ FS, StringUtils, SystemReader }
import org.slf4j.LoggerFactory

// Copy of org.eclipse.jgit.util.SystemReader.Default with:
// * calls to Files.createDirectories guarded by if !Files.isDirectory
//   necessary because my ~/.config is a symlink to a directory
//   which Files.createDirectories isn't happy with
object JGitSystemReader extends SystemReader {
  private val LOG = LoggerFactory.getLogger(getClass)

  lazy val init: Unit = SystemReader.setInstance(this)

  override lazy val getHostname = {
    try InetAddress.getLocalHost.getCanonicalHostName
    catch { case _: UnknownHostException => "localhost" }
  }.ensuring(_ != null)

  override def getenv(variable: String): String = System.getenv(variable)
  override def getProperty(key: String): String = System.getProperty(key)
  override def getCurrentTime: Long             = System.currentTimeMillis
  override def getTimezone(when: Long): Int     = getTimeZone.getOffset(when) / (60 * 1000)

  override def openUserConfig(parent: Config, fs: FS) =
    new FileBasedConfig(parent, new File(fs.userHome, ".gitconfig"), fs)

  override def openSystemConfig(parent: Config, fs: FS): FileBasedConfig = {
    if (StringUtils.isEmptyOrNull(getenv(Constants.GIT_CONFIG_NOSYSTEM_KEY))) {
      val configFile = fs.getGitSystemConfig
      if (configFile != null) return new FileBasedConfig(parent, configFile, fs)
    }
    new FileBasedConfig(parent, null, fs) {
      override def load(): Unit = () // do not load
      override def isOutdated   = false // regular class would bomb here
    }
  }

  override def openJGitConfig(parent: Config, fs: FS): FileBasedConfig = {
    val xdgPath = getXDGConfigHome(fs)
    if (xdgPath != null) {
      var configPath: Path = null
      try {
        configPath = xdgPath.resolve("jgit")
        if (!Files.isDirectory(configPath)) Files.createDirectories(configPath)
        configPath = configPath.resolve(Constants.CONFIG)
        return new FileBasedConfig(parent, configPath.toFile, fs)
      } catch {
        case e: IOException =>
          LOG.error(JGitText.get.createJGitConfigFailed, configPath: Any, e)
      }
    }
    new FileBasedConfig(parent, new File(fs.userHome, ".jgitconfig"), fs)
  }

  private def getXDGConfigHome(fs: FS): Path = {
    var configHomePath = getenv(Constants.XDG_CONFIG_HOME)
    if (StringUtils.isEmptyOrNull(configHomePath))
      configHomePath = new File(fs.userHome, ".config").getAbsolutePath
    try {
      val xdgHomePath = Paths.get(configHomePath)
      if (!Files.isDirectory(xdgHomePath)) Files.createDirectories(xdgHomePath)
      xdgHomePath
    } catch {
      case e @ (_: IOException | _: InvalidPathException) =>
        LOG.error(JGitText.get.createXDGConfigHomeFailed, configHomePath: Any, e)
        null
    }
  }
}
Example 76
Source File: MockConsole.scala From zio with Apache License 2.0 | 5 votes |
package zio.test.mock

import java.io.IOException

import zio.console.Console
import zio.{ Has, IO, UIO, URLayer, ZLayer }

object MockConsole extends Mock[Console] {

  object PutStr   extends Effect[String, Nothing, Unit]
  object PutStrLn extends Effect[String, Nothing, Unit]
  object GetStrLn extends Effect[Unit, IOException, String]

  val compose: URLayer[Has[Proxy], Console] =
    ZLayer.fromService(proxy =>
      new Console.Service {
        def putStr(line: String): UIO[Unit]   = proxy(PutStr, line)
        def putStrLn(line: String): UIO[Unit] = proxy(PutStrLn, line)
        val getStrLn: IO[IOException, String] = proxy(GetStrLn)
      }
    )
}
Example 77
Source File: FinagleDesignTest.scala From airframe with Apache License 2.0 | 5 votes |
package wvlet.airframe.http.finagle

import java.io.IOException
import java.net.URL

import wvlet.airframe.control.Control
import wvlet.airframe.http.{Endpoint, Router}
import wvlet.airspec.AirSpec
import wvlet.log.io.IOUtil

class FinagleDesignTest extends AirSpec {

  trait MyTestServer {
    @Endpoint(path = "/hello")
    def hello: String = {
      "hello"
    }
  }

  def newConfig = FinagleServerConfig(router = Router.of[MyTestServer])

  def `start server`: Unit = {
    finagleDefaultDesign
      .bind[FinagleServerConfig].toInstance(newConfig)
      .bind[FinagleSyncClient].toProvider { server: FinagleServer =>
        Finagle.newSyncClient(server.localAddress)
      }
      .noLifeCycleLogging
      .build[FinagleSyncClient] { client =>
        // The server will start here
        val msg = client.get[String]("/hello")
        msg shouldBe "hello"
      }
  }

  def `no-server design` = {
    val config = newConfig
    finagleBaseDesign
      .bind[FinagleServerConfig].toInstance(config)
      .noLifeCycleLogging
      .build[FinagleServerFactory] { factory =>
        // No server should start here
        intercept[IOException] {
          Control.withResource(new URL(s"http://localhost:${config.port}").openStream()) { in =>
            IOUtil.readAsString(in)
          }
        }
      }
  }

  def `build a server from factory` = {
    finagleBaseDesign.noLifeCycleLogging.build[FinagleServerFactory] { factory =>
      val s1 = factory.newFinagleServer(newConfig)
      Control.withResource(FinagleClient.newSyncClient(s1.localAddress)) { client =>
        client.get[String]("/hello") shouldBe "hello"
      }
    }
  }
}
Example 78
Source File: CourierQueryParsers.scala From naptime with Apache License 2.0 | 5 votes |
package org.coursera.naptime.router2

import java.io.IOException

import com.linkedin.data.DataMap
import com.linkedin.data.schema.DataSchema
import com.linkedin.data.schema.validation.CoercionMode
import com.linkedin.data.schema.validation.RequiredMode
import com.linkedin.data.schema.validation.ValidateDataAgainstSchema
import com.linkedin.data.schema.validation.ValidationOptions
import com.typesafe.scalalogging.StrictLogging
import org.coursera.courier.codecs.InlineStringCodec
import org.coursera.naptime.courier.StringKeyCodec
import play.api.mvc.RequestHeader

object CourierQueryParsers extends StrictLogging {

  import CollectionResourceRouter.errorRoute

  private[this] val validationOptions =
    new ValidationOptions(RequiredMode.FIXUP_ABSENT_WITH_DEFAULT, CoercionMode.STRING_TO_PRIMITIVE)

  private[this] def parseStringToDataMap(
      paramName: String,
      schema: DataSchema,
      resourceClass: Class[_])(value: String): Either[RouteAction, DataMap] = {
    try {
      val parsed = if (value.startsWith("(") && value.endsWith(")")) {
        InlineStringCodec.instance.bytesToMap(value.getBytes("UTF-8"))
      } else {
        val codec = new StringKeyCodec(schema)
        codec.bytesToMap(value.getBytes("UTF-8"))
      }
      val validated = ValidateDataAgainstSchema.validate(parsed, schema, validationOptions)
      if (validated.isValid) {
        Right(validated.getFixed.asInstanceOf[DataMap])
      } else {
        logger.warn(
          s"${resourceClass.getName}: Bad query parameter for parameter " +
            s"'$paramName': $value. Errors: ${validated.getMessages}")
        Left(errorRoute(s"Improperly formatted value for parameter '$paramName'", resourceClass))
      }
    } catch {
      case ioException: IOException =>
        logger.warn(
          s"${resourceClass.getName}: Bad query parameter for parameter " +
            s"'$paramName': $value. Errors: ${ioException.getMessage}")
        Left(errorRoute(s"Improperly formatted value for parameter '$paramName'", resourceClass))
    }
  }

  def strictParse(
      paramName: String,
      schema: DataSchema,
      resourceClass: Class[_],
      rh: RequestHeader): Either[RouteAction, DataMap] = {
    val queryStringResults = rh.queryString.get(paramName)
    if (queryStringResults.isEmpty || queryStringResults.get.isEmpty) {
      Left(errorRoute(s"Missing required parameter '$paramName'", resourceClass))
    } else if (queryStringResults.get.tail.isEmpty) {
      val stringValue = queryStringResults.get.head
      parseStringToDataMap(paramName, schema, resourceClass)(stringValue)
    } else {
      Left(errorRoute(s"Too many query parameters for '$paramName", resourceClass))
    }
  }

  def optParse(
      paramName: String,
      schema: DataSchema,
      resourceClass: Class[_],
      rh: RequestHeader): Either[RouteAction, Option[DataMap]] = {
    val queryStringResults = rh.queryString.get(paramName)
    if (queryStringResults.isEmpty || queryStringResults.get.isEmpty) {
      Right(None)
    } else if (queryStringResults.get.tail.isEmpty) {
      val stringValue = queryStringResults.get.head
      parseStringToDataMap(paramName, schema, resourceClass)(stringValue).right.map(Some(_))
    } else {
      Left(errorRoute(s"Too many query parameters for '$paramName", resourceClass))
    }
  }

  // TODO: Add a 'QTry' query parameter type that will attempt to parse the query parameter but
  // instead of failing, will provide the valiation errors to the resource handler to do with what
  // they want.
}
Example 79
Source File: RetryUtilsSuite.scala From azure-event-hubs-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.eventhubs.utils

import java.io.IOException
import java.util.concurrent.CompletableFuture

import com.microsoft.azure.eventhubs.EventHubException
import org.scalatest.FunSuite
import org.scalatest.concurrent.ScalaFutures

import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.Future

class RetryUtilsSuite extends FunSuite with ScalaFutures {
  import RetryUtilsSuite._

  test("don't retry successful Future") {
    val tries = incrementFutureIterator(1)
    val result = RetryUtils.retryScala(tries.next, "test", maxRetry = 3, delay = 1).futureValue
    assert(1 === result)
  }

  test("don't retry failed Future with normal exception") {
    val fails = Iterator(Future.failed(new IOException("not retry")))
    val tries = fails ++ incrementFutureIterator(1)
    val exception = RetryUtils.retryScala(tries.next, "test", maxRetry = 3, delay = 1).failed.futureValue
    assert("not retry" === exception.getMessage)
  }

  test("don't retry failed Future with non-transient EventHubException") {
    val tries = Iterator(nonTransientEHE()) ++ incrementFutureIterator(1)
    val exception = RetryUtils.retryScala(tries.next, "test", maxRetry = 3, delay = 1).failed.futureValue
    assert("nonTransient" === exception.getMessage)
  }

  test("retry maxRetry times until success") {
    val fails = Iterator(failedWithEHE(), causedByEHE(), failedWithEHE())
    val tries = fails ++ incrementFutureIterator(4)
    val result = RetryUtils.retryScala(tries.next, "test", maxRetry = 3, delay = 1).futureValue
    assert(4 === result)
  }

  test("retry maxRetry times until failure") {
    val fails = Iterator(failedWithEHE(), causedByEHE(), failedWithEHE(), causedByEHE())
    val tries = fails ++ incrementFutureIterator(4)
    val exception = RetryUtils.retryScala(tries.next, "test", maxRetry = 3, delay = 1).failed.futureValue
    assert("causedBy" === exception.getMessage)
  }

  test("retryNotNull") {
    val nullFuture: CompletableFuture[AnyRef] =
      CompletableFuture.completedFuture(null.asInstanceOf[AnyRef])
    val normalFuture: CompletableFuture[Int] = CompletableFuture.completedFuture(10)
    val tries = Iterator.continually(nullFuture).take(9) ++ Iterator(normalFuture)
    val result = RetryUtils.retryNotNull(tries.next, "test").futureValue
    assert(10 === result)
  }
}

object RetryUtilsSuite {
  def failedWithEHE(): Future[Int] = Future.failed(new EventHubException(true, "failedWith"))

  def causedByEHE(): Future[Int] = {
    val causedBy = new EventHubException(true, "causedBy")
    Future.failed(new IOException(causedBy))
  }

  def nonTransientEHE(): Future[Int] = Future.failed(new EventHubException(false, "nonTransient"))

  def incrementFutureIterator(value: Int = 0): Iterator[Future[Int]] =
    Iterator.from(value).map(Future(_))
}
Example 80
Source File: ChangeStream.scala From changestream with MIT License | 5 votes |
package changestream

import java.io.IOException
import java.util.concurrent.TimeoutException

import com.github.shyiko.mysql.binlog.BinaryLogClient
import com.typesafe.config.ConfigFactory
import org.slf4j.LoggerFactory

import scala.concurrent.Await
import scala.concurrent.duration._
import scala.language.postfixOps
import scala.concurrent.ExecutionContext.Implicits.global

object ChangeStream extends App {
  protected val log = LoggerFactory.getLogger(getClass)
  protected val config = ConfigFactory.load().getConfig("changestream")
  protected val mysqlHost = config.getString("mysql.host")
  protected val mysqlPort = config.getInt("mysql.port")
  protected val overridePosition = System.getenv("OVERRIDE_POSITION") match {
    case position: String if (position != null && position.length > 0) => Some(position) //scalastyle:ignore
    case _ => None
  }
  protected val client = new BinaryLogClient(
    mysqlHost,
    mysqlPort,
    config.getString("mysql.user"),
    config.getString("mysql.password")
  )

  client.setKeepAliveInterval(config.getLong("mysql.keepalive"))

  ChangeStreamEventListener.setConfig(config)
  ChangestreamEventDeserializerConfig.setConfig(config)
  ChangeStreamEventListener.startControlServer(config)

  client.registerEventListener(ChangeStreamEventListener)
  client.setEventDeserializer(ChangeStreamEventDeserializer)
  client.registerLifecycleListener(ChangeStreamLifecycleListener)

  getConnected(overridePosition)

  def serverName = s"${mysqlHost}:${mysqlPort}"
  def clientId = client.getServerId
  def isConnected = client.isConnected

  def getConnectedAndWait(startingPosition: Option[String]) =
    Await.result(getConnected(startingPosition), 60.seconds)

  def disconnectClient = client.disconnect()

  def getConnected(startingPosition: Option[String]) = {
    log.info("Starting changestream...")

    val getPositionFuture = startingPosition match {
      case Some(_) =>
        log.info("Overriding starting binlog position with OVERRIDE_POSITION={}", overridePosition)
        ChangeStreamEventListener.setPosition(startingPosition)
      case _ =>
        ChangeStreamEventListener.getStoredPosition
    }

    getPositionFuture.map { position =>
      setBinlogClientPosition(position)
      getInternalClientConnected
    }
  }

  protected def setBinlogClientPosition(position: Option[String]) = position match {
    case Some(position) =>
      log.info("Setting starting binlog position at {}.", position)
      val Array(fileName, posLong) = position.split(":")
      client.setBinlogFilename(fileName)
      client.setBinlogPosition(java.lang.Long.valueOf(posLong))
    case None =>
      log.info("Starting binlog position in real time")
      client.setBinlogFilename(null) //scalastyle:ignore
      client.setBinlogPosition(4L)
  }

  protected def getInternalClientConnected = {
    while (!client.isConnected) {
      try {
        client.connect(5000)
      } catch {
        case e: IOException =>
          log.error("Failed to connect to MySQL to stream the binlog, retrying in 5 seconds...", e)
          Thread.sleep(5000)
        case e: TimeoutException =>
          log.error("Timed out connecting to MySQL to stream the binlog, retrying in 5 seconds...", e)
          Thread.sleep(5000)
        case e: Exception =>
          log.error("Failed to connect, exiting...", e)
          Await.result(ChangeStreamEventListener.shutdownAndExit(1), 60.seconds)
      }
    }
  }
}
Example 81
Source File: IO.scala From perf_tester with Apache License 2.0 | 5 votes |
package org.perftester.process

import java.nio.file.{Files, Path, SimpleFileVisitor, FileVisitResult}
import java.nio.file.attribute.BasicFileAttributes
import java.io.IOException

object IO {
  def deleteDir(root: Path, deleteRoot: Boolean): Unit = {
    object deleter extends SimpleFileVisitor[Path] {
      override def visitFile(path: Path, attr: BasicFileAttributes): FileVisitResult = {
        Files.delete(path)
        FileVisitResult.CONTINUE
      }
      override def postVisitDirectory(path: Path, e: IOException): FileVisitResult = {
        if (e eq null) {
          if (deleteRoot || path != root)
            Files.delete(path)
          FileVisitResult.CONTINUE
        } else throw e // directory iteration failed
      }
    }
    Files.walkFileTree(root, deleter)
  }

  def jarsIn(path: Path): Seq[Path] =
    Files
      .walk(path)
      .toArray()
      .map(_.asInstanceOf[Path].toAbsolutePath)
      .toList
      .filter(_.getFileName.toString.endsWith(".jar"))

  def listSourcesIn(path: Path): List[Path] = {
    def isSource(p: Path) = {
      val name = p.getFileName.toString
      name.endsWith(".scala") || name.endsWith(".java")
    }
    val maxDepth = 557
    Files
      .walk(path, maxDepth)
      .toArray
      .map(_.asInstanceOf[Path].toAbsolutePath)
      .filter(isSource)
      .toList
  }
}
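A small usage sketch (not from perf_tester itself; the scratch paths are illustrative) for the helpers above. Note that deleteDir only removes a directory in postVisitDirectory when the walk reported no IOException:

import java.nio.file.Files
import org.perftester.process.IO

object IODemo extends App {
  // build a scratch tree, list its Scala/Java sources, then remove it
  val scratch = Files.createTempDirectory("io-demo")
  Files.write(scratch.resolve("Example.scala"), "object Example".getBytes("UTF-8"))

  println(IO.listSourcesIn(scratch)) // List(.../Example.scala)
  IO.deleteDir(scratch, deleteRoot = true)
}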
Example 82
Source File: Utils.scala From perf_tester with Apache License 2.0 | 5 votes |
package org.perftester

import java.io.IOException
import java.nio.file._
import java.nio.file.attribute.{BasicFileAttributes, FileTime}
import java.time.Instant
import java.util

import ammonite.ops.{Path => aPath}

object Utils {

  def lastChangedDate(path: aPath): (Instant, String) = lastChangedDate(path.toNIO)

  def lastChangedDate(path: Path): (Instant, String) = {
    var latest = Files.getLastModifiedTime(path)
    var at = path.toString

    object walker extends SimpleFileVisitor[Path] {
      override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = {
        val thisTime = attrs.lastModifiedTime()
        if (thisTime.compareTo(latest) > 0) {
          at = file.toString
          latest = thisTime
        }
        FileVisitResult.CONTINUE
      }

      override def preVisitDirectory(dir: Path, attrs: BasicFileAttributes): FileVisitResult = {
        if (dir.getFileName.toString == "intellij")
          FileVisitResult.SKIP_SUBTREE
        else FileVisitResult.CONTINUE
      }
    }
    Files.walkFileTree(path, util.EnumSet.noneOf(classOf[FileVisitOption]), Int.MaxValue, walker)
    (latest.toInstant, at)
  }

  def deleteDir(scalaPackDir: Path) = {
    if (Files.exists(scalaPackDir)) {
      println(s"delete pack dir $scalaPackDir")
      Files.walkFileTree(scalaPackDir, fileDeleter)
    } else {
      println(s"pack dir $scalaPackDir doesnt exist")
    }
  }

  private object fileDeleter extends SimpleFileVisitor[Path] {
    override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = {
      println(s"delete file $file")
      Files.delete(file)
      FileVisitResult.CONTINUE
    }
    override def postVisitDirectory(dir: Path, exc: IOException): FileVisitResult = {
      println(s"delete dir $dir")
      Files.delete(dir)
      FileVisitResult.CONTINUE
    }
  }

  def copy(source: Path, target: Path): Unit = {
    class Copier(source: Path, target: Path) extends SimpleFileVisitor[Path] {
      override def preVisitDirectory(dir: Path, attrs: BasicFileAttributes): FileVisitResult = {
        val targetDir = target.resolve(source.relativize(dir))
        println(s"copy dir $dir -> $targetDir")
        Files.copy(dir, targetDir)
        FileVisitResult.CONTINUE
      }

      override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = {
        val targetFile = target.resolve(source.relativize(file))
        println(s"copy file $file -> $targetFile")
        Files.copy(file, targetFile)
        FileVisitResult.CONTINUE
      }
    }
    Files.walkFileTree(source, new Copier(source, target))
  }

  def touch(path: Path): Unit = {
    if (Files.exists(path))
      Files.setLastModifiedTime(path, FileTime.from(Instant.now))
    else Files.createFile(path)
  }
}
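For orientation, a brief hedged sketch (the "src" directory is an assumption, not part of the original project) showing how the plain NIO overload of Utils.lastChangedDate might be called:

import java.nio.file.Paths
import org.perftester.Utils

// returns the newest modification instant under ./src and the file it belongs to
val (newest, file) = Utils.lastChangedDate(Paths.get("src"))
println(s"$file was last changed at $newest")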
Example 83
Source File: TikaParquetParser.scala From project-matt with MIT License | 5 votes |
package org.datafy.aws.app.matt.extras

import java.io.{File, FileOutputStream, IOException, InputStream}
import java.util

import scala.collection.JavaConverters._

import org.xml.sax.{ContentHandler, SAXException}
import org.apache.tika.metadata.Metadata
import org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE
import org.apache.tika.mime.MediaType
import org.apache.tika.parser.{AbstractParser, ParseContext}
import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.parquet.hadoop.ParquetFileReader
import org.apache.parquet.hadoop.ParquetReader
import org.apache.parquet.format.converter.ParquetMetadataConverter
import org.apache.parquet.hadoop.util.HadoopInputFile
import org.apache.parquet.tools.json.JsonRecordFormatter
import org.apache.parquet.tools.read.{SimpleReadSupport, SimpleRecord}
import org.apache.tika.exception.TikaException
import org.apache.tika.sax.XHTMLContentHandler

import scala.util.Random

class TikaParquetParser extends AbstractParser {
  // make some stuff here
  final val PARQUET_RAW = MediaType.application("x-parquet")

  private val SUPPORTED_TYPES: Set[MediaType] = Set(PARQUET_RAW)

  def getSupportedTypes(context: ParseContext): util.Set[MediaType] = {
    SUPPORTED_TYPES.asJava
  }

  @throws(classOf[IOException])
  @throws(classOf[SAXException])
  @throws(classOf[TikaException])
  def parse(stream: InputStream, handler: ContentHandler,
            metadata: Metadata, context: ParseContext): Unit = {
    // create temp file from stream
    val fileNamePrefix = Random.alphanumeric.take(5).mkString
    val tempFile = File.createTempFile(s"parquet-${fileNamePrefix}", ".parquet")
    IOUtils.copy(stream, new FileOutputStream(tempFile))

    val conf = new Configuration()
    val path = new Path(tempFile.getAbsolutePath)
    val parquetMetadata = ParquetFileReader.readFooter(conf, path, ParquetMetadataConverter.NO_FILTER)
    var defaultReader: ParquetReader[SimpleRecord] = null

    val columns = parquetMetadata.getFileMetaData.getSchema.getFields
    metadata.set(CONTENT_TYPE, PARQUET_RAW.toString)
    metadata.set("Total Number of Columns", columns.size.toString)
    metadata.set("Parquet Column Names", columns.toString)

    val xhtml = new XHTMLContentHandler(handler, metadata)
    xhtml.startDocument()
    xhtml.startElement("p")

    // ::TODO:: ensure parquet reader reads all files not only file row
    try {
      defaultReader = ParquetReader.builder(new SimpleReadSupport(), new Path(tempFile.getAbsolutePath)).build()
      if (defaultReader.read() != null) {
        val values: SimpleRecord = defaultReader.read()
        val jsonFormatter = JsonRecordFormatter.fromSchema(parquetMetadata.getFileMetaData.getSchema)
        val textContent: String = jsonFormatter.formatRecord(values)
        xhtml.characters(textContent)
        xhtml.endElement("p")
        xhtml.endDocument()
      }
    } catch {
      case e: Throwable =>
        e.printStackTrace()
        if (defaultReader != null) {
          try {
            defaultReader.close()
          } catch {
            case _: Throwable =>
          }
        }
    } finally {
      if (tempFile != null) tempFile.delete()
    }
  }
}
Example 84
Source File: TikaHadoopOrcParser.scala From project-matt with MIT License | 5 votes |
package org.datafy.aws.app.matt.extras

import java.io.{File, FileOutputStream, IOException, InputStream}
import java.util

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration

import scala.collection.JavaConverters._

import org.apache.hadoop.fs.Path
import org.apache.hadoop.hive.serde2.objectinspector.StructField
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector
import org.apache.orc.OrcFile
import org.apache.orc.OrcFile.ReaderOptions
import org.apache.orc.Reader
import org.apache.orc.RecordReader
import org.apache.tika.exception.TikaException
import org.apache.tika.metadata.Metadata
import org.apache.tika.mime.MediaType
import org.apache.tika.parser.{AbstractParser, ParseContext}
import org.xml.sax.{ContentHandler, SAXException}

import scala.util.Random

class TikaHadoopOrcParser extends AbstractParser {
  final val ORC_RAW = MediaType.application("x-orc")

  private val SUPPORTED_TYPES: Set[MediaType] = Set(ORC_RAW)

  def getSupportedTypes(context: ParseContext): util.Set[MediaType] = {
    SUPPORTED_TYPES.asJava
  }

  @throws(classOf[IOException])
  @throws(classOf[SAXException])
  @throws(classOf[TikaException])
  def parse(stream: InputStream, handler: ContentHandler,
            metadata: Metadata, context: ParseContext): Unit = {
    // create temp file from stream
    try {
      val fileNamePrefix = Random.alphanumeric.take(5).mkString
      val tempFile = File.createTempFile(s"orc-${fileNamePrefix}", ".orc")
      IOUtils.copy(stream, new FileOutputStream(tempFile))

      val path = new Path(tempFile.getAbsolutePath)
      val conf = new Configuration()
      val orcReader = OrcFile.createReader(path, new ReaderOptions(conf))
      val records: RecordReader = orcReader.rows()

      val storeRecord = null
      val firstBlockKey = null
    } catch {
      case e: Throwable => e.printStackTrace()
    }
    // val fields =
  }
}
Example 85
Source File: Utilities.scala From project-matt with MIT License | 5 votes |
package org.datafy.aws.app.matt.extras

import org.apache.tika.Tika
import org.apache.tika.metadata.Metadata

import java.io.{BufferedInputStream, IOException, InputStream, StringWriter}
import java.util.zip.GZIPInputStream

import org.xml.sax.SAXException
import org.apache.tika.exception.TikaException
import org.apache.tika.metadata.serialization.JsonMetadata
import org.apache.tika.parser.{AutoDetectParser, ParseContext}
import org.apache.tika.parser.pkg.CompressorParser
import org.apache.tika.sax.BodyContentHandler

object Utilities {

  private val MAX_STRING_LENGTH = 2147483647

  private val tika = new Tika()
  tika.setMaxStringLength(MAX_STRING_LENGTH)

  @throws(classOf[IOException])
  @throws(classOf[SAXException])
  @throws(classOf[TikaException])
  def getParsePlainStream(inputStream: InputStream): String = {
    val autoDetectParser = new AutoDetectParser()
    val bodyContentHandler = new BodyContentHandler(MAX_STRING_LENGTH)
    val fileMetadata = new Metadata()

    if (inputStream.read() == -1) {
      return "Could not scan inputStream less than 0 bytes"
    }
    autoDetectParser.parse(inputStream, bodyContentHandler, fileMetadata)
    bodyContentHandler.toString
  }

  @throws(classOf[IOException])
  @throws(classOf[SAXException])
  @throws(classOf[TikaException])
  def getParseCompressedStream(myStream: InputStream) = {
    // peek at the first two bytes and report whether the stream carries the GZIP magic number
    var inputStream = myStream
    if (!inputStream.markSupported()) {
      inputStream = new BufferedInputStream(inputStream)
    }
    inputStream.mark(2)
    var magicBytes = 0
    try {
      magicBytes = inputStream.read() & 0xff | ((inputStream.read() << 8) & 0xff00)
      inputStream.reset()
    } catch {
      case ioe: IOException => ioe.printStackTrace()
    }
    magicBytes == GZIPInputStream.GZIP_MAGIC
  }
}

case class And[A](p1: A => Boolean, p2: A => Boolean) extends (A => Boolean) {
  def apply(a: A) = p1(a) && p2(a)
}

case class Or[A](p1: A => Boolean, p2: A => Boolean) extends (A => Boolean) {
  def apply(a: A) = p1(a) || p2(a)
}
Example 86
Source File: KeyVaultUtils.scala From azure-kusto-spark with Apache License 2.0 | 5 votes |
package com.microsoft.kusto.spark.utils

import java.io.IOException

import com.microsoft.azure.CloudException
import com.microsoft.azure.keyvault.KeyVaultClient
import com.microsoft.kusto.spark.authentication._
import com.microsoft.kusto.spark.datasource._

object KeyVaultUtils {
  val AppId = "kustoAppId"
  val AppKey = "kustoAppKey"
  val AppAuthority = "kustoAppAuthority"
  val SasUrl = "blobStorageSasUrl"
  val StorageAccountId = "blobStorageAccountName"
  val StorageAccountKey = "blobStorageAccountKey"
  val Container = "blobContainer"
  var cachedClient: KeyVaultClient = _

  private def getClient(clientID: String, clientPassword: String): KeyVaultClient = {
    if (cachedClient == null) {
      cachedClient = new KeyVaultADALAuthenticator(clientID, clientPassword).getAuthenticatedClient
    }
    cachedClient
  }

  @throws[CloudException]
  @throws[IOException]
  def getStorageParamsFromKeyVault(keyVaultAuthentication: KeyVaultAuthentication): KustoStorageParameters = {
    keyVaultAuthentication match {
      case app: KeyVaultAppAuthentication =>
        val client = getClient(app.keyVaultAppID, app.keyVaultAppKey)
        getStorageParamsFromKeyVaultImpl(client, app.uri)
      case certificate: KeyVaultCertificateAuthentication =>
        throw new UnsupportedOperationException("certificates are not yet supported")
    }
  }

  @throws[CloudException]
  @throws[IOException]
  def getAadAppParametersFromKeyVault(keyVaultAuthentication: KeyVaultAuthentication): AadApplicationAuthentication = {
    keyVaultAuthentication match {
      case app: KeyVaultAppAuthentication =>
        val client = getClient(app.keyVaultAppID, app.keyVaultAppKey)
        getAadAppParamsFromKeyVaultImpl(client, app.uri)
      case certificate: KeyVaultCertificateAuthentication =>
        throw new UnsupportedOperationException("certificates are not yet supported")
    }
  }

  private def getAadAppParamsFromKeyVaultImpl(client: KeyVaultClient, uri: String): AadApplicationAuthentication = {
    val id = client.getSecret(uri, AppId)
    val key = client.getSecret(uri, AppKey)

    var authority = client.getSecret(uri, AppAuthority).value()
    if (authority.isEmpty) {
      authority = "microsoft.com"
    }

    AadApplicationAuthentication(
      ID = if (id == null) null else id.value(),
      password = if (key == null) null else key.value(),
      authority = authority)
  }

  private def getStorageParamsFromKeyVaultImpl(client: KeyVaultClient, uri: String): KustoStorageParameters = {
    val sasUrl = Option(client.getSecret(uri, SasUrl))

    val accountId = Option(client.getSecret(uri, StorageAccountId))
    val accountKey = Option(client.getSecret(uri, StorageAccountKey))
    val container = Option(client.getSecret(uri, Container))

    if (sasUrl.isEmpty) {
      KustoStorageParameters(
        account = if (accountId.isDefined) accountId.get.value else "",
        secret = if (accountKey.isDefined) accountKey.get.value else "",
        container = if (container.isDefined) container.get.value else "",
        secretIsAccountKey = true)
    } else {
      KustoDataSourceUtils.parseSas(sasUrl.get.value)
    }
  }
}
Example 87
Source File: MonixEnrichments.scala From lsp4s with Apache License 2.0 | 5 votes |
package scala.meta.jsonrpc import java.io.IOException import java.io.OutputStream import java.nio.ByteBuffer import monix.execution.Ack import monix.execution.Cancelable import monix.execution.Scheduler import monix.reactive.Observable import monix.reactive.Observer import scribe.LoggerSupport object MonixEnrichments { class ObservableCurrentValue[+A](obs: Observable[A])(implicit s: Scheduler) extends (() => A) with Cancelable { private var value: Any = _ private val cancelable = obs.foreach(newValue => value = newValue) override def apply(): A = { if (value == null) { throw new NoSuchElementException( "Reading from empty Observable, consider using MulticastStrategy.behavior(initialValue)" ) } else { value.asInstanceOf[A] } } override def cancel(): Unit = cancelable.cancel() } implicit class XtensionObservable[A](val obs: Observable[A]) extends AnyVal { def focus[B: cats.Eq](f: A => B): Observable[B] = obs.distinctUntilChangedByKey(f).map(f) def toFunction0()(implicit s: Scheduler): () => A = toObservableCurrentValue() def toObservableCurrentValue()( implicit s: Scheduler ): ObservableCurrentValue[A] = new ObservableCurrentValue[A](obs) } implicit class XtensionObserverCompanion[A](val `_`: Observer.type) extends AnyVal { def fromOutputStream( out: OutputStream, logger: LoggerSupport ): Observer.Sync[ByteBuffer] = { new Observer.Sync[ByteBuffer] { private[this] var isClosed: Boolean = false override def onNext(elem: ByteBuffer): Ack = { if (isClosed) Ack.Stop else { try { while (elem.hasRemaining) out.write(elem.get()) out.flush() Ack.Continue } catch { case _: IOException => logger.error("OutputStream closed!") isClosed = true Ack.Stop } } } override def onError(ex: Throwable): Unit = () override def onComplete(): Unit = out.close() } } } }
Example 88
Source File: ParquetFiberDataLoader.scala From OAP with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.oap.io import java.io.IOException import java.time.ZoneId import org.apache.hadoop.conf.Configuration import org.apache.parquet.hadoop.ParquetFiberDataReader import org.apache.parquet.hadoop.api.InitContext import org.apache.parquet.hadoop.utils.Collections3 import org.apache.spark.sql.execution.datasources.oap.filecache.FiberCache import org.apache.spark.sql.execution.datasources.parquet.{ParquetReadSupportWrapper, VectorizedColumnReader} import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector import org.apache.spark.sql.oap.OapRuntime import org.apache.spark.sql.types._ private[oap] case class ParquetFiberDataLoader( configuration: Configuration, reader: ParquetFiberDataReader, blockId: Int) { @throws[IOException] def loadSingleColumn: FiberCache = { val footer = reader.getFooter val fileSchema = footer.getFileMetaData.getSchema val fileMetadata = footer.getFileMetaData.getKeyValueMetaData val readContext = new ParquetReadSupportWrapper() .init(new InitContext(configuration, Collections3.toSetMultiMap(fileMetadata), fileSchema)) val requestedSchema = readContext.getRequestedSchema val sparkRequestedSchemaString = configuration.get(ParquetReadSupportWrapper.SPARK_ROW_REQUESTED_SCHEMA) val sparkSchema = StructType.fromString(sparkRequestedSchemaString) assert(sparkSchema.length == 1, s"Only can get single column every time " + s"by loadSingleColumn, the columns = ${sparkSchema.mkString}") val dataType = sparkSchema.fields(0).dataType // Notes: rowIds is IntegerType in oap index. val rowCount = reader.getFooter.getBlocks.get(blockId).getRowCount.toInt val columnDescriptor = requestedSchema.getColumns.get(0) val originalType = requestedSchema.asGroupType.getFields.get(0).getOriginalType val blockMetaData = footer.getBlocks.get(blockId) val fiberData = reader.readFiberData(blockMetaData, columnDescriptor) val columnReader = new VectorizedColumnReader(columnDescriptor, originalType, fiberData.getPageReader(columnDescriptor), ZoneId.systemDefault, true) if (OapRuntime.getOrCreate.fiberCacheManager.dataCacheCompressEnable) { ParquetDataFiberCompressedWriter.dumpToCache( columnReader, rowCount, dataType) } else { val column = new OnHeapColumnVector(rowCount, dataType) columnReader.readBatch(rowCount, column) ParquetDataFiberWriter.dumpToCache( column.asInstanceOf[OnHeapColumnVector], rowCount) } } }
Example 89
Source File: AppMasterResolver.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.experiments.yarn.client import java.io.IOException import java.net.{HttpURLConnection, URL} import java.nio.charset.StandardCharsets import akka.actor.{ActorRef, ActorSystem} import org.apache.commons.io.IOUtils import org.apache.gearpump.experiments.yarn.glue.Records.{ApplicationId, ApplicationReport} import org.apache.gearpump.experiments.yarn.glue.YarnClient import org.apache.gearpump.util.{AkkaHelper, LogUtil} import org.apache.hadoop.hdfs.web.URLConnectionFactory import org.apache.hadoop.yarn.conf.YarnConfiguration import scala.util.Try class AppMasterResolver(yarnClient: YarnClient, system: ActorSystem) { val LOG = LogUtil.getLogger(getClass) val RETRY_INTERVAL_MS = 3000 // ms def resolve(appId: ApplicationId, timeoutSeconds: Int = 30): ActorRef = { val appMaster = retry(connect(appId), 1 + timeoutSeconds * 1000 / RETRY_INTERVAL_MS) appMaster } private def connect(appId: ApplicationId): ActorRef = { val report = yarnClient.getApplicationReport(appId) AppMasterResolver.resolveAppMasterAddress(report, system) } private def retry(fun: => ActorRef, times: Int): ActorRef = { var index = 0 var result: ActorRef = null while (index < times && result == null) { Thread.sleep(RETRY_INTERVAL_MS) index += 1 val tryConnect = Try(fun) if (tryConnect.isFailure) { LOG.error(s"Failed to connect YarnAppMaster(tried $index)... " + tryConnect.failed.get.getMessage) } else { result = tryConnect.get } } result } } object AppMasterResolver { val LOG = LogUtil.getLogger(getClass) def resolveAppMasterAddress(report: ApplicationReport, system: ActorSystem): ActorRef = { val appMasterPath = s"${report.getTrackingURL}/supervisor-actor-path" LOG.info(s"appMasterPath=$appMasterPath") val connectionFactory: URLConnectionFactory = URLConnectionFactory .newDefaultURLConnectionFactory(new YarnConfiguration()) val url: URL = new URL(appMasterPath) val connection: HttpURLConnection = connectionFactory.openConnection(url) .asInstanceOf[HttpURLConnection] connection.setInstanceFollowRedirects(true) try { connection.connect() } catch { case e: IOException => LOG.error(s"Failed to connect to AppMaster" + e.getMessage) } val status = connection.getResponseCode if (status == 200) { val stream: java.io.InputStream = connection.getInputStream val response = IOUtils.toString(stream, StandardCharsets.UTF_8) LOG.info("Successfully resolved AppMaster address: " + response) connection.disconnect() AkkaHelper.actorFor(system, response) } else { connection.disconnect() throw new IOException("Fail to resolve AppMaster address, please make sure " + s"${report.getTrackingURL} is accessible...") } } }
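The resolver above wraps each connection attempt in a Try and sleeps between attempts. A self-contained sketch of that retry pattern, with no YARN or Akka dependencies (the helper name and interval are illustrative):

import scala.util.{Failure, Success, Try}

object Retry {
  // Retries `op` up to `times` attempts, sleeping `intervalMs` before each attempt,
  // and returns the first successful result, mirroring AppMasterResolver.retry.
  def retry[T](op: => T, times: Int, intervalMs: Long = 3000): Option[T] = {
    var index = 0
    var result: Option[T] = None
    while (index < times && result.isEmpty) {
      Thread.sleep(intervalMs)
      index += 1
      Try(op) match {
        case Success(value) => result = Some(value)
        case Failure(error) => println(s"Attempt $index failed: ${error.getMessage}")
      }
    }
    result
  }
}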
Example 90
Source File: FileUtils.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.util import java.io.{File, IOException} import java.nio.charset.Charset import com.google.common.io.Files object FileUtils { private val UTF8 = Charset.forName("UTF-8") def write(file: File, str: String): Unit = { Files.write(str, file, UTF8) } def read(file: File): String = { Files.asCharSource(file, UTF8).read() } def writeByteArrayToFile(file: File, bytes: Array[Byte]): Unit = { Files.write(bytes, file) } def readFileToByteArray(file: File): Array[Byte] = { Files.toByteArray(file) } def forceMkdir(directory: File): Unit = { if (directory.exists() && directory.isFile) { throw new IOException(s"Failed to create directory ${directory.toString}, it already exists") } Files.createParentDirs(directory) directory.mkdir() } }
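A small usage sketch for the helpers above, assuming org.apache.gearpump.util.FileUtils is on the classpath; the temporary file and directory names are arbitrary.

import java.io.File
import org.apache.gearpump.util.FileUtils

object FileUtilsExample {
  def main(args: Array[String]): Unit = {
    val file = File.createTempFile("gearpump-", ".txt")
    FileUtils.write(file, "hello gearpump")
    println(FileUtils.read(file))        // prints: hello gearpump

    val dir = new File(file.getParentFile, "gearpump-example-dir")
    FileUtils.forceMkdir(dir)            // throws IOException if a file with that name exists
    println(dir.isDirectory)             // true

    file.delete()
    dir.delete()
  }
}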
Example 91
Source File: process.scala From scala-steward with Apache License 2.0 | 5 votes |
package org.scalasteward.core.io import cats.effect._ import cats.implicits._ import fs2.Stream import java.io.{File, IOException, InputStream} import org.scalasteward.core.util._ import scala.collection.mutable.ListBuffer import scala.concurrent.TimeoutException import scala.concurrent.duration.FiniteDuration object process { def slurp[F[_]]( cmd: Nel[String], cwd: Option[File], extraEnv: Map[String, String], timeout: FiniteDuration, log: String => F[Unit], blocker: Blocker )(implicit contextShift: ContextShift[F], timer: Timer[F], F: Concurrent[F]): F[List[String]] = createProcess(cmd, cwd, extraEnv).flatMap { process => F.delay(new ListBuffer[String]).flatMap { buffer => val readOut = { val out = readInputStream[F](process.getInputStream, blocker) out.evalMap(line => F.delay(appendBounded(buffer, line, 4096)) >> log(line)).compile.drain } val showCmd = (extraEnv.map { case (k, v) => s"$k=$v" }.toList ++ cmd.toList).mkString_(" ") val result = readOut >> F.delay(process.waitFor()) >>= { exitValue => if (exitValue === 0) F.pure(buffer.toList) else { val msg = s"'$showCmd' exited with code $exitValue" F.raiseError[List[String]](new IOException(makeMessage(msg, buffer.toList))) } } val fallback = F.delay(process.destroyForcibly()) >> { val msg = s"'$showCmd' timed out after ${timeout.toString}" F.raiseError[List[String]](new TimeoutException(makeMessage(msg, buffer.toList))) } Concurrent.timeoutTo(result, timeout, fallback) } } private def createProcess[F[_]]( cmd: Nel[String], cwd: Option[File], extraEnv: Map[String, String] )(implicit F: Sync[F]): F[Process] = F.delay { val pb = new ProcessBuilder(cmd.toList: _*) val env = pb.environment() cwd.foreach(pb.directory) extraEnv.foreach { case (key, value) => env.put(key, value) } pb.redirectErrorStream(true) pb.start() } private def readInputStream[F[_]](is: InputStream, blocker: Blocker)(implicit F: Sync[F], cs: ContextShift[F] ): Stream[F, String] = fs2.io .readInputStream(F.pure(is), chunkSize = 4096, blocker) .through(fs2.text.utf8Decode) .through(fs2.text.lines) private def makeMessage(prefix: String, output: List[String]): String = (prefix :: output).mkString("\n") }
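slurp above streams a process's output into a bounded buffer and raises an IOException on a non-zero exit code. A much simpler sketch of the same idea using plain ProcessBuilder instead of cats-effect and fs2, without the timeout or bounded-buffer handling (names are illustrative):

import java.io.IOException
import scala.io.Source

object SlurpSketch {
  // Runs a command, merges stderr into stdout, collects the output lines,
  // and raises an IOException on a non-zero exit code (no timeout handling here).
  def slurp(cmd: Seq[String]): List[String] = {
    val process = new ProcessBuilder(cmd: _*).redirectErrorStream(true).start()
    val lines = Source.fromInputStream(process.getInputStream).getLines().toList
    val exitValue = process.waitFor()
    if (exitValue != 0)
      throw new IOException(s"'${cmd.mkString(" ")}' exited with code $exitValue:\n${lines.mkString("\n")}")
    lines
  }
}

// e.g. SlurpSketch.slurp(Seq("git", "status"))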
Example 92
Source File: LivyConnectionSpec.scala From incubator-livy with Apache License 2.0 | 5 votes |
package org.apache.livy.client.http import java.io.IOException import java.net.URLEncoder import java.nio.charset.StandardCharsets.UTF_8 import org.apache.http.client.utils.URIBuilder import org.eclipse.jetty.security._ import org.eclipse.jetty.security.authentication.BasicAuthenticator import org.eclipse.jetty.util.security._ import org.scalatest.{BeforeAndAfterAll, FunSpecLike} import org.scalatest.Matchers._ import org.scalatra.servlet.ScalatraListener import org.apache.livy.{LivyBaseUnitTestSuite, LivyConf} import org.apache.livy.server.WebServer class LivyConnectionSpec extends FunSpecLike with BeforeAndAfterAll with LivyBaseUnitTestSuite { describe("LivyConnection") { def basicAuth(username: String, password: String, realm: String): SecurityHandler = { val roles = Array("user") val l = new HashLoginService() l.putUser(username, Credential.getCredential(password), roles) l.setName(realm) val constraint = new Constraint() constraint.setName(Constraint.__BASIC_AUTH) constraint.setRoles(roles) constraint.setAuthenticate(true) val cm = new ConstraintMapping() cm.setConstraint(constraint) cm.setPathSpec("/*") val csh = new ConstraintSecurityHandler() csh.setAuthenticator(new BasicAuthenticator()) csh.setRealmName(realm) csh.addConstraintMapping(cm) csh.setLoginService(l) csh } def test(password: String, livyConf: LivyConf = new LivyConf()): Unit = { val username = "user name" val server = new WebServer(livyConf, "0.0.0.0", 0) server.context.setSecurityHandler(basicAuth(username, password, "realm")) server.context.setResourceBase("src/main/org/apache/livy/server") server.context.setInitParameter(ScalatraListener.LifeCycleKey, classOf[HttpClientTestBootstrap].getCanonicalName) server.context.addEventListener(new ScalatraListener) server.start() val utf8Name = UTF_8.name() val uri = new URIBuilder() .setScheme(server.protocol) .setHost(server.host) .setPort(server.port) .setUserInfo(URLEncoder.encode(username, utf8Name), URLEncoder.encode(password, utf8Name)) .build() info(uri.toString) val conn = new LivyConnection(uri, new HttpConf(null)) try { conn.get(classOf[Object], "/") should not be (null) } finally { conn.close() } server.stop() server.join() } it("should support HTTP auth with password") { test("pass:word") } it("should support HTTP auth with empty password") { test("") } it("should be failed with large header size") { val livyConf = new LivyConf() .set(LivyConf.REQUEST_HEADER_SIZE, 1024) .set(LivyConf.RESPONSE_HEADER_SIZE, 1024) val pwd = "test-password" * 100 val exception = intercept[IOException](test(pwd, livyConf)) exception.getMessage.contains("Request Header Fields Too Large") should be(true) } it("should be succeeded with configured header size") { val livyConf = new LivyConf() .set(LivyConf.REQUEST_HEADER_SIZE, 2048) .set(LivyConf.RESPONSE_HEADER_SIZE, 2048) val pwd = "test-password" * 100 test(pwd, livyConf) } } }
Example 93
Source File: TestOutputStream.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming import java.io.{IOException, ObjectInputStream} import java.util.concurrent.ConcurrentLinkedQueue import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.dstream.{DStream, ForEachDStream} import org.apache.spark.util.Utils class TestOutputStream[T: ClassTag](parent: DStream[T], val output: ConcurrentLinkedQueue[Seq[T]] = new ConcurrentLinkedQueue[Seq[T]]()) extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { val collected = rdd.collect() output.add(collected) }, false) { // This is to clear the output buffer every time it is read from a checkpoint @throws(classOf[IOException]) private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { ois.defaultReadObject() output.clear() } }
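TestOutputStream overrides readObject so its collected output is cleared whenever the stream is restored from a checkpoint. A generic sketch of that custom-deserialization pattern with plain java.io, outside of Spark (the class name is hypothetical):

import java.io.{IOException, ObjectInputStream}
import scala.collection.mutable.ArrayBuffer

class ResettableCollector extends Serializable {
  @transient private var buffer = new ArrayBuffer[String]()

  def add(s: String): Unit = buffer += s
  def values: Seq[String] = buffer.toSeq

  // Re-initialize transient state after default deserialization,
  // mirroring what TestOutputStream does with its output queue.
  @throws(classOf[IOException])
  private def readObject(ois: ObjectInputStream): Unit = {
    ois.defaultReadObject()
    buffer = new ArrayBuffer[String]()
  }
}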
Example 94
Source File: SparkSQLCLIService.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.thriftserver import java.io.IOException import java.util.{List => JList} import javax.security.auth.login.LoginException import scala.collection.JavaConverters._ import org.apache.commons.logging.Log import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.shims.Utils import org.apache.hadoop.security.UserGroupInformation import org.apache.hive.service.{AbstractService, Service, ServiceException} import org.apache.hive.service.Service.STATE import org.apache.hive.service.auth.HiveAuthFactory import org.apache.hive.service.cli._ import org.apache.hive.service.server.HiveServer2 import org.apache.spark.sql.SQLContext import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._ private[hive] class SparkSQLCLIService(hiveServer: HiveServer2, sqlContext: SQLContext) extends CLIService(hiveServer) with ReflectedCompositeService { override def init(hiveConf: HiveConf) { setSuperField(this, "hiveConf", hiveConf) val sparkSqlSessionManager = new SparkSQLSessionManager(hiveServer, sqlContext) setSuperField(this, "sessionManager", sparkSqlSessionManager) addService(sparkSqlSessionManager) var sparkServiceUGI: UserGroupInformation = null if (UserGroupInformation.isSecurityEnabled) { try { HiveAuthFactory.loginFromKeytab(hiveConf) sparkServiceUGI = Utils.getUGI() setSuperField(this, "serviceUGI", sparkServiceUGI) } catch { case e @ (_: IOException | _: LoginException) => throw new ServiceException("Unable to login to kerberos with given principal/keytab", e) } } initCompositeService(hiveConf) } override def getInfo(sessionHandle: SessionHandle, getInfoType: GetInfoType): GetInfoValue = { getInfoType match { case GetInfoType.CLI_SERVER_NAME => new GetInfoValue("Spark SQL") case GetInfoType.CLI_DBMS_NAME => new GetInfoValue("Spark SQL") case GetInfoType.CLI_DBMS_VER => new GetInfoValue(sqlContext.sparkContext.version) case _ => super.getInfo(sessionHandle, getInfoType) } } } private[thriftserver] trait ReflectedCompositeService { this: AbstractService => def initCompositeService(hiveConf: HiveConf) { // Emulating `CompositeService.init(hiveConf)` val serviceList = getAncestorField[JList[Service]](this, 2, "serviceList") serviceList.asScala.foreach(_.init(hiveConf)) // Emulating `AbstractService.init(hiveConf)` invoke(classOf[AbstractService], this, "ensureCurrentState", classOf[STATE] -> STATE.NOTINITED) setAncestorField(this, 3, "hiveConf", hiveConf) invoke(classOf[AbstractService], this, "changeState", classOf[STATE] -> STATE.INITED) getAncestorField[Log](this, 3, "LOG").info(s"Service: $getName is inited.") } }
Example 95
Source File: ReusableStringReaderSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions.xml import java.io.IOException import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.expressions.xml.UDFXPathUtil.ReusableStringReader class ReusableStringReaderSuite extends SparkFunSuite { private val fox = "Quick brown fox jumps over the lazy dog." test("empty reader") { val reader = new ReusableStringReader intercept[IOException] { reader.read() } intercept[IOException] { reader.ready() } reader.close() } test("mark reset") { val reader = new ReusableStringReader if (reader.markSupported()) { reader.asInstanceOf[ReusableStringReader].set(fox) assert(reader.ready()) val cc = new Array[Char](6) var read = reader.read(cc) assert(read == 6) assert("Quick " == new String(cc)) reader.mark(100) read = reader.read(cc) assert(read == 6) assert("brown " == new String(cc)) reader.reset() read = reader.read(cc) assert(read == 6) assert("brown " == new String(cc)) } reader.close() } test("skip") { val reader = new ReusableStringReader reader.asInstanceOf[ReusableStringReader].set(fox) // skip entire the data: var skipped = reader.skip(fox.length() + 1) assert(fox.length() == skipped) assert(-1 == reader.read()) reader.asInstanceOf[ReusableStringReader].set(fox) // reset the data val cc = new Array[Char](6) var read = reader.read(cc) assert(read == 6) assert("Quick " == new String(cc)) // skip some piece of data: skipped = reader.skip(30) assert(skipped == 30) read = reader.read(cc) assert(read == 4) assert("dog." == new String(cc, 0, read)) // skip when already at EOF: skipped = reader.skip(300) assert(skipped == 0, skipped) assert(reader.read() == -1) reader.close() } }
Example 96
Source File: ProcessTestUtils.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.test import java.io.{InputStream, IOException} import scala.sys.process.BasicIO object ProcessTestUtils { class ProcessOutputCapturer(stream: InputStream, capture: String => Unit) extends Thread { this.setDaemon(true) override def run(): Unit = { try { BasicIO.processFully(capture)(stream) } catch { case _: IOException => // Ignores the IOException thrown when the process terminates, which closes the input // stream abruptly. } } } }
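A possible way to drive ProcessOutputCapturer, assuming the object above is importable from org.apache.spark.sql.test; the command is illustrative:

import org.apache.spark.sql.test.ProcessTestUtils

object CapturerExample {
  def main(args: Array[String]): Unit = {
    val process = new ProcessBuilder("echo", "hello").start()
    val capturer =
      new ProcessTestUtils.ProcessOutputCapturer(process.getInputStream, line => println(s"out: $line"))
    capturer.start()    // daemon thread that copies the stream until EOF or an IOException
    process.waitFor()
  }
}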
Example 97
Source File: RawTextSender.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.{ByteArrayOutputStream, IOException} import java.net.ServerSocket import java.nio.ByteBuffer import scala.io.Source import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.serializer.KryoSerializer import org.apache.spark.util.IntParam private[streaming] object RawTextSender extends Logging { def main(args: Array[String]) { if (args.length != 4) { // scalastyle:off println System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>") // scalastyle:on println System.exit(1) } // Parse the arguments using a pattern match val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args // Repeat the input data multiple times to fill in a buffer val lines = Source.fromFile(file).getLines().toArray val bufferStream = new ByteArrayOutputStream(blockSize + 1000) val ser = new KryoSerializer(new SparkConf()).newInstance() val serStream = ser.serializeStream(bufferStream) var i = 0 while (bufferStream.size < blockSize) { serStream.writeObject(lines(i)) i = (i + 1) % lines.length } val array = bufferStream.toByteArray val countBuf = ByteBuffer.wrap(new Array[Byte](4)) countBuf.putInt(array.length) countBuf.flip() val serverSocket = new ServerSocket(port) logInfo("Listening on port " + port) while (true) { val socket = serverSocket.accept() logInfo("Got a new connection") val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec) try { while (true) { out.write(countBuf.array) out.write(array) } } catch { case e: IOException => logError("Client disconnected") } finally { socket.close() } } } }
Example 98
Source File: HdfsUtils.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.{FileNotFoundException, IOException} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ private[streaming] object HdfsUtils { def getOutputStream(path: String, conf: Configuration): FSDataOutputStream = { val dfsPath = new Path(path) val dfs = getFileSystemForPath(dfsPath, conf) // If the file exists and we have append support, append instead of creating a new file val stream: FSDataOutputStream = { if (dfs.isFile(dfsPath)) { if (conf.getBoolean("hdfs.append.support", false) || dfs.isInstanceOf[RawLocalFileSystem]) { dfs.append(dfsPath) } else { throw new IllegalStateException("File exists and there is no append support!") } } else { dfs.create(dfsPath) } } stream } def getInputStream(path: String, conf: Configuration): FSDataInputStream = { val dfsPath = new Path(path) val dfs = getFileSystemForPath(dfsPath, conf) try { dfs.open(dfsPath) } catch { case _: FileNotFoundException => null case e: IOException => // If we are really unlucky, the file may be deleted as we're opening the stream. // This can happen as clean up is performed by daemon threads that may be left over from // previous runs. if (!dfs.isFile(dfsPath)) null else throw e } } def checkState(state: Boolean, errorMsg: => String) { if (!state) { throw new IllegalStateException(errorMsg) } } def checkFileExists(path: String, conf: Configuration): Boolean = { val hdpPath = new Path(path) val fs = getFileSystemForPath(hdpPath, conf) fs.isFile(hdpPath) } }
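A sketch of exercising HdfsUtils against the local file system through a default Hadoop Configuration; the path is a placeholder, and because the object is private[streaming] the caller is placed in the same package:

package org.apache.spark.streaming.util  // HdfsUtils is private[streaming], so the caller must live under this package

import java.nio.charset.StandardCharsets
import org.apache.hadoop.conf.Configuration

object HdfsUtilsExample {
  def main(args: Array[String]): Unit = {
    val conf = new Configuration()                    // with no HDFS settings this resolves to the local file system
    val path = "file:///tmp/hdfs-utils-example.bin"   // placeholder path

    val payload = "write-ahead record".getBytes(StandardCharsets.UTF_8)
    val out = HdfsUtils.getOutputStream(path, conf)
    out.write(payload)
    out.close()

    val in = HdfsUtils.getInputStream(path, conf)
    val buffer = new Array[Byte](payload.length)
    in.readFully(buffer)                              // FSDataInputStream extends DataInputStream
    in.close()
    println(new String(buffer, StandardCharsets.UTF_8))
  }
}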
Example 99
Source File: FileBasedWriteAheadLogReader.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.{Closeable, EOFException, IOException} import java.nio.ByteBuffer import org.apache.hadoop.conf.Configuration import org.apache.spark.internal.Logging private[streaming] class FileBasedWriteAheadLogReader(path: String, conf: Configuration) extends Iterator[ByteBuffer] with Closeable with Logging { private val instream = HdfsUtils.getInputStream(path, conf) private var closed = (instream == null) // the file may be deleted as we're opening the stream private var nextItem: Option[ByteBuffer] = None override def hasNext: Boolean = synchronized { if (closed) { return false } if (nextItem.isDefined) { // handle the case where hasNext is called without calling next true } else { try { val length = instream.readInt() val buffer = new Array[Byte](length) instream.readFully(buffer) nextItem = Some(ByteBuffer.wrap(buffer)) logTrace("Read next item " + nextItem.get) true } catch { case e: EOFException => logDebug("Error reading next item, EOF reached", e) close() false case e: IOException => logWarning("Error while trying to read data. If the file was deleted, " + "this should be okay.", e) close() if (HdfsUtils.checkFileExists(path, conf)) { // If file exists, this could be a legitimate error throw e } else { // File was deleted. This can occur when the daemon cleanup thread takes time to // delete the file during recovery. false } case e: Exception => logWarning("Error while trying to read data from HDFS.", e) close() throw e } } } override def next(): ByteBuffer = synchronized { val data = nextItem.getOrElse { close() throw new IllegalStateException( "next called without calling hasNext or after hasNext returned false") } nextItem = None // Ensure the next hasNext call loads new data. data } override def close(): Unit = synchronized { if (!closed) { instream.close() } closed = true } }
Example 100
Source File: CommandUtils.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.worker import java.io.{File, FileOutputStream, InputStream, IOException} import scala.collection.JavaConverters._ import scala.collection.Map import org.apache.spark.SecurityManager import org.apache.spark.deploy.Command import org.apache.spark.internal.Logging import org.apache.spark.launcher.WorkerCommandBuilder import org.apache.spark.util.Utils def redirectStream(in: InputStream, file: File) { val out = new FileOutputStream(file, true) // TODO: It would be nice to add a shutdown hook here that explains why the output is // terminating. Otherwise if the worker dies the executor logs will silently stop. new Thread("redirect output to " + file) { override def run() { try { Utils.copyStream(in, out, true) } catch { case e: IOException => logInfo("Redirection to " + file + " closed: " + e.getMessage) } } }.start() } }
Example 101
Source File: SparkHadoopMapRedUtil.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.mapred import java.io.IOException import org.apache.hadoop.mapreduce.{TaskAttemptContext => MapReduceTaskAttemptContext} import org.apache.hadoop.mapreduce.{OutputCommitter => MapReduceOutputCommitter} import org.apache.spark.{SparkEnv, TaskContext} import org.apache.spark.executor.CommitDeniedException import org.apache.spark.internal.Logging object SparkHadoopMapRedUtil extends Logging { def commitTask( committer: MapReduceOutputCommitter, mrTaskContext: MapReduceTaskAttemptContext, jobId: Int, splitId: Int): Unit = { val mrTaskAttemptID = mrTaskContext.getTaskAttemptID // Called after we have decided to commit def performCommit(): Unit = { try { committer.commitTask(mrTaskContext) logInfo(s"$mrTaskAttemptID: Committed") } catch { case cause: IOException => logError(s"Error committing the output of task: $mrTaskAttemptID", cause) committer.abortTask(mrTaskContext) throw cause } } // First, check whether the task's output has already been committed by some other attempt if (committer.needsTaskCommit(mrTaskContext)) { val shouldCoordinateWithDriver: Boolean = { val sparkConf = SparkEnv.get.conf // We only need to coordinate with the driver if there are concurrent task attempts. // Note that this could happen even when speculation is not enabled (e.g. see SPARK-8029). // This (undocumented) setting is an escape-hatch in case the commit code introduces bugs. sparkConf.getBoolean("spark.hadoop.outputCommitCoordination.enabled", defaultValue = true) } if (shouldCoordinateWithDriver) { val outputCommitCoordinator = SparkEnv.get.outputCommitCoordinator val taskAttemptNumber = TaskContext.get().attemptNumber() val canCommit = outputCommitCoordinator.canCommit(jobId, splitId, taskAttemptNumber) if (canCommit) { performCommit() } else { val message = s"$mrTaskAttemptID: Not committed because the driver did not authorize commit" logInfo(message) // We need to abort the task so that the driver can reschedule new attempts, if necessary committer.abortTask(mrTaskContext) throw new CommitDeniedException(message, jobId, splitId, taskAttemptNumber) } } else { // Speculation is disabled or a user has chosen to manually bypass the commit coordination performCommit() } } else { // Some other attempt committed the output, so we do nothing and signal success logInfo(s"No need to commit output of task because needsTaskCommit=false: $mrTaskAttemptID") } } }
Example 102
Source File: SerializableBuffer.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream} import java.nio.ByteBuffer import java.nio.channels.Channels private[spark] class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable { def value: ByteBuffer = buffer private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { val length = in.readInt() buffer = ByteBuffer.allocate(length) var amountRead = 0 val channel = Channels.newChannel(in) while (amountRead < length) { val ret = channel.read(buffer) if (ret == -1) { throw new EOFException("End of file before fully reading buffer") } amountRead += ret } buffer.rewind() // Allow us to read it later } private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.writeInt(buffer.limit()) if (Channels.newChannel(out).write(buffer) != buffer.limit()) { throw new IOException("Could not fully write buffer to output stream") } buffer.rewind() // Allow us to write it again later } }
Example 103
Source File: DiskStore.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.storage import java.io.{FileOutputStream, IOException, RandomAccessFile} import java.nio.ByteBuffer import java.nio.channels.FileChannel.MapMode import com.google.common.io.Closeables import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.util.Utils import org.apache.spark.util.io.ChunkedByteBuffer def put(blockId: BlockId)(writeFunc: FileOutputStream => Unit): Unit = { if (contains(blockId)) { throw new IllegalStateException(s"Block $blockId is already present in the disk store") } logDebug(s"Attempting to put block $blockId") val startTime = System.currentTimeMillis val file = diskManager.getFile(blockId) val fileOutputStream = new FileOutputStream(file) var threwException: Boolean = true try { writeFunc(fileOutputStream) threwException = false } finally { try { Closeables.close(fileOutputStream, threwException) } finally { if (threwException) { remove(blockId) } } } val finishTime = System.currentTimeMillis logDebug("Block %s stored as %s file on disk in %d ms".format( file.getName, Utils.bytesToString(file.length()), finishTime - startTime)) } def putBytes(blockId: BlockId, bytes: ChunkedByteBuffer): Unit = { put(blockId) { fileOutputStream => val channel = fileOutputStream.getChannel Utils.tryWithSafeFinally { bytes.writeFully(channel) } { channel.close() } } } def getBytes(blockId: BlockId): ChunkedByteBuffer = { val file = diskManager.getFile(blockId.name) val channel = new RandomAccessFile(file, "r").getChannel Utils.tryWithSafeFinally { // For small files, directly read rather than memory map if (file.length < minMemoryMapBytes) { val buf = ByteBuffer.allocate(file.length.toInt) channel.position(0) while (buf.remaining() != 0) { if (channel.read(buf) == -1) { throw new IOException("Reached EOF before filling buffer\n" + s"offset=0\nfile=${file.getAbsolutePath}\nbuf.remaining=${buf.remaining}") } } buf.flip() new ChunkedByteBuffer(buf) } else { new ChunkedByteBuffer(channel.map(MapMode.READ_ONLY, 0, file.length)) } } { channel.close() } } def remove(blockId: BlockId): Boolean = { val file = diskManager.getFile(blockId.name) if (file.exists()) { val ret = file.delete() if (!ret) { logWarning(s"Error deleting ${file.getPath()}") } ret } else { false } } def contains(blockId: BlockId): Boolean = { val file = diskManager.getFile(blockId.name) file.exists() } }
Example 104
Source File: CartesianRDD.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.util.Utils private[spark] class CartesianPartition( idx: Int, @transient private val rdd1: RDD[_], @transient private val rdd2: RDD[_], s1Index: Int, s2Index: Int ) extends Partition { var s1 = rdd1.partitions(s1Index) var s2 = rdd2.partitions(s2Index) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization s1 = rdd1.partitions(s1Index) s2 = rdd2.partitions(s2Index) oos.defaultWriteObject() } } private[spark] class CartesianRDD[T: ClassTag, U: ClassTag]( sc: SparkContext, var rdd1 : RDD[T], var rdd2 : RDD[U]) extends RDD[(T, U)](sc, Nil) with Serializable { val numPartitionsInRdd2 = rdd2.partitions.length override def getPartitions: Array[Partition] = { // create the cross product split val array = new Array[Partition](rdd1.partitions.length * rdd2.partitions.length) for (s1 <- rdd1.partitions; s2 <- rdd2.partitions) { val idx = s1.index * numPartitionsInRdd2 + s2.index array(idx) = new CartesianPartition(idx, rdd1, rdd2, s1.index, s2.index) } array } override def getPreferredLocations(split: Partition): Seq[String] = { val currSplit = split.asInstanceOf[CartesianPartition] (rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)).distinct } override def compute(split: Partition, context: TaskContext): Iterator[(T, U)] = { val currSplit = split.asInstanceOf[CartesianPartition] for (x <- rdd1.iterator(currSplit.s1, context); y <- rdd2.iterator(currSplit.s2, context)) yield (x, y) } override def getDependencies: Seq[Dependency[_]] = List( new NarrowDependency(rdd1) { def getParents(id: Int): Seq[Int] = List(id / numPartitionsInRdd2) }, new NarrowDependency(rdd2) { def getParents(id: Int): Seq[Int] = List(id % numPartitionsInRdd2) } ) override def clearDependencies() { super.clearDependencies() rdd1 = null rdd2 = null } }
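CartesianRDD is the RDD behind the user-facing cartesian transformation. A brief sketch of that API with a local SparkContext (application name and master are arbitrary):

import org.apache.spark.{SparkConf, SparkContext}

object CartesianExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("cartesian-example").setMaster("local[2]"))
    val letters = sc.parallelize(Seq("a", "b"))
    val numbers = sc.parallelize(Seq(1, 2, 3))
    val pairs = letters.cartesian(numbers)          // materializes a CartesianRDD under the hood
    println(pairs.collect().sorted.mkString(", "))  // (a,1), (a,2), (a,3), (b,1), (b,2), (b,3)
    sc.stop()
  }
}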
Example 105
Source File: UnionRDD.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer import scala.collection.parallel.{ForkJoinTaskSupport, ThreadPoolTaskSupport} import scala.concurrent.forkjoin.ForkJoinPool import scala.reflect.ClassTag import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils private[spark] class UnionPartition[T: ClassTag]( idx: Int, @transient private val rdd: RDD[T], val parentRddIndex: Int, @transient private val parentRddPartitionIndex: Int) extends Partition { var parentPartition: Partition = rdd.partitions(parentRddPartitionIndex) def preferredLocations(): Seq[String] = rdd.preferredLocations(parentPartition) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization parentPartition = rdd.partitions(parentRddPartitionIndex) oos.defaultWriteObject() } } object UnionRDD { private[spark] lazy val partitionEvalTaskSupport = new ForkJoinTaskSupport(new ForkJoinPool(8)) } @DeveloperApi class UnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]]) extends RDD[T](sc, Nil) { // Nil since we implement getDependencies // visible for testing private[spark] val isPartitionListingParallel: Boolean = rdds.length > conf.getInt("spark.rdd.parallelListingThreshold", 10) override def getPartitions: Array[Partition] = { val parRDDs = if (isPartitionListingParallel) { val parArray = rdds.par parArray.tasksupport = UnionRDD.partitionEvalTaskSupport parArray } else { rdds } val array = new Array[Partition](parRDDs.map(_.partitions.length).seq.sum) var pos = 0 for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) { array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index) pos += 1 } array } override def getDependencies: Seq[Dependency[_]] = { val deps = new ArrayBuffer[Dependency[_]] var pos = 0 for (rdd <- rdds) { deps += new RangeDependency(rdd, 0, pos, rdd.partitions.length) pos += rdd.partitions.length } deps } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val part = s.asInstanceOf[UnionPartition[T]] parent[T](part.parentRddIndex).iterator(part.parentPartition, context) } override def getPreferredLocations(s: Partition): Seq[String] = s.asInstanceOf[UnionPartition[T]].preferredLocations() override def clearDependencies() { super.clearDependencies() rdds = null } }
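UnionRDD is what SparkContext.union constructs. A brief sketch with a local SparkContext (application name and master are arbitrary):

import org.apache.spark.{SparkConf, SparkContext}

object UnionExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("union-example").setMaster("local[2]"))
    val first = sc.parallelize(1 to 3)
    val second = sc.parallelize(4 to 6)
    val combined = sc.union(Seq(first, second))   // builds a UnionRDD over both parents
    println(combined.count())                     // 6
    sc.stop()
  }
}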
Example 106
Source File: PartitionerAwareUnionRDD.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark.{OneToOneDependency, Partition, SparkContext, TaskContext} import org.apache.spark.util.Utils private[spark] class PartitionerAwareUnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]] ) extends RDD[T](sc, rdds.map(x => new OneToOneDependency(x))) { require(rdds.nonEmpty) require(rdds.forall(_.partitioner.isDefined)) require(rdds.flatMap(_.partitioner).toSet.size == 1, "Parent RDDs have different partitioners: " + rdds.flatMap(_.partitioner)) override val partitioner = rdds.head.partitioner override def getPartitions: Array[Partition] = { val numPartitions = partitioner.get.numPartitions (0 until numPartitions).map { index => new PartitionerAwareUnionRDDPartition(rdds, index) }.toArray } // Get the location where most of the partitions of parent RDDs are located override def getPreferredLocations(s: Partition): Seq[String] = { logDebug("Finding preferred location for " + this + ", partition " + s.index) val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents val locations = rdds.zip(parentPartitions).flatMap { case (rdd, part) => val parentLocations = currPrefLocs(rdd, part) logDebug("Location of " + rdd + " partition " + part.index + " = " + parentLocations) parentLocations } val location = if (locations.isEmpty) { None } else { // Find the location that maximum number of parent partitions prefer Some(locations.groupBy(x => x).maxBy(_._2.length)._1) } logDebug("Selected location for " + this + ", partition " + s.index + " = " + location) location.toSeq } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents rdds.zip(parentPartitions).iterator.flatMap { case (rdd, p) => rdd.iterator(p, context) } } override def clearDependencies() { super.clearDependencies() rdds = null } // Get the *current* preferred locations from the DAGScheduler (as opposed to the static ones) private def currPrefLocs(rdd: RDD[_], part: Partition): Seq[String] = { rdd.context.getPreferredLocs(rdd, part.index).map(tl => tl.host) } }
Example 107
Source File: TestDiskFull.scala From eidos with Apache License 2.0 | 5 votes |
package org.clulab.wm.eidos.utils import java.io.BufferedOutputStream import java.io.FileOutputStream import java.io.IOException import java.io.OutputStreamWriter import java.io.PrintWriter import java.io.SyncFailedException import java.nio.charset.StandardCharsets import org.clulab.wm.eidos.test.TestUtils._ import org.clulab.wm.eidos.utils.Closer.AutoCloser class TestDiskFull extends Test { def test1 = { val file = "/E:/full.dat" var i = 0 try { val text1 = "The quick brown fox jumped over the lazy dog." val text = text1 + text1 for (limit <- 1 until 400) { val fos = new FileOutputStream(file) val osw = new OutputStreamWriter(new BufferedOutputStream(fos), StandardCharsets.UTF_8.toString) i = 0 new PrintWriter(osw).autoClose { pw => while (i < limit) { pw.print(text) i += 1 // pw.flush() // osw.flush() // fos.flush() fos.getFD.sync() } } } } catch { case exception: SyncFailedException => println(s"Synchronization failed for file $file at $i") println("Exiting with code -2 on assumption that the disk is full") System.exit(-2) case exception: IOException => println(s"IO failed for file $file at $i") println("Exiting with code -2 on assumption that the disk is full") System.exit(-2) case exception: Exception => println(s"Exception for file $file at $i") exception.printStackTrace() case throwable: Throwable => println(s"Throwable for file $file at $i") throwable.printStackTrace() } } // test1 }
Example 108
Source File: ReplayListenerBus.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.io.{InputStream, IOException} import scala.io.Source import org.json4s.jackson.JsonMethods._ import org.apache.spark.Logging import org.apache.spark.util.JsonProtocol def replay(logData: InputStream, sourceName: String): Unit = { var currentLine: String = null var lineNumber: Int = 1 try { val lines = Source.fromInputStream(logData).getLines() lines.foreach { line => currentLine = line postToAll(JsonProtocol.sparkEventFromJson(parse(line))) lineNumber += 1 } } catch { case ioe: IOException => throw ioe case e: Exception => logError(s"Exception parsing Spark event log: $sourceName", e) logError(s"Malformed line #$lineNumber: $currentLine\n") } } }
Example 109
Source File: SerializableBuffer.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream} import java.nio.ByteBuffer import java.nio.channels.Channels private[spark] class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable { def value = buffer private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { val length = in.readInt() buffer = ByteBuffer.allocate(length) var amountRead = 0 val channel = Channels.newChannel(in) while (amountRead < length) { val ret = channel.read(buffer) if (ret == -1) { throw new EOFException("End of file before fully reading buffer") } amountRead += ret } buffer.rewind() // Allow us to read it later } private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.writeInt(buffer.limit()) if (Channels.newChannel(out).write(buffer) != buffer.limit()) { throw new IOException("Could not fully write buffer to output stream") } buffer.rewind() // Allow us to write it again later } }
Example 110
Source File: BlockManagerId.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.storage import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput} import java.util.concurrent.ConcurrentHashMap import org.apache.spark.SparkContext import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils def apply(execId: String, host: String, port: Int) = getCachedBlockManagerId(new BlockManagerId(execId, host, port)) def apply(in: ObjectInput) = { val obj = new BlockManagerId() obj.readExternal(in) getCachedBlockManagerId(obj) } val blockManagerIdCache = new ConcurrentHashMap[BlockManagerId, BlockManagerId]() def getCachedBlockManagerId(id: BlockManagerId): BlockManagerId = { blockManagerIdCache.putIfAbsent(id, id) blockManagerIdCache.get(id) } }
Example 111
Source File: CartesianRDD.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.util.Utils private[spark] class CartesianPartition( idx: Int, @transient rdd1: RDD[_], @transient rdd2: RDD[_], s1Index: Int, s2Index: Int ) extends Partition { var s1 = rdd1.partitions(s1Index) var s2 = rdd2.partitions(s2Index) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization s1 = rdd1.partitions(s1Index) s2 = rdd2.partitions(s2Index) oos.defaultWriteObject() } } private[spark] class CartesianRDD[T: ClassTag, U: ClassTag]( sc: SparkContext, var rdd1 : RDD[T], var rdd2 : RDD[U]) extends RDD[Pair[T, U]](sc, Nil) with Serializable { val numPartitionsInRdd2 = rdd2.partitions.size override def getPartitions: Array[Partition] = { // create the cross product split val array = new Array[Partition](rdd1.partitions.size * rdd2.partitions.size) for (s1 <- rdd1.partitions; s2 <- rdd2.partitions) { val idx = s1.index * numPartitionsInRdd2 + s2.index array(idx) = new CartesianPartition(idx, rdd1, rdd2, s1.index, s2.index) } array } override def getPreferredLocations(split: Partition): Seq[String] = { val currSplit = split.asInstanceOf[CartesianPartition] (rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)).distinct } override def compute(split: Partition, context: TaskContext) = { val currSplit = split.asInstanceOf[CartesianPartition] for (x <- rdd1.iterator(currSplit.s1, context); y <- rdd2.iterator(currSplit.s2, context)) yield (x, y) } override def getDependencies: Seq[Dependency[_]] = List( new NarrowDependency(rdd1) { def getParents(id: Int): Seq[Int] = List(id / numPartitionsInRdd2) }, new NarrowDependency(rdd2) { def getParents(id: Int): Seq[Int] = List(id % numPartitionsInRdd2) } ) override def clearDependencies() { super.clearDependencies() rdd1 = null rdd2 = null } }
Example 112
Source File: UnionRDD.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils private[spark] class UnionPartition[T: ClassTag]( idx: Int, @transient rdd: RDD[T], val parentRddIndex: Int, @transient parentRddPartitionIndex: Int) extends Partition { var parentPartition: Partition = rdd.partitions(parentRddPartitionIndex) def preferredLocations() = rdd.preferredLocations(parentPartition) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization parentPartition = rdd.partitions(parentRddPartitionIndex) oos.defaultWriteObject() } } @DeveloperApi class UnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]]) extends RDD[T](sc, Nil) { // Nil since we implement getDependencies override def getPartitions: Array[Partition] = { val array = new Array[Partition](rdds.map(_.partitions.size).sum) var pos = 0 for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) { array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index) pos += 1 } array } override def getDependencies: Seq[Dependency[_]] = { val deps = new ArrayBuffer[Dependency[_]] var pos = 0 for (rdd <- rdds) { deps += new RangeDependency(rdd, 0, pos, rdd.partitions.size) pos += rdd.partitions.size } deps } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val part = s.asInstanceOf[UnionPartition[T]] parent[T](part.parentRddIndex).iterator(part.parentPartition, context) } override def getPreferredLocations(s: Partition): Seq[String] = s.asInstanceOf[UnionPartition[T]].preferredLocations() override def clearDependencies() { super.clearDependencies() rdds = null } }
Example 113
Source File: PartitionerAwareUnionRDD.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark.{OneToOneDependency, Partition, SparkContext, TaskContext} import org.apache.spark.util.Utils private[spark] class PartitionerAwareUnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]] ) extends RDD[T](sc, rdds.map(x => new OneToOneDependency(x))) { require(rdds.length > 0) require(rdds.flatMap(_.partitioner).toSet.size == 1, "Parent RDDs have different partitioners: " + rdds.flatMap(_.partitioner)) override val partitioner = rdds.head.partitioner override def getPartitions: Array[Partition] = { val numPartitions = partitioner.get.numPartitions (0 until numPartitions).map(index => { new PartitionerAwareUnionRDDPartition(rdds, index) }).toArray } // Get the location where most of the partitions of parent RDDs are located override def getPreferredLocations(s: Partition): Seq[String] = { logDebug("Finding preferred location for " + this + ", partition " + s.index) val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents val locations = rdds.zip(parentPartitions).flatMap { case (rdd, part) => { val parentLocations = currPrefLocs(rdd, part) logDebug("Location of " + rdd + " partition " + part.index + " = " + parentLocations) parentLocations } } val location = if (locations.isEmpty) { None } else { // Find the location that maximum number of parent partitions prefer Some(locations.groupBy(x => x).maxBy(_._2.length)._1) } logDebug("Selected location for " + this + ", partition " + s.index + " = " + location) location.toSeq } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents rdds.zip(parentPartitions).iterator.flatMap { case (rdd, p) => rdd.iterator(p, context) } } override def clearDependencies() { super.clearDependencies() rdds = null } // Get the *current* preferred locations from the DAGScheduler (as opposed to the static ones) private def currPrefLocs(rdd: RDD[_], part: Partition): Seq[String] = { rdd.context.getPreferredLocs(rdd, part.index).map(tl => tl.host) } }
Example 114
Source File: NotSerializableFakeTask.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.io.{ObjectInputStream, ObjectOutputStream, IOException} import org.apache.spark.TaskContext private[spark] class NotSerializableFakeTask(myId: Int, stageId: Int) extends Task[Array[Byte]](stageId, 0) { override def runTask(context: TaskContext): Array[Byte] = Array.empty[Byte] override def preferredLocations: Seq[TaskLocation] = Seq[TaskLocation]() @throws(classOf[IOException]) private def writeObject(out: ObjectOutputStream): Unit = { if (stageId == 0) { throw new IllegalStateException("Cannot serialize") } } @throws(classOf[IOException]) private def readObject(in: ObjectInputStream): Unit = {} }
Example 115
Source File: ScalismoViewer.scala From scalismo-ui with GNU General Public License v3.0 | 5 votes |
package scalismo.ui.app import java.io.{File, IOException} import scalismo.geometry._3D import scalismo.io.{ImageIO, LandmarkIO, MeshIO, StatismoIO} import scalismo.ui.api.ScalismoUI import scalismo.ui.util.FileUtil import scala.util.{Failure, Success} object ScalismoViewer { def showErrorMessage(file: File, exception: Throwable): Unit = { val message = s"Unable to load file ${file.getName}" System.err.println(message) System.err.println(exception.getMessage) } def main(args: Array[String]): Unit = { scalismo.initialize() val ui = ScalismoUI("Scalismo Viewer") val defaultGroup = ui.createGroup("Default Group") for (filename <- args) { val file = new File(filename) if (!file.isFile) { showErrorMessage(file, new IOException(s"name does not identify a valid file.")) } else { val basename = FileUtil.basename(file) val extension = FileUtil.extension(file).toLowerCase extension match { case "h5" => StatismoIO.readStatismoMeshModel(new File(filename)) match { case Success(model) => // we create for every model a new group val modelGroup = ui.createGroup(basename) ui.show(modelGroup, model, basename) case Failure(t) => showErrorMessage(file, t) } case "stl" => MeshIO.readMesh(file) match { case Success(mesh) => ui.show(defaultGroup, mesh, basename) case Failure(t) => showErrorMessage(file, t) } case "vtk" => MeshIO.readMesh(file) match { case Success(mesh) => ui.show(defaultGroup, mesh, basename) case Failure(_) => ImageIO.read3DScalarImageAsType[Float](file, resampleOblique = true) match { case Success(image) => ui.show(defaultGroup, image, basename) case Failure(t) => showErrorMessage(file, t) } } case "nii" => ImageIO.read3DScalarImageAsType[Float](file) match { case Success(image) => ui.show(defaultGroup, image, basename) case Failure(t) => showErrorMessage(file, t) } case "json" => LandmarkIO.readLandmarksJson[_3D](file) match { case Success(lms) => ui.show(defaultGroup, lms, basename) case Failure(t) => showErrorMessage(file, t) } case "csv" => LandmarkIO.readLandmarksCsv[_3D](file) match { case Success(lms) => ui.show(defaultGroup, lms, basename) case Failure(t) => showErrorMessage(file, t) } case _ => showErrorMessage(file, new IOException("Unknown file extension: " + extension)) } } } } }
Example 116
Source File: SerializedCpg.scala From codepropertygraph with Apache License 2.0 | 5 votes |
package io.shiftleft import java.io.{File, IOException} import java.net.{URI, URISyntaxException} import java.nio.file.{FileSystem, FileSystems, Files} import java.util import com.google.protobuf.GeneratedMessageV3 class SerializedCpg extends AutoCloseable { @throws[IOException] def addOverlay(overlay: GeneratedMessageV3, name: String): Unit = { if (!isEmpty) { val pathInZip = zipFileSystem.getPath(s"${counter}_${name}") counter += 1 val outputStream = Files.newOutputStream(pathInZip) overlay.writeTo(outputStream) outputStream.close() } } @throws[IOException] def addOverlay(overlays: Iterator[GeneratedMessageV3], name: String): Unit = { overlays.zipWithIndex.foreach { case (overlay, i) => addOverlay(overlay, name + "_" + i) } } @throws[IOException] override def close(): Unit = { if (!isEmpty) { zipFileSystem.close() } } }
Example 117
Source File: FileUtils.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{File, IOException} import org.apache.hadoop.conf.Configuration import org.apache.spark.SparkContext import org.apache.carbondata.common.logging.LogServiceFactory import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.datastore.filesystem.CarbonFile import org.apache.carbondata.core.datastore.impl.FileFactory import org.apache.carbondata.core.metadata.DatabaseLocationProvider import org.apache.carbondata.core.util.CarbonUtil import org.apache.carbondata.events.{CreateDatabasePostExecutionEvent, OperationContext, OperationListenerBus} import org.apache.carbondata.processing.exception.DataLoadingException object FileUtils { def getPaths(inputPath: String): String = { getPaths(inputPath, FileFactory.getConfiguration) } def getPaths(inputPath: String, hadoopConf: Configuration): String = { if (inputPath == null || inputPath.isEmpty) { throw new DataLoadingException("Input file path cannot be empty.") } else { val stringBuild = new StringBuilder() val filePaths = inputPath.split(",").map(_.trim) for (i <- 0 until filePaths.size) { val filePath = CarbonUtil.checkAndAppendHDFSUrl(filePaths(i)) val carbonFile = FileFactory.getCarbonFile(filePath, hadoopConf) if (!carbonFile.exists()) { throw new DataLoadingException( s"The input file does not exist: ${CarbonUtil.removeAKSK(filePaths(i))}" ) } getPathsFromCarbonFile(carbonFile, stringBuild, hadoopConf) } if (stringBuild.nonEmpty) { stringBuild.substring(0, stringBuild.size - 1) } else { throw new DataLoadingException("Please check your input path and make sure " + "that files end with '.csv' and content is not empty.") } } } def getSpaceOccupied(inputPath: String, hadoopConfiguration: Configuration): Long = { var size : Long = 0 if (inputPath == null || inputPath.isEmpty) { size } else { val filePaths = inputPath.split(",") for (i <- 0 until filePaths.size) { val carbonFile = FileFactory.getCarbonFile(filePaths(i), hadoopConfiguration) size = size + carbonFile.getSize } size } } def createDatabaseDirectory(dbName: String, storePath: String, sparkContext: SparkContext) { val databasePath: String = storePath + File.separator + DatabaseLocationProvider.get().provide(dbName.toLowerCase) FileFactory.mkdirs(databasePath) val operationContext = new OperationContext val createDatabasePostExecutionEvent = new CreateDatabasePostExecutionEvent(dbName, databasePath, sparkContext) OperationListenerBus.getInstance.fireEvent(createDatabasePostExecutionEvent, operationContext) } }
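A hedged sketch of calling getPaths above; the CSV paths are placeholders and must exist on the configured file system, otherwise a DataLoadingException is thrown:

import org.apache.spark.util.FileUtils   // the CarbonData helper shown above

object GetPathsExample {
  def main(args: Array[String]): Unit = {
    // placeholder paths: the files must exist, otherwise a DataLoadingException is thrown
    val resolved = FileUtils.getPaths("/data/sales_2020.csv,/data/sales_2021.csv")
    println(resolved)   // comma-separated, validated file paths
  }
}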
Example 118
Source File: SparkCarbonStore.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.store import java.io.IOException import scala.collection.JavaConverters._ import org.apache.spark.{CarbonInputMetrics, SparkConf} import org.apache.spark.sql.{CarbonEnv, SparkSession} import org.apache.carbondata.common.annotations.InterfaceAudience import org.apache.carbondata.core.datastore.row.CarbonRow import org.apache.carbondata.core.index.IndexFilter import org.apache.carbondata.core.metadata.AbsoluteTableIdentifier import org.apache.carbondata.core.scan.expression.Expression import org.apache.carbondata.hadoop.CarbonProjection import org.apache.carbondata.spark.rdd.CarbonScanRDD def this(storeName: String, storeLocation: String) = { this() val sparkConf = new SparkConf(loadDefaults = true) session = SparkSession.builder .config(sparkConf) .appName("SparkCarbonStore-" + storeName) .config("spark.sql.warehouse.dir", storeLocation) .config("spark.sql.extensions", "org.apache.spark.sql.CarbonExtensions") .getOrCreate() } def this(sparkSession: SparkSession) = { this() session = sparkSession } @throws[IOException] override def scan( tableIdentifier: AbsoluteTableIdentifier, projectColumns: Array[String]): java.util.Iterator[CarbonRow] = { require(tableIdentifier != null) require(projectColumns != null) scan(tableIdentifier, projectColumns, null) } @throws[IOException] override def scan( tableIdentifier: AbsoluteTableIdentifier, projectColumns: Array[String], filter: Expression): java.util.Iterator[CarbonRow] = { require(tableIdentifier != null) require(projectColumns != null) val table = CarbonEnv .getCarbonTable(Some(tableIdentifier.getDatabaseName), tableIdentifier.getTableName)(session) val indexFilter = if (filter == null) null else new IndexFilter(table, filter) val rdd = new CarbonScanRDD[CarbonRow]( spark = session, columnProjection = new CarbonProjection(projectColumns), indexFilter = indexFilter, identifier = table.getAbsoluteTableIdentifier, serializedTableInfo = table.getTableInfo.serialize, tableInfo = table.getTableInfo, inputMetricsStats = new CarbonInputMetrics, partitionNames = null, dataTypeConverterClz = null, readSupportClz = classOf[CarbonRowReadSupport]) rdd.collect .iterator .asJava } @throws[IOException] override def sql(sqlString: String): java.util.Iterator[CarbonRow] = { val df = session.sql(sqlString) df.rdd .map(row => new CarbonRow(row.toSeq.toArray.asInstanceOf[Array[Object]])) .collect() .iterator .asJava } }
Example 119
Source File: TableStatusBackupTest.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.spark.carbondata import java.io.IOException import mockit.{Mock, MockUp} import org.apache.spark.sql.CarbonEnv import org.apache.spark.sql.test.util.QueryTest import org.scalatest.BeforeAndAfterAll import org.apache.carbondata.core.constants.CarbonCommonConstants import org.apache.carbondata.core.datastore.impl.FileFactory import org.apache.carbondata.core.statusmanager.SegmentStatusManager import org.apache.carbondata.core.util.CarbonProperties import org.apache.carbondata.core.util.path.CarbonTablePath class TableStatusBackupTest extends QueryTest with BeforeAndAfterAll { override protected def beforeAll(): Unit = { CarbonProperties.getInstance().addProperty( CarbonCommonConstants.ENABLE_TABLE_STATUS_BACKUP, "true") sql("drop table if exists source") sql("create table source(a string) stored as carbondata") } override protected def afterAll(): Unit = { sql("drop table if exists source") CarbonProperties.getInstance().addProperty( CarbonCommonConstants.ENABLE_TABLE_STATUS_BACKUP, "false") } test("backup table status file") { sql("insert into source values ('A'), ('B')") val tablePath = CarbonEnv.getCarbonTable(None, "source")(sqlContext.sparkSession).getTablePath val tableStatusFilePath = CarbonTablePath.getTableStatusFilePath(tablePath) val oldTableStatus = SegmentStatusManager.readTableStatusFile(tableStatusFilePath) var mock = new MockUp[SegmentStatusManager]() { @Mock @throws[IOException] def mockForTest(): Unit = { throw new IOException("thrown in mock") } } val exception = intercept[IOException] { sql("insert into source values ('A'), ('B')") } assert(exception.getMessage.contains("thrown in mock")) val backupPath = tableStatusFilePath + ".backup" assert(FileFactory.isFileExist(backupPath)) val backupTableStatus = SegmentStatusManager.readTableStatusFile(backupPath) assertResult(oldTableStatus)(backupTableStatus) mock = new MockUp[SegmentStatusManager]() { @Mock def mockForTest(): Unit = { } } } }
Example 120
Source File: TestRegisterIndexCarbonTable.scala From carbondata with Apache License 2.0 | 5 votes |
package org.apache.carbondata.spark.testsuite.secondaryindex import java.io.{File, IOException} import org.apache.commons.io.FileUtils import org.apache.spark.sql.Row import org.apache.spark.sql.test.TestQueryExecutor import org.apache.spark.sql.test.util.QueryTest import org.scalatest.BeforeAndAfterAll import org.apache.carbondata.core.constants.CarbonCommonConstants class TestRegisterIndexCarbonTable extends QueryTest with BeforeAndAfterAll { override def beforeAll { sql("drop database if exists carbon cascade") } def restoreData(dblocation: String, tableName: String) = { val destination = dblocation + CarbonCommonConstants.FILE_SEPARATOR + tableName val source = dblocation+ "_back" + CarbonCommonConstants.FILE_SEPARATOR + tableName try { FileUtils.copyDirectory(new File(source), new File(destination)) FileUtils.deleteDirectory(new File(source)) } catch { case e : Exception => throw new IOException("carbon table data restore failed.") } finally { } } def backUpData(dblocation: String, tableName: String) = { val source = dblocation + CarbonCommonConstants.FILE_SEPARATOR + tableName val destination = dblocation+ "_back" + CarbonCommonConstants.FILE_SEPARATOR + tableName try { FileUtils.copyDirectory(new File(source), new File(destination)) } catch { case e : Exception => throw new IOException("carbon table data backup failed.") } } test("register tables test") { val location = TestQueryExecutor.warehouse + CarbonCommonConstants.FILE_SEPARATOR + "dbName" sql("drop database if exists carbon cascade") sql(s"create database carbon location '${location}'") sql("use carbon") sql("""create table carbon.carbontable (c1 string,c2 int,c3 string,c5 string) STORED AS carbondata""") sql("insert into carbontable select 'a',1,'aa','aaa'") sql("create index index_on_c3 on table carbontable (c3, c5) AS 'carbondata'") backUpData(location, "carbontable") backUpData(location, "index_on_c3") sql("drop table carbontable") restoreData(location, "carbontable") restoreData(location, "index_on_c3") sql("refresh table carbontable") sql("refresh table index_on_c3") checkAnswer(sql("select count(*) from carbontable"), Row(1)) checkAnswer(sql("select c1 from carbontable"), Seq(Row("a"))) sql("REGISTER INDEX TABLE index_on_c3 ON carbontable") assert(sql("show indexes on carbontable").collect().nonEmpty) } override def afterAll { sql("drop database if exists carbon cascade") sql("use default") } }
Example 121
Source File: PlainOioServer.scala From netty-in-action-scala with Apache License 2.0 | 5 votes |
package nia.chapter4

import java.io.IOException
import java.net.ServerSocket
import java.nio.charset.Charset

class PlainOioServer {
  @throws[IOException]
  def serve(port: Int): Unit = {
    // Bind the server to the specified port
    val socket = new ServerSocket(port)
    try {
      while (true) {
        val clientSocket = socket.accept
        System.out.println("Accepted connection from " + clientSocket)
        // Create a new thread to handle the connection
        new Thread(() ⇒ {
          try {
            // Write the message to the connected client
            val out = clientSocket.getOutputStream
            out.write("Hi!\r\n".getBytes(Charset.forName("UTF-8")))
            out.flush()
            // Close the connection
            clientSocket.close()
          } catch {
            case e: IOException ⇒ e.printStackTrace()
          } finally {
            try {
              clientSocket.close()
            } catch {
              case ex: IOException ⇒ // ignore on close
            }
          }
        }).start() // Start the thread
      }
    } catch {
      case e: IOException ⇒ e.printStackTrace()
    }
  }
}
Example 122
Source File: BlockingIoExample.scala From netty-in-action-scala with Apache License 2.0 | 5 votes |
package nia.chapter1.scaladsl

import java.io.{ BufferedReader, IOException, InputStreamReader, PrintWriter }
import java.net.ServerSocket

  // #snip
  @throws[IOException]
  def serve(portNumber: Int): Unit = {
    // Create a new ServerSocket to listen for connection requests on the specified port
    val serverSocket = new ServerSocket(portNumber)
    // The call to accept() blocks until a connection has been established
    val clientSocket = serverSocket.accept
    // These stream objects are derived from the socket's stream objects
    val in = new BufferedReader(new InputStreamReader(clientSocket.getInputStream))
    val out = new PrintWriter(clientSocket.getOutputStream, true)
    var request: String = in.readLine
    var response: String = null
    // Start the processing loop
    while (request ne null) {
      if ("Done" != request) {
        // The request is passed to the server's processing method
        response = processRequest(request)
        // The server's response is sent to the client
        out.println(response)
        // Continue with the processing loop
      }
      request = in.readLine
    }
    // #snip
  }

  private def processRequest(request: String): String = "Processed"
}
Example 123
Source File: FileGenerator.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package generators import avrohugger.format.abstractions.SourceFormat import avrohugger.input.DependencyInspector import avrohugger.input.NestedSchemaExtractor import avrohugger.input.reflectivecompilation.schemagen._ import avrohugger.input.parsers.{ FileInputParser, StringInputParser} import avrohugger.matchers.TypeMatcher import avrohugger.stores.{ ClassStore, SchemaStore } import java.io.{File, FileNotFoundException, IOException} import org.apache.avro.{ Protocol, Schema } import org.apache.avro.Schema.Type.ENUM // Unable to overload this class' methods because outDir uses a default value private[avrohugger] object FileGenerator { def schemaToFile( schema: Schema, outDir: String, format: SourceFormat, classStore: ClassStore, schemaStore: SchemaStore, typeMatcher: TypeMatcher, restrictedFields: Boolean): Unit = { val topNS: Option[String] = DependencyInspector.getReferredNamespace(schema) val topLevelSchemas: List[Schema] = NestedSchemaExtractor.getNestedSchemas(schema, schemaStore, typeMatcher) // most-nested classes processed first topLevelSchemas.reverse.distinct.foreach(schema => { // pass in the top-level schema's namespace if the nested schema has none val ns = DependencyInspector.getReferredNamespace(schema) orElse topNS format.compile(classStore, ns, Left(schema), outDir, schemaStore, typeMatcher, restrictedFields) }) } def protocolToFile( protocol: Protocol, outDir: String, format: SourceFormat, classStore: ClassStore, schemaStore: SchemaStore, typeMatcher: TypeMatcher, restrictedFields: Boolean): Unit = { val ns = Option(protocol.getNamespace) format.compile(classStore, ns, Right(protocol), outDir, schemaStore, typeMatcher, restrictedFields) } def stringToFile( str: String, outDir: String, format: SourceFormat, classStore: ClassStore, schemaStore: SchemaStore, stringParser: StringInputParser, typeMatcher: TypeMatcher, restrictedFields: Boolean): Unit = { val schemaOrProtocols = stringParser.getSchemaOrProtocols(str, schemaStore) schemaOrProtocols.foreach(schemaOrProtocol => { schemaOrProtocol match { case Left(schema) => { schemaToFile(schema, outDir, format, classStore, schemaStore, typeMatcher, restrictedFields) } case Right(protocol) => { protocolToFile(protocol, outDir, format, classStore, schemaStore, typeMatcher, restrictedFields) } } }) } def fileToFile( inFile: File, outDir: String, format: SourceFormat, classStore: ClassStore, schemaStore: SchemaStore, fileParser: FileInputParser, typeMatcher: TypeMatcher, classLoader: ClassLoader, restrictedFields: Boolean): Unit = { val schemaOrProtocols: List[Either[Schema, Protocol]] = fileParser.getSchemaOrProtocols(inFile, format, classStore, classLoader) schemaOrProtocols.foreach(schemaOrProtocol => schemaOrProtocol match { case Left(schema) => { schemaToFile(schema, outDir, format, classStore, schemaStore, typeMatcher, restrictedFields) } case Right(protocol) => { protocolToFile(protocol, outDir, format, classStore, schemaStore, typeMatcher, restrictedFields) } }) } }
Example 124
Source File: Util.scala From avrohugger with Apache License 2.0 | 5 votes |
import java.io.BufferedReader
import java.io.File
import java.io.FileReader
import java.io.IOException

object Util {

  def readFile(fileName: String, maxTries: Int = 3): String = {
    def readFile0(count: Int): String = {
      try {
        // if file is empty, try again, it should be there
        val contents: String = scala.io.Source.fromFile(fileName).mkString
        if (contents.isEmpty && (count < maxTries)) readFile0(count + 1) else contents
      } catch {
        // if file is not found, try again, it should be there
        case e: Throwable =>
          if (count < maxTries) readFile0(count + 1)
          else sys.error("File not found: " + fileName)
      }
    }
    readFile0(0)
  }
}
Example 125
Source File: Factory.scala From mystem-scala with MIT License | 5 votes |
package ru.stachek66.nlp.mystem.holding import java.io.{File, IOException} import java.nio.file.Files import java.nio.file.attribute.PosixFilePermissions import org.slf4j.LoggerFactory import ru.stachek66.tools.external.FailSafeExternalProcessServer import ru.stachek66.tools.{Decompressor, Downloader, Tools} import scala.concurrent.duration._ import scala.sys.process._ import scala.util.Try def newMyStem(version: String, customExecutable: Option[File] = None): Try[MyStem] = Try { val ex = customExecutable match { case Some(exe) => exe case None => getExecutable(version) } version match { case "3.0" | "3.1" => new MyStem3( new FailSafeExternalProcessServer( ex.getAbsolutePath + (if (parsingOptions.nonEmpty) " " + parsingOptions else ""))) case _ => throw new NotImplementedError() } } @throws(classOf[Exception]) private[holding] def getExecutable(version: String): File = { val destFile = new File(BinDestination + BIN_FILE_NAME) val tempFile = new File(s"${BinDestination}tmp_${System.currentTimeMillis}.${Decompressor.select.traditionalExtension}") if (destFile.exists) { log.info("Old executable file found") try { val suggestedVersion = (destFile.getAbsolutePath + " -v") !! log.info("Version | " + suggestedVersion) // not scala-way stuff if (suggestedVersion.contains(version)) destFile else throw new Exception("Wrong version!") } catch { case e: Exception => log.warn("Removing old binary files...", e) destFile.delete getExecutable(version) } } else Tools.withAttempt(10, 1.second) { try { Decompressor.select.unpack( Downloader.downloadBinaryFile(getUrl(version), tempFile), destFile) } finally { tempFile.delete() try { Files.setPosixFilePermissions(destFile.toPath, PosixFilePermissions.fromString("r-xr-xr-x")).toFile } catch { case ioe: IOException => log.warn("Can't set POSIX permissions to file " + destFile.toPath) destFile } } } } }
Example 126
Source File: Decompressor.scala From mystem-scala with MIT License | 5 votes |
package ru.stachek66.tools

import java.io.{IOException, File, FileOutputStream}

import org.apache.commons.compress.archivers.ArchiveInputStream
import org.apache.commons.io.IOUtils
import ru.stachek66.nlp.mystem.Properties

trait Decompressor {

  def traditionalExtension: String

  def unpack(src: File, dst: File): File

  @throws(classOf[IOException])
  private[tools] def copyUncompressedAndClose(stream: ArchiveInputStream, dest: File): File = {
    // must be read
    val entry = stream.getNextEntry
    if (entry.isDirectory)
      throw new IOException("Decompressed entry is a directory (unexpectedly)")
    val os = new FileOutputStream(dest)
    try {
      IOUtils.copy(stream, os)
    } finally {
      os.close()
      stream.close()
    }
    dest
  }
}

object Decompressor {
  def select: Decompressor =
    if (Properties.CurrentOs.contains("win")) Zip else TarGz
}
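A short usage sketch for the Decompressor above; the archive and destination paths are hypothetical, and the Zip/TarGz implementations referenced by select are assumed to come from the same project:

import java.io.File
import ru.stachek66.tools.Decompressor

object DecompressorUsageSketch {
  def main(args: Array[String]): Unit = {
    // Picks Zip on Windows and TarGz elsewhere, then unpacks the archive
    // to the destination file, propagating IOException on failure.
    val unpacked: File = Decompressor.select.unpack(
      new File("/tmp/mystem.tar.gz"),
      new File("/tmp/mystem"))
    println(s"Unpacked to ${unpacked.getAbsolutePath}")
  }
}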
Example 127
Source File: SerializedProfilesLoader.scala From sparker with GNU General Public License v3.0 | 5 votes |
package SparkER.Wrappers import java.io.{IOException, _} import SparkER.DataStructures.Profile import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD def loadSerializedObject(fileName: String): Any = { var `object`: Any = null try { val file: InputStream = new FileInputStream(fileName) val buffer: InputStream = new BufferedInputStream(file) val input: ObjectInput = new ObjectInputStream(buffer) try { `object` = input.readObject } finally { input.close } } catch { case cnfEx: ClassNotFoundException => { System.err.println(fileName) cnfEx.printStackTrace } case ioex: IOException => { System.err.println(fileName) ioex.printStackTrace } } return `object` } }
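A hedged usage sketch for the loader above, assuming the method lives in an object named SerializedProfilesLoader (the enclosing declaration is elided in the listing) and that the file holds a serialized list of Profile objects:

import SparkER.DataStructures.Profile
import SparkER.Wrappers.SerializedProfilesLoader

object LoaderUsageSketch {
  def main(args: Array[String]): Unit = {
    // Returns null if the file cannot be read or the class is not found,
    // so the result must be checked before casting.
    val loaded = SerializedProfilesLoader.loadSerializedObject("/tmp/profiles.ser")
    if (loaded != null) {
      val profiles = loaded.asInstanceOf[List[Profile]]
      println(s"Loaded ${profiles.size} profiles")
    }
  }
}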
Example 128
Source File: SerializedProfilesLoader.scala From sparker with GNU General Public License v3.0 | 5 votes |
package Wrappers import java.io.{IOException, _} import DataStructures.Profile import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD def loadSerializedObject(fileName: String): Any = { var `object`: Any = null try { val file: InputStream = new FileInputStream(fileName) val buffer: InputStream = new BufferedInputStream(file) val input: ObjectInput = new ObjectInputStream(buffer) try { `object` = input.readObject } finally { input.close } } catch { case cnfEx: ClassNotFoundException => { System.err.println(fileName) cnfEx.printStackTrace } case ioex: IOException => { System.err.println(fileName) ioex.printStackTrace } } return `object` } }
Example 129
Source File: SerializedProfilesLoader.scala From sparker with GNU General Public License v3.0 | 5 votes |
package Wrappers import java.io.{IOException, _} import DataStructures.Profile import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD def loadSerializedObject(fileName: String): Any = { var `object`: Any = null try { val file: InputStream = new FileInputStream(fileName) val buffer: InputStream = new BufferedInputStream(file) val input: ObjectInput = new ObjectInputStream(buffer) try { `object` = input.readObject } finally { input.close } } catch { case cnfEx: ClassNotFoundException => { System.err.println(fileName) cnfEx.printStackTrace } case ioex: IOException => { System.err.println(fileName) ioex.printStackTrace } } return `object` } }
Example 130
Source File: VelocityUtils.scala From InteractiveGraph-neo4j with BSD 2-Clause "Simplified" License | 5 votes |
package org.grapheco.server.util

import java.io.{File, FileOutputStream, IOException, StringWriter}
import java.util.Properties

import cn.pidb.blob.Blob
import cn.pidb.engine.blob.{BlobIO, InlineBlob, RemoteBlob}
import org.apache.velocity.app.VelocityEngine
import org.apache.velocity.tools.ToolManager
import org.apache.velocity.tools.config.DefaultKey
import org.neo4j.values.storable.{BlobValue, ValueWriter}
import org.springframework.util.ClassUtils

import scala.collection.JavaConversions

object VelocityUtils {
  val pro = new Properties();
  val toolManager = new ToolManager();
  toolManager.configure("tools.xml");

  pro.setProperty("input.encoding", "UTF-8");
  pro.setProperty("output.encoding", "UTF-8");

  val ve = new VelocityEngine(pro);
  val props = new Properties()
  props.put("runtime.log.logsystem.class", "org.apache.velocity.runtime.log.SimpleLog4JLogSystem")
  props.put("runtime.log.logsystem.log4j.category", "velocity")
  props.put("runtime.log.logsystem.log4j.logger", "velocity")
  ve.init(props)

  def parse(expr: String, context: Map[String, Any]): Any = {
    val vc = toolManager.createContext();
    val writer = new StringWriter();

    context.foreach(kv => vc.put(kv._1,
      // is it a scala Map?
      if (kv._2.isInstanceOf[Map[_, _]]) {
        JavaConversions.mapAsJavaMap(kv._2.asInstanceOf[Map[_, _]])
      }
      else {
        kv._2
      }));

    try {
      if (expr.startsWith("=")) {
        val expr1 = expr.substring(1);
        ve.evaluate(vc, writer, "", s"#set($$__VAR=$expr1)");
        var value = vc.get("__VAR");
        // if it is a blob
        if (value.isInstanceOf[Blob]) {
          // get the blob
          var result: String = ""
          try {
            val data = value.asInstanceOf[Blob].toBytes()
            val path = ClassUtils.getDefaultClassLoader.getResource("").getPath.replace("/WEB-INF/classes", "") + "static/"
            val tool = new FileSystemTool()
            result = tool.filesave(data, path, System.currentTimeMillis.toString + ".jpg")
          } catch {
            case e: Throwable => print(e.toString)
          }
          // TODO: url
          return "http://localhost:9999/graphserver/static/" + result
        }
        return value
      }
      else {
        ve.evaluate(vc, writer, "", expr);
        writer.getBuffer.toString.trim
      }
    }
    catch {
      case e: Throwable =>
        throw new WrongExpressionException(expr, e);
    }
  }
}

class WrongExpressionException(msg: String, e: Throwable) extends RuntimeException(msg, e) {
}

@DefaultKey("fileTool")
class FileSystemTool {
  def exists(path: String) = new File(path).exists();

  @throws[IOException]
  def filesave(file: Array[Byte], filePath: String, fileName: String): String = {
    // target directory
    val targetfile = new File(filePath)
    if (!targetfile.exists) targetfile.mkdirs
    // write the binary stream
    val out = new FileOutputStream(filePath + fileName)
    out.write(file)
    out.flush()
    out.close()
    return fileName
  }
}
Example 131
Source File: DependencyNode.scala From cuesheet with Apache License 2.0 | 5 votes |
package com.kakao.cuesheet.deps import java.io.{BufferedOutputStream, File, FileOutputStream, IOException} import java.net.{URL, URLDecoder} import java.nio.file.{Files, Paths} import java.util.zip.{ZipEntry, ZipOutputStream} import com.kakao.mango.io.FileSystems import com.kakao.mango.logging.Logging import com.kakao.shaded.guava.io.Files.createTempDir sealed trait DependencyNode { def path: String } case class ManagedDependency(group: String, artifact: String, classifier: String = "jar") case class ManagedDependencyNode( path: String, group: String, artifact: String, classifier: String, version: String, children: Seq[ManagedDependency] ) extends DependencyNode { def key = ManagedDependency(group, artifact, classifier) } case class DirectoryDependencyNode(path: String) extends DependencyNode with Logging { lazy val compressed: UnmanagedDependencyNode = { val tmpdir = createTempDir() val jar = new File(s"${tmpdir.getAbsolutePath}/local-${tmpdir.getName}.jar") val root = Paths.get(path) val output = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(jar))) var count = 0 FileSystems.entries(root).foreach { path => if (resourceExtensions.exists(path.toString.endsWith)) { val entry = new ZipEntry(root.relativize(path).toString) output.putNextEntry(entry) try { Files.copy(path, output) count += 1 } catch { case e: IOException => logger.warn(s"skipping $path due to an IOException: ${e.getMessage}") } output.closeEntry() } } output.close() logger.debug(s"Successfully zipped $count files in $path into $jar") UnmanagedDependencyNode(jar.getAbsolutePath) } } case class JavaRuntimeDependencyNode(path: String) extends DependencyNode case class UnmanagedDependencyNode(path: String) extends DependencyNode object DependencyNode { val resolver = new ChainedArtifactResolver( new IvyPathArtifactResolver, new IvyOriginalPathArtifactResolver, new MavenPathArtifactResolver, new GradlePathArtifactResolver, new JavaRuntimeResolver, new MavenMetadataArtifactResolver, new UnmanagedJarResolver ) def resolve(url: URL): DependencyNode = { if (url.getProtocol != "file") { throw new IllegalArgumentException("non-file dependency is not supported") } val path = URLDecoder.decode(url.getFile, "UTF-8") val file = new File(path) if (file.isDirectory) { return DirectoryDependencyNode(file.getAbsolutePath) } if (!file.isFile || !file.canRead) { throw new IllegalArgumentException(s"$path is not a file or readable") } DependencyNode.resolver.resolve(file.getAbsolutePath) match { case Some(node) => node case None => throw new IllegalArgumentException(s"Could not determine the dependency of $path") } } }
Example 132
Source File: File.scala From docspell with GNU General Public License v3.0 | 5 votes |
package docspell.common import java.io.IOException import java.nio.file._ import java.nio.file.attribute.BasicFileAttributes import java.util.concurrent.atomic.AtomicInteger import scala.jdk.CollectionConverters._ import cats.effect._ import cats.implicits._ import fs2.Stream object File { def mkDir[F[_]: Sync](dir: Path): F[Path] = Sync[F].delay(Files.createDirectories(dir)) def mkTempDir[F[_]: Sync](parent: Path, prefix: String): F[Path] = mkDir(parent).map(p => Files.createTempDirectory(p, prefix)) def mkTempFile[F[_]: Sync]( parent: Path, prefix: String, suffix: Option[String] = None ): F[Path] = mkDir(parent).map(p => Files.createTempFile(p, prefix, suffix.orNull)) def deleteDirectory[F[_]: Sync](dir: Path): F[Int] = Sync[F].delay { val count = new AtomicInteger(0) Files.walkFileTree( dir, new SimpleFileVisitor[Path]() { override def visitFile( file: Path, attrs: BasicFileAttributes ): FileVisitResult = { Files.deleteIfExists(file) count.incrementAndGet() FileVisitResult.CONTINUE } override def postVisitDirectory(dir: Path, e: IOException): FileVisitResult = Option(e) match { case Some(ex) => throw ex case None => Files.deleteIfExists(dir) FileVisitResult.CONTINUE } } ) count.get } def exists[F[_]: Sync](file: Path): F[Boolean] = Sync[F].delay(Files.exists(file)) def existsNonEmpty[F[_]: Sync](file: Path, minSize: Long = 0): F[Boolean] = Sync[F].delay(Files.exists(file) && Files.size(file) > minSize) def deleteFile[F[_]: Sync](file: Path): F[Unit] = Sync[F].delay(Files.deleteIfExists(file)).map(_ => ()) def delete[F[_]: Sync](path: Path): F[Int] = if (Files.isDirectory(path)) deleteDirectory(path) else deleteFile(path).map(_ => 1) def withTempDir[F[_]: Sync](parent: Path, prefix: String): Resource[F, Path] = Resource.make(mkTempDir(parent, prefix))(p => delete(p).map(_ => ())) def listFiles[F[_]: Sync](pred: Path => Boolean, dir: Path): F[List[Path]] = Sync[F].delay { val javaList = Files.list(dir).filter(p => pred(p)).collect(java.util.stream.Collectors.toList()) javaList.asScala.toList.sortBy(_.getFileName.toString) } def readAll[F[_]: Sync: ContextShift]( file: Path, blocker: Blocker, chunkSize: Int ): Stream[F, Byte] = fs2.io.file.readAll(file, blocker, chunkSize) def readText[F[_]: Sync: ContextShift](file: Path, blocker: Blocker): F[String] = readAll[F](file, blocker, 8192).through(fs2.text.utf8Decode).compile.foldMonoid }
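A small sketch of how the helpers above might be composed with cats-effect IO (the cats-effect 2 API used by the example is assumed); the parent directory and prefix are placeholders:

import java.nio.file.Paths
import cats.effect.IO
import docspell.common.File

object FileUsageSketch {
  def main(args: Array[String]): Unit = {
    val program = for {
      dir    <- File.mkTempDir[IO](Paths.get("/tmp"), "docspell-test")
      exists <- File.exists[IO](dir)
      _      <- File.delete[IO](dir)
    } yield exists
    // Runs the effect; withTempDir would instead manage deletion as a Resource.
    println(program.unsafeRunSync())
  }
}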
Example 133
Source File: ProcessStreamConnectionProvider.scala From intellij-lsp with Apache License 2.0 | 5 votes |
package com.github.gtache.lsp.client.connection import java.io.{File, IOException, InputStream, OutputStream} import java.util.Objects import com.intellij.openapi.diagnostic.Logger import org.jetbrains.annotations.Nullable class ProcessStreamConnectionProvider(private var commands: Seq[String], private var workingDir: String) extends StreamConnectionProvider { private val LOG: Logger = Logger.getInstance(classOf[ProcessStreamConnectionProvider]) @Nullable private var process: Process = _ @throws[IOException] override def start(): Unit = { if (this.workingDir == null || this.commands == null || this.commands.isEmpty || this.commands.contains(null)) throw new IOException("Unable to start language server: " + this.toString) //$NON-NLS-1$ val builder = createProcessBuilder LOG.info("Starting server process with commands " + commands + " and workingDir " + workingDir) this.process = builder.start if (!process.isAlive) throw new IOException("Unable to start language server: " + this.toString) else LOG.info("Server process started " + process) } protected def createProcessBuilder: ProcessBuilder = { import scala.collection.JavaConverters._ val builder = new ProcessBuilder(getCommands.map(s => s.replace("\'", "")).asJava) builder.directory(new File(getWorkingDirectory)) builder } protected def getCommands: Seq[String] = commands def setCommands(commands: Seq[String]): Unit = { this.commands = commands } protected def getWorkingDirectory: String = workingDir def setWorkingDirectory(workingDir: String): Unit = { this.workingDir = workingDir } @Nullable override def getInputStream: InputStream = { if (process == null) null else process.getInputStream } @Nullable override def getOutputStream: OutputStream = { if (process == null) null else process.getOutputStream } @Nullable override def getErrorStream: InputStream = { if (process == null) null else process.getErrorStream } override def stop(): Unit = { if (process != null) process.destroy() } override def equals(obj: Any): Boolean = { obj match { case other: ProcessStreamConnectionProvider => getCommands.size == other.getCommands.size && this.getCommands.toSet == other.getCommands.toSet && this.getWorkingDirectory == other.getWorkingDirectory case _ => false } } override def hashCode: Int = { Objects.hashCode(this.getCommands) ^ Objects.hashCode(this.getWorkingDirectory) } }
Example 134
Source File: ProcessOverSocketStreamConnectionProvider.scala From intellij-lsp with Apache License 2.0 | 5 votes |
package com.github.gtache.lsp.client.connection import java.io.{IOException, InputStream, OutputStream} import java.net.{ServerSocket, Socket} import java.util.Objects import com.intellij.openapi.diagnostic.Logger class ProcessOverSocketStreamConnectionProvider(commands: Seq[String], workingDir: String, port: Int = 0) extends ProcessStreamConnectionProvider(commands, workingDir) { import ProcessOverSocketStreamConnectionProvider._ private var socket: Socket = _ private var inputStream: InputStream = _ private var outputStream: OutputStream = _ @throws[IOException] override def start(): Unit = { val serverSocket = new ServerSocket(port) val socketThread = new Thread(() => { try socket = serverSocket.accept catch { case e: IOException => LOG.error(e) } finally try serverSocket.close() catch { case e: IOException => LOG.error(e) } }) socketThread.start() super.start() try { socketThread.join(5000) } catch { case e: InterruptedException => LOG.error(e) } if (socket == null) throw new IOException("Unable to make socket connection: " + toString) //$NON-NLS-1$ inputStream = socket.getInputStream outputStream = socket.getOutputStream } override def getInputStream: InputStream = inputStream override def getOutputStream: OutputStream = outputStream override def getErrorStream: InputStream = inputStream override def stop(): Unit = { super.stop() if (socket != null) try socket.close() catch { case e: IOException => LOG.error(e) } } override def hashCode: Int = { val result = super.hashCode result ^ Objects.hashCode(this.port) } } object ProcessOverSocketStreamConnectionProvider { private val LOG = Logger.getInstance(classOf[ProcessOverSocketStreamConnectionProvider]) }
Example 135
Source File: RconConnector.scala From chatoverflow with Eclipse Public License 2.0 | 5 votes |
package org.codeoverflow.chatoverflow.requirement.service.rcon import java.io.{DataInputStream, IOException, InputStream, OutputStream} import java.net.{Socket, SocketException} import java.nio.{ByteBuffer, ByteOrder} import java.util.Random import org.codeoverflow.chatoverflow.WithLogger import org.codeoverflow.chatoverflow.connector.Connector class RconConnector(override val sourceIdentifier: String) extends Connector(sourceIdentifier) with WithLogger { override protected var requiredCredentialKeys: List[String] = List("password", "address") override protected var optionalCredentialKeys: List[String] = List("port") private var socket: Socket = _ private var outputStream: OutputStream = _ private var inputStream: InputStream = _ private var requestId: Int = 0 def sendCommand(command: String): String = { logger debug s"Sending $command to RCON" requestId += 1 if (write(2, command.getBytes("ASCII"))) { return read() } null } override def stop(): Boolean = { logger info s"Stopped RCON connector to ${credentials.get.getValue("address").get}!" socket.close() true } }
Example 136
Source File: WriteTSToFiles.scala From scala-tsi with MIT License | 5 votes |
package com.scalatsi.output import java.io.{FileWriter, IOException} import com.scalatsi.TypescriptType.TypescriptNamedType import com.scalatsi.TypescriptTypeSerializer import scala.util.Try object WriteTSToFiles { def write(options: OutputOptions)(types: Set[TypescriptNamedType]): Unit = { try { val targetFile = options.targetFile val output = TypescriptTypeSerializer.emits(options.styleOptions, types) Try { Option(targetFile.getParentFile).foreach(_.mkdirs()) targetFile.createNewFile() } // createNewFile will continue if file exists .recover { case e: SecurityException => reportFailure(s"Could not create file '$targetFile' due to JVM security stopping it", code = 2, e = e) case e: IOException => reportFailure(s"Could not create file '$targetFile' due to I/O problem", code = 2, e = e) }.get // TODO: For some reason scala.util.Using isn't working in 2.12, even though we have the compat library // Using(new FileWriter(targetFile)) { writer => // writer.write(output) // }.recover { // case e: IOException => reportFailure(s"Could not write typescript to file '$targetFile' due to I/O problem", code = 2, e = e) // }.get (for { writer <- Try(new FileWriter(targetFile)) _ <- Try { try { writer.write(output) } finally { writer.close() } } } yield ()).recover { case e: IOException => reportFailure(s"Could not write typescript to file '$targetFile' due to I/O problem", code = 2, e = e) }.get () } catch { case e: Throwable => reportFailure( """Uncaught exception in scala-tsi output writer. |Please file a bug report at https://github.com/scala-tsi/scala-tsi/issues""".stripMargin, e = e ) } } def reportFailure(msg: String, code: Int = 1, e: Throwable = null): Nothing = { require(code > 0, "Should exist with a non-zero exit code on failure") System.err.println(msg) Option(e).foreach(_.printStackTrace()) // This will not stop SBT, and the non-zero exit will mark the task as unsuccessful sys.exit(code) } }
Example 137
Source File: PartitionProcessor.scala From etl-light with MIT License | 5 votes |
package yamrcraft.etlite.processors import java.io.IOException import org.slf4j.LoggerFactory import yamrcraft.etlite.transformers.InboundMessage import yamrcraft.etlite.writers.{ErrorInfo, ErrorEventWriter} import yamrcraft.etlite.{ErrorType, EtlException, EtlSettings} import scala.util.Try class PartitionProcessor(jobId: Long, partitionId: Int, settings: EtlSettings) { val logger = LoggerFactory.getLogger(this.getClass) val pipeline = settings.pipeline.createFactory.createPipeline(settings.pipeline, jobId, partitionId) val errorsWriter: ErrorEventWriter = new ErrorEventWriter(settings.errorsFolder, jobId, partitionId) def processPartition(partition: Iterator[InboundMessage]): Unit = { logger.info(s"partition processing started [jobId=$jobId, partitionId=$partitionId]") partition foreach { inbound => try { pipeline.processMessage(inbound) } catch { case e@(_: Exception) => logger.error("event processing error", e) val errorType = e match { case ex: EtlException => ex.errorType.toString case _ : IOException => ErrorType.WriteError.toString case _ => ErrorType.SystemError.toString } val cause = Try(e.getCause.getMessage).getOrElse("") val errorInfo = ErrorInfo(errorType, Some(cause)) errorsWriter.write((inbound.msg, errorInfo)) } } pipeline.writer.commit() errorsWriter.commit() logger.info(s"partition processing ended [jobId=$jobId, partitionId=$partitionId]") } }
Example 138
Source File: TimePartitioningWriter.scala From etl-light with MIT License | 5 votes |
package yamrcraft.etlite.writers

import java.io.IOException

import com.typesafe.config.Config
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.joda.time.format.DateTimeFormat
import org.slf4j.LoggerFactory
import yamrcraft.etlite.EtlException
import yamrcraft.etlite.transformers.Message
import yamrcraft.etlite.utils.ConfigConversions._

import scala.collection.mutable

class TimePartitioningWriter[T](config: Config, jobId: Long, partitionId: Int,
                                writerFactory: (String, String) => Writer[T]) extends Writer[Message[T]] {

  val logger = LoggerFactory.getLogger(this.getClass)

  // config settings
  val workingFolder: String = config.getString("working-folder")
  val outputFolder: String = config.getString("output-folder")
  val partitionPattern: String = config.getString("partition.pattern")
  val folderMapping: Map[String, String] = config.getConfig("record-name-to-folder-mapping").asMap

  val fs = FileSystem.get(new Configuration())

  val partitionFormat = DateTimeFormat.forPattern(partitionPattern)
  val partitionsWriters = mutable.Map[String, Writer[T]]()

  @throws(classOf[EtlException])
  @throws(classOf[IOException])
  override def write(event: Message[T]): Unit = {
    val timestamp = event.msgTimestamp
    val baseFolder = folderMapping.getOrElse(event.msgType, event.msgType)

    val writer = writerFor(baseFolder, timestamp)
    writer.write(event.msg)
  }

  override def commit() = {
    // close all writers
    partitionsWriters foreach { case (file, writer) =>
      writer.commit()
    }
  }

  @throws(classOf[EtlException])
  private def writerFor(baseFolder: String, timestamp: Long): Writer[T] = {
    val relativeFileName = new Path(
      s"$baseFolder/${partitionFormat.print(timestamp)}/events_${baseFolder}_job${jobId}_part$partitionId")
    val tempFile = new Path(workingFolder, relativeFileName)
    val outputFile = new Path(outputFolder, relativeFileName)
    partitionsWriters.getOrElseUpdate(tempFile.toString, writerFactory(tempFile.toString, outputFile.toString))
  }
}
Example 139
Source File: SelectorProvider.scala From scalaz-nio with Apache License 2.0 | 5 votes |
package zio.nio.channels.spi

import java.io.IOException
import java.net.ProtocolFamily
import java.nio.channels.{ Channel => JChannel, DatagramChannel => JDatagramChannel }
import java.nio.channels.spi.{ SelectorProvider => JSelectorProvider }

import zio.nio.channels.{ Pipe, Selector, ServerSocketChannel, SocketChannel }
import zio.IO

class SelectorProvider(private val selectorProvider: JSelectorProvider) {

  final val openDatagramChannel: IO[IOException, JDatagramChannel] = // TODO: wrapper for DatagramChannel
    IO.effect(selectorProvider.openDatagramChannel()).refineToOrDie[IOException]

  // this can throw UnsupportedOperationException - doesn't seem like a recoverable exception
  final def openDatagramChannel(
    family: ProtocolFamily
  ): IO[IOException, JDatagramChannel] = // TODO: wrapper for DatagramChannel
    IO.effect(selectorProvider.openDatagramChannel(family)).refineToOrDie[IOException]

  final val openPipe: IO[IOException, Pipe] =
    IO.effect(new Pipe(selectorProvider.openPipe())).refineToOrDie[IOException]

  final val openSelector: IO[IOException, Selector] =
    IO.effect(new Selector(selectorProvider.openSelector())).refineToOrDie[IOException]

  final val openServerSocketChannel: IO[IOException, ServerSocketChannel] =
    IO.effect(new ServerSocketChannel(selectorProvider.openServerSocketChannel()))
      .refineToOrDie[IOException]

  final val openSocketChannel: IO[IOException, SocketChannel] =
    IO.effect(new SocketChannel(selectorProvider.openSocketChannel())).refineToOrDie[IOException]

  final val inheritedChannel: IO[IOException, Option[JChannel]] = // TODO: wrapper for Channel
    IO.effect(Option(selectorProvider.inheritedChannel())).refineToOrDie[IOException]
}

object SelectorProvider {

  final val make: IO[Nothing, SelectorProvider] =
    IO.effectTotal(JSelectorProvider.provider()).map(new SelectorProvider(_))
}
Example 140
Source File: AsynchronousChannelGroup.scala From scalaz-nio with Apache License 2.0 | 5 votes |
package zio.nio.channels import java.io.IOException import java.nio.channels.{ AsynchronousChannelGroup => JAsynchronousChannelGroup } import java.nio.channels.spi.{ AsynchronousChannelProvider => JAsynchronousChannelProvider } import java.util.concurrent.{ ExecutorService => JExecutorService, ThreadFactory => JThreadFactory } import java.util.concurrent.TimeUnit import zio.{ IO, UIO } import zio.duration.Duration object AsynchronousChannelGroup { def apply(executor: JExecutorService, initialSize: Int): IO[Exception, AsynchronousChannelGroup] = IO.effect( new AsynchronousChannelGroup( JAsynchronousChannelGroup.withCachedThreadPool(executor, initialSize) ) ) .refineToOrDie[Exception] def apply( threadsNo: Int, threadsFactory: JThreadFactory ): IO[Exception, AsynchronousChannelGroup] = IO.effect( new AsynchronousChannelGroup( JAsynchronousChannelGroup.withFixedThreadPool(threadsNo, threadsFactory) ) ) .refineToOrDie[Exception] def apply(executor: JExecutorService): IO[Exception, AsynchronousChannelGroup] = IO.effect( new AsynchronousChannelGroup(JAsynchronousChannelGroup.withThreadPool(executor)) ) .refineToOrDie[Exception] } class AsynchronousChannelGroup(private[channels] val channelGroup: JAsynchronousChannelGroup) { def awaitTermination(timeout: Duration): IO[Exception, Boolean] = IO.effect(channelGroup.awaitTermination(timeout.asJava.toMillis, TimeUnit.MILLISECONDS)) .refineToOrDie[Exception] val isShutdown: UIO[Boolean] = IO.effectTotal(channelGroup.isShutdown) val isTerminated: UIO[Boolean] = IO.effectTotal(channelGroup.isTerminated) val provider: UIO[JAsynchronousChannelProvider] = IO.effectTotal(channelGroup.provider()) val shutdown: UIO[Unit] = IO.effectTotal(channelGroup.shutdown()) val shutdownNow: IO[IOException, Unit] = IO.effect(channelGroup.shutdownNow()).refineToOrDie[IOException] }
Example 141
Source File: PlantUMLUtils.scala From gitbucket-plantuml-plugin with Apache License 2.0 | 5 votes |
package com.yotaichino.gitbucket.plugins.plantuml import java.io.ByteArrayOutputStream import java.io.IOException import java.lang.NullPointerException import net.sourceforge.plantuml.FileFormat import net.sourceforge.plantuml.FileFormatOption import net.sourceforge.plantuml.SourceStringReader object PlantUMLUtils { def generateSVGImage(source: String): Array[Byte] = generateImage(source, FileFormat.SVG) private def generateImage(source: String, format: FileFormat): Array[Byte] = { val reader = new SourceStringReader(source) val os = new ByteArrayOutputStream() try { reader.outputImage(os, new FileFormatOption(format)).getDescription() } catch { case _: IOException => return null case _: NullPointerException => return null } finally { os.close() } os.toByteArray() } }
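A brief usage sketch for the utility above; the diagram source and output path are made up for illustration:

import java.nio.file.{Files, Paths}
import com.yotaichino.gitbucket.plugins.plantuml.PlantUMLUtils

object PlantUMLUsageSketch {
  def main(args: Array[String]): Unit = {
    val source = "@startuml\nAlice -> Bob: hello\n@enduml"
    // generateSVGImage returns null when rendering fails, so guard before writing.
    val svg = PlantUMLUtils.generateSVGImage(source)
    if (svg != null) {
      Files.write(Paths.get("/tmp/diagram.svg"), svg)
    }
  }
}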
Example 142
Source File: TemporaryDirectoryContext.scala From cluster-broccoli with Apache License 2.0 | 5 votes |
package de.frosner.broccoli.util import java.io.IOException import java.nio.file.attribute.BasicFileAttributes import java.nio.file.{FileVisitResult, FileVisitor, Files, Path} import org.specs2.execute.{AsResult, Result} import org.specs2.specification.ForEach trait TemporaryDirectoryContext extends ForEach[Path] { override protected def foreach[R: AsResult](f: (Path) => R): Result = { val tempDirectory = Files.createTempDirectory(getClass.getName) try { AsResult(f(tempDirectory)) } finally { Files.walkFileTree( tempDirectory, new FileVisitor[Path] { override def postVisitDirectory(dir: Path, exc: IOException): FileVisitResult = { Files.delete(dir) FileVisitResult.CONTINUE } override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = { Files.delete(file) FileVisitResult.CONTINUE } override def visitFileFailed(file: Path, exc: IOException): FileVisitResult = throw exc override def preVisitDirectory(dir: Path, attrs: BasicFileAttributes): FileVisitResult = FileVisitResult.CONTINUE } ) } } }
Example 143
Source File: FileHelper.scala From bahir with Apache License 2.0 | 5 votes |
package org.apache.bahir.utils import java.io.{File, IOException} import java.nio.file.{Files, FileVisitResult, Path, SimpleFileVisitor} import java.nio.file.attribute.BasicFileAttributes object FileHelper extends Logging { def deleteFileQuietly(file: File): Path = { Files.walkFileTree(file.toPath, new SimpleFileVisitor[Path]() { override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = { try { Files.delete(file) } catch { case t: Throwable => log.warn("Failed to delete", t) } FileVisitResult.CONTINUE } override def postVisitDirectory(dir: Path, exc: IOException): FileVisitResult = { try { Files.delete(dir) } catch { case t: Throwable => log.warn("Failed to delete", t) } FileVisitResult.CONTINUE } }) } }
Example 144
Source File: ImageInputFormat.scala From flink-tensorflow with Apache License 2.0 | 5 votes |
package org.apache.flink.contrib.tensorflow.examples.inception import java.io.IOException import java.util.Collections import com.twitter.bijection.Conversion._ import org.apache.flink.api.common.io.GlobFilePathFilter import org.apache.flink.configuration.Configuration import org.apache.flink.contrib.tensorflow._ import org.apache.flink.contrib.tensorflow.common.functions.util.ModelUtils import org.apache.flink.contrib.tensorflow.io.WholeFileInputFormat import org.apache.flink.contrib.tensorflow.io.WholeFileInputFormat._ import org.apache.flink.core.fs.{FSDataInputStream, Path} import org.slf4j.{Logger, LoggerFactory} import org.tensorflow.contrib.scala.ByteStrings._ import resource._ import scala.collection.JavaConverters._ override def readRecord( reuse: (String,ImageTensorValue), filePath: Path, fileStream: FSDataInputStream, fileLength: Long): (String,ImageTensorValue) = { if(fileLength > Int.MaxValue) { throw new IllegalArgumentException("the file is too large to be fully read") } val imageData = readFully(fileStream, new Array[Byte](fileLength.toInt), 0, fileLength.toInt).asByteString[ImageFile] val imageTensor: ImageTensorValue = managed(imageData.as[ImageFileTensor]) .flatMap(x => model.normalize(x)) .acquireAndGet(_.toValue) (filePath.getName, imageTensor) } } object ImageInputFormat { def apply(): ImageInputFormat = new ImageInputFormat }
Example 145
Source File: WholeFileInputFormat.scala From flink-tensorflow with Apache License 2.0 | 5 votes |
package org.apache.flink.contrib.tensorflow.io

import java.io.{EOFException, IOException, InputStream}

import org.apache.flink.api.common.io.FileInputFormat
import org.apache.flink.configuration.Configuration
import org.apache.flink.core.fs._
import org.apache.flink.util.Preconditions.checkState

  @throws[IOException]
  def readRecord(reuse: T, filePath: Path, fileStream: FSDataInputStream, fileLength: Long): T

  // --------------------------------------------------------------------------------------------
  //  Lifecycle
  // --------------------------------------------------------------------------------------------

  override def nextRecord(reuse: T): T = {
    checkState(!reachedEnd())
    checkState(currentSplit != null && currentSplit.getStart == 0)
    checkState(stream != null)
    readRecord(reuse, currentSplit.getPath, stream, currentSplit.getLength)
  }

  override def reachedEnd(): Boolean = {
    stream.getPos != 0
  }
}

@SerialVersionUID(1L)
object WholeFileInputFormat {

  @throws[IOException]
  def readFully(fileStream: FSDataInputStream, fileLength: Long): Array[Byte] = {
    if (fileLength > Int.MaxValue) {
      throw new IllegalArgumentException("the file is too large to be fully read")
    }
    val buf = new Array[Byte](fileLength.toInt)
    readFully(fileStream, buf, 0, fileLength.toInt)
    buf
  }

  @throws[IOException]
  def readFully(inputStream: InputStream, buf: Array[Byte], off: Int, len: Int): Array[Byte] = {
    var bytesRead = 0
    while (bytesRead < len) {
      val read = inputStream.read(buf, off + bytesRead, len - bytesRead)
      if (read < 0) throw new EOFException("Premature end of stream")
      bytesRead += read
    }
    buf
  }
}
Example 146
Source File: RangerSparkPlugin.scala From spark-ranger with Apache License 2.0 | 5 votes |
package org.apache.ranger.authorization.spark.authorizer import java.io.{File, IOException} import org.apache.commons.logging.LogFactory import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzSessionContext.CLIENT_TYPE import org.apache.ranger.authorization.hadoop.config.RangerConfiguration import org.apache.ranger.plugin.service.RangerBasePlugin class RangerSparkPlugin private extends RangerBasePlugin("spark", "sparkSql") { import RangerSparkPlugin._ private val LOG = LogFactory.getLog(classOf[RangerSparkPlugin]) lazy val fsScheme: Array[String] = RangerConfiguration.getInstance() .get("ranger.plugin.spark.urlauth.filesystem.schemes", "hdfs:,file:") .split(",") .map(_.trim) override def init(): Unit = { super.init() val cacheDir = new File(rangerConf.get("ranger.plugin.spark.policy.cache.dir")) if (cacheDir.exists() && (!cacheDir.isDirectory || !cacheDir.canRead || !cacheDir.canWrite)) { throw new IOException("Policy cache directory already exists at" + cacheDir.getAbsolutePath + ", but it is unavailable") } if (!cacheDir.exists() && !cacheDir.mkdirs()) { throw new IOException("Unable to create ranger policy cache directory at" + cacheDir.getAbsolutePath) } LOG.info("Policy cache directory successfully set to " + cacheDir.getAbsolutePath) } } object RangerSparkPlugin { private val rangerConf: RangerConfiguration = RangerConfiguration.getInstance val showColumnsOption: String = rangerConf.get( "xasecure.spark.describetable.showcolumns.authorization.option", "NONE") def build(): Builder = new Builder class Builder { @volatile private var sparkPlugin: RangerSparkPlugin = _ def getOrCreate(): RangerSparkPlugin = RangerSparkPlugin.synchronized { if (sparkPlugin == null) { sparkPlugin = new RangerSparkPlugin sparkPlugin.init() sparkPlugin } else { sparkPlugin } } } }
Example 147
Source File: Version.scala From apalache with Apache License 2.0 | 5 votes |
package at.forsyte.apalache.tla.tooling

import java.io.IOException
import java.util.Properties

object Version {
  private val pomProps: Properties = loadProperties("META-INF/maven/at.forsyte.apalache/tool/pom.properties")
  private val gitProps: Properties = loadProperties("at/forsyte/apalache/tla/tooling/git.properties")

  def version: String = {
    pomProps.getProperty("version", "version-dev")
  }

  def build: String = {
    gitProps.getProperty("git.commit.id.describe", "unknown-build")
  }

  private def loadProperties(name: String): Properties = {
    val resourceStream = ClassLoader.getSystemClassLoader.getResourceAsStream(name)
    var props = new Properties()
    try {
      if (resourceStream != null) {
        props.load(resourceStream)
      }
    } catch {
      case _: IOException => () // ignore and set defaults, this is not a critical function
      case e: Throwable => throw e
    }

    props
  }
}
Example 148
Source File: ProtocolModule.scala From zio-web with Apache License 2.0 | 5 votes |
package zio.web import java.io.IOException import zio._ trait ProtocolModule extends EndpointModule { type ServerConfig type ClientConfig type ServerService type ProtocolDocs type Middleware[-R, +E] def makeServer[R <: Has[ServerConfig], E, A]( middleware: Middleware[R, E], service: Service[A], handlers: Handlers[R, A] ): ZLayer[R, IOException, Has[ServerService]] def makeDocs(service: Service[_]): ProtocolDocs def makeClient[A](service: Service[A]): ZLayer[Has[ClientConfig], IOException, Has[ClientService[A]]] }
Example 149
Source File: FileHelper.scala From spark-nlp with Apache License 2.0 | 5 votes |
package com.johnsnowlabs.util import java.io.{File, IOException} import java.nio.charset.Charset import java.nio.file.{Files, Paths} import java.security.MessageDigest import java.text.DecimalFormat import org.apache.commons.io.FileUtils object FileHelper { def writeLines(file: String, lines: Seq[String], encoding: String = "UTF-8"): Unit = { val writer = Files.newBufferedWriter(Paths.get(file), Charset.forName(encoding)) try { var cnt = 0 for (line <- lines) { writer.write(line) if (cnt > 0) writer.write(System.lineSeparator()) cnt += 1 } } catch { case ex: IOException => ex.printStackTrace() } finally if (writer != null) writer.close() } def delete(file: String, throwOnError: Boolean = false): Unit = { val f = new File(file) if (f.exists()) { try { if (f.isDirectory) FileUtils.deleteDirectory(f) else FileUtils.deleteQuietly(f) } catch { case e: Exception => if (throwOnError) throw e else FileUtils.forceDeleteOnExit(f) } } } def generateChecksum(path: String): String = { val arr = Files readAllBytes (Paths get path) val checksum = MessageDigest.getInstance("MD5") digest arr checksum.map("%02X" format _).mkString } def getHumanReadableFileSize(size: Long): String = { if (size <= 0) return "0" val units = Array[String]("B", "KB", "MB", "GB", "TB", "PB", "EB") val digitGroups = (Math.log10(size) / Math.log10(1024)).toInt new DecimalFormat("#,##0.#").format(size / Math.pow(1024, digitGroups)) + " " + units(digitGroups) } }
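A short usage sketch for the helper above; the file path and contents are placeholders:

import com.johnsnowlabs.util.FileHelper

object FileHelperUsageSketch {
  def main(args: Array[String]): Unit = {
    val path = "/tmp/example.txt"
    // Writes the lines with the default UTF-8 encoding, then prints an MD5
    // checksum and a human-readable size for the same file.
    FileHelper.writeLines(path, Seq("first line", "second line"))
    println(FileHelper.generateChecksum(path))
    println(FileHelper.getHumanReadableFileSize(new java.io.File(path).length()))
  }
}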
Example 150
Source File: RemoraApp.scala From remora with MIT License | 5 votes |
import java.io.IOException import java.net.ConnectException import java.util.concurrent.{TimeUnit, TimeoutException} import akka.actor.ActorSystem import akka.stream.{ActorMaterializer, ActorMaterializerSettings, Supervision} import com.amazonaws.services.cloudwatch.{AmazonCloudWatchAsync, AmazonCloudWatchAsyncClientBuilder} import com.blacklocus.metrics.CloudWatchReporterBuilder import com.codahale.metrics.jvm.{GarbageCollectorMetricSet, MemoryUsageGaugeSet, ThreadStatesGaugeSet} import com.typesafe.scalalogging.LazyLogging import config.{KafkaSettings, MetricsSettings} import kafka.admin.RemoraKafkaConsumerGroupService import reporter.RemoraDatadogReporter import scala.concurrent.duration._ import scala.util.control.NonFatal object RemoraApp extends App with nl.grons.metrics.scala.DefaultInstrumented with LazyLogging { private val actorSystemName: String = "remora" implicit val actorSystem = ActorSystem(actorSystemName) metricRegistry.registerAll(new GarbageCollectorMetricSet) metricRegistry.registerAll(new MemoryUsageGaugeSet) metricRegistry.registerAll(new ThreadStatesGaugeSet) lazy val decider: Supervision.Decider = { case _: IOException | _: ConnectException | _: TimeoutException => Supervision.Restart case NonFatal(err: Throwable) => actorSystem.log.error(err, "Unhandled Exception in Stream: {}", err.getMessage) Supervision.Stop } implicit val materializer = ActorMaterializer( ActorMaterializerSettings(actorSystem).withSupervisionStrategy(decider))(actorSystem) implicit val executionContext = actorSystem.dispatchers.lookup("kafka-consumer-dispatcher") val kafkaSettings = KafkaSettings(actorSystem.settings.config) val consumer = new RemoraKafkaConsumerGroupService(kafkaSettings) val kafkaClientActor = actorSystem.actorOf(KafkaClientActor.props(consumer), name = "kafka-client-actor") Api(kafkaClientActor).start() val metricsSettings = MetricsSettings(actorSystem.settings.config) if (metricsSettings.registryOptions.enabled) { val exportConsumerMetricsToRegistryActor = actorSystem.actorOf(ExportConsumerMetricsToRegistryActor.props(kafkaClientActor), name = "export-consumer-metrics-actor") actorSystem.scheduler.schedule(0 second, metricsSettings.registryOptions.intervalSeconds second, exportConsumerMetricsToRegistryActor, "export") } if (metricsSettings.cloudWatch.enabled) { logger.info("Reporting metricsRegistry to Cloudwatch") val amazonCloudWatchAsync: AmazonCloudWatchAsync = AmazonCloudWatchAsyncClientBuilder.defaultClient new CloudWatchReporterBuilder() .withNamespace(metricsSettings.cloudWatch.name) .withRegistry(metricRegistry) .withClient(amazonCloudWatchAsync) .build() .start(metricsSettings.cloudWatch.intervalMinutes, TimeUnit.MINUTES) } if (metricsSettings.dataDog.enabled) { logger.info(s"Reporting metricsRegistry to Datadog at ${metricsSettings.dataDog.agentHost}:${metricsSettings.dataDog.agentPort}") val datadogReporter = new RemoraDatadogReporter(metricRegistry, metricsSettings.dataDog) datadogReporter.startReporter() } }
Example 151
Source File: StreamReadingThread.scala From ncdbg with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.programmaticallyspeaking.ncd.nashorn

import java.io.{BufferedReader, IOException, InputStream, InputStreamReader}

import scala.util.control.NonFatal

class StreamReadingThread(in: InputStream, appender: (String) => Unit) extends Thread {
  override def run(): Unit = {
    try {
      val reader = new BufferedReader(new InputStreamReader(in))
      var str = ""
      while (str != null) {
        str = reader.readLine()
        Option(str).foreach(appender)
      }
    } catch {
      case _: InterruptedException => // ok
      case ex: IOException if isStreamClosed(ex) => // ok
      case NonFatal(t) => t.printStackTrace(System.err)
    }
  }

  private def isStreamClosed(ex: IOException) = ex.getMessage.toLowerCase == "stream closed"
}
Example 152
Source File: MemoryAppender.scala From ncdbg with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.programmaticallyspeaking.ncd.testing import ch.qos.logback.classic.spi.ILoggingEvent import ch.qos.logback.core.UnsynchronizedAppenderBase import ch.qos.logback.core.encoder.Encoder import ch.qos.logback.core.status.ErrorStatus import java.io.{ByteArrayOutputStream, IOException, OutputStream} import java.nio.charset.StandardCharsets import com.programmaticallyspeaking.ncd.messaging.{Observable, SerializedSubject} object MemoryAppender { private[MemoryAppender] val logEventSubject = new SerializedSubject[String] def logEvents: Observable[String] = logEventSubject } class MemoryAppender extends UnsynchronizedAppenderBase[ILoggingEvent] { import MemoryAppender._ private var encoder: Encoder[ILoggingEvent] = _ private var outputStream = new OutputStream { override def write(b: Int): Unit = ??? override def write(b: Array[Byte]): Unit = { val str = new String(b, StandardCharsets.UTF_8) logEventSubject.onNext(str) } } override def start(): Unit = { try { Option(encoder).foreach(_.init(outputStream)) super.start() } catch { case e: IOException => started = false addStatus(new ErrorStatus("Failed to initialize encoder for appender named [" + name + "].", this, e)) } } override protected def append(event: ILoggingEvent): Unit = { if (!isStarted) return try { event.prepareForDeferredProcessing() Option(encoder).foreach(_.doEncode(event)) } catch { case ioe: IOException => started = false addStatus(new ErrorStatus("IO failure in appender", this, ioe)) } } def setEncoder(e: Encoder[ILoggingEvent]): Unit = { encoder = e } }
Example 153
Source File: Secrets.scala From mmlspark with MIT License | 5 votes |
import java.io.IOException import java.util.Base64 import sys.process._ import spray.json._ import DefaultJsonProtocol._ import org.apache.commons.io.IOUtils import sbt.{SettingKey, TaskKey} object Secrets { private val kvName = "mmlspark-keys" private val subscriptionID = "ce1dee05-8cf6-4ad6-990a-9c80868800ba" protected def exec(command: String): String = { val os = sys.props("os.name").toLowerCase os match { case x if x contains "windows" => Seq("cmd", "/C") ++ Seq(command) !! case _ => command !! } } private def getSecret(secretName: String): String = { println(s"fetching secret: $secretName") try { exec(s"az account set -s $subscriptionID") val secretJson = exec(s"az keyvault secret show --vault-name $kvName --name $secretName") secretJson.parseJson.asJsObject().fields("value").convertTo[String] } catch { case _: IOException => println("WARNING: Could not load secret from keyvault, defaulting to the empty string." + " Please install az command line to perform authorized build steps like publishing") "" case _: java.lang.RuntimeException => println("WARNING: Could not load secret from keyvault, defaulting to the empty string." + " Please install az command line to perform authorized build steps like publishing") "" } } lazy val nexusUsername: String = sys.env.getOrElse("NEXUS-UN", getSecret("nexus-un")) lazy val nexusPassword: String = sys.env.getOrElse("NEXUS-PW", getSecret("nexus-pw")) lazy val pgpPublic: String = new String(Base64.getDecoder.decode( sys.env.getOrElse("PGP-PUBLIC", getSecret("pgp-public")).getBytes("UTF-8"))) lazy val pgpPrivate: String = new String(Base64.getDecoder.decode( sys.env.getOrElse("PGP-PRIVATE", getSecret("pgp-private")).getBytes("UTF-8"))) lazy val pgpPassword: String = sys.env.getOrElse("PGP-PW", getSecret("pgp-pw")) lazy val storageKey: String = sys.env.getOrElse("STORAGE_KEY", getSecret("storage-key")) }
Example 154
Source File: Gateway.scala From reactive-microservices with MIT License | 5 votes |
import akka.actor.ActorSystem import akka.http.scaladsl.Http import akka.http.scaladsl.client.RequestBuilding import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ import akka.http.scaladsl.model.StatusCodes._ import akka.http.scaladsl.model.{HttpRequest, HttpResponse} import akka.http.scaladsl.unmarshalling.Unmarshal import akka.stream.FlowMaterializer import akka.stream.scaladsl.{Sink, Source} import java.io.IOException import scala.concurrent.{ExecutionContext, Future} case class InternalLoginRequest(identityId: Long, authMethod: String = "codecard") case class InternalReloginRequest(tokenValue: String, authMethod: String = "codecard") class Gateway(implicit actorSystem: ActorSystem, materializer: FlowMaterializer, ec: ExecutionContext) extends JsonProtocols with Config { private val identityManagerConnectionFlow = Http().outgoingConnection(identityManagerHost, identityManagerPort) private val tokenManagerConnectionFlow = Http().outgoingConnection(tokenManagerHost, tokenManagerPort) private def requestIdentityManager(request: HttpRequest): Future[HttpResponse] = { Source.single(request).via(identityManagerConnectionFlow).runWith(Sink.head) } private def requestTokenManager(request: HttpRequest): Future[HttpResponse] = { Source.single(request).via(tokenManagerConnectionFlow).runWith(Sink.head) } def requestToken(tokenValue: String): Future[Either[String, Token]] = { requestTokenManager(RequestBuilding.Get(s"/tokens/$tokenValue")).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Token].map(Right(_)) case NotFound => Future.successful(Left("Token expired or not found")) case _ => Future.failed(new IOException(s"Token request failed with status ${response.status} and error ${response.entity}")) } } } def requestNewIdentity(): Future[Identity] = { requestIdentityManager(RequestBuilding.Post("/identities")).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Identity] case _ => Future.failed(new IOException(s"Identity request failed with status ${response.status} and error ${response.entity}")) } } } def requestLogin(identityId: Long): Future[Token] = { val loginRequest = InternalLoginRequest(identityId) requestTokenManager(RequestBuilding.Post("/tokens", loginRequest)).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Token] case _ => Future.failed(new IOException(s"Login request failed with status ${response.status} and error ${response.entity}")) } } } def requestRelogin(tokenValue: String): Future[Option[Token]] = { requestTokenManager(RequestBuilding.Patch("/tokens", InternalReloginRequest(tokenValue))).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Token].map(Option(_)) case NotFound => Future.successful(None) case _ => Future.failed(new IOException(s"Relogin request failed with status ${response.status} and error ${response.entity}")) } } } }
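Each request method above follows the same shape: a 2xx status is unmarshalled, anything else becomes a failed Future wrapping an IOException. The core of that mapping, reduced to plain types (names are illustrative):

import java.io.IOException
import scala.concurrent.Future

object StatusMapping {
  // 2xx: parse the body; otherwise fail the Future with a descriptive
  // IOException, mirroring the error handling in the Gateway methods above.
  def parseOrFail[A](status: Int, body: String)(parse: String => A): Future[A] =
    if (status >= 200 && status < 300) Future.successful(parse(body))
    else Future.failed(new IOException(s"Request failed with status $status and error $body"))
}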
Example 155
Source File: Gateway.scala From reactive-microservices with MIT License | 5 votes |
import akka.actor.ActorSystem import akka.http.scaladsl.Http import akka.http.scaladsl.client.RequestBuilding import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ import akka.http.scaladsl.model.StatusCodes._ import akka.http.scaladsl.model.{HttpRequest, HttpResponse} import akka.http.scaladsl.unmarshalling.Unmarshal import akka.stream.FlowMaterializer import akka.stream.scaladsl.{Sink, Source} import java.io.IOException import scala.concurrent.{ExecutionContext, Future} case class InternalLoginRequest(identityId: Long, authMethod: String = "password") case class InternalReloginRequest(tokenValue: String, authMethod: String = "password") class Gateway(implicit actorSystem: ActorSystem, materializer: FlowMaterializer, ec: ExecutionContext) extends JsonProtocols with Config { private val identityManagerConnectionFlow = Http().outgoingConnection(identityManagerHost, identityManagerPort) private val tokenManagerConnectionFlow = Http().outgoingConnection(tokenManagerHost, tokenManagerPort) private def requestIdentityManager(request: HttpRequest): Future[HttpResponse] = { Source.single(request).via(identityManagerConnectionFlow).runWith(Sink.head) } private def requestTokenManager(request: HttpRequest): Future[HttpResponse] = { Source.single(request).via(tokenManagerConnectionFlow).runWith(Sink.head) } def requestToken(tokenValue: String): Future[Either[String, Token]] = { requestTokenManager(RequestBuilding.Get(s"/tokens/$tokenValue")).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Token].map(Right(_)) case NotFound => Future.successful(Left("Token expired or not found")) case _ => Future.failed(new IOException(s"Token request failed with status ${response.status} and error ${response.entity}")) } } } def requestNewIdentity(): Future[Identity] = { requestIdentityManager(RequestBuilding.Post("/identities")).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Identity] case _ => Future.failed(new IOException(s"Identity request failed with status ${response.status} and error ${response.entity}")) } } } def requestLogin(identityId: Long): Future[Token] = { val loginRequest = InternalLoginRequest(identityId) requestTokenManager(RequestBuilding.Post("/tokens", loginRequest)).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Token] case _ => Future.failed(new IOException(s"Login request failed with status ${response.status} and error ${response.entity}")) } } } def requestRelogin(tokenValue: String): Future[Option[Token]] = { requestTokenManager(RequestBuilding.Patch("/tokens", InternalReloginRequest(tokenValue))).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Token].map(Option(_)) case NotFound => Future.successful(None) case _ => Future.failed(new IOException(s"Relogin request failed with status ${response.status} and error ${response.entity}")) } } } }
Example 156
Source File: Gateway.scala From reactive-microservices with MIT License | 5 votes |
import akka.actor.ActorSystem import akka.http.scaladsl.Http import akka.http.scaladsl.client.RequestBuilding import akka.http.scaladsl.marshallers.sprayjson.SprayJsonSupport._ import akka.http.scaladsl.model.StatusCodes._ import akka.http.scaladsl.model.{HttpRequest, HttpResponse} import akka.http.scaladsl.unmarshalling.Unmarshal import akka.stream.FlowMaterializer import akka.stream.scaladsl.{Sink, Source} import com.restfb.DefaultFacebookClient import com.restfb.types.User import java.io.IOException import scala.concurrent.{blocking, ExecutionContext, Future} import scala.util.Try case class InternalLoginRequest(identityId: Long, authMethod: String = "fb") case class InternalReloginRequest(tokenValue: String, authMethod: String = "fb") class Gateway(implicit actorSystem: ActorSystem, materializer: FlowMaterializer, ec: ExecutionContext) extends JsonProtocols with Config { private val identityManagerConnectionFlow = Http().outgoingConnection(identityManagerHost, identityManagerPort) private val tokenManagerConnectionFlow = Http().outgoingConnection(tokenManagerHost, tokenManagerPort) private def requestIdentityManager(request: HttpRequest): Future[HttpResponse] = { Source.single(request).via(identityManagerConnectionFlow).runWith(Sink.head) } private def requestTokenManager(request: HttpRequest): Future[HttpResponse] = { Source.single(request).via(tokenManagerConnectionFlow).runWith(Sink.head) } def requestToken(tokenValue: String): Future[Either[String, Token]] = { requestTokenManager(RequestBuilding.Get(s"/tokens/$tokenValue")).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Token].map(Right(_)) case NotFound => Future.successful(Left("Token expired or not found")) case _ => Future.failed(new IOException(s"Token request failed with status ${response.status} and error ${response.entity}")) } } } def requestNewIdentity(): Future[Identity] = { requestIdentityManager(RequestBuilding.Post("/identities")).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Identity] case _ => Future.failed(new IOException(s"Identity request failed with status ${response.status} and error ${response.entity}")) } } } def requestLogin(identityId: Long): Future[Token] = { val loginRequest = InternalLoginRequest(identityId) requestTokenManager(RequestBuilding.Post("/tokens", loginRequest)).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Token] case _ => Future.failed(new IOException(s"Login request failed with status ${response.status} and error ${response.entity}")) } } } def requestRelogin(tokenValue: String): Future[Option[Token]] = { requestTokenManager(RequestBuilding.Patch("/tokens", InternalReloginRequest(tokenValue))).flatMap { response => response.status match { case Success(_) => Unmarshal(response.entity).to[Token].map(Option(_)) case NotFound => Future.successful(None) case _ => Future.failed(new IOException(s"Relogin request failed with status ${response.status} and error ${response.entity}")) } } } def getFbUserDetails(accessToken: String): Try[User] = { Try { blocking { val client = new DefaultFacebookClient(accessToken) client.fetchObject("me", classOf[User]) } } } }
Example 157
Source File: HttpHandler.scala From play-auditing with Apache License 2.0 | 5 votes |
package uk.gov.hmrc.audit.handler import java.io.IOException import java.net.URL import java.util.concurrent.TimeoutException import akka.stream.Materializer import org.slf4j.{Logger, LoggerFactory} import play.api.inject.ApplicationLifecycle import play.api.libs.json.JsValue import scala.concurrent.{ExecutionContext, Future} import scala.concurrent.duration.Duration sealed trait HttpResult object HttpResult { case class Response(statusCode: Int) extends HttpResult case object Malformed extends HttpResult case class Failure(msg: String, nested: Option[Throwable] = None) extends Exception(msg, nested.orNull) with HttpResult } abstract class HttpHandler( endpointUrl : URL, userAgent : String, connectTimeout : Duration, requestTimeout : Duration, materializer : Materializer, lifecycle : ApplicationLifecycle ) { private val logger: Logger = LoggerFactory.getLogger(getClass) val HTTP_STATUS_CONTINUE = 100 val wsClient: WSClient = { implicit val m = materializer val wsClient = WSClient(connectTimeout, requestTimeout, userAgent) lifecycle.addStopHook { () => logger.info("Closing play-auditing http connections...") wsClient.close() Future.successful(()) } wsClient } def sendHttpRequest(event: JsValue)(implicit ec: ExecutionContext): Future[HttpResult] = try { logger.debug(s"Sending audit request to URL ${endpointUrl.toString}") wsClient.url(endpointUrl.toString) .post(event) .map { response => val httpStatusCode = response.status logger.debug(s"Got status code : $httpStatusCode") response.body logger.debug("Response processed and closed") if (httpStatusCode >= HTTP_STATUS_CONTINUE) { logger.info(s"Got status code $httpStatusCode from HTTP server.") HttpResult.Response(httpStatusCode) } else { logger.warn(s"Malformed response (status $httpStatusCode) returned from server") HttpResult.Malformed } }.recover { case e: TimeoutException => HttpResult.Failure("Error opening connection, or request timed out", Some(e)) case e: IOException => HttpResult.Failure("Error opening connection, or request timed out", Some(e)) } } catch { case t: Throwable => Future.successful(HttpResult.Failure("Error sending HTTP request", Some(t))) } }
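The recover block above is what turns transport-level failures into an HttpResult value instead of a failed Future. The same step in isolation (object and method names are illustrative), assuming the HttpResult ADT defined in this example:

import java.io.IOException
import java.util.concurrent.TimeoutException
import scala.concurrent.{ExecutionContext, Future}
import uk.gov.hmrc.audit.handler.HttpResult

object TransportErrors {
  // Convert timeouts and connection failures into HttpResult.Failure so the
  // caller always receives a value rather than a failed Future.
  def absorb(send: => Future[HttpResult])(implicit ec: ExecutionContext): Future[HttpResult] =
    send.recover {
      case e @ (_: TimeoutException | _: IOException) =>
        HttpResult.Failure("Error opening connection, or request timed out", Some(e))
    }
}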
Example 158
Source File: WireMockUtils.scala From play-auditing with Apache License 2.0 | 5 votes |
package uk.gov.hmrc.audit import java.io.IOException import java.net.ServerSocket object WireMockUtils { def availablePort: Int = { var port = 9876 var socket: ServerSocket = null try { socket = new ServerSocket(0) port = socket.getLocalPort } catch { case ex: IOException => } finally { if (socket != null) { try { socket.close() } catch { case ex: IOException => } } } port } }
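A slightly tighter variant of the port helper (illustrative, not from the library): ask the OS for an ephemeral port, always close the socket, and only fall back to a fixed port if binding itself fails.

import java.io.IOException
import java.net.ServerSocket

object Ports {
  def freePort(fallback: Int = 9876): Int =
    try {
      val socket = new ServerSocket(0)   // port 0 = let the OS choose
      try socket.getLocalPort
      finally socket.close()
    } catch {
      case _: IOException => fallback    // e.g. no sockets available at all
    }
}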
Example 159
Source File: ErrorHandling.scala From Argus-SAF with Apache License 2.0 | 5 votes |
package org.argus.jawa.core.compiler.compile.io import java.io.IOException object ErrorHandling { def translate[T](msg: => String)(f: => T): T = try { f } catch { case e: IOException => throw new TranslatedIOException(msg + e.toString, e) case e: Exception => throw new TranslatedException(msg + e.toString, e) } def wideConvert[T](f: => T): Either[Throwable, T] = try { Right(f) } catch { case ex @ (_: Exception | _: StackOverflowError) => Left(ex) case err @ (_: ThreadDeath | _: VirtualMachineError) => throw err case x: Throwable => Left(x) } def convert[T](f: => T): Either[Exception, T] = try { Right(f) } catch { case e: Exception => Left(e) } def reducedToString(e: Throwable): String = if(e.getClass == classOf[RuntimeException]) { val msg = e.getMessage if(msg == null || msg.isEmpty) e.toString else msg } else e.toString } sealed class TranslatedException private[io](msg: String, cause: Throwable) extends RuntimeException(msg, cause) { override def toString: String = msg } final class TranslatedIOException private[io](msg: String, cause: IOException) extends TranslatedException(msg, cause)
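A usage sketch for translate (the object name and file path are hypothetical): any IOException escaping the block is rethrown as TranslatedIOException with the message prepended, keeping the original exception attached as the cause.

import java.io.FileInputStream
import org.argus.jawa.core.compiler.compile.io.ErrorHandling

object TranslateUsage {
  def readAllBytes(path: String): Array[Byte] =
    ErrorHandling.translate(s"Could not read $path: ") {
      val in = new FileInputStream(path)
      try Iterator.continually(in.read()).takeWhile(_ != -1).map(_.toByte).toArray
      finally in.close()
    }
}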
Example 160
Source File: RestartableByteArrayInputStream.scala From hail with MIT License | 5 votes |
package is.hail.utils import java.io.{ IOException, InputStream } // not thread safe class RestartableByteArrayInputStream extends InputStream { private[this] var off: Int = 0 private[this] var end: Int = 0 private[this] var buf: Array[Byte] = null def this(buf: Array[Byte]) { this() restart(buf) } override def read(): Int = { if (off == end) { return -1 } val b = buf(off) & 0xff off += 1 b } override def read(dest: Array[Byte]): Int = read(dest, 0, dest.length) override def read(dest: Array[Byte], destOff: Int, requestedLength: Int): Int = { val length = math.min(requestedLength, end - off) System.arraycopy(buf, off, dest, destOff, length) off += length length } override def skip(n: Long): Long = { if (n <= 0) { return 0 } val skipped = math.min( math.min(n, Integer.MAX_VALUE).toInt, end - off) off += skipped skipped } override def available(): Int = end - off override def markSupported(): Boolean = false override def mark(readAheadLimit: Int): Unit = throw new IOException("unsupported operation") override def reset(): Unit = throw new IOException("unsupported operation") override def close(): Unit = buf = null def restart(buf: Array[Byte]): Unit = restart(buf, 0, buf.length) def restart(buf: Array[Byte], start: Int, end: Int): Unit = { require(start >= 0) require(start <= end) require(end <= buf.length) this.buf = buf this.off = start this.end = end } }
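Usage sketch (the demo object is illustrative): a single instance is pointed at successive byte arrays via restart, avoiding per-record allocation; mark and reset deliberately throw IOException because they are unsupported.

import java.nio.charset.StandardCharsets
import is.hail.utils.RestartableByteArrayInputStream

object RestartableStreamDemo extends App {
  val in = new RestartableByteArrayInputStream("first".getBytes(StandardCharsets.UTF_8))
  val a = new Array[Byte](5)
  in.read(a)
  println(new String(a, StandardCharsets.UTF_8))   // first

  in.restart("second".getBytes(StandardCharsets.UTF_8))
  val b = new Array[Byte](6)
  in.read(b)
  println(new String(b, StandardCharsets.UTF_8))   // second
}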
Example 161
Source File: CustomHttpClientRetryHandler.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.tools.neptune.export import java.io.IOException import org.apache.http.impl.client.DefaultHttpRequestRetryHandler import org.apache.http.protocol.HttpContext import org.slf4j.LoggerFactory class CustomHttpClientRetryHandler extends DefaultHttpRequestRetryHandler{ protected lazy val logger = LoggerFactory.getLogger("httP-retry_handler") @Override override def retryRequest(exception: IOException, executionCount: Int, context: HttpContext) :Boolean = { logger.info("Going to retry http request...") if (executionCount >= 5) { // Do not retry if over max retry count false } else { logger.error("Exception=" + exception) Thread.sleep(10000) true } } }
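A sketch of how a handler like this is typically registered with Apache HttpClient 4.x (the demo object and URL are placeholders): transient IOExceptions then trigger up to five retries, each preceded by the ten-second sleep above.

import org.apache.http.client.methods.HttpGet
import org.apache.http.impl.client.HttpClients
import cmwell.tools.neptune.export.CustomHttpClientRetryHandler

object RetryingClientDemo extends App {
  val client = HttpClients.custom()
    .setRetryHandler(new CustomHttpClientRetryHandler)
    .build()

  val response = client.execute(new HttpGet("http://example.org/health"))
  try println(response.getStatusLine.getStatusCode)
  finally {
    response.close()
    client.close()
  }
}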
Example 162
Source File: ClusterSpec.scala From akka-cqrs with Apache License 2.0 | 5 votes |
package test.support import java.io.{File, IOException} import java.nio.file._ import java.nio.file.attribute.BasicFileAttributes import akka.actor.{ActorIdentity, Identify, Props} import akka.cluster.Cluster import akka.persistence.Persistence import akka.persistence.journal.leveldb.{SharedLeveldbJournal, SharedLeveldbStore} import akka.remote.testconductor.RoleName import akka.remote.testkit.MultiNodeSpec import akka.testkit.ImplicitSender import scala.util.control.NonFatal abstract class ClusterSpec extends MultiNodeSpec(ClusterConfig) with SbtMultiNodeSpec with ImplicitSender { import ClusterConfig._ implicit val logger = system.log def initialParticipants = roles.size def deleteDirectory(path: Path): Unit = if (Files.exists(path)) { Files.walkFileTree(path, new SimpleFileVisitor[Path] { def deleteAndContinue(file: Path): FileVisitResult = { Files.delete(file) FileVisitResult.CONTINUE } override def visitFile(file: Path, attrs: BasicFileAttributes): FileVisitResult = deleteAndContinue(file) override def visitFileFailed(file: Path, exc: IOException): FileVisitResult = deleteAndContinue(file) override def postVisitDirectory(dir: Path, exc: IOException): FileVisitResult = { Option(exc).fold(deleteAndContinue(dir)) { case NonFatal(e) => throw e } } }) } val storageLocations = List( "akka.persistence.journal.leveldb.dir", "akka.persistence.journal.leveldb-shared.store.dir", "akka.persistence.snapshot-store.local.dir").map(s => new File(system.settings.config.getString(s))) override protected def atStartup() { on(node1) { storageLocations.foreach(dir => deleteDirectory(dir.toPath)) } } override protected def afterTermination() { on(node1) { storageLocations.foreach(dir => deleteDirectory(dir.toPath)) } } def join(startOn: RoleName, joinTo: RoleName) { on(startOn) { Cluster(system) join node(joinTo).address } enterBarrier(startOn.name + "-joined") } def setupSharedJournal() { Persistence(system) on(node1) { system.actorOf(Props[SharedLeveldbStore], "store") } enterBarrier("persistence-started") system.actorSelection(node(node1) / "user" / "store") ! Identify(None) val sharedStore = expectMsgType[ActorIdentity].ref.get SharedLeveldbJournal.setStore(sharedStore, system) enterBarrier("after-1") } def joinCluster() { join(startOn = node1, joinTo = node1) join(startOn = node2, joinTo = node1) enterBarrier("after-2") } def on(nodes: RoleName*)(thunk: => Unit): Unit = { runOn(nodes: _*)(thunk) } }
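deleteDirectory above walks the tree with a FileVisitor so the journal directories are removed bottom-up. An equivalent sketch using Files.walk (Java 8+, names illustrative), sorted deepest-first, in case the visitor version feels heavyweight:

import java.nio.file.{Files, Path}
import scala.collection.JavaConverters._

object RecursiveDelete {
  def deleteRecursively(path: Path): Unit =
    if (Files.exists(path)) {
      val walk = Files.walk(path)
      try {
        // Reverse order deletes children before their parent directories;
        // any IOException from Files.delete aborts the cleanup.
        walk.sorted(java.util.Comparator.reverseOrder[Path]())
          .iterator().asScala
          .foreach(p => Files.delete(p))
      } finally walk.close()
    }
}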
Example 163
Source File: Main.scala From frdomain-extras with Apache License 2.0 | 5 votes |
package frdomain.ch6.domain import java.io.IOException import zio._ import zio.blocking.Blocking import config._ import common._ import service._ import model.{Account, Balance} import repository.{AccountRepository, InMemoryAccountRepository, DoobieAccountRepository} object Main { // uses AccountService val opens = for { _ <- open("a1234", "a1name", None, None, Checking) _ <- open("a2345", "a2name", None, None, Checking) _ <- open("a3456", "a3name", Some(BigDecimal(5.8)), None, Savings) _ <- open("a4567", "a4name", None, None, Checking) _ <- open("a5678", "a5name", Some(BigDecimal(2.3)), None, Savings) } yield (()) // uses AccountService val credits = for { _ <- credit("a1234", 1000) _ <- credit("a2345", 2000) _ <- credit("a3456", 3000) _ <- credit("a4567", 4000) } yield (()) // uses AccountService and ReportingService val program = for { _ <- opens _ <- credits a <- balanceByAccount } yield a def main(args: Array[String]): Unit = { val prog = for { _ <- opens _ <- credits a <- balanceByAccount } yield a val banking: ZIO[Blocking, Object, Seq[(String, common.Amount)]] = prog.provideLayer(Application.prod.appLayer) println(Runtime.default.unsafeRun(banking)) } }
Example 164
Source File: inmemory.scala From frdomain-extras with Apache License 2.0 | 5 votes |
package frdomain.ch6.domain import java.io.IOException import zio._ import common._ import repository.{InMemoryAccountRepository, DoobieAccountRepository} import service._ import model.{Account, Balance} object InMemory { def main(args: Array[String]): Unit = { // uses AccountService val opens = for { _ <- open("a1234", "a1name", None, None, Checking) _ <- open("a2345", "a2name", None, None, Checking) _ <- open("a3456", "a3name", Some(BigDecimal(5.8)), None, Savings) _ <- open("a4567", "a4name", None, None, Checking) _ <- open("a5678", "a5name", Some(BigDecimal(2.3)), None, Savings) } yield (()) // uses AccountService val credits = for { _ <- credit("a1234", 1000) _ <- credit("a2345", 2000) _ <- credit("a3456", 3000) _ <- credit("a4567", 4000) } yield (()) // uses AccountService and ReportingService val program = for { _ <- opens _ <- credits a <- balanceByAccount } yield a // layers val appLayer = InMemoryAccountRepository.layer >+> AccountService.live >+> ReportingService.live val banking = program.provideLayer(appLayer) println(Runtime.default.unsafeRun(banking)) // List((a5678,0), (a3456,3000), (a1234,1000), (a2345,2000), (a4567,4000)) } }
Example 165
Source File: TestOutputStream.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming import java.io.{IOException, ObjectInputStream} import java.util.concurrent.ConcurrentLinkedQueue import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.dstream.{DStream, ForEachDStream} import org.apache.spark.util.Utils class TestOutputStream[T: ClassTag](parent: DStream[T], val output: ConcurrentLinkedQueue[Seq[T]] = new ConcurrentLinkedQueue[Seq[T]]()) extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { val collected = rdd.collect() output.add(collected) }, false) { // This is to clear the output buffer every time it is read from a checkpoint @throws(classOf[IOException]) private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { ois.defaultReadObject() output.clear() } }
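The readObject hook above is the standard Java-serialization way to reset transient state after a checkpoint restore. The same pattern on a plain class (names are illustrative):

import java.io.{IOException, ObjectInputStream}
import scala.collection.mutable.ArrayBuffer

class CollectedOutput extends Serializable {
  @transient private var buffer = ArrayBuffer.empty[String]

  def add(s: String): Unit = buffer += s
  def snapshot: Seq[String] = buffer.toList

  // Invoked by Java serialization on deserialize; declared to throw IOException.
  // Recreates the transient buffer so the restored copy starts empty.
  @throws(classOf[IOException])
  private def readObject(ois: ObjectInputStream): Unit = {
    ois.defaultReadObject()
    buffer = ArrayBuffer.empty[String]
  }
}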
Example 166
Source File: ReusableStringReaderSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.expressions.xml import java.io.IOException import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.expressions.xml.UDFXPathUtil.ReusableStringReader class ReusableStringReaderSuite extends SparkFunSuite { private val fox = "Quick brown fox jumps over the lazy dog." test("empty reader") { val reader = new ReusableStringReader intercept[IOException] { reader.read() } intercept[IOException] { reader.ready() } reader.close() } test("mark reset") { val reader = new ReusableStringReader if (reader.markSupported()) { reader.asInstanceOf[ReusableStringReader].set(fox) assert(reader.ready()) val cc = new Array[Char](6) var read = reader.read(cc) assert(read == 6) assert("Quick " == new String(cc)) reader.mark(100) read = reader.read(cc) assert(read == 6) assert("brown " == new String(cc)) reader.reset() read = reader.read(cc) assert(read == 6) assert("brown " == new String(cc)) } reader.close() } test("skip") { val reader = new ReusableStringReader reader.asInstanceOf[ReusableStringReader].set(fox) // skip entire the data: var skipped = reader.skip(fox.length() + 1) assert(fox.length() == skipped) assert(-1 == reader.read()) reader.asInstanceOf[ReusableStringReader].set(fox) // reset the data val cc = new Array[Char](6) var read = reader.read(cc) assert(read == 6) assert("Quick " == new String(cc)) // skip some piece of data: skipped = reader.skip(30) assert(skipped == 30) read = reader.read(cc) assert(read == 4) assert("dog." == new String(cc, 0, read)) // skip when already at EOF: skipped = reader.skip(300) assert(skipped == 0, skipped) assert(reader.read() == -1) reader.close() } }
Example 167
Source File: ProcessTestUtils.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.test import java.io.{InputStream, IOException} import scala.sys.process.BasicIO object ProcessTestUtils { class ProcessOutputCapturer(stream: InputStream, capture: String => Unit) extends Thread { this.setDaemon(true) override def run(): Unit = { try { BasicIO.processFully(capture)(stream) } catch { case _: IOException => // Ignores the IOException thrown when the process termination, which closes the input // stream abruptly. } } } }
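Usage sketch (the demo object is illustrative): the capturer drains a child process's stdout on a daemon thread, and the IOException raised when the process exits and the stream closes is deliberately swallowed.

import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer

object CaptureDemo extends App {
  val process = new ProcessBuilder("echo", "hello").start()
  new ProcessOutputCapturer(process.getInputStream, line => println(s"captured: $line")).start()
  process.waitFor()
}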
Example 168
Source File: RawTextSender.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.{ByteArrayOutputStream, IOException} import java.net.ServerSocket import java.nio.ByteBuffer import scala.io.Source import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.serializer.KryoSerializer import org.apache.spark.util.IntParam private[streaming] object RawTextSender extends Logging { def main(args: Array[String]) { if (args.length != 4) { // scalastyle:off println System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>") // scalastyle:on println System.exit(1) } // Parse the arguments using a pattern match val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args // Repeat the input data multiple times to fill in a buffer val lines = Source.fromFile(file).getLines().toArray val bufferStream = new ByteArrayOutputStream(blockSize + 1000) val ser = new KryoSerializer(new SparkConf()).newInstance() val serStream = ser.serializeStream(bufferStream) var i = 0 while (bufferStream.size < blockSize) { serStream.writeObject(lines(i)) i = (i + 1) % lines.length } val array = bufferStream.toByteArray val countBuf = ByteBuffer.wrap(new Array[Byte](4)) countBuf.putInt(array.length) countBuf.flip() val serverSocket = new ServerSocket(port) logInfo("Listening on port " + port) while (true) { val socket = serverSocket.accept() logInfo("Got a new connection") val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec) try { while (true) { out.write(countBuf.array) out.write(array) } } catch { case e: IOException => logError("Client disconnected") } finally { socket.close() } } } }
Example 169
Source File: HdfsUtils.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.{FileNotFoundException, IOException} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ private[streaming] object HdfsUtils { def getOutputStream(path: String, conf: Configuration): FSDataOutputStream = { val dfsPath = new Path(path) val dfs = getFileSystemForPath(dfsPath, conf) // If the file exists and we have append support, append instead of creating a new file val stream: FSDataOutputStream = { if (dfs.isFile(dfsPath)) { if (conf.getBoolean("hdfs.append.support", false) || dfs.isInstanceOf[RawLocalFileSystem]) { dfs.append(dfsPath) } else { throw new IllegalStateException("File exists and there is no append support!") } } else { dfs.create(dfsPath) } } stream } def getInputStream(path: String, conf: Configuration): FSDataInputStream = { val dfsPath = new Path(path) val dfs = getFileSystemForPath(dfsPath, conf) try { dfs.open(dfsPath) } catch { case _: FileNotFoundException => null case e: IOException => // If we are really unlucky, the file may be deleted as we're opening the stream. // This can happen as clean up is performed by daemon threads that may be left over from // previous runs. if (!dfs.isFile(dfsPath)) null else throw e } } def checkState(state: Boolean, errorMsg: => String) { if (!state) { throw new IllegalStateException(errorMsg) } } def checkFileExists(path: String, conf: Configuration): Boolean = { val hdpPath = new Path(path) val fs = getFileSystemForPath(hdpPath, conf) fs.isFile(hdpPath) } }
Example 170
Source File: FileBasedWriteAheadLogReader.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.{Closeable, EOFException, IOException} import java.nio.ByteBuffer import org.apache.hadoop.conf.Configuration import org.apache.spark.internal.Logging private[streaming] class FileBasedWriteAheadLogReader(path: String, conf: Configuration) extends Iterator[ByteBuffer] with Closeable with Logging { private val instream = HdfsUtils.getInputStream(path, conf) private var closed = (instream == null) // the file may be deleted as we're opening the stream private var nextItem: Option[ByteBuffer] = None override def hasNext: Boolean = synchronized { if (closed) { return false } if (nextItem.isDefined) { // handle the case where hasNext is called without calling next true } else { try { val length = instream.readInt() val buffer = new Array[Byte](length) instream.readFully(buffer) nextItem = Some(ByteBuffer.wrap(buffer)) logTrace("Read next item " + nextItem.get) true } catch { case e: EOFException => logDebug("Error reading next item, EOF reached", e) close() false case e: IOException => logWarning("Error while trying to read data. If the file was deleted, " + "this should be okay.", e) close() if (HdfsUtils.checkFileExists(path, conf)) { // If file exists, this could be a legitimate error throw e } else { // File was deleted. This can occur when the daemon cleanup thread takes time to // delete the file during recovery. false } case e: Exception => logWarning("Error while trying to read data from HDFS.", e) close() throw e } } } override def next(): ByteBuffer = synchronized { val data = nextItem.getOrElse { close() throw new IllegalStateException( "next called without calling hasNext or after hasNext returned false") } nextItem = None // Ensure the next hasNext call loads new data. data } override def close(): Unit = synchronized { if (!closed) { instream.close() } closed = true } }
Example 171
Source File: CommandUtils.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.worker import java.io.{File, FileOutputStream, InputStream, IOException} import scala.collection.JavaConverters._ import scala.collection.Map import org.apache.spark.SecurityManager import org.apache.spark.deploy.Command import org.apache.spark.internal.Logging import org.apache.spark.launcher.WorkerCommandBuilder import org.apache.spark.util.Utils def redirectStream(in: InputStream, file: File) { val out = new FileOutputStream(file, true) // TODO: It would be nice to add a shutdown hook here that explains why the output is // terminating. Otherwise if the worker dies the executor logs will silently stop. new Thread("redirect output to " + file) { override def run() { try { Utils.copyStream(in, out, true) } catch { case e: IOException => logInfo("Redirection to " + file + " closed: " + e.getMessage) } } }.start() } }
Example 172
Source File: SparkHadoopMapRedUtil.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.mapred import java.io.IOException import org.apache.hadoop.mapreduce.{TaskAttemptContext => MapReduceTaskAttemptContext} import org.apache.hadoop.mapreduce.{OutputCommitter => MapReduceOutputCommitter} import org.apache.hadoop.security.UserGroupInformation import org.apache.spark.{SparkEnv, TaskContext} import org.apache.spark.executor.CommitDeniedException import org.apache.spark.internal.Logging object SparkHadoopMapRedUtil extends Logging { private val user = UserGroupInformation.getCurrentUser.getShortUserName def commitTask( committer: MapReduceOutputCommitter, mrTaskContext: MapReduceTaskAttemptContext, jobId: Int, splitId: Int): Unit = { val mrTaskAttemptID = mrTaskContext.getTaskAttemptID // Called after we have decided to commit def performCommit(): Unit = { try { committer.commitTask(mrTaskContext) logInfo(s"$mrTaskAttemptID: Committed") } catch { case cause: IOException => logError(s"Error committing the output of task: $mrTaskAttemptID", cause) committer.abortTask(mrTaskContext) throw cause } } // First, check whether the task's output has already been committed by some other attempt if (committer.needsTaskCommit(mrTaskContext)) { val shouldCoordinateWithDriver: Boolean = { val sparkConf = SparkEnv.get(user).conf // We only need to coordinate with the driver if there are concurrent task attempts. // Note that this could happen even when speculation is not enabled (e.g. see SPARK-8029). // This (undocumented) setting is an escape-hatch in case the commit code introduces bugs. sparkConf.getBoolean("spark.hadoop.outputCommitCoordination.enabled", defaultValue = true) } if (shouldCoordinateWithDriver) { val outputCommitCoordinator = SparkEnv.get(user).outputCommitCoordinator val taskAttemptNumber = TaskContext.get().attemptNumber() val canCommit = outputCommitCoordinator.canCommit(jobId, splitId, taskAttemptNumber) if (canCommit) { performCommit() } else { val message = s"$mrTaskAttemptID: Not committed because the driver did not authorize commit" logInfo(message) // We need to abort the task so that the driver can reschedule new attempts, if necessary committer.abortTask(mrTaskContext) throw new CommitDeniedException(message, jobId, splitId, taskAttemptNumber) } } else { // Speculation is disabled or a user has chosen to manually bypass the commit coordination performCommit() } } else { // Some other attempt committed the output, so we do nothing and signal success logInfo(s"No need to commit output of task because needsTaskCommit=false: $mrTaskAttemptID") } } }
Example 173
Source File: SerializableBuffer.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream} import java.nio.ByteBuffer import java.nio.channels.Channels private[spark] class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable { def value: ByteBuffer = buffer private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { val length = in.readInt() buffer = ByteBuffer.allocate(length) var amountRead = 0 val channel = Channels.newChannel(in) while (amountRead < length) { val ret = channel.read(buffer) if (ret == -1) { throw new EOFException("End of file before fully reading buffer") } amountRead += ret } buffer.rewind() // Allow us to read it later } private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.writeInt(buffer.limit()) if (Channels.newChannel(out).write(buffer) != buffer.limit()) { throw new IOException("Could not fully write buffer to output stream") } buffer.rewind() // Allow us to write it again later } }
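A small round-trip helper (illustrative) shows how writeObject/readObject hooks like the ones above are exercised: serializing a value and reading it back drives both hooks, surfacing an IOException if the buffer could not be fully written.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

object SerializationRoundTrip {
  def roundTrip[T <: Serializable](value: T): T = {
    val bytes = new ByteArrayOutputStream()
    val out = new ObjectOutputStream(bytes)
    try out.writeObject(value) finally out.close()

    val in = new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray))
    try in.readObject().asInstanceOf[T] finally in.close()
  }
}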
Example 174
Source File: DiskStore.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.storage import java.io.{FileOutputStream, IOException, RandomAccessFile} import java.nio.ByteBuffer import java.nio.channels.FileChannel.MapMode import com.google.common.io.Closeables import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.util.Utils import org.apache.spark.util.io.ChunkedByteBuffer def put(blockId: BlockId)(writeFunc: FileOutputStream => Unit): Unit = { if (contains(blockId)) { throw new IllegalStateException(s"Block $blockId is already present in the disk store") } logDebug(s"Attempting to put block $blockId") val startTime = System.currentTimeMillis val file = diskManager.getFile(blockId) val fileOutputStream = new FileOutputStream(file) var threwException: Boolean = true try { writeFunc(fileOutputStream) threwException = false } finally { try { Closeables.close(fileOutputStream, threwException) } finally { if (threwException) { remove(blockId) } } } val finishTime = System.currentTimeMillis logDebug("Block %s stored as %s file on disk in %d ms".format( file.getName, Utils.bytesToString(file.length()), finishTime - startTime)) } def putBytes(blockId: BlockId, bytes: ChunkedByteBuffer): Unit = { put(blockId) { fileOutputStream => val channel = fileOutputStream.getChannel Utils.tryWithSafeFinally { bytes.writeFully(channel) } { channel.close() } } } def getBytes(blockId: BlockId): ChunkedByteBuffer = { val file = diskManager.getFile(blockId.name) val channel = new RandomAccessFile(file, "r").getChannel Utils.tryWithSafeFinally { // For small files, directly read rather than memory map if (file.length < minMemoryMapBytes) { val buf = ByteBuffer.allocate(file.length.toInt) channel.position(0) while (buf.remaining() != 0) { if (channel.read(buf) == -1) { throw new IOException("Reached EOF before filling buffer\n" + s"offset=0\nfile=${file.getAbsolutePath}\nbuf.remaining=${buf.remaining}") } } buf.flip() new ChunkedByteBuffer(buf) } else { new ChunkedByteBuffer(channel.map(MapMode.READ_ONLY, 0, file.length)) } } { channel.close() } } def remove(blockId: BlockId): Boolean = { val file = diskManager.getFile(blockId.name) if (file.exists()) { val ret = file.delete() if (!ret) { logWarning(s"Error deleting ${file.getPath()}") } ret } else { false } } def contains(blockId: BlockId): Boolean = { val file = diskManager.getFile(blockId.name) file.exists() } }
Example 175
Source File: CartesianRDD.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.util.Utils private[spark] class CartesianPartition( idx: Int, @transient private val rdd1: RDD[_], @transient private val rdd2: RDD[_], s1Index: Int, s2Index: Int ) extends Partition { var s1 = rdd1.partitions(s1Index) var s2 = rdd2.partitions(s2Index) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization s1 = rdd1.partitions(s1Index) s2 = rdd2.partitions(s2Index) oos.defaultWriteObject() } } private[spark] class CartesianRDD[T: ClassTag, U: ClassTag]( sc: SparkContext, var rdd1 : RDD[T], var rdd2 : RDD[U]) extends RDD[(T, U)](sc, Nil) with Serializable { val numPartitionsInRdd2 = rdd2.partitions.length override def getPartitions: Array[Partition] = { // create the cross product split val array = new Array[Partition](rdd1.partitions.length * rdd2.partitions.length) for (s1 <- rdd1.partitions; s2 <- rdd2.partitions) { val idx = s1.index * numPartitionsInRdd2 + s2.index array(idx) = new CartesianPartition(idx, rdd1, rdd2, s1.index, s2.index) } array } override def getPreferredLocations(split: Partition): Seq[String] = { val currSplit = split.asInstanceOf[CartesianPartition] (rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)).distinct } override def compute(split: Partition, context: TaskContext): Iterator[(T, U)] = { val currSplit = split.asInstanceOf[CartesianPartition] for (x <- rdd1.iterator(currSplit.s1, context); y <- rdd2.iterator(currSplit.s2, context)) yield (x, y) } override def getDependencies: Seq[Dependency[_]] = List( new NarrowDependency(rdd1) { def getParents(id: Int): Seq[Int] = List(id / numPartitionsInRdd2) }, new NarrowDependency(rdd2) { def getParents(id: Int): Seq[Int] = List(id % numPartitionsInRdd2) } ) override def clearDependencies() { super.clearDependencies() rdd1 = null rdd2 = null } }
Example 176
Source File: UnionRDD.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer import scala.collection.parallel.{ForkJoinTaskSupport, ThreadPoolTaskSupport} import scala.concurrent.forkjoin.ForkJoinPool import scala.reflect.ClassTag import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils private[spark] class UnionPartition[T: ClassTag]( idx: Int, @transient private val rdd: RDD[T], val parentRddIndex: Int, @transient private val parentRddPartitionIndex: Int) extends Partition { var parentPartition: Partition = rdd.partitions(parentRddPartitionIndex) def preferredLocations(): Seq[String] = rdd.preferredLocations(parentPartition) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization parentPartition = rdd.partitions(parentRddPartitionIndex) oos.defaultWriteObject() } } object UnionRDD { private[spark] lazy val partitionEvalTaskSupport = new ForkJoinTaskSupport(new ForkJoinPool(8)) } @DeveloperApi class UnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]]) extends RDD[T](sc, Nil) { // Nil since we implement getDependencies // visible for testing private[spark] val isPartitionListingParallel: Boolean = rdds.length > conf.getInt("spark.rdd.parallelListingThreshold", 10) override def getPartitions: Array[Partition] = { val parRDDs = if (isPartitionListingParallel) { val parArray = rdds.par parArray.tasksupport = UnionRDD.partitionEvalTaskSupport parArray } else { rdds } val array = new Array[Partition](parRDDs.map(_.partitions.length).seq.sum) var pos = 0 for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) { array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index) pos += 1 } array } override def getDependencies: Seq[Dependency[_]] = { val deps = new ArrayBuffer[Dependency[_]] var pos = 0 for (rdd <- rdds) { deps += new RangeDependency(rdd, 0, pos, rdd.partitions.length) pos += rdd.partitions.length } deps } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val part = s.asInstanceOf[UnionPartition[T]] parent[T](part.parentRddIndex).iterator(part.parentPartition, context) } override def getPreferredLocations(s: Partition): Seq[String] = s.asInstanceOf[UnionPartition[T]].preferredLocations() override def clearDependencies() { super.clearDependencies() rdds = null } }
Example 177
Source File: PartitionerAwareUnionRDD.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark.{OneToOneDependency, Partition, SparkContext, TaskContext} import org.apache.spark.util.Utils private[spark] class PartitionerAwareUnionRDD[T: ClassTag]( sc: SparkContext, var rdds: Seq[RDD[T]] ) extends RDD[T](sc, rdds.map(x => new OneToOneDependency(x))) { require(rdds.nonEmpty) require(rdds.forall(_.partitioner.isDefined)) require(rdds.flatMap(_.partitioner).toSet.size == 1, "Parent RDDs have different partitioners: " + rdds.flatMap(_.partitioner)) override val partitioner = rdds.head.partitioner override def getPartitions: Array[Partition] = { val numPartitions = partitioner.get.numPartitions (0 until numPartitions).map { index => new PartitionerAwareUnionRDDPartition(rdds, index) }.toArray } // Get the location where most of the partitions of parent RDDs are located override def getPreferredLocations(s: Partition): Seq[String] = { logDebug("Finding preferred location for " + this + ", partition " + s.index) val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents val locations = rdds.zip(parentPartitions).flatMap { case (rdd, part) => val parentLocations = currPrefLocs(rdd, part) logDebug("Location of " + rdd + " partition " + part.index + " = " + parentLocations) parentLocations } val location = if (locations.isEmpty) { None } else { // Find the location that maximum number of parent partitions prefer Some(locations.groupBy(x => x).maxBy(_._2.length)._1) } logDebug("Selected location for " + this + ", partition " + s.index + " = " + location) location.toSeq } override def compute(s: Partition, context: TaskContext): Iterator[T] = { val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents rdds.zip(parentPartitions).iterator.flatMap { case (rdd, p) => rdd.iterator(p, context) } } override def clearDependencies() { super.clearDependencies() rdds = null } // Get the *current* preferred locations from the DAGScheduler (as opposed to the static ones) private def currPrefLocs(rdd: RDD[_], part: Partition): Seq[String] = { rdd.context.getPreferredLocs(rdd, part.index).map(tl => tl.host) } }
Example 178
Source File: EmbeddedKsqlEngine.scala From ksql-jdbc-driver with Apache License 2.0 | 5 votes |
package com.github.mmolimar.ksql.jdbc.embedded import java.io.IOException import com.github.mmolimar.ksql.jdbc.utils.TestUtils import io.confluent.ksql.rest.server.{KsqlRestApplication, KsqlRestConfig} import io.confluent.ksql.version.metrics.VersionCheckerAgent import io.confluent.rest.RestConfig import kafka.utils.Logging import org.apache.kafka.clients.producer.ProducerConfig import org.scalamock.scalatest.MockFactory import io.confluent.ksql.util.KsqlConfig import scala.collection.JavaConverters._ class EmbeddedKsqlEngine(port: Int = TestUtils.getAvailablePort, brokerList: String, connectUrl: String) extends Logging with MockFactory { private val config = new KsqlRestConfig(Map( RestConfig.LISTENERS_CONFIG -> s"http://localhost:$port", ProducerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokerList, KsqlConfig.CONNECT_URL_PROPERTY -> connectUrl, "ksql.service.id" -> "ksql-jdbc", "ksql.streams.auto.offset.reset" -> "latest", "ksql.command.topic.suffix" -> "commands" ).asJava) lazy val ksqlEngine: KsqlRestApplication = { import io.confluent.ksql.rest.server.mock.ksqlRestApplication val versionCheckerAgent = mock[VersionCheckerAgent] (versionCheckerAgent.start _).expects(*, *).returns((): Unit).anyNumberOfTimes (versionCheckerAgent.updateLastRequestTime _).expects().returns((): Unit).anyNumberOfTimes ksqlRestApplication(config, versionCheckerAgent) } @throws[IOException] def startup(): Unit = { info("Starting up embedded KSQL engine") ksqlEngine.start() info("Started embedded KSQL engine: " + getConnection) } def shutdown(): Unit = { info("Shutting down embedded KSQL engine") TestUtils.swallow(ksqlEngine.stop()) info("Stopped embedded KSQL engine") } def getPort: Int = port def getConnection: String = "localhost:" + getPort override def toString: String = { val sb: StringBuilder = new StringBuilder("KSQL{") sb.append("connection=").append(getConnection) sb.append('}') sb.toString } }
Example 179
Source File: TestOutputStream.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming import java.io.{IOException, ObjectInputStream} import org.apache.spark.rdd.RDD import org.apache.spark.streaming.dstream.{DStream, ForEachDStream} import org.apache.spark.util.Utils import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag class TestOutputStream[T: ClassTag](parent: DStream[T], val output: ArrayBuffer[Seq[T]] = ArrayBuffer[Seq[T]]()) extends ForEachDStream[T](parent, (rdd: RDD[T], t: Time) => { val collected = rdd.collect() output += collected }) { // This is to clear the output buffer every time it is read from a checkpoint @throws(classOf[IOException]) private def readObject(ois: ObjectInputStream): Unit = Utils.tryOrIOException { ois.defaultReadObject() output.clear() } }
Example 180
Source File: SparkSQLCLIService.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.thriftserver import java.io.IOException import java.util.{List => JList} import javax.security.auth.login.LoginException import scala.collection.JavaConversions._ import org.apache.commons.logging.Log import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.shims.ShimLoader import org.apache.hadoop.security.UserGroupInformation import org.apache.hive.service.Service.STATE import org.apache.hive.service.auth.HiveAuthFactory import org.apache.hive.service.cli._ import org.apache.hive.service.{AbstractService, Service, ServiceException} import org.apache.spark.sql.hive.HiveContext import org.apache.spark.sql.hive.thriftserver.ReflectionUtils._ import org.apache.spark.util.Utils private[hive] class SparkSQLCLIService(hiveContext: HiveContext) extends CLIService with ReflectedCompositeService { override def init(hiveConf: HiveConf) { setSuperField(this, "hiveConf", hiveConf) val sparkSqlSessionManager = new SparkSQLSessionManager(hiveContext) setSuperField(this, "sessionManager", sparkSqlSessionManager) addService(sparkSqlSessionManager) var sparkServiceUGI: UserGroupInformation = null if (ShimLoader.getHadoopShims.isSecurityEnabled) { try { HiveAuthFactory.loginFromKeytab(hiveConf) sparkServiceUGI = ShimLoader.getHadoopShims.getUGIForConf(hiveConf) HiveThriftServerShim.setServerUserName(sparkServiceUGI, this) } catch { case e @ (_: IOException | _: LoginException) => throw new ServiceException("Unable to login to kerberos with given principal/keytab", e) } } initCompositeService(hiveConf) } override def getInfo(sessionHandle: SessionHandle, getInfoType: GetInfoType): GetInfoValue = { getInfoType match { case GetInfoType.CLI_SERVER_NAME => new GetInfoValue("Spark SQL") case GetInfoType.CLI_DBMS_NAME => new GetInfoValue("Spark SQL") case GetInfoType.CLI_DBMS_VER => new GetInfoValue(hiveContext.sparkContext.version) case _ => super.getInfo(sessionHandle, getInfoType) } } } private[thriftserver] trait ReflectedCompositeService { this: AbstractService => def initCompositeService(hiveConf: HiveConf) { // Emulating `CompositeService.init(hiveConf)` val serviceList = getAncestorField[JList[Service]](this, 2, "serviceList") serviceList.foreach(_.init(hiveConf)) // Emulating `AbstractService.init(hiveConf)` invoke(classOf[AbstractService], this, "ensureCurrentState", classOf[STATE] -> STATE.NOTINITED) setAncestorField(this, 3, "hiveConf", hiveConf) invoke(classOf[AbstractService], this, "changeState", classOf[STATE] -> STATE.INITED) getAncestorField[Log](this, 3, "LOG").info(s"Service: $getName is inited.") } }
Example 181
Source File: RawTextSender.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.util import java.io.{ByteArrayOutputStream, IOException} import java.net.ServerSocket import java.nio.ByteBuffer import scala.io.Source import org.apache.spark.{SparkConf, Logging} import org.apache.spark.serializer.KryoSerializer import org.apache.spark.util.IntParam private[streaming] object RawTextSender extends Logging { def main(args: Array[String]) { if (args.length != 4) { System.err.println("Usage: RawTextSender <port> <file> <blockSize> <bytesPerSec>") System.exit(1) } // Parse the arguments using a pattern match val Array(IntParam(port), file, IntParam(blockSize), IntParam(bytesPerSec)) = args // Repeat the input data multiple times to fill in a buffer val lines = Source.fromFile(file).getLines().toArray val bufferStream = new ByteArrayOutputStream(blockSize + 1000) val ser = new KryoSerializer(new SparkConf()).newInstance() val serStream = ser.serializeStream(bufferStream) var i = 0 while (bufferStream.size < blockSize) { serStream.writeObject(lines(i)) i = (i + 1) % lines.length } val array = bufferStream.toByteArray val countBuf = ByteBuffer.wrap(new Array[Byte](4)) countBuf.putInt(array.length) countBuf.flip() val serverSocket = new ServerSocket(port) logInfo("Listening on port " + port) while (true) { val socket = serverSocket.accept() logInfo("Got a new connection") val out = new RateLimitedOutputStream(socket.getOutputStream, bytesPerSec) try { while (true) { out.write(countBuf.array) out.write(array) } } catch { case e: IOException => logError("Client disconnected") } finally { socket.close() } } } }
Example 182
Source File: CommandUtils.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.worker import java.io.{File, FileOutputStream, InputStream, IOException} import java.lang.System._ import scala.collection.JavaConversions._ import scala.collection.Map import org.apache.spark.Logging import org.apache.spark.deploy.Command import org.apache.spark.launcher.WorkerCommandBuilder import org.apache.spark.util.Utils def redirectStream(in: InputStream, file: File) { val out = new FileOutputStream(file, true) // TODO: It would be nice to add a shutdown hook here that explains why the output is // terminating. Otherwise if the worker dies the executor logs will silently stop. new Thread("redirect output to " + file) { override def run() { try { Utils.copyStream(in, out, true) } catch { case e: IOException => logInfo("Redirection to " + file + " closed: " + e.getMessage) } } }.start() } }
Example 183
Source File: ReplayListenerBus.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.io.{InputStream, IOException} import scala.io.Source import com.fasterxml.jackson.core.JsonParseException import org.json4s.jackson.JsonMethods._ import org.apache.spark.Logging import org.apache.spark.util.JsonProtocol def replay( logData: InputStream, sourceName: String, maybeTruncated: Boolean = false): Unit = { var currentLine: String = null var lineNumber: Int = 1 try { val lines = Source.fromInputStream(logData).getLines() while (lines.hasNext) { currentLine = lines.next() try { postToAll(JsonProtocol.sparkEventFromJson(parse(currentLine))) } catch { case jpe: JsonParseException => // We can only ignore exception from last line of the file that might be truncated if (!maybeTruncated || lines.hasNext) { throw jpe } else { logWarning(s"Got JsonParseException from log file $sourceName" + s" at line $lineNumber, the file might not have finished writing cleanly.") } } lineNumber += 1 } } catch { case ioe: IOException => throw ioe case e: Exception => logError(s"Exception parsing Spark event log: $sourceName", e) logError(s"Malformed line #$lineNumber: $currentLine\n") } } }
Example 184
Source File: SerializableBuffer.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.util import java.io.{EOFException, IOException, ObjectInputStream, ObjectOutputStream} import java.nio.ByteBuffer import java.nio.channels.Channels private[spark] class SerializableBuffer(@transient var buffer: ByteBuffer) extends Serializable { def value: ByteBuffer = buffer private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { val length = in.readInt() buffer = ByteBuffer.allocate(length) var amountRead = 0 val channel = Channels.newChannel(in) while (amountRead < length) { val ret = channel.read(buffer) if (ret == -1) { throw new EOFException("End of file before fully reading buffer") } amountRead += ret } buffer.rewind() // Allow us to read it later } private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { out.writeInt(buffer.limit()) if (Channels.newChannel(out).write(buffer) != buffer.limit()) { throw new IOException("Could not fully write buffer to output stream") } buffer.rewind() // Allow us to write it again later } }
Example 185
Source File: BlockManagerId.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.storage import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput} import java.util.concurrent.ConcurrentHashMap import org.apache.spark.SparkContext import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Utils def apply(execId: String, host: String, port: Int): BlockManagerId = getCachedBlockManagerId(new BlockManagerId(execId, host, port)) def apply(in: ObjectInput): BlockManagerId = { val obj = new BlockManagerId() obj.readExternal(in) getCachedBlockManagerId(obj) } val blockManagerIdCache = new ConcurrentHashMap[BlockManagerId, BlockManagerId]() def getCachedBlockManagerId(id: BlockManagerId): BlockManagerId = { blockManagerIdCache.putIfAbsent(id, id) blockManagerIdCache.get(id) } }
Example 186
Source File: CartesianRDD.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd import java.io.{IOException, ObjectOutputStream} import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.util.Utils private[spark] class CartesianPartition( idx: Int, @transient rdd1: RDD[_], @transient rdd2: RDD[_], s1Index: Int, s2Index: Int ) extends Partition { var s1 = rdd1.partitions(s1Index) var s2 = rdd2.partitions(s2Index) override val index: Int = idx @throws(classOf[IOException]) private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException { // Update the reference to parent split at the time of task serialization s1 = rdd1.partitions(s1Index) s2 = rdd2.partitions(s2Index) oos.defaultWriteObject() } } private[spark] class CartesianRDD[T: ClassTag, U: ClassTag]( sc: SparkContext, var rdd1 : RDD[T], var rdd2 : RDD[U]) extends RDD[Pair[T, U]](sc, Nil) with Serializable { val numPartitionsInRdd2 = rdd2.partitions.length override def getPartitions: Array[Partition] = { // create the cross product split val array = new Array[Partition](rdd1.partitions.length * rdd2.partitions.length) for (s1 <- rdd1.partitions; s2 <- rdd2.partitions) { val idx = s1.index * numPartitionsInRdd2 + s2.index array(idx) = new CartesianPartition(idx, rdd1, rdd2, s1.index, s2.index) } array } override def getPreferredLocations(split: Partition): Seq[String] = { val currSplit = split.asInstanceOf[CartesianPartition] (rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)).distinct } override def compute(split: Partition, context: TaskContext): Iterator[(T, U)] = { val currSplit = split.asInstanceOf[CartesianPartition] for (x <- rdd1.iterator(currSplit.s1, context); y <- rdd2.iterator(currSplit.s2, context)) yield (x, y) } override def getDependencies: Seq[Dependency[_]] = List( new NarrowDependency(rdd1) { def getParents(id: Int): Seq[Int] = List(id / numPartitionsInRdd2) }, new NarrowDependency(rdd2) { def getParents(id: Int): Seq[Int] = List(id % numPartitionsInRdd2) } ) override def clearDependencies() { super.clearDependencies() rdd1 = null rdd2 = null } }
Example 187
Source File: UnionRDD.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd

import java.io.{IOException, ObjectOutputStream}

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag

import org.apache.spark.{Dependency, Partition, RangeDependency, SparkContext, TaskContext}
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.util.Utils

private[spark] class UnionPartition[T: ClassTag](
    idx: Int,
    @transient rdd: RDD[T],
    val parentRddIndex: Int,
    @transient parentRddPartitionIndex: Int)
  extends Partition {

  var parentPartition: Partition = rdd.partitions(parentRddPartitionIndex)

  def preferredLocations(): Seq[String] = rdd.preferredLocations(parentPartition)

  override val index: Int = idx

  @throws(classOf[IOException])
  private def writeObject(oos: ObjectOutputStream): Unit = Utils.tryOrIOException {
    // Update the reference to parent split at the time of task serialization
    parentPartition = rdd.partitions(parentRddPartitionIndex)
    oos.defaultWriteObject()
  }
}

@DeveloperApi
class UnionRDD[T: ClassTag](
    sc: SparkContext,
    var rdds: Seq[RDD[T]])
  extends RDD[T](sc, Nil) {  // Nil since we implement getDependencies

  override def getPartitions: Array[Partition] = {
    val array = new Array[Partition](rdds.map(_.partitions.length).sum)
    var pos = 0
    for ((rdd, rddIndex) <- rdds.zipWithIndex; split <- rdd.partitions) {
      array(pos) = new UnionPartition(pos, rdd, rddIndex, split.index)
      pos += 1
    }
    array
  }

  override def getDependencies: Seq[Dependency[_]] = {
    val deps = new ArrayBuffer[Dependency[_]]
    var pos = 0
    for (rdd <- rdds) {
      deps += new RangeDependency(rdd, 0, pos, rdd.partitions.length)
      pos += rdd.partitions.length
    }
    deps
  }

  override def compute(s: Partition, context: TaskContext): Iterator[T] = {
    val part = s.asInstanceOf[UnionPartition[T]]
    parent[T](part.parentRddIndex).iterator(part.parentPartition, context)
  }

  override def getPreferredLocations(s: Partition): Seq[String] =
    s.asInstanceOf[UnionPartition[T]].preferredLocations()

  override def clearDependencies() {
    super.clearDependencies()
    rdds = null
  }
}
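UnionRDD is what SparkContext.union builds when the parent RDDs do not share a partitioner: the result simply concatenates the parents' partitions. A usage sketch, assuming Spark is on the classpath:

import org.apache.spark.{SparkConf, SparkContext}

object UnionDemo extends App {
  val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("union-demo"))
  val first = sc.parallelize(1 to 3)
  val second = sc.parallelize(4 to 6)
  val combined = sc.union(Seq(first, second))
  println(combined.partitions.length)   // sum of the parents' partition counts
  println(combined.collect().toSeq)     // Seq(1, 2, 3, 4, 5, 6)
  sc.stop()
}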
Example 188
Source File: PartitionerAwareUnionRDD.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.rdd

import java.io.{IOException, ObjectOutputStream}

import scala.reflect.ClassTag

import org.apache.spark.{OneToOneDependency, Partition, SparkContext, TaskContext}
import org.apache.spark.util.Utils

private[spark] class PartitionerAwareUnionRDD[T: ClassTag](
    sc: SparkContext,
    var rdds: Seq[RDD[T]]
  ) extends RDD[T](sc, rdds.map(x => new OneToOneDependency(x))) {
  require(rdds.length > 0)
  require(rdds.forall(_.partitioner.isDefined))
  require(rdds.flatMap(_.partitioner).toSet.size == 1,
    "Parent RDDs have different partitioners: " + rdds.flatMap(_.partitioner))

  override val partitioner = rdds.head.partitioner

  override def getPartitions: Array[Partition] = {
    val numPartitions = partitioner.get.numPartitions
    (0 until numPartitions).map(index => {
      new PartitionerAwareUnionRDDPartition(rdds, index)
    }).toArray
  }

  // Get the location where most of the partitions of parent RDDs are located
  override def getPreferredLocations(s: Partition): Seq[String] = {
    logDebug("Finding preferred location for " + this + ", partition " + s.index)
    val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents
    val locations = rdds.zip(parentPartitions).flatMap {
      case (rdd, part) => {
        val parentLocations = currPrefLocs(rdd, part)
        logDebug("Location of " + rdd + " partition " + part.index + " = " + parentLocations)
        parentLocations
      }
    }
    val location = if (locations.isEmpty) {
      None
    } else {
      // Find the location that maximum number of parent partitions prefer
      Some(locations.groupBy(x => x).maxBy(_._2.length)._1)
    }
    logDebug("Selected location for " + this + ", partition " + s.index + " = " + location)
    location.toSeq
  }

  override def compute(s: Partition, context: TaskContext): Iterator[T] = {
    val parentPartitions = s.asInstanceOf[PartitionerAwareUnionRDDPartition].parents
    rdds.zip(parentPartitions).iterator.flatMap {
      case (rdd, p) => rdd.iterator(p, context)
    }
  }

  override def clearDependencies() {
    super.clearDependencies()
    rdds = null
  }

  // Get the *current* preferred locations from the DAGScheduler (as opposed to the static ones)
  private def currPrefLocs(rdd: RDD[_], part: Partition): Seq[String] = {
    rdd.context.getPreferredLocs(rdd, part.index).map(tl => tl.host)
  }
}
Example 189
Source File: HDFSUtil.scala From aerosolve with Apache License 2.0 | 5 votes |
package com.airbnb.common.ml.util

import java.io.{BufferedReader, IOException, InputStreamReader}
import java.net.URI

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object HDFSUtil extends ScalaLogging {

  private lazy val hadoopConfiguration = new Configuration()

  def lastTaskSucceed(path: String): Boolean = {
    if (dirExists(path)) {
      if (dirExists(path + "/_temporary")) {
        logger.info(s"Deleting partial data for $path.")
        deleteDirWithoutThrow(path)
        false
      } else {
        logger.info(s"$path exists")
        true
      }
    } else {
      logger.info(s"$path does not exist")
      false
    }
  }

  def dirExists(dir: String): Boolean = {
    val path = new Path(dir)
    val hdfs = FileSystem.get(
      new java.net.URI(dir), hadoopConfiguration)
    hdfs.exists(path)
  }

  def deleteDirWithoutThrow(dir: String): Unit = {
    val path = new Path(dir)
    val hdfs = FileSystem.get(
      new java.net.URI(dir), hadoopConfiguration)
    if (hdfs.exists(path)) {
      logger.warn(s"$dir exists, DELETING")
      try {
        hdfs.delete(path, true)
      } catch {
        case e: IOException => logger.error(s" exception $e")
      }
    }
  }

  def createPath(path: String): Unit = {
    val remotePath = new Path(path)
    val remoteFS = remotePath.getFileSystem(hadoopConfiguration)
    remoteFS.mkdirs(new Path(path))
  }

  def readStringFromFile(inputFile: String): String = {
    val fs = FileSystem.get(new URI(inputFile), hadoopConfiguration)
    val path = new Path(inputFile)
    val stream = fs.open(path)
    val reader = new BufferedReader(new InputStreamReader(stream))
    val str = Stream.continually(reader.readLine()).takeWhile(_ != null).mkString("\n")
    str
  }
}
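A usage sketch of the helper above; the paths and the HdfsUtilDemo object are hypothetical, and a Hadoop configuration that can resolve the hdfs:// authority is assumed to be on the classpath:

import com.airbnb.common.ml.util.HDFSUtil

object HdfsUtilDemo {
  def main(args: Array[String]): Unit = {
    val output = "hdfs://namenode:8020/tmp/example-output"
    // lastTaskSucceed deletes a leftover _temporary directory and reports false,
    // signalling that the previous run should be redone.
    if (!HDFSUtil.lastTaskSucceed(output)) {
      HDFSUtil.createPath(output)
      // ... run the job that writes to `output` ...
    }
    // Read a small side file back as a single string
    val config = HDFSUtil.readStringFromFile("hdfs://namenode:8020/tmp/example-config.json")
    println(config.length)
  }
}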
Example 190
Source File: DirectoryListFileFinder.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.commons.utils

import java.io.{File, FileNotFoundException, IOException}

import scala.util.{Failure, Success, Try}

// Excerpt: members of the DirectoryListFileFinder abstract class; the enclosing class
// declaration (which defines `dirsToSearch` and `listFilesInDirectory`) is elided here.
  def filePredicate(f: File, desc: Option[String]): Boolean

  def findFile(): Try[File] = {
    findFile(None)
  }

  def findFile(desc: String): Try[File] = {
    findFile(Some(desc))
  }

  def findFile(desc: Option[String]): Try[File] = {
    findPotentialFiles(
      dirsToSearch, listFilesInDirectory
      // convert to Try - give a nice message in the exception concerning the dirs, otherwise just Success it
    ).fold(dirs => Failure(
      new IOException(s"Unable to list files in dirs: ${dirs.mkString(", ")}")
    ),
      Success[Seq[File]]
    ).flatMap(_.find(filePredicate(_, desc))
      .map(Success[File])
      .getOrElse(Failure(
        new FileNotFoundException(
          s"Unable to find file ${desc.map(_ + " ").getOrElse("")}" +
            s"in dirs: ${dirsToSearch.mkString(", ")}")
      ))
    )
  }
}

object DirectoryListFileFinder {
  type EitherBadDirsOrFiles = Either[Seq[File], Seq[File]]

  def findPotentialFiles(
      dirs: Traversable[File],
      listFilesInDirectory: File => Option[Seq[File]]): EitherBadDirsOrFiles = {
    dirs.map { dir =>
      val files = listFilesInDirectory(dir)
      // if we're unable to list files inside the dir then
      // let's not lose this information by keeping the dir in Left
      files.toRight(dir)
    }.foldLeft(Right(Seq[File]()): EitherBadDirsOrFiles) {
      case (Left(badDirs), Left(badDir)) => Left(badDir +: badDirs)
      case (Left(badDirs), Right(_)) => Left(badDirs)
      case (Right(_), Left(badDir)) => Left(Seq(badDir))
      case (Right(files), Right(files2)) => Right(files ++ files2)
      case _ => ??? // to silence buggy 2.10 non-exhaustive match warning
    }
  }
}
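A usage sketch of the companion's findPotentialFiles, whose signature is shown above. The demo object and directories are hypothetical, and the Seahorse commons classes are assumed to be on the classpath; File.listFiles() returning null is mapped to None, so unreadable directories end up in Left.

import java.io.File
import ai.deepsense.commons.utils.DirectoryListFileFinder

object FindPotentialFilesDemo extends App {
  val dirs: Traversable[File] = Seq(new File("/tmp"), new File("/does-not-exist"))
  val result = DirectoryListFileFinder.findPotentialFiles(
    dirs,
    dir => Option(dir.listFiles()).map(_.toSeq)
  )
  result match {
    case Right(files)  => println(s"Found ${files.size} files")
    case Left(badDirs) => println(s"Could not list: ${badDirs.mkString(", ")}")
  }
}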
Example 191
Source File: WriteTransformer.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperations

import java.io.{File, IOException}

import scala.reflect.runtime.{universe => ru}

import ai.deepsense.commons.utils.Version
import ai.deepsense.commons.utils.FileOperations.deleteRecursivelyIfExists
import ai.deepsense.deeplang.DOperation.Id
import ai.deepsense.deeplang.documentation.OperationDocumentation
import ai.deepsense.deeplang.doperables.Transformer
import ai.deepsense.deeplang.doperations.exceptions.DeepSenseIOException
import ai.deepsense.deeplang.params.{BooleanParam, Params, StringParam}
import ai.deepsense.deeplang.{DOperation1To0, ExecutionContext}
import java.net.URI
import org.apache.hadoop.fs.{FileSystem, Path}

case class WriteTransformer()
  extends DOperation1To0[Transformer]
  with Params
  with OperationDocumentation {

  override val id: Id = "58368deb-68d0-4657-ae3f-145160cb1e2b"
  override val name: String = "Write Transformer"
  override val description: String = "Writes a Transformer to a directory"

  override val since: Version = Version(1, 1, 0)

  val shouldOverwrite = BooleanParam(
    name = "overwrite",
    description = Some("Should an existing transformer with the same name be overwritten?")
  )
  setDefault(shouldOverwrite, true)

  def getShouldOverwrite: Boolean = $(shouldOverwrite)
  def setShouldOverwrite(value: Boolean): this.type = set(shouldOverwrite, value)

  val outputPath = StringParam(
    name = "output path",
    description = Some("The output path for writing the Transformer."))

  def getOutputPath: String = $(outputPath)
  def setOutputPath(value: String): this.type = set(outputPath, value)

  val specificParams: Array[ai.deepsense.deeplang.params.Param[_]] = Array(outputPath, shouldOverwrite)

  override protected def execute(transformer: Transformer)(context: ExecutionContext): Unit = {
    val outputDictPath = getOutputPath
    try {
      if (getShouldOverwrite) {
        removeDirectory(context, outputDictPath)
      }
      transformer.save(context, outputDictPath)
    } catch {
      case e: IOException =>
        logger.error(s"WriteTransformer error. Could not write transformer to the directory", e)
        throw DeepSenseIOException(e)
    }
  }

  private def removeDirectory(context: ExecutionContext, path: String): Unit = {
    if (path.startsWith("hdfs://")) {
      val configuration = context.sparkContext.hadoopConfiguration
      val hdfs = FileSystem.get(new URI(extractHdfsAddress(path)), configuration)
      hdfs.delete(new Path(path), true)
    } else {
      deleteRecursivelyIfExists(new File(path))
    }
  }

  private def extractHdfsAddress(path: String): String = {
    // first group: "hdfs://ip.addr.of.hdfs", second group: "/some/path/on/hdfs"
    val regex = "(hdfs:\\/\\/[^\\/]*)(.*)".r
    val regex(hdfsAddress, _) = path
    hdfsAddress
  }

  @transient
  override lazy val tTagTI_0: ru.TypeTag[Transformer] = ru.typeTag[Transformer]
}

object WriteTransformer {
  def apply(outputPath: String): WriteTransformer = {
    new WriteTransformer().setOutputPath(outputPath)
  }
}
Example 192
Source File: DriverFiles.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperations.readwritedataframe.filestorage

import java.io.{File, IOException, PrintWriter}

import scala.io.Source

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.execution.datasources.csv.{DataframeToDriverCsvFileWriter, RawCsvRDDToDataframe}
import org.apache.spark.sql.{Dataset, Encoders, Row, SaveMode, DataFrame => SparkDataFrame}

import ai.deepsense.commons.resources.ManagedResource
import ai.deepsense.deeplang.ExecutionContext
import ai.deepsense.deeplang.doperables.dataframe.DataFrame
import ai.deepsense.deeplang.doperations.inout.{InputFileFormatChoice, OutputFileFormatChoice}
import ai.deepsense.deeplang.doperations.readwritedataframe.filestorage.csv.CsvOptions
import ai.deepsense.deeplang.doperations.readwritedataframe.{FilePath, FileScheme}
import ai.deepsense.deeplang.readjsondataset.JsonReader
import ai.deepsense.sparkutils.SQL

object DriverFiles extends JsonReader {

  def read(driverPath: String, fileFormat: InputFileFormatChoice)
          (implicit context: ExecutionContext): SparkDataFrame = fileFormat match {
    case csv: InputFileFormatChoice.Csv => readCsv(driverPath, csv)
    case json: InputFileFormatChoice.Json => readJson(driverPath)
    case parquet: InputFileFormatChoice.Parquet => throw ParquetNotSupported
  }

  def write(dataFrame: DataFrame, path: FilePath, fileFormat: OutputFileFormatChoice, saveMode: SaveMode)
           (implicit context: ExecutionContext): Unit = {
    path.verifyScheme(FileScheme.File)
    if (saveMode == SaveMode.ErrorIfExists && new File(path.pathWithoutScheme).exists()) {
      throw new IOException(s"Output file ${path.fullPath} already exists")
    }
    fileFormat match {
      case csv: OutputFileFormatChoice.Csv => writeCsv(path, csv, dataFrame)
      case json: OutputFileFormatChoice.Json => writeJson(path, dataFrame)
      case parquet: OutputFileFormatChoice.Parquet => throw ParquetNotSupported
    }
  }

  private def readCsv
      (driverPath: String, csvChoice: InputFileFormatChoice.Csv)
      (implicit context: ExecutionContext): SparkDataFrame = {
    val params = CsvOptions.map(csvChoice.getNamesIncluded, csvChoice.getCsvColumnSeparator())
    val lines = Source.fromFile(driverPath).getLines().toStream
    val fileLinesRdd = context.sparkContext.parallelize(lines)

    RawCsvRDDToDataframe.parse(fileLinesRdd, context.sparkSQLSession.sparkSession, params)
  }

  private def readJson(driverPath: String)(implicit context: ExecutionContext) = {
    val lines = Source.fromFile(driverPath).getLines().toStream
    val fileLinesRdd = context.sparkContext.parallelize(lines)
    val sparkSession = context.sparkSQLSession.sparkSession
    readJsonFromRdd(fileLinesRdd, sparkSession)
  }

  private def writeCsv
      (path: FilePath, csvChoice: OutputFileFormatChoice.Csv, dataFrame: DataFrame)
      (implicit context: ExecutionContext): Unit = {
    val params = CsvOptions.map(csvChoice.getNamesIncluded, csvChoice.getCsvColumnSeparator())

    DataframeToDriverCsvFileWriter.write(
      dataFrame.sparkDataFrame,
      params,
      dataFrame.schema.get,
      path.pathWithoutScheme,
      context.sparkSQLSession.sparkSession
    )
  }

  private def writeJson(path: FilePath, dataFrame: DataFrame)
                       (implicit context: ExecutionContext): Unit = {
    val rawJsonLines: RDD[String] = SQL.dataFrameToJsonRDD(dataFrame.sparkDataFrame)
    writeRddToDriverFile(path.pathWithoutScheme, rawJsonLines)
  }

  private def writeRddToDriverFile(driverPath: String, lines: RDD[String]): Unit = {
    val recordSeparator = System.getProperty("line.separator", "\n")
    ManagedResource(new PrintWriter(driverPath)) { writer =>
      lines.collect().foreach(line => writer.write(line + recordSeparator))
    }
  }
}
Example 193
Source File: FileDownloader.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperations.readwritedataframe.filestorage

import java.io.{BufferedWriter, FileOutputStream, IOException, OutputStreamWriter}
import java.nio.file.{Files, Paths}
import java.util.UUID

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

import ai.deepsense.deeplang.ExecutionContext
import ai.deepsense.deeplang.doperations.exceptions.DeepSenseIOException
import ai.deepsense.deeplang.doperations.readwritedataframe.FilePath

private[filestorage] object FileDownloader {

  def downloadFile(url: String)(implicit context: ExecutionContext): FilePath = {
    if (context.tempPath.startsWith("hdfs://")) {
      downloadFileToHdfs(url)
    } else {
      downloadFileToDriver(url)
    }
  }

  private def downloadFileToHdfs(url: String)(implicit context: ExecutionContext) = {
    val content = scala.io.Source.fromURL(url).getLines()
    val hdfsPath = s"${context.tempPath}/${UUID.randomUUID()}"

    val configuration = new Configuration()
    val hdfs = FileSystem.get(configuration)
    val file = new Path(hdfsPath)
    val hdfsStream = hdfs.create(file)
    val writer = new BufferedWriter(new OutputStreamWriter(hdfsStream))
    try {
      content.foreach { s =>
        writer.write(s)
        writer.newLine()
      }
    } finally {
      safeClose(writer)
      hdfs.close()
    }

    FilePath(hdfsPath)
  }

  private def downloadFileToDriver(url: String)
                                  (implicit context: ExecutionContext) = {
    val outputDirPath = Paths.get(context.tempPath)
    // We're checking if the output is a directory following symlinks.
    // The default behaviour of createDirectories is NOT to follow symlinks
    if (!Files.isDirectory(outputDirPath)) {
      Files.createDirectories(outputDirPath)
    }

    val outFilePath = Files.createTempFile(outputDirPath, "download", ".csv")
    // content is a stream. Do not invoke stuff like .toList() on it.
    val content = scala.io.Source.fromURL(url).getLines()
    val writer: BufferedWriter =
      new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outFilePath.toFile)))
    try {
      content.foreach { s =>
        writer.write(s)
        writer.newLine()
      }
    } finally {
      safeClose(writer)
    }
    FilePath(s"file:///$outFilePath")
  }

  private def safeClose(bufferedWriter: BufferedWriter): Unit = {
    try {
      bufferedWriter.flush()
      bufferedWriter.close()
    } catch {
      case e: IOException => throw new DeepSenseIOException(e)
    }
  }
}
Example 194
Source File: WriteDataFrame.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperations

import java.io.IOException
import java.util.Properties

import scala.reflect.runtime.{universe => ru}

import ai.deepsense.commons.utils.Version
import ai.deepsense.deeplang.DOperation.Id
import ai.deepsense.deeplang._
import ai.deepsense.deeplang.documentation.OperationDocumentation
import ai.deepsense.deeplang.doperables.dataframe.DataFrame
import ai.deepsense.deeplang.doperations.exceptions.DeepSenseIOException
import ai.deepsense.deeplang.doperations.inout._
import ai.deepsense.deeplang.doperations.readwritedataframe.filestorage.DataFrameToFileWriter
import ai.deepsense.deeplang.doperations.readwritedataframe.googlestorage.DataFrameToGoogleSheetWriter
import ai.deepsense.deeplang.doperations.readwritedataframe.validators.{FilePathHasValidFileScheme, ParquetSupportedOnClusterOnly}
import ai.deepsense.deeplang.inference.{InferContext, InferenceWarnings}
import ai.deepsense.deeplang.params.choice.ChoiceParam
import ai.deepsense.deeplang.params.{Param, Params}
import org.apache.spark.sql.SaveMode

class WriteDataFrame()
  extends DOperation1To0[DataFrame]
  with Params
  with OperationDocumentation {

  override val id: Id = "9e460036-95cc-42c5-ba64-5bc767a40e4e"
  override val name: String = "Write DataFrame"
  override val description: String = "Writes a DataFrame to a file or database"

  override val since: Version = Version(0, 4, 0)

  @transient
  override lazy val tTagTI_0: ru.TypeTag[DataFrame] = ru.typeTag[DataFrame]

  val storageType = ChoiceParam[OutputStorageTypeChoice](
    name = "data storage type",
    description = Some("Storage type."))

  def getStorageType(): OutputStorageTypeChoice = $(storageType)
  def setStorageType(value: OutputStorageTypeChoice): this.type = set(storageType, value)

  val specificParams: Array[Param[_]] = Array(storageType)
  setDefault(storageType, new OutputStorageTypeChoice.File())

  override def execute(dataFrame: DataFrame)(context: ExecutionContext): Unit = {
    import OutputStorageTypeChoice._
    try {
      getStorageType() match {
        case jdbcChoice: Jdbc => writeToJdbc(jdbcChoice, context, dataFrame)
        case googleSheetChoice: GoogleSheet => DataFrameToGoogleSheetWriter.writeToGoogleSheet(
          googleSheetChoice, context, dataFrame
        )
        case fileChoice: File => DataFrameToFileWriter.writeToFile(fileChoice, context, dataFrame)
      }
    } catch {
      case e: IOException =>
        logger.error(s"WriteDataFrame error. Could not write file to designated storage", e)
        throw DeepSenseIOException(e)
    }
  }

  private def writeToJdbc(
      jdbcChoice: OutputStorageTypeChoice.Jdbc,
      context: ExecutionContext,
      dataFrame: DataFrame): Unit = {
    val properties = new Properties()
    properties.setProperty("driver", jdbcChoice.getJdbcDriverClassName)

    val jdbcUrl = jdbcChoice.getJdbcUrl
    val jdbcTableName = jdbcChoice.getJdbcTableName
    val saveMode = if (jdbcChoice.getShouldOverwrite) SaveMode.Overwrite else SaveMode.ErrorIfExists

    dataFrame.sparkDataFrame.write.mode(saveMode).jdbc(jdbcUrl, jdbcTableName, properties)
  }

  override def inferKnowledge(k0: DKnowledge[DataFrame])(context: InferContext): (Unit, InferenceWarnings) = {
    FilePathHasValidFileScheme.validate(this)
    ParquetSupportedOnClusterOnly.validate(this)
    super.inferKnowledge(k0)(context)
  }
}
Example 195
Source File: Loader.scala From tensorflow_scala with Apache License 2.0 | 5 votes |
package org.platanios.tensorflow.data

import com.typesafe.scalalogging.Logger

import java.io.IOException
import java.net.URL
import java.nio.file.{Files, Path}

import scala.collection.compat.immutable.LazyList
import scala.io.Source
import scala.util.matching.Regex

trait Loader {
  protected val logger: Logger

  protected val googleDriveConfirmTokenRegex: Regex = {
    """<a id="uc-download-link".*href="/uc\?export=download&(confirm=.*)&id=.*">Download anyway</a>""".r
  }

  def maybeDownload(path: Path, url: String, bufferSize: Int = 8192): Boolean = {
    if (Files.exists(path)) {
      false
    } else {
      try {
        logger.info(s"Downloading file '$url'.")
        Files.createDirectories(path.getParent)
        download(path, url, bufferSize)

        // Small hack to deal with downloading large Google Drive files.
        if (Files.size(path) < 1024 * 1024 && url.contains("drive.google.com")) {
          val content = Source.fromFile(path.toFile).getLines().mkString("\n")
          googleDriveConfirmTokenRegex.findFirstMatchIn(content) match {
            case Some(confirmToken) => download(path, s"$url&${confirmToken.group(1)}", bufferSize)
            case None => ()
          }
        }

        logger.info(s"Downloaded file '$url'.")
        true
      } catch {
        case e: IOException =>
          logger.error(s"Could not download file '$url'", e)
          throw e
      }
    }
  }

  protected def download(path: Path, url: String, bufferSize: Int = 8192): Unit = {
    val connection = new URL(url).openConnection()
    val contentLength = connection.getContentLengthLong
    val inputStream = connection.getInputStream
    val outputStream = Files.newOutputStream(path)
    val buffer = new Array[Byte](bufferSize)
    var progress = 0L
    var progressLogTime = System.currentTimeMillis
    LazyList.continually(inputStream.read(buffer)).takeWhile(_ != -1).foreach(numBytes => {
      outputStream.write(buffer, 0, numBytes)
      progress += numBytes
      val time = System.currentTimeMillis
      if (time - progressLogTime >= 1e4) {
        if (contentLength > 0) {
          val numBars = Math.floorDiv(10 * progress, contentLength).toInt
          logger.info(s"[${"=" * numBars}${" " * (10 - numBars)}] $progress / $contentLength bytes downloaded.")
          progressLogTime = time
        } else {
          logger.info(s"$progress bytes downloaded.")
          progressLogTime = time
        }
      }
    })
    outputStream.close()
  }
}
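A usage sketch of the trait: a concrete loader only has to provide a logger, maybeDownload is a no-op if the target file already exists, and the IOException is rethrown after logging otherwise. The ExampleDataLoader object, the URL, and the target path below are hypothetical; the logger is built with scala-logging's Logger.apply over an SLF4J logger.

import java.nio.file.Paths
import com.typesafe.scalalogging.Logger
import org.slf4j.LoggerFactory
import org.platanios.tensorflow.data.Loader

object ExampleDataLoader extends Loader {
  override protected val logger: Logger = Logger(LoggerFactory.getLogger("ExampleDataLoader"))
}

object LoaderDemo extends App {
  val downloaded = ExampleDataLoader.maybeDownload(
    Paths.get("/tmp/data/iris.csv"),
    "https://example.org/iris.csv")
  println(s"Downloaded this run: $downloaded")
}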
Example 196
Source File: ProcessDebuggerIntegrationSpec.scala From scala-debugger with Apache License 2.0 | 5 votes |
package org.scaladebugger.api.debuggers

import java.io.IOException
import java.util.concurrent.atomic.AtomicBoolean

import org.scaladebugger.api.utils.JDITools
import org.scaladebugger.test.helpers.ParallelMockFunSpec
import test.{ApiTestUtilities, VirtualMachineFixtures}

import scala.util.Try

class ProcessDebuggerIntegrationSpec extends ParallelMockFunSpec
  with VirtualMachineFixtures
  with ApiTestUtilities {

  describe("ProcessDebugger") {
    it("should be able to attach to a running JVM process") {
      withProcess((pid, process) => {
        val processDebugger = ProcessDebugger(pid)

        val attachedToVirtualMachine = new AtomicBoolean(false)

        // Need to keep retrying until process is ready to be attached to
        // NOTE: If unable to connect, ensure that hostname is "localhost"
        eventually {
          processDebugger.start(_ => attachedToVirtualMachine.set(true))
        }

        // Keep checking back until we have successfully attached
        eventually {
          attachedToVirtualMachine.get() should be (true)
        }
      })
    }
  }

  private def withProcess[T](testCode: (Int, Process) => T): T = {
    val jvmProcess = createProcess()

    val result = Try(testCode(jvmProcess._1, jvmProcess._2))

    destroyProcess(jvmProcess._2)

    result.get
  }

  private def createProcess(): (Int, Process) = {
    val (pid, process) = JDITools.spawnAndGetPid(
      className = "org.scaladebugger.test.misc.AttachingMain",
      server = true,
      port = 0 // Assign ephemeral port
    )

    // If unable to retrieve the process PID, exit now
    if (pid <= 0) {
      process.destroy()
      throw new IOException("Unable to retrieve process PID!")
    }

    (pid, process)
  }

  private def destroyProcess(process: Process): Unit = process.destroy()
}
Example 197
Source File: ServiceDiscoverySuite.scala From kyuubi with Apache License 2.0 | 5 votes |
package org.apache.kyuubi.ha.client

import java.io.{File, IOException}
import javax.security.auth.login.Configuration

import scala.collection.JavaConverters._

import org.apache.kyuubi.{KerberizedTestHelper, KyuubiFunSuite}
import org.apache.kyuubi.KYUUBI_VERSION
import org.apache.kyuubi.config.KyuubiConf
import org.apache.kyuubi.ha.HighAvailabilityConf._
import org.apache.kyuubi.ha.server.EmbeddedZkServer
import org.apache.kyuubi.service.ServiceState

class ServiceDiscoverySuite extends KyuubiFunSuite with KerberizedTestHelper {
  val zkServer = new EmbeddedZkServer()
  val conf: KyuubiConf = KyuubiConf()

  override def beforeAll(): Unit = {
    conf.set(KyuubiConf.EMBEDDED_ZK_PORT, 0)
    zkServer.initialize(conf)
    zkServer.start()
    super.beforeAll()
  }

  override def afterAll(): Unit = {
    conf.unset(KyuubiConf.SERVER_KEYTAB)
    conf.unset(KyuubiConf.SERVER_PRINCIPAL)
    conf.unset(HA_ZK_QUORUM)
    zkServer.stop()
    super.afterAll()
  }

  test("set up zookeeper auth") {
    tryWithSecurityEnabled {
      val keytab = File.createTempFile("kentyao", ".keytab")
      val principal = "kentyao/[email protected]"

      conf.set(KyuubiConf.SERVER_KEYTAB, keytab.getCanonicalPath)
      conf.set(KyuubiConf.SERVER_PRINCIPAL, principal)

      ServiceDiscovery.setUpZooKeeperAuth(conf)
      val configuration = Configuration.getConfiguration
      val entries = configuration.getAppConfigurationEntry("KyuubiZooKeeperClient")

      assert(entries.head.getLoginModuleName === "com.sun.security.auth.module.Krb5LoginModule")
      val options = entries.head.getOptions.asScala.toMap
      assert(options("principal") === "kentyao/[email protected]")
      assert(options("useKeyTab").toString.toBoolean)

      conf.set(KyuubiConf.SERVER_KEYTAB, keytab.getName)
      val e = intercept[IOException](ServiceDiscovery.setUpZooKeeperAuth(conf))
      assert(e.getMessage === s"${KyuubiConf.SERVER_KEYTAB.key} does not exists")
    }
  }

  test("publish instance to embedded zookeeper server") {
    conf
      .unset(KyuubiConf.SERVER_KEYTAB)
      .unset(KyuubiConf.SERVER_PRINCIPAL)
      .set(HA_ZK_QUORUM, zkServer.getConnectString)

    val namespace = "kyuubiserver"
    val znodeRoot = s"/$namespace"
    val instance = "kentyao.apache.org:10009"
    var deleted = false
    val postHook = new Thread {
      override def run(): Unit = deleted = true
    }
    val serviceDiscovery = new ServiceDiscovery(instance, namespace, postHook)
    val framework = ServiceDiscovery.newZookeeperClient(conf)
    try {
      serviceDiscovery.initialize(conf)
      serviceDiscovery.start()

      assert(framework.checkExists().forPath("/abc") === null)
      assert(framework.checkExists().forPath(znodeRoot) !== null)
      val children = framework.getChildren.forPath(znodeRoot).asScala
      assert(children.head ===
        s"serviceUri=$instance;version=$KYUUBI_VERSION;sequence=0000000000")

      children.foreach { child =>
        framework.delete().forPath(s"""$znodeRoot/$child""")
      }
      Thread.sleep(5000)
      assert(deleted, "Post hook called")
      assert(serviceDiscovery.getServiceState === ServiceState.STOPPED)
    } finally {
      serviceDiscovery.stop()
      framework.close()
    }
  }
}
Example 198
Source File: LogFile.scala From kyuubi with Apache License 2.0 | 5 votes |
package yaooqinn.kyuubi.operation

import java.io.{BufferedReader, File, FileInputStream, FileNotFoundException, FileOutputStream, InputStreamReader, IOException, PrintStream}
import java.util.ArrayList

import scala.collection.JavaConverters._

import org.apache.commons.io.FileUtils
import org.apache.hadoop.io.IOUtils
import org.apache.kyuubi.Logging
import org.apache.spark.sql.Row

import yaooqinn.kyuubi.KyuubiSQLException

class LogFile private (
    file: File,
    private var reader: Option[BufferedReader],
    writer: PrintStream,
    @volatile private var isRemoved: Boolean = false) extends Logging {

  def this(file: File) = {
    this(file,
      LogFile.createReader(file, isRemoved = false),
      new PrintStream(new FileOutputStream(file)))
  }

  private def resetReader(): Unit = {
    reader.foreach(IOUtils.closeStream)
    reader = None
  }

  private def readResults(nLines: Long): Seq[Row] = {
    reader = reader.orElse(LogFile.createReader(file, isRemoved))

    val logs = new ArrayList[Row]()
    reader.foreach { r =>
      var i = 1
      try {
        var line: String = r.readLine()
        while ((i < nLines || nLines <= 0) && line != null) {
          logs.add(Row(line))
          line = r.readLine()
          i += 1
        }
      } catch {
        case e: FileNotFoundException =>
          val operationHandle = file.getName
          val path = file.getAbsolutePath
          val msg = if (isRemoved) {
            s"Operation[$operationHandle] has been closed and the log file $path has been removed"
          } else {
            s"Operation[$operationHandle] Log file $path is not found"
          }
          throw new KyuubiSQLException(msg, e)
      }
    }
    logs.asScala
  }

  def write(msg: String): Unit = {
    writer.print(msg)
  }

  def close(): Unit = synchronized {
    try {
      reader.foreach(_.close())
      writer.close()
      if (!isRemoved) {
        FileUtils.forceDelete(file)
        isRemoved = true
      }
    } catch {
      case e: IOException =>
        error(s"Failed to remove corresponding log file of operation: ${file.getName}", e)
    }
  }
}

object LogFile {

  def createReader(file: File, isRemoved: Boolean): Option[BufferedReader] = try {
    Option(new BufferedReader(new InputStreamReader(new FileInputStream(file))))
  } catch {
    case e: FileNotFoundException =>
      val operationHandle = file.getName
      val path = file.getAbsolutePath
      val msg = if (isRemoved) {
        s"Operation[$operationHandle] has been closed and the log file $path has been removed"
      } else {
        s"Operation[$operationHandle] Log file $path is not found"
      }
      throw new KyuubiSQLException(msg, e)
  }
}
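A usage sketch of the public auxiliary constructor together with write/close, assuming the Kyuubi classes are on the classpath. The path is hypothetical; note that close() force-deletes the backing file once the operation is done.

import java.io.File
import yaooqinn.kyuubi.operation.LogFile

object LogFileDemo extends App {
  val file = new File("/tmp/operation-42.log")
  val log = new LogFile(file)      // opens the PrintStream eagerly
  log.write("query started\n")
  log.write("query finished\n")
  log.close()                      // also deletes /tmp/operation-42.log
}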
Example 199
Source File: JsonFileReporter.scala From kyuubi with Apache License 2.0 | 5 votes |
package yaooqinn.kyuubi.metrics

import java.io.{BufferedWriter, Closeable, IOException, OutputStreamWriter}
import java.util.{Timer, TimerTask}
import java.util.concurrent.TimeUnit

import scala.util.Try
import scala.util.control.NonFatal

import com.codahale.metrics.MetricRegistry
import com.codahale.metrics.json.MetricsModule
import com.fasterxml.jackson.databind.ObjectMapper
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.kyuubi.Logging
import org.apache.spark.{KyuubiSparkUtil, SparkConf}
import org.apache.spark.KyuubiConf._

private[metrics] class JsonFileReporter(conf: SparkConf, registry: MetricRegistry)
  extends Closeable with Logging {

  private val jsonMapper = new ObjectMapper().registerModule(
    new MetricsModule(TimeUnit.MILLISECONDS, TimeUnit.MILLISECONDS, false))
  private val timer = new Timer(true)
  private val interval = KyuubiSparkUtil.timeStringAsMs(conf.get(METRICS_REPORT_INTERVAL))
  private val path = conf.get(METRICS_REPORT_LOCATION)
  private val hadoopConf = KyuubiSparkUtil.newConfiguration(conf)

  def start(): Unit = {
    timer.schedule(new TimerTask {
      var bw: BufferedWriter = _
      override def run(): Unit = try {
        val json = jsonMapper.writerWithDefaultPrettyPrinter().writeValueAsString(registry)
        val tmpPath = new Path(path + ".tmp")
        val tmpPathUri = tmpPath.toUri
        val fs = if (tmpPathUri.getScheme == null && tmpPathUri.getAuthority == null) {
          FileSystem.getLocal(hadoopConf)
        } else {
          FileSystem.get(tmpPathUri, hadoopConf)
        }
        fs.delete(tmpPath, true)
        bw = new BufferedWriter(new OutputStreamWriter(fs.create(tmpPath, true)))
        bw.write(json)
        bw.close()
        fs.setPermission(tmpPath, FsPermission.createImmutable(Integer.parseInt("644", 8).toShort))
        val finalPath = new Path(path)
        fs.rename(tmpPath, finalPath)
        fs.setPermission(finalPath,
          FsPermission.createImmutable(Integer.parseInt("644", 8).toShort))
      } catch {
        case NonFatal(e) => error("Error writing metrics to json file" + path, e)
      } finally {
        if (bw != null) {
          Try(bw.close())
        }
      }
    }, 0, interval)
  }

  override def close(): Unit = {
    timer.cancel()
  }
}
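The reporter publishes each snapshot by writing to a ".tmp" sibling first and then renaming it over the final path, so readers never observe a half-written report. A standalone sketch of that write-then-rename pattern using java.nio (the paths and payload are hypothetical):

import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths, StandardCopyOption}

object AtomicPublishDemo extends App {
  val finalPath = Paths.get("/tmp/metrics.json")
  val tmpPath = Paths.get("/tmp/metrics.json.tmp")
  // Write the full payload to the temporary sibling first
  Files.write(tmpPath, """{"gauges":{}}""".getBytes(StandardCharsets.UTF_8))
  // Then replace the published file in a single rename
  Files.move(tmpPath, finalPath, StandardCopyOption.REPLACE_EXISTING)
  println(s"Published ${Files.size(finalPath)} bytes")
}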
Example 200
Source File: PlainSaslHelper.scala From kyuubi with Apache License 2.0 | 5 votes |
package yaooqinn.kyuubi.auth

import java.io.IOException
import java.security.Security
import javax.security.auth.callback._
import javax.security.auth.login.LoginException
import javax.security.sasl.{AuthenticationException, AuthorizeCallback}

import scala.collection.JavaConverters._

import org.apache.hive.service.cli.thrift.TCLIService.Iface
import org.apache.spark.SparkConf
import org.apache.thrift.{TProcessor, TProcessorFactory}
import org.apache.thrift.transport.{TSaslServerTransport, TTransport, TTransportFactory}

import yaooqinn.kyuubi.auth.AuthMethods.AuthMethods
import yaooqinn.kyuubi.auth.PlainSaslServer.SaslPlainProvider

object PlainSaslHelper {

  // Register Plain SASL server provider
  Security.addProvider(new SaslPlainProvider())

  def getProcessFactory(service: Iface): TProcessorFactory = {
    SQLPlainProcessorFactory(service)
  }

  @throws[LoginException]
  def getTransportFactory(authTypeStr: String, conf: SparkConf): TTransportFactory = {
    val saslFactory = new TSaslServerTransport.Factory()
    try {
      val handler = new PlainServerCallbackHandler(authTypeStr, conf)
      val props = Map.empty[String, String]
      saslFactory.addServerDefinition("PLAIN", authTypeStr, null, props.asJava, handler)
    } catch {
      case e: AuthenticationException =>
        throw new LoginException("Error setting callback handler" + e);
    }
    saslFactory
  }

  private class PlainServerCallbackHandler private(authMethod: AuthMethods, conf: SparkConf)
    extends CallbackHandler {

    @throws[AuthenticationException]
    def this(authMethodStr: String, conf: SparkConf) =
      this(AuthMethods.getValidAuthMethod(authMethodStr), conf)

    @throws[IOException]
    @throws[UnsupportedCallbackException]
    override def handle(callbacks: Array[Callback]): Unit = {
      var username: String = null
      var password: String = null
      var ac: AuthorizeCallback = null

      for (callback <- callbacks) {
        callback match {
          case nc: NameCallback => username = nc.getName
          case pc: PasswordCallback => password = new String(pc.getPassword)
          case a: AuthorizeCallback => ac = a
          case _ => throw new UnsupportedCallbackException(callback)
        }
      }
      val provider = AuthenticationProviderFactory.getAuthenticationProvider(authMethod, conf)
      provider.authenticate(username, password)
      if (ac != null) ac.setAuthorized(true)
    }
  }

  private case class SQLPlainProcessorFactory(service: Iface) extends TProcessorFactory(null) {
    override def getProcessor(trans: TTransport): TProcessor =
      new TSetIpAddressProcessor[Iface](service)
  }
}