org.apache.commons.io.IOUtils Scala Examples
The following examples show how to use org.apache.commons.io.IOUtils in Scala. Each example is taken from an open-source project; the project and its license are noted above the listing.
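Before the project-specific examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the IOUtils calls that recur throughout them: toString, copy, and closeQuietly. The object name and input data are only illustrative.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.charset.StandardCharsets

import org.apache.commons.io.IOUtils

object IOUtilsBasics extends App {
  // Read an entire stream into a String.
  val in = new ByteArrayInputStream("hello, world".getBytes(StandardCharsets.UTF_8))
  val text = IOUtils.toString(in, StandardCharsets.UTF_8)

  // Copy one stream into another (many examples below use this to buffer compressed data).
  val source = new ByteArrayInputStream(text.getBytes(StandardCharsets.UTF_8))
  val sink = new ByteArrayOutputStream()
  IOUtils.copy(source, sink)
  val bytes: Array[Byte] = sink.toByteArray

  // Close streams without letting close() exceptions mask the real error.
  // (Deprecated in newer commons-io releases in favour of try-with-resources.)
  IOUtils.closeQuietly(in)
  IOUtils.closeQuietly(source)
  IOUtils.closeQuietly(sink)

  println(s"read ${bytes.length} bytes: $text")
}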
Example 1
Source File: BinaryFileReader.scala From mmlspark with MIT License

// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark

import com.microsoft.ml.spark.core.env.StreamUtilities
import com.microsoft.ml.spark.core.schema.BinaryFileSchema
import com.microsoft.ml.spark.core.utils.AsyncUtils
import org.apache.commons.io.IOUtils
import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}
import org.apache.spark.binary.BinaryFileFormat
import org.apache.spark.sql.catalyst.encoders.RowEncoder
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.apache.spark.binary.ConfUtils
import org.apache.spark.sql.types.BinaryType

import scala.concurrent.{ExecutionContext, Future}
import scala.concurrent.duration.Duration

object BinaryFileReader {

  private def recursePath(fileSystem: FileSystem,
                          path: Path,
                          pathFilter: FileStatus => Boolean,
                          visitedSymlinks: Set[Path]): Array[Path] = {
    val filteredPaths = fileSystem.listStatus(path).filter(pathFilter)
    val filteredDirs = filteredPaths.filter(fs => fs.isDirectory & !visitedSymlinks(fs.getPath))
    val symlinksFound = visitedSymlinks ++ filteredDirs.filter(_.isSymlink).map(_.getPath)
    filteredPaths.map(_.getPath) ++ filteredDirs.map(_.getPath)
      .flatMap(p => recursePath(fileSystem, p, pathFilter, symlinksFound))
  }

  def recursePath(fileSystem: FileSystem, path: Path, pathFilter: FileStatus => Boolean): Array[Path] = {
    recursePath(fileSystem, path, pathFilter, Set())
  }

  def readFromPaths(df: DataFrame,
                    pathCol: String,
                    bytesCol: String,
                    concurrency: Int,
                    timeout: Int): DataFrame = {
    val outputSchema = df.schema.add(bytesCol, BinaryType, nullable = true)
    val encoder = RowEncoder(outputSchema)
    val hconf = ConfUtils.getHConf(df)

    df.mapPartitions { rows =>
      val futures = rows.map { row: Row =>
        Future {
          val path = new Path(row.getAs[String](pathCol))
          val fs = path.getFileSystem(hconf.value)
          val bytes = StreamUtilities.using(fs.open(path)) { is => IOUtils.toByteArray(is) }.get
          val ret = Row.merge(Seq(row, Row(bytes)): _*)
          ret
        }(ExecutionContext.global)
      }
      AsyncUtils.bufferedAwait(
        futures, concurrency,
        // Note: ^ is bitwise XOR in Scala, so 20 ^ 6 evaluates to 18; a millisecond-to-nanosecond
        // factor of 1e6 was presumably intended here.
        Duration.fromNanos(timeout * (20 ^ 6).toLong))(ExecutionContext.global)
    }(encoder)
  }
}
Example 2
Source File: StreamMetadata.scala From Spark-2.3.1 with Apache License 2.0

package org.apache.spark.sql.execution.streaming

import java.io.{InputStreamReader, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import scala.util.control.NonFatal

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, FSDataOutputStream, Path}
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization

import org.apache.spark.internal.Logging
import org.apache.spark.sql.streaming.StreamingQuery

// Excerpt: the enclosing object declaration is omitted in this listing.
  def write(
      metadata: StreamMetadata,
      metadataFile: Path,
      hadoopConf: Configuration): Unit = {
    var output: FSDataOutputStream = null
    try {
      val fs = metadataFile.getFileSystem(hadoopConf)
      output = fs.create(metadataFile)
      val writer = new OutputStreamWriter(output)
      Serialization.write(metadata, writer)
      writer.close()
    } catch {
      case NonFatal(e) =>
        logError(s"Error writing stream metadata $metadata to $metadataFile", e)
        throw e
    } finally {
      IOUtils.closeQuietly(output)
    }
  }
}
Example 3
Source File: Packer.scala From haystack-traces with Apache License 2.0

package com.expedia.www.haystack.trace.commons.packer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, OutputStream}
import java.util.zip.GZIPOutputStream

import com.expedia.www.haystack.trace.commons.packer.PackerType.PackerType
import com.github.luben.zstd.ZstdOutputStream
import com.google.protobuf.GeneratedMessageV3
import org.apache.commons.io.IOUtils
import org.xerial.snappy.SnappyOutputStream

object PackerType extends Enumeration {
  type PackerType = Value
  val GZIP, SNAPPY, NONE, ZSTD = Value
}

case class PackedMetadata(t: PackerType)

abstract class Packer[T <: GeneratedMessageV3] {
  val packerType: PackerType

  protected def compressStream(stream: OutputStream): OutputStream

  private def pack(protoObj: T): Array[Byte] = {
    val outStream = new ByteArrayOutputStream
    val compressedStream = compressStream(outStream)
    if (compressedStream != null) {
      IOUtils.copy(new ByteArrayInputStream(protoObj.toByteArray), compressedStream)
      compressedStream.close() // this flushes the data to final outStream
      outStream.toByteArray
    } else {
      protoObj.toByteArray
    }
  }

  def apply(protoObj: T): PackedMessage[T] = {
    PackedMessage(protoObj, pack, PackedMetadata(packerType))
  }
}

class NoopPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.NONE
  override protected def compressStream(stream: OutputStream): OutputStream = null
}

class SnappyPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.SNAPPY
  override protected def compressStream(stream: OutputStream): OutputStream = new SnappyOutputStream(stream)
}

class ZstdPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.ZSTD
  override protected def compressStream(stream: OutputStream): OutputStream = new ZstdOutputStream(stream)
}

class GzipPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.GZIP
  override protected def compressStream(stream: OutputStream): OutputStream = new GZIPOutputStream(stream)
}
Example 4
Source File: Unpacker.scala From haystack-traces with Apache License 2.0

package com.expedia.www.haystack.trace.commons.packer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream}
import java.nio.ByteBuffer
import java.util.zip.GZIPInputStream

import com.expedia.open.tracing.buffer.SpanBuffer
import com.github.luben.zstd.ZstdInputStream
import org.apache.commons.io.IOUtils
import org.json4s.jackson.Serialization
import org.xerial.snappy.SnappyInputStream

object Unpacker {
  import PackedMessage._

  private def readMetadata(packedDataBytes: Array[Byte]): Array[Byte] = {
    val byteBuffer = ByteBuffer.wrap(packedDataBytes)
    val magicBytesExist = MAGIC_BYTES.indices forall { idx => byteBuffer.get() == MAGIC_BYTES.apply(idx) }
    if (magicBytesExist) {
      val headerLength = byteBuffer.getInt
      val metadataBytes = new Array[Byte](headerLength)
      byteBuffer.get(metadataBytes, 0, headerLength)
      metadataBytes
    } else {
      null
    }
  }

  private def unpack(compressedStream: InputStream) = {
    val outputStream = new ByteArrayOutputStream()
    IOUtils.copy(compressedStream, outputStream)
    outputStream.toByteArray
  }

  def readSpanBuffer(packedDataBytes: Array[Byte]): SpanBuffer = {
    var parsedDataBytes: Array[Byte] = null
    val metadataBytes = readMetadata(packedDataBytes)
    if (metadataBytes != null) {
      val packedMetadata = Serialization.read[PackedMetadata](new String(metadataBytes))
      val compressedDataOffset = MAGIC_BYTES.length + 4 + metadataBytes.length
      packedMetadata.t match {
        case PackerType.SNAPPY =>
          parsedDataBytes = unpack(
            new SnappyInputStream(
              new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset)))
        case PackerType.GZIP =>
          parsedDataBytes = unpack(
            new GZIPInputStream(
              new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset)))
        case PackerType.ZSTD =>
          parsedDataBytes = unpack(
            new ZstdInputStream(
              new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset)))
        case _ =>
          return SpanBuffer.parseFrom(
            new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset))
      }
    } else {
      parsedDataBytes = packedDataBytes
    }
    SpanBuffer.parseFrom(parsedDataBytes)
  }
}
Example 5
Source File: Package.scala From seed with Apache License 2.0

package seed.generation

import java.io.{File, FileInputStream, OutputStream}
import java.util.jar.{Attributes, JarEntry, JarOutputStream, Manifest}

import org.apache.commons.io.IOUtils
import java.nio.file.Path

import seed.Log
import seed.cli.util.Ansi

import scala.collection.mutable

// Adapted from https://stackoverflow.com/a/1281295
object Package {
  def create(
    source: List[(Path, String)],
    target: OutputStream,
    mainClass: Option[String],
    classPath: List[String],
    log: Log
  ): Unit = {
    val manifest = new Manifest()
    val mainAttributes = manifest.getMainAttributes
    mainAttributes.put(Attributes.Name.MANIFEST_VERSION, "1.0")
    // TODO Set additional package fields: https://docs.oracle.com/javase/tutorial/deployment/jar/packageman.html
    mainClass.foreach(cls => mainAttributes.put(Attributes.Name.MAIN_CLASS, cls))
    if (classPath.nonEmpty)
      mainAttributes.put(Attributes.Name.CLASS_PATH, classPath.mkString(" "))

    val targetFile = new JarOutputStream(target, manifest)
    val entryCache = mutable.Set[String]()
    source.foreach {
      case (path, jarPath) =>
        log.debug(s"Packaging ${Ansi.italic(path.toString)}...")
        add(path.toFile, jarPath, targetFile, entryCache, log)
    }
    targetFile.close()
  }

  def add(
    source: File,
    jarPath: String,
    target: JarOutputStream,
    entryCache: mutable.Set[String],
    log: Log
  ): Unit = {
    val path =
      if (source.isFile) jarPath
      else {
        require(!jarPath.endsWith("/"))
        jarPath + "/"
      }

    val addedEntry =
      if (entryCache.contains(path)) {
        if (source.isFile)
          log.warn(
            s"Skipping file ${Ansi.italic(source.toString)} as another module already added it"
          )
        false
      } else {
        val entry = new JarEntry(path)
        entry.setTime(source.lastModified)
        target.putNextEntry(entry)
        entryCache += path
        if (source.isFile) IOUtils.copy(new FileInputStream(source), target)
        true
      }

    if (!source.isFile)
      for (nestedFile <- source.listFiles)
        add(nestedFile, path + nestedFile.getName, target, entryCache, log)

    if (addedEntry) target.closeEntry()
  }
}
Example 6
Source File: Http.scala From seed with Apache License 2.0

package seed.publish.util

import java.net.URI

import org.apache.commons.io.IOUtils
import org.apache.http.{HttpHost, HttpRequest, HttpRequestInterceptor}
import org.apache.http.entity.ContentType
import seed.util.ZioHelpers._
import zio.Task
import org.apache.http.auth.AuthScope
import org.apache.http.auth.UsernamePasswordCredentials
import org.apache.http.client.protocol.HttpClientContext
import org.apache.http.impl.auth.BasicScheme
import org.apache.http.impl.client.{BasicAuthCache, BasicCredentialsProvider}
import org.apache.http.impl.nio.client.CloseableHttpAsyncClient
import org.apache.http.impl.nio.client.HttpAsyncClients
import org.apache.http.nio.client.methods.HttpAsyncMethods
import org.apache.http.nio.protocol.HttpAsyncRequestProducer
import org.apache.http.protocol.HttpContext

class Http(httpClient: CloseableHttpAsyncClient) {
  def put(url: String, bytes: Array[Byte]): Task[String] = {
    val producer = HttpAsyncMethods.createPut(url, bytes, ContentType.DEFAULT_BINARY)
    send(url, producer)
  }

  def post(url: String, bytes: Array[Byte]): Task[String] = {
    val producer = HttpAsyncMethods.createPost(url, bytes, ContentType.DEFAULT_BINARY)
    send(url, producer)
  }

  def destroy(): Unit = httpClient.close()

  private def send(url: String, producer: HttpAsyncRequestProducer) = {
    val client = new CompletableHttpAsyncClient(httpClient)

    val uri = URI.create(url)
    val targetHost = new HttpHost(uri.getHost, uri.getPort, uri.getScheme)

    val authCache = new BasicAuthCache()
    authCache.put(targetHost, new BasicScheme())

    val clientContext = HttpClientContext.create()
    clientContext.setAuthCache(authCache)

    val future = client.execute(producer, HttpAsyncMethods.createConsumer(), clientContext)
    fromCompletableFuture(future)
      .map(r => IOUtils.toString(r.getEntity.getContent, "UTF-8"))
  }
}

class CustomRequestInterceptor(log: seed.Log) extends HttpRequestInterceptor {
  override def process(request: HttpRequest, context: HttpContext): Unit =
    log.debug("Sending HTTP request " + request + "...")
}

object Http {
  def create(log: seed.Log, authHost: String, auth: (String, String)): Http = {
    val credsProvider = new BasicCredentialsProvider()
    credsProvider.setCredentials(
      new AuthScope(authHost, 443),
      new UsernamePasswordCredentials(auth._1, auth._2)
    )

    val c = HttpAsyncClients
      .custom()
      .setDefaultCredentialsProvider(credsProvider)
      .addInterceptorFirst(new CustomRequestInterceptor(log))
      .build()
    c.start()

    new Http(c)
  }
}
Example 7
Source File: MimeTypeDetectorTest.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License

package com.salesforce.op.stages.impl.feature

import java.io.FileInputStream

import com.salesforce.op._
import com.salesforce.op.features.types._
import com.salesforce.op.stages.base.unary.UnaryTransformer
import com.salesforce.op.test.{OpTransformerSpec, TestFeatureBuilder, TestSparkContext}
import com.salesforce.op.testkit.RandomText
import com.salesforce.op.utils.spark.RichDataset._
import org.apache.commons.io.IOUtils
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner

@RunWith(classOf[JUnitRunner])
class MimeTypeDetectorTest extends OpTransformerSpec[Text, MimeTypeDetector] with Base64TestData {
  val inputData = randomData
  val transformer = new MimeTypeDetector().setInput(randomBase64)
  val expectedResult = expectedRandom

  it should "validate the type hint" in {
    assertThrows[IllegalArgumentException](new MimeTypeDetector().setTypeHint("blarg"))
  }

  it should "validate the max bytes to parse" in {
    assertThrows[IllegalArgumentException](new MimeTypeDetector().setMaxBytesToParse(-1L))
  }

  it should "detect octet stream data" in {
    val mime = randomBase64.detectMimeTypes()
    mime.originStage shouldBe a[UnaryTransformer[_, _]]
    val result = mime.originStage.asInstanceOf[UnaryTransformer[Base64, Text]].transform(randomData)
    result.collect(mime) should contain theSameElementsInOrderAs expectedRandom
  }

  it should "detect other mime types" in {
    val mime = realBase64.detectMimeTypes()
    val result = mime.originStage.asInstanceOf[UnaryTransformer[Base64, Text]].transform(realData)
    result.collect(mime) should contain theSameElementsInOrderAs expectedMime
  }

  it should "detect other mime types with a json type hint" in {
    val mime = realBase64.detectMimeTypes(typeHint = Some("application/json"))
    val result = mime.originStage.asInstanceOf[UnaryTransformer[Base64, Text]].transform(realData)
    result.collect(mime) should contain theSameElementsInOrderAs expectedMimeJson
  }
}

trait Base64TestData {
  self: TestSparkContext =>

  val seed = 42L

  lazy val (randomData, randomBase64) = {
    val rnd = RandomText.base64(0, 10000)
    rnd.reset(seed)
    TestFeatureBuilder(Base64.empty +: Base64("") +: rnd.take(10).toSeq)
  }

  lazy val (realData, realBase64) = TestFeatureBuilder(
    Seq(
      "811harmo24to36.mp3", "820orig36to48.wav", "face.png", "log4j.properties",
      "note.xml", "RunnerParams.json", "dummy.csv", "Canon_40D.jpg", "sample.pdf"
    ).map(loadResourceAsBase64)
  )

  val expectedRandom = Text.empty +: Seq.fill(11)(Text("application/octet-stream"))

  val expectedMime = Seq(
    "audio/mpeg", "audio/vnd.wave", "image/png", "text/plain", "application/xml",
    "text/plain", "text/plain", "image/jpeg", "application/pdf"
  ).map(_.toText)

  val expectedMimeJson = Seq(
    "audio/mpeg", "audio/vnd.wave", "image/png", "application/json", "application/xml",
    "application/json", "application/json", "image/jpeg", "application/pdf"
  ).map(_.toText)

  def loadResourceAsBase64(name: String): Base64 = Base64 {
    val bytes = IOUtils.toByteArray(new FileInputStream(resourceFile(name = name)))
    new String(java.util.Base64.getEncoder.encode(bytes))
  }
}
Example 8
Source File: Base64Test.scala From TransmogrifAI with BSD 3-Clause "New" or "Revised" License

package com.salesforce.op.features.types

import java.nio.charset.Charset

import com.salesforce.op.test.TestCommon
import org.apache.commons.io.IOUtils
import org.junit.runner.RunWith
import org.scalatest.PropSpec
import org.scalatest.junit.JUnitRunner
import org.scalatest.prop.PropertyChecks

@RunWith(classOf[JUnitRunner])
class Base64Test extends PropSpec with PropertyChecks with TestCommon {

  property("handle empty") {
    forAll(None) { (v: Option[String]) =>
      Base64(v).asBytes shouldBe None
      Base64(v).asString shouldBe None
      Base64(v).asInputStream shouldBe None
    }
  }

  property("can show byte contents") {
    forAll { (b: Array[Byte]) =>
      val b64 = toBase64(b)
      (Base64(b64).asBytes map (_.toList)) shouldBe Some(b.toList)
    }
  }

  property("can show string contents") {
    forAll { (s: String) =>
      val b64 = toBase64(s.getBytes)
      Base64(b64).asString shouldBe Some(s)
    }
  }

  property("produce a stream") {
    forAll { (s: String) =>
      val b64 = toBase64(s.getBytes)
      Base64(b64).asInputStream.map(IOUtils.toString(_, Charset.defaultCharset())) shouldBe Some(s)
    }
  }

  property("produce a stream and map over it") {
    forAll { (s: String) =>
      val b64 = toBase64(s.getBytes)
      Base64(b64).mapInputStream(IOUtils.toString(_, Charset.defaultCharset())) shouldBe Some(s)
    }
  }

  def toBase64(b: Array[Byte]): String = new String(java.util.Base64.getEncoder.encode(b))
}
Example 9
Source File: StreamMetadata.scala From multi-tenancy-spark with Apache License 2.0

package org.apache.spark.sql.execution.streaming

import java.io.{InputStreamReader, OutputStreamWriter}
import java.nio.charset.StandardCharsets

import scala.util.control.NonFatal

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, FSDataOutputStream, Path}
import org.json4s.NoTypeHints
import org.json4s.jackson.Serialization

import org.apache.spark.internal.Logging
import org.apache.spark.sql.streaming.StreamingQuery

// Excerpt: the enclosing object declaration is omitted in this listing.
  def write(
      metadata: StreamMetadata,
      metadataFile: Path,
      hadoopConf: Configuration): Unit = {
    var output: FSDataOutputStream = null
    try {
      val fs = FileSystem.get(hadoopConf)
      output = fs.create(metadataFile)
      val writer = new OutputStreamWriter(output)
      Serialization.write(metadata, writer)
      writer.close()
    } catch {
      case NonFatal(e) =>
        logError(s"Error writing stream metadata $metadata to $metadataFile", e)
        throw e
    } finally {
      IOUtils.closeQuietly(output)
    }
  }
}
Example 10
Source File: S3ObjectUploader.scala From CM-Well with Apache License 2.0

package cmwell.tools.neptune.export

import java.io._
import java.util
import java.util.concurrent.{Executors, TimeoutException}
import java.util.stream.Collectors
import java.util.{Collections, Vector}

import com.amazonaws.auth.profile.ProfileCredentialsProvider
import com.amazonaws.services.s3.AmazonS3ClientBuilder
import com.amazonaws.services.s3.model.{ObjectMetadata, PutObjectRequest}
import com.amazonaws.{AmazonServiceException, ClientConfiguration, Protocol, SdkClientException}
import org.apache.commons.io.{FileUtils, IOUtils}
import org.slf4j.LoggerFactory

import scala.concurrent.{Await, ExecutionContext, Future}
import scala.concurrent.duration.{FiniteDuration, _}

object S3ObjectUploader {

  val executor = Executors.newFixedThreadPool(1)
  implicit val ec: ExecutionContext = scala.concurrent.ExecutionContext.fromExecutor(executor)
  protected lazy val logger = LoggerFactory.getLogger("s3_uploader")

  def init(proxyHost: Option[String], proxyPort: Option[Int]) = {
    val clientRegion = "us-east-1"
    val config = new ClientConfiguration
    config.setProtocol(Protocol.HTTPS)
    proxyHost.foreach(host => config.setProxyHost(host))
    proxyPort.foreach(port => config.setProxyPort(port))
    val s3Client = AmazonS3ClientBuilder.standard()
      .withRegion(clientRegion)
      .withClientConfiguration(config)
      .withCredentials(new ProfileCredentialsProvider())
      .build()
    s3Client
  }

  def persistChunkToS3Bucket(chunkData: String, fileName: String, proxyHost: Option[String], proxyPort: Option[Int], s3Directory: String) = {
    try {
      init(proxyHost, proxyPort).putObject(s3Directory, fileName, chunkData)
    }
    catch {
      case e: AmazonServiceException =>
        e.printStackTrace()
        throw e
      case e: SdkClientException =>
        e.printStackTrace()
        throw e
    }
  }

  def persistChunkToS3Bucket(tmpFile: File, proxyHost: Option[String], proxyPort: Option[Int], s3Directory: String, retryCount: Int = 3): Unit = {
    try {
      val s3UploadTask = Future { init(proxyHost, proxyPort).putObject(s3Directory, tmpFile.getName, tmpFile) }(ec)
      Await.result(s3UploadTask, 5.minutes)
      tmpFile.delete()
    }
    catch {
      case e: TimeoutException =>
        if (retryCount > 0) {
          logger.error("S3 upload task run more than 5 minutes..Going to retry")
          persistChunkToS3Bucket(tmpFile, proxyHost, proxyPort, s3Directory, retryCount - 1)
        }
        else {
          throw new Exception("S3 upload task duration was more than 5 minutes")
        }
      case e: AmazonServiceException =>
        e.printStackTrace()
        throw e
      case e: SdkClientException =>
        e.printStackTrace()
        throw e
    }
  }
}
Example 11
Source File: UtilCommands.scala From CM-Well with Apache License 2.0

import java.io.{BufferedInputStream, File}
import java.nio.file.{Files, Paths}
import java.security.MessageDigest

import scala.util.control.Breaks._
import javax.xml.bind.DatatypeConverter
import org.apache.commons.compress.archivers.ArchiveEntry
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import org.apache.commons.io.IOUtils

object UtilCommands {
  val OSX_NAME = "Mac OS X"

  val linuxSshpass = if (Files.exists(Paths.get("bin/utils/sshpass"))) "bin/utils/sshpass" else "sshpass"
  val osxSshpass = "/usr/local/bin/sshpass"

  val sshpass = if (isOSX) osxSshpass else linuxSshpass

  def isOSX = System.getProperty("os.name") == OSX_NAME

  def verifyComponentConfNotChanged(componentName: String, configFilePath: String, expectedHash: String) = {
    val confContent = UtilCommands.unTarGz("./components", componentName, configFilePath)
    UtilCommands.checksum(componentName, configFilePath, confContent, expectedHash)
  }

  def checksum(componentName: String, configFilePath: String, confContent: Array[Byte], expectedHash: String) = {
    val actualHash = MessageDigest.getInstance("MD5").digest(confContent)
    val actualHashStr = DatatypeConverter.printHexBinary(actualHash)
    if (!expectedHash.equalsIgnoreCase(actualHashStr))
      throw new Exception(s"$componentName configuration file $configFilePath has been changed, please change the template accordingly " +
        s"(the new digest is $actualHashStr)")
  }

  def unTarGz(rootFolder: String, componentName: String, configFilePath: String): Array[Byte] = {
    var tarArchiveInputStream: TarArchiveInputStream = null
    var bufferInputstream: BufferedInputStream = null
    val gzipCompressor: GzipCompressorInputStream = null
    var confContent: Array[Byte] = null
    try {
      val libDir = new File(rootFolder)
      val pathInput = libDir.listFiles().filter(file => file.getName.contains(componentName))
      val path = Paths.get(pathInput(0).getAbsolutePath)
      val bufferInputStream = new BufferedInputStream(Files.newInputStream(path))
      val gzipCompressor = new GzipCompressorInputStream(bufferInputStream)
      tarArchiveInputStream = new TarArchiveInputStream(gzipCompressor)

      var archiveEntry: ArchiveEntry = null
      archiveEntry = tarArchiveInputStream.getNextEntry
      if (archiveEntry.getName == "./")
        archiveEntry = tarArchiveInputStream.getNextEntry

      val extractFolder = archiveEntry.getName.replaceAll("^\\./", "").split("/")(0)
      while (archiveEntry != null) {
        breakable {
          if (archiveEntry.getName.replaceAll("^\\./", "") == s"$extractFolder/$configFilePath") {
            confContent = IOUtils.toByteArray(tarArchiveInputStream)
            break
          }
        }
        archiveEntry = tarArchiveInputStream.getNextEntry
      }
    } finally {
      if (tarArchiveInputStream != null) tarArchiveInputStream.close()
      if (bufferInputstream != null) bufferInputstream.close()
      if (gzipCompressor != null) gzipCompressor.close()
    }
    confContent
  }
}
Example 12
Source File: VelocityRenderer.scala From shellbase with Apache License 2.0

package com.sumologic.shellbase

import java.io._
import java.util.Properties

import org.apache.commons.io.IOUtils
import org.apache.velocity.VelocityContext
import org.apache.velocity.app.{Velocity, VelocityEngine}

import scala.collection.JavaConversions._

object VelocityRenderer {

  val props = new Properties()
  props.setProperty("runtime.references.strict", "true")
  props.setProperty("velocimacro.arguments.strict", "true")
  props.setProperty("velocimacro.permissions.allow.inline.local.scope", "true")
  props.setProperty("directive.foreach.skip.invalid", "false")
  props.setProperty("runtime.log.logsystem.log4j.logger", "org.apache.velocity")
  props.setProperty("resource.loader", "class,file")
  props.setProperty("class.resource.loader.description", "Velocity Classpath Resource Loader")
  props.setProperty("class.resource.loader.class", "org.apache.velocity.runtime.resource.loader.ClasspathResourceLoader")
  Velocity.init(props)

  def render(map: Iterable[(String, AnyRef)], reader: Reader, writer: Writer): Unit = {
    val context = new VelocityContext()
    map.foreach { pair => context.put(pair._1, pair._2) }
    Velocity.evaluate(context, writer, "ops util velocity renderer", reader)
  }

  def render(templateVars: Iterable[(String, AnyRef)], templatePath: String, outputPath: String): Unit = {
    val templateReader = new InputStreamReader(getClass.getClassLoader.getResourceAsStream(templatePath))
    val outputWriter = new OutputStreamWriter(new FileOutputStream(outputPath))
    try {
      render(templateVars, templateReader, outputWriter)
    } finally {
      IOUtils.closeQuietly(templateReader)
      IOUtils.closeQuietly(outputWriter)
    }
  }

  def createScriptFromTemplate(scriptResource: String,
                               variables: Map[AnyRef, AnyRef] = Map[AnyRef, AnyRef]()): File = {
    val engine = new VelocityEngine()
    engine.setProperty("resource.loader", "class")
    engine.setProperty("class.resource.loader.description", "Velocity Classpath Resource Loader")
    engine.setProperty("class.resource.loader.class", "org.apache.velocity.runtime.resource.loader.ClasspathResourceLoader")
    engine.init()

    val template = engine.getTemplate(scriptResource)
    val tempFile = File.createTempFile(".tmp", ".sh")

    val modifiableVariables = new java.util.HashMap[AnyRef, AnyRef]()
    modifiableVariables ++= variables

    val writer = new FileWriter(tempFile)
    try {
      template.merge(new VelocityContext(modifiableVariables), writer)
    } finally {
      IOUtils.closeQuietly(writer)
    }

    tempFile
  }
}
Example 13
Source File: PortForwarding.scala From mmlspark with MIT License

// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.io.http

import java.io.File
import java.net.URI

import com.jcraft.jsch.{JSch, Session}
import org.apache.commons.io.IOUtils

object PortForwarding {

  lazy val Jsch = new JSch()

  def forwardPortToRemote(username: String,
                          sshHost: String,
                          sshPort: Int,
                          bindAddress: String,
                          remotePortStart: Int,
                          localHost: String,
                          localPort: Int,
                          keyDir: Option[String],
                          keySas: Option[String],
                          maxRetries: Int,
                          timeout: Int
                         ): (Session, Int) = {
    keyDir.foreach(kd =>
      new File(kd).listFiles().foreach(f =>
        try {
          Jsch.addIdentity(f.getAbsolutePath)
        } catch {
          case _: com.jcraft.jsch.JSchException =>
          case e: Exception => throw e
        }
      )
    )

    keySas.foreach { ks =>
      val privateKeyBytes = IOUtils.toByteArray(new URI(ks))
      Jsch.addIdentity("forwardingKey", privateKeyBytes, null, null) //scalastyle:ignore null
    }

    val session = Jsch.getSession(username, sshHost, sshPort)
    session.setConfig("StrictHostKeyChecking", "no")
    session.setTimeout(timeout)
    session.connect()

    var attempt = 0
    var foundPort: Option[Int] = None
    while (foundPort.isEmpty && attempt <= maxRetries) {
      try {
        session.setPortForwardingR(bindAddress, remotePortStart + attempt, localHost, localPort)
        foundPort = Some(remotePortStart + attempt)
      } catch {
        case _: Exception =>
          println(s"failed to forward port. Attempt: $attempt")
          attempt += 1
      }
    }
    if (foundPort.isEmpty) {
      throw new RuntimeException(s"Could not find open port between " +
        s"$remotePortStart and ${remotePortStart + maxRetries}")
    }
    println(s"forwarding to ${foundPort.get}")
    (session, foundPort.get)
  }

  def forwardPortToRemote(options: Map[String, String]): (Session, Int) = {
    forwardPortToRemote(
      options("forwarding.username"),
      options("forwarding.sshhost"),
      options.getOrElse("forwarding.sshport", "22").toInt,
      options.getOrElse("forwarding.bindaddress", "*"),
      options.get("forwarding.remoteportstart")
        .orElse(options.get("forwarding.localport")).get.toInt,
      options.getOrElse("forwarding.localhost", "0.0.0.0"),
      options("forwarding.localport").toInt,
      options.get("forwarding.keydir"),
      options.get("forwarding.keysas"),
      options.getOrElse("forwarding.maxretires", "50").toInt,
      options.getOrElse("forwarding.timeout", "20000").toInt
    )
  }
}
Example 14
Source File: StreamUtilities.scala From mmlspark with MIT License

// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.core.env

import java.io.{ByteArrayOutputStream, InputStream}
import java.util.zip.ZipInputStream

import org.apache.commons.io.IOUtils

import scala.io.Source
import scala.util.Random

object StreamUtilities {

  import scala.util.{Failure, Success, Try}

  def usingMany[T <: AutoCloseable, U](disposable: Seq[T])(task: Seq[T] => U): Try[U] = {
    try {
      Success(task(disposable))
    } catch {
      case e: Exception => Failure(e)
    } finally {
      disposable.foreach(d => d.close())
    }
  }

  def using[T <: AutoCloseable, U](disposable: T)(task: T => U): Try[U] = {
    try {
      Success(task(disposable))
    } catch {
      case e: Exception => Failure(e)
    } finally {
      disposable.close()
    }
  }

  def usingSource[T <: Source, U](disposable: T)(task: T => U): Try[U] = {
    try {
      Success(task(disposable))
    } catch {
      case e: Exception => Failure(e)
    } finally {
      disposable.close()
    }
  }

  class ZipIterator(stream: InputStream, zipfile: String, random: Random, sampleRatio: Double = 1)
    extends Iterator[(String, Array[Byte])] {

    private val zipStream = new ZipInputStream(stream)

    private def getNext: Option[(String, Array[Byte])] = {
      var entry = zipStream.getNextEntry
      while (entry != null) {
        if (!entry.isDirectory && random.nextDouble < sampleRatio) {
          val filename = zipfile + java.io.File.separator + entry.getName

          // extracting all bytes of a given entry
          val byteStream = new ByteArrayOutputStream
          IOUtils.copy(zipStream, byteStream)
          val bytes = byteStream.toByteArray

          assert(bytes.length == entry.getSize,
            "incorrect number of bytes is read from zipstream: " + bytes.length +
              " instead of " + entry.getSize)

          return Some((filename, bytes))
        }
        entry = zipStream.getNextEntry
      }
      stream.close()
      None
    }

    private var nextValue = getNext

    def hasNext: Boolean = nextValue.isDefined

    def next: (String, Array[Byte]) = {
      val result = nextValue.get
      nextValue = getNext
      result
    }
  }
}
Example 15
Source File: RESTHelpers.scala From mmlspark with MIT License

// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.cognitive

import org.apache.commons.io.IOUtils
import org.apache.http.client.config.RequestConfig
import org.apache.http.client.methods._
import org.apache.http.impl.client.{CloseableHttpClient, HttpClientBuilder}
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager

import scala.concurrent.blocking
import scala.util.Try

object RESTHelpers {
  lazy val RequestTimeout = 60000

  lazy val RequestConfigVal: RequestConfig = RequestConfig.custom()
    .setConnectTimeout(RequestTimeout)
    .setConnectionRequestTimeout(RequestTimeout)
    .setSocketTimeout(RequestTimeout)
    .build()

  lazy val ConnectionManager = {
    val cm = new PoolingHttpClientConnectionManager()
    cm.setDefaultMaxPerRoute(Int.MaxValue)
    cm.setMaxTotal(Int.MaxValue)
    cm
  }

  lazy val Client: CloseableHttpClient = HttpClientBuilder
    .create().setConnectionManager(ConnectionManager)
    .setDefaultRequestConfig(RequestConfigVal).build()

  def retry[T](backoffs: List[Int], f: () => T): T = {
    try {
      f()
    } catch {
      case t: Throwable =>
        val waitTime = backoffs.headOption.getOrElse(throw t)
        println(s"Caught error: $t with message ${t.getMessage}, waiting for $waitTime")
        blocking { Thread.sleep(waitTime.toLong) }
        retry(backoffs.tail, f)
    }
  }

  //TODO use this elsewhere
  def safeSend(request: HttpRequestBase,
               backoffs: List[Int] = List(100, 500, 1000),
               expectedCodes: Set[Int] = Set(),
               close: Boolean = true): CloseableHttpResponse = {

    retry(List(100, 500, 1000), { () =>
      val response = Client.execute(request)
      try {
        if (response.getStatusLine.getStatusCode.toString.startsWith("2") ||
          expectedCodes(response.getStatusLine.getStatusCode)
        ) {
          response
        } else {
          val requestBodyOpt = Try(request match {
            case er: HttpEntityEnclosingRequestBase => IOUtils.toString(er.getEntity.getContent)
            case _ => ""
          }).get

          val responseBodyOpt = Try(IOUtils.toString(response.getEntity.getContent)).getOrElse("")

          throw new RuntimeException(
            s"Failed: " +
              s"\n\t response: $response " +
              s"\n\t requestUrl: ${request.getURI}" +
              s"\n\t requestBody: $requestBodyOpt" +
              s"\n\t responseBody: $responseBodyOpt")
        }
      } catch {
        case e: Exception =>
          response.close()
          throw e
      } finally {
        if (close) {
          response.close()
        }
      }
    })
  }
}
Example 16
Source File: Secrets.scala From mmlspark with MIT License

import java.io.IOException
import java.util.Base64

import sys.process._
import spray.json._
import DefaultJsonProtocol._
import org.apache.commons.io.IOUtils
import sbt.{SettingKey, TaskKey}

object Secrets {

  private val kvName = "mmlspark-keys"
  private val subscriptionID = "ce1dee05-8cf6-4ad6-990a-9c80868800ba"

  protected def exec(command: String): String = {
    val os = sys.props("os.name").toLowerCase
    os match {
      case x if x contains "windows" => Seq("cmd", "/C") ++ Seq(command) !!
      case _ => command !!
    }
  }

  private def getSecret(secretName: String): String = {
    println(s"fetching secret: $secretName")
    try {
      exec(s"az account set -s $subscriptionID")
      val secretJson = exec(s"az keyvault secret show --vault-name $kvName --name $secretName")
      secretJson.parseJson.asJsObject().fields("value").convertTo[String]
    } catch {
      case _: IOException =>
        println("WARNING: Could not load secret from keyvault, defaulting to the empty string." +
          " Please install az command line to perform authorized build steps like publishing")
        ""
      case _: java.lang.RuntimeException =>
        println("WARNING: Could not load secret from keyvault, defaulting to the empty string." +
          " Please install az command line to perform authorized build steps like publishing")
        ""
    }
  }

  lazy val nexusUsername: String = sys.env.getOrElse("NEXUS-UN", getSecret("nexus-un"))
  lazy val nexusPassword: String = sys.env.getOrElse("NEXUS-PW", getSecret("nexus-pw"))
  lazy val pgpPublic: String = new String(Base64.getDecoder.decode(
    sys.env.getOrElse("PGP-PUBLIC", getSecret("pgp-public")).getBytes("UTF-8")))
  lazy val pgpPrivate: String = new String(Base64.getDecoder.decode(
    sys.env.getOrElse("PGP-PRIVATE", getSecret("pgp-private")).getBytes("UTF-8")))
  lazy val pgpPassword: String = sys.env.getOrElse("PGP-PW", getSecret("pgp-pw"))
  lazy val storageKey: String = sys.env.getOrElse("STORAGE_KEY", getSecret("storage-key"))
}
Example 17
Source File: SAXBench.scala From osmesa with Apache License 2.0

package osmesa

import java.util.concurrent.TimeUnit

import org.apache.commons.io.IOUtils
import org.openjdk.jmh.annotations._
import vectorpipe.model.{Actions, Change}

import java.util.zip.GZIPInputStream
import javax.xml.parsers.{SAXParser, SAXParserFactory}
import scala.xml.XML

// --- //

@BenchmarkMode(Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@State(Scope.Thread)
class SAXBench {

  val sequence = 0

  @Setup
  def setup: Unit = { }

  def gzipInputStream(): GZIPInputStream = {
    // requires the addition of a gzipped OSC file in bench/src/main/resources
    val stream = getClass.getResourceAsStream("/942.osc.gz")
    new GZIPInputStream(stream)
  }

  def withScalaXML(): Int = {
    // requires Change.fromXML (see commit 1b04a1e81f1a88f374a086c98d58677ec537b1bf)
    val data = XML.loadString(IOUtils.toString(gzipInputStream))
    val changes = (data \ "_").flatMap { node =>
      (node \ "_").map(Change.fromXML(_, Actions.fromString(node.label), sequence))
    }
    changes.length
  }

  def withSAXParser(): Int = {
    val factory = SAXParserFactory.newInstance
    val parser = factory.newSAXParser
    val handler = new Change.ChangeHandler(sequence)
    parser.parse(gzipInputStream(), handler)
    handler.changeSeq.length
  }

  @Benchmark
  def useScala: Double = withScalaXML()

  @Benchmark
  def getSAXyGirl: Double = withSAXParser()
}
Example 18
Source File: VectorGrid.scala From osmesa with Apache License 2.0

package osmesa.analytics

import java.io.ByteArrayInputStream
import java.net.URI
import java.util.zip.GZIPInputStream

import geotrellis.proj4.WebMercator
import geotrellis.spark.tiling.ZoomedLayoutScheme
import geotrellis.vector.{Extent, PointFeature}
import geotrellis.vectortile.{Layer, VInt64, VectorTile}
import org.apache.commons.io.IOUtils
import org.apache.spark.internal.Logging
import osmesa.analytics.updater.Implicits._
import osmesa.analytics.updater._

import scala.collection.GenMap
import scala.collection.parallel.TaskSupport

trait VectorGrid extends Logging {
  // Default base zoom (highest resolution tiles produced)
  val DefaultBaseZoom: Int = 10

  // Number of cells per side in a gridded tile
  implicit val Cells: Int = 128

  // Number of cells in a gridded tile at the base of the pyramid (may be used for over-zooming)
  val BaseCells: Int = Cells

  // Default upload concurrency
  val DefaultUploadConcurrency: Int = 8

  implicit val LayoutScheme: ZoomedLayoutScheme = ZoomedLayoutScheme(WebMercator)
  val SequenceLayerName: String = "__sequences__"

  def getCommittedSequences(tile: VectorTile): Set[Int] =
    // NOTE when working with hashtags, this should be the changeset sequence, since changes from a
    // single sequence may appear in different batches depending on when changeset metadata arrives
    tile.layers
      .get(SequenceLayerName)
      .map(_.features.flatMap(f => f.data.values.map(valueToLong).map(_.intValue)))
      .map(_.toSet)
      .getOrElse(Set.empty)

  def makeSequenceLayer(sequences: Set[Int], extent: Extent, tileWidth: Int = 4096): (String, Layer) = {
    // create a second layer w/ a feature corresponding to committed sequences (in the absence of
    // available tile / layer metadata)
    val updatedSequences =
      sequences.toSeq.sorted
        .takeRight(1000)
        .zipWithIndex
        .map {
          case (seq, idx) =>
            idx.toString -> VInt64(seq)
        }
        .toMap

    val sequenceFeature = PointFeature(extent.center, updatedSequences)

    makeLayer(SequenceLayerName, extent, Seq(sequenceFeature), tileWidth)
  }

  def loadMVTs(urls: Map[URI, Extent])(
      implicit taskSupport: TaskSupport): GenMap[URI, VectorTile] = {
    // convert to a parallel collection to load more tiles concurrently
    val parUrls = urls.par
    parUrls.tasksupport = taskSupport

    parUrls.map {
      case (uri, extent) =>
        (uri,
         read(uri).map(
           bytes =>
             VectorTile.fromBytes(
               IOUtils.toByteArray(new GZIPInputStream(new ByteArrayInputStream(bytes))),
               extent)))
    } filter {
      case (_, mvt) => mvt.isDefined
    } map {
      case (uri, mvt) => uri -> mvt.get
    }
  }
}
Example 19
Source File: DruidClientTest.scala From spark-druid-olap with Apache License 2.0

package org.sparklinedata.druid.client.test

import org.apache.commons.io.IOUtils
import org.apache.spark.sql.sources.druid.DruidQueryResultIterator
import org.json4s.Extraction
import org.json4s.jackson.JsonMethods._
import org.scalatest.{BeforeAndAfterAll, FunSuite}
import org.sparklinedata.druid.Utils
import org.sparklinedata.druid.client.{DruidCoordinatorClient, DruidQueryServerClient}
import org.sparklinedata.druid.testenv.DruidCluster

class DruidClientTest extends FunSuite with BeforeAndAfterAll with TestUtils {

  import org.sparklinedata.druid.client.test.TPCHQueries._

  var brokerClient: DruidQueryServerClient = _
  var coordClient: DruidCoordinatorClient = _

  import Utils._

  override def beforeAll() = {
    brokerClient = new DruidQueryServerClient("localhost", DruidCluster.instance.brokerPort)
    coordClient = new DruidCoordinatorClient("localhost", DruidCluster.instance.coordinatorPort)
  }

  test("timeBoundary") {
    println(brokerClient.timeBoundary("tpch"))
  }

  test("coordTimeBoundary") {
    println(coordClient.timeBoundary("tpch"))
  }

  test("metaData") {
    println(brokerClient.metadata("tpch", false))
  }

  test("tpchQ1") {
    println(pretty(render(Extraction.decompose(q1))))
    val r = brokerClient.executeQuery(q1)
    r.foreach(println)
  }

  test("tpchQ3") {
    println(pretty(render(Extraction.decompose(q3))))
    val r = brokerClient.executeQuery(q3)
    r.foreach(println)
  }

  test("tpchQ1MonthGrain") {
    println(pretty(render(Extraction.decompose(q1))))
    val r = brokerClient.executeQuery(q1)
    r.foreach(println)
  }

  test("streamQueryResult") {
    def qRis = getClass.getClassLoader.getResourceAsStream("sampleQueryResult.json")

    for (i <- 0 to 5) {
      recordTime("streamed read") {
        val is = IOUtils.toBufferedInputStream(qRis)
        val it = DruidQueryResultIterator(false, is)
        while (it.hasNext) {
          it.next
        }
      }
      recordTime("list read") {
        val is = IOUtils.toBufferedInputStream(qRis)
        val it = DruidQueryResultIterator(false, is, (), true)
        while (it.hasNext) {
          it.next
        }
      }
    }
  }

  test("serversInfo") {
    val sInfo = coordClient.serversInfo
    println(pretty(render(Extraction.decompose(sInfo))))
  }

  test("dataSourceInfo") {
    val dInfo = coordClient.dataSourceInfo("tpch")
    println(pretty(render(Extraction.decompose(dInfo))))
  }
}
Example 20
Source File: HadoopFileSystemLogStore.scala From delta with Apache License 2.0

package org.apache.spark.sql.delta.storage

import java.io.{BufferedReader, FileNotFoundException, InputStreamReader}
import java.nio.charset.StandardCharsets.UTF_8
import java.nio.file.FileAlreadyExistsException
import java.util.UUID

import scala.collection.JavaConverters._

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileStatus, FileSystem, Path}

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SparkSession

// Excerpt: the enclosing class declaration (which supplies getHadoopConfiguration) and other members are omitted in this listing.
  protected def writeWithRename(
      path: Path,
      actions: Iterator[String],
      overwrite: Boolean = false): Unit = {
    val fs = path.getFileSystem(getHadoopConfiguration)

    if (!fs.exists(path.getParent)) {
      throw new FileNotFoundException(s"No such file or directory: ${path.getParent}")
    }
    if (overwrite) {
      val stream = fs.create(path, true)
      try {
        actions.map(_ + "\n").map(_.getBytes(UTF_8)).foreach(stream.write)
      } finally {
        stream.close()
      }
    } else {
      if (fs.exists(path)) {
        throw new FileAlreadyExistsException(path.toString)
      }
      val tempPath = createTempPath(path)
      var streamClosed = false // This flag is to avoid double close
      var renameDone = false // This flag is to save the delete operation in most of cases.
      val stream = fs.create(tempPath)
      try {
        actions.map(_ + "\n").map(_.getBytes(UTF_8)).foreach(stream.write)
        stream.close()
        streamClosed = true
        try {
          if (fs.rename(tempPath, path)) {
            renameDone = true
          } else {
            if (fs.exists(path)) {
              throw new FileAlreadyExistsException(path.toString)
            } else {
              throw new IllegalStateException(s"Cannot rename $tempPath to $path")
            }
          }
        } catch {
          case _: org.apache.hadoop.fs.FileAlreadyExistsException =>
            throw new FileAlreadyExistsException(path.toString)
        }
      } finally {
        if (!streamClosed) {
          stream.close()
        }
        if (!renameDone) {
          fs.delete(tempPath, false)
        }
      }
    }
  }

  protected def createTempPath(path: Path): Path = {
    new Path(path.getParent, s".${path.getName}.${UUID.randomUUID}.tmp")
  }

  override def invalidateCache(): Unit = {}
}
Example 21
Source File: TestAISStreams.scala From incubator-daffodil with Apache License 2.0

package org.apache.daffodil.layers

import org.junit.Assert._
import java.io._
import org.junit.Test
import org.apache.daffodil.io.RegexLimitingStream
import java.nio.charset.StandardCharsets
import org.apache.daffodil.util.Misc
import org.apache.commons.io.IOUtils
import org.apache.daffodil.io.LayerBoundaryMarkInsertingJavaOutputStream

// Excerpt: the enclosing test class declaration (where iso8859 is defined) is omitted in this listing.
  @Test def testAISPayloadArmoringDecode() = {
    val dataString = "14eGL:@000o8oQ'LMjOchmG@08HK,"
    val bba = new ByteArrayInputStream(dataString.getBytes(iso8859))
    //
    // regex is ",0*"
    //
    val rls = new RegexLimitingStream(bba, ",", ",", iso8859)
    val aas = new AISPayloadArmoringInputStream(rls)

    val baos = new ByteArrayOutputStream()
    var c: Int = -1
    while ({
      c = aas.read()
      c != -1
    }) {
      baos.write(c)
    }
    baos.close()
    val result = baos.toByteArray()
    val expected = Misc.bits2Bytes("000001 000100 101101 010111 011100 001010 010000 000000 000000 000000 110111 001000 110111 100001 101000 011100 011101 110010 011111 101011 110000 110101 010111 010000 000000 001000 011000 011011 ")
    assertEquals(expected.length, result.length)
    val pairs = expected zip result
    pairs.foreach { case (exp, act) => assertEquals(exp, act) }
  }

  @Test def testAISPayloadArmoringEncode() = {
    val dataBytes = Misc.bits2Bytes("000001 000100 101101 010111 011100 001010 010000 000000 000000 000000 110111 001000 110111 100001 101000 011100 011101 110010 011111 101011 110000 110101 010111 010000 000000 001000 011000 011011 ")
    val bais = new ByteArrayInputStream(dataBytes)
    val baos = new ByteArrayOutputStream()
    val lbmijos = new LayerBoundaryMarkInsertingJavaOutputStream(baos, ",", iso8859)
    val aas = new AISPayloadArmoringOutputStream(lbmijos)
    IOUtils.copy(bais, aas)
    aas.close()
    val result = baos.toByteArray()
    val expected = "14eGL:@000o8oQ'LMjOchmG@08HK,".getBytes(iso8859)
    assertEquals(expected.length, result.length)
    (expected zip result).foreach { case (exp, act) => assertEquals(exp, act) }
  }
}
Example 22
Source File: services.scala From InteractiveGraph-neo4j with BSD 2-Clause "Simplified" License

package org.grapheco.server.pidb

import java.io.{File, FileInputStream}

import org.apache.commons.io.{FileUtils, IOUtils}
import org.grapheco.server.util.{JsonUtils, Logging, ServletContextUtils}
import org.neo4j.driver.v1._
import org.neo4j.graphdb.factory.{GraphDatabaseFactory, GraphDatabaseSettings}
import org.neo4j.graphdb.{GraphDatabaseService, Label, RelationshipType}
import org.springframework.beans.factory.annotation.Autowired
import org.springframework.beans.factory.{DisposableBean, InitializingBean}
import cn.pidb.engine.{BoltService, CypherService, PidbConnector}

import scala.collection.JavaConversions._
import scala.collection.mutable
import scala.reflect.ClassTag

class PidbService(boltUrl: String, boltUser: String, boltPassword: String)
  extends BoltService(boltUrl, boltUser, boltPassword) {

  def getRelativeOrAbsoluteFile(path: String) = {
    Some(new File(path)).map { file =>
      if (file.isAbsolute) {
        file
      }
      else {
        new File(ServletContextUtils.getServletContext.getRealPath(s"/${path}"))
      }
    }.get
  }
}
Example 23
Source File: CSVOutputFormatter.scala From glow with Apache License 2.0

package io.projectglow.transformers.pipe

import java.io.InputStream

import scala.collection.JavaConverters._

import com.univocity.parsers.csv.CsvParser
import org.apache.commons.io.IOUtils
import org.apache.spark.sql.execution.datasources.csv.{CSVDataSourceUtils, CSVUtils, UnivocityParserUtils}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{StringType, StructField, StructType}

import io.projectglow.SparkShim.{CSVOptions, UnivocityParser}

class CSVOutputFormatter(parsedOptions: CSVOptions) extends OutputFormatter {

  private def getSchema(record: Array[String]): StructType = {
    val header = CSVDataSourceUtils.makeSafeHeader(
      record,
      SQLConf.get.caseSensitiveAnalysis,
      parsedOptions
    )
    val fields = header.map { fieldName =>
      StructField(fieldName, StringType, nullable = true)
    }
    StructType(fields)
  }

  override def makeIterator(stream: InputStream): Iterator[Any] = {
    val lines = IOUtils.lineIterator(stream, "UTF-8").asScala
    val filteredLines = CSVUtils.filterCommentAndEmpty(lines, parsedOptions)

    if (filteredLines.isEmpty) {
      return Iterator.empty
    }

    val firstLine = filteredLines.next
    val csvParser = new CsvParser(parsedOptions.asParserSettings)
    val firstRecord = csvParser.parseLine(firstLine)
    val schema = getSchema(firstRecord)
    val univocityParser = new UnivocityParser(schema, schema, parsedOptions)

    val parsedIter = UnivocityParserUtils.parseIterator(
      Iterator(firstLine) ++ filteredLines,
      univocityParser,
      schema
    )

    val parsedIterWithoutHeader = if (parsedOptions.headerFlag) {
      parsedIter.drop(1)
    } else {
      parsedIter
    }

    Iterator(schema) ++ parsedIterWithoutHeader.map(_.copy)
  }
}

class CSVOutputFormatterFactory extends OutputFormatterFactory {
  override def name: String = "csv"

  override def makeOutputFormatter(
      options: Map[String, String]
  ): OutputFormatter = {
    val parsedOptions = new CSVOptions(
      options,
      SQLConf.get.csvColumnPruning,
      SQLConf.get.sessionLocalTimeZone
    )
    new CSVOutputFormatter(parsedOptions)
  }
}
Example 24
Source File: DynamicResource.scala From sumobot with Apache License 2.0

package com.sumologic.sumobot.http_frontend

import akka.http.scaladsl.model.{ContentType, ContentTypes}
import org.apache.commons.io.IOUtils
import org.fusesource.scalate.TemplateEngine
import org.fusesource.scalate.util.{FileResourceLoader, Resource}

object DynamicResource {
  private val Engine = new TemplateEngine
  Engine.allowReload = false
  Engine.allowCaching = true

  Engine.resourceLoader = new FileResourceLoader {
    override def resource(filename: String): Option[Resource] = {
      val uri = getClass.getResource(filename).toURI.toString

      val stream = getClass.getResourceAsStream(filename)
      val templateText = IOUtils.toString(stream)
      stream.close()

      Some(Resource.fromText(uri, templateText))
    }
  }
}

case class DynamicResource(templateFile: String) {
  DynamicResource.Engine.load(templateFile)

  def contents(templateVariables: Map[String, Any]): String = {
    DynamicResource.Engine.layout(templateFile, templateVariables)
  }

  val contentType: ContentType.NonBinary = ContentTypes.`text/html(UTF-8)`
}
Example 25
Source File: StaticResource.scala From sumobot with Apache License 2.0

package com.sumologic.sumobot.http_frontend

import akka.http.scaladsl.model.{ContentType, ContentTypes, HttpCharsets, MediaTypes}
import org.apache.commons.io.IOUtils

object StaticResource {
  private[http_frontend] val DefaultContentType = ContentTypes.`text/html(UTF-8)`

  case class ContentTypePair(extension: String, contentType: ContentType)

  private[http_frontend] val KnownContentTypes = Array(
    ContentTypePair(".html", ContentTypes.`text/html(UTF-8)`),
    ContentTypePair(".css", ContentType(MediaTypes.`text/css`, HttpCharsets.`UTF-8`)),
    ContentTypePair(".js", ContentType(MediaTypes.`application/javascript`, HttpCharsets.`UTF-8`))
  )
}

case class StaticResource(filename: String) {
  def contents: Array[Byte] = {
    val stream = getClass.getResourceAsStream(filename)
    val result = IOUtils.toByteArray(stream)
    stream.close()
    result
  }

  def contentType: ContentType = {
    val contentType = StaticResource.KnownContentTypes.find(contentType => filename.endsWith(contentType.extension))
      .map(contentTypePair => contentTypePair.contentType)
    contentType.getOrElse(StaticResource.DefaultContentType)
  }
}
Example 26
Source File: FileUtilsSpec.scala From warp-core with MIT License

package com.workday.warp.common.utils

import java.io.InputStream
import java.nio.charset.Charset
import java.util.zip.ZipException

import com.workday.warp.common.spec.WarpJUnitSpec
import com.workday.warp.junit.UnitTest
import org.apache.commons.io.IOUtils

// Excerpt: the enclosing spec class declaration and remaining tests are omitted in this listing.
  @UnitTest
  def testNonExistentZipEntryStreamCannotBeRetrievedFromZipFile(): Unit = {
    val zipEntryName: String = "DOES_NOT_EXIST"
    val filePath: String = getClass.getResource("/simpleZip.zip").getPath

    val thrown: Throwable = intercept[NullPointerException] {
      FileUtils.getStreamToLogFileInZip(filePath, "DOES_NOT_EXIST")
    }
    thrown.getMessage should be (s"Zip entry $zipEntryName not found in $filePath")
  }
}
Example 27
Source File: MultipartUploadSpec.scala From peregrine with Apache License 2.0

package io.peregrine

import io.peregrine.test.FlatSpecHelper
import com.twitter.finagle.http.{Request => FinagleRequest}
import org.apache.commons.io.IOUtils

class MultipartUploadSpec extends FlatSpecHelper {

  class ExampleApp extends Controller {
    post("/groups_file") { request =>
      val groupsParam = request.multiParams.get("groups")
      val typeParam = request.multiParams.get("type")

      render
        .header("X-Content-Type", groupsParam.get.contentType.toString)
        .header("X-Filename", groupsParam.get.filename.toString)
        .header("X-Type-Text", typeParam.get.value)
        .plain("ok").toFuture
    }
  }

  val server = new PeregrineServer
  server.register(new ExampleApp)

  "Multi part uploads with text and file fields" should "work" in {
    val s = getClass.getResourceAsStream("/upload.bytes")
    val b = IOUtils.toByteArray(s)
    val r = FinagleRequest.decodeBytes(b)
    send(r)
    response.code should equal (200)
    response.getHeader("X-Content-Type") should equal("Some(image/gif)")
    response.getHeader("X-Filename") should equal("Some(dealwithit.gif)")
    response.getHeader("X-Type-Text") should equal("text")
  }
}
Example 28
Source File: ContainerReadableByString.scala From aloha with MIT License

package com.eharmony.aloha.io

import scala.language.higherKinds

import org.apache.commons.io.IOUtils
import java.lang.String
import java.io.{ByteArrayOutputStream, Reader, InputStreamReader, InputStream}

// Excerpt: the enclosing trait declaration, which supplies C[_], inputCharset and fromString, is omitted in this listing.
  def fromReader[A](r: Reader): C[A] = {
    try {
      val baos = new ByteArrayOutputStream // Don't need to close.
      IOUtils.copy(r, baos, inputCharset)
      fromString[A](new String(baos.toByteArray))
    }
    finally {
      IOUtils.closeQuietly(r)
    }
  }
}
Example 29
Source File: ReadableByString.scala From aloha with MIT License

package com.eharmony.aloha.io

import java.io.{InputStreamReader, ByteArrayOutputStream, Reader, InputStream}
import org.apache.commons.io.IOUtils

// Excerpt: the enclosing trait declaration, which supplies the result type A, inputCharset and fromString, is omitted in this listing.
  final def fromReader(r: Reader): A = {
    try {
      val baos = new ByteArrayOutputStream // Don't need to close.
      IOUtils.copy(r, baos, inputCharset)
      fromString(new String(baos.toByteArray))
    }
    finally {
      IOUtils.closeQuietly(r)
    }
  }
}
Example 30
Source File: package.scala From aloha with MIT License

package com.eharmony

import java.util.Properties

import org.apache.commons.io.IOUtils
import org.apache.commons.vfs2.VFS

package object aloha {

  def pkgName = getClass.getPackage.getName
  def version: String = _version

  private[this] lazy val _version: String = {
    val is = VFS.getManager.resolveFile("res:" + pkgName.replaceAll("\\.", "/") + "/version.properties").getContent.getInputStream
    try {
      val p = new Properties()
      p.load(is)
      p.getProperty("aloha.version")
    } finally {
      IOUtils closeQuietly is
    }
  }
}
Example 31
Source File: StdAvroModelFactoryTest.scala From aloha with MIT License

package com.eharmony.aloha.factory.avro

import com.eharmony.aloha.audit.impl.avro.Score
import com.eharmony.aloha.factory.ModelFactory
import com.eharmony.aloha.io.vfs.Vfs1
import com.eharmony.aloha.models.Model
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericData, GenericRecord}
import org.apache.commons.io.IOUtils
import org.junit.Assert.assertEquals
import org.junit.Test
import org.junit.runner.RunWith
import org.junit.runners.BlockJUnit4ClassRunner

import scala.util.Try

// Excerpt: the enclosing test class declaration is omitted in this listing.
  private[this] def record = {
    val r = new GenericData.Record(TheSchema)
    r.put("req_str_1", "smart handsome stubborn")
    r
  }
}

object StdAvroModelFactoryTest {
  private lazy val TheSchema = {
    val is = getClass.getClassLoader.getResourceAsStream(SchemaUrlResource)
    try new Schema.Parser().parse(is) finally IOUtils.closeQuietly(is)
  }

  private val ExpectedResult = 7d

  private val SchemaUrlResource = "avro/class7.avpr"
  private val SchemaUrl = s"res:$SchemaUrlResource"
  private val SchemaFile = new java.io.File(getClass.getClassLoader.getResource(SchemaUrlResource).getFile)
  private val SchemaVfs1FileObject = org.apache.commons.vfs.VFS.getManager.resolveFile(SchemaUrl)
  private val SchemaVfs2FileObject = org.apache.commons.vfs2.VFS.getManager.resolveFile(SchemaUrl)

  private val Imports = Seq("com.eharmony.aloha.feature.BasicFunctions._", "scala.math._")

  private val ReturnType = "Double"

  private val ModelJson =
    """
      |{
      |  "modelType": "Regression",
      |  "modelId": { "id": 0, "name": "" },
      |  "features" : {
      |    "my_attributes": "${req_str_1}.split(\"\\\\W+\").map(v => (s\"=$v\", 1.0))"
      |  },
      |  "weights": {
      |    "my_attributes=handsome": 1,
      |    "my_attributes=smart": 2,
      |    "my_attributes=stubborn": 4
      |  }
      |}
    """.stripMargin
}
Example 32
Source File: VwSparseMultilabelPredictorTest.scala From aloha with MIT License

package com.eharmony.aloha.models.vw.jni.multilabel

import java.io.{ByteArrayOutputStream, File, FileInputStream}

import com.eharmony.aloha.ModelSerializationTestHelper
import com.eharmony.aloha.io.sources.{Base64StringSource, ExternalSource, ModelSource}
import org.apache.commons.codec.binary.Base64
import org.apache.commons.io.IOUtils
import org.junit.Assert._
import org.junit.Test
import org.junit.runner.RunWith
import org.junit.runners.BlockJUnit4ClassRunner
import vowpalWabbit.learner.{VWActionScoresLearner, VWLearners}

@RunWith(classOf[BlockJUnit4ClassRunner])
class VwSparseMultilabelPredictorTest extends ModelSerializationTestHelper {
  import VwSparseMultilabelPredictorTest._

  @Test def testSerializability(): Unit = {
    val predictor = getPredictor(getModelSource(), 3)
    val ds = serializeDeserializeRoundTrip(predictor)
    assertEquals(predictor, ds)
    assertEquals(predictor.vwParams(), ds.vwParams())
    assertNotNull(ds.vwModel)
  }

  @Test def testVwParameters(): Unit = {
    val numLabelsInTrainingSet = 3
    val predictor = getPredictor(getModelSource(), numLabelsInTrainingSet)

    predictor.vwParams() match {
      case Data(vwBinFilePath, ringSize) =>
        checkVwBinFile(vwBinFilePath)
        checkVwRingSize(numLabelsInTrainingSet, ringSize.toInt)
      case ps => fail(s"Unexpected VW parameters format. Found string: $ps")
    }
  }
}

object VwSparseMultilabelPredictorTest {
  private val Data = """\s*-i\s+(\S+)\s+--ring_size\s+(\d+)\s+--testonly\s+--quiet""".r

  private def getModelSource(): ModelSource = {
    val f = File.createTempFile("i_dont", "care")
    f.deleteOnExit()
    val learner = VWLearners.create[VWActionScoresLearner](s"--quiet --csoaa_ldf mc --csoaa_rank -f ${f.getCanonicalPath}")
    learner.close()
    val baos = new ByteArrayOutputStream()
    IOUtils.copy(new FileInputStream(f), baos)
    val src = Base64StringSource(Base64.encodeBase64URLSafeString(baos.toByteArray))
    ExternalSource(src.localVfs)
  }

  private def getPredictor(modelSrc: ModelSource, numLabelsInTrainingSet: Int) =
    VwSparseMultilabelPredictor[Any](modelSrc, Nil, Nil, numLabelsInTrainingSet)

  private def checkVwBinFile(vwBinFilePath: String): Unit = {
    val vwBinFile = new File(vwBinFilePath)
    assertTrue("VW binary file should have been written to disk", vwBinFile.exists())
    vwBinFile.deleteOnExit()
  }

  private def checkVwRingSize(numLabelsInTrainingSet: Int, ringSize: Int): Unit = {
    assertEquals(
      "vw --ring_size parameter is incorrect:",
      numLabelsInTrainingSet + VwSparseMultilabelPredictor.AddlVwRingSize,
      ringSize.toInt
    )
  }
}
Example 33
Source File: UTF8TextOutputFormatter.scala From glow with Apache License 2.0 | 5 votes |
package io.projectglow.transformers.pipe import java.io.InputStream import scala.collection.JavaConverters._ import org.apache.commons.io.IOUtils import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.types.{StringType, StructField, StructType} import org.apache.spark.unsafe.types.UTF8String class UTF8TextOutputFormatter() extends OutputFormatter { override def makeIterator(stream: InputStream): Iterator[Any] = { val schema = StructType(Seq(StructField("text", StringType))) val iter = IOUtils.lineIterator(stream, "UTF-8").asScala.map { s => new GenericInternalRow(Array(UTF8String.fromString(s)): Array[Any]) } Iterator(schema) ++ iter } } class UTF8TextOutputFormatterFactory extends OutputFormatterFactory { override def name: String = "text" override def makeOutputFormatter(options: Map[String, String]): OutputFormatter = { new UTF8TextOutputFormatter } }
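A minimal sketch of the IOUtils.lineIterator pattern used above, outside of Spark; the input data below is made up for illustration:

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets
import scala.collection.JavaConverters._
import org.apache.commons.io.IOUtils

object LineIteratorDemo {
  def main(args: Array[String]): Unit = {
    val stream = new ByteArrayInputStream("a\nb\nc".getBytes(StandardCharsets.UTF_8))
    // lineIterator reads lazily; asScala exposes it to normal Scala collection operations.
    val lines = IOUtils.lineIterator(stream, "UTF-8").asScala.map(_.toUpperCase).toList
    println(lines) // List(A, B, C)
    IOUtils.closeQuietly(stream)
  }
}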
Example 34
Source File: TarGz$Test.scala From mystem-scala with MIT License | 5 votes |
package ru.stachek66.tools import java.io.{File, FileInputStream} import org.apache.commons.io.IOUtils import org.junit.runner.RunWith import org.scalatest.FunSuite import org.scalatest.junit.JUnitRunner class TarGz$Test extends FunSuite { test("tgz-test") { val src = new File("src/test/resources/test.txt") TarGz.unpack( new File("src/test/resources/test.tar.gz"), new File("src/test/resources/res.txt")) match { case f => val content0 = IOUtils.toString(new FileInputStream(f)) val content1 = IOUtils.toString(new FileInputStream(src)) print(content0.trim + " vs " + content1.trim) assert(content0 === content1) } } }
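The assertion above compares two files by reading both into strings. Commons IO also offers IOUtils.contentEquals for a streaming comparison; a minimal sketch under the assumption that both files exist locally (the paths below are placeholders):

import java.io.{File, FileInputStream}
import org.apache.commons.io.IOUtils

object CompareFiles {
  // Stream-compare two files without loading either fully into memory.
  def sameContent(a: File, b: File): Boolean = {
    val (inA, inB) = (new FileInputStream(a), new FileInputStream(b))
    try IOUtils.contentEquals(inA, inB)
    finally { IOUtils.closeQuietly(inA); IOUtils.closeQuietly(inB) }
  }

  def main(args: Array[String]): Unit =
    // Placeholder paths; point these at real files before running.
    println(sameContent(new File("left.txt"), new File("right.txt")))
}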
Example 35
Source File: HDFSStore.scala From speedo with Apache License 2.0 | 5 votes |
package com.htc.speedo.caffe import com.twitter.bijection.Codec import com.twitter.storehaus.Store import com.twitter.util.{ Future, Time } import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path object HDFSStore { val fs = rootDir.getFileSystem(conf) // make sure the root directory exists fs.mkdirs(rootDir) override def get(key: String) = Future { Some(new Path(rootDir, key)).filter(fs.exists).flatMap { path => val stream = fs.open(path) val bytes = IOUtils.toByteArray(stream) stream.close codec.invert(bytes).toOption } } override def put(kv: (String, Option[V])) = Future { val path = new Path(rootDir, kv._1) kv._2 match { case None => fs.delete(path, false) case Some(v) => val bytes = codec(v) val stream = fs.create(path, true) stream.write(bytes) stream.close } } override def close(time: Time) = Future { fs.close } }
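The get method above reads a stored value by pulling the whole HDFS file into a byte array with IOUtils.toByteArray. A standalone sketch of that read step, assuming a default Configuration (which resolves to the local filesystem) and a placeholder path:

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

object HdfsReadBytes {
  // Read the whole file at `path` into memory; appropriate for small blobs such as serialized models.
  def readBytes(path: Path, conf: Configuration): Array[Byte] = {
    val fs = path.getFileSystem(conf)
    val in = fs.open(path)
    try IOUtils.toByteArray(in)
    finally IOUtils.closeQuietly(in)
  }

  def main(args: Array[String]): Unit = {
    val bytes = readBytes(new Path("/tmp/example.bin"), new Configuration()) // placeholder path
    println(s"read ${bytes.length} bytes")
  }
}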
Example 36
Source File: ArchiveUtils.scala From dl4scala with MIT License | 5 votes |
package org.dl4scala.util import org.slf4j.LoggerFactory import org.apache.commons.compress.archivers.tar.TarArchiveEntry import org.apache.commons.compress.archivers.tar.TarArchiveInputStream import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream import org.apache.commons.io.FileUtils import org.apache.commons.io.IOUtils import java.io._ import java.util.zip.GZIPInputStream import java.util.zip.ZipInputStream tarIn.close() } else if (file.endsWith(".gz")) { val is2 = new GZIPInputStream(fin) val extracted = new File(target.getParent, target.getName.replace(".gz", "")) if (extracted.exists) extracted.delete extracted.createNewFile val fos = FileUtils.openOutputStream(extracted) IOUtils.copyLarge(is2, fos) is2.close() fos.flush() fos.close() } target.delete } }
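The .gz branch above streams a GZIPInputStream into a FileOutputStream with IOUtils.copyLarge. A minimal sketch of that decompression step on its own; the file names are placeholders:

import java.io.{File, FileInputStream}
import java.util.zip.GZIPInputStream
import org.apache.commons.io.{FileUtils, IOUtils}

object Gunzip {
  // Decompress `source` into `target`, streaming so large files are never held fully in memory.
  def gunzip(source: File, target: File): Unit = {
    val in = new GZIPInputStream(new FileInputStream(source))
    val out = FileUtils.openOutputStream(target) // also creates missing parent directories
    try IOUtils.copyLarge(in, out)
    finally { IOUtils.closeQuietly(in); IOUtils.closeQuietly(out) }
  }

  def main(args: Array[String]): Unit =
    gunzip(new File("data.txt.gz"), new File("data.txt")) // placeholder paths
}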
Example 37
Source File: Secret.scala From skuber with Apache License 2.0 | 5 votes |
package skuber import java.io._ import org.apache.commons.io.IOUtils case class Secret( val kind: String ="Secret", override val apiVersion: String = v1, val metadata: ObjectMeta, data: Map[String, Array[Byte]] = Map(), val `type`: String = "") extends ObjectResource { def add(key: String, is: InputStream) : Secret = { val bytes = IOUtils.toByteArray(is) add(key, bytes) } def add(key: String, bytes: Array[Byte]): Secret = this.copy(data = data + (key -> bytes)) } object Secret { val specification=CoreResourceSpecification( scope = ResourceSpecification.Scope.Namespaced, names = ResourceSpecification.Names( plural="secrets", singular="secret", kind="Secret", shortNames=Nil ) ) implicit val secDef = new ResourceDefinition[Secret] { def spec=specification } implicit val secListDef = new ResourceDefinition[SecretList] { def spec=specification } }
Example 38
Source File: SparkSvc.scala From Mastering-Spark-for-Data-Science with MIT License | 5 votes |
package svc import java.io.StringWriter import java.net.{HttpURLConnection, URL} import com.typesafe.config.ConfigFactory import io.gzet.recommender.Node import org.apache.commons.io.IOUtils import play.api.Logger import play.api.libs.json._ class SparkSvc() { val config = ConfigFactory.load() val host = config.getString("spark.job.server.host") val port = config.getInt("spark.job.server.port") val timeout = config.getInt("spark.job.server.timeout") val appName = config.getString("spark.job.server.app") val context = config.getString("spark.job.server.context") val indexJob = config.getString("spark.job.index") val playlistJob = config.getString("spark.job.playlist") val playlistRecommendJob = config.getString("spark.job.personalized.playlist") private def getConnection(endpoint: String, params: Option[String]) = { try { val url = new URL(endpoint) val connection = url.openConnection().asInstanceOf[HttpURLConnection] connection.setDoOutput(true) connection.setRequestMethod("POST") connection.setRequestProperty("Accept", "application/json") if(params.isDefined){ val os = connection.getOutputStream os.write(params.get.getBytes()) os.flush() os.close() } val inputStream = connection.getInputStream val writer = new StringWriter() IOUtils.copy(inputStream, writer, "UTF-8") val ret = writer.toString Json.parse(ret) } catch { case e: Exception => throw new Exception("Job Failed: " + e.getMessage) } } private def parseResponse(json: JsValue) : String = { val jobId = (json \ "result" \ "jobId").asOpt[String] if(jobId.isDefined){ s"Job submitted [${jobId.get}]" } else { val message = (json \ "result" \ "message").asOpt[String] if(message.isDefined){ throw new Exception(s"Job failed: ${message.get}") } throw new Exception("Could not find Spark job id") } } def index(path: String): String = { Logger.info("Submitting INDEX job") val url = s"http://$host:$port/jobs?appName=$appName&classPath=$indexJob&context=$context" val params = "input.dir=\"" + path + "\"" val json = getConnection(url, Some(params)) parseResponse(json) } def playlist() = { Logger.info("Submitting PLAYLIST job") val url = s"http://$host:$port/jobs?appName=$appName&classPath=$playlistJob&context=$context" val json = getConnection(url, None) parseResponse(json) } def playlist(id: Long) = { Logger.info("Submitting RECOMMEND job") val url = s"http://$host:$port/jobs?appName=$appName&classPath=$playlistRecommendJob&context=$context&sync=true&timeout=$timeout" val params = s"song.id=$id" val json: JsValue = getConnection(url, Some(params)) val array = (json \ "result").as[Array[String]] array.map({line => val Array(id, pr, song) = line.split(",").take(3) Node(id.toLong, song, pr.toDouble) }).toList } }
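getConnection above drains the HTTP response into a StringWriter with IOUtils.copy(inputStream, writer, "UTF-8"). A minimal sketch of just that response-reading step, with a placeholder URL; IOUtils.toString(inputStream, "UTF-8") would be an equivalent one-liner:

import java.io.StringWriter
import java.net.{HttpURLConnection, URL}
import org.apache.commons.io.IOUtils

object HttpGetAsString {
  def get(endpoint: String): String = {
    val connection = new URL(endpoint).openConnection().asInstanceOf[HttpURLConnection]
    connection.setRequestMethod("GET")
    val in = connection.getInputStream
    try {
      val writer = new StringWriter()
      IOUtils.copy(in, writer, "UTF-8") // same call as in SparkSvc.getConnection
      writer.toString
    } finally IOUtils.closeQuietly(in)
  }

  def main(args: Array[String]): Unit =
    println(get("http://localhost:8090/healthz")) // placeholder URL
}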
Example 39
Source File: PerformanceTester.scala From Mastering-Spark-for-Data-Science with MIT License | 5 votes |
package com.gravity.goose import org.apache.commons.lang.time.StopWatch import java.io.InputStream import org.apache.commons.io.IOUtils import utils.FileHelper def main(args: Array[String]) { System.out.println("testing performance of general goose extraction algos") implicit val config = TestUtils.NO_IMAGE_CONFIG val goose = new Goose(config) val html = FileHelper.loadResourceFile(TestUtils.staticHtmlDir + "scribd1.txt", Goose.getClass) val url = "http://www.scribd.com/doc/52584146/Microfinance-and-Poverty-Reduction?in_collection=2987942" val clock: StopWatch = new StopWatch System.out.println("How long does it take to extract an article?") clock.start() for (i <- 0 to 100) { goose.extractContent(url, html) } clock.stop() System.out.println("It takes " + clock.getTime + " milliseconds") } }
Example 40
Source File: FileHelper.scala From Mastering-Spark-for-Data-Science with MIT License | 5 votes |
package com.gravity.goose.utils import org.apache.commons.io.IOUtils import java.io.{IOException, InputStream} object FileHelper extends Logging { def loadResourceFile[A](filename: String, cls: Class[A]): String = { var filedata: String = "" val is: InputStream = cls.getResourceAsStream(filename) try { filedata = IOUtils.toString(is, "UTF-8") } catch { case e: IOException => warn(e, e.toString) } filedata } }
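FileHelper above reads a classpath resource with IOUtils.toString but never closes the stream. A slightly tightened sketch of the same idea that closes the stream in a finally block and guards against a missing resource; the resource name is a placeholder:

import java.io.InputStream
import org.apache.commons.io.IOUtils

object ResourceText {
  // Load a UTF-8 text resource from the classpath, returning "" if it is missing.
  def load(resource: String): String = {
    val is: InputStream = getClass.getClassLoader.getResourceAsStream(resource)
    if (is == null) ""
    else {
      try IOUtils.toString(is, "UTF-8")
      finally IOUtils.closeQuietly(is)
    }
  }

  def main(args: Array[String]): Unit =
    println(load("data/notes.txt")) // placeholder resource name
}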
Example 41
Source File: ProcessSpawner.scala From akka-http-health with MIT License | 5 votes |
package io.github.lhotari.akka.http.health import java.io.ByteArrayOutputStream import java.lang.System.getProperty import java.net.{URL, URLClassLoader} import org.apache.commons.io.IOUtils import scala.collection.JavaConverters._ import scala.reflect.runtime.universe._ case class ProcessResult(retval: Integer, output: String) trait ProcessSpawner { lazy val classpath = resolveClassPath() val sep = getProperty("file.separator") val javaExecutablePath = getProperty("java.home") + sep + "bin" + sep + "java" private def resolveClassPath() = { getClass.getClassLoader match { case urlClassLoader: URLClassLoader => urlClassLoader.getURLs.collect { case url: URL => url.getFile }.mkString(getProperty("path.separator")) case _ => getProperty("java.class.path") } } def executeInSeparateProcess[T](mainClassType: T, maxMemoryMB: Integer = 100, extraJvmOpts: Seq[String] = Nil, args: Seq[String] = Nil)(implicit tag: WeakTypeTag[T]): ProcessResult = { val className = tag.tpe.termSymbol.fullName val processBuilder = new ProcessBuilder(javaExecutablePath).redirectErrorStream(true) val commands = processBuilder.command() commands.add(s"-Xmx${maxMemoryMB}m") commands.addAll(extraJvmOpts.asJava) commands.add("-cp") commands.add(classpath) commands.add(className) commands.addAll(args.asJava) println(String.join(" ", commands)) val process = processBuilder.start() val output = new ByteArrayOutputStream() IOUtils.copy(process.getInputStream, output) ProcessResult(process.waitFor(), output.toString()) } }
Example 42
Source File: ShellBanner.scala From shellbase with Apache License 2.0 | 5 votes |
package com.sumologic.shellbase import org.apache.commons.io.IOUtils class ShellBanner(resource: String) { def load(): String = { val in = getClass.getClassLoader.getResourceAsStream(resource) val banner = IOUtils.toString(in) in.close() banner } } object ShellBanner { lazy val Warning = new ShellBanner("banners/warning.txt").load() }
Example 43
Source File: ScriptRenderer.scala From shellbase with Apache License 2.0 | 5 votes |
package com.sumologic.shellbase

import java.io._

import org.apache.commons.io.IOUtils

import scala.io.Source

class ScriptRenderer(file: File, args: Array[String]) {

  def getLines: Seq[String] = {
    val prefix = file.getName + ("_" * Math.max(0, 3 - file.getName.length))
    val output = File.createTempFile(prefix, "script")
    output.deleteOnExit()
    val reader = new FileReader(file)
    val writer = new FileWriter(output)
    try {
      VelocityRenderer.render(argsToMap, reader, writer)
    } finally {
      IOUtils.closeQuietly(reader)
      IOUtils.closeQuietly(writer)
    }
    Source.fromFile(output).getLines().toSeq
  }

  private[shellbase] def argsToMap: Map[String, String] = {
    args.map(s => {
      require(s.contains("="), s"Argument $s does not contain = sign!")
      val splitted = s.split("=")
      require(splitted.size == 2, s"Argument $s contains more than one =!")
      splitted(0) -> splitted(1)
    }).toMap
  }
}
Example 44
Source File: UJESClientImplTest.scala From Linkis with Apache License 2.0 | 5 votes |
object UJESClientImplTest extends App { val clientConfig = DWSClientConfigBuilder.newBuilder().addUJESServerUrl("http://localhost:port") .connectionTimeout(30000).discoveryEnabled(true) .discoveryFrequency(1, TimeUnit.MINUTES) .loadbalancerEnabled(true).maxConnectionSize(5) .retryEnabled(false).readTimeout(30000) .setAuthenticationStrategy(new StaticAuthenticationStrategy()).setAuthTokenKey("") .setAuthTokenValue("").setDWSVersion("v1").build() val client = UJESClient(clientConfig) val jobExecuteResult = client.execute(JobExecuteAction.builder().setCreator("UJESClient-Test") .addExecuteCode("show tables") .setEngineType(EngineType.SPARK).setUser("").build()) println("execId: " + jobExecuteResult.getExecID + ", taskId: " + jobExecuteResult.taskID) var status = client.status(jobExecuteResult) while(!status.isCompleted) { val progress = client.progress(jobExecuteResult) val progressInfo = if(progress.getProgressInfo != null) progress.getProgressInfo.toList else List.empty println("progress: " + progress.getProgress + ", progressInfo: " + progressInfo) Utils.sleepQuietly(500) status = client.status(jobExecuteResult) } val jobInfo = client.getJobInfo(jobExecuteResult) val resultSet = jobInfo.getResultSetList(client).head val fileContents = client.resultSet(ResultSetAction.builder().setPath(resultSet).setUser(jobExecuteResult.getUser).build()).getFileContent println("fileContents: " + fileContents) IOUtils.closeQuietly(client) }
Example 45
Source File: JsonReceiver.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
package org.apache.iota.fey import java.io.FileOutputStream import java.net.URL import java.io.File import com.eclipsesource.schema._ import org.slf4j.LoggerFactory import play.api.libs.json._ import JSON_PATH._ import java.nio.file.{Files, Paths} import org.apache.commons.io.IOUtils import org.apache.commons.codec.binary.Base64 import scala.util.Properties._ def exceptionOnRun(e: Exception): Unit } object HttpBasicAuth { val BASIC = "Basic" val AUTHORIZATION = "Authorization" def encodeCredentials(username: String, password: String): String = { new String(Base64.encodeBase64((username + ":" + password).getBytes)) } def getHeader(username: String, password: String): String = BASIC + " " + encodeCredentials(username, password) }
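A short usage sketch for the HttpBasicAuth helper above. The standalone reimplementation and credentials below are illustrative only; it performs the same "Basic " + base64("user:password") encoding:

import org.apache.commons.codec.binary.Base64

object BasicAuthDemo {
  // Same encoding as HttpBasicAuth above: "Basic " followed by base64 of "username:password".
  def header(username: String, password: String): String =
    "Basic " + new String(Base64.encodeBase64((username + ":" + password).getBytes("UTF-8")), "UTF-8")

  def main(args: Array[String]): Unit =
    println(header("user", "secret")) // hypothetical credentials; prints "Basic dXNlcjpzZWNyZXQ="
}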
Example 46
Source File: FeyGenericActorReceiver.scala From incubator-retired-iota with Apache License 2.0 | 5 votes |
package org.apache.iota.fey import java.io.{File, FileOutputStream} import java.net.URL import java.nio.file.{Files, Paths} import com.eclipsesource.schema._ import akka.actor.ActorRef import com.eclipsesource.schema.SchemaValidator import org.apache.commons.io.IOUtils import play.api.libs.json._ import scala.concurrent.duration._ import scala.util.Properties._ abstract class FeyGenericActorReceiver(override val params: Map[String,String] = Map.empty, override val backoff: FiniteDuration = 1.minutes, override val connectTo: Map[String,ActorRef] = Map.empty, override val schedulerTimeInterval: FiniteDuration = 2.seconds, override val orchestrationName: String = "", override val orchestrationID: String = "", override val autoScale: Boolean = false) extends FeyGenericActor{ private[fey] val feyCore = FEY_CORE_ACTOR.actorRef override final def processMessage[T](message: T, sender: ActorRef): Unit = { try { val jsonString = getJSONString(message) if(jsonString != "{}") { processJson(jsonString) } startBackoff() }catch{ case e: Exception => log.error(e, s"Could not process message $message") } } private[fey] def processJson(jsonString: String) = { var orchID:String = "None" try{ val orchestrationJSON = Json.parse(jsonString) orchID = (orchestrationJSON \ JSON_PATH.GUID).as[String] val valid = validJson(orchestrationJSON) if(valid && (orchestrationJSON \ JSON_PATH.COMMAND).as[String].toUpperCase != "DELETE"){ checkForLocation(orchestrationJSON) } if(valid) { feyCore ! FeyCore.ORCHESTRATION_RECEIVED(orchestrationJSON, None) }else{ log.warning(s"Could not forward Orchestration $orchID. Invalid JSON schema") } } catch { case e: Exception => log.error(e, s"Orchestration $orchID could not be forwarded") } } def resolveCredentials(credentials: Option[JsObject]):Option[(String, String)] = { credentials match { case None => None case Some(cred) => val user = (cred \ JSON_PATH.JAR_CRED_USER).as[String] val password = (cred \ JSON_PATH.JAR_CRED_PASSWORD).as[String] Option(envOrElse(user,user), envOrElse(password,password)) } } }
Example 47
Source File: JsonSinkTest.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.json import io.eels.datastream.DataStream import io.eels.schema.{Field, StructType} import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.scalatest.{Matchers, WordSpec} class JsonSinkTest extends WordSpec with Matchers { val path = new Path("test.json") implicit val fs: FileSystem = FileSystem.get(new Configuration()) "JsonSink" should { "write multiple json docs to a file" in { if (fs.exists(path)) fs.delete(path, false) val schema = StructType(Field("name"), Field("location")) val ds = DataStream.fromValues( schema, Seq( Vector("sam", "aylesbury"), Vector("jam", "aylesbury"), Vector("ham", "buckingham") ) ) ds.to(JsonSink(path)) val input = IOUtils.toString(fs.open(path)) input should include("""{"name":"sam","location":"aylesbury"}""") input should include("""{"name":"jam","location":"aylesbury"}""") input should include("""{"name":"ham","location":"buckingham"}""") fs.delete(path, false) } "support arrays" in { if (fs.exists(path)) fs.delete(path, false) val schema = StructType(Field("name"), Field("skills")) val frame = DataStream.fromValues( schema, Seq(Vector("sam", Array("karate", "kung fu"))) ) frame.to(JsonSink(path)) val input = IOUtils.toString(fs.open(path)) input.trim shouldBe """{"name":"sam","skills":["karate","kung fu"]}""" fs.delete(path, false) } "support maps" in { if (fs.exists(path)) fs.delete(path, false) val schema = StructType(Field("name"), Field("locations")) val frame = DataStream.fromValues( schema, Seq(Vector("sam", Map("home" -> "boro", "work" -> "london"))) ) frame.to(JsonSink(path)) val input = IOUtils.toString(fs.open(path)) input.trim shouldBe """{"name":"sam","locations":{"home":"boro","work":"london"}}""" fs.delete(path, false) } "support structs" in { case class Foo(home: String, work: String) if (fs.exists(path)) fs.delete(path, false) val schema = StructType(Field("name"), Field("locations")) val frame = DataStream.fromValues( schema, Seq(Vector("sam", Foo("boro", "london"))) ) frame.to(JsonSink(path)) val input = IOUtils.toString(fs.open(path)) input.trim shouldBe """{"name":"sam","locations":{"home":"boro","work":"london"}}""" fs.delete(path, false) } } }
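The assertions above read the written file back with IOUtils.toString(fs.open(path)). A minimal sketch of that read-back step against a local Hadoop FileSystem; the path is a placeholder, and passing an explicit charset avoids the deprecated default-charset overload:

import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object ReadBack {
  def main(args: Array[String]): Unit = {
    val fs: FileSystem = FileSystem.get(new Configuration()) // local filesystem by default
    val path = new Path("test.json") // placeholder: assumes the file was written beforehand
    val in = fs.open(path)
    try println(IOUtils.toString(in, "UTF-8"))
    finally IOUtils.closeQuietly(in)
  }
}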
Example 48
Source File: StorageCSVWriter.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.csv

import java.io._

import com.webank.wedatasphere.linkis.common.io.{MetaData, Record}
import com.webank.wedatasphere.linkis.common.utils.Logging
import com.webank.wedatasphere.linkis.storage.resultset.table.{TableMetaData, TableRecord}
import org.apache.commons.io.IOUtils

class StorageCSVWriter(val charset: String, val separator: String, val outputStream: OutputStream) extends CSVFsWriter with Logging {

  private val delimiter = separator match {
    case "," => ','
    case _ => '\t'
  }

  private val buffer: StringBuilder = new StringBuilder(50000)

  @scala.throws[IOException]
  override def addMetaData(metaData: MetaData): Unit = {
    val head = metaData.asInstanceOf[TableMetaData].columns.map(_.columnName)
    write(head) //IOUtils.write(compact(head).getBytes(charset),outputStream)
  }

  private def compact(row: Array[String]): String = {
    val tmp = row.foldLeft("")((l, r) => l + delimiter + r)
    tmp.substring(1, tmp.length) + "\n"
  }

  private def write(row: Array[String]) = {
    val content: String = compact(row)
    if (buffer.length + content.length > 49500) {
      IOUtils.write(buffer.toString().getBytes(charset), outputStream)
      buffer.clear()
    }
    buffer.append(content)
  }

  @scala.throws[IOException]
  override def addRecord(record: Record): Unit = {
    val body = record.asInstanceOf[TableRecord].row.map(_.toString) // null replacement etc. is handled at read time
    write(body) //IOUtils.write(compact(body).getBytes(charset),outputStream)
  }

  override def flush(): Unit = {
    IOUtils.write(buffer.toString().getBytes(charset), outputStream)
    buffer.clear()
  }

  override def close(): Unit = {
    flush()
    IOUtils.closeQuietly(outputStream)
  }
}
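A stripped-down sketch of the buffer-and-flush pattern used by the writer above, against a plain in-memory OutputStream; the threshold and rows below are illustrative, not taken from Linkis:

import java.io.ByteArrayOutputStream
import org.apache.commons.io.IOUtils

object BufferedCsvSketch {
  def main(args: Array[String]): Unit = {
    val out = new ByteArrayOutputStream()
    val buffer = new StringBuilder
    val threshold = 64 // illustrative; the writer above uses roughly 49500

    def writeRow(row: Array[String]): Unit = {
      val line = row.mkString("\t") + "\n"
      if (buffer.length + line.length > threshold) { // spill the buffer before it grows too large
        IOUtils.write(buffer.toString.getBytes("UTF-8"), out)
        buffer.clear()
      }
      buffer.append(line)
    }

    Seq(Array("a", "1"), Array("b", "2"), Array("c", "3")).foreach(writeRow)
    IOUtils.write(buffer.toString.getBytes("UTF-8"), out) // final flush
    print(out.toString("UTF-8"))
  }
}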
Example 49
Source File: StorageScriptFsReader.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.script.reader import java.io._ import com.webank.wedatasphere.linkis.common.io.{FsPath, MetaData, Record} import com.webank.wedatasphere.linkis.storage.script._ import com.webank.wedatasphere.linkis.storage.utils.StorageUtils import org.apache.commons.io.IOUtils import scala.collection.mutable.ArrayBuffer def isMetadata(line: String, prefix: String, prefixConf: String): Boolean = { val regex = ("\\s*" + prefix + "\\s*(.+)\\s*" + "=" + "\\s*(.+)\\s*").r line match { case regex(_, _) => true case _ => { val split: Array[String] = line.split("=") if (split.size != 2) return false if (split(0).split(" ").filter(_ != "").size != 4) return false if (!split(0).split(" ").filter(_ != "")(0).equals(prefixConf)) return false true } } } }
Example 50
Source File: StorageScriptFsWriter.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.script.writer import java.io.{ByteArrayInputStream, IOException, InputStream, OutputStream} import java.util import com.webank.wedatasphere.linkis.common.io.{FsPath, MetaData, Record} import com.webank.wedatasphere.linkis.storage.LineRecord import com.webank.wedatasphere.linkis.storage.script.{Compaction, ScriptFsWriter, ScriptMetaData} import com.webank.wedatasphere.linkis.storage.utils.{StorageConfiguration, StorageUtils} import org.apache.commons.io.IOUtils class StorageScriptFsWriter(val path: FsPath, val charset: String, outputStream: OutputStream = null) extends ScriptFsWriter { private val stringBuilder = new StringBuilder @scala.throws[IOException] override def addMetaData(metaData: MetaData): Unit = { val compactions = Compaction.listCompactions().filter(p => p.belongTo(StorageUtils.pathToSuffix(path.getPath))) val metadataLine = new util.ArrayList[String]() if (compactions.length > 0) { metaData.asInstanceOf[ScriptMetaData].getMetaData.map(compactions(0).compact).foreach(metadataLine.add) if (outputStream != null) { IOUtils.writeLines(metadataLine, "\n", outputStream, charset) } else { import scala.collection.JavaConversions._ metadataLine.foreach(m => stringBuilder.append(s"$m\n")) } } } @scala.throws[IOException] override def addRecord(record: Record): Unit = { //转成LineRecord而不是TableRecord是为了兼容非Table类型的结果集写到本类中 val scriptRecord = record.asInstanceOf[LineRecord] if (outputStream != null) { IOUtils.write(scriptRecord.getLine, outputStream, charset) } else { stringBuilder.append(scriptRecord.getLine) } } override def close(): Unit = { IOUtils.closeQuietly(outputStream) } override def flush(): Unit = if (outputStream != null) outputStream.flush() def getInputStream(): InputStream = { new ByteArrayInputStream(stringBuilder.toString().getBytes(StorageConfiguration.STORAGE_RS_FILE_TYPE.getValue)) } }
Example 51
Source File: IOHelp.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.io.utils

import com.webank.wedatasphere.linkis.common.io.{Fs, FsPath}
import com.webank.wedatasphere.linkis.common.utils.Utils
import com.webank.wedatasphere.linkis.storage.domain.{MethodEntity, MethodEntitySerializer}
import com.webank.wedatasphere.linkis.storage.exception.StorageErrorException
import com.webank.wedatasphere.linkis.storage.resultset.io.{IOMetaData, IORecord}
import com.webank.wedatasphere.linkis.storage.resultset.{ResultSetFactory, ResultSetReader, ResultSetWriter}
import com.webank.wedatasphere.linkis.storage.utils.{StorageConfiguration, StorageUtils}
import org.apache.commons.io.IOUtils

  def write(fs: Fs, method: MethodEntity): Unit = {
    if (method.params == null || method.params.isEmpty) throw new StorageErrorException(53003, "Unsupported parameter call")
    val dest = MethodEntitySerializer.deserializerToJavaObject(method.params(0).asInstanceOf[String], classOf[FsPath])
    val overwrite = method.params(1).asInstanceOf[Boolean]
    val outputStream = fs.write(dest, overwrite)
    val content = method.params(2).asInstanceOf[String]
    Utils.tryFinally {
      val resultSet = ResultSetFactory.getInstance.getResultSetByType(ResultSetFactory.IO_TYPE)
      val reader = ResultSetReader.getResultSetReader(resultSet, content)
      while (reader.hasNext) {
        IOUtils.write(reader.getRecord.asInstanceOf[IORecord].value, outputStream)
      }
    }(IOUtils.closeQuietly(outputStream))
  }
}
Example 52
Source File: TokenAuthentication.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.gateway.security.token import java.io.File import java.util.Properties import java.util.concurrent.TimeUnit import com.webank.wedatasphere.linkis.common.utils.{Logging, Utils} import com.webank.wedatasphere.linkis.gateway.config.GatewayConfiguration._ import com.webank.wedatasphere.linkis.gateway.http.GatewayContext import com.webank.wedatasphere.linkis.gateway.security.{GatewaySSOUtils, SecurityFilter} import com.webank.wedatasphere.linkis.server.Message import org.apache.commons.io.{FileUtils, IOUtils} import org.apache.commons.lang.StringUtils object TokenAuthentication extends Logging { private val (props, file) = if(ENABLE_TOKEN_AUTHENTICATION.getValue) (new Properties, new File(this.getClass.getClassLoader.getResource(TOKEN_AUTHENTICATION_CONFIG.getValue).toURI.getPath)) else (null, null) private var lastModified = 0l if(ENABLE_TOKEN_AUTHENTICATION.getValue) { Utils.defaultScheduler.scheduleAtFixedRate(new Runnable { override def run(): Unit = Utils.tryAndError(init()) }, TOKEN_AUTHENTICATION_SCAN_INTERVAL.getValue, TOKEN_AUTHENTICATION_SCAN_INTERVAL.getValue, TimeUnit.MILLISECONDS) init() } private def init(): Unit = if(file.lastModified() > lastModified) { lastModified = file.lastModified() info(s"loading token authentication file $file.") val newProps = new Properties val input = FileUtils.openInputStream(file) Utils.tryFinally(newProps.load(input))(IOUtils.closeQuietly(input)) props.putAll(newProps) } private def validateTokenUser(token: String, tokenUser: String): Boolean = { val tokenUsers = props.getProperty(token) if(tokenUsers == "*" || (StringUtils.isNotBlank(tokenUsers) && tokenUsers.contains(tokenUser))) true else false } def isTokenRequest(gatewayContext: GatewayContext) : Boolean = { (gatewayContext.getRequest.getHeaders.containsKey(TOKEN_KEY) && gatewayContext.getRequest.getHeaders.containsKey(TOKEN_USER_KEY)) || ( gatewayContext.getRequest.getCookies.containsKey(TOKEN_KEY) && gatewayContext.getRequest.getCookies.containsKey(TOKEN_USER_KEY)) } def tokenAuth(gatewayContext: GatewayContext): Boolean = { if(!ENABLE_TOKEN_AUTHENTICATION.getValue) { val message = Message.noLogin(s"Gateway未启用token认证,请采用其他认证方式!") << gatewayContext.getRequest.getRequestURI SecurityFilter.filterResponse(gatewayContext, message) return false } var token = gatewayContext.getRequest.getHeaders.get(TOKEN_KEY)(0) var tokenUser = gatewayContext.getRequest.getHeaders.get(TOKEN_USER_KEY)(0) if(StringUtils.isBlank(token) || StringUtils.isBlank(tokenUser)) { token = gatewayContext.getRequest.getCookies.get(TOKEN_KEY)(0).getValue tokenUser = gatewayContext.getRequest.getCookies.get(TOKEN_USER_KEY)(0).getValue if(StringUtils.isBlank(token) || StringUtils.isBlank(tokenUser)) { val message = Message.noLogin(s"请在Header或Cookie中同时指定$TOKEN_KEY 和 $TOKEN_USER_KEY,以便完成token认证!") << gatewayContext.getRequest.getRequestURI SecurityFilter.filterResponse(gatewayContext, message) return false } } if(validateTokenUser(token, tokenUser)){ info(s"Token authentication succeed, uri: ${gatewayContext.getRequest.getRequestURI}, token: $token, tokenUser: $tokenUser.") GatewaySSOUtils.setLoginUser(gatewayContext.getRequest, tokenUser) true } else { val message = Message.noLogin(s"未授权的token$token,无法将请求绑定给tokenUser$tokenUser!") << gatewayContext.getRequest.getRequestURI SecurityFilter.filterResponse(gatewayContext, message) false } } }
Example 53
Source File: QueryUtils.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.jobhistory.util import java.io.{InputStream, OutputStream} import java.util.Date import com.webank.wedatasphere.linkis.common.conf.CommonVars import com.webank.wedatasphere.linkis.common.io.FsPath import com.webank.wedatasphere.linkis.common.utils.{Logging, Utils} import com.webank.wedatasphere.linkis.jobhistory.entity.QueryTask import com.webank.wedatasphere.linkis.protocol.query.RequestInsertTask import com.webank.wedatasphere.linkis.storage.FSFactory import com.webank.wedatasphere.linkis.storage.fs.FileSystem import com.webank.wedatasphere.linkis.storage.utils.{FileSystemUtils, StorageUtils} import org.apache.commons.io.IOUtils import org.apache.commons.lang.time.DateFormatUtils object QueryUtils extends Logging { private val CODE_STORE_PREFIX = CommonVars("bdp.dataworkcloud.query.store.prefix", "hdfs:///tmp/bdp-ide/") private val CODE_STORE_SUFFIX = CommonVars("bdp.dataworkcloud.query.store.suffix", "") private val CHARSET = "utf-8" private val CODE_SPLIT = ";" private val LENGTH_SPLIT = "#" def storeExecutionCode(requestInsertTask: RequestInsertTask): Unit = { if (requestInsertTask.getExecutionCode.length < 60000) return val user: String = requestInsertTask.getUmUser val path: String = getCodeStorePath(user) val fsPath: FsPath = new FsPath(path) val fileSystem = FSFactory.getFsByProxyUser(fsPath, user).asInstanceOf[FileSystem] fileSystem.init(null) var os: OutputStream = null var position = 0L val codeBytes = requestInsertTask.getExecutionCode.getBytes(CHARSET) path.intern() synchronized { Utils.tryFinally { if (!fileSystem.exists(fsPath)) FileSystemUtils.createNewFile(fsPath, user, true) os = fileSystem.write(fsPath, false) position = fileSystem.get(path).getLength IOUtils.write(codeBytes, os) } { IOUtils.closeQuietly(os) if (fileSystem != null) fileSystem.close() } } val length = codeBytes.length requestInsertTask.setExecutionCode(path + CODE_SPLIT + position + LENGTH_SPLIT + length) } def exchangeExecutionCode(queryTask: QueryTask): Unit = { import scala.util.control.Breaks._ if (queryTask.getExecutionCode == null || !queryTask.getExecutionCode.startsWith(StorageUtils.HDFS_SCHEMA)) return val codePath = queryTask.getExecutionCode val path = codePath.substring(0, codePath.lastIndexOf(CODE_SPLIT)) val codeInfo = codePath.substring(codePath.lastIndexOf(CODE_SPLIT) + 1) val infos: Array[String] = codeInfo.split(LENGTH_SPLIT) val position = infos(0).toInt var lengthLeft = infos(1).toInt val tub = new Array[Byte](1024) val executionCode: StringBuilder = new StringBuilder val fsPath: FsPath = new FsPath(path) val fileSystem = FSFactory.getFsByProxyUser(fsPath, queryTask.getUmUser).asInstanceOf[FileSystem] fileSystem.init(null) var is: InputStream = null if (!fileSystem.exists(fsPath)) return Utils.tryFinally { is = fileSystem.read(fsPath) if (position > 0) is.skip(position) breakable { while (lengthLeft > 0) { val readed = is.read(tub) val useful = Math.min(readed, lengthLeft) if (useful < 0) break() lengthLeft -= useful executionCode.append(new String(tub, 0, useful, CHARSET)) } } } { if (fileSystem != null) fileSystem.close() IOUtils.closeQuietly(is) } queryTask.setExecutionCode(executionCode.toString()) } private def getCodeStorePath(user: String): String = { val date: String = DateFormatUtils.format(new Date, "yyyyMMdd") s"${CODE_STORE_PREFIX.getValue}${user}${CODE_STORE_SUFFIX.getValue}/executionCode/${date}/_scripts" } }
Example 54
Source File: AbstractEngineCreator.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.enginemanager import java.net.ServerSocket import com.webank.wedatasphere.linkis.common.conf.DWCArgumentsParser import com.webank.wedatasphere.linkis.common.utils.Utils import com.webank.wedatasphere.linkis.enginemanager.conf.EngineManagerConfiguration import com.webank.wedatasphere.linkis.enginemanager.exception.EngineManagerErrorException import com.webank.wedatasphere.linkis.enginemanager.impl.UserTimeoutEngineResource import com.webank.wedatasphere.linkis.enginemanager.process.{CommonProcessEngine, ProcessEngine, ProcessEngineBuilder} import com.webank.wedatasphere.linkis.protocol.engine.{EngineCallback, RequestEngine} import com.webank.wedatasphere.linkis.rpc.Sender import com.webank.wedatasphere.linkis.server.{JMap, toScalaMap} import org.apache.commons.io.IOUtils import scala.collection.mutable.ArrayBuffer abstract class AbstractEngineCreator extends EngineCreator { private val inInitPorts = ArrayBuffer[Int]() private def getAvailablePort: Int = synchronized { var port = AbstractEngineCreator.getNewPort while(inInitPorts.contains(port)) port = AbstractEngineCreator.getNewPort inInitPorts += port port } def removePort(port: Int): Unit = inInitPorts -= port protected def createProcessEngineBuilder(): ProcessEngineBuilder protected def getExtractSpringConfigs(requestEngine: RequestEngine): JMap[String, String] = { val springConf = new JMap[String, String] requestEngine.properties.keysIterator.filter(_.startsWith("spring.")).foreach(key => springConf.put(key.substring(7), requestEngine.properties.get(key))) springConf } protected def createEngine(processEngineBuilder:ProcessEngineBuilder,parser:DWCArgumentsParser):ProcessEngine={ processEngineBuilder.getEngineResource match { case timeout: UserTimeoutEngineResource => new CommonProcessEngine(processEngineBuilder, parser, timeout.getTimeout) case _ => new CommonProcessEngine(processEngineBuilder, parser) } } override def create(ticketId: String, engineRequest: EngineResource, request: RequestEngine): Engine = { val port = getAvailablePort val processEngineBuilder = createProcessEngineBuilder() processEngineBuilder.setPort(port) processEngineBuilder.build(engineRequest, request) val parser = new DWCArgumentsParser var springConf = Map("spring.application.name" -> EngineManagerConfiguration.ENGINE_SPRING_APPLICATION_NAME.getValue, "server.port" -> port.toString, "spring.profiles.active" -> "engine", "logging.config" -> "classpath:log4j2-engine.xml", "eureka.client.serviceUrl.defaultZone" -> EngineManagerReceiver.getSpringConf("eureka.client.serviceUrl.defaultZone")) springConf = springConf ++: getExtractSpringConfigs(request).toMap parser.setSpringConf(springConf) var dwcConf = Map("ticketId" -> ticketId, "creator" -> request.creator, "user" -> request.user) ++: EngineCallback.callbackToMap(EngineCallback(Sender.getThisServiceInstance.getApplicationName, Sender.getThisServiceInstance.getInstance)) if(request.properties.exists{case (k, v) => k.contains(" ") || (v != null && v.contains(" "))}) throw new EngineManagerErrorException(30000, "Startup parameters contain spaces!(启动参数中包含空格!)") dwcConf = dwcConf ++: request.properties.toMap parser.setDWCConf(dwcConf) val engine = createEngine(processEngineBuilder,parser) engine.setTicketId(ticketId) engine.setPort(port) engine match { case commonEngine: CommonProcessEngine => commonEngine.setUser(request.user) case _ => } engine } } object AbstractEngineCreator { private[enginemanager] def getNewPort: Int = { val socket = new ServerSocket(0) 
Utils.tryFinally(socket.getLocalPort)(IOUtils.closeQuietly(socket)) } }
Example 55
Source File: FilesystemSnapshotStore.scala From eventuate with Apache License 2.0 | 5 votes |
package com.rbmhtechnology.eventuate.snapshot.filesystem import java.io._ import java.net.URLEncoder import akka.event.{ LogSource, Logging } import com.rbmhtechnology.eventuate._ import com.rbmhtechnology.eventuate.snapshot.SnapshotStore import org.apache.commons.io.IOUtils import scala.concurrent.Future import scala.collection.immutable.Seq import scala.util._ object FilesystemSnapshotStore { implicit val logSource = LogSource.fromAnyClass[FilesystemSnapshotStore] } class FilesystemSnapshotStore(settings: FilesystemSnapshotStoreSettings, logId: String) extends SnapshotStore { private val log = Logging(settings.system, classOf[FilesystemSnapshotStore]) private val rootDir = new File(settings.rootDir, URLEncoder.encode(logId, "UTF-8")) rootDir.mkdirs() override def deleteAsync(lowerSequenceNr: Long): Future[Unit] = { import settings.writeDispatcher Future(delete(lowerSequenceNr)) } override def saveAsync(snapshot: Snapshot): Future[Unit] = { import settings.writeDispatcher Future(withOutputStream(dstDir(snapshot.metadata.emitterId), snapshot.metadata.sequenceNr)(serialize(_, snapshot))) } override def loadAsync(emitterId: String): Future[Option[Snapshot]] = { import settings.readDispatcher Future(load(dstDir(emitterId))) } def delete(lowerSequenceNr: Long): Unit = for { emitterId <- rootDir.listFiles emitterDir = dstDir(emitterId.getName) sequenceNr <- decreasingSequenceNrs(emitterDir) if sequenceNr >= lowerSequenceNr } dstFile(emitterDir, sequenceNr).delete() def load(dir: File): Option[Snapshot] = { @annotation.tailrec def go(snrs: Seq[Long]): Option[Snapshot] = snrs.headOption match { case None => None case Some(snr) => Try(withInputStream(dir, snr)(deserialize)) match { case Success(s) => Some(s) case Failure(e) => log.error(e, s"error loading snapshot ${dstFile(dir, snr)}") go(snrs.tail) } } go(decreasingSequenceNrs(dir)) } private def serialize(outputStream: OutputStream, snapshot: Snapshot): Unit = outputStream.write(settings.serialization.serialize(snapshot).get) private def deserialize(inputStream: InputStream): Snapshot = settings.serialization.deserialize(IOUtils.toByteArray(inputStream), classOf[Snapshot]).get private def withOutputStream(dir: File, snr: Long)(body: OutputStream => Unit): Unit = { val dst = dstFile(dir, snr) val tmp = tmpFile(dir, snr) dir.mkdirs() withStream(new BufferedOutputStream(new FileOutputStream(tmp)), body) tmp.renameTo(dst) // do not keep more than the configured maximum number of snapshot files decreasingSequenceNrs(dir).drop(settings.snapshotsPerEmitterMax).foreach { snr => dstFile(dir, snr).delete() } } private def withInputStream[A](dir: File, snr: Long)(body: InputStream => A): A = withStream(new BufferedInputStream(new FileInputStream(dstFile(dir, snr))), body) private def withStream[A <: Closeable, B](stream: A, p: A => B): B = try { p(stream) } finally { stream.close() } private val DstFilenamePattern = """^snr-(\d+)""".r private[eventuate] def dstDir(emitterId: String): File = new File(rootDir, URLEncoder.encode(emitterId, "UTF-8")) private[eventuate] def dstFile(dstDir: File, sequenceNr: Long): File = new File(dstDir, s"snr-${sequenceNr}") private[eventuate] def tmpFile(dstDir: File, sequenceNr: Long): File = new File(dstDir, s"tmp-${sequenceNr}") private[eventuate] def decreasingSequenceNrs(dir: File): Seq[Long] = if (!dir.exists) Nil else dir.listFiles.map(_.getName).collect { case DstFilenamePattern(snr) => snr.toLong }.toList.sorted.reverse }
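The store above funnels all stream handling through a small withStream loan helper. A minimal standalone sketch of that loan pattern combined with IOUtils.toByteArray; the temporary file and payload are illustrative:

import java.io._
import org.apache.commons.io.IOUtils

object LoanPatternSketch {
  // Run `body` against the stream and always close it afterwards, mirroring withStream above.
  def withStream[A <: Closeable, B](stream: A)(body: A => B): B =
    try body(stream) finally stream.close()

  def main(args: Array[String]): Unit = {
    val f = File.createTempFile("snapshot", ".bin")
    f.deleteOnExit()
    withStream(new BufferedOutputStream(new FileOutputStream(f)))(_.write("hello".getBytes("UTF-8")))
    val bytes = withStream(new BufferedInputStream(new FileInputStream(f)))(in => IOUtils.toByteArray(in))
    println(new String(bytes, "UTF-8")) // hello
  }
}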
Example 56
Source File: EngineExecutorManager.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.execute import com.webank.wedatasphere.linkis.common.conf.DWCArgumentsParser import com.webank.wedatasphere.linkis.common.utils.{Logging, Utils} import com.webank.wedatasphere.linkis.engine.exception.EngineErrorException import com.webank.wedatasphere.linkis.scheduler.executer.ExecutorState.{Busy, ExecutorState, Idle} import com.webank.wedatasphere.linkis.scheduler.executer._ import com.webank.wedatasphere.linkis.scheduler.listener.ExecutorListener import com.webank.wedatasphere.linkis.scheduler.queue.SchedulerEvent import com.webank.wedatasphere.linkis.server.{JMap, toJavaMap} import org.apache.commons.io.IOUtils import scala.concurrent.duration.Duration abstract class EngineExecutorManager extends ExecutorManager with Logging { private var executorListener: Option[ExecutorListener] = None private var resultSetListener: ResultSetListener = _ private var jobLogListener: JobLogListener = _ protected var executor: EngineExecutor = _ override def setExecutorListener(executorListener: ExecutorListener): Unit = this.executorListener = Some(executorListener) def setResultSetListener(resultSetListener: ResultSetListener) = this.resultSetListener = resultSetListener def setJobLogListener(jobLogListener: JobLogListener) = this.jobLogListener = jobLogListener protected def isSupportParallelism: Boolean protected def getOrCreateCodeParser(): CodeParser protected def getOrCreateEngineExecutorFactory(): EngineExecutorFactory protected def getEngineHooks: Array[EngineHook] def getEngineExecutor = executor override protected def createExecutor(event: SchedulerEvent): Executor = null override def askExecutor(schedulerEvent: SchedulerEvent): Option[Executor] = { if(executor == null) synchronized { if(executor == null) { var options: JMap[String, String] = DWCArgumentsParser.getDWCOptionMap //TODO getUDF peaceWong getEngineHooks.foreach(hook => options = hook.beforeCreateEngine(options)) executor = getOrCreateEngineExecutorFactory().createExecutor(options) executor.setCodeParser(getOrCreateCodeParser()) executor.init() executor.setLogListener(jobLogListener) executor.setResultSetListener(resultSetListener) //TODO Consider adding timeout(考虑加上超时时间) getEngineHooks.foreach(_.afterCreatedEngine(executor)) executorListener.foreach(executor.setExecutorListener) executorListener.foreach(_.onExecutorCreated(executor)) executor.ready() } } executor match { case engine: EngineExecutor => if(!ExecutorState.isAvailable(engine.state)) throw new EngineErrorException(40000, s"engine不可用,状态为${engine.state}(engine不可用,状态为${engine.state}).") else if(isSupportParallelism) Some(engine) else if(engine.state == Busy) None else synchronized { if(engine.state == Idle) Some(engine) else None } } } override def askExecutor(schedulerEvent: SchedulerEvent, wait: Duration): Option[Executor] = { val startTime = System.currentTimeMillis() askExecutor(schedulerEvent).orElse { var executor: Option[Executor] = None while(System.currentTimeMillis - startTime < wait.toMillis && executor.isEmpty) { this.executor synchronized this.executor.wait(wait.toMillis) executor = askExecutor(schedulerEvent) } executor } } def onExecutorStateChanged(fromState: ExecutorState, toState: ExecutorState): Unit = toState match { case Idle => this.executor synchronized this.executor.notify() case _ => } override def getById(id: Long): Option[Executor] = if(executor == null) None else if(executor.getId == id) Some(executor) else None override def getByGroup(groupName: String): Array[Executor] = Array(executor) override 
protected def delete(executor: Executor): Unit = throw new EngineErrorException(40001, s"Unsupported method delete(不支持的方法delete).") override def shutdown(): Unit = { executor match { case s: SingleTaskOperateSupport => Utils.tryAndWarn(s.kill()) case c: ConcurrentTaskOperateSupport => Utils.tryAndWarn(c.killAll()) } IOUtils.closeQuietly(executor) } }
Example 57
Source File: LogWriter.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.entrance.log import java.io.{Closeable, Flushable, OutputStream} import java.util import com.webank.wedatasphere.linkis.common.io.FsPath import com.webank.wedatasphere.linkis.common.utils.{Logging, Utils} import com.webank.wedatasphere.linkis.entrance.exception.EntranceErrorException import com.webank.wedatasphere.linkis.storage.FSFactory import com.webank.wedatasphere.linkis.storage.utils.FileSystemUtils import org.apache.commons.io.IOUtils import org.apache.commons.lang.StringUtils abstract class LogWriter(charset: String) extends Closeable with Flushable with Logging { private var firstWrite = true protected val outputStream: OutputStream def write(msg: String): Unit = synchronized { val log = if (!firstWrite) "\n" + msg else { firstWrite = false msg } Utils.tryQuietly({ outputStream.write(log.getBytes(charset)) outputStream.flush() }, t => { warn("error when write query log to outputStream.", t) info(msg) }) } def flush(): Unit = Utils.tryQuietly(outputStream.flush(), t => { warn("Error encounters when flush log,", t) }) def close(): Unit = { flush() if (outputStream != null) IOUtils.closeQuietly(outputStream) } } abstract class AbstractLogWriter(logPath: String, user: String, charset: String) extends LogWriter(charset) { if(StringUtils.isBlank(logPath)) throw new EntranceErrorException(20301, "logPath cannot be empty.") protected val fileSystem = FSFactory.getFs(new FsPath(logPath)) fileSystem.init(new util.HashMap[String, String]()) protected val outputStream: OutputStream = { FileSystemUtils.createNewFile(new FsPath(logPath), true) fileSystem.write(new FsPath(logPath), true) } override def close(): Unit = { super.close() if (fileSystem != null) Utils.tryQuietly(fileSystem.close(), t => { warn("Error encounters when closing fileSystem", t) }) } }
Example 58
Source File: ProcessInterpreter.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.Interpreter import java.io.{BufferedReader, InputStreamReader, PrintWriter} import java.util.concurrent.TimeUnit import com.webank.wedatasphere.linkis.common.utils.{Logging, Utils} import com.webank.wedatasphere.linkis.engine.spark.common.{LineBufferedStream, Starting, State, _} import com.webank.wedatasphere.linkis.scheduler.executer.{ErrorExecuteResponse, ExecuteResponse, SuccessExecuteResponse} import org.apache.commons.io.IOUtils import org.json4s._ import scala.concurrent.duration.Duration import scala.concurrent.{Await, ExecutionContext, Future} abstract class ProcessInterpreter(process: Process) extends Interpreter with Logging { implicit val executor: ExecutionContext = ExecutionContext.global protected[this] var _state: State = Starting() protected[this] val stdin = new PrintWriter(process.getOutputStream) protected[this] val stdout = new BufferedReader(new InputStreamReader(process.getInputStream()), 1) protected[this] val errOut = new LineBufferedStream(process.getErrorStream()) override def state: State = _state override def execute(code: String): ExecuteResponse = { if(code == "sc.cancelAllJobs" || code == "sc.cancelAllJobs()") { sendExecuteRequest(code) } _state match { case (Dead() | ShuttingDown() | Error() | Success()) => throw new IllegalStateException("interpreter is not running") case Idle() => require(state == Idle()) code match { case "SHUTDOWN" => sendShutdownRequest() close() ErrorExecuteResponse("shutdown",new Exception("shutdown")) case _ => _state = Busy() sendExecuteRequest(code) match { case Some(rep) => _state = Idle() // ExecuteComplete(rep) SuccessExecuteResponse() case None => _state = Error() val errorMsg = errOut.lines.mkString(", ") throw new Exception(errorMsg) } } case _ => throw new IllegalStateException(s"interpreter is in ${_state} state, cannot do query.") } } Future { val exitCode = process.waitFor() if (exitCode != 0) { errOut.lines.foreach(println) println(getClass.getSimpleName+" has stopped with exit code " + process.exitValue) _state = Error() } else { println(getClass.getSimpleName+" has finished.") _state = Success() } } protected def waitUntilReady(): Unit protected def sendExecuteRequest(request: String): Option[JValue] protected def sendShutdownRequest(): Unit = {} override def close(): Unit = { val future = Future { _state match { case (Dead() | ShuttingDown() | Success()) => Future.successful() case _ => sendShutdownRequest() } } _state = Dead() IOUtils.closeQuietly(stdin) IOUtils.closeQuietly(stdout) errOut.close // Give ourselves 10 seconds to tear down the process. Utils.tryFinally(Await.result(future, Duration(10, TimeUnit.SECONDS))){ process.destroy()} } }
Example 59
Source File: PythonEngineExecutor.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.executors import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.engine.PythonSession import com.webank.wedatasphere.linkis.engine.exception.EngineException import com.webank.wedatasphere.linkis.engine.execute.{EngineExecutor, EngineExecutorContext} import com.webank.wedatasphere.linkis.engine.rs.RsOutputStream import com.webank.wedatasphere.linkis.protocol.engine.JobProgressInfo import com.webank.wedatasphere.linkis.resourcemanager.{LoadInstanceResource, Resource} import com.webank.wedatasphere.linkis.rpc.Sender import com.webank.wedatasphere.linkis.scheduler.executer._ import org.apache.commons.io.IOUtils import scala.collection.mutable.ArrayBuffer class PythonEngineExecutor(outputPrintLimit: Int) extends EngineExecutor(outputPrintLimit, false) with SingleTaskOperateSupport with SingleTaskInfoSupport with Logging { override def getName: String = Sender.getThisServiceInstance.getInstance private val lineOutputStream = new RsOutputStream private[executors] var engineExecutorContext: EngineExecutorContext = _ override def getActualUsedResources: Resource = { new LoadInstanceResource(Runtime.getRuntime.totalMemory() - Runtime.getRuntime.freeMemory(), 2, 1) } private val pySession = new PythonSession override protected def executeLine(engineExecutorContext: EngineExecutorContext, code: String): ExecuteResponse = { if(engineExecutorContext != this.engineExecutorContext){ this.engineExecutorContext = engineExecutorContext pySession.setEngineExecutorContext(engineExecutorContext) //lineOutputStream.reset(engineExecutorContext) info("Python executor reset new engineExecutorContext!") } engineExecutorContext.appendStdout(s"$getName >> ${code.trim}") pySession.execute(code) //lineOutputStream.flush() SuccessExecuteResponse() } override protected def executeCompletely(engineExecutorContext: EngineExecutorContext, code: String, completedLine: String): ExecuteResponse = { val newcode = completedLine + code info("newcode is " + newcode) executeLine(engineExecutorContext, newcode) } override def kill(): Boolean = true override def pause(): Boolean = true override def resume(): Boolean = true override def progress(): Float = { if (this.engineExecutorContext != null){ this.engineExecutorContext.getCurrentParagraph / this.engineExecutorContext.getTotalParagraph.asInstanceOf[Float] }else 0.0f } override def getProgressInfo: Array[JobProgressInfo] = { val jobProgressInfos = new ArrayBuffer[JobProgressInfo]() jobProgressInfos.toArray Array.empty } override def log(): String = "" override def close(): Unit = { IOUtils.closeQuietly(lineOutputStream) var isKill:Boolean = false try { pySession.close isKill = true; } catch { case e: Throwable => throw new EngineException(60004, "Engine shutdown exception(引擎关闭异常)") } } }
Example 60
Source File: CSVExecutor.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.pipeline.executor

import java.io.OutputStream

import com.webank.wedatasphere.linkis.common.io.FsPath
import com.webank.wedatasphere.linkis.engine.execute.EngineExecutorContext
import com.webank.wedatasphere.linkis.engine.pipeline.OutputStreamCache
import com.webank.wedatasphere.linkis.engine.pipeline.constant.PipeLineConstant._
import com.webank.wedatasphere.linkis.engine.pipeline.conversions.FsConvertions._
import com.webank.wedatasphere.linkis.engine.pipeline.exception.PipeLineErrorException
import com.webank.wedatasphere.linkis.scheduler.executer.ExecuteResponse
import com.webank.wedatasphere.linkis.server._
import com.webank.wedatasphere.linkis.storage.FSFactory
import com.webank.wedatasphere.linkis.storage.csv.CSVFsWriter
import com.webank.wedatasphere.linkis.storage.source.FileSource
import org.apache.commons.io.IOUtils

class CSVExecutor extends PipeLineExecutor {
  override def execute(sourcePath: String, destPath: String, engineExecutorContext: EngineExecutorContext): ExecuteResponse = {
    if (!FileSource.isResultSet(sourcePath)) {
      throw new PipeLineErrorException(70005, "Not a result set file")
    }
    val sourceFsPath = new FsPath(sourcePath)
    val destFsPath = new FsPath(s"$destPath.$Kind")
    val sourceFs = FSFactory.getFs(sourceFsPath)
    sourceFs.init(null)
    val destFs = FSFactory.getFs(destFsPath)
    destFs.init(null)
    val fileSource = FileSource.create(sourceFsPath, sourceFs)
    if (!FileSource.isTableResultSet(fileSource)) {
      throw new PipeLineErrorException(70005, "Only table-type result sets can be converted to CSV")
    }
    var nullValue = options.getOrDefault(PIPELINE_OUTPUT_SHUFFLE_NULL_TYPE, "NULL")
    if (BLANK.equalsIgnoreCase(nullValue)) nullValue = ""
    val outputStream: OutputStream = destFs.write(destFsPath, options.get(PIPELINE_OUTPUT_ISOVERWRITE).toBoolean)
    OutputStreamCache.osCache += engineExecutorContext.getJobId.get -> outputStream
    val cSVFsWriter = CSVFsWriter.getCSVFSWriter(options.get(PIPELINE_OUTPUT_CHARSET), options.get(PIPELINE_FIELD_SPLIT), outputStream)
    fileSource.addParams("nullValue", nullValue).write(cSVFsWriter)
    IOUtils.closeQuietly(cSVFsWriter)
    IOUtils.closeQuietly(fileSource)
    IOUtils.closeQuietly(sourceFs)
    IOUtils.closeQuietly(destFs)
    super.execute(sourcePath, destPath, engineExecutorContext)
  }

  override def Kind: String = "csv"
}

object CSVExecutor {
  val csvExecutor = new CSVExecutor

  def getInstance: PipeLineExecutor = csvExecutor
}
Example 61
Source File: ExcelExecutor.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.pipeline.executor

import java.io.OutputStream

import com.webank.wedatasphere.linkis.common.io.FsPath
import com.webank.wedatasphere.linkis.engine.execute.EngineExecutorContext
import com.webank.wedatasphere.linkis.engine.pipeline.OutputStreamCache
import com.webank.wedatasphere.linkis.engine.pipeline.constant.PipeLineConstant._
import com.webank.wedatasphere.linkis.engine.pipeline.conversions.FsConvertions._
import com.webank.wedatasphere.linkis.engine.pipeline.exception.PipeLineErrorException
import com.webank.wedatasphere.linkis.scheduler.executer.ExecuteResponse
import com.webank.wedatasphere.linkis.storage.FSFactory
import com.webank.wedatasphere.linkis.storage.excel.ExcelFsWriter
import com.webank.wedatasphere.linkis.storage.source.FileSource
import org.apache.commons.io.IOUtils

class ExcelExecutor extends PipeLineExecutor {
  override def execute(sourcePath: String, destPath: String, engineExecutorContext: EngineExecutorContext): ExecuteResponse = {
    if (!FileSource.isResultSet(sourcePath)) {
      throw new PipeLineErrorException(70005, "Not a result set file")
    }
    val sourceFsPath = new FsPath(sourcePath)
    val destFsPath = new FsPath(s"$destPath.xlsx")
    val sourceFs = FSFactory.getFs(sourceFsPath)
    sourceFs.init(null)
    val destFs = FSFactory.getFs(destFsPath)
    destFs.init(null)
    val fileSource = FileSource.create(sourceFsPath, sourceFs)
    if (!FileSource.isTableResultSet(fileSource)) {
      throw new PipeLineErrorException(70005, "Only table-type result sets can be converted to Excel")
    }
    var nullValue = options.getOrDefault(PIPELINE_OUTPUT_SHUFFLE_NULL_TYPE, "NULL")
    if (BLANK.equalsIgnoreCase(nullValue)) nullValue = ""
    val outputStream: OutputStream = destFs.write(destFsPath, options.get(PIPELINE_OUTPUT_ISOVERWRITE).toBoolean)
    val excelFsWriter = ExcelFsWriter.getExcelFsWriter(DEFAULTC_HARSET, DEFAULT_SHEETNAME, DEFAULT_DATEFORMATE, outputStream)
    import scala.collection.JavaConversions._
    OutputStreamCache.osCache += engineExecutorContext.getJobId.get -> outputStream
    fileSource.addParams("nullValue", nullValue).write(excelFsWriter)
    IOUtils.closeQuietly(excelFsWriter)
    IOUtils.closeQuietly(fileSource)
    IOUtils.closeQuietly(sourceFs)
    IOUtils.closeQuietly(destFs)
    super.execute(sourcePath, destPath, engineExecutorContext)
  }

  override def Kind: String = "excel"
}

object ExcelExecutor {
  val excelExecutor = new ExcelExecutor

  def getInstance: PipeLineExecutor = excelExecutor
}
Example 62
Source File: CopyExecutor.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.engine.pipeline.executor

import com.webank.wedatasphere.linkis.common.io.FsPath
import com.webank.wedatasphere.linkis.engine.execute.EngineExecutorContext
import com.webank.wedatasphere.linkis.engine.pipeline.OutputStreamCache
import com.webank.wedatasphere.linkis.engine.pipeline.constant.PipeLineConstant._
import com.webank.wedatasphere.linkis.engine.pipeline.conversions.FsConvertions._
import com.webank.wedatasphere.linkis.scheduler.executer.ExecuteResponse
import com.webank.wedatasphere.linkis.server._
import com.webank.wedatasphere.linkis.storage.FSFactory
import org.apache.commons.io.IOUtils

class CopyExecutor extends PipeLineExecutor {
  override def execute(sourcePath: String, destPath: String, engineExecutorContext: EngineExecutorContext): ExecuteResponse = {
    val sourceFsPath = new FsPath(sourcePath)
    val destFsPath = new FsPath(destPath)
    val sourceFs = FSFactory.getFs(sourceFsPath)
    sourceFs.init(null)
    val destFs = FSFactory.getFs(destFsPath)
    destFs.init(null)
    val inputStream = sourceFs.read(sourceFsPath)
    var isOverWrite = options.get(PIPELINE_OUTPUT_ISOVERWRITE).toBoolean
    // Table export currently only writes to the workspace, so other destinations are left unchanged for now
    if (!isOverWrite && !destFs.exists(destFsPath)) {
      isOverWrite = true
    }
    val outputStream = destFs.write(destFsPath, isOverWrite)
    OutputStreamCache.osCache += engineExecutorContext.getJobId.get -> outputStream
    IOUtils.copy(inputStream, outputStream)
    IOUtils.closeQuietly(outputStream)
    IOUtils.closeQuietly(inputStream)
    IOUtils.closeQuietly(sourceFs)
    IOUtils.closeQuietly(destFs)
    super.execute(sourcePath, destPath, engineExecutorContext)
  }

  override def Kind: String = "cp"
}

object CopyExecutor {
  val copyExecutor = new CopyExecutor

  def getInstance: PipeLineExecutor = copyExecutor
}
Example 63
Source File: RewriteSwaggerConfigPlugin.scala From matcher with MIT License | 5 votes |
import java.io.{BufferedInputStream, ByteArrayOutputStream} import java.nio.charset.StandardCharsets import java.nio.file.Files import Dependencies.Version import org.apache.commons.compress.archivers.ArchiveStreamFactory import org.apache.commons.io.IOUtils import sbt.Keys._ import sbt._ // See https://github.com/swagger-api/swagger-ui/issues/5710 object RewriteSwaggerConfigPlugin extends AutoPlugin { override val trigger = PluginTrigger.NoTrigger override def projectSettings: Seq[Def.Setting[_]] = inConfig(Compile)( Seq( resourceGenerators += Def.task { val jarName = s"swagger-ui-${Version.swaggerUi}.jar" val indexHtmlPath = s"META-INF/resources/webjars/swagger-ui/${Version.swaggerUi}/index.html" val outputFile = resourceManaged.value / indexHtmlPath val html = (Compile / dependencyClasspath).value .find(_.data.getName == jarName) .flatMap(jar => fileContentFromJar(jar.data, indexHtmlPath)) .map { new String(_, StandardCharsets.UTF_8) } val resource = s"$jarName:$indexHtmlPath" html match { case None => throw new RuntimeException(s"Can't find $resource") case Some(html) => val doc = org.jsoup.parser.Parser.parse(html, "127.0.0.1") import scala.collection.JavaConverters._ doc .body() .children() .asScala .find { el => el.tagName() == "script" && el.html().contains("SwaggerUIBundle") } match { case None => throw new RuntimeException("Can't patch script in index.html") case Some(el) => val update = """ const ui = SwaggerUIBundle({ url: "/api-docs/swagger.json", dom_id: '#swagger-ui', deepLinking: true, presets: [ SwaggerUIBundle.presets.apis ], plugins: [ SwaggerUIBundle.plugins.DownloadUrl ], layout: "BaseLayout", operationsSorter: "alpha" }); window.ui = ui; """ // Careful! ^ will be inserted as one-liner el.text(update) } Files.createDirectories(outputFile.getParentFile.toPath) IO.write(outputFile, doc.outerHtml()) } Seq(outputFile) }.taskValue )) private def fileContentFromJar(jar: File, fileName: String): Option[Array[Byte]] = { val fs = new BufferedInputStream(Files.newInputStream(jar.toPath)) val factory = new ArchiveStreamFactory() val ais = factory.createArchiveInputStream(fs) try Iterator .continually(ais.getNextEntry) .takeWhile(_ != null) .filter(ais.canReadEntryData) .find(_.getName == fileName) .map { _ => val out = new ByteArrayOutputStream() IOUtils.copy(ais, out) out.toByteArray } finally fs.close() } }
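fileContentFromJar above shows a common IOUtils trick: once the archive stream is positioned on the wanted entry, IOUtils.copy into a ByteArrayOutputStream turns the rest of that entry into a byte array. A reduced sketch of the same idea against a plain java.util.zip stream (the jar path and entry name are placeholders, not values from this plugin):

import java.io.ByteArrayOutputStream
import java.nio.file.{Files, Paths}
import java.util.zip.ZipInputStream
import org.apache.commons.io.IOUtils

val zin = new ZipInputStream(Files.newInputStream(Paths.get("swagger-ui.jar"))) // placeholder jar
try {
  val bytes: Option[Array[Byte]] =
    Iterator.continually(zin.getNextEntry)
      .takeWhile(_ != null)
      .find(_.getName == "META-INF/resources/index.html") // placeholder entry name
      .map { _ =>
        val out = new ByteArrayOutputStream()
        IOUtils.copy(zin, out) // copies only the current entry; the stream reports end-of-stream at the entry boundary
        out.toByteArray
      }
  println(bytes.map(_.length))
} finally zin.close()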
Example 64
Source File: AsynchbasePatcher.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.storage.hbase import java.lang.Integer.valueOf import java.nio.charset.StandardCharsets import java.util.concurrent.Callable import net.bytebuddy.ByteBuddy import net.bytebuddy.description.modifier.Visibility.PUBLIC import net.bytebuddy.dynamic.loading.ClassLoadingStrategy import net.bytebuddy.implementation.FieldAccessor import net.bytebuddy.implementation.MethodDelegation.to import net.bytebuddy.implementation.bind.annotation.{SuperCall, This} import net.bytebuddy.matcher.ElementMatchers._ import org.apache.commons.io.IOUtils import org.hbase.async._ import org.objectweb.asm.Opcodes.{ACC_FINAL, ACC_PRIVATE, ACC_PROTECTED, ACC_PUBLIC} import org.objectweb.asm._ import scala.collection.JavaConversions._ private def loadClass(name: String): Class[_] = { classLoader.getResources(s"org/hbase/async/$name.class").toSeq.headOption match { case Some(url) => val stream = url.openStream() val bytes = try { IOUtils.toByteArray(stream) } finally { stream.close() } // patch the bytecode so that the class is no longer final and the methods are all accessible val cw = new ClassWriter(ClassWriter.COMPUTE_FRAMES) new ClassReader(bytes).accept(new ClassAdapter(cw) { override def visit(version: Int, access: Int, name: String, signature: String, superName: String, interfaces: Array[String]): Unit = { super.visit(version, access & ~ACC_FINAL, name, signature, superName, interfaces) } override def visitMethod(access: Int, name: String, desc: String, signature: String, exceptions: Array[String]): MethodVisitor = { super.visitMethod(access & ~ACC_PRIVATE & ~ACC_PROTECTED & ~ACC_FINAL | ACC_PUBLIC, name, desc, signature, exceptions) } }, 0) val patched = cw.toByteArray defineClass.setAccessible(true) defineClass.invoke(classLoader, s"org.hbase.async.$name", patched, valueOf(0), valueOf(patched.length)).asInstanceOf[Class[_]] case None => throw new ClassNotFoundException(s"Could not find Asynchbase class: $name") } } }
Example 65
Source File: InceptionFetcher.scala From incubator-s2graph with Apache License 2.0 | 5 votes |
package org.apache.s2graph.core.fetcher.tensorflow import java.net.URL import java.nio.file.Paths import com.typesafe.config.Config import org.apache.commons.io.IOUtils import org.apache.s2graph.core._ import org.apache.s2graph.core.types.VertexId import scala.concurrent.{ExecutionContext, Future} object InceptionFetcher { val ModelPath = "modelPath" def getImageBytes(urlText: String): Array[Byte] = { val url = new URL(urlText) IOUtils.toByteArray(url) } def predict(graphDef: Array[Byte], labels: Seq[String])(imageBytes: Array[Byte], topK: Int = 10): Seq[(String, Float)] = { try { val image = LabelImage.constructAndExecuteGraphToNormalizeImage(imageBytes) try { val labelProbabilities = LabelImage.executeInceptionGraph(graphDef, image) val topKIndices = labelProbabilities.zipWithIndex.sortBy(_._1).reverse .take(Math.min(labelProbabilities.length, topK)).map(_._2) val ls = topKIndices.map { idx => (labels(idx), labelProbabilities(idx)) } ls } catch { case e: Throwable => Nil } finally if (image != null) image.close() } } } class InceptionFetcher(graph: S2GraphLike) extends EdgeFetcher { import InceptionFetcher._ import scala.collection.JavaConverters._ import org.apache.s2graph.core.TraversalHelper._ val builder = graph.elementBuilder var graphDef: Array[Byte] = _ var labels: Seq[String] = _ override def init(config: Config)(implicit ec: ExecutionContext): Unit = { val modelPath = config.getString(ModelPath) graphDef = LabelImage.readAllBytesOrExit(Paths.get(modelPath, "tensorflow_inception_graph.pb")) labels = LabelImage.readAllLinesOrExit(Paths.get(modelPath, "imagenet_comp_graph_label_strings.txt")).asScala } override def close(): Unit = {} override def fetches(queryRequests: Seq[QueryRequest], prevStepEdges: Map[VertexId, Seq[EdgeWithScore]])(implicit ec: ExecutionContext): Future[Seq[StepResult]] = { val stepResultLs = queryRequests.map { queryRequest => val vertex = queryRequest.vertex val queryParam = queryRequest.queryParam val shouldBuildParents = queryRequest.query.queryOption.returnTree || queryParam.whereHasParent val parentEdges = if (shouldBuildParents) prevStepEdges.getOrElse(queryRequest.vertex.id, Nil) else Nil val urlText = vertex.innerId.toIdString() val edgeWithScores = predict(graphDef, labels)(getImageBytes(urlText), queryParam.limit).flatMap { case (label, score) => val tgtVertexId = builder.newVertexId(queryParam.label.service, queryParam.label.tgtColumnWithDir(queryParam.labelWithDir.dir), label) val props: Map[String, Any] = if (queryParam.label.metaPropsInvMap.contains("score")) Map("score" -> score) else Map.empty val edge = graph.toEdge(vertex.innerId.value, tgtVertexId.innerId.value, queryParam.labelName, queryParam.direction, props = props) edgeToEdgeWithScore(queryRequest, edge, parentEdges) } StepResult(edgeWithScores, Nil, Nil) } Future.successful(stepResultLs) } override def fetchEdgesAll()(implicit ec: ExecutionContext): Future[Seq[S2EdgeLike]] = Future.successful(Nil) }
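getImageBytes above uses the IOUtils.toByteArray(java.net.URL) overload, which handles opening and closing the underlying stream and buffers the whole response body in memory. The same call in isolation, with a placeholder URL:

import java.net.URL
import org.apache.commons.io.IOUtils

// Placeholder URL; the entire body is held on the heap, so this is only
// appropriate for payloads that comfortably fit in memory.
val bytes: Array[Byte] = IOUtils.toByteArray(new URL("http://example.com/image.jpg"))
println(s"downloaded ${bytes.length} bytes")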
Example 66
Source File: TestSpec.scala From spark-distcp with Apache License 2.0 | 5 votes |
package com.coxautodata import java.io.ByteArrayInputStream import java.nio.file.Files import com.coxautodata.objects.SerializableFileStatus import com.coxautodata.utils.FileListing import org.apache.commons.io.{FileUtils, IOUtils} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, LocalFileSystem, Path} import org.scalatest.{BeforeAndAfterEach, FunSpec, Matchers} trait TestSpec extends FunSpec with Matchers with BeforeAndAfterEach { var testingBaseDir: java.nio.file.Path = _ var testingBaseDirName: String = _ var testingBaseDirPath: Path = _ var localFileSystem: LocalFileSystem = _ override def beforeEach(): Unit = { super.beforeEach() testingBaseDir = Files.createTempDirectory("test_output") testingBaseDirName = testingBaseDir.toString localFileSystem = FileSystem.getLocal(new Configuration()) testingBaseDirPath = localFileSystem.makeQualified(new Path(testingBaseDirName)) } override def afterEach(): Unit = { super.afterEach() FileUtils.deleteDirectory(testingBaseDir.toFile) } def createFile(relativePath: Path, content: Array[Byte]): SerializableFileStatus = { val path = new Path(testingBaseDirPath, relativePath) localFileSystem.mkdirs(path.getParent) val in = new ByteArrayInputStream(content) val out = localFileSystem.create(path) IOUtils.copy(in, out) in.close() out.close() SerializableFileStatus(localFileSystem.getFileStatus(path)) } def fileStatusToResult(f: SerializableFileStatus): FileListing = { FileListing(f.getPath.toString, if (f.isFile) Some(f.getLen) else None) } }
Example 67
Source File: Zip$Test.scala From mystem-scala with MIT License | 5 votes |
package ru.stachek66.tools import java.io.{File, FileInputStream} import org.apache.commons.io.IOUtils import org.scalatest.FunSuite import org.scalatest.junit.JUnitRunner import org.junit.runner.RunWith class Zip$Test extends FunSuite { test("zip-test") { val src = new File("src/test/resources/test.txt") Zip.unpack( new File("src/test/resources/test.zip"), new File("src/test/resources/res.txt")) match { case f => val content0 = IOUtils.toString(new FileInputStream(f)) val content1 = IOUtils.toString(new FileInputStream(src)) print(content0.trim + " vs " + content1.trim) assert(content0 === content1) } } }
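Note that IOUtils.toString(InputStream) without a charset, as used in this test, falls back to the platform default encoding and is deprecated in recent Commons IO releases; the charset-taking overload is the safer form. A small sketch, assuming a UTF-8 text file at a placeholder path:

import java.io.FileInputStream
import java.nio.charset.StandardCharsets
import org.apache.commons.io.IOUtils

val in = new FileInputStream("src/test/resources/test.txt") // placeholder path
try {
  // An explicit charset avoids surprises from the platform default encoding
  val content = IOUtils.toString(in, StandardCharsets.UTF_8)
  println(content.trim)
} finally in.close()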
Example 68
Source File: Decompressor.scala From mystem-scala with MIT License | 5 votes |
package ru.stachek66.tools import java.io.{IOException, File, FileOutputStream} import org.apache.commons.compress.archivers.ArchiveInputStream import org.apache.commons.io.IOUtils import ru.stachek66.nlp.mystem.Properties trait Decompressor { def traditionalExtension: String def unpack(src: File, dst: File): File @throws(classOf[IOException]) private[tools] def copyUncompressedAndClose(stream: ArchiveInputStream, dest: File): File = { // must be read val entry = stream.getNextEntry if (entry.isDirectory) throw new IOException("Decompressed entry is a directory (unexpectedly)") val os = new FileOutputStream(dest) try { IOUtils.copy(stream, os) } finally { os.close() stream.close() } dest } } object Decompressor { def select: Decompressor = if (Properties.CurrentOs.contains("win")) Zip else TarGz }
Example 69
Source File: TPCDS_2_4_Queries.scala From spark-sql-perf with Apache License 2.0 | 5 votes |
package com.databricks.spark.sql.perf.tpcds import org.apache.commons.io.IOUtils import com.databricks.spark.sql.perf.{Benchmark, ExecutionMode, Query} trait Tpcds_2_4_Queries extends Benchmark { import ExecutionMode._ val queryNames = Seq( "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14a", "q14b", "q15", "q16", "q17", "q18", "q19", "q20", "q21", "q22", "q23a", "q23b", "q24a", "q24b", "q25", "q26", "q27", "q28", "q29", "q30", "q31", "q32", "q33", "q34", "q35", "q36", "q37", "q38", "q39a", "q39b", "q40", "q41", "q42", "q43", "q44", "q45", "q46", "q47", "q48", "q49", "q50", "q51", "q52", "q53", "q54", "q55", "q56", "q57", "q58", "q59", "q60", "q61", "q62", "q63", "q64", "q65", "q66", "q67", "q68", "q69", "q70", "q71", "q72", "q73", "q74", "q75", "q76", "q77", "q78", "q79", "q80", "q81", "q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90", "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99", "ss_max" ) val tpcds2_4Queries = queryNames.map { queryName => val queryContent: String = IOUtils.toString( getClass().getClassLoader().getResourceAsStream(s"tpcds_2_4/$queryName.sql")) Query(queryName + "-v2.4", queryContent, description = "TPCDS 2.4 Query", executionMode = CollectResults) } val tpcds2_4QueriesMap = tpcds2_4Queries.map(q => q.name.split("-").get(0) -> q).toMap }
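The query loader above is the classic "classpath resource to String" pattern. Assuming Commons IO 2.6 or later is on the classpath, the same lookup can be written more compactly with IOUtils.resourceToString, which resolves the resource and applies the charset in one call (the resource name below is hypothetical):

import java.nio.charset.StandardCharsets
import org.apache.commons.io.IOUtils

// Hypothetical resource; the leading slash addresses it from the classpath root
val sql: String = IOUtils.resourceToString("/tpcds_2_4/q1.sql", StandardCharsets.UTF_8)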
Example 70
Source File: NettyBlockTransferSecuritySuite.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.network.netty import java.nio._ import java.util.concurrent.TimeUnit import scala.concurrent.duration._ import scala.concurrent.{Await, Promise} import scala.util.{Failure, Success, Try} import org.apache.commons.io.IOUtils import org.apache.spark.network.buffer.{ManagedBuffer, NioManagedBuffer} import org.apache.spark.network.shuffle.BlockFetchingListener import org.apache.spark.network.{BlockDataManager, BlockTransferService} import org.apache.spark.storage.{BlockId, ShuffleBlockId} import org.apache.spark.{SecurityManager, SparkConf} import org.mockito.Mockito._ import org.scalatest.mock.MockitoSugar import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach, FunSuite, ShouldMatchers} class NettyBlockTransferSecuritySuite extends FunSuite with MockitoSugar with ShouldMatchers { test("security default off") { val conf = new SparkConf() .set("spark.app.id", "app-id") testConnection(conf, conf) match { case Success(_) => // expected case Failure(t) => fail(t) } } test("security on same password") { val conf = new SparkConf() .set("spark.authenticate", "true") .set("spark.authenticate.secret", "good") .set("spark.app.id", "app-id") testConnection(conf, conf) match { case Success(_) => // expected case Failure(t) => fail(t) } } test("security on mismatch password") { val conf0 = new SparkConf() .set("spark.authenticate", "true") .set("spark.authenticate.secret", "good") .set("spark.app.id", "app-id") val conf1 = conf0.clone.set("spark.authenticate.secret", "bad") testConnection(conf0, conf1) match { case Success(_) => fail("Should have failed") case Failure(t) => t.getMessage should include ("Mismatched response") } } test("security mismatch auth off on server") { val conf0 = new SparkConf() .set("spark.authenticate", "true") .set("spark.authenticate.secret", "good") .set("spark.app.id", "app-id") val conf1 = conf0.clone.set("spark.authenticate", "false") testConnection(conf0, conf1) match { case Success(_) => fail("Should have failed") case Failure(t) => // any funny error may occur, sever will interpret SASL token as RPC } } test("security mismatch auth off on client") { val conf0 = new SparkConf() .set("spark.authenticate", "false") .set("spark.authenticate.secret", "good") .set("spark.app.id", "app-id") val conf1 = conf0.clone.set("spark.authenticate", "true") testConnection(conf0, conf1) match { case Success(_) => fail("Should have failed") case Failure(t) => t.getMessage should include ("Expected SaslMessage") } } private def fetchBlock( self: BlockTransferService, from: BlockTransferService, execId: String, blockId: BlockId): Try[ManagedBuffer] = { val promise = Promise[ManagedBuffer]() self.fetchBlocks(from.hostName, from.port, execId, Array(blockId.toString), new BlockFetchingListener { override def onBlockFetchFailure(blockId: String, exception: Throwable): Unit = { promise.failure(exception) } override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit = { promise.success(data.retain()) } }) Await.ready(promise.future, FiniteDuration(1000, TimeUnit.MILLISECONDS)) promise.future.value.get } }
Example 71
Source File: StreamMetadata.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.streaming import java.io.{InputStreamReader, OutputStreamWriter} import java.nio.charset.StandardCharsets import scala.util.control.NonFatal import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, FSDataOutputStream, Path} import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.apache.spark.internal.Logging import org.apache.spark.sql.streaming.StreamingQuery def write( metadata: StreamMetadata, metadataFile: Path, hadoopConf: Configuration): Unit = { var output: FSDataOutputStream = null try { val fs = FileSystem.get(hadoopConf) output = fs.create(metadataFile) val writer = new OutputStreamWriter(output) Serialization.write(metadata, writer) writer.close() } catch { case NonFatal(e) => logError(s"Error writing stream metadata $metadata to $metadataFile", e) throw e } finally { IOUtils.closeQuietly(output) } } }
Example 72
Source File: AppMasterResolver.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.experiments.yarn.client import java.io.IOException import java.net.{HttpURLConnection, URL} import java.nio.charset.StandardCharsets import akka.actor.{ActorRef, ActorSystem} import org.apache.commons.io.IOUtils import org.apache.gearpump.experiments.yarn.glue.Records.{ApplicationId, ApplicationReport} import org.apache.gearpump.experiments.yarn.glue.YarnClient import org.apache.gearpump.util.{AkkaHelper, LogUtil} import org.apache.hadoop.hdfs.web.URLConnectionFactory import org.apache.hadoop.yarn.conf.YarnConfiguration import scala.util.Try class AppMasterResolver(yarnClient: YarnClient, system: ActorSystem) { val LOG = LogUtil.getLogger(getClass) val RETRY_INTERVAL_MS = 3000 // ms def resolve(appId: ApplicationId, timeoutSeconds: Int = 30): ActorRef = { val appMaster = retry(connect(appId), 1 + timeoutSeconds * 1000 / RETRY_INTERVAL_MS) appMaster } private def connect(appId: ApplicationId): ActorRef = { val report = yarnClient.getApplicationReport(appId) AppMasterResolver.resolveAppMasterAddress(report, system) } private def retry(fun: => ActorRef, times: Int): ActorRef = { var index = 0 var result: ActorRef = null while (index < times && result == null) { Thread.sleep(RETRY_INTERVAL_MS) index += 1 val tryConnect = Try(fun) if (tryConnect.isFailure) { LOG.error(s"Failed to connect YarnAppMaster(tried $index)... " + tryConnect.failed.get.getMessage) } else { result = tryConnect.get } } result } } object AppMasterResolver { val LOG = LogUtil.getLogger(getClass) def resolveAppMasterAddress(report: ApplicationReport, system: ActorSystem): ActorRef = { val appMasterPath = s"${report.getTrackingURL}/supervisor-actor-path" LOG.info(s"appMasterPath=$appMasterPath") val connectionFactory: URLConnectionFactory = URLConnectionFactory .newDefaultURLConnectionFactory(new YarnConfiguration()) val url: URL = new URL(appMasterPath) val connection: HttpURLConnection = connectionFactory.openConnection(url) .asInstanceOf[HttpURLConnection] connection.setInstanceFollowRedirects(true) try { connection.connect() } catch { case e: IOException => LOG.error(s"Failed to connect to AppMaster" + e.getMessage) } val status = connection.getResponseCode if (status == 200) { val stream: java.io.InputStream = connection.getInputStream val response = IOUtils.toString(stream, StandardCharsets.UTF_8) LOG.info("Successfully resolved AppMaster address: " + response) connection.disconnect() AkkaHelper.actorFor(system, response) } else { connection.disconnect() throw new IOException("Fail to resolve AppMaster address, please make sure " + s"${report.getTrackingURL} is accessible...") } } }
Example 73
Source File: FileMerger.scala From hyperion with Apache License 2.0 | 5 votes |
package com.krux.hyperion.contrib.activity.file import java.io._ import java.util.zip.{ GZIPInputStream, GZIPOutputStream } import org.apache.commons.compress.compressors.bzip2.{ BZip2CompressorInputStream, BZip2CompressorOutputStream } import org.apache.commons.io.IOUtils case class FileMerger(destination: File, skipFirstLine: Boolean = false, headers: Option[String] = None) { def merge(sources: File*): File = { val output: OutputStream = new BufferedOutputStream({ val s = new FileOutputStream(destination, true) if (destination.getName.endsWith(".gz")) new GZIPOutputStream(s) else if(destination.getName.endsWith(".bz2")) new BZip2CompressorOutputStream(s) else s }) try { sources.foldLeft(headers -> output)(appendFile) destination } finally { try { output.close() } catch { case e: Exception => e.printStackTrace() } } } private def doSkipFirstLine(input: InputStream): InputStream = { while (skipFirstLine && (input.read() match { case -1 | '\n' => false case _ => true })) {} input } private def appendFile(state: (Option[String], OutputStream), source: File): (Option[String], OutputStream) = { val (headers, output) = state if (source.getName == "-") { print("Merging stdin...") headers.map(_.getBytes).foreach(output.write) IOUtils.copy(doSkipFirstLine(System.in), output) println("done") None -> output } else if (source.length() > 0) { print(s"Merging ${source.getAbsolutePath}...") val input = new BufferedInputStream({ val s = new FileInputStream(source) if (source.getName.endsWith(".gz")) new GZIPInputStream(s) else if(source.getName.endsWith(".bz2")) new BZip2CompressorInputStream(s) else s }) try { input.mark(2) if (input.read() != -1) { input.reset() headers.map(_.getBytes).foreach(output.write) IOUtils.copy(doSkipFirstLine(input), output) } } finally { try { input.close() } catch { case e: Exception => e.printStackTrace() } finally { println("done") } } None -> output } else { headers -> output } } }
Example 74
Source File: TikaHadoopOrcParser.scala From project-matt with MIT License | 5 votes |
package org.datafy.aws.app.matt.extras import java.io.{File, FileOutputStream, IOException, InputStream} import java.util import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import scala.collection.JavaConverters._ import org.apache.hadoop.fs.Path import org.apache.hadoop.hive.serde2.objectinspector.StructField import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector import org.apache.orc.OrcFile import org.apache.orc.OrcFile.ReaderOptions import org.apache.orc.Reader import org.apache.orc.RecordReader import org.apache.tika.exception.TikaException import org.apache.tika.metadata.Metadata import org.apache.tika.mime.MediaType import org.apache.tika.parser.{AbstractParser, ParseContext} import org.xml.sax.{ContentHandler, SAXException} import scala.util.Random class TikaHadoopOrcParser extends AbstractParser { final val ORC_RAW = MediaType.application("x-orc") private val SUPPORTED_TYPES: Set[MediaType] = Set(ORC_RAW) def getSupportedTypes(context: ParseContext): util.Set[MediaType] = { SUPPORTED_TYPES.asJava } @throws(classOf[IOException]) @throws(classOf[SAXException]) @throws(classOf[TikaException]) def parse(stream: InputStream, handler: ContentHandler, metadata: Metadata, context: ParseContext): Unit = { // create temp file from stream try { val fileNamePrefix = Random.alphanumeric.take(5).mkString val tempFile = File.createTempFile(s"orc-${fileNamePrefix}", ".orc") IOUtils.copy(stream, new FileOutputStream(tempFile)) val path = new Path(tempFile.getAbsolutePath) val conf = new Configuration() val orcReader = OrcFile.createReader(path, new ReaderOptions(conf)) val records: RecordReader = orcReader.rows() val storeRecord = null val firstBlockKey = null } catch { case e: Throwable => e.printStackTrace() } // val fields = } }
Example 75
Source File: TikaParquetParser.scala From project-matt with MIT License | 5 votes |
package org.datafy.aws.app.matt.extras import java.io.{File, FileOutputStream, IOException, InputStream} import java.util import scala.collection.JavaConverters._ import org.xml.sax.{ContentHandler, SAXException} import org.apache.tika.metadata.Metadata import org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE import org.apache.tika.mime.MediaType import org.apache.tika.parser.{AbstractParser, ParseContext} import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.parquet.hadoop.ParquetFileReader import org.apache.parquet.hadoop.ParquetReader import org.apache.parquet.format.converter.ParquetMetadataConverter import org.apache.parquet.hadoop.util.HadoopInputFile import org.apache.parquet.tools.json.JsonRecordFormatter import org.apache.parquet.tools.read.{SimpleReadSupport, SimpleRecord} import org.apache.tika.exception.TikaException import org.apache.tika.sax.XHTMLContentHandler import scala.util.Random class TikaParquetParser extends AbstractParser { // make some stuff here final val PARQUET_RAW = MediaType.application("x-parquet") private val SUPPORTED_TYPES: Set[MediaType] = Set(PARQUET_RAW) def getSupportedTypes(context: ParseContext): util.Set[MediaType] = { SUPPORTED_TYPES.asJava } @throws(classOf[IOException]) @throws(classOf[SAXException]) @throws(classOf[TikaException]) def parse(stream: InputStream, handler: ContentHandler, metadata: Metadata, context: ParseContext): Unit = { // create temp file from stream val fileNamePrefix = Random.alphanumeric.take(5).mkString val tempFile = File.createTempFile(s"parquet-${fileNamePrefix}", ".parquet") IOUtils.copy(stream, new FileOutputStream(tempFile)) val conf = new Configuration() val path = new Path(tempFile.getAbsolutePath) val parquetMetadata = ParquetFileReader.readFooter(conf, path, ParquetMetadataConverter.NO_FILTER) var defaultReader: ParquetReader[SimpleRecord] = null val columns = parquetMetadata.getFileMetaData.getSchema.getFields metadata.set(CONTENT_TYPE, PARQUET_RAW.toString) metadata.set("Total Number of Columns", columns.size.toString) metadata.set("Parquet Column Names", columns.toString) val xhtml = new XHTMLContentHandler(handler, metadata) xhtml.startDocument() xhtml.startElement("p") // ::TODO:: ensure parquet reader reads all files not only file row try { defaultReader = ParquetReader.builder(new SimpleReadSupport(), new Path(tempFile.getAbsolutePath)).build() if(defaultReader.read() != null) { val values: SimpleRecord = defaultReader.read() val jsonFormatter = JsonRecordFormatter.fromSchema(parquetMetadata.getFileMetaData.getSchema) val textContent: String = jsonFormatter.formatRecord(values) xhtml.characters(textContent) xhtml.endElement("p") xhtml.endDocument() } } catch { case e: Throwable => e.printStackTrace() if (defaultReader != null) { try { defaultReader.close() } catch{ case _: Throwable => } } } finally { if (tempFile != null) tempFile.delete() } } }
Example 76
Source File: WholeFileReader.scala From magellan with Apache License 2.0 | 5 votes |
package magellan.mapreduce import java.io.InputStream import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FSDataInputStream, FileSystem, Path} import org.apache.hadoop.io.compress.{CodecPool, CompressionCodecFactory, Decompressor} import org.apache.hadoop.io.{NullWritable, Text} import org.apache.hadoop.mapreduce.lib.input.FileSplit import org.apache.hadoop.mapreduce.{InputSplit, RecordReader, TaskAttemptContext} class WholeFileReader extends RecordReader[NullWritable, Text] { private val key = NullWritable.get() private val value = new Text() private var split: FileSplit = _ private var conf: Configuration = _ private var path: Path = _ private var done: Boolean = false override def getProgress: Float = ??? override def nextKeyValue(): Boolean = { if (done){ false } else { val fs = path.getFileSystem(conf) var is: FSDataInputStream = null var in: InputStream = null var decompressor: Decompressor = null try { is = fs.open(split.getPath) val codec = new CompressionCodecFactory(conf).getCodec(path) if (codec != null) { decompressor = CodecPool.getDecompressor(codec) in = codec.createInputStream(is, decompressor) } else { in = is } val result = IOUtils.toByteArray(in) value.clear() value.set(result) done = true true } finally { if (in != null) { IOUtils.closeQuietly(in) } if (decompressor != null) { CodecPool.returnDecompressor(decompressor) } } } } override def getCurrentValue: Text = value override def initialize(inputSplit: InputSplit, taskAttemptContext: TaskAttemptContext): Unit = { this.split = inputSplit.asInstanceOf[FileSplit] this.conf = MapReduceUtils.getConfigurationFromContext(taskAttemptContext) this.path = this.split.getPath } override def getCurrentKey: NullWritable = key override def close() {} }
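The record reader above is essentially IOUtils.toByteArray applied to a whole (optionally codec-wrapped) file, with closeQuietly guarding the cleanup path. Stripped of the Hadoop plumbing, the core call reduces to this sketch (placeholder path; the content is assumed to fit in memory):

import java.nio.file.{Files, Paths}
import org.apache.commons.io.IOUtils

val in = Files.newInputStream(Paths.get("data/part-00000")) // placeholder path
val bytes: Array[Byte] =
  try IOUtils.toByteArray(in) // buffers the entire stream; unsuitable for files larger than the heap
  finally IOUtils.closeQuietly(in)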
Example 77
Source File: FlywayMigrationsSpec.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.platform.store import java.math.BigInteger import java.nio.charset.Charset import java.security.MessageDigest import com.daml.platform.store.FlywayMigrationsSpec._ import org.apache.commons.io.IOUtils import org.flywaydb.core.api.configuration.FluentConfiguration import org.flywaydb.core.api.migration.JavaMigration import org.flywaydb.core.internal.resource.LoadableResource import org.flywaydb.core.internal.scanner.{LocationScannerCache, ResourceNameCache, Scanner} import org.scalatest.Matchers._ import org.scalatest.WordSpec import scala.collection.JavaConverters._ // SQL MIGRATION AND THEIR DIGEST FILES SHOULD BE CREATED ONLY ONCE AND NEVER CHANGED AGAIN, // OTHERWISE MIGRATIONS BREAK ON EXISTING DEPLOYMENTS! class FlywayMigrationsSpec extends WordSpec { "Postgres flyway migration files" should { "always have a valid SHA-256 digest file accompanied" in { assertFlywayMigrationFileHashes(DbType.Postgres) } } "H2 database flyway migration files" should { "always have a valid SHA-256 digest file accompanied" in { assertFlywayMigrationFileHashes(DbType.H2Database) } } } object FlywayMigrationsSpec { private val digester = MessageDigest.getInstance("SHA-256") private def assertFlywayMigrationFileHashes(dbType: DbType): Unit = { val config = FlywayMigrations.configurationBase(dbType) val resourceScanner = scanner(config) val resources = resourceScanner.getResources("", ".sql").asScala.toSeq resources.size should be > 10 resources.foreach { res => val fileName = res.getFilename val expectedDigest = getExpectedDigest(fileName, fileName.dropRight(4) + ".sha256", resourceScanner) val currentDigest = getCurrentDigest(res, config.getEncoding) assert( currentDigest == expectedDigest, s"Digest of migration file $fileName has changed! It is NOT allowed to change neither existing sql migrations files nor their digests!" ) } } private def scanner(config: FluentConfiguration) = new Scanner( classOf[JavaMigration], config.getLocations.toList.asJava, getClass.getClassLoader, config.getEncoding, new ResourceNameCache, new LocationScannerCache, ) private def getExpectedDigest( sourceFile: String, digestFile: String, resourceScanner: Scanner[_], ) = IOUtils.toString( Option(resourceScanner.getResource(digestFile)) .getOrElse(sys.error(s"""Missing sha-256 file $digestFile! |Are you introducing a new Flyway migration step? |You need to create a sha-256 digest file by either running: | - shasum -a 256 $sourceFile | awk '{print $$1}' > $digestFile (under the db/migration folder) | - or ledger/sandbox/src/main/resources/db/migration/recompute-sha256sums.sh |""".stripMargin)) .read()) private def getCurrentDigest(res: LoadableResource, encoding: Charset) = { val digest = digester.digest(IOUtils.toByteArray(res.read(), encoding)) String.format(s"%0${digest.length * 2}x\n", new BigInteger(1, digest)) } }
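Both helpers above use the Reader-based IOUtils overloads: toString(Reader) to load the expected digest and toByteArray(Reader, Charset) to turn the migration script into bytes in a known encoding before hashing. A small sketch of that hashing step, with a local file standing in for the Flyway resource (file name is a placeholder):

import java.io.FileReader
import java.nio.charset.StandardCharsets
import java.security.MessageDigest
import org.apache.commons.io.IOUtils

val reader = new FileReader("V1__init.sql") // placeholder migration file
val bytes =
  try IOUtils.toByteArray(reader, StandardCharsets.UTF_8) // decodes the Reader, then re-encodes with the given charset
  finally reader.close()
val digest = MessageDigest.getInstance("SHA-256").digest(bytes)
println(digest.map("%02x".format(_)).mkString)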
Example 78
Source File: FrameReader.scala From mleap with Apache License 2.0 | 5 votes |
package ml.combust.mleap.runtime.serialization import java.io._ import java.nio.charset.Charset import ml.combust.mleap.ClassLoaderUtil import ml.combust.mleap.runtime.frame.DefaultLeapFrame import org.apache.commons.io.IOUtils import resource._ import scala.util.Try object FrameReader { def apply(format: String = BuiltinFormats.json, clOption: Option[ClassLoader] = None): FrameReader = { val cl = clOption.getOrElse(ClassLoaderUtil.findClassLoader(classOf[FrameReader].getCanonicalName)) cl.loadClass(s"$format.DefaultFrameReader"). newInstance(). asInstanceOf[FrameReader] } } trait FrameReader { def fromBytes(bytes: Array[Byte], charset: Charset = BuiltinFormats.charset): Try[DefaultLeapFrame] def read(file: File): Try[DefaultLeapFrame] = read(file, BuiltinFormats.charset) def read(file: File, charset: Charset): Try[DefaultLeapFrame] = { (for(in <- managed(new FileInputStream(file))) yield { read(in, charset) }).tried.flatMap(identity) } def read(in: InputStream): Try[DefaultLeapFrame] = read(in, BuiltinFormats.charset) def read(in: InputStream, charset: Charset): Try[DefaultLeapFrame] = { Try(IOUtils.toByteArray(in)).flatMap(bytes => fromBytes(bytes, charset)) } }
Example 79
Source File: InstallRouteMgmt.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.standalone import java.io.File import akka.http.scaladsl.model.Uri import org.apache.commons.io.{FileUtils, IOUtils} import org.apache.openwhisk.common.TransactionId.systemPrefix import org.apache.openwhisk.common.{Logging, TransactionId} import scala.sys.process.ProcessLogger import scala.util.Try import scala.sys.process._ case class InstallRouteMgmt(workDir: File, authKey: String, apiHost: Uri, namespace: String, gatewayUrl: Uri, wsk: String)(implicit log: Logging) { case class Action(name: String, desc: String) private val noopLogger = ProcessLogger(_ => ()) private implicit val tid: TransactionId = TransactionId(systemPrefix + "apiMgmt") val actionNames = Array( Action("createApi", "Create an API"), Action("deleteApi", "Delete the API"), Action("getApi", "Retrieve the specified API configuration (in JSON format)")) def run(): Unit = { require(wskExists, s"wsk command not found at $wsk. Route management actions cannot be installed") log.info(this, packageUpdateCmd.!!.trim) //TODO Optimize to ignore this if package already installed actionNames.foreach { action => val name = action.name val actionZip = new File(workDir, s"$name.zip") FileUtils.copyURLToFile(IOUtils.resourceToURL(s"/$name.zip"), actionZip) val cmd = createActionUpdateCmd(action, name, actionZip) val result = cmd.!!.trim log.info(this, s"Installed $name - $result") FileUtils.deleteQuietly(actionZip) } //This log message is used by tests to confirm that actions are installed log.info(this, "Installed Route Management Actions") } private def createActionUpdateCmd(action: Action, name: String, actionZip: File) = { Seq( wsk, "--apihost", apiHost.toString(), "--auth", authKey, "action", "update", s"$namespace/apimgmt/$name", actionZip.getAbsolutePath, "-a", "description", action.desc, "--kind", "nodejs:default", "-a", "web-export", "true", "-a", "final", "true") } private def packageUpdateCmd = { Seq( wsk, "--apihost", apiHost.toString(), "--auth", authKey, "package", "update", s"$namespace/apimgmt", "--shared", "no", "-a", "description", "This package manages the gateway API configuration.", "-p", "gwUrlV2", gatewayUrl.toString()) } def wskExists: Boolean = Try(s"$wsk property get --cliversion".!(noopLogger)).getOrElse(-1) == 0 }
Example 80
Source File: UtilsTest.scala From spark-http-stream with BSD 2-Clause "Simplified" License | 5 votes |
import java.sql.Date import org.apache.spark.SparkConf import org.apache.spark.serializer.KryoSerializer import org.apache.spark.sql.SparkSession import org.junit.Assert import org.junit.Test import java.io.ByteArrayOutputStream import java.io.InputStream import org.apache.commons.io.IOUtils import com.esotericsoftware.kryo.io.Input import org.apache.spark.sql.execution.streaming.http.KryoSerializerUtils class UtilsTest { @Test def testKryoSerDe() { val d1 = new Date(30000); val bytes = KryoSerializerUtils.serialize(d1); val d2 = KryoSerializerUtils.deserialize(bytes); Assert.assertEquals(d1, d2); val d3 = Map('x' -> Array("aaa", "bbb"), 'y' -> Array("ccc", "ddd")); println(d3); val bytes2 = KryoSerializerUtils.serialize(d3); val d4 = KryoSerializerUtils.deserialize(bytes2).asInstanceOf[Map[String, Any]]; println(d4); } @Test def testEncoderSchema() { val spark = SparkSession.builder.master("local[4]") .getOrCreate(); val sqlContext = spark.sqlContext; import sqlContext.implicits._ import org.apache.spark.sql.catalyst.encoders.encoderFor val schema1 = encoderFor[String].schema; val schema2 = encoderFor[(String)].schema; val schema3 = encoderFor[((String))].schema; Assert.assertEquals(schema1, schema2); Assert.assertEquals(schema1, schema3); } @Test def testDateInTuple() { val spark = SparkSession.builder.master("local[4]") .getOrCreate(); val sqlContext = spark.sqlContext; import sqlContext.implicits._ val d1 = new Date(30000); val ds = sqlContext.createDataset(Seq[(Int, Date)]((1, d1))); val d2 = ds.collect()(0)._2; //NOTE: d1!=d2, maybe a bug println(d1.equals(d2)); } }
Example 81
Source File: utils.scala From spark-http-stream with BSD 2-Clause "Simplified" License | 5 votes |
package org.apache.spark.sql.execution.streaming.http import org.apache.spark.sql.types.StructField import org.apache.spark.sql.types.StructType import org.apache.spark.sql.types.TimestampType import org.apache.spark.SparkConf import org.apache.commons.io.IOUtils import org.apache.spark.serializer.KryoSerializer import java.io.InputStream import com.esotericsoftware.kryo.io.Input import java.io.ByteArrayOutputStream class WrongArgumentException(name: String, value: Any) extends RuntimeException(s"wrong argument: $name=$value") { } class MissingRequiredArgumentException(map: Map[String, String], paramName: String) extends RuntimeException(s"missing required argument: $paramName, all parameters=$map") { } class InvalidSerializerNameException(serializerName: String) extends RuntimeException(s"invalid serializer name: $serializerName") { } object SchemaUtils { def buildSchema(schema: StructType, includesTimestamp: Boolean, timestampColumnName: String = "_TIMESTAMP_"): StructType = { if (!includesTimestamp) schema; else StructType(schema.fields.toSeq :+ StructField(timestampColumnName, TimestampType, false)); } } object Params { def deserialize(bytes: Array[Byte]): Any = { val kryo = kryoSerializer.newKryo(); val input = new Input(); input.setBuffer(bytes); kryo.readClassAndObject(input); } }
Example 82
Source File: CortexTypeGeneratorTest.scala From rug with GNU General Public License v3.0 | 5 votes |
package com.atomist.rug.ts import com.atomist.rug.spi.EnumParameterOrReturnType import com.atomist.source.ArtifactSource import com.atomist.util.Utils import org.apache.commons.io.IOUtils import org.scalatest.{FlatSpec, Matchers} class CortexTypeGeneratorTest extends FlatSpec with Matchers { import CortexTypeGenerator._ import DefaultTypeGeneratorConfig.CortexJson private val typeGen = new CortexTypeGenerator(DefaultCortexDir, DefaultCortexStubDir) "Type generation" should "find some types" in { val types = typeGen.extract(CortexJson) assert(types.nonEmpty) } it should "handle enums" in { val enumJson = Utils.withCloseable(getClass.getResourceAsStream("/com/atomist/rug/ts/enum_test.json"))(IOUtils.toString(_, "UTF-8")) val types = typeGen.extract(enumJson) assert(types.size === 1) val t = types.head t.allOperations.last.returnType shouldBe a[EnumParameterOrReturnType] } it should "return types with operations" in { val types = typeGen.extract(CortexJson) types.foreach(t => { assert(t.operations.nonEmpty, s"Type ${t.name} should have operations") }) } import com.atomist.rug.TestUtils._ it should "generate compiling node module" in { val extendedModel = typeGen.toNodeModule(CortexJson) .withPathAbove(".atomist/rug") //println(ArtifactSourceUtils.prettyListFiles(as)) // extendedModel.allFiles.filter(_.name.endsWith(".ts")).foreach(f => // println(s"${f.path}\n${f.content}\n\n")) //println(ArtifactSourceUtils.prettyListFiles(cas)) val buildFile = extendedModel.allFiles.find(f => f.name.endsWith("Build.ts")).get assert(buildFile.content.contains("pullRequestNumber"), s"Unexpected Build file content\n${buildFile.content}") failOnFindingPattern(extendedModel, "Badly formatted exports", """export \{ [A-Za-z0-9]+ \};\n\nexport""".r, _.path.endsWith(".ts")) assert(extendedModel.allFiles.exists(_.name.endsWith("ChatChannel.ts"))) assert(extendedModel.allFiles.exists(_.content.contains("Repo[]"))) assert(extendedModel.allFiles.exists(_.content.contains("Issue[]")), "Must have back relationship from Repo to Issue") val buildStubFile = extendedModel.allFiles.find(f => f.content.contains("class Build ")).get assert(buildStubFile.content.contains("""[ "Build", "-dynamic""""), "We should have correct node tags") val cas = TypeScriptBuilder.compiler.compile(extendedModel + TypeScriptBuilder.compileUserModel(Seq( TypeScriptBuilder.coreSource, extendedModel ))) assert(cas.allFiles.exists(_.name.endsWith("ChatChannel.js")), "Should have compiled") } } object CortexTypeGeneratorTest { private val typeGen = new CortexTypeGenerator(CortexTypeGenerator.DefaultCortexDir, CortexTypeGenerator.DefaultCortexStubDir) val fullModel: ArtifactSource = { val as = typeGen.toNodeModule(DefaultTypeGeneratorConfig.CortexJson) .withPathAbove(".atomist/rug") TypeScriptBuilder.compiler.compile(as + TypeScriptBuilder.compileUserModel(Seq( TypeScriptBuilder.coreSource, as ))) } }
Example 83
Source File: AntlrRawFileType.scala From rug with GNU General Public License v3.0 | 5 votes |
package com.atomist.rug.kind.grammar import java.nio.charset.StandardCharsets import com.atomist.source.FileArtifact import com.atomist.tree.content.text.PositionedTreeNode import com.atomist.tree.content.text.grammar.antlr.{AntlrGrammar, AstNodeCreationStrategy} import com.atomist.util.Utils.withCloseable import org.apache.commons.io.IOUtils import org.springframework.core.io.DefaultResourceLoader abstract class AntlrRawFileType( topLevelProduction: String, nodeCreationStrategy: AstNodeCreationStrategy, grammars: String* ) extends TypeUnderFile { private val g4s: Seq[String] = { val cp = new DefaultResourceLoader() val resources = grammars.map(grammar => cp.getResource(grammar)) resources.map(r => withCloseable(r.getInputStream)(is => IOUtils.toString(is, StandardCharsets.UTF_8))) } private[kind] def parser = antlrGrammar private lazy val antlrGrammar = new AntlrGrammar(topLevelProduction, nodeCreationStrategy, g4s: _*) override def fileToRawNode(f: FileArtifact): Option[PositionedTreeNode] = { antlrGrammar.parse(f.content) } }
Example 84
Source File: StreamMetadata.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.streaming import java.io.{InputStreamReader, OutputStreamWriter} import java.nio.charset.StandardCharsets import java.util.ConcurrentModificationException import scala.util.control.NonFatal import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileAlreadyExistsException, FSDataInputStream, Path} import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.apache.spark.internal.Logging import org.apache.spark.sql.execution.streaming.CheckpointFileManager.CancellableFSDataOutputStream import org.apache.spark.sql.streaming.StreamingQuery def write( metadata: StreamMetadata, metadataFile: Path, hadoopConf: Configuration): Unit = { var output: CancellableFSDataOutputStream = null try { val fileManager = CheckpointFileManager.create(metadataFile.getParent, hadoopConf) output = fileManager.createAtomic(metadataFile, overwriteIfPossible = false) val writer = new OutputStreamWriter(output) Serialization.write(metadata, writer) writer.close() } catch { case e: FileAlreadyExistsException => if (output != null) { output.cancel() } throw new ConcurrentModificationException( s"Multiple streaming queries are concurrently using $metadataFile", e) case e: Throwable => if (output != null) { output.cancel() } logError(s"Error writing stream metadata $metadata to $metadataFile", e) throw e } } }
Example 85
Source File: TestUtils.scala From keystone with Apache License 2.0 | 5 votes |
package keystoneml.utils import java.io.{FileReader, ByteArrayInputStream} import breeze.linalg.DenseMatrix import breeze.stats.distributions.{Gaussian, RandBasis, ThreadLocalRandomGenerator, Rand} import edu.berkeley.cs.amplab.mlmatrix.RowPartitionedMatrix import org.apache.commons.io.IOUtils import org.apache.commons.math3.random.MersenneTwister import org.apache.spark.SparkContext import scala.io.Source import scala.util.Random def genChannelMajorArrayVectorizedImage(x: Int, y: Int, z: Int): ChannelMajorArrayVectorizedImage = { ChannelMajorArrayVectorizedImage(genData(x, y, z), ImageMetadata(x,y,z)) } def genRowColumnMajorByteArrayVectorizedImage(x: Int, y: Int, z: Int): RowColumnMajorByteArrayVectorizedImage = { RowColumnMajorByteArrayVectorizedImage(genData(x,y,z).map(_.toByte), ImageMetadata(x,y,z)) } def createRandomMatrix( sc: SparkContext, numRows: Int, numCols: Int, numParts: Int, seed: Int = 42): RowPartitionedMatrix = { val rowsPerPart = numRows / numParts val matrixParts = sc.parallelize(1 to numParts, numParts).mapPartitionsWithIndex { (index, part) => val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed+index))) Iterator(DenseMatrix.rand(rowsPerPart, numCols, Gaussian(0.0, 1.0)(randBasis))) } RowPartitionedMatrix.fromMatrix(matrixParts.cache()) } def createLocalRandomMatrix(numRows: Int, numCols: Int, seed: Int = 42): DenseMatrix[Double] = { val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed))) DenseMatrix.rand(numRows, numCols, Gaussian(0.0, 1.0)(randBasis)) } }
Example 86
Source File: RulesTxtDeploymentServiceSpec.scala From smui with Apache License 2.0 | 5 votes |
package models import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.util.zip.ZipInputStream import org.apache.commons.io.IOUtils import org.scalatest.{FlatSpec, Matchers} class RulesTxtDeploymentServiceSpec extends FlatSpec with Matchers with ApplicationTestBase { private lazy val service = injector.instanceOf[RulesTxtDeploymentService] private var inputIds: Seq[SearchInputId] = Seq.empty override protected def beforeAll(): Unit = { super.beforeAll() createTestCores() inputIds = createTestRule() } private def rulesFileContent(ruleIds: Seq[SearchInputId]): String = s"""aerosmith => | SYNONYM: mercury | DOWN(10): battery | UP(10): notebook | FILTER: zz top | @{ | "_log" : "${ruleIds.head}" | }@ | |mercury => | SYNONYM: aerosmith | DOWN(10): battery | UP(10): notebook | FILTER: zz top | @{ | "_log" : "${ruleIds.head}" | }@ | |shipping => | DECORATE: REDIRECT http://xyz.com/shipping | @{ | "_log" : "${ruleIds.last}" | }@""".stripMargin "RulesTxtDeploymentService" should "generate rules files with correct file names" in { val rulesTxt = service.generateRulesTxtContentWithFilenames(core1Id, "LIVE", logDebug = false) rulesTxt.solrIndexId shouldBe core1Id rulesTxt.decompoundRules shouldBe empty rulesTxt.regularRules.content.trim shouldBe rulesFileContent(inputIds) rulesTxt.regularRules.sourceFileName shouldBe "/tmp/search-management-ui_rules-txt.tmp" rulesTxt.regularRules.destinationFileName shouldBe "/usr/bin/solr/liveCore/conf/rules.txt" } it should "validate the rules files correctly" in { val rulesTxt = service.generateRulesTxtContentWithFilenames(core1Id, "LIVE", logDebug = false) service.validateCompleteRulesTxts(rulesTxt, logDebug = false) shouldBe empty val badRulesTxt = rulesTxt.copy(regularRules = rulesTxt.regularRules.copy(content = "a very bad rules file")) service.validateCompleteRulesTxts(badRulesTxt, logDebug = false) shouldBe List("Line 1: Missing input for instruction") } it should "provide a zip file with all rules files" in { val out = new ByteArrayOutputStream() service.writeAllRulesTxtFilesAsZipFileToStream(out) val bytes = out.toByteArray val zipStream = new ZipInputStream(new ByteArrayInputStream(bytes)) val firstEntry = zipStream.getNextEntry firstEntry.getName shouldBe "rules_core1.txt" IOUtils.toString(zipStream, "UTF-8").trim shouldBe rulesFileContent(inputIds) val secondEntry = zipStream.getNextEntry secondEntry.getName shouldBe "rules_core2.txt" IOUtils.toString(zipStream, "UTF-8").trim shouldBe "" } }
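The zip assertions above rely on a detail worth calling out: when IOUtils.toString is handed a ZipInputStream, it reads only up to the end of the current entry, so the stream can keep being walked entry by entry afterwards. A reduced sketch of that loop, taking placeholder archive bytes as input:

import java.io.ByteArrayInputStream
import java.util.zip.ZipInputStream
import org.apache.commons.io.IOUtils

def entriesAsText(zipBytes: Array[Byte]): Map[String, String] = {
  val zip = new ZipInputStream(new ByteArrayInputStream(zipBytes))
  try {
    Iterator.continually(zip.getNextEntry)
      .takeWhile(_ != null)
      .map(entry => entry.getName -> IOUtils.toString(zip, "UTF-8")) // reads only the current entry
      .toMap
  } finally zip.close()
}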