java.io.InputStream Scala Examples
The following examples show how to use java.io.InputStream.
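Before the project-specific examples, here is a minimal, self-contained sketch of the basic pattern most of them build on: reading a java.io.InputStream to the end through a fixed-size buffer and closing it afterwards. The object name InputStreamBasics and the helper readFully are illustrative only and are not taken from any of the projects below.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream}

object InputStreamBasics {
  // Read an InputStream to the end and return its contents as a byte array.
  def readFully(in: InputStream): Array[Byte] = {
    val out = new ByteArrayOutputStream()
    val buffer = new Array[Byte](8192)
    var read = in.read(buffer)
    while (read >= 0) {
      out.write(buffer, 0, read)
      read = in.read(buffer)
    }
    out.toByteArray
  }

  def main(args: Array[String]): Unit = {
    // An in-memory stream stands in for a file or network stream here.
    val in: InputStream = new ByteArrayInputStream("hello".getBytes("UTF-8"))
    try println(new String(readFully(in), "UTF-8"))
    finally in.close()
  }
}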
Example 1
Source File: ClientLogManager.scala From Linkis with Apache License 2.0 | 8 votes |
package com.webank.wedatasphere.linkis.entranceclient.context

import java.io.{InputStream, OutputStream}

import com.webank.wedatasphere.linkis.entrance.log._
import com.webank.wedatasphere.linkis.scheduler.queue.Job
import com.webank.wedatasphere.linkis.server.conf.ServerConfiguration
import org.apache.commons.io.input.NullInputStream
import org.apache.commons.io.output.NullOutputStream

class ClientLogManager extends CacheLogManager {
  override def getLogReader(execId: String): LogReader = {
    new CacheLogReader("", ServerConfiguration.BDP_SERVER_ENCODING.getValue, new Cache(10), "") {
      override def getInputStream: InputStream = new NullInputStream(0)
    }
  }

  override def createLogWriter(job: Job): LogWriter = {
    new NullCacheLogWriter(ServerConfiguration.BDP_SERVER_ENCODING.getValue, new Cache(20))
  }

  class NullCacheLogWriter(charset: String,
                           sharedCache: Cache,
                           override protected val outputStream: OutputStream = new NullOutputStream)
    extends CacheLogWriter("", charset, sharedCache, "")
}
Example 2
Source File: Queries.scala From daml with Apache License 2.0 | 7 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.ledger.on.sql.queries

import java.io.InputStream
import java.sql.{Blob, Connection, PreparedStatement}

import anorm.{
  BatchSql,
  Column,
  MetaDataItem,
  NamedParameter,
  RowParser,
  SqlMappingError,
  SqlParser,
  SqlRequestError,
  ToStatement
}
import com.google.protobuf.ByteString

trait Queries extends ReadQueries with WriteQueries

object Queries {
  val TablePrefix = "ledger"
  val LogTable = s"${TablePrefix}_log"
  val MetaTable = s"${TablePrefix}_meta"
  val StateTable = s"${TablePrefix}_state"

  // By explicitly writing a value to a "table_key" column, we ensure we only ever have one row in
  // the meta table. An attempt to write a second row will result in a key conflict.
  private[queries] val MetaTableKey = 0

  def executeBatchSql(
      query: String,
      params: Iterable[Seq[NamedParameter]],
  )(implicit connection: Connection): Unit = {
    if (params.nonEmpty)
      BatchSql(query, params.head, params.drop(1).toArray: _*).execute()
    ()
  }

  implicit def byteStringToStatement: ToStatement[ByteString] = new ToStatement[ByteString] {
    override def set(s: PreparedStatement, index: Int, v: ByteString): Unit =
      s.setBinaryStream(index, v.newInput(), v.size())
  }

  implicit def columnToByteString: Column[ByteString] =
    Column.nonNull { (value: Any, meta: MetaDataItem) =>
      value match {
        case blob: Blob => Right(ByteString.readFrom(blob.getBinaryStream))
        case byteArray: Array[Byte] => Right(ByteString.copyFrom(byteArray))
        case inputStream: InputStream => Right(ByteString.readFrom(inputStream))
        case _ =>
          Left[SqlRequestError, ByteString](
            SqlMappingError(s"Cannot convert value of column ${meta.column} to ByteString"))
      }
    }

  def getBytes(columnName: String): RowParser[ByteString] =
    SqlParser.get(columnName)(columnToByteString)
}
Example 3
Source File: CommandUtils.scala From drizzle-spark with Apache License 2.0 | 7 votes |
package org.apache.spark.deploy.worker

import java.io.{File, FileOutputStream, InputStream, IOException}

import scala.collection.JavaConverters._
import scala.collection.Map

import org.apache.spark.SecurityManager
import org.apache.spark.deploy.Command
import org.apache.spark.internal.Logging
import org.apache.spark.launcher.WorkerCommandBuilder
import org.apache.spark.util.Utils

  def redirectStream(in: InputStream, file: File) {
    val out = new FileOutputStream(file, true)

    // TODO: It would be nice to add a shutdown hook here that explains why the output is
    // terminating. Otherwise if the worker dies the executor logs will silently stop.
    new Thread("redirect output to " + file) {
      override def run() {
        try {
          Utils.copyStream(in, out, true)
        } catch {
          case e: IOException =>
            logInfo("Redirection to " + file + " closed: " + e.getMessage)
        }
      }
    }.start()
  }
}
Example 4
Source File: GuardedProcess.scala From shadowsocksr-android with GNU General Public License v3.0 | 5 votes |
package com.github.shadowsocks

import java.io.{IOException, InputStream, OutputStream}
import java.lang.System.currentTimeMillis
import java.util.concurrent.Semaphore

import android.util.Log

import scala.collection.JavaConversions._

class GuardedProcess(cmd: Seq[String]) extends Process {
  private val TAG = classOf[GuardedProcess].getSimpleName

  @volatile private var guardThread: Thread = _
  @volatile private var isDestroyed: Boolean = _
  @volatile private var process: Process = _
  @volatile private var isRestart = false

  def start(onRestartCallback: () => Unit = null): GuardedProcess = {
    val semaphore = new Semaphore(1)
    semaphore.acquire
    @volatile var ioException: IOException = null

    guardThread = new Thread(() => {
      try {
        var callback: () => Unit = null
        while (!isDestroyed) {
          Log.i(TAG, "start process: " + cmd)
          val startTime = currentTimeMillis

          process = new ProcessBuilder(cmd).redirectErrorStream(true).start

          if (callback == null) callback = onRestartCallback else callback()

          semaphore.release
          process.waitFor

          if (isRestart) {
            isRestart = false
          } else {
            if (currentTimeMillis - startTime < 1000) {
              Log.w(TAG, "process exit too fast, stop guard: " + cmd)
              isDestroyed = true
            }
          }
        }
      } catch {
        case ignored: InterruptedException =>
          Log.i(TAG, "thread interrupt, destroy process: " + cmd)
          process.destroy()
        case e: IOException => ioException = e
      } finally semaphore.release
    }, "GuardThread-" + cmd)

    guardThread.start()
    semaphore.acquire
    if (ioException != null) {
      throw ioException
    }
    this
  }

  def destroy() {
    isDestroyed = true
    guardThread.interrupt()
    process.destroy()
    try guardThread.join() catch {
      case ignored: InterruptedException =>
    }
  }

  def restart() {
    isRestart = true
    process.destroy()
  }

  def exitValue: Int = throw new UnsupportedOperationException
  def getErrorStream: InputStream = throw new UnsupportedOperationException
  def getInputStream: InputStream = throw new UnsupportedOperationException
  def getOutputStream: OutputStream = throw new UnsupportedOperationException

  @throws(classOf[InterruptedException])
  def waitFor = {
    guardThread.join()
    0
  }
}
Example 5
Source File: SnowflakeRDD.scala From spark-snowflake with Apache License 2.0 | 5 votes |
package net.snowflake.spark.snowflake.io

import java.io.InputStream

import net.snowflake.spark.snowflake.io.SupportedFormat.SupportedFormat
import org.apache.spark.{Partition, SparkContext, TaskContext}
import org.apache.spark.rdd.RDD

class SnowflakeRDD(sc: SparkContext,
                   fileNames: List[String],
                   format: SupportedFormat,
                   downloadFile: String => InputStream,
                   expectedPartitionCount: Int)
    extends RDD[String](sc, Nil) {

  @transient private val MIN_FILES_PER_PARTITION = 2
  @transient private val MAX_FILES_PER_PARTITION = 10

  override def compute(split: Partition, context: TaskContext): Iterator[String] = {
    val snowflakePartition = split.asInstanceOf[SnowflakePartition]

    val stringIterator = new SFRecordReader(format, snowflakePartition.index)
    stringIterator.setDownloadFunction(downloadFile)

    snowflakePartition.fileNames.foreach(name => {
      stringIterator.addFileName(name)
    })

    logger.info(
      s"""${SnowflakeResultSetRDD.WORKER_LOG_PREFIX}: Start reading
         | partition ID:${snowflakePartition.index}
         | totalFileCount=${snowflakePartition.fileNames.size}
         |""".stripMargin.filter(_ >= ' '))

    stringIterator
  }

  override protected def getPartitions: Array[Partition] = {
    var fileCountPerPartition = Math.max(
      MIN_FILES_PER_PARTITION,
      (fileNames.length + expectedPartitionCount / 2) / expectedPartitionCount
    )
    fileCountPerPartition = Math.min(MAX_FILES_PER_PARTITION, fileCountPerPartition)
    val fileCount = fileNames.length
    val partitionCount = (fileCount + fileCountPerPartition - 1) / fileCountPerPartition
    logger.info(s"""${SnowflakeResultSetRDD.MASTER_LOG_PREFIX}: Total statistics:
         | fileCount=$fileCount filePerPartition=$fileCountPerPartition
         | actualPartitionCount=$partitionCount
         | expectedPartitionCount=$expectedPartitionCount
         |""".stripMargin.filter(_ >= ' '))

    if (fileNames.nonEmpty) {
      fileNames
        .grouped(fileCountPerPartition)
        .zipWithIndex
        .map {
          case (names, index) => SnowflakePartition(names, id, index)
        }
        .toArray
    } else {
      // If the result set is empty, put one empty partition to the array.
      Seq[SnowflakePartition] { SnowflakePartition(fileNames, 0, 0) }.toArray
    }
  }
}

private case class SnowflakePartition(fileNames: List[String], rddId: Int, index: Int)
    extends Partition {

  override def hashCode(): Int = 31 * (31 + rddId) + index

  override def equals(other: Any): Boolean = super.equals(other)
}
Example 6
Source File: ValueSerializer.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.platform.store.serialization

import java.io.InputStream

import com.daml.lf.archive.{Decode, Reader}
import com.daml.lf.value.Value.{ContractId, VersionedValue}
import com.daml.lf.value.{ValueCoder, ValueOuterClass}

object ValueSerializer {

  def serializeValue(
      value: VersionedValue[ContractId],
      errorContext: => String,
  ): Array[Byte] =
    ValueCoder
      .encodeVersionedValueWithCustomVersion(ValueCoder.CidEncoder, value)
      .fold(error => sys.error(s"$errorContext (${error.errorMessage})"), _.toByteArray)

  private def deserializeValueHelper(
      stream: InputStream,
      errorContext: => Option[String],
  ): VersionedValue[ContractId] =
    ValueCoder
      .decodeVersionedValue(
        ValueCoder.CidDecoder,
        ValueOuterClass.VersionedValue.parseFrom(
          Decode.damlLfCodedInputStream(stream, Reader.PROTOBUF_RECURSION_LIMIT)))
      .fold(
        error =>
          sys.error(errorContext.fold(error.errorMessage)(ctx => s"$ctx (${error.errorMessage})")),
        identity
      )

  def deserializeValue(
      stream: InputStream,
  ): VersionedValue[ContractId] =
    deserializeValueHelper(stream, None)

  def deserializeValue(
      stream: InputStream,
      errorContext: => String,
  ): VersionedValue[ContractId] =
    deserializeValueHelper(stream, Some(errorContext))
}
Example 7
Source File: ContractSerializer.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package db.migration.translation

import java.io.InputStream

import com.daml.lf.archive.{Decode, Reader}
import com.daml.lf.transaction.{TransactionCoder, TransactionOuterClass}
import com.daml.lf.value.Value.{ContractId, ContractInst, VersionedValue}
import com.daml.lf.value.ValueCoder

trait ContractSerializer {
  def serializeContractInstance(coinst: ContractInst[VersionedValue[ContractId]])
    : Either[ValueCoder.EncodeError, Array[Byte]]

  def deserializeContractInstance(stream: InputStream)
    : Either[ValueCoder.DecodeError, ContractInst[VersionedValue[ContractId]]]
}

object ContractSerializer extends ContractSerializer {

  override def serializeContractInstance(coinst: ContractInst[VersionedValue[ContractId]])
    : Either[ValueCoder.EncodeError, Array[Byte]] =
    TransactionCoder
      .encodeContractInstance[ContractId](ValueCoder.CidEncoder, coinst)
      .map(_.toByteArray())

  override def deserializeContractInstance(stream: InputStream)
    : Either[ValueCoder.DecodeError, ContractInst[VersionedValue[ContractId]]] =
    TransactionCoder
      .decodeContractInstance[ContractId](
        ValueCoder.CidDecoder,
        TransactionOuterClass.ContractInstance.parseFrom(
          Decode.damlLfCodedInputStream(stream, Reader.PROTOBUF_RECURSION_LIMIT))
      )
}
Example 8
Source File: TransactionSerializer.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package db.migration.translation

import java.io.InputStream

import com.daml.lf.archive.{Decode, Reader}
import com.daml.lf.data.Ref.LedgerString
import com.daml.lf.transaction.{Transaction => Tx, TransactionCoder, TransactionOuterClass}
import com.daml.lf.value.ValueCoder
import com.daml.lf.value.ValueCoder.{DecodeError, EncodeError}

trait TransactionSerializer {

  def serializeTransaction(
      trId: LedgerString,
      transaction: Tx.CommittedTransaction,
  ): Either[EncodeError, Array[Byte]]

  def deserializeTransaction(
      trId: LedgerString,
      stream: InputStream,
  ): Either[DecodeError, Tx.CommittedTransaction]
}

object TransactionSerializer extends TransactionSerializer {

  override def serializeTransaction(
      trId: LedgerString,
      transaction: Tx.CommittedTransaction,
  ): Either[EncodeError, Array[Byte]] =
    TransactionCoder
      .encodeTransaction(
        TransactionCoder.EventIdEncoder(trId),
        ValueCoder.CidEncoder,
        transaction
      )
      .map(_.toByteArray())

  override def deserializeTransaction(
      trId: LedgerString,
      stream: InputStream): Either[DecodeError, Tx.CommittedTransaction] =
    TransactionCoder
      .decodeTransaction(
        TransactionCoder.EventIdDecoder(trId),
        ValueCoder.CidDecoder,
        TransactionOuterClass.Transaction.parseFrom(
          Decode.damlLfCodedInputStream(stream, Reader.PROTOBUF_RECURSION_LIMIT))
      )
      .map(Tx.CommittedTransaction(_))
}
Example 9
Source File: DarManifestReader.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.lf
package archive

import java.io.InputStream
import java.util.jar.{Attributes, Manifest}

import scala.util.{Failure, Success, Try}

object DarManifestReader {

  private val supportedFormat = "daml-lf"

  def dalfNames(is: InputStream): Try[Dar[String]] = {
    val manifest = new Manifest(is)
    val attributes = value(manifest.getMainAttributes) _
    for {
      mainDalf <- attributes("Main-Dalf")
      allDalfs <- attributes("Dalfs")
      format <- attributes("Format")
      _ <- checkFormat(format)
    } yield Dar(mainDalf, dependencies(allDalfs, mainDalf))
  }

  private def dependencies(other: String, main: String): List[String] = {
    val deps = other.split(',').view.map(_.trim)
    deps.filter(x => x != main).toList
  }

  private def value(attributes: Attributes)(key: String): Try[String] =
    Option(attributes.getValue(key)) match {
      case None => failure(s"Cannot find attribute: $key")
      case Some(x) => Success(x.trim)
    }

  private def checkFormat(format: String): Try[Unit] =
    if (format == supportedFormat) Success(())
    else failure(s"Unsupported format: $format")

  private def failure(msg: String) = Failure(DarManifestReaderException(msg))

  case class DarManifestReaderException(msg: String) extends IllegalStateException(msg)
}
Example 10
Source File: Decode.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.lf
package archive

import java.io.InputStream

import com.daml.lf.data.Ref._
import com.daml.lf.language.Ast._
import com.daml.lf.language.LanguageMajorVersion._
import com.daml.lf.language.LanguageVersion
import com.daml.daml_lf_dev.DamlLf
import com.google.protobuf.CodedInputStream

sealed class Decode(onlySerializableDataDefs: Boolean)
    extends archive.Reader[(PackageId, Package)] {
  import Decode._

  private[lf] val decoders: PartialFunction[LanguageVersion, PayloadDecoder] = {
    case LanguageVersion(V1, minor) if V1.supportedMinorVersions.contains(minor) =>
      PayloadDecoder(new DecodeV1(minor))(_.getDamlLf1)
  }

  override protected[this] def readArchivePayloadOfVersion(
      hash: PackageId,
      lf: DamlLf.ArchivePayload,
      version: LanguageVersion
  ): (PackageId, Package) = {
    val decoder =
      decoders.lift(version).getOrElse(throw ParseError(s"$version unsupported"))
    (hash, decoder.decoder.decodePackage(hash, decoder.extract(lf), onlySerializableDataDefs))
  }
}

object Decode extends Decode(onlySerializableDataDefs = false) {
  type ParseError = Reader.ParseError
  val ParseError = Reader.ParseError

  def damlLfCodedInputStreamFromBytes(
      payload: Array[Byte],
      recursionLimit: Int = PROTOBUF_RECURSION_LIMIT
  ): CodedInputStream =
    Reader.damlLfCodedInputStreamFromBytes(payload, recursionLimit)

  def damlLfCodedInputStream(
      is: InputStream,
      recursionLimit: Int = PROTOBUF_RECURSION_LIMIT): CodedInputStream =
    Reader.damlLfCodedInputStream(is, recursionLimit)

  private[lf] sealed abstract class PayloadDecoder {
    type I
    val extract: DamlLf.ArchivePayload => I
    val decoder: OfPackage[I]
  }

  private[archive] object PayloadDecoder {
    def apply[I0](fi: OfPackage[I0])(k: DamlLf.ArchivePayload => I0): PayloadDecoder =
      new PayloadDecoder {
        type I = I0
        override val extract = k
        override val decoder = fi
      }
  }

  private[lf] trait OfPackage[-Pkg] {
    type ProtoScenarioModule
    def protoScenarioModule(cis: CodedInputStream): ProtoScenarioModule
    @throws[ParseError]
    def decodePackage(
        packageId: PackageId,
        lfPackage: Pkg,
        onlySerializableDataDefs: Boolean = false): Package
    @throws[ParseError]
    def decodeScenarioModule(packageId: PackageId, lfModuleForScenario: ProtoScenarioModule): Module
  }

  private def identifierStart(c: Char) =
    'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' || c == '$' || c == '_'

  private def identifierPart(c: Char): Boolean =
    identifierStart(c) || '0' <= c && c <= '9'

  def checkIdentifier(s: String): Unit = {
    if (s.isEmpty)
      throw Reader.ParseError("empty identifier")
    else if (!(identifierStart(s.head) && s.tail.forall(identifierPart)))
      throw Reader.ParseError(s"identifier $s contains invalid character")
  }

  private val decimalPattern = "[+-]*[0-9]{0,28}(\\.[0-9]{0,10})*".r.pattern
  def checkDecimal(s: String): Boolean = decimalPattern.matcher(s).matches()
}
Example 11
Source File: DarManifestReaderTest.scala From daml with Apache License 2.0 | 5 votes |
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package com.daml.lf.archive

import java.io.{ByteArrayInputStream, InputStream}
import java.nio.charset.Charset

import com.daml.lf.archive.DarManifestReader.DarManifestReaderException
import org.scalatest.{Inside, Matchers, WordSpec}

import scala.util.{Failure, Success}

class DarManifestReaderTest extends WordSpec with Matchers with Inside {

  private val unicode = Charset.forName("UTF-8")

  "should read dalf names from manifest, real scenario with Dalfs line split" in {
    val manifest = """Manifest-Version: 1.0
      |Created-By: Digital Asset packager (DAML-GHC)
      |Main-Dalf: com.daml.lf.archive:DarReaderTest:0.1.dalf
      |Dalfs: com.daml.lf.archive:DarReaderTest:0.1.dalf, daml-pri
      | m.dalf
      |Format: daml-lf
      |Encryption: non-encrypted""".stripMargin

    val inputStream: InputStream = new ByteArrayInputStream(manifest.getBytes(unicode))
    val actual = DarManifestReader.dalfNames(inputStream)

    actual shouldBe Success(
      Dar("com.daml.lf.archive:DarReaderTest:0.1.dalf", List("daml-prim.dalf")))

    inputStream.close()
  }

  "should read dalf names from manifest, Main-Dalf returned in the head" in {
    val manifest = """Main-Dalf: A.dalf
      |Dalfs: B.dalf, C.dalf, A.dalf, E.dalf
      |Format: daml-lf
      |Encryption: non-encrypted""".stripMargin

    val inputStream: InputStream = new ByteArrayInputStream(manifest.getBytes(unicode))
    val actual = DarManifestReader.dalfNames(inputStream)

    actual shouldBe Success(Dar("A.dalf", List("B.dalf", "C.dalf", "E.dalf")))

    inputStream.close()
  }

  "should read dalf names from manifest, can handle one Dalf per manifest" in {
    val manifest = """Main-Dalf: A.dalf
      |Dalfs: A.dalf
      |Format: daml-lf
      |Encryption: non-encrypted""".stripMargin

    val inputStream: InputStream = new ByteArrayInputStream(manifest.getBytes(unicode))
    val actual = DarManifestReader.dalfNames(inputStream)

    actual shouldBe Success(Dar("A.dalf", List.empty))

    inputStream.close()
  }

  "should return failure if Format is not daml-lf" in {
    val manifest = """Main-Dalf: A.dalf
      |Dalfs: B.dalf, C.dalf, A.dalf, E.dalf
      |Format: anything-different-from-daml-lf
      |Encryption: non-encrypted""".stripMargin

    val inputStream: InputStream = new ByteArrayInputStream(manifest.getBytes(unicode))
    val actual = DarManifestReader.dalfNames(inputStream)

    inside(actual) {
      case Failure(DarManifestReaderException(msg)) =>
        msg shouldBe "Unsupported format: anything-different-from-daml-lf"
    }

    inputStream.close()
  }
}
Example 12
Source File: ProcessTestUtils.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.test

import java.io.{InputStream, IOException}

import scala.sys.process.BasicIO

object ProcessTestUtils {
  class ProcessOutputCapturer(stream: InputStream, capture: String => Unit) extends Thread {
    this.setDaemon(true)

    override def run(): Unit = {
      try {
        BasicIO.processFully(capture)(stream)
      } catch { case _: IOException =>
        // Ignores the IOException thrown when the process termination, which closes the input
        // stream abruptly.
      }
    }
  }
}
Example 13
Source File: CryptoStreamUtils.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.security

import java.io.{InputStream, OutputStream}
import java.util.Properties
import javax.crypto.spec.{IvParameterSpec, SecretKeySpec}

import org.apache.commons.crypto.random._
import org.apache.commons.crypto.stream._
import org.apache.hadoop.io.Text

import org.apache.spark.SparkConf
import org.apache.spark.deploy.SparkHadoopUtil
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config._

  private[this] def createInitializationVector(properties: Properties): Array[Byte] = {
    val iv = new Array[Byte](IV_LENGTH_IN_BYTES)
    val initialIVStart = System.currentTimeMillis()
    CryptoRandomFactory.getCryptoRandom(properties).nextBytes(iv)
    val initialIVFinish = System.currentTimeMillis()
    val initialIVTime = initialIVFinish - initialIVStart
    if (initialIVTime > 2000) {
      logWarning(s"It costs ${initialIVTime} milliseconds to create the Initialization Vector " +
        s"used by CryptoStream")
    }
    iv
  }
}
Example 14
Source File: MetricsConfig.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics

import java.io.{FileInputStream, InputStream}
import java.util.Properties

import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.util.matching.Regex

import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import org.apache.spark.util.Utils

private[spark] class MetricsConfig(conf: SparkConf) extends Logging {

  private val DEFAULT_PREFIX = "*"
  private val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r
  private val DEFAULT_METRICS_CONF_FILENAME = "metrics.properties"

  private[metrics] val properties = new Properties()
  private[metrics] var perInstanceSubProperties: mutable.HashMap[String, Properties] = null

  private def setDefaultProperties(prop: Properties) {
    prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet")
    prop.setProperty("*.sink.servlet.path", "/metrics/json")
    prop.setProperty("master.sink.servlet.path", "/metrics/master/json")
    prop.setProperty("applications.sink.servlet.path", "/metrics/applications/json")
  }

  private[this] def loadPropertiesFromFile(path: Option[String]): Unit = {
    var is: InputStream = null
    try {
      is = path match {
        case Some(f) => new FileInputStream(f)
        case None => Utils.getSparkClassLoader.getResourceAsStream(DEFAULT_METRICS_CONF_FILENAME)
      }

      if (is != null) {
        properties.load(is)
      }
    } catch {
      case e: Exception =>
        val file = path.getOrElse(DEFAULT_METRICS_CONF_FILENAME)
        logError(s"Error loading configuration file $file", e)
    } finally {
      if (is != null) {
        is.close()
      }
    }
  }
}
Example 15
Source File: ReplayListenerBus.scala From drizzle-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler

import java.io.{InputStream, IOException}

import scala.io.Source

import com.fasterxml.jackson.core.JsonParseException
import org.json4s.jackson.JsonMethods._

import org.apache.spark.internal.Logging
import org.apache.spark.scheduler.ReplayListenerBus._
import org.apache.spark.util.JsonProtocol

  def replay(
      logData: InputStream,
      sourceName: String,
      maybeTruncated: Boolean = false,
      eventsFilter: ReplayEventsFilter = SELECT_ALL_FILTER): Unit = {
    var currentLine: String = null
    var lineNumber: Int = 0

    try {
      val lineEntries = Source.fromInputStream(logData)
        .getLines()
        .zipWithIndex
        .filter { case (line, _) => eventsFilter(line) }

      while (lineEntries.hasNext) {
        try {
          val entry = lineEntries.next()

          currentLine = entry._1
          lineNumber = entry._2 + 1

          postToAll(JsonProtocol.sparkEventFromJson(parse(currentLine)))
        } catch {
          case jpe: JsonParseException =>
            // We can only ignore exception from last line of the file that might be truncated
            // the last entry may not be the very last line in the event log, but we treat it
            // as such in a best effort to replay the given input
            if (!maybeTruncated || lineEntries.hasNext) {
              throw jpe
            } else {
              logWarning(s"Got JsonParseException from log file $sourceName" +
                s" at line $lineNumber, the file might not have finished writing cleanly.")
            }
        }
      }
    } catch {
      case ioe: IOException =>
        throw ioe
      case e: Exception =>
        logError(s"Exception parsing Spark event log: $sourceName", e)
        logError(s"Malformed line #$lineNumber: $currentLine\n")
    }
  }

}

private[spark] object ReplayListenerBus {

  type ReplayEventsFilter = (String) => Boolean

  // utility filter that selects all event logs during replay
  val SELECT_ALL_FILTER: ReplayEventsFilter = { (eventString: String) => true }
}
Example 16
Source File: TFRecordIterator.scala From BigDL with Apache License 2.0 | 5 votes |
package com.intel.analytics.bigdl.utils.tf

import java.io.{BufferedInputStream, File, FileInputStream, InputStream}
import java.nio.{ByteBuffer, ByteOrder}

class TFRecordIterator(inputStream: InputStream) extends Iterator[Array[Byte]] {

  private var dataBuffer: Array[Byte] = null

  private val lengthBuffer: Array[Byte] = new Array[Byte](8)

  override def hasNext: Boolean = {
    if (dataBuffer != null) {
      true
    } else {
      val numOfBytes = inputStream.read(lengthBuffer)
      if (numOfBytes == 8) {
        val lengthWrapper = ByteBuffer.wrap(lengthBuffer)
        lengthWrapper.order(ByteOrder.LITTLE_ENDIAN)
        val length = lengthWrapper.getLong().toInt
        // todo, do crc check, simply skip now
        inputStream.skip(4)
        dataBuffer = new Array[Byte](length)
        inputStream.read(dataBuffer)
        // todo, do crc check, simply skip now
        inputStream.skip(4)
        true
      } else {
        inputStream.close()
        false
      }
    }
  }

  override def next(): Array[Byte] = {
    if (hasNext) {
      val data = this.dataBuffer
      this.dataBuffer = null
      data
    } else {
      throw new NoSuchElementException("next on empty iterator")
    }
  }
}

object TFRecordIterator {
  def apply(file: File): TFRecordIterator = {
    val inputStream = new FileInputStream(file)
    new TFRecordIterator(inputStream)
  }
}
Example 17
Source File: WarcRecord.scala From ArchiveSpark with MIT License | 5 votes |
package org.archive.archivespark.specific.warc

import java.io.{BufferedInputStream, InputStream}

import org.archive.archivespark.dataspecs.access.DataAccessor
import org.archive.archivespark.functions.StringContent
import org.archive.archivespark.model.dataloads.{ByteLoad, DataLoad, TextLoad}
import org.archive.archivespark.model.pointers.FieldPointer
import org.archive.archivespark.model.{DataEnrichRoot, EnrichRootCompanion}
import org.archive.archivespark.sparkling.cdx.CdxRecord
import org.archive.archivespark.sparkling.warc.{WarcRecord => WARC}
import org.archive.archivespark.specific.warc.functions.WarcPayload

class WarcRecord(cdx: CdxRecord, val data: DataAccessor[InputStream])
  extends DataEnrichRoot[CdxRecord, WARC](cdx) with WarcLikeRecord {

  override def access[R >: Null](action: WARC => R): R = data.access { stream =>
    WARC.get(if (stream.markSupported) stream else new BufferedInputStream(stream)) match {
      case Some(record) => action(record)
      case None => null
    }
  }

  override def companion: EnrichRootCompanion[WarcRecord] = WarcRecord
}

object WarcRecord extends EnrichRootCompanion[WarcRecord] {
  override def dataLoad[T](load: DataLoad[T]): Option[FieldPointer[WarcRecord, T]] = (load match {
    case ByteLoad => Some(WarcPayload)
    case TextLoad => Some(StringContent)
    case _ => None
  }).map(_.asInstanceOf[FieldPointer[WarcRecord, T]])
}
Example 18
Source File: FileStreamRecord.scala From ArchiveSpark with MIT License | 5 votes |
package org.archive.archivespark.specific.raw

import java.io.InputStream

import org.archive.archivespark.dataspecs.access.DataAccessor
import org.archive.archivespark.model.{DataEnrichRoot, EnrichRootCompanion}
import org.archive.archivespark.sparkling.io.IOUtil
import org.archive.archivespark.sparkling.util.{IteratorUtil, StringUtil}

import scala.io.Source

class FileStreamRecord(path: String, accessor: DataAccessor[InputStream], retryDelayMs: Option[Int] = None)
  extends DataEnrichRoot[String, InputStream](path) {

  override def access[R >: Null](action: InputStream => R): R = accessor.access(action)

  def accessSource[R >: Null](action: Source => R): R = access { stream =>
    StringUtil.source(stream) { source =>
      action(source)
    }
  }

  def lineIterator: Iterator[String] = accessor.get match {
    case Some(stream) => IteratorUtil.cleanup(IOUtil.lines(stream), () => stream.close())
    case None => Iterator.empty
  }

  override def companion: EnrichRootCompanion[FileStreamRecord] = FileStreamRecord
}

object FileStreamRecord extends EnrichRootCompanion[FileStreamRecord]
Example 19
Source File: HdfsFileAccessor.scala From ArchiveSpark with MIT License | 5 votes |
package org.archive.archivespark.dataspecs.access

import java.io.InputStream

import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.deploy.SparkHadoopUtil
import org.archive.archivespark.sparkling.io.IOUtil

class HdfsFileAccessor(path: String, decompress: Boolean = true) extends CloseableDataAccessor[InputStream] {
  override def get: Option[InputStream] = {
    val fs = FileSystem.get(SparkHadoopUtil.get.conf)
    var stream: InputStream = null
    try {
      val raw = fs.open(new Path(path))
      stream = if (decompress) IOUtil.decompress(raw, Some(path)) else raw
      Some(stream)
    } catch {
      case e: Exception =>
        e.printStackTrace()
        if (stream != null) stream.close()
        None
    }
  }
}
Example 20
Source File: HdfsStreamAccessor.scala From ArchiveSpark with MIT License | 5 votes |
package org.archive.archivespark.dataspecs.access

import java.io.InputStream

import org.apache.commons.io.input.BoundedInputStream
import org.apache.hadoop.fs.{FSDataInputStream, FileSystem, Path}
import org.apache.spark.deploy.SparkHadoopUtil

class HdfsStreamAccessor(location: HdfsLocationInfo) extends CloseableDataAccessor[InputStream] {
  override def get: Option[InputStream] = {
    if (location.length < 0 || location.offset < 0) None
    else {
      val fs = FileSystem.get(SparkHadoopUtil.get.conf)
      var stream: FSDataInputStream = null
      try {
        stream = fs.open(new Path(location.path))
        stream.seek(location.offset)
        Some(new BoundedInputStream(stream, location.length))
      } catch {
        case e: Exception =>
          e.printStackTrace()
          if (stream != null) stream.close()
          None
      }
    }
  }
}
Example 21
Source File: ByteArrayAccessor.scala From ArchiveSpark with MIT License | 5 votes |
package org.archive.archivespark.dataspecs.access

import java.io.InputStream
import java.util.zip.GZIPInputStream

import org.archive.archivespark.sparkling.io.ByteArray

class ByteArrayAccessor(bytes: ByteArray, gz: Boolean = false) extends CloseableDataAccessor[InputStream] {
  def this(bytes: Array[Byte], gz: Boolean) = this({
    val array = new ByteArray()
    array.append(bytes)
    array
  }, gz)

  def this(bytes: Array[Byte]) = this(bytes, false)

  override def get: Option[InputStream] = {
    var stream: InputStream = null
    try {
      stream = bytes.toInputStream
      stream = if (gz) new GZIPInputStream(stream) else stream
      Some(stream)
    } catch {
      case e: Exception =>
        e.printStackTrace()
        if (stream != null) stream.close()
        None
    }
  }
}
Example 22
Source File: HttpClient.scala From ArchiveSpark with MIT License | 5 votes |
package org.archive.archivespark.sparkling.http

import java.io.{BufferedInputStream, InputStream}
import java.net.{HttpURLConnection, URL, URLConnection}

import org.archive.archivespark.sparkling.logging.LogContext
import org.archive.archivespark.sparkling.util.Common

import scala.collection.JavaConverters._
import scala.util.Try

object HttpClient {
  val DefaultRetries: Int = 30
  val DefaultSleepMillis: Int = 1000
  val DefaultTimeoutMillis: Int = -1

  implicit val logContext: LogContext = LogContext(this)

  def request[R](url: String, headers: Map[String, String] = Map.empty, retries: Int = DefaultRetries, sleepMillis: Int = DefaultSleepMillis, timeoutMillis: Int = DefaultTimeoutMillis)(action: InputStream => R): R =
    rangeRequest(url, headers, retries = retries, sleepMillis = sleepMillis, timeoutMillis = timeoutMillis)(action)

  def rangeRequest[R](url: String, headers: Map[String, String] = Map.empty, offset: Long = 0, length: Long = -1, retries: Int = DefaultRetries, sleepMillis: Int = DefaultSleepMillis, timeoutMillis: Int = DefaultTimeoutMillis)(action: InputStream => R): R = {
    rangeRequestConnection(url, headers, offset, length, retries, sleepMillis, timeoutMillis) { case connection: HttpURLConnection =>
      val in = new BufferedInputStream(connection.getInputStream)
      val r = action(in)
      Try(in.close())
      r
    }
  }

  def requestMessage[R](url: String, headers: Map[String, String] = Map.empty, retries: Int = DefaultRetries, sleepMillis: Int = DefaultSleepMillis, timeoutMillis: Int = DefaultTimeoutMillis)(action: HttpMessage => R): R =
    rangeRequestMessage(url, headers, retries = retries, sleepMillis = sleepMillis, timeoutMillis = timeoutMillis)(action)

  def rangeRequestMessage[R](url: String, headers: Map[String, String] = Map.empty, offset: Long = 0, length: Long = -1, retries: Int = DefaultRetries, sleepMillis: Int = DefaultSleepMillis, timeoutMillis: Int = DefaultTimeoutMillis)(action: HttpMessage => R): R = {
    rangeRequestConnection(url, headers, offset, length, retries, sleepMillis, timeoutMillis) { case connection: HttpURLConnection =>
      val in = new BufferedInputStream(connection.getInputStream)
      val responseHeaders = connection.getHeaderFields.asScala.toMap.flatMap { case (k, v) =>
        v.asScala.headOption.map((if (k == null) "" else k) -> _)
      }
      val message = new HttpMessage(connection.getResponseMessage, responseHeaders, in)
      val r = action(message)
      Try(in.close())
      r
    }
  }

  def requestConnection[R](url: String, headers: Map[String, String] = Map.empty, retries: Int = DefaultRetries, sleepMillis: Int = DefaultSleepMillis, timeoutMillis: Int = DefaultTimeoutMillis)(action: URLConnection => R): R =
    rangeRequestConnection(url, headers, retries = retries, sleepMillis = sleepMillis, timeoutMillis = timeoutMillis)(action)

  def rangeRequestConnection[R](url: String, headers: Map[String, String] = Map.empty, offset: Long = 0, length: Long = -1, retries: Int = DefaultRetries, sleepMillis: Int = DefaultSleepMillis, timeoutMillis: Int = DefaultTimeoutMillis)(action: URLConnection => R): R = {
    Common.timeoutWithReporter(timeoutMillis) { reporter =>
      val connection = Common.retry(retries, sleepMillis, (retry, e) => {
        "Request failed (" + retry + "/" + retries + "): " + url + " (" + offset + "-" + (if (length >= 0) length else "") + ") - " + e.getMessage
      }) { _ =>
        reporter.alive()
        val connection = new URL(url).openConnection()
        for ((key, value) <- headers) connection.addRequestProperty(key, value)
        if (offset > 0 || length >= 0) connection.addRequestProperty("Range", "bytes=" + offset + "-" + (if (length >= 0) offset + length - 1 else ""))
        connection.asInstanceOf[HttpURLConnection]
      }
      val r = action(connection)
      Try(connection.disconnect())
      r
    }
  }
}
Example 23
Source File: HttpMessage.scala From ArchiveSpark with MIT License | 5 votes |
package org.archive.archivespark.sparkling.http

import java.io.{BufferedInputStream, InputStream}
import java.util.zip.GZIPInputStream

import org.apache.commons.httpclient.ChunkedInputStream
import org.apache.http.client.entity.DeflateInputStream
import org.archive.archivespark.sparkling.io.IOUtil
import org.archive.archivespark.sparkling.util.StringUtil

import scala.collection.immutable.ListMap
import scala.util.Try

class HttpMessage(val statusLine: String, val headers: Map[String, String], val payload: InputStream) {
  import HttpMessage._

  lazy val lowerCaseHeaders: Map[String, String] = headers.map { case (k, v) => (k.toLowerCase, v) }

  def contentEncoding: Option[String] = lowerCaseHeaders.get("content-encoding").map(_.toLowerCase)

  def mime: Option[String] = lowerCaseHeaders.get("content-type").map(_.split(';').head.trim.toLowerCase)

  def charset: Option[String] = {
    lowerCaseHeaders.get("content-type").flatMap(_.split(';').drop(1).headOption).map(_.trim)
      .filter(_.startsWith("charset="))
      .map(_.drop(8).trim.stripPrefix("\"").stripPrefix("'").stripSuffix("'").stripSuffix("\"").split(",", 2).head.trim)
      .filter(_.nonEmpty).map(_.toUpperCase)
  }

  def redirectLocation: Option[String] = lowerCaseHeaders.get("location").map(_.trim)

  def isChunked: Boolean = lowerCaseHeaders.get("transfer-encoding").map(_.toLowerCase).contains("chunked")

  def status: Int = statusLine.split(" +").drop(1).headOption.flatMap(s => Try { s.toInt }.toOption).getOrElse(-1)

  lazy val body: InputStream = Try {
    var decoded = if (isChunked) new ChunkedInputStream(payload) else payload
    val decoders = contentEncoding.toSeq.flatMap(_.split(',').map(_.trim).flatMap(DecoderRegistry.get))
    for (decoder <- decoders) decoded = decoder(decoded)
    new BufferedInputStream(decoded)
  }.getOrElse(IOUtil.emptyStream)

  lazy val bodyString: String = StringUtil.fromInputStream(body, charset.toSeq ++ BodyCharsets)
}

object HttpMessage {
  val Charset: String = "UTF-8"
  val HttpMessageStart = "HTTP/"
  val BodyCharsets: Seq[String] = Seq("UTF-8", "ISO-8859-1", "WINDOWS-1252")

  // see org.apache.http.client.protocol.ResponseContentEncoding
  val DecoderRegistry: Map[String, InputStream => InputStream] = Map(
    "gzip" -> ((in: InputStream) => new GZIPInputStream(in)),
    "x-gzip" -> ((in: InputStream) => new GZIPInputStream(in)),
    "deflate" -> ((in: InputStream) => new DeflateInputStream(in))
  )

  def get(in: InputStream): Option[HttpMessage] = {
    var line = StringUtil.readLine(in, Charset)
    while (line != null && !{
      if (line.startsWith(HttpMessageStart)) {
        val statusLine = line
        val headers = collection.mutable.Buffer.empty[(String, String)]
        line = StringUtil.readLine(in, Charset)
        while (line != null && line.trim.nonEmpty) {
          val split = line.split(":", 2)
          if (split.length == 2) headers += ((split(0).trim, split(1).trim))
          line = StringUtil.readLine(in, Charset)
        }
        return Some(new HttpMessage(statusLine, ListMap(headers: _*), in))
      }
      false
    }) line = StringUtil.readLine(in, Charset)
    None
  }
}
Example 24
Source File: DigestUtil.scala From ArchiveSpark with MIT License | 5 votes |
package org.archive.archivespark.sparkling.util

import java.io.InputStream

import org.apache.commons.codec.binary.Base32
import org.apache.commons.codec.digest.DigestUtils

object DigestUtil {
  def sha1Base32(in: InputStream): String = {
    val digest = DigestUtils.sha1(in)
    new Base32().encodeAsString(digest).toUpperCase
  }

  def sha1Base32(bytes: Array[Byte]): String = {
    val digest = DigestUtils.sha1(bytes)
    new Base32().encodeAsString(digest).toUpperCase
  }

  def sha1Base32(str: String): String = {
    val digest = DigestUtils.sha1(str)
    new Base32().encodeAsString(digest).toUpperCase
  }
}
Example 25
Source File: TypedInOut.scala From ArchiveSpark with MIT License | 5 votes |
package org.archive.archivespark.sparkling.io

import java.io.{InputStream, OutputStream}

trait TypedInOut[A] extends Serializable {
  trait TypedInOutWriter {
    def stream: OutputStream
    def write(record: A)
    def flush(): Unit
    def close(): Unit
  }

  def out(stream: OutputStream): TypedInOutWriter
  def in(stream: InputStream): Iterator[A]
}

object TypedInOut {
  def apply[A, O](writer: OutputStream => O, reader: InputStream => Iterator[A])(writeRecord: (A, O) => Unit, flushOut: O => Unit, closeOut: O => Unit): TypedInOut[A] = new TypedInOut[A] {
    override def out(outStream: OutputStream): TypedInOutWriter = new TypedInOutWriter {
      override val stream: OutputStream = outStream
      private val out = writer(stream)
      override def write(record: A): Unit = writeRecord(record, out)
      override def flush(): Unit = flushOut(out)
      override def close(): Unit = closeOut(out)
    }

    override def in(inStream: InputStream): Iterator[A] = reader(inStream)
  }

  implicit val stringInOut: TypedInOut[String] = TypedInOut(IOUtil.print(_), IOUtil.lines(_))(
    (r, o) => o.println(r),
    _.flush(),
    _.close()
  )

  def toStringInOut[A](toString: A => String, fromString: String => A): TypedInOut[A] = TypedInOut(IOUtil.print(_), IOUtil.lines(_).map(fromString))(
    (r, o) => o.println(toString(r)),
    _.flush(),
    _.close()
  )
}
Example 26
Source File: NonClosingInputStream.scala From ArchiveSpark with MIT License | 5 votes |
package org.archive.archivespark.sparkling.io

import java.io.InputStream

class NonClosingInputStream(in: InputStream) extends InputStream {
  override def read(): Int = in.read()
  override def read(b: Array[Byte]): Int = in.read(b)
  override def read(b: Array[Byte], off: Int, len: Int): Int = in.read(b, off, len)
  override def skip(n: Long): Long = in.skip(n)
  override def available(): Int = in.available()
  override def close(): Unit = {}
  override def mark(readlimit: Int): Unit = in.mark(readlimit)
  override def reset(): Unit = in.reset()
  override def markSupported(): Boolean = in.markSupported()
}
Example 27
Source File: GzipUtil.scala From ArchiveSpark with MIT License | 5 votes |
package org.archive.archivespark.sparkling.io

import java.io.{BufferedInputStream, InputStream}

import com.google.common.io.CountingInputStream
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream
import org.archive.archivespark.sparkling.Sparkling
import org.archive.archivespark.sparkling.util.IteratorUtil

import scala.util.Try

object GzipUtil {
  import Sparkling._

  val Magic0 = 31
  val Magic1 = 139

  def isCompressed(in: InputStream): Boolean = {
    in.mark(2)
    val (b0, b1) = (in.read, in.read)
    in.reset()
    b0 == Magic0 && b1 == Magic1
  }

  def decompressConcatenated(in: InputStream): Iterator[InputStream] =
    decompressConcatenatedWithPosition(in).map { case (pos, s) => s }

  def decompressConcatenatedWithPosition(in: InputStream): Iterator[(Long, InputStream)] = {
    val stream = new CountingInputStream(IOUtil.supportMark(new NonClosingInputStream(in)))
    var last: Option[InputStream] = None
    IteratorUtil.whileDefined {
      if (last.isDefined) IOUtil.readToEnd(last.get, close = true)
      if (IOUtil.eof(stream)) {
        stream.close()
        None
      } else Try {
        val pos = stream.getCount
        last = Some(new GzipCompressorInputStream(new NonClosingInputStream(stream), false))
        last.map((pos, _))
      }.getOrElse(None)
    }
  }

  def estimateCompressionFactor(in: InputStream, readUncompressedBytes: Long): Double = {
    val stream = new CountingInputStream(new BufferedInputStream(new NonClosingInputStream(in)))
    val uncompressed = new GzipCompressorInputStream(stream, true)
    var read = IOUtil.skip(uncompressed, readUncompressedBytes)
    val decompressed = stream.getCount
    while (decompressed == stream.getCount && !IOUtil.eof(uncompressed, markReset = false)) read += 1
    val factor = read.toDouble / decompressed
    uncompressed.close()
    factor
  }

  def decompress(in: InputStream, filename: Option[String] = None, checkFile: Boolean = false): InputStream = {
    val buffered = if (in.markSupported()) in else new BufferedInputStream(in)
    if (!IOUtil.eof(buffered) && ((filename.isEmpty && !checkFile) || (filename.isDefined && filename.get.toLowerCase.endsWith(GzipExt)))) {
      new GzipCompressorInputStream(buffered, true)
    } else buffered
  }
}
Example 28
Source File: HdfsBlockStream.scala From ArchiveSpark with MIT License | 5 votes |
package org.archive.archivespark.sparkling.io

import java.io.{ByteArrayInputStream, InputStream}

import org.apache.hadoop.fs.{FileSystem, Path}
import org.archive.archivespark.sparkling.logging.LogContext
import org.archive.archivespark.sparkling.util.Common

import scala.util.Try

class HdfsBlockStream(fs: FileSystem, file: String, offset: Long = 0, length: Long = -1, retries: Int = 60, sleepMillis: Int = 1000 * 60) extends InputStream {
  implicit val logContext: LogContext = LogContext(this)

  val path = new Path(file)
  val (blockSize: Int, fileSize: Long) = {
    val status = fs.getFileStatus(path)
    (status.getBlockSize.min(Int.MaxValue).toInt, status.getLen)
  }

  private var pos: Long = offset.max(0)
  private val max: Long = if (length > 0) fileSize.min(pos + length) else fileSize

  private val buffer = new Array[Byte](blockSize)
  private val emptyBlock = new ByteArrayInputStream(Array.emptyByteArray)
  private var block: ByteArrayInputStream = emptyBlock

  def ensureNextBlock(): InputStream = {
    if (block.available() == 0 && pos < max) {
      val end = pos + blockSize
      val blockLength = ((end - (end % blockSize)).min(max) - pos).toInt
      Common.retry(retries, sleepMillis, (retry, e) => {
        "File access failed (" + retry + "/" + retries + "): " + path + " (Offset: " + pos + ") - " + e.getMessage
      }) { retry =>
        val in = fs.open(path, blockLength)
        if (retry > 0) Try(in.seekToNewSource(pos))
        else if (pos > 0) in.seek(pos)
        var read = 0
        while (read < blockLength) read += in.read(buffer, read, blockLength - read)
        Try(in.close())
      }
      pos += blockLength
      block = new ByteArrayInputStream(buffer, 0, blockLength)
    }
    block
  }

  override def read(): Int = ensureNextBlock().read()

  override def read(b: Array[Byte]): Int = ensureNextBlock().read(b)

  override def read(b: Array[Byte], off: Int, len: Int): Int = ensureNextBlock().read(b, off, len)

  override def skip(n: Long): Long = {
    val available = block.available()
    if (n <= available) block.skip(n)
    else {
      block = emptyBlock
      val currentPos = pos - available
      val skip = n.min(max - currentPos)
      pos += skip - available
      skip
    }
  }

  override def available(): Int = block.available()

  override def close(): Unit = {}

  override def markSupported(): Boolean = false
}
Example 29
Source File: Licence.scala From slide-desktop with GNU General Public License v2.0 | 5 votes |
package gui

import java.awt.{BorderLayout, Insets}
import java.io.InputStream
import java.util.Scanner
import javax.swing.{JFrame, JScrollPane, JTextArea, ScrollPaneConstants}

object Licence extends JFrame {
  val istream: InputStream = getClass.getResourceAsStream("res/licence-gpl.txt")
  val licenseText: String = new Scanner(istream, "UTF-8").useDelimiter("\\A").next

  this.setTitle("Licence")
  this.setBounds(100, 100, 640, 800)

  val textField: JTextArea = new JTextArea
  textField.setEditable(false)
  textField.setMargin(new Insets(10, 10, 10, 10))
  textField.setAlignmentX(0)
  textField.setText(licenseText)
  textField.setCaretPosition(0)
  this.add(textField, BorderLayout.CENTER)

  val scroll: JScrollPane = new JScrollPane(textField,
    ScrollPaneConstants.VERTICAL_SCROLLBAR_ALWAYS,
    ScrollPaneConstants.HORIZONTAL_SCROLLBAR_NEVER)
  this.getContentPane.add(scroll)

  def showLicense(): Unit = this.setVisible(true)
}
Example 30
Source File: Hashes.scala From matcher with MIT License | 5 votes |
import java.io.InputStream

object Hashes {
  def mk(algorithm: String, stream: InputStream): Array[Byte] = {
    import java.security.{DigestInputStream, MessageDigest}

    val digest = MessageDigest.getInstance(algorithm)
    try {
      val dis = new DigestInputStream(stream, digest)
      val buffer = new Array[Byte](8192)
      while (dis.read(buffer) >= 0) {}
      dis.close()
      digest.digest
    } finally {
      stream.close()
    }
  }
}
Example 31
Source File: ServerIpUtil.scala From piflow with BSD 2-Clause "Simplified" License | 5 votes |
package cn.piflow.util

import java.io.{File, FileInputStream, InputStream}
import java.net.InetAddress
import java.util.Properties

object ServerIpUtil {
  private val prop: Properties = new Properties()
  var fis: InputStream = null
  var path: String = ""

  try {
    val userDir = System.getProperty("user.dir")
    path = userDir + "/server.ip"
    val file = new File(path)
    if (!file.exists()) {
      file.createNewFile()
    }
    prop.load(new FileInputStream(path))
  } catch {
    case ex: Exception => ex.printStackTrace()
  }

  def getServerIpFile(): String = {
    path
  }

  def getServerIp(): String = {
    val obj = prop.get("server.ip")
    if (obj != null) {
      return obj.toString
    }
    null
  }

  def main(args: Array[String]): Unit = {
    val ip = InetAddress.getLocalHost.getHostAddress
    // write ip to server.ip file
    FileUtil.writeFile("server.ip=" + ip, ServerIpUtil.getServerIpFile())
    println(ServerIpUtil.getServerIp())
  }
}
Example 32
Source File: PropertyUtil.scala From piflow with BSD 2-Clause "Simplified" License | 5 votes |
package cn.piflow.util

import java.io.{FileInputStream, InputStream}
import java.util.Properties

object PropertyUtil {
  private val prop: Properties = new Properties()
  var fis: InputStream = null
  var path: String = ""
  var classPath: String = ""
  var scalaPath: String = ""

  try {
    //val path = Thread.currentThread().getContextClassLoader.getResource("config.properties").getPath
    //fis = this.getClass.getResourceAsStream("")
    val userDir = System.getProperty("user.dir")
    path = userDir + "/config.properties"
    classPath = userDir + "/classpath/"
    scalaPath = userDir + "/scala"
    prop.load(new FileInputStream(path))
  } catch {
    case ex: Exception => ex.printStackTrace()
  }

  def getConfigureFile(): String = {
    path
  }

  def getClassPath(): String = {
    classPath
  }

  def getScalaPath(): String = {
    scalaPath
  }

  def getPropertyValue(propertyKey: String): String = {
    val obj = prop.get(propertyKey)
    if (obj != null) {
      return obj.toString
    }
    null
  }

  def getIntPropertyValue(propertyKey: String): Int = {
    val obj = prop.getProperty(propertyKey)
    if (obj != null) {
      return obj.toInt
    }
    throw new NullPointerException
  }
}
Example 33
Source File: ClasspathResources.scala From intro-to-akka-streams with Apache License 2.0 | 5 votes |
package com.github.dnvriend.streams.util

import java.io.InputStream

import akka.NotUsed
import akka.stream.IOResult
import akka.stream.scaladsl.{ Source, StreamConverters }
import akka.util.ByteString

import scala.concurrent.Future
import scala.io.{ Source ⇒ ScalaIOSource }
import scala.util.Try
import scala.xml.pull.{ XMLEvent, XMLEventReader }

trait ClasspathResources {
  def withInputStream[T](fileName: String)(f: InputStream ⇒ T): T = {
    val is = fromClasspathAsStream(fileName)
    try {
      f(is)
    } finally {
      Try(is.close())
    }
  }

  def withXMLEventReader[T](fileName: String)(f: XMLEventReader ⇒ T): T =
    withInputStream(fileName) { is ⇒
      f(new XMLEventReader(ScalaIOSource.fromInputStream(is)))
    }

  def withXMLEventSource[T](fileName: String)(f: Source[XMLEvent, NotUsed] ⇒ T): T =
    withXMLEventReader(fileName) { reader ⇒
      f(Source.fromIterator(() ⇒ reader))
    }

  def withByteStringSource[T](fileName: String)(f: Source[ByteString, Future[IOResult]] ⇒ T): T =
    withInputStream(fileName) { inputStream ⇒
      f(StreamConverters.fromInputStream(() ⇒ inputStream))
    }

  def streamToString(is: InputStream): String =
    ScalaIOSource.fromInputStream(is).mkString

  def fromClasspathAsString(fileName: String): String =
    streamToString(fromClasspathAsStream(fileName))

  def fromClasspathAsStream(fileName: String): InputStream =
    getClass.getClassLoader.getResourceAsStream(fileName)
}
Example 34
Source File: MergeStrategySpec.scala From daf with BSD 3-Clause "New" or "Revised" License | 5 votes |
package daf.filesystem

import java.io.{ Closeable, InputStream }
import java.util.Scanner

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{ FSDataInputStream, FSDataOutputStream, FileSystem, Path }
import org.scalatest.{ BeforeAndAfterAll, Matchers, WordSpec }

import scala.collection.convert.decorateAsScala._
import scala.util.{ Random, Try }

class MergeStrategySpec extends WordSpec with Matchers with BeforeAndAfterAll {

  private implicit val fileSystem = FileSystem.getLocal(new Configuration)

  private val numFiles = 10

  private val baseDir = "test-dir".asHadoop

  private val workingDir = baseDir / f"merge-strategy-spec-${Random.nextInt(10000)}%05d"

  private def safely[A <: Closeable, U](f: A => U) = { stream: A =>
    val attempt = Try { f(stream) }
    stream.close()
    attempt
  }

  private def readFile(path: Path) =
    safely[FSDataInputStream, Seq[String]] { _.scanner.asScala.toSeq } apply fileSystem.open(path)

  private def readFiles = Try {
    fileSystem.listStatus(workingDir).toSeq.flatMap { status => readFile(status.getPath).get }
  }

  private def openFiles = Try {
    fileSystem.listStatus(workingDir).toSeq.map { status => fileSystem.open(status.getPath) }
  }

  private def createFile(fileName: String) = safely[FSDataOutputStream, Unit] { stream =>
    Random.alphanumeric.grouped(200).take(10).map { randomSplits(_) }.foreach { row =>
      stream.writeUTF { row.mkString("", ",", "\n") }
    }
  } apply fileSystem.create { workingDir / fileName }

  private def randomSplits(chars: Stream[Char], strings: Seq[String] = Seq.empty): Seq[String] =
    chars.splitAt { Random.nextInt(10) + 5 } match {
      case (head, tail) if tail.isEmpty => head.drop(1).mkString +: strings
      case (head, tail) => randomSplits(tail, head.mkString +: strings)
    }

  private def createWorkingDir = Try { fileSystem.mkdirs(workingDir) }

  private def createFiles = Try {
    0 until numFiles foreach { index => createFile(s"test-file-$index").get } // this is relatively nasty, and should be handled in a `traverse`
  }

  private def prepareData = for {
    _ <- createWorkingDir
    _ <- createFiles
  } yield ()

  private def purgeData = Try { fileSystem.delete(workingDir, true) }

  override def beforeAll() = prepareData.get

  override def afterAll() = purgeData.get

  "MergeStrategies info" when {

    "given compressed format files" must {

      "throw an exception" in {
        an[IllegalArgumentException] must be thrownBy MergeStrategies.find {
          FileInfo(workingDir / "test-file-0", 0, FileDataFormats.raw, FileCompressionFormats.gzip)
        }
      }
    }

    "given data as csv" must {

      "drop one line and merge the rest" in {
        safely[InputStream, Seq[String]] { new Scanner(_).asScala.toList }.andThen { attempt =>
          for {
            merged <- attempt
            expected <- readFiles
          } merged.size should be { expected.size - numFiles + 1 }
        } apply MergeStrategies.csv.merge { openFiles.get }
      }
    }

    "given data as json" must {

      "just merge the files into one" in {
        safely[InputStream, Seq[String]] { new Scanner(_).asScala.toList }.andThen { attempt =>
          for {
            merged <- attempt
            expected <- readFiles
          } merged.size should be { expected.size }
        } apply MergeStrategies.json.merge { openFiles.get }
      }
    }
  }
}
Example 35
Source File: ScalastyleInspectionsGenerator.scala From sonar-scala with GNU Lesser General Public License v3.0 | 5 votes |
package com.mwz.sonar.scala.metadata.scalastyle

import java.io.InputStream
import java.nio.file.Paths

import com.mwz.sonar.scala.metadata.scalastyle._
import com.typesafe.config.{Config, ConfigFactory}
import org.scalastyle.{Level, _}
import sbt.Keys._
import sbt._

import scala.meta._
import scala.xml.{Node, NodeSeq, XML}

  def transform(source: Tree, inspections: Seq[ScalastyleInspection]): Tree = {
    val stringified: Seq[String] = inspections.collect {
      case inspection =>
        // Is there a better way of embedding multi-line text?
        val extraDescription = inspection.extraDescription.map(s => "\"\"\"" + s + "\"\"\"")
        val justification = inspection.justification.map(s => "\"\"\"" + s + "\"\"\"")
        val params = inspection.params.map { p =>
          s"""
             |ScalastyleParam(
             |  name = "${p.name}",
             |  typ = ${p.typ},
             |  label = "${p.label}",
             |  description = \"\"\"${p.description}\"\"\",
             |  default = \"\"\"${p.default}\"\"\"
             |)
           """.stripMargin
        }

        // It doesn't seem to be straightforward to automatically convert a collection
        // into a tree using scalameta, so I'm turning it into a String so it can be parsed,
        // which is easier than constructing the tree manually.
        // Totally doable with shapeless though, but it would be a bit of an overkill in this case.
        s"""
           |ScalastyleInspection(
           |  clazz = "${inspection.clazz}",
           |  id = "${inspection.id}",
           |  label = "${inspection.label}",
           |  description = "${inspection.description}",
           |  extraDescription = $extraDescription,
           |  justification = $justification,
           |  defaultLevel = ${inspection.defaultLevel},
           |  params = ${params.toString.parse[Term].get.syntax}
           |)
         """.stripMargin
    }

    // Transform the template file.
    val term: Term = stringified.toString.parse[Term].get
    source.transform {
      case q"val AllInspections: $tpe = $expr" =>
        q"val AllInspections: $tpe = $term"
    }
  }
}
Example 36
Source File: WikipediaToDBpediaClosure.scala From dbpedia-spotlight-model with Apache License 2.0 | 5 votes |
package org.dbpedia.spotlight.db

import org.semanticweb.yars.nx.parser.NxParser
import java.io.InputStream

import org.dbpedia.spotlight.log.SpotlightLog
import collection.immutable.ListSet
import scala.Predef._
import org.dbpedia.spotlight.exceptions.NotADBpediaResourceException
import java.net.URLDecoder
import org.dbpedia.spotlight.model.SpotlightConfiguration
import org.dbpedia.extraction.util.WikiUtil
import scala.collection.mutable.ListBuffer

  def wikipediaToDBpediaURI(url: String): String = {
    val uri = if (url.startsWith("http:")) {
      getEndOfChainURI(decodedNameFromURL(url))
    } else {
      getEndOfChainURI(decodeURL(url))
    }

    if (disambiguationsSet.contains(uri) || uri == null)
      throw new NotADBpediaResourceException("Resource is a disambiguation page.")
    else
      uri
  }

  def getEndOfChainURI(uri: String): String = getEndOfChainURI(uri, Set(uri))

  private def getEndOfChainURI(uri: String, alreadyTraversed: Set[String]): String = linkMap.get(uri) match {
    case Some(s: String) => if (alreadyTraversed.contains(s)) uri else getEndOfChainURI(s, alreadyTraversed + s)
    case None => uri
  }
}
Example 37
Source File: TokenOccurrenceSource.scala From dbpedia-spotlight-model with Apache License 2.0 | 5 votes |
package org.dbpedia.spotlight.db.io import java.io.{InputStream, FileInputStream, File} import io.Source import org.dbpedia.spotlight.db.WikipediaToDBpediaClosure import org.dbpedia.spotlight.db.model.{ResourceStore, TokenTypeStore} import org.dbpedia.spotlight.log.SpotlightLog import scala.Predef._ import scala.Array import org.dbpedia.spotlight.exceptions.{DBpediaResourceNotFoundException, NotADBpediaResourceException} import org.dbpedia.spotlight.model.{TokenType, DBpediaResource} import util.TokenOccurrenceParser object TokenOccurrenceSource { def fromPigInputStream(tokenInputStream: InputStream, tokenTypeStore: TokenTypeStore, wikipediaToDBpediaClosure: WikipediaToDBpediaClosure, resStore: ResourceStore): Iterator[Triple[DBpediaResource, Array[TokenType], Array[Int]]] = { var i = 0 plainTokenOccurrenceSource(tokenInputStream, 0) map { case (wikiurl: String, tokens: Array[String], counts: Array[Int]) => { i += 1 if (i % 10000 == 0) SpotlightLog.info(this.getClass, "Read context for %d resources...", i) try { Triple( resStore.getResourceByName(wikipediaToDBpediaClosure.wikipediaToDBpediaURI(wikiurl)), tokens.map{ token => tokenTypeStore.getTokenType(token) }, counts ) } catch { case e: DBpediaResourceNotFoundException => Triple(null, null, null) case e: NotADBpediaResourceException => Triple(null, null, null) } } } } def fromPigFile(tokenFile: File, tokenStore: TokenTypeStore, wikipediaToDBpediaClosure: WikipediaToDBpediaClosure, resStore: ResourceStore, minimumCount: Int) = fromPigInputStream(new FileInputStream(tokenFile), tokenStore, wikipediaToDBpediaClosure, resStore) val tokensParser = TokenOccurrenceParser.createDefault def plainTokenOccurrenceSource(tokenInputStream: InputStream, minimumCount: Int): Iterator[Triple[String, Array[String], Array[Int]]] = { Source.fromInputStream(tokenInputStream) getLines() filter(!_.equals("")) map { line: String => { val Array(wikiurl, tokens) = line.trim().split('\t') val Pair(tokensA, countsA) = tokensParser.parse(tokens, minimumCount) Triple(wikiurl, tokensA, countsA) } } } }
Example 38
Source File: TokenSource.scala From dbpedia-spotlight-model with Apache License 2.0 | 5 votes |
package org.dbpedia.spotlight.db.io import org.dbpedia.spotlight.io.OccurrenceSource import org.dbpedia.spotlight.db.model.{StringTokenizer, SurfaceFormStore} import collection.mutable.HashMap import scala.collection.JavaConversions._ import scala.collection.JavaConverters._ import java.io.{InputStream, FileInputStream, File} import org.dbpedia.spotlight.log.SpotlightLog import org.dbpedia.spotlight.model._ object TokenSource { private val ADDITIONAL_TOKEN_COUNT = 1 def fromSFStore(sfStore: SurfaceFormStore, tokenizer: StringTokenizer): Seq[String] = { SpotlightLog.info(this.getClass, "Adding all surface form tokens to the TokenStore...") sfStore.iterateSurfaceForms.grouped(100000).toList.par.flatMap(_.map{ sf: SurfaceForm => //Tokenize all SFs first tokenizer.tokenize(sf.name) }).seq.flatten } def fromPigFile(tokenFile: File, additionalTokens: Option[Seq[String]] = None, minimumCount: Int) = fromPigInputStream(new FileInputStream(tokenFile), additionalTokens, minimumCount) def fromPigInputStream(tokenFile: InputStream, additionalTokens: Option[Seq[String]] = None, minimumCount: Int) = { val tokenMap = HashMap[String, Int]() var i = 0 TokenOccurrenceSource.plainTokenOccurrenceSource(tokenFile, minimumCount) foreach { p: Triple[String, Array[String], Array[Int]] => { i += 1 if (i % 10000 == 0) SpotlightLog.info(this.getClass, "Read context for %d resources...", i) (0 to p._2.size -1).foreach { i: Int => tokenMap.put(p._2(i), tokenMap.getOrElse(p._2(i), 0) + p._3(i)) } } } additionalTokens match { case Some(tokens) => { SpotlightLog.info(this.getClass, "Read %d additional tokens...", tokens.size) tokens.foreach { token: String => tokenMap.put(token, tokenMap.getOrElse(token, 0) + ADDITIONAL_TOKEN_COUNT) } } case None => } var id = -1 tokenMap.map{ case(token, count) => { id += 1 (new TokenType(id, token, count), count) } }.toMap.asJava } }
Example 39
Source File: TypesLoader.scala From dbpedia-spotlight-model with Apache License 2.0 | 5 votes |
package org.dbpedia.spotlight.util import java.io.{File, InputStream} import java.util.LinkedHashSet import org.dbpedia.spotlight.log.SpotlightLog import org.dbpedia.spotlight.model._ import org.semanticweb.yars.nx.parser.NxParser import scala.collection.JavaConversions._ import scala.io.Source object TypesLoader { def getTypesMap(typeDictFile : File) : Map[String, List[OntologyType]] = { SpotlightLog.info(this.getClass, "Loading types map...") if (!(typeDictFile.getName.toLowerCase endsWith ".tsv")) throw new IllegalArgumentException("types mapping only accepted in tsv format so far! can't parse "+typeDictFile) // CAUTION: this assumes that the most specific type is listed last var typesMap = Map[String,List[OntologyType]]() for (line <- Source.fromFile(typeDictFile, "UTF-8").getLines) { val elements = line.split("\t") val uri = new DBpediaResource(elements(0)).uri val t = Factory.OntologyType.fromURI(elements(1)) val typesList : List[OntologyType] = typesMap.get(uri).getOrElse(List[OntologyType]()) ::: List(t) typesMap = typesMap.updated(uri, typesList) } SpotlightLog.info(this.getClass, "Done.") typesMap } def getTypesMapFromTSV_java(input: InputStream) : java.util.Map[String,java.util.LinkedHashSet[OntologyType]] = { SpotlightLog.info(this.getClass, "Loading types map...") var typesMap = Map[String,java.util.LinkedHashSet[OntologyType]]() var i = 0; for (line <- Source.fromInputStream(input, "UTF-8").getLines) { val elements = line.split("\t") val uri = new DBpediaResource(elements(0)).uri val typeUri = elements(1) if (!typeUri.equalsIgnoreCase("http://www.w3.org/2002/07/owl#Thing")) { val t = Factory.OntologyType.fromURI(typeUri) i = i + 1; val typesList : java.util.LinkedHashSet[OntologyType] = typesMap.getOrElse(uri,new LinkedHashSet[OntologyType]()) typesList.add(t) t match { case ft: FreebaseType => typesList.add(Factory.OntologyType.fromQName("Freebase:/"+ft.domain)) //Add supertype as well to mimic inference case _ => //nothing } typesMap = typesMap.updated(uri, typesList) } } SpotlightLog.info(this.getClass, "Done. Loaded %d types for %d resources.", i,typesMap.size) typesMap } def getTypesMap_java(instanceTypesStream : InputStream) : java.util.Map[String,java.util.LinkedHashSet[OntologyType]] = { SpotlightLog.info(this.getClass, "Loading types map...") var typesMap = Map[String,java.util.LinkedHashSet[OntologyType]]() var i = 0; // CAUTION: this assumes that the most specific type is listed last val parser = new NxParser(instanceTypesStream) while (parser.hasNext) { val triple = parser.next if(!triple(2).toString.endsWith("owl#Thing")) { i = i + 1; val resource = new DBpediaResource(triple(0).toString) val t = Factory.OntologyType.fromURI(triple(2).toString) val typesList : java.util.LinkedHashSet[OntologyType] = typesMap.get(resource.uri).getOrElse(new LinkedHashSet[OntologyType]()) typesList.add(t) typesMap = typesMap.updated(resource.uri, typesList) } } SpotlightLog.info(this.getClass, "Done. Loaded %d types.", i) typesMap } }
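A hedged usage sketch for the TSV loader above; the classpath resource name is hypothetical and is merely assumed to hold tab-separated resource/type pairs:

import java.io.InputStream
import org.dbpedia.spotlight.util.TypesLoader

object TypesLoaderUsage {
  def main(args: Array[String]): Unit = {
    // assumed resource: lines of the form "<DBpedia resource>\t<ontology type URI>"
    val in: InputStream = getClass.getResourceAsStream("/instance_types.tsv")
    val typesMap = TypesLoader.getTypesMapFromTSV_java(in)
    println(s"Loaded types for ${typesMap.size} resources")
    in.close()
  }
}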
Example 40
Source File: BMLHelper.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.filesystem.bml import java.io.{ByteArrayInputStream, InputStream} import java.util import java.util.UUID import com.webank.wedatasphere.linkis.bml.client.{BmlClient, BmlClientFactory} import com.webank.wedatasphere.linkis.bml.protocol.{BmlDownloadResponse, BmlUpdateResponse, BmlUploadResponse} import com.webank.wedatasphere.linkis.filesystem.exception.WorkspaceExceptionManager import org.springframework.stereotype.Component import scala.collection.JavaConversions._ @Component class BMLHelper { def upload(userName: String, content: String, fileName: String): util.Map[String, Object] = { val inputStream = new ByteArrayInputStream(content.getBytes("utf-8")) val client: BmlClient = createBMLClient(userName) val resource: BmlUploadResponse = client.uploadResource(userName, fileName, inputStream) if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80021) val map = new util.HashMap[String, Object] map += "resourceId" -> resource.resourceId map += "version" -> resource.version } def upload(userName: String, inputStream: InputStream, fileName: String, projectName: String): util.Map[String, Object] = { val client: BmlClient = createBMLClient(userName) val resource: BmlUploadResponse = client.uploadResource(userName, fileName, inputStream) if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80021) val map = new util.HashMap[String, Object] map += "resourceId" -> resource.resourceId map += "version" -> resource.version } def upload(userName: String, inputStream: InputStream, fileName: String): util.Map[String, Object] = { val client: BmlClient = createBMLClient(userName) val resource: BmlUploadResponse = client.uploadResource(userName, fileName, inputStream) if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80021) val map = new util.HashMap[String, Object] map += "resourceId" -> resource.resourceId map += "version" -> resource.version } def update(userName: String, resourceId: String, inputStream: InputStream): util.Map[String, Object] = { val client: BmlClient = createBMLClient(userName) val resource: BmlUpdateResponse = client.updateResource(userName, resourceId, "", inputStream) if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80022) val map = new util.HashMap[String, Object] map += "resourceId" -> resource.resourceId map += "version" -> resource.version } def update(userName: String, resourceId: String, content: String): util.Map[String, Object] = { val inputStream = new ByteArrayInputStream(content.getBytes("utf-8")) val client: BmlClient = createBMLClient(userName) val resource: BmlUpdateResponse = client.updateResource(userName, resourceId, UUID.randomUUID().toString + ".json", inputStream) if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80022) val map = new util.HashMap[String, Object] map += "resourceId" -> resource.resourceId map += "version" -> resource.version } def query(userName: String, resourceId: String, version: String): util.Map[String, Object] = { val client: BmlClient = createBMLClient(userName) var resource: BmlDownloadResponse = null if (version == null) resource = client.downloadResource(userName, resourceId, null) else resource = client.downloadResource(userName, resourceId, version) if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80023) val map = new util.HashMap[String, Object] map += "path" -> resource.fullFilePath map += "stream" -> resource.inputStream } private def inputstremToString(inputStream: InputStream): String = scala.io.Source.fromInputStream(inputStream).mkString private def createBMLClient(userName: String): BmlClient = if (userName == null) BmlClientFactory.createBmlClient() else BmlClientFactory.createBmlClient(userName) }
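The `inputstremToString` helper above reads the whole stream through `scala.io.Source` but never closes it. A self-contained variant (names assumed) that also closes the stream could look like this:

import java.io.{ ByteArrayInputStream, InputStream }
import scala.io.Source

object StreamToString {
  // read the entire stream as UTF-8 text, making sure it is closed afterwards
  def asString(in: InputStream): String =
    try Source.fromInputStream(in, "utf-8").mkString finally in.close()

  def main(args: Array[String]): Unit = {
    val in = new ByteArrayInputStream("hello stream".getBytes("utf-8"))
    println(asString(in)) // prints: hello stream
  }
}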
Example 41
Source File: QueryUtils.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.jobhistory.util import java.io.{InputStream, OutputStream} import java.util.Date import com.webank.wedatasphere.linkis.common.conf.CommonVars import com.webank.wedatasphere.linkis.common.io.FsPath import com.webank.wedatasphere.linkis.common.utils.{Logging, Utils} import com.webank.wedatasphere.linkis.jobhistory.entity.QueryTask import com.webank.wedatasphere.linkis.protocol.query.RequestInsertTask import com.webank.wedatasphere.linkis.storage.FSFactory import com.webank.wedatasphere.linkis.storage.fs.FileSystem import com.webank.wedatasphere.linkis.storage.utils.{FileSystemUtils, StorageUtils} import org.apache.commons.io.IOUtils import org.apache.commons.lang.time.DateFormatUtils object QueryUtils extends Logging { private val CODE_STORE_PREFIX = CommonVars("bdp.dataworkcloud.query.store.prefix", "hdfs:///tmp/bdp-ide/") private val CODE_STORE_SUFFIX = CommonVars("bdp.dataworkcloud.query.store.suffix", "") private val CHARSET = "utf-8" private val CODE_SPLIT = ";" private val LENGTH_SPLIT = "#" def storeExecutionCode(requestInsertTask: RequestInsertTask): Unit = { if (requestInsertTask.getExecutionCode.length < 60000) return val user: String = requestInsertTask.getUmUser val path: String = getCodeStorePath(user) val fsPath: FsPath = new FsPath(path) val fileSystem = FSFactory.getFsByProxyUser(fsPath, user).asInstanceOf[FileSystem] fileSystem.init(null) var os: OutputStream = null var position = 0L val codeBytes = requestInsertTask.getExecutionCode.getBytes(CHARSET) path.intern() synchronized { Utils.tryFinally { if (!fileSystem.exists(fsPath)) FileSystemUtils.createNewFile(fsPath, user, true) os = fileSystem.write(fsPath, false) position = fileSystem.get(path).getLength IOUtils.write(codeBytes, os) } { IOUtils.closeQuietly(os) if (fileSystem != null) fileSystem.close() } } val length = codeBytes.length requestInsertTask.setExecutionCode(path + CODE_SPLIT + position + LENGTH_SPLIT + length) } def exchangeExecutionCode(queryTask: QueryTask): Unit = { import scala.util.control.Breaks._ if (queryTask.getExecutionCode == null || !queryTask.getExecutionCode.startsWith(StorageUtils.HDFS_SCHEMA)) return val codePath = queryTask.getExecutionCode val path = codePath.substring(0, codePath.lastIndexOf(CODE_SPLIT)) val codeInfo = codePath.substring(codePath.lastIndexOf(CODE_SPLIT) + 1) val infos: Array[String] = codeInfo.split(LENGTH_SPLIT) val position = infos(0).toInt var lengthLeft = infos(1).toInt val tub = new Array[Byte](1024) val executionCode: StringBuilder = new StringBuilder val fsPath: FsPath = new FsPath(path) val fileSystem = FSFactory.getFsByProxyUser(fsPath, queryTask.getUmUser).asInstanceOf[FileSystem] fileSystem.init(null) var is: InputStream = null if (!fileSystem.exists(fsPath)) return Utils.tryFinally { is = fileSystem.read(fsPath) if (position > 0) is.skip(position) breakable { while (lengthLeft > 0) { val readed = is.read(tub) val useful = Math.min(readed, lengthLeft) if (useful < 0) break() lengthLeft -= useful executionCode.append(new String(tub, 0, useful, CHARSET)) } } } { if (fileSystem != null) fileSystem.close() IOUtils.closeQuietly(is) } queryTask.setExecutionCode(executionCode.toString()) } private def getCodeStorePath(user: String): String = { val date: String = DateFormatUtils.format(new Date, "yyyyMMdd") s"${CODE_STORE_PREFIX.getValue}${user}${CODE_STORE_SUFFIX.getValue}/executionCode/${date}/_scripts" } }
Example 42
Source File: StorageScriptFsWriter.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.script.writer import java.io.{ByteArrayInputStream, IOException, InputStream, OutputStream} import java.util import com.webank.wedatasphere.linkis.common.io.{FsPath, MetaData, Record} import com.webank.wedatasphere.linkis.storage.LineRecord import com.webank.wedatasphere.linkis.storage.script.{Compaction, ScriptFsWriter, ScriptMetaData} import com.webank.wedatasphere.linkis.storage.utils.{StorageConfiguration, StorageUtils} import org.apache.commons.io.IOUtils class StorageScriptFsWriter(val path: FsPath, val charset: String, outputStream: OutputStream = null) extends ScriptFsWriter { private val stringBuilder = new StringBuilder @scala.throws[IOException] override def addMetaData(metaData: MetaData): Unit = { val compactions = Compaction.listCompactions().filter(p => p.belongTo(StorageUtils.pathToSuffix(path.getPath))) val metadataLine = new util.ArrayList[String]() if (compactions.length > 0) { metaData.asInstanceOf[ScriptMetaData].getMetaData.map(compactions(0).compact).foreach(metadataLine.add) if (outputStream != null) { IOUtils.writeLines(metadataLine, "\n", outputStream, charset) } else { import scala.collection.JavaConversions._ metadataLine.foreach(m => stringBuilder.append(s"$m\n")) } } } @scala.throws[IOException] override def addRecord(record: Record): Unit = { // Cast to LineRecord rather than TableRecord so that non-table result sets can also be written by this class val scriptRecord = record.asInstanceOf[LineRecord] if (outputStream != null) { IOUtils.write(scriptRecord.getLine, outputStream, charset) } else { stringBuilder.append(scriptRecord.getLine) } } override def close(): Unit = { IOUtils.closeQuietly(outputStream) } override def flush(): Unit = if (outputStream != null) outputStream.flush() def getInputStream(): InputStream = { new ByteArrayInputStream(stringBuilder.toString().getBytes(StorageConfiguration.STORAGE_RS_FILE_TYPE.getValue)) } }
Example 43
Source File: StorageResultSetReader.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.resultset import java.io.{ByteArrayInputStream, IOException, InputStream} import com.webank.wedatasphere.linkis.common.io.resultset.{ResultSet, ResultSetReader} import com.webank.wedatasphere.linkis.common.io.{MetaData, Record} import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.storage.domain.Dolphin import com.webank.wedatasphere.linkis.storage.exception.StorageWarnException import com.webank.wedatasphere.linkis.storage.utils.StorageUtils import scala.collection.mutable.ArrayBuffer def readLine(): Array[Byte] = { var rowLen = 0 try rowLen = Dolphin.readInt(inputStream) catch { case t:StorageWarnException => info(s"Read finished(读取完毕)") ; return null case t: Throwable => throw t } val rowBuffer = ArrayBuffer[Byte]() var len = 0 //Read the entire line, except for the data of the line length(读取整行,除了行长的数据) while (rowLen > 0 && len >= 0) { if (rowLen > READ_CACHE) len = StorageUtils.readBytes(inputStream,bytes, READ_CACHE) else len = StorageUtils.readBytes(inputStream,bytes, rowLen) if (len > 0) { rowLen -= len rowBuffer ++= bytes.slice(0, len) } } rowCount = rowCount + 1 rowBuffer.toArray } @scala.throws[IOException] override def getRecord: Record = { if (metaData == null) throw new IOException("Must read metadata first(必须先读取metadata)") if (row == null) throw new IOException("Can't get the value of the field, maybe the IO stream has been read or has been closed!(拿不到字段的值,也许IO流已读取完毕或已被关闭!)") row } @scala.throws[IOException] override def getMetaData: MetaData = { if(metaData == null) init() metaData = deserializer.createMetaData(readLine()) metaData } @scala.throws[IOException] override def skip(recordNum: Int): Int = { if(recordNum < 0 ) return -1 if(metaData == null) getMetaData for(i <- recordNum until (0, -1)){ try inputStream.skip(Dolphin.readInt(inputStream)) catch { case t: Throwable => return -1} } recordNum } @scala.throws[IOException] override def getPosition: Long = rowCount @scala.throws[IOException] override def hasNext: Boolean = { if(metaData == null) getMetaData val line = readLine() if(line == null) return false row = deserializer.createRecord(line) if(row == null) return false true } @scala.throws[IOException] override def available: Long = inputStream.available() override def close(): Unit = inputStream.close() }
Example 44
Source File: ResultSetReader.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.resultset import java.io.InputStream import com.webank.wedatasphere.linkis.common.io.resultset.{ResultSet, ResultSetReader} import com.webank.wedatasphere.linkis.common.io.{FsPath, MetaData, Record} import com.webank.wedatasphere.linkis.storage.FSFactory import com.webank.wedatasphere.linkis.storage.exception.StorageErrorException import com.webank.wedatasphere.linkis.storage.resultset.table.{TableMetaData, TableRecord, TableResultSet} object ResultSetReader { def getResultSetReader[K <: MetaData, V <: Record](resultSet: ResultSet[K, V], inputStream: InputStream): ResultSetReader[K, V] = { new StorageResultSetReader[K, V](resultSet, inputStream) } def getResultSetReader[K <: MetaData, V <: Record](resultSet: ResultSet[K, V], value: String): ResultSetReader[K, V] = { new StorageResultSetReader[K, V](resultSet, value) } def getResultSetReader(res: String):ResultSetReader[_ <: MetaData, _ <: Record]= { val rsFactory = ResultSetFactory.getInstance if (rsFactory.isResultSet(res)) { val resultSet = rsFactory.getResultSet(res) ResultSetReader.getResultSetReader(resultSet, res) }else { val resPath = new FsPath(res) val resultSet = rsFactory.getResultSetByPath(resPath) val fs = FSFactory.getFs(resPath) fs.init(null) ResultSetReader.getResultSetReader(resultSet, fs.read(resPath)) } } def getTableResultReader(res: String):ResultSetReader[TableMetaData,TableRecord] = { val rsFactory = ResultSetFactory.getInstance if (rsFactory.isResultSet(res)) { val resultSet = rsFactory.getResultSet(res) if (ResultSetFactory.TABLE_TYPE != resultSet.resultSetType()) { throw new StorageErrorException(52002, "Result sets that are not tables are not supported(不支持不是表格的结果集)") } ResultSetReader.getResultSetReader(resultSet.asInstanceOf[TableResultSet], res) }else { val resPath = new FsPath(res) val resultSet = rsFactory.getResultSetByPath(resPath) if (ResultSetFactory.TABLE_TYPE != resultSet.resultSetType()) { throw new StorageErrorException(52002, "Result sets that are not tables are not supported(不支持不是表格的结果集)") } val fs = FSFactory.getFs(resPath) fs.init(null) ResultSetReader.getResultSetReader(resultSet.asInstanceOf[TableResultSet], fs.read(resPath)) } } }
Example 45
Source File: Dolphin.scala From Linkis with Apache License 2.0 | 5 votes |
package com.webank.wedatasphere.linkis.storage.domain import java.io.{IOException, InputStream} import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.storage.exception.StorageWarnException import com.webank.wedatasphere.linkis.storage.utils.{StorageConfiguration, StorageUtils} def getIntBytes(value: Int): Array[Byte] = { val str = value.toString val res = "0" * (INT_LEN - str.length) + str Dolphin.getBytes(res) } def getType(inputStream:InputStream):String = { val bytes = new Array[Byte](100) val len = StorageUtils.readBytes(inputStream,bytes, Dolphin.MAGIC_LEN + INT_LEN) if(len == -1) return null getType(Dolphin.getString(bytes, 0, len)) } def getType(content: String): String = { if(content.length < MAGIC.length || content.substring(0, MAGIC.length) != MAGIC) throw new IOException(s"File header type must be dolphin,content:$content is not") content.substring(MAGIC.length, MAGIC.length + INT_LEN ).toInt.toString } }
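`Dolphin.getType` relies on `StorageUtils.readBytes`, which is not shown here. A sketch of a comparable helper that keeps reading until the requested number of bytes has arrived (or the stream ends), under the assumption that this mirrors what the original utility does:

import java.io.InputStream

object ReadFully {
  // fill buffer(0 until len) from the stream; returns the bytes actually read, or -1 on immediate EOF
  def readBytes(in: InputStream, buffer: Array[Byte], len: Int): Int = {
    var offset = 0
    var eof = false
    while (offset < len && !eof) {
      val read = in.read(buffer, offset, len - offset)
      if (read < 0) eof = true else offset += read
    }
    if (offset == 0 && eof) -1 else offset
  }
}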
Example 46
Source File: Main.scala From seals with Apache License 2.0 | 5 votes |
package com.example.streaming import java.io.{ InputStream, OutputStream, FileInputStream, FileOutputStream } import cats.implicits._ import cats.effect.{ IO, IOApp, Blocker, ExitCode } import fs2.{ Stream, Chunk, Pure } import dev.tauri.seals.scodec.StreamCodecs._ object Main extends IOApp { sealed trait Color final case object Brown extends Color final case object Grey extends Color sealed trait Animal final case class Elephant(name: String, tuskLength: Float) extends Animal final case class Quokka(name: String, color: Color = Brown) extends Animal final case class Quagga(name: String, speed: Double) extends Animal def transform(from: InputStream, to: OutputStream)(f: Animal => Stream[Pure, Animal]): IO[Unit] = { Blocker[IO].use { blocker => val input = fs2.io.readInputStream( IO.pure(from), chunkSize = 1024, blocker = blocker ) val sIn: Stream[IO, Animal] = input.through(streamDecoderFromReified[Animal].toPipeByte[IO]).flatMap(f) val sOut: Stream[IO, Unit] = streamEncoderFromReified[Animal].encode(sIn).flatMap { bv => Stream.chunk(Chunk.bytes(bv.bytes.toArray)) }.through(fs2.io.writeOutputStream( IO.pure(to), blocker = blocker, closeAfterUse = true )) sOut.compile.drain } } val transformer: Animal => Stream[Pure, Animal] = { case Elephant(n, tl) => Stream(Elephant(n, tl + 17)) case Quokka(n, Brown) => Stream(Quokka(n, Grey)) case q @ Quokka(_, _) => Stream(q) case Quagga(_, _) => Stream.empty } override def run(args: List[String]): IO[ExitCode] = { val (from, to) = args match { case List(from, to, _*) => (from, to) case List(from) => (from, "out.bin") case _ => ("in.bin", "out.bin") } val task = transform(new FileInputStream(from), new FileOutputStream(to))(transformer) task.as(ExitCode.Success) } }
Example 47
Source File: SparkTest.scala From Spark-Scala-Maven-Example with MIT License | 5 votes |
package net.martinprobson.spark import java.io.InputStream import grizzled.slf4j.Logging import org.apache.spark.sql.SparkSession import org.scalatest.{Outcome, fixture} class SparkTest extends fixture.FunSuite with Logging { type FixtureParam = SparkSession def withFixture(test: OneArgTest): Outcome = { val sparkSession = SparkSession.builder .appName("Test-Spark-Local") .master("local[2]") .getOrCreate() try { withFixture(test.toNoArgTest(sparkSession)) } finally sparkSession.stop } test("empsRDD rowcount") { spark => val empsRDD = spark.sparkContext.parallelize(getInputData("/data/employees.json"), 5) assert(empsRDD.count === 1000) } test("titlesRDD rowcount") { spark => val titlesRDD = spark.sparkContext.parallelize(getInputData("/data/titles.json"), 5) assert(titlesRDD.count === 1470) } private def getInputData(name: String): Seq[String] = { val is: InputStream = getClass.getResourceAsStream(name) scala.io.Source.fromInputStream(is).getLines.toSeq } }
Example 48
Source File: FileStreamRecordReader.scala From geotrellis-pointcloud with Apache License 2.0 | 5 votes |
package geotrellis.pointcloud.spark.store.hadoop.formats import org.apache.hadoop.fs._ import org.apache.hadoop.io.compress.CompressionCodecFactory import org.apache.hadoop.mapreduce._ import org.apache.hadoop.mapreduce.lib.input._ import java.io.InputStream class FileStreamRecordReader[K, V](read: InputStream => (K, V)) extends RecordReader[K, V] { private var tup: (K, V) = null private var hasNext: Boolean = true def initialize(split: InputSplit, context: TaskAttemptContext) = { val path = split.asInstanceOf[FileSplit].getPath() val conf = context.getConfiguration() val fs = path.getFileSystem(conf) val is: InputStream = { val factory = new CompressionCodecFactory(conf) val codec = factory.getCodec(path) if (codec == null) fs.open(path) else codec.createInputStream(fs.open(path)) } tup = read(is) } def close = {} def getCurrentKey = tup._1 def getCurrentValue = { hasNext = false ; tup._2 } def getProgress = 1 def nextKeyValue = hasNext } trait FileStreamInputFormat[K, V] extends FileInputFormat[K, V] { def read(is: InputStream, context: TaskAttemptContext): (K, V) override def isSplitable(context: JobContext, fileName: Path) = false override def createRecordReader(split: InputSplit, context: TaskAttemptContext): RecordReader[K, V] = new FileStreamRecordReader({ is => read(is, context) }) }
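A sketch of how the `FileStreamInputFormat` trait above could be specialised; the subclass name and the whole-file String key/value choice are illustrative assumptions, not part of the project:

import java.io.InputStream
import org.apache.hadoop.mapreduce.TaskAttemptContext
import geotrellis.pointcloud.spark.store.hadoop.formats.FileStreamInputFormat
import scala.io.Source

// reads each (non-splittable) file as a single String value keyed by a constant
class WholeTextStreamInputFormat extends FileStreamInputFormat[String, String] {
  def read(is: InputStream, context: TaskAttemptContext): (String, String) = {
    val content = Source.fromInputStream(is).mkString
    is.close()
    ("file", content)
  }
}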
Example 49
Source File: S3PointCloudInputFormat.scala From geotrellis-pointcloud with Apache License 2.0 | 5 votes |
package geotrellis.pointcloud.spark.store.s3 import geotrellis.spark.store.s3._ import geotrellis.pointcloud.spark.store.hadoop.formats._ import geotrellis.pointcloud.util.Filesystem import io.pdal._ import io.circe.Json import io.circe.syntax._ import cats.syntax.either._ import org.apache.hadoop.mapreduce.{InputSplit, TaskAttemptContext} import org.apache.commons.io.FileUtils import java.io.{File, InputStream} import java.net.URI import scala.collection.JavaConverters._ mode match { case "s3" => new S3URIRecordReader[S3PointCloudHeader, List[PointCloud]](s3Client) { def read(key: String, uri: URI): (S3PointCloudHeader, List[PointCloud]) = { val s3Pipeline = pipeline .hcursor .downField("pipeline").downArray .downField("filename").withFocus(_ => uri.toString.asJson) .top.fold(pipeline)(identity) executePipeline(context)(key, s3Pipeline) } } case _ => val tmpDir = { val dir = PointCloudInputFormat.getTmpDir(context) if (dir == null) Filesystem.createDirectory() else Filesystem.createDirectory(dir) } new S3StreamRecordReader[S3PointCloudHeader, List[PointCloud]](s3Client) { def read(key: String, is: InputStream): (S3PointCloudHeader, List[PointCloud]) = { // copy remote file into local tmp dir tmpDir.mkdirs() // to be sure that dirs created val localPath = new File(tmpDir, key.replace("/", "_")) FileUtils.copyInputStreamToFile(is, localPath) is.close() // use local filename path if it's present in json val localPipeline = pipeline .hcursor .downField("pipeline").downArray .downField("filename").withFocus(_ => localPath.getAbsolutePath.asJson) .top.fold(pipeline)(identity) try executePipeline(context)(key, localPipeline) finally { localPath.delete() tmpDir.delete() } } } } } }
Example 50
Source File: CsvPublisher.scala From eel-sdk with Apache License 2.0 | 5 votes |
package io.eels.component.csv import java.io.InputStream import java.util.concurrent.atomic.{AtomicBoolean, AtomicLong} import com.sksamuel.exts.Logging import com.sksamuel.exts.io.Using import com.univocity.parsers.csv.CsvParser import io.eels.Row import io.eels.datastream.{DataStream, Publisher, Subscriber, Subscription} import io.eels.schema.StructType class CsvPublisher(createParser: () => CsvParser, inputFn: () => InputStream, header: Header, schema: StructType) extends Publisher[Seq[Row]] with Logging with Using { val rowsToSkip: Int = header match { case Header.FirstRow => 1 case _ => 0 } override def subscribe(subscriber: Subscriber[Seq[Row]]): Unit = { val input = inputFn() val parser = createParser() try { parser.beginParsing(input) val running = new AtomicBoolean(true) subscriber.subscribed(Subscription.fromRunning(running)) logger.debug(s"CSV Source will skip $rowsToSkip rows") val count = new AtomicLong(0) Iterator.continually(parser.parseNext) .takeWhile(_ != null) .takeWhile(_ => running.get) .drop(rowsToSkip) .map { record => Row(schema, record.toVector) } .grouped(DataStream.DefaultBatchSize) .foreach { ts => count.addAndGet(ts.size) subscriber.next(ts) } logger.debug(s"All ${count.get} rows read, notifying subscriber") subscriber.completed() } catch { case t: Throwable => logger.error(s"Error in CSV Source, subscriber will be notified", t) subscriber.error(t) } finally { logger.debug("Closing CSV source resources") parser.stopParsing() } } }
Example 51
Source File: ZincAnalysisParserTest.scala From exodus with MIT License | 5 votes |
package com.wix.build.zinc.analysis import java.io.InputStream import java.nio.charset.StandardCharsets import java.nio.file.{Files, Paths} import java.util.UUID import com.github.marschall.memoryfilesystem.MemoryFileSystemBuilder import com.wixpress.build.maven.Coordinates import org.specs2.mutable.SpecificationWithJUnit import org.specs2.specification.Scope class ZincAnalysisParserTest extends SpecificationWithJUnit { "ZincAnalysisParser" should { "parse repo with zinc analysis" in new baseCtx { private val parser = new ZincAnalysisParser(repoRoot) private val coordinatesToAnalyses: Map[Coordinates, List[ZincModuleAnalysis]] = parser.readModules() coordinatesToAnalyses must haveLength(greaterThan(0)) private val analysisList: List[ZincModuleAnalysis] = coordinatesToAnalyses.head._2 analysisList must haveLength(greaterThan(0)) } } abstract class baseCtx extends Scope { val fileSystem = MemoryFileSystemBuilder.newLinux().build() val repoRoot = fileSystem.getPath("repoRoot") Files.createDirectories(repoRoot) writeResourceAsFileToPath("/pom.xml", "pom.xml", "java-junit-sample/") writeResourceAsFileToPath("/aggregate-pom.xml", "pom.xml", "") writeResourceAsFileToPath("/compile.relations", "compile.relations","java-junit-sample/target/analysis/") writeResourceAsFileToPath("/test-compile.relations", "test-compile.relations","java-junit-sample/target/analysis/") private def writeResourceAsFileToPath(resource: String, fileName: String, path: String) = { if (path.nonEmpty) Files.createDirectories(repoRoot.resolve(path)) val stream: InputStream = getClass.getResourceAsStream(s"$resource") val compileRelations = scala.io.Source.fromInputStream(stream).mkString Files.write(repoRoot.resolve(s"$path$fileName"), compileRelations.getBytes(StandardCharsets.UTF_8)) } def path(withName: String) = repoRoot.resolve(withName) def random = UUID.randomUUID().toString } }
Example 52
Source File: PredefinedTag.scala From smui with Apache License 2.0 | 5 votes |
package models import java.io.InputStream import java.sql.Connection import play.api.Logger import play.api.libs.json.{Json, OFormat} case class PredefinedTag(property: Option[String], value: String, solrIndexName: Option[String], exported: Option[Boolean]) { } object PredefinedTag { val logger = Logger(getClass) implicit val jsonFormat: OFormat[PredefinedTag] = Json.format[PredefinedTag] def fromStream(stream: InputStream): Seq[PredefinedTag] = { try { Json.parse(stream).as[Seq[PredefinedTag]] } finally { stream.close() } } def updateInDB(predefinedTags: Seq[PredefinedTag])(implicit connection: Connection): (Seq[InputTagId], Seq[InputTag]) = { val indexIdsByName = SolrIndex.listAll.map(i => i.name -> i.id).toMap val tagsInDBByContent = InputTag.loadAll().map(t => t.tagContent -> t).toMap val newTags = predefinedTags.map { tag => TagContent(tag.solrIndexName.flatMap(indexIdsByName.get), tag.property, tag.value) -> tag }.toMap val toDelete = tagsInDBByContent.filter { case (content, tag) => tag.predefined && !newTags.contains(content) }.map(_._2.id).toSeq val toInsert = newTags.filter(t => !tagsInDBByContent.contains(t._1)).map { case (tc, t) => InputTag.create(tc.solrIndexId, t.property, t.value, t.exported.getOrElse(true), predefined = true) }.toSeq InputTag.insert(toInsert: _*) InputTag.deleteByIds(toDelete) if (toDelete.nonEmpty || toInsert.nonEmpty) { logger.info(s"Inserted ${toInsert.size} new predefined tags into the DB and deleted ${toDelete.size} no longer existing predefined tags.") } (toDelete, toInsert) } }
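A usage sketch for `PredefinedTag.fromStream`; the resource name is hypothetical and assumed to contain the JSON array of predefined tags:

import models.PredefinedTag

object PredefinedTagUsage {
  def main(args: Array[String]): Unit = {
    val stream = getClass.getResourceAsStream("/predefined-tags.json")
    val tags = PredefinedTag.fromStream(stream) // fromStream closes the stream itself
    tags.foreach(t => println(s"${t.property.getOrElse("-")} = ${t.value}"))
  }
}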
Example 53
Source File: SchemaReader.scala From darwin with Apache License 2.0 | 5 votes |
package it.agilelab.darwin.app.mock import java.io.{File, InputStream} import org.apache.avro.Schema object SchemaReader { def readFromResources(p: String): Schema = { read(getClass.getClassLoader.getResourceAsStream(p)) } def read(f: File): Schema = { val parser = new Schema.Parser() parser.parse(f) } def read(s: String): Schema = { val parser = new Schema.Parser() parser.parse(s) } def read(is: InputStream): Schema = { val parser = new Schema.Parser() parser.parse(is) } }
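Calling the overloads above is straightforward; a small usage sketch in which the resource name and the inline schema are assumptions:

import it.agilelab.darwin.app.mock.SchemaReader
import org.apache.avro.Schema

object SchemaReaderUsage {
  def main(args: Array[String]): Unit = {
    // from the classpath
    val fromResource: Schema = SchemaReader.readFromResources("schemas/user.avsc")
    // from a literal definition
    val fromString: Schema = SchemaReader.read("""{"type":"record","name":"User","fields":[{"name":"id","type":"long"}]}""")
    println(fromResource.getFullName + " / " + fromString.getFullName)
  }
}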
Example 54
Source File: JsonProtocol.scala From darwin with Apache License 2.0 | 5 votes |
package it.agilelab.darwin.connector.rest import java.io.InputStream import org.apache.avro.Schema import org.codehaus.jackson.map.ObjectMapper import org.codehaus.jackson.node.JsonNodeFactory import it.agilelab.darwin.common.compat._ trait JsonProtocol { val objectMapper = new ObjectMapper() def toJson(schemas : Seq[(Long,Schema)]): String = { val data = schemas.map { case (_, schema) => objectMapper.readTree(schema.toString) }.foldLeft(JsonNodeFactory.instance.arrayNode()) { case (array, node) => array.add(node) array } objectMapper.writeValueAsString(data) } def toSeqOfIdSchema(in: InputStream): Seq[(Long, Schema)] = { val node = objectMapper.readTree(in) node.getElements.toScala.map { node => val id = node.get("id").asText().toLong val schemaNode = node.get("schema") val schemaToString = objectMapper.writeValueAsString(schemaNode) val parser = new Schema.Parser() val schema = parser.parse(schemaToString) (id, schema) }.toVector } def toSchema(in: InputStream): Schema = { val parser = new Schema.Parser() parser.parse(in) } }
Example 55
Source File: ImageLoaderUtils.scala From keystone with Apache License 2.0 | 5 votes |
package keystoneml.loaders import java.awt.image.BufferedImage import java.io.{InputStream, ByteArrayInputStream} import java.net.URI import java.util.zip.GZIPInputStream import javax.imageio.ImageIO import keystoneml.loaders.VOCLoader._ import org.apache.commons.compress.archivers.ArchiveStreamFactory import org.apache.commons.compress.archivers.tar.TarArchiveInputStream import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import keystoneml.pipelines.Logging import keystoneml.utils._ import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag object ImageLoaderUtils extends Logging { def loadFiles[L, I <: AbstractLabeledImage[L] : ClassTag]( filePathsRDD: RDD[URI], labelsMap: String => L, imageBuilder: (Image, L, Option[String]) => I, // TODO(etrain): We can probably do this with implicits. namePrefix: Option[String] = None): RDD[I] = { filePathsRDD.flatMap(fileUri => loadFile(fileUri, labelsMap, imageBuilder, namePrefix)) } private def loadFile[L, I <: AbstractLabeledImage[L]]( fileUri: URI, labelsMap: String => L, imageBuilder: (Image, L, Option[String]) => I, namePrefix: Option[String]): Iterator[I] = { val filePath = new Path(fileUri) val conf = new Configuration(true) val fs = FileSystem.get(filePath.toUri(), conf) val fStream = fs.open(filePath) val tarStream = new ArchiveStreamFactory().createArchiveInputStream( "tar", fStream).asInstanceOf[TarArchiveInputStream] var entry = tarStream.getNextTarEntry() val imgs = new ArrayBuffer[I] while (entry != null) { if (!entry.isDirectory && (namePrefix.isEmpty || entry.getName.startsWith(namePrefix.get))) { var offset = 0 var ret = 0 val content = new Array[Byte](entry.getSize().toInt) while (ret >= 0 && offset != entry.getSize()) { ret = tarStream.read(content, offset, content.length - offset) if (ret >= 0) { offset += ret } } val bais = new ByteArrayInputStream(content) val image = ImageUtils.loadImage(bais).map { img => imageBuilder(img, labelsMap(entry.getName), Some(entry.getName)) } imgs ++= image } entry = tarStream.getNextTarEntry() } imgs.iterator } }
Example 56
Source File: JsonConverters.scala From mango with Apache License 2.0 | 5 votes |
package com.kakao.mango.json import java.io.InputStream import com.kakao.shaded.jackson.core.{JsonParser, JsonToken} import com.kakao.shaded.jackson.databind.{DeserializationFeature, ObjectMapper, ObjectWriter} import com.kakao.shaded.jackson.module.afterburner.AfterburnerModule import com.kakao.shaded.jackson.module.scala.DefaultScalaModule import scala.language.implicitConversions import scala.reflect._ def streamJson(parser: JsonParser): JsonIterator = { val accessor = JsonTokenAccessor(parser) new Iterator[(JsonToken, JsonTokenAccessor)] { override def hasNext: Boolean = !parser.isClosed override def next(): (JsonToken, JsonTokenAccessor) = { val token = parser.nextToken() if (token == null) parser.close() (token, accessor) } } } }
Example 57
Source File: CreateJacksonParser.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.json import java.io.{ByteArrayInputStream, InputStream, InputStreamReader} import java.nio.channels.Channels import java.nio.charset.Charset import com.fasterxml.jackson.core.{JsonFactory, JsonParser} import org.apache.hadoop.io.Text import sun.nio.cs.StreamDecoder import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.unsafe.types.UTF8String private[sql] object CreateJacksonParser extends Serializable { def string(jsonFactory: JsonFactory, record: String): JsonParser = { jsonFactory.createParser(record) } def utf8String(jsonFactory: JsonFactory, record: UTF8String): JsonParser = { val bb = record.getByteBuffer assert(bb.hasArray) val bain = new ByteArrayInputStream( bb.array(), bb.arrayOffset() + bb.position(), bb.remaining()) jsonFactory.createParser(new InputStreamReader(bain, "UTF-8")) } def text(jsonFactory: JsonFactory, record: Text): JsonParser = { jsonFactory.createParser(record.getBytes, 0, record.getLength) } // Jackson parsers can be ranked according to their performance: // 1. Array based with actual encoding UTF-8 in the array. This is the fastest parser // but it doesn't allow to set encoding explicitly. Actual encoding is detected automatically // by checking leading bytes of the array. // 2. InputStream based with actual encoding UTF-8 in the stream. Encoding is detected // automatically by analyzing first bytes of the input stream. // 3. Reader based parser. This is the slowest parser used here but it allows to create // a reader with specific encoding. // The method creates a reader for an array with given encoding and sets size of internal // decoding buffer according to size of input array. private def getStreamDecoder(enc: String, in: Array[Byte], length: Int): StreamDecoder = { val bais = new ByteArrayInputStream(in, 0, length) val byteChannel = Channels.newChannel(bais) val decodingBufferSize = Math.min(length, 8192) val decoder = Charset.forName(enc).newDecoder() StreamDecoder.forDecoder(byteChannel, decoder, decodingBufferSize) } def text(enc: String, jsonFactory: JsonFactory, record: Text): JsonParser = { val sd = getStreamDecoder(enc, record.getBytes, record.getLength) jsonFactory.createParser(sd) } def inputStream(jsonFactory: JsonFactory, is: InputStream): JsonParser = { jsonFactory.createParser(is) } def inputStream(enc: String, jsonFactory: JsonFactory, is: InputStream): JsonParser = { jsonFactory.createParser(new InputStreamReader(is, enc)) } def internalRow(jsonFactory: JsonFactory, row: InternalRow): JsonParser = { val ba = row.getBinary(0) jsonFactory.createParser(ba, 0, ba.length) } def internalRow(enc: String, jsonFactory: JsonFactory, row: InternalRow): JsonParser = { val binary = row.getBinary(0) val sd = getStreamDecoder(enc, binary, binary.length) jsonFactory.createParser(sd) } }
Example 58
Source File: PythonSQLUtils.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.api.python import java.io.InputStream import java.nio.channels.Channels import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.python.PythonRDDServer import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, SQLContext} import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.expressions.ExpressionInfo import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.execution.arrow.ArrowConverters import org.apache.spark.sql.types.DataType private[sql] object PythonSQLUtils { def parseDataType(typeText: String): DataType = CatalystSqlParser.parseDataType(typeText) // This is needed when generating SQL documentation for built-in functions. def listBuiltinFunctionInfos(): Array[ExpressionInfo] = { FunctionRegistry.functionSet.flatMap(f => FunctionRegistry.builtin.lookupFunction(f)).toArray } private[sql] class ArrowRDDServer(sqlContext: SQLContext) extends PythonRDDServer { override protected def streamToRDD(input: InputStream): RDD[Array[Byte]] = { // Create array to consume iterator so that we can safely close the inputStream val batches = ArrowConverters.getBatchesFromStream(Channels.newChannel(input)).toArray // Parallelize the record batches to create an RDD JavaRDD.fromRDD(sqlContext.sparkContext.parallelize(batches, batches.length)) } }
Example 59
Source File: CodecStreams.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources import java.io.{InputStream, OutputStream, OutputStreamWriter} import java.nio.charset.{Charset, StandardCharsets} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.io.compress._ import org.apache.hadoop.mapreduce.JobContext import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat import org.apache.hadoop.util.ReflectionUtils import org.apache.spark.TaskContext object CodecStreams { private def getDecompressionCodec(config: Configuration, file: Path): Option[CompressionCodec] = { val compressionCodecs = new CompressionCodecFactory(config) Option(compressionCodecs.getCodec(file)) } def createInputStream(config: Configuration, file: Path): InputStream = { val fs = file.getFileSystem(config) val inputStream: InputStream = fs.open(file) getDecompressionCodec(config, file) .map(codec => codec.createInputStream(inputStream)) .getOrElse(inputStream) } def getCompressionExtension(context: JobContext): String = { getCompressionCodec(context) .map(_.getDefaultExtension) .getOrElse("") } }
Example 60
Source File: OffsetSeqLog.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.streaming import java.io.{InputStream, OutputStream} import java.nio.charset.StandardCharsets._ import scala.io.{Source => IOSource} import org.apache.spark.sql.SparkSession class OffsetSeqLog(sparkSession: SparkSession, path: String) extends HDFSMetadataLog[OffsetSeq](sparkSession, path) { override protected def deserialize(in: InputStream): OffsetSeq = { // called inside a try-finally where the underlying stream is closed in the caller def parseOffset(value: String): Offset = value match { case OffsetSeqLog.SERIALIZED_VOID_OFFSET => null case json => SerializedOffset(json) } val lines = IOSource.fromInputStream(in, UTF_8.name()).getLines() if (!lines.hasNext) { throw new IllegalStateException("Incomplete log file") } val version = parseVersion(lines.next(), OffsetSeqLog.VERSION) // read metadata val metadata = lines.next().trim match { case "" => None case md => Some(md) } OffsetSeq.fill(metadata, lines.map(parseOffset).toArray: _*) } override protected def serialize(offsetSeq: OffsetSeq, out: OutputStream): Unit = { // called inside a try-finally where the underlying stream is closed in the caller out.write(("v" + OffsetSeqLog.VERSION).getBytes(UTF_8)) // write metadata out.write('\n') out.write(offsetSeq.metadata.map(_.json).getOrElse("").getBytes(UTF_8)) // write offsets, one per line offsetSeq.offsets.map(_.map(_.json)).foreach { offset => out.write('\n') offset match { case Some(json: String) => out.write(json.getBytes(UTF_8)) case None => out.write(OffsetSeqLog.SERIALIZED_VOID_OFFSET.getBytes(UTF_8)) } } } } object OffsetSeqLog { private[streaming] val VERSION = 1 private val SERIALIZED_VOID_OFFSET = "-" }
Example 61
Source File: CommitLog.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.streaming import java.io.{InputStream, OutputStream} import java.nio.charset.StandardCharsets._ import scala.io.{Source => IOSource} import org.json4s.NoTypeHints import org.json4s.jackson.Serialization import org.apache.spark.sql.SparkSession class CommitLog(sparkSession: SparkSession, path: String) extends HDFSMetadataLog[CommitMetadata](sparkSession, path) { import CommitLog._ override protected def deserialize(in: InputStream): CommitMetadata = { // called inside a try-finally where the underlying stream is closed in the caller val lines = IOSource.fromInputStream(in, UTF_8.name()).getLines() if (!lines.hasNext) { throw new IllegalStateException("Incomplete log file in the offset commit log") } parseVersion(lines.next.trim, VERSION) val metadataJson = if (lines.hasNext) lines.next else EMPTY_JSON CommitMetadata(metadataJson) } override protected def serialize(metadata: CommitMetadata, out: OutputStream): Unit = { // called inside a try-finally where the underlying stream is closed in the caller out.write(s"v${VERSION}".getBytes(UTF_8)) out.write('\n') // write metadata out.write(metadata.json.getBytes(UTF_8)) } } object CommitLog { private val VERSION = 1 private val EMPTY_JSON = "{}" } case class CommitMetadata(nextBatchWatermarkMs: Long = 0) { def json: String = Serialization.write(this)(CommitMetadata.format) } object CommitMetadata { implicit val format = Serialization.formats(NoTypeHints) def apply(json: String): CommitMetadata = Serialization.read[CommitMetadata](json) }
Example 62
Source File: ProcessTestUtils.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.test import java.io.{InputStream, IOException} import scala.sys.process.BasicIO object ProcessTestUtils { class ProcessOutputCapturer(stream: InputStream, capture: String => Unit) extends Thread { this.setDaemon(true) override def run(): Unit = { try { BasicIO.processFully(capture)(stream) } catch { case _: IOException => // Ignores the IOException thrown when the process termination, which closes the input // stream abruptly. } } } }
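A hedged sketch of wiring `ProcessOutputCapturer` to a child process's stdout; the command is an arbitrary example:

import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer

object CaptureExample {
  def main(args: Array[String]): Unit = {
    val process = new ProcessBuilder("echo", "hello").start()
    val capturer = new ProcessOutputCapturer(process.getInputStream, line => println(s"child: $line"))
    capturer.start()
    process.waitFor()
    capturer.join(1000) // give the daemon thread a moment to drain the stream
  }
}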
Example 63
Source File: SQLRunnerSuite.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package com.sap.spark.cli import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream} import org.apache.spark.SparkContext import org.apache.spark.sql.{GlobalSapSQLContext, SQLContext} import org.scalatest.{BeforeAndAfterEach, FunSuite, ShouldMatchers} // good call val goodOpts = SQLRunner.parseOpts(List("a.sql", "b.sql", "-o", "output.csv")) goodOpts.sqlFiles should be(List("a.sql", "b.sql")) goodOpts.output should be(Some("output.csv")) // bad call val badOpts = SQLRunner.parseOpts(List()) badOpts.sqlFiles should be(List()) badOpts.output should be(None) // ugly call val uglyOpts = SQLRunner.parseOpts(List("a.sql", "-o", "output.csv", "b.sql")) uglyOpts.sqlFiles should be(List("a.sql", "b.sql")) uglyOpts.output should be(Some("output.csv")) } def runSQLTest(input: String, expectedOutput: String): Unit = { val inputStream: InputStream = new ByteArrayInputStream(input.getBytes()) val outputStream = new ByteArrayOutputStream() SQLRunner.sql(inputStream, outputStream) val output = outputStream.toString output should be(expectedOutput) } test("can run dummy query") { val input = "SELECT 1;" val output = "1\n" runSQLTest(input, output) } test("can run multiple dummy queries") { val input = """ |SELECT 1;SELECT 2; |SELECT 3; """.stripMargin val output = "1\n2\n3\n" runSQLTest(input, output) } test("can run a basic example with tables") { val input = """ |SELECT * FROM DEMO_TABLE; |SELECT * FROM DEMO_TABLE LIMIT 1; |DROP TABLE DEMO_TABLE; """.stripMargin val output = "1,a\n2,b\n3,c\n1,a\n" runSQLTest(input, output) } test("can run an example with comments") { val input = """ |SELECT * FROM DEMO_TABLE; -- this is the first query |SELECT * FROM DEMO_TABLE LIMIT 1; |-- now let's drop a table |DROP TABLE DEMO_TABLE; """.stripMargin val output = "1,a\n2,b\n3,c\n1,a\n" runSQLTest(input, output) } }
Example 64
Source File: S3Client.scala From akka-persistence-s3 with MIT License | 5 votes |
package akka.persistence.s3 import java.io.InputStream import com.amazonaws.auth.{ BasicAWSCredentials, DefaultAWSCredentialsProviderChain } import com.amazonaws.services.s3.{ S3ClientOptions, AmazonS3Client } import com.amazonaws.services.s3.model._ import scala.concurrent.{ Future, ExecutionContext } trait S3Client { val s3ClientConfig: S3ClientConfig lazy val client: AmazonS3Client = { val client = if (s3ClientConfig.awsUseDefaultCredentialsProviderChain) new AmazonS3Client(new DefaultAWSCredentialsProviderChain).withRegion(s3ClientConfig.region) else new AmazonS3Client(new BasicAWSCredentials(s3ClientConfig.awsKey, s3ClientConfig.awsSecret)) s3ClientConfig.endpoint.foreach { endpoint => client.withEndpoint(endpoint) () } client.setS3ClientOptions(new S3ClientOptions() .withPathStyleAccess(s3ClientConfig.options.pathStyleAccess) .withChunkedEncodingDisabled(s3ClientConfig.options.chunkedEncodingDisabled)) client } def createBucket(bucketName: String)(implicit ec: ExecutionContext): Future[Bucket] = Future { client.createBucket(bucketName) } def deleteBucket(bucketName: String)(implicit ec: ExecutionContext): Future[Unit] = Future { client.deleteBucket(bucketName) } def putObject(bucketName: String, key: String, input: InputStream, metadata: ObjectMetadata)(implicit ec: ExecutionContext): Future[PutObjectResult] = Future { client.putObject(new PutObjectRequest(bucketName, key, input, metadata)) } def getObject(bucketName: String, key: String)(implicit ec: ExecutionContext): Future[S3Object] = Future { client.getObject(new GetObjectRequest(bucketName, key)) } def listObjects(request: ListObjectsRequest)(implicit ec: ExecutionContext): Future[ObjectListing] = Future { client.listObjects(request) } def deleteObject(bucketName: String, key: String)(implicit ec: ExecutionContext): Future[Unit] = Future { client.deleteObject(bucketName, key) } def deleteObjects(request: DeleteObjectsRequest)(implicit ec: ExecutionContext): Future[Unit] = Future { client.deleteObjects(request) } }
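A hedged sketch of uploading in-memory bytes through `putObject` above, assuming an `S3Client` instance has already been wired with a concrete `S3ClientConfig`:

import java.io.ByteArrayInputStream
import akka.persistence.s3.S3Client
import com.amazonaws.services.s3.model.{ ObjectMetadata, PutObjectResult }
import scala.concurrent.{ ExecutionContext, Future }

object S3Upload {
  def uploadBytes(s3: S3Client, bucket: String, key: String, bytes: Array[Byte])
                 (implicit ec: ExecutionContext): Future[PutObjectResult] = {
    val metadata = new ObjectMetadata()
    metadata.setContentLength(bytes.length) // lets the SDK skip buffering the stream to compute the length
    s3.putObject(bucket, key, new ByteArrayInputStream(bytes), metadata)
  }
}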
Example 65
Source File: EnrichTruckData.scala From trucking-iot with Apache License 2.0 | 5 votes |
package com.orendainx.trucking.nifi.processors import java.io.{InputStream, OutputStream} import java.nio.charset.StandardCharsets import java.util.concurrent.atomic.AtomicReference import java.util.Scanner import com.orendainx.trucking.commons.models.{EnrichedTruckData, TruckData} import com.orendainx.trucking.enrichment.WeatherAPI import org.apache.nifi.annotation.behavior._ import org.apache.nifi.annotation.documentation.{CapabilityDescription, Tags} import org.apache.nifi.components.PropertyDescriptor import org.apache.nifi.logging.ComponentLog import org.apache.nifi.processor.io.InputStreamCallback import org.apache.nifi.processor.io.OutputStreamCallback import org.apache.nifi.processor._ import scala.collection.JavaConverters._ @Tags(Array("trucking", "data", "event", "enrich", "iot")) @CapabilityDescription("Enriches simulated truck sensor data. Find the master project and its code, documentation and corresponding tutorials at: https://github.com/orendain/trucking-iot") @InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED) @TriggerSerially @WritesAttributes(Array( new WritesAttribute(attribute = "dataType", description = "The class name of the resulting enriched data type.") )) class EnrichTruckData extends AbstractProcessor { private var log: ComponentLog = _ private val RelSuccess = new Relationship.Builder().name("success").description("All generated data is routed to this relationship.").build override def init(context: ProcessorInitializationContext): Unit = { log = context.getLogger } override def onTrigger(context: ProcessContext, session: ProcessSession): Unit = { var flowFile = session.get log.debug(s"Flowfile received: $flowFile") // Convert the entire stream of bytes from the flow file into a string val content = new AtomicReference[String] session.read(flowFile, new InputStreamCallback { override def process(inputStream: InputStream) = { val scanner = new Scanner(inputStream).useDelimiter("\\A") val result = if (scanner.hasNext()) scanner.next() else "" log.debug(s"Parsed content: $result") content.set(result) } }) // Form a TruckData object from content, then creating an EnrichedTruckData object by making the appropriate // calls to WeatherAPI val truckData = TruckData.fromCSV(content.get()) val enrichedTruckData = EnrichedTruckData(truckData, WeatherAPI.default.getFog(truckData.eventType), WeatherAPI.default.getRain(truckData.eventType), WeatherAPI.default.getWind(truckData.eventType)) log.debug(s"EnrichedData generated: $enrichedTruckData") // Add the new data type as a flow file attribute flowFile = session.putAttribute(flowFile, "dataType", enrichedTruckData.getClass.getSimpleName) // Replace the flow file, writing in the new content flowFile = session.write(flowFile, new OutputStreamCallback { override def process(outputStream: OutputStream) = outputStream.write(enrichedTruckData.toCSV.getBytes(StandardCharsets.UTF_8)) }) // TODO: document what this does session.getProvenanceReporter.route(flowFile, RelSuccess) session.transfer(flowFile, RelSuccess) session.commit() } // Define properties and relationships override def getSupportedPropertyDescriptors: java.util.List[PropertyDescriptor] = List.empty[PropertyDescriptor].asJava override def getRelationships: java.util.Set[Relationship] = Set(RelSuccess).asJava }
Example 66
Source File: FileUploadServlet.scala From udash-core with Apache License 2.0 | 5 votes |
package io.udash.rpc.utils import java.io.InputStream import java.nio.file.Paths import javax.servlet.http.{HttpServlet, HttpServletRequest, HttpServletResponse} import com.avsystem.commons._ protected def handleFile(name: String, content: InputStream): Unit override protected def doPost(request: HttpServletRequest, response: HttpServletResponse): Unit = { request.getParts.asScala .filter(part => fileFields.contains(part.getName)) .foreach(filePart => { val fileName = Paths.get(filePart.getSubmittedFileName).getFileName.toString val fileContent = filePart.getInputStream handleFile(fileName, fileContent) fileContent.close() }) } }
Example 67
Source File: StreamExtensions.scala From random-projections-at-berlinbuzzwords with Apache License 2.0 | 5 votes |
package com.stefansavev.core.serialization import java.io.{InputStream, OutputStream} object StreamExtensions{ implicit class IntSerializerExt(outputStream: OutputStream) { def writeInt(value: Int): Unit = { IntSerializer.write(outputStream, value) } } implicit class IntDeSerializerExt(inputStream: InputStream) { def readInt(): Int = { IntSerializer.read(inputStream) } } implicit class DoubleArraySerializerExt(outputStream: OutputStream) { def writeDoubleArray(values: Array[Double]): Unit = { DoubleArraySerializer.write(outputStream, values) } } implicit class DoubleArrayDeSerializerExt(inputStream: InputStream) { def readDoubleArray(): Array[Double] = { DoubleArraySerializer.read(inputStream) } } }
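A round-trip sketch using the extension methods above; it assumes the project's IntSerializer and DoubleArraySerializer read back exactly what they wrote, which is what the implicit classes delegate to:

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream }
import com.stefansavev.core.serialization.StreamExtensions._

object StreamExtensionsUsage {
  def main(args: Array[String]): Unit = {
    val out = new ByteArrayOutputStream()
    out.writeInt(42)
    out.writeDoubleArray(Array(1.0, 2.5))

    val in = new ByteArrayInputStream(out.toByteArray)
    println(in.readInt())                        // 42
    println(in.readDoubleArray().mkString(", ")) // 1.0, 2.5
  }
}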
Example 68
Source File: GenericArraySerializer.scala From random-projections-at-berlinbuzzwords with Apache License 2.0 | 5 votes |
package com.stefansavev.core.serialization import java.io.{InputStream, OutputStream} import com.stefansavev.core.serialization.PrimitiveTypeSerializers.TypedIntSerializer import scala.reflect.ClassTag object GenericArraySerializer { // class GenericArraySerializer[T: ClassTag](ev: T <:< AnyRef, nestedSerializer: TypedSerializer[T]) extends TypedSerializer[Array[T]] { val clazz = scala.reflect.classTag[T].runtimeClass if (clazz.equals(Int.getClass) || clazz.equals(Double.getClass)) { throw new IllegalStateException("GenericArraySerializer should not be applied to primitive types") } def toBinary(outputStream: OutputStream, input: Array[T]): Unit = { TypedIntSerializer.toBinary(outputStream, input.length) var i = 0 while (i < input.length) { nestedSerializer.toBinary(outputStream, input(i)) i += 1 } } def fromBinary(inputStream: InputStream): Array[T] = { val len = TypedIntSerializer.fromBinary(inputStream) val output = Array.ofDim[T](len) var i = 0 while (i < len) { output(i) = nestedSerializer.fromBinary(inputStream) i += 1 } output } def name: String = s"GenericArraySerializer(${nestedSerializer.name})" } implicit def genericArraySerializer[T: ClassTag](implicit ev: T <:< AnyRef, nestedSerializer: TypedSerializer[T]): GenericArraySerializer[T] = { new GenericArraySerializer[T](ev, nestedSerializer) } }
Example 69
Source File: IsoSerializers.scala From random-projections-at-berlinbuzzwords with Apache License 2.0 | 5 votes |
package com.stefansavev.core.serialization import java.io.{InputStream, OutputStream} object IsoSerializers { class IsoSerializer[A, B](iso: Iso[A, B], serB: TypedSerializer[B]) extends TypedSerializer[A]{ def toBinary(outputStream: OutputStream, input: A): Unit = { serB.toBinary(outputStream, iso.from(input)) } def fromBinary(inputStream: InputStream): A = { iso.to(serB.fromBinary(inputStream)) } def name: String = s"IsoSerializer(via = ${serB.name})" } implicit def isoSerializer[A, B](implicit iso: Iso[A, B], serB: TypedSerializer[B]): IsoSerializer[A, B] = { new IsoSerializer[A, B](iso, serB) } }
Example 70
Source File: String2IdHasherSerializer.scala From random-projections-at-berlinbuzzwords with Apache License 2.0 | 5 votes |
package com.stefansavev.core.string2id import java.io.{InputStream, OutputStream} import com.stefansavev.core.serialization.{StringSerializer, IntSerializer, TypedSerializer} import com.stefansavev.core.serialization.StringSerializer object String2IdHasherSerialization { implicit object String2IdHasherSerializer extends TypedSerializer[String2IdHasher] { def toBinary(outputStream: OutputStream, string2Id: String2IdHasher): Unit = { if (string2Id == null){ throw new IllegalStateException("string2IdHasher cannot be null") } val settings = string2Id.getSettings() IntSerializer.write(outputStream, settings.maxValues) IntSerializer.write(outputStream, settings.avgStringLen) IntSerializer.write(outputStream, settings.toleratedNumberOfCollisions) var id = 0 val numStrings = string2Id.numberOfUniqueStrings() IntSerializer.write(outputStream, numStrings) while(id < numStrings){ val str = string2Id.getStringAtInternalIndex(id).get StringSerializer.write(outputStream, str) id += 1 } } def fromBinary(inputStream: InputStream): String2IdHasher = { val maxValues = IntSerializer.read(inputStream) val avgStringLen = IntSerializer.read(inputStream) val numCollisions = IntSerializer.read(inputStream) val settings = new StringIdHasherSettings(maxValues, avgStringLen, numCollisions) val string2IdHasher = new String2IdHasher(settings) val numStrings = IntSerializer.read(inputStream) var i = 0 while(i < numStrings){ val str = StringSerializer.read(inputStream) val handle = string2IdHasher.add(str) val index = string2IdHasher.getInternalId(handle) if (index != i){ throw new IllegalStateException("Internal error while reading hashed strings") } i += 1 } string2IdHasher } def name: String = "String2IdHasherSerializer" } }
Example 71
Source File: utils.scala From spark-http-stream with BSD 2-Clause "Simplified" License | 5 votes |
package org.apache.spark.sql.execution.streaming.http

import org.apache.spark.sql.types.StructField
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.types.TimestampType
import org.apache.spark.SparkConf
import org.apache.commons.io.IOUtils
import org.apache.spark.serializer.KryoSerializer
import java.io.InputStream
import com.esotericsoftware.kryo.io.Input
import java.io.ByteArrayOutputStream

class WrongArgumentException(name: String, value: Any)
  extends RuntimeException(s"wrong argument: $name=$value") {
}

class MissingRequiredArgumentException(map: Map[String, String], paramName: String)
  extends RuntimeException(s"missing required argument: $paramName, all parameters=$map") {
}

class InvalidSerializerNameException(serializerName: String)
  extends RuntimeException(s"invalid serializer name: $serializerName") {
}

object SchemaUtils {
  def buildSchema(schema: StructType, includesTimestamp: Boolean, timestampColumnName: String = "_TIMESTAMP_"): StructType = {
    if (!includesTimestamp)
      schema;
    else
      StructType(schema.fields.toSeq :+ StructField(timestampColumnName, TimestampType, false));
  }
}

object Params {
  // The listing referenced `kryoSerializer` without defining it; a Kryo serializer built from a
  // default SparkConf is assumed here so that deserialize compiles.
  val kryoSerializer = new KryoSerializer(new SparkConf())

  def deserialize(bytes: Array[Byte]): Any = {
    val kryo = kryoSerializer.newKryo();
    val input = new Input();
    input.setBuffer(bytes);
    kryo.readClassAndObject(input);
  }
}
Example 72
Source File: SerializerFactory.scala From spark-http-stream with BSD 2-Clause "Simplified" License | 5 votes |
package org.apache.spark.sql.execution.streaming.http import java.nio.ByteBuffer import org.apache.spark.serializer.SerializerInstance import org.apache.spark.serializer.DeserializationStream import org.apache.spark.serializer.SerializationStream import java.io.OutputStream import java.io.InputStream import scala.reflect.ClassTag import com.fasterxml.jackson.databind.ObjectMapper import org.apache.spark.SparkConf import org.apache.spark.serializer.JavaSerializer import org.apache.spark.serializer.KryoSerializer object SerializerFactory { val DEFAULT = new SerializerFactory { override def getSerializerInstance(serializerName: String): SerializerInstance = { serializerName.toLowerCase() match { case "kryo" ⇒ new KryoSerializer(new SparkConf()).newInstance(); case "java" ⇒ new JavaSerializer(new SparkConf()).newInstance(); case _ ⇒ throw new InvalidSerializerNameException(serializerName); } } } } trait SerializerFactory { def getSerializerInstance(serializerName: String): SerializerInstance; }
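Usage is a matter of picking a serializer by name and round-tripping a value through the resulting Spark SerializerInstance; a sketch, assuming Spark is on the classpath.

import org.apache.spark.sql.execution.streaming.http.SerializerFactory

object SerializerFactorySketch extends App {
  val serializer = SerializerFactory.DEFAULT.getSerializerInstance("kryo")
  val buffer = serializer.serialize("hello")            // java.nio.ByteBuffer
  val restored = serializer.deserialize[String](buffer) // "hello"
  println(restored)
}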
Example 73
Source File: UtilsTest.scala From spark-http-stream with BSD 2-Clause "Simplified" License | 5 votes |
import java.sql.Date import org.apache.spark.SparkConf import org.apache.spark.serializer.KryoSerializer import org.apache.spark.sql.SparkSession import org.junit.Assert import org.junit.Test import java.io.ByteArrayOutputStream import java.io.InputStream import org.apache.commons.io.IOUtils import com.esotericsoftware.kryo.io.Input import org.apache.spark.sql.execution.streaming.http.KryoSerializerUtils class UtilsTest { @Test def testKryoSerDe() { val d1 = new Date(30000); val bytes = KryoSerializerUtils.serialize(d1); val d2 = KryoSerializerUtils.deserialize(bytes); Assert.assertEquals(d1, d2); val d3 = Map('x' -> Array("aaa", "bbb"), 'y' -> Array("ccc", "ddd")); println(d3); val bytes2 = KryoSerializerUtils.serialize(d3); val d4 = KryoSerializerUtils.deserialize(bytes2).asInstanceOf[Map[String, Any]]; println(d4); } @Test def testEncoderSchema() { val spark = SparkSession.builder.master("local[4]") .getOrCreate(); val sqlContext = spark.sqlContext; import sqlContext.implicits._ import org.apache.spark.sql.catalyst.encoders.encoderFor val schema1 = encoderFor[String].schema; val schema2 = encoderFor[(String)].schema; val schema3 = encoderFor[((String))].schema; Assert.assertEquals(schema1, schema2); Assert.assertEquals(schema1, schema3); } @Test def testDateInTuple() { val spark = SparkSession.builder.master("local[4]") .getOrCreate(); val sqlContext = spark.sqlContext; import sqlContext.implicits._ val d1 = new Date(30000); val ds = sqlContext.createDataset(Seq[(Int, Date)]((1, d1))); val d2 = ds.collect()(0)._2; //NOTE: d1!=d2, maybe a bug println(d1.equals(d2)); } }
Example 74
Source File: FileUtils.scala From skeuomorph with Apache License 2.0 | 5 votes |
package higherkindness.skeuomorph import java.io.{File, FileOutputStream, InputStream} import java.nio.file.{Files, Paths, StandardOpenOption} import cats.effect.{Resource, Sync} object FileUtils { def fileHandle[F[_]: Sync](name: String): Resource[F, File] = Resource.make( Sync[F].delay(new File(name)) )(file => Sync[F].delay(file.deleteOnExit())) def fileOutputStream[F[_]: Sync](file: File): Resource[F, FileOutputStream] = Resource.make( Sync[F].delay(new FileOutputStream(file)) )(fos => Sync[F].delay(fos.close())) def fileInputStream[F[_]: Sync](name: String): Resource[F, InputStream] = Resource.make( Sync[F].delay(Files.newInputStream(Paths.get(name), StandardOpenOption.DELETE_ON_CLOSE)) )(is => Sync[F].delay(is.close())) }
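Because every helper returns a cats-effect Resource, they compose with flatMap and are released in reverse order after use. A sketch under assumptions: cats-effect 2 style (unsafeRunSync on IO) and a hypothetical output path.

import cats.effect.IO
import higherkindness.skeuomorph.FileUtils

object FileUtilsSketch extends App {
  val program = FileUtils
    .fileHandle[IO]("/tmp/example.txt") // hypothetical path
    .flatMap(file => FileUtils.fileOutputStream[IO](file))
    .use(fos => IO(fos.write("hello".getBytes)))
  program.unsafeRunSync() // cats-effect 2 style execution
}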
Example 75
Source File: BrokerResources.scala From reactive-activemq with Apache License 2.0 | 5 votes |
package akka.stream.integration import java.io.InputStream import java.net.URL import akka.stream.integration.BrokerResources.{ QueueStat, TopicStat } import org.scalatest.BeforeAndAfterEach import scala.xml.NodeSeq trait BrokerResources extends BeforeAndAfterEach { _: TestSpec => def enableClearQueus: Boolean private def callBroker(path: String): InputStream = { val amqHost = system.settings.config.getString("amq.host") val url = new URL(s"http://$amqHost:8161" + path) val urlConnection = url.openConnection() val basicAuth = "Basic " + new String(java.util.Base64.getUrlEncoder.encode("admin:admin".getBytes())) urlConnection.addRequestProperty("Authorization", basicAuth) urlConnection.getInputStream } // communicate with the broker // private def getQueueXmlFromBroker: NodeSeq = { import scala.xml.XML XML.load(callBroker("/admin/xml/queues.jsp")) } def getTopicXmlFromBroker: NodeSeq = { import scala.xml.XML XML.load(callBroker("/admin/xml/topics.jsp")) } def getQueueStats: List[QueueStat] = (for { e ← getQueueXmlFromBroker \\ "queue" stat ← e \ "stats" } yield QueueStat( (e \ "@name").text, (stat \ "@size").text.toInt, (stat \ "@consumerCount").text.toInt, (stat \ "@enqueueCount").text.toInt, (stat \ "@dequeueCount").text.toInt )).toList def getTopicStats: List[TopicStat] = (for { e ← getTopicXmlFromBroker \\ "topic" stat ← e \ "stats" } yield TopicStat( (e \ "@name").text, (stat \ "@size").text.toInt, (stat \ "@consumerCount").text.toInt, (stat \ "@enqueueCount").text.toInt, (stat \ "@dequeueCount").text.toInt )).toList def purgeQueues(): Unit = { def purgeQueue(destinationName: String): InputStream = { val path = s"/api/jolokia/exec/org.apache.activemq:brokerName=localhost,destinationName=$destinationName,destinationType=Queue,type=Broker/purge" callBroker(path) } getQueueList.foreach(purgeQueue) } def getQueueList: List[String] = (for { e ← getQueueXmlFromBroker \\ "queue" } yield (e \ "@name").text).toList def getQueueStatFor(topic: String): Option[QueueStat] = getQueueStats.find(_.name contains topic) def getQueueMessageCount(topic: String): Option[Int] = for { stat ← getQueueStatFor(topic) } yield stat.enqueueCount - stat.dequeueCount override protected def beforeEach(): Unit = { if (enableClearQueus) purgeQueues() super.beforeEach() } } object BrokerResources { case class QueueStat(name: String, size: Int, consumerCount: Int, enqueueCount: Int, dequeueCount: Int) case class TopicStat(name: String, size: Int, consumerCount: Int, enqueueCount: Int, dequeueCount: Int) }
Example 76
Source File: ClasspathResources.scala From reactive-activemq with Apache License 2.0 | 5 votes |
package akka.stream.integration import java.io.InputStream import akka.stream.IOResult import akka.stream.scaladsl.{ Source, StreamConverters } import akka.util.ByteString import scala.concurrent.Future import scala.io.{ Source => ScalaIOSource } trait ClasspathResources { def withInputStream[T](fileName: String)(f: InputStream => T): T = { val is: InputStream = fromClasspathAsStream(fileName) try f(is) finally is.close() } def withInputStreamAsText[T](fileName: String)(f: String => T): T = f(fromClasspathAsString(fileName)) def withByteStringSource[T](fileName: String)(f: Source[ByteString, Future[IOResult]] => T): T = withInputStream(fileName) { inputStream => f(StreamConverters.fromInputStream(() => inputStream)) } def streamToString(is: InputStream): String = ScalaIOSource.fromInputStream(is).mkString def fromClasspathAsString(fileName: String): String = streamToString(fromClasspathAsStream(fileName)) def fromClasspathAsStream(fileName: String): InputStream = getClass.getClassLoader.getResourceAsStream(fileName) }
Example 77
Source File: Https.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.common import java.io.{FileInputStream, InputStream} import java.security.{KeyStore, SecureRandom} import javax.net.ssl.{KeyManagerFactory, SSLContext, TrustManagerFactory} import akka.http.scaladsl.ConnectionContext import akka.stream.TLSClientAuth import com.typesafe.sslconfig.akka.AkkaSSLConfig object Https { case class HttpsConfig(keystorePassword: String, keystoreFlavor: String, keystorePath: String, clientAuth: String) def getCertStore(password: Array[Char], flavor: String, path: String): KeyStore = { val cs: KeyStore = KeyStore.getInstance(flavor) val certStore: InputStream = new FileInputStream(path) cs.load(certStore, password) cs } def connectionContext(httpsConfig: HttpsConfig, sslConfig: Option[AkkaSSLConfig] = None) = { val keyFactoryType = "SunX509" val clientAuth = { if (httpsConfig.clientAuth.toBoolean) Some(TLSClientAuth.need) else Some(TLSClientAuth.none) } val keystorePassword = httpsConfig.keystorePassword.toCharArray val keyStore: KeyStore = KeyStore.getInstance(httpsConfig.keystoreFlavor) val keyStoreStream: InputStream = new FileInputStream(httpsConfig.keystorePath) keyStore.load(keyStoreStream, keystorePassword) val keyManagerFactory: KeyManagerFactory = KeyManagerFactory.getInstance(keyFactoryType) keyManagerFactory.init(keyStore, keystorePassword) // Currently, we are using the keystore as truststore as well, because the clients use the same keys as the // server for client authentication (if enabled). // So this code is guided by https://doc.akka.io/docs/akka-http/10.0.9/scala/http/server-side-https-support.html // This needs to be reworked, when we fix the keys and certificates. val trustManagerFactory: TrustManagerFactory = TrustManagerFactory.getInstance(keyFactoryType) trustManagerFactory.init(keyStore) val sslContext: SSLContext = SSLContext.getInstance("TLS") sslContext.init(keyManagerFactory.getKeyManagers, trustManagerFactory.getTrustManagers, new SecureRandom) ConnectionContext.https(sslContext, sslConfig, clientAuth = clientAuth) } }
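Wiring the resulting context into an akka-http server is then a single call. The sketch below uses placeholder keystore values and assumes an akka ActorSystem, materializer, and route set up in the usual way; it is not the project's own server wiring.

import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.http.scaladsl.server.Directives._
import akka.stream.ActorMaterializer
import org.apache.openwhisk.common.Https
import org.apache.openwhisk.common.Https.HttpsConfig

object HttpsSketch extends App {
  implicit val system: ActorSystem = ActorSystem("https-sketch")
  implicit val materializer: ActorMaterializer = ActorMaterializer()

  // Placeholder keystore settings; clientAuth is parsed with toBoolean above
  val config = HttpsConfig("keystore-password", "PKCS12", "/path/to/keystore.p12", clientAuth = "false")
  val context = Https.connectionContext(config)

  val route = path("ping") { complete("pong") }
  Http().bindAndHandle(route, "0.0.0.0", 8443, connectionContext = context)
}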
Example 78
Source File: Unzip.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.standalone import java.io.{File, FileOutputStream, InputStream} import java.util.zip.ZipInputStream object Unzip { def apply(is: InputStream, dir: File): Unit = { //Based on https://stackoverflow.com/a/40547896/1035417 val zis = new ZipInputStream((is)) val dest = dir.toPath Stream.continually(zis.getNextEntry).takeWhile(_ != null).foreach { zipEntry => if (!zipEntry.isDirectory) { val outPath = dest.resolve(zipEntry.getName) val outPathParent = outPath.getParent if (!outPathParent.toFile.exists()) { outPathParent.toFile.mkdirs() } val outFile = outPath.toFile val out = new FileOutputStream(outFile) val buffer = new Array[Byte](4096) Stream.continually(zis.read(buffer)).takeWhile(_ != -1).foreach(out.write(buffer, 0, _)) out.close() } } zis.close() } }
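Calling it is straightforward; a sketch with hypothetical paths.

import java.io.{File, FileInputStream}
import org.apache.openwhisk.standalone.Unzip

object UnzipSketch extends App {
  val archive = new File("/tmp/bundle.zip") // hypothetical archive
  val target = new File("/tmp/bundle")      // extraction directory
  target.mkdirs()
  Unzip(new FileInputStream(archive), target)
}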
Example 79
Source File: SwaggerAPI.scala From swagger-check with MIT License | 5 votes |
package de.leanovate.swaggercheck.schema import java.io.InputStream import com.fasterxml.jackson.annotation.{JsonCreator, JsonProperty} import com.fasterxml.jackson.core.JsonFactory import com.fasterxml.jackson.databind.annotation.JsonDeserialize import com.fasterxml.jackson.databind.{DeserializationFeature, JsonNode, MappingJsonFactory, ObjectMapper} import com.fasterxml.jackson.dataformat.yaml.YAMLFactory import com.fasterxml.jackson.module.scala.DefaultScalaModule import de.leanovate.swaggercheck.schema.jackson.JsonSchemaModule import de.leanovate.swaggercheck.schema.model.{Definition, Parameter} import scala.collection.JavaConverters._ import scala.io.Source @JsonDeserialize(builder = classOf[SwaggerAPIBuilder]) case class SwaggerAPI( basePath: Option[String], paths: Map[String, Map[String, Operation]], definitions: Map[String, Definition] ) object SwaggerAPI { val jsonMapper = objectMapper(new MappingJsonFactory()) val yamlMapper = objectMapper(new YAMLFactory()) def parse(jsonOrYaml: String): SwaggerAPI = { val mapper = if (jsonOrYaml.trim().startsWith("{")) jsonMapper else yamlMapper mapper.readValue(jsonOrYaml, classOf[SwaggerAPI]) } def parse(swaggerInput: InputStream): SwaggerAPI = { parse(Source.fromInputStream(swaggerInput).mkString) } def objectMapper(jsonFactory: JsonFactory): ObjectMapper = { val mapper = new ObjectMapper(jsonFactory) mapper.registerModule(DefaultScalaModule) mapper.registerModule(JsonSchemaModule) mapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES) mapper } } class SwaggerAPIBuilder @JsonCreator()( @JsonProperty("basePath") basePath: Option[String], @JsonProperty("consumes") consumes: Option[Seq[String]], @JsonProperty("produces") produces: Option[Seq[String]], @JsonProperty("paths") paths: Option[Map[String, JsonNode]], @JsonProperty("definitions") definitions: Option[Map[String, Definition]], @JsonProperty("parameters") globalParameters: Option[Map[String, Parameter]] ) { def build(): SwaggerAPI = { val defaultConsumes = consumes.map(_.toSet).getOrElse(Set.empty) val defaultProduces = produces.map(_.toSet).getOrElse(Set.empty) SwaggerAPI(basePath, paths.getOrElse(Map.empty).map { case (path, pathDefinition) => val defaultParameters = Option(pathDefinition.get("parameters")).map { node => node.iterator().asScala.map { element => SwaggerAPI.jsonMapper.treeToValue(element, classOf[OperationParameter]) }.toSeq }.getOrElse(Seq.empty) basePath.map(_ + path).getOrElse(path) -> pathDefinition.fields().asScala.filter(_.getKey != "parameters").map { entry => val operation = SwaggerAPI.jsonMapper.treeToValue(entry.getValue, classOf[Operation]) entry.getKey.toUpperCase -> operation.withDefaults(defaultParameters, defaultConsumes, defaultProduces).resolveGlobalParameters(globalParameters.getOrElse(Map())) }.toMap }, definitions.getOrElse(Map.empty)) } }
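Loading a specification then takes one call, whether the input is JSON or YAML; a sketch assuming a swagger.yaml file sits on the test classpath.

import de.leanovate.swaggercheck.schema.SwaggerAPI

object SwaggerAPISketch extends App {
  val stream = getClass.getResourceAsStream("/swagger.yaml") // hypothetical resource
  val api = SwaggerAPI.parse(stream)
  println(api.basePath)
  api.paths.keys.foreach(println)
}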
Example 80
Source File: MessagePack.scala From airframe with Apache License 2.0 | 5 votes |
package wvlet.airframe.msgpack.spi import java.io.{InputStream, OutputStream} import wvlet.airframe.json.{JSON, JSONScanner, JSONSource} import wvlet.airframe.msgpack.json.{NestedMessagePackBuilder, StreamMessagePackBuilder} object MessagePack { def newBufferPacker: BufferPacker = Compat.newBufferPacker def newPacker(out: OutputStream): Packer = Compat.newPacker(out) def newUnpacker(in: InputStream): Unpacker = Compat.newUnpacker(in) def newUnpacker(msgpack: Array[Byte]): Unpacker = Compat.newUnpacker(msgpack) def newUnpacker(msgpack: Array[Byte], offset: Int, len: Int): Unpacker = Compat.newUnpacker(msgpack, offset, len) def fromJSON(json: String): MsgPack = fromJSON(JSONSource.fromString(json)) def fromJSON(jsonBytes: Array[Byte]): MsgPack = fromJSON(JSONSource.fromBytes(jsonBytes)) def fromJSON(json: JSONSource): MsgPack = { val context = new StreamMessagePackBuilder() JSONScanner.scanAny(json, context) context.result } }
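A small round trip shows the factory methods together; a sketch assuming the packer and unpacker expose the usual packInt/unpackInt-style methods of the msgpack API.

import wvlet.airframe.msgpack.spi.MessagePack

object MessagePackSketch extends App {
  // JSON -> MessagePack bytes
  val msgpack = MessagePack.fromJSON("""{"name":"truck","speed":42}""")
  println(s"${msgpack.length} msgpack bytes")

  // Pack and unpack a single value through the buffer packer
  val packer = MessagePack.newBufferPacker
  packer.packInt(42)
  val unpacked = MessagePack.newUnpacker(packer.toByteArray).unpackInt
  println(unpacked) // 42
}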
Example 81
Source File: Compat.scala From airframe with Apache License 2.0 | 5 votes |
package wvlet.airframe.msgpack.spi import java.io.{InputStream, OutputStream} import org.msgpack.{core => mj} import wvlet.airframe.msgpack.impl.{ BufferPackerImpl, PackerImpl, PureScalaBufferPacker, PureScalaBufferUnpacker, UnpackerImpl } import wvlet.airframe.msgpack.io.ByteArrayBuffer object Compat { def isScalaJS = false def floatToIntBits(v: Float): Int = java.lang.Float.floatToRawIntBits(v) def doubleToLongBits(v: Double): Long = java.lang.Double.doubleToRawLongBits(v) def newBufferPacker: BufferPacker = { new PureScalaBufferPacker //new BufferPackerImpl(mj.MessagePack.newDefaultBufferPacker()) } def newPacker(out: OutputStream): Packer = { // TODO: Use pure-scala packer // new PureScalaBufferPacker new PackerImpl(mj.MessagePack.newDefaultPacker(out)) } def newUnpacker(in: InputStream): Unpacker = { new UnpackerImpl(mj.MessagePack.newDefaultUnpacker(in)) } def newUnpacker(msgpack: Array[Byte]): Unpacker = { newUnpacker(msgpack, 0, msgpack.length) } def newUnpacker(msgpack: Array[Byte], offset: Int, len: Int): Unpacker = { //new UnpackerImpl(mj.MessagePack.newDefaultUnpacker(msgpack, offset, len)) // Use pure-scala unpacker new PureScalaBufferUnpacker(ByteArrayBuffer.fromArray(msgpack, offset, len)) } }
Example 82
Source File: IO.scala From airframe with Apache License 2.0
package wvlet.airframe.control import java.io.{ByteArrayOutputStream, File, InputStream} import java.net.URL import java.nio.charset.StandardCharsets import wvlet.airframe.control.Control.withResource object IO { def readAsString(f: File): String = { readAsString(f.toURI.toURL) } def readAsString(url: URL): String = { withResource(url.openStream()) { in => readAsString(in) } } def readAsString(in: InputStream): String = { new String(readFully(in), StandardCharsets.UTF_8) } def readFully(in: InputStream): Array[Byte] = { val byteArray = if (in == null) { Array.emptyByteArray } else { withResource(new ByteArrayOutputStream) { b => val buf = new Array[Byte](8192) withResource(in) { src => var readBytes = 0 while ({ readBytes = src.read(buf); readBytes != -1 }) { b.write(buf, 0, readBytes) } } b.toByteArray } } byteArray } }
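Each overload funnels into readFully, so usage is uniform across files, URLs, and raw streams; a sketch with a hypothetical file path.

import java.io.File
import wvlet.airframe.control.IO

object IOSketch extends App {
  val file = new File("README.md") // hypothetical path
  val text  = IO.readAsString(file)
  val bytes = IO.readFully(file.toURI.toURL.openStream())
  println(s"${text.length} chars, ${bytes.length} bytes")
}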
Example 83
Source File: ApplicationObjectInputStream.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.serialization

import java.io.{InputStream, ObjectInputStream, ObjectStreamClass}

// Note: the class header below is reconstructed; the original declaration was cut from this
// listing. Wrapping an InputStream in an ObjectInputStream matches how resolveClass is used.
class ApplicationObjectInputStream(stream: InputStream) extends ObjectInputStream(stream) {

  private val classLoader = getClass.getClassLoader

  override def resolveClass(desc: ObjectStreamClass): Class[_] = {
    try {
      this.classLoader.loadClass(desc.getName)
    }
    catch {
      case _: ClassNotFoundException =>
        // FlinkUserCodeClassLoader will fail to load some basic java classes, such as java.lang.Integer, for some
        // reason. Falling back to the default resolveClass implementation should successfully load these classes.
        super.resolveClass(desc)
    }
  }
}
Example 84
Source File: JsonDataInputFormat.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.dataformats import java.io.InputStream import com.amazon.milan.HashUtil import com.amazon.milan.serialization.{DataFormatConfiguration, JavaTypeFactory, MilanObjectMapper} import com.amazon.milan.typeutil.TypeDescriptor import com.fasterxml.jackson.databind.annotation.{JsonDeserialize, JsonSerialize} import scala.collection.JavaConverters._ import scala.language.experimental.macros @JsonSerialize @JsonDeserialize class JsonDataInputFormat[T: TypeDescriptor](val config: DataFormatConfiguration) extends DataInputFormat[T] { @transient private lazy val objectMapper = new MilanObjectMapper(this.config) @transient private lazy val javaType = new JavaTypeFactory(this.objectMapper.getTypeFactory).makeJavaType(this.recordTypeDescriptor) @transient private lazy val hashCodeValue = HashUtil.combineHashCodes(this.recordTypeDescriptor.hashCode(), this.config.hashCode()) private var recordTypeDescriptor = implicitly[TypeDescriptor[T]] def this() { this(DataFormatConfiguration.default) } override def getGenericArguments: List[TypeDescriptor[_]] = List(implicitly[TypeDescriptor[T]]) override def setGenericArguments(genericArgs: List[TypeDescriptor[_]]): Unit = { this.recordTypeDescriptor = genericArgs.head.asInstanceOf[TypeDescriptor[T]] } override def readValue(bytes: Array[Byte], offset: Int, length: Int): Option[T] = { Some(this.objectMapper.readValue[T](bytes, offset, length, this.javaType)) } override def readValues(stream: InputStream): TraversableOnce[T] = { this.objectMapper.readerFor(this.javaType).readValues[T](stream).asScala } override def hashCode(): Int = this.hashCodeValue override def equals(obj: Any): Boolean = { obj match { case o: JsonDataInputFormat[T] => this.recordTypeDescriptor.equals(o.recordTypeDescriptor) && this.config.equals(o.config) case _ => false } } }
Example 85
Source File: TypeInformationDataInputFormat.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.flink.dataformats import java.io.{ByteArrayInputStream, EOFException, InputStream} import com.amazon.milan.dataformats.DataInputFormat import com.amazon.milan.typeutil.TypeDescriptor import org.apache.flink.api.common.ExecutionConfig import org.apache.flink.api.common.typeinfo.TypeInformation import org.apache.flink.api.common.typeutils.TypeSerializer import org.apache.flink.core.memory.DataInputViewStreamWrapper class TypeInformationDataInputFormat[T](typeInfo: TypeInformation[T]) extends DataInputFormat[T] { @transient private lazy val serializer = this.createSerializer() override def getGenericArguments: List[TypeDescriptor[_]] = { // This class is not intended to be serialized by GenericTypedJsonSerializer, so this should not be called. throw new UnsupportedOperationException() } override def setGenericArguments(genericArgs: List[TypeDescriptor[_]]): Unit = { // This class is not intended to be deserialized by GenericTypedJsonDeserializer, so this should not be called. throw new UnsupportedOperationException() } override def readValue(bytes: Array[Byte], offset: Int, length: Int): Option[T] = { val input = new DataInputViewStreamWrapper(new ByteArrayInputStream(bytes, offset, length)) Some(this.serializer.deserialize(input)) } override def readValues(stream: InputStream): TraversableOnce[T] = { val input = new DataInputViewStreamWrapper(stream) Stream.continually(0) .map(_ => try { Some(this.serializer.deserialize(input)) } catch { case _: EOFException => None }) .takeWhile(_.isDefined) .map(_.get) } private def createSerializer(): TypeSerializer[T] = { val config = new ExecutionConfig() this.typeInfo.createSerializer(config) } }
Example 86
Source File: History.scala From sbt-flaky with Apache License 2.0 | 5 votes |
package flaky.history import java.io.{File, FileFilter, InputStream} import java.text.SimpleDateFormat import java.util.Date import flaky.{Flaky, FlakyTestReport, Io} import org.apache.commons.vfs2.VFS import scala.xml.XML class History(project: String, historyDir: File, flakyReportDir: File, projectDir: File) { private val zipFileFilter = new FileFilter { override def accept(pathname: File): Boolean = pathname.getName.endsWith(".zip") } private def runFiles(historyDir: File): List[File] = historyDir.listFiles(zipFileFilter).toList def addCurrentToHistory(): Unit = { val timestamp = System.currentTimeMillis() val date = new SimpleDateFormat(History.dateFormat).format(new Date(timestamp)) val gitCommit = Git(projectDir).currentId().toOption val historyReportDescription = HistoryReportDescription(timestamp, gitCommit) HistoryReportDescription.save(historyReportDescription, new File(flakyReportDir, History.descriptorFile)) Zip.compressFolder(new File(historyDir, s"$date.zip"), flakyReportDir) } def removeToOldFromHistory(maxToKeep: Int): Unit = { runFiles(historyDir) .take(Math.max(runFiles(historyDir).size - maxToKeep, 0)) .foreach(_.delete()) } def createHistoryReport(): HistoryReport = { val historicalRuns: List[HistoricalRun] = runFiles(historyDir) .map(History.loadHistory) val date = new SimpleDateFormat("HH:mm dd-MM-YYYY").format(new Date()) HistoryReport(project, date, historicalRuns) } def processHistory(): HistoryReport = { historyDir.mkdirs() addCurrentToHistory() removeToOldFromHistory(20) createHistoryReport() } } case class HistoryReportDescription(timestamp: Long, gitCommitHash: Option[String]) object HistoryReportDescription { def load(in: InputStream): HistoryReportDescription = { val descriptorXml = XML.load(in) val timestamp = (descriptorXml \ "timestamp").text.trim.toLong val gitHash = (descriptorXml \ "gitCommitHash").text.trim HistoryReportDescription(timestamp, Some(gitHash)) } def save(historyReportDescription: HistoryReportDescription, file: File): Unit = { val xml = <HistoryReportDescription> <timestamp> {historyReportDescription.timestamp} </timestamp> <gitCommitHash> {historyReportDescription.gitCommitHash.getOrElse("")} </gitCommitHash> </HistoryReportDescription> val prettyXml = new scala.xml.PrettyPrinter(80, 2).format(xml) Io.writeToFile(file, prettyXml) } } object History { val descriptorFile = "descriptor.xml" val dateFormat = "yyyyMMdd-HHmmss" def loadHistory: (File) => HistoricalRun = { file => { val manager = VFS.getManager val uri = file.toURI.toString.replace("file:/", "zip:/") val fo = manager.resolveFile(uri) val report: FlakyTestReport = Flaky.createReportFromHistory(fo) val descriptorFile = Option(fo.getChild(History.descriptorFile)) val dateFromFileName = file.getName.replace(".zip","") val hrd = descriptorFile .filter(_.exists()) .map(f => HistoryReportDescription.load(f.getContent.getInputStream)) .getOrElse(HistoryReportDescription(new SimpleDateFormat(dateFormat).parse(dateFromFileName).getTime, None)) HistoricalRun(hrd, report) } } }
Example 87
Source File: PigTransformation.scala From schedoscope with Apache License 2.0 | 5 votes |
package org.schedoscope.dsl.transformations import java.io.{FileInputStream, InputStream} import org.apache.commons.lang.StringUtils import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege import org.apache.hadoop.hive.ql.udf.UDFLength import org.apache.hive.hcatalog.data.schema.HCatSchema import org.apache.hive.hcatalog.pig.HCatLoader import org.apache.pig.builtin.ParquetStorer import org.schedoscope.scheduler.service.ViewTransformationStatus case class PigTransformation(latin: String, dirsToDelete: List[String] = List()) extends Transformation { def name = "pig" override def stringsToChecksum = List(latin) description = "[..]" + StringUtils.abbreviate(latin.replaceAll("\n", "").replaceAll("\t", "").replaceAll("\\s+", " "), 60) def defaultLibraries = { // FIXME: declare jars instead of any random class included in this jar val classes = List( // needed for usage of HCatalog table management classOf[HCatLoader], classOf[HCatSchema], classOf[HiveObjectPrivilege], classOf[UDFLength], // needed for usage of storage format Parquet with pig classOf[ParquetStorer]) classes.map(cl => try { cl.getProtectionDomain().getCodeSource().getLocation().getFile } catch { case t: Throwable => null }) .filter(cl => cl != null && !"".equals(cl.trim)) } override def viewTransformationStatus = ViewTransformationStatus( name, Some(Map("latin" -> latin))) } object PigTransformation { def scriptFrom(inputStream: InputStream): String = scala.io.Source.fromInputStream(inputStream, "UTF-8").mkString def scriptFromResource(resourcePath: String): String = scriptFrom(getClass().getClassLoader().getResourceAsStream(resourcePath)) def scriptFrom(filePath: String): String = scriptFrom(new FileInputStream(filePath)) }
Example 88
Source File: OozieTransformation.scala From schedoscope with Apache License 2.0 | 5 votes |
package org.schedoscope.dsl.transformations import java.io.{FileInputStream, InputStream} import java.util.Properties import org.apache.commons.lang.StringUtils import org.schedoscope.Settings import org.schedoscope.scheduler.service.ViewTransformationStatus import scala.collection.JavaConversions._ case class OozieTransformation(bundle: String, workflow: String, var workflowAppPath: String) extends Transformation { def name = "oozie" override def fileResourcesToChecksum = List(workflowAppPath) description = StringUtils.abbreviate(s"${bundle}/${workflow}", 100) override def viewTransformationStatus = ViewTransformationStatus( name, Some(Map( "bundle" -> bundle, "workflow" -> workflow))) } object OozieTransformation { def oozieWFPath(bundle: String, workflow: String) = s"${Settings().getDriverSettings("oozie").location}/workflows/${bundle}/${workflow}/" def configurationFrom(inputStream: InputStream): Map[String, String] = { val props = new Properties() try { props.load(inputStream) } catch { case t: Throwable => } Map() ++ props } def configurationFrom(filePath: String): Map[String, String] = try configurationFrom(new FileInputStream(filePath)) catch { case t: Throwable => Map() } def configurationFromResource(resourcePath: String): Map[String, String] = try configurationFrom(getClass().getClassLoader().getResourceAsStream(resourcePath)) catch { case t: Throwable => Map() } }
Example 89
Source File: WholeFileReader.scala From magellan with Apache License 2.0 | 5 votes |
package magellan.mapreduce import java.io.InputStream import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FSDataInputStream, FileSystem, Path} import org.apache.hadoop.io.compress.{CodecPool, CompressionCodecFactory, Decompressor} import org.apache.hadoop.io.{NullWritable, Text} import org.apache.hadoop.mapreduce.lib.input.FileSplit import org.apache.hadoop.mapreduce.{InputSplit, RecordReader, TaskAttemptContext} class WholeFileReader extends RecordReader[NullWritable, Text] { private val key = NullWritable.get() private val value = new Text() private var split: FileSplit = _ private var conf: Configuration = _ private var path: Path = _ private var done: Boolean = false override def getProgress: Float = ??? override def nextKeyValue(): Boolean = { if (done){ false } else { val fs = path.getFileSystem(conf) var is: FSDataInputStream = null var in: InputStream = null var decompressor: Decompressor = null try { is = fs.open(split.getPath) val codec = new CompressionCodecFactory(conf).getCodec(path) if (codec != null) { decompressor = CodecPool.getDecompressor(codec) in = codec.createInputStream(is, decompressor) } else { in = is } val result = IOUtils.toByteArray(in) value.clear() value.set(result) done = true true } finally { if (in != null) { IOUtils.closeQuietly(in) } if (decompressor != null) { CodecPool.returnDecompressor(decompressor) } } } } override def getCurrentValue: Text = value override def initialize(inputSplit: InputSplit, taskAttemptContext: TaskAttemptContext): Unit = { this.split = inputSplit.asInstanceOf[FileSplit] this.conf = MapReduceUtils.getConfigurationFromContext(taskAttemptContext) this.path = this.split.getPath } override def getCurrentKey: NullWritable = key override def close() {} }
Example 90
Source File: HDFSFileService.scala From retail_analytics with Apache License 2.0 | 5 votes |
package models import scalaz._ import Scalaz._ import scalaz.EitherT._ import scalaz.Validation import scalaz.NonEmptyList._ import java.io.BufferedInputStream import java.io.File import java.io.FileInputStream import java.io.InputStream import org.apache.hadoop.conf._ import org.apache.hadoop.fs._ object HDFSFileService { private val conf = new Configuration() private val hdfsCoreSitePath = new Path("core-site.xml") private val hdfsHDFSSitePath = new Path("hdfs-site.xml") conf.addResource(hdfsCoreSitePath) conf.addResource(hdfsHDFSSitePath) private val fileSystem = FileSystem.get(conf) def saveFile(filepath: String): ValidationNel[Throwable, String] = { (Validation.fromTryCatch[String] { val file = new File(filepath) val out = fileSystem.create(new Path(file.getName)) val in = new BufferedInputStream(new FileInputStream(file)) var b = new Array[Byte](1024) var numBytes = in.read(b) while (numBytes > 0) { out.write(b, 0, numBytes) numBytes = in.read(b) } in.close() out.close() "File Uploaded" } leftMap { t: Throwable => nels(t) }) } def removeFile(filename: String): Boolean = { val path = new Path(filename) fileSystem.delete(path, true) } def getFile(filename: String): InputStream = { val path = new Path(filename) fileSystem.open(path) } def createFolder(folderPath: String): Unit = { val path = new Path(folderPath) if (!fileSystem.exists(path)) { fileSystem.mkdirs(path) } } }
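saveFile reports success or accumulated errors through scalaz's ValidationNel, so callers typically fold over the result; a sketch with a hypothetical local path.

import models.HDFSFileService

object HDFSFileServiceSketch extends App {
  HDFSFileService.createFolder("/staging")
  HDFSFileService.saveFile("/tmp/report.csv").fold( // hypothetical path
    errors => println(s"upload failed: ${errors.head.getMessage}"),
    message => println(message)                     // "File Uploaded"
  )
}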
Example 91
Source File: BufferedSource.scala From perf_tester with Apache License 2.0 | 5 votes |
package scala.io import java.io.{ InputStream, BufferedReader, InputStreamReader, PushbackReader } import Source.DefaultBufSize import scala.collection.{ Iterator, AbstractIterator } override def mkString = { // Speed up slurping of whole data set in the simplest cases. val allReader = decachedReader val sb = new StringBuilder val buf = new Array[Char](bufferSize) var n = 0 while (n != -1) { n = allReader.read(buf) if (n>0) sb.appendAll(buf, 0, n) } sb.result } }
Example 92
Source File: BaseClassifier.scala From project-matt with MIT License | 5 votes |
package org.datafy.aws.app.matt.classifiers import java.io.InputStream import org.datafy.aws.app.matt.extras.{S3KeySummary, S3Manager, Utilities} import org.datafy.aws.app.matt.models.{FullScanStats, ObjectScanStats, RiskStats, ScanObjectsModel} import com.typesafe.scalalogging._ import org.slf4j.LoggerFactory object BaseClassifier { val logger = LoggerFactory.getLogger("BaseClassifier") def setS3ScanInputPath(bucketName: String, s3Prefix: String) = { // get last object and cache in redis val redisReferenceKey = generateReferenceKey(bucketName, s3Prefix) val lastScannedKey = ScanObjectsModel.getLastScannedFromRedis(redisReferenceKey) logger.info(s"Last Scanned S3 Key: ${lastScannedKey.getOrElse("This is the first scan")}") val bucketObjects: List[S3KeySummary] = S3Manager.getBucketObjects(bucketName, s3Prefix, lastScannedKey) logger.info(s"Total Number of S3 Objects for scanning: ${bucketObjects.length}") if (!bucketObjects.isEmpty) { try { val totalSizeScanned: (String, Int) = S3Manager.computeTotalObjectSize(bucketObjects).head logger.info(s"Total size of scanned objects: ${totalSizeScanned._2}") // commence object scan here val payloadSummary = bucketObjects.map { s3Object => val s3ObjectInputStream = S3Manager.getObjectContentAsStream(s3Object.bucketName, s3Object.key) val textContent = this.scanInputStream(s3ObjectInputStream, s3Object.key) val regexClassifier = RegexClassifier.scanTextContent(textContent) val objectStats: List[RiskStats] = regexClassifier.computeRiskStats() val riskLevel = regexClassifier.getDocumentRiskLevels() val objectStatsSummary = ObjectScanStats(s3Key = s3Object.key, objectSummaryStats = objectStats, classifier = "Regex") (textContent, objectStatsSummary) } // all objects val regexClassifier = RegexClassifier.scanTextContent(payloadSummary.map(_._1)) val fullScanStats = regexClassifier.computeRiskStats() val objectScanStats = payloadSummary.map(_._2) // return to save results actor val scanStats = FullScanStats( s3Bucket = bucketName, lastScannedKey = "", summaryStats = fullScanStats, objectScanStats = objectScanStats, totalObjectsSize = Some(totalSizeScanned._2) ) val savedKey = ScanObjectsModel.saveScannedResults(scanStats) val newLastScannedKey = ScanObjectsModel.saveLastScannedToRedis(redisReferenceKey, bucketObjects) scanStats } catch { case e: Throwable => e.printStackTrace() } true }else { logger.info("No files to scan at this time.") false } } private def scanInputStream(inputStream: InputStream, s3Key: String): String = { // check if input stream is compressed if (s3Key.contains("parquet")) return Utilities.getParseParquetStream(inputStream) val check = Utilities.checkIfStreamIsCompressed(inputStream) if(check) return Utilities.getParseCompressedStream(inputStream) Utilities.getParsePlainStream(inputStream) } private def generateReferenceKey(s3Bucket: String, s3Prefix: String) = { var referenceKey = s"s3Key_${s3Bucket}" if(!s3Prefix.isEmpty) referenceKey += s":s3Prefix_${s3Prefix}" referenceKey } }
Example 93
Source File: TikaParquetParser.scala From project-matt with MIT License | 5 votes |
package org.datafy.aws.app.matt.extras import java.io.{File, FileOutputStream, IOException, InputStream} import java.util import scala.collection.JavaConverters._ import org.xml.sax.{ContentHandler, SAXException} import org.apache.tika.metadata.Metadata import org.apache.tika.metadata.HttpHeaders.CONTENT_TYPE import org.apache.tika.mime.MediaType import org.apache.tika.parser.{AbstractParser, ParseContext} import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.parquet.hadoop.ParquetFileReader import org.apache.parquet.hadoop.ParquetReader import org.apache.parquet.format.converter.ParquetMetadataConverter import org.apache.parquet.hadoop.util.HadoopInputFile import org.apache.parquet.tools.json.JsonRecordFormatter import org.apache.parquet.tools.read.{SimpleReadSupport, SimpleRecord} import org.apache.tika.exception.TikaException import org.apache.tika.sax.XHTMLContentHandler import scala.util.Random class TikaParquetParser extends AbstractParser { // make some stuff here final val PARQUET_RAW = MediaType.application("x-parquet") private val SUPPORTED_TYPES: Set[MediaType] = Set(PARQUET_RAW) def getSupportedTypes(context: ParseContext): util.Set[MediaType] = { SUPPORTED_TYPES.asJava } @throws(classOf[IOException]) @throws(classOf[SAXException]) @throws(classOf[TikaException]) def parse(stream: InputStream, handler: ContentHandler, metadata: Metadata, context: ParseContext): Unit = { // create temp file from stream val fileNamePrefix = Random.alphanumeric.take(5).mkString val tempFile = File.createTempFile(s"parquet-${fileNamePrefix}", ".parquet") IOUtils.copy(stream, new FileOutputStream(tempFile)) val conf = new Configuration() val path = new Path(tempFile.getAbsolutePath) val parquetMetadata = ParquetFileReader.readFooter(conf, path, ParquetMetadataConverter.NO_FILTER) var defaultReader: ParquetReader[SimpleRecord] = null val columns = parquetMetadata.getFileMetaData.getSchema.getFields metadata.set(CONTENT_TYPE, PARQUET_RAW.toString) metadata.set("Total Number of Columns", columns.size.toString) metadata.set("Parquet Column Names", columns.toString) val xhtml = new XHTMLContentHandler(handler, metadata) xhtml.startDocument() xhtml.startElement("p") // ::TODO:: ensure parquet reader reads all files not only file row try { defaultReader = ParquetReader.builder(new SimpleReadSupport(), new Path(tempFile.getAbsolutePath)).build() if(defaultReader.read() != null) { val values: SimpleRecord = defaultReader.read() val jsonFormatter = JsonRecordFormatter.fromSchema(parquetMetadata.getFileMetaData.getSchema) val textContent: String = jsonFormatter.formatRecord(values) xhtml.characters(textContent) xhtml.endElement("p") xhtml.endDocument() } } catch { case e: Throwable => e.printStackTrace() if (defaultReader != null) { try { defaultReader.close() } catch{ case _: Throwable => } } } finally { if (tempFile != null) tempFile.delete() } } }
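The parser plugs into the standard Tika parse call; a sketch assuming a parquet file is reachable at the hypothetical path and that tika-core's BodyContentHandler is on the classpath.

import java.io.FileInputStream
import org.apache.tika.metadata.Metadata
import org.apache.tika.parser.ParseContext
import org.apache.tika.sax.BodyContentHandler
import org.datafy.aws.app.matt.extras.TikaParquetParser

object TikaParquetSketch extends App {
  val parser = new TikaParquetParser
  val handler = new BodyContentHandler(-1) // -1 disables the write limit
  val metadata = new Metadata()
  val stream = new FileInputStream("/tmp/userdata1.parquet") // hypothetical path
  try parser.parse(stream, handler, metadata, new ParseContext())
  finally stream.close()
  println(metadata.get("Total Number of Columns"))
  println(handler.toString.take(200))
}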
Example 94
Source File: TikaHadoopOrcParser.scala From project-matt with MIT License | 5 votes |
package org.datafy.aws.app.matt.extras import java.io.{File, FileOutputStream, IOException, InputStream} import java.util import org.apache.commons.io.IOUtils import org.apache.hadoop.conf.Configuration import scala.collection.JavaConverters._ import org.apache.hadoop.fs.Path import org.apache.hadoop.hive.serde2.objectinspector.StructField import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector import org.apache.orc.OrcFile import org.apache.orc.OrcFile.ReaderOptions import org.apache.orc.Reader import org.apache.orc.RecordReader import org.apache.tika.exception.TikaException import org.apache.tika.metadata.Metadata import org.apache.tika.mime.MediaType import org.apache.tika.parser.{AbstractParser, ParseContext} import org.xml.sax.{ContentHandler, SAXException} import scala.util.Random class TikaHadoopOrcParser extends AbstractParser { final val ORC_RAW = MediaType.application("x-orc") private val SUPPORTED_TYPES: Set[MediaType] = Set(ORC_RAW) def getSupportedTypes(context: ParseContext): util.Set[MediaType] = { SUPPORTED_TYPES.asJava } @throws(classOf[IOException]) @throws(classOf[SAXException]) @throws(classOf[TikaException]) def parse(stream: InputStream, handler: ContentHandler, metadata: Metadata, context: ParseContext): Unit = { // create temp file from stream try { val fileNamePrefix = Random.alphanumeric.take(5).mkString val tempFile = File.createTempFile(s"orc-${fileNamePrefix}", ".orc") IOUtils.copy(stream, new FileOutputStream(tempFile)) val path = new Path(tempFile.getAbsolutePath) val conf = new Configuration() val orcReader = OrcFile.createReader(path, new ReaderOptions(conf)) val records: RecordReader = orcReader.rows() val storeRecord = null val firstBlockKey = null } catch { case e: Throwable => e.printStackTrace() } // val fields = } }
Example 95
Source File: Utilities.scala From project-matt with MIT License | 5 votes |
package org.datafy.aws.app.matt.extras

import org.apache.tika.Tika
import org.apache.tika.metadata.Metadata

import java.io.{BufferedInputStream, IOException, InputStream, StringWriter}
import java.util.zip.GZIPInputStream

import org.xml.sax.SAXException
import org.apache.tika.exception.TikaException
import org.apache.tika.metadata.serialization.JsonMetadata
import org.apache.tika.parser.{AutoDetectParser, ParseContext}
import org.apache.tika.parser.pkg.CompressorParser
import org.apache.tika.sax.BodyContentHandler

object Utilities {

  private val MAX_STRING_LENGTH = 2147483647

  private val tika = new Tika()
  tika.setMaxStringLength(MAX_STRING_LENGTH)

  @throws(classOf[IOException])
  @throws(classOf[SAXException])
  @throws(classOf[TikaException])
  def getParsePlainStream(inputStream: InputStream): String = {
    val autoDetectParser = new AutoDetectParser()
    val bodyContentHandler = new BodyContentHandler(MAX_STRING_LENGTH)
    val fileMetadata = new Metadata()
    if (inputStream.read() == -1) {
      return "Could not scan inputStream less than 0 bytes"
    }
    autoDetectParser.parse(inputStream, bodyContentHandler, fileMetadata)
    bodyContentHandler.toString
  }

  // The listing attached this body to a `getParseCompressedStream` signature, but the magic-byte
  // check with a Boolean result is the `checkIfStreamIsCompressed` method that the other examples
  // in this collection call; the name and return type are restored accordingly.
  // getParseCompressedStream and getParseParquetStream are not shown in this listing.
  def checkIfStreamIsCompressed(myStream: InputStream): Boolean = {
    var inputStream = myStream
    if (!inputStream.markSupported()) {
      inputStream = new BufferedInputStream(inputStream)
    }
    inputStream.mark(2)
    var magicBytes = 0
    try {
      magicBytes = inputStream.read() & 0xff | ((inputStream.read() << 8) & 0xff00)
      inputStream.reset()
    } catch {
      case ioe: IOException => ioe.printStackTrace()
    }
    magicBytes == GZIPInputStream.GZIP_MAGIC
  }
}

case class And[A](p1: A => Boolean, p2: A => Boolean) extends (A => Boolean) {
  def apply(a: A) = p1(a) && p2(a)
}

case class Or[A](p1: A => Boolean, p2: A => Boolean) extends (A => Boolean) {
  def apply(a: A) = p1(a) || p2(a)
}
Example 96
Source File: UtilitiesSpec.scala From project-matt with MIT License | 5 votes |
package org.datafy.aws.app.matt.extras import java.io.InputStream import org.scalatest.FlatSpec class UtilitiesSpec extends FlatSpec { val utilities: Utilities.type = Utilities val someCompressedJson: InputStream = getClass.getResourceAsStream("/UtilitiesSpec/sample-data.json.gz") "getParsePlainStream" should "return empty string when parsing parquet" in { val someParquetFile = getClass.getResourceAsStream("/UtilitiesSpec/part-r-00004.gz.parquet") val fileContents = utilities.getParsePlainStream(someParquetFile) someParquetFile.close() assert(fileContents.length == 0) } "checkIfStreamIsCompressed" should "check if a json input stream is compressed" in { val check = utilities.checkIfStreamIsCompressed(someCompressedJson) assert(check) } "getParseCompressedStream" should "read content of compressed file" in { val textContent = utilities.getParseCompressedStream(someCompressedJson) assert(textContent.length != 0) } "getParseParquetStream" should "read content of parquet file" in { val someParquetFile = getClass.getResourceAsStream("/UtilitiesSpec/userdata1.parquet") val textContent = utilities.getParseParquetStream(someParquetFile) println(textContent) assert(textContent.length != 0) } }
Example 97
Source File: SplashUtils.scala From splash with Apache License 2.0 | 5 votes |
package org.apache.spark.shuffle import java.io.{InputStream, OutputStream} import java.util.Comparator import org.apache.spark.internal.Logging import scala.util.control.NonFatal object SplashUtils extends Logging { def withResources[T <: AutoCloseable, V](r: => T)(f: T => V): V = { val resource: T = r require(resource != null, "resource is null") var exception: Throwable = null try { f(resource) } catch { case NonFatal(e) => exception = e throw e case e: Throwable => logError("fatal error received.", e) throw e } finally { closeAndAddSuppressed(exception, resource) } } private def closeAndAddSuppressed(e: Throwable, resource: AutoCloseable): Unit = { if (e != null) { try { resource.close() } catch { case NonFatal(suppressed) => e.addSuppressed(suppressed) } } else { resource.close() } } class SplashHashComparator[K] extends Comparator[K] { def compare(key1: K, key2: K): Int = { val hash1 = SplashUtils.hash(key1) val hash2 = SplashUtils.hash(key2) if (hash1 < hash2) -1 else if (hash1 == hash2) 0 else 1 } } class SplashSpillableIterator[T](var upstream: Iterator[T], val spillInMemoryIterator: Iterator[T] => SpilledFile, val getNextUpstream: SpilledFile => Iterator[T]) extends Iterator[T] with Logging { private val spillLock = new Object private var spilledFileOpt: Option[SpilledFile] = None private var cur: T = readNext() def spill(): Option[SpilledFile] = spillLock.synchronized { spilledFileOpt match { case Some(_) => // has spilled, return None None case None => // never spilled, now spilling val spilledFile = spillInMemoryIterator(upstream) spilledFileOpt = Some(spilledFile) spilledFileOpt } } def readNext(): T = spillLock.synchronized { spilledFileOpt match { case Some(spilledFile) => upstream = getNextUpstream(spilledFile) spilledFileOpt = None case None => // do nothing } if (upstream.hasNext) { upstream.next() } else { null.asInstanceOf[T] } } override def hasNext: Boolean = cur != null override def next(): T = { val ret = cur cur = readNext() ret } }
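The loan pattern in withResources is the piece most likely to be reused directly; a minimal sketch reading a file's first byte with a guaranteed close, using a hypothetical path.

import java.io.FileInputStream
import org.apache.spark.shuffle.SplashUtils.withResources

object WithResourcesSketch extends App {
  val firstByte = withResources(new FileInputStream("/tmp/data.bin")) { in => // hypothetical path
    in.read() // the stream is closed even if this throws
  }
  println(firstByte)
}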
Example 98
Source File: WorkbookReader.scala From spark-excel with Apache License 2.0 | 5 votes |
package com.crealytics.spark.excel import java.io.InputStream import com.crealytics.spark.excel.Utils.MapIncluding import com.github.pjfanning.xlsx.StreamingReader import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.poi.ss.usermodel.{Workbook, WorkbookFactory} trait WorkbookReader { protected def openWorkbook(): Workbook def withWorkbook[T](f: Workbook => T): T = { val workbook = openWorkbook() val res = f(workbook) workbook.close() res } def sheetNames: Seq[String] = { withWorkbook( workbook => for (sheetIx <- (0 until workbook.getNumberOfSheets())) yield { workbook.getSheetAt(sheetIx).getSheetName() } ) } } object WorkbookReader { val WithLocationMaxRowsInMemoryAndPassword = MapIncluding(Seq("path"), optionally = Seq("maxRowsInMemory", "workbookPassword")) def apply(parameters: Map[String, String], hadoopConfiguration: Configuration): WorkbookReader = { def readFromHadoop(location: String) = { val path = new Path(location) FileSystem.get(path.toUri, hadoopConfiguration).open(path) } parameters match { case WithLocationMaxRowsInMemoryAndPassword(Seq(location), Seq(Some(maxRowsInMemory), passwordOption)) => new StreamingWorkbookReader(readFromHadoop(location), passwordOption, maxRowsInMemory.toInt) case WithLocationMaxRowsInMemoryAndPassword(Seq(location), Seq(None, passwordOption)) => new DefaultWorkbookReader(readFromHadoop(location), passwordOption) } } } class DefaultWorkbookReader(inputStreamProvider: => InputStream, workbookPassword: Option[String]) extends WorkbookReader { protected def openWorkbook(): Workbook = workbookPassword .fold(WorkbookFactory.create(inputStreamProvider))( password => WorkbookFactory.create(inputStreamProvider, password) ) } class StreamingWorkbookReader(inputStreamProvider: => InputStream, workbookPassword: Option[String], maxRowsInMem: Int) extends WorkbookReader { override protected def openWorkbook(): Workbook = { val builder = StreamingReader .builder() .rowCacheSize(maxRowsInMem) .bufferSize(4096) workbookPassword .fold(builder)(password => builder.password(password)) .open(inputStreamProvider) } }
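Callers go through the companion apply, which switches to the streaming reader whenever maxRowsInMemory is set; a sketch with a hypothetical local path and a default Hadoop configuration.

import org.apache.hadoop.conf.Configuration
import com.crealytics.spark.excel.WorkbookReader

object WorkbookReaderSketch extends App {
  val reader = WorkbookReader(
    Map("path" -> "/tmp/report.xlsx", "maxRowsInMemory" -> "100"), // hypothetical path
    new Configuration()
  )
  println(reader.sheetNames.mkString(", "))
}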
Example 99
Source File: AggregateFileRepository.scala From lightbend-markdown with Apache License 2.0 | 5 votes |
package com.lightbend.markdown.server import java.io.{File, InputStream} import com.lightbend.markdown.DocPath import play.doc.FileHandle import play.doc.FileRepository class AggregateFileRepository(repos: Seq[FileRepository]) extends FileRepository { private def fromFirstRepo[A](load: FileRepository => Option[A]) = repos.collectFirst(Function.unlift(load)) def loadFile[A](path: String)(loader: (InputStream) => A) = fromFirstRepo(_.loadFile(path)(loader)) def handleFile[A](path: String)(handler: (FileHandle) => A) = fromFirstRepo(_.handleFile(path)(handler)) def findFileWithName(name: String) = fromFirstRepo(_.findFileWithName(name)) } class PrefixedRepository(prefix: String, repo: FileRepository) extends FileRepository { private def withPrefixStripped[T](path: String)(block: String => Option[T]): Option[T] = { if (path.startsWith(prefix)) { block(path.stripPrefix(prefix)) } else None } override def loadFile[A](path: String)(loader: (InputStream) => A): Option[A] = withPrefixStripped(path)(repo.loadFile[A](_)(loader)) override def handleFile[A](path: String)(handler: (FileHandle) => A): Option[A] = withPrefixStripped(path)(repo.handleFile[A](_)(handler)) override def findFileWithName(name: String): Option[String] = repo.findFileWithName(name).map(prefix + _) } object SourceFinder { def findPathFor(rootDir: File, docPaths: Seq[DocPath], path: String): Option[String] = { docPaths.collect { case DocPath(file, prefix) if prefix == "." => new File(file, path) case DocPath(file, prefix) if path.startsWith(prefix) => new File(file, path.stripPrefix(prefix)) }.collectFirst { case file if file.exists() => file.getCanonicalPath.stripPrefix(rootDir.getCanonicalPath).stripPrefix(File.separator) } } }
Example 100
Source File: InputOutput.scala From lsp4s with Apache License 2.0 | 5 votes |
package scala.meta.jsonrpc import java.io.InputStream import java.io.OutputStream import monix.execution.Cancelable final class InputOutput(val in: InputStream, val out: OutputStream) extends Cancelable { override def cancel(): Unit = { Cancelable.cancelAll( List( Cancelable(() => in.close()), Cancelable(() => out.close()) ) ) } }
Example 101
Source File: BaseProtocolMessage.scala From lsp4s with Apache License 2.0 | 5 votes |
package scala.meta.jsonrpc import java.io.InputStream import java.nio.ByteBuffer import java.nio.charset.StandardCharsets import java.util import io.circe.Json import io.circe.syntax._ import monix.reactive.Observable import scribe.LoggerSupport final class BaseProtocolMessage( val header: Map[String, String], val content: Array[Byte] ) { override def equals(obj: scala.Any): Boolean = this.eq(obj.asInstanceOf[Object]) || { obj match { case m: BaseProtocolMessage => header.equals(m.header) && util.Arrays.equals(content, m.content) } } override def toString: String = { val bytes = MessageWriter.write(this) StandardCharsets.UTF_8.decode(bytes).toString } } object BaseProtocolMessage { val ContentLen = "Content-Length" def apply(msg: Message): BaseProtocolMessage = fromJson(msg.asJson) def fromJson(json: Json): BaseProtocolMessage = fromBytes(json.noSpaces.getBytes(StandardCharsets.UTF_8)) def fromBytes(bytes: Array[Byte]): BaseProtocolMessage = new BaseProtocolMessage( Map("Content-Length" -> bytes.length.toString), bytes ) def fromInputStream( in: InputStream, logger: LoggerSupport ): Observable[BaseProtocolMessage] = fromBytes(Observable.fromInputStream(in), logger) def fromBytes( in: Observable[Array[Byte]], logger: LoggerSupport ): Observable[BaseProtocolMessage] = fromByteBuffers(in.map(ByteBuffer.wrap), logger) def fromByteBuffers( in: Observable[ByteBuffer], logger: LoggerSupport ): Observable[BaseProtocolMessage] = in.executeAsync.liftByOperator(new BaseProtocolMessageParser(logger)) }
Example 102
Source File: HdfsUtilsTest.scala From sparta with Apache License 2.0 | 5 votes |
package com.stratio.sparta.serving.core.utils import java.io.{FileNotFoundException, InputStream} import org.apache.hadoop.fs.{FileSystem, _} import org.junit.runner.RunWith import org.mockito.Mockito._ import org.scalatest._ import org.scalatest.junit.JUnitRunner import org.scalatest.mock.MockitoSugar import scala.util.{Failure, Try} @RunWith(classOf[JUnitRunner]) class HdfsUtilsTest extends FlatSpec with ShouldMatchers with MockitoSugar { val fileSystem: FileSystem = mock[FileSystem] val utils = new HdfsUtils(fileSystem, "stratio") "hdfs utils" should "getfiles from a path" in { val expected = Array(mock[FileStatus]) when(fileSystem.listStatus(new Path("myTestPath"))).thenReturn(expected) val result = utils.getFiles("myTestPath") result should be(expected) } it should "return single file as inputStream" in { val expected: InputStream = mock[FSDataInputStream] when(fileSystem.open(new Path("testFile"))).thenReturn(expected.asInstanceOf[FSDataInputStream]) val result: InputStream = utils.getFile("testFile") result should be(expected) } it should "write" in { val result = Try(utils.write("from", "to", true)) match { case Failure(ex: Throwable) => ex } result.isInstanceOf[FileNotFoundException] should be(true) } it should "write without override" in { val result = Try(utils.write("from", "to", false)) match { case Failure(ex: Throwable) => ex } result.isInstanceOf[FileNotFoundException] should be(true) } }
Example 103
Source File: JsonSource.scala From play-json-schema-validator with Apache License 2.0 | 5 votes |
package com.eclipsesource.schema import java.io.InputStream import java.net.URL import play.api.libs.json._ import scala.io.Source import scala.util.{Failure, Success, Try} def schemaFromUrl(url: URL)(implicit reads: Reads[SchemaType]): JsResult[SchemaType] = { for { schemaJson <- JsonSource.fromUrl(url) match { case Success(json) => JsSuccess(json) case Failure(throwable) => JsError(throwable.getMessage) } schema <- Json.fromJson[SchemaType](schemaJson) } yield schema } }
Example 104
Source File: StreamCopier.scala From scala-ssh with Apache License 2.0 | 5 votes |
package com.decodified.scalassh import annotation.tailrec import java.io.{ ByteArrayOutputStream, OutputStream, InputStream } final class StreamCopier(bufferSize: Int = 4096) { private val buffer = new Array[Byte](bufferSize) @tailrec def copy(in: InputStream, out: OutputStream) { val bytes = in.read(buffer) if (bytes > 0) { out.write(buffer, 0, bytes) copy(in, out) } else { in.close() out.close() } } def emptyToString(inputStream: InputStream, charset: String = "UTF8") = { new String(emptyToByteArray(inputStream), charset) } def emptyToByteArray(inputStream: InputStream) = { val output = new ByteArrayOutputStream() copy(inputStream, output) output.toByteArray } }
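A short usage sketch of StreamCopier (no assumptions beyond the class itself): emptyToString drains an InputStream into a String and closes it once the stream is exhausted.

import java.io.ByteArrayInputStream
import com.decodified.scalassh.StreamCopier

object StreamCopierUsage {
  def main(args: Array[String]): Unit = {
    val in = new ByteArrayInputStream("hello from scala-ssh".getBytes("UTF8"))
    // copy loops until read returns no more bytes, then closes both streams
    val text = new StreamCopier().emptyToString(in)
    println(text)
  }
}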
Example 105
Source File: Command.scala From scala-ssh with Apache License 2.0 | 5 votes |
package com.decodified.scalassh import net.schmizz.sshj.connection.channel.direct.Session import java.io.{ FileInputStream, File, ByteArrayInputStream, InputStream } case class Command(command: String, input: CommandInput = CommandInput.NoInput, timeout: Option[Int] = None) object Command { implicit def string2Command(cmd: String) = Command(cmd) } case class CommandInput(inputStream: Option[InputStream]) object CommandInput { lazy val NoInput = CommandInput(None) implicit def apply(input: String, charsetName: String = "UTF8"): CommandInput = apply(input.getBytes(charsetName)) implicit def apply(input: Array[Byte]): CommandInput = apply(Some(new ByteArrayInputStream(input))) implicit def apply(input: InputStream): CommandInput = apply(Some(input)) def fromFile(file: String): CommandInput = fromFile(new File(file)) def fromFile(file: File): CommandInput = new FileInputStream(file) def fromResource(resource: String): CommandInput = getClass.getClassLoader.getResourceAsStream(resource) } class CommandResult(val channel: Session.Command) { def stdErrStream: InputStream = channel.getErrorStream def stdOutStream: InputStream = channel.getInputStream lazy val stdErrBytes = new StreamCopier().emptyToByteArray(stdErrStream) lazy val stdOutBytes = new StreamCopier().emptyToByteArray(stdOutStream) def stdErrAsString(charsetname: String = "utf8") = new String(stdErrBytes, charsetname) def stdOutAsString(charsetname: String = "utf8") = new String(stdOutBytes, charsetname) lazy val exitSignal: Option[String] = Option(channel.getExitSignal).map(_.toString) lazy val exitCode: Option[Int] = Option(channel.getExitStatus) lazy val exitErrorMessage: Option[String] = Option(channel.getExitErrorMessage) }
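A brief sketch of how the implicits above are used (values are illustrative only): string2Command turns a plain string into a Command, and CommandInput's apply overloads accept strings, byte arrays or streams for stdin.

import com.decodified.scalassh.{Command, CommandInput}

object CommandUsage {
  def main(args: Array[String]): Unit = {
    // implicit string2Command converts the plain string into a Command
    val simple: Command = "uptime"
    // feed stdin to the remote command via CommandInput
    val withInput = Command("cat", input = CommandInput("hello world\n"))
    println(simple.command + " / " + withInput.command)
  }
}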
Example 106
Source File: DFSJarStore.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.jarstore.dfs import java.io.{InputStream, OutputStream} import org.apache.gearpump.util.Constants import org.apache.gearpump.jarstore.JarStore import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import com.typesafe.config.Config import org.apache.hadoop.fs.permission.{FsAction, FsPermission} override def getFile(fileName: String): InputStream = { val filePath = new Path(rootPath, fileName) val fs = filePath.getFileSystem(new Configuration()) fs.open(filePath) } private def createDirIfNotExists(path: Path): Unit = { val fs = path.getFileSystem(new Configuration()) if (!fs.exists(path)) { fs.mkdirs(path, new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL)) } } }
Example 107
Source File: FileSystem.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.experiments.yarn.glue import java.io.{InputStream, OutputStream} import java.net.ConnectException import org.apache.gearpump.util.LogUtil import org.apache.hadoop.fs.Path import scala.util.{Failure, Success, Try} class FileSystem(yarnConfig: YarnConfig) { private val conf = yarnConfig.conf private val fs = org.apache.hadoop.fs.FileSystem.get(conf) private def LOG = LogUtil.getLogger(getClass) def open(file: String): InputStream = exceptionHandler { val path = new Path(file) fs.open(path) } def create(file: String): OutputStream = exceptionHandler { val path = new Path(file) fs.create(path) } def exists(file: String): Boolean = exceptionHandler { val path = new Path(file) fs.exists(path) } def name: String = { fs.getUri.toString } def getHomeDirectory: String = { fs.getHomeDirectory.toString } private def exceptionHandler[T](call: => T): T = { val callTry = Try(call) callTry match { case Success(v) => v case Failure(ex) => if (ex.isInstanceOf[ConnectException]) { LOG.error("Please check whether we connect to the right HDFS file system, " + "current file system is $name." + "\n. Please copy all configs under " + "$HADOOP_HOME/etc/hadoop into conf/yarnconf directory of Gearpump package, " + "so that we can use the right File system.", ex) } throw ex } } }
Example 108
Source File: KernelInputStream.scala From incubator-toree with Apache License 2.0 | 5 votes |
package org.apache.toree.kernel.protocol.v5.stream import java.io.InputStream import java.nio.charset.Charset import akka.pattern.ask import org.apache.toree.kernel.protocol.v5.content.InputRequest import org.apache.toree.kernel.protocol.v5.kernel.ActorLoader import org.apache.toree.kernel.protocol.v5.kernel.Utilities.timeout import org.apache.toree.kernel.protocol.v5.{KMBuilder, MessageType} import scala.collection.mutable.ListBuffer import scala.concurrent.{Await, Future} import KernelInputStream._ object KernelInputStream { val DefaultPrompt = "" val DefaultPassword = false } override def read(): Int = { if (!this.hasByte) this.requestBytes() this.nextByte() } private def hasByte: Boolean = internalBytes.nonEmpty private def nextByte(): Int = { val byte = internalBytes.head internalBytes = internalBytes.tail byte } private def requestBytes(): Unit = { val inputRequest = InputRequest(prompt, password) // NOTE: Assuming already provided parent header and correct ids val kernelMessage = kmBuilder .withHeader(MessageType.Outgoing.InputRequest) .withContentString(inputRequest) .build // NOTE: The same handler is being used in both request and reply val responseFuture: Future[String] = (actorLoader.load(MessageType.Incoming.InputReply) ? kernelMessage) .mapTo[String] // Block until we get a response import scala.concurrent.duration._ internalBytes ++= Await.result(responseFuture, Duration.Inf).getBytes(EncodingType) } }
Example 109
Source File: FactoryMethods.scala From incubator-toree with Apache License 2.0 | 5 votes |
package org.apache.toree.kernel.api import java.io.{InputStream, OutputStream} import org.apache.toree.kernel.protocol.v5 import org.apache.toree.kernel.protocol.v5.{KMBuilder, KernelMessage} import org.apache.toree.kernel.protocol.v5.kernel.ActorLoader import org.apache.toree.kernel.protocol.v5.stream.{KernelOutputStream, KernelInputStream} import com.typesafe.config.Config override def newKernelOutputStream( streamType: String = KernelOutputStream.DefaultStreamType, sendEmptyOutput: Boolean = config.getBoolean("send_empty_output") ): OutputStream = { new v5.stream.KernelOutputStream( actorLoader, kmBuilder, org.apache.toree.global.ScheduledTaskManager.instance, streamType = streamType, sendEmptyOutput = sendEmptyOutput ) } }
Example 110
Source File: StreamState.scala From incubator-toree with Apache License 2.0 | 5 votes |
package org.apache.toree.global import java.io.{InputStream, OutputStream, PrintStream} def withStreams[T](thunk: => T): T = { init(_inputStream, _outputStream, _errorStream) val returnValue = Console.withIn(_inputStream) { Console.withOut(_outputStream) { Console.withErr(_errorStream) { thunk } } } reset() returnValue } }
Example 111
Source File: StreamingParserSpec.scala From cormorant with MIT License | 5 votes |
package io.chrisdavenport.cormorant package fs2 import cats.data.NonEmptyList import cats.effect._ import cats.effect.testing.specs2.CatsIO import _root_.fs2.Stream import io.chrisdavenport.cormorant._ // import io.chrisdavenport.cormorant.implicits._ // import scala.concurrent.duration._ import java.io.ByteArrayInputStream import java.io.InputStream class StreamingParserSpec extends CormorantSpec with CatsIO { def ruinDelims(str: String) = augmentString(str).flatMap { case '\n' => "\r\n" case c => c.toString } "Streaming Parser" should { // https://github.com/ChristopherDavenport/cormorant/pull/84 "parse a known value that did not work with streaming" in { val x = """First Name,Last Name,Email Larry,Bordowitz,[email protected] Anonymous,Hippopotamus,[email protected]""" val source = IO.pure(new ByteArrayInputStream(ruinDelims(x).getBytes): InputStream) Stream.resource(Blocker[IO]).flatMap{blocker => _root_.fs2.io.readInputStream( source, chunkSize = 4, blocker ) } .through(_root_.fs2.text.utf8Decode) .through(parseComplete[IO]) .compile .toVector .map{ v => val header = CSV.Headers(NonEmptyList.of(CSV.Header("First Name"), CSV.Header("Last Name"), CSV.Header("Email"))) val row1 = CSV.Row(NonEmptyList.of(CSV.Field("Larry"), CSV.Field("Bordowitz"), CSV.Field("[email protected]"))) val row2 = CSV.Row(NonEmptyList.of(CSV.Field("Anonymous"), CSV.Field("Hippopotamus"), CSV.Field("[email protected]"))) Vector( (header, row1), (header, row2) ) must_=== v } } } }
Example 112
Source File: process.scala From scala-steward with Apache License 2.0 | 5 votes |
package org.scalasteward.core.io import cats.effect._ import cats.implicits._ import fs2.Stream import java.io.{File, IOException, InputStream} import org.scalasteward.core.util._ import scala.collection.mutable.ListBuffer import scala.concurrent.TimeoutException import scala.concurrent.duration.FiniteDuration object process { def slurp[F[_]]( cmd: Nel[String], cwd: Option[File], extraEnv: Map[String, String], timeout: FiniteDuration, log: String => F[Unit], blocker: Blocker )(implicit contextShift: ContextShift[F], timer: Timer[F], F: Concurrent[F]): F[List[String]] = createProcess(cmd, cwd, extraEnv).flatMap { process => F.delay(new ListBuffer[String]).flatMap { buffer => val readOut = { val out = readInputStream[F](process.getInputStream, blocker) out.evalMap(line => F.delay(appendBounded(buffer, line, 4096)) >> log(line)).compile.drain } val showCmd = (extraEnv.map { case (k, v) => s"$k=$v" }.toList ++ cmd.toList).mkString_(" ") val result = readOut >> F.delay(process.waitFor()) >>= { exitValue => if (exitValue === 0) F.pure(buffer.toList) else { val msg = s"'$showCmd' exited with code $exitValue" F.raiseError[List[String]](new IOException(makeMessage(msg, buffer.toList))) } } val fallback = F.delay(process.destroyForcibly()) >> { val msg = s"'$showCmd' timed out after ${timeout.toString}" F.raiseError[List[String]](new TimeoutException(makeMessage(msg, buffer.toList))) } Concurrent.timeoutTo(result, timeout, fallback) } } private def createProcess[F[_]]( cmd: Nel[String], cwd: Option[File], extraEnv: Map[String, String] )(implicit F: Sync[F]): F[Process] = F.delay { val pb = new ProcessBuilder(cmd.toList: _*) val env = pb.environment() cwd.foreach(pb.directory) extraEnv.foreach { case (key, value) => env.put(key, value) } pb.redirectErrorStream(true) pb.start() } private def readInputStream[F[_]](is: InputStream, blocker: Blocker)(implicit F: Sync[F], cs: ContextShift[F] ): Stream[F, String] = fs2.io .readInputStream(F.pure(is), chunkSize = 4096, blocker) .through(fs2.text.utf8Decode) .through(fs2.text.lines) private def makeMessage(prefix: String, output: List[String]): String = (prefix :: output).mkString("\n") }
Example 113
Source File: TestUtils.scala From scavro with Apache License 2.0 | 5 votes |
package org.oedura.scavro.plugin import java.io.{FileOutputStream, InputStream} import sbt._ import scala.io.Source import scala.util.Random class TestUtils(workingDir: File) { (workingDir / "in").mkdir (workingDir / "out").mkdir def tmpDir = workingDir def tmpPath = workingDir.getAbsolutePath private def extractResource(resourceName: String): File = { val is: InputStream = getClass.getResourceAsStream(s"/$resourceName") val text = Source.fromInputStream(is).mkString val os: FileOutputStream = new FileOutputStream(workingDir / "in" / resourceName) os.write(text.getBytes) os.close() is.close() workingDir / "in" / resourceName } lazy val schemaFile: File = extractResource("Number.avsc") lazy val protocolFile: File = { schemaFile extractResource("NumberSystem.avdl") } def cleanup() = { def getRecursively(f: File): Seq[File] = f.listFiles.filter(_.isDirectory).flatMap(getRecursively) ++ f.listFiles getRecursively(workingDir).foreach { f => if (!f.delete()) throw new RuntimeException("Failed to delete " + f.getAbsolutePath) } tmpDir.delete() } } object TestUtils { private val alphabet = ('a' to 'z') ++ ('A' to 'Z') ++ ('0' to '9') def randomFile(dir: File, prefix: String = "", suffix: String = "", maxTries: Int = 100, nameSize: Int = 10): File = { def randomFileImpl(triesLeft: Int): String = { val testName: String = (1 to nameSize).map(_ => alphabet(Random.nextInt(alphabet.size))).mkString if (!(dir / (prefix + testName + suffix)).exists) prefix + testName + suffix else if (triesLeft < 0) throw new Exception("Unable to find empty random file path.") else randomFileImpl(triesLeft - 1) } dir / randomFileImpl(maxTries) } def randomFileName(prefix: String, suffix: String = "", maxTries: Int = 100, nameSize: Int = 10): String = { def randomFileNameImpl(triesLeft: Int): String = { val testName: String = (1 to nameSize).map(_ => alphabet(Random.nextInt(alphabet.size))).mkString if (!file(prefix + testName + suffix).exists) prefix + testName + suffix else if (triesLeft < 0) throw new Exception("Unable to find empty random file path.") else randomFileNameImpl(triesLeft - 1) } randomFileNameImpl(maxTries) } def apply(workingDir: File) = { if (workingDir.exists && workingDir.isDirectory) new TestUtils(workingDir) else if (!workingDir.exists) { val success = workingDir.mkdirs if (success) new TestUtils(workingDir) else throw new Exception("Cannot initialize working directory") } else throw new Exception("Requested directory is occupied by ordinary file") } }
Example 114
Source File: LineBufferedStream.scala From incubator-livy with Apache License 2.0 | 5 votes |
package org.apache.livy.utils import java.io.InputStream import java.util import java.util.concurrent.locks.ReentrantLock import scala.io.Source import org.apache.livy.Logging class CircularQueue[T](var capacity: Int) extends util.LinkedList[T] { override def add(t: T): Boolean = { if (size >= capacity) removeFirst super.add(t) } } class LineBufferedStream(inputStream: InputStream, logSize: Int) extends Logging { private[this] val _lines: CircularQueue[String] = new CircularQueue[String](logSize) private[this] val _lock = new ReentrantLock() private[this] val _condition = _lock.newCondition() private[this] var _finished = false private val thread = new Thread { override def run() = { val lines = Source.fromInputStream(inputStream).getLines() for (line <- lines) { info(line) _lock.lock() try { _lines.add(line) _condition.signalAll() } finally { _lock.unlock() } } _lock.lock() try { _finished = true _condition.signalAll() } finally { _lock.unlock() } } } thread.setDaemon(true) thread.start() def lines: IndexedSeq[String] = { _lock.lock() val lines = IndexedSeq.empty[String] ++ _lines.toArray(Array.empty[String]) _lock.unlock() lines } def iterator: Iterator[String] = { new LinesIterator } def waitUntilClose(): Unit = thread.join() private class LinesIterator extends Iterator[String] { override def hasNext: Boolean = { if (_lines.size > 0) { true } else { // Otherwise we might still have more data. _lock.lock() try { if (_finished) { false } else { _condition.await() _lines.size > 0 } } finally { _lock.unlock() } } } override def next(): String = { _lock.lock() val line = _lines.poll() _lock.unlock() line } } }
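A usage sketch for LineBufferedStream (the command is a placeholder): it tails a child process's output on a daemon thread and keeps only the last logSize lines.

import org.apache.livy.utils.LineBufferedStream

object LineBufferedStreamUsage {
  def main(args: Array[String]): Unit = {
    // placeholder command; any process that writes lines to stdout works
    val process = new ProcessBuilder("echo", "hello").start()
    val output = new LineBufferedStream(process.getInputStream, logSize = 100)
    output.waitUntilClose()        // block until the process closes its stdout
    output.lines.foreach(println)  // at most the last 100 lines are retained
  }
}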
Example 115
Source File: TestHelper.scala From spark-summit-2018 with GNU General Public License v3.0 | 5 votes |
package com.twilio.open.streaming.trend.discovery import java.io.{ByteArrayInputStream, InputStream} import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.google.protobuf.Message import com.googlecode.protobuf.format.JsonFormat import com.holdenkarau.spark.testing.{LocalSparkContext, SparkContextProvider} import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.sql.SparkSession import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers, Suite} import org.slf4j.{Logger, LoggerFactory} import scala.collection.Seq import scala.io.Source import scala.reflect.ClassTag import scala.reflect.classTag object TestHelper { val log: Logger = LoggerFactory.getLogger("com.twilio.open.streaming.trend.discovery.TestHelper") val mapper: ObjectMapper = { val m = new ObjectMapper() m.registerModule(DefaultScalaModule) } val jsonFormat: JsonFormat = new JsonFormat def loadScenario[T<: Message : ClassTag](file: String): Seq[T] = { val fileString = Source.fromFile(file).mkString val parsed = mapper.readValue(fileString, classOf[Sceanario]) parsed.input.map { data => val json = mapper.writeValueAsString(data) convert[T](json) } } def convert[T<: Message : ClassTag](json: String): T = { val clazz = classTag[T].runtimeClass val builder = clazz.getMethod("newBuilder").invoke(clazz).asInstanceOf[Message.Builder] try { val input: InputStream = new ByteArrayInputStream(json.getBytes()) jsonFormat.merge(input, builder) builder.build().asInstanceOf[T] } catch { case e: Exception => throw e } } } @SerialVersionUID(1L) case class KafkaDataFrame(key: Array[Byte], topic: Array[Byte], value: Array[Byte]) extends Serializable case class Sceanario(input: Seq[Any], expected: Option[Any] = None) trait SparkSqlTest extends BeforeAndAfterAll with SparkContextProvider { self: Suite => @transient var _sparkSql: SparkSession = _ @transient private var _sc: SparkContext = _ override def sc: SparkContext = _sc def conf: SparkConf def sparkSql: SparkSession = _sparkSql override def beforeAll() { _sparkSql = SparkSession.builder().config(conf).getOrCreate() _sc = _sparkSql.sparkContext setup(_sc) super.beforeAll() } override def afterAll() { try { _sparkSql.close() _sparkSql = null LocalSparkContext.stop(_sc) _sc = null } finally { super.afterAll() } } }
Example 116
Source File: OffsetSeqLog.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.streaming import java.io.{InputStream, OutputStream} import java.nio.charset.StandardCharsets._ import scala.io.{Source => IOSource} import org.apache.spark.sql.SparkSession class OffsetSeqLog(sparkSession: SparkSession, path: String) extends HDFSMetadataLog[OffsetSeq](sparkSession, path) { override protected def deserialize(in: InputStream): OffsetSeq = { // called inside a try-finally where the underlying stream is closed in the caller def parseOffset(value: String): Offset = value match { case OffsetSeqLog.SERIALIZED_VOID_OFFSET => null case json => SerializedOffset(json) } val lines = IOSource.fromInputStream(in, UTF_8.name()).getLines() if (!lines.hasNext) { throw new IllegalStateException("Incomplete log file") } val version = lines.next() if (version != OffsetSeqLog.VERSION) { throw new IllegalStateException(s"Unknown log version: ${version}") } // read metadata val metadata = lines.next().trim match { case "" => None case md => Some(md) } OffsetSeq.fill(metadata, lines.map(parseOffset).toArray: _*) } override protected def serialize(offsetSeq: OffsetSeq, out: OutputStream): Unit = { // called inside a try-finally where the underlying stream is closed in the caller out.write(OffsetSeqLog.VERSION.getBytes(UTF_8)) // write metadata out.write('\n') out.write(offsetSeq.metadata.map(_.json).getOrElse("").getBytes(UTF_8)) // write offsets, one per line offsetSeq.offsets.map(_.map(_.json)).foreach { offset => out.write('\n') offset match { case Some(json: String) => out.write(json.getBytes(UTF_8)) case None => out.write(OffsetSeqLog.SERIALIZED_VOID_OFFSET.getBytes(UTF_8)) } } } } object OffsetSeqLog { private val VERSION = "v1" private val SERIALIZED_VOID_OFFSET = "-" }
Example 117
Source File: ProcessTestUtils.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.test import java.io.{InputStream, IOException} import scala.sys.process.BasicIO object ProcessTestUtils { class ProcessOutputCapturer(stream: InputStream, capture: String => Unit) extends Thread { this.setDaemon(true) override def run(): Unit = { try { BasicIO.processFully(capture)(stream) } catch { case _: IOException => // Ignores the IOException thrown when the process termination, which closes the input // stream abruptly. } } } }
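A usage sketch (the command is a placeholder): ProcessOutputCapturer forwards each line of a child process's output to a callback from a daemon thread.

import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer

object ProcessOutputCapturerUsage {
  def main(args: Array[String]): Unit = {
    // placeholder command; substitute whatever process you need to observe
    val process = new ProcessBuilder("echo", "hello").start()
    val capturer = new ProcessOutputCapturer(process.getInputStream, line => println(s"captured: $line"))
    capturer.start()   // runs as a daemon thread, so it will not keep the JVM alive
    process.waitFor()
  }
}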
Example 118
Source File: CryptoStreamUtils.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.security import java.io.{InputStream, OutputStream} import java.util.Properties import javax.crypto.KeyGenerator import javax.crypto.spec.{IvParameterSpec, SecretKeySpec} import org.apache.commons.crypto.random._ import org.apache.commons.crypto.stream._ import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ private[this] def createInitializationVector(properties: Properties): Array[Byte] = { val iv = new Array[Byte](IV_LENGTH_IN_BYTES) val initialIVStart = System.currentTimeMillis() CryptoRandomFactory.getCryptoRandom(properties).nextBytes(iv) val initialIVFinish = System.currentTimeMillis() val initialIVTime = initialIVFinish - initialIVStart if (initialIVTime > 2000) { logWarning(s"It costs ${initialIVTime} milliseconds to create the Initialization Vector " + s"used by CryptoStream") } iv } }
Example 119
Source File: CommandUtils.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.worker import java.io.{File, FileOutputStream, InputStream, IOException} import scala.collection.JavaConverters._ import scala.collection.Map import org.apache.spark.SecurityManager import org.apache.spark.deploy.Command import org.apache.spark.internal.Logging import org.apache.spark.launcher.WorkerCommandBuilder import org.apache.spark.util.Utils def redirectStream(in: InputStream, file: File) { val out = new FileOutputStream(file, true) // TODO: It would be nice to add a shutdown hook here that explains why the output is // terminating. Otherwise if the worker dies the executor logs will silently stop. new Thread("redirect output to " + file) { override def run() { try { Utils.copyStream(in, out, true) } catch { case e: IOException => logInfo("Redirection to " + file + " closed: " + e.getMessage) } } }.start() } }
Example 120
Source File: HDFSExecutorMetricsReplayListenerBus.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.io.InputStream import scala.collection.immutable import scala.collection.mutable.ListBuffer import scala.io.Source import scala.util.parsing.json._ import org.apache.spark.internal.Logging private[spark] class HDFSExecutorMetricsReplayListenerBus extends SparkListenerBus with Logging { def replay( logDataList: ListBuffer[(InputStream, String)], sourceName: String, maybeTruncated: Boolean = false): Unit = { logDataList.foreach(logData => { try { for (line <- Source.fromInputStream(logData._1).getLines()) { val hashMapParsed = JSON.parseFull(line) val hashMap = { hashMapParsed match { case Some(m: Map[String, Any]) => m case _ => new immutable.HashMap[String, Any] } } val hdfsExecutorMetrics = new HDFSExecutorMetrics( hashMap("values").asInstanceOf[Map[String, Any]], hashMap("host").asInstanceOf[String], hashMap("timestamp").asInstanceOf[Double].toLong) postToAll(hdfsExecutorMetrics) } } catch { case ex: Exception => ex.printStackTrace(); logError(ex.toString) logWarning(s"Got JsonParseException from log file $logData") } }) } }
Example 121
Source File: exercise08.scala From scala-for-the-Impatient with MIT License | 5 votes |
import java.io.{InputStream, FileInputStream} trait Buffering { this: InputStream => val BUF_SIZE: Int = 5 private val buf = new Array[Byte](BUF_SIZE) private var bufsize: Int = 0 private var pos: Int = 0 override def read(): Int = { if (pos >= bufsize) { bufsize = this.read(buf, 0, BUF_SIZE) if (bufsize <= 0) return -1 pos = 0 } pos += 1 buf(pos-1) } } val f = new FileInputStream("exercise08.txt") with Buffering for(i <- 1 to 10) println(f.read())
Example 122
Source File: exercise09.scala From scala-for-the-Impatient with MIT License | 5 votes |
import java.io.{InputStream, FileInputStream} trait Logger { def log(msg: String) } trait NoneLogger extends Logger { def log(msg: String) = {} } trait PrintLogger extends Logger { def log(msg: String) = println(msg) } trait Buffering { this: InputStream with Logger => val BUF_SIZE: Int = 5 private val buf = new Array[Byte](BUF_SIZE) private var bufsize: Int = 0 private var pos: Int = 0 override def read(): Int = { if (pos >= bufsize) { bufsize = this.read(buf, 0, BUF_SIZE) log("buffered %d bytes: %s".format(bufsize, buf.mkString(", "))) if (bufsize <= 0) return -1 pos = 0 } pos += 1 buf(pos-1) } } val f = new FileInputStream("exercise08.txt") with Buffering with PrintLogger for(i <- 1 to 10) println(f.read())
Example 123
Source File: DemoFileUploadServlet.scala From udash-demos with GNU General Public License v3.0 | 5 votes |
package io.udash.demos.files.jetty import scala.concurrent.ExecutionContext.Implicits.global import java.io.{File, InputStream} import java.nio.file.Files import java.util.UUID import io.udash.demos.files.UploadedFile import io.udash.demos.files.rpc.ClientRPC import io.udash.demos.files.services.FilesStorage import io.udash.rpc._ class DemoFileUploadServlet(uploadDir: String) extends FileUploadServlet(Set("file", "files")) { new File(uploadDir).mkdir() override protected def handleFile(name: String, content: InputStream): Unit = { val targetName: String = s"${UUID.randomUUID()}_${name.replaceAll("[^a-zA-Z0-9.-]", "_")}" val targetFile = new File(uploadDir, targetName) Files.copy(content, targetFile.toPath) FilesStorage.add( UploadedFile(name, targetName, targetFile.length()) ) // Notify clients ClientRPC(AllClients).fileStorageUpdated() } }
Example 124
Source File: MetricsConfig.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics import java.io.{FileInputStream, InputStream} import java.util.Properties import scala.collection.mutable import scala.util.matching.Regex import org.apache.spark.Logging import org.apache.spark.util.Utils private[spark] class MetricsConfig(val configFile: Option[String]) extends Logging { private val DEFAULT_PREFIX = "*" private val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r private val DEFAULT_METRICS_CONF_FILENAME = "metrics.properties" private[metrics] val properties = new Properties() private[metrics] var propertyCategories: mutable.HashMap[String, Properties] = null private def setDefaultProperties(prop: Properties) { prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet") prop.setProperty("*.sink.servlet.path", "/metrics/json") prop.setProperty("master.sink.servlet.path", "/metrics/master/json") prop.setProperty("applications.sink.servlet.path", "/metrics/applications/json") } def initialize() { // Add default properties in case there's no properties file setDefaultProperties(properties) // If spark.metrics.conf is not set, try to get file in class path val isOpt: Option[InputStream] = configFile.map(new FileInputStream(_)).orElse { try { Option(Utils.getSparkClassLoader.getResourceAsStream(DEFAULT_METRICS_CONF_FILENAME)) } catch { case e: Exception => logError("Error loading default configuration file", e) None } } isOpt.foreach { is => try { properties.load(is) } finally { is.close() } } propertyCategories = subProperties(properties, INSTANCE_REGEX) if (propertyCategories.contains(DEFAULT_PREFIX)) { import scala.collection.JavaConversions._ val defaultProperty = propertyCategories(DEFAULT_PREFIX) for { (inst, prop) <- propertyCategories if (inst != DEFAULT_PREFIX) (k, v) <- defaultProperty if (prop.getProperty(k) == null) } { prop.setProperty(k, v) } } } def subProperties(prop: Properties, regex: Regex): mutable.HashMap[String, Properties] = { val subProperties = new mutable.HashMap[String, Properties] import scala.collection.JavaConversions._ prop.foreach { kv => if (regex.findPrefixOf(kv._1).isDefined) { val regex(prefix, suffix) = kv._1 subProperties.getOrElseUpdate(prefix, new Properties).setProperty(suffix, kv._2) } } subProperties } def getInstance(inst: String): Properties = { propertyCategories.get(inst) match { case Some(s) => s case None => propertyCategories.getOrElse(DEFAULT_PREFIX, new Properties) } } }
Example 125
Source File: ReplayListenerBus.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.io.{InputStream, IOException} import scala.io.Source import org.json4s.jackson.JsonMethods._ import org.apache.spark.Logging import org.apache.spark.util.JsonProtocol def replay(logData: InputStream, sourceName: String): Unit = { var currentLine: String = null var lineNumber: Int = 1 try { val lines = Source.fromInputStream(logData).getLines() lines.foreach { line => currentLine = line postToAll(JsonProtocol.sparkEventFromJson(parse(line))) lineNumber += 1 } } catch { case ioe: IOException => throw ioe case e: Exception => logError(s"Exception parsing Spark event log: $sourceName", e) logError(s"Malformed line #$lineNumber: $currentLine\n") } } }
Example 126
Source File: ProtoCpgLoader.scala From codepropertygraph with Apache License 2.0 | 5 votes |
package io.shiftleft.codepropertygraph.cpgloading import java.io.InputStream import java.nio.file.{Files, Path} import io.shiftleft.codepropertygraph.Cpg import io.shiftleft.proto.cpg.Cpg.{CpgOverlay, CpgStruct, DiffGraph} import org.apache.logging.log4j.LogManager import java.util.{Collection => JCollection, List => JList} import com.google.protobuf.GeneratedMessageV3 import scala.jdk.CollectionConverters._ import scala.util.{Failure, Success, Try, Using} import overflowdb.OdbConfig import io.shiftleft.proto.cpg.Cpg.CpgStruct.Edge import scala.collection.mutable.ArrayBuffer object ProtoCpgLoader { private val logger = LogManager.getLogger(getClass) def loadFromProtoZip(fileName: String, overflowDbConfig: OdbConfig = OdbConfig.withoutOverflow): Cpg = measureAndReport { val builder = new ProtoToCpg(overflowDbConfig) Using.Manager { use => val edgeLists: ArrayBuffer[JCollection[Edge]] = ArrayBuffer.empty use(new ZipArchive(fileName)).entries.foreach { entry => val inputStream = use(Files.newInputStream(entry)) val cpgStruct = getNextProtoCpgFromStream(inputStream) builder.addNodes(cpgStruct.getNodeList) edgeLists += cpgStruct.getEdgeList } edgeLists.foreach(edgeCollection => builder.addEdges(edgeCollection)) } match { case Failure(exception) => throw exception case Success(_) => builder.build() } } def loadFromListOfProtos(cpgs: Seq[CpgStruct], overflowDbConfig: OdbConfig): Cpg = { val builder = new ProtoToCpg(overflowDbConfig) cpgs.foreach(cpg => builder.addNodes(cpg.getNodeList)) cpgs.foreach(cpg => builder.addEdges(cpg.getEdgeList)) builder.build() } def loadFromListOfProtos(cpgs: JList[CpgStruct], overflowDbConfig: OdbConfig): Cpg = loadFromListOfProtos(cpgs.asScala.toSeq, overflowDbConfig) def loadOverlays(fileName: String): Try[Iterator[CpgOverlay]] = loadOverlays(fileName, CpgOverlay.parseFrom) def loadDiffGraphs(fileName: String): Try[Iterator[DiffGraph]] = loadOverlays(fileName, DiffGraph.parseFrom) private def loadOverlays[T <: GeneratedMessageV3](fileName: String, f: InputStream => T): Try[Iterator[T]] = Using(new ZipArchive(fileName)) { zip => zip.entries .sortWith(compareOverlayPath) .map { path => val is = Files.newInputStream(path) f(is) } .iterator } private def compareOverlayPath(a: Path, b: Path): Boolean = { val file1Split: Array[String] = a.toString.replace("/", "").split("_") val file2Split: Array[String] = b.toString.replace("/", "").split("_") if (file1Split.length < 2 || file2Split.length < 2) a.toString < b.toString else file1Split(0).toInt < file2Split(0).toInt } private def getNextProtoCpgFromStream(inputStream: InputStream) = CpgStruct.parseFrom(inputStream) private def measureAndReport[A](f: => A): A = { val start = System.currentTimeMillis() val result = f logger.info("CPG construction finished in " + (System.currentTimeMillis() - start) + "ms.") result } }
Example 127
Source File: Runner.scala From avrohugger with Apache License 2.0 | 5 votes |
package avrohugger package tool import format.abstractions.SourceFormat import format.{Scavro, SpecificRecord, Standard} import java.util.Arrays import java.util.Map import java.util.TreeMap import java.io.{InputStream, PrintStream} import org.apache.avro.tool.Tool import scala.util.{Failure, Success, Try} import scala.collection.JavaConverters._ def run(args: Array[String]): Int = { if (args.length != 0) { val tool: Tool = toolsMap.get(args(0)) if (tool != null) { val result = Try { tool.run( in, out, err, Arrays.asList(args: _*).subList(1, args.length)) } result match { case Success(0) => 0 case Success(exitCode) => err.println("Tool " + args(0) + " failed with exit code " + exitCode) exitCode case Failure(e) => err.println("Tool " + args(0) + " failed: " + e.toString) 1 } } else { err.println("Unknown tool: " + args(0)) 1 } } else { err.println("----------------") err.println("Available tools:") for (k <- toolsMap.asScala.values) { err.printf("%" + maxLen + "s %s\n", k.getName(), k.getShortDescription()) } 1 } } }
Example 128
Source File: DynaMLSSH.scala From DynaML with Apache License 2.0 | 5 votes |
package io.github.mandar2812.dynaml import java.io.{InputStream, OutputStream, PrintStream} import ammonite.ops.Path import ammonite.runtime.Storage import ammonite.sshd.{SshServer, SshServerConfig} import ammonite.sshd.util.Environment import ammonite.util.{Bind, Colors} class DynaMLSSH( sshConfig: SshServerConfig, predef: String = "", defaultPredef: Boolean = true, wd: os.Path = os.pwd, replArgs: Seq[Bind[_]] = Nil, classLoader: ClassLoader = DynaMLSSH.getClass.getClassLoader) { private lazy val sshd = SshServer( sshConfig, shellServer = DynaMLSSH.runRepl( sshConfig.ammoniteHome, predef, defaultPredef, wd, replArgs, classLoader ) ) def port = sshd.getPort def start(): Unit = sshd.start() def stop(): Unit = sshd.stop() def stopImmediately(): Unit = sshd.stop(true) } object DynaMLSSH { // Actually runs a repl inside of session serving a remote user shell. private def runRepl( homePath: os.Path, predefCode: String, defaultPredef: Boolean, wd: os.Path, replArgs: Seq[Bind[_]], replServerClassLoader: ClassLoader )(in: InputStream, out: OutputStream ): Unit = { // since sshd server has it's own customised environment, // where things like System.out will output to the // server's console, we need to prepare individual environment // to serve this particular user's session Environment.withEnvironment(Environment(replServerClassLoader, in, out)) { try { DynaML( predefCode = predefCode, predefFile = None, defaultPredef = defaultPredef, storageBackend = new Storage.Folder(homePath), wd = wd, inputStream = in, outputStream = out, errorStream = out, verboseOutput = false, remoteLogging = false, colors = Colors.Default ).run(replArgs: _*) } catch { case any: Throwable => val sshClientOutput = new PrintStream(out) sshClientOutput.println( "What a terrible failure, DynaML just blew up!" ) any.printStackTrace(sshClientOutput) } } } }
Example 129
Source File: GenericReader.scala From protobuf-generic with Apache License 2.0 | 5 votes |
package me.lyh.protobuf.generic import java.io.{InputStream, ObjectInputStream, ObjectOutputStream, OutputStream} import java.nio.ByteBuffer import java.util.{ArrayList => JArrayList, LinkedHashMap => JLinkedHashMap, TreeMap => JTreeMap} import com.google.protobuf.Descriptors.FieldDescriptor.Type import com.google.protobuf.{CodedInputStream, WireFormat} import scala.collection.JavaConverters._ object GenericReader { def of(schema: Schema): GenericReader = new GenericReader(schema) } class GenericReader(val schema: Schema) extends Serializable { def read(buf: Array[Byte]): GenericRecord = read(CodedInputStream.newInstance(buf), schema.root) def read(buf: ByteBuffer): GenericRecord = read(CodedInputStream.newInstance(buf), schema.root) def read(input: InputStream): GenericRecord = read(CodedInputStream.newInstance(input), schema.root) private def read(input: CodedInputStream, messageSchema: MessageSchema): GenericRecord = { val map = new JTreeMap[java.lang.Integer, Any]() while (!input.isAtEnd) { val tag = input.readTag() val id = WireFormat.getTagFieldNumber(tag) val field = messageSchema.fields(id) if (field.label == Label.REPEATED) { if (!map.containsKey(id)) { map.put(id, new JArrayList[Any]()) } val list = map.get(id).asInstanceOf[java.util.ArrayList[Any]] if (field.packed) { val bytesIn = CodedInputStream.newInstance(input.readByteBuffer()) while (!bytesIn.isAtEnd) { list.add(readValue(bytesIn, field)) } } else { list.add(readValue(input, field)) } } else { map.put(id, readValue(input, field)) } } val result = new JLinkedHashMap[String, Any]() map.asScala.foreach(kv => result.put(messageSchema.fields(kv._1).name, kv._2)) messageSchema.fields.valuesIterator.foreach { f => if (f.default.isDefined && !result.containsKey(f.name)) { result.put(f.name, f.default.get) } } result } private def readValue(in: CodedInputStream, field: Field): Any = field.`type` match { case Type.FLOAT => in.readFloat() case Type.DOUBLE => in.readDouble() case Type.FIXED32 => in.readFixed32() case Type.FIXED64 => in.readFixed64() case Type.INT32 => in.readInt32() case Type.INT64 => in.readInt64() case Type.UINT32 => in.readUInt32() case Type.UINT64 => in.readUInt64() case Type.SFIXED32 => in.readSFixed32() case Type.SFIXED64 => in.readSFixed64() case Type.SINT32 => in.readSInt32() case Type.SINT64 => in.readSInt64() case Type.BOOL => in.readBool() case Type.STRING => in.readString() case Type.BYTES => Base64.encode(in.readByteArray()) case Type.ENUM => schema.enums(field.schema.get).values(in.readEnum()) case Type.MESSAGE => val nestedIn = CodedInputStream.newInstance(in.readByteBuffer()) read(nestedIn, schema.messages(field.schema.get)) case Type.GROUP => throw new IllegalArgumentException("Unsupported type: GROUP") } private def readObject(in: ObjectInputStream): Unit = { val schema = Schema.fromJson(in.readUTF()) val schemaField = getClass.getDeclaredField("schema") schemaField.setAccessible(true) schemaField.set(this, schema) } private def writeObject(out: ObjectOutputStream): Unit = out.writeUTF(schema.toJson) }
Example 130
Source File: AmqpFieldValueSpec.scala From fs2-rabbit with Apache License 2.0 | 5 votes |
package dev.profunktor.fs2rabbit import java.io.{DataInputStream, DataOutputStream, InputStream, OutputStream} import java.time.Instant import com.rabbitmq.client.impl.{ValueReader, ValueWriter} import dev.profunktor.fs2rabbit.model.AmqpFieldValue._ import dev.profunktor.fs2rabbit.model.{AmqpFieldValue, ShortString} import org.scalatest.flatspec.AnyFlatSpecLike import org.scalatest.Assertion import org.scalatest.matchers.should.Matchers class AmqpFieldValueSpec extends AnyFlatSpecLike with Matchers with AmqpPropertiesArbitraries { it should "convert from and to Java primitive header values" in { val intVal = IntVal(1) val longVal = LongVal(2L) val stringVal = StringVal("hey") val arrayVal = ArrayVal(Vector(IntVal(3), IntVal(2), IntVal(1))) AmqpFieldValue.unsafeFrom(intVal.toValueWriterCompatibleJava) should be(intVal) AmqpFieldValue.unsafeFrom(longVal.toValueWriterCompatibleJava) should be(longVal) AmqpFieldValue.unsafeFrom(stringVal.toValueWriterCompatibleJava) should be(stringVal) AmqpFieldValue.unsafeFrom("fs2") should be(StringVal("fs2")) AmqpFieldValue.unsafeFrom(arrayVal.toValueWriterCompatibleJava) should be(arrayVal) } it should "preserve the same value after a round-trip through impure and from" in { forAll { amqpHeaderVal: AmqpFieldValue => AmqpFieldValue.unsafeFrom(amqpHeaderVal.toValueWriterCompatibleJava) == amqpHeaderVal } } it should "preserve the same values after a round-trip through the Java ValueReader and ValueWriter" in { forAll(assertThatValueIsPreservedThroughJavaWriteAndRead _) } it should "preserve a specific StringVal that previously failed after a round-trip through the Java ValueReader and ValueWriter" in { assertThatValueIsPreservedThroughJavaWriteAndRead(StringVal("kyvmqzlbjivLqQFukljghxdowkcmjklgSeybdy")) } it should "preserve a specific DateVal created from an Instant that has millisecond accuracy after a round-trip through the Java ValueReader and ValueWriter" in { val instant = Instant.parse("4000-11-03T20:17:29.57Z") val myDateVal = TimestampVal.from(instant) assertThatValueIsPreservedThroughJavaWriteAndRead(myDateVal) } "DecimalVal" should "reject a BigDecimal of an unscaled value with 33 bits..." in { DecimalVal.from(BigDecimal(Int.MaxValue) + BigDecimal(1)) should be(None) } it should "reject a BigDecimal with a scale over octet size" in { DecimalVal.from(new java.math.BigDecimal(java.math.BigInteger.valueOf(12345L), 1000)) should be(None) } // We need to wrap things in a dummy table because the method that would be // great to test with ValueReader, readFieldValue, is private, and so we // have to call the next best thing, readTable. private def wrapInDummyTable(value: AmqpFieldValue): TableVal = TableVal(Map(ShortString.unsafeFrom("dummyKey") -> value)) private def createWriterFromQueue(outputResults: collection.mutable.Queue[Byte]): ValueWriter = new ValueWriter({ new DataOutputStream({ new OutputStream { override def write(b: Int): Unit = outputResults.enqueue(b.toByte) } }) }) private def createReaderFromQueue(input: collection.mutable.Queue[Byte]): ValueReader = { val inputStream = new InputStream { override def read(): Int = try { val result = input.dequeue() // A signed -> unsigned conversion because bytes by default are // converted into signed ints, which is bad when the API of read // states that negative numbers indicate EOF... 0Xff & result.toInt } catch { case _: NoSuchElementException => -1 } override def available(): Int = { val result = input.size result } } new ValueReader(new DataInputStream(inputStream)) } private def assertThatValueIsPreservedThroughJavaWriteAndRead(amqpHeaderVal: AmqpFieldValue): Assertion = { val outputResultsAsTable = collection.mutable.Queue.empty[Byte] val tableWriter = createWriterFromQueue(outputResultsAsTable) tableWriter.writeTable(wrapInDummyTable(amqpHeaderVal).toValueWriterCompatibleJava) val reader = createReaderFromQueue(outputResultsAsTable) val readValue = reader.readTable() AmqpFieldValue.unsafeFrom(readValue) should be(wrapInDummyTable(amqpHeaderVal)) } }
Example 131
Source File: FromInputStreamInput.scala From borer with Mozilla Public License 2.0 | 5 votes |
package io.bullet.borer.input import java.io.InputStream import java.util import io.bullet.borer.{ByteAccess, Input} trait FromInputStreamInput { this: FromByteArrayInput with FromIteratorInput => private object FromInputStreamProvider extends Input.Provider[InputStream] { type Bytes = Array[Byte] def byteAccess = ByteAccess.ForByteArray def apply(value: InputStream) = fromInputStream(value) } implicit def FromInputStreamProvider[T <: InputStream]: Input.Provider[T] = FromInputStreamProvider.asInstanceOf[Input.Provider[T]] def fromInputStream(inputStream: InputStream, bufferSize: Int = 16384): Input[Array[Byte]] = { if (bufferSize < 256) throw new IllegalArgumentException(s"bufferSize must be >= 256 but was $bufferSize") val iterator: Iterator[Input[Array[Byte]]] = new Iterator[Input[Array[Byte]]] { private[this] val bufA = new Array[Byte](bufferSize) private[this] val bufB = new Array[Byte](bufferSize) private[this] var bufSelect: Boolean = _ private[this] var nextInput: Input[Array[Byte]] = _ def hasNext = { def tryReadNext() = { val buf = if (bufSelect) bufA else bufB nextInput = inputStream.read(buf) match { case -1 => null case `bufferSize` => bufSelect = !bufSelect fromByteArray(buf) case byteCount => fromByteArray(util.Arrays.copyOfRange(buf, 0, byteCount)) } nextInput ne null } (nextInput ne null) || tryReadNext() } def next() = if (hasNext) { val result = nextInput nextInput = null result } else throw new NoSuchElementException } fromIterator(iterator) } }
Example 132
Source File: FromInputStreamInputSpec.scala From borer with Mozilla Public License 2.0 | 5 votes |
package io.bullet.borer.input import java.io.InputStream import io.bullet.borer._ import utest._ import scala.util.Random object FromInputStreamInputSpec extends TestSuite with TestUtils { val random = new Random val tests = Tests { "FromInputStreamInput" - { def newBytesIterator = Iterator.from(0).take(10000).map(_.toByte) val bytes = newBytesIterator val inputStream = new InputStream { def read() = ??? override def read(b: Array[Byte]) = if (bytes.hasNext) { val chunk = random.nextInt(4) match { case 0 => Array.emptyByteArray case 1 | 2 => bytes.take(b.length).toArray[Byte] case 3 => bytes.take(random.nextInt(b.length) + 1).toArray[Byte] } System.arraycopy(chunk, 0, b, 0, chunk.length) chunk.length } else -1 } val input = Input.fromInputStream(inputStream, bufferSize = 300) val paddingProvider = new Input.PaddingProvider[Array[Byte]] { def padByte() = 42 def padDoubleByte(remaining: Int) = ??? def padQuadByte(remaining: Int) = ??? def padOctaByte(remaining: Int) = ??? def padBytes(rest: Array[Byte], missing: Long) = ??? } for { (a, b) <- newBytesIterator.map(_ -> input.readBytePadded(paddingProvider)) } a ==> b input.cursor ==> 10000 input.readBytePadded(paddingProvider) ==> 42 } } }
Example 133
Source File: Markdown.scala From docspell with GNU General Public License v3.0 | 5 votes |
package docspell.convert.flexmark import java.io.{InputStream, InputStreamReader} import java.nio.charset.Charset import java.util import scala.util.Try import cats.effect.Sync import cats.implicits._ import fs2.Stream import docspell.common._ import com.vladsch.flexmark.ext.gfm.strikethrough.StrikethroughExtension import com.vladsch.flexmark.ext.tables.TablesExtension import com.vladsch.flexmark.html.HtmlRenderer import com.vladsch.flexmark.parser.Parser import com.vladsch.flexmark.util.data.{DataKey, MutableDataSet} object Markdown { def toHtml( is: InputStream, cfg: MarkdownConfig, cs: Charset ): Either[Throwable, String] = { val p = createParser() val r = createRenderer() Try { val reader = new InputStreamReader(is, cs) val doc = p.parseReader(reader) wrapHtml(r.render(doc), cfg) }.toEither } def toHtml(md: String, cfg: MarkdownConfig): String = { val p = createParser() val r = createRenderer() val doc = p.parse(md) wrapHtml(r.render(doc), cfg) } def toHtml[F[_]: Sync]( data: Stream[F, Byte], cfg: MarkdownConfig, cs: Charset ): F[String] = data.through(Binary.decode(cs)).compile.foldMonoid.map(str => toHtml(str, cfg)) private def wrapHtml(body: String, cfg: MarkdownConfig): String = s"""<!DOCTYPE html> |<html> |<head> |<meta charset="utf-8"/> |<style> |${cfg.internalCss} |</style> |</head> |<body> |$body |</body> |</html> |""".stripMargin private def createParser(): Parser = { val opts = new MutableDataSet() opts.set( Parser.EXTENSIONS.asInstanceOf[DataKey[util.Collection[_]]], util.Arrays.asList(TablesExtension.create(), StrikethroughExtension.create()) ); Parser.builder(opts).build() } private def createRenderer(): HtmlRenderer = { val opts = new MutableDataSet() HtmlRenderer.builder(opts).build() } }
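A sketch of calling the helper above on an in-memory string. MarkdownConfig is assumed here to be a simple case class carrying the internalCss value used by wrapHtml; that constructor shape is an assumption, not something shown in this snippet.

import docspell.common.MarkdownConfig
import docspell.convert.flexmark.Markdown

object MarkdownUsage {
  def main(args: Array[String]): Unit = {
    // assumed constructor: MarkdownConfig(internalCss: String)
    val cfg  = MarkdownConfig("body { font-family: sans-serif; }")
    val html = Markdown.toHtml("# Hello\n\nSome *markdown* text.", cfg)
    println(html)
  }
}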
Example 134
Source File: PdfboxExtract.scala From docspell with GNU General Public License v3.0 | 5 votes |
package docspell.extract.pdfbox import java.io.InputStream import java.nio.file.Path import scala.util.{Try, Using} import cats.effect.Sync import cats.implicits._ import fs2.Stream import docspell.extract.internal.Text import org.apache.pdfbox.pdmodel.PDDocument import org.apache.pdfbox.text.PDFTextStripper object PdfboxExtract { def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = data.compile .to(Array) .map(bytes => Using(PDDocument.load(bytes))(readText).toEither.flatten) def get(is: InputStream): Either[Throwable, Text] = Using(PDDocument.load(is))(readText).toEither.flatten def get(inFile: Path): Either[Throwable, Text] = Using(PDDocument.load(inFile.toFile))(readText).toEither.flatten private def readText(doc: PDDocument): Either[Throwable, Text] = Try { val stripper = new PDFTextStripper() stripper.setAddMoreFormatting(true) stripper.setLineSeparator("\n") Text(Option(stripper.getText(doc))) }.toEither }
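A usage sketch for the synchronous entry points above (the file path is a placeholder): extraction returns an Either, with the extracted Text on the right.

import java.nio.file.Paths
import docspell.extract.pdfbox.PdfboxExtract

object PdfboxExtractUsage {
  def main(args: Array[String]): Unit = {
    // placeholder path; point this at a real PDF file
    PdfboxExtract.get(Paths.get("/tmp/sample.pdf")) match {
      case Right(text) => println(text)
      case Left(error) => error.printStackTrace()
    }
  }
}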
Example 135
Source File: OdfExtract.scala From docspell with GNU General Public License v3.0 | 5 votes |
package docspell.extract.odf import java.io.{ByteArrayInputStream, InputStream} import scala.util.Try import cats.effect._ import cats.implicits._ import fs2.Stream import docspell.extract.internal.Text import org.apache.tika.metadata.Metadata import org.apache.tika.parser.ParseContext import org.apache.tika.parser.odf.OpenDocumentParser import org.apache.tika.sax.BodyContentHandler object OdfExtract { def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = data.compile.to(Array).map(new ByteArrayInputStream(_)).map(get) def get(is: InputStream) = Try { val handler = new BodyContentHandler() val pctx = new ParseContext() val meta = new Metadata() val ooparser = new OpenDocumentParser() ooparser.parse(is, handler, meta, pctx) Text(Option(handler.toString)) }.toEither }
Example 136
Source File: RtfExtract.scala From docspell with GNU General Public License v3.0 | 5 votes |
package docspell.extract.rtf import java.io.{ByteArrayInputStream, InputStream} import javax.swing.text.rtf.RTFEditorKit import scala.util.Try import cats.effect.Sync import cats.implicits._ import fs2.Stream import docspell.common.MimeType import docspell.extract.internal.Text object RtfExtract { val rtfType = MimeType.application("rtf") def get(is: InputStream): Either[Throwable, Text] = Try { val kit = new RTFEditorKit() val doc = kit.createDefaultDocument() kit.read(is, doc, 0) Text(doc.getText(0, doc.getLength)) }.toEither def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = data.compile.to(Array).map(new ByteArrayInputStream(_)).map(get) }
Example 137
Source File: PoiExtract.scala From docspell with GNU General Public License v3.0 | 5 votes |
package docspell.extract.poi import java.io.{ByteArrayInputStream, InputStream} import scala.util.Try import cats.data.EitherT import cats.effect.Sync import cats.implicits._ import fs2.Stream import docspell.common._ import docspell.extract.internal.Text import docspell.files.TikaMimetype import org.apache.poi.hssf.extractor.ExcelExtractor import org.apache.poi.hssf.usermodel.HSSFWorkbook import org.apache.poi.hwpf.extractor.WordExtractor import org.apache.poi.xssf.extractor.XSSFExcelExtractor import org.apache.poi.xssf.usermodel.XSSFWorkbook import org.apache.poi.xwpf.extractor.XWPFWordExtractor import org.apache.poi.xwpf.usermodel.XWPFDocument object PoiExtract { def get[F[_]: Sync]( data: Stream[F, Byte], hint: MimeTypeHint ): F[Either[Throwable, Text]] = TikaMimetype.detect(data, hint).flatMap(mt => get(data, mt)) def get[F[_]: Sync]( data: Stream[F, Byte], mime: MimeType ): F[Either[Throwable, Text]] = mime match { case PoiType.doc => getDoc(data) case PoiType.xls => getXls(data) case PoiType.xlsx => getXlsx(data) case PoiType.docx => getDocx(data) case PoiType.msoffice => EitherT(getDoc[F](data)) .recoverWith({ case _ => EitherT(getXls[F](data)) }) .value case PoiType.ooxml => EitherT(getDocx[F](data)) .recoverWith({ case _ => EitherT(getXlsx[F](data)) }) .value case mt => Sync[F].pure(Left(new Exception(s"Unsupported content: ${mt.asString}"))) } def getDocx(is: InputStream): Either[Throwable, Text] = Try { val xt = new XWPFWordExtractor(new XWPFDocument(is)) Text(Option(xt.getText)) }.toEither def getDoc(is: InputStream): Either[Throwable, Text] = Try { val xt = new WordExtractor(is) Text(Option(xt.getText)) }.toEither def getXlsx(is: InputStream): Either[Throwable, Text] = Try { val xt = new XSSFExcelExtractor(new XSSFWorkbook(is)) Text(Option(xt.getText)) }.toEither def getXls(is: InputStream): Either[Throwable, Text] = Try { val xt = new ExcelExtractor(new HSSFWorkbook(is)) Text(Option(xt.getText)) }.toEither def getDocx[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getDocx) def getDoc[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getDoc) def getXlsx[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getXlsx) def getXls[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getXls) }
Example 138
Source File: Zip.scala From docspell with GNU General Public License v3.0 | 5 votes |
package docspell.files import java.io.InputStream import java.nio.file.Paths import java.util.zip.ZipInputStream import cats.effect._ import cats.implicits._ import fs2.{Pipe, Stream} import docspell.common.Binary object Zip { def unzipP[F[_]: ConcurrentEffect: ContextShift]( chunkSize: Int, blocker: Blocker ): Pipe[F, Byte, Binary[F]] = s => unzip[F](chunkSize, blocker)(s) def unzip[F[_]: ConcurrentEffect: ContextShift](chunkSize: Int, blocker: Blocker)( data: Stream[F, Byte] ): Stream[F, Binary[F]] = data.through(fs2.io.toInputStream[F]).flatMap(in => unzipJava(in, chunkSize, blocker)) def unzipJava[F[_]: Sync: ContextShift]( in: InputStream, chunkSize: Int, blocker: Blocker ): Stream[F, Binary[F]] = { val zin = new ZipInputStream(in) val nextEntry = Resource.make(Sync[F].delay(Option(zin.getNextEntry))) { case Some(_) => Sync[F].delay(zin.closeEntry()) case None => ().pure[F] } Stream .resource(nextEntry) .repeat .unNoneTerminate .map { ze => val name = Paths.get(ze.getName()).getFileName.toString val data = fs2.io.readInputStream[F]((zin: InputStream).pure[F], chunkSize, blocker, false) Binary(name, data) } } }
Example 139
Source File: ImageSize.scala From docspell with GNU General Public License v3.0 | 5 votes |
package docspell.files import java.io.{ByteArrayInputStream, InputStream} import java.nio.file.Path import javax.imageio.stream.{FileImageInputStream, ImageInputStream} import javax.imageio.{ImageIO, ImageReader} import scala.jdk.CollectionConverters._ import scala.util.{Try, Using} import cats.effect._ import cats.implicits._ import fs2.Stream object ImageSize { def get[F[_]: Sync](data: Stream[F, Byte]): F[Option[Dimension]] = data.take(768).compile.to(Array).map { ar => val iis = ImageIO.createImageInputStream(new ByteArrayInputStream(ar)) if (iis == null) sys.error("no reader given for the array") else getDimension(iis) } private def getDimension(in: ImageInputStream): Option[Dimension] = ImageIO .getImageReaders(in) .asScala .to(LazyList) .collectFirst(Function.unlift { reader => val dim = getDimension(in, reader).toOption reader.dispose() dim }) private def getDimension( in: ImageInputStream, reader: ImageReader ): Either[Throwable, Dimension] = Try { reader.setInput(in) val width = reader.getWidth(reader.getMinIndex) val height = reader.getHeight(reader.getMinIndex) Dimension(width, height) }.toEither }
Example 140
Source File: ProcessStreamConnectionProvider.scala From intellij-lsp with Apache License 2.0 | 5 votes |
package com.github.gtache.lsp.client.connection import java.io.{File, IOException, InputStream, OutputStream} import java.util.Objects import com.intellij.openapi.diagnostic.Logger import org.jetbrains.annotations.Nullable class ProcessStreamConnectionProvider(private var commands: Seq[String], private var workingDir: String) extends StreamConnectionProvider { private val LOG: Logger = Logger.getInstance(classOf[ProcessStreamConnectionProvider]) @Nullable private var process: Process = _ @throws[IOException] override def start(): Unit = { if (this.workingDir == null || this.commands == null || this.commands.isEmpty || this.commands.contains(null)) throw new IOException("Unable to start language server: " + this.toString) //$NON-NLS-1$ val builder = createProcessBuilder LOG.info("Starting server process with commands " + commands + " and workingDir " + workingDir) this.process = builder.start if (!process.isAlive) throw new IOException("Unable to start language server: " + this.toString) else LOG.info("Server process started " + process) } protected def createProcessBuilder: ProcessBuilder = { import scala.collection.JavaConverters._ val builder = new ProcessBuilder(getCommands.map(s => s.replace("\'", "")).asJava) builder.directory(new File(getWorkingDirectory)) builder } protected def getCommands: Seq[String] = commands def setCommands(commands: Seq[String]): Unit = { this.commands = commands } protected def getWorkingDirectory: String = workingDir def setWorkingDirectory(workingDir: String): Unit = { this.workingDir = workingDir } @Nullable override def getInputStream: InputStream = { if (process == null) null else process.getInputStream } @Nullable override def getOutputStream: OutputStream = { if (process == null) null else process.getOutputStream } @Nullable override def getErrorStream: InputStream = { if (process == null) null else process.getErrorStream } override def stop(): Unit = { if (process != null) process.destroy() } override def equals(obj: Any): Boolean = { obj match { case other: ProcessStreamConnectionProvider => getCommands.size == other.getCommands.size && this.getCommands.toSet == other.getCommands.toSet && this.getWorkingDirectory == other.getWorkingDirectory case _ => false } } override def hashCode: Int = { Objects.hashCode(this.getCommands) ^ Objects.hashCode(this.getWorkingDirectory) } }
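A usage sketch (command and working directory are placeholders): the provider starts the server process and exposes its stdio streams for an LSP client to consume.

import com.github.gtache.lsp.client.connection.ProcessStreamConnectionProvider

object ProviderUsage {
  def main(args: Array[String]): Unit = {
    // placeholder command and working directory
    val provider = new ProcessStreamConnectionProvider(Seq("my-language-server", "--stdio"), "/path/to/project")
    provider.start()
    try {
      val serverOut = provider.getInputStream  // read server messages from here
      val serverIn  = provider.getOutputStream // write client messages here
      // ... hand serverOut/serverIn to the JSON-RPC layer of the client ...
    } finally {
      provider.stop()
    }
  }
}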
Example 141
Source File: ProcessOverSocketStreamConnectionProvider.scala From intellij-lsp with Apache License 2.0 | 5 votes |
package com.github.gtache.lsp.client.connection import java.io.{IOException, InputStream, OutputStream} import java.net.{ServerSocket, Socket} import java.util.Objects import com.intellij.openapi.diagnostic.Logger class ProcessOverSocketStreamConnectionProvider(commands: Seq[String], workingDir: String, port: Int = 0) extends ProcessStreamConnectionProvider(commands, workingDir) { import ProcessOverSocketStreamConnectionProvider._ private var socket: Socket = _ private var inputStream: InputStream = _ private var outputStream: OutputStream = _ @throws[IOException] override def start(): Unit = { val serverSocket = new ServerSocket(port) val socketThread = new Thread(() => { try socket = serverSocket.accept catch { case e: IOException => LOG.error(e) } finally try serverSocket.close() catch { case e: IOException => LOG.error(e) } }) socketThread.start() super.start() try { socketThread.join(5000) } catch { case e: InterruptedException => LOG.error(e) } if (socket == null) throw new IOException("Unable to make socket connection: " + toString) //$NON-NLS-1$ inputStream = socket.getInputStream outputStream = socket.getOutputStream } override def getInputStream: InputStream = inputStream override def getOutputStream: OutputStream = outputStream override def getErrorStream: InputStream = inputStream override def stop(): Unit = { super.stop() if (socket != null) try socket.close() catch { case e: IOException => LOG.error(e) } } override def hashCode: Int = { val result = super.hashCode result ^ Objects.hashCode(this.port) } } object ProcessOverSocketStreamConnectionProvider { private val LOG = Logger.getInstance(classOf[ProcessOverSocketStreamConnectionProvider]) }
Example 142
Source File: RconConnector.scala From chatoverflow with Eclipse Public License 2.0 | 5 votes |
package org.codeoverflow.chatoverflow.requirement.service.rcon import java.io.{DataInputStream, IOException, InputStream, OutputStream} import java.net.{Socket, SocketException} import java.nio.{ByteBuffer, ByteOrder} import java.util.Random import org.codeoverflow.chatoverflow.WithLogger import org.codeoverflow.chatoverflow.connector.Connector class RconConnector(override val sourceIdentifier: String) extends Connector(sourceIdentifier) with WithLogger { override protected var requiredCredentialKeys: List[String] = List("password", "address") override protected var optionalCredentialKeys: List[String] = List("port") private var socket: Socket = _ private var outputStream: OutputStream = _ private var inputStream: InputStream = _ private var requestId: Int = 0 def sendCommand(command: String): String = { logger debug s"Sending $command to RCON" requestId += 1 if (write(2, command.getBytes("ASCII"))) { return read() } null } override def stop(): Boolean = { logger info s"Stopped RCON connector to ${credentials.get.getValue("address").get}!" socket.close() true } }
Example 143
Source File: SerialInputImpl.scala From chatoverflow with Eclipse Public License 2.0 | 5 votes |
package org.codeoverflow.chatoverflow.requirement.service.serial.impl import java.io.InputStream import org.codeoverflow.chatoverflow.WithLogger import org.codeoverflow.chatoverflow.api.io.event.serial.{SerialDataAvailableEvent, SerialEvent} import org.codeoverflow.chatoverflow.api.io.input.SerialInput import org.codeoverflow.chatoverflow.registry.Impl import org.codeoverflow.chatoverflow.requirement.impl.EventInputImpl import org.codeoverflow.chatoverflow.requirement.service.serial.SerialConnector @Impl(impl = classOf[SerialInput], connector = classOf[SerialConnector]) class SerialInputImpl extends EventInputImpl[SerialEvent, SerialConnector] with SerialInput with WithLogger { private val onInputFn = onInput _ override def start(): Boolean = { sourceConnector.get.addInputListener(onInputFn) true } private def onInput(bytes: Array[Byte]): Unit = call(new SerialDataAvailableEvent(bytes)) override def getInputStream: InputStream = sourceConnector.get.getInputStream override def stop(): Boolean = { sourceConnector.get.removeInputListener(onInputFn) true } }
Example 144
Source File: CheckPoint.scala From eclair with Apache License 2.0 | 5 votes |
package fr.acinq.eclair.blockchain.electrum import java.io.InputStream import fr.acinq.bitcoin.{Block, ByteVector32, encodeCompact} import fr.acinq.eclair.blockchain.electrum.db.HeaderDb import org.json4s.JsonAST.{JArray, JInt, JString} import org.json4s.jackson.JsonMethods def load(chainHash: ByteVector32, headerDb: HeaderDb): Vector[CheckPoint] = { val checkpoints = CheckPoint.load(chainHash) val checkpoints1 = headerDb.getTip match { case Some((height, header)) => val newcheckpoints = for {h <- checkpoints.size * RETARGETING_PERIOD - 1 + RETARGETING_PERIOD to height - RETARGETING_PERIOD by RETARGETING_PERIOD} yield { // we * should * have these headers in our db val cpheader = headerDb.getHeader(h).get val nextDiff = headerDb.getHeader(h + 1).get.bits CheckPoint(cpheader.hash, nextDiff) } checkpoints ++ newcheckpoints case None => checkpoints } checkpoints1 } }
Example 145
Source File: HeaderCreator.scala From sbt-header with Apache License 2.0 | 5 votes |
package de.heikoseeberger.sbtheader import java.io.InputStream import sbt.Logger import scala.io.Codec object HeaderCreator { def apply( fileType: FileType, commentStyle: CommentStyle, license: License, headerEmptyLine: Boolean, log: Logger, input: InputStream ): HeaderCreator = new HeaderCreator(fileType, commentStyle, license, headerEmptyLine, log, input) } final class HeaderCreator private ( fileType: FileType, commentStyle: CommentStyle, license: License, headerEmptyLine: Boolean, log: Logger, input: InputStream ) { private val crlf = """(?s)(?:.*)(\r\n)(?:.*)""".r private val cr = """(?s)(?:.*)(\r)(?:.*)""".r private val headerPattern = commentStyle.pattern private val (firstLine, text) = { val fileContent = try scala.io.Source.fromInputStream(input)(Codec.UTF8).mkString finally input.close() fileType.firstLinePattern match { case Some(pattern) => fileContent match { case pattern(first, rest) => (first, rest) case other => ("", other) } case _ => ("", fileContent) } } log.debug(s"First line of file is:$newLine$firstLine") log.debug(s"Text of file is:$newLine$text") private val fileNewLine = text match { case crlf(_) => "\r\n" case cr(_) => "\r" case _ => "\n" } private def newHeaderText(existingHeader: Option[String]) = { val suffix = if (headerEmptyLine) "" else newLine val headerText = commentStyle(license, existingHeader).stripSuffix(suffix) val headerNewLine = headerText match { case crlf(_) => "\r\n" case cr(_) => "\r" case _ => "\n" } headerText.replace(headerNewLine, fileNewLine) } private val modifiedText = text match { case headerPattern(existingText, body) => val newText = newHeaderText(Some(existingText)) if (newText == existingText) None else Some(firstLine + newText + body.replaceAll("""^\s+""", "")) // Trim left case body if body.isEmpty => None case body => Some(firstLine + newHeaderText(None) + body.replaceAll("""^\s+""", "")) // Trim left } log.debug(s"Modified text of file is:$newLine$modifiedText") def createText: Option[String] = modifiedText }
Example 146
Source File: Compression.scala From databus-maven-plugin with GNU Affero General Public License v3.0 | 5 votes |
package org.dbpedia.databus.lib import better.files._ import java.io.{BufferedInputStream, FileInputStream, InputStream} import com.codahale.metrics.MetricRegistry import org.apache.commons.compress.archivers.{ArchiveEntry, ArchiveException, ArchiveInputStream, ArchiveStreamFactory} import org.apache.commons.compress.compressors.{CompressorException, CompressorInputStream, CompressorStreamFactory} import scala.util.Try object Compression { def detectCompression(datafile: File): Option[String] = { try { Some(datafile.inputStream.map(_.buffered).apply(CompressorStreamFactory.detect)) } catch { case ce: CompressorException => None } } def detectArchive(datafile: File): Option[String] = { try { Some(datafile.inputStream.map(_.buffered).apply(ArchiveStreamFactory.detect)) } catch { case ce: ArchiveException => None } } }
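A short sketch of the two detectors above, assuming a hypothetical local dump file; both return None when the stream is neither compressed nor an archive.

import better.files.File
import org.dbpedia.databus.lib.Compression

val datafile = File("dataset.ttl.bz2")          // hypothetical path
Compression.detectCompression(datafile) match {
  case Some(format) => println(s"compressed as $format")  // e.g. "bzip2"
  case None         => println("no known compression detected")
}
val archiveFormat = Compression.detectArchive(File("dump.zip"))  // e.g. Some("zip")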
Example 147
Source File: CommandExecutor.scala From renku with Apache License 2.0 | 5 votes |
package ch.renku.acceptancetests.tooling.console import java.io.{File, InputStream} import java.nio.file.Path import java.util import java.util.concurrent.ConcurrentLinkedQueue import cats.effect.IO import cats.implicits._ import ch.renku.acceptancetests.model.users.UserCredentials import ch.renku.acceptancetests.tooling.TestLogger.logger import ch.renku.acceptancetests.tooling.console.Command.UserInput import scala.jdk.CollectionConverters._ import scala.language.postfixOps import scala.sys.process._ private class CommandExecutor(command: Command) { def execute(implicit workPath: Path, userCredentials: UserCredentials): String = { implicit val output: util.Collection[String] = new ConcurrentLinkedQueue[String]() IO { executeCommand output.asString } recoverWith consoleException }.unsafeRunSync() def safeExecute(implicit workPath: Path, userCredentials: UserCredentials): String = { implicit val output: util.Collection[String] = new ConcurrentLinkedQueue[String]() IO { executeCommand output.asString } recover outputAsString }.unsafeRunSync() private def executeCommand(implicit workPath: Path, output: util.Collection[String], userCredentials: UserCredentials): Unit = command.userInputs.foldLeft(buildProcess) { (process, userInput) => process #< userInput.asStream } lazyLines ProcessLogger(logLine _) foreach logLine private def buildProcess(implicit workPath: Path) = command.maybeFileName.foldLeft(Process(command.toString.stripMargin, workPath.toFile)) { (process, fileName) => process #>> new File(workPath.toUri resolve fileName.value) } private def logLine( line: String )(implicit output: util.Collection[String], userCredentials: UserCredentials): Unit = line.trim match { case "" => () case line => val obfuscatedLine = line.replace(userCredentials.password.value, "###") output add obfuscatedLine logger debug obfuscatedLine } private def consoleException(implicit output: util.Collection[String]): PartialFunction[Throwable, IO[String]] = { case _ => ConsoleException { s"$command failed with:\n${output.asString}" }.raiseError[IO, String] } private def outputAsString(implicit output: util.Collection[String]): PartialFunction[Throwable, String] = { case _ => output.asString } private implicit class OutputOps(output: util.Collection[String]) { lazy val asString: String = output.asScala.mkString("\n") } private implicit class UserInputOps(userInput: UserInput) { import java.nio.charset.StandardCharsets.UTF_8 lazy val asStream: InputStream = new java.io.ByteArrayInputStream( userInput.value.getBytes(UTF_8.name) ) } }
Example 148
Source File: WholeFileInputFormat.scala From flink-tensorflow with Apache License 2.0 | 5 votes |
package org.apache.flink.contrib.tensorflow.io import java.io.{EOFException, IOException, InputStream} import org.apache.flink.api.common.io.FileInputFormat import org.apache.flink.configuration.Configuration import org.apache.flink.core.fs._ import org.apache.flink.util.Preconditions.checkState @throws[IOException] def readRecord(reuse: T, filePath: Path, fileStream: FSDataInputStream, fileLength: Long): T // -------------------------------------------------------------------------------------------- // Lifecycle // -------------------------------------------------------------------------------------------- override def nextRecord(reuse: T): T = { checkState(!reachedEnd()) checkState(currentSplit != null && currentSplit.getStart == 0) checkState(stream != null) readRecord(reuse, currentSplit.getPath, stream, currentSplit.getLength) } override def reachedEnd(): Boolean = { stream.getPos != 0 } } @SerialVersionUID(1L) object WholeFileInputFormat { @throws[IOException] def readFully(fileStream: FSDataInputStream, fileLength: Long): Array[Byte] = { if(fileLength > Int.MaxValue) { throw new IllegalArgumentException("the file is too large to be fully read") } val buf = new Array[Byte](fileLength.toInt) readFully(fileStream, buf, 0, fileLength.toInt) buf } @throws[IOException] def readFully(inputStream: InputStream, buf: Array[Byte], off: Int, len: Int): Array[Byte] = { var bytesRead = 0 while (bytesRead < len) { val read = inputStream.read(buf, off + bytesRead, len - bytesRead) if (read < 0) throw new EOFException("Premature end of stream") bytesRead += read } buf } }
Example 149
Source File: ZipUtil.scala From coursier with Apache License 2.0 | 5 votes |
import java.util.zip.{ZipEntry, ZipInputStream, ZipOutputStream} import java.io.{ByteArrayOutputStream, File, FileInputStream, FileOutputStream, InputStream} object ZipUtil { def addToZip(sourceZip: File, destZip: File, extra: Seq[(String, Array[Byte])]): Unit = { val is = new FileInputStream(sourceZip) val os = new FileOutputStream(destZip) val bootstrapZip = new ZipInputStream(is) val outputZip = new ZipOutputStream(os) def readFullySync(is: InputStream) = { val buffer = new ByteArrayOutputStream val data = Array.ofDim[Byte](16384) var nRead = is.read(data, 0, data.length) while (nRead != -1) { buffer.write(data, 0, nRead) nRead = is.read(data, 0, data.length) } buffer.flush() buffer.toByteArray } def zipEntries(zipStream: ZipInputStream): Iterator[(ZipEntry, Array[Byte])] = new Iterator[(ZipEntry, Array[Byte])] { private var nextEntry = Option.empty[ZipEntry] private def update() = nextEntry = Option(zipStream.getNextEntry) update() def hasNext = nextEntry.nonEmpty def next() = { val ent = nextEntry.get val data = readFullySync(zipStream) update() (ent, data) } } val extraNames = extra.map(_._1).toSet for ((ent, data) <- zipEntries(bootstrapZip) if !extraNames(ent.getName)) { // Same workaround as https://github.com/spring-projects/spring-boot/issues/13720 // (https://github.com/spring-projects/spring-boot/commit/a50646b7cc3ad941e748dfb450077e3a73706205#diff-2ff64cd06c0b25857e3e0dfdb6733174R144) ent.setCompressedSize(-1L) outputZip.putNextEntry(ent) outputZip.write(data) outputZip.closeEntry() } for ((dest, data) <- extra) { outputZip.putNextEntry(new ZipEntry(dest)) outputZip.write(data) outputZip.closeEntry() } outputZip.close() is.close() os.close() } }
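A usage sketch for addToZip above with hypothetical file names; the extra entries win over same-named entries already present in the source jar.

import java.io.File
import java.nio.charset.StandardCharsets

val source = new File("bootstrap.jar")          // hypothetical input jar
val dest   = new File("bootstrap-patched.jar")  // hypothetical output jar
val extra  = Seq(
  "extra/version.properties" -> "version=0.0.1".getBytes(StandardCharsets.UTF_8)
)
ZipUtil.addToZip(source, dest, extra)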
Example 150
Source File: PowershellRunner.scala From coursier with Apache License 2.0 | 5 votes |
package coursier.env import java.nio.charset.StandardCharsets import java.util.Base64 import dataclass.data import java.io.InputStream import java.io.ByteArrayOutputStream @data class PowershellRunner( powershellExePath: String = "powershell.exe", options: Seq[String] = PowershellRunner.defaultOptions, encodeProgram: Boolean = true ) { def runScript(script: String): String = { // inspired by https://github.com/soc/directories-jvm/blob/1f344ef0087e8422f6c7334317e73b8763d9e483/src/main/java/io/github/soc/directories/Util.java#L147 val fullScript = "& {\n" + "[Console]::OutputEncoding = [System.Text.Encoding]::UTF8\n" + script + "\n}" val scriptArgs = if (encodeProgram) { val base64 = Base64.getEncoder() val encodedScript = base64.encodeToString(fullScript.getBytes(StandardCharsets.UTF_16LE)) Seq("-EncodedCommand", encodedScript) } else Seq("-Command", fullScript) val command = Seq(powershellExePath) ++ options ++ scriptArgs val b = new ProcessBuilder(command: _*) .redirectInput(ProcessBuilder.Redirect.PIPE) .redirectOutput(ProcessBuilder.Redirect.PIPE) .redirectError(ProcessBuilder.Redirect.INHERIT) val p: Process = b.start() p.getOutputStream.close() val outputBytes = PowershellRunner.readFully(p.getInputStream) val retCode = p.waitFor() if (retCode == 0) new String(outputBytes, StandardCharsets.UTF_8) else throw new Exception(s"Error running powershell script (exit code: $retCode)") } } object PowershellRunner { def defaultOptions: Seq[String] = Seq("-NoProfile", "-NonInteractive") private def readFully(is: InputStream): Array[Byte] = { val buffer = new ByteArrayOutputStream val data = Array.ofDim[Byte](16384) var nRead = 0 while ({ nRead = is.read(data, 0, data.length) nRead != -1 }) buffer.write(data, 0, nRead) buffer.flush() buffer.toByteArray } }
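A sketch of running a trivial script through the runner above; it assumes powershell.exe is on the PATH, so it only makes sense on Windows.

import coursier.env.PowershellRunner

val runner = new PowershellRunner()  // defaults: powershell.exe, -NoProfile -NonInteractive
val output = runner.runScript("Write-Output 'hello from powershell'")
println(output.trim)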
Example 151
Source File: FileUtil.scala From coursier with Apache License 2.0 | 5 votes |
package coursier.launcher.internal import java.io.{ByteArrayOutputStream, InputStream, OutputStream} import java.nio.file.attribute.PosixFilePermission import java.nio.file.{Files, LinkOption, Path} import scala.collection.JavaConverters._ private[coursier] object FileUtil { // Won't be necessary anymore with Java 9 // (https://docs.oracle.com/javase/9/docs/api/java/io/InputStream.html#readAllBytes--, // via https://stackoverflow.com/questions/1264709/convert-inputstream-to-byte-array-in-java/37681322#37681322) def readFullyUnsafe(is: InputStream): Array[Byte] = { val buffer = new ByteArrayOutputStream val data = Array.ofDim[Byte](16384) var nRead = 0 while ({ nRead = is.read(data, 0, data.length) nRead != -1 }) buffer.write(data, 0, nRead) buffer.flush() buffer.toByteArray } def readFully(is: => InputStream): Array[Byte] = { var is0: InputStream = null try { is0 = is readFullyUnsafe(is0) } finally { if (is0 != null) is0.close() } } def withOutputStream[T](path: Path)(f: OutputStream => T): T = { var os: OutputStream = null try { os = Files.newOutputStream(path) f(os) } finally { if (os != null) os.close() } } def tryMakeExecutable(path: Path): Boolean = { try { val perms = Files.getPosixFilePermissions(path).asScala.toSet var newPerms = perms if (perms(PosixFilePermission.OWNER_READ)) newPerms += PosixFilePermission.OWNER_EXECUTE if (perms(PosixFilePermission.GROUP_READ)) newPerms += PosixFilePermission.GROUP_EXECUTE if (perms(PosixFilePermission.OTHERS_READ)) newPerms += PosixFilePermission.OTHERS_EXECUTE if (newPerms != perms) Files.setPosixFilePermissions( path, newPerms.asJava ) true } catch { case _: UnsupportedOperationException => false } } def tryHideWindows(path: Path): Boolean = Windows.isWindows && { try { Files.setAttribute(path, "dos:hidden", java.lang.Boolean.TRUE, LinkOption.NOFOLLOW_LINKS) true } catch { case _: UnsupportedOperationException => false } } }
Example 152
Source File: FileUtil.scala From coursier with Apache License 2.0 | 5 votes |
package coursier.cache.internal import java.io.{ByteArrayOutputStream, InputStream} object FileUtil { // Won't be necessary anymore with Java 9 // (https://docs.oracle.com/javase/9/docs/api/java/io/InputStream.html#readAllBytes--, // via https://stackoverflow.com/questions/1264709/convert-inputstream-to-byte-array-in-java/37681322#37681322) def readFullyUnsafe(is: InputStream): Array[Byte] = { val buffer = new ByteArrayOutputStream val data = Array.ofDim[Byte](16384) var nRead = 0 while ({ nRead = is.read(data, 0, data.length) nRead != -1 }) buffer.write(data, 0, nRead) buffer.flush() buffer.toByteArray } def readFully(is: => InputStream): Array[Byte] = { var is0: InputStream = null try { is0 = is readFullyUnsafe(is0) } finally { if (is0 != null) is0.close() } } def withContent(is: InputStream, f: WithContent, bufferSize: Int = 16384): Unit = { val data = Array.ofDim[Byte](bufferSize) var nRead = is.read(data, 0, data.length) while (nRead != -1) { f(data, nRead) nRead = is.read(data, 0, data.length) } } trait WithContent { def apply(arr: Array[Byte], z: Int): Unit } class UpdateDigest(md: java.security.MessageDigest) extends FileUtil.WithContent { def apply(arr: Array[Byte], z: Int): Unit = md.update(arr, 0, z) } }
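A sketch that feeds a file through the WithContent callback above to compute a SHA-1; the file path is hypothetical and MessageDigest is the plain JDK API.

import java.io.FileInputStream
import java.security.MessageDigest
import coursier.cache.internal.FileUtil

val md = MessageDigest.getInstance("SHA-1")
val is = new FileInputStream("artifact.jar")    // hypothetical file
try FileUtil.withContent(is, new FileUtil.UpdateDigest(md))
finally is.close()
val sha1 = md.digest().map("%02x".format(_)).mkString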
Example 153
Source File: MainClass.scala From coursier with Apache License 2.0 | 5 votes |
package coursier.install import java.io.{File, InputStream} import java.util.jar.{Manifest => JManifest} import java.util.zip.ZipFile object MainClass { private def manifestPath = "META-INF/MANIFEST.MF" def mainClasses(jars: Seq[File]): Map[(String, String), String] = { val metaInfs = jars.flatMap { f => val zf = new ZipFile(f) val entryOpt = Option(zf.getEntry(manifestPath)) entryOpt.map(e => () => zf.getInputStream(e)).toSeq } val mainClasses = metaInfs.flatMap { f => var is: InputStream = null val attributes = try { is = f() new JManifest(is).getMainAttributes } finally { if (is != null) is.close() } def attributeOpt(name: String) = Option(attributes.getValue(name)) val vendor = attributeOpt("Implementation-Vendor-Id").getOrElse("") val title = attributeOpt("Specification-Title").getOrElse("") val mainClass = attributeOpt("Main-Class") mainClass.map((vendor, title) -> _) } mainClasses.toMap } def retainedMainClassOpt( mainClasses: Map[(String, String), String], mainDependencyOpt: Option[(String, String)] ): Option[String] = if (mainClasses.size == 1) { val (_, mainClass) = mainClasses.head Some(mainClass) } else { // Trying to get the main class of the first artifact val mainClassOpt = for { (mainOrg, mainName) <- mainDependencyOpt mainClass <- mainClasses.collectFirst { case ((org, name), mainClass) if org == mainOrg && ( mainName == name || mainName.startsWith(name + "_") // Ignore cross version suffix ) => mainClass } } yield mainClass def sameOrgOnlyMainClassOpt = for { (mainOrg, mainName) <- mainDependencyOpt orgMainClasses = mainClasses.collect { case ((org, _), mainClass) if org == mainOrg => mainClass }.toSet if orgMainClasses.size == 1 } yield orgMainClasses.head mainClassOpt.orElse(sameOrgOnlyMainClassOpt) } }
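A sketch combining the two helpers above; the jar paths and the main dependency coordinates are hypothetical.

import java.io.File
import coursier.install.MainClass

val jars = Seq(new File("app.jar"), new File("lib.jar"))   // hypothetical classpath
val byVendorAndTitle = MainClass.mainClasses(jars)         // (vendor, title) -> main class
val mainClassOpt = MainClass.retainedMainClassOpt(
  byVendorAndTitle,
  mainDependencyOpt = Some(("org.example", "app"))         // hypothetical main dependency
)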
Example 154
Source File: Confirm.scala From coursier with Apache License 2.0 | 5 votes |
package coursier.cli.setup import java.io.{InputStream, PrintStream} import java.util.{Locale, Scanner} import coursier.util.Task import dataclass.data import scala.annotation.tailrec trait Confirm { def confirm(message: String, default: Boolean): Task[Boolean] } object Confirm { @data class ConsoleInput( in: InputStream = System.in, out: PrintStream = System.err, locale: Locale = Locale.getDefault, @since indent: Int = 0 ) extends Confirm { private val marginOpt = if (indent > 0) Some(" " * indent) else None def confirm(message: String, default: Boolean): Task[Boolean] = Task.delay { val choice = if (default) "[Y/n]" else "[y/N]" val message0 = marginOpt match { case None => message case Some(margin) => message.linesIterator.map(margin + _).mkString(System.lineSeparator()) } out.print(s"$message0 $choice ") @tailrec def loop(): Boolean = { val scanner = new Scanner(in) val resp = scanner.nextLine() val resp0 = resp .filter(!_.isSpaceChar) .toLowerCase(locale) .distinct resp0 match { case "y" => true case "n" => false case "" => default case _ => out.print(s"Please answer Y or N. $choice ") loop() } } loop() } } @data class YesToAll( out: PrintStream = System.err ) extends Confirm { def confirm(message: String, default: Boolean): Task[Boolean] = Task.delay { out.println(message + " [Y/n] Y") true } } def default: Confirm = ConsoleInput() }
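A sketch of building a confirmation prompt with the classes above; the returned coursier.util.Task still has to be run by the caller, which is left out here.

import coursier.cli.setup.Confirm

val interactive = Confirm.default          // prompts on stderr, reads the answer from stdin
val alwaysYes   = Confirm.YesToAll()       // handy for non-interactive runs and tests
val task: coursier.util.Task[Boolean] =
  interactive.confirm("Proceed with setup?", default = true)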
Example 155
Source File: ReaderInputStream.scala From better-files with MIT License | 5 votes |
package better.files import java.io.{InputStream, Reader} import java.nio.{ByteBuffer, CharBuffer} import java.nio.charset.{Charset, CharsetEncoder, CoderResult, CodingErrorAction} import scala.annotation.tailrec private[this] val encoderOut = ByteBuffer.allocate(bufferSize >> 4).flip().asInstanceOf[ByteBuffer] private[this] var lastCoderResult = CoderResult.UNDERFLOW private[this] var endOfInput = false private[this] def fillBuffer() = { assert(!endOfInput) if (lastCoderResult.isUnderflow) { val position = encoderIn.compact().position() // We don't use Reader#read(CharBuffer) here because it is more efficient to write directly to the underlying char array // since the default implementation copies data to a temporary char array anyway reader.read(encoderIn.array, position, encoderIn.remaining) match { case EOF => endOfInput = true case c => encoderIn.position(position + c) } encoderIn.flip() } lastCoderResult = encoder.encode(encoderIn, encoderOut.compact(), endOfInput) encoderOut.flip() } override def read(b: Array[Byte], off: Int, len: Int) = { if (len < 0 || off < 0 || (off + len) > b.length) throw new IndexOutOfBoundsException("Array Size=" + b.length + ", offset=" + off + ", length=" + len) if (len == 0) { 0 // Always return 0 if len == 0 } else { var read = 0 @tailrec def loop(off: Int, len: Int): Unit = if (len > 0) { if (encoderOut.hasRemaining) { val c = encoderOut.remaining min len encoderOut.get(b, off, c) read += c loop(off + c, len - c) } else if (!endOfInput) { fillBuffer() loop(off, len) } } loop(off, len) if (read == 0 && endOfInput) EOF else read } } @tailrec final override def read() = { if (encoderOut.hasRemaining) { encoderOut.get & 0xff } else if (endOfInput) { EOF } else { fillBuffer() read() } } override def close() = reader.close() }
Example 156
Source File: IOStreamOps.scala From scala-server-lambda with MIT License | 5 votes |
package io.github.howardjohn.lambda import java.io.{InputStream, OutputStream} import java.nio.charset.StandardCharsets import scala.io.Source object StreamOps { implicit class InputStreamOps(val is: InputStream) extends AnyVal { def consume(): String = { val contents = Source.fromInputStream(is).mkString is.close() contents } } implicit class OutputStreamOps(val os: OutputStream) extends AnyVal { def writeAndClose(contents: String): Unit = { os.write(contents.getBytes(StandardCharsets.UTF_8)) os.close() } } }
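A quick sketch of the two syntax helpers above; the in-memory streams stand in for the streams the Lambda runtime would hand over.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import io.github.howardjohn.lambda.StreamOps._

val in  = new ByteArrayInputStream("""{"path":"/"}""".getBytes("UTF-8"))
val out = new ByteArrayOutputStream()

val body = in.consume()                        // reads everything and closes the stream
out.writeAndClose("""{"statusCode":200}""")    // writes UTF-8 bytes and closes the stream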
Example 157
Source File: LambdaHandler.scala From scala-server-lambda with MIT License | 5 votes |
package io.github.howardjohn.lambda import java.io.{InputStream, OutputStream} import io.github.howardjohn.lambda.ProxyEncoding._ import io.github.howardjohn.lambda.StreamOps._ trait LambdaHandler { def handleRequest(request: ProxyRequest): ProxyResponse def handle(is: InputStream, os: OutputStream): Unit = { val rawInput = is.consume() val request = parseRequest(rawInput).fold( e => throw e, identity ) val rawResponse = handleRequest(request) val response = encodeResponse(rawResponse) os.writeAndClose(response) } }
Example 158
Source File: InvokeMigrationHandler.scala From flyway-awslambda with MIT License | 5 votes |
package crossroad0201.aws.flywaylambda import java.io.{BufferedOutputStream, InputStream, OutputStream, PrintWriter} import com.amazonaws.regions.{Region, Regions} import com.amazonaws.services.lambda.runtime.{Context, RequestStreamHandler} import com.amazonaws.services.s3.{AmazonS3, AmazonS3Client} import scala.io.{BufferedSource, Codec} import scala.util.{Failure, Success, Try} class InvokeMigrationHandler extends RequestStreamHandler with S3MigrationHandlerBase { type BucketName = String type Prefix = String type ConfFileName = String override def handleRequest(input: InputStream, output: OutputStream, context: Context): Unit = { def parseInput: Try[(BucketName, Prefix, ConfFileName)] = Try { import spray.json._ import DefaultJsonProtocol._ val json = new BufferedSource(input)(Codec("UTF-8")).mkString val jsObj = JsonParser(json).toJson.asJsObject jsObj.getFields( "bucket_name", "prefix" ) match { case Seq(JsString(b), JsString(p)) => { jsObj.getFields( "flyway_conf" ) match { case Seq(JsString(c)) => (b, p, c) case _ => (b, p, "flyway.conf") } } case _ => throw new IllegalArgumentException(s"Missing require key [bucketName, prefix]. - $json") } } val logger = context.getLogger implicit val s3Client: AmazonS3 = new AmazonS3Client().withRegion(Region.getRegion(Regions.fromName(sys.env("AWS_REGION")))) (for { i <- parseInput _ = { logger.log(s"Flyway migration start. by invoke lambda function(${i._1}, ${i._2}, ${i._3}).") } r <- migrate(i._1, i._2, i._3)(context, s3Client) } yield r) match { case Success(r) => logger.log(r) val b = r.getBytes("UTF-8") val bout = new BufferedOutputStream(output) Stream.continually(bout.write(b)) bout.flush() case Failure(e) => e.printStackTrace() val w = new PrintWriter(output) w.write(e.toString) w.flush() } } }
Example 159
Source File: StreamReadingThread.scala From ncdbg with BSD 3-Clause "New" or "Revised" License | 5 votes |
package com.programmaticallyspeaking.ncd.nashorn import java.io.{BufferedReader, IOException, InputStream, InputStreamReader} import scala.util.control.NonFatal class StreamReadingThread(in: InputStream, appender: (String) => Unit) extends Thread { override def run(): Unit = { try { val reader = new BufferedReader(new InputStreamReader(in)) var str = "" while (str != null) { str = reader.readLine() Option(str).foreach(appender) } } catch { case _: InterruptedException => // ok case ex: IOException if isStreamClosed(ex) => // ok case NonFatal(t) => t.printStackTrace(System.err) } } private def isStreamClosed(ex: IOException) = ex.getMessage.toLowerCase == "stream closed" }
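A sketch wiring the reader thread above to a child process's stderr; the command is just a placeholder that happens to print to stderr.

import com.programmaticallyspeaking.ncd.nashorn.StreamReadingThread

val process = new ProcessBuilder("java", "-version").start()   // placeholder command
new StreamReadingThread(process.getErrorStream, line => println(s"[child] $line")).start()
process.waitFor()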
Example 160
Source File: AudioStreams.scala From mmlspark with MIT License | 5 votes |
// Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. package com.microsoft.ml.spark import java.io.InputStream import com.microsoft.cognitiveservices.speech.audio.PullAudioInputStreamCallback class WavStream(val wavStream: InputStream) extends PullAudioInputStreamCallback { val stream = parseWavHeader(wavStream) override def read(dataBuffer: Array[Byte]): Int = { Math.max(0, stream.read(dataBuffer, 0, dataBuffer.length)) } override def close(): Unit = { stream.close() } // region Wav File helper functions private def readUInt32(inputStream: InputStream) = { (0 until 4).foldLeft(0) { case (n, i) => n | inputStream.read << (i * 8) } } private def readUInt16(inputStream: InputStream) = { (0 until 2).foldLeft(0) { case (n, i) => n | inputStream.read << (i * 8) } } //noinspection ScalaStyle def parseWavHeader(reader: InputStream): InputStream = { // Tag "RIFF" val data = new Array[Byte](4) var numRead = reader.read(data, 0, 4) assert((numRead == 4) && (data sameElements "RIFF".getBytes), "RIFF") // Chunk size val fileLength = readUInt32(reader) numRead = reader.read(data, 0, 4) assert((numRead == 4) && (data sameElements "WAVE".getBytes), "WAVE") numRead = reader.read(data, 0, 4) assert((numRead == 4) && (data sameElements "fmt ".getBytes), "fmt ") val formatSize = readUInt32(reader) assert(formatSize >= 16, "formatSize") val formatTag = readUInt16(reader) val channels = readUInt16(reader) val samplesPerSec = readUInt32(reader) val avgBytesPerSec = readUInt32(reader) val blockAlign = readUInt16(reader) val bitsPerSample = readUInt16(reader) assert(formatTag == 1, "PCM") // PCM assert(channels == 1, "single channel") assert(samplesPerSec == 16000, "samples per second") assert(bitsPerSample == 16, "bits per sample") // Until now we have read 16 bytes in format, the rest is cbSize and is ignored // for now. if (formatSize > 16) { numRead = reader.read(new Array[Byte]((formatSize - 16).toInt)) assert(numRead == (formatSize - 16), "could not skip extended format") } // Second Chunk, data // tag: data. numRead = reader.read(data, 0, 4) assert((numRead == 4) && (data sameElements "data".getBytes)) val dataLength = readUInt32(reader) reader } } class CompressedStream(val stream: InputStream) extends PullAudioInputStreamCallback { override def read(dataBuffer: Array[Byte]): Int = { Math.max(0, stream.read(dataBuffer, 0, dataBuffer.length)) } override def close(): Unit = { stream.close() } }
Example 161
Source File: Schema.scala From mmlspark with MIT License | 5 votes |
// Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. package com.microsoft.ml.spark.downloader import java.io.InputStream import java.net.URI import org.apache.commons.codec.digest.DigestUtils import spray.json._ import scala.collection.JavaConversions._ import scala.collection.JavaConverters._ private[spark] object NamingConventions { def canonicalModelFilename(name: String, dataset: String): String = s"${name}_$dataset.model" def canonicalModelFilename(model: ModelSchema): String = s"${model.name}_${model.dataset}.model" } case class ModelSchema(name: String, dataset: String, modelType: String, override val uri: URI, override val hash: String, override val size: Long, inputNode: Int, numLayers: Int, layerNames: Array[String]) extends Schema(uri, hash, size) { def this(name: String, dataset: String, modelType: String, uri: URI, hash: String, size: Long, inputNode: Int, numLayers: Int, layerNames: java.util.ArrayList[String]) = { this(name, dataset, modelType, uri, hash, size, inputNode, numLayers, layerNames.toList.toArray) } override def updateURI(newURI: URI): this.type = this.copy(uri = newURI).asInstanceOf[this.type] } private[spark] object SchemaJsonProtocol extends DefaultJsonProtocol { implicit object URIJsonFormat extends JsonFormat[URI] { def write(u: URI): JsValue = { JsString(u.toString) } def read(value: JsValue): URI = new URI(value.asInstanceOf[JsString].value) } implicit val ModelSchemaFormat: RootJsonFormat[ModelSchema] = jsonFormat(ModelSchema.apply, "name", "dataset", "modelType", "uri", "hash", "size", "inputNode", "numLayers", "layerNames") }
Example 162
Source File: ContextObjectInputStream.scala From mmlspark with MIT License | 5 votes |
// Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. package com.microsoft.ml.spark.core.utils import java.io.{InputStream, ObjectInputStream, ObjectStreamClass} class ContextObjectInputStream(input: InputStream) extends ObjectInputStream(input) { protected override def resolveClass(desc: ObjectStreamClass): Class[_] = { try { Class.forName(desc.getName, false, Thread.currentThread().getContextClassLoader) } catch { case _: ClassNotFoundException => super.resolveClass(desc) } } }
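A sketch of deserializing with the stream above so that classes resolve against the thread's context class loader first; the byte array is assumed to hold a Java-serialized object.

import java.io.ByteArrayInputStream
import com.microsoft.ml.spark.core.utils.ContextObjectInputStream

def deserialize(bytes: Array[Byte]): AnyRef = {
  val ois = new ContextObjectInputStream(new ByteArrayInputStream(bytes))
  try ois.readObject() finally ois.close()
}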
Example 163
Source File: StreamUtilities.scala From mmlspark with MIT License | 5 votes |
// Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. package com.microsoft.ml.spark.core.env import java.io.{ByteArrayOutputStream, InputStream} import java.util.zip.ZipInputStream import org.apache.commons.io.IOUtils import scala.io.Source import scala.util.Random object StreamUtilities { import scala.util.{Failure, Success, Try} def usingMany[T <: AutoCloseable, U](disposable: Seq[T])(task: Seq[T] => U): Try[U] = { try { Success(task(disposable)) } catch { case e: Exception => Failure(e) } finally { disposable.foreach(d => d.close()) } } def using[T <: AutoCloseable, U](disposable: T)(task: T => U): Try[U] = { try { Success(task(disposable)) } catch { case e: Exception => Failure(e) } finally { disposable.close() } } def usingSource[T <: Source, U](disposable: T)(task: T => U): Try[U] = { try { Success(task(disposable)) } catch { case e: Exception => Failure(e) } finally { disposable.close() } } class ZipIterator(stream: InputStream, zipfile: String, random: Random, sampleRatio: Double = 1) extends Iterator[(String, Array[Byte])] { private val zipStream = new ZipInputStream(stream) private def getNext: Option[(String, Array[Byte])] = { var entry = zipStream.getNextEntry while (entry != null) { if (!entry.isDirectory && random.nextDouble < sampleRatio) { val filename = zipfile + java.io.File.separator + entry.getName //extracting all bytes of a given entry val byteStream = new ByteArrayOutputStream IOUtils.copy(zipStream, byteStream) val bytes = byteStream.toByteArray assert(bytes.length == entry.getSize, "incorrect number of bytes is read from zipstream: " + bytes.length + " instead of " + entry.getSize) return Some((filename, bytes)) } entry = zipStream.getNextEntry } stream.close() None } private var nextValue = getNext def hasNext: Boolean = nextValue.isDefined def next: (String, Array[Byte]) = { val result = nextValue.get nextValue = getNext result } } }
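A sketch of the using helper above, which wraps the body in a Try and always closes the resource; the file path is hypothetical.

import java.io.FileInputStream
import com.microsoft.ml.spark.core.env.StreamUtilities

val firstByte: scala.util.Try[Int] =
  StreamUtilities.using(new FileInputStream("data.bin")) { is =>   // hypothetical file
    is.read()
  }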
Example 164
Source File: AmandroidSettings.scala From Argus-SAF with Apache License 2.0 | 5 votes |
package org.argus.amandroid.core import java.io.{File, FileInputStream, InputStream} import org.ini4j.Wini import org.argus.jawa.core.util.FileUtil class AmandroidSettings(amandroid_home: String, iniPathOpt: Option[String]) { private val amandroid_home_uri = FileUtil.toUri(amandroid_home) private def defaultLibFiles = amandroid_home + "/androidSdk/android-25/android.jar" + java.io.File.pathSeparator + amandroid_home + "/androidSdk/support/v4/android-support-v4.jar" + java.io.File.pathSeparator + amandroid_home + "/androidSdk/support/v13/android-support-v13.jar" + java.io.File.pathSeparator + amandroid_home + "/androidSdk/support/v7/android-support-v7-appcompat.jar" private def defaultThirdPartyLibFile = amandroid_home + "/liblist.txt" private val iniUri = { iniPathOpt match { case Some(path) => FileUtil.toUri(path) case None => FileUtil.appendFileName(amandroid_home_uri, "config.ini") } } private val ini = new Wini(FileUtil.toFile(iniUri)) def timeout: Int = Option(ini.get("analysis", "timeout", classOf[Int])).getOrElse(5) def dependence_dir: Option[String] = Option(ini.get("general", "dependence_dir", classOf[String])) def debug: Boolean = ini.get("general", "debug", classOf[Boolean]) def lib_files: String = Option(ini.get("general", "lib_files", classOf[String])).getOrElse(defaultLibFiles) def third_party_lib_file: String = Option(ini.get("general", "third_party_lib_file", classOf[String])).getOrElse(defaultThirdPartyLibFile) def actor_conf_file: InputStream = Option(ini.get("concurrent", "actor_conf_file", classOf[String])) match { case Some(path) => new FileInputStream(path) case None => getClass.getResourceAsStream("/application.conf") } def static_init: Boolean = ini.get("analysis", "static_init", classOf[Boolean]) def parallel: Boolean = ini.get("analysis", "parallel", classOf[Boolean]) def k_context: Int = ini.get("analysis", "k_context", classOf[Int]) def sas_file: String = Option(ini.get("analysis", "sas_file", classOf[String])).getOrElse(amandroid_home + File.separator + "taintAnalysis" + File.separator + "sourceAndSinks" + File.separator + "TaintSourcesAndSinks.txt") def native_sas_file: String = Option(ini.get("analysis", "sas_file", classOf[String])).getOrElse(amandroid_home + File.separator + "taintAnalysis" + File.separator + "sourceAndSinks" + File.separator + "NativeSourcesAndSinks.txt") def injection_sas_file: String = Option(ini.get("analysis", "injection_sas_file", classOf[String])).getOrElse(amandroid_home + File.separator + "taintAnalysis" + File.separator + "sourceAndSinks" + File.separator + "IntentInjectionSourcesAndSinks.txt") }
Example 165
Source File: ZipUtil.scala From Argus-SAF with Apache License 2.0 | 5 votes |
package org.argus.jawa.core.util import java.io.{File, FileOutputStream, InputStream, OutputStream} import java.util.zip.{ZipEntry, ZipFile} import scala.collection.JavaConverters._ object ZipUtil { val BUFSIZE = 4096 val buffer = new Array[Byte](BUFSIZE) def unZip(source: String, targetFolder: String): Boolean = { val zipFile = new ZipFile(source) unzipAllFile(zipFile.entries.asScala.toList, getZipEntryInputStream(zipFile), new File(targetFolder)) } def getZipEntryInputStream(zipFile: ZipFile)(entry: ZipEntry): InputStream = zipFile.getInputStream(entry) def unzipAllFile(entryList: List[ZipEntry], inputGetter: ZipEntry => InputStream, targetFolder: File): Boolean = { entryList match { case entry :: entries => if (entry.isDirectory) new File(targetFolder, entry.getName).mkdirs else saveFile(inputGetter(entry), new FileOutputStream(new File(targetFolder, entry.getName))) unzipAllFile(entries, inputGetter, targetFolder) case _ => true } } def saveFile(fis: InputStream, fos: OutputStream): Unit = { writeToFile(bufferReader(fis), fos) fis.close() fos.close() } def bufferReader(fis: InputStream)(buffer: Array[Byte]): (Int, Array[Byte]) = (fis.read(buffer), buffer) def writeToFile(reader: Array[Byte] => (Int, Array[Byte]), fos: OutputStream): Boolean = { val (length, data) = reader(buffer) if (length >= 0) { fos.write(data, 0, length) writeToFile(reader, fos) } else true } }
Example 166
Source File: Parser.scala From uap-scala with Do What The F*ck You Want To Public License | 5 votes |
package org.uaparser.scala import java.io.InputStream import java.util.{ List => JList, Map => JMap } import org.uaparser.scala.Device.DeviceParser import org.uaparser.scala.OS.OSParser import org.uaparser.scala.UserAgent.UserAgentParser import org.yaml.snakeyaml.Yaml import org.yaml.snakeyaml.constructor.SafeConstructor import scala.collection.JavaConverters._ import scala.util.Try case class Parser(userAgentParser: UserAgentParser, osParser: OSParser, deviceParser: DeviceParser) extends UserAgentStringParser { def parse(agent: String): Client = Client(userAgentParser.parse(agent), osParser.parse(agent), deviceParser.parse(agent)) } object Parser { def fromInputStream(source: InputStream): Try[Parser] = Try { val yaml = new Yaml(new SafeConstructor) val javaConfig = yaml.load(source).asInstanceOf[JMap[String, JList[JMap[String, String]]]] val config = javaConfig.asScala.toMap.mapValues(_.asScala.toList.map(_.asScala.toMap.filterNot { case (_ , value) => value eq null })) val userAgentParser = UserAgentParser.fromList(config.getOrElse("user_agent_parsers", Nil)) val osParser = OSParser.fromList(config.getOrElse("os_parsers", Nil)) val deviceParser = DeviceParser.fromList(config.getOrElse("device_parsers", Nil)) Parser(userAgentParser, osParser, deviceParser) } def default: Parser = fromInputStream(this.getClass.getResourceAsStream("/regexes.yaml")).get }
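A sketch of parsing a user-agent string with the default regexes bundled in the library; the agent string is only an example value.

import org.uaparser.scala.Parser

val parser = Parser.default
val client = parser.parse(
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0 Safari/537.36")
println(client)   // roughly Client(UserAgent(Chrome, ...), OS(Windows, ...), Device(...))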
Example 167
Source File: CachingParser.scala From uap-scala with Do What The F*ck You Want To Public License | 5 votes |
package org.uaparser.scala import java.io.InputStream import java.util.{ Collections, LinkedHashMap, Map => JMap } import scala.util.Try case class CachingParser(parser: Parser, maxEntries: Int) extends UserAgentStringParser { lazy val clients: JMap[String, Client] = Collections.synchronizedMap( new LinkedHashMap[String, Client](maxEntries + 1, 1.0f, true) { override protected def removeEldestEntry(eldest: JMap.Entry[String, Client]): Boolean = super.size > maxEntries } ) def parse(agent: String): Client = Option(clients.get(agent)).getOrElse { val client = parser.parse(agent) clients.put(agent, client) client } } object CachingParser { val defaultCacheSize: Int = 1000 def fromInputStream(source: InputStream, size: Int = defaultCacheSize): Try[CachingParser] = Parser.fromInputStream(source).map(CachingParser(_, size)) def default(size: Int = defaultCacheSize): CachingParser = CachingParser(Parser.default, size) }
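The caching variant wraps the same parser with a bounded LRU map; a sketch assuming the default bundled regexes.

import org.uaparser.scala.CachingParser

val cachingParser = CachingParser.default(500)   // keep at most 500 distinct agent strings
val client = cachingParser.parse("curl/7.68.0")  // repeated agents are served from the cache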
Example 168
Source File: ByteTrackingInputStream.scala From hail with MIT License | 5 votes |
package is.hail.utils import java.io.InputStream import is.hail.io.fs.Seekable class ByteTrackingInputStream(base: InputStream) extends InputStream { var bytesRead = 0L def bytesReadAndClear(): Long = { val n = bytesRead bytesRead = 0L n } override def read(): Int = { bytesRead += 1 base.read() } override def read(b: Array[Byte]): Int = { val n = base.read(b) bytesRead += n n } override def read(b: Array[Byte], off: Int, len: Int): Int = { val n = base.read(b, off, len) bytesRead += n n } override def close(): Unit = base.close() def seek(offset: Long): Unit = { base match { case base: Seekable => base.seek(offset) case base: org.apache.hadoop.fs.Seekable => base.seek(offset) } } }
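A sketch of counting bytes read through the wrapper above; the wrapped stream here is an in-memory placeholder.

import java.io.ByteArrayInputStream
import is.hail.utils.ByteTrackingInputStream

val tracked = new ByteTrackingInputStream(new ByteArrayInputStream(Array.fill[Byte](1024)(0)))
val buf = new Array[Byte](256)
tracked.read(buf)
println(tracked.bytesReadAndClear())   // 256, and the counter is reset to 0
tracked.close()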
Example 169
Source File: RichInputStream.scala From hail with MIT License | 5 votes |
package is.hail.utils.richUtils import java.io.InputStream import is.hail.utils._ class RichInputStream(val in: InputStream) extends AnyVal { def readFully(to: Array[Byte], toOff: Int, n: Int): Unit = { val nRead = readRepeatedly(to, toOff, n) if (nRead < n) fatal(s"Premature end of file: expected $n bytes, found $nRead") } def readRepeatedly(to: Array[Byte], toOff: Int, n: Int): Int = { assert(toOff + n <= to.length) var read = 0 var endOfStream = false while (read < n && !endOfStream) { val r = in.read(to, toOff + read, n - read) if (r <= 0) endOfStream = true else read += r } read } def readRepeatedly(to: Array[Byte]): Int = readRepeatedly(to, 0, to.length) }
Example 170
Source File: HTTPClient.scala From hail with MIT License | 5 votes |
package is.hail.utils import java.net.URL import java.io.OutputStream import java.io.InputStream import java.net.HttpURLConnection import is.hail.utils._ import java.nio.charset.StandardCharsets import org.apache.commons.io.output.ByteArrayOutputStream object HTTPClient { def post[T]( url: String, contentLength: Int, writeBody: OutputStream => Unit, readResponse: InputStream => T = (_: InputStream) => (), chunkSize: Int = 0 ): T = { val conn = new URL(url).openConnection().asInstanceOf[HttpURLConnection] conn.setRequestMethod("POST") if (chunkSize > 0) conn.setChunkedStreamingMode(chunkSize) conn.setDoOutput(true); conn.setRequestProperty("Content-Length", Integer.toString(contentLength)) using(conn.getOutputStream())(writeBody) assert(200 <= conn.getResponseCode() && conn.getResponseCode() < 300, s"POST ${url} ${conn.getResponseCode()} ${using(conn.getErrorStream())(fullyReadInputStreamAsString)}") val result = using(conn.getInputStream())(readResponse) conn.disconnect() result } def get[T]( url: String, readResponse: InputStream => T ): T = { val conn = new URL(url).openConnection().asInstanceOf[HttpURLConnection] conn.setRequestMethod("GET") assert(200 <= conn.getResponseCode() && conn.getResponseCode() < 300, s"GET ${url} ${conn.getResponseCode()} ${using(conn.getErrorStream())(fullyReadInputStreamAsString)}") val result = using(conn.getInputStream())(readResponse) conn.disconnect() result } def delete( url: String, readResponse: InputStream => Unit = (_: InputStream) => () ): Unit = { val conn = new URL(url).openConnection().asInstanceOf[HttpURLConnection] conn.setRequestMethod("DELETE") assert(200 <= conn.getResponseCode() && conn.getResponseCode() < 300, s"DELETE ${url} ${conn.getResponseCode()} ${using(conn.getErrorStream())(fullyReadInputStreamAsString)}") val result = using(conn.getInputStream())(readResponse) conn.disconnect() result } private[this] def fullyReadInputStreamAsString(is: InputStream): String = using(new ByteArrayOutputStream()) { baos => drainInputStreamToOutputStream(is, baos) new String(baos.toByteArray(), StandardCharsets.UTF_8) } }
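A sketch of a GET with the helper above, reading the response body as a UTF-8 string; the URL is a placeholder.

import java.io.InputStream
import is.hail.utils.HTTPClient
import scala.io.Source

val body: String = HTTPClient.get(
  "http://localhost:8080/healthcheck",                       // placeholder URL
  (is: InputStream) => Source.fromInputStream(is, "UTF-8").mkString)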
Example 171
Source File: RestartableByteArrayInputStream.scala From hail with MIT License | 5 votes |
package is.hail.utils import java.io.{ IOException, InputStream } // not thread safe class RestartableByteArrayInputStream extends InputStream { private[this] var off: Int = 0 private[this] var end: Int = 0 private[this] var buf: Array[Byte] = null def this(buf: Array[Byte]) { this() restart(buf) } override def read(): Int = { if (off == end) { return -1 } val b = buf(off) & 0xff off += 1 b } override def read(dest: Array[Byte]): Int = read(dest, 0, dest.length) override def read(dest: Array[Byte], destOff: Int, requestedLength: Int): Int = { val length = math.min(requestedLength, end - off) System.arraycopy(buf, off, dest, destOff, length) off += length length } override def skip(n: Long): Long = { if (n <= 0) { return 0 } val skipped = math.min( math.min(n, Integer.MAX_VALUE).toInt, end - off) off += skipped skipped } override def available(): Int = end - off override def markSupported(): Boolean = false override def mark(readAheadLimit: Int): Unit = throw new IOException("unsupported operation") override def reset(): Unit = throw new IOException("unsupported operation") override def close(): Unit = buf = null def restart(buf: Array[Byte]): Unit = restart(buf, 0, buf.length) def restart(buf: Array[Byte], start: Int, end: Int): Unit = { require(start >= 0) require(start <= end) require(end <= buf.length) this.buf = buf this.off = start this.end = end } }
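A sketch showing why the stream above is restartable: one instance can be re-pointed at successive buffers without reallocating.

import is.hail.utils.RestartableByteArrayInputStream

val in = new RestartableByteArrayInputStream()
Seq("first", "second").map(_.getBytes("UTF-8")).foreach { buf =>
  in.restart(buf)
  val out = new Array[Byte](buf.length)
  in.read(out)                          // reads the whole current buffer
  println(new String(out, "UTF-8"))
}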
Example 172
Source File: BGZipBlocks.scala From hail with MIT License | 5 votes |
package is.hail.misc import java.io.InputStream import is.hail.io.compress.BGzipInputStream import is.hail.io.fs.FS object BGZipBlocks { //Print block starts of block gzip (bgz) file def apply(fs: FS, file: String) { var buf = new Array[Byte](64 * 1024) // position of 'buf[0]' in input stream var bufPos = 0L var bufSize = 0 var posInBuf = 0 def fillBuf(is: InputStream) { val newSize = bufSize - posInBuf assert(newSize >= 0) System.arraycopy(buf, posInBuf, buf, 0, newSize) bufPos += posInBuf bufSize = newSize posInBuf = 0 def f() { val needed = buf.length - bufSize if (needed > 0) { val result = is.read(buf, bufSize, needed) if (result > 0) { bufSize += result f() } } } f() } // no decompression codec val is = fs.open(file) fillBuf(is) while (bufSize > 0) { val h = new BGzipInputStream.BGzipHeader(buf, posInBuf, bufSize) println(bufPos) posInBuf += h.getBlockSize fillBuf(is) } is.close() } }
Example 173
Source File: package.scala From hail with MIT License | 5 votes |
package is import java.io.InputStream package object hail { private object HailBuildInfo { import java.util.Properties import is.hail.utils._ val ( hail_build_user: String, hail_revision: String, hail_branch: String, hail_build_date: String, hail_repo_url: String, hail_spark_version: String, hail_pip_version: String) = { loadFromResource[(String, String, String, String, String, String, String)]("build-info.properties") { (is: InputStream) => val unknownProp = "<unknown>" val props = new Properties() props.load(is) ( props.getProperty("user", unknownProp), props.getProperty("revision", unknownProp), props.getProperty("branch", unknownProp), props.getProperty("date", unknownProp), props.getProperty("url", unknownProp), props.getProperty("sparkVersion", unknownProp), props.getProperty("hailPipVersion", unknownProp) ) } } } val HAIL_BUILD_USER = HailBuildInfo.hail_build_user val HAIL_REVISION = HailBuildInfo.hail_revision val HAIL_BRANCH = HailBuildInfo.hail_branch val HAIL_BUILD_DATE = HailBuildInfo.hail_build_date val HAIL_REPO_URL = HailBuildInfo.hail_repo_url val HAIL_SPARK_VERSION = HailBuildInfo.hail_spark_version val HAIL_PIP_VERSION = HailBuildInfo.hail_pip_version // FIXME: probably should use tags or something to choose English name val HAIL_PRETTY_VERSION = HAIL_PIP_VERSION + "-" + HAIL_REVISION.substring(0, 12) }
Example 174
Source File: CodecSpec.scala From hail with MIT License | 5 votes |
package is.hail.io import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream, OutputStream} import is.hail.annotations.{Region, RegionValue} import is.hail.asm4s.{Code, TypeInfo, Value} import is.hail.expr.ir.{EmitClassBuilder, EmitFunctionBuilder, ExecuteContext, typeToTypeInfo} import is.hail.types.encoded.EType import is.hail.types.physical.PType import is.hail.types.virtual.Type import is.hail.rvd.RVDContext import is.hail.sparkextras.ContextRDD import is.hail.utils.using import org.apache.spark.rdd.RDD trait AbstractTypedCodecSpec extends Spec { def encodedType: EType def encodedVirtualType: Type type StagedEncoderF[T] = (Value[Region], Value[T], Value[OutputBuffer]) => Code[Unit] type StagedDecoderF[T] = (Value[Region], Value[InputBuffer]) => Code[T] def buildEncoder(ctx: ExecuteContext, t: PType): (OutputStream) => Encoder def decodedPType(requestedType: Type): PType def buildDecoder(ctx: ExecuteContext, requestedType: Type): (PType, (InputStream) => Decoder) def encode(ctx: ExecuteContext, t: PType, offset: Long): Array[Byte] = { val baos = new ByteArrayOutputStream() using(buildEncoder(ctx, t)(baos))(_.writeRegionValue(offset)) baos.toByteArray } def decode(ctx: ExecuteContext, requestedType: Type, bytes: Array[Byte], region: Region): (PType, Long) = { val bais = new ByteArrayInputStream(bytes) val (pt, dec) = buildDecoder(ctx, requestedType) (pt, dec(bais).readRegionValue(region)) } def buildCodeInputBuffer(is: Code[InputStream]): Code[InputBuffer] def buildCodeOutputBuffer(os: Code[OutputStream]): Code[OutputBuffer] def buildEmitDecoderF[T](requestedType: Type, cb: EmitClassBuilder[_]): (PType, StagedDecoderF[T]) def buildEmitEncoderF[T](t: PType, cb: EmitClassBuilder[_]): StagedEncoderF[T] def buildEmitDecoderF[T](requestedType: Type, cb: EmitClassBuilder[_], ti: TypeInfo[T]): (PType, StagedDecoderF[T]) = { val (ptype, dec) = buildEmitDecoderF[T](requestedType, cb) assert(ti == typeToTypeInfo(requestedType)) ptype -> dec } def buildEmitEncoderF[T](t: PType, cb: EmitClassBuilder[_], ti: TypeInfo[T]): StagedEncoderF[T] = { assert(ti == typeToTypeInfo(t)) buildEmitEncoderF[T](t, cb) } // FIXME: is there a better place for this to live? def decodeRDD(ctx: ExecuteContext, requestedType: Type, bytes: RDD[Array[Byte]]): (PType, ContextRDD[Long]) = { val (pt, dec) = buildDecoder(ctx, requestedType) (pt, ContextRDD.weaken(bytes).cmapPartitions { (ctx, it) => RegionValue.fromBytes(dec, ctx.region, it) }) } override def toString: String = super[Spec].toString }
Example 175
Source File: DoubleInputBuffer.scala From hail with MIT License | 5 votes |
package is.hail.io import java.io.{Closeable, InputStream, OutputStream} import is.hail.annotations.Memory import is.hail.utils._ final class DoubleInputBuffer(in: InputStream, bufSize: Int) extends Closeable { private val buf = new Array[Byte](bufSize) private var end: Int = 0 private var off: Int = 0 def close() { in.close() } def readDoubles(to: Array[Double]): Unit = readDoubles(to, 0, to.length) def readDoubles(to: Array[Double], toOff0: Int, n0: Int) { assert(toOff0 >= 0) assert(n0 >= 0) assert(toOff0 <= to.length - n0) var toOff = toOff0 var n = n0.toLong while (n > 0) { if (end == off) { val len = math.min(bufSize, n << 3).toInt in.readFully(buf, 0, len) end = len off = 0 } val p = math.min(end - off, n << 3).toInt >>> 3 assert(p > 0) Memory.memcpy(to, toOff, buf, off, p) toOff += p n -= p off += (p << 3) } } } final class DoubleOutputBuffer(out: OutputStream, bufSize: Int) extends Closeable { private val buf: Array[Byte] = new Array[Byte](bufSize) private var off: Int = 0 def close() { flush() out.close() } def flush() { out.write(buf, 0, off) } def writeDoubles(from: Array[Double]): Unit = writeDoubles(from, 0, from.length) def writeDoubles(from: Array[Double], fromOff0: Int, n0: Int) { assert(n0 >= 0) assert(fromOff0 >= 0) assert(fromOff0 <= from.length - n0) var fromOff = fromOff0 var n = n0.toLong while (off + (n << 3) > bufSize) { val p = (buf.length - off) >>> 3 Memory.memcpy(buf, off, from, fromOff, p) off += (p << 3) fromOff += p n -= p out.write(buf, 0, off) off = 0 } Memory.memcpy(buf, off, from, fromOff, n) off += (n.toInt << 3) } }
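A round-trip sketch for the two buffers above over in-memory streams; the bufSize of 4096 is an arbitrary choice.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import is.hail.io.{DoubleInputBuffer, DoubleOutputBuffer}

val values = Array(1.0, 2.5, -3.25)

val baos = new ByteArrayOutputStream()
val out = new DoubleOutputBuffer(baos, bufSize = 4096)
out.writeDoubles(values)
out.close()                              // flushes and closes the underlying stream

val in = new DoubleInputBuffer(new ByteArrayInputStream(baos.toByteArray), bufSize = 4096)
val roundTripped = new Array[Double](values.length)
in.readDoubles(roundTripped)
in.close()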
Example 176
Source File: package.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.build import java.io.InputStream package object util { def resourceFromJarAsIStream(filename: String): InputStream = { val jarfile = { val r = this.getClass.getProtectionDomain.getCodeSource.getLocation.getFile if (r.head == '/') r.tail else r } val zip = new java.util.zip.ZipFile(jarfile) val ze = { val rv = zip.getEntry(filename) if (rv == null) { val toggleSlash = if (filename.head == '/') filename.drop(1) else s"/$filename" zip.getEntry(toggleSlash) } else rv } zip.getInputStream(ze) } }
Example 177
Source File: ChildFirstURLClassLoader.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.util.loading import java.io.{File, InputStream} import java.net.{URL, URLClassLoader} // scalastyle:off import sun.misc.CompoundEnumeration // scalastyle:on import scala.util.{Failure, Success, Try} class ChildFirstURLClassLoader(urls: Array[URL], parent: ClassLoader, except: Seq[String] = Seq()) extends URLClassLoader(urls, parent) { protected override def loadClass(name: String, resolve: Boolean): Class[_] = { def tryFind(findAction: => Class[_]): Option[Class[_]] = Try(findAction) match { case Failure(e: ClassNotFoundException) => None case Failure(e) => throw e case Success(c) => Some(c) } def loadLocally = if (except.exists(name.startsWith)) None else tryFind(findClass(name)) def loadFromParent = if (getParent == null) None else tryFind(getParent.loadClass(name)) val alreadyLoaded = findLoadedClass(name) if (alreadyLoaded != null) { alreadyLoaded } else { val `class` = loadLocally.getOrElse(loadFromParent.orNull) if (resolve) resolveClass(`class`) `class` } } override def getResource(name: String): URL = findResource(name) match { case null => super.getResource(name) case u => u } override def getResources(name: String): java.util.Enumeration[URL] = { val parent = getParent val localUrls = findResources(name) val parentUrls: java.util.Enumeration[URL] = if (parent != null) parent.getResources(name) else java.util.Collections.emptyEnumeration() new CompoundEnumeration(Array(localUrls, parentUrls)) } override def getResourceAsStream(name: String): InputStream = { getResource(name) match { case null => null case url => Try(url.openStream) match { case Success(x) => x case Failure(_) => null } } } } object ChildFirstURLClassLoader { def loadClassFromJar[T](className: String, jarPath: String, commonPackageNames:String, excludes: Seq[String] = Seq()): T = Loader(jarPath, excludes :+ commonPackageNames).load(className) case class Loader(jarPath: String, excludes: Seq[String] = Seq()) { val urls = if(new java.io.File(jarPath).isFile) Array(new File(jarPath).toURI.toURL) else Array[URL](new URL(jarPath)) private val cl = new ChildFirstURLClassLoader(urls, this.getClass.getClassLoader, excludes) def load[T](className: String) = cl.loadClass(className).newInstance.asInstanceOf[T] } }
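A sketch of the convenience loader above; the jar path, plugin class, and shared package prefix are hypothetical, the jar must exist locally, and the plugin class needs a public no-arg constructor because load uses newInstance.

import cmwell.util.loading.ChildFirstURLClassLoader

// Classes under com.example.api are taken from the parent (host) class loader,
// everything else is resolved child-first from the plugin jar.
val plugin = ChildFirstURLClassLoader.loadClassFromJar[AnyRef](
  "com.example.PluginImpl", "/opt/plugins/plugin.jar", "com.example.api")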
Example 178
Source File: StringTests.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.util.string.test import java.io.{ByteArrayInputStream, InputStream} import org.scalatest.{FunSpec, Matchers} import cmwell.util.string._ class StringTests extends FunSpec with Matchers { private def mkString(is: InputStream) = { val buffSrc = scala.io.Source.fromInputStream(is) val res = buffSrc.mkString buffSrc.close() res } describe("mapInputStreamLines should") { it("return empty for empty input") { val input = new ByteArrayInputStream(Array.emptyByteArray) val result = mapInputStreamLines(input)(identity) result.read() should be(-1) input.close() result.close() } it("provide the delimiter as well") { val delim = '\n' val s = "provide the\ndelimiter as well" val expectedAmount = s.count(delim.==) val input = stringToInputStream(s) val result = mapInputStreamLines(input)(_.toUpperCase) mkString(result).count(delim.==) should be(expectedAmount) input.close() result.close() } it("not end with the delimiter") { val input = stringToInputStream("not end with\nthe delimiter") val result = mapInputStreamLines(input)(_.toUpperCase) mkString(result).last should be('R') input.close() result.close() } it("handle a concat mapper") { val input = stringToInputStream("handle\na\nconcat\nmapper") val result = mapInputStreamLines(input)(_ + " not") mkString(result) should be("handle not\na not\nconcat not\nmapper not") input.close() result.close() } } }
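Outside the test suite the same helpers compose directly; a minimal sketch, assuming cmwell.util.string is on the classpath:

import cmwell.util.string._

val upper = mapInputStreamLines(stringToInputStream("a\nb\nc"))(_.toUpperCase)
println(scala.io.Source.fromInputStream(upper).mkString) // prints A, B, C on separate lines
upper.close()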
Example 179
Source File: SparqlUtils.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.tools.data.sparql.japi import java.io.InputStream import akka.actor.ActorSystem import akka.stream.ActorMaterializer import akka.stream.scaladsl.{Sink, StreamConverters} import cmwell.tools.data.sparql.SparqlProcessor import cmwell.tools.data.utils.akka.{concatByteStrings, endl} import cmwell.tools.data.utils.chunkers.GroupChunker import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.duration.FiniteDuration object SparqlUtils { def createJavaStreamFromPaths(baseUrl: String, parallelism: Int = 4, isNeedWrapping: Boolean = true, sparqlQuery: String, in: InputStream) = { implicit val system = ActorSystem("reactive-sparql-processor") implicit val mat = ActorMaterializer() SparqlProcessor .createSourceFromPathsInputStream( baseUrl = baseUrl, spQueryParamsBuilder = (p: Seq[String], v: Map[String,String], q: Boolean) => "sp.pid=" + p.head.substring(p.head.lastIndexOf('-') + 1), parallelism = parallelism, isNeedWrapping = isNeedWrapping, sparqlQuery = sparqlQuery, in = in ) .map { case (data, _) => data } .via(GroupChunker(GroupChunker.formatToGroupExtractor("ntriples"))) .map(concatByteStrings(_, endl)) .runWith(StreamConverters.asJavaStream()) } def createJavaOutputStreamFromPaths(baseUrl: String, parallelism: Int = 4, isNeedWrapping: Boolean = true, sparqlQuery: String, in: InputStream, timeout: FiniteDuration) = { implicit val system = ActorSystem("reactive-sparql-processor") implicit val mat = ActorMaterializer() SparqlProcessor .createSourceFromPathsInputStream( baseUrl = baseUrl, spQueryParamsBuilder = (p: Seq[String], v: Map[String,String], q: Boolean) => "sp.pid=" + p.head.substring(p.head.lastIndexOf('-') + 1), parallelism = parallelism, isNeedWrapping = isNeedWrapping, sparqlQuery = sparqlQuery, in = in ) .map { case (data, _) => data } .via(GroupChunker(GroupChunker.formatToGroupExtractor("ntriples"))) .map(concatByteStrings(_, endl)) .runWith(StreamConverters.asInputStream(timeout)) } }
Example 180
Source File: DownloaderUtils.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.tools.data.downloader.streams.japi import java.io.InputStream import akka.Done import akka.actor.ActorSystem import akka.stream.ActorMaterializer import akka.stream.scaladsl.Sink import cmwell.tools.data.downloader.streams.Downloader import cmwell.tools.data.utils.akka._ import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.Future def fromQuery(host: String, path: String, params: String, qp: String, format: String, op: String, length: Option[Int], recursive: Boolean, onFinish: Runnable): Future[Done] = { implicit val system = ActorSystem("reactive-downloader") implicit val mat = ActorMaterializer() Downloader .downloadFromQuery( baseUrl = host, path = path, params = params, qp = qp, format = format, op = op, length = length, recursive = recursive, // scalastyle:off outputHandler = println // scalastyle:on ) .andThen { case _ => cleanup() } .andThen { case _ => onFinish.run() } } def fromUuidInputStream(host: String, format: String, op: String, in: InputStream, onFinish: Runnable) = { implicit val system = ActorSystem("reactive-downloader") implicit val mat = ActorMaterializer() Downloader .downloadFromUuidInputStream( baseUrl = host, format = format, // scalastyle:off outputHandler = println, // scalastyle:on in = in ) .andThen { case _ => cleanup() } .andThen { case _ => onFinish.run() } } }
Example 181
Source File: BlockLang.scala From jgo with GNU General Public License v3.0 | 5 votes |
package jgo.tools.compiler package parser import scala.util.parsing.input.Reader import lexer._ import scope._ import interm._ import interm.types._ import stmts._ import funcs._ class BlockLang(in: Reader[Token], res: List[Type] = Nil, resNamed: Boolean = false) extends FuncContext with Statements { //def, not val. See comment in StackScoped def initialEnclosing = UniverseScope def targetFuncType = FuncType(Nil, res) def hasNamedResults = resNamed lazy val result = phrase(block)(in) } object BlockLang { import java.io.{File, InputStream, FileInputStream, InputStreamReader} import scala.collection.immutable.PagedSeq def apply(in: Reader[Char]): BlockLang = new BlockLang(Scanner(in)) def apply(inStr: String): BlockLang = new BlockLang(Scanner(inStr)) def apply(in: InputStream): BlockLang = new BlockLang(Scanner(in)) def apply(file: File): BlockLang = new BlockLang(Scanner(file)) def from(fileName: String): BlockLang = new BlockLang(Scanner.from(fileName)) }
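The companion object accepts a String, File, Reader[Char] or InputStream, so parsing from a stream is a one-liner. A hypothetical sketch; the file name is a placeholder, and result is the lazily evaluated parse of the whole block:

import java.io.FileInputStream
import jgo.tools.compiler.parser.BlockLang

val bl = BlockLang(new FileInputStream("block.go")) // placeholder file name
println(bl.result)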
Example 182
Source File: Scanner.scala From jgo with GNU General Public License v3.0 | 5 votes |
package jgo.tools.compiler package lexer import scala.util.parsing._ import input._ import combinator._ //portions of this class taken from scala.util.parsing.combinator.lexical.Scanners#Scanner final class Scanner private(prev: Option[Token], in: Reader[Char]) extends Reader[Token] { private def this(in: Reader[Char]) = this(None, in) private val (tok, remainingIn) = Lexical.token(prev, in) def first = { tok } lazy val rest = new Scanner(Some(tok), remainingIn) lazy val pos = Lexical.stripWhitespace(in).pos def atEnd = tok == EOF override def source = in.source override def offset = in.offset def foreach[U](f: Token => U) { var cur = this while (!cur.atEnd) { f(cur.first) cur = cur.rest } } } object Scanner { import java.io.{File, InputStream, FileInputStream, InputStreamReader} import scala.collection.immutable.PagedSeq def apply(in: Reader[Char]): Scanner = new Scanner(None, in) def apply(inStr: String): Scanner = new Scanner(new CharArrayReader(inStr.toCharArray())) def apply(in: File): Scanner = apply(new FileInputStream(in)) def apply(in: InputStream): Scanner = new Scanner(None, new PagedSeqReader(PagedSeq.fromReader(new InputStreamReader(in , "UTF-8")))) def from(fileName: String): Scanner = apply(new FileInputStream(fileName)) }
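Since Scanner is itself a Reader[Token], the token stream can be walked with foreach or with first/rest as in the next example; a minimal sketch tokenizing a string:

import jgo.tools.compiler.lexer.Scanner

Scanner("x := 1 + 2").foreach(println)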
Example 183
Source File: LexTestAll.scala From jgo with GNU General Public License v3.0 | 5 votes |
import jgo.tools.compiler._ import parser.BlockLang import parser.combinatorExten._ import lexer._ import interm.codeseq._ import java.io.{File, InputStream, FileInputStream, InputStreamReader} object LexTestAll { def main(args: Array[String]) { if (args.isEmpty) testAll(new File(System.getProperty("user.home") + "/Desktop/gotest/")) else testAll(new File(args(0))) } def testAll(dir: File) { for (file <- dir.listFiles) if (file.isDirectory) testAll(file) else if (file.isFile && !file.isHidden) test(file) } def test(file: File) { println("testing: " + file.getCanonicalPath) println() var cur = Scanner(file) print("tokenization: ") while (!cur.atEnd) { print(cur.first + " ") cur = cur.rest } println() println() } }
Example 184
Source File: Unpacker.scala From comet-data-pipeline with Apache License 2.0 | 5 votes |
package com.ebiznext.comet.utils

import java.io.{BufferedInputStream, InputStream}
import java.nio.file.{Files, Paths}

import better.files.File
import org.apache.commons.compress.archivers.{
  ArchiveEntry,
  ArchiveInputStream,
  ArchiveStreamFactory
}
import org.apache.commons.compress.compressors.{CompressorInputStream, CompressorStreamFactory}
import org.apache.commons.compress.utils.IOUtils
import org.apache.commons.io.input.CloseShieldInputStream

import scala.util.Try

object Unpacker {

  def unpack(archiveFile: File, directory: File): Try[Unit] = {
    for {
      inputStream <- Try(Files.newInputStream(Paths.get(archiveFile.pathAsString)))
      it          <- open(inputStream)
    } yield {
      while (it.hasNext) {
        val (entry, is) = it.next()
        if (entry.isDirectory) {
          throw new Exception("Compressed archive cannot contain directories")
        }
        val targetFile = File(directory, entry.getName)
        val o = Files.newOutputStream(targetFile.path)
        try {
          IOUtils.copy(is, o)
        } finally {
          if (o != null) o.close()
        }
      }
    }
  }

  // https://alexwlchan.net/2019/09/unpacking-compressed-archives-in-scala/
  def open(inputStream: InputStream): Try[Iterator[(ArchiveEntry, InputStream)]] =
    for {
      uncompressedInputStream <- createUncompressedStream(inputStream)
      archiveInputStream      <- createArchiveStream(uncompressedInputStream)
      iterator = createIterator(archiveInputStream)
    } yield iterator

  private def createUncompressedStream(inputStream: InputStream): Try[CompressorInputStream] =
    Try {
      new CompressorStreamFactory().createCompressorInputStream(
        getMarkableStream(inputStream)
      )
    }

  private def createArchiveStream(
    uncompressedInputStream: CompressorInputStream
  ): Try[ArchiveInputStream] =
    Try {
      new ArchiveStreamFactory()
        .createArchiveInputStream(
          getMarkableStream(uncompressedInputStream)
        )
    }

  private def createIterator(
    archiveInputStream: ArchiveInputStream
  ): Iterator[(ArchiveEntry, InputStream)] =
    new Iterator[(ArchiveEntry, InputStream)] {
      var latestEntry: ArchiveEntry = _

      override def hasNext: Boolean = {
        latestEntry = archiveInputStream.getNextEntry
        latestEntry != null
      }

      override def next(): (ArchiveEntry, InputStream) =
        (latestEntry, new CloseShieldInputStream(archiveInputStream))
    }

  private def getMarkableStream(inputStream: InputStream): InputStream =
    if (inputStream.markSupported())
      inputStream
    else
      new BufferedInputStream(inputStream)
}
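A hypothetical call site; the paths are placeholders, and the returned Try is pattern-matched so extraction failures surface explicitly:

import better.files.File
import com.ebiznext.comet.utils.Unpacker
import scala.util.{Failure, Success}

Unpacker.unpack(File("/tmp/in/archive.tar.gz"), File("/tmp/out")) match { // placeholder paths
  case Success(_)  => println("archive extracted")
  case Failure(ex) => println(s"extraction failed: ${ex.getMessage}")
}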
Example 185
Source File: SchemaSpec.scala From comet-data-pipeline with Apache License 2.0 | 5 votes |
package com.ebiznext.comet.schema.model

import java.io.{InputStream, StringWriter}

import com.ebiznext.comet.TestHelper
import com.ebiznext.comet.schema.handlers.SchemaHandler

class SchemaSpec extends TestHelper {

  new WithSettings() {
    val schemaHandler = new SchemaHandler(storageHandler)

    "Attribute type" should "be valid" in {
      val stream: InputStream = getClass.getResourceAsStream("/sample/default.yml")
      val lines = scala.io.Source.fromInputStream(stream).getLines().mkString("\n")
      val types = mapper.readValue(lines, classOf[Types])
      val attr = Attribute(
        "attr",
        "invalid-type", // should raise error non existent type
        Some(true),
        true,
        Some(
          PrivacyLevel("MD5")
        ) // Should raise an error. Privacy cannot be applied on types other than string
      )
      attr.checkValidity(schemaHandler) shouldBe Left(List("Invalid Type invalid-type"))
    }

    "Attribute privacy" should "be applicable to any type" in {
      val attr = Attribute(
        "attr",
        "long",
        Some(true),
        true,
        Some(
          PrivacyLevel("ApproxLong(20)")
        ) // Should raise an error. Privacy cannot be applied on types other than string
      )
      attr.checkValidity(schemaHandler) shouldBe Right(true)
    }

    "Sub Attribute" should "be present for struct types only" in {
      val attr = Attribute(
        "attr",
        "long",
        Some(true),
        true,
        Some(
          PrivacyLevel("ApproxLong(20)")
        ), // Should raise an error. Privacy cannot be applied on types other than string
        attributes = Some(List[Attribute]())
      )
      val expectedErrors = List(
        "Attribute Attribute(attr,long,Some(true),true,Some(ApproxLong(20)),None,None,None,Some(List()),None,None,None) : Simple attributes cannot have sub-attributes",
        "Attribute Attribute(attr,long,Some(true),true,Some(ApproxLong(20)),None,None,None,Some(List()),None,None,None) : when present, attributes list cannot be empty."
      )
      attr.checkValidity(schemaHandler) shouldBe Left(expectedErrors)
    }

    "Position serialization" should "output all fields" in {
      val yml = loadTextFile(s"/expected/yml/position_serialization_${versionSuffix}.yml")

      val attr = Attribute("hello", position = Some(Position(1, 2)))
      val writer = new StringWriter()
      mapper.writer().writeValue(writer, attr)
      logger.info("--" + writer.toString + "--")
      logger.info("++" + yml + "++")
      writer.toString.trim should equal(yml)
    }

    "Default value for an attribute" should "only be used for non obligatory fields" in {
      val requiredAttribute =
        Attribute("requiredAttribute", "long", required = true, default = Some("10"))
      requiredAttribute.checkValidity(schemaHandler) shouldBe Left(
        List(
          s"attribute with name ${requiredAttribute.name}: default value valid for optional fields only"
        )
      )

      val optionalAttribute =
        Attribute("optionalAttribute", "long", required = false, default = Some("10"))
      optionalAttribute.checkValidity(schemaHandler) shouldBe Right(true)
    }
  }
}
Example 186
Source File: PackHelper.scala From lila-openingexplorer with GNU Affero General Public License v3.0 | 5 votes |
package lila.openingexplorer import java.io.{ InputStream, OutputStream } import chess.format.Uci import chess.{ Pos, Role } trait PackHelper { protected def writeUint(stream: OutputStream, v: Long) = { var value = v while (value > 127) { stream.write(((value & 127) | 128).toInt) value >>= 7 } stream.write((value & 127).toInt) } protected def readUint(stream: InputStream): Long = { var value: Long = 0 var i: Int = 0 var byte: Int = 0 do { byte = stream.read() value |= (byte.toLong & 127) << (7 * i) i += 1 } while ((byte & 128) != 0) value } protected def writeUint16(stream: OutputStream, v: Int) = { stream.write(0xff & (v >> 8)) stream.write(0xff & v) } protected def readUint16(stream: InputStream): Int = stream.read() << 8 | stream.read() protected def writeUint48(stream: OutputStream, v: Long) = { stream.write((0xff & (v >> 40)).toInt) stream.write((0xff & (v >> 32)).toInt) stream.write((0xff & (v >> 24)).toInt) stream.write((0xff & (v >> 16)).toInt) stream.write((0xff & (v >> 8)).toInt) stream.write((0xff & v).toInt) } protected def readUint48(stream: InputStream): Long = stream.read.toLong << 40 | stream.read.toLong << 32 | stream.read.toLong << 24 | stream.read.toLong << 16 | stream.read.toLong << 8 | stream.read.toLong protected def writeUci(stream: OutputStream, move: Uci.Move): Unit = writeUint16( stream, Pos.all.indexOf(move.orig) | Pos.all.indexOf(move.dest) << 6 | move.promotion.fold(0)(r => (Role.allPromotable.indexOf(r)) + 1) << 12 ) protected def writeUci(stream: OutputStream, drop: Uci.Drop): Unit = { val dest = Pos.all.indexOf(drop.pos) writeUint16(stream, dest | dest << 6 | (Role.all.indexOf(drop.role) + 1) << 12) } protected def writeUci(stream: OutputStream, move: Either[Uci.Move, Uci.Drop]): Unit = move.fold(writeUci(stream, _), writeUci(stream, _)) protected def readUci(stream: InputStream): Either[Uci.Move, Uci.Drop] = { val enc = readUint16(stream) val orig = Pos.all(enc & 63) val dest = Pos.all((enc >> 6) & 63) if (orig == dest) { Right(new Uci.Drop(Role.all((enc >> 12) - 1), dest)) } else { val role = if ((enc >> 12) != 0) Some(Role.allPromotable((enc >> 12) - 1)) else None Left(new Uci.Move(orig, dest, role)) } } }
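The variable-length encoding round-trips through any stream pair; a minimal sketch that mixes the trait into a throwaway object so the protected helpers become reachable (PackDemo is not part of the project):

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

object PackDemo extends PackHelper {
  def roundTrip(v: Long): Long = {
    val out = new ByteArrayOutputStream()
    writeUint(out, v)                                   // 300 encodes as two bytes: 0xAC 0x02
    readUint(new ByteArrayInputStream(out.toByteArray)) // decodes back to the original value
  }
}

println(PackDemo.roundTrip(300L)) // 300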
Example 187
Source File: SubEntry.scala From lila-openingexplorer with GNU Affero General Public License v3.0 | 5 votes |
package lila.openingexplorer import chess.format.Uci import java.io.{ InputStream, OutputStream } case class SubEntry( moves: Map[Either[Uci.Move, Uci.Drop], MoveStats], gameRefs: List[GameRef] ) extends PackHelper { lazy val totalWhite = moves.values.map(_.white).sum lazy val totalDraws = moves.values.map(_.draws).sum lazy val totalBlack = moves.values.map(_.black).sum def totalGames = totalWhite + totalDraws + totalBlack def isEmpty = totalGames == 0 def totalAverageRatingSum = moves.values.map(_.averageRatingSum).sum def averageRating: Int = if (totalGames == 0) 0 else (totalAverageRatingSum / totalGames).toInt def withGameRef(game: GameRef, move: Either[Uci.Move, Uci.Drop]) = new SubEntry( moves + (move -> moves.getOrElse(move, MoveStats.empty).withGameRef(game)), game :: gameRefs ) def withExistingGameRef(game: GameRef) = copy(gameRefs = game :: gameRefs) def withoutExistingGameRef(game: GameRef, move: Either[Uci.Move, Uci.Drop]) = { val stats = moves .get(move) .map(_.withoutExistingGameRef(game: GameRef)) .getOrElse(MoveStats.empty) new SubEntry( if (stats.total > 0) moves + (move -> stats) else moves - move, gameRefs.filterNot(_.gameId == game.gameId) ) } def writeStats(out: OutputStream) = { writeUint(out, moves.size) moves.foreach { case (move, stats) => writeUci(out, move) stats.write(out) } } def write(out: OutputStream) = { writeStats(out) gameRefs .sortWith(_.averageRating > _.averageRating) .distinct .take(SubEntry.maxTopGames) .foreach(_.write(out)) } } object SubEntry extends PackHelper { val maxTopGames = 4 def empty = new SubEntry(Map.empty, List.empty) def fromGameRef(game: GameRef, move: Either[Uci.Move, Uci.Drop]) = empty.withGameRef(game, move) def fromExistingGameRef(game: GameRef) = empty.withExistingGameRef(game) def readStats(in: InputStream, gameRefs: List[GameRef] = List.empty): SubEntry = { var remainingMoves = readUint(in) val moves = scala.collection.mutable.Map.empty[Either[Uci.Move, Uci.Drop], MoveStats] while (remainingMoves > 0) { moves += (readUci(in) -> MoveStats.read(in)) remainingMoves -= 1; } new SubEntry(moves.toMap, gameRefs) } def read(in: InputStream) = { val subEntry = readStats(in) val gameRefs = scala.collection.mutable.ListBuffer.empty[GameRef] while (in.available > 0) { gameRefs += GameRef.read(in) } subEntry.copy(gameRefs = gameRefs.toList) } }
Example 188
Source File: MoveStats.scala From lila-openingexplorer with GNU Affero General Public License v3.0 | 5 votes |
package lila.openingexplorer import java.io.{ InputStream, OutputStream } import chess.Color case class MoveStats( white: Long, draws: Long, black: Long, averageRatingSum: Long ) extends PackHelper { def total = white + draws + black def isEmpty = total == 0 def averageRating: Int = if (total == 0) 0 else (averageRatingSum / total).toInt def withGameRef(game: GameRef) = { val avgRatingSum = averageRatingSum + game.averageRating game.winner match { case Some(Color.White) => copy(white = white + 1, averageRatingSum = avgRatingSum) case Some(Color.Black) => copy(black = black + 1, averageRatingSum = avgRatingSum) case None => copy(draws = draws + 1, averageRatingSum = avgRatingSum) } } def withoutExistingGameRef(game: GameRef) = { val avgRatingSum = averageRatingSum - game.averageRating game.winner match { case Some(Color.White) => copy(white = white - 1, averageRatingSum = avgRatingSum) case Some(Color.Black) => copy(black = black - 1, averageRatingSum = avgRatingSum) case None => copy(draws = draws - 1, averageRatingSum = avgRatingSum) } } def add(other: MoveStats) = new MoveStats( white + other.white, draws + other.draws, black + other.black, averageRatingSum + other.averageRatingSum ) def write(out: OutputStream) = { writeUint(out, white) writeUint(out, draws) writeUint(out, black) writeUint(out, averageRatingSum) } } object MoveStats extends PackHelper { def empty = new MoveStats(0, 0, 0, 0) def fromGameRef(game: GameRef) = empty.withGameRef(game) def read(in: InputStream) = new MoveStats(readUint(in), readUint(in), readUint(in), readUint(in)) }
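MoveStats serializes with the same varint helpers from PackHelper; a sketch of a write/read round trip over in-memory streams:

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

val stats = MoveStats(white = 10, draws = 3, black = 7, averageRatingSum = 48000)
val out = new ByteArrayOutputStream()
stats.write(out)

val decoded = MoveStats.read(new ByteArrayInputStream(out.toByteArray))
assert(decoded == stats) // case-class equality compares all four counters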
Example 189
Source File: OffsetSeqLog.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.streaming import java.io.{InputStream, OutputStream} import java.nio.charset.StandardCharsets._ import scala.io.{Source => IOSource} import org.apache.spark.sql.SparkSession class OffsetSeqLog(sparkSession: SparkSession, path: String) extends HDFSMetadataLog[OffsetSeq](sparkSession, path) { override protected def deserialize(in: InputStream): OffsetSeq = { // called inside a try-finally where the underlying stream is closed in the caller def parseOffset(value: String): Offset = value match { case OffsetSeqLog.SERIALIZED_VOID_OFFSET => null case json => SerializedOffset(json) } val lines = IOSource.fromInputStream(in, UTF_8.name()).getLines() if (!lines.hasNext) { throw new IllegalStateException("Incomplete log file") } val version = lines.next() if (version != OffsetSeqLog.VERSION) { throw new IllegalStateException(s"Unknown log version: ${version}") } // read metadata val metadata = lines.next().trim match { case "" => None case md => Some(md) } OffsetSeq.fill(metadata, lines.map(parseOffset).toArray: _*) } override protected def serialize(offsetSeq: OffsetSeq, out: OutputStream): Unit = { // called inside a try-finally where the underlying stream is closed in the caller out.write(OffsetSeqLog.VERSION.getBytes(UTF_8)) // write metadata out.write('\n') out.write(offsetSeq.metadata.map(_.json).getOrElse("").getBytes(UTF_8)) // write offsets, one per line offsetSeq.offsets.map(_.map(_.json)).foreach { offset => out.write('\n') offset match { case Some(json: String) => out.write(json.getBytes(UTF_8)) case None => out.write(OffsetSeqLog.SERIALIZED_VOID_OFFSET.getBytes(UTF_8)) } } } } object OffsetSeqLog { private val VERSION = "v1" private val SERIALIZED_VOID_OFFSET = "-" }
Example 190
Source File: ProcessTestUtils.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.test import java.io.{InputStream, IOException} import scala.sys.process.BasicIO object ProcessTestUtils { class ProcessOutputCapturer(stream: InputStream, capture: String => Unit) extends Thread { this.setDaemon(true) override def run(): Unit = { try { BasicIO.processFully(capture)(stream) } catch { case _: IOException => // Ignores the IOException thrown when the process termination, which closes the input // stream abruptly. } } } }
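A hypothetical usage that captures a child process's stdout; the command is a placeholder:

import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer

val process = new ProcessBuilder("echo", "hello").start() // placeholder command
val capturer = new ProcessOutputCapturer(process.getInputStream, line => println(s"child: $line"))
capturer.start() // daemon thread; pumps the stream until the process exits
process.waitFor()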
Example 191
Source File: CryptoStreamUtils.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.security import java.io.{InputStream, OutputStream} import java.util.Properties import javax.crypto.KeyGenerator import javax.crypto.spec.{IvParameterSpec, SecretKeySpec} import org.apache.commons.crypto.random._ import org.apache.commons.crypto.stream._ import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ private[this] def createInitializationVector(properties: Properties): Array[Byte] = { val iv = new Array[Byte](IV_LENGTH_IN_BYTES) val initialIVStart = System.currentTimeMillis() CryptoRandomFactory.getCryptoRandom(properties).nextBytes(iv) val initialIVFinish = System.currentTimeMillis() val initialIVTime = initialIVFinish - initialIVStart if (initialIVTime > 2000) { logWarning(s"It costs ${initialIVTime} milliseconds to create the Initialization Vector " + s"used by CryptoStream") } iv } }
Example 192
Source File: CommandUtils.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.worker import java.io.{File, FileOutputStream, InputStream, IOException} import scala.collection.JavaConverters._ import scala.collection.Map import org.apache.spark.SecurityManager import org.apache.spark.deploy.Command import org.apache.spark.internal.Logging import org.apache.spark.launcher.WorkerCommandBuilder import org.apache.spark.util.Utils def redirectStream(in: InputStream, file: File) { val out = new FileOutputStream(file, true) // TODO: It would be nice to add a shutdown hook here that explains why the output is // terminating. Otherwise if the worker dies the executor logs will silently stop. new Thread("redirect output to " + file) { override def run() { try { Utils.copyStream(in, out, true) } catch { case e: IOException => logInfo("Redirection to " + file + " closed: " + e.getMessage) } } }.start() } }
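The excerpt omits the enclosing object declaration, but redirectStream itself is a fire-and-forget copy loop running on a background thread. A hypothetical call site, assuming the caller sits in a package that can see the (package-private) CommandUtils object; the command and log path are placeholders:

import java.io.File

val proc = new ProcessBuilder("ls", "-l").start() // placeholder command
CommandUtils.redirectStream(proc.getInputStream, new File("/tmp/worker-stdout.log")) // placeholder log path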
Example 193
Source File: EventHistoryReporter.scala From sparklens with Apache License 2.0 | 5 votes |
package com.qubole.sparklens.app import java.io.{BufferedInputStream, InputStream} import java.net.URI import com.ning.compress.lzf.LZFInputStream import com.qubole.sparklens.QuboleJobListener import com.qubole.sparklens.common.Json4sWrapper import com.qubole.sparklens.helper.HDFSConfigHelper import net.jpountz.lz4.LZ4BlockInputStream import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.SparkConf import org.json4s.DefaultFormats import org.xerial.snappy.SnappyInputStream class EventHistoryReporter(file: String, extraConf: List[(String, String)] = List.empty) { // This is using reflection in spark-2.0.0 ReplayListenerBus val busKlass = Class.forName("org.apache.spark.scheduler.ReplayListenerBus") val bus = busKlass.newInstance() val addListenerMethod = busKlass.getMethod("addListener", classOf[Object]) val conf = new SparkConf() .set("spark.sparklens.reporting.disabled", "false") .set("spark.sparklens.save.data", "false") extraConf.foreach(x => { conf.set(x._1, x._2) }) val listener = new QuboleJobListener(conf) addListenerMethod.invoke(bus, listener) try { val replayMethod = busKlass.getMethod("replay", classOf[InputStream], classOf[String], classOf[Boolean]) replayMethod.invoke(bus, getDecodedInputStream(file, conf), file, boolean2Boolean(false)) } catch { case _: NoSuchMethodException => // spark binaries are 2.1* and above val replayMethod = busKlass.getMethod("replay", classOf[InputStream], classOf[String], classOf[Boolean], classOf[String => Boolean]) replayMethod.invoke(bus, getDecodedInputStream(file, conf), file, boolean2Boolean(false), getFilter _) case x: Exception => { println(s"Failed replaying events from ${file} [${x.getMessage}]") } } // Borrowed from CompressionCodecs in spark private def getDecodedInputStream(file: String, conf: SparkConf): InputStream = { val fs = FileSystem.get(new URI(file), HDFSConfigHelper.getHadoopConf(Some(conf))) val path = new Path(file) val bufStream = new BufferedInputStream(fs.open(path)) val logName = path.getName.stripSuffix(".inprogress") val codecName: Option[String] = logName.split("\\.").tail.lastOption codecName.getOrElse("") match { case "lz4" => new LZ4BlockInputStream(bufStream) case "lzf" => new LZFInputStream(bufStream) case "snappy" => new SnappyInputStream(bufStream) case _ => bufStream } } private def getFilter(eventString: String): Boolean = { implicit val formats = DefaultFormats eventFilter.contains(Json4sWrapper.parse(eventString).extract[Map[String, Any]].get("Event") .get.asInstanceOf[String]) } private def eventFilter: Set[String] = { Set( "SparkListenerTaskEnd", "SparkListenerApplicationStart", "SparkListenerApplicationEnd", "SparkListenerExecutorAdded", "SparkListenerExecutorRemoved", "SparkListenerJobStart", "SparkListenerJobEnd", "SparkListenerStageSubmitted", "SparkListenerStageCompleted" ) } }
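Constructing the reporter is enough to trigger a replay, since the constructor wires the listener and invokes the reflective replay method itself. A hypothetical invocation; the event-log path is a placeholder, and the compression codec is picked from the file extension as the helper above shows:

import com.qubole.sparklens.app.EventHistoryReporter

new EventHistoryReporter("hdfs:///spark-history/application_1234_0001.lz4") // placeholder path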
Example 194
Source File: CustomReceiver.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.examples.streaming import java.io.{InputStreamReader, BufferedReader, InputStream} import java.net.Socket import org.apache.spark.{SparkConf, Logging} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.{Seconds, StreamingContext} import org.apache.spark.streaming.receiver.Receiver private def receive() { var socket: Socket = null var userInput: String = null try { logInfo("Connecting to " + host + ":" + port) socket = new Socket(host, port) logInfo("Connected to " + host + ":" + port) val reader = new BufferedReader(new InputStreamReader(socket.getInputStream(), "UTF-8")) userInput = reader.readLine() while(!isStopped && userInput != null) { store(userInput) userInput = reader.readLine() } reader.close() socket.close() logInfo("Stopped receiving") restart("Trying to connect again") } catch { case e: java.net.ConnectException => restart("Error connecting to " + host + ":" + port, e) case t: Throwable => restart("Error receiving data", t) } } }
Example 195
Source File: MetricsConfig.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.metrics import java.io.{FileInputStream, InputStream} import java.util.Properties import scala.collection.mutable import scala.util.matching.Regex import org.apache.spark.Logging import org.apache.spark.util.Utils private[spark] class MetricsConfig(val configFile: Option[String]) extends Logging { private val DEFAULT_PREFIX = "*" private val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r private val DEFAULT_METRICS_CONF_FILENAME = "metrics.properties" private[metrics] val properties = new Properties() private[metrics] var propertyCategories: mutable.HashMap[String, Properties] = null private def setDefaultProperties(prop: Properties) { prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet") prop.setProperty("*.sink.servlet.path", "/metrics/json") prop.setProperty("master.sink.servlet.path", "/metrics/master/json") prop.setProperty("applications.sink.servlet.path", "/metrics/applications/json") } def initialize() { // Add default properties in case there's no properties file setDefaultProperties(properties) // If spark.metrics.conf is not set, try to get file in class path val isOpt: Option[InputStream] = configFile.map(new FileInputStream(_)).orElse { try { Option(Utils.getSparkClassLoader.getResourceAsStream(DEFAULT_METRICS_CONF_FILENAME)) } catch { case e: Exception => logError("Error loading default configuration file", e) None } } isOpt.foreach { is => try { properties.load(is) } finally { is.close() } } propertyCategories = subProperties(properties, INSTANCE_REGEX) if (propertyCategories.contains(DEFAULT_PREFIX)) { import scala.collection.JavaConversions._ val defaultProperty = propertyCategories(DEFAULT_PREFIX) for { (inst, prop) <- propertyCategories if (inst != DEFAULT_PREFIX) (k, v) <- defaultProperty if (prop.getProperty(k) == null) } { prop.setProperty(k, v) } } } def subProperties(prop: Properties, regex: Regex): mutable.HashMap[String, Properties] = { val subProperties = new mutable.HashMap[String, Properties] import scala.collection.JavaConversions._ prop.foreach { kv => if (regex.findPrefixOf(kv._1).isDefined) { val regex(prefix, suffix) = kv._1 subProperties.getOrElseUpdate(prefix, new Properties).setProperty(suffix, kv._2) } } subProperties } def getInstance(inst: String): Properties = { propertyCategories.get(inst) match { case Some(s) => s case None => propertyCategories.getOrElse(DEFAULT_PREFIX, new Properties) } } }
Example 196
Source File: CommandUtils.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.worker import java.io.{File, FileOutputStream, InputStream, IOException} import java.lang.System._ import scala.collection.JavaConversions._ import scala.collection.Map import org.apache.spark.Logging import org.apache.spark.deploy.Command import org.apache.spark.launcher.WorkerCommandBuilder import org.apache.spark.util.Utils def redirectStream(in: InputStream, file: File) { val out = new FileOutputStream(file, true) // TODO: It would be nice to add a shutdown hook here that explains why the output is // terminating. Otherwise if the worker dies the executor logs will silently stop. new Thread("redirect output to " + file) { override def run() { try { Utils.copyStream(in, out, true) } catch { case e: IOException => logInfo("Redirection to " + file + " closed: " + e.getMessage) } } }.start() } }
Example 197
Source File: ReplayListenerBus.scala From iolap with Apache License 2.0 | 5 votes |
package org.apache.spark.scheduler import java.io.{InputStream, IOException} import scala.io.Source import com.fasterxml.jackson.core.JsonParseException import org.json4s.jackson.JsonMethods._ import org.apache.spark.Logging import org.apache.spark.util.JsonProtocol def replay( logData: InputStream, sourceName: String, maybeTruncated: Boolean = false): Unit = { var currentLine: String = null var lineNumber: Int = 1 try { val lines = Source.fromInputStream(logData).getLines() while (lines.hasNext) { currentLine = lines.next() try { postToAll(JsonProtocol.sparkEventFromJson(parse(currentLine))) } catch { case jpe: JsonParseException => // We can only ignore exception from last line of the file that might be truncated if (!maybeTruncated || lines.hasNext) { throw jpe } else { logWarning(s"Got JsonParseException from log file $sourceName" + s" at line $lineNumber, the file might not have finished writing cleanly.") } } lineNumber += 1 } } catch { case ioe: IOException => throw ioe case e: Exception => logError(s"Exception parsing Spark event log: $sourceName", e) logError(s"Malformed line #$lineNumber: $currentLine\n") } } }
Example 198
Source File: CSVUtil.scala From aerosolve with Apache License 2.0 | 5 votes |
package com.airbnb.common.ml.strategy.testutil import java.io.InputStream import scala.reflect.ClassTag import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD object CSVUtil { def readCSVToLines(path: String): Iterator[Array[String]] = { val stream : InputStream = getClass.getResourceAsStream(path) val lines = scala.io.Source.fromInputStream( stream ).getLines.drop(1) lines.map( line => line.split(",").map(_.stripPrefix("\"").stripSuffix("\"").trim) ) } def parseCSVToSeq[T:ClassTag](name: String, parseKey: (Array[String]) => String, parseSample:(Array[String]) => T): Seq[(String, Seq[T])] = { val lines = readCSVToLines(name) val samples = lines.map(cols => (parseKey(cols), parseSample(cols)) ).toSeq. groupBy(_._1).map{ case (key, seq) => (key, seq.map(_._2)) }.toSeq samples } def parseCSVToRDD[T:ClassTag](name: String, parseKey: (Array[String]) => String, parseSample:(Array[String]) => T, sc: SparkContext): RDD[(String, Seq[T])] = { val samples = parseCSVToSeq(name, parseKey, parseSample) val rdd = sc.parallelize(samples) rdd } }
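A hypothetical call parsing a classpath CSV resource; the resource path, key column and Sample row type are placeholders invented for the sketch (the reader drops the header line and strips surrounding quotes, per readCSVToLines above):

import com.airbnb.common.ml.strategy.testutil.CSVUtil

case class Sample(value: Double) // placeholder row type

val grouped: Seq[(String, Seq[Sample])] = CSVUtil.parseCSVToSeq[Sample](
  "/data/samples.csv",                           // placeholder resource path
  parseKey = cols => cols(0),
  parseSample = cols => Sample(cols(1).toDouble)
)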
Example 199
Source File: TestHelper.scala From odsc-west-streaming-trends with GNU General Public License v3.0 | 5 votes |
package com.twilio.open.streaming.trend.discovery import java.io.{ByteArrayInputStream, InputStream} import java.nio.charset.StandardCharsets import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.google.protobuf.Message import com.googlecode.protobuf.format.JsonFormat import com.holdenkarau.spark.testing.{LocalSparkContext, SparkContextProvider} import com.twilio.open.protocol.Calls.CallEvent import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.sql.SparkSession import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers, Suite} import org.slf4j.{Logger, LoggerFactory} import scala.collection.Seq import scala.io.Source import scala.reflect.ClassTag import scala.reflect.classTag object TestHelper { val log: Logger = LoggerFactory.getLogger("com.twilio.open.streaming.trend.discovery.TestHelper") val mapper: ObjectMapper = { val m = new ObjectMapper() m.registerModule(DefaultScalaModule) } val jsonFormat: JsonFormat = new JsonFormat def loadScenario[T<: Message : ClassTag](file: String): Seq[T] = { val fileString = Source.fromFile(file).mkString val parsed = mapper.readValue(fileString, classOf[Sceanario]) parsed.input.map { data => val json = mapper.writeValueAsString(data) convert[T](json) } } def convert[T<: Message : ClassTag](json: String): T = { val clazz = classTag[T].runtimeClass val builder = clazz.getMethod("newBuilder").invoke(clazz).asInstanceOf[Message.Builder] try { val input: InputStream = new ByteArrayInputStream(json.getBytes()) jsonFormat.merge(input, builder) builder.build().asInstanceOf[T] } catch { case e: Exception => throw e } } def asMockKafkaDataFrame(event: CallEvent): MockKafkaDataFrame = { val key = event.getEventId.getBytes(StandardCharsets.UTF_8) val value = event.toByteArray MockKafkaDataFrame(key, value) } } case class MockKafkaDataFrame(key: Array[Byte], value: Array[Byte]) @SerialVersionUID(1L) case class KafkaDataFrame(key: Array[Byte], topic: Array[Byte], value: Array[Byte]) extends Serializable case class Sceanario(input: Seq[Any], expected: Option[Any] = None) trait SparkSqlTest extends BeforeAndAfterAll with SparkContextProvider { self: Suite => @transient var _sparkSql: SparkSession = _ @transient private var _sc: SparkContext = _ override def sc: SparkContext = _sc def conf: SparkConf def sparkSql: SparkSession = _sparkSql override def beforeAll() { _sparkSql = SparkSession.builder().config(conf).getOrCreate() _sc = _sparkSql.sparkContext setup(_sc) super.beforeAll() } override def afterAll() { try { _sparkSql.close() _sparkSql = null LocalSparkContext.stop(_sc) _sc = null } finally { super.afterAll() } } }
Example 200
Source File: CompressedFiles.scala From tensorflow_scala with Apache License 2.0 | 5 votes |
package org.platanios.tensorflow.data.utilities import org.apache.commons.compress.archivers.tar.TarArchiveInputStream import org.apache.commons.compress.utils.IOUtils import java.io.{File, FileOutputStream, InputStream} import java.nio.file.{Files, Path} import java.util.zip.GZIPInputStream object CompressedFiles { def decompressTGZ(tgzFilePath: Path, destinationPath: Path, bufferSize: Int = 8192): Unit = { decompressTGZStream(Files.newInputStream(tgzFilePath), destinationPath, bufferSize) } def decompressTar(tarFilePath: Path, destinationPath: Path, bufferSize: Int = 8192): Unit = { decompressTarStream(Files.newInputStream(tarFilePath), destinationPath, bufferSize) } def decompressTGZStream(tgzStream: InputStream, destinationPath: Path, bufferSize: Int = 8192): Unit = { decompressTarStream(new GZIPInputStream(tgzStream), destinationPath, bufferSize) } def decompressTarStream(tarStream: InputStream, destinationPath: Path, bufferSize: Int = 8192): Unit = { val inputStream = new TarArchiveInputStream(tarStream) var entry = inputStream.getNextTarEntry while (entry != null) { if (!entry.isDirectory) { val currentFile = new File(destinationPath.toAbsolutePath.toString, entry.getName) val parentFile = currentFile.getParentFile if (!parentFile.exists) parentFile.mkdirs() IOUtils.copy(inputStream, new FileOutputStream(currentFile)) } entry = inputStream.getNextTarEntry } inputStream.close() } }
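A hypothetical call extracting a .tar.gz archive; both paths are placeholders:

import java.nio.file.Paths
import org.platanios.tensorflow.data.utilities.CompressedFiles

CompressedFiles.decompressTGZ(
  Paths.get("/tmp/downloads/dataset.tgz"), // placeholder archive
  Paths.get("/tmp/data/dataset")           // placeholder destination directory
)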