java.io.ByteArrayInputStream Scala Examples
The following Scala examples show how to use java.io.ByteArrayInputStream in real open-source projects.
The original project, source file, and license are noted above each example.
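The snippets below come from many different projects, but they all rely on the same basic idea: wrapping an in-memory byte array in a ByteArrayInputStream so that stream-oriented APIs can consume it. As a quick orientation before the project-specific code, here is a minimal, self-contained sketch of that pattern; the object name, the sample string, and the round-trip via scala.io.Source are illustrative only and are not taken from any of the projects listed below.

import java.io.ByteArrayInputStream
import java.nio.charset.StandardCharsets

object ByteArrayInputStreamDemo extends App {
  // Turn in-memory data into bytes, then expose those bytes as a java.io.InputStream.
  val bytes = "hello, world".getBytes(StandardCharsets.UTF_8)
  val in = new ByteArrayInputStream(bytes)
  try {
    // Read the stream back into a String, as several of the examples below do.
    val roundTripped = scala.io.Source.fromInputStream(in, "UTF-8").mkString
    println(roundTripped) // prints: hello, world
  } finally {
    in.close() // close() is a no-op for ByteArrayInputStream, but is good InputStream hygiene
  }
}

Because the data already lives in memory, no I/O or resource management is really needed; the examples that follow apply the same idea to Avro deserializers, Hadoop token parsing, record readers, and test fixtures.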
Example 1
Source File: AvroDataToCatalyst.scala From spark-schema-registry with Apache License 2.0
package com.hortonworks.spark.registry.avro import java.io.ByteArrayInputStream import com.hortonworks.registries.schemaregistry.{SchemaVersionInfo, SchemaVersionKey} import com.hortonworks.registries.schemaregistry.client.SchemaRegistryClient import com.hortonworks.registries.schemaregistry.serdes.avro.AvroSnapshotDeserializer import org.apache.avro.Schema import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, UnaryExpression} import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} import org.apache.spark.sql.types.{BinaryType, DataType} import scala.collection.JavaConverters._ case class AvroDataToCatalyst(child: Expression, schemaName: String, version: Option[Int], config: Map[String, Object]) extends UnaryExpression with ExpectsInputTypes { override def inputTypes = Seq(BinaryType) @transient private lazy val srDeser: AvroSnapshotDeserializer = { val obj = new AvroSnapshotDeserializer() obj.init(config.asJava) obj } @transient private lazy val srSchema = fetchSchemaVersionInfo(schemaName, version) @transient private lazy val avroSchema = new Schema.Parser().parse(srSchema.getSchemaText) override lazy val dataType: DataType = SchemaConverters.toSqlType(avroSchema).dataType @transient private lazy val avroDeser= new AvroDeserializer(avroSchema, dataType) override def nullable: Boolean = true override def nullSafeEval(input: Any): Any = { val binary = input.asInstanceOf[Array[Byte]] val row = avroDeser.deserialize(srDeser.deserialize(new ByteArrayInputStream(binary), srSchema.getVersion)) val result = row match { case r: InternalRow => r.copy() case _ => row } result } override def simpleString: String = { s"from_sr(${child.sql}, ${dataType.simpleString})" } override def sql: String = { s"from_sr(${child.sql}, ${dataType.catalogString})" } override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val expr = ctx.addReferenceObj("this", this) defineCodeGen(ctx, ev, input => s"(${ctx.boxedType(dataType)})$expr.nullSafeEval($input)") } private def fetchSchemaVersionInfo(schemaName: String, version: Option[Int]): SchemaVersionInfo = { val srClient = new SchemaRegistryClient(config.asJava) version.map(v => srClient.getSchemaVersionInfo(new SchemaVersionKey(schemaName, v))) .getOrElse(srClient.getLatestSchemaVersionInfo(schemaName)) } }
Example 2
Source File: HDFSCredentialProvider.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.deploy.yarn.security import java.io.{ByteArrayInputStream, DataInputStream} import scala.collection.JavaConverters._ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier import org.apache.hadoop.mapred.Master import org.apache.hadoop.security.Credentials import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.yarn.config._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ private[security] class HDFSCredentialProvider extends ServiceCredentialProvider with Logging { // Token renewal interval, this value will be set in the first call, // if None means no token renewer specified, so cannot get token renewal interval. private var tokenRenewalInterval: Option[Long] = null override val serviceName: String = "hdfs" override def obtainCredentials( hadoopConf: Configuration, sparkConf: SparkConf, creds: Credentials): Option[Long] = { // NameNode to access, used to get tokens from different FileSystems nnsToAccess(hadoopConf, sparkConf).foreach { dst => val dstFs = dst.getFileSystem(hadoopConf) logInfo("getting token for namenode: " + dst) dstFs.addDelegationTokens(getTokenRenewer(hadoopConf), creds) } // Get the token renewal interval if it is not set. It will only be called once. if (tokenRenewalInterval == null) { tokenRenewalInterval = getTokenRenewalInterval(hadoopConf, sparkConf) } // Get the time of next renewal. tokenRenewalInterval.map { interval => creds.getAllTokens.asScala .filter(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND) .map { t => val identifier = new DelegationTokenIdentifier() identifier.readFields(new DataInputStream(new ByteArrayInputStream(t.getIdentifier))) identifier.getIssueDate + interval }.foldLeft(0L)(math.max) } } private def getTokenRenewalInterval( hadoopConf: Configuration, sparkConf: SparkConf): Option[Long] = { // We cannot use the tokens generated with renewer yarn. Trying to renew // those will fail with an access control issue. So create new tokens with the logged in // user as renewer. sparkConf.get(PRINCIPAL).map { renewer => val creds = new Credentials() nnsToAccess(hadoopConf, sparkConf).foreach { dst => val dstFs = dst.getFileSystem(hadoopConf) dstFs.addDelegationTokens(renewer, creds) } val t = creds.getAllTokens.asScala .filter(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND) .head val newExpiration = t.renew(hadoopConf) val identifier = new DelegationTokenIdentifier() identifier.readFields(new DataInputStream(new ByteArrayInputStream(t.getIdentifier))) val interval = newExpiration - identifier.getIssueDate logInfo(s"Renewal Interval is $interval") interval } } private def getTokenRenewer(conf: Configuration): String = { val delegTokenRenewer = Master.getMasterPrincipal(conf) logDebug("delegation token renewer is: " + delegTokenRenewer) if (delegTokenRenewer == null || delegTokenRenewer.length() == 0) { val errorMessage = "Can't get Master Kerberos principal for use as renewer" logError(errorMessage) throw new SparkException(errorMessage) } delegTokenRenewer } private def nnsToAccess(hadoopConf: Configuration, sparkConf: SparkConf): Set[Path] = { sparkConf.get(NAMENODES_TO_ACCESS).map(new Path(_)).toSet + sparkConf.get(STAGING_DIR).map(new Path(_)) .getOrElse(FileSystem.get(hadoopConf).getHomeDirectory) } }
Example 3
Source File: RecordReaderSuite.scala From spark-snowflake with Apache License 2.0
package net.snowflake.spark.snowflake.io import java.io.ByteArrayInputStream import net.snowflake.client.jdbc.internal.fasterxml.jackson.databind.ObjectMapper import org.scalatest.FunSuite class RecordReaderSuite extends FunSuite { val mapper: ObjectMapper = new ObjectMapper() test("Read Json File") { val record1 = s""" |{ | "name":"abc", | "age":123, | "car":[ | { | "make": "vw", | "mode": "golf", | "year": 2010 | }, | { | "make": "Audi", | "mode": "R8", | "year": 2011 | } | ] |} """.stripMargin val record2 = s""" |{ | "name":"def ghi", | "age":222, | "car":[ | { | "make": "Tesla", | "mode": "X", | "year": 2017 | } | ] |} """.stripMargin val file = record1 + record2 val recordReader: SFRecordReader = new SFRecordReader(SupportedFormat.JSON) recordReader.addStream(new ByteArrayInputStream(file.getBytes)) recordReader.addStream(new ByteArrayInputStream(file.getBytes)) val result1 = mapper.readTree(recordReader.next()) val json1 = mapper.readTree(record1) assert(json1.equals(result1)) val result2 = mapper.readTree(recordReader.next()) val json2 = mapper.readTree(record2) assert(json2.equals(result2)) val result3 = mapper.readTree(recordReader.next()) assert(json1.equals(result3)) val result4 = mapper.readTree(recordReader.next()) assert(json2.equals(result4)) assert(!recordReader.hasNext) } }
Example 4
Source File: V26_1__Fill_create_argument.scala From daml with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package db.migration.postgres import java.io.ByteArrayInputStream import com.daml.platform.store.serialization.ValueSerializer import db.migration.translation.ContractSerializer import org.flywaydb.core.api.migration.{BaseJavaMigration, Context} class V26_1__Fill_create_argument extends BaseJavaMigration { // left join on contracts to make sure to include divulged contracts private val SELECT_CONTRACT_DATA = """select contract_data.id, contract_data.contract |from contract_data |left join contracts | on contracts.id = contract_data.id | and contracts.archive_offset is null""".stripMargin private val UPDATE_PARTICIPANT_CONTRACTS = "update participant_contracts set create_argument = ?, template_id = ? where contract_id = ?" override def migrate(context: Context): Unit = { val conn = context.getConnection var loadContracts: java.sql.Statement = null var updateParticipantContracts: java.sql.PreparedStatement = null var rows: java.sql.ResultSet = null try { updateParticipantContracts = conn.prepareStatement(UPDATE_PARTICIPANT_CONTRACTS) loadContracts = conn.createStatement() rows = loadContracts.executeQuery(SELECT_CONTRACT_DATA) while (rows.next()) { val contractId = rows.getString("id") val contractBytes = rows.getBinaryStream("contract") val contract = ContractSerializer .deserializeContractInstance(contractBytes) .getOrElse(sys.error(s"failed to deserialize contract $contractId")) val createArgument = contract.arg val templateId = contract.template val createArgumentBytes = new ByteArrayInputStream( ValueSerializer.serializeValue( createArgument, s"failed to serialize create argument for contract $contractId")) updateParticipantContracts.setBinaryStream(1, createArgumentBytes) updateParticipantContracts.setString(2, templateId.toString) updateParticipantContracts.setString(3, contractId) updateParticipantContracts.execute() } } finally { if (loadContracts != null) { loadContracts.close() } if (updateParticipantContracts != null) { updateParticipantContracts.close() } if (rows != null) { rows.close() } } } }
Example 5
Source File: FileBasedLedgerDataExportSpec.scala From daml with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.ledger.participant.state.kvutils.export import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} import java.time.Instant import com.daml.ledger.participant.state.v1 import com.google.protobuf.ByteString import org.scalatest.mockito.MockitoSugar import org.scalatest.{Matchers, WordSpec} class FileBasedLedgerDataExportSpec extends WordSpec with Matchers with MockitoSugar { // XXX SC remove in Scala 2.13; see notes in ConfSpec import scala.collection.GenTraversable, org.scalatest.enablers.Containing private[this] implicit def `fixed sig containingNatureOfGenTraversable`[ E: org.scalactic.Equality, TRAV]: Containing[TRAV with GenTraversable[E]] = Containing.containingNatureOfGenTraversable[E, GenTraversable] "addParentChild" should { "add entry to correlation ID mapping" in { val instance = new FileBasedLedgerDataExporter(mock[DataOutputStream]) instance.addParentChild("parent", "child") instance.correlationIdMapping should contain("child" -> "parent") } } "addToWriteSet" should { "append to existing data" in { val instance = new FileBasedLedgerDataExporter(mock[DataOutputStream]) instance.addParentChild("parent", "child") instance.addToWriteSet("child", Seq(keyValuePairOf("a", "b"))) instance.addToWriteSet("child", Seq(keyValuePairOf("c", "d"))) instance.bufferedKeyValueDataPerCorrelationId should contain( "parent" -> Seq(keyValuePairOf("a", "b"), keyValuePairOf("c", "d"))) } } "finishedProcessing" should { "remove all data such as submission info, write-set and child correlation IDs" in { val dataOutputStream = new DataOutputStream(new ByteArrayOutputStream()) val instance = new FileBasedLedgerDataExporter(dataOutputStream) instance.addSubmission( ByteString.copyFromUtf8("an envelope"), "parent", Instant.now(), v1.ParticipantId.assertFromString("id")) instance.addParentChild("parent", "parent") instance.addToWriteSet("parent", Seq(keyValuePairOf("a", "b"))) instance.finishedProcessing("parent") instance.inProgressSubmissions shouldBe empty instance.bufferedKeyValueDataPerCorrelationId shouldBe empty instance.correlationIdMapping shouldBe empty } } "serialized submission" should { "be readable back" in { val baos = new ByteArrayOutputStream() val dataOutputStream = new DataOutputStream(baos) val instance = new FileBasedLedgerDataExporter(dataOutputStream) val expectedRecordTimeInstant = Instant.now() val expectedParticipantId = v1.ParticipantId.assertFromString("id") instance.addSubmission( ByteString.copyFromUtf8("an envelope"), "parent", expectedRecordTimeInstant, v1.ParticipantId.assertFromString("id")) instance.addParentChild("parent", "parent") instance.addToWriteSet("parent", Seq(keyValuePairOf("a", "b"))) instance.finishedProcessing("parent") val dataInputStream = new DataInputStream(new ByteArrayInputStream(baos.toByteArray)) val (actualSubmissionInfo, actualWriteSet) = Serialization.readEntry(dataInputStream) actualSubmissionInfo.submissionEnvelope should be(ByteString.copyFromUtf8("an envelope")) actualSubmissionInfo.correlationId should be("parent") actualSubmissionInfo.recordTimeInstant should be(expectedRecordTimeInstant) actualSubmissionInfo.participantId should be(expectedParticipantId) actualWriteSet should be(Seq(keyValuePairOf("a", "b"))) } } private def keyValuePairOf(key: String, value: String): (ByteString, ByteString) = ByteString.copyFromUtf8(key) -> 
ByteString.copyFromUtf8(value) }
Example 6
Source File: DarManifestReaderTest.scala From daml with Apache License 2.0
// Copyright (c) 2020 Digital Asset (Switzerland) GmbH and/or its affiliates. All rights reserved. // SPDX-License-Identifier: Apache-2.0 package com.daml.lf.archive import java.io.{ByteArrayInputStream, InputStream} import java.nio.charset.Charset import com.daml.lf.archive.DarManifestReader.DarManifestReaderException import org.scalatest.{Inside, Matchers, WordSpec} import scala.util.{Failure, Success} class DarManifestReaderTest extends WordSpec with Matchers with Inside { private val unicode = Charset.forName("UTF-8") "should read dalf names from manifest, real scenario with Dalfs line split" in { val manifest = """Manifest-Version: 1.0 |Created-By: Digital Asset packager (DAML-GHC) |Main-Dalf: com.daml.lf.archive:DarReaderTest:0.1.dalf |Dalfs: com.daml.lf.archive:DarReaderTest:0.1.dalf, daml-pri | m.dalf |Format: daml-lf |Encryption: non-encrypted""".stripMargin val inputStream: InputStream = new ByteArrayInputStream(manifest.getBytes(unicode)) val actual = DarManifestReader.dalfNames(inputStream) actual shouldBe Success( Dar("com.daml.lf.archive:DarReaderTest:0.1.dalf", List("daml-prim.dalf"))) inputStream.close() } "should read dalf names from manifest, Main-Dalf returned in the head" in { val manifest = """Main-Dalf: A.dalf |Dalfs: B.dalf, C.dalf, A.dalf, E.dalf |Format: daml-lf |Encryption: non-encrypted""".stripMargin val inputStream: InputStream = new ByteArrayInputStream(manifest.getBytes(unicode)) val actual = DarManifestReader.dalfNames(inputStream) actual shouldBe Success(Dar("A.dalf", List("B.dalf", "C.dalf", "E.dalf"))) inputStream.close() } "should read dalf names from manifest, can handle one Dalf per manifest" in { val manifest = """Main-Dalf: A.dalf |Dalfs: A.dalf |Format: daml-lf |Encryption: non-encrypted""".stripMargin val inputStream: InputStream = new ByteArrayInputStream(manifest.getBytes(unicode)) val actual = DarManifestReader.dalfNames(inputStream) actual shouldBe Success(Dar("A.dalf", List.empty)) inputStream.close() } "should return failure if Format is not daml-lf" in { val manifest = """Main-Dalf: A.dalf |Dalfs: B.dalf, C.dalf, A.dalf, E.dalf |Format: anything-different-from-daml-lf |Encryption: non-encrypted""".stripMargin val inputStream: InputStream = new ByteArrayInputStream(manifest.getBytes(unicode)) val actual = DarManifestReader.dalfNames(inputStream) inside(actual) { case Failure(DarManifestReaderException(msg)) => msg shouldBe "Unsupported format: anything-different-from-daml-lf" } inputStream.close() } }
Example 7
Source File: BytecodeUtils.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.graphx.util import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import scala.collection.mutable.HashSet import scala.language.existentials import org.apache.xbean.asm5.{ClassReader, ClassVisitor, MethodVisitor} import org.apache.xbean.asm5.Opcodes._ import org.apache.spark.util.Utils private class MethodInvocationFinder(className: String, methodName: String) extends ClassVisitor(ASM5) { val methodsInvoked = new HashSet[(Class[_], String)] override def visitMethod(access: Int, name: String, desc: String, sig: String, exceptions: Array[String]): MethodVisitor = { if (name == methodName) { new MethodVisitor(ASM5) { override def visitMethodInsn( op: Int, owner: String, name: String, desc: String, itf: Boolean) { if (op == INVOKEVIRTUAL || op == INVOKESPECIAL || op == INVOKESTATIC) { if (!skipClass(owner)) { methodsInvoked.add((Utils.classForName(owner.replace("/", ".")), name)) } } } } } else { null } } } }
Example 8
Source File: PortableDataStream.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.input import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} import scala.collection.JavaConverters._ import com.google.common.io.{ByteStreams, Closeables} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.{InputSplit, JobContext, RecordReader, TaskAttemptContext} import org.apache.hadoop.mapreduce.lib.input.{CombineFileInputFormat, CombineFileRecordReader, CombineFileSplit} def toArray(): Array[Byte] = { val stream = open() try { ByteStreams.toByteArray(stream) } finally { Closeables.close(stream, true) } } def getPath(): String = path }
Example 9
Source File: GenericAvroSerializerSuite.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.serializer import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.nio.ByteBuffer import com.esotericsoftware.kryo.io.{Input, Output} import org.apache.avro.{Schema, SchemaBuilder} import org.apache.avro.generic.GenericData.Record import org.apache.spark.{SharedSparkContext, SparkFunSuite} class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext { conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") val schema : Schema = SchemaBuilder .record("testRecord").fields() .requiredString("data") .endRecord() val record = new Record(schema) record.put("data", "test data") test("schema compression and decompression") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema)))) } test("record serialization and deserialization") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val outputStream = new ByteArrayOutputStream() val output = new Output(outputStream) genericSer.serializeDatum(record, output) output.flush() output.close() val input = new Input(new ByteArrayInputStream(outputStream.toByteArray)) assert(genericSer.deserializeDatum(input) === record) } test("uses schema fingerprint to decrease message size") { val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema) val output = new Output(new ByteArrayOutputStream()) val beginningNormalPosition = output.total() genericSerFull.serializeDatum(record, output) output.flush() val normalLength = output.total - beginningNormalPosition conf.registerAvroSchemas(schema) val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema) val beginningFingerprintPosition = output.total() genericSerFinger.serializeDatum(record, output) val fingerprintLength = output.total - beginningFingerprintPosition assert(fingerprintLength < normalLength) } test("caches previously seen schemas") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val compressedSchema = genericSer.compress(schema) val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema)) assert(compressedSchema.eq(genericSer.compress(schema))) assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema)))) } }
Example 10
Source File: SerializerPropertiesSuite.scala From drizzle-spark with Apache License 2.0
package org.apache.spark.serializer import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import scala.util.Random import org.scalatest.Assertions import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.serializer.KryoTest.RegistratorWithoutAutoReset class SerializerPropertiesSuite extends SparkFunSuite { import SerializerPropertiesSuite._ test("JavaSerializer does not support relocation") { // Per a comment on the SPARK-4550 JIRA ticket, Java serialization appears to write out the // full class name the first time an object is written to an output stream, but subsequent // references to the class write a more compact identifier; this prevents relocation. val ser = new JavaSerializer(new SparkConf()) testSupportsRelocationOfSerializedObjects(ser, generateRandomItem) } test("KryoSerializer supports relocation when auto-reset is enabled") { val ser = new KryoSerializer(new SparkConf) assert(ser.newInstance().asInstanceOf[KryoSerializerInstance].getAutoReset()) testSupportsRelocationOfSerializedObjects(ser, generateRandomItem) } test("KryoSerializer does not support relocation when auto-reset is disabled") { val conf = new SparkConf().set("spark.kryo.registrator", classOf[RegistratorWithoutAutoReset].getName) val ser = new KryoSerializer(conf) assert(!ser.newInstance().asInstanceOf[KryoSerializerInstance].getAutoReset()) testSupportsRelocationOfSerializedObjects(ser, generateRandomItem) } } object SerializerPropertiesSuite extends Assertions { def generateRandomItem(rand: Random): Any = { val randomFunctions: Seq[() => Any] = Seq( () => rand.nextInt(), () => rand.nextString(rand.nextInt(10)), () => rand.nextDouble(), () => rand.nextBoolean(), () => (rand.nextInt(), rand.nextString(rand.nextInt(10))), () => MyCaseClass(rand.nextInt(), rand.nextString(rand.nextInt(10))), () => { val x = MyCaseClass(rand.nextInt(), rand.nextString(rand.nextInt(10))) (x, x) } ) randomFunctions(rand.nextInt(randomFunctions.size)).apply() } def testSupportsRelocationOfSerializedObjects( serializer: Serializer, generateRandomItem: Random => Any): Unit = { if (!serializer.supportsRelocationOfSerializedObjects) { return } val NUM_TRIALS = 5 val rand = new Random(42) for (_ <- 1 to NUM_TRIALS) { val items = { // Make sure that we have duplicate occurrences of the same object in the stream: val randomItems = Seq.fill(10)(generateRandomItem(rand)) randomItems ++ randomItems.take(5) } val baos = new ByteArrayOutputStream() val serStream = serializer.newInstance().serializeStream(baos) def serializeItem(item: Any): Array[Byte] = { val itemStartOffset = baos.toByteArray.length serStream.writeObject(item) serStream.flush() val itemEndOffset = baos.toByteArray.length baos.toByteArray.slice(itemStartOffset, itemEndOffset).clone() } val itemsAndSerializedItems: Seq[(Any, Array[Byte])] = { val serItems = items.map { item => (item, serializeItem(item)) } serStream.close() rand.shuffle(serItems) } val reorderedSerializedData: Array[Byte] = itemsAndSerializedItems.flatMap(_._2).toArray val deserializedItemsStream = serializer.newInstance().deserializeStream( new ByteArrayInputStream(reorderedSerializedData)) assert(deserializedItemsStream.asIterator.toSeq === itemsAndSerializedItems.map(_._1)) deserializedItemsStream.close() } } } private case class MyCaseClass(foo: Int, bar: String)
Example 11
Source File: ECIESCoder.scala From mantis with Apache License 2.0
package io.iohk.ethereum.crypto import java.io.{ByteArrayInputStream, IOException} import java.math.BigInteger import java.security.SecureRandom import org.spongycastle.crypto.digests.{SHA1Digest, SHA256Digest} import org.spongycastle.crypto.engines.AESEngine import org.spongycastle.crypto.generators.ECKeyPairGenerator import org.spongycastle.crypto.macs.HMac import org.spongycastle.crypto.modes.SICBlockCipher import org.spongycastle.crypto.params._ import org.spongycastle.crypto.parsers.ECIESPublicKeyParser import org.spongycastle.crypto.{BufferedBlockCipher, InvalidCipherTextException} import org.spongycastle.math.ec.ECPoint object ECIESCoder { val KeySize = 128 val PublicKeyOverheadSize = 65 val MacOverheadSize = 32 val OverheadSize = PublicKeyOverheadSize + KeySize / 8 + MacOverheadSize @throws[IOException] @throws[InvalidCipherTextException] def decrypt(privKey: BigInteger, cipher: Array[Byte], macData: Option[Array[Byte]] = None): Array[Byte] = { val is = new ByteArrayInputStream(cipher) val ephemBytes = new Array[Byte](2 * ((curve.getCurve.getFieldSize + 7) / 8) + 1) is.read(ephemBytes) val ephem = curve.getCurve.decodePoint(ephemBytes) val IV = new Array[Byte](KeySize / 8) is.read(IV) val cipherBody = new Array[Byte](is.available) is.read(cipherBody) decrypt(ephem, privKey, Some(IV), cipherBody, macData) } @throws[InvalidCipherTextException] def decrypt(ephem: ECPoint, prv: BigInteger, IV: Option[Array[Byte]], cipher: Array[Byte], macData: Option[Array[Byte]]): Array[Byte] = { val aesEngine = new AESEngine val iesEngine = new EthereumIESEngine( kdf = Left(new ConcatKDFBytesGenerator(new SHA256Digest)), mac = new HMac(new SHA256Digest), hash = new SHA256Digest, cipher = Some(new BufferedBlockCipher(new SICBlockCipher(aesEngine))), IV = IV, prvSrc = Left(new ECPrivateKeyParameters(prv, curve)), pubSrc = Left(new ECPublicKeyParameters(ephem, curve))) iesEngine.processBlock(cipher, 0, cipher.length, forEncryption = false, macData) } @throws[IOException] @throws[InvalidCipherTextException] def encryptSimple(pub: ECPoint, secureRandom: SecureRandom, plaintext: Array[Byte]): Array[Byte] = { val eGen = new ECKeyPairGenerator val gParam = new ECKeyGenerationParameters(curve, secureRandom) eGen.init(gParam) val iesEngine = new EthereumIESEngine( kdf = Right(new MGF1BytesGeneratorExt(new SHA1Digest)), mac = new HMac(new SHA1Digest), hash = new SHA1Digest, cipher = None, IV = Some(new Array[Byte](0)), prvSrc = Right(eGen), pubSrc = Left(new ECPublicKeyParameters(pub, curve)), hashMacKey = false) iesEngine.processBlock(plaintext, 0, plaintext.length, forEncryption = true) } private def makeIESEngine(pub: ECPoint, prv: BigInteger, IV: Option[Array[Byte]]) = { val aesEngine = new AESEngine val iesEngine = new EthereumIESEngine( kdf = Left(new ConcatKDFBytesGenerator(new SHA256Digest)), mac = new HMac(new SHA256Digest), hash = new SHA256Digest, cipher = Some(new BufferedBlockCipher(new SICBlockCipher(aesEngine))), IV = IV, prvSrc = Left(new ECPrivateKeyParameters(prv, curve)), pubSrc = Left(new ECPublicKeyParameters(pub, curve))) iesEngine } }
Example 12
Source File: HdfsBlockStream.scala From ArchiveSpark with MIT License
package org.archive.archivespark.sparkling.io import java.io.{ByteArrayInputStream, InputStream} import org.apache.hadoop.fs.{FileSystem, Path} import org.archive.archivespark.sparkling.logging.LogContext import org.archive.archivespark.sparkling.util.Common import scala.util.Try class HdfsBlockStream (fs: FileSystem, file: String, offset: Long = 0, length: Long = -1, retries: Int = 60, sleepMillis: Int = 1000 * 60) extends InputStream { implicit val logContext: LogContext = LogContext(this) val path = new Path(file) val (blockSize: Int, fileSize: Long) = { val status = fs.getFileStatus(path) (status.getBlockSize.min(Int.MaxValue).toInt, status.getLen) } private var pos: Long = offset.max(0) private val max: Long = if (length > 0) fileSize.min(pos + length) else fileSize private val buffer = new Array[Byte](blockSize) private val emptyBlock = new ByteArrayInputStream(Array.emptyByteArray) private var block: ByteArrayInputStream = emptyBlock def ensureNextBlock(): InputStream = { if (block.available() == 0 && pos < max) { val end = pos + blockSize val blockLength = ((end - (end % blockSize)).min(max) - pos).toInt Common.retry(retries, sleepMillis, (retry, e) => { "File access failed (" + retry + "/" + retries + "): " + path + " (Offset: " + pos + ") - " + e.getMessage }) { retry => val in = fs.open(path, blockLength) if (retry > 0) Try(in.seekToNewSource(pos)) else if (pos > 0) in.seek(pos) var read = 0 while (read < blockLength) read += in.read(buffer, read, blockLength - read) Try(in.close()) } pos += blockLength block = new ByteArrayInputStream(buffer, 0, blockLength) } block } override def read(): Int = ensureNextBlock().read() override def read(b: Array[Byte]): Int = ensureNextBlock().read(b) override def read(b: Array[Byte], off: Int, len: Int): Int = ensureNextBlock().read(b, off, len) override def skip(n: Long): Long = { val available = block.available() if (n <= available) block.skip(n) else { block = emptyBlock val currentPos = pos - available val skip = n.min(max - currentPos) pos += skip - available skip } } override def available(): Int = block.available() override def close(): Unit = {} override def markSupported(): Boolean = false }
Example 13
Source File: PLYReadWriteTests.scala From scalismo-faces with Apache License 2.0
package scalismo.faces.io import java.io.{ByteArrayInputStream, ByteArrayOutputStream, OutputStreamWriter} import java.nio.ByteOrder import java.util.Scanner import scalismo.faces.FacesTestSuite import scalismo.faces.io.ply._ class PLYReadWriteTests extends FacesTestSuite { describe("Write-read cycles to string, big- and little endian") { def testRWEndianCycle[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A], bo: ByteOrder): Unit = { val N = toWrite.size val os = new ByteArrayOutputStream() val writer = new SequenceWriter[A] writer.write(toWrite, os, bo) val ba = os.toByteArray val is = new ByteArrayInputStream(ba) val reader = new FixedLengthSequenceReader[A] val read = reader.read(N, is, bo) read.zip(toWrite).foreach { p => p._1 shouldBe p._2 } } def testRWStringCycle[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A]): Unit = { val N = toWrite.size val os = new ByteArrayOutputStream() val osw = new OutputStreamWriter(os) val writer = new SequenceWriter[A] writer.write(toWrite, osw) osw.flush() val is = new ByteArrayInputStream(os.toByteArray) val isr = new Scanner(is) val reader = new FixedLengthSequenceReader[A] val read = reader.read(N, isr) read.zip(toWrite).foreach { p => p._1 shouldBe p._2 } } def testAllThreeCycles[A:StringWriter:StringReader:EndianWriter:EndianReader](toWrite: IndexedSeq[A]): Unit = { testRWStringCycle(toWrite) testRWEndianCycle(toWrite, ByteOrder.BIG_ENDIAN) testRWEndianCycle(toWrite, ByteOrder.LITTLE_ENDIAN) } it("should result in the same sequence of bytes") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toByte testAllThreeCycles(toWrite) } it("should result in the same sequence of char") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toChar testAllThreeCycles(toWrite) } it("should result in the same sequence of short") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toShort testAllThreeCycles(toWrite) } it("should result in the same sequence of int") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toInt testAllThreeCycles(toWrite) } it("should result in the same sequence of long") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toLong testAllThreeCycles(toWrite) } it("should result in the same sequence of float") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255).toFloat testAllThreeCycles(toWrite) } it("should result in the same sequence of double") { val toWrite = for (i <- 0 until 20) yield (randomDouble * 255) testAllThreeCycles(toWrite) } } }
Example 14
Source File: package.scala From spark-alchemy with Apache License 2.0
package com.swoop.alchemy.spark.expressions import java.io.{ByteArrayInputStream, DataInputStream} import com.clearspring.analytics.stream import com.clearspring.analytics.stream.cardinality.RegisterSet import com.clearspring.analytics.util.{Bits, Varint} import net.agkn.hll.HLL import net.agkn.hll.serialization.{HLLMetadata, SchemaVersionOne} import net.agkn.hll.util.BitVector package object hll { val IMPLEMENTATION_CONFIG_KEY = "com.swoop.alchemy.hll.implementation" def strmToAgkn(from: stream.cardinality.HyperLogLogPlus): net.agkn.hll.HLL = { HLL.fromBytes(strmToAgkn(from.getBytes)) } def strmToAgkn(from: Array[Byte]): Array[Byte] = { var bais = new ByteArrayInputStream(from) var oi = new DataInputStream(bais) val version = oi.readInt // the new encoding scheme includes a version field // that is always negative. if (version >= 0) { throw new UnsupportedOperationException("conversion is only supported for the new style encoding scheme") } val p = Varint.readUnsignedVarInt(oi) val sp = Varint.readUnsignedVarInt(oi) val formatType = Varint.readUnsignedVarInt(oi) if (formatType != 0) { throw new UnsupportedOperationException("conversion is only supported for non-sparse representation") } val size = Varint.readUnsignedVarInt(oi) val longArrayBytes = new Array[Byte](size) oi.readFully(longArrayBytes) val registerSet = new RegisterSet(Math.pow(2, p).toInt, Bits.getBits(longArrayBytes)) val bitVector = new BitVector(RegisterSet.REGISTER_SIZE, registerSet.count) for (i <- 0 until registerSet.count) bitVector.setRegister(i, registerSet.get(i)) val schemaVersion = new SchemaVersionOne val serializer = schemaVersion.getSerializer(net.agkn.hll.HLLType.FULL, RegisterSet.REGISTER_SIZE, registerSet.count) bitVector.getRegisterContents(serializer) var outBytes = serializer.getBytes val metadata = new HLLMetadata( schemaVersion.schemaVersionNumber(), net.agkn.hll.HLLType.FULL, p, RegisterSet.REGISTER_SIZE, 0, true, false, false ) schemaVersion.writeMetadata(outBytes, metadata) outBytes } }
Example 15
Source File: DatasetFunctionsSpec.scala From daf with BSD 3-Clause "New" or "Revised" License
package daf.dataset import java.io.ByteArrayInputStream import akka.stream.ActorMaterializer import akka.stream.scaladsl.StreamConverters import controllers.modules.TestAbstractModule import daf.filesystem.MergeStrategy import daf.instances.{ AkkaInstance, ConfigurationInstance } import org.scalatest.{ BeforeAndAfterAll, MustMatchers, WordSpecLike } import scala.concurrent.Await import scala.concurrent.duration._ import scala.util.Random class DatasetFunctionsSpec extends TestAbstractModule with WordSpecLike with MustMatchers with BeforeAndAfterAll with ConfigurationInstance with AkkaInstance { implicit lazy val executionContext = actorSystem.dispatchers.lookup("akka.actor.test-dispatcher") protected implicit lazy val materializer = ActorMaterializer.create { actorSystem } override def beforeAll() = { startAkka() } def data = (1 to 5) .map { i => Random.alphanumeric.grouped(20).take(5).map { s => s"$i - ${s.mkString}" }.toStream :+ defaultSeparator } def stream = MergeStrategy.coalesced { data.map { iter => new ByteArrayInputStream( iter.mkString(defaultSeparator).getBytes("UTF-8") ) } } def source = StreamConverters.fromInputStream(() => stream, 5) "Source manipulation" must { "convert to a string source" in { Await.result( wrapDefault { asStringSource(source) }.runFold("") { _ + _ }, 5.seconds ).split(defaultSeparator).length must be { 25 } } "convert to a json source" in { Await.result( wrapJson { asStringSource(source) }.runFold("") { _ + _ }, 5.seconds ).split(jsonSeparator).length must be { 25 } } } }
Example 16
Source File: JavaSerializationConverter.scala From scala-serialization with MIT License
package com.komanov.serialization.converters import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} import com.komanov.serialization.converters.IoUtils.using import com.komanov.serialization.domain.{Site, SiteEvent, SiteEventData} object JavaSerializationConverter extends MyConverter { override def toByteArray(site: Site): Array[Byte] = { using(new ByteArrayOutputStream()) { baos => using(new ObjectOutputStream(baos)) { os => os.writeObject(site) os.flush() baos.toByteArray } } } override def fromByteArray(bytes: Array[Byte]): Site = { using(new ByteArrayInputStream(bytes)) { bais => using(new ObjectInputStream(bais)) { os => os.readObject().asInstanceOf[Site] } } } override def toByteArray(event: SiteEvent): Array[Byte] = { using(new ByteArrayOutputStream()) { baos => using(new ObjectOutputStream(baos)) { os => os.writeObject(event) os.flush() baos.toByteArray } } } override def siteEventFromByteArray(clazz: Class[_], bytes: Array[Byte]): SiteEvent = { using(new ByteArrayInputStream(bytes)) { bais => using(new ObjectInputStream(bais)) { os => os.readObject().asInstanceOf[SiteEvent] } } } }
Example 17
Source File: BMLHelper.scala From Linkis with Apache License 2.0
package com.webank.wedatasphere.linkis.filesystem.bml import java.io.{ByteArrayInputStream, InputStream} import java.util import java.util.UUID import com.webank.wedatasphere.linkis.bml.client.{BmlClient, BmlClientFactory} import com.webank.wedatasphere.linkis.bml.protocol.{BmlDownloadResponse, BmlUpdateResponse, BmlUploadResponse} import com.webank.wedatasphere.linkis.filesystem.exception.WorkspaceExceptionManager import org.springframework.stereotype.Component import scala.collection.JavaConversions._ @Component class BMLHelper { def upload(userName: String, content: String, fileName: String): util.Map[String, Object] = { val inputStream = new ByteArrayInputStream(content.getBytes("utf-8")) val client: BmlClient = createBMLClient(userName) val resource: BmlUploadResponse = client.uploadResource(userName, fileName, inputStream) if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80021) val map = new util.HashMap[String, Object] map += "resourceId" -> resource.resourceId map += "version" -> resource.version } def upload(userName: String, inputStream: InputStream, fileName: String, projectName: String): util.Map[String, Object] = { val client: BmlClient = createBMLClient(userName) val resource: BmlUploadResponse = client.uploadResource(userName, fileName, inputStream) if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80021) val map = new util.HashMap[String, Object] map += "resourceId" -> resource.resourceId map += "version" -> resource.version } def upload(userName: String, inputStream: InputStream, fileName: String): util.Map[String, Object] = { val client: BmlClient = createBMLClient(userName) val resource: BmlUploadResponse = client.uploadResource(userName, fileName, inputStream) if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80021) val map = new util.HashMap[String, Object] map += "resourceId" -> resource.resourceId map += "version" -> resource.version } def update(userName: String, resourceId: String, inputStream: InputStream): util.Map[String, Object] = { val client: BmlClient = createBMLClient(userName) val resource: BmlUpdateResponse = client.updateResource(userName, resourceId, "", inputStream) if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80022) val map = new util.HashMap[String, Object] map += "resourceId" -> resource.resourceId map += "version" -> resource.version } def update(userName: String, resourceId: String, content: String): util.Map[String, Object] = { val inputStream = new ByteArrayInputStream(content.getBytes("utf-8")) val client: BmlClient = createBMLClient(userName) val resource: BmlUpdateResponse = client.updateResource(userName, resourceId, UUID.randomUUID().toString + ".json", inputStream) if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80022) val map = new util.HashMap[String, Object] map += "resourceId" -> resource.resourceId map += "version" -> resource.version } def query(userName: String, resourceId: String, version: String): util.Map[String, Object] = { val client: BmlClient = createBMLClient(userName) var resource: BmlDownloadResponse = null if (version == null) resource = client.downloadResource(userName, resourceId, null) else resource = client.downloadResource(userName, resourceId, version) if (!resource.isSuccess) throw WorkspaceExceptionManager.createException(80023) val map = new util.HashMap[String, Object] map += "path" -> resource.fullFilePath map += "stream" -> resource.inputStream } private def inputstremToString(inputStream: 
InputStream): String = scala.io.Source.fromInputStream(inputStream).mkString private def createBMLClient(userName: String): BmlClient = if (userName == null) BmlClientFactory.createBmlClient() else BmlClientFactory.createBmlClient(userName) }
Example 18
Source File: StorageScriptFsWriter.scala From Linkis with Apache License 2.0
package com.webank.wedatasphere.linkis.storage.script.writer import java.io.{ByteArrayInputStream, IOException, InputStream, OutputStream} import java.util import com.webank.wedatasphere.linkis.common.io.{FsPath, MetaData, Record} import com.webank.wedatasphere.linkis.storage.LineRecord import com.webank.wedatasphere.linkis.storage.script.{Compaction, ScriptFsWriter, ScriptMetaData} import com.webank.wedatasphere.linkis.storage.utils.{StorageConfiguration, StorageUtils} import org.apache.commons.io.IOUtils class StorageScriptFsWriter(val path: FsPath, val charset: String, outputStream: OutputStream = null) extends ScriptFsWriter { private val stringBuilder = new StringBuilder @scala.throws[IOException] override def addMetaData(metaData: MetaData): Unit = { val compactions = Compaction.listCompactions().filter(p => p.belongTo(StorageUtils.pathToSuffix(path.getPath))) val metadataLine = new util.ArrayList[String]() if (compactions.length > 0) { metaData.asInstanceOf[ScriptMetaData].getMetaData.map(compactions(0).compact).foreach(metadataLine.add) if (outputStream != null) { IOUtils.writeLines(metadataLine, "\n", outputStream, charset) } else { import scala.collection.JavaConversions._ metadataLine.foreach(m => stringBuilder.append(s"$m\n")) } } } @scala.throws[IOException] override def addRecord(record: Record): Unit = { //转成LineRecord而不是TableRecord是为了兼容非Table类型的结果集写到本类中 val scriptRecord = record.asInstanceOf[LineRecord] if (outputStream != null) { IOUtils.write(scriptRecord.getLine, outputStream, charset) } else { stringBuilder.append(scriptRecord.getLine) } } override def close(): Unit = { IOUtils.closeQuietly(outputStream) } override def flush(): Unit = if (outputStream != null) outputStream.flush() def getInputStream(): InputStream = { new ByteArrayInputStream(stringBuilder.toString().getBytes(StorageConfiguration.STORAGE_RS_FILE_TYPE.getValue)) } }
Example 19
Source File: StorageResultSetReader.scala From Linkis with Apache License 2.0
package com.webank.wedatasphere.linkis.storage.resultset import java.io.{ByteArrayInputStream, IOException, InputStream} import com.webank.wedatasphere.linkis.common.io.resultset.{ResultSet, ResultSetReader} import com.webank.wedatasphere.linkis.common.io.{MetaData, Record} import com.webank.wedatasphere.linkis.common.utils.Logging import com.webank.wedatasphere.linkis.storage.domain.Dolphin import com.webank.wedatasphere.linkis.storage.exception.StorageWarnException import com.webank.wedatasphere.linkis.storage.utils.StorageUtils import scala.collection.mutable.ArrayBuffer def readLine(): Array[Byte] = { var rowLen = 0 try rowLen = Dolphin.readInt(inputStream) catch { case t:StorageWarnException => info(s"Read finished(读取完毕)") ; return null case t: Throwable => throw t } val rowBuffer = ArrayBuffer[Byte]() var len = 0 //Read the entire line, except for the data of the line length(读取整行,除了行长的数据) while (rowLen > 0 && len >= 0) { if (rowLen > READ_CACHE) len = StorageUtils.readBytes(inputStream,bytes, READ_CACHE) else len = StorageUtils.readBytes(inputStream,bytes, rowLen) if (len > 0) { rowLen -= len rowBuffer ++= bytes.slice(0, len) } } rowCount = rowCount + 1 rowBuffer.toArray } @scala.throws[IOException] override def getRecord: Record = { if (metaData == null) throw new IOException("Must read metadata first(必须先读取metadata)") if (row == null) throw new IOException("Can't get the value of the field, maybe the IO stream has been read or has been closed!(拿不到字段的值,也许IO流已读取完毕或已被关闭!)") row } @scala.throws[IOException] override def getMetaData: MetaData = { if(metaData == null) init() metaData = deserializer.createMetaData(readLine()) metaData } @scala.throws[IOException] override def skip(recordNum: Int): Int = { if(recordNum < 0 ) return -1 if(metaData == null) getMetaData for(i <- recordNum until (0, -1)){ try inputStream.skip(Dolphin.readInt(inputStream)) catch { case t: Throwable => return -1} } recordNum } @scala.throws[IOException] override def getPosition: Long = rowCount @scala.throws[IOException] override def hasNext: Boolean = { if(metaData == null) getMetaData val line = readLine() if(line == null) return false row = deserializer.createRecord(line) if(row == null) return false true } @scala.throws[IOException] override def available: Long = inputStream.available() override def close(): Unit = inputStream.close() }
Example 20
Source File: StreamingSpec.scala From seals with Apache License 2.0
package com.example.streaming import java.io.{ ByteArrayInputStream, ByteArrayOutputStream } import shapeless.record._ import cats.effect.IO import org.scalatest.matchers.should.Matchers import org.scalatest.flatspec.AnyFlatSpec import fs2.Stream import scodec.Codec import scodec.bits.BitVector import scodec.stream.CodecError import dev.tauri.seals._ import dev.tauri.seals.scodec.Codecs._ import dev.tauri.seals.scodec.StreamCodecs._ class StreamingSpec extends AnyFlatSpec with Matchers { import Main.{ Animal, Elephant, Quokka, Quagga, Grey } val animals = Vector[Animal]( Elephant("Dumbo", tuskLength = 35.0f), Quokka("Nellie"), Quagga("Ford", speed = 120.0) ) val transformedAnimals = Vector[Animal]( Elephant("Dumbo", tuskLength = 35.0f + 17.0f), Quokka("Nellie", Grey) ) val animalStream = Stream.emits[IO, Animal](animals) val encoder = streamEncoderFromReified[Animal] val decoder = streamDecoderFromReified[Animal] "Encoding/decoding" should "work correctly" in { val tsk: IO[Unit] = for { bv <- encoder.encode[IO](animalStream).compile.fold(BitVector.empty)(_ ++ _) as <- decoder.decode[IO](Stream(bv)).compile.toVector } yield { as should === (animals) } tsk.unsafeRunSync() } it should "fail with incompatible models" in { val mod = Reified[Record.`'Elephant -> Elephant, 'Quokka -> Quokka`.T].model val bv: BitVector = Codec[Model].encode(mod).getOrElse(fail) val tsk: IO[Unit] = for { as <- decoder.decode[IO](Stream(bv)).compile.toVector } yield { as should === (Vector.empty) } val ex = intercept[CodecError] { tsk.unsafeRunSync() } ex.err.message should include ("incompatible models") } "Transformation" should "work correctly" in { val tsk: IO[Unit] = for { ibv <- encoder.encode[IO](animalStream).compile.fold(BitVector.empty)(_ ++ _) is = new ByteArrayInputStream(ibv.toByteArray) os = new ByteArrayOutputStream _ <- Main.transform(is, os)(Main.transformer) obv = BitVector(os.toByteArray()) transformed <- decoder.decode[IO](Stream(obv)).compile.fold(Vector.empty[Animal])(_ :+ _) } yield { transformed should === (transformedAnimals) } tsk.unsafeRunSync() } }
Example 21
Source File: CsvSourceTypeConversionTest.scala From eel-sdk with Apache License 2.0
package io.eels.component.csv import java.io.ByteArrayInputStream import java.nio.charset.StandardCharsets import io.eels.schema._ import org.scalatest.{Ignore, Matchers, WordSpec} @Ignore class CsvSourceTypeConversionTest extends WordSpec with Matchers { "CsvSource" should { "read schema" in { val exampleCsvString = """A,B,C,D |1,2.2,3,foo |4,5.5,6,bar """.stripMargin val stream = new ByteArrayInputStream(exampleCsvString.getBytes(StandardCharsets.UTF_8)) val schema = new StructType(Vector( Field("A", IntType.Signed), Field("B", DoubleType), Field("C", IntType.Signed), Field("D", StringType) )) val source = new CsvSource(() => stream) .withSchema(schema) source.schema.fields.foreach(println) val ds = source.toDataStream() val firstRow = ds.iterator.toIterable.head val firstRowA = firstRow.get("A") println(firstRowA) // prints 1 as expected println(firstRowA.getClass.getTypeName) // prints java.lang.String assert(firstRowA == 1) // this assertion will fail because firstRowA is not an Int } } }
Example 22
Source File: get_features_from_peinfo.scala From gsoc_relationship with Apache License 2.0
import com.datastax.spark.connector._ import play.api.libs.json.Json import play.api.libs.json._ import java.io.{ByteArrayOutputStream, ByteArrayInputStream} import java.util.zip.{GZIPOutputStream, GZIPInputStream} import Array.concat import org.apache.spark.sql.types._ import org.apache.spark.ml.linalg.SQLDataTypes.VectorType import org.apache.spark.ml.linalg._ import org.apache.spark.sql.Row import org.apache.spark.ml.feature.MinMaxScaler import org.apache.spark.ml.linalg.DenseVector import PreProcessingConfig._ case class peinfo_results_by_service_name_class(service_name: String, sha256: String) case class peinfo_results_by_sha256_class(sha256: String, service_name: String, results: Array[Byte]) case class peinfo_join_results_class(sha256: String, service_name: String, results: String) case class peinfo_int_final_array_rdd_class(sha256: String, array_results: Array[Double]) case class peinfo_binaray_final_array_rdd_class(sha256:String, array_results :Array[Double]) case class peinfo_final_array_rdd_class(sha256:String, array_results: Array[Double]) def unzip(x: Array[Byte]) : String = { val inputStream = new GZIPInputStream(new ByteArrayInputStream(x)) val output = scala.io.Source.fromInputStream(inputStream).mkString return output } def findAllIntinpeinfo( peinfo_json_results : JsLookupResult, time: Double): Array[Double]= { val entropy = peinfo_json_results \\ "entropy" ; val virt_address = peinfo_json_results \\ "virt_address"; val virt_size = peinfo_json_results \\ "virt_size"; val size = peinfo_json_results \\ "size"; var i= 0; var List = Array.iterate(0.0,17)(a=>a*0) for (k <- ( peinfo_json_results \\ "section_name")){ k.as[String] match { case ".text\u0000\u0000\u0000" => { List(0)=entropy(i).as[Double]; List(1)=Integer.parseInt(virt_address(i).as[String].substring(2), 16).toDouble; List(2)=virt_size(i).as[Double]; List(3)=size(i).as[Double] } case ".data\u0000\u0000\u0000" => { List(4)=entropy(i).as[Double]; List(5)=Integer.parseInt(virt_address(i).as[String].substring(2), 16).toDouble; List(6)=virt_size(i).as[Double]; List(7)=size(i).as[Double] } case ".rsrc\u0000\u0000\u0000" => { List(8)=entropy(i).as[Double]; List(9)=Integer.parseInt(virt_address(i).as[String].substring(2), 16).toDouble; List(10)=virt_size(i).as[Double]; List(11)=size(i).as[Double] } case ".rdata\u0000\u0000" => { List(12)=entropy(i).as[Double]; List(13)=Integer.parseInt(virt_address(i).as[String].substring(2), 16).toDouble; List(14)=virt_size(i).as[Double]; List(15)=size(i).as[Double] } case other => {} } i = i + 1 } List(16)= time return List.toArray } val peinfo_results_by_service_name_meta = sc.cassandraTable[peinfo_results_by_service_name_class](keyspace,service_name_table).where("service_name=?","peinfo") val peinfo_results_by_service_name_rdd = peinfo_results_by_service_name_meta.keyBy(x=> (x.sha256,x.service_name)) val peinfo_results_by_sha256_meta = sc.cassandraTable[peinfo_results_by_sha256_class](keyspace,sha256_table) val peinfo_results_by_sha256_rdd = peinfo_results_by_sha256_meta.keyBy(x => (x.sha256,x.service_name)) val peinfo_join_results = peinfo_results_by_service_name_rdd.join(peinfo_results_by_sha256_rdd).map(x=> (new peinfo_join_results_class(x._1._1,x._1._2, unzip(x._2._2.results)))).distinct().cache() val peinfo_int_final_array_rdd = peinfo_join_results.map(x=>(x.sha256,(Json.parse(x.results) \ "pe_sections"),{if ((Json.parse(x.results) \ "timestamp").isInstanceOf[JsUndefined]) 0.0 else (Json.parse(x.results) \ "timestamp" \\ "timestamp")(0).as[Double]})).filter(x=> 
!x._2.isInstanceOf[JsUndefined]).map(x=>new peinfo_int_final_array_rdd_class(x._1,findAllIntinpeinfo(x._2,x._3))) val peinfo_dllfunction_list= peinfo_join_results.map(x=>Json.parse(x.results) \ "imports").filter(x=> !x.isInstanceOf[JsUndefined]).flatMap(x=>x.as[List[Map[String, String]]].map(x=>(x("dll")+"."+x("function")))).toDF("func_name").groupBy("func_name").count.sort(desc("count")).filter("count > 10000").rdd.map(r => r.getString(0)).collect().toList implicit def bool2int(b:Boolean) = if (b) 1 else 0 def findAllBininpeinfo_dllfunction(peinfo_dllfunction : Seq[String]) : Array[Double] ={ val forlist = for (family <- peinfo_dllfunction_list) yield { (peinfo_dllfunction.contains(family):Int).toDouble } return (forlist).toArray } val List502 = Array.iterate(0.0,502)(a=>0.0) val peinfo_binaray_final_array_rdd = peinfo_join_results.map(x=>(x.sha256,(Json.parse(x.results) \ "imports"))).map(x=>new peinfo_binaray_final_array_rdd_class(x._1,{if (x._2.isInstanceOf[JsUndefined]) List502 else findAllBininpeinfo_dllfunction(x._2.as[Seq[Map[String, String]]].map(x=>(x("dll")+"."+x("function"))))})) val peinfo_int_final_array_rdd_before_join = peinfo_int_final_array_rdd.map(x=>(x.sha256,x.array_results)) val peinfo_binaray_final_array_rdd_before_join = peinfo_binaray_final_array_rdd.map(x=>(x.sha256,x.array_results)) val peinfo_array_rdd_by_join = peinfo_int_final_array_rdd_before_join.join(peinfo_binaray_final_array_rdd_before_join).map(x=> (x._1,concat(x._2._1,x._2._2))) val peinfo_final_array_rdd = peinfo_array_rdd_by_join.map(x=>new peinfo_final_array_rdd_class(x._1,x._2)) val peinfo_schema = new StructType().add("sha256", StringType).add("peinfo",VectorType) val peinfo_vector_rdd = peinfo_final_array_rdd.map(x=>(x.sha256,Vectors.dense(x.array_results))) val peinfo_vector_rowrdd = peinfo_vector_rdd.map(p => Row(p._1,p._2)) val peinfo_vector_dataframe = spark.createDataFrame(peinfo_vector_rowrdd, peinfo_schema) val peinfo_scaler = new MinMaxScaler() .setInputCol("peinfo") .setOutputCol("scaled_peinfo") val peinfo_scalerModel = peinfo_scaler.fit(peinfo_vector_dataframe) val peinfo_scaledData_df = peinfo_scalerModel.transform(peinfo_vector_dataframe) val peinfo_scaledData_rdd = peinfo_scaledData_df.select("sha256","scaled_peinfo").rdd.map(row=>(row.getAs[String]("sha256"),row.getAs[DenseVector]("scaled_peinfo"))).map(x=>new peinfo_final_array_rdd_class(x._1,x._2.toArray)) peinfo_scaledData_rdd.toDF().write.format("parquet").save(peinfo_final_array_file)
Example 23
Source File: get_features_from_objdump.scala From gsoc_relationship with Apache License 2.0
import com.datastax.spark.connector._ import play.api.libs.json.Json import play.api.libs.json._ import java.io.{ByteArrayOutputStream, ByteArrayInputStream} import java.util.zip.{GZIPOutputStream, GZIPInputStream} import PreProcessingConfig._ case class objdump_results_by_service_name_class(service_name: String, sha256: String) case class objdump_results_by_sha256_class(sha256: String, service_name: String, results: Array[Byte]) case class objdump_join_results_class(sha256: String, service_name: String, results: String) case class objdump_binaray_final_array_rdd_class(sha256: String, array_results: Array[Double]) val objdump_main_list = sc.textFile(objdump_x86Opcodes_file).collect.toList def unzip(x: Array[Byte]) : String = { val inputStream = new GZIPInputStream(new ByteArrayInputStream(x)) val output = scala.io.Source.fromInputStream(inputStream).mkString return output } def combineAllObjdumpInOne( malwarelist :Seq[play.api.libs.json.JsValue]) : List[String] ={ if (malwarelist(0).toString() == "null") return List("null") var begin = malwarelist(0).as[List[String]] for (i <- 1 to (malwarelist.size-1)){ if (malwarelist(i).toString() == "null") begin = begin else begin = begin ::: malwarelist(i).as[List[String]] } return begin } def convertToList( malwarelist :Seq[play.api.libs.json.JsValue]) : List[String] = { if (malwarelist(0).toString() == "null") return List("null") else { return malwarelist(0).as[List[String]] } } def findAllBininobjdump_main_list(malware :List[String]) : Array[Double] ={ if (malware == List("null")) return (List.fill(10000)(0.0)).toArray else { val forlist = for ( one <- malware ) yield { objdump_main_list.indexOf(one) + 1.0 } if (forlist.size < 10000){ return (List.concat(forlist,List.fill(10000-forlist.size)(0.0))).toArray } else return forlist.toArray } } val objdump_results_by_service_name_meta = sc.cassandraTable[objdump_results_by_service_name_class](keyspace,service_name_table).where("service_name=?","objdump") val objdump_results_by_service_name_rdd = objdump_results_by_service_name_meta.keyBy(x=> (x.sha256,x.service_name)) val objdump_results_by_sha256_meta = sc.cassandraTable[objdump_results_by_sha256_class](keyspace,sha256_table) val objdump_results_by_sha256_rdd = objdump_results_by_sha256_meta.keyBy(x => (x.sha256,x.service_name)) val objdump_join_results = objdump_results_by_service_name_rdd.join(objdump_results_by_sha256_rdd).map(x=> (new objdump_join_results_class(x._1._1,x._1._2, unzip(x._2._2.results)))).distinct() val objdump_binaray_final_array_rdd = objdump_join_results.map(x=>(x.sha256,(Json.parse(x.results) \\ "opcodes"))).filter(x=> (x._2.size > 0)).map(x=>(x._1,if ( x._2.size == 1 ) convertToList(x._2) else combineAllObjdumpInOne(x._2))).map(x=>(x._1,findAllBininobjdump_main_list(x._2))) objdump_binaray_final_array_rdd.toDF().write.format("parquet").save(objdump_binaray_final_array_file)
Example 24
Source File: get_VT_signatures.scala From gsoc_relationship with Apache License 2.0 | 5 votes |
import com.datastax.spark.connector._ import play.api.libs.json.Json import java.io.{ByteArrayOutputStream, ByteArrayInputStream} import java.util.zip.{GZIPOutputStream, GZIPInputStream} import PreProcessingConfig._ case class VT_results_by_service_name_class(service_name: String, sha256: String) case class VT_results_by_sha256_class(sha256: String, service_name: String, results: Array[Byte] ) case class VT_join_results_class(sha256: String, service_name: String, results: String) case class VT_sample_signatures_initial_seq_rdd_class(sha256: String, seq_results: Seq[String]) case class VT_sample_signatures_final_array_rdd_class(sha256:String, array_results:Array[Double]) def unzip(x: Array[Byte]) : String = { val inputStream = new GZIPInputStream(new ByteArrayInputStream(x)) val output = scala.io.Source.fromInputStream(inputStream).mkString return output } def deleteNumberInSampleSignatures(x: String): Boolean = { val regex = "[0-9]".r return regex.findFirstIn(x).isEmpty } val VT_results_by_service_name_meta = sc.cassandraTable[VT_results_by_service_name_class](keyspace,service_name_table).where("service_name=?","virustotal") val VT_results_by_service_name_rdd = VT_results_by_service_name_meta.keyBy(x=> (x.sha256,x.service_name)) val VT_results_by_sha256_meta = sc.cassandraTable[VT_results_by_sha256_class](keyspace,sha256_table) val VT_results_by_sha256_rdd = VT_results_by_sha256_meta.keyBy(x => (x.sha256,x.service_name)) val VT_join_results = VT_results_by_service_name_rdd.join(VT_results_by_sha256_rdd).map(x => (new VT_join_results_class(x._1._1,x._1._2, unzip(x._2._2.results)))).distinct().cache() val sample_signatures_rdd = VT_join_results.flatMap(x=>Json.parse(x.results) \ "scans" \\ "result").map(x=>Json.stringify(x)).filter( x=> !(x == "null")) val sample_signatures_split_rdd = sample_signatures_rdd.flatMap(x=>x.replaceAll("""["]""","").replaceAll("""\![a-zA-Z0-9\s\+]+""","").replaceAll("""@[a-zA-Z0-9\s\+]+""","").replaceAll("""~[a-zA-Z0-9\s\+]+""","").replaceAll("""[\(|\[|{][a-zA-Z0-9\s\+]*[\)|\]|}]""","").replaceAll("""(\.|\!|\:|\_|\-|\\|/|\[|\])"""," ").split(" ")).filter(x=>(x.size>3)).filter(x=>deleteNumberInSampleSignatures(x)).map(x=>x.toLowerCase()) val signatures_prefix_rdd = sc.textFile(VT_signatures_prefix_suffix_file).map(x=>x.toLowerCase()) val family_signatures_subtract_rdd = sample_signatures_split_rdd.subtract(signatures_prefix_rdd) val family_signatures_sorted_rdd = sc.parallelize(family_signatures_subtract_rdd.countByValue().toSeq).filter(x=>(x._2>50)).sortBy(x=>x._2,false) val family_signatures_list = family_signatures_sorted_rdd.keys.collect().toList val VT_sample_signatures_rdd = VT_join_results.map(x=>(x.sha256,(Json.parse(x.results) \ "scans" \\ "result").map(_.toString).filter( s => !(s== "null")).flatMap(x=>x.replaceAll("""["]""","").replaceAll("""\![a-zA-Z0-9\s\+]+""","").replaceAll("""@[a-zA-Z0-9\s\+]+""","").replaceAll("""~[a-zA-Z0-9\s\+]+""","").replaceAll("""[\(|\[|{][a-zA-Z0-9\s\+]*[\)|\]|}]""","").replaceAll("""(\.|\!|\:|\_|\-|\\|/|\[|\])"""," ").split(" ")).filter(x=>(x.size>3)).filter(x=>deleteNumberInSampleSignatures(x)).map(x=>x.toLowerCase()))) val VT_sample_signatures_initial_seq_rdd = VT_sample_signatures_rdd.map(x=>new VT_sample_signatures_initial_seq_rdd_class(x._1, x._2)) implicit def bool2int(b:Boolean) = if (b) 1 else 0 def findAllInFamilySignatures(sample_signatures_seq : Seq[String]) : Array[Double] ={ val forlist = for (family <- family_signatures_list) yield { (sample_signatures_seq.contains(family):Int).toDouble } return 
forlist.toArray } val VT_sample_signatures_final_array_rdd = VT_sample_signatures_initial_seq_rdd.map(x=>new VT_sample_signatures_final_array_rdd_class(x.sha256,findAllInFamilySignatures(x.seq_results))) VT_sample_signatures_final_array_rdd.toDF().write.format("parquet").save(VT_sample_signatures_final_array_file)
Example 25
Source File: HelperMethods.scala From gsoc_relationship with Apache License 2.0 | 5 votes |
package com.holmesprocessing.analytics.relationship.knowledgeBase import play.api.libs.json.Json import java.util.zip.{GZIPOutputStream, GZIPInputStream} import java.io.ByteArrayInputStream import java.io.ByteArrayOutputStream /* enclosing object restored so the snippet's braces balance; the name is assumed from the file name */ object HelperMethods { def score(ruleset_1: String, ruleset_2: String): Double = { val split_1 = ruleset_1.split(",").toSeq val split_2 = ruleset_2.split(",").toSeq if (split_1.length > 0 && split_2.length > 0) { return split_1.intersect(split_2).length.toDouble / split_1.union(split_2).distinct.length.toDouble } else { return 0 } } }
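The score function is a plain Jaccard similarity over comma-separated rule names, so a quick worked example makes the return value concrete (assuming the score helper is in scope):

// "a,b,c" vs "b,c,d": the intersection {b, c} has 2 elements and the
// distinct union {a, b, c, d} has 4, so the score is 2.0 / 4.0.
val similarity = score("a,b,c", "b,c,d")
assert(similarity == 0.5)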
Example 26
Source File: JsonSerializer.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.kafka.serializers import java.io.ByteArrayInputStream import java.util import com.fasterxml.jackson.databind.{ JsonNode, ObjectMapper, SerializationFeature } import hydra.common.config.ConfigSupport import org.apache.kafka.common.serialization._ class JsonSerializer extends Serializer[JsonNode] with ConfigSupport { import JsonSerializer._ override def serialize(topic: String, data: JsonNode): Array[Byte] = { mapper.writeValueAsBytes(data) } override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = { val indent = Option(configs.get("kafka.encoders.json.indent.output")) .map(_.toString.toBoolean) .getOrElse(false) mapper.configure(SerializationFeature.INDENT_OUTPUT, indent) } override def close(): Unit = { //nothing to do } } object JsonSerializer { val mapper = new ObjectMapper } class JsonDeserializer extends Deserializer[JsonNode] { import JsonDeserializer._ override def deserialize(topic: String, bytes: Array[Byte]): JsonNode = { mapper.readTree(new ByteArrayInputStream(bytes)) } override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = { val indent = Option(configs.get("kafka.encoders.json.indent.output")) .map(_.toString.toBoolean) .getOrElse(false) mapper.configure(SerializationFeature.INDENT_OUTPUT, indent) } override def close(): Unit = { //nothing } } object JsonDeserializer { val mapper = new ObjectMapper }
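Stripped of the Kafka Serializer/Deserializer plumbing, the deserializer above boils down to ObjectMapper.readTree over a ByteArrayInputStream. A minimal sketch of that round trip with plain Jackson; the sample JSON is illustrative.

import java.io.ByteArrayInputStream
import com.fasterxml.jackson.databind.ObjectMapper

object JacksonBytesRoundTrip {
  def main(args: Array[String]): Unit = {
    val mapper = new ObjectMapper()
    val node = mapper.readTree("""{"id": 1, "name": "hydra"}""")

    // JsonNode -> bytes -> JsonNode, the same path the serializer pair above takes
    val bytes = mapper.writeValueAsBytes(node)
    val restored = mapper.readTree(new ByteArrayInputStream(bytes))

    assert(restored == node)
  }
}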
Example 27
Source File: KryoInitSpec.scala From hydra with Apache License 2.0 | 5 votes |
package hydra.core.akka import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import com.esotericsoftware.kryo.Kryo import com.esotericsoftware.kryo.io.{Input, Output} import com.romix.scala.serialization.kryo.{ EnumerationSerializer, ScalaImmutableAbstractMapSerializer, ScalaMutableMapSerializer } import org.scalatest.matchers.should.Matchers import org.scalatest.flatspec.AnyFlatSpecLike class KryoInitSpec extends Matchers with AnyFlatSpecLike { "The custom KryoInit" should "register serializers" in { val kryo = new Kryo() new KryoInit().customize(kryo) kryo.getDefaultSerializer(classOf[scala.Enumeration#Value]) shouldBe an[ EnumerationSerializer ] kryo.getDefaultSerializer(classOf[collection.immutable.Map[_, _]]) shouldBe a[ ScalaImmutableAbstractMapSerializer ] kryo.getDefaultSerializer(classOf[collection.immutable.Map[_, _]]) shouldBe a[ ScalaImmutableAbstractMapSerializer ] kryo.getDefaultSerializer(classOf[collection.mutable.HashMap[_, _]]) shouldBe a[ ScalaMutableMapSerializer ] } it should "serialize immutable maps" in { val kryo = new Kryo() new KryoInit().customize(kryo) val map1 = Map( "Rome" -> "Italy", "London" -> "England", "Paris" -> "France", "New York" -> "USA", "Tokyo" -> "Japan", "Peking" -> "China", "Brussels" -> "Belgium" ) val map2 = map1 + ("Moscow" -> "Russia") val map3 = map2 + ("Berlin" -> "Germany") val map4 = map3 + ("Germany" -> "Berlin", "Russia" -> "Moscow") roundTrip(map1, kryo) roundTrip(map2, kryo) roundTrip(map3, kryo) roundTrip(map4, kryo) } def roundTrip[T](obj: T, kryo: Kryo): T = { val outStream = new ByteArrayOutputStream() val output = new Output(outStream, 4096) kryo.writeClassAndObject(output, obj) output.flush() val input = new Input(new ByteArrayInputStream(outStream.toByteArray), 4096) val obj1 = kryo.readClassAndObject(input) assert(obj == obj1) obj1.asInstanceOf[T] } }
Example 28
Source File: BytecodeUtils.scala From graphx-algorithm with GNU General Public License v2.0 | 5 votes |
package org.apache.spark.graphx.util import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import scala.collection.mutable.HashSet import scala.language.existentials import org.apache.spark.util.Utils import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.{ClassReader, ClassVisitor, MethodVisitor} import com.esotericsoftware.reflectasm.shaded.org.objectweb.asm.Opcodes._ private class MethodInvocationFinder(className: String, methodName: String) extends ClassVisitor(ASM4) { val methodsInvoked = new HashSet[(Class[_], String)] override def visitMethod(access: Int, name: String, desc: String, sig: String, exceptions: Array[String]): MethodVisitor = { if (name == methodName) { new MethodVisitor(ASM4) { override def visitMethodInsn(op: Int, owner: String, name: String, desc: String) { if (op == INVOKEVIRTUAL || op == INVOKESPECIAL || op == INVOKESTATIC) { if (!skipClass(owner)) { methodsInvoked.add((Class.forName(owner.replace("/", ".")), name)) } } } } } else { null } } } }
Example 29
Source File: FakeMavenRepository.scala From exodus with MIT License | 5 votes |
package com.wixpress.build.maven import java.io.ByteArrayInputStream import org.apache.commons.codec.digest.DigestUtils import org.codehaus.mojo.mrm.api.maven.Artifact import org.codehaus.mojo.mrm.impl.maven.{ArtifactStoreFileSystem, MemoryArtifactStore} import org.codehaus.mojo.mrm.plugin.FileSystemServer class FakeMavenRepository(port: Int = 0) { implicit class ExtendedArtifactDescriptor(artifact: ArtifactDescriptor) { def asArtifact(ofType: String): Artifact = { val parent = artifact.parentCoordinates val groupId = artifact.groupId .getOrElse(parent.map(_.groupId).getOrElse(throw new RuntimeException("missing groupId or parent.groupId"))) val version = artifact.version .getOrElse(parent.map(_.version).getOrElse(throw new RuntimeException("missing version or parent.version"))) new Artifact(groupId, artifact.artifactId, version, ofType) } } private val inMemoryArtifactStore = new MemoryArtifactStore private val mavenRepoManager = new FileSystemServer("foo", port, new ArtifactStoreFileSystem(inMemoryArtifactStore), "") def url: String = mavenRepoManager.getUrl def start(): Unit = mavenRepoManager.ensureStarted() def stop(): Unit = { mavenRepoManager.finish() mavenRepoManager.waitForFinished() } def addArtifacts(artifact: ArtifactDescriptor*): Unit = addArtifacts(artifact.toSet) def addCoordinates(coordinatesSet: Coordinates*): Unit = addCoordinates(coordinatesSet.toSet) def addArtifacts(artifacts: Set[ArtifactDescriptor]): Unit = artifacts.foreach(addSingleArtifact) def addCoordinates(coordinatesSet: Set[Coordinates]): Unit = coordinatesSet.foreach(addSingleCoordinates) def addSingleCoordinates(coordinates: Coordinates): Unit = addSingleArtifact(ArtifactDescriptor.anArtifact(coordinates)) def addSingleArtifact(artifact: ArtifactDescriptor): Unit = { val xml = artifact.pomXml val md5 = DigestUtils.md5Hex(xml) val sha1 = DigestUtils.sha1Hex(xml) inMemoryArtifactStore.set(artifact.asArtifact(ofType = "pom"), streamFrom(xml)) inMemoryArtifactStore.set(artifact.asArtifact(ofType = "pom.md5"), streamFrom(md5)) inMemoryArtifactStore.set(artifact.asArtifact(ofType = "pom.sha1"), streamFrom(sha1)) } private def streamFrom(input: String) = { new ByteArrayInputStream(input.getBytes("UTF-8")) } def addJarArtifact(artifact: Coordinates, jar: Array[Byte]) = inMemoryArtifactStore.set( new Artifact(artifact.groupId, artifact.artifactId, artifact.version, artifact.classifier.orNull, "jar"), new ByteArrayInputStream(jar)) def addJarSha256(artifact: Coordinates, sha256: String) = inMemoryArtifactStore.set( new Artifact( artifact.groupId, artifact.artifactId, artifact.version, artifact.classifier.orNull, artifact.packaging.value + ".sha256"), streamFrom(sha256)) }
Example 30
Source File: RulesTxtDeploymentServiceSpec.scala From smui with Apache License 2.0 | 5 votes |
package models import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.util.zip.ZipInputStream import org.apache.commons.io.IOUtils import org.scalatest.{FlatSpec, Matchers} class RulesTxtDeploymentServiceSpec extends FlatSpec with Matchers with ApplicationTestBase { private lazy val service = injector.instanceOf[RulesTxtDeploymentService] private var inputIds: Seq[SearchInputId] = Seq.empty override protected def beforeAll(): Unit = { super.beforeAll() createTestCores() inputIds = createTestRule() } private def rulesFileContent(ruleIds: Seq[SearchInputId]): String = s"""aerosmith => | SYNONYM: mercury | DOWN(10): battery | UP(10): notebook | FILTER: zz top | @{ | "_log" : "${ruleIds.head}" | }@ | |mercury => | SYNONYM: aerosmith | DOWN(10): battery | UP(10): notebook | FILTER: zz top | @{ | "_log" : "${ruleIds.head}" | }@ | |shipping => | DECORATE: REDIRECT http://xyz.com/shipping | @{ | "_log" : "${ruleIds.last}" | }@""".stripMargin "RulesTxtDeploymentService" should "generate rules files with correct file names" in { val rulesTxt = service.generateRulesTxtContentWithFilenames(core1Id, "LIVE", logDebug = false) rulesTxt.solrIndexId shouldBe core1Id rulesTxt.decompoundRules shouldBe empty rulesTxt.regularRules.content.trim shouldBe rulesFileContent(inputIds) rulesTxt.regularRules.sourceFileName shouldBe "/tmp/search-management-ui_rules-txt.tmp" rulesTxt.regularRules.destinationFileName shouldBe "/usr/bin/solr/liveCore/conf/rules.txt" } it should "validate the rules files correctly" in { val rulesTxt = service.generateRulesTxtContentWithFilenames(core1Id, "LIVE", logDebug = false) service.validateCompleteRulesTxts(rulesTxt, logDebug = false) shouldBe empty val badRulesTxt = rulesTxt.copy(regularRules = rulesTxt.regularRules.copy(content = "a very bad rules file")) service.validateCompleteRulesTxts(badRulesTxt, logDebug = false) shouldBe List("Line 1: Missing input for instruction") } it should "provide a zip file with all rules files" in { val out = new ByteArrayOutputStream() service.writeAllRulesTxtFilesAsZipFileToStream(out) val bytes = out.toByteArray val zipStream = new ZipInputStream(new ByteArrayInputStream(bytes)) val firstEntry = zipStream.getNextEntry firstEntry.getName shouldBe "rules_core1.txt" IOUtils.toString(zipStream, "UTF-8").trim shouldBe rulesFileContent(inputIds) val secondEntry = zipStream.getNextEntry secondEntry.getName shouldBe "rules_core2.txt" IOUtils.toString(zipStream, "UTF-8").trim shouldBe "" } }
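The last test reads a zip archive straight out of a byte array. For reference, here is a self-contained sketch of writing and reading a zip entirely in memory; the entry name and content are made up and only echo the rules-file theme.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.charset.StandardCharsets
import java.util.zip.{ZipEntry, ZipInputStream, ZipOutputStream}

object InMemoryZip {
  def main(args: Array[String]): Unit = {
    // Build a tiny zip archive in memory ...
    val bos = new ByteArrayOutputStream()
    val zos = new ZipOutputStream(bos)
    zos.putNextEntry(new ZipEntry("rules.txt"))
    zos.write("aerosmith => SYNONYM: mercury".getBytes(StandardCharsets.UTF_8))
    zos.closeEntry()
    zos.close()

    // ... and read it back through a ByteArrayInputStream, as the spec above does.
    val zis = new ZipInputStream(new ByteArrayInputStream(bos.toByteArray))
    val entry = zis.getNextEntry
    println(entry.getName) // rules.txt
    println(scala.io.Source.fromInputStream(zis, "UTF-8").mkString)
    zis.close()
  }
}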
Example 31
Source File: ImageLoaderUtils.scala From keystone with Apache License 2.0 | 5 votes |
package keystoneml.loaders import java.awt.image.BufferedImage import java.io.{InputStream, ByteArrayInputStream} import java.net.URI import java.util.zip.GZIPInputStream import javax.imageio.ImageIO import keystoneml.loaders.VOCLoader._ import org.apache.commons.compress.archivers.ArchiveStreamFactory import org.apache.commons.compress.archivers.tar.TarArchiveInputStream import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.SparkContext import org.apache.spark.rdd.RDD import keystoneml.pipelines.Logging import keystoneml.utils._ import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag object ImageLoaderUtils extends Logging { def loadFiles[L, I <: AbstractLabeledImage[L] : ClassTag]( filePathsRDD: RDD[URI], labelsMap: String => L, imageBuilder: (Image, L, Option[String]) => I, // TODO(etrain): We can probably do this with implicits. namePrefix: Option[String] = None): RDD[I] = { filePathsRDD.flatMap(fileUri => loadFile(fileUri, labelsMap, imageBuilder, namePrefix)) } private def loadFile[L, I <: AbstractLabeledImage[L]]( fileUri: URI, labelsMap: String => L, imageBuilder: (Image, L, Option[String]) => I, namePrefix: Option[String]): Iterator[I] = { val filePath = new Path(fileUri) val conf = new Configuration(true) val fs = FileSystem.get(filePath.toUri(), conf) val fStream = fs.open(filePath) val tarStream = new ArchiveStreamFactory().createArchiveInputStream( "tar", fStream).asInstanceOf[TarArchiveInputStream] var entry = tarStream.getNextTarEntry() val imgs = new ArrayBuffer[I] while (entry != null) { if (!entry.isDirectory && (namePrefix.isEmpty || entry.getName.startsWith(namePrefix.get))) { var offset = 0 var ret = 0 val content = new Array[Byte](entry.getSize().toInt) while (ret >= 0 && offset != entry.getSize()) { ret = tarStream.read(content, offset, content.length - offset) if (ret >= 0) { offset += ret } } val bais = new ByteArrayInputStream(content) val image = ImageUtils.loadImage(bais).map { img => imageBuilder(img, labelsMap(entry.getName), Some(entry.getName)) } imgs ++= image } entry = tarStream.getNextTarEntry() } imgs.iterator } }
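Inside the tar loop the image bytes never touch disk: they are wrapped in a ByteArrayInputStream and handed to the image decoder. A minimal sketch of the same idea using only javax.imageio (the path argument is hypothetical):

import java.awt.image.BufferedImage
import java.io.ByteArrayInputStream
import java.nio.file.{Files, Paths}
import javax.imageio.ImageIO

object ImageFromBytes {
  // Decode an image that is already in memory; no temporary file is needed.
  def load(path: String): BufferedImage = {
    val bytes = Files.readAllBytes(Paths.get(path))
    // ImageIO.read returns null if no registered reader understands the bytes.
    ImageIO.read(new ByteArrayInputStream(bytes))
  }
}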
Example 32
Source File: TestUtils.scala From keystone with Apache License 2.0 | 5 votes |
package keystoneml.utils import java.io.{FileReader, ByteArrayInputStream} import breeze.linalg.DenseMatrix import breeze.stats.distributions.{Gaussian, RandBasis, ThreadLocalRandomGenerator, Rand} import edu.berkeley.cs.amplab.mlmatrix.RowPartitionedMatrix import org.apache.commons.io.IOUtils import org.apache.commons.math3.random.MersenneTwister import org.apache.spark.SparkContext import scala.io.Source import scala.util.Random def genChannelMajorArrayVectorizedImage(x: Int, y: Int, z: Int): ChannelMajorArrayVectorizedImage = { ChannelMajorArrayVectorizedImage(genData(x, y, z), ImageMetadata(x,y,z)) } def genRowColumnMajorByteArrayVectorizedImage(x: Int, y: Int, z: Int): RowColumnMajorByteArrayVectorizedImage = { RowColumnMajorByteArrayVectorizedImage(genData(x,y,z).map(_.toByte), ImageMetadata(x,y,z)) } def createRandomMatrix( sc: SparkContext, numRows: Int, numCols: Int, numParts: Int, seed: Int = 42): RowPartitionedMatrix = { val rowsPerPart = numRows / numParts val matrixParts = sc.parallelize(1 to numParts, numParts).mapPartitionsWithIndex { (index, part) => val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed+index))) Iterator(DenseMatrix.rand(rowsPerPart, numCols, Gaussian(0.0, 1.0)(randBasis))) } RowPartitionedMatrix.fromMatrix(matrixParts.cache()) } def createLocalRandomMatrix(numRows: Int, numCols: Int, seed: Int = 42): DenseMatrix[Double] = { val randBasis: RandBasis = new RandBasis(new ThreadLocalRandomGenerator(new MersenneTwister(seed))) DenseMatrix.rand(numRows, numCols, Gaussian(0.0, 1.0)(randBasis)) } }
Example 33
Source File: AggregatorTest.scala From noether with Apache License 2.0 | 5 votes |
package com.spotify.noether import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} import com.twitter.algebird.Aggregator import org.scalatest._ import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers trait AggregatorTest extends AnyFlatSpec with Matchers { def run[A, B, C](aggregator: Aggregator[A, B, C])(as: Seq[A]): C = { val bs = as.map(aggregator.prepare _ compose ensureSerializable) val b = ensureSerializable(aggregator.reduce(bs)) ensureSerializable(aggregator.present(b)) } private def serializeToByteArray(value: Any): Array[Byte] = { val buffer = new ByteArrayOutputStream() val oos = new ObjectOutputStream(buffer) oos.writeObject(value) buffer.toByteArray } private def deserializeFromByteArray(encodedValue: Array[Byte]): AnyRef = { val ois = new ObjectInputStream(new ByteArrayInputStream(encodedValue)) ois.readObject() } private def ensureSerializable[T](value: T): T = deserializeFromByteArray(serializeToByteArray(value)).asInstanceOf[T] }
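The ensureSerializable trick above (write with ObjectOutputStream, read back through a ByteArrayInputStream) is a handy standalone check for any value that has to cross serialization boundaries. A minimal generic version, assuming the value really implements java.io.Serializable:

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

object JavaSerdeCheck {
  def roundTrip[T](value: T): T = {
    val buffer = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(buffer)
    oos.writeObject(value) // throws NotSerializableException if T is not Serializable
    oos.close()

    val ois = new ObjectInputStream(new ByteArrayInputStream(buffer.toByteArray))
    try ois.readObject().asInstanceOf[T]
    finally ois.close()
  }

  def main(args: Array[String]): Unit = {
    val original = Map("Rome" -> "Italy", "Tokyo" -> "Japan")
    assert(roundTrip(original) == original)
  }
}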
Example 34
Source File: TestingTypedCount.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, TypedImperativeAggregate} import org.apache.spark.sql.hive.execution.TestingTypedCount.State import org.apache.spark.sql.types._ @ExpressionDescription( usage = "_FUNC_(expr) - A testing aggregate function resembles COUNT " + "but implements ObjectAggregateFunction.") case class TestingTypedCount( child: Expression, mutableAggBufferOffset: Int = 0, inputAggBufferOffset: Int = 0) extends TypedImperativeAggregate[TestingTypedCount.State] { def this(child: Expression) = this(child, 0, 0) override def children: Seq[Expression] = child :: Nil override def dataType: DataType = LongType override def nullable: Boolean = false override def createAggregationBuffer(): State = TestingTypedCount.State(0L) override def update(buffer: State, input: InternalRow): State = { if (child.eval(input) != null) { buffer.count += 1 } buffer } override def merge(buffer: State, input: State): State = { buffer.count += input.count buffer } override def eval(buffer: State): Any = buffer.count override def serialize(buffer: State): Array[Byte] = { val byteStream = new ByteArrayOutputStream() val dataStream = new DataOutputStream(byteStream) dataStream.writeLong(buffer.count) byteStream.toByteArray } override def deserialize(storageFormat: Array[Byte]): State = { val byteStream = new ByteArrayInputStream(storageFormat) val dataStream = new DataInputStream(byteStream) TestingTypedCount.State(dataStream.readLong()) } override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate = copy(mutableAggBufferOffset = newMutableAggBufferOffset) override def withNewInputAggBufferOffset(newInputAggBufferOffset: Int): ImperativeAggregate = copy(inputAggBufferOffset = newInputAggBufferOffset) override val prettyName: String = "typed_count" } object TestingTypedCount { case class State(var count: Long) }
Example 35
Source File: CreateJacksonParser.scala From XSQL with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.json import java.io.{ByteArrayInputStream, InputStream, InputStreamReader} import java.nio.channels.Channels import java.nio.charset.Charset import com.fasterxml.jackson.core.{JsonFactory, JsonParser} import org.apache.hadoop.io.Text import sun.nio.cs.StreamDecoder import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.unsafe.types.UTF8String private[sql] object CreateJacksonParser extends Serializable { def string(jsonFactory: JsonFactory, record: String): JsonParser = { jsonFactory.createParser(record) } def utf8String(jsonFactory: JsonFactory, record: UTF8String): JsonParser = { val bb = record.getByteBuffer assert(bb.hasArray) val bain = new ByteArrayInputStream( bb.array(), bb.arrayOffset() + bb.position(), bb.remaining()) jsonFactory.createParser(new InputStreamReader(bain, "UTF-8")) } def text(jsonFactory: JsonFactory, record: Text): JsonParser = { jsonFactory.createParser(record.getBytes, 0, record.getLength) } // Jackson parsers can be ranked according to their performance: // 1. Array based with actual encoding UTF-8 in the array. This is the fastest parser // but it doesn't allow to set encoding explicitly. Actual encoding is detected automatically // by checking leading bytes of the array. // 2. InputStream based with actual encoding UTF-8 in the stream. Encoding is detected // automatically by analyzing first bytes of the input stream. // 3. Reader based parser. This is the slowest parser used here but it allows to create // a reader with specific encoding. // The method creates a reader for an array with given encoding and sets size of internal // decoding buffer according to size of input array. private def getStreamDecoder(enc: String, in: Array[Byte], length: Int): StreamDecoder = { val bais = new ByteArrayInputStream(in, 0, length) val byteChannel = Channels.newChannel(bais) val decodingBufferSize = Math.min(length, 8192) val decoder = Charset.forName(enc).newDecoder() StreamDecoder.forDecoder(byteChannel, decoder, decodingBufferSize) } def text(enc: String, jsonFactory: JsonFactory, record: Text): JsonParser = { val sd = getStreamDecoder(enc, record.getBytes, record.getLength) jsonFactory.createParser(sd) } def inputStream(jsonFactory: JsonFactory, is: InputStream): JsonParser = { jsonFactory.createParser(is) } def inputStream(enc: String, jsonFactory: JsonFactory, is: InputStream): JsonParser = { jsonFactory.createParser(new InputStreamReader(is, enc)) } def internalRow(jsonFactory: JsonFactory, row: InternalRow): JsonParser = { val ba = row.getBinary(0) jsonFactory.createParser(ba, 0, ba.length) } def internalRow(enc: String, jsonFactory: JsonFactory, row: InternalRow): JsonParser = { val binary = row.getBinary(0) val sd = getStreamDecoder(enc, binary, binary.length) jsonFactory.createParser(sd) } }
Example 36
Source File: SQLRunnerSuite.scala From HANAVora-Extensions with Apache License 2.0 | 5 votes |
package com.sap.spark.cli import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream} import org.apache.spark.SparkContext import org.apache.spark.sql.{GlobalSapSQLContext, SQLContext} import org.scalatest.{BeforeAndAfterEach, FunSuite, ShouldMatchers} // good call val goodOpts = SQLRunner.parseOpts(List("a.sql", "b.sql", "-o", "output.csv")) goodOpts.sqlFiles should be(List("a.sql", "b.sql")) goodOpts.output should be(Some("output.csv")) // bad call val badOpts = SQLRunner.parseOpts(List()) badOpts.sqlFiles should be(List()) badOpts.output should be(None) // ugly call val uglyOpts = SQLRunner.parseOpts(List("a.sql", "-o", "output.csv", "b.sql")) uglyOpts.sqlFiles should be(List("a.sql", "b.sql")) uglyOpts.output should be(Some("output.csv")) } def runSQLTest(input: String, expectedOutput: String): Unit = { val inputStream: InputStream = new ByteArrayInputStream(input.getBytes()) val outputStream = new ByteArrayOutputStream() SQLRunner.sql(inputStream, outputStream) val output = outputStream.toString output should be(expectedOutput) } test("can run dummy query") { val input = "SELECT 1;" val output = "1\n" runSQLTest(input, output) } test("can run multiple dummy queries") { val input = """ |SELECT 1;SELECT 2; |SELECT 3; """.stripMargin val output = "1\n2\n3\n" runSQLTest(input, output) } test("can run a basic example with tables") { val input = """ |SELECT * FROM DEMO_TABLE; |SELECT * FROM DEMO_TABLE LIMIT 1; |DROP TABLE DEMO_TABLE; """.stripMargin val output = "1,a\n2,b\n3,c\n1,a\n" runSQLTest(input, output) } test("can run an example with comments") { val input = """ |SELECT * FROM DEMO_TABLE; -- this is the first query |SELECT * FROM DEMO_TABLE LIMIT 1; |-- now let's drop a table |DROP TABLE DEMO_TABLE; """.stripMargin val output = "1,a\n2,b\n3,c\n1,a\n" runSQLTest(input, output) } }
Example 37
Source File: S3SnapshotStore.scala From akka-persistence-s3 with MIT License | 5 votes |
package akka.persistence.s3 package snapshot import java.io.ByteArrayInputStream import akka.actor.ActorLogging import akka.persistence.serialization.Snapshot import akka.persistence.{ SelectedSnapshot, SnapshotMetadata, SnapshotSelectionCriteria } import akka.persistence.snapshot.SnapshotStore import akka.serialization.SerializationExtension import com.amazonaws.services.s3.model.{ ObjectMetadata, S3ObjectInputStream, ListObjectsRequest } import com.typesafe.config.Config import scala.collection.JavaConversions._ import scala.collection.immutable import scala.concurrent.Future import scala.util.control.NonFatal case class SerializationResult(stream: ByteArrayInputStream, size: Int) class S3SnapshotStore(config: Config) extends SnapshotStore with ActorLogging with SnapshotKeySupport { import context.dispatcher val settings = new S3SnapshotConfig(config) val s3Client: S3Client = new S3Client { val s3ClientConfig = new S3ClientConfig(context.system.settings.config.getConfig("s3-client")) } private val serializationExtension = SerializationExtension(context.system) private val s3Dispatcher = context.system.dispatchers.lookup("s3-snapshot-store.s3-client-dispatcher") val extensionName = settings.extension override def loadAsync(persistenceId: String, criteria: SnapshotSelectionCriteria): Future[Option[SelectedSnapshot]] = { snapshotMetadatas(persistenceId, criteria) .map(_.sorted.takeRight(settings.maxLoadAttempts)) .flatMap(load) } private def load(metadata: immutable.Seq[SnapshotMetadata]): Future[Option[SelectedSnapshot]] = metadata.lastOption match { case None => Future.successful(None) case Some(md) => s3Client.getObject(settings.bucketName, snapshotKey(md))(s3Dispatcher) .map { obj => val snapshot = deserialize(obj.getObjectContent) Some(SelectedSnapshot(md, snapshot.data)) } recoverWith { case NonFatal(e) => log.error(e, s"Error loading snapshot [${md}]") load(metadata.init) // try older snapshot } } override def saveAsync(metadata: SnapshotMetadata, snapshot: Any): Future[Unit] = { val serialized = serialize(Snapshot(snapshot)) val objectMetadata = new ObjectMetadata() objectMetadata.setContentLength(serialized.size) s3Client.putObject( settings.bucketName, snapshotKey(metadata), serialized.stream, objectMetadata )(s3Dispatcher).map(_ => ()) } override def deleteAsync(metadata: SnapshotMetadata): Future[Unit] = { if (metadata.timestamp == 0L) deleteAsync(metadata.persistenceId, SnapshotSelectionCriteria(metadata.sequenceNr, Long.MaxValue, metadata.sequenceNr, Long.MinValue)) else s3Client.deleteObject(settings.bucketName, snapshotKey(metadata))(s3Dispatcher) } override def deleteAsync(persistenceId: String, criteria: SnapshotSelectionCriteria): Future[Unit] = { val metadatas = snapshotMetadatas(persistenceId, criteria) metadatas.map(list => Future.sequence(list.map(deleteAsync))) } private def snapshotMetadatas(persistenceId: String, criteria: SnapshotSelectionCriteria): Future[List[SnapshotMetadata]] = { s3Client.listObjects( new ListObjectsRequest() .withBucketName(settings.bucketName) .withPrefix(prefixFromPersistenceId(persistenceId)) .withDelimiter("/") )(s3Dispatcher) .map(_.getObjectSummaries.toList.map(s => parseKeyToMetadata(s.getKey)) .filter(m => m.sequenceNr >= criteria.minSequenceNr && m.sequenceNr <= criteria.maxSequenceNr && m.timestamp >= criteria.minTimestamp && m.timestamp <= criteria.maxTimestamp)) } protected def deserialize(inputStream: S3ObjectInputStream): Snapshot = serializationExtension.deserialize(akka.persistence.serialization.streamToBytes(inputStream), 
classOf[Snapshot]).get protected def serialize(snapshot: Snapshot): SerializationResult = { val serialized = serializationExtension.findSerializerFor(snapshot).toBinary(snapshot) SerializationResult(new ByteArrayInputStream(serializationExtension.findSerializerFor(snapshot).toBinary(snapshot)), serialized.size) } }
Example 38
Source File: SerializedWithSchemaToObject.scala From trucking-iot with Apache License 2.0 | 5 votes |
package com.orendainx.trucking.storm.bolts import java.io.ByteArrayInputStream import java.nio.charset.StandardCharsets import java.util import com.hortonworks.registries.schemaregistry.SchemaMetadata import com.hortonworks.registries.schemaregistry.avro.AvroSchemaProvider import com.hortonworks.registries.schemaregistry.client.SchemaRegistryClient import com.hortonworks.registries.schemaregistry.serdes.avro.AvroSnapshotDeserializer import com.orendainx.trucking.commons.models.{EnrichedTruckData, TrafficData} import com.typesafe.scalalogging.Logger import org.apache.avro.generic.{GenericData, GenericRecord} import org.apache.storm.task.{OutputCollector, TopologyContext} import org.apache.storm.topology.OutputFieldsDeclarer import org.apache.storm.topology.base.BaseRichBolt import org.apache.storm.tuple.{Fields, Tuple, Values} import scala.collection.JavaConversions._ class SerializedWithSchemaToObject extends BaseRichBolt { private lazy val log = Logger(this.getClass) private var outputCollector: OutputCollector = _ // Declare schema-related fields to be initialized when this component's prepare() method is called private var schemaRegistryClient: SchemaRegistryClient = _ private var deserializer: AvroSnapshotDeserializer = _ private var truckDataSchemaMetadata: SchemaMetadata = _ private var trafficDataSchemaMetadata: SchemaMetadata = _ override def prepare(stormConf: util.Map[_, _], context: TopologyContext, collector: OutputCollector): Unit = { outputCollector = collector val schemaRegistryUrl = stormConf.get(SchemaRegistryClient.Configuration.SCHEMA_REGISTRY_URL.name()).toString val clientConfig = Map(SchemaRegistryClient.Configuration.SCHEMA_REGISTRY_URL.name() -> schemaRegistryUrl) schemaRegistryClient = new SchemaRegistryClient(clientConfig) truckDataSchemaMetadata = schemaRegistryClient.getSchemaMetadataInfo("EnrichedTruckData").getSchemaMetadata trafficDataSchemaMetadata = schemaRegistryClient.getSchemaMetadataInfo("TrafficData").getSchemaMetadata deserializer = schemaRegistryClient.getDefaultDeserializer(AvroSchemaProvider.TYPE).asInstanceOf[AvroSnapshotDeserializer] deserializer.init(clientConfig) } override def execute(tuple: Tuple): Unit = { // Deserialize each tuple and convert it into its proper case class (e.g. 
EnrichedTruckData or TrafficData) val str = tuple.getStringByField("data").getBytes(StandardCharsets.UTF_8) log.info(s"str2: ${tuple.getStringByField("data")}") val bytes = new ByteArrayInputStream(str) log.info(s"bytes: $bytes") val (dataType, data) = tuple.getStringByField("dataType") match { case typ @ "EnrichedTruckData" => log.info(s"des: ${deserializer.deserialize(bytes, null)}") (typ, recordToEnrichedTruckData(deserializer.deserialize(bytes, null).asInstanceOf[GenericData.Record])) case typ @ "TrafficData" => log.info(s"des: ${deserializer.deserialize(bytes, null)}") (typ, recordToTrafficData(deserializer.deserialize(bytes, null).asInstanceOf[GenericData.Record])) } outputCollector.emit(new Values(data, dataType)) outputCollector.ack(tuple) } override def declareOutputFields(declarer: OutputFieldsDeclarer): Unit = declarer.declare(new Fields("data", "dataType")) // Helper function to convert GenericRecord (result of deserializing via Schema Registry) into JVM object private def recordToEnrichedTruckData(r: GenericRecord): EnrichedTruckData = EnrichedTruckData( r.get("eventTime").toString.toLong, r.get("truckId").toString.toInt, r.get("driverId").toString.toInt, r.get("driverName").toString, r.get("routeId").toString.toInt, r.get("routeName").toString, r.get("latitude").toString.toDouble, r.get("longitude").toString.toDouble, r.get("speed").toString.toInt, r.get("eventType").toString, r.get("foggy").toString.toInt, r.get("rainy").toString.toInt, r.get("windy").toString.toInt) // Helper function to convert GenericRecord (result of deserializing via Schema Registry) into JVM object private def recordToTrafficData(r: GenericRecord): TrafficData = TrafficData(r.get("eventTime").toString.toLong, r.get("routeId").toString.toInt, r.get("congestionLevel").toString.toInt) }
Example 39
Source File: NiFiPacketWithSchemaToObject.scala From trucking-iot with Apache License 2.0 | 5 votes |
package com.orendainx.trucking.storm.bolts import java.io.ByteArrayInputStream import java.util import com.hortonworks.registries.schemaregistry.SchemaMetadata import com.hortonworks.registries.schemaregistry.avro.AvroSchemaProvider import com.hortonworks.registries.schemaregistry.client.SchemaRegistryClient import com.hortonworks.registries.schemaregistry.serdes.avro.AvroSnapshotDeserializer import com.orendainx.trucking.commons.models.{EnrichedTruckData, TrafficData} import com.typesafe.scalalogging.Logger import org.apache.avro.generic.{GenericData, GenericRecord} import org.apache.nifi.storm.NiFiDataPacket import org.apache.storm.task.{OutputCollector, TopologyContext} import org.apache.storm.topology.OutputFieldsDeclarer import org.apache.storm.topology.base.BaseRichBolt import org.apache.storm.tuple.{Fields, Tuple, Values} import scala.collection.JavaConversions._ class NiFiPacketWithSchemaToObject extends BaseRichBolt { private lazy val log = Logger(this.getClass) private var outputCollector: OutputCollector = _ // Declare schema-related fields to be initialized when this component's prepare() method is called private var schemaRegistryClient: SchemaRegistryClient = _ private var deserializer: AvroSnapshotDeserializer = _ private var truckDataSchemaMetadata: SchemaMetadata = _ private var trafficDataSchemaMetadata: SchemaMetadata = _ override def prepare(stormConf: util.Map[_, _], context: TopologyContext, collector: OutputCollector): Unit = { outputCollector = collector val schemaRegistryUrl = stormConf.get(SchemaRegistryClient.Configuration.SCHEMA_REGISTRY_URL.name()).toString val clientConfig = Map(SchemaRegistryClient.Configuration.SCHEMA_REGISTRY_URL.name() -> schemaRegistryUrl) schemaRegistryClient = new SchemaRegistryClient(clientConfig) truckDataSchemaMetadata = schemaRegistryClient.getSchemaMetadataInfo("EnrichedTruckData").getSchemaMetadata trafficDataSchemaMetadata = schemaRegistryClient.getSchemaMetadataInfo("TrafficData").getSchemaMetadata deserializer = schemaRegistryClient.getDefaultDeserializer(AvroSchemaProvider.TYPE).asInstanceOf[AvroSnapshotDeserializer] deserializer.init(clientConfig) } override def execute(tuple: Tuple): Unit = { val dp = tuple.getValueByField("nifiDataPacket").asInstanceOf[NiFiDataPacket] // Deserialize each tuple and convert it into its proper case class (e.g. 
EnrichedTruckData or TrafficData) val (dataType, data) = dp.getAttributes.get("dataType") match { case typ @ "EnrichedTruckData" => (typ, recordToEnrichedTruckData(deserializer.deserialize(new ByteArrayInputStream(dp.getContent), null).asInstanceOf[GenericData.Record])) case typ @ "TrafficData" => (typ, recordToTrafficData(deserializer.deserialize(new ByteArrayInputStream(dp.getContent), null).asInstanceOf[GenericData.Record])) } outputCollector.emit(new Values(data, dataType)) outputCollector.ack(tuple) } override def declareOutputFields(declarer: OutputFieldsDeclarer): Unit = declarer.declare(new Fields("data", "dataType")) // Helper function to convert GenericRecord (result of deserializing via Schema Registry) into JVM object private def recordToEnrichedTruckData(r: GenericRecord): EnrichedTruckData = EnrichedTruckData( r.get("eventTime").toString.toLong, r.get("truckId").toString.toInt, r.get("driverId").toString.toInt, r.get("driverName").toString, r.get("routeId").toString.toInt, r.get("routeName").toString, r.get("latitude").toString.toDouble, r.get("longitude").toString.toDouble, r.get("speed").toString.toInt, r.get("eventType").toString, r.get("foggy").toString.toInt, r.get("rainy").toString.toInt, r.get("windy").toString.toInt) // Helper function to convert GenericRecord (result of deserializing via Schema Registry) into JVM object private def recordToTrafficData(r: GenericRecord): TrafficData = TrafficData(r.get("eventTime").toString.toLong, r.get("routeId").toString.toInt, r.get("congestionLevel").toString.toInt) }
Example 40
Source File: BytesWithSchemaToObject.scala From trucking-iot with Apache License 2.0 | 5 votes |
package com.orendainx.trucking.storm.bolts import java.io.ByteArrayInputStream import java.nio.charset.StandardCharsets import java.util import com.hortonworks.registries.schemaregistry.SchemaMetadata import com.hortonworks.registries.schemaregistry.avro.AvroSchemaProvider import com.hortonworks.registries.schemaregistry.client.SchemaRegistryClient import com.hortonworks.registries.schemaregistry.serdes.avro.AvroSnapshotDeserializer import com.orendainx.trucking.commons.models.{EnrichedTruckData, TrafficData} import com.typesafe.scalalogging.Logger import org.apache.avro.generic.{GenericData, GenericRecord} import org.apache.storm.task.{OutputCollector, TopologyContext} import org.apache.storm.topology.OutputFieldsDeclarer import org.apache.storm.topology.base.BaseRichBolt import org.apache.storm.tuple.{Fields, Tuple, Values} import scala.collection.JavaConversions._ // Helper function to convert GenericRecord (result of deserializing via Schema Registry) into JVM object private def recordToEnrichedTruckData(r: GenericRecord): EnrichedTruckData = EnrichedTruckData( r.get("eventTime").toString.toLong, r.get("truckId").toString.toInt, r.get("driverId").toString.toInt, r.get("driverName").toString, r.get("routeId").toString.toInt, r.get("routeName").toString, r.get("latitude").toString.toDouble, r.get("longitude").toString.toDouble, r.get("speed").toString.toInt, r.get("eventType").toString, r.get("foggy").toString.toInt, r.get("rainy").toString.toInt, r.get("windy").toString.toInt) // Helper function to convert GenericRecord (result of deserializing via Schema Registry) into JVM object private def recordToTrafficData(r: GenericRecord): TrafficData = TrafficData(r.get("eventTime").toString.toLong, r.get("routeId").toString.toInt, r.get("congestionLevel").toString.toInt) }
Example 41
Source File: RegisterNodeSerializerTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.kryo import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import com.esotericsoftware.kryo.Kryo import com.esotericsoftware.kryo.io.{Input, Output} import justin.db.actors.protocol.RegisterNode import justin.db.consistenthashing.NodeId import org.scalatest.{FlatSpec, Matchers} class RegisterNodeSerializerTest extends FlatSpec with Matchers { behavior of "RegisterNode Serializer" it should "serialize/deserialize correctly" in { // kryo init val kryo = new Kryo() kryo.register(classOf[RegisterNode], RegisterNodeSerializer) // object val serializedData = RegisterNode(NodeId(1)) // serialization val bos = new ByteArrayOutputStream() val output = new Output(bos) val _ = kryo.writeObject(output, serializedData) output.flush() // deserialization val bis = new ByteArrayInputStream(bos.toByteArray) val input = new Input(bis) val deserializedData = kryo.readObject(input, classOf[RegisterNode]) serializedData shouldBe deserializedData } }
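The write-flush-read sequence in this test (and in the serializer tests that follow) is the standard way to round-trip an object through Kryo in memory. The boilerplate factors into a small helper; this is a sketch, not part of the JustinDB codebase.

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import com.esotericsoftware.kryo.Kryo
import com.esotericsoftware.kryo.io.{Input, Output}

object KryoRoundTrip {
  // Serialize with Kryo into an in-memory buffer and immediately read it back.
  def roundTrip[T](kryo: Kryo, value: T, clazz: Class[T]): T = {
    val bos = new ByteArrayOutputStream()
    val output = new Output(bos)
    kryo.writeObject(output, value)
    output.close() // also flushes the underlying ByteArrayOutputStream

    val input = new Input(new ByteArrayInputStream(bos.toByteArray))
    try kryo.readObject(input, clazz)
    finally input.close()
  }
}

With such a helper, each test body reduces to something like roundTrip(kryo, serializedData, classOf[RegisterNode]) shouldBe serializedData.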
Example 42
Source File: DataSerializerTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.kryo import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.util.UUID import com.esotericsoftware.kryo.Kryo import com.esotericsoftware.kryo.io.{Input, Output} import justin.db.Data import justin.db.consistenthashing.NodeId import justin.db.vectorclocks.{Counter, VectorClock} import org.scalatest.{FlatSpec, Matchers} class DataSerializerTest extends FlatSpec with Matchers { behavior of "Data Serializer" it should "serialize/deserialize correctly" in { // kryo init val kryo = new Kryo() kryo.register(classOf[justin.db.Data], DataSerializer) // object val vClock = VectorClock[NodeId](Map(NodeId(1) -> Counter(3))) val timestamp = System.currentTimeMillis() val serializedData = Data(id = UUID.randomUUID(), value = "some value", vClock, timestamp) // serialization val bos = new ByteArrayOutputStream() val output = new Output(bos) val _ = kryo.writeObject(output, serializedData) output.flush() // deserialization val bis = new ByteArrayInputStream(bos.toByteArray) val input = new Input(bis) val deserializedData = kryo.readObject(input, classOf[Data]) serializedData shouldBe deserializedData } }
Example 43
Source File: StorageNodeWriteDataLocalSerializerTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.kryo import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.util.UUID import com.esotericsoftware.kryo.Kryo import com.esotericsoftware.kryo.io.{Input, Output} import justin.db.Data import justin.db.actors.protocol.StorageNodeWriteDataLocal import justin.db.consistenthashing.NodeId import justin.db.vectorclocks.{Counter, VectorClock} import org.scalatest.{FlatSpec, Matchers} class StorageNodeWriteDataLocalSerializerTest extends FlatSpec with Matchers { behavior of "StorageNodeWriteDataLocal Serializer" it should "serialize/deserialize StorageNodeWriteDataLocal" in { // kryo init val kryo = new Kryo() kryo.register(classOf[StorageNodeWriteDataLocal], StorageNodeWriteDataLocalSerializer) // object val data = Data( id = UUID.randomUUID(), value = "some value", vclock = VectorClock[NodeId](Map(NodeId(1) -> Counter(3))), timestamp = System.currentTimeMillis() ) val serializedData = StorageNodeWriteDataLocal(data) // serialization val bos = new ByteArrayOutputStream() val output = new Output(bos) val _ = kryo.writeObject(output, serializedData) output.flush() // deserialization val bis = new ByteArrayInputStream(bos.toByteArray) val input = new Input(bis) val deserializedData = kryo.readObject(input, classOf[StorageNodeWriteDataLocal]) serializedData shouldBe deserializedData } }
Example 44
Source File: StorageNodeLocalReadSerializerTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.kryo import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.util.UUID import com.esotericsoftware.kryo.Kryo import com.esotericsoftware.kryo.io.{Input, Output} import justin.db.actors.protocol.StorageNodeLocalRead import org.scalatest.{FlatSpec, Matchers} class StorageNodeLocalReadSerializerTest extends FlatSpec with Matchers { behavior of "StorageNodeLocalReader Serializer" it should "serialize/deserialize correctly" in { // kryo init val kryo = new Kryo() kryo.register(classOf[StorageNodeLocalRead], StorageNodeLocalReadSerializer) // object val serializedData = StorageNodeLocalRead(UUID.randomUUID()) // serialization val bos = new ByteArrayOutputStream() val output = new Output(bos) val _ = kryo.writeObject(output, serializedData) output.flush() // deserialization val bis = new ByteArrayInputStream(bos.toByteArray) val input = new Input(bis) val deserializedData = kryo.readObject(input, classOf[StorageNodeLocalRead]) serializedData shouldBe deserializedData } }
Example 45
Source File: RocksDBStorage.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.storage import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File} import java.util.UUID import com.esotericsoftware.kryo.io.{Input, Output} import com.esotericsoftware.kryo.{Kryo, Serializer} import justin.db.storage.PluggableStorageProtocol.{Ack, StorageGetData} import org.rocksdb.{FlushOptions, Options, RocksDB} import scala.concurrent.Future // TODO: // Current version store every single data under one file (totally doesn't care about data originality). // Data should be eventually splitted by ring partitionId. // This might be an issue during possible data movements between nodes. final class RocksDBStorage(dir: File) extends PluggableStorageProtocol { import RocksDBStorage._ { RocksDB.loadLibrary() } private[this] val kryo = new Kryo() private[this] val db: RocksDB = { val options: Options = new Options().setCreateIfMissing(true) RocksDB.open(options, dir.getPath) } override def get(id: UUID)(resolveOriginality: (UUID) => PluggableStorageProtocol.DataOriginality): Future[PluggableStorageProtocol.StorageGetData] = { val key: Array[Byte] = uuid2bytes(kryo, id) val dataBytes: Array[Byte] = db.get(key) val justinDataOpt = Option(dataBytes).map { dataBytes => val input = new Input(new ByteArrayInputStream(dataBytes)) JustinDataSerializer.read(kryo, input, classOf[JustinData]) } Future.successful(justinDataOpt.map(StorageGetData.Single).getOrElse(StorageGetData.None)) } override def put(data: JustinData)(resolveOriginality: (UUID) => PluggableStorageProtocol.DataOriginality): Future[PluggableStorageProtocol.Ack] = { val key: Array[Byte] = uuid2bytes(kryo, data.id) val dataBytes: Array[Byte] = { val output = new Output(new ByteArrayOutputStream()) JustinDataSerializer.write(kryo, output, data) output.getBuffer } db.put(key, dataBytes) db.flush(new FlushOptions().setWaitForFlush(true)) Ack.future } } object RocksDBStorage { def uuid2bytes(kryo: Kryo, id: UUID): Array[Byte] = { val output = new Output(new ByteArrayOutputStream(), 16) UUIDSerializer.write(kryo, output, id) output.getBuffer } object UUIDSerializer extends Serializer[UUID] { override def read(kryo: Kryo, input: Input, `type`: Class[UUID]): UUID = { new UUID(input.readLong, input.readLong) } override def write(kryo: Kryo, output: Output, uuid: UUID): Unit = { output.writeLong(uuid.getMostSignificantBits) output.writeLong(uuid.getLeastSignificantBits) } } object JustinDataSerializer extends Serializer[JustinData] { override def read(kryo: Kryo, input: Input, `type`: Class[JustinData]): JustinData = { JustinData( id = UUIDSerializer.read(kryo, input, classOf[UUID]), value = input.readString(), vclock = input.readString(), timestamp = input.readLong() ) } override def write(kryo: Kryo, output: Output, data: JustinData): Unit = { UUIDSerializer.write(kryo, output, data.id) output.writeString(data.value) output.writeString(data.vclock) output.writeLong(data.timestamp) } } }
Example 46
Source File: JustinDataSerializerTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.storage import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.util.UUID import com.esotericsoftware.kryo.Kryo import com.esotericsoftware.kryo.io.{Input, Output} import justin.db.storage.RocksDBStorage.JustinDataSerializer import org.scalatest.{FlatSpec, Matchers} class JustinDataSerializerTest extends FlatSpec with Matchers { behavior of "JustinDataSerializer" it should "serialize/deserialize JustinData with Kryo" in { val kryo = new Kryo() val data = JustinData( id = UUID.randomUUID, value = "to jest przykladowa wartość", vclock = "vclock-value", timestamp = 1234124L ) // serialize val output = new Output(new ByteArrayOutputStream()) JustinDataSerializer.write(kryo, output, data) val dataBytes = output.getBuffer // deserialize val input = new Input(new ByteArrayInputStream(dataBytes)) JustinDataSerializer.read(kryo, input, classOf[JustinData]) shouldBe data } }
Example 47
Source File: UUIDSerializerTest.scala From JustinDB with Apache License 2.0 | 5 votes |
package justin.db.storage import java.io.ByteArrayInputStream import java.util.UUID import com.esotericsoftware.kryo.Kryo import com.esotericsoftware.kryo.io.Input import justin.db.storage.RocksDBStorage.UUIDSerializer import org.scalatest.{FlatSpec, Matchers} class UUIDSerializerTest extends FlatSpec with Matchers { behavior of "UUIDSerializer" it should "serialize/deserialize UUID with Kryo" in { val uuid = UUID.randomUUID() val kryo = new Kryo() // serialize val bytes = RocksDBStorage.uuid2bytes(kryo, uuid) // deserialize val input = new Input(new ByteArrayInputStream(bytes)) val id = UUIDSerializer.read(kryo, input, classOf[UUID]) uuid shouldBe id } }
Example 48
Source File: AmqpXPathCheckMaterializer.scala From gatling-amqp-plugin with Apache License 2.0 | 5 votes |
package ru.tinkoff.gatling.amqp.checks import java.io.{ByteArrayInputStream, InputStreamReader} import io.gatling.commons.validation.{safely, _} import io.gatling.core.check.xpath.{Dom, XPathCheckType, XmlParsers} import io.gatling.core.check.{CheckMaterializer, Preparer} import org.xml.sax.InputSource import ru.tinkoff.gatling.amqp.AmqpCheck import ru.tinkoff.gatling.amqp.request.AmqpProtocolMessage class AmqpXPathCheckMaterializer(xmlParsers: XmlParsers) extends CheckMaterializer[XPathCheckType, AmqpCheck, AmqpProtocolMessage, Option[Dom]](identity) { private val ErrorMapper = "Could not parse response into a DOM Document: " + _ override protected def preparer: Preparer[AmqpProtocolMessage, Option[Dom]] = message => safely(ErrorMapper) { message match { case AmqpProtocolMessage(_, payload, _) => val in = new ByteArrayInputStream(payload) Some(xmlParsers.parse(new InputSource(new InputStreamReader(in)))).success case _ => "Unsupported message type".failure } } }
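The preparer's only job is to turn the raw AMQP payload into a DOM: wrap the bytes in a ByteArrayInputStream, wrap the stream in a reader, and hand an InputSource to the parser. The same pattern with only JDK classes looks like the sketch below; the sample payload is made up.

import java.io.{ByteArrayInputStream, InputStreamReader}
import java.nio.charset.StandardCharsets
import javax.xml.parsers.DocumentBuilderFactory
import org.xml.sax.InputSource

object XmlFromBytes {
  def main(args: Array[String]): Unit = {
    val payload = "<response><status>ok</status></response>".getBytes(StandardCharsets.UTF_8)
    // bytes -> stream -> reader -> InputSource, as in the materializer above
    val source = new InputSource(new InputStreamReader(new ByteArrayInputStream(payload), StandardCharsets.UTF_8))
    val doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(source)
    println(doc.getDocumentElement.getTagName) // prints "response"
  }
}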
Example 49
Source File: AmqpJsonPathCheckMaterializer.scala From gatling-amqp-plugin with Apache License 2.0 | 5 votes |
package ru.tinkoff.gatling.amqp.checks import java.io.ByteArrayInputStream import java.nio.charset.Charset import io.gatling.core.check.jsonpath.JsonPathCheckType import io.gatling.core.check.{CheckMaterializer, Preparer} import io.gatling.core.json.JsonParsers import ru.tinkoff.gatling.amqp.AmqpCheck import ru.tinkoff.gatling.amqp.request.AmqpProtocolMessage import scala.util.Try class AmqpJsonPathCheckMaterializer(jsonParsers: JsonParsers) extends CheckMaterializer[JsonPathCheckType, AmqpCheck, AmqpProtocolMessage, Any](identity) { override protected def preparer: Preparer[AmqpProtocolMessage, Any] = AmqpJsonPathCheckMaterializer.jsonPathPreparer(jsonParsers) } object AmqpJsonPathCheckMaterializer { private val CharsParsingThreshold = 200 * 1000 private def jsonPathPreparer(jsonParsers: JsonParsers): Preparer[AmqpProtocolMessage, Any] = replyMessage => { val bodyCharset = Try(Charset.forName(replyMessage.amqpProperties.getContentEncoding)) .getOrElse(Charset.defaultCharset()) if (replyMessage.payload.length > CharsParsingThreshold) jsonParsers.safeParse(new ByteArrayInputStream(replyMessage.payload), bodyCharset) else jsonParsers.safeParse(new String(replyMessage.payload, bodyCharset)) } }
Example 50
Source File: JQHttpClient.scala From ledger-manager-chrome with MIT License | 5 votes |
package co.ledger.manager.web.core.net import java.io.ByteArrayInputStream import co.ledger.wallet.core.net.{BasicHttpRequestLogger, HttpClient, HttpRequestExecutor, HttpRequestLogger} import co.ledger.manager.web.core.utils.JQueryHelper import org.scalajs.jquery.JQueryXHR import scala.concurrent.ExecutionContext import scala.scalajs.js class JQHttpClient(override val baseUrl: String) extends HttpClient { override implicit val ec: ExecutionContext = scala.concurrent.ExecutionContext.Implicits.global override protected val executor: HttpRequestExecutor = new HttpRequestExecutor { override def execute(responseBuilder: co.ledger.wallet.core.net.HttpClient#ResponseBuilder): Unit = { val request = responseBuilder.request val headers = js.Dictionary[js.Any]() request.headers foreach { case (key, value) => headers(key) = value.toString } try { JQueryHelper.$.ajax(js.Dictionary[js.Any]( "method" -> request.method, "url" -> request.url, "headers" -> headers, "timeout" -> (request.readTimeout.toMillis + request.connectionTimeout.toMillis), "data" -> request.bodyAsString, "complete" -> { (xhr: JQueryXHR, status: String) => responseBuilder.statusCode = xhr.status responseBuilder.statusMessage = xhr.statusText responseBuilder.bodyEncoding = "utf-8" if (xhr.status != 0) responseBuilder.body = new ByteArrayInputStream(xhr.responseText.getBytes) responseBuilder.build() } )) } catch { case er: Throwable => responseBuilder.failure(er) } request.body.close() } } override var defaultLogger: HttpRequestLogger = new BasicHttpRequestLogger } object JQHttpClient { val etcInstance = new JQHttpClient("https://api.ledgerwallet.com/blockchain/v2/ethc") val ethInstance = new JQHttpClient("https://api.ledgerwallet.com/blockchain/v2/eth") }
Example 51
Source File: SerializableSerializerTest.scala From spark-util with Apache License 2.0 | 5 votes |
package org.hammerlab.hadoop.kryo import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream } import com.esotericsoftware.kryo.Kryo import com.esotericsoftware.kryo.io.{ Input, Output } import org.hammerlab.test.Suite class SerializableSerializerTest extends Suite { test("serde") { val kryo = new Kryo() kryo.setRegistrationRequired(true) val baos = new ByteArrayOutputStream() val output = new Output(baos) val foo = new Foo foo.n = 123 foo.s = "abc" intercept[IllegalArgumentException] { kryo.writeClassAndObject(output, foo) } .getMessage should startWith("Class is not registered: org.hammerlab.hadoop.kryo.Foo") kryo.register(classOf[Foo], SerializableSerializer[Foo]()) kryo.writeClassAndObject(output, foo) output.close() val bytes = baos.toByteArray bytes.length should be(93) val bais = new ByteArrayInputStream(bytes) val input = new Input(bais) val after = kryo.readClassAndObject(input).asInstanceOf[Foo] after.n should be(foo.n) after.s should be(foo.s) } } class Foo extends Serializable { var n = 0 var s = "" private def writeObject(out: ObjectOutputStream): Unit = { out.writeInt(n) out.writeUTF(s) } private def readObject(in: ObjectInputStream): Unit = { n = in.readInt() s = in.readUTF() } }
Example 52
Source File: CloudFrontSigner.scala From openwhisk with Apache License 2.0 | 5 votes |
package org.apache.openwhisk.core.database.s3 import java.io.ByteArrayInputStream import java.nio.charset.StandardCharsets.UTF_8 import java.security.PrivateKey import java.time.Instant import java.util.Date import akka.http.scaladsl.model.Uri import com.amazonaws.auth.PEM import com.amazonaws.services.cloudfront.CloudFrontUrlSigner import com.amazonaws.services.cloudfront.util.SignerUtils import com.amazonaws.services.cloudfront.util.SignerUtils.Protocol import scala.concurrent.duration._ case class CloudFrontConfig(domainName: String, keyPairId: String, privateKey: String, timeout: FiniteDuration = 10.minutes) case class CloudFrontSigner(config: CloudFrontConfig) extends UrlSigner { private val privateKey = createPrivateKey(config.privateKey) override def getSignedURL(s3ObjectKey: String): Uri = { val resourcePath = SignerUtils.generateResourcePath(Protocol.https, config.domainName, s3ObjectKey) val date = Date.from(Instant.now().plusSeconds(config.timeout.toSeconds)) val url = CloudFrontUrlSigner.getSignedURLWithCannedPolicy(resourcePath, config.keyPairId, privateKey, date) Uri(url) } override def toString: String = s"CloudFront Signer - ${config.domainName}" private def createPrivateKey(keyContent: String): PrivateKey = { val is = new ByteArrayInputStream(keyContent.getBytes(UTF_8)) PEM.readPrivateKey(is) } }
Example 53
Source File: DefaultRowReader.scala From mleap with Apache License 2.0 | 5 votes |
package ml.combust.mleap.binary import java.io.{ByteArrayInputStream, DataInputStream} import java.nio.charset.Charset import ml.combust.mleap.runtime.serialization.{BuiltinFormats, RowReader} import ml.combust.mleap.core.types.StructType import ml.combust.mleap.runtime.frame.{ArrayRow, Row} import resource._ import scala.util.Try class DefaultRowReader(override val schema: StructType) extends RowReader { private val serializers = schema.fields.map(_.dataType).map(ValueSerializer.serializerForDataType) override def fromBytes(bytes: Array[Byte], charset: Charset = BuiltinFormats.charset): Try[Row] = { (for(in <- managed(new ByteArrayInputStream(bytes))) yield { val din = new DataInputStream(in) val row = ArrayRow(new Array[Any](schema.fields.length)) var i = 0 for(s <- serializers) { row.set(i, s.read(din)) i = i + 1 } row }).tried } }
Example 54
Source File: DefaultFrameReader.scala From mleap with Apache License 2.0 | 5 votes |
package ml.combust.mleap.binary import java.io.{ByteArrayInputStream, DataInputStream} import java.nio.charset.Charset import ml.combust.mleap.runtime.serialization.{BuiltinFormats, FrameReader} import ml.combust.mleap.core.types.StructType import ml.combust.mleap.json.JsonSupport._ import ml.combust.mleap.runtime.frame.{ArrayRow, DefaultLeapFrame, Row} import spray.json._ import resource._ import scala.collection.mutable import scala.util.Try class DefaultFrameReader extends FrameReader { override def fromBytes(bytes: Array[Byte], charset: Charset = BuiltinFormats.charset): Try[DefaultLeapFrame] = { (for(in <- managed(new ByteArrayInputStream(bytes))) yield { val din = new DataInputStream(in) val length = din.readInt() val schemaBytes = new Array[Byte](length) din.readFully(schemaBytes) val schema = new String(schemaBytes, BuiltinFormats.charset).parseJson.convertTo[StructType] val serializers = schema.fields.map(_.dataType).map(ValueSerializer.serializerForDataType) val rowCount = din.readInt() val rows = mutable.WrappedArray.make[Row](new Array[Row](rowCount)) for(i <- 0 until rowCount) { val row = new ArrayRow(new Array[Any](schema.fields.length)) var j = 0 for(s <- serializers) { row.set(j, s.read(din)) j = j + 1 } rows(i) = row } DefaultLeapFrame(schema, rows) }).tried } }
Example 55
Source File: XGBoostRegressionOp.scala From mleap with Apache License 2.0 | 5 votes |
package ml.combust.mleap.xgboost.runtime.bundle.ops import java.io.ByteArrayInputStream import java.nio.file.Files import ml.combust.bundle.BundleContext import ml.combust.bundle.dsl.{Model, Value} import ml.combust.bundle.op.OpModel import ml.combust.mleap.bundle.ops.MleapOp import ml.combust.mleap.runtime.MleapContext import ml.combust.mleap.xgboost.runtime.{XGBoostRegression, XGBoostRegressionModel} import ml.dmlc.xgboost4j.scala.XGBoost import resource._ class XGBoostRegressionOp extends MleapOp[XGBoostRegression, XGBoostRegressionModel] { override val Model: OpModel[MleapContext, XGBoostRegressionModel] = new OpModel[MleapContext, XGBoostRegressionModel] { override val klazz: Class[XGBoostRegressionModel] = classOf[XGBoostRegressionModel] override def opName: String = "xgboost.regression" override def store(model: Model, obj: XGBoostRegressionModel) (implicit context: BundleContext[MleapContext]): Model = { val out = Files.newOutputStream(context.file("xgboost.model")) obj.booster.saveModel(out) model .withValue("num_features", Value.int(obj.numFeatures)) .withValue("tree_limit", Value.int(obj.treeLimit)) } override def load(model: Model) (implicit context: BundleContext[MleapContext]): XGBoostRegressionModel = { val bytes = Files.readAllBytes(context.file("xgboost.model")) val booster = XGBoost.loadModel(new ByteArrayInputStream(bytes)) val treeLimit = model.value("tree_limit").getInt XGBoostRegressionModel(booster, numFeatures = model.value("num_features").getInt, treeLimit = treeLimit) } } override def model(node: XGBoostRegression): XGBoostRegressionModel = node.model }
Example 56
Source File: TestSpec.scala From spark-distcp with Apache License 2.0 | 5 votes |
package com.coxautodata import java.io.ByteArrayInputStream import java.nio.file.Files import com.coxautodata.objects.SerializableFileStatus import com.coxautodata.utils.FileListing import org.apache.commons.io.{FileUtils, IOUtils} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, LocalFileSystem, Path} import org.scalatest.{BeforeAndAfterEach, FunSpec, Matchers} trait TestSpec extends FunSpec with Matchers with BeforeAndAfterEach { var testingBaseDir: java.nio.file.Path = _ var testingBaseDirName: String = _ var testingBaseDirPath: Path = _ var localFileSystem: LocalFileSystem = _ override def beforeEach(): Unit = { super.beforeEach() testingBaseDir = Files.createTempDirectory("test_output") testingBaseDirName = testingBaseDir.toString localFileSystem = FileSystem.getLocal(new Configuration()) testingBaseDirPath = localFileSystem.makeQualified(new Path(testingBaseDirName)) } override def afterEach(): Unit = { super.afterEach() FileUtils.deleteDirectory(testingBaseDir.toFile) } def createFile(relativePath: Path, content: Array[Byte]): SerializableFileStatus = { val path = new Path(testingBaseDirPath, relativePath) localFileSystem.mkdirs(path.getParent) val in = new ByteArrayInputStream(content) val out = localFileSystem.create(path) IOUtils.copy(in, out) in.close() out.close() SerializableFileStatus(localFileSystem.getFileStatus(path)) } def fileStatusToResult(f: SerializableFileStatus): FileListing = { FileListing(f.getPath.toString, if (f.isFile) Some(f.getLen) else None) } }
Example 57
Source File: DesignSerializationTest.scala From airframe with Apache License 2.0 | 5 votes |
package wvlet.airframe import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} import DesignTest._ import wvlet.airspec.AirSpec object DesignSerializationTest { def serialize(d: Design): Array[Byte] = { val b = new ByteArrayOutputStream() val oo = new ObjectOutputStream(b) oo.writeObject(d) oo.close() b.toByteArray } def deserialize(b: Array[Byte]): Design = { val in = new ByteArrayInputStream(b) val oi = new ObjectInputStream(in) val obj = oi.readObject().asInstanceOf[Design] obj.asInstanceOf[Design] } } class DesignSerializationTest extends AirSpec { import DesignSerializationTest._ def `be serializable`: Unit = { val b = serialize(d1) val d1s = deserialize(b) d1s shouldBe (d1) } def `serialize instance binding`: Unit = { val d = Design.blanc.bind[Message].toInstance(Hello("world")) val b = serialize(d) val ds = deserialize(b) ds shouldBe (d) } }
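The serialize and deserialize helpers above are plain Java object serialization over byte arrays, so they can round-trip any serializable Design. A minimal usage sketch reusing those helpers; the Greeting case class and its binding are hypothetical:

import wvlet.airframe.Design
import wvlet.airframe.DesignSerializationTest.{serialize, deserialize}

// Hypothetical value type used only for illustration; case classes are Serializable.
case class Greeting(message: String)

object DesignRoundTripDemo {
  def main(args: Array[String]): Unit = {
    val design   = Design.newDesign.bind[Greeting].toInstance(Greeting("hello"))
    val bytes    = serialize(design)      // ObjectOutputStream -> Array[Byte]
    val restored = deserialize(bytes)     // Array[Byte] -> Design
    assert(restored == design)
  }
}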
Example 58
Source File: SerializationTest.scala From airframe with Apache License 2.0 | 5 votes |
package wvlet.log import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} import wvlet.log.io.IOUtil object SerializationTest { trait A extends LogSupport { debug("new A") def hello = debug("hello") } } class SerializationTest extends Spec { import SerializationTest._ def `logger should be serializable`: Unit = { val a = new A {} val b = new ByteArrayOutputStream() IOUtil.withResource(new ObjectOutputStream(b)) { out => out.writeObject(a) } val ser = b.toByteArray IOUtil.withResource(new ObjectInputStream(new ByteArrayInputStream(ser))) { in => debug("deserialization") val a = in.readObject().asInstanceOf[A] a.hello } } }
Example 59
Source File: TypeInformationDataInputFormat.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.flink.dataformats import java.io.{ByteArrayInputStream, EOFException, InputStream} import com.amazon.milan.dataformats.DataInputFormat import com.amazon.milan.typeutil.TypeDescriptor import org.apache.flink.api.common.ExecutionConfig import org.apache.flink.api.common.typeinfo.TypeInformation import org.apache.flink.api.common.typeutils.TypeSerializer import org.apache.flink.core.memory.DataInputViewStreamWrapper class TypeInformationDataInputFormat[T](typeInfo: TypeInformation[T]) extends DataInputFormat[T] { @transient private lazy val serializer = this.createSerializer() override def getGenericArguments: List[TypeDescriptor[_]] = { // This class is not intended to be serialized by GenericTypedJsonSerializer, so this should not be called. throw new UnsupportedOperationException() } override def setGenericArguments(genericArgs: List[TypeDescriptor[_]]): Unit = { // This class is not intended to be deserialized by GenericTypedJsonDeserializer, so this should not be called. throw new UnsupportedOperationException() } override def readValue(bytes: Array[Byte], offset: Int, length: Int): Option[T] = { val input = new DataInputViewStreamWrapper(new ByteArrayInputStream(bytes, offset, length)) Some(this.serializer.deserialize(input)) } override def readValues(stream: InputStream): TraversableOnce[T] = { val input = new DataInputViewStreamWrapper(stream) Stream.continually(0) .map(_ => try { Some(this.serializer.deserialize(input)) } catch { case _: EOFException => None }) .takeWhile(_.isDefined) .map(_.get) } private def createSerializer(): TypeSerializer[T] = { val config = new ExecutionConfig() this.typeInfo.createSerializer(config) } }
Example 60
Source File: ObjectStreamUtil.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.flink.testutil import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} object ObjectStreamUtil { def serializeAndDeserialize[T](value: T): T = { val outputStream = new ByteArrayOutputStream() val objectOutputStream = new ObjectOutputStream(outputStream) objectOutputStream.writeObject(value) val bytes = outputStream.toByteArray val objectInputStream = new ObjectInputStream(new ByteArrayInputStream(bytes)) objectInputStream.readObject().asInstanceOf[T] } }
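ObjectStreamUtil.serializeAndDeserialize is a convenient test helper: it forces a full Java-serialization round trip and hands back the copy. A minimal usage sketch; the Config case class is hypothetical:

import com.amazon.milan.compiler.flink.testutil.ObjectStreamUtil

// Hypothetical serializable value used only for illustration.
case class Config(name: String, retries: Int)

object ObjectStreamUtilDemo {
  def main(args: Array[String]): Unit = {
    val original = Config("ingest", retries = 3)
    val copy     = ObjectStreamUtil.serializeAndDeserialize(original)
    assert(copy == original)      // structurally equal...
    assert(!(copy eq original))   // ...but a distinct deserialized instance
  }
}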
Example 61
Source File: package.scala From milan with Apache License 2.0 | 5 votes |
package com.amazon.milan.compiler.flink import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import com.amazon.milan.compiler.flink.runtime.{UnwrapRecordsMapFunction, WrapRecordsMapFunction} import com.amazon.milan.compiler.flink.testing.IntKeyValueRecord import com.amazon.milan.compiler.flink.types.{RecordWrapper, RecordWrapperTypeInformation} import org.apache.flink.api.common.typeinfo.TypeInformation import org.apache.flink.api.common.typeutils.TypeSerializer import org.apache.flink.api.java.typeutils.ResultTypeQueryable import org.apache.flink.core.memory.{DataInputView, DataInputViewStreamWrapper, DataOutputView, DataOutputViewStreamWrapper} import org.apache.flink.streaming.api.TimeCharacteristic import org.apache.flink.streaming.api.datastream.DataStream import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment import scala.language.implicitConversions import scala.util.Random package object testutil { def getTestExecutionEnvironment: StreamExecutionEnvironment = { val env = StreamExecutionEnvironment.getExecutionEnvironment env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime) env.setBufferTimeout(0) env } def copyWithSerializer[T](value: T, serializer: TypeSerializer[T]): T = { val outputStream = new ByteArrayOutputStream() val outputView = new DataOutputViewStreamWrapper(outputStream) serializer.serialize(value, outputView) val bytes = outputStream.toByteArray val inputStream = new ByteArrayInputStream(bytes) val inputView = new DataInputViewStreamWrapper(inputStream) serializer.deserialize(inputView) } def copyData[T](writeValue: DataOutputView => Unit, readValue: DataInputView => T): T = { val outputStream = new ByteArrayOutputStream() val outputView = new DataOutputViewStreamWrapper(outputStream) writeValue(outputView) val bytes = outputStream.toByteArray val inputStream = new ByteArrayInputStream(bytes) val inputView = new DataInputViewStreamWrapper(inputStream) readValue(inputView) } def generateIntKeyValueRecords(recordCount: Int, keyCount: Int, maxValue: Int): List[IntKeyValueRecord] = { val rand = new Random(0) List.tabulate(recordCount)(_ => IntKeyValueRecord(rand.nextInt(keyCount), rand.nextInt(maxValue + 1))) } implicit class WrappedDataStreamExtensions[T >: Null, TKey >: Null <: Product](dataStream: DataStream[RecordWrapper[T, TKey]]) { def unwrap(recordTypeInformation: TypeInformation[T]): DataStream[T] = { val mapper = new UnwrapRecordsMapFunction[T, TKey](recordTypeInformation) this.dataStream.map(mapper) } def unwrap(): DataStream[T] = { val recordType = this.dataStream.getType.asInstanceOf[RecordWrapperTypeInformation[T, TKey]].valueTypeInformation this.unwrap(recordType) } } implicit class DataStreamExtensions[T >: Null](dataStream: DataStream[T]) { def wrap(recordTypeInformation: TypeInformation[T]): DataStream[RecordWrapper[T, Product]] = { val mapper = new WrapRecordsMapFunction[T](recordTypeInformation) this.dataStream.map(mapper) } def wrap(): DataStream[RecordWrapper[T, Product]] = { val recordType = this.dataStream.asInstanceOf[ResultTypeQueryable[T]].getProducedType this.wrap(recordType) } } }
Example 62
Source File: HiveQlParserImplTest.scala From schedoscope with Apache License 2.0 | 5 votes |
package org.schedoscope.lineage.parser import java.io.ByteArrayInputStream import org.apache.calcite.avatica.util.Casing import org.apache.calcite.sql.{SqlCall, SqlKind, SqlSelect} import org.scalatest.{FlatSpec, Matchers} class HiveQlParserImplTest extends FlatSpec with Matchers { "The HiveQlParserImpl" should "parse the <=> operator correctly" in { val sql = "SELECT * FROM a WHERE x <=> y" val stream = new ByteArrayInputStream(sql.getBytes) val parser = new HiveQlParserImpl(stream) parser.setIdentifierMaxLength(255) parser.setUnquotedCasing(Casing.UNCHANGED) val sqlNode = parser.parseSqlStmtEof val select = sqlNode.asInstanceOf[SqlSelect] val where = select.getWhere.asInstanceOf[SqlCall] where.getOperator.getKind should be(SqlKind.EQUALS) } }
Example 63
Source File: CodecFactory.scala From OAP with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.oap.io import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream} import scala.collection.mutable import org.apache.hadoop.conf.Configuration import org.apache.hadoop.io.compress.{CodecPool, CompressionCodec} import org.apache.hadoop.util.ReflectionUtils import org.apache.parquet.format.{CompressionCodec => ParquetCodec} import org.apache.parquet.hadoop.metadata.CompressionCodecName // This is a simple version of parquet's CodeFactory. // TODO: [linhong] Need change this into Scala Code style private[oap] class CodecFactory(conf: Configuration) { private val compressors = new mutable.HashMap[ParquetCodec, BytesCompressor] private val decompressors = new mutable.HashMap[ParquetCodec, BytesDecompressor] private val codecByName = new mutable.HashMap[String, CompressionCodec] private def getCodec(codecString: String): Option[CompressionCodec] = { codecByName.get(codecString) match { case Some(codec) => Some(codec) case None => val codecName = CompressionCodecName.valueOf(codecString) val codecClass = codecName.getHadoopCompressionCodecClass if (codecClass == null) { None } else { val codec = ReflectionUtils.newInstance(codecClass, conf).asInstanceOf[CompressionCodec] codecByName.put(codecString, codec) Some(codec) } } } def getCompressor(codec: ParquetCodec): BytesCompressor = { compressors.getOrElseUpdate(codec, new BytesCompressor(getCodec(codec.name))) } def getDecompressor(codec: ParquetCodec): BytesDecompressor = { decompressors.getOrElseUpdate(codec, new BytesDecompressor(getCodec(codec.name))) } def release(): Unit = { compressors.values.foreach(_.release()) compressors.clear() decompressors.values.foreach(_.release()) decompressors.clear() } } private[oap] class BytesCompressor(compressionCodec: Option[CompressionCodec]) { private lazy val compressedOutBuffer = new ByteArrayOutputStream() private lazy val compressor = compressionCodec match { case Some(codec) => CodecPool.getCompressor(codec) case None => null } def compress(bytes: Array[Byte]): Array[Byte] = { compressionCodec match { case Some(codec) => compressedOutBuffer.reset() // null compressor for non-native gzip if (compressor != null) { compressor.reset() } val cos = codec.createOutputStream(compressedOutBuffer, compressor) cos.write(bytes) cos.finish() cos.close() compressedOutBuffer.toByteArray case None => bytes } } def release(): Unit = CodecPool.returnCompressor(compressor) } private[oap] class BytesDecompressor(compressionCodec: Option[CompressionCodec]) { private lazy val decompressor = compressionCodec match { case Some(codec) => CodecPool.getDecompressor(codec) case None => null } def decompress(bytes: Array[Byte], uncompressedSize: Int): Array[Byte] = { compressionCodec match { case Some(codec) => decompressor.reset() val cis = codec.createInputStream(new ByteArrayInputStream(bytes), decompressor) val decompressed = new Array[Byte](uncompressedSize) new DataInputStream(cis).readFully(decompressed) decompressed case None => bytes } } def release(): Unit = CodecPool.returnDecompressor(decompressor) }
Example 64
Source File: Command.scala From scala-ssh with Apache License 2.0 | 5 votes |
package com.decodified.scalassh import net.schmizz.sshj.connection.channel.direct.Session import java.io.{ FileInputStream, File, ByteArrayInputStream, InputStream } case class Command(command: String, input: CommandInput = CommandInput.NoInput, timeout: Option[Int] = None) object Command { implicit def string2Command(cmd: String) = Command(cmd) } case class CommandInput(inputStream: Option[InputStream]) object CommandInput { lazy val NoInput = CommandInput(None) implicit def apply(input: String, charsetName: String = "UTF8"): CommandInput = apply(input.getBytes(charsetName)) implicit def apply(input: Array[Byte]): CommandInput = apply(Some(new ByteArrayInputStream(input))) implicit def apply(input: InputStream): CommandInput = apply(Some(input)) def fromFile(file: String): CommandInput = fromFile(new File(file)) def fromFile(file: File): CommandInput = new FileInputStream(file) def fromResource(resource: String): CommandInput = getClass.getClassLoader.getResourceAsStream(resource) } class CommandResult(val channel: Session.Command) { def stdErrStream: InputStream = channel.getErrorStream def stdOutStream: InputStream = channel.getInputStream lazy val stdErrBytes = new StreamCopier().emptyToByteArray(stdErrStream) lazy val stdOutBytes = new StreamCopier().emptyToByteArray(stdOutStream) def stdErrAsString(charsetname: String = "utf8") = new String(stdErrBytes, charsetname) def stdOutAsString(charsetname: String = "utf8") = new String(stdOutBytes, charsetname) lazy val exitSignal: Option[String] = Option(channel.getExitSignal).map(_.toString) lazy val exitCode: Option[Int] = Option(channel.getExitStatus) lazy val exitErrorMessage: Option[String] = Option(channel.getExitErrorMessage) }
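The implicit conversions above let callers pass plain strings, byte arrays, streams, or files wherever a Command or CommandInput is expected. A short sketch of how they compose; the commands and the log-file path are illustrative only:

import com.decodified.scalassh.{Command, CommandInput}

object CommandSketch {
  // A bare string becomes a Command with no stdin via string2Command.
  val listing: Command = "ls -la /tmp"

  // A string (or Array[Byte], or InputStream) becomes a CommandInput.
  val withStdin = Command("wc -l", CommandInput("one\ntwo\nthree\n"))

  // Stdin can also come from a file or a classpath resource (path is hypothetical).
  def fromLogFile = Command("grep ERROR", CommandInput.fromFile("/var/log/app.log"))
}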
Example 65
Source File: CCGBankToCabochaFormat.scala From jigg with Apache License 2.0 | 5 votes |
package jigg.nlp.ccg

import java.io.File
import scala.sys.process.Process
// Project-local imports (Help, CommandLineParser, ParseTree, NodeLabel, JapaneseDictionary,
// TreeExtractor, JapaneseParseTreeConverter, CCGBankReader) are omitted in this excerpt.

object CCGBankToCabochaFormat {

  case class Opts(
    @Help(text="Path to CCGBank file") ccgbank: File = new File(""),
    @Help(text="Path to output") output: File = new File(""),
    @Help(text="Cabocha command (path to cabocha)") cabocha: String = "cabocha"
  )

  type Tree = ParseTree[NodeLabel]

  def main(args: Array[String]) = {
    val opts = CommandLineParser.readIn[Opts](args)
    val dict = new JapaneseDictionary()
    val extractors = TreeExtractor(
      new JapaneseParseTreeConverter(dict),
      new CCGBankReader)
    val trees = extractors.readTrees(opts.ccgbank, -1, true)
    val rawString = trees map (extractors.treeConv.toSentenceFromLabelTree) map (_.wordSeq.mkString("")) mkString ("\n")
    val is = new java.io.ByteArrayInputStream(rawString.getBytes("UTF-8"))
    val out = (Process(s"${opts.cabocha} -f1") #< is).lineStream_!
    val os = jigg.util.IOUtil.openOut(opts.output.getPath)
    out foreach { line => os.write(line + "\n") }
    os.flush
    os.close
  }
}
Example 66
Source File: HadoopConfig.scala From incubator-retired-gearpump with Apache License 2.0 | 5 votes |
package org.apache.gearpump.streaming.examples.fsio import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} import scala.language.implicitConversions import org.apache.hadoop.conf.Configuration import org.apache.gearpump.cluster.UserConfig import org.apache.gearpump.util.Constants._ class HadoopConfig(config: UserConfig) { def withHadoopConf(conf: Configuration): UserConfig = { config.withBytes(HADOOP_CONF, serializeHadoopConf(conf)) } def hadoopConf: Configuration = deserializeHadoopConf(config.getBytes(HADOOP_CONF).get) private def serializeHadoopConf(conf: Configuration): Array[Byte] = { val out = new ByteArrayOutputStream() val dataOut = new DataOutputStream(out) conf.write(dataOut) dataOut.close() out.toByteArray } private def deserializeHadoopConf(bytes: Array[Byte]): Configuration = { val in = new ByteArrayInputStream(bytes) val dataIn = new DataInputStream(in) val result = new Configuration() result.readFields(dataIn) dataIn.close() result } } object HadoopConfig { def empty: HadoopConfig = new HadoopConfig(UserConfig.empty) def apply(config: UserConfig): HadoopConfig = new HadoopConfig(config) implicit def userConfigToHadoopConfig(userConf: UserConfig): HadoopConfig = { HadoopConfig(userConf) } }
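A Hadoop Configuration is carried inside Gearpump's UserConfig as Writable-serialized bytes, written by conf.write and restored by readFields. A small usage sketch; the fs.defaultFS value is illustrative:

import org.apache.gearpump.streaming.examples.fsio.HadoopConfig
import org.apache.gearpump.streaming.examples.fsio.HadoopConfig._
import org.apache.hadoop.conf.Configuration

object HadoopConfigSketch {
  def main(args: Array[String]): Unit = {
    val hadoopConf = new Configuration()
    hadoopConf.set("fs.defaultFS", "hdfs://namenode:8020")

    // Embed the Hadoop settings into a UserConfig as bytes...
    val userConf = HadoopConfig.empty.withHadoopConf(hadoopConf)

    // ...and read them back via the imported userConfigToHadoopConfig conversion.
    val restored: Configuration = userConf.hadoopConf
    assert(restored.get("fs.defaultFS") == "hdfs://namenode:8020")
  }
}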
Example 67
Source File: WindowsPluginFrontendSpec.scala From protoc-bridge with Apache License 2.0 | 5 votes |
package protocbridge.frontend import java.io.ByteArrayInputStream import protocbridge.ProtocCodeGenerator import scala.sys.process.ProcessLogger import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.must.Matchers class WindowsPluginFrontendSpec extends AnyFlatSpec with Matchers { if (PluginFrontend.isWindows) { it must "execute a program that forwards input and output to given stream" in { val toSend = "ping" val toReceive = "pong" val fakeGenerator = new ProtocCodeGenerator { override def run(request: Array[Byte]): Array[Byte] = { request mustBe toSend.getBytes toReceive.getBytes } } val (path, state) = WindowsPluginFrontend.prepare(fakeGenerator) val actualOutput = scala.collection.mutable.Buffer.empty[String] val process = sys.process .Process(path.toAbsolutePath.toString) .#<(new ByteArrayInputStream(toSend.getBytes)) .run(ProcessLogger(o => actualOutput.append(o))) process.exitValue() actualOutput.mkString mustBe toReceive WindowsPluginFrontend.cleanup(state) } } }
Example 68
Source File: PluginFrontendSpec.scala From protoc-bridge with Apache License 2.0 | 5 votes |
package protocbridge.frontend import java.io.ByteArrayInputStream import com.google.protobuf.compiler.PluginProtos.CodeGeneratorResponse import org.scalatestplus.scalacheck.ScalaCheckDrivenPropertyChecks import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.must.Matchers class PluginFrontendSpec extends AnyFlatSpec with Matchers with ScalaCheckDrivenPropertyChecks { def expected(error: String) = CodeGeneratorResponse.newBuilder().setError(error).build() def actual(error: String) = CodeGeneratorResponse.parseFrom( PluginFrontend.createCodeGeneratorResponseWithError(error) ) "createCodeGeneratorResponseWithError" should "create valid objects" in { actual("") must be(expected("")) actual("foo") must be(expected("foo")) actual("\u2035") must be(expected("\u2035")) actual("a" * 128) must be(expected("a" * 128)) actual("a" * 256) must be(expected("a" * 256)) actual("\u3714\u3715" * 256) must be(expected("\u3714\u3715" * 256)) actual("abc" * 1000) must be(expected("abc" * 1000)) forAll(MinSuccessful(1000)) { s: String => actual(s) must be(expected(s)) } } "readInputStreamToByteArray" should "read the input stream to a byte array" in { def readInput(bs: Array[Byte]) = PluginFrontend.readInputStreamToByteArray(new ByteArrayInputStream(bs)) readInput(Array.empty) must be(Array()) readInput(Array[Byte](1, 2, 3, 4)) must be(Array(1, 2, 3, 4)) val special = Array.tabulate[Byte](10000) { n => (n % 37).toByte } readInput(special) must be(special) } }
Example 69
Source File: StreamingParserSpec.scala From cormorant with MIT License | 5 votes |
package io.chrisdavenport.cormorant package fs2 import cats.data.NonEmptyList import cats.effect._ import cats.effect.testing.specs2.CatsIO import _root_.fs2.Stream import io.chrisdavenport.cormorant._ // import io.chrisdavenport.cormorant.implicits._ // import scala.concurrent.duration._ import java.io.ByteArrayInputStream import java.io.InputStream class StreamingParserSpec extends CormorantSpec with CatsIO { def ruinDelims(str: String) = augmentString(str).flatMap { case '\n' => "\r\n" case c => c.toString } "Streaming Parser" should { // https://github.com/ChristopherDavenport/cormorant/pull/84 "parse a known value that did not work with streaming" in { val x = """First Name,Last Name,Email Larry,Bordowitz,[email protected] Anonymous,Hippopotamus,[email protected]""" val source = IO.pure(new ByteArrayInputStream(ruinDelims(x).getBytes): InputStream) Stream.resource(Blocker[IO]).flatMap{blocker => _root_.fs2.io.readInputStream( source, chunkSize = 4, blocker ) } .through(_root_.fs2.text.utf8Decode) .through(parseComplete[IO]) .compile .toVector .map{ v => val header = CSV.Headers(NonEmptyList.of(CSV.Header("First Name"), CSV.Header("Last Name"), CSV.Header("Email"))) val row1 = CSV.Row(NonEmptyList.of(CSV.Field("Larry"), CSV.Field("Bordowitz"), CSV.Field("[email protected]"))) val row2 = CSV.Row(NonEmptyList.of(CSV.Field("Anonymous"), CSV.Field("Hippopotamus"), CSV.Field("[email protected]"))) Vector( (header, row1), (header, row2) ) must_=== v } } } }
Example 70
Source File: TestHelper.scala From spark-summit-2018 with GNU General Public License v3.0 | 5 votes |
package com.twilio.open.streaming.trend.discovery import java.io.{ByteArrayInputStream, InputStream} import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.google.protobuf.Message import com.googlecode.protobuf.format.JsonFormat import com.holdenkarau.spark.testing.{LocalSparkContext, SparkContextProvider} import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.sql.SparkSession import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers, Suite} import org.slf4j.{Logger, LoggerFactory} import scala.collection.Seq import scala.io.Source import scala.reflect.ClassTag import scala.reflect.classTag object TestHelper { val log: Logger = LoggerFactory.getLogger("com.twilio.open.streaming.trend.discovery.TestHelper") val mapper: ObjectMapper = { val m = new ObjectMapper() m.registerModule(DefaultScalaModule) } val jsonFormat: JsonFormat = new JsonFormat def loadScenario[T<: Message : ClassTag](file: String): Seq[T] = { val fileString = Source.fromFile(file).mkString val parsed = mapper.readValue(fileString, classOf[Sceanario]) parsed.input.map { data => val json = mapper.writeValueAsString(data) convert[T](json) } } def convert[T<: Message : ClassTag](json: String): T = { val clazz = classTag[T].runtimeClass val builder = clazz.getMethod("newBuilder").invoke(clazz).asInstanceOf[Message.Builder] try { val input: InputStream = new ByteArrayInputStream(json.getBytes()) jsonFormat.merge(input, builder) builder.build().asInstanceOf[T] } catch { case e: Exception => throw e } } } @SerialVersionUID(1L) case class KafkaDataFrame(key: Array[Byte], topic: Array[Byte], value: Array[Byte]) extends Serializable case class Sceanario(input: Seq[Any], expected: Option[Any] = None) trait SparkSqlTest extends BeforeAndAfterAll with SparkContextProvider { self: Suite => @transient var _sparkSql: SparkSession = _ @transient private var _sc: SparkContext = _ override def sc: SparkContext = _sc def conf: SparkConf def sparkSql: SparkSession = _sparkSql override def beforeAll() { _sparkSql = SparkSession.builder().config(conf).getOrCreate() _sc = _sparkSql.sparkContext setup(_sc) super.beforeAll() } override def afterAll() { try { _sparkSql.close() _sparkSql = null LocalSparkContext.stop(_sc) _sc = null } finally { super.afterAll() } } }
Example 71
Source File: HDFSCredentialProvider.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.deploy.yarn.security import java.io.{ByteArrayInputStream, DataInputStream} import scala.collection.JavaConverters._ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier import org.apache.hadoop.mapred.Master import org.apache.hadoop.security.Credentials import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.yarn.config._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ private[security] class HDFSCredentialProvider extends ServiceCredentialProvider with Logging { // Token renewal interval, this value will be set in the first call, // if None means no token renewer specified, so cannot get token renewal interval. private var tokenRenewalInterval: Option[Long] = null override val serviceName: String = "hdfs" override def obtainCredentials( hadoopConf: Configuration, sparkConf: SparkConf, creds: Credentials): Option[Long] = { // NameNode to access, used to get tokens from different FileSystems nnsToAccess(hadoopConf, sparkConf).foreach { dst => val dstFs = dst.getFileSystem(hadoopConf) logInfo("getting token for namenode: " + dst) dstFs.addDelegationTokens(getTokenRenewer(hadoopConf), creds) } // Get the token renewal interval if it is not set. It will only be called once. if (tokenRenewalInterval == null) { tokenRenewalInterval = getTokenRenewalInterval(hadoopConf, sparkConf) } // Get the time of next renewal. tokenRenewalInterval.map { interval => creds.getAllTokens.asScala .filter(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND) .map { t => val identifier = new DelegationTokenIdentifier() identifier.readFields(new DataInputStream(new ByteArrayInputStream(t.getIdentifier))) identifier.getIssueDate + interval }.foldLeft(0L)(math.max) } } private def getTokenRenewalInterval( hadoopConf: Configuration, sparkConf: SparkConf): Option[Long] = { // We cannot use the tokens generated with renewer yarn. Trying to renew // those will fail with an access control issue. So create new tokens with the logged in // user as renewer. sparkConf.get(PRINCIPAL).flatMap { renewer => val creds = new Credentials() nnsToAccess(hadoopConf, sparkConf).foreach { dst => val dstFs = dst.getFileSystem(hadoopConf) dstFs.addDelegationTokens(renewer, creds) } val hdfsToken = creds.getAllTokens.asScala .find(_.getKind == DelegationTokenIdentifier.HDFS_DELEGATION_KIND) hdfsToken.map { t => val newExpiration = t.renew(hadoopConf) val identifier = new DelegationTokenIdentifier() identifier.readFields(new DataInputStream(new ByteArrayInputStream(t.getIdentifier))) val interval = newExpiration - identifier.getIssueDate logInfo(s"Renewal Interval is $interval") interval } } } private def getTokenRenewer(conf: Configuration): String = { val delegTokenRenewer = Master.getMasterPrincipal(conf) logDebug("delegation token renewer is: " + delegTokenRenewer) if (delegTokenRenewer == null || delegTokenRenewer.length() == 0) { val errorMessage = "Can't get Master Kerberos principal for use as renewer" logError(errorMessage) throw new SparkException(errorMessage) } delegTokenRenewer } private def nnsToAccess(hadoopConf: Configuration, sparkConf: SparkConf): Set[Path] = { sparkConf.get(NAMENODES_TO_ACCESS).map(new Path(_)).toSet + sparkConf.get(STAGING_DIR).map(new Path(_)) .getOrElse(FileSystem.get(hadoopConf).getHomeDirectory) } }
Example 72
Source File: GenericAvroSerializerSuite.scala From sparkoscope with Apache License 2.0 | 5 votes |
package org.apache.spark.serializer import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.nio.ByteBuffer import com.esotericsoftware.kryo.io.{Input, Output} import org.apache.avro.{Schema, SchemaBuilder} import org.apache.avro.generic.GenericData.Record import org.apache.spark.{SharedSparkContext, SparkFunSuite} class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext { conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") val schema : Schema = SchemaBuilder .record("testRecord").fields() .requiredString("data") .endRecord() val record = new Record(schema) record.put("data", "test data") test("schema compression and decompression") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema)))) } test("record serialization and deserialization") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val outputStream = new ByteArrayOutputStream() val output = new Output(outputStream) genericSer.serializeDatum(record, output) output.flush() output.close() val input = new Input(new ByteArrayInputStream(outputStream.toByteArray)) assert(genericSer.deserializeDatum(input) === record) } test("uses schema fingerprint to decrease message size") { val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema) val output = new Output(new ByteArrayOutputStream()) val beginningNormalPosition = output.total() genericSerFull.serializeDatum(record, output) output.flush() val normalLength = output.total - beginningNormalPosition conf.registerAvroSchemas(schema) val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema) val beginningFingerprintPosition = output.total() genericSerFinger.serializeDatum(record, output) val fingerprintLength = output.total - beginningFingerprintPosition assert(fingerprintLength < normalLength) } test("caches previously seen schemas") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val compressedSchema = genericSer.compress(schema) val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema)) assert(compressedSchema.eq(genericSer.compress(schema))) assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema)))) } }
Example 73
Source File: CompressionCodecSuite.scala From SparkCore with Apache License 2.0 | 5 votes |
package org.apache.spark.io import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import org.scalatest.FunSuite import org.apache.spark.SparkConf class CompressionCodecSuite extends FunSuite { val conf = new SparkConf(false) def testCodec(codec: CompressionCodec) { // Write 1000 integers to the output stream, compressed. val outputStream = new ByteArrayOutputStream() val out = codec.compressedOutputStream(outputStream) for (i <- 1 until 1000) { out.write(i % 256) } out.close() // Read the 1000 integers back. val inputStream = new ByteArrayInputStream(outputStream.toByteArray) val in = codec.compressedInputStream(inputStream) for (i <- 1 until 1000) { assert(in.read() === i % 256) } in.close() } test("default compression codec") { val codec = CompressionCodec.createCodec(conf) assert(codec.getClass === classOf[SnappyCompressionCodec]) testCodec(codec) } test("lz4 compression codec") { val codec = CompressionCodec.createCodec(conf, classOf[LZ4CompressionCodec].getName) assert(codec.getClass === classOf[LZ4CompressionCodec]) testCodec(codec) } test("lz4 compression codec short form") { val codec = CompressionCodec.createCodec(conf, "lz4") assert(codec.getClass === classOf[LZ4CompressionCodec]) testCodec(codec) } test("lzf compression codec") { val codec = CompressionCodec.createCodec(conf, classOf[LZFCompressionCodec].getName) assert(codec.getClass === classOf[LZFCompressionCodec]) testCodec(codec) } test("lzf compression codec short form") { val codec = CompressionCodec.createCodec(conf, "lzf") assert(codec.getClass === classOf[LZFCompressionCodec]) testCodec(codec) } test("snappy compression codec") { val codec = CompressionCodec.createCodec(conf, classOf[SnappyCompressionCodec].getName) assert(codec.getClass === classOf[SnappyCompressionCodec]) testCodec(codec) } test("snappy compression codec short form") { val codec = CompressionCodec.createCodec(conf, "snappy") assert(codec.getClass === classOf[SnappyCompressionCodec]) testCodec(codec) } test("bad compression codec") { intercept[IllegalArgumentException] { CompressionCodec.createCodec(conf, "foobar") } } }
Example 74
Source File: package.scala From chronicler with Apache License 2.0 | 5 votes |
package com.github.fsanaulla.chronicler.core import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.util.zip.{GZIPInputStream, GZIPOutputStream} package object gzip { def compress(data: Array[Byte]): (Int, Array[Byte]) = { val bArrOut = new ByteArrayOutputStream() val gzippedOut = new GZIPOutputStream(bArrOut) gzippedOut.write(data) gzippedOut.close() val gzippedData = bArrOut.toByteArray val contentLength = gzippedData.length contentLength -> gzippedData } def decompress(data: Array[Byte]): Array[Byte] = { val gis = new GZIPInputStream(new ByteArrayInputStream(data)) val out = new ByteArrayOutputStream() val buf = new Array[Byte](1024) var res = 0 while (res >= 0) { res = gis.read(buf, 0, buf.length) if (res > 0) out.write(buf, 0, res) } out.toByteArray } }
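The compress and decompress functions are symmetric, and compress also returns the compressed length, presumably so the caller can set a Content-Length header on the outgoing request. A quick round-trip sketch; the line-protocol payload is illustrative:

import java.nio.charset.StandardCharsets.UTF_8

import com.github.fsanaulla.chronicler.core.gzip

object GzipRoundTrip {
  def main(args: Array[String]): Unit = {
    val payload = "cpu,host=h1 usage=0.42 1589000000000000000".getBytes(UTF_8)

    val (contentLength, compressed) = gzip.compress(payload)
    assert(contentLength == compressed.length)

    val restored = gzip.decompress(compressed)
    assert(new String(restored, UTF_8) == new String(payload, UTF_8))
  }
}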
Example 75
Source File: S3Util.scala From redshift-fake-driver with Apache License 2.0 | 5 votes |
package jp.ne.opt.redshiftfake import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.nio.charset.StandardCharsets import java.util.zip.GZIPOutputStream import com.amazonaws.services.s3.AmazonS3 import com.amazonaws.services.s3.model.{ObjectMetadata, PutObjectRequest} import jp.ne.opt.redshiftfake.util.Loan.using import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream object S3Util { def loadGzippedDataToS3(s3Client: AmazonS3, data: String, bucket: String, key: String): Unit = { val arrayOutputStream = new ByteArrayOutputStream() using(new GZIPOutputStream(arrayOutputStream)) (gzipOutStream => { gzipOutStream.write(data.getBytes(StandardCharsets.UTF_8)) }) val buf = arrayOutputStream.toByteArray val metadata = new ObjectMetadata metadata.setContentLength(buf.length) val request = new PutObjectRequest(bucket, key, new ByteArrayInputStream(buf), metadata) s3Client.putObject(request) } def loadBzipped2DataToS3(s3Client: AmazonS3, data: String, bucket: String, key: String): Unit = { val arrayOutputStream = new ByteArrayOutputStream() using(new BZip2CompressorOutputStream(arrayOutputStream)) (bzip2OutStream => { bzip2OutStream.write(data.getBytes(StandardCharsets.UTF_8)) }) val buf = arrayOutputStream.toByteArray val metadata = new ObjectMetadata metadata.setContentLength(buf.length) val request = new PutObjectRequest(bucket, key, new ByteArrayInputStream(buf), metadata) s3Client.putObject(request) } def loadDataToS3(s3Client: AmazonS3, data: String, bucket: String, key: String): Unit = { val buf = data.getBytes val metadata = new ObjectMetadata metadata.setContentLength(buf.length) val request = new PutObjectRequest(bucket, key, new ByteArrayInputStream(buf), metadata) s3Client.putObject(request) } }
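Callers supply an already-configured AmazonS3 client together with the data and target location. A usage sketch; the client construction, bucket, and key are assumptions for illustration:

import com.amazonaws.services.s3.{AmazonS3, AmazonS3ClientBuilder}
import jp.ne.opt.redshiftfake.S3Util

object S3UtilSketch {
  def main(args: Array[String]): Unit = {
    // Uses the default credential/region chain; bucket and key are hypothetical.
    val s3: AmazonS3 = AmazonS3ClientBuilder.defaultClient()

    S3Util.loadDataToS3(s3, "id,name\n1,alice\n", "fake-redshift-bucket", "input/users.csv")
    S3Util.loadGzippedDataToS3(s3, "id,name\n2,bob\n", "fake-redshift-bucket", "input/users.csv.gz")
  }
}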
Example 76
Source File: AvroTypeSpec.scala From shapeless-datatype with Apache License 2.0 | 5 votes |
package shapeless.datatype.avro import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.net.URI import java.nio.ByteBuffer import com.google.protobuf.ByteString import org.apache.avro.Schema import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord} import org.apache.avro.io.{DecoderFactory, EncoderFactory} import org.joda.time.Instant import org.scalacheck.Prop.forAll import org.scalacheck.ScalacheckShapeless._ import org.scalacheck._ import shapeless._ import shapeless.datatype.record._ import scala.reflect.runtime.universe._ object AvroTypeSpec extends Properties("AvroType") { import shapeless.datatype.test.Records._ import shapeless.datatype.test.SerializableUtils._ implicit def compareByteArrays(x: Array[Byte], y: Array[Byte]) = java.util.Arrays.equals(x, y) implicit def compareIntArrays(x: Array[Int], y: Array[Int]) = java.util.Arrays.equals(x, y) def roundTrip[A: TypeTag, L <: HList](m: A)(implicit gen: LabelledGeneric.Aux[A, L], fromL: FromAvroRecord[L], toL: ToAvroRecord[L], mr: MatchRecord[L] ): Boolean = { val t = ensureSerializable(AvroType[A]) val f1: SerializableFunction[A, GenericRecord] = new SerializableFunction[A, GenericRecord] { override def apply(m: A): GenericRecord = t.toGenericRecord(m) } val f2: SerializableFunction[GenericRecord, Option[A]] = new SerializableFunction[GenericRecord, Option[A]] { override def apply(m: GenericRecord): Option[A] = t.fromGenericRecord(m) } val toFn = ensureSerializable(f1) val fromFn = ensureSerializable(f2) val copy = fromFn(roundTripRecord(toFn(m))) val rm = RecordMatcher[A] copy.exists(rm(_, m)) } def roundTripRecord(r: GenericRecord): GenericRecord = { val writer = new GenericDatumWriter[GenericRecord](r.getSchema) val baos = new ByteArrayOutputStream() val encoder = EncoderFactory.get().binaryEncoder(baos, null) writer.write(r, encoder) encoder.flush() baos.close() val bytes = baos.toByteArray val reader = new GenericDatumReader[GenericRecord](r.getSchema) val bais = new ByteArrayInputStream(bytes) val decoder = DecoderFactory.get().binaryDecoder(bais, null) reader.read(null, decoder) } implicit val byteStringAvroType = AvroType.at[ByteString](Schema.Type.BYTES)( v => ByteString.copyFrom(v.asInstanceOf[ByteBuffer]), v => ByteBuffer.wrap(v.toByteArray) ) implicit val instantAvroType = AvroType.at[Instant](Schema.Type.LONG)(v => new Instant(v.asInstanceOf[Long]), _.getMillis) property("required") = forAll { m: Required => roundTrip(m) } property("optional") = forAll { m: Optional => roundTrip(m) } property("repeated") = forAll { m: Repeated => roundTrip(m) } property("mixed") = forAll { m: Mixed => roundTrip(m) } property("nested") = forAll { m: Nested => roundTrip(m) } property("seqs") = forAll { m: Seqs => roundTrip(m) } implicit val uriAvroType = AvroType.at[URI](Schema.Type.STRING)(v => URI.create(v.toString), _.toString) property("custom") = forAll { m: Custom => roundTrip(m) } }
Example 77
Source File: SerializableUtils.scala From protobuf-generic with Apache License 2.0 | 5 votes |
package me.lyh.protobuf.generic.test import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} object SerializableUtils { private def serializeToByteArray(value: Serializable): Array[Byte] = { val buffer = new ByteArrayOutputStream() val oos = new ObjectOutputStream(buffer) oos.writeObject(value) buffer.toByteArray } private def deserializeFromByteArray(encodedValue: Array[Byte]): AnyRef = { val ois = new ObjectInputStream(new ByteArrayInputStream(encodedValue)) ois.readObject() } def ensureSerializable[T <: Serializable](value: T): T = deserializeFromByteArray(serializeToByteArray(value)).asInstanceOf[T] }
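ensureSerializable forces a full Java-serialization round trip and returns the copy, so a test fails fast if a value (or anything it captures) is not serializable. A minimal sketch with a hypothetical case class:

import me.lyh.protobuf.generic.test.SerializableUtils

// Case classes extend Serializable, so they satisfy the T <: Serializable bound.
case class Endpoint(host: String, port: Int)

object EnsureSerializableDemo {
  def main(args: Array[String]): Unit = {
    val copy = SerializableUtils.ensureSerializable(Endpoint("localhost", 8080))
    assert(copy == Endpoint("localhost", 8080))
  }
}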
Example 78
Source File: SerializableUtils.scala From protobuf-generic with Apache License 2.0 | 5 votes |
package me.lyh.protobuf.generic.test import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} object SerializableUtils { private def serializeToByteArray(value: Serializable): Array[Byte] = { val buffer = new ByteArrayOutputStream() val oos = new ObjectOutputStream(buffer) oos.writeObject(value) buffer.toByteArray } private def deserializeFromByteArray(encodedValue: Array[Byte]): AnyRef = { val ois = new ObjectInputStream(new ByteArrayInputStream(encodedValue)) ois.readObject() } def ensureSerializable[T <: Serializable](value: T): T = deserializeFromByteArray(serializeToByteArray(value)).asInstanceOf[T] }
Example 79
Source File: ProtobufGenericSpec.scala From protobuf-generic with Apache License 2.0 | 5 votes |
package me.lyh.protobuf.generic.test import java.io.ByteArrayInputStream import java.nio.ByteBuffer import com.google.protobuf.{ByteString, Message} import me.lyh.protobuf.generic._ import me.lyh.protobuf.generic.proto2.Schemas._ import scala.reflect.ClassTag import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers class ProtobufGenericSpec extends AnyFlatSpec with Matchers { def roundTrip[T <: Message: ClassTag](record: T): Unit = { val schema = SerializableUtils.ensureSerializable(Schema.of[T]) val schemaCopy = Schema.fromJson(schema.toJson) schemaCopy shouldBe schema val reader = SerializableUtils.ensureSerializable(GenericReader.of(schema)) val writer = SerializableUtils.ensureSerializable(GenericWriter.of(schema)) val jsonRecord = reader.read(record.toByteArray).toJson jsonRecord shouldBe reader.read(ByteBuffer.wrap(record.toByteArray)).toJson jsonRecord shouldBe reader.read(new ByteArrayInputStream(record.toByteArray)).toJson val bytes = writer.write(GenericRecord.fromJson(jsonRecord)) val recordCopy = ProtobufType[T].parseFrom(bytes) recordCopy shouldBe record } "ProtobufGeneric" should "round trip required" in { roundTrip[Required](Records.required) } it should "round trip optional" in { roundTrip[Optional](Records.optional) roundTrip[Optional](Records.optionalEmpty) } it should "round trip repeated" in { roundTrip[Repeated](Records.repeated) roundTrip[Repeated](Records.repeatedEmpty) roundTrip[RepeatedPacked](Records.repeatedPacked) roundTrip[RepeatedUnpacked](Records.repeatedUnpacked) } it should "round trip oneofs" in { Records.oneOfs.foreach(roundTrip[OneOf]) } it should "round trip mixed" in { roundTrip[Mixed](Records.mixed) roundTrip[Mixed](Records.mixedEmpty) } it should "round trip nested" in { roundTrip[Nested](Records.nested) roundTrip[Nested](Records.nestedEmpty) } it should "round trip with custom options" in { roundTrip[CustomOptionMessage](Records.customOptionMessage) roundTrip[CustomOptionMessage](Records.customOptionMessageEmpty) } it should "round trip with custom defaults" in { roundTrip[CustomDefaults](CustomDefaults.getDefaultInstance) } it should "populate default values" in { val schema = Schema.of[CustomDefaults] val record = GenericReader.of(schema).read(CustomDefaults.getDefaultInstance.toByteArray) record.get("double_field") shouldBe 101.0 record.get("float_field") shouldBe 102.0f record.get("int32_field") shouldBe 103 record.get("int64_field") shouldBe 104L record.get("uint32_field") shouldBe 105 record.get("uint64_field") shouldBe 106L record.get("sint32_field") shouldBe 107 record.get("sint64_field") shouldBe 108L record.get("fixed32_field") shouldBe 109 record.get("fixed64_field") shouldBe 110L record.get("sfixed32_field") shouldBe 111 record.get("sfixed64_field") shouldBe 112L record.get("bool_field") shouldBe true record.get("string_field") shouldBe "hello" record.get("bytes_field") shouldBe Base64.encode(ByteString.copyFromUtf8("world").toByteArray) record.get("color_field") shouldBe "GREEN" } }
Example 80
Source File: ProtobufTypeSpec.scala From protobuf-generic with Apache License 2.0 | 5 votes |
package me.lyh.protobuf.generic.test import java.io.ByteArrayInputStream import com.google.protobuf.CodedInputStream import me.lyh.protobuf.generic._ import me.lyh.protobuf.generic.proto2.Schemas._ import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers class ProtobufTypeSpec extends AnyFlatSpec with Matchers { private val pt = ProtobufType[Optional] private val record = Records.optional "ProtobufType.descriptor" should "work" in { pt.descriptor shouldBe Optional.getDescriptor } "ProtobufType.newBuilder" should "work" in { pt.newBuilder().build() shouldBe Optional.newBuilder().build() } "ProtobufType.parseFrom" should "support byte array" in { pt.parseFrom(record.toByteArray) shouldBe record } it should "support ByteString" in { pt.parseFrom(record.toByteString) shouldBe record } it should "support InputStream" in { pt.parseFrom(new ByteArrayInputStream(record.toByteArray)) shouldBe record } it should "support CodedInputStream" in { pt.parseFrom(CodedInputStream.newInstance(record.toByteArray)) shouldBe record } }
Example 81
Source File: OdfExtract.scala From docspell with GNU General Public License v3.0 | 5 votes |
package docspell.extract.odf import java.io.{ByteArrayInputStream, InputStream} import scala.util.Try import cats.effect._ import cats.implicits._ import fs2.Stream import docspell.extract.internal.Text import org.apache.tika.metadata.Metadata import org.apache.tika.parser.ParseContext import org.apache.tika.parser.odf.OpenDocumentParser import org.apache.tika.sax.BodyContentHandler object OdfExtract { def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = data.compile.to(Array).map(new ByteArrayInputStream(_)).map(get) def get(is: InputStream) = Try { val handler = new BodyContentHandler() val pctx = new ParseContext() val meta = new Metadata() val ooparser = new OpenDocumentParser() ooparser.parse(is, handler, meta, pctx) Text(Option(handler.toString)) }.toEither }
Example 82
Source File: RtfExtract.scala From docspell with GNU General Public License v3.0 | 5 votes |
package docspell.extract.rtf import java.io.{ByteArrayInputStream, InputStream} import javax.swing.text.rtf.RTFEditorKit import scala.util.Try import cats.effect.Sync import cats.implicits._ import fs2.Stream import docspell.common.MimeType import docspell.extract.internal.Text object RtfExtract { val rtfType = MimeType.application("rtf") def get(is: InputStream): Either[Throwable, Text] = Try { val kit = new RTFEditorKit() val doc = kit.createDefaultDocument() kit.read(is, doc, 0) Text(doc.getText(0, doc.getLength)) }.toEither def get[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = data.compile.to(Array).map(new ByteArrayInputStream(_)).map(get) }
Example 83
Source File: PoiExtract.scala From docspell with GNU General Public License v3.0 | 5 votes |
package docspell.extract.poi import java.io.{ByteArrayInputStream, InputStream} import scala.util.Try import cats.data.EitherT import cats.effect.Sync import cats.implicits._ import fs2.Stream import docspell.common._ import docspell.extract.internal.Text import docspell.files.TikaMimetype import org.apache.poi.hssf.extractor.ExcelExtractor import org.apache.poi.hssf.usermodel.HSSFWorkbook import org.apache.poi.hwpf.extractor.WordExtractor import org.apache.poi.xssf.extractor.XSSFExcelExtractor import org.apache.poi.xssf.usermodel.XSSFWorkbook import org.apache.poi.xwpf.extractor.XWPFWordExtractor import org.apache.poi.xwpf.usermodel.XWPFDocument object PoiExtract { def get[F[_]: Sync]( data: Stream[F, Byte], hint: MimeTypeHint ): F[Either[Throwable, Text]] = TikaMimetype.detect(data, hint).flatMap(mt => get(data, mt)) def get[F[_]: Sync]( data: Stream[F, Byte], mime: MimeType ): F[Either[Throwable, Text]] = mime match { case PoiType.doc => getDoc(data) case PoiType.xls => getXls(data) case PoiType.xlsx => getXlsx(data) case PoiType.docx => getDocx(data) case PoiType.msoffice => EitherT(getDoc[F](data)) .recoverWith({ case _ => EitherT(getXls[F](data)) }) .value case PoiType.ooxml => EitherT(getDocx[F](data)) .recoverWith({ case _ => EitherT(getXlsx[F](data)) }) .value case mt => Sync[F].pure(Left(new Exception(s"Unsupported content: ${mt.asString}"))) } def getDocx(is: InputStream): Either[Throwable, Text] = Try { val xt = new XWPFWordExtractor(new XWPFDocument(is)) Text(Option(xt.getText)) }.toEither def getDoc(is: InputStream): Either[Throwable, Text] = Try { val xt = new WordExtractor(is) Text(Option(xt.getText)) }.toEither def getXlsx(is: InputStream): Either[Throwable, Text] = Try { val xt = new XSSFExcelExtractor(new XSSFWorkbook(is)) Text(Option(xt.getText)) }.toEither def getXls(is: InputStream): Either[Throwable, Text] = Try { val xt = new ExcelExtractor(new HSSFWorkbook(is)) Text(Option(xt.getText)) }.toEither def getDocx[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getDocx) def getDoc[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getDoc) def getXlsx[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getXlsx) def getXls[F[_]: Sync](data: Stream[F, Byte]): F[Either[Throwable, Text]] = data.compile.to(Array).map(new ByteArrayInputStream(_)).map(getXls) }
Example 84
Source File: ImageSize.scala From docspell with GNU General Public License v3.0 | 5 votes |
package docspell.files import java.io.{ByteArrayInputStream, InputStream} import java.nio.file.Path import javax.imageio.stream.{FileImageInputStream, ImageInputStream} import javax.imageio.{ImageIO, ImageReader} import scala.jdk.CollectionConverters._ import scala.util.{Try, Using} import cats.effect._ import cats.implicits._ import fs2.Stream object ImageSize { def get[F[_]: Sync](data: Stream[F, Byte]): F[Option[Dimension]] = data.take(768).compile.to(Array).map { ar => val iis = ImageIO.createImageInputStream(new ByteArrayInputStream(ar)) if (iis == null) sys.error("no reader given for the array") else getDimension(iis) } private def getDimension(in: ImageInputStream): Option[Dimension] = ImageIO .getImageReaders(in) .asScala .to(LazyList) .collectFirst(Function.unlift { reader => val dim = getDimension(in, reader).toOption reader.dispose() dim }) private def getDimension( in: ImageInputStream, reader: ImageReader ): Either[Throwable, Dimension] = Try { reader.setInput(in) val width = reader.getWidth(reader.getMinIndex) val height = reader.getHeight(reader.getMinIndex) Dimension(width, height) }.toEither }
Example 85
Source File: DataFinder.scala From Scala-Design-Patterns-Second-Edition with MIT License | 5 votes |
package com.ivan.nikolov.behavioral.template import java.io.{InputStreamReader, ByteArrayInputStream} import com.github.tototoshi.csv.CSVReader import com.ivan.nikolov.behavioral.template.model.Person import org.json4s.{StringInput, DefaultFormats} import org.json4s.jackson.JsonMethods abstract class DataFinder[T, Y] { def find(f: T => Option[Y]): Option[Y] = try { val data = readData() val parsed = parse(data) f(parsed) } finally { cleanup() } def readData(): Array[Byte] def parse(data: Array[Byte]): T def cleanup() } class JsonDataFinder extends DataFinder[List[Person], Person] { implicit val formats = DefaultFormats override def readData(): Array[Byte] = { val stream = this.getClass.getResourceAsStream("people.json") Stream.continually(stream.read).takeWhile(_ != -1).map(_.toByte).toArray } override def cleanup(): Unit = { System.out.println("Reading json: nothing to do.") } override def parse(data: Array[Byte]): List[Person] = JsonMethods.parse(StringInput(new String(data, "UTF-8"))).extract[List[Person]] } class CSVDataFinder extends DataFinder[List[Person], Person] { override def readData(): Array[Byte] = { val stream = this.getClass.getResourceAsStream("people.csv") Stream.continually(stream.read).takeWhile(_ != -1).map(_.toByte).toArray } override def cleanup(): Unit = { System.out.println("Reading csv: nothing to do.") } override def parse(data: Array[Byte]): List[Person] = CSVReader.open(new InputStreamReader(new ByteArrayInputStream(data))).all().map { case List(name, age, address) => Person(name, age.toInt, address) } } object DataFinderExample { def main(args: Array[String]): Unit = { val jsonDataFinder: DataFinder[List[Person], Person] = new JsonDataFinder val csvDataFinder: DataFinder[List[Person], Person] = new CSVDataFinder System.out.println(s"Find a person with name Ivan in the json: ${jsonDataFinder.find(_.find(_.name == "Ivan"))}") System.out.println(s"Find a person with name James in the json: ${jsonDataFinder.find(_.find(_.name == "James"))}") System.out.println(s"Find a person with name Maria in the csv: ${csvDataFinder.find(_.find(_.name == "Maria"))}") System.out.println(s"Find a person with name Alice in the csv: ${csvDataFinder.find(_.find(_.name == "Alice"))}") } }
Example 89
Source File: FileInputImpl.scala From chatoverflow with Eclipse Public License 2.0 | 5 votes |
package org.codeoverflow.chatoverflow.requirement.service.file.impl

import java.awt.image.BufferedImage
import java.io.ByteArrayInputStream
import java.util.Optional

import javax.imageio.ImageIO
import org.codeoverflow.chatoverflow.WithLogger
import org.codeoverflow.chatoverflow.api.io.input.FileInput
import org.codeoverflow.chatoverflow.registry.Impl
import org.codeoverflow.chatoverflow.requirement.impl.InputImpl
import org.codeoverflow.chatoverflow.requirement.service.file.FileConnector

@Impl(impl = classOf[FileInput], connector = classOf[FileConnector])
class FileInputImpl extends InputImpl[FileConnector] with FileInput with WithLogger {

  override def getFile(pathInResources: String): Optional[String] =
    Optional.ofNullable(sourceConnector.get.getFile(pathInResources).orNull)

  override def getBinaryFile(pathInResources: String): Optional[Array[Byte]] =
    Optional.ofNullable(sourceConnector.get.getBinaryFile(pathInResources).orNull)

  override def getImage(pathInResources: String): Optional[BufferedImage] = {
    val data = sourceConnector.get.getBinaryFile(pathInResources)
    if (data.isEmpty) {
      // Return an empty Optional instead of falling through to data.get, which would throw.
      Optional.empty[BufferedImage]()
    } else {
      val bis = new ByteArrayInputStream(data.get)
      Optional.of(ImageIO.read(bis))
    }
  }

  override def start(): Boolean = true

  override def stop(): Boolean = true
}
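The getImage override above is the core ByteArrayInputStream pattern here: bytes that are already in memory are wrapped in a stream so ImageIO can decode them. A minimal sketch of just that step (the helper name is made up; ImageIO.read returns null for unreadable bytes, hence the Option):

import java.awt.image.BufferedImage
import java.io.ByteArrayInputStream

import javax.imageio.ImageIO

object ImageFromBytesSketch {
  // Decode an image that is already in memory; None if the bytes are not a readable image.
  def decodeImage(bytes: Array[Byte]): Option[BufferedImage] =
    Option(ImageIO.read(new ByteArrayInputStream(bytes)))
}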
Example 90
Source File: VerifyingSpec.scala From jsoniter-scala with MIT License | 5 votes |
package com.github.plokhotnyuk.jsoniter_scala.macros

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer
import java.nio.charset.StandardCharsets.UTF_8

import com.github.plokhotnyuk.jsoniter_scala.core._
import org.scalatest.wordspec.AnyWordSpec
import org.scalatest.matchers.should.Matchers

class VerifyingSpec extends AnyWordSpec with Matchers {
  def verifySerDeser[T](codec: JsonValueCodec[T], obj: T, json: String, cfg: WriterConfig = WriterConfig): Unit = {
    verifySer(codec, obj, json, cfg)
    verifyDeser(codec, obj, json)
  }

  def verifySer[T](codec: JsonValueCodec[T], obj: T, json: String, cfg: WriterConfig = WriterConfig): Unit = {
    val len = json.getBytes(UTF_8).length
    verifyDirectByteBufferSer(codec, obj, len, cfg, json)
    verifyHeapByteBufferSer(codec, obj, len, cfg, json)
    verifyOutputStreamSer(codec, obj, cfg, json)
    verifyArraySer(codec, obj, cfg, json)
  }

  def verifyDeser[T](codec: JsonValueCodec[T], obj: T, json: String): Unit =
    verifyDeserByCheck[T](codec, json, check = (_: T) shouldBe obj)

  def verifyDeserByCheck[T](codec: JsonValueCodec[T], json: String, check: T => Unit): Unit = {
    val jsonBytes = json.getBytes(UTF_8)
    verifyDirectByteBufferDeser(codec, jsonBytes, check)
    verifyHeapByteBufferDeser(codec, jsonBytes, check)
    verifyInputStreamDeser(codec, jsonBytes, check)
    verifyByteArrayDeser(codec, jsonBytes, check)
  }

  def verifyDeserError[T](codec: JsonValueCodec[T], json: String, msg: String): Unit =
    verifyDeserError(codec, json.getBytes(UTF_8), msg)

  def verifyDeserError[T](codec: JsonValueCodec[T], jsonBytes: Array[Byte], msg: String): Unit = {
    assert(intercept[JsonReaderException](verifyDirectByteBufferDeser(codec, jsonBytes, (_: T) => ()))
      .getMessage.contains(msg))
    assert(intercept[JsonReaderException](verifyHeapByteBufferDeser(codec, jsonBytes, (_: T) => ()))
      .getMessage.contains(msg))
    assert(intercept[JsonReaderException](verifyInputStreamDeser(codec, jsonBytes, (_: T) => ()))
      .getMessage.contains(msg))
    assert(intercept[JsonReaderException](verifyByteArrayDeser(codec, jsonBytes, (_: T) => ()))
      .getMessage.contains(msg))
  }

  def verifyDirectByteBufferSer[T](codec: JsonValueCodec[T], obj: T, len: Int, cfg: WriterConfig, expected: String): Unit = {
    val directBuf = ByteBuffer.allocateDirect(len + 100)
    directBuf.position(0)
    writeToByteBuffer(obj, directBuf, cfg)(codec)
    directBuf.position(0)
    val buf = new Array[Byte](len)
    directBuf.get(buf)
    toString(buf) shouldBe expected
  }

  def verifyHeapByteBufferSer[T](codec: JsonValueCodec[T], obj: T, len: Int, cfg: WriterConfig, expected: String): Unit = {
    val heapBuf = ByteBuffer.wrap(new Array[Byte](len + 100))
    heapBuf.position(0)
    writeToByteBuffer(obj, heapBuf, cfg)(codec)
    heapBuf.position(0)
    val buf = new Array[Byte](len)
    heapBuf.get(buf)
    toString(buf) shouldBe expected
  }

  def verifyOutputStreamSer[T](codec: JsonValueCodec[T], obj: T, cfg: WriterConfig, expected: String): Unit = {
    val baos = new ByteArrayOutputStream
    writeToStream(obj, baos, cfg)(codec)
    toString(baos.toByteArray) shouldBe expected
  }

  def verifyArraySer[T](codec: JsonValueCodec[T], obj: T, cfg: WriterConfig, expected: String): Unit =
    toString(writeToArray(obj, cfg)(codec)) shouldBe expected

  def verifyDirectByteBufferDeser[T](codec: JsonValueCodec[T], json: Array[Byte], check: T => Unit): Unit = {
    val directBuf = ByteBuffer.allocateDirect(json.length)
    directBuf.put(json)
    directBuf.position(0)
    check(readFromByteBuffer(directBuf)(codec))
  }

  def verifyHeapByteBufferDeser[T](codec: JsonValueCodec[T], json: Array[Byte], check: T => Unit): Unit =
    check(readFromByteBuffer(ByteBuffer.wrap(json))(codec))

  def verifyInputStreamDeser[T](codec: JsonValueCodec[T], json: Array[Byte], check: T => Unit): Unit =
    check(readFromStream(new ByteArrayInputStream(json))(codec))

  def verifyByteArrayDeser[T](codec: JsonValueCodec[T], json: Array[Byte], check: T => Unit): Unit =
    check(readFromArray(json)(codec))

  def toString(json: Array[Byte]): String = new String(json, 0, json.length, UTF_8)
}
Example 91
Source File: Release.scala From ionroller with MIT License | 5 votes |
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File} import java.nio.file.{Files, Paths} import com.amazonaws.services.s3.model._ import com.amazonaws.services.s3.transfer.Transfer.TransferState import com.amazonaws.services.s3.transfer.TransferManager import com.amazonaws.util.IOUtils import sbt._ import scalaz.concurrent.Task object Release { lazy val releaseCli = taskKey[Unit]("Releases ION-Roller CLI") def release(ver: String, zip: File, install: File) = { val files = Seq( (install.getName, replaceVersionAndReadBytes(ver, install), "text/plain"), (zip.getName, readBytes(zip), "application/zip")) val tx = new TransferManager val tasks = for { f <- files } yield uploadFile(tx, f._1, f._2, f._3) val t = for { results <- Task.gatherUnordered(tasks) finalResult = if (results.forall(_ == TransferState.Completed)) TransferState.Completed else TransferState.Failed printTask <- Task.delay(println(finalResult)) } yield printTask t.run } def uploadFile(tx: TransferManager, name: String, getBytes: Task[Array[Byte]], contentType: String): Task[TransferState] = { for { bytes <- getBytes meta <- metadata(bytes, contentType) transferState <- upload(tx, bytes, name, meta) } yield transferState } def metadata(bytes: Array[Byte], contentType: String): Task[ObjectMetadata] = { Task.delay({ val out = new ByteArrayOutputStream out.write(bytes) val metadata = new ObjectMetadata metadata.setContentType(contentType) val contentBytes = IOUtils.toByteArray(new ByteArrayInputStream(out.toByteArray)).length.toLong // we need to call new ByteArrayInputStream again, as checking the length reads the stream metadata.setContentLength(contentBytes) metadata }) } def upload(tx: TransferManager, in: Array[Byte], name: String, meta: ObjectMetadata): Task[TransferState] = { Task.delay({ println(s"Uploading $name...") val upload = tx.upload( new PutObjectRequest("ionroller-cli", name, new ByteArrayInputStream(in), meta) .withCannedAcl(CannedAccessControlList.PublicRead) ) while (!upload.isDone) { Thread.sleep(2000) println(upload.getProgress.getPercentTransferred.toInt + "%") } upload.getState }) } def replaceVersionAndReadBytes(ver: String, file: File): Task[Array[Byte]] = { Task.delay({ scala.io.Source.fromFile(file).getLines() .map(in => if (in startsWith "VERSION=") s"VERSION=$ver" else in) .mkString("\n") .getBytes .toSeq .toArray }) } def readBytes(file: File): Task[Array[Byte]] = Task.delay({ Files.readAllBytes(Paths.get(file.getAbsolutePath)) }) }
Example 92
Source File: TemplateSpec.scala From cluster-broccoli with Apache License 2.0 | 5 votes |
package de.frosner.broccoli.models import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} import org.specs2.mutable.Specification import play.api.libs.json.Json import Template.{templateApiWrites, templatePersistenceReads} class TemplateSpec extends Specification { "A template" should { "extract only parameters specified in the parameters" in { Template("test", "Hallo {{id}}. I like {{person_name}}.", "desc", Map("id" -> ParameterInfo("id", None, None, None, ParameterType.Raw, None))).parameters === Set("id") } "not automatically extract parameters from a template" in { Template("test", "Hallo {{id}}, how is {{object}}", "desc", Map.empty).parameters === Set.empty } "create the template version correctly in" in { Template("test", "template JSON", "desc", Map.empty).version === "889df4c8118c30a28ed4f51674a0f19d" } "result in different template versions if the template JSON differs" in { Template("test", "template JSON", "desc", Map.empty).version !== Template("test", "template JSONs", "desc", Map.empty).version } "result in different template versions if the template parameter info differs" in { Template( id = "test", template = "template JSON {{id}}", description = "desc", parameterInfos = Map.empty ).version !== Template( id = "test", template = "template JSON {{id}}", description = "desc", parameterInfos = Map( "id" -> ParameterInfo("id", None, None, secret = Some(false), `type` = ParameterType.String, orderIndex = None) ) ).version } } "Template serialization" should { "work correctly" in { val originalTemplate = Template("test", "Hallo {{name}}", "desc", Map.empty) val bos = new ByteArrayOutputStream() val oos = new ObjectOutputStream(bos) oos.writeObject(originalTemplate) oos.close() val ois = new ObjectInputStream(new ByteArrayInputStream(bos.toByteArray)) val deserializedTemplate = ois.readObject() ois.close() originalTemplate === deserializedTemplate } } "Template back-end JSON serialization" should { "work" in { val template = Template( id = "t", template = "{{id}}", description = "d", parameterInfos = Map.empty ) Json .fromJson(Json.toJson(template)(Template.templatePersistenceWrites))(Template.templatePersistenceReads) .get === template } } }
Example 93
Source File: JavaSerializationBenchmark.scala From scala-commons with MIT License | 5 votes |
package com.avsystem.commons package rpc.akka.serialization import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} import org.openjdk.jmh.annotations.{Benchmark, BenchmarkMode, Fork, Measurement, Mode, Scope, State, Warmup} import org.openjdk.jmh.infra.Blackhole @Warmup(iterations = 5) @Measurement(iterations = 20) @Fork(1) @BenchmarkMode(Array(Mode.Throughput)) @State(Scope.Thread) class JavaSerializationBenchmark { val something = Something(42, Nested(4 :: 8 :: 15 :: 16 :: 23 :: 42 :: Nil, 0), "lol") val array = { val baos = new ByteArrayOutputStream() val o = new ObjectOutputStream(baos) o.writeObject(something) o.close() baos.toByteArray } @Benchmark def byteStringOutput(): Something = { val baos = new ByteArrayOutputStream() val o = new ObjectOutputStream(baos) o.writeObject(something) o.close() val array = baos.toByteArray new ObjectInputStream(new ByteArrayInputStream(array)).readObject().asInstanceOf[Something] } @Benchmark def writeTest(): Array[Byte] = { val baos = new ByteArrayOutputStream() val o = new ObjectOutputStream(baos) o.writeObject(something) o.close() baos.toByteArray } @Benchmark def readTest(): Something = { new ObjectInputStream(new ByteArrayInputStream(array)).readObject().asInstanceOf[Something] } }
Example 94
Source File: StreamInputOutputBenchmark.scala From scala-commons with MIT License | 5 votes |
package com.avsystem.commons package ser import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} import com.avsystem.commons.serialization.{GenCodec, StreamInput, StreamOutput} import org.openjdk.jmh.annotations.{Benchmark, BenchmarkMode, Fork, Measurement, Mode, Scope, State, Warmup} import org.openjdk.jmh.infra.Blackhole case class Toplevel(int: Int, nested: Nested, str: String) case class Nested(list: List[Int], int: Int) object Toplevel { implicit val nestedCodec: GenCodec[Nested] = GenCodec.materialize[Nested] implicit val codec: GenCodec[Toplevel] = GenCodec.materialize[Toplevel] } @Warmup(iterations = 10) @Measurement(iterations = 20) @Fork(1) @BenchmarkMode(Array(Mode.Throughput)) @State(Scope.Thread) class StreamInputOutputBenchmark { val something = Toplevel(35, Nested(List(121, 122, 123, 124, 125, 126), 53), "lol") val inputArray: Array[Byte] = { val os = new ByteArrayOutputStream() GenCodec.write(new StreamOutput(new DataOutputStream(os)), something) os.toByteArray } @Benchmark def testEncode(bh: Blackhole): Unit = { val os = new ByteArrayOutputStream(inputArray.length) val output = new StreamOutput(new DataOutputStream(os)) GenCodec.write(output, something) bh.consume(os.toByteArray) } @Benchmark def testDecode(bh: Blackhole): Unit = { val is = new DataInputStream(new ByteArrayInputStream(inputArray)) val input = new StreamInput(is) bh.consume(GenCodec.read[Toplevel](input)) } @Benchmark def testEncodeRaw(bh: Blackhole): Unit = { val os = new ByteArrayOutputStream(inputArray.length) val output = new StreamOutput(new DataOutputStream(os)) val toplevelOutput = output.writeObject() toplevelOutput.writeField("int").writeSimple().writeInt(35) val nestedOutput = toplevelOutput.writeField("nested").writeObject() val listOutput = nestedOutput.writeField("list").writeList() listOutput.writeElement().writeSimple().writeInt(121) listOutput.writeElement().writeSimple().writeInt(122) listOutput.writeElement().writeSimple().writeInt(123) listOutput.writeElement().writeSimple().writeInt(124) listOutput.writeElement().writeSimple().writeInt(125) listOutput.writeElement().writeSimple().writeInt(126) listOutput.finish() nestedOutput.writeField("int").writeSimple().writeInt(53) nestedOutput.finish() toplevelOutput.writeField("str").writeSimple().writeString("lol") toplevelOutput.finish() bh.consume(os.toByteArray) } @Benchmark def testDecodeRaw(bh: Blackhole): Unit = { val is = new DataInputStream(new ByteArrayInputStream(inputArray)) val input = new StreamInput(is) val objInput = input.readObject() val intField = objInput.nextField().readSimple().readInt() val nestedInput = objInput.nextField().readObject() val listInput = nestedInput.nextField().readList() val listNested = List( listInput.nextElement().readSimple().readInt(), listInput.nextElement().readSimple().readInt(), listInput.nextElement().readSimple().readInt(), listInput.nextElement().readSimple().readInt(), listInput.nextElement().readSimple().readInt(), listInput.nextElement().readSimple().readInt() ) listInput.hasNext val intNested = nestedInput.nextField().readSimple().readInt() nestedInput.hasNext val strField = objInput.nextField().readSimple().readString() objInput.hasNext bh.consume(Toplevel(intField, Nested(listNested, intNested), strField)) } }
Example 95
Source File: StreamGenCodecTest.scala From scala-commons with MIT License | 5 votes |
package com.avsystem.commons
package serialization

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

class StreamGenCodecTest extends GenCodecRoundtripTest {
  type Raw = Array[Byte]

  def writeToOutput(write: Output => Unit): Array[Byte] = {
    val baos = new ByteArrayOutputStream
    write(new StreamOutput(new DataOutputStream(baos)))
    baos.toByteArray
  }

  def createInput(raw: Array[Byte]): Input =
    new StreamInput(new DataInputStream(new ByteArrayInputStream(raw)))
}
Example 96
Source File: MessageSerializationSuite.scala From bahir with Apache License 2.0 | 5 votes |
package org.apache.spark.streaming.pubnub import java.io.ByteArrayInputStream import java.io.ByteArrayOutputStream import java.io.ObjectInputStream import java.io.ObjectOutputStream import com.google.gson.JsonParser import com.pubnub.api.models.consumer.pubsub.PNMessageResult import org.apache.spark.SparkFunSuite class MessageSerializationSuite extends SparkFunSuite { test("Full example") { checkMessageSerialization( "{\"message\":\"Hello, World!\"}", "channel1", "publisher1", "subscription1", System.currentTimeMillis * 10000 ) } test("Message from channel") { checkMessageSerialization("{\"message\":\"Hello, World!\"}", "c", "p", null, 13534398158620385L) } test("Message from subscription") { checkMessageSerialization("{\"message\":\"Hello, World!\"}", null, "p", "s", 13534397812467596L) } def checkMessageSerialization(payload: String, channel: String, publisher: String, subscription: String, timestamp: Long): Unit = { val builder = PNMessageResult.builder .message(if (payload != null) new JsonParser().parse(payload) else null) .channel(channel) .publisher(publisher) .subscription(subscription) .timetoken(timestamp) val pubNubMessage = builder.build() val sparkMessage = new SparkPubNubMessage sparkMessage.message = pubNubMessage // serializer val byteOutStream = new ByteArrayOutputStream val outputStream = new ObjectOutputStream(byteOutStream) outputStream.writeObject(sparkMessage) outputStream.flush() outputStream.close() byteOutStream.close() val serializedBytes = byteOutStream.toByteArray // deserialize val byteInStream = new ByteArrayInputStream(serializedBytes) val inputStream = new ObjectInputStream(byteInStream) val deserializedMessage = inputStream.readObject().asInstanceOf[SparkPubNubMessage] inputStream.close() byteInStream.close() assert(payload.equals(deserializedMessage.getPayload)) if (channel != null) { assert(channel.equals(deserializedMessage.getChannel)) } else { assert(deserializedMessage.getChannel == null) } if (subscription != null) { assert(subscription.equals(deserializedMessage.getSubscription)) } else { assert(deserializedMessage.getSubscription == null) } assert(publisher.equals(deserializedMessage.getPublisher)) val unixTimestamp = Math.ceil(timestamp / 10000).longValue() assert(unixTimestamp.equals(deserializedMessage.getTimestamp)) } }
Example 97
Source File: TDMLInfosetOutputter.scala From incubator-daffodil with Apache License 2.0 | 5 votes |
package org.apache.daffodil.tdml import java.io.ByteArrayInputStream import java.io.ByteArrayOutputStream import org.apache.daffodil.infoset.DIArray import org.apache.daffodil.infoset.DIComplex import org.apache.daffodil.infoset.DISimple import org.apache.daffodil.infoset.InfosetOutputter import org.apache.daffodil.infoset.JDOMInfosetInputter import org.apache.daffodil.infoset.JDOMInfosetOutputter import org.apache.daffodil.infoset.JsonInfosetInputter import org.apache.daffodil.infoset.JsonInfosetOutputter import org.apache.daffodil.infoset.ScalaXMLInfosetInputter import org.apache.daffodil.infoset.ScalaXMLInfosetOutputter import org.apache.daffodil.infoset.W3CDOMInfosetInputter import org.apache.daffodil.infoset.W3CDOMInfosetOutputter import org.apache.daffodil.infoset.XMLTextInfosetInputter import org.apache.daffodil.infoset.XMLTextInfosetOutputter class TDMLInfosetOutputter() extends InfosetOutputter { private def implString: String = "daffodil" private val jsonStream = new ByteArrayOutputStream() private val xmlStream = new ByteArrayOutputStream() private val scalaOut = new ScalaXMLInfosetOutputter() private val jdomOut = new JDOMInfosetOutputter() private val w3cdomOut = new W3CDOMInfosetOutputter() private val jsonOut = new JsonInfosetOutputter(jsonStream, false) private val xmlOut = new XMLTextInfosetOutputter(xmlStream, false) private val outputters = Seq(xmlOut, scalaOut, jdomOut, w3cdomOut, jsonOut) override def reset(): Unit = { outputters.foreach(_.reset()) } override def startSimple(simple: DISimple): Boolean = { if (!outputters.forall(_.startSimple(simple))) throw TDMLException("startSimple failed", Some(implString)) true } override def endSimple(simple: DISimple): Boolean = { if (!outputters.forall(_.endSimple(simple))) throw TDMLException("endSimple failed", Some(implString)) true } override def startComplex(complex: DIComplex): Boolean = { if (!outputters.forall(_.startComplex(complex))) throw TDMLException("startComplex failed", Some(implString)) true } override def endComplex(complex: DIComplex): Boolean = { if (!outputters.forall(_.endComplex(complex))) throw TDMLException("endComplex failed", Some(implString)) true } override def startArray(array: DIArray): Boolean = { if (!outputters.forall(_.startArray(array))) throw TDMLException("startArray failed", Some(implString)) true } override def endArray(array: DIArray): Boolean = { if (!outputters.forall(_.endArray(array))) throw TDMLException("endArray failed", Some(implString)) true } override def startDocument(): Boolean = { if (!outputters.forall(_.startDocument())) throw TDMLException("startDocument failed", Some(implString)) true } override def endDocument(): Boolean = { if (!outputters.forall(_.endDocument())) throw TDMLException("endDocument failed", Some(implString)) true } def getResult() = scalaOut.getResult def toInfosetInputter() = { val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult) val jdomIn = new JDOMInfosetInputter(jdomOut.getResult) val w3cdomIn = new W3CDOMInfosetInputter(w3cdomOut.getResult) val jsonIn = new JsonInfosetInputter(new ByteArrayInputStream(jsonStream.toByteArray)) val xmlIn = new XMLTextInfosetInputter(new ByteArrayInputStream(xmlStream.toByteArray)) new TDMLInfosetInputter(scalaIn, Seq(jdomIn, w3cdomIn, jsonIn, xmlIn)) } }
Example 98
Source File: Implicits.scala From incubator-daffodil with Apache License 2.0 | 5 votes |
package org.apache.daffodil import java.io.{ ByteArrayInputStream, BufferedInputStream } import org.apache.daffodil.xml.NS import org.apache.daffodil.exceptions.Assert import scala.language.reflectiveCalls import scala.language.implicitConversions import scala.language.{ implicitConversions, reflectiveCalls } // silences scala 2.10 warnings object Implicits { object ImplicitsSuppressUnusedImportWarning { def apply() = if (scala.math.random.isNaN()) Assert.impossible() } def intercept[T <: AnyRef](body: => Any)(implicit tag: scala.reflect.ClassTag[T]): T = { val clazz = tag.runtimeClass.asInstanceOf[Class[T]] val caught = try { body None } catch { case npe: NullPointerException => throw npe case s: scala.util.control.ControlThrowable => throw s case u: Throwable => { if (!clazz.isAssignableFrom(u.getClass)) { throw new InterceptFailedException( "Failed to intercept expected exception. Expected '%s' but got '%s'.".format(clazz.getName, u.getClass.getName)) } else { Some(u) } } } caught match { case None => throw new InterceptFailedException("Failed to intercept any exceptions.") case Some(e) => e.asInstanceOf[T] } } class InterceptFailedException(msg: String) extends RuntimeException(msg) }
Example 99
Source File: TestSerializationAndLazy.scala From incubator-daffodil with Apache License 2.0 | 5 votes |
package org.apache.daffodil.util import org.junit.Assert._ import java.io.ByteArrayOutputStream import java.io.ObjectOutputStream import java.io.ByteArrayInputStream import java.io.ObjectInputStream import org.junit.Test class ToSerialize extends Serializable { val v = 5 var lazyValWasEvaluated = false lazy val x = { // println("v is " + v) lazyValWasEvaluated = true 2 * v } } class TestSerializationAndLazy { @Test def testSerializeBeforeLazyEval(): Unit = { val instance = new ToSerialize val baos = new ByteArrayOutputStream val stream = new ObjectOutputStream(baos) stream.writeObject(instance) stream.flush() stream.close() assertFalse(instance.lazyValWasEvaluated) val ba = baos.toByteArray() val bais = new ByteArrayInputStream(ba) val istream = new ObjectInputStream(bais) val restoredInstance = istream.readObject() istream.close() assertTrue(restoredInstance.isInstanceOf[ToSerialize]) val ts = restoredInstance.asInstanceOf[ToSerialize] assertFalse(ts.lazyValWasEvaluated) ts.x assertTrue(ts.lazyValWasEvaluated) } }
Example 100
Source File: TarFlowSpec.scala From nexus with Apache License 2.0 | 5 votes |
package ch.epfl.bluebrain.nexus.storage import java.io.ByteArrayInputStream import java.nio.file.{Files, Path, Paths} import akka.actor.ActorSystem import akka.stream.alpakka.file.scaladsl.Directory import akka.stream.scaladsl.{FileIO, Source} import akka.testkit.TestKit import akka.util.ByteString import ch.epfl.bluebrain.nexus.storage.utils.{EitherValues, IOEitherValues, Randomness} import org.apache.commons.compress.archivers.tar.TarArchiveInputStream import org.apache.commons.io.FileUtils import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpecLike import org.scalatest.{BeforeAndAfterAll, Inspectors, OptionValues} import scala.annotation.tailrec class TarFlowSpec extends TestKit(ActorSystem("TarFlowSpec")) with AnyWordSpecLike with Matchers with IOEitherValues with Randomness with EitherValues with OptionValues with Inspectors with BeforeAndAfterAll { val basePath = Files.createTempDirectory("tarflow") val dir1 = basePath.resolve("one") val dir2 = basePath.resolve("two") override def afterAll(): Unit = { super.afterAll() FileUtils.cleanDirectory(basePath.toFile) () } type PathAndContent = (Path, String) "A TarFlow" should { Files.createDirectories(dir1) Files.createDirectories(dir2) def relativize(path: Path): String = basePath.getParent().relativize(path).toString "generate the byteString for a tar file correctly" in { val file1 = dir1.resolve("file1.txt") val file1Content = genString() val file2 = dir1.resolve("file3.txt") val file2Content = genString() val file3 = dir2.resolve("file3.txt") val file3Content = genString() val files = List(file1 -> file1Content, file2 -> file2Content, file3 -> file3Content) forAll(files) { case (file, content) => Source.single(ByteString(content)).runWith(FileIO.toPath(file)).futureValue } val byteString = Directory.walk(basePath).via(TarFlow.writer(basePath)).runReduce(_ ++ _).futureValue val bytes = new ByteArrayInputStream(byteString.toArray) val tar = new TarArchiveInputStream(bytes) @tailrec def readEntries( tar: TarArchiveInputStream, entries: List[PathAndContent] = Nil ): List[PathAndContent] = { val entry = tar.getNextTarEntry if (entry == null) entries else { val data = Array.ofDim[Byte](entry.getSize.toInt) tar.read(data) readEntries(tar, (Paths.get(entry.getName) -> ByteString(data).utf8String) :: entries) } } val directories = List(relativize(basePath) -> "", relativize(dir1) -> "", relativize(dir2) -> "") val untarred = readEntries(tar).map { case (path, content) => path.toString -> content } val expected = files.map { case (path, content) => relativize(path) -> content } ++ directories untarred should contain theSameElementsAs expected } } }
Example 101
Source File: ProcessBuilderUtils.scala From scalastringcourseday7 with Apache License 2.0 | 5 votes |
package util import java.io.ByteArrayInputStream import java.nio.charset.{Charset, CodingErrorAction} import text.StringOption import scala.collection.mutable.ListBuffer import scala.io.{Codec, Source} import scala.sys.process.ProcessBuilder object ProcessBuilderUtils { implicit def processToProcessUtils(repr: ProcessBuilder): ProcessBuilderUtils = { new ProcessBuilderUtils(repr) } } class ProcessBuilderUtils(repr: ProcessBuilder) { def lineStream(encoding: Charset, onMalformedInput: CodingErrorAction, onUnmappableCharacter: CodingErrorAction, replacementOpt: StringOption): Iterator[String] = { val lines: Iterator[String] = repr.lineStream_!.iterator val byteBuffer = ListBuffer.empty[Byte] while (lines.hasNext) { val line: String = lines.next.trim concat "\n" byteBuffer ++= line.getBytes } implicit val codec = Codec(encoding). onMalformedInput(onMalformedInput). onUnmappableCharacter(onUnmappableCharacter) if (replacementOpt.nonEmpty) { codec.decodingReplaceWith(replacementOpt.get) } Source.fromInputStream(new ByteArrayInputStream(byteBuffer.toArray)).getLines } }
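A smaller sketch of the decoding step used above: re-reading in-memory bytes through scala.io.Source with a Codec configured to replace malformed input instead of throwing. The sample bytes and the "?" replacement string are illustrative:

import java.io.ByteArrayInputStream
import java.nio.charset.{CodingErrorAction, StandardCharsets}

import scala.io.{Codec, Source}

object DecodeBytesSketch extends App {
  val bytes: Array[Byte] = "first line\nsecond line\n".getBytes(StandardCharsets.UTF_8)

  // Decode leniently: malformed or unmappable byte sequences become "?" instead of failing.
  implicit val codec: Codec = Codec(StandardCharsets.UTF_8)
    .onMalformedInput(CodingErrorAction.REPLACE)
    .onUnmappableCharacter(CodingErrorAction.REPLACE)
    .decodingReplaceWith("?")

  Source.fromInputStream(new ByteArrayInputStream(bytes)).getLines().foreach(println)
}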
Example 102
Source File: H2OLoader.scala From ForestFlow with Apache License 2.0 | 5 votes |
package ai.forestflow.serving.impl import java.io.{ByteArrayInputStream, FileReader} import java.nio.file.Paths import ai.forestflow.serving.MLFlow.H2OMLFlowSpec import ai.forestflow.serving.interfaces.Loader import cats.syntax.either._ import ai.forestflow.domain.{FQRV, FlavorShim, ServableSettings} import ai.forestflow.serving.MLFlow.H2OMLFlowSpec import ai.forestflow.serving.interfaces.Loader import hex.genmodel.MojoReaderBackendFactory import hex.genmodel.MojoReaderBackendFactory.CachingStrategy import io.circe.{Error, yaml} trait H2OLoader extends Loader { def version: Option[String] override def createServable(servableBinary: Array[Byte], fqrv: FQRV, settings: ServableSettings)(implicit eCTX: EnvironmentContext): H2OServable = { import hex.genmodel.MojoModel val mojoReader = MojoReaderBackendFactory.createReaderBackend( new ByteArrayInputStream(servableBinary), CachingStrategy.MEMORY) H2OServable(MojoModel.load(mojoReader), fqrv, settings) } } case class MLFlowH2OLoader(dataPath: String, version: Option[String]) extends H2OLoader { override def getRelativeServablePath(implicit eCTX: EnvironmentContext): String = { val json = yaml.parser.parse(new FileReader(Paths.get(eCTX.localDir.getAbsolutePath, dataPath, "h2o.yaml").toFile)) // TODO move "h2o.yaml" constant to configuration val h2oSpec = json .leftMap(err => err: Error) .flatMap(_.as[H2OMLFlowSpec]) .valueOr(throw _) Paths.get(dataPath, h2oSpec.modelFile).toString } } trait BasicH2OMojoLoader extends H2OLoader { this : FlavorShim with Loader => val mojoPath: String val version: Option[String] override def getRelativeServablePath(implicit eCTX: EnvironmentContext): String = mojoPath }
Example 103
Source File: ParseTests.scala From coursier with Apache License 2.0 | 5 votes |
package coursier.benchmark import java.io.ByteArrayInputStream import java.nio.charset.StandardCharsets import java.util.concurrent.TimeUnit import coursier.maven.MavenRepository import coursier.moduleString import org.apache.maven.model.io.xpp3.MavenXpp3Reader import org.openjdk.jmh.annotations._ import scala.concurrent.Await import scala.concurrent.duration.Duration @BenchmarkMode(Array(Mode.AverageTime)) @OutputTimeUnit(TimeUnit.MILLISECONDS) class ParseTests { @Benchmark def parseSparkParent(state: TestState): Unit = { val t = state.repositories.head.find( mod"org.apache.spark:spark-parent_2.12", "2.4.0", state.fetcher ).run val e = Await.result(t.future()(state.ec), Duration.Inf) assert(e.isRight) } @Benchmark def parseSparkParentXmlDom(state: TestState): Unit = { val content = state.inMemoryCache.fromCache("https://repo1.maven.org/maven2/org/apache/spark/spark-parent_2.12/2.4.0/spark-parent_2.12-2.4.0.pom") val res = MavenRepository.parseRawPomDom(content) assert(res.isRight) } @Benchmark def parseSparkParentXmlSax(state: TestState): Unit = { val content = state.inMemoryCache.fromCache("https://repo1.maven.org/maven2/org/apache/spark/spark-parent_2.12/2.4.0/spark-parent_2.12-2.4.0.pom") val res = MavenRepository.parseRawPomSax(content) assert(res.isRight) } @Benchmark def parseApacheParent(state: TestState): Unit = { val t = state.repositories.head.find( mod"org.apache:apache", "18", state.fetcher ).run val e = Await.result(t.future()(state.ec), Duration.Inf) assert(e.isRight) } @Benchmark def parseSparkParentMavenModel(state: TestState): Unit = { val b = state .inMemoryCache .fromCache("https://repo1.maven.org/maven2/org/apache/spark/spark-parent_2.12/2.4.0/spark-parent_2.12-2.4.0.pom") .getBytes(StandardCharsets.UTF_8) val reader = new MavenXpp3Reader val model = reader.read(new ByteArrayInputStream(b)) } }
Example 104
Source File: ZipTests.scala From coursier with Apache License 2.0 | 5 votes |
package coursier.cli.util import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.util.Random import java.util.zip.{Deflater, ZipEntry, ZipInputStream, ZipOutputStream} import coursier.launcher.internal.Zip import org.junit.runner.RunWith import org.scalatest.flatspec.AnyFlatSpec import org.scalatestplus.junit.JUnitRunner @RunWith(classOf[JUnitRunner]) class ZipTests extends AnyFlatSpec { "zipEntries" should "be fine with custom deflaters" in { // Inspired by https://github.com/spring-projects/spring-boot/commit/a50646b7cc3ad941e748dfb450077e3a73706205#diff-2297c301250b25e3b80301c58daf3ea0R621 val baos = new ByteArrayOutputStream val output = new ZipOutputStream(baos) { `def` = new Deflater(Deflater.NO_COMPRESSION, true) } val data = Array.ofDim[Byte](1024 * 1024) new Random().nextBytes(data) val entry = new ZipEntry("entry.dat") output.putNextEntry(entry) output.write(data) output.closeEntry() output.close() val result = baos.toByteArray val zos = new ZipOutputStream(new ByteArrayOutputStream) val entryNames = Zip.zipEntries(new ZipInputStream(new ByteArrayInputStream(result))) .map { case (ent, content) => println(ent.getCompressedSize) val name = ent.getName zos.putNextEntry(ent) zos.write(content) zos.closeEntry() name } .toVector zos.close() assert(entryNames == Vector("entry.dat")) } }
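The test above leans on coursier's Zip.zipEntries helper; the same round trip can be expressed with only JDK classes, which isolates the ByteArrayInputStream usage. The archive name and contents below are illustrative:

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.util.zip.{ZipEntry, ZipInputStream, ZipOutputStream}

object InMemoryZipSketch extends App {
  // Build a tiny zip archive entirely in memory.
  val baos = new ByteArrayOutputStream
  val zos = new ZipOutputStream(baos)
  zos.putNextEntry(new ZipEntry("entry.dat"))
  zos.write("hello".getBytes("UTF-8"))
  zos.closeEntry()
  zos.close()

  // Read the entry names back from the same bytes.
  val zis = new ZipInputStream(new ByteArrayInputStream(baos.toByteArray))
  val names = Iterator.continually(zis.getNextEntry).takeWhile(_ != null).map(_.getName).toList
  zis.close()
  println(names) // List(entry.dat)
}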
Example 105
Source File: VectorGrid.scala From osmesa with Apache License 2.0 | 5 votes |
package osmesa.analytics import java.io.ByteArrayInputStream import java.net.URI import java.util.zip.GZIPInputStream import geotrellis.proj4.WebMercator import geotrellis.spark.tiling.ZoomedLayoutScheme import geotrellis.vector.{Extent, PointFeature} import geotrellis.vectortile.{Layer, VInt64, VectorTile} import org.apache.commons.io.IOUtils import org.apache.spark.internal.Logging import osmesa.analytics.updater.Implicits._ import osmesa.analytics.updater._ import scala.collection.GenMap import scala.collection.parallel.TaskSupport trait VectorGrid extends Logging { // Default base zoom (highest resolution tiles produced) val DefaultBaseZoom: Int = 10 // Number of cells per side in a gridded tile implicit val Cells: Int = 128 // Number of cells in a gridded tile at the base of the pyramid (may be used for over-zooming) val BaseCells: Int = Cells // Default upload concurrency val DefaultUploadConcurrency: Int = 8 implicit val LayoutScheme: ZoomedLayoutScheme = ZoomedLayoutScheme(WebMercator) val SequenceLayerName: String = "__sequences__" def getCommittedSequences(tile: VectorTile): Set[Int] = // NOTE when working with hashtags, this should be the changeset sequence, since changes from a // single sequence may appear in different batches depending on when changeset metadata arrives tile.layers .get(SequenceLayerName) .map(_.features.flatMap(f => f.data.values.map(valueToLong).map(_.intValue))) .map(_.toSet) .getOrElse(Set.empty) def makeSequenceLayer(sequences: Set[Int], extent: Extent, tileWidth: Int = 4096): (String, Layer) = { // create a second layer w/ a feature corresponding to committed sequences (in the absence of // available tile / layer metadata) val updatedSequences = sequences.toSeq.sorted .takeRight(1000) .zipWithIndex .map { case (seq, idx) => idx.toString -> VInt64(seq) } .toMap val sequenceFeature = PointFeature(extent.center, updatedSequences) makeLayer(SequenceLayerName, extent, Seq(sequenceFeature), tileWidth) } def loadMVTs(urls: Map[URI, Extent])( implicit taskSupport: TaskSupport): GenMap[URI, VectorTile] = { // convert to a parallel collection to load more tiles concurrently val parUrls = urls.par parUrls.tasksupport = taskSupport parUrls.map { case (uri, extent) => (uri, read(uri).map( bytes => VectorTile.fromBytes( IOUtils.toByteArray(new GZIPInputStream(new ByteArrayInputStream(bytes))), extent))) } filter { case (_, mvt) => mvt.isDefined } map { case (uri, mvt) => uri -> mvt.get } } }
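loadMVTs above gunzips each fetched tile by stacking a GZIPInputStream on a ByteArrayInputStream. That decompression step in isolation looks roughly like this sketch, using commons-io IOUtils as the example does:

import java.io.ByteArrayInputStream
import java.util.zip.GZIPInputStream

import org.apache.commons.io.IOUtils

object GunzipSketch {
  // Decompress gzip-compressed bytes that are already in memory.
  def gunzip(compressed: Array[Byte]): Array[Byte] =
    IOUtils.toByteArray(new GZIPInputStream(new ByteArrayInputStream(compressed)))
}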
Example 106
Source File: CodecSpec.scala From hail with MIT License | 5 votes |
package is.hail.io import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream, OutputStream} import is.hail.annotations.{Region, RegionValue} import is.hail.asm4s.{Code, TypeInfo, Value} import is.hail.expr.ir.{EmitClassBuilder, EmitFunctionBuilder, ExecuteContext, typeToTypeInfo} import is.hail.types.encoded.EType import is.hail.types.physical.PType import is.hail.types.virtual.Type import is.hail.rvd.RVDContext import is.hail.sparkextras.ContextRDD import is.hail.utils.using import org.apache.spark.rdd.RDD trait AbstractTypedCodecSpec extends Spec { def encodedType: EType def encodedVirtualType: Type type StagedEncoderF[T] = (Value[Region], Value[T], Value[OutputBuffer]) => Code[Unit] type StagedDecoderF[T] = (Value[Region], Value[InputBuffer]) => Code[T] def buildEncoder(ctx: ExecuteContext, t: PType): (OutputStream) => Encoder def decodedPType(requestedType: Type): PType def buildDecoder(ctx: ExecuteContext, requestedType: Type): (PType, (InputStream) => Decoder) def encode(ctx: ExecuteContext, t: PType, offset: Long): Array[Byte] = { val baos = new ByteArrayOutputStream() using(buildEncoder(ctx, t)(baos))(_.writeRegionValue(offset)) baos.toByteArray } def decode(ctx: ExecuteContext, requestedType: Type, bytes: Array[Byte], region: Region): (PType, Long) = { val bais = new ByteArrayInputStream(bytes) val (pt, dec) = buildDecoder(ctx, requestedType) (pt, dec(bais).readRegionValue(region)) } def buildCodeInputBuffer(is: Code[InputStream]): Code[InputBuffer] def buildCodeOutputBuffer(os: Code[OutputStream]): Code[OutputBuffer] def buildEmitDecoderF[T](requestedType: Type, cb: EmitClassBuilder[_]): (PType, StagedDecoderF[T]) def buildEmitEncoderF[T](t: PType, cb: EmitClassBuilder[_]): StagedEncoderF[T] def buildEmitDecoderF[T](requestedType: Type, cb: EmitClassBuilder[_], ti: TypeInfo[T]): (PType, StagedDecoderF[T]) = { val (ptype, dec) = buildEmitDecoderF[T](requestedType, cb) assert(ti == typeToTypeInfo(requestedType)) ptype -> dec } def buildEmitEncoderF[T](t: PType, cb: EmitClassBuilder[_], ti: TypeInfo[T]): StagedEncoderF[T] = { assert(ti == typeToTypeInfo(t)) buildEmitEncoderF[T](t, cb) } // FIXME: is there a better place for this to live? def decodeRDD(ctx: ExecuteContext, requestedType: Type, bytes: RDD[Array[Byte]]): (PType, ContextRDD[Long]) = { val (pt, dec) = buildDecoder(ctx, requestedType) (pt, ContextRDD.weaken(bytes).cmapPartitions { (ctx, it) => RegionValue.fromBytes(dec, ctx.region, it) }) } override def toString: String = super[Spec].toString }
Example 107
Source File: StringTests.scala From CM-Well with Apache License 2.0 | 5 votes |
package cmwell.util.string.test import java.io.{ByteArrayInputStream, InputStream} import org.scalatest.{FunSpec, Matchers} import cmwell.util.string._ class StringTests extends FunSpec with Matchers { private def mkString(is: InputStream) = { val buffSrc = scala.io.Source.fromInputStream(is) val res = buffSrc.mkString buffSrc.close() res } describe("mapInputStreamLines should") { it("return empty for empty input") { val input = new ByteArrayInputStream(Array.emptyByteArray) val result = mapInputStreamLines(input)(identity) result.read() should be(-1) input.close() result.close() } it("provide the delimiter as well") { val delim = '\n' val s = "provide the\ndelimiter as well" val expectedAmount = s.count(delim.==) val input = stringToInputStream(s) val result = mapInputStreamLines(input)(_.toUpperCase) mkString(result).count(delim.==) should be(expectedAmount) input.close() result.close() } it("not end with the delimiter") { val input = stringToInputStream("not end with\nthe delimiter") val result = mapInputStreamLines(input)(_.toUpperCase) mkString(result).last should be('R') input.close() result.close() } it("handle a concat mapper") { val input = stringToInputStream("handle\na\nconcat\nmapper") val result = mapInputStreamLines(input)(_ + " not") mkString(result) should be("handle not\na not\nconcat not\nmapper not") input.close() result.close() } } }
Example 108
Source File: S3KVPersisted.scala From fotm-info with MIT License | 5 votes |
package info.fotm.util import java.io.ByteArrayInputStream import com.amazonaws.services.s3.AmazonS3Client import com.amazonaws.services.s3.model.{GetObjectRequest, ObjectListing, ObjectMetadata, S3ObjectInputStream} import com.amazonaws.util.IOUtils import com.twitter.bijection.Bijection import scala.collection.JavaConverters._ import scala.collection.breakOut import scala.util.Try class S3KVPersisted[K, V](bucket: String, keyPathBijection: Bijection[K, String]) (implicit valueSerializer: Bijection[V, Array[Byte]]) extends Persisted[Map[K, V]] { val s3client = new AmazonS3Client() override def save(state: Map[K, V]): Try[Unit] = Try { for ((k, v) <- state) { val path: String = keyPathBijection(k) val bytes = valueSerializer(v) val stream = new ByteArrayInputStream(bytes) val meta = new ObjectMetadata() meta.setContentLength(bytes.length) s3client.putObject(bucket, path, stream, meta) } } override def fetch(): Try[Map[K, V]] = Try { val listing: ObjectListing = s3client.listObjects(bucket) val bucketEntries = listing.getObjectSummaries.asScala.toList val s3keys = bucketEntries.map(_.getKey) val result: Map[K, V] = ( for (s3key <- s3keys) yield { println(s"Loading $s3key...") val request = new GetObjectRequest(bucket, s3key) val s3object = s3client.getObject(request) val objectData: S3ObjectInputStream = s3object.getObjectContent val bytes = IOUtils.toByteArray(objectData) objectData.close() println(s"Loaded $s3key! Deserializing...") val k = keyPathBijection.inverse(s3key) val v = valueSerializer.inverse(bytes) println(s"Done with $s3key.") (k, v) })(breakOut) result } }
Example 109
Source File: SubEntryTest.scala From lila-openingexplorer with GNU Affero General Public License v3.0 | 5 votes |
package lila.openingexplorer import java.io.{ ByteArrayInputStream, ByteArrayOutputStream } import org.specs2.mutable._ import chess.{ Color, Pos } import chess.format.Uci class SubEntryTest extends Specification { private def pipe(entry: SubEntry): SubEntry = { val out = new ByteArrayOutputStream() entry.write(out) val in = new ByteArrayInputStream(out.toByteArray) SubEntry.read(in) } "master database packer" should { "pack a single game" in { val ref = GameRef("ref00000", Some(Color.White), SpeedGroup.Blitz, 1230) val entry = SubEntry.fromGameRef(ref, Left(Uci.Move(Pos.E2, Pos.E4))) pipe(entry).gameRefs mustEqual List(ref) } "pack two games" in { val move = Left(Uci.Move(Pos.D2, Pos.D4)) val g1 = GameRef("g0000001", Some(Color.Black), SpeedGroup.Classical, 2300) val g2 = GameRef("g0000002", None, SpeedGroup.Classical, 2455) val entry = SubEntry.fromGameRef(g1, move).withGameRef(g2, move) pipe(entry).gameRefs mustEqual List(g2, g1) } } }
Example 110
Source File: PackHelperTest.scala From lila-openingexplorer with GNU Affero General Public License v3.0 | 5 votes |
package lila.openingexplorer import java.io.{ ByteArrayInputStream, ByteArrayOutputStream } import org.specs2.mutable._ import chess.format.Uci import chess.Pos import chess.{ King, Rook } class PackHelperTest extends Specification with PackHelper { def pipeMove(move: Either[Uci.Move, Uci.Drop]): Either[Uci.Move, Uci.Drop] = { val out = new ByteArrayOutputStream() writeUci(out, move) val in = new ByteArrayInputStream(out.toByteArray) readUci(in) } "the pack helper" should { "correctly pack moves" in { val move = Uci.Move(Pos.E2, Pos.E3) pipeMove(Left(move)) mustEqual Left(move) } "correctly pack promotions" in { val move = Uci.Move(Pos.A7, Pos.A8, Some(Rook)) pipeMove(Left(move)) mustEqual Left(move) } "correctly pack drops" in { val drop = Uci.Drop(King, Pos.H3) pipeMove(Right(drop)) mustEqual Right(drop) } } List(7, 127, 128, 129, 254, 255, 256, 257, 1234, 864197252500L).foreach { x => "correctly pack uint: " + x in { val out = new ByteArrayOutputStream() writeUint(out, x) val in = new ByteArrayInputStream(out.toByteArray) readUint(in) mustEqual x } } }
Example 111
Source File: Json4sSerialization.scala From kafka-serialization with Apache License 2.0 | 5 votes |
package com.ovoenergy.kafka.serialization.json4s import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStreamReader, OutputStreamWriter} import java.nio.charset.StandardCharsets import com.ovoenergy.kafka.serialization.core._ import org.apache.kafka.common.serialization.{Deserializer => KafkaDeserializer, Serializer => KafkaSerializer} import org.json4s.Formats import org.json4s.native.Serialization.{read, write} import scala.reflect.ClassTag import scala.reflect.runtime.universe._ trait Json4sSerialization { def json4sSerializer[T <: AnyRef](implicit jsonFormats: Formats): KafkaSerializer[T] = serializer { (_, data) => val bout = new ByteArrayOutputStream() val writer = new OutputStreamWriter(bout, StandardCharsets.UTF_8) // TODO Use scala-arm try { write(data, writer) writer.flush() } finally { writer.close() } bout.toByteArray } def json4sDeserializer[T: TypeTag](implicit jsonFormats: Formats): KafkaDeserializer[T] = deserializer { (_, data) => val tt = implicitly[TypeTag[T]] implicit val cl = ClassTag[T](tt.mirror.runtimeClass(tt.tpe)) read[T](new InputStreamReader(new ByteArrayInputStream(data), StandardCharsets.UTF_8)) } }
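The deserializer above parses straight from the raw bytes by layering an InputStreamReader over a ByteArrayInputStream. A stripped-down sketch of that read path, assuming json4s-native is on the classpath; the Person case class and JSON payload are stand-ins:

import java.io.{ByteArrayInputStream, InputStreamReader}
import java.nio.charset.StandardCharsets

import org.json4s.DefaultFormats
import org.json4s.native.Serialization.read

// Stand-in payload type; any case class with matching field names would do.
case class Person(name: String, age: Int)

object Json4sFromBytesSketch extends App {
  implicit val formats: DefaultFormats.type = DefaultFormats

  val bytes = """{"name":"Ivan","age":26}""".getBytes(StandardCharsets.UTF_8)
  val person = read[Person](new InputStreamReader(new ByteArrayInputStream(bytes), StandardCharsets.UTF_8))
  println(person) // Person(Ivan,26)
}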
Example 112
Source File: GenericAvroSerializerSuite.scala From multi-tenancy-spark with Apache License 2.0 | 5 votes |
package org.apache.spark.serializer import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.nio.ByteBuffer import com.esotericsoftware.kryo.io.{Input, Output} import org.apache.avro.{Schema, SchemaBuilder} import org.apache.avro.generic.GenericData.Record import org.apache.spark.{SharedSparkContext, SparkFunSuite} class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext { conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") val schema : Schema = SchemaBuilder .record("testRecord").fields() .requiredString("data") .endRecord() val record = new Record(schema) record.put("data", "test data") test("schema compression and decompression") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema)))) } test("record serialization and deserialization") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val outputStream = new ByteArrayOutputStream() val output = new Output(outputStream) genericSer.serializeDatum(record, output) output.flush() output.close() val input = new Input(new ByteArrayInputStream(outputStream.toByteArray)) assert(genericSer.deserializeDatum(input) === record) } test("uses schema fingerprint to decrease message size") { val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema) val output = new Output(new ByteArrayOutputStream()) val beginningNormalPosition = output.total() genericSerFull.serializeDatum(record, output) output.flush() val normalLength = output.total - beginningNormalPosition conf.registerAvroSchemas(schema) val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema) val beginningFingerprintPosition = output.total() genericSerFinger.serializeDatum(record, output) val fingerprintLength = output.total - beginningFingerprintPosition assert(fingerprintLength < normalLength) } test("caches previously seen schemas") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val compressedSchema = genericSer.compress(schema) val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema)) assert(compressedSchema.eq(genericSer.compress(schema))) assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema)))) } }
Example 113
Source File: MqttConfig.scala From akka-iot-mqtt-v2 with GNU Lesser General Public License v3.0 | 5 votes |
package akkaiot import scala.concurrent.duration._ import java.io.Serializable import java.io.ByteArrayInputStream import java.io.ByteArrayOutputStream import java.io.ObjectInputStream import java.io.ObjectOutputStream import com.sandinh.paho.akka._ import com.sandinh.paho.akka.MqttPubSub._ object MqttConfig { val topic = "akka-iot-mqtt-topic" // Pub-Sub config val psConfig = PSConfig( brokerUrl = "tcp://test.mosquitto.org:1883", userName = null, password = null, stashTimeToLive = 1.minute, stashCapacity = 8000, reconnectDelayMin = 10.millis, reconnectDelayMax = 30.seconds, cleanSession = false ) // Serialize object to byte array def writeToByteArray(obj: Any): Array[Byte] = { val baos = new ByteArrayOutputStream val oos = new ObjectOutputStream(baos) try { oos.writeObject(obj) baos.toByteArray } finally { try { oos.close } catch { case _: Throwable => // Do nothing } } } // Deserialize object from byte array def readFromByteArray[A](bytes: Array[Byte]): A = { val bais = new ByteArrayInputStream(bytes) val ois = new ObjectInputStream(bais) try { val obj = ois.readObject obj.asInstanceOf[A] } finally { try { ois.close } catch { case _: Throwable => // Do nothing } } } }
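A usage sketch for the two helpers above, assuming MqttConfig is in scope; the Work case class is hypothetical, standing in for whatever message type the actors exchange over MQTT:

// Hypothetical message type; case classes are Serializable out of the box.
case class Work(deviceId: String, state: Int)

object MqttConfigUsageSketch extends App {
  val payload: Array[Byte] = MqttConfig.writeToByteArray(Work("device-42", 1))
  val decoded: Work = MqttConfig.readFromByteArray[Work](payload)
  println(decoded) // Work(device-42,1)
}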
Example 114
Source File: Sedes.scala From shc with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.execution.datasources.hbase

import java.io.ByteArrayInputStream

import org.apache.avro.Schema
import org.apache.avro.Schema.Type._
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io._
import org.apache.commons.io.output.ByteArrayOutputStream
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.sql.types._

trait Sedes {
  def serialize(value: Any): Array[Byte]
  def deserialize(bytes: Array[Byte], start: Int, end: Int): Any
}

class DoubleSedes extends Sedes {
  override def serialize(value: Any): Array[Byte] = Bytes.toBytes(value.asInstanceOf[Double])

  override def deserialize(bytes: Array[Byte], start: Int, end: Int): Any = {
    Bytes.toLong(bytes, start)
  }
}
Example 115
Source File: package.scala From pulsar4s with Apache License 2.0 | 5 votes |
package com.sksamuel.pulsar4s import java.io.ByteArrayOutputStream import java.io.ByteArrayInputStream import java.nio.charset.StandardCharsets import com.sksamuel.avro4s.AvroSchema import com.sksamuel.avro4s.AvroInputStream import com.sksamuel.avro4s.AvroOutputStream import com.sksamuel.avro4s.Decoder import com.sksamuel.avro4s.Encoder import com.sksamuel.avro4s.SchemaFor import org.apache.pulsar.client.api.Schema import org.apache.pulsar.common.schema.{SchemaInfo, SchemaType} import scala.annotation.implicitNotFound package object avro { @implicitNotFound("No Avro Schema for type ${T} found.") implicit def avroSchema[T: Manifest: SchemaFor: Encoder: Decoder]: Schema[T] = new Schema[T] { val schema: org.apache.avro.Schema = AvroSchema[T] override def clone(): Schema[T] = this override def encode(t: T): Array[Byte] = { val baos = new ByteArrayOutputStream val aos = AvroOutputStream.binary[T].to(baos).build(schema) aos.write(t) aos.flush() aos.close() baos.toByteArray() } override def decode(bytes: Array[Byte]): T = { val bais = new ByteArrayInputStream(bytes) val ais = AvroInputStream.binary[T].from(bais).build(schema) val first = ais.iterator.next() ais.close() first } override def getSchemaInfo: SchemaInfo = new SchemaInfo() .setName(manifest[T].runtimeClass.getCanonicalName) .setType(SchemaType.AVRO) .setSchema(schema.toString.getBytes(StandardCharsets.UTF_8)) } }
Example 116
Source File: Logger.scala From c4proto with Apache License 2.0 | 5 votes |
package ee.cone.c4actor_logback_impl import java.io.ByteArrayInputStream import java.nio.file.{Files, Path, Paths} import java.nio.charset.StandardCharsets.UTF_8 import ch.qos.logback.classic.LoggerContext import ch.qos.logback.classic.joran.JoranConfigurator import com.typesafe.scalalogging.LazyLogging import ee.cone.c4actor._ import ee.cone.c4di.c4 import org.slf4j.LoggerFactory import scala.annotation.tailrec @c4("BasicLoggingApp") final class LoggerTest extends Executable with Early with LazyLogging { def run(): Unit = if(Option(System.getenv("C4LOGBACK_TEST")).nonEmpty) iteration(0L) @tailrec private def iteration(v: Long): Unit = { Thread.sleep(1000) logger.warn(s"logger test $v") logger.debug(s"logger test $v") iteration(v+1L) } } @c4("BasicLoggingApp") final class DefLoggerConfigurator( config: ListConfig, catchNonFatal: CatchNonFatal ) extends LoggerConfigurator( config.get("C4LOGBACK_XML").map(Paths.get(_)) ::: Paths.get("/tmp/logback.xml") :: Nil, catchNonFatal, 5000 ) with Executable with Early class LoggerConfigurator(paths: List[Path], catchNonFatal: CatchNonFatal, scanPeriod: Long) extends Executable { def run(): Unit = iteration("") @tailrec private def iteration(wasContent: String): Unit = { val content = s""" <configuration> <statusListener class="ch.qos.logback.core.status.NopStatusListener" /> ${paths.map(path=>if(Files.exists (path)) new String(Files.readAllBytes(path), UTF_8) else "").mkString} <appender name="CON" class="ch.qos.logback.core.ConsoleAppender"> <encoder><pattern>%d{HH:mm:ss.SSS} %-5level %logger{36} - %msg%n</pattern></encoder> </appender> <appender name="ASYNСCON" class="ch.qos.logback.classic.AsyncAppender"> <discardingThreshold>0</discardingThreshold> <queueSize>1000000</queueSize> <appender-ref ref="CON" /> </appender> <root level="INFO"> <appender-ref ref="ASYNСCON" /> </root> <shutdownHook/> </configuration> """ if(wasContent != content) reconfigure(content) Thread.sleep(scanPeriod) iteration(content) } def reconfigure(content: String): Unit = catchNonFatal{ println("logback reconfigure 2 started") val context = LoggerFactory.getILoggerFactory.asInstanceOf[LoggerContext] val configurator = new JoranConfigurator() configurator.setContext(context) context.reset() configurator.doConfigure(new ByteArrayInputStream(content.getBytes(UTF_8))) println("logback reconfigure 2 ok") }("reconfigure"){ e => () } }
Example 117
Source File: GenericAvroSerializerSuite.scala From BigDatalog with Apache License 2.0 | 5 votes |
package org.apache.spark.serializer import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.nio.ByteBuffer import com.esotericsoftware.kryo.io.{Output, Input} import org.apache.avro.{SchemaBuilder, Schema} import org.apache.avro.generic.GenericData.Record import org.apache.spark.{SparkFunSuite, SharedSparkContext} class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext { conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") val schema : Schema = SchemaBuilder .record("testRecord").fields() .requiredString("data") .endRecord() val record = new Record(schema) record.put("data", "test data") test("schema compression and decompression") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema)))) } test("record serialization and deserialization") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val outputStream = new ByteArrayOutputStream() val output = new Output(outputStream) genericSer.serializeDatum(record, output) output.flush() output.close() val input = new Input(new ByteArrayInputStream(outputStream.toByteArray)) assert(genericSer.deserializeDatum(input) === record) } test("uses schema fingerprint to decrease message size") { val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema) val output = new Output(new ByteArrayOutputStream()) val beginningNormalPosition = output.total() genericSerFull.serializeDatum(record, output) output.flush() val normalLength = output.total - beginningNormalPosition conf.registerAvroSchemas(schema) val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema) val beginningFingerprintPosition = output.total() genericSerFinger.serializeDatum(record, output) val fingerprintLength = output.total - beginningFingerprintPosition assert(fingerprintLength < normalLength) } test("caches previously seen schemas") { val genericSer = new GenericAvroSerializer(conf.getAvroSchema) val compressedSchema = genericSer.compress(schema) val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema)) assert(compressedSchema.eq(genericSer.compress(schema))) assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema)))) } }
Example 118
Source File: SerializationTestHelper.scala From xmlconfect with Apache License 2.0 | 5 votes |
package com.mthaler.xmlconfect import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream } object SerializationTestHelper { def serializeDeserialize[T](obj: T): T = { val bout = new ByteArrayOutputStream() val out = new ObjectOutputStream(bout) out.writeObject(obj) val bin = new ByteArrayInputStream(bout.toByteArray) val in = new ObjectInputStream(bin) in.readObject().asInstanceOf[T] } }
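A quick usage sketch in a test (the Person case class is made up for illustration; case classes are Serializable by default):

case class Person(name: String, age: Int)

// Round-trip the value through Java serialization and check structural equality.
val roundTripped = SerializationTestHelper.serializeDeserialize(Person("Ada", 36))
assert(roundTripped == Person("Ada", 36))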
Example 119
Source File: ToCurlConverterTest.scala From sttp with Apache License 2.0 | 5 votes |
package sttp.client import java.io.ByteArrayInputStream import java.nio.charset.StandardCharsets import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers class ToCurlConverterTest extends AnyFlatSpec with Matchers with ToCurlConverterTestExtension { private val localhost = uri"http://localhost" it should "convert base request" in { basicRequest .get(uri"$localhost") .toCurl shouldBe """curl -L --max-redirs 32 -X GET 'http://localhost'""" } it should "convert request with method to curl" in { basicRequest.get(localhost).toCurl should include("-X GET") basicRequest.post(localhost).toCurl should include("-X POST") basicRequest.put(localhost).toCurl should include("-X PUT") basicRequest.delete(localhost).toCurl should include("-X DELETE") basicRequest.patch(localhost).toCurl should include("-X PATCH") basicRequest.head(localhost).toCurl should include("-X HEAD") basicRequest.options(localhost).toCurl should include("-X OPTIONS") } it should "convert request with header" in { basicRequest.header("User-Agent", "myapp").get(localhost).toCurl should include( """-H 'User-Agent: myapp'""" ) } it should "convert request with body" in { basicRequest.body(Map("name" -> "john", "org" -> "sml")).post(localhost).toCurl should include( """-H 'Content-Type: application/x-www-form-urlencoded' -H 'Content-Length: 17' -F 'name=john&org=sml'""" ) basicRequest.body("name=john").post(localhost).toCurl should include( """-H 'Content-Type: text/plain; charset=utf-8' -H 'Content-Length: 9' --data 'name=john'""" ) basicRequest.body("name=john", StandardCharsets.ISO_8859_1.name()).post(localhost).toCurl should include( """-H 'Content-Type: text/plain; charset=ISO-8859-1' -H 'Content-Length: 9' --data 'name=john'""" ) basicRequest.body("name='john'").post(localhost).toCurl should include( """-H 'Content-Type: text/plain; charset=utf-8' -H 'Content-Length: 11' --data 'name=\'john\''""" ) basicRequest.body("name=\"john\"").post(localhost).toCurl should include( """-H 'Content-Type: text/plain; charset=utf-8' -H 'Content-Length: 11' --data 'name="john"'""" ) } it should "convert request with options" in { basicRequest.followRedirects(false).get(localhost).toCurl should not include "-L" basicRequest.maxRedirects(11).get(localhost).toCurl should include("--max-redirs 11") } it should "put placeholder when sending binary data" in { val testBodyBytes = "this is the body".getBytes("UTF-8") val curl = basicRequest .post(localhost) .body(new ByteArrayInputStream(testBodyBytes)) .toCurl curl should include("--data-binary <PLACEHOLDER>") } it should "render multipart form data if content is a plain string" in { basicRequest.multipartBody(multipart("k1", "v1"), multipart("k2", "v2")).post(localhost).toCurl should include( """--form 'k1=v1' --form 'k2=v2'""" ) } }
Example 120
Source File: RetryWhenDefaultTest.scala From sttp with Apache License 2.0 | 5 votes |
package sttp.client import java.io.ByteArrayInputStream import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers import sttp.client import sttp.model.StatusCode class RetryWhenDefaultTest extends AnyFlatSpec with Matchers { private val simpleRequest = basicRequest.get(uri"http://localhost") it should "not retry 200 response" in { RetryWhen.Default(simpleRequest, Right(Response.ok(""))) shouldBe false } it should "retry 500 response" in { RetryWhen.Default(simpleRequest, Right(Response("", StatusCode.InternalServerError))) shouldBe true } it should "retry connection exceptions" in { RetryWhen.Default(simpleRequest, Left(new client.SttpClientException.ConnectException(null))) shouldBe true } it should "not retry read exceptions" in { RetryWhen.Default(simpleRequest, Left(new client.SttpClientException.ReadException(null))) shouldBe false } it should "not retry input stream bodies" in { RetryWhen.Default( simpleRequest.body(new ByteArrayInputStream(new Array[Byte](8))), Right(Response("", StatusCode.InternalServerError)) ) shouldBe false } }
Example 121
Source File: BackupWriter.scala From recogito2 with Apache License 2.0 | 5 votes |
package controllers.document import controllers.HasConfig import java.io.{File, FileInputStream, FileOutputStream, BufferedInputStream, ByteArrayInputStream, InputStream, PrintWriter} import java.nio.file.Paths import java.math.BigInteger import java.security.{MessageDigest, DigestInputStream} import java.util.UUID import java.util.zip.{ZipEntry, ZipOutputStream} import services.HasDate import services.annotation.{Annotation, AnnotationService} import services.document.{ExtendedDocumentMetadata, DocumentToJSON} import services.generated.tables.records.{DocumentRecord, DocumentFilepartRecord} import play.api.libs.json.Json import play.api.libs.Files.TemporaryFileCreator import scala.concurrent.{ExecutionContext, Future} import storage.TempDir import storage.uploads.Uploads trait BackupWriter extends HasBackupValidation { self: HasConfig => // Frontend annotation format import services.annotation.FrontendAnnotation._ private val BUFFER_SIZE = 2048 private def writeToZip(inputStream: InputStream, filename: String, zip: ZipOutputStream) = { zip.putNextEntry(new ZipEntry(filename)) val md = MessageDigest.getInstance(ALGORITHM) val in = new DigestInputStream(new BufferedInputStream(inputStream), md) var data= new Array[Byte](BUFFER_SIZE) var count: Int = 0 while ({ count = in.read(data, 0, BUFFER_SIZE); count } > -1) { zip.write(data, 0, count) } in.close() zip.closeEntry() new BigInteger(1, md.digest()).toString(16) } def createBackup(doc: ExtendedDocumentMetadata)(implicit ctx: ExecutionContext, uploads: Uploads, annotations: AnnotationService, tmpFile: TemporaryFileCreator): Future[File] = { def getFileAsStream(owner: String, documentId: String, filename: String) = { val dir = uploads.getDocumentDir(owner, documentId).get // Fail hard if the dir doesn't exist new FileInputStream(new File(dir, filename)) } def getManifestAsStream() = { val manifest = "Recogito-Version: 2.0.1-alpha" new ByteArrayInputStream(manifest.getBytes) } def getMetadataAsStream(doc: ExtendedDocumentMetadata) = { // DocumentRecord JSON serialization import services.document.DocumentToJSON._ val json = Json.prettyPrint(Json.toJson((doc.document, doc.fileparts))) new ByteArrayInputStream(json.getBytes) } def getAnnotationsAsStream(docId: String, annotations: Seq[Annotation], parts: Seq[DocumentFilepartRecord]): InputStream = { val path = Paths.get(TempDir.get()(self.config), s"${docId}_annotations.json") val tmp = tmpFile.create(path) val writer = new PrintWriter(path.toFile) annotations.foreach(a => writer.println(Json.stringify(Json.toJson(a)))) writer.close() new FileInputStream(path.toFile) } Future { tmpFile.create(Paths.get(TempDir.get()(self.config), s"${doc.id}.zip")) } flatMap { zipFile => val zipStream = new ZipOutputStream(new FileOutputStream(zipFile.path.toFile)) writeToZip(getManifestAsStream(), "manifest", zipStream) val metadataHash = writeToZip(getMetadataAsStream(doc), "metadata.json", zipStream) val fileHashes = doc.fileparts.map { part => writeToZip(getFileAsStream(doc.ownerName, doc.id, part.getFile), "parts" + File.separator + part.getFile, zipStream) } annotations.findByDocId(doc.id).map { annotations => val annotationsHash = writeToZip(getAnnotationsAsStream(doc.id, annotations.map(_._1), doc.fileparts), "annotations.jsonl", zipStream) val signature = computeSignature(metadataHash, fileHashes, annotationsHash) writeToZip(new ByteArrayInputStream(signature.getBytes), "signature", zipStream) zipStream.close() zipFile.path.toFile } } } }
Example 122
Source File: DefineMacroCmd.scala From piglet with Apache License 2.0 | 5 votes |
package dbis.piglet.op.cmd

import java.io.{ObjectInputStream, ByteArrayInputStream, ObjectOutputStream, ByteArrayOutputStream}
import dbis.piglet.plan.DataflowPlan
import scala.collection.mutable.ListBuffer
import dbis.piglet.op.{Pipe, PigOperator}

case class DefineMacroCmd(
    out: Pipe,
    macroName: String,
    params: Option[List[String]],
    stmts: List[PigOperator]
  ) extends PigOperator(out) {

  var subPlan: Option[DataflowPlan] = None
  var inPipes = List[Pipe]()

  // Clones the command (including its operator list) via Java serialization.
  def deepClone(): DefineMacroCmd = {
    val baos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(baos)
    oos.writeObject(this)
    val bais = new ByteArrayInputStream(baos.toByteArray())
    val ois = new ObjectInputStream(bais)
    ois.readObject().asInstanceOf[DefineMacroCmd]
  }

  override def preparePlan: Unit = {
    // Positions (within the macro's parameter list) of the parameters that are pipes.
    def pipeParamPositions(): List[Int] = {
      val l = ListBuffer[Int]()
      inPipes.foreach(i => {
        val pos = params.get.indexOf(i.name.substring(1))
        if (pos >= 0) l += pos
      })
      l.toList
    }
  }
}
Example 123
Source File: ManifestUploader.scala From teamcity-s3-plugin with Apache License 2.0 | 5 votes |
package com.gu.teamcity import java.io.ByteArrayInputStream import java.util.Date import jetbrains.buildServer.messages.{BuildMessage1, DefaultMessagesInfo, Status} import jetbrains.buildServer.serverSide.{BuildServerAdapter, SRunningBuild} import org.joda.time.{DateTime, DateTimeZone} import org.json4s.JsonAST.JObject import org.json4s.JsonDSL._ import org.json4s.native.JsonMethods._ import scala.util.{Failure, Success} class ManifestUploader(config: S3ConfigManager, s3: S3) extends BuildServerAdapter { override def beforeBuildFinish(runningBuild: SRunningBuild) { import scala.collection.convert.wrapAsScala._ if (!runningBuild.isHasInternalArtifactsOnly) { val properties = Seq( "projectName" -> S3Plugin.cleanFullName(runningBuild), "buildNumber" -> runningBuild.getBuildNumber, "startTime" -> new DateTime(runningBuild.getStartDate).withZone(DateTimeZone.UTC).toString //Joda default is ISO8601 ) ++ runningBuild.getRevisions.flatMap(revision => Seq( "revision" -> revision.getRevision, "vcsURL" -> revision.getRoot.getProperties.get("url") )) ++ Option(runningBuild.getBranch).map(b => "branch" -> b.getDisplayName ).orElse(runningBuild.getVcsRootEntries.headOption.map(r => "branch" -> r.getProperties.get("branch") )) val propertiesJSON = pretty(render(properties.foldLeft(JObject())(_ ~ _))) val jsBytes = propertiesJSON.getBytes("UTF-8") config.buildManifestBucket.map { bucket => s3.upload(bucket, runningBuild, "build.json", new ByteArrayInputStream(jsBytes), jsBytes.length) match { case Failure(e) => runningBuild.getBuildLog().message(s"Error uploading manifest: ${e.getMessage}", Status.ERROR,new Date,DefaultMessagesInfo.MSG_BUILD_FAILURE,DefaultMessagesInfo.SOURCE_ID,null) case Success(_) => runningBuild.getBuildLog().message("Manifest S3 upload complete", Status.NORMAL,new Date,DefaultMessagesInfo.MSG_TEXT,DefaultMessagesInfo.SOURCE_ID,null) } } } } private def normalMessage(text: String) = new BuildMessage1(DefaultMessagesInfo.SOURCE_ID, DefaultMessagesInfo.MSG_TEXT, Status.NORMAL, new Date, text) }
Example 124
Source File: ModelStateSerde.scala From kafka-with-akka-streams-kafka-streams-tutorial with Apache License 2.0 | 5 votes |
package com.lightbend.scala.kafkastreams.store.store import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} import java.util import com.lightbend.model.modeldescriptor.ModelDescriptor import com.lightbend.scala.modelServer.model.PMML.PMMLModel import com.lightbend.scala.modelServer.model.tensorflow.TensorFlowModel import com.lightbend.scala.modelServer.model.{ModelToServeStats, ModelWithDescriptor} import com.lightbend.scala.kafkastreams.store.StoreState import org.apache.kafka.common.serialization.{Deserializer, Serde, Serializer} class ModelStateSerde extends Serde[StoreState] { private val mserializer = new ModelStateSerializer() private val mdeserializer = new ModelStateDeserializer() override def deserializer() = mdeserializer override def serializer() = mserializer override def configure(configs: util.Map[String, _], isKey: Boolean) = {} override def close() = {} } object ModelStateDeserializer { val factories = Map( ModelDescriptor.ModelType.PMML.index -> PMMLModel, ModelDescriptor.ModelType.TENSORFLOW.index -> TensorFlowModel ) } class ModelStateDeserializer extends Deserializer[StoreState] { override def configure(configs: util.Map[String, _], isKey: Boolean): Unit = {} override def deserialize(topic: String, data: Array[Byte]): StoreState = { if(data != null) { val input = new DataInputStream(new ByteArrayInputStream(data)) new StoreState(ModelWithDescriptor.readModel(input), ModelWithDescriptor.readModel(input), ModelToServeStats.readServingInfo(input), ModelToServeStats.readServingInfo(input)) } else new StoreState() } override def close(): Unit = {} } class ModelStateSerializer extends Serializer[StoreState] { private val bos = new ByteArrayOutputStream() override def serialize(topic: String, state: StoreState): Array[Byte] = { bos.reset() val output = new DataOutputStream(bos) ModelWithDescriptor.writeModel(output, state.currentModel.orNull) ModelWithDescriptor.writeModel(output, state.newModel.orNull) ModelToServeStats.writeServingInfo(output, state.currentState.orNull) ModelToServeStats.writeServingInfo(output, state.newState.orNull) try { output.flush() output.close() } catch { case t: Throwable => } bos.toByteArray } override def close(): Unit = {} override def configure(configs: util.Map[String, _], isKey: Boolean) = {} }
Example 125
Source File: StringParserTest.scala From MoVE with Mozilla Public License 2.0 | 5 votes |
package de.thm.move.loader.parser import java.io.{ByteArrayInputStream, InputStreamReader} import java.nio.charset.StandardCharsets import scala.util._ import de.thm.move.MoveSpec import de.thm.move.loader.parser.PropertyParser._ import de.thm.move.loader.parser.ast._ class StringParserTest extends MoveSpec { val parser = new ModelicaParser() def parseString(str:String): String = { parser.stringLiteral(str) } "The parser for Modelica strings" should "parse simple strings" in { val s = "this is a super awesome test" true shouldBe true } "PropertyParser#transformEscapeChars" should "transform literal escape characters to ansi escape characters" in { val s = "this\\t\\tis a\\n test\\rmöb\\b" parser.transformEscapeChars(s) shouldBe "this\t\tis a\n test\rmöb\b" val s2 = "\\n\\n\\t" parser.transformEscapeChars(s2) shouldBe "\n\n\t" } it should "return the same string for strings without escape characters" in { val s = "this is awesome" parser.transformEscapeChars(s) shouldBe s } }
Example 126
Source File: package.scala From MoVE with Mozilla Public License 2.0 | 5 votes |
package de.thm.move.loader import java.io.ByteArrayInputStream import java.nio.charset.StandardCharsets import scala.util._ import org.scalatest.Matchers._ import de.thm.move.loader.parser.ast._ package object parser { private val parser:ModelicaParserLike = new ModelicaParser def parse(str:String): Try[List[Model]] = parser.parse(new ByteArrayInputStream(str.getBytes(StandardCharsets.UTF_8))) val withParseSuccess: String => Model = parse(_) match { case Success(elem) => elem.head case Failure(exc) => throw exc } val withException: String => Unit = parse(_) match { case Success(_) => throw new IllegalStateException("Expected failure") case Failure(_) => //yeay } def iconEqual(icon1:Model, icon2:Model): Unit = { icon2.name shouldBe icon1.name (icon1.annot, icon2.annot) match { case (Icon(system1, shapes1, _,_),Icon(system2,shapes2,_,_)) => system2 shouldBe system1 shapes2 shouldBe shapes1 case _ => throw new AssertionError(s"Given icon1 and icon2 aren't both Icons!") } } def annotationModel(modelname:String, content:String): String = s""" |model $modelname | annotation( | $content | ); |end $modelname; """.stripMargin def graphicModel(modelname:String, content:String):String = { annotationModel(modelname, s""" |Icon( graphics = { |$content |}) """.stripMargin ) } }
Example 127
Source File: ModelSerializationTestHelper.scala From aloha with MIT License | 5 votes |
package com.eharmony.aloha import java.io.{ObjectInputStream, ByteArrayInputStream, ByteArrayOutputStream, ObjectOutputStream} trait ModelSerializationTestHelper { def serializeDeserializeRoundTrip[A <: java.io.Serializable](a: A): A = { val baos = new ByteArrayOutputStream() val oos = new ObjectOutputStream(baos) oos.writeObject(a) val bais = new ByteArrayInputStream(baos.toByteArray) val ois = new ObjectInputStream(bais) val out = ois.readObject() out.asInstanceOf[A] } }
Example 128
Source File: CypherParser.scala From ingraph with Eclipse Public License 1.0 | 5 votes |
package ingraph.compiler.cypher2gplan import java.io.ByteArrayInputStream import ingraph.compiler.exceptions.CompilerException import org.apache.log4j.{Level, Logger} import org.eclipse.emf.common.util.URI import org.eclipse.emf.ecore.resource.Resource import org.eclipse.xtext.diagnostics.Severity import org.eclipse.xtext.resource.{XtextResource, XtextResourceSet} import org.eclipse.xtext.util.CancelIndicator import org.eclipse.xtext.validation.CheckMode import org.slizaa.neo4j.opencypher.OpenCypherStandaloneSetup import org.slizaa.neo4j.opencypher.openCypher.Cypher import scala.collection.JavaConverters._ object CypherParser { def parseFile(fileName: String): Cypher = { Logger.getLogger("org.eclipse.xtext").setLevel(Level.ERROR) // https://typefox.io/how-and-why-use-xtext-without-the-ide val injector = new OpenCypherStandaloneSetup().createInjectorAndDoEMFRegistration() val resourceSet = injector.getInstance(classOf[XtextResourceSet]) val filePath = "../queries/" + fileName + ".cypher" val resource = resourceSet.getResource(URI.createFileURI(filePath), true) validateAndThrowError(resource) resource.getContents.get(0).asInstanceOf[Cypher] } def parseString(queryString: String): Cypher = { Logger.getLogger("org.eclipse.xtext").setLevel(Level.ERROR) // https://wiki.eclipse.org/Xtext/FAQ val injector = new OpenCypherStandaloneSetup().createInjectorAndDoEMFRegistration() val resourceSet = injector.getInstance(classOf[XtextResourceSet]) val resource = resourceSet.createResource(URI.createURI("http:/example.cypher")) val in = new ByteArrayInputStream(queryString.getBytes()) resource.load(in, resourceSet.getLoadOptions()) validateAndThrowError(resource) resource.getContents.get(0).asInstanceOf[Cypher] } def validateAndThrowError(resource: Resource) { var seenError = false var firstError: String = null val validator = resource.asInstanceOf[XtextResource].getResourceServiceProvider.getResourceValidator val issues = validator.validate(resource, CheckMode.ALL, CancelIndicator.NullImpl).asScala for (issue <- issues) { if (issue.getSeverity == Severity.ERROR && !seenError) { seenError = true firstError = issue.getMessage } println(issue.getMessage) } if (seenError) { throw new CompilerException(s"Error during cypher parse, the first error was: ${firstError}") } } }
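Both entry points return the parsed Cypher EMF model and throw a CompilerException if validation reports errors. A one-line sketch of the string variant with an example query:

val cypherAst: Cypher = CypherParser.parseString("MATCH (n:Person) RETURN n.name")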
Example 129
Source File: JacksonParserSuite.scala From circe-jackson with Apache License 2.0 | 5 votes |
package io.circe.jackson import cats.data.Validated import com.fasterxml.jackson.core.JsonToken import com.fasterxml.jackson.databind.{ ObjectMapper, ObjectReader } import io.circe.Json import io.circe.testing.ParserTests import java.io.{ ByteArrayInputStream, File } import scala.io.Source class JacksonParserSuite extends CirceSuite with JacksonInstances { checkAll("Parser", ParserTests(`package`).fromString(arbitraryCleanedJson, shrinkJson)) checkAll( "Parser", ParserTests(`package`).fromFunction[Array[Byte]]("fromByteArray")( s => s.getBytes("UTF-8"), p => p.parseByteArray _, p => p.decodeByteArray[Json] _, p => p.decodeByteArrayAccumulating[Json] _ )(arbitraryCleanedJson, shrinkJson) ) "parse and decode(Accumulating)" should "fail on invalid input" in forAll { (s: String) => assert(parse(s"Not JSON $s").isLeft) assert(decode[Json](s"Not JSON $s").isLeft) assert(decodeAccumulating[Json](s"Not JSON $s").isInvalid) } "parseFile and decodeFile(Accumulating)" should "parse a JSON file" in { val url = getClass.getResource("/io/circe/jackson/examples/glossary.json") val file = new File(url.toURI) assert(decodeFile[Json](file) === Right(glossary)) assert(decodeFileAccumulating[Json](file) == Validated.valid(glossary)) assert(parseFile(file) === Right(glossary)) } "parseByteArray and decodeByteArray(Accumulating)" should "parse an array of elementAsBytes" in { val bytes = glossaryAsBytes assert(decodeByteArray[Json](bytes) === Right(glossary)) assert(decodeByteArrayAccumulating[Json](bytes) === Validated.valid(glossary)) assert(parseByteArray(bytes) === Right(glossary)) } for (elementCount <- 1 to 4) { "CirceJsonDeserializer" should s"be useable with Jackson's MappingIterator " + s"with ${elementCount} elements in array" in { val input = new ByteArrayInputStream(createJsonArrayAsBytes(glossaryAsBytes, elementCount)) val objectMapper = new ObjectMapper() objectMapper.registerModule(CirceJsonModule) val jsonParser = objectMapper.getFactory.createParser(input) assert(jsonParser.nextToken() == JsonToken.START_ARRAY) assert(jsonParser.nextToken() == JsonToken.START_OBJECT) val reader = createReader(objectMapper).forType(classOf[Json]) val iterator = reader.readValues[Json](jsonParser) var counter = 0 while (iterator.hasNext) { val glossaryFromIterator = iterator.next() assert(glossary == glossaryFromIterator) counter = counter + 1 } assert(counter == elementCount) } } // workaround warnings from compiler with Jackson 2.5 @unchecked private def createReader(objectMapper: ObjectMapper): ObjectReader = objectMapper.reader() private def createJsonArrayAsBytes(elementAsBytes: Array[Byte], elementCount: Int): Array[Byte] = { val byteArrayOutput = new java.io.ByteArrayOutputStream() byteArrayOutput.write('[') for (i <- 1 to elementCount) { if (i != 1) { byteArrayOutput.write(',') } byteArrayOutput.write(elementAsBytes) } byteArrayOutput.write(']') byteArrayOutput.toByteArray } private def glossaryAsBytes = { val stream = getClass.getResourceAsStream("/io/circe/jackson/examples/glossary.json") val source = Source.fromInputStream(stream) val bytes = source.map(_.toByte).toArray source.close() bytes } }
Example 130
Source File: Zip.scala From scala-clippy with Apache License 2.0 | 5 votes |
package util import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.util.zip.{GZIPInputStream, GZIPOutputStream} object Zip { private val BufferSize = 512 def compress(string: String): Array[Byte] = { val os = new ByteArrayOutputStream(string.length() / 5) val gos = new GZIPOutputStream(os) gos.write(string.getBytes("UTF-8")) gos.close() os.close() os.toByteArray } def decompress(compressed: Array[Byte]): String = { val is = new ByteArrayInputStream(compressed) val gis = new GZIPInputStream(is, BufferSize) val string = new StringBuilder() val data = new Array[Byte](BufferSize) var bytesRead = gis.read(data) while (bytesRead != -1) { string.append(new String(data, 0, bytesRead, "UTF-8")) bytesRead = gis.read(data) } gis.close() is.close() string.toString() } }
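Usage is symmetric: decompress(compress(s)) returns the original string. A minimal sketch:

val original = "repetitive text " * 100
val packed = Zip.compress(original)    // GZIP-compressed UTF-8 bytes
val unpacked = Zip.decompress(packed)  // back to the original string
assert(unpacked == original && packed.length < original.getBytes("UTF-8").length)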
Example 131
Source File: PlayRequestToRawBody.scala From tapir with Apache License 2.0 | 5 votes |
package sttp.tapir.server.play import java.io.ByteArrayInputStream import java.nio.charset.Charset import akka.stream.Materializer import akka.util.ByteString import play.api.mvc.{RawBuffer, Request} import play.core.parsers.Multipart import sttp.model.Part import sttp.tapir.{RawBodyType, RawPart} import sttp.tapir.internal._ import scala.concurrent.ExecutionContext.Implicits.global import scala.concurrent.Future class PlayRequestToRawBody(serverOptions: PlayServerOptions) { def apply[R](bodyType: RawBodyType[R], charset: Option[Charset], request: Request[RawBuffer], body: ByteString)(implicit mat: Materializer ): Future[R] = { bodyType match { case RawBodyType.StringBody(defaultCharset) => Future(new String(body.toArray, charset.getOrElse(defaultCharset))) case RawBodyType.ByteArrayBody => Future(body.toArray) case RawBodyType.ByteBufferBody => Future(body.toByteBuffer) case RawBodyType.InputStreamBody => Future(body.toArray).map(new ByteArrayInputStream(_)) case RawBodyType.FileBody => Future(java.nio.file.Files.write(serverOptions.temporaryFileCreator.create().path, body.toArray)) .map(p => p.toFile) case m: RawBodyType.MultipartBody => multiPartRequestToRawBody(request, m, body) } } private def multiPartRequestToRawBody[R](request: Request[RawBuffer], m: RawBodyType.MultipartBody, body: ByteString)(implicit mat: Materializer ): Future[Seq[RawPart]] = { val bodyParser = serverOptions.playBodyParsers.multipartFormData( Multipart.handleFilePartAsTemporaryFile(serverOptions.temporaryFileCreator) ) bodyParser.apply(request).run(body).flatMap { case Left(_) => Future.failed(new IllegalArgumentException("Unable to parse multipart form data.")) // TODO case Right(value) => val dataParts = value.dataParts.map { case (key, value) => apply( m.partType(key).get, charset(m.partType(key).get), request, ByteString(value.flatMap(_.getBytes).toArray) ).map(body => Part(key, body).asInstanceOf[RawPart]) }.toSeq val fileParts = value.files.map(f => { apply( m.partType(f.key).get, charset(m.partType(f.key).get), request, ByteString.apply(java.nio.file.Files.readAllBytes(f.ref.path)) ).map(body => Part(f.key, body, Map(f.key -> f.dispositionType, Part.FileNameDispositionParam -> f.filename), Nil) .asInstanceOf[RawPart] ) }) Future.sequence(dataParts ++ fileParts) } } }
Example 132
Source File: package.scala From tapir with Apache License 2.0 | 5 votes |
package sttp.tapir.server.vertx import java.io.{ByteArrayInputStream, InputStream} import io.vertx.core.buffer.Buffer import io.vertx.scala.core.Vertx import scala.concurrent.Future package object encoders { private val bufferSize = 1024 private[vertx] def inputStreamToBuffer(is: InputStream, vertx: Vertx): Future[Buffer] = { is match { case _: ByteArrayInputStream => Future.successful(inputStreamToBufferUnsafe(is)) case _ => vertx.executeBlocking(() => inputStreamToBufferUnsafe(is)) } } private def inputStreamToBufferUnsafe(is: InputStream): Buffer = { val buffer = Buffer.buffer() val buf = new Array[Byte](bufferSize) while (is.available() > 0) { val read = is.read(buf) buffer.appendBytes(buf, 0, read) } buffer } }
Example 133
Source File: FinatraRequestToRawBody.scala From tapir with Apache License 2.0 | 5 votes |
package sttp.tapir.server.finatra import java.io.ByteArrayInputStream import java.nio.ByteBuffer import java.nio.charset.Charset import com.twitter.finagle.http.Request import com.twitter.finatra.http.request.RequestUtils import com.twitter.io.Buf import com.twitter.util.Future import org.apache.commons.fileupload.FileItemHeaders import sttp.model.{Part, Header} import sttp.tapir.{RawPart, RawBodyType} import scala.collection.immutable.Seq import scala.collection.JavaConverters._ class FinatraRequestToRawBody(serverOptions: FinatraServerOptions) { def apply[R](bodyType: RawBodyType[R], body: Buf, charset: Option[Charset], request: Request): Future[R] = { def asByteArray: Array[Byte] = { val array = new Array[Byte](body.length) body.write(array, 0) array } def asByteBuffer: ByteBuffer = { val buffer = ByteBuffer.allocate(body.length) body.write(buffer) buffer.flip() buffer } bodyType match { case RawBodyType.StringBody(defaultCharset) => Future.value[R](new String(asByteArray, charset.getOrElse(defaultCharset))) case RawBodyType.ByteArrayBody => Future.value[R](asByteArray) case RawBodyType.ByteBufferBody => Future.value[R](asByteBuffer) case RawBodyType.InputStreamBody => Future.value[R](new ByteArrayInputStream(asByteArray)) case RawBodyType.FileBody => serverOptions.createFile(asByteArray) case m: RawBodyType.MultipartBody => multiPartRequestToRawBody(request, m) } } private def parseDispositionParams(headerValue: Option[String]): Map[String, String] = headerValue .map( _.split(";") .map(_.trim) .tail .map(_.split("=")) .map(array => array(0) -> array(1)) .toMap ) .getOrElse(Map.empty) private def getCharset(contentType: Option[String]): Option[Charset] = contentType.flatMap( _.split(";") .map(_.trim) .tail .map(_.split("=")) .map(array => array(0) -> array(1)) .toMap .get("charset") .map(Charset.forName) ) private def multiPartRequestToRawBody(request: Request, m: RawBodyType.MultipartBody): Future[Seq[RawPart]] = { def fileItemHeaders(headers: FileItemHeaders): Seq[Header] = { headers.getHeaderNames.asScala .flatMap { name => headers.getHeaders(name).asScala.map(name -> _) } .toSeq .filter(_._1.toLowerCase != "content-disposition") .map { case (k, v) => Header(k, v) } .toList } Future .collect( RequestUtils .multiParams(request) .flatMap { case (name, multiPartItem) => val dispositionParams: Map[String, String] = parseDispositionParams(Option(multiPartItem.headers.getHeader("content-disposition"))) val charset = getCharset(multiPartItem.contentType) for { partType <- m.partType(name) futureBody = apply(partType, Buf.ByteArray.Owned(multiPartItem.data), charset, request) } yield futureBody .map(body => Part(name, body, otherDispositionParams = dispositionParams - "name", headers = fileItemHeaders(multiPartItem.headers)) .asInstanceOf[RawPart] ) } .toSeq ) .map(_.toList) } }
Example 134
Source File: Http4sRequestToRawBody.scala From tapir with Apache License 2.0 | 5 votes |
package sttp.tapir.server.http4s import java.io.ByteArrayInputStream import cats.effect.{Blocker, ContextShift, Sync} import cats.implicits._ import fs2.Chunk import org.http4s.headers.{`Content-Disposition`, `Content-Type`} import org.http4s.{Charset, EntityDecoder, Request, multipart} import sttp.model.{Header, Part} import sttp.tapir.{RawPart, RawBodyType} class Http4sRequestToRawBody[F[_]: Sync: ContextShift](serverOptions: Http4sServerOptions[F]) { def apply[R](body: fs2.Stream[F, Byte], bodyType: RawBodyType[R], charset: Option[Charset], req: Request[F]): F[R] = { def asChunk: F[Chunk[Byte]] = body.compile.to(Chunk) def asByteArray: F[Array[Byte]] = body.compile.to(Chunk).map(_.toByteBuffer.array()) bodyType match { case RawBodyType.StringBody(defaultCharset) => asByteArray.map(new String(_, charset.map(_.nioCharset).getOrElse(defaultCharset))) case RawBodyType.ByteArrayBody => asByteArray case RawBodyType.ByteBufferBody => asChunk.map(_.toByteBuffer) case RawBodyType.InputStreamBody => asByteArray.map(new ByteArrayInputStream(_)) case RawBodyType.FileBody => serverOptions.createFile(serverOptions.blockingExecutionContext, req).flatMap { file => val fileSink = fs2.io.file.writeAll(file.toPath, Blocker.liftExecutionContext(serverOptions.blockingExecutionContext)) body.through(fileSink).compile.drain.map(_ => file) } case m: RawBodyType.MultipartBody => // TODO: use MultipartDecoder.mixedMultipart once available? implicitly[EntityDecoder[F, multipart.Multipart[F]]].decode(req, strict = false).value.flatMap { case Left(failure) => throw new IllegalArgumentException("Cannot decode multipart body: " + failure) // TODO case Right(mp) => val rawPartsF: Vector[F[RawPart]] = mp.parts .flatMap(part => part.name.flatMap(name => m.partType(name)).map((part, _)).toList) .map { case (part, codecMeta) => toRawPart(part, codecMeta, req).asInstanceOf[F[RawPart]] } val rawParts: F[Vector[RawPart]] = rawPartsF.sequence rawParts.asInstanceOf[F[R]] // R is Seq[RawPart] } } } private def toRawPart[R](part: multipart.Part[F], partType: RawBodyType[R], req: Request[F]): F[Part[R]] = { val dispositionParams = part.headers.get(`Content-Disposition`).map(_.parameters).getOrElse(Map.empty) val charset = part.headers.get(`Content-Type`).flatMap(_.charset) apply(part.body, partType, charset, req) .map(r => Part( part.name.getOrElse(""), r, otherDispositionParams = dispositionParams - Part.NameDispositionParam, headers = part.headers.toList.map(h => Header(h.name.value, h.value)) ) ) } }
Example 135
Source File: ConsoleModuleTest.scala From scala-server-toolkit with MIT License | 5 votes |
package com.avast.sst.jvm.system.console import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import cats.effect.SyncIO import org.scalatest.funsuite.AnyFunSuite import scala.{Console => SConsole} class ConsoleModuleTest extends AnyFunSuite { test("Console input") { SConsole.withIn(new ByteArrayInputStream("test input\n".getBytes("UTF-8"))) { val test = for { line <- ConsoleModule.make[SyncIO].readLine } yield assert(line === "test input") test.unsafeRunSync() } } test("Console output") { val out = new ByteArrayOutputStream() SConsole.withOut(out) { val test = for { _ <- ConsoleModule.make[SyncIO].printLine("test output") } yield () test.unsafeRunSync() } assert(out.toString("UTF-8") === "test output\n") } test("Console error") { val out = new ByteArrayOutputStream() SConsole.withErr(out) { val test = for { _ <- ConsoleModule.make[SyncIO].printLineToError("test output") } yield () test.unsafeRunSync() } assert(out.toString("UTF-8") === "test output\n") } }
Example 136
Source File: DataWeaveCLITest.scala From data-weave-native with Apache License 2.0 | 5 votes |
package org.mule.weave.dwnative.cli import java.io.ByteArrayInputStream import java.io.ByteArrayOutputStream import java.io.PrintStream import org.scalatest.FreeSpec import org.scalatest.Matchers import scala.io.Source class DataWeaveCLITest extends FreeSpec with Matchers { "should work with output application/json" in { val out = System.out try { val stream = new ByteArrayOutputStream() System.setOut(new PrintStream(stream, true)) new DataWeaveCLIRunner().run(Array("output application/json --- (1 to 3)[0]")) val source = Source.fromBytes(stream.toByteArray, "UTF-8") val result = source.mkString result.trim shouldBe "1" } finally { System.setOut(out) println("Finish OK 3") } } "should work with simple script and not output" in { val defaultOut = System.out try { val stream = new ByteArrayOutputStream() System.setOut(new PrintStream(stream, true)) new DataWeaveCLIRunner().run(Array("(1 to 3)[0]")) val source = Source.fromBytes(stream.toByteArray, "UTF-8") val result = source.mkString result.trim shouldBe "1" } finally { System.setOut(defaultOut) } } "should work ok when sending payload from stdin" in { val out = System.out val in = System.in try { val input = """[ | 1, | 2, | 3 |] """.stripMargin.trim val stream = new ByteArrayOutputStream() System.setOut(new PrintStream(stream, true)) System.setIn(new ByteArrayInputStream(input.getBytes("UTF-8"))) new DataWeaveCLIRunner().run(Array("payload[0]")) val source = Source.fromBytes(stream.toByteArray, "UTF-8") val result = source.mkString.trim source.close() result.trim shouldBe "1" } finally { System.setOut(out) System.setIn(in) println("Finish OK 2") } } "should work with light formats" in { val out = System.out val in = System.in try { val input = """[{ | "a" : 1, | "b" : 2, | "c" : 3 |}] """.stripMargin.trim val stream = new ByteArrayOutputStream() System.setOut(new PrintStream(stream, true)) System.setIn(new ByteArrayInputStream(input.getBytes("UTF-8"))) new DataWeaveCLIRunner().run(Array("input payload json output csv header=false ---payload")) val source = Source.fromBytes(stream.toByteArray, "UTF-8") val result = source.mkString.trim source.close() result.trim shouldBe "1,2,3" } finally { System.setOut(out) System.setIn(in) println("Finish OK 2") } } }
Example 137
Source File: ProxyRequestCodec.scala From aws-lambda-scala with MIT License | 5 votes |
package io.github.mkotsur.aws.codecs import java.io.ByteArrayInputStream import cats.syntax.either.catsSyntaxEither import io.circe.generic.auto._ import io.github.mkotsur.aws.handler.CanDecode import io.github.mkotsur.aws.proxy.ProxyRequest import shapeless.Generic import scala.language.{higherKinds, postfixOps} private[aws] trait ProxyRequestCodec extends AllCodec with FutureCodec { def GenericProxyRequestOf[T] = shapeless.Generic[ProxyRequest[T]] implicit def canDecodeProxyRequest[T](implicit canDecode: CanDecode[T]) = CanDecode.instance[ProxyRequest[T]] { is => { def extractBody(s: ProxyRequest[String]) = s.body match { case Some(bodyString) => canDecode.readStream(new ByteArrayInputStream(bodyString.getBytes)).map(Option.apply) case None => Right(None) } def produceProxyResponse(decodedRequestString: ProxyRequest[String], bodyOption: Option[T]) = { val reqList = Generic[ProxyRequest[String]].to(decodedRequestString) Generic[ProxyRequest[T]].from((bodyOption :: reqList.reverse.tail).reverse) } for (decodedRequest$String <- CanDecode[ProxyRequest[String]].readStream(is); decodedBodyOption <- extractBody(decodedRequest$String)) yield produceProxyResponse(decodedRequest$String, decodedBodyOption) } } }
Example 138
Source File: Serialization.scala From seahorse-workflow-executor with Apache License 2.0 | 5 votes |
package io.deepsense.commons.serialization import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} trait Serialization { def deserialize[T](bytes: Array[Byte]): T = { val bufferIn = new ByteArrayInputStream(bytes) val streamIn = new ObjectInputStream(bufferIn) try { streamIn.readObject().asInstanceOf[T] } finally { streamIn.close() } } def serialize[T](objectToSerialize: T): Array[Byte] = { val byteArrayOutputStream: ByteArrayOutputStream = new ByteArrayOutputStream() val oos = new ObjectOutputStream(byteArrayOutputStream) try { oos.writeObject(objectToSerialize) oos.flush() byteArrayOutputStream.toByteArray } finally { oos.close() } } def serializeDeserialize[T](obj: T): T = deserialize[T](serialize[T](obj)) } object Serialization extends Serialization
Example 139
Source File: GoogleDriveClient.scala From seahorse-workflow-executor with Apache License 2.0 | 5 votes |
package io.deepsense.deeplang.doperations.readwritedataframe.googlestorage import java.io.{ByteArrayInputStream, FileOutputStream} import java.util import com.google.api.client.googleapis.auth.oauth2.GoogleCredential import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport import com.google.api.client.http.FileContent import com.google.api.client.json.gson.GsonFactory import com.google.api.services.drive.model.File import com.google.api.services.drive.{Drive, DriveScopes} import io.deepsense.commons.resources.ManagedResource import io.deepsense.commons.utils.LoggerForCallerClass import io.deepsense.deeplang.doperations.inout.CsvParameters.ColumnSeparatorChoice private[googlestorage] object GoogleDriveClient { val logger = LoggerForCallerClass() val googleSheetCsvSeparator = ColumnSeparatorChoice.Comma() private val ApplicationName = "Seahorse" private val Scopes = util.Arrays.asList(DriveScopes.DRIVE) def uploadCsvFileAsGoogleSheet( credentials: GoogleCretendialsJson, sheetId: GoogleSheetId, filePath: String ): Unit = { val fileMetadata = new File().setMimeType("application/vnd.google-apps.spreadsheet") val mediaContent = new FileContent("text/csv", new java.io.File(filePath)) driveService(credentials).files.update(sheetId, fileMetadata, mediaContent).execute } def downloadGoogleSheetAsCsvFile( credentials: GoogleCretendialsJson, sheetId: GoogleSheetId, filePath: String ): Unit = { val file = new java.io.File(filePath) file.getParentFile.mkdirs() ManagedResource(new FileOutputStream(file)) { fos => driveService(credentials).files().export(sheetId, "text/csv").executeMediaAndDownloadTo(fos) logger.info(s"Downloaded google sheet id=$sheetId to the file $filePath") } } private def driveService(serviceAccountCredentials: String): Drive = { val credential = { val in = new ByteArrayInputStream(serviceAccountCredentials.getBytes) GoogleCredential.fromStream(in).createScoped(Scopes) } new Drive.Builder( GoogleNetHttpTransport.newTrustedTransport(), jsonFactory, credential ).setApplicationName(ApplicationName).build } // Default choice is JacksonFactory. However spark depends on Jackson as well // and google/spark jackson versions are binary incompatible with each other. private val jsonFactory = GsonFactory.getDefaultInstance }
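A hedged usage sketch, assuming GoogleCretendialsJson and GoogleSheetId are String aliases (as their use above suggests) and run from inside the googlestorage package, since the object is package-private; the sheet id and paths are placeholders:

val credentialsJson: GoogleCretendialsJson = sys.env("GOOGLE_SERVICE_ACCOUNT_JSON")
val sheetId: GoogleSheetId = "1aBcD-placeholder"

// Overwrite the sheet with a local CSV file, then pull it back down as CSV.
GoogleDriveClient.uploadCsvFileAsGoogleSheet(credentialsJson, sheetId, "/tmp/input.csv")
GoogleDriveClient.downloadGoogleSheetAsCsvFile(credentialsJson, sheetId, "/tmp/output.csv")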
Example 140
Source File: PythonNotebook.scala From seahorse-workflow-executor with Apache License 2.0 | 5 votes |
package io.deepsense.deeplang.doperations import java.io.ByteArrayInputStream import io.deepsense.commons.utils.Version import io.deepsense.deeplang.DOperation.Id import io.deepsense.deeplang.ExecutionContext import io.deepsense.deeplang.doperables.dataframe.DataFrame import scala.concurrent.Await import scala.concurrent.duration.Duration import scala.reflect.runtime.{universe => ru} import scala.util.Failure import io.deepsense.commons.rest.client.NotebookRestClient case class PythonNotebook() extends Notebook { override val id: Id = "e76ca616-0322-47a5-b390-70c9668265dd" override val name: String = "Python Notebook" override val description: String = "Creates a Python notebook with access to the DataFrame" override val since: Version = Version(1, 0, 0) override val notebookType: String = "python" override protected def execute(dataFrame: DataFrame)(context: ExecutionContext): Unit = { context.dataFrameStorage.setInputDataFrame(0, dataFrame.sparkDataFrame) headlessExecution(context) } }
Example 141
Source File: S3Brain.scala From sumobot with Apache License 2.0 | 5 votes |
package com.sumologic.sumobot.brain import java.io.{ByteArrayInputStream, ByteArrayOutputStream} import java.util.Properties import akka.actor.{Actor, Props} import com.amazonaws.auth.{AWSCredentials, AWSStaticCredentialsProvider} import com.amazonaws.services.s3.{AmazonS3Client, AmazonS3ClientBuilder} import com.amazonaws.services.s3.model.ObjectMetadata import com.sumologic.sumobot.brain.Brain._ import scala.collection.JavaConverters._ import scala.collection.immutable object S3Brain { def props(credentials: AWSCredentials, bucket: String, s3Key: String): Props = Props(classOf[S3Brain], credentials, bucket, s3Key) } class S3Brain(credentials: AWSCredentials, bucket: String, s3Key: String) extends Actor { private val s3Client = AmazonS3ClientBuilder.standard() .withCredentials(new AWSStaticCredentialsProvider(credentials)).build private var brainContents: Map[String, String] = loadFromS3() override def receive: Receive = { case Store(key, value) => brainContents += (key -> value) saveToS3(brainContents) case Remove(key) => brainContents -= key saveToS3(brainContents) case Retrieve(key) => brainContents.get(key) match { case Some(value) => sender() ! ValueRetrieved(key, value) case None => sender() ! ValueMissing(key) } case ListValues(prefix) => sender() ! ValueMap(brainContents.filter(_._1.startsWith(prefix))) } private def loadFromS3(): Map[String, String] = { if (s3Client.doesBucketExistV2(bucket)) { val props = new Properties() props.load(s3Client.getObject(bucket, s3Key).getObjectContent) immutable.Map(props.asScala.toSeq: _*) } else { Map.empty } } private def saveToS3(contents: Map[String, String]): Unit = { if (!s3Client.doesBucketExistV2(bucket)) { s3Client.createBucket(bucket) } val props = new Properties() props.putAll(contents.asJava) val out = new ByteArrayOutputStream() props.store(out, "") out.flush() out.close() val in = new ByteArrayInputStream(out.toByteArray) s3Client.putObject(bucket, s3Key, in, new ObjectMetadata()) } }
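Typical wiring inside an actor system might look like this; the bucket and key names are placeholders and credentials is assumed to be an AWSCredentials instance:

val brain = system.actorOf(S3Brain.props(credentials, "sumobot-state", "brain.properties"))

brain ! Brain.Store("greeting", "hello")
brain ! Brain.Retrieve("greeting") // replies ValueRetrieved("greeting", "hello"), or ValueMissing if absent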
Example 142
Source File: IncrementalCache.scala From sbt-idea-plugin with Apache License 2.0 | 5 votes |
package org.jetbrains.sbtidea.packaging.artifact import java.io.{BufferedOutputStream, ByteArrayInputStream, ObjectInputStream, ObjectOutputStream} import java.nio.file.{Files, Path} import sbt.Keys.TaskStreams import scala.collection.mutable trait IncrementalCache extends AutoCloseable { def fileChanged(in: Path): Boolean } class DumbIncrementalCache extends IncrementalCache { override def fileChanged(in: Path): Boolean = true override def close(): Unit = () } class PersistentIncrementalCache(private val root: Path)(implicit private val streams: TaskStreams) extends IncrementalCache { private val FILENAME = "sbtidea.cache" private val myFile = root.resolve(FILENAME) private val myData = loadOrCreate() type Data = mutable.HashMap[String, Long] private def loadFromDisk(): Either[String, Data] = { if (!Files.exists(myFile) || Files.size(myFile) <= 0) return Left("Cache file is empty or doesn't exist") val data = Files.readAllBytes(myFile) using(new ObjectInputStream(new ByteArrayInputStream(data))) { stream => Right(stream.readObject().asInstanceOf[Data]) } } private def loadOrCreate(): Data = loadFromDisk() match { case Left(message) => streams.log.info(message) new Data() case Right(value) => value } private def saveToDisk(): Unit = { import java.nio.file.StandardOpenOption._ if (!Files.exists(myFile.getParent)) { Files.createDirectories(myFile.getParent) Files.createFile(myFile) } using(new ObjectOutputStream( new BufferedOutputStream( Files.newOutputStream(myFile, CREATE, WRITE, TRUNCATE_EXISTING)))) { stream => stream.writeObject(myData) } } override def close(): Unit = saveToDisk() override def fileChanged(in: Path): Boolean = { val newTimestamp = Files.getLastModifiedTime(in).toMillis val inStr = in.toString val lastTimestamp = myData.getOrElseUpdate(inStr, newTimestamp) val result = newTimestamp > lastTimestamp myData.put(inStr, newTimestamp) result } }
Example 143
Source File: TableRowJsonIOTest.scala From ratatool with Apache License 2.0 | 5 votes |
package com.spotify.ratatool.io import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File} import com.spotify.ratatool.Schemas import com.spotify.ratatool.scalacheck._ import org.scalacheck.Gen import scala.jdk.CollectionConverters._ import org.scalatest.flatspec.AnyFlatSpec import org.scalatest.matchers.should.Matchers class TableRowJsonIOTest extends AnyFlatSpec with Matchers { private def floatGen = Gen.choose[Float](0.0F, 1.0F) private val schema = Schemas.tableSchema private val data = Gen.listOfN(100, tableRowOf(schema) .amend(Gen.oneOf( Gen.const(null), floatGen ))(_.getRecord("nullable_fields").set("float_field")) .amend(floatGen)(_.getRecord("required_fields").set("float_field")) .amend(Gen.nonEmptyListOf(floatGen) .map(_.asJava) )(_.getRecord("repeated_fields").set("float_field")) ).sample.get "TableRowJsonIO" should "work with stream" in { val out = new ByteArrayOutputStream() TableRowJsonIO.writeToOutputStream(data, out) val in = new ByteArrayInputStream(out.toByteArray) val result = TableRowJsonIO.readFromInputStream(in).toList.map(_.toString) result should equal (data.map(_.toString)) } it should "work with file" in { val file = File.createTempFile("ratatool-", ".json") file.deleteOnExit() TableRowJsonIO.writeToFile(data, file) val result = TableRowJsonIO.readFromFile(file).toList.map(_.toString) result should equal (data.map(_.toString)) } }
Example 144
Source File: TestHelper.scala From odsc-west-streaming-trends with GNU General Public License v3.0 | 5 votes |
package com.twilio.open.streaming.trend.discovery import java.io.{ByteArrayInputStream, InputStream} import java.nio.charset.StandardCharsets import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.google.protobuf.Message import com.googlecode.protobuf.format.JsonFormat import com.holdenkarau.spark.testing.{LocalSparkContext, SparkContextProvider} import com.twilio.open.protocol.Calls.CallEvent import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.sql.SparkSession import org.scalatest.{BeforeAndAfterAll, FlatSpec, Matchers, Suite} import org.slf4j.{Logger, LoggerFactory} import scala.collection.Seq import scala.io.Source import scala.reflect.ClassTag import scala.reflect.classTag object TestHelper { val log: Logger = LoggerFactory.getLogger("com.twilio.open.streaming.trend.discovery.TestHelper") val mapper: ObjectMapper = { val m = new ObjectMapper() m.registerModule(DefaultScalaModule) } val jsonFormat: JsonFormat = new JsonFormat def loadScenario[T<: Message : ClassTag](file: String): Seq[T] = { val fileString = Source.fromFile(file).mkString val parsed = mapper.readValue(fileString, classOf[Sceanario]) parsed.input.map { data => val json = mapper.writeValueAsString(data) convert[T](json) } } def convert[T<: Message : ClassTag](json: String): T = { val clazz = classTag[T].runtimeClass val builder = clazz.getMethod("newBuilder").invoke(clazz).asInstanceOf[Message.Builder] try { val input: InputStream = new ByteArrayInputStream(json.getBytes()) jsonFormat.merge(input, builder) builder.build().asInstanceOf[T] } catch { case e: Exception => throw e } } def asMockKafkaDataFrame(event: CallEvent): MockKafkaDataFrame = { val key = event.getEventId.getBytes(StandardCharsets.UTF_8) val value = event.toByteArray MockKafkaDataFrame(key, value) } } case class MockKafkaDataFrame(key: Array[Byte], value: Array[Byte]) @SerialVersionUID(1L) case class KafkaDataFrame(key: Array[Byte], topic: Array[Byte], value: Array[Byte]) extends Serializable case class Sceanario(input: Seq[Any], expected: Option[Any] = None) trait SparkSqlTest extends BeforeAndAfterAll with SparkContextProvider { self: Suite => @transient var _sparkSql: SparkSession = _ @transient private var _sc: SparkContext = _ override def sc: SparkContext = _sc def conf: SparkConf def sparkSql: SparkSession = _sparkSql override def beforeAll() { _sparkSql = SparkSession.builder().config(conf).getOrCreate() _sc = _sparkSql.sparkContext setup(_sc) super.beforeAll() } override def afterAll() { try { _sparkSql.close() _sparkSql = null LocalSparkContext.stop(_sc) _sc = null } finally { super.afterAll() } } }
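A hedged example of loading a scenario file; the resource path is illustrative:

// Deserialize a JSON scenario into protobuf CallEvent messages, then wrap them for the mock Kafka source.
val events: Seq[CallEvent] = TestHelper.loadScenario[CallEvent]("src/test/resources/scenarios/call_events.json")
val frames = events.map(TestHelper.asMockKafkaDataFrame)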
Example 145
Source File: Serialization.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.commons.serialization import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream} trait Serialization { def deserialize[T](bytes: Array[Byte]): T = { val bufferIn = new ByteArrayInputStream(bytes) val streamIn = new ObjectInputStream(bufferIn) try { streamIn.readObject().asInstanceOf[T] } finally { streamIn.close() } } def serialize[T](objectToSerialize: T): Array[Byte] = { val byteArrayOutputStream: ByteArrayOutputStream = new ByteArrayOutputStream() val oos = new ObjectOutputStream(byteArrayOutputStream) try { oos.writeObject(objectToSerialize) oos.flush() byteArrayOutputStream.toByteArray } finally { oos.close() } } def serializeDeserialize[T](obj: T): T = deserialize[T](serialize[T](obj)) } object Serialization extends Serialization
Example 146
Source File: GoogleDriveClient.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperations.readwritedataframe.googlestorage import java.io.{ByteArrayInputStream, FileOutputStream} import java.util import com.google.api.client.googleapis.auth.oauth2.GoogleCredential import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport import com.google.api.client.http.FileContent import com.google.api.client.json.gson.GsonFactory import com.google.api.services.drive.model.File import com.google.api.services.drive.{Drive, DriveScopes} import ai.deepsense.commons.resources.ManagedResource import ai.deepsense.commons.utils.LoggerForCallerClass import ai.deepsense.deeplang.doperations.inout.CsvParameters.ColumnSeparatorChoice private[googlestorage] object GoogleDriveClient { val logger = LoggerForCallerClass() val googleSheetCsvSeparator = ColumnSeparatorChoice.Comma() private val ApplicationName = "Seahorse" private val Scopes = util.Arrays.asList(DriveScopes.DRIVE) def uploadCsvFileAsGoogleSheet( credentials: GoogleCretendialsJson, sheetId: GoogleSheetId, filePath: String ): Unit = { val fileMetadata = new File().setMimeType("application/vnd.google-apps.spreadsheet") val mediaContent = new FileContent("text/csv", new java.io.File(filePath)) driveService(credentials).files.update(sheetId, fileMetadata, mediaContent).execute } def downloadGoogleSheetAsCsvFile( credentials: GoogleCretendialsJson, sheetId: GoogleSheetId, filePath: String ): Unit = { val file = new java.io.File(filePath) file.getParentFile.mkdirs() ManagedResource(new FileOutputStream(file)) { fos => driveService(credentials).files().export(sheetId, "text/csv").executeMediaAndDownloadTo(fos) logger.info(s"Downloaded google sheet id=$sheetId to the file $filePath") } } private def driveService(serviceAccountCredentials: String): Drive = { val credential = { val in = new ByteArrayInputStream(serviceAccountCredentials.getBytes) GoogleCredential.fromStream(in).createScoped(Scopes) } new Drive.Builder( GoogleNetHttpTransport.newTrustedTransport(), jsonFactory, credential ).setApplicationName(ApplicationName).build } // Default choice is JacksonFactory. However spark depends on Jackson as well // and google/spark jackson versions are binary incompatible with each other. private val jsonFactory = GsonFactory.getDefaultInstance }
Example 147
Source File: PythonNotebook.scala From seahorse with Apache License 2.0 | 5 votes |
package ai.deepsense.deeplang.doperations import java.io.ByteArrayInputStream import ai.deepsense.commons.utils.Version import ai.deepsense.deeplang.DOperation.Id import ai.deepsense.deeplang.ExecutionContext import ai.deepsense.deeplang.doperables.dataframe.DataFrame import scala.concurrent.Await import scala.concurrent.duration.Duration import scala.reflect.runtime.{universe => ru} import scala.util.Failure import ai.deepsense.commons.rest.client.NotebookRestClient case class PythonNotebook() extends Notebook { override val id: Id = "e76ca616-0322-47a5-b390-70c9668265dd" override val name: String = "Python Notebook" override val description: String = "Creates a Python notebook with access to the DataFrame" override val since: Version = Version(1, 0, 0) override val notebookType: String = "python" override protected def execute(dataFrame: DataFrame)(context: ExecutionContext): Unit = { context.dataFrameStorage.setInputDataFrame(0, dataFrame.sparkDataFrame) headlessExecution(context) } }
Example 148
Source File: JavaSerde.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.core.serde import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectOutputStream} import akka.actor.ExtendedActorSystem import akka.serialization.JavaSerializer import akka.util.ClassLoaderObjectInputStream class JavaSerde(system: ExtendedActorSystem) extends Serde[AnyRef] { override def identifier: Int = 101 override def close(): Unit = () override def fromBytes(bytes: Array[Byte]): AnyRef = { val in = new ClassLoaderObjectInputStream(system.dynamicAccess.classLoader, new ByteArrayInputStream(bytes)) val obj = JavaSerializer.currentSystem.withValue(system) { in.readObject } in.close() obj } override def toBytes(o: AnyRef): Array[Byte] = { val bos = new ByteArrayOutputStream val out = new ObjectOutputStream(bos) JavaSerializer.currentSystem.withValue(system) { out.writeObject(o) } out.close() bos.toByteArray } }
Example 149
Source File: SeqSerde.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.core.serde.collection

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

import akka.actor.ExtendedActorSystem
import com.typesafe.config.Config
import io.amient.affinity.core.serde.{AbstractWrapSerde, Serde, Serdes}

class SeqSerde(serdes: Serdes) extends AbstractWrapSerde(serdes) with Serde[Seq[Any]] {

  def this(system: ExtendedActorSystem) = this(Serde.tools(system))
  def this(config: Config) = this(Serde.tools(config))

  override def identifier: Int = 141

  override def close(): Unit = ()

  override protected def fromBytes(bytes: Array[Byte]): Seq[Any] = {
    val di = new DataInputStream(new ByteArrayInputStream(bytes))
    val numItems = di.readInt()
    val result = ((1 to numItems) map { _ =>
      val len = di.readInt()
      val item = new Array[Byte](len)
      di.read(item)
      fromBinaryWrapped(item)
    }).toList
    di.close()
    result
  }

  override def toBytes(seq: Seq[Any]): Array[Byte] = {
    val os = new ByteArrayOutputStream()
    val d = new DataOutputStream(os)
    d.writeInt(seq.size)
    for (a: Any <- seq) a match {
      case ref: AnyRef =>
        val item = toBinaryWrapped(ref)
        d.writeInt(item.length)
        d.write(item)
    }
    os.close
    os.toByteArray
  }
}
Example 150
Source File: SetSerde.scala From affinity with Apache License 2.0 | 5 votes |
package io.amient.affinity.core.serde.collection

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

import akka.actor.ExtendedActorSystem
import com.typesafe.config.Config
import io.amient.affinity.core.serde.{AbstractWrapSerde, Serde, Serdes}

class SetSerde(serdes: Serdes) extends AbstractWrapSerde(serdes) with Serde[Set[Any]] {

  def this(system: ExtendedActorSystem) = this(Serde.tools(system))
  def this(config: Config) = this(Serde.tools(config))

  override def identifier: Int = 142

  override protected def fromBytes(bytes: Array[Byte]): Set[Any] = {
    val di = new DataInputStream(new ByteArrayInputStream(bytes))
    val numItems = di.readInt()
    val result = ((1 to numItems) map { _ =>
      val len = di.readInt()
      val item = new Array[Byte](len)
      di.read(item)
      fromBinaryWrapped(item)
    }).toSet
    di.close()
    result
  }

  override def toBytes(set: Set[Any]): Array[Byte] = {
    val os = new ByteArrayOutputStream()
    val d = new DataOutputStream(os)
    d.writeInt(set.size)
    for (a: Any <- set) a match {
      case ref: AnyRef =>
        val item = toBinaryWrapped(ref)
        d.writeInt(item.length)
        d.write(item)
    }
    os.close
    os.toByteArray
  }

  override def close() = ()
}
Example 151
Source File: S3DiffUploader.scala From shield with MIT License | 5 votes |
package shield.aws

import java.io.{ByteArrayInputStream, InputStream}
import java.nio.charset.StandardCharsets

import akka.actor.{Actor, ActorLogging, Props}
import com.amazonaws.auth.profile.ProfileCredentialsProvider
import com.amazonaws.services.s3.AmazonS3Client
import com.amazonaws.services.s3.model.ObjectMetadata
import shield.actors.listeners.ComparisonDiffFile

object S3DiffUploader {
  def props(bucket: String, folder: String): Props = Props(new S3DiffUploader(bucket, folder))
}

class S3DiffUploader(bucket: String, folder: String) extends Actor with ActorLogging {
  val s3Client = new AmazonS3Client()
  val charset = StandardCharsets.UTF_8
  val stripped = folder.stripPrefix("/").stripSuffix("/")
  val prefix = if (stripped.isEmpty) {
    stripped
  } else {
    stripped + "/"
  }

  def receive = {
    case file: ComparisonDiffFile =>
      val metadata = new ObjectMetadata()
      metadata.setContentLength(file.contents.length)
      s3Client.putObject(bucket, s"$prefix${file.fileName}", new ByteArrayInputStream(file.contents), metadata)
  }
}
Example 152
Source File: VinylRequest.scala From vinyldns with Apache License 2.0 | 5 votes |
package models

import java.io.{ByteArrayInputStream, InputStream}
import java.util

import com.amazonaws.{ReadLimitInfo, SignableRequest}
import com.amazonaws.http.HttpMethodName

object VinylDNSRequest {
  val APPLICATION_JSON = "application/json"
}

case class VinylDNSRequest(
    method: String,
    url: String,
    path: String = "",
    payload: Option[String] = None,
    parameters: util.HashMap[String, java.util.List[String]] =
      new util.HashMap[String, java.util.List[String]]())

class SignableVinylDNSRequest(origReq: VinylDNSRequest) extends SignableRequest[VinylDNSRequest] {

  import VinylDNSRequest._

  val contentType: String = APPLICATION_JSON

  private val headers = new util.HashMap[String, String]()
  private val parameters = origReq.parameters
  private val uri = new java.net.URI(origReq.url)
  // I hate to do this, but need to be able to set the content after creation to
  // implement the interface properly
  private var contentStream: InputStream = new ByteArrayInputStream(
    origReq.payload.getOrElse("").getBytes("UTF-8")
  )

  override def addHeader(name: String, value: String): Unit = headers.put(name, value)
  override def getHeaders: java.util.Map[String, String] = headers
  override def getResourcePath: String = origReq.path
  override def addParameter(name: String, value: String): Unit = {
    if (!parameters.containsKey(name)) parameters.put(name, new util.ArrayList[String]())
    parameters.get(name).add(value)
  }
  override def getParameters: java.util.Map[String, java.util.List[String]] = parameters
  override def getEndpoint: java.net.URI = uri
  override def getHttpMethod: HttpMethodName = HttpMethodName.valueOf(origReq.method)
  override def getTimeOffset: Int = 0
  override def getContent: InputStream = contentStream
  override def getContentUnwrapped: InputStream = getContent
  override def getReadLimitInfo: ReadLimitInfo = new ReadLimitInfo {
    override def getReadLimit: Int = -1
  }
  override def getOriginalRequestObject: Object = origReq
  override def setContent(content: InputStream): Unit = contentStream = content
}
Example 153
Source File: GenericAvroSerializerSuite.scala From spark1.52 with Apache License 2.0 | 5 votes |
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Output, Input}
import org.apache.avro.{SchemaBuilder, Schema}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SparkFunSuite, SharedSparkContext}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema: Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  // Uses the schema fingerprint to reduce the message size.
  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
}
Example 154
Source File: Unpacker.scala From haystack-traces with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trace.commons.packer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, InputStream}
import java.nio.ByteBuffer
import java.util.zip.GZIPInputStream

import com.expedia.open.tracing.buffer.SpanBuffer
import com.github.luben.zstd.ZstdInputStream
import org.apache.commons.io.IOUtils
import org.json4s.jackson.Serialization
import org.xerial.snappy.SnappyInputStream

object Unpacker {
  import PackedMessage._

  private def readMetadata(packedDataBytes: Array[Byte]): Array[Byte] = {
    val byteBuffer = ByteBuffer.wrap(packedDataBytes)
    val magicBytesExist = MAGIC_BYTES.indices forall { idx => byteBuffer.get() == MAGIC_BYTES.apply(idx) }
    if (magicBytesExist) {
      val headerLength = byteBuffer.getInt
      val metadataBytes = new Array[Byte](headerLength)
      byteBuffer.get(metadataBytes, 0, headerLength)
      metadataBytes
    } else {
      null
    }
  }

  private def unpack(compressedStream: InputStream) = {
    val outputStream = new ByteArrayOutputStream()
    IOUtils.copy(compressedStream, outputStream)
    outputStream.toByteArray
  }

  def readSpanBuffer(packedDataBytes: Array[Byte]): SpanBuffer = {
    var parsedDataBytes: Array[Byte] = null
    val metadataBytes = readMetadata(packedDataBytes)
    if (metadataBytes != null) {
      val packedMetadata = Serialization.read[PackedMetadata](new String(metadataBytes))
      val compressedDataOffset = MAGIC_BYTES.length + 4 + metadataBytes.length
      packedMetadata.t match {
        case PackerType.SNAPPY => parsedDataBytes = unpack(
          new SnappyInputStream(
            new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset)))
        case PackerType.GZIP => parsedDataBytes = unpack(
          new GZIPInputStream(
            new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset)))
        case PackerType.ZSTD => parsedDataBytes = unpack(
          new ZstdInputStream(
            new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset)))
        case _ => return SpanBuffer.parseFrom(
          new ByteArrayInputStream(packedDataBytes, compressedDataOffset, packedDataBytes.length - compressedDataOffset))
      }
    } else {
      parsedDataBytes = packedDataBytes
    }
    SpanBuffer.parseFrom(parsedDataBytes)
  }
}
Example 155
Source File: Packer.scala From haystack-traces with Apache License 2.0 | 5 votes |
package com.expedia.www.haystack.trace.commons.packer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, OutputStream}
import java.util.zip.GZIPOutputStream

import com.expedia.www.haystack.trace.commons.packer.PackerType.PackerType
import com.github.luben.zstd.ZstdOutputStream
import com.google.protobuf.GeneratedMessageV3
import org.apache.commons.io.IOUtils
import org.xerial.snappy.SnappyOutputStream

object PackerType extends Enumeration {
  type PackerType = Value
  val GZIP, SNAPPY, NONE, ZSTD = Value
}

case class PackedMetadata(t: PackerType)

abstract class Packer[T <: GeneratedMessageV3] {
  val packerType: PackerType

  protected def compressStream(stream: OutputStream): OutputStream

  private def pack(protoObj: T): Array[Byte] = {
    val outStream = new ByteArrayOutputStream
    val compressedStream = compressStream(outStream)
    if (compressedStream != null) {
      IOUtils.copy(new ByteArrayInputStream(protoObj.toByteArray), compressedStream)
      compressedStream.close() // this flushes the data to final outStream
      outStream.toByteArray
    } else {
      protoObj.toByteArray
    }
  }

  def apply(protoObj: T): PackedMessage[T] = {
    PackedMessage(protoObj, pack, PackedMetadata(packerType))
  }
}

class NoopPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.NONE
  override protected def compressStream(stream: OutputStream): OutputStream = null
}

class SnappyPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.SNAPPY
  override protected def compressStream(stream: OutputStream): OutputStream = new SnappyOutputStream(stream)
}

class ZstdPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.ZSTD
  override protected def compressStream(stream: OutputStream): OutputStream = new ZstdOutputStream(stream)
}

class GzipPacker[T <: GeneratedMessageV3] extends Packer[T] {
  override val packerType = PackerType.GZIP
  override protected def compressStream(stream: OutputStream): OutputStream = new GZIPOutputStream(stream)
}
Example 156
Source File: ParquetIOTest.scala From ratatool with Apache License 2.0 | 5 votes |
package com.spotify.ratatool.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}
import java.nio.file.Files

import com.spotify.ratatool.Schemas
import com.spotify.ratatool.avro.specific.TestRecord
import com.spotify.ratatool.scalacheck._
import org.apache.commons.io.FileUtils
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class ParquetIOTest extends AnyFlatSpec with Matchers {

  private val genericSchema = Schemas.avroSchema
  private val genericGen = genericRecordOf(genericSchema)
  private val genericData = (1 to 100).flatMap(_ => genericGen.sample)

  private val specificSchema = TestRecord.getClassSchema
  private val specificGen = specificRecordOf[TestRecord]
  private val specificData = (1 to 100).flatMap(_ => specificGen.sample)

  "ParquetIO" should "work with generic record and stream" in {
    val out = new ByteArrayOutputStream()
    ParquetIO.writeToOutputStream(genericData, genericSchema, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = ParquetIO.readFromInputStream(in).toList
    result should equal (genericData)
  }

  it should "work with generic record and file" in {
    val dir = Files.createTempDirectory("ratatool-")
    val file = new File(dir.toString, "temp.parquet")
    ParquetIO.writeToFile(genericData, genericSchema, file)
    val result = ParquetIO.readFromFile(file).toList
    result should equal (genericData)
    FileUtils.deleteDirectory(dir.toFile)
  }

  it should "work with specific record and stream" in {
    val out = new ByteArrayOutputStream()
    ParquetIO.writeToOutputStream(specificData, specificSchema, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = ParquetIO.readFromInputStream[TestRecord](in).toList
    result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_)))
  }

  it should "work with specific record and file" in {
    val dir = Files.createTempDirectory("ratatool-")
    val file = new File(dir.toString, "temp.parquet")
    ParquetIO.writeToFile(specificData, specificSchema, file)
    val result = ParquetIO.readFromFile[TestRecord](file).toList
    result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_)))
    FileUtils.deleteDirectory(dir.toFile)
  }
}
Example 157
Source File: AvroIOTest.scala From ratatool with Apache License 2.0 | 5 votes |
package com.spotify.ratatool.io

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File}

import com.spotify.ratatool.Schemas
import com.spotify.ratatool.avro.specific.TestRecord
import org.apache.avro.generic.GenericRecord
import com.spotify.ratatool.scalacheck._
import org.scalatest.flatspec.AnyFlatSpec
import org.scalatest.matchers.should.Matchers

class AvroIOTest extends AnyFlatSpec with Matchers {

  private val genericSchema = Schemas.avroSchema
  private val genericGen = genericRecordOf(genericSchema)
  private val genericData = (1 to 100).flatMap(_ => genericGen.sample)

  private val specificSchema = TestRecord.getClassSchema
  private val specificGen = specificRecordOf[TestRecord]
  private val specificData = (1 to 100).flatMap(_ => specificGen.sample)

  "AvroIO" should "work with generic record and stream" in {
    val out = new ByteArrayOutputStream()
    AvroIO.writeToOutputStream(genericData, genericSchema, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = AvroIO.readFromInputStream[GenericRecord](in).toList
    result should equal (genericData)
  }

  it should "work with generic record and file" in {
    val file = File.createTempFile("ratatool-", ".avro")
    file.deleteOnExit()
    AvroIO.writeToFile(genericData, genericSchema, file)
    val result = AvroIO.readFromFile[GenericRecord](file).toList
    result should equal (genericData)
  }

  it should "work with specific record and stream" in {
    val out = new ByteArrayOutputStream()
    AvroIO.writeToOutputStream(specificData, specificSchema, out)
    val in = new ByteArrayInputStream(out.toByteArray)
    val result = AvroIO.readFromInputStream[TestRecord](in).toList
    result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_)))
  }

  it should "work with specific record and file" in {
    val file = File.createTempFile("ratatool-", ".avro")
    file.deleteOnExit()
    AvroIO.writeToFile(specificData, specificSchema, file)
    val result = AvroIO.readFromFile[TestRecord](file).toList
    result.map(FixRandomData(_)) should equal (specificData.map(FixRandomData(_)))
  }
}
Example 158
Source File: MockHelpers.scala From guardrail with MIT License | 5 votes |
package helpers

import com.fasterxml.jackson.databind.ObjectMapper
import io.netty.handler.codec.http.EmptyHttpHeaders
import java.io.ByteArrayInputStream
import java.nio.ByteBuffer
import java.nio.charset.StandardCharsets
import java.util.Collections
import java.util.concurrent.CompletableFuture
import javax.ws.rs.container.AsyncResponse
import org.asynchttpclient.Response
import org.asynchttpclient.uri.Uri
import org.mockito.{ ArgumentMatchersSugar, MockitoSugar }
import org.scalatest.Assertions
import scala.reflect.ClassTag

object MockHelpers extends Assertions with MockitoSugar with ArgumentMatchersSugar {

  def mockAsyncResponse[T](future: CompletableFuture[T])(implicit cls: ClassTag[T]): AsyncResponse = {
    val asyncResponse = mock[AsyncResponse]

    when(asyncResponse.resume(any[T])) thenAnswer [AnyRef] { response =>
      response match {
        case t: Throwable => future.completeExceptionally(t)
        case other: T     => future.complete(other)
        case other =>
          fail(s"AsyncResponse.resume expected an object of type ${cls.runtimeClass.getName}, but got ${other.getClass.getName} instead")
      }
    }

    asyncResponse
  }

  def mockAHCResponse[T](uri: String, status: Int, maybeBody: Option[T] = None)(implicit mapper: ObjectMapper): Response = {
    val response = mock[Response]
    when(response.getUri) thenReturn Uri.create(uri)

    when(response.hasResponseStatus) thenReturn true
    when(response.getStatusCode) thenReturn status
    when(response.getStatusText) thenReturn "Some Status"

    when(response.hasResponseHeaders) thenReturn true
    when(response.getHeaders) thenReturn EmptyHttpHeaders.INSTANCE
    when(response.getHeader(any)) thenReturn null
    when(response.getHeaders(any)) thenReturn Collections.emptyList()

    maybeBody match {
      case None =>
        when(response.hasResponseBody) thenReturn true
      case Some(body) =>
        val responseBytes = mapper.writeValueAsBytes(body)
        val responseStr = new String(responseBytes, StandardCharsets.UTF_8)
        when(response.hasResponseBody) thenReturn true
        when(response.getResponseBody(any)) thenReturn responseStr
        when(response.getResponseBody) thenReturn responseStr
        when(response.getResponseBodyAsStream) thenReturn new ByteArrayInputStream(responseBytes)
        when(response.getResponseBodyAsByteBuffer) thenReturn ByteBuffer.wrap(responseBytes)
        when(response.getResponseBodyAsBytes) thenReturn responseBytes
    }

    response
  }
}
Example 159
Source File: MeetupReceiver.scala From meetup-stream with Apache License 2.0 | 5 votes |
package receiver

import org.apache.spark.streaming.receiver.Receiver
import org.apache.spark.storage.StorageLevel
import org.apache.spark.Logging

import com.ning.http.client.AsyncHttpClientConfig
import com.ning.http.client._

import scala.collection.mutable.ArrayBuffer

import java.io.OutputStream
import java.io.ByteArrayInputStream
import java.io.InputStreamReader
import java.io.BufferedReader
import java.io.InputStream
import java.io.PipedInputStream
import java.io.PipedOutputStream

class MeetupReceiver(url: String) extends Receiver[String](StorageLevel.MEMORY_AND_DISK_2) with Logging {

  @transient var client: AsyncHttpClient = _
  @transient var inputPipe: PipedInputStream = _
  @transient var outputPipe: PipedOutputStream = _

  def onStart() {
    val cf = new AsyncHttpClientConfig.Builder()
    cf.setRequestTimeout(Integer.MAX_VALUE)
    cf.setReadTimeout(Integer.MAX_VALUE)
    cf.setPooledConnectionIdleTimeout(Integer.MAX_VALUE)
    client = new AsyncHttpClient(cf.build())

    inputPipe = new PipedInputStream(1024 * 1024)
    outputPipe = new PipedOutputStream(inputPipe)

    val producerThread = new Thread(new DataConsumer(inputPipe))
    producerThread.start()

    client.prepareGet(url).execute(new AsyncHandler[Unit] {

      def onBodyPartReceived(bodyPart: HttpResponseBodyPart) = {
        bodyPart.writeTo(outputPipe)
        AsyncHandler.STATE.CONTINUE
      }

      def onStatusReceived(status: HttpResponseStatus) = {
        AsyncHandler.STATE.CONTINUE
      }

      def onHeadersReceived(headers: HttpResponseHeaders) = {
        AsyncHandler.STATE.CONTINUE
      }

      def onCompleted = {
        println("completed")
      }

      def onThrowable(t: Throwable) = {
        t.printStackTrace()
      }

    })
  }

  def onStop() {
    if (Option(client).isDefined) client.close()
    if (Option(outputPipe).isDefined) {
      outputPipe.flush()
      outputPipe.close()
    }
    if (Option(inputPipe).isDefined) {
      inputPipe.close()
    }
  }

  class DataConsumer(inputStream: InputStream) extends Runnable {

    override def run() {
      val bufferedReader = new BufferedReader(new InputStreamReader(inputStream))
      var input = bufferedReader.readLine()
      while (input != null) {
        store(input)
        input = bufferedReader.readLine()
      }
    }

  }

}
Example 160
Source File: GithubIssue485.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.github

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import com.sksamuel.avro4s.record.decoder.CPWrapper
import com.sksamuel.avro4s.{AvroSchema, Decoder, DefaultFieldMapper}
import org.apache.avro.generic.GenericData
import org.apache.avro.util.Utf8
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers
import shapeless.Coproduct

class GithubIssue485 extends AnyFunSuite with Matchers {

  test("Serializable Coproduct Decoder #485") {
    val baos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(baos)
    oos.writeObject(Decoder[CPWrapper])
    oos.close()

    val decoder =
      new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray)).readObject().asInstanceOf[Decoder[CPWrapper]]

    val schema = AvroSchema[CPWrapper]
    val record = new GenericData.Record(schema)
    record.put("u", new Utf8("wibble"))
    decoder.decode(record) shouldBe CPWrapper(Coproduct[CPWrapper.ISBG]("wibble"))
  }
}
Example 161
Source File: GithubIssue484.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.github

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import com.sksamuel.avro4s.record.decoder.ScalaEnumClass
import com.sksamuel.avro4s.schema.Colours
import com.sksamuel.avro4s.{AvroSchema, Decoder, DefaultFieldMapper}
import org.apache.avro.generic.GenericData
import org.apache.avro.generic.GenericData.EnumSymbol
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

class GithubIssue484 extends AnyFunSuite with Matchers {

  test("Serializable Scala Enum Decoder #484") {
    val baos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(baos)
    oos.writeObject(Decoder[ScalaEnumClass])
    oos.close()

    val decoder = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray))
      .readObject()
      .asInstanceOf[Decoder[ScalaEnumClass]]

    val schema = AvroSchema[ScalaEnumClass]
    val record = new GenericData.Record(schema)
    record.put("colour", new EnumSymbol(schema.getField("colour").schema(), "Green"))
    decoder.decode(record) shouldBe ScalaEnumClass(Colours.Green)
  }
}
Example 162
Source File: GithubIssue432.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.github

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}

import com.sksamuel.avro4s.Encoder
import org.scalatest.{FunSuite, Matchers}

class GithubIssue432 extends FunSuite with Matchers {

  test("Serializable Encoder[BigDecimal] #432") {
    val oos = new ObjectOutputStream(new ByteArrayOutputStream())
    oos.writeObject(Encoder.bigDecimalEncoder)
    oos.close()
  }

  test("Deserialized Encoder[BigDecimal] works") {
    val baos = new ByteArrayOutputStream()
    val oos = new ObjectOutputStream(baos)
    oos.writeObject(Encoder.bigDecimalEncoder)
    oos.close()

    val ois = new ObjectInputStream(new ByteArrayInputStream(baos.toByteArray))
    val encoder = ois.readObject().asInstanceOf[Encoder[BigDecimal]]
    encoder.encode(12.34)
  }
}
Example 163
Source File: SchemaEvolutionTest.scala From avro4s with Apache License 2.0 | 5 votes |
package com.sksamuel.avro4s.record.decoder

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}

import com.sksamuel.avro4s._
import org.apache.avro.SchemaBuilder
import org.apache.avro.generic.GenericData
import org.apache.avro.util.Utf8
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.matchers.should.Matchers

class SchemaEvolutionTest extends AnyFunSuite with Matchers {

  case class Version1(original: String)
  case class Version2(@AvroAlias("original") renamed: String)

  case class P1(name: String, age: Int = 18)
  case class P2(name: String)

  case class OptionalStringTest(a: String, b: Option[String])
  case class DefaultStringTest(a: String, b: String = "foo")

  ignore("@AvroAlias should be used when a reader schema has a field missing from the write schema") {

    val v1schema = AvroSchema[Version1]
    val v1 = Version1("hello")

    val baos = new ByteArrayOutputStream()
    val output = AvroOutputStream.data[Version1].to(baos).build()
    output.write(v1)
    output.close()

    // we load using a v2 schema
    val is = new AvroDataInputStream[Version2](new ByteArrayInputStream(baos.toByteArray), Some(v1schema))
    val v2 = is.iterator.toList.head

    v2.renamed shouldBe v1.original
  }

  test("when decoding, if the record and schema are missing a field and the target has a scala default, use that") {
    val f1 = RecordFormat[P1]
    val f2 = RecordFormat[P2]
    f1.from(f2.to(P2("foo"))) shouldBe P1("foo")
  }

  test("when decoding, if the record is missing a field that is present in the schema with a default, use the default from the schema") {
    val schema = SchemaBuilder.record("foo").fields().requiredString("a").endRecord()
    val record = new GenericData.Record(schema)
    record.put("a", new Utf8("hello"))
    Decoder[DefaultStringTest].decode(record) shouldBe DefaultStringTest("hello")
  }

  test("when decoding, if the record is missing a field that is present in the schema and the type is option, then set to None") {
    val schema1 = SchemaBuilder.record("foo").fields().requiredString("a").endRecord()
    val schema2 = SchemaBuilder.record("foo").fields().requiredString("a").optionalString("b").endRecord()
    val record = new GenericData.Record(schema1)
    record.put("a", new Utf8("hello"))
    Decoder[OptionalStringTest].decode(record) shouldBe OptionalStringTest("hello", None)
  }
}
Example 164
Source File: ProtoMarshaller.scala From akka-grpc with Apache License 2.0 | 5 votes |
package akka.grpc.internal

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, InputStream }

import io.grpc.KnownLength

import akka.annotation.InternalApi
import akka.grpc.ProtobufSerializer

@InternalApi
class ProtoMarshaller[T <: com.google.protobuf.Message](u: ProtobufSerializer[T])
    extends io.grpc.MethodDescriptor.Marshaller[T] {

  override def parse(stream: InputStream): T = {
    val baos = new ByteArrayOutputStream(math.max(64, stream.available()))
    val buffer = new Array[Byte](32 * 1024)

    // Blocking calls underneath...
    // we can't avoid it for the moment because we are relying on the Netty's Channel API
    var bytesRead = stream.read(buffer)
    while (bytesRead >= 0) {
      baos.write(buffer, 0, bytesRead)
      bytesRead = stream.read(buffer)
    }
    u.deserialize(akka.util.ByteString(baos.toByteArray))
  }

  override def stream(value: T): InputStream =
    new ByteArrayInputStream(value.toByteArray) with KnownLength
}
Example 165
Source File: Gzip.scala From akka-grpc with Apache License 2.0 | 5 votes |
package akka.grpc.internal

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream }
import java.util.zip.{ GZIPInputStream, GZIPOutputStream }

import akka.util.ByteString

object Gzip extends Codec {
  override val name: String = "gzip"

  override def compress(uncompressed: ByteString): ByteString = {
    val baos = new ByteArrayOutputStream(uncompressed.size)
    val gzos = new GZIPOutputStream(baos)
    gzos.write(uncompressed.toArray)
    gzos.flush()
    gzos.close()
    ByteString(baos.toByteArray)
  }

  override def uncompress(compressed: ByteString): ByteString = {
    val gzis = new GZIPInputStream(new ByteArrayInputStream(compressed.toArray))

    val baos = new ByteArrayOutputStream(compressed.size)
    val buffer = new Array[Byte](32 * 1024)
    var read = gzis.read(buffer)
    while (read != -1) {
      baos.write(buffer, 0, read)
      read = gzis.read(buffer)
    }
    ByteString(baos.toByteArray)
  }
}
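A small round-trip check for the codec above (added for illustration, not part of the original file): compressing and then uncompressing should restore the original bytes.

import akka.util.ByteString

val original = ByteString("hello gzip")
val compressed = Gzip.compress(original)
assert(Gzip.uncompress(compressed) == original)   // the GZIPInputStream path restores the payload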
Example 166
Source File: Marshaller.scala From akka-grpc with Apache License 2.0 | 5 votes |
package akka.grpc.internal

import java.io.{ ByteArrayInputStream, ByteArrayOutputStream, InputStream }

import io.grpc.KnownLength

import akka.annotation.InternalApi
import akka.grpc.ProtobufSerializer

@InternalApi
final class Marshaller[T <: scalapb.GeneratedMessage](u: ProtobufSerializer[T])
    extends io.grpc.MethodDescriptor.Marshaller[T] {

  override def parse(stream: InputStream): T = {
    val baos = new ByteArrayOutputStream(math.max(64, stream.available()))
    val buffer = new Array[Byte](32 * 1024)

    // Blocking calls underneath...
    // we can't avoid it for the moment because we are relying on the Netty's Channel API
    var bytesRead = stream.read(buffer)
    while (bytesRead >= 0) {
      baos.write(buffer, 0, bytesRead)
      bytesRead = stream.read(buffer)
    }
    u.deserialize(akka.util.ByteString(baos.toByteArray))
  }

  override def stream(value: T): InputStream =
    new ByteArrayInputStream(value.toByteArray) with KnownLength
}
Example 167
Source File: SslContexts.scala From kubernetes-client with Apache License 2.0 | 5 votes |
package com.goyeau.kubernetes.client.util

import java.io.{ByteArrayInputStream, File, FileInputStream, InputStreamReader}
import java.security.cert.{CertificateFactory, X509Certificate}
import java.security.{KeyStore, SecureRandom, Security}
import java.util.Base64

import com.goyeau.kubernetes.client.KubeConfig
import javax.net.ssl.{KeyManagerFactory, SSLContext, TrustManagerFactory}
import org.bouncycastle.jce.provider.BouncyCastleProvider
import org.bouncycastle.openssl.jcajce.JcaPEMKeyConverter
import org.bouncycastle.openssl.{PEMKeyPair, PEMParser}

object SslContexts {
  private val TrustStoreSystemProperty = "javax.net.ssl.trustStore"
  private val TrustStorePasswordSystemProperty = "javax.net.ssl.trustStorePassword"
  private val KeyStoreSystemProperty = "javax.net.ssl.keyStore"
  private val KeyStorePasswordSystemProperty = "javax.net.ssl.keyStorePassword"

  def fromConfig(config: KubeConfig): SSLContext = {
    val sslContext = SSLContext.getInstance("TLS")
    sslContext.init(keyManagers(config), trustManagers(config), new SecureRandom)
    sslContext
  }

  private def keyManagers(config: KubeConfig) = {
    // Client certificate
    val certDataStream = config.clientCertData.map(data => new ByteArrayInputStream(Base64.getDecoder.decode(data)))
    val certFileStream = config.clientCertFile.map(new FileInputStream(_))

    // Client key
    val keyDataStream = config.clientKeyData.map(data => new ByteArrayInputStream(Base64.getDecoder.decode(data)))
    val keyFileStream = config.clientKeyFile.map(new FileInputStream(_))

    for {
      keyStream  <- keyDataStream.orElse(keyFileStream)
      certStream <- certDataStream.orElse(certFileStream)
    } yield {
      Security.addProvider(new BouncyCastleProvider())
      val pemKeyPair =
        new PEMParser(new InputStreamReader(keyStream)).readObject().asInstanceOf[PEMKeyPair] // scalafix:ok
      val privateKey = new JcaPEMKeyConverter().setProvider("BC").getPrivateKey(pemKeyPair.getPrivateKeyInfo)

      val certificateFactory = CertificateFactory.getInstance("X509")
      val certificate = certificateFactory.generateCertificate(certStream).asInstanceOf[X509Certificate] // scalafix:ok

      defaultKeyStore.setKeyEntry(
        certificate.getSubjectX500Principal.getName,
        privateKey,
        config.clientKeyPass.fold(Array.empty[Char])(_.toCharArray),
        Array(certificate)
      )
    }

    val keyManagerFactory = KeyManagerFactory.getInstance(KeyManagerFactory.getDefaultAlgorithm)
    keyManagerFactory.init(defaultKeyStore, Array.empty)
    keyManagerFactory.getKeyManagers
  }

  private lazy val defaultKeyStore = {
    val propertyKeyStoreFile =
      Option(System.getProperty(KeyStoreSystemProperty, "")).filter(_.nonEmpty).map(new File(_))

    val keyStore = KeyStore.getInstance(KeyStore.getDefaultType)
    keyStore.load(
      propertyKeyStoreFile.map(new FileInputStream(_)).orNull,
      System.getProperty(KeyStorePasswordSystemProperty, "").toCharArray
    )
    keyStore
  }

  private def trustManagers(config: KubeConfig) = {
    val certDataStream = config.caCertData.map(data => new ByteArrayInputStream(Base64.getDecoder.decode(data)))
    val certFileStream = config.caCertFile.map(new FileInputStream(_))

    certDataStream.orElse(certFileStream).foreach { certStream =>
      val certificateFactory = CertificateFactory.getInstance("X509")
      val certificate = certificateFactory.generateCertificate(certStream).asInstanceOf[X509Certificate] // scalafix:ok
      defaultTrustStore.setCertificateEntry(certificate.getSubjectX500Principal.getName, certificate)
    }

    val trustManagerFactory = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm)
    trustManagerFactory.init(defaultTrustStore)
    trustManagerFactory.getTrustManagers
  }

  private lazy val defaultTrustStore = {
    val securityDirectory = s"${System.getProperty("java.home")}/lib/security"

    val propertyTrustStoreFile =
      Option(System.getProperty(TrustStoreSystemProperty, "")).filter(_.nonEmpty).map(new File(_))
    val jssecacertsFile = Option(new File(s"$securityDirectory/jssecacerts")).filter(f => f.exists && f.isFile)
    val cacertsFile = new File(s"$securityDirectory/cacerts")

    val keyStore = KeyStore.getInstance(KeyStore.getDefaultType)
    keyStore.load(
      new FileInputStream(propertyTrustStoreFile.orElse(jssecacertsFile).getOrElse(cacertsFile)),
      System.getProperty(TrustStorePasswordSystemProperty, "changeit").toCharArray
    )
    keyStore
  }
}
Example 168
Source File: ScaleAndConvert.scala From SparkNet with MIT License | 5 votes |
package preprocessing

import java.awt.image.DataBufferByte
import java.io.ByteArrayInputStream
import javax.imageio.ImageIO

import scala.collection.mutable.ArrayBuffer
import scala.collection.JavaConversions._

import net.coobird.thumbnailator._

import org.apache.spark.rdd.RDD

import libs._

object ScaleAndConvert {
  def BufferedImageToByteArray(image: java.awt.image.BufferedImage): Array[Byte] = {
    val height = image.getHeight()
    val width = image.getWidth()
    val pixels = image.getRGB(0, 0, width, height, null, 0, width)
    val result = new Array[Byte](3 * height * width)
    var row = 0
    while (row < height) {
      var col = 0
      while (col < width) {
        val rgb = pixels(row * width + col)
        result(0 * height * width + row * width + col) = ((rgb >> 16) & 0xFF).toByte
        result(1 * height * width + row * width + col) = ((rgb >> 8) & 0xFF).toByte
        result(2 * height * width + row * width + col) = (rgb & 0xFF).toByte
        col += 1
      }
      row += 1
    }
    result
  }

  def decompressImageAndResize(compressedImage: Array[Byte], height: Int, width: Int): Option[Array[Byte]] = {
    // this method takes a JPEG, decompresses it, and resizes it
    try {
      val im = ImageIO.read(new ByteArrayInputStream(compressedImage))
      val resizedImage = Thumbnails.of(im).forceSize(width, height).asBufferedImage()
      Some(BufferedImageToByteArray(resizedImage))
    } catch {
      // If images can't be processed properly, just ignore them
      case e: java.lang.IllegalArgumentException => None
      case e: javax.imageio.IIOException => None
      case e: java.lang.NullPointerException => None
    }
  }
}
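A hedged usage sketch for decompressImageAndResize (not part of the original file): the JPEG path is a placeholder and the 227x227 target size is only an example.

import java.nio.file.{Files, Paths}

val jpegBytes: Array[Byte] = Files.readAllBytes(Paths.get("/tmp/example.jpg"))  // hypothetical path

ScaleAndConvert.decompressImageAndResize(jpegBytes, height = 227, width = 227) match {
  case Some(pixels) => println(s"Resized image holds ${pixels.length} bytes (3 * 227 * 227)")
  case None         => println("Image could not be decoded")
}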
Example 169
Source File: TestingTypedCount.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.hive.execution

import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream}

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.aggregate.{ImperativeAggregate, TypedImperativeAggregate}
import org.apache.spark.sql.hive.execution.TestingTypedCount.State
import org.apache.spark.sql.types._

@ExpressionDescription(
  usage = "_FUNC_(expr) - A testing aggregate function resembles COUNT " +
          "but implements ObjectAggregateFunction.")
case class TestingTypedCount(
    child: Expression,
    mutableAggBufferOffset: Int = 0,
    inputAggBufferOffset: Int = 0)
  extends TypedImperativeAggregate[TestingTypedCount.State] {

  def this(child: Expression) = this(child, 0, 0)

  override def children: Seq[Expression] = child :: Nil

  override def dataType: DataType = LongType

  override def nullable: Boolean = false

  override def createAggregationBuffer(): State = TestingTypedCount.State(0L)

  override def update(buffer: State, input: InternalRow): State = {
    if (child.eval(input) != null) {
      buffer.count += 1
    }
    buffer
  }

  override def merge(buffer: State, input: State): State = {
    buffer.count += input.count
    buffer
  }

  override def eval(buffer: State): Any = buffer.count

  override def serialize(buffer: State): Array[Byte] = {
    val byteStream = new ByteArrayOutputStream()
    val dataStream = new DataOutputStream(byteStream)
    dataStream.writeLong(buffer.count)
    byteStream.toByteArray
  }

  override def deserialize(storageFormat: Array[Byte]): State = {
    val byteStream = new ByteArrayInputStream(storageFormat)
    val dataStream = new DataInputStream(byteStream)
    TestingTypedCount.State(dataStream.readLong())
  }

  override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): ImperativeAggregate =
    copy(mutableAggBufferOffset = newMutableAggBufferOffset)

  override def withNewInputAggBufferOffset(newInputAggBufferOffset: Int): ImperativeAggregate =
    copy(inputAggBufferOffset = newInputAggBufferOffset)

  override val prettyName: String = "typed_count"
}

object TestingTypedCount {
  case class State(var count: Long)
}
Example 170
Source File: CreateJacksonParser.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.sql.catalyst.json

import java.io.{ByteArrayInputStream, InputStream, InputStreamReader}

import com.fasterxml.jackson.core.{JsonFactory, JsonParser}
import org.apache.hadoop.io.Text

import org.apache.spark.unsafe.types.UTF8String

private[sql] object CreateJacksonParser extends Serializable {
  def string(jsonFactory: JsonFactory, record: String): JsonParser = {
    jsonFactory.createParser(record)
  }

  def utf8String(jsonFactory: JsonFactory, record: UTF8String): JsonParser = {
    val bb = record.getByteBuffer
    assert(bb.hasArray)

    val bain = new ByteArrayInputStream(
      bb.array(), bb.arrayOffset() + bb.position(), bb.remaining())

    jsonFactory.createParser(new InputStreamReader(bain, "UTF-8"))
  }

  def text(jsonFactory: JsonFactory, record: Text): JsonParser = {
    jsonFactory.createParser(record.getBytes, 0, record.getLength)
  }

  def inputStream(jsonFactory: JsonFactory, record: InputStream): JsonParser = {
    jsonFactory.createParser(record)
  }
}
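A tiny illustration of the string variant above (added here for context, not part of the Spark source): the returned JsonParser is then driven token by token by the caller.

import com.fasterxml.jackson.core.JsonFactory

val parser = CreateJacksonParser.string(new JsonFactory(), """{"data": 1}""")
println(parser.nextToken())   // START_OBJECT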
Example 171
Source File: GenericAvroSerializerSuite.scala From Spark-2.3.1 with Apache License 2.0 | 5 votes |
package org.apache.spark.serializer

import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import java.nio.ByteBuffer

import com.esotericsoftware.kryo.io.{Input, Output}
import org.apache.avro.{Schema, SchemaBuilder}
import org.apache.avro.generic.GenericData.Record

import org.apache.spark.{SharedSparkContext, SparkFunSuite}

class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext {
  conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

  val schema: Schema = SchemaBuilder
    .record("testRecord").fields()
    .requiredString("data")
    .endRecord()
  val record = new Record(schema)
  record.put("data", "test data")

  test("schema compression and decompression") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema))))
  }

  test("record serialization and deserialization") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)

    val outputStream = new ByteArrayOutputStream()
    val output = new Output(outputStream)
    genericSer.serializeDatum(record, output)
    output.flush()
    output.close()

    val input = new Input(new ByteArrayInputStream(outputStream.toByteArray))
    assert(genericSer.deserializeDatum(input) === record)
  }

  test("uses schema fingerprint to decrease message size") {
    val genericSerFull = new GenericAvroSerializer(conf.getAvroSchema)

    val output = new Output(new ByteArrayOutputStream())

    val beginningNormalPosition = output.total()
    genericSerFull.serializeDatum(record, output)
    output.flush()
    val normalLength = output.total - beginningNormalPosition

    conf.registerAvroSchemas(schema)
    val genericSerFinger = new GenericAvroSerializer(conf.getAvroSchema)
    val beginningFingerprintPosition = output.total()
    genericSerFinger.serializeDatum(record, output)
    val fingerprintLength = output.total - beginningFingerprintPosition

    assert(fingerprintLength < normalLength)
  }

  test("caches previously seen schemas") {
    val genericSer = new GenericAvroSerializer(conf.getAvroSchema)
    val compressedSchema = genericSer.compress(schema)
    val decompressedSchema = genericSer.decompress(ByteBuffer.wrap(compressedSchema))

    assert(compressedSchema.eq(genericSer.compress(schema)))
    assert(decompressedSchema.eq(genericSer.decompress(ByteBuffer.wrap(compressedSchema))))
  }
}